trial_index,submit_time,queue_time,worker_generator_uuid,start_time,end_time,run_time,program_string,exit_code,signal,hostname,OO_Info_SLURM_JOB_ID,arm_name,trial_status,generation_node,VAL_LOSS,PARAMS,batch_size,ffn,max_turnstiles,epochs,n_layers,d_model,max_sequence_length,lr,n_heads,val_batches,batches_per_epoch
0,,,,,,,,,,,,0_0,RUNNING,SOBOL,,,307,27,45,48,2,3,54,0.063384837764567145135252701493,30,128,20
1,,,,,,,,,,,,1_0,RUNNING,SOBOL,,,173,15,33,175,1,2,23,0.034098375645839708314444038706,1,128,20
2,1778317528,41,8eb7b6fd-45a6-44db-a267-3aa0d159f529,1778317569,1778320326,2757,python3 /data/horse/ws/s3811141-grok3/train.py --batch-size=38 --topo --topo-every 1 --kelp-every=1 --n-layers=1 --d-model=2 --n-heads=13 --tokenizer_initial_nr=1000 --lr=0.02021439389253770758 --batches-per-epoch=20 --val-batches=128 --epochs=76 --task=turnstile --max-seq-len 44 --max-turnstiles 53 --scheduler=cosine --ffn=24,0,,c94,3512908,2_0,COMPLETED,SOBOL,0.6679720000000000101891828308,4498,38,24,53,76,1,2,44,0.020214393892537707575485228517,13,128,20
3,,,,,,,,,,,,3_0,RUNNING,SOBOL,,,412,5,6,146,2,3,16,0.077622730962404712373725601537,18,128,20
4,,,,,,,,,,,,4_0,RUNNING,SOBOL,,,512,19,20,114,2,3,35,0.012238136337769963193911060273,12,128,20
5,,,,,,,,,,,,5_0,RUNNING,SOBOL,,,125,8,40,83,1,2,11,0.091548388707318906210019804348,23,128,20
6,,,,,,,,,,,,6_0,RUNNING,SOBOL,,,207,32,19,182,1,2,64,0.055725390407766778322340428531,27,128,20
7,,,,,,,,,,,,7_0,RUNNING,SOBOL,,,337,13,59,16,2,3,29,0.048293164856589403111097880128,8,128,20
8,,,,,,,,,,,,8_0,RUNNING,SOBOL,,,382,22,14,94,1,3,41,0.005565302072638832718565193858,9,128,20
9,,,,,,,,,,,,9_0,RUNNING,SOBOL,,,256,2,61,128,2,2,5,0.098129659898825369324093514933,22,128,20
10,1778317531,43,8eb7b6fd-45a6-44db-a267-3aa0d159f529,1778317574,1778319185,1611,python3 /data/horse/ws/s3811141-grok3/train.py --batch-size=81 --topo --topo-every 1 --kelp-every=1 --n-layers=2 --d-model=2 --n-heads=26 --tokenizer_initial_nr=1000 --lr=0.06240741495487392432 --batches-per-epoch=20 --val-batches=128 --epochs=29 --task=turnstile --max-seq-len 58 --max-turnstiles 26 --scheduler=cosine --ffn=26,0,,c3,3512914,10_0,COMPLETED,SOBOL,0.341519999999999990247800951693,28602,81,26,26,29,2,2,58,0.062407414954873924317624300784,26,128,20
11,,,,,,,,,,,,11_0,RUNNING,SOBOL,,,463,15,38,192,1,3,34,0.041714886560315828090139689266,5,128,20
12,,,,,,,,,,,,12_0,RUNNING,SOBOL,,,430,29,51,164,1,3,52,0.069984039784804080719382568532,31,128,20
13,1778317528,46,8eb7b6fd-45a6-44db-a267-3aa0d159f529,1778317574,1778319015,1441,python3 /data/horse/ws/s3811141-grok3/train.py --batch-size=51 --topo --topo-every 1 --kelp-every=1 --n-layers=2 --d-model=2 --n-heads=4 --tokenizer_initial_nr=1000 --lr=0.02739540530244624281 --batches-per-epoch=20 --val-batches=128 --epochs=34 --task=turnstile --max-seq-len 24 --max-turnstiles 11 --scheduler=cosine --ffn=10,0,,c12,3512912,13_0,COMPLETED,SOBOL,0.040972000000000001473932087492,1086,51,10,11,34,2,2,24,0.027395405302446242806757581434,4,128,20
14,,,,,,,,,,,,14_0,RUNNING,SOBOL,,,156,18,47,133,2,2,46,0.013605979518001805328442088694,16,128,20
15,,,,,,,,,,,,15_0,RUNNING,SOBOL,,,294,7,27,66,1,3,14,0.084322684568731209875380727681,19,128,20
16,,,,,,,,,,,,16_0,RUNNING,SOBOL,,,279,18,63,184,2,2,21,0.017420113803414908476563383033,21,128,20
17,1778317527,41,8eb7b6fd-45a6-44db-a267-3aa0d159f529,1778317568,1778318007,439,python3 /data/horse/ws/s3811141-grok3/train.py --batch-size=141 --topo --topo-every 1 --kelp-every=1 --n-layers=1 --d-model=3 --n-heads=10 --tokenizer_initial_nr=1000 --lr=0.08498700039470727385 --batches-per-epoch=20 --val-batches=128 --epochs=13 --task=turnstile --max-seq-len 57 --max-turnstiles 15 --scheduler=cosine --ffn=6,0,,c15,3512910,17_0,COMPLETED,SOBOL,0.528437999999999963307573125348,2248,141,6,15,13,1,3,57,0.084987000394707273853711626543,10,128,20
18,1778317528,18,8eb7b6fd-45a6-44db-a267-3aa0d159f529,1778317546,1778321826,4280,python3 /data/horse/ws/s3811141-grok3/train.py --batch-size=67 --topo --topo-every 1 --kelp-every=1 --n-layers=1 --d-model=3 --n-heads=6 --tokenizer_initial_nr=1000 --lr=0.07398091874812143254 --batches-per-epoch=20 --val-batches=128 --epochs=106 --task=turnstile --max-seq-len 18 --max-turnstiles 35 --scheduler=cosine --ffn=30,0,,c11,3512902,18_0,COMPLETED,SOBOL,0.667648000000000019227286429668,1568,67,30,35,106,1,3,18,0.073980918748121432537701025467,6,128,20
19,,,,,,,,,,,,19_0,RUNNING,SOBOL,,,446,11,24,92,2,2,43,0.028292015097978989723870313355,25,128,20
2026-05-09 11:05:13 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, Started OmniOpt2 run...
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #1/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #2/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #3/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #4/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #5/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #6/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #7/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #8/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #9/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #10/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #11/20
2026-05-09 11:05:14 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #12/20
2026-05-09 11:05:15 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #13/20
2026-05-09 11:05:15 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #14/20
2026-05-09 11:05:15 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #15/20
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #16/20
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #17/20
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #18/20
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #19/20
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, getting new HP set #20/20
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, requested 20 jobs, got 20, 0.35 s/job
2026-05-09 11:05:20 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #1/20 start
2026-05-09 11:05:21 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #2/20 start
2026-05-09 11:05:21 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #3/20 start
2026-05-09 11:05:21 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #4/20 start
2026-05-09 11:05:21 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #5/20 start
2026-05-09 11:05:22 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #6/20 start
2026-05-09 11:05:22 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #7/20 start
2026-05-09 11:05:22 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #8/20 start
2026-05-09 11:05:23 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #9/20 start
2026-05-09 11:05:23 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #10/20 start
2026-05-09 11:05:24 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #11/20 start
2026-05-09 11:05:24 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #12/20 start
2026-05-09 11:05:24 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #13/20 start
2026-05-09 11:05:24 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #14/20 start
2026-05-09 11:05:25 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #15/20 start
2026-05-09 11:05:25 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #16/20 start
2026-05-09 11:05:25 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #17/20 start
2026-05-09 11:05:25 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #18/20 start
2026-05-09 11:05:25 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #19/20 start
2026-05-09 11:05:26 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, eval #20/20 start
2026-05-09 11:05:26 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, starting new job
2026-05-09 11:05:27 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 1 = ∑1/20, started new job
2026-05-09 11:05:27 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 1 = ∑1/20, starting new job
2026-05-09 11:05:27 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 2 = ∑2/20, started new job
2026-05-09 11:05:27 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 2 = ∑2/20, starting new job
2026-05-09 11:05:27 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 3 = ∑3/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 3 = ∑3/20, starting new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 4 = ∑4/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 3 = ∑3/20, starting new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 4 = ∑4/20, starting new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 5 = ∑5/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 6 = ∑6/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 6 = ∑6/20, starting new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 2 = ∑2/20, starting new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 7 = ∑7/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 8 = ∑8/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 9 = ∑9/20, started new job
2026-05-09 11:05:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, unknown 10 = ∑10/20, started new job
2026-05-09 11:05:29 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/1 = ∑11/20, started new job
2026-05-09 11:05:29 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/2 = ∑12/20, started new job
2026-05-09 11:05:29 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/3 = ∑13/20, started new job
2026-05-09 11:05:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/4 = ∑14/20, started new job
2026-05-09 11:05:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/5 = ∑15/20, started new job
2026-05-09 11:05:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/6 = ∑16/20, started new job
2026-05-09 11:05:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/7 = ∑17/20, started new job
2026-05-09 11:05:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/8 = ∑18/20, started new job
2026-05-09 11:05:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/9 = ∑19/20, started new job
2026-05-09 11:05:31 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/9 = ∑19/20, starting new job
2026-05-09 11:05:31 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/10 = ∑20/20, started new job
2026-05-09 11:05:32 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, pending/unknown 10/10 = ∑20/20, waiting for 20 jobs
2026-05-09 11:05:36 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, running/pending 10/10 = ∑20/20, waiting for 20 jobs
2026-05-09 11:06:12 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, running 20 = ∑20/20, waiting for 20 jobs
2026-05-09 11:13:28 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, running 20 = ∑20/20, new result: VAL_LOSS: 0.528438, PARAMS: 2248.000000
2026-05-09 11:13:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 1 done, running 19 = ∑19/20, waiting for 20 jobs, finished 1 job
2026-05-09 11:13:30 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 1 done, running 19 = ∑19/20, waiting for 19 jobs
2026-05-09 11:30:16 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 1 done, running 19 = ∑19/20, new result: VAL_LOSS: 0.040972, PARAMS: 1086.000000
2026-05-09 11:30:19 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 2 done, running 18 = ∑18/20, waiting for 19 jobs, finished 1 job
2026-05-09 11:30:19 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 2 done, running 18 = ∑18/20, waiting for 18 jobs
2026-05-09 11:33:05 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 2 done, running 18 = ∑18/20, new result: VAL_LOSS: 0.341520, PARAMS: 28602.000000
2026-05-09 11:33:09 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 3 done, running 17 = ∑17/20, waiting for 18 jobs, finished 1 job
2026-05-09 11:33:09 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 3 done, running 17 = ∑17/20, waiting for 17 jobs
2026-05-09 11:52:06 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 3 done, running 17 = ∑17/20, new result: VAL_LOSS: 0.667972, PARAMS: 4498.000000
2026-05-09 11:52:10 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 4 done, running 16 = ∑16/20, waiting for 17 jobs, finished 1 job
2026-05-09 11:52:10 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 4 done, running 16 = ∑16/20, waiting for 16 jobs
2026-05-09 12:17:07 (8eb7b6fd-45a6-44db-a267-3aa0d159f529): SOBOL, 4 done, running 16 = ∑16/20, new result: VAL_LOSS: 0.667648, PARAMS: 1568.000000
This logs the CPU and RAM usage of the main worker process.
timestamp,ram_usage_mb,cpu_usage_percent
1778317509,791.91796875,10.4
1778317569,826.13671875,9.9
1778317629,826.125,9.7
1778317689,834.0078125,11.4
1778317749,833.9375,9.9
1778317809,833.9609375,9.9
1778317869,833.94140625,9.9
1778317929,833.96484375,9.9
1778317989,833.96875,9.9
1778318049,834.51953125,9.9
1778318109,834.55078125,8.6
1778318169,834.546875,11
1778318229,834.55078125,10.2
1778318289,834.5546875,10.2
1778318349,834.53125,10.4
1778318409,834.609375,10.4
1778318469,834.65625,10.4
1778318529,834.70703125,10.3
1778318589,834.8203125,11.9
1778318649,834.875,10.4
1778318709,834.93359375,10.3
1778318769,835,10.4
1778318829,835.078125,10.3
1778318889,835.15625,10.4
1778318949,835.1796875,10.4
1778319009,835.2578125,9.2
1778319069,836.2890625,11.9
1778319129,836.30078125,10.9
1778319189,836.51171875,10.5
1778319249,836.52734375,10.5
1778319309,836.51171875,10.4
1778319369,836.56640625,10.5
1778319429,836.5625,10.6
1778319489,836.5625,10.6
1778319549,836.6328125,9.5
1778319609,836.69921875,10.3
1778319669,836.7890625,10.2
1778319729,836.82421875,10.9
1778319789,836.90625,10.9
1778319849,837.0078125,10.8
1778319909,835.6328125,10.9
1778319969,835.625,10.9
1778320029,835.70703125,10.9
1778320089,835.75,10.8
1778320149,835.80078125,10.9
1778320209,835.8671875,10.9
1778320269,835.95703125,10.8
1778320330,836.48046875,10.9
1778320390,836.5546875,10.9
1778320450,836.55859375,10.9
1778320510,836.5703125,11
1778320570,836.57421875,10.9
1778320630,836.58984375,10.9
1778320690,836.56640625,10.8
1778320750,836.59375,10.8
1778320810,836.6875,10.9
1778320870,836.734375,10.9
1778320930,836.78125,10.8
1778320990,836.86328125,10.9
1778321050,836.890625,10.9
1778321110,836.9609375,10.9
1778321170,837.015625,10.8
1778321230,837.07421875,10.9
1778321290,837.171875,11.1
1778321350,837.2265625,11.6
1778321410,837.265625,11
1778321470,837.3203125,10.8
1778321530,837.3984375,10.9
1778321590,837.47265625,10.9
1778321650,837.51953125,10.8
1778321710,837.60546875,10.9
1778321770,837.68359375,11.4