data: add fine-tuning run results (lr=3e-5, 30 epochs)
Val loss 1.19 → 0.77 and val perplexity 3.29 → 2.15 (epoch 1 → best epoch 20). Training early-stopped at epoch 30 (patience=10). This improves on the previous lr=1e-5 run (best val ppl 2.22).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,51 +1,31 @@
|
||||
epoch,train_loss,val_loss,val_ppl,lr,elapsed_s
|
||||
1,1.185128,1.244704,3.47,8.000000e-06,9.9
|
||||
2,1.129786,1.168368,3.22,9.994161e-06,9.2
|
||||
3,1.074786,1.102074,3.01,9.968238e-06,8.8
|
||||
4,1.004364,1.045185,2.84,9.921690e-06,8.5
|
||||
5,0.963113,0.995150,2.71,9.854709e-06,9.2
|
||||
6,0.903885,0.953348,2.59,9.767574e-06,9.1
|
||||
7,0.867560,0.927149,2.53,9.660647e-06,8.1
|
||||
8,0.848291,0.911215,2.49,9.534372e-06,9.1
|
||||
9,0.817798,0.897135,2.45,9.389272e-06,9.2
|
||||
10,0.815333,0.885900,2.43,9.225950e-06,8.2
|
||||
11,0.804101,0.876820,2.40,9.045085e-06,9.7
|
||||
12,0.791397,0.868702,2.38,8.847427e-06,9.2
|
||||
13,0.778087,0.860407,2.36,8.633796e-06,8.5
|
||||
14,0.771410,0.853698,2.35,8.405079e-06,10.7
|
||||
15,0.768661,0.848427,2.34,8.162227e-06,10.7
|
||||
16,0.762623,0.842908,2.32,7.906247e-06,9.4
|
||||
17,0.749388,0.837927,2.31,7.638201e-06,8.2
|
||||
18,0.745191,0.833950,2.30,7.359203e-06,9.1
|
||||
19,0.745993,0.830711,2.29,7.070412e-06,9.2
|
||||
20,0.726599,0.827279,2.29,6.773024e-06,9.0
|
||||
21,0.722057,0.824505,2.28,6.468277e-06,8.4
|
||||
22,0.723956,0.822224,2.28,6.157433e-06,9.2
|
||||
23,0.721438,0.819970,2.27,5.841785e-06,9.2
|
||||
24,0.717564,0.817087,2.26,5.522642e-06,8.1
|
||||
25,0.711637,0.814592,2.26,5.201330e-06,9.2
|
||||
26,0.712329,0.812606,2.25,4.879181e-06,9.2
|
||||
27,0.705078,0.810716,2.25,4.557534e-06,8.1
|
||||
28,0.697062,0.808805,2.25,4.237724e-06,9.2
|
||||
29,0.700164,0.807415,2.24,3.921079e-06,10.2
|
||||
30,0.695335,0.806321,2.24,3.608913e-06,8.7
|
||||
31,0.687925,0.804961,2.24,3.302521e-06,8.9
|
||||
32,0.691783,0.803864,2.23,3.003177e-06,9.3
|
||||
33,0.690663,0.802969,2.23,2.712123e-06,9.3
|
||||
34,0.692106,0.801836,2.23,2.430566e-06,8.1
|
||||
35,0.691675,0.801080,2.23,2.159676e-06,9.2
|
||||
36,0.678919,0.800619,2.23,1.900578e-06,9.2
|
||||
37,0.682140,0.800421,2.23,1.654347e-06,8.2
|
||||
38,0.689051,0.800192,2.23,1.422005e-06,9.2
|
||||
39,0.686498,0.799934,2.23,1.204517e-06,9.3
|
||||
40,0.690576,0.799613,2.22,1.002786e-06,8.2
|
||||
41,0.684055,0.799269,2.22,8.176493e-07,9.5
|
||||
42,0.685102,0.799062,2.22,6.498753e-07,9.3
|
||||
43,0.689241,0.798999,2.22,5.001606e-07,8.6
|
||||
44,0.689855,0.798879,2.22,3.691268e-07,8.7
|
||||
45,0.679218,0.798795,2.22,2.573178e-07,9.1
|
||||
46,0.680892,0.798748,2.22,1.651979e-07,9.1
|
||||
47,0.688506,0.798713,2.22,9.314937e-08,8.3
|
||||
48,0.684029,0.798699,2.22,4.147145e-08,9.2
|
||||
49,0.685409,0.798693,2.22,1.037863e-08,9.2
|
||||
50,0.684188,0.798692,2.22,0.000000e+00,8.1
|
||||
1,1.173289,1.190454,3.29,2.400000e-05,11.1
|
||||
2,1.031668,1.017097,2.77,2.998248e-05,9.3
|
||||
3,0.900459,0.914644,2.50,2.990471e-05,8.2
|
||||
4,0.826531,0.877679,2.41,2.976507e-05,9.1
|
||||
5,0.797045,0.851717,2.34,2.956413e-05,9.2
|
||||
6,0.759114,0.836767,2.31,2.930272e-05,8.7
|
||||
7,0.736987,0.819369,2.27,2.898194e-05,8.6
|
||||
8,0.722803,0.806593,2.24,2.860312e-05,9.1
|
||||
9,0.693306,0.797257,2.22,2.816782e-05,9.1
|
||||
10,0.683278,0.792332,2.21,2.767785e-05,8.2
|
||||
11,0.673061,0.787863,2.20,2.713525e-05,9.1
|
||||
12,0.655914,0.782984,2.19,2.654228e-05,9.1
|
||||
13,0.643172,0.777573,2.18,2.590139e-05,8.3
|
||||
14,0.635985,0.774572,2.17,2.521524e-05,9.0
|
||||
15,0.630730,0.773065,2.17,2.448668e-05,9.1
|
||||
16,0.622494,0.771514,2.16,2.371874e-05,9.0
|
||||
17,0.606942,0.769548,2.16,2.291460e-05,8.1
|
||||
18,0.601119,0.768194,2.16,2.207761e-05,9.1
|
||||
19,0.601939,0.768208,2.16,2.121123e-05,9.1
|
||||
20,0.580447,0.766817,2.15,2.031907e-05,8.2
|
||||
21,0.574881,0.767509,2.15,1.940483e-05,9.1
|
||||
22,0.576981,0.769625,2.16,1.847230e-05,9.1
|
||||
23,0.567170,0.770998,2.16,1.752536e-05,8.6
|
||||
24,0.564600,0.771246,2.16,1.656793e-05,8.7
|
||||
25,0.556949,0.772251,2.16,1.560399e-05,9.1
|
||||
26,0.556080,0.770962,2.16,1.463754e-05,9.0
|
||||
27,0.551530,0.769089,2.16,1.367260e-05,8.2
|
||||
28,0.542789,0.768548,2.16,1.271317e-05,9.0
|
||||
29,0.542809,0.770213,2.16,1.176324e-05,9.1
|
||||
30,0.537889,0.771124,2.16,1.082674e-05,8.2
|
||||
|
||||
|
Reference in New Issue
Block a user