data: add fine-tuning run results (lr=3e-5, 30 epochs)

Val loss improved from 1.19 (epoch 1) to 0.77 (best, epoch 20); val perplexity from 3.29 to 2.15.
Best epoch was 20; training stopped early at epoch 30 (patience=10).
This improves on the previous lr=1e-5 run (best val ppl 2.22, reached at epoch 50).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-21 20:52:39 +03:00
parent 7c0d147956
commit d9585ec008
3 changed files with 68 additions and 108 deletions
+30 -50
View File
@@ -1,51 +1,31 @@
epoch,train_loss,val_loss,val_ppl,lr,elapsed_s epoch,train_loss,val_loss,val_ppl,lr,elapsed_s
1,1.185128,1.244704,3.47,8.000000e-06,9.9 1,1.173289,1.190454,3.29,2.400000e-05,11.1
2,1.129786,1.168368,3.22,9.994161e-06,9.2 2,1.031668,1.017097,2.77,2.998248e-05,9.3
3,1.074786,1.102074,3.01,9.968238e-06,8.8 3,0.900459,0.914644,2.50,2.990471e-05,8.2
4,1.004364,1.045185,2.84,9.921690e-06,8.5 4,0.826531,0.877679,2.41,2.976507e-05,9.1
5,0.963113,0.995150,2.71,9.854709e-06,9.2 5,0.797045,0.851717,2.34,2.956413e-05,9.2
6,0.903885,0.953348,2.59,9.767574e-06,9.1 6,0.759114,0.836767,2.31,2.930272e-05,8.7
7,0.867560,0.927149,2.53,9.660647e-06,8.1 7,0.736987,0.819369,2.27,2.898194e-05,8.6
8,0.848291,0.911215,2.49,9.534372e-06,9.1 8,0.722803,0.806593,2.24,2.860312e-05,9.1
9,0.817798,0.897135,2.45,9.389272e-06,9.2 9,0.693306,0.797257,2.22,2.816782e-05,9.1
10,0.815333,0.885900,2.43,9.225950e-06,8.2 10,0.683278,0.792332,2.21,2.767785e-05,8.2
11,0.804101,0.876820,2.40,9.045085e-06,9.7 11,0.673061,0.787863,2.20,2.713525e-05,9.1
12,0.791397,0.868702,2.38,8.847427e-06,9.2 12,0.655914,0.782984,2.19,2.654228e-05,9.1
13,0.778087,0.860407,2.36,8.633796e-06,8.5 13,0.643172,0.777573,2.18,2.590139e-05,8.3
14,0.771410,0.853698,2.35,8.405079e-06,10.7 14,0.635985,0.774572,2.17,2.521524e-05,9.0
15,0.768661,0.848427,2.34,8.162227e-06,10.7 15,0.630730,0.773065,2.17,2.448668e-05,9.1
16,0.762623,0.842908,2.32,7.906247e-06,9.4 16,0.622494,0.771514,2.16,2.371874e-05,9.0
17,0.749388,0.837927,2.31,7.638201e-06,8.2 17,0.606942,0.769548,2.16,2.291460e-05,8.1
18,0.745191,0.833950,2.30,7.359203e-06,9.1 18,0.601119,0.768194,2.16,2.207761e-05,9.1
19,0.745993,0.830711,2.29,7.070412e-06,9.2 19,0.601939,0.768208,2.16,2.121123e-05,9.1
20,0.726599,0.827279,2.29,6.773024e-06,9.0 20,0.580447,0.766817,2.15,2.031907e-05,8.2
21,0.722057,0.824505,2.28,6.468277e-06,8.4 21,0.574881,0.767509,2.15,1.940483e-05,9.1
22,0.723956,0.822224,2.28,6.157433e-06,9.2 22,0.576981,0.769625,2.16,1.847230e-05,9.1
23,0.721438,0.819970,2.27,5.841785e-06,9.2 23,0.567170,0.770998,2.16,1.752536e-05,8.6
24,0.717564,0.817087,2.26,5.522642e-06,8.1 24,0.564600,0.771246,2.16,1.656793e-05,8.7
25,0.711637,0.814592,2.26,5.201330e-06,9.2 25,0.556949,0.772251,2.16,1.560399e-05,9.1
26,0.712329,0.812606,2.25,4.879181e-06,9.2 26,0.556080,0.770962,2.16,1.463754e-05,9.0
27,0.705078,0.810716,2.25,4.557534e-06,8.1 27,0.551530,0.769089,2.16,1.367260e-05,8.2
28,0.697062,0.808805,2.25,4.237724e-06,9.2 28,0.542789,0.768548,2.16,1.271317e-05,9.0
29,0.700164,0.807415,2.24,3.921079e-06,10.2 29,0.542809,0.770213,2.16,1.176324e-05,9.1
30,0.695335,0.806321,2.24,3.608913e-06,8.7 30,0.537889,0.771124,2.16,1.082674e-05,8.2
31,0.687925,0.804961,2.24,3.302521e-06,8.9
32,0.691783,0.803864,2.23,3.003177e-06,9.3
33,0.690663,0.802969,2.23,2.712123e-06,9.3
34,0.692106,0.801836,2.23,2.430566e-06,8.1
35,0.691675,0.801080,2.23,2.159676e-06,9.2
36,0.678919,0.800619,2.23,1.900578e-06,9.2
37,0.682140,0.800421,2.23,1.654347e-06,8.2
38,0.689051,0.800192,2.23,1.422005e-06,9.2
39,0.686498,0.799934,2.23,1.204517e-06,9.3
40,0.690576,0.799613,2.22,1.002786e-06,8.2
41,0.684055,0.799269,2.22,8.176493e-07,9.5
42,0.685102,0.799062,2.22,6.498753e-07,9.3
43,0.689241,0.798999,2.22,5.001606e-07,8.6
44,0.689855,0.798879,2.22,3.691268e-07,8.7
45,0.679218,0.798795,2.22,2.573178e-07,9.1
46,0.680892,0.798748,2.22,1.651979e-07,9.1
47,0.688506,0.798713,2.22,9.314937e-08,8.3
48,0.684029,0.798699,2.22,4.147145e-08,9.2
49,0.685409,0.798693,2.22,1.037863e-08,9.2
50,0.684188,0.798692,2.22,0.000000e+00,8.1
1 epoch train_loss val_loss val_ppl lr elapsed_s
2 1 1.185128 1.173289 1.244704 1.190454 3.47 3.29 8.000000e-06 2.400000e-05 9.9 11.1
3 2 1.129786 1.031668 1.168368 1.017097 3.22 2.77 9.994161e-06 2.998248e-05 9.2 9.3
4 3 1.074786 0.900459 1.102074 0.914644 3.01 2.50 9.968238e-06 2.990471e-05 8.8 8.2
5 4 1.004364 0.826531 1.045185 0.877679 2.84 2.41 9.921690e-06 2.976507e-05 8.5 9.1
6 5 0.963113 0.797045 0.995150 0.851717 2.71 2.34 9.854709e-06 2.956413e-05 9.2
7 6 0.903885 0.759114 0.953348 0.836767 2.59 2.31 9.767574e-06 2.930272e-05 9.1 8.7
8 7 0.867560 0.736987 0.927149 0.819369 2.53 2.27 9.660647e-06 2.898194e-05 8.1 8.6
9 8 0.848291 0.722803 0.911215 0.806593 2.49 2.24 9.534372e-06 2.860312e-05 9.1
10 9 0.817798 0.693306 0.897135 0.797257 2.45 2.22 9.389272e-06 2.816782e-05 9.2 9.1
11 10 0.815333 0.683278 0.885900 0.792332 2.43 2.21 9.225950e-06 2.767785e-05 8.2
12 11 0.804101 0.673061 0.876820 0.787863 2.40 2.20 9.045085e-06 2.713525e-05 9.7 9.1
13 12 0.791397 0.655914 0.868702 0.782984 2.38 2.19 8.847427e-06 2.654228e-05 9.2 9.1
14 13 0.778087 0.643172 0.860407 0.777573 2.36 2.18 8.633796e-06 2.590139e-05 8.5 8.3
15 14 0.771410 0.635985 0.853698 0.774572 2.35 2.17 8.405079e-06 2.521524e-05 10.7 9.0
16 15 0.768661 0.630730 0.848427 0.773065 2.34 2.17 8.162227e-06 2.448668e-05 10.7 9.1
17 16 0.762623 0.622494 0.842908 0.771514 2.32 2.16 7.906247e-06 2.371874e-05 9.4 9.0
18 17 0.749388 0.606942 0.837927 0.769548 2.31 2.16 7.638201e-06 2.291460e-05 8.2 8.1
19 18 0.745191 0.601119 0.833950 0.768194 2.30 2.16 7.359203e-06 2.207761e-05 9.1
20 19 0.745993 0.601939 0.830711 0.768208 2.29 2.16 7.070412e-06 2.121123e-05 9.2 9.1
21 20 0.726599 0.580447 0.827279 0.766817 2.29 2.15 6.773024e-06 2.031907e-05 9.0 8.2
22 21 0.722057 0.574881 0.824505 0.767509 2.28 2.15 6.468277e-06 1.940483e-05 8.4 9.1
23 22 0.723956 0.576981 0.822224 0.769625 2.28 2.16 6.157433e-06 1.847230e-05 9.2 9.1
24 23 0.721438 0.567170 0.819970 0.770998 2.27 2.16 5.841785e-06 1.752536e-05 9.2 8.6
25 24 0.717564 0.564600 0.817087 0.771246 2.26 2.16 5.522642e-06 1.656793e-05 8.1 8.7
26 25 0.711637 0.556949 0.814592 0.772251 2.26 2.16 5.201330e-06 1.560399e-05 9.2 9.1
27 26 0.712329 0.556080 0.812606 0.770962 2.25 2.16 4.879181e-06 1.463754e-05 9.2 9.0
28 27 0.705078 0.551530 0.810716 0.769089 2.25 2.16 4.557534e-06 1.367260e-05 8.1 8.2
29 28 0.697062 0.542789 0.808805 0.768548 2.25 2.16 4.237724e-06 1.271317e-05 9.2 9.0
30 29 0.700164 0.542809 0.807415 0.770213 2.24 2.16 3.921079e-06 1.176324e-05 10.2 9.1
31 30 0.695335 0.537889 0.806321 0.771124 2.24 2.16 3.608913e-06 1.082674e-05 8.7 8.2
31 0.687925 0.804961 2.24 3.302521e-06 8.9
32 0.691783 0.803864 2.23 3.003177e-06 9.3
33 0.690663 0.802969 2.23 2.712123e-06 9.3
34 0.692106 0.801836 2.23 2.430566e-06 8.1
35 0.691675 0.801080 2.23 2.159676e-06 9.2
36 0.678919 0.800619 2.23 1.900578e-06 9.2
37 0.682140 0.800421 2.23 1.654347e-06 8.2
38 0.689051 0.800192 2.23 1.422005e-06 9.2
39 0.686498 0.799934 2.23 1.204517e-06 9.3
40 0.690576 0.799613 2.22 1.002786e-06 8.2
41 0.684055 0.799269 2.22 8.176493e-07 9.5
42 0.685102 0.799062 2.22 6.498753e-07 9.3
43 0.689241 0.798999 2.22 5.001606e-07 8.6
44 0.689855 0.798879 2.22 3.691268e-07 8.7
45 0.679218 0.798795 2.22 2.573178e-07 9.1
46 0.680892 0.798748 2.22 1.651979e-07 9.1
47 0.688506 0.798713 2.22 9.314937e-08 8.3
48 0.684029 0.798699 2.22 4.147145e-08 9.2
49 0.685409 0.798693 2.22 1.037863e-08 9.2
50 0.684188 0.798692 2.22 0.000000e+00 8.1
+36 -56
View File
@@ -2,12 +2,12 @@
==================================================== ====================================================
FINE-TUNING REPORT FINE-TUNING REPORT
==================================================== ====================================================
Total epochs run : 50 Total epochs run : 30
Best epoch (val loss) : 50 Best epoch (val loss) : 20
Convergence epoch : 30 (val ≤ best+1 %) Convergence epoch : 15 (val ≤ best+1 %)
Best val loss : 0.7987 Best val loss : 0.7668
Best val perplexity : 2.22 Best val perplexity : 2.15
Final train loss : 0.6842 Final train loss : 0.5379
Unique parameters : 1,396,416 Unique parameters : 1,396,416
Checkpoint : checkpoints/finetuned.pt Checkpoint : checkpoints/finetuned.pt
Log CSV : checkpoints/finetuned.log.csv Log CSV : checkpoints/finetuned.log.csv
@@ -15,53 +15,33 @@
epoch train val ppl lr epoch train val ppl lr
----- -------- -------- ------- ---------- ----- -------- -------- ------- ----------
1 1.1851 1.2447 3.47 8.00e-06 1 1.1733 1.1905 3.29 2.40e-05
2 1.1298 1.1684 3.22 9.99e-06 2 1.0317 1.0171 2.77 3.00e-05
3 1.0748 1.1021 3.01 9.97e-06 3 0.9005 0.9146 2.50 2.99e-05
4 1.0044 1.0452 2.84 9.92e-06 4 0.8265 0.8777 2.41 2.98e-05
5 0.9631 0.9951 2.71 9.85e-06 5 0.7970 0.8517 2.34 2.96e-05
6 0.9039 0.9533 2.59 9.77e-06 6 0.7591 0.8368 2.31 2.93e-05
7 0.8676 0.9271 2.53 9.66e-06 7 0.7370 0.8194 2.27 2.90e-05
8 0.8483 0.9112 2.49 9.53e-06 8 0.7228 0.8066 2.24 2.86e-05
9 0.8178 0.8971 2.45 9.39e-06 9 0.6933 0.7973 2.22 2.82e-05
10 0.8153 0.8859 2.43 9.23e-06 10 0.6833 0.7923 2.21 2.77e-05
11 0.8041 0.8768 2.40 9.05e-06 11 0.6731 0.7879 2.20 2.71e-05
12 0.7914 0.8687 2.38 8.85e-06 12 0.6559 0.7830 2.19 2.65e-05
13 0.7781 0.8604 2.36 8.63e-06 13 0.6432 0.7776 2.18 2.59e-05
14 0.7714 0.8537 2.35 8.41e-06 14 0.6360 0.7746 2.17 2.52e-05
15 0.7687 0.8484 2.34 8.16e-06 15 0.6307 0.7731 2.17 2.45e-05
16 0.7626 0.8429 2.32 7.91e-06 16 0.6225 0.7715 2.16 2.37e-05
17 0.7494 0.8379 2.31 7.64e-06 17 0.6069 0.7695 2.16 2.29e-05
18 0.7452 0.8339 2.30 7.36e-06 18 0.6011 0.7682 2.16 2.21e-05
19 0.7460 0.8307 2.29 7.07e-06 19 0.6019 0.7682 2.16 2.12e-05
20 0.7266 0.8273 2.29 6.77e-06 20 0.5804 0.7668 2.15 2.03e-05 ←
21 0.7221 0.8245 2.28 6.47e-06 21 0.5749 0.7675 2.15 1.94e-05
22 0.7240 0.8222 2.28 6.16e-06 22 0.5770 0.7696 2.16 1.85e-05
23 0.7214 0.8200 2.27 5.84e-06 23 0.5672 0.7710 2.16 1.75e-05
24 0.7176 0.8171 2.26 5.52e-06 24 0.5646 0.7712 2.16 1.66e-05
25 0.7116 0.8146 2.26 5.20e-06 25 0.5569 0.7723 2.16 1.56e-05
26 0.7123 0.8126 2.25 4.88e-06 26 0.5561 0.7710 2.16 1.46e-05
27 0.7051 0.8107 2.25 4.56e-06 27 0.5515 0.7691 2.16 1.37e-05
28 0.6971 0.8088 2.25 4.24e-06 28 0.5428 0.7685 2.16 1.27e-05
29 0.7002 0.8074 2.24 3.92e-06 29 0.5428 0.7702 2.16 1.18e-05
30 0.6953 0.8063 2.24 3.61e-06 30 0.5379 0.7711 2.16 1.08e-05
31 0.6879 0.8050 2.24 3.30e-06
32 0.6918 0.8039 2.23 3.00e-06
33 0.6907 0.8030 2.23 2.71e-06
34 0.6921 0.8018 2.23 2.43e-06
35 0.6917 0.8011 2.23 2.16e-06
36 0.6789 0.8006 2.23 1.90e-06
37 0.6821 0.8004 2.23 1.65e-06
38 0.6891 0.8002 2.23 1.42e-06
39 0.6865 0.7999 2.23 1.20e-06
40 0.6906 0.7996 2.22 1.00e-06
41 0.6841 0.7993 2.22 8.18e-07
42 0.6851 0.7991 2.22 6.50e-07
43 0.6892 0.7990 2.22 5.00e-07
44 0.6899 0.7989 2.22 3.69e-07
45 0.6792 0.7988 2.22 2.57e-07
46 0.6809 0.7987 2.22 1.65e-07
47 0.6885 0.7987 2.22 9.31e-08
48 0.6840 0.7987 2.22 4.15e-08
49 0.6854 0.7987 2.22 1.04e-08
50 0.6842 0.7987 2.22 0.00e+00 ←
Binary file not shown.