data: add pre-training results from Google Colab run

Includes log CSV (50 epochs), loss-curve plot, and report.
Training ran on Colab GPU (T4).

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-20 13:10:34 +03:00
parent 89770dd009
commit 329952b02e
3 changed files with 121 additions and 0 deletions
+51
View File
@@ -0,0 +1,51 @@
epoch,train_loss,val_loss,val_ppl,lr,elapsed_s
1,2.031937,0.808181,2.24,2.205000e-04,11.4
2,0.641424,0.550909,1.73,2.998721e-04,9.6
3,0.523860,0.496441,1.64,2.991598e-04,9.6
4,0.485698,0.472027,1.60,2.978255e-04,9.8
5,0.464184,0.447461,1.56,2.958747e-04,9.4
6,0.445964,0.434830,1.54,2.933156e-04,9.7
7,0.431950,0.417041,1.52,2.901587e-04,9.8
8,0.417696,0.409715,1.51,2.864174e-04,9.8
9,0.405625,0.396861,1.49,2.821072e-04,10.0
10,0.394811,0.391014,1.48,2.772460e-04,9.9
11,0.384599,0.378818,1.46,2.718542e-04,10.1
12,0.376229,0.370746,1.45,2.659542e-04,10.2
13,0.366664,0.364249,1.44,2.595706e-04,10.3
14,0.358899,0.353221,1.42,2.527301e-04,10.3
15,0.351163,0.345543,1.41,2.454612e-04,10.2
16,0.344542,0.343143,1.41,2.377941e-04,10.0
17,0.337549,0.336707,1.40,2.297610e-04,10.1
18,0.331382,0.332268,1.39,2.213952e-04,10.1
19,0.325570,0.322937,1.38,2.127316e-04,10.1
20,0.318492,0.319304,1.38,2.038065e-04,10.1
21,0.313770,0.315012,1.37,1.946569e-04,10.0
22,0.307178,0.311228,1.37,1.853211e-04,10.2
23,0.302469,0.303362,1.35,1.758381e-04,10.2
24,0.297134,0.302971,1.35,1.662472e-04,10.2
25,0.292665,0.292786,1.34,1.565886e-04,10.2
26,0.287050,0.289937,1.34,1.469026e-04,10.0
27,0.282454,0.289310,1.34,1.372294e-04,10.1
28,0.278259,0.286254,1.33,1.276095e-04,10.2
29,0.274782,0.282411,1.33,1.180830e-04,10.1
30,0.270312,0.278289,1.32,1.086896e-04,10.1
31,0.267001,0.274995,1.32,9.946846e-05,10.1
32,0.263096,0.271817,1.31,9.045806e-05,10.0
33,0.260614,0.269074,1.31,8.169597e-05,10.1
34,0.257799,0.269102,1.31,7.321873e-05,10.1
35,0.253950,0.266719,1.31,6.506170e-05,10.2
36,0.251757,0.264989,1.30,5.725888e-05,10.1
37,0.249786,0.263033,1.30,4.984283e-05,10.0
38,0.247241,0.260050,1.30,4.284447e-05,10.1
39,0.245589,0.258710,1.30,3.629298e-05,10.2
40,0.243220,0.258440,1.29,3.021569e-05,10.1
41,0.242131,0.257187,1.29,2.463794e-05,10.1
42,0.240936,0.256695,1.29,1.958300e-05,10.1
43,0.239800,0.255997,1.29,1.507193e-05,10.0
44,0.238705,0.255310,1.29,1.112356e-05,10.1
45,0.238149,0.254971,1.29,7.754357e-06,10.1
46,0.237226,0.254995,1.29,4.978363e-06,10.1
47,0.236467,0.254608,1.29,2.807158e-06,10.2
48,0.236280,0.254222,1.29,1.249797e-06,9.9
49,0.235897,0.254291,1.29,3.127754e-07,10.2
50,0.237100,0.254293,1.29,0.000000e+00,10.1
1 epoch train_loss val_loss val_ppl lr elapsed_s
2 1 2.031937 0.808181 2.24 2.205000e-04 11.4
3 2 0.641424 0.550909 1.73 2.998721e-04 9.6
4 3 0.523860 0.496441 1.64 2.991598e-04 9.6
5 4 0.485698 0.472027 1.60 2.978255e-04 9.8
6 5 0.464184 0.447461 1.56 2.958747e-04 9.4
7 6 0.445964 0.434830 1.54 2.933156e-04 9.7
8 7 0.431950 0.417041 1.52 2.901587e-04 9.8
9 8 0.417696 0.409715 1.51 2.864174e-04 9.8
10 9 0.405625 0.396861 1.49 2.821072e-04 10.0
11 10 0.394811 0.391014 1.48 2.772460e-04 9.9
12 11 0.384599 0.378818 1.46 2.718542e-04 10.1
13 12 0.376229 0.370746 1.45 2.659542e-04 10.2
14 13 0.366664 0.364249 1.44 2.595706e-04 10.3
15 14 0.358899 0.353221 1.42 2.527301e-04 10.3
16 15 0.351163 0.345543 1.41 2.454612e-04 10.2
17 16 0.344542 0.343143 1.41 2.377941e-04 10.0
18 17 0.337549 0.336707 1.40 2.297610e-04 10.1
19 18 0.331382 0.332268 1.39 2.213952e-04 10.1
20 19 0.325570 0.322937 1.38 2.127316e-04 10.1
21 20 0.318492 0.319304 1.38 2.038065e-04 10.1
22 21 0.313770 0.315012 1.37 1.946569e-04 10.0
23 22 0.307178 0.311228 1.37 1.853211e-04 10.2
24 23 0.302469 0.303362 1.35 1.758381e-04 10.2
25 24 0.297134 0.302971 1.35 1.662472e-04 10.2
26 25 0.292665 0.292786 1.34 1.565886e-04 10.2
27 26 0.287050 0.289937 1.34 1.469026e-04 10.0
28 27 0.282454 0.289310 1.34 1.372294e-04 10.1
29 28 0.278259 0.286254 1.33 1.276095e-04 10.2
30 29 0.274782 0.282411 1.33 1.180830e-04 10.1
31 30 0.270312 0.278289 1.32 1.086896e-04 10.1
32 31 0.267001 0.274995 1.32 9.946846e-05 10.1
33 32 0.263096 0.271817 1.31 9.045806e-05 10.0
34 33 0.260614 0.269074 1.31 8.169597e-05 10.1
35 34 0.257799 0.269102 1.31 7.321873e-05 10.1
36 35 0.253950 0.266719 1.31 6.506170e-05 10.2
37 36 0.251757 0.264989 1.30 5.725888e-05 10.1
38 37 0.249786 0.263033 1.30 4.984283e-05 10.0
39 38 0.247241 0.260050 1.30 4.284447e-05 10.1
40 39 0.245589 0.258710 1.30 3.629298e-05 10.2
41 40 0.243220 0.258440 1.29 3.021569e-05 10.1
42 41 0.242131 0.257187 1.29 2.463794e-05 10.1
43 42 0.240936 0.256695 1.29 1.958300e-05 10.1
44 43 0.239800 0.255997 1.29 1.507193e-05 10.0
45 44 0.238705 0.255310 1.29 1.112356e-05 10.1
46 45 0.238149 0.254971 1.29 7.754357e-06 10.1
47 46 0.237226 0.254995 1.29 4.978363e-06 10.1
48 47 0.236467 0.254608 1.29 2.807158e-06 10.2
49 48 0.236280 0.254222 1.29 1.249797e-06 9.9
50 49 0.235897 0.254291 1.29 3.127754e-07 10.2
51 50 0.237100 0.254293 1.29 0.000000e+00 10.1