data: update pretrained checkpoint results (BAR-free tokenizer)
Re-run pre-training results with the corrected 84-token vocabulary and max_seq_len=320. Previous checkpoint was trained on stale data with BAR tokens and a corrupted tokenizer. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -1,51 +1,51 @@
|
||||
epoch,train_loss,val_loss,val_ppl,lr,elapsed_s
|
||||
1,2.031937,0.808181,2.24,2.205000e-04,11.4
|
||||
2,0.641424,0.550909,1.73,2.998721e-04,9.6
|
||||
3,0.523860,0.496441,1.64,2.991598e-04,9.6
|
||||
4,0.485698,0.472027,1.60,2.978255e-04,9.8
|
||||
5,0.464184,0.447461,1.56,2.958747e-04,9.4
|
||||
6,0.445964,0.434830,1.54,2.933156e-04,9.7
|
||||
7,0.431950,0.417041,1.52,2.901587e-04,9.8
|
||||
8,0.417696,0.409715,1.51,2.864174e-04,9.8
|
||||
9,0.405625,0.396861,1.49,2.821072e-04,10.0
|
||||
10,0.394811,0.391014,1.48,2.772460e-04,9.9
|
||||
11,0.384599,0.378818,1.46,2.718542e-04,10.1
|
||||
12,0.376229,0.370746,1.45,2.659542e-04,10.2
|
||||
13,0.366664,0.364249,1.44,2.595706e-04,10.3
|
||||
14,0.358899,0.353221,1.42,2.527301e-04,10.3
|
||||
15,0.351163,0.345543,1.41,2.454612e-04,10.2
|
||||
16,0.344542,0.343143,1.41,2.377941e-04,10.0
|
||||
17,0.337549,0.336707,1.40,2.297610e-04,10.1
|
||||
18,0.331382,0.332268,1.39,2.213952e-04,10.1
|
||||
19,0.325570,0.322937,1.38,2.127316e-04,10.1
|
||||
20,0.318492,0.319304,1.38,2.038065e-04,10.1
|
||||
21,0.313770,0.315012,1.37,1.946569e-04,10.0
|
||||
22,0.307178,0.311228,1.37,1.853211e-04,10.2
|
||||
23,0.302469,0.303362,1.35,1.758381e-04,10.2
|
||||
24,0.297134,0.302971,1.35,1.662472e-04,10.2
|
||||
25,0.292665,0.292786,1.34,1.565886e-04,10.2
|
||||
26,0.287050,0.289937,1.34,1.469026e-04,10.0
|
||||
27,0.282454,0.289310,1.34,1.372294e-04,10.1
|
||||
28,0.278259,0.286254,1.33,1.276095e-04,10.2
|
||||
29,0.274782,0.282411,1.33,1.180830e-04,10.1
|
||||
30,0.270312,0.278289,1.32,1.086896e-04,10.1
|
||||
31,0.267001,0.274995,1.32,9.946846e-05,10.1
|
||||
32,0.263096,0.271817,1.31,9.045806e-05,10.0
|
||||
33,0.260614,0.269074,1.31,8.169597e-05,10.1
|
||||
34,0.257799,0.269102,1.31,7.321873e-05,10.1
|
||||
35,0.253950,0.266719,1.31,6.506170e-05,10.2
|
||||
36,0.251757,0.264989,1.30,5.725888e-05,10.1
|
||||
37,0.249786,0.263033,1.30,4.984283e-05,10.0
|
||||
38,0.247241,0.260050,1.30,4.284447e-05,10.1
|
||||
39,0.245589,0.258710,1.30,3.629298e-05,10.2
|
||||
40,0.243220,0.258440,1.29,3.021569e-05,10.1
|
||||
41,0.242131,0.257187,1.29,2.463794e-05,10.1
|
||||
42,0.240936,0.256695,1.29,1.958300e-05,10.1
|
||||
43,0.239800,0.255997,1.29,1.507193e-05,10.0
|
||||
44,0.238705,0.255310,1.29,1.112356e-05,10.1
|
||||
45,0.238149,0.254971,1.29,7.754357e-06,10.1
|
||||
46,0.237226,0.254995,1.29,4.978363e-06,10.1
|
||||
47,0.236467,0.254608,1.29,2.807158e-06,10.2
|
||||
48,0.236280,0.254222,1.29,1.249797e-06,9.9
|
||||
49,0.235897,0.254291,1.29,3.127754e-07,10.2
|
||||
50,0.237100,0.254293,1.29,0.000000e+00,10.1
|
||||
1,2.043105,0.860380,2.36,2.205000e-04,13.1
|
||||
2,0.682436,0.587271,1.80,2.998721e-04,11.9
|
||||
3,0.567941,0.544875,1.72,2.991598e-04,12.0
|
||||
4,0.529446,0.512912,1.67,2.978255e-04,12.5
|
||||
5,0.505409,0.490817,1.63,2.958747e-04,12.4
|
||||
6,0.484891,0.471718,1.60,2.933156e-04,12.5
|
||||
7,0.467122,0.456903,1.58,2.901587e-04,12.7
|
||||
8,0.450230,0.442813,1.56,2.864174e-04,12.9
|
||||
9,0.435896,0.428490,1.53,2.821072e-04,13.1
|
||||
10,0.425630,0.420062,1.52,2.772460e-04,13.1
|
||||
11,0.414810,0.411151,1.51,2.718542e-04,12.9
|
||||
12,0.405492,0.409687,1.51,2.659542e-04,12.9
|
||||
13,0.396882,0.391923,1.48,2.595706e-04,12.9
|
||||
14,0.387616,0.387274,1.47,2.527301e-04,12.8
|
||||
15,0.379135,0.385116,1.47,2.454612e-04,12.9
|
||||
16,0.371748,0.374518,1.45,2.377941e-04,13.0
|
||||
17,0.364497,0.367260,1.44,2.297610e-04,12.9
|
||||
18,0.357427,0.364524,1.44,2.213952e-04,12.9
|
||||
19,0.350312,0.358540,1.43,2.127316e-04,12.9
|
||||
20,0.342951,0.349801,1.42,2.038065e-04,12.9
|
||||
21,0.337651,0.343782,1.41,1.946569e-04,12.8
|
||||
22,0.330809,0.337008,1.40,1.853211e-04,12.8
|
||||
23,0.324771,0.332336,1.39,1.758381e-04,12.8
|
||||
24,0.319391,0.324907,1.38,1.662472e-04,12.8
|
||||
25,0.314073,0.321501,1.38,1.565886e-04,12.9
|
||||
26,0.309813,0.317718,1.37,1.469026e-04,12.8
|
||||
27,0.304261,0.313438,1.37,1.372294e-04,12.9
|
||||
28,0.299998,0.310763,1.36,1.276095e-04,12.9
|
||||
29,0.295039,0.307241,1.36,1.180830e-04,12.9
|
||||
30,0.290108,0.303446,1.35,1.086896e-04,12.8
|
||||
31,0.288020,0.302041,1.35,9.946846e-05,12.8
|
||||
32,0.283507,0.299317,1.35,9.045806e-05,12.8
|
||||
33,0.280522,0.294816,1.34,8.169597e-05,12.8
|
||||
34,0.275877,0.291919,1.34,7.321873e-05,12.9
|
||||
35,0.273687,0.288819,1.33,6.506170e-05,12.8
|
||||
36,0.270566,0.287831,1.33,5.725888e-05,13.0
|
||||
37,0.267893,0.286515,1.33,4.984283e-05,13.0
|
||||
38,0.265996,0.284756,1.33,4.284447e-05,13.0
|
||||
39,0.264527,0.283663,1.33,3.629298e-05,13.0
|
||||
40,0.262261,0.282717,1.33,3.021569e-05,12.9
|
||||
41,0.260812,0.282175,1.33,2.463794e-05,12.8
|
||||
42,0.258872,0.280704,1.32,1.958300e-05,12.8
|
||||
43,0.257864,0.280204,1.32,1.507193e-05,12.8
|
||||
44,0.256770,0.279358,1.32,1.112356e-05,12.8
|
||||
45,0.254942,0.279263,1.32,7.754357e-06,13.0
|
||||
46,0.255560,0.278873,1.32,4.978363e-06,12.8
|
||||
47,0.255011,0.278650,1.32,2.807158e-06,12.9
|
||||
48,0.254304,0.278583,1.32,1.249797e-06,12.8
|
||||
49,0.252442,0.278481,1.32,3.127754e-07,12.8
|
||||
50,0.253867,0.278494,1.32,0.000000e+00,12.8
|
||||
|
||||
|
Reference in New Issue
Block a user