From 329952b02e46e23955bacdd40cf19214ded4a3ee Mon Sep 17 00:00:00 2001 From: Masahiko AMANO Date: Wed, 20 May 2026 13:10:34 +0300 Subject: [PATCH] data: add pre-training results from Google Colab run Includes log CSV (50 epochs), loss-curve plot, and report. Training ran on Colab GPU (T4). Co-Authored-By: Claude Sonnet 4.6 --- checkpoints/pretrained.log.csv | 51 +++++++++++++++++++++++ checkpoints/pretrained.report.txt | 67 +++++++++++++++++++++++++++++++ checkpoints/pretrained_curves.png | 3 ++ 3 files changed, 121 insertions(+) create mode 100644 checkpoints/pretrained.log.csv create mode 100644 checkpoints/pretrained.report.txt create mode 100644 checkpoints/pretrained_curves.png diff --git a/checkpoints/pretrained.log.csv b/checkpoints/pretrained.log.csv new file mode 100644 index 0000000..cd6218a --- /dev/null +++ b/checkpoints/pretrained.log.csv @@ -0,0 +1,51 @@ +epoch,train_loss,val_loss,val_ppl,lr,elapsed_s +1,2.031937,0.808181,2.24,2.205000e-04,11.4 +2,0.641424,0.550909,1.73,2.998721e-04,9.6 +3,0.523860,0.496441,1.64,2.991598e-04,9.6 +4,0.485698,0.472027,1.60,2.978255e-04,9.8 +5,0.464184,0.447461,1.56,2.958747e-04,9.4 +6,0.445964,0.434830,1.54,2.933156e-04,9.7 +7,0.431950,0.417041,1.52,2.901587e-04,9.8 +8,0.417696,0.409715,1.51,2.864174e-04,9.8 +9,0.405625,0.396861,1.49,2.821072e-04,10.0 +10,0.394811,0.391014,1.48,2.772460e-04,9.9 +11,0.384599,0.378818,1.46,2.718542e-04,10.1 +12,0.376229,0.370746,1.45,2.659542e-04,10.2 +13,0.366664,0.364249,1.44,2.595706e-04,10.3 +14,0.358899,0.353221,1.42,2.527301e-04,10.3 +15,0.351163,0.345543,1.41,2.454612e-04,10.2 +16,0.344542,0.343143,1.41,2.377941e-04,10.0 +17,0.337549,0.336707,1.40,2.297610e-04,10.1 +18,0.331382,0.332268,1.39,2.213952e-04,10.1 +19,0.325570,0.322937,1.38,2.127316e-04,10.1 +20,0.318492,0.319304,1.38,2.038065e-04,10.1 +21,0.313770,0.315012,1.37,1.946569e-04,10.0 +22,0.307178,0.311228,1.37,1.853211e-04,10.2 +23,0.302469,0.303362,1.35,1.758381e-04,10.2 +24,0.297134,0.302971,1.35,1.662472e-04,10.2 +25,0.292665,0.292786,1.34,1.565886e-04,10.2 +26,0.287050,0.289937,1.34,1.469026e-04,10.0 +27,0.282454,0.289310,1.34,1.372294e-04,10.1 +28,0.278259,0.286254,1.33,1.276095e-04,10.2 +29,0.274782,0.282411,1.33,1.180830e-04,10.1 +30,0.270312,0.278289,1.32,1.086896e-04,10.1 +31,0.267001,0.274995,1.32,9.946846e-05,10.1 +32,0.263096,0.271817,1.31,9.045806e-05,10.0 +33,0.260614,0.269074,1.31,8.169597e-05,10.1 +34,0.257799,0.269102,1.31,7.321873e-05,10.1 +35,0.253950,0.266719,1.31,6.506170e-05,10.2 +36,0.251757,0.264989,1.30,5.725888e-05,10.1 +37,0.249786,0.263033,1.30,4.984283e-05,10.0 +38,0.247241,0.260050,1.30,4.284447e-05,10.1 +39,0.245589,0.258710,1.30,3.629298e-05,10.2 +40,0.243220,0.258440,1.29,3.021569e-05,10.1 +41,0.242131,0.257187,1.29,2.463794e-05,10.1 +42,0.240936,0.256695,1.29,1.958300e-05,10.1 +43,0.239800,0.255997,1.29,1.507193e-05,10.0 +44,0.238705,0.255310,1.29,1.112356e-05,10.1 +45,0.238149,0.254971,1.29,7.754357e-06,10.1 +46,0.237226,0.254995,1.29,4.978363e-06,10.1 +47,0.236467,0.254608,1.29,2.807158e-06,10.2 +48,0.236280,0.254222,1.29,1.249797e-06,9.9 +49,0.235897,0.254291,1.29,3.127754e-07,10.2 +50,0.237100,0.254293,1.29,0.000000e+00,10.1 diff --git a/checkpoints/pretrained.report.txt b/checkpoints/pretrained.report.txt new file mode 100644 index 0000000..6539626 --- /dev/null +++ b/checkpoints/pretrained.report.txt @@ -0,0 +1,67 @@ + +==================================================== + PRE-TRAINING REPORT +==================================================== + Total epochs run : 50 + Best epoch (val loss) : 48 + Convergence epoch : 42 (val ≤ best+1 %) + Best val loss : 0.2542 + Best val perplexity : 1.29 + Final train loss : 0.2371 + Unique parameters : 1,384,128 + Checkpoint : checkpoints/pretrained.pt + Log CSV : checkpoints/pretrained.log.csv +==================================================== + + epoch train val ppl lr + ----- -------- -------- ------- ---------- + 1 2.0319 0.8082 2.24 2.20e-04 + 2 0.6414 0.5509 1.73 3.00e-04 + 3 0.5239 0.4964 1.64 2.99e-04 + 4 0.4857 0.4720 1.60 2.98e-04 + 5 0.4642 0.4475 1.56 2.96e-04 + 6 0.4460 0.4348 1.54 2.93e-04 + 7 0.4320 0.4170 1.52 2.90e-04 + 8 0.4177 0.4097 1.51 2.86e-04 + 9 0.4056 0.3969 1.49 2.82e-04 + 10 0.3948 0.3910 1.48 2.77e-04 + 11 0.3846 0.3788 1.46 2.72e-04 + 12 0.3762 0.3707 1.45 2.66e-04 + 13 0.3667 0.3642 1.44 2.60e-04 + 14 0.3589 0.3532 1.42 2.53e-04 + 15 0.3512 0.3455 1.41 2.45e-04 + 16 0.3445 0.3431 1.41 2.38e-04 + 17 0.3375 0.3367 1.40 2.30e-04 + 18 0.3314 0.3323 1.39 2.21e-04 + 19 0.3256 0.3229 1.38 2.13e-04 + 20 0.3185 0.3193 1.38 2.04e-04 + 21 0.3138 0.3150 1.37 1.95e-04 + 22 0.3072 0.3112 1.37 1.85e-04 + 23 0.3025 0.3034 1.35 1.76e-04 + 24 0.2971 0.3030 1.35 1.66e-04 + 25 0.2927 0.2928 1.34 1.57e-04 + 26 0.2871 0.2899 1.34 1.47e-04 + 27 0.2825 0.2893 1.34 1.37e-04 + 28 0.2783 0.2863 1.33 1.28e-04 + 29 0.2748 0.2824 1.33 1.18e-04 + 30 0.2703 0.2783 1.32 1.09e-04 + 31 0.2670 0.2750 1.32 9.95e-05 + 32 0.2631 0.2718 1.31 9.05e-05 + 33 0.2606 0.2691 1.31 8.17e-05 + 34 0.2578 0.2691 1.31 7.32e-05 + 35 0.2540 0.2667 1.31 6.51e-05 + 36 0.2518 0.2650 1.30 5.73e-05 + 37 0.2498 0.2630 1.30 4.98e-05 + 38 0.2472 0.2601 1.30 4.28e-05 + 39 0.2456 0.2587 1.30 3.63e-05 + 40 0.2432 0.2584 1.29 3.02e-05 + 41 0.2421 0.2572 1.29 2.46e-05 + 42 0.2409 0.2567 1.29 1.96e-05 + 43 0.2398 0.2560 1.29 1.51e-05 + 44 0.2387 0.2553 1.29 1.11e-05 + 45 0.2381 0.2550 1.29 7.75e-06 + 46 0.2372 0.2550 1.29 4.98e-06 + 47 0.2365 0.2546 1.29 2.81e-06 + 48 0.2363 0.2542 1.29 1.25e-06 ← + 49 0.2359 0.2543 1.29 3.13e-07 + 50 0.2371 0.2543 1.29 0.00e+00 diff --git a/checkpoints/pretrained_curves.png b/checkpoints/pretrained_curves.png new file mode 100644 index 0000000..3e49afa --- /dev/null +++ b/checkpoints/pretrained_curves.png @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bf9dbd3467054ed286e94de37a9417d896c9400cbeb88039c295a9b23e28c6a +size 67980