diff --git a/scripts/train.py b/scripts/train.py
index e267927..143d8a1 100644
--- a/scripts/train.py
+++ b/scripts/train.py
@@ -55,10 +55,10 @@ TRAIN_CFG = TrainConfig(
     output=CHECKPOINT,
     init_from=INIT_FROM,
     # Small corpus (~45 train files) → ~6 batches/epoch.
-    # 50 epochs × 6 = ~300 gradient steps; patience=10 gives a 60-step window.
-    epochs=50,
+    # 30 epochs × 6 = ~180 gradient steps; patience=10 gives a 60-step window.
+    epochs=30,
     batch_size=8,
-    lr=1e-5,
+    lr=3e-5,
     warmup_steps=10,
     patience=10,
     seed=42,