8f657ca916
make_colab_zip.py now accepts --mode pretrain|finetune (default: pretrain).
Finetune mode bundles scripts/train.py + data/processed/user/{train,val}/*.pt
plus an optional --include-checkpoint flag for pretrained.pt.
notebooks/colab_finetune.ipynb covers the full Colab fine-tuning workflow:
upload zip → upload pretrained.pt → verify data → train → inspect → download.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
7.6 KiB
7.6 KiB
hamori — fine-tuning on a personal chord corpus
This notebook fine-tunes a pre-trained ChordTransformer on your tokenized .pt files using Google Colab (GPU T4 recommended).
Prerequisites (done locally before uploading):
- `python scripts/prepare_data.py --input-dir data/raw_user --output-dir data/processed/user`
- `python scripts/make_colab_zip.py --mode finetune`
- Have `checkpoints/pretrained.pt` from a completed pre-training run.
Steps:
- Check GPU
- Upload `hamori_colab_finetune.zip`
- Extract and install dependencies
- Upload `pretrained.pt` checkpoint
- Verify processed data
- Run fine-tuning
- Inspect results
- Download checkpoint and logs
In [ ]:
# ── 1. GPU check ────────────────────────────────────────────────────────────
# Report which CUDA device (if any) this runtime exposes before training.
import torch

if not torch.cuda.is_available():
    # No CUDA device visible: tell the user how to switch the runtime.
    print("No GPU found — training will be slow on CPU.")
    print("Go to Runtime → Change runtime type → T4 GPU and re-run.")
else:
    device_index = 0  # only the first visible device is reported
    gpu_name = torch.cuda.get_device_name(device_index)
    print(f"GPU: {gpu_name}")
    # total_memory is in bytes; dividing by 1e9 reports decimal gigabytes.
    vram_gb = torch.cuda.get_device_properties(device_index).total_memory / 1e9
    print(f"VRAM: {vram_gb:.1f} GB")
In [ ]:
# ── 2. Upload hamori_colab_finetune.zip ──────────────────────────────────────
# The archive has to be built on the local machine beforehand:
#   python scripts/make_colab_zip.py --mode finetune
from google.colab import files

# Select hamori_colab_finetune.zip when prompted. The result is kept in
# `uploaded` because the extraction cell looks up the zip name from it.
uploaded = files.upload()
uploaded_names = list(uploaded.keys())
print("Uploaded:", uploaded_names)
In [ ]:
# ── 3. Extract and install dependencies ─────────────────────────────────────
# Unpack the uploaded archive into WORK_DIR and make it the working directory,
# so that the relative paths used by the following cells (scripts/, data/,
# checkpoints/) resolve inside the extracted tree.
import os
import zipfile

WORK_DIR = "/content/hamori"
os.makedirs(WORK_DIR, exist_ok=True)

# `uploaded` comes from the upload cell; its keys are the uploaded file names.
zip_candidates = [name for name in uploaded if name.endswith(".zip")]
if not zip_candidates:
    # Fail with an actionable message instead of a bare IndexError.
    raise FileNotFoundError(
        "No .zip file found among the uploaded files — re-run the upload "
        "cell and select hamori_colab_finetune.zip."
    )
zip_name = zip_candidates[0]  # first uploaded .zip wins, as before

with zipfile.ZipFile(zip_name) as zf:
    zf.extractall(WORK_DIR)
    print(f"Extracted {len(zf.namelist())} files to {WORK_DIR}")

os.chdir(WORK_DIR)
print("Working directory:", os.getcwd())
In [ ]:
# Colab ships torch; only install the extra deps
!pip install -q pretty_midi mido music21 matplotlib
In [ ]:
# ── 4. Upload pretrained checkpoint ─────────────────────────────────────────
# Skip this cell if you built the zip with --include-checkpoint.
# (Running it anyway is harmless: an existing checkpoint is only reported.)
import os
from pathlib import Path

from google.colab import files

ckpt_path = Path("checkpoints/pretrained.pt")
if ckpt_path.exists():
    print(f"Checkpoint already present: {ckpt_path} ({ckpt_path.stat().st_size / 1e6:.1f} MB)")
else:
    print("Upload checkpoints/pretrained.pt from your local machine.")
    uploaded_ckpt = files.upload()  # select pretrained.pt
    if not uploaded_ckpt:
        # Upload dialog was cancelled or returned nothing: stop with a clear
        # message instead of an IndexError on the lookup below.
        raise FileNotFoundError(
            "No file was uploaded — re-run this cell and select pretrained.pt."
        )
    ckpt_path.parent.mkdir(parents=True, exist_ok=True)
    # The first uploaded file is taken as the checkpoint, whatever its name,
    # and moved to the fixed location ckpt_path.
    src = next(iter(uploaded_ckpt))
    os.rename(src, ckpt_path)
    print(f"Saved to {ckpt_path} ({ckpt_path.stat().st_size / 1e6:.1f} MB)")
In [ ]:
# ── 5. Verify processed user corpus ─────────────────────────────────────────
# Count the tokenized .pt files in the train/val splits and stop the notebook
# if the training split is empty, so that "Run all" does not go on to launch
# training without data.
from pathlib import Path

train_pt = list(Path("data/processed/user/train").glob("*.pt"))
val_pt = list(Path("data/processed/user/val").glob("*.pt"))
print(f"Train: {len(train_pt)} files")
print(f"Val: {len(val_pt)} files")

if not train_pt:
    print()
    print("ERROR: no training data found.")
    print("Run locally first: python scripts/prepare_data.py ")
    print(" --input-dir data/raw_user --output-dir data/processed/user")
    print("Then rebuild the zip: python scripts/make_colab_zip.py --mode finetune")
    # Raise after printing the guidance so execution actually halts here.
    raise FileNotFoundError("No *.pt files found in data/processed/user/train")
In [ ]:
# ── 6. Fine-tune ─────────────────────────────────────────────────────────────
# Runs scripts/train.py as a shell command from the current working directory.
# Outputs:
#   checkpoints/finetuned.pt
#   checkpoints/finetuned.log.csv
#   checkpoints/finetuned_curves.png
#   checkpoints/finetuned.report.txt
# NOTE(review): train.py is invoked without any arguments here; presumably its
# defaults select fine-tuning from checkpoints/pretrained.pt on
# data/processed/user — confirm against scripts/train.py, or pass the
# relevant flags explicitly.
!python scripts/train.py
In [ ]:
# ── 7a. Show report ───────────────────────────────────────────────────────────
# Print the text report from the training step, or a hint when it is missing.
from pathlib import Path

report = Path("checkpoints/finetuned.report.txt")
report_output = (
    report.read_text(encoding="utf-8")
    if report.exists()
    else "Report not found — training may have failed."
)
print(report_output)
In [ ]:
# ── 7b. Show loss curves ─────────────────────────────────────────────────────
# Render the loss-curve image from the training step. The existence check
# mirrors the report cell: a missing file yields a readable hint instead of a
# failure inside Image().
from pathlib import Path

from IPython.display import Image, display

curves = Path("checkpoints/finetuned_curves.png")
if curves.exists():
    # display() is called explicitly because an expression inside an `if`
    # branch is not auto-rendered as the cell result.
    display(Image(filename=str(curves)))
else:
    print("Loss curves not found — training may have failed.")
In [ ]:
# ── 8. Download results ───────────────────────────────────────────────────────
# Zip the checkpoints/ directory under WORK_DIR and hand the archive to the
# browser for download. WORK_DIR is defined by the extraction cell.
import shutil

from google.colab import files

archive_base = "/content/finetune_results"  # make_archive appends ".zip"
shutil.make_archive(
    archive_base,
    "zip",
    root_dir=WORK_DIR,
    base_dir="checkpoints",
)
files.download(f"{archive_base}.zip")