{
 "nbformat": 4,
 "nbformat_minor": 5,
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "name": "python",
   "version": "3.11.0"
  }
 },
 "cells": [
  {
   "cell_type": "markdown",
   "id": "title",
   "metadata": {},
   "source": [
    "# hamori — fine-tuning on personal chord corpus\n",
    "\n",
    "This notebook fine-tunes a pre-trained ChordTransformer on your tokenized `.pt` files using Google Colab (GPU T4 recommended).\n",
    "\n",
    "**Prerequisites (done locally before uploading):**\n",
    "- `python scripts/prepare_data.py --input-dir data/raw_user --output-dir data/processed/user`\n",
    "- `python scripts/make_colab_zip.py --mode finetune`\n",
    "- Have `checkpoints/pretrained.pt` from a completed pre-training run.\n",
    "\n",
    "**Steps:**\n",
    "1. Check GPU\n",
    "2. Upload `hamori_colab_finetune.zip`\n",
    "3. Extract and install dependencies\n",
    "4. Upload `pretrained.pt` checkpoint\n",
    "5. Verify processed data\n",
    "6. Run fine-tuning\n",
    "7. Inspect results\n",
    "8. Download checkpoint and logs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "gpu-check",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 1. GPU check ────────────────────────────────────────────────────────────\n",
    "# Report the attached CUDA device, or explain how to switch the runtime to a GPU.\n",
    "import torch\n",
    "\n",
    "if torch.cuda.is_available():\n",
    "    print(f\"GPU: {torch.cuda.get_device_name(0)}\")\n",
    "    print(f\"VRAM: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB\")\n",
    "else:\n",
    "    print(\"No GPU found — training will be slow on CPU.\")\n",
    "    print(\"Go to Runtime → Change runtime type → T4 GPU and re-run.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "upload-zip",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 2. Upload hamori_colab_finetune.zip ─────────────────────────────────────\n",
    "# Build it locally first:\n",
    "#   python scripts/make_colab_zip.py --mode finetune\n",
    "from google.colab import files\n",
    "\n",
    "# `uploaded` is consumed by step 3 to locate the archive.\n",
    "uploaded = files.upload()  # select hamori_colab_finetune.zip\n",
    "print(\"Uploaded:\", list(uploaded.keys()))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "extract",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 3. Extract and install dependencies ─────────────────────────────────────\n",
    "import os\n",
    "import zipfile\n",
    "\n",
    "WORK_DIR = \"/content/hamori\"\n",
    "os.makedirs(WORK_DIR, exist_ok=True)\n",
    "\n",
    "# Fail with a readable message (instead of a bare IndexError) when the upload\n",
    "# in step 2 was cancelled or a non-zip file was selected.\n",
    "zip_names = [k for k in uploaded if k.endswith(\".zip\")]\n",
    "if not zip_names:\n",
    "    raise FileNotFoundError(\n",
    "        \"No .zip file among the uploaded files — re-run step 2 and select hamori_colab_finetune.zip.\"\n",
    "    )\n",
    "zip_name = zip_names[0]\n",
    "\n",
    "with zipfile.ZipFile(zip_name) as zf:\n",
    "    zf.extractall(WORK_DIR)\n",
    "    print(f\"Extracted {len(zf.namelist())} files to {WORK_DIR}\")\n",
    "\n",
    "# All later cells use paths relative to the extracted project root.\n",
    "os.chdir(WORK_DIR)\n",
    "print(\"Working directory:\", os.getcwd())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "install-deps",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Colab ships torch; only install the extra deps.\n",
    "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
    "%pip install -q pretty_midi mido music21 matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "upload-checkpoint",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 4. Upload pretrained checkpoint ─────────────────────────────────────────\n",
    "# Skip this cell if you built the zip with --include-checkpoint.\n",
    "import os\n",
    "from pathlib import Path\n",
    "\n",
    "from google.colab import files\n",
    "\n",
    "ckpt_path = Path(\"checkpoints/pretrained.pt\")\n",
    "if ckpt_path.exists():\n",
    "    print(f\"Checkpoint already present: {ckpt_path} ({ckpt_path.stat().st_size / 1e6:.1f} MB)\")\n",
    "else:\n",
    "    print(\"Upload checkpoints/pretrained.pt from your local machine.\")\n",
    "    uploaded_ckpt = files.upload()  # select pretrained.pt\n",
    "    # A cancelled upload yields no entries; report that instead of an IndexError.\n",
    "    if not uploaded_ckpt:\n",
    "        raise FileNotFoundError(\n",
    "            \"No checkpoint was uploaded — re-run this cell and select pretrained.pt.\"\n",
    "        )\n",
    "    ckpt_path.parent.mkdir(parents=True, exist_ok=True)\n",
    "    # Move the first uploaded file to the location checked above.\n",
    "    src = next(iter(uploaded_ckpt))\n",
    "    os.rename(src, ckpt_path)\n",
    "    print(f\"Saved to {ckpt_path} ({ckpt_path.stat().st_size / 1e6:.1f} MB)\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "verify-data",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 5. Verify processed user corpus ─────────────────────────────────────────\n",
    "from pathlib import Path\n",
    "\n",
    "train_pt = list(Path(\"data/processed/user/train\").glob(\"*.pt\"))\n",
    "val_pt = list(Path(\"data/processed/user/val\").glob(\"*.pt\"))\n",
    "print(f\"Train: {len(train_pt)} files\")\n",
    "print(f\"Val: {len(val_pt)} files\")\n",
    "if not train_pt:\n",
    "    print()\n",
    "    print(\"ERROR: no training data found.\")\n",
    "    # The trailing backslash keeps the two printed lines copy-pasteable as one shell command.\n",
    "    print(\"Run locally first: python scripts/prepare_data.py \\\\\")\n",
    "    print(\" --input-dir data/raw_user --output-dir data/processed/user\")\n",
    "    print(\"Then rebuild the zip: python scripts/make_colab_zip.py --mode finetune\")\n",
    "    # Abort so that \"Run all\" does not go on to launch training on an empty corpus.\n",
    "    raise FileNotFoundError(\"No .pt files found in data/processed/user/train\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "finetune",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 6. Fine-tune ─────────────────────────────────────────────────────────────\n",
    "# Outputs:\n",
    "#   checkpoints/finetuned.pt\n",
    "#   checkpoints/finetuned.log.csv\n",
    "#   checkpoints/finetuned_curves.png\n",
    "#   checkpoints/finetuned.report.txt\n",
    "# NOTE(review): no flags are passed, so this relies on the defaults of\n",
    "# scripts/train.py selecting the fine-tune setup described above — confirm\n",
    "# against the script.\n",
    "!python scripts/train.py"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "show-report",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 7a. Show report ───────────────────────────────────────────────────────────\n",
    "from pathlib import Path\n",
    "\n",
    "report = Path(\"checkpoints/finetuned.report.txt\")\n",
    "if report.exists():\n",
    "    print(report.read_text(encoding=\"utf-8\"))\n",
    "else:\n",
    "    print(\"Report not found — training may have failed.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "show-curves",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 7b. Show loss curves ─────────────────────────────────────────────────────\n",
    "from pathlib import Path\n",
    "\n",
    "from IPython.display import Image, display\n",
    "\n",
    "# Guard like the report cell does, so a failed run gives a hint rather than a traceback.\n",
    "curves = Path(\"checkpoints/finetuned_curves.png\")\n",
    "if curves.exists():\n",
    "    display(Image(filename=str(curves)))\n",
    "else:\n",
    "    print(\"Curves not found — training may have failed.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "download",
   "metadata": {},
   "outputs": [],
   "source": [
    "# ── 8. Download results ───────────────────────────────────────────────────────\n",
    "import shutil\n",
    "\n",
    "from google.colab import files\n",
    "\n",
    "# Zip the whole checkpoints/ directory under WORK_DIR (set in step 3). This\n",
    "# includes every file in it, so pretrained.pt is bundled too if it is there.\n",
    "archive_path = shutil.make_archive(\"/content/finetune_results\", \"zip\", WORK_DIR, \"checkpoints\")\n",
    "files.download(archive_path)"
   ]
  }
 ]
}