From c4dd2fb690ffb8e0aff8bf37987788b1d208eba6 Mon Sep 17 00:00:00 2001 From: Masahiko AMANO Date: Thu, 21 May 2026 19:47:32 +0300 Subject: [PATCH] refactor: reorganize data/processed/ into mcgill/ and user/ subdirs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Moved data/processed/{train,val,holdout}/ → data/processed/mcgill/{train,val,holdout}/ so both corpora have their own namespace under data/processed/. Updated PRETRAIN_DATA paths in make_colab_zip.py accordingly (path remap workaround no longer needed). Co-Authored-By: Claude Sonnet 4.6 --- scripts/make_colab_zip.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/make_colab_zip.py b/scripts/make_colab_zip.py index 755d708..c387c2e 100644 --- a/scripts/make_colab_zip.py +++ b/scripts/make_colab_zip.py @@ -4,8 +4,8 @@ pretrain mode (default): - src/ (all Python modules) - scripts/pretrain.py - requirements.txt - - data/processed/mcgill/train/*.pt (remapped from data/processed/train/) - - data/processed/mcgill/val/*.pt (remapped from data/processed/val/) + - data/processed/mcgill/train/*.pt + - data/processed/mcgill/val/*.pt finetune mode: - src/ (all Python modules) @@ -57,8 +57,8 @@ MODE_SCRIPTS: dict[str, list[str]] = { # Local dir → arc path inside zip PRETRAIN_DATA: list[tuple[Path, str]] = [ - (ROOT / "data" / "processed" / "train", "data/processed/mcgill/train"), - (ROOT / "data" / "processed" / "val", "data/processed/mcgill/val"), + (ROOT / "data" / "processed" / "mcgill" / "train", "data/processed/mcgill/train"), + (ROOT / "data" / "processed" / "mcgill" / "val", "data/processed/mcgill/val"), ] FINETUNE_DATA: list[tuple[Path, str]] = [