refactor: reorganize data/processed/ into mcgill/ and user/ subdirs
Moved data/processed/{train,val,holdout}/ → data/processed/mcgill/{train,val,holdout}/
so that the mcgill and user corpora each have their own namespace under data/processed/.
Updated the PRETRAIN_DATA paths in make_colab_zip.py accordingly
(the path-remap workaround is no longer needed).
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -4,8 +4,8 @@ pretrain mode (default):
|
|||||||
- src/ (all Python modules)
|
- src/ (all Python modules)
|
||||||
- scripts/pretrain.py
|
- scripts/pretrain.py
|
||||||
- requirements.txt
|
- requirements.txt
|
||||||
- data/processed/mcgill/train/*.pt (remapped from data/processed/train/)
|
- data/processed/mcgill/train/*.pt
|
||||||
- data/processed/mcgill/val/*.pt (remapped from data/processed/val/)
|
- data/processed/mcgill/val/*.pt
|
||||||
|
|
||||||
finetune mode:
|
finetune mode:
|
||||||
- src/ (all Python modules)
|
- src/ (all Python modules)
|
||||||
@@ -57,8 +57,8 @@ MODE_SCRIPTS: dict[str, list[str]] = {
|
|||||||
|
|
||||||
# Local dir → arc path inside zip
|
# Local dir → arc path inside zip
|
||||||
PRETRAIN_DATA: list[tuple[Path, str]] = [
|
PRETRAIN_DATA: list[tuple[Path, str]] = [
|
||||||
(ROOT / "data" / "processed" / "train", "data/processed/mcgill/train"),
|
(ROOT / "data" / "processed" / "mcgill" / "train", "data/processed/mcgill/train"),
|
||||||
(ROOT / "data" / "processed" / "val", "data/processed/mcgill/val"),
|
(ROOT / "data" / "processed" / "mcgill" / "val", "data/processed/mcgill/val"),
|
||||||
]
|
]
|
||||||
|
|
||||||
FINETUNE_DATA: list[tuple[Path, str]] = [
|
FINETUNE_DATA: list[tuple[Path, str]] = [
|
||||||
|
|||||||
Reference in New Issue
Block a user