diff --git a/src/tokenizer.py b/src/tokenizer.py
index 63ea7ca..e1819fa 100644
--- a/src/tokenizer.py
+++ b/src/tokenizer.py
@@ -508,6 +508,12 @@ def detokenize_to_period(token_ids: list[int]) -> ChordPeriod:
             pos_in_bar,
             positions_per_bar,
         )
+    if not bars:
+        raise ChordFormatError(
+            "token sequence produced no complete bars "
+            f"(last partial bar had {pos_in_bar}/{positions_per_bar} positions)"
+        )
+
     return ChordPeriod(
         title="detokenized",
         key=key,
diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py
index 3abe24f..b457896 100644
--- a/tests/test_tokenizer.py
+++ b/tests/test_tokenizer.py
@@ -16,6 +16,7 @@ from src.tokenizer import (
     ID_TO_TOKEN,
     TOKEN_TO_ID,
     VOCAB,
+    ChordFormatError,
     ChordPeriod,
     detokenize_to_period,
     parse_chord_file,
@@ -206,3 +207,17 @@ class TestRoundTrip:
         t = parse_chord_symbol(recovered.bars[0][0])
         assert t.root == "A"
         assert t.quality == "m"
+
+    def test_empty_bar_sequence_raises(self):
+        # BOS + metadata only, then EOS — no body tokens → must raise
+        ids = [
+            TOKEN_TO_ID["<BOS>"],
+            TOKEN_TO_ID["MODE_major"],
+            TOKEN_TO_ID["TIME_4/4"],
+            TOKEN_TO_ID["SUB_4"],
+            TOKEN_TO_ID["STYLE_H1K0"],
+            TOKEN_TO_ID["FUNC_chorus"],
+            TOKEN_TO_ID["<EOS>"],
+        ]
+        with pytest.raises(ChordFormatError, match="no complete bars"):
+            detokenize_to_period(ids)