feat: implement McGill Billboard converter (Harte → .chord)
Adds src/external_converters/mcgill_to_chord.py with two public functions:
- convert_song(song_dir, output_dir) — converts one salami_chords.txt into
  per-section .chord files (4–16 bars each, style=other)
- convert_dataset(dataset_dir, output_dir) — batch-converts all songs
Key decisions:
- Harte qualities are mapped to our 18-quality vocabulary (e.g. hdim7 → m7b5);
  parenthetical alterations (e.g. 7(b9)) are handled via regex
- Bar duration estimated from median non-trivial chord duration
- Mode (major/minor) inferred from tonic chord quality distribution
- Sections with <4 or >16 bars are skipped with a logged reason
- Unrecognized Harte chords skip the whole section (no silent corruption)
48 new tests in tests/test_mcgill_converter.py; total suite 223 passed.
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,265 @@
|
||||
"""Tests for src/external_converters/mcgill_to_chord.py.
|
||||
|
||||
Fixture: tests/fixtures/mcgill_test/0001/salami_chords.txt
|
||||
4/4 song in C major, two sections:
|
||||
Section A (verse): C:maj F:maj G:7 C:maj — 4 chords × 4.0 s each
|
||||
Section B (chorus): F:maj C:maj G:7 C:maj — 4 chords × 4.0 s each
|
||||
|
||||
Expected output: 2 .chord files, each with 4 bars, key=C_major, time=4/4.
|
||||
"""
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from src.external_converters.mcgill_to_chord import (
|
||||
_estimate_bar_duration,
|
||||
_extract_sections,
|
||||
_harte_to_chord_symbol,
|
||||
_infer_mode,
|
||||
_parse_metre,
|
||||
_parse_salami_file,
|
||||
_section_to_bars,
|
||||
convert_song,
|
||||
)
|
||||
from src.tokenizer import parse_chord_file
|
||||
|
||||
# Fixture tree for the McGill converter tests, located next to this module.
FIXTURES = Path(__file__).parent.joinpath("fixtures", "mcgill_test")
# Directory of the single fixture song (id "0001") used by the tests below.
TEST_SONG = FIXTURES.joinpath("0001")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Harte chord symbol conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestHarteConversion:
    """Check that single Harte labels translate to the expected .chord symbols."""

    @staticmethod
    def _assert_converts(harte, expected):
        """Assert that converting *harte* yields exactly *expected*."""
        actual = _harte_to_chord_symbol(harte)
        assert actual == expected

    def test_simple_major(self):
        self._assert_converts("C:maj", "Cmaj")

    def test_flat_minor_seventh(self):
        # A flat root is rewritten as its sharp spelling: Bb -> A#.
        self._assert_converts("Bb:min7", "A#m7")

    def test_half_diminished(self):
        # Harte's hdim7 (half-diminished seventh) corresponds to our m7b5.
        self._assert_converts("E:hdim7", "Em7b5")

    def test_dominant_seventh(self):
        self._assert_converts("G:7", "G7")

    def test_major_seventh(self):
        self._assert_converts("D:maj7", "Dmaj7")

    def test_minor(self):
        self._assert_converts("A:min", "Am")

    def test_diminished_seventh(self):
        self._assert_converts("B:dim7", "Bdim7")

    def test_augmented(self):
        self._assert_converts("C:aug", "Caug")

    def test_slash_chord(self):
        self._assert_converts("C:maj/E", "Cmaj/E")

    def test_slash_chord_flat_bass(self):
        # The bass note after the slash gets the same flat -> sharp rewrite.
        self._assert_converts("G:maj/Bb", "Gmaj/A#")

    def test_no_chord_returns_none(self):
        result = _harte_to_chord_symbol("N")
        assert result is None

    def test_unknown_returns_none(self):
        result = _harte_to_chord_symbol("X")
        assert result is None

    def test_empty_returns_none(self):
        result = _harte_to_chord_symbol("")
        assert result is None

    def test_extended_dominant_ninth(self):
        # G:9 is rendered as the dominant 7 quality followed by extension 9.
        self._assert_converts("G:9", "G79")

    def test_major_ninth(self):
        self._assert_converts("C:maj9", "Cmaj79")

    def test_parenthetical_flat_nine(self):
        self._assert_converts("C:7(b9)", "C7b9")

    def test_parenthetical_sharp_eleven(self):
        self._assert_converts("F:maj7(#11)", "Fmaj7#11")

    def test_sharp_root(self):
        self._assert_converts("F#:min7", "F#m7")

    def test_output_is_parseable(self):
        """Converted symbols must be accepted by the project's chord parser."""
        from src.chord_parser import parse_chord_symbol

        samples = ("C:maj", "Bb:min7", "E:hdim7", "G:7", "D:maj7", "C:maj/E")
        for label in samples:
            symbol = _harte_to_chord_symbol(label)
            assert symbol is not None
            # An exception raised here is what fails the test.
            parse_chord_symbol(symbol)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helper units
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParseSalamiFile:
    """Checks on the (header, events) pair parsed from the fixture file."""

    @staticmethod
    def _parse_fixture():
        """Parse the fixture song's salami_chords.txt and return the result."""
        return _parse_salami_file(TEST_SONG / "salami_chords.txt")

    def test_header_parsed(self):
        header, _events = self._parse_fixture()
        expected_fields = (
            ("artist", "Test Artist"),
            ("title", "Test Song"),
            ("metre", "4/4"),
            ("tonic", "C"),
        )
        for field, value in expected_fields:
            assert header[field] == value

    def test_events_count(self):
        _header, events = self._parse_fixture()
        # The fixture has 10 data lines in total, Z lines included.
        assert len(events) == 10

    def test_first_event_is_silence(self):
        _header, events = self._parse_fixture()
        # Each event unpacks into three fields; the third is not checked here.
        timestamp, section_label, _chord = events[0]
        assert timestamp == 0.0
        assert section_label == "Z"
|
||||
|
||||
|
||||
class TestExtractSections:
    """Checks on the sections extracted from the fixture's parsed events."""

    @staticmethod
    def _fixture_sections():
        """Parse the fixture file and return its extracted sections."""
        _header, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        return _extract_sections(events)

    def test_two_sections(self):
        found = self._fixture_sections()
        assert len(found) == 2

    def test_section_functions(self):
        found = self._fixture_sections()
        first, second = found[0], found[1]
        assert first.function == "verse"
        assert second.function == "chorus"

    def test_events_per_section(self):
        found = self._fixture_sections()
        first, second = found[0], found[1]
        assert len(first.events) == 4
        assert len(second.events) == 4

    def test_chord_values(self):
        found = self._fixture_sections()
        labels = [event.harte for event in found[0].events]
        assert labels == ["C:maj", "F:maj", "G:7", "C:maj"]
|
||||
|
||||
|
||||
class TestEstimateBarDuration:
    """Checks on the bar-duration estimate computed from chord durations."""

    def test_uniform_durations(self):
        durations = [2.0, 2.0, 2.0, 2.0]
        assert _estimate_bar_duration(durations) == 2.0

    def test_mixed_durations(self):
        # The median of [2, 2, 2, 4, 4] is 2, so the estimated bar is 2 s.
        durations = [2.0, 2.0, 2.0, 4.0, 4.0]
        assert _estimate_bar_duration(durations) == 2.0

    def test_too_few_samples_returns_default(self):
        for durations in ([], [3.0]):
            assert _estimate_bar_duration(durations) == 2.0

    def test_clamp_upper(self):
        durations = [10.0, 10.0, 10.0]
        assert _estimate_bar_duration(durations) == 5.0

    def test_clamp_lower(self):
        # Every sample is below 0.5, so the estimator falls back to 2.0.
        durations = [0.3, 0.3, 0.3]
        assert _estimate_bar_duration(durations) == 2.0
|
||||
|
||||
|
||||
class TestParseMetre:
    """Checks on the (time signature, subdivision) pair from _parse_metre."""

    def test_4_4(self):
        expected = ("4/4", 4)
        assert _parse_metre("4/4") == expected

    def test_3_4(self):
        expected = ("3/4", 4)
        assert _parse_metre("3/4") == expected

    def test_6_8(self):
        expected = ("6/8", 8)
        assert _parse_metre("6/8") == expected

    def test_integer_4(self):
        # A bare "4" is reported the same way as "4/4".
        expected = ("4/4", 4)
        assert _parse_metre("4") == expected

    def test_unsupported(self):
        signature, subdivision = _parse_metre("7/8")
        assert signature is None
        assert subdivision == 0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full period conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFullConversion:
    """Integration tests: convert_song with fixture produces valid .chord files.

    Tests that inspect the generated files obtain them through ``_convert``.
    It asserts that the glob matched at least one file, so a conversion that
    writes nothing fails the test instead of letting a ``for`` loop or
    ``all()`` over an empty result pass vacuously.
    """

    @staticmethod
    def _convert(out_dir, pattern="*.chord"):
        """Convert the fixture song into *out_dir* and return matching files.

        Args:
            out_dir: Directory handed to ``convert_song`` as the output dir.
            pattern: Glob pattern, relative to *out_dir*, selecting outputs.

        Returns:
            Sorted list of paths in *out_dir* matching *pattern*; never empty.
        """
        convert_song(TEST_SONG, out_dir)
        files = sorted(out_dir.glob(pattern))
        assert files, f"convert_song produced no files matching {pattern!r}"
        return files

    def test_returns_two_periods(self, tmp_path):
        assert convert_song(TEST_SONG, tmp_path) == 2

    def test_output_files_exist(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        assert len(list(tmp_path.glob("*.chord"))) == 2

    def test_output_files_are_parseable(self, tmp_path):
        for f in self._convert(tmp_path):
            assert parse_chord_file(f) is not None  # must not raise

    def test_verse_has_four_bars(self, tmp_path):
        verse_files = self._convert(tmp_path, "*verse*.chord")
        assert len(verse_files) == 1
        assert len(parse_chord_file(verse_files[0]).bars) == 4

    def test_chorus_has_four_bars(self, tmp_path):
        chorus_files = self._convert(tmp_path, "*chorus*.chord")
        assert len(chorus_files) == 1
        assert len(parse_chord_file(chorus_files[0]).bars) == 4

    def test_header_time_and_subdivision(self, tmp_path):
        for f in self._convert(tmp_path):
            p = parse_chord_file(f)
            assert p.time == "4/4"
            assert p.subdivision == 4

    def test_style_is_other(self, tmp_path):
        for f in self._convert(tmp_path):
            assert parse_chord_file(f).style == "other"

    def test_key_is_c_major(self, tmp_path):
        for f in self._convert(tmp_path):
            assert parse_chord_file(f).key == "C_major"

    def test_function_tags(self, tmp_path):
        funcs = {parse_chord_file(f).function for f in self._convert(tmp_path)}
        assert funcs == {"verse", "chorus"}

    def test_filenames_contain_song_id(self, tmp_path):
        # all() is True for an empty iterable; _convert rules that case out.
        names = {f.name for f in self._convert(tmp_path)}
        assert all("0001" in name for name in names)

    def test_bar_positions_are_valid_chords(self, tmp_path):
        from src.chord_parser import parse_chord_symbol

        for f in self._convert(tmp_path):
            p = parse_chord_file(f)
            for bar in p.bars:
                # Only the first position of each bar is checked; the
                # placeholder tokens below are skipped rather than parsed.
                first = bar[0]
                if first not in (".", "NC", "?"):
                    parse_chord_symbol(first)  # must not raise

    def test_missing_salami_returns_zero(self, tmp_path):
        # A song directory with no salami_chords.txt must convert to 0 files.
        empty_song = tmp_path / "empty"
        empty_song.mkdir()
        assert convert_song(empty_song, tmp_path / "out") == 0
|
||||
Reference in New Issue
Block a user