Files
hamori/tests/test_mcgill_converter.py
T
H1K0 ea32bf43b2 feat: implement McGill Billboard converter (Harte → .chord)
Adds src/external_converters/mcgill_to_chord.py with two public functions:
  - convert_song(song_dir, output_dir) — converts one salami_chords.txt to
    per-section .chord files (4–16 bars each, style=other)
  - convert_dataset(dataset_dir, output_dir) — batch converts all songs

Key decisions:
  - Harte qualities mapped to our 18-quality vocabulary; hdim7 → m7b5,
    parenthetical alterations (e.g. 7(b9)) handled via regex
  - Bar duration estimated from median non-trivial chord duration
  - Mode (major/minor) inferred from tonic chord quality distribution
  - Sections with <4 or >16 bars are skipped with a logged reason
  - Unrecognized Harte chords skip the whole section (no silent corruption)

48 new tests in tests/test_mcgill_converter.py; total suite 223 passed.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 17:04:02 +03:00

266 lines
9.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Tests for src/external_converters/mcgill_to_chord.py.
Fixture: tests/fixtures/mcgill_test/0001/salami_chords.txt
4/4 song in C major, two sections:
Section A (verse): C:maj F:maj G:7 C:maj — 4 chords × 4.0 s each
Section B (chorus): F:maj C:maj G:7 C:maj — 4 chords × 4.0 s each
Expected output: 2 .chord files, each with 4 bars, key=C_major, time=4/4.
"""
from pathlib import Path
import pytest
from src.external_converters.mcgill_to_chord import (
_estimate_bar_duration,
_extract_sections,
_harte_to_chord_symbol,
_infer_mode,
_parse_metre,
_parse_salami_file,
_section_to_bars,
convert_song,
)
from src.tokenizer import parse_chord_file
# Root of the McGill converter fixtures, resolved relative to this test file.
FIXTURES = Path(__file__).parent / "fixtures" / "mcgill_test"
# Fixture song directory "0001"; the tests below read its salami_chords.txt.
TEST_SONG = FIXTURES / "0001"
# ---------------------------------------------------------------------------
# Harte chord symbol conversion
# ---------------------------------------------------------------------------
class TestHarteConversion:
    """Check the translation of single Harte labels into .chord symbols."""

    def test_simple_major(self):
        converted = _harte_to_chord_symbol("C:maj")
        assert converted == "Cmaj"

    def test_flat_minor_seventh(self):
        # A flat root (Bb) is expected back in its sharp spelling (A#).
        converted = _harte_to_chord_symbol("Bb:min7")
        assert converted == "A#m7"

    def test_half_diminished(self):
        # Harte "hdim7" (half-diminished seventh) corresponds to our m7b5.
        converted = _harte_to_chord_symbol("E:hdim7")
        assert converted == "Em7b5"

    def test_dominant_seventh(self):
        converted = _harte_to_chord_symbol("G:7")
        assert converted == "G7"

    def test_major_seventh(self):
        converted = _harte_to_chord_symbol("D:maj7")
        assert converted == "Dmaj7"

    def test_minor(self):
        converted = _harte_to_chord_symbol("A:min")
        assert converted == "Am"

    def test_diminished_seventh(self):
        converted = _harte_to_chord_symbol("B:dim7")
        assert converted == "Bdim7"

    def test_augmented(self):
        converted = _harte_to_chord_symbol("C:aug")
        assert converted == "Caug"

    def test_slash_chord(self):
        converted = _harte_to_chord_symbol("C:maj/E")
        assert converted == "Cmaj/E"

    def test_slash_chord_flat_bass(self):
        # The bass note after the slash is re-spelled with a sharp as well.
        converted = _harte_to_chord_symbol("G:maj/Bb")
        assert converted == "Gmaj/A#"

    def test_no_chord_returns_none(self):
        outcome = _harte_to_chord_symbol("N")
        assert outcome is None

    def test_unknown_returns_none(self):
        outcome = _harte_to_chord_symbol("X")
        assert outcome is None

    def test_empty_returns_none(self):
        outcome = _harte_to_chord_symbol("")
        assert outcome is None

    def test_extended_dominant_ninth(self):
        # G:9 is rendered as a dominant 7 followed by the extension 9.
        converted = _harte_to_chord_symbol("G:9")
        assert converted == "G79"

    def test_major_ninth(self):
        converted = _harte_to_chord_symbol("C:maj9")
        assert converted == "Cmaj79"

    def test_parenthetical_flat_nine(self):
        converted = _harte_to_chord_symbol("C:7(b9)")
        assert converted == "C7b9"

    def test_parenthetical_sharp_eleven(self):
        converted = _harte_to_chord_symbol("F:maj7(#11)")
        assert converted == "Fmaj7#11"

    def test_sharp_root(self):
        converted = _harte_to_chord_symbol("F#:min7")
        assert converted == "F#m7"

    def test_output_is_parseable(self):
        from src.chord_parser import parse_chord_symbol

        samples = ("C:maj", "Bb:min7", "E:hdim7", "G:7", "D:maj7", "C:maj/E")
        for label in samples:
            symbol = _harte_to_chord_symbol(label)
            assert symbol is not None
            # Success here simply means the parser did not raise.
            parse_chord_symbol(symbol)
# ---------------------------------------------------------------------------
# Helper units
# ---------------------------------------------------------------------------
class TestParseSalamiFile:
    """Check the header and event list parsed from the fixture file."""

    def test_header_parsed(self):
        parsed_header, _events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        expected_fields = {
            "artist": "Test Artist",
            "title": "Test Song",
            "metre": "4/4",
            "tonic": "C",
        }
        for field, value in expected_fields.items():
            assert parsed_header[field] == value

    def test_events_count(self):
        _header, parsed_events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        # The fixture holds 10 data lines in total, Z lines included.
        event_total = len(parsed_events)
        assert event_total == 10

    def test_first_event_is_silence(self):
        _header, parsed_events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        opening = parsed_events[0]
        timestamp, tag, _chord = opening
        assert timestamp == 0.0
        assert tag == "Z"
class TestExtractSections:
    """Check how the fixture's events are grouped into sections."""

    @staticmethod
    def _fixture_sections():
        """Parse the fixture file and return the sections extracted from it."""
        _header, parsed_events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        return _extract_sections(parsed_events)

    def test_two_sections(self):
        found = self._fixture_sections()
        assert len(found) == 2

    def test_section_functions(self):
        found = self._fixture_sections()
        first_function = found[0].function
        assert first_function == "verse"
        second_function = found[1].function
        assert second_function == "chorus"

    def test_events_per_section(self):
        found = self._fixture_sections()
        first_size = len(found[0].events)
        assert first_size == 4
        second_size = len(found[1].events)
        assert second_size == 4

    def test_chord_values(self):
        found = self._fixture_sections()
        labels = []
        for event in found[0].events:
            labels.append(event.harte)
        assert labels == ["C:maj", "F:maj", "G:7", "C:maj"]
class TestEstimateBarDuration:
    """Check the bar-duration estimate for a few duration lists."""

    def test_uniform_durations(self):
        durations = [2.0, 2.0, 2.0, 2.0]
        assert _estimate_bar_duration(durations) == 2.0

    def test_mixed_durations(self):
        # The median of [2, 2, 2, 4, 4] is 2, so the bar duration is 2.
        durations = [2.0, 2.0, 2.0, 4.0, 4.0]
        assert _estimate_bar_duration(durations) == 2.0

    def test_too_few_samples_returns_default(self):
        for durations in ([], [3.0]):
            assert _estimate_bar_duration(durations) == 2.0

    def test_clamp_upper(self):
        durations = [10.0, 10.0, 10.0]
        assert _estimate_bar_duration(durations) == 5.0

    def test_clamp_lower(self):
        # Every sample is below 0.5, so the estimate falls back.
        durations = [0.3, 0.3, 0.3]
        assert _estimate_bar_duration(durations) == 2.0
class TestParseMetre:
    """Check the (time signature, subdivision) pair returned for metre strings."""

    def test_4_4(self):
        parsed = _parse_metre("4/4")
        assert parsed == ("4/4", 4)

    def test_3_4(self):
        parsed = _parse_metre("3/4")
        assert parsed == ("3/4", 4)

    def test_6_8(self):
        parsed = _parse_metre("6/8")
        assert parsed == ("6/8", 8)

    def test_integer_4(self):
        # A bare "4" is expected to be read as 4/4.
        parsed = _parse_metre("4")
        assert parsed == ("4/4", 4)

    def test_unsupported(self):
        signature, subdivision = _parse_metre("7/8")
        assert signature is None
        assert subdivision == 0
# ---------------------------------------------------------------------------
# Full period conversion
# ---------------------------------------------------------------------------
class TestFullConversion:
    """Integration tests: convert_song with fixture produces valid .chord files."""

    @staticmethod
    def _convert(tmp_path):
        """Convert the fixture song into *tmp_path* and return the .chord files.

        The result is asserted to be non-empty. Without this guard, tests that
        loop over the produced files (or call ``all()`` on them) would pass
        vacuously if the converter wrote nothing.
        """
        convert_song(TEST_SONG, tmp_path)
        produced = sorted(tmp_path.glob("*.chord"))
        assert produced, "convert_song produced no .chord files"
        return produced

    def test_returns_two_periods(self, tmp_path):
        assert convert_song(TEST_SONG, tmp_path) == 2

    def test_output_files_exist(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        assert len(list(tmp_path.glob("*.chord"))) == 2

    def test_output_files_are_parseable(self, tmp_path):
        for f in self._convert(tmp_path):
            assert parse_chord_file(f) is not None  # must not raise

    def test_verse_has_four_bars(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        verse_files = sorted(tmp_path.glob("*verse*.chord"))
        assert len(verse_files) == 1
        assert len(parse_chord_file(verse_files[0]).bars) == 4

    def test_chorus_has_four_bars(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        chorus_files = sorted(tmp_path.glob("*chorus*.chord"))
        assert len(chorus_files) == 1
        assert len(parse_chord_file(chorus_files[0]).bars) == 4

    def test_header_time_and_subdivision(self, tmp_path):
        for f in self._convert(tmp_path):
            p = parse_chord_file(f)
            assert p.time == "4/4"
            assert p.subdivision == 4

    def test_style_is_other(self, tmp_path):
        for f in self._convert(tmp_path):
            assert parse_chord_file(f).style == "other"

    def test_key_is_c_major(self, tmp_path):
        for f in self._convert(tmp_path):
            assert parse_chord_file(f).key == "C_major"

    def test_function_tags(self, tmp_path):
        funcs = {parse_chord_file(f).function for f in self._convert(tmp_path)}
        assert funcs == {"verse", "chorus"}

    def test_filenames_contain_song_id(self, tmp_path):
        # all() is True for an empty iterable; _convert guarantees non-empty.
        names = {f.name for f in self._convert(tmp_path)}
        assert all("0001" in name for name in names)

    def test_bar_positions_are_valid_chords(self, tmp_path):
        from src.chord_parser import parse_chord_symbol

        for f in self._convert(tmp_path):
            p = parse_chord_file(f)
            for bar in p.bars:
                first = bar[0]
                # Placeholder tokens are not chord symbols; skip them.
                if first not in (".", "NC", "?"):
                    parse_chord_symbol(first)  # must not raise

    def test_missing_salami_returns_zero(self, tmp_path):
        # A song directory without salami_chords.txt should yield no output.
        empty_song = tmp_path / "empty"
        empty_song.mkdir()
        assert convert_song(empty_song, tmp_path / "out") == 0