"""Convert McGill Billboard dataset (salami_chords.txt) to .chord files.

McGill Billboard format:
  Each song is a subdirectory (e.g. 0003/, 0004/) containing salami_chords.txt.
  The file has a header (# key: value) followed by tab-separated data lines:
      <timestamp>\\t<section_label>\\t<chord>

  Section labels: 'Z' (silence/boundary), a letter (e.g. 'A', 'B,verse'), or '.' (continuation).
  Chords: Harte notation (e.g. C:maj, Bb:min7, N for no chord, X for unknown).

Public API:
  convert_dataset(dataset_dir, output_dir)  -- convert entire dataset directory
  convert_song(song_dir, output_dir)        -- convert one song directory

CLI:
  python -m src.external_converters.mcgill_to_chord <dataset_dir> [--out <output_dir>]

Example:
  python -m src.external_converters.mcgill_to_chord data/raw_external/mcgill/ \\
      --out data/raw_external/mcgill_converted/
"""

from __future__ import annotations

import argparse
import logging
import re
import statistics
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from typing import Optional

log = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Harte quality → (our_quality, our_extension)
# ---------------------------------------------------------------------------

_HARTE_QUALITY: dict[str, tuple[str, str]] = {
    "maj": ("maj", "none"),
    "min": ("m", "none"),
    "dim": ("dim", "none"),
    "aug": ("aug", "none"),
    "sus4": ("sus4", "none"),
    "sus2": ("sus2", "none"),
    "maj7": ("maj7", "none"),
    "min7": ("m7", "none"),
    "7": ("7", "none"),
    "hdim7": ("m7b5", "none"),
    "dim7": ("dim7", "none"),
    "minmaj7": ("mM7", "none"),
    "maj6": ("6", "none"),
    "min6": ("m6", "none"),
    "6": ("6", "none"),
    "7sus4": ("7sus4", "none"),
    "9": ("7", "9"),
    "maj9": ("maj7", "9"),
    "min9": ("m7", "9"),
    "11": ("7", "11"),
    "maj11": ("maj7", "11"),
    "min11": ("m7", "11"),
    "13": ("7", "13"),
    "maj13": ("maj7", "13"),
    "min13": ("m7", "13"),
    "1": ("maj", "none"),  # root only → major
    "5": ("maj", "none"),  # power chord → major (no 3rd)
    "": ("maj", "none"),  # bare root
}

# Parenthetical alterations in Harte (e.g. '7(b9)') → our extension token
_HARTE_PAREN_EXT: dict[str, str] = {
    "b9": "b9",
    "#9": "#9",
    "#11": "#11",
    "b13": "b13",
    "13": "13",
    "11": "11",
    "9": "9",
}

# McGill Billboard section function strings → our function tokens
_FUNCTION_MAP: dict[str, str] = {
    "intro": "intro",
    "verse": "verse",
    "pre-chorus": "prechorus",
    "pre_chorus": "prechorus",
    "prechorus": "prechorus",
    "pre": "prechorus",
    "chorus": "chorus",
    "refrain": "chorus",
    "bridge": "bridge",
    "outro": "outro",
    "coda": "outro",
    "end": "outro",
    "interlude": "interlude",
    "instrumental": "interlude",
    "solo": "interlude",
    "break": "other",
    "transition": "other",
    "other": "other",
}

_VALID_NOTES: frozenset[str] = frozenset(
    {"C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"}
)

_FLAT_TO_SHARP: dict[str, str] = {
    "Cb": "B", "Db": "C#", "Eb": "D#", "Fb": "E",
    "Gb": "F#", "Ab": "G#", "Bb": "A#",
}

_VALID_TIMES: frozenset[str] = frozenset({"4/4", "3/4", "6/8", "2/4", "12/8"})

# Quality families used for mode inference
_MAJOR_QUALITIES: frozenset[str] = frozenset(
    {"maj", "maj7", "6", "add9", "aug", "sus2", "sus4", "7sus4", "aug7"}
)
_MINOR_QUALITIES: frozenset[str] = frozenset(
    {"m", "m7", "mM7", "m6", "m7b5", "dim", "dim7"}
)

# Sharp-spelled chromatic scale, used to turn a bass interval into a note name
_CHROMATIC: tuple[str, ...] = (
    "C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B",
)

# Semitones above the root for the natural (major-scale) degrees 1..7
_DEGREE_SEMITONES: dict[int, int] = {1: 0, 2: 2, 3: 4, 4: 5, 5: 7, 6: 9, 7: 11}

# Harte bass interval: optional accidentals followed by a degree number ('3', 'b7', '#5')
_BASS_DEGREE_RE = re.compile(r"([b#]*)(\d+)")

# Quality with a single parenthesised alteration, e.g. '7(b9)'
_PAREN_QUALITY_RE = re.compile(r'^([^(]*)\(([^)]+)\)$')

# ---------------------------------------------------------------------------
# Internal data structures
# ---------------------------------------------------------------------------


@dataclass
class _ChordEvent:
    """One chord annotation with its onset and length."""

    start: float
    duration: float  # seconds
    harte: str  # Harte chord string: 'N', 'X', 'C:maj', etc.


@dataclass
class _Section:
    """A labelled song section and the chord events it contains."""

    letter: str  # section letter, e.g. 'A', 'B'
    function: str  # our function token, e.g. 'verse', 'chorus'
    events: list[_ChordEvent] = field(default_factory=list)


# ---------------------------------------------------------------------------
# Note / chord helpers
# ---------------------------------------------------------------------------


def _normalize_note(raw: str) -> Optional[str]:
    """Return sharp-canonical note name, or None if unrecognized."""
    note = _FLAT_TO_SHARP.get(raw, raw)
    return note if note in _VALID_NOTES else None


def _split_note(text: str) -> tuple[str, str]:
    """Split *text* into a leading note token (letter + optional '#'/'b') and the remainder."""
    cut = 2 if len(text) >= 2 and text[1] in "#b" else 1
    return text[:cut], text[cut:]


def _resolve_bass(root: str, bass_raw: str) -> Optional[str]:
    """Resolve the text after '/' to a bass note.

    Args:
        root: Sharp-canonical chord root (a member of _VALID_NOTES).
        bass_raw: Either a Harte interval relative to the root ('3', 'b7', '#5')
            or a note name ('E', 'Bb').

    Returns:
        The sharp-canonical bass note, the sentinel 'root' when an interval
        lands on the root itself, or None when *bass_raw* is not recognised.
    """
    degree = _BASS_DEGREE_RE.fullmatch(bass_raw)
    if degree is None:
        # Note-name bass: exactly one note token, nothing trailing.
        raw_note, tail = _split_note(bass_raw)
        if tail or not raw_note:
            return None
        return _normalize_note(raw_note)

    number = int(degree.group(2))
    if number < 1:
        return None
    accidentals = degree.group(1)
    # Compound degrees (9, 11, 13) fold onto 2, 4, 6; only the pitch class matters.
    semitones = (
        _DEGREE_SEMITONES[(number - 1) % 7 + 1]
        + accidentals.count("#")
        - accidentals.count("b")
    )
    if semitones % 12 == 0:
        return "root"
    return _CHROMATIC[(_CHROMATIC.index(root) + semitones) % 12]


def _harte_to_chord_symbol(harte: str) -> Optional[str]:
    """Convert a Harte chord string to our .chord format symbol.

    Args:
        harte: Harte notation string, e.g. 'C:maj', 'Bb:min7', 'C:maj/3',
            'E:hdim7/G#'. The bass after '/' may be a Harte interval
            relative to the root or a plain note name.

    Returns:
        Our chord symbol (e.g. 'Cmaj', 'A#m7', 'Cmaj/E', 'Em7b5/G#'), or None
        for N (no chord), X (unknown), or any unparseable input.
    """
    harte = harte.strip()
    if harte in ("N", "X", ""):
        return None

    # Split off the slash bass (rightmost '/'); it is resolved once the root
    # is known because an interval bass is relative to the root.
    bass_raw: Optional[str] = None
    if "/" in harte:
        harte, bass_raw = harte.rsplit("/", 1)

    # Split root from quality on the first ':' (no ':' means bare root)
    root_str, _, quality_str = harte.partition(":")

    # Parse root
    if not root_str or root_str[0] not in "CDEFGAB":
        return None
    raw_root, leftover = _split_note(root_str)
    if leftover:
        return None
    root = _normalize_note(raw_root)
    if root is None:
        return None

    # Resolve bass
    bass_note = "root"
    if bass_raw is not None:
        resolved = _resolve_bass(root, bass_raw)
        if resolved is None:
            return None
        bass_note = resolved

    # Parse quality — handle parenthetical alterations like '7(b9)'
    m = _PAREN_QUALITY_RE.match(quality_str)
    if m:
        base_qual, alt = m.group(1), m.group(2)
        base_result = _HARTE_QUALITY.get(base_qual)
        if base_result is None:
            return None
        # Only the base quality is kept; the parenthesised alteration
        # replaces any extension the shorthand itself implied.
        our_quality = base_result[0]
        our_ext = _HARTE_PAREN_EXT.get(alt)
        if our_ext is None:
            return None
    else:
        result = _HARTE_QUALITY.get(quality_str)
        if result is None:
            return None
        our_quality, our_ext = result

    q_ext = our_quality + ("" if our_ext == "none" else our_ext)
    bass_part = "" if bass_note == "root" else f"/{bass_note}"
    return root + q_ext + bass_part


# ---------------------------------------------------------------------------
# File parsing
# ---------------------------------------------------------------------------


def _parse_salami_file(
    path: Path,
) -> tuple[dict[str, str], list[tuple[float, str, str]]]:
    """Parse a salami_chords.txt file.

    Lines starting with '#' that contain ':' populate the header; other
    non-blank lines are split on tabs. Lines with fewer than two columns or a
    non-numeric first column are skipped.

    Returns:
        (header, events) where header maps lowercase field names to values,
        and events is a list of (timestamp, label, chord) triples.
        label may be 'Z', a section letter (possibly with ',function'), or '.'.
        chord is in Harte notation or '' when the column is absent.
    """
    header: dict[str, str] = {}
    events: list[tuple[float, str, str]] = []

    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line:
            continue
        if line.startswith("#"):
            if ":" in line:
                name, value = line[1:].strip().split(":", 1)
                header[name.strip().lower()] = value.strip()
            continue
        parts = line.split("\t")
        if len(parts) < 2:
            continue
        try:
            ts = float(parts[0])
        except ValueError:
            continue
        chord = parts[2].strip() if len(parts) > 2 else ""
        events.append((ts, parts[1].strip(), chord))

    return header, events


# ---------------------------------------------------------------------------
# Section extraction
# ---------------------------------------------------------------------------


def _parse_section_label(label: str) -> tuple[str, str]:
    """Parse 'A,verse' → (letter='A', function='verse').

    A label without a comma, or with a function name missing from
    _FUNCTION_MAP, gets the function 'other'.
    """
    letter, comma, func_raw = label.partition(",")
    if not comma:
        return label.strip(), "other"
    return letter.strip(), _FUNCTION_MAP.get(func_raw.strip().lower(), "other")


def _extract_sections(
    events: list[tuple[float, str, str]],
) -> list[_Section]:
    """Group raw event triples into _Section objects with _ChordEvent lists.

    'Z' or an empty label closes the current section, '.' continues it, and
    any other label opens a new one. An event is recorded only when it has a
    chord and a positive duration.
    """
    sections: list[_Section] = []
    current: Optional[_Section] = None
    last = len(events) - 1

    for i, (ts, label, chord) in enumerate(events):
        # Duration runs to the next timestamp; the final event has none.
        dur = events[i + 1][0] - ts if i < last else 0.0

        if label in ("Z", ""):
            current = None
            continue

        if label != ".":
            # New section starts here
            letter, func = _parse_section_label(label)
            current = _Section(letter=letter, function=func)
            sections.append(current)

        if current is not None and chord and dur > 0:
            current.events.append(_ChordEvent(ts, dur, chord))

    return sections
# ---------------------------------------------------------------------------
# Bar quantization
# ---------------------------------------------------------------------------


def _estimate_bar_duration(durations: list[float]) -> float:
    """Estimate duration of one bar in seconds.

    Uses the median of non-trivial chord durations (> 0.5 s) as a proxy for
    one bar. Clamped to [1.0, 5.0] s (covers ~48–240 BPM in 4/4).
    Falls back to 2.0 s when fewer than 3 samples remain after filtering.
    """
    valid = [d for d in durations if d > 0.5]
    if len(valid) < 3:
        return 2.0
    return max(1.0, min(5.0, statistics.median(valid)))


def _expected_positions(time: str, subdivision: int) -> int:
    """Number of positions per bar for the given time signature and subdivision.

    Args:
        time: Time signature written as 'numerator/denominator', e.g. '6/8'.
        subdivision: Note value of one position (4 = quarter, 8 = eighth).
    """
    num, denom = (int(x) for x in time.split("/"))
    return (num * subdivision) // denom


def _section_to_bars(
    section: _Section,
    bar_duration: float,
    time: str,
    subdivision: int,
) -> Optional[list[list[str]]]:
    """Convert a section's chord events to a list of bars.

    Each event becomes one bar starting with its chord symbol ('NC' for N,
    '?' for X) followed by '.' hold positions, plus extra all-hold bars when
    the event lasts longer than one bar.

    Args:
        section: Section whose events are converted, in order.
        bar_duration: Estimated length of one bar in seconds; must be > 0.
        time: Time signature, e.g. '4/4'.
        subdivision: Positions-per-bar subdivision passed to _expected_positions.

    Returns:
        The list of bars, or None if any event contains an unrecognized Harte
        chord symbol; the caller will skip the section and log a reason.

    Raises:
        ValueError: If bar_duration is not positive.
    """
    if bar_duration <= 0:
        raise ValueError(f"bar_duration must be positive, got {bar_duration!r}")

    positions_per_bar = _expected_positions(time, subdivision)
    bars: list[list[str]] = []

    for event in section.events:
        if event.harte == "N":
            first_pos = "NC"
        elif event.harte == "X":
            first_pos = "?"
        else:
            sym = _harte_to_chord_symbol(event.harte)
            if sym is None:
                log.debug(
                    "unrecognized Harte chord %r in section %s",
                    event.harte, section.letter,
                )
                return None
            first_pos = sym

        # Round half up. The built-in round() rounds halves to the nearest
        # even integer, which would make 1.5 bars -> 2 but 2.5 bars -> 2.
        n_bars = max(1, int(event.duration / bar_duration + 0.5))
        bars.append([first_pos] + ["."] * (positions_per_bar - 1))
        # Hold chord across additional bars
        bars.extend(["."] * positions_per_bar for _ in range(n_bars - 1))

    return bars


# ---------------------------------------------------------------------------
# Mode inference
# ---------------------------------------------------------------------------


def _infer_mode(tonic: str, sections: list[_Section]) -> str:
    """Determine 'major' or 'minor' from tonic chord quality distribution.

    Counts occurrences of the tonic root in major-family vs minor-family
    qualities across all sections. Returns 'major' on a tie or no data.

    Args:
        tonic: Sharp-canonical tonic note; event roots are normalised with
            _normalize_note before being compared against it.
        sections: Sections whose chord events are inspected.
    """
    major_count = 0
    minor_count = 0

    for section in sections:
        for event in section.events:
            if not event.harte or event.harte in ("N", "X"):
                continue

            # Extract root without a full Harte parse
            root_part, _, quality_str = event.harte.partition(":")
            root_str = root_part.split("/")[0]
            if len(root_str) >= 2 and root_str[1] in "#b":
                raw_root = root_str[:2]
            else:
                raw_root = root_str[:1]
            if not raw_root or _normalize_note(raw_root) != tonic:
                continue

            # Extract quality: drop the slash bass and any parenthetical alteration
            quality_str = quality_str.partition("/")[0]
            base = re.sub(r'\([^)]*\)', "", quality_str).strip()
            result = _HARTE_QUALITY.get(base)
            if result is None:
                continue

            our_quality = result[0]
            if our_quality in _MAJOR_QUALITIES:
                major_count += 1
            elif our_quality in _MINOR_QUALITIES:
                minor_count += 1

    return "minor" if minor_count > major_count else "major"
# ---------------------------------------------------------------------------
# Metre parsing
# ---------------------------------------------------------------------------


def _parse_metre(metre: str) -> tuple[Optional[str], int]:
    """Parse metre string → (time_sig, subdivision). Returns (None, 0) if unsupported.

    Accepts a signature listed in _VALID_TIMES (eighth-note subdivision for
    6/8 and 12/8, quarter-note otherwise) or a bare beat count 2, 3 or 4,
    which is read as 2/4, 3/4 or 4/4.
    """
    m = metre.strip()
    if m in _VALID_TIMES:
        return m, (8 if m in ("6/8", "12/8") else 4)
    try:
        beats = int(m)
    except ValueError:
        return None, 0
    bare = {4: ("4/4", 4), 3: ("3/4", 4), 2: ("2/4", 4)}
    return bare.get(beats, (None, 0))


# ---------------------------------------------------------------------------
# File writing
# ---------------------------------------------------------------------------


def _write_chord_file(
    path: Path,
    title: str,
    key: str,
    time: str,
    subdivision: int,
    function: Optional[str],
    bars: list[list[str]],
) -> None:
    """Write a harmonic period to a .chord file.

    The file starts with '# name: value' header lines (the function line is
    omitted when *function* is falsy), then a blank line, then the bars
    written four per line and delimited by '|'.
    """
    lines = [
        f"# title: {title}",
        f"# key: {key}",
        f"# time: {time}",
        f"# subdivision: {subdivision}",
        "# style: other",
    ]
    if function:
        lines.append(f"# function: {function}")
    lines.append("")  # blank line before body

    for i in range(0, len(bars), 4):
        chunk = bars[i : i + 4]
        lines.append(" ".join(f"| {' '.join(b)}" for b in chunk) + " |")

    path.write_text("\n".join(lines) + "\n", encoding="utf-8")


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def convert_song(song_dir: Path, output_dir: Path) -> int:
    """Convert one McGill Billboard song directory to .chord files.

    Each section becomes one file named mcgill_<song>_<index>_<function>.chord.
    Sections with an unrecognized chord, fewer than 4 bars, or more than 16
    bars are skipped and tallied in the per-song log line.

    Args:
        song_dir: Directory containing salami_chords.txt (e.g. 0003/).
        output_dir: Destination directory for .chord files (created if absent).

    Returns:
        Number of .chord files successfully written.
    """
    salami = song_dir / "salami_chords.txt"
    if not salami.exists():
        log.warning("no salami_chords.txt in %s, skipping", song_dir)
        return 0

    try:
        header, raw_events = _parse_salami_file(salami)
    except (OSError, UnicodeDecodeError) as exc:
        # Unreadable or non-UTF-8 file: skip this song, keep the batch going.
        log.error("failed to parse %s: %s", salami, exc)
        return 0

    song_id = song_dir.name

    time_sig, subdivision = _parse_metre(header.get("metre", "4/4"))
    if time_sig is None:
        log.warning(
            "unsupported metre %r in %s, skipping", header.get("metre"), song_dir
        )
        return 0

    tonic_raw = header.get("tonic", "C").strip()
    tonic = _normalize_note(tonic_raw)
    if tonic is None:
        # Keep converting, but make the guessed key visible in the log.
        log.warning(
            "unrecognized tonic %r in %s, defaulting to C", tonic_raw, song_dir
        )
        tonic = "C"

    sections = _extract_sections(raw_events)
    if not sections:
        log.warning("no sections found in %s", salami)
        return 0

    all_durations = [
        e.duration
        for s in sections
        for e in s.events
        if e.harte not in ("N", "X", "") and e.duration > 0.5
    ]
    bar_duration = _estimate_bar_duration(all_durations)
    mode = _infer_mode(tonic, sections)
    key = f"{tonic}_{mode}"

    artist = header.get("artist", "unknown")
    song_title = header.get("title", "unknown")

    output_dir.mkdir(parents=True, exist_ok=True)
    n_saved = 0
    skip_reasons: Counter[str] = Counter()

    for idx, section in enumerate(sections):
        bars = _section_to_bars(section, bar_duration, time_sig, subdivision)
        if bars is None:
            skip_reasons["unrecognized_chord"] += 1
            continue

        n = len(bars)
        if n < 4:
            log.debug(
                "section %s in %s: %d bar(s) < 4, skipping",
                section.letter, song_id, n,
            )
            skip_reasons["too_short"] += 1
            continue
        if n > 16:
            log.debug(
                "section %s in %s: %d bars > 16, skipping",
                section.letter, song_id, n,
            )
            skip_reasons["too_long"] += 1
            continue

        func = section.function
        out_path = output_dir / f"mcgill_{song_id}_{idx:02d}_{func}.chord"
        period_title = f"{artist} - {song_title} ({section.letter},{func})"
        _write_chord_file(
            out_path, period_title, key, time_sig, subdivision,
            # 'unspecified' suppresses the function header line.
            func if func != "unspecified" else None,
            bars,
        )
        n_saved += 1
        log.debug("wrote %s", out_path.name)

    if skip_reasons:
        log.info(
            "song %s: saved=%d skipped=%s", song_id, n_saved, dict(skip_reasons)
        )
    else:
        log.info("song %s: saved %d period(s)", song_id, n_saved)

    return n_saved


def convert_dataset(dataset_dir: Path, output_dir: Path) -> tuple[int, int]:
    """Convert all song directories in a McGill Billboard dataset.

    Args:
        dataset_dir: Root directory containing per-song subdirectories.
        output_dir: Destination directory for .chord files.

    Returns:
        (n_saved, n_empty) where n_empty counts songs that produced no output.

    Raises:
        OSError: If dataset_dir cannot be listed (e.g. it does not exist).
    """
    # List the songs first so an invalid dataset_dir fails before an empty
    # output directory is created.
    song_dirs = sorted(d for d in dataset_dir.iterdir() if d.is_dir())
    output_dir.mkdir(parents=True, exist_ok=True)

    n_saved = 0
    n_empty = 0
    for song_dir in song_dirs:
        saved = convert_song(song_dir, output_dir)
        n_saved += saved
        if saved == 0:
            n_empty += 1

    log.info(
        "conversion complete: %d periods saved, %d songs produced no output",
        n_saved, n_empty,
    )
    return n_saved, n_empty


# ---------------------------------------------------------------------------
# CLI entry point
# ---------------------------------------------------------------------------


def main(argv: Optional[list[str]] = None) -> None:
    """Run the command-line converter.

    Args:
        argv: Argument list without the program name; None lets argparse
            read sys.argv.
    """
    parser = argparse.ArgumentParser(
        description="Convert McGill Billboard dataset to .chord files.",
        epilog=(
            "Example:\n"
            "  python -m src.external_converters.mcgill_to_chord "
            "data/raw_external/mcgill/ --out data/raw_external/mcgill_converted/"
        ),
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "dataset_dir", type=Path, metavar="dataset_dir",
        help="directory containing per-song subdirectories (0003/, 0004/, ...)",
    )
    parser.add_argument(
        "--out", type=Path,
        default=Path("data/raw_external/mcgill_converted"),
        metavar="output_dir",
        help="destination for .chord files (default: data/raw_external/mcgill_converted/)",
    )
    parser.add_argument(
        "--log-level", default="INFO",
        choices=["DEBUG", "INFO", "WARNING", "ERROR"],
        help="logging verbosity (default: INFO)",
    )
    args = parser.parse_args(argv)

    logging.basicConfig(level=getattr(logging, args.log_level), format="%(message)s")
    n_saved, n_empty = convert_dataset(args.dataset_dir, args.out)
    print(f"Saved {n_saved} periods. {n_empty} song(s) produced no output.")
    print(f"Output: {args.out}")


if __name__ == "__main__":
    main()
"""Tests for src/external_converters/mcgill_to_chord.py.

Fixture: tests/fixtures/mcgill_test/0001/salami_chords.txt
  4/4 song in C major, two sections:
    Section A (verse):  C:maj F:maj G:7 C:maj — 4 chords × 4.0 s each
    Section B (chorus): F:maj C:maj G:7 C:maj — 4 chords × 4.0 s each

Expected output: 2 .chord files, each with 4 bars, key=C_major, time=4/4.
"""

from pathlib import Path

import pytest

from src.external_converters.mcgill_to_chord import (
    _estimate_bar_duration,
    _extract_sections,
    _harte_to_chord_symbol,
    _infer_mode,
    _parse_metre,
    _parse_salami_file,
    _section_to_bars,
    convert_song,
)
from src.tokenizer import parse_chord_file

FIXTURES = Path(__file__).parent / "fixtures" / "mcgill_test"
TEST_SONG = FIXTURES / "0001"


# ---------------------------------------------------------------------------
# Harte chord symbol conversion
# ---------------------------------------------------------------------------


class TestHarteConversion:
    """Unit tests for individual Harte → .chord symbol conversion."""

    def test_simple_major(self):
        assert _harte_to_chord_symbol("C:maj") == "Cmaj"

    def test_flat_minor_seventh(self):
        # Bb normalises to A#
        assert _harte_to_chord_symbol("Bb:min7") == "A#m7"

    def test_half_diminished(self):
        # hdim7 = half-diminished 7th = our m7b5
        assert _harte_to_chord_symbol("E:hdim7") == "Em7b5"

    def test_dominant_seventh(self):
        assert _harte_to_chord_symbol("G:7") == "G7"

    def test_major_seventh(self):
        assert _harte_to_chord_symbol("D:maj7") == "Dmaj7"

    def test_minor(self):
        assert _harte_to_chord_symbol("A:min") == "Am"

    def test_diminished_seventh(self):
        assert _harte_to_chord_symbol("B:dim7") == "Bdim7"

    def test_augmented(self):
        assert _harte_to_chord_symbol("C:aug") == "Caug"

    def test_slash_chord(self):
        assert _harte_to_chord_symbol("C:maj/E") == "Cmaj/E"

    def test_slash_chord_flat_bass(self):
        # Flat bass note also normalised to sharp
        assert _harte_to_chord_symbol("G:maj/Bb") == "Gmaj/A#"

    def test_no_chord_returns_none(self):
        assert _harte_to_chord_symbol("N") is None

    def test_unknown_returns_none(self):
        assert _harte_to_chord_symbol("X") is None

    def test_empty_returns_none(self):
        assert _harte_to_chord_symbol("") is None

    def test_extended_dominant_ninth(self):
        # G:9 → dominant 7 + extension 9
        assert _harte_to_chord_symbol("G:9") == "G79"

    def test_major_ninth(self):
        assert _harte_to_chord_symbol("C:maj9") == "Cmaj79"

    def test_parenthetical_flat_nine(self):
        assert _harte_to_chord_symbol("C:7(b9)") == "C7b9"

    def test_parenthetical_sharp_eleven(self):
        assert _harte_to_chord_symbol("F:maj7(#11)") == "Fmaj7#11"

    def test_sharp_root(self):
        assert _harte_to_chord_symbol("F#:min7") == "F#m7"

    def test_output_is_parseable(self):
        from src.chord_parser import parse_chord_symbol
        for harte in ("C:maj", "Bb:min7", "E:hdim7", "G:7", "D:maj7", "C:maj/E"):
            sym = _harte_to_chord_symbol(harte)
            assert sym is not None
            parse_chord_symbol(sym)  # must not raise


# ---------------------------------------------------------------------------
# Helper units
# ---------------------------------------------------------------------------


class TestParseSalamiFile:
    """Header and event parsing of the fixture salami_chords.txt."""

    def test_header_parsed(self):
        header, _ = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        assert header["artist"] == "Test Artist"
        assert header["title"] == "Test Song"
        assert header["metre"] == "4/4"
        assert header["tonic"] == "C"

    def test_events_count(self):
        _, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        # 10 data lines total (including Z lines)
        assert len(events) == 10

    def test_first_event_is_silence(self):
        _, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        ts, label, chord = events[0]
        assert ts == 0.0
        assert label == "Z"


class TestExtractSections:
    """Grouping of parsed events into sections."""

    def test_two_sections(self):
        _, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        sections = _extract_sections(events)
        assert len(sections) == 2

    def test_section_functions(self):
        _, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        sections = _extract_sections(events)
        assert sections[0].function == "verse"
        assert sections[1].function == "chorus"

    def test_events_per_section(self):
        _, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        sections = _extract_sections(events)
        assert len(sections[0].events) == 4
        assert len(sections[1].events) == 4

    def test_chord_values(self):
        _, events = _parse_salami_file(TEST_SONG / "salami_chords.txt")
        sections = _extract_sections(events)
        hartes = [e.harte for e in sections[0].events]
        assert hartes == ["C:maj", "F:maj", "G:7", "C:maj"]


class TestEstimateBarDuration:
    """Median-based bar length estimate, its fallback and its clamps."""

    def test_uniform_durations(self):
        assert _estimate_bar_duration([2.0, 2.0, 2.0, 2.0]) == 2.0

    def test_mixed_durations(self):
        # Median of [2, 2, 2, 4, 4] = 2 → bar_dur = 2
        assert _estimate_bar_duration([2.0, 2.0, 2.0, 4.0, 4.0]) == 2.0

    def test_too_few_samples_returns_default(self):
        assert _estimate_bar_duration([]) == 2.0
        assert _estimate_bar_duration([3.0]) == 2.0

    def test_clamp_upper(self):
        assert _estimate_bar_duration([10.0, 10.0, 10.0]) == 5.0

    def test_clamp_lower(self):
        # Samples pass the > 0.5 s filter but their median is below 1.0 s
        assert _estimate_bar_duration([0.6, 0.6, 0.6]) == 1.0

    def test_all_below_threshold_returns_default(self):
        # All samples are filtered out (<= 0.5 s), so the fallback applies
        assert _estimate_bar_duration([0.3, 0.3, 0.3]) == 2.0


class TestParseMetre:
    """Metre header parsing."""

    def test_4_4(self):
        assert _parse_metre("4/4") == ("4/4", 4)

    def test_3_4(self):
        assert _parse_metre("3/4") == ("3/4", 4)

    def test_6_8(self):
        assert _parse_metre("6/8") == ("6/8", 8)

    def test_integer_4(self):
        assert _parse_metre("4") == ("4/4", 4)

    def test_unsupported(self):
        sig, sub = _parse_metre("7/8")
        assert sig is None
        assert sub == 0


# ---------------------------------------------------------------------------
# Full period conversion
# ---------------------------------------------------------------------------


class TestFullConversion:
    """Integration tests: convert_song with fixture produces valid .chord files."""

    def test_returns_two_periods(self, tmp_path):
        assert convert_song(TEST_SONG, tmp_path) == 2

    def test_output_files_exist(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        assert len(list(tmp_path.glob("*.chord"))) == 2

    def test_output_files_are_parseable(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        for f in tmp_path.glob("*.chord"):
            assert parse_chord_file(f) is not None  # must not raise

    def test_verse_has_four_bars(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        verse_files = sorted(tmp_path.glob("*verse*.chord"))
        assert len(verse_files) == 1
        assert len(parse_chord_file(verse_files[0]).bars) == 4

    def test_chorus_has_four_bars(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        chorus_files = sorted(tmp_path.glob("*chorus*.chord"))
        assert len(chorus_files) == 1
        assert len(parse_chord_file(chorus_files[0]).bars) == 4

    def test_header_time_and_subdivision(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        for f in tmp_path.glob("*.chord"):
            p = parse_chord_file(f)
            assert p.time == "4/4"
            assert p.subdivision == 4

    def test_style_is_other(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        for f in tmp_path.glob("*.chord"):
            assert parse_chord_file(f).style == "other"

    def test_key_is_c_major(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        for f in tmp_path.glob("*.chord"):
            assert parse_chord_file(f).key == "C_major"

    def test_function_tags(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        funcs = {parse_chord_file(f).function for f in tmp_path.glob("*.chord")}
        assert funcs == {"verse", "chorus"}

    def test_filenames_contain_song_id(self, tmp_path):
        convert_song(TEST_SONG, tmp_path)
        names = {f.name for f in tmp_path.glob("*.chord")}
        assert all("0001" in name for name in names)

    def test_bar_positions_are_valid_chords(self, tmp_path):
        from src.chord_parser import parse_chord_symbol
        convert_song(TEST_SONG, tmp_path)
        for f in tmp_path.glob("*.chord"):
            p = parse_chord_file(f)
            for bar in p.bars:
                first = bar[0]
                if first not in (".", "NC", "?"):
                    parse_chord_symbol(first)  # must not raise

    def test_missing_salami_returns_zero(self, tmp_path):
        empty_song = tmp_path / "empty"
        empty_song.mkdir()
        assert convert_song(empty_song, tmp_path / "out") == 0