diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ed8d17a
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,7 @@
+torch
+music21
+pretty_midi
+pytest
+matplotlib
+numpy
+pandas
diff --git a/src/chord_parser.py b/src/chord_parser.py
new file mode 100644
index 0000000..96a00a0
--- /dev/null
+++ b/src/chord_parser.py
@@ -0,0 +1,248 @@
+"""Chord symbol parser for .chord files.
+
+Parses a chord symbol string (e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D') into a
+ChordTokens dataclass. See docs/chord_format_spec.md §4 for the full spec.
+
+Usage:
+    from src.chord_parser import parse_chord_symbol, ChordParseError
+    tokens = parse_chord_symbol("Fmaj9")
+    # ChordTokens(root='F', quality='maj7', extension='9', bass='root')
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass
+
+
+class ChordParseError(ValueError):
+    """Raised when a chord symbol cannot be parsed."""
+
+
+@dataclass(frozen=True)
+class ChordTokens:
+    """Factorized chord representation — one string per token slot."""
+
+    root: str  # one of the 12 chromatic pitch classes, e.g. 'C', 'F#', 'A#'
+    quality: str  # one of the 18 canonical quality names, e.g. 'maj7', 'm', 'dim7'
+    extension: str  # one of 8 values: 'none' or e.g. '9', 'b9', '#11'
+    bass: str  # 'root' or one of the 12 pitch classes
+
+
+# ---------------------------------------------------------------------------
+# Lookup tables
+# ---------------------------------------------------------------------------
+
+_VALID_ROOTS: frozenset[str] = frozenset(
+    {"C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"}
+)
+
+_FLAT_TO_SHARP: dict[str, str] = {
+    "Cb": "B",
+    "Db": "C#",
+    "Eb": "D#",
+    "Fb": "E",
+    "Gb": "F#",
+    "Ab": "G#",
+    "Bb": "A#",
+}
+
+# Maps the quality+extension string (after the root, before any '/') to
+# (canonical_quality, canonical_extension). Covers:
+#   - standalone qualities with all their alternative spellings
+#   - shorthand expansions where a 7th is implied (e.g. 'maj9' → maj7 + 9)
+#   - Unicode symbols (°, Δ, ø)
+_QUAL_EXT_MAP: dict[str, tuple[str, str]] = {
+    # empty root suffix → plain major
+    "": ("maj", "none"),
+    # --- major ---
+    "maj": ("maj", "none"),
+    "maj7": ("maj7", "none"),
+    "M7": ("maj7", "none"),
+    "Δ7": ("maj7", "none"),
+    "Δ": ("maj7", "none"),
+    "maj6": ("6", "none"),
+    # major shorthands (dominant/major 7th implied by the extension numeral)
+    "maj9": ("maj7", "9"),
+    "maj11": ("maj7", "11"),
+    "maj13": ("maj7", "13"),
+    "M9": ("maj7", "9"),
+    "M11": ("maj7", "11"),
+    "M13": ("maj7", "13"),
+    # --- minor ---
+    "m": ("m", "none"),
+    "min": ("m", "none"),
+    "-": ("m", "none"),
+    "m7": ("m7", "none"),
+    "min7": ("m7", "none"),
+    "-7": ("m7", "none"),
+    # minor shorthands (m7 implied)
+    "m9": ("m7", "9"),
+    "min9": ("m7", "9"),
+    "-9": ("m7", "9"),
+    "m11": ("m7", "11"),
+    "min11": ("m7", "11"),
+    "-11": ("m7", "11"),
+    "m13": ("m7", "13"),
+    "min13": ("m7", "13"),
+    "-13": ("m7", "13"),
+    # minor sixth
+    "m6": ("m6", "none"),
+    "min6": ("m6", "none"),
+    # half-diminished
+    "m7b5": ("m7b5", "none"),
+    "min7b5": ("m7b5", "none"),
+    "m7♭5": ("m7b5", "none"),
+    "ø": ("m7b5", "none"),
+    "ø7": ("m7b5", "none"),
+    # minor-major seventh
+    "mM7": ("mM7", "none"),
+    "m(maj7)": ("mM7", "none"),
+    "minMaj7": ("mM7", "none"),
+    # minor add9
+    "madd9": ("m(add9)", "none"),
+    "m(add9)": ("m(add9)", "none"),
+    "m(add2)": ("m(add9)", "none"),
+    # --- dominant ---
+    "7": ("7", "none"),
+    # dominant shorthands (dominant 7th implied)
+    "9": ("7", "9"),
+    "11": ("7", "11"),
+    "13": ("7", "13"),
+    # --- diminished ---
+    "dim": ("dim", "none"),
+    "°": ("dim", "none"),
+    "dim7": ("dim7", "none"),
+    "°7": ("dim7", "none"),
+    # --- augmented ---
+    "aug": ("aug", "none"),
+    "+": ("aug", "none"),
+    "aug7": ("aug7", "none"),
+    "+7": ("aug7", "none"),
+    "7#5": ("aug7", "none"),
+    # --- suspended ---
+    "sus2": ("sus2", "none"),
+    "sus4": ("sus4", "none"),
+    "sus": ("sus4", "none"),
+    "7sus4": ("7sus4", "none"),
+    "7sus": ("7sus4", "none"),
+    # --- sixth / add ---
+    "6": ("6", "none"),
+    "add9": ("add9", "none"),
+    "2": ("add9", "none"),
+}
+
+# Maps quality-only strings (no extension) to canonical quality names.
+# Used when an explicit extension suffix has been stripped from the end.
+# Derived from _QUAL_EXT_MAP so every spelling accepted on its own is also
+# accepted in front of an extension (e.g. 'min7b5' in 'min7b5b13').
+_QUAL_ONLY_MAP: dict[str, str] = {
+    spelling: quality
+    for spelling, (quality, extension) in _QUAL_EXT_MAP.items()
+    if extension == "none"
+}
+
+# Extension suffixes tried longest-first to avoid greedy ambiguity.
+# (e.g. 'b13' must be tried before '13' so 'maj7b13' is not split at '13')
+_EXT_SUFFIXES: tuple[str, ...] = ("b13", "#11", "b9", "#9", "13", "11", "9")
+
+
+# ---------------------------------------------------------------------------
+# Internal helpers
+# ---------------------------------------------------------------------------
+
+
+def _split_note(s: str) -> tuple[str, str]:
+    """Split a leading note name (letter plus optional '#'/'b') off *s*.
+
+    The caller must already have checked that ``s[0]`` is a note letter.
+    """
+    cut = 2 if len(s) >= 2 and s[1] in "#b" else 1
+    return s[:cut], s[cut:]
+
+
+def _normalize_note(raw: str) -> str:
+    """Return *raw* as a sharp-spelled pitch class, or raise ChordParseError."""
+    note = _FLAT_TO_SHARP.get(raw, raw)
+    if note not in _VALID_ROOTS:
+        raise ChordParseError(f"invalid note: {raw!r}")
+    return note
+
+
+def _extract_root(s: str) -> tuple[str, str]:
+    """Return (normalized_root, remaining_suffix)."""
+    if not s or s[0] not in "CDEFGAB":
+        raise ChordParseError(
+            f"chord symbol must start with a note letter A–G: {s!r}"
+        )
+    raw_root, rest = _split_note(s)
+    return _normalize_note(raw_root), rest
+
+
+def _parse_bass(s: str) -> str:
+    """Return the normalized bass note named by the text after '/'."""
+    s = s.strip()
+    if not s:
+        raise ChordParseError("empty bass note after '/'")
+    if s[0] not in "CDEFGAB":
+        raise ChordParseError(f"invalid bass note: {s!r}")
+    raw, tail = _split_note(s)
+    if tail:
+        raise ChordParseError(f"invalid bass note (trailing characters): {s!r}")
+    return _normalize_note(raw)
+
+
+def _parse_quality_ext(s: str) -> tuple[str, str]:
+    """Return (canonical_quality, canonical_extension) for the suffix string."""
+    # Direct lookup: handles standalone qualities, Unicode variants, shorthands.
+    if s in _QUAL_EXT_MAP:
+        return _QUAL_EXT_MAP[s]
+
+    # Try stripping a known extension suffix from the right.
+    for ext in _EXT_SUFFIXES:
+        if s.endswith(ext):
+            qual_s = s[: -len(ext)]
+            if qual_s in _QUAL_ONLY_MAP:
+                return _QUAL_ONLY_MAP[qual_s], ext
+
+    raise ChordParseError(f"unrecognized quality/extension: {s!r}")
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+
+def parse_chord_symbol(symbol: str) -> ChordTokens:
+    """Parse a chord symbol string into factorized token slots.
+
+    Args:
+        symbol: Chord symbol, e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D'.
+
+    Returns:
+        ChordTokens(root, quality, extension, bass) with all values drawn
+        from the vocabularies in docs/chord_format_spec.md §4.2–4.5.
+
+    Raises:
+        ChordParseError: If the symbol is empty, unrecognized, or malformed.
+    """
+    if not symbol or not symbol.strip():
+        raise ChordParseError("empty chord symbol")
+
+    symbol = symbol.strip()
+
+    if symbol.count("/") > 1:
+        raise ChordParseError(f"multiple '/' in chord symbol: {symbol!r}")
+
+    chord_part, slash, bass_str = symbol.partition("/")
+    bass = _parse_bass(bass_str) if slash else "root"
+
+    # _parse_bass strips its input; strip the chord half as well so that
+    # 'C /G' and 'C/ G' are accepted alike.
+    chord_part = chord_part.strip()
+    if not chord_part:
+        raise ChordParseError(f"missing chord before '/': {symbol!r}")
+
+    root, rest = _extract_root(chord_part)
+    quality, extension = _parse_quality_ext(rest)
+
+    return ChordTokens(root=root, quality=quality, extension=extension, bass=bass)
diff --git a/tests/test_chord_parser.py b/tests/test_chord_parser.py
new file mode 100644
index 0000000..e24525b
--- /dev/null
+++ b/tests/test_chord_parser.py
@@ -0,0 +1,470 @@
+"""Tests for src/chord_parser.py.
+
+Coverage:
+  - All 18 canonical qualities with root C
+  - At least 2 examples per extension (including shorthand-expanded forms)
+  - Slash chords with various bass notes (including sharp/flat basses)
+  - Both sharp and flat root spellings (flat → sharp normalization)
+  - All examples from the §4.6 parse table in chord_format_spec.md
+  - Invalid inputs → ChordParseError
+"""
+
+import pytest
+
+from src.chord_parser import ChordParseError, ChordTokens, parse_chord_symbol
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def parse(symbol: str) -> ChordTokens:
+    return parse_chord_symbol(symbol)
+
+
+# ---------------------------------------------------------------------------
+# §4.6 — full parse table examples (spec-mandated)
+# ---------------------------------------------------------------------------
+
+
+class TestSpecExamples:
+    def test_C(self):
+        t = parse("C")
+        assert t == ChordTokens("C", "maj", "none", "root")
+
+    def test_Am(self):
+        t = parse("Am")
+        assert t == ChordTokens("A", "m", "none", "root")
+
+    def test_Fsharpm7(self):
+        t = parse("F#m7")
+        assert t == ChordTokens("F#", "m7", "none", "root")
+
+    def test_Cmaj9_shorthand(self):
+        # Shorthand: maj9 → quality=maj7, extension=9
+        t = parse("Cmaj9")
+        assert t == ChordTokens("C", "maj7", "9", "root")
+
+    def test_G7sus4(self):
+        t = parse("G7sus4")
+        assert t == ChordTokens("G", "7sus4", "none", "root")
+
+    def test_F_slash_G(self):
+        t = parse("F/G")
+        assert t == ChordTokens("F", "maj", "none", "G")
+
+    def test_Bb7b9_slash_D(self):
+        # Bb normalises to A#
+        t = parse("Bb7b9/D")
+        assert t == ChordTokens("A#", "7", "b9", "D")
+
+    def test_Em7b5(self):
+        t = parse("Em7b5")
+        assert t == ChordTokens("E", "m7b5", "none", "root")
+
+    def test_Dsharpdim7(self):
+        t = parse("D#dim7")
+        assert t == ChordTokens("D#", "dim7", "none", "root")
+
+
+# ---------------------------------------------------------------------------
+# All 18 qualities with root C (primary spellings)
+# ---------------------------------------------------------------------------
+
+
+class TestAllQualities:
+    """One test per canonical quality, using the primary spelling."""
+
+    def test_maj(self):
+        assert parse("C") == ChordTokens("C", "maj", "none", "root")
+
+    def test_m(self):
+        assert parse("Cm") == ChordTokens("C", "m", "none", "root")
+
+    def test_dim(self):
+        assert parse("Cdim") == ChordTokens("C", "dim", "none", "root")
+
+    def test_aug(self):
+        assert parse("Caug") == ChordTokens("C", "aug", "none", "root")
+
+    def test_sus2(self):
+        assert parse("Csus2") == ChordTokens("C", "sus2", "none", "root")
+
+    def test_sus4(self):
+        assert parse("Csus4") == ChordTokens("C", "sus4", "none", "root")
+
+    def test_maj7(self):
+        assert parse("Cmaj7") == ChordTokens("C", "maj7", "none", "root")
+
+    def test_m7(self):
+        assert parse("Cm7") == ChordTokens("C", "m7", "none", "root")
+
+    def test_7(self):
+        assert parse("C7") == ChordTokens("C", "7", "none", "root")
+
+    def test_m7b5(self):
+        assert parse("Cm7b5") == ChordTokens("C", "m7b5", "none", "root")
+
+    def test_dim7(self):
+        assert parse("Cdim7") == ChordTokens("C", "dim7", "none", "root")
+
+    def test_mM7(self):
+        assert parse("CmM7") == ChordTokens("C", "mM7", "none", "root")
+
+    def test_7sus4(self):
+        assert parse("C7sus4") == ChordTokens("C", "7sus4", "none", "root")
+
+    def test_aug7(self):
+        assert parse("Caug7") == ChordTokens("C", "aug7", "none", "root")
+
+    def test_6(self):
+        assert parse("C6") == ChordTokens("C", "6", "none", "root")
+
+    def test_m6(self):
+        assert parse("Cm6") == ChordTokens("C", "m6", "none", "root")
+
+    def test_add9(self):
+        assert parse("Cadd9") == ChordTokens("C", "add9", "none", "root")
+
+    def test_m_add9(self):
+        assert parse("Cm(add9)") == ChordTokens("C", "m(add9)", "none", "root")
+
+
+# ---------------------------------------------------------------------------
+# Alternative quality spellings
+# ---------------------------------------------------------------------------
+
+
+class TestQualityAlternatives:
+    # minor: m / min / -
+    def test_min_spelling(self):
+        assert parse("Cmin").quality == "m"
+
+    def test_dash_spelling(self):
+        assert parse("C-").quality == "m"
+
+    # dim: °
+    def test_degree_dim(self):
+        assert parse("C°").quality == "dim"
+
+    # aug: +
+    def test_plus_aug(self):
+        assert parse("C+").quality == "aug"
+
+    # sus: sus alone → sus4
+    def test_sus_alone(self):
+        assert parse("Csus").quality == "sus4"
+
+    # maj7 alternatives: M7, Δ7, Δ
+    def test_M7(self):
+        assert parse("CM7").quality == "maj7"
+
+    def test_delta7(self):
+        assert parse("CΔ7").quality == "maj7"
+
+    def test_delta(self):
+        assert parse("CΔ").quality == "maj7"
+
+    # maj6 → quality=6
+    def test_maj6(self):
+        assert parse("Cmaj6").quality == "6"
+
+    # m7 alternatives: min7, -7
+    def test_min7(self):
+        assert parse("Cmin7").quality == "m7"
+
+    def test_dash7(self):
+        assert parse("C-7").quality == "m7"
+
+    # m7b5 alternatives: ø, ø7, min7b5
+    def test_half_dim_ø(self):
+        assert parse("Cø").quality == "m7b5"
+
+    def test_half_dim_ø7(self):
+        assert parse("Cø7").quality == "m7b5"
+
+    def test_min7b5(self):
+        assert parse("Cmin7b5").quality == "m7b5"
+
+    # dim7: °7
+    def test_degree7(self):
+        assert parse("C°7").quality == "dim7"
+
+    # mM7 alternatives: m(maj7), minMaj7
+    def test_m_maj7_parens(self):
+        assert parse("Cm(maj7)").quality == "mM7"
+
+    def test_minMaj7(self):
+        assert parse("CminMaj7").quality == "mM7"
+
+    # aug7 alternatives: +7, 7#5
+    def test_plus7(self):
+        assert parse("C+7").quality == "aug7"
+
+    def test_7sharp5(self):
+        assert parse("C7#5").quality == "aug7"
+
+    # 7sus4 alternative: 7sus
+    def test_7sus(self):
+        assert parse("C7sus").quality == "7sus4"
+
+    # m6 alternative: min6
+    def test_min6(self):
+        assert parse("Cmin6").quality == "m6"
+
+    # add9 alternative: 2
+    def test_2_for_add9(self):
+        assert parse("C2").quality == "add9"
+
+    # m(add9) alternatives: madd9, m(add2)
+    def test_madd9(self):
+        assert parse("Cmadd9").quality == "m(add9)"
+
+    def test_m_add2(self):
+        assert parse("Cm(add2)").quality == "m(add9)"
+
+
+# ---------------------------------------------------------------------------
+# Extensions — at least 2 examples each, including shorthands
+# ---------------------------------------------------------------------------
+
+
+class TestExtensions:
+    # extension=9
+    def test_ext_9_via_shorthand_dominant(self):
+        # C9 → quality=7 (dominant 7th implied), extension=9
+        t = parse("C9")
+        assert t.quality == "7"
+        assert t.extension == "9"
+
+    def test_ext_9_via_maj_shorthand(self):
+        # Fmaj9 → quality=maj7, extension=9
+        t = parse("Fmaj9")
+        assert t.quality == "maj7"
+        assert t.extension == "9"
+
+    def test_ext_9_explicit(self):
+        # Plain G7 carries no extension
+        t = parse("G7")
+        assert t.extension == "none"
+        # G9 = dominant 9th
+        t2 = parse("G9")
+        assert t2.quality == "7"
+        assert t2.extension == "9"
+
+    def test_ext_9_minor_shorthand(self):
+        # Cm9 → quality=m7, extension=9
+        t = parse("Cm9")
+        assert t.quality == "m7"
+        assert t.extension == "9"
+
+    def test_ext_9_dash_shorthand(self):
+        # C-9 → same as Cm9: quality=m7, extension=9
+        t = parse("C-9")
+        assert t == ChordTokens("C", "m7", "9", "root")
+
+    # extension=b9
+    def test_ext_b9_dominant(self):
+        t = parse("G7b9")
+        assert t == ChordTokens("G", "7", "b9", "root")
+
+    def test_ext_b9_minor7(self):
+        t = parse("Cm7b9")
+        assert t == ChordTokens("C", "m7", "b9", "root")
+
+    # extension=#9
+    def test_ext_sharp9_dominant(self):
+        t = parse("C7#9")
+        assert t == ChordTokens("C", "7", "#9", "root")
+
+    def test_ext_sharp9_aug7(self):
+        t = parse("Gaug7#9")
+        assert t == ChordTokens("G", "aug7", "#9", "root")
+
+    # extension=11
+    def test_ext_11_dominant_shorthand(self):
+        # C11 → quality=7, extension=11
+        t = parse("C11")
+        assert t.quality == "7"
+        assert t.extension == "11"
+
+    def test_ext_11_minor_shorthand(self):
+        # Cm11 → quality=m7, extension=11
+        t = parse("Cm11")
+        assert t.quality == "m7"
+        assert t.extension == "11"
+
+    # extension=#11
+    def test_ext_sharp11_maj7(self):
+        t = parse("Cmaj7#11")
+        assert t == ChordTokens("C", "maj7", "#11", "root")
+
+    def test_ext_sharp11_dominant(self):
+        t = parse("G7#11")
+        assert t == ChordTokens("G", "7", "#11", "root")
+
+    # extension=13
+    def test_ext_13_dominant_shorthand(self):
+        # C13 → quality=7, extension=13
+        t = parse("C13")
+        assert t.quality == "7"
+        assert t.extension == "13"
+
+    def test_ext_13_maj_shorthand(self):
+        # Fmaj13 → quality=maj7, extension=13
+        t = parse("Fmaj13")
+        assert t.quality == "maj7"
+        assert t.extension == "13"
+
+    def test_ext_13_M_shorthand(self):
+        # CM13 → same as Cmaj13: quality=maj7, extension=13
+        t = parse("CM13")
+        assert t == ChordTokens("C", "maj7", "13", "root")
+
+    def test_ext_13_minor_shorthand(self):
+        # Cm13 → quality=m7, extension=13
+        t = parse("Cm13")
+        assert t.quality == "m7"
+        assert t.extension == "13"
+
+    # extension=b13
+    def test_ext_b13_dominant(self):
+        t = parse("C7b13")
+        assert t == ChordTokens("C", "7", "b13", "root")
+
+    def test_ext_b13_minor7(self):
+        t = parse("Gm7b13")
+        assert t == ChordTokens("G", "m7", "b13", "root")
+
+    def test_ext_b13_alias_quality(self):
+        # Alternative quality spellings also accept an explicit extension.
+        t = parse("Cmin7b5b13")
+        assert t == ChordTokens("C", "m7b5", "b13", "root")
+
+
+# ---------------------------------------------------------------------------
+# Slash chords (§4.5)
+# ---------------------------------------------------------------------------
+
+
+class TestSlashChords:
+    def test_F_slash_A(self):
+        t = parse("F/A")
+        assert t == ChordTokens("F", "maj", "none", "A")
+
+    def test_G_slash_B(self):
+        t = parse("G/B")
+        assert t == ChordTokens("G", "maj", "none", "B")
+
+    def test_F_slash_G_on_chord(self):
+        t = parse("F/G")
+        assert t == ChordTokens("F", "maj", "none", "G")
+
+    def test_Em7_slash_G(self):
+        t = parse("Em7/G")
+        assert t == ChordTokens("E", "m7", "none", "G")
+
+    def test_D_slash_Fsharp(self):
+        t = parse("D/F#")
+        assert t == ChordTokens("D", "maj", "none", "F#")
+
+    def test_Dm9_slash_F(self):
+        # Dm9 is a shorthand: quality=m7, extension=9
+        t = parse("Dm9/F")
+        assert t == ChordTokens("D", "m7", "9", "F")
+
+    def test_slash_bass_flat_normalised(self):
+        # Bass Ab → G#
+        t = parse("C/Ab")
+        assert t.bass == "G#"
+
+    def test_Cmaj7_slash_E(self):
+        t = parse("Cmaj7/E")
+        assert t == ChordTokens("C", "maj7", "none", "E")
+
+    def test_cmaj7_slash_Bflat(self):
+        # Bass Bb → A#
+        t = parse("Cmaj7/Bb")
+        assert t.bass == "A#"
+
+    def test_slash_whitespace_around_slash(self):
+        assert parse("C /G") == parse("C/G") == parse("C/ G")
+
+
+# ---------------------------------------------------------------------------
+# Root spellings — sharp and flat
+# ---------------------------------------------------------------------------
+
+
+class TestRootNormalization:
+    def test_sharp_roots(self):
+        for root in ("C#", "D#", "F#", "G#", "A#"):
+            t = parse(f"{root}m7")
+            assert t.root == root
+
+    def test_flat_to_sharp_normalization(self):
+        cases = [
+            ("Db", "C#"),
+            ("Eb", "D#"),
+            ("Gb", "F#"),
+            ("Ab", "G#"),
+            ("Bb", "A#"),
+            ("Cb", "B"),
+            ("Fb", "E"),
+        ]
+        for flat, sharp in cases:
+            t = parse(f"{flat}maj7")
+            assert t.root == sharp, f"{flat} should normalise to {sharp}"
+
+    def test_natural_roots(self):
+        for root in ("C", "D", "E", "F", "G", "A", "B"):
+            t = parse(f"{root}")
+            assert t.root == root
+
+
+# ---------------------------------------------------------------------------
+# Invalid inputs → ChordParseError
+# ---------------------------------------------------------------------------
+
+
+class TestInvalidInputs:
+    def test_empty_string(self):
+        with pytest.raises(ChordParseError):
+            parse("")
+
+    def test_whitespace_only(self):
+        with pytest.raises(ChordParseError):
+            parse("   ")
+
+    def test_unknown_root(self):
+        with pytest.raises(ChordParseError):
+            parse("Xyz")
+
+    def test_trailing_slash_no_bass(self):
+        with pytest.raises(ChordParseError):
+            parse("C7/")
+
+    def test_invalid_bass_note(self):
+        with pytest.raises(ChordParseError):
+            parse("C/Z")
+
+    def test_invalid_bass_number(self):
+        with pytest.raises(ChordParseError):
+            parse("C/4")
+
+    def test_multiple_slashes(self):
+        with pytest.raises(ChordParseError):
+            parse("C/E/G")
+
+    def test_unknown_quality(self):
+        with pytest.raises(ChordParseError):
+            parse("Cxyz")
+
+    def test_slash_with_no_chord(self):
+        with pytest.raises(ChordParseError):
+            parse("/G")
+
+    def test_lowercase_root(self):
+        with pytest.raises(ChordParseError):
+            parse("cmaj7")