feat: implement chord symbol parser with full test suite

Adds src/chord_parser.py with parse_chord_symbol() → ChordTokens.
Handles all 18 qualities (including Unicode °/Δ/ø variants and
alternative spellings), shorthand expansion (maj9 → maj7+ext9,
C9 → 7+ext9, etc.), slash chords, and flat→sharp root normalization.
Raises ChordParseError with a descriptive message on bad input.

Adds tests/test_chord_parser.py: 90 tests covering all qualities,
all 7 extension values (including shorthands), slash chords, root
normalization, all §4.6 spec examples, and 10 invalid-input cases.

Adds requirements.txt with project dependencies.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-19 11:17:10 +03:00
parent 75fa07bf6c
commit dd77de00d0
3 changed files with 729 additions and 0 deletions
+7
View File
@@ -0,0 +1,7 @@
torch
music21
pretty_midi
pytest
matplotlib
numpy
pandas
+270
View File
@@ -0,0 +1,270 @@
"""Chord symbol parser for .chord files.
Parses a chord symbol string (e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D') into a
ChordTokens dataclass. See docs/chord_format_spec.md §4 for the full spec.
Usage:
from src.chord_parser import parse_chord_symbol, ChordParseError
tokens = parse_chord_symbol("Fmaj9")
# ChordTokens(root='F', quality='maj7', extension='9', bass='root')
"""
from __future__ import annotations
from dataclasses import dataclass
class ChordParseError(ValueError):
    """Signal that a chord symbol string could not be parsed.

    Derives from ``ValueError``, so callers that already catch
    ``ValueError`` for malformed input will also catch this error.
    """

    pass
@dataclass(frozen=True)
class ChordTokens:
    """Immutable, factorized view of a chord: one string per token slot.

    The class itself performs no validation; the values listed below are
    the ones the parser is expected to produce.

    Attributes:
        root: One of the 12 chromatic pitch classes, e.g. 'C', 'F#', 'A#'.
        quality: One of the 18 canonical quality names, e.g. 'maj7', 'm',
            'dim7'.
        extension: One of 8 values: 'none' or e.g. '9', 'b9', '#11'.
        bass: 'root' or one of the 12 pitch classes.
    """

    root: str
    quality: str
    extension: str
    bass: str
# ---------------------------------------------------------------------------
# Lookup tables
# ---------------------------------------------------------------------------
_VALID_ROOTS: frozenset[str] = frozenset(
{"C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"}
)
_FLAT_TO_SHARP: dict[str, str] = {
"Cb": "B",
"Db": "C#",
"Eb": "D#",
"Fb": "E",
"Gb": "F#",
"Ab": "G#",
"Bb": "A#",
}
# Maps the quality+extension string (after the root, before any '/') to
# (canonical_quality, canonical_extension). Covers:
# - standalone qualities with all their alternative spellings
# - shorthand expansions where a 7th is implied (e.g. 'maj9' → maj7 + 9)
# - Unicode symbols (°, Δ, ø)
_QUAL_EXT_MAP: dict[str, tuple[str, str]] = {
# empty root suffix → plain major
"": ("maj", "none"),
# --- major ---
"maj": ("maj", "none"),
"maj7": ("maj7", "none"),
"M7": ("maj7", "none"),
"Δ7": ("maj7", "none"),
"Δ": ("maj7", "none"),
"maj6": ("6", "none"),
# major shorthands (dominant/major 7th implied by the extension numeral)
"maj9": ("maj7", "9"),
"maj11": ("maj7", "11"),
"maj13": ("maj7", "13"),
# --- minor ---
"m": ("m", "none"),
"min": ("m", "none"),
"-": ("m", "none"),
"m7": ("m7", "none"),
"min7": ("m7", "none"),
"-7": ("m7", "none"),
# minor shorthands (m7 implied)
"m9": ("m7", "9"),
"min9": ("m7", "9"),
"m11": ("m7", "11"),
"min11": ("m7", "11"),
"m13": ("m7", "13"),
"min13": ("m7", "13"),
# minor sixth
"m6": ("m6", "none"),
"min6": ("m6", "none"),
# half-diminished
"m7b5": ("m7b5", "none"),
"min7b5": ("m7b5", "none"),
"m7♭5": ("m7b5", "none"),
"ø": ("m7b5", "none"),
"ø7": ("m7b5", "none"),
# minor-major seventh
"mM7": ("mM7", "none"),
"m(maj7)": ("mM7", "none"),
"minMaj7": ("mM7", "none"),
# minor add9
"madd9": ("m(add9)", "none"),
"m(add9)": ("m(add9)", "none"),
"m(add2)": ("m(add9)", "none"),
# --- dominant ---
"7": ("7", "none"),
# dominant shorthands (dominant 7th implied)
"9": ("7", "9"),
"11": ("7", "11"),
"13": ("7", "13"),
# --- diminished ---
"dim": ("dim", "none"),
"°": ("dim", "none"),
"dim7": ("dim7", "none"),
"°7": ("dim7", "none"),
# --- augmented ---
"aug": ("aug", "none"),
"+": ("aug", "none"),
"aug7": ("aug7", "none"),
"+7": ("aug7", "none"),
"7#5": ("aug7", "none"),
# --- suspended ---
"sus2": ("sus2", "none"),
"sus4": ("sus4", "none"),
"sus": ("sus4", "none"),
"7sus4": ("7sus4", "none"),
"7sus": ("7sus4", "none"),
# --- sixth / add ---
"6": ("6", "none"),
"add9": ("add9", "none"),
"2": ("add9", "none"),
}
# Maps quality-only strings (no extension) to canonical quality names.
# Used when an explicit extension suffix has been stripped from the end.
_QUAL_ONLY_MAP: dict[str, str] = {
"": "maj",
"maj": "maj",
"maj7": "maj7",
"M7": "maj7",
"Δ7": "maj7",
"Δ": "maj7",
"maj6": "6",
"m": "m",
"min": "m",
"-": "m",
"m7": "m7",
"min7": "m7",
"-7": "m7",
"m6": "m6",
"min6": "m6",
"m7b5": "m7b5",
"ø": "m7b5",
"ø7": "m7b5",
"mM7": "mM7",
"7": "7",
"dim": "dim",
"°": "dim",
"dim7": "dim7",
"°7": "dim7",
"aug": "aug",
"+": "aug",
"aug7": "aug7",
"+7": "aug7",
"7#5": "aug7",
"sus2": "sus2",
"sus4": "sus4",
"sus": "sus4",
"7sus4": "7sus4",
"7sus": "7sus4",
"6": "6",
"add9": "add9",
}
# Extension suffixes tried longest-first to avoid greedy ambiguity.
# (e.g. 'b13' must be tried before '13' so 'maj7b13' is not split at '13')
_EXT_SUFFIXES: tuple[str, ...] = ("b13", "#11", "b9", "#9", "13", "11", "9")
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _normalize_note(raw: str) -> str:
    """Return the canonical sharp spelling of a note name.

    Flat spellings found in ``_FLAT_TO_SHARP`` are respelled; any other
    input is kept as-is and must already be a member of ``_VALID_ROOTS``.

    Raises:
        ChordParseError: If the (possibly respelled) name is not one of
            the valid pitch classes. The message quotes the original input.
    """
    canonical = _FLAT_TO_SHARP[raw] if raw in _FLAT_TO_SHARP else raw
    if canonical in _VALID_ROOTS:
        return canonical
    raise ChordParseError(f"invalid note: {raw!r}")
def _extract_root(s: str) -> tuple[str, str]:
    """Split a chord string into its root note and the remaining suffix.

    The root is a capital note letter optionally followed by a single
    accidental ('#' or 'b'). The root is normalized via ``_normalize_note``.

    Args:
        s: Chord text without any slash-bass part, e.g. 'F#m7'.

    Returns:
        ``(normalized_root, remaining_suffix)``, e.g. ``('F#', 'm7')``.

    Raises:
        ChordParseError: If ``s`` is empty, does not start with one of the
            letters in 'CDEFGAB', or the letter+accidental pair is rejected
            by ``_normalize_note``.
    """
    if not s or s[0] not in "CDEFGAB":
        raise ChordParseError(
            f"chord symbol must start with a note letter A-G: {s!r}"
        )
    # s[1:2] is '' for a one-character string, so compare against a tuple:
    # a plain substring test ("" in "#b") would be True for the empty slice.
    root_len = 2 if s[1:2] in ("#", "b") else 1
    return _normalize_note(s[:root_len]), s[root_len:]
def _parse_bass(s: str) -> str:
    """Parse the text after '/' into a normalized bass note.

    Surrounding whitespace is ignored. The remaining text must be exactly
    one note: a letter from 'CDEFGAB' with an optional '#' or 'b'.

    Returns:
        The bass note as normalized by ``_normalize_note``.

    Raises:
        ChordParseError: If the text is empty, does not start with a note
            letter, has characters after the note, or is rejected by
            ``_normalize_note``.
    """
    bass_text = s.strip()
    if bass_text == "":
        raise ChordParseError("empty bass note after '/'")
    if bass_text[0] not in "CDEFGAB":
        raise ChordParseError(f"invalid bass note: {bass_text!r}")
    has_accidental = len(bass_text) >= 2 and bass_text[1] in "#b"
    note_len = 2 if has_accidental else 1
    # Anything beyond the note itself (e.g. 'Gm', 'F#7') is not a bass note.
    if len(bass_text) > note_len:
        raise ChordParseError(
            f"invalid bass note (trailing characters): {bass_text!r}"
        )
    return _normalize_note(bass_text)
def _parse_quality_ext(s: str) -> tuple[str, str]:
    """Resolve a root-less chord suffix to (canonical_quality, extension).

    Resolution happens in two stages:
      1. An exact match in ``_QUAL_EXT_MAP`` (standalone qualities, Unicode
         variants, and shorthands that imply a 7th).
      2. Otherwise each suffix in ``_EXT_SUFFIXES`` is tried in order; the
         first one that ``s`` ends with AND whose remaining prefix is a key
         of ``_QUAL_ONLY_MAP`` wins.

    Raises:
        ChordParseError: If neither stage recognizes ``s``.
    """
    direct = _QUAL_EXT_MAP.get(s)
    if direct is not None:
        return direct

    for suffix in _EXT_SUFFIXES:
        if not s.endswith(suffix):
            continue
        quality = _QUAL_ONLY_MAP.get(s[: len(s) - len(suffix)])
        if quality is not None:
            return quality, suffix

    raise ChordParseError(f"unrecognized quality/extension: {s!r}")
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def parse_chord_symbol(symbol: str) -> ChordTokens:
    """Parse a chord symbol string into factorized token slots.

    Leading/trailing whitespace is ignored, as is whitespace on either
    side of the slash in a slash chord ('C / G' parses like 'C/G').

    Args:
        symbol: Chord symbol, e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D'.

    Returns:
        ChordTokens(root, quality, extension, bass). ``bass`` is the string
        'root' when the symbol has no slash. See docs/chord_format_spec.md
        §4 for the token vocabularies.

    Raises:
        ChordParseError: If the symbol is empty, contains more than one
            '/', has no chord before the '/', or has an unrecognized root,
            quality/extension, or bass note.
    """
    if not symbol or not symbol.strip():
        raise ChordParseError("empty chord symbol")
    symbol = symbol.strip()

    if symbol.count("/") > 1:
        raise ChordParseError(f"multiple '/' in chord symbol: {symbol!r}")

    # partition() yields an empty separator when there is no '/', which
    # distinguishes "no bass given" from "empty bass after the slash".
    chord_part, slash, bass_str = symbol.partition("/")
    bass = _parse_bass(bass_str) if slash else "root"

    # _parse_bass strips its input; strip the chord side as well so that
    # whitespace around the slash is handled symmetrically.
    chord_part = chord_part.strip()
    if not chord_part:
        raise ChordParseError(f"missing chord before '/': {symbol!r}")

    root, rest = _extract_root(chord_part)
    quality, extension = _parse_quality_ext(rest)
    return ChordTokens(root=root, quality=quality, extension=extension, bass=bass)
+452
View File
@@ -0,0 +1,452 @@
"""Tests for src/chord_parser.py.
Coverage:
- All 18 canonical qualities with root C
- At least 2 examples per extension (including shorthand-expanded forms)
- Slash chords with various bass notes (including sharp/flat basses)
- Both sharp and flat root spellings (flat → sharp normalization)
- All examples from the §4.6 parse table in chord_format_spec.md
- Invalid inputs → ChordParseError
"""
import pytest
from src.chord_parser import ChordParseError, ChordTokens, parse_chord_symbol
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def parse(symbol: str) -> ChordTokens:
    """Short alias for ``parse_chord_symbol`` to keep the tests compact."""
    tokens = parse_chord_symbol(symbol)
    return tokens
# ---------------------------------------------------------------------------
# §4.6 — full parse table examples (spec-mandated)
# ---------------------------------------------------------------------------
class TestSpecExamples:
    """Each example from the §4.6 parse table, checked as a full token tuple."""

    def test_C(self):
        assert parse("C") == ChordTokens("C", "maj", "none", "root")

    def test_Am(self):
        assert parse("Am") == ChordTokens("A", "m", "none", "root")

    def test_Fsharpm7(self):
        assert parse("F#m7") == ChordTokens("F#", "m7", "none", "root")

    def test_Cmaj9_shorthand(self):
        # 'maj9' is a shorthand: it expands to quality=maj7 plus extension=9.
        assert parse("Cmaj9") == ChordTokens("C", "maj7", "9", "root")

    def test_G7sus4(self):
        assert parse("G7sus4") == ChordTokens("G", "7sus4", "none", "root")

    def test_F_slash_G(self):
        assert parse("F/G") == ChordTokens("F", "maj", "none", "G")

    def test_Bb7b9_slash_D(self):
        # The flat root Bb is respelled as A#.
        assert parse("Bb7b9/D") == ChordTokens("A#", "7", "b9", "D")

    def test_Em7b5(self):
        assert parse("Em7b5") == ChordTokens("E", "m7b5", "none", "root")

    def test_Dsharpdim7(self):
        assert parse("D#dim7") == ChordTokens("D#", "dim7", "none", "root")
# ---------------------------------------------------------------------------
# All 18 qualities with root C (primary spellings)
# ---------------------------------------------------------------------------
class TestAllQualities:
    """One test per canonical quality on root C, using the primary spelling."""

    @staticmethod
    def _plain_c(quality):
        """Expected tokens for a C chord with no extension and no slash bass."""
        return ChordTokens("C", quality, "none", "root")

    def test_maj(self):
        assert parse("C") == self._plain_c("maj")

    def test_m(self):
        assert parse("Cm") == self._plain_c("m")

    def test_dim(self):
        assert parse("Cdim") == self._plain_c("dim")

    def test_aug(self):
        assert parse("Caug") == self._plain_c("aug")

    def test_sus2(self):
        assert parse("Csus2") == self._plain_c("sus2")

    def test_sus4(self):
        assert parse("Csus4") == self._plain_c("sus4")

    def test_maj7(self):
        assert parse("Cmaj7") == self._plain_c("maj7")

    def test_m7(self):
        assert parse("Cm7") == self._plain_c("m7")

    def test_7(self):
        assert parse("C7") == self._plain_c("7")

    def test_m7b5(self):
        assert parse("Cm7b5") == self._plain_c("m7b5")

    def test_dim7(self):
        assert parse("Cdim7") == self._plain_c("dim7")

    def test_mM7(self):
        assert parse("CmM7") == self._plain_c("mM7")

    def test_7sus4(self):
        assert parse("C7sus4") == self._plain_c("7sus4")

    def test_aug7(self):
        assert parse("Caug7") == self._plain_c("aug7")

    def test_6(self):
        assert parse("C6") == self._plain_c("6")

    def test_m6(self):
        assert parse("Cm6") == self._plain_c("m6")

    def test_add9(self):
        assert parse("Cadd9") == self._plain_c("add9")

    def test_m_add9(self):
        assert parse("Cm(add9)") == self._plain_c("m(add9)")
# ---------------------------------------------------------------------------
# Alternative quality spellings
# ---------------------------------------------------------------------------
class TestQualityAlternatives:
    """Alternative spellings must map to the same canonical quality.

    Every symbol here is a root C followed by the alternative spelling,
    including the bare Unicode symbols '°', 'Δ' and 'ø'.
    """

    # minor: m / min / -
    def test_min_spelling(self):
        assert parse("Cmin").quality == "m"

    def test_dash_spelling(self):
        assert parse("C-").quality == "m"

    # dim: °
    def test_degree_dim(self):
        # Must be 'C°': an empty symbol raises ChordParseError instead.
        assert parse("C°").quality == "dim"

    # aug: +
    def test_plus_aug(self):
        assert parse("C+").quality == "aug"

    # sus: sus alone → sus4
    def test_sus_alone(self):
        assert parse("Csus").quality == "sus4"

    # maj7 alternatives: M7, Δ7, Δ
    def test_M7(self):
        assert parse("CM7").quality == "maj7"

    def test_delta7(self):
        assert parse("CΔ7").quality == "maj7"

    def test_delta(self):
        # Bare delta (no '7') also means major seventh.
        assert parse("CΔ").quality == "maj7"

    # maj6 → quality=6
    def test_maj6(self):
        assert parse("Cmaj6").quality == "6"

    # m7 alternatives: min7, -7
    def test_min7(self):
        assert parse("Cmin7").quality == "m7"

    def test_dash7(self):
        assert parse("C-7").quality == "m7"

    # m7b5 alternatives: ø, ø7, min7b5
    def test_half_dim_ø(self):
        # Bare 'ø' (no '7') is half-diminished.
        assert parse("Cø").quality == "m7b5"

    def test_half_dim_ø7(self):
        assert parse("Cø7").quality == "m7b5"

    def test_min7b5(self):
        assert parse("Cmin7b5").quality == "m7b5"

    # dim7: °7
    def test_degree7(self):
        assert parse("C°7").quality == "dim7"

    # mM7 alternatives: m(maj7), minMaj7
    def test_m_maj7_parens(self):
        assert parse("Cm(maj7)").quality == "mM7"

    def test_minMaj7(self):
        assert parse("CminMaj7").quality == "mM7"

    # aug7 alternatives: +7, 7#5
    def test_plus7(self):
        assert parse("C+7").quality == "aug7"

    def test_7sharp5(self):
        assert parse("C7#5").quality == "aug7"

    # 7sus4 alternative: 7sus
    def test_7sus(self):
        assert parse("C7sus").quality == "7sus4"

    # m6 alternative: min6
    def test_min6(self):
        assert parse("Cmin6").quality == "m6"

    # add9 alternative: 2
    def test_2_for_add9(self):
        assert parse("C2").quality == "add9"

    # m(add9) alternatives: madd9, m(add2)
    def test_madd9(self):
        assert parse("Cmadd9").quality == "m(add9)"

    def test_m_add2(self):
        assert parse("Cm(add2)").quality == "m(add9)"
# ---------------------------------------------------------------------------
# Extensions — at least 2 examples each, including shorthands
# ---------------------------------------------------------------------------
class TestExtensions:
    """At least two examples per extension value, shorthands included."""

    # extension=9
    def test_ext_9_via_shorthand_dominant(self):
        # 'C9' implies a dominant 7th: quality=7, extension=9.
        tokens = parse("C9")
        assert (tokens.quality, tokens.extension) == ("7", "9")

    def test_ext_9_via_maj_shorthand(self):
        # 'Fmaj9' expands to quality=maj7, extension=9.
        tokens = parse("Fmaj9")
        assert (tokens.quality, tokens.extension) == ("maj7", "9")

    def test_ext_9_explicit(self):
        # A plain dominant 7th carries no extension ...
        assert parse("G7").extension == "none"
        # ... whereas 'G9' is a dominant 7th with the 9 extension.
        ninth = parse("G9")
        assert (ninth.quality, ninth.extension) == ("7", "9")

    def test_ext_9_minor_shorthand(self):
        # 'Cm9' expands to quality=m7, extension=9.
        tokens = parse("Cm9")
        assert (tokens.quality, tokens.extension) == ("m7", "9")

    # extension=b9
    def test_ext_b9_dominant(self):
        assert parse("G7b9") == ChordTokens("G", "7", "b9", "root")

    def test_ext_b9_minor7(self):
        assert parse("Cm7b9") == ChordTokens("C", "m7", "b9", "root")

    # extension=#9
    def test_ext_sharp9_dominant(self):
        assert parse("C7#9") == ChordTokens("C", "7", "#9", "root")

    def test_ext_sharp9_aug7(self):
        assert parse("Gaug7#9") == ChordTokens("G", "aug7", "#9", "root")

    # extension=11
    def test_ext_11_dominant_shorthand(self):
        # 'C11' implies a dominant 7th: quality=7, extension=11.
        tokens = parse("C11")
        assert (tokens.quality, tokens.extension) == ("7", "11")

    def test_ext_11_minor_shorthand(self):
        # 'Cm11' expands to quality=m7, extension=11.
        tokens = parse("Cm11")
        assert (tokens.quality, tokens.extension) == ("m7", "11")

    # extension=#11
    def test_ext_sharp11_maj7(self):
        assert parse("Cmaj7#11") == ChordTokens("C", "maj7", "#11", "root")

    def test_ext_sharp11_dominant(self):
        assert parse("G7#11") == ChordTokens("G", "7", "#11", "root")

    # extension=13
    def test_ext_13_dominant_shorthand(self):
        # 'C13' implies a dominant 7th: quality=7, extension=13.
        tokens = parse("C13")
        assert (tokens.quality, tokens.extension) == ("7", "13")

    def test_ext_13_maj_shorthand(self):
        # 'Fmaj13' expands to quality=maj7, extension=13.
        tokens = parse("Fmaj13")
        assert (tokens.quality, tokens.extension) == ("maj7", "13")

    def test_ext_13_minor_shorthand(self):
        # 'Cm13' expands to quality=m7, extension=13.
        tokens = parse("Cm13")
        assert (tokens.quality, tokens.extension) == ("m7", "13")

    # extension=b13
    def test_ext_b13_dominant(self):
        assert parse("C7b13") == ChordTokens("C", "7", "b13", "root")

    def test_ext_b13_minor7(self):
        assert parse("Gm7b13") == ChordTokens("G", "m7", "b13", "root")
# ---------------------------------------------------------------------------
# Slash chords (§4.5)
# ---------------------------------------------------------------------------
class TestSlashChords:
    """Slash chords: the note after '/' lands in the ``bass`` slot."""

    def test_F_slash_A(self):
        assert parse("F/A") == ChordTokens("F", "maj", "none", "A")

    def test_G_slash_B(self):
        assert parse("G/B") == ChordTokens("G", "maj", "none", "B")

    def test_F_slash_G_on_chord(self):
        assert parse("F/G") == ChordTokens("F", "maj", "none", "G")

    def test_Em7_slash_G(self):
        assert parse("Em7/G") == ChordTokens("E", "m7", "none", "G")

    def test_D_slash_Fsharp(self):
        assert parse("D/F#") == ChordTokens("D", "maj", "none", "F#")

    def test_Dm9_slash_F(self):
        # The 'm9' shorthand still expands (quality=m7, extension=9) when a
        # slash bass follows.
        assert parse("Dm9/F") == ChordTokens("D", "m7", "9", "F")

    def test_slash_bass_flat_normalised(self):
        # A flat bass is respelled: Ab → G#.
        assert parse("C/Ab").bass == "G#"

    def test_slash_bass_sharp(self):
        # NOTE(review): despite the name, the bass used here is a natural E.
        assert parse("Cmaj7/E") == ChordTokens("C", "maj7", "none", "E")

    def test_cmaj7_slash_Bflat(self):
        # A flat bass is respelled: Bb → A#.
        assert parse("Cmaj7/Bb").bass == "A#"
# ---------------------------------------------------------------------------
# Root spellings — sharp and flat
# ---------------------------------------------------------------------------
class TestRootNormalization:
    """Roots are reported in sharp spelling whatever the input spelling."""

    def test_sharp_roots(self):
        # Sharp-spelled roots pass through unchanged.
        for sharp_root in ("C#", "D#", "F#", "G#", "A#"):
            assert parse(f"{sharp_root}m7").root == sharp_root

    def test_flat_to_sharp_normalization(self):
        # Flat spelling → expected canonical spelling.
        expected_by_flat = {
            "Db": "C#",
            "Eb": "D#",
            "Gb": "F#",
            "Ab": "G#",
            "Bb": "A#",
            "Cb": "B",
            "Fb": "E",
        }
        for flat, sharp in expected_by_flat.items():
            parsed_root = parse(f"{flat}maj7").root
            assert parsed_root == sharp, f"{flat} should normalise to {sharp}"

    def test_natural_roots(self):
        # A bare natural letter is a valid chord symbol (plain major).
        for letter in "CDEFGAB":
            assert parse(letter).root == letter
# ---------------------------------------------------------------------------
# Invalid inputs → ChordParseError
# ---------------------------------------------------------------------------
class TestInvalidInputs:
    """Malformed symbols must be rejected with ChordParseError."""

    @staticmethod
    def _assert_rejected(symbol):
        """Fail unless parsing ``symbol`` raises ChordParseError."""
        with pytest.raises(ChordParseError):
            parse(symbol)

    def test_empty_string(self):
        self._assert_rejected("")

    def test_whitespace_only(self):
        self._assert_rejected(" ")

    def test_unknown_root(self):
        self._assert_rejected("Xyz")

    def test_trailing_slash_no_bass(self):
        self._assert_rejected("C7/")

    def test_invalid_bass_note(self):
        self._assert_rejected("C/Z")

    def test_invalid_bass_number(self):
        self._assert_rejected("C/4")

    def test_multiple_slashes(self):
        self._assert_rejected("C/E/G")

    def test_unknown_quality(self):
        self._assert_rejected("Cxyz")

    def test_slash_with_no_chord(self):
        self._assert_rejected("/G")

    def test_lowercase_root(self):
        # Root letters are case-sensitive; lowercase is not accepted.
        self._assert_rejected("cmaj7")