dd77de00d0
Adds src/chord_parser.py with parse_chord_symbol() → ChordTokens. Handles all 18 qualities (including Unicode °/Δ/ø variants and alternative spellings), shorthand expansion (maj9 → maj7+ext9, C9 → 7+ext9, etc.), slash chords, and flat→sharp root normalization. Raises ChordParseError with a descriptive message on bad input. Adds tests/test_chord_parser.py: 90 tests covering all qualities, all 7 extension values (including shorthands), slash chords, root normalization, all §4.6 spec examples, and 10 invalid-input cases. Adds requirements.txt with project dependencies. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
271 lines
7.8 KiB
Python
271 lines
7.8 KiB
Python
"""Chord symbol parser for .chord files.
|
||
|
||
Parses a chord symbol string (e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D') into a
|
||
ChordTokens dataclass. See docs/chord_format_spec.md §4 for the full spec.
|
||
|
||
Usage:
|
||
from src.chord_parser import parse_chord_symbol, ChordParseError
|
||
tokens = parse_chord_symbol("Fmaj9")
|
||
# ChordTokens(root='F', quality='maj7', extension='9', bass='root')
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass
|
||
|
||
|
||
class ChordParseError(ValueError):
    """Signal that a chord symbol string could not be parsed.

    Derives from ValueError, so callers that already catch ValueError
    will also catch this.
    """
|
||
|
||
|
||
@dataclass(frozen=True)
class ChordTokens:
    """Immutable, factorized view of a chord: one string per token slot.

    Fields (in positional order):
        root: one of the 12 chromatic pitch classes, e.g. 'C', 'F#', 'A#'.
        quality: one of the 18 canonical quality names, e.g. 'maj7', 'm',
            'dim7'.
        extension: one of 8 values: 'none' or e.g. '9', 'b9', '#11'.
        bass: 'root' or one of the 12 pitch classes.
    """

    root: str
    quality: str
    extension: str
    bass: str
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Lookup tables
|
||
# ---------------------------------------------------------------------------
|
||
|
||
_VALID_ROOTS: frozenset[str] = frozenset(
|
||
{"C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"}
|
||
)
|
||
|
||
_FLAT_TO_SHARP: dict[str, str] = {
|
||
"Cb": "B",
|
||
"Db": "C#",
|
||
"Eb": "D#",
|
||
"Fb": "E",
|
||
"Gb": "F#",
|
||
"Ab": "G#",
|
||
"Bb": "A#",
|
||
}
|
||
|
||
# Maps the quality+extension string (after the root, before any '/') to
|
||
# (canonical_quality, canonical_extension). Covers:
|
||
# - standalone qualities with all their alternative spellings
|
||
# - shorthand expansions where a 7th is implied (e.g. 'maj9' → maj7 + 9)
|
||
# - Unicode symbols (°, Δ, ø)
|
||
_QUAL_EXT_MAP: dict[str, tuple[str, str]] = {
|
||
# empty root suffix → plain major
|
||
"": ("maj", "none"),
|
||
# --- major ---
|
||
"maj": ("maj", "none"),
|
||
"maj7": ("maj7", "none"),
|
||
"M7": ("maj7", "none"),
|
||
"Δ7": ("maj7", "none"),
|
||
"Δ": ("maj7", "none"),
|
||
"maj6": ("6", "none"),
|
||
# major shorthands (dominant/major 7th implied by the extension numeral)
|
||
"maj9": ("maj7", "9"),
|
||
"maj11": ("maj7", "11"),
|
||
"maj13": ("maj7", "13"),
|
||
# --- minor ---
|
||
"m": ("m", "none"),
|
||
"min": ("m", "none"),
|
||
"-": ("m", "none"),
|
||
"m7": ("m7", "none"),
|
||
"min7": ("m7", "none"),
|
||
"-7": ("m7", "none"),
|
||
# minor shorthands (m7 implied)
|
||
"m9": ("m7", "9"),
|
||
"min9": ("m7", "9"),
|
||
"m11": ("m7", "11"),
|
||
"min11": ("m7", "11"),
|
||
"m13": ("m7", "13"),
|
||
"min13": ("m7", "13"),
|
||
# minor sixth
|
||
"m6": ("m6", "none"),
|
||
"min6": ("m6", "none"),
|
||
# half-diminished
|
||
"m7b5": ("m7b5", "none"),
|
||
"min7b5": ("m7b5", "none"),
|
||
"m7♭5": ("m7b5", "none"),
|
||
"ø": ("m7b5", "none"),
|
||
"ø7": ("m7b5", "none"),
|
||
# minor-major seventh
|
||
"mM7": ("mM7", "none"),
|
||
"m(maj7)": ("mM7", "none"),
|
||
"minMaj7": ("mM7", "none"),
|
||
# minor add9
|
||
"madd9": ("m(add9)", "none"),
|
||
"m(add9)": ("m(add9)", "none"),
|
||
"m(add2)": ("m(add9)", "none"),
|
||
# --- dominant ---
|
||
"7": ("7", "none"),
|
||
# dominant shorthands (dominant 7th implied)
|
||
"9": ("7", "9"),
|
||
"11": ("7", "11"),
|
||
"13": ("7", "13"),
|
||
# --- diminished ---
|
||
"dim": ("dim", "none"),
|
||
"°": ("dim", "none"),
|
||
"dim7": ("dim7", "none"),
|
||
"°7": ("dim7", "none"),
|
||
# --- augmented ---
|
||
"aug": ("aug", "none"),
|
||
"+": ("aug", "none"),
|
||
"aug7": ("aug7", "none"),
|
||
"+7": ("aug7", "none"),
|
||
"7#5": ("aug7", "none"),
|
||
# --- suspended ---
|
||
"sus2": ("sus2", "none"),
|
||
"sus4": ("sus4", "none"),
|
||
"sus": ("sus4", "none"),
|
||
"7sus4": ("7sus4", "none"),
|
||
"7sus": ("7sus4", "none"),
|
||
# --- sixth / add ---
|
||
"6": ("6", "none"),
|
||
"add9": ("add9", "none"),
|
||
"2": ("add9", "none"),
|
||
}
|
||
|
||
# Maps quality-only strings (no extension) to canonical quality names.
|
||
# Used when an explicit extension suffix has been stripped from the end.
|
||
_QUAL_ONLY_MAP: dict[str, str] = {
|
||
"": "maj",
|
||
"maj": "maj",
|
||
"maj7": "maj7",
|
||
"M7": "maj7",
|
||
"Δ7": "maj7",
|
||
"Δ": "maj7",
|
||
"maj6": "6",
|
||
"m": "m",
|
||
"min": "m",
|
||
"-": "m",
|
||
"m7": "m7",
|
||
"min7": "m7",
|
||
"-7": "m7",
|
||
"m6": "m6",
|
||
"min6": "m6",
|
||
"m7b5": "m7b5",
|
||
"ø": "m7b5",
|
||
"ø7": "m7b5",
|
||
"mM7": "mM7",
|
||
"7": "7",
|
||
"dim": "dim",
|
||
"°": "dim",
|
||
"dim7": "dim7",
|
||
"°7": "dim7",
|
||
"aug": "aug",
|
||
"+": "aug",
|
||
"aug7": "aug7",
|
||
"+7": "aug7",
|
||
"7#5": "aug7",
|
||
"sus2": "sus2",
|
||
"sus4": "sus4",
|
||
"sus": "sus4",
|
||
"7sus4": "7sus4",
|
||
"7sus": "7sus4",
|
||
"6": "6",
|
||
"add9": "add9",
|
||
}
|
||
|
||
# Extension suffixes tried longest-first to avoid greedy ambiguity.
|
||
# (e.g. 'b13' must be tried before '13' so 'maj7b13' is not split at '13')
|
||
_EXT_SUFFIXES: tuple[str, ...] = ("b13", "#11", "b9", "#9", "13", "11", "9")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Internal helpers
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def _normalize_note(raw: str) -> str:
    """Return the canonical pitch-class name for *raw*.

    Flat spellings listed in _FLAT_TO_SHARP are translated; anything else
    is passed through as-is and must already be a member of _VALID_ROOTS.

    Raises:
        ChordParseError: If the (translated) name is not a valid root.
    """
    candidate = _FLAT_TO_SHARP[raw] if raw in _FLAT_TO_SHARP else raw
    if candidate in _VALID_ROOTS:
        return candidate
    # Report the spelling the caller supplied, not the translated one.
    raise ChordParseError(f"invalid note: {raw!r}")
|
||
|
||
|
||
def _extract_root(s: str) -> tuple[str, str]:
    """Split *s* into its root note and whatever follows it.

    The root is the leading letter A–G plus an optional single '#' or 'b'.

    Returns:
        (normalized_root, remaining_suffix), where the root has been passed
        through _normalize_note.

    Raises:
        ChordParseError: If *s* is empty, does not start with a note letter,
            or the root does not normalize to a valid note.
    """
    # Membership is tested against a set so that the empty string (from an
    # empty *s*) is rejected along with non-note characters.
    if s[:1] not in {"C", "D", "E", "F", "G", "A", "B"}:
        raise ChordParseError(
            f"chord symbol must start with a note letter A–G: {s!r}"
        )
    # s[1:2] is '' when there is no second character, so no length check
    # is needed before looking for an accidental.
    root_len = 2 if s[1:2] in ("#", "b") else 1
    return _normalize_note(s[:root_len]), s[root_len:]
|
||
|
||
|
||
def _parse_bass(s: str) -> str:
    """Parse the text after '/' into a normalized bass note.

    Surrounding whitespace is ignored.  The remainder must be exactly one
    note: a letter A–G with an optional single '#' or 'b'.

    Raises:
        ChordParseError: If the text is empty, does not start with a note
            letter, has characters after the note, or names an invalid note.
    """
    text = s.strip()
    if text == "":
        raise ChordParseError("empty bass note after '/'")
    if text[0] not in "CDEFGAB":
        raise ChordParseError(f"invalid bass note: {text!r}")

    note_len = 2 if text[1:2] in ("#", "b") else 1
    # Trailing characters are rejected before the note itself is validated.
    if len(text) > note_len:
        raise ChordParseError(
            f"invalid bass note (trailing characters): {text!r}"
        )
    return _normalize_note(text[:note_len])
|
||
|
||
|
||
def _parse_quality_ext(s: str) -> tuple[str, str]:
    """Resolve the text between root and '/' to (quality, extension).

    Two strategies are tried in order:

    1. Look the whole string up in _QUAL_EXT_MAP (standalone qualities,
       Unicode variants and shorthands).
    2. For each suffix in _EXT_SUFFIXES, in order, that *s* ends with, look
       the part before it up in _QUAL_ONLY_MAP; the first hit wins.

    Raises:
        ChordParseError: If neither strategy recognizes *s*.
    """
    try:
        return _QUAL_EXT_MAP[s]
    except KeyError:
        pass  # not a direct entry; try peeling an extension off the end

    for suffix in _EXT_SUFFIXES:
        if not s.endswith(suffix):
            continue
        stem = s[: len(s) - len(suffix)]
        quality = _QUAL_ONLY_MAP.get(stem)
        if quality is not None:
            return quality, suffix

    raise ChordParseError(f"unrecognized quality/extension: {s!r}")
|
||
|
||
|
||
# ---------------------------------------------------------------------------
|
||
# Public API
|
||
# ---------------------------------------------------------------------------
|
||
|
||
|
||
def parse_chord_symbol(symbol: str) -> ChordTokens:
    """Break a chord symbol into its root, quality, extension and bass.

    Leading and trailing whitespace is ignored.  At most one '/' is allowed;
    the text after it is the bass note.  Without a '/', bass is 'root'.

    Args:
        symbol: Chord symbol, e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D'.

    Returns:
        ChordTokens(root, quality, extension, bass) with all values drawn
        from the vocabularies in docs/chord_format_spec.md §4.2–4.5.

    Raises:
        ChordParseError: If the symbol is empty, unrecognized, or malformed.
    """
    text = symbol.strip() if symbol else ""
    if not text:
        raise ChordParseError("empty chord symbol")

    chord_part, slash, bass_part = text.partition("/")
    # partition splits at the first '/', so a second one lands in bass_part.
    if "/" in bass_part:
        raise ChordParseError(f"multiple '/' in chord symbol: {text!r}")

    # The bass is validated before the chord part, so a bad bass note is
    # reported ahead of a missing chord.
    bass = _parse_bass(bass_part) if slash else "root"

    if not chord_part:
        raise ChordParseError(f"missing chord before '/': {text!r}")

    root, suffix = _extract_root(chord_part)
    quality, extension = _parse_quality_ext(suffix)
    return ChordTokens(root=root, quality=quality, extension=extension, bass=bass)
|