"""Chord symbol parser for .chord files.

Parses a chord symbol string (e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D') into a
ChordTokens dataclass. See docs/chord_format_spec.md §4 for the full spec.

Usage:
    from src.chord_parser import parse_chord_symbol, ChordParseError

    tokens = parse_chord_symbol("Fmaj9")
    # ChordTokens(root='F', quality='maj7', extension='9', bass='root')
"""

from __future__ import annotations

from dataclasses import dataclass


class ChordParseError(ValueError):
    """Raised when a chord symbol cannot be parsed."""


@dataclass(frozen=True)
class ChordTokens:
    """Factorized chord representation — one string per token slot."""

    root: str  # one of the 12 chromatic pitch classes, e.g. 'C', 'F#', 'A#'
    quality: str  # one of the 18 canonical quality names, e.g. 'maj7', 'm', 'dim7'
    extension: str  # one of 8 values: 'none' or e.g. '9', 'b9', '#11'
    bass: str  # 'root' or one of the 12 pitch classes


# ---------------------------------------------------------------------------
# Lookup tables
# ---------------------------------------------------------------------------

_VALID_ROOTS: frozenset[str] = frozenset(
    {"C", "C#", "D", "D#", "E", "F", "F#", "G", "G#", "A", "A#", "B"}
)

# Flat spellings, rewritten to the sharp/natural spelling used in _VALID_ROOTS.
_FLAT_TO_SHARP: dict[str, str] = {
    "Cb": "B",
    "Db": "C#",
    "Eb": "D#",
    "Fb": "E",
    "Gb": "F#",
    "Ab": "G#",
    "Bb": "A#",
}

# Sharp spellings that land on a natural note. These are the counterparts of
# 'Cb' and 'Fb' above; without them 'E#' and 'B#' would be rejected even
# though their flat-side equivalents are accepted.
_SHARP_TO_NATURAL: dict[str, str] = {
    "E#": "F",
    "B#": "C",
}

# Maps the quality+extension string (after the root, before any '/') to
# (canonical_quality, canonical_extension). Covers:
#   - standalone qualities with all their alternative spellings
#   - shorthand expansions where a 7th is implied (e.g. 'maj9' → maj7 + 9)
#   - Unicode symbols (°, Δ, ø)
_QUAL_EXT_MAP: dict[str, tuple[str, str]] = {
    # empty root suffix → plain major
    "": ("maj", "none"),
    # --- major ---
    "maj": ("maj", "none"),
    "maj7": ("maj7", "none"),
    "M7": ("maj7", "none"),
    "Δ7": ("maj7", "none"),
    "Δ": ("maj7", "none"),
    "maj6": ("6", "none"),
    # major shorthands (major 7th implied by the extension numeral)
    "maj9": ("maj7", "9"),
    "maj11": ("maj7", "11"),
    "maj13": ("maj7", "13"),
    # --- minor ---
    "m": ("m", "none"),
    "min": ("m", "none"),
    "-": ("m", "none"),
    "m7": ("m7", "none"),
    "min7": ("m7", "none"),
    "-7": ("m7", "none"),
    # minor shorthands (m7 implied)
    "m9": ("m7", "9"),
    "min9": ("m7", "9"),
    "m11": ("m7", "11"),
    "min11": ("m7", "11"),
    "m13": ("m7", "13"),
    "min13": ("m7", "13"),
    # The '-' spelling needs its own shorthand entries: otherwise '-9' falls
    # through to suffix stripping and yields quality 'm' instead of 'm7'.
    "-9": ("m7", "9"),
    "-11": ("m7", "11"),
    "-13": ("m7", "13"),
    # minor sixth
    "m6": ("m6", "none"),
    "min6": ("m6", "none"),
    # half-diminished
    "m7b5": ("m7b5", "none"),
    "min7b5": ("m7b5", "none"),
    "m7♭5": ("m7b5", "none"),
    "ø": ("m7b5", "none"),
    "ø7": ("m7b5", "none"),
    # minor-major seventh
    "mM7": ("mM7", "none"),
    "m(maj7)": ("mM7", "none"),
    "minMaj7": ("mM7", "none"),
    # minor add9
    "madd9": ("m(add9)", "none"),
    "m(add9)": ("m(add9)", "none"),
    "m(add2)": ("m(add9)", "none"),
    # --- dominant ---
    "7": ("7", "none"),
    # dominant shorthands (dominant 7th implied)
    "9": ("7", "9"),
    "11": ("7", "11"),
    "13": ("7", "13"),
    # --- diminished ---
    "dim": ("dim", "none"),
    "°": ("dim", "none"),
    "dim7": ("dim7", "none"),
    "°7": ("dim7", "none"),
    # --- augmented ---
    "aug": ("aug", "none"),
    "+": ("aug", "none"),
    "aug7": ("aug7", "none"),
    "+7": ("aug7", "none"),
    "7#5": ("aug7", "none"),
    # --- suspended ---
    "sus2": ("sus2", "none"),
    "sus4": ("sus4", "none"),
    "sus": ("sus4", "none"),
    "7sus4": ("7sus4", "none"),
    "7sus": ("7sus4", "none"),
    # --- sixth / add ---
    "6": ("6", "none"),
    "add9": ("add9", "none"),
    "2": ("add9", "none"),
}

# Maps quality-only strings (no extension) to canonical quality names.
# Used when an explicit extension suffix has been stripped from the end.
_QUAL_ONLY_MAP: dict[str, str] = {
    "": "maj",
    "maj": "maj",
    "maj7": "maj7",
    "M7": "maj7",
    "Δ7": "maj7",
    "Δ": "maj7",
    "maj6": "6",
    "m": "m",
    "min": "m",
    "-": "m",
    "m7": "m7",
    "min7": "m7",
    "-7": "m7",
    "m6": "m6",
    "min6": "m6",
    "m7b5": "m7b5",
    # Alternative spellings of m7b5, so that e.g. 'min7b5b13' parses the same
    # way as 'm7b5b13'.
    "min7b5": "m7b5",
    "m7♭5": "m7b5",
    "ø": "m7b5",
    "ø7": "m7b5",
    "mM7": "mM7",
    # Alternative spellings of mM7, for the same reason.
    "m(maj7)": "mM7",
    "minMaj7": "mM7",
    "7": "7",
    "dim": "dim",
    "°": "dim",
    "dim7": "dim7",
    "°7": "dim7",
    "aug": "aug",
    "+": "aug",
    "aug7": "aug7",
    "+7": "aug7",
    "7#5": "aug7",
    "sus2": "sus2",
    "sus4": "sus4",
    "sus": "sus4",
    "7sus4": "7sus4",
    "7sus": "7sus4",
    "6": "6",
    "add9": "add9",
}

# Extension suffixes tried longest-first to avoid greedy ambiguity.
# (e.g. 'b13' must be tried before '13' so 'maj7b13' is not split at '13')
_EXT_SUFFIXES: tuple[str, ...] = ("b13", "#11", "b9", "#9", "13", "11", "9")


# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------


def _normalize_note(raw: str) -> str:
    """Return the canonical pitch-class spelling for the note name *raw*.

    Flat spellings and the sharp spellings 'E#' / 'B#' are rewritten to the
    spelling used in ``_VALID_ROOTS``; names already in that set pass through.

    Raises:
        ChordParseError: If *raw* does not name one of the 12 pitch classes.
    """
    note = _FLAT_TO_SHARP.get(raw) or _SHARP_TO_NATURAL.get(raw, raw)
    if note not in _VALID_ROOTS:
        raise ChordParseError(f"invalid note: {raw!r}")
    return note


def _split_note(s: str) -> tuple[str, str]:
    """Split *s* into (note_name, remainder).

    The note name is the first character plus an optional '#' or 'b'
    accidental. Callers must already have checked that *s* is non-empty and
    starts with a note letter.
    """
    # s[1:2] is '' when s has a single character, so no length check is needed.
    width = 2 if s[1:2] in ("#", "b") else 1
    return s[:width], s[width:]


def _extract_root(s: str) -> tuple[str, str]:
    """Return (normalized_root, remaining_suffix).

    Raises:
        ChordParseError: If *s* does not start with a note letter A–G, or the
            leading note name is not a valid pitch class.
    """
    if not s or s[0] not in "CDEFGAB":
        raise ChordParseError(
            f"chord symbol must start with a note letter A–G: {s!r}"
        )
    raw_root, rest = _split_note(s)
    return _normalize_note(raw_root), rest


def _parse_bass(s: str) -> str:
    """Return the normalized pitch class for the text after the '/'.

    Surrounding whitespace is ignored. The text must be exactly one note name.

    Raises:
        ChordParseError: If the text is empty, does not start with a note
            letter, has trailing characters, or names an invalid note.
    """
    s = s.strip()
    if not s:
        raise ChordParseError("empty bass note after '/'")
    if s[0] not in "CDEFGAB":
        raise ChordParseError(f"invalid bass note: {s!r}")
    raw, tail = _split_note(s)
    if tail:
        raise ChordParseError(f"invalid bass note (trailing characters): {s!r}")
    return _normalize_note(raw)


def _parse_quality_ext(s: str) -> tuple[str, str]:
    """Return (canonical_quality, canonical_extension) for the suffix string.

    *s* is the part of the chord symbol after the root and before any '/'.

    Raises:
        ChordParseError: If *s* matches neither lookup path.
    """
    # Direct lookup: handles standalone qualities, Unicode variants, shorthands.
    if s in _QUAL_EXT_MAP:
        return _QUAL_EXT_MAP[s]

    # Try stripping a known extension suffix from the right. A suffix whose
    # remaining prefix is not a known quality is skipped, not treated as fatal.
    for ext in _EXT_SUFFIXES:
        if s.endswith(ext):
            qual_s = s[: -len(ext)]
            if qual_s in _QUAL_ONLY_MAP:
                return _QUAL_ONLY_MAP[qual_s], ext

    raise ChordParseError(f"unrecognized quality/extension: {s!r}")


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def parse_chord_symbol(symbol: str) -> ChordTokens:
    """Parse a chord symbol string into factorized token slots.

    Whitespace around the whole symbol and on either side of the '/' is
    ignored.

    Args:
        symbol: Chord symbol, e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D'.

    Returns:
        ChordTokens(root, quality, extension, bass) with all values drawn
        from the vocabularies in docs/chord_format_spec.md §4.2–4.5.

    Raises:
        ChordParseError: If the symbol is empty, unrecognized, or malformed.
    """
    if not symbol or not symbol.strip():
        raise ChordParseError("empty chord symbol")

    symbol = symbol.strip()

    chord_part, slash, bass_str = symbol.partition("/")
    if "/" in bass_str:
        raise ChordParseError(f"multiple '/' in chord symbol: {symbol!r}")

    bass = _parse_bass(bass_str) if slash else "root"

    # _parse_bass strips its side of the slash; strip the chord side too so
    # that 'C /E' and 'C/ E' are treated alike.
    chord_part = chord_part.strip()
    if not chord_part:
        raise ChordParseError(f"missing chord before '/': {symbol!r}")

    root, rest = _extract_root(chord_part)
    quality, extension = _parse_quality_ext(rest)

    return ChordTokens(root=root, quality=quality, extension=extension, bass=bass)