Files
hamori/src/chord_parser.py
T
H1K0 dd77de00d0 feat: implement chord symbol parser with full test suite
Adds src/chord_parser.py with parse_chord_symbol() → ChordTokens.
Handles all 18 qualities (including Unicode °/Δ/ø variants and
alternative spellings), shorthand expansion (maj9 → maj7+ext9,
C9 → 7+ext9, etc.), slash chords, and flat→sharp root normalization.
Raises ChordParseError with a descriptive message on bad input.

Adds tests/test_chord_parser.py: 90 tests covering all qualities,
all 7 extension values (including shorthands), slash chords, root
normalization, all §4.6 spec examples, and 10 invalid-input cases.

Adds requirements.txt with project dependencies.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-19 11:17:10 +03:00

271 lines
7.8 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Chord symbol parser for .chord files.
Parses a chord symbol string (e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D') into a
ChordTokens dataclass. See docs/chord_format_spec.md §4 for the full spec.
Usage:
from src.chord_parser import parse_chord_symbol, ChordParseError
tokens = parse_chord_symbol("Fmaj9")
# ChordTokens(root='F', quality='maj7', extension='9', bass='root')
"""
from __future__ import annotations
from dataclasses import dataclass
class ChordParseError(ValueError):
    """Signal that a chord symbol string could not be parsed.

    Derives from ValueError, so handlers written for ValueError also
    catch this exception.
    """
@dataclass(frozen=True)
class ChordTokens:
    """Immutable parse result: one string token for each chord slot."""

    # Root pitch class, one of the 12 chromatic names, e.g. 'C', 'F#', 'A#'.
    root: str
    # Canonical quality name (18 possible values), e.g. 'maj7', 'm', 'dim7'.
    quality: str
    # Extension token (8 possible values): 'none' or e.g. '9', 'b9', '#11'.
    extension: str
    # Bass token: 'root', or one of the 12 pitch classes.
    bass: str
# ---------------------------------------------------------------------------
# Lookup tables
# ---------------------------------------------------------------------------
# The twelve pitch-class names accepted after normalization (sharps only).
_VALID_ROOTS: frozenset[str] = frozenset(
    "C C# D D# E F F# G G# A A# B".split()
)
# Flat spellings and the pitch-class name each one is rewritten to.
_FLAT_TO_SHARP: dict[str, str] = dict(
    Cb="B",
    Db="C#",
    Eb="D#",
    Fb="E",
    Gb="F#",
    Ab="G#",
    Bb="A#",
)
# Maps the quality+extension string (after the root, before any '/') to
# (canonical_quality, canonical_extension). Covers:
# - standalone qualities with all their alternative spellings
# - shorthand expansions where a 7th is implied (e.g. 'maj9' → maj7 + 9)
# - Unicode symbols (°, Δ, ø)
#
# Shorthands must be listed here explicitly: a string that misses this table
# falls back to suffix stripping, which would pair the bare quality with the
# extension (e.g. 'm' + '9') instead of the implied seventh chord.
_QUAL_EXT_MAP: dict[str, tuple[str, str]] = {
    # empty root suffix → plain major
    "": ("maj", "none"),
    # --- major ---
    "maj": ("maj", "none"),
    "maj7": ("maj7", "none"),
    "M7": ("maj7", "none"),
    "Δ7": ("maj7", "none"),
    "Δ": ("maj7", "none"),
    "maj6": ("6", "none"),
    # major shorthands (major 7th implied by the extension numeral)
    "maj9": ("maj7", "9"),
    "maj11": ("maj7", "11"),
    "maj13": ("maj7", "13"),
    # --- minor ---
    "m": ("m", "none"),
    "min": ("m", "none"),
    "-": ("m", "none"),
    "m7": ("m7", "none"),
    "min7": ("m7", "none"),
    "-7": ("m7", "none"),
    # minor shorthands (m7 implied); the '-' spellings are listed so they
    # expand exactly like their 'm' / 'min' counterparts
    "m9": ("m7", "9"),
    "min9": ("m7", "9"),
    "-9": ("m7", "9"),
    "m11": ("m7", "11"),
    "min11": ("m7", "11"),
    "-11": ("m7", "11"),
    "m13": ("m7", "13"),
    "min13": ("m7", "13"),
    "-13": ("m7", "13"),
    # minor sixth
    "m6": ("m6", "none"),
    "min6": ("m6", "none"),
    # half-diminished
    "m7b5": ("m7b5", "none"),
    "min7b5": ("m7b5", "none"),
    "m7♭5": ("m7b5", "none"),
    "ø": ("m7b5", "none"),
    "ø7": ("m7b5", "none"),
    # minor-major seventh
    "mM7": ("mM7", "none"),
    "m(maj7)": ("mM7", "none"),
    "minMaj7": ("mM7", "none"),
    # minor add9
    "madd9": ("m(add9)", "none"),
    "m(add9)": ("m(add9)", "none"),
    "m(add2)": ("m(add9)", "none"),
    # --- dominant ---
    "7": ("7", "none"),
    # dominant shorthands (dominant 7th implied)
    "9": ("7", "9"),
    "11": ("7", "11"),
    "13": ("7", "13"),
    # --- diminished ---
    "dim": ("dim", "none"),
    "°": ("dim", "none"),
    "dim7": ("dim7", "none"),
    "°7": ("dim7", "none"),
    # --- augmented ---
    "aug": ("aug", "none"),
    "+": ("aug", "none"),
    "aug7": ("aug7", "none"),
    "+7": ("aug7", "none"),
    "7#5": ("aug7", "none"),
    # --- suspended ---
    "sus2": ("sus2", "none"),
    "sus4": ("sus4", "none"),
    "sus": ("sus4", "none"),
    "7sus4": ("7sus4", "none"),
    "7sus": ("7sus4", "none"),
    # --- sixth / add ---
    "6": ("6", "none"),
    "add9": ("add9", "none"),
    "2": ("add9", "none"),
}
# Maps quality-only strings (no extension) to canonical quality names.
# Used when an explicit extension suffix has been stripped from the end.
# Every spelling of a quality listed here is included, so that an extended
# chord parses the same way whichever alias was used for the quality.
_QUAL_ONLY_MAP: dict[str, str] = {
    "": "maj",
    "maj": "maj",
    "maj7": "maj7",
    "M7": "maj7",
    "Δ7": "maj7",
    "Δ": "maj7",
    "maj6": "6",
    "m": "m",
    "min": "m",
    "-": "m",
    "m7": "m7",
    "min7": "m7",
    "-7": "m7",
    "m6": "m6",
    "min6": "m6",
    "m7b5": "m7b5",
    "min7b5": "m7b5",
    "m7♭5": "m7b5",
    "ø": "m7b5",
    "ø7": "m7b5",
    "mM7": "mM7",
    "m(maj7)": "mM7",
    "minMaj7": "mM7",
    "7": "7",
    "dim": "dim",
    "°": "dim",
    "dim7": "dim7",
    "°7": "dim7",
    "aug": "aug",
    "+": "aug",
    "aug7": "aug7",
    "+7": "aug7",
    "7#5": "aug7",
    "sus2": "sus2",
    "sus4": "sus4",
    "sus": "sus4",
    "7sus4": "7sus4",
    "7sus": "7sus4",
    "6": "6",
    "add9": "add9",
}
# Explicit extension suffixes, in the order they are tried against the end
# of a quality string: accidental-prefixed forms come before the bare
# numerals (so 'b13' is tested before '13').
_EXT_SUFFIXES: tuple[str, ...] = tuple("b13 #11 b9 #9 13 11 9".split())
# ---------------------------------------------------------------------------
# Internal helpers
# ---------------------------------------------------------------------------
def _normalize_note(raw: str) -> str:
    """Return the canonical pitch-class name for *raw*.

    Spellings found in ``_FLAT_TO_SHARP`` are rewritten to the name stored
    there; anything else is checked as given.

    Raises:
        ChordParseError: If the resulting name is not in ``_VALID_ROOTS``.
    """
    canonical = _FLAT_TO_SHARP[raw] if raw in _FLAT_TO_SHARP else raw
    if canonical in _VALID_ROOTS:
        return canonical
    raise ChordParseError(f"invalid note: {raw!r}")
def _extract_root(s: str) -> tuple[str, str]:
    """Split the root note off the front of a chord string.

    Args:
        s: Chord text without the bass part, e.g. 'F#m7'.

    Returns:
        (normalized_root, remaining_suffix): the root after
        ``_normalize_note`` and everything that followed it in *s*.

    Raises:
        ChordParseError: If *s* is empty, does not start with one of the
            letters in 'CDEFGAB', or the root fails normalization.
    """
    if not s or s[0] not in "CDEFGAB":
        raise ChordParseError(
            f"chord symbol must start with a note letter A-G: {s!r}"
        )
    # A '#' or 'b' directly after the letter is taken as part of the root.
    # Slicing keeps this safe for one-character input; the tuple (rather
    # than the string "#b") is needed because '' is "in" every string.
    split_at = 2 if s[1:2] in ("#", "b") else 1
    return _normalize_note(s[:split_at]), s[split_at:]
def _parse_bass(s: str) -> str:
    """Parse the text after '/' into a normalized bass note.

    Surrounding whitespace is ignored. The remainder must be exactly one
    note: a letter from 'CDEFGAB', optionally followed by '#' or 'b'.

    Raises:
        ChordParseError: If the text is empty, does not start with a note
            letter, has characters after the note, or the note fails
            ``_normalize_note``.
    """
    s = s.strip()
    if not s:
        raise ChordParseError("empty bass note after '/'")
    if s[0] not in "CDEFGAB":
        raise ChordParseError(f"invalid bass note: {s!r}")

    # Length of the note itself: the letter plus an optional accidental.
    note_len = 2 if s[1:2] in ("#", "b") else 1
    if len(s) > note_len:
        raise ChordParseError(f"invalid bass note (trailing characters): {s!r}")
    return _normalize_note(s[:note_len])
def _parse_quality_ext(s: str) -> tuple[str, str]:
    """Resolve the text after the root into (quality, extension).

    The whole string is looked up in ``_QUAL_EXT_MAP`` first. Failing that,
    each entry of ``_EXT_SUFFIXES`` is tried in order: when *s* ends with it
    and the remaining prefix is a key of ``_QUAL_ONLY_MAP``, that quality is
    returned together with the suffix as the extension.

    Raises:
        ChordParseError: If neither lookup succeeds.
    """
    whole_match = _QUAL_EXT_MAP.get(s)
    if whole_match is not None:
        return whole_match

    for suffix in _EXT_SUFFIXES:
        if not s.endswith(suffix):
            continue
        quality = _QUAL_ONLY_MAP.get(s[: len(s) - len(suffix)])
        if quality is not None:
            return quality, suffix

    raise ChordParseError(f"unrecognized quality/extension: {s!r}")
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def parse_chord_symbol(symbol: str) -> ChordTokens:
    """Parse a chord symbol string into factorized token slots.

    Leading and trailing whitespace is ignored, as is whitespace on either
    side of the '/' that introduces a bass note.

    Args:
        symbol: Chord symbol, e.g. 'Cmaj7', 'F#m7/A', 'Bb7b9/D'.

    Returns:
        ChordTokens(root, quality, extension, bass) with all values drawn
        from the vocabularies in docs/chord_format_spec.md §4.2-4.5.
        ``bass`` is 'root' when the symbol has no '/'.

    Raises:
        ChordParseError: If the symbol is empty, unrecognized, or malformed.
    """
    if not symbol or not symbol.strip():
        raise ChordParseError("empty chord symbol")
    symbol = symbol.strip()

    if symbol.count("/") > 1:
        raise ChordParseError(f"multiple '/' in chord symbol: {symbol!r}")

    chord_part, slash, bass_str = symbol.partition("/")
    # The bass is validated before the chord part, so a symbol that is wrong
    # on both sides reports the bass problem.
    bass = _parse_bass(bass_str) if slash else "root"

    # _parse_bass tolerates whitespace around the bass note; strip the chord
    # side as well so 'C / G' is treated the same as 'C/G'.
    chord_part = chord_part.strip()
    if not chord_part:
        raise ChordParseError(f"missing chord before '/': {symbol!r}")

    root, rest = _extract_root(chord_part)
    quality, extension = _parse_quality_ext(rest)
    return ChordTokens(root=root, quality=quality, extension=extension, bass=bass)