refactor: replace fixed STYLE_user with open-ended style tag system

- STYLE_user renamed to STYLE_H1K0 in VOCAB (author's personal tag)
- Style field now accepts any [A-Za-z][A-Za-z0-9_]* identifier in .chord files
- Unknown styles fall back to STYLE_other at tokenization time with a log warning
- Test fixtures updated to `style: other`; the closed _VALID_STYLES frozenset is dropped
- Spec bumped to v2.1: documents the open style field and the fallback behaviour, and
  includes a §5.7 guide on registering a new style token

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-05-20 00:29:52 +03:00
parent 84ba7b4743
commit 4fd8ece170
12 changed files with 60 additions and 38 deletions
+12 -7
View File
@@ -17,6 +17,7 @@ See docs/chord_format_spec.md §5.2 for the vocabulary specification.
from __future__ import annotations
import logging
import re
from dataclasses import dataclass, replace
from pathlib import Path
@@ -67,9 +68,6 @@ _FLAT_TO_SHARP: dict[str, str] = {
}
_VALID_TIMES: frozenset[str] = frozenset({"4/4", "3/4", "6/8", "2/4", "12/8"})
_VALID_STYLES: frozenset[str] = frozenset(
{"user", "jpop", "classical", "jazz", "other"}
)
_VALID_FUNCTIONS: frozenset[str] = frozenset({
"verse", "prechorus", "chorus", "bridge",
"intro", "outro", "interlude", "other",
@@ -90,7 +88,7 @@ VOCAB: list[str] = [
# Subdivision (2)
"SUB_4", "SUB_8",
# Style (5)
"STYLE_user", "STYLE_jpop", "STYLE_classical", "STYLE_jazz", "STYLE_other",
"STYLE_H1K0", "STYLE_jpop", "STYLE_classical", "STYLE_jazz", "STYLE_other",
# Function (9)
"FUNC_verse", "FUNC_prechorus", "FUNC_chorus", "FUNC_bridge",
"FUNC_intro", "FUNC_outro", "FUNC_interlude", "FUNC_other", "FUNC_unspecified",
@@ -239,8 +237,11 @@ def parse_chord_file(path: Path) -> ChordPeriod:
)
style = header["style"]
if style not in _VALID_STYLES:
raise ChordFormatError(f"{fname}: invalid style '{style}'")
if not re.match(r'^[A-Za-z][A-Za-z0-9_]*$', style):
raise ChordFormatError(
f"{fname}: invalid style '{style}' — must be a non-empty identifier"
" ([A-Za-z][A-Za-z0-9_]*)"
)
raw_function = header.get("function", "")
if raw_function and raw_function not in _VALID_FUNCTIONS:
@@ -355,7 +356,11 @@ def tokenize_period(period: ChordPeriod) -> list[int]:
ids.append(TOKEN_TO_ID[f"MODE_{mode}"])
ids.append(TOKEN_TO_ID[f"TIME_{p.time}"])
ids.append(TOKEN_TO_ID[f"SUB_{p.subdivision}"])
ids.append(TOKEN_TO_ID[f"STYLE_{p.style}"])
style_token = f"STYLE_{p.style}"
if style_token not in TOKEN_TO_ID:
log.warning("unknown style %r — mapping to STYLE_other", p.style)
style_token = "STYLE_other"
ids.append(TOKEN_TO_ID[style_token])
ids.append(TOKEN_TO_ID[f"FUNC_{p.function}"])
for bar in p.bars: