compact_sets/src/transcriber.py

700 lines
19 KiB
Python
Raw Normal View History

#!/usr/bin/env python
"""
LilyPond Transcriber - Convert chord data to LilyPond parts and PDF.
Usage:
python src/transcriber.py --name compact_sets_1
Or import and use programmatically:
from src.transcriber import transcribe
transcribe(chords, name="my_piece")
"""
import json
import math
import subprocess
import sys
from fractions import Fraction
from pathlib import Path
# Pitch-class spellings, indexed by semitones above C (0-11), written with the
# LilyPond note names this module emits ("is" suffix = sharp, "es" = flat).
NOTE_NAMES_SHARPS = "c cis d dis e f fis g gis a ais b".split()
NOTE_NAMES_FLATS = "c des d ees e f ges g aes a bes b".split()

# LilyPond octave marks.  Entry i belongs to octave number i - 1 (the numbering
# produced by midi_to_octave), so index 4 (octave 3) is the unmarked octave and
# index 5 (octave 4, middle C) carries a single apostrophe.
OCTAVE_STRINGS = (
    ["," * count for count in range(4, 0, -1)]
    + [""]
    + ["'" * count for count in range(1, 7)]
)
# Beat count -> LilyPond duration token.
# A beat is one quarter note: duration_to_lilypond documents its input as
# quarter-note beats and generate_part fills 4/4 bars with 4 beats each.  Hence
# 1 beat is the quarter note "4" and a full 4-beat bar is the whole note "1".
# Every entry must use that same unit, otherwise the bar accounting in
# generate_part and the printed note lengths disagree.
DURATION_MAP = {
    1: "4",  # quarter
    2: "2",  # half
    3: "2.",  # dotted half
    4: "1",  # whole
    6: "1.",  # dotted whole
    8: "\\breve",  # double whole
}
def cps_to_midi(freq):
    """Map a frequency in Hz onto the (fractional) MIDI note scale.

    440 Hz corresponds to MIDI 69 and every doubling of the frequency adds
    12.  Non-positive frequencies cannot be converted and yield the
    sentinel -1.
    """
    return -1 if freq <= 0 else 69 + 12 * math.log2(freq / 440.0)
def midi_to_pitch_class(midi):
    """Return the pitch class (0-11, 0 = C) of the nearest MIDI note.

    Fractional note numbers are rounded first.  A negative input is passed
    through as the sentinel -1.
    """
    return -1 if midi < 0 else round(midi) % 12
def midi_to_octave(midi):
    """Return the octave number of the nearest MIDI note.

    The numbering puts MIDI 60 (middle C) in octave 4.  A negative input
    yields -1, which is also the octave number of MIDI notes 0-11.
    """
    return -1 if midi < 0 else round(midi) // 12 - 1
def get_clef_for_midi(midi):
    """Pick the clef for a MIDI note number.

    There is a single threshold at middle C (MIDI 60): notes at or above it
    go in the treble clef, everything below in the bass clef.
    """
    return "treble" if midi >= 60 else "bass"
def freq_to_lilypond(freq, spelling="sharps", prev_pitch=None):
    """Convert a frequency to a LilyPond note name in absolute octave entry.

    The frequency is snapped to the nearest equal-tempered MIDI note; pitch
    class and octave are both derived from that one rounded note number so
    they stay consistent over the whole frequency range.

    Args:
        freq: Frequency in Hz.
        spelling: "flats" selects the flat spellings; any other value
            selects the sharp spellings.
        prev_pitch: Previous pitch class (for contextual spelling).
            Accepted for interface compatibility; currently unused.

    Returns:
        LilyPond note string (e.g. "ais''", "ees'"), or "r" (a rest) for a
        non-positive frequency.
    """
    if freq <= 0:
        return "r"
    nearest = round(cps_to_midi(freq))
    # Floor division / modulo keep negative note numbers (below ~8.2 Hz)
    # in the right pitch class and octave instead of hitting a sentinel.
    pitch_class = nearest % 12
    octave = nearest // 12 - 1
    names = NOTE_NAMES_FLATS if spelling == "flats" else NOTE_NAMES_SHARPS
    # The unmarked LilyPond octave is octave 3 (the one below middle C):
    # each octave above adds an apostrophe, each octave below a comma.
    # Computing the marks avoids running off the end of a fixed table for
    # very high or very low frequencies.
    marks = octave - 3
    oct_str = "'" * marks if marks > 0 else "," * -marks
    return names[pitch_class] + oct_str
def duration_to_lilypond(beats):
    """Look up the LilyPond duration token for a count of quarter-note beats.

    The count is rounded to the nearest integer before it is looked up in
    DURATION_MAP; counts without an entry fall back to "4".
    """
    whole_beats = int(round(beats))
    if whole_beats in DURATION_MAP:
        return DURATION_MAP[whole_beats]
    return "4"
def format_cents_deviation(freq):
    """Describe how far a frequency lies from the nearest equal-tempered note.

    Returns:
        The deviation in whole cents as a string, with an explicit "+" for
        positive values (e.g. "+14", "-31", "0"), or None for a
        non-positive frequency.
    """
    if freq <= 0:
        return None
    midi = cps_to_midi(freq)
    cents = round((midi - round(midi)) * 100)
    return f"+{cents}" if cents > 0 else str(cents)
def format_dim_diff(dim_diff, ref):
    """Build the below-the-note markup naming a reference voice and a step.

    Args:
        dim_diff: Signed step value; only its absolute value is printed.
        ref: Index of the reference voice (0-3 map to "IV", "III", "II",
            "I"; larger indices print an empty label).

    Returns:
        A LilyPond ``_\\markup`` string, or "" when either argument is None,
        ``ref`` is negative or ``dim_diff`` is 0.
    """
    if dim_diff is None or ref is None:
        return ""
    if ref < 0 or dim_diff == 0:
        return ""
    # NOTE(review): the sign of dim_diff is not rendered.  The code this was
    # derived from appended empty strings for the positive and negative
    # cases - possibly direction glyphs that were lost; confirm.
    magnitude = str(abs(dim_diff))
    voice_labels = ("IV", "III", "II", "I")
    label = voice_labels[ref] if 0 <= ref <= 3 else ""
    return f'_\\markup {{ \\lower #3 \\pad-markup #0.2 \\concat{{ "{label}"\\normal-size-super " {magnitude}" }} }}'
def generate_part(voice_data, voice_name, voice_idx, clef=None, beats_per_measure=4):
    """Generate the LilyPond music string for a single voice.

    Args:
        voice_data: List of events ``[freq, duration_beats, ref, dim_diff]``.
            Trailing fields may be omitted (duration defaults to 1, ref and
            dim_diff to None).  A frequency <= 0 is a rest.
        voice_name: Voice name (e.g. "I", "II", "III"); currently unused.
        voice_idx: Voice index; currently unused.
        clef: Initial LilyPond clef name.  When omitted it is chosen from
            the first sounding note.  Whatever the initial clef, a clef
            change is emitted whenever a note's register (treble at or
            above middle C, bass below) differs from the current clef.
        beats_per_measure: Quarter-note beats per bar; also determines the
            printed time signature (default 4, i.e. 4/4).

    Returns:
        LilyPond source: clef and time signature, then one ``{ ... }`` group
        per bar separated by bar lines and closed with a final bar line.
        For empty ``voice_data`` only the time signature is returned.

    Raises:
        ValueError: If ``beats_per_measure`` is not positive (the bar
            accounting below could never terminate).
    """
    if beats_per_measure <= 0:
        raise ValueError("beats_per_measure must be positive")
    time_signature = f"\\numericTimeSignature \\time {int(beats_per_measure)}/4\n"
    if not voice_data:
        return time_signature

    if clef is None:
        # Rests carry no pitch, so look past them for the opening clef.
        first_sounding = next((ev[0] for ev in voice_data if ev[0] > 0), None)
        if first_sounding is None:
            initial_clef = "bass"
        else:
            initial_clef = get_clef_for_midi(cps_to_midi(first_sounding))
    else:
        initial_clef = clef
    prefix = f"\\clef {initial_clef}\n" + time_signature

    spelling = "sharps"
    # Normalise every event to (freq, duration, ref, dim_diff).
    events = []
    for event in voice_data:
        freq = event[0]
        dur_beats = event[1] if len(event) > 1 else 1
        ref = event[2] if len(event) > 2 else None
        dim_diff = event[3] if len(event) > 3 else None
        events.append((freq, dur_beats, ref, dim_diff))

    measures = []
    tokens = []
    beat_in_measure = 0
    current_clef = initial_clef
    last_index = len(events) - 1
    for i, (freq, dur_beats, ref, dim_diff) in enumerate(events):
        is_rest = freq <= 0
        # A tie joins two adjacent sounding events of exactly equal
        # frequency (an equal neighbour of a sounding note cannot be a rest).
        tied_from_prev = not is_rest and i > 0 and events[i - 1][0] == freq
        tied_to_next = not is_rest and i < last_index and events[i + 1][0] == freq

        token = freq_to_lilypond(freq, spelling) + duration_to_lilypond(dur_beats)
        if not is_rest:
            # The cent deviation is printed once, at the start of a tie chain.
            if not tied_from_prev:
                cents_dev = format_cents_deviation(freq)
                if cents_dev:
                    token += f'^\\markup {{ \\pad-markup #0.2 "{cents_dev}" }}'
            token += format_dim_diff(dim_diff, ref)
        if tied_to_next:
            token += " ~"

        # Only sounding notes may trigger a clef change; a rest keeps the
        # clef in force instead of dropping to bass and back.
        if not is_rest:
            required_clef = get_clef_for_midi(cps_to_midi(freq))
            if required_clef != current_clef:
                current_clef = required_clef
                token = f"\\clef {current_clef} {token}"

        tokens.append(token)
        beat_in_measure += int(round(dur_beats))
        # Close a bar each time the running beat count fills one.  An event
        # longer than a bar is not split; it closes several bars at once.
        while beat_in_measure >= beats_per_measure:
            beat_in_measure -= beats_per_measure
            measures.append(" ".join(tokens))
            tokens = []
    if tokens:
        measures.append(" ".join(tokens))

    bars = ["{ " + measure + " }" for measure in measures]
    music_str = ' \n\\bar "|" '.join(bars) + '\n\\bar "|."'
    return prefix + music_str
def _roman_numeral(number):
    """Return the Roman numeral for a positive integer (1 -> "I", 4 -> "IV")."""
    glyphs = (
        (100, "C"),
        (90, "XC"),
        (50, "L"),
        (40, "XL"),
        (10, "X"),
        (9, "IX"),
        (5, "V"),
        (4, "IV"),
        (1, "I"),
    )
    pieces = []
    for value, glyph in glyphs:
        count, number = divmod(number, value)
        pieces.append(glyph * count)
    return "".join(pieces)


def generate_parts(music_data, name, output_dir="lilypond"):
    """Generate one LilyPond part file per voice.

    Files are written to ``<output_dir>/<name>/includes/part_<voice>.ly``.
    Voices are labelled with Roman numerals counted from the top: the voice
    with the highest index is "I" and index 0 gets the largest numeral.  For
    four voices this is the fixed I-IV assignment; for any other voice count
    the files written are exactly "I" .. "<number of voices>", which are the
    names the generated score includes.

    Args:
        music_data: List of voices, each voice is a list of events
        name: Name for the output (e.g., "compact_sets_1")
        output_dir: Base output directory
    """
    includes_dir = Path(output_dir) / name / "includes"
    includes_dir.mkdir(parents=True, exist_ok=True)
    num_voices = len(music_data)
    # Walk from the top voice down so files are produced in score order.
    for voice_idx in range(num_voices - 1, -1, -1):
        voice_name = _roman_numeral(num_voices - voice_idx)
        part_str = generate_part(music_data[voice_idx], voice_name, voice_idx)
        part_file = includes_dir / f"part_{voice_name}.ly"
        part_file.write_text(part_str)
        print(f"Generated: {part_file}")
def _is_adjacent(hs1: tuple, hs2: tuple) -> bool:
"""Check if two hs_arrays are adjacent (differ by ±1 in exactly one dimension, excluding dim 0)."""
diff_count = 0
for i in range(1, len(hs1)):
diff = abs(hs1[i] - hs2[i])
if diff > 1:
return False
if diff == 1:
diff_count += 1
return diff_count == 1
def _compute_dim_diff(current: tuple, prev: tuple, primes: list[int]) -> int:
"""Compute dim_diff between two hs_arrays. Returns prime * direction."""
for i in range(1, len(primes) + 1):
diff = current[i] - prev[i]
if diff == 1:
return primes[i - 1]
if diff == -1:
return -primes[i - 1]
return 0
def _find_ref_and_dim_diff(
    current_hs: tuple, prev_chord: list, staying_voices: list, primes: list[int]
) -> tuple[int, int]:
    """Pick the staying voice a changed pitch is measured against.

    Among the staying voices whose previous hs_array is adjacent to
    ``current_hs``, the one with the smallest absolute dim_diff wins; ties
    go to the voice listed first in ``staying_voices``.

    Args:
        current_hs: hs_array of the current pitch
        prev_chord: list of hs_arrays from the previous chord
        staying_voices: indices of voices that stay
        primes: list of primes for the dimensional calculation

    Returns:
        (ref, dim_diff) tuple; (-1, 0) when no staying voice is adjacent.
    """
    if not staying_voices:
        return -1, 0
    candidates = [
        (voice, _compute_dim_diff(current_hs, prev_chord[voice], primes))
        for voice in staying_voices
        if _is_adjacent(current_hs, prev_chord[voice])
    ]
    if not candidates:
        return -1, 0
    # min() keeps the earliest candidate among equals, like a stable sort.
    return min(candidates, key=lambda pair: abs(pair[1]))
def _find_ref_in_same_chord(
    pitch_idx: int, chord_pitches: list, primes: list[int]
) -> tuple[int, int]:
    """Pick a reference pitch for ``pitch_idx`` from within its own chord.

    Every other pitch of the chord that is adjacent to the pitch at
    ``pitch_idx`` is a candidate; the one with the smallest absolute
    dim_diff wins, ties going to the lowest index.

    Args:
        pitch_idx: index of the current pitch in the chord
        chord_pitches: list of hs_arrays for all pitches in the chord
        primes: list of primes for the dimensional calculation

    Returns:
        (ref, dim_diff) tuple where ref is the index of the chosen pitch;
        (-1, 0) when no other pitch is adjacent.
    """
    own_hs = chord_pitches[pitch_idx]
    candidates = [
        (other_idx, _compute_dim_diff(own_hs, other_hs, primes))
        for other_idx, other_hs in enumerate(chord_pitches)
        if other_idx != pitch_idx and _is_adjacent(own_hs, other_hs)
    ]
    if not candidates:
        return -1, 0
    # min() keeps the earliest candidate among equals, like a stable sort.
    return min(candidates, key=lambda pair: abs(pair[1]))
def output_chords_to_music_data(chords, fundamental=55, chord_duration=4, dims=None):
    """Turn output_chords.json chords into per-voice event lists.

    Each pitch is a mapping with a "fraction" (ratio to the fundamental) and
    an "hs_array".  The number of voices is taken from the first chord.

    For the first chord every pitch is referenced against another pitch of
    that chord.  Afterwards a pitch that keeps its hs_array gets ref -1 and
    dim_diff 0, and a pitch that changed is referenced against the voices
    that stayed.

    Args:
        chords: List of chords from output_chords.json
        fundamental: Fundamental frequency in Hz
        chord_duration: Duration of each chord in beats
        dims: Tuple of prime dimensions (optional); its first entry is
            skipped.  Without it the primes 3, 5, 7, 11 are used.

    Returns:
        List of voices, each voice is a list of [freq, duration, ref, dim_diff]
    """
    if not chords:
        return []

    # The first dimension (prime 2) never takes part in dim_diff.
    primes = [3, 5, 7, 11] if dims is None else list(dims[1:])

    num_voices = len(chords[0])
    music_data = [[] for _ in range(num_voices)]
    prev_chord = None
    for chord in chords:
        current_hs = [tuple(pitch["hs_array"]) for pitch in chord]
        is_first = prev_chord is None
        if is_first:
            staying_voices = []
        else:
            staying_voices = [
                voice
                for voice in range(num_voices)
                if current_hs[voice] == prev_chord[voice]
            ]

        # zip() stops after num_voices pitches, ignoring any extras.
        for voice_idx, pitch in zip(range(num_voices), chord):
            freq = fundamental * float(Fraction(pitch["fraction"]))
            voice_hs = current_hs[voice_idx]
            if is_first:
                ref, dim_diff = _find_ref_in_same_chord(voice_idx, current_hs, primes)
            elif voice_hs == prev_chord[voice_idx]:
                ref, dim_diff = -1, 0
            else:
                ref, dim_diff = _find_ref_and_dim_diff(
                    voice_hs, prev_chord, staying_voices, primes
                )
            music_data[voice_idx].append([freq, chord_duration, ref, dim_diff])
        prev_chord = current_hs
    return music_data
def generate_score(name, num_voices, output_dir="lilypond"):
    """Generate the full score .ly file from the piece's template.

    Reads ``<output_dir>/<name>/score_template.ly``, substitutes the
    ``{NAME}``, ``{DATE}`` and ``{SCORE}`` placeholders and writes the result
    to ``<output_dir>/<name>/<name>.ly``.  The score block contains one staff
    per voice, named "I", "II", ... from the top, each including
    ``includes/part_<voice>.ly``.

    Args:
        name: Name for the output (used as title)
        num_voices: Number of voices/staves to generate (at most 8 names
            are available)
        output_dir: Base output directory

    Returns:
        True when the score was written, False when the template is missing.
    """
    from datetime import date

    piece_dir = Path(output_dir) / name
    template_path = piece_dir / "score_template.ly"
    if not template_path.exists():
        print(f"Error: Template not found: {template_path}")
        return False
    score_path = piece_dir / f"{name}.ly"

    score_text = template_path.read_text()
    score_text = score_text.replace("{NAME}", name)
    today = date.today().strftime("%d %b %Y")
    score_text = score_text.replace("{DATE}", today)

    voice_names = ["I", "II", "III", "IV", "V", "VI", "VII", "VIII"]
    staff_blocks = []
    for i in range(num_voices):
        v_name = voice_names[i]
        staff_blocks.append(
            f'\n\\new Staff = "{v_name}" \\with {{\n'
            f'instrumentName = "{v_name}"\n'
            f'shortInstrumentName = "{v_name}"\n'
            'midiInstrument = #"clarinet"\n'
            "}\n"
            "{\n"
            f'\\include "includes/part_{v_name}.ly"\n'
            "}\n"
        )
    staves = "".join(staff_blocks)

    # Only LilyPond syntax may appear inside the \score block; any stray text
    # between "<<" and the staff group makes the file fail to compile.
    # NOTE(review): SemiStaffGroup is not a stock LilyPond context -
    # presumably the template defines it; confirm.
    score_block = (
        "\\score{\n"
        "<<\n"
        "\\new SemiStaffGroup {\n"
        "<<\n"
        f"{staves}\n"
        ">>\n"
        "}\n"
        ">>\n"
        "\\layout{}\n"
        "}"
    )
    score_text = score_text.replace("{SCORE}", score_block)
    score_path.write_text(score_text)
    print(f"Generated: {score_path}")
    return True
def generate_pdf(name, lilypond_dir="lilypond", output_dir="."):
    """Run LilyPond on a generated score and locate the resulting PDF.

    The score is expected at ``<lilypond_dir>/<name>/<name>.ly``.  LilyPond
    is pointed at the directory ``<output_dir>/<name>`` (created if needed)
    and the PDF is looked for at ``<output_dir>/<name>/<name>.pdf``.

    Args:
        name: Name of the piece (LilyPond file should be {name}.ly)
        lilypond_dir: Directory containing the LilyPond file
        output_dir: Directory for PDF output

    Returns:
        Path to the generated PDF, or None when the score is missing, the
        ``lilypond`` command is not installed, LilyPond exits with an error,
        or no PDF shows up afterwards.
    """
    source = Path(lilypond_dir) / name / f"{name}.ly"
    if not source.exists():
        print(f"Error: LilyPond file not found: {source}")
        return None

    target_dir = Path(output_dir) / name
    target_dir.mkdir(parents=True, exist_ok=True)
    command = ["lilypond", "-o", str(target_dir), "-f", "pdf", str(source)]
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        # Raised by subprocess when the executable itself cannot be found.
        print("Error: lilypond command not found. Is LilyPond installed?")
        return None

    if completed.returncode != 0:
        print(f"LilyPond error:\n{completed.stderr}")
        return None

    pdf_path = target_dir / f"{name}.pdf"
    if not pdf_path.exists():
        print(f"Warning: LilyPond ran but PDF not found at {pdf_path}")
        print(f"Output: {completed.stdout}")
        return None
    print(f"Generated: {pdf_path}")
    return pdf_path
def transcribe(
    chords,
    name,
    fundamental=55,
    output_dir="lilypond",
    generate_pdf_flag=True,
):
    """Main transcription function.

    Accepts three input shapes:
      * a dict with a "chords" list and an optional "dims" list,
      * a list of chords, each chord a list of pitch dicts,
      * ready-made music data (a list of voices, each a list of events).

    Args:
        chords: Chord data (from output_chords.json) or music data
        name: Name for the output
        fundamental: Fundamental frequency in Hz
        output_dir: Base output directory
        generate_pdf_flag: Whether to generate PDF

    Returns:
        Dictionary with "parts_dir" and "score_file" paths, plus "pdf" when
        a PDF was requested and produced.
    """
    import shutil

    # Handle both old format (list of chords) and new format (dict with dims + chords)
    dims = None
    if isinstance(chords, dict) and "chords" in chords:
        # "dims" is optional: the conversion falls back to default primes.
        raw_dims = chords.get("dims")
        if raw_dims is not None:
            dims = tuple(raw_dims)
        chords = chords["chords"]

    # Chord format is recognised by a pitch dict inside the first chord.  An
    # empty first element cannot be inspected and is treated as music data
    # (an empty voice).
    is_chord_format = (
        bool(chords)
        and isinstance(chords[0], list)
        and bool(chords[0])
        and isinstance(chords[0][0], dict)
    )
    if is_chord_format:
        music_data = output_chords_to_music_data(chords, fundamental, dims=dims)
    else:
        music_data = chords

    output_path = Path(output_dir) / name
    output_path.mkdir(parents=True, exist_ok=True)
    # Seed the piece directory with the shared template when it is available.
    template_source = Path(__file__).parent.parent / "lilypond" / "score_template.ly"
    if template_source.exists():
        shutil.copy(template_source, output_path / "score_template.ly")

    generate_parts(music_data, name, output_dir)
    num_voices = len(music_data)
    generate_score(name, num_voices, output_dir)

    result = {
        "parts_dir": str(Path(output_dir) / name / "includes"),
        "score_file": str(Path(output_dir) / name / f"{name}.ly"),
    }
    if generate_pdf_flag:
        pdf_path = generate_pdf(name, output_dir, output_dir)
        if pdf_path:
            result["pdf"] = str(pdf_path)
    return result
def main():
    """Command-line entry point: load a chords file and transcribe it.

    Exits with status 1 when the chords file does not exist.
    """
    import argparse

    parser = argparse.ArgumentParser(description="LilyPond Transcriber")
    parser.add_argument(
        "--output-dir", default="output", help="Directory with output_chords.json"
    )
    parser.add_argument(
        "--chords-file", default=None, help="Chords file (default: output_chords.json)"
    )
    parser.add_argument(
        "--name", default="compact_sets_transcription", help="Name for output files"
    )
    parser.add_argument(
        "--fundamental", type=float, default=55, help="Fundamental frequency in Hz"
    )
    parser.add_argument(
        "--lilypond-dir", default="lilypond", help="Base LilyPond output directory"
    )
    parser.add_argument("--no-pdf", action="store_true", help="Skip PDF generation")
    args = parser.parse_args()

    chords_file = args.chords_file
    if chords_file is None:
        chords_file = Path(args.output_dir) / "output_chords.json"
    if not Path(chords_file).exists():
        print(f"Error: Chords file not found: {chords_file}")
        print("Run compact_sets.py first to generate chords.")
        sys.exit(1)

    with open(chords_file) as f:
        chords = json.load(f)
    # The file holds either a bare list of chords or a dict wrapping that
    # list; count the chords themselves, not the dict's keys.
    if isinstance(chords, dict) and "chords" in chords:
        chord_count = len(chords["chords"])
    else:
        chord_count = len(chords)
    print(f"Loaded {chord_count} chords from {chords_file}")

    result = transcribe(
        chords,
        args.name,
        fundamental=args.fundamental,
        output_dir=args.lilypond_dir,
        generate_pdf_flag=not args.no_pdf,
    )
    print("\nGenerated files:")
    for key, path in result.items():
        print(f" {key}: {path}")


if __name__ == "__main__":
    main()