#!/usr/bin/env python
"""
LilyPond Transcriber - Convert chord data to LilyPond parts and PDF.

Usage:
    python src/transcriber.py --name compact_sets_1

Or import and use programmatically:
    from src.transcriber import transcribe
    transcribe(chords, name="my_piece")
"""

import argparse
import json
import math
import shutil
import subprocess
import sys
from datetime import date
from fractions import Fraction
from pathlib import Path

NOTE_NAMES_SHARPS = [
    "c", "cis", "d", "dis", "e", "f",
    "fis", "g", "gis", "a", "ais", "b",
]

NOTE_NAMES_FLATS = [
    "c", "des", "d", "ees", "e", "f",
    "ges", "g", "aes", "a", "bes", "b",
]

# Kept for backward compatibility with code that imports it; the octave
# suffix is now computed arithmetically in _octave_marks().
OCTAVE_STRINGS = [
    ",,,,", ",,,", ",,", ",", "",
    "'", "''", "'''", "''''", "'''''", "''''''",
]

# NOTE(review): this table mixes units -- 1/2/3/6 read as eighth-note counts
# while 4 -> "1" only makes sense in quarter-note beats (a full 4/4 measure),
# and 8 -> "2" fits neither.  Only the value 4 is produced by this module
# (chord_duration default), so the table is left untouched; confirm the
# intended unit before relying on the other entries.
DURATION_MAP = {
    1: "8",
    2: "4",
    3: "4.",
    4: "1",
    6: "2.",
    8: "2",
}

# (value, numeral) pairs, largest first, used by _to_roman().
_ROMAN_VALUES = (
    (1000, "M"), (900, "CM"), (500, "D"), (400, "CD"),
    (100, "C"), (90, "XC"), (50, "L"), (40, "XL"),
    (10, "X"), (9, "IX"), (5, "V"), (4, "IV"), (1, "I"),
)


def _to_roman(number):
    """Return the Roman numeral for a positive integer ("" for number <= 0)."""
    parts = []
    for value, numeral in _ROMAN_VALUES:
        count, number = divmod(number, value)
        parts.append(numeral * count)
    return "".join(parts)


def _voice_label(voice_idx, num_voices):
    """Return the staff label of a voice: the highest index is staff "I".

    Returns "" when voice_idx is outside range(num_voices).
    """
    if not 0 <= voice_idx < num_voices:
        return ""
    return _to_roman(num_voices - voice_idx)


def _octave_marks(octave):
    """Return the LilyPond octave suffix for a scientific octave number.

    Octave 3 (the octave below middle C) carries no mark; every octave above
    adds one "'" and every octave below adds one ",".
    """
    offset = octave - 3
    if offset > 0:
        return "'" * offset
    return "," * -offset


def cps_to_midi(freq):
    """Convert frequency in Hz to a (fractional) MIDI note number.

    Returns -1 as a sentinel for non-positive frequencies (rests).
    """
    if freq <= 0:
        return -1
    return 12 * math.log2(freq / 440.0) + 69


def midi_to_pitch_class(midi):
    """Get pitch class (0-11) from MIDI note number (-1 for negative input)."""
    if midi < 0:
        return -1
    return round(midi) % 12


def midi_to_octave(midi):
    """Get scientific octave number (MIDI 60 -> 4) from MIDI note number.

    Returns -1 for negative input.
    """
    if midi < 0:
        return -1
    return (round(midi) // 12) - 1


def get_clef_for_midi(midi):
    """Determine clef based on MIDI note number.

    Single threshold at middle C (C4 = MIDI 60):
    - MIDI >= 60: treble clef
    - MIDI < 60: bass clef
    """
    return "treble" if midi >= 60 else "bass"


def freq_to_lilypond(freq, spelling="sharps", prev_pitch=None):
    """Convert frequency to LilyPond note name (absolute octave entry).

    Args:
        freq: Frequency in Hz
        spelling: "sharps" or "flats" (any value other than "flats" uses sharps)
        prev_pitch: Previous pitch class; accepted for API compatibility but
            currently unused

    Returns:
        LilyPond note string (e.g., "ais''", "ees'") or "r" for rest
    """
    if freq <= 0:
        return "r"

    # Work from the rounded MIDI number directly instead of the sentinel-
    # returning helpers, so very low (negative MIDI) and very high pitches
    # are spelled correctly rather than mis-indexed.
    nearest = round(cps_to_midi(freq))
    pitch_class = nearest % 12
    octave = nearest // 12 - 1

    names = NOTE_NAMES_FLATS if spelling == "flats" else NOTE_NAMES_SHARPS
    return names[pitch_class] + _octave_marks(octave)


def duration_to_lilypond(beats):
    """Convert a beat count to a LilyPond duration string via DURATION_MAP.

    Unknown beat counts fall back to "4".
    """
    beats = int(round(beats))
    return DURATION_MAP.get(beats, "4")


def format_cents_deviation(freq):
    """Format cent deviation from nearest equal-tempered note.

    Returns None for rests, otherwise a string such as "+14", "-31" or "0".
    """
    if freq <= 0:
        return None
    midi = cps_to_midi(freq)
    deviation = round((midi - round(midi)) * 100)
    if deviation > 0:
        return f"+{deviation}"
    return str(deviation)


def format_dim_diff(dim_diff, ref, num_voices=4):
    """Format dimensional difference markup.

    Args:
        dim_diff: Signed step (prime * direction); 0 or None means no markup
        ref: Index of the reference voice; negative or None means no markup
        num_voices: Total number of voices, used to label the reference
            voice (highest index is "I")

    Returns:
        LilyPond markup string attached below the note, or "" if nothing
        should be printed
    """
    if dim_diff is None or ref is None or ref < 0 or dim_diff == 0:
        return ""

    diff_str = str(abs(dim_diff))
    # Any positive step points up, any negative step points down.
    if dim_diff > 0:
        diff_str += "↑"
    elif dim_diff < 0:
        diff_str += "↓"

    ref_name = _voice_label(ref, num_voices)

    return f'_\\markup {{ \\lower #3 \\pad-markup #0.2 \\concat{{ "{ref_name}"\\normal-size-super " {diff_str}" }} }}'


def generate_part(
    voice_data,
    voice_name,
    voice_idx,
    clef=None,
    beats_per_measure=4,
    num_voices=4,
):
    """Generate LilyPond music string for a single voice.

    Args:
        voice_data: List of [freq, duration_beats, ref, dim_diff] events
        voice_name: Voice name (e.g., "I", "II", "III"); not used in the output
        voice_idx: Voice index; not used in the output
        clef: Initial LilyPond clef name (e.g., "treble", "bass") - optional,
            determined from the first sounding note if not provided
        beats_per_measure: Beats per measure used for grouping events
        num_voices: Total number of voices (for labelling reference voices)

    Returns:
        LilyPond music string with clef and time signature
    """
    if not voice_data:
        return "\\numericTimeSignature \\time 4/4\n"

    if clef is None:
        # Rests carry no pitch, so pick the clef from the first sounding
        # note; an all-rest voice falls back to the first event.
        first_freq = next(
            (event[0] for event in voice_data if event[0] > 0),
            voice_data[0][0],
        )
        initial_clef = get_clef_for_midi(cps_to_midi(first_freq))
    else:
        initial_clef = clef

    prefix = f"\\clef {initial_clef}\n"
    prefix += "\\numericTimeSignature \\time 4/4\n"

    spelling = "sharps"

    notes = []
    for event in voice_data:
        freq = event[0]
        dur_beats = event[1] if len(event) > 1 else 1
        notes.append(
            {
                "freq": freq,
                "is_rest": freq <= 0,
                "note_str": freq_to_lilypond(freq, spelling),
                "dur_str": duration_to_lilypond(dur_beats),
                "dur_beats": dur_beats,
                "ref": event[2] if len(event) > 2 else None,
                "dim_diff": event[3] if len(event) > 3 else None,
            }
        )

    measures = []
    current_measure_notes = []
    beat_in_measure = 0
    current_clef = initial_clef
    last_index = len(notes) - 1

    for i, note in enumerate(notes):
        freq = note["freq"]
        is_rest = note["is_rest"]

        # Only sounding notes can require a clef change; a rest must not
        # flip the staff to bass and back.
        clef_change = False
        if not is_rest:
            required_clef = get_clef_for_midi(cps_to_midi(freq))
            if required_clef != current_clef:
                current_clef = required_clef
                clef_change = True

        prev_note = notes[i - 1] if i > 0 else None
        next_note = notes[i + 1] if i < last_index else None

        # Consecutive events with the identical frequency are tied together.
        is_tied_from_prev = (
            prev_note is not None
            and not is_rest
            and not prev_note["is_rest"]
            and freq == prev_note["freq"]
        )
        is_tied_to_next = (
            next_note is not None
            and not is_rest
            and not next_note["is_rest"]
            and freq == next_note["freq"]
        )

        note_str_full = note["note_str"] + note["dur_str"]

        if not is_rest:
            # The cents annotation is printed only at the start of a tie chain.
            if not is_tied_from_prev:
                cents_dev = format_cents_deviation(freq)
                if cents_dev:
                    note_str_full += (
                        f'^\\markup {{ \\pad-markup #0.2 "{cents_dev}" }}'
                    )
            note_str_full += format_dim_diff(
                note["dim_diff"], note["ref"], num_voices
            )

        if is_tied_to_next:
            note_str_full += " ~"
        else:
            note_str_full = " " + note_str_full

        if clef_change:
            note_str_full = f"\\clef {current_clef} {note_str_full}"

        current_measure_notes.append(note_str_full)

        beat_in_measure += int(round(note["dur_beats"]))
        if beat_in_measure >= beats_per_measure:
            # Close the measure once, even if the event spans several
            # measures, so no empty "{  }" groups are emitted.
            measures.append("".join(current_measure_notes))
            current_measure_notes = []
            beat_in_measure %= beats_per_measure

    if current_measure_notes:
        measures.append("".join(current_measure_notes))

    music_str = ' \n\\bar "|" '.join(
        "{ " + measure + " }" for measure in measures
    )
    music_str += '\n\\bar "|."'

    return prefix + music_str


def generate_parts(music_data, name, output_dir="lilypond"):
    """Generate LilyPond part files.

    One file ``includes/part_<label>.ly`` is written per voice.  The voice
    with the highest index is labelled "I", the next "II", and so on, which
    matches the staff names generate_score() includes.

    Args:
        music_data: List of voices, each voice is a list of events
        name: Name for the output (e.g., "compact_sets_1")
        output_dir: Base output directory
    """
    includes_dir = Path(output_dir) / name / "includes"
    includes_dir.mkdir(parents=True, exist_ok=True)

    num_voices = len(music_data)
    for voice_idx in reversed(range(num_voices)):
        voice_name = _voice_label(voice_idx, num_voices)
        part_str = generate_part(
            music_data[voice_idx], voice_name, voice_idx, num_voices=num_voices
        )

        part_file = includes_dir / f"part_{voice_name}.ly"
        # The markup contains non-ASCII arrows, so the encoding is explicit.
        part_file.write_text(part_str, encoding="utf-8")

        print(f"Generated: {part_file}")


def _is_adjacent(hs1: tuple, hs2: tuple) -> bool:
    """Check if two hs_arrays are adjacent.

    Adjacent means they differ by exactly 1 in exactly one dimension and are
    equal in all others; dimension 0 is ignored.
    """
    diff_count = 0
    for i in range(1, len(hs1)):
        diff = abs(hs1[i] - hs2[i])
        if diff > 1:
            return False
        diff_count += diff == 1
    return diff_count == 1


def _compute_dim_diff(current: tuple, prev: tuple, primes: list[int]) -> int:
    """Compute dim_diff between two hs_arrays.

    Returns ``prime * direction`` for the first dimension (after dimension 0)
    in which ``current`` is exactly one step away from ``prev``, or 0 if
    there is none.  Dimensions without a matching prime are ignored.
    """
    for prime, cur_val, prev_val in zip(primes, current[1:], prev[1:]):
        diff = cur_val - prev_val
        if diff == 1:
            return prime
        if diff == -1:
            return -prime
    return 0


def _find_ref_and_dim_diff(
    current_hs: tuple, prev_chord: list, staying_voices: list, primes: list[int]
) -> tuple[int, int]:
    """Find ref (staying voice index) and dim_diff for a changed pitch.

    Args:
        current_hs: hs_array of current pitch
        prev_chord: list of hs_arrays from previous chord
        staying_voices: indices of voices that stay
        primes: list of primes for dimensional calculation

    Returns:
        (ref, dim_diff) tuple; (-1, 0) when no staying voice is adjacent
    """
    adjacent = [
        (idx, _compute_dim_diff(current_hs, prev_chord[idx], primes))
        for idx in staying_voices
        if _is_adjacent(current_hs, prev_chord[idx])
    ]
    if not adjacent:
        return -1, 0

    # Smallest step wins; min() keeps the first candidate on ties.
    return min(adjacent, key=lambda pair: abs(pair[1]))


def _find_ref_in_same_chord(
    pitch_idx: int, chord_pitches: list, primes: list[int]
) -> tuple[int, int]:
    """Find ref (other pitch index) and dim_diff within the same chord.

    Args:
        pitch_idx: index of the current pitch in the chord
        chord_pitches: list of hs_arrays for all pitches in the chord
        primes: list of primes for dimensional calculation

    Returns:
        (ref, dim_diff) tuple where ref is index of adjacent pitch in same
        chord; (-1, 0) when no other pitch is adjacent
    """
    current_hs = chord_pitches[pitch_idx]
    adjacent = [
        (idx, _compute_dim_diff(current_hs, other_hs, primes))
        for idx, other_hs in enumerate(chord_pitches)
        if idx != pitch_idx and _is_adjacent(current_hs, other_hs)
    ]
    if not adjacent:
        return -1, 0

    # Smallest step wins; min() keeps the first candidate on ties.
    return min(adjacent, key=lambda pair: abs(pair[1]))


def output_chords_to_music_data(chords, fundamental=55, chord_duration=4, dims=None):
    """Convert output_chords.json format to generic music data.

    Args:
        chords: List of chords; each chord is a list of pitch dicts with
            "hs_array" and "fraction" entries
        fundamental: Fundamental frequency in Hz
        chord_duration: Duration of each chord in beats
        dims: Tuple of prime dimensions (optional, for computing dim_diff)

    Returns:
        List of voices, each voice is a list of [freq, duration, ref, dim_diff]
    """
    if not chords:
        return []

    # The first entry of dims belongs to dimension 0 and is skipped.
    if dims is not None:
        primes = list(dims[1:])
    else:
        primes = [3, 5, 7, 11]  # Default fallback

    # The first chord fixes the number of voices; extra pitches are ignored.
    num_voices = len(chords[0])
    music_data = [[] for _ in range(num_voices)]

    prev_chord = None

    for chord in chords:
        current_hs = [tuple(p["hs_array"]) for p in chord]

        if prev_chord is None:
            staying_voices = []
        else:
            # Bounded by the shorter chord so uneven chords cannot overrun.
            shared = min(num_voices, len(current_hs), len(prev_chord))
            staying_voices = [
                i for i in range(shared) if current_hs[i] == prev_chord[i]
            ]

        for voice_idx, pitch in enumerate(chord[:num_voices]):
            freq = fundamental * float(Fraction(pitch["fraction"]))
            current_hs_array = current_hs[voice_idx]

            if prev_chord is None:
                # First chord: relate each pitch to a neighbour in the chord.
                ref, dim_diff = _find_ref_in_same_chord(
                    voice_idx, current_hs, primes
                )
            elif (
                voice_idx < len(prev_chord)
                and current_hs_array == prev_chord[voice_idx]
            ):
                # Voice did not move: nothing to annotate.
                ref, dim_diff = -1, 0
            else:
                ref, dim_diff = _find_ref_and_dim_diff(
                    current_hs_array, prev_chord, staying_voices, primes
                )

            music_data[voice_idx].append([freq, chord_duration, ref, dim_diff])

        prev_chord = current_hs

    return music_data


def generate_score(name, num_voices, output_dir="lilypond"):
    """Generate full score .ly file from template.

    The template ``<output_dir>/<name>/score_template.ly`` has its
    ``{NAME}``, ``{DATE}`` and ``{SCORE}`` placeholders replaced.

    Args:
        name: Name for the output (used as title)
        num_voices: Number of voices/staves to generate
        output_dir: Base output directory

    Returns:
        True if the score was written, False if the template is missing
    """
    template_path = Path(output_dir) / name / "score_template.ly"
    if not template_path.exists():
        print(f"Error: Template not found: {template_path}")
        return False

    score_path = Path(output_dir) / name / f"{name}.ly"

    score_text = template_path.read_text(encoding="utf-8")
    score_text = score_text.replace("{NAME}", name)

    today = date.today().strftime("%d %b %Y")
    score_text = score_text.replace("{DATE}", today)

    staff_blocks = []
    for i in range(num_voices):
        v_name = _to_roman(i + 1)
        staff_blocks.append(
            f'''
\\new Staff = "{v_name}" \\with {{
  instrumentName = "{v_name}"
  shortInstrumentName = "{v_name}"
  midiInstrument = #"clarinet"
}}
{{
  \\include "includes/part_{v_name}.ly"
}}
'''
        )
    staves = "".join(staff_blocks)

    # NOTE(review): SemiStaffGroup is not a built-in LilyPond context;
    # presumably the template defines it -- confirm.
    score_block = f"""\\score{{
<<
\\new SemiStaffGroup {{
<<
{staves}
>>
}}
>>
\\layout{{}}
}}"""

    score_text = score_text.replace("{SCORE}", score_block)

    score_path.write_text(score_text, encoding="utf-8")

    print(f"Generated: {score_path}")
    return True


def generate_pdf(name, lilypond_dir="lilypond", output_dir="."):
    """Generate PDF from LilyPond score.

    Args:
        name: Name of the piece (LilyPond file should be {name}.ly)
        lilypond_dir: Directory containing the LilyPond file
        output_dir: Directory for PDF output

    Returns:
        Path to generated PDF, or None if failed
    """
    ly_file = Path(lilypond_dir) / name / f"{name}.ly"
    if not ly_file.exists():
        print(f"Error: LilyPond file not found: {ly_file}")
        return None

    # The output directory is created first and then passed to -o.
    output_base = Path(output_dir) / name
    output_base.mkdir(parents=True, exist_ok=True)

    command = ["lilypond", "-o", str(output_base), "-f", "pdf", str(ly_file)]
    try:
        result = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        print("Error: lilypond command not found. Is LilyPond installed?")
        return None

    if result.returncode != 0:
        print(f"LilyPond error:\n{result.stderr}")
        return None

    pdf_path = output_base / f"{name}.pdf"
    if pdf_path.exists():
        print(f"Generated: {pdf_path}")
        return pdf_path

    print(f"Warning: LilyPond ran but PDF not found at {pdf_path}")
    print(f"Output: {result.stdout}")
    return None


def transcribe(
    chords,
    name,
    fundamental=55,
    output_dir="lilypond",
    generate_pdf_flag=True,
):
    """Main transcription function.

    Args:
        chords: Chord data. Accepted shapes: a dict with "chords" (and
            optionally "dims"), a list of chords (lists of pitch dicts), or
            ready-made music_data (list of voices)
        name: Name for the output
        fundamental: Fundamental frequency in Hz
        output_dir: Base output directory
        generate_pdf_flag: Whether to generate PDF

    Returns:
        Dictionary with paths to generated files
    """
    # Handle both old format (list of chords) and new format (dict with
    # dims + chords); "dims" is optional in the dict form.
    dims = None
    if isinstance(chords, dict) and "chords" in chords:
        raw_dims = chords.get("dims")
        dims = tuple(raw_dims) if raw_dims is not None else None
        chords = chords["chords"]

    # A list whose first entry is a non-empty list of dicts is chord data;
    # anything else is treated as already-converted music_data.
    is_chord_list = bool(
        chords
        and isinstance(chords[0], list)
        and chords[0]
        and isinstance(chords[0][0], dict)
    )
    if is_chord_list:
        music_data = output_chords_to_music_data(chords, fundamental, dims=dims)
    else:
        music_data = chords

    output_path = Path(output_dir) / name
    output_path.mkdir(parents=True, exist_ok=True)

    template_source = Path(__file__).parent.parent / "lilypond" / "score_template.ly"
    if template_source.exists():
        shutil.copy(template_source, output_path / "score_template.ly")

    generate_parts(music_data, name, output_dir)

    generate_score(name, len(music_data), output_dir)

    result = {
        "parts_dir": str(output_path / "includes"),
        "score_file": str(output_path / f"{name}.ly"),
    }

    if generate_pdf_flag:
        pdf_path = generate_pdf(name, output_dir, output_dir)
        if pdf_path:
            result["pdf"] = str(pdf_path)

    return result


def main():
    """Command-line entry point: load a chords JSON file and transcribe it."""
    parser = argparse.ArgumentParser(description="LilyPond Transcriber")
    parser.add_argument(
        "--output-dir", default="output", help="Directory with output_chords.json"
    )
    parser.add_argument(
        "--chords-file", default=None, help="Chords file (default: output_chords.json)"
    )
    parser.add_argument(
        "--name", default="compact_sets_transcription", help="Name for output files"
    )
    parser.add_argument(
        "--fundamental", type=float, default=55, help="Fundamental frequency in Hz"
    )
    parser.add_argument(
        "--lilypond-dir", default="lilypond", help="Base LilyPond output directory"
    )
    parser.add_argument("--no-pdf", action="store_true", help="Skip PDF generation")

    args = parser.parse_args()

    chords_file = args.chords_file
    if chords_file is None:
        chords_file = Path(args.output_dir) / "output_chords.json"

    if not Path(chords_file).exists():
        print(f"Error: Chords file not found: {chords_file}")
        print("Run compact_sets.py first to generate chords.")
        sys.exit(1)

    with open(chords_file, encoding="utf-8") as f:
        chords = json.load(f)

    # In the dict format the chords live under "chords"; len() of the dict
    # itself would only count its keys.
    if isinstance(chords, dict) and "chords" in chords:
        chord_count = len(chords["chords"])
    else:
        chord_count = len(chords)
    print(f"Loaded {chord_count} chords from {chords_file}")

    result = transcribe(
        chords,
        args.name,
        fundamental=args.fundamental,
        output_dir=args.lilypond_dir,
        generate_pdf_flag=not args.no_pdf,
    )

    print("\nGenerated files:")
    for key, path in result.items():
        print(f"  {key}: {path}")


if __name__ == "__main__":
    main()