compact_sets/src/transcriber.py

#!/usr/bin/env python
"""
LilyPond Transcriber - Convert chord data to LilyPond parts and PDF.

Usage:
    python src/transcriber.py --name compact_sets_1

Or import and use programmatically:
    from src.transcriber import transcribe
    transcribe(chords, name="my_piece")
"""

import json
import math
import subprocess
import sys
from fractions import Fraction
from pathlib import Path


# LilyPond note names indexed by pitch class (0 = c ... 11 = b); the five
# chromatic pitch classes are spelled with sharps ("-is").
NOTE_NAMES_SHARPS = "c cis d dis e f fis g gis a ais b".split()
# LilyPond note names indexed by pitch class (0 = c ... 11 = b); the five
# chromatic pitch classes are spelled with flats ("-es").
NOTE_NAMES_FLATS = "c des d ees e f ges g aes a bes b".split()

# Octave suffixes ordered from lowest to highest: four commas down to one,
# then the unmarked octave (index 4), then one to six apostrophes.
OCTAVE_STRINGS = [
    *("," * count for count in range(4, 0, -1)),
    "",
    *("'" * count for count in range(1, 7)),
]

# Lookup from an integer beat count to a LilyPond duration token. Used by
# duration_to_lilypond, which returns "4" for any count not listed here.
# NOTE(review): the entries do not follow a single beat unit (4 maps to a
# whole note "1" while 8 maps to a half note "2", and 1 maps to an eighth
# note "8") -- confirm the intended unit before relying on values other than 4.
DURATION_MAP = {
    1: "8",
    2: "4",
    3: "4.",
    4: "1",
    6: "2.",
    8: "2",
}


def cps_to_midi(freq):
    """Return the (fractional) MIDI note number for a frequency in Hz.

    The mapping is anchored at 440 Hz = MIDI 69 with 12 steps per octave.
    Non-positive frequencies have no pitch and yield the sentinel -1.
    """
    if freq <= 0:
        return -1
    semitones_from_a4 = 12 * math.log2(freq / 440.0)
    return semitones_from_a4 + 69


def midi_to_pitch_class(midi):
    """Return the pitch class (0-11) of the nearest whole MIDI note.

    Negative input is treated as "no pitch" and yields -1.
    """
    return -1 if midi < 0 else round(midi) % 12


def midi_to_octave(midi):
    """Return the octave number of the nearest whole MIDI note.

    The numbering puts MIDI 60 in octave 4 (and MIDI 0-11 in octave -1).
    Negative input is treated as "no pitch" and also yields -1.
    """
    return -1 if midi < 0 else round(midi) // 12 - 1


def get_clef_for_midi(midi):
    """Choose a clef name for a MIDI note number.

    A single threshold at MIDI 60 is used: values at or above it get
    "treble", everything below gets "bass".
    """
    return "treble" if midi >= 60 else "bass"


def freq_to_lilypond(freq, spelling="sharps", prev_pitch=None):
    """Convert a frequency to a LilyPond pitch name with octave marks.

    The frequency is rounded to the nearest equal-tempered semitone; the
    pitch name comes from NOTE_NAMES_SHARPS / NOTE_NAMES_FLATS and the octave
    is written with "'" (up) or "," (down) marks.

    Args:
        freq: Frequency in Hz; values <= 0 denote a rest.
        spelling: "flats" selects the flat spellings; any other value
            selects the sharp spellings.
        prev_pitch: Accepted for interface compatibility; currently unused.

    Returns:
        LilyPond note string (e.g., "ais''", "ees'") or "r" for a rest.
    """
    if freq <= 0:
        return "r"

    # Work from the real rounded MIDI number. The -1 "no pitch" sentinels of
    # the helper functions must not be used as a pitch class or octave here:
    # for audible-but-very-low input (MIDI < 0) they would index the name
    # table from the end and always produce "b".
    midi_number = round(cps_to_midi(freq))
    pc = midi_number % 12
    octave = midi_number // 12 - 1

    names = NOTE_NAMES_FLATS if spelling == "flats" else NOTE_NAMES_SHARPS

    # Octave 3 (the octave below MIDI 60) is written without marks; build the
    # marks arithmetically so no octave can fall outside a fixed table.
    marks = octave - 3
    oct_str = "'" * marks if marks > 0 else "," * -marks
    return names[pc] + oct_str


def duration_to_lilypond(beats):
    """Look up the LilyPond duration token for a beat count.

    The count is rounded to the nearest integer before the DURATION_MAP
    lookup; counts without an entry fall back to "4".
    """
    whole_beats = int(round(beats))
    if whole_beats in DURATION_MAP:
        return DURATION_MAP[whole_beats]
    return "4"


def format_cents_deviation(freq):
    """Return the signed cent offset from the nearest semitone as text.

    Positive offsets carry an explicit "+"; zero and negative offsets are
    the plain integer. Non-positive frequencies (rests) yield None.
    """
    if freq <= 0:
        return None

    midi = cps_to_midi(freq)
    cents = round(100 * (midi - round(midi)))
    return f"+{cents}" if cents > 0 else str(cents)


def format_dim_diff(dim_diff, ref):
    """Build the below-staff markup describing a dimensional step.

    Args:
        dim_diff: Signed step size; the magnitude is printed and the sign
            selects an up or down arrow. None or 0 means "nothing to show".
        ref: Index of the reference voice (0-3 map to "IV", "III", "II",
            "I"). None or a negative value means "no reference".

    Returns:
        A LilyPond ``_\\markup`` post-event string, or "" when there is
        nothing to show. A ref above 3 still produces markup, with an empty
        voice label.
    """
    if dim_diff is None or ref is None or ref < 0 or dim_diff == 0:
        return ""

    diff_str = str(abs(dim_diff))
    # Zero was rejected above, so the sign alone decides the arrow; testing
    # "> 0" (not "> 1") keeps a step of exactly 1 from losing its arrow.
    if dim_diff > 0:
        diff_str += "↑"
    elif dim_diff < 0:
        diff_str += "↓"

    ref_names = ["IV", "III", "II", "I"]
    ref_name = ref_names[ref] if 0 <= ref <= 3 else ""

    return f'_\\markup {{ \\lower #3 \\pad-markup #0.2 \\concat{{ "{ref_name}"\\normal-size-super " {diff_str}" }} }}'


def generate_part(voice_data, voice_name, voice_idx, clef=None, beats_per_measure=4):
    """Generate LilyPond music string for a single voice.

    Each event becomes one note or rest token. Consecutive events with the
    same positive frequency are tied, and the cents annotation is printed
    only on the first note of such a tied run. Tokens are grouped into
    ``{ ... }`` measures separated by explicit bar lines.

    Args:
        voice_data: List of [freq, duration_beats, ref, dim_diff] events;
            trailing fields may be omitted (duration defaults to 1, ref and
            dim_diff to None). freq <= 0 denotes a rest.
        voice_name: Voice name (e.g., "I", "II", "III"); not used in the
            generated text.
        voice_idx: Voice index; not used in the generated text.
        clef: LilyPond clef name (e.g., "treble", "alto", "bass"). When
            given, it is used for the whole part and no automatic clef
            changes are inserted. When None, the clef follows the notes
            (see get_clef_for_midi), starting from the first sounding note.
        beats_per_measure: Beat count after which a measure is closed
            (default 4). The emitted time signature is always 4/4.

    Returns:
        LilyPond music string with clef and time signature. For empty
        voice_data only the time signature line is returned.

    Raises:
        ValueError: If beats_per_measure is not positive (the measure
            grouping loop could never terminate).
    """
    if not voice_data:
        return "\\numericTimeSignature \\time 4/4\n"

    if beats_per_measure <= 0:
        raise ValueError("beats_per_measure must be positive")

    # Normalise events so the main loop can unpack fixed-size tuples.
    events = []
    for event in voice_data:
        freq = event[0]
        dur_beats = event[1] if len(event) > 1 else 1
        ref = event[2] if len(event) > 2 else None
        dim_diff = event[3] if len(event) > 3 else None
        events.append((freq, dur_beats, ref, dim_diff))

    auto_clef = clef is None
    if auto_clef:
        # A rest has no pitch, so take the opening clef from the first
        # sounding note; fall back to the first event if everything rests.
        first_pitched = next((ev[0] for ev in events if ev[0] > 0), events[0][0])
        initial_clef = get_clef_for_midi(cps_to_midi(first_pitched))
    else:
        initial_clef = clef

    prefix = f"\\clef {initial_clef}\n"
    prefix += "\\numericTimeSignature \\time 4/4\n"

    measures = []
    current_measure = []
    beat_in_measure = 0
    current_clef = initial_clef
    last_index = len(events) - 1

    for i, (freq, dur_beats, ref, dim_diff) in enumerate(events):
        is_rest = freq <= 0

        # Only sounding notes may move the clef, and only when the caller
        # did not pin one. Rests used to be evaluated as MIDI -1, which
        # forced a switch to bass clef on every rest.
        clef_change = False
        if auto_clef and not is_rest:
            required_clef = get_clef_for_midi(cps_to_midi(freq))
            if required_clef != current_clef:
                current_clef = required_clef
                clef_change = True

        # Equal positive frequencies imply neither neighbour is a rest.
        tied_from_prev = not is_rest and i > 0 and events[i - 1][0] == freq
        tied_to_next = not is_rest and i < last_index and events[i + 1][0] == freq

        token = freq_to_lilypond(freq, "sharps") + duration_to_lilypond(dur_beats)

        if not is_rest:
            if not tied_from_prev:
                cents_dev = format_cents_deviation(freq)
                if cents_dev:
                    token += f'^\\markup {{ \\pad-markup #0.2 "{cents_dev}" }}'
            token += format_dim_diff(dim_diff, ref)

        if tied_to_next:
            token += " ~"
        if clef_change:
            token = f"\\clef {current_clef} {token}"

        current_measure.append(token)

        beat_in_measure += int(round(dur_beats))
        while beat_in_measure >= beats_per_measure:
            beat_in_measure -= beats_per_measure
            # An event longer than one measure passes through this loop
            # several times; only flush when there is something to flush so
            # no empty "{  }" group is emitted.
            if current_measure:
                measures.append(" ".join(current_measure))
                current_measure = []

    if current_measure:
        measures.append(" ".join(current_measure))

    music_str = '  \n\\bar "|"  '.join("{ " + measure + " }" for measure in measures)
    music_str += '\n\\bar "|."'

    return prefix + music_str


def generate_parts(music_data, name, output_dir="lilypond"):
    """Generate LilyPond part files.

    Writes one ``part_<voice name>.ly`` file per voice into
    ``<output_dir>/<name>/includes`` (created if missing) and prints the
    path of every file written.

    Args:
        music_data: List of voices, each voice is a list of events
        name: Name for the output (e.g., "compact_sets_1")
        output_dir: Base output directory
    """
    includes_dir = Path(output_dir) / name / "includes"
    includes_dir.mkdir(parents=True, exist_ok=True)

    # Voice index 3 is the top part "I", index 0 the bottom part "IV".
    # NOTE(review): this naming is fixed to four voices; indices above 3 are
    # never written and fewer than four voices produce no "part_I.ly" --
    # confirm against whatever score includes these files.
    voice_order = [
        (3, "I"),
        (2, "II"),
        (1, "III"),
        (0, "IV"),
    ]

    for voice_idx, voice_name in voice_order:
        if voice_idx >= len(music_data):
            continue
        voice_data = music_data[voice_idx]
        part_str = generate_part(voice_data, voice_name, voice_idx)

        part_file = includes_dir / f"part_{voice_name}.ly"
        # Part text can contain non-ASCII arrows from the markup, so the
        # encoding must not depend on the platform default.
        part_file.write_text(part_str, encoding="utf-8")

        print(f"Generated: {part_file}")


def _is_adjacent(hs1: tuple, hs2: tuple) -> bool:
    """Check if two hs_arrays are adjacent (differ by ±1 in exactly one dimension, excluding dim 0)."""
    diff_count = 0
    for i in range(1, len(hs1)):
        diff = abs(hs1[i] - hs2[i])
        if diff > 1:
            return False
        if diff == 1:
            diff_count += 1
    return diff_count == 1


def _compute_dim_diff(current: tuple, prev: tuple, primes: list[int]) -> int:
    """Compute dim_diff between two hs_arrays. Returns prime * direction."""
    for i in range(1, len(primes) + 1):
        diff = current[i] - prev[i]
        if diff == 1:
            return primes[i - 1]
        if diff == -1:
            return -primes[i - 1]
    return 0


def _find_ref_and_dim_diff(
    current_hs: tuple, prev_chord: list, staying_voices: list, primes: list[int]
) -> tuple[int, int]:
    """Find ref (staying voice index) and dim_diff for a changed pitch.

    Args:
        current_hs: hs_array of current pitch
        prev_chord: list of hs_arrays from previous chord
        staying_voices: indices of voices that stay
        primes: list of primes for dimensional calculation

    Returns:
        (ref, dim_diff) tuple
    """
    if not staying_voices:
        return -1, 0

    adjacent = []
    for idx in staying_voices:
        prev_hs = prev_chord[idx]
        if _is_adjacent(current_hs, prev_hs):
            dim_diff = _compute_dim_diff(current_hs, prev_hs, primes)
            adjacent.append((idx, dim_diff))

    if not adjacent:
        return -1, 0

    adjacent.sort(key=lambda x: abs(x[1]))
    return adjacent[0]


def _find_ref_in_same_chord(
    pitch_idx: int, chord_pitches: list, primes: list[int]
) -> tuple[int, int]:
    """Find ref (other pitch index) and dim_diff within the same chord.

    Args:
        pitch_idx: index of the current pitch in the chord
        chord_pitches: list of hs_arrays for all pitches in the chord
        primes: list of primes for dimensional calculation

    Returns:
        (ref, dim_diff) tuple where ref is index of adjacent pitch in same chord
    """
    current_hs = chord_pitches[pitch_idx]
    adjacent = []

    for idx, other_hs in enumerate(chord_pitches):
        if idx == pitch_idx:
            continue
        if _is_adjacent(current_hs, other_hs):
            dim_diff = _compute_dim_diff(current_hs, other_hs, primes)
            adjacent.append((idx, dim_diff))

    if not adjacent:
        return -1, 0

    adjacent.sort(key=lambda x: abs(x[1]))
    return adjacent[0]


def output_chords_to_music_data(chords, fundamental=55, chord_duration=4, dims=None):
    """Turn a chord sequence into per-voice event lists.

    Every pitch is a mapping with an "hs_array" and a "fraction"; the
    fraction is multiplied by the fundamental to obtain the frequency. The
    number of voices is taken from the first chord, and pitches beyond that
    count in later chords are ignored.

    For the first chord each pitch is referenced against the other pitches
    of the same chord. Afterwards a pitch that keeps its hs_array gets
    ref -1 / dim_diff 0, and a changed pitch is referenced against the
    voices that stayed.

    Args:
        chords: List of chords from output_chords.json
        fundamental: Fundamental frequency in Hz
        chord_duration: Duration given to every event, in beats
        dims: Prime dimensions; the first entry is dropped. When None the
            primes default to [3, 5, 7, 11].

    Returns:
        List of voices, each voice is a list of [freq, duration, ref, dim_diff]
    """
    if not chords:
        return []

    # The first dimension is skipped; only the remaining primes label steps.
    primes = [3, 5, 7, 11] if dims is None else list(dims[1:])

    num_voices = len(chords[0])
    music_data = [[] for _ in range(num_voices)]

    prev_chord = None
    for chord in chords:
        current_hs = [tuple(p["hs_array"]) for p in chord]
        is_first = prev_chord is None

        if is_first:
            staying_voices = []
        else:
            staying_voices = [
                v for v in range(num_voices) if current_hs[v] == prev_chord[v]
            ]

        for voice_idx, pitch in enumerate(chord):
            if voice_idx >= num_voices:
                break

            freq = fundamental * float(Fraction(pitch["fraction"]))
            hs = current_hs[voice_idx]

            if is_first:
                ref, dim_diff = _find_ref_in_same_chord(voice_idx, current_hs, primes)
            elif hs == prev_chord[voice_idx]:
                ref, dim_diff = -1, 0
            else:
                ref, dim_diff = _find_ref_and_dim_diff(
                    hs, prev_chord, staying_voices, primes
                )

            music_data[voice_idx].append([freq, chord_duration, ref, dim_diff])

        prev_chord = current_hs

    return music_data


def generate_score(name, num_voices, output_dir="lilypond"):
    """Generate full score .ly file from template.

    Reads ``<output_dir>/<name>/score_template.ly``, substitutes the
    ``{NAME}``, ``{DATE}`` and ``{SCORE}`` placeholders and writes the result
    to ``<output_dir>/<name>/<name>.ly``. The score block contains one staff
    per voice, named "I", "II", ... and including ``includes/part_<voice>.ly``.

    Args:
        name: Name for the output (used as title)
        num_voices: Number of voices/staves to generate (at most 8, the
            number of available voice names)
        output_dir: Base output directory

    Returns:
        True when the score file was written, False when the template is
        missing (an error is printed in that case).
    """
    template_path = Path(output_dir) / name / "score_template.ly"
    if not template_path.exists():
        print(f"Error: Template not found: {template_path}")
        return False

    score_path = Path(output_dir) / name / f"{name}.ly"
    # LilyPond input is UTF-8; read and write it explicitly as such instead
    # of relying on the platform default encoding.
    score_text = template_path.read_text(encoding="utf-8")

    score_text = score_text.replace("{NAME}", name)

    from datetime import date

    today = date.today().strftime("%d %b %Y")
    score_text = score_text.replace("{DATE}", today)

    voice_names = ["I", "II", "III", "IV", "V", "VI", "VII", "VIII"]
    staff_blocks = []
    for i in range(num_voices):
        v_name = voice_names[i]
        staff_blocks.append(f'''
        \\new Staff = "{v_name}" \\with {{
          instrumentName = "{v_name}"
          shortInstrumentName = "{v_name}"
          midiInstrument = #"clarinet"
        }}
        {{
          \\include "includes/part_{v_name}.ly"
        }}
''')
    staves = "".join(staff_blocks)

    score_block = f"""\\score{{
  <<
    \\new SemiStaffGroup {{
      <<
{staves}
      >>
    }}
  >>
  \\layout{{}}
}}"""

    score_text = score_text.replace("{SCORE}", score_block)

    score_path.write_text(score_text, encoding="utf-8")

    print(f"Generated: {score_path}")
    return True


def generate_pdf(name, lilypond_dir="lilypond", output_dir="."):
    """Run the ``lilypond`` command on a score and locate the resulting PDF.

    The score is expected at ``<lilypond_dir>/<name>/<name>.ly``. The
    directory ``<output_dir>/<name>`` is created if needed and passed to
    LilyPond as its output location; the PDF is then looked up as
    ``<output_dir>/<name>/<name>.pdf``.

    Args:
        name: Name of the piece
        lilypond_dir: Directory containing the per-piece LilyPond folder
        output_dir: Base directory for the PDF output

    Returns:
        Path to the generated PDF, or None when the score file is missing,
        the ``lilypond`` executable cannot be found, LilyPond exits with a
        non-zero status, or the PDF is not where it is expected. A message
        is printed in each of these cases.
    """
    source = Path(lilypond_dir) / name / f"{name}.ly"
    if not source.exists():
        print(f"Error: LilyPond file not found: {source}")
        return None

    target_dir = Path(output_dir) / name
    target_dir.mkdir(parents=True, exist_ok=True)

    command = ["lilypond", "-o", str(target_dir), "-f", "pdf", str(source)]
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
    except FileNotFoundError:
        print("Error: lilypond command not found. Is LilyPond installed?")
        return None

    if completed.returncode != 0:
        print(f"LilyPond error:\n{completed.stderr}")
        return None

    pdf_path = target_dir / f"{name}.pdf"
    if not pdf_path.exists():
        print(f"Warning: LilyPond ran but PDF not found at {pdf_path}")
        print(f"Output: {completed.stdout}")
        return None

    print(f"Generated: {pdf_path}")
    return pdf_path


def transcribe(
    chords,
    name,
    fundamental=55,
    output_dir="lilypond",
    generate_pdf_flag=True,
):
    """Main transcription function.

    Converts the input to per-voice music data if necessary, copies the
    score template next to the output (when the template exists), then
    writes the part files and the score and optionally renders the PDF.

    Args:
        chords: One of
            - a dict with a "chords" list and an optional "dims" list,
            - a list of chords, each a list of pitch dicts,
            - music data: a list of voices, each a list of events.
        name: Name for the output
        fundamental: Fundamental frequency in Hz
        output_dir: Base output directory
        generate_pdf_flag: Whether to generate PDF

    Returns:
        Dictionary with "parts_dir" and "score_file" paths, plus "pdf" when
        a PDF was requested and produced. The paths are reported whether or
        not the individual generation steps succeeded.
    """
    import shutil

    # Handle both old format (list of chords) and new format (dict with dims + chords)
    dims = None
    if isinstance(chords, dict) and "chords" in chords:
        # "dims" is optional; without it the converter uses its default primes.
        raw_dims = chords.get("dims")
        dims = tuple(raw_dims) if raw_dims is not None else None
        chords = chords["chords"]

    # Chord format means "list of lists of pitch dicts". Check that the first
    # inner list is non-empty before peeking at its first element, so music
    # data whose first voice is empty does not raise IndexError.
    is_chord_format = bool(
        chords
        and isinstance(chords[0], list)
        and chords[0]
        and isinstance(chords[0][0], dict)
    )
    if is_chord_format:
        music_data = output_chords_to_music_data(chords, fundamental, dims=dims)
    else:
        music_data = chords

    output_path = Path(output_dir) / name
    output_path.mkdir(parents=True, exist_ok=True)

    template_source = Path(__file__).parent.parent / "lilypond" / "score_template.ly"
    if template_source.exists():
        shutil.copy(template_source, output_path / "score_template.ly")

    generate_parts(music_data, name, output_dir)

    num_voices = len(music_data)
    generate_score(name, num_voices, output_dir)

    result = {
        "parts_dir": str(Path(output_dir) / name / "includes"),
        "score_file": str(Path(output_dir) / name / f"{name}.ly"),
    }

    if generate_pdf_flag:
        pdf_path = generate_pdf(name, output_dir, output_dir)
        if pdf_path:
            result["pdf"] = str(pdf_path)

    return result


def main():
    """Command-line entry point: load a chords JSON file and transcribe it.

    The chords file defaults to ``<--output-dir>/output_chords.json``. The
    process exits with status 1 when that file does not exist.
    """
    import argparse

    parser = argparse.ArgumentParser(description="LilyPond Transcriber")
    parser.add_argument(
        "--output-dir", default="output", help="Directory with output_chords.json"
    )
    parser.add_argument(
        "--chords-file", default=None, help="Chords file (default: output_chords.json)"
    )
    parser.add_argument(
        "--name", default="compact_sets_transcription", help="Name for output files"
    )
    parser.add_argument(
        "--fundamental", type=float, default=55, help="Fundamental frequency in Hz"
    )
    parser.add_argument(
        "--lilypond-dir", default="lilypond", help="Base LilyPond output directory"
    )
    parser.add_argument("--no-pdf", action="store_true", help="Skip PDF generation")

    args = parser.parse_args()

    chords_file = args.chords_file
    if chords_file is None:
        chords_file = Path(args.output_dir) / "output_chords.json"

    if not Path(chords_file).exists():
        print(f"Error: Chords file not found: {chords_file}")
        print("Run compact_sets.py first to generate chords.")
        sys.exit(1)

    with open(chords_file, encoding="utf-8") as f:
        chords = json.load(f)

    # The file may hold a bare list of chords or a dict wrapping that list
    # under "chords"; count the chords themselves, not the dict's keys.
    if isinstance(chords, dict) and "chords" in chords:
        num_chords = len(chords["chords"])
    else:
        num_chords = len(chords)
    print(f"Loaded {num_chords} chords from {chords_file}")

    result = transcribe(
        chords,
        args.name,
        fundamental=args.fundamental,
        output_dir=args.lilypond_dir,
        generate_pdf_flag=not args.no_pdf,
    )

    print("\nGenerated files:")
    for key, path in result.items():
        print(f"  {key}: {path}")


if __name__ == "__main__":
    main()