# compact_sets/src/analyze.py

"""Analyze chord sequence outputs."""

import argparse
import json
import sys
from pathlib import Path


def analyze_chords(
    chords: list,
    config: dict | None = None,
    graph_path: list | None = None,
) -> dict:
    """Analyze chord sequence and return metrics.

    Args:
        chords: List of chords, each chord is a list of pitch dicts
        config: Optional config with:
            - target_range_octaves: target octaves (default: 2.0)
            - melodic_threshold_max: max cents per voice movement (default: 300)
            - max_path: path length (default: 50)
            - graph_nodes: total nodes in graph (optional, for Hamiltonian coverage)
        graph_path: Optional list of graph node hashes for Hamiltonian analysis

    Returns:
        Dict with analysis metrics
    """
    if config is None:
        config = {}

    target_octaves = config.get("target_range_octaves", 2.0)
    melodic_max = config.get("melodic_threshold_max", 300)
    max_path = config.get("max_path", 50)
    graph_nodes = config.get("graph_nodes", None)

    # Basic info
    num_chords = len(chords)
    num_voices = len(chords[0]) if chords else 0
    num_steps = num_chords - 1 if num_chords > 0 else 0

    # ========== Melodic Threshold ==========
    melodic_violations = 0
    max_violation = 0
    total_movement = 0
    max_movement = 0

    # ========== Contrary Motion ==========
    contrary_motion_steps = 0

    # ========== DCA (Voice Stay Counts) ==========
    # Track how long each voice stays before changing
    voice_stay_counts = [0] * num_voices  # Current stay count per voice
    stay_counts_when_changed = []  # All stay counts recorded when voices changed
    max_voice_stay = 0

    # ========== Hamiltonian ==========
    unique_nodes = set()
    node_hashes = []

    for i in range(1, num_chords):
        cent_diffs = []
        voices_changed = 0

        for v in range(num_voices):
            curr_cents = chords[i][v]["cents"]
            prev_cents = chords[i - 1][v]["cents"]
            diff = curr_cents - prev_cents
            cent_diffs.append(diff)

            # Melodic
            abs_diff = abs(diff)
            total_movement += abs_diff
            max_movement = max(max_movement, abs_diff)
            if abs_diff > melodic_max:
                melodic_violations += 1
                max_violation = max(max_violation, abs_diff)

            # DCA
            if abs_diff > 0:
                voices_changed += 1

        # Track unique nodes
        node_hash = tuple(
            tuple(p["hs_array"]) for p in chords[i]
        )  # Convert lists to tuples for hashing
        unique_nodes.add(node_hash)
        node_hashes.append(node_hash)

        # Contrary motion: sorted_diffs[0] < 0 and sorted_diffs[-1] > 0
        if len(cent_diffs) >= 2:
            sorted_diffs = sorted(cent_diffs)
            if sorted_diffs[0] < 0 and sorted_diffs[-1] > 0:
                contrary_motion_steps += 1

        # DCA: Track stay counts per voice
        for v in range(num_voices):
            curr_cents = chords[i][v]["cents"]
            prev_cents = chords[i - 1][v]["cents"]
            if curr_cents != prev_cents:
                # Voice changed - record how long it stayed
                stay_counts_when_changed.append(voice_stay_counts[v])
                max_voice_stay = max(max_voice_stay, voice_stay_counts[v])
                voice_stay_counts[v] = 0  # Reset stay count
            else:
                voice_stay_counts[v] += 1  # Increment stay count

    # ========== Target Range ==========
    target_cents = target_octaves * 1200

    if chords:
        start_avg = sum(p["cents"] for p in chords[0]) / len(chords[0])
        end_avg = sum(p["cents"] for p in chords[-1]) / len(chords[-1])
        actual_cents = end_avg - start_avg
        target_percent = (actual_cents / target_cents) * 100 if target_cents > 0 else 0
    else:
        start_avg = end_avg = actual_cents = target_percent = 0

    # ========== DCA Summary ==========
    avg_voice_stay = (
        sum(stay_counts_when_changed) / len(stay_counts_when_changed)
        if stay_counts_when_changed
        else 0
    )

    # ========== Hamiltonian Coverage ==========
    # Use graph_path if provided (accurate), otherwise hash output chords (may differ due to transposition)
    if graph_path:
        hamiltonian_unique_nodes = len(set(graph_path))
    else:
        hamiltonian_unique_nodes = len(unique_nodes)

    hamiltonian_coverage = (
        (hamiltonian_unique_nodes / graph_nodes * 100) if graph_nodes else None
    )

    return {
        "num_chords": num_chords,
        "num_voices": num_voices,
        "num_steps": num_steps,
        # Melodic
        "melodic_max": melodic_max,
        "melodic_violations": melodic_violations,
        "melodic_max_violation": max_violation,
        "melodic_avg_movement": total_movement / num_steps if num_steps > 0 else 0,
        "melodic_max_movement": max_movement,
        # Contrary Motion
        "contrary_motion_steps": contrary_motion_steps,
        "contrary_motion_percent": (
            (contrary_motion_steps / num_steps * 100) if num_steps > 0 else 0
        ),
        # DCA
        "dca_avg_voice_stay": avg_voice_stay,
        "dca_max_voice_stay": max_voice_stay,
        # Hamiltonian
        "hamiltonian_unique_nodes": hamiltonian_unique_nodes,
        "hamiltonian_coverage": hamiltonian_coverage,
        # Target Range
        "target_octaves": target_octaves,
        "target_cents": target_cents,
        "target_start_cents": start_avg,
        "target_end_cents": end_avg,
        "target_actual_cents": actual_cents,
        "target_percent": target_percent,
    }


def format_analysis(metrics: dict) -> str:
    """Render a metrics dict as a human-readable, newline-joined report.

    Args:
        metrics: Mapping holding the keys read below (the shape returned by
            ``analyze_chords``). ``hamiltonian_coverage`` may be ``None``, in
            which case the coverage line is left out.

    Returns:
        The report text, sections separated by blank lines, with no trailing
        newline.
    """
    m = metrics
    report = ["=== Analysis ==="]
    report.append(
        f"Path: {m['num_chords']} chords, {m['num_steps']} steps, {m['num_voices']} voices"
    )

    # Melodic threshold section
    report += ["", "--- Melodic Threshold ---"]
    report.append(f"Max allowed: {m['melodic_max']} cents")
    report.append(f"Violations: {m['melodic_violations']}")
    report.append(f"Max violation: {m['melodic_max_violation']:.0f} cents")
    report.append(f"Avg movement: {m['melodic_avg_movement']:.1f} cents")
    report.append(f"Max movement: {m['melodic_max_movement']:.0f} cents")

    # Contrary motion section
    report += ["", "--- Contrary Motion ---"]
    report.append(f"Steps with contrary: {m['contrary_motion_steps']}")
    report.append(f"Percentage: {m['contrary_motion_percent']:.1f}%")

    # Voice-stay section
    report += ["", "--- DCA (Voice Stay) ---"]
    report.append(f"Avg stay count: {m['dca_avg_voice_stay']:.2f} steps")
    report.append(f"Max stay count: {m['dca_max_voice_stay']} steps")

    # Hamiltonian section; coverage is only known when a graph size was given
    report += ["", "--- Hamiltonian ---"]
    report.append(f"Unique nodes: {m['hamiltonian_unique_nodes']}")
    coverage = m["hamiltonian_coverage"]
    if coverage is not None:
        report.append(f"Coverage: {m['hamiltonian_coverage']:.1f}%")

    # Target range section
    report += ["", "--- Target Range ---"]
    report.append(
        f"Target: {m['target_octaves']} octaves ({m['target_cents']:.0f} cents)"
    )
    report.append(f"Start: {m['target_start_cents']:.0f} cents")
    report.append(f"End: {m['target_end_cents']:.0f} cents")
    report.append(
        f"Achieved: {m['target_actual_cents']:.0f} cents ({m['target_percent']:.1f}%)"
    )

    return "\n".join(report)


def analyze_file(file_path: str | Path, config: dict | None = None) -> dict:
    """Load a chord JSON file and analyze it.

    If a file named ``graph_path.json`` exists in the same directory as
    ``file_path``, it is loaded too and passed to ``analyze_chords`` as the
    graph path.
    NOTE(review): the sibling file is picked up purely by name, so a stale
    ``graph_path.json`` from another run would be used as well - confirm this
    is intended.

    Args:
        file_path: Path to the chord JSON file.
        config: Optional config dict forwarded unchanged to ``analyze_chords``.

    Returns:
        The metrics dict produced by ``analyze_chords``.
    """
    file_path = Path(file_path)
    # JSON text is UTF-8; do not depend on the platform's locale encoding.
    with open(file_path, encoding="utf-8") as f:
        chords = json.load(f)

    # Try to load graph_path if it exists
    graph_path = None
    graph_path_file = file_path.parent / "graph_path.json"
    if graph_path_file.exists():
        with open(graph_path_file, encoding="utf-8") as f:
            graph_path = json.load(f)

    return analyze_chords(chords, config, graph_path)


def main() -> int:
    """Command-line entry point: analyze a chord JSON file and print the result.

    Parses the arguments from ``sys.argv``, builds the config dict, runs
    ``analyze_file`` and prints either the formatted report or, with
    ``--json``, the raw metrics as JSON.

    Returns:
        Process exit status: 0 on success, 1 when the input file is missing.
    """
    parser = argparse.ArgumentParser(description="Analyze chord sequence outputs")
    parser.add_argument(
        "file",
        nargs="?",
        default="output/output_chords.json",
        help="Path to chord JSON file (default: output/output_chords.json)",
    )
    parser.add_argument(
        "--json",
        action="store_true",
        help="Output raw JSON instead of formatted text",
    )
    parser.add_argument(
        "--target-range",
        type=float,
        default=2.0,
        help="Target range in octaves (default: 2.0)",
    )
    parser.add_argument(
        "--melodic-max",
        type=int,
        default=300,
        help="Max melodic threshold in cents (default: 300)",
    )
    parser.add_argument(
        "--max-path",
        type=int,
        default=50,
        help="Max path length (default: 50)",
    )
    parser.add_argument(
        "--graph-nodes",
        type=int,
        default=None,
        help="Total nodes in graph (for Hamiltonian coverage)",
    )
    args = parser.parse_args()

    file_path = Path(args.file)
    if not file_path.exists():
        # Diagnostics go to stderr so stdout carries only the report / JSON.
        print(f"Error: File not found: {file_path}", file=sys.stderr)
        return 1

    config = {
        "target_range_octaves": args.target_range,
        "melodic_threshold_max": args.melodic_max,
        "max_path": args.max_path,
        "graph_nodes": args.graph_nodes,
    }

    metrics = analyze_file(file_path, config)

    if args.json:
        print(json.dumps(metrics, indent=2))
    else:
        print(format_analysis(metrics))

    return 0


if __name__ == "__main__":
    # The bare exit() helper is injected by the site module for interactive
    # use and may be absent (e.g. python -S); SystemExit always works.
    raise SystemExit(main())