# compact_sets/src/io.py

#!/usr/bin/env python
"""
I/O functions and CLI main entry point.
"""

import json
from fractions import Fraction
from pathlib import Path
from random import seed


def write_chord_sequence(seq: list["Chord"], path: str) -> None:
    """Dump a chord sequence to ``path`` as JSON.

    The document is a list of chords; each chord is a list with one object
    per entry of ``chord._pitches`` holding:

    * ``hs_array`` - ``pitch.hs_array`` converted to a list
    * ``fraction`` - ``str(pitch.to_fraction())``
    * ``cents``    - ``pitch.to_cents()``
    """
    payload = [
        [
            {
                "hs_array": list(pitch.hs_array),
                "fraction": str(pitch.to_fraction()),
                "cents": pitch.to_cents(),
            }
            for pitch in chord._pitches
        ]
        for chord in seq
    ]

    text = json.dumps(payload, indent=2)

    # Bracket re-flow substitutions, applied in this exact order.
    # NOTE(review): with indent=2 the encoder already puts nested brackets on
    # separate lines, so these patterns are only expected to match inside
    # string values - presumably left over from a compact dump; confirm
    # before removing.
    for needle, substitute in (
        ("[[[", "[\n\t[["),
        (", [[", ",\n\t[["),
        ("]]]", "]]\n]"),
    ):
        text = text.replace(needle, substitute)

    with open(path, "w") as out:
        out.write(text)


def write_chord_sequence_readable(seq: list["Chord"], path: str) -> None:
    """Write the sequence as a tuple-literal listing, one chord per line.

    The file starts with ``(`` and ends with ``)``; in between, each chord
    contributes one indented line containing the tuple of ``hs_array`` values
    taken from ``chord._pitches``, followed by a comma.
    """

    def _chord_line(chord) -> str:
        arrays = tuple(pitch.hs_array for pitch in chord._pitches)
        return f"  {arrays},\n"

    with open(path, "w") as out:
        out.write("(\n")
        out.writelines(_chord_line(chord) for chord in seq)
        out.write(")\n")


def write_chord_sequence_frequencies(
    seq: list["Chord"], path: str, fundamental: float = 100.0
) -> None:
    """Write the sequence as tuples of frequencies, one chord per line.

    Each pitch in ``chord._pitches`` is rendered as
    ``fundamental * float(pitch.to_fraction())``. The file uses the same
    parenthesised, one-line-per-chord layout as the readable hs_array dump.

    Args:
        seq: Chords to write.
        path: Destination file; overwritten if it exists.
        fundamental: Multiplier applied to every pitch ratio (default 100.0).
    """

    def _chord_line(chord) -> str:
        freqs = tuple(
            fundamental * float(pitch.to_fraction()) for pitch in chord._pitches
        )
        return f"  {freqs},\n"

    with open(path, "w") as out:
        out.write("(\n")
        out.writelines(_chord_line(chord) for chord in seq)
        out.write(")\n")


def graph_to_dict(graph: "nx.MultiDiGraph") -> dict:
    """Convert a graph into a plain dict suitable for ``json.dump``.

    Returns:
        ``{"nodes": [...], "edges": [...]}`` where

        * each node entry holds ``pitches`` (one ``hs_array`` list per pitch
          in ``chord.pitches``) and ``dims`` (``chord.dims`` as a list);
        * each edge entry refers to its endpoints by position in ``nodes``
          (``src_idx`` / ``dst_idx``) and carries the edge attributes, with a
          default substituted for any attribute missing from the edge data.
    """
    from .pitch import Pitch
    from .chord import Chord  # NOTE(review): not referenced below; import retained as-is

    chords = list(graph.nodes())
    nodes = [
        {
            "pitches": [list(pitch.hs_array) for pitch in chord.pitches],
            "dims": list(chord.dims),
        }
        for chord in chords
    ]
    # Endpoints are looked up by object identity, not by equality/hash.
    node_to_idx = {id(chord): position for position, chord in enumerate(chords)}

    edges = []
    for src, dst, attrs in graph.edges(data=True):
        # Fallback transposition: an all-zero hs_array in the source's dims.
        zero_shift = Pitch(tuple([0] * len(src.dims)), src.dims)
        transposition = attrs.get("transposition", zero_shift)
        movements = attrs.get("movements", {})
        edges.append(
            {
                "src_idx": node_to_idx[id(src)],
                "dst_idx": node_to_idx[id(dst)],
                "transposition": list(transposition.hs_array),
                "weight": attrs.get("weight", 1.0),
                # JSON object keys must be strings.
                "movements": {str(key): value for key, value in movements.items()},
                "cent_diffs": attrs.get("cent_diffs", []),
                "voice_crossing": attrs.get("voice_crossing", False),
                "is_directly_tunable": attrs.get("is_directly_tunable", False),
            }
        )

    return {"nodes": nodes, "edges": edges}


def graph_from_dict(data: dict) -> "nx.MultiDiGraph":
    """Rebuild a ``MultiDiGraph`` from the dict layout written by ``graph_to_dict``.

    ``data["nodes"]`` entries are turned back into ``Chord`` objects (added to
    the graph in list order) and ``data["edges"]`` entries into edges between
    the chords at ``src_idx`` / ``dst_idx``. ``transposition`` and
    ``movements`` are required on every edge entry; the remaining attributes
    fall back to the same defaults used when serializing.
    """
    import networkx as nx
    from .pitch import Pitch
    from .chord import Chord

    def _chord_from(entry: dict):
        hs_arrays = entry["pitches"]
        return Chord(
            tuple(Pitch(tuple(arr), tuple(entry["dims"])) for arr in hs_arrays),
            tuple(entry["dims"]),
        )

    nodes = [_chord_from(entry) for entry in data["nodes"]]

    graph = nx.MultiDiGraph()
    for chord in nodes:
        graph.add_node(chord)

    for record in data["edges"]:
        src = nodes[record["src_idx"]]
        dst = nodes[record["dst_idx"]]
        attrs = {
            "transposition": Pitch(tuple(record["transposition"]), src.dims),
            "weight": record.get("weight", 1.0),
            # Keys were stringified for JSON; convert them back to ints.
            "movements": {
                int(key): value for key, value in record["movements"].items()
            },
            "cent_diffs": record.get("cent_diffs", []),
            "voice_crossing": record.get("voice_crossing", False),
            "is_directly_tunable": record.get("is_directly_tunable", False),
        }
        graph.add_edge(src, dst, **attrs)

    return graph


def save_graph_pickle(graph: "nx.MultiDiGraph", path: str) -> None:
    """Pickle ``graph`` into the file at ``path`` (overwriting it)."""
    import pickle

    with open(path, mode="wb") as sink:
        pickle.Pickler(sink).dump(graph)


def load_graph_pickle(path: str) -> "nx.MultiDiGraph":
    """Return the object unpickled from the file at ``path``.

    NOTE: unpickling can execute arbitrary code, so only load files that come
    from a trusted source.
    """
    import pickle

    with open(path, mode="rb") as source:
        return pickle.Unpickler(source).load()


def save_graph_json(graph: "nx.MultiDiGraph", path: str) -> None:
    """Serialize ``graph`` with ``graph_to_dict`` and write it to ``path`` as JSON.

    The graph is converted before the file is opened, and the output is
    indented by two spaces.
    """
    payload = graph_to_dict(graph)
    with open(path, "w") as sink:
        json.dump(payload, sink, indent=2)


def load_graph_json(path: str) -> "nx.MultiDiGraph":
    """Read the JSON file at ``path`` and rebuild the graph via ``graph_from_dict``."""
    with open(path, "r") as source:
        payload = json.load(source)
    return graph_from_dict(payload)


def get_cache_key(
    dims: int, chord_size: int, symdiff_min: int, symdiff_max: int
) -> str:
    """Generate the cache key for a set of graph parameters.

    The key has the form ``d{dims}_n{chord_size}_s{symdiff_min}-{symdiff_max}``,
    the same stem the cache load/save helpers in this module use for their
    ``.pkl`` / ``.json`` file names.

    Args:
        dims: Number of prime dimensions.
        chord_size: Number of pitches per chord.
        symdiff_min: Minimum symmetric difference between chords.
        symdiff_max: Maximum symmetric difference between chords.

    Returns:
        The cache key, e.g. ``"d7_n3_s2-2"``.
    """
    # The previous template referenced an undefined ``size`` and the builtins
    # ``min``/``max`` instead of the parameters, so every call raised NameError.
    return f"d{dims}_n{chord_size}_s{symdiff_min}-{symdiff_max}"


def load_graph_from_cache(
    cache_dir: str,
    dims: int,
    chord_size: int,
    symdiff_min: int,
    symdiff_max: int,
) -> tuple["nx.MultiDiGraph | None", bool]:
    """
    Look for a cached graph matching the given parameters.

    The pickle file is tried first and the JSON file second. A file that
    exists but fails to load produces a printed warning and the search moves
    on to the next candidate.

    Returns:
        (graph, was_cached): ``(graph, True)`` on a cache hit, or
        ``(None, False)`` when nothing could be loaded.
    """
    cache_key = f"d{dims}_n{chord_size}_s{symdiff_min}-{symdiff_max}"
    cache_root = Path(cache_dir)

    # Pickle is tried before JSON (noted as faster by the original author).
    pkl_path = cache_root / f"{cache_key}.pkl"
    if pkl_path.exists():
        try:
            return load_graph_pickle(str(pkl_path)), True
        except Exception as e:
            print(f"Warning: Failed to load pickle cache: {e}")

    # Fall back to the JSON copy.
    json_path = cache_root / f"{cache_key}.json"
    if json_path.exists():
        try:
            return load_graph_json(str(json_path)), True
        except Exception as e:
            print(f"Warning: Failed to load JSON cache: {e}")

    return None, False


def save_graph_to_cache(
    graph: "nx.MultiDiGraph",
    cache_dir: str,
    dims: int,
    chord_size: int,
    symdiff_min: int,
    symdiff_max: int,
) -> None:
    """Store ``graph`` in the cache directory as both a pickle and a JSON file.

    The directory is created when missing. The two writes are independent and
    best-effort: a failure in either one is reported with a printed warning
    and does not prevent the other or raise to the caller.
    """
    import os

    cache_key = f"d{dims}_n{chord_size}_s{symdiff_min}-{symdiff_max}"
    cache_root = Path(cache_dir)
    pkl_path = cache_root / f"{cache_key}.pkl"
    json_path = cache_root / f"{cache_key}.json"

    os.makedirs(cache_dir, exist_ok=True)

    # Pickle copy.
    try:
        save_graph_pickle(graph, str(pkl_path))
        print(f"Cached to {pkl_path}")
    except Exception as e:
        print(f"Warning: Failed to save pickle: {e}")

    # JSON copy.
    try:
        save_graph_json(graph, str(json_path))
        print(f"Cached to {json_path}")
    except Exception as e:
        print(f"Warning: Failed to save JSON: {e}")


def _build_arg_parser() -> "argparse.ArgumentParser":
    """Build the command-line parser used by ``main``."""
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate chord paths in harmonic space"
    )
    parser.add_argument(
        "--symdiff-min",
        type=int,
        default=2,
        help="Minimum symmetric difference between chords",
    )
    parser.add_argument(
        "--symdiff-max",
        type=int,
        default=2,
        help="Maximum symmetric difference between chords",
    )
    parser.add_argument(
        "--melodic-min",
        type=int,
        default=0,
        help="Minimum cents for any pitch movement (0 = no minimum)",
    )
    parser.add_argument(
        "--melodic-max",
        type=int,
        default=500,
        help="Maximum cents for any pitch movement (0 = no maximum)",
    )
    parser.add_argument(
        "--target-range",
        type=float,
        default=0,
        help="Target range in octaves for rising register (default: disabled, 2 = two octaves)",
    )
    parser.add_argument(
        "--voice-crossing",
        action="store_true",
        help="Allow edges where voices cross (default: reject)",
    )
    # ``--direct-tuning`` defaults to True, so on its own it could never be
    # switched off; ``--no-direct-tuning`` writes False to the same dest.
    parser.add_argument(
        "--direct-tuning",
        dest="direct_tuning",
        action="store_true",
        default=True,
        help="Require edges to be directly tunable (default: enabled)",
    )
    parser.add_argument(
        "--no-direct-tuning",
        dest="direct_tuning",
        action="store_false",
        help="Do not require edges to be directly tunable",
    )
    parser.add_argument(
        "--weight-melodic",
        type=float,
        default=1,
        help="Weight for melodic threshold factor (0=disabled, default: 1)",
    )
    parser.add_argument(
        "--weight-contrary-motion",
        type=float,
        default=0,
        help="Weight for contrary motion factor (0=disabled, default: 0)",
    )
    parser.add_argument(
        "--weight-hamiltonian",
        type=float,
        default=1,
        help="Weight for Hamiltonian factor (0=disabled, default: 1)",
    )
    parser.add_argument(
        "--weight-dca",
        type=float,
        default=1,
        help="Weight for DCA factor - favors edges where voices stay (0=disabled, default: 1)",
    )
    parser.add_argument(
        "--weight-target-range",
        type=float,
        default=1,
        help="Weight for target range factor (0=disabled, default: 1)",
    )
    parser.add_argument(
        "--dims", type=int, default=7, help="Number of prime dimensions (4, 5, 7, or 8)"
    )
    parser.add_argument("--chord-size", type=int, default=3, help="Size of chords")
    parser.add_argument("--max-path", type=int, default=50, help="Maximum path length")
    parser.add_argument(
        "--seed", type=int, default=None, help="Random seed (default: random)"
    )
    parser.add_argument(
        "--cache-dir",
        type=str,
        default="./cache",
        help="Cache directory for graphs",
    )
    parser.add_argument(
        "--rebuild-cache",
        action="store_true",
        help="Force rebuild graph (ignore cache)",
    )
    parser.add_argument(
        "--no-cache",
        action="store_true",
        help="Disable caching",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="output",
        help="Output directory for generated files",
    )
    return parser


def main():
    """Demo: Generate compact sets, build the graph and write a chord path.

    Steps:
        1. Parse the command line.
        2. Build the ``HarmonicSpace`` for the requested number of dimensions.
        3. Load the voice-leading graph from the cache, or build it (and cache
           it unless ``--no-cache`` is given).
        4. Find a stochastic path through the graph using the weights given
           on the command line.
        5. Write the path to ``output_chords.json``, ``output_chords.txt`` and
           ``output_frequencies.txt`` inside ``--output-dir``.
    """
    import os

    from .pitch import DIMS_4, DIMS_5, DIMS_7, DIMS_8
    from .harmonic_space import HarmonicSpace
    from .graph import PathFinder

    args = _build_arg_parser().parse_args()

    # Select dims. Unsupported values fall back to 7; the fallback is now
    # reported and also used for the cache key, so the cached file is named
    # after the space that was actually built.
    supported_dims = {4: DIMS_4, 5: DIMS_5, 7: DIMS_7, 8: DIMS_8}
    dims_count = args.dims
    if dims_count not in supported_dims:
        print(f"Warning: Unsupported --dims {args.dims}; falling back to 7")
        dims_count = 7
    dims = supported_dims[dims_count]

    space = HarmonicSpace(dims, collapsed=True)
    print(f"Space: {space}")
    print(f"Symdiff: {args.symdiff_min} to {args.symdiff_max}")

    # Try to load from cache
    graph = None
    was_cached = False

    if not args.no_cache and not args.rebuild_cache:
        graph, was_cached = load_graph_from_cache(
            args.cache_dir,
            dims_count,
            args.chord_size,
            args.symdiff_min,
            args.symdiff_max,
        )
        if was_cached:
            print("Loaded graph from cache")
            print(
                f"Graph: {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
            )

    # Build graph if not loaded from cache
    if graph is None:
        print("Generating connected sets...")
        chords = space.generate_connected_sets(
            min_size=args.chord_size, max_size=args.chord_size
        )
        print(f"Found {len(chords)} unique chords")

        print("Building voice leading graph...")
        graph = space.build_voice_leading_graph(
            chords,
            symdiff_min=args.symdiff_min,
            symdiff_max=args.symdiff_max,
        )
        print(
            f"Graph: {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges"
        )

        # Save to cache (--rebuild-cache still refreshes the cached files)
        if not args.no_cache:
            save_graph_to_cache(
                graph,
                args.cache_dir,
                dims_count,
                args.chord_size,
                args.symdiff_min,
                args.symdiff_max,
            )

    # Find stochastic path
    print("Finding stochastic path...")
    path_finder = PathFinder(graph)
    if args.seed is not None:
        seed(args.seed)

    weights_config = path_finder._default_weights_config()
    weights_config["melodic_threshold_min"] = args.melodic_min
    weights_config["melodic_threshold_max"] = args.melodic_max
    weights_config["voice_crossing_allowed"] = args.voice_crossing
    weights_config["direct_tuning"] = args.direct_tuning

    # Soft factor weights
    weights_config["weight_melodic"] = args.weight_melodic
    weights_config["weight_contrary_motion"] = args.weight_contrary_motion
    weights_config["weight_hamiltonian"] = args.weight_hamiltonian
    weights_config["weight_dca"] = args.weight_dca

    # Target range: only active for a positive --target-range
    if args.target_range > 0:
        weights_config["target_range"] = True
        weights_config["target_range_octaves"] = args.target_range
        weights_config["weight_target_range"] = args.weight_target_range
    else:
        weights_config["weight_target_range"] = 0  # disabled

    weights_config["max_path"] = args.max_path

    path = path_finder.find_stochastic_path(
        max_length=args.max_path, weights_config=weights_config
    )
    print(f"Path length: {len(path)}")

    # Create output directory and write files
    os.makedirs(args.output_dir, exist_ok=True)

    write_chord_sequence(path, os.path.join(args.output_dir, "output_chords.json"))
    print(f"Written to {args.output_dir}/output_chords.json")

    write_chord_sequence_readable(
        path, os.path.join(args.output_dir, "output_chords.txt")
    )
    print(f"Written to {args.output_dir}/output_chords.txt")

    write_chord_sequence_frequencies(
        path, os.path.join(args.output_dir, "output_frequencies.txt")
    )
    print(f"Written to {args.output_dir}/output_frequencies.txt")


# Script entry point: run the CLI only when this module is executed directly,
# not when it is imported.
# NOTE(review): main() performs package-relative imports (``from .pitch ...``),
# which need a package context - presumably this is meant to be launched with
# ``python -m``; confirm the intended invocation.
if __name__ == "__main__":
    main()