# compact_sets/compact_sets.py

#!/usr/bin/env python
"""
Compact Sets: A rational theory of harmony

Based on Michael Winter's theory of conjunct connected sets in harmonic space,
combining ideas from Tom Johnson, James Tenney, and Larry Polansky.

Mathematical foundations:
- Harmonic space: multidimensional lattice where dimensions = prime factors
- Connected sets: chords forming a connected sublattice
- Voice leading graphs: edges based on symmetric difference + melodic thresholds
"""

from __future__ import annotations
from fractions import Fraction
from itertools import combinations, permutations, product
from math import prod, log
from operator import add
from random import choice, choices, seed
from typing import Iterator

import networkx as nx


# ============================================================================
# CONSTANTS
# ============================================================================

# Prime bases of the available harmonic spaces. Index 0 is always the prime 2
# (the octave dimension). The smaller spaces are prefixes of the largest one.
DIMS_8 = (2, 3, 5, 7, 11, 13, 17, 19)
DIMS_7 = DIMS_8[:7]  # (2, 3, 5, 7, 11, 13, 17)
DIMS_5 = DIMS_8[:5]  # (2, 3, 5, 7, 11)
DIMS_4 = DIMS_8[:4]  # (2, 3, 5, 7)


# ============================================================================
# PITCH
# ============================================================================


class Pitch:
    """
    A point in harmonic space.

    Represented as a tuple of integer exponents, one per prime dimension.
    Example: with dims (2, 3, 5, 7), (-1, 1, 0, 0) is 2**-1 * 3**1 = 3/2
    (a perfect fifth); (0, 1, 0, 0) is 3/1, which collapses to 3/2.

    Equality and hashing consider only ``hs_array``; ``dims`` is not compared.
    """

    def __init__(self, hs_array: tuple[int, ...], dims: tuple[int, ...] | None = None):
        """
        Initialize a pitch from a harmonic series array.

        Args:
            hs_array: Tuple of exponents for each prime dimension
            dims: Tuple of primes defining the harmonic space (defaults to DIMS_7)
        """
        self.hs_array = hs_array
        self.dims = dims if dims is not None else DIMS_7

    def __hash__(self) -> int:
        return hash(self.hs_array)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Pitch):
            return NotImplemented
        return self.hs_array == other.hs_array

    def __repr__(self) -> str:
        return f"Pitch({self.hs_array})"

    def __iter__(self):
        return iter(self.hs_array)

    def __len__(self) -> int:
        return len(self.hs_array)

    def __getitem__(self, index: int) -> int:
        return self.hs_array[index]

    def to_fraction(self) -> Fraction:
        """
        Convert to an exact frequency ratio (e.g., 3/2).

        Each prime is raised to its exponent as a ``Fraction`` so that negative
        exponents stay exact; integer ``pow(p, -n)`` would yield a float and
        the resulting ratio would only approximate the true value.
        """
        return Fraction(
            prod(
                Fraction(prime) ** self.hs_array[d]
                for d, prime in enumerate(self.dims)
            )
        )

    def to_cents(self) -> float:
        """Convert to cents (relative to 1/1 = 0 cents)."""
        return 1200 * log(float(self.to_fraction()), 2)

    def collapse(self) -> Pitch:
        """
        Collapse pitch so frequency ratio is in [1, 2).

        Only the exponent at index 0 is adjusted, i.e. the ratio is multiplied
        or divided by 2 until it lands in the octave [1, 2). This removes
        octave information, useful for pitch classes.

        Returns:
            A new Pitch with the same dims; the receiver is not modified.
        """
        ratio = self.to_fraction()
        octave_shift = 0

        # At most one of the two loops runs.
        while ratio < 1:
            ratio *= 2
            octave_shift += 1
        while ratio >= 2:
            ratio /= 2
            octave_shift -= 1

        exponents = list(self.hs_array)
        if octave_shift:
            exponents[0] += octave_shift
        return Pitch(tuple(exponents), self.dims)

    def expand(self) -> Pitch:
        """Expand pitch to normalized octave position (currently same as collapse())."""
        return self.collapse()

    def transpose(self, trans: Pitch) -> Pitch:
        """Transpose by another pitch (add exponents element-wise)."""
        return Pitch(
            tuple(a + b for a, b in zip(self.hs_array, trans.hs_array)),
            self.dims,
        )

    def pitch_difference(self, other: Pitch) -> Pitch:
        """Calculate the pitch difference (self - other), exponent by exponent."""
        return Pitch(
            tuple(self.hs_array[d] - other.hs_array[d] for d in range(len(self.dims))),
            self.dims,
        )


# ============================================================================
# CHORD
# ============================================================================


class Chord:
    """
    A set of pitches forming a connected subgraph in harmonic space.

    A chord is a tuple of Pitches. Two chords are equivalent under
    transposition if they have the same intervallic structure.

    Equality and hashing use the pitch tuple as given (order matters);
    ``dims`` is not compared.
    """

    def __init__(self, pitches: tuple[Pitch, ...], dims: tuple[int, ...] | None = None):
        """
        Initialize a chord from a tuple of pitches.

        Args:
            pitches: Tuple of Pitch objects
            dims: Harmonic space dimensions (defaults to DIMS_7)
        """
        self.dims = dims if dims is not None else DIMS_7
        self._pitches = pitches

    def __hash__(self) -> int:
        return hash(self._pitches)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, Chord):
            return NotImplemented
        return self._pitches == other._pitches

    def __repr__(self) -> str:
        return f"Chord({self._pitches})"

    def __iter__(self) -> Iterator[Pitch]:
        return iter(self._pitches)

    def __len__(self) -> int:
        return len(self._pitches)

    def __getitem__(self, index: int) -> Pitch:
        return self._pitches[index]

    @property
    def pitches(self) -> tuple[Pitch, ...]:
        """Get the pitches as a tuple."""
        return self._pitches

    @property
    def collapsed_pitches(self) -> set[Pitch]:
        """Get all pitches collapsed to pitch class."""
        return {p.collapse() for p in self._pitches}

    def is_connected(self) -> bool:
        """
        Check if the chord forms a connected subgraph in harmonic space.

        A set is connected if every pitch can be reached from every other
        by stepping through adjacent pitches (see ``_is_adjacent``).
        Repeated pitches are counted once, so a duplicate entry does not make
        an otherwise connected chord look disconnected.

        Returns:
            True for chords with at most one distinct pitch, otherwise whether
            a traversal from the first pitch reaches every distinct pitch.
        """
        # Distinct pitches, keeping first-seen order.
        nodes = list(dict.fromkeys(self._pitches))
        if len(nodes) <= 1:
            return True

        # Adjacency between every unordered pair of distinct pitches.
        neighbours = {p: set() for p in nodes}
        for p1, p2 in combinations(nodes, 2):
            if self._is_adjacent(p1, p2):
                neighbours[p1].add(p2)
                neighbours[p2].add(p1)

        # Depth-first traversal from the first pitch; traversal order does not
        # affect reachability, and list.pop() is O(1) unlike pop(0).
        seen = {nodes[0]}
        stack = [nodes[0]]
        while stack:
            current = stack.pop()
            for neighbour in neighbours[current]:
                if neighbour not in seen:
                    seen.add(neighbour)
                    stack.append(neighbour)

        return len(seen) == len(nodes)

    def _is_adjacent(self, p1: Pitch, p2: Pitch) -> bool:
        """Check if two pitches are adjacent (differ by ±1 in exactly one dimension).

        Dimension 0 (the 2/octave dimension) is skipped, so pitches that differ
        only in octave are NOT reported as adjacent.
        """
        steps = 0
        for d in range(1, len(self.dims)):
            delta = abs(p1[d] - p2[d])
            if delta > 1:
                return False
            steps += delta
        return steps == 1

    def symmetric_difference_size(self, other: Chord) -> int:
        """Calculate the size of symmetric difference between two chords.

        Both chords are compared as sets of collapsed pitch classes.
        """
        return len(self.collapsed_pitches ^ other.collapsed_pitches)

    def size_difference(self, other: Chord) -> int:
        """Calculate the absolute difference in chord sizes."""
        return abs(len(self._pitches) - len(other._pitches))

    def expand_all(self) -> list[Pitch]:
        """Expand all pitches to normalized octave positions."""
        return [p.expand() for p in self._pitches]

    def transpose(self, trans: Pitch) -> Chord:
        """Transpose the entire chord, returning a new Chord with the same dims."""
        return Chord(tuple(p.transpose(trans) for p in self._pitches), self.dims)

    def sorted_by_frequency(self) -> list[Pitch]:
        """Sort pitches by frequency ratio (low to high)."""
        return sorted(self._pitches, key=lambda p: p.to_fraction())


# ============================================================================
# HARMONIC SPACE
# ============================================================================


class HarmonicSpace:
    """
    Harmonic space HS_l or collapsed harmonic space CHS_l.

    A multidimensional lattice where each dimension corresponds to a prime
    factor. When ``collapsed`` is true, dimension 0 is skipped when stepping
    to lattice neighbours (see ``_branch_from``).
    """

    def __init__(self, dims: tuple[int, ...] = DIMS_7, collapsed: bool = True):
        """
        Initialize harmonic space.

        Args:
            dims: Tuple of primes defining the space (e.g., (2, 3, 5, 7))
            collapsed: If True, use collapsed harmonic space (CHS_l)
        """
        self.dims = dims
        self.collapsed = collapsed

    def __repr__(self) -> str:
        suffix = " (collapsed)" if self.collapsed else ""
        return f"HarmonicSpace({self.dims}{suffix})"

    def pitch(self, hs_array: tuple[int, ...]) -> Pitch:
        """Create a Pitch in this space."""
        return Pitch(hs_array, self.dims)

    def chord(self, pitches: tuple[Pitch, ...]) -> Chord:
        """Create a Chord in this space."""
        return Chord(pitches, self.dims)

    def root(self) -> Pitch:
        """Get the root pitch (1/1), i.e. all exponents zero."""
        return self.pitch(tuple(0 for _ in self.dims))

    def _branch_from(self, vertex: tuple[int, ...]) -> set[tuple[int, ...]]:
        """
        Get all vertices adjacent to the given vertex (±1 in one dimension).

        For collapsed harmonic space, skip dimension 0 (the octave dimension).
        """
        first_dim = 1 if self.collapsed else 0
        branches = set()

        for i in range(first_dim, len(self.dims)):
            for delta in (-1, 1):
                neighbour = list(vertex)
                neighbour[i] += delta
                branches.add(tuple(neighbour))

        return branches

    def generate_connected_sets(self, min_size: int, max_size: int) -> set[Chord]:
        """
        Generate all unique connected sets within a size range.

        Sets are grown outward from the root (all-zero exponents) by repeatedly
        adding a lattice neighbour of a pitch already in the set. Every member
        of a reported set is wrapped into [1, 2) and the members are sorted by
        frequency ratio.

        Args:
            min_size: Minimum number of pitches in a chord
            max_size: Maximum number of pitches in a chord

        Returns:
            Set of unique Chord objects
        """
        root = tuple(0 for _ in self.dims)

        def grow(
            chord: tuple[tuple[int, ...], ...],
            connected: set[tuple[int, ...]],
            visited: set[tuple[int, ...]],
        ) -> Iterator[tuple[tuple[int, ...], ...]]:
            """Recursively grow connected sets."""
            # Report the current set if it is within the size bounds.
            if min_size <= len(chord) <= max_size:
                yield tuple(
                    sorted(
                        (self._wrap_pitch(p) for p in chord),
                        key=lambda arr: self.pitch(arr).to_fraction(),
                    )
                )

            if len(chord) < max_size:
                # Copy so that additions stay local to this level. A vertex
                # tried by an earlier sibling stays excluded for later
                # siblings and their descendants.
                visited = set(visited)
                for b in connected:
                    if b not in visited:
                        visited.add(b)
                        yield from grow(
                            chord + (b,), connected | self._branch_from(b), visited
                        )

        return {
            Chord(tuple(self.pitch(arr) for arr in arrays), self.dims)
            for arrays in grow((root,), self._branch_from(root), {root})
        }

    def _wrap_pitch(self, hs_array: tuple[int, ...]) -> tuple[int, ...]:
        """Wrap a pitch so its frequency ratio is in [1, 2)."""
        return self.pitch(hs_array).collapse().hs_array

    def build_voice_leading_graph(
        self,
        chords: set[Chord],
        change: int = 1,
        melodic_threshold_cents: float | None = None,
    ) -> nx.MultiDiGraph:
        """
        Build a voice leading graph from a set of chords.

        Every chord becomes a node. For each unordered pair (c1, c2) and each
        admissible transposition of c2, an edge c1 -> c2 carrying that
        transposition and an edge c2 -> c1 carrying its inverse are considered.
        Each direction is added only if the changing pitches of its target
        chord are adjacent to a pitch of its source chord.

        Args:
            chords: Set of Chord objects
            change: Number of pitches that change between chords; the required
                symmetric difference is exactly ``2 * change``
            melodic_threshold_cents: If set, filter edges by max pitch movement

        Returns:
            NetworkX MultiDiGraph whose edges have ``transposition`` and
            ``weight`` attributes
        """
        symdiff_range = (2 * change, 2 * change)

        graph = nx.MultiDiGraph()
        graph.add_nodes_from(chords)

        for c1, c2 in combinations(chords, 2):
            candidates = self._candidate_edges(
                c1, c2, symdiff_range, melodic_threshold_cents
            )
            for trans, weight, forward_ok, backward_ok in candidates:
                if forward_ok:
                    graph.add_edge(c1, c2, transposition=trans, weight=weight)
                if backward_ok:
                    graph.add_edge(
                        c2,
                        c1,
                        transposition=self._invert_transposition(trans),
                        weight=weight,
                    )

        return graph

    def _candidate_edges(
        self,
        c1: Chord,
        c2: Chord,
        symdiff_range: tuple[int, int],
        melodic_threshold_cents: float | None,
    ) -> Iterator[tuple[Pitch, float, bool, bool]]:
        """
        Yield ``(trans, weight, forward_ok, backward_ok)`` per transposition of c2.

        A transposition is tried for every (p1, p2) pair, aligning p2 with p1.
        It is yielded when the symmetric-difference and melodic checks pass and
        the connectivity check holds in at least one direction. ``forward_ok``
        refers to the move c1 -> c2, ``backward_ok`` to the move c2 -> c1.

        NOTE(review): the same transposition can arise from more than one
        (p1, p2) pair and is then yielded once per pair (parallel edges).
        """
        low, high = symdiff_range

        for p1 in c1.pitches:
            for p2 in c2.pitches:
                trans = p1.pitch_difference(p2)
                c2_transposed = c2.transpose(trans)

                # Symmetric difference on COLLAPSED pitch classes.
                symdiff = self._calc_symdiff_collapsed(c1, c2_transposed)
                if not (low <= symdiff <= high):
                    continue

                # The connectivity rule depends on which chord is the source,
                # so it is evaluated once per direction.
                forward_ok = self._check_voice_leading_connectivity(
                    c1, c2_transposed
                )
                backward_ok = self._check_voice_leading_connectivity(
                    c2_transposed, c1
                )
                if not (forward_ok or backward_ok):
                    continue

                if melodic_threshold_cents is not None:
                    if not self._check_melodic_threshold(
                        c1.pitches, c2_transposed.pitches, melodic_threshold_cents
                    ):
                        continue

                yield trans, 1.0, forward_ok, backward_ok

    def _find_valid_edges(
        self,
        c1: Chord,
        c2: Chord,
        symdiff_range: tuple[int, int],
        melodic_threshold_cents: float | None,
    ) -> list[tuple[Pitch, float]]:
        """
        Find all valid edges from c1 to c2.

        Tests all transpositions of c2 to find ones that satisfy
        the symmetric difference constraint AND each changing pitch
        is connected (adjacent) to a pitch in the previous chord (c1).

        Returns:
            List of ``(transposition, weight)`` tuples; weight is always 1.0.
        """
        return [
            (trans, weight)
            for trans, weight, forward_ok, _ in self._candidate_edges(
                c1, c2, symdiff_range, melodic_threshold_cents
            )
            if forward_ok
        ]

    def _calc_symdiff_collapsed(self, c1: Chord, c2: Chord) -> int:
        """Calculate symmetric difference on COLLAPSED pitch classes."""
        return c1.symmetric_difference_size(c2)

    def _check_voice_leading_connectivity(self, c1: Chord, c2: Chord) -> bool:
        """
        Check that each pitch that changes is connected (adjacent in lattice)
        to some pitch in the previous chord.

        A pitch changes if its collapsed form is in c2 but not in c1.
        Each changing pitch must be adjacent (±1 in one dimension) to a pitch in c1.

        Returns:
            False when nothing changes (no change = no edge) or when some
            changing pitch has no adjacent pitch in c1; True otherwise.
        """
        previous = {p.collapse() for p in c1.pitches}
        changing = {p.collapse() for p in c2.pitches} - previous

        if not changing:
            return False

        return all(
            any(self._is_adjacent_pitches(old, new) for old in previous)
            for new in changing
        )

    def _is_adjacent_pitches(self, p1: Pitch, p2: Pitch) -> bool:
        """Check if two collapsed pitches are adjacent (differ by ±1 in one dimension).

        Dimension 0 (the octave dimension) is always skipped here.
        """
        steps = 0
        for d in range(1, len(self.dims)):
            delta = abs(p1[d] - p2[d])
            if delta > 1:
                return False
            steps += delta
        return steps == 1

    def _check_melodic_threshold(
        self,
        c1: tuple[Pitch, ...],
        c2: tuple[Pitch, ...],
        threshold_cents: float,
    ) -> bool:
        """
        Check if pitch movements stay within melodic threshold.

        Only pitches whose collapsed form occurs in both chords are compared:
        each such pair must lie within ``threshold_cents`` of each other at
        their actual (uncollapsed) positions. Pitches without a counterpart
        are not measured.

        Returns:
            False if the chords share no collapsed pitch or a shared pitch
            moves by more than the threshold; True otherwise.
        """
        # Collapse each pitch once instead of once per comparison.
        classed_1 = [(p.collapse(), p) for p in c1]
        classed_2 = [(p.collapse(), p) for p in c2]

        classes_2 = {pitch_class for pitch_class, _ in classed_2}
        if not any(pitch_class in classes_2 for pitch_class, _ in classed_1):
            return False

        for class_1, p1 in classed_1:
            for class_2, p2 in classed_2:
                if class_1 == class_2:
                    if abs(p1.to_cents() - p2.to_cents()) > threshold_cents:
                        return False

        return True

    def _invert_transposition(self, trans: Pitch) -> Pitch:
        """Invert a transposition (negate every exponent)."""
        return Pitch(tuple(-t for t in trans.hs_array), self.dims)


# ============================================================================
# PATH FINDER
# ============================================================================


class PathFinder:
    """Finds paths through voice leading graphs."""

    def __init__(self, graph: nx.MultiDiGraph):
        self.graph = graph

    def find_stochastic_path(
        self,
        start_chord: Chord | None = None,
        max_length: int = 100,
        weights_config: dict | None = None,
    ) -> list[Chord]:
        """
        Find a stochastic path through the graph.

        The walk starts at ``start_chord`` (or a random node) and takes up to
        ``max_length`` weighted-random steps, stopping early at a node with no
        outgoing edges. Each edge's ``transposition`` is accumulated, and every
        chord after the first is the graph node transposed by that running sum.

        Args:
            start_chord: Starting chord (random if None)
            max_length: Maximum number of steps; the returned path therefore
                holds at most ``max_length + 1`` chords
            weights_config: Configuration for edge weighting

        Returns:
            List of Chord objects representing the path, or an empty list if
            the graph has no nodes
        """
        if weights_config is None:
            weights_config = self._default_weights_config()

        chords = self._initialize_chords(start_chord)
        current = chords[-1][0] if chords else None

        if current is None or len(self.graph.nodes()) == 0:
            return []

        path = [current]
        # Cumulative transposition - starts as identity (no transposition)
        cumulative_trans = Pitch(tuple(0 for _ in current.dims), current.dims)
        last_graph_nodes = (current,)

        for _ in range(max_length):
            # Edges are always looked up from the original (untransposed) node.
            out_edges = list(self.graph.out_edges(current, data=True))
            if not out_edges:
                break

            weights = self._calculate_edge_weights(
                out_edges, path, last_graph_nodes, weights_config
            )

            # Select edge stochastically
            edge = choices(out_edges, weights=weights)[0]
            next_node = edge[1]  # Original chord in graph
            trans = edge[2].get("transposition", None)

            if trans is not None:
                cumulative_trans = cumulative_trans.transpose(trans)

            # Output = next_node transposed by cumulative_trans
            path.append(next_node.transpose(cumulative_trans))

            # Continue from the graph node itself and remember the last two.
            current = next_node
            last_graph_nodes = (last_graph_nodes + (current,))[-2:]

        return path

    def _initialize_chords(self, start_chord: Chord | None) -> tuple:
        """
        Initialize chord sequence.

        Returns:
            ``((chord, chord),)`` for the given or randomly chosen start chord,
            or ``()`` when no start chord is given and the graph has no nodes.
        """
        if start_chord is not None:
            return ((start_chord, start_chord),)

        nodes = list(self.graph.nodes())
        if nodes:
            # Draw once so both slots hold the same chord, matching the
            # explicit-start case above.
            start = choice(nodes)
            return ((start, start),)

        return ()

    def _default_weights_config(self) -> dict:
        """Default weights configuration."""
        return {
            "movement_size": True,
            "contrary_motion": True,
            "direct_tuning": True,
            "voice_crossing": True,
            "sustained_voice": False,
            "transposition": False,
        }

    def _calculate_edge_weights(
        self,
        out_edges: list,
        path: list[Chord],
        last_chords: tuple[Chord, ...],
        config: dict,
    ) -> list[float]:
        """
        Calculate weights for edges based on configuration.

        Each weight starts at 1.0 and is multiplied by a factor for every
        enabled criterion the edge data satisfies. ``path`` and ``last_chords``
        are accepted but not currently consulted.

        Args:
            out_edges: ``(source, target, data)`` tuples
            path: Chords emitted so far
            last_chords: Most recent graph nodes
            config: Flags enabling the individual criteria

        Returns:
            One weight per edge, in the order of ``out_edges``
        """
        use_movement = config.get("movement_size", False)
        use_contrary = config.get("contrary_motion", False)
        use_tuning = config.get("direct_tuning", False)
        use_crossing = config.get("voice_crossing", False)

        weights = []
        for edge in out_edges:
            edge_data = edge[2]
            w = 1.0

            if use_movement or use_contrary:
                # Signed cent movement of every voice that reports one.
                movements = edge_data.get("movements", {})
                cent_diffs = [
                    v["cent_difference"]
                    for v in movements.values()
                    if v.get("cent_difference") is not None
                ]

                # Movement size: favour edges whose largest move is small.
                if use_movement and cent_diffs:
                    largest = max(abs(c) for c in cent_diffs)
                    if largest < 100:
                        w *= 1000
                    elif largest < 200:
                        w *= 10

                # Contrary motion: at least three moving voices, with both a
                # downward and an upward move among them.
                if use_contrary and len(cent_diffs) >= 3:
                    if min(cent_diffs) < 0 and max(cent_diffs) > 0:
                        w *= 100

            if use_tuning and edge_data.get("is_directly_tunable", False):
                w *= 10

            # Voice crossing is simplified to a constant factor for all edges.
            if use_crossing:
                w *= 10

            weights.append(w)

        return weights

    def is_hamiltonian(self, path: list[Chord]) -> bool:
        """Check if a path is Hamiltonian (visits all nodes exactly once).

        Tested as: the path is as long as the graph has nodes and contains no
        repeated chord.
        """
        return len(path) == len(self.graph.nodes()) and len(set(path)) == len(path)


# ============================================================================
# I/O
# ============================================================================


def write_chord_sequence(seq: list[Chord], path: str) -> None:
    """
    Write a chord sequence to a JSON file.

    The file holds a list of chords; each chord is a list of pitch objects
    (sorted low to high) with the keys ``hs_array``, ``fraction`` and
    ``cents``. Output is indented by two spaces.

    Args:
        seq: Chords to write, in order
        path: Destination file path (overwritten if it exists)
    """
    import json

    serializable = [
        [
            {
                "hs_array": list(pitch.hs_array),
                "fraction": str(pitch.to_fraction()),
                "cents": pitch.to_cents(),
            }
            for pitch in chord.sorted_by_frequency()
        ]
        for chord in seq
    ]

    # The former "[[[" / ", [[" / "]]]" replacements are gone: indent=2 puts
    # every bracket of a non-empty container on its own line, so those
    # patterns never occurred and the output is unchanged.
    content = json.dumps(serializable, indent=2)

    with open(path, "w", encoding="utf-8") as f:
        f.write(content)


def write_chord_sequence_readable(seq: list[Chord], path: str) -> None:
    """
    Write chord sequence as tuple of hs_arrays - one line per chord.

    The file is a parenthesised, comma-terminated list: each line is the tuple
    of ``hs_array`` tuples of one chord, sorted low to high.

    Args:
        seq: Chords to write, in order
        path: Destination file path (overwritten if it exists)
    """
    lines = ["(\n"]
    lines.extend(
        f"  {tuple(p.hs_array for p in chord.sorted_by_frequency())},\n"
        for chord in seq
    )
    lines.append(")\n")

    # Build all lines first, then write them in one call.
    with open(path, "w", encoding="utf-8") as f:
        f.writelines(lines)


# ============================================================================
# MAIN / DEMO
# ============================================================================


def main():
    """
    Demo entry point: generate connected sets, build the graph, walk a path.

    Parses the command-line options, generates all connected chords of the
    requested size, builds the voice leading graph with a 200-cent melodic
    threshold, walks a seeded stochastic path and writes it to
    ``output_chords.json`` and ``output_chords.txt``.
    """
    import argparse

    parser = argparse.ArgumentParser(
        description="Generate chord paths in harmonic space"
    )
    parser.add_argument(
        "--change",
        type=int,
        default=1,
        help="Number of pitches that change between chords",
    )
    parser.add_argument(
        "--dims", type=int, default=7, help="Number of prime dimensions (4, 5, 7, or 8)"
    )
    parser.add_argument("--chord-size", type=int, default=3, help="Size of chords")
    parser.add_argument("--max-path", type=int, default=50, help="Maximum path length")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    options = parser.parse_args()

    # Map the requested dimension count to a prime tuple; any other value
    # falls back to the 7-dimensional space.
    dims_by_count = {4: DIMS_4, 5: DIMS_5, 7: DIMS_7, 8: DIMS_8}
    dims = dims_by_count.get(options.dims, DIMS_7)

    space = HarmonicSpace(dims, collapsed=True)
    print(f"Space: {space}")
    print(f"Change: {options.change} pitch(es) per transition")

    print("Generating connected sets...")
    size = options.chord_size
    chords = space.generate_connected_sets(min_size=size, max_size=size)
    print(f"Found {len(chords)} unique chords")

    print("Building voice leading graph...")
    graph = space.build_voice_leading_graph(
        chords, change=options.change, melodic_threshold_cents=200
    )
    print(f"Graph: {graph.number_of_nodes()} nodes, {graph.number_of_edges()} edges")

    print("Finding stochastic path...")
    finder = PathFinder(graph)
    # Seed right before the walk so the path is reproducible per seed.
    seed(options.seed)
    path = finder.find_stochastic_path(max_length=options.max_path)
    print(f"Path length: {len(path)}")

    for writer, target in (
        (write_chord_sequence, "output_chords.json"),
        (write_chord_sequence_readable, "output_chords.txt"),
    ):
        writer(path, target)
        print(f"Written to {target}")


if __name__ == "__main__":
    main()