"""OpenEvolve SelectionPolicy component — round-robin island rotation, 3-tier parent
sampling (explore / exploit-from-archive / weighted), and island-best/elite/diverse/random inspirations.
"""
from __future__ import annotations

from ...components.selection import SelectionPolicy
from ...records import Genome, RunState, Selection
from .population import _fitness

class OpenEvolveSelectionPolicy(SelectionPolicy):
    """The ``ProgramDatabase`` sampling policy, on the read side of the loop.

    Each :meth:`select` rotates the population's ``current_island`` round-robin (the parallel
    controller distributes iterations across islands; the synchronous mirror is per-step rotation),
    then:

    * **Parent** (``_sample_parent`` 3-tier): with prob ``exploration_ratio`` (0.2) a uniform-random
      program from the current island; with prob ``exploitation_ratio`` (0.7) a uniform pick from the
      global archive, preferring archive members on the current island; otherwise (~0.1)
      fitness-weighted from the current island.
    * **Inspirations** (``_sample_inspirations``): the island's best program, then the top
      ``elite_selection_ratio`` (0.1) of the island, then diverse picks from nearby MAP-Elites cells,
      then random fill — all distinct from the parent, capped at ``num_inspirations``.

    The sampling island is published to ``state.signals["openevolve"]["island"]`` and stamped onto the
    CHILD by :class:`OpenEvolveProposer` (mirroring OpenEvolve's ``add(target_island=...)``); the shared
    parent genome is never mutated. All randomness flows through ``self.rng``.
    """

    def __init__(self, seed: int = 0, num_islands: int = 5,
                 exploration_ratio: float = 0.2, exploitation_ratio: float = 0.7,
                 elite_selection_ratio: float = 0.1, num_inspirations: int = 3,
                 num_diverse: int = 2, feature_dimensions: list[str] | None = None):
        super().__init__(seed)
        self.num_islands = num_islands
        self.exploration_ratio = exploration_ratio
        self.exploitation_ratio = exploitation_ratio
        self.elite_selection_ratio = elite_selection_ratio
        self.num_inspirations = num_inspirations
        self.num_diverse = num_diverse
        self.feature_dimensions = list(feature_dimensions or ["complexity", "diversity"])
        self._first = True

    def _fit(self, genome: Genome) -> float:
        return _fitness(genome, self.feature_dimensions)

    def _weighted(self, members: list[Genome]) -> Genome:
        weights = [max(self._fit(g), 0.001) for g in members]
        total = sum(weights)
        r = self.rng.random() * total
        upto = 0.0
        for g, w in zip(members, weights):
            upto += w
            if upto >= r:
                return g
        return members[-1]

    def select(self, population, state: RunState | None = None) -> Selection:
        members = population.all()
        if not members:
            raise RuntimeError("cannot select from an empty population")

        # rotate island round-robin (skip the very first select so we evolve island 0 first)
        if self._first:
            self._first = False
        else:
            population.next_island()
        island = population.current_island

        island_members = population.query({"island": island}) or members
        archive = population.query({"archive": True})

        # ---- 3-tier parent sampler  (_sample_parent) ----
        r = self.rng.random()
        if r < self.exploration_ratio:
            parent = self.rng.choice(island_members)                       # explore: uniform/island
        elif r < self.exploration_ratio + self.exploitation_ratio and archive:
            in_island = [g for g in archive if g.metadata.get("island") == island]
            parent = self.rng.choice(in_island or archive)                 # exploit: archive (pref island)
        else:
            parent = self._weighted(island_members)                        # random: fitness-weighted/island

        # NB: do NOT mutate parent.metadata['island'] — the parent is a shared stored genome and may
        # physically live on another island (archive cross-island fallback). OpenEvolve stamps the
        # sampling island on the CHILD (add(target_island=...)); galapagos publishes it via the signal
        # below and OpenEvolveProposer stamps it onto the child.
        inspirations = self._inspirations(population, parent, island, island_members)

        if state is not None:
            state.signals["openevolve"] = {
                "island": island,
                "tier": ("explore" if r < self.exploration_ratio
                         else "exploit" if r < self.exploration_ratio + self.exploitation_ratio and archive
                         else "random"),
                "island_generations": list(population.island_generations),
            }
        # pool = the PARENT'S island (fitness-sorted), so the PromptBuilder's evolution-history sections
        # stay island-scoped — mirroring OpenEvolve's get_top_programs(island_idx=parent_island)
        # (iteration.py) rather than leaking cross-island programs from the global store.
        return Selection(parent=parent, inspirations=inspirations, pool=island_members)

    def _inspirations(self, population, parent: Genome, island: int,
                      island_members: list[Genome]) -> list[Genome]:
        """LIVE OpenEvolve ``sample_from_island`` (database.py:454-464): a uniform random sample of the
        parent's island excluding the parent, capped at ``num_inspirations``. (The structured
        island-best/elite/diverse-cell construction is the SYNCHRONOUS ``_sample_inspirations`` path,
        which is dead code in the reference — the parallel controller is the live path.)"""
        other = [g for g in island_members if g.id != parent.id]
        n = self.num_inspirations
        return other if len(other) <= n else self.rng.sample(other, n)
