"""EvoX SelectionPolicy component — thin adapter over the ACTIVE evolved strategy's ``sample()``.

One file per component (see scaffold.py). In EvoX the selection policy is not fixed code — it is
the LLM-written ``EvolvedStrategy`` hosted by :class:`~galapagos.scaffolds.evox.population.
EvoXPopulation`. This policy is the loop adapter: it calls the active strategy's
``sample(num_context_programs)``, unpacks the ``({label: parent}, {label: [inspirations]})``
contract, and publishes ``{label, parent_id, context_ids}`` on ``state.signals["evox"]`` (the
PromptBuilder renders the label; the Proposer stamps ``parent_info``/``context_ids`` on the child
— generated strategies READ those fields back).

Runtime fallback (the upstream ``database.sample()`` half of ``_restore_fallback_database``): when
the active strategy raises during ``sample``, the error is signalled on
``state.signals["evox"]["strategy_error"]``, the population restores a fallback strategy, and the
sample is retried — up to two restore-and-retry rounds (three attempts in total); if the third
attempt also fails, the exception propagates to the caller.

``observe`` is intentionally minimal — the window scorer and stagnation counter live in the
scaffold's ``after_step``/``periodic`` (observe is skipped on NO_DIFF steps). All randomness flows
through ``self.rng``, which is injected into the active strategy on every select (covering both
the initial strategy and hot-swapped ones).
"""
from __future__ import annotations

from ...components.selection import SelectionPolicy
from ...records import Genome, RunState, Selection


class EvoXPolicy(SelectionPolicy):
    """Delegates parent/inspiration selection to the population's active evolved strategy.

    The policy owns no selection logic of its own: ``select`` calls the active strategy's
    ``sample()``, validates the returned shape, publishes the outcome on
    ``state.signals["evox"]`` and falls back to a restored strategy when ``sample()`` fails.
    """

    # Total ``sample()`` attempts per ``select``: the initial call plus one retry after each
    # ``restore_fallback()``. The last attempt re-raises instead of restoring again.
    _MAX_SAMPLE_ATTEMPTS = 3

    def __init__(self, seed: int = 0, num_context_programs: int = 4):
        """Store the sampling configuration.

        Args:
            seed: Forwarded unchanged to ``SelectionPolicy.__init__``.
            num_context_programs: Passed to the strategy's ``sample()`` on every call;
                coerced with ``int()``.
        """
        super().__init__(seed)
        self.num_context_programs = int(num_context_programs)

    # ---- select -------------------------------------------------------------------------------
    def select(self, population, state: RunState | None = None) -> Selection:
        """Sample a parent and inspirations from the active strategy, with runtime fallback.

        Args:
            population: Object exposing ``inject_rng``, ``active_strategy``, ``restore_fallback``,
                ``last_runtime_error`` and ``all`` (these are the only members used here).
            state: Optional run state. When given, ``label``, ``parent_id`` and ``context_ids``
                (plus ``strategy_error`` if any attempt failed) are written to
                ``state.signals["evox"]``; when ``None`` they go to a throwaway dict.

        Returns:
            A ``Selection`` built from the sampled parent, the flattened inspirations and
            ``population.all()`` as the pool.

        Raises:
            Exception: Whatever the final ``sample()`` attempt raised, once every attempt has
                failed.
        """
        population.inject_rng(self.rng)  # determinism: the strategy's rng is always the policy's
        sig = state.signals.setdefault("evox", {}) if state is not None else {}
        sig.pop("strategy_error", None)  # clear a stale error from a previously recovered select
        label, parent, inspirations = "", None, []
        last_attempt = self._MAX_SAMPLE_ATTEMPTS - 1
        # Broken evolved sample(): restore + retry (the second restore reloads the seed
        # strategy — see population.restore_fallback).
        for attempt in range(self._MAX_SAMPLE_ATTEMPTS):
            try:
                label, parent, inspirations = self._sample(population)
                break
            except Exception as e:  # noqa: BLE001
                # Message-less exceptions (e.g. a bare ``assert``) stringify to "", which would
                # publish a falsy "error"; fall back to the repr so the failure stays visible.
                message = str(e) or repr(e)
                sig["strategy_error"] = message
                population.last_runtime_error = message  # periodic() reads this for the signals
                if attempt == last_attempt:
                    raise
                population.restore_fallback()
                # The restored strategy is a different object from the one injected above;
                # re-inject so the retry also draws from the policy's rng.
                population.inject_rng(self.rng)
        sig.update(label=label, parent_id=parent.id,
                   context_ids=[g.id for g in inspirations])
        return Selection(parent=parent, inspirations=inspirations, pool=population.all())

    def _sample(self, population) -> tuple[str, Genome, list[Genome]]:
        """One ``sample()`` call, unpacked and contract-checked against the FULL runtime shape
        (anomalies raise into the fallback path in ``select``). ``Valid(·)`` only probes small
        fixed stores, so a strategy can pass validation yet return malformed output on larger
        runtime stores — every shape check the validator makes on ``sample()`` output is re-made
        here so the post-loop ``sig.update`` and the PromptBuilder only ever see validated Genomes.

        Returns:
            ``(label, parent, inspirations)``: the single parent's label as a ``str`` (``""``
            when the label is falsy), the parent ``Genome``, and all context lists concatenated
            in the context dict's iteration order.

        Raises:
            RuntimeError: If the parent dict is not a one-entry dict, the parent is not a
                ``Genome`` or its id is not in the active strategy's ``genomes``, the context is
                neither a dict nor ``None``, or a context value is not a list of ``Genome``.
        """
        parent_dict, context_dict = population.active_strategy.sample(
            num_context_programs=self.num_context_programs)
        if not isinstance(parent_dict, dict) or len(parent_dict) != 1:
            raise RuntimeError(
                f"strategy sample() must return exactly one parent, got {parent_dict!r}")
        label, parent = next(iter(parent_dict.items()))
        if not isinstance(parent, Genome):
            raise RuntimeError(f"strategy sample() parent must be a Genome, got {type(parent)}")
        if parent.id not in population.active_strategy.genomes:
            raise RuntimeError(
                f"strategy sample() parent (id={parent.id}) is not in the store")
        # ``None`` is accepted as "no context"; any other non-dict is a contract violation.
        if context_dict is not None and not isinstance(context_dict, dict):
            raise RuntimeError(f"strategy sample() context must be a Dict[str, List[Genome]], "
                               f"got {type(context_dict)}")
        inspirations: list[Genome] = []
        for ctx_label, genome_list in (context_dict or {}).items():
            if not isinstance(genome_list, list):
                raise RuntimeError(f"strategy sample() context[{ctx_label!r}] must be a list, "
                                   f"got {type(genome_list)}")
            for g in genome_list:
                if not isinstance(g, Genome):
                    raise RuntimeError(f"strategy sample() context[{ctx_label!r}] contains a "
                                       f"non-Genome object: {type(g)}")
            inspirations.extend(genome_list)
        return str(label or ""), parent, inspirations

    # ---- observe -------------------------------------------------------------------------------
    def observe(self, genome: Genome, state: RunState | None = None) -> None:
        """No-op: per-iteration accounting (window ticks, stagnation) lives in the scaffold's
        ``after_step``/``periodic`` because ``observe`` is skipped on NO_DIFF steps; the evolved
        strategy itself saw the child in ``add()``."""