"""Meta-Harness SelectionPolicy component — the deliberate ABSENCE of a selection rule.

One file per component (see scaffold.py). The reference has NO parent-selection mechanism, no
mutation operator, and no archive policy — all of that is delegated to the skill-steered proposer,
which freely reads the whole filesystem ``D`` (evolution summary, frontier, every prior source,
execution traces) and decides for itself what to build on (SKILL.md Step 3: "Copy a
top-performing base system ... then make targeted modifications"). The galapagos ``Selection``
therefore carries a *nominal* parent — ``population.best()``, the open mirror of the reference's
``pareto[0]["system"]`` (the frontier's highest-score member) — purely as the lineage anchor and
the diff mutation target; the rendered prompt explicitly tells the proposer that all
prior candidates are equally valid bases. ``inspirations`` = the full Pareto frontier (the
copy-then-edit source pool the PromptBuilder renders).

:meth:`select` publishes the proposer-facing signals on ``state.signals["meta_harness"]``: the
iteration index, ``candidates_per_proposal`` (k), the frontier table (name / combined_score /
cost rows the PromptBuilder renders), and the exploitation-axes rotation hint — one of the six
axes A-F, rotated once per *proposal round* (every k loop iterations = one reference iteration),
the chat-port mirror of the skill's "If last 3 iterations explored the same axis, pick different
ones" variety rule.

Any randomness would flow through ``self.rng``; this policy itself draws none, because the
reference has no selection randomness to port.
"""
from __future__ import annotations

from ...components.selection import SelectionPolicy
from ...records import RunState, Selection
from .population import display_name

# The six exploitation axes (SKILL.md "Exploitation axes: A=Prompt template, B=Memory content,
# C=Selection algorithm, D=Memory sizing, E=Learning trigger, F=LLM usage in learning").
# Sanctioned adaptation: the reference axes name memory-system mechanisms (its domain); galapagos
# genomes are arbitrary programs, so the labels are generalized to genome-generic mechanism
# families. The rotation/variety semantics are unchanged.
#
# Order matters: ``MetaHarnessPolicy.select`` indexes this list with the proposal-round number
# modulo ``len(EXPLOITATION_AXES)``, so the list order is the rotation order of the published
# ``axis_hint`` and the rotation wraps back to the first entry after the last one.
EXPLOITATION_AXES = [
    "A=Core algorithm",
    "B=Data representation",
    "C=Selection/ordering heuristics",
    "D=Resource sizing & budgets",
    "E=Control flow & update triggers",
    "F=Library usage",
]


class MetaHarnessPolicy(SelectionPolicy):
    """Selection policy that names a nominal parent and publishes proposer-facing signals.

    The parent is always ``population.best()``; the inspirations are the population's frontier
    (or just the parent when the population exposes no ``frontier`` method). When a run state
    is supplied, :meth:`select` also refreshes ``state.signals["meta_harness"]``.
    """

    def __init__(self, seed: int = 0, candidates_per_proposal: int = 3):
        """Store the seed via the base class and the per-round candidate count ``k``.

        ``candidates_per_proposal`` is coerced with ``int()`` and clamped to a minimum of 1,
        so the round arithmetic in :meth:`select` never divides by zero.
        """
        super().__init__(seed)
        per_round = int(candidates_per_proposal)
        self.candidates_per_proposal = per_round if per_round >= 1 else 1

    def select(self, population, state: RunState | None = None) -> Selection:
        """Return the nominal-parent selection and, if ``state`` is given, publish signals.

        Args:
            population: object providing ``all()`` and ``best()``; ``frontier()`` and
                ``cost_of(genome)`` are used when present.
            state: optional run state whose ``signals`` mapping receives the
                ``"meta_harness"`` entry. Nothing is published when it is ``None``.

        Returns:
            A ``Selection`` with ``parent=population.best()``, ``inspirations`` set to a list
            copy of the frontier, and ``pool`` set to the result of ``population.all()``.

        Raises:
            RuntimeError: if ``population.all()`` is empty.
        """
        pool = population.all()
        if not pool:
            raise RuntimeError("cannot select from an empty population")

        nominal_parent = population.best()
        # Populations without a frontier notion degrade to a one-member frontier.
        if hasattr(population, "frontier"):
            pareto = population.frontier()
        else:
            pareto = [nominal_parent]

        if state is not None:
            self._publish_signals(population, pareto, state)

        return Selection(parent=nominal_parent, inspirations=list(pareto), pool=pool)

    def _publish_signals(self, population, pareto, state: RunState) -> None:
        """Write the iteration, k, axis hint and frontier table into ``state.signals``."""

        def _content_length_cost(genome):
            # Fallback cost used when the population does not provide ``cost_of``.
            return float(len(genome.content))

        bus = state.signals.setdefault("meta_harness", {})
        per_round = self.candidates_per_proposal
        # One round = one reference iteration = ``per_round`` consecutive loop iterations.
        round_index = max(0, state.iteration - 1) // per_round
        cost_of = getattr(population, "cost_of", _content_length_cost)

        published = {
            "iteration": state.iteration,
            "candidates_per_proposal": per_round,
            # The axis advances once per round and wraps around the axis list.
            "axis_hint": EXPLOITATION_AXES[round_index % len(EXPLOITATION_AXES)],
        }
        table = []
        for genome in pareto:
            table.append({
                "name": display_name(genome),
                "combined_score": genome.fitness,
                "cost": cost_of(genome),
            })
        published["frontier"] = table
        bus.update(published)
