"""Meta-Harness Proposer component — ONE model call → k candidates, dispensed one per iteration.

One file per component (see scaffold.py). The reference proposer session writes k candidate
``.py`` files and declares them in ``pending_eval.json``; the outer loop import-checks each and
benchmarks the valid ones. The chat port makes ONE model call per proposal round whose response
carries all k candidates as ``### CANDIDATE <i>: <snake_case_name>`` sections (each: the
<=30-line report, then ONE fenced python block holding the full program). k is enforced socially
by the steering — exactly like the reference, where the outer loop "imposes no k; it evaluates
however many entries pending_eval.json contains" — so parsing is lenient and accepts any count.

**Queue semantics (sanctioned adaptation, also documented in scaffold.py):** the reference
evaluates all k candidates of an iteration before the next proposer session; the galapagos base
loop is strictly one-child-per-iteration. The proposer therefore keeps an internal FIFO of parsed
candidates and dispenses ONE per loop iteration, making a new model call only when the queue is
empty — reference N iterations x k candidates maps to N*k galapagos iterations (the bundled
budget 60 = 20 x 3).

**Interface validation:** the reference import-checks each candidate in a 30 s subprocess
(``from text_classification.agents.<name> import *``). The safe in-process analogue for arbitrary
task programs is ``compile(source, "<candidate>", "exec")`` — syntax-valid candidates pass;
invalid ones are NOT evaluated and are recorded as ``outcome: "failed"`` rows in Memory at
proposal time, with the compile error as their trace (the reference's failed-row behaviour; this
port records the row even when sibling candidates were valid — the reference silently drops the
failed rows of partially-invalid iterations, an information-loss quirk we do not reproduce).

**Failure semantics:** a response with headers but no parsable program per section, or with no
fenced block at all, yields zero queued candidates → the child is returned with
``metadata["changed"] = False`` (a NO_DIFF wasted step), mirroring the reference's abandoned
iteration when the proposer exits non-zero or writes no ``pending_eval.json``.

**Free-form degradation:** a reply with no ``### CANDIDATE`` headers yields exactly one parsed
candidate — the LAST fenced python block, name ``candidate_1``, empty report — so even an
unstructured reply still exercises the full loop.

Cost is recorded via ``env.state.record_cost`` on every model call. ``Genome.child`` copies the
parent's metadata wholesale, so dispensing strips the bookkeeping/transient keys (``admitted``,
``eval_failed``, ``seed``, ``candidate_name``, ``report``, ``iteration``, ``outcome``) before
stamping the fresh candidate fields (``candidate_name``/``report`` here; ``iteration``/``outcome``
in the scaffold's ``after_step``, once the eval verdict is known).
"""
from __future__ import annotations

import re

from ...components.proposer import Env, Proposer
from ...records import Genome

# A fenced block: triple backticks, an optional ``python``/``py`` tag, optional whitespace and a
# newline, then the body (group 1) captured lazily up to the NEXT triple backtick. DOTALL lets the
# body span lines. An opening fence carrying any other tag does not match this pattern.
_FENCE = re.compile(r"```(?:python|py)?\s*\n(.*?)```", re.DOTALL)
# "### CANDIDATE <i>: <snake_case_name>" — lenient: 2-4 hashes, optional index, optional colon
# Matched per line (MULTILINE) and case-insensitively. The ``name`` group is whatever follows the
# first colon on the line; without a colon the rest of the line is consumed before the group, so
# ``name`` comes back empty.
_HEADER = re.compile(r"^[ \t]*#{2,4}\s*CANDIDATE\b[^:\n]*:?[ \t]*(?P<name>[^\n]*)$",
                     re.MULTILINE | re.IGNORECASE)
# Runs of characters outside [A-Za-z0-9_]; substituted away when normalising a candidate name.
_NAME_CLEAN = re.compile(r"[^A-Za-z0-9_]+")
# Metadata keys popped from every child before fresh candidate fields are stamped (a child
# starts with a copy of its parent's metadata, per the module docstring).
_STALE_KEYS = ("admitted", "eval_failed", "seed", "candidate_name", "report", "iteration",
               "outcome")
# Upper bound on the number of report lines kept per candidate.
_MAX_REPORT_LINES = 30


class MetaHarnessProposer(Proposer):
    """One call → k parsed candidates → an internal FIFO dispensing one child per iteration.

    ``propose`` makes a model call only when the internal queue is empty. The response is split
    into candidate sections, each section's program is syntax-checked with ``compile``, and the
    valid candidates are queued. Every ``propose`` call then pops exactly one queued candidate and
    returns it as a child ``Genome``. Candidates that fail validation are never queued; they are
    written to ``env.memory`` (when present) as ``kind="failed"`` rows at proposal time.
    """

    modality = "prompt_call"

    # A well-formed, CLOSED fenced block: an opening fence line (any info string), body lines
    # that contain no triple backtick, and a bare closing fence line. Used only to recognise
    # header-looking lines that are really part of a program. It is deliberately conservative:
    # an unclosed fence, or a body that itself contains triple backticks, does not match, so such
    # replies are split into sections exactly as if this filter did not exist.
    _CLOSED_FENCE = re.compile(
        r"^[ \t]*```[^\n`]*\n(?:(?:(?!```)[^\n])*\n)*?[ \t]*```[ \t\r]*$",
        re.MULTILINE)

    def __init__(self, candidates_per_proposal: int = 3):
        """Store the requested k (clamped to >= 1) and start with an empty queue.

        ``candidates_per_proposal`` is not consulted by the parser in this class: however many
        candidates the response contains are accepted.
        """
        self.candidates_per_proposal = max(1, int(candidates_per_proposal))  # steering-only
        self._queue: list[dict] = []   # parsed {name, report, source} awaiting dispatch (FIFO)

    # ---- parsing ---------------------------------------------------------------------------------
    @staticmethod
    def _clean_name(raw: str, index: int) -> str:
        """Normalise a header name to lowercase ``[a-z0-9_]``; fall back to ``candidate_<index>``
        when nothing usable remains."""
        name = _NAME_CLEAN.sub("_", raw).strip("_").lower()
        return name or f"candidate_{index}"

    @classmethod
    def _section_headers(cls, text: str) -> list[re.Match[str]]:
        """Return the ``_HEADER`` matches that actually delimit candidate sections.

        A candidate program may legitimately contain a line that looks like a header (for
        example a ``## candidate scoring`` comment, or markdown inside a prompt template).
        Treating such a line as a boundary would cut the section in the middle of its fence and
        lose the candidate, so matches that start inside a closed fenced block are dropped.
        """
        fenced = [m.span() for m in cls._CLOSED_FENCE.finditer(text)]
        return [h for h in _HEADER.finditer(text)
                if not any(lo <= h.start() < hi for lo, hi in fenced)]

    def _parse(self, text: str) -> list[dict]:
        """Split the response into candidate sections.

        Each candidate is a dict with ``name`` (cleaned header name), ``report`` (the text before
        the section's first fence, capped at ``_MAX_REPORT_LINES`` lines) and ``source`` (the LAST
        fenced block of the section). ``source=None`` marks a section with no program — a parse
        failure that the caller records. Header-looking lines inside a closed fenced block are
        not treated as section boundaries.

        With no section headers at all, the reply degrades to a single candidate named
        ``candidate_1`` built from the last fence in the text (empty report), or to an empty list
        when there is no fence either.
        """
        headers = self._section_headers(text)
        if not headers:  # free-form degradation: one candidate from the last fence
            fences = _FENCE.findall(text)
            if not fences:
                return []
            return [{"name": "candidate_1", "report": "", "source": fences[-1].strip("\n")}]

        # Each section runs from the end of its header to the start of the next header (or EOF).
        section_ends = [h.start() for h in headers[1:]] + [len(text)]
        out: list[dict] = []
        for index, (header, end) in enumerate(zip(headers, section_ends), 1):
            section = text[header.end(): end]
            name = self._clean_name(header.group("name"), index)
            fences = list(_FENCE.finditer(section))
            if not fences:
                out.append({"name": name, "report": "", "source": None})
                continue
            report = section[: fences[0].start()].strip()
            report = "\n".join(report.splitlines()[:_MAX_REPORT_LINES])
            out.append({"name": name, "report": report,
                        "source": fences[-1].group(1).strip("\n")})
        return out

    # ---- the propose step --------------------------------------------------------------------------
    def _refill(self, prompt, env: Env) -> None:
        """One model call → parse → interface-validate → queue (the proposer session analogue).

        Cost is recorded on ``env.state`` when a state object is present. Candidates whose
        section has no program, or whose program does not compile, are not queued; when
        ``env.memory`` is present they are written as ``kind="failed"`` rows carrying the
        validation error as their trace.
        """
        gen = env.model.generate(prompt)
        if env.state is not None:
            env.state.record_cost(gen.cost_usd, gen.prompt_tokens, gen.completion_tokens)
        iteration = env.state.iteration if env.state is not None else 0
        for cand in self._parse(gen.text or ""):
            source, error = cand["source"], ""
            if source is None:
                error = "no fenced python block in the candidate section"
            else:
                try:  # the 30 s import-check analogue, safe for arbitrary task programs
                    # dont_inherit=True: validate the candidate as a standalone module. Without
                    # it, compile() applies this module's own ``from __future__`` flags
                    # (postponed annotations) to the candidate's source.
                    compile(source, "<candidate>", "exec", dont_inherit=True)
                except Exception as exc:  # noqa: BLE001 — SyntaxError/ValueError etc.
                    error = f"{type(exc).__name__}: {exc}"
            if error:  # failed validation: never evaluated, recorded as a failed row NOW
                if env.memory is not None:
                    # The row's ``cost`` is the program length in characters (0 when missing).
                    env.memory.write("", kind="failed", name=cand["name"], iteration=iteration,
                                     cost=float(len(source or "")),
                                     trace=f"interface validation failed: {error}")
                continue
            self._queue.append(cand)

    @staticmethod
    def _strip_stale(child: Genome) -> None:
        """Remove the bookkeeping keys listed in ``_STALE_KEYS`` from ``child.metadata``."""
        for stale in _STALE_KEYS:
            child.metadata.pop(stale, None)

    def propose(self, prompt, env: Env) -> Genome:
        """Return the next child genome, refilling the queue with one model call if it is empty.

        If the queue is still empty after a refill (nothing parsable or valid in the response),
        the returned child carries the parent's content unchanged (or empty content when there is
        no parent) with ``metadata["changed"] = False``. Otherwise the oldest queued candidate is
        dispensed: its source becomes the child's content and ``changed``, ``candidate_name`` and
        ``report`` are stamped into the child's metadata.
        """
        parent = env.selection.parent
        if not self._queue:
            self._refill(prompt, env)

        if not self._queue:  # abandoned proposal round (reference: propose_claude → continue)
            child = parent.child(parent.content) if parent is not None else Genome(content="")
            self._strip_stale(child)
            child.metadata["changed"] = False
            return child

        cand = self._queue.pop(0)   # FIFO: dispense exactly one candidate per loop iteration
        source = cand["source"]
        if parent is None:          # delegated selection (not used by Meta-Harness; safety)
            child = Genome(content=source)
        else:
            child = parent.child(source, generation=parent.metadata.get("generation", 0) + 1)
        # Strip inherited bookkeeping BEFORE stamping, so the fresh candidate fields survive.
        self._strip_stale(child)
        child.metadata.update(
            changed=(parent is None or source.strip() != parent.content.strip()),
            candidate_name=cand["name"],
            report=cand["report"],
        )
        return child
