# AdaEvolve
# Hierarchical adaptive search: G-signal exploration intensity, UCB island allocation, and LLM meta-guidance on stagnation.
"""AdaEvolve — a faithful port of "AdaEvolve: Adaptive LLM-Driven Zeroth-Order Optimization"
(UC Berkeley), following its reference implementation in SkyDiscover wherever paper and code
diverge. One module per component:
population.py -> AdaEvolveArchipelago (per-island QD archives + migration + spawning)
selection_policy.py -> AdaEvolvePolicy (G-signal intensity + 3-mode split + UCB islands)
prompt_builder.py -> AdaEvolvePromptBuilder (AdaEvolve template + search_guidance + mode labels)
proposer.py -> AdaEvolveProposer (SEARCH/REPLACE diff, full-rewrite fallback)
evaluator.py -> AdaEvolveEvaluator (task-supplied)
memory.py -> AdaEvolveParadigmMemory (Level-3 paradigm tracker)
scaffold.py -> AdaEvolveScaffold (the orchestrator that composes the six)
The scaffold reproduces ``AdaEvolveController``'s control flow around select/observe:
* ``before_step`` — (1) adaptive-state seeding (the upstream ``_seed_all_islands`` split): each
INITIAL island is seeded exactly once, the first time it is seen holding archive members (tracked
by a one-shot set rather than a ``best_score == -inf`` check, so a degenerate ``-inf`` seed cannot
re-seed every iteration): island 0 is routed through ``record_evaluation`` (full UCB visits/rewards + a one-time
improvement-window tick, since the island-0 seed is a new global best) and islands 1+ through
``receive_external_improvement`` (best/G only — they are migration adds upstream). DYNAMICALLY
spawned islands are NOT touched (upstream's ``_seed_new_island`` adds copies directly, bypassing the
adapter, so their state stays ``-inf`` until their first real evaluation registers δ=1.0).
(2) the iteration-START paradigm trigger (``_maybe_generate_paradigms``): when the tracker reports
stagnation, ONE guide-LLM call installs strict-JSON breakthrough ideas (parse fallbacks: direct JSON
→ ```json block → any code block → bracket scan; a parse failure installs nothing and is re-attempted
next iteration, matching upstream — no cooldown). Running it here forces the paradigm onto THIS
iteration's child (``select`` replaces the parent with the global best while a paradigm is active).
* ``step`` — the ``_run_normal_step`` → ``_generate_child`` in-iteration retry: up to
``general.inner_retry_times`` (= 1 + ``max_error_retries`` = 3) attempts, each RESAMPLING
parent/mode/inspirations on the fixed current island and folding the prior error into the prompt as
RETRY CONTEXT; the first child that passes the eval-failure gate is admitted (the QD archive may
still crowding-reject it, updating nothing — upstream's ``if was_added``), and an all-attempts-fail
iteration admits no child. Iteration advances by one per iteration regardless of retries.
* ``after_step`` — ONE improvement-window tick per admitted non-migrant child (the galapagos home of
``ParadigmTracker.record_improvement``, which upstream fires inside ``add()`` for accepted
non-migration children only). A crowding-rejected or fully-failed child writes nothing.
* ``periodic`` — the ``finally: end_iteration`` mirror (runs every iteration, even failed ones): UCB
island rotation + ring migration + dynamic-spawn check via the policy.
Deviations from the reference (sanctioned): upstream's ``use_unified_archive`` ablation switch
(quality-diversity archive vs legacy capped-list mode) is NOT ported — the QD archive is always
on. Upstream's ``diff_based_generation``/``allow_full_rewrites`` knobs are likewise not exposed:
the proposer is hard-wired to SEARCH/REPLACE diffs (whole-line ``apply_diff``) with a full-rewrite
fallback (the upstream default behaviour). ``max_parallel_iteration`` is forced to 1 (AdaEvolve is
strictly sequential upstream).
"""
from __future__ import annotations
import json
import logging
import re
from ...components.proposer import Env
from ...config import GalapagosConfig
from ...models import GalapagosModel
from ...models.base import Prompt
from ...records import Genome
from ..base_scaffold import GalapagosScaffold
from ..registry import register_scaffold
# one module per component (the AdaEvolve scaffold method)
from .memory import AdaEvolveParadigmMemory
from .population import AdaEvolveArchipelago
from .prompt_builder import AdaEvolvePromptBuilder
from .proposer import AdaEvolveProposer
from .selection_policy import AdaEvolvePolicy
log = logging.getLogger(__name__)
# ParadigmGenerator system message (code mode) — verbatim
_PARADIGM_SYSTEM = (
"You are an expert algorithm researcher and strategic advisor. Your task is to suggest "
"breakthrough algorithmic IDEAS in a JSON object with an 'ideas' array. Each idea describes a "
"high-level strategy (not code). Think carefully and deeply. Analyze the problem thoroughly, "
"understand the evaluation metric by reading the evaluator code, and suggest breakthrough "
"ideas that are correct, actionable, and will actually help improve the solution. Focus on "
"ideas that are fundamentally different from what has been tried. You MUST always return at "
"least one idea."
)
# Parse-fallback patterns for ``_parse_paradigms``, tried in this order after the raw reply itself.
# 1) a ```json fenced block; group 1 is the body, matched lazily up to the next closing fence
_JSON_FENCE = re.compile(r"```json\s*\n(.*?)```", re.DOTALL)
# 2) a fenced block with any (or no) alphabetic language tag; group 1 is the body
_ANY_FENCE = re.compile(r"```[a-zA-Z]*\s*\n(.*?)```", re.DOTALL)
# 3) a greedy bracket scan for a JSON array of objects; the WHOLE match is the payload (no group)
_BRACKET = re.compile(r"\[\s*\{.*\}\s*\]", re.DOTALL)
_FENCE_RUN = re.compile(r"`{3,}") # any run of >=3 backticks (fence opener/closer)
# character cap applied by ``_evaluator_source`` to the evaluator code embedded in the paradigm prompt
_MAX_EVALUATOR_CODE_CHARS = 12000
def _defence(text: str) -> str:
    """De-fence free text before it is injected into the paradigm prompt.

    Every run of three or more backticks is collapsed to exactly two, so the injected text can
    neither open nor close a fenced block and the current best program's fence stays the LAST
    fenced block in the prompt.
    """
    neutralized = _FENCE_RUN.sub("``", text)
    return neutralized
def _parse_paradigms(text: str) -> list[dict]:
"""The upstream parse-fallback chain; returns ``[]`` on ANY failure (never raises)."""
candidates = [text]
for pattern in (_JSON_FENCE, _ANY_FENCE, _BRACKET):
m = pattern.search(text)
if m:
candidates.append(m.group(1) if pattern is not _BRACKET else m.group(0))
for candidate in candidates:
try:
data = json.loads(candidate.strip())
except Exception: # noqa: BLE001 — fall through the chain
continue
ideas = data.get("ideas") if isinstance(data, dict) else data
if not isinstance(ideas, list):
continue
valid = [i for i in ideas if isinstance(i, dict)
and all(k in i for k in ("idea", "description", "approach_type"))]
if valid:
return valid
return []
# ---------------------------------------------------------------------------------------------
# ParadigmGenerator user-prompt sections (paradigm/generator.py, code mode) — ported verbatim.
# Two sanctioned adaptations protect the diff-parsing invariant: the JSON output example
# is rendered as 4-space-indented text (NOT a fenced block — a trailing ```json fence would be
# echoed back and mis-parsed as a real paradigm), and the current best program stays the
# LAST fenced block in the prompt (evaluator code is fenced but rendered before it; injected
# free text is fence-sanitized).
# ---------------------------------------------------------------------------------------------
_ANALYZE_FIRST = """\
**CRITICAL: ANALYZE THE CURRENT PROGRAM FIRST**
Before suggesting new ideas, carefully analyze the Current Program above:
- What algorithm/approach does it use? (This is what's WORKING - score {score})
- What are its strengths? (Why does it achieve this score?)
- What are its weaknesses? (What limits further improvement?)
- How can you improve it? (How to beat it?)
**IMPORTANT:** The program above is the CURRENT program that needs to be improved. Start by \
understanding what works, then suggest breakthrough ideas that build on or improve it."""
_ANALYSIS_FRAMEWORK = """\
## Analysis Framework - Complete Before Generating Ideas
**STEP 0: Understand the TASK (MOST IMPORTANT - DO THIS FIRST)**
- What is the problem asking you to do?
- What is the goal or objective? (maximize, minimize, optimize)
- What are the inputs and outputs?
- What needs to be improved? (variables/decisions that affect the goal)
- What constraints exist?
**STEP 1: Analyze the Evaluator Code**
- How are solutions scored?
- What metrics are computed?
- What causes failures or penalties?
**STEP 2: Identify Metrics**
- What is the primary metric or Pareto objective set?
- How is it calculated?
- What secondary metrics exist?
- If variance/std is penalized, the program needs consistency across scenarios
**STEP 3: Identify Constraints**
- What conditions must be satisfied?
- What validation happens?
- What causes score penalties?
**STEP 4: Identify Problem Structure**
- Is processing sequential or global?
- Are decision variables discrete or continuous?
- What dependencies exist between decisions?
- **CRITICAL:** What data does your program receive vs what the evaluator uses?
- **CRITICAL:** How are metrics computed across components? Independently then aggregated, or jointly?
**STEP 5: Determine Appropriate Approach**
- Match approach to problem structure
- Consider what has worked vs failed before
- Identify promising library/technique combinations
**STEP 6: Identify Improvement Opportunities**
- What would increase each metric?
- What would satisfy constraints better?
- What fundamentally different approaches could work?
Current best score is {score}. Your ideas must improve the configured optimization targets and, \
in multiobjective mode, explicitly reason about objective trade-offs."""
_TRIED_NONE = """\
## Previously Tried Ideas
No previous paradigms have been tried yet. You have freedom to explore any approach."""
_TRIED_SOME = """\
## Previously Tried Ideas - CHECK THIS FIRST
**CRITICAL:** Review what was already tried. Do NOT suggest ideas that use
the same libraries, functions, or approaches as FAILED attempts.
{tried}
**STRICT PROHIBITION:** Do NOT keep suggesting approaches that have already failed.
If an approach failed, understand WHY before suggesting similar techniques.
Prioritize approaches that are fundamentally different from failed attempts.
**Learning from Failures - Understand Root Causes:**
When a technique fails badly (score decreased significantly), understand WHY before suggesting alternatives:
- **Fundamental mismatch:** Wrong problem type (e.g., continuous optimizer on discrete problem) -> avoid that entire class of approaches
- **Structural mismatch:** Wrong approach for problem structure (e.g., linear proxy for non-linear objective) -> use approaches that match the actual structure
- **Implementation issues:** If the same library failed multiple times or very badly (>10% decrease), it likely indicates a fundamental mismatch - suggest a different class of approaches"""
_TECHNIQUES = """\
## Technique Guidance
**Note:** Standard scientific libraries (scipy, numpy, etc.) are available. PyTorch and TensorFlow are not available.
**For Continuous Optimization with Constraints:**
- scipy.optimize.minimize with constraint handling (SLSQP, trust-constr)
- Multiple initial guesses for global optimization
- Geometric approaches (Voronoi, convex hull)
**For Discrete/Combinatorial Problems:**
- Greedy heuristics with good ordering
- Local search (swaps, moves)
- scipy.optimize.linear_sum_assignment for assignment problems
- scipy.optimize.linprog for linear constraints
**For Graph/Network Problems:**
- NetworkX algorithms (shortest path, min spanning tree, flow)
- Spectral methods (eigenvalue-based ordering)
**For Repair/Reconstruction:**
- Heuristic-based detection and correction
- Structural constraint exploitation
- Averaging/interpolation for consistency
**For Robust Filtering/Noise Reduction:**
- scipy.signal (medfilt, savgol_filter, wiener) for direct filtering
- Use methods that handle outliers better than mean-based (median, percentile)
- Do NOT use scipy.optimize.minimize to tune filter parameters
- Use filtering functions directly, not multi-stage optimization
**General Principles:**
- Prefer single-function library calls over multi-stage pipelines
- Match algorithm to problem structure
- Simple approaches with good heuristics often beat complex methods
## ANTI-PATTERNS - Critical rules about what NOT to do
1. **Do NOT use multi-stage optimization**: Do NOT call one function then optimize its output. Deterministic setup code followed by a single optimization call is allowed.
2. **Do NOT use scipy.optimize.minimize for hyperparameter tuning**: Use minimize to solve the problem directly, NOT to tune parameters for another function.
3. **Do NOT use scipy.optimize.minimize for discrete problems**: Continuous optimizers cannot handle discrete constraint violations properly.
4. **Each idea MUST be a single-function library call**: Do NOT suggest multi-stage processing (e.g., "call A then call B").
**AVOID:** DEAP, genetic algorithm libraries, domain-specific complex libraries, custom research algorithms, or any library requiring additional `pip install`
**Learning from Success:**
When an approach succeeds, think: what principle made it work? Learn and think of better ideas, don't just add complexity. If breakthrough patterns are known, prioritize approaches that match them.
## DIVERSITY REQUIREMENTS
Before generating ideas, explicitly think:
- Idea 1: [Type A - e.g., algorithmic refinement or library-based approach]
- Idea 2: [Type B - e.g., structural change or processing pattern - DIFFERENT from A]
- Idea 3: [Type C - e.g., different technique or optimization method - DIFFERENT from A and B]
**Verify:** Are these DIFFERENT types? NOT variations of the same approach.
Each idea must:
- Use DIFFERENT libraries/techniques than failed attempts
- Target DIFFERENT metrics/aspects from the evaluator
- Be independently implementable
- Prefer clear implementations (different != more complex)
### Be Specific and Actionable
Not vague: "Try optimization"
Specific: "Use scipy.optimize.minimize with SLSQP method"
- Include exact library names, function names, methods, parameters
- Provide step-by-step implementation guide
- Focus on core logic that implements the idea correctly
- Handle edge cases and avoid errors/warnings
- For optimization: use multiple initializations, appropriate iteration counts and convergence criteria (evaluation timeout: {timeout}s)"""
# JSON example rendered as 4-space-indented text — deliberately NOT a fenced block (see above)
_OUTPUT_FORMAT = """\
## Output Format
**IMPORTANT:** Respond with a JSON object containing exactly {num} idea objects under the "ideas" key.
Do not include code patches or diffs — describe strategies in natural language.
Generate {num} breakthrough ideas of DIFFERENT types.
Each idea must be a JSON object with these fields:
- "idea": Clear, direct description with library/technique name
- "description": Detailed implementation guide (5-10 sentences)
- "what_to_optimize": What metrics/areas to focus on
- "cautions": Important implementation details to watch for
- "approach_type": Exact "library.function" format (e.g., "scipy.optimize.minimize")
**Diversity Requirement:** Each idea must use a DIFFERENT approach type.
Do not generate variations of the same technique.
Return ONLY a JSON object in this shape: {"ideas": [ ... ]} with {num} paradigm objects. No other text.
Example:
{
"ideas": [
{
"idea": "Use scipy.optimize.minimize with SLSQP",
"description": "Apply scipy.optimize.minimize directly to optimize all variables together...",
"what_to_optimize": "primary evaluator score",
"cautions": "Ensure constraints are properly formulated, use multiple starting points",
"approach_type": "scipy.optimize.minimize"
}
]
}"""
@register_scaffold("adaevolve")
class AdaEvolveScaffold(GalapagosScaffold):
name = "adaevolve"
@classmethod
def build_components(cls, config: GalapagosConfig, model: GalapagosModel | None) -> dict:
    """Instantiate the AdaEvolve components described by ``config``.

    Args:
        config: Run configuration. Reads ``seed`` plus the ``population``, ``selection_policy``
            and ``memory`` sections, coercing every knob to ``int``/``float``/``bool``.
        model: Accepted for interface compatibility; not used by this method.

    Returns:
        A dict keyed ``population``, ``selection_policy``, ``prompt_builder``, ``proposer`` and
        ``memory`` holding the freshly constructed component instances.
    """
    seed = int(config.seed)
    pop_cfg = config.population
    policy_cfg = config.selection_policy
    memory_cfg = config.memory
    # shared by the archipelago and the policy so both agree on the island count
    island_count = int(pop_cfg.num_islands)

    archipelago = AdaEvolveArchipelago(
        num_islands=island_count,
        population_size=int(pop_cfg.population_size),
        k_neighbors=int(pop_cfg.k_neighbors),
        archive_elite_ratio=float(pop_cfg.archive_elite_ratio),
        fitness_weight=float(pop_cfg.fitness_weight),
        novelty_weight=float(pop_cfg.novelty_weight),
        pareto_weight=float(pop_cfg.pareto_weight),
        migration_count=int(pop_cfg.migration_count),
    )
    policy = AdaEvolvePolicy(
        seed=seed,
        num_islands=island_count,
        decay=float(policy_cfg.decay),
        intensity_min=float(policy_cfg.intensity_min),
        intensity_max=float(policy_cfg.intensity_max),
        use_adaptive_search=bool(policy_cfg.use_adaptive_search),
        fixed_intensity=float(policy_cfg.fixed_intensity),
        use_ucb_selection=bool(policy_cfg.use_ucb_selection),
        use_migration=bool(policy_cfg.use_migration),
        migration_interval=int(policy_cfg.migration_interval),
        use_dynamic_islands=bool(policy_cfg.use_dynamic_islands),
        max_islands=int(policy_cfg.max_islands),
        spawn_productivity_threshold=float(policy_cfg.spawn_productivity_threshold),
        spawn_cooldown_iterations=int(policy_cfg.spawn_cooldown_iterations),
        local_context_program_ratio=float(policy_cfg.local_context_program_ratio),
        num_inspirations=int(policy_cfg.num_inspirations),
    )
    prompt_builder = AdaEvolvePromptBuilder()
    proposer = AdaEvolveProposer()
    paradigm_memory = AdaEvolveParadigmMemory(
        window_size=int(memory_cfg.paradigm_window_size),
        improvement_threshold=float(memory_cfg.paradigm_improvement_threshold),
        max_uses=int(memory_cfg.paradigm_max_uses),
        max_tried=int(memory_cfg.paradigm_max_tried),
        num_to_generate=int(memory_cfg.paradigm_num_to_generate),
    )
    return {
        "population": archipelago,
        "selection_policy": policy,
        "prompt_builder": prompt_builder,
        "proposer": proposer,
        "memory": paradigm_memory,
    }
# ---- setup: force sequential execution + stash the evaluator timeout -----------------------
def setup(self, task) -> None:
    """Clamp the run to sequential iterations, run the base setup, then stash the eval timeout.

    Args:
        task: Forwarded unchanged to the base class ``setup``.
    """
    general = self.general
    requested = general.max_parallel_iteration
    # The in-iteration retry lives in the step() override; per this file's notes the base
    # parallel loop bypasses step(), so anything above 1 is forced back to 1 (with a warning).
    if int(requested) > 1:
        log.warning("adaevolve requires sequential iterations; forcing max_parallel_iteration=1 "
                    "(was %d)", requested)
        general.max_parallel_iteration = 1
    super().setup(task)
    # The prompt builder has no evaluator handle, so a truthy evaluator timeout is published
    # under the "adaevolve" signals bucket; a missing/falsy timeout publishes nothing.
    timeout = getattr(self.evaluator, "timeout", None)
    if not timeout:
        return
    bucket = self.state.signals.setdefault("adaevolve", {})
    bucket["evaluator_timeout"] = timeout
# ---- before_step: paradigm generation (iteration START) + adaptive-state seeding -----------
def before_step(self) -> None:
    """Pre-generation hook: one-shot adapter seeding, then the paradigm-stagnation check.

    1. **Adaptive-state seeding** — runs only when the policy exposes ``adapter`` and the
       population exposes ``archives``. The island count seen on the first such call is captured
       as the set of INITIAL islands; each of them is seeded exactly once, the first time its
       archive holds members (tracked in a one-shot set, so a seed whose fitness is ``-inf``
       cannot re-seed on later iterations). Island 0 goes through ``record_evaluation`` and also
       writes one ``improved=True`` improvement tick to memory; every other initial island goes
       through ``receive_external_improvement``. Islands added after that first call (indices
       beyond the captured count) are never seeded here.
    2. **Paradigm breakthrough** — ``_maybe_generate_paradigms`` is called unconditionally on
       every invocation, after the seeding pass.
    """
    policy = self.selection_policy
    population = self.population
    adapter = getattr(policy, "adapter", None)
    archives = getattr(population, "archives", None)

    if adapter is not None and archives is not None:
        # Freeze the initial island count (and the seeded-set) on first use, before any spawn.
        if not hasattr(self, "_initial_islands"):
            self._initial_islands = population.num_islands
            self._adapter_seeded: set[int] = set()
        # Defensive: grow the adapter until it has one state per island.
        while len(adapter.states) < population.num_islands:
            adapter.add_dimension()
        for index in range(min(self._initial_islands, len(archives))):
            archive = archives[index]
            if index in self._adapter_seeded or not archive.members:
                continue  # already seeded, or nothing to seed from yet
            self._adapter_seeded.add(index)
            seed_best = max(genome.fitness for genome in archive.members)
            if index != 0:
                # treated as a migration-style seed: external improvement only
                adapter.receive_external_improvement(index, seed_best)
                continue
            # island 0 is the non-migration seed: full evaluation credit plus one
            # improvement-window tick so the paradigm tracker starts with a recorded win
            adapter.record_evaluation(0, seed_best)
            self.memory.write("", kind="improvement", improved=True, best_score=seed_best)

    self._maybe_generate_paradigms()
# ---- step: in-iteration retry with per-attempt resampling (AdaEvolveController loop) --------
def step(self, task) -> None:
    """Run one AdaEvolve iteration: up to ``general.inner_retry_times`` generate+evaluate tries.

    The iteration counter advances exactly once, then ``before_step`` runs. Each attempt
    re-runs ``selection_policy.select`` (so the selection is resampled per attempt), builds a
    prompt, asks the proposer for a child and evaluates it. An attempt is retried when the
    proposer reports ``metadata["changed"]`` as false (no diff applied) or when the population's
    eval-failure gate flags the evaluated child; the failure text of the previous attempt is
    published as the ``error_context`` signal for the next prompt. The loop stops at the first
    child that passes the gate. Whatever the outcome, the last child/result pair is handed to
    ``_commit`` and then ``after_step``.

    Args:
        task: Unused here; kept for the scaffold interface.
    """
    self.state.iteration += 1
    self.before_step()
    signals = self.state.signals.setdefault("adaevolve", {})
    budget = max(1, int(self.general.inner_retry_times))
    retry_note: str | None = None
    child = None
    res = None
    outcome = "no_diff"
    for _ in range(budget):
        # Refresh the flag every attempt: an earlier attempt may have used up the paradigm.
        paradigm_probe = getattr(self.memory, "has_active_paradigm", lambda: False)
        signals["paradigm_active"] = bool(paradigm_probe())
        # NOTE: selection happens BEFORE the error_context signal is refreshed below.
        selection = self.selection_policy.select(self.population, self.state)
        if retry_note:
            signals["error_context"] = retry_note
        else:
            signals.pop("error_context", None)
        prompt = self.prompt_builder.build(selection, self.memory, self.state)
        # The paradigm is marked used right after the prompt is built, once per attempt.
        if signals.get("paradigm_active") and hasattr(self.memory, "use_paradigm"):
            self.memory.use_paradigm()
        env = Env(model=self.model, selection=selection, evaluator=self.evaluator,
                  memory=self.memory, state=self.state)
        child = self.proposer.propose(prompt, env)
        if not child.metadata.get("changed", True):
            # no diff was applied: nothing to evaluate, retry with an explanatory note
            outcome = "no_diff"
            res = None
            retry_note = "the SEARCH/REPLACE edit did not change the program (no diff applied)"
            continue
        res = self.evaluator.evaluate(child)
        child.scores = res.metrics
        child.artifacts = res.artifacts
        child.metadata["valid"] = res.valid
        if self.population._is_eval_failure(child):
            # evaluation failed: carry the most specific feedback available into the retry
            outcome = "invalid"
            artifacts = res.artifacts or {}
            retry_note = str(res.text_feedback or artifacts.get("text_feedback")
                             or artifacts.get("error") or "evaluation failed")
            continue
        outcome = "ok"  # passed the gate (archive admission is decided later, in _commit)
        break
    self._commit(child, res, outcome)
    self.after_step(child, res)
def _commit(self, child, res, outcome: str) -> None:
    """Book-keep one finished iteration: admission, best/stale tracking, history and logging.

    Args:
        child: The last child produced this iteration, or ``None`` if none was produced.
        res: The evaluation result paired with ``child`` (``None`` after a no-diff attempt).
        outcome: ``"ok"`` when the child passed the eval-failure gate; otherwise ``"no_diff"``
            or any other value, which is counted under the ``"invalid"`` signal.

    Only an ``"ok"`` child is offered to ``population.add``. Any other outcome bumps the matching
    failure counter and the stale counter, records an event and returns without admission.
    """
    iteration, total = self.state.iteration, self.general.max_iterations
    counter = f"{iteration:>{len(str(total))}}/{total}"  # right-aligned "i/N" for log lines

    if outcome != "ok":
        tally_key = "no_diff" if outcome == "no_diff" else "invalid"
        self.state.signals[tally_key] = self.state.signals.get(tally_key, 0) + 1
        self._stale += 1
        if child is not None:
            # The child never went through population.add; force the flag off so after_step
            # (which keys on metadata["admitted"]) writes no improvement tick for it.
            child.metadata["admitted"] = False
            self._history.append(child)
        log.info("iter %s all %d attempts failed (%s) — no child admitted",
                 counter, max(1, int(self.general.inner_retry_times)), outcome)
        self._record_event(outcome)
        return

    admitted = self.population.add(child)
    self.selection_policy.observe(child, self.state)
    self._history.append(child)
    if admitted:
        self._note_recent(child)

    previous = self.state.best.fitness if self.state.best else float("-inf")
    # only an admitted child may displace the current best
    improved = admitted and child.fitness > previous
    if improved:
        self.state.best = child
        self._stale = 0
    else:
        self._stale += 1

    best_fit = self.state.best.fitness if self.state.best else float("-inf")
    log.info("iter %s child=%s score=%.4f best=%.4f%s%s", counter, child.id,
             child.fitness, best_fit, " 🌟 new best" if improved else "",
             "" if admitted else " [archive-rejected]")
    self._record(child, res, new_best=improved)
# ---- after_step: one improvement-window tick per admitted non-migrant child ----------------
def after_step(self, child: Genome, result) -> None:
    """Write one improvement-window tick for an admitted, non-migrant child.

    Args:
        child: The iteration's child, or ``None``. A tick is written only when its metadata has a
            truthy ``admitted`` and no truthy ``migrated_from``.
        result: Unused here; kept for the hook signature.

    The tick records whether ``child`` is the current ``state.best`` and the best fitness (the
    child's own fitness when no best exists). Independently of the tick, the ``global_best``
    signal is refreshed whenever ``state.best`` is set.
    """
    signals = self.state.signals.setdefault("adaevolve", {})
    counts_for_window = (child is not None and child.metadata.get("admitted")
                         and not child.metadata.get("migrated_from"))
    if counts_for_window:
        became_best = self.state.best is child
        reference = self.state.best.fitness if self.state.best else child.fitness
        self.memory.write("", kind="improvement", improved=became_best, best_score=reference)
    if self.state.best is not None:
        signals["global_best"] = self.state.best.fitness
# ---- periodic: end-of-iteration scheduling (UCB rotation + migration + dynamic spawn) -------
def periodic(self) -> None:
    """End-of-iteration hook: hand the current iteration number and population to the policy.

    Delegates entirely to ``selection_policy.end_iteration``; per this file's own notes that is
    where island rotation, migration and the dynamic-spawn check are scheduled. The paradigm
    trigger is not run here — ``before_step`` calls it at the start of the iteration.
    """
    finish_iteration = self.selection_policy.end_iteration
    finish_iteration(self.state.iteration, self.population)
# ---- the Level-3 paradigm-breakthrough trigger (run at iteration START via before_step) -----
def _maybe_generate_paradigms(self) -> None:
"""``_run_iteration``'s top-of-iteration paradigm check: when the tracker reports global
stagnation (full window, no active paradigm, improvement-rate below threshold), ONE guide-LLM
call requests strict-JSON breakthrough ideas (parse fallbacks: direct JSON → ```json block →
any code block → bracket scan). Installed ideas are capped at ``paradigm_num_to_generate``; a
parse failure installs nothing and — exactly like upstream, which re-checks
``is_paradigm_stagnating()`` and re-attempts generation EVERY iteration while still stuck — the
search simply continues to the next iteration's check (no cooldown). Setting ``paradigm_active``
makes THIS iteration's ``select`` force the parent to the global best."""
if not bool(self.config.memory.use_paradigm_breakthrough):
return
memory = self.memory
if self.model is None or not hasattr(memory, "is_stagnating") or not memory.is_stagnating():
return
sig = self.state.signals.setdefault("adaevolve", {})
best = self.population.best() or self.state.best
if best is None:
return
num = int(self.config.memory.paradigm_num_to_generate)
prompt = Prompt(system=_PARADIGM_SYSTEM, user=self._paradigm_prompt(best, num))
gen = self.model.generate(prompt)
if self.state is not None:
self.state.record_cost(gen.cost_usd, gen.prompt_tokens, gen.completion_tokens)
ideas = _parse_paradigms(gen.text or "")[:num] # cap installs at paradigm_num_to_generate
if not ideas:
# a parse failure must never crash the search; upstream re-attempts next iteration while
# still stagnating (no back-off), so just return and let the next iteration re-check
log.debug("paradigm generation produced no ideas — will retry next iteration if still stuck")
return
memory.write("", kind="paradigms", paradigms=ideas, best_score=best.fitness)
log.info("paradigm breakthrough — installed %d idea(s) at iter %d",
len(ideas), self.state.iteration)
sig["paradigm_active"] = memory.has_active_paradigm()
# ---- the ParadigmGenerator prompt -----------------------------------------------------------
def _evaluator_source(self) -> str:
"""The task evaluator's SOURCE CODE (``SubprocessEvaluator.evaluator_path``), capped at
``_MAX_EVALUATOR_CODE_CHARS`` and fence-sanitized; ``""`` on any attribute/IO failure (the
section is then omitted gracefully)."""
try:
path = self.evaluator.evaluator_path
with open(path, encoding="utf-8") as f:
code = f.read()
except Exception: # noqa: BLE001 — no evaluator_path / unreadable file: omit the section
return ""
if len(code) > _MAX_EVALUATOR_CODE_CHARS:
code = code[:_MAX_EVALUATOR_CODE_CHARS] + "\n# ... (truncated)"
return _defence(code)
def _paradigm_prompt(self, best: Genome, num: int) -> str:
    """Assemble the user prompt for the paradigm-generation call.

    Sections, in order: problem objective → optimization targets → evaluator source (only when
    ``_evaluator_source`` returns text) → current best program and score → the ANALYZE-FIRST
    directive → the Analysis Framework → previously tried ideas (or a "none yet" notice) →
    technique guidance with the evaluation timeout → evaluator feedback on the best program
    (only when present, capped at 2000 characters) → output format. Sections are joined with
    blank lines. The best program is the last fenced block: text injected after it (tried ideas,
    feedback) is passed through ``_defence`` first.

    Args:
        best: The program to improve; reads ``fitness``, ``content`` and ``artifacts``.
        num: Number of ideas to request in the output-format section.

    Returns:
        The complete prompt text.
    """
    score = f"{best.fitness:.6f}"
    sections: list[str] = []
    # ---- problem context ----
    sections.append(f"## Problem Objective\n\n{self.state.task_context or 'N/A'}")
    sections.append("## Optimization Targets\n\n"
                    "Optimize the primary scalar score defined by the evaluator.")
    evaluator_code = self._evaluator_source()
    if evaluator_code:
        sections.append("## Evaluator Code (shows how solutions are scored)\n\n"
                        f"```python\n{evaluator_code}\n```")
    sections.append(f"## Current Best Program (score: {score})\n\n"
                    f"```python\n{best.content}\n```\n\n"
                    "**CRITICAL:** Analyze the current program first. What algorithm does it "
                    "use?\nWhat are its strengths and weaknesses? How can you improve upon it?")
    # str.replace (not str.format): the templates contain literal JSON braces
    sections.append(_ANALYZE_FIRST.replace("{score}", score))
    sections.append(_ANALYSIS_FRAMEWORK.replace("{score}", score))
    # ---- previously tried ideas ----
    tried = (self.memory.get_previously_tried_ideas()
             if hasattr(self.memory, "get_previously_tried_ideas") else [])
    if tried:
        formatted = "\n".join(f"- {_defence(idea)}" for idea in tried)
        sections.append(_TRIED_SOME.replace("{tried}", formatted))
    else:
        sections.append(_TRIED_NONE)
    # A missing OR unset/falsy timeout falls back to 300 (setup() likewise treats a falsy
    # timeout as absent); a plain getattr default would render "None" when the attribute is None.
    timeout = getattr(self.evaluator, "timeout", None) or 300
    sections.append(_TECHNIQUES.replace("{timeout}", str(timeout)))
    # ---- evaluator feedback on the best program (inserted before Output Format) ----
    # artifacts may be None: step() copies the evaluator's artifacts onto the child unguarded
    feedback = (best.artifacts or {}).get("text_feedback")
    if feedback:
        text = str(feedback)
        if len(text) > 2000:
            text = text[:2000] + "\n... (truncated)"
        sections.append("## Evaluator Feedback on Current Best Program\n"
                        "The evaluator analyzed cases where the current program fails. "
                        f"Use this to inform your breakthrough ideas:\n\n{_defence(text)}")
    sections.append(_OUTPUT_FORMAT.replace("{num}", str(num)))
    return "\n\n".join(sections)