# AdaEvolve
# Hierarchical adaptive search: G-signal exploration intensity, UCB island allocation, and LLM meta-guidance on stagnation.
"""AdaEvolve PromptBuilder component — the AdaEvolve diff template with the computed
``{search_guidance}`` slot and the mode-aware EXPLORE/EXPLOIT parent labels.
One file per component (see scaffold.py). Faithful port of SkyDiscover's
``AdaEvolveContextBuilder`` + ``templates/diff_user_message.txt``, adapted to one galapagos hard
invariant: the current program must be the LAST fenced ``python`` block in the user message (see
``components/prompt.py``), so the diff Proposer can locate it. The
upstream template renders ``{search_guidance}`` *after* the current solution; since the retry
section carries a plain fence (which would shadow the program block), this port renders the whole
``{search_guidance}`` *before* the current solution and keeps everything after the program block
fence-free. Section content is otherwise verbatim.
``{search_guidance}`` assembly, in the upstream priority order: evaluator feedback on the parent
(truncated 2000 chars) → the active paradigm's "## BREAKTHROUGH IDEA" block (read from Memory) →
sibling context ("## PREVIOUS ATTEMPTS ON THIS PARENT", IMPROVED/REGRESSED/NO CHANGE at ±0.001)
→ "## RETRY CONTEXT" with the previous iteration's failure error from
``state.signals["adaevolve"]["error_context"]``. All free text injected into the guidance
(evaluator feedback, paradigm block, error text) is fence-sanitized — any run of three-plus
backticks collapses to two — so adversarial feedback can never shift the fence pairing and the
current program stays the LAST fenced block.
Faithful to the reference templates (``_format_metrics`` / ``_format_previous_attempts`` /
``_format_other_context_programs`` / ``_format_current_program`` / ``_identify_improvement_areas``):
the ``# Current Solution Information`` metrics block, the ``# Program Generation History``
(``## Previous Attempts`` — always "No previous attempts yet." because ``AdaEvolveController``
never populates ``previous_programs`` — then ``## Other Context Solutions``), the ``# Current
Solution`` ``## Program Information`` sub-block, the focus areas, and the ``# Task`` tail (worked
SEARCH/REPLACE example + the ``## IMPORTANT`` follow-instruction + ``{timeout_warning}``).
Deviations from the reference templates (beyond the search_guidance move documented above):
* a ``# Task Description`` section (``state.task_context``) is prepended — galapagos conveys the
task in the user message rather than a per-task ``prompt.system_message``;
* inspiration code is rendered in FULL by default (``max_snippet_chars=None``, faithful to the
reference, which never caps context-solution code); it is only fence-sanitized — any run of
three-plus backticks collapses to two — so the current program stays the LAST fenced block. The
inspirations are display-only and never re-extracted; ``max_snippet_chars`` is an optional opt-in
size bound, not a default truncation;
* the inspiration ``Score breakdown:`` items render one-per-line (the galapagos house style shared
by every default-template scaffold) rather than the reference's single concatenated line, and
whitespace (a blank line / a trailing space) is normalized — display-only, no semantic effect.
"""
from __future__ import annotations
import re
from ...components.prompt import PromptBuilder
from ...models.base import Prompt
from ...records import Genome, RunState, Selection
# config prompt.system_message default fallback — verbatim
_ADAEVOLVE_SYSTEM = (
"You are an expert tasked with iteratively improving a solution.\n"
"Your goal is to maximize the COMBINED SCORE while exploring diverse approaches.\n"
"The system maintains a collection of diverse solutions - both high combined score AND "
"diversity are valuable."
)
# mode labels rendered under the "# Current Solution" heading — verbatim (code variants)
EXPLORE_LABEL = """\
## PARENT SELECTION CONTEXT
This parent was selected through diversity-driven sampling to explore different regions.
### EXPLORATION GUIDANCE
- Consider alternative algorithmic approaches
- Don't be constrained by the parent's approach
- Look for fundamentally different algorithms or novel techniques
- Balance creativity with correctness
Your goal: Discover new approaches that might outperform current solutions."""
EXPLOIT_LABEL = """\
## PARENT SELECTION CONTEXT
This parent was selected from the archive of top-performing programs.
### OPTIMIZATION GUIDANCE
- This solution works well, but meaningful improvements are still possible
- You may refine the existing approach OR introduce better algorithms
- Consider: algorithmic improvements, better data structures, efficient libraries
- Ensure correctness is maintained
Your goal: Improve upon this solution."""
_MODE_LABELS = {"exploration": EXPLORE_LABEL, "exploitation": EXPLOIT_LABEL} # balanced: no label
_FEEDBACK_HEADER = (
"## EVALUATOR FEEDBACK ON CURRENT PROGRAM\n"
"The evaluator analyzed cases where the current program failed and produced the following "
"diagnostic feedback. Use this to make targeted improvements:"
)
# the # Task tail of diff_user_message.txt; the SEARCH/REPLACE format block AND the worked example
# are unfenced upstream (they use <<<<<<< markers, not backticks) and stay unfenced here so the
# current program remains the last fenced block
_TASK_INSTRUCTIONS = """\
You MUST use the exact SEARCH/REPLACE diff format shown below to indicate changes:
<<<<<<< SEARCH
# Original code to find and replace (must match exactly)
=======
# New replacement code
>>>>>>> REPLACE
Example of valid diff format:
<<<<<<< SEARCH
for i in range(m):
for j in range(p):
for k in range(n):
C[i, j] += A[i, k] * B[k, j]
=======
# Reorder loops for better memory access pattern
for i in range(m):
for k in range(n):
for j in range(p):
C[i, j] += A[i, k] * B[k, j]
>>>>>>> REPLACE
**CRITICAL**: You can suggest multiple changes. Each SEARCH section must EXACTLY match code in \
"# Current Solution" - copy it character-for-character, preserving all whitespace and indentation. \
Do NOT paraphrase or reformat.
Be thoughtful about your changes and explain your reasoning thoroughly.
Include a concise docstring at the start of functions describing the exact approach taken.
IMPORTANT: If an instruction header of "## IMPORTANT: ..." is given below the "# Current Solution", you MUST follow it. Otherwise,
focus on targeted improvements of the program. """
# full-rewrite mode tail (general.mutation_approach="full_rewrite") — the model returns the complete program
_TASK_INSTRUCTIONS_REWRITE = """\
Provide the complete new program solution in a single ```python code block.
IMPORTANT: Make sure your rewritten program maintains the same inputs and outputs \
as the original program, but with improved internal implementation.
Be thoughtful about your changes and explain your reasoning thoroughly.
Include a concise docstring at the start of functions describing the exact approach taken."""
_FENCE_RUN = re.compile(r"`{3,}") # any run of >=3 backticks (a fence opener/closer)
def _defence(text: str) -> str:
"""Neutralize fence markers in injected free text (evaluator feedback, paradigm text, error
context): collapse any run of three-plus backticks to two, so the current-program-is-the-last-
fenced-block invariant holds even against adversarial feedback."""
return _FENCE_RUN.sub("``", text)
class AdaEvolvePromptBuilder(PromptBuilder):
    """Renders the AdaEvolve user message: solution info → generation history + inspirations →
    search guidance → current solution (mode label + the LAST fenced python block) → task.

    Every evaluator-supplied string rendered ahead of the current program — error text,
    string-valued metrics, a non-numeric combined score, evaluator feedback, paradigm text and the
    retry error — passes through ``_defence``, so a stray backtick fence in that text cannot shift
    the fence pairing and the current program stays the last fenced block. The current program
    itself is rendered untouched.
    """

    # Score keys that get their own dedicated line and are left out of metric breakdowns.
    _HEADLINE_SCORE_KEYS = ("combined_score", "error")
    # A sibling attempt whose |delta| does not exceed this is reported as "NO CHANGE".
    _SIBLING_DELTA_EPS = 0.001

    def __init__(self, max_feedback_chars: int = 2000, max_snippet_chars: int | None = None):
        """Store the two prompt-size bounds.

        Args:
            max_feedback_chars: evaluator feedback longer than this is cut to this many characters
                and followed by a "... (truncated)" marker.
            max_snippet_chars: ``None`` renders inspiration code in FULL (faithful to the
                reference, which never caps context-solution code); an integer is an optional
                opt-in prompt-size bound, not a default truncation.
        """
        self.max_feedback_chars = max_feedback_chars
        self.max_snippet_chars = max_snippet_chars

    def build(self, selection: Selection, memory=None, state: RunState | None = None) -> Prompt:
        """Assemble the system + user prompt for one mutation of ``selection.parent``.

        Args:
            selection: supplies ``parent`` (the program to improve) and ``inspirations``.
            memory: optional object whose ``read()`` returns the active paradigm block.
            state: optional run state; ``task_context`` and ``signals["adaevolve"]`` are read.

        Returns:
            A ``Prompt`` whose user message joins the sections with blank lines. When there is no
            parent, the user message is just the task context (or empty without a state).
        """
        parent = selection.parent
        if parent is None:  # delegated selection (not used by AdaEvolve, kept for safety)
            return Prompt(system=_ADAEVOLVE_SYSTEM, user=(state.task_context if state else ""))
        sig = (state.signals.get("adaevolve", {}) if state is not None else {}) or {}

        scores = parent.scores
        combined = scores.get("combined_score")
        error = scores.get("error")
        breakdown = self._breakdown_lines(scores)

        sections: list[str] = []
        if state and state.task_context:
            sections.append(f"# Task Description\n{state.task_context}")

        # --- # Current Solution Information --- ({metrics} + {improvement_areas})
        # combined_score first, then error, then a "Metrics:" breakdown of the remaining keys
        # (floats at .4f, ints/strs/bools verbatim, other types skipped).
        info = ["# Current Solution Information", "- Main Metrics: "]
        if combined is not None:
            if self._is_number(combined):
                info.append(f"- combined_score: {combined:.4f}")
            else:
                info.append(_defence(f"- combined_score: {combined}"))
        if error:
            info.append(_defence(f"- error: {error}"))
        # The "Metrics:" header appears whenever any non-headline key exists, even if every such
        # value has a type the breakdown skips.
        if any(key not in self._HEADLINE_SCORE_KEYS for key in scores):
            info += ["", "Metrics:", *breakdown]
        info.append(f"- Focus areas: {self._improvement_areas(parent)}")
        sections.append("\n".join(info))

        # --- # Program Generation History --- ({previous_attempts} + {other_context_programs})
        history = self._history(selection)
        if history:
            sections.append("# Program Generation History\n" + history)

        # --- {search_guidance} --- (rendered BEFORE the current solution: its retry section
        # carries a plain fence that must not follow the program block)
        guidance = self._search_guidance(parent, sig, memory)
        if guidance:
            sections.append(guidance)

        # --- # Current Solution --- (mode label + ## Program Information + the LAST fenced block)
        current = ["# Current Solution"]
        label = _MODE_LABELS.get(sig.get("mode"), "")
        if label:
            current.append(label)
        # The same metrics are repeated here right before the code; everything in this sub-block
        # is fence-sanitized so the program stays the LAST fenced block.
        current += ["", "## Program Information"]
        if self._is_number(combined):
            current.append(f"combined_score: {combined:.4f}")
        if error:
            current.append(_defence(f"error: {error}"))
        if breakdown:
            current += ["Score breakdown:", *breakdown]
        # The program itself is NOT sanitized: SEARCH blocks must match it character-for-character.
        current += ["", f"```python\n{parent.content}\n```"]
        sections.append("\n".join(current))

        # --- # Task --- (objective + diversity lines + diff/rewrite instructions, chosen by the base)
        verb = self.by_approach("Suggest improvements to", "Rewrite")
        instr = self.by_approach(_TASK_INSTRUCTIONS, _TASK_INSTRUCTIONS_REWRITE)
        # {timeout_warning}: rendered only when a timeout was stashed in the signals.
        timeout = sig.get("evaluator_timeout")
        timeout_warning = (f"\n\n- Time limit: Programs should complete execution within {timeout} "
                           "seconds; otherwise, they will timeout." if timeout else "")
        sections.append(
            "# Task\n"
            f"{verb} the program that will improve its COMBINED_SCORE.\n"
            "The system maintains diversity across these dimensions: score, complexity.\n"
            "Different solutions with similar combined_score but different features are valuable.\n\n"
            + instr + timeout_warning
        )
        return Prompt(system=_ADAEVOLVE_SYSTEM, user="\n\n".join(sections))

    # ---- shared formatting helpers --------------------------------------------------------------
    @staticmethod
    def _is_number(value) -> bool:
        """True for int/float values; bools are excluded even though they subclass int."""
        return isinstance(value, (int, float)) and not isinstance(value, bool)

    @classmethod
    def _breakdown_lines(cls, scores) -> list[str]:
        """One `` - key: value`` line per non-headline score: floats at .4f, int/str/bool verbatim
        (fence-sanitized), every other value type skipped."""
        lines: list[str] = []
        for key, value in scores.items():
            if key in cls._HEADLINE_SCORE_KEYS:
                continue
            if isinstance(value, float):
                lines.append(f" - {key}: {value:.4f}")
            elif isinstance(value, (int, str, bool)):
                # only strings can actually carry backticks; sanitizing the line is a no-op otherwise
                lines.append(_defence(f" - {key}: {value}"))
        return lines

    # ---- section builders -----------------------------------------------------------------------
    @staticmethod
    def _improvement_areas(parent: Genome) -> str:
        """Focus-area bullets for the parent, one ``- {area}`` per line.

        A simplification bullet is emitted when the solution is longer than 500 characters;
        otherwise the default "improve the combined_score" line. No score-trend bullet is produced
        because this builder has no previous-programs history to compare against.
        """
        areas: list[str] = []
        if len(parent.content) > 500:
            areas.append("Consider simplifying - solution length exceeds 500 characters")
        if not areas:
            areas.append("Focus on improving the combined_score")
        return "\n".join(f"- {a}" for a in areas)

    def _history(self, selection: Selection) -> str:
        """``{previous_attempts}`` + ``{other_context_programs}``.

        The previous-attempts part is always the empty-state string. When the selection carries
        inspirations, an ``## Other Context Solutions`` part follows: a preamble, then per program
        a ``### Program i`` header (with the combined score when numeric), an optional error line,
        the score breakdown and the fenced code. Inspiration code is cut to ``max_snippet_chars``
        when that is set and is always fence-sanitized, as are the error and metric lines, so the
        current program stays the LAST fenced block.
        """
        blocks: list[str] = ["## Previous Attempts\n\nNo previous attempts yet."]
        if selection.inspirations:
            cap = self.max_snippet_chars
            lines = ["These programs represent diverse approaches and creative solutions that may "
                     "be relevant to the current task:\n"]
            for index, genome in enumerate(selection.inspirations, 1):
                score = genome.scores.get("combined_score")
                if self._is_number(score):
                    lines.append(f"### Program {index} (combined_score: {score:.4f})")
                else:
                    lines.append(f"### Program {index}")
                error = genome.scores.get("error")
                if error:
                    lines.append(_defence(f"- error: {error}"))
                breakdown = self._breakdown_lines(genome.scores)
                if breakdown:
                    lines += ["Score breakdown:", *breakdown]
                code = genome.content if cap is None else genome.content[:cap]
                lines.append(f"\n```python\n{_defence(code)}\n```\n")
            blocks.append("## Other Context Solutions\n" + "\n".join(lines))
        return "\n\n".join(blocks)

    def _search_guidance(self, parent: Genome, sig: dict, memory) -> str:
        """Build the search guidance: feedback → paradigm → siblings → retry, joined by blank lines.

        Args:
            parent: its ``artifacts["text_feedback"]`` is the evaluator feedback, if any.
            sig: the AdaEvolve signals; ``siblings`` and ``error_context`` are read.
            memory: optional object whose ``read()`` returns the paradigm block.

        Returns:
            The joined guidance, or an empty string when no part applies.
        """
        parts: list[str] = []

        feedback = parent.artifacts.get("text_feedback")
        if feedback:
            # Truncate only when the feedback EXCEEDS the cap, and mark the cut so the LLM knows;
            # short feedback is shown in full. Sanitizing happens after the cut.
            fb = str(feedback)
            if len(fb) > self.max_feedback_chars:
                fb = fb[: self.max_feedback_chars] + "\n... (truncated)"
            parts.append(f"{_FEEDBACK_HEADER}\n\n{_defence(fb)}")

        if memory is not None:
            paradigm_block = memory.read()
            if paradigm_block:
                parts.append(_defence(paradigm_block))

        siblings = sig.get("siblings") or []
        if siblings:
            eps = self._SIBLING_DELTA_EPS
            # classify each sibling once; the summary counts and the per-line tags share it
            tags = ["IMPROVED" if s["delta"] > eps
                    else "REGRESSED" if s["delta"] < -eps else "NO CHANGE"
                    for s in siblings]
            improved = tags.count("IMPROVED")
            regressed = tags.count("REGRESSED")
            unchanged = len(siblings) - improved - regressed
            lines = ["## PREVIOUS ATTEMPTS ON THIS PARENT",
                     f"Summary: {improved} improved, {unchanged} unchanged, {regressed} regressed"]
            for i, (s, tag) in enumerate(zip(siblings, tags), 1):
                lines.append(f" {i}. {s['parent_fitness']:.4f} -> {s['child_fitness']:.4f} "
                             f"({s['delta']:+.4f}) [{tag}]")
            lines.append("Avoid repeating approaches that didn't work.")
            parts.append("\n".join(lines))

        error_context = sig.get("error_context")
        if error_context:
            # This plain fence is why the guidance is rendered before the current program.
            parts.append("## RETRY CONTEXT\nPrevious attempt failed with error:\n```\n"
                         f"{_defence(str(error_context))}\n```\nPlease fix this issue in your response.")
        return "\n\n".join(parts)