OpenEvolve
Island-model MAP-Elites evolutionary search with diff mutation (the open AlphaEvolve).
"""OpenEvolve PromptBuilder component — a faithful port of the live OpenEvolve ``diff_user``
template (``openevolve/prompts/defaults/diff_user.txt`` + ``evolution_history.txt`` +
``previous_attempt.txt`` + ``top_program.txt`` + ``inspiration_program.txt`` + ``fragments.json``),
not the older in-file string constants.
User message, in order:
``# Current Program Information`` (Fitness / Feature coordinates / Focus areas) → ``## Last Execution
Output`` (artifacts) → ``# Program Evolution History`` (``## Previous Attempts`` + ``## Top Performing
Programs`` + ``## Diverse Programs`` + ``## Inspiration Programs``) → ``# Current Program`` (the parent,
kept LAST so a diff Proposer can locate it) → ``# Task`` (the SEARCH/REPLACE instruction).
"""
from __future__ import annotations
from ...components.prompt import PromptBuilder
from ...models.base import Prompt
from ...records import Genome, RunState, Selection
from .population import _fitness
# openevolve/prompts/defaults/system_message.txt (the live default; the diff format lives in the user # Task)
_OPENEVOLVE_SYSTEM = (
"You are an expert software developer tasked with iteratively improving a codebase.\n"
"Your goal is to maximize the FITNESS SCORE while exploring diverse solutions across feature "
"dimensions.\n"
"The system maintains a collection of diverse programs - both high fitness AND diversity are valuable."
)
# diff_user.txt # Task section (verbatim)
_TASK_DIFF = (
"# Task\n"
"Suggest improvements to the program that will improve its FITNESS SCORE.\n"
"The system maintains diversity across these dimensions: {dims}\n"
"Different solutions with similar fitness but different features are valuable.\n\n"
"You MUST use the exact SEARCH/REPLACE diff format shown below to indicate changes:\n\n"
"<<<<<<< SEARCH\n"
"# Original code to find and replace (must match exactly)\n"
"=======\n"
"# New replacement code\n"
">>>>>>> REPLACE\n\n"
"Example of valid diff format:\n"
"<<<<<<< SEARCH\n"
"for i in range(m):\n"
" for j in range(p):\n"
" for k in range(n):\n"
" C[i, j] += A[i, k] * B[k, j]\n"
"=======\n"
"# Reorder loops for better memory access pattern\n"
"for i in range(m):\n"
" for k in range(n):\n"
" for j in range(p):\n"
" C[i, j] += A[i, k] * B[k, j]\n"
">>>>>>> REPLACE\n\n"
"You can suggest multiple changes. Each SEARCH section must exactly match code in the current "
"program.\nBe thoughtful about your changes and explain your reasoning thoroughly.\n\n"
"IMPORTANT: Do not rewrite the entire program - focus on targeted improvements."
)
# full_rewrite_user.txt # Task section (verbatim) — used when general.mutation_approach="full_rewrite"
_TASK_REWRITE = (
"# Task\n"
"Rewrite the program to improve its FITNESS SCORE.\n"
"The system maintains diversity across these dimensions: {dims}\n"
"Different solutions with similar fitness but different features are valuable.\n"
"Provide the complete new program code.\n\n"
"IMPORTANT: Make sure your rewritten program maintains the same inputs and outputs\n"
"as the original program, but with improved internal implementation.\n\n"
"```python\n# Your rewritten program here\n```"
)
class OpenEvolvePromptBuilder(PromptBuilder):
def __init__(self, num_top_programs: int = 3, num_diverse: int = 2, max_artifact_bytes: int = 20480,
max_snippet_chars: int | None = None, suggest_simplification_after_chars: int = 500,
feature_dimensions: list[str] | None = None):
# max_snippet_chars=None → full program code in history sections (OpenEvolve does not truncate
# per-program code; only artifacts are byte-capped).
self.num_top_programs = num_top_programs
self.num_diverse = num_diverse
self.max_artifact_bytes = max_artifact_bytes
self.max_snippet_chars = max_snippet_chars
self.suggest_simplification_after_chars = suggest_simplification_after_chars
self.feature_dimensions = list(feature_dimensions or ["complexity", "diversity"])
def _fit(self, genome: Genome) -> float:
return _fitness(genome, self.feature_dimensions)
def build(self, selection: Selection, memory=None, state: RunState | None = None) -> Prompt:
parent = selection.parent
# OpenEvolve conveys the task via a task-specific system message; we append the task context there.
system = _OPENEVOLVE_SYSTEM
if state and state.task_context:
system = f"{system}\n\n{state.task_context}"
if parent is None: # delegated selection (not used by OpenEvolve, kept for safety)
return Prompt(system=system, user=(state.task_context if state else ""))
dims = ", ".join(self.feature_dimensions) if self.feature_dimensions else "None"
sections: list[str] = []
# --- # Current Program Information ---
sections.append(
"# Current Program Information\n"
f"- Fitness: {self._fit(parent):.4f}\n"
f"- Feature coordinates: {self._feature_coords(parent)}\n"
f"- Focus areas: {self._improvement_areas(parent, selection)}"
)
# --- ## Last Execution Output (artifacts) ---
artifacts = self._render_artifacts(parent)
if artifacts:
sections.append(artifacts)
# --- # Program Evolution History --- (## Previous Attempts + ## Top Performing Programs are
# always present, matching OpenEvolve's evolution_history.txt; Diverse/Inspiration are conditional)
sections.append("# Program Evolution History\n" + self._evolution_history(selection))
# --- # Current Program --- (MUST be the last fenced python block)
sections.append(f"# Current Program\n```python\n{parent.content}\n```")
# --- # Task --- (diff_user.txt vs full_rewrite_user.txt, switched by the base PromptBuilder)
sections.append(self.by_approach(_TASK_DIFF, _TASK_REWRITE).format(dims=dims))
return Prompt(system=system, user="\n\n".join(sections))
# ---- # Current Program Information helpers ----------------------------------------------
def _feature_coords(self, parent: Genome) -> str:
"""format_feature_coordinates: raw values of the feature dims that the evaluator returned."""
parts = []
for dim in self.feature_dimensions:
v = parent.scores.get(dim)
if isinstance(v, (int, float)) and not isinstance(v, bool):
parts.append(f"{dim}={float(v):.2f}")
elif v is not None:
parts.append(f"{dim}={v}")
return ", ".join(parts)
def _improvement_areas(self, parent: Genome, selection: Selection) -> str:
"""_identify_improvement_areas: fitness trend vs the most recent previous attempt + feature
exploration note + code-length hint, else default guidance (one ``- `` line each)."""
areas: list[str] = []
cur = self._fit(parent)
# OpenEvolve compares to previous_programs[-1] = the LAST of the island top-N (includes parent),
# not the lowest island member.
top = sorted(selection.pool, key=self._fit, reverse=True)[: self.num_top_programs]
if top:
pf = self._fit(top[-1])
if cur > pf:
areas.append(f"Fitness improved: {pf:.4f} → {cur:.4f}")
elif cur < pf:
areas.append(f"Fitness declined: {pf:.4f} → {cur:.4f}. Consider revising recent changes.")
else:
areas.append(f"Fitness unchanged at {cur:.4f}")
coords = self._feature_coords(parent)
areas.append(f"Exploring {coords} region of solution space" if coords else "No feature coordinates")
if self.suggest_simplification_after_chars and len(parent.content) > self.suggest_simplification_after_chars:
areas.append(f"Consider simplifying - code length exceeds "
f"{self.suggest_simplification_after_chars} characters")
if not areas:
areas.append("Focus on improving fitness while maintaining diversity")
return "\n".join(f"- {a}" for a in areas)
def _render_artifacts(self, parent: Genome) -> str:
"""_render_artifacts: render every artifact key (not just text_feedback) under one section."""
arts = {k: v for k, v in parent.artifacts.items() if k != "response"}
if not arts:
return ""
blocks = []
for key, value in arts.items():
content = str(value)
if len(content) > self.max_artifact_bytes:
content = content[: self.max_artifact_bytes] + "\n... (truncated)"
blocks.append(f"### {key}\n```\n{content}\n```")
return "## Last Execution Output\n\n" + "\n\n".join(blocks)
# ---- # Program Evolution History --------------------------------------------------------
def _evolution_history(self, selection: Selection) -> str:
parent = selection.parent
# selection.pool is the parent's ISLAND (set by OpenEvolveSelectionPolicy), fitness-sorted.
# OpenEvolve's previous_programs/top_programs INCLUDE the parent (it can appear in the history
# AND as "# Current Program"), so we do not exclude it.
pool = sorted(selection.pool, key=self._fit, reverse=True)
out: list[str] = []
# ## Previous Attempts — ALWAYS emitted (OpenEvolve's evolution_history.txt has this header
# unconditionally; the content is empty when there are no prior attempts). Each renders what it
# CHANGED, per-metric performance, and an outcome judged vs THAT program's own parent.
recent = pool[:3]
lines = []
for i, g in enumerate(recent, 1):
changes = g.metadata.get("changes") or "Unknown changes"
outcome = self._outcome(g) or "Mixed results"
lines.append(f"### Attempt {i}\n- Changes: {changes}\n"
f"- Metrics: {self._performance(g)}\n- Outcome: {outcome}")
out.append("## Previous Attempts\n\n" + "\n\n".join(lines))
# ## Top Performing Programs — ALSO always emitted (unconditional header in the reference
# template); top num_top_programs with score + snippet + key features, empty content when none.
top = pool[: self.num_top_programs]
blocks = [self._program_block(i + 1, g, "Performs well on") for i, g in enumerate(top)]
out.append("## Top Performing Programs\n\n" + "\n\n".join(blocks))
# ## Diverse Programs — num_diverse spread across the rest of the island (wires num_diverse)
# OpenEvolve draws the diverse set from the next ranks after the top programs
# (top_programs[num_top : num_top+num_diverse]) — near-elites, not a stride across the whole tail.
diverse = pool[self.num_top_programs : self.num_top_programs + self.num_diverse]
if self.num_diverse > 0 and diverse:
blocks = [self._program_block(f"D{i + 1}", g, "Alternative approach to")
for i, g in enumerate(diverse)]
out.append("## Diverse Programs\n\n" + "\n\n".join(blocks))
# ## Inspiration Programs — the selection's inspirations with Type + Unique approach
if selection.inspirations:
blocks = []
for i, g in enumerate(selection.inspirations, 1):
snippet = g.content[: self.max_snippet_chars]
blocks.append(f"### Inspiration {i} (Score: {self._fit(g):.4f}, Type: {self._program_type(g)})\n"
f"```python\n{snippet}\n```\nUnique approach: {self._unique_features(g)}")
out.append("## Inspiration Programs\n\n"
"These programs represent diverse approaches and creative solutions that may "
"inspire new ideas:\n\n" + "\n\n".join(blocks))
return "\n\n".join(out)
def _program_block(self, number, g: Genome, prefix: str) -> str:
snippet = g.content[: self.max_snippet_chars]
feats = ", ".join(f"{prefix} {k}" for k in list(g.scores)[:2]) or "—"
return (f"### Program {number} (Score: {self._fit(g):.4f})\n"
f"```python\n{snippet}\n```\nKey features: {feats}")
def _program_type(self, g: Genome) -> str:
if g.metadata.get("migrant"):
return "Migrant"
s = self._fit(g)
if s >= 0.8:
return "High-Performer"
if s >= 0.6:
return "Alternative"
if s >= 0.4:
return "Experimental"
return "Exploratory"
def _unique_features(self, g: Genome) -> str:
feats: list[str] = []
for name, value in g.scores.items():
if isinstance(value, (int, float)) and not isinstance(value, bool):
if value >= 0.9:
feats.append(f"Excellent {name} ({value:.4f})")
elif value <= 0.3:
feats.append(f"Alternative approach to {name}")
code = g.content.lower()
if "numpy" in code or "np." in code:
feats.append("Uses numpy operations")
if "class" in code and "def __init__" in code:
feats.append("Object-oriented structure")
if not feats:
feats.append(f"{self._program_type(g)} solution worth examining")
return ", ".join(feats[: self.num_top_programs])
# ---- shared (per-program performance + outcome vs own parent) ---------------------------
def _performance(self, g: Genome) -> str:
parts = []
for k, v in g.scores.items():
parts.append(f"{k}: {v:.4f}" if isinstance(v, (int, float)) and not isinstance(v, bool)
else f"{k}: {v}")
return ", ".join(parts) if parts else f"{self._fit(g):.4f}"
def _outcome(self, g: Genome) -> str | None:
parent_metrics = g.metadata.get("parent_metrics")
if not isinstance(parent_metrics, dict) or not parent_metrics:
return None
improved = regressed = 0
for k, v in g.scores.items():
if k in self.feature_dimensions:
continue
pv = parent_metrics.get(k)
if not isinstance(v, (int, float)) or isinstance(v, bool):
continue
if not isinstance(pv, (int, float)) or isinstance(pv, bool):
continue
if v > pv:
improved += 1
elif v < pv:
regressed += 1
if improved and not regressed:
return "Improvement in all metrics"
if regressed and not improved:
return "Regression in all metrics"
if improved or regressed:
return "Mixed results"
return "Mixed results"