OpenEvolve/openevolve

OpenEvolve

Island-model MAP-Elites evolutionary search with diff mutation (the open AlphaEvolve).

Test-time searchApache-2.0

"""OpenEvolve PromptBuilder component — a faithful port of the live OpenEvolve ``diff_user`` template (``openevolve/prompts/defaults/diff_user.txt`` + ``evolution_history.txt`` + ``previous_attempt.txt`` + ``top_program.txt`` + ``inspiration_program.txt`` + ``fragments.json``), not the older in-file string constants. User message, in order: ``# Current Program Information`` (Fitness / Feature coordinates / Focus areas) → ``## Last Execution Output`` (artifacts) → ``# Program Evolution History`` (``## Previous Attempts`` + ``## Top Performing Programs`` + ``## Diverse Programs`` + ``## Inspiration Programs``) → ``# Current Program`` (the parent, kept LAST so a diff Proposer can locate it) → ``# Task`` (the SEARCH/REPLACE instruction). """ from __future__ import annotations from ...components.prompt import PromptBuilder from ...models.base import Prompt from ...records import Genome, RunState, Selection from .population import _fitness # openevolve/prompts/defaults/system_message.txt (the live default; the diff format lives in the user # Task) _OPENEVOLVE_SYSTEM = ( "You are an expert software developer tasked with iteratively improving a codebase.\n" "Your goal is to maximize the FITNESS SCORE while exploring diverse solutions across feature " "dimensions.\n" "The system maintains a collection of diverse programs - both high fitness AND diversity are valuable." ) # diff_user.txt # Task section (verbatim) _TASK_DIFF = ( "# Task\n" "Suggest improvements to the program that will improve its FITNESS SCORE.\n" "The system maintains diversity across these dimensions: {dims}\n" "Different solutions with similar fitness but different features are valuable.\n\n" "You MUST use the exact SEARCH/REPLACE diff format shown below to indicate changes:\n\n" "<<<<<<< SEARCH\n" "# Original code to find and replace (must match exactly)\n" "=======\n" "# New replacement code\n" ">>>>>>> REPLACE\n\n" "Example of valid diff format:\n" "<<<<<<< SEARCH\n" "for i in range(m):\n" " for j in range(p):\n" " for k in range(n):\n" " C[i, j] += A[i, k] * B[k, j]\n" "=======\n" "# Reorder loops for better memory access pattern\n" "for i in range(m):\n" " for k in range(n):\n" " for j in range(p):\n" " C[i, j] += A[i, k] * B[k, j]\n" ">>>>>>> REPLACE\n\n" "You can suggest multiple changes. Each SEARCH section must exactly match code in the current " "program.\nBe thoughtful about your changes and explain your reasoning thoroughly.\n\n" "IMPORTANT: Do not rewrite the entire program - focus on targeted improvements." ) # full_rewrite_user.txt # Task section (verbatim) — used when general.mutation_approach="full_rewrite" _TASK_REWRITE = ( "# Task\n" "Rewrite the program to improve its FITNESS SCORE.\n" "The system maintains diversity across these dimensions: {dims}\n" "Different solutions with similar fitness but different features are valuable.\n" "Provide the complete new program code.\n\n" "IMPORTANT: Make sure your rewritten program maintains the same inputs and outputs\n" "as the original program, but with improved internal implementation.\n\n" "```python\n# Your rewritten program here\n```" ) class OpenEvolvePromptBuilder(PromptBuilder): def __init__(self, num_top_programs: int = 3, num_diverse: int = 2, max_artifact_bytes: int = 20480, max_snippet_chars: int | None = None, suggest_simplification_after_chars: int = 500, feature_dimensions: list[str] | None = None): # max_snippet_chars=None → full program code in history sections (OpenEvolve does not truncate # per-program code; only artifacts are byte-capped). self.num_top_programs = num_top_programs self.num_diverse = num_diverse self.max_artifact_bytes = max_artifact_bytes self.max_snippet_chars = max_snippet_chars self.suggest_simplification_after_chars = suggest_simplification_after_chars self.feature_dimensions = list(feature_dimensions or ["complexity", "diversity"]) def _fit(self, genome: Genome) -> float: return _fitness(genome, self.feature_dimensions) def build(self, selection: Selection, memory=None, state: RunState | None = None) -> Prompt: parent = selection.parent # OpenEvolve conveys the task via a task-specific system message; we append the task context there. system = _OPENEVOLVE_SYSTEM if state and state.task_context: system = f"{system}\n\n{state.task_context}" if parent is None: # delegated selection (not used by OpenEvolve, kept for safety) return Prompt(system=system, user=(state.task_context if state else "")) dims = ", ".join(self.feature_dimensions) if self.feature_dimensions else "None" sections: list[str] = [] # --- # Current Program Information --- sections.append( "# Current Program Information\n" f"- Fitness: {self._fit(parent):.4f}\n" f"- Feature coordinates: {self._feature_coords(parent)}\n" f"- Focus areas: {self._improvement_areas(parent, selection)}" ) # --- ## Last Execution Output (artifacts) --- artifacts = self._render_artifacts(parent) if artifacts: sections.append(artifacts) # --- # Program Evolution History --- (## Previous Attempts + ## Top Performing Programs are # always present, matching OpenEvolve's evolution_history.txt; Diverse/Inspiration are conditional) sections.append("# Program Evolution History\n" + self._evolution_history(selection)) # --- # Current Program --- (MUST be the last fenced python block) sections.append(f"# Current Program\n```python\n{parent.content}\n```") # --- # Task --- (diff_user.txt vs full_rewrite_user.txt, switched by the base PromptBuilder) sections.append(self.by_approach(_TASK_DIFF, _TASK_REWRITE).format(dims=dims)) return Prompt(system=system, user="\n\n".join(sections)) # ---- # Current Program Information helpers ---------------------------------------------- def _feature_coords(self, parent: Genome) -> str: """format_feature_coordinates: raw values of the feature dims that the evaluator returned.""" parts = [] for dim in self.feature_dimensions: v = parent.scores.get(dim) if isinstance(v, (int, float)) and not isinstance(v, bool): parts.append(f"{dim}={float(v):.2f}") elif v is not None: parts.append(f"{dim}={v}") return ", ".join(parts) def _improvement_areas(self, parent: Genome, selection: Selection) -> str: """_identify_improvement_areas: fitness trend vs the most recent previous attempt + feature exploration note + code-length hint, else default guidance (one ``- `` line each).""" areas: list[str] = [] cur = self._fit(parent) # OpenEvolve compares to previous_programs[-1] = the LAST of the island top-N (includes parent), # not the lowest island member. top = sorted(selection.pool, key=self._fit, reverse=True)[: self.num_top_programs] if top: pf = self._fit(top[-1]) if cur > pf: areas.append(f"Fitness improved: {pf:.4f} → {cur:.4f}") elif cur < pf: areas.append(f"Fitness declined: {pf:.4f} → {cur:.4f}. Consider revising recent changes.") else: areas.append(f"Fitness unchanged at {cur:.4f}") coords = self._feature_coords(parent) areas.append(f"Exploring {coords} region of solution space" if coords else "No feature coordinates") if self.suggest_simplification_after_chars and len(parent.content) > self.suggest_simplification_after_chars: areas.append(f"Consider simplifying - code length exceeds " f"{self.suggest_simplification_after_chars} characters") if not areas: areas.append("Focus on improving fitness while maintaining diversity") return "\n".join(f"- {a}" for a in areas) def _render_artifacts(self, parent: Genome) -> str: """_render_artifacts: render every artifact key (not just text_feedback) under one section.""" arts = {k: v for k, v in parent.artifacts.items() if k != "response"} if not arts: return "" blocks = [] for key, value in arts.items(): content = str(value) if len(content) > self.max_artifact_bytes: content = content[: self.max_artifact_bytes] + "\n... (truncated)" blocks.append(f"### {key}\n```\n{content}\n```") return "## Last Execution Output\n\n" + "\n\n".join(blocks) # ---- # Program Evolution History -------------------------------------------------------- def _evolution_history(self, selection: Selection) -> str: parent = selection.parent # selection.pool is the parent's ISLAND (set by OpenEvolveSelectionPolicy), fitness-sorted. # OpenEvolve's previous_programs/top_programs INCLUDE the parent (it can appear in the history # AND as "# Current Program"), so we do not exclude it. pool = sorted(selection.pool, key=self._fit, reverse=True) out: list[str] = [] # ## Previous Attempts — ALWAYS emitted (OpenEvolve's evolution_history.txt has this header # unconditionally; the content is empty when there are no prior attempts). Each renders what it # CHANGED, per-metric performance, and an outcome judged vs THAT program's own parent. recent = pool[:3] lines = [] for i, g in enumerate(recent, 1): changes = g.metadata.get("changes") or "Unknown changes" outcome = self._outcome(g) or "Mixed results" lines.append(f"### Attempt {i}\n- Changes: {changes}\n" f"- Metrics: {self._performance(g)}\n- Outcome: {outcome}") out.append("## Previous Attempts\n\n" + "\n\n".join(lines)) # ## Top Performing Programs — ALSO always emitted (unconditional header in the reference # template); top num_top_programs with score + snippet + key features, empty content when none. top = pool[: self.num_top_programs] blocks = [self._program_block(i + 1, g, "Performs well on") for i, g in enumerate(top)] out.append("## Top Performing Programs\n\n" + "\n\n".join(blocks)) # ## Diverse Programs — num_diverse spread across the rest of the island (wires num_diverse) # OpenEvolve draws the diverse set from the next ranks after the top programs # (top_programs[num_top : num_top+num_diverse]) — near-elites, not a stride across the whole tail. diverse = pool[self.num_top_programs : self.num_top_programs + self.num_diverse] if self.num_diverse > 0 and diverse: blocks = [self._program_block(f"D{i + 1}", g, "Alternative approach to") for i, g in enumerate(diverse)] out.append("## Diverse Programs\n\n" + "\n\n".join(blocks)) # ## Inspiration Programs — the selection's inspirations with Type + Unique approach if selection.inspirations: blocks = [] for i, g in enumerate(selection.inspirations, 1): snippet = g.content[: self.max_snippet_chars] blocks.append(f"### Inspiration {i} (Score: {self._fit(g):.4f}, Type: {self._program_type(g)})\n" f"```python\n{snippet}\n```\nUnique approach: {self._unique_features(g)}") out.append("## Inspiration Programs\n\n" "These programs represent diverse approaches and creative solutions that may " "inspire new ideas:\n\n" + "\n\n".join(blocks)) return "\n\n".join(out) def _program_block(self, number, g: Genome, prefix: str) -> str: snippet = g.content[: self.max_snippet_chars] feats = ", ".join(f"{prefix} {k}" for k in list(g.scores)[:2]) or "—" return (f"### Program {number} (Score: {self._fit(g):.4f})\n" f"```python\n{snippet}\n```\nKey features: {feats}") def _program_type(self, g: Genome) -> str: if g.metadata.get("migrant"): return "Migrant" s = self._fit(g) if s >= 0.8: return "High-Performer" if s >= 0.6: return "Alternative" if s >= 0.4: return "Experimental" return "Exploratory" def _unique_features(self, g: Genome) -> str: feats: list[str] = [] for name, value in g.scores.items(): if isinstance(value, (int, float)) and not isinstance(value, bool): if value >= 0.9: feats.append(f"Excellent {name} ({value:.4f})") elif value <= 0.3: feats.append(f"Alternative approach to {name}") code = g.content.lower() if "numpy" in code or "np." in code: feats.append("Uses numpy operations") if "class" in code and "def __init__" in code: feats.append("Object-oriented structure") if not feats: feats.append(f"{self._program_type(g)} solution worth examining") return ", ".join(feats[: self.num_top_programs]) # ---- shared (per-program performance + outcome vs own parent) --------------------------- def _performance(self, g: Genome) -> str: parts = [] for k, v in g.scores.items(): parts.append(f"{k}: {v:.4f}" if isinstance(v, (int, float)) and not isinstance(v, bool) else f"{k}: {v}") return ", ".join(parts) if parts else f"{self._fit(g):.4f}" def _outcome(self, g: Genome) -> str | None: parent_metrics = g.metadata.get("parent_metrics") if not isinstance(parent_metrics, dict) or not parent_metrics: return None improved = regressed = 0 for k, v in g.scores.items(): if k in self.feature_dimensions: continue pv = parent_metrics.get(k) if not isinstance(v, (int, float)) or isinstance(v, bool): continue if not isinstance(pv, (int, float)) or isinstance(pv, bool): continue if v > pv: improved += 1 elif v < pv: regressed += 1 if improved and not regressed: return "Improvement in all metrics" if regressed and not improved: return "Regression in all metrics" if improved or regressed: return "Mixed results" return "Mixed results"