Maintain a fixed-width beam of promising programs; expand one per step, prune by fitness+diversity.
"""Beam Search SelectionPolicy component — pick the parent from the beam by one of four strategies.
The sampling side of SkyDiscover's ``BeamSearchDatabase`` (``sample`` + ``_select_best`` /
``_select_stochastic`` / ``_select_round_robin`` / ``_select_diversity_weighted``). Reads the beam
maintained by :class:`~galapagos.scaffolds.beam_search.population.BeamPopulation` and shares its
fitness (``population.score``) and diversity metric (``_code_distance``).
"""
from __future__ import annotations
import math
from ...components.selection import SelectionPolicy
from ...records import Genome, RunState, Selection
from .population import _code_distance
class BeamSelectionPolicy(SelectionPolicy):
"""Draw the parent from the current beam by ``strategy``; inspirations are the global top
programs, excluding the parent.
Strategies: ``best`` (highest score), ``round_robin`` (cycle the beam in score order),
``stochastic`` (softmax over score at ``temperature``), ``diversity_weighted`` (softmax over
``(1-w)·score + w·avg_distance_to_recently_expanded``). The recently-expanded parents are the
policy's adaptive state; all randomness flows through ``self.rng``.
"""
def __init__(self, seed: int = 0, strategy: str = "diversity_weighted", temperature: float = 1.0,
diversity_weight: float = 0.3, num_inspirations: int = 4):
super().__init__(seed)
self.strategy = strategy
self.temperature = temperature
self.diversity_weight = diversity_weight
self.num_inspirations = num_inspirations
self.expanded: list[Genome] = [] # recently expanded parents (diversity_weighted lookback)
self._rr = 0
def select(self, population, state: RunState | None = None) -> Selection:
beam = population.beam_members()
if not beam:
best = population.best()
if best is None:
raise RuntimeError("cannot select from an empty population")
beam = [best]
parent = self._pick(beam, population)
self.expanded.append(parent)
self.expanded = self.expanded[-50:]
# inspirations = global top-N by RAW combined_score. SkyDiscover's get_top_programs (context
# programs) ranks by the base get_score with NO depth penalty — the depth penalty applies only
# to beam pruning and parent selection (population.score). Rank by genome.fitness here so a
# non-zero beam_depth_penalty never leaks into context selection. (Identical to ranking by
# population.score when beam_depth_penalty == 0, the default.)
top = sorted(population.all(), key=lambda g: g.fitness, reverse=True)[: self.num_inspirations + 1]
inspirations = [g for g in top if g.id != parent.id][: self.num_inspirations]
return Selection(parent=parent, inspirations=inspirations, pool=population.all())
def _pick(self, beam: list[Genome], population) -> Genome:
if len(beam) == 1 or self.strategy == "best":
return max(beam, key=population.score)
if self.strategy == "round_robin":
ordered = sorted(beam, key=population.score, reverse=True)
g = ordered[self._rr % len(ordered)]
self._rr += 1
return g
if self.strategy == "diversity_weighted":
return self._diversity_weighted(beam, population)
return self._softmax_pick(beam, [population.score(g) for g in beam]) # "stochastic" / unknown
def _diversity_weighted(self, beam: list[Genome], population) -> Genome:
recent = self.expanded[-10:]
if not recent:
return self._softmax_pick(beam, [population.score(g) for g in beam])
combined = []
for g in beam:
diversity = sum(_code_distance(g.content, r.content) for r in recent) / len(recent)
combined.append((1 - self.diversity_weight) * population.score(g) + self.diversity_weight * diversity)
return self._softmax_pick(beam, combined)
def _softmax_pick(self, beam: list[Genome], scores: list[float]) -> Genome:
if self.temperature <= 0:
return beam[scores.index(max(scores))]
m = max(scores)
weights = [math.exp((s - m) / self.temperature) for s in scores]
total = sum(weights) or 1.0
r = self.rng.random() * total
upto = 0.0
for g, w in zip(beam, weights):
upto += w
if upto >= r:
return g
return beam[-1]