default/topk

Top-K
Always expand the single best program, with the next K as context. Pure greedy elitism.
Test-time search · Apache-2.0
29 lines · 1.5 KB · python · Download
"""Top-K SelectionPolicy component — parent = rank-1, inspirations = ranks 2..K+1 (greedy elitism)."""
from __future__ import annotations

from ...components.selection import SelectionPolicy
from ...records import RunState, Selection


class TopKPolicy(SelectionPolicy):
    """Greedy-elitist selection: expand the top-ranked program, show the runners-up as context.

    Each :meth:`select` call asks the population for its top ``num_inspirations + 1`` entries
    through ``population.query({"top": ...})``. The first entry becomes the parent and the
    remaining ones become the inspirations. Ranking is delegated entirely to that query; the
    original port notes describe it as ordering by ``combined_score``, but the query
    implementation is not part of this file.

    :meth:`select` writes no attributes on the instance. When the query yields no runner-up
    (the population holds a single program, or ``num_inspirations`` is 0), the parent is reused
    as its own sole inspiration, so the inspirations list is never empty. Per the original port
    notes this mirrors ``TopKDatabase.sample``, which passes ``[top_programs[0]]`` as context in
    that situation. No ``observe`` override is defined here, so the base-class one applies.
    """

    def __init__(self, seed: int = 0, num_inspirations: int = 4):
        """Store the context size K.

        Args:
            seed: Forwarded unchanged to ``SelectionPolicy.__init__``; not used by this class.
            num_inspirations: K, the maximum number of runner-up programs returned as
                inspirations alongside the parent.
        """
        super().__init__(seed)
        self.num_inspirations = num_inspirations

    def select(self, population, state: RunState | None = None) -> Selection:
        """Pick the best program as parent and up to K runners-up as inspirations.

        Args:
            population: Object exposing ``query(dict)`` (returning a ranked, sliceable
                sequence) and ``all()``.
            state: Accepted for interface compatibility; not read by this policy.

        Returns:
            A ``Selection`` whose ``parent`` is the first query result, whose ``inspirations``
            are the following results (or ``[parent]`` if there are none), and whose ``pool``
            is ``population.all()``.

        Raises:
            RuntimeError: If the query returns nothing.
        """
        k = self.num_inspirations
        ranked = population.query({"top": k + 1})
        if not ranked:
            raise RuntimeError("cannot select from an empty population")

        best = ranked[0]  # rank 1
        # Ranks 2..K+1; an empty slice falls back to the parent itself so that the
        # inspirations list always has at least one entry.
        context = ranked[1 : k + 1] or [best]
        return Selection(parent=best, inspirations=context, pool=population.all())