"""Reusable, typed recipe builders for common lab workflows."""
from __future__ import annotations
import re
from dataclasses import dataclass, replace
from pathlib import Path
from typing import Any
from .bundles import BenchmarkBundle, ideation_bundle, optimization_bundle
from .conditions import Constraint, Factor, FactorKind, Level
from .hypotheses import AnalysisPlan, Hypothesis, HypothesisDirection, HypothesisKind, OutcomeSpec
from .study import Block, RunBudget, SeedPolicy, Study
# Factor names treated as binding the agent axis: when a study has a factor with
# one of these names, _finalize_comparison_bindings clears study-level agent_specs.
_AGENT_FACTOR_NAMES = frozenset({"agent_id", "agent", "agent_spec"})
# Factor names treated as binding the problem axis: when a study has a factor with
# one of these names, _finalize_comparison_bindings clears study-level problem_ids.
_PROBLEM_FACTOR_NAMES = frozenset({"problem_id", "problem"})
[docs]
@dataclass(slots=True)
class RecipeStudyConfig:
    """Shared typed overrides for recipe study construction.

    Any field set to ``None`` keeps the recipe default. Any field set to a non-``None``
    value replaces that section of the study definition wholesale.

    Overrides are applied by ``_apply_recipe_config``, which builds a new ``Study``
    from the recipe defaults plus whatever is set here.
    """

    # --- Identity and descriptive text ---
    # When ``study_id`` is set and ``output_dir`` is not, the output directory is
    # re-derived as ``artifacts/<study_id>``.
    study_id: str | None = None
    title: str | None = None
    description: str | None = None
    authors: tuple[str, ...] | None = None
    rationale: str | None = None
    tags: tuple[str, ...] | None = None

    # --- Experimental design ---
    hypotheses: tuple[Hypothesis, ...] | None = None
    factors: tuple[Factor, ...] | None = None
    blocks: tuple[Block, ...] | None = None
    constraints: tuple[Constraint, ...] | None = None
    # Shallow-copied with ``dict(...)`` before being stored on the study.
    design_spec: dict[str, Any] | None = None

    # --- Measurement and analysis ---
    outcomes: tuple[OutcomeSpec, ...] | None = None
    analysis_plans: tuple[AnalysisPlan, ...] | None = None

    # --- Execution ---
    run_budget: RunBudget | None = None
    seed_policy: SeedPolicy | None = None
    # An explicit ``output_dir`` takes precedence over any path derived from ``study_id``.
    output_dir: Path | None = None
    # Shallow-copied with ``dict(...)`` before being stored on the study.
    provenance_metadata: dict[str, Any] | None = None
    notes: str | None = None

    # --- Bindings ---
    # Explicit ``problem_ids`` / ``agent_specs`` take precedence over ``bundle``.
    problem_ids: tuple[str, ...] | None = None
    agent_specs: tuple[str, ...] | None = None
    primary_outcomes: tuple[str, ...] | None = None
    secondary_outcomes: tuple[str, ...] | None = None
    # Not passed to ``Study`` itself: when set, its ``problem_ids`` and ``agent_specs``
    # are used wherever the explicit overrides above are ``None``.
    bundle: BenchmarkBundle | None = None
[docs]
@dataclass(slots=True)
class AgentArchitectureComparisonConfig(RecipeStudyConfig):
    """Overrides for the agent architecture comparison recipe.

    Adds no fields beyond ``RecipeStudyConfig``; it is the config type accepted by
    ``build_agent_architecture_comparison_study``.
    """
[docs]
@dataclass(slots=True)
class PromptFramingConfig(RecipeStudyConfig):
    """Overrides for the prompt framing recipe.

    Adds no fields beyond ``RecipeStudyConfig``; it is the config type accepted by
    ``build_prompt_framing_study``.
    """
[docs]
@dataclass(slots=True)
class GrammarScaffoldConfig(RecipeStudyConfig):
    """Overrides for the grammar scaffold recipe.

    Adds no fields beyond ``RecipeStudyConfig``; it is the config type accepted by
    ``build_grammar_scaffold_study``.
    """
[docs]
@dataclass(slots=True)
class HumanVsAgentProcessConfig(RecipeStudyConfig):
    """Overrides for the human-vs-agent process recipe.

    Adds no fields beyond ``RecipeStudyConfig``; it is the config type accepted by
    ``build_human_vs_agent_process_study``.
    """
[docs]
@dataclass(slots=True)
class DiversityAndExplorationConfig(RecipeStudyConfig):
    """Overrides for the diversity and exploration recipe.

    Adds no fields beyond ``RecipeStudyConfig``; it is the config type accepted by
    ``build_diversity_and_exploration_study``.
    """
[docs]
@dataclass(slots=True)
class OptimizationBenchmarkConfig(RecipeStudyConfig):
    """Overrides for the optimization benchmark recipe.

    Adds no fields beyond ``RecipeStudyConfig``; it is the config type accepted by
    ``build_optimization_benchmark_study``.
    """
[docs]
@dataclass(slots=True)
class ComparisonStudyConfig(RecipeStudyConfig):
    """Overrides for comparison-study recipe scaffolds.

    Extends ``RecipeStudyConfig`` with per-axis factor overrides. Both are ignored
    when ``factors`` is set, because an explicit ``factors`` tuple is used as-is by
    ``_resolve_comparison_factors``.
    """

    # Replaces the recipe's first default factor (appended when there is none).
    comparison_factor: Factor | None = None
    # Replaces the recipe's second default factor (appended when there is none).
    secondary_factor: Factor | None = None
[docs]
@dataclass(slots=True)
class UnivariateComparisonConfig(ComparisonStudyConfig):
    """Overrides for the univariate comparison recipe.

    Adds no fields beyond ``ComparisonStudyConfig``; it is the config type accepted
    by ``build_univariate_comparison_study``.
    """
[docs]
@dataclass(slots=True)
class BivariateComparisonConfig(ComparisonStudyConfig):
    """Overrides for the bivariate comparison recipe.

    Adds no fields beyond ``ComparisonStudyConfig``; it is the config type accepted
    by ``build_bivariate_comparison_study``.
    """
[docs]
@dataclass(slots=True)
class StrategyComparisonConfig(ComparisonStudyConfig):
    """Overrides for the packaged-problem strategy comparison recipe.

    Adds no fields beyond ``ComparisonStudyConfig``; it is the config type accepted
    by ``build_strategy_comparison_study``.
    """
def _default_outcomes() -> tuple[OutcomeSpec, ...]:
    """Build the baseline outcome specs shared by the recipe scaffolds.

    Returns:
        A two-element tuple: the primary ``primary_outcome`` spec followed by the
        non-primary ``latency_s`` spec. Both read from the ``runs`` table, aggregate
        by mean, and expect float values.
    """
    # Keyword arguments that are identical for both outcome specs.
    shared = {"source_table": "runs", "aggregation": "mean", "expected_type": "float"}
    primary_spec = OutcomeSpec(
        name="primary_outcome",
        column="primary_outcome",
        primary=True,
        description="Primary study objective metric.",
        **shared,
    )
    latency_spec = OutcomeSpec(
        name="latency_s",
        column="latency_s",
        primary=False,
        description="Run latency in seconds.",
        **shared,
    )
    return (primary_spec, latency_spec)
def _default_analysis_plan(
    hypothesis_id: str,
    *,
    tests: tuple[str, ...] = ("difference_in_means", "regression"),
    plots: tuple[str, ...] = ("condition_means",),
    export_tables: tuple[str, ...] = ("summary_by_condition",),
    random_effects: tuple[str, ...] = (),
) -> AnalysisPlan:
    """Build the compact default analysis plan (``ap1``) for one hypothesis.

    Args:
        hypothesis_id: Identifier of the single hypothesis the plan covers.
        tests: Statistical test names to run.
        plots: Plot names to produce.
        export_tables: Table names to export.
        random_effects: Random-effect terms; empty by default.

    Returns:
        An ``AnalysisPlan`` with id ``"ap1"`` over the ``primary_outcome`` outcome,
        using the ``"holm"`` multiple-comparison policy.
    """
    # Parts of the plan that callers cannot vary through this helper.
    fixed_fields = {
        "analysis_plan_id": "ap1",
        "outcomes": ("primary_outcome",),
        "multiple_comparison_policy": "holm",
    }
    return AnalysisPlan(
        hypothesis_ids=(hypothesis_id,),
        tests=tests,
        random_effects=random_effects,
        plots=plots,
        export_tables=export_tables,
        **fixed_fields,
    )
def _apply_recipe_config(study: Study, config: RecipeStudyConfig | None) -> Study:
"""Apply typed config overrides to a default recipe study."""
if config is None:
return study
resolved_study_id = config.study_id if config.study_id is not None else study.study_id
study_output_dir = (
study.output_dir if study.output_dir is not None else Path("artifacts") / study.study_id
)
if config.output_dir is not None:
resolved_output_dir = config.output_dir
elif config.study_id is not None:
resolved_output_dir = Path("artifacts") / resolved_study_id
else:
resolved_output_dir = study_output_dir
bundle_problem_ids = (
config.bundle.problem_ids if config.bundle is not None else study.problem_ids
)
bundle_agent_specs = (
config.bundle.agent_specs if config.bundle is not None else study.agent_specs
)
return Study(
study_id=resolved_study_id,
title=config.title if config.title is not None else study.title,
description=config.description if config.description is not None else study.description,
authors=config.authors if config.authors is not None else study.authors,
rationale=config.rationale if config.rationale is not None else study.rationale,
tags=config.tags if config.tags is not None else study.tags,
hypotheses=config.hypotheses if config.hypotheses is not None else study.hypotheses,
factors=config.factors if config.factors is not None else study.factors,
blocks=config.blocks if config.blocks is not None else study.blocks,
constraints=config.constraints if config.constraints is not None else study.constraints,
design_spec=dict(config.design_spec)
if config.design_spec is not None
else dict(study.design_spec),
outcomes=config.outcomes if config.outcomes is not None else study.outcomes,
analysis_plans=(
config.analysis_plans if config.analysis_plans is not None else study.analysis_plans
),
run_budget=config.run_budget if config.run_budget is not None else study.run_budget,
seed_policy=config.seed_policy if config.seed_policy is not None else study.seed_policy,
output_dir=resolved_output_dir,
provenance_metadata=(
dict(config.provenance_metadata)
if config.provenance_metadata is not None
else dict(study.provenance_metadata)
),
notes=config.notes if config.notes is not None else study.notes,
problem_ids=config.problem_ids if config.problem_ids is not None else bundle_problem_ids,
agent_specs=config.agent_specs if config.agent_specs is not None else bundle_agent_specs,
primary_outcomes=(
config.primary_outcomes
if config.primary_outcomes is not None
else study.primary_outcomes
),
secondary_outcomes=(
config.secondary_outcomes
if config.secondary_outcomes is not None
else study.secondary_outcomes
),
)
def _comparison_level(
    *,
    name: str,
    value: str,
    label: str | None = None,
    is_baseline: bool = False,
    role: str | None = None,
    metadata: dict[str, Any] | None = None,
) -> Level:
    """Create a level whose metadata records its baseline/treatment role.

    Args:
        name: Level name.
        value: Level value.
        label: Optional display label.
        is_baseline: Whether this level is the baseline arm.
        role: Explicit role; when falsy it is derived from ``is_baseline``.
        metadata: Extra metadata. Existing ``"role"`` / ``"is_baseline"`` keys are
            kept as given and are not overwritten by the arguments above.

    Returns:
        A ``Level`` carrying a copy of ``metadata`` with the two keys filled in.
    """
    if role:
        fallback_role = role
    elif is_baseline:
        fallback_role = "baseline"
    else:
        fallback_role = "treatment"

    # Copy so the caller's dict is never mutated; a falsy input becomes a new dict.
    level_metadata: dict[str, Any] = dict(metadata) if metadata else {}
    if "role" not in level_metadata:
        level_metadata["role"] = fallback_role
    if "is_baseline" not in level_metadata:
        level_metadata["is_baseline"] = is_baseline

    return Level(name=name, value=value, label=label, metadata=level_metadata)
def _comparison_factor(
    *,
    name: str,
    description: str,
    levels: tuple[Level, ...],
) -> Factor:
    """Create a manipulated factor flagged as a comparison axis.

    Args:
        name: Factor name.
        description: Human-readable description of the factor.
        levels: Levels of the factor.

    Returns:
        A ``Factor`` of kind ``FactorKind.MANIPULATED`` whose metadata is
        ``{"comparison_axis": True}``.
    """
    axis_metadata = {"comparison_axis": True}
    return Factor(
        name=name,
        description=description,
        levels=levels,
        kind=FactorKind.MANIPULATED,
        metadata=axis_metadata,
    )
def _slug_level_name(value: str, *, index: int, seen: set[str]) -> str:
"""Normalize a stable level name and keep duplicates unique."""
base = re.sub(r"[^a-z0-9]+", "_", value.lower()).strip("_") or f"level_{index}"
candidate = base
suffix = 2
while candidate in seen:
candidate = f"{base}_{suffix}"
suffix += 1
seen.add(candidate)
return candidate
def _strategy_factor_from_bundle(bundle: BenchmarkBundle) -> Factor:
    """Derive the default ``agent_id`` comparison factor from a bundle's agents.

    Each entry of ``bundle.agent_specs`` becomes one level. The baseline is the
    first spec whose lowercase form contains ``"baseline"`` or ``"random"``; when
    none matches, the first spec is the baseline.

    Args:
        bundle: Benchmark bundle whose ``agent_specs`` define the levels.

    Returns:
        A comparison factor named ``agent_id``.
    """
    baseline_position = next(
        (
            position
            for position, spec in enumerate(bundle.agent_specs)
            if "baseline" in spec.lower() or "random" in spec.lower()
        ),
        0,
    )

    used_names: set[str] = set()
    strategy_levels = []
    for position, spec in enumerate(bundle.agent_specs):
        display_label = spec.replace("-", " ").replace("_", " ").title()
        strategy_levels.append(
            _comparison_level(
                name=_slug_level_name(spec, index=position, seen=used_names),
                value=spec,
                label=display_label,
                is_baseline=position == baseline_position,
            )
        )

    return _comparison_factor(
        name="agent_id",
        description="Agent strategy or runtime binding under comparison.",
        levels=tuple(strategy_levels),
    )
def _resolve_comparison_factors(
*,
config: ComparisonStudyConfig | None,
default_factors: tuple[Factor, ...],
max_factors: int | None = None,
) -> tuple[Factor, ...]:
"""Resolve comparison factors with explicit precedence rules."""
if config is not None and config.factors is not None:
return config.factors
resolved = list(default_factors)
if config is not None:
if config.comparison_factor is not None:
if resolved:
resolved[0] = config.comparison_factor
else:
resolved.append(config.comparison_factor)
if config.secondary_factor is not None:
if len(resolved) >= 2:
resolved[1] = config.secondary_factor
elif len(resolved) == 1:
resolved.append(config.secondary_factor)
else:
resolved.extend(
factor
for factor in (config.comparison_factor, config.secondary_factor)
if factor is not None
)
if max_factors is not None:
return tuple(resolved[:max_factors])
return tuple(resolved)
def _comparison_hypothesis(
    *,
    factor_names: tuple[str, ...],
    label: str,
    statement: str,
    kind: HypothesisKind = HypothesisKind.EFFECT,
) -> Hypothesis:
    """Create the default ``h1`` hypothesis for a comparison study.

    Args:
        factor_names: Names of the factors used as independent variables.
        label: Short hypothesis label.
        statement: Full hypothesis statement.
        kind: Hypothesis kind; defaults to ``HypothesisKind.EFFECT``.

    Returns:
        A ``Hypothesis`` with id ``"h1"``, dependent variable ``primary_outcome``,
        direction ``HypothesisDirection.DIFFERENT``, linked to analysis plan ``"ap1"``.
    """
    # Identifiers and wiring that every comparison scaffold shares.
    fixed_fields = {
        "hypothesis_id": "h1",
        "dependent_vars": ("primary_outcome",),
        "direction": HypothesisDirection.DIFFERENT,
        "linked_analysis_plan_id": "ap1",
    }
    return Hypothesis(
        label=label,
        statement=statement,
        kind=kind,
        independent_vars=factor_names,
        **fixed_fields,
    )
def _finalize_comparison_bindings(
    study: Study,
    *,
    config: ComparisonStudyConfig | None,
    selected_bundle: BenchmarkBundle,
    default_problem_ids: tuple[str, ...],
    default_agent_specs: tuple[str, ...],
) -> Study:
    """Normalize bundle bindings so factor-bound comparisons do not Cartesian-expand.

    A binding is emptied when one of the study's factors already carries that axis
    (by name). Otherwise it is filled from the bundle or the recipe defaults.

    Args:
        study: Study after config overrides have been applied.
        config: The overrides that were applied, if any.
        selected_bundle: Bundle in effect for this study.
        default_problem_ids: Problem ids used when the study has none.
        default_agent_specs: Agent specs used when the study has none, or when a
            bundle override was given without explicit ``agent_specs``.

    Returns:
        ``study`` itself when neither binding changes, else a copy with both set.
    """
    bound_names = {factor.name for factor in study.factors}
    bundle_overridden = config is not None and config.bundle is not None

    if not bound_names.isdisjoint(_PROBLEM_FACTOR_NAMES):
        problem_ids = ()
    elif bundle_overridden and config.problem_ids is None:
        problem_ids = selected_bundle.problem_ids
    elif study.problem_ids:
        problem_ids = study.problem_ids
    else:
        problem_ids = default_problem_ids

    if not bound_names.isdisjoint(_AGENT_FACTOR_NAMES):
        agent_specs = ()
    elif not study.agent_specs or (bundle_overridden and config.agent_specs is None):
        agent_specs = default_agent_specs
    else:
        agent_specs = study.agent_specs

    unchanged = problem_ids == study.problem_ids and agent_specs == study.agent_specs
    if unchanged:
        return study
    return replace(study, problem_ids=problem_ids, agent_specs=agent_specs)
def _build_comparison_study(
    *,
    config: ComparisonStudyConfig | None,
    study_id: str,
    title: str,
    description: str,
    rationale: str,
    tags: tuple[str, ...],
    default_bundle: BenchmarkBundle,
    default_factors: tuple[Factor, ...],
    default_problem_ids: tuple[str, ...] | None = None,
    default_agent_specs: tuple[str, ...] | None = None,
    hypothesis_label: str,
    hypothesis_statement: str,
    hypothesis_kind: HypothesisKind = HypothesisKind.EFFECT,
    analysis_plots: tuple[str, ...] = ("condition_means",),
    analysis_export_tables: tuple[str, ...] = ("summary_by_condition",),
) -> Study:
    """Assemble a comparison-study scaffold and apply typed overrides.

    Steps: choose the bundle (config override, else default), resolve the factors,
    build the default ``Study``, apply ``config``, then normalize the problem and
    agent bindings.

    Args:
        config: Optional comparison overrides.
        study_id: Default study id; also names the default output directory.
        title: Default title.
        description: Default description.
        rationale: Default rationale.
        tags: Default tags.
        default_bundle: Bundle used when ``config`` supplies none.
        default_factors: Recipe factors before per-axis overrides.
        default_problem_ids: Problem ids; when falsy the bundle's are used.
        default_agent_specs: Agent specs; when ``None`` only the bundle's first
            agent is used (or ``"default-agent"`` if the bundle has none).
        hypothesis_label: Label of the default hypothesis.
        hypothesis_statement: Statement of the default hypothesis.
        hypothesis_kind: Kind of the default hypothesis.
        analysis_plots: Plots for the default analysis plan.
        analysis_export_tables: Export tables for the default analysis plan.

    Returns:
        The configured ``Study``.
    """
    bundle_override = None if config is None else config.bundle
    selected_bundle = default_bundle if bundle_override is None else bundle_override

    factors = _resolve_comparison_factors(config=config, default_factors=default_factors)

    problem_ids = default_problem_ids or selected_bundle.problem_ids
    agent_specs: tuple[str, ...]
    if default_agent_specs is not None:
        agent_specs = default_agent_specs
    elif selected_bundle.agent_specs:
        # Bind a single agent so the comparison factors alone define the conditions.
        agent_specs = (selected_bundle.agent_specs[0],)
    else:
        agent_specs = ("default-agent",)

    hypothesis = _comparison_hypothesis(
        factor_names=tuple(factor.name for factor in factors),
        label=hypothesis_label,
        statement=hypothesis_statement,
        kind=hypothesis_kind,
    )
    analysis_plan = _default_analysis_plan(
        "h1",
        plots=analysis_plots,
        export_tables=analysis_export_tables,
    )

    scaffold = Study(
        study_id=study_id,
        title=title,
        description=description,
        authors=("Design Research Collective",),
        rationale=rationale,
        tags=tags,
        hypotheses=(hypothesis,),
        factors=factors,
        design_spec={"kind": "constrained_factorial", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(analysis_plan,),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=37),
        output_dir=Path("artifacts") / study_id,
        problem_ids=problem_ids,
        agent_specs=agent_specs,
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )

    return _finalize_comparison_bindings(
        _apply_recipe_config(scaffold, config),
        config=config,
        selected_bundle=selected_bundle,
        default_problem_ids=problem_ids,
        default_agent_specs=agent_specs,
    )
[docs]
def build_univariate_comparison_study(
    config: UnivariateComparisonConfig | None = None,
) -> Study:
    """Build a one-factor comparison study scaffold over packaged problems.

    The default factor is ``comparison_arm`` with a ``baseline`` and a
    ``treatment`` level, and the default bundle is ``ideation_bundle()``.

    Args:
        config: Optional overrides for the scaffold.

    Returns:
        The configured ``Study``.
    """
    baseline_level = _comparison_level(
        name="baseline",
        value="baseline",
        label="Baseline",
        is_baseline=True,
    )
    treatment_level = _comparison_level(name="treatment", value="treatment", label="Treatment")
    arm_factor = _comparison_factor(
        name="comparison_arm",
        description="Primary comparison arm.",
        levels=(baseline_level, treatment_level),
    )

    return _build_comparison_study(
        config=config,
        study_id="univariate-comparison",
        title="Univariate Comparison Study",
        description="Compare one manipulated condition across a packaged problem bundle.",
        rationale="Provide a compact scaffold for one-axis benchmark comparisons.",
        tags=("comparison", "univariate"),
        default_bundle=ideation_bundle(),
        default_factors=(arm_factor,),
        hypothesis_label="Univariate Comparison Effect",
        hypothesis_statement="The comparison arm changes the primary outcome.",
    )
[docs]
def build_bivariate_comparison_study(
    config: BivariateComparisonConfig | None = None,
) -> Study:
    """Build a two-factor comparison study scaffold over packaged problems.

    The default factors are ``comparison_arm`` (baseline / treatment) and
    ``prompt_regime`` (standard / structured). The default bundle is
    ``ideation_bundle()``, and the analysis plan adds interaction plots and tables.

    Args:
        config: Optional overrides for the scaffold.

    Returns:
        The configured ``Study``.
    """
    arm_factor = _comparison_factor(
        name="comparison_arm",
        description="Primary comparison arm.",
        levels=(
            _comparison_level(
                name="baseline",
                value="baseline",
                label="Baseline",
                is_baseline=True,
            ),
            _comparison_level(name="treatment", value="treatment", label="Treatment"),
        ),
    )
    regime_factor = _comparison_factor(
        name="prompt_regime",
        description="Secondary comparison axis.",
        levels=(
            _comparison_level(
                name="standard",
                value="standard",
                label="Standard",
                is_baseline=True,
            ),
            _comparison_level(name="structured", value="structured", label="Structured"),
        ),
    )

    return _build_comparison_study(
        config=config,
        study_id="bivariate-comparison",
        title="Bivariate Comparison Study",
        description="Compare two manipulated axes across a packaged problem bundle.",
        rationale="Provide a reusable scaffold for pairwise comparison designs and interactions.",
        tags=("comparison", "bivariate"),
        default_bundle=ideation_bundle(),
        default_factors=(arm_factor, regime_factor),
        hypothesis_label="Bivariate Comparison Effect",
        hypothesis_statement="The comparison axes jointly change the primary outcome.",
        analysis_plots=("condition_means", "interaction_means"),
        analysis_export_tables=("summary_by_condition", "summary_by_interaction"),
    )
[docs]
def build_strategy_comparison_study(
    config: StrategyComparisonConfig | None = None,
) -> Study:
    """Build a packaged-problem strategy comparison study scaffold.

    The bundle is ``config.bundle`` when given, else ``optimization_bundle()``.
    The comparison factor is ``config.comparison_factor`` when given, else one
    derived from the bundle's agent specs.

    Args:
        config: Optional overrides for the scaffold.

    Returns:
        The configured ``Study``.
    """
    # Fall back lazily so the packaged bundle is only loaded when it is needed.
    bundle_override = None if config is None else config.bundle
    bundle = optimization_bundle() if bundle_override is None else bundle_override

    factor_override = None if config is None else config.comparison_factor
    strategy_factor = (
        _strategy_factor_from_bundle(bundle) if factor_override is None else factor_override
    )

    return _build_comparison_study(
        config=config,
        study_id="strategy-comparison",
        title="Strategy Comparison Study",
        description="Compare named agent strategies on packaged benchmark problems.",
        rationale="Centralize canonical strategy-comparison wiring for packaged benchmarks.",
        tags=("comparison", "strategy", "benchmark"),
        default_bundle=bundle,
        default_factors=(strategy_factor,),
        default_problem_ids=bundle.problem_ids,
        default_agent_specs=bundle.agent_specs,
        hypothesis_label="Strategy Comparison Effect",
        hypothesis_statement=(
            "Agent strategy changes the primary outcome on the packaged benchmark."
        ),
        hypothesis_kind=HypothesisKind.ROBUSTNESS,
    )
[docs]
def build_agent_architecture_comparison_study(
    config: AgentArchitectureComparisonConfig | None = None,
) -> Study:
    """Build a study comparing agent architecture choices across prompt difficulty.

    Defaults: two manipulated factors (``agent_architecture`` and
    ``prompt_difficulty``, three levels each), a ``problem_family`` block, and a
    constrained-factorial design with two replicates.

    Args:
        config: Optional overrides applied on top of the defaults.

    Returns:
        The configured ``Study``.
    """
    recipe_id = "agent-architecture-comparison"

    architecture_factor = Factor(
        name="agent_architecture",
        description="Agent architecture family.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=level_name, value=level_value)
            for level_name, level_value in (
                ("direct", "direct-llm"),
                ("multistep", "multi-step"),
                ("reflective", "reflective"),
            )
        ),
    )
    difficulty_factor = Factor(
        name="prompt_difficulty",
        description="Prompt complexity level.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(Level(name=step, value=step) for step in ("easy", "medium", "hard")),
    )
    architecture_hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Architecture Effect",
        statement="Agent architecture changes primary outcome.",
        kind=HypothesisKind.EFFECT,
        independent_vars=("agent_architecture", "prompt_difficulty"),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.DIFFERENT,
        linked_analysis_plan_id="ap1",
    )

    recipe_defaults = Study(
        study_id=recipe_id,
        title="Agent Architecture Comparison",
        description="Compare architecture variants across prompt difficulty manipulations.",
        authors=("Design Research Collective",),
        rationale="Benchmark architecture and workflow pattern differences.",
        tags=("ideation", "architecture"),
        hypotheses=(architecture_hypothesis,),
        factors=(architecture_factor, difficulty_factor),
        blocks=(Block(name="problem_family", levels=("ideation", "optimization")),),
        design_spec={"kind": "constrained_factorial", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=7),
        output_dir=Path("artifacts") / recipe_id,
        problem_ids=("problem-a", "problem-b"),
        agent_specs=("direct-llm", "multi-step", "reflective"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(recipe_defaults, config)
[docs]
def build_prompt_framing_study(config: PromptFramingConfig | None = None) -> Study:
    """Build an ideation study with framing and prompt manipulation.

    Defaults: a three-level ``prompt_frame`` factor, a two-level
    ``prompt_difficulty`` factor, a ``domain`` block, and a randomized-block design
    with three replicates.

    Args:
        config: Optional overrides applied on top of the defaults.

    Returns:
        The configured ``Study``.
    """
    recipe_id = "prompt-framing-study"

    frame_factor = Factor(
        name="prompt_frame",
        description="Prompt framing style.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=frame, value=frame) for frame in ("neutral", "challenge", "analogy")
        ),
    )
    difficulty_factor = Factor(
        name="prompt_difficulty",
        description="Prompt difficulty.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(Level(name=step, value=step) for step in ("low", "high")),
    )
    framing_hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Prompt Framing Effect",
        statement="Prompt framing changes novelty and diversity outcomes.",
        kind=HypothesisKind.EFFECT,
        independent_vars=("prompt_frame", "prompt_difficulty"),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.DIFFERENT,
        linked_analysis_plan_id="ap1",
    )

    recipe_defaults = Study(
        study_id=recipe_id,
        title="Prompt Framing Study",
        description="Measure framing effects in ideation tasks.",
        authors=("Design Research Collective",),
        rationale="Quantify framing manipulations in creative generation.",
        tags=("ideation", "framing"),
        hypotheses=(framing_hypothesis,),
        factors=(frame_factor, difficulty_factor),
        blocks=(Block(name="domain", levels=("mobility", "health")),),
        design_spec={"kind": "randomized_block", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=3, parallelism=1),
        seed_policy=SeedPolicy(base_seed=11),
        output_dir=Path("artifacts") / recipe_id,
        problem_ids=("ideation-1", "ideation-2", "ideation-3"),
        agent_specs=("baseline-agent", "creative-agent"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(recipe_defaults, config)
[docs]
def build_grammar_scaffold_study(config: GrammarScaffoldConfig | None = None) -> Study:
    """Build a study comparing unconstrained and grammar-guided generation.

    Defaults: one three-level ``generation_mode`` factor, a ``problem_family``
    block, and a full-factorial design with two replicates. The hypothesis is
    directional (``HypothesisDirection.GREATER``).

    Args:
        config: Optional overrides applied on top of the defaults.

    Returns:
        The configured ``Study``.
    """
    recipe_id = "grammar-scaffold-study"

    mode_factor = Factor(
        name="generation_mode",
        description="Generation scaffold type.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=level_name, value=level_value)
            for level_name, level_value in (
                ("free", "unconstrained"),
                ("grammar", "grammar-guided"),
                ("tool", "tool-guided"),
            )
        ),
    )
    guidance_hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Grammar Guidance Effect",
        statement="Grammar-guided generation improves primary outcome.",
        kind=HypothesisKind.EFFECT,
        independent_vars=("generation_mode",),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.GREATER,
        linked_analysis_plan_id="ap1",
    )

    recipe_defaults = Study(
        study_id=recipe_id,
        title="Grammar Scaffold Study",
        description="Benchmark constrained vs unconstrained generation modes.",
        authors=("Design Research Collective",),
        rationale="Assess structural scaffolds for design-generation quality.",
        tags=("grammar", "constrained-generation"),
        hypotheses=(guidance_hypothesis,),
        factors=(mode_factor,),
        blocks=(Block(name="problem_family", levels=("grammar", "text")),),
        design_spec={"kind": "full_factorial", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=19),
        output_dir=Path("artifacts") / recipe_id,
        problem_ids=("grammar-1", "grammar-2"),
        agent_specs=("direct-llm", "workflow-agent"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(recipe_defaults, config)
[docs]
def build_human_vs_agent_process_study(config: HumanVsAgentProcessConfig | None = None) -> Study:
    """Build a study comparing human-only, AI-assisted, and hybrid teams.

    Defaults: one three-level ``team_mode`` factor, a ``cohort`` block, and a
    counterbalanced repeated-measures design with one replicate.

    Args:
        config: Optional overrides applied on top of the defaults.

    Returns:
        The configured ``Study``.
    """
    recipe_id = "human-vs-agent-process"

    team_factor = Factor(
        name="team_mode",
        description="Team configuration.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=level_name, value=level_value)
            for level_name, level_value in (
                ("human", "human-only"),
                ("assist", "ai-assisted"),
                ("hybrid", "hybrid"),
            )
        ),
    )
    teaming_hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Teaming Configuration Effect",
        statement="Hybrid teams alter process traces and outcomes.",
        kind=HypothesisKind.MODERATION,
        independent_vars=("team_mode",),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.DIFFERENT,
        linked_analysis_plan_id="ap1",
    )

    recipe_defaults = Study(
        study_id=recipe_id,
        title="Human vs Agent Process Study",
        description="Capture communication and action traces across teaming modes.",
        authors=("Design Research Collective",),
        rationale="Compare human-only, AI-assisted, and hybrid workflows.",
        tags=("teaming", "process-trace"),
        hypotheses=(teaming_hypothesis,),
        factors=(team_factor,),
        blocks=(Block(name="cohort", levels=("novice", "expert")),),
        design_spec={"kind": "repeated_measures", "counterbalance": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=1, parallelism=1),
        seed_policy=SeedPolicy(base_seed=23),
        output_dir=Path("artifacts") / recipe_id,
        problem_ids=("teaming-1", "teaming-2"),
        agent_specs=("human-only", "ai-assisted", "hybrid"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(recipe_defaults, config)
[docs]
def build_diversity_and_exploration_study(
    config: DiversityAndExplorationConfig | None = None,
) -> Study:
    """Build a study evaluating diversity and exploration outcomes.

    Defaults: one three-level ``search_strategy`` factor, a three-level
    ``problem_family`` block, and a randomized-block design with two replicates.

    Args:
        config: Optional overrides applied on top of the defaults.

    Returns:
        The configured ``Study``.
    """
    recipe_id = "diversity-exploration"

    strategy_factor = Factor(
        name="search_strategy",
        description="Exploration strategy.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=level_name, value=level_value)
            for level_name, level_value in (
                ("greedy", "greedy"),
                ("epsilon", "epsilon-greedy"),
                ("ucb", "ucb"),
            )
        ),
    )
    exploration_hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Exploration Strategy Robustness",
        statement="Exploration-heavy strategies increase diversity metrics.",
        kind=HypothesisKind.ROBUSTNESS,
        independent_vars=("search_strategy",),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.GREATER,
        linked_analysis_plan_id="ap1",
    )

    recipe_defaults = Study(
        study_id=recipe_id,
        title="Diversity and Exploration Study",
        description="Assess exploration strategies across benchmark families.",
        authors=("Design Research Collective",),
        rationale="Characterize diversity-quality tradeoffs.",
        tags=("optimization", "diversity"),
        hypotheses=(exploration_hypothesis,),
        factors=(strategy_factor,),
        blocks=(Block(name="problem_family", levels=("bench-a", "bench-b", "bench-c")),),
        design_spec={"kind": "randomized_block", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=29),
        output_dir=Path("artifacts") / recipe_id,
        problem_ids=("opt-a", "opt-b", "opt-c"),
        agent_specs=("deterministic", "self-learning"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(recipe_defaults, config)
[docs]
def build_optimization_benchmark_study(
    config: OptimizationBenchmarkConfig | None = None,
) -> Study:
    """Build a benchmark study for optimization generalization and learning effects.

    Defaults: two two-level factors (``learning_strategy`` and ``tuning_regime``),
    a ``problem_family`` block, a randomized-block design with two replicates, and
    problem/agent bindings taken from ``optimization_bundle()``. The analysis plan
    uses a mixed-effects test with ``problem_family`` as a random effect.

    Args:
        config: Optional overrides applied on top of the defaults.

    Returns:
        The configured ``Study``.
    """
    recipe_id = "optimization-benchmark"

    generalization_hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Learning Strategy Generalization",
        statement="Self-learning agents outperform deterministic baselines across families.",
        kind=HypothesisKind.ROBUSTNESS,
        independent_vars=("learning_strategy", "tuning_regime"),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.GREATER,
        linked_analysis_plan_id="ap1",
    )
    packaged = optimization_bundle()

    strategy_factor = Factor(
        name="learning_strategy",
        description="Agent learning approach.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=level_name, value=level_value)
            for level_name, level_value in (
                ("deterministic", "deterministic-baseline"),
                ("self_learning", "self-learning-agent"),
            )
        ),
    )
    tuning_factor = Factor(
        name="tuning_regime",
        description="Hyperparameter tuning regime.",
        kind=FactorKind.MANIPULATED,
        levels=tuple(
            Level(name=regime, value=regime) for regime in ("conservative", "aggressive")
        ),
    )
    # Same "ap1" / primary_outcome / holm plan as the other recipes, with the
    # tests, random effects, plots, and tables swapped for this benchmark.
    mixed_effects_plan = _default_analysis_plan(
        "h1",
        tests=("difference_in_means", "mixed_effects"),
        random_effects=("problem_family",),
        plots=("family_condition_means",),
        export_tables=("optimization_summary",),
    )

    recipe_defaults = Study(
        study_id=recipe_id,
        title="Optimization Benchmark Study",
        description=(
            "Compare self-learning and deterministic baselines across optimization "
            "families and tuning regimes."
        ),
        authors=("Design Research Collective",),
        rationale="Measure optimization quality, robustness, and cross-family generalization.",
        tags=("optimization", "benchmark", "generalization"),
        hypotheses=(generalization_hypothesis,),
        factors=(strategy_factor, tuning_factor),
        blocks=(Block(name="problem_family", levels=("small", "medium", "large")),),
        design_spec={"kind": "randomized_block", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(mixed_effects_plan,),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=31),
        output_dir=Path("artifacts") / recipe_id,
        problem_ids=packaged.problem_ids,
        agent_specs=packaged.agent_specs,
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(recipe_defaults, config)
# Public API of this module: the recipe config dataclasses followed by the
# ``build_*`` study builders. Underscore-prefixed helpers are intentionally omitted.
__all__ = [
    "AgentArchitectureComparisonConfig",
    "BivariateComparisonConfig",
    "ComparisonStudyConfig",
    "DiversityAndExplorationConfig",
    "GrammarScaffoldConfig",
    "HumanVsAgentProcessConfig",
    "OptimizationBenchmarkConfig",
    "PromptFramingConfig",
    "RecipeStudyConfig",
    "StrategyComparisonConfig",
    "UnivariateComparisonConfig",
    "build_agent_architecture_comparison_study",
    "build_bivariate_comparison_study",
    "build_diversity_and_exploration_study",
    "build_grammar_scaffold_study",
    "build_human_vs_agent_process_study",
    "build_optimization_benchmark_study",
    "build_prompt_framing_study",
    "build_strategy_comparison_study",
    "build_univariate_comparison_study",
]