# Source code for design_research_experiments.recipes

"""Reusable, typed recipe builders for common lab workflows."""

from __future__ import annotations

from dataclasses import dataclass
from pathlib import Path
from typing import Any

from .bundles import BenchmarkBundle, optimization_bundle
from .conditions import Constraint, Factor, FactorKind, Level
from .hypotheses import AnalysisPlan, Hypothesis, HypothesisDirection, HypothesisKind, OutcomeSpec
from .study import Block, RunBudget, SeedPolicy, Study


[docs] @dataclass(slots=True) class RecipeStudyConfig: """Shared typed overrides for recipe study construction. Any field set to ``None`` keeps the recipe default. Any field set to a non-``None`` value replaces that section of the study definition wholesale. """ study_id: str | None = None title: str | None = None description: str | None = None authors: tuple[str, ...] | None = None rationale: str | None = None tags: tuple[str, ...] | None = None hypotheses: tuple[Hypothesis, ...] | None = None factors: tuple[Factor, ...] | None = None blocks: tuple[Block, ...] | None = None constraints: tuple[Constraint, ...] | None = None design_spec: dict[str, Any] | None = None outcomes: tuple[OutcomeSpec, ...] | None = None analysis_plans: tuple[AnalysisPlan, ...] | None = None run_budget: RunBudget | None = None seed_policy: SeedPolicy | None = None output_dir: Path | None = None provenance_metadata: dict[str, Any] | None = None notes: str | None = None problem_ids: tuple[str, ...] | None = None agent_specs: tuple[str, ...] | None = None primary_outcomes: tuple[str, ...] | None = None secondary_outcomes: tuple[str, ...] | None = None bundle: BenchmarkBundle | None = None
@dataclass(slots=True)
class AgentArchitectureComparisonConfig(RecipeStudyConfig):
    """Overrides for the agent architecture comparison recipe."""
@dataclass(slots=True)
class PromptFramingConfig(RecipeStudyConfig):
    """Overrides for the prompt framing recipe."""
@dataclass(slots=True)
class GrammarScaffoldConfig(RecipeStudyConfig):
    """Overrides for the grammar scaffold recipe."""
@dataclass(slots=True)
class HumanVsAgentProcessConfig(RecipeStudyConfig):
    """Overrides for the human-vs-agent process recipe."""
@dataclass(slots=True)
class DiversityAndExplorationConfig(RecipeStudyConfig):
    """Overrides for the diversity and exploration recipe."""
@dataclass(slots=True)
class OptimizationBenchmarkConfig(RecipeStudyConfig):
    """Overrides for the optimization benchmark recipe."""
def _default_outcomes() -> tuple[OutcomeSpec, ...]:
    """Return baseline outcomes used by recipe scaffolds."""
    primary = OutcomeSpec(
        name="primary_outcome",
        source_table="runs",
        column="primary_outcome",
        aggregation="mean",
        primary=True,
        expected_type="float",
        description="Primary study objective metric.",
    )
    latency = OutcomeSpec(
        name="latency_s",
        source_table="runs",
        column="latency_s",
        aggregation="mean",
        primary=False,
        expected_type="float",
        description="Run latency in seconds.",
    )
    return (primary, latency)


def _default_analysis_plan(hypothesis_id: str) -> AnalysisPlan:
    """Return a compact default analysis plan for *hypothesis_id*."""
    return AnalysisPlan(
        analysis_plan_id="ap1",
        hypothesis_ids=(hypothesis_id,),
        tests=("difference_in_means", "regression"),
        outcomes=("primary_outcome",),
        plots=("condition_means",),
        export_tables=("summary_by_condition",),
        multiple_comparison_policy="holm",
    )


def _apply_recipe_config(study: Study, config: RecipeStudyConfig | None) -> Study:
    """Apply typed config overrides to a default recipe study.

    ``None`` config fields keep the corresponding section of *study*; any
    non-``None`` field replaces that section wholesale.
    """
    if config is None:
        return study

    def pick(override, default):
        # None means "keep the recipe default"; anything else wins wholesale.
        return default if override is None else override

    resolved_study_id = pick(config.study_id, study.study_id)

    # Output directory resolution: an explicit override always wins; a
    # renamed study moves its artifacts directory along with the new id;
    # otherwise fall back to the study's own directory (deriving one from
    # the study id if it has none).
    if config.output_dir is not None:
        resolved_output_dir = config.output_dir
    elif config.study_id is not None:
        resolved_output_dir = Path("artifacts") / resolved_study_id
    elif study.output_dir is not None:
        resolved_output_dir = study.output_dir
    else:
        resolved_output_dir = Path("artifacts") / study.study_id

    # A bundle provides the benchmark selection defaults; explicit
    # problem_ids/agent_specs overrides still take precedence below.
    if config.bundle is not None:
        fallback_problem_ids = config.bundle.problem_ids
        fallback_agent_specs = config.bundle.agent_specs
    else:
        fallback_problem_ids = study.problem_ids
        fallback_agent_specs = study.agent_specs

    return Study(
        study_id=resolved_study_id,
        title=pick(config.title, study.title),
        description=pick(config.description, study.description),
        authors=pick(config.authors, study.authors),
        rationale=pick(config.rationale, study.rationale),
        tags=pick(config.tags, study.tags),
        hypotheses=pick(config.hypotheses, study.hypotheses),
        factors=pick(config.factors, study.factors),
        blocks=pick(config.blocks, study.blocks),
        constraints=pick(config.constraints, study.constraints),
        # Dict sections are shallow-copied so callers cannot mutate shared state.
        design_spec=dict(pick(config.design_spec, study.design_spec)),
        outcomes=pick(config.outcomes, study.outcomes),
        analysis_plans=pick(config.analysis_plans, study.analysis_plans),
        run_budget=pick(config.run_budget, study.run_budget),
        seed_policy=pick(config.seed_policy, study.seed_policy),
        output_dir=resolved_output_dir,
        provenance_metadata=dict(
            pick(config.provenance_metadata, study.provenance_metadata)
        ),
        notes=pick(config.notes, study.notes),
        problem_ids=pick(config.problem_ids, fallback_problem_ids),
        agent_specs=pick(config.agent_specs, fallback_agent_specs),
        primary_outcomes=pick(config.primary_outcomes, study.primary_outcomes),
        secondary_outcomes=pick(config.secondary_outcomes, study.secondary_outcomes),
    )
def build_agent_architecture_comparison_study(
    config: AgentArchitectureComparisonConfig | None = None,
) -> Study:
    """Build a study comparing agent architecture choices across prompt difficulty.

    Defaults may be overridden field-by-field via *config*.
    """
    hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Architecture Effect",
        statement="Agent architecture changes primary outcome.",
        kind=HypothesisKind.EFFECT,
        independent_vars=("agent_architecture", "prompt_difficulty"),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.DIFFERENT,
        linked_analysis_plan_id="ap1",
    )
    architecture_factor = Factor(
        name="agent_architecture",
        description="Agent architecture family.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="direct", value="direct-llm"),
            Level(name="multistep", value="multi-step"),
            Level(name="reflective", value="reflective"),
        ),
    )
    difficulty_factor = Factor(
        name="prompt_difficulty",
        description="Prompt complexity level.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="easy", value="easy"),
            Level(name="medium", value="medium"),
            Level(name="hard", value="hard"),
        ),
    )
    defaults = Study(
        study_id="agent-architecture-comparison",
        title="Agent Architecture Comparison",
        description="Compare architecture variants across prompt difficulty manipulations.",
        authors=("Design Research Collective",),
        rationale="Benchmark architecture and workflow pattern differences.",
        tags=("ideation", "architecture"),
        hypotheses=(hypothesis,),
        factors=(architecture_factor, difficulty_factor),
        blocks=(Block(name="problem_family", levels=("ideation", "optimization")),),
        design_spec={"kind": "constrained_factorial", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=7),
        output_dir=Path("artifacts") / "agent-architecture-comparison",
        problem_ids=("problem-a", "problem-b"),
        agent_specs=("direct-llm", "multi-step", "reflective"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(defaults, config)
def build_prompt_framing_study(config: PromptFramingConfig | None = None) -> Study:
    """Build an ideation study with framing and prompt manipulation.

    Defaults may be overridden field-by-field via *config*.
    """
    hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Prompt Framing Effect",
        statement="Prompt framing changes novelty and diversity outcomes.",
        kind=HypothesisKind.EFFECT,
        independent_vars=("prompt_frame", "prompt_difficulty"),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.DIFFERENT,
        linked_analysis_plan_id="ap1",
    )
    frame_factor = Factor(
        name="prompt_frame",
        description="Prompt framing style.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="neutral", value="neutral"),
            Level(name="challenge", value="challenge"),
            Level(name="analogy", value="analogy"),
        ),
    )
    difficulty_factor = Factor(
        name="prompt_difficulty",
        description="Prompt difficulty.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="low", value="low"),
            Level(name="high", value="high"),
        ),
    )
    defaults = Study(
        study_id="prompt-framing-study",
        title="Prompt Framing Study",
        description="Measure framing effects in ideation tasks.",
        authors=("Design Research Collective",),
        rationale="Quantify framing manipulations in creative generation.",
        tags=("ideation", "framing"),
        hypotheses=(hypothesis,),
        factors=(frame_factor, difficulty_factor),
        blocks=(Block(name="domain", levels=("mobility", "health")),),
        design_spec={"kind": "randomized_block", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=3, parallelism=1),
        seed_policy=SeedPolicy(base_seed=11),
        output_dir=Path("artifacts") / "prompt-framing-study",
        problem_ids=("ideation-1", "ideation-2", "ideation-3"),
        agent_specs=("baseline-agent", "creative-agent"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(defaults, config)
def build_grammar_scaffold_study(config: GrammarScaffoldConfig | None = None) -> Study:
    """Build a study comparing unconstrained and grammar-guided generation.

    Defaults may be overridden field-by-field via *config*.
    """
    hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Grammar Guidance Effect",
        statement="Grammar-guided generation improves primary outcome.",
        kind=HypothesisKind.EFFECT,
        independent_vars=("generation_mode",),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.GREATER,
        linked_analysis_plan_id="ap1",
    )
    mode_factor = Factor(
        name="generation_mode",
        description="Generation scaffold type.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="free", value="unconstrained"),
            Level(name="grammar", value="grammar-guided"),
            Level(name="tool", value="tool-guided"),
        ),
    )
    defaults = Study(
        study_id="grammar-scaffold-study",
        title="Grammar Scaffold Study",
        description="Benchmark constrained vs unconstrained generation modes.",
        authors=("Design Research Collective",),
        rationale="Assess structural scaffolds for design-generation quality.",
        tags=("grammar", "constrained-generation"),
        hypotheses=(hypothesis,),
        factors=(mode_factor,),
        blocks=(Block(name="problem_family", levels=("grammar", "text")),),
        design_spec={"kind": "full_factorial", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=19),
        output_dir=Path("artifacts") / "grammar-scaffold-study",
        problem_ids=("grammar-1", "grammar-2"),
        agent_specs=("direct-llm", "workflow-agent"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(defaults, config)
def build_human_vs_agent_process_study(config: HumanVsAgentProcessConfig | None = None) -> Study:
    """Build a study comparing human-only, AI-assisted, and hybrid teams.

    Defaults may be overridden field-by-field via *config*.
    """
    hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Teaming Configuration Effect",
        statement="Hybrid teams alter process traces and outcomes.",
        kind=HypothesisKind.MODERATION,
        independent_vars=("team_mode",),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.DIFFERENT,
        linked_analysis_plan_id="ap1",
    )
    team_factor = Factor(
        name="team_mode",
        description="Team configuration.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="human", value="human-only"),
            Level(name="assist", value="ai-assisted"),
            Level(name="hybrid", value="hybrid"),
        ),
    )
    defaults = Study(
        study_id="human-vs-agent-process",
        title="Human vs Agent Process Study",
        description="Capture communication and action traces across teaming modes.",
        authors=("Design Research Collective",),
        rationale="Compare human-only, AI-assisted, and hybrid workflows.",
        tags=("teaming", "process-trace"),
        hypotheses=(hypothesis,),
        factors=(team_factor,),
        blocks=(Block(name="cohort", levels=("novice", "expert")),),
        design_spec={"kind": "repeated_measures", "counterbalance": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=1, parallelism=1),
        seed_policy=SeedPolicy(base_seed=23),
        output_dir=Path("artifacts") / "human-vs-agent-process",
        problem_ids=("teaming-1", "teaming-2"),
        agent_specs=("human-only", "ai-assisted", "hybrid"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(defaults, config)
def build_diversity_and_exploration_study(
    config: DiversityAndExplorationConfig | None = None,
) -> Study:
    """Build a study evaluating diversity and exploration outcomes.

    Defaults may be overridden field-by-field via *config*.
    """
    hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Exploration Strategy Robustness",
        statement="Exploration-heavy strategies increase diversity metrics.",
        kind=HypothesisKind.ROBUSTNESS,
        independent_vars=("search_strategy",),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.GREATER,
        linked_analysis_plan_id="ap1",
    )
    strategy_factor = Factor(
        name="search_strategy",
        description="Exploration strategy.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="greedy", value="greedy"),
            Level(name="epsilon", value="epsilon-greedy"),
            Level(name="ucb", value="ucb"),
        ),
    )
    defaults = Study(
        study_id="diversity-exploration",
        title="Diversity and Exploration Study",
        description="Assess exploration strategies across benchmark families.",
        authors=("Design Research Collective",),
        rationale="Characterize diversity-quality tradeoffs.",
        tags=("optimization", "diversity"),
        hypotheses=(hypothesis,),
        factors=(strategy_factor,),
        blocks=(Block(name="problem_family", levels=("bench-a", "bench-b", "bench-c")),),
        design_spec={"kind": "randomized_block", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(_default_analysis_plan("h1"),),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=29),
        output_dir=Path("artifacts") / "diversity-exploration",
        problem_ids=("opt-a", "opt-b", "opt-c"),
        agent_specs=("deterministic", "self-learning"),
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(defaults, config)
def build_optimization_benchmark_study(
    config: OptimizationBenchmarkConfig | None = None,
) -> Study:
    """Build a benchmark study for optimization generalization and learning effects.

    Defaults may be overridden field-by-field via *config*.
    """
    hypothesis = Hypothesis(
        hypothesis_id="h1",
        label="Learning Strategy Generalization",
        statement="Self-learning agents outperform deterministic baselines across families.",
        kind=HypothesisKind.ROBUSTNESS,
        independent_vars=("learning_strategy", "tuning_regime"),
        dependent_vars=("primary_outcome",),
        direction=HypothesisDirection.GREATER,
        linked_analysis_plan_id="ap1",
    )
    strategy_factor = Factor(
        name="learning_strategy",
        description="Agent learning approach.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="deterministic", value="deterministic-baseline"),
            Level(name="self_learning", value="self-learning-agent"),
        ),
    )
    tuning_factor = Factor(
        name="tuning_regime",
        description="Hyperparameter tuning regime.",
        kind=FactorKind.MANIPULATED,
        levels=(
            Level(name="conservative", value="conservative"),
            Level(name="aggressive", value="aggressive"),
        ),
    )
    # Custom plan (instead of _default_analysis_plan) to add mixed-effects
    # modelling with problem_family as a random effect.
    analysis_plan = AnalysisPlan(
        analysis_plan_id="ap1",
        hypothesis_ids=("h1",),
        tests=("difference_in_means", "mixed_effects"),
        outcomes=("primary_outcome",),
        random_effects=("problem_family",),
        plots=("family_condition_means",),
        export_tables=("optimization_summary",),
        multiple_comparison_policy="holm",
    )
    # Problems and agents default to the shared optimization benchmark bundle.
    bundle = optimization_bundle()
    defaults = Study(
        study_id="optimization-benchmark",
        title="Optimization Benchmark Study",
        description=(
            "Compare self-learning and deterministic baselines across optimization "
            "families and tuning regimes."
        ),
        authors=("Design Research Collective",),
        rationale="Measure optimization quality, robustness, and cross-family generalization.",
        tags=("optimization", "benchmark", "generalization"),
        hypotheses=(hypothesis,),
        factors=(strategy_factor, tuning_factor),
        blocks=(Block(name="problem_family", levels=("small", "medium", "large")),),
        design_spec={"kind": "randomized_block", "randomize": True},
        outcomes=_default_outcomes(),
        analysis_plans=(analysis_plan,),
        run_budget=RunBudget(replicates=2, parallelism=1),
        seed_policy=SeedPolicy(base_seed=31),
        output_dir=Path("artifacts") / "optimization-benchmark",
        problem_ids=bundle.problem_ids,
        agent_specs=bundle.agent_specs,
        primary_outcomes=("primary_outcome",),
        secondary_outcomes=("latency_s",),
    )
    return _apply_recipe_config(defaults, config)
# Public API: config containers first, then the recipe builders.
__all__ = [
    "AgentArchitectureComparisonConfig",
    "DiversityAndExplorationConfig",
    "GrammarScaffoldConfig",
    "HumanVsAgentProcessConfig",
    "OptimizationBenchmarkConfig",
    "PromptFramingConfig",
    "RecipeStudyConfig",
    "build_agent_architecture_comparison_study",
    "build_diversity_and_exploration_study",
    "build_grammar_scaffold_study",
    "build_human_vs_agent_process_study",
    "build_optimization_benchmark_study",
    "build_prompt_framing_study",
]