Source code for design_research_analysis.runtime

"""Runtime environment and reproducibility helpers."""

from __future__ import annotations

import hashlib
import importlib
import json
import platform as _platform
import subprocess
import sys
from datetime import UTC, datetime
from importlib import metadata
from pathlib import Path
from sys import modules as _modules
from typing import Any

_TRACKED_PACKAGES = (
    "design-research-analysis",
    "numpy",
    "pandas",
    "matplotlib",
    "scipy",
    "statsmodels",
)


[docs] def is_google_colab() -> bool: """Return ``True`` when running inside Google Colab.""" return "google.colab" in _modules
[docs] def is_notebook() -> bool: """Return ``True`` when running in a notebook-style interactive shell.""" if is_google_colab(): return True try: ipython_module = importlib.import_module("IPython") except ImportError: return False get_ipython = getattr(ipython_module, "get_ipython", None) if get_ipython is None: return False shell = get_ipython() if shell is None: return False return type(shell).__name__ == "ZMQInteractiveShell"
def _hash_file(path: Path) -> str: digest = hashlib.sha256() with path.open("rb") as handle: while True: chunk = handle.read(1024 * 1024) if not chunk: break digest.update(chunk) return digest.hexdigest() def _run_git_command(args: list[str]) -> tuple[bool, str]: result = subprocess.run( ["git", *args], capture_output=True, check=False, text=True, ) return result.returncode == 0, result.stdout.strip() def _get_git_context(warnings: list[str]) -> dict[str, Any]: git_context: dict[str, Any] = { "commit": None, "branch": None, "is_dirty": None, "repo_root": None, } ok, repo_root = _run_git_command(["rev-parse", "--show-toplevel"]) if not ok: warnings.append("Git metadata unavailable; current working directory is not a git repo.") return git_context ok, commit = _run_git_command(["rev-parse", "HEAD"]) if not ok: warnings.append("Git metadata unavailable; failed to resolve current commit.") return git_context ok, branch = _run_git_command(["branch", "--show-current"]) if not ok: warnings.append("Git metadata unavailable; failed to resolve current branch.") return git_context ok, status = _run_git_command(["status", "--porcelain", "--untracked-files=no"]) if not ok: warnings.append("Git metadata unavailable; failed to inspect working tree status.") return git_context git_context["repo_root"] = repo_root git_context["commit"] = commit git_context["branch"] = branch or None git_context["is_dirty"] = bool(status) return git_context def _get_package_versions() -> dict[str, str]: versions: dict[str, str] = {} for package_name in _TRACKED_PACKAGES: version_value: str | None = None try: version_value = metadata.version(package_name) except metadata.PackageNotFoundError: import_name = package_name.replace("-", "_") try: module = importlib.import_module(import_name) except Exception: version_value = None else: module_version = getattr(module, "__version__", None) if isinstance(module_version, str): version_value = module_version if version_value is not None: versions[package_name] = version_value return versions
[docs] def capture_run_context( *, seed: int | None = None, input_paths: list[str | Path] | None = None, extra: dict[str, Any] | None = None, ) -> dict[str, Any]: """Capture deterministic provenance metadata for an analysis run.""" warnings: list[str] = [] resolved_inputs: list[dict[str, Any]] = [] for raw_path in input_paths or []: path = Path(raw_path).expanduser().resolve() if not path.exists(): raise FileNotFoundError(path) resolved_inputs.append( { "path": str(path), "sha256": _hash_file(path), "size_bytes": int(path.stat().st_size), } ) return { "timestamp_utc": datetime.now(UTC).isoformat(), "git": _get_git_context(warnings), "python": { "version": sys.version.split()[0], "executable": sys.executable, }, "platform": { "system": _platform.system(), "release": _platform.release(), "machine": _platform.machine(), "node": _platform.node(), }, "packages": _get_package_versions(), "random_seed": seed, "inputs": resolved_inputs, "extra": dict(extra or {}), "warnings": warnings, }
[docs] def write_run_manifest(context: dict[str, Any], outpath: str | Path) -> Path: """Write a run-context dictionary to a JSON manifest file.""" output_path = Path(outpath) if output_path.suffix.lower() != ".json": raise ValueError("Run manifests must be written to a .json file.") try: output_path.parent.mkdir(parents=True, exist_ok=True) except OSError as exc: raise ValueError( f"Failed to create output directory '{output_path.parent}': {exc}" ) from exc try: with output_path.open("w", encoding="utf-8") as handle: json.dump(context, handle, indent=2, sort_keys=True) handle.write("\n") except OSError as exc: raise ValueError(f"Failed to write run manifest '{output_path}': {exc}") from exc return output_path.resolve()
[docs] def attach_provenance(result: dict[str, Any], context: dict[str, Any]) -> dict[str, Any]: """Return a copy of ``result`` enriched with a ``provenance`` field.""" enriched = dict(result) enriched["provenance"] = context return enriched
__all__ = [ "attach_provenance", "capture_run_context", "is_google_colab", "is_notebook", "write_run_manifest", ]