Source code for design_research_analysis.runtime

"""Runtime environment and reproducibility helpers."""

from __future__ import annotations

import hashlib
import importlib
import json
import platform as _platform
import subprocess
import sys
from datetime import UTC, datetime
from importlib import metadata
from pathlib import Path
from sys import modules as _modules
from typing import Any

# Distribution names whose installed versions are looked up by
# ``_get_package_versions`` and recorded under the "packages" key of a run
# context. Names use the distribution spelling (hyphens); the lookup falls
# back to importing the underscore form when no metadata is found.
_TRACKED_PACKAGES = (
    "design-research-analysis",
    "numpy",
    "pandas",
    "matplotlib",
    "scipy",
    "statsmodels",
)



[docs]
def is_google_colab() -> bool:
    """Report whether the ``google.colab`` module has already been imported.

    Returns:
        ``True`` if ``"google.colab"`` is a key of ``sys.modules``, else
        ``False``. The module is never imported by this check.
    """
    colab_loaded = "google.colab" in _modules
    return colab_loaded




[docs]
def is_notebook() -> bool:
    """Report whether the code runs in a notebook-style interactive shell.

    Returns:
        ``True`` when running in Google Colab, or when the active IPython
        shell's class is named ``ZMQInteractiveShell``. ``False`` when
        IPython cannot be imported, exposes no ``get_ipython``, or reports
        no active shell.
    """
    if is_google_colab():
        return True

    try:
        ipython = importlib.import_module("IPython")
    except ImportError:
        return False

    shell_getter = getattr(ipython, "get_ipython", None)
    active_shell = None if shell_getter is None else shell_getter()
    if active_shell is None:
        return False

    # Compare by class name so no IPython/ipykernel class has to be imported.
    shell_class_name = type(active_shell).__name__
    return shell_class_name == "ZMQInteractiveShell"



def _hash_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 1 MiB binary chunks so the whole content is never
    held in memory at once.
    """
    chunk_size = 1024 * 1024
    hasher = hashlib.sha256()
    with path.open("rb") as stream:
        while block_bytes := stream.read(chunk_size):
            hasher.update(block_bytes)
    return hasher.hexdigest()


def _run_git_command(args: list[str]) -> tuple[bool, str]:
    """Run ``git`` with ``args`` and report success plus its stripped stdout.

    Args:
        args: Arguments passed to the ``git`` executable (without ``git``).

    Returns:
        A ``(ok, stdout)`` pair. ``ok`` is ``True`` only when the process
        exited with status 0; ``stdout`` is the captured standard output with
        surrounding whitespace removed. When the ``git`` executable cannot be
        launched at all, ``(False, "")`` is returned instead of raising.
    """
    try:
        result = subprocess.run(
            ["git", *args],
            capture_output=True,
            check=False,
            text=True,
        )
    except OSError:
        # git is not installed or not executable (FileNotFoundError,
        # PermissionError, ...). Callers treat git metadata as best-effort,
        # so report a failed command rather than aborting the whole capture.
        return False, ""
    return result.returncode == 0, result.stdout.strip()


def _get_git_context(warnings: list[str]) -> dict[str, Any]:
    """Collect commit, branch, dirty flag and repo root of the current repo.

    Four git commands are run in sequence. If any of them fails, a message
    describing the failed step is appended to ``warnings`` and a dictionary
    with every value set to ``None`` is returned; partial results are never
    reported.

    Args:
        warnings: List that receives a message when git metadata cannot be
            collected. It is mutated in place.

    Returns:
        A dictionary with the keys ``commit``, ``branch``, ``is_dirty`` and
        ``repo_root``. ``branch`` is ``None`` when git prints no current
        branch name; ``is_dirty`` is ``True`` when ``git status --porcelain``
        (untracked files excluded) prints any output.
    """
    # (result slot, git arguments, warning emitted when the command fails)
    probes = (
        (
            "repo_root",
            ["rev-parse", "--show-toplevel"],
            "Git metadata unavailable; current working directory is not a git repo.",
        ),
        (
            "commit",
            ["rev-parse", "HEAD"],
            "Git metadata unavailable; failed to resolve current commit.",
        ),
        (
            "branch",
            ["branch", "--show-current"],
            "Git metadata unavailable; failed to resolve current branch.",
        ),
        (
            "status",
            ["status", "--porcelain", "--untracked-files=no"],
            "Git metadata unavailable; failed to inspect working tree status.",
        ),
    )

    outputs: dict[str, str] = {}
    for slot, git_args, failure_message in probes:
        succeeded, stdout_text = _run_git_command(git_args)
        if not succeeded:
            warnings.append(failure_message)
            return {
                "commit": None,
                "branch": None,
                "is_dirty": None,
                "repo_root": None,
            }
        outputs[slot] = stdout_text

    return {
        "commit": outputs["commit"],
        "branch": outputs["branch"] or None,
        "is_dirty": bool(outputs["status"]),
        "repo_root": outputs["repo_root"],
    }


def _get_package_versions() -> dict[str, str]:
    """Map each tracked package to its installed version, when discoverable.

    For every name in ``_TRACKED_PACKAGES`` the distribution metadata is
    consulted first. If no distribution is found, the module (name with
    hyphens replaced by underscores) is imported and its ``__version__``
    attribute is used when it is a string. Packages whose version cannot be
    determined are left out of the result.
    """
    found: dict[str, str] = {}
    for name in _TRACKED_PACKAGES:
        try:
            resolved: str | None = metadata.version(name)
        except metadata.PackageNotFoundError:
            resolved = None
            try:
                fallback_module = importlib.import_module(name.replace("-", "_"))
            except Exception:
                # Best-effort lookup: any import failure means "unknown".
                fallback_module = None
            if fallback_module is not None:
                declared = getattr(fallback_module, "__version__", None)
                if isinstance(declared, str):
                    resolved = declared
        if resolved is None:
            continue
        found[name] = resolved
    return found



[docs]
def capture_run_context(
    *,
    seed: int | None = None,
    input_paths: list[str | Path] | None = None,
    extra: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Capture provenance metadata describing the current analysis run.

    Args:
        seed: Random seed to record under ``random_seed``; stored as given.
        input_paths: Files to fingerprint. Each path is user-expanded and
            resolved, then recorded with its SHA-256 digest and size.
        extra: Additional entries, shallow-copied into the ``extra`` field.

    Returns:
        A dictionary with the keys ``timestamp_utc``, ``git``, ``python``,
        ``platform``, ``packages``, ``random_seed``, ``inputs``, ``extra``
        and ``warnings``.

    Raises:
        FileNotFoundError: If any entry of ``input_paths`` does not exist.
    """
    collected_warnings: list[str] = []
    input_records: list[dict[str, Any]] = []

    for candidate in input_paths if input_paths else []:
        location = Path(candidate).expanduser().resolve()
        if not location.exists():
            raise FileNotFoundError(location)
        record: dict[str, Any] = {
            "path": str(location),
            "sha256": _hash_file(location),
            "size_bytes": int(location.stat().st_size),
        }
        input_records.append(record)

    # Sections are gathered in the same order they appear in the result.
    captured_at = datetime.now(UTC).isoformat()
    git_section = _get_git_context(collected_warnings)
    python_section = {
        "version": sys.version.split()[0],
        "executable": sys.executable,
    }
    platform_section = {
        "system": _platform.system(),
        "release": _platform.release(),
        "machine": _platform.machine(),
        "node": _platform.node(),
    }
    package_section = _get_package_versions()
    extra_section = dict(extra) if extra else {}

    return {
        "timestamp_utc": captured_at,
        "git": git_section,
        "python": python_section,
        "platform": platform_section,
        "packages": package_section,
        "random_seed": seed,
        "inputs": input_records,
        "extra": extra_section,
        "warnings": collected_warnings,
    }




[docs]
def write_run_manifest(context: dict[str, Any], outpath: str | Path) -> Path:
    """Write a run-context dictionary to a JSON manifest file."""
    output_path = Path(outpath)
    if output_path.suffix.lower() != ".json":
        raise ValueError("Run manifests must be written to a .json file.")

    try:
        output_path.parent.mkdir(parents=True, exist_ok=True)
    except OSError as exc:
        raise ValueError(
            f"Failed to create output directory '{output_path.parent}': {exc}"
        ) from exc

    try:
        with output_path.open("w", encoding="utf-8") as handle:
            json.dump(context, handle, indent=2, sort_keys=True)
            handle.write("\n")
    except OSError as exc:
        raise ValueError(f"Failed to write run manifest '{output_path}': {exc}") from exc
    return output_path.resolve()




[docs]
def attach_provenance(result: dict[str, Any], context: dict[str, Any]) -> dict[str, Any]:
    """Return a shallow copy of ``result`` carrying ``context`` as provenance.

    Args:
        result: Analysis result to enrich; it is not modified.
        context: Run context stored (by reference, not copied) under the
            ``provenance`` key, replacing any existing entry of that name.

    Returns:
        A new dictionary with all entries of ``result`` plus ``provenance``.
    """
    return dict(result, provenance=context)



# Public API of this module; underscore-prefixed helpers are intentionally
# left out.
__all__ = [
    "attach_provenance",
    "capture_run_context",
    "is_google_colab",
    "is_notebook",
    "write_run_manifest",
]