Source code for design_research_agents._memory._graph_extraction

"""Heuristic graph extraction helpers for design relationship text."""

from __future__ import annotations

import re

from design_research_agents._contracts._memory import GraphEdgeRecord, GraphNodeRecord

# Ordered table of (compiled pattern, relationship label) pairs.
#
# Every pattern has the shape ``<left> <connecting phrase> <right>``.  The
# ``left`` and ``right`` named groups each capture a run that starts with an
# ASCII letter and continues with up to 80 letters, digits, spaces,
# underscores, apostrophes, slashes, or hyphens.  All patterns are compiled
# case-insensitively.  Order is significant for consumers that stop at the
# first matching entry.
_RELATION_PATTERNS: tuple[tuple[re.Pattern[str], str], ...] = tuple(
    (re.compile(regex, re.IGNORECASE), label)
    for regex, label in (
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+is connected to\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "connected_to",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+connects to\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "connected_to",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+drives\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "drives",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+powers\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "powers",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+supports\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "supports",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+uses\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "uses",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+depends on\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "depends_on",
        ),
        (
            r"(?P<left>[A-Za-z][A-Za-z0-9 _'/-]{0,80})\s+defines\s+(?P<right>[A-Za-z][A-Za-z0-9 _'/-]{0,80})",
            "defines",
        ),
    )
)


def extract_graph_records_from_text(text: str) -> tuple[list[GraphNodeRecord], list[GraphEdgeRecord]]:
    """Extract nodes and relationships from simple design statements.

    The extraction is intentionally heuristic and deterministic. It is useful for
    bootstrapping graph memory from structured requirement text, but it is not
    intended to replace model-based information extraction.

    Each sentence-like chunk of ``text`` contributes at most one edge: the
    relation patterns are tried in declaration order and the first one that
    yields two usable, distinct entities wins. Nodes are de-duplicated by
    ``node_id``; when the same id is produced more than once, the most recently
    built node record is the one kept.

    Args:
        text: Source text containing simple relationship statements.

    Returns:
        Tuple ``(nodes, edges)`` extracted from the text.
    """
    nodes_by_id: dict[str, GraphNodeRecord] = {}
    edges: list[GraphEdgeRecord] = []

    for sentence in _split_sentences(text):
        for pattern, relationship in _RELATION_PATTERNS:
            match = pattern.search(sentence)
            if match is None:
                continue

            left_name = _normalize_entity_name(match.group("left"))
            right_name = _normalize_entity_name(match.group("right"))
            if not left_name or not right_name:
                continue

            left_node = _build_node(left_name)
            right_node = _build_node(right_name)
            # Compare node identities rather than display names: matching is
            # case-insensitive, so "Motor" and "motor" name the same node and
            # would otherwise be recorded as a self-loop edge.
            if left_node.node_id == right_node.node_id:
                continue

            nodes_by_id[left_node.node_id] = left_node
            nodes_by_id[right_node.node_id] = right_node
            edges.append(
                GraphEdgeRecord(
                    source_id=left_node.node_id,
                    target_id=right_node.node_id,
                    relationship=relationship,
                    metadata={"source_sentence": sentence},
                )
            )
            # One relationship per sentence: stop at the first usable pattern.
            break

    return list(nodes_by_id.values()), edges
def _split_sentences(text: str) -> list[str]:
    """Break free text into trimmed, non-empty sentence-like chunks.

    Runs of periods, semicolons, and newlines act as separators.
    """
    trimmed_pieces = (piece.strip() for piece in re.split(r"[.;\n]+", text))
    return [piece for piece in trimmed_pieces if piece]


def _normalize_entity_name(value: str) -> str:
    """Return ``value`` trimmed, with internal whitespace runs collapsed to one space.

    An empty or whitespace-only ``value`` yields the empty string.
    """
    # ``str.split()`` without arguments already discards leading and trailing
    # whitespace and never produces empty tokens.
    return " ".join(value.split())


def _build_node(name: str) -> GraphNodeRecord:
    """Create the ``"component"`` graph node for an already-normalized entity name."""
    node_id = _slugify(name)
    return GraphNodeRecord(node_id=node_id, name=name, node_type="component")


def _slugify(value: str) -> str:
    """Derive a deterministic identifier from ``value``.

    The value is lower-cased, every run of characters outside ``a-z0-9``
    becomes a single hyphen, and leading/trailing hyphens are dropped. When
    nothing remains, the placeholder ``"entity"`` is returned.
    """
    slug = re.sub(r"[^a-z0-9]+", "-", value.lower()).strip("-")
    return slug if slug else "entity"


__all__ = ["extract_graph_records_from_text"]