Skip to content

XSD Catalogue

Extracts the function catalogue (names, port definitions, types) directly from the TransformGraph.xsd schema file. Provides caching for performance and replaces a previous JSON-based catalogue approach.

xsd_catalogue_utils

XSD Catalogue Utilities

This module extracts function catalogue information from the TransformGraph.xsd file. It replaces the JSON-based catalogue with XSD-based definitions.

load_function_catalogue_from_xsd

load_function_catalogue_from_xsd(xsd_path=None)

Parse the XSD and return a dictionary compatible with the old JSON format.

Uses caching to avoid re-parsing if the file hasn't changed.

RETURNS DESCRIPTION
Dict[str, Any]

{ "functions": { "filter_less_than": { "description": "...", "input_ports": {...}, "output_ports": {...} }, ... }, "types": {...} }

Source code in src/choregraph/xsd_catalogue_utils.py
def load_function_catalogue_from_xsd(xsd_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Parse the XSD and return a dictionary compatible with the old JSON format.

    The parsed catalogue is cached at module level. The cached value is
    reused only when the same path is requested again AND the file's
    modification time is unchanged, so asking for a different XSD never
    returns the catalogue of a previously loaded one.

    Args:
        xsd_path: Location of the schema file, as a ``Path`` or a plain
            string. When None, ``TransformGraph.xsd`` located next to this
            module is used.

    Returns:
        {
            "functions": {
                "filter_less_than": {
                    "description": "...",
                    "input_ports": {...},
                    "output_ports": {...}
                }, ...
            },
            "types": {...}
        }

        The returned dictionary is the cached object itself, not a copy.

    Raises:
        Errors raised by ``etree.parse`` are not caught here and propagate
        to the caller.
    """
    global _catalogue_cache, _xsd_mtime, _xsd_cached_path

    if xsd_path is None:
        xsd_path = Path(__file__).parent / "TransformGraph.xsd"
    else:
        # Accept plain strings as well as Path objects.
        xsd_path = Path(xsd_path)

    # Check cache validity: same file as last time and not modified since.
    current_mtime = xsd_path.stat().st_mtime if xsd_path.exists() else None
    # `_xsd_cached_path` is first created by this function, so read it
    # defensively: it does not exist before the first successful parse.
    cached_path = globals().get("_xsd_cached_path")
    if (
        _catalogue_cache is not None
        and cached_path == xsd_path
        and _xsd_mtime == current_mtime
    ):
        return _catalogue_cache

    # Parse the requested XSD
    tree = etree.parse(str(xsd_path))
    root = tree if isinstance(tree, etree._Element) else tree.getroot()

    functions = {}

    # Find all top-level complexType elements that define functions; the
    # helper returns a falsy value for types that are not functions.
    for complex_type in root.findall(f"{XS_NS}complexType"):
        func_data = _extract_function_from_complex_type(complex_type, root)
        if func_data:
            functions.update(func_data)

    # Extract types
    types = _extract_types_from_xsd(root)

    _catalogue_cache = {
        "functions": functions,
        "types": types
    }
    _xsd_mtime = current_mtime
    _xsd_cached_path = xsd_path

    return _catalogue_cache

get_function_spec

get_function_spec(function_name, xsd_path=None)

Get the specification for a single function.

Returns None if function not found.

Source code in src/choregraph/xsd_catalogue_utils.py
def get_function_spec(function_name: str, xsd_path: Path = None) -> Optional[Dict[str, Any]]:
    """
    Look up the specification of one function in the XSD-derived catalogue.

    Args:
        function_name: Key to look up in the catalogue's "functions" mapping.
        xsd_path: Forwarded unchanged to ``load_function_catalogue_from_xsd``.

    Returns:
        The entry stored under ``function_name``, or None when the catalogue
        has no such function.
    """
    known_functions = load_function_catalogue_from_xsd(xsd_path).get("functions", {})
    return known_functions.get(function_name)

get_port_type_info

get_port_type_info(port_type, xsd_path=None)

Get information about a port type.

Returns None if type not found.

Source code in src/choregraph/xsd_catalogue_utils.py
def get_port_type_info(port_type: str, xsd_path: Path = None) -> Optional[Dict[str, Any]]:
    """
    Look up the catalogue entry describing a port type.

    Args:
        port_type: Key to look up in the catalogue's "types" mapping.
        xsd_path: Forwarded unchanged to ``load_function_catalogue_from_xsd``.

    Returns:
        The entry stored under ``port_type``, or None when the catalogue has
        no such type.
    """
    known_types = load_function_catalogue_from_xsd(xsd_path).get("types", {})
    return known_types.get(port_type)

list_functions

list_functions(xsd_path=None)

List all available transformation functions.

Source code in src/choregraph/xsd_catalogue_utils.py
def list_functions(xsd_path: Path = None) -> list:
    """
    Return the names of all transformation functions in the catalogue.

    Args:
        xsd_path: Forwarded unchanged to ``load_function_catalogue_from_xsd``.

    Returns:
        A new list holding the keys of the catalogue's "functions" mapping,
        in the mapping's iteration order.
    """
    function_table = load_function_catalogue_from_xsd(xsd_path).get("functions", {})
    return [name for name in function_table]

generate_catalogue_text

generate_catalogue_text(xsd_path=None)

Generate a human-readable text representation of the function catalogue. Useful for LLM prompts.

Source code in src/choregraph/xsd_catalogue_utils.py
def generate_catalogue_text(xsd_path: Path = None) -> str:
    """
    Generate a human-readable text representation of the function catalogue.
    Useful for LLM prompts.

    The output is Markdown-like: a top-level title, then for every function a
    ``## name`` heading, its description, and indented bullet lists of its
    input and output ports. Input and output bullets share the same two-space
    indent so both lists sit under their headers consistently.

    Args:
        xsd_path: Forwarded unchanged to ``load_function_catalogue_from_xsd``.

    Returns:
        The catalogue rendered as a single newline-joined string.

    Raises:
        KeyError: If a port specification has no "type" entry.
    """
    catalogue = load_function_catalogue_from_xsd(xsd_path)

    lines = ["# Available Transformation Functions\n"]

    for func_name, func_spec in catalogue.get("functions", {}).items():
        lines.append(f"## {func_name}")
        lines.append(f"{func_spec.get('description', '')}\n")

        # Input ports: ports are treated as required and "static" unless the
        # spec says otherwise.
        lines.append("  **Input Ports:**")
        for port_name, port_spec in func_spec.get("input_ports", {}).items():
            required = "required" if port_spec.get("required", True) else "optional"
            connection = port_spec.get("connection", "static")
            lines.append(f"  - `{port_name}` ({port_spec['type']}, {connection}, {required}): {port_spec.get('description', '')}")

        # Output ports: bullets use the same indent as the input-port bullets.
        lines.append("\n  **Output Ports:**")
        for port_name, port_spec in func_spec.get("output_ports", {}).items():
            lines.append(f"  - `{port_name}` ({port_spec['type']}): {port_spec.get('description', '')}")

        # Blank line separating this function from the next one.
        lines.append("")

    return "\n".join(lines)

clear_cache

clear_cache()

Clear the catalogue cache. Useful for testing.

Source code in src/choregraph/xsd_catalogue_utils.py
def clear_cache():
    """
    Drop the cached catalogue and its recorded modification time.

    Both module-level cache variables are reset to None. Useful for testing.
    """
    global _xsd_mtime, _catalogue_cache
    _xsd_mtime = None
    _catalogue_cache = None