XSD Catalogue
Extracts the function catalogue (names, port definitions, types) directly from the
TransformGraph.xsd schema file. Provides caching for performance and replaces a
previous JSON-based catalogue approach.
xsd_catalogue_utils
XSD Catalogue Utilities
This module extracts function catalogue information from the TransformGraph.xsd file.
It replaces the JSON-based catalogue with XSD-based definitions.
load_function_catalogue_from_xsd
load_function_catalogue_from_xsd(xsd_path=None)
Parse the XSD and return a dictionary compatible with the old JSON format.
Uses caching to avoid re-parsing if the file hasn't changed.
Returns:
Dict[str, Any]: A dictionary of the following shape:
{
    "functions": {
        "filter_less_than": {
            "description": "...",
            "input_ports": {...},
            "output_ports": {...}
        }, ...
    },
    "types": {...}
}
Source code in src/choregraph/xsd_catalogue_utils.py
def load_function_catalogue_from_xsd(xsd_path: Optional[Path] = None) -> Dict[str, Any]:
    """
    Parse the XSD and return a dictionary compatible with the old JSON format.

    The parsed catalogue is cached at module level and reused for as long as
    the same file (by absolute path) still reports the same modification time.

    Args:
        xsd_path: Location of the schema file. When omitted, the
            "TransformGraph.xsd" file next to this module is used. A plain
            string path is accepted as well.

    Returns:
        {
            "functions": {
                "filter_less_than": {
                    "description": "...",
                    "input_ports": {...},
                    "output_ports": {...}
                }, ...
            },
            "types": {...}
        }

    Note:
        If the file cannot be stat'ed (e.g. it does not exist), the cache is
        bypassed and parsing is still attempted, so the parser's own error
        propagates to the caller.
    """
    global _catalogue_cache, _xsd_mtime

    if xsd_path is None:
        xsd_path = Path(__file__).parent / "TransformGraph.xsd"
    else:
        # Tolerate callers that pass a plain string.
        xsd_path = Path(xsd_path)

    # One stat() call instead of exists() followed by stat().
    try:
        current_mtime = xsd_path.stat().st_mtime
    except OSError:
        current_mtime = None

    # The cache must be tied to the file it was built from: comparing the
    # mtime alone would hand back another schema's catalogue whenever two
    # different files happen to share a modification time. The (path, mtime)
    # pair is kept in the existing `_xsd_mtime` global; clear_cache() resetting
    # it to None still invalidates it.
    cache_key = (str(xsd_path.absolute()), current_mtime)
    if _catalogue_cache is not None and _xsd_mtime == cache_key:
        return _catalogue_cache

    # Parse the XSD
    tree = etree.parse(str(xsd_path))
    root = tree if isinstance(tree, etree._Element) else tree.getroot()

    # Collect every top-level complexType that defines a function
    functions = {}
    for complex_type in root.findall(f"{XS_NS}complexType"):
        func_data = _extract_function_from_complex_type(complex_type, root)
        if func_data:
            functions.update(func_data)

    # Extract types
    types = _extract_types_from_xsd(root)

    _catalogue_cache = {
        "functions": functions,
        "types": types,
    }
    _xsd_mtime = cache_key
    return _catalogue_cache
|
get_function_spec
get_function_spec(function_name, xsd_path=None)
Get the specification for a single function.
Returns None if function not found.
Source code in src/choregraph/xsd_catalogue_utils.py
def get_function_spec(function_name: str, xsd_path: Path = None) -> Optional[Dict[str, Any]]:
    """
    Look up one function's specification in the XSD-derived catalogue.

    Args:
        function_name: Key to look up under the catalogue's "functions" entry.
        xsd_path: Optional schema location, forwarded unchanged to
            load_function_catalogue_from_xsd.

    Returns:
        The specification stored for that function, or None if the
        function is not found.
    """
    known_functions = load_function_catalogue_from_xsd(xsd_path).get("functions", {})
    return known_functions.get(function_name)
|
get_port_type_info
get_port_type_info(port_type, xsd_path=None)
Get information about a port type.
Returns None if type not found.
Source code in src/choregraph/xsd_catalogue_utils.py
def get_port_type_info(port_type: str, xsd_path: Path = None) -> Optional[Dict[str, Any]]:
    """
    Look up the information recorded for a port type.

    Args:
        port_type: Key to look up under the catalogue's "types" entry.
        xsd_path: Optional schema location, forwarded unchanged to
            load_function_catalogue_from_xsd.

    Returns:
        The entry stored for that type, or None if the type is not found.
    """
    known_types = load_function_catalogue_from_xsd(xsd_path).get("types", {})
    return known_types.get(port_type)
|
list_functions
list_functions(xsd_path=None)
List all available transformation functions.
Source code in src/choregraph/xsd_catalogue_utils.py
def list_functions(xsd_path: Path = None) -> list:
    """
    Return the names of all available transformation functions.

    Args:
        xsd_path: Optional schema location, forwarded unchanged to
            load_function_catalogue_from_xsd.

    Returns:
        A new list holding the keys of the catalogue's "functions" entry,
        in the catalogue's own order.
    """
    known_functions = load_function_catalogue_from_xsd(xsd_path).get("functions", {})
    # Iterating a dict yields its keys, so this is a plain copy of the names.
    return list(known_functions)
|
generate_catalogue_text
generate_catalogue_text(xsd_path=None)
Generate a human-readable text representation of the function catalogue.
Useful for LLM prompts.
Source code in src/choregraph/xsd_catalogue_utils.py
def generate_catalogue_text(xsd_path: Path = None) -> str:
    """
    Render the function catalogue as human-readable, Markdown-flavoured text.

    Useful for LLM prompts. Each function gets a "## name" heading, its
    description, and bullet lists of its input and output ports.

    Args:
        xsd_path: Optional schema location, forwarded unchanged to
            load_function_catalogue_from_xsd.

    Returns:
        The rendered catalogue as a single newline-joined string.

    Raises:
        KeyError: If a port specification has no "type" entry.
    """
    function_specs = load_function_catalogue_from_xsd(xsd_path).get("functions", {})
    out = ["# Available Transformation Functions\n"]

    for name, spec in function_specs.items():
        out += [f"## {name}", f"{spec.get('description', '')}\n"]

        # Input ports: type, connection mode and whether the port is required
        out.append(" **Input Ports:**")
        for in_name, in_spec in spec.get("input_ports", {}).items():
            # A port counts as required unless it explicitly says otherwise.
            if in_spec.get("required", True):
                requirement = "required"
            else:
                requirement = "optional"
            wiring = in_spec.get("connection", "static")
            summary = in_spec.get('description', '')
            out.append(
                f" - `{in_name}` ({in_spec['type']}, {wiring}, {requirement}): {summary}"
            )

        # Output ports: type and description only
        out.append("\n **Output Ports:**")
        out.extend(
            f"- `{out_name}` ({out_spec['type']}): {out_spec.get('description', '')}"
            for out_name, out_spec in spec.get("output_ports", {}).items()
        )

        # Blank separator line after each function
        out.append("")

    return "\n".join(out)
|
clear_cache
Clear the catalogue cache. Useful for testing.
Source code in src/choregraph/xsd_catalogue_utils.py
def clear_cache():
    """Reset the module-level catalogue cache. Useful for testing.

    Both the cached catalogue and its recorded modification time are set
    back to None.
    """
    global _catalogue_cache, _xsd_mtime
    _catalogue_cache = _xsd_mtime = None
|