#!/usr/bin/env python3
# Copyright (c) 2026 Lark Technologies Pte. Ltd.
# SPDX-License-Identifier: MIT
from __future__ import annotations
import json
import re
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
SKILL_ROOT = Path(__file__).resolve().parent.parent
ASSETS_DIR = SKILL_ROOT / "assets"
REFERENCES_DIR = SKILL_ROOT / "references"
TEMPLATES_DIR = ASSETS_DIR / "templates"
CATALOG_PATH = REFERENCES_DIR / "template-catalog.md"
DEFAULT_INDEX_PATH = REFERENCES_DIR / "template-index.json"
LIGHTWEIGHT_INDEX_SCHEMA_VERSION = "1.1.0"
class TemplateToolError(Exception):
pass
def fail(message: str) -> None:
raise TemplateToolError(message)
def read_file(file_path: str | Path) -> str:
return Path(file_path).read_text(encoding="utf-8")
def normalize_whitespace(value: str) -> str:
return re.sub(r"\s+", " ", value).strip()
def strip_markdown(value: str) -> str:
return normalize_whitespace(
re.sub(r"^-\s*", "", re.sub(r"`([^`]+)`", r"\1", value.replace("**", "")))
)
def strip_xml(value: str) -> str:
stripped = re.sub(r"", r"\1", value)
stripped = re.sub(r"<[^>]+>", " ", stripped)
stripped = stripped.replace(" ", " ")
stripped = stripped.replace("&", "&")
stripped = stripped.replace("<", "<")
stripped = stripped.replace(">", ">")
stripped = stripped.replace(""", '"')
stripped = stripped.replace("'", "'")
return normalize_whitespace(stripped)
def compact_object(value: dict[str, Any]) -> dict[str, Any]:
return {key: entry for key, entry in value.items() if entry is not None}
def tokenize_query(value: str) -> list[str]:
normalized = normalize_whitespace(value.lower())
if not normalized:
return []
def append_unique(target: list[str], token: str) -> None:
token = token.strip()
if token and token not in target:
target.append(token)
tokens: list[str] = []
for item in [item.strip() for item in re.split(r"[\s,/|,。;;::()()【】\[\]《》<>]+", normalized) if item.strip()]:
append_unique(tokens, item)
# Chinese prompts are often complete sentences without separators, e.g.
# "帮我做一个季度工作汇报PPT". Add CJK n-grams so domain phrases such as
# "工作汇报" or "季度复盘" can still match catalog metadata.
for phrase in re.findall(r"[\u3400-\u9fff]+", normalized):
if len(phrase) < 2:
continue
max_size = min(6, len(phrase))
for size in range(max_size, 1, -1):
for start in range(0, len(phrase) - size + 1):
append_unique(tokens, phrase[start : start + size])
synonym_tokens = {
"浅色": ["light"],
"白底": ["light"],
"明亮": ["light"],
"深色": ["dark"],
"黑底": ["dark"],
"暗色": ["dark"],
"多彩": ["colorful"],
"活泼": ["colorful", "casual"],
"正式": ["formal"],
"商务": ["formal"],
"轻松": ["casual"],
"创意": ["creative"],
}
for token in list(tokens):
for keyword, aliases in synonym_tokens.items():
if keyword in token:
for alias in aliases:
append_unique(tokens, alias)
unique_tokens: list[str] = []
for token in tokens or [normalized]:
if token not in unique_tokens:
unique_tokens.append(token)
return unique_tokens
def parse_range_spec(range_spec: str) -> list[int]:
if not range_spec or not range_spec.strip():
fail("range is required")
numbers: set[int] = set()
for part in range_spec.split(","):
trimmed = part.strip()
if not trimmed:
continue
match = re.fullmatch(r"(\d+)(?:-(\d+))?", trimmed)
if not match:
fail(f"invalid range token: {trimmed}")
start = int(match.group(1))
end = int(match.group(2) or match.group(1))
if start < 1 or end < start:
fail(f"invalid range token: {trimmed}")
for index in range(start, end + 1):
numbers.add(index)
return sorted(numbers)
def compress_numbers(numbers: list[int]) -> str:
if not numbers:
return ""
parts: list[str] = []
start = numbers[0]
previous = numbers[0]
for current in numbers[1:]:
if current == previous + 1:
previous = current
continue
parts.append(f"{start}" if start == previous else f"{start}-{previous}")
start = current
previous = current
parts.append(f"{start}" if start == previous else f"{start}-{previous}")
return ",".join(parts)
def slice_array(items: list[Any], limit: int) -> list[Any]:
return items[: max(limit, 0)]
def count_tag(xml: str, tag_name: str) -> int:
return len(re.findall(fr"<{tag_name}\b", xml))
def extract_attribute(tag_source: str, name: str) -> str | None:
match = re.search(fr'{re.escape(name)}="([^"]+)"', tag_source)
return match.group(1) if match else None
def extract_numeric_attribute(tag_source: str, name: str) -> int | float | None:
raw = extract_attribute(tag_source, name)
if raw is None:
return None
try:
value = float(raw)
except ValueError:
return None
return int(value) if value.is_integer() else value
def sort_regions(regions: list[dict[str, Any]]) -> list[dict[str, Any]]:
return sorted(regions, key=lambda region: (region["y"], region["x"], region["kind"]))
def extract_slide_regions(slide_xml: str) -> list[dict[str, Any]]:
regions: list[dict[str, Any]] = []
for match in re.finditer(r"]*)>([\s\S]*?)", slide_xml):
attrs, content = match.group(1), match.group(2)
x = extract_numeric_attribute(attrs, "topLeftX")
y = extract_numeric_attribute(attrs, "topLeftY")
width = extract_numeric_attribute(attrs, "width")
height = extract_numeric_attribute(attrs, "height")
if all(value is not None for value in [x, y, width, height]):
text_type_match = re.search(r'textType="([^"]+)"', content)
regions.append(
{
"kind": "shape",
"type": extract_attribute(attrs, "type") or "shape",
"text_type": text_type_match.group(1) if text_type_match else None,
"x": x,
"y": y,
"width": width,
"height": height,
"area": width * height,
}
)
for match in re.finditer(r"<(img|table|chart)\b([^>]*)/?>", slide_xml):
kind, attrs = match.group(1), match.group(2)
x = extract_numeric_attribute(attrs, "topLeftX")
y = extract_numeric_attribute(attrs, "topLeftY")
width = extract_numeric_attribute(attrs, "width")
height = extract_numeric_attribute(attrs, "height")
if all(value is not None for value in [x, y, width, height]):
regions.append(
{
"kind": kind,
"type": kind,
"text_type": None,
"x": x,
"y": y,
"width": width,
"height": height,
"area": width * height,
}
)
return sort_regions(regions)
def build_bbox_summary(
regions: list[dict[str, Any]], slide_width: int | float | None, slide_height: int | float | None
) -> dict[str, Any]:
if not regions:
return {
"region_count": 0,
"occupied_bounds": None,
"regions": [],
"canvas": compact_object({"width": slide_width, "height": slide_height}),
}
min_x = regions[0]["x"]
min_y = regions[0]["y"]
max_x = regions[0]["x"] + regions[0]["width"]
max_y = regions[0]["y"] + regions[0]["height"]
for region in regions:
min_x = min(min_x, region["x"])
min_y = min(min_y, region["y"])
max_x = max(max_x, region["x"] + region["width"])
max_y = max(max_y, region["y"] + region["height"])
return {
"region_count": len(regions),
"canvas": compact_object({"width": slide_width, "height": slide_height}),
"occupied_bounds": {
"x": min_x,
"y": min_y,
"width": max_x - min_x,
"height": max_y - min_y,
},
"regions": slice_array(
[
compact_object(
{
"id": f'{region["kind"]}-{index + 1}',
"kind": region["kind"],
"type": region["type"],
"text_type": region.get("text_type"),
"x": region["x"],
"y": region["y"],
"width": region["width"],
"height": region["height"],
}
)
for index, region in enumerate(regions)
],
8,
),
}
def build_editable_regions(regions: list[dict[str, Any]]) -> list[dict[str, Any]]:
sorted_regions = sorted(regions, key=lambda region: region["area"], reverse=True)
return slice_array(
[
compact_object(
{
"id": f'{region["kind"]}-{index + 1}',
"kind": region["kind"],
"role": "image"
if region["kind"] == "img"
else region.get("text_type") or region["type"] or region["kind"],
"x": region["x"],
"y": region["y"],
"width": region["width"],
"height": region["height"],
}
)
for index, region in enumerate(sorted_regions)
],
8,
)
def detect_slide_layout_tags(
slide_xml: str, regions: list[dict[str, Any]], slide_width: int | float | None, slide_height: int | float | None
) -> list[str]:
tags: set[str] = set()
text_regions = [region for region in regions if region["kind"] == "shape" and region["type"] == "text"]
image_regions = [region for region in regions if region["kind"] == "img"]
table_regions = [region for region in regions if region["kind"] == "table"]
if table_regions:
tags.add("comparison-table")
if "= slide_width * 0.75
) or (
slide_height and biggest_image["height"] >= slide_height * 0.75
):
tags.add("full-bleed-image-caption")
elif biggest_text["x"] <= biggest_image["x"]:
tags.add("hero-text-left-image-right")
else:
tags.add("hero-image-left-text-right")
if len(text_regions) >= 4 and not image_regions:
distinct_x = len({round(region["x"] / 20) for region in text_regions})
distinct_y = len({round(region["y"] / 20) for region in text_regions})
if distinct_x >= 2 and distinct_y >= 2:
tags.add("2x2-metric-cards")
if len(text_regions) >= 2 and not image_regions:
width = slide_width or 960
left = any(region["x"] < width / 2 - 40 for region in text_regions)
right = any(region["x"] + region["width"] > width / 2 + 40 for region in text_regions)
if left and right:
tags.add("two-column-text")
if len(text_regions) <= 2 and not image_regions:
top_most = sorted(text_regions, key=lambda region: region["y"])[0] if text_regions else None
if top_most and top_most["y"] <= 120 and top_most["height"] <= 140:
tags.add("section-divider")
if not tags and text_regions:
tags.add("text-focused")
return sorted(tags)
def parse_theme_summary(theme_xml: str | None) -> dict[str, Any]:
if not theme_xml:
return {"has_theme_node": False, "text_styles": []}
text_styles_block = re.search(r"([\s\S]*?)", theme_xml)
text_styles: list[dict[str, Any]] = []
if text_styles_block:
for match in re.finditer(r"<(title|headline|sub-headline|body|caption)\b([^>]*)/?>", text_styles_block.group(1)):
text_styles.append(
compact_object(
{
"text_type": match.group(1),
"font_color": extract_attribute(match.group(2), "fontColor"),
"font_size": extract_attribute(match.group(2), "fontSize"),
"font_family": extract_attribute(match.group(2), "fontFamily"),
}
)
)
return {"has_theme_node": True, "text_styles": text_styles}
def extract_background_hint(slide_xml: str) -> str | None:
fill_color_match = re.search(r' dict[str, str] | None:
type_priority = {"title": 5, "headline": 4, "sub-headline": 3, "body": 2, "caption": 1}
content_pattern = re.compile(
r']*)textType="(title|headline|sub-headline|body|caption)"([^>]*)>([\s\S]*?)'
)
candidates: list[dict[str, Any]] = []
for match in content_pattern.finditer(slide_xml):
attrs = f"{match.group(1)} {match.group(3)}"
text = strip_xml(match.group(4))
if text:
candidates.append(
{
"text_type": match.group(2),
"text": text[:80],
"font_size": int(extract_attribute(attrs, "fontSize") or "0"),
"priority": type_priority.get(match.group(2), 0),
}
)
candidates.sort(key=lambda item: (-item["priority"], -item["font_size"], -len(item["text"])))
if candidates:
return {"text_type": candidates[0]["text_type"], "text": candidates[0]["text"]}
return None
def summarize_slide(slide_xml: str, slide_number: int, presentation_info: dict[str, Any] | None = None) -> dict[str, Any]:
presentation_info = presentation_info or {}
raw_width = presentation_info.get("width")
raw_height = presentation_info.get("height")
slide_width = int(float(raw_width)) if raw_width else None
slide_height = int(float(raw_height)) if raw_height else None
regions = extract_slide_regions(slide_xml)
return {
"slide_number": slide_number,
"title_hint": extract_title_hint(slide_xml),
"background_hint": extract_background_hint(slide_xml),
"layout_tags": detect_slide_layout_tags(slide_xml, regions, slide_width, slide_height),
"bbox_summary": build_bbox_summary(regions, slide_width, slide_height),
"editable_regions": build_editable_regions(regions),
"element_counts": {
"shape": count_tag(slide_xml, "shape"),
"img": count_tag(slide_xml, "img"),
"table": count_tag(slide_xml, "table"),
"chart": count_tag(slide_xml, "chart"),
"icon": count_tag(slide_xml, "icon"),
"line": count_tag(slide_xml, "line"),
"polyline": count_tag(slide_xml, "polyline"),
},
}
def aggregate_slides(slide_summaries: list[dict[str, Any]]) -> dict[str, Any]:
totals = {"shape": 0, "img": 0, "table": 0, "chart": 0, "icon": 0, "line": 0, "polyline": 0}
title_hints: list[str] = []
background_hints: list[str] = []
layout_tags: list[str] = []
for slide in slide_summaries:
for key, value in slide["element_counts"].items():
totals[key] += value
if slide.get("title_hint") and slide["title_hint"]["text"] not in title_hints:
title_hints.append(slide["title_hint"]["text"])
if slide.get("background_hint") and slide["background_hint"] not in background_hints:
background_hints.append(slide["background_hint"])
for tag in slide.get("layout_tags") or []:
if tag not in layout_tags:
layout_tags.append(tag)
return {
"slide_count": len(slide_summaries),
"title_hints": slice_array(title_hints, 4),
"background_hints": slice_array(background_hints, 4),
"layout_tags": layout_tags,
"element_totals": totals,
}
def parse_template_xml(template_path: str | Path) -> dict[str, Any]:
xml = read_file(template_path)
presentation_match = re.search(r"]*)>", xml)
if not presentation_match:
fail(f"template missing presentation root: {template_path}")
opening_tag = presentation_match.group(0)
title_xml_match = re.search(r"[\s\S]*?", xml)
theme_xml_match = re.search(r"[\s\S]*?", xml)
slides = re.findall(r"", xml)
slide_summaries = [
summarize_slide(
slide_xml,
index + 1,
{
"width": extract_attribute(presentation_match.group(1), "width"),
"height": extract_attribute(presentation_match.group(1), "height"),
},
)
for index, slide_xml in enumerate(slides)
]
return {
"xml": xml,
"opening_tag": opening_tag,
"width": extract_attribute(presentation_match.group(1), "width"),
"height": extract_attribute(presentation_match.group(1), "height"),
"title_xml": title_xml_match.group(0) if title_xml_match else None,
"title_text": strip_xml(title_xml_match.group(0)) if title_xml_match else None,
"theme_xml": theme_xml_match.group(0) if theme_xml_match else None,
"theme_summary": parse_theme_summary(theme_xml_match.group(0) if theme_xml_match else None),
"slides": slides,
"slide_summaries": slide_summaries,
}
def finalize_catalog_entry(entry: dict[str, Any] | None) -> dict[str, Any] | None:
if not entry:
return None
filename_stem = re.sub(r"\.xml$", "", entry["filename"])
template_id = f'{entry["category"]}--{filename_stem}'
return {
"template_id": template_id,
"filename": f"{template_id}.xml",
"category": entry["category"],
"category_label": entry["category_label"],
"scene": entry["scene"],
"is_general_template": entry["is_general_template"],
"catalog_slide_count": entry["catalog_slide_count"],
"tone": entry["tone"],
"formality": entry["formality"],
"palette": entry["palette"],
"structure": entry["structure"],
"page_types": entry["page_types"],
"use_cases": entry["use_cases"],
"ranges": entry["ranges"],
}
def parse_catalog(catalog_path: str | Path = CATALOG_PATH) -> list[dict[str, Any]]:
lines = read_file(catalog_path).splitlines()
entries: list[dict[str, Any]] = []
current_category: str | None = None
current_category_label: str | None = None
current_entry: dict[str, Any] | None = None
def push_current() -> None:
nonlocal current_entry
finalized = finalize_catalog_entry(current_entry)
if finalized:
entries.append(finalized)
current_entry = None
for raw_line in lines:
line = raw_line.rstrip()
if line.startswith("## 快速匹配索引"):
break
category_match = re.match(r"^##\s+([a-z]+)\s+—\s+(.+)$", line)
if category_match:
push_current()
current_category = category_match.group(1)
current_category_label = category_match.group(2).strip()
continue
template_match = re.match(r"^###\s+(⭐\s+)?([^ ]+\.xml)\s+—\s+(.+)$", line)
if template_match:
push_current()
current_entry = {
"category": current_category,
"category_label": current_category_label,
"filename": template_match.group(2).strip(),
"scene": template_match.group(3).strip(),
"is_general_template": bool(template_match.group(1)),
"catalog_slide_count": None,
"tone": None,
"formality": None,
"palette": None,
"structure": None,
"page_types": None,
"use_cases": None,
"ranges": [],
}
continue
if not current_entry:
continue
plain = strip_markdown(line)
slide_count_match = re.search(r"(\d+)\s*页", plain)
if slide_count_match:
current_entry["catalog_slide_count"] = int(slide_count_match.group(1))
tone_match = re.search(r"色调:([^|]+)\|\s*正式度:(.+)$", plain)
if tone_match:
current_entry["tone"] = tone_match.group(1).strip()
current_entry["formality"] = tone_match.group(2).strip()
continue
if plain.startswith("配色:"):
current_entry["palette"] = plain[len("配色:") :].strip()
continue
if plain.startswith("结构:"):
current_entry["structure"] = plain[len("结构:") :].strip()
continue
if plain.startswith("页面类型:"):
current_entry["page_types"] = plain[len("页面类型:") :].strip()
continue
if plain.startswith("页型索引"):
_, _, ranges_raw = plain.partition(":")
ranges: list[dict[str, Any]] = []
for item in [part.strip() for part in ranges_raw.split("|") if part.strip()]:
match = re.match(r"^(.+?)\s+([0-9,\-\s无]+)$", item)
if not match:
ranges.append({"label": item, "range": "", "slide_numbers": []})
continue
range_text = normalize_whitespace(match.group(2))
range_text = "" if range_text == "无" else range_text
ranges.append(
{
"label": match.group(1).strip(),
"range": range_text,
"slide_numbers": parse_range_spec(range_text) if range_text else [],
}
)
current_entry["ranges"] = ranges
continue
if plain.startswith("适用:"):
current_entry["use_cases"] = plain[len("适用:") :].strip()
push_current()
return entries
def build_search_text(entry: dict[str, Any]) -> str:
values: list[str] = [
entry.get("template_id"),
entry.get("category"),
entry.get("category_label"),
entry.get("scene"),
entry.get("tone"),
entry.get("formality"),
entry.get("palette"),
entry.get("structure"),
entry.get("page_types"),
*(entry.get("layout_tags") or []),
entry.get("use_cases"),
*[f'{entry_range["label"]} {entry_range["range"]}' for entry_range in entry.get("ranges", [])],
]
return " ".join(str(value) for value in values if value).lower()
def build_index_data() -> dict[str, Any]:
catalog_entries = parse_catalog()
templates: list[dict[str, Any]] = []
for entry in catalog_entries:
template_path = TEMPLATES_DIR / entry["filename"]
xml_info = parse_template_xml(template_path)
layout_tags = sorted({tag for slide in xml_info["slide_summaries"] for tag in slide.get("layout_tags", [])})
templates.append(
{
"template_id": entry["template_id"],
"category": entry["category"],
"category_label": entry["category_label"],
"scene": entry["scene"],
"tone": entry["tone"],
"formality": entry["formality"],
"is_general_template": entry["is_general_template"],
"slide_count": len(xml_info["slides"]),
"presentation_title": xml_info["title_text"],
"palette": entry["palette"],
"structure": entry["structure"],
"page_types": entry["page_types"],
"layout_tags": layout_tags,
"use_cases": entry["use_cases"],
"ranges": [{"label": entry_range["label"], "range": entry_range["range"]} for entry_range in entry["ranges"]],
}
)
return {
"schema_version": LIGHTWEIGHT_INDEX_SCHEMA_VERSION,
"generated_at": datetime.now(timezone.utc).isoformat(timespec="milliseconds").replace("+00:00", "Z"),
"template_count": len(templates),
"templates": templates,
}
def load_index(index_path: str | Path = DEFAULT_INDEX_PATH) -> dict[str, Any]:
index_path = Path(index_path)
if index_path.exists():
existing = json.loads(read_file(index_path))
first_template = existing.get("templates", [None])[0] if existing.get("templates") else None
if first_template and first_template.get("layout_tags") and "bbox_summary" not in first_template:
return existing
return build_index_data()
def catalog_filename(entry: dict[str, Any]) -> str:
return f'{entry["template_id"].split("--", 1)[-1]}.xml'
def build_external_template_entry(template_path: Path) -> dict[str, Any]:
xml_info = parse_template_xml(template_path)
return {
"template_id": template_path.stem,
"scene": None,
"tone": None,
"formality": None,
"slide_count": len(xml_info["slides"]),
"presentation_title": xml_info["title_text"],
"palette": None,
"structure": None,
"page_types": [],
"layout_tags": sorted(
{tag for slide in xml_info["slide_summaries"] for tag in slide.get("layout_tags", [])}
),
"use_cases": None,
"theme_summary": xml_info["theme_summary"],
"ranges": [],
}
def find_template_entry(
index_data: dict[str, Any],
selector: str,
*,
fail_on_ambiguous: bool = True,
) -> dict[str, Any] | None:
normalized = re.sub(r"\.xml$", "", selector)
matches = [
entry
for entry in index_data["templates"]
if entry["template_id"] == normalized
or f'{entry["template_id"]}.xml' == selector
or catalog_filename(entry) == selector
or catalog_filename(entry) == f"{normalized}.xml"
]
if len(matches) > 1 and fail_on_ambiguous:
template_ids = ", ".join(entry["template_id"] for entry in matches)
fail(f"template selector is ambiguous: {selector}; use one of: {template_ids}")
if len(matches) > 1:
return None
return matches[0] if matches else None
def resolve_template_reference(index_data: dict[str, Any], template_selector: str) -> dict[str, Any]:
if not template_selector:
fail("template selector is required")
input_path = Path(template_selector)
as_path = input_path
if not input_path.is_absolute():
as_path = (Path.cwd() / as_path).resolve()
if as_path.exists():
entry = find_template_entry(
index_data,
as_path.name,
fail_on_ambiguous=False,
) or build_external_template_entry(as_path)
return {"entry": entry, "path": as_path}
if input_path.is_absolute() or input_path.parent != Path("."):
fail(f"template not found: {template_selector}")
selector_name = Path(template_selector).name
entry = find_template_entry(index_data, selector_name)
if entry:
return {"entry": entry, "path": get_template_path(entry)}
fail(f"template not found: {template_selector}")
def resolve_template_entry(index_data: dict[str, Any], template_selector: str) -> dict[str, Any]:
return resolve_template_reference(index_data, template_selector)["entry"]
def resolve_range_selection(entry: dict[str, Any], options: dict[str, Any]) -> dict[str, Any]:
if options.get("label"):
matched_range = next((item for item in entry["ranges"] if item["label"] == options["label"]), None)
if not matched_range:
fail(f'range label not found: {options["label"]}')
slide_numbers = parse_range_spec(matched_range["range"]) if matched_range["range"] else []
if not slide_numbers:
fail(f'range label has no slides: {options["label"]}')
return {"label": matched_range["label"], "range": matched_range["range"], "slide_numbers": slide_numbers}
if not options.get("range"):
fail("either --range or --label is required")
slide_numbers = parse_range_spec(options["range"])
return {"label": None, "range": compress_numbers(slide_numbers), "slide_numbers": slide_numbers}
def get_template_path(entry: dict[str, Any]) -> Path:
return TEMPLATES_DIR / f'{entry["template_id"]}.xml'
def summarize_selection(index_data: dict[str, Any], template_selector: str, options: dict[str, Any]) -> dict[str, Any]:
reference = resolve_template_reference(index_data, template_selector)
entry = reference["entry"]
selection = resolve_range_selection(entry, options)
xml_info = parse_template_xml(reference["path"])
slide_summaries = [
xml_info["slide_summaries"][slide_number - 1]
for slide_number in selection["slide_numbers"]
if 0 < slide_number <= len(xml_info["slide_summaries"])
]
return {
"template": {
"template_id": entry["template_id"],
"scene": entry["scene"],
"tone": entry["tone"],
"formality": entry["formality"],
"slide_count": len(xml_info["slides"]),
"presentation_title": xml_info["title_text"],
"palette": entry["palette"],
"structure": entry["structure"],
"page_types": entry["page_types"],
"layout_tags": sorted({tag for slide in xml_info["slide_summaries"] for tag in slide.get("layout_tags", [])}),
"use_cases": entry["use_cases"],
},
"selection": selection,
"theme_summary": xml_info["theme_summary"],
"summary": aggregate_slides(slide_summaries),
"slides": slide_summaries,
}
def extract_selection_xml(index_data: dict[str, Any], template_selector: str, options: dict[str, Any]) -> str:
reference = resolve_template_reference(index_data, template_selector)
entry = reference["entry"]
selection = resolve_range_selection(entry, options)
xml_info = parse_template_xml(reference["path"])
selected_slides: list[str] = []
for slide_number in selection["slide_numbers"]:
if slide_number - 1 >= len(xml_info["slides"]) or slide_number <= 0:
fail(f"slide {slide_number} is out of range for {entry['template_id']}")
selected_slides.append(xml_info["slides"][slide_number - 1])
chunks = [xml_info["opening_tag"]]
if xml_info["title_xml"]:
chunks.append(f' {xml_info["title_xml"]}')
if xml_info["theme_xml"]:
chunks.append(f' {xml_info["theme_xml"]}')
chunks.extend(selected_slides)
chunks.append("")
return "\n".join(chunks)
def search_templates(index_data: dict[str, Any], options: dict[str, Any]) -> list[dict[str, Any]]:
query = options.get("query", "") or ""
tokens = tokenize_query(query)
tone = options.get("tone")
formality = options.get("formality")
category = options.get("category")
layout_tag = options.get("layoutTag") or options.get("layout-tag")
limit = int(options.get("limit", 5))
ranked: list[dict[str, Any]] = []
for entry in index_data["templates"]:
if tone and entry["tone"] != tone:
continue
if formality and entry["formality"] != formality:
continue
if category and entry["category"] != category:
continue
if layout_tag and layout_tag not in (entry.get("layout_tags") or []):
continue
score = 0
if not query:
score = 1
else:
search_text = build_search_text(entry)
exact_id = entry["template_id"].lower() == query.lower()
if exact_id:
score += 100
for token in tokens:
if token in search_text:
score += len(token) * 10 if re.search(r"[\u3400-\u9fff]", token) else len(token) * 6
if entry.get("scene") and token in entry["scene"]:
score += 12
if entry.get("use_cases") and token in entry["use_cases"]:
score += 8
if entry.get("scene") and query in entry["scene"]:
score += 40
if entry.get("use_cases") and query in entry["use_cases"]:
score += 30
if score == 0:
continue
if entry.get("is_general_template"):
score -= 5
ranked.append(
{
"template_id": entry["template_id"],
"scene": entry["scene"],
"tone": entry["tone"],
"formality": entry["formality"],
"is_general_template": entry["is_general_template"],
"use_cases": entry["use_cases"],
"layout_tags": entry.get("layout_tags") or [],
"slide_count": entry["slide_count"],
"ranges": entry["ranges"],
"score": score,
}
)
ranked.sort(key=lambda item: (-item["score"], item["template_id"]))
return ranked[:limit]
def parse_cli_args(argv: list[str]) -> tuple[str | None, dict[str, Any]]:
if not argv:
return None, {}
command, *rest = argv
options: dict[str, Any] = {}
index = 0
while index < len(rest):
token = rest[index]
if not token.startswith("--"):
fail(f"unexpected argument: {token}")
key = token[2:]
next_token = rest[index + 1] if index + 1 < len(rest) else None
if next_token is None or next_token.startswith("--"):
options[key] = True
index += 1
continue
options[key] = next_token
index += 2
return command, options
def print_usage() -> None:
usage = [
"Usage:",
" python3 template_tool.py build-index [--out ]",
" python3 template_tool.py search --query [--tone light|dark|colorful] [--formality formal|casual|creative] [--layout-tag ] [--limit 3]",
" python3 template_tool.py summarize --template (--range 1-2,5 | --label 封面)",
" python3 template_tool.py extract --template (--range 1-2,5 | --label 封面) [--with-summary] [--out ]",
]
print("\n".join(usage), file=sys.stderr)
def write_json(value: Any) -> None:
print(json.dumps(value, ensure_ascii=False, indent=2))
def run_cli(argv: list[str] | None = None) -> None:
command, options = parse_cli_args(argv or sys.argv[1:])
if not command or command in {"--help", "help"}:
print_usage()
raise SystemExit(0)
if command == "build-index":
index_data = build_index_data()
output_path = Path(options["out"]).resolve() if options.get("out") else DEFAULT_INDEX_PATH
output_path.write_text(f'{json.dumps(index_data, ensure_ascii=False, indent=2)}\n', encoding="utf-8")
print(output_path)
return
if command == "search":
write_json(search_templates(load_index(), options))
return
if command == "summarize":
write_json(summarize_selection(load_index(), options.get("template"), options))
return
if command == "extract":
index_data = load_index()
xml = extract_selection_xml(index_data, options.get("template"), options)
if options.get("with-summary"):
summary = summarize_selection(index_data, options.get("template"), options)
write_json({"xml": xml, "selection": summary["selection"], "summary": summary["summary"], "slides": summary["slides"]})
return
if options.get("out"):
output_path = Path(options["out"]).resolve()
output_path.write_text(f"{xml}\n", encoding="utf-8")
print(output_path)
return
sys.stdout.write(f"{xml}\n")
return
print_usage()
fail(f"unknown command: {command}")
if __name__ == "__main__":
try:
run_cli()
except TemplateToolError as error:
print(f"template-tool error: {error}", file=sys.stderr)
raise SystemExit(1) from error