fix(skill): update descriptions and improve error handling in generate_image script

This commit is contained in:
nblog
2026-02-09 16:48:57 +08:00
parent 97bc889d9b
commit ef4aa0b2bc
2 changed files with 20 additions and 16 deletions

View File

@@ -1,6 +1,6 @@
--- ---
name: nano-banana-pro-openrouter name: nano-banana-pro-openrouter
description: Generate or edit images via OpenRouter using openai-python with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines. description: 'Generate or edit images via OpenRouter with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines.'
metadata: metadata:
emoji: 🍌 emoji: 🍌
requires: requires:
@@ -11,11 +11,12 @@ metadata:
primaryEnv: OPENROUTER_API_KEY primaryEnv: OPENROUTER_API_KEY
--- ---
# Nano Banana Pro OpenRouter # Nano Banana Pro OpenRouter
## Overview ## Overview
Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model and the openai-python client. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment. Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment.
### Prompt-only generation ### Prompt-only generation
@@ -58,4 +59,4 @@ The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. This al
- Read the API key from `OPENROUTER_API_KEY` (no CLI flag). - Read the API key from `OPENROUTER_API_KEY` (no CLI flag).
- Accept up to 3 input images via repeated `--input-image`. - Accept up to 3 input images via repeated `--input-image`.
- Save output in the current working directory. If multiple images are returned, append `-1`, `-2`, etc. - Save output in the current working directory. If multiple images are returned, append `-1`, `-2`, etc.
- Print `MEDIA: <path>` for each saved image. Do not read images back into the response. - Print `MEDIA: <path>` for each saved image. Do not read images back into the response.

View File

@@ -3,7 +3,6 @@
# requires-python = ">=3.10" # requires-python = ">=3.10"
# dependencies = [ # dependencies = [
# "openai", # "openai",
# "pillow",
# ] # ]
# /// # ///
""" """
@@ -32,9 +31,11 @@ def parse_args():
parser.add_argument("--prompt", required=True, help="Prompt describing the desired image.") parser.add_argument("--prompt", required=True, help="Prompt describing the desired image.")
parser.add_argument("--filename", required=True, help="Output filename (relative to CWD).") parser.add_argument("--filename", required=True, help="Output filename (relative to CWD).")
parser.add_argument( parser.add_argument(
"--resolution", "--resolution",
default="1K", type=str.upper,
help="Output resolution: 1K, 2K, or 4K.", choices=["1K", "2K", "4K"],
default="1K",
help="Output resolution: 1K, 2K, or 4K.",
) )
parser.add_argument( parser.add_argument(
"--input-image", "--input-image",
@@ -70,14 +71,16 @@ def build_message_content(prompt: str, input_images):
content.append({"type": "image_url", "image_url": {"url": data_url}}) content.append({"type": "image_url", "image_url": {"url": data_url}})
return content return content
def parse_data_url(data_url: str):
def parse_data_url(data_url: str): if not data_url.startswith("data:") or ";base64," not in data_url:
if not data_url.startswith("data:") or ";base64," not in data_url:
raise ValueError("Image URL is not a base64 data URL.") raise ValueError("Image URL is not a base64 data URL.")
header, encoded = data_url.split(",", 1) header, encoded = data_url.split(",", 1)
mime = header[5:].split(";", 1)[0] mime = header[5:].split(";", 1)[0]
raw = base64.b64decode(encoded) try:
return mime, raw raw = base64.b64decode(encoded)
except Exception as e:
raise SystemExit(f"Failed to decode base64 image payload: {e}")
return mime, raw
def resolve_output_paths(filename: str, image_count: int, mime: str): def resolve_output_paths(filename: str, image_count: int, mime: str):
@@ -110,7 +113,7 @@ def load_system_prompt():
"""Load system prompt from assets/SYSTEM_TEMPLATE if it exists and is not empty.""" """Load system prompt from assets/SYSTEM_TEMPLATE if it exists and is not empty."""
script_dir = Path(__file__).parent.parent script_dir = Path(__file__).parent.parent
template_path = script_dir / "assets" / "SYSTEM_TEMPLATE" template_path = script_dir / "assets" / "SYSTEM_TEMPLATE"
if template_path.exists(): if template_path.exists():
content = template_path.read_text().strip() content = template_path.read_text().strip()
if content: if content:
@@ -184,4 +187,4 @@ def main():
if __name__ == "__main__": if __name__ == "__main__":
main() main()