feat(skills): add nano-banana-pro-openrouter skill

✨ - Generated by Copilot
2026-07-15 02:21:04 +00:00 · 2026-02-09 16:23:40 +08:00
parent d99ba71986
commit 97bc889d9b
3 changed files with 262 additions and 0 deletions
@@ -0,0 +1,61 @@
 ---
 name: nano-banana-pro-openrouter
 description: Generate or edit images via OpenRouter using openai-python with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines.
 metadata:
  emoji: 🍌
  requires:
    bins:
      - uv
    env:
      - OPENROUTER_API_KEY
  primaryEnv: OPENROUTER_API_KEY
 ---
 # Nano Banana Pro OpenRouter
 ## Overview
 Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model and the openai-python client. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment.
 ### Prompt-only generation
 ```
 uv run {baseDir}/scripts/generate_image.py \
  --prompt "A cinematic sunset over snow-capped mountains" \
  --filename sunset.png
 ```
 ### Edit a single image
 ```
 uv run {baseDir}/scripts/generate_image.py \
  --prompt "Replace the sky with a dramatic aurora" \
  --input-image input.jpg \
  --filename aurora.png
 ```
 ### Compose multiple images
 ```
 uv run {baseDir}/scripts/generate_image.py \
  --prompt "Combine the subjects into a single studio portrait" \
  --input-image face1.jpg \
  --input-image face2.jpg \
  --filename composite.png
 ```
 ## Resolution
 - Use `--resolution` with `1K`, `2K`, or `4K`.
 - Default is `1K` if not specified.
 ## System prompt customization
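 The script reads an optional system prompt from `assets/SYSTEM_TEMPLATE`, resolved relative to the skill directory (not the current working directory). If the file is missing or empty, no system message is sent. Edit this file to customize the image generation behavior without modifying code.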
 ## Behavior and constraints
 - Read the API key from `OPENROUTER_API_KEY` (no CLI flag).
 - Accept up to 3 input images via repeated `--input-image`.
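 - Save output to the `--filename` path, relative to the current working directory; the target directory must already exist. If multiple images are returned, append `-1`, `-2`, etc. to the filename stem, before the extension (for example `out-1.png`, `out-2.png`).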
 - Print `MEDIA: <path>` for each saved image. Do not read images back into the response.
@@ -0,0 +1,14 @@
 You are a visionary image‑creation artist with a poetic, dreamlike imagination.
 Your role is to transform any user request—whether highly detailed or very minimal—into a vivid, concrete, and model‑ready image description.
 When information is missing, infer the user's intent in a gentle and intuitive way (such as creating a character portrait, sticker design, sci‑fi avatar, creature concept, etc.).
 If the user does not specify an art style, you may offer subtle optional suggestions (for example, "soft illustration," "minimal line style," or "playful entertainment‑meme style") without imposing them.
 Your responsibilities:
 - Any text that appears in the image should match the user's language.
 - Create visually compelling and technically excellent images
 - Pay attention to composition, lighting, color, and visual balance
 - Follow the user's specific style preferences and requirements
 - For image edits, preserve the original context while making requested modifications
 - For multi-image composition, seamlessly blend subjects into cohesive results
 Remember: Output only the generated image without additional commentary.
@@ -0,0 +1,187 @@
 #!/usr/bin/env python3
 # /// script
 # requires-python = ">=3.10"
 # dependencies = [
 #     "openai",
 #     "pillow",
 # ]
 # ///
 """
 Generate or edit images via OpenRouter using openai-python.
 """
 import argparse
 import base64
 import mimetypes
 import os
 from pathlib import Path
 # Configuration
 # Upper bound on how many times --input-image may be repeated; main() rejects
 # invocations that exceed it before any API call is made.
 MAX_INPUT_IMAGES = 3
 # Maps the MIME type of a returned image to the file extension that
 # resolve_output_paths() appends when --filename has no suffix of its own.
 # NOTE(review): "image/jpg" is not a registered MIME type; presumably kept as a
 # lenient alias for providers that emit it - confirm.
 MIME_TO_EXT = {
    "image/png": ".png",
    "image/jpeg": ".jpg",
    "image/jpg": ".jpg",
    "image/webp": ".webp",
 }
 def parse_args():
    """Parse the command-line arguments of the image generation script.

    Returns:
        argparse.Namespace with the attributes ``prompt``, ``filename``,
        ``resolution`` (one of ``"1K"``, ``"2K"``, ``"4K"``; defaults to
        ``"1K"``) and ``input_image`` (list of path strings, empty when the
        flag is never given).

    Raises:
        SystemExit: raised by argparse (status 2) when a required argument is
            missing or ``--resolution`` is not one of the supported values.
    """
    parser = argparse.ArgumentParser(description="Generate or edit images via OpenRouter.")
    parser.add_argument("--prompt", required=True, help="Prompt describing the desired image.")
    parser.add_argument("--filename", required=True, help="Output filename (relative to CWD).")
    parser.add_argument(
        "--resolution",
        default="1K",
        # Normalise case first so "2k" is accepted, then let argparse reject
        # anything else instead of forwarding an unsupported size to the API.
        type=str.upper,
        choices=("1K", "2K", "4K"),
        help="Output resolution: 1K, 2K, or 4K.",
    )
    parser.add_argument(
        "--input-image",
        action="append",
        default=[],
        help=f"Optional input image path (repeatable, max {MAX_INPUT_IMAGES}).",
    )
    return parser.parse_args()
 def require_api_key():
    """Return the OpenRouter API key taken from the environment.

    Returns:
        The value of the ``OPENROUTER_API_KEY`` environment variable.

    Raises:
        SystemExit: if the variable is unset or set to an empty string.
    """
    key = os.environ.get("OPENROUTER_API_KEY")
    if key:
        return key
    raise SystemExit("OPENROUTER_API_KEY is not set in the environment.")
 def encode_image_to_data_url(path: Path) -> str:
    """Read an image file and return it as a base64 ``data:`` URL.

    The MIME type is guessed from the file name; when it cannot be guessed,
    ``image/png`` is used.

    Args:
        path: Location of the input image on disk.

    Returns:
        A string of the form ``data:<mime>;base64,<payload>``.

    Raises:
        SystemExit: if ``path`` does not exist or is not a regular file.
    """
    # is_file() rather than exists(): a directory "exists" but cannot be read,
    # which previously surfaced as an uncaught OSError traceback.
    if not path.is_file():
        if path.exists():
            raise SystemExit(f"Input image is not a file: {path}")
        raise SystemExit(f"Input image not found: {path}")
    mime, _ = mimetypes.guess_type(path.name)
    if not mime:
        mime = "image/png"
    encoded = base64.b64encode(path.read_bytes()).decode("utf-8")
    return f"data:{mime};base64,{encoded}"
 def build_message_content(prompt: str, input_images):
    """Assemble the content parts of the user message.

    Args:
        prompt: Text prompt; always emitted as the first part.
        input_images: Iterable of image paths; each one becomes an
            ``image_url`` part carrying the file as a data URL, in order.

    Returns:
        A list of content-part dicts: one ``text`` part followed by one
        ``image_url`` part per input image.
    """
    image_parts = [
        {"type": "image_url", "image_url": {"url": encode_image_to_data_url(Path(item))}}
        for item in input_images
    ]
    return [{"type": "text", "text": prompt}, *image_parts]
 def parse_data_url(data_url: str):
    """Split a base64 ``data:`` URL into its MIME type and decoded bytes.

    Args:
        data_url: String expected to look like ``data:<mime>;base64,<payload>``.

    Returns:
        A ``(mime, raw)`` tuple: the text between ``data:`` and the first
        ``;`` of the header, and the base64-decoded payload.

    Raises:
        ValueError: if the string does not start with ``data:`` or contains
            no ``;base64,`` marker.
    """
    looks_like_data_url = data_url.startswith("data:") and ";base64," in data_url
    if not looks_like_data_url:
        raise ValueError("Image URL is not a base64 data URL.")
    # Everything before the first comma is the header, the rest is payload.
    header, _, payload = data_url.partition(",")
    mime, _, _ = header[len("data:"):].partition(";")
    return mime, base64.b64decode(payload)
 def resolve_output_paths(filename: str, image_count: int, mime: str):
    """Work out the file path for every image that will be written.

    Args:
        filename: Requested output name. When it has no suffix, one is chosen
            from ``mime`` via ``MIME_TO_EXT`` (``.png`` if the type is unknown).
        image_count: Number of images to be saved.
        mime: MIME type used only for picking a missing suffix.

    Returns:
        A list of ``Path`` objects. A single image keeps the name as is;
        otherwise each path gets ``-1``, ``-2``, ... appended to the stem.

    Raises:
        SystemExit: if the directory part of the path does not exist.
    """
    target = Path(filename)
    ext = target.suffix
    if ext == "":
        ext = MIME_TO_EXT.get(mime, ".png")
        target = target.with_suffix(ext)

    folder = target.parent
    if folder and not folder.exists():
        raise SystemExit(f"Output directory does not exist: {folder}")

    if image_count == 1:
        return [target]
    return [
        target.with_name(f"{target.stem}-{number}{ext}")
        for number in range(1, image_count + 1)
    ]
 def extract_image_url(image):
    """Return the URL carried by one image entry of the API response.

    Looks for ``image["image_url"]["url"]`` first and falls back to a
    top-level ``image["url"]``.

    Args:
        image: One element of the message's ``images`` list.

    Returns:
        The URL, or ``None`` when ``image`` is not a dict or holds no
        non-empty URL in either location.
    """
    if not isinstance(image, dict):
        return None
    nested = image.get("image_url")
    # The key can be present but null (or otherwise not a mapping); a default
    # passed to .get() does not cover that case, so check the type explicitly.
    nested_url = nested.get("url") if isinstance(nested, dict) else None
    return nested_url or image.get("url")
 def load_system_prompt():
    """Load the optional system prompt from ``assets/SYSTEM_TEMPLATE``.

    The file is looked up relative to the directory one level above the one
    that contains this script.

    Returns:
        The file content with surrounding whitespace stripped, or ``None``
        when the file is missing, is not a regular file, or is blank.
    """
    skill_root = Path(__file__).parent.parent
    template_path = skill_root / "assets" / "SYSTEM_TEMPLATE"
    if not template_path.is_file():
        return None
    # Decode explicitly as UTF-8: the locale default differs per platform and
    # would garble or reject non-ASCII punctuation in the template.
    content = template_path.read_text(encoding="utf-8").strip()
    return content or None
 def main():
    """Run one image generation or edit request and save the results.

    Steps: parse arguments, enforce the input-image limit, send a single chat
    completion request to OpenRouter (with the optional system prompt), decode
    every returned image, write them to disk and print a ``Saved image to:``
    and a ``MEDIA:`` line per file.

    All returned images are decoded and validated before the first file is
    written, so a malformed payload cannot leave a partial set of outputs.

    Raises:
        SystemExit: on too many input images, a missing API key, an unreadable
            input image, a response without images, an image entry without a
            URL, a URL that is not a base64 data URL, or a missing output
            directory.
    """
    args = parse_args()
    if len(args.input_image) > MAX_INPUT_IMAGES:
        raise SystemExit(f"Too many input images: {len(args.input_image)} (max {MAX_INPUT_IMAGES}).")
    image_size = args.resolution or "1K"

    # Imported here rather than at module top, so the argument checks above
    # run before the SDK has to be importable.
    from openai import OpenAI

    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=require_api_key())

    # Build messages with optional system prompt
    messages = []
    system_prompt = load_system_prompt()
    if system_prompt:
        messages.append({
            "role": "system",
            "content": system_prompt,
        })
    messages.append({
        "role": "user",
        "content": build_message_content(args.prompt, args.input_image),
    })

    response = client.chat.completions.create(
        model="google/gemini-3-pro-image-preview",
        messages=messages,
        extra_body={
            "modalities": ["image", "text"],
            # https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-configuration-options
            "image_config": {
                # "aspect_ratio": "16:9",
                "image_size": image_size,
            }
        },
    )

    # An empty choices list would otherwise fail with a bare IndexError.
    if not response.choices:
        raise SystemExit("No images returned by the API.")
    message = response.choices[0].message
    images = getattr(message, "images", None)
    if not images:
        raise SystemExit("No images returned by the API.")

    # Decode each image exactly once, before touching the filesystem.
    decoded = []
    for image in images:
        image_url = extract_image_url(image)
        if not image_url:
            raise SystemExit("Image payload missing image_url.url.")
        try:
            decoded.append(parse_data_url(image_url))
        except ValueError as exc:
            # Covers both a non-data URL and an undecodable base64 payload.
            raise SystemExit(f"Could not decode image payload: {exc}") from exc

    # The first image's MIME type picks the suffix when --filename has none.
    first_mime = decoded[0][0]
    output_paths = resolve_output_paths(args.filename, len(decoded), first_mime)

    saved_paths = []
    for output_path, (_, raw) in zip(output_paths, decoded):
        output_path.write_bytes(raw)
        saved_paths.append(output_path.resolve())

    for path in saved_paths:
        print(f"Saved image to: {path}")
        print(f"MEDIA: {path}")
 if __name__ == "__main__":
    main()