Merge branch 'main' into main

2026-07-15 18:35:19 +00:00 · 2026-02-10 14:56:22 +08:00
parent 6b1ac50007 9bb89281e2
commit e494fa2e3f
4 changed files with 280 additions and 0 deletions
@@ -0,0 +1,74 @@
+---
+name: nano-banana-pro-openrouter
+description: 'Generate or edit images via OpenRouter with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output.'
+metadata:
+  emoji: 🍌
+  requires:
+    bins:
+      - uv
+    env:
+      - OPENROUTER_API_KEY
+  primaryEnv: OPENROUTER_API_KEY
+---
+
+
+# Nano Banana Pro OpenRouter
+
+## Overview
+
+Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model. Supports prompt-only generation, single-image edits, and multi-image composition.
+
+### Prompt-only generation
+
+```
+uv run {baseDir}/scripts/generate_image.py \
+  --prompt "A cinematic sunset over snow-capped mountains" \
+  --filename sunset.png
+```
+
+### Edit a single image
+
+```
+uv run {baseDir}/scripts/generate_image.py \
+  --prompt "Replace the sky with a dramatic aurora" \
+  --input-image input.jpg \
+  --filename aurora.png
+```
+
+### Compose multiple images
+
+```
+uv run {baseDir}/scripts/generate_image.py \
+  --prompt "Combine the subjects into a single studio portrait" \
+  --input-image face1.jpg \
+  --input-image face2.jpg \
+  --filename composite.png
+```
+
+## Resolution
+
+- Use `--resolution` with `1K`, `2K`, or `4K`.
+- Default is `1K` if not specified.
+
+## System prompt customization
+
+The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. If the file is missing or empty, no system prompt is sent. This allows you to customize the image generation behavior without modifying code.
+
+## Behavior and constraints
+
+- Accept up to 3 input images via repeated `--input-image`.
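+- `--filename` accepts relative paths (resolved against the current directory) or absolute paths; missing parent directories are created. If the extension does not match the returned image type, it is replaced and a warning is printed.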
+- If multiple images are returned, append `-1`, `-2`, etc. to the filename.
+- Print `MEDIA: <path>` for each saved image. Do not read images back into the response.
+
+## Troubleshooting
+
+If the script exits non-zero, check stderr against these common blockers:
+
+| Symptom | Resolution |
+|---------|------------|
+| `OPENROUTER_API_KEY is not set` | Ask the user to set it. PowerShell: `$env:OPENROUTER_API_KEY = "sk-or-..."` / bash: `export OPENROUTER_API_KEY="sk-or-..."` |
+| `uv: command not found` or not recognized | macOS/Linux: <code>curl -LsSf https://astral.sh/uv/install.sh &#124; sh</code>. Windows: <code>powershell -ExecutionPolicy ByPass -c "irm https://astral.sh/uv/install.ps1 &#124; iex"</code>. Then restart the terminal. |
+| `AuthenticationError` / HTTP 401 | Key is invalid or has no credits. Verify at <https://openrouter.ai/settings/keys>. |
+
+For transient errors (HTTP 429, network timeouts), wait 30 seconds and retry. Do not retry the same error more than twice — surface the issue to the user instead.
@@ -0,0 +1,14 @@
+You are a visionary image‑creation artist with a poetic, dreamlike imagination.
+Your role is to transform any user request—whether highly detailed or very minimal—into a vivid, concrete, and model‑ready image description.
+When information is missing, infer the user's intent in a gentle and intuitive way (such as creating a character portrait, sticker design, sci‑fi avatar, creature concept, etc.).
+If the user does not specify an art style, you may offer subtle optional suggestions (for example, "soft illustration," "minimal line style," or "playful entertainment‑meme style") without imposing them.
+
+Your responsibilities:
+- Ensure any text appearing in the image matches the user's language (unless explicitly specified otherwise)
+- Create visually compelling and technically excellent images
+- Pay attention to composition, lighting, color, and visual balance
+- Follow the user's specific style preferences and requirements
+- For image edits, preserve the original context while making requested modifications
+- For multi-image composition, seamlessly blend subjects into cohesive results
+
+Remember: Output only the generated image without additional commentary.
@@ -0,0 +1,191 @@
+#!/usr/bin/env python3
+# /// script
+# requires-python = ">=3.10"
+# dependencies = [
+#     "openai",
+# ]
+# ///
+"""
+Generate or edit images via OpenRouter using openai-python.
+"""
+
+import argparse
+import base64
+import mimetypes
+import os
+from pathlib import Path
+
+from openai import OpenAI
+
+
+# Configuration
+# Upper bound on the number of --input-image arguments; main() rejects longer lists.
+MAX_INPUT_IMAGES = 3
+# Maps the MIME type of a returned image to the file extension used when saving it.
+# resolve_output_path() falls back to ".png" for MIME types that are not listed here.
+MIME_TO_EXT = {
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/jpg": ".jpg",  # alternate spelling, saved with the same extension as image/jpeg
+    "image/webp": ".webp",
+}
+
+
+def parse_args():
+    """Parse the command-line options of the image generation script.
+
+    Returns:
+        An ``argparse.Namespace`` with ``prompt``, ``filename``,
+        ``resolution`` (upper-cased, one of 1K/2K/4K, default 1K) and
+        ``input_image`` (list of paths; empty when the flag is never given).
+    """
+    cli = argparse.ArgumentParser(description="Generate or edit images via OpenRouter.")
+
+    # Mandatory options.
+    cli.add_argument("--prompt", required=True, help="Prompt describing the desired image.")
+    cli.add_argument("--filename", required=True, help="Output filename (relative to CWD).")
+
+    # str.upper runs before the choices check, so "2k" is accepted as "2K".
+    cli.add_argument(
+        "--resolution",
+        type=str.upper,
+        choices=["1K", "2K", "4K"],
+        default="1K",
+        help="Output resolution: 1K, 2K, or 4K.",
+    )
+
+    # Repeatable flag; each occurrence appends one path to the list.
+    cli.add_argument(
+        "--input-image",
+        action="append",
+        default=[],
+        help=f"Optional input image path (repeatable, max {MAX_INPUT_IMAGES}).",
+    )
+
+    return cli.parse_args()
+
+
+def require_api_key():
+    """Return the OpenRouter API key read from the environment.
+
+    Raises:
+        SystemExit: if ``OPENROUTER_API_KEY`` is unset or empty.
+    """
+    key = os.getenv("OPENROUTER_API_KEY")
+    if key:
+        return key
+    raise SystemExit("OPENROUTER_API_KEY is not set in the environment.")
+
+
+def encode_image_to_data_url(path: Path) -> str:
+    """Read an image file and return it as a base64 ``data:`` URL.
+
+    The MIME type is guessed from the file name; when it cannot be guessed,
+    ``image/png`` is used.
+
+    Args:
+        path: Location of the input image on disk.
+
+    Returns:
+        A string of the form ``data:<mime>;base64,<payload>``.
+
+    Raises:
+        SystemExit: if the path does not exist, is not a regular file, or
+            cannot be read.
+    """
+    if not path.exists():
+        raise SystemExit(f"Input image not found: {path}")
+    # exists() is also true for directories, which read_bytes() cannot open.
+    if not path.is_file():
+        raise SystemExit(f"Input image is not a file: {path}")
+
+    mime, _ = mimetypes.guess_type(str(path))
+    if not mime:
+        mime = "image/png"
+
+    try:
+        data = path.read_bytes()
+    except OSError as e:
+        # Report permission and I/O problems as a clean exit, not a traceback.
+        raise SystemExit(f"Failed to read input image {path}: {e}") from e
+
+    encoded = base64.b64encode(data).decode("utf-8")
+    return f"data:{mime};base64,{encoded}"
+
+
+def build_message_content(prompt: str, input_images: list[str]) -> list[dict]:
+    """Assemble the multimodal content list for the user message.
+
+    The text prompt comes first, followed by one ``image_url`` part per input
+    image, each carrying the image as a base64 data URL.
+    """
+    text_part = {"type": "text", "text": prompt}
+    image_parts = [
+        {"type": "image_url", "image_url": {"url": encode_image_to_data_url(Path(src))}}
+        for src in input_images
+    ]
+    return [text_part, *image_parts]
+
+
+def parse_data_url(data_url: str) -> tuple[str, bytes]:
+    """Split a base64 ``data:`` URL into its MIME type and decoded bytes.
+
+    Args:
+        data_url: A URL of the form ``data:<mime>[;param...];base64,<payload>``.
+
+    Returns:
+        ``(mime, raw)`` where ``mime`` is the lower-cased media type without
+        parameters and ``raw`` is the decoded payload.
+
+    Raises:
+        SystemExit: if the URL is not a base64 data URL or the payload cannot
+            be decoded.
+    """
+    header, comma, encoded = data_url.partition(",")
+    # The ";base64" marker must close the header, directly before the first
+    # comma; matching it anywhere in the URL would let payload text pass.
+    if not comma or not header.startswith("data:") or not header.endswith(";base64"):
+        raise SystemExit("Image URL is not a base64 data URL.")
+
+    # Lower-case the media type so lookups keyed on it are case-insensitive.
+    mime = header[len("data:"):].split(";", 1)[0].strip().lower()
+
+    try:
+        raw = base64.b64decode(encoded)
+    except ValueError as e:  # binascii.Error is a ValueError subclass
+        raise SystemExit(f"Failed to decode base64 image payload: {e}") from e
+    return mime, raw
+
+
+def resolve_output_path(filename: str, image_index: int, total_count: int, mime: str) -> Path:
+    """Compute the path under which one returned image is saved.
+
+    Args:
+        filename: Output filename requested by the user.
+        image_index: Zero-based position of the image in the API response.
+        total_count: Number of images in the API response.
+        mime: MIME type of the returned image.
+
+    Returns:
+        ``filename`` with an extension matching ``mime`` (``.png`` for MIME
+        types not listed in ``MIME_TO_EXT``). When more than one image was
+        returned, ``-<n>`` (1-based) is appended to the stem.
+    """
+    import sys  # imported locally; only needed for the warning stream
+
+    output_path = Path(filename)
+    suffix = output_path.suffix
+    expected_suffix = MIME_TO_EXT.get(mime, ".png")
+
+    # ".jpeg" is an equally valid spelling of ".jpg"; keep it instead of
+    # renaming the user's file and warning about a non-existent mismatch.
+    accepted = {expected_suffix}
+    if expected_suffix == ".jpg":
+        accepted.add(".jpeg")
+
+    if not suffix:
+        suffix = expected_suffix
+    elif suffix.lower() not in accepted:
+        # Warn on stderr so stdout carries only the saved-path / MEDIA lines.
+        print(
+            f"Warning: filename extension '{suffix}' doesn't match returned MIME type '{mime}'. Using '{expected_suffix}' instead.",
+            file=sys.stderr,
+        )
+        suffix = expected_suffix
+
+    # Single image: use original stem + corrected suffix
+    if total_count <= 1:
+        return output_path.with_suffix(suffix)
+
+    # Multiple images: append numbering
+    return output_path.with_name(f"{output_path.stem}-{image_index + 1}{suffix}")
+
+
+def _read_field(container: object, name: str) -> object:
+    """Read ``name`` from a dict key or an object attribute; None when absent."""
+    if isinstance(container, dict):
+        return container.get(name)
+    return getattr(container, name, None)
+
+
+def extract_image_url(image: dict | object) -> str | None:
+    """Return the URL carried by one image entry of the API response.
+
+    Looks for ``image_url.url`` first and falls back to a top-level ``url``.
+    Entries may be plain dicts or attribute-style objects, and ``image_url``
+    may itself be a mapping, an object, a bare string, or missing.
+
+    Returns:
+        The URL string, or ``None`` when the entry carries no usable URL.
+    """
+    nested = _read_field(image, "image_url")
+    # A bare string is taken as the URL itself; None falls through to the
+    # top-level "url" instead of raising AttributeError.
+    url = nested if isinstance(nested, str) else _read_field(nested, "url")
+    return url or _read_field(image, "url") or None
+
+
+def load_system_prompt():
+    """Return the stripped text of assets/SYSTEM_TEMPLATE, or None.
+
+    The template is looked up one directory above the folder containing this
+    script. None is returned when the file is absent or holds only whitespace.
+    """
+    skill_root = Path(__file__).parents[1]
+    template = skill_root / "assets" / "SYSTEM_TEMPLATE"
+    if not template.exists():
+        return None
+
+    text = template.read_text(encoding="utf-8").strip()
+    return text or None
+
+
+def main():
+    """Run the CLI: request images from OpenRouter and save them to disk.
+
+    Steps: parse arguments, enforce the input-image limit, build the chat
+    messages (optional system prompt plus the user prompt and images), call
+    the ``google/gemini-3-pro-image-preview`` model, then decode and write
+    every returned image. For each saved file a ``Saved image to:`` line and
+    a ``MEDIA:`` line are printed.
+
+    Raises:
+        SystemExit: on too many input images, a missing API key, a response
+            without choices or images, or an image entry that lacks a base64
+            data URL.
+    """
+    args = parse_args()
+
+    if len(args.input_image) > MAX_INPUT_IMAGES:
+        raise SystemExit(f"Too many input images: {len(args.input_image)} (max {MAX_INPUT_IMAGES}).")
+
+    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=require_api_key())
+
+    # Build messages with optional system prompt
+    messages = []
+
+    system_prompt = load_system_prompt()
+    if system_prompt:
+        messages.append({
+            "role": "system",
+            "content": system_prompt,
+        })
+
+    messages.append({
+        "role": "user",
+        "content": build_message_content(args.prompt, args.input_image),
+    })
+
+    response = client.chat.completions.create(
+        model="google/gemini-3-pro-image-preview",
+        messages=messages,
+        extra_body={
+            "modalities": ["image", "text"],
+            # https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-configuration-options
+            "image_config": {
+                "image_size": args.resolution,
+            }
+        },
+    )
+
+    # An empty or missing choices list would otherwise surface as a bare
+    # IndexError/TypeError traceback.
+    if not response.choices:
+        raise SystemExit("No choices returned by the API.")
+
+    message = response.choices[0].message
+    images = getattr(message, "images", None)
+    if not images:
+        raise SystemExit("No images returned by the API.")
+
+    # Create output directory once before processing images
+    output_dir = Path(args.filename).parent
+    if str(output_dir) != ".":
+        output_dir.mkdir(parents=True, exist_ok=True)
+
+    for idx, image in enumerate(images):
+        image_url = extract_image_url(image)
+        if not image_url:
+            raise SystemExit("Image payload missing image_url.url.")
+        mime, raw = parse_data_url(image_url)
+        output_path = resolve_output_path(args.filename, idx, len(images), mime)
+        output_path.write_bytes(raw)
+
+        # Report each file as soon as it is on disk, so images saved before a
+        # later failure are still announced.
+        saved_path = output_path.resolve()
+        print(f"Saved image to: {saved_path}")
+        print(f"MEDIA: {saved_path}")
+
+
+if __name__ == "__main__":
+    main()