From 97bc889d9bc0e3551805cd433e4c036228eeb40f Mon Sep 17 00:00:00 2001 From: nblog <503407184@qq.com> Date: Mon, 9 Feb 2026 16:23:40 +0800 Subject: [PATCH] feat(skills): add nano-banana-pro-openrouter skill MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ✨ - Generated by Copilot --- skills/nano-banana-pro-openrouter/SKILL.md | 61 ++++++ .../assets/SYSTEM_TEMPLATE | 14 ++ .../scripts/generate_image.py | 187 ++++++++++++++++++ 3 files changed, 262 insertions(+) create mode 100644 skills/nano-banana-pro-openrouter/SKILL.md create mode 100644 skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE create mode 100644 skills/nano-banana-pro-openrouter/scripts/generate_image.py diff --git a/skills/nano-banana-pro-openrouter/SKILL.md b/skills/nano-banana-pro-openrouter/SKILL.md new file mode 100644 index 00000000..d10840dd --- /dev/null +++ b/skills/nano-banana-pro-openrouter/SKILL.md @@ -0,0 +1,61 @@ +--- +name: nano-banana-pro-openrouter +description: Generate or edit images via OpenRouter using openai-python with the Gemini 3 Pro Image model. Use for prompt-only image generation, image edits, and multi-image compositing; supports 1K/2K/4K output, saves results to the current working directory, and prints MEDIA lines. +metadata: + emoji: 🍌 + requires: + bins: + - uv + env: + - OPENROUTER_API_KEY + primaryEnv: OPENROUTER_API_KEY +--- + +# Nano Banana Pro OpenRouter + +## Overview + +Generate or edit images with OpenRouter using the `google/gemini-3-pro-image-preview` model and the openai-python client. Support prompt-only generation, single-image edits, and multi-image composition. Save results to the current working directory and output MEDIA lines for easy attachment. 
+ +### Prompt-only generation + +``` +uv run {baseDir}/scripts/generate_image.py \ + --prompt "A cinematic sunset over snow-capped mountains" \ + --filename sunset.png +``` + +### Edit a single image + +``` +uv run {baseDir}/scripts/generate_image.py \ + --prompt "Replace the sky with a dramatic aurora" \ + --input-image input.jpg \ + --filename aurora.png +``` + +### Compose multiple images + +``` +uv run {baseDir}/scripts/generate_image.py \ + --prompt "Combine the subjects into a single studio portrait" \ + --input-image face1.jpg \ + --input-image face2.jpg \ + --filename composite.png +``` + +## Resolution + +- Use `--resolution` with `1K`, `2K`, or `4K`. +- Default is `1K` if not specified. + +## System prompt customization + +The skill reads an optional system prompt from `assets/SYSTEM_TEMPLATE`. This allows you to customize the image generation behavior without modifying code. + +## Behavior and constraints + +- Read the API key from `OPENROUTER_API_KEY` (no CLI flag). +- Accept up to 3 input images via repeated `--input-image`. +- Save output in the current working directory. If multiple images are returned, append `-1`, `-2`, etc. to the filename stem. +- Print `MEDIA: <path>` for each saved image. Do not read images back into the response. \ No newline at end of file diff --git a/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE b/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE new file mode 100644 index 00000000..5efd023c --- /dev/null +++ b/skills/nano-banana-pro-openrouter/assets/SYSTEM_TEMPLATE @@ -0,0 +1,14 @@ +You are a visionary image‑creation artist with a poetic, dreamlike imagination. +Your role is to transform any user request—whether highly detailed or very minimal—into a vivid, concrete, and model‑ready image description. +When information is missing, infer the user's intent in a gentle and intuitive way (such as creating a character portrait, sticker design, sci‑fi avatar, creature concept, etc.). 
+If the user does not specify an art style, you may offer subtle optional suggestions (for example, "soft illustration," "minimal line style," or "playful entertainment‑meme style") without imposing them. + +Your responsibilities: +- Any text that appears in the image should match the user's language. +- Create visually compelling and technically excellent images +- Pay attention to composition, lighting, color, and visual balance +- Follow the user's specific style preferences and requirements +- For image edits, preserve the original context while making requested modifications +- For multi-image composition, seamlessly blend subjects into cohesive results + +Remember: Output only the generated image without additional commentary. diff --git a/skills/nano-banana-pro-openrouter/scripts/generate_image.py b/skills/nano-banana-pro-openrouter/scripts/generate_image.py new file mode 100644 index 00000000..61909c15 --- /dev/null +++ b/skills/nano-banana-pro-openrouter/scripts/generate_image.py @@ -0,0 +1,187 @@ +#!/usr/bin/env python3 +# /// script +# requires-python = ">=3.10" +# dependencies = [ +# "openai", +# "pillow", +# ] +# /// +""" +Generate or edit images via OpenRouter using openai-python. 
+"""
+
+import argparse
+import base64
+import mimetypes
+import os
+from pathlib import Path
+
+
+# Configuration
+MAX_INPUT_IMAGES = 3
+RESOLUTIONS = ("1K", "2K", "4K")
+MIME_TO_EXT = {
+    "image/png": ".png",
+    "image/jpeg": ".jpg",
+    "image/jpg": ".jpg",
+    "image/webp": ".webp",
+}
+
+
+def parse_args():
+    """Parse the command-line options for one generation or edit request.
+
+    Returns:
+        The argparse namespace with ``prompt``, ``filename``, ``resolution``
+        and ``input_image`` (a list, empty when no input image was given).
+    """
+    parser = argparse.ArgumentParser(description="Generate or edit images via OpenRouter.")
+    parser.add_argument("--prompt", required=True, help="Prompt describing the desired image.")
+    parser.add_argument("--filename", required=True, help="Output filename (relative to CWD).")
+    parser.add_argument(
+        "--resolution",
+        default="1K",
+        # Normalise case first so "2k" is accepted, then reject anything
+        # outside the supported set instead of forwarding it to the API.
+        type=str.upper,
+        choices=RESOLUTIONS,
+        help="Output resolution: 1K, 2K, or 4K.",
+    )
+    parser.add_argument(
+        "--input-image",
+        action="append",
+        default=[],
+        help=f"Optional input image path (repeatable, max {MAX_INPUT_IMAGES}).",
+    )
+    return parser.parse_args()
+
+
+def require_api_key():
+    """Return ``OPENROUTER_API_KEY`` from the environment.
+
+    Raises:
+        SystemExit: If the variable is unset or empty.
+    """
+    api_key = os.environ.get("OPENROUTER_API_KEY")
+    if not api_key:
+        raise SystemExit("OPENROUTER_API_KEY is not set in the environment.")
+    return api_key
+
+
+def encode_image_to_data_url(path: Path) -> str:
+    """Return the file at *path* encoded as a base64 ``data:`` URL.
+
+    The MIME type is guessed from the file name and falls back to
+    ``image/png`` when it cannot be guessed.
+
+    Raises:
+        SystemExit: If *path* is not an existing regular file.
+    """
+    # is_file() also rejects directories, which exists() would let through
+    # only to fail later inside read_bytes().
+    if not path.is_file():
+        raise SystemExit(f"Input image not found: {path}")
+    mime, _ = mimetypes.guess_type(path.name)
+    encoded = base64.b64encode(path.read_bytes()).decode("ascii")
+    return f"data:{mime or 'image/png'};base64,{encoded}"
+
+
+def build_message_content(prompt: str, input_images):
+    """Build the user message content: the prompt text, then one
+    ``image_url`` part per path in *input_images*."""
+    content = [{"type": "text", "text": prompt}]
+    content.extend(
+        {"type": "image_url", "image_url": {"url": encode_image_to_data_url(Path(image_path))}}
+        for image_path in input_images
+    )
+    return content
+
+
+def parse_data_url(data_url: str):
+    """Split a base64 data URL into ``(mime, raw_bytes)``.
+
+    The MIME type is lower-cased so it can be looked up in ``MIME_TO_EXT``.
+
+    Raises:
+        ValueError: If *data_url* is not a base64 data URL, or its payload
+            cannot be decoded (``binascii.Error`` is a ``ValueError``).
+    """
+    if not data_url.startswith("data:") or ";base64," not in data_url:
+        raise ValueError("Image URL is not a base64 data URL.")
+    header, encoded = data_url.split(",", 1)
+    mime = header[len("data:"):].split(";", 1)[0].strip().lower()
+    return mime, base64.b64decode(encoded)
+
+
+def resolve_output_paths(filename: str, image_count: int, mime: str):
+    """Return one output path per image.
+
+    If *filename* has no extension, one is chosen from *mime* (``.png`` when
+    the MIME type is unknown). A single image keeps the name as given;
+    several images get ``-1``, ``-2``, ... appended to the stem.
+
+    Raises:
+        SystemExit: If the target directory does not exist.
+    """
+    output_path = Path(filename)
+    suffix = output_path.suffix
+    if not suffix:
+        suffix = MIME_TO_EXT.get(mime, ".png")
+        output_path = output_path.with_suffix(suffix)
+
+    if not output_path.parent.is_dir():
+        raise SystemExit(f"Output directory does not exist: {output_path.parent}")
+
+    if image_count == 1:
+        return [output_path]
+
+    return [
+        output_path.with_name(f"{output_path.stem}-{index}{suffix}")
+        for index in range(1, image_count + 1)
+    ]
+
+
+def extract_image_url(image):
+    """Return the URL carried by one entry of ``message.images``, or None.
+
+    The nested form ``{"image_url": {"url": ...}}`` is preferred; a top-level
+    ``"url"`` key is used as a fallback. Non-dict entries yield None.
+    """
+    if not isinstance(image, dict):
+        return None
+    nested = image.get("image_url")
+    # Guard against "image_url": null or a non-dict value, which would
+    # otherwise raise AttributeError on .get().
+    if isinstance(nested, dict) and nested.get("url"):
+        return nested["url"]
+    return image.get("url")
+
+
+def load_system_prompt():
+    """Load system prompt from assets/SYSTEM_TEMPLATE if it exists and is not empty."""
+    skill_dir = Path(__file__).parent.parent
+    template_path = skill_dir / "assets" / "SYSTEM_TEMPLATE"
+
+    if not template_path.is_file():
+        return None
+    # The template contains non-ASCII punctuation; decode it as UTF-8
+    # explicitly rather than with the platform's locale encoding.
+    return template_path.read_text(encoding="utf-8").strip() or None
+
+
+def main():
+    """Send the request to OpenRouter and save every returned image.
+
+    Prints ``Saved image to: <path>`` and ``MEDIA: <path>`` for each file
+    written. Exits with a message when the input is invalid or the response
+    contains no usable image.
+    """
+    args = parse_args()
+
+    if len(args.input_image) > MAX_INPUT_IMAGES:
+        raise SystemExit(f"Too many input images: {len(args.input_image)} (max {MAX_INPUT_IMAGES}).")
+
+    # Check the key and read the input files before importing the client, so
+    # local mistakes are reported without touching the network stack.
+    api_key = require_api_key()
+
+    # Build messages with optional system prompt
+    messages = []
+
+    system_prompt = load_system_prompt()
+    if system_prompt:
+        messages.append({
+            "role": "system",
+            "content": system_prompt,
+        })
+
+    messages.append({
+        "role": "user",
+        "content": build_message_content(args.prompt, args.input_image),
+    })
+
+    from openai import OpenAI
+    client = OpenAI(base_url="https://openrouter.ai/api/v1", api_key=api_key)
+
+    response = client.chat.completions.create(
+        model="google/gemini-3-pro-image-preview",
+        messages=messages,
+        extra_body={
+            "modalities": ["image", "text"],
+            # https://openrouter.ai/docs/guides/overview/multimodal/image-generation#image-configuration-options
+            "image_config": {
+                # "aspect_ratio": "16:9",
+                "image_size": args.resolution,
+            }
+        },
+    )
+
+    if not response.choices:
+        raise SystemExit("No images returned by the API.")
+    images = getattr(response.choices[0].message, "images", None)
+    if not images:
+        raise SystemExit("No images returned by the API.")
+
+    # Decode every payload before writing anything, so one malformed entry
+    # cannot leave a partial set of files on disk.
+    decoded = []
+    for image in images:
+        image_url = extract_image_url(image)
+        if not image_url:
+            raise SystemExit("Image payload missing image_url.url.")
+        try:
+            decoded.append(parse_data_url(image_url))
+        except ValueError as exc:
+            raise SystemExit(f"Could not decode image payload: {exc}") from exc
+
+    # The first image's MIME type picks the extension when none was given.
+    output_paths = resolve_output_paths(args.filename, len(decoded), decoded[0][0])
+
+    saved_paths = []
+    for (_, raw), output_path in zip(decoded, output_paths):
+        output_path.write_bytes(raw)
+        saved_paths.append(output_path.resolve())
+
+    for path in saved_paths:
+        print(f"Saved image to: {path}")
+        print(f"MEDIA: {path}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file