feat : add optimize-simplicite-log skill (#1742)

2026-07-14 01:51:02 +00:00 · 2026-05-18 03:26:40 +02:00
parent b669203572
commit 632052fc1b
4 changed files with 348 additions and 0 deletions
@@ -0,0 +1,74 @@
+---
+name: optimize-simplicite-logs
+description: capability to parse Simplicité logs from a raw `.txt` file, filter fields to reduce noise, and output the result as structured JSON.
+---
+
+# Optimize Simplicite Logs
+
+This skill provides the capability to parse Simplicité logs from a raw `.txt` file, filter fields to reduce noise, and output the result as structured JSON. This is critical for optimizing AI context size (saving ~56% of tokens) and providing structured, predictable data for troubleshooting.
+
+## When to Use This Skill
+
+Use this skill when you need to:
+- Analyze user-provided Simplicité log files in `.txt` format.
+- Avoid ingesting massive raw log files into your context window.
+- Extract structured fields (like `timestamp`, `level`, `body`) from verbose multi-line log output.
+
+**IMPORTANT:** Instead of directly reading a raw `.txt` log file provided by the user using file read tools, you **must** use one of the log converter scripts (PowerShell or Python) to parse the file into a JSON format first, optionally extracting only the fields needed.
+
+## Prerequisites
+
+- Access to either the PowerShell script (`/scripts/SimpliciteLog2Json.ps1`) or the Python script (`/scripts/simplicite-log2json.py`).
+
+## Core Capabilities
+
+### 1. Context Optimization
+Reduces the tokens consumed by large Simplicité logs by extracting only relevant log fields (e.g. `body`, `timestamp`, `level`) and discarding non-relevant structural log data (like `app`, `endpoint`, `contextPath`).
+
+### 2. Multi-line Support
+Properly captures stack traces and multiline errors inside the `body` field of the JSON structure, which a simple text search might miss.
+
+### 3. Stdout Support
+If no output path is provided for the JSON file (e.g. omitting `--output` or `-Output`), the parsed JSON will be printed directly to stdout, allowing you to pipe the output to other tools.
+
+## Output Summary
+
+After processing, the tool prints a summary to stderr (or console):
+```
+Processed: 123 entries, Skipped: 2 entries
+```
+
+## Usage Examples
+
+### Example 1: Python Version (Recommended)
+Convert a log file to JSON, keeping only the most important fields:
+```sh
+python /absolute/path/to/skills/optimize-simplicite-logs/scripts/simplicite-log2json.py <input.txt> --include timestamp,level,body --output <output.json>
+```
+
+### Example 2: PowerShell Version
+```powershell
+/python /absolute/path/to/skills/optimize-simplicite-logs/scripts/SimpliciteLog2Json.ps1 -InputPath "<input.txt>" -Output "<output.json>" -Include "body,timestamp,level"
+```
+
+After generating the `<output.json>`, you can safely read the resulting file to perform your analysis.
+
+## Guidelines
+
+1. **Always Convert First:** Never directly read `.txt` log files from Simplicité using standard text reading tools. Always convert them to JSON using the available scripts.
+2. **Filter Fields:** Use `--include` (Python) or `-Include` (PowerShell) to restrict fields to what is absolutely necessary to diagnose the issue (usually `timestamp,level,body`).
+3. **Available Fields:** The fields you can filter include: `timestamp`, `app`, `level`, `endpoint`, `contextPath`, `event`, `user`, `class`, `function`, `rowId`, `body`.
+
+## Common Patterns
+
+### Pattern: Fast Contextual Troubleshooting
+```sh
+# 1. Run the script to generate a minified JSON output in the current directory
+python /absolute/path/to/skills/optimize-simplicite-logs/scripts/simplicite-log2json.py logs.txt --include timestamp,level,body --output logs_minified.json
+
+# 2. Then read logs_minified.json to understand the context.
+```
+
+## Limitations
+
+- The parser depends on a fixed regex pattern that matches the standard Simplicité log output. If the log format has been heavily customized, parsing might fail or degrade.
@@ -0,0 +1,153 @@
+param (
+    [Parameter(Mandatory=$true)]
+    [string]$InputPath,
+
+    [string]$Output,
+
+    [string]$Include,
+
+    [string]$Exclude
+)
+
+# Valid fields
+$ValidFields = @("timestamp", "app", "level", "endpoint", "contextPath", "event", "user", "class", "function", "rowId", "body")
+
+# Function to check if a field is valid
+function Test-ValidField {
+    param([string]$Field)
+    return $ValidFields -contains $Field
+}
+
+# Verify that -Include and -Exclude are not both used
+if ($Include -and $Exclude) {
+    Write-Error "Error: -Include and -Exclude cannot be used together."
+    exit 1
+}
+
+# Initialize field variables
+$IncludeFields = $null
+$ExcludeFields = $null
+
+# Validate fields provided in Include/Exclude
+if ($Include) {
+    $IncludeFields = $Include -split "," | ForEach-Object { $_.Trim() }
+    foreach ($field in $IncludeFields) {
+        if (-not (Test-ValidField $field)) {
+            Write-Error "Error: Invalid field '$field'. Valid fields: $($ValidFields -join ', ')"
+            exit 1
+        }
+    }
+}
+
+if ($Exclude) {
+    $ExcludeFields = $Exclude -split "," | ForEach-Object { $_.Trim() }
+    foreach ($field in $ExcludeFields) {
+        if (-not (Test-ValidField $field)) {
+            Write-Error "Error: Invalid field '$field'. Valid fields: $($ValidFields -join ', ')"
+            exit 1
+        }
+    }
+}
+
+# Check that the input file exists
+if (-not (Test-Path $InputPath)) {
+    Write-Error "Error: File $InputPath does not exist."
+    exit 1
+}
+# Read the file and normalize line endings
+# Group raw lines into log entries where a new entry starts with a timestamp.
+$raw = Get-Content -Path $InputPath -Raw
+$raw = $raw -replace "`r`n","`n" -replace "`r","`n"
+$lines = $raw -split "`n"
+
+$entryTexts = @()
+$buffer = ""
+$skippedLines = 0
+
+foreach ($line in $lines) {
+    if ($line -eq $null) { continue }
+    if ($line.Trim().Length -eq 0) { continue }
+
+    if ($line -match '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}') {
+        if ($buffer -ne "") { $entryTexts += $buffer }
+        $buffer = $line
+    } else {
+        # Continuation line: attach to current buffer if present, otherwise skip
+        if ($buffer -eq "") {
+            $skippedLines++
+            continue
+        } else {
+            $buffer += "`n" + $line
+        }
+    }
+}
+
+if ($buffer -ne "") { $entryTexts += $buffer }
+
+$entries = @()
+$processed = 0
+$skippedMalformed = 0
+
+foreach ($entryText in $entryTexts) {
+    $parts = $entryText -split '\|'
+    if ($parts.Count -ge 12) {
+        # Trim only the first 11 fields; preserve the body (may contain pipes/newlines)
+        for ($i=0; $i -le 10; $i++) { $parts[$i] = $parts[$i].Trim() }
+
+        $body = ($parts[11..($parts.Count - 1)] -join '|')
+
+        $entry = @{
+            timestamp   = $parts[0]
+            app         = $parts[1]
+            level       = $parts[2]
+            endpoint    = $parts[4]
+            contextPath = $parts[5]
+            event       = $parts[6]
+            user        = $parts[7]
+            class       = $parts[8]
+            function    = $parts[9]
+            rowId       = $parts[10]
+            body        = $body
+        }
+
+        # Apply include/exclude filters
+        if ($IncludeFields -and $IncludeFields.Count -gt 0) {
+            $filteredEntry = @{}
+            foreach ($field in $IncludeFields) {
+                if ($entry.ContainsKey($field)) {
+                    $filteredEntry[$field] = $entry[$field]
+                } else {
+                    $filteredEntry[$field] = $null
+                }
+            }
+            $entry = $filteredEntry
+        }
+        elseif ($ExcludeFields -and $ExcludeFields.Count -gt 0) {
+            foreach ($field in $ExcludeFields) {
+                if ($entry.ContainsKey($field)) {
+                    $entry.PSObject.Properties.Remove($field)
+                }
+            }
+        }
+
+        $entries += $entry
+        $processed++
+    } else {
+        $skippedMalformed++
+    }
+}
+
+$skipped = $skippedLines + $skippedMalformed
+
+# Convert to JSON (compact)
+$json = $entries | ConvertTo-Json -Depth 10 -Compress
+
+# Write output
+if ($Output) {
+    Set-Content -Path $Output -Value $json -Encoding UTF8
+    Write-Host "Output written to $Output"
+} else {
+    $json
+}
+
+Write-Host "Processed: $processed entries, Skipped: $skipped entries"
@@ -0,0 +1,120 @@
+import argparse
+import re
+import json
+import sys
+
+VALID_FIELDS = [
+    "timestamp", "app", "level", "endpoint", "contextPath", "event", 
+    "user", "class", "function", "rowId", "body"
+]
+
+def validate_fields(value):
+    fields = [f.strip() for f in value.split(",")]
+    for f in fields:
+        if f not in VALID_FIELDS:
+            raise argparse.ArgumentTypeError(f"invalid field: {f}. Available: {', '.join(VALID_FIELDS)}")
+    return fields
+
+def parse_args():
+    parser = argparse.ArgumentParser(
+        prog="simplicite-log2json",
+        description="Parse Simplicité logs and output JSON."
+    )
+    parser.add_argument("input", help="Input .txt log file path")
+    parser.add_argument("-o", "--output", help="Output file path (default: stdout)", metavar="FILE")
+    
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument("--include", help=f"Fields to include (comma-separated). Available: {', '.join(VALID_FIELDS)}", type=validate_fields, metavar="FIELDS", action="append")
+    group.add_argument("--exclude", help=f"Fields to exclude (comma-separated). Available: {', '.join(VALID_FIELDS)}", type=validate_fields, metavar="FIELDS", action="append")
+    
+    return parser.parse_args()
+
+def parse_log_entry(text, log_regex):
+    match = log_regex.match(text)
+    if match:
+        return {
+            "timestamp": match.group("timestamp") or "",
+            "app": match.group("app") or "",
+            "level": match.group("level") or "",
+            "endpoint": match.group("endpoint") or "",
+            "contextPath": match.group("contextPath") or "",
+            "event": match.group("event") or "",
+            "user": match.group("user") or "",
+            "class": match.group("class") or "",
+            "function": match.group("function") or "",
+            "rowId": match.group("rowId") or "",
+            "body": match.group("body") or "",
+        }
+    return None
+
+def filter_entry(entry, include, exclude):
+    filtered = {}
+    for k, v in entry.items():
+        if include is not None and k not in include:
+            continue
+        if exclude is not None and k in exclude:
+            continue
+        filtered[k] = v
+    return filtered
+
+def main():
+    args = parse_args()
+    
+    include_fields = [item for sublist in args.include for item in sublist] if args.include else None
+    exclude_fields = [item for sublist in args.exclude for item in sublist] if args.exclude else None
+
+    log_regex = re.compile(r"^(?P<timestamp>.*?)\|(?P<app>SIMPLICITE)\|(?P<level>.+?)\|\|(?P<endpoint>.*?)\|(?P<contextPath>.*?)\|(?P<event>.*?)\|(?P<user>.*?)\|(?P<class>.*?)\|(?P<function>.*?)\|(?P<rowId>.*?)\|(?P<body>.*)$", re.DOTALL)
+    timestamp_re = re.compile(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}")
+    
+    entries = []
+    buffer = []
+    processed = 0
+    skipped = 0
+    
+    try:
+        with open(args.input, "r", encoding="utf-8") as f:
+            for line in f:
+                line_stripped = line.rstrip('\n')
+                
+                if timestamp_re.match(line_stripped):
+                    if buffer:
+                        entry_text = '\n'.join(buffer)
+                        entry = parse_log_entry(entry_text, log_regex)
+                        if entry:
+                            entries.append(entry)
+                            processed += 1
+                        else:
+                            skipped += 1
+                        buffer = []
+                
+                buffer.append(line_stripped)
+                
+            if buffer:
+                entry_text = '\n'.join(buffer)
+                entry = parse_log_entry(entry_text, log_regex)
+                if entry:
+                    entries.append(entry)
+                    processed += 1
+                else:
+                    skipped += 1
+    except Exception as e:
+        sys.stderr.write(f"Failed to open input file: {e}\n")
+        sys.exit(1)
+        
+    filtered = [filter_entry(entry, include_fields, exclude_fields) for entry in entries]
+    json_str = json.dumps(filtered, indent=2)
+    
+    if args.output:
+        try:
+            with open(args.output, "w", encoding="utf-8") as f:
+                f.write(json_str)
+        except Exception as e:
+            sys.stderr.write(f"Failed to create output file: {e}\n")
+            sys.exit(1)
+    else:
+        print(json_str)
+        
+    sys.stderr.write(f"Processed: {processed} entries, Skipped: {skipped} entries\n")
+
+if __name__ == "__main__":
+    main()