feat : add optimize-simplicite-log skill (#1742)

This commit is contained in:
JordanSOA
2026-05-18 03:26:40 +02:00
committed by GitHub
parent b669203572
commit 632052fc1b
4 changed files with 348 additions and 0 deletions
@@ -0,0 +1,153 @@
<#
.SYNOPSIS
Parses a pipe-delimited log file and emits the parsed entries as JSON.

.PARAMETER InputPath
Path of the log file to read. Mandatory.

.PARAMETER Output
Optional path of the file the JSON is written to. When omitted, the JSON
string is emitted as the script's output.

.PARAMETER Include
Optional comma-separated list of field names to keep in each entry.
Cannot be combined with -Exclude.

.PARAMETER Exclude
Optional comma-separated list of field names to drop from each entry.
Cannot be combined with -Include.
#>
param (
[Parameter(Mandatory=$true)]
[string]$InputPath,
[string]$Output,
[string]$Include,
[string]$Exclude
)
# Field names accepted by -Include / -Exclude
$ValidFields = @(
    "timestamp", "app", "level", "endpoint", "contextPath", "event",
    "user", "class", "function", "rowId", "body"
)

# Returns $true when $Field is one of the names listed in $ValidFields
# (PowerShell's default, case-insensitive comparison).
function Test-ValidField {
    param([string]$Field)

    return ($Field -in $ValidFields)
}
# -Include and -Exclude are mutually exclusive
if ($Include -and $Exclude) {
    Write-Error "Error: -Include and -Exclude cannot be used together."
    exit 1
}

# Parsed field lists; each stays $null when its option was not supplied
$IncludeFields = $null
$ExcludeFields = $null

# Split -Include on commas, trim each name, and reject the first unknown one
if ($Include) {
    $IncludeFields = $Include -split "," | ForEach-Object { $_.Trim() }
    foreach ($name in $IncludeFields) {
        if (Test-ValidField $name) { continue }
        Write-Error "Error: Invalid field '$name'. Valid fields: $($ValidFields -join ', ')"
        exit 1
    }
}

# Same treatment for -Exclude
if ($Exclude) {
    $ExcludeFields = $Exclude -split "," | ForEach-Object { $_.Trim() }
    foreach ($name in $ExcludeFields) {
        if (Test-ValidField $name) { continue }
        Write-Error "Error: Invalid field '$name'. Valid fields: $($ValidFields -join ', ')"
        exit 1
    }
}
# Check that the input exists and is a file (a directory cannot be read as a log)
if (-not (Test-Path -Path $InputPath -PathType Leaf)) {
    Write-Error "Error: File $InputPath does not exist."
    exit 1
}

# Read the whole file as UTF-8. Without -Encoding, Windows PowerShell 5.1
# decodes BOM-less files with the ANSI code page and garbles non-ASCII text.
$raw = Get-Content -Path $InputPath -Raw -Encoding UTF8

# Normalize CRLF / lone CR to LF, then split into lines
$raw = $raw -replace "`r`n","`n" -replace "`r","`n"
$lines = $raw -split "`n"

# Group raw lines into log entries: a line starting with a timestamp opens a
# new entry, any other non-blank line continues the entry in progress.
# Generic lists are used instead of "+=" on arrays/strings, which copies the
# whole collection on every append.
$entryTexts = New-Object 'System.Collections.Generic.List[string]'
$currentLines = New-Object 'System.Collections.Generic.List[string]'
$skippedLines = 0

foreach ($line in $lines) {
    # Blank (or whitespace-only) lines are dropped entirely
    if ([string]::IsNullOrWhiteSpace($line)) { continue }

    if ($line -match '^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}') {
        # Flush the entry in progress before starting the next one
        if ($currentLines.Count -gt 0) {
            $entryTexts.Add(($currentLines -join "`n"))
            $currentLines.Clear()
        }
        $currentLines.Add($line)
    }
    elseif ($currentLines.Count -gt 0) {
        # Continuation line of the current entry
        $currentLines.Add($line)
    }
    else {
        # Continuation line with no entry to attach to: count it as skipped
        $skippedLines++
    }
}

# Flush the last entry
if ($currentLines.Count -gt 0) {
    $entryTexts.Add(($currentLines -join "`n"))
}
# Parse each grouped entry text into a dictionary of named fields and apply
# the -Include / -Exclude filter.
# A generic list avoids re-copying the whole array on every "+=".
$entries = New-Object 'System.Collections.Generic.List[object]'
$processed = 0
$skippedMalformed = 0

foreach ($entryText in $entryTexts) {
    $parts = $entryText -split '\|'

    # An entry needs at least 12 pipe-separated parts; anything shorter is malformed
    if ($parts.Count -lt 12) {
        $skippedMalformed++
        continue
    }

    # Trim only the first 11 parts; the body (everything from part 11 on) is
    # re-joined untouched because it may itself contain pipes and newlines
    for ($i = 0; $i -le 10; $i++) { $parts[$i] = $parts[$i].Trim() }
    $body = ($parts[11..($parts.Count - 1)] -join '|')

    # [ordered] keeps the fields in a stable order in the JSON output.
    # Part 3 is not mapped to any field.
    $entry = [ordered]@{
        timestamp   = $parts[0]
        app         = $parts[1]
        level       = $parts[2]
        endpoint    = $parts[4]
        contextPath = $parts[5]
        event       = $parts[6]
        user        = $parts[7]
        class       = $parts[8]
        function    = $parts[9]
        rowId       = $parts[10]
        body        = $body
    }

    if ($IncludeFields -and $IncludeFields.Count -gt 0) {
        # Keep only the requested fields, in the order they were requested
        $filteredEntry = [ordered]@{}
        foreach ($field in $IncludeFields) {
            if ($entry.Contains($field)) {
                $filteredEntry[$field] = $entry[$field]
            } else {
                $filteredEntry[$field] = $null
            }
        }
        $entry = $filteredEntry
    }
    elseif ($ExcludeFields -and $ExcludeFields.Count -gt 0) {
        # Remove the key from the dictionary itself. The previous
        # PSObject.Properties.Remove() call did not touch dictionary keys,
        # so -Exclude had no effect on the output.
        foreach ($field in $ExcludeFields) {
            $entry.Remove($field)
        }
    }

    $entries.Add($entry)
    $processed++
}
# Total skipped = orphan continuation lines + entries with too few parts
$skipped = $skippedLines + $skippedMalformed

# Convert to compact JSON. Passing an explicit array through -InputObject
# (rather than piping) always yields a JSON array: "[]" when nothing was
# parsed and "[{...}]" for a single entry, instead of empty output or a bare
# object.
$json = ConvertTo-Json -InputObject @($entries) -Depth 10 -Compress

# Write to the requested file, or emit the JSON as the script's output
if ($Output) {
    Set-Content -Path $Output -Value $json -Encoding UTF8
    Write-Host "Output written to $Output"
} else {
    $json
}

Write-Host "Processed: $processed entries, Skipped: $skipped entries"
@@ -0,0 +1,120 @@
import argparse
import re
import json
import sys
# Field names accepted by --include / --exclude, in output order.
VALID_FIELDS = [
    "timestamp", "app", "level", "endpoint", "contextPath", "event",
    "user", "class", "function", "rowId", "body"
]


def validate_fields(value):
    """Split a comma-separated field list and check every name.

    Used as an argparse ``type=`` callable. Each name is stripped of
    surrounding whitespace; the first name not found in ``VALID_FIELDS``
    raises ``argparse.ArgumentTypeError``. Returns the list of stripped names.
    """
    names = []
    for raw_name in value.split(","):
        name = raw_name.strip()
        if name not in VALID_FIELDS:
            raise argparse.ArgumentTypeError(f"invalid field: {name}. Available: {', '.join(VALID_FIELDS)}")
        names.append(name)
    return names
def parse_args():
    """Build the command-line parser and return the parsed arguments.

    Accepts a positional input path, an optional ``-o/--output`` file, and
    the mutually exclusive, repeatable ``--include`` / ``--exclude`` options
    whose values are validated by ``validate_fields``.
    """
    available = ', '.join(VALID_FIELDS)

    parser = argparse.ArgumentParser(
        prog="simplicite-log2json",
        description="Parse Simplicité logs and output JSON.",
    )
    parser.add_argument("input", help="Input .txt log file path")
    parser.add_argument(
        "-o", "--output",
        metavar="FILE",
        help="Output file path (default: stdout)",
    )

    # --include and --exclude share everything except the verb in the help text
    selection = parser.add_mutually_exclusive_group()
    for flag, verb in (("--include", "include"), ("--exclude", "exclude")):
        selection.add_argument(
            flag,
            action="append",
            type=validate_fields,
            metavar="FIELDS",
            help=f"Fields to {verb} (comma-separated). Available: {available}",
        )

    return parser.parse_args()
def parse_log_entry(text, log_regex):
    """Match one entry text against ``log_regex`` and return its fields.

    ``log_regex`` must define the named groups listed below. Returns a dict
    mapping each group name to its captured text (``""`` when the group is
    empty or did not participate), or ``None`` when the text does not match.
    """
    found = log_regex.match(text)
    if found is None:
        return None

    group_names = (
        "timestamp", "app", "level", "endpoint", "contextPath", "event",
        "user", "class", "function", "rowId", "body",
    )
    return {name: found.group(name) or "" for name in group_names}
def filter_entry(entry, include, exclude):
    """Return a copy of ``entry`` restricted by the include/exclude lists.

    When ``include`` is not ``None`` only keys listed in it are kept; when
    ``exclude`` is not ``None`` keys listed in it are dropped. ``None`` for
    both returns a shallow copy with every key. Key order follows ``entry``.
    """
    def is_kept(key):
        if include is not None and key not in include:
            return False
        return exclude is None or key not in exclude

    return {key: value for key, value in entry.items() if is_kept(key)}
def _iter_entry_texts(lines, timestamp_re):
    """Yield entry texts; each line matching ``timestamp_re`` starts a new entry."""
    pending = []
    for line in lines:
        text = line.rstrip('\n')
        # A timestamped line closes the entry collected so far
        if pending and timestamp_re.match(text):
            yield '\n'.join(pending)
            pending = []
        pending.append(text)
    if pending:
        yield '\n'.join(pending)


def main():
    """Parse the input log file and emit the entries as indented JSON.

    Lines are grouped into entries (a new entry starts at each timestamped
    line), each entry is parsed with ``parse_log_entry``, and the include /
    exclude selection from the command line is applied. The JSON goes to the
    ``--output`` file when given, otherwise to stdout; a processed/skipped
    summary is written to stderr. Exits with status 1 when the input cannot
    be read or the output cannot be written.
    """
    args = parse_args()

    # argparse collects one list per occurrence of the option; flatten them
    include_fields = [item for sublist in args.include for item in sublist] if args.include else None
    exclude_fields = [item for sublist in args.exclude for item in sublist] if args.exclude else None

    log_regex = re.compile(r"^(?P<timestamp>.*?)\|(?P<app>SIMPLICITE)\|(?P<level>.+?)\|\|(?P<endpoint>.*?)\|(?P<contextPath>.*?)\|(?P<event>.*?)\|(?P<user>.*?)\|(?P<class>.*?)\|(?P<function>.*?)\|(?P<rowId>.*?)\|(?P<body>.*)$", re.DOTALL)
    timestamp_re = re.compile(r"^\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}")

    entries = []
    skipped = 0
    try:
        with open(args.input, "r", encoding="utf-8") as f:
            for entry_text in _iter_entry_texts(f, timestamp_re):
                entry = parse_log_entry(entry_text, log_regex)
                if entry is None:
                    skipped += 1
                else:
                    entries.append(entry)
    except (OSError, UnicodeError) as e:
        # Only I/O and decoding failures are reported here; anything else is
        # a programming error and should surface with its traceback.
        sys.stderr.write(f"Failed to open input file: {e}\n")
        sys.exit(1)
    processed = len(entries)

    filtered = [filter_entry(entry, include_fields, exclude_fields) for entry in entries]
    json_str = json.dumps(filtered, indent=2)

    if args.output:
        try:
            with open(args.output, "w", encoding="utf-8") as f:
                f.write(json_str)
        except OSError as e:
            sys.stderr.write(f"Failed to create output file: {e}\n")
            sys.exit(1)
    else:
        print(json_str)

    sys.stderr.write(f"Processed: {processed} entries, Skipped: {skipped} entries\n")


if __name__ == "__main__":
    main()