apps-gitops/automation/alerts/generate_monitor_workflow.py
2025-06-30 02:23:46 +03:00

143 lines
4.7 KiB
Python

import os
import shlex
import subprocess
import sys

import yaml
def lab_monitor_url(token):
    """Return the HTTPS clone URL of the lab-monitor GitHub repository.

    Args:
        token: Access token to embed as URL credentials, or ``None`` / an
            empty string when no token is available.

    Returns:
        ``https://<token>@github.com/dvirlabs/lab-monitor.git`` when a token
        is given, otherwise the same URL without credentials.
    """
    # Without this guard an unset GITHUB_TOKEN would be formatted into the
    # URL as the literal text "None".
    credentials = f"{token}@" if token else ""
    return f"https://{credentials}github.com/dvirlabs/lab-monitor.git"


# 🔁 Repos to scan for monitoring.yaml files (name -> clone URL)
REPOS = {
    "dev-tools": "https://git.dvirlabs.com/dvirlabs/dev-tools.git",
    "infra": "https://git.dvirlabs.com/dvirlabs/infra.git",
    "observability-stack": "https://git.dvirlabs.com/dvirlabs/observability-stack.git",
    "sandbox": "https://git.dvirlabs.com/dvirlabs/sandbox.git",
    # ✅ The GitHub repo is part of REPOS (not hardcoded separately) so it is
    # cloned together with the others; its token comes from GITHUB_TOKEN.
    "lab-monitor": lab_monitor_url(os.getenv("GITHUB_TOKEN")),
}

# Directory that holds one local clone per REPOS entry.
BASE_DIR = "./repos"
# Local clone of the repository that receives the generated workflow.
REPO_PATH = os.path.join(BASE_DIR, "lab-monitor")
# Workflow file written inside that clone.
WORKFLOW_FILE = os.path.join(REPO_PATH, ".github/workflows/monitor.yml")
def clone_repos():
    """Clone every repository in REPOS under BASE_DIR, or pull it if present.

    A repository whose target directory already exists is updated with
    ``git pull``; otherwise it is cloned from its URL. A failing git command
    does not stop the run: it is reported on stderr and the remaining
    repositories are still processed.
    """
    os.makedirs(BASE_DIR, exist_ok=True)
    for name, url in REPOS.items():
        repo_path = os.path.join(BASE_DIR, name)
        if os.path.exists(repo_path):
            action = "pull"
            command = ["git", "-C", repo_path, "pull"]
        else:
            action = "clone"
            command = ["git", "clone", url, repo_path]

        result = subprocess.run(command)
        if result.returncode != 0:
            # Report by name only: the URL may embed an access token.
            print(
                f"git {action} failed for {name} "
                f"(exit code {result.returncode})",
                file=sys.stderr,
            )
def extract_urls():
    """Collect the external HTTP checks declared in monitoring.yaml files.

    For every repository in REPOS except "lab-monitor", reads
    ``<BASE_DIR>/<repo>/manifests/<app>/monitoring.yaml``. A file contributes
    entries only when it parses to a mapping with a truthy ``enabled`` key and
    an ``external_check`` mapping that has a non-empty ``url``.

    Returns:
        A list of dicts with the keys ``url``, ``name``, ``code``, ``message``
        and ``severity`` - one per item of ``external_check.expected_codes``.
        Apps are visited in sorted order so repeated runs yield the same list.
    """
    urls = []
    for repo in REPOS:
        if repo == "lab-monitor":
            continue  # skip this, it's where we push
        manifests_path = os.path.join(BASE_DIR, repo, "manifests")
        if not os.path.isdir(manifests_path):
            continue

        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # generated workflow stable between runs.
        for app in sorted(os.listdir(manifests_path)):
            path = os.path.join(manifests_path, app, "monitoring.yaml")
            if not os.path.exists(path):
                continue
            with open(path, encoding="utf-8") as f:
                cfg = yaml.safe_load(f)

            # An empty file loads as None and a list/scalar document has no
            # .get(), so anything that is not a mapping is skipped.
            if not isinstance(cfg, dict) or not cfg.get("enabled"):
                continue
            ext = cfg.get("external_check")
            if not isinstance(ext, dict) or not ext.get("url"):
                continue
            expected_codes = ext.get("expected_codes")
            if not isinstance(expected_codes, dict):
                continue  # missing, null, or not a code -> severity mapping

            # The directory name is the fallback when no "app" key is set.
            app_name = cfg.get("app", app)
            for code, sev in expected_codes.items():
                urls.append({
                    "url": ext["url"],
                    "name": app_name,
                    "code": code,
                    "message": f"⚠️ {app_name} down ({code})",
                    "severity": sev,
                })
    return urls
def severity_to_priority(sev):
    """Map a severity label to the numeric priority sent with an alert.

    Args:
        sev: Severity name, matched case-insensitively. Values that are not
            strings (for example a null YAML value) are accepted.

    Returns:
        2 for "critical", 1 for "high", and 0 for "warning", "info", any
        unknown label, or any non-string value.
    """
    # Severities come from YAML, where a null or numeric value is possible;
    # those have no .lower() and fall back to the default priority.
    if not isinstance(sev, str):
        return 0
    return {
        "critical": 2,
        "high": 1,
        "warning": 0,
        "info": 0,
    }.get(sev.lower(), 0)
# Static part of the generated workflow: trigger, job header and the bash
# check_url() helper. One "check_url ..." call per monitored URL is appended
# after it, indented to sit inside the same "run: |" block.
#
# The status probe ends in "|| true" because GitHub runs the step with
# "bash -e" by default: a curl connection failure would otherwise abort the
# script before the alert is sent. With it, the captured status no longer
# matches the expected code and the alert branch runs.
_WORKFLOW_HEADER = """name: Monitor Lab URLs
on:
  schedule:
    - cron: "*/5 * * * *"
  workflow_dispatch:
jobs:
  monitor:
    runs-on: ubuntu-latest
    steps:
      - name: Check all URLs
        run: |
          check_url() {
            URL=$1
            NAME=$2
            CODE=$3
            MESSAGE=$4
            PRIORITY=$5
            STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$URL" || true)
            TIME=$(date "+%Y-%m-%d %H:%M:%S")
            if [[ "$STATUS" != "$CODE" ]]; then
              curl -s \\
                --form-string "token=${{ secrets.PUSHOVER_TOKEN }}" \\
                --form-string "user=${{ secrets.PUSHOVER_USER }}" \\
                --form-string "title=🔴 $NAME Alert" \\
                --form-string "message=$MESSAGE at $TIME" \\
                --form-string "priority=$PRIORITY" \\
                --form-string "retry=60" \\
                --form-string "expire=600" \\
                https://api.pushover.net/1/messages.json
            else
              echo "✅ $NAME is up: $STATUS"
            fi
          }
"""

# Indentation that places a line inside the "run: |" block scalar above.
_RUN_INDENT = " " * 10


def generate_workflow(urls):
    """Write the GitHub Actions workflow that checks every monitored URL.

    The file at WORKFLOW_FILE is overwritten (its directory is created when
    missing) with a scheduled workflow whose single step defines a bash
    ``check_url`` function and then calls it once per entry of ``urls``.

    Args:
        urls: Iterable of dicts with the keys ``url``, ``name``, ``code``,
            ``message`` and ``severity``.
    """
    os.makedirs(os.path.dirname(WORKFLOW_FILE), exist_ok=True)

    calls = []
    for item in urls:
        arguments = (
            item["url"],
            item["name"],
            item["code"],
            item["message"],
            severity_to_priority(item["severity"]),
        )
        # shlex.quote keeps quotes, "$" or backticks in a value from being
        # interpreted by bash when the workflow runs.
        quoted = " ".join(shlex.quote(str(arg)) for arg in arguments)
        calls.append(f"{_RUN_INDENT}check_url {quoted}\n")

    # The template contains emoji, so the encoding must not depend on the
    # platform default.
    with open(WORKFLOW_FILE, "w", encoding="utf-8") as f:
        f.write(_WORKFLOW_HEADER)
        f.writelines(calls)
def push_workflow():
    """Commit the generated monitor.yml in the lab-monitor clone and push it.

    Checks out the remote's default branch in REPO_PATH, sets the bot
    identity, stages ``.github/workflows/monitor.yml``, commits it when the
    staged content changed, and pushes to ``origin``.

    Raises:
        subprocess.CalledProcessError: If a git step before the push fails.
        RuntimeError: If the push itself fails.
    """
    git = ["git", "-C", REPO_PATH]

    # Diagnostic listing of the clone; its result is not used.
    subprocess.run(["ls", "-l", REPO_PATH])

    # Detect default branch (main or master)
    result = subprocess.run(
        git + ["symbolic-ref", "refs/remotes/origin/HEAD"],
        capture_output=True, text=True, check=True,
    )
    ref = result.stdout.strip()
    remote_prefix = "refs/remotes/origin/"
    # Strip the remote prefix instead of keeping only the last path
    # component, so a branch name that contains "/" is preserved.
    if ref.startswith(remote_prefix):
        default_branch = ref[len(remote_prefix):]
    else:
        default_branch = ref.split("/")[-1]
    subprocess.run(git + ["checkout", default_branch], check=True)

    # Git identity
    subprocess.run(git + ["config", "user.name", "lab-monitor-bot"], check=True)
    subprocess.run(git + ["config", "user.email", "bot@dvirlabs.com"], check=True)

    # Add + Commit + Push
    subprocess.run(git + ["add", ".github/workflows/monitor.yml"], check=True)

    # "git diff --cached --quiet" exits 1 when something is staged and 0 when
    # nothing is. Committing only in the first case lets a real commit
    # failure raise instead of being mistaken for "nothing to commit".
    staged = subprocess.run(git + ["diff", "--cached", "--quiet"])
    if staged.returncode == 1:
        subprocess.run(
            git + ["commit", "-m", "update monitor.yml from monitoring.yaml"],
            check=True,
        )
    else:
        staged.check_returncode()  # 0 passes; any other code is a git error

    # Push even without a new commit so a commit left unpushed by an earlier
    # run still reaches the remote.
    result = subprocess.run(git + ["push", "--set-upstream", "origin", default_branch])
    if result.returncode != 0:
        raise RuntimeError(f"❌ Failed to push monitor.yml to origin/{default_branch}")
if __name__ == "__main__":
    # Script entry point. The steps run strictly in this order:
    # sync the local clones, collect the checks, render the workflow from
    # them, then commit and push the result.
    clone_repos()
    generate_workflow(extract_urls())
    push_workflow()