"""Generate and publish the lab-monitor GitHub Actions workflow.

The script clones (or pulls) the repositories listed in ``REPOS``, collects
every enabled external check declared in ``manifests/**/monitoring.yaml``,
writes ``.github/workflows/monitor.yml`` into the ``lab-monitor`` checkout
and pushes the result to that repository's default branch.
"""

import glob
import os
import shlex
import subprocess
import sys

import yaml


def _lab_monitor_url():
    """Return the lab-monitor clone URL, embedding GITHUB_TOKEN when set."""
    token = os.getenv("GITHUB_TOKEN")
    # Without this guard an unset token would be interpolated as the literal
    # string "None" and sent to GitHub as the user name.
    credentials = f"{token}@" if token else ""
    return f"https://{credentials}github.com/dvirlabs/lab-monitor.git"


# 🔁 Repos to scan for monitoring.yaml files
REPOS = {
    "dev-tools": "https://git.dvirlabs.com/dvirlabs/dev-tools.git",
    "infra": "https://git.dvirlabs.com/dvirlabs/infra.git",
    "observability-stack": "https://git.dvirlabs.com/dvirlabs/observability-stack.git",
    "sandbox": "https://git.dvirlabs.com/dvirlabs/sandbox.git",
    "lab-monitor": _lab_monitor_url(),
}

BASE_DIR = "./repos"
REPO_PATH = os.path.join(BASE_DIR, "lab-monitor")
WORKFLOW_FILE = os.path.join(REPO_PATH, ".github/workflows/monitor.yml")

# Static head of the generated workflow. One `check_url ...` line per service
# is appended below it, inside the `run: |` block scalar, so appended lines
# must use the same 10-space indentation as the shell function.
# NOTE(review): only HTTP 502 and 404 raise an alert; curl reports "000" when
# the connection itself fails, which currently counts as "up" — confirm that
# this is intended.
WORKFLOW_HEADER = """name: Monitor Lab URLs

on:
  schedule:
    - cron: "*/5 * * * *"
  workflow_dispatch:

jobs:
  monitor:
    runs-on: ubuntu-latest
    steps:
      - name: Check services
        run: |
          check_url() {
            URL=$1
            NAME=$2
            STATUS=$(curl -s -o /dev/null -w "%{http_code}" "$URL")
            TIME=$(date "+%Y-%m-%d %H:%M:%S")

            if [[ "$STATUS" == "502" || "$STATUS" == "404" ]]; then
              curl -s \\
                --form-string "token=${{ secrets.PUSHOVER_TOKEN }}" \\
                --form-string "user=${{ secrets.PUSHOVER_USER }}" \\
                --form-string "title=🔴 $NAME Alert" \\
                --form-string "message=⚠️ $URL is down ($STATUS) at $TIME" \\
                --form-string "priority=2" \\
                --form-string "retry=60" \\
                --form-string "expire=600" \\
                https://api.pushover.net/1/messages.json
            else
              echo "✅ $NAME is up: $STATUS"
            fi
          }

"""


def clone_repos():
    """Clone every repo in ``REPOS`` into ``BASE_DIR``, or pull it if present.

    A failing git command is reported on stderr and the remaining repos are
    still processed, so one unreachable repository does not stop the run.
    """
    os.makedirs(BASE_DIR, exist_ok=True)
    for name, url in REPOS.items():
        repo_path = os.path.join(BASE_DIR, name)
        if os.path.exists(repo_path):
            action = "pull"
            command = ["git", "-C", repo_path, "pull"]
        else:
            action = "clone"
            command = ["git", "clone", url, repo_path]
        result = subprocess.run(command)
        if result.returncode != 0:
            # The URL is deliberately not printed: it may contain the token.
            print(
                f"⚠️ git {action} failed for {name} "
                f"(exit code {result.returncode})",
                file=sys.stderr,
            )


def extract_urls():
    """Collect the external checks declared in the scanned repositories.

    Every ``monitoring.yaml`` below ``<BASE_DIR>/<repo>/manifests`` is read.
    A file contributes an entry when its ``enabled`` key is truthy and
    ``external_check.url`` is set. The ``lab-monitor`` repo is not scanned.

    Returns:
        A list of ``{"name": ..., "url": ...}`` dicts. ``name`` is the file's
        ``app`` value, or the name of the directory holding the file.
    """
    urls = []
    for repo in REPOS:
        if repo == "lab-monitor":
            continue
        manifests_path = os.path.join(BASE_DIR, repo, "manifests")
        if not os.path.isdir(manifests_path):
            continue
        # Recursively find all monitoring.yaml files. Sorting keeps the
        # generated workflow stable between runs regardless of filesystem
        # enumeration order.
        pattern = f"{manifests_path}/**/monitoring.yaml"
        for path in sorted(glob.glob(pattern, recursive=True)):
            with open(path, encoding="utf-8") as f:
                cfg = yaml.safe_load(f)
            # safe_load returns None for an empty file and may return a
            # scalar or list; only a mapping can carry the keys read below.
            if not isinstance(cfg, dict) or not cfg.get("enabled"):
                continue
            ext = cfg.get("external_check")
            if not isinstance(ext, dict) or not ext.get("url"):
                continue
            app_name = cfg.get("app") or os.path.basename(os.path.dirname(path))
            urls.append({"name": app_name, "url": ext["url"]})
    return urls


def generate_workflow(urls):
    """Write ``WORKFLOW_FILE`` with one ``check_url`` call per entry.

    Args:
        urls: Iterable of dicts with ``"url"`` and ``"name"`` keys, as
            returned by ``extract_urls``.
    """
    os.makedirs(os.path.dirname(WORKFLOW_FILE), exist_ok=True)
    # The template contains emoji, so the encoding must not depend on locale.
    with open(WORKFLOW_FILE, "w", encoding="utf-8") as f:
        f.write(WORKFLOW_HEADER)
        for item in urls:
            # Values come from repository files and end up in a bash script;
            # quote them so characters such as $, ` or " are not interpreted
            # by the shell.
            # NOTE(review): GitHub Actions still expands ${{ ... }} before the
            # shell runs — confirm manifests are trusted in that respect.
            url = shlex.quote(str(item["url"]))
            name = shlex.quote(str(item["name"]))
            f.write(f"          check_url {url} {name}\n")


def push_workflow():
    """Commit the generated workflow and push it to the default branch.

    Raises:
        subprocess.CalledProcessError: If the default branch cannot be
            resolved or checked out, or if configuring or staging fails.
        RuntimeError: If ``git push`` exits with a non-zero status.
    """
    subprocess.run(["ls", "-l", REPO_PATH])

    # Detect default branch
    result = subprocess.run(
        ["git", "-C", REPO_PATH, "symbolic-ref", "refs/remotes/origin/HEAD"],
        capture_output=True,
        text=True,
        check=True,
    )
    ref = result.stdout.strip()
    prefix = "refs/remotes/origin/"
    # Strip the known prefix rather than taking the last path component, so a
    # branch name that itself contains "/" survives intact.
    if ref.startswith(prefix):
        default_branch = ref[len(prefix):]
    else:
        default_branch = ref.split("/")[-1]

    git = ["git", "-C", REPO_PATH]
    subprocess.run(git + ["checkout", default_branch], check=True)
    subprocess.run(git + ["config", "user.name", "lab-monitor-bot"], check=True)
    subprocess.run(git + ["config", "user.email", "bot@dvirlabs.com"], check=True)
    subprocess.run(git + ["add", ".github/workflows/monitor.yml"], check=True)
    # `git commit` exits non-zero when there is nothing to commit, which is a
    # normal outcome here, so its status is intentionally not checked.
    subprocess.run(
        git + ["commit", "-m", "update monitor.yml from monitoring.yaml"],
        check=False,
    )
    result = subprocess.run(
        git + ["push", "--set-upstream", "origin", default_branch]
    )
    if result.returncode != 0:
        raise RuntimeError(
            f"❌ Failed to push monitor.yml to origin/{default_branch}"
        )


def main():
    """Run the pipeline: clone, extract, generate, push."""
    clone_repos()
    urls = extract_urls()
    generate_workflow(urls)
    push_workflow()


if __name__ == "__main__":
    main()