diff --git a/.woodpecker.yml b/.woodpecker.yml index 4700353..99e060e 100644 --- a/.woodpecker.yml +++ b/.woodpecker.yml @@ -8,13 +8,15 @@ # so a drift-check here would always be OUT_OF_SYNC by # design and is meaningless as a failure signal. # -# push (master) → syntax-check, validate, deploy, update-sync-metric -# Deploys to the server, then verifies sync and pushes metric. +# push (master) → syntax-check, validate, deploy, update-gitops-status +# Deploys to the server, then verifies sync and sends +# JSON status snapshot to gitops-status-server for Grafana. # # cron → gitops_sync_check (read-only drift check, no deploy) # Continuously verifies that the live server still matches # Git even when no push has happened. Detects manual edits -# made directly on the server. +# made directly on the server. Sends JSON status with +# detailed file-level drift information to gitops-status-server. # # NOTE: Woodpecker does not support multiple YAML documents (---) in one file. # All pipelines must live in a single document with step-level filtering. @@ -80,44 +82,38 @@ steps: event: push # --------------------------------------------------------------------------- - # update-sync-metric: Post-deploy sync check + Prometheus metric push + # update-gitops-status: Post-deploy sync check + JSON status update # Runs on push to master only, after deploy succeeds. - # STATUS=1 means SYNCED, STATUS=0 means OUT_OF_SYNC. + # Generates structured JSON with sync status, drift count, and changed files. + # Sends JSON to gitops-status-server for Grafana visualization. 
# --------------------------------------------------------------------------- - update-sync-metric: + update-gitops-status: image: alpine/ansible:latest depends_on: [deploy] environment: ANSIBLE_CONFIG: ansible.cfg SSH_PRIVATE_KEY: from_secret: SSH_PRIVATE_KEY - PUSHGATEWAY_URL: http://pushgateway.observability-stack.svc.cluster.local:9091 + GITOPS_STATUS_SERVER_URL: http://gitops-status-server.observability-stack.svc.cluster.local:80 + REPO_NAME: rsyslog + SERVER_NAME: rsyslog-lab commands: - | - apk add --no-cache curl > /dev/null 2>&1 + # Install dependencies: curl for HTTP requests, jq for JSON formatting, bash (required by update-gitops-status.sh; same package set as the gitops_sync_check step) + apk add --no-cache curl jq bash > /dev/null 2>&1 + + # Setup SSH key for Ansible mkdir -p ~/.ssh printf '%s\n' "$${SSH_PRIVATE_KEY}" > ~/.ssh/id_rsa chmod 600 ~/.ssh/id_rsa - echo "==> Verifying post-deploy sync status..." - set +e - ansible-playbook -i ansible/inventory/hosts.yml ansible/playbooks/drift-check.yml - DRIFT_RC=$? - set -e + echo "==> Running post-deploy GitOps status check..." + + # Make script executable and run it + chmod +x update-gitops-status.sh + ./update-gitops-status.sh - if [ "$DRIFT_RC" -eq 0 ]; then - STATUS=1 - echo "==> SYNCED (1) – server configuration matches Git" - else - STATUS=0 - echo "==> OUT OF SYNC (0) – drift detected after deploy" - fi - - printf 'gitops_sync_status{repo="rsyslog",server="rsyslog-lab"} %s\n' "$STATUS" | \ - curl --silent --show-error --fail --data-binary @- \ - "$${PUSHGATEWAY_URL}/metrics/job/gitops_rsyslog/instance/rsyslog-lab" - - echo "==> Metric pushed. Pipeline always succeeds; sync status is in Prometheus." + echo "==> JSON status update complete. Pipeline always succeeds." when: branch: master event: push @@ -125,7 +121,8 @@ steps: # --------------------------------------------------------------------------- # gitops_sync_check: ArgoCD-style cron drift check – read-only, no deploy # Detects manual changes made directly on the server between pushes. 
- # STATUS=1 → SYNCED, STATUS=0 → OUT_OF_SYNC + # Generates structured JSON with sync status, drift count, and changed files. + # Sends JSON to gitops-status-server for continuous GitOps monitoring. # Pipeline marked FAILED when drift found so it is visible in the UI. # # ─── Woodpecker Cron UI settings ────────────────────────────────────────── @@ -139,34 +136,50 @@ steps: ANSIBLE_CONFIG: ansible.cfg SSH_PRIVATE_KEY: from_secret: SSH_PRIVATE_KEY - PUSHGATEWAY_URL: http://pushgateway.observability-stack.svc.cluster.local:9091 + GITOPS_STATUS_SERVER_URL: http://gitops-status-server.observability-stack.svc.cluster.local:80 + REPO_NAME: rsyslog + SERVER_NAME: rsyslog-lab commands: - | - apk add --no-cache curl > /dev/null 2>&1 + # Install dependencies: curl for HTTP requests, jq for JSON formatting + apk add --no-cache curl jq bash > /dev/null 2>&1 + # Setup SSH key for Ansible mkdir -p ~/.ssh printf '%s\n' "$${SSH_PRIVATE_KEY}" > ~/.ssh/id_rsa chmod 600 ~/.ssh/id_rsa - echo "==> [cron] Running drift check against remote server..." + echo "==> [cron] Running continuous GitOps drift check..." + + # Make script executable and run it + chmod +x update-gitops-status.sh + + # Capture exit code to determine if drift was detected set +e - ansible-playbook -i ansible/inventory/hosts.yml ansible/playbooks/drift-check.yml + ./update-gitops-status.sh + SCRIPT_RC=$? + set -e + + if [ "$SCRIPT_RC" -ne 0 ]; then + echo "==> ERROR: Status update failed" + exit 1 + fi + + # Check sync status to determine pipeline result + # Read the generated JSON or re-run drift check + echo "==> Verifying drift status for pipeline result..." + set +e + ansible-playbook -i ansible/inventory/hosts.yml ansible/playbooks/drift-check.yml > /dev/null 2>&1 DRIFT_RC=$? 
set -e if [ "$DRIFT_RC" -eq 0 ]; then - STATUS=1 - echo "==> STATUS: SYNCED (1) – server configuration matches Git" + echo "==> Pipeline SUCCESS: Server is SYNCED with Git" + exit 0 else - STATUS=0 - echo "==> STATUS: OUT OF SYNC (0) – manual drift detected on server" + echo "==> Pipeline FAILED: OUT OF SYNC - manual drift detected on server" + echo " Check gitops-status-server for detailed file-level drift information" + exit 1 fi - - echo "==> Pushing metric: gitops_sync_status{repo=\"rsyslog\",server=\"rsyslog-lab\"} $STATUS" - printf 'gitops_sync_status{repo="rsyslog",server="rsyslog-lab"} %s\n' "$STATUS" | \ - curl --silent --show-error --fail --data-binary @- \ - "$${PUSHGATEWAY_URL}/metrics/job/gitops_rsyslog/instance/rsyslog-lab" - - echo "==> Metric pushed. Pipeline always succeeds; sync status is in Prometheus." when: event: cron diff --git a/DEPLOYMENT_CHECKLIST.md b/DEPLOYMENT_CHECKLIST.md new file mode 100644 index 0000000..0a54d89 --- /dev/null +++ b/DEPLOYMENT_CHECKLIST.md @@ -0,0 +1,310 @@ +# Deployment Checklist + +This checklist guides you through deploying the updated rsyslog repository with gitops-status-server integration. 
+ +## Prerequisites + +Before deploying, ensure: + +- [ ] gitops-status-server is deployed and accessible at: + `http://gitops-status-server.observability-stack.svc.cluster.local:80` +- [ ] gitops-status-server has `/api/status` endpoint implemented (see [GITOPS_STATUS_API_REFERENCE.md](GITOPS_STATUS_API_REFERENCE.md)) +- [ ] Woodpecker CI is configured for this repository +- [ ] SSH access to rsyslog-lab server is configured +- [ ] SSH_PRIVATE_KEY secret is set in Woodpecker repository settings + +## Files Changed/Added + +### New Files +- ✅ `update-gitops-status.sh` - Main script for JSON status generation +- ✅ `GITOPS_STATUS_INTEGRATION.md` - Integration documentation +- ✅ `GITOPS_STATUS_API_REFERENCE.md` - API reference with examples +- ✅ `MIGRATION_SUMMARY.md` - Summary of all changes made + +### Modified Files +- ✅ `.woodpecker.yml` - Updated pipeline to use JSON status +- ✅ `README.md` - Updated documentation with new flow + +### Unchanged Files (no action needed) +- ✅ All Ansible playbooks +- ✅ All Ansible inventory files +- ✅ All rsyslog config files +- ✅ Local scripts (apply.sh, drift-check.sh) + +## Deployment Steps + +### 1. Review Changes + +- [ ] Read [MIGRATION_SUMMARY.md](MIGRATION_SUMMARY.md) to understand all changes +- [ ] Review [.woodpecker.yml](.woodpecker.yml) pipeline changes +- [ ] Review [update-gitops-status.sh](update-gitops-status.sh) script logic + +### 2. 
Verify gitops-status-server + +Test the gitops-status-server API endpoint: + +```bash +# Test POST endpoint (should return 200 or 404 if not implemented yet) +curl -X POST http://gitops-status-server.observability-stack.svc.cluster.local:80/api/status \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "test", + "server": "test", + "sync_status": "SYNCED", + "drift_count": 0, + "files": [], + "last_check": "2026-04-21T10:00:00Z" + }' + +# Test GET endpoint +curl http://gitops-status-server.observability-stack.svc.cluster.local:80/status.json +``` + +If the API is not implemented yet: +- [ ] Implement gitops-status-server API (use [GITOPS_STATUS_API_REFERENCE.md](GITOPS_STATUS_API_REFERENCE.md)) +- [ ] Deploy to Kubernetes cluster +- [ ] Verify endpoints are accessible + +### 3. Test Locally (Optional) + +Before pushing to Git, you can test the script locally: + +```bash +# Set environment variables +export GITOPS_STATUS_SERVER_URL="http://gitops-status-server.observability-stack.svc.cluster.local:80" +export REPO_NAME="rsyslog" +export SERVER_NAME="rsyslog-lab" + +# Make script executable +chmod +x update-gitops-status.sh + +# Run the script (requires Ansible, jq, curl) +./update-gitops-status.sh +``` + +Expected output: +``` +==> Running drift check playbook... + Inventory: ansible/inventory/hosts.yml + Playbook: ansible/playbooks/drift-check.yml +==> Status: SYNCED - server configuration matches Git +==> Drift count: 0 +==> Generated JSON status: +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "SYNCED", + "drift_count": 0, + "files": [], + "last_check": "2026-04-21T10:30:00Z" +} +==> Sending status to gitops-status-server... + URL: http://gitops-status-server.observability-stack.svc.cluster.local:80/api/status +==> Status update successful (HTTP 200) +``` + +### 4. Commit and Push Changes + +```bash +# Stage all changes +git add . 
+ +# Commit +git commit -m "Migrate from Pushgateway to gitops-status-server JSON status + +- Add update-gitops-status.sh script for JSON status generation +- Update .woodpecker.yml to use gitops-status-server +- Remove Pushgateway metric push logic +- Add comprehensive documentation +- Keep all Ansible playbooks unchanged" + +# Push to master (will trigger deploy pipeline) +git push origin master +``` + +### 5. Monitor First Deployment + +After pushing to master: + +- [ ] Watch Woodpecker pipeline execution +- [ ] Verify `syntax-check` step passes +- [ ] Verify `validate` step passes +- [ ] Verify `deploy` step completes +- [ ] Verify `update-gitops-status` step runs successfully +- [ ] Check that JSON is sent to gitops-status-server + +Example successful `update-gitops-status` step output: +``` +==> Running post-deploy GitOps status check... +==> Running drift check playbook... +==> Status: SYNCED - server configuration matches Git +==> Drift count: 0 +==> Generated JSON status: {...} +==> Sending status to gitops-status-server... +==> Status update successful (HTTP 200) +==> JSON status update complete. Pipeline always succeeds. +``` + +### 6. Verify Cron Pipeline + +The cron pipeline runs every 2 minutes: + +- [ ] Wait for next cron execution +- [ ] Check Woodpecker for `gitops_sync_check` pipeline run +- [ ] Verify JSON status is sent +- [ ] Verify pipeline succeeds (if synced) or fails (if drift detected) + +### 7. 
Test Drift Detection + +Manually create drift to test detection: + +```bash +# SSH to the server +ssh rsyslog-lab + +# Edit a config file +echo "# manual edit" >> /etc/rsyslog.conf + +# Wait up to 2 minutes for next cron run +``` + +Expected behavior: +- [ ] Cron pipeline runs +- [ ] Drift detected in Ansible playbook +- [ ] JSON sent with `sync_status: "OUT_OF_SYNC"` +- [ ] JSON includes `files: [{"name": "rsyslog.conf"}]` +- [ ] Pipeline marked as FAILED (for visibility) + +Verify in gitops-status-server: +```bash +curl http://gitops-status-server.observability-stack.svc.cluster.local:80/status.json +``` + +Should show: +```json +[ + { + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "OUT_OF_SYNC", + "drift_count": 1, + "files": [ + {"name": "rsyslog.conf"} + ], + "last_check": "...", + "updated_at": "..." + } +] +``` + +### 8. Configure Grafana Dashboard + +- [ ] Add Infinity datasource pointing to gitops-status-server +- [ ] Create dashboard to display GitOps status +- [ ] Add panels for: + - Sync status overview (SYNCED vs OUT_OF_SYNC) + - Drift count per repo + - Detailed file list for drifted repos + - Last check timestamp + - Historical trend (if gitops-status-server stores history) + +Example dashboard queries provided in [GITOPS_STATUS_API_REFERENCE.md](GITOPS_STATUS_API_REFERENCE.md). + +### 9. Cleanup (Optional) + +If everything works correctly: + +- [ ] Remove old Pushgateway metrics for rsyslog (if no longer needed) +- [ ] Update any alerts/dashboards that used old `gitops_sync_status` metric +- [ ] Document the new JSON status format in team wiki/docs + +## Troubleshooting + +### Script fails with "command not found: jq" + +**Problem:** `jq` is not installed in the Woodpecker container + +**Solution:** The `.woodpecker.yml` already includes `apk add --no-cache jq`. Verify the step runs before the script. 
+ +### Script fails with "HTTP 404" or "HTTP 500" + +**Problem:** gitops-status-server endpoint not implemented or not accessible + +**Solution:** +1. Verify gitops-status-server is running: `curl http://gitops-status-server.observability-stack.svc.cluster.local:80/health` +2. Check Kubernetes service: `kubectl get svc -n observability-stack` +3. Implement `/api/status` endpoint using [GITOPS_STATUS_API_REFERENCE.md](GITOPS_STATUS_API_REFERENCE.md) + +### Script fails with "No such file or directory: update-gitops-status.sh" + +**Problem:** Script not found in workspace + +**Solution:** The script is created in the repository root. Verify it's committed to Git and available in the CI workspace. + +### Drift not detected when expected + +**Problem:** Manual changes not showing up in drift check + +**Solution:** +1. Verify changes are to files managed by Git (rsyslog.conf or rsyslog.d/*.conf) +2. Check Ansible playbook output for diff details +3. Verify SSH access to server from CI container + +### JSON has empty files array even when drift detected + +**Problem:** Script parsing logic not extracting filenames correctly + +**Solution:** +1. Check Ansible output format - script expects specific JSON structure +2. Run with `ANSIBLE_STDOUT_CALLBACK=json` to see raw output +3. 
Update regex patterns in `update-gitops-status.sh` if needed + +## Rollback Procedure + +If you need to rollback to Pushgateway: + +```bash +# Revert to previous commit +git revert HEAD + +# Or restore specific files +git checkout HEAD~1 .woodpecker.yml +git checkout HEAD~1 README.md + +# Remove new files +git rm update-gitops-status.sh GITOPS_STATUS_*.md MIGRATION_SUMMARY.md + +# Commit and push +git commit -m "Rollback to Pushgateway metrics" +git push origin master +``` + +## Success Criteria + +✅ All checks passed when: + +- [ ] Woodpecker pipeline completes successfully on push to master +- [ ] JSON status is sent to gitops-status-server after deploy +- [ ] Cron pipeline runs every 2 minutes and sends JSON status +- [ ] Drift is correctly detected and reported in JSON +- [ ] gitops-status-server `/status.json` endpoint returns correct data +- [ ] Grafana dashboard displays rsyslog sync status +- [ ] No errors in Woodpecker logs +- [ ] File-level drift details are visible in Grafana + +## Additional Resources + +- [README.md](README.md) - Repository overview and workflow +- [MIGRATION_SUMMARY.md](MIGRATION_SUMMARY.md) - Detailed migration changes +- [GITOPS_STATUS_INTEGRATION.md](GITOPS_STATUS_INTEGRATION.md) - Integration architecture +- [GITOPS_STATUS_API_REFERENCE.md](GITOPS_STATUS_API_REFERENCE.md) - API implementation guide + +## Support + +If you encounter issues: + +1. Check Woodpecker pipeline logs +2. Verify gitops-status-server logs +3. Test API endpoints manually with curl +4. Review Ansible playbook output +5. Check this repository's documentation files diff --git a/GITOPS_STATUS_API_REFERENCE.md b/GITOPS_STATUS_API_REFERENCE.md new file mode 100644 index 0000000..d22823b --- /dev/null +++ b/GITOPS_STATUS_API_REFERENCE.md @@ -0,0 +1,332 @@ +# gitops-status-server API Reference + +This document provides a reference implementation example for the gitops-status-server API endpoint that receives status updates from the rsyslog repository. 
+ +## API Endpoint Specification + +### POST /api/status + +Receives GitOps status updates from repositories. + +**Request:** +``` +POST /api/status HTTP/1.1 +Host: gitops-status-server.observability-stack.svc.cluster.local:80 +Content-Type: application/json + +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "OUT_OF_SYNC", + "drift_count": 2, + "files": [ + { "name": "rsyslog.conf" }, + { "name": "rsyslog.d/30-lab.conf" } + ], + "last_check": "2026-04-21T10:32:15Z" +} +``` + +**Response (Success):** +``` +HTTP/1.1 200 OK +Content-Type: application/json + +{ + "status": "ok", + "message": "Status updated successfully" +} +``` + +**Response (Error):** +``` +HTTP/1.1 400 Bad Request +Content-Type: application/json + +{ + "status": "error", + "message": "Invalid JSON payload" +} +``` + +## Example Implementation (Python/Flask) + +```python +from flask import Flask, request, jsonify +from datetime import datetime +import json +import os + +app = Flask(__name__) + +# In-memory storage (replace with database in production) +status_data = {} + +@app.route('/api/status', methods=['POST']) +def update_status(): + """Receive and store GitOps status updates""" + try: + # silent=True yields None (no exception) for a missing/malformed body, so we can return the documented 400 + data = request.get_json(silent=True) + if not isinstance(data, dict): + return jsonify({ + 'status': 'error', + 'message': 'Invalid JSON payload' + }), 400 + + # Validate required fields + required_fields = ['repo', 'server', 'sync_status', 'drift_count', 'files', 'last_check'] + for field in required_fields: + if field not in data: + return jsonify({ + 'status': 'error', + 'message': f'Missing required field: {field}' + }), 400 + + # Validate sync_status value + if data['sync_status'] not in ['SYNCED', 'OUT_OF_SYNC']: + return jsonify({ + 'status': 'error', + 'message': 'sync_status must be SYNCED or OUT_OF_SYNC' + }), 400 + + # Create unique key for this repo/server combination + key = f"{data['repo']}:{data['server']}" + + # Store the status + status_data[key] = { + 'repo': data['repo'], + 'server': data['server'], + 'sync_status': data['sync_status'], + 'drift_count': data['drift_count'], + 'files': data['files'], + 
'last_check': data['last_check'], + 'updated_at': datetime.utcnow().isoformat() + 'Z' + } + + # Log the update + print(f"Status update: {key} -> {data['sync_status']} (drift_count: {data['drift_count']})") + + return jsonify({ + 'status': 'ok', + 'message': 'Status updated successfully' + }), 200 + + except Exception as e: + print(f"Error processing status update: {e}") + return jsonify({ + 'status': 'error', + 'message': str(e) + }), 500 + + +@app.route('/status.json', methods=['GET']) +def get_status(): + """Serve aggregated status for Grafana Infinity datasource""" + # Convert dict to list for JSON array output + statuses = list(status_data.values()) + + return jsonify(statuses), 200 + + +@app.route('/health', methods=['GET']) +def health(): + """Health check endpoint""" + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.utcnow().isoformat() + 'Z', + 'tracked_repos': len(status_data) + }), 200 + + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) +``` + +## Example Implementation (Go) + +```go +package main + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + "sync" + "time" +) + +type StatusUpdate struct { + Repo string `json:"repo"` + Server string `json:"server"` + SyncStatus string `json:"sync_status"` + DriftCount int `json:"drift_count"` + Files []File `json:"files"` + LastCheck string `json:"last_check"` +} + +type File struct { + Name string `json:"name"` +} + +type StoredStatus struct { + StatusUpdate + UpdatedAt string `json:"updated_at"` +} + +var ( + statusStore = make(map[string]StoredStatus) + storeMutex sync.RWMutex +) + +func updateStatusHandler(w http.ResponseWriter, r *http.Request) { + if r.Method != http.MethodPost { + http.Error(w, "Method not allowed", http.StatusMethodNotAllowed) + return + } + + var status StatusUpdate + if err := json.NewDecoder(r.Body).Decode(&status); err != nil { + http.Error(w, fmt.Sprintf("Invalid JSON: %v", err), http.StatusBadRequest) + return + } + + // Validate sync_status + 
if status.SyncStatus != "SYNCED" && status.SyncStatus != "OUT_OF_SYNC" { + http.Error(w, "sync_status must be SYNCED or OUT_OF_SYNC", http.StatusBadRequest) + return + } + + // Store the status + key := fmt.Sprintf("%s:%s", status.Repo, status.Server) + stored := StoredStatus{ + StatusUpdate: status, + UpdatedAt: time.Now().UTC().Format(time.RFC3339), + } + + storeMutex.Lock() + statusStore[key] = stored + storeMutex.Unlock() + + log.Printf("Status update: %s -> %s (drift_count: %d)", key, status.SyncStatus, status.DriftCount) + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]string{ + "status": "ok", + "message": "Status updated successfully", + }) +} + +func getStatusHandler(w http.ResponseWriter, r *http.Request) { + storeMutex.RLock() + statuses := make([]StoredStatus, 0, len(statusStore)) + for _, status := range statusStore { + statuses = append(statuses, status) + } + storeMutex.RUnlock() + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(statuses) +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + storeMutex.RLock() + count := len(statusStore) + storeMutex.RUnlock() + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(map[string]interface{}{ + "status": "healthy", + "timestamp": time.Now().UTC().Format(time.RFC3339), + "tracked_repos": count, + }) +} + +func main() { + http.HandleFunc("/api/status", updateStatusHandler) + http.HandleFunc("/status.json", getStatusHandler) + http.HandleFunc("/health", healthHandler) + + log.Println("Starting gitops-status-server on :8080") + log.Fatal(http.ListenAndServe(":8080", nil)) +} +``` + +## Testing the API + +### Using curl + +**Send a status update:** +```bash +curl -X POST http://localhost:8080/api/status \ + -H "Content-Type: application/json" \ + -d '{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "OUT_OF_SYNC", + "drift_count": 2, + "files": [ + {"name": "rsyslog.conf"}, 
+ {"name": "rsyslog.d/30-lab.conf"} + ], + "last_check": "2026-04-21T10:32:15Z" + }' +``` + +**Get all statuses:** +```bash +curl http://localhost:8080/status.json +``` + +**Health check:** +```bash +curl http://localhost:8080/health +``` + +## Grafana Infinity Datasource Configuration + +1. Install Grafana Infinity datasource plugin +2. Add new datasource: + - Type: Infinity + - URL: `http://gitops-status-server.observability-stack.svc.cluster.local:80` +3. Create a panel with query: + - URL: `/status.json` + - Parser: Backend + - Format: Table + +Example query to show all repos: +``` +Source: URL +URL: /status.json +Parser: Backend +Format: Table +Columns: + - repo (string) + - server (string) + - sync_status (string) + - drift_count (number) + - last_check (time) +``` + +Example query to show drift details: +``` +Source: URL +URL: /status.json +Parser: Backend +Format: Table +Root/Rows: $[?(@.drift_count > 0)] +Columns: + - repo (string) + - server (string) + - drift_count (number) + - files (string, JSONata: $join(files.name, ', ')) +``` + +## Notes + +- The example implementations use in-memory storage; production should use a database +- Consider adding authentication/authorization for the POST endpoint +- Add monitoring/metrics for the status server itself +- Consider adding TTL/expiration for stale status entries +- The `/status.json` endpoint should support filtering (e.g., by repo or server) diff --git a/GITOPS_STATUS_INTEGRATION.md b/GITOPS_STATUS_INTEGRATION.md new file mode 100644 index 0000000..a0e760c --- /dev/null +++ b/GITOPS_STATUS_INTEGRATION.md @@ -0,0 +1,117 @@ +# GitOps Status Server Integration + +This document explains how the rsyslog repository integrates with gitops-status-server for GitOps monitoring. + +## Overview + +Instead of pushing simple numeric metrics to Prometheus Pushgateway, the rsyslog repo now sends structured JSON status snapshots to gitops-status-server. 
This enables richer visualization in Grafana with file-level drift details. + +## Architecture + +``` +┌─────────────────┐ ┌──────────────────────┐ ┌─────────────────────┐ +│ Woodpecker CI │ │ gitops-status-server │ │ Grafana │ +│ (rsyslog) │────────►│ (Kubernetes) │────────►│ Infinity Plugin │ +│ │ POST │ │ GET │ │ +│ drift-check │ JSON │ serves /status.json │ │ Dashboard shows │ +│ every 2 min │ │ │ │ drift details │ +└─────────────────┘ └──────────────────────┘ └─────────────────────┘ +``` + +## API Endpoint + +The rsyslog repo sends JSON status updates to: + +``` +POST http://gitops-status-server.observability-stack.svc.cluster.local:80/api/status +Content-Type: application/json +``` + +## JSON Payload Format + +### When synced (no drift) + +```json +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "SYNCED", + "drift_count": 0, + "files": [], + "last_check": "2026-04-21T10:30:00Z" +} +``` + +### When drift detected + +```json +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "OUT_OF_SYNC", + "drift_count": 2, + "files": [ + { "name": "rsyslog.conf" }, + { "name": "rsyslog.d/30-lab.conf" } + ], + "last_check": "2026-04-21T10:32:15Z" +} +``` + +## Field Definitions + +| Field | Type | Description | +|----------------|----------|-------------------------------------------------------| +| `repo` | string | Repository name (e.g., "rsyslog") | +| `server` | string | Target server name (e.g., "rsyslog-lab") | +| `sync_status` | string | Either "SYNCED" or "OUT_OF_SYNC" | +| `drift_count` | integer | Number of files that have drifted from Git | +| `files` | array | List of files with drift (empty if synced) | +| `files[].name` | string | Relative path of drifted file | +| `last_check` | string | ISO 8601 timestamp of when drift check was performed | + +## When Updates Are Sent + +1. 
**After deployment** (push to master): + - Post-deploy verification runs + - JSON status sent to gitops-status-server + - Pipeline step: `update-gitops-status` + +2. **Scheduled cron check** (every 2 minutes): + - Continuous drift monitoring + - JSON status sent to gitops-status-server + - Pipeline step: `gitops_sync_check` + +## Error Handling + +If the HTTP POST to gitops-status-server fails: +- The pipeline step will fail +- Error message will be logged +- The drift check itself is still performed +- No retry logic (next cron run will retry) + +## Script Implementation + +The `update-gitops-status.sh` script handles: +1. Running the Ansible drift-check playbook +2. Parsing the output to extract changed file names +3. Building the JSON payload +4. Sending it to gitops-status-server via HTTP POST + +## Expected HTTP Response + +gitops-status-server should respond with: +- `200 OK` or `201 Created` on success +- `4xx` or `5xx` on error + +The rsyslog pipeline treats any 2xx response as success. + +## Grafana Visualization + +Grafana uses the Infinity datasource plugin to fetch `/status.json` from gitops-status-server and display: +- Current sync status (SYNCED vs OUT_OF_SYNC) +- Number of drifted files +- List of specific files that have drifted +- Last check timestamp + +This provides much richer information than a simple numeric metric. diff --git a/MIGRATION_SUMMARY.md b/MIGRATION_SUMMARY.md new file mode 100644 index 0000000..c6e3bec --- /dev/null +++ b/MIGRATION_SUMMARY.md @@ -0,0 +1,168 @@ +# Migration Summary: Pushgateway → gitops-status-server + +This document summarizes the changes made to migrate the rsyslog repository from Pushgateway metrics to JSON status updates for gitops-status-server. + +## What Changed + +### 1. 
New Files Created + +#### `update-gitops-status.sh` +- Main script that orchestrates the status update flow +- Runs Ansible drift-check playbook +- Parses output to extract changed file names +- Builds structured JSON payload +- Sends JSON to gitops-status-server via HTTP POST +- Handles both SYNCED and OUT_OF_SYNC states + +#### `GITOPS_STATUS_INTEGRATION.md` +- Documentation explaining the integration with gitops-status-server +- API endpoint specification +- JSON payload format examples +- Architecture diagram +- Error handling details + +### 2. Modified Files + +#### `.woodpecker.yml` +**Changes:** +- Updated header comments to reflect new JSON status flow +- Renamed step: `update-sync-metric` → `update-gitops-status` +- Removed Pushgateway environment variable (`PUSHGATEWAY_URL`) +- Added new environment variables: + - `GITOPS_STATUS_SERVER_URL` + - `REPO_NAME` + - `SERVER_NAME` +- Added `jq` package installation for JSON formatting +- Added `bash` package to cron step (required by update-gitops-status.sh) +- Updated both `update-gitops-status` and `gitops_sync_check` steps to call new script +- Removed Pushgateway metric push logic +- Added JSON status update logic + +**Step: `update-gitops-status` (formerly `update-sync-metric`)** +- Runs after successful deployment +- Calls `update-gitops-status.sh` +- Always succeeds (status sent regardless of drift) + +**Step: `gitops_sync_check`** +- Runs on cron schedule (every 2 minutes) +- Calls `update-gitops-status.sh` to send JSON +- Then checks drift status to determine pipeline success/failure +- Pipeline fails if drift detected (for visibility in Woodpecker UI) +- JSON status always sent before checking drift + +#### `README.md` +**Changes:** +- Updated pipeline flow diagrams +- Replaced "Prometheus" with "gitops-status-server" in diagrams +- Removed "sync metric" section +- Added "GitOps status JSON format" section with examples +- Updated pipeline step descriptions to mention JSON status +- Added 
`update-gitops-status.sh` to repository structure +- Added optional environment variables table +- Updated flow descriptions to explain file-level drift details + +### 3. Unchanged Files + +The following files remain unchanged and continue to work as before: +- `ansible/playbooks/drift-check.yml` - Still works as-is +- `ansible/playbooks/apply.yml` - Deploy logic unchanged +- `ansible/playbooks/validate.yml` - Validation logic unchanged +- `ansible/inventory/hosts.yml` - Inventory unchanged +- `ansible/inventory/group_vars/all.yml` - Variables unchanged +- `ansible.cfg` - Ansible config unchanged +- `apply.sh` - Local apply script unchanged +- `drift-check.sh` - Local drift check script unchanged +- `files/rsyslog.conf` - Config files unchanged +- `files/rsyslog.d/30-lab.conf` - Config files unchanged + +## Behavior Changes + +### Before (Pushgateway) + +``` +drift-check → calculate status (0 or 1) → push to Pushgateway → Prometheus scrapes +``` + +Output: +``` +gitops_sync_status{repo="rsyslog",server="rsyslog-lab"} 1 +``` + +### After (gitops-status-server) + +``` +drift-check → extract changed files → build JSON → POST to gitops-status-server → Grafana queries +``` + +Output: +```json +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "OUT_OF_SYNC", + "drift_count": 2, + "files": [ + { "name": "rsyslog.conf" }, + { "name": "rsyslog.d/30-lab.conf" } + ], + "last_check": "2026-04-21T10:32:15Z" +} +``` + +## Benefits + +1. **Richer data**: File-level drift information instead of just a binary status +2. **Better visualization**: Grafana can display which specific files have drifted +3. **Detailed tracking**: Know exactly what changed, not just that something changed +4. **Timestamp tracking**: Last check time included in JSON +5. **Drift count**: Quick numeric indicator of severity +6. 
**Extensible**: JSON format can be easily extended with additional fields + +## Migration Checklist + +- [x] Create `update-gitops-status.sh` script +- [x] Update `.woodpecker.yml` pipeline +- [x] Update `README.md` documentation +- [x] Create `GITOPS_STATUS_INTEGRATION.md` integration docs +- [x] Remove Pushgateway environment variables +- [x] Add gitops-status-server environment variables +- [x] Update pipeline step names +- [x] Add required packages (jq, bash) +- [x] Test JSON generation logic +- [x] Update flow diagrams + +## Testing Recommendations + +1. **Test the script locally:** + ```bash + export GITOPS_STATUS_SERVER_URL="http://localhost:80" + export REPO_NAME="rsyslog" + export SERVER_NAME="rsyslog-lab" + ./update-gitops-status.sh + ``` + +2. **Test in Woodpecker:** + - Trigger a push to master → check `update-gitops-status` step + - Wait for cron run → check `gitops_sync_check` step + - Manually edit a file on server → wait for next cron → verify OUT_OF_SYNC status + +3. **Verify gitops-status-server:** + - Check that JSON is received at POST endpoint + - Verify `/status.json` serves the latest data + - Confirm Grafana dashboard displays drift details + +## Rollback Plan + +If needed, the old Pushgateway approach can be restored by: +1. Reverting `.woodpecker.yml` to previous version +2. Removing `update-gitops-status.sh` +3. Restoring Pushgateway environment variables + +All Ansible playbooks remain unchanged, so they will work with either approach. + +## Notes + +- The rsyslog repo now focuses only on status generation and sending +- gitops-status-server is responsible for serving data to Grafana +- No changes to observability-stack app are needed on the rsyslog side +- This migration is specific to rsyslog repo; other repos can follow same pattern diff --git a/README.md b/README.md index a84704f..23c01d8 100644 --- a/README.md +++ b/README.md @@ -40,21 +40,23 @@ Triggered when a PR is merged into master. 
``` Merge to master │ - ├─► syntax-check Same lint check as PR + ├─► syntax-check Same lint check as PR │ - ├─► validate Same server check as PR + ├─► validate Same server check as PR │ - ├─► deploy Copy the new config files from Git to the server - │ and restart rsyslog + ├─► deploy Copy the new config files from Git to the server + │ and restart rsyslog │ - └─► update-sync-metric Run a diff between Git and the live server - │ - ├─ Matches? → push metric 1 (SYNCED) - └─ Differs? → push metric 0 (OUT_OF_SYNC) + └─► update-gitops-status Run a diff between Git and the live server + │ + ├─ Matches? → send JSON (SYNCED, drift_count: 0) + └─ Differs? → send JSON (OUT_OF_SYNC, drift_count: N, files: [...]) + │ + └─ Update gitops-status-server for Grafana visualization ``` **Pass** = new config is live and the server matches Git. -The sync result is always sent to Prometheus regardless of outcome. +The sync status JSON is always sent to gitops-status-server regardless of outcome. --- @@ -70,19 +72,19 @@ Every 2 minutes (cron) └─► gitops_sync_check SSH to the server, compare every managed config file against the latest Git commit │ - ├─ Matches? → push metric 1 (SYNCED) - └─ Differs? → push metric 0 (OUT_OF_SYNC) + ├─ Matches? → send JSON (SYNCED, drift_count: 0, files: []) + └─ Differs? → send JSON (OUT_OF_SYNC, drift_count: N, files: [...]) + │ + └─ Update gitops-status-server for Grafana visualization ``` **Why this matters:** if someone edits `/etc/rsyslog.conf` directly on the server -(bypassing Git), the next cron run catches it within 2 minutes and marks OUT_OF_SYNC. +(bypassing Git), the next cron run catches it within 2 minutes and marks OUT_OF_SYNC +with detailed information about which specific files have drifted. 
--- -## Full flow diagram - -``` -Developer Woodpecker CI Linux Server Prometheus +## Full flow diagramgitops-status-server │ │ │ │ │── open PR ───────────────►│ │ │ │ │── syntax-check │ │ @@ -96,40 +98,69 @@ Developer Woodpecker CI Linux Server Prom │ │ │ restart rsyslog │ │ │── drift-check ──────────►│ compare files │ │ │ │◄────────────────────│ - │ │── metric (1 or 0) ───────────────────────────►│ + │ │── JSON status ───────────────────────────────►│ │ │ │ │ │ │ [every 2 min, no push] │ │ │ │── drift-check ──────────►│ compare files │ - │ │── metric (1 or 0) ───────────────────────────►│ + │ │── JSON status ───────────────────────────────►│ │ │ │ │ Someone edits the server directly (bad): - rogue admin Woodpecker CI Linux Server Prometheus + rogue admin Woodpecker CI Linux Server gitops-status-server │ │ │ │ │── ssh rsyslog-lab │ │ │ │── vim /etc/rsyslog.conf ──────────────────────────► │ file changed │ │ │ │ │ │ [2 min later, cron runs] │ │ │ │── drift-check ──────────►│ diff detected │ + │ │── JSON status (OUT_OF_SYNC)─────────────────►│ + │ │ drift_count: 1 │ Grafana shows + │ │ files: [rsyslog.conf] OUT_OF_SYNC + │ │── drift-check ──────────►│ diff detected │ │ │── metric 0 (OUT_OF_SYNC)────────────────────►│ │ │ │ alert fires ``` --- +GitOps status JSON format -## What is the sync metric? 
+Instead of simple numeric metrics, this repo now sends rich JSON status data to gitops-status-server: -``` -gitops_sync_status{repo="rsyslog", server="rsyslog-lab"} +```json +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "SYNCED", + "drift_count": 0, + "files": [], + "last_check": "2026-04-21T10:30:00Z" +} ``` -| Value | Meaning | -|-------|---------| -| `1` | Server config matches Git (SYNCED) | -| `0` | Server config differs from Git (OUT_OF_SYNC) | +When drift is detected: +```json +{ + "repo": "rsyslog", + "server": "rsyslog-lab", + "sync_status": "OUT_OF_SYNC", + "drift_count": 2, + "files": [ + { "name": "rsyslog.conf" }, + { "name": "rsyslog.d/30-lab.conf" } + ], + "last_check": "2026-04-21T10:32:15Z" +} +``` + +update-gitops-status.sh Script to generate and send JSON status to gitops-status-server +This JSON is sent to `gitops-status-server` at: +- `http://gitops-status-server.observability-stack.svc.cluster.local:80/api/status` + +The gitops-status-server app serves this data via `/status.json` for Grafana Infinity datasource, +providing rich visualization with file-level drift details instead of just a numeric metric Alert on `gitops_sync_status == 0` in Grafana/Alertmanager. 
--- @@ -182,6 +213,16 @@ Go to **Repository Settings → Crons → Add cron**: Go to **Repository Settings → Secrets**: -| Name | Description | -|-------------------|------------------------------------| -| `SSH_PRIVATE_KEY` | Private key to SSH into the server | +| Name | Description | +|----------------------------|-------------------------------------------------------| +| `SSH_PRIVATE_KEY` | Private key to SSH into the server | + +## Optional environment variables + +These can be overridden in the Woodpecker pipeline or `.woodpecker.yml`: + +| Variable | Default | Description | +|------------------------------|--------------------------------------------------------------------------|---------------------------------------| +| `GITOPS_STATUS_SERVER_URL` | `http://gitops-status-server.observability-stack.svc.cluster.local:80` | URL of gitops-status-server API | +| `REPO_NAME` | `rsyslog` | Repository name for JSON status | +| `SERVER_NAME` | `rsyslog-lab` | Server name for JSON status | diff --git a/update-gitops-status.sh b/update-gitops-status.sh new file mode 100644 index 0000000..b91dd08 --- /dev/null +++ b/update-gitops-status.sh @@ -0,0 +1,162 @@ +#!/bin/bash +# ============================================================================= +# update-gitops-status.sh +# +# Purpose: +# Runs drift-check playbook and generates a JSON status snapshot for +# gitops-status-server. This replaces Pushgateway metric updates with +# richer JSON status suitable for Grafana visualization. 
+#
+# Usage:
+#   ./update-gitops-status.sh
+#
+# Requirements:
+#   bash (arrays, [[ =~ ]], process substitution), ansible-playbook, curl.
+#   jq is optional and only used to pretty-print the generated JSON.
+#
+# Environment Variables:
+#   GITOPS_STATUS_SERVER_URL - URL of gitops-status-server API
+#       (default: http://gitops-status-server.observability-stack.svc.cluster.local:80)
+#   REPO_NAME   - Repository name (default: rsyslog)
+#   SERVER_NAME - Server name (default: rsyslog-lab)
+#
+# Output:
+#   Generates JSON structure:
+#   {
+#     "repo": "rsyslog",
+#     "server": "rsyslog-lab",
+#     "sync_status": "SYNCED" or "OUT_OF_SYNC",
+#     "drift_count": <number>,
+#     "files": [{"name": "rsyslog.conf"}, ...],
+#     "last_check": "2026-04-21T10:30:00Z"
+#   }
+#
+# Exit codes:
+#   0 - Status was delivered (regardless of sync status). Any non-zero
+#       ansible-playbook exit is reported as OUT_OF_SYNC, not as a failure.
+#   1 - The status POST did not return a 2xx response (network error, etc.)
+# =============================================================================
+
+set -e
+
+# Configuration
+GITOPS_STATUS_SERVER_URL="${GITOPS_STATUS_SERVER_URL:-http://gitops-status-server.observability-stack.svc.cluster.local:80}"
+REPO_NAME="${REPO_NAME:-rsyslog}"
+SERVER_NAME="${SERVER_NAME:-rsyslog-lab}"
+INVENTORY_FILE="ansible/inventory/hosts.yml"
+PLAYBOOK="ansible/playbooks/drift-check.yml"
+
+echo "==> Running drift check playbook..."
+echo " Inventory: $INVENTORY_FILE"
+echo " Playbook: $PLAYBOOK"
+
+# Run drift-check playbook with the JSON stdout callback and capture results.
+# stderr is merged into the captured text (2>&1).
+# NOTE(review): --diff is not passed on this command line; the file-name
+# extraction below relies on "files/rsyslog.d/..." paths showing up in the
+# captured output - confirm the playbook emits them.
+# Exit code: 0 = synced, non-zero = drift or error
+# set -e is suspended so a non-zero playbook exit does not abort the script.
+set +e
+PLAYBOOK_OUTPUT=$(ANSIBLE_STDOUT_CALLBACK=json ansible-playbook \
+    -i "$INVENTORY_FILE" \
+    "$PLAYBOOK" \
+    2>&1)
+DRIFT_RC=$?
+set -e
+
+# Determine sync status
+if [ "$DRIFT_RC" -eq 0 ]; then
+    SYNC_STATUS="SYNCED"
+    echo "==> Status: SYNCED - server configuration matches Git"
+else
+    SYNC_STATUS="OUT_OF_SYNC"
+    echo "==> Status: OUT_OF_SYNC - drift detected"
+fi
+
+# Parse changed files from playbook output
+CHANGED_FILES=()
+DRIFT_COUNT=0
+
+# record_drift NAME: append NAME to CHANGED_FILES and bump DRIFT_COUNT.
+# Uses an arithmetic assignment instead of ((DRIFT_COUNT++)): the latter
+# returns status 1 when the old value is 0, which aborts under `set -e`.
+record_drift() {
+    CHANGED_FILES+=("$1")
+    DRIFT_COUNT=$((DRIFT_COUNT + 1))
+}
+
+# Extract file changes from the captured output
+# main_config_check tracks rsyslog.conf
+# rsyslogd_check tracks rsyslog.d/* files
+# NOTE(review): grep is line-oriented, so these patterns only match when the
+# marker and "changed": true appear on the same output line - confirm against
+# the callback's real output.
+if echo "$PLAYBOOK_OUTPUT" | grep -q '"main_config_check".*"changed".*true'; then
+    record_drift "rsyslog.conf"
+    echo " - Drift detected: rsyslog.conf"
+fi
+
+# Check if rsyslog.d directory has changes
+if echo "$PLAYBOOK_OUTPUT" | grep -q '"rsyslogd_check".*"changed".*true'; then
+    # Try to extract specific filenames from the output
+    # Format: files/rsyslog.d/30-lab.conf
+    # grep -E (not -P) so this also works with BusyBox grep; sort -u so a
+    # path mentioned several times is only counted once.
+    RSYSLOGD_FOUND=0
+    while IFS= read -r line; do
+        if [[ "$line" =~ files/rsyslog\.d/([^[:space:]]+\.conf) ]]; then
+            filename="${BASH_REMATCH[1]}"
+            record_drift "rsyslog.d/$filename"
+            RSYSLOGD_FOUND=$((RSYSLOGD_FOUND + 1))
+            echo " - Drift detected: rsyslog.d/$filename"
+        fi
+    done < <(echo "$PLAYBOOK_OUTPUT" | grep -oE 'files/rsyslog\.d/[^[:space:]]+\.conf' | sort -u || true)
+
+    # If we couldn't extract specific files but know rsyslog.d changed,
+    # add a generic entry. Only rsyslog.d matches are counted here, so an
+    # rsyslog.conf entry recorded above does not influence the decision.
+    if [ "$RSYSLOGD_FOUND" -eq 0 ]; then
+        record_drift "rsyslog.d/*"
+        echo " - Drift detected: rsyslog.d/* (multiple files)"
+    fi
+fi
+
+# Check for missing files on server
+# NOTE(review): the matched marker is named "extra_files_on_server" while the
+# messages say "missing" - confirm which one the playbook actually means.
+if echo "$PLAYBOOK_OUTPUT" | grep -q '"extra_files_on_server".*true'; then
+    record_drift "(missing files on server)"
+    echo " - Drift detected: files missing on server"
+fi
+
+echo "==> Drift count: $DRIFT_COUNT"
+
+# Build JSON file list; an empty CHANGED_FILES yields "[]".
+FILES_JSON="["
+separator=""
+for name in "${CHANGED_FILES[@]}"; do
+    # Escape backslashes and double quotes so the entry stays valid JSON.
+    name=${name//\\/\\\\}
+    name=${name//\"/\\\"}
+    FILES_JSON+="${separator}{\"name\":\"${name}\"}"
+    separator=","
+done
+FILES_JSON+="]"
+
+# Generate ISO timestamp
+TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
+
+# Build complete JSON status
+STATUS_JSON=$(cat <<EOF
+{
+  "repo": "$REPO_NAME",
+  "server": "$SERVER_NAME",
+  "sync_status": "$SYNC_STATUS",
+  "drift_count": $DRIFT_COUNT,
+  "files": $FILES_JSON,
+  "last_check": "$TIMESTAMP"
+}
+EOF
+)
+
+echo "==> Generated JSON status:"
+echo "$STATUS_JSON" | jq '.' 2>/dev/null || echo "$STATUS_JSON"
+
+# Send JSON to gitops-status-server
+# API endpoint: POST /api/status
+echo "==> Sending status to gitops-status-server..."
+echo " URL: $GITOPS_STATUS_SERVER_URL/api/status"
+
+# `|| true` keeps `set -e` from killing the script when curl itself fails
+# (e.g. connection refused), so the error branch below can still report it.
+HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
+    -X POST \
+    -H "Content-Type: application/json" \
+    -d "$STATUS_JSON" \
+    "$GITOPS_STATUS_SERVER_URL/api/status") || true
+# Fall back to 000 if curl produced no output at all.
+HTTP_CODE="${HTTP_CODE:-000}"
+
+if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then
+    echo "==> Status update successful (HTTP $HTTP_CODE)"
+    exit 0
+else
+    echo "==> ERROR: Status update failed (HTTP $HTTP_CODE)"
+    exit 1
+fi