Compare commits
31 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
1b45e26765 | ||
| 5607c481c1 | |||
|
|
6129d765f4 | ||
|
|
99aabc9aff | ||
|
|
a250dcc633 | ||
|
|
229625e441 | ||
|
|
a04031eef6 | ||
|
|
7a3ffba4c6 | ||
|
|
9c664e0462 | ||
|
|
ef3649595e | ||
|
|
a1d5486091 | ||
|
|
19f12d70d7 | ||
|
|
451eaab9b9 | ||
|
|
6a6ff1dfb3 | ||
|
|
e013edb4ee | ||
|
|
39c0b88476 | ||
|
|
12c4b01943 | ||
|
|
45f1d48e2b | ||
|
|
b9eaa4cc2c | ||
|
|
519fa61908 | ||
|
|
9af9b5b8c9 | ||
|
|
03830e34cb | ||
| 8fb9810cc4 | |||
| 8b837e5b40 | |||
| b80ee095c9 | |||
|
|
2377e969b7 | ||
| 8cde902454 | |||
| 4f18336f89 | |||
| da50fbb61f | |||
| 809cf3a5dc | |||
| c312761eee |
29
argocd-apps/gitops-status-server.yaml
Normal file
29
argocd-apps/gitops-status-server.yaml
Normal file
@ -0,0 +1,29 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: gitops-status-server
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: observability-stack
|
||||
source:
|
||||
repoURL: ssh://git@gitea-ssh.dev-tools.svc.cluster.local:2222/dvirlabs/observability-stack.git
|
||||
targetRevision: HEAD
|
||||
path: charts/gitops-status-server
|
||||
helm:
|
||||
valueFiles:
|
||||
- ../../manifests/gitops-status-server/values.yaml
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: observability-stack
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
retry:
|
||||
limit: 5
|
||||
backoff:
|
||||
duration: 5s
|
||||
factor: 2
|
||||
maxDuration: 3m
|
||||
@ -4,9 +4,9 @@ metadata:
|
||||
name: kube-prometheus-stack
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: observability
|
||||
project: observability-stack
|
||||
source:
|
||||
repoURL: ssh://git@gitea-ssh.dev-tools.svc.cluster.local.:2222/dvirlabs/observability-stack.git
|
||||
repoURL: ssh://git@gitea-ssh.dev-tools.svc.cluster.local:2222/dvirlabs/observability-stack.git
|
||||
targetRevision: HEAD
|
||||
path: charts/kube-prometheus-stack
|
||||
helm:
|
||||
@ -14,10 +14,16 @@ spec:
|
||||
- ../../manifests/kube-prometheus-stack/values.yaml
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: observability
|
||||
namespace: observability-stack
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
retry:
|
||||
limit: 5
|
||||
backoff:
|
||||
duration: 5s
|
||||
factor: 2
|
||||
maxDuration: 3m
|
||||
22
argocd-apps/raw-resources-observability-stack.yaml
Normal file
22
argocd-apps/raw-resources-observability-stack.yaml
Normal file
@ -0,0 +1,22 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: raw-resources-observability-stack
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: observability-stack
|
||||
source:
|
||||
repoURL: ssh://git@gitea-ssh.dev-tools.svc.cluster.local:2222/dvirlabs/observability-stack.git
|
||||
targetRevision: HEAD
|
||||
path: manifests/raw-resources-observability-stack
|
||||
directory:
|
||||
recurse: true
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: observability-stack
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
23
argocd-apps/secrets-observability-stack.yaml
Normal file
23
argocd-apps/secrets-observability-stack.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: secrets-observability-stack
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: observability-stack
|
||||
source:
|
||||
repoURL: ssh://git@gitea-ssh.dev-tools.svc.cluster.local:2222/dvirlabs/observability-stack.git
|
||||
targetRevision: HEAD
|
||||
path: charts/secrets
|
||||
helm:
|
||||
valueFiles:
|
||||
- ../../manifests/secrets-observability-stack/values.yaml
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: observability-stack
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
syncOptions:
|
||||
- CreateNamespace=true
|
||||
23
charts/gitops-status-server/.helmignore
Normal file
23
charts/gitops-status-server/.helmignore
Normal file
@ -0,0 +1,23 @@
|
||||
# Patterns to ignore when building packages.
|
||||
# This supports shell glob matching, relative path matching, and
|
||||
# negation (prefixed with !). Only one pattern per line.
|
||||
.DS_Store
|
||||
# Common VCS dirs
|
||||
.git/
|
||||
.gitignore
|
||||
.bzr/
|
||||
.bzrignore
|
||||
.hg/
|
||||
.hgignore
|
||||
.svn/
|
||||
# Common backup files
|
||||
*.swp
|
||||
*.bak
|
||||
*.tmp
|
||||
*.orig
|
||||
*~
|
||||
# Various IDEs
|
||||
.project
|
||||
.idea/
|
||||
*.tmproj
|
||||
.vscode/
|
||||
14
charts/gitops-status-server/Chart.yaml
Normal file
14
charts/gitops-status-server/Chart.yaml
Normal file
@ -0,0 +1,14 @@
|
||||
apiVersion: v2
|
||||
name: gitops-status-server
|
||||
description: A minimal HTTP server that serves GitOps status information as JSON
|
||||
type: application
|
||||
version: 1.0.0
|
||||
appVersion: "1.25.5"
|
||||
keywords:
|
||||
- gitops
|
||||
- status
|
||||
- monitoring
|
||||
- nginx
|
||||
maintainers:
|
||||
- name: DevOps Team
|
||||
home: https://github.com/your-org/observability-stack
|
||||
478
charts/gitops-status-server/README.md
Normal file
478
charts/gitops-status-server/README.md
Normal file
@ -0,0 +1,478 @@
|
||||
# GitOps Status Server Helm Chart
|
||||
|
||||
A dual-container HTTP server that receives GitOps status updates via POST API and serves status information as JSON for monitoring and observability purposes.
|
||||
|
||||
## Overview
|
||||
|
||||
This chart deploys a two-container pod:
|
||||
1. **Nginx** - Serves `/status.json` endpoint for monitoring tools and handles API routing
|
||||
2. **Flask API** - Processes POST requests to `/api/status` and updates the status JSON
|
||||
|
||||
It's designed to be consumed by Grafana's Infinity datasource or other monitoring tools, and to receive updates from CI/CD pipelines like Woodpecker.
|
||||
|
||||
## Architecture
|
||||
|
||||
```
|
||||
CI/CD Pipeline (Woodpecker)
|
||||
↓
|
||||
POST /api/status
|
||||
↓
|
||||
Kubernetes Service (port 80)
|
||||
↓
|
||||
Nginx (port 8080)
|
||||
├─→ /api/status → Proxies to Flask (localhost:5000)
|
||||
└─→ /status.json → Serves static file
|
||||
↓
|
||||
Shared Volume (emptyDir)
|
||||
├─→ status.json (updated by Flask API)
|
||||
└─→ Read by Nginx
|
||||
↓
|
||||
Grafana Infinity Datasource
|
||||
Reads /status.json
|
||||
```
|
||||
|
||||
## Features
|
||||
|
||||
- **API-driven updates**: POST endpoint for CI/CD pipelines to update status
|
||||
- **Read-only serving**: Grafana-friendly JSON endpoint
|
||||
- **Minimal footprint**: nginx-unprivileged + Python-Alpine with minimal resources
|
||||
- **Secure by default**: Runs as non-root with restricted filesystems
|
||||
- **Internal only**: ClusterIP service for cluster-internal access
|
||||
- **ArgoCD compatible**: Init container auto-initializes status from ConfigMap
|
||||
- **Production-ready**: Includes health checks, security contexts, and resource limits
|
||||
|
||||
## Installation
|
||||
|
||||
### Using Helm
|
||||
|
||||
```bash
|
||||
# Install with default values
|
||||
helm install gitops-status ./gitops-status-server
|
||||
|
||||
# Install with custom namespace
|
||||
helm install gitops-status ./gitops-status-server -n observability-stack --create-namespace
|
||||
|
||||
# Install with custom values
|
||||
helm install gitops-status ./gitops-status-server -f custom-values.yaml
|
||||
```
|
||||
|
||||
### Using ArgoCD
|
||||
|
||||
Create an Application manifest:
|
||||
|
||||
```yaml
|
||||
apiVersion: argoproj.io/v1alpha1
|
||||
kind: Application
|
||||
metadata:
|
||||
name: gitops-status-server
|
||||
namespace: argocd
|
||||
spec:
|
||||
project: default
|
||||
source:
|
||||
repoURL: https://github.com/your-org/observability-stack
|
||||
targetRevision: main
|
||||
path: gitops-status-server
|
||||
helm:
|
||||
values: |
|
||||
replicaCount: 1
|
||||
statusJson:
|
||||
repo: "rsyslog"
|
||||
server: "rsyslog-lab"
|
||||
sync_status: "UNKNOWN"
|
||||
```
|
||||
|
||||
## API Endpoints
|
||||
|
||||
### GET /status.json
|
||||
Returns the current status JSON
|
||||
|
||||
```bash
|
||||
curl http://gitops-status-server.observability-stack.svc.cluster.local:80/status.json
|
||||
```
|
||||
|
||||
Response:
|
||||
```json
|
||||
{
|
||||
"repo": "rsyslog",
|
||||
"server": "rsyslog-lab",
|
||||
"sync_status": "SYNCED",
|
||||
"drift_count": 0,
|
||||
"files": [],
|
||||
"last_check": "2026-04-21T10:30:00Z"
|
||||
}
|
||||
```
|
||||
|
||||
### POST /api/status
|
||||
Updates the status with new data
|
||||
|
||||
```bash
|
||||
curl -X POST http://gitops-status-server.observability-stack.svc.cluster.local:80/api/status \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"repo": "rsyslog",
|
||||
"server": "rsyslog-lab",
|
||||
"sync_status": "OUT_OF_SYNC",
|
||||
"drift_count": 2,
|
||||
"files": [
|
||||
{"name": "rsyslog.conf"},
|
||||
{"name": "rsyslog.d/30-lab.conf"}
|
||||
],
|
||||
"last_check": "2026-04-21T10:30:00Z"
|
||||
}'
|
||||
```
|
||||
|
||||
Response (HTTP 200):
|
||||
```json
|
||||
{
|
||||
"success": true,
|
||||
"message": "Status updated successfully",
|
||||
"status": { ... }
|
||||
}
|
||||
```
|
||||
|
||||
### GET /health
|
||||
Health check endpoint (returns HTTP 200)
|
||||
|
||||
```bash
|
||||
curl http://gitops-status-server.observability-stack.svc.cluster.local:80/health
|
||||
```
|
||||
|
||||
### GET /ready
|
||||
Readiness check (verifies status file is readable)
|
||||
|
||||
```bash
|
||||
curl http://gitops-status-server.observability-stack.svc.cluster.local:80/ready
|
||||
```
|
||||
|
||||
## Integration with Woodpecker
|
||||
|
||||
The rsyslog CI/CD pipeline can update status by POSTing to the `/api/status` endpoint:
|
||||
|
||||
```bash
|
||||
#!/bin/bash
|
||||
|
||||
GITOPS_STATUS_SERVER_URL="http://gitops-status-server.observability-stack.svc.cluster.local:80"
|
||||
|
||||
STATUS_JSON='{
|
||||
"repo": "rsyslog",
|
||||
"server": "rsyslog-lab",
|
||||
"sync_status": "SYNCED",
|
||||
"drift_count": 0,
|
||||
"files": [],
|
||||
"last_check": "2026-04-21T10:30:00Z"
|
||||
}'
|
||||
|
||||
curl -X POST "$GITOPS_STATUS_SERVER_URL/api/status" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$STATUS_JSON"
|
||||
```
|
||||
|
||||
## Service Discovery
|
||||
|
||||
### Internal Kubernetes URL
|
||||
```
|
||||
http://gitops-status-server.observability-stack.svc.cluster.local:80/status.json
|
||||
```
|
||||
|
||||
### Port Forwarding (for local testing)
|
||||
```bash
|
||||
kubectl port-forward -n observability-stack svc/gitops-status-server 8080:80
|
||||
# Then access at http://localhost:8080/status.json
|
||||
```
|
||||
|
||||
### NodePort (if service type is changed)
|
||||
```bash
|
||||
kubectl patch service -n observability-stack gitops-status-server -p '{"spec":{"type":"NodePort"}}'
|
||||
# Then access at http://<node-ip>:<node-port>/status.json
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
See `values.yaml` for all configuration options:
|
||||
|
||||
- `replicaCount`: Number of replicas
|
||||
- `image.repository`: Container image
|
||||
- `image.tag`: Image tag
|
||||
- `service.type`: Service type (ClusterIP, NodePort, LoadBalancer)
|
||||
- `service.port`: Service port (default 80)
|
||||
- `service.targetPort`: Container port (default 8080)
|
||||
- `resources`: CPU/memory limits and requests
|
||||
- `statusJson`: Default status JSON values
|
||||
- `api.image.*`: Python/Flask image configuration
|
||||
|
||||
## Grafana Integration
|
||||
|
||||
### Infinity Datasource Configuration
|
||||
|
||||
1. Install Infinity datasource plugin:
|
||||
```bash
|
||||
grafana-cli plugins install yesoreyeram-infinity-datasource
|
||||
```
|
||||
|
||||
2. Add datasource with URL:
|
||||
```
|
||||
http://gitops-status-server.observability-stack.svc.cluster.local:80/status.json
|
||||
```
|
||||
|
||||
3. Create panels to visualize:
|
||||
- `sync_status`: Current synchronization state
|
||||
- `drift_count`: Number of drifted files
|
||||
- `files[]`: List of changed files
|
||||
- `last_check`: Timestamp of last check
|
||||
|
||||
### Example Query
|
||||
|
||||
```json
|
||||
{
|
||||
"url": "http://gitops-status-server.observability-stack.svc.cluster.local:80/status.json",
|
||||
"format": "json"
|
||||
}
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
- Runs as non-root user (UID 101)
|
||||
- Read-only root filesystem (except for /tmp, /var/cache/nginx, /var/run)
|
||||
- No privileged capabilities
|
||||
- Network policies recommended for production
|
||||
- Service Account with minimal RBAC
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### POST Request Returns 400 Error
|
||||
|
||||
**Issue**: "Invalid JSON" error
|
||||
|
||||
**Solution**: Verify JSON formatting with:
|
||||
```bash
|
||||
echo '{...}' | jq '.'
|
||||
```
|
||||
|
||||
### POST Updates Not Appearing in GET Response
|
||||
|
||||
**Issue**: Update endpoint returns 200 but status.json isn't updated
|
||||
|
||||
**Possible causes**:
|
||||
- Shared volume permission issue
|
||||
- API container crashed after POST
|
||||
- Status file permissions
|
||||
|
||||
**Debug**:
|
||||
```bash
|
||||
# Check logs
|
||||
kubectl logs -f deployment/gitops-status-server -c api
|
||||
kubectl logs -f deployment/gitops-status-server -c nginx
|
||||
|
||||
# Check shared volume
|
||||
kubectl exec deployment/gitops-status-server -c nginx -- ls -la /usr/share/nginx/html/
|
||||
|
||||
# Test API directly (port-forward to 5000 first)
|
||||
kubectl port-forward deployment/gitops-status-server 5000:5000
|
||||
curl -X POST http://localhost:5000/api/status -H "Content-Type: application/json" -d '{...}'
|
||||
```
|
||||
|
||||
### Connection Refused to gitops-status-server
|
||||
|
||||
**Issue**: Woodpecker can't reach the service
|
||||
|
||||
**Possible causes**:
|
||||
- Service in different namespace
|
||||
- Network policies blocking traffic
|
||||
- Woodpecker outside cluster
|
||||
- Service DNS name incorrect
|
||||
|
||||
**Solutions**:
|
||||
- Verify service exists: `kubectl get svc gitops-status-server -n observability-stack`
|
||||
- Use NodePort for external access (update service type in values)
|
||||
- Use port-forward as a temporary solution
|
||||
- Verify network policies allow traffic
|
||||
|
||||
## Performance
|
||||
|
||||
- **CPU**: 200m limit (100m nginx + 100m API)
|
||||
- **Memory**: 192Mi limit (64Mi nginx + 128Mi API)
|
||||
- **Startup time**: ~5 seconds (Flask app install + startup)
|
||||
- **Update latency**: <100ms (direct file write)
|
||||
- **Read performance**: <10ms (static file serving)
|
||||
|
||||
## License
|
||||
|
||||
Same as observability-stack repository
|
||||
```yaml
|
||||
statusJson:
|
||||
repo: "my-repo"
|
||||
server: "my-server"
|
||||
sync_status: "SYNCED"
|
||||
drift_count: 0
|
||||
files: []
|
||||
last_check: "2026-04-21T10:00:00Z"
|
||||
destination:
|
||||
server: https://kubernetes.default.svc
|
||||
namespace: monitoring
|
||||
syncPolicy:
|
||||
automated:
|
||||
prune: true
|
||||
selfHeal: true
|
||||
```
|
||||
|
||||
## Configuration
|
||||
|
||||
### Key Values
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-----------|-------------|---------|
|
||||
| `replicaCount` | Number of replicas | `1` |
|
||||
| `image.repository` | Container image repository | `nginxinc/nginx-unprivileged` |
|
||||
| `image.tag` | Container image tag | `1.25-alpine` |
|
||||
| `service.type` | Kubernetes service type | `ClusterIP` |
|
||||
| `service.port` | Service port | `80` |
|
||||
| `service.targetPort` | Container target port | `8080` |
|
||||
| `resources.limits.cpu` | CPU limit | `100m` |
|
||||
| `resources.limits.memory` | Memory limit | `64Mi` |
|
||||
| `statusJson` | JSON content to serve | See values.yaml |
|
||||
|
||||
### Custom Status JSON
|
||||
|
||||
Override the status JSON content in your values:
|
||||
|
||||
```yaml
|
||||
statusJson:
|
||||
repo: "production-apps"
|
||||
server: "prod-cluster-01"
|
||||
sync_status: "SYNCED"
|
||||
drift_count: 2
|
||||
files:
|
||||
- "deployment.yaml"
|
||||
- "service.yaml"
|
||||
last_check: "2026-04-21T12:30:00Z"
|
||||
```
|
||||
|
||||
## Usage
|
||||
|
||||
### Access the Status Endpoint
|
||||
|
||||
From inside the cluster:
|
||||
|
||||
```bash
|
||||
# Using the service DNS name
|
||||
curl http://gitops-status-server/status.json
|
||||
|
||||
# With namespace
|
||||
curl http://gitops-status-server.observability-stack.svc.cluster.local/status.json
|
||||
```
|
||||
|
||||
### Grafana Infinity Datasource Configuration
|
||||
|
||||
1. Add an Infinity datasource in Grafana
|
||||
2. Configure URL: `http://gitops-status-server.observability-stack.svc.cluster.local/status.json`
|
||||
3. Parser: JSON
|
||||
4. Use fields from the JSON response in your dashboard
|
||||
|
||||
Example query fields:
|
||||
- `sync_status` - Current sync status
|
||||
- `drift_count` - Number of drifted resources
|
||||
- `files` - List of changed files
|
||||
- `last_check` - Timestamp of last check
|
||||
|
||||
## Updating Status Data
|
||||
|
||||
### Manual Update
|
||||
|
||||
Edit the ConfigMap directly:
|
||||
|
||||
```bash
|
||||
kubectl edit configmap gitops-status-server -n observability-stack
|
||||
```
|
||||
|
||||
The deployment will automatically roll out with the new content due to the ConfigMap checksum annotation.
|
||||
|
||||
### Automated Update via Pipeline
|
||||
|
||||
Use `kubectl` in your CI/CD pipeline:
|
||||
|
||||
```bash
|
||||
kubectl create configmap gitops-status-server \
|
||||
--from-file=status.json=./status.json \
|
||||
--dry-run=client -o yaml | kubectl apply -f -
|
||||
```
|
||||
|
||||
### ArgoCD Hook (Advanced)
|
||||
|
||||
Create a PostSync hook that updates the ConfigMap with current sync status:
|
||||
|
||||
```yaml
|
||||
apiVersion: batch/v1
|
||||
kind: Job
|
||||
metadata:
|
||||
name: update-status
|
||||
annotations:
|
||||
argocd.argoproj.io/hook: PostSync
|
||||
spec:
|
||||
template:
|
||||
spec:
|
||||
containers:
|
||||
- name: update
|
||||
image: bitnami/kubectl
|
||||
command:
|
||||
- /bin/sh
|
||||
- -c
|
||||
- |
|
||||
# Update status.json with current sync status
|
||||
kubectl patch configmap gitops-status-server \
|
||||
--patch '{"data":{"status.json":"..."}}'
|
||||
restartPolicy: Never
|
||||
```
|
||||
|
||||
## Security Considerations
|
||||
|
||||
- Runs as non-root user (UID 101)
|
||||
- Read-only root filesystem
|
||||
- No privilege escalation
|
||||
- Minimal capabilities (all dropped)
|
||||
- No external network access required
|
||||
- ClusterIP only (no external exposure)
|
||||
|
||||
## Resource Requirements
|
||||
|
||||
Minimal resource footprint suitable for small clusters:
|
||||
- CPU: 50m request / 100m limit
|
||||
- Memory: 32Mi request / 64Mi limit
|
||||
|
||||
## Troubleshooting
|
||||
|
||||
### Check pod status
|
||||
|
||||
```bash
|
||||
kubectl get pods -l app.kubernetes.io/name=gitops-status-server
|
||||
```
|
||||
|
||||
### View logs
|
||||
|
||||
```bash
|
||||
kubectl logs -l app.kubernetes.io/name=gitops-status-server
|
||||
```
|
||||
|
||||
### Test endpoint
|
||||
|
||||
```bash
|
||||
kubectl run -it --rm curl --image=curlimages/curl --restart=Never -- \
|
||||
curl http://gitops-status-server/status.json
|
||||
```
|
||||
|
||||
### Common Issues
|
||||
|
||||
**Pod not starting**: Check security context compatibility with your cluster's PSP/PSA policies.
|
||||
|
||||
**Empty response**: Verify the ConfigMap is mounted correctly:
|
||||
```bash
|
||||
kubectl describe pod -l app.kubernetes.io/name=gitops-status-server
|
||||
```
|
||||
|
||||
**Service not accessible**: Ensure you're accessing from within the cluster and using the correct namespace.
|
||||
|
||||
## License
|
||||
|
||||
This chart is part of the observability-stack project.
|
||||
|
||||
## Maintainers
|
||||
|
||||
- DevOps Team
|
||||
63
charts/gitops-status-server/templates/_helpers.tpl
Normal file
63
charts/gitops-status-server/templates/_helpers.tpl
Normal file
@ -0,0 +1,63 @@
|
||||
{{/*
|
||||
Expand the name of the chart.
|
||||
*/}}
|
||||
{{- define "gitops-status-server.name" -}}
|
||||
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create a default fully qualified app name.
|
||||
*/}}
|
||||
{{- define "gitops-status-server.fullname" -}}
|
||||
{{- if .Values.fullnameOverride }}
|
||||
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- $name := default .Chart.Name .Values.nameOverride }}
|
||||
{{- if contains $name .Release.Name }}
|
||||
{{- .Release.Name | trunc 63 | trimSuffix "-" }}
|
||||
{{- else }}
|
||||
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create chart name and version as used by the chart label.
|
||||
*/}}
|
||||
{{- define "gitops-status-server.chart" -}}
|
||||
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Common labels
|
||||
*/}}
|
||||
{{- define "gitops-status-server.labels" -}}
|
||||
helm.sh/chart: {{ include "gitops-status-server.chart" . }}
|
||||
{{ include "gitops-status-server.selectorLabels" . }}
|
||||
{{- if .Chart.AppVersion }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- with .Values.labels }}
|
||||
{{ toYaml . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "gitops-status-server.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "gitops-status-server.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end }}
|
||||
|
||||
{{/*
|
||||
Create the name of the service account to use
|
||||
*/}}
|
||||
{{- define "gitops-status-server.serviceAccountName" -}}
|
||||
{{- if .Values.serviceAccount.create }}
|
||||
{{- default (include "gitops-status-server.fullname" .) .Values.serviceAccount.name }}
|
||||
{{- else }}
|
||||
{{- default "default" .Values.serviceAccount.name }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
142
charts/gitops-status-server/templates/api-app.yaml
Normal file
142
charts/gitops-status-server/templates/api-app.yaml
Normal file
@ -0,0 +1,142 @@
|
||||
{{/*
|
||||
ConfigMap containing the API backend Python script
|
||||
Handles POST requests to /api/status and updates the status.json file
|
||||
*/}}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "gitops-status-server.fullname" . }}-api
|
||||
labels:
|
||||
{{- include "gitops-status-server.labels" . | nindent 4 }}
|
||||
data:
|
||||
app.py: |
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
Simple Flask API for updating status.json
|
||||
Listens on port 5000 and handles POST requests to /api/status
|
||||
"""
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from flask import Flask, request, jsonify
|
||||
from datetime import datetime
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
# Configuration
# Every setting can be overridden through an environment variable. The chart's
# Deployment exports STATUS_FILE, API_HOST and API_PORT, so the file location
# must be read from the environment instead of being hard-coded; the previous
# path is kept as the default for backward compatibility.
STATUS_FILE = os.environ.get('STATUS_FILE', '/usr/share/nginx/html/status.json')
API_PORT = int(os.environ.get('API_PORT', 5000))
API_HOST = os.environ.get('API_HOST', '127.0.0.1')
|
||||
|
||||
# Setup logging
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
def load_status():
    """Return the status document currently stored in STATUS_FILE.

    Returns:
        The parsed JSON content of STATUS_FILE when it exists, a placeholder
        document with "unknown" values when the file is missing, or an empty
        dict when reading or parsing fails (the error is logged).
    """
    try:
        if not os.path.exists(STATUS_FILE):
            # Nothing has been written yet: answer with a neutral placeholder.
            placeholder = {
                "repo": "unknown",
                "server": "unknown",
                "sync_status": "UNKNOWN",
                "drift_count": 0,
                "files": [],
                "last_check": ""
            }
            return placeholder

        with open(STATUS_FILE, 'r') as handle:
            return json.load(handle)
    except Exception as e:
        logger.error(f"Error loading status: {e}")
        return {}
|
||||
|
||||
def save_status(status):
    """Persist ``status`` to STATUS_FILE as indented JSON.

    The document is first written to a temporary sibling file and then moved
    over STATUS_FILE with os.replace, so a concurrent reader never observes a
    truncated or half-written file.

    Args:
        status: JSON-serialisable dict describing the current status.

    Returns:
        True when the file was written, False when writing failed (the error
        is logged).
    """
    tmp_path = STATUS_FILE + '.tmp'
    try:
        # Ensure directory exists (should already exist from mount)
        os.makedirs(os.path.dirname(STATUS_FILE), exist_ok=True)

        # Write with proper formatting, then swap the finished file into place
        with open(tmp_path, 'w') as f:
            json.dump(status, f, indent=2)
        os.replace(tmp_path, STATUS_FILE)
    except Exception as e:
        logger.error(f"Error saving status: {e}")
        return False

    # Logged outside the try block and with .get(): a document that lacks one
    # of these keys must not turn a successful write into a reported failure.
    logger.info(
        f"Status saved successfully: {status.get('repo')}/{status.get('server')}"
        f" -> {status.get('sync_status')}"
    )
    return True
|
||||
|
||||
@app.route('/api/status', methods=['GET', 'POST', 'OPTIONS'])
def api_status():
    """Read or update the status document.

    OPTIONS: answer 204 with an empty body.
    GET:     return the current status as JSON (200).
    POST:    merge the JSON object in the request body into the stored
             status and persist it.

    Returns (POST):
        200 with the merged status on success,
        400 when the body is missing, is not valid JSON, or is not a JSON
            object,
        500 when the status cannot be saved or an unexpected error occurs.
    """
    if request.method == 'OPTIONS':
        return '', 204

    if request.method == 'GET':
        return jsonify(load_status()), 200

    # Only POST remains (the route accepts no other methods).
    try:
        # silent=True makes Flask return None instead of raising an HTTP
        # exception for a malformed body or a non-JSON content type; those are
        # client errors and must be reported as 400, not as 500.
        incoming_data = request.get_json(silent=True)
        if incoming_data is None:
            if request.get_data():
                return jsonify({"error": "Invalid JSON"}), 400
            return jsonify({"error": "No JSON data provided"}), 400

        # dict.update() below needs a mapping; reject arrays and scalars.
        if not isinstance(incoming_data, dict):
            return jsonify({"error": "JSON body must be an object"}), 400
        if not incoming_data:
            return jsonify({"error": "No JSON data provided"}), 400

        # Load current status and merge the incoming fields over it
        status = load_status()
        status.update(incoming_data)

        # Ensure required fields exist
        if 'last_check' not in status or not status['last_check']:
            status['last_check'] = datetime.utcnow().isoformat() + 'Z'

        # Save updated status
        if save_status(status):
            return jsonify({
                "success": True,
                "message": "Status updated successfully",
                "status": status
            }), 200

        return jsonify({
            "error": "Failed to save status"
        }), 500

    except Exception as e:
        logger.error(f"Error processing POST request: {e}")
        return jsonify({"error": str(e)}), 500
|
||||
|
||||
@app.route('/health', methods=['GET'])
def health():
    """Liveness endpoint: always answers HTTP 200 with a fixed JSON body."""
    payload = {"status": "healthy"}
    return jsonify(payload), 200
|
||||
|
||||
@app.route('/ready', methods=['GET'])
def ready():
    """Readiness endpoint.

    Answers 200 when load_status() yields a non-empty document, and 503 when
    it yields an empty one or when anything raises while checking.
    """
    try:
        if load_status():
            return jsonify({"status": "ready"}), 200
        return jsonify({"status": "not_ready", "reason": "status file empty"}), 503
    except Exception as e:
        return jsonify({"status": "not_ready", "error": str(e)}), 503
|
||||
|
||||
# Script entry point: log the effective settings, then start Flask's built-in
# server with debug mode disabled.
if __name__ == '__main__':
    logger.info(f"Starting gitops-status-server API on {API_HOST}:{API_PORT}")
    logger.info(f"Status file: {STATUS_FILE}")
    app.run(debug=False, host=API_HOST, port=API_PORT)
|
||||
22
charts/gitops-status-server/templates/configmap.yaml
Normal file
22
charts/gitops-status-server/templates/configmap.yaml
Normal file
@ -0,0 +1,22 @@
|
||||
{{/*
|
||||
ConfigMap for default status.json values
|
||||
Used by init container to set up initial status if file doesn't exist
|
||||
*/}}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "gitops-status-server.fullname" . }}
|
||||
labels:
|
||||
{{- include "gitops-status-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
data:
|
||||
# Default status.json values (used for initialization)
|
||||
# This is not mounted directly; instead it's used by the init container
|
||||
# to set up the initial status.json in the shared emptyDir volume.
|
||||
# The actual status.json is stored on the emptyDir and updated via the API.
|
||||
status.json: |
|
||||
{{- .Values.statusJson | toJson | nindent 4 }}
|
||||
|
||||
94
charts/gitops-status-server/templates/deployment.yaml
Normal file
94
charts/gitops-status-server/templates/deployment.yaml
Normal file
@ -0,0 +1,94 @@
|
||||
{{/*
|
||||
Deployment for the gitops-status-server
|
||||
Runs a simple Flask API for status updates
|
||||
Uses the gitops-status-api Docker image
|
||||
*/}}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ include "gitops-status-server.fullname" . }}
|
||||
labels:
|
||||
{{- include "gitops-status-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
replicas: {{ .Values.replicaCount }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "gitops-status-server.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
{{- with .Values.podAnnotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "gitops-status-server.selectorLabels" . | nindent 8 }}
|
||||
spec:
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ include "gitops-status-server.serviceAccountName" . }}
|
||||
securityContext:
|
||||
{{- toYaml .Values.podSecurityContext | nindent 8 }}
|
||||
|
||||
containers:
|
||||
- name: api
|
||||
image: "{{ .Values.api.image.repository }}:{{ .Values.api.image.tag }}"
|
||||
imagePullPolicy: {{ .Values.api.image.pullPolicy }}
|
||||
ports:
|
||||
- name: http
|
||||
containerPort: 5000
|
||||
protocol: TCP
|
||||
env:
|
||||
- name: API_HOST
|
||||
value: "0.0.0.0"
|
||||
- name: API_PORT
|
||||
value: "5000"
|
||||
- name: FLASK_ENV
|
||||
value: "production"
|
||||
- name: STATUS_FILE
|
||||
value: "/data/status.json"
|
||||
livenessProbe:
|
||||
httpGet:
|
||||
path: /health
|
||||
port: http
|
||||
initialDelaySeconds: 20
|
||||
periodSeconds: 10
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 3
|
||||
readinessProbe:
|
||||
httpGet:
|
||||
path: /ready
|
||||
port: http
|
||||
initialDelaySeconds: 20
|
||||
periodSeconds: 5
|
||||
timeoutSeconds: 5
|
||||
failureThreshold: 2
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 10 }}
|
||||
volumeMounts:
|
||||
- name: data
|
||||
mountPath: /data
|
||||
|
||||
volumes:
|
||||
# Data volume for status.json (writable emptyDir)
|
||||
- name: data
|
||||
emptyDir:
|
||||
sizeLimit: 1Mi
|
||||
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
96
charts/gitops-status-server/templates/nginx-config.yaml
Normal file
96
charts/gitops-status-server/templates/nginx-config.yaml
Normal file
@ -0,0 +1,96 @@
|
||||
{{/*
|
||||
ConfigMap containing the nginx configuration
|
||||
Enables serving status.json via GET and updating via POST requests
|
||||
*/}}
|
||||
apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ include "gitops-status-server.fullname" . }}-nginx-config
|
||||
labels:
|
||||
{{- include "gitops-status-server.labels" . | nindent 4 }}
|
||||
data:
|
||||
nginx.conf: |
|
||||
# Minimal nginx config for serving and updating status.json
|
||||
user nginx;
|
||||
worker_processes auto;
|
||||
error_log /var/log/nginx/error.log warn;
|
||||
pid /var/run/nginx.pid;
|
||||
|
||||
events {
|
||||
worker_connections 1024;
|
||||
}
|
||||
|
||||
http {
|
||||
include /etc/nginx/mime.types;
|
||||
default_type application/octet-stream;
|
||||
|
||||
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
|
||||
'$status $body_bytes_sent "$http_referer" '
|
||||
'"$http_user_agent" "$http_x_forwarded_for"';
|
||||
|
||||
access_log /var/log/nginx/access.log main;
|
||||
|
||||
sendfile on;
|
||||
tcp_nopush on;
|
||||
tcp_nodelay on;
|
||||
keepalive_timeout 65;
|
||||
types_hash_max_size 2048;
|
||||
client_max_body_size 1M;
|
||||
|
||||
# Gzip compression
|
||||
gzip on;
|
||||
gzip_vary on;
|
||||
gzip_types text/plain text/css text/xml text/javascript
|
||||
application/x-javascript application/xml+rss
|
||||
application/json;
|
||||
|
||||
upstream api_backend {
|
||||
server 127.0.0.1:5000;
|
||||
keepalive 32;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 8080 default_server;
|
||||
server_name _;
|
||||
|
||||
# Serve status.json as read-only
|
||||
location /status.json {
|
||||
alias /usr/share/nginx/html/status.json;
|
||||
add_header Cache-Control "no-cache, no-store, must-revalidate";
|
||||
add_header Pragma "no-cache";
|
||||
add_header Expires "0";
|
||||
}
|
||||
|
||||
# Health check endpoint
|
||||
location /health {
|
||||
access_log off;
|
||||
return 200 "healthy\n";
|
||||
add_header Content-Type text/plain;
|
||||
}
|
||||
|
||||
# Proxy POST requests to the API backend (Python Flask)
|
||||
location /api/ {
|
||||
proxy_pass http://api_backend;
|
||||
proxy_http_version 1.1;
|
||||
proxy_set_header Connection "";
|
||||
proxy_set_header Host $host;
|
||||
proxy_set_header X-Real-IP $remote_addr;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
|
||||
# Buffer settings for POST requests
|
||||
proxy_request_buffering off;
|
||||
proxy_buffering off;
|
||||
|
||||
# Timeouts
|
||||
proxy_connect_timeout 30s;
|
||||
proxy_send_timeout 30s;
|
||||
proxy_read_timeout 30s;
|
||||
}
|
||||
|
||||
# Catch-all for root
|
||||
location / {
|
||||
return 301 /status.json;
|
||||
}
|
||||
}
|
||||
}
|
||||
27
charts/gitops-status-server/templates/service.yaml
Normal file
27
charts/gitops-status-server/templates/service.yaml
Normal file
@ -0,0 +1,27 @@
|
||||
{{/*
|
||||
Service for the gitops-status-server
|
||||
Exposes the Flask API; the Service type comes from .Values.service.type (ClusterIP by default, NodePort optional)
|
||||
This allows rsyslog pipeline and Grafana to query the API endpoints
|
||||
*/}}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: {{ include "gitops-status-server.fullname" . }}
|
||||
labels:
|
||||
{{- include "gitops-status-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.service.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
type: {{ .Values.service.type }}
|
||||
ports:
|
||||
- port: {{ .Values.service.port }}
|
||||
targetPort: {{ .Values.service.targetPort | default 5000 }}
|
||||
protocol: TCP
|
||||
name: http
|
||||
{{- if and (eq .Values.service.type "NodePort") .Values.service.nodePort }}
|
||||
nodePort: {{ .Values.service.nodePort }}
|
||||
{{- end }}
|
||||
selector:
|
||||
{{- include "gitops-status-server.selectorLabels" . | nindent 4 }}
|
||||
15
charts/gitops-status-server/templates/serviceaccount.yaml
Normal file
15
charts/gitops-status-server/templates/serviceaccount.yaml
Normal file
@ -0,0 +1,15 @@
|
||||
{{/*
|
||||
ServiceAccount for the gitops-status-server
|
||||
*/}}
|
||||
{{- if .Values.serviceAccount.create -}}
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ include "gitops-status-server.serviceAccountName" . }}
|
||||
labels:
|
||||
{{- include "gitops-status-server.labels" . | nindent 4 }}
|
||||
{{- with .Values.serviceAccount.annotations }}
|
||||
annotations:
|
||||
{{- toYaml . | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
88
charts/gitops-status-server/values.yaml
Normal file
88
charts/gitops-status-server/values.yaml
Normal file
@ -0,0 +1,88 @@
|
||||
# Default values for gitops-status-server
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# Number of replicas for the deployment
|
||||
replicaCount: 1
|
||||
|
||||
# API backend container configuration
|
||||
api:
|
||||
image:
|
||||
# Use the gitops-status-api image (Python Flask API)
|
||||
# Build from: gitops-status-api/Dockerfile
|
||||
# Tag with: docker build -t gitops-status-api:latest gitops-status-api/
|
||||
# Can be from Harbor registry or built locally
|
||||
repository: gitops-status-api
|
||||
pullPolicy: IfNotPresent
|
||||
tag: "latest"
|
||||
|
||||
# Image pull secrets for private registries
|
||||
imagePullSecrets: []
|
||||
|
||||
# Override the name of the chart
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
# Service configuration
|
||||
service:
|
||||
# Service type - NodePort for external access, ClusterIP for internal-only
|
||||
type: ClusterIP
|
||||
# Port where the service will be exposed
|
||||
port: 5000
|
||||
# Target port on the container (API port)
|
||||
targetPort: 5000
|
||||
# NodePort (30000-32767) for external access when type is NodePort
|
||||
nodePort: null
|
||||
# Annotations to add to the service
|
||||
annotations: {}
|
||||
|
||||
# Resource limits and requests
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
|
||||
# Node selector for pod assignment
|
||||
nodeSelector: {}
|
||||
|
||||
# Tolerations for pod assignment
|
||||
tolerations: []
|
||||
|
||||
# Affinity rules for pod assignment
|
||||
affinity: {}
|
||||
|
||||
# Security context for the pod
|
||||
podSecurityContext:
|
||||
runAsNonRoot: true
|
||||
runAsUser: 1000
|
||||
fsGroup: 1000
|
||||
|
||||
# Security context for the container
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
readOnlyRootFilesystem: false
|
||||
|
||||
# Labels to add to all resources
|
||||
labels: {}
|
||||
|
||||
# Annotations to add to all resources
|
||||
annotations: {}
|
||||
|
||||
# Pod annotations
|
||||
podAnnotations: {}
|
||||
|
||||
# Service account configuration
|
||||
serviceAccount:
|
||||
# Specifies whether a service account should be created
|
||||
create: true
|
||||
# Annotations to add to the service account
|
||||
annotations: {}
|
||||
# The name of the service account to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name: ""
|
||||
6
charts/secrets/Chart.yaml
Normal file
6
charts/secrets/Chart.yaml
Normal file
@ -0,0 +1,6 @@
|
||||
apiVersion: v2
|
||||
name: secrets
|
||||
description: Generic ExternalSecret resources chart
|
||||
type: application
|
||||
version: 0.1.0
|
||||
appVersion: "1.0.0"
|
||||
23
charts/secrets/templates/external-secret.yaml
Normal file
23
charts/secrets/templates/external-secret.yaml
Normal file
@ -0,0 +1,23 @@
|
||||
{{- range .Values.externalSecrets }}
|
||||
---
|
||||
apiVersion: external-secrets.io/v1
|
||||
kind: ExternalSecret
|
||||
metadata:
|
||||
name: {{ .name }}
|
||||
namespace: {{ .namespace }}
|
||||
spec:
|
||||
refreshInterval: {{ .refreshInterval | default "1h" }}
|
||||
secretStoreRef:
|
||||
name: {{ $.Values.secretStore.name }}
|
||||
kind: {{ $.Values.secretStore.kind }}
|
||||
target:
|
||||
name: {{ .targetName }}
|
||||
creationPolicy: {{ .creationPolicy | default "Owner" }}
|
||||
data:
|
||||
{{- range .data }}
|
||||
- secretKey: {{ .secretKey }}
|
||||
remoteRef:
|
||||
key: {{ .remoteKey }}
|
||||
property: {{ .property }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
46
crds/prometheus-operator/README.md
Normal file
46
crds/prometheus-operator/README.md
Normal file
@ -0,0 +1,46 @@
|
||||
# Prometheus Operator CRDs
|
||||
|
||||
This directory contains the CustomResourceDefinitions (CRDs) for the Prometheus Operator.
|
||||
|
||||
## Why Separate CRDs?
|
||||
|
||||
These CRDs are managed separately from the main `kube-prometheus-stack` Helm chart to avoid:
|
||||
|
||||
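1. **Large Annotation Error**: Client-side apply (the default for `kubectl apply` and Argo CD syncs) stores the full manifest in the `kubectl.kubernetes.io/last-applied-configuration` annotation, which can exceed Kubernetes' 262144-byte annotation limit for large CRDs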
|
||||
2. **CRD Installation Race Conditions**: Ensures CRDs are installed before any resources that depend on them
|
||||
|
||||
## Deployment Method
|
||||
|
||||
These CRDs are **manually deployed** using kubectl server-side apply:
|
||||
|
||||
```bash
|
||||
kubectl apply --server-side=true --force-conflicts -f crds/prometheus-operator/
|
||||
```
|
||||
|
||||
**Note**: Server-side apply is required to bypass the annotation size limit that affects large CRDs.
|
||||
|
||||
## CRDs Included
|
||||
|
||||
- `crd-alertmanagerconfigs.yaml` - AlertmanagerConfig resources
|
||||
- `crd-alertmanagers.yaml` - Alertmanager resources
|
||||
- `crd-podmonitors.yaml` - PodMonitor resources
|
||||
- `crd-probes.yaml` - Probe resources
|
||||
- `crd-prometheusagents.yaml` - PrometheusAgent resources
|
||||
- `crd-prometheuses.yaml` - Prometheus resources
|
||||
- `crd-prometheusrules.yaml` - PrometheusRule resources
|
||||
- `crd-scrapeconfigs.yaml` - ScrapeConfig resources
|
||||
- `crd-servicemonitors.yaml` - ServiceMonitor resources
|
||||
- `crd-thanosrulers.yaml` - ThanosRuler resources
|
||||
|
||||
## Updating CRDs
|
||||
|
||||
When updating the `kube-prometheus-stack` chart version:
|
||||
|
||||
1. Copy the new CRDs from `charts/kube-prometheus-stack/charts/crds/crds/` to this directory
|
||||
2. Reapply them: `kubectl apply --server-side=true --force-conflicts -f crds/prometheus-operator/`
|
||||
3. Then update the main stack via ArgoCD
|
||||
|
||||
## Related Configuration
|
||||
|
||||
- Main stack values: `manifests/kube-prometheus-stack/values.yaml` has `crds.enabled: false`
|
||||
- Main ArgoCD app: `argocd-apps/kube-prometheus-stack.yaml`
|
||||
12334
crds/prometheus-operator/crd-alertmanagerconfigs.yaml
Normal file
12334
crds/prometheus-operator/crd-alertmanagerconfigs.yaml
Normal file
File diff suppressed because it is too large
Load Diff
9983
crds/prometheus-operator/crd-alertmanagers.yaml
Normal file
9983
crds/prometheus-operator/crd-alertmanagers.yaml
Normal file
File diff suppressed because it is too large
Load Diff
1399
crds/prometheus-operator/crd-podmonitors.yaml
Normal file
1399
crds/prometheus-operator/crd-podmonitors.yaml
Normal file
File diff suppressed because it is too large
Load Diff
1416
crds/prometheus-operator/crd-probes.yaml
Normal file
1416
crds/prometheus-operator/crd-probes.yaml
Normal file
File diff suppressed because it is too large
Load Diff
11449
crds/prometheus-operator/crd-prometheusagents.yaml
Normal file
11449
crds/prometheus-operator/crd-prometheusagents.yaml
Normal file
File diff suppressed because it is too large
Load Diff
13720
crds/prometheus-operator/crd-prometheuses.yaml
Normal file
13720
crds/prometheus-operator/crd-prometheuses.yaml
Normal file
File diff suppressed because it is too large
Load Diff
267
crds/prometheus-operator/crd-prometheusrules.yaml
Normal file
267
crds/prometheus-operator/crd-prometheusrules.yaml
Normal file
@ -0,0 +1,267 @@
|
||||
# https://raw.githubusercontent.com/prometheus-operator/prometheus-operator/v0.90.1/example/prometheus-operator-crd/monitoring.coreos.com_prometheusrules.yaml
|
||||
---
|
||||
apiVersion: apiextensions.k8s.io/v1
|
||||
kind: CustomResourceDefinition
|
||||
metadata:
|
||||
annotations:
|
||||
controller-gen.kubebuilder.io/version: v0.19.0
|
||||
operator.prometheus.io/version: 0.90.1
|
||||
name: prometheusrules.monitoring.coreos.com
|
||||
spec:
|
||||
group: monitoring.coreos.com
|
||||
names:
|
||||
categories:
|
||||
- prometheus-operator
|
||||
kind: PrometheusRule
|
||||
listKind: PrometheusRuleList
|
||||
plural: prometheusrules
|
||||
shortNames:
|
||||
- promrule
|
||||
singular: prometheusrule
|
||||
scope: Namespaced
|
||||
versions:
|
||||
- name: v1
|
||||
schema:
|
||||
openAPIV3Schema:
|
||||
description: |-
|
||||
The `PrometheusRule` custom resource definition (CRD) defines [alerting](https://prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) and [recording](https://prometheus.io/docs/prometheus/latest/configuration/recording_rules/) rules to be evaluated by `Prometheus` or `ThanosRuler` objects.
|
||||
|
||||
`Prometheus` and `ThanosRuler` objects select `PrometheusRule` objects using label and namespace selectors.
|
||||
properties:
|
||||
apiVersion:
|
||||
description: |-
|
||||
APIVersion defines the versioned schema of this representation of an object.
|
||||
Servers should convert recognized schemas to the latest internal value, and
|
||||
may reject unrecognized values.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources
|
||||
type: string
|
||||
kind:
|
||||
description: |-
|
||||
Kind is a string value representing the REST resource this object represents.
|
||||
Servers may infer this from the endpoint the client submits requests to.
|
||||
Cannot be updated.
|
||||
In CamelCase.
|
||||
More info: https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds
|
||||
type: string
|
||||
metadata:
|
||||
type: object
|
||||
spec:
|
||||
description: spec defines the specification of desired alerting rule definitions
|
||||
for Prometheus.
|
||||
properties:
|
||||
groups:
|
||||
description: groups defines the content of Prometheus rule file
|
||||
items:
|
||||
description: RuleGroup is a list of sequentially evaluated recording
|
||||
and alerting rules.
|
||||
properties:
|
||||
interval:
|
||||
description: interval defines how often rules in the group are
|
||||
evaluated.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: |-
|
||||
labels define the labels to add or overwrite before storing the result for its rules.
|
||||
The labels defined at the rule level take precedence.
|
||||
|
||||
It requires Prometheus >= 3.0.0.
|
||||
The field is ignored for Thanos Ruler.
|
||||
type: object
|
||||
limit:
|
||||
description: |-
|
||||
limit defines the number of alerts an alerting rule and series a recording
|
||||
rule can produce.
|
||||
Limit is supported starting with Prometheus >= 2.31 and Thanos Ruler >= 0.24.
|
||||
type: integer
|
||||
name:
|
||||
description: name defines the name of the rule group.
|
||||
minLength: 1
|
||||
type: string
|
||||
partial_response_strategy:
|
||||
description: |-
|
||||
partial_response_strategy is only used by ThanosRuler and will
|
||||
be ignored by Prometheus instances.
|
||||
More info: https://github.com/thanos-io/thanos/blob/main/docs/components/rule.md#partial-response
|
||||
pattern: ^(?i)(abort|warn)?$
|
||||
type: string
|
||||
query_offset:
|
||||
description: |-
|
||||
query_offset defines the offset the rule evaluation timestamp of this particular group by the specified duration into the past.
|
||||
|
||||
It requires Prometheus >= v2.53.0.
|
||||
It is not supported for ThanosRuler.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
rules:
|
||||
description: rules defines the list of alerting and recording
|
||||
rules.
|
||||
items:
|
||||
description: |-
|
||||
Rule describes an alerting or recording rule
|
||||
See Prometheus documentation: [alerting](https://www.prometheus.io/docs/prometheus/latest/configuration/alerting_rules/) or [recording](https://www.prometheus.io/docs/prometheus/latest/configuration/recording_rules/#recording-rules) rule
|
||||
properties:
|
||||
alert:
|
||||
description: |-
|
||||
alert defines the name of the alert. Must be a valid label value.
|
||||
Only one of `record` and `alert` must be set.
|
||||
type: string
|
||||
annotations:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: |-
|
||||
annotations defines annotations to add to each alert.
|
||||
Only valid for alerting rules.
|
||||
type: object
|
||||
expr:
|
||||
anyOf:
|
||||
- type: integer
|
||||
- type: string
|
||||
description: expr defines the PromQL expression to evaluate.
|
||||
x-kubernetes-int-or-string: true
|
||||
for:
|
||||
description: for defines how alerts are considered firing
|
||||
once they have been returned for this long.
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
keep_firing_for:
|
||||
description: keep_firing_for defines how long an alert
|
||||
will continue firing after the condition that triggered
|
||||
it has cleared.
|
||||
minLength: 1
|
||||
pattern: ^(0|(([0-9]+)y)?(([0-9]+)w)?(([0-9]+)d)?(([0-9]+)h)?(([0-9]+)m)?(([0-9]+)s)?(([0-9]+)ms)?)$
|
||||
type: string
|
||||
labels:
|
||||
additionalProperties:
|
||||
type: string
|
||||
description: labels defines labels to add or overwrite.
|
||||
type: object
|
||||
record:
|
||||
description: |-
|
||||
record defines the name of the time series to output to. Must be a valid metric name.
|
||||
Only one of `record` and `alert` must be set.
|
||||
type: string
|
||||
required:
|
||||
- expr
|
||||
type: object
|
||||
type: array
|
||||
required:
|
||||
- name
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- name
|
||||
x-kubernetes-list-type: map
|
||||
type: object
|
||||
status:
|
||||
description: |-
|
||||
status defines the status subresource. It is under active development and is updated only when the
|
||||
"StatusForConfigurationResources" feature gate is enabled.
|
||||
|
||||
Most recent observed status of the PrometheusRule. Read-only.
|
||||
More info:
|
||||
https://github.com/kubernetes/community/blob/master/contributors/devel/sig-architecture/api-conventions.md#spec-and-status
|
||||
properties:
|
||||
bindings:
|
||||
description: bindings defines the list of workload resources (Prometheus,
|
||||
PrometheusAgent, ThanosRuler or Alertmanager) which select the configuration
|
||||
resource.
|
||||
items:
|
||||
description: WorkloadBinding is a link between a configuration resource
|
||||
and a workload resource.
|
||||
properties:
|
||||
conditions:
|
||||
description: conditions defines the current state of the configuration
|
||||
resource when bound to the referenced Workload object.
|
||||
items:
|
||||
description: ConfigResourceCondition describes the status
|
||||
of configuration resources linked to Prometheus, PrometheusAgent,
|
||||
Alertmanager or ThanosRuler.
|
||||
properties:
|
||||
lastTransitionTime:
|
||||
description: lastTransitionTime defines the time of the
|
||||
last update to the current status property.
|
||||
format: date-time
|
||||
type: string
|
||||
message:
|
||||
description: message defines the human-readable message
|
||||
indicating details for the condition's last transition.
|
||||
type: string
|
||||
observedGeneration:
|
||||
description: |-
|
||||
observedGeneration defines the .metadata.generation that the
|
||||
condition was set based upon. For instance, if `.metadata.generation` is
|
||||
currently 12, but the `.status.conditions[].observedGeneration` is 9, the
|
||||
condition is out of date with respect to the current state of the object.
|
||||
format: int64
|
||||
type: integer
|
||||
reason:
|
||||
description: reason for the condition's last transition.
|
||||
type: string
|
||||
status:
|
||||
description: status of the condition.
|
||||
minLength: 1
|
||||
type: string
|
||||
type:
|
||||
description: |-
|
||||
type of the condition being reported.
|
||||
Currently, only "Accepted" is supported.
|
||||
enum:
|
||||
- Accepted
|
||||
minLength: 1
|
||||
type: string
|
||||
required:
|
||||
- lastTransitionTime
|
||||
- status
|
||||
- type
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- type
|
||||
x-kubernetes-list-type: map
|
||||
group:
|
||||
description: group defines the group of the referenced resource.
|
||||
enum:
|
||||
- monitoring.coreos.com
|
||||
type: string
|
||||
name:
|
||||
description: name defines the name of the referenced object.
|
||||
minLength: 1
|
||||
type: string
|
||||
namespace:
|
||||
description: namespace defines the namespace of the referenced
|
||||
object.
|
||||
minLength: 1
|
||||
type: string
|
||||
resource:
|
||||
description: resource defines the type of resource being referenced
|
||||
(e.g. Prometheus, PrometheusAgent, ThanosRuler or Alertmanager).
|
||||
enum:
|
||||
- prometheuses
|
||||
- prometheusagents
|
||||
- thanosrulers
|
||||
- alertmanagers
|
||||
type: string
|
||||
required:
|
||||
- group
|
||||
- name
|
||||
- namespace
|
||||
- resource
|
||||
type: object
|
||||
type: array
|
||||
x-kubernetes-list-map-keys:
|
||||
- group
|
||||
- resource
|
||||
- name
|
||||
- namespace
|
||||
x-kubernetes-list-type: map
|
||||
type: object
|
||||
required:
|
||||
- spec
|
||||
type: object
|
||||
served: true
|
||||
storage: true
|
||||
subresources:
|
||||
status: {}
|
||||
12909
crds/prometheus-operator/crd-scrapeconfigs.yaml
Normal file
12909
crds/prometheus-operator/crd-scrapeconfigs.yaml
Normal file
File diff suppressed because it is too large
Load Diff
1413
crds/prometheus-operator/crd-servicemonitors.yaml
Normal file
1413
crds/prometheus-operator/crd-servicemonitors.yaml
Normal file
File diff suppressed because it is too large
Load Diff
9754
crds/prometheus-operator/crd-thanosrulers.yaml
Normal file
9754
crds/prometheus-operator/crd-thanosrulers.yaml
Normal file
File diff suppressed because it is too large
Load Diff
34
manifests/gitops-status-server/values.yaml
Normal file
34
manifests/gitops-status-server/values.yaml
Normal file
@ -0,0 +1,34 @@
|
||||
# Values for gitops-status-server Helm chart
|
||||
# Deploys a simple Flask API for storing and retrieving GitOps status JSON
|
||||
|
||||
# Number of replicas
|
||||
replicaCount: 1
|
||||
# Image pull secrets for Harbor registry
|
||||
imagePullSecrets:
|
||||
- name: harbor-regcred
|
||||
# API backend configuration (Flask server for status updates)
|
||||
api:
|
||||
image:
|
||||
# Use the gitops-status-api image from gitops-status-api repo
|
||||
# Built and pushed to Harbor or use local image
|
||||
repository: harbor.dvirlabs.com/my-apps/status-api
|
||||
tag: "1.1.0"
|
||||
pullPolicy: IfNotPresent
|
||||
# Service configuration
|
||||
service:
|
||||
# Use NodePort to expose the API externally
|
||||
# Access via: http://<node-ip>:<nodePort>
|
||||
type: NodePort
|
||||
# Port exposed by the service
|
||||
port: 5000
|
||||
# NodePort (30000-32767) for external access. Leave empty for automatic assignment
|
||||
nodePort: 30005
|
||||
annotations: {}
|
||||
# Resource limits
|
||||
resources:
|
||||
limits:
|
||||
cpu: 100m
|
||||
memory: 128Mi
|
||||
requests:
|
||||
cpu: 50m
|
||||
memory: 64Mi
|
||||
@ -0,0 +1,169 @@
|
||||
fullnameOverride: monitoring-stack
|
||||
|
||||
# ========================
|
||||
# CRDs
|
||||
# ========================
|
||||
# CRDs are managed separately by the prometheus-operator-crds ArgoCD application
|
||||
# to avoid the "metadata.annotations: Too long" error when Helm manages them
|
||||
crds:
|
||||
enabled: false
|
||||
|
||||
# ========================
|
||||
# PROMETHEUS
|
||||
# ========================
|
||||
prometheus:
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
hosts:
|
||||
- prometheus.dvirlabs.com
|
||||
paths:
|
||||
- /
|
||||
pathType: Prefix
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
|
||||
prometheusSpec:
|
||||
replicas: 1
|
||||
retention: 10d
|
||||
externalUrl: https://prometheus.dvirlabs.com
|
||||
routePrefix: /
|
||||
|
||||
storageSpec:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
storageClassName: nfs-client
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 50Gi
|
||||
|
||||
enableAdminAPI: true
|
||||
|
||||
additionalScrapeConfigs:
|
||||
- job_name: pushgateway
|
||||
honor_labels: true
|
||||
static_configs:
|
||||
- targets:
|
||||
- pushgateway.observability-stack.svc.cluster.local:9091
|
||||
- job_name: nas-node-exporter
|
||||
static_configs:
|
||||
- targets:
|
||||
- 192.168.10.155:9100
|
||||
metrics_path: /metrics
|
||||
|
||||
# ========================
|
||||
# GRAFANA
|
||||
# ========================
|
||||
grafana:
|
||||
enabled: true
|
||||
|
||||
adminUser: admin
|
||||
adminPassword: admin123
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClassName: nfs-client
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
size: 5Gi
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
hosts:
|
||||
- grafana.dvirlabs.com
|
||||
path: /
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
|
||||
# ========================
|
||||
# ALERTMANAGER
|
||||
# ========================
|
||||
alertmanager:
|
||||
enabled: true
|
||||
|
||||
ingress:
|
||||
enabled: true
|
||||
ingressClassName: traefik
|
||||
hosts:
|
||||
- alertmanager.dvirlabs.com
|
||||
paths:
|
||||
- /
|
||||
pathType: Prefix
|
||||
annotations:
|
||||
traefik.ingress.kubernetes.io/router.entrypoints: websecure
|
||||
traefik.ingress.kubernetes.io/router.tls: "true"
|
||||
|
||||
alertmanagerSpec:
|
||||
storage:
|
||||
volumeClaimTemplate:
|
||||
spec:
|
||||
storageClassName: nfs-client
|
||||
accessModes: ["ReadWriteOnce"]
|
||||
resources:
|
||||
requests:
|
||||
storage: 5Gi
|
||||
|
||||
# ========================
|
||||
# EXPORTERS
|
||||
# ========================
|
||||
nodeExporter:
|
||||
enabled: true
|
||||
|
||||
kubeStateMetrics:
|
||||
enabled: true
|
||||
|
||||
# ========================
|
||||
# OPERATOR
|
||||
# ========================
|
||||
prometheusOperator:
|
||||
enabled: true
|
||||
|
||||
# ========================
|
||||
# RULES
|
||||
# ========================
|
||||
defaultRules:
|
||||
create: false
|
||||
|
||||
# ========================
|
||||
# PUSHGATEWAY
|
||||
# ========================
|
||||
extraManifests:
|
||||
- apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: pushgateway
|
||||
namespace: observability-stack
|
||||
labels:
|
||||
app: pushgateway
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: pushgateway
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: pushgateway
|
||||
spec:
|
||||
containers:
|
||||
- name: pushgateway
|
||||
image: prom/pushgateway:v1.10.0
|
||||
ports:
|
||||
- containerPort: 9091
|
||||
- apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
name: pushgateway
|
||||
namespace: observability-stack
|
||||
labels:
|
||||
app: pushgateway
|
||||
spec:
|
||||
selector:
|
||||
app: pushgateway
|
||||
ports:
|
||||
- name: http
|
||||
port: 9091
|
||||
targetPort: 9091
|
||||
5
manifests/secrets-observability-stack/values.yaml
Normal file
5
manifests/secrets-observability-stack/values.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
secretStore:
|
||||
name: vault
|
||||
kind: ClusterSecretStore
|
||||
|
||||
externalSecrets: []
|
||||
Loading…
x
Reference in New Issue
Block a user