# Helm values overrides (YAML).
---
# Service exposure: ClusterIP keeps the Service reachable only from inside
# the cluster (no NodePort or LoadBalancer is created).
service:
  type: ClusterIP

# IMPORTANT: disable the embedded Ollama that the chart can deploy, so that
# no second Ollama instance is installed alongside the existing one.
ollama:
  enabled: false

# IMPORTANT: set BOTH variables to the URL of your existing Ollama service.
# Values are quoted so they are always passed to the container as strings.
# NOTE(review): some chart versions manage the Ollama URL through their own
# values (e.g. a dedicated URL list or a "from extra env" switch) and may
# override or disable these variables when the embedded Ollama is off;
# confirm against the chart version in use.
extraEnvVars:
  - name: OLLAMA_BASE_URL
    value: 'http://ollama:11434'
  - name: OLLAMA_BASE_URLS
    value: 'http://ollama:11434'

# Container resource sizing.
#   requests: what the scheduler reserves for the pod (0.1 CPU, 256 MiB).
#   limits:   hard ceiling enforced at runtime (0.5 CPU, 1 GiB).
resources:
  requests:
    cpu: 100m
    memory: 256Mi
  limits:
    cpu: 500m
    memory: 1Gi

# Schedule on the GPU worker node (same node as Ollama, for low latency).
# This pins the pod to a single host by its hostname label; the pod cannot
# be scheduled elsewhere if that node is unavailable.
nodeSelector:
  kubernetes.io/hostname: k3s-worker-5-gpu