# File-viewer metadata (not configuration): 33 lines, 640 B, YAML

# Kubernetes Service settings for the chart's workload.
# `type` must be nested under `service`; ClusterIP is the Service type that
# allocates an in-cluster virtual IP only (no NodePort/LoadBalancer).
service:
  type: ClusterIP
# IMPORTANT: disable the embedded Ollama that the chart can deploy.
# `enabled` must be nested under `ollama` for the toggle to apply to the
# embedded Ollama rather than being read as an unrelated top-level key.
ollama:
  enabled: false
# IMPORTANT: set BOTH vars to your existing Ollama service.
# Each list item is one environment variable (`name` + `value` pair); both
# variables point at the same endpoint, host `ollama` on port 11434.
# NOTE(review): the short hostname `ollama` presumably resolves only when the
# Ollama Service lives in the same namespace — confirm the name/namespace.
extraEnvVars:
  - name: OLLAMA_BASE_URL
    value: http://ollama:11434
  - name: OLLAMA_BASE_URLS
    value: http://ollama:11434
# Container resource settings.
# `requests` and `limits` are separate sub-mappings, each with its own
# `cpu` and `memory`; keeping them nested avoids duplicate `cpu`/`memory`
# keys where the last value would silently win.
resources:
  # Amount reserved for the container when it is scheduled.
  requests:
    cpu: 100m
    memory: 256Mi
  # Upper bound the container may consume.
  limits:
    cpu: 500m
    memory: 1Gi
# Schedule on GPU worker node (same as Ollama for low latency).
# The selector is a label -> value mapping; pinning on the hostname label
# restricts scheduling to the single node named below.
nodeSelector:
  kubernetes.io/hostname: k3s-worker-5-gpu
# Tolerate GPU node taint.
# A single toleration entry matching the taint `gpu=true:NoSchedule`.
tolerations:
  - key: gpu
    operator: Equal
    # Quoted so the value stays the string "true" rather than a YAML boolean.
    value: "true"
    effect: NoSchedule