---
# Helm values override for a web UI that talks to an external Ollama
# backend.
# NOTE(review): the key layout looks like the Open WebUI chart -- confirm
# against the chart's values.yaml, in particular that extraEnvVars,
# resources, nodeSelector and tolerations are top-level keys.

# ClusterIP: the Service is reachable only from inside the cluster.
service:
  type: ClusterIP

# IMPORTANT: disable the embedded Ollama that the chart can deploy
ollama:
  enabled: false

# IMPORTANT: set BOTH vars to your existing Ollama service
extraEnvVars:
  - name: OLLAMA_BASE_URL
    value: http://ollama:11434
  - name: OLLAMA_BASE_URLS
    value: http://ollama:11434

# Container resource requests (scheduling guarantee) and limits (hard cap).
resources:
  requests:
    cpu: 100m
    memory: 256Mi
  limits:
    cpu: 500m
    memory: 1Gi

# Schedule on GPU worker node (same as Ollama for low latency)
nodeSelector:
  kubernetes.io/hostname: k3s-worker-5-gpu

# Tolerate GPU node taint
tolerations:
  - key: gpu
    operator: Equal
    # Quoted on purpose: taint values are strings, and a bare true would be
    # parsed as a YAML boolean.
    value: "true"
    effect: NoSchedule