# Service exposure for the release. ClusterIP is reachable only from inside
# the cluster; external access needs an Ingress or a port-forward.
service:
  type: ClusterIP

# IMPORTANT: disable the embedded Ollama that the chart can deploy.
# An existing Ollama service is used instead (see extraEnvVars below).
ollama:
  enabled: false

# IMPORTANT: point BOTH variables at the existing Ollama service.
# OLLAMA_BASE_URL is the single-endpoint form; OLLAMA_BASE_URLS is the list
# form (entries separated by ";"). Here both name the same endpoint.
# NOTE(review): presumably the open-webui chart. Some chart versions have a
# dedicated `ollamaUrls` value and may disable the Ollama API when it is
# empty; confirm these env vars take effect on the chart version in use.
extraEnvVars:
  - name: OLLAMA_BASE_URL
    value: 'http://ollama:11434'
  - name: OLLAMA_BASE_URLS
    value: 'http://ollama:11434'

# Container CPU/memory budget. `requests` is what the scheduler reserves;
# `limits` is the hard cap (100m = 0.1 CPU core; Mi/Gi are binary units).
resources:
  requests:
    cpu: 100m
    memory: 256Mi
  limits:
    cpu: 500m
    memory: 1Gi

# Schedule on the GPU worker node (same node as Ollama, for low latency).
# NOTE(review): selecting by hostname pins the pod to this single node, so
# it cannot run elsewhere if the node is down. Confirm this is intended.
nodeSelector:
  kubernetes.io/hostname: k3s-worker-5-gpu

# Tolerate the GPU node's taint (matches gpu=true with effect NoSchedule),
# so the nodeSelector target will actually accept the pod.
tolerations:
  - key: gpu
    operator: Equal
    # Quoted on purpose: must stay a string, not a YAML boolean.
    value: 'true'
    effect: NoSchedule