---
# Helm values override for an Ollama deployment on a GPU worker node.
#
# NOTE(review): this file was reconstructed from a copy that had been
# collapsed onto a single line. The nesting below is inferred from the key
# names and assumes the Ollama Helm chart's values layout (`ollama.gpu.*`
# nested; scheduling, resources and persistence at the top level).
# Confirm against the chart's values.yaml before deploying.

service:
  type: ClusterIP

# Enable GPU support
ollama:
  gpu:
    enabled: true
    type: nvidia
    number: 1

resources:
  requests:
    cpu: 2
    memory: 4Gi
  limits:
    cpu: 4
    memory: 8Gi

# Schedule on GPU worker node
nodeSelector:
  kubernetes.io/hostname: k3s-worker-5-gpu

# Tolerate GPU node taint
tolerations:
  - key: gpu
    operator: Equal
    # Quoted on purpose: keeps the taint value a string rather than a boolean.
    value: "true"
    effect: NoSchedule

persistentVolume:
  enabled: true
  size: 50Gi
  storageClass: nfs-client