Compare commits
10 Commits
9322dd049d
...
fbe623fd76
| Author | SHA1 | Date | |
|---|---|---|---|
| fbe623fd76 | |||
| ae589acb85 | |||
| 06304a462d | |||
| fdc6a5040a | |||
| 4ab43ca043 | |||
| f2ee04efc4 | |||
| 098ce3bbc4 | |||
| 3a0c78d5de | |||
| 5c11223dfb | |||
| db532b6a22 | |||
@@ -1,15 +1,35 @@
|
||||
service:
|
||||
type: ClusterIP
|
||||
|
||||
# Enable GPU support
|
||||
ollama:
|
||||
gpu:
|
||||
enabled: true
|
||||
type: nvidia
|
||||
number: 1
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 2
|
||||
memory: 4Gi
|
||||
limits:
|
||||
cpu: 4
|
||||
memory: 8Gi
|
||||
|
||||
# Schedule on GPU worker node
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||
|
||||
# Tolerate GPU node taint
|
||||
tolerations:
|
||||
- key: gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
persistentVolume:
|
||||
enabled: true
|
||||
size: 30Gi
|
||||
storageClass: nfs-client
|
||||
size: 50Gi
|
||||
storageClass: local-path
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
service:
|
||||
type: ClusterIP
|
||||
|
||||
env:
|
||||
- name: OLLAMA_BASE_URL
|
||||
value: http://ollama.ai-stack.svc.cluster.local:11434
|
||||
# IMPORTANT: disable the embedded Ollama that the chart can deploy
|
||||
ollama:
|
||||
enabled: false
|
||||
|
||||
# IMPORTANT: set BOTH vars to your existing Ollama service
|
||||
extraEnvVars:
|
||||
- name: OLLAMA_BASE_URL
|
||||
value: http://ollama:11434
|
||||
@@ -18,3 +19,21 @@ resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: local-path
|
||||
size: 2Gi
|
||||
|
||||
# Schedule on GPU worker node (same as Ollama for low latency)
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||
|
||||
# Tolerate GPU node taint
|
||||
tolerations:
|
||||
- key: gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user