Compare commits
10 Commits
9322dd049d
...
fbe623fd76
| Author | SHA1 | Date | |
|---|---|---|---|
| fbe623fd76 | |||
| ae589acb85 | |||
| 06304a462d | |||
| fdc6a5040a | |||
| 4ab43ca043 | |||
| f2ee04efc4 | |||
| 098ce3bbc4 | |||
|
|
3a0c78d5de | ||
| 5c11223dfb | |||
| db532b6a22 |
@ -1,15 +1,35 @@
|
|||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
|
|
||||||
|
# Enable GPU support
|
||||||
|
ollama:
|
||||||
|
gpu:
|
||||||
|
enabled: true
|
||||||
|
type: nvidia
|
||||||
|
number: 1
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 250m
|
|
||||||
memory: 1Gi
|
|
||||||
limits:
|
|
||||||
cpu: 2
|
cpu: 2
|
||||||
memory: 4Gi
|
memory: 4Gi
|
||||||
|
limits:
|
||||||
|
cpu: 4
|
||||||
|
memory: 8Gi
|
||||||
|
|
||||||
|
# Schedule on GPU worker node
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||||
|
|
||||||
|
# Tolerate GPU node taint
|
||||||
|
tolerations:
|
||||||
|
- key: gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
- key: nvidia.com/gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
|
||||||
persistentVolume:
|
persistentVolume:
|
||||||
enabled: true
|
enabled: true
|
||||||
size: 30Gi
|
size: 50Gi
|
||||||
storageClass: nfs-client
|
storageClass: local-path
|
||||||
|
|||||||
@ -1,10 +1,11 @@
|
|||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
|
|
||||||
env:
|
# IMPORTANT: disable the embedded Ollama that the chart can deploy
|
||||||
- name: OLLAMA_BASE_URL
|
ollama:
|
||||||
value: http://ollama.ai-stack.svc.cluster.local:11434
|
enabled: false
|
||||||
|
|
||||||
|
# IMPORTANT: set BOTH vars to your existing Ollama service
|
||||||
extraEnvVars:
|
extraEnvVars:
|
||||||
- name: OLLAMA_BASE_URL
|
- name: OLLAMA_BASE_URL
|
||||||
value: http://ollama:11434
|
value: http://ollama:11434
|
||||||
@ -18,3 +19,21 @@ resources:
|
|||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
|
|
||||||
|
persistence:
|
||||||
|
enabled: true
|
||||||
|
storageClass: local-path
|
||||||
|
size: 2Gi
|
||||||
|
|
||||||
|
# Schedule on GPU worker node (same as Ollama for low latency)
|
||||||
|
nodeSelector:
|
||||||
|
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||||
|
|
||||||
|
# Tolerate GPU node taint
|
||||||
|
tolerations:
|
||||||
|
- key: gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
- key: nvidia.com/gpu
|
||||||
|
operator: Exists
|
||||||
|
effect: NoSchedule
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user