Compare commits
No commits in common. "fbe623fd76c0b86fba178cd691926636f2dfa233" and "9322dd049dca1d137f201a9e3b64a2904a8feed9" have entirely different histories.
fbe623fd76
...
9322dd049d
@ -1,35 +1,15 @@
|
|||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
|
|
||||||
# Enable GPU support
|
|
||||||
ollama:
|
|
||||||
gpu:
|
|
||||||
enabled: true
|
|
||||||
type: nvidia
|
|
||||||
number: 1
|
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
|
cpu: 250m
|
||||||
|
memory: 1Gi
|
||||||
|
limits:
|
||||||
cpu: 2
|
cpu: 2
|
||||||
memory: 4Gi
|
memory: 4Gi
|
||||||
limits:
|
|
||||||
cpu: 4
|
|
||||||
memory: 8Gi
|
|
||||||
|
|
||||||
# Schedule on GPU worker node
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
|
||||||
|
|
||||||
# Tolerate GPU node taint
|
|
||||||
tolerations:
|
|
||||||
- key: gpu
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
- key: nvidia.com/gpu
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|
||||||
persistentVolume:
|
persistentVolume:
|
||||||
enabled: true
|
enabled: true
|
||||||
size: 50Gi
|
size: 30Gi
|
||||||
storageClass: local-path
|
storageClass: nfs-client
|
||||||
|
|||||||
@ -1,17 +1,16 @@
|
|||||||
service:
|
service:
|
||||||
type: ClusterIP
|
type: ClusterIP
|
||||||
|
|
||||||
# IMPORTANT: disable the embedded Ollama that the chart can deploy
|
env:
|
||||||
ollama:
|
- name: OLLAMA_BASE_URL
|
||||||
enabled: false
|
value: http://ollama.ai-stack.svc.cluster.local:11434
|
||||||
|
|
||||||
# IMPORTANT: set BOTH vars to your existing Ollama service
|
|
||||||
extraEnvVars:
|
extraEnvVars:
|
||||||
- name: OLLAMA_BASE_URL
|
- name: OLLAMA_BASE_URL
|
||||||
value: http://ollama:11434
|
value: http://ollama:11434
|
||||||
- name: OLLAMA_BASE_URLS
|
- name: OLLAMA_BASE_URLS
|
||||||
value: http://ollama:11434
|
value: http://ollama:11434
|
||||||
|
|
||||||
resources:
|
resources:
|
||||||
requests:
|
requests:
|
||||||
cpu: 100m
|
cpu: 100m
|
||||||
@ -19,21 +18,3 @@ resources:
|
|||||||
limits:
|
limits:
|
||||||
cpu: 500m
|
cpu: 500m
|
||||||
memory: 1Gi
|
memory: 1Gi
|
||||||
|
|
||||||
persistence:
|
|
||||||
enabled: true
|
|
||||||
storageClass: local-path
|
|
||||||
size: 2Gi
|
|
||||||
|
|
||||||
# Schedule on GPU worker node (same as Ollama for low latency)
|
|
||||||
nodeSelector:
|
|
||||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
|
||||||
|
|
||||||
# Tolerate GPU node taint
|
|
||||||
tolerations:
|
|
||||||
- key: gpu
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
- key: nvidia.com/gpu
|
|
||||||
operator: Exists
|
|
||||||
effect: NoSchedule
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user