Compare commits
No commits in common. "fbe623fd76c0b86fba178cd691926636f2dfa233" and "9322dd049dca1d137f201a9e3b64a2904a8feed9" have entirely different histories.
fbe623fd76
...
9322dd049d
@@ -1,35 +1,15 @@
|
||||
service:
|
||||
type: ClusterIP
|
||||
|
||||
# Enable GPU support
|
||||
ollama:
|
||||
gpu:
|
||||
enabled: true
|
||||
type: nvidia
|
||||
number: 1
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 250m
|
||||
memory: 1Gi
|
||||
limits:
|
||||
cpu: 2
|
||||
memory: 4Gi
|
||||
limits:
|
||||
cpu: 4
|
||||
memory: 8Gi
|
||||
|
||||
# Schedule on GPU worker node
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||
|
||||
# Tolerate GPU node taint
|
||||
tolerations:
|
||||
- key: gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
persistentVolume:
|
||||
enabled: true
|
||||
size: 50Gi
|
||||
storageClass: local-path
|
||||
size: 30Gi
|
||||
storageClass: nfs-client
|
||||
|
||||
@@ -1,17 +1,16 @@
|
||||
service:
|
||||
type: ClusterIP
|
||||
|
||||
# IMPORTANT: disable the embedded Ollama that the chart can deploy
|
||||
ollama:
|
||||
enabled: false
|
||||
env:
|
||||
- name: OLLAMA_BASE_URL
|
||||
value: http://ollama.ai-stack.svc.cluster.local:11434
|
||||
|
||||
# IMPORTANT: set BOTH vars to your existing Ollama service
|
||||
extraEnvVars:
|
||||
- name: OLLAMA_BASE_URL
|
||||
value: http://ollama:11434
|
||||
- name: OLLAMA_BASE_URLS
|
||||
value: http://ollama:11434
|
||||
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 100m
|
||||
@@ -19,21 +18,3 @@ resources:
|
||||
limits:
|
||||
cpu: 500m
|
||||
memory: 1Gi
|
||||
|
||||
persistence:
|
||||
enabled: true
|
||||
storageClass: local-path
|
||||
size: 2Gi
|
||||
|
||||
# Schedule on GPU worker node (same as Ollama for low latency)
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||
|
||||
# Tolerate GPU node taint
|
||||
tolerations:
|
||||
- key: gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user