fix: target NVIDIA device plugin to GPU node k3s-worker-5-gpu with proper tolerations
This commit is contained in:
parent
19d1d5fa4a
commit
68849b86f9
@@ -28,13 +28,16 @@ spec:
|
||||
labels:
|
||||
name: nvidia-device-plugin-ds
|
||||
spec:
|
||||
# nodeSelector removed for initial deployment to avoid a chicken-and-egg problem
|
||||
# The nvidia.com/gpu.present label is applied BY the device plugin itself
|
||||
# FAIL_ON_INIT_ERROR=false ensures graceful skip on nodes without GPU
|
||||
# After first deployment, you can optionally re-enable with:
|
||||
# nodeSelector:
|
||||
# nvidia.com/gpu.present: "true"
|
||||
# Schedule ONLY on the GPU node by hostname
|
||||
nodeSelector:
|
||||
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||
tolerations:
|
||||
# Tolerate the gpu=true:NoSchedule taint on the GPU node
|
||||
- key: gpu
|
||||
operator: Equal
|
||||
value: "true"
|
||||
effect: NoSchedule
|
||||
# Also tolerate nvidia.com/gpu taint if present
|
||||
- key: nvidia.com/gpu
|
||||
operator: Exists
|
||||
effect: NoSchedule
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user