fix: target NVIDIA device plugin to GPU node k3s-worker-5-gpu with proper tolerations
This commit is contained in:
parent
19d1d5fa4a
commit
68849b86f9
@@ -28,13 +28,16 @@ spec:
|
|||||||
labels:
|
labels:
|
||||||
name: nvidia-device-plugin-ds
|
name: nvidia-device-plugin-ds
|
||||||
spec:
|
spec:
|
||||||
# nodeSelector removed for initial deployment to avoid a chicken-and-egg problem
|
# Schedule ONLY on the GPU node by hostname
|
||||||
# The nvidia.com/gpu.present label is applied BY the device plugin itself
|
nodeSelector:
|
||||||
# FAIL_ON_INIT_ERROR=false ensures graceful skip on nodes without GPU
|
kubernetes.io/hostname: k3s-worker-5-gpu
|
||||||
# After first deployment, you can optionally re-enable with:
|
|
||||||
# nodeSelector:
|
|
||||||
# nvidia.com/gpu.present: "true"
|
|
||||||
tolerations:
|
tolerations:
|
||||||
|
# Tolerate the gpu=true:NoSchedule taint on the GPU node
|
||||||
|
- key: gpu
|
||||||
|
operator: Equal
|
||||||
|
value: "true"
|
||||||
|
effect: NoSchedule
|
||||||
|
# Also tolerate nvidia.com/gpu taint if present
|
||||||
- key: nvidia.com/gpu
|
- key: nvidia.com/gpu
|
||||||
operator: Exists
|
operator: Exists
|
||||||
effect: NoSchedule
|
effect: NoSchedule
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user