fix: target NVIDIA device plugin to GPU node k3s-worker-5-gpu with proper tolerations

This commit is contained in:
dvirlabs 2026-03-01 17:24:03 +02:00
parent 19d1d5fa4a
commit 68849b86f9

View File

@@ -28,13 +28,16 @@ spec:
labels: labels:
name: nvidia-device-plugin-ds name: nvidia-device-plugin-ds
spec: spec:
# nodeSelector removed for initial deployment to avoid chicken-egg problem # Schedule ONLY on the GPU node by hostname
# The nvidia.com/gpu.present label is applied BY the device plugin itself nodeSelector:
# FAIL_ON_INIT_ERROR=false ensures graceful skip on nodes without GPU kubernetes.io/hostname: k3s-worker-5-gpu
# After first deployment, you can optionally re-enable with:
# nodeSelector:
# nvidia.com/gpu.present: "true"
tolerations: tolerations:
# Tolerate the gpu=true:NoSchedule taint on the GPU node
- key: gpu
operator: Equal
value: "true"
effect: NoSchedule
# Also tolerate nvidia.com/gpu taint if present
- key: nvidia.com/gpu - key: nvidia.com/gpu
operator: Exists operator: Exists
effect: NoSchedule effect: NoSchedule