Install drivers
Script to update InfiniBand drivers
update_ib.sh |
---|
| #!/bin/bash
set -e
# Updates the InfiniBand stack: removes the installed DOCA/OFED packages,
# installs the DOCA 2.10.0 host repository package and doca-all, then reboots.
# Must be run as root.

# Purge every installed package whose dpkg listing mentions one of the
# DOCA-related names. Removal is best-effort (|| true) so that one package
# failing to purge does not abort the whole update.
for f in $( dpkg --list | grep -E 'doca|flexio|dpa-gdbserver|dpa-stats|dpaeumgmt' | awk '{print $2}' ); do
  echo "$f"
  apt remove --purge "$f" -y || true
done
# The OFED uninstaller may be absent or may fail on a clean machine; ignore that.
/usr/sbin/ofed_uninstall.sh --force || true
apt autoremove -y

# Fetch the module signing key over HTTPS: it is enrolled as a trusted Machine
# Owner Key below, so it must not be retrieved over an unauthenticated channel.
wget -O /tmp/mlnx_signing_key_pub.der https://www.mellanox.com/downloads/ofed/mlnx_signing_key_pub.der
# NOTE(review): mokutil --import normally asks for a one-time password and the
# enrollment is finished from the MOK manager during the next boot - confirm
# someone is at the console when this script is run.
mokutil --import /tmp/mlnx_signing_key_pub.der

# Install the DOCA host repository package, then the full DOCA stack from it.
wget -O /tmp/doca-host_2.10.0-093000-25.01-ubuntu2404_amd64.deb https://www.mellanox.com/downloads/DOCA/DOCA_v2.10.0/host/doca-host_2.10.0-093000-25.01-ubuntu2404_amd64.deb
dpkg -i /tmp/doca-host_2.10.0-093000-25.01-ubuntu2404_amd64.deb
apt update -y
apt --fix-broken -y install
apt upgrade -y
apt -y install doca-all
apt autoremove -y
reboot
|
The GPU driver update below has to be done in two steps, separated by a reboot: run update_cuda_part1.sh, reboot, and then run update_cuda_part2.sh.
Scripts to update GPU drivers
update_cuda_part1.sh |
---|
| #!/bin/bash
set -e
# Part 1 of the GPU update: removes the installed NVIDIA driver and packages,
# then runs the 570.124.04 driver installer. Must be run as root.

# Removes previously installed driver / CUDA versions.
# Unload the kernel modules one after the other, dependents first. They must be
# sequenced (not piped together): a pipeline starts all rmmod commands at the
# same time and only reports the status of the last one. Modules that are not
# currently loaded are skipped so a clean machine does not abort the script.
# NOTE(review): any other loaded module that depends on nvidia (for example
# nvidia_uvm) will still make the final rmmod fail - confirm whether it needs
# to be added to this list.
for mod in nvidia_drm nvidia_modeset nvidia; do
  if [ -d "/sys/module/${mod}" ]; then
    rmmod "${mod}"
  fi
done
sudo apt-get --purge -y remove "nvidia*" "libnvidia*"
# The uninstaller is not necessarily present (presumably it only exists after
# a .run based install); skip it instead of aborting when it is missing.
if [ -x /usr/bin/nvidia-uninstall ]; then
  sudo /usr/bin/nvidia-uninstall
fi

# Installs the driver. The installer is cached under /etc and only downloaded
# when missing. It is fetched to a temporary name and renamed on success, so an
# interrupted download is never mistaken for a complete installer on a re-run.
driver_installer=/etc/NVIDIA-Linux-x86_64-570.124.04.run
if ! [ -f "${driver_installer}" ]; then
  wget -O "${driver_installer}.part" https://it.download.nvidia.com/XFree86/Linux-x86_64/570.124.04/NVIDIA-Linux-x86_64-570.124.04.run
  mv "${driver_installer}.part" "${driver_installer}"
fi
sh "${driver_installer}"
|
update_cuda_part2.sh |
---|
| #!/bin/bash
set -e
# Make a failing download inside a pipeline abort the script instead of being
# hidden behind the exit status of the last command in the pipeline.
set -o pipefail
# Part 2 of the GPU update (run after the reboot): installs CUDA 12.8.1 and the
# NVIDIA container toolkit. Must be run as root.

# Installs CUDA. The installer is cached under /etc and only downloaded when
# missing; it is fetched to a temporary name and renamed on success so an
# interrupted download is never mistaken for a complete installer on a re-run.
# NOTE(review): the file name suggests this runfile bundles driver 570.124.06
# while part 1 installs 570.124.04 - confirm the driver component is
# deselected in the installer.
cuda_installer=/etc/cuda_12.8.1_570.124.06_linux.run
if ! [ -f "${cuda_installer}" ]; then
  wget -O "${cuda_installer}.part" https://developer.download.nvidia.com/compute/cuda/12.8.1/local_installers/cuda_12.8.1_570.124.06_linux.run
  mv "${cuda_installer}.part" "${cuda_installer}"
fi
sh "${cuda_installer}"

# Installs the container toolkit.
# Each step is its own statement so that set -e stops on the first failure.
# --yes lets gpg overwrite a keyring left by a previous run without prompting.
toolkit_keyring=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg
curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey \
  | gpg --yes --dearmor -o "${toolkit_keyring}"
# -f prevents an HTTP error page from being written into the APT source list.
curl -fsSL https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list \
  | sed "s#deb https://#deb [signed-by=${toolkit_keyring}] https://#g" \
  | tee /etc/apt/sources.list.d/nvidia-container-toolkit.list
apt update -y
apt install -y nvidia-container-toolkit
|