Перейти к содержанию

Установка драйверов GPU AMD, ROCm и HIP на Ubuntu Linux

Данная инструкция описывает процесс установки драйверов GPU и стека ROCm (Radeon Open Compute) и HIP. Использование ROCm позволяет запускать задачи машинного обучения и ИИ на современных видеокартах AMD, а HIP ускоряет обработку графики, например в Blender.

Внимание

Видеокарты AMD в HOSTKEY гарантированно работают ТОЛЬКО на Ubuntu 24.04 LTS и 26.04 LTS!

Откройте консоль на сервере (через native консоль или SSH), залогиньтесь под root, скопируйте данный скрипт, вставьте его в командную строку и нажмите Enter для автоматической установки драйверов и ROCm. Если вам нужен docker или docker compose для работы вашего ПО, поставьте его ДО установки драйверов, чтобы скрипт "прокинул" поддержку видеокарт в контейнеры.

Внимание

В процессе установки, возможно, понадобится нажимать Enter, подтверждая установку модулей нового ядра или перезапуск сервисов.

Скрипт для установки

#!/bin/bash
# Strict mode: stop on the first failing command, on any unset variable,
# and when any stage of a pipeline fails.
set -euo pipefail

# Universal AMD GPU + ROCm installer for Ubuntu 24.04/26.04 LTS (Non-Interactive)
# Supported:
#   • Ubuntu 24.04 (noble) — ROCm from AMD repo (repo.radeon.com)
#   • Ubuntu 26.04 (resolute) — ROCm from Ubuntu universe repo (Canonical)

# ============ FLAGS ============
# Each DO_* flag gates one section of the script below: 1 runs the section,
# 0 skips it.

# Refuse to run on Ubuntu releases that are not listed in ALLOWED_UBUNTU_VERSIONS.
DO_OS_POLICY_CHECK=1
ALLOWED_UBUNTU_VERSIONS=("24.04" "26.04")  # 22.04 NOT supported

# apt-get update + upgrade, headers for the running kernel, gcc/g++.
DO_APT_UPGRADE=1
DO_INSTALL_KERNEL_HEADERS=1
DO_INSTALL_BUILD_TOOLS=1

# Compare the running kernel's major.minor with REQUIRED_KERNEL_MM and, when it
# is older, optionally install a mainline kernel from a PPA.
DO_KERNEL_POLICY_CHECK=1      # Only for 24.04
DO_INSTALL_MAINLINE_KERNEL=1  # Only for 24.04 if kernel < 6.13
REQUIRED_KERNEL_MM="6.13"

# Append GRUB_PARAMS to GRUB_CMDLINE_LINUX_DEFAULT in /etc/default/grub.
# Disabled by default.
DO_GRUB_PARAMS=0
GRUB_PARAMS=("amdgpu.gpu_recovery=1" "amdgpu.runpm=0" "amdgpu.ppfeaturemask=0xffffffff")

# Purge previously installed ROCm/AMDGPU packages, configure the package
# source, install the ROCm stack, and point /opt/rocm at the installed version.
DO_PURGE_OLD_PACKAGES=1
DO_SETUP_ROCM_REPO=1          # AMD repo (24.04 only)
DO_INSTALL_ROCM=1
DO_LINK_OPT_ROCM=1

# Add the invoking user to the render and video groups, and append the ROCm
# PATH/LD_LIBRARY_PATH/ROCM_PATH exports to that user's ~/.bashrc.
DO_USER_GROUPS=1
DO_BASHRC_PATH=1

# Symlink amdgpu.ids under /opt/amdgpu for Ollama, and write "on" to every
# GPU's power/control node in sysfs.
DO_OLLAMA_AMDGPU_IDS_WORKAROUND=1
DO_GPU_POWER_CONTROL_ON=1

# ============ START ============
echo "Starting AMD ROCm installation (Non-Interactive)..."

# Dependency checks.
# Only tools that the script actually invokes later are required; every
# missing tool is reported in one pass so the user can install them together
# before anything on the system is modified.
MISSING_DEPS=()
for cmd in sudo lspci wget gpg grep sed getent uname; do
  command -v "$cmd" >/dev/null 2>&1 || MISSING_DEPS+=("$cmd")
done
if [[ "${#MISSING_DEPS[@]}" -gt 0 ]]; then
  for cmd in "${MISSING_DEPS[@]}"; do
    echo "Missing dependency: $cmd" >&2
  done
  exit 1
fi

# Load OS identification
# Use the first readable os-release file: /etc first, then the /usr/lib copy.
osr=""
for candidate in /etc/os-release /usr/lib/os-release; do
  if [[ -r "${candidate}" ]]; then
    osr="${candidate}"
    break
  fi
done
if [[ -z "${osr}" ]]; then
  echo "Cannot read os-release file. Exiting."
  exit 1
fi

# Source the file with auto-export switched on, so every KEY=value pair it
# defines (ID, VERSION_ID, VERSION_CODENAME, ...) becomes an exported variable.
set -a
source "${osr}"
set +a

[[ "${ID:-}" == "ubuntu" ]] || {
  echo "This script is intended for Ubuntu. Exiting."
  exit 1
}

UBUNTU_VERSION="${VERSION_ID:-}"
UBUNTU_CODENAME="${VERSION_CODENAME:-}"

# When the file carries no codename, derive it from the version number.
if [[ -z "${UBUNTU_CODENAME}" ]]; then
  if [[ "${UBUNTU_VERSION}" == "24.04" ]]; then
    UBUNTU_CODENAME="noble"
  elif [[ "${UBUNTU_VERSION}" == "26.04" ]]; then
    UBUNTU_CODENAME="resolute"
  else
    UBUNTU_CODENAME="unknown"
  fi
  echo "Detected codename fallback: ${UBUNTU_CODENAME}"
fi

[[ -n "${UBUNTU_VERSION}" ]] || {
  echo "Cannot detect Ubuntu VERSION_ID. Exiting."
  exit 1
}

# OS policy check
# Stop unless the detected release is one of ALLOWED_UBUNTU_VERSIONS.
if [[ "${DO_OS_POLICY_CHECK}" -eq 1 ]]; then
  version_allowed=0
  for allowed in "${ALLOWED_UBUNTU_VERSIONS[@]}"; do
    if [[ "${UBUNTU_VERSION}" == "${allowed}" ]]; then
      version_allowed=1
      break
    fi
  done
  if [[ "${version_allowed}" -ne 1 ]]; then
    cat <<EOF
❌ Unsupported Ubuntu version: ${UBUNTU_VERSION}

This script supports ONLY:
- Ubuntu 24.04 LTS (noble) — ROCm from AMD repo
- Ubuntu 26.04 LTS (resolute) — ROCm from Ubuntu universe repo

Ubuntu 22.04 is NOT supported. Please upgrade.
EOF
    exit 1
  fi
fi

# Detect AMD GPU
# Select PCI functions by class code instead of by description text:
#   0300 VGA compatible controller, 0302 3D controller,
#   0380 Display controller, 1200 Processing accelerators.
# Compute cards can enumerate under the last two classes, which a "vga|3d"
# text match misses; a text match on "3d" also hits unrelated lines that merely
# contain those characters (for example inside a hexadecimal device ID).
# The vendor is anchored as "[1002:xxxx]" so only the vendor half of the
# vendor:device pair can match.
# "|| true" keeps set -e/pipefail from aborting when grep finds nothing; the
# empty result is handled explicitly below.
AMD_GPU_LINES="$(
  lspci -nn \
    | grep -E '\[(0300|0302|0380|1200)\]:.*\[1002:[0-9a-fA-F]{4}\]' \
    || true
)"
if [[ -z "${AMD_GPU_LINES}" ]]; then
  echo "No AMD GPU detected (vendor 1002). Exiting."
  exit 1
fi
echo "AMD GPUs detected:"
echo "${AMD_GPU_LINES}"

# Setup non-interactive mode
# Both variables are exported so that the "sudo -E" calls below pass them on.
export DEBIAN_FRONTEND=noninteractive NEEDRESTART_MODE=a

# Set to 1 by any later section whose changes need a reboot.
REBOOT_REQUIRED=0

# System update
if (( DO_APT_UPGRADE == 1 )); then
  echo "Updating system packages (non-interactive)..."
  # On a changed config file: take the default action if one exists,
  # otherwise keep the currently installed version.
  UPGRADE_DPKG_OPTS=(
    -o Dpkg::Options::="--force-confdef"
    -o Dpkg::Options::="--force-confold"
  )
  sudo -E apt-get update
  sudo -E apt-get upgrade -y "${UPGRADE_DPKG_OPTS[@]}"
fi

# Kernel headers
# Install the headers that match the running kernel. A failure is reported
# as a warning and does not stop the script.
if (( DO_INSTALL_KERNEL_HEADERS == 1 )); then
  CURRENT_KERNEL="$(uname -r)"
  HEADERS_PACKAGE="linux-headers-${CURRENT_KERNEL}"
  echo "Installing kernel headers for: ${CURRENT_KERNEL}"
  if ! sudo -E apt-get install -y \
    -o Dpkg::Options::="--force-confdef" \
    -o Dpkg::Options::="--force-confold" \
    "${HEADERS_PACKAGE}"; then
    echo "Warning: Failed to install linux-headers-${CURRENT_KERNEL}. DKMS may fail."
  fi
fi

# Build tools
# Pick the compiler packages per release; any other release falls back to the
# unversioned gcc/g++ packages. A failed install only produces a warning.
if (( DO_INSTALL_BUILD_TOOLS == 1 )); then
  if [[ "${UBUNTU_VERSION}" == "24.04" ]]; then
    GCC_PACKAGES=("gcc-13" "g++-13")
  elif [[ "${UBUNTU_VERSION}" == "26.04" ]]; then
    GCC_PACKAGES=("gcc-15" "g++-15")
  else
    GCC_PACKAGES=("gcc" "g++")
  fi
  echo "Installing build tools: ${GCC_PACKAGES[*]}"
  if ! sudo -E apt-get install -y \
    -o Dpkg::Options::="--force-confdef" \
    -o Dpkg::Options::="--force-confold" \
    "${GCC_PACKAGES[@]}"; then
    echo "Warning: Build tools install may have failed."
  fi
fi

# Kernel policy check — ONLY for Ubuntu 24.04

#######################################
# Compare two "major.minor" kernel versions numerically.
# Arguments: $1 - current version, e.g. "6.8"
#            $2 - required version, e.g. "6.13"
# Returns:   0 when $1 is equal to or newer than $2, non-zero otherwise
#######################################
kernel_mm_at_least() {
  local cur_major="${1%%.*}" cur_minor="${1#*.}"
  local req_major="${2%%.*}" req_minor="${2#*.}"
  # 10# forces base 10 so a component with a leading zero is not read as octal.
  (( 10#${cur_major} > 10#${req_major} )) \
    || (( 10#${cur_major} == 10#${req_major} && 10#${cur_minor} >= 10#${req_minor} ))
}

if [[ "${DO_KERNEL_POLICY_CHECK}" -eq 1 && "${UBUNTU_VERSION}" == "24.04" ]]; then
  KERNEL_VERSION="$(uname -r)"
  KERNEL_MM=""
  if [[ "${KERNEL_VERSION}" =~ ^([0-9]+)\.([0-9]+) ]]; then
    KERNEL_MM="${BASH_REMATCH[1]}.${BASH_REMATCH[2]}"
  fi

  if [[ -z "${KERNEL_MM}" ]]; then
    echo "Warning: cannot parse kernel version '${KERNEL_VERSION}'; kernel policy check skipped." >&2
  elif kernel_mm_at_least "${KERNEL_MM}" "${REQUIRED_KERNEL_MM}"; then
    echo "Kernel version ${KERNEL_MM} meets ROCm requirements."
  else
    echo "Kernel ${KERNEL_MM} is older than recommended (${REQUIRED_KERNEL_MM}) for ROCm on Ubuntu 24.04."
    if [[ "${DO_INSTALL_MAINLINE_KERNEL}" -eq 1 ]]; then
      echo "Installing latest mainline kernel via PPA..."
      # A failure to add the PPA is not fatal on its own: the package may still
      # be installable from sources configured by an earlier run.
      sudo -E add-apt-repository ppa:cappelikan/ppa -y \
        || echo "Warning: could not add ppa:cappelikan/ppa; trying already configured sources." >&2
      # Report success only when every step succeeded. On failure, keep going
      # with the current kernel instead of aborting the whole installation.
      if sudo -E apt-get update \
        && sudo -E apt-get install -y mainline pkexec \
        && sudo mainline install-latest --quiet; then
        echo "Mainline kernel installed. Reboot required."
        REBOOT_REQUIRED=1
      else
        echo "Warning: mainline kernel installation did not complete. Continuing with current kernel." >&2
      fi
    else
      echo "Mainline kernel install disabled. Continuing with current kernel."
    fi
  fi
elif [[ "${UBUNTU_VERSION}" == "26.04" ]]; then
  echo "Ubuntu 26.04: using default GA kernel (no mainline needed)."
fi

# GRUB parameters

#######################################
# Check whether a kernel command line already contains a parameter as a whole
# whitespace-separated word (literal comparison, no regex interpretation).
# Arguments: $1 - command line text, $2 - parameter
# Returns:   0 when present, non-zero otherwise
#######################################
grub_cmdline_has_param() {
  [[ " $1 " == *" $2 "* ]]
}

#######################################
# Escape a string for use in the replacement part of "sed s/.../.../":
# backslash, the "/" delimiter and "&" are prefixed with a backslash.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
sed_escape_replacement() {
  local raw="$1" escaped="" ch i
  for (( i = 0; i < ${#raw}; i++ )); do
    ch="${raw:i:1}"
    case "${ch}" in
      '\' | '/' | '&') escaped+="\\${ch}" ;;
      *) escaped+="${ch}" ;;
    esac
  done
  printf '%s' "${escaped}"
}

if [[ "${DO_GRUB_PARAMS}" -eq 1 ]]; then
  GRUB_FILE="/etc/default/grub"
  GRUB_CHANGED=0
  GRUB_BACKUP_DONE=0
  if ! sudo grep -qE '^GRUB_CMDLINE_LINUX_DEFAULT="[^"]*"' "${GRUB_FILE}"; then
    # Without such a line the sed edit below would change nothing.
    echo "Warning: no double-quoted GRUB_CMDLINE_LINUX_DEFAULT line in ${GRUB_FILE}; GRUB params skipped." >&2
  else
    for param in "${GRUB_PARAMS[@]}"; do
      # Re-read the value on every iteration so earlier additions are seen.
      GRUB_CMDLINE="$(sudo sed -nE 's/^GRUB_CMDLINE_LINUX_DEFAULT="([^"]*)".*/\1/p' "${GRUB_FILE}" | tail -n 1)"
      if grub_cmdline_has_param "${GRUB_CMDLINE}" "${param}"; then
        continue
      fi
      # Back up exactly once, before the first modification. A per-parameter
      # backup with a one-second timestamp would overwrite the pristine copy
      # with an already modified file.
      if [[ "${GRUB_BACKUP_DONE}" -eq 0 ]]; then
        sudo cp -a "${GRUB_FILE}" "${GRUB_FILE}.backup.$(date +%F-%H%M%S)"
        GRUB_BACKUP_DONE=1
      fi
      sudo sed -i -E "s/^(GRUB_CMDLINE_LINUX_DEFAULT=\")([^\"]*)\"/\1\2 $(sed_escape_replacement "${param}")\"/" "${GRUB_FILE}"
      echo "Added GRUB param: ${param}"
      GRUB_CHANGED=1
    done
  fi
  if [[ "${GRUB_CHANGED}" -eq 1 ]]; then
    sudo update-grub
    echo "GRUB updated."
    REBOOT_REQUIRED=1
  fi
fi

# Purge old packages
# Every step is best effort: a failure is tolerated so that a machine with no
# previous installation passes through this section unchanged.
if [[ "${DO_PURGE_OLD_PACKAGES}" -eq 1 ]]; then
  echo "Removing previous ROCm/AMDGPU packages (best effort)..."
  sudo dpkg --configure -a || true
  # Purge one pattern per apt-get call. When a single argument matches no
  # known package, apt-get rejects the whole command line, so one combined
  # call would silently purge nothing at all.
  for pattern in 'rocm*' 'amdgpu*' 'hip*' 'radeon*' 'libdrm-amdgpu*'; do
    sudo -E apt-get purge -y "${pattern}" || true
  done
  sudo -E apt-get autoremove -y || true
  sudo -E apt-get clean || true
  sudo rm -rf /etc/apt/sources.list.d/amdgpu* /etc/apt/sources.list.d/rocm* /etc/apt/sources.list.d/radeon* || true
  sudo -E apt-get update || true
fi

# ============ ROCm REPO SETUP (VERSION-SPECIFIC) ============

#######################################
# Print the APT preferences stanza that raises the priority of the ROCm
# packages shipped in Ubuntu 26.04's universe component.
# In a "Pin: release" line, n= selects the codename and c= the component;
# "universe" is a component, so it must be given as c=universe.
# Outputs: the stanza on stdout
#######################################
print_rocm_universe_pin() {
  cat <<'EOF'
Package: rocm* libroc*
Pin: release n=resolute, c=universe
Pin-Priority: 650
EOF
}

if [[ "${DO_SETUP_ROCM_REPO}" -eq 1 ]]; then
  if [[ "${UBUNTU_VERSION}" == "24.04" ]]; then
    # === Ubuntu 24.04: Use AMD official repo ===
    echo "Setting up ROCm repository from AMD (repo.radeon.com) for ${UBUNTU_CODENAME}..."

    # Store the repository signing key in binary form where signed-by expects it.
    sudo install -d -m 0755 /usr/share/keyrings
    wget -qO- https://repo.radeon.com/rocm/rocm.gpg.key \
      | gpg --dearmor \
      | sudo tee /usr/share/keyrings/rocm-archive-keyring.gpg >/dev/null

    echo "deb [arch=amd64 signed-by=/usr/share/keyrings/rocm-archive-keyring.gpg] https://repo.radeon.com/rocm/apt/latest/ ${UBUNTU_CODENAME} main" \
      | sudo tee /etc/apt/sources.list.d/rocm.list >/dev/null

    # Prefer packages from repo.radeon.com over same-named distribution packages.
    sudo tee /etc/apt/preferences.d/rocm-pin-600 >/dev/null <<'EOF'
Package: *
Pin: origin repo.radeon.com
Pin-Priority: 600
EOF
    sudo -E apt-get update

  elif [[ "${UBUNTU_VERSION}" == "26.04" ]]; then
    # === Ubuntu 26.04: Use Canonical's native ROCm packages ===
    echo "Ubuntu 26.04: ROCm is provided via Ubuntu universe repository (Canonical)."
    echo "Skipping AMD repo setup (repo.radeon.com does not support 'resolute' yet)."

    # Ensure universe repo is enabled.
    # The policy output is captured first: piping straight into "grep -q" lets
    # grep exit on the first match, and under pipefail a resulting SIGPIPE in
    # the producer would make the check look like "not found".
    APT_POLICY="$(sudo apt-cache policy || true)"
    if ! grep -q "universe" <<<"${APT_POLICY}"; then
      echo "Enabling universe repository..."
      sudo add-apt-repository universe -y
      sudo -E apt-get update
    fi

    # Pin universe ROCm packages slightly higher to prefer them
    print_rocm_universe_pin \
      | sudo tee /etc/apt/preferences.d/rocm-ubuntu-pin >/dev/null
  fi
else
  echo "Skipping ROCm repo setup (DO_SETUP_ROCM_REPO=0)."
fi

# ============ INSTALL ROCm PACKAGES ============
# Install failures are reported as warnings; the script carries on either way.
if (( DO_INSTALL_ROCM == 1 )); then
  echo "Installing ROCm stack..."

  # Config-file handling shared by both release-specific installs.
  ROCM_DPKG_OPTS=(
    -o Dpkg::Options::="--force-confdef"
    -o Dpkg::Options::="--force-confold"
  )

  case "${UBUNTU_VERSION}" in
    "24.04")
      # AMD repo packages
      if ! sudo -E apt-get install -y \
        "${ROCM_DPKG_OPTS[@]}" \
        -o Dpkg::Options::="--force-overwrite" \
        rocm-dev rocm-libs rocm-hip-sdk rocm-smi-lib rocminfo; then
        echo "Warning: ROCm install from AMD repo encountered issues."
      fi
      ;;
    "26.04")
      # Ubuntu universe packages (Canonical)
      # Package names may differ slightly; using common meta-packages
      if ! sudo -E apt-get install -y \
        "${ROCM_DPKG_OPTS[@]}" \
        rocm-dev rocm-libs rocminfo hip-base; then
        echo "Warning: Some ROCm packages may not be available in universe yet."
        echo "Trying fallback packages..."
        sudo -E apt-get install -y rocm-dev rocminfo || true
      fi
      ;;
  esac
fi

# Symlink /opt/rocm
# Point /opt/rocm at the highest-versioned /opt/rocm-<digit>* entry. When none
# exists on 26.04 but rocminfo is in /usr/bin, point it at /usr instead.
if (( DO_LINK_OPT_ROCM == 1 )); then
  ROCM_CANDIDATES=()
  for candidate in /opt/rocm-[0-9]*; do
    # An unmatched glob stays literal; keep only entries that really exist.
    if [[ -e "${candidate}" || -L "${candidate}" ]]; then
      ROCM_CANDIDATES+=("${candidate}")
    fi
  done

  INSTALLED_ROCM_DIR=""
  if [[ "${#ROCM_CANDIDATES[@]}" -gt 0 ]]; then
    INSTALLED_ROCM_DIR="$(printf '%s\n' "${ROCM_CANDIDATES[@]}" | sort -V | tail -n 1)"
  fi

  if [[ -n "${INSTALLED_ROCM_DIR}" ]]; then
    ROCM_DIR_NAME="${INSTALLED_ROCM_DIR##*/}"
    REAL_VERSION="${ROCM_DIR_NAME/rocm-/}"
    sudo ln -sfn "${INSTALLED_ROCM_DIR}" /opt/rocm
    echo "ROCm ${REAL_VERSION} linked: /opt/rocm -> ${INSTALLED_ROCM_DIR}"
  elif [[ "${UBUNTU_VERSION}" == "26.04" && -f /usr/bin/rocminfo ]]; then
    # For Ubuntu 26.04, ROCm may install to /usr instead of /opt
    echo "ROCm installed via Ubuntu packages (system paths: /usr/bin, /usr/lib)"
    sudo ln -sfn /usr /opt/rocm 2>/dev/null || true
  else
    echo "Warning: No /opt/rocm-X.Y.Z found; symlink skipped."
  fi
fi

# User groups

#######################################
# Print the login name the installation is performed for: the user who invoked
# sudo when there is one, otherwise the current user.
# USER is not guaranteed to be set in every environment, and under "set -u" a
# bare $USER would abort the script, so "id -un" is the last fallback.
# Outputs: the user name on stdout
#######################################
target_login_user() {
  printf '%s\n' "${SUDO_USER:-${USER:-$(id -un)}}"
}

if [[ "${DO_USER_GROUPS}" -eq 1 ]]; then
  TARGET_USER="$(target_login_user)"
  # Best effort: a failure is reported but does not stop the installation.
  if sudo usermod -aG render,video "${TARGET_USER}"; then
    echo "User '${TARGET_USER}' added to groups: render, video (re-login required)."
  else
    echo "Warning: could not add user '${TARGET_USER}' to groups: render, video." >&2
  fi
  REBOOT_REQUIRED=1
fi

# PATH in ~/.bashrc

#######################################
# Print the shell snippet that is appended to the user's ~/.bashrc.
# LD_LIBRARY_PATH is extended with ":old value" only when an old value exists.
# A bare "prefix/lib:${LD_LIBRARY_PATH}" would leave a trailing colon when the
# variable is unset, and the dynamic loader reads an empty entry as the
# current directory.
# Arguments: $1 - ROCm installation prefix, $2 - marker text for the comment
# Outputs:   the snippet on stdout
#######################################
rocm_bashrc_snippet() {
  local prefix="$1" marker="$2"
  cat <<EOF

# ${marker}
if [ -d "${prefix}/bin" ]; then
  export PATH="${prefix}/bin:\$PATH"
  export LD_LIBRARY_PATH="${prefix}/lib\${LD_LIBRARY_PATH:+:\${LD_LIBRARY_PATH}}"
  export ROCM_PATH="${prefix}"
fi
EOF
}

if [[ "${DO_BASHRC_PATH}" -eq 1 ]]; then
  # USER may be unset; fall back to "id -un" so "set -u" does not abort here.
  TARGET_USER="${SUDO_USER:-${USER:-$(id -un)}}"
  TARGET_HOME="$(getent passwd "${TARGET_USER}" | cut -d: -f6 || true)"
  MARKER="AMD ROCm Paths"

  # Determine ROCm location
  if [[ -d /opt/rocm/bin ]]; then
    ROCM_PREFIX="/opt/rocm"
  elif [[ -f /usr/bin/rocminfo ]]; then
    ROCM_PREFIX="/usr"  # Ubuntu-native install
  else
    ROCM_PREFIX=""
  fi

  if [[ -z "${TARGET_HOME}" ]]; then
    # Without a home directory the target path would collapse to "/.bashrc".
    echo "Warning: cannot determine home directory of '${TARGET_USER}'; ~/.bashrc left untouched." >&2
  else
    TARGET_BASHRC="${TARGET_HOME}/.bashrc"
    if [[ ! -f "${TARGET_BASHRC}" ]]; then
      sudo -u "${TARGET_USER}" touch "${TARGET_BASHRC}" || true
    fi

    # The marker makes the append idempotent across repeated runs.
    if [[ -n "${ROCM_PREFIX}" ]] \
      && ! grep -qF -- "${MARKER}" "${TARGET_BASHRC}" 2>/dev/null; then
      rocm_bashrc_snippet "${ROCM_PREFIX}" "${MARKER}" >> "${TARGET_BASHRC}"
      echo "Added ROCm environment variables to ${TARGET_BASHRC}"
    fi
  fi

  # Apply to current session
  if [[ -n "${ROCM_PREFIX}" ]] && [[ -d "${ROCM_PREFIX}/bin" ]]; then
    export PATH="${ROCM_PREFIX}/bin:${PATH}"
    export LD_LIBRARY_PATH="${ROCM_PREFIX}/lib${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}"
    export ROCM_PATH="${ROCM_PREFIX}"
  fi
fi

# Ollama workaround
# Make the distribution's amdgpu.ids file also reachable under /opt/amdgpu.
# Nothing happens when the source file is absent.
AMDGPU_IDS_SOURCE="/usr/share/libdrm/amdgpu.ids"
AMDGPU_IDS_LINK_DIR="/opt/amdgpu/share/libdrm"
if [[ "${DO_OLLAMA_AMDGPU_IDS_WORKAROUND}" -eq 1 && -f "${AMDGPU_IDS_SOURCE}" ]]; then
  sudo mkdir -p "${AMDGPU_IDS_LINK_DIR}"
  sudo ln -sf "${AMDGPU_IDS_SOURCE}" "${AMDGPU_IDS_LINK_DIR}/amdgpu.ids"
  echo "Created amdgpu.ids compatibility link for Ollama."
fi

# GPU power control
# Write "on" to each DRM card's power/control node. Nodes that are not
# writable are skipped, and write errors are ignored (best effort).
if (( DO_GPU_POWER_CONTROL_ON == 1 )); then
  for power_node in /sys/class/drm/card*/device/power/control; do
    if [[ -w "${power_node}" ]]; then
      echo on | sudo tee "${power_node}" >/dev/null 2>&1 || true
    fi
  done
  echo "GPU power control set to 'on' (best effort)."
fi

# Final summary
# Print the closing banner, the reboot advice, and the verification commands.
cat <<'EOF'

==========================================
AMD ROCm installation finished.
==========================================
EOF

if (( REBOOT_REQUIRED == 1 )); then
  printf '%s\n' \
    "REBOOT REQUIRED to apply changes." \
    "Run: sudo reboot"
else
  printf '%s\n' "No reboot strictly required, but recommended."
fi

cat <<'EOF'

Verify installation:
  rocminfo
  hipinfo  # if available

EOF

# The Ubuntu-packaged ROCm uses system paths, so mention where to look.
if [[ "${UBUNTU_VERSION}" == "26.04" ]]; then
  printf '%s\n' \
    "Note: On Ubuntu 26.04, ROCm is installed via Ubuntu packages." \
    "Binaries are in /usr/bin, libraries in /usr/lib (not /opt/rocm)."
fi
printf '%s\n' "For non-interactive sessions: source ~/.bashrc"

Внимание

После выполнения скрипта обязательна перезагрузка сервера командой sudo reboot. Это необходимо для активации новых модулей ядра.

После перезагрузки проверьте установку драйверов и ROCm командами amd-smi (для Ubuntu 24.04) и rocminfo (универсально).

question_mark
Я могу вам чем-то помочь?
question_mark
ИИ Помощник ×