From 84c11d9223f33541e6d69c9ca5a15658fa22569b Mon Sep 17 00:00:00 2001 From: chn Date: Wed, 8 Oct 2025 22:07:07 +0800 Subject: [PATCH] modules.system.kernel: add amdgpu patch --- devices/pc/default.nix | 2 +- modules/system/kernel/amdgpu.patch | 83 ++++++++++++++++++++++++++++++ modules/system/kernel/default.nix | 1 + 3 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 modules/system/kernel/amdgpu.patch diff --git a/devices/pc/default.nix b/devices/pc/default.nix index 8c706dc1..822917e6 100644 --- a/devices/pc/default.nix +++ b/devices/pc/default.nix @@ -34,7 +34,7 @@ inputs: ]; nixpkgs = { march = "znver5"; rocm = true; }; sysctl.laptop-mode = 5; - kernel.variant = "cachyos"; + kernel = { variant = "cachyos"; patches = [ "amdgpu" ]; }; }; hardware = { gpu.type = "amd"; asus = {};}; services = diff --git a/modules/system/kernel/amdgpu.patch b/modules/system/kernel/amdgpu.patch new file mode 100644 index 00000000..eee9b094 --- /dev/null +++ b/modules/system/kernel/amdgpu.patch @@ -0,0 +1,83 @@ +From 1fb710793ce2619223adffaf981b1ff13cd48f17 Mon Sep 17 00:00:00 2001 +From: Mario Limonciello +Date: Thu, 18 Sep 2025 19:48:00 -0500 +Subject: [PATCH] drm/amdgpu: Enable MES lr_compute_wa by default + +The MES set resources packet has an optional bit 'lr_compute_wa' +which can be used for preventing MES hangs on long compute jobs. + +Set this bit by default. + +Co-developed-by: Yifan Zhang +Signed-off-by: Yifan Zhang +Acked-by: Alex Deucher +Signed-off-by: Mario Limonciello +Signed-off-by: Alex Deucher +--- + drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 6 ++++++ + drivers/gpu/drm/amd/amdgpu/mes_v12_0.c | 5 +++++ + drivers/gpu/drm/amd/include/mes_v11_api_def.h | 3 ++- + drivers/gpu/drm/amd/include/mes_v12_api_def.h | 3 ++- + 4 files changed, 15 insertions(+), 2 deletions(-) + +diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +index 3b91ea601add41..e82188431f7969 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c +@@ -713,6 +713,12 @@ static int mes_v11_0_set_hw_resources(struct amdgpu_mes *mes) + mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; + mes_set_hw_res_pkt.oversubscription_timer = 50; ++ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x7f) ++ mes_set_hw_res_pkt.enable_lr_compute_wa = 1; ++ else ++ dev_info_once(mes->adev->dev, ++ "MES FW version must be >= 0x7f to enable LR compute workaround.\n"); ++ + if (amdgpu_mes_log_enable) { + mes_set_hw_res_pkt.enable_mes_event_int_logging = 1; + mes_set_hw_res_pkt.event_intr_history_gpu_mc_ptr = +diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +index 998893dff08e93..aff06f06aeeecf 100644 +--- a/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c ++++ b/drivers/gpu/drm/amd/amdgpu/mes_v12_0.c +@@ -769,6 +769,11 @@ static int mes_v12_0_set_hw_resources(struct amdgpu_mes *mes, int pipe) + mes_set_hw_res_pkt.use_different_vmid_compute = 1; + mes_set_hw_res_pkt.enable_reg_active_poll = 1; + mes_set_hw_res_pkt.enable_level_process_quantum_check = 1; ++ if ((mes->adev->mes.sched_version & AMDGPU_MES_VERSION_MASK) >= 0x82) ++ mes_set_hw_res_pkt.enable_lr_compute_wa = 1; ++ else ++ dev_info_once(adev->dev, ++ "MES FW version must be >= 0x82 to enable LR compute workaround.\n"); + + /* + * Keep oversubscribe timer for sdma . When we have unmapped doorbell +diff --git a/drivers/gpu/drm/amd/include/mes_v11_api_def.h b/drivers/gpu/drm/amd/include/mes_v11_api_def.h +index 15680c3f49704e..ab1cfc92dbeb1b 100644 +--- a/drivers/gpu/drm/amd/include/mes_v11_api_def.h ++++ b/drivers/gpu/drm/amd/include/mes_v11_api_def.h +@@ -238,7 +238,8 @@ union MESAPI_SET_HW_RESOURCES { + uint32_t enable_mes_sch_stb_log : 1; + uint32_t limit_single_process : 1; + uint32_t is_strix_tmz_wa_enabled :1; +- uint32_t reserved : 13; ++ uint32_t enable_lr_compute_wa : 1; ++ uint32_t reserved : 12; + }; + uint32_t uint32_t_all; + }; +diff --git a/drivers/gpu/drm/amd/include/mes_v12_api_def.h b/drivers/gpu/drm/amd/include/mes_v12_api_def.h +index c04bd351b2505d..69611c7e30e355 100644 +--- a/drivers/gpu/drm/amd/include/mes_v12_api_def.h ++++ b/drivers/gpu/drm/amd/include/mes_v12_api_def.h +@@ -287,7 +287,8 @@ union MESAPI_SET_HW_RESOURCES { + uint32_t limit_single_process : 1; + uint32_t unmapped_doorbell_handling: 2; + uint32_t enable_mes_fence_int: 1; +- uint32_t reserved : 10; ++ uint32_t enable_lr_compute_wa : 1; ++ uint32_t reserved : 9; + }; + uint32_t uint32_all; + }; diff --git a/modules/system/kernel/default.nix b/modules/system/kernel/default.nix index bc73f30e..3abafaca 100644 --- a/modules/system/kernel/default.nix +++ b/modules/system/kernel/default.nix @@ -76,6 +76,7 @@ inputs: }; structuredExtraConfig.BTRFS_EXPERIMENTAL = inputs.lib.kernel.yes; }]; + amdgpu = [{ name = "amdgpu"; patch = ./amdgpu.patch; }]; }; in builtins.concatLists (builtins.map (name: patches.${name}) kernel.patches); };