From bae3d395f9918be1c334c8e105e763e16589e4f1 Mon Sep 17 00:00:00 2001 From: empresa Date: Thu, 16 Oct 2025 01:50:41 +0700 Subject: [PATCH] dev: more debug --- lib/nvml.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lib/nvml.py b/lib/nvml.py index 1606767..bcb1d31 100644 --- a/lib/nvml.py +++ b/lib/nvml.py @@ -99,15 +99,13 @@ def init(gpu_specs_file=None, allow_hive_binaries=True): gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(i) gpu_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle) - - print("name") - print(pynvml.nvmlDeviceGetName(gpu_handle)) - print("device_id") - pci_info = pynvml.nvmlDeviceGetPciInfo(gpu_handle) - print(hex(pci_info.pciDeviceId)) + gpu_name = pynvml.nvmlDeviceGetName(gpu_handle) + gpu_device_id = hex(pynvml.nvmlDeviceGetPciInfo(gpu_handle).pciDeviceId).upper() + if gpu_name == "NVIDIA Graphics Device" and gpu_device_id in constants.GPU_ID_TO_NAME: + gpu_name = constants.GPU_ID_TO_NAME[gpu_device_id] - gpu_name_list.append(pynvml.nvmlDeviceGetName(gpu_handle)) + gpu_name_list.append(gpu_name) if not f"{i}-{gpu_uuid}" in parsed_specs_keys: parsed_specs={} regenerate_specs=True @@ -128,7 +126,13 @@ def init(gpu_specs_file=None, allow_hive_binaries=True): max_power_limit = int(power_limits[1] / 1000.0) gpu_spec["default_power_limit"] = int(pynvml.nvmlDeviceGetPowerManagementDefaultLimit(gpu_handle) / 1000.0) gpu_spec["power_limits"] = [min_power_limit, max_power_limit] - gpu_spec["name"] = pynvml.nvmlDeviceGetName(gpu_handle) + + gpu_name_regen = pynvml.nvmlDeviceGetName(gpu_handle) + gpu_device_id_regen = hex(pynvml.nvmlDeviceGetPciInfo(gpu_handle).pciDeviceId).upper() + if gpu_name_regen == "NVIDIA Graphics Device" and gpu_device_id_regen in constants.GPU_ID_TO_NAME: + gpu_name_regen = constants.GPU_ID_TO_NAME[gpu_device_id_regen] + + gpu_spec["name"] = gpu_name_regen gpu_name_list.append(gpu_spec["name"]) gpu_spec["locks"] = mem_to_core_allowed_locks