dev: more debug

This commit is contained in:
empresa 2025-10-16 01:50:41 +07:00
parent d908fb043d
commit bae3d395f9
1 changed file with 12 additions and 8 deletions

View File

@@ -99,15 +99,13 @@ def init(gpu_specs_file=None, allow_hive_binaries=True):
gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(i)
gpu_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle)
print("name")
print(pynvml.nvmlDeviceGetName(gpu_handle))
print("device_id")
pci_info = pynvml.nvmlDeviceGetPciInfo(gpu_handle)
print(hex(pci_info.pciDeviceId))
gpu_name = pynvml.nvmlDeviceGetName(gpu_handle)
gpu_device_id = hex(pynvml.nvmlDeviceGetPciInfo(gpu_handle).pciDeviceId).upper()
if gpu_name == "NVIDIA Graphics Device" and gpu_device_id in constants.GPU_ID_TO_NAME:
gpu_name = constants.GPU_ID_TO_NAME[gpu_device_id]
gpu_name_list.append(pynvml.nvmlDeviceGetName(gpu_handle))
gpu_name_list.append(gpu_name)
if not f"{i}-{gpu_uuid}" in parsed_specs_keys:
parsed_specs={}
regenerate_specs=True
@@ -128,7 +126,13 @@ def init(gpu_specs_file=None, allow_hive_binaries=True):
max_power_limit = int(power_limits[1] / 1000.0)
gpu_spec["default_power_limit"] = int(pynvml.nvmlDeviceGetPowerManagementDefaultLimit(gpu_handle) / 1000.0)
gpu_spec["power_limits"] = [min_power_limit, max_power_limit]
gpu_spec["name"] = pynvml.nvmlDeviceGetName(gpu_handle)
gpu_name_regen = pynvml.nvmlDeviceGetName(gpu_handle)
gpu_device_id_regen = hex(pynvml.nvmlDeviceGetPciInfo(gpu_handle).pciDeviceId).upper()
if gpu_name_regen == "NVIDIA Graphics Device" and gpu_device_id_regen in constants.GPU_ID_TO_NAME:
gpu_name_regen = constants.GPU_ID_TO_NAME[gpu_device_id_regen]
gpu_spec["name"] = gpu_name_regen
gpu_name_list.append(gpu_spec["name"])
gpu_spec["locks"] = mem_to_core_allowed_locks