Debug CMP170 p.2
This commit is contained in:
parent
73b9bacbd8
commit
35104eff6a
|
@ -261,7 +261,7 @@ def get_gpu_info():
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
nvidia_smi_return_code, nvidia_smi_stdout, nvidia_smi_stderr = utils.run_command(f"nvidia-smi --query-gpu=index,name,uuid,serial,memory.total --format=csv")
|
nvidia_smi_return_code, nvidia_smi_stdout, nvidia_smi_stderr = utils.run_command(f"nvidia-smi --query-gpu=index,name,uuid,serial,memory.total,pci.device_id --format=csv")
|
||||||
nvidia_smi_xl_return_code, nvidia_smi_xl_stdout, nvidia_smi_xl_stderr = utils.run_command("nvidia-smi --query-gpu=timestamp,name,pci.bus_id,driver_version,pstate,pcie.link.gen.max,pcie.link.gen.current,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used,pci.device_id --format=csv")
|
nvidia_smi_xl_return_code, nvidia_smi_xl_stdout, nvidia_smi_xl_stderr = utils.run_command("nvidia-smi --query-gpu=timestamp,name,pci.bus_id,driver_version,pstate,pcie.link.gen.max,pcie.link.gen.current,temperature.gpu,utilization.gpu,utilization.memory,memory.total,memory.free,memory.used,pci.device_id --format=csv")
|
||||||
|
|
||||||
if "Failed to initialize NVML" in nvidia_smi_stdout or "Failed to initialize NVML" in nvidia_smi_stderr or "Failed to initialize NVML" in nvidia_smi_xl_stdout or "Failed to initialize NVML" in nvidia_smi_xl_stderr:
|
if "Failed to initialize NVML" in nvidia_smi_stdout or "Failed to initialize NVML" in nvidia_smi_stderr or "Failed to initialize NVML" in nvidia_smi_xl_stdout or "Failed to initialize NVML" in nvidia_smi_xl_stderr:
|
||||||
|
@ -272,20 +272,14 @@ def get_gpu_info():
|
||||||
for index, line in enumerate(lines_xl):
|
for index, line in enumerate(lines_xl):
|
||||||
parts = [s.strip() for s in line.split(',')]
|
parts = [s.strip() for s in line.split(',')]
|
||||||
if len(parts)>12 and index>0:
|
if len(parts)>12 and index>0:
|
||||||
gpu_name = parts[1]
|
gpu_name_xl = parts[1]
|
||||||
print("gpu name 1")
|
if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in GPU_ID_TO_NAME:
|
||||||
print(gpu_name)
|
gpu_name_xl = GPU_ID_TO_NAME[parts[13]]
|
||||||
print("part 13")
|
|
||||||
print(parts[13])
|
|
||||||
if gpu_name == "NVIDIA Graphics Device" and parts[13] in GPU_ID_TO_NAME:
|
|
||||||
gpu_name = GPU_ID_TO_NAME[parts[13]]
|
|
||||||
print("gpu name 2")
|
|
||||||
print(gpu_name)
|
|
||||||
|
|
||||||
xl_gpu_info={
|
xl_gpu_info={
|
||||||
"id":index-1,
|
"id":index-1,
|
||||||
"timestamp": parts[0],
|
"timestamp": parts[0],
|
||||||
"name": gpu_name,
|
"name": gpu_name_xl,
|
||||||
"pcie_bus": parts[2].split(':', 1)[1],
|
"pcie_bus": parts[2].split(':', 1)[1],
|
||||||
"driver": parts[3],
|
"driver": parts[3],
|
||||||
"pstate": parts[4],
|
"pstate": parts[4],
|
||||||
|
@ -296,6 +290,7 @@ def get_gpu_info():
|
||||||
"mem_free": parts[11],
|
"mem_free": parts[11],
|
||||||
"mem_used": parts[12]
|
"mem_used": parts[12]
|
||||||
}
|
}
|
||||||
|
print(xl_gpu_info)
|
||||||
try:
|
try:
|
||||||
pci_query = parts[2][parts[2].find(':')+1:]
|
pci_query = parts[2][parts[2].find(':')+1:]
|
||||||
for index, valid_pci_dev in enumerate(valid_pci_dev_list):
|
for index, valid_pci_dev in enumerate(valid_pci_dev_list):
|
||||||
|
@ -311,7 +306,12 @@ def get_gpu_info():
|
||||||
for line in lines:
|
for line in lines:
|
||||||
parts = line.split(',')
|
parts = line.split(',')
|
||||||
if bool(re.match(r'^[0-9]+$', parts[0])):
|
if bool(re.match(r'^[0-9]+$', parts[0])):
|
||||||
gpu_str = f"{len(lines)-1}x {parts[1].strip()}"
|
|
||||||
|
gpu_name = parts[1]
|
||||||
|
if gpu_name == "NVIDIA Graphics Device" and parts[5] in GPU_ID_TO_NAME:
|
||||||
|
gpu_name = GPU_ID_TO_NAME[parts[5]]
|
||||||
|
|
||||||
|
gpu_str = f"{len(lines)-1}x {gpu_name.strip()}"
|
||||||
gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2)
|
gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
nvml_err=True
|
nvml_err=True
|
||||||
|
|
Loading…
Reference in New Issue