dev: debug
This commit is contained in:
parent
daa634bfa9
commit
96bd92e5f0
|
@ -554,10 +554,13 @@ class CloreClient:
|
|||
try:
|
||||
await monitoring.put("specs_service")
|
||||
current_specs = await specs.get()
|
||||
print(current_specs)
|
||||
if self.last_hw_specs_submit < (utils.unix_timestamp()-1800):
|
||||
self.last_hw_specs_submit=utils.unix_timestamp()
|
||||
await self.submit_specs(current_specs)
|
||||
print("submit specs")
|
||||
await self.update_realtime_data(current_specs)
|
||||
print("update realtime")
|
||||
try:
|
||||
if self.xfs_state == "active" and len(current_specs["gpus"]["nvidia"]) > 0 and not self.runned_pull_selftest:
|
||||
await clore_partner.check_to_pull_selftest(current_specs)
|
||||
|
|
|
@ -291,8 +291,6 @@ def get_gpu_info():
|
|||
"mem_used": parts[12]
|
||||
}
|
||||
|
||||
print(xl_gpu_info)
|
||||
|
||||
try:
|
||||
pci_query = parts[2][parts[2].find(':')+1:]
|
||||
for index, valid_pci_dev in enumerate(valid_pci_dev_list):
|
||||
|
@ -302,8 +300,6 @@ def get_gpu_info():
|
|||
xl_gpu_info["pcie_width"]=bus_spec.width
|
||||
xl_gpu_info["pcie_revision"]=bus_spec.revision
|
||||
except Exception as e:
|
||||
print("err")
|
||||
print(e)
|
||||
pass
|
||||
gpus["nvidia"].append(xl_gpu_info)
|
||||
lines = nvidia_smi_stdout.split('\n')
|
||||
|
@ -313,21 +309,13 @@ def get_gpu_info():
|
|||
|
||||
gpu_name = parts[1].strip()
|
||||
gpu_id = parts[5].strip();
|
||||
print("parts")
|
||||
print(parts)
|
||||
print("p1:" + gpu_name)
|
||||
print("p1:" + gpu_id)
|
||||
if gpu_name == "NVIDIA Graphics Device" and gpu_id in GPU_ID_TO_NAME:
|
||||
gpu_name = GPU_ID_TO_NAME[gpu_id]
|
||||
print("p2:" + gpu_name)
|
||||
|
||||
gpu_str = f"{len(lines)-1}x {gpu_name}"
|
||||
print(gpu_str)
|
||||
gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2)
|
||||
except Exception as e:
|
||||
nvml_err=True
|
||||
print("err2")
|
||||
print(e)
|
||||
pass
|
||||
else:
|
||||
nvml_err=True
|
||||
|
|
Loading…
Reference in New Issue