diff --git a/clore_hosting/main.py b/clore_hosting/main.py index 6e78ad5..0e05e24 100644 --- a/clore_hosting/main.py +++ b/clore_hosting/main.py @@ -537,8 +537,6 @@ class CloreClient: cpu_usage = await get_specs.get_cpu_usage() ram_usage = await get_specs.get_ram_usage() gpu_list = current_specs["gpus"]["nvidia"]+current_specs["gpus"]["amd"] - print("realtime gpus") - print(gpu_list) submit_document = { "update_realtime_data":True, "gpus": gpu_list, @@ -559,12 +557,8 @@ class CloreClient: print(current_specs) if self.last_hw_specs_submit < (utils.unix_timestamp()-1800): self.last_hw_specs_submit=utils.unix_timestamp() - print("submit specs start") await self.submit_specs(current_specs) - print("submit specs end") - print("update realtime start") await self.update_realtime_data(current_specs) - print("update realtime end") try: if self.xfs_state == "active" and len(current_specs["gpus"]["nvidia"]) > 0 and not self.runned_pull_selftest: await clore_partner.check_to_pull_selftest(current_specs) diff --git a/lib/constants.py b/lib/constants.py new file mode 100644 index 0000000..dc317f0 --- /dev/null +++ b/lib/constants.py @@ -0,0 +1,4 @@ +GPU_ID_TO_NAME = { + "0x20C210DE": "NVIDIA CMP 170HX", + "0x208210DE": "NVIDIA CMP 170HX" +} diff --git a/lib/get_specs.py b/lib/get_specs.py index ff05d74..fae4ce9 100644 --- a/lib/get_specs.py +++ b/lib/get_specs.py @@ -4,6 +4,7 @@ import xml.etree.ElementTree as ET from lib import docker_interface from typing import Dict, List, Optional from lib import utils +from lib import constants import subprocess import speedtest import platform @@ -241,11 +242,6 @@ def get_bus_spec(bus_id): return PCIBusInfo() def get_gpu_info(): - GPU_ID_TO_NAME = { - "0x20C210DE": "NVIDIA CMP 170HX", - "0x208210DE": "NVIDIA CMP 170HX" - } - gpu_str = "0x Unknown" nvml_err = False gpu_mem = 0 @@ -273,8 +269,8 @@ def get_gpu_info(): parts = [s.strip() for s in line.split(',')] if len(parts)>12 and index>0: gpu_name_xl = parts[1] - if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in GPU_ID_TO_NAME: - gpu_name_xl = GPU_ID_TO_NAME[parts[13]] + if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in constants.GPU_ID_TO_NAME: + gpu_name_xl = constants.GPU_ID_TO_NAME[parts[13]] xl_gpu_info={ "id":index-1, @@ -309,8 +305,8 @@ def get_gpu_info(): gpu_name = parts[1].strip() gpu_id = parts[5].strip(); - if gpu_name == "NVIDIA Graphics Device" and gpu_id in GPU_ID_TO_NAME: - gpu_name = GPU_ID_TO_NAME[gpu_id] + if gpu_name == "NVIDIA Graphics Device" and gpu_id in constants.GPU_ID_TO_NAME: + gpu_name = constants.GPU_ID_TO_NAME[gpu_id] gpu_str = f"{len(lines)-1}x {gpu_name}" gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2) diff --git a/lib/nvml.py b/lib/nvml.py index 59dade3..b44b792 100644 --- a/lib/nvml.py +++ b/lib/nvml.py @@ -1,6 +1,7 @@ from lib import config as config_module from lib import logging as logging_lib from lib import get_specs +from lib import constants config = config_module.config log = logging_lib.log @@ -97,6 +98,15 @@ def init(gpu_specs_file=None, allow_hive_binaries=True): break gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(i) gpu_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle) + + + print("name") + print(pynvml.nvmlDeviceGetName(gpu_handle)) + print("device_id") + pci_info = pynvml.nvmlDeviceGetPciInfo(gpu_handle) + print(pci_info.pciDeviceId) + + gpu_name_list.append(pynvml.nvmlDeviceGetName(gpu_handle)) if not f"{i}-{gpu_uuid}" in parsed_specs_keys: parsed_specs={}