dev: fixing oc

This commit is contained in:
empresa 2025-10-16 01:43:56 +07:00
parent cc9941db02
commit 260ee6f18f
4 changed files with 19 additions and 15 deletions

View File

@ -537,8 +537,6 @@ class CloreClient:
cpu_usage = await get_specs.get_cpu_usage() cpu_usage = await get_specs.get_cpu_usage()
ram_usage = await get_specs.get_ram_usage() ram_usage = await get_specs.get_ram_usage()
gpu_list = current_specs["gpus"]["nvidia"]+current_specs["gpus"]["amd"] gpu_list = current_specs["gpus"]["nvidia"]+current_specs["gpus"]["amd"]
print("realtime gpus")
print(gpu_list)
submit_document = { submit_document = {
"update_realtime_data":True, "update_realtime_data":True,
"gpus": gpu_list, "gpus": gpu_list,
@ -559,12 +557,8 @@ class CloreClient:
print(current_specs) print(current_specs)
if self.last_hw_specs_submit < (utils.unix_timestamp()-1800): if self.last_hw_specs_submit < (utils.unix_timestamp()-1800):
self.last_hw_specs_submit=utils.unix_timestamp() self.last_hw_specs_submit=utils.unix_timestamp()
print("submit specs start")
await self.submit_specs(current_specs) await self.submit_specs(current_specs)
print("submit specs end")
print("update realtime start")
await self.update_realtime_data(current_specs) await self.update_realtime_data(current_specs)
print("update realtime end")
try: try:
if self.xfs_state == "active" and len(current_specs["gpus"]["nvidia"]) > 0 and not self.runned_pull_selftest: if self.xfs_state == "active" and len(current_specs["gpus"]["nvidia"]) > 0 and not self.runned_pull_selftest:
await clore_partner.check_to_pull_selftest(current_specs) await clore_partner.check_to_pull_selftest(current_specs)

4
lib/constants.py Normal file
View File

@ -0,0 +1,4 @@
# Display name shared by every CMP 170HX ID listed in the table below.
_CMP_170HX_NAME = "NVIDIA CMP 170HX"

# Lookup table from a GPU ID string to the display name to use for that GPU.
# Callers consult it when a card reports only a generic name and replace
# that name with the entry found here.
# NOTE(review): keys look like a PCI device ID followed by the vendor ID
# (10DE) in one hex string - confirm against the tool that produces them.
GPU_ID_TO_NAME = {
    "0x20C210DE": _CMP_170HX_NAME,
    "0x208210DE": _CMP_170HX_NAME,
}

View File

@ -4,6 +4,7 @@ import xml.etree.ElementTree as ET
from lib import docker_interface from lib import docker_interface
from typing import Dict, List, Optional from typing import Dict, List, Optional
from lib import utils from lib import utils
from lib import constants
import subprocess import subprocess
import speedtest import speedtest
import platform import platform
@ -241,11 +242,6 @@ def get_bus_spec(bus_id):
return PCIBusInfo() return PCIBusInfo()
def get_gpu_info(): def get_gpu_info():
GPU_ID_TO_NAME = {
"0x20C210DE": "NVIDIA CMP 170HX",
"0x208210DE": "NVIDIA CMP 170HX"
}
gpu_str = "0x Unknown" gpu_str = "0x Unknown"
nvml_err = False nvml_err = False
gpu_mem = 0 gpu_mem = 0
@ -273,8 +269,8 @@ def get_gpu_info():
parts = [s.strip() for s in line.split(',')] parts = [s.strip() for s in line.split(',')]
if len(parts)>12 and index>0: if len(parts)>12 and index>0:
gpu_name_xl = parts[1] gpu_name_xl = parts[1]
if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in GPU_ID_TO_NAME: if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in constants.GPU_ID_TO_NAME:
gpu_name_xl = GPU_ID_TO_NAME[parts[13]] gpu_name_xl = constants.GPU_ID_TO_NAME[parts[13]]
xl_gpu_info={ xl_gpu_info={
"id":index-1, "id":index-1,
@ -309,8 +305,8 @@ def get_gpu_info():
gpu_name = parts[1].strip() gpu_name = parts[1].strip()
gpu_id = parts[5].strip(); gpu_id = parts[5].strip();
if gpu_name == "NVIDIA Graphics Device" and gpu_id in GPU_ID_TO_NAME: if gpu_name == "NVIDIA Graphics Device" and gpu_id in constants.GPU_ID_TO_NAME:
gpu_name = GPU_ID_TO_NAME[gpu_id] gpu_name = constants.GPU_ID_TO_NAME[gpu_id]
gpu_str = f"{len(lines)-1}x {gpu_name}" gpu_str = f"{len(lines)-1}x {gpu_name}"
gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2) gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2)

View File

@ -1,6 +1,7 @@
from lib import config as config_module from lib import config as config_module
from lib import logging as logging_lib from lib import logging as logging_lib
from lib import get_specs from lib import get_specs
from lib import constants
config = config_module.config config = config_module.config
log = logging_lib.log log = logging_lib.log
@ -97,6 +98,15 @@ def init(gpu_specs_file=None, allow_hive_binaries=True):
break break
gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(i) gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(i)
gpu_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle) gpu_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle)
print("name")
print(pynvml.nvmlDeviceGetName(gpu_handle))
print("device_id")
pci_info = pynvml.nvmlDeviceGetPciInfo(gpu_handle)
print(pci_info.pciDeviceId)
gpu_name_list.append(pynvml.nvmlDeviceGetName(gpu_handle)) gpu_name_list.append(pynvml.nvmlDeviceGetName(gpu_handle))
if not f"{i}-{gpu_uuid}" in parsed_specs_keys: if not f"{i}-{gpu_uuid}" in parsed_specs_keys:
parsed_specs={} parsed_specs={}