dev: fixing oc
This commit is contained in:
parent
cc9941db02
commit
260ee6f18f
|
@ -537,8 +537,6 @@ class CloreClient:
|
|||
cpu_usage = await get_specs.get_cpu_usage()
|
||||
ram_usage = await get_specs.get_ram_usage()
|
||||
gpu_list = current_specs["gpus"]["nvidia"]+current_specs["gpus"]["amd"]
|
||||
print("realtime gpus")
|
||||
print(gpu_list)
|
||||
submit_document = {
|
||||
"update_realtime_data":True,
|
||||
"gpus": gpu_list,
|
||||
|
@ -559,12 +557,8 @@ class CloreClient:
|
|||
print(current_specs)
|
||||
if self.last_hw_specs_submit < (utils.unix_timestamp()-1800):
|
||||
self.last_hw_specs_submit=utils.unix_timestamp()
|
||||
print("submit specs start")
|
||||
await self.submit_specs(current_specs)
|
||||
print("submit specs end")
|
||||
print("update realtime start")
|
||||
await self.update_realtime_data(current_specs)
|
||||
print("update realtime end")
|
||||
try:
|
||||
if self.xfs_state == "active" and len(current_specs["gpus"]["nvidia"]) > 0 and not self.runned_pull_selftest:
|
||||
await clore_partner.check_to_pull_selftest(current_specs)
|
||||
|
|
|
@ -0,0 +1,4 @@
|
|||
GPU_ID_TO_NAME = {
|
||||
"0x20C210DE": "NVIDIA CMP 170HX",
|
||||
"0x208210DE": "NVIDIA CMP 170HX"
|
||||
}
|
|
@ -4,6 +4,7 @@ import xml.etree.ElementTree as ET
|
|||
from lib import docker_interface
|
||||
from typing import Dict, List, Optional
|
||||
from lib import utils
|
||||
from lib import constants
|
||||
import subprocess
|
||||
import speedtest
|
||||
import platform
|
||||
|
@ -241,11 +242,6 @@ def get_bus_spec(bus_id):
|
|||
return PCIBusInfo()
|
||||
|
||||
def get_gpu_info():
|
||||
GPU_ID_TO_NAME = {
|
||||
"0x20C210DE": "NVIDIA CMP 170HX",
|
||||
"0x208210DE": "NVIDIA CMP 170HX"
|
||||
}
|
||||
|
||||
gpu_str = "0x Unknown"
|
||||
nvml_err = False
|
||||
gpu_mem = 0
|
||||
|
@ -273,8 +269,8 @@ def get_gpu_info():
|
|||
parts = [s.strip() for s in line.split(',')]
|
||||
if len(parts)>12 and index>0:
|
||||
gpu_name_xl = parts[1]
|
||||
if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in GPU_ID_TO_NAME:
|
||||
gpu_name_xl = GPU_ID_TO_NAME[parts[13]]
|
||||
if gpu_name_xl == "NVIDIA Graphics Device" and parts[13] in constants.GPU_ID_TO_NAME:
|
||||
gpu_name_xl = constants.GPU_ID_TO_NAME[parts[13]]
|
||||
|
||||
xl_gpu_info={
|
||||
"id":index-1,
|
||||
|
@ -309,8 +305,8 @@ def get_gpu_info():
|
|||
|
||||
gpu_name = parts[1].strip()
|
||||
gpu_id = parts[5].strip();
|
||||
if gpu_name == "NVIDIA Graphics Device" and gpu_id in GPU_ID_TO_NAME:
|
||||
gpu_name = GPU_ID_TO_NAME[gpu_id]
|
||||
if gpu_name == "NVIDIA Graphics Device" and gpu_id in constants.GPU_ID_TO_NAME:
|
||||
gpu_name = constants.GPU_ID_TO_NAME[gpu_id]
|
||||
|
||||
gpu_str = f"{len(lines)-1}x {gpu_name}"
|
||||
gpu_mem = round(int(filter_non_numeric(parts[4]).strip())/1024, 2)
|
||||
|
|
10
lib/nvml.py
10
lib/nvml.py
|
@ -1,6 +1,7 @@
|
|||
from lib import config as config_module
|
||||
from lib import logging as logging_lib
|
||||
from lib import get_specs
|
||||
from lib import constants
|
||||
|
||||
config = config_module.config
|
||||
log = logging_lib.log
|
||||
|
@ -97,6 +98,15 @@ def init(gpu_specs_file=None, allow_hive_binaries=True):
|
|||
break
|
||||
gpu_handle = pynvml.nvmlDeviceGetHandleByIndex(i)
|
||||
gpu_uuid = pynvml.nvmlDeviceGetUUID(gpu_handle)
|
||||
|
||||
|
||||
print("name")
|
||||
print(pynvml.nvmlDeviceGetName(gpu_handle))
|
||||
print("device_id")
|
||||
pci_info = pynvml.nvmlDeviceGetPciInfo(gpu_handle)
|
||||
print(pci_info.pciDeviceId)
|
||||
|
||||
|
||||
gpu_name_list.append(pynvml.nvmlDeviceGetName(gpu_handle))
|
||||
if not f"{i}-{gpu_uuid}" in parsed_specs_keys:
|
||||
parsed_specs={}
|
||||
|
|
Loading…
Reference in New Issue