Remove update_driver_550_flag in all cases on Hive after a restart of clore-hosting; pull selftest only on machines with driver 550+
This commit is contained in:
parent
41fdc2814d
commit
7b0e19141e
|
@ -38,6 +38,10 @@ elif config.service:
|
|||
utils.run_command("systemctl stop docker && PATH=/hive/bin:/hive/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:./ nvidia-driver-update http://45.12.132.34/NVIDIA-Linux-x86_64-550.135.run --force")
|
||||
utils.run_command("systemctl restart docker")
|
||||
os.remove("/opt/clore-hosting/.run_hive_driver_update")
|
||||
try:
|
||||
os.remove(config.update_driver_550_flag)
|
||||
except Exception as e:
|
||||
pass
|
||||
if os.path.isfile(config.restart_docker_flag_file):
|
||||
utils.run_command("systemctl restart docker")
|
||||
os.remove(config.restart_docker_flag_file)
|
||||
|
|
|
@ -250,15 +250,17 @@ async def check_to_pull_selftest(current_specs):
|
|||
gpu_total_vram = 0
|
||||
gpu_name = ''
|
||||
mixed_cards = False
|
||||
driver_version = 0
|
||||
for idx, nvidia_gpu in enumerate(current_specs["gpus"]["nvidia"]):
|
||||
if idx > 0 and nvidia_gpu["name"] != gpu_name:
|
||||
mixed_cards = True
|
||||
gpu_name = nvidia_gpu["name"]
|
||||
driver_version = int(nvidia_gpu["driver"].split('.')[0])
|
||||
if nvidia_gpu["pcie_width"] < min_width:
|
||||
min_width = nvidia_gpu["pcie_width"]
|
||||
if " MiB" in nvidia_gpu["mem_total"]:
|
||||
gpu_total_vram += int(nvidia_gpu["mem_total"].replace(" MiB", ''))
|
||||
if gpu_name in auto_pull_selftest_gpus and current_specs["ram"] > 7 and int(current_specs["cpus"].split('/')[0]) >= 4 and not mixed_cards and min_width > 1 and gpu_total_vram < current_specs["ram"] * 1024 and float(current_specs["disk"].split(' ')[-1].replace("GB", '')) > 25:
|
||||
if driver_version >= 550 and gpu_name in auto_pull_selftest_gpus and current_specs["ram"] > 7 and int(current_specs["cpus"].split('/')[0]) >= 4 and not mixed_cards and min_width > 1 and gpu_total_vram < current_specs["ram"] * 1024 and float(current_specs["disk"].split(' ')[-1].replace("GB", '')) > 25:
|
||||
await utils.async_run_command("docker pull vastai/test:selftest", 14400, non_interactive_env)
|
||||
except Exception as e:
|
||||
pass
|
Loading…
Reference in New Issue