From 7b0e19141ebe3dd82a78640ab2abed1bbd420e6c Mon Sep 17 00:00:00 2001 From: clore Date: Wed, 11 Dec 2024 08:59:14 +0000 Subject: [PATCH] remove update_driver_550_flag in all cases on Hive after restart of clore-hosting, pull selftest only on driver 550+ machines --- hosting.py | 4 ++++ lib/clore_partner.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hosting.py b/hosting.py index b9d53bb..38d2c3f 100644 --- a/hosting.py +++ b/hosting.py @@ -38,6 +38,10 @@ elif config.service: utils.run_command("systemctl stop docker && PATH=/hive/bin:/hive/sbin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:./ nvidia-driver-update http://45.12.132.34/NVIDIA-Linux-x86_64-550.135.run --force") utils.run_command("systemctl restart docker") os.remove("/opt/clore-hosting/.run_hive_driver_update") + try: + os.remove(config.update_driver_550_flag) + except Exception as e: + pass if os.path.isfile(config.restart_docker_flag_file): utils.run_command("systemctl restart docker") os.remove(config.restart_docker_flag_file) diff --git a/lib/clore_partner.py b/lib/clore_partner.py index 20d397a..646eaf1 100644 --- a/lib/clore_partner.py +++ b/lib/clore_partner.py @@ -250,15 +250,17 @@ async def check_to_pull_selftest(current_specs): gpu_total_vram = 0 gpu_name = '' mixed_cards = False + driver_version = 0 for idx, nvidia_gpu in enumerate(current_specs["gpus"]["nvidia"]): if idx > 0 and nvidia_gpu["name"] != gpu_name: mixed_cards = True gpu_name = nvidia_gpu["name"] + driver_version = int(nvidia_gpu["driver"].split('.')[0]) if nvidia_gpu["pcie_width"] < min_width: min_width = nvidia_gpu["pcie_width"] if " MiB" in nvidia_gpu["mem_total"]: gpu_total_vram += int(nvidia_gpu["mem_total"].replace(" MiB", '')) - if gpu_name in auto_pull_selftest_gpus and current_specs["ram"] > 7 and int(current_specs["cpus"].split('/')[0]) >= 4 and not mixed_cards and min_width > 1 and gpu_total_vram < current_specs["ram"] * 1024 and float(current_specs["disk"].split(' ')[-1].replace("GB", '')) > 25: + if driver_version >= 550 and gpu_name in auto_pull_selftest_gpus and current_specs["ram"] > 7 and int(current_specs["cpus"].split('/')[0]) >= 4 and not mixed_cards and min_width > 1 and gpu_total_vram < current_specs["ram"] * 1024 and float(current_specs["disk"].split(' ')[-1].replace("GB", '')) > 25: await utils.async_run_command("docker pull vastai/test:selftest", 14400, non_interactive_env) except Exception as e: pass \ No newline at end of file