From 9ec9a14a0e8a80f4e4431f300226a81f9dc70834 Mon Sep 17 00:00:00 2001 From: clore Date: Sat, 1 Mar 2025 02:35:45 +0000 Subject: [PATCH] image prefixes to cache, volumes --- clore_hosting/main.py | 11 ++++++++++- lib/docker_deploy.py | 25 ++++++++++++++++++++----- lib/docker_interface.py | 7 ++++++- 3 files changed, 36 insertions(+), 7 deletions(-) diff --git a/clore_hosting/main.py b/clore_hosting/main.py index 988b965..f110b9f 100644 --- a/clore_hosting/main.py +++ b/clore_hosting/main.py @@ -151,6 +151,7 @@ class CloreClient: self.start_time = utils.unix_timestamp() self.runned_pull_selftest = False + self.image_cache_allowed_prefixes = [] WebSocketClient.set_gpu_list(nvml.get_gpu_name_list()) WebSocketClient.set_is_hive(self.is_hive) @@ -324,6 +325,11 @@ class CloreClient: image_needed = True del self.last_pull_progress[local_image] break + for image_needed_prefix in self.image_cache_allowed_prefixes: + if local_image[:len(image_needed_prefix)] == image_needed_prefix: + image_needed = True + del self.last_pull_progress[local_image] + break if not image_needed and removed_cnt < config.max_remove_images_per_run and config.delete_unused_containers and partner_images != None: log.success(f"GOING TO REMOVE {local_image}") with concurrent.futures.ThreadPoolExecutor() as pool: @@ -409,10 +415,13 @@ class CloreClient: tmp_images = [] is_order_spot = False + self.image_cache_allowed_prefixes=[] for idx, container in enumerate(self.containers): if "spot" in container: is_order_spot = True + if "allow_image_cache_prefix" in container: + self.image_cache_allowed_prefixes.append(container["allow_image_cache_prefix"]) if "image" in container and "image" in container and container["image"]!="cloreai/hive-use-flightsheet": log_pull = False if "name" in container: @@ -519,7 +528,7 @@ class CloreClient: async def submit_specs(self, current_specs): try: if type(current_specs) == dict: - current_specs["backend_version"]=21 + current_specs["backend_version"]=22 current_specs["update_hw"]=True smallest_pcie_width = 999 for gpu in current_specs["gpus"]["nvidia"]: diff --git a/lib/docker_deploy.py b/lib/docker_deploy.py index 5304293..bcf215b 100644 --- a/lib/docker_deploy.py +++ b/lib/docker_deploy.py @@ -19,11 +19,6 @@ log = logging_lib.log def deploy(validated_containers, allowed_running_containers=[], can_run_partner_workloads=False): local_images = docker_interface.get_local_images() all_containers = docker_interface.get_containers(all=True) - - is_hive = "hive" in get_specs.get_kernel() - - # Deploy wireguard first - wireguard_containers = [] rest_containers = [] for container in validated_containers: @@ -40,6 +35,12 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_ needed_running_names = [] paused_names = [] + all_use_volumes = [] + local_volume_list = docker_interface.list_volumes() + clore_volume_list = [] + for volume in local_volume_list: + if volume.name[:6]=="clore_": + clore_volume_list.append(volume.name) created_container_names = [] for container in all_containers: @@ -62,6 +63,12 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_ else: needed_running_names.append(validated_container["name"]) + if "mandatory_volumes" in validated_container: + for volume_name in validated_container["mandatory_volumes"]: + if volume_name[:6] == "clore_" and not volume_name in clore_volume_list: + docker_interface.create_volume(volume_name) + all_use_volumes += validated_container["mandatory_volumes"] + container_options = { 'image': validated_container["image"], 'name': validated_container["name"], @@ -162,6 +169,14 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_ except Exception as e: log.debug(f"Container creation issue | {e}") pass + + all_use_volumes=list(dict.fromkeys(all_use_volumes)) + for volume in local_volume_list: + if volume.name[:6]=="clore_" and not volume in all_use_volumes: + try: + volume.remove() + except Exception as e: + pass all_running_container_names = [] all_stopped_container_names = [] diff --git a/lib/docker_interface.py b/lib/docker_interface.py index de27b52..38f64b4 100644 --- a/lib/docker_interface.py +++ b/lib/docker_interface.py @@ -443,4 +443,9 @@ def configure_exec_opts(key="native.cgroupdriver", value="cgroupfs"): def is_docker_default_name_lenient(container_name): # Not a perfect solution, but it will do the job, pattern = r'^[a-z]+_[a-z]+$' - return re.match(pattern, container_name) is not None \ No newline at end of file + return re.match(pattern, container_name) is not None + +def list_volumes(): + return client.volumes.list() +def create_volume(volume_name): + client.volumes.create(name=volume_name) \ No newline at end of file