image prefixes to cache, volumes
This commit is contained in:
parent
6665aa8fbb
commit
9ec9a14a0e
|
@ -151,6 +151,7 @@ class CloreClient:
|
||||||
self.start_time = utils.unix_timestamp()
|
self.start_time = utils.unix_timestamp()
|
||||||
|
|
||||||
self.runned_pull_selftest = False
|
self.runned_pull_selftest = False
|
||||||
|
self.image_cache_allowed_prefixes = []
|
||||||
|
|
||||||
WebSocketClient.set_gpu_list(nvml.get_gpu_name_list())
|
WebSocketClient.set_gpu_list(nvml.get_gpu_name_list())
|
||||||
WebSocketClient.set_is_hive(self.is_hive)
|
WebSocketClient.set_is_hive(self.is_hive)
|
||||||
|
@ -324,6 +325,11 @@ class CloreClient:
|
||||||
image_needed = True
|
image_needed = True
|
||||||
del self.last_pull_progress[local_image]
|
del self.last_pull_progress[local_image]
|
||||||
break
|
break
|
||||||
|
# Keep any local image whose name begins with one of the prefixes collected
# from the containers' "allow_image_cache_prefix" entries.
for image_needed_prefix in self.image_cache_allowed_prefixes:
    # Slice comparison (rather than str.startswith) is kept so a non-string
    # prefix simply fails to match instead of raising.
    if local_image[:len(image_needed_prefix)] == image_needed_prefix:
        image_needed = True
        # pop() instead of del: the exact-match check just before this loop
        # deletes the same key, so it may already be gone and a plain del
        # would raise KeyError.
        self.last_pull_progress.pop(local_image, None)
        break
|
||||||
if not image_needed and removed_cnt < config.max_remove_images_per_run and config.delete_unused_containers and partner_images != None:
|
if not image_needed and removed_cnt < config.max_remove_images_per_run and config.delete_unused_containers and partner_images != None:
|
||||||
log.success(f"GOING TO REMOVE {local_image}")
|
log.success(f"GOING TO REMOVE {local_image}")
|
||||||
with concurrent.futures.ThreadPoolExecutor() as pool:
|
with concurrent.futures.ThreadPoolExecutor() as pool:
|
||||||
|
@ -409,10 +415,13 @@ class CloreClient:
|
||||||
tmp_images = []
|
tmp_images = []
|
||||||
|
|
||||||
is_order_spot = False
|
is_order_spot = False
|
||||||
|
self.image_cache_allowed_prefixes=[]
|
||||||
|
|
||||||
for idx, container in enumerate(self.containers):
|
for idx, container in enumerate(self.containers):
|
||||||
if "spot" in container:
|
if "spot" in container:
|
||||||
is_order_spot = True
|
is_order_spot = True
|
||||||
|
if "allow_image_cache_prefix" in container:
|
||||||
|
self.image_cache_allowed_prefixes.append(container["allow_image_cache_prefix"])
|
||||||
if "image" in container and "image" in container and container["image"]!="cloreai/hive-use-flightsheet":
|
if "image" in container and "image" in container and container["image"]!="cloreai/hive-use-flightsheet":
|
||||||
log_pull = False
|
log_pull = False
|
||||||
if "name" in container:
|
if "name" in container:
|
||||||
|
@ -519,7 +528,7 @@ class CloreClient:
|
||||||
async def submit_specs(self, current_specs):
|
async def submit_specs(self, current_specs):
|
||||||
try:
|
try:
|
||||||
if type(current_specs) == dict:
|
if type(current_specs) == dict:
|
||||||
current_specs["backend_version"]=21
|
current_specs["backend_version"]=22
|
||||||
current_specs["update_hw"]=True
|
current_specs["update_hw"]=True
|
||||||
smallest_pcie_width = 999
|
smallest_pcie_width = 999
|
||||||
for gpu in current_specs["gpus"]["nvidia"]:
|
for gpu in current_specs["gpus"]["nvidia"]:
|
||||||
|
|
|
@ -19,11 +19,6 @@ log = logging_lib.log
|
||||||
def deploy(validated_containers, allowed_running_containers=[], can_run_partner_workloads=False):
|
def deploy(validated_containers, allowed_running_containers=[], can_run_partner_workloads=False):
|
||||||
local_images = docker_interface.get_local_images()
|
local_images = docker_interface.get_local_images()
|
||||||
all_containers = docker_interface.get_containers(all=True)
|
all_containers = docker_interface.get_containers(all=True)
|
||||||
|
|
||||||
is_hive = "hive" in get_specs.get_kernel()
|
|
||||||
|
|
||||||
# Deploy wireguard first
|
|
||||||
|
|
||||||
wireguard_containers = []
|
wireguard_containers = []
|
||||||
rest_containers = []
|
rest_containers = []
|
||||||
for container in validated_containers:
|
for container in validated_containers:
|
||||||
|
@ -40,6 +35,12 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_
|
||||||
|
|
||||||
needed_running_names = []
|
needed_running_names = []
|
||||||
paused_names = []
|
paused_names = []
|
||||||
|
# Volume names that validated containers declare as mandatory; collected
# while the containers are processed.
all_use_volumes = []
local_volume_list = docker_interface.list_volumes()
# Names of the already-existing local volumes that carry the "clore_" prefix.
clore_volume_list = [
    existing.name
    for existing in local_volume_list
    if existing.name[:6] == "clore_"
]
|
||||||
|
|
||||||
created_container_names = []
|
created_container_names = []
|
||||||
for container in all_containers:
|
for container in all_containers:
|
||||||
|
@ -62,6 +63,12 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_
|
||||||
else:
|
else:
|
||||||
needed_running_names.append(validated_container["name"])
|
needed_running_names.append(validated_container["name"])
|
||||||
|
|
||||||
|
# Create any missing "clore_" volume this container requires, then record
# all of its mandatory volumes as in use.
if "mandatory_volumes" in validated_container:
    required_volumes = validated_container["mandatory_volumes"]
    for required_name in required_volumes:
        is_clore_volume = required_name[:6] == "clore_"
        if is_clore_volume and required_name not in clore_volume_list:
            docker_interface.create_volume(required_name)
    all_use_volumes += required_volumes
|
||||||
|
|
||||||
container_options = {
|
container_options = {
|
||||||
'image': validated_container["image"],
|
'image': validated_container["image"],
|
||||||
'name': validated_container["name"],
|
'name': validated_container["name"],
|
||||||
|
@ -162,6 +169,14 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.debug(f"Container creation issue | {e}")
|
log.debug(f"Container creation issue | {e}")
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
# Drop duplicate names while keeping first-seen order.
all_use_volumes = list(dict.fromkeys(all_use_volumes))
# Remove "clore_"-prefixed volumes that no validated container listed under
# "mandatory_volumes".
for volume in local_volume_list:
    # all_use_volumes holds volume *names*, so the membership test must use
    # volume.name; testing the volume object itself never matches a name and
    # would send every clore_ volume to remove().
    if volume.name[:6] == "clore_" and volume.name not in all_use_volumes:
        try:
            volume.remove()
        except Exception as e:
            # Best-effort cleanup: a failed removal must not abort the
            # deployment, but it is logged so it does not go unnoticed.
            log.debug(f"Volume removal issue | {e}")
|
||||||
|
|
||||||
all_running_container_names = []
|
all_running_container_names = []
|
||||||
all_stopped_container_names = []
|
all_stopped_container_names = []
|
||||||
|
|
|
@ -443,4 +443,9 @@ def configure_exec_opts(key="native.cgroupdriver", value="cgroupfs"):
|
||||||
|
|
||||||
def is_docker_default_name_lenient(container_name):
    """Return True when ``container_name`` looks like ``word_word``.

    The check is deliberately lenient (not a perfect solution, but it does
    the job): any two runs of lowercase ASCII letters joined by a single
    underscore are accepted.

    ``re.fullmatch`` is used instead of ``re.match`` with ``^...$`` because
    ``$`` also matches just before a trailing newline, which would accept
    names such as ``"word_word\\n"``.
    """
    pattern = r'[a-z]+_[a-z]+'
    return re.fullmatch(pattern, container_name) is not None
|
||||||
|
|
||||||
|
def list_volumes():
    """Return the result of ``client.volumes.list()``."""
    volumes = client.volumes.list()
    return volumes
|
||||||
|
def create_volume(volume_name):
    """Create a volume named ``volume_name`` through ``client.volumes``."""
    volume_manager = client.volumes
    volume_manager.create(name=volume_name)
|
Loading…
Reference in New Issue