# hosting/lib/docker_deploy.py
#
# 212 lines
# 10 KiB
# Python

from lib import config as config_module
from lib import logging as logging_lib
from lib import docker_cli_wrapper
from lib import background_job
from lib import docker_interface
from lib import clore_partner
from lib import get_specs
from lib import utils
import docker
from docker.types import EndpointConfig, NetworkingConfig
import os
# Shared-memory size calculator, constructed once at import time from the
# host's total RAM (in MB) as reported by get_specs.get_total_ram_mb().
shm_calculator = utils.shm_calculator(get_specs.get_total_ram_mb())
# Short module-level aliases for the Docker client exposed by docker_interface,
# the loaded configuration object and the project logger.
client = docker_interface.client
config = config_module.config
log = logging_lib.log
def _copy_spec_collection(value):
    """Return a shallow copy of a list or dict taken from a container spec.

    The option builder appends capabilities and adds volume entries; copying
    first keeps those additions out of the caller's spec, which would
    otherwise grow on every deploy() call. Other types are returned as-is.
    """
    if isinstance(value, dict):
        return dict(value)
    if isinstance(value, list):
        return list(value)
    return value


def _build_container_options(validated_container):
    """Translate one validated container spec into Docker creation options.

    Args:
        validated_container: mapping with at least "name" and "image"; the
            optional keys read here are "volumes", "env", "security_opt",
            "devices", "cap_add", "hostname", "network", "ip", "gpus",
            "wireguard", "allow_vpn", "limited_disk", "ports",
            "custom_entrypoint" and "entrypoint_command".

    Returns:
        A tuple ``(container_options, shm_size_mb, docker_gpus)`` where
        ``container_options`` is the keyword dict for container creation,
        ``shm_size_mb`` is the shared-memory size in MB and ``docker_gpus``
        is ``None`` (no GPUs), ``True`` (all GPUs) or the list of GPU ids.
    """
    name = validated_container["name"]
    shm_size_mb = 64  # MB - default
    docker_gpus = None

    container_options = {
        'image': validated_container["image"],
        'name': name,
        'detach': True,
        'tty': True,
        'network_mode': 'clore-br0',
        'cap_add': [],
        'devices': [],
        'security_opt': [],
        'volumes': _copy_spec_collection(validated_container["volumes"]) if "volumes" in validated_container else {},
        'ports': {},
        'device_requests': [],
        'environment': validated_container["env"] if "env" in validated_container else {},
        'log_config': docker.types.LogConfig(
            type='json-file',
            config={
                'max-size': '5m',
                'max-file': '1'
            }
        )
    }

    # Copy instead of aliasing: "cap_add" is appended to further down.
    for option_key in ("security_opt", "devices", "cap_add"):
        if option_key in validated_container:
            container_options[option_key] = _copy_spec_collection(validated_container[option_key])

    if "hostname" in validated_container:
        container_options["hostname"] = validated_container["hostname"]
    elif "clore-order-" in name:
        # Derive "O-<n>" from whatever follows the first 12 characters
        # (the length of "clore-order-"); a non-numeric tail leaves the
        # hostname unset.
        try:
            container_options["hostname"] = f"O-{int(name[12:])}"
        except (TypeError, ValueError):
            pass

    if "network" in validated_container:
        container_options["network_mode"] = validated_container["network"]
    if "ip" in validated_container and config.creation_engine == "sdk":
        # With the SDK engine a static IP is attached after creation by
        # connecting the target network and disconnecting "bridge" (see
        # deploy), so the container must not be created on that network.
        del container_options["network_mode"]

    gpus = validated_container.get("gpus") if "gpus" in validated_container else None
    if "gpus" in validated_container and isinstance(gpus, bool):
        # NOTE(review): any boolean, including False, requests all GPUs here;
        # kept as in the original - confirm whether False should mean "none".
        if "clore-order-" in name:
            shm_size_mb = shm_calculator.calculate('*')
        container_options["runtime"] = "nvidia"
        docker_gpus = True
        container_options["device_requests"].append(
            docker.types.DeviceRequest(count=-1, capabilities=[['gpu']])
        )
    elif "gpus" in validated_container and isinstance(gpus, list):
        container_options["runtime"] = "nvidia"
        docker_gpus = gpus
        # NOTE(review): count=-1 is sent together with device_ids, as in the
        # original; verify the daemon accepts both in one request.
        container_options["device_requests"].append(docker.types.DeviceRequest(
            count=-1,
            capabilities=[['gpu']],
            device_ids=gpus
        ))

    if "wireguard" in validated_container:
        wg_conf_dir = os.path.join(config.wireguard_config_folder, name)
        container_options["cap_add"].append('NET_ADMIN')
        container_options["cap_add"].append('SYS_MODULE')
        container_options["volumes"]["/lib/modules"] = {'bind': '/lib/modules', 'mode': 'ro'}
        container_options["volumes"][wg_conf_dir] = {'bind': '/config', 'mode': 'rw'}
    elif "allow_vpn" in validated_container:
        container_options["cap_add"].append('NET_ADMIN')
        container_options["cap_add"].append('SYS_MODULE')
        container_options["volumes"]["/lib/modules"] = {'bind': '/lib/modules', 'mode': 'ro'}

    if "limited_disk" in validated_container and isinstance(validated_container["limited_disk"], str):
        container_options["storage_opt"] = {'size': validated_container["limited_disk"]}

    if "ports" in validated_container and isinstance(validated_container["ports"], list):
        for port in validated_container["ports"]:
            if isinstance(port, str) and ':' in port:
                protocol = 'udp' if "/udp" in port else 'tcp'
                port_parts = port.replace('/udp', '').split(':')
                # "<a>:<b>[/udp]" becomes ports["<a>/<proto>"] = int(<b>).
                container_options["ports"][f"{port_parts[0]}/{protocol}"] = int(port_parts[1])

    if "custom_entrypoint" in validated_container:
        entrypoint_file_name = f"{name}.sh"
        entrypoint_full_path = os.path.join(config.entrypoints_folder, entrypoint_file_name)
        container_options["volumes"][entrypoint_full_path] = {'bind': '/etc/order_entrypoint.sh', 'mode': 'ro'}
        container_options["entrypoint"] = '/etc/order_entrypoint.sh'
    elif "entrypoint_command" in validated_container and isinstance(validated_container["entrypoint_command"], str) and len(validated_container["entrypoint_command"]) > 0:
        container_options["entrypoint"] = validated_container["entrypoint_command"]

    container_options["shm_size"] = f"{shm_size_mb}m"
    return container_options, shm_size_mb, docker_gpus


def deploy(validated_containers, allowed_running_containers=None, can_run_partner_workloads=False):
    """Create missing containers and reconcile existing ones with the spec.

    The function works on a snapshot of the containers taken at entry:

    1. Specs lacking "name" or "image" are dropped; specs carrying a
       "wireguard" key are processed before all others.
    2. For every remaining spec whose name does not exist yet and whose image
       is available locally, a container is created (through the CLI wrapper
       or the Docker SDK, depending on ``config.creation_engine``). Failures
       for one spec are logged at debug level and do not stop the others.
    3. Every container from the snapshot is then started, stopped or removed
       depending on whether its name is expected to run, expected to be
       paused, explicitly allowed, a partner container, or unknown.

    Args:
        validated_containers: iterable of container spec mappings.
        allowed_running_containers: names that must be left untouched even
            though they are not part of ``validated_containers``.
        can_run_partner_workloads: when False, running partner workload
            containers are stopped.

    Returns:
        ``(running_names, stopped_names)`` - names of the containers that were
        running / not running in the snapshot taken at entry (i.e. before any
        action of this call).
    """
    # None instead of a shared mutable default list.
    allowed_names = [] if allowed_running_containers is None else list(allowed_running_containers)

    local_images = docker_interface.get_local_images()
    all_containers = docker_interface.get_containers(all=True)

    # Deploy wireguard first
    wireguard_containers = []
    rest_containers = []
    for spec in validated_containers:
        if "name" not in spec or "image" not in spec:
            continue
        if "wireguard" in spec:
            wireguard_containers.append(spec)
        else:
            rest_containers.append(spec)
    validated_containers = wireguard_containers + rest_containers

    needed_running_names = []
    paused_names = []
    created_container_names = [
        existing.name for existing in all_containers if isinstance(existing.name, str)
    ]

    for validated_container in validated_containers:
        try:
            name = validated_container["name"]
            # An image counts as available when it matches a local image,
            # ignoring an explicit ":latest" tag on either side.
            image_ready = any(
                local_image.replace(':latest', '') == validated_container["image"].replace(':latest', '')
                for local_image in local_images
            )

            if "paused" in validated_container:
                paused_names.append(name)
            else:
                needed_running_names.append(name)

            container_options, shm_size_mb, docker_gpus = _build_container_options(validated_container)

            if name in created_container_names or not image_ready:
                continue
            if not background_job.is_enabled() and background_job.is_background_job_container_name(name):
                continue

            if config.creation_engine == "wrapper":
                docker_cli_wrapper.create_container(
                    container_options,
                    ip=(validated_container["ip"] if "ip" in validated_container else None),
                    shm_size=shm_size_mb,
                    docker_gpus=docker_gpus,
                )
            else:
                created = client.containers.create(**container_options)
                if "ip" in validated_container:
                    target_network = validated_container["network"] if "network" in validated_container else "clore-br0"
                    client.networks.get(target_network).connect(created, ipv4_address=validated_container["ip"])
                    client.networks.get("bridge").disconnect(created)
                if "paused" not in validated_container:
                    created.start()
        except Exception as e:
            log.debug(f"Container creation issue | {e}")

    all_running_container_names = []
    all_stopped_container_names = []
    # Names this call must never remove; fixed for the whole loop below.
    protected_names = paused_names + needed_running_names + allowed_names

    for container in all_containers:
        if not isinstance(container.name, str):
            continue
        name = container.name
        is_running = container.status == "running"
        if is_running:
            all_running_container_names.append(name)
        else:
            all_stopped_container_names.append(name)

        if background_job.is_background_job_container_name(name) and not background_job.is_enabled():
            if is_running:
                try:
                    container.stop()
                except Exception as e:
                    log.debug(f"Container stop issue | {name} | {e}")
        elif name in needed_running_names and not is_running:
            try:
                attached_networks = container.attrs['NetworkSettings']['Networks']
                if "bridge" in attached_networks or len(attached_networks) == 0:
                    # Ip was not attached, remove container
                    container.stop()
                    container.remove()
                else:
                    container.start()
            except Exception as e:
                log.debug(f"Container start issue | {name} | {e}")
        elif name in paused_names and is_running:
            try:
                container.stop()
            except Exception as e:
                log.debug(f"Container stop issue | {name} | {e}")
        elif name not in protected_names and not clore_partner.validate_partner_container_name(name) and not docker_interface.is_docker_default_name_lenient(name):
            # Unknown container: stop it first when it is running, then remove.
            try:
                if is_running:
                    container.stop()
                container.remove()
            except Exception as e:
                log.debug(f"Container removal issue | {name} | {e}")
        elif not can_run_partner_workloads and is_running and clore_partner.validate_partner_workload_container_name(name):
            try:
                container.stop()
            except Exception as e:
                log.debug(f"Container stop issue | {name} | {e}")

    return all_running_container_names, all_stopped_container_names
#print(validated_containers)