v5 initial release
This commit is contained in:
parent
dcd48b9d09
commit
7397de8a5b
|
@ -47,8 +47,8 @@ async def deploy_containers(validated_containers):
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def get_local_images():
|
async def get_local_images(no_latest_tag = False):
|
||||||
res = await asyncio.to_thread(docker_interface.get_local_images)
|
res = await asyncio.to_thread(docker_interface.get_local_images, no_latest_tag)
|
||||||
return res
|
return res
|
||||||
|
|
||||||
class CloreClient:
|
class CloreClient:
|
||||||
|
@ -73,6 +73,16 @@ class CloreClient:
|
||||||
|
|
||||||
self.last_hw_specs_submit = time.time()-(1800-60)
|
self.last_hw_specs_submit = time.time()-(1800-60)
|
||||||
|
|
||||||
|
self.last_service_heartbeat = {
|
||||||
|
"main": utils.unix_timestamp(),
|
||||||
|
"handle_container_cache": utils.unix_timestamp(),
|
||||||
|
"startup_script_runner": utils.unix_timestamp(),
|
||||||
|
"log_streaming_task": utils.unix_timestamp(),
|
||||||
|
"container_log_streaming_service": utils.unix_timestamp(),
|
||||||
|
"specs_service": utils.unix_timestamp()
|
||||||
|
}
|
||||||
|
self.max_service_inactivity = 600 # seconds
|
||||||
|
|
||||||
if config.debug_ws_peer:
|
if config.debug_ws_peer:
|
||||||
self.ws_peers[str(config.debug_ws_peer)]={
|
self.ws_peers[str(config.debug_ws_peer)]={
|
||||||
"expiration":"immune"
|
"expiration":"immune"
|
||||||
|
@ -82,27 +92,53 @@ class CloreClient:
|
||||||
global container_log_broken
|
global container_log_broken
|
||||||
|
|
||||||
pull_list = asyncio.Queue()
|
pull_list = asyncio.Queue()
|
||||||
pull_logs = asyncio.Queue()
|
monitoring = asyncio.Queue()
|
||||||
|
|
||||||
task1 = asyncio.create_task(self.main(pull_list, pull_logs))
|
task1 = asyncio.create_task(self.main(pull_list, monitoring))
|
||||||
task2 = asyncio.create_task(self.handle_container_cache(pull_list, pull_logs))
|
task2 = asyncio.create_task(self.handle_container_cache(pull_list, monitoring))
|
||||||
task3 = asyncio.create_task(self.startup_script_runner())
|
task3 = asyncio.create_task(self.startup_script_runner(monitoring))
|
||||||
task4 = asyncio.create_task(log_streaming_task.log_streaming_task(container_log_broken))
|
task4 = asyncio.create_task(log_streaming_task.log_streaming_task(container_log_broken, monitoring))
|
||||||
task5 = asyncio.create_task(self.container_log_streaming_service())
|
task5 = asyncio.create_task(self.container_log_streaming_service(monitoring))
|
||||||
task6 = asyncio.create_task(self.specs_service())
|
task6 = asyncio.create_task(self.specs_service(monitoring))
|
||||||
|
monitoring_task = asyncio.create_task(self.monitoring_service(monitoring))
|
||||||
|
|
||||||
# Wait for both tasks to complete (they won't in this case)
|
# Wait for both tasks to complete (they won't in this case)
|
||||||
await asyncio.gather(task1, task2, task3, task4, task5, task6)
|
await asyncio.gather(task1, task2, task3, task4, task5, task6, monitoring_task)
|
||||||
|
|
||||||
async def container_log_streaming_service(self):
|
async def monitoring_service(self, monitoring):
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
monitoring_data = []
|
||||||
|
while not monitoring.empty():
|
||||||
|
monitoring_data.append(await monitoring.get())
|
||||||
|
if len(monitoring_data)>0:
|
||||||
|
unique_monitoring = list(set(monitoring_data))
|
||||||
|
for service_name in unique_monitoring:
|
||||||
|
self.last_service_heartbeat[service_name]=utils.unix_timestamp()
|
||||||
|
log.success(self.last_service_heartbeat)
|
||||||
|
for service_name in self.last_service_heartbeat.keys():
|
||||||
|
last_hearthbeat = self.last_service_heartbeat[service_name]
|
||||||
|
if last_hearthbeat < utils.unix_timestamp()-config.maximum_pull_service_loop_time and service_name=="handle_container_cache":
|
||||||
|
log.error(f"\"{service_name}\" service is stuck for {utils.unix_timestamp()-last_hearthbeat} s, Restarting...")
|
||||||
|
os._exit(1)
|
||||||
|
elif last_hearthbeat < utils.unix_timestamp()-config.maximum_service_loop_time and service_name!="handle_container_cache":
|
||||||
|
log.error(f"\"{service_name}\" service is stuck for {utils.unix_timestamp()-last_hearthbeat} s, Restarting...")
|
||||||
|
os._exit(1)
|
||||||
|
except Exception as e:
|
||||||
|
log.debug(f"monitoring_service() | ERROR | {e}")
|
||||||
|
await asyncio.sleep(5)
|
||||||
|
|
||||||
|
async def container_log_streaming_service(self, monitoring):
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
await monitoring.put("container_log_streaming_service")
|
||||||
await WebSocketClient.stream_container_logs()
|
await WebSocketClient.stream_container_logs()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.debug(f"container_log_streaming_service() | ERROR | {e}")
|
log.debug(f"container_log_streaming_service() | ERROR | {e}")
|
||||||
await asyncio.sleep(0.6)
|
await asyncio.sleep(0.6)
|
||||||
async def run_startup_scripts(self, startup_script_full_path, container_name):
|
async def run_startup_scripts(self, startup_script_full_path, container_name):
|
||||||
try:
|
try:
|
||||||
|
if config.debug:
|
||||||
log.success(f"Runnin' {startup_script_full_path}")
|
log.success(f"Runnin' {startup_script_full_path}")
|
||||||
log.error(self.all_running_container_names)
|
log.error(self.all_running_container_names)
|
||||||
await asyncio.to_thread(run_startup_script.run, container_name, startup_script_full_path, f"/init-{container_name}.sh")
|
await asyncio.to_thread(run_startup_script.run, container_name, startup_script_full_path, f"/init-{container_name}.sh")
|
||||||
|
@ -110,12 +146,13 @@ class CloreClient:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
async def startup_script_runner(self):
|
async def startup_script_runner(self, monitoring):
|
||||||
|
|
||||||
startup_script_ongoing_tasks = {}
|
startup_script_ongoing_tasks = {}
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
await monitoring.put("startup_script_runner")
|
||||||
startup_script_files = await async_os.listdir(config.startup_scripts_folder)
|
startup_script_files = await async_os.listdir(config.startup_scripts_folder)
|
||||||
for startup_script_file in startup_script_files:
|
for startup_script_file in startup_script_files:
|
||||||
if type(startup_script_file)==str and startup_script_file.endswith(".sh") and startup_script_file[:-3] in self.all_running_container_names:
|
if type(startup_script_file)==str and startup_script_file.endswith(".sh") and startup_script_file[:-3] in self.all_running_container_names:
|
||||||
|
@ -160,16 +197,17 @@ class CloreClient:
|
||||||
pass
|
pass
|
||||||
await asyncio.sleep(0.5)
|
await asyncio.sleep(0.5)
|
||||||
|
|
||||||
async def handle_container_cache(self, pull_list, pull_logs):
|
async def handle_container_cache(self, pull_list, monitoring):
|
||||||
while True:
|
while True:
|
||||||
got_data = []
|
got_data = []
|
||||||
while not pull_list.empty():
|
while not pull_list.empty():
|
||||||
got_data.append(await pull_list.get())
|
got_data.append(await pull_list.get())
|
||||||
|
await monitoring.put("handle_container_cache")
|
||||||
if len(got_data)>0:
|
if len(got_data)>0:
|
||||||
self.p_needed_containers=got_data[len(got_data)-1]
|
self.p_needed_containers=got_data[len(got_data)-1]
|
||||||
|
|
||||||
if len(self.p_needed_containers)>0:
|
if len(self.p_needed_containers)>0:
|
||||||
local_images = await get_local_images()
|
local_images = await get_local_images(no_latest_tag=True)
|
||||||
for local_image in local_images:
|
for local_image in local_images:
|
||||||
self.last_pull_progress[local_image]={"log":"Pull complete", "last_update":time.time()}
|
self.last_pull_progress[local_image]={"log":"Pull complete", "last_update":time.time()}
|
||||||
image_needed = False
|
image_needed = False
|
||||||
|
@ -198,7 +236,8 @@ class CloreClient:
|
||||||
r = await asyncio.get_running_loop().run_in_executor(pool, docker_interface.remove_docker_image, local_image)
|
r = await asyncio.get_running_loop().run_in_executor(pool, docker_interface.remove_docker_image, local_image)
|
||||||
if r:
|
if r:
|
||||||
removed_cnt+=1
|
removed_cnt+=1
|
||||||
log.success(f"{local_image} | {image_needed}")
|
#if config.debug:
|
||||||
|
# log.success(f"{local_image} | {image_needed}")
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
image_needed=True
|
image_needed=True
|
||||||
|
@ -212,6 +251,8 @@ class CloreClient:
|
||||||
most_recent_wanted_state = self.p_needed_containers
|
most_recent_wanted_state = self.p_needed_containers
|
||||||
for wanted_image in most_recent_wanted_state:
|
for wanted_image in most_recent_wanted_state:
|
||||||
if not wanted_image["image"] in local_images:
|
if not wanted_image["image"] in local_images:
|
||||||
|
print("Local", local_images)
|
||||||
|
print("W",wanted_image)
|
||||||
log.debug(f"Starting to pull \"{wanted_image}\"")
|
log.debug(f"Starting to pull \"{wanted_image}\"")
|
||||||
|
|
||||||
auth_config = {}
|
auth_config = {}
|
||||||
|
@ -249,13 +290,17 @@ class CloreClient:
|
||||||
pass
|
pass
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
async def main(self, pull_list, pull_logs):
|
async def main(self, pull_list, monitoring):
|
||||||
step=0
|
step=0
|
||||||
while True:
|
while True:
|
||||||
print("STEP",step,'|',self.containers_set, self.containers if config.log_containers_strings else '')
|
try:
|
||||||
|
|
||||||
step+=1
|
step+=1
|
||||||
|
|
||||||
|
await monitoring.put("main")
|
||||||
|
|
||||||
|
if config.debug:
|
||||||
|
print("STEP",step,'|',self.containers_set, self.containers if config.log_containers_strings else '')
|
||||||
|
|
||||||
tasks = []
|
tasks = []
|
||||||
|
|
||||||
container_conf = WebSocketClient.get_containers()
|
container_conf = WebSocketClient.get_containers()
|
||||||
|
@ -340,6 +385,9 @@ class CloreClient:
|
||||||
self.all_stopped_container_names = result.all_stopped_container_names
|
self.all_stopped_container_names = result.all_stopped_container_names
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
pass
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
log.debug(f"main() | ERROR | {e}")
|
||||||
|
|
||||||
await asyncio.sleep(1)
|
await asyncio.sleep(1)
|
||||||
|
|
||||||
async def submit_specs(self, current_specs):
|
async def submit_specs(self, current_specs):
|
||||||
|
@ -375,9 +423,10 @@ class CloreClient:
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
log.debug(f"FAIL | update_realtime_data() | {e}")
|
log.debug(f"FAIL | update_realtime_data() | {e}")
|
||||||
|
|
||||||
async def specs_service(self):
|
async def specs_service(self, monitoring):
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
await monitoring.put("specs_service")
|
||||||
current_specs = await specs.get()
|
current_specs = await specs.get()
|
||||||
if self.last_hw_specs_submit < (utils.unix_timestamp()-1800):
|
if self.last_hw_specs_submit < (utils.unix_timestamp()-1800):
|
||||||
self.last_hw_specs_submit=utils.unix_timestamp()
|
self.last_hw_specs_submit=utils.unix_timestamp()
|
||||||
|
|
|
@ -141,10 +141,11 @@ class WebSocketClient:
|
||||||
else:
|
else:
|
||||||
try:
|
try:
|
||||||
parsed_json = json.loads(message)
|
parsed_json = json.loads(message)
|
||||||
if "type" in parsed_json and parsed_json["type"]=="set_containers" and "new_containers" in parsed_json:
|
if "type" in parsed_json and parsed_json["type"]=="set_containers" and "new_containers" in parsed_json and type(parsed_json["new_containers"])==list:
|
||||||
self.last_heartbeat = clore_utils.unix_timestamp()
|
self.last_heartbeat = clore_utils.unix_timestamp()
|
||||||
container_str = json.dumps({"containers":parsed_json["new_containers"]})
|
container_str = json.dumps({"containers":parsed_json["new_containers"]})
|
||||||
await self.send(container_str)
|
await self.send(container_str)
|
||||||
|
if len(parsed_json["new_containers"]) > 0: # There should be at least one container
|
||||||
self.containers_set = True
|
self.containers_set = True
|
||||||
self.containers=parsed_json["new_containers"]
|
self.containers=parsed_json["new_containers"]
|
||||||
#log.success(container_str)
|
#log.success(container_str)
|
||||||
|
|
|
@ -30,6 +30,8 @@ hard_config = {
|
||||||
"max_pull_log_size": 24576, # Characters
|
"max_pull_log_size": 24576, # Characters
|
||||||
"max_container_log_size": 262144, # Characters
|
"max_container_log_size": 262144, # Characters
|
||||||
"container_log_streaming_interval": 2, # Seconds
|
"container_log_streaming_interval": 2, # Seconds
|
||||||
|
"maximum_service_loop_time": 900, # Seconds, failsafe variable - if service is stuck processing longer than this timeframe it will lead into restarting the app
|
||||||
|
"maximum_pull_service_loop_time": 14400 # Exception for image pulling
|
||||||
}
|
}
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(description='Example argparse usage')
|
parser = argparse.ArgumentParser(description='Example argparse usage')
|
||||||
|
|
|
@ -68,7 +68,7 @@ def get_docker_networks():
|
||||||
except docker.errors.DockerException as e:
|
except docker.errors.DockerException as e:
|
||||||
return (f"Error: {e}")
|
return (f"Error: {e}")
|
||||||
|
|
||||||
def get_local_images():
|
def get_local_images(no_latest_tag=False):
|
||||||
try:
|
try:
|
||||||
images = client.images.list()
|
images = client.images.list()
|
||||||
|
|
||||||
|
@ -79,6 +79,9 @@ def get_local_images():
|
||||||
tags = image.tags if image.tags else ['<none>:<none>']
|
tags = image.tags if image.tags else ['<none>:<none>']
|
||||||
for tag in tags:
|
for tag in tags:
|
||||||
if tag!="<none>:<none>":
|
if tag!="<none>:<none>":
|
||||||
|
if no_latest_tag:
|
||||||
|
image_list.append(tag.replace(':latest',''))
|
||||||
|
else:
|
||||||
image_list.append(tag)
|
image_list.append(tag)
|
||||||
|
|
||||||
return image_list
|
return image_list
|
||||||
|
|
|
@ -10,7 +10,7 @@ from lib import container_logs
|
||||||
from concurrent.futures import ThreadPoolExecutor
|
from concurrent.futures import ThreadPoolExecutor
|
||||||
import queue # Import the synchronous queue module
|
import queue # Import the synchronous queue module
|
||||||
|
|
||||||
async def log_streaming_task(message_broker):
|
async def log_streaming_task(message_broker, monitoring):
|
||||||
client = docker_interface.client
|
client = docker_interface.client
|
||||||
executor = ThreadPoolExecutor(max_workers=4)
|
executor = ThreadPoolExecutor(max_workers=4)
|
||||||
tasks = {}
|
tasks = {}
|
||||||
|
@ -18,6 +18,7 @@ async def log_streaming_task(message_broker):
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
try:
|
try:
|
||||||
|
await monitoring.put("log_streaming_task")
|
||||||
current_containers = await asyncio.get_event_loop().run_in_executor(
|
current_containers = await asyncio.get_event_loop().run_in_executor(
|
||||||
executor,
|
executor,
|
||||||
lambda: {container.name: container for container in client.containers.list() if container.status == 'running'}
|
lambda: {container.name: container for container in client.containers.list() if container.status == 'running'}
|
||||||
|
|
Loading…
Reference in New Issue