allow pause of background job

This commit is contained in:
clore 2024-12-07 00:34:12 +00:00
parent df31d8ea6e
commit c4a4b53897
5 changed files with 43 additions and 4 deletions

View File

@ -4,6 +4,7 @@ from lib import log_streaming_task
from lib import run_startup_script from lib import run_startup_script
from lib import hive_miner_interface from lib import hive_miner_interface
from lib import docker_interface from lib import docker_interface
from lib import background_job
from lib import docker_deploy from lib import docker_deploy
from lib import clore_partner from lib import clore_partner
from lib import clore_partner_socket from lib import clore_partner_socket
@ -470,6 +471,11 @@ class CloreClient:
if result.success: if result.success:
self.last_checked_ws_peers = utils.unix_timestamp() self.last_checked_ws_peers = utils.unix_timestamp()
self.allowed_images=result.allowed_images+self.extra_allowed_images self.allowed_images=result.allowed_images+self.extra_allowed_images
if self.xfs_state == "active":
self.allowed_images.append({
"repository": "vastai/test",
"allowed_tags": ["bandwidth-test-nvidia"]
})
if not config.debug_ws_peer: if not config.debug_ws_peer:
for pure_ws_peer in result.ws_peers: for pure_ws_peer in result.ws_peers:
self.ws_peers[pure_ws_peer]={ self.ws_peers[pure_ws_peer]={
@ -547,7 +553,7 @@ class CloreClient:
await monitoring.put("oc_service") await monitoring.put("oc_service")
oc_apply_allowed = True oc_apply_allowed = True
### OC Service should also handle Hive stuff ### OC Service should also handle Hive stuff
if self.use_hive_flightsheet and self.is_hive and not self.dont_use_hive_binaries: if self.use_hive_flightsheet and self.is_hive and not self.dont_use_hive_binaries and background_job.is_enabled():
await set_hive_miner_status(True) await set_hive_miner_status(True)
oc_apply_allowed = False # Don't apply any OC when running HiveOS miner oc_apply_allowed = False # Don't apply any OC when running HiveOS miner
elif self.is_hive and not self.dont_use_hive_binaries: elif self.is_hive and not self.dont_use_hive_binaries:

22
lib/background_job.py Normal file
View File

@ -0,0 +1,22 @@
import time
import re
disabled_till = 0
def is_background_job_container_name(string):
    """Return True if *string* is a background-job container name.

    Background-job containers are named ``clore-default-<digits>``
    (e.g. ``clore-default-0``). Any non-string input returns False.
    """
    if not isinstance(string, str):  # isinstance, not type() comparison
        return False
    # fullmatch anchors both ends; unlike ^...$ with re.match it also
    # rejects a trailing newline.
    return re.fullmatch(r"clore-default-\d+", string) is not None
def temporarly_disable(seconds):
    """Pause background jobs for *seconds* from now.

    Records the wall-clock timestamp at which background jobs may
    resume; ``is_enabled()`` compares against it.
    """
    global disabled_till
    resume_at = time.time() + seconds
    disabled_till = resume_at
def enable():
    """Re-enable background jobs immediately by clearing the pause deadline."""
    global disabled_till
    disabled_till = 0
def is_enabled():
    """Return True when background jobs are allowed to run.

    Jobs are paused while ``disabled_till`` (a unix timestamp set by
    ``temporarly_disable``) lies in the future.
    """
    # Reading a module global needs no `global` statement; the comparison
    # is already a bool, so no `True if ... else False` ternary.
    return disabled_till < time.time()

View File

@ -117,7 +117,7 @@ async def configure(partner_config):
partner_container_config = { partner_container_config = {
"image": partner_config["partner_image"], "image": partner_config["partner_image"],
"name": config.clore_partner_container_name, "name": config.clore_partner_container_name,
"hostname": f"{partner_config['partner_id'][:4]}-m{partner_config['machine_id']}", "hostname": f"{partner_config['partner_id'][:16]}-m{partner_config['machine_id']}",
"env": { "env": {
"AUTH": partner_config['partner_id'], "AUTH": partner_config['partner_id'],
"ip_addr": partner_config['openvpn_host'], "ip_addr": partner_config['openvpn_host'],

View File

@ -1,5 +1,6 @@
from lib import config as config_module from lib import config as config_module
from lib import logging as logging_lib from lib import logging as logging_lib
from lib import background_job
from lib import utils from lib import utils
import asyncio import asyncio
import json import json
@ -61,6 +62,12 @@ async def handle_client(reader, writer):
writer.write(json.dumps({ writer.write(json.dumps({
"can_deploy": can_deploy "can_deploy": can_deploy
}).encode()) }).encode())
elif "stop_background_job" in parsed_data and "time" in parsed_data:
try:
if isinstance(parsed_data["time"], int):
background_job.temporarly_disable(parsed_data["time"])
except Exception as e:
pass
else: else:
writer.write('?'.encode()) writer.write('?'.encode())
await writer.drain() await writer.drain()

View File

@ -1,6 +1,7 @@
from lib import config as config_module from lib import config as config_module
from lib import logging as logging_lib from lib import logging as logging_lib
from lib import docker_cli_wrapper from lib import docker_cli_wrapper
from lib import background_job
from lib import docker_interface from lib import docker_interface
from lib import clore_partner from lib import clore_partner
from lib import get_specs from lib import get_specs
@ -148,7 +149,7 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_
container_options["shm_size"] = f"{SHM_SIZE}m" container_options["shm_size"] = f"{SHM_SIZE}m"
if not validated_container["name"] in created_container_names and image_ready: if not validated_container["name"] in created_container_names and image_ready and not (not background_job.is_enabled() and background_job.is_background_job_container_name(validated_container["name"])):
if config.creation_engine == "wrapper": if config.creation_engine == "wrapper":
docker_cli_wrapper.create_container(container_options, ip=(validated_container["ip"] if "ip" in validated_container else None), shm_size=SHM_SIZE, docker_gpus=docker_gpus) docker_cli_wrapper.create_container(container_options, ip=(validated_container["ip"] if "ip" in validated_container else None), shm_size=SHM_SIZE, docker_gpus=docker_gpus)
else: else:
@ -171,7 +172,10 @@ def deploy(validated_containers, allowed_running_containers=[], can_run_partner_
all_running_container_names.append(container.name) all_running_container_names.append(container.name)
else: else:
all_stopped_container_names.append(container.name) all_stopped_container_names.append(container.name)
if container.name in needed_running_names and container.status != 'running': if background_job.is_background_job_container_name(container.name) and not background_job.is_enabled():
if container.status == "running":
container.stop()
elif container.name in needed_running_names and container.status != 'running':
try: try:
attached_networks = container.attrs['NetworkSettings']['Networks'] attached_networks = container.attrs['NetworkSettings']['Networks']
if "bridge" in attached_networks.keys() or len(attached_networks.keys())==0: # Ip was not attached, remove container if "bridge" in attached_networks.keys() or len(attached_networks.keys())==0: # Ip was not attached, remove container