initial version of onboarding system
This commit is contained in:
commit
2f62997012
|
@ -0,0 +1 @@
|
||||||
|
__pycache__/
|
|
@ -0,0 +1,474 @@
|
||||||
|
import http.client
|
||||||
|
import datetime
|
||||||
|
import argparse
|
||||||
|
import json
|
||||||
|
import specs
|
||||||
|
import base64
|
||||||
|
import time
|
||||||
|
import math
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
import os
|
||||||
|
import socket
|
||||||
|
import asyncio
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
import subprocess
|
||||||
|
from functools import partial
|
||||||
|
|
||||||
|
class logger:
    """Tiny ANSI-colored console logger with a timestamp prefix (stdout only)."""

    RED = '\033[91m'
    GREEN = '\033[92m'
    BLUE = '\033[94m'
    RESET = '\033[0m'

    @staticmethod
    def _get_current_time():
        # e.g. "2024-01-31 23:59:59" — 19 characters, local time.
        return datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    @staticmethod
    def _emit(color, level, message):
        # Shared formatter so all levels stay visually consistent.
        print(f"{color}{logger._get_current_time()} | {level} | {message}{logger.RESET}")

    @staticmethod
    def error(message):
        """Log *message* at ERROR level in red."""
        logger._emit(logger.RED, "ERROR", message)

    @staticmethod
    def success(message):
        """Log *message* at SUCCESS level in green."""
        logger._emit(logger.GREEN, "SUCCESS", message)

    @staticmethod
    def info(message):
        """Log *message* at INFO level in blue."""
        logger._emit(logger.BLUE, "INFO", message)
|
||||||
|
|
||||||
|
# --- Privilege check ------------------------------------------------------
# The onboarding flow writes system config files and manages systemd units,
# so it refuses to start without root privileges.
if os.geteuid() != 0:
    logger.error("This script must be run as root!")
    sys.exit(1)

# --- CLI arguments --------------------------------------------------------
parser = argparse.ArgumentParser(description="Script with --clore-endpoint flag.")

parser.add_argument('--clore-endpoint', type=str, default="https://api.clore.ai/machine_onboarding", help='Specify the Clore API endpoint. Default is "https://api.clore.ai/machine_onboarding".')
parser.add_argument('--mock', action='store_true', help='Report predefined machine specs (testing only)')
# Operating mode: "linux" (plain Linux host) or "hive" (HiveOS rig).
parser.add_argument('--mode', type=str, default="linux")

# base64-encoded JSON config to persist locally, then exit (see bottom of file).
parser.add_argument('--write-linux-config', type=str, default="")
# Optional hostname override applied while writing the linux config.
parser.add_argument('--linux-hostname-override', default="")

parser.add_argument('--auth-file', type=str, default="/opt/clore-hosting/client/auth", help='Auth file location')

args = parser.parse_args()

# --- Config file locations ------------------------------------------------
# With --mock, every path points at a local fixture instead of the system file.
hive_hive_wallet_conf_path = "mock/wallet.conf" if args.mock else "/hive-config/wallet.conf"
hive_rig_conf_path = "mock/rig.conf" if args.mock else "/hive-config/rig.conf"
hive_oc_conf_path = "mock/oc.conf" if args.mock else "/hive-config/nvidia-oc.conf"
clore_conf_path = "mock/onboarding.json" if args.mock else "/opt/clore-hosting/onboarding.json"
|
||||||
|
|
||||||
|
async def run_command(command: str):
    """Run *command* through bash without blocking the event loop.

    Returns a (stdout, stderr) tuple of decoded text.
    """
    argv = ['/bin/bash', '-c', command]
    runner = partial(subprocess.run, argv, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    # Off-load the blocking subprocess call to the default thread-pool executor.
    completed = await asyncio.get_running_loop().run_in_executor(None, runner)
    return completed.stdout, completed.stderr
|
||||||
|
|
||||||
|
def clean_config_value(value):
    """Normalize a raw KEY=VALUE config value.

    Drops any inline '#' comment, strips surrounding whitespace, and removes
    one matching pair of single or double quotes if present.
    """
    hash_pos = value.find('#')
    if hash_pos != -1:
        value = value[:hash_pos]
    value = value.strip()
    wrapped_single = len(value) >= 2 and value[0] == "'" and value[-1] == "'"
    wrapped_double = len(value) >= 2 and value[0] == '"' and value[-1] == '"'
    if wrapped_single or wrapped_double:
        value = value[1:-1]
    return value
|
||||||
|
|
||||||
|
def filter_name(name):
    """Keep only ASCII letters, digits, '_' and '-' from *name*."""
    return ''.join(ch for ch in name if ch.isascii() and (ch.isalnum() or ch in '_-'))
|
||||||
|
|
||||||
|
def validate_clore_config(clore_config):
    """Validate a parsed clore onboarding config dict.

    Returns the literal string "Validation successful" when all checks pass,
    otherwise a list of human-readable error messages. "auth" and "mrl" are
    mandatory; everything else is validated only when present.
    """

    def _hostname_ok(v):
        return isinstance(v, str) and 1 <= len(v) <= 64 and re.match(r'^[A-Za-z0-9_-]+$', v)

    def _auth_ok(v):
        return isinstance(v, str) and len(v) <= 256

    def _multipliers_ok(v):
        if not isinstance(v, dict):
            return False
        if "on_demand_multiplier" not in v or "spot_multiplier" not in v:
            return False
        return 1 <= v["on_demand_multiplier"] <= 50 and 1 <= v["spot_multiplier"] <= 50

    def _mrl_ok(v):
        # Minimum rental length, in hours.
        return isinstance(v, int) and 6 <= v <= 1440

    def _keep_params_ok(v):
        return isinstance(v, bool)

    def _crypto_pricing_ok(cfg):
        needed = {"on_demand_bitcoin", "on_demand_clore", "spot_bitcoin", "spot_clore"}
        if not needed.issubset(cfg):
            return False
        return 0.000001 <= cfg["on_demand_bitcoin"] <= 0.005 and \
            0.1 <= cfg["on_demand_clore"] <= 5000 and \
            0.000001 <= cfg["spot_bitcoin"] <= 0.005 and \
            0.1 <= cfg["spot_clore"] <= 5000

    def _usd_pricing_ok(autoprice):
        if not {"on_demand", "spot"}.issubset(autoprice):
            return False
        return 0.1 <= autoprice["spot"] <= 1000 and \
            0.1 <= autoprice["on_demand"] <= 1000

    errors = []

    if "hostname_override" in clore_config and not _hostname_ok(clore_config["hostname_override"]):
        errors.append("hostname_override must be a string between 1-64 characters, only A-Za-z0-9_- allowed")

    if "auth" not in clore_config or not _auth_ok(clore_config["auth"]):
        errors.append("auth is mandatory and must be a string of max 256 character")

    if "autoprice" in clore_config and isinstance(clore_config["autoprice"], dict):
        # "usd" pricing takes priority over the multiplier scheme.
        if clore_config["autoprice"].get("usd"):
            if not _usd_pricing_ok(clore_config["autoprice"]):
                errors.append("usd pricing input is invalid")
        elif not _multipliers_ok(clore_config["autoprice"]):
            errors.append("multipliers are not following spec")

    if "mrl" not in clore_config or not _mrl_ok(clore_config["mrl"]):
        errors.append("mrl is mandatory and must be an integer in range 6-1440")

    if "keep_params" in clore_config and not _keep_params_ok(clore_config["keep_params"]):
        errors.append("keep_params must be a boolean value")

    # If any explicit crypto price is given, all four must be given and valid.
    if any(k in clore_config for k in ("on_demand_bitcoin", "on_demand_clore", "spot_bitcoin", "spot_clore")):
        if not _crypto_pricing_ok(clore_config):
            errors.append("All pricing fields (on_demand_bitcoin, on_demand_clore, spot_bitcoin, spot_clore) must be specified and valid.")

    return errors if errors else "Validation successful"
|
||||||
|
|
||||||
|
def base64_string_to_json(base64_string):
    """Decode a (possibly unpadded) base64 string into a JSON object.

    Returns the parsed object, or None when decoding or parsing fails.
    """
    try:
        # b64decode requires len % 4 == 0; restore any stripped '=' padding.
        remainder = len(base64_string) % 4
        if remainder:
            base64_string += '=' * (4 - remainder)
        decoded_text = base64.b64decode(base64_string).decode('utf-8')
        return json.loads(decoded_text)
    except Exception:
        return None
|
||||||
|
|
||||||
|
def get_default_power_limits():
    """Query nvidia-smi for each GPU's default power limit in watts.

    Returns a list of ints (one entry per GPU) or None when nvidia-smi
    fails or reports nothing usable.
    """
    cmd = "nvidia-smi -q -d POWER | grep \"Default Power Limit\" | awk '{print $5}'"
    try:
        proc = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
        limits = []
        for raw in proc.stdout.strip().split('\n'):
            # nvidia-smi prints "N/A" for GPUs without a default limit.
            if raw.lower() == 'n/a':
                continue
            try:
                limits.append(int(float(raw)))
            except ValueError:
                continue
        return limits if limits else None
    except subprocess.CalledProcessError:
        return None
    except Exception:
        return None
|
||||||
|
def validate_and_convert(input_str, min_val, max_val, adjust_bounds=False):
    """Parse a whitespace-separated string of integers.

    With adjust_bounds=True, out-of-range values are clamped into
    [min_val, max_val]; otherwise any out-of-range value fails the whole
    parse. Returns the list of ints, or None on any failure.
    """
    try:
        numbers = [int(token) for token in input_str.split()]
        if adjust_bounds:
            return [min(max_val, max(min_val, n)) for n in numbers]
        if any(n < min_val or n > max_val for n in numbers):
            return None
        return numbers
    except Exception:
        return None
|
||||||
|
def get_number_or_last(numbers, index):
    """Return numbers[index], falling back to the final element when the index
    is past the end (lets a short per-GPU list cover all remaining GPUs)."""
    return numbers[index] if index < len(numbers) else numbers[-1]
|
||||||
|
|
||||||
|
async def async_read_file(path):
    """Read a text file without blocking the event loop.

    The original implementation performed the blocking ``open()``/``read()``
    directly inside the coroutine, stalling the entire event loop on slow
    media; the read is now off-loaded to the default thread-pool executor
    (matching the pattern used by ``run_command``).

    Returns the file contents as a string, or None on any error (missing
    file, permission problem, decode error, ...).
    """
    def _read():
        with open(path, 'r') as file:
            return file.read()

    try:
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, _read)
    except Exception:
        # Callers treat None as "config not present"; stay silent like before.
        return None
|
||||||
|
|
||||||
|
async def hive_parse_config(file_content):
    """Parse HiveOS-style KEY=VALUE config text into a dict.

    Blank lines, lines starting with '#', and lines without '=' are skipped;
    values are normalized via clean_config_value(). Accepts None/empty input
    and returns an empty dict for it.
    """
    parsed = {}
    if not file_content:
        return parsed
    for raw_line in file_content.split('\n'):
        raw_line = raw_line.strip()
        if raw_line.startswith("#") or '=' not in raw_line:
            continue
        key, _, raw_value = raw_line.partition('=')
        parsed[key] = clean_config_value(raw_value)
    return parsed
|
||||||
|
|
||||||
|
async def hive_load_configs(default_power_limits, static_config):
    """Load the clore onboarding config on a HiveOS rig.

    Config sources, in priority order:
      1. a ``*_TEMPLATE`` entry in wallet.conf carrying a base64-encoded JSON
         clore config (flightsheet must also set ``CUSTOM_MINER=clore``),
      2. the locally saved static config JSON string passed in.

    Returns ``(machine_name, clore_config, oc_overrides)`` on success.
    Exits the process when the rig config is unusable (``os._exit(1)``) or
    when clore is not selected in the flightsheet (``sys.exit(0)`` after
    disabling the clore services). Returns None implicitly when rig/wallet
    configs can't be loaded (the caller's unpack then raises and is retried).

    default_power_limits: list of per-GPU default power limits (watts); used
    both for the GPU count and as fallback "pl" values.
    """
    parsed_static_config = None
    try:
        parsed_static_config = json.loads(static_config)
    except Exception:
        # static_config may be None or invalid JSON; wallet.conf remains the
        # only source in that case.
        pass
    try:
        # Non-blocking file reads
        wallet_conf_content = await async_read_file(hive_hive_wallet_conf_path)
        rig_conf_content = await async_read_file(hive_rig_conf_path)

        # Parse rig config
        rig_conf = await hive_parse_config(rig_conf_content)

        # Rig identity is mandatory — it forms the reported machine name.
        if not rig_conf or "WORKER_NAME" not in rig_conf or "RIG_ID" not in rig_conf:
            print("WORKER_NAME or RIG_ID is missing from rig config")
            os._exit(1)

        clore_miner_present = False

        # Parse wallet config
        clore_config = None
        get_oc_config = False
        if wallet_conf_content:
            for wallet_conf_line in wallet_conf_content.split('\n'):
                wallet_conf_line = wallet_conf_line.strip()
                if wallet_conf_line[:1] != "#" and '=' in wallet_conf_line:
                    key, value = [wallet_conf_line.split('=', 1)[0], clean_config_value(wallet_conf_line.split('=', 1)[1])]
                    if key[-9:] == "_TEMPLATE":
                        # Template entries may carry the base64 clore config.
                        possible_clore_config = base64_string_to_json(value)
                        if possible_clore_config:
                            if "set_stock_oc" in possible_clore_config:
                                get_oc_config=True
                            clore_config = possible_clore_config
                    elif key == "CUSTOM_MINER" and value == "clore":
                        clore_miner_present = True

        # Fall back to the saved static config when the flightsheet gave nothing.
        if (not clore_miner_present or not clore_config) and parsed_static_config:
            clore_miner_present = True
            clore_config = parsed_static_config

        if not clore_miner_present:
            # Not a clore rig: shut clore services down and bail out cleanly.
            logger.info("CLORE not found in flighsheet, exiting")
            await run_command("systemctl disable clore-hosting.service ; systemctl stop clore-hosting.service ; systemctl disable docker ; systemctl stop docker ; systemctl disable clore-onboarding.service ; systemctl stop clore-onboarding.service")
            sys.exit(0)

        # Translate Hive's nvidia-oc.conf into per-GPU OC overrides.
        out_oc_config = {}
        if get_oc_config and default_power_limits:
            nvidia_oc = await async_read_file(hive_oc_conf_path)
            gpu_cnt = len(default_power_limits)
            if nvidia_oc and gpu_cnt > 0:
                try:
                    core_offset = None
                    mem_offset = None
                    pl_static = None
                    for nvidia_conf_line in nvidia_oc.split('\n'):
                        nvidia_conf_line = nvidia_conf_line.strip()
                        if nvidia_conf_line[:1] != "#" and '=' in nvidia_conf_line:
                            key, value = [nvidia_conf_line.split('=', 1)[0], clean_config_value(nvidia_conf_line.split('=', 1)[1])]
                            if value == "":
                                pass
                            elif key=="CLOCK":
                                # NOTE(review): offsets are halved here — presumably Hive
                                # stores doubled clock offsets; confirm the unit convention.
                                core_offset = [math.floor(num / 2) for num in validate_and_convert(value, -2000, 2000, adjust_bounds=True)]
                            elif key=="MEM":
                                mem_offset = [math.floor(num / 2) for num in validate_and_convert(value, -2000, 6000, adjust_bounds=True)]
                            elif key=="PLIMIT":
                                pl_static = validate_and_convert(value, 1, 1500, adjust_bounds=True)

                    if core_offset or mem_offset or pl_static:
                        # A short list applies its last value to the remaining GPUs.
                        for gpu_idx, default_pl in enumerate(default_power_limits):
                            out_oc_config[str(gpu_idx)] = {
                                "core": get_number_or_last(core_offset, gpu_idx) if core_offset else 0,
                                "mem": get_number_or_last(mem_offset, gpu_idx) if mem_offset else 0,
                                "pl": get_number_or_last(pl_static, gpu_idx) if pl_static else default_pl
                            }
                except Exception as oc_info_e:
                    # OC parsing is best-effort; fall back to no overrides.
                    pass

        # Construct machine name
        machine_name = f"{filter_name(rig_conf['WORKER_NAME'][:32])}_HIVE_{rig_conf['RIG_ID']}"
        return machine_name, clore_config, out_oc_config

    except Exception as e:
        logger.error(f"Can't load rig.conf, wallet.conf | {e}")
|
||||||
|
|
||||||
|
async def post_request(url, body, headers=None, timeout=15):
    """POST *body* (JSON-serialized) to *url* using http.client.

    Returns ``(status_code, response_data)`` where response_data is parsed
    JSON when the server returns valid JSON, else the raw response text.
    Returns ``(None, None)`` on transport errors. Raises ValueError for
    non-http(s) URL schemes.

    NOTE(review): despite being declared ``async``, the request itself is
    blocking (http.client) and can stall the event loop for up to *timeout*
    seconds — consider off-loading to an executor.
    """
    parsed_url = urlparse(url)

    if parsed_url.scheme == 'https':
        conn = http.client.HTTPSConnection(parsed_url.hostname, parsed_url.port or 443, timeout=timeout)
    elif parsed_url.scheme == 'http':
        conn = http.client.HTTPConnection(parsed_url.hostname, parsed_url.port or 80, timeout=timeout)
    else:
        raise ValueError(f"Unsupported URL scheme: {parsed_url.scheme}")

    json_data = json.dumps(body)

    # NOTE(review): this mutates a caller-supplied headers dict in place.
    if headers is None:
        headers = {}
    headers['Content-Type'] = 'application/json'

    path = parsed_url.path
    if parsed_url.query:
        path += '?' + parsed_url.query

    try:
        conn.request("POST", path, body=json_data, headers=headers)
        response = conn.getresponse()
        response_data = response.read().decode('utf-8')
        try:
            # Prefer structured JSON; fall back to the raw text on parse failure.
            parsed_response_data = json.loads(response_data)
            response_data = parsed_response_data
        except Exception as ep:
            pass
        status_code = response.status
        #if 200 <= status_code < 300:
        #    print(f"Request was successful: {status_code}")
        #else:
        #    print(f"Non-standard status code received: {status_code}")

        return status_code, response_data

    except (http.client.HTTPException, TimeoutError) as e:
        print(f"Request failed: {e}")
        return None, None
    finally:
        # Always release the connection, success or failure.
        conn.close()
|
||||||
|
|
||||||
|
def clean_clore_config(clore_config):
    """Return a copy of *clore_config* with local-only keys removed.

    "auth", "hostname_override", "set_stock_oc" and "save_config" drive local
    behavior and must not be forwarded to the onboarding API body ("auth"
    travels in the request headers instead).

    The original duplicated the ``"auth" in ...`` membership test and nested
    the remaining checks under it; the four removals are now independent
    guards, so the keys are stripped regardless of one another. The input
    dict is never mutated.
    """
    cleaned = clore_config.copy()
    for private_key in ("auth", "hostname_override", "set_stock_oc", "save_config"):
        cleaned.pop(private_key, None)
    return cleaned
|
||||||
|
|
||||||
|
def verify_or_update_file(file_path: str, expected_content: str) -> bool:
    """Ensure *file_path* holds exactly *expected_content*.

    Returns True when the file already matched (or on any I/O error),
    False when the file was created or rewritten.
    """
    try:
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as existing:
                if existing.read() == expected_content:
                    return True
        with open(file_path, 'w', encoding='utf-8') as target:
            target.write(expected_content)
        return False
    except Exception:
        # Unreadable/unwritable paths are reported as "nothing changed".
        return True
|
||||||
|
|
||||||
|
def get_machine_id(machine_id_path="/etc/machine-id"):
    """Return the systemd machine id (whitespace-stripped), or None when the
    file does not exist.

    The path was hard-coded; it is now a defaulted parameter (backward
    compatible) so tests and non-standard installs can point elsewhere.
    """
    if os.path.isfile(machine_id_path):
        with open(machine_id_path, "r") as file:
            return file.read().strip()
    return None
|
||||||
|
|
||||||
|
# Epoch timestamp after which onboarding may be retried once the account's
# server limit was hit; 0 means "no retry scheduled". Mutated inside main().
next_retry_reached_server_limit = 0

# --write-linux-config mode: decode the base64 JSON, optionally inject the
# hostname override, persist it to clore_conf_path and exit — the onboarding
# loop never runs in this mode.
if args.write_linux_config:
    linux_config = base64_string_to_json(args.write_linux_config)
    if linux_config:
        if args.linux_hostname_override:
            # Same constraints as validate_clore_config's hostname check.
            if 1 <= len(args.linux_hostname_override) <= 64 and re.match(r'^[A-Za-z0-9_-]+$', args.linux_hostname_override):
                linux_config["hostname_override"] = args.linux_hostname_override
            else:
                logger.error("Input hostname not valid")
                sys.exit(1)
        verify_or_update_file(clore_conf_path, json.dumps(linux_config))
        logger.success("Config written")
        sys.exit(0)
    else:
        logger.error("Invalid config")
        sys.exit(1)
|
||||||
|
|
||||||
|
async def main(machine_specs):
    """Onboarding loop: load the config, validate it, POST it to the Clore
    API and react to the response. Runs forever, sleeping between rounds.

    Fixes vs. the original:
      * ``next_retry_reached_server_limit`` is assigned in this function, so
        without a ``global`` declaration the earlier read of it raised
        UnboundLocalError (swallowed by the broad except and endlessly
        retried); it is now declared global.
      * the token branch tested the bare string literal
        ``"private_communication_token"`` (always truthy) instead of
        membership in ``response_data``.
    """
    global next_retry_reached_server_limit  # read and written below

    last_used_config = None        # last config submitted; avoids duplicate POSTs
    ever_pending_creation = False  # back off longer after the first "pending"
    machine_id = get_machine_id()

    default_power_limits = get_default_power_limits()

    if not machine_id:
        logger.error("Can't load machine ID")
        sys.exit(1)

    if not default_power_limits or len(default_power_limits) == 0:
        logger.error("Can't load default power limits of nVidia GPU(s)")
        sys.exit(1)

    oc_config = {}
    while True:
        try:
            if args.mode == "linux":
                clore_config = await async_read_file(clore_conf_path)
                clore_config = json.loads(clore_config)
                machine_name = f"LINUX_{machine_id}"
            elif args.mode == "hive":
                static_clore_config = await async_read_file(clore_conf_path)
                machine_name, clore_config, oc_config = await hive_load_configs(default_power_limits, static_clore_config)

            config_validation = validate_clore_config(clore_config)
            if config_validation == "Validation successful":
                # Persist the flightsheet-sourced config locally when asked.
                if "save_config" in clore_config and args.mode == "hive":
                    verify_or_update_file(clore_conf_path, json.dumps(clore_config))
                if "set_stock_oc" in clore_config:
                    # Empty OC dict means "clear overrides" on the API side.
                    if oc_config == {}:
                        clore_config["clear_oc_override"] = True
                    else:
                        clore_config["stock_oc_override"] = oc_config
                # Submit when the config changed, or when a scheduled
                # server-limit retry time has arrived.
                if clore_config != last_used_config or (time.time() > next_retry_reached_server_limit and next_retry_reached_server_limit > 0):
                    # Copy BEFORE adding name/specs so the next freshly loaded
                    # config (which lacks them) still compares equal.
                    last_used_config = clore_config.copy()
                    if type(clore_config) == dict and "hostname_override" in clore_config:
                        machine_name = clore_config["hostname_override"]
                    clore_config["name"] = machine_name
                    clore_config["specs"] = machine_specs
                    status_code, response_data = await post_request(
                        args.clore_endpoint,
                        clean_clore_config(clore_config),
                        {"auth": clore_config["auth"]},
                        15
                    )
                    next_retry_reached_server_limit = 0
                    if type(response_data) == dict:  # Response data seem to be correct format
                        if response_data.get("status") == "invalid_auth":
                            logger.error("Invalid auth token")
                        elif response_data.get("error") == "exceeded_rate_limit":
                            logger.error("Exceeded API limits, probably a lot of servers on same network")
                            logger.info("Retrying request in 65s")
                            await asyncio.sleep(60)
                            last_used_config = None
                        elif response_data.get("status") == "exceeded_limit":
                            logger.error("Your account already has the maximal server limit, retrying in 12hr")
                            next_retry_reached_server_limit = time.time() + 60*60*12
                        elif response_data.get("status") == "creation_pending":
                            logger.info("Machine creation is pending on clore.ai side")
                            await asyncio.sleep(60 if ever_pending_creation else 10)
                            ever_pending_creation = True
                            last_used_config = None
                        elif "init_communication_token" in response_data and "private_communication_token" in response_data:
                            # Persist "<init>:<private>" for the hosting daemon.
                            clore_hosting_sw_auth_str = f"{response_data['init_communication_token']}:{response_data['private_communication_token']}"
                            was_ok = verify_or_update_file(args.auth_file, clore_hosting_sw_auth_str)
                            if was_ok:
                                logger.info("Token for hosting software already configured")
                                await run_command("systemctl start clore-hosting.service")
                            else:
                                logger.success("Updated local auth file, restarting clore hosting")
                                await run_command("systemctl restart clore-hosting.service")
                        else:
                            logger.error("Unknown API response, retrying in 65s")
                            await asyncio.sleep(60)
                            last_used_config = None
            else:
                logger.error(f"Could not parse config - {' | '.join(config_validation)}")

        except Exception as e:
            print(e)
        await asyncio.sleep(5)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Collect hardware specs once up front (includes a one-time disk
    # benchmark), then hand them to the long-running onboarding loop.
    machine_specs = specs.get(benchmark_disk=True, mock=args.mock)
    asyncio.run(main(machine_specs))
|
|
@ -0,0 +1,283 @@
|
||||||
|
# Library to load specs of machine without the need for any 3rd party libs
|
||||||
|
import subprocess
|
||||||
|
import xml.etree.ElementTree as ET
|
||||||
|
import requests
|
||||||
|
import shutil
|
||||||
|
import time
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
|
||||||
|
def drop_caches():
    """Ask the kernel to drop page/dentry/inode caches (root-only sysctl).

    Failures (no permission, non-Linux host) are silently ignored.
    """
    try:
        with open('/proc/sys/vm/drop_caches', 'w') as sysctl:
            sysctl.write('3\n')
    except Exception:
        pass
|
||||||
|
|
||||||
|
def write_test(file_path, block_size, num_blocks):
    """Sequential write benchmark.

    Writes num_blocks random blocks of block_size bytes to file_path, then
    flushes and fsyncs so the data actually hits the device. Returns
    (speed_MBps, elapsed_seconds).
    """
    block = os.urandom(block_size)
    total_bytes = block_size * num_blocks

    started = time.time()
    with open(file_path, 'wb') as out:
        for _ in range(num_blocks):
            out.write(block)
        out.flush()
        os.fsync(out.fileno())
    elapsed = time.time() - started

    return total_bytes / elapsed / (1024 * 1024), elapsed
|
||||||
|
|
||||||
|
def read_test(file_path, block_size, num_blocks):
    """Sequential read benchmark over file_path.

    Reads up to num_blocks blocks of block_size bytes and returns
    (speed_MBps, elapsed_seconds). Speed is computed from the nominal
    total, matching the paired write_test().
    """
    total_bytes = block_size * num_blocks

    # Drop OS page caches first so we measure the device, not RAM.
    # (Root-only sysctl; silently ignored otherwise — same as drop_caches().)
    try:
        with open('/proc/sys/vm/drop_caches', 'w') as sysctl:
            sysctl.write('3\n')
    except Exception:
        pass

    started = time.time()
    with open(file_path, 'rb') as src:
        for _ in range(num_blocks):
            if not src.read(block_size):
                break
    elapsed = time.time() - started

    return total_bytes / elapsed / (1024 * 1024), elapsed
|
||||||
|
|
||||||
|
def disk_benchmark():
    """Benchmark sequential write then read speed via a /tmp scratch file.

    Skipped entirely (returns 0, 0) when under 1 GB is free; uses a smaller
    working set below 3 GB free. Returns (write_MBps, read_MBps), each
    rounded to two decimals.
    """
    _, _, free_bytes = shutil.disk_usage("/")
    gb_free = free_bytes / 1024 / 1024 / 1024
    if gb_free < 1:
        return 0, 0

    blk = 1024 * 1024
    blocks = 250 if gb_free < 3 else 1500
    target = "/tmp/output"

    print("Running disk benchmark...")
    print(f"Block Size: {blk} bytes, Number of Blocks: {blocks}")

    w_speed, w_time = write_test(target, blk, blocks)
    print(f"Write Speed: {w_speed:.2f} MB/s, Time: {w_time:.2f} seconds")

    r_speed, r_time = read_test(target, blk, blocks)
    print(f"Read Speed: {r_speed:.2f} MB/s, Time: {r_time:.2f} seconds")

    # Remove the scratch file before reporting.
    os.remove(target)
    return float(round(w_speed, 2)), float(round(r_speed, 2))
|
||||||
|
|
||||||
|
def get_root_disk_model():
    """Best-effort model string of the disk backing '/'.

    Returns "Virtual" for hypervisor-provided disks and "" when detection
    fails (missing tools, unexpected output, ...).
    """
    try:
        # `df /` — the second output line names the backing device.
        df_out = subprocess.check_output(['df', '/']).decode()
        device = df_out.splitlines()[1].split()[0]

        # /dev/sda1 -> /dev/sda: strip the trailing partition number.
        if device.startswith('/dev/'):
            device = device.rstrip('0123456789')

        # lsblk reports the hardware model for the whole-disk device.
        model = subprocess.check_output(['lsblk', '-no', 'MODEL', device]).decode().strip()

        # Collapse hypervisor-branded models into one label.
        lowered = model.lower()
        for marker in ('virtual', 'qemu', 'vmware', 'hyper-v', 'vbox'):
            if marker in lowered:
                return "Virtual"
        return model
    except Exception:
        return ""
|
||||||
|
|
||||||
|
def get_free_space_gb():
    """Free space on the root filesystem in GiB, rounded to two decimals.

    Exits the process when the filesystem cannot be inspected — the caller
    cannot produce meaningful specs without it. The original had an
    unreachable ``return ""`` after ``os._exit(1)``; it has been removed.
    """
    try:
        stats = os.statvfs('/')
        free_space_gb = (stats.f_frsize * stats.f_bfree) / (1024 ** 3)
        return round(free_space_gb, 2)
    except Exception:
        print("Can't measure the available disk space")
        os._exit(1)
|
||||||
|
|
||||||
|
def all_items_same(lst):
    """True when every element equals the first (vacuously True for 0 or 1
    elements)."""
    return len(lst) <= 1 or all(item == lst[0] for item in lst)
|
||||||
|
|
||||||
|
def get_motherboard_name():
    """Motherboard model read from DMI sysfs, truncated to 32 characters.

    Returns "Unknown" when the sysfs entry is absent or unreadable.
    """
    dmi_path = "/sys/devices/virtual/dmi/id/board_name"
    if not os.path.exists(dmi_path):
        return "Unknown"
    try:
        with open(dmi_path, 'r') as dmi_file:
            return dmi_file.read().strip().replace('\n', '')[:32]
    except Exception as read_err:
        print(f"Error reading Motherboard name: {read_err}")
        return "Unknown"
|
||||||
|
|
||||||
|
def get_cpu_info():
    """Return (total_threads, total_cores, model_name) for the host CPU.

    Parses `lscpu` output for the model name and SMT factor. Fixes vs. the
    original: a missing `lscpu` binary raised an uncaught FileNotFoundError
    (now falls back to defaults); ``os.cpu_count()`` can return None (now
    defaults to 1); core count uses floor division instead of
    ``int(float_division)``.
    """
    threads_per_core = 1
    total_threads = os.cpu_count() or 1
    model_name = "Unknown CPU"

    try:
        lscpu_out = subprocess.check_output(['lscpu']).decode('utf-8')
    except Exception:
        # lscpu unavailable/unrunnable: keep the safe defaults above.
        lscpu_out = ""

    for line in lscpu_out.split('\n'):
        try:
            key, value = line.split(':', 1)
            value = value.strip(' ')
            if "model name" in key.lower():
                model_name = value
            elif key == "Thread(s) per core" and int(value):
                threads_per_core = int(value)
        except Exception:
            # Lines without ':' or with non-numeric values are skipped.
            pass

    # Physical cores = hardware threads / SMT threads per core.
    total_cores = total_threads // threads_per_core
    return total_threads, total_cores, model_name
|
||||||
|
|
||||||
|
def get_ram_size():
    """Total system RAM in 'GB', parsed from /proc/meminfo.

    Returns 0 on any failure (file missing, parse error); returns None
    implicitly if the MemTotal line is absent.
    """
    try:
        with open('/proc/meminfo', 'r') as f:
            lines = f.readlines()
        for line in lines:
            if line.startswith('MemTotal'):
                # MemTotal is reported in kB.
                total_memory_kb = int(line.split()[1])
                # NOTE(review): the divisor mixes binary (1024) and decimal
                # (1000) units — the result is neither GiB nor GB exactly,
                # presumably to approximate marketed capacity. Confirm with
                # the API's expectations before "fixing".
                total_memory_gb = total_memory_kb / (1024) / 1000
                return round(total_memory_gb, 4)
    except Exception as e:
        return 0
|
||||||
|
|
||||||
|
def get_gpu_info_from_xml():
    """Query `nvidia-smi -x -q` and return ([gpu names], [VRAM sizes in GiB]).

    Exits the process when nvidia-smi itself fails; an XML parse error is
    only logged and yields (possibly partial) lists.
    """
    result = subprocess.run(['nvidia-smi', '-x', '-q'], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        # Without a working driver there is nothing to onboard.
        print(f"Error running nvidia-smi: {result.stderr}")
        os._exit(1)

    xml_output = result.stdout
    gpu_names = []
    gpu_vram_sizes = []

    try:
        root = ET.fromstring(xml_output)
        for gpu in root.findall('.//gpu'):
            product_name = gpu.find('product_name')
            fb_memory_usage = gpu.find('.//fb_memory_usage')
            total_memory = fb_memory_usage.find('total') if fb_memory_usage is not None else None

            if product_name is not None and total_memory is not None:
                # <total> reads like "24576 MiB"; convert to whole GiB.
                memory_size_mb = total_memory.text.split()[0]
                memory_size_gb = int(round(float(memory_size_mb) / 1024, 0))
                gpu_names.append(product_name.text)
                gpu_vram_sizes.append(memory_size_gb)

    except ET.ParseError as e:
        print(f"Error parsing XML: {e}")

    return gpu_names, gpu_vram_sizes
|
||||||
|
|
||||||
|
def get_country():
    """Two-letter country code of this machine's public IP.

    The result is cached in /opt/clore-hosting/CC after the first successful
    lookup so later calls (and reboots) avoid the network. Falls back to
    querying ipinfo.io; returns the sentinel string
    "Unable to determine country" when every source fails.
    """
    # Define the directory and file path
    directory_path = "/opt/clore-hosting"
    file_path = os.path.join(directory_path, "CC")

    # Check if the file exists; if so, read from the file
    if os.path.exists(file_path):
        try:
            with open(file_path, "r") as file:
                country = file.read().strip()
                if country:  # Ensure the file is not empty
                    return country
        except IOError as e:
            print(f"Error reading the country from file: {e}")

    # Ordered list of lookup endpoints (currently a single provider).
    apis = [
        "https://ipinfo.io/country"
    ]

    headers = {
        'User-Agent': 'CloreAutoOnboard/1.0'
    }

    for api in apis:
        try:
            response = requests.get(api, headers=headers, timeout=5)
            response.raise_for_status()  # Raise an exception for HTTP errors
            country = response.text.strip()  # Remove any extra whitespace/newlines
            if country:  # Check if the response is not empty
                # Cache for subsequent calls.
                save_data_to_file(country, directory_path, file_path)
                return country
        except (requests.RequestException, ValueError) as e:
            print(f"Error with {api}: {e}")
            continue

    # NOTE(review): this sentinel is returned where a country code is
    # expected — callers do not appear to check for it.
    return "Unable to determine country"
|
||||||
|
|
||||||
|
def save_data_to_file(data, directory_path, file_path):
    """Best-effort: persist text *data* to *file_path*, creating
    *directory_path* first if needed. Failures are logged, never raised.

    Fix vs. the original: ``os.makedirs`` ran outside the try block and
    without ``exist_ok``, so a permission problem or a concurrent-creation
    race raised and crashed the caller even though write failures were
    already only logged. Directory creation is now inside the guard and
    race-free.
    """
    try:
        os.makedirs(directory_path, exist_ok=True)
        with open(file_path, "w") as file:
            file.write(data)
    except OSError as e:
        # IOError is an alias of OSError on Python 3; one handler covers both.
        print(f"Error saving the data to file: {e}")
|
||||||
|
|
||||||
|
def get(benchmark_disk=False, mock=False):
    """Collect full machine specs as a dict ready for the onboarding API.

    benchmark_disk: run the disk benchmark (or reuse its cached result).
    mock: return canned specs for testing without touching hardware.

    Exits the process when no NVIDIA GPU is visible.
    """
    if mock:
        # Fixed fixture matching the real payload's shape.
        return {'mb': 'X99-6PLUS', 'cpu': 'Intel(R) Xeon(R) CPU E5-2698 v3 @ 2.30GHz', 'cpus': '32/64', 'ram': 128.8009, 'gpu': '1x NVIDIA GeForce RTX 4090', 'gpuram': 24, 'disk': 'MSATA SSD 1TB 878.78GB', 'disk_speed': 0, 'net': {'up': 0, 'down': 0, 'cc': 'UA'}}

    threads, cores, model = get_cpu_info()
    gpu_names, gpu_vram_sizes = get_gpu_info_from_xml()
    disk_speeds = None
    if len(gpu_names)==0:
        # GPU-less machines cannot be onboarded.
        print("No nvidia GPUs found")
        os._exit(1)

    if benchmark_disk:
        # Benchmark results are cached on disk so reruns skip the benchmark.
        benchmark_file = "/opt/clore-hosting/disk_benchmark.json"
        if os.path.exists(benchmark_file):
            try:
                with open(benchmark_file, "r") as file:
                    benchmark_result = file.read().strip()
                    benchmark_result = json.loads(benchmark_result)
                    disk_speeds = benchmark_result
            except Exception as load_fail:
                # Unreadable/corrupt cache: fall through and re-benchmark.
                pass
        if not disk_speeds:
            disk_speeds = disk_benchmark()
            save_data_to_file(json.dumps(disk_speeds), "/opt/clore-hosting", "/opt/clore-hosting/disk_benchmark.json")
    else:
        disk_speeds = [0,0]

    specs = {
        "mb": get_motherboard_name(),
        "cpu": model,
        "cpus": f"{cores}/{threads}",
        "ram": get_ram_size(),
        # Mixed models are reported generically; count is always included.
        "gpu": f"{len(gpu_names)}x "+(gpu_names[0] if all_items_same(gpu_names) else "Mixed GPUs"),
        # The smallest per-GPU VRAM bounds what a renter can rely on.
        "gpuram": min(gpu_vram_sizes),
        "disk": get_root_disk_model()[:32]+f' {get_free_space_gb()}GB',
        # Index 1 is the read speed from (write, read).
        "disk_speed": disk_speeds[1],
        "net":{
            "up": 0,
            "down": 0,
            "cc": get_country()
        }
    }
    return specs
|
Loading…
Reference in New Issue