# hosting/lib/xfs.py (Python; listed as 269 lines, 9.2 KiB by the repository viewer)

# Library to set up an XFS partition for Docker
from lib import ensure_packages_installed
from lib import logging as logging_lib
from lib import docker_interface
from lib import networking
from lib import get_specs
from lib import utils
from datetime import datetime
import asyncio
import json
import os
# Shorthand for the logger object exposed by lib.logging.
log = logging_lib.log
# Docker data directory; migrate() mounts the XFS image on this path.
DOCKER_ROOT = "/var/lib/docker"
# Image file that migrate() allocates with fallocate, formats with mkfs.xfs
# and loop-mounts on DOCKER_ROOT through /etc/fstab.
DOCKER_DATA_IMG = "/opt/clore-hosting/data.img"
# Sizing profile returned by get_to_use_storage_values() for hosts whose GPUs
# are all the same HIGH_PERFORMANCE_GPUS model, or that have a lot of free space.
# Values are in MB: space left unallocated, and smallest acceptable image size.
HP_LEAVE_FREE_SPACE_MB = 1024*24 # 24 GB
HP_MIN_XFS_PARTITION_SIZE = 1024*24 # 24 GB
# Sizing profile returned for every other host (same units as above).
GENERIC_LEAVE_FREE_SPACE_MB = 1024*8 # 8 GB
GENERIC_MIN_XFS_PARTITION_SIZE = 1024*10 # 10 GB
# File read and rewritten by init() to track the migration state
# ("enabled", "active", "not-enough-space", ...).
XFS_STATE_FILE = "/opt/clore-hosting/xfs_state"
# GPU names (compared against the "name" field reported by
# get_specs.get_gpu_info()) that select the HP_* sizing profile.
HIGH_PERFORMANCE_GPUS = [
"NVIDIA GeForce RTX 4090",
"NVIDIA GeForce RTX 3090"
]
# Packages passed to ensure_packages_installed() before migrate() touches
# the disk; migration is aborted when they are not available.
MANDATORY_PACKAGES = [
"xfsprogs",
"dmidecode",
"openvpn",
"iproute2",
"iputils-ping",
"util-linux"
]
# This code runs when clore hosting starts, to migrate Docker onto an XFS partition. Manual equivalent:
# sudo fallocate -l 300G /docker-storage.img
# sudo mkfs.xfs /docker-storage.img
# mount -o loop,pquota /docker-storage.img /mnt/docker-storage
def get_to_use_storage_values(max_free_space):
    """Choose the free-space reserve and minimum XFS image size for this host.

    Args:
        max_free_space: Space that could be given to the image; compared
            against ``1024 * 70`` (the caller passes a value in MB).

    Returns:
        A 2-tuple ``(leave_free_space, min_xfs_size)``:

        * the ``HP_*`` constants when every NVIDIA GPU reports the same name
          and that name is in ``HIGH_PERFORMANCE_GPUS``, or when
          ``max_free_space`` exceeds ``1024 * 70``;
        * the ``GENERIC_*`` constants otherwise;
        * ``("no-gpus", "no-gpus")`` when no NVIDIA GPU is listed;
        * ``(None, None)`` when the GPU query reports an error or its result
          cannot be interpreted.
    """
    _gpu_str, _gpu_mem, gpu_info, nvml_error = get_specs.get_gpu_info()
    if nvml_error:
        return None, None

    try:
        names = [entry["name"] for entry in gpu_info["nvidia"]]
        if not names:
            return "no-gpus", "no-gpus"

        first_name = names[0]
        uniform = all(name == first_name for name in names)
        is_high_performance = uniform and first_name in HIGH_PERFORMANCE_GPUS
        if is_high_performance or max_free_space > 1024 * 70:
            return HP_LEAVE_FREE_SPACE_MB, HP_MIN_XFS_PARTITION_SIZE
        return GENERIC_LEAVE_FREE_SPACE_MB, GENERIC_MIN_XFS_PARTITION_SIZE
    except Exception:
        # Any unexpected shape of the GPU info is reported as "unknown".
        return None, None
def _root_disk_name(root_device):
    """Return the name of the disk that holds the partition `root_device`.

    Disks whose own name ends in a digit (nvme0n1, mmcblk0, ...) number their
    partitions as "<disk>p<N>", so only that trailing "p<N>" is removed
    (nvme0n1p2 -> nvme0n1). For every other name the trailing partition
    digits are stripped (sda1 -> sda, sdp1 -> sdp).
    """
    import re  # local import keeps this helper self-contained

    name = os.path.basename(root_device)
    numbered_disk = re.match(r"^(.*\d)p\d+$", name)
    if numbered_disk:
        return numbered_disk.group(1)
    return name.rstrip('0123456789')


def migrate():
    """Move the Docker data root onto a loop-mounted XFS image.

    The steps are order-dependent and destructive: all containers are
    stopped, DOCKER_ROOT is deleted, DOCKER_DATA_IMG is allocated and
    formatted, and only then are daemon.json and /etc/fstab updated and the
    image mounted.

    Returns:
        None when the current state could not be determined (migration is
        skipped), otherwise one of 'success', 'packages-missing',
        'not-supported-boot-device', 'nvidia-failure', 'not-enough-space'
        or 'failure'.
    """
    docker_xfs_state = validate_docker_xfs()
    if docker_xfs_state == "skip":
        migrate_log("skipping migration")
        return
    elif docker_xfs_state == "valid":
        # DOCKER_ROOT already sits on its own XFS filesystem.
        migrate_log("migration succeeded")
        return 'success'

    packages_available = asyncio.run(ensure_packages_installed.ensure_packages_installed(
        MANDATORY_PACKAGES
    ))
    if not packages_available:
        migrate_log("packages missing")
        return 'packages-missing'

    root_device = get_specs.get_root_device()
    if not root_device:
        migrate_log("not supported boot device")
        return "not-supported-boot-device"

    device_name = _root_disk_name(root_device)
    if get_specs.is_usb_device(device_name):
        migrate_log("not supported boot device")
        return "not-supported-boot-device"

    log.info("Starting migration to xfs")
    docker_interface.stop_all_containers()

    # Drop a stale image from an earlier attempt so its size counts as free.
    if os.path.exists(DOCKER_DATA_IMG):
        try:
            os.remove(DOCKER_DATA_IMG)
        except Exception:
            migrate_log("Failed to remove DOCKER_DATA_IMG")
            return "failure"

    # DOCKER_ROOT is deleted below, so its current size is reclaimable too.
    max_free_space = utils.get_free_space_mb('/') + utils.get_directory_size_mb(DOCKER_ROOT)
    leave_free_space, min_xfs_size = get_to_use_storage_values(max_free_space)

    if leave_free_space == "no-gpus":
        migrate_log("no nvidia gpus detected")
        return "nvidia-failure"
    if leave_free_space is None:
        migrate_log("can't get free space")
        return "failure"

    data_img_size = int(max_free_space - leave_free_space)
    if data_img_size < min_xfs_size:
        migrate_log("not enought free space")
        return 'not-enough-space'

    docker_config_success = False
    fstab_config_success = False

    # Point of no return: the old Docker root is removed before the image is
    # created because its space is already included in data_img_size.
    code, stdout, stderr = utils.run_command(
        f"systemctl stop docker && rm -rf {DOCKER_ROOT} && fallocate -l {str(data_img_size)}M {DOCKER_DATA_IMG} && mkfs.xfs {DOCKER_DATA_IMG}"
    )

    networking.purge_clore_interfaces()

    if code == 0:
        docker_config_success = configure_docker_daemon()
    if code == 0 and docker_config_success:
        fstab_config_success = configure_fstab()

    if code == 0 and fstab_config_success:
        # -p: do not abort the mount step if the directory was re-created
        # in the meantime (e.g. by a socket-activated Docker restart).
        code, stdout, stderr = utils.run_command(
            f"mkdir -p {DOCKER_ROOT} && systemctl daemon-reload && mount -a"
        )
        if code == 0:
            return 'success'

        migrate_log("failed to migrate v1")
        configure_docker_daemon(remove=True)
        configure_fstab(remove=True)
        return 'failure'

    # Image creation or configuration failed: restore an (empty) Docker root
    # and discard the partially prepared image.
    utils.run_command(
        f"mkdir -p {DOCKER_ROOT}"
    )
    if os.path.exists(DOCKER_DATA_IMG):
        try:
            os.remove(DOCKER_DATA_IMG)
        except Exception:
            # Best-effort cleanup; record it instead of hiding it.
            migrate_log("Failed to remove DOCKER_DATA_IMG")
    migrate_log("failed to migrate v2")
    return 'failure'
def migrate_log(msg):
    """Append `msg`, prefixed with the current local time, to the migration log.

    The log directory is created when missing. Each entry is written as
    ``YYYY-MM-DD HH:MM:SS | <msg>`` followed by a newline.
    """
    log_path = "/opt/clore-hosting/migrate-log.txt"
    log_dir = os.path.dirname(log_path)
    os.makedirs(log_dir, exist_ok=True)

    stamp = f"{datetime.now():%Y-%m-%d %H:%M:%S}"
    with open(log_path, "a") as handle:
        handle.write(f"{stamp} | {msg}\n")
def _df_data_fields(df_output):
    """Return the fields of the second line of `df -T` output, or None.

    None is returned when there is no second line, when it has fewer than
    seven whitespace-separated fields, or when its third field is not numeric.
    """
    rows = df_output.split('\n')
    if len(rows) < 2:
        return None
    fields = rows[1].split()
    if len(fields) >= 7 and fields[2].isnumeric():
        return fields
    return None


def validate_docker_xfs():
    """Report whether DOCKER_ROOT already lives on its own XFS filesystem.

    Runs ``df -T`` for ``/`` and for DOCKER_ROOT and compares the third
    column (the size column of ``df -T``) and the filesystem type.

    Returns:
        "valid"   - DOCKER_ROOT is on xfs and its filesystem is smaller than
                    the root filesystem;
        "default" - both sizes were read but the above does not hold
                    (including when DOCKER_ROOT does not exist);
        "skip"    - a df call failed unexpectedly or its output could not
                    be parsed.
    """
    root_rc, root_out, root_err = utils.run_command("df -T /")
    docker_rc, docker_out, docker_err = utils.run_command(f"df -T {DOCKER_ROOT}")

    if root_rc != 0 or root_err != '':
        return "skip"

    docker_ok = docker_rc == 0 and docker_err == ''
    # df exits with 1 and names the path on stderr when DOCKER_ROOT is absent.
    docker_missing = docker_rc == 1 and f" {DOCKER_ROOT}: " in docker_err and docker_out == ''
    if not (docker_ok or docker_missing):
        return "skip"

    root_fields = _df_data_fields(root_out)
    root_blocks = int(root_fields[2]) if root_fields else None

    fs_type = ''
    if docker_rc == 1:
        # A missing directory would be created on the root filesystem.
        docker_blocks = root_blocks
    else:
        docker_fields = _df_data_fields(docker_out)
        if docker_fields:
            docker_blocks = int(docker_fields[2])
            fs_type = docker_fields[1]
        else:
            docker_blocks = None

    if root_blocks is None or docker_blocks is None:
        return "skip"
    if fs_type == "xfs" and root_blocks > docker_blocks:
        return "valid"
    return "default"
def configure_docker_daemon(remove=False):
    """Add or remove the XFS-related settings in /etc/docker/daemon.json.

    Args:
        remove: When False (default), set ``data-root`` to DOCKER_ROOT and
            ``storage-driver`` to ``overlay2``. When True, delete those keys,
            but only if they still hold exactly these values.

    Returns:
        True when the file is in the requested state afterwards, False when
        it could not be read, parsed or written.
    """
    daemon_json_path = "/etc/docker/daemon.json"
    try:
        try:
            with open(daemon_json_path, 'r') as file:
                daemon_config = json.loads(file.read())
        except FileNotFoundError:
            # daemon.json is optional for Docker; a missing file is an empty
            # configuration, not an error that should abort the migration.
            daemon_config = {}

        if not isinstance(daemon_config, dict):
            # A top-level JSON value other than an object cannot be edited.
            return False

        updated_config = dict(daemon_config)
        if remove:
            if updated_config.get("data-root") == DOCKER_ROOT:
                del updated_config["data-root"]
            if updated_config.get("storage-driver") == "overlay2":
                del updated_config["storage-driver"]
        else:
            updated_config["data-root"] = DOCKER_ROOT
            updated_config["storage-driver"] = "overlay2"

        # Only touch the file when something actually changes; this also
        # avoids creating an empty daemon.json on remove.
        if updated_config != daemon_config:
            os.makedirs(os.path.dirname(daemon_json_path), exist_ok=True)
            with open(daemon_json_path, 'w') as file:
                file.write(json.dumps(updated_config, indent=4))
        return True
    except Exception:
        # Callers only act on the boolean result.
        return False
def configure_fstab(remove=False):
    """Add or remove the loop-mount entry for the Docker XFS image in /etc/fstab.

    The entry is recognised line by line, comparing whitespace-separated
    fields. A commented-out copy is therefore not mistaken for an active
    entry, and removal never joins the neighbouring lines together.

    Args:
        remove: When False (default), append the entry unless an identical
            one is already present. When True, drop every matching line.

    Returns:
        True when /etc/fstab is in the requested state afterwards, False when
        it could not be read or written.
    """
    try:
        file_path = "/etc/fstab"
        mount_line = f"{DOCKER_DATA_IMG} {DOCKER_ROOT} xfs loop,pquota 0 0"
        mount_fields = mount_line.split()

        with open(file_path, 'r') as file:
            raw_content = file.read()

        lines = raw_content.split("\n")
        entry_present = any(line.split() == mount_fields for line in lines)

        if remove:
            if entry_present:
                kept_lines = [line for line in lines if line.split() != mount_fields]
                with open(file_path, 'w') as file:
                    file.write("\n".join(kept_lines))
        elif not entry_present:
            # Make sure the entry starts on a line of its own.
            if raw_content and not raw_content.endswith("\n"):
                raw_content += "\n"
            raw_content += f"{mount_line}\n"
            with open(file_path, 'w') as file:
                file.write(raw_content)
        return True
    except Exception:
        # Callers only act on the boolean result.
        return False
def init():
    """Run the XFS migration when requested and report the resulting state.

    The decision is driven by the text stored in XFS_STATE_FILE:

    * file missing               -> "disabled"
    * contains "enabled"         -> migrate() is run and its outcome is
                                    written back to the file (nothing is
                                    written for "nvidia-failure")
    * contains "not-enough-space" -> 'not-enough-space'
    * contains "active"          -> "active"
    * anything else              -> "failed"

    Returns:
        One of "disabled", "active", 'not-enough-space' or "failed"; any
        exception is printed and reported as "failed".
    """
    try:
        if not os.path.exists(XFS_STATE_FILE):
            return "disabled"

        with open(XFS_STATE_FILE, 'r') as state_file:
            stored_state = state_file.read()

        if "enabled" not in stored_state:
            if 'not-enough-space' in stored_state:
                return 'not-enough-space'
            if "active" in stored_state:
                return "active"
            return "failed"

        migration_status = migrate()
        if migration_status == "nvidia-failure":
            # State file is left untouched in this case.
            return 'failed'

        # migrate() result -> (text stored in the state file, value returned)
        outcomes = {
            "success": ("active", "active"),
            "not-enough-space": ("not-enough-space", 'not-enough-space'),
            "not-supported-boot-device": ("not-supported-boot-device", 'failed'),
        }
        new_state, result = outcomes.get(migration_status, ("failed-migration", 'failed'))
        with open(XFS_STATE_FILE, 'w') as state_file:
            state_file.write(new_state)
        return result
    except Exception as error:
        print(error)
        return "failed"