# Patch for lib/nvml.py (git unified diff, 3 hunks).
# NOTE(review): in this copy the line breaks inside the hunks have been collapsed
# to single spaces, so the Python indentation of the patched lines is not visible.
#
# Hunk 1 (@@ -23,7 +23,7 @@, GPU_MEM_ALLOWED_OC_RANGES):
#   Whitespace-only change: adds the missing space after the colon in the
#   "NVIDIA GeForce GTX 1080 Ti" entry; the value stays [-2000, 2000].
#
# Hunk 2 (@@ -39,6 +39,33 @@):
#   Adds a new module-level dict GPU_CORE_ALLOWED_OC_RANGES, inserted after
#   GPU_MEM_ALLOWED_OC_RANGES and before `is_hive = False`. It maps a GPU name
#   string to a two-element list (presumably [min, max] core offset -- confirm
#   units with the caller). P10x / GTX 10xx names get [-200, 1200]; CMP xxHX,
#   GTX 16xx and RTX 20xx names get [-1000, 1000].
#
# Hunk 3 (@@ -134,7 +161,17 @@, inside init(gpu_specs_file=None)):
#   The `except Exception as e2` handler no longer sets get_data_fail=True
#   unconditionally. If the lowercased text of e2 contains "function not found",
#   mem_range and core_range are looked up by gpu_spec["name"] in
#   GPU_MEM_ALLOWED_OC_RANGES / GPU_CORE_ALLOWED_OC_RANGES, each defaulting to
#   [0,0] when the name is not listed. Any other exception still sets
#   get_data_fail=True.
#   NOTE(review): the branch depends on the exact wording of the exception
#   message; confirm which exception type raises it and consider matching on the
#   type instead. The two "in ... else [0,0]" lookups could be dict.get(name, [0,0]).
diff --git a/lib/nvml.py b/lib/nvml.py index 94fb47e..f35bda4 100644 --- a/lib/nvml.py +++ b/lib/nvml.py @@ -23,7 +23,7 @@ GPU_MEM_ALLOWED_OC_RANGES = { # Known to be problematic GPUs "NVIDIA GeForce GTX 1070": [-2000, 2000], "NVIDIA GeForce GTX 1070 Ti": [-2000, 2000], "NVIDIA GeForce GTX 1080": [-2000, 2000], - "NVIDIA GeForce GTX 1080 Ti":[-2000, 2000], + "NVIDIA GeForce GTX 1080 Ti": [-2000, 2000], "NVIDIA CMP 30HX": [-2000, 6000], "NVIDIA CMP 40HX": [-2000, 6000], "NVIDIA CMP 50HX": [-2000, 6000], @@ -39,6 +39,33 @@ GPU_MEM_ALLOWED_OC_RANGES = { # Known to be problematic GPUs "NVIDIA GeForce RTX 2080 Ti": [-2000, 6000] } +GPU_CORE_ALLOWED_OC_RANGES = { # Known to be problematic GPUs + "NVIDIA P102-100": [-200, 1200], + "NVIDIA P104-100": [-200, 1200], + "NVIDIA P106-090": [-200, 1200], + "NVIDIA P106-100": [-200, 1200], + "NVIDIA GeForce GTX 1050 Ti": [-200, 1200], + "NVIDIA GeForce GTX 1060 3GB": [-200, 1200], + "NVIDIA GeForce GTX 1060 6GB": [-200, 1200], + "NVIDIA GeForce GTX 1070": [-200, 1200], + "NVIDIA GeForce GTX 1070 Ti": [-200, 1200], + "NVIDIA GeForce GTX 1080": [-200, 1200], + "NVIDIA GeForce GTX 1080 Ti": [-200, 1200], + "NVIDIA CMP 30HX": [-1000, 1000], + "NVIDIA CMP 40HX": [-1000, 1000], + "NVIDIA CMP 50HX": [-1000, 1000], + "NVIDIA CMP 90HX": [-1000, 1000], + "NVIDIA GeForce GTX 1650": [-1000, 1000], + "NVIDIA GeForce GTX 1660 SUPER": [-1000, 1000], + "NVIDIA GeForce GTX 1660 Ti": [-1000, 1000], + "NVIDIA GeForce RTX 2060": [-1000, 1000], + "NVIDIA GeForce RTX 2060 SUPER": [-1000, 1000], + "NVIDIA GeForce RTX 2070": [-1000, 1000], + "NVIDIA GeForce RTX 2070 SUPER": [-1000, 1000], + "NVIDIA GeForce RTX 2080": [-1000, 1000], + "NVIDIA GeForce RTX 2080 Ti": [-1000, 1000] +} + is_hive = False all_gpus_data_list=[] get_data_fail=False @@ -134,7 +161,17 @@ def init(gpu_specs_file=None): else: mem_range=[0,0] except Exception as e2: - get_data_fail=True + if "function not found" in str(e2).lower(): + if gpu_spec["name"] in GPU_MEM_ALLOWED_OC_RANGES: + 
mem_range = GPU_MEM_ALLOWED_OC_RANGES[gpu_spec["name"]] + else: + mem_range = [0,0] + if gpu_spec["name"] in GPU_CORE_ALLOWED_OC_RANGES: + core_range = GPU_CORE_ALLOWED_OC_RANGES[gpu_spec["name"]] + else: + core_range = [0,0] + else: + get_data_fail=True if type(mem_range) == list and type(core_range) == list and len(mem_range)==2 and len(core_range)==2: gpu_spec["mem"]=mem_range gpu_spec["core"]=core_range