From 52b96548c247c0fef253c30ae49c386ad266b447 Mon Sep 17 00:00:00 2001 From: Benoit Seignovert Date: Wed, 14 Feb 2024 16:31:52 +0100 Subject: [PATCH] Update slurm sinfo filter to discard empty cluster --- src/glicid_spawner/slurm.py | 19 ++++++---- tests/test_slurm.py | 74 ++++++++++++++++++++++++++----------- 2 files changed, 64 insertions(+), 29 deletions(-) diff --git a/src/glicid_spawner/slurm.py b/src/glicid_spawner/slurm.py index dec3cb9..000a0d4 100644 --- a/src/glicid_spawner/slurm.py +++ b/src/glicid_spawner/slurm.py @@ -59,7 +59,7 @@ class SlurmNode: self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else []) -def _sinfo_run(username: str = None) -> str: +def sinfo_run(username: str = None) -> str: """SLURM SINFO run command.""" flags = '--federation --noheader --responding' fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres' @@ -71,20 +71,18 @@ def _sinfo_run(username: str = None) -> str: return subprocess.check_output(shlex.split(cmd, posix=False)).decode('utf-8') -def _sinfo_reader(result) -> list: +def sinfo_reader(result: str) -> list: """SLURM SINFO reader.""" return [SlurmNode(*re.findall('.{20}', node)) for node in result.splitlines()] -def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict: - """SLURM SINFO resources available with a given state(s). +def sinfo_filter(resources: list, with_states=('idle', 'mixed')) -> dict: + """SLURM SINFO filtered resources available with a given state(s). Grouped by cluster and partition names. """ - resources = _sinfo_reader(_sinfo_run(username=username)) - - return { + resources = { cluster: { partition: available for partition, nodes in groupby(partitions, key=attrgetter('partition')) @@ -92,3 +90,10 @@ def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict: } for cluster, partitions in groupby(resources, key=attrgetter('cluster')) } + + return {key: values for key, values in resources.items() if values} + + +def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict: + """SLURM SINFO resources available for a given user.""" + return sinfo_filter(sinfo_reader(sinfo_run(username=username)), with_states=with_states) diff --git a/tests/test_slurm.py b/tests/test_slurm.py index 2039602..2c59832 100644 --- a/tests/test_slurm.py +++ b/tests/test_slurm.py @@ -6,13 +6,15 @@ from glicid_spawner.slurm import ( SlurmCpu, SlurmGpu, SlurmNode, - _sinfo_reader, - _sinfo_run, sinfo, + sinfo_filter, + sinfo_reader, + sinfo_run, subprocess, ) DATA = Path(__file__).parent / 'data' +SINFO = (DATA / 'sinfo.txt').read_text() def test_slurm_dataclasses(): @@ -48,7 +50,7 @@ def test_slurm_sinfo_run(monkeypatch): """Test SLURM SINFO run command.""" monkeypatch.setattr(subprocess, 'check_output', lambda cmd: ' '.join(cmd).encode()) - assert _sinfo_run() == ( + assert sinfo_run() == ( 'sinfo ' '--federation ' '--noheader ' @@ -56,7 +58,7 @@ def test_slurm_sinfo_run(monkeypatch): '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres' ) - assert _sinfo_run(username='john-doe') == ( + assert sinfo_run(username='john-doe') == ( 'su - john-doe -c "' 'sinfo ' '--federation ' @@ -69,7 +71,7 @@ def test_slurm_sinfo_run(monkeypatch): def test_slurm_sinfo_reader(): """Test SLURM SINFO reader.""" - nodes = _sinfo_reader((DATA / 'sinfo.txt').read_text()) + nodes = sinfo_reader(SINFO) for node in nodes: assert isinstance(node, SlurmNode) @@ -113,11 +115,11 @@ def test_slurm_sinfo_reader(): assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 2] -def test_slurm_sinfo_resources(monkeypatch): - """Test SLURM SINFO resources.""" - monkeypatch.setattr(subprocess, 'check_output', lambda _: (DATA / 'sinfo.txt').read_bytes()) +def test_slurm_sinfo_filter(monkeypatch): + """Test SLURM SINFO filtered resources.""" + resources = sinfo_reader(SINFO) - clusters = sinfo() + clusters = sinfo_filter(resources) assert isinstance(clusters, dict) assert len(clusters) == 3 @@ -125,21 +127,49 @@ def test_slurm_sinfo_resources(monkeypatch): assert [len(partitions) for partitions in clusters.values()] == [2, 2, 2] - nautilus = clusters['nautilus'] + partitions = clusters['nautilus'] - assert isinstance(nautilus, dict) - assert len(nautilus) == 2 - assert list(nautilus) == ['gpu', 'all'] + assert isinstance(partitions, dict) + assert len(partitions) == 2 + assert list(partitions) == ['gpu', 'all'] - gpus = nautilus['gpu'] + gpu_nodes = partitions['gpu'] - assert len(gpus) == 2 - assert [partition.hostname for partition in gpus] == ['gnode1', 'gnode2'] - assert [partition.cpu.allocated for partition in gpus] == [4, 0] - assert [partition.cpu.idle for partition in gpus] == [92, 96] - assert [partition.mem for partition in gpus] == [768, 256] - assert [partition.gpu.name for partition in gpus] == ['A100', 'A100'] - assert [partition.gpu.nb for partition in gpus] == [1, 2] + assert len(gpu_nodes) == 2 + assert [node.hostname for node in gpu_nodes] == ['gnode1', 'gnode2'] + assert [node.cpu.allocated for node in gpu_nodes] == [4, 0] + assert [node.cpu.idle for node in gpu_nodes] == [92, 96] + assert [node.mem for node in gpu_nodes] == [768, 256] + assert [node.gpu.name for node in gpu_nodes] == ['A100', 'A100'] + assert [node.gpu.nb for node in gpu_nodes] == [1, 2] # Get only `idle` nodes - assert [len(partitions) for partitions in sinfo(with_states=('idle')).values()] == [1, 2, 1] + clusters = sinfo_filter(resources, with_states=('idle')) + + assert list(clusters) == [None, 'nautilus', 'waves'] + assert [len(partitions) for partitions in clusters.values()] == [1, 2, 1] + + # Discard clusters without partition available + clusters = sinfo_filter(resources, with_states=('completing')) + + assert list(clusters) == ['nautilus'] + assert [len(partitions) for partitions in clusters.values()] == [1] + + +def test_slurm_sinfo_resources(monkeypatch): + """Test SLURM SINFO resources.""" + monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO.encode()) + + clusters = sinfo(username='john-doe', with_states=('completing')) + + assert list(clusters) == ['nautilus'] + + partitions = clusters['nautilus'] + + assert list(partitions) == ['standard'] + + std_nodes = partitions['standard'] + + assert std_nodes == [ + SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split()) + ]