Update slurm sinfo filter to discard empty cluster

This commit is contained in:
Benoît Seignovert 2024-02-14 16:31:52 +01:00
parent 867e217d44
commit 52b96548c2
Signed by: Benoît Seignovert
GPG key ID: F5D8895227D18A0B
2 changed files with 64 additions and 29 deletions

View file

@@ -6,13 +6,15 @@ from glicid_spawner.slurm import (
SlurmCpu,
SlurmGpu,
SlurmNode,
_sinfo_reader,
_sinfo_run,
sinfo,
sinfo_filter,
sinfo_reader,
sinfo_run,
subprocess,
)
DATA = Path(__file__).parent / 'data'
SINFO = (DATA / 'sinfo.txt').read_text()
def test_slurm_dataclasses():
@@ -48,7 +50,7 @@ def test_slurm_sinfo_run(monkeypatch):
"""Test SLURM SINFO run command."""
monkeypatch.setattr(subprocess, 'check_output', lambda cmd: ' '.join(cmd).encode())
assert _sinfo_run() == (
assert sinfo_run() == (
'sinfo '
'--federation '
'--noheader '
@@ -56,7 +58,7 @@ def test_slurm_sinfo_run(monkeypatch):
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
)
assert _sinfo_run(username='john-doe') == (
assert sinfo_run(username='john-doe') == (
'su - john-doe -c "'
'sinfo '
'--federation '
@@ -69,7 +71,7 @@ def test_slurm_sinfo_run(monkeypatch):
def test_slurm_sinfo_reader():
"""Test SLURM SINFO reader."""
nodes = _sinfo_reader((DATA / 'sinfo.txt').read_text())
nodes = sinfo_reader(SINFO)
for node in nodes:
assert isinstance(node, SlurmNode)
@@ -113,11 +115,11 @@ def test_slurm_sinfo_reader():
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 2]
def test_slurm_sinfo_resources(monkeypatch):
"""Test SLURM SINFO resources."""
monkeypatch.setattr(subprocess, 'check_output', lambda _: (DATA / 'sinfo.txt').read_bytes())
def test_slurm_sinfo_filter(monkeypatch):
"""Test SLURM SINFO filtered resources."""
resources = sinfo_reader(SINFO)
clusters = sinfo()
clusters = sinfo_filter(resources)
assert isinstance(clusters, dict)
assert len(clusters) == 3
@@ -125,21 +127,49 @@ def test_slurm_sinfo_resources(monkeypatch):
assert [len(partitions) for partitions in clusters.values()] == [2, 2, 2]
nautilus = clusters['nautilus']
partitions = clusters['nautilus']
assert isinstance(nautilus, dict)
assert len(nautilus) == 2
assert list(nautilus) == ['gpu', 'all']
assert isinstance(partitions, dict)
assert len(partitions) == 2
assert list(partitions) == ['gpu', 'all']
gpus = nautilus['gpu']
gpu_nodes = partitions['gpu']
assert len(gpus) == 2
assert [partition.hostname for partition in gpus] == ['gnode1', 'gnode2']
assert [partition.cpu.allocated for partition in gpus] == [4, 0]
assert [partition.cpu.idle for partition in gpus] == [92, 96]
assert [partition.mem for partition in gpus] == [768, 256]
assert [partition.gpu.name for partition in gpus] == ['A100', 'A100']
assert [partition.gpu.nb for partition in gpus] == [1, 2]
assert len(gpu_nodes) == 2
assert [node.hostname for node in gpu_nodes] == ['gnode1', 'gnode2']
assert [node.cpu.allocated for node in gpu_nodes] == [4, 0]
assert [node.cpu.idle for node in gpu_nodes] == [92, 96]
assert [node.mem for node in gpu_nodes] == [768, 256]
assert [node.gpu.name for node in gpu_nodes] == ['A100', 'A100']
assert [node.gpu.nb for node in gpu_nodes] == [1, 2]
# Get only `idle` nodes
assert [len(partitions) for partitions in sinfo(with_states=('idle')).values()] == [1, 2, 1]
clusters = sinfo_filter(resources, with_states=('idle'))
assert list(clusters) == [None, 'nautilus', 'waves']
assert [len(partitions) for partitions in clusters.values()] == [1, 2, 1]
# Discard clusters without partition available
clusters = sinfo_filter(resources, with_states=('completing'))
assert list(clusters) == ['nautilus']
assert [len(partitions) for partitions in clusters.values()] == [1]
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources."""
    # Replace the subprocess call with the `SINFO` fixture text (re-encoded to
    # bytes, as `check_output` would return) so no real command is executed.
    monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO.encode())

    # The trailing comma matters: `('completing')` is only a parenthesised
    # string, whereas `('completing',)` is the intended 1-tuple of state names.
    clusters = sinfo(username='john-doe', with_states=('completing',))

    # Only the cluster that still has a matching partition is expected back.
    assert list(clusters) == ['nautilus']

    partitions = clusters['nautilus']

    assert list(partitions) == ['standard']

    std_nodes = partitions['standard']

    # The single remaining node must compare equal to a node built from the
    # same whitespace-separated fields.
    assert std_nodes == [
        SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split())
    ]