Add SLURM SINFO parser

This commit is contained in:
Benoît Seignovert 2024-02-08 16:20:59 +01:00
parent e78d20787a
commit f00b406962
Signed by: Benoît Seignovert
GPG key ID: F5D8895227D18A0B
4 changed files with 240 additions and 0 deletions

17
tests/data/sinfo.txt Normal file
View file

@ -0,0 +1,17 @@
N/A Devel nazare001 idle 0/20/0/20 128000 (null)
N/A GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000
N/A A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20
N/A AMD-short cloudbreak001 drained 0/0/32/32 128000 (null)
N/A lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2
N/A lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4
N/A lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1
nautilus standard cnode001 completing 0/96/0/96 384000 (null)
nautilus bigmem cnode002 planned 0/96/0/96 768000 (null)
nautilus gpu gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1)
nautilus gpu gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1)
nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1)
nautilus all visu1 idle 0/96/0/96 768000 (null)
waves standard cribbar001 idle 0/40/0/40 128000 (null)
waves gpu budbud006 allocated 64/0/0/64 256000 gpu:a100:2,mps:a100:
waves all cribbar001 mixed 20/20/0/40 128000 (null)
waves devel vmworker001 inval 0/0/8/8 16000 (null)

135
tests/test_slurm.py Normal file
View file

@ -0,0 +1,135 @@
"""Test SLURM module."""
from pathlib import Path
from glicid_spawner.slurm import (
SlurmCpu,
SlurmGpu,
SlurmNode,
_sinfo_reader,
_sinfo_run,
sinfo,
subprocess,
)
# Directory next to this test module that holds the fixture files
# (e.g. `sinfo.txt`).
DATA = Path(__file__).parent.joinpath('data')
def test_slurm_dataclasses():
    """Check value coercion and defaults of the SLURM dataclasses."""
    # Mixed input types (int, str, float) must all end up as integers.
    cores = SlurmCpu(1, '2', 4.0)

    assert cores.allocated == 1
    assert cores.idle == 2
    assert cores.total == 4

    for value in (cores.allocated, cores.idle, cores.total):
        assert isinstance(value, int)

    # Explicit GPU: the name is re-capitalised and the count parsed as int.
    card = SlurmGpu('fOo', '1')

    assert card  # truthy through __bool__
    assert card.name == 'Foo'
    assert card.nb == 1

    assert isinstance(card.name, str)
    assert isinstance(card.nb, int)

    # GPU built without arguments: falsy, with placeholder name and no unit.
    empty = SlurmGpu()

    assert not empty  # falsy through __bool__
    assert empty.name == 'None'
    assert empty.nb == 0
def test_slurm_sinfo_run(monkeypatch):
    """Check the command line assembled by the SINFO runner."""

    def echo_command(argv):
        """Stand-in for `check_output`: return the joined command as bytes."""
        return ' '.join(argv).encode()

    monkeypatch.setattr(subprocess, 'check_output', echo_command)

    expected = (
        'sinfo '
        '--federation '
        '--noheader '
        '--responding '
        '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    )

    assert _sinfo_run() == expected
def test_slurm_sinfo_reader():
    """Check the nodes parsed from the `sinfo.txt` fixture."""
    content = (DATA / 'sinfo.txt').read_text()
    nodes = _sinfo_reader(content)

    assert all(isinstance(node, SlurmNode) for node in nodes)

    # First fixture line, checked field by field.
    first = nodes[0]

    assert first.cluster is None
    assert first.partition == 'Devel'
    assert first.hostname == 'nazare001'
    assert first.state == 'idle'
    assert first.cpu.allocated == 0
    assert first.cpu.idle == first.cpu.total == 20
    assert first.mem == 128
    assert not first.gpu

    # Cluster column, in file order.
    expected_clusters = [None] * 7 + ['nautilus'] * 6 + ['waves'] * 4
    assert [node.cluster for node in nodes] == expected_clusters

    # Number of nodes that are either `idle` or `mixed`.
    assert sum(node.state in ('idle', 'mixed') for node in nodes) == 7

    # CPU counters must agree with the reported state; nodes in any other
    # state are not checked.
    for node in nodes:
        if node.state not in ('idle', 'mixed', 'allocated'):
            continue

        if node.state == 'idle':
            assert node.cpu.allocated == 0
        else:
            assert node.cpu.allocated > 0

        if node.state == 'allocated':
            assert node.cpu.idle == 0
        else:
            assert node.cpu.idle > 0

    total_mem = sum(node.mem for node in nodes)
    assert total_mem == 4_672

    # Only nodes with a truthy GPU entry are listed, in file order.
    gpus = [node.gpu for node in nodes if node.gpu]

    assert [gpu.name for gpu in gpus] == [
        'T4',
        'A40',
        'P100',
        'K80',
        'P100',
        'A100',
        'A100',
        'A100',
        'A100',
    ]
    assert [gpu.nb for gpu in gpus] == [2, 2, 2, 4, 1, 1, 2, 4, 2]
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources.

    `subprocess.check_output` is replaced by a stub returning the raw bytes
    of the `sinfo.txt` fixture, so `sinfo()` is exercised without calling the
    real command. The result is expected to be a dict keyed by cluster, whose
    values are dicts keyed by partition name holding the selected nodes.
    """
    monkeypatch.setattr(subprocess, 'check_output', lambda _: (DATA / 'sinfo.txt').read_bytes())

    clusters = sinfo()

    assert isinstance(clusters, dict)
    assert len(clusters) == 3
    assert list(clusters) == [None, 'nautilus', 'waves']

    # Number of partitions kept per cluster with the default state filter.
    assert [len(partitions) for partitions in clusters.values()] == [2, 2, 2]

    nautilus = clusters['nautilus']

    assert isinstance(nautilus, dict)
    assert len(nautilus) == 2
    assert list(nautilus) == ['gpu', 'all']

    # Nodes of the `gpu` partition (the `allocated` gnode3 is not expected).
    gpus = nautilus['gpu']

    assert len(gpus) == 2
    assert [node.hostname for node in gpus] == ['gnode1', 'gnode2']
    assert [node.cpu.allocated for node in gpus] == [4, 0]
    assert [node.cpu.idle for node in gpus] == [92, 96]
    assert [node.mem for node in gpus] == [768, 256]
    assert [node.gpu.name for node in gpus] == ['A100', 'A100']
    assert [node.gpu.nb for node in gpus] == [1, 2]

    # Get only `idle` nodes.
    # NOTE: the trailing comma is required: `('idle')` is just the string
    # `'idle'`, which would turn a membership test into a substring match.
    assert [len(partitions) for partitions in sinfo(with_states=('idle',)).values()] == [1, 2, 1]