spawner/tests/test_slurm.py

232 lines
6.4 KiB
Python

"""Test SLURM module."""
from pathlib import Path
from glicid_spawner.slurm import (
SlurmCluster,
SlurmCpu,
SlurmGpu,
SlurmNode,
SlurmPartition,
sinfo,
sinfo_filter,
sinfo_from_file,
sinfo_reader,
sinfo_run,
subprocess,
)
# Fixture directory, located next to this test module.
DATA = Path(__file__).parent.joinpath('data')

# Text fixture passed to the reader/filter tests below.
SINFO_FILE = DATA.joinpath('sinfo.txt')

# Fixture content, read once at import time and shared by the tests.
SINFO_CONTENT = SINFO_FILE.read_text()
def test_slurm_dataclasses():
    """Check value coercion and dunder helpers of the SLURM dataclasses."""
    # --- SlurmCpu: int, str and float inputs are all exposed as integers ---
    processor = SlurmCpu(1, '2', 4.0)
    cpu_values = (processor.allocated, processor.idle, processor.total)
    assert cpu_values == (1, 2, 4)
    assert all(isinstance(value, int) for value in cpu_values)

    # --- SlurmGpu: the name comes back capitalised, the count as an int ---
    card = SlurmGpu('fOo', '1')
    assert card  # __bool__
    assert str(card) == 'Foo'  # str() gives the name
    assert card.name == 'Foo'
    assert card.nb == 1
    assert isinstance(card.name, str)
    assert isinstance(card.nb, int)

    # --- SlurmGpu without arguments: falsy placeholder ---
    no_card = SlurmGpu()
    assert not no_card  # __bool__
    assert str(no_card) == 'None'  # str() gives the name
    assert no_card.name == 'None'
    assert no_card.nb == 0

    # --- SlurmNode: built from one whitespace-separated record ---
    record = 'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split()
    host = SlurmNode(*record)
    assert str(host) == 'cnode001'  # str() gives the hostname
    assert host.cluster == 'nautilus'
    assert host.partition == 'standard'
    assert host.hostname == 'cnode001'
    assert host.state == 'completing'
    assert host.cpu.allocated == 0
    assert host.cpu.idle == 96
    assert host.cpu.total == 96
    assert host.mem == 384  # raw field was '384000'
    assert host.gpu.name == 'None'  # raw field was '(null)'

    # --- SlurmPartition: iterable over its nodes, with aggregated figures ---
    queue = SlurmPartition('standard', [host])
    assert str(queue) == 'standard'  # str() gives the name
    assert queue.name == 'standard'
    assert all(str(member) == 'cnode001' for member in queue)
    assert queue.gpus == 'None'
    assert queue.max_idle_cpu == 96
    assert queue.max_mem == 384

    # --- SlurmCluster: iterable over its partitions, comparable to a str ---
    federation_member = SlurmCluster('nautilus', [queue])
    assert str(federation_member) == 'nautilus'  # str() gives the name
    assert federation_member.name == 'nautilus'
    assert federation_member == 'nautilus'  # __eq__
    assert all(str(member) == 'standard' for member in federation_member)
def test_slurm_sinfo_run(monkeypatch):
    """Check the command line that ``sinfo_run`` hands to ``check_output``.

    ``subprocess.check_output`` is replaced by a stub that echoes its argument
    (joined with spaces and encoded), so the assertions compare the value
    returned by ``sinfo_run`` with the expected command text.
    """

    def echo_command(cmd):
        """Return the received command joined by spaces, as bytes."""
        return ' '.join(cmd).encode()

    monkeypatch.setattr(subprocess, 'check_output', echo_command)

    expected_sinfo = (
        'sinfo '
        '--federation '
        '--noheader '
        '--responding '
        '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    )

    # Without a username the bare command is expected.
    assert sinfo_run() == expected_sinfo

    # With a username the same command is expected, quoted inside `su - <user> -c`.
    expected_as_user = 'su - john-doe -c "' + expected_sinfo + '"'
    assert sinfo_run(username='john-doe') == expected_as_user
def test_slurm_sinfo_reader():
    """Check that ``sinfo_reader`` turns the fixture text into ``SlurmNode`` items."""
    parsed = sinfo_reader(SINFO_CONTENT)
    assert all(isinstance(entry, SlurmNode) for entry in parsed)

    # Field-by-field check of the first parsed entry.
    first = parsed[0]
    assert first.cluster == 'N/A'
    assert first.partition == 'Devel'
    assert first.hostname == 'nazare001'
    assert first.state == 'idle'
    assert first.cpu.allocated == 0
    assert first.cpu.idle == first.cpu.total == 20
    assert first.mem == 128
    assert not first.gpu

    # Cluster column, in file order.
    expected_clusters = ['N/A'] * 7 + ['nautilus'] * 12 + ['waves'] * 6
    assert [entry.cluster for entry in parsed] == expected_clusters

    # Number of entries whose state is `idle` or `mixed`.
    assert sum(1 for entry in parsed if entry.state in ('idle', 'mixed')) == 10

    # CPU counters must be consistent with the reported state.
    for entry in parsed:
        status = entry.state
        if status == 'idle':
            assert entry.cpu.allocated == 0
            assert entry.cpu.idle > 0
        elif status == 'mixed':
            assert entry.cpu.allocated > 0
            assert entry.cpu.idle > 0
        elif status == 'allocated':
            assert entry.cpu.allocated > 0
            assert entry.cpu.idle == 0

    assert sum(entry.mem for entry in parsed) == 7_792

    # Only entries with a truthy GPU are considered below.
    cards = [entry.gpu for entry in parsed if entry.gpu]
    expected_names = ['T4', 'A40', 'P100', 'K80', 'P100'] + ['A100'] * 8
    assert [card.name for card in cards] == expected_names
    assert [card.nb for card in cards] == [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2]
def test_slurm_sinfo_filter(monkeypatch):
    """Test SLURM SINFO filtered resources.

    The fixture content is parsed with ``sinfo_reader`` and grouped with
    ``sinfo_filter``; the result is expected to be a dict of clusters, each
    holding a dict of partitions mapped to their nodes.
    """
    resources = sinfo_reader(SINFO_CONTENT)

    # No explicit state filter: rely on the function's defaults.
    clusters = sinfo_filter(resources)

    assert isinstance(clusters, dict)
    assert len(clusters) == 3
    assert list(clusters) == ['N/A', 'nautilus', 'waves']
    assert [len(partitions) for partitions in clusters.values()] == [2, 3, 2]

    partitions = clusters['nautilus']

    assert isinstance(partitions, dict)
    assert len(partitions) == 3
    assert list(partitions) == ['gpu', 'visu', 'all']

    gpu_nodes = partitions['gpu']

    assert len(gpu_nodes) == 2
    assert [node.hostname for node in gpu_nodes] == ['gnode1', 'gnode2']
    assert [node.cpu.allocated for node in gpu_nodes] == [4, 0]
    assert [node.cpu.idle for node in gpu_nodes] == [92, 96]
    assert [node.mem for node in gpu_nodes] == [768, 256]
    assert [node.gpu.name for node in gpu_nodes] == ['A100', 'A100']
    assert [node.gpu.nb for node in gpu_nodes] == [1, 2]

    # Get only `idle` nodes.
    # The trailing comma matters: ('idle') is just the string 'idle', which
    # would turn a membership test into a substring test.
    clusters = sinfo_filter(resources, with_states=('idle',))

    assert list(clusters) == ['N/A', 'nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [1, 3]

    # Discard clusters without partition available
    clusters = sinfo_filter(resources, with_states=('completing',))

    assert list(clusters) == ['nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [2]
def test_slurm_sinfo_from_file(monkeypatch):
    """Test SLURM SINFO resources from file.

    Loads the fixture file through ``sinfo_from_file`` restricted to the
    `idle` state, then flattens the cluster -> partition -> nodes mapping
    into a list of hostnames.
    """
    # One-element tuple: ('idle') without the comma is only the string 'idle'.
    resources = sinfo_from_file(SINFO_FILE, with_states=('idle',))

    # The cluster names are not needed here, so iterate over the values only.
    hostnames = [
        node.hostname
        for partitions in resources.values()
        for nodes in partitions.values()
        for node in nodes
    ]

    assert hostnames == ['nazare001', 'gnode2', 'visu1', 'gnode2', 'visu1']
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources.

    ``subprocess.check_output`` is stubbed to return the encoded fixture
    content, so ``sinfo`` is exercised without running any external command.
    """
    monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO_CONTENT.encode())

    # One-element tuple: ('completing') without the comma is only a string.
    clusters = sinfo(username='john-doe', with_states=('completing',))

    assert list(clusters) == ['nautilus']

    partitions = clusters['nautilus']

    assert list(partitions) == ['standard', 'all']

    std_nodes = partitions['standard']

    # Nodes compare equal to a SlurmNode built from the same raw record.
    assert std_nodes == [
        SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split())
    ]