spawner/tests/test_slurm.py

256 lines
7 KiB
Python
Raw Normal View History

2024-02-08 16:20:59 +01:00
"""Test SLURM module."""
from pathlib import Path
from glicid_spawner.slurm import (
SlurmCluster,
2024-02-08 16:20:59 +01:00
SlurmCpu,
SlurmGpu,
SlurmNode,
SlurmPartition,
gres,
2024-02-08 16:20:59 +01:00
sinfo,
sinfo_filter,
2024-02-14 17:09:12 +01:00
sinfo_from_file,
sinfo_reader,
sinfo_run,
2024-02-08 16:20:59 +01:00
subprocess,
)
# Directory holding the fixture files, located next to this test module.
DATA = Path(__file__).parent / 'data'

# Sample SINFO output, kept both as a path and as its text content.
SINFO_FILE = DATA / 'sinfo.txt'
# Explicit encoding: do not depend on the locale of the machine running the tests.
SINFO_CONTENT = SINFO_FILE.read_text(encoding='utf-8')
2024-02-08 16:20:59 +01:00
def test_slurm_dataclasses():
    """Check the values exposed by the SLURM CPU, GPU, node, partition and cluster objects."""
    # CPU: built from an int, a str and a float, every counter must come out as an int
    cpu = SlurmCpu(1, '2', 4.0)
    assert (cpu.allocated, cpu.idle, cpu.total) == (1, 2, 4)
    for counter in (cpu.allocated, cpu.idle, cpu.total):
        assert isinstance(counter, int)

    # GPU: compares equal to its own name
    gpu = SlurmGpu('fOo', '1')
    assert gpu  # __bool__
    assert gpu == str(gpu)
    assert str(gpu) == gpu.name
    assert gpu.name == 'Foo'
    assert gpu.nb == 1
    assert isinstance(gpu.name, str)
    assert isinstance(gpu.nb, int)

    # GPU default values
    no_gpu = SlurmGpu()
    assert not no_gpu  # __bool__
    assert str(no_gpu) == no_gpu.name
    assert no_gpu.name == 'None'
    assert no_gpu.nb == 0

    # Node: built from one whitespace-separated record
    fields = 'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split()
    node = SlurmNode(*fields)
    assert node.cluster == 'nautilus'
    assert node.partition == 'standard'
    assert node == str(node)
    assert str(node) == node.hostname
    assert node.hostname == 'cnode001'
    assert node.state == 'completing'
    assert node.cpu.allocated == 0
    assert node.cpu.idle == 96
    assert node.cpu.total == 96
    assert node.mem == 384
    assert node.gpu.name == 'None'

    # Partition: sized, iterable and indexable by node name
    partition = SlurmPartition('standard', [node])
    assert partition == str(partition)
    assert str(partition) == partition.name
    assert partition.name == 'standard'
    assert len(partition) == 1

    for member in partition:
        assert member == 'cnode001'

    member = partition['cnode001']  # __getitem__
    assert isinstance(member, SlurmNode)
    assert member == 'cnode001'

    assert partition.gpus == 'None'
    assert partition.max_idle_cpu == 96
    assert partition.max_mem == 384

    # Cluster: sized, iterable and indexable by partition name
    cluster = SlurmCluster('nautilus', [partition])
    assert cluster == str(cluster)
    assert str(cluster) == cluster.name
    assert cluster.name == 'nautilus'
    assert len(cluster) == 1

    for member in cluster:
        assert member == 'standard'

    member = cluster['standard']  # __getitem__
    assert isinstance(member, SlurmPartition)
    assert member == 'standard'
2024-02-08 16:20:59 +01:00
def test_slurm_sinfo_run(monkeypatch):
    """Check the output of `sinfo_run`, with and without a username."""

    def fake_check_output(cmd):
        # Echo the command back: arguments joined with spaces, as bytes.
        return ' '.join(cmd).encode()

    monkeypatch.setattr(subprocess, 'check_output', fake_check_output)

    sinfo_cmd = (
        'sinfo '
        '--federation '
        '--noheader '
        '--responding '
        '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    )

    # Without a username
    assert sinfo_run() == sinfo_cmd

    # With a username, the same command is expected double-quoted after `su - <user> -c`
    assert sinfo_run(username='john-doe') == f'su - john-doe -c "{sinfo_cmd}"'
2024-02-08 16:20:59 +01:00
def test_slurm_sinfo_reader():
    """Check the nodes that `sinfo_reader` returns for the sample SINFO content."""
    nodes = sinfo_reader(SINFO_CONTENT)

    assert all(isinstance(entry, SlurmNode) for entry in nodes)

    # First record of the sample
    first = nodes[0]
    assert first.cluster == 'N/A'
    assert first.partition == 'Devel'
    assert first.hostname == 'nazare001'
    assert first.state == 'idle'
    assert first.cpu.allocated == 0
    assert first.cpu.idle == first.cpu.total == 20
    assert first.mem == 128
    assert not first.gpu

    # Cluster name of every record, in order
    cluster_names = [entry.cluster for entry in nodes]
    assert cluster_names == 7 * ['N/A'] + 12 * ['nautilus'] + 6 * ['waves']

    # Number of nodes in the `idle` or `mixed` state
    available = [entry for entry in nodes if entry.state in ('idle', 'mixed')]
    assert len(available) == 10

    # CPU counters must be coherent with the reported state
    for entry in nodes:
        state, cpu = entry.state, entry.cpu
        if state == 'idle':
            assert cpu.allocated == 0
            assert cpu.idle > 0
        elif state == 'mixed':
            assert cpu.allocated > 0
            assert cpu.idle > 0
        elif state == 'allocated':
            assert cpu.allocated > 0
            assert cpu.idle == 0

    total_mem = sum(entry.mem for entry in nodes)
    assert total_mem == 7_792

    # Only the nodes with a truthy GPU
    gpu_nodes = [entry for entry in nodes if entry.gpu]
    expected_names = ['T4', 'A40', 'P100', 'K80', 'P100'] + 8 * ['A100']
    assert [entry.gpu.name for entry in gpu_nodes] == expected_names
    assert [entry.gpu.nb for entry in gpu_nodes] == [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2]
2024-02-08 16:20:59 +01:00
def test_slurm_sinfo_filter(monkeypatch):
    """Test SLURM SINFO filtered resources.

    Checks the clusters returned by `sinfo_filter` with its default states,
    then with an explicit `with_states` selection.
    """
    resources = sinfo_reader(SINFO_CONTENT)
    clusters = sinfo_filter(resources)

    assert isinstance(clusters, dict)
    assert len(clusters) == 3
    assert list(clusters) == ['N/A', 'nautilus', 'waves']  # __eq__ on cluster.name

    assert [len(partitions) for partitions in clusters.values()] == [2, 3, 2]

    nautilus = clusters['nautilus']

    assert isinstance(nautilus, SlurmCluster)
    assert len(nautilus) == 3
    assert nautilus.partitions == ['gpu', 'visu', 'all']  # __eq__ on partition.name

    gpu = nautilus['gpu']

    assert len(gpu) == 2
    assert [node.hostname for node in gpu] == ['gnode1', 'gnode2']
    assert [node.cpu.allocated for node in gpu] == [4, 0]
    assert [node.cpu.idle for node in gpu] == [92, 96]
    assert [node.mem for node in gpu] == [768, 256]
    assert [node.gpu.name for node in gpu] == ['A100', 'A100']
    assert [node.gpu.nb for node in gpu] == [1, 2]

    # Get only `idle` nodes.
    # The trailing comma matters: `('idle')` is the plain string `'idle'`, not a tuple.
    clusters = sinfo_filter(resources, with_states=('idle',))

    assert list(clusters) == ['N/A', 'nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [1, 3]

    # Discard clusters without partition available
    clusters = sinfo_filter(resources, with_states=('completing',))

    assert list(clusters) == ['nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [2]
2024-02-14 17:09:12 +01:00
def test_slurm_sinfo_from_file(monkeypatch):
    """Test SLURM SINFO resources from file.

    Flattens the clusters, partitions and nodes returned for the `idle` state
    and checks the resulting hostnames in order.
    """
    # One-element tuple: `('idle')` without the comma is just the string `'idle'`.
    resources = sinfo_from_file(SINFO_FILE, with_states=('idle',))

    assert [
        node.hostname
        for cluster in resources.values()
        for partition in cluster
        for node in partition
    ] == ['nazare001', 'gnode2', 'visu1', 'gnode2', 'visu1']
2024-02-14 17:09:12 +01:00
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources.

    `subprocess.check_output` is replaced by a stub returning the sample SINFO
    content as bytes, then the cluster / partition / node hierarchy returned by
    `sinfo` is checked for the `completing` state.
    """
    monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO_CONTENT.encode())

    # One-element tuple: `('completing')` without the comma is just a string.
    clusters = sinfo(username='john-doe', with_states=('completing',))

    assert 'nautilus' in clusters
    assert list(clusters) == ['nautilus']

    nautilus = clusters['nautilus']

    assert isinstance(nautilus, SlurmCluster)
    assert 'standard' in nautilus
    assert list(nautilus) == ['standard', 'all']

    standard = nautilus['standard']

    assert isinstance(standard, SlurmPartition)
    assert 'cnode001' in standard
    assert list(standard) == ['cnode001']

    cnode = standard['cnode001']

    assert isinstance(cnode, SlurmNode)
    assert cnode == 'cnode001'
def test_slurm_gres():
    """Check the GPU names that `gres` extracts from the sample SINFO file."""
    states = ('idle', 'idle~', 'mixed', 'allocated')
    clusters = sinfo_from_file(SINFO_FILE, with_states=states)

    # Sorted and without duplicates
    expected = ['A100', 'A40', 'None', 'P100', 'T4']
    assert gres(clusters) == expected