2024-02-08 16:20:59 +01:00
|
|
|
"""Test SLURM module."""
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from glicid_spawner.slurm import (
|
2024-02-19 17:16:28 +01:00
|
|
|
SlurmCluster,
|
2024-02-08 16:20:59 +01:00
|
|
|
SlurmCpu,
|
|
|
|
SlurmGpu,
|
|
|
|
SlurmNode,
|
2024-02-19 17:16:28 +01:00
|
|
|
SlurmPartition,
|
2024-02-20 11:16:53 +01:00
|
|
|
gres,
|
2024-02-08 16:20:59 +01:00
|
|
|
sinfo,
|
2024-02-14 16:31:52 +01:00
|
|
|
sinfo_filter,
|
2024-02-14 17:09:12 +01:00
|
|
|
sinfo_from_file,
|
2024-02-14 16:31:52 +01:00
|
|
|
sinfo_reader,
|
|
|
|
sinfo_run,
|
2024-02-08 16:20:59 +01:00
|
|
|
subprocess,
|
|
|
|
)
|
|
|
|
|
|
|
|
# Directory holding the fixture files, located next to this test module.
DATA = Path(__file__).parent.joinpath('data')

# SINFO fixture file, and its text content loaded once at import time.
SINFO_FILE = DATA.joinpath('sinfo.txt')
SINFO_CONTENT = SINFO_FILE.read_text()
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
|
|
|
|
def test_slurm_dataclasses():
    """Check the values, types and dunder helpers exposed by the SLURM dataclasses."""
    # CPU: the counters are given as int / str / float and expected back as int.
    cpu_counts = SlurmCpu(1, '2', 4.0)

    for field, expected in (('allocated', 1), ('idle', 2), ('total', 4)):
        value = getattr(cpu_counts, field)
        assert value == expected
        assert isinstance(value, int)

    # GPU: explicit name and count.
    named_gpu = SlurmGpu('fOo', '1')

    assert named_gpu  # __bool__
    assert named_gpu == str(named_gpu) == named_gpu.name == 'Foo'
    assert named_gpu.nb == 1
    assert isinstance(named_gpu.name, str)
    assert isinstance(named_gpu.nb, int)

    # GPU: default values.
    default_gpu = SlurmGpu()

    assert not default_gpu  # __bool__
    assert str(default_gpu) == default_gpu.name == 'None'
    assert default_gpu.nb == 0

    # Node: built from one whitespace-separated record.
    record = 'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split()
    cnode = SlurmNode(*record)

    assert cnode.cluster == 'nautilus'
    assert cnode.partition == 'standard'
    assert cnode == str(cnode) == cnode.hostname == 'cnode001'
    assert cnode.state == 'completing'
    assert cnode.cpu.allocated == 0
    assert cnode.cpu.idle == 96
    assert cnode.cpu.total == 96
    assert cnode.mem == 384
    assert cnode.gpu.name == 'None'

    # Partition: container of nodes.
    standard = SlurmPartition('standard', [cnode])

    assert standard == str(standard) == standard.name == 'standard'
    assert len(standard) == 1
    assert all(member == 'cnode001' for member in standard)  # __iter__

    picked_node = standard['cnode001']  # __getitem__
    assert isinstance(picked_node, SlurmNode)
    assert picked_node == 'cnode001'

    assert standard.gpus == 'None'
    assert standard.max_idle_cpu == 96
    assert standard.max_mem == 384

    # Cluster: container of partitions.
    nautilus = SlurmCluster('nautilus', [standard])

    assert nautilus == str(nautilus) == nautilus.name == 'nautilus'
    assert len(nautilus) == 1
    assert all(member == 'standard' for member in nautilus)  # __iter__

    picked_partition = nautilus['standard']  # __getitem__
    assert isinstance(picked_partition, SlurmPartition)
    assert picked_partition == 'standard'
|
2024-02-19 17:16:28 +01:00
|
|
|
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
def test_slurm_sinfo_run(monkeypatch):
    """Check the command line that `sinfo_run` hands over to `subprocess.check_output`."""

    def echo_command(cmd):
        """Stand in for `check_output`: return the joined command as bytes."""
        return ' '.join(cmd).encode()

    monkeypatch.setattr(subprocess, 'check_output', echo_command)

    sinfo_cmd = (
        'sinfo '
        '--federation '
        '--noheader '
        '--responding '
        '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    )

    # Without a username the bare command is expected.
    assert sinfo_run() == sinfo_cmd

    # With a username the same command is expected behind a `su` prefix.
    assert sinfo_run(username='john-doe') == 'su - john-doe -c ' + sinfo_cmd
|
|
|
|
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
def test_slurm_sinfo_reader():
    """Check the nodes parsed by `sinfo_reader` from the SINFO fixture content."""
    nodes = sinfo_reader(SINFO_CONTENT)

    assert all(isinstance(entry, SlurmNode) for entry in nodes)

    # First record of the fixture.
    first = nodes[0]

    assert first.cluster == 'N/A'
    assert first.partition == 'Devel'
    assert first.hostname == 'nazare001'
    assert first.state == 'idle'
    assert first.cpu.allocated == 0
    assert first.cpu.idle == first.cpu.total == 20
    assert first.mem == 128
    assert not first.gpu

    # Cluster names, in fixture order.
    expected_clusters = ['N/A'] * 7 + ['nautilus'] * 12 + ['waves'] * 6
    assert [entry.cluster for entry in nodes] == expected_clusters

    # Number of nodes that are either idle or mixed.
    assert sum(1 for entry in nodes if entry.state in ('idle', 'mixed')) == 10

    # CPU counters must agree with the reported state.
    for entry in nodes:
        cpu = entry.cpu
        if entry.state == 'idle':
            assert cpu.allocated == 0
            assert cpu.idle > 0
        elif entry.state == 'mixed':
            assert cpu.allocated > 0
            assert cpu.idle > 0
        elif entry.state == 'allocated':
            assert cpu.allocated > 0
            assert cpu.idle == 0

    assert sum(entry.mem for entry in nodes) == 7_792

    # Only the nodes with a truthy GPU are inspected below.
    gpu_nodes = [entry for entry in nodes if entry.gpu]

    assert [entry.gpu.name for entry in gpu_nodes] == [
        'T4',
        'A40',
        'P100',
        'K80',
        'P100',
    ] + 8 * ['A100']

    assert [entry.gpu.nb for entry in gpu_nodes] == [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2]
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def test_slurm_sinfo_filter(monkeypatch):
    """Test SLURM SINFO filtered resources.

    The nodes parsed from the SINFO fixture are filtered with the default
    states first, then with an explicit `with_states` selection.

    """
    resources = sinfo_reader(SINFO_CONTENT)

    # Default `with_states`
    clusters = sinfo_filter(resources)

    assert isinstance(clusters, dict)
    assert len(clusters) == 3
    assert list(clusters) == ['N/A', 'nautilus', 'waves']  # __eq__ on cluster.name

    assert [len(partitions) for partitions in clusters.values()] == [2, 3, 2]

    nautilus = clusters['nautilus']

    assert isinstance(nautilus, SlurmCluster)
    assert len(nautilus) == 3
    assert nautilus.partitions == ['gpu', 'visu', 'all']  # __eq__ on partition.name

    gpu = nautilus['gpu']

    assert len(gpu) == 2
    assert [node.hostname for node in gpu] == ['gnode1', 'gnode2']
    assert [node.cpu.allocated for node in gpu] == [4, 0]
    assert [node.cpu.idle for node in gpu] == [92, 96]
    assert [node.mem for node in gpu] == [768, 256]
    assert [node.gpu.name for node in gpu] == ['A100', 'A100']
    assert [node.gpu.nb for node in gpu] == [1, 2]

    # Get only `idle` nodes.
    # The trailing comma matters: `('idle')` is the plain string `'idle'`,
    # not a one-element tuple.
    clusters = sinfo_filter(resources, with_states=('idle',))

    assert list(clusters) == ['N/A', 'nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [1, 3]

    # Discard clusters without partition available
    clusters = sinfo_filter(resources, with_states=('completing',))

    assert list(clusters) == ['nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [2]
|
2024-02-14 16:31:52 +01:00
|
|
|
|
|
|
|
|
2024-02-14 17:09:12 +01:00
|
|
|
def test_slurm_sinfo_from_file(monkeypatch):
    """Test SLURM SINFO resources from file.

    The SINFO fixture file is loaded with only the `idle` state selected and
    the hostnames are collected by walking clusters, partitions and nodes.

    """
    # One-element tuple: `('idle')` without the trailing comma is just a string.
    resources = sinfo_from_file(SINFO_FILE, with_states=('idle',))

    assert [
        node.hostname
        for cluster in resources.values()
        for partition in cluster
        for node in partition
    ] == ['nazare001', 'gnode2', 'visu1', 'gnode2', 'visu1']
|
2024-02-14 17:09:12 +01:00
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources.

    `subprocess.check_output` is stubbed to return the SINFO fixture content
    as bytes, so no real command is executed by this test.

    """
    monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO_CONTENT.encode())

    # One-element tuple: `('completing')` without the trailing comma is just a string.
    clusters = sinfo(username='john-doe', with_states=('completing',))

    assert 'nautilus' in clusters
    assert list(clusters) == ['nautilus']

    nautilus = clusters['nautilus']

    assert isinstance(nautilus, SlurmCluster)
    assert 'standard' in nautilus
    assert list(nautilus) == ['standard', 'all']

    standard = nautilus['standard']

    assert isinstance(standard, SlurmPartition)
    assert 'cnode001' in standard
    assert list(standard) == ['cnode001']

    cnode = standard['cnode001']

    assert isinstance(cnode, SlurmNode)
    assert cnode == 'cnode001'
|
2024-02-20 11:16:53 +01:00
|
|
|
|
|
|
|
|
|
|
|
def test_slurm_gres():
    """Check the GPU names that `gres` extracts from the SINFO fixture file."""
    selected_states = ('idle', 'idle~', 'mixed', 'allocated')
    found = gres(sinfo_from_file(SINFO_FILE, with_states=selected_states))

    # Expected in sorted order, each name listed once.
    assert found == ['A100', 'A40', 'None', 'P100', 'T4']
|