2024-02-08 16:20:59 +01:00
|
|
|
"""Test SLURM module."""
|
|
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
from glicid_spawner.slurm import (
|
2024-02-19 17:16:28 +01:00
|
|
|
SlurmCluster,
|
2024-02-08 16:20:59 +01:00
|
|
|
SlurmCpu,
|
|
|
|
SlurmGpu,
|
|
|
|
SlurmNode,
|
2024-02-19 17:16:28 +01:00
|
|
|
SlurmPartition,
|
2024-02-08 16:20:59 +01:00
|
|
|
sinfo,
|
2024-02-14 16:31:52 +01:00
|
|
|
sinfo_filter,
|
2024-02-14 17:09:12 +01:00
|
|
|
sinfo_from_file,
|
2024-02-14 16:31:52 +01:00
|
|
|
sinfo_reader,
|
|
|
|
sinfo_run,
|
2024-02-08 16:20:59 +01:00
|
|
|
subprocess,
|
|
|
|
)
|
|
|
|
|
|
|
|
# Directory holding the test fixtures, located next to this test module.
DATA = Path(__file__).parent / 'data'

# Text fixture passed to `sinfo_reader` / `sinfo_from_file` in the tests below.
SINFO_FILE = DATA / 'sinfo.txt'
# Decode with an explicit encoding so the content does not depend on the
# locale of the machine running the tests.
SINFO_CONTENT = SINFO_FILE.read_text(encoding='utf-8')
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
|
|
|
|
def test_slurm_dataclasses():
    """Check value coercion, string form and iteration of the SLURM dataclasses."""
    # CPU: int, str and float inputs must all be exposed as integers
    cpu = SlurmCpu(1, '2', 4.0)
    cpu_counts = (cpu.allocated, cpu.idle, cpu.total)

    assert cpu_counts == (1, 2, 4)
    assert all(isinstance(count, int) for count in cpu_counts)

    # GPU: explicit name and count
    named_gpu = SlurmGpu('fOo', '1')

    assert named_gpu  # __bool__
    assert str(named_gpu) == named_gpu.name == 'Foo'  # str() = name
    assert named_gpu.nb == 1

    assert isinstance(named_gpu.name, str)
    assert isinstance(named_gpu.nb, int)

    # GPU: default values
    default_gpu = SlurmGpu()

    assert not default_gpu  # __bool__
    assert str(default_gpu) == default_gpu.name == 'None'  # str() = name
    assert default_gpu.nb == 0

    # Node: built from the whitespace-separated fields of a single line
    node_fields = 'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split()
    node = SlurmNode(*node_fields)

    assert str(node) == 'cnode001'  # hostname
    assert (node.cluster, node.partition, node.hostname, node.state) == (
        'nautilus',
        'standard',
        'cnode001',
        'completing',
    )
    assert (node.cpu.allocated, node.cpu.idle, node.cpu.total) == (0, 96, 96)
    assert node.mem == 384
    assert node.gpu.name == 'None'

    # Partition: named collection of nodes
    partition = SlurmPartition('standard', [node])

    assert str(partition) == partition.name == 'standard'  # str() = name
    assert all(str(member) == 'cnode001' for member in partition)

    assert partition.gpus == 'None'
    assert partition.max_idle_cpu == 96
    assert partition.max_mem == 384

    # Cluster: named collection of partitions
    cluster = SlurmCluster('nautilus', [partition])

    assert str(cluster) == cluster.name == 'nautilus'  # str() = name
    assert cluster == 'nautilus'  # __eq__
    assert all(str(member) == 'standard' for member in cluster)
|
|
|
|
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
def test_slurm_sinfo_run(monkeypatch):
    """Check the command line that `sinfo_run` hands over to `check_output`."""

    def echo_command(cmd):
        # Return the joined command as bytes instead of executing it.
        return ' '.join(cmd).encode()

    monkeypatch.setattr(subprocess, 'check_output', echo_command)

    expected_sinfo = (
        'sinfo '
        '--federation '
        '--noheader '
        '--responding '
        '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    )

    # Without a username the bare sinfo command is expected.
    assert sinfo_run() == expected_sinfo

    # With a username the same command is expected wrapped in `su - <user> -c "..."`.
    assert sinfo_run(username='john-doe') == f'su - john-doe -c "{expected_sinfo}"'
|
|
|
|
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
def test_slurm_sinfo_reader():
    """Check the nodes parsed by `sinfo_reader` from the fixture content."""
    nodes = sinfo_reader(SINFO_CONTENT)

    assert all(isinstance(entry, SlurmNode) for entry in nodes)

    # Detailed check of the first parsed node
    first = nodes[0]

    assert first.cluster == 'N/A'
    assert first.partition == 'Devel'
    assert first.hostname == 'nazare001'
    assert first.state == 'idle'
    assert first.cpu.allocated == 0
    assert first.cpu.idle == first.cpu.total == 20
    assert first.mem == 128
    assert not first.gpu

    # Cluster of every node, in parsing order
    expected_clusters = 7 * ['N/A'] + 12 * ['nautilus'] + 6 * ['waves']
    assert [entry.cluster for entry in nodes] == expected_clusters

    # Number of nodes that are either idle or mixed
    assert sum(1 for entry in nodes if entry.state in ('idle', 'mixed')) == 10

    # CPU counters must be coherent with the reported state
    for entry in nodes:
        state = entry.state
        if state == 'idle':
            assert entry.cpu.allocated == 0
            assert entry.cpu.idle > 0
        elif state == 'mixed':
            assert entry.cpu.allocated > 0
            assert entry.cpu.idle > 0
        elif state == 'allocated':
            assert entry.cpu.allocated > 0
            assert entry.cpu.idle == 0

    assert sum(entry.mem for entry in nodes) == 7_792

    # Only the nodes with a truthy GPU are considered below
    with_gpu = [entry for entry in nodes if entry.gpu]

    expected_gpu_names = ['T4', 'A40', 'P100', 'K80', 'P100'] + 8 * ['A100']
    assert [entry.gpu.name for entry in with_gpu] == expected_gpu_names

    expected_gpu_counts = [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2]
    assert [entry.gpu.nb for entry in with_gpu] == expected_gpu_counts
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def test_slurm_sinfo_filter(monkeypatch):
    """Test SLURM SINFO filtered resources.

    The parsed fixture is filtered three times: with the default states,
    with only the `idle` state and with only the `completing` state. Each
    time the resulting cluster -> partition -> nodes mapping is checked.

    """
    resources = sinfo_reader(SINFO_CONTENT)

    # Default states
    clusters = sinfo_filter(resources)

    assert isinstance(clusters, dict)
    assert len(clusters) == 3
    assert list(clusters) == ['N/A', 'nautilus', 'waves']

    assert [len(partitions) for partitions in clusters.values()] == [2, 3, 2]

    partitions = clusters['nautilus']

    assert isinstance(partitions, dict)
    assert len(partitions) == 3
    assert list(partitions) == ['gpu', 'visu', 'all']

    gpu_nodes = partitions['gpu']

    assert len(gpu_nodes) == 2
    assert [node.hostname for node in gpu_nodes] == ['gnode1', 'gnode2']
    assert [node.cpu.allocated for node in gpu_nodes] == [4, 0]
    assert [node.cpu.idle for node in gpu_nodes] == [92, 96]
    assert [node.mem for node in gpu_nodes] == [768, 256]
    assert [node.gpu.name for node in gpu_nodes] == ['A100', 'A100']
    assert [node.gpu.nb for node in gpu_nodes] == [1, 2]

    # Get only `idle` nodes
    # NOTE: the trailing comma matters, `('idle')` is a plain string and
    # not a one-element tuple.
    clusters = sinfo_filter(resources, with_states=('idle',))

    assert list(clusters) == ['N/A', 'nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [1, 3]

    # Discard clusters without partition available
    clusters = sinfo_filter(resources, with_states=('completing',))

    assert list(clusters) == ['nautilus']
    assert [len(partitions) for partitions in clusters.values()] == [2]
|
2024-02-14 16:31:52 +01:00
|
|
|
|
|
|
|
|
2024-02-14 17:09:12 +01:00
|
|
|
def test_slurm_sinfo_from_file(monkeypatch):
    """Test SLURM SINFO resources from file.

    The fixture file is loaded keeping only the `idle` state, then the
    hostnames of all the nodes are flattened across clusters and partitions.

    """
    # One-element tuple: `('idle')` without the trailing comma is a plain string.
    resources = sinfo_from_file(SINFO_FILE, with_states=('idle',))

    # Cluster names are not needed here, only their partitions.
    hostnames = [
        node.hostname
        for partitions in resources.values()
        for nodes in partitions.values()
        for node in nodes
    ]

    assert hostnames == ['nazare001', 'gnode2', 'visu1', 'gnode2', 'visu1']
|
2024-02-14 17:09:12 +01:00
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources.

    `subprocess.check_output` is patched to return the encoded fixture
    content, so `sinfo` works on known data.

    """
    monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO_CONTENT.encode())

    # One-element tuple: `('completing')` without the trailing comma is a plain string.
    clusters = sinfo(username='john-doe', with_states=('completing',))

    assert list(clusters) == ['nautilus']

    partitions = clusters['nautilus']

    assert list(partitions) == ['standard', 'all']

    std_nodes = partitions['standard']

    assert std_nodes == [
        SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split())
    ]
|