Update SLURM sinfo filter to discard empty clusters
This commit is contained in:
parent
867e217d44
commit
52b96548c2
2 changed files with 64 additions and 29 deletions
|
@ -59,7 +59,7 @@ class SlurmNode:
|
||||||
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
|
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
|
||||||
|
|
||||||
|
|
||||||
def _sinfo_run(username: str = None) -> str:
|
def sinfo_run(username: str = None) -> str:
|
||||||
"""SLURM SINFO run command."""
|
"""SLURM SINFO run command."""
|
||||||
flags = '--federation --noheader --responding'
|
flags = '--federation --noheader --responding'
|
||||||
fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
||||||
|
@ -71,20 +71,18 @@ def _sinfo_run(username: str = None) -> str:
|
||||||
return subprocess.check_output(shlex.split(cmd, posix=False)).decode('utf-8')
|
return subprocess.check_output(shlex.split(cmd, posix=False)).decode('utf-8')
|
||||||
|
|
||||||
|
|
||||||
def _sinfo_reader(result) -> list:
|
def sinfo_reader(result: str) -> list:
|
||||||
"""SLURM SINFO reader."""
|
"""SLURM SINFO reader."""
|
||||||
return [SlurmNode(*re.findall('.{20}', node)) for node in result.splitlines()]
|
return [SlurmNode(*re.findall('.{20}', node)) for node in result.splitlines()]
|
||||||
|
|
||||||
|
|
||||||
def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict:
|
def sinfo_filter(resources: list, with_states=('idle', 'mixed')) -> dict:
|
||||||
"""SLURM SINFO resources available with a given state(s).
|
"""SLURM SINFO filtered resources available with a given state(s).
|
||||||
|
|
||||||
Grouped by cluster and partition names.
|
Grouped by cluster and partition names.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
resources = _sinfo_reader(_sinfo_run(username=username))
|
resources = {
|
||||||
|
|
||||||
return {
|
|
||||||
cluster: {
|
cluster: {
|
||||||
partition: available
|
partition: available
|
||||||
for partition, nodes in groupby(partitions, key=attrgetter('partition'))
|
for partition, nodes in groupby(partitions, key=attrgetter('partition'))
|
||||||
|
@ -92,3 +90,10 @@ def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict:
|
||||||
}
|
}
|
||||||
for cluster, partitions in groupby(resources, key=attrgetter('cluster'))
|
for cluster, partitions in groupby(resources, key=attrgetter('cluster'))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return {key: values for key, values in resources.items() if values}
|
||||||
|
|
||||||
|
|
||||||
|
def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict:
    """Return the SLURM SINFO resources available for a given user.

    Chains the three module steps: run the ``sinfo`` command
    (``sinfo_run``), parse its text output into nodes (``sinfo_reader``)
    and keep the nodes whose state is in ``with_states``
    (``sinfo_filter``).

    Parameters
    ----------
    username:
        Forwarded to ``sinfo_run``; ``None`` (default) queries without
        a specific user.
    with_states:
        Node states to keep, forwarded to ``sinfo_filter``.

    Returns
    -------
    dict
        The mapping produced by ``sinfo_filter``.
    """
    raw_output = sinfo_run(username=username)
    nodes = sinfo_reader(raw_output)
    return sinfo_filter(nodes, with_states=with_states)
|
||||||
|
|
|
@ -6,13 +6,15 @@ from glicid_spawner.slurm import (
|
||||||
SlurmCpu,
|
SlurmCpu,
|
||||||
SlurmGpu,
|
SlurmGpu,
|
||||||
SlurmNode,
|
SlurmNode,
|
||||||
_sinfo_reader,
|
|
||||||
_sinfo_run,
|
|
||||||
sinfo,
|
sinfo,
|
||||||
|
sinfo_filter,
|
||||||
|
sinfo_reader,
|
||||||
|
sinfo_run,
|
||||||
subprocess,
|
subprocess,
|
||||||
)
|
)
|
||||||
|
|
||||||
DATA = Path(__file__).parent / 'data'
|
DATA = Path(__file__).parent / 'data'
|
||||||
|
SINFO = (DATA / 'sinfo.txt').read_text()
|
||||||
|
|
||||||
|
|
||||||
def test_slurm_dataclasses():
|
def test_slurm_dataclasses():
|
||||||
|
@ -48,7 +50,7 @@ def test_slurm_sinfo_run(monkeypatch):
|
||||||
"""Test SLURM SINFO run command."""
|
"""Test SLURM SINFO run command."""
|
||||||
monkeypatch.setattr(subprocess, 'check_output', lambda cmd: ' '.join(cmd).encode())
|
monkeypatch.setattr(subprocess, 'check_output', lambda cmd: ' '.join(cmd).encode())
|
||||||
|
|
||||||
assert _sinfo_run() == (
|
assert sinfo_run() == (
|
||||||
'sinfo '
|
'sinfo '
|
||||||
'--federation '
|
'--federation '
|
||||||
'--noheader '
|
'--noheader '
|
||||||
|
@ -56,7 +58,7 @@ def test_slurm_sinfo_run(monkeypatch):
|
||||||
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
||||||
)
|
)
|
||||||
|
|
||||||
assert _sinfo_run(username='john-doe') == (
|
assert sinfo_run(username='john-doe') == (
|
||||||
'su - john-doe -c "'
|
'su - john-doe -c "'
|
||||||
'sinfo '
|
'sinfo '
|
||||||
'--federation '
|
'--federation '
|
||||||
|
@ -69,7 +71,7 @@ def test_slurm_sinfo_run(monkeypatch):
|
||||||
|
|
||||||
def test_slurm_sinfo_reader():
|
def test_slurm_sinfo_reader():
|
||||||
"""Test SLURM SINFO reader."""
|
"""Test SLURM SINFO reader."""
|
||||||
nodes = _sinfo_reader((DATA / 'sinfo.txt').read_text())
|
nodes = sinfo_reader(SINFO)
|
||||||
|
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
assert isinstance(node, SlurmNode)
|
assert isinstance(node, SlurmNode)
|
||||||
|
@ -113,11 +115,11 @@ def test_slurm_sinfo_reader():
|
||||||
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 2]
|
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 2]
|
||||||
|
|
||||||
|
|
||||||
def test_slurm_sinfo_resources(monkeypatch):
|
def test_slurm_sinfo_filter(monkeypatch):
|
||||||
"""Test SLURM SINFO resources."""
|
"""Test SLURM SINFO filtered resources."""
|
||||||
monkeypatch.setattr(subprocess, 'check_output', lambda _: (DATA / 'sinfo.txt').read_bytes())
|
resources = sinfo_reader(SINFO)
|
||||||
|
|
||||||
clusters = sinfo()
|
clusters = sinfo_filter(resources)
|
||||||
|
|
||||||
assert isinstance(clusters, dict)
|
assert isinstance(clusters, dict)
|
||||||
assert len(clusters) == 3
|
assert len(clusters) == 3
|
||||||
|
@ -125,21 +127,49 @@ def test_slurm_sinfo_resources(monkeypatch):
|
||||||
|
|
||||||
assert [len(partitions) for partitions in clusters.values()] == [2, 2, 2]
|
assert [len(partitions) for partitions in clusters.values()] == [2, 2, 2]
|
||||||
|
|
||||||
nautilus = clusters['nautilus']
|
partitions = clusters['nautilus']
|
||||||
|
|
||||||
assert isinstance(nautilus, dict)
|
assert isinstance(partitions, dict)
|
||||||
assert len(nautilus) == 2
|
assert len(partitions) == 2
|
||||||
assert list(nautilus) == ['gpu', 'all']
|
assert list(partitions) == ['gpu', 'all']
|
||||||
|
|
||||||
gpus = nautilus['gpu']
|
gpu_nodes = partitions['gpu']
|
||||||
|
|
||||||
assert len(gpus) == 2
|
assert len(gpu_nodes) == 2
|
||||||
assert [partition.hostname for partition in gpus] == ['gnode1', 'gnode2']
|
assert [node.hostname for node in gpu_nodes] == ['gnode1', 'gnode2']
|
||||||
assert [partition.cpu.allocated for partition in gpus] == [4, 0]
|
assert [node.cpu.allocated for node in gpu_nodes] == [4, 0]
|
||||||
assert [partition.cpu.idle for partition in gpus] == [92, 96]
|
assert [node.cpu.idle for node in gpu_nodes] == [92, 96]
|
||||||
assert [partition.mem for partition in gpus] == [768, 256]
|
assert [node.mem for node in gpu_nodes] == [768, 256]
|
||||||
assert [partition.gpu.name for partition in gpus] == ['A100', 'A100']
|
assert [node.gpu.name for node in gpu_nodes] == ['A100', 'A100']
|
||||||
assert [partition.gpu.nb for partition in gpus] == [1, 2]
|
assert [node.gpu.nb for node in gpu_nodes] == [1, 2]
|
||||||
|
|
||||||
# Get only `idle` nodes
|
# Get only `idle` nodes
|
||||||
assert [len(partitions) for partitions in sinfo(with_states=('idle')).values()] == [1, 2, 1]
|
clusters = sinfo_filter(resources, with_states=('idle'))
|
||||||
|
|
||||||
|
assert list(clusters) == [None, 'nautilus', 'waves']
|
||||||
|
assert [len(partitions) for partitions in clusters.values()] == [1, 2, 1]
|
||||||
|
|
||||||
|
# Discard clusters without partition available
|
||||||
|
clusters = sinfo_filter(resources, with_states=('completing'))
|
||||||
|
|
||||||
|
assert list(clusters) == ['nautilus']
|
||||||
|
assert [len(partitions) for partitions in clusters.values()] == [1]
|
||||||
|
|
||||||
|
|
||||||
|
def test_slurm_sinfo_resources(monkeypatch):
    """Test SLURM SINFO resources.

    End-to-end check of ``sinfo``: the subprocess call is replaced by the
    recorded ``SINFO`` fixture, then only the ``completing`` nodes are
    requested, which must leave a single cluster/partition/node.
    """
    # Serve the recorded output instead of spawning a real `sinfo` process.
    monkeypatch.setattr(subprocess, 'check_output', lambda _: SINFO.encode())

    # One-element tuple (note the trailing comma): `('completing')` is just the
    # string 'completing', and `x in 'completing'` is a substring test rather
    # than an exact state match.
    clusters = sinfo(username='john-doe', with_states=('completing',))

    # Clusters without any matching partition are discarded.
    assert list(clusters) == ['nautilus']

    partitions = clusters['nautilus']

    assert list(partitions) == ['standard']

    std_nodes = partitions['standard']

    assert std_nodes == [
        SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split())
    ]
|
||||||
|
|
Loading…
Add table
Reference in a new issue