Add SLURM partition and cluster data classes

This commit is contained in:
Benoît Seignovert 2024-02-19 17:16:28 +01:00
parent 8529930db1
commit a163ecb575
Signed by: Benoît Seignovert
GPG key ID: F5D8895227D18A0B
2 changed files with 98 additions and 3 deletions

View file

@ -11,7 +11,7 @@ from pathlib import Path
@dataclass @dataclass
class SlurmCpu: class SlurmCpu:
"""SLURM CPU resource.""" """SLURM CPU."""
allocated: int allocated: int
idle: int idle: int
@ -25,7 +25,7 @@ class SlurmCpu:
@dataclass @dataclass
class SlurmGpu: class SlurmGpu:
"""SLURM GPU resource.""" """SLURM GPU."""
name: str = field(default='None') name: str = field(default='None')
nb: int = field(default=0) nb: int = field(default=0)
@ -37,10 +37,13 @@ class SlurmGpu:
def __bool__(self): def __bool__(self):
return self.nb > 0 return self.nb > 0
def __str__(self):
return self.name
@dataclass @dataclass
class SlurmNode: class SlurmNode:
"""SLURM node resource.""" """SLURM node."""
cluster: str cluster: str
partition: str partition: str
@ -59,6 +62,55 @@ class SlurmNode:
self.mem = int(memory_mb) // 1000 # in GB self.mem = int(memory_mb) // 1000 # in GB
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else []) self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
def __str__(self):
return self.hostname
@dataclass
class SlurmPartition:
    """SLURM partition.

    A named group of nodes. Iterating over a partition yields its nodes,
    and ``str(partition)`` is the partition name.
    """

    name: str
    nodes: list

    def __str__(self):
        """Partition name."""
        return self.name

    def __iter__(self):
        """Iterate over the nodes of the partition."""
        return iter(self.nodes)

    @property
    def gpus(self) -> str:
        """List of GPUs available.

        Unique ``node.gpu.name`` values joined with ``:``. The names are
        sorted so that the result is stable between runs (a bare set has
        no guaranteed iteration order). Empty string when there is no node.
        """
        return ':'.join(sorted({node.gpu.name for node in self.nodes}))

    @property
    def max_idle_cpu(self) -> int:
        """Maximum of idle CPU available on a single node (0 without nodes)."""
        # `default` avoids a ValueError on an empty partition.
        return max((node.cpu.idle for node in self.nodes), default=0)

    @property
    def max_mem(self) -> int:
        """Maximum of memory available on a single node (0 without nodes)."""
        # Same unit as `node.mem`; `default` avoids a ValueError when empty.
        return max((node.mem for node in self.nodes), default=0)
@dataclass
class SlurmCluster:
    """SLURM cluster.

    A named group of partitions. Iterating over a cluster yields its
    partitions, and ``str(cluster)`` is the cluster name.
    """

    name: str
    partitions: list

    def __eq__(self, other):
        """Compare on the string form, so a cluster equals its own name."""
        mine, theirs = str(self), str(other)
        return mine == theirs

    def __iter__(self):
        """Iterate over the partitions of the cluster."""
        return iter(self.partitions)

    def __str__(self):
        """Cluster name."""
        return self.name
def sinfo_run(username: str = None) -> str: def sinfo_run(username: str = None) -> str:
"""SLURM SINFO run command.""" """SLURM SINFO run command."""

View file

@ -3,9 +3,11 @@
from pathlib import Path from pathlib import Path
from glicid_spawner.slurm import ( from glicid_spawner.slurm import (
SlurmCluster,
SlurmCpu, SlurmCpu,
SlurmGpu, SlurmGpu,
SlurmNode, SlurmNode,
SlurmPartition,
sinfo, sinfo,
sinfo_filter, sinfo_filter,
sinfo_from_file, sinfo_from_file,
@ -21,6 +23,7 @@ SINFO_CONTENT = SINFO_FILE.read_text()
def test_slurm_dataclasses(): def test_slurm_dataclasses():
"""Test SLURM dataclasses formatter.""" """Test SLURM dataclasses formatter."""
# CPU
cpu = SlurmCpu(1, '2', 4.0) cpu = SlurmCpu(1, '2', 4.0)
assert cpu.allocated == 1 assert cpu.allocated == 1
@ -31,9 +34,11 @@ def test_slurm_dataclasses():
assert isinstance(cpu.idle, int) assert isinstance(cpu.idle, int)
assert isinstance(cpu.total, int) assert isinstance(cpu.total, int)
# GPU
gpu = SlurmGpu('fOo', '1') gpu = SlurmGpu('fOo', '1')
assert gpu # __bool__ assert gpu # __bool__
assert str(gpu) == 'Foo' # = name
assert gpu.name == 'Foo' assert gpu.name == 'Foo'
assert gpu.nb == 1 assert gpu.nb == 1
@ -44,9 +49,47 @@ def test_slurm_dataclasses():
gpu = SlurmGpu() gpu = SlurmGpu()
assert not gpu # __bool__ assert not gpu # __bool__
assert str(gpu) == 'None' # = name
assert gpu.name == 'None' assert gpu.name == 'None'
assert gpu.nb == 0 assert gpu.nb == 0
# Node
node = SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split())
assert str(node) == 'cnode001' # hostname
assert node.cluster == 'nautilus'
assert node.partition == 'standard'
assert node.hostname == 'cnode001'
assert node.state == 'completing'
assert node.cpu.allocated == 0
assert node.cpu.idle == 96
assert node.cpu.total == 96
assert node.mem == 384
assert node.gpu.name == 'None'
# Partition
partition = SlurmPartition('standard', [node])
assert str(partition) == 'standard' # = name
assert partition.name == 'standard'
for _node in partition:
assert str(_node) == 'cnode001'
assert partition.gpus == 'None'
assert partition.max_idle_cpu == 96
assert partition.max_mem == 384
# Cluster
cluster = SlurmCluster('nautilus', [partition])
assert str(cluster) == 'nautilus' # = name
assert cluster.name == 'nautilus'
assert cluster == 'nautilus' # __eq__
for _partition in cluster:
assert str(_partition) == 'standard'
def test_slurm_sinfo_run(monkeypatch): def test_slurm_sinfo_run(monkeypatch):
"""Test SLURM SINFO run command.""" """Test SLURM SINFO run command."""