Add SLURM partition and cluster data classes
This commit is contained in:
parent
8529930db1
commit
a163ecb575
2 changed files with 98 additions and 3 deletions
|
@ -11,7 +11,7 @@ from pathlib import Path
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SlurmCpu:
|
class SlurmCpu:
|
||||||
"""SLURM CPU resource."""
|
"""SLURM CPU."""
|
||||||
|
|
||||||
allocated: int
|
allocated: int
|
||||||
idle: int
|
idle: int
|
||||||
|
@ -25,7 +25,7 @@ class SlurmCpu:
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SlurmGpu:
|
class SlurmGpu:
|
||||||
"""SLURM GPU resource."""
|
"""SLURM GPU."""
|
||||||
|
|
||||||
name: str = field(default='None')
|
name: str = field(default='None')
|
||||||
nb: int = field(default=0)
|
nb: int = field(default=0)
|
||||||
|
@ -37,10 +37,13 @@ class SlurmGpu:
|
||||||
def __bool__(self):
|
def __bool__(self):
|
||||||
return self.nb > 0
|
return self.nb > 0
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.name
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class SlurmNode:
|
class SlurmNode:
|
||||||
"""SLURM node resource."""
|
"""SLURM node."""
|
||||||
|
|
||||||
cluster: str
|
cluster: str
|
||||||
partition: str
|
partition: str
|
||||||
|
@ -59,6 +62,55 @@ class SlurmNode:
|
||||||
self.mem = int(memory_mb) // 1000 # in GB
|
self.mem = int(memory_mb) // 1000 # in GB
|
||||||
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
|
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
|
||||||
|
|
||||||
|
def __str__(self):
|
||||||
|
return self.hostname
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SlurmPartition:
    """SLURM partition.

    A named group of nodes. Iterating over the partition yields its nodes
    and ``str(partition)`` is the partition name.
    """

    name: str
    nodes: list

    def __str__(self) -> str:
        return self.name

    def __iter__(self):
        return iter(self.nodes)

    @property
    def gpus(self) -> str:
        """Colon-separated names of the distinct GPUs found on the nodes.

        Names are de-duplicated and sorted so that the resulting string is
        stable between runs (plain set iteration order of strings is not).
        An empty partition yields an empty string.
        """
        return ':'.join(sorted({node.gpu.name for node in self.nodes}))

    @property
    def max_idle_cpu(self) -> int:
        """Largest number of idle CPUs on a single node (0 without nodes)."""
        return max((node.cpu.idle for node in self.nodes), default=0)

    @property
    def max_mem(self) -> int:
        """Largest memory value of a single node (0 without nodes)."""
        return max((node.mem for node in self.nodes), default=0)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class SlurmCluster:
    """SLURM cluster.

    A named group of partitions. Iterating over the cluster yields its
    partitions and ``str(cluster)`` is the cluster name.
    """

    name: str
    partitions: list

    def __str__(self) -> str:
        return self.name

    def __iter__(self):
        return iter(self.partitions)

    def __eq__(self, other) -> bool:
        """Compare by string form, so a cluster equals its own name."""
        return str(self) == str(other)

    def __hash__(self) -> int:
        # Defining ``__eq__`` alone leaves the class unhashable. Hash the same
        # string that ``__eq__`` compares so that a cluster and its name land
        # in the same set/dict slot. The hash follows ``name``: do not rename
        # a cluster while it is stored in a set or used as a dict key.
        return hash(str(self))
|
||||||
|
|
||||||
|
|
||||||
def sinfo_run(username: str = None) -> str:
|
def sinfo_run(username: str = None) -> str:
|
||||||
"""SLURM SINFO run command."""
|
"""SLURM SINFO run command."""
|
||||||
|
|
|
@ -3,9 +3,11 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from glicid_spawner.slurm import (
|
from glicid_spawner.slurm import (
|
||||||
|
SlurmCluster,
|
||||||
SlurmCpu,
|
SlurmCpu,
|
||||||
SlurmGpu,
|
SlurmGpu,
|
||||||
SlurmNode,
|
SlurmNode,
|
||||||
|
SlurmPartition,
|
||||||
sinfo,
|
sinfo,
|
||||||
sinfo_filter,
|
sinfo_filter,
|
||||||
sinfo_from_file,
|
sinfo_from_file,
|
||||||
|
@ -21,6 +23,7 @@ SINFO_CONTENT = SINFO_FILE.read_text()
|
||||||
|
|
||||||
def test_slurm_dataclasses():
|
def test_slurm_dataclasses():
|
||||||
"""Test SLURM dataclasses formatter."""
|
"""Test SLURM dataclasses formatter."""
|
||||||
|
# CPU
|
||||||
cpu = SlurmCpu(1, '2', 4.0)
|
cpu = SlurmCpu(1, '2', 4.0)
|
||||||
|
|
||||||
assert cpu.allocated == 1
|
assert cpu.allocated == 1
|
||||||
|
@ -31,9 +34,11 @@ def test_slurm_dataclasses():
|
||||||
assert isinstance(cpu.idle, int)
|
assert isinstance(cpu.idle, int)
|
||||||
assert isinstance(cpu.total, int)
|
assert isinstance(cpu.total, int)
|
||||||
|
|
||||||
|
# GPU
|
||||||
gpu = SlurmGpu('fOo', '1')
|
gpu = SlurmGpu('fOo', '1')
|
||||||
|
|
||||||
assert gpu # __bool__
|
assert gpu # __bool__
|
||||||
|
assert str(gpu) == 'Foo' # = name
|
||||||
assert gpu.name == 'Foo'
|
assert gpu.name == 'Foo'
|
||||||
assert gpu.nb == 1
|
assert gpu.nb == 1
|
||||||
|
|
||||||
|
@ -44,9 +49,47 @@ def test_slurm_dataclasses():
|
||||||
gpu = SlurmGpu()
|
gpu = SlurmGpu()
|
||||||
|
|
||||||
assert not gpu # __bool__
|
assert not gpu # __bool__
|
||||||
|
assert str(gpu) == 'None' # = name
|
||||||
assert gpu.name == 'None'
|
assert gpu.name == 'None'
|
||||||
assert gpu.nb == 0
|
assert gpu.nb == 0
|
||||||
|
|
||||||
|
# Node
|
||||||
|
node = SlurmNode(*'nautilus standard cnode001 completing 0/96/0/96 384000 (null)'.split())
|
||||||
|
|
||||||
|
assert str(node) == 'cnode001' # hostname
|
||||||
|
assert node.cluster == 'nautilus'
|
||||||
|
assert node.partition == 'standard'
|
||||||
|
assert node.hostname == 'cnode001'
|
||||||
|
assert node.state == 'completing'
|
||||||
|
assert node.cpu.allocated == 0
|
||||||
|
assert node.cpu.idle == 96
|
||||||
|
assert node.cpu.total == 96
|
||||||
|
assert node.mem == 384
|
||||||
|
assert node.gpu.name == 'None'
|
||||||
|
|
||||||
|
# Partition
|
||||||
|
partition = SlurmPartition('standard', [node])
|
||||||
|
|
||||||
|
assert str(partition) == 'standard' # = name
|
||||||
|
assert partition.name == 'standard'
|
||||||
|
|
||||||
|
for _node in partition:
|
||||||
|
assert str(_node) == 'cnode001'
|
||||||
|
|
||||||
|
assert partition.gpus == 'None'
|
||||||
|
assert partition.max_idle_cpu == 96
|
||||||
|
assert partition.max_mem == 384
|
||||||
|
|
||||||
|
# Cluster
|
||||||
|
cluster = SlurmCluster('nautilus', [partition])
|
||||||
|
|
||||||
|
assert str(cluster) == 'nautilus' # = name
|
||||||
|
assert cluster.name == 'nautilus'
|
||||||
|
assert cluster == 'nautilus' # __eq__
|
||||||
|
|
||||||
|
for _partition in cluster:
|
||||||
|
assert str(_partition) == 'standard'
|
||||||
|
|
||||||
|
|
||||||
def test_slurm_sinfo_run(monkeypatch):
|
def test_slurm_sinfo_run(monkeypatch):
|
||||||
"""Test SLURM SINFO run command."""
|
"""Test SLURM SINFO run command."""
|
||||||
|
|
Loading…
Add table
Reference in a new issue