Add SLURM SINFO parser
This commit is contained in:
parent
e78d20787a
commit
f00b406962
4 changed files with 240 additions and 0 deletions
87
src/glicid_spawner/slurm.py
Normal file
87
src/glicid_spawner/slurm.py
Normal file
|
@ -0,0 +1,87 @@
|
|||
"""SLURM module."""
|
||||
|
||||
import re
|
||||
import shlex
|
||||
import subprocess
|
||||
from dataclasses import dataclass, field
|
||||
from itertools import groupby
|
||||
from operator import attrgetter
|
||||
|
||||
|
||||
@dataclass
class SlurmCpu:
    """CPU counters of a SLURM resource.

    Every counter is passed through ``int()`` right after construction, so
    numeric strings are accepted as well as integers.
    """

    allocated: int
    idle: int
    total: int

    def __post_init__(self):
        # Coerce the counters to ``int`` in declaration order.
        for counter in ('allocated', 'idle', 'total'):
            setattr(self, counter, int(getattr(self, counter)))
|
||||
|
||||
|
||||
@dataclass
class SlurmGpu:
    """GPU description of a SLURM resource.

    Built without arguments, the instance is named ``'None'`` and holds a
    count of 0. An instance is truthy only when its count is strictly
    positive.
    """

    name: str = 'None'
    nb: int = 0

    def __post_init__(self):
        # Normalise the inputs: capitalized string name, integer count.
        label = str(self.name)
        self.name = label.capitalize()
        self.nb = int(self.nb)

    def __bool__(self):
        return 0 < self.nb
|
||||
|
||||
|
||||
@dataclass
class SlurmNode:
    """SLURM node resource.

    The constructor takes the raw text fields of one SINFO record and
    normalises them (whitespace stripped, state lower-cased, numeric parts
    converted).

    Raises:
        IndexError: if ``cpus_state`` does not contain four ``/``-separated
            integers.
        ValueError: if ``memory_mb`` is not an integer literal.
    """

    cluster: str  # ``None`` when the raw cluster field contains 'N/A'
    partition: str
    hostname: str
    state: str  # stored lower-cased
    cpu: SlurmCpu
    mem: int  # raw memory value integer-divided by 1000
    gpu: SlurmGpu

    def __init__(self, cluster, partition, hostname, state, cpus_state, memory_mb, gres):  # noqa: PLR0913
        self.cluster = None if 'N/A' in cluster else cluster.strip()
        self.partition = partition.strip()
        self.hostname = hostname.strip()
        self.state = state.strip().lower()

        # Groups 1, 2 and 4 of the four counters feed SlurmCpu's
        # allocated / idle / total fields; the third counter is ignored.
        self.cpu = SlurmCpu(*re.findall(r'(\d+)/(\d+)/\d+/(\d+)', cpus_state)[0])
        self.mem = int(memory_mb) // 1000  # in GB

        # A GRES entry is ``name[:type]:count`` and the type is optional, so
        # ``gpu:2`` must not crash the parser. Type names may also contain
        # '.' or '-'. Anything unparseable falls back to the "no GPU" default
        # instead of raising and aborting the whole listing.
        found = re.search(r'gpu:(?:([\w.-]+):)?(\d+)', gres)
        if found is None:
            self.gpu = SlurmGpu()
        else:
            gpu_type, count = found.groups()
            # Untyped GPUs keep SlurmGpu's default name.
            self.gpu = SlurmGpu(nb=count) if gpu_type is None else SlurmGpu(gpu_type, count)
|
||||
|
||||
|
||||
def _sinfo_run() -> str:
    """Run the SLURM ``sinfo`` command and return its output as text.

    The command is executed without a shell. A non-zero exit status raises
    ``subprocess.CalledProcessError``; the captured standard output is
    decoded as UTF-8.
    """
    columns = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    options = '--federation --noheader --responding'
    argv = shlex.split(f'sinfo {options} --Format={columns}')

    completed = subprocess.run(argv, stdout=subprocess.PIPE, check=True)
    return completed.stdout.decode('utf-8')
|
||||
|
||||
|
||||
def _sinfo_reader(result) -> list:
|
||||
"""SLURM SINFO reader."""
|
||||
return [SlurmNode(*re.findall('.{20}', node)) for node in result.splitlines()]
|
||||
|
||||
|
||||
def sinfo(with_states=('idle', 'mixed')) -> dict:
    """SLURM SINFO resources available with a given state(s).

    Args:
        with_states: Container of node states to keep; a node is retained
            when ``node.state in with_states``.

    Returns:
        A nested dict ``{cluster: {partition: [SlurmNode, ...]}}``. Every
        cluster found in the SINFO output is present as a key (possibly
        ``None``), even when none of its nodes qualify; partitions without
        any matching node are omitted. Keys and nodes keep the order in
        which they first appear in the SINFO output.
    """
    resources = _sinfo_reader(_sinfo_run())

    # Accumulate into dicts rather than using ``itertools.groupby``: groupby
    # only merges *consecutive* records, so a cluster or partition that
    # reappears later in the output would overwrite its earlier group and
    # drop nodes. Sorting first is not an option because ``cluster`` may be
    # ``None`` and the SINFO ordering would be lost.
    available = {}
    for node in resources:
        # Register the cluster even if this node is filtered out below.
        partitions = available.setdefault(node.cluster, {})
        if node.state in with_states:
            partitions.setdefault(node.partition, []).append(node)

    return available
|
Loading…
Add table
Add a link
Reference in a new issue