2024-02-08 16:20:59 +01:00
|
|
|
"""SLURM module."""
|
|
|
|
|
|
|
|
import re
|
|
|
|
import shlex
|
|
|
|
import subprocess
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
from itertools import groupby
|
|
|
|
from operator import attrgetter
|
2024-02-14 17:09:12 +01:00
|
|
|
from pathlib import Path
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SlurmCpu:
    """SLURM CPU resource.

    Stores the allocated, idle and total CPU counts. Every value is passed
    through ``int()`` after initialisation, so digit strings are accepted.
    """

    allocated: int
    idle: int
    total: int

    def __post_init__(self):
        # Coerce the counters in declaration order.
        for attr in ('allocated', 'idle', 'total'):
            setattr(self, attr, int(getattr(self, attr)))
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SlurmGpu:
    """SLURM GPU resource.

    ``name`` is stored capitalized and ``nb`` as an ``int``. An instance is
    truthy only when it holds at least one GPU.
    """

    name: str = 'None'
    nb: int = 0

    def __post_init__(self):
        label = str(self.name)
        self.name = label.capitalize()
        self.nb = int(self.nb)

    def __bool__(self):
        return 0 < self.nb
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass
class SlurmNode:
    """SLURM node resource.

    Built from the raw text fields of one node entry. The hand-written
    ``__init__`` below is kept by ``@dataclass`` (it never overwrites an
    ``__init__`` defined in the class body) and parses the raw values:

    - ``cluster``: stripped, or ``None`` when the field contains ``N/A``.
    - ``partition`` / ``hostname``: stripped.
    - ``state``: stripped and lower-cased.
    - ``cpu``: 1st, 2nd and 4th numbers of the ``n/n/n/n`` group found in
      ``cpus_state`` (the 3rd number is ignored).
    - ``mem``: ``memory_mb`` integer-divided by 1000.
    - ``gpu``: first ``gpu[:type]:count`` entry of ``gres``, or an empty
      ``SlurmGpu()`` when ``gres`` has no ``gpu:`` entry.

    Raises:
        IndexError: if ``cpus_state`` holds no ``n/n/n/n`` group, or if
            ``gres`` contains ``gpu:`` but no parsable GPU entry.
        ValueError: if ``memory_mb`` is not an integer.
    """

    cluster: str  # may be None when the raw field contains 'N/A'
    partition: str
    hostname: str
    state: str
    cpu: SlurmCpu
    mem: int
    gpu: SlurmGpu

    def __init__(self, cluster, partition, hostname, state, cpus_state, memory_mb, gres):  # noqa: PLR0913
        self.cluster = None if 'N/A' in cluster else cluster.strip()
        self.partition = partition.strip()
        self.hostname = hostname.strip()
        self.state = state.strip().lower()
        self.cpu = SlurmCpu(*re.findall(r'(\d+)/(\d+)/\d+/(\d+)', cpus_state)[0])
        self.mem = int(memory_mb) // 1000  # in GB

        if 'gpu:' in gres:
            # The GRES type is optional (``gpu:2`` as well as ``gpu:a100:2``)
            # and may contain characters outside ``\w`` such as ``-`` or ``.``.
            # An unmatched optional group is reported by findall as ''.
            model, count = re.findall(r'gpu:(?:([^:,\s(]+):)?(\d+)', gres)[0]
            self.gpu = SlurmGpu(model, count) if model else SlurmGpu(nb=count)
        else:
            self.gpu = SlurmGpu()
|
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def sinfo_run(username: str = None) -> str:
    """SLURM SINFO run command.

    Runs ``sinfo --federation --noheader --responding`` with a fixed
    ``--Format`` field list and returns its standard output.

    Args:
        username: when truthy, the command is executed as that user through
            ``su - <username> -c <command>``; otherwise ``sinfo`` is run
            directly.

    Returns:
        The command output decoded as UTF-8.

    Raises:
        subprocess.CalledProcessError: if the command exits with a non-zero
            status.
        FileNotFoundError: if the executable cannot be found.
    """
    fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
    cmd = ['sinfo', '--federation', '--noheader', '--responding', f'--Format={fmt}']

    if username:
        # Build the argument vector directly instead of formatting a string
        # and re-splitting it: the ``-c`` command is handed to ``su`` as one
        # argument with no stray quote characters, and ``username`` is never
        # re-tokenised.
        cmd = ['su', '-', username, '-c', shlex.join(cmd)]

    return subprocess.check_output(cmd).decode('utf-8')
|
2024-02-08 16:20:59 +01:00
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def sinfo_reader(result: str) -> list:
    """SLURM SINFO reader.

    Splits every non-blank line of ``result`` into consecutive 20-character
    fields and builds one ``SlurmNode`` from them.

    Args:
        result: text made of fixed-width lines, 7 fields of 20 characters
            each (one per ``SlurmNode`` constructor argument).

    Returns:
        The list of parsed ``SlurmNode`` objects, in input order.
    """
    width, nb_fields = 20, 7

    return [
        # Lines whose trailing padding was stripped (e.g. a saved file) are
        # re-padded so the last field is not lost; longer lines are untouched.
        SlurmNode(*re.findall('.{20}', line.ljust(width * nb_fields)))
        for line in result.splitlines()
        if line.strip()  # ignore blank lines
    ]
|
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def sinfo_filter(resources: list, with_states=('idle', 'mixed')) -> dict:
    """SLURM SINFO filtered resources available with a given state(s).

    Grouped by cluster and partition names.

    Args:
        resources: node objects exposing ``cluster``, ``partition`` and
            ``state`` attributes.
        with_states: states to keep; a node is kept when
            ``node.state in with_states``.

    Returns:
        ``{cluster: {partition: [node, ...]}}`` holding only the kept nodes.
        Clusters and partitions without any kept node are omitted; keys
        and nodes appear in first-seen order.
    """
    # Accumulate instead of itertools.groupby: groupby only merges *adjacent*
    # items, so non-contiguous input would yield duplicate keys and silently
    # drop earlier nodes. Sorting first is not an option because ``cluster``
    # may be None, which does not compare with str.
    grouped = {}

    for node in resources:
        if node.state in with_states:
            partitions = grouped.setdefault(node.cluster, {})
            partitions.setdefault(node.partition, []).append(node)

    return grouped
|
|
|
|
|
|
|
|
|
2024-02-14 17:09:12 +01:00
|
|
|
def sinfo_from_file(fname, with_states=('idle', 'mixed')) -> dict:
    """SLURM SINFO resources available from a given file.

    Args:
        fname: path of a text file, read and passed to ``sinfo_reader``.
        with_states: node states to keep, forwarded to ``sinfo_filter``.

    Returns:
        The result of ``sinfo_filter`` applied to the parsed file content.
    """
    # Decode explicitly as UTF-8 (like the live command output) instead of
    # relying on the platform's locale encoding.
    content = Path(fname).read_text(encoding='utf-8')
    return sinfo_filter(sinfo_reader(content), with_states=with_states)
|
|
|
|
|
|
|
|
|
2024-02-14 16:31:52 +01:00
|
|
|
def sinfo(username: str = None, with_states=('idle', 'mixed')) -> dict:
    """SLURM SINFO resources available for a given user.

    Chains ``sinfo_run`` -> ``sinfo_reader`` -> ``sinfo_filter``:
    ``username`` is forwarded to the first step and ``with_states`` to the
    last one.
    """
    raw_output = sinfo_run(username=username)
    nodes = sinfo_reader(raw_output)
    return sinfo_filter(nodes, with_states=with_states)
|