Compare commits

...

6 commits
v1.0 ... main

12 changed files with 65 additions and 43 deletions

View file

@@ -1,6 +1,6 @@
[tool.poetry]
name = "glicid-spawner"
version = "1.0"
version = "1.2"
description = "JupyterHub Batch Spawner for GLiCID"
authors = ["Benoit Seignovert <benoit.seignovert@univ-nantes.fr>"]
license = "BSD 3-Clause License"
@@ -90,7 +90,7 @@ exclude_lines = [
]
[tool.tbump.version]
current = "1.0"
current = "1.2"
regex = '(?P<major>\d+)\.(?P<patch>\d+)'
[tool.tbump.git]
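
For reference, the tbump pattern splits the bumped version string into `major` and `patch` groups; a minimal check with Python's `re` module:

```python
import re

# Same pattern as [tool.tbump.version], applied to the new version string.
pattern = re.compile(r'(?P<major>\d+)\.(?P<patch>\d+)')
match = pattern.fullmatch('1.2')
assert match.group('major') == '1'
assert match.group('patch') == '2'
```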

View file

@@ -32,7 +32,7 @@ SINFO = sinfo_from_file(
)
# Single vs. multi-cluster implementation
SLURM_SINGLE_CLUSTER = {'N/A': SINFO.pop('N/A')}
SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')}
SLURM_MULTI_CLUSTER = SINFO
GPU_SINGLE_CLUSTER = gpu_max_duration(gres(SLURM_SINGLE_CLUSTER))
GPU_MULTI_CLUSTER = gpu_max_duration(gres(SLURM_MULTI_CLUSTER))
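
With the key renamed from `'N/A'` to `'cluster'`, the split between the two configurations looks like this; a minimal sketch with placeholder values (the real values are nested partition/node mappings built from the sinfo fixture):

```python
# Trimmed-down shape of the parsed sinfo mapping after the rename.
SINFO = {'cluster': {}, 'nautilus': {}, 'waves': {}}

# The unnamed cluster alone drives the single-cluster configuration ...
SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')}
# ... while the remaining named clusters drive the multi-cluster one.
SLURM_MULTI_CLUSTER = SINFO  # {'nautilus': {}, 'waves': {}}
```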

View file

@@ -9,6 +9,7 @@ GLOBAL_USER = 'operator'
MAMBA_ROOT_PREFIX = f'{MICROMAMBA_ROOT}/{GLOBAL_USER}'
MAMBA_EXE = f'{MAMBA_ROOT_PREFIX}/bin/micromamba'
MAMBA_USER_BASE = f'{MICROMAMBA_ROOT}/$USER'
@dataclass
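
Assuming `MICROMAMBA_ROOT = '/micromamba'` (consistent with the spawner test expectations further down), the constants, including the new `MAMBA_USER_BASE`, resolve to:

```python
MICROMAMBA_ROOT = '/micromamba'  # assumed value, not shown in this hunk
GLOBAL_USER = 'operator'

MAMBA_ROOT_PREFIX = f'{MICROMAMBA_ROOT}/{GLOBAL_USER}'  # '/micromamba/operator'
MAMBA_EXE = f'{MAMBA_ROOT_PREFIX}/bin/micromamba'       # '/micromamba/operator/bin/micromamba'
MAMBA_USER_BASE = f'{MICROMAMBA_ROOT}/$USER'            # '/micromamba/$USER' (expanded by the shell at runtime)
```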

View file

@@ -69,7 +69,7 @@ class SlurmNode:
self.state = state.strip().lower()
self.cpu = SlurmCpu(*re.findall(r'(\d+)/(\d+)/\d+/(\d+)', cpus_state)[0])
self.mem = int(memory_mb) // 1000 # in GB
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
self.gpu = SlurmGpu(*re.findall(r'gpu:([\w\.]+):(\d+)', gres)[0] if 'gpu:' in gres else [])
def __str__(self):
return self.hostname
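
The widened character class lets the GRES parser handle MIG-style names containing a dot (e.g. `A100_2g.10gb` in the updated sinfo fixture), which the previous `\w+` could not span; a quick check with Python's `re` module:

```python
import re

gres = 'gpu:A100_2g.10gb:6(S:0-1)'  # MIG-style GRES entry

# Old pattern: '\w+' stops at the dot, so nothing matches at all.
assert re.findall(r'gpu:(\w+):(\d+)', gres) == []

# New pattern: the model name may contain dots, so model and count are captured.
assert re.findall(r'gpu:([\w\.]+):(\d+)', gres) == [('A100_2g.10gb', '6')]
```
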
@@ -141,7 +141,7 @@ class SlurmCluster:
def sinfo_run(username: str = None) -> str:
"""SLURM SINFO run command."""
flags = '--federation --noheader --responding'
flags = '--federation --noheader --responding --cluster=all'
fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
cmd = f'sinfo {flags} --Format={fmt}'
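
The added `--cluster=all` flag is now part of the assembled command; the full string, matching what the updated tests below assert:

```python
flags = '--federation --noheader --responding --cluster=all'
fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
cmd = f'sinfo {flags} --Format={fmt}'
# -> 'sinfo --federation --noheader --responding --cluster=all '
#    '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
```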

View file

@@ -9,7 +9,7 @@ from batchspawner import JobStatus, SlurmSpawner
from traitlets import Bool, Integer, Unicode, default
from .form import options_form, options_from_form
from .micromamba import MAMBA_EXE, MAMBA_ROOT_PREFIX
from .micromamba import MAMBA_EXE, MAMBA_ROOT_PREFIX, MAMBA_USER_BASE
from .progress import ElapseTime, get_progress
from .templates import get_template_src
@@ -24,14 +24,19 @@ class GlicidSpawner(SlurmSpawner):
help='Spawner singleuser command.',
).tag(config=True)
req_mamba_exe = Unicode(
MAMBA_EXE,
help='Micromamba global exe',
).tag(config=True)
req_mamba_root_prefix = Unicode(
MAMBA_ROOT_PREFIX,
help='Micromamba global root prefix',
).tag(config=True)
req_mamba_exe = Unicode(
MAMBA_EXE,
help='Micromamba global exe',
req_mamba_user_base = Unicode(
MAMBA_USER_BASE,
help='Micromamba user base prefix',
).tag(config=True)
req_job_name = Unicode(
@@ -80,7 +85,7 @@ class GlicidSpawner(SlurmSpawner):
slurm_job_id_re = Unicode(r'(\d+)(?:;(\w+))?').tag(config=True)
def parse_job_id(self, output):
def parse_job_id(self, output) -> str:
"""Parse job id with cluster name support.
If cluster name is present, `job_id` will be a string
@@ -88,10 +93,10 @@ class GlicidSpawner(SlurmSpawner):
"""
for job_id, job_cluster in re.findall(self.slurm_job_id_re, output):
return f'{job_id} -M {job_cluster}' if job_cluster else int(job_id)
return f'{job_id} -M {job_cluster}' if job_cluster else job_id
self.log.error(f'GlicidSpawner unable to parse job ID from text: {output}')
return None
return ''
@default('options_form')
def _options_form_default(self) -> str:
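
The parser now consistently returns strings: a bare job id, an id with a `-M <cluster>` suffix, or an empty string when nothing matches. What the `slurm_job_id_re` pattern yields in each case (mirroring the updated spawner tests below):

```python
import re

SLURM_JOB_ID_RE = r'(\d+)(?:;(\w+))?'

re.findall(SLURM_JOB_ID_RE, '123')           # [('123', '')]         -> '123'
re.findall(SLURM_JOB_ID_RE, '456;nautilus')  # [('456', 'nautilus')] -> '456 -M nautilus'
re.findall(SLURM_JOB_ID_RE, '')              # []                    -> ''
```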

View file

@@ -33,6 +33,7 @@ echo "The {{job_name}} logs are located in: ${JUPYTER_LOG_DIR}"
{# Micromamba config -#}
export MAMBA_EXE={{mamba_exe}};
export MAMBA_ROOT_PREFIX={{mamba_root_prefix}};
export PYTHONUSERBASE={{ mamba_user_base }};
source $MAMBA_ROOT_PREFIX/etc/profile.d/micromamba.sh;
{# Activate micromamba env requested by the user -#}

View file

@@ -41,7 +41,7 @@
data-max-duration="{{max_duration}}"
{%- if loop.first %} checked{% endif %}>
<label for="gpu_{{gpu}}" class="btn btn-default btn-block">
{{ gpu }}
{{ gpu | replace("_"," ") | replace("."," ") | replace("gb","GB") }}
</label>
</div>
{% endfor -%}
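
The added filter chain rewrites raw GRES names into friendlier button labels (underscores and dots become spaces, `gb` becomes `GB`); a quick check of the expression with Jinja2:

```python
from jinja2 import Template

label = Template('{{ gpu | replace("_"," ") | replace("."," ") | replace("gb","GB") }}')

print(label.render(gpu='A100_2g.10gb'))  # A100 2g 10GB
print(label.render(gpu='T4'))            # T4
```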

View file

@@ -11,7 +11,7 @@
<div id="cluster-config-collapse" class="panel-collapse collapse" role="tabpanel" aria-labelledby="heading">
<div class="panel-body">
{% if 'N/A' not in sinfo %}
{% if 'cluster' not in sinfo %}
<div class="form-group clusters">
<label for="cluster" class="col-sm-3 control-label">Cluster:</label>
<div class="col-sm-9 flex-container">

View file

@@ -1,21 +1,21 @@
N/A Devel nazare001 idle 0/20/0/20 128000 (null)
N/A GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000
N/A A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20
N/A AMD-short cloudbreak001 drained 0/0/32/32 128000 (null)
N/A lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2
N/A lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4
N/A lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1
cluster Devel nazare001 idle 0/20/0/20 128000 (null)
cluster GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000
cluster A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20
cluster AMD-short cloudbreak001 drained 0/0/32/32 128000 (null)
cluster lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2
cluster lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4
cluster lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1
nautilus standard cnode001 completing 0/96/0/96 384000 (null)
nautilus bigmem cnode002 planned 0/96/0/96 768000 (null)
nautilus gpu gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1)
nautilus gpu gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1)
nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1)
nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100_2g.10gb:6(S
nautilus visu visu1 idle 0/96/0/96 768000 (null)
nautilus all cnode001 completing 0/96/0/96 384000 (null)
nautilus all cnode002 planned 0/96/0/96 768000 (null)
nautilus all gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1)
nautilus all gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1)
nautilus all gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1)
nautilus all gnode3 allocated 96/0/0/96 128000 gpu:A100_2g.10gb:6(S
nautilus all visu1 idle 0/96/0/96 768000 (null)
waves standard cribbar001 mixed 30/10/0/40 16000 (null)
waves gpu budbud006 allocated 64/0/0/64 256000 gpu:a100:2,mps:a100:

View file

@@ -12,7 +12,7 @@ from pytest import fixture
DATA = Path(__file__).parent / 'data'
SINFO = sinfo_from_file(DATA / 'sinfo.txt')
SLURM_SINGLE_CLUSTER = {'N/A': SINFO.pop('N/A')}
SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')}
SLURM_MULTI_CLUSTER = SINFO
@@ -81,7 +81,7 @@ def test_options_attrs(mock_cluster):
# Multi cluster configuration (default)
sinfo = options['sinfo']
assert 'N/A' not in sinfo
assert 'cluster' not in sinfo
assert 'nautilus' in sinfo
assert 'waves' in sinfo
@@ -99,11 +99,11 @@ def test_options_attrs_single_cluster(mock_single_cluster):
# Single cluster configuration
sinfo = options['sinfo']
assert 'N/A' in sinfo
assert 'cluster' in sinfo
assert 'nautilus' not in sinfo
assert 'waves' not in sinfo
node = sinfo['N/A']['Devel']['nazare001']
node = sinfo['cluster']['Devel']['nazare001']
assert node == 'nazare001'
assert node.cpu.idle == 20
@@ -218,32 +218,32 @@ def test_options_form_slurm_single_cluster(mock_single_cluster):
assert (
'<div class="flex-item-4 slurm-partition" '
'data-cluster="N/A" data-partition="GPU-short" '
'data-cluster="cluster" data-partition="GPU-short" '
'data-cpu="20" data-mem="184" data-gpu="T4">' in html
)
assert (
'<input type="radio" name="partition" id="partition_N/A_GPU-short" value="GPU-short">'
'<input type="radio" name="partition" id="partition_cluster_GPU-short" value="GPU-short">'
in html
)
assert (
'<label for="partition_N/A_GPU-short" class="btn btn-default btn-block"> Gpu-short </label>'
'<label for="partition_cluster_GPU-short" class="btn btn-default btn-block"> Gpu-short </label>'
in html
)
# Nodes (hidden by default)
assert (
'<div class="flex-item-4 slurm-node" '
'data-cluster="N/A" data-partition="GPU-short" data-node="budbud001" '
'data-cluster="cluster" data-partition="GPU-short" data-node="budbud001" '
'data-cpu="20" data-mem="184" data-gpu="T4">' in html
)
assert (
'<input type="radio" name="node" id="node_N/A_GPU-short_budbud001" value="budbud001">'
'<input type="radio" name="node" id="node_cluster_GPU-short_budbud001" value="budbud001">'
in html
)
assert (
'<label for="node_N/A_GPU-short_budbud001" class="btn btn-default btn-block"> Budbud001 </label>'
'<label for="node_cluster_GPU-short_budbud001" class="btn btn-default btn-block"> Budbud001 </label>'
in html
)

View file

@@ -105,6 +105,7 @@ def test_slurm_sinfo_run(monkeypatch):
'--federation '
'--noheader '
'--responding '
'--cluster=all '
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
)
@@ -114,6 +115,7 @@ def test_slurm_sinfo_run(monkeypatch):
'--federation '
'--noheader '
'--responding '
'--cluster=all '
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
)
@@ -127,7 +129,7 @@ def test_slurm_sinfo_reader():
node = nodes[0]
assert node.cluster == 'N/A'
assert node.cluster == 'cluster'
assert node.partition == 'Devel'
assert node.hostname == 'nazare001'
assert node.state == 'idle'
@@ -136,7 +138,7 @@ def test_slurm_sinfo_reader():
assert node.mem == 128
assert not node.gpu
assert [node.cluster for node in nodes] == 7 * ['N/A'] + 12 * ['nautilus'] + 6 * ['waves']
assert [node.cluster for node in nodes] == 7 * ['cluster'] + 12 * ['nautilus'] + 6 * ['waves']
assert len([node for node in nodes if node.state in ('idle', 'mixed')]) == 10
@@ -159,9 +161,17 @@ def test_slurm_sinfo_reader():
'P100',
'K80',
'P100',
] + 8 * ['A100']
'A100',
'A100',
'A100_2g.10gb',
'A100',
'A100',
'A100_2g.10gb',
'A100',
'A100',
]
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2]
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 6, 1, 2, 6, 2, 2]
def test_slurm_sinfo_filter(monkeypatch):
@@ -172,7 +182,7 @@ def test_slurm_sinfo_filter(monkeypatch):
assert isinstance(clusters, dict)
assert len(clusters) == 3
assert list(clusters) == ['N/A', 'nautilus', 'waves'] # __eq__ on cluster.name
assert list(clusters) == ['cluster', 'nautilus', 'waves'] # __eq__ on cluster.name
assert [len(partitions) for partitions in clusters.values()] == [2, 4, 2]
@@ -195,7 +205,7 @@ def test_slurm_sinfo_filter(monkeypatch):
# Get only `idle` nodes
clusters = sinfo_filter(resources, with_states=('idle'))
assert list(clusters) == ['N/A', 'nautilus']
assert list(clusters) == ['cluster', 'nautilus']
assert [len(partitions) for partitions in clusters.values()] == [1, 3]
# Discard clusters without partition available
@@ -251,4 +261,4 @@ def test_slurm_gres():
gpus = gres(resources)
# Sorted and without duplicates
assert gpus == ['A100', 'A40', 'None', 'P100', 'T4']
assert gpus == ['A100', 'A100_2g.10gb', 'A40', 'None', 'P100', 'T4']

View file

@@ -27,8 +27,9 @@ def test_spawner_config():
assert 'bin/glicid-spawner-singleuser' in cmd
assert 'bin/jupyterhub-singleuser' in cmd
assert spawner.req_mamba_root_prefix == '/micromamba/operator'
assert spawner.req_mamba_exe == '/micromamba/operator/bin/micromamba'
assert spawner.req_mamba_root_prefix == '/micromamba/operator'
assert spawner.req_mamba_user_base == '/micromamba/$USER'
assert spawner.req_job_name == 'jupyterhub_glicid'
assert spawner.req_qos == 'short'
@@ -70,8 +71,12 @@ def test_spawner_batch_script(monkeypatch):
assert 'export MAMBA_EXE=/micromamba/operator/bin/micromamba;' in script
assert 'export MAMBA_ROOT_PREFIX=/micromamba/operator;' in script
assert 'export PYTHONUSERBASE=/micromamba/$USER;' in script
assert 'micromamba activate /micromamba/john-doe/envs/foo;' in script
assert 'micromamba activate /micromamba/john-doe/envs/foo;' in script
assert 'export JUPYTER_PATH=/micromamba/john-doe/envs/foo/share/jupyter;' in script
assert re.search(r'.*/bin/glicid-spawner-singleuser .*/bin/jupyterhub-singleuser', script)
@@ -79,10 +84,10 @@ def test_spawner_parse_job_id():
"""Test spawner job id parser."""
spawner = GlicidSpawner()
assert spawner.parse_job_id('123') == 123
assert spawner.parse_job_id('123') == '123'
assert spawner.parse_job_id('456;nautilus') == '456 -M nautilus'
assert spawner.parse_job_id('') is None
assert spawner.parse_job_id('') == ''
def test_spawner_options_form(monkeypatch):