Compare commits
6 commits
Author | SHA1 | Date | |
---|---|---|---|
540df4b0aa | |||
255bf6201e | |||
d2c0631956 | |||
69015d5062 | |||
5cdee4d73d | |||
aaf16ee89d |
12 changed files with 65 additions and 43 deletions
|
@ -1,6 +1,6 @@
|
|||
[tool.poetry]
|
||||
name = "glicid-spawner"
|
||||
version = "1.0"
|
||||
version = "1.2"
|
||||
description = "JupyterHub Batch Spawner for GLiCID"
|
||||
authors = ["Benoit Seignovert <benoit.seignovert@univ-nantes.fr>"]
|
||||
license = "BSD 3-Clause License"
|
||||
|
@ -90,7 +90,7 @@ exclude_lines = [
|
|||
]
|
||||
|
||||
[tool.tbump.version]
|
||||
current = "1.0"
|
||||
current = "1.2"
|
||||
regex = '(?P<major>\d+)\.(?P<patch>\d+)'
|
||||
|
||||
[tool.tbump.git]
|
||||
|
|
|
@ -32,7 +32,7 @@ SINFO = sinfo_from_file(
|
|||
)
|
||||
|
||||
# Single vs. multi-cluster implementation
|
||||
SLURM_SINGLE_CLUSTER = {'N/A': SINFO.pop('N/A')}
|
||||
SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')}
|
||||
SLURM_MULTI_CLUSTER = SINFO
|
||||
GPU_SINGLE_CLUSTER = gpu_max_duration(gres(SLURM_SINGLE_CLUSTER))
|
||||
GPU_MULTI_CLUSTER = gpu_max_duration(gres(SLURM_MULTI_CLUSTER))
|
||||
|
|
|
@ -9,6 +9,7 @@ GLOBAL_USER = 'operator'
|
|||
|
||||
MAMBA_ROOT_PREFIX = f'{MICROMAMBA_ROOT}/{GLOBAL_USER}'
|
||||
MAMBA_EXE = f'{MAMBA_ROOT_PREFIX}/bin/micromamba'
|
||||
MAMBA_USER_BASE = f'{MICROMAMBA_ROOT}/$USER'
|
||||
|
||||
|
||||
@dataclass
|
||||
|
|
|
@ -69,7 +69,7 @@ class SlurmNode:
|
|||
self.state = state.strip().lower()
|
||||
self.cpu = SlurmCpu(*re.findall(r'(\d+)/(\d+)/\d+/(\d+)', cpus_state)[0])
|
||||
self.mem = int(memory_mb) // 1000 # in GB
|
||||
self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else [])
|
||||
self.gpu = SlurmGpu(*re.findall(r'gpu:([\w\.]+):(\d+)', gres)[0] if 'gpu:' in gres else [])
|
||||
|
||||
def __str__(self):
|
||||
return self.hostname
|
||||
|
@ -141,7 +141,7 @@ class SlurmCluster:
|
|||
|
||||
def sinfo_run(username: str = None) -> str:
|
||||
"""SLURM SINFO run command."""
|
||||
flags = '--federation --noheader --responding'
|
||||
flags = '--federation --noheader --responding --cluster=all'
|
||||
fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
||||
cmd = f'sinfo {flags} --Format={fmt}'
|
||||
|
||||
|
|
|
@ -9,7 +9,7 @@ from batchspawner import JobStatus, SlurmSpawner
|
|||
from traitlets import Bool, Integer, Unicode, default
|
||||
|
||||
from .form import options_form, options_from_form
|
||||
from .micromamba import MAMBA_EXE, MAMBA_ROOT_PREFIX
|
||||
from .micromamba import MAMBA_EXE, MAMBA_ROOT_PREFIX, MAMBA_USER_BASE
|
||||
from .progress import ElapseTime, get_progress
|
||||
from .templates import get_template_src
|
||||
|
||||
|
@ -24,14 +24,19 @@ class GlicidSpawner(SlurmSpawner):
|
|||
help='Spawner singleuser command.',
|
||||
).tag(config=True)
|
||||
|
||||
req_mamba_exe = Unicode(
|
||||
MAMBA_EXE,
|
||||
help='Micromamba global exe',
|
||||
).tag(config=True)
|
||||
|
||||
req_mamba_root_prefix = Unicode(
|
||||
MAMBA_ROOT_PREFIX,
|
||||
help='Micromamba global root prefix',
|
||||
).tag(config=True)
|
||||
|
||||
req_mamba_exe = Unicode(
|
||||
MAMBA_EXE,
|
||||
help='Micromamba global exe',
|
||||
req_mamba_user_base = Unicode(
|
||||
MAMBA_USER_BASE,
|
||||
help='Micromamba user base prefix',
|
||||
).tag(config=True)
|
||||
|
||||
req_job_name = Unicode(
|
||||
|
@ -80,7 +85,7 @@ class GlicidSpawner(SlurmSpawner):
|
|||
|
||||
slurm_job_id_re = Unicode(r'(\d+)(?:;(\w+))?').tag(config=True)
|
||||
|
||||
def parse_job_id(self, output):
|
||||
def parse_job_id(self, output) -> str:
|
||||
"""Parse job id with cluster name support.
|
||||
|
||||
If cluster name is present, `job_id` will be a string
|
||||
|
@ -88,10 +93,10 @@ class GlicidSpawner(SlurmSpawner):
|
|||
|
||||
"""
|
||||
for job_id, job_cluster in re.findall(self.slurm_job_id_re, output):
|
||||
return f'{job_id} -M {job_cluster}' if job_cluster else int(job_id)
|
||||
return f'{job_id} -M {job_cluster}' if job_cluster else job_id
|
||||
|
||||
self.log.error(f'GlicidSpawner unable to parse job ID from text: {output}')
|
||||
return None
|
||||
return ''
|
||||
|
||||
@default('options_form')
|
||||
def _options_form_default(self) -> str:
|
||||
|
|
|
@ -33,6 +33,7 @@ echo "The {{job_name}} logs are located in: ${JUPYTER_LOG_DIR}"
|
|||
{# Micromamba config -#}
|
||||
export MAMBA_EXE={{mamba_exe}};
|
||||
export MAMBA_ROOT_PREFIX={{mamba_root_prefix}};
|
||||
export PYTHONUSERBASE={{ mamba_user_base }};
|
||||
source $MAMBA_ROOT_PREFIX/etc/profile.d/micromamba.sh;
|
||||
|
||||
{# Activate micromamba env requested by the user -#}
|
||||
|
|
|
@ -41,7 +41,7 @@
|
|||
data-max-duration="{{max_duration}}"
|
||||
{%- if loop.first %} checked{% endif %}>
|
||||
<label for="gpu_{{gpu}}" class="btn btn-default btn-block">
|
||||
{{ gpu }}
|
||||
{{ gpu | replace("_"," ") | replace("."," ") | replace("gb","GB") }}
|
||||
</label>
|
||||
</div>
|
||||
{% endfor -%}
|
||||
|
|
|
@ -11,7 +11,7 @@
|
|||
<div id="cluster-config-collapse" class="panel-collapse collapse" role="tabpanel" aria-labelledby="heading">
|
||||
<div class="panel-body">
|
||||
|
||||
{% if 'N/A' not in sinfo %}
|
||||
{% if 'cluster' not in sinfo %}
|
||||
<div class="form-group clusters">
|
||||
<label for="cluster" class="col-sm-3 control-label">Cluster:</label>
|
||||
<div class="col-sm-9 flex-container">
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
N/A Devel nazare001 idle 0/20/0/20 128000 (null)
|
||||
N/A GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000
|
||||
N/A A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20
|
||||
N/A AMD-short cloudbreak001 drained 0/0/32/32 128000 (null)
|
||||
N/A lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2
|
||||
N/A lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4
|
||||
N/A lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1
|
||||
cluster Devel nazare001 idle 0/20/0/20 128000 (null)
|
||||
cluster GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000
|
||||
cluster A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20
|
||||
cluster AMD-short cloudbreak001 drained 0/0/32/32 128000 (null)
|
||||
cluster lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2
|
||||
cluster lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4
|
||||
cluster lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1
|
||||
nautilus standard cnode001 completing 0/96/0/96 384000 (null)
|
||||
nautilus bigmem cnode002 planned 0/96/0/96 768000 (null)
|
||||
nautilus gpu gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1)
|
||||
nautilus gpu gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1)
|
||||
nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1)
|
||||
nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100_2g.10gb:6(S
|
||||
nautilus visu visu1 idle 0/96/0/96 768000 (null)
|
||||
nautilus all cnode001 completing 0/96/0/96 384000 (null)
|
||||
nautilus all cnode002 planned 0/96/0/96 768000 (null)
|
||||
nautilus all gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1)
|
||||
nautilus all gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1)
|
||||
nautilus all gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1)
|
||||
nautilus all gnode3 allocated 96/0/0/96 128000 gpu:A100_2g.10gb:6(S
|
||||
nautilus all visu1 idle 0/96/0/96 768000 (null)
|
||||
waves standard cribbar001 mixed 30/10/0/40 16000 (null)
|
||||
waves gpu budbud006 allocated 64/0/0/64 256000 gpu:a100:2,mps:a100:
|
||||
|
|
|
@ -12,7 +12,7 @@ from pytest import fixture
|
|||
DATA = Path(__file__).parent / 'data'
|
||||
SINFO = sinfo_from_file(DATA / 'sinfo.txt')
|
||||
|
||||
SLURM_SINGLE_CLUSTER = {'N/A': SINFO.pop('N/A')}
|
||||
SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')}
|
||||
SLURM_MULTI_CLUSTER = SINFO
|
||||
|
||||
|
||||
|
@ -81,7 +81,7 @@ def test_options_attrs(mock_cluster):
|
|||
# Multi cluster configuration (default)
|
||||
sinfo = options['sinfo']
|
||||
|
||||
assert 'N/A' not in sinfo
|
||||
assert 'cluster' not in sinfo
|
||||
assert 'nautilus' in sinfo
|
||||
assert 'waves' in sinfo
|
||||
|
||||
|
@ -99,11 +99,11 @@ def test_options_attrs_single_cluster(mock_single_cluster):
|
|||
# Single cluster configuration
|
||||
sinfo = options['sinfo']
|
||||
|
||||
assert 'N/A' in sinfo
|
||||
assert 'cluster' in sinfo
|
||||
assert 'nautilus' not in sinfo
|
||||
assert 'waves' not in sinfo
|
||||
|
||||
node = sinfo['N/A']['Devel']['nazare001']
|
||||
node = sinfo['cluster']['Devel']['nazare001']
|
||||
|
||||
assert node == 'nazare001'
|
||||
assert node.cpu.idle == 20
|
||||
|
@ -218,32 +218,32 @@ def test_options_form_slurm_single_cluster(mock_single_cluster):
|
|||
|
||||
assert (
|
||||
'<div class="flex-item-4 slurm-partition" '
|
||||
'data-cluster="N/A" data-partition="GPU-short" '
|
||||
'data-cluster="cluster" data-partition="GPU-short" '
|
||||
'data-cpu="20" data-mem="184" data-gpu="T4">' in html
|
||||
)
|
||||
|
||||
assert (
|
||||
'<input type="radio" name="partition" id="partition_N/A_GPU-short" value="GPU-short">'
|
||||
'<input type="radio" name="partition" id="partition_cluster_GPU-short" value="GPU-short">'
|
||||
in html
|
||||
)
|
||||
assert (
|
||||
'<label for="partition_N/A_GPU-short" class="btn btn-default btn-block"> Gpu-short </label>'
|
||||
'<label for="partition_cluster_GPU-short" class="btn btn-default btn-block"> Gpu-short </label>'
|
||||
in html
|
||||
)
|
||||
|
||||
# Nodes (hidden by default)
|
||||
assert (
|
||||
'<div class="flex-item-4 slurm-node" '
|
||||
'data-cluster="N/A" data-partition="GPU-short" data-node="budbud001" '
|
||||
'data-cluster="cluster" data-partition="GPU-short" data-node="budbud001" '
|
||||
'data-cpu="20" data-mem="184" data-gpu="T4">' in html
|
||||
)
|
||||
|
||||
assert (
|
||||
'<input type="radio" name="node" id="node_N/A_GPU-short_budbud001" value="budbud001">'
|
||||
'<input type="radio" name="node" id="node_cluster_GPU-short_budbud001" value="budbud001">'
|
||||
in html
|
||||
)
|
||||
assert (
|
||||
'<label for="node_N/A_GPU-short_budbud001" class="btn btn-default btn-block"> Budbud001 </label>'
|
||||
'<label for="node_cluster_GPU-short_budbud001" class="btn btn-default btn-block"> Budbud001 </label>'
|
||||
in html
|
||||
)
|
||||
|
||||
|
|
|
@ -105,6 +105,7 @@ def test_slurm_sinfo_run(monkeypatch):
|
|||
'--federation '
|
||||
'--noheader '
|
||||
'--responding '
|
||||
'--cluster=all '
|
||||
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
||||
)
|
||||
|
||||
|
@ -114,6 +115,7 @@ def test_slurm_sinfo_run(monkeypatch):
|
|||
'--federation '
|
||||
'--noheader '
|
||||
'--responding '
|
||||
'--cluster=all '
|
||||
'--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres'
|
||||
)
|
||||
|
||||
|
@ -127,7 +129,7 @@ def test_slurm_sinfo_reader():
|
|||
|
||||
node = nodes[0]
|
||||
|
||||
assert node.cluster == 'N/A'
|
||||
assert node.cluster == 'cluster'
|
||||
assert node.partition == 'Devel'
|
||||
assert node.hostname == 'nazare001'
|
||||
assert node.state == 'idle'
|
||||
|
@ -136,7 +138,7 @@ def test_slurm_sinfo_reader():
|
|||
assert node.mem == 128
|
||||
assert not node.gpu
|
||||
|
||||
assert [node.cluster for node in nodes] == 7 * ['N/A'] + 12 * ['nautilus'] + 6 * ['waves']
|
||||
assert [node.cluster for node in nodes] == 7 * ['cluster'] + 12 * ['nautilus'] + 6 * ['waves']
|
||||
|
||||
assert len([node for node in nodes if node.state in ('idle', 'mixed')]) == 10
|
||||
|
||||
|
@ -159,9 +161,17 @@ def test_slurm_sinfo_reader():
|
|||
'P100',
|
||||
'K80',
|
||||
'P100',
|
||||
] + 8 * ['A100']
|
||||
'A100',
|
||||
'A100',
|
||||
'A100_2g.10gb',
|
||||
'A100',
|
||||
'A100',
|
||||
'A100_2g.10gb',
|
||||
'A100',
|
||||
'A100',
|
||||
]
|
||||
|
||||
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2]
|
||||
assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 6, 1, 2, 6, 2, 2]
|
||||
|
||||
|
||||
def test_slurm_sinfo_filter(monkeypatch):
|
||||
|
@ -172,7 +182,7 @@ def test_slurm_sinfo_filter(monkeypatch):
|
|||
|
||||
assert isinstance(clusters, dict)
|
||||
assert len(clusters) == 3
|
||||
assert list(clusters) == ['N/A', 'nautilus', 'waves'] # __eq__ on cluster.name
|
||||
assert list(clusters) == ['cluster', 'nautilus', 'waves'] # __eq__ on cluster.name
|
||||
|
||||
assert [len(partitions) for partitions in clusters.values()] == [2, 4, 2]
|
||||
|
||||
|
@ -195,7 +205,7 @@ def test_slurm_sinfo_filter(monkeypatch):
|
|||
# Get only `idle` nodes
|
||||
clusters = sinfo_filter(resources, with_states=('idle'))
|
||||
|
||||
assert list(clusters) == ['N/A', 'nautilus']
|
||||
assert list(clusters) == ['cluster', 'nautilus']
|
||||
assert [len(partitions) for partitions in clusters.values()] == [1, 3]
|
||||
|
||||
# Discard clusters without partition available
|
||||
|
@ -251,4 +261,4 @@ def test_slurm_gres():
|
|||
gpus = gres(resources)
|
||||
|
||||
# Sorted and without duplicates
|
||||
assert gpus == ['A100', 'A40', 'None', 'P100', 'T4']
|
||||
assert gpus == ['A100', 'A100_2g.10gb', 'A40', 'None', 'P100', 'T4']
|
||||
|
|
|
@ -27,8 +27,9 @@ def test_spawner_config():
|
|||
assert 'bin/glicid-spawner-singleuser' in cmd
|
||||
assert 'bin/jupyterhub-singleuser' in cmd
|
||||
|
||||
assert spawner.req_mamba_root_prefix == '/micromamba/operator'
|
||||
assert spawner.req_mamba_exe == '/micromamba/operator/bin/micromamba'
|
||||
assert spawner.req_mamba_root_prefix == '/micromamba/operator'
|
||||
assert spawner.req_mamba_user_base == '/micromamba/$USER'
|
||||
assert spawner.req_job_name == 'jupyterhub_glicid'
|
||||
assert spawner.req_qos == 'short'
|
||||
|
||||
|
@ -70,8 +71,12 @@ def test_spawner_batch_script(monkeypatch):
|
|||
|
||||
assert 'export MAMBA_EXE=/micromamba/operator/bin/micromamba;' in script
|
||||
assert 'export MAMBA_ROOT_PREFIX=/micromamba/operator;' in script
|
||||
assert 'export PYTHONUSERBASE=/micromamba/$USER;' in script
|
||||
assert 'micromamba activate /micromamba/john-doe/envs/foo;' in script
|
||||
|
||||
assert 'micromamba activate /micromamba/john-doe/envs/foo;' in script
|
||||
assert 'export JUPYTER_PATH=/micromamba/john-doe/envs/foo/share/jupyter;' in script
|
||||
|
||||
assert re.search(r'.*/bin/glicid-spawner-singleuser .*/bin/jupyterhub-singleuser', script)
|
||||
|
||||
|
||||
|
@ -79,10 +84,10 @@ def test_spawner_parse_job_id():
|
|||
"""Test spawner job id parser."""
|
||||
spawner = GlicidSpawner()
|
||||
|
||||
assert spawner.parse_job_id('123') == 123
|
||||
assert spawner.parse_job_id('123') == '123'
|
||||
assert spawner.parse_job_id('456;nautilus') == '456 -M nautilus'
|
||||
|
||||
assert spawner.parse_job_id('') is None
|
||||
assert spawner.parse_job_id('') == ''
|
||||
|
||||
|
||||
def test_spawner_options_form(monkeypatch):
|
||||
|
|
Loading…
Add table
Reference in a new issue