From aaf16ee89d16b73a94100f57c3ab9cf69d2acb2d Mon Sep 17 00:00:00 2001 From: Benoit Seignovert Date: Fri, 22 Mar 2024 19:41:27 +0100 Subject: [PATCH 1/6] Enforce cluster flag in sinfo query --- render/__main__.py | 2 +- src/glicid_spawner/slurm.py | 2 +- .../templates/views/slurm.jinja | 2 +- tests/data/sinfo.txt | 14 ++++++------- tests/test_form.py | 20 +++++++++---------- tests/test_slurm.py | 10 ++++++---- 6 files changed, 26 insertions(+), 24 deletions(-) diff --git a/render/__main__.py b/render/__main__.py index f554c5a..a3eaac6 100644 --- a/render/__main__.py +++ b/render/__main__.py @@ -32,7 +32,7 @@ SINFO = sinfo_from_file( ) # Single vs. multi-cluster implementation -SLURM_SINGLE_CLUSTER = {'N/A': SINFO.pop('N/A')} +SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')} SLURM_MULTI_CLUSTER = SINFO GPU_SINGLE_CLUSTER = gpu_max_duration(gres(SLURM_SINGLE_CLUSTER)) GPU_MULTI_CLUSTER = gpu_max_duration(gres(SLURM_MULTI_CLUSTER)) diff --git a/src/glicid_spawner/slurm.py b/src/glicid_spawner/slurm.py index 011193e..1aea52d 100644 --- a/src/glicid_spawner/slurm.py +++ b/src/glicid_spawner/slurm.py @@ -141,7 +141,7 @@ class SlurmCluster: def sinfo_run(username: str = None) -> str: """SLURM SINFO run command.""" - flags = '--federation --noheader --responding' + flags = '--federation --noheader --responding --cluster=all' fmt = 'Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres' cmd = f'sinfo {flags} --Format={fmt}' diff --git a/src/glicid_spawner/templates/views/slurm.jinja b/src/glicid_spawner/templates/views/slurm.jinja index 5486b16..6980efd 100644 --- a/src/glicid_spawner/templates/views/slurm.jinja +++ b/src/glicid_spawner/templates/views/slurm.jinja @@ -11,7 +11,7 @@
- {% if 'N/A' not in sinfo %} + {% if 'cluster' not in sinfo %}
diff --git a/tests/data/sinfo.txt b/tests/data/sinfo.txt index 4dcb4ae..37d0b25 100644 --- a/tests/data/sinfo.txt +++ b/tests/data/sinfo.txt @@ -1,10 +1,10 @@ -N/A Devel nazare001 idle 0/20/0/20 128000 (null) -N/A GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000 -N/A A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20 -N/A AMD-short cloudbreak001 drained 0/0/32/32 128000 (null) -N/A lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2 -N/A lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4 -N/A lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1 +cluster Devel nazare001 idle 0/20/0/20 128000 (null) +cluster GPU-short budbud001 mixed 20/20/0/40 184000 gpu:t4:2,mps:t4:2000 +cluster A40-short budbud002 allocated 40/0/0/40 184000 gpu:a40:2,mps:a40:20 +cluster AMD-short cloudbreak001 drained 0/0/32/32 128000 (null) +cluster lowp budbud003 down~ 0/0/40/40 128000 gpu:p100:2 +cluster lowp budbud004 drained~ 0/0/20/20 128000 gpu:k80:4 +cluster lowp budbud005 idle~ 0/20/0/20 192000 gpu:p100:1 nautilus standard cnode001 completing 0/96/0/96 384000 (null) nautilus bigmem cnode002 planned 0/96/0/96 768000 (null) nautilus gpu gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1) diff --git a/tests/test_form.py b/tests/test_form.py index 99b8895..65aa099 100644 --- a/tests/test_form.py +++ b/tests/test_form.py @@ -12,7 +12,7 @@ from pytest import fixture DATA = Path(__file__).parent / 'data' SINFO = sinfo_from_file(DATA / 'sinfo.txt') -SLURM_SINGLE_CLUSTER = {'N/A': SINFO.pop('N/A')} +SLURM_SINGLE_CLUSTER = {'cluster': SINFO.pop('cluster')} SLURM_MULTI_CLUSTER = SINFO @@ -81,7 +81,7 @@ def test_options_attrs(mock_cluster): # Multi cluster configuration (default) sinfo = options['sinfo'] - assert 'N/A' not in sinfo + assert 'cluster' not in sinfo assert 'nautilus' in sinfo assert 'waves' in sinfo @@ -99,11 +99,11 @@ def test_options_attrs_single_cluster(mock_single_cluster): # Single cluster configuration sinfo = options['sinfo'] - assert 'N/A' in sinfo + assert 
'cluster' in sinfo assert 'nautilus' not in sinfo assert 'waves' not in sinfo - node = sinfo['N/A']['Devel']['nazare001'] + node = sinfo['cluster']['Devel']['nazare001'] assert node == 'nazare001' assert node.cpu.idle == 20 @@ -218,32 +218,32 @@ def test_options_form_slurm_single_cluster(mock_single_cluster): assert ( '
' in html ) assert ( - '' + '' in html ) assert ( - '' + '' in html ) # Nodes (hidden by default) assert ( '
' in html ) assert ( - '' + '' in html ) assert ( - '' + '' in html ) diff --git a/tests/test_slurm.py b/tests/test_slurm.py index 28e23f1..a4d8693 100644 --- a/tests/test_slurm.py +++ b/tests/test_slurm.py @@ -105,6 +105,7 @@ def test_slurm_sinfo_run(monkeypatch): '--federation ' '--noheader ' '--responding ' + '--cluster=all ' '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres' ) @@ -114,6 +115,7 @@ def test_slurm_sinfo_run(monkeypatch): '--federation ' '--noheader ' '--responding ' + '--cluster=all ' '--Format=Cluster,PartitionName,NodeHost,StateLong,CPUsState,Memory,Gres' ) @@ -127,7 +129,7 @@ def test_slurm_sinfo_reader(): node = nodes[0] - assert node.cluster == 'N/A' + assert node.cluster == 'cluster' assert node.partition == 'Devel' assert node.hostname == 'nazare001' assert node.state == 'idle' @@ -136,7 +138,7 @@ def test_slurm_sinfo_reader(): assert node.mem == 128 assert not node.gpu - assert [node.cluster for node in nodes] == 7 * ['N/A'] + 12 * ['nautilus'] + 6 * ['waves'] + assert [node.cluster for node in nodes] == 7 * ['cluster'] + 12 * ['nautilus'] + 6 * ['waves'] assert len([node for node in nodes if node.state in ('idle', 'mixed')]) == 10 @@ -172,7 +174,7 @@ def test_slurm_sinfo_filter(monkeypatch): assert isinstance(clusters, dict) assert len(clusters) == 3 - assert list(clusters) == ['N/A', 'nautilus', 'waves'] # __eq__ on cluster.name + assert list(clusters) == ['cluster', 'nautilus', 'waves'] # __eq__ on cluster.name assert [len(partitions) for partitions in clusters.values()] == [2, 4, 2] @@ -195,7 +197,7 @@ def test_slurm_sinfo_filter(monkeypatch): # Get only `idle` nodes clusters = sinfo_filter(resources, with_states=('idle')) - assert list(clusters) == ['N/A', 'nautilus'] + assert list(clusters) == ['cluster', 'nautilus'] assert [len(partitions) for partitions in clusters.values()] == [1, 3] # Discard clusters without partition available From 5cdee4d73dca26002994f556bc87413be5d6575f Mon Sep 17 00:00:00 2001 From: 
Benoit Seignovert Date: Fri, 22 Mar 2024 19:58:02 +0100 Subject: [PATCH 2/6] Enforce job_id as string --- src/glicid_spawner/spawner.py | 6 +++--- tests/test_spawner.py | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/glicid_spawner/spawner.py b/src/glicid_spawner/spawner.py index 12f8339..cdd7631 100644 --- a/src/glicid_spawner/spawner.py +++ b/src/glicid_spawner/spawner.py @@ -80,7 +80,7 @@ class GlicidSpawner(SlurmSpawner): slurm_job_id_re = Unicode(r'(\d+)(?:;(\w+))?').tag(config=True) - def parse_job_id(self, output): + def parse_job_id(self, output) -> str: """Parse job id with cluster name support. If cluster name is present, `job_id` will be a string @@ -88,10 +88,10 @@ class GlicidSpawner(SlurmSpawner): """ for job_id, job_cluster in re.findall(self.slurm_job_id_re, output): - return f'{job_id} -M {job_cluster}' if job_cluster else int(job_id) + return f'{job_id} -M {job_cluster}' if job_cluster else job_id self.log.error(f'GlicidSpawner unable to parse job ID from text: {output}') - return None + return '' @default('options_form') def _options_form_default(self) -> str: diff --git a/tests/test_spawner.py b/tests/test_spawner.py index e6e430e..8ac208b 100644 --- a/tests/test_spawner.py +++ b/tests/test_spawner.py @@ -79,10 +79,10 @@ def test_spawner_parse_job_id(): """Test spawner job id parser.""" spawner = GlicidSpawner() - assert spawner.parse_job_id('123') == 123 + assert spawner.parse_job_id('123') == '123' assert spawner.parse_job_id('456;nautilus') == '456 -M nautilus' - assert spawner.parse_job_id('') is None + assert spawner.parse_job_id('') == '' def test_spawner_options_form(monkeypatch): From 69015d5062ab06323225ef78ff6a0fb5a6e72b93 Mon Sep 17 00:00:00 2001 From: Benoit Seignovert Date: Fri, 22 Mar 2024 19:59:21 +0100 Subject: [PATCH 3/6] Bump to version 1.1 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e264c8f..66f8f02 100644 --- 
a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "glicid-spawner" -version = "1.0" +version = "1.1" description = "JupyterHub Batch Spawner for GLiCID" authors = ["Benoit Seignovert "] license = "BSD 3-Clause License" @@ -90,7 +90,7 @@ exclude_lines = [ ] [tool.tbump.version] -current = "1.0" +current = "1.1" regex = '(?P<major>\d+)\.(?P<minor>\d+)' [tool.tbump.git] From d2c0631956012725ee35be19d470835aef4a5258 Mon Sep 17 00:00:00 2001 From: Benoit Seignovert Date: Tue, 26 Mar 2024 08:05:00 +0100 Subject: [PATCH 4/6] Add explicit PYTHONUSERBASE path to micromamba endpoint --- src/glicid_spawner/micromamba.py | 1 + src/glicid_spawner/spawner.py | 13 +++++++++---- src/glicid_spawner/templates/slurm_script.jinja | 1 + tests/test_spawner.py | 7 ++++++- 4 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/glicid_spawner/micromamba.py b/src/glicid_spawner/micromamba.py index 1d42063..0fe1641 100644 --- a/src/glicid_spawner/micromamba.py +++ b/src/glicid_spawner/micromamba.py @@ -9,6 +9,7 @@ GLOBAL_USER = 'operator' MAMBA_ROOT_PREFIX = f'{MICROMAMBA_ROOT}/{GLOBAL_USER}' MAMBA_EXE = f'{MAMBA_ROOT_PREFIX}/bin/micromamba' +MAMBA_USER_BASE = f'{MICROMAMBA_ROOT}/$USER' @dataclass diff --git a/src/glicid_spawner/spawner.py b/src/glicid_spawner/spawner.py index cdd7631..b65af99 100644 --- a/src/glicid_spawner/spawner.py +++ b/src/glicid_spawner/spawner.py @@ -9,7 +9,7 @@ from batchspawner import JobStatus, SlurmSpawner from traitlets import Bool, Integer, Unicode, default from .form import options_form, options_from_form -from .micromamba import MAMBA_EXE, MAMBA_ROOT_PREFIX +from .micromamba import MAMBA_EXE, MAMBA_ROOT_PREFIX, MAMBA_USER_BASE from .progress import ElapseTime, get_progress from .templates import get_template_src @@ -24,14 +24,19 @@ class GlicidSpawner(SlurmSpawner): help='Spawner singleuser command.', ).tag(config=True) + req_mamba_exe = Unicode( + MAMBA_EXE, + help='Micromamba global exe', + ).tag(config=True) + req_mamba_root_prefix = 
Unicode( MAMBA_ROOT_PREFIX, help='Micromamba global root prefix', ).tag(config=True) - req_mamba_exe = Unicode( - MAMBA_EXE, - help='Micromamba global exe', + req_mamba_user_base = Unicode( + MAMBA_USER_BASE, + help='Micromamba user base prefix', ).tag(config=True) req_job_name = Unicode( diff --git a/src/glicid_spawner/templates/slurm_script.jinja b/src/glicid_spawner/templates/slurm_script.jinja index 3c01679..c3e4515 100644 --- a/src/glicid_spawner/templates/slurm_script.jinja +++ b/src/glicid_spawner/templates/slurm_script.jinja @@ -33,6 +33,7 @@ echo "The {{job_name}} logs are located in: ${JUPYTER_LOG_DIR}" {# Micromamba config -#} export MAMBA_EXE={{mamba_exe}}; export MAMBA_ROOT_PREFIX={{mamba_root_prefix}}; + export PYTHONUSERBASE={{ mamba_user_base }}; source $MAMBA_ROOT_PREFIX/etc/profile.d/micromamba.sh; {# Activate micromamba env requested by the user -#} diff --git a/tests/test_spawner.py b/tests/test_spawner.py index 8ac208b..7990185 100644 --- a/tests/test_spawner.py +++ b/tests/test_spawner.py @@ -27,8 +27,9 @@ def test_spawner_config(): assert 'bin/glicid-spawner-singleuser' in cmd assert 'bin/jupyterhub-singleuser' in cmd - assert spawner.req_mamba_root_prefix == '/micromamba/operator' assert spawner.req_mamba_exe == '/micromamba/operator/bin/micromamba' + assert spawner.req_mamba_root_prefix == '/micromamba/operator' + assert spawner.req_mamba_user_base == '/micromamba/$USER' assert spawner.req_job_name == 'jupyterhub_glicid' assert spawner.req_qos == 'short' @@ -70,8 +71,12 @@ def test_spawner_batch_script(monkeypatch): assert 'export MAMBA_EXE=/micromamba/operator/bin/micromamba;' in script assert 'export MAMBA_ROOT_PREFIX=/micromamba/operator;' in script + assert 'export PYTHONUSERBASE=/micromamba/$USER;' in script assert 'micromamba activate /micromamba/john-doe/envs/foo;' in script + assert 'micromamba activate /micromamba/john-doe/envs/foo;' in script + assert 'export JUPYTER_PATH=/micromamba/john-doe/envs/foo/share/jupyter;' in script + 
assert re.search(r'.*/bin/glicid-spawner-singleuser .*/bin/jupyterhub-singleuser', script) From 255bf6201eab4823bf10ace74b90169e0cb46c13 Mon Sep 17 00:00:00 2001 From: Benoit Seignovert Date: Tue, 21 May 2024 17:52:26 +0200 Subject: [PATCH 5/6] Fix support for GPU gres with a dot in their name --- src/glicid_spawner/slurm.py | 2 +- src/glicid_spawner/templates/views/resources.jinja | 2 +- tests/data/sinfo.txt | 4 ++-- tests/test_slurm.py | 14 +++++++++++--- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/src/glicid_spawner/slurm.py b/src/glicid_spawner/slurm.py index 1aea52d..15da6fd 100644 --- a/src/glicid_spawner/slurm.py +++ b/src/glicid_spawner/slurm.py @@ -69,7 +69,7 @@ class SlurmNode: self.state = state.strip().lower() self.cpu = SlurmCpu(*re.findall(r'(\d+)/(\d+)/\d+/(\d+)', cpus_state)[0]) self.mem = int(memory_mb) // 1000 # in GB - self.gpu = SlurmGpu(*re.findall(r'gpu:(\w+):(\d+)', gres)[0] if 'gpu:' in gres else []) + self.gpu = SlurmGpu(*re.findall(r'gpu:([\w\.]+):(\d+)', gres)[0] if 'gpu:' in gres else []) def __str__(self): return self.hostname diff --git a/src/glicid_spawner/templates/views/resources.jinja b/src/glicid_spawner/templates/views/resources.jinja index efae401..d29de24 100644 --- a/src/glicid_spawner/templates/views/resources.jinja +++ b/src/glicid_spawner/templates/views/resources.jinja @@ -41,7 +41,7 @@ data-max-duration="{{max_duration}}" {%- if loop.first %} checked{% endif %}>
{% endfor -%} diff --git a/tests/data/sinfo.txt b/tests/data/sinfo.txt index 37d0b25..8079789 100644 --- a/tests/data/sinfo.txt +++ b/tests/data/sinfo.txt @@ -9,13 +9,13 @@ nautilus standard cnode001 completing nautilus bigmem cnode002 planned 0/96/0/96 768000 (null) nautilus gpu gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1) nautilus gpu gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1) -nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1) +nautilus gpu gnode3 allocated 96/0/0/96 128000 gpu:A100_2g.10gb:6(S nautilus visu visu1 idle 0/96/0/96 768000 (null) nautilus all cnode001 completing 0/96/0/96 384000 (null) nautilus all cnode002 planned 0/96/0/96 768000 (null) nautilus all gnode1 mixed 4/92/0/96 768000 gpu:A100:1(S:0-1) nautilus all gnode2 idle 0/96/0/96 256000 gpu:A100:2(S:0-1) -nautilus all gnode3 allocated 96/0/0/96 128000 gpu:A100:4(S:0-1) +nautilus all gnode3 allocated 96/0/0/96 128000 gpu:A100_2g.10gb:6(S nautilus all visu1 idle 0/96/0/96 768000 (null) waves standard cribbar001 mixed 30/10/0/40 16000 (null) waves gpu budbud006 allocated 64/0/0/64 256000 gpu:a100:2,mps:a100: diff --git a/tests/test_slurm.py b/tests/test_slurm.py index a4d8693..c38829d 100644 --- a/tests/test_slurm.py +++ b/tests/test_slurm.py @@ -161,9 +161,17 @@ def test_slurm_sinfo_reader(): 'P100', 'K80', 'P100', - ] + 8 * ['A100'] + 'A100', + 'A100', + 'A100_2g.10gb', + 'A100', + 'A100', + 'A100_2g.10gb', + 'A100', + 'A100', + ] - assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 4, 1, 2, 4, 2, 2] + assert [node.gpu.nb for node in nodes if node.gpu] == [2, 2, 2, 4, 1, 1, 2, 6, 1, 2, 6, 2, 2] def test_slurm_sinfo_filter(monkeypatch): @@ -253,4 +261,4 @@ def test_slurm_gres(): gpus = gres(resources) # Sorted and without duplicates - assert gpus == ['A100', 'A40', 'None', 'P100', 'T4'] + assert gpus == ['A100', 'A100_2g.10gb', 'A40', 'None', 'P100', 'T4'] From 540df4b0aa9e33fba32fd00189b36668908482dd Mon Sep 17 00:00:00 2001 From: Benoit Seignovert 
Date: Tue, 21 May 2024 17:52:59 +0200 Subject: [PATCH 6/6] Bump to version 1.2 --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 66f8f02..2f2f3ae 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "glicid-spawner" -version = "1.1" +version = "1.2" description = "JupyterHub Batch Spawner for GLiCID" authors = ["Benoit Seignovert "] license = "BSD 3-Clause License" @@ -90,7 +90,7 @@ exclude_lines = [ ] [tool.tbump.version] -current = "1.1" +current = "1.2" regex = '(?P\d+)\.(?P\d+)' [tool.tbump.git]