From 733a9651e90be66b1165a17fcd90365b87f973a0 Mon Sep 17 00:00:00 2001
From: sfux <samuel.fux@id.ethz.ch>
Date: Thu, 10 Nov 2022 07:24:11 +0000
Subject: [PATCH] Update start_vscode.sh

---
 start_vscode.sh | 74 +++++++++++++++++++++++--------------------------
 1 file changed, 35 insertions(+), 39 deletions(-)

diff --git a/start_vscode.sh b/start_vscode.sh
index be32a1c..9e609f2 100755
--- a/start_vscode.sh
+++ b/start_vscode.sh
@@ -6,12 +6,13 @@
 #  connect it with a local browser to it                                      #
 #                                                                             #
 #  Main author    : Samuel Fux                                                #
-#  Contributions  : Andreas Lugmayr, Mike Boss                                #
+#  Contributions  : Andreas Lugmayr, Mike Boss, Nadia Marounina               #
 #  Date           : October 2021                                              #
 #  Location       : ETH Zurich                                                #
-#  Version        : 0.1                                                       #
+#  Version        : 0.2                                                       #
 #  Change history :                                                           #
 #                                                                             #
+#  24.10.2022    Added Slurm support                                          #
 #  19.05.2022    JOBID is now saved to reconnect_info file                    #
 #  28.10.2021    Initial version of the script based on Jupyter script        #
 #                                                                             #
@@ -22,7 +23,7 @@
 ###############################################################################
 
 # Version
-VSC_VERSION="0.1"
+VSC_VERSION="0.2"
 
 # Script directory
 VSC_SCRIPTDIR=$(pwd)
@@ -59,6 +60,9 @@ VSC_WAITING_INTERVAL=60
 # SSH key location default      : no default
 VSC_SSH_KEY_PATH=""
 
+# Batch system                  : Slurm
+VSC_BATCH_SYSTEM="SLURM"
+
 ###############################################################################
 # Usage instructions                                                          #
 ###############################################################################
@@ -75,7 +79,7 @@ Options:
         -n | --numcores       NUM_CPU          Number of CPU cores to be used on the cluster
         -W | --runtime        RUN_TIME         Run time limit for the code-server in hours and minutes HH:MM
         -m | --memory         MEM_PER_CORE     Memory limit in MB per core
-        -b | --batchsys       BATCH_SYS        Batch system to use for the submission of this job to Euler (LSF/SLURM)
+        -b | --batchsys       BATCH_SYS        Batch system to use (LSF or SLURM)
 
 Optional arguments:
 
@@ -84,15 +88,14 @@ Optional arguments:
         -h | --help                            Display help for this script and quit
         -i | --interval       INTERVAL         Time interval for checking if the job on the cluster already started
         -k | --key            SSH_KEY_PATH     Path to SSH key with non-standard name
-        -s | --shareholdergr  SHAREHOLDERGR    Shareholder group, mandatory when requesting GPUs with SLURM batch system
         -v | --version                         Display version of the script and exit
 
 
 Examples:
 
-        ./start_vscode.sh -u sfux -n 4 -W 04:00 -m 2048
+        ./start_vscode.sh -u sfux -b SLURM -n 4 -W 04:00 -m 2048
 
-        ./start_vscode.sh --username sfux --numcores 2 --runtime 01:30 --memory 2048
+        ./start_vscode.sh --username sfux --batchsys SLURM --numcores 2 --runtime 01:30 --memory 2048
 
         ./start_vscode.sh -c $HOME/.vsc_config
 
@@ -105,6 +108,7 @@ VSC_RUN_TIME="01:00"        # Run time limit for the code-server in hours and mi
 VSC_MEM_PER_CPU_CORE=1024   # Memory limit in MB per core
 VSC_WAITING_INTERVAL=60     # Time interval to check if the job on the cluster already started
 VSC_SSH_KEY_PATH=""         # Path to SSH key with non-standard name
+VSC_BATCH_SYSTEM="SLURM"    # Batch system to use (SLURM or LSF)
 
 EOF
 exit 1
@@ -169,11 +173,6 @@ do
                 shift
                 shift
                 ;;
-                -s|--shareholdergr)
-                SHAREHOLDERGR=$2
-                shift
-                shift
-                ;;
                 *)
                 echo -e "Warning: ignoring unknown option $1 \n"
                 shift
@@ -281,17 +280,19 @@ else
         echo -e "Using SSH key $VSC_SSH_KEY_PATH"
 fi
 
-#check in the case where GPUs are requested with SLURM whether the shareholder group is provided 
-if [[ "$VSC_NUM_GPU" > "0" && $BATCH_SYS = "SLURM" &&  $SHAREHOLDERGR = "" ]]; then
-        echo -e "Please provide the shareholder group if requesting GPUs with SLURM"
-        display_help
-fi
-
-#if batch system has not been provided, default to LSF:
-if [ -z "$BATCH_SYS" ]; then
-        BATCH_SYS="LSF"
-fi
 
+# check if VSC_BATCH_SYSTEM is set to SLURM or LSF
+case $VSC_BATCH_SYSTEM in
+        LSF)
+        echo -e "Using LSF batch system"
+        ;;
+        SLURM)
+        echo -e "Using Slurm batch system"
+        ;;
+        *)
+        echo -e "Error: Unknown batch system $VSC_BATCH_SYSTEM. Please specify either LSF or SLURM as the batch system"; exit 1
+        ;;
+esac
 
 # put together string for SSH options
 VSC_SSH_OPT="$VSC_SKPATH $VSC_USERNAME@$VSC_HOSTNAME"
@@ -324,7 +325,7 @@ ENDSSH
 # run the code-server job on Euler and save the ip of the compute node in the file vscip in the home directory of the user on Euler
 echo -e "Connecting to $VSC_HOSTNAME to start the code-server in a $BATCH_SYS batch job"
 case $BATCH_SYS in
-        "LSF" )
+        "LSF")
         VSC_BJOB_OUT=$(ssh $VSC_SSH_OPT bsub -n $VSC_NUM_CPU -W $VSC_RUN_TIME -R "rusage[mem=$VSC_MEM_PER_CPU_CORE]" $VSC_SNUM_GPU<<ENDBSUB
         module load $VSC_MODULE_COMMAND
         export XDG_RUNTIME_DIR="\$HOME/vsc_runtime"
@@ -332,16 +333,14 @@ case $BATCH_SYS in
         echo "Remote IP:\$VSC_IP_REMOTE" >> /cluster/home/$VSC_USERNAME/vscip
         code-server --bind-addr=\${VSC_IP_REMOTE}:8899
 ENDBSUB
- )
-                ;;
-
-        "SLURM" )
+)       ;;
+        "SLURM")
 
         VSC_RUN_TIME="${VSC_RUN_TIME}":00" "
 
         if [ "$VSC_NUM_GPU" -gt "0" ]; then
         
-                VSC_SNUM_GPU="-G $VSC_NUM_GPU -A $SHAREHOLDERGR"
+                VSC_SNUM_GPU="-G $VSC_NUM_GPU"
         
         fi
 
@@ -355,18 +354,12 @@ code-server --bind-addr=\${VSC_IP_REMOTE}:8899
 ENDBSUB
 )
                 ;;
-        *)
-        echo
-        echo "Please specify either LSF or SLURM as your choice of the batch system"
-        echo
-        display_help
-        ;;
-esac
-
-
 
+esac
 
-VSC_BJOB_ID=$(echo $VSC_BJOB_OUT | awk '/is submitted/{print substr($2, 2, length($2)-2);}')
+# TODO: get jobid for both cases (LSF/Slurm)
+# store jobid in a variable
+# VSC_BJOB_ID=$(echo $VSC_BJOB_OUT | awk '/is submitted/{print substr($2, 2, length($2)-2);}')
 
 # wait until batch job has started, poll every $VSC_WAITING_INTERVAL seconds to check if /cluster/home/$VSC_USERNAME/vscip exists
 # once the file exists and is not empty the batch job has started
@@ -409,6 +402,10 @@ VSC_LOCAL_PORT=$((3 * 2**14 + RANDOM % 2**14))
 echo -e "Using local port: $VSC_LOCAL_PORT"
 
 # write reconnect_info file
+#
+# FIXME: add jobid
+# BJOB ID           : $VSC_BJOB_ID
+
 cat <<EOF > $VSC_SCRIPTDIR/reconnect_info
 Restart file
 Remote IP address : $VSC_REMOTE_IP
@@ -416,7 +413,6 @@ Remote port       : $VSC_REMOTE_PORT
 Local port        : $VSC_LOCAL_PORT
 SSH tunnel        : ssh $VSC_SSH_OPT -L $VSC_LOCAL_PORT:$VSC_REMOTE_IP:$VSC_REMOTE_PORT -N &
 URL               : http://localhost:$VSC_LOCAL_PORT
-BJOB ID           : $VSC_BJOB_ID
 EOF
 
 # setup SSH tunnel from local computer to compute node via login node
-- 
GitLab