diff --git a/start_vscode.sh b/start_vscode.sh index caba6e0740f2e8756c23bd10bd19cc58685fe5a7..be32a1cd807cf9212f4e8313d28aac96392e52ac 100755 --- a/start_vscode.sh +++ b/start_vscode.sh @@ -75,6 +75,7 @@ Options: -n | --numcores NUM_CPU Number of CPU cores to be used on the cluster -W | --runtime RUN_TIME Run time limit for the code-server in hours and minutes HH:MM -m | --memory MEM_PER_CORE Memory limit in MB per core + -b | --batchsys BATCH_SYS Batch system to use for the submission of this job to Euler (LSF/SLURM) Optional arguments: @@ -83,8 +84,10 @@ Optional arguments: -h | --help Display help for this script and quit -i | --interval INTERVAL Time interval for checking if the job on the cluster already started -k | --key SSH_KEY_PATH Path to SSH key with non-standard name + -s | --shareholdergr SHAREHOLDERGR Shareholder group, mandatory when requesting GPUs with SLURM batch system -v | --version Display version of the script and exit + Examples: ./start_vscode.sh -u sfux -n 4 -W 04:00 -m 2048 @@ -161,6 +164,16 @@ do shift shift ;; + -b|--batchsys) + BATCH_SYS=$2 + shift + shift + ;; + -s|--shareholdergr) + SHAREHOLDERGR=$2 + shift + shift + ;; *) echo -e "Warning: ignoring unknown option $1 \n" shift @@ -268,6 +281,18 @@ else echo -e "Using SSH key $VSC_SSH_KEY_PATH" fi +#check in the case where GPUs are requested with SLURM whether the shareholder group is provided +if [[ "$VSC_NUM_GPU" > "0" && $BATCH_SYS = "SLURM" && $SHAREHOLDERGR = "" ]]; then + echo -e "Please provide the shareholder group if requesting GPUs with SLURM" + display_help +fi + +#if batch system has not been provided, default to LSF: +if [ -z "$BATCH_SYS" ]; then + BATCH_SYS="LSF" +fi + + # put together string for SSH options VSC_SSH_OPT="$VSC_SKPATH $VSC_USERNAME@$VSC_HOSTNAME" @@ -297,9 +322,31 @@ ENDSSH ############################################################################### # run the code-server job on Euler and save the ip of the compute node in the file vscip in the home directory of the user on Euler -echo -e "Connecting to $VSC_HOSTNAME to start the code-server in a batch job" +echo -e "Connecting to $VSC_HOSTNAME to start the code-server in a $BATCH_SYS batch job" +case $BATCH_SYS in + "LSF" ) + VSC_BJOB_OUT=$(ssh $VSC_SSH_OPT bsub -n $VSC_NUM_CPU -W $VSC_RUN_TIME -R "rusage[mem=$VSC_MEM_PER_CPU_CORE]" $VSC_SNUM_GPU<<ENDBSUB + module load $VSC_MODULE_COMMAND + export XDG_RUNTIME_DIR="\$HOME/vsc_runtime" + VSC_IP_REMOTE="\$(hostname -i)" + echo "Remote IP:\$VSC_IP_REMOTE" >> /cluster/home/$VSC_USERNAME/vscip + code-server --bind-addr=\${VSC_IP_REMOTE}:8899 +ENDBSUB + ) + ;; + + "SLURM" ) + + VSC_RUN_TIME="${VSC_RUN_TIME}":00" " + + if [ "$VSC_NUM_GPU" -gt "0" ]; then + + VSC_SNUM_GPU="-G $VSC_NUM_GPU -A $SHAREHOLDERGR" + + fi -VSC_BJOB_OUT=$(ssh $VSC_SSH_OPT bsub -n $VSC_NUM_CPU -W $VSC_RUN_TIME -R "rusage[mem=$VSC_MEM_PER_CPU_CORE]" $VSC_SNUM_GPU<<ENDBSUB + VSC_BJOB_OUT=$(ssh $VSC_SSH_OPT sbatch -n $VSC_NUM_CPU "--time=$VSC_RUN_TIME" "--mem-per-cpu=$VSC_MEM_PER_CPU_CORE" -e "error.dat" $VSC_SNUM_GPU<<ENDBSUB +#!/bin/bash module load $VSC_MODULE_COMMAND export XDG_RUNTIME_DIR="\$HOME/vsc_runtime" VSC_IP_REMOTE="\$(hostname -i)" @@ -307,6 +354,17 @@ echo "Remote IP:\$VSC_IP_REMOTE" >> /cluster/home/$VSC_USERNAME/vscip code-server --bind-addr=\${VSC_IP_REMOTE}:8899 ENDBSUB ) + ;; + *) + echo + echo "Please specify either LSF or SLURM as your choice of the batch system" + echo + display_help + ;; +esac + + + VSC_BJOB_ID=$(echo $VSC_BJOB_OUT | awk '/is submitted/{print substr($2, 2, length($2)-2);}')