Skip to content
Snippets Groups Projects
Commit dfaa9f96 authored by Nadejda Marounina
Browse files

Added the batch option LSF/SLURM

parent ac1410ff
No related branches found
No related tags found
No related merge requests found
......@@ -75,6 +75,7 @@ Options:
-n | --numcores NUM_CPU Number of CPU cores to be used on the cluster
-W | --runtime RUN_TIME Run time limit for the code-server in hours and minutes HH:MM
-m | --memory MEM_PER_CORE Memory limit in MB per core
-b | --batchsys BATCH_SYS Batch system to use for the submission of this job to Euler (LSF/SLURM)
Optional arguments:
......@@ -83,8 +84,10 @@ Optional arguments:
-h | --help Display help for this script and quit
-i | --interval INTERVAL Time interval for checking if the job on the cluster already started
-k | --key SSH_KEY_PATH Path to SSH key with non-standard name
-s | --shareholdergr SHAREHOLDERGR Shareholder group, mandatory when requesting GPUs with SLURM batch system
-v | --version Display version of the script and exit
Examples:
./start_vscode.sh -u sfux -n 4 -W 04:00 -m 2048
......@@ -161,6 +164,16 @@ do
shift
shift
;;
-b|--batchsys)
BATCH_SYS=$2
shift
shift
;;
-s|--shareholdergr)
SHAREHOLDERGR=$2
shift
shift
;;
*)
echo -e "Warning: ignoring unknown option $1 \n"
shift
......@@ -268,6 +281,18 @@ else
echo -e "Using SSH key $VSC_SSH_KEY_PATH"
fi
# When GPUs are requested with the SLURM batch system, a shareholder group
# must have been provided; otherwise print a hint followed by the help text.
# The GPU count is compared numerically with -gt (inside [[ ]] the ">" operator
# compares strings lexicographically); an unset/empty count is treated as 0.
if [ "${VSC_NUM_GPU:-0}" -gt 0 ] && [ "$BATCH_SYS" = "SLURM" ] && [ -z "$SHAREHOLDERGR" ]; then
    echo -e "Please provide the shareholder group if requesting GPUs with SLURM"
    display_help
fi

# If the batch system has not been provided, default to LSF.
if [ -z "$BATCH_SYS" ]; then
    BATCH_SYS="LSF"
fi

# Put together the string of SSH options (key option, then user@host).
# It is kept as one string and expanded unquoted at the call sites so that it
# word-splits into separate ssh arguments.
VSC_SSH_OPT="$VSC_SKPATH $VSC_USERNAME@$VSC_HOSTNAME"
......@@ -297,9 +322,31 @@ ENDSSH
###############################################################################
# Run the code-server job on Euler and save the IP of the compute node in the
# file vscip in the home directory of the user on Euler.
#
# Reads:  BATCH_SYS, VSC_SSH_OPT, VSC_NUM_CPU, VSC_RUN_TIME,
#         VSC_MEM_PER_CPU_CORE, VSC_NUM_GPU, VSC_SNUM_GPU, SHAREHOLDERGR,
#         VSC_MODULE_COMMAND, VSC_USERNAME, VSC_HOSTNAME
# Writes: VSC_BJOB_OUT (output of the submission command); in the SLURM branch
#         also VSC_RUN_TIME and, when GPUs are requested, VSC_SNUM_GPU.
#
# In the job scripts below, unescaped variables are expanded locally before the
# text is sent over SSH, while \$-escaped ones are expanded on the cluster.
# $VSC_SSH_OPT and $VSC_SNUM_GPU are intentionally unquoted so that they split
# into separate arguments.
echo -e "Connecting to $VSC_HOSTNAME to start the code-server in a $BATCH_SYS batch job"

case "$BATCH_SYS" in
    "LSF")
        VSC_BJOB_OUT=$(ssh $VSC_SSH_OPT bsub -n $VSC_NUM_CPU -W $VSC_RUN_TIME -R "rusage[mem=$VSC_MEM_PER_CPU_CORE]" $VSC_SNUM_GPU <<ENDBSUB
module load $VSC_MODULE_COMMAND
export XDG_RUNTIME_DIR="\$HOME/vsc_runtime"
VSC_IP_REMOTE="\$(hostname -i)"
echo "Remote IP:\$VSC_IP_REMOTE" >> /cluster/home/$VSC_USERNAME/vscip
code-server --bind-addr=\${VSC_IP_REMOTE}:8899
ENDBSUB
)
        ;;
    "SLURM")
        # The run time is given as HH:MM; sbatch would read a two-field value
        # as minutes:seconds, so append the seconds field (HH:MM:00).
        VSC_RUN_TIME="${VSC_RUN_TIME}:00"
        # GPU request for sbatch: GPU count plus the shareholder group account.
        # An unset/empty GPU count is treated as 0.
        if [ "${VSC_NUM_GPU:-0}" -gt 0 ]; then
            VSC_SNUM_GPU="-G $VSC_NUM_GPU -A $SHAREHOLDERGR"
        fi
        VSC_BJOB_OUT=$(ssh $VSC_SSH_OPT sbatch -n $VSC_NUM_CPU "--time=$VSC_RUN_TIME" "--mem-per-cpu=$VSC_MEM_PER_CPU_CORE" -e "error.dat" $VSC_SNUM_GPU <<ENDBSUB
#!/bin/bash
module load $VSC_MODULE_COMMAND
export XDG_RUNTIME_DIR="\$HOME/vsc_runtime"
VSC_IP_REMOTE="\$(hostname -i)"
echo "Remote IP:\$VSC_IP_REMOTE" >> /cluster/home/$VSC_USERNAME/vscip
code-server --bind-addr=\${VSC_IP_REMOTE}:8899
ENDBSUB
)
        ;;
    *)
        # Anything other than LSF or SLURM is rejected with the help text.
        echo
        echo "Please specify either LSF or SLURM as your choice of the batch system"
        echo
        display_help
        ;;
esac
# Extract the job ID from the submission output.
#  - A line containing "is submitted": take the second field and strip its
#    first and last character (presumably the <...> around the ID in the bsub
#    message -- confirm against actual bsub output).
#  - A line containing "Submitted batch job" (sbatch message): the ID is the
#    fourth field.
# $VSC_BJOB_OUT is left unquoted so the output is collapsed onto a single line.
VSC_BJOB_ID=$(echo $VSC_BJOB_OUT | awk '/is submitted/{print substr($2, 2, length($2)-2);} /Submitted batch job/{print $4;}')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment