#!/bin/bash

###############################################################################
#
#  Script to run on a local computer to start a code-server on Euler and
#  connect it with a local browser to it
#
#  Main author    : Samuel Fux
#  Contributions  : Andreas Lugmayr
#  Date           : October 2021
#  Location       : ETH Zurich
#  Version        : 0.1
#  Change history :
#
#  28.10.2021    Initial version of the script based on Jupyter script
#
###############################################################################

###############################################################################
# Configuration options, initialising variables and setting default values
###############################################################################

# Version
VSC_VERSION="0.1"

# Directory the script is started from (current working directory); the
# reconnect_info file is written here
VSC_SCRIPTDIR=$(pwd)

# hostname of the cluster to connect to
VSC_HOSTNAME="euler.ethz.ch"

# order for initializing configuration options
# 1. Defaults values set inside this script
# 2. Command line options overwrite defaults
# 3. Config file options overwrite command line options

# Configuration file default    : $HOME/.vsc_config
VSC_CONFIG_FILE="$HOME/.vsc_config"

# Username default              : no default
VSC_USERNAME=""

# Number of CPU cores default   : 1 CPU core
VSC_NUM_CPU=1

# Runtime limit default         : 1:00 hour
VSC_RUN_TIME="01:00"

# Memory default                : 1024 MB per core
VSC_MEM_PER_CPU_CORE=1024

# Number of GPUs default        : 0 GPUs
VSC_NUM_GPU=0

# Waiting interval default      : 60 seconds
VSC_WAITING_INTERVAL=60

# SSH key location default      : no default
VSC_SSH_KEY_PATH=""

###############################################################################
# Usage instructions
###############################################################################

# Print the usage text to stdout and terminate the script.
# Arguments: $1 - exit status to terminate with (optional, default 1 so that
#                 every error path that shows the help still fails)
# Note: the heredoc is unquoted on purpose, $0 and $HOME are expanded in the
#       printed text.
function display_help {
    cat <<EOF
$0: Script to start a VSCode on Euler from a local computer

Usage: start_vscode.sh [options]

Options:

        -u | --username       USERNAME         ETH username for SSH connection to Euler
        -n | --numcores       NUM_CPU          Number of CPU cores to be used on the cluster
        -W | --runtime        RUN_TIME         Run time limit for the code-server in hours and minutes HH:MM
        -m | --memory         MEM_PER_CORE     Memory limit in MB per core

Optional arguments:

        -c | --config         CONFIG_FILE      Configuration file for specifying options
        -g | --numgpu         NUM_GPU          Number of GPUs to be used on the cluster
        -h | --help                            Display help for this script and quit
        -i | --interval       INTERVAL         Time interval for checking if the job on the cluster already started
        -k | --key            SSH_KEY_PATH     Path to SSH key with non-standard name
        -v | --version                         Display version of the script and exit

Examples:

        ./start_vscode.sh -u sfux -n 4 -W 04:00 -m 2048

        ./start_vscode.sh --username sfux --numcores 2 --runtime 01:30 --memory 2048

        ./start_vscode.sh -c $HOME/.vsc_config

Format of configuration file:

VSC_USERNAME=""             # ETH username for SSH connection to Euler
VSC_NUM_CPU=1               # Number of CPU cores to be used on the cluster
VSC_NUM_GPU=0               # Number of GPUs to be used on the cluster
VSC_RUN_TIME="01:00"        # Run time limit for the code-server in hours and minutes HH:MM
VSC_MEM_PER_CPU_CORE=1024   # Memory limit in MB per core
VSC_WAITING_INTERVAL=60     # Time interval to check if the job on the cluster already started
VSC_SSH_KEY_PATH=""         # Path to SSH key with non-standard name

EOF
    exit "${1:-1}"
}

###############################################################################
# Parse configuration options
###############################################################################

# Options that take a value consume two positional parameters. Two separate
# 'shift' calls are used instead of 'shift 2': if the value is missing,
# 'shift 2' would shift nothing and the loop would never terminate.
while [[ $# -gt 0 ]]; do
    case "$1" in
        -h|--help)
            # help was explicitly requested, so this is not an error
            display_help 0
            ;;
        -v|--version)
            echo -e "start_vscode.sh version: $VSC_VERSION\n"
            exit
            ;;
        -u|--username)
            VSC_USERNAME=$2
            shift
            shift
            ;;
        -n|--numcores)
            VSC_NUM_CPU=$2
            shift
            shift
            ;;
        -W|--runtime)
            VSC_RUN_TIME=$2
            shift
            shift
            ;;
        -m|--memory)
            VSC_MEM_PER_CPU_CORE=$2
            shift
            shift
            ;;
        -c|--config)
            VSC_CONFIG_FILE=$2
            shift
            shift
            ;;
        -g|--numgpu)
            VSC_NUM_GPU=$2
            shift
            shift
            ;;
        -i|--interval)
            VSC_WAITING_INTERVAL=$2
            shift
            shift
            ;;
        -k|--key)
            VSC_SSH_KEY_PATH=$2
            shift
            shift
            ;;
        *)
            echo -e "Warning: ignoring unknown option $1 \n"
            shift
            ;;
    esac
done

###############################################################################
# Check configuration options
###############################################################################

# check if user has a configuration file and source it to initialize options
# NOTE: the file is executed as shell code and, because it is sourced after
# the command line was parsed, its values win over command line options
if [ -f "$VSC_CONFIG_FILE" ]; then
    echo -e "Found configuration file $VSC_CONFIG_FILE"
    echo -e "Initializing configuration from file ${VSC_CONFIG_FILE}:"
    cat "$VSC_CONFIG_FILE"
    source "$VSC_CONFIG_FILE"
fi

# check that VSC_USERNAME is not an empty string
if [ -z "$VSC_USERNAME" ]; then
    echo -e "Error: No ETH username is specified, terminating script\n"
    display_help
else
    echo -e "ETH username: $VSC_USERNAME"
fi

# NOTE: the numeric comparisons below intentionally use '[ ... ]'. Inside
# '[[ ... ]]' or '(( ... ))' a value with a leading zero such as 08 would be
# evaluated as an (invalid) octal number, '[' compares it as decimal.

# check number of CPU cores

# check if VSC_NUM_CPU an integer
if ! [[ "$VSC_NUM_CPU" =~ ^[0-9]+$ ]]; then
    echo -e "Error: $VSC_NUM_CPU -> Incorrect format. Please specify number of CPU cores as an integer and try again\n"
    display_help
fi

# check if VSC_NUM_CPU is <= 128
if [ "$VSC_NUM_CPU" -gt 128 ]; then
    echo -e "Error: $VSC_NUM_CPU -> Larger than 128. No distributed memory supported, therefore the number of CPU cores needs to be smaller or equal to 128\n"
    display_help
fi

if [ "$VSC_NUM_CPU" -gt 0 ]; then
    echo -e "Requesting $VSC_NUM_CPU CPU cores for running the code-server"
fi

# check number of GPUs

# check if VSC_NUM_GPU an integer
if ! [[ "$VSC_NUM_GPU" =~ ^[0-9]+$ ]]; then
    echo -e "Error: $VSC_NUM_GPU -> Incorrect format. Please specify the number of GPU as an integer and try again\n"
    display_help
fi

# check if VSC_NUM_GPU is <= 8
if [ "$VSC_NUM_GPU" -gt 8 ]; then
    echo -e "Error: No distributed memory supported, therefore number of GPUs needs to be smaller or equal to 8\n"
    display_help
fi

# VSC_SNUM_GPU becomes part of the command line that the remote shell parses,
# hence the single quotes that protect the brackets from globbing over there
if [ "$VSC_NUM_GPU" -gt 0 ]; then
    echo -e "Requesting $VSC_NUM_GPU GPUs for running the code-server"
    VSC_SNUM_GPU="-R 'rusage[ngpus_excl_p=$VSC_NUM_GPU]'"
else
    VSC_SNUM_GPU=""
fi

if ! [ "$VSC_NUM_CPU" -gt 0 ] && ! [ "$VSC_NUM_GPU" -gt 0 ]; then
    echo -e "Error: No CPU and no GPU resources requested, terminating script"
    display_help
fi

# check if VSC_RUN_TIME is provided in HH:MM format
if ! [[ "$VSC_RUN_TIME" =~ ^[0-9][0-9]:[0-9][0-9]$ ]]; then
    echo -e "Error: $VSC_RUN_TIME -> Incorrect format. Please specify runtime limit in the format HH:MM and try again\n"
    display_help
else
    echo -e "Run time limit set to $VSC_RUN_TIME"
fi

# check if VSC_MEM_PER_CPU_CORE is an integer
if ! [[ "$VSC_MEM_PER_CPU_CORE" =~ ^[0-9]+$ ]]; then
    echo -e "Error: $VSC_MEM_PER_CPU_CORE -> Memory limit must be an integer, please try again\n"
    display_help
else
    echo -e "Memory per core set to $VSC_MEM_PER_CPU_CORE MB"
fi

# check if VSC_WAITING_INTERVAL is an integer
if ! [[ "$VSC_WAITING_INTERVAL" =~ ^[0-9]+$ ]]; then
    echo -e "Error: $VSC_WAITING_INTERVAL -> Waiting time interval [seconds] must be an integer, please try again\n"
    display_help
else
    echo -e "Setting waiting time interval for checking the start of the job to $VSC_WAITING_INTERVAL seconds"
fi

# set modules
VSC_MODULE_COMMAND="gcc/6.3.0 code-server/3.12.0 eth_proxy"

# Build the SSH arguments as an array so that a key path containing spaces
# stays a single argument. The path itself is not validated here, ssh reports
# an unusable key on its own.
VSC_SSH_ARGS=()
if [ -z "$VSC_SSH_KEY_PATH" ]; then
    VSC_SKPATH=""
else
    VSC_SKPATH="-i $VSC_SSH_KEY_PATH"
    VSC_SSH_ARGS+=(-i "$VSC_SSH_KEY_PATH")
    echo -e "Using SSH key $VSC_SSH_KEY_PATH"
fi
VSC_SSH_ARGS+=("$VSC_USERNAME@$VSC_HOSTNAME")

# human readable form of the SSH options, only used for the reconnect_info file
VSC_SSH_OPT="$VSC_SKPATH $VSC_USERNAME@$VSC_HOSTNAME"

###############################################################################
# Check for leftover files
###############################################################################

# check if some old files are left from a previous session and delete them

# check for reconnect_info in the current directory on the local computer
echo -e "Checking for left over files from previous sessions"
if [ -f "$VSC_SCRIPTDIR/reconnect_info" ]; then
    echo -e "Found old reconnect_info file, deleting it ..."
    rm "$VSC_SCRIPTDIR/reconnect_info"
fi

# check for log files from a previous session in the home directory of the cluster
# (unquoted heredoc: $VSC_USERNAME is expanded locally before it is sent)
ssh -T "${VSC_SSH_ARGS[@]}" <<ENDSSH
if [ -f /cluster/home/$VSC_USERNAME/vscip ]; then
    echo -e "Found old vscip file, deleting it ..."
    rm /cluster/home/$VSC_USERNAME/vscip
fi
ENDSSH

###############################################################################
# Start code-server on the cluster
###############################################################################

# run the code-server job on Euler and save the ip of the compute node in the file vscip in the home directory of the user on Euler
echo -e "Connecting to $VSC_HOSTNAME to start the code-server in a batch job"

# The submission command is handed to ssh as one string; all interpolated
# values were validated above (integers / HH:MM), the resource strings are
# single-quoted for the remote shell.
VSC_BSUB_CMD="bsub -n $VSC_NUM_CPU -W $VSC_RUN_TIME -R 'rusage[mem=$VSC_MEM_PER_CPU_CORE]' $VSC_SNUM_GPU"

# FIXME: save jobid in a variable, that the script can kill the batch job at the end
# In the job script below, escaped expansions (\$...) are evaluated on the
# compute node, unescaped ones are filled in locally.
if ! ssh "${VSC_SSH_ARGS[@]}" "$VSC_BSUB_CMD" <<ENDBSUB
module load $VSC_MODULE_COMMAND
export XDG_RUNTIME_DIR="\$HOME/vsc_runtime"
VSC_IP_REMOTE="\$(hostname -i)"
echo "Remote IP:\$VSC_IP_REMOTE" >> /cluster/home/$VSC_USERNAME/vscip
code-server --bind-addr=\${VSC_IP_REMOTE}:8899
ENDBSUB
then
    # without a submitted job the vscip file never appears and the polling
    # loop below would wait forever
    echo -e "Error: submitting the batch job on $VSC_HOSTNAME failed, terminating script"
    exit 1
fi

# wait until batch job has started, poll every $VSC_WAITING_INTERVAL seconds to check if /cluster/home/$VSC_USERNAME/vscip exists
# once the file exists and is not empty the batch job has started
ssh -T "${VSC_SSH_ARGS[@]}" <<ENDSSH
while ! [ -s /cluster/home/$VSC_USERNAME/vscip ]; do
    echo 'Waiting for code-server to start, sleep for $VSC_WAITING_INTERVAL sec'
    sleep $VSC_WAITING_INTERVAL
done
ENDSSH

# give the code-server a few seconds to start
sleep 7

# get remote ip and port from the file stored on Euler
echo -e "Receiving ip, port and token from the code-server"
VSC_REMOTE_IP=$(ssh "${VSC_SSH_ARGS[@]}" "grep -m1 'Remote IP' /cluster/home/$VSC_USERNAME/vscip | cut -d ':' -f 2")
VSC_REMOTE_PORT=8899

# check if the IP is defined
if [[ "$VSC_REMOTE_IP" == "" ]]; then
    cat <<EOF
Error: remote ip is not defined. Terminating script.
* Please check login to the cluster and check with bjobs if the batch job on the cluster is running and terminate it with bkill.
EOF
    exit 1
fi

# print information about IP and port
echo -e "Remote IP address: $VSC_REMOTE_IP"
echo -e "Remote port: $VSC_REMOTE_PORT"

# get a free port on local computer
echo -e "Determining free port on local computer"
#VSC_LOCAL_PORT=$(python -c 'import socket; s=socket.socket(); s.bind(("",0)); print(s.getsockname()[1]); s.close()')
# FIXME: check if there is a solution that does not require python (as some Windows computers don't have a usable Python installed by default)
# Python-free replacement: random port from the dynamic range 49152-65535.
# No guarantee that the port is unused, but so far best non-Python solution
VSC_LOCAL_PORT=$((3 * 2**14 + RANDOM % 2**14))
echo -e "Using local port: $VSC_LOCAL_PORT"

# write reconnect_info file
cat <<EOF > "$VSC_SCRIPTDIR/reconnect_info"
Restart file
Remote IP address : $VSC_REMOTE_IP
Remote port       : $VSC_REMOTE_PORT
Local port        : $VSC_LOCAL_PORT
SSH tunnel        : ssh $VSC_SSH_OPT -L $VSC_LOCAL_PORT:$VSC_REMOTE_IP:$VSC_REMOTE_PORT -N &
URL               : http://localhost:$VSC_LOCAL_PORT
EOF

# setup SSH tunnel from local computer to compute node via login node
# FIXME: check if the tunnel can be managed via this script (opening, closing) by using a control socket from SSH
echo -e "Setting up SSH tunnel for connecting the browser to the code-server"
ssh "${VSC_SSH_ARGS[@]}" -L "$VSC_LOCAL_PORT:$VSC_REMOTE_IP:$VSC_REMOTE_PORT" -N &

# SSH tunnel is started in the background, pause 5 seconds to make sure
# it is established before starting the browser
sleep 5

# save url in variable
VSC_URL=http://localhost:$VSC_LOCAL_PORT
echo -e "Starting browser and connecting it to the code-server"
echo -e "Connecting to url $VSC_URL"

# start local browser if possible
case "$OSTYPE" in
    linux-gnu*)
        xdg-open "$VSC_URL"
        ;;
    darwin*)
        open "$VSC_URL"
        ;;
    msys*)
        # Git Bash on Windows 10
        start "$VSC_URL"
        ;;
    *)
        echo -e "Your operating system does not allow to start the browser automatically."
        echo -e "Please open $VSC_URL in your browser."
        ;;
esac