% Skip to content
% Snippets Groups Projects
% Forked from TheAlternative / courses
% 350 commits behind, 36 commits ahead of the upstream repository.
% slides.tex 19.53 KiB
\input{../.style/header}

\title{Software Management for Open Science}
\author{Horea Christian}
\institute{SSC TheAlternative | ETHZ and UZH}

\begin{document}
	\begin{frame}
		\titlepage
	\end{frame}
	\begin{frame}{These Slides}
		Type one link, click all others:
		\begin{itemize}
			\item Download \textcolor{lg}{\href{https://thealternative.ch/ssm/slides.pdf}{\texttt{thealternative.ch/ssm/slides.pdf}}}
		\end{itemize}
	\end{frame}
	\section{Requirements}
		\subsection{... for the demo session}
			\begin{frame}{SSH}
				Linux and MacOS:
				\begin{itemize}
					\item Check that you can run:
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66|
				\end{itemize}
				Windows:
				\begin{itemize}
					\item Download and launch “Git for Windows” from \textcolor{lg}{\href{https://git-for-windows.github.io}{\texttt{git-for-windows.github.io}}}.
					\item Check that you can run:
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66|
				\end{itemize}
			\end{frame}
			\begin{frame}{Command Line Text Editor}
				Usable via SSH and ubiquitous. There are many alternatives, but here we use \textcolor{lg}{\texttt{nano}}:
				\begin{itemize}
					\item Open file:
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|nano file|
					\item Save via: \keys{\ctrl + o}, \keys{\enter}
					\item Exit via: \keys{\ctrl + x}
				\end{itemize}
			\end{frame}
			\begin{frame}{Git and Social Coding}
				Git needs to know who you are.
				\begin{itemize}
					\item On the server, run:
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git config --global user.name "Your Name"|
					\vspace{-3.1em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git config --global user.email yourname@example.com|
				\end{itemize}

				GitHub is a \textbf{social coding platform} providing free accounts:
				\begin{itemize}
					\item Register under \textcolor{lg}{\href{https://github.com}{\texttt{github.com}}}.
					\item Use a password which you can remember.
				\end{itemize}
			\end{frame}
	\section{What?}
		\subsection{What is software management?}
			\begin{frame}{The Package}
				\begin{center}
					\textcolor{ldorange}{\Large Better organization for your research!}
				\end{center}
				\vspace{1.5em}

				A package is a software format which is (easily):
				\begin{multicols}{2}
				\begin{itemize}
					\item Distributable
					\item Integrated
					\item Testable
					\item Updateable
					\item Uninstallable
					\item Understandable
				\end{itemize}
				\end{multicols}
			\end{frame}
			\begin{frame}{Package Management --- best done automatically}
				\begin{minipage}{0.44\textwidth}
					Packages interact in complex and nontrivial manners:
					\begin{itemize}
						\item Version-dependent behaviour
						\item Optional features
						\item Incompatibilities
						\item Static/dynamic linking
					\end{itemize}
				\end{minipage}
				\begin{minipage}{0.55\textwidth}
					\begin{figure}
						\includegraphics[height=0.83\textheight]{img/ng_mi.png}
						\vspace{-1em}
						\caption{Minimal neuroscience package dependency stack \cite{Ioanas2017}}
					\end{figure}
				\end{minipage}
			\end{frame}
			\begin{frame}{Binary Packages}
				\begin{figure}
				\centering
					\includegraphics[width=0.9\textwidth]{img/pm_d.png}
					\caption{Rudimentary overview of binary package distribution.}
				\end{figure}
				\begin{columns}
					\column{.5\linewidth}
						Advantages:
						\begin{itemize}
							\item Faster installation
							\item Less variable installation
						\end{itemize}
					\column{.5\linewidth}
						Disadvantages:
						\begin{itemize}
							\item No access to live software
							\item Man-in-the-middle
							\item Limited support for rolling release
						\end{itemize}
				\end{columns}
			\end{frame}
			\begin{frame}{Source-Based Packages}
				\begin{figure}
				\centering
					\includegraphics[width=0.9\textwidth]{img/pm_g.png}
					\caption{Rudimentary overview of source-based package distribution.}
				\end{figure}
				\begin{columns}
					\column{.5\linewidth}
						Advantages:
						\begin{itemize}
							\item Live software is a first-class citizen
							\item Thin wrapper for upstream
							\item Acutely version and linking aware
						\end{itemize}
					\column{.5\linewidth}
						Disadvantages:
						\begin{itemize}
							\item Slower installation
							\item More variable installation
						\end{itemize}
				\end{columns}
			\end{frame}
	\section{Why?}
		\subsection{Why does open science require package management?}
			\begin{frame}{Quality}
				\begin{itemize}
					\item Make development more transparent.
					\item Get \textbf{constructive} feedback.
					\item Ask for help with concrete reproducible examples.
					\item Easily manage \textcolor{lg}{\href{https://github.com/gentoo-science/sci/issues}{\texttt{bugs/issues}}} and \textcolor{lg}{\href{https://github.com/gentoo-science/sci/pulls}{\texttt{contributions}}}.
					\item Implement proper version tracking.
				\end{itemize}
			\end{frame}
			\begin{frame}{Impact}
				\begin{itemize}
					\item Reach more potential users.
					\item Communicate with users to improve your software's usability.
					\item Retain more users.
				\end{itemize}
			\end{frame}
			\begin{frame}{Recognition}
				\begin{itemize}
					\item Establish proof of authorship.
					\item Publicize your innovative workflows, solutions, data structures.
					\item Create a handle for attribution (including DOI), e.g.:
					\begin{itemize}
						\item BehavioPy: \textcolor{lg}{\href{https://doi.org/10.5281/zenodo.188169}{\texttt{10.5281/zenodo.188169}}}
						\item Nipype: \textcolor{lg}{\href{https://doi.org/10.5281/zenodo.50186}{\texttt{10.5281/zenodo.50186}}}
					\end{itemize}
				\end{itemize}
			\end{frame}
			\begin{frame}{Sustainability}
				A sustainable project \textbf{cannot} depend on environments remaining unchanged.
				\begin{itemize}
					\item Ensure long-term viability of your software.
					\item Avoid death-by-PhD.
					\item Give your funders their money's worth.
					\item Develop a lean start-up.
					\item Maintain a reliable and affordable infrastructure for your work.
				\end{itemize}
			\end{frame}
			\begin{frame}{Why Not?}
				\begin{itemize}
					\item Don't be afraid of your software not being “good/unique enough”!
					\item Don't wait until your software is “ready”!
					\item A lot of research software you are already using is not written by “professional” programmers.
				\end{itemize}
			\end{frame}
	\section{How?}
		\subsection{How do I package my software?}
			\begin{frame}{Choose Appropriate Technologies}
				\begin{columns}
					\column{.5\linewidth}
						\begin{figure}
						\centering
							\includegraphics[width=0.6\textwidth]{img/gentoo.png}
							\caption{Gentoo Linux Logo by Gentoo Foundation and Lennart Andre Rolland - CC BY-SA/2.5.}
						\end{figure}
					\column{.5\linewidth}
						\begin{figure}
							\centering
							\includegraphics[width=0.6\textwidth]{img/python.png}
							\caption{Python Logo by Python Software Foundation.}
						\end{figure}
				\end{columns}
			\end{frame}
			\begin{frame}{Python Package Distribution}
				\begin{columns}
					\column{.55\linewidth}
					You can package your Python software by writing \textbf{one short} file.
						\begin{itemize}
							\item Python provides its own limited package management, e.g. via \textcolor{lg}{\href{https://packaging.python.org/distributing/}{\texttt{setuptools}}}.
							\item Package metadata saved in \texttt{setup.py}, e.g. \textcolor{lg}{\href{https://github.com/IBT-FMI/SAMRI/blob/master/setup.py}{\texttt{SAMRI/setup.py}}}.
						\end{itemize}
					\column{.45\linewidth}
						\inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4]{python}{samri/setup.py}
				\end{columns}
			\end{frame}
			\begin{frame}{Gentoo Packages}
				\begin{columns}
					\column{.55\linewidth}
					A Gentoo package is \textbf{one short} file.
						\begin{itemize}
							\item Regardless of the programming language
							\item Can automatically interpret information contained in the package, e.g. in \textcolor{lg}{\texttt{setup.py}}
						\end{itemize}
					\column{.47\linewidth}
						\vspace{-3em}
						\inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4,firstline=1,lastline=36]{bash}{samri/samri-0.4.ebuild}
						\vspace{-3.2em}
						\inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4,firstline=47,lastline=49]{bash}{samri/samri-0.4.ebuild}
				\end{columns}
			\end{frame}
			\begin{frame}{Reposit Your Software}
				\begin{figure}
				\centering
					\includegraphics[width=0.4\textwidth]{img/git.png}
					\caption{Git Logo by Jason Long (\href{https://creativecommons.org/licenses/by/3.0/}{CC-BY-3.0})}
				\end{figure}
				You can self-host, but hosting is also available via social coding platforms:
				\begin{multicols}{3}
					\begin{itemize}
						\item GitLab
						\item GitHub
						\item Bitbucket
					\end{itemize}
				\end{multicols}
			\end{frame}
	\section{Demo}
		\subsection{Put what you have learned into practice, and start typing...}
			\begin{frame}{A Few Basic Gentoo Commands}
				\begin{itemize}
					\item Check available package names, versions, and details.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|eix -v nibabel|
					\item See package dependencies.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery g nibabel|
					\item See which packages depend on a given package.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery d nibabel|
					\item See files installed by package.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery f nibabel|
					\item Try to install a new package.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -p psychopy|
				\end{itemize}
			\end{frame}
			\begin{frame}{Reproduce a Scientific Article}
				Novel frameworks, such as RepSeP~\cite{repsep}, permit articles to be written as software.
				\begin{itemize}
					\item Get the source code for brand-new articles:
					\begin{itemize}
						\item Work-in-progress (reexecution time \SI{\approx 2}{\minute})
						\vspace{-0.6em}
						\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://gitlab.com/Chymera/nvcz.git |
						\item Preprint (reexecution time \SI{\approx 11}{\minute})
						\vspace{-0.6em}
						\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://bitbucket.org/TheChymera/irsabi.git |
					\end{itemize}
					\item Switch to article directory.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd nvcz|
					\item Attempt to reexecute.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./compile.sh|
					%\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git checkout 28b5d2d1|
				\end{itemize}
			\end{frame}
			\begin{frame}{What happened? Dependency requirements happened.}
				\begin{center}
					\textcolor{ldorange}{\Large But you can solve the issue yourself!}
				\end{center}
				\vspace{2em}
				Write a new package atom for the package manager.
				\begin{itemize}
					\item Gentoo Linux makes this wholly autonomous.
					\item Solve one problem only once:
					\begin{itemize}
						\item Installation will be automatic on all your further systems.
						\item And on everybody else's systems!
					\end{itemize}
				\end{itemize}
			\end{frame}
			\begin{frame}{Write a Package Atom --- The Overlay}
				\begin{itemize}
					\item Fork an overlay on GitHub, e.g. from \textcolor{lg}{\href{https://github.com/TheChymera/overlay}{\texttt{github.com/TheChymera/overlay}}}
					\begin{figure}
						\vspace{-0.1em}
						\includegraphics[width=0.94\textwidth]{img/fork.png}
					\end{figure}
					\item Go back to your home directory.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd|
					\item Clone your fork of the overlay.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://github.com/YourName/overlay.git|
					\item Make the ebuild directory, and navigate into it.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|mkdir -p overlay/sci-biology/samri && cd $_|%stopzone
				\end{itemize}
			\end{frame}
			\begin{frame}{Transparency means less work for you!}
				You could write the following files from scratch, but you can also reuse analogous files from existing packages.
				\begin{itemize}
					\item Copy a metadata file from a Python package.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cp /usr/portage/dev-python/astropy/metadata.xml .|
					\item Copy an ebuild file from a Python package.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cp /usr/portage/dev-python/astropy/*2.0.1.ebuild samri-0.4.ebuild|
				\end{itemize}
			\end{frame}
			\begin{frame}{Write a Package Atom --- The Metadata File}
				\vspace{-0.9em}
				\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4]{xml}{samri/metadata.xml}
			\end{frame}
			\begin{frame}{Write a Package Atom --- The Ebuild (header excerpt)}
				\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=1,lastline=17]{bash}{samri/samri-0.4.ebuild}
			\end{frame}
			\begin{frame}{Write a Package Atom --- The Ebuild (dependency excerpts)}
				\begin{itemize}
					\item Compile-time dependency example:
					\vspace{-0.6em}
					\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=18,lastline=25]{bash}{samri/samri-0.4.ebuild}
					\item Run-time dependency DIY (fill out, consulting \textcolor{lg}{\href{https://github.com/IBT-FMI/SAMRI}{\texttt{github.com/IBT-FMI/SAMRI}}}):
					\vspace{-0.6em}
					\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=26,lastline=29]{bash}{samri/samri-0.4.ebuild}
					\vspace{-3em}
					\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=47,lastline=47]{bash}{samri/samri-0.4.ebuild}
				\end{itemize}
			\end{frame}
			\begin{frame}{Write a Package Atom --- Finishing Touches}
				\begin{itemize}
					\item Not all packages are perfect. Append the following to the ebuild:
					\vspace{-0.6em}
					\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=49,lastline=49]{bash}{samri/samri-0.4.ebuild}
					\item Check your work. Minor formatting differences (e.g. indents) are not critical.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|wget https://thealternative.ch/ssm/samri/samri-0.4.ebuild -P ~|
					\vspace{-3.1em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|colordiff ~/samri-0.4.ebuild samri-0.4.ebuild|
					\vspace{-3.1em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|wget https://thealternative.ch/ssm/samri/metadata.xml -P ~|
					\vspace{-3.1em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|colordiff ~/metadata.xml metadata.xml|
				\end{itemize}
			\end{frame}
			\begin{frame}{Social Coding --- Upload Your Package for Reuse}
				\begin{itemize}
					\item Download the data and make git aware of your files.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ebuild samri-0.4.ebuild manifest && git add .|
					\item Run a quality check.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|repoman full|
					\item Record and publish your work in version control.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git commit -a && git push origin master|
					\item Include your work in a widely used overlay: visit \textcolor{lg}{\href{https://github.com/YourName/overlay}{\texttt{github.com/YourName/overlay}}}.
					\begin{figure}
						\vspace{-0.1em}
						\includegraphics[width=0.94\textwidth]{img/pr.png}
					\end{figure}
				\end{itemize}
			\end{frame}
			\begin{frame}{Use Your Work}
				\begin{itemize}
					\item Update the package index (as superuser).
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|eix-sync|
					\item Try out the install command yourself.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -pv samri|
					\item Install (as superuser).
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -v samri|
				\end{itemize}
			\end{frame}
			\begin{frame}{The Article Environment is Now Reproducible}
				\begin{itemize}
					\item Navigate back to the article directory.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd ~/nvcz|
					\item Compile.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./compile.sh|
					\item Log out from SSH: \keys{\ctrl + d}
					\item Get the document locally.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|scp YOURUSER@130.60.24.66:nvcz/article.pdf .|
				\end{itemize}
			\end{frame}
			\begin{frame}{And the Article is Now Automated}
				\begin{itemize}
					\item Log back in and navigate to article directory.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66|
					\vspace{-3.1em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd nvcz|
					\item Automatically adjust the t-statistic threshold for the entire document.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}:grep -rlI '3\.5' | xargs sed -i -e "s/3\.5/3.0/g":
					\item Clean up trace files and visualize what you have changed.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./cleanup.sh && git diff|
					\item Compile, log out.
					\item Get the document locally.
					\vspace{-0.6em}
					\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|scp YOURUSER@130.60.24.66:nvcz/article.pdf newarticle.pdf|
				\end{itemize}
			\end{frame}
			\begin{frame}{Results}
				You have:
				\begin{itemize}
					\item Packaged a new piece of scientific software, now automatically installable:
					\begin{itemize}
						\item by anybody else,
						\item by you on any machine.
					\end{itemize}
					\item Updated data analysis visualizations in a reproducible article.
					\begin{itemize}
						\item It's that easy to contribute to well-organized research!
					\end{itemize}
				\end{itemize}
				\vspace{-.5em}
				\begin{columns}
					\column{.5\linewidth}
						\begin{figure}
						\centering
							\includegraphics[width=0.52\textwidth]{img/fig_old.png}
						\end{figure}
					\column{.5\linewidth}
						\begin{figure}
							\centering
							\includegraphics[width=0.52\textwidth]{img/fig_new.png}
						\end{figure}
				\end{columns}
			\end{frame}


	\section{Meta}
		\subsection{About this presentation}
			\begin{frame}{What now?}
				\begin{itemize}
					\item Q\&A round\\
					\textcolor{lg}{in a few seconds}
					\item Get help packaging your own Free and Open Source Scientific Software\\
					\textcolor{lg}{in a few minutes}
					\item Get help with running your own Gentoo Linux data analysis server\\
					\textcolor{lg}{in a few hours}
					\item Spread package management in your field\\
					\textcolor{lg}{tomorrow at work}
				\end{itemize}
			\end{frame}
			\begin{frame}{These Slides}
				\begin{itemize}
					\item \textcolor{lg}{Latest Slides:}\\
					\texttt{\href{https://thealternative.ch/ssm/slides.pdf}{thealternative.ch/ssm/slides.pdf}}
					\item \textcolor{lg}{Source:}\\
					\texttt{\href{https://gitlab.ethz.ch/thealternative/courses/tree/master/scientific_software_management}{\footnotesize gitlab.ethz.ch/thealternative/courses/tree/master/scientific\_software\_management}}
				\end{itemize}
				%separate sources from info so it looks nicer
				\begin{itemize}
					\item \textcolor{lg}{License:} \href{https://creativecommons.org/licenses/by-sa/3.0/}{CC BY-SA 3.0}
				\end{itemize}
			\end{frame}
			\begin{frame}{References}
				\bibliographystyle{IEEEtran}
				\bibliography{./bib}
			\end{frame}
\end{document}