% Forked from
% TheAlternative / courses
% 350 commits behind, 36 commits ahead of the upstream repository.
% -
% Horea Christian authored
% slides.tex 19.53 KiB
\input{../.style/header}
\title{Software Management for Open Science}
\author{Horea Christian}
\institute{SSC TheAlternative | ETHZ and UZH}
\begin{document}
\begin{frame}
\titlepage
\end{frame}
\begin{frame}{These Slides}
Type one link, click all others:
\begin{itemize}
\item Download \textcolor{lg}{\href{https://thealternative.ch/ssm/slides.pdf}{\texttt{thealternative.ch/ssm/slides.pdf}}}
\end{itemize}
\end{frame}
\section{Requirements}
\subsection{... for the demo session}
\begin{frame}{SSH}
Linux and macOS:
\begin{itemize}
\item Check that you can run:
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66|
\end{itemize}
Windows:
\begin{itemize}
\item Download and launch “Git for Windows” from \textcolor{lg}{\href{https://git-for-windows.github.io}{\texttt{git-for-windows.github.io}}}.
\item Check that you can run:
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66|
\end{itemize}
\end{frame}
\begin{frame}{Command Line Text Editor}
Usable via SSH and ubiquitous. There are many alternatives, but here we use \textcolor{lg}{\texttt{nano}}:
\begin{itemize}
\item Open file:
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|nano file|
\item Save via: \keys{\ctrl + o}, \keys{\enter}
\item Exit via: \keys{\ctrl + x}
\end{itemize}
\end{frame}
\begin{frame}{Git and Social Coding}
Git needs to know who you are.
\begin{itemize}
\item On the server, run:
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git config --global user.name "Your Name"|
\vspace{-3.1em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git config --global user.email yourname@example.com|
\end{itemize}
GitHub is a \textbf{social coding platform} providing free accounts:
\begin{itemize}
\item Register under \textcolor{lg}{\href{https://github.com}{\texttt{github.com}}}.
\item Use a password which you can remember.
\end{itemize}
\end{frame}
\section{What?}
\subsection{What is software management?}
\begin{frame}{The Package}
\begin{center}
\textcolor{ldorange}{\Large Better organization for your research!}
\end{center}
\vspace{1.5em}
A package is a software format which is (easily):
\begin{multicols}{2}
\begin{itemize}
\item Distributable
\item Integrated
\item Testable
\item Updateable
\item Uninstallable
\item Understandable
\end{itemize}
\end{multicols}
\end{frame}
\begin{frame}{Package Management --- best done automatically}
\begin{minipage}{0.44\textwidth}
Packages interact in complex and nontrivial manners:
\begin{itemize}
\item Version-dependent behaviour
\item Optional features
\item Incompatibilities
\item Static/dynamic linking
\end{itemize}
\end{minipage}
\begin{minipage}{0.55\textwidth}
\begin{figure}
\includegraphics[height=0.83\textheight]{img/ng_mi.png}
\vspace{-1em}
\caption{Minimal neuroscience package dependency stack~\cite{Ioanas2017}}
\end{figure}
\end{minipage}
\end{frame}
\begin{frame}{Binary Packages}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{img/pm_d.png}
\caption{Rudimentary overview of binary package distribution.}
\end{figure}
\begin{columns}
\column{.5\linewidth}
Advantages:
\begin{itemize}
\item Faster installation
\item Less variable installation
\end{itemize}
\column{.5\linewidth}
Disadvantages:
\begin{itemize}
\item No access to live software
\item Man-in-the-middle
\item Limited support for rolling release
\end{itemize}
\end{columns}
\end{frame}
\begin{frame}{Source-Based Packages}
\begin{figure}
\centering
\includegraphics[width=0.9\textwidth]{img/pm_g.png}
\caption{Rudimentary overview of source-based package distribution.}
\end{figure}
\begin{columns}
\column{.5\linewidth}
Advantages:
\begin{itemize}
\item Live software is a first-class citizen
\item Thin wrapper for upstream
\item Acutely version and linking aware
\end{itemize}
\column{.5\linewidth}
Disadvantages:
\begin{itemize}
\item Slower installation
\item More variable installation
\end{itemize}
\end{columns}
\end{frame}
\section{Why?}
\subsection{Why does open science require package management?}
\begin{frame}{Quality}
\begin{itemize}
\item Make development more transparent.
\item Get \textbf{constructive} feedback.
\item Ask for help with concrete reproducible examples.
\item Easily manage \textcolor{lg}{\href{https://github.com/gentoo-science/sci/issues}{\texttt{bugs/issues}}} and \textcolor{lg}{\href{https://github.com/gentoo-science/sci/pulls}{\texttt{contributions}}}.
\item Implement proper version tracking.
\end{itemize}
\end{frame}
\begin{frame}{Impact}
\begin{itemize}
\item Reach more potential users.
\item Communicate with users to improve your software's usability.
\item Retain more users.
\end{itemize}
\end{frame}
\begin{frame}{Recognition}
\begin{itemize}
\item Establish proof of authorship.
\item Publicize your innovative workflows, solutions, data structures.
\item Create a handle for attribution (including DOI), e.g.:
\begin{itemize}
\item BehavioPy: \textcolor{lg}{\href{http://doi.org/10.5281/zenodo.188169}{\texttt{10.5281/zenodo.188169}}}
\item Nipype: \textcolor{lg}{\href{http://doi.org/10.5281/zenodo.50186}{\texttt{10.5281/zenodo.50186}}}
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Sustainability}
A sustainable project \textbf{cannot} depend on environments remaining unchanged.
\begin{itemize}
\item Ensure long-term viability of your software.
\item Avoid death-by-PhD.
\item Give your funders their money's worth.
\item Develop a lean start-up.
\item Maintain a reliable and affordable infrastructure for your work.
\end{itemize}
\end{frame}
\begin{frame}{Why Not?}
\begin{itemize}
\item Don't be afraid of your software not being “good/unique enough”!
\item Don't wait until your software is “ready”!
\item A lot of research software you are already using is not written by “professional” programmers.
\end{itemize}
\end{frame}
\section{How?}
\subsection{How do I package my software?}
\begin{frame}{Choose Appropriate Technologies}
\begin{columns}
\column{.5\linewidth}
\begin{figure}
\centering
\includegraphics[width=0.6\textwidth]{img/gentoo.png}
\caption{Gentoo Linux Logo by Gentoo Foundation and Lennart Andre Rolland - CC BY-SA/2.5.}
\end{figure}
\column{.5\linewidth}
\begin{figure}
\centering
\includegraphics[width=0.6\textwidth]{img/python.png}
\caption{Python Logo by Python Software Foundation.}
\end{figure}
\end{columns}
\end{frame}
\begin{frame}{Python Package Distribution}
\begin{columns}
\column{.55\linewidth}
You can package your Python software by writing \textbf{one short} file.
\begin{itemize}
\item Python provides its own limited package management, e.g. via \textcolor{lg}{\href{https://packaging.python.org/distributing/}{\texttt{setuptools}}}.
\item Package metadata saved in \texttt{setup.py}, e.g. \textcolor{lg}{\href{https://github.com/IBT-FMI/SAMRI/blob/master/setup.py}{\texttt{SAMRI/setup.py}}}.
\end{itemize}
\column{.45\linewidth}
\inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4]{python}{samri/setup.py}
\end{columns}
\end{frame}
\begin{frame}{Gentoo Packages}
\begin{columns}
\column{.55\linewidth}
A Gentoo package is \textbf{one short} file.
\begin{itemize}
\item Regardless of the programming language
\item Can automatically interpret information contained in the package, e.g. in \textcolor{lg}{\texttt{setup.py}}
\end{itemize}
\column{.47\linewidth}
\vspace{-3em}
\inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4,firstline=1,lastline=36]{bash}{samri/samri-0.4.ebuild}
\vspace{-3.2em}
\inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4,firstline=47,lastline=49]{bash}{samri/samri-0.4.ebuild}
\end{columns}
\end{frame}
\begin{frame}{Reposit Your Software}
\begin{figure}
\centering
\includegraphics[width=0.4\textwidth]{img/git.png}
\caption{Git Logo by Jason Long (\href{https://creativecommons.org/licenses/by/3.0/}{CC-BY-3.0})}
\end{figure}
You can self-host, but hosting is also available via social coding platforms:
\begin{multicols}{3}
\begin{itemize}
\item GitLab
\item GitHub
\item Bitbucket
\end{itemize}
\end{multicols}
\end{frame}
\section{Demo}
\subsection{Put what you have learned into practice, and start typing...}
\begin{frame}{A Few Basic Gentoo Commands}
\begin{itemize}
\item Check available package names, versions, and details.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|eix -v nibabel|
\item See package dependencies.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery g nibabel|
\item See which packages depend on a given package.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery d nibabel|
\item See files installed by package.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery f nibabel|
\item Try to install a new package.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -p psychopy|
\end{itemize}
\end{frame}
\begin{frame}{Reproduce a Scientific Article}
Novel frameworks, such as RepSeP~\cite{repsep}, permit articles to be written as software.
\begin{itemize}
\item Get the source code for brand-new articles:
\begin{itemize}
\item Work-in-progress (reexecution time \SI{\approx 2}{\minute})
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://gitlab.com/Chymera/nvcz.git |
\item Preprint (reexecution time \SI{\approx 11}{\minute})
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://bitbucket.org/TheChymera/irsabi.git |
\end{itemize}
\item Switch to article directory.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd nvcz|
\item Attempt to reexecute.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./compile.sh|
%\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git checkout 28b5d2d1|
\end{itemize}
\end{frame}
\begin{frame}{What happened? Dependency requirements happened.}
\begin{center}
\textcolor{ldorange}{\Large But you can solve the issue yourself!}
\end{center}
\vspace{2em}
Write a new package atom for the package manager.
\begin{itemize}
\item Gentoo Linux makes this wholly autonomous.
\item Solve one problem only once:
\begin{itemize}
\item Installation will be automatic on all your further systems.
\item And on everybody else's systems!
\end{itemize}
\end{itemize}
\end{frame}
\begin{frame}{Write a Package Atom --- The Overlay}
\begin{itemize}
\item Fork an overlay on GitHub, e.g. from \textcolor{lg}{\href{https://github.com/TheChymera/overlay}{\texttt{github.com/TheChymera/overlay}}}
\begin{figure}
\vspace{-0.1em}
\includegraphics[width=0.94\textwidth]{img/fork.png}
\end{figure}
\item Go back to your home directory.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd|
\item Clone your fork of the overlay.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://github.com/YourName/overlay.git|
\item Make the ebuild directory, and navigate into it.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|mkdir -p overlay/sci-biology/samri && cd $_|%stopzone
\end{itemize}
\end{frame}
\begin{frame}{Transparency means less work for you!}
You could write the following files from scratch, but you can also reuse analogous files from existing packages.
\begin{itemize}
\item Copy a metadata file from a Python package.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cp /usr/portage/dev-python/astropy/metadata.xml .|
\item Copy an ebuild file from a Python package.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cp /usr/portage/dev-python/astropy/*2.0.1.ebuild samri-0.4.ebuild|
\end{itemize}
\end{frame}
\begin{frame}{Write a Package Atom --- The Metadata File}
\vspace{-0.9em}
\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4]{xml}{samri/metadata.xml}
\end{frame}
\begin{frame}{Write a Package Atom --- The Ebuild (header excerpt)}
\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=1,lastline=17]{bash}{samri/samri-0.4.ebuild}
\end{frame}
\begin{frame}{Write a Package Atom --- The Ebuild (dependency excerpts)}
\begin{itemize}
\item Compile-time dependency example:
\vspace{-0.6em}
\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=18,lastline=25]{bash}{samri/samri-0.4.ebuild}
\item Run-time dependency DIY (fill out, consulting \textcolor{lg}{\href{https://github.com/IBT-FMI/SAMRI}{\texttt{github.com/IBT-FMI/SAMRI}}}):
\vspace{-0.6em}
\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=26,lastline=29]{bash}{samri/samri-0.4.ebuild}
\vspace{-3em}
\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=47,lastline=47]{bash}{samri/samri-0.4.ebuild}
\end{itemize}
\end{frame}
\begin{frame}{Write a Package Atom --- Finishing Touches}
\begin{itemize}
\item Not all packages are perfect. Append the following to the ebuild:
\vspace{-0.6em}
\inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=49,lastline=49]{bash}{samri/samri-0.4.ebuild}
\item Check your work. Minor formatting differences (e.g. indents) are not critical.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|wget https://thealternative.ch/ssm/samri/samri-0.4.ebuild -P ~|
\vspace{-3.1em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|colordiff ~/samri-0.4.ebuild samri-0.4.ebuild|
\vspace{-3.1em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|wget https://thealternative.ch/ssm/samri/metadata.xml -P ~|
\vspace{-3.1em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|colordiff ~/metadata.xml metadata.xml|
\end{itemize}
\end{frame}
\begin{frame}{Social Coding --- Upload Your Package for Reuse}
\begin{itemize}
\item Download the data and make git aware of your files.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ebuild samri-0.4.ebuild manifest && git add .|
\item Run a quality check.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|repoman full|
\item Record and publish your work in version control.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git commit -a && git push origin master|
\item Include your work in a widely used overlay: visit \textcolor{lg}{\href{https://github.com/YourName/overlay}{\texttt{github.com/YourName/overlay}}}.
\begin{figure}
\vspace{-0.1em}
\includegraphics[width=0.94\textwidth]{img/pr.png}
\end{figure}
\end{itemize}
\end{frame}
\begin{frame}{Use Your Work}
\begin{itemize}
\item Update the package index (as superuser).
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|eix-sync|
\item Try out the install command yourself.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -pv samri|
\item Install (as superuser).
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -v samri|
\end{itemize}
\end{frame}
\begin{frame}{The Article Environment is Now Reproducible}
\begin{itemize}
\item Navigate back to the article directory.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd ~/nvcz|
\item Compile.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./compile.sh|
\item Log out from SSH: \keys{\ctrl + d}
\item Get the document locally.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|scp YOURUSER@130.60.24.66:nvcz/article.pdf .|
\end{itemize}
\end{frame}
\begin{frame}{And the Article is Now Automated}
\begin{itemize}
\item Log back in and navigate to article directory.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66|
\vspace{-3.1em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd nvcz|
\item Automatically adjust the t-statistic threshold for the entire document.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}:grep -rlI 3\.5 | xargs sed -i -e "s/3.5/3.0/g":
\item Clean up trace files and visualize what you have changed.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./cleanup.sh && git diff|
\item Compile, log out.
\item Get the document locally.
\vspace{-0.6em}
\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|scp YOURUSER@130.60.24.66:nvcz/article.pdf newarticle.pdf|
\end{itemize}
\end{frame}
\begin{frame}{Results}
You have:
\begin{itemize}
\item Packaged a new piece of scientific software, now automatically installable:
\begin{itemize}
\item by anybody else,
\item by you on any machine.
\end{itemize}
\item Updated data analysis visualizations in a reproducible article.
\begin{itemize}
\item It's that easy to contribute to well-organized research!
\end{itemize}
\end{itemize}
\vspace{-.5em}
\begin{columns}
\column{.5\linewidth}
\begin{figure}
\centering
\includegraphics[width=0.52\textwidth]{img/fig_old.png}
\end{figure}
\column{.5\linewidth}
\begin{figure}
\centering
\includegraphics[width=0.52\textwidth]{img/fig_new.png}
\end{figure}
\end{columns}
\end{frame}
\section{Meta}
\subsection{About this presentation}
\begin{frame}{What now?}
\begin{itemize}
\item Q\&A round\\
\textcolor{lg}{in a few seconds}
\item Get help packaging your own Free and Open Source Scientific Software\\
\textcolor{lg}{in a few minutes}
\item Get help with running your own Gentoo Linux data analysis server\\
\textcolor{lg}{in a few hours}
\item Spread package management in your field\\
\textcolor{lg}{tomorrow at work}
\end{itemize}
\end{frame}
\begin{frame}{These Slides}
\begin{itemize}
\item \textcolor{lg}{Latest Slides:}\\
\texttt{\href{https://thealternative.ch/ssm/slides.pdf}{thealternative.ch/ssm/slides.pdf}}
\item \textcolor{lg}{Source:}\\
\texttt{\href{https://gitlab.ethz.ch/thealternative/courses/tree/master/scientific_software_management}{\footnotesize gitlab.ethz.ch/thealternative/courses/tree/master/scientific\_software\_management}}
\end{itemize}
%separate sources from info so it looks nicer
\begin{itemize}
\item \textcolor{lg}{License:} \href{https://creativecommons.org/licenses/by-sa/3.0/}{CC BY-SA 3.0}
\end{itemize}
\end{frame}
\begin{frame}{References}
\bibliographystyle{IEEEtran}
\bibliography{./bib}
\end{frame}
\end{document}