\input{../.style/header} \title{Software Management for Open Science} \author{Horea Christian} \institute{SSC TheAlternative | ETHZ and UZH} \begin{document} \begin{frame} \titlepage \end{frame} \begin{frame}{These Slides} Type one link, click all others: \begin{itemize} \item Download \textcolor{lg}{\href{https://thealternative.ch/ssm/slides.pdf}{\texttt{thealternative.ch/ssm/slides.pdf}}} \end{itemize} \end{frame} \section{Requirements} \subsection{... for the demo session} \begin{frame}{SSH} Linux and MacOS: \begin{itemize} \item Check that you can run: \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66| \end{itemize} Windows: \begin{itemize} \item Download and launch “Git for Windows” from \textcolor{lg}{\href{https://git-for-windows.github.io}{\texttt{git-for-windows.github.io}}}. \item Check that you can run: \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66| \end{itemize} \end{frame} \begin{frame}{Command Line Text Editor} Usable via SSH and ubiquitous. There are many alternatives, but here we use \textcolor{lg}{\texttt{nano}}: \begin{itemize} \item Open file: \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|nano file| \item Save via: \keys{\ctrl + o}, \keys{\enter} \item Exit via: \keys{\ctrl + x} \end{itemize} \end{frame} \begin{frame}{Git and Social Coding} Git needs to know who you are. \begin{itemize} \item On the server, run: \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git config --global user.name "Your Name"| \vspace{-3.1em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git config --global user.email yourname@example.com| \end{itemize} GitHub is a \textbf{social coding platform} providing free accounts: \begin{itemize} \item Register under \textcolor{lg}{\href{https://github.com}{\texttt{github.com}}}. \item Use a password which you can remember. 
\end{itemize} \end{frame} \section{What?} \subsection{What is software management?} \begin{frame}{The Package} \begin{center} \textcolor{ldorange}{\Large Better organization for your research!} \end{center} \vspace{1.5em} A package is a software format which is (easily): \begin{multicols}{2} \begin{itemize} \item Distributable \item Integrated \item Testable \item Updateable \item Uninstallable \item Understandable \end{itemize} \end{multicols} \end{frame} \begin{frame}{Package Management --- best done automatically} \begin{minipage}{0.44\textwidth} Packages interact in complex and nontrivial ways: \begin{itemize} \item Version-dependent behaviour \item Optional features \item Incompatibilities \item Static/dynamic linking \end{itemize} \end{minipage} \begin{minipage}{0.55\textwidth} \begin{figure} \includegraphics[height=0.83\textheight]{img/ng_mi.png} \vspace{-1em} \caption{Minimal neuroscience package dependency stack~\cite{Ioanas2017}} \end{figure} \end{minipage} \end{frame} \begin{frame}{Binary Packages} \begin{figure} \centering \includegraphics[width=0.9\textwidth]{img/pm_d.png} \caption{Rudimentary overview of binary package distribution.} \end{figure} \begin{columns} \column{.5\linewidth} Advantages: \begin{itemize} \item Faster installation \item Less variable installation \end{itemize} \column{.5\linewidth} Disadvantages: \begin{itemize} \item No access to live software \item Man-in-the-middle \item Limited support for rolling release \end{itemize} \end{columns} \end{frame} \begin{frame}{Source-Based Packages} \begin{figure} \centering \includegraphics[width=0.9\textwidth]{img/pm_g.png} \caption{Rudimentary overview of source-based package distribution.} \end{figure} \begin{columns} \column{.5\linewidth} Advantages: \begin{itemize} \item Live software is a first-class citizen \item Thin wrapper for upstream \item Acutely version and linking aware \end{itemize} \column{.5\linewidth} Disadvantages: \begin{itemize} \item Slower installation \item More
variable installation \end{itemize} \end{columns} \end{frame} \section{Why?} \subsection{Why does open science require package management?} \begin{frame}{Quality} \begin{itemize} \item Make development more transparent. \item Get \textbf{constructive} feedback. \item Ask for help with concrete reproducible examples. \item Easily manage \textcolor{lg}{\href{https://github.com/gentoo-science/sci/issues}{\texttt{bugs/issues}}} and \textcolor{lg}{\href{https://github.com/gentoo-science/sci/pulls}{\texttt{contributions}}}. \item Implement proper version tracking. \end{itemize} \end{frame} \begin{frame}{Impact} \begin{itemize} \item Reach more potential users. \item Communicate with users to improve your software's usability. \item Retain more users. \end{itemize} \end{frame} \begin{frame}{Recognition} \begin{itemize} \item Establish proof of authorship. \item Publicize your innovative workflows, solutions, and data structures. \item Create a handle for attribution (including DOI), e.g.: \begin{itemize} \item BehavioPy: \textcolor{lg}{\href{http://doi.org/10.5281/zenodo.188169}{\texttt{10.5281/zenodo.188169}}} \item Nipype: \textcolor{lg}{\href{http://doi.org/10.5281/zenodo.50186}{\texttt{10.5281/zenodo.50186}}} \end{itemize} \end{itemize} \end{frame} \begin{frame}{Sustainability} A sustainable project \textbf{cannot} depend on environments remaining unchanged. \begin{itemize} \item Ensure long-term viability of your software. \item Avoid death-by-PhD. \item Give your funders their money's worth. \item Develop a lean start-up. \item Maintain a reliable and affordable infrastructure for your work. \end{itemize} \end{frame} \begin{frame}{Why Not?} \begin{itemize} \item Don't be afraid of your software not being “good/unique enough”! \item Don't wait until your software is “ready”! \item A lot of research software you are already using is not written by “professional” programmers.
\end{itemize} \end{frame} \section{How?} \subsection{How do I package my software?} \begin{frame}{Choose Appropriate Technologies} \begin{columns} \column{.5\linewidth} \begin{figure} \centering \includegraphics[width=0.6\textwidth]{img/gentoo.png} \caption{Gentoo Linux Logo by Gentoo Foundation and Lennart Andre Rolland - CC BY-SA/2.5.} \end{figure} \column{.5\linewidth} \begin{figure} \centering \includegraphics[width=0.6\textwidth]{img/python.png} \caption{Python Logo by Python Software Foundation.} \end{figure} \end{columns} \end{frame} \begin{frame}{Python Package Distribution} \begin{columns} \column{.55\linewidth} You can package your python software by writing \textbf{one short} file. \begin{itemize} \item Python provides its own limited package management, e.g. via \textcolor{lg}{\href{https://packaging.python.org/distributing/}{\texttt{setuptools}}}. \item Package metadata saved in \texttt{setup.py}, e.g. \textcolor{lg}{\href{https://github.com/IBT-FMI/SAMRI/blob/master/setup.py}{\texttt{SAMRI/setup.py}}}. \end{itemize} \column{.45\linewidth} \inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4]{python}{samri/setup.py} \end{columns} \end{frame} \begin{frame}{Gentoo Packages} \begin{columns} \column{.55\linewidth} A Gentoo package is \textbf{one short} file. \begin{itemize} \item Regardless of the programming language \item Can automatically interpret information contained in the package, e.g. 
in \textcolor{lg}{\texttt{setup.py}} \end{itemize} \column{.47\linewidth} \vspace{-3em} \inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4,firstline=1,lastline=36]{bash}{samri/samri-0.4.ebuild} \vspace{-3.2em} \inputminted[bgcolor=tlg,fontsize=\Tiny,tabsize=4,firstline=47,lastline=49]{bash}{samri/samri-0.4.ebuild} \end{columns} \end{frame} \begin{frame}{Reposit Your Software} \begin{figure} \centering \includegraphics[width=0.4\textwidth]{img/git.png} \caption{Git Logo by Jason Long (\href{https://creativecommons.org/licenses/by/3.0/}{CC-BY-3.0})} \end{figure} You can self-host, but hosting is also available via social coding platforms: \begin{multicols}{3} \begin{itemize} \item GitLab \item GitHub \item Bitbucket \end{itemize} \end{multicols} \end{frame} \section{Demo} \subsection{Put what you have learned into practice, and start typing...} \begin{frame}{A Few Basic Gentoo Commands} \begin{itemize} \item Check available package names, versions, and details. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|eix -v nibabel| \item See package dependencies. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery g nibabel| \item See which packages depend on a given package. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery d nibabel| \item See files installed by package. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|equery f nibabel| \item Try to install a new package. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -p psychopy| \end{itemize} \end{frame} \begin{frame}{Reproduce a Scientific Article} Novel frameworks, such as RepSeP~\cite{repsep}, permit articles to be written as software.
\begin{itemize} \item Get the source code for brand-new articles: \begin{itemize} \item Work-in-progress (reexecution time \SI{\approx 2}{\minute}) \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://gitlab.com/Chymera/nvcz.git | \item Preprint (reexecution time \SI{\approx 11}{\minute}) \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://bitbucket.org/TheChymera/irsabi.git | \end{itemize} \item Switch to article directory. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd nvcz| \item Attempt to reexecute. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./compile.sh|
%\mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git checkout 28b5d2d1|
\end{itemize} \end{frame} \begin{frame}{What happened? Dependency requirements happened.} \begin{center} \textcolor{ldorange}{\Large But you can solve the issue yourself!} \end{center} \vspace{2em} Write a new package atom for the package manager. \begin{itemize} \item Gentoo Linux makes this wholly autonomous. \item Solve one problem only once: \begin{itemize} \item Installation will be automatic on all your further systems. \item And on everybody else's systems! \end{itemize} \end{itemize} \end{frame} \begin{frame}{Write a Package Atom --- The Overlay} \begin{itemize} \item Fork an overlay on GitHub, e.g. from \textcolor{lg}{\href{https://github.com/TheChymera/overlay}{\texttt{github.com/TheChymera/overlay}}} \begin{figure} \vspace{-0.1em} \includegraphics[width=0.94\textwidth]{img/fork.png} \end{figure} \item Go back to your home directory. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd| \item Clone your fork of the overlay. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git clone https://github.com/YourName/overlay.git| \item Make the ebuild directory, and navigate into it.
\vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|mkdir -p overlay/sci-biology/samri && cd $_|%stopzone
\end{itemize} \end{frame} \begin{frame}{Transparency means less work for you!} You could write the following files from scratch, but you can also reuse analogous files from existing packages. \begin{itemize} \item Copy a metadata file from a Python package. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cp /usr/portage/dev-python/astropy/metadata.xml .| \item Copy an ebuild file from a Python package. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cp /usr/portage/dev-python/astropy/*2.0.1.ebuild samri-0.4.ebuild| \end{itemize} \end{frame} \begin{frame}{Write a Package Atom --- The Metadata File} \vspace{-0.9em} \inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4]{xml}{samri/metadata.xml} \end{frame} \begin{frame}{Write a Package Atom --- The Ebuild (header excerpt)} \inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=1,lastline=17]{bash}{samri/samri-0.4.ebuild} \end{frame} \begin{frame}{Write a Package Atom --- The Ebuild (dependency excerpts)} \begin{itemize} \item Compile-time dependency example: \vspace{-0.6em} \inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=18,lastline=25]{bash}{samri/samri-0.4.ebuild} \item Run-time dependency DIY (fill out, consulting \textcolor{lg}{\href{https://github.com/IBT-FMI/SAMRI}{\texttt{github.com/IBT-FMI/SAMRI}}}): \vspace{-0.6em} \inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=26,lastline=29]{bash}{samri/samri-0.4.ebuild} \vspace{-3em} \inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=47,lastline=47]{bash}{samri/samri-0.4.ebuild} \end{itemize} \end{frame} \begin{frame}{Write a Package Atom --- Finishing Touches} \begin{itemize} \item Not all packages are perfect.
Append the following to the ebuild: \vspace{-0.6em} \inputminted[bgcolor=tlg,fontsize=\scriptsize,tabsize=4,firstline=49,lastline=49]{bash}{samri/samri-0.4.ebuild} \item Check your work. Minor formatting differences (e.g. indents) are not critical. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|wget https://thealternative.ch/ssm/samri/samri-0.4.ebuild -P ~| \vspace{-3.1em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|colordiff ~/samri-0.4.ebuild samri-0.4.ebuild| \vspace{-3.1em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|wget https://thealternative.ch/ssm/samri/metadata.xml -P ~| \vspace{-3.1em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|colordiff ~/metadata.xml metadata.xml| \end{itemize} \end{frame} \begin{frame}{Social Coding --- Upload Your Package for Reuse} \begin{itemize} \item Download the data and make git aware of your files. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ebuild samri-0.4.ebuild manifest && git add .| \item Run a quality check. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|repoman full| \item Record and publish your work in version control. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|git commit -a && git push origin master| \item Include your work in a widely used overlay: visit \textcolor{lg}{\href{https://github.com/YourName/overlay}{\texttt{github.com/YourName/overlay}}}. \begin{figure} \vspace{-0.1em} \includegraphics[width=0.94\textwidth]{img/pr.png} \end{figure} \end{itemize} \end{frame} \begin{frame}{Use Your Work} \begin{itemize} \item Update the package index (as superuser). \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|eix-sync| \item Try out the install command yourself. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -pv samri| \item Install (as superuser).
\vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|emerge -v samri| \end{itemize} \end{frame} \begin{frame}{The Article Environment is Now Reproducible} \begin{itemize} \item Navigate back to the article directory. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd ~/nvcz| \item Compile. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./compile.sh| \item Log out from SSH: \keys{\ctrl + d} \item Get the document locally. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|scp YOURUSER@130.60.24.66:nvcz/article.pdf .| \end{itemize} \end{frame} \begin{frame}{And the Article is now Automated} \begin{itemize} \item Log back in and navigate to article directory. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|ssh YOURUSER@130.60.24.66| \vspace{-3.1em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|cd nvcz| \item Automatically adjust the t-statistic threshold for the entire document. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}:grep -rlI 3\.5 | xargs sed -i -e "s/3.5/3.0/g": \item Clean up trace files and visualize what you have changed. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|./cleanup.sh && git diff| \item Compile, log out. \item Get the document locally. \vspace{-0.6em} \mint[bgcolor=tlg,fontsize=\footnotesize]{bash}|scp YOURUSER@130.60.24.66:nvcz/article.pdf newarticle.pdf| \end{itemize} \end{frame} \begin{frame}{Results} You have: \begin{itemize} \item Packaged a new piece of scientific software, now automatically installable: \begin{itemize} \item by anybody else, \item by you on any machine. \end{itemize} \item Updated data analysis visualizations in a reproducible article. \begin{itemize} \item It's that easy to contribute to well-organized research! 
\end{itemize} \end{itemize} \vspace{-.5em} \begin{columns} \column{.5\linewidth} \begin{figure} \centering \includegraphics[width=0.52\textwidth]{img/fig_old.png} \end{figure} \column{.5\linewidth} \begin{figure} \centering \includegraphics[width=0.52\textwidth]{img/fig_new.png} \end{figure} \end{columns} \end{frame} \section{Meta} \subsection{About this presentation} \begin{frame}{What now?} \begin{itemize} \item Q\&A round\\ \textcolor{lg}{in a few seconds} \item Get help packaging your own Free and Open Source Scientific Software\\ \textcolor{lg}{in a few minutes} \item Get help with running your own Gentoo Linux data analysis server\\ \textcolor{lg}{in a few hours} \item Spread package management in your field\\ \textcolor{lg}{tomorrow at work} \end{itemize} \end{frame} \begin{frame}{These Slides} \begin{itemize} \item \textcolor{lg}{Latest Slides:}\\ \texttt{\href{https://thealternative.ch/ssm/slides.pdf}{thealternative.ch/ssm/slides.pdf}} \item \textcolor{lg}{Source:}\\ \texttt{\href{https://gitlab.ethz.ch/thealternative/courses/tree/master/scientific_software_management}{\footnotesize gitlab.ethz.ch/thealternative/courses/tree/master/scientific\_software\_management}} \end{itemize}
%separate sources from info so it looks nicer
\begin{itemize} \item \textcolor{lg}{License:} \href{https://creativecommons.org/licenses/by-sa/3.0/}{CC BY-SA 3.0} \end{itemize} \end{frame} \begin{frame}{References} \bibliographystyle{IEEEtran} \bibliography{./bib} \end{frame} \end{document}