\@writefile{lof}{\defcounter{refsection}{0}\relax}\@writefile{lof}{\contentsline{figure}{\numberline{2}{\ignorespaces Cell lists visualized in 2D. The neighborhood of the pore highlighted in red is marked in blue. Because the cell-size is the maximally permissible throat length $L_m$, only the pores contained within the blue region must be considered during neighbor search. Finally, the periodic buffer layers, containing copies of pores on the opposite side from the interior, are painted in orange.\relax}}{5}{figure.caption.2}\protected@file@percent }

\newlabel{fig:cell}{{2}{5}{Cell lists visualized in 2D. The neighborhood of the pore highlighted in red is marked in blue. Because the cell-size is the maximally permissible throat length $L_m$, only the pores contained within the blue region must be considered during neighbor search. Finally, the periodic buffer layers, containing copies of pores on the opposite side from the interior, are painted in orange.\relax}{figure.caption.2}{}}

\@writefile{loa}{\defcounter{refsection}{0}\relax}\@writefile{loa}{\contentsline{algorithm}{\numberline{1}{\ignorespaces Connect pores in parallel\relax}}{6}{algorithm.1}\protected@file@percent }

\newlabel{alg:connect}{{1}{6}{Connect pores in parallel\relax}{algorithm.1}{}}

\abx@aux@cite{MEYER2021101592}

\abx@aux@segm{0}{0}{MEYER2021101592}

\@writefile{lot}{\defcounter{refsection}{0}\relax}\@writefile{lot}{\contentsline{table}{\numberline{1}{\ignorespaces Sample run of parallel pore-connecting algorithm using 4 threads. The generated network is 3 times as large as the base network in all directions. The maximal feasible number of throats is 103464, of which 27 were not realized due to there being no possible candidates left for these remaining pores.\relax}}{7}{table.caption.3}\protected@file@percent }

\newlabel{table:iter}{{1}{7}{Sample run of parallel pore-connecting algorithm using 4 threads. The generated network is 3 times as large as the base network in all directions. The maximal feasible number of throats is 103464, of which 27 were not realized due to there being no possible candidates left for these remaining pores.\relax}{table.caption.3}{}}

\@writefile{lof}{\defcounter{refsection}{0}\relax}\@writefile{lof}{\contentsline{figure}{\numberline{3}{\ignorespaces For a fixed problem size where the network to be generated is $3^3=27$ times as large as the base network in total, we compute the \emph{speedup} for $1, 2, 4, 8, 16$ and $32$ threads. The time measurements of the parallel generation algorithm were conducted on an Euler IV node equipped with Intel Xeon Gold 6150 CPUs and $36$ cores in total.\relax}}{8}{figure.caption.4}\protected@file@percent }

\newlabel{fig:strong}{{3}{8}{For a fixed problem size where the network to be generated is $3^3=27$ times as large as the base network in total, we compute the \emph{speedup} for $1, 2, 4, 8, 16$ and $32$ threads. The time measurements of the parallel generation algorithm were conducted on an Euler IV node equipped with Intel Xeon Gold 6150 CPUs and $36$ cores in total.\relax}{figure.caption.4}{}}

\@writefile{lof}{\defcounter{refsection}{0}\relax}\@writefile{lof}{\contentsline{figure}{\numberline{4}{\ignorespaces Run-times of both serial and parallel algorithms in seconds, as measured on Euler IV node (same as above). The generated size on the $x$-axis refers to the number of times the base network fits inside the generated network. The parallel version is consistently between $10$ and $20$ times faster.\relax}}{8}{figure.caption.4}\protected@file@percent }

\newlabel{fig:comp}{{4}{8}{Run-times of both serial and parallel algorithms in seconds, as measured on Euler IV node (same as above). The generated size on the $x$-axis refers to the number of times the base network fits inside the generated network. The parallel version is consistently between $10$ and $20$ times faster.\relax}{figure.caption.4}{}}

\@writefile{lof}{\defcounter{refsection}{0}\relax}\@writefile{lof}{\contentsline{figure}{\numberline{5}{\ignorespaces Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07$ mm in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax}}{10}{figure.caption.5}\protected@file@percent }

\newlabel{fig:base}{{5}{10}{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07$ mm in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax}{figure.caption.5}{}}

\@writefile{lof}{\defcounter{refsection}{0}\relax}\@writefile{lof}{\contentsline{figure}{\numberline{3}{\ignorespaces Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07\cdot10^{-3}$m in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax}}{8}{figure.caption.4}\protected@file@percent }

\newlabel{fig:base}{{3}{8}{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07\cdot10^{-3}$m in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax}{figure.caption.4}{}}

Where $\mathcal{N}(i)$ is the set of all pores in adjacent cells to pore $i$. Finding $j^*$ for different pores is \emph{embarrassingly parallel} and can therefore be computed by a large number of threads, storing their results in shared memory. Each thread works on an even chunk of the pores located \emph{inside} the domain. Subsequently, these ideal matches are connected, while avoiding \textbf{conflicts} of pores seeking either a neighbor that is already fully connected or that was previously connected to them. Finally, we repeat the two steps from above, now considering only pores that still have throats left, in \emph{random} order (for improved \textbf{load balancing}), until \textbf{no more connections} can be found. Empirically, this process converges very fast and consistently, for different target sizes of the full domain, after merely 7--8 iterations. After the first iteration alone, only $\approx18\%$ of all possible throats remain; see Table~\ref{table:iter}. Final iterations may be performed serially if only a few pores remain, so as to avoid costly spawning of threads without speed gain. The procedure is summarized as pseudo-code in Algorithm~\ref{alg:connect}.

\subsection{Results}

\hspace{0.5cm}The parallel algorithm achieves

\end{multicols}

\begin{algorithm}[ht]

\begin{algorithm}

\caption{Connect pores in parallel}

\begin{algorithmic}

\State Initialize $cellList$ using $pores$ and compute $totalThroats$

...

...

@@ -248,7 +246,8 @@

\label{alg:connect}

\end{algorithm}

\begin{table}[ht]

\begin{table}

\vspace{-0.5cm}

\centering

\caption{Sample run of parallel pore-connecting algorithm using 4 threads. The generated network is 3 times as large as the base network in all directions. The maximal feasible number of throats is 103464, of which 27 were not realized due to there being no possible candidates left for these remaining pores.}

\begin{tabular}{|c|c|c|}

...

...

@@ -270,6 +269,34 @@

\newpage

\begin{multicols}{2}

\section{Performance Analysis}

\hspace{0.5cm}In the following we discuss the results of our parallel algorithm and compare it to the existing serial implementation from~\cite{MEYER2021101592}. In particular, we are interested in the \textbf{scaling} of our method with respect to the number of processes and the difference in \textbf{run-time} between the two implementations for varying target sizes of the generated domain. All performance measurements were conducted on \emph{Euler} and the specific type of node used is mentioned together with the results.

\subsection{Strong Scaling}

\hspace{0.5cm}Firstly, we investigate the \textbf{strong scaling} as shown in Figure~\ref{fig:strong}. We observe that our method scales fairly well for an increasing number of processes, as the measurements lie reasonably close to the \emph{ideal} case. As expected, the speedup is not perfect, since the network generation function is not fully parallelized. In particular, the distribution of pores using dendrogram-based clustering is still serial. Additionally, connecting the individual pores cannot be parallelized in a meaningful way, as it involves substantial complexity concerning synchronization. It is also significantly faster and computationally cheaper than the parallel match finding. Despite all this, because these serial parts only account for a small fraction of the total computation time, we still observe a large \textbf{speedup} of up to a factor $20$.

\subsection{Comparison}

\hspace{0.5cm}Secondly, we compare the performance of the concurrent algorithm with that of the serial version. To do this, we consider increasing target sizes, starting from the original size of the network. Next, we generate networks that are twice as large in the first, then also in the second spatial direction, etc. We continue this process of doubling individual dimension sizes until we reach a network that is 4 times as large in every direction, or in total 64 times as large as the original base network. The results, which are depicted in Figure~\ref{fig:comp}, indicate a drastic improvement in terms of computation time over the original implementation.

\caption{For a fixed problem size where the network to be generated is $3^3=27$ times as large as the base network in total, we compute the \emph{speedup} for $1, 2, 4, 8, 16$ and $32$ threads. The time measurements of the parallel generation algorithm were conducted on an Euler IV node equipped with Intel Xeon Gold 6150 CPUs and $36$ cores in total.}

\caption{Run-times of both serial and parallel algorithms in seconds, as measured on an Euler IV node (same as above). The generated size on the $x$-axis refers to the number of times the base network fits inside the generated network. The parallel version is consistently between $10$ and $20$ times faster.}

\label{fig:comp}

\end{figure}

\newpage

\begin{multicols}{2}

\subsection{Flow Simulation}

\hspace{0.5cm}In the next step, we evaluate the performance of our distributed flow solver as introduced in Section~2.

Throughout this thesis we rely on existing networks obtained via tomographic scans to serve as a \emph{basis} for \textbf{generation} of larger networks and \textbf{simulation} of network flow. The statistics of the base network mentioned previously are detailed in Figure~\ref{fig:base}.

Throughout this thesis we rely on existing networks obtained via tomographic scans to serve as a \emph{basis} for \textbf{generation} of larger networks and \textbf{simulation} of network flow. The statistics of the base network mentioned previously and used in the code are detailed in Figure~\ref{fig:base}.

\caption{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07\cdot10^{-3}$\,m in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.}

\caption{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07$ mm in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.}