To receive notifications about scheduled maintenance, please subscribe to the mailing-list gitlab-operations@sympa.ethz.ch. You can subscribe to the mailing-list at https://sympa.ethz.ch

Commit 501ba5da authored by sfritschi's avatar sfritschi
Browse files

Updated thesis

parent 2b45e8de
......@@ -33,6 +33,8 @@ def main():
print("Min. number of throats per pore: %d" % min(pore_throat_counts))
print("Avg. number of throats per pore: %f" % (sum(pore_throat_counts) / len(pore_throat_counts)))
del pore_throat_counts
nthreads = int(sys.argv[1])
print("Using: {} threads".format(nthreads))
targetsizes = compute_target_sizes(int(sys.argv[2]))
......
#!/bin/bash
# Targets Xeon Gold 6150 nodes with 36 cores/node
bsub -n 36 -W 04:00 -R fullnode python3 perf.py 36 2
bsub -n 36 -W 04:00 -R fullnode python3 compare.py 36 2
......@@ -19,6 +19,8 @@ def main():
print("Min. number of throats per pore: %d" % min(pore_throat_counts))
print("Avg. number of throats per pore: %f" % (sum(pore_throat_counts) / len(pore_throat_counts)))
del pore_throat_counts
target = [int(sys.argv[2]), int(sys.argv[3]), int(sys.argv[4])]
print("Target size: {}".format(target))
cutoff = 0.5 * max([basenet.ub[i] - basenet.lb[i] \
......
......@@ -18,6 +18,8 @@ def main():
print("Min. number of throats per pore: %d" % min(pore_throat_counts))
print("Avg. number of throats per pore: %f" % (sum(pore_throat_counts) / len(pore_throat_counts)))
del pore_throat_counts
target = [int(sys.argv[3]), int(sys.argv[4]), int(sys.argv[5])]
print("Target size: {}".format(target))
cutoff = 0.5 * max([basenet.ub[i] - basenet.lb[i] \
......
......@@ -48,19 +48,28 @@
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {2}{\ignorespaces Cell lists visualized in 2D. The neighborhood of the pore highlighted in red is marked in blue. Because the cell-size is the maximally permissible throat length $L_m$, only the pores contained within the blue region must be considered during neighbor search. Finally, the periodic buffer layers, containing copies of pores on the opposite side from the interior, are painted in orange.\relax }}{5}{figure.caption.2}\protected@file@percent }
\newlabel{fig:cell}{{2}{5}{Cell lists visualized in 2D. The neighborhood of the pore highlighted in red is marked in blue. Because the cell-size is the maximally permissible throat length $L_m$, only the pores contained within the blue region must be considered during neighbor search. Finally, the periodic buffer layers, containing copies of pores on the opposite side from the interior, are painted in orange.\relax }{figure.caption.2}{}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.2}Iterative Algorithm}{5}{subsection.3.2}\protected@file@percent }
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {3.3}Results}{6}{subsection.3.3}\protected@file@percent }
\@writefile{loa}{\defcounter {refsection}{0}\relax }\@writefile{loa}{\contentsline {algorithm}{\numberline {1}{\ignorespaces Connect pores in parallel\relax }}{6}{algorithm.1}\protected@file@percent }
\newlabel{alg:connect}{{1}{6}{Connect pores in parallel\relax }{algorithm.1}{}}
\abx@aux@cite{MEYER2021101592}
\abx@aux@segm{0}{0}{MEYER2021101592}
\@writefile{lot}{\defcounter {refsection}{0}\relax }\@writefile{lot}{\contentsline {table}{\numberline {1}{\ignorespaces Sample run of parallel pore-connecting algorithm using 4 threads. The generated network is 3 times as large as the base network in all directions. The maximal feasible number of throats is 103464, of which 27 were not realized due to there being no possible candidates left for these remaining pores.\relax }}{7}{table.caption.3}\protected@file@percent }
\newlabel{table:iter}{{1}{7}{Sample run of parallel pore-connecting algorithm using 4 threads. The generated network is 3 times as large as the base network in all directions. The maximal feasible number of throats is 103464, of which 27 were not realized due to there being no possible candidates left for these remaining pores.\relax }{table.caption.3}{}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {A}PETSc Installation}{7}{appendix.A}\protected@file@percent }
\newlabel{appendix:install}{{A}{7}{PETSc Installation}{appendix.A}{}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {B}Base Network}{7}{appendix.B}\protected@file@percent }
\newlabel{appendix:base}{{B}{7}{Base Network}{appendix.B}{}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {4}Performance Analysis}{7}{section.4}\protected@file@percent }
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {4.1}Strong Scaling}{7}{subsection.4.1}\protected@file@percent }
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {4.2}Comparison}{7}{subsection.4.2}\protected@file@percent }
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces For a fixed problem size where the network to be generated is $3^3 = 27$ times as large as the base network in total, we compute the \emph {speedup} for $1, 2, 4, 8, 16$ and $32$ threads. The time measurements of the parallel generation algorithm were conducted on an Euler IV node equipped with Intel Xeon Gold 6150 CPUs and $36$ cores in total.\relax }}{8}{figure.caption.4}\protected@file@percent }
\newlabel{fig:strong}{{3}{8}{For a fixed problem size where the network to be generated is $3^3 = 27$ times as large as the base network in total, we compute the \emph {speedup} for $1, 2, 4, 8, 16$ and $32$ threads. The time measurements of the parallel generation algorithm were conducted on an Euler IV node equipped with Intel Xeon Gold 6150 CPUs and $36$ cores in total.\relax }{figure.caption.4}{}}
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {4}{\ignorespaces Run-times of both serial and parallel algorithms in seconds, as measured on Euler IV node (same as above). The generated size on the $x$-axis refers to the number of times the base network fits inside the generated network. The parallel version is consistently between $10$ and $20$ times faster.\relax }}{8}{figure.caption.4}\protected@file@percent }
\newlabel{fig:comp}{{4}{8}{Run-times of both serial and parallel algorithms in seconds, as measured on Euler IV node (same as above). The generated size on the $x$-axis refers to the number of times the base network fits inside the generated network. The parallel version is consistently between $10$ and $20$ times faster.\relax }{figure.caption.4}{}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {subsection}{\numberline {4.3}Flow Simulation}{9}{subsection.4.3}\protected@file@percent }
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {A}PETSc Installation}{10}{appendix.A}\protected@file@percent }
\newlabel{appendix:install}{{A}{10}{PETSc Installation}{appendix.A}{}}
\@writefile{toc}{\defcounter {refsection}{0}\relax }\@writefile{toc}{\contentsline {section}{\numberline {B}Base Network}{10}{appendix.B}\protected@file@percent }
\newlabel{appendix:base}{{B}{10}{Base Network}{appendix.B}{}}
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {5}{\ignorespaces Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07$ mm in each spatial direction. The pore-arrangement is obviously \emph {not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph {porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax }}{10}{figure.caption.5}\protected@file@percent }
\newlabel{fig:base}{{5}{10}{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07$ mm in each spatial direction. The pore-arrangement is obviously \emph {not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph {porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax }{figure.caption.5}{}}
\abx@aux@refcontextdefaultsdone
\abx@aux@defaultrefcontext{0}{MEYER2021103936}{none/global//global/global}
\abx@aux@defaultrefcontext{0}{MEYER2021101592}{none/global//global/global}
\abx@aux@defaultrefcontext{0}{petsc-web-page}{none/global//global/global}
\abx@aux@defaultrefcontext{0}{hypre-web-page}{none/global//global/global}
\@writefile{lof}{\defcounter {refsection}{0}\relax }\@writefile{lof}{\contentsline {figure}{\numberline {3}{\ignorespaces Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07\cdot 10^{-3}$m in each spatial direction. The pore-arrangement is obviously \emph {not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph {porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax }}{8}{figure.caption.4}\protected@file@percent }
\newlabel{fig:base}{{3}{8}{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07\cdot 10^{-3}$m in each spatial direction. The pore-arrangement is obviously \emph {not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph {porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.\relax }{figure.caption.4}{}}
......@@ -2192,6 +2192,7 @@
<bcf:citekey order="5">hypre-web-page</bcf:citekey>
<bcf:citekey order="6">MEYER2021101592</bcf:citekey>
<bcf:citekey order="7">MEYER2021103936</bcf:citekey>
<bcf:citekey order="8">MEYER2021101592</bcf:citekey>
</bcf:section>
<!-- SORTING TEMPLATES -->
<bcf:sortingtemplate name="none">
......
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.4.27) 2 DEC 2021 23:24
This is pdfTeX, Version 3.14159265-2.6-1.40.20 (TeX Live 2019/Debian) (preloaded format=pdflatex 2021.4.27) 3 DEC 2021 21:44
entering extended mode
restricted \write18 enabled.
%&-line parsing enabled.
......@@ -983,38 +983,49 @@ LaTeX Info: Redefining \nameref on input line 29.
[1
{/var/lib/texmf/fonts/map/pdftex/updmap/pdftex.map}] [2]
<plots/flux_PETSC.png, id=79, 462.528pt x 346.896pt>
<plots/flux_PETSC.png, id=91, 462.528pt x 346.896pt>
File: plots/flux_PETSC.png Graphic file (type png)
<use plots/flux_PETSC.png>
Package pdftex.def Info: plots/flux_PETSC.png used on input line 74.
(pdftex.def) Requested size: 276.00105pt x 207.01175pt.
[3] [4 <./plots/flux_PETSC.png>] [5]
Package hyperref Info: bookmark level for unknown algorithm defaults to 0 on in
put line 224.
[6]
<plots/base.png, id=120, 462.528pt x 346.896pt>
put line 222.
[6] [7]
<plots/strong_scaling.png, id=139, 462.528pt x 346.896pt>
File: plots/strong_scaling.png Graphic file (type png)
<use plots/strong_scaling.png>
Package pdftex.def Info: plots/strong_scaling.png used on input line 284.
(pdftex.def) Requested size: 276.00105pt x 207.01175pt.
<plots/comparison_time.png, id=140, 462.528pt x 346.896pt>
File: plots/comparison_time.png Graphic file (type png)
<use plots/comparison_time.png>
Package pdftex.def Info: plots/comparison_time.png used on input line 287.
(pdftex.def) Requested size: 276.00105pt x 207.01175pt.
[8 <./plots/strong_scaling.png> <./plots/comparison_time.png>] [9]
<plots/base.png, id=153, 462.528pt x 346.896pt>
File: plots/base.png Graphic file (type png)
<use plots/base.png>
Package pdftex.def Info: plots/base.png used on input line 297.
(pdftex.def) Requested size: 276.00105pt x 207.01175pt.
[7] [8 <./plots/base.png>]
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 306.
Package atveryend Info: Empty hook `AfterLastShipout' on input line 306.
Package pdftex.def Info: plots/base.png used on input line 324.
(pdftex.def) Requested size: 207.0021pt x 155.25484pt.
[10 <./plots/base.png>] [11]
Package atveryend Info: Empty hook `BeforeClearDocument' on input line 333.
Package atveryend Info: Empty hook `AfterLastShipout' on input line 333.
(./thesis.aux)
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 306.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 306.
Package atveryend Info: Executing hook `AtVeryEndDocument' on input line 333.
Package atveryend Info: Executing hook `AtEndAfterFileList' on input line 333.
Package rerunfilecheck Info: File `thesis.out' has not changed.
(rerunfilecheck) Checksum: E25FA12DB519AB2249909CDF24D9E27D;687.
(rerunfilecheck) Checksum: 6A5B22A5A120A9EE6A3C2AAD1D353580;875.
Package logreq Info: Writing requests to 'thesis.run.xml'.
\openout1 = `thesis.run.xml'.
)
Here is how much of TeX's memory you used:
26209 strings out of 483107
491544 string characters out of 5964630
1146485 words of memory out of 5000000
40800 multiletter control sequences out of 15000+600000
26233 strings out of 483107
492002 string characters out of 5964630
1147181 words of memory out of 5000000
40815 multiletter control sequences out of 15000+600000
540150 words of font info for 51 fonts, out of 8000000 for 9000
59 hyphenation exceptions out of 8191
60i,8n,60p,2036b,1256s stack positions out of 5000i,500n,10000p,200000b,80000s
......@@ -1032,10 +1043,10 @@ s/cm/cmsl10.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/c
msy10.pfb></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmsy7.p
fb></usr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmti10.pfb></u
sr/share/texlive/texmf-dist/fonts/type1/public/amsfonts/cm/cmtt10.pfb>
Output written on thesis.pdf (8 pages, 335697 bytes).
Output written on thesis.pdf (11 pages, 395527 bytes).
PDF statistics:
206 PDF objects out of 1000 (max. 8388607)
175 compressed objects within 2 object streams
32 named destinations out of 1000 (max. 500000)
119 words of extra memory for PDF output out of 10000 (max. 10000000)
242 PDF objects out of 1000 (max. 8388607)
203 compressed objects within 3 object streams
39 named destinations out of 1000 (max. 500000)
153 words of extra memory for PDF output out of 10000 (max. 10000000)
......@@ -7,6 +7,9 @@
\BOOKMARK [1][-]{section.3}{Parallel Network Generation}{}% 7
\BOOKMARK [2][-]{subsection.3.1}{Cell Lists}{section.3}% 8
\BOOKMARK [2][-]{subsection.3.2}{Iterative Algorithm}{section.3}% 9
\BOOKMARK [2][-]{subsection.3.3}{Results}{section.3}% 10
\BOOKMARK [1][-]{appendix.A}{PETSc Installation}{}% 11
\BOOKMARK [1][-]{appendix.B}{Base Network}{}% 12
\BOOKMARK [1][-]{section.4}{Performance Analysis}{}% 10
\BOOKMARK [2][-]{subsection.4.1}{Strong Scaling}{section.4}% 11
\BOOKMARK [2][-]{subsection.4.2}{Comparison}{section.4}% 12
\BOOKMARK [2][-]{subsection.4.3}{Flow Simulation}{section.4}% 13
\BOOKMARK [1][-]{appendix.A}{PETSc Installation}{}% 14
\BOOKMARK [1][-]{appendix.B}{Base Network}{}% 15
No preview for this file type
No preview for this file type
......@@ -216,11 +216,9 @@
j^* = \argmin_{j \in \mathcal{N}(i)}\Bigr| ||\mathbf{p}_i - \mathbf{p}_j||_2 - L_t \Bigr|
\end{equation}
where $\mathcal{N}(i)$ is the set of all pores in adjacent cells to pore $i$. Finding $j^*$ for different pores is \emph{embarrassingly parallel} and can therefore be computed by a large number of threads, storing their results in shared memory. Each thread works on an even chunk of the pores located \emph{inside} the domain. Subsequently, these ideal matches are connected, while avoiding \textbf{conflicts} of pores seeking either a neighbor that is already fully-connected or that was previously connected to them. Finally, we repeat the two steps from above, now only considering pores that still have throats left in \emph{random} order (for improved \textbf{load balancing}), until \textbf{no more connections} can be found. Empirically, this process converges very fast and consistently, for different target sizes of the full domain, after merely 7-8 iterations. After the first iteration alone, only $\approx 18\%$ of all possible throats are left, see Table~\ref{table:iter}. Final iterations may be performed serially if only a few pores remain, so as to avoid costly spawning of threads without speed gain. The procedure is summarized as pseudo-code in Algorithm~\ref{alg:connect}.
\subsection{Results}
\hspace{0.5cm}The parallel algorithm achieves
\end{multicols}
\begin{algorithm}[ht]
\begin{algorithm}
\caption{Connect pores in parallel}
\begin{algorithmic}
\State Initialize $cellList$ using $pores$ and compute $totalThroats$
......@@ -248,7 +246,8 @@
\label{alg:connect}
\end{algorithm}
\begin{table}[ht]
\begin{table}
\vspace{-0.5cm}
\centering
\caption{Sample run of parallel pore-connecting algorithm using 4 threads. The generated network is 3 times as large as the base network in all directions. The maximal feasible number of throats is 103464, of which 27 were not realized due to there being no possible candidates left for these remaining pores.}
\begin{tabular}{|c|c|c|}
......@@ -270,6 +269,34 @@
\newpage
\begin{multicols}{2}
\section{Performance Analysis}
\hspace{0.5cm}In the following we discuss the results of our parallel algorithm and compare it to the existing serial implementation from \cite{MEYER2021101592}. In particular, we are interested in the \textbf{scaling} of our method with respect to the number of processes and the difference in \textbf{run-time} between the two implementations for varying target sizes of the generated domain. All performance measurements were conducted on \emph{Euler} and the specific type of node used is mentioned together with the results.
\subsection{Strong Scaling}
\hspace{0.5cm}Firstly, we investigate the \textbf{strong scaling} as shown in Figure~\ref{fig:strong}. We observe that our method scales fairly well for an increasing number of processes, as the measurements lie reasonably close to the \emph{ideal} case. As expected, the speedup is not perfect, since the network generation function is not fully parallelized. In particular, the distribution of pores using dendrogram-based clustering is still serial. Additionally, connecting the individual pores cannot be parallelized in a meaningful way, as it involves substantial complexity concerning synchronization. It is also significantly faster and computationally cheaper than the parallel match finding. Despite all this, because these serial parts only account for a small fraction of the total computation time, we still observe a large \textbf{speedup} of up to a factor $20$.
\subsection{Comparison}
\hspace{0.5cm}Secondly, we compare the performance of the concurrent algorithm with that of the serial version. To do this, we consider increasing target sizes, starting from the original size of the network. Next, we generate networks that are twice as large in the first, then also the second spatial direction, and so on. We continue this process of doubling individual dimension sizes until we reach a network that is 4 times as large in every direction, or in total 64 times as large as the original base network. The results, which are depicted in Figure~\ref{fig:comp}, indicate a drastic improvement in terms of computation time over the original implementation.
\end{multicols}
\begin{figure}[ht!]
\vspace{-0.5cm}
\centering
\includegraphics[width=0.8\textwidth]{plots/strong_scaling.png}
\caption{For a fixed problem size where the network to be generated is $3^3 = 27$ times as large as the base network in total, we compute the \emph{speedup} for $1, 2, 4, 8, 16$ and $32$ threads. The time measurements of the parallel generation algorithm were conducted on an Euler IV node equipped with Intel Xeon Gold 6150 CPUs and $36$ cores in total.}
\label{fig:strong}
\includegraphics[width=0.8\textwidth]{plots/comparison_time.png}
\caption{Run-times of both serial and parallel algorithms in seconds, as measured on an Euler IV node (same as above). The generated size on the $x$-axis refers to the number of times the base network fits inside the generated network. The parallel version is consistently between $10$ and $20$ times faster.}
\label{fig:comp}
\end{figure}
\newpage
\begin{multicols}{2}
\subsection{Flow Simulation}
\hspace{0.5cm}In the next step, we evaluate the performance of our distributed flow solver as introduced in Section 2.
\end{multicols}
\newpage
\appendix
\begin{center} \Large{\textbf{Appendix}} \end{center}
\section{PETSc Installation}
......@@ -289,16 +316,16 @@
\section{Base Network}
\label{appendix:base}
Throughout this thesis we rely on existing networks obtained via tomographic scans to serve as a \emph{basis} for \textbf{generation} of larger networks and \textbf{simulation} of network flow. The statistics of the base network mentioned previously are detailed in Figure ~\ref{fig:base}.
Throughout this thesis we rely on existing networks obtained via tomographic scans to serve as a \emph{basis} for \textbf{generation} of larger networks and \textbf{simulation} of network flow. The statistics of the base network mentioned previously and used in the code are detailed in Figure~\ref{fig:base}.
\begin{figure}[ht]
\vspace{-0.5cm}
\begin{figure}[ht!]
\vspace{-0.3cm}
\centering
\includegraphics[width=0.8\textwidth]{plots/base.png}
\caption{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07\cdot 10^{-3}$m in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.}
\includegraphics[width=0.6\textwidth]{plots/base.png}
\caption{Network consisting of 2636 pores and 4291 throats, inscribed within a cube extending $1.07$ mm in each spatial direction. The pore-arrangement is obviously \emph{not uniform}, as can be seen by the clustering of pores in some regions, while others are mostly undisturbed. The \emph{porosity}, measured as the relative fraction of volume taken up by the void-space geometry, is roughly $32\%$.}
\label{fig:base}
\end{figure}
\newpage
\centering
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment