LennardJones.h 7.74 KB
Newer Older
Damian S. Steiger's avatar
Damian S. Steiger committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
/* File:   LennardJones.h */
/* Copyright 2015 ETH Zurich. All Rights Reserved. */
#ifndef LENNARD_JONES_H_AHQBPV5S
#define LENNARD_JONES_H_AHQBPV5S

#include <cassert>
#include <numeric>
#include <utility>
#include "types.h"

/* TODO: Step 0: Include appropriate header to access AVX intrinsics */
#include <x86intrin.h>


class LennardJones
{
public:
    LennardJones(const value_type rm, const value_type epsilon)
    : rm_sq_(rm*rm), eps_(epsilon)
    { }


    // sequential
    inline value_type diff(configuration_type const& c, std::pair<value_type,value_type> const& newpos) const
    {
        assert( c.first.size() == c.second.size() );

        const std::size_t N = c.first.size()-1;

        value_type dE = 0.0;

        for(std::size_t i=0; i < N; ++i){
            const value_type dx = c.first[N] - c.first[i];
            const value_type new_dx = newpos.first - c.first[i];
            const value_type dy = c.second[N] - c.second[i];
            const value_type new_dy = newpos.second - c.second[i];
            const value_type rm_d_sq = rm_sq_ / (dx*dx + dy*dy);
            const value_type new_rm_d_sq = rm_sq_ / (new_dx*new_dx + new_dy*new_dy);
            const value_type rm_d_6th = rm_d_sq*rm_d_sq*rm_d_sq;
            const value_type new_rm_d_6th = new_rm_d_sq*new_rm_d_sq*new_rm_d_sq;

            dE += eps_*((new_rm_d_6th*new_rm_d_6th - 2*new_rm_d_6th) - (rm_d_6th*rm_d_6th - 2*rm_d_6th));
        }

        return dE;
    }


#if defined(_AVX256_)
    inline value_type diff_avx(configuration_type const& c, std::pair<value_type,value_type> const& newpos) const
    {
        assert( c.first.size() == c.second.size() );

        const std::size_t N = c.first.size()-1;

        /* TODO: Step 1: */
        const __m256 mmrm_sq = _mm256_set1_ps(rm_sq_);
        const __m256 mmeps = _mm256_set1_ps(eps_);
        const __m256 mmxp = _mm256_set1_ps(c.first[N]);
        const __m256 mmyp = _mm256_set1_ps(c.second[N]);
        const __m256 mmxnew = _mm256_set1_ps(newpos.first);
        const __m256 mmynew = _mm256_set1_ps(newpos.second);
        const __m256 mm2 = _mm256_set1_ps(-2.0f);

        /* TODO: Step 2: */
        __m256 mmdE = _mm256_setzero_ps();

        /* TODO: Step 3: */
        const unsigned r_width = 8;
        for(std::size_t i=0; i < N/r_width; ++i){
            const __m256 mmxi = _mm256_load_ps(&c.first[i*r_width]);
            const __m256 mmyi = _mm256_load_ps(&c.second[i*r_width]);

            const __m256 dx = _mm256_sub_ps(mmxp,mmxi);
            const __m256 new_dx = _mm256_sub_ps(mmxnew,mmxi);
            const __m256 dy = _mm256_sub_ps(mmyp,mmyi);
            const __m256 new_dy = _mm256_sub_ps(mmynew,mmyi);

#ifdef _ACCURATE_DIV_
            const __m256 rm_d_sq =
            _mm256_div_ps(mmrm_sq,_mm256_add_ps(_mm256_mul_ps(dx,dx),_mm256_mul_ps(dy,dy)));
            const __m256 new_rm_d_sq =
            _mm256_div_ps(mmrm_sq,_mm256_add_ps(_mm256_mul_ps(new_dx,new_dx),_mm256_mul_ps(new_dy,new_dy)));
#else
            const __m256 rm_d_sq =
            _mm256_mul_ps(mmrm_sq,_mm256_rcp_ps(_mm256_add_ps(_mm256_mul_ps(dx,dx),_mm256_mul_ps(dy,dy))));
            const __m256 new_rm_d_sq =
            _mm256_mul_ps(mmrm_sq,_mm256_rcp_ps(_mm256_add_ps(_mm256_mul_ps(new_dx,new_dx),_mm256_mul_ps(new_dy,new_dy))));
#endif /* _ACCURATE_DIV_ */

            const __m256 rm_d_6th =
            _mm256_mul_ps(_mm256_mul_ps(rm_d_sq,rm_d_sq),rm_d_sq);

            const __m256 new_rm_d_6th =
            _mm256_mul_ps(_mm256_mul_ps(new_rm_d_sq,new_rm_d_sq),new_rm_d_sq);

            mmdE = _mm256_add_ps(mmdE,_mm256_mul_ps(mmeps,_mm256_sub_ps(
                            _mm256_add_ps(_mm256_mul_ps(new_rm_d_6th,new_rm_d_6th),_mm256_mul_ps(mm2,new_rm_d_6th)),
                            _mm256_add_ps(_mm256_mul_ps(rm_d_6th,rm_d_6th),_mm256_mul_ps(mm2,rm_d_6th)))));
        }

        /* TODO: Step 4: */
        float sums[r_width];
        _mm256_store_ps(sums,mmdE);
        float dE = std::accumulate(sums,sums+r_width,0.0);

        /* TODO: Step 5: */
        for(std::size_t i=(N/r_width)*r_width; i < N; ++i){
            const float dx = c.first[N] - c.first[i];
            const float new_dx = newpos.first - c.first[i];
            const float dy = c.second[N] - c.second[i];
            const float new_dy = newpos.second - c.second[i];
            const float rm_d_sq = rm_sq_ / (dx*dx + dy*dy);
            const float new_rm_d_sq = rm_sq_ / (new_dx*new_dx + new_dy*new_dy);
            const float rm_d_6th = rm_d_sq*rm_d_sq*rm_d_sq;
            const float new_rm_d_6th = new_rm_d_sq*new_rm_d_sq*new_rm_d_sq;

            dE += eps_*((new_rm_d_6th*new_rm_d_6th - 2*new_rm_d_6th) - (rm_d_6th*rm_d_6th - 2*rm_d_6th));
        }

        return dE;
    }

#elif defined(_ANY_)

    // adapts to chosen register type in types.h (SSE or AVX)
    inline value_type diff_any(configuration_type const& c, std::pair<value_type,value_type> const& newpos) const
    {
        assert( c.first.size() == c.second.size() );

        const std::size_t N = c.first.size()-1;

        const register_type mmrm_sq(rm_sq_);
        const register_type mmeps(eps_);
        const register_type mmxp(c.first[N]);
        const register_type mmyp(c.second[N]);
        const register_type mmxnew(newpos.first);
        const register_type mmynew(newpos.second);

        register_type mmdE;

        const unsigned r_width = mmdE.get_register_width();

        for(std::size_t i=0; i < N/r_width; ++i){
            const register_type mmxi(&c.first[i*r_width]);
            const register_type mmyi(&c.second[i*r_width]);
            const register_type dx = mmxp - mmxi;
            const register_type new_dx = mmxnew - mmxi;
            const register_type dy = mmyp - mmyi;
            const register_type new_dy = mmynew - mmyi;
            const register_type rm_d_sq = mmrm_sq / (dx*dx + dy*dy);
            const register_type new_rm_d_sq = mmrm_sq / (new_dx*new_dx + new_dy*new_dy);
            const register_type rm_d_6th = rm_d_sq*rm_d_sq*rm_d_sq;
            const register_type new_rm_d_6th = new_rm_d_sq*new_rm_d_sq*new_rm_d_sq;

            mmdE += mmeps*((new_rm_d_6th*new_rm_d_6th - 2*new_rm_d_6th) - (rm_d_6th*rm_d_6th - 2*rm_d_6th));
        }

        value_type dE = mmdE.sum();

        for(std::size_t i=(N/r_width)*r_width; i < N; ++i){
            const value_type dx = c.first[N] - c.first[i];
            const value_type new_dx = newpos.first - c.first[i];
            const value_type dy = c.second[N] - c.second[i];
            const value_type new_dy = newpos.second - c.second[i];
            const value_type rm_d_sq = rm_sq_ / (dx*dx + dy*dy);
            const value_type new_rm_d_sq = rm_sq_ / (new_dx*new_dx + new_dy*new_dy);
            const value_type rm_d_6th = rm_d_sq*rm_d_sq*rm_d_sq;
            const value_type new_rm_d_6th = new_rm_d_sq*new_rm_d_sq*new_rm_d_sq;

            dE += eps_*((new_rm_d_6th*new_rm_d_6th - 2*new_rm_d_6th) - (rm_d_6th*rm_d_6th - 2*rm_d_6th));
        }

        return dE;
    }
#endif


    inline value_type operator()(configuration_type const& c) const
    {
        assert( c.first.size() == c.second.size() );
        std::size_t const n = c.first.size();
        value_type energy = 0.0;
        for(std::size_t i=0; i < n; ++i) {
            value_type energy_part = 0.0;
            for(std::size_t j=0; j < i; ++j)
            {
                value_type const dx = c.first[i] - c.first[j];
                value_type const dy = c.second[i] - c.second[j];
                value_type const rm_d_sq = rm_sq_ / (dx*dx + dy*dy);
                value_type const rm_d_6th = rm_d_sq*rm_d_sq*rm_d_sq;
                energy_part += eps_*(rm_d_6th*rm_d_6th - 2*rm_d_6th);
            }
            energy += energy_part;
        }
        return energy;
    }


private:

    const value_type rm_sq_;
    const value_type eps_;
};

#endif /* LENNARD_JONES_H_AHQBPV5S */