-
Notifications
You must be signed in to change notification settings - Fork 2
/
displacement.cpp
418 lines (375 loc) · 16.7 KB
/
displacement.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
#include "displacement.hpp"
#include <math.h>
#include <stdlib.h>
#include <fftw3.h>
#include <gsl/gsl_rng.h>
#include <exception>
//For memset
#include <string.h>
#include <omp.h>
#include "power.hpp"
/**Initialise the memory for the FFTs*/
DisplacementFields::DisplacementFields(size_t Nmesh, int Seed, double Box, bool twolpt): Nmesh(Nmesh), twolpt(twolpt), Seed(Seed), Box(Box), Forward_plan2(), Inverse_plan_grad(), twosrc(NULL), ctwosrc(NULL), cdigrad(), digrad()
{
size_t bytes = sizeof(double) * 2*Nmesh*Nmesh*(Nmesh/2+1);
Disp = (double *) fftw_malloc(bytes);
if(Disp)
printf("Nmesh = %lu. Allocated %lu MB for FFTs\n",Nmesh, bytes / (1024 * 1024));
else{
fprintf(stderr, "Nmesh = %lu. Failed to allocate %lu MB for FFTs\n",Nmesh, bytes / (1024 * 1024));
throw std::bad_alloc();
}
Cdata = (fftw_complex *) Disp; /* transformed array */
if (twolpt) {
size_t two_bytes = sizeof(float) * 2*Nmesh*Nmesh*(Nmesh/2+1);
twosrc = (float *) fftwf_malloc(two_bytes);
ctwosrc = (fftwf_complex *) twosrc;
for(int i=0; i<3; i++){
cdigrad[i] = (fftwf_complex *) malloc(two_bytes);
digrad[i] = (float *) cdigrad[i];
}
/*Check memory allocation*/
if(cdigrad[0] && cdigrad[1] && cdigrad[2] && twosrc)
printf("Allocated %lu MB for 2LPT term\n",4*two_bytes / (1024 * 1024));
else{
fprintf(stderr, "Failed to allocate %lu MB for 2LPT term\n",4*two_bytes / (1024 * 1024));
throw std::bad_alloc();
}
}
if(!fftw_init_threads()){
fprintf(stderr,"Error initialising fftw threads\n");
exit(1);
}
fftw_plan_with_nthreads(omp_get_num_procs());
Inverse_plan = fftw_plan_dft_c2r_3d(Nmesh, Nmesh, Nmesh,Cdata,Disp, FFTW_ESTIMATE);
if(twolpt) {
Forward_plan2 = fftwf_plan_dft_r2c_3d(Nmesh, Nmesh, Nmesh,twosrc,ctwosrc, FFTW_ESTIMATE);
for(int i=0; i<3; i++){
Inverse_plan_grad[i] = fftwf_plan_dft_c2r_3d(Nmesh, Nmesh, Nmesh,cdigrad[i],digrad[i], FFTW_ESTIMATE);
}
}
}
//Free the memory in the FFTs
DisplacementFields::~DisplacementFields()
{
fftw_free(Disp);
fftw_destroy_plan(Inverse_plan);
if (twolpt) {
/* Free */
fftwf_free(twosrc);
fftwf_destroy_plan(Forward_plan2);
for(int i=0;i<3;i++){
fftwf_free(cdigrad[i]);
fftwf_destroy_plan(Inverse_plan_grad[i]);
}
}
}
/**Little macro to work the storage order of the FFT.*/
inline int64_t KVAL(const size_t n, const size_t Nmesh)
{
return (n < Nmesh / 2 ? n : n - Nmesh);
}
#ifdef CORRECT_CIC
/*Helper function to deconvolve 1D CIC interpolation*/
inline double oneinvwindow(int kx, int n)
{
double iwx = 1.0;
if(kx){
iwx=M_PI*kx/static_cast<double>(n);
iwx=iwx/sin(iwx);
}
return iwx;
}
/* do deconvolution of CIC interpolation */
double invwindow(int kx,int ky,int kz,int n)
{
if(!n)
return 0;
double iwx = oneinvwindow(kx,n);
double iwy = oneinvwindow(ky,n);
double iwz = oneinvwindow(kz,n);
return pow(iwx*iwy*iwz,2);
}
#endif
/**Initialise a table of seeds for the random number generator*/
unsigned int * initialize_rng(int Seed, size_t Nmesh)
{
gsl_rng * random_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
gsl_rng_set(random_generator, Seed);
unsigned int * seedtable;
if(!(seedtable =(unsigned int *) malloc(Nmesh * Nmesh * sizeof(unsigned int))))
throw std::bad_alloc();
for(size_t i = 0; i < Nmesh / 2; i++)
{
size_t j;
for(j = 0; j < i; j++)
seedtable[i * Nmesh + j] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i + 1; j++)
seedtable[j * Nmesh + i] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i; j++)
seedtable[(Nmesh - 1 - i) * Nmesh + j] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i + 1; j++)
seedtable[(Nmesh - 1 - j) * Nmesh + i] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i; j++)
seedtable[i * Nmesh + (Nmesh - 1 - j)] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i + 1; j++)
seedtable[j * Nmesh + (Nmesh - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i; j++)
seedtable[(Nmesh - 1 - i) * Nmesh + (Nmesh - 1 - j)] = 0x7fffffff * gsl_rng_uniform(random_generator);
for(j = 0; j < i + 1; j++)
seedtable[(Nmesh - 1 - j) * Nmesh + (Nmesh - 1 - i)] = 0x7fffffff * gsl_rng_uniform(random_generator);
}
gsl_rng_free(random_generator);
return seedtable;
}
/**Function to compute Zeldovich displacement fields using a double Fourier transform*/
lpt_data DisplacementFields::displacement_fields(const int type, part_grid& Pgrid, PowerSpec * PSpec, bool RayleighScatter)
{
const double fac = pow(2 * M_PI / Box, 1.5);
//Re-initialize every time this is called, so each particle type has the same phases
unsigned int *seedtable = initialize_rng(Seed, Nmesh);
double maxdisp;
const size_t fftsize = 2*Nmesh*Nmesh*(Nmesh/2+1);
double maxdisp2=0;
lpt_data outdata(Pgrid.GetNumPart(type), twolpt);
if(twolpt)
memset(twosrc, 0, fftsize*sizeof(float));
for(int axes = 0; axes < 3; axes++) {
printf("Starting Zeldovich axis %d.\n", axes);
/* first, clean the array */
memset(Disp, 0, fftsize*sizeof(double));
#pragma omp parallel
{
gsl_rng * random_generator = gsl_rng_alloc(gsl_rng_ranlxd1);
#pragma omp for
for(size_t i = 0; i < Nmesh; i++) {
for(size_t j = 0; j < Nmesh; j++) {
gsl_rng_set(random_generator, seedtable[i * Nmesh + j]);
for(size_t k = 0; k < Nmesh / 2; k++) {
double kvec[3], kmag, kmag2, p_of_k;
double delta, phase, ampl;
phase = gsl_rng_uniform(random_generator) * 2 * M_PI;
//We don't actually need this if RayleighScatter is off,
//but we do it anyway to keep the phases the same.
do
ampl = gsl_rng_uniform(random_generator);
while(ampl == 0);
//Skip zero mode as this contains the field mean anyway.
if(i == 0 && j == 0 && k == 0)
continue;
kvec[0] = KVAL(i, Nmesh) * 2 * M_PI / Box;
kvec[1] = KVAL(j, Nmesh) * 2 * M_PI / Box;
kvec[2] = KVAL(k, Nmesh) * 2 * M_PI / Box;
kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
kmag = sqrt(kmag2);
/* select a sphere in k-space. No idea why you would do this, so disabled */
/*if(SphereMode){
if(kmag * Box / (2 * M_PI) > Nmesh / 2)
continue;
}*/
p_of_k = PSpec->power(kmag, type);
// printf(" k %d %g %g \n",Type,kmag,p_of_k);
if(RayleighScatter)
p_of_k *= -log(ampl);
delta = fac * sqrt(p_of_k) ;
#ifdef CORRECT_CIC
/* do deconvolution of CIC interpolation */
delta *= invwindow(KVAL(i,Nmesh),KVAL(j,Nmesh),KVAL(k,Nmesh),Nmesh);
#endif
if(k > 0) {
size_t index = (i * Nmesh + j) * (Nmesh / 2 + 1) + k;
(Cdata[index])[0] = -kvec[axes] / kmag2 * delta * sin(phase);
(Cdata[index])[1] = kvec[axes] / kmag2 * delta * cos(phase);
}
else /* k=0 plane needs special treatment */
{
if(i == 0)
{
if(j >= Nmesh / 2)
continue;
else
{
/* This is done like this, rather than just looping for longer,
* because we want these modes to have the same phase, and
* thus be complex conjugates of each other.*/
int jj = Nmesh - j; /* note: i=k=0 => j!=0 */
size_t index = j * (Nmesh / 2 + 1);
(Cdata[index])[0] = -kvec[axes] / kmag2 * delta * sin(phase);
(Cdata[index])[1] = kvec[axes] / kmag2 * delta * cos(phase);
index = jj * (Nmesh / 2 + 1);
(Cdata[index])[0] = -kvec[axes] / kmag2 * delta * sin(phase);
(Cdata[index])[1] = -kvec[axes] / kmag2 * delta * cos(phase);
}
}
else /* here comes i!=0 */
{
if(i >= Nmesh / 2)
continue;
else
{
int ii = (Nmesh - i) % Nmesh;
int jj = (Nmesh - j) % Nmesh;
size_t index = (i * Nmesh + j) * (Nmesh / 2 + 1);
(Cdata[index])[0] = -kvec[axes] / kmag2 * delta * sin(phase);
(Cdata[index])[1] = kvec[axes] / kmag2 * delta * cos(phase);
index = (ii * Nmesh + jj) * (Nmesh / 2 + 1);
(Cdata[index])[0] = -kvec[axes] / kmag2 * delta * sin(phase);
(Cdata[index])[1] = -kvec[axes] / kmag2 * delta * cos(phase);
}
}
}
}
}
}
gsl_rng_free(random_generator);
#pragma omp barrier
} //omp_parallel
if (twolpt ) {
/* At this point, Cdata contains the complex Zeldovich displacement for this axis */
/* Compute displacement gradient
* do disp(0,0), disp(0,1), disp(0,2), disp(1,1), disp(1,2), disp(2,2) only as vector symmetric*/
for(int ax=2;ax>=axes; ax--){
//Neutrinos do not have 2lpt
if(type == 2)
break;
#pragma omp parallel for
for(size_t i = 0; i < Nmesh; i++) {
for(size_t j = 0; j < Nmesh; j++) {
for(size_t k = 0; k <= Nmesh / 2; k++){
double kvec[3];
size_t coord = (i * Nmesh + j) * (Nmesh / 2 + 1) + k;
kvec[0] = KVAL(i, Nmesh) * 2 * M_PI / Box;
kvec[1] = KVAL(j, Nmesh) * 2 * M_PI / Box;
kvec[2] = KVAL(k, Nmesh) * 2 * M_PI / Box;
/*Note that unlike Scoccimaro et al, we do not have
* memory to waste, so we only do one axis at a time */
/* Derivatives of ZA displacement */
/* d(dis_i)/d(q_j) -> sqrt(-1) k_j dis_i */
(cdigrad[axes][coord])[0] = -(Cdata[coord])[1] * kvec[ax]; /* disp0,0 */
(cdigrad[axes][coord])[1] = (Cdata[coord])[0] * kvec[ax];
}
}
}
/*At this point, cdigrad[i] contains FT(phi,ii). For grad^2 phi, want the FT. */
// printf("Finding gradient FT component (%d,%d)\n",ax,axes);
fftwf_execute(Inverse_plan_grad[axes]); /** FFT of cdigrad[axes] **/
/* Compute second order source and store it in twosrc*/
if(ax != axes)
#pragma omp parallel for
for(size_t i = 0; i < Nmesh; i++)
for(size_t j = 0; j < Nmesh; j++)
for(size_t k = 0; k < Nmesh; k++){
size_t coord = (i * Nmesh + j) * (2 * (Nmesh / 2 + 1)) + k;
twosrc[coord] -= digrad[axes][coord]*digrad[axes][coord];
}
}
} //end twolpt
fftw_execute(Inverse_plan); /** FFT of the Zeldovich displacements **/
/* read-out Zeldovich displacements into P.Vel*/
maxdisp=displacement_read_out(1, outdata, Pgrid, axes, type);
}
//2lpt not computed for neutrinos
if (twolpt && type != 2) {
/* So now digrad[axes] contains phi,ii and twosrc contains sum_(i>j)(- phi,ij^2)
* We want to now compute phi,ii^(2), the laplacian of the 2LPT term, in twosrc */
#pragma omp parallel for
for(size_t i = 0; i < Nmesh; i++)
for(size_t j = 0; j < Nmesh; j++)
for(size_t k = 0; k < Nmesh; k++){
size_t co = (i * Nmesh + j) * (2 * (Nmesh / 2 + 1)) + k;
twosrc[co] += digrad[0][co]*digrad[1][co]+digrad[0][co]*digrad[2][co]+digrad[1][co]*digrad[2][co];
}
fftwf_execute(Forward_plan2); /** FFT of twosrc**/
for(int axes=0; axes< 3; axes++){
printf("Starting 2LPT term, axis %d\n",axes);
/* Reuse the memory used earlier for ZA field */
memset(Disp, 0, fftsize*sizeof(double));
/* Solve Poisson eq. and calculate 2nd order displacements */
(Cdata[0])[0] = (Cdata[0])[1] = 0.0;
#pragma omp parallel for
for(size_t i = 0; i < Nmesh; i++)
for(size_t j = 0; j < Nmesh; j++)
for(size_t k = 1; k <= Nmesh / 2; k++){
double kvec[3],kmag2;
size_t coord = (i * Nmesh + j) * (Nmesh / 2 + 1) + k;
kvec[0] = KVAL(i, Nmesh) * 2 * M_PI / Box;
kvec[1] = KVAL(j, Nmesh) * 2 * M_PI / Box;
kvec[2] = KVAL(k, Nmesh) * 2 * M_PI / Box;
kmag2 = kvec[0] * kvec[0] + kvec[1] * kvec[1] + kvec[2] * kvec[2];
/* cdisp2 = source * k / (sqrt(-1) k^2) */
(Cdata[coord])[0] = (ctwosrc[coord])[1] * kvec[axes] / kmag2;
(Cdata[coord])[1] = -(ctwosrc[coord])[0] * kvec[axes] / kmag2;
#ifdef CORRECT_CIC
/* do deconvolution of CIC interpolation */
double smth= invwindow(KVAL(i,Nmesh),KVAL(j,Nmesh),KVAL(k,Nmesh),Nmesh);
(Cdata[coord])[0] *= smth;
(Cdata[coord])[1] *= smth;
#endif
}
/* Cdata now contains the FFT of the 2LPT term */
fftw_execute(Inverse_plan); /** FFT of Cdata**/
/* read-out displacements */
maxdisp2=displacement_read_out(2, outdata, Pgrid, axes, type);
}
} //end twolpt
printf("\nMaximum Zeldovich displacement: %g kpc/h, in units of the part-spacing= %g\n",
maxdisp, maxdisp / (Box / Nmesh));
printf("\nMaximum 2LPT displacement: %g kpc/h, in units of the part-spacing= %g\n",maxdisp2, maxdisp2 / (Box / Nmesh));
free(seedtable);
return outdata;
}
/** This function evaluates the displacement field computed above for a particular position, given as a list in part_data
* Cloud-in-cell is used to find the displacement field averaged over the 8 nearby grid cells.
*/
double DisplacementFields::displacement_read_out(const int order, lpt_data& outdata, part_grid& Pgrid, const int axes, const int type)
{
double maxdisp=0;
const double Nmesh3 = pow(1.*Nmesh, 3);
const int64_t NumPart = outdata.GetNumPart();
//This needs openmp 3.1, (gcc 4.7)
#pragma omp parallel for reduction(max: maxdisp)
for(int64_t n = 0; n < NumPart; n++)
{
double dis;
double f1, f2, f3, f4, f5, f6, f7, f8;
double u[3];
size_t i[3], ii[3];
for(int q=0;q<3;q++){
u[q] = Pgrid.Pos(n,q, type) / Box * Nmesh;
i[q] = static_cast<size_t>(u[q]);
if(i[q] == Nmesh)
i[q]--;
u[q] -= i[q];
ii[q] = i[q]+1;
if(ii[q] >= Nmesh)
ii[q] -= Nmesh;
}
f1 = (1 - u[0]) * (1 - u[1]) * (1 - u[2]);
f2 = (1 - u[0]) * (1 - u[1]) * (u[2]);
f3 = (1 - u[0]) * (u[1]) * (1 - u[2]);
f4 = (1 - u[0]) * (u[1]) * (u[2]);
f5 = (u[0]) * (1 - u[1]) * (1 - u[2]);
f6 = (u[0]) * (1 - u[1]) * (u[2]);
f7 = (u[0]) * (u[1]) * (1 - u[2]);
f8 = (u[0]) * (u[1]) * (u[2]);
dis = Disp[(i[0] * Nmesh + i[1]) * (2 * (Nmesh / 2 + 1)) + i[2]] * f1 +
Disp[(i[0] * Nmesh + i[1]) * (2 * (Nmesh / 2 + 1)) + ii[2]] * f2 +
Disp[(i[0] * Nmesh + ii[1]) * (2 * (Nmesh / 2 + 1)) + i[2]] * f3 +
Disp[(i[0] * Nmesh + ii[1]) * (2 * (Nmesh / 2 + 1)) + ii[2]] * f4 +
Disp[(ii[0] * Nmesh + i[1]) * (2 * (Nmesh / 2 + 1)) + i[2]] * f5 +
Disp[(ii[0] * Nmesh + i[1]) * (2 * (Nmesh / 2 + 1)) + ii[2]] * f6 +
Disp[(ii[0] * Nmesh + ii[1]) * (2 * (Nmesh / 2 + 1)) + i[2]] * f7 +
Disp[(ii[0] * Nmesh + ii[1]) * (2 * (Nmesh / 2 + 1)) + ii[2]] * f8;
/*Read out the 2lpt velocity if this is
* being called from the 2lpt part of the code*/
if(order == 2){
dis /= Nmesh3;
outdata.Set2Vel(dis, n,axes);
}
else
outdata.SetVel(dis, n,axes);
if(dis > maxdisp)
maxdisp = dis;
} // end openmp parallel
return maxdisp;
}