forked from fwyzard/soa
-
Notifications
You must be signed in to change notification settings - Fork 0
/
soa_v9_cuda.h
151 lines (132 loc) · 5.16 KB
/
soa_v9_cuda.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
#pragma once
/* Common definitions shared by .cu and .cc files for the test */
#include "soa_v9.h"
#include <cppunit/extensions/HelperMacros.h>
#include <iostream>
#include <type_traits>
#include <cuda.h>
#include <cuda_runtime.h>
#include <Eigen/Core>
/* Test definitions */
class testSoA: public CppUnit::TestFixture {
CPPUNIT_TEST_SUITE(testSoA);
CPPUNIT_TEST(initialTest);
CPPUNIT_TEST(checkAlignment);
CPPUNIT_TEST(fill);
CPPUNIT_TEST(randomFill);
CPPUNIT_TEST(crossProduct);
CPPUNIT_TEST(randomCrossProduct);
CPPUNIT_TEST(randomCrossProductEigen);
CPPUNIT_TEST(randomCrossProductEmbeddedVector);
CPPUNIT_TEST_SUITE_END();
/* SoA and AoS structures definitions */
public:
void setUp() {}
void tearDown() {}
void initialTest();
void checkAlignment();
void fill();
void randomFill();
void crossProduct();
void randomCrossProduct();
void randomCrossProductEigen();
void randomCrossProductEmbeddedVector();
declare_SoA_template(SoA,
// predefined static scalars
// size_t size;
// size_t alignment;
// columns: one value per element
SoA_column(double, x),
SoA_column(double, y),
SoA_column(double, z),
SoA_eigenColumn(Eigen::Vector3d, a),
SoA_eigenColumn(Eigen::Vector3d, b),
SoA_eigenColumn(Eigen::Vector3d, r),
SoA_column(uint16_t, colour),
SoA_column(int32_t, value),
SoA_column(double *, py),
// scalars: one value for the whole structure
SoA_scalar(const char *, description)
);
// declare equivalent struct
struct AoSelement {
double x;
double y;
double z;
Eigen::Vector3d a;
Eigen::Vector3d b;
Eigen::Vector3d r;
uint16_t colour;
int32_t value;
double * py;
};
private:
// Constants
static constexpr int defaultDevice = 0;
static constexpr size_t elementsCount = 10000;
// Helper functions
template <typename T>
void checkValuesAlignment(SoA &soa, T SoA::element::*member, const std::string & memberName, size_t byteAlignment) {
for (size_t i=0; i<soa.nElements(); i++) {
// Check that each value is aligned
if (reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment
!= (i * T::valueSize) %byteAlignment ) {
std::stringstream err;
err << "Misaligned value: " << memberName << " at index=" << i
<< " address=" << &(soa[i].*member) << " byteAlignment=" << byteAlignment
<< " address lower part: " << reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment
<< " expected address lower part: " << ((i * T::valueSize) % byteAlignment)
<< " size=" << soa.nElements() << " align=" << soa.byteAlignment();
CPPUNIT_FAIL(err.str());
}
// Check that all values except the first-in rows (address 0 modulo alignment)
// are contiguous to their predecessors in memory (this will detect cutting
// memory/cache/etc... lines in unexpected places (for blocked SoA like AoSoA)
if ((reinterpret_cast<std::uintptr_t>(&(soa[i].*member)) % byteAlignment)
&& (reinterpret_cast<std::uintptr_t>(&(soa[i - 1].*member)) + T::valueSize
!= reinterpret_cast<std::uintptr_t>(&(soa[i].*member)))) {
std::stringstream err;
err << "Unexpected non-contiguity: " << memberName << " at index=" << i
<< " address=" << &(soa[i].*member) << " is not contiguous to "
<< memberName << " at index=" << i - 1 << "address=" << &(soa[i - 1].*member)
<< " size=" << soa.nElements() << " align=" << soa.byteAlignment() << " valueSize=" << T::valueSize;
CPPUNIT_FAIL(err.str());
}
}
}
void checkSoAAlignment(size_t nElements, size_t byteAlignment);
std::unique_ptr<std::byte, std::function<void(void*)>> make_aligned_unique(size_t size, size_t alignment) {
return std::unique_ptr<std::byte, std::function<void(void*)>> (
static_cast<std::byte*>(std::aligned_alloc(size, alignment)), [](void*p){std::free(p);});
}
class bad_alloc: public std::bad_alloc {
public:
bad_alloc(const std::string& w) noexcept: what_(w) {}
const char* what() const noexcept override { return what_.c_str(); }
private:
const std::string what_;
};
std::unique_ptr<std::byte, std::function<void(void*)>> make_device_unique(size_t size) {
void *p = nullptr;
cudaError_t e = cudaMalloc(&p, size);
if (e != cudaSuccess) {
std::string m("Failed to allocate device memory: ");
m+= cudaGetErrorName(e);
[[unlikely]] throw bad_alloc(m);
}
return std::unique_ptr<std::byte, std::function<void(void*)>> (
static_cast<std::byte*>(p), [](void*p){cudaFree(p);});
}
std::unique_ptr<std::byte, std::function<void(void*)>> make_host_unique(size_t size) {
void *p = nullptr;
cudaError_t e = cudaMallocHost(&p, size);
if (e != cudaSuccess) {
std::string m("Failed to allocate page-locked host memory: ");
m+= cudaGetErrorName(e);
[[unlikely]] throw bad_alloc(m);
}
return std::unique_ptr<std::byte, std::function<void(void*)>> (
static_cast<std::byte*>(p), [](void*p){cudaFreeHost(p);});
}
};
/* SoA and AoS structures definitions */