Merge branch 'develop'

mosaic-group · Jun 14, 2024 · 6f29451 · 6f29451
2 parents 761d15a + 332ae5d
commit 6f29451
Show file tree

Hide file tree

Showing 37 changed files with 352 additions and 403 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,6 +1,22 @@
 # Change Log
 All notable changes to this project will be documented in this file.
 
+## OpenFPM 5.1.0 - Jun 2024
+- Refactor implementations of cell list and Verlet list `CellList`, `CellList_gpu`, `VerletList` and all neighborhood iterators. Move from keeping two sets (unordered and ordered) of position/property vectors to reordering explicitly before launching CUDA kernels that utilize this feature
+
+### Added
+- Add adaptive cut-off radius Verlet list on CPU. Particles' individual radii are passed as a vector to `getVerletAdaptRCut` of `openfpm_pdata`. Time complexity: O(N^2)
+
+### Changes
+- `cuda_launch` is a function instead of a macro
+- Move project c++ standard to c++17
+- Add bit-wise option flags that control cell list behaviour. Pass as a parameter `opt` to `getCellList` of `openfpm_pdata` or set directly on cell list via `setOpt`
+- Add support for the following cell list bit-wise option flags: `CL_SYMMETRIC`, `CL_NON_SYMMETRIC`, `CL_LOCAL_SYMMETRIC`, `CL_LINEAR_CELL_KEYS`, `CL_HILBERT_CELL_KEYS`, `CL_GPU_REORDER`. The last one, `CL_GPU_REORDER`, can be fine-tuned with separate flags for the individual reordering/restoring operations: `CL_GPU_REORDER_POSITION`, `CL_GPU_REORDER_PROPERTY`, `CL_GPU_RESTORE_POSITION`, `CL_GPU_RESTORE_PROPERTY`
+- Add bit-wise option flags that control Verlet list behaviour. Pass as a template parameter `opt` in `VerletList` or pass Verlet list type with `opt` set to `getVerlet` of `openfpm_pdata`. The difference in how `opt` is set for cell list and Verlet list is due to legacy code
+- Add support for the following Verlet list bit-wise option flags: `VL_NON_SYMMETRIC`, `VL_SYMMETRIC`, `VL_CRS_SYMMETRIC`, `VL_ADAPTIVE_RCUT`, `VL_NMAX_NEIGHBOR`, `VL_SKIP_REF_PART`
+- `getCellListGPU` of `openfpm_pdata` doesn't fill a cell list with particle locations. Additionally, `updateCellListGPU` has to be called to perform this operation. The reason is to remove two identical fill operations in a row in simulations
+- To reorder `vector_dist` for GPU coalesced memory access dictated by the cell list structure, cell list on GPU has to be constructed with one of the following flags enabled: `CL_GPU_REORDER`, `CL_GPU_REORDER_POSITION`, `CL_GPU_REORDER_PROPERTY`, `CL_GPU_RESTORE_POSITION`, `CL_GPU_RESTORE_PROPERTY`. Properties to be copied to reordered copies of position/property vectors have to be passed to `updateCellListGPU<PROPERTIES>(CELL LIST)` of `vector_dist`. Properties that have undergone changes in the reordered copies and have to be restored to the original position/property vectors have to be passed to `restoreOrder<PROPERTIES>(CELL LIST)` of `vector_dist`
+
 ## OpenFPM 5.0.0 - Feb 2024
 - Move to `openfpm` meta OpenFPM project structure with git subprojects `openfpm_data`, `openfpm_devices`, `openfpm_io`, `openfpm_vcluster`, `openfpm_pdata`, `openfpm_numerics`. Example codes, installation scripts for dependencies, configuration scripts moved to `openfpm`. Only source code of `openfpm_pdata` kept in `openfpm_pdata`
 

diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -131,6 +131,9 @@ if(APPLE)
 endif()
 
 if (Boost_FOUND)
+	# ignore BOOST deprecated headers
+	add_definitions("-DBOOST_ALLOW_DEPRECATED_HEADERS")
+
 	if ((CUDA_ON_BACKEND STREQUAL "SEQUENTIAL" OR CUDA_ON_BACKEND STREQUAL "OpenMP") AND NOT Boost_CONTEXT_FOUND)
 		message( FATAL_ERROR "BOOST is invalid reinstalling" )
 	endif()
@@ -163,6 +166,7 @@ add_subdirectory(openfpm_pdata)
 
 if (ENABLE_NUMERICS)
 	add_subdirectory(openfpm_numerics)
+	set(DEFINE_ENABLE_NUMERICS "#define ENABLE_NUMERICS")
 endif()
 
 configure_file(${CMAKE_CURRENT_SOURCE_DIR}/config/config_cmake.h.in ${PROJECT_BINARY_DIR}/config/config.h)

diff --git a/config/config_cmake.h.in b/config/config_cmake.h.in
@@ -195,6 +195,9 @@ ${DEFINE_VCLUSTER_GARBAGE_INJECTOR}
 /* Test coverage mode */
 ${DEFINE_TEST_COVERAGE_MODE}
 
+/* Enable numerics module */
+${DEFINE_ENABLE_NUMERICS}
+
 /* when an error occurs continue but avoid unsafe operations */
 /* #undef THROW_ON_ERROR */
 

diff --git a/example/Numerics/PSE/0_Derivative_approx_1D/main.cpp b/example/Numerics/PSE/0_Derivative_approx_1D/main.cpp
@@ -210,9 +210,7 @@ int main(int argc, char* argv[])
 	//
 
     // get and construct the Cell list
-
-	Ghost<1,double> gp(enlarge);
-    auto cl = vd.getCellList(12*eps,gp);
+	auto cl = vd.getCellList(12*eps, CL_NON_SYMMETRIC, false, enlarge);
 
     // Maximum infinity norm
     double linf = 0.0;
@@ -251,7 +249,7 @@ int main(int argc, char* argv[])
     	double prp_x = vd.template getProp<0>(key);
 
     	// Get the neighborhood of the particles
-    	auto NN = cl.getNNIterator(cl.getCell(p));
+	auto NN = cl.getNNIteratorBox(cl.getCell(p));
     	while(NN.isNext())
     	{
     		auto nnp = NN.get();

diff --git a/example/Numerics/PSE/1_Derivative_approx_1D_mp/main_float128.cpp b/example/Numerics/PSE/1_Derivative_approx_1D_mp/main_float128.cpp
@@ -218,9 +218,8 @@ int main(int argc, char* argv[])
 	//
 
     // get and construct the Cell list
+    auto cl = vd.getCellList(12*eps, CL_NON_SYMMETRIC, false, enlarge);
 
-	Ghost<1,float128> gp(enlarge);
-    auto cl = vd.getCellList(12*eps,gp);
 
     // Maximum infinity norm
     double linf = 0.0;
@@ -260,7 +259,7 @@ int main(int argc, char* argv[])
     	float128 prp_x = vd.template getProp<0>(key);
 
     	// Get the neighborhood of the particles
-    	auto NN = cl.getNNIterator(cl.getCell(p));
+	auto NN = cl.getNNIteratorBox(cl.getCell(p));
     	while(NN.isNext())
     	{
     		auto nnp = NN.get();

diff --git a/example/Numerics/PSE/1_Diffusion_1D/main.cpp b/example/Numerics/PSE/1_Diffusion_1D/main.cpp
@@ -69,7 +69,7 @@ template<typename CellL> double calcLap(Point<1,double> p, vect_dist_key_dx key,
 	double prp_x = vd.template getProp<0>(key);
 
 	// Get the neighborhood of the particles
-	auto NN = cl.getNNIterator(cl.getCell(p));
+	auto NN = cl.getNNIteratorBox(cl.getCell(p));
 	while(NN.isNext())
 	{
 		auto nnp = NN.get();
@@ -339,13 +339,11 @@ int main(int argc, char* argv[])
 	// keeping ghost size and padding area unrelated give us the possibility to show how to create a CellList
 	// on a area bigger than the domain + ghost
 
-	Ghost<1,double> gp(enlarge);
-
-        // Create a Cell list with Cell spaping 8*epsilon, the CellList is created on a domain+ghost+enlarge space
-        auto cl = vd.getCellList(8*eps,gp);
+	// Create a Cell list with Cell spacing 8*epsilon, the CellList is created on a domain+ghost+enlarge space
+	auto cl = vd.getCellList(8*eps, CL_NON_SYMMETRIC, false, enlarge);
 
-        // Maximum infinity norm
-        double linf = 0.0;
+	// Maximum infinity norm
+	double linf = 0.0;
 
 	//
 	// ### WIKI 13 ###

diff --git a/example/SparseGrid/8_filling_benchmark_gpu/main.cu b/example/SparseGrid/8_filling_benchmark_gpu/main.cu
@@ -2,7 +2,7 @@
 #define SYNC_BEFORE_TAKE_TIME
 #define ENABLE_GRID_DIST_ID_PERF_STATS
 #include "Decomposition/Distribution/BoxDistribution.hpp"
-#include "util/cuda_launch.hpp"
+#include "util/cuda_util.hpp"
 #include "Grid/grid_dist_id.hpp"
 #include "data_type/aggregate.hpp"
 #include "timer.hpp"

diff --git a/example/Vector/10_level_set/main.cpp b/example/Vector/10_level_set/main.cpp
@@ -63,7 +63,11 @@ const double rdist_cutoff_factor = 2.8;
 
 // dimensions of spatial and temporal domain
 const double l = 2.44;
+#ifdef TEST_RUN
+const double t_end = 0.1;
+#else
 const double t_end = 1.0;
+#endif
 // total mass in the domain to compute individual particle masses from
 const double M = l*l*rho_0;
 // number of particles in total
@@ -89,7 +93,7 @@ template <typename CellList> inline void density_summation(particles & vd, CellL
         // initialize sum that yields 1/(particle volume)
 	double V_inv = 0.0;
 
-        auto Np = NN.getNNIterator(NN.getCell(vd.getPos(a)));
+        auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(a)));
 	// iterate over particles b (neighboring particles)
         while (Np.isNext() == true)
         {
@@ -171,7 +175,7 @@ template<typename CellList> inline void calc_forces(particles & vd, CellList & N
         Point<2, double> va = vd.getProp<vel>(a);
 
         // Get an iterator over the neighborhood particles of p
-        auto Np = NN.getNNIterator(NN.getCell(vd.getPos(a)));
+        auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(a)));
 
         // For each neighborhood particle b
         while (Np.isNext() == true)

diff --git a/example/Vector/1_celllist/main.cpp b/example/Vector/1_celllist/main.cpp
@@ -247,7 +247,7 @@ int main(int argc, char* argv[])
 		Point<3,float> xp = vd.getPos(p);
 
 		// Get an iterator of all the particles neighborhood of p
-		auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+		auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 		// For each particle near p
 		while (Np.isNext())
@@ -392,8 +392,8 @@ int main(int argc, char* argv[])
 	//! \cond [cell_list_types] \endcond
 
 	// Get cell list
-	auto NN4 = vd.getCellList<CELL_MEMBAL(3,float)>(r_cut);
-	auto NN5 = vd.getCellList<CELL_MEMMW(3,float)>(r_cut);
+	auto NN4 = vd.getCellList<CELL_MEMBAL<3,float>>(r_cut);
+	auto NN5 = vd.getCellList<CELL_MEMMW<3,float>>(r_cut);
 
 	//! \cond [cell_list_types] \endcond
 

diff --git a/example/Vector/1_gpu_first_step/main.cu b/example/Vector/1_gpu_first_step/main.cu
@@ -127,7 +127,6 @@
 #define OPENMPI
 //! \cond [using_openmpi] \endcond
 
-#define SCAN_WITH_CUB <------ MODERNGPU is broken on RTX use CUB library for scan
 //#define EXTERNAL_SET_GPU <----- In case you want to distribute the GPUs differently from the default
 
 #include "Vector/vector_dist.hpp"

diff --git a/example/Vector/3_molecular_dynamic/main.cpp b/example/Vector/3_molecular_dynamic/main.cpp
@@ -115,7 +115,7 @@ template<typename CellList> void calc_forces(vector_dist<3,double, aggregate<dou
 		vd.template getProp<force>(p)[2] = 0.0;
 
 		// Get an iterator over the neighborhood particles of p
-		auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+		auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 		// For each neighborhood particle ...
 		while (Np.isNext())
@@ -232,7 +232,7 @@ template<typename CellList> double calc_energy(vector_dist<3,double, aggregate<d
 		Point<3,double> xp = vd.getPos(p);
 
 		// Get an iterator over the neighborhood of the particle p
-		auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+		auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 		// For each neighborhood of the particle p
 		while (Np.isNext())
@@ -438,7 +438,7 @@ int main(int argc, char* argv[])
 	//! \cond [md steps] \endcond
 
 	// Get the Cell list structure
-	auto NN = vd.getCellList<CELL_MEMBAL(3,double)>(r_cut);
+	auto NN = vd.getCellList<CELL_MEMBAL<3,double>>(r_cut);
 
 	// The standard
 	// auto NN = vd.getCellList(r_cut);

diff --git a/example/Vector/3_molecular_dynamic/main_expr_paper.cpp b/example/Vector/3_molecular_dynamic/main_expr_paper.cpp
@@ -94,7 +94,7 @@ int main(int argc, char* argv[])
 		particles.ghost_get<>();
 
 		// Calculate the force at t + dt
-		particles.updateCellListSym(NN);
+		particles.updateCellList(NN);
 		force = applyKernel_in_sim(particles,NN,lennard_jones);
 
 		// 2-step Verlet velocity

diff --git a/example/Vector/3_molecular_dynamic/main_vl.cpp b/example/Vector/3_molecular_dynamic/main_vl.cpp
@@ -62,7 +62,7 @@ constexpr int force = 1;
 
 //! \cond [arg diff] \endcond
 
-void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, VerletList<3, double, Mem_fast<>, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut)
+void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, VerletList<3, double, VL_NON_SYMMETRIC, Mem_fast<>, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut)
 {
 	//! \cond [arg diff] \endcond
 
@@ -150,7 +150,7 @@ void calc_forces(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, Ve
 
 //! \cond [calc energy vl] \endcond
 
-double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, VerletList<3, double, Mem_fast<>, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut)
+double calc_energy(vector_dist<3,double, aggregate<double[3],double[3]> > & vd, VerletList<3, double, VL_NON_SYMMETRIC, Mem_fast<>, shift<3, double> > & NN, double sigma12, double sigma6, double r_cut)
 {
 	double E = 0.0;
 

diff --git a/example/Vector/3_molecular_dynamic_gpu/main.cu b/example/Vector/3_molecular_dynamic_gpu/main.cu
@@ -1,5 +1,3 @@
-#define SCAN_WITH_CUB
-
 /*!
  * \page Vector_3_md_dyn_gpu Vector 3 molecular dynamic on GPU
  *
@@ -111,7 +109,7 @@ __global__ void calc_force_gpu(vector_dist_type vd, NN_type NN, real_number sigm
 
 
 	// Get an iterator over the neighborhood particles of p
-	auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+	auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 	// For each neighborhood particle ...
 	while (Np.isNext())
@@ -189,7 +187,7 @@ __global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sig
 	Point<3,real_number> xp = vd.getPos(p);
 
 	// Get an iterator over the neighborhood of the particle p
-	auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+	auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 	real_number E = 0;
 
@@ -230,7 +228,7 @@ __global__ void particle_energy(vector_dist_type vd, NN_type NN, real_number sig
 
 template<typename CellList> void calc_forces(vector_dist_gpu<3,real_number, aggregate<real_number[3],real_number[3],real_number> > & vd, CellList & NN, real_number sigma12, real_number sigma6, real_number r_cut2)
 {
-	vd.updateCellList(NN);
+	vd.updateCellListGPU(NN);
 
 	// Get an iterator over particles
 	auto it2 = vd.getDomainIteratorGPU();
@@ -245,7 +243,7 @@ template<typename CellList> real_number calc_energy(vector_dist_gpu<3,real_numbe
 	real_number rc = r_cut2;
 	real_number shift = 2.0 * ( sigma12 / (rc*rc*rc*rc*rc*rc) - sigma6 / ( rc*rc*rc) );
 
-	vd.updateCellList(NN);
+	vd.updateCellListGPU(NN);
 
 	auto it2 = vd.getDomainIteratorGPU();
 
@@ -328,7 +326,7 @@ int main(int argc, char* argv[])
 	//! \cond [md steps] \endcond
 
 	// Get the Cell list structure
-	auto NN = vd.getCellListGPU(r_cut, 2);
+	auto NN = vd.getCellListGPU(r_cut, CL_NON_SYMMETRIC, 2);
 
 	// The standard
 	// auto NN = vd.getCellList(r_cut);

diff --git a/example/Vector/3_molecular_dynamic_gpu_opt/main_cpu.cpp b/example/Vector/3_molecular_dynamic_gpu_opt/main_cpu.cpp
@@ -37,7 +37,7 @@ template<typename CellList> void calc_forces(vector_dist<3,real_number, aggregat
 		vd.template getProp<force>(p)[2] = 0.0;
 
 		// Get an iterator over the neighborhood particles of p
-		auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+		auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 		// For each neighborhood particle ...
 		while (Np.isNext())
@@ -97,7 +97,7 @@ template<typename CellList> real_number calc_energy(vector_dist<3,real_number, a
 		Point<3,real_number> xp = vd.getPos(p);
 
 		// Get an iterator over the neighborhood of the particle p
-		auto Np = NN.getNNIterator(NN.getCell(vd.getPos(p)));
+		auto Np = NN.getNNIteratorBox(NN.getCell(vd.getPos(p)));
 
 		// For each neighborhood of the particle p
 		while (Np.isNext())

diff --git a/example/Vector/3_molecular_dynamic_gpu_opt/main_cpu_best.cpp b/example/Vector/3_molecular_dynamic_gpu_opt/main_cpu_best.cpp
@@ -241,7 +241,7 @@ int main(int argc, char* argv[])
 	tsim.start();
 
 	// Get the Cell list structure
-	auto NN = vd.getVerletCrs(r_gskin);;
+	auto NN = vd.getVerletCrs(r_gskin);
 
 	// calculate forces
 	calc_forces(vd,NN,sigma12,sigma6,r_cut);
@@ -290,7 +290,7 @@ int main(int argc, char* argv[])
 			vd.map();
 			vd.template ghost_get<>();
 			// Get the Cell list structure
-			vd.updateVerlet(NN,r_gskin,VL_CRS_SYMMETRIC);
+			vd.updateVerlet(NN,r_gskin);
 		}
 		else
 		{