Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix: add MPI_Finalize in QUIT function #5505

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 27 additions & 19 deletions source/module_base/module_mixing/broyden_mixing.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#include "broyden_mixing.h"

#include "module_base/lapack_connector.h"
#include "module_base/memory.h"
#include "module_base/module_container/base/third_party/blas.h"
#include "module_base/timer.h"
#include "module_base/tool_title.h"
#ifdef _OPENMP
#include <omp.h>
#endif

namespace Base_Mixing
{
template void Broyden_Mixing::tem_push_data(Mixing_Data& mdata,
Expand All @@ -31,49 +34,53 @@ void Broyden_Mixing::tem_push_data(Mixing_Data& mdata,
{
const size_t length = mdata.length;
std::vector<FPTYPE> F_tmp(length);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

// an OpenMP pragma does not need to be enclosed in #ifdef _OPENMP ... #endif
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
F_tmp[i] = data_out[i] - data_in[i];
}

// get screened F
if (screen != nullptr)
if (screen != nullptr) {
screen(F_tmp.data());
}

// container::Tensor data = data_in + mixing_beta * F;
std::vector<FPTYPE> data(length);
mix(data.data(), data_in, F_tmp.data());

mdata.push(data.data());

if (!need_calcoef)
if (!need_calcoef) {
return;
}

if (address != &mdata && address != nullptr)
if (address != &mdata && address != nullptr) {
ModuleBase::WARNING_QUIT(
"Broyden_Mixing",
"One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients");
}

FPTYPE* FP_dF = static_cast<FPTYPE*>(dF);
FPTYPE* FP_F = static_cast<FPTYPE*>(F);
if (mdata.ndim_use == 1)
{
address = &mdata;
// allocate
if (F != nullptr)
if (F != nullptr) {
free(F);
}
F = malloc(sizeof(FPTYPE) * length);
FP_F = static_cast<FPTYPE*>(F);
if (dF != nullptr)
if (dF != nullptr) {
free(dF);
}
dF = malloc(sizeof(FPTYPE) * length * mixing_ndim);
FP_dF = static_cast<FPTYPE*>(dF);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
FP_F[i] = F_tmp[i];
Expand All @@ -83,9 +90,8 @@ void Broyden_Mixing::tem_push_data(Mixing_Data& mdata,
{
this->ndim_cal_dF = std::min(this->ndim_cal_dF + 1, this->mixing_ndim);
start_dF = (this->start_dF + 1) % this->mixing_ndim;
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
FP_F[i] = F_tmp[i];
Expand All @@ -107,9 +113,11 @@ void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double
ModuleBase::TITLE("Charge_Mixing", "Simplified_Broyden_mixing");
ModuleBase::timer::tick("Charge", "Broyden_mixing");
if (address != &mdata && address != nullptr)
{
ModuleBase::WARNING_QUIT(
"Broyden_mixing",
"One Broyden_Mixing object can only bind one Mixing_Data object to calculate coefficients");
}
const int length = mdata.length;
FPTYPE* FP_dF = static_cast<FPTYPE*>(dF);
FPTYPE* FP_F = static_cast<FPTYPE*>(F);
Expand Down Expand Up @@ -152,8 +160,9 @@ void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double
}
// solve aG = c
dsysv_(&uu, &ndim_cal_dF, &m, beta_tmp.c, &ndim_cal_dF, iwork, gamma.data(), &ndim_cal_dF, work, &ndim_cal_dF, &info);
if (info != 0)
if (info != 0) {
ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when DSYSV.");
}
// after solving, gamma stores the coefficients for mixing
coef[mdata.start] = 1 + gamma[dFindex_move(0)];
for (int i = 1; i < ndim_cal_dF; ++i)
Expand All @@ -171,9 +180,8 @@ void Broyden_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double
}

FPTYPE* dFnext = FP_dF + dFindex_move(1) * length;
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
dFnext[i] = FP_F[i];
Expand Down
35 changes: 18 additions & 17 deletions source/module_base/module_mixing/mixing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,8 @@ void Mixing::push_data(Mixing_Data& mdata,
data_out,
screen,
[this, length](double* out, const double* in, const double* sres) {
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 512)
#endif

#pragma omp parallel for schedule(static, 512)
for (int i = 0; i < length; ++i)
{
out[i] = in[i] + this->mixing_beta * sres[i];
Expand All @@ -42,9 +41,8 @@ void Mixing::push_data(Mixing_Data& mdata,
data_out,
screen,
[this, length](std::complex<double>* out, const std::complex<double>* in, const std::complex<double>* sres) {
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 256)
#endif

#pragma omp parallel for schedule(static, 256)
for (int i = 0; i < length; ++i)
{
out[i] = in[i] + this->mixing_beta * sres[i];
Expand All @@ -56,16 +54,17 @@ void Mixing::push_data(Mixing_Data& mdata,

void Mixing::mix_data(const Mixing_Data& mdata, double* data_mix)
{
if (mdata.length <= 0)
if (mdata.length <= 0) {
return;
}
double* FP_data = static_cast<double*>(mdata.data);
if (mdata.ndim_use == 1)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 512)
#endif
for (int i = 0; i < mdata.length; ++i)

#pragma omp parallel for schedule(static, 512)
for (int i = 0; i < mdata.length; ++i) {
data_mix[i] = FP_data[i];
}
return;
}
container::BlasConnector::gemv('N',
Expand All @@ -82,22 +81,24 @@ void Mixing::mix_data(const Mixing_Data& mdata, double* data_mix)
}
void Mixing::mix_data(const Mixing_Data& mdata, std::complex<double>* data_mix)
{
if (mdata.length <= 0)
if (mdata.length <= 0) {
return;
}
std::complex<double>* FP_data = static_cast<std::complex<double>*>(mdata.data);
if (mdata.ndim_use == 1)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 256)
#endif
for (int i = 0; i < mdata.length; ++i)

#pragma omp parallel for schedule(static, 256)
for (int i = 0; i < mdata.length; ++i) {
data_mix[i] = FP_data[i];
}
return;
}
// convert coef to complex
std::vector<std::complex<double>> coef_complex(coef.size());
for (int i = 0; i < coef.size(); ++i)
for (int i = 0; i < coef.size(); ++i) {
coef_complex[i] = coef[i];
}
container::BlasConnector::gemv('N',
mdata.length,
mdata.ndim_use,
Expand Down
8 changes: 6 additions & 2 deletions source/module_base/module_mixing/mixing_data.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,20 @@ Mixing_Data::Mixing_Data(const int& ndim, const int& length, const size_t& type_

// Destructor: when `data` is non-null, releases it with free() and then resets
// the pointer to nullptr.
// NOTE(review): this span comes from a rendered diff, so the pre-change and
// post-change forms of the null check both appear on consecutive lines below.
Mixing_Data::~Mixing_Data()
{
if (this->data != nullptr)
if (this->data != nullptr) {
free(this->data);
this->data = nullptr;
}
}

void Mixing_Data::resize(const int& ndim, const int& length, const size_t& type_size)
{
this->ndim_tot = ndim;
this->length = length;
if (this->data != nullptr)
if (this->data != nullptr) {
free(this->data);
this->data = nullptr;
}
if (ndim * length > 0)
{
this->data = malloc(ndim * length * type_size);
Expand Down
20 changes: 8 additions & 12 deletions source/module_base/module_mixing/plain_mixing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,8 @@ void Plain_Mixing::tem_push_data(Mixing_Data& mdata,
{
const size_t length = mdata.length;
std::vector<FPTYPE> F_tmp(length);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
F_tmp[i] = data_out[i] - data_in[i];
Expand Down Expand Up @@ -67,9 +66,8 @@ void Plain_Mixing::simple_mix(FPTYPE* data_new,
{
if (screen == nullptr)
{
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int ig = 0; ig < length; ig++)
{
data_new[ig] = data_in[ig] + this->mixing_beta * (data_out[ig] - data_in[ig]);
Expand All @@ -78,17 +76,15 @@ void Plain_Mixing::simple_mix(FPTYPE* data_new,
else
{
std::vector<FPTYPE> F_tmp(length);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
F_tmp[i] = data_out[i] - data_in[i];
}
screen(F_tmp.data());
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
data_new[i] = data_in[i] + this->mixing_beta * F_tmp[i];
Expand Down
36 changes: 20 additions & 16 deletions source/module_base/module_mixing/pulay_mixing.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,44 +31,46 @@ void Pulay_Mixing::tem_push_data(Mixing_Data& mdata,
const size_t length = mdata.length;
std::vector<FPTYPE> F_tmp(length);

#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
F_tmp[i] = data_out[i] - data_in[i];
}

// get screened F
if (screen != nullptr)
if (screen != nullptr) {
screen(F_tmp.data());
}

// container::Tensor data = data_in + mixing_beta * F;
std::vector<FPTYPE> data(length);
mix(data.data(), data_in, F_tmp.data());

mdata.push(data.data());

if (!need_calcoef)
if (!need_calcoef) {
return;
}

if (address != &mdata && address != nullptr)
if (address != &mdata && address != nullptr) {
ModuleBase::WARNING_QUIT(
"Pulay_Mixing",
"One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients");
}

FPTYPE* FP_F = static_cast<FPTYPE*>(F);
if (mdata.ndim_use == 1)
{
address = &mdata;
// allocate
if (F != nullptr)
if (F != nullptr) {
free(F);
}
F = malloc(sizeof(FPTYPE) * length * mixing_ndim);
FP_F = static_cast<FPTYPE*>(F);
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
FP_F[i] = F_tmp[i];
Expand All @@ -78,9 +80,8 @@ void Pulay_Mixing::tem_push_data(Mixing_Data& mdata,
{
start_F = (this->start_F + 1) % this->mixing_ndim;
FPTYPE* FP_startF = FP_F + start_F * length;
#ifdef _OPENMP
#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
#endif

#pragma omp parallel for schedule(static, 4096 / sizeof(FPTYPE))
for (int i = 0; i < length; ++i)
{
FP_startF[i] = F_tmp[i];
Expand All @@ -99,10 +100,11 @@ void Pulay_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double(F
{
ModuleBase::TITLE("Charge_Mixing", "Pulay_mixing");
ModuleBase::timer::tick("Charge", "Pulay_mixing");
if (address != &mdata && address != nullptr)
if (address != &mdata && address != nullptr) {
ModuleBase::WARNING_QUIT(
"Pulay_mixing",
"One Pulay_Mixing object can only bind one Mixing_Data object to calculate coefficients");
}
const int length = mdata.length;
FPTYPE* FP_F = static_cast<FPTYPE*>(F);

Expand Down Expand Up @@ -137,11 +139,13 @@ void Pulay_Mixing::tem_cal_coef(const Mixing_Data& mdata, std::function<double(F
char uu = 'U';
int info;
dsytrf_(&uu, &ndim_use, beta_tmp.c, &ndim_use, iwork, work, &ndim_use, &info);
if (info != 0)
if (info != 0) {
ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when factorizing beta.");
}
dsytri_(&uu, &ndim_use, beta_tmp.c, &ndim_use, iwork, work, &info);
if (info != 0)
if (info != 0) {
ModuleBase::WARNING_QUIT("Charge_Mixing", "Error when DSYTRI beta.");
}
for (int i = 0; i < ndim_use; ++i)
{
for (int j = i + 1; j < ndim_use; ++j)
Expand Down
1 change: 0 additions & 1 deletion source/module_base/module_mixing/test/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
remove_definitions(-D__MPI)
AddTest(
TARGET test_mixing
LIBS parameter base device ${math_libs}
Expand Down
Loading
Loading