forked from mlxd/GPUE
-
Notifications
You must be signed in to change notification settings - Fork 9
Open
Description
It seems like the Ax and Ay arrays are somehow mixed up in GPUE, and the current master branch does not evolve correctly with rotation.
The following apply_gauge(...) function works:
// 3D
void apply_gauge(Grid &par, double2 *wfc, double2 *Ax, double2 *Ay,
double2 *Az, double renorm_factor_x,
double renorm_factor_y, double renorm_factor_z, bool flip,
cufftHandle plan_1d, cufftHandle plan_dim2,
cufftHandle plan_dim3, double dx, double dy, double dz,
double time, int yDim, int size){
dim3 grid = par.grid;
dim3 threads = par.threads;
if (flip){
// 1d forward / mult by Ay
cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc);
cudaCheckError();
if(par.bval("Ay_time")){
EqnNode_gpu* Ay_eqn = par.astval("Ay");
int e_num = par.ival("Ay_num");
ast_cmult<<<grid,threads>>>(wfc, wfc, Ay_eqn, dx, dy, dz,
time, e_num);
cudaCheckError();
}
else{
cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ay, wfc);
cudaCheckError();
}
cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc);
cudaCheckError();
// loop to multiply by Ax
for (int i = 0; i < yDim; i++){
cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size],
&wfc[i*size], CUFFT_FORWARD) );
}
scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc);
cudaCheckError();
if(par.bval("Ax_time")){
EqnNode_gpu* Ax_eqn = par.astval("Ax");
int e_num = par.ival("Ax_num");
ast_cmult<<<grid,threads>>>(wfc, wfc, Ax_eqn, dx, dy, dz,
time, e_num);
cudaCheckError();
}
else{
cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ax, wfc);
cudaCheckError();
}
for (int i = 0; i < yDim; i++){
//size = xDim * zDim;
cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size],
&wfc[i*size], CUFFT_INVERSE) );
}
scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc);
cudaCheckError();
// 1D FFT to Az
cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_FORWARD) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc);
cudaCheckError();
if(par.bval("Az_time")){
EqnNode_gpu* Az_eqn = par.astval("Az");
int e_num = par.ival("Az_num");
ast_cmult<<<grid,threads>>>(wfc, wfc, Az_eqn, dx, dy, dz,
time, e_num);
cudaCheckError();
}
else{
cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Az, wfc);
cudaCheckError();
}
cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_INVERSE) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc);
cudaCheckError();
}
else{
// 1D FFT to Az
cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_FORWARD) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc);
cudaCheckError();
if(par.bval("Az_time")){
EqnNode_gpu* Az_eqn = par.astval("Az");
int e_num = par.ival("Az_num");
ast_cmult<<<grid,threads>>>(wfc, wfc, Az_eqn, dx, dy, dz,
time, e_num);
cudaCheckError();
}
else{
cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Az, wfc);
cudaCheckError();
}
cufftHandleError( cufftExecZ2Z(plan_dim3, wfc, wfc, CUFFT_INVERSE) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_z, wfc);
cudaCheckError();
// loop to multiply by Ax
for (int i = 0; i < yDim; i++){
cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size],
&wfc[i*size], CUFFT_FORWARD) );
}
scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc);
cudaCheckError();
if(par.bval("Ax_time")){
EqnNode_gpu* Ax_eqn = par.astval("Ax");
int e_num = par.ival("Ax_num");
ast_cmult<<<grid,threads>>>(wfc, wfc, Ax_eqn, dx, dy, dz,
time, e_num);
cudaCheckError();
}
else{
cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ax, wfc);
cudaCheckError();
}
for (int i = 0; i < yDim; i++){
//size = xDim * zDim;
cufftHandleError( cufftExecZ2Z(plan_dim2, &wfc[i*size],
&wfc[i*size], CUFFT_INVERSE) );
}
scalarMult<<<grid,threads>>>(wfc, renorm_factor_x, wfc);
cudaCheckError();
// 1d forward / mult by Ay
cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_FORWARD) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc);
cudaCheckError();
if(par.bval("Ay_time")){
EqnNode_gpu* Ay_eqn = par.astval("Ay");
int e_num = par.ival("Ay_num");
ast_cmult<<<grid,threads>>>(wfc, wfc, Ay_eqn, dx, dy, dz,
time, e_num);
cudaCheckError();
}
else{
cMult<<<grid,threads>>>(wfc, (cufftDoubleComplex*) Ay, wfc);
cudaCheckError();
}
cufftHandleError( cufftExecZ2Z(plan_1d, wfc, wfc, CUFFT_INVERSE) );
scalarMult<<<grid,threads>>>(wfc, renorm_factor_y, wfc);
cudaCheckError();
}
}Note that the dim2 plan is for some reason for the Ax operator instead of Ay. I am looking into this now.
These changes are on the 3d_rot_fix branch.
Metadata
Metadata
Assignees
Labels
No labels