Skip to content

Commit e059134

Browse files
committed
Add one debug mode by directly solving reordered S
, the results of which are identical to ref. Also solved S directly after eliminating e_vol. There should be one bug in eliminating e_vol
1 parent 280daa6 commit e059134

File tree

4 files changed

+98
-16
lines changed

4 files changed

+98
-16
lines changed

src/generateStiff.cpp

+4-3
Original file line numberDiff line numberDiff line change
@@ -658,9 +658,10 @@ int reference(fdtdMesh *sys, int freqNo, myint *RowId, myint *ColId, double *val
658658

659659
pardisoinit(pt, &mtype, iparm);
660660
iparm[38] = 1;
661-
iparm[34] = 1; // 0-based indexing
662-
iparm[3] = 2; // number of processors
663-
//iparm[59] = 2; // out of core version to solve very large problem
661+
iparm[34] = 1; // 0-based indexing
662+
iparm[3] = 0; /* No iterative-direct algorithm */
663+
//iparm[3] = 2; // CGS iteration for symmetric positive definite matrices replaces the computation of LLT
664+
//iparm[59] = 2; // out of core version to solve very large problem
664665
//iparm[10] = 0; /* Use nonsymmetric permutation and scaling MPS */
665666

666667
//cout << "Begin to solve (-w^2*D_eps+iwD_sig+S)x=-iwJ\n";

src/mapIndex.hpp

+4-4
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,18 @@
22
#define GDS2PARA_MAP_INDEX_H_
33

44
#include "fdtd.hpp"
5-
//#include "matrixTypeDef.hpp"
65

76
class mapIndex {
87
public:
98
// num of bricks Nx*Ny*Nz
109
myint Nx, Ny, Nz;
1110
myint N_totEdges; // number of all {e} or edges without PEC removal
11+
myint N_totEdges_rmPEC; // number of all {e} or edges after PEC removal
1212

1313
// num of edges after removing PEC
1414
myint N_edgesAtPEC; // number of {e} or edges at PEC surfaces
1515
myint N_surfExEz_rmPEC; // number of {e}_surface at one surface e.g. 0s, mode (growY, removed PEC)
16-
myint N_volEy_rmPEC; // number of {e}_volumn at one layer, e.g. 0v, mode (growY, removed PEC)
16+
myint N_volEy_rmPEC; // number of {e}_volume at one layer, e.g. 0v, mode (growY, removed PEC)
1717

1818
// Upper PEC edge indices (z=zmax) and lower PEC edge indices (z=zmin), index mode (growY, no PEC removal)
1919
unordered_set<myint> edges_upperPEC;
@@ -54,13 +54,13 @@ class mapIndex {
5454
this->N_surfExEz_rmPEC -= Nx; // remove Ex at PEC
5555
this->N_volEy_rmPEC -= (Nx + 1); // remove Ey at PEC
5656
#endif
57-
57+
this->N_totEdges_rmPEC = this->N_totEdges - this->N_edgesAtPEC;
5858
}
5959

6060
// Set global {e} index map between mode (growZ, no PEC removal) and mode (growY, no PEC removal)
6161
void setEdgeMap_growZgrowY() {
6262
/* The index follows y - x - z ordering, and {e} is stacked layer by layer as
63-
{e_surface, e_volumn}.T at each layer. For the two cases here:
63+
{e_surface, e_volume}.T at each layer. For the two cases here:
6464
- case 1 ~ grow along Z: {e} = {e_s, | e_v}.T = {ey,ex, | ez}.T, in which vector
6565
{ey} first runs through y for each x, then runs through x as by
6666
y - x - z ordering. Same for {ex} ~y->x and {ez} ~y->x.

src/matrixTypeDef.hpp

+21-1
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,26 @@ class denseFormatOfMatrix {
158158
exit(2);
159159
}
160160

161+
//// Refines the solution C = A_LU \ B and estimate its error (Error < 1e-12, negligible at this step)
162+
//vector<MKL_Complex16> A_mklComplex(this->matrixSize);
163+
//this->copyToMKL_Complex16(A_mklComplex.data()); // copy matrix A to a MKL_Complex16 type A_mklComplex
164+
//vector<MKL_Complex16> B_mklComplex(B.matrixSize);
165+
//B.copyToMKL_Complex16(B_mklComplex.data()); // copy matrix B to a MKL_Complex16 type B_mklComplex
166+
//vector<double> ferr(B.N_cols, 0.0), berr(B.N_cols, 0.0);
167+
//info = LAPACKE_zgerfs(LAPACK_COL_MAJOR, 'N',
168+
// B.N_rows, B.N_cols, // n & nrhs
169+
// A_mklComplex.data(), B.N_rows, // original A in MKL_Complex16 type
170+
// A_LU.data(), B.N_rows, // A_LU
171+
// ipiv.data(), // pivot indices of A_LU
172+
// B_mklComplex.data(), B.N_rows, // B
173+
// C_mklComplex.data(), B.N_rows, // C
174+
// ferr.data(), berr.data() // forward and backward errors for each solution vector
175+
//);
176+
//if (info != 0) {
177+
// cout << "Issue on backlash refinement, LAPACKE_?gerfs returns: " << info << endl;
178+
// exit(2);
179+
//}
180+
161181
// Store the solution at denseFormatOfMatrix
162182
denseFormatOfMatrix C(B.N_rows, B.N_cols);
163183
C.copyFromMKL_Complex16(C_mklComplex.data()); // copy C.vals (complex<double>) from MKL_Complex16
@@ -277,7 +297,7 @@ denseFormatOfMatrix csrFormatOfMatrix::backslashDense(const denseFormatOfMatrix
277297
pardiso(pt, &maxfct, &mnum, &mtype, &phase, &(this->N_rows), this->vals, this->rows, this->cols,
278298
&perm, &nrhs, iparm, &msglvl, (complex<double>*)denseB21B23.vals.data(), denseD0sD1s.vals.data(), &error);
279299
if (error != 0) {
280-
printf("\nERROR during numerical factorization: %d", error);
300+
printf("\nERROR during PARDISO backslash: %d", error);
281301
exit(2);
282302
}
283303

src/pardisoSolver.hpp

+69-8
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
#include "fdtd.hpp"
99
#include "matrixTypeDef.hpp"
1010
#include "mapIndex.hpp"
11+
//#define DEBUG_SOLVE_REORDERED_S // debug mode: directly solve the entire reordered S (growZ, rmPEC)
1112

1213
// Compute y = alpha * A * x + beta * y and store computed dense matrix in y
1314
int csrMultiplyDense(const sparse_matrix_t &csrA_mklHandle, // mkl handle of csr A
@@ -56,11 +57,11 @@ int eliminateVolumE(const vector<BlockType> &layerS, myint N_surfE, myint N_volE
5657
Inputs:
5758
layerS: a vector containing 9 blocks of the whole matrix S, ~ within one layer
5859
N_surfE: number of {e}_surface at one surface, e.g. 0s
59-
N_volE: number of {e}_volumn at one layer, e.g. 0v
60+
N_volE: number of {e}_volume at one layer, e.g. 0v
6061
Output:
6162
preducedS: pointing to memory of a vector storing 4 reduced dense blocks as {C11, C12, C21, C22}
6263
63-
Each isolated layer here contains 2 surfaces and 1 middle volumn e, namely 0s-0v-1s.
64+
Each isolated layer here contains 2 surfaces and 1 middle volume e, namely 0s-0v-1s.
6465
A symbolic form is (each number represents the blockId in vector<BlockType>):
6566
6667
0s 0v 1s 0s 1s
@@ -219,21 +220,32 @@ vector<myint> findSurfLocationOfPort(fdtdMesh *psys) {
219220
returned surfLocationOfPort = { 0, 2 } */
220221

221222
vector<myint> surfLocationOfPort;
223+
double dyErrorUpperBound = (psys->yn[1] - psys->yn[0]) * MINDISFRACY; // used to compare two equal y coordinates
222224

223225
for (myint sourcePort = 0; sourcePort < psys->numPorts; sourcePort++) {
224226
double y_thisPort = psys->portCoor[sourcePort].y1[0];
225227
for (myint indy = 0; indy < psys->N_cell_y + 1; indy++) { // search over all y nodes
226-
if (y_thisPort == psys->yn[indy]) {
228+
if (fabs(y_thisPort - psys->yn[indy]) < dyErrorUpperBound) {
227229
surfLocationOfPort.push_back(indy);
228230
break;
229231
}
230232
}
231233
}
232234

235+
if (surfLocationOfPort.size() != psys->numPorts) {
236+
cout << "ERROR! Missed some ports." << endl;
237+
exit(2);
238+
}
239+
233240
// Sort vector and erase duplicated surface index
234241
sort(surfLocationOfPort.begin(), surfLocationOfPort.end());
235242
surfLocationOfPort.erase( unique(surfLocationOfPort.begin(), surfLocationOfPort.end()), surfLocationOfPort.end() );
236243

244+
if (surfLocationOfPort.size() != psys->numPorts) {
245+
cout << "ERROR! Unsupported port setting! Ports should not locate at the same surface" << endl;
246+
exit(2);
247+
}
248+
237249
return surfLocationOfPort;
238250
}
239251

@@ -248,14 +260,46 @@ denseFormatOfMatrix cascadeMatrixS(fdtdMesh *psys, double omegaHz, const mapInde
248260
Return:
249261
- cascadedS: matrix "(-w^2*D_eps+iw*D_sig+ShSe/mu)" with only {e}_portSurf left */
250262

251-
252-
253263
// Num of {e} at each surface or each layer, index mode (growY, removed PEC)
254264
myint n_surfExEz = indexMap.N_surfExEz_rmPEC;
255265
myint n_volEy = indexMap.N_volEy_rmPEC;
256266
myint n_layerE_growY = n_surfExEz + n_volEy;
257267
myint N_layers = psys->N_cell_y; // num of layers
258268

269+
#ifdef DEBUG_SOLVE_REORDERED_S
270+
BlockType coo_reorderedS(psys->leng_S);
271+
for (myint i_nnz = 0; i_nnz < psys->leng_S; i_nnz++) {
272+
// (rowId, colId, val) of this nnz element in ShSe/mu, index mode (growZ, removed PEC)
273+
myint nnzS_rowId_rmPECz = psys->SRowId[i_nnz];
274+
myint nnzS_colId_rmPECz = psys->SColId[i_nnz];
275+
complex<double> cascadedS_val = psys->Sval[i_nnz];
276+
277+
// Map row and col index from (growZ, removed PEC) to (growY, removed PEC)
278+
myint nnzS_rowId = indexMap.eInd_map_rmPEC_z2y[nnzS_rowId_rmPECz];
279+
myint nnzS_colId = indexMap.eInd_map_rmPEC_z2y[nnzS_colId_rmPECz];
280+
281+
// For diagonal nnz, add "-w^2*eps+iw*sig" to psys->Sval (ShSe/mu)
282+
if (nnzS_rowId == nnzS_colId) { // if at diagonal
283+
myint nnzS_rowId_growZ = psys->mapEdgeR[nnzS_rowId_rmPECz];
284+
myint layerInd_alongZ = (nnzS_rowId_growZ + psys->N_edge_v) / (psys->N_edge_s + psys->N_edge_v);
285+
double epsi_thisnnz = psys->stackEpsn[layerInd_alongZ] * EPSILON0;
286+
double sigma_thisnnz = 0;
287+
if (psys->markEdge[nnzS_rowId_growZ] != 0) {
288+
sigma_thisnnz = SIGMA;
289+
} // if inside a conductor
290+
291+
complex<double> epsi_sigma = { -omegaHz*omegaHz*epsi_thisnnz, omegaHz*sigma_thisnnz };
292+
cascadedS_val += epsi_sigma; // -w^2*D_eps+iw*D_sig+ShSe/mu
293+
}
294+
coo_reorderedS.push_back({ nnzS_rowId, nnzS_colId, cascadedS_val });
295+
}
296+
297+
denseFormatOfMatrix denseS_reordered(indexMap.N_totEdges_rmPEC, indexMap.N_totEdges_rmPEC);
298+
denseS_reordered.convertBlockTypeToDense(coo_reorderedS);
299+
return denseS_reordered;
300+
301+
#endif
302+
259303
// Determine the block id of each nnz element of S and store in corresponding block matrix
260304
vector< BlockType > Blocks(8 * N_layers + 1); // store all bolck matrices of S in a 2D vector
261305
for (myint i_nnz = 0; i_nnz < psys->leng_S; i_nnz++) {
@@ -320,7 +364,7 @@ denseFormatOfMatrix cascadeMatrixS(fdtdMesh *psys, double omegaHz, const mapInde
320364
}
321365
}
322366

323-
// Eliminate e_volumn at each layer and store 4 reduced dense blocks of each layer
367+
// Eliminate e_volume at each layer and store 4 reduced dense blocks of each layer
324368
vector<vector<denseFormatOfMatrix>> surfSurfBlocks(N_layers); // N_layers*4 dense blocks
325369
for (myint i_layer = 0; i_layer < N_layers; i_layer++) {
326370
// Init and allocate memory for 4 dense blocks of each layer
@@ -465,9 +509,13 @@ denseFormatOfMatrix assignRhsJForAllPorts(fdtdMesh *psys, double omegaHz, const
465509
}
466510
}
467511

512+
#ifdef DEBUG_SOLVE_REORDERED_S
513+
return RhsJ_SI;
514+
#endif
515+
468516
// Only keep -iw{j} at port surfaces (cascading)
469517
myint N_surfExEz = indexMap.N_surfExEz_rmPEC; // number of edges at one surface
470-
myint n_volEy = indexMap.N_volEy_rmPEC; // number of volumn edges in one layer
518+
myint n_volEy = indexMap.N_volEy_rmPEC; // number of volume edges in one layer
471519
myint N_allCascadedEdges = psys->numPorts * N_surfExEz; // number of all edges in kept surfaces
472520
denseFormatOfMatrix cascadedRhsJ_SI(N_allCascadedEdges, psys->numPorts); // -iw{j} only at port surfaces
473521
vector<myint> surfLocationOfPort = findSurfLocationOfPort(psys);
@@ -500,7 +548,7 @@ denseFormatOfMatrix reconstruct_e(fdtdMesh *psys, const mapIndex &indexMap, cons
500548
- eField_oneExcit: of size (psys->N_edge - psys->bden) * 1, mode (growZ, removed PEC) */
501549

502550
myint N_surfExEz = indexMap.N_surfExEz_rmPEC; // number of edges at one surface
503-
myint n_volEy = indexMap.N_volEy_rmPEC; // number of volumn edges in one layer
551+
myint n_volEy = indexMap.N_volEy_rmPEC; // number of volume edges in one layer
504552
myint N_allCascadedEdges = psys->numPorts * N_surfExEz; // number of all edges in kept surfaces
505553
myint N_edgesNotAtPEC = psys->N_edge - psys->bden; // num of all edges after removing PEC
506554
vector<myint> surfLocationOfPort = findSurfLocationOfPort(psys); // surf index of the port's location
@@ -524,6 +572,19 @@ denseFormatOfMatrix reconstruct_e(fdtdMesh *psys, const mapIndex &indexMap, cons
524572
}
525573
}
526574

575+
#ifdef DEBUG_SOLVE_REORDERED_S
576+
for (myint e_ind = 0; e_ind < N_allCascadedEdges; e_ind++) {
577+
myint eInd_cascaded = e_ind + excitedPort * N_allCascadedEdges;
578+
579+
myint eInd_growY = indexMap.eInd_map_rmPEC2y[e_ind]; // Step 1: -> index at (growY, no PEC removal)
580+
myint eInd_growZ = indexMap.eInd_map_y2z[eInd_growY]; // Step 2: map (growY, no PEC removal) to (growZ, no PEC removal)
581+
myint eInd_growZrmPEC = psys->mapEdge[eInd_growZ]; // Step 3: map (growZ, no PEC removal) to (growZ, removed PEC)
582+
583+
eField_oneExcit.vals[eInd_growZrmPEC] = cascadedeField_SI.vals[eInd_cascaded];
584+
}
585+
return eField_oneExcit;
586+
#endif
587+
527588
return eField_oneExcit;
528589
}
529590

0 commit comments

Comments
 (0)