You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
465 lines
15 KiB
465 lines
15 KiB
/* ========================================================================== */
|
|
/* === KLU_tsolve =========================================================== */
|
|
/* ========================================================================== */
|
|
|
|
/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze
|
|
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
|
|
* performed. Uses Numeric->Xwork as workspace (undefined on input and output),
|
|
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
|
|
* Numeric->Iwork).
|
|
*/
|
|
|
|
#include "klu_internal.h"
|
|
|
|
Int KLU_tsolve
|
|
(
|
|
/* inputs, not modified */
|
|
KLU_symbolic *Symbolic,
|
|
KLU_numeric *Numeric,
|
|
Int d, /* leading dimension of B */
|
|
Int nrhs, /* number of right-hand-sides */
|
|
|
|
/* right-hand-side on input, overwritten with solution to Ax=b on output */
|
|
double B [ ], /* size n*nrhs, in column-oriented form, with
|
|
* leading dimension d. */
|
|
#ifdef COMPLEX
|
|
Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for
|
|
* array transpose solve. Used for the complex
|
|
* case only. */
|
|
#endif
|
|
/* --------------- */
|
|
KLU_common *Common
|
|
)
|
|
{
|
|
Entry x [4], offik, s ;
|
|
double rs, *Rs ;
|
|
Entry *Offx, *X, *Bz, *Udiag ;
|
|
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
|
|
Unit **LUbx ;
|
|
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
/* check inputs */
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
if (Common == NULL)
|
|
{
|
|
return (FALSE) ;
|
|
}
|
|
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
|
|
B == NULL)
|
|
{
|
|
Common->status = KLU_INVALID ;
|
|
return (FALSE) ;
|
|
}
|
|
Common->status = KLU_OK ;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
/* get the contents of the Symbolic object */
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
Bz = (Entry *) B ;
|
|
n = Symbolic->n ;
|
|
nblocks = Symbolic->nblocks ;
|
|
Q = Symbolic->Q ;
|
|
R = Symbolic->R ;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
/* get the contents of the Numeric object */
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
ASSERT (nblocks == Numeric->nblocks) ;
|
|
Pnum = Numeric->Pnum ;
|
|
Offp = Numeric->Offp ;
|
|
Offi = Numeric->Offi ;
|
|
Offx = (Entry *) Numeric->Offx ;
|
|
|
|
Lip = Numeric->Lip ;
|
|
Llen = Numeric->Llen ;
|
|
Uip = Numeric->Uip ;
|
|
Ulen = Numeric->Ulen ;
|
|
LUbx = (Unit **) Numeric->LUbx ;
|
|
Udiag = Numeric->Udiag ;
|
|
|
|
Rs = Numeric->Rs ;
|
|
X = (Entry *) Numeric->Xwork ;
|
|
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
|
|
|
|
/* ---------------------------------------------------------------------- */
|
|
/* solve in chunks of 4 columns at a time */
|
|
/* ---------------------------------------------------------------------- */
|
|
|
|
for (chunk = 0 ; chunk < nrhs ; chunk += 4)
|
|
{
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
/* get the size of the current chunk */
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
nr = MIN (nrhs - chunk, 4) ;
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
/* permute the right hand side, X = Q'*B */
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
switch (nr)
|
|
{
|
|
|
|
case 1:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
X [k] = Bz [Q [k]] ;
|
|
}
|
|
break ;
|
|
|
|
case 2:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Q [k] ;
|
|
X [2*k ] = Bz [i ] ;
|
|
X [2*k + 1] = Bz [i + d ] ;
|
|
}
|
|
break ;
|
|
|
|
case 3:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Q [k] ;
|
|
X [3*k ] = Bz [i ] ;
|
|
X [3*k + 1] = Bz [i + d ] ;
|
|
X [3*k + 2] = Bz [i + d*2] ;
|
|
}
|
|
break ;
|
|
|
|
case 4:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Q [k] ;
|
|
X [4*k ] = Bz [i ] ;
|
|
X [4*k + 1] = Bz [i + d ] ;
|
|
X [4*k + 2] = Bz [i + d*2] ;
|
|
X [4*k + 3] = Bz [i + d*3] ;
|
|
}
|
|
break ;
|
|
|
|
}
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
/* solve X = (L*U + Off)'\X */
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
for (block = 0 ; block < nblocks ; block++)
|
|
{
|
|
|
|
/* -------------------------------------------------------------- */
|
|
/* the block of size nk is from rows/columns k1 to k2-1 */
|
|
/* -------------------------------------------------------------- */
|
|
|
|
k1 = R [block] ;
|
|
k2 = R [block+1] ;
|
|
nk = k2 - k1 ;
|
|
PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
|
|
|
|
/* -------------------------------------------------------------- */
|
|
/* block back-substitution for the off-diagonal-block entries */
|
|
/* -------------------------------------------------------------- */
|
|
|
|
if (block > 0)
|
|
{
|
|
switch (nr)
|
|
{
|
|
|
|
case 1:
|
|
|
|
for (k = k1 ; k < k2 ; k++)
|
|
{
|
|
pend = Offp [k+1] ;
|
|
for (p = Offp [k] ; p < pend ; p++)
|
|
{
|
|
#ifdef COMPLEX
|
|
if (conj_solve)
|
|
{
|
|
MULT_SUB_CONJ (X [k], X [Offi [p]],
|
|
Offx [p]) ;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
MULT_SUB (X [k], Offx [p], X [Offi [p]]) ;
|
|
}
|
|
}
|
|
}
|
|
break ;
|
|
|
|
case 2:
|
|
|
|
for (k = k1 ; k < k2 ; k++)
|
|
{
|
|
pend = Offp [k+1] ;
|
|
x [0] = X [2*k ] ;
|
|
x [1] = X [2*k + 1] ;
|
|
for (p = Offp [k] ; p < pend ; p++)
|
|
{
|
|
i = Offi [p] ;
|
|
#ifdef COMPLEX
|
|
if (conj_solve)
|
|
{
|
|
CONJ (offik, Offx [p]) ;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
offik = Offx [p] ;
|
|
}
|
|
MULT_SUB (x [0], offik, X [2*i]) ;
|
|
MULT_SUB (x [1], offik, X [2*i + 1]) ;
|
|
}
|
|
X [2*k ] = x [0] ;
|
|
X [2*k + 1] = x [1] ;
|
|
}
|
|
break ;
|
|
|
|
case 3:
|
|
|
|
for (k = k1 ; k < k2 ; k++)
|
|
{
|
|
pend = Offp [k+1] ;
|
|
x [0] = X [3*k ] ;
|
|
x [1] = X [3*k + 1] ;
|
|
x [2] = X [3*k + 2] ;
|
|
for (p = Offp [k] ; p < pend ; p++)
|
|
{
|
|
i = Offi [p] ;
|
|
#ifdef COMPLEX
|
|
if (conj_solve)
|
|
{
|
|
CONJ (offik, Offx [p]) ;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
offik = Offx [p] ;
|
|
}
|
|
MULT_SUB (x [0], offik, X [3*i]) ;
|
|
MULT_SUB (x [1], offik, X [3*i + 1]) ;
|
|
MULT_SUB (x [2], offik, X [3*i + 2]) ;
|
|
}
|
|
X [3*k ] = x [0] ;
|
|
X [3*k + 1] = x [1] ;
|
|
X [3*k + 2] = x [2] ;
|
|
}
|
|
break ;
|
|
|
|
case 4:
|
|
|
|
for (k = k1 ; k < k2 ; k++)
|
|
{
|
|
pend = Offp [k+1] ;
|
|
x [0] = X [4*k ] ;
|
|
x [1] = X [4*k + 1] ;
|
|
x [2] = X [4*k + 2] ;
|
|
x [3] = X [4*k + 3] ;
|
|
for (p = Offp [k] ; p < pend ; p++)
|
|
{
|
|
i = Offi [p] ;
|
|
#ifdef COMPLEX
|
|
if (conj_solve)
|
|
{
|
|
CONJ(offik, Offx [p]) ;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
offik = Offx [p] ;
|
|
}
|
|
MULT_SUB (x [0], offik, X [4*i]) ;
|
|
MULT_SUB (x [1], offik, X [4*i + 1]) ;
|
|
MULT_SUB (x [2], offik, X [4*i + 2]) ;
|
|
MULT_SUB (x [3], offik, X [4*i + 3]) ;
|
|
}
|
|
X [4*k ] = x [0] ;
|
|
X [4*k + 1] = x [1] ;
|
|
X [4*k + 2] = x [2] ;
|
|
X [4*k + 3] = x [3] ;
|
|
}
|
|
break ;
|
|
}
|
|
}
|
|
|
|
/* -------------------------------------------------------------- */
|
|
/* solve the block system */
|
|
/* -------------------------------------------------------------- */
|
|
|
|
if (nk == 1)
|
|
{
|
|
#ifdef COMPLEX
|
|
if (conj_solve)
|
|
{
|
|
CONJ (s, Udiag [k1]) ;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
s = Udiag [k1] ;
|
|
}
|
|
switch (nr)
|
|
{
|
|
|
|
case 1:
|
|
DIV (X [k1], X [k1], s) ;
|
|
break ;
|
|
|
|
case 2:
|
|
DIV (X [2*k1], X [2*k1], s) ;
|
|
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
|
|
break ;
|
|
|
|
case 3:
|
|
DIV (X [3*k1], X [3*k1], s) ;
|
|
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
|
|
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
|
|
break ;
|
|
|
|
case 4:
|
|
DIV (X [4*k1], X [4*k1], s) ;
|
|
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
|
|
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
|
|
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
|
|
break ;
|
|
|
|
}
|
|
}
|
|
else
|
|
{
|
|
KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block],
|
|
Udiag + k1, nr,
|
|
#ifdef COMPLEX
|
|
conj_solve,
|
|
#endif
|
|
X + nr*k1) ;
|
|
KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
|
|
#ifdef COMPLEX
|
|
conj_solve,
|
|
#endif
|
|
X + nr*k1) ;
|
|
}
|
|
}
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
/* scale and permute the result, Bz = P'(R\X) */
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
if (Rs == NULL)
|
|
{
|
|
|
|
/* no scaling */
|
|
switch (nr)
|
|
{
|
|
|
|
case 1:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
Bz [Pnum [k]] = X [k] ;
|
|
}
|
|
break ;
|
|
|
|
case 2:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Pnum [k] ;
|
|
Bz [i ] = X [2*k ] ;
|
|
Bz [i + d ] = X [2*k + 1] ;
|
|
}
|
|
break ;
|
|
|
|
case 3:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Pnum [k] ;
|
|
Bz [i ] = X [3*k ] ;
|
|
Bz [i + d ] = X [3*k + 1] ;
|
|
Bz [i + d*2] = X [3*k + 2] ;
|
|
}
|
|
break ;
|
|
|
|
case 4:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Pnum [k] ;
|
|
Bz [i ] = X [4*k ] ;
|
|
Bz [i + d ] = X [4*k + 1] ;
|
|
Bz [i + d*2] = X [4*k + 2] ;
|
|
Bz [i + d*3] = X [4*k + 3] ;
|
|
}
|
|
break ;
|
|
}
|
|
|
|
}
|
|
else
|
|
{
|
|
|
|
switch (nr)
|
|
{
|
|
|
|
case 1:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ;
|
|
}
|
|
break ;
|
|
|
|
case 2:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Pnum [k] ;
|
|
rs = Rs [k] ;
|
|
SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ;
|
|
SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ;
|
|
}
|
|
break ;
|
|
|
|
case 3:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Pnum [k] ;
|
|
rs = Rs [k] ;
|
|
SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ;
|
|
SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ;
|
|
SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ;
|
|
}
|
|
break ;
|
|
|
|
case 4:
|
|
|
|
for (k = 0 ; k < n ; k++)
|
|
{
|
|
i = Pnum [k] ;
|
|
rs = Rs [k] ;
|
|
SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ;
|
|
SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ;
|
|
SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ;
|
|
SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ;
|
|
}
|
|
break ;
|
|
}
|
|
}
|
|
|
|
/* ------------------------------------------------------------------ */
|
|
/* go to the next chunk of B */
|
|
/* ------------------------------------------------------------------ */
|
|
|
|
Bz += d*4 ;
|
|
}
|
|
return (TRUE) ;
|
|
}
|