You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 
 

400 lines
13 KiB

/* ========================================================================== */
/* === KLU_solve ============================================================ */
/* ========================================================================== */
/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is
* performed. Uses Numeric->Xwork as workspace (undefined on input and output),
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with
* Numeric->Iwork).
*/
#include "klu_internal.h"
Int KLU_solve
(
/* inputs, not modified */
KLU_symbolic *Symbolic,
KLU_numeric *Numeric,
Int d, /* leading dimension of B */
Int nrhs, /* number of right-hand-sides */
/* right-hand-side on input, overwritten with solution to Ax=b on output */
double B [ ], /* size n*nrhs, in column-oriented form, with
* leading dimension d. */
/* --------------- */
KLU_common *Common
)
{
Entry x [4], offik, s ;
double rs, *Rs ;
Entry *Offx, *X, *Bz, *Udiag ;
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ;
Unit **LUbx ;
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ;
/* ---------------------------------------------------------------------- */
/* check inputs */
/* ---------------------------------------------------------------------- */
if (Common == NULL)
{
return (FALSE) ;
}
if (Common->status == KLU_EMPTY_MATRIX)
{
return (FALSE) ;
}
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 ||
B == NULL)
{
Common->status = KLU_INVALID ;
return (FALSE) ;
}
Common->status = KLU_OK ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Symbolic object */
/* ---------------------------------------------------------------------- */
Bz = (Entry *) B ;
n = Symbolic->n ;
nblocks = Symbolic->nblocks ;
Q = Symbolic->Q ;
R = Symbolic->R ;
/* ---------------------------------------------------------------------- */
/* get the contents of the Numeric object */
/* ---------------------------------------------------------------------- */
ASSERT (nblocks == Numeric->nblocks) ;
Pnum = Numeric->Pnum ;
Offp = Numeric->Offp ;
Offi = Numeric->Offi ;
Offx = (Entry *) Numeric->Offx ;
Lip = Numeric->Lip ;
Llen = Numeric->Llen ;
Uip = Numeric->Uip ;
Ulen = Numeric->Ulen ;
LUbx = (Unit **) Numeric->LUbx ;
Udiag = Numeric->Udiag ;
Rs = Numeric->Rs ;
X = (Entry *) Numeric->Xwork ;
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ;
/* ---------------------------------------------------------------------- */
/* solve in chunks of 4 columns at a time */
/* ---------------------------------------------------------------------- */
for (chunk = 0 ; chunk < nrhs ; chunk += 4)
{
/* ------------------------------------------------------------------ */
/* get the size of the current chunk */
/* ------------------------------------------------------------------ */
nr = MIN (nrhs - chunk, 4) ;
/* ------------------------------------------------------------------ */
/* scale and permute the right hand side, X = P*(R\B) */
/* ------------------------------------------------------------------ */
if (Rs == NULL)
{
/* no scaling */
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
X [k] = Bz [Pnum [k]] ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
X [2*k ] = Bz [i ] ;
X [2*k + 1] = Bz [i + d ] ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
X [3*k ] = Bz [i ] ;
X [3*k + 1] = Bz [i + d ] ;
X [3*k + 2] = Bz [i + d*2] ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
X [4*k ] = Bz [i ] ;
X [4*k + 1] = Bz [i + d ] ;
X [4*k + 2] = Bz [i + d*2] ;
X [4*k + 3] = Bz [i + d*3] ;
}
break ;
}
}
else
{
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ;
SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ;
SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ;
SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Pnum [k] ;
rs = Rs [k] ;
SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ;
SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ;
SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ;
SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ;
}
break ;
}
}
/* ------------------------------------------------------------------ */
/* solve X = (L*U + Off)\X */
/* ------------------------------------------------------------------ */
for (block = nblocks-1 ; block >= 0 ; block--)
{
/* -------------------------------------------------------------- */
/* the block of size nk is from rows/columns k1 to k2-1 */
/* -------------------------------------------------------------- */
k1 = R [block] ;
k2 = R [block+1] ;
nk = k2 - k1 ;
PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ;
/* solve the block system */
if (nk == 1)
{
s = Udiag [k1] ;
switch (nr)
{
case 1:
DIV (X [k1], X [k1], s) ;
break ;
case 2:
DIV (X [2*k1], X [2*k1], s) ;
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ;
break ;
case 3:
DIV (X [3*k1], X [3*k1], s) ;
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ;
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ;
break ;
case 4:
DIV (X [4*k1], X [4*k1], s) ;
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ;
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ;
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ;
break ;
}
}
else
{
KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr,
X + nr*k1) ;
KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block],
Udiag + k1, nr, X + nr*k1) ;
}
/* -------------------------------------------------------------- */
/* block back-substitution for the off-diagonal-block entries */
/* -------------------------------------------------------------- */
if (block > 0)
{
switch (nr)
{
case 1:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [k] ;
for (p = Offp [k] ; p < pend ; p++)
{
MULT_SUB (X [Offi [p]], Offx [p], x [0]) ;
}
}
break ;
case 2:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [2*k ] ;
x [1] = X [2*k + 1] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
offik = Offx [p] ;
MULT_SUB (X [2*i], offik, x [0]) ;
MULT_SUB (X [2*i + 1], offik, x [1]) ;
}
}
break ;
case 3:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [3*k ] ;
x [1] = X [3*k + 1] ;
x [2] = X [3*k + 2] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
offik = Offx [p] ;
MULT_SUB (X [3*i], offik, x [0]) ;
MULT_SUB (X [3*i + 1], offik, x [1]) ;
MULT_SUB (X [3*i + 2], offik, x [2]) ;
}
}
break ;
case 4:
for (k = k1 ; k < k2 ; k++)
{
pend = Offp [k+1] ;
x [0] = X [4*k ] ;
x [1] = X [4*k + 1] ;
x [2] = X [4*k + 2] ;
x [3] = X [4*k + 3] ;
for (p = Offp [k] ; p < pend ; p++)
{
i = Offi [p] ;
offik = Offx [p] ;
MULT_SUB (X [4*i], offik, x [0]) ;
MULT_SUB (X [4*i + 1], offik, x [1]) ;
MULT_SUB (X [4*i + 2], offik, x [2]) ;
MULT_SUB (X [4*i + 3], offik, x [3]) ;
}
}
break ;
}
}
}
/* ------------------------------------------------------------------ */
/* permute the result, Bz = Q*X */
/* ------------------------------------------------------------------ */
switch (nr)
{
case 1:
for (k = 0 ; k < n ; k++)
{
Bz [Q [k]] = X [k] ;
}
break ;
case 2:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
Bz [i ] = X [2*k ] ;
Bz [i + d ] = X [2*k + 1] ;
}
break ;
case 3:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
Bz [i ] = X [3*k ] ;
Bz [i + d ] = X [3*k + 1] ;
Bz [i + d*2] = X [3*k + 2] ;
}
break ;
case 4:
for (k = 0 ; k < n ; k++)
{
i = Q [k] ;
Bz [i ] = X [4*k ] ;
Bz [i + d ] = X [4*k + 1] ;
Bz [i + d*2] = X [4*k + 2] ;
Bz [i + d*3] = X [4*k + 3] ;
}
break ;
}
/* ------------------------------------------------------------------ */
/* go to the next chunk of B */
/* ------------------------------------------------------------------ */
Bz += d*4 ;
}
return (TRUE) ;
}