committed by
Holger Vogt
44 changed files with 17674 additions and 0 deletions
-
151src/maths/KLU/UFconfig.h
-
412src/maths/KLU/amd.h
-
181src/maths/KLU/amd_1.c
-
1842src/maths/KLU/amd_2.c
-
185src/maths/KLU/amd_aat.c
-
64src/maths/KLU/amd_control.c
-
38src/maths/KLU/amd_defaults.c
-
180src/maths/KLU/amd_dump.c
-
84src/maths/KLU/amd_global.c
-
120src/maths/KLU/amd_info.c
-
350src/maths/KLU/amd_internal.h
-
200src/maths/KLU/amd_order.c
-
121src/maths/KLU/amd_post_tree.c
-
207src/maths/KLU/amd_postorder.c
-
119src/maths/KLU/amd_preprocess.c
-
93src/maths/KLU/amd_valid.c
-
263src/maths/KLU/btf.h
-
64src/maths/KLU/btf_internal.h
-
387src/maths/KLU/btf_maxtrans.c
-
132src/maths/KLU/btf_order.c
-
593src/maths/KLU/btf_strongcomp.c
-
3611src/maths/KLU/colamd.c
-
255src/maths/KLU/colamd.h
-
24src/maths/KLU/colamd_global.c
-
773src/maths/KLU/klu.c
-
831src/maths/KLU/klu.h
-
488src/maths/KLU/klu_analyze.c
-
369src/maths/KLU/klu_analyze_given.c
-
60src/maths/KLU/klu_defaults.c
-
570src/maths/KLU/klu_diagnostics.c
-
142src/maths/KLU/klu_dump.c
-
290src/maths/KLU/klu_extract.c
-
545src/maths/KLU/klu_factor.c
-
71src/maths/KLU/klu_free_numeric.c
-
34src/maths/KLU/klu_free_symbolic.c
-
243src/maths/KLU/klu_internal.h
-
1009src/maths/KLU/klu_kernel.c
-
225src/maths/KLU/klu_memory.c
-
478src/maths/KLU/klu_refactor.c
-
159src/maths/KLU/klu_scale.c
-
396src/maths/KLU/klu_solve.c
-
156src/maths/KLU/klu_sort.c
-
465src/maths/KLU/klu_tsolve.c
-
694src/maths/KLU/klu_version.h
@ -0,0 +1,151 @@ |
|||
/* ========================================================================== */ |
|||
/* === UFconfig.h =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Configuration file for SuiteSparse: a Suite of Sparse matrix packages |
|||
* (AMD, COLAMD, CCOLAMD, CAMD, CHOLMOD, UMFPACK, CXSparse, and others). |
|||
* |
|||
* UFconfig.h provides the definition of the long integer. On most systems, |
|||
* a C program can be compiled in LP64 mode, in which long's and pointers are |
|||
* both 64-bits, and int's are 32-bits. Windows 64, however, uses the LLP64 |
|||
* model, in which int's and long's are 32-bits, and long long's and pointers |
|||
* are 64-bits. |
|||
* |
|||
* SuiteSparse packages that include long integer versions are |
|||
* intended for the LP64 mode. However, as a workaround for Windows 64 |
|||
* (and perhaps other systems), the long integer can be redefined. |
|||
* |
|||
* If _WIN64 is defined, then the __int64 type is used instead of long. |
|||
* |
|||
* The long integer can also be defined at compile time. For example, this |
|||
* could be added to UFconfig.mk: |
|||
* |
|||
* CFLAGS = -O -D'UF_long=long long' -D'UF_long_max=9223372036854775801' \ |
|||
* -D'UF_long_idd="lld"' |
|||
* |
|||
* This file defines UF_long as either long (on all but _WIN64) or |
|||
* __int64 on Windows 64. The intent is that a UF_long is always a 64-bit |
|||
* integer in a 64-bit code. ptrdiff_t might be a better choice than long; |
|||
* it is always the same size as a pointer. |
|||
* |
|||
* This file also defines the SUITESPARSE_VERSION and related definitions. |
|||
* |
|||
* Copyright (c) 2007, University of Florida. No licensing restrictions |
|||
* apply to this file or to the UFconfig directory. Author: Timothy A. Davis. |
|||
*/ |
|||
|
|||
#ifndef _UFCONFIG_H |
|||
#define _UFCONFIG_H |
|||
|
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
#include <limits.h> |
|||
#include <stdlib.h> |
|||
|
|||
/* ========================================================================== */ |
|||
/* === UF_long ============================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
#ifndef UF_long |
|||
|
|||
#ifdef _WIN64 |
|||
|
|||
#define UF_long __int64 |
|||
#define UF_long_max _I64_MAX |
|||
#define UF_long_idd "I64d" |
|||
|
|||
#else |
|||
|
|||
#define UF_long long |
|||
#define UF_long_max LONG_MAX |
|||
#define UF_long_idd "ld" |
|||
|
|||
#endif |
|||
#define UF_long_id "%" UF_long_idd |
|||
#endif |
|||
|
|||
/* ========================================================================== */ |
|||
/* === UFconfig parameters and functions ==================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* SuiteSparse-wide parameters will be placed in this struct. So far, they |
|||
are only used by RBio. */ |
|||
|
|||
typedef struct UFconfig_struct |
|||
{ |
|||
void *(*malloc_memory) (size_t) ; /* pointer to malloc */ |
|||
void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */ |
|||
void (*free_memory) (void *) ; /* pointer to free */ |
|||
void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */ |
|||
|
|||
} UFconfig ; |
|||
|
|||
void *UFmalloc /* pointer to allocated block of memory */ |
|||
( |
|||
size_t nitems, /* number of items to malloc (>=1 is enforced) */ |
|||
size_t size_of_item, /* sizeof each item */ |
|||
int *ok, /* TRUE if successful, FALSE otherwise */ |
|||
UFconfig *config /* SuiteSparse-wide configuration */ |
|||
) ; |
|||
|
|||
void *UFfree /* always returns NULL */ |
|||
( |
|||
void *p, /* block to free */ |
|||
UFconfig *config /* SuiteSparse-wide configuration */ |
|||
) ; |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === SuiteSparse version ================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* SuiteSparse is not a package itself, but a collection of packages, some of |
|||
* which must be used together (UMFPACK requires AMD, CHOLMOD requires AMD, |
|||
* COLAMD, CAMD, and CCOLAMD, etc). A version number is provided here for the |
|||
* collection itself. The versions of packages within each version of |
|||
* SuiteSparse are meant to work together. Combining one packge from one |
|||
* version of SuiteSparse, with another package from another version of |
|||
* SuiteSparse, may or may not work. |
|||
* |
|||
* SuiteSparse Version 3.7.0 contains the following packages: |
|||
* |
|||
* UFconfig version 3.7.0 (version always the same as SuiteSparse) |
|||
* AMD version 2.2.3 |
|||
* CAMD version 2.2.3 |
|||
* CCOLAMD version 2.7.4 |
|||
* COLAMD version 2.7.4 |
|||
* BTF version 1.1.3 |
|||
* CHOLMOD version 1.7.4 |
|||
* CSparse3 version 3.0.2 |
|||
* CSparse version 2.2.6 |
|||
* CXSparse version 2.2.6 |
|||
* KLU version 1.1.3 |
|||
* LDL version 2.0.4 |
|||
* RBio version 2.0.2 |
|||
* SPQR version 1.2.3 (also called SuiteSparseQR) |
|||
* UFcollection version 1.6.0 |
|||
* UMFPACK version 5.5.2 |
|||
* SSMULT version 2.0.3 |
|||
* spqr_rank version 1.0.0 |
|||
* MATLAB_Tools various packages & M-files. No specific version number. |
|||
* |
|||
* Other package dependencies: |
|||
* BLAS required by CHOLMOD and UMFPACK |
|||
* LAPACK required by CHOLMOD |
|||
* METIS 4.0.1 required by CHOLMOD (optional) and KLU (optional) |
|||
*/ |
|||
|
|||
#define SUITESPARSE_DATE "Dec 15, 2011" |
|||
#define SUITESPARSE_VER_CODE(main,sub) ((main) * 1000 + (sub)) |
|||
#define SUITESPARSE_MAIN_VERSION 3 |
|||
#define SUITESPARSE_SUB_VERSION 7 |
|||
#define SUITESPARSE_SUBSUB_VERSION 0 |
|||
#define SUITESPARSE_VERSION \ |
|||
SUITESPARSE_VER_CODE(SUITESPARSE_MAIN_VERSION,SUITESPARSE_SUB_VERSION) |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
#endif |
|||
@ -0,0 +1,412 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD: approximate minimum degree ordering =========================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD Version 2.2, Copyright (c) 2007 by Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* AMD finds a symmetric ordering P of a matrix A so that the Cholesky |
|||
* factorization of P*A*P' has fewer nonzeros and takes less work than the |
|||
* Cholesky factorization of A. If A is not symmetric, then it performs its |
|||
* ordering on the matrix A+A'. Two sets of user-callable routines are |
|||
* provided, one for int integers and the other for UF_long integers. |
|||
* |
|||
* The method is based on the approximate minimum degree algorithm, discussed |
|||
* in Amestoy, Davis, and Duff, "An approximate degree ordering algorithm", |
|||
* SIAM Journal of Matrix Analysis and Applications, vol. 17, no. 4, pp. |
|||
* 886-905, 1996. This package can perform both the AMD ordering (with |
|||
* aggressive absorption), and the AMDBAR ordering (without aggressive |
|||
* absorption) discussed in the above paper. This package differs from the |
|||
* Fortran codes discussed in the paper: |
|||
* |
|||
* (1) it can ignore "dense" rows and columns, leading to faster run times |
|||
* (2) it computes the ordering of A+A' if A is not symmetric |
|||
* (3) it is followed by a depth-first post-ordering of the assembly tree |
|||
* (or supernodal elimination tree) |
|||
* |
|||
* For historical reasons, the Fortran versions, amd.f and amdbar.f, have |
|||
* been left (nearly) unchanged. They compute the identical ordering as |
|||
* described in the above paper. |
|||
*/ |
|||
|
|||
#ifndef AMD_H |
|||
#define AMD_H |
|||
|
|||
/* make it easy for C++ programs to include AMD */ |
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
/* get the definition of size_t: */ |
|||
#include <stddef.h> |
|||
|
|||
/* define UF_long */ |
|||
#include "UFconfig.h" |
|||
|
|||
int amd_order /* returns AMD_OK, AMD_OK_BUT_JUMBLED, |
|||
* AMD_INVALID, or AMD_OUT_OF_MEMORY */ |
|||
( |
|||
int n, /* A is n-by-n. n must be >= 0. */ |
|||
const int Ap [ ], /* column pointers for A, of size n+1 */ |
|||
const int Ai [ ], /* row indices of A, of size nz = Ap [n] */ |
|||
int P [ ], /* output permutation, of size n */ |
|||
double Control [ ], /* input Control settings, of size AMD_CONTROL */ |
|||
double Info [ ] /* output Info statistics, of size AMD_INFO */ |
|||
) ; |
|||
|
|||
UF_long amd_l_order /* see above for description of arguments */ |
|||
( |
|||
UF_long n, |
|||
const UF_long Ap [ ], |
|||
const UF_long Ai [ ], |
|||
UF_long P [ ], |
|||
double Control [ ], |
|||
double Info [ ] |
|||
) ; |
|||
|
|||
/* Input arguments (not modified): |
|||
* |
|||
* n: the matrix A is n-by-n. |
|||
* Ap: an int/UF_long array of size n+1, containing column pointers of A. |
|||
* Ai: an int/UF_long array of size nz, containing the row indices of A, |
|||
* where nz = Ap [n]. |
|||
* Control: a double array of size AMD_CONTROL, containing control |
|||
* parameters. Defaults are used if Control is NULL. |
|||
* |
|||
* Output arguments (not defined on input): |
|||
* |
|||
* P: an int/UF_long array of size n, containing the output permutation. If |
|||
* row i is the kth pivot row, then P [k] = i. In MATLAB notation, |
|||
* the reordered matrix is A (P,P). |
|||
* Info: a double array of size AMD_INFO, containing statistical |
|||
* information. Ignored if Info is NULL. |
|||
* |
|||
* On input, the matrix A is stored in column-oriented form. The row indices |
|||
* of nonzero entries in column j are stored in Ai [Ap [j] ... Ap [j+1]-1]. |
|||
* |
|||
* If the row indices appear in ascending order in each column, and there |
|||
* are no duplicate entries, then amd_order is slightly more efficient in |
|||
* terms of time and memory usage. If this condition does not hold, a copy |
|||
* of the matrix is created (where these conditions do hold), and the copy is |
|||
* ordered. This feature is new to v2.0 (v1.2 and earlier required this |
|||
* condition to hold for the input matrix). |
|||
* |
|||
* Row indices must be in the range 0 to |
|||
* n-1. Ap [0] must be zero, and thus nz = Ap [n] is the number of nonzeros |
|||
* in A. The array Ap is of size n+1, and the array Ai is of size nz = Ap [n]. |
|||
* The matrix does not need to be symmetric, and the diagonal does not need to |
|||
* be present (if diagonal entries are present, they are ignored except for |
|||
* the output statistic Info [AMD_NZDIAG]). The arrays Ai and Ap are not |
|||
* modified. This form of the Ap and Ai arrays to represent the nonzero |
|||
* pattern of the matrix A is the same as that used internally by MATLAB. |
|||
* If you wish to use a more flexible input structure, please see the |
|||
* umfpack_*_triplet_to_col routines in the UMFPACK package, at |
|||
* http://www.cise.ufl.edu/research/sparse/umfpack. |
|||
* |
|||
* Restrictions: n >= 0. Ap [0] = 0. Ap [j] <= Ap [j+1] for all j in the |
|||
* range 0 to n-1. nz = Ap [n] >= 0. Ai [0..nz-1] must be in the range 0 |
|||
* to n-1. Finally, Ai, Ap, and P must not be NULL. If any of these |
|||
* restrictions are not met, AMD returns AMD_INVALID. |
|||
* |
|||
* AMD returns: |
|||
* |
|||
* AMD_OK if the matrix is valid and sufficient memory can be allocated to |
|||
* perform the ordering. |
|||
* |
|||
* AMD_OUT_OF_MEMORY if not enough memory can be allocated. |
|||
* |
|||
* AMD_INVALID if the input arguments n, Ap, Ai are invalid, or if P is |
|||
* NULL. |
|||
* |
|||
* AMD_OK_BUT_JUMBLED if the matrix had unsorted columns, and/or duplicate |
|||
* entries, but was otherwise valid. |
|||
* |
|||
* The AMD routine first forms the pattern of the matrix A+A', and then |
|||
* computes a fill-reducing ordering, P. If P [k] = i, then row/column i of |
|||
* the original is the kth pivotal row. In MATLAB notation, the permuted |
|||
* matrix is A (P,P), except that 0-based indexing is used instead of the |
|||
* 1-based indexing in MATLAB. |
|||
* |
|||
* The Control array is used to set various parameters for AMD. If a NULL |
|||
* pointer is passed, default values are used. The Control array is not |
|||
* modified. |
|||
* |
|||
* Control [AMD_DENSE]: controls the threshold for "dense" rows/columns. |
|||
* A dense row/column in A+A' can cause AMD to spend a lot of time in |
|||
* ordering the matrix. If Control [AMD_DENSE] >= 0, rows/columns |
|||
* with more than Control [AMD_DENSE] * sqrt (n) entries are ignored |
|||
* during the ordering, and placed last in the output order. The |
|||
* default value of Control [AMD_DENSE] is 10. If negative, no |
|||
* rows/columns are treated as "dense". Rows/columns with 16 or |
|||
* fewer off-diagonal entries are never considered "dense". |
|||
* |
|||
* Control [AMD_AGGRESSIVE]: controls whether or not to use aggressive |
|||
* absorption, in which a prior element is absorbed into the current |
|||
* element if is a subset of the current element, even if it is not |
|||
* adjacent to the current pivot element (refer to Amestoy, Davis, |
|||
* & Duff, 1996, for more details). The default value is nonzero, |
|||
* which means to perform aggressive absorption. This nearly always |
|||
* leads to a better ordering (because the approximate degrees are |
|||
* more accurate) and a lower execution time. There are cases where |
|||
* it can lead to a slightly worse ordering, however. To turn it off, |
|||
* set Control [AMD_AGGRESSIVE] to 0. |
|||
* |
|||
* Control [2..4] are not used in the current version, but may be used in |
|||
* future versions. |
|||
* |
|||
* The Info array provides statistics about the ordering on output. If it is |
|||
* not present, the statistics are not returned. This is not an error |
|||
* condition. |
|||
* |
|||
* Info [AMD_STATUS]: the return value of AMD, either AMD_OK, |
|||
* AMD_OK_BUT_JUMBLED, AMD_OUT_OF_MEMORY, or AMD_INVALID. |
|||
* |
|||
* Info [AMD_N]: n, the size of the input matrix |
|||
* |
|||
* Info [AMD_NZ]: the number of nonzeros in A, nz = Ap [n] |
|||
* |
|||
* Info [AMD_SYMMETRY]: the symmetry of the matrix A. It is the number |
|||
* of "matched" off-diagonal entries divided by the total number of |
|||
* off-diagonal entries. An entry A(i,j) is matched if A(j,i) is also |
|||
* an entry, for any pair (i,j) for which i != j. In MATLAB notation, |
|||
* S = spones (A) ; |
|||
* B = tril (S, -1) + triu (S, 1) ; |
|||
* symmetry = nnz (B & B') / nnz (B) ; |
|||
* |
|||
* Info [AMD_NZDIAG]: the number of entries on the diagonal of A. |
|||
* |
|||
* Info [AMD_NZ_A_PLUS_AT]: the number of nonzeros in A+A', excluding the |
|||
* diagonal. If A is perfectly symmetric (Info [AMD_SYMMETRY] = 1) |
|||
* with a fully nonzero diagonal, then Info [AMD_NZ_A_PLUS_AT] = nz-n |
|||
* (the smallest possible value). If A is perfectly unsymmetric |
|||
* (Info [AMD_SYMMETRY] = 0, for an upper triangular matrix, for |
|||
* example) with no diagonal, then Info [AMD_NZ_A_PLUS_AT] = 2*nz |
|||
* (the largest possible value). |
|||
* |
|||
* Info [AMD_NDENSE]: the number of "dense" rows/columns of A+A' that were |
|||
* removed from A prior to ordering. These are placed last in the |
|||
* output order P. |
|||
* |
|||
* Info [AMD_MEMORY]: the amount of memory used by AMD, in bytes. In the |
|||
* current version, this is 1.2 * Info [AMD_NZ_A_PLUS_AT] + 9*n |
|||
* times the size of an integer. This is at most 2.4nz + 9n. This |
|||
* excludes the size of the input arguments Ai, Ap, and P, which have |
|||
* a total size of nz + 2*n + 1 integers. |
|||
* |
|||
* Info [AMD_NCMPA]: the number of garbage collections performed. |
|||
* |
|||
* Info [AMD_LNZ]: the number of nonzeros in L (excluding the diagonal). |
|||
* This is a slight upper bound because mass elimination is combined |
|||
* with the approximate degree update. It is a rough upper bound if |
|||
* there are many "dense" rows/columns. The rest of the statistics, |
|||
* below, are also slight or rough upper bounds, for the same reasons. |
|||
* The post-ordering of the assembly tree might also not exactly |
|||
* correspond to a true elimination tree postordering. |
|||
* |
|||
* Info [AMD_NDIV]: the number of divide operations for a subsequent LDL' |
|||
* or LU factorization of the permuted matrix A (P,P). |
|||
* |
|||
* Info [AMD_NMULTSUBS_LDL]: the number of multiply-subtract pairs for a |
|||
* subsequent LDL' factorization of A (P,P). |
|||
* |
|||
* Info [AMD_NMULTSUBS_LU]: the number of multiply-subtract pairs for a |
|||
* subsequent LU factorization of A (P,P), assuming that no numerical |
|||
* pivoting is required. |
|||
* |
|||
* Info [AMD_DMAX]: the maximum number of nonzeros in any column of L, |
|||
* including the diagonal. |
|||
* |
|||
* Info [14..19] are not used in the current version, but may be used in |
|||
* future versions. |
|||
*/ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* direct interface to AMD */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* amd_2 is the primary AMD ordering routine. It is not meant to be |
|||
* user-callable because of its restrictive inputs and because it destroys |
|||
* the user's input matrix. It does not check its inputs for errors, either. |
|||
* However, if you can work with these restrictions it can be faster than |
|||
* amd_order and use less memory (assuming that you can create your own copy |
|||
* of the matrix for AMD to destroy). Refer to AMD/Source/amd_2.c for a |
|||
* description of each parameter. */ |
|||
|
|||
void amd_2 |
|||
( |
|||
int n, |
|||
int Pe [ ], |
|||
int Iw [ ], |
|||
int Len [ ], |
|||
int iwlen, |
|||
int pfree, |
|||
int Nv [ ], |
|||
int Next [ ], |
|||
int Last [ ], |
|||
int Head [ ], |
|||
int Elen [ ], |
|||
int Degree [ ], |
|||
int W [ ], |
|||
double Control [ ], |
|||
double Info [ ] |
|||
) ; |
|||
|
|||
void amd_l2 |
|||
( |
|||
UF_long n, |
|||
UF_long Pe [ ], |
|||
UF_long Iw [ ], |
|||
UF_long Len [ ], |
|||
UF_long iwlen, |
|||
UF_long pfree, |
|||
UF_long Nv [ ], |
|||
UF_long Next [ ], |
|||
UF_long Last [ ], |
|||
UF_long Head [ ], |
|||
UF_long Elen [ ], |
|||
UF_long Degree [ ], |
|||
UF_long W [ ], |
|||
double Control [ ], |
|||
double Info [ ] |
|||
) ; |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* amd_valid */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* Returns AMD_OK or AMD_OK_BUT_JUMBLED if the matrix is valid as input to |
|||
* amd_order; the latter is returned if the matrix has unsorted and/or |
|||
* duplicate row indices in one or more columns. Returns AMD_INVALID if the |
|||
* matrix cannot be passed to amd_order. For amd_order, the matrix must also |
|||
* be square. The first two arguments are the number of rows and the number |
|||
* of columns of the matrix. For its use in AMD, these must both equal n. |
|||
* |
|||
* NOTE: this routine returned TRUE/FALSE in v1.2 and earlier. |
|||
*/ |
|||
|
|||
int amd_valid |
|||
( |
|||
int n_row, /* # of rows */ |
|||
int n_col, /* # of columns */ |
|||
const int Ap [ ], /* column pointers, of size n_col+1 */ |
|||
const int Ai [ ] /* row indices, of size Ap [n_col] */ |
|||
) ; |
|||
|
|||
UF_long amd_l_valid |
|||
( |
|||
UF_long n_row, |
|||
UF_long n_col, |
|||
const UF_long Ap [ ], |
|||
const UF_long Ai [ ] |
|||
) ; |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD memory manager and printf routines */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* The user can redefine these to change the malloc, free, and printf routines |
|||
* that AMD uses. */ |
|||
|
|||
#ifndef EXTERN |
|||
#define EXTERN extern |
|||
#endif |
|||
|
|||
EXTERN void *(*amd_malloc) (size_t) ; /* pointer to malloc */ |
|||
EXTERN void (*amd_free) (void *) ; /* pointer to free */ |
|||
EXTERN void *(*amd_realloc) (void *, size_t) ; /* pointer to realloc */ |
|||
EXTERN void *(*amd_calloc) (size_t, size_t) ; /* pointer to calloc */ |
|||
EXTERN int (*amd_printf) (const char *, ...) ; /* pointer to printf */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD Control and Info arrays */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* amd_defaults: sets the default control settings */ |
|||
void amd_defaults (double Control [ ]) ; |
|||
void amd_l_defaults (double Control [ ]) ; |
|||
|
|||
/* amd_control: prints the control settings */ |
|||
void amd_control (double Control [ ]) ; |
|||
void amd_l_control (double Control [ ]) ; |
|||
|
|||
/* amd_info: prints the statistics */ |
|||
void amd_info (double Info [ ]) ; |
|||
void amd_l_info (double Info [ ]) ; |
|||
|
|||
#define AMD_CONTROL 5 /* size of Control array */ |
|||
#define AMD_INFO 20 /* size of Info array */ |
|||
|
|||
/* contents of Control */ |
|||
#define AMD_DENSE 0 /* "dense" if degree > Control [0] * sqrt (n) */ |
|||
#define AMD_AGGRESSIVE 1 /* do aggressive absorption if Control [1] != 0 */ |
|||
|
|||
/* default Control settings */ |
|||
#define AMD_DEFAULT_DENSE 10.0 /* default "dense" degree 10*sqrt(n) */ |
|||
#define AMD_DEFAULT_AGGRESSIVE 1 /* do aggressive absorption by default */ |
|||
|
|||
/* contents of Info */ |
|||
#define AMD_STATUS 0 /* return value of amd_order and amd_l_order */ |
|||
#define AMD_N 1 /* A is n-by-n */ |
|||
#define AMD_NZ 2 /* number of nonzeros in A */ |
|||
#define AMD_SYMMETRY 3 /* symmetry of pattern (1 is sym., 0 is unsym.) */ |
|||
#define AMD_NZDIAG 4 /* # of entries on diagonal */ |
|||
#define AMD_NZ_A_PLUS_AT 5 /* nz in A+A' */ |
|||
#define AMD_NDENSE 6 /* number of "dense" rows/columns in A */ |
|||
#define AMD_MEMORY 7 /* amount of memory used by AMD */ |
|||
#define AMD_NCMPA 8 /* number of garbage collections in AMD */ |
|||
#define AMD_LNZ 9 /* approx. nz in L, excluding the diagonal */ |
|||
#define AMD_NDIV 10 /* number of fl. point divides for LU and LDL' */ |
|||
#define AMD_NMULTSUBS_LDL 11 /* number of fl. point (*,-) pairs for LDL' */ |
|||
#define AMD_NMULTSUBS_LU 12 /* number of fl. point (*,-) pairs for LU */ |
|||
#define AMD_DMAX 13 /* max nz. in any column of L, incl. diagonal */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* return values of AMD */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
#define AMD_OK 0 /* success */ |
|||
#define AMD_OUT_OF_MEMORY -1 /* malloc failed, or problem too large */ |
|||
#define AMD_INVALID -2 /* input arguments are not valid */ |
|||
#define AMD_OK_BUT_JUMBLED 1 /* input matrix is OK for amd_order, but |
|||
* columns were not sorted, and/or duplicate entries were present. AMD had |
|||
* to do extra work before ordering the matrix. This is a warning, not an |
|||
* error. */ |
|||
|
|||
/* ========================================================================== */ |
|||
/* === AMD version ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* AMD Version 1.2 and later include the following definitions. |
|||
* As an example, to test if the version you are using is 1.2 or later: |
|||
* |
|||
* #ifdef AMD_VERSION |
|||
* if (AMD_VERSION >= AMD_VERSION_CODE (1,2)) ... |
|||
* #endif |
|||
* |
|||
* This also works during compile-time: |
|||
* |
|||
* #if defined(AMD_VERSION) && (AMD_VERSION >= AMD_VERSION_CODE (1,2)) |
|||
* printf ("This is version 1.2 or later\n") ; |
|||
* #else |
|||
* printf ("This is an early version\n") ; |
|||
* #endif |
|||
* |
|||
* Versions 1.1 and earlier of AMD do not include a #define'd version number. |
|||
*/ |
|||
|
|||
#define AMD_DATE "Dec 7, 2011" |
|||
#define AMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) |
|||
#define AMD_MAIN_VERSION 2 |
|||
#define AMD_SUB_VERSION 2 |
|||
#define AMD_SUBSUB_VERSION 3 |
|||
#define AMD_VERSION AMD_VERSION_CODE(AMD_MAIN_VERSION,AMD_SUB_VERSION) |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
|
|||
#endif |
|||
@ -0,0 +1,181 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_1 =============================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* AMD_1: Construct A+A' for a sparse matrix A and perform the AMD ordering. |
|||
* |
|||
* The n-by-n sparse matrix A can be unsymmetric. It is stored in MATLAB-style |
|||
* compressed-column form, with sorted row indices in each column, and no |
|||
* duplicate entries. Diagonal entries may be present, but they are ignored. |
|||
* Row indices of column j of A are stored in Ai [Ap [j] ... Ap [j+1]-1]. |
|||
* Ap [0] must be zero, and nz = Ap [n] is the number of entries in A. The |
|||
* size of the matrix, n, must be greater than or equal to zero. |
|||
* |
|||
* This routine must be preceded by a call to AMD_aat, which computes the |
|||
* number of entries in each row/column in A+A', excluding the diagonal. |
|||
* Len [j], on input, is the number of entries in row/column j of A+A'. This |
|||
* routine constructs the matrix A+A' and then calls AMD_2. No error checking |
|||
* is performed (this was done in AMD_valid). |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
GLOBAL void AMD_1 |
|||
( |
|||
Int n, /* n > 0 */ |
|||
const Int Ap [ ], /* input of size n+1, not modified */ |
|||
const Int Ai [ ], /* input of size nz = Ap [n], not modified */ |
|||
Int P [ ], /* size n output permutation */ |
|||
Int Pinv [ ], /* size n output inverse permutation */ |
|||
Int Len [ ], /* size n input, undefined on output */ |
|||
Int slen, /* slen >= sum (Len [0..n-1]) + 7n, |
|||
* ideally slen = 1.2 * sum (Len) + 8n */ |
|||
Int S [ ], /* size slen workspace */ |
|||
double Control [ ], /* input array of size AMD_CONTROL */ |
|||
double Info [ ] /* output array of size AMD_INFO */ |
|||
) |
|||
{ |
|||
Int i, j, k, p, pfree, iwlen, pj, p1, p2, pj2, *Iw, *Pe, *Nv, *Head, |
|||
*Elen, *Degree, *s, *W, *Sp, *Tp ; |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* construct the matrix for AMD_2 */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
ASSERT (n > 0) ; |
|||
|
|||
iwlen = slen - 6*n ; |
|||
s = S ; |
|||
Pe = s ; s += n ; |
|||
Nv = s ; s += n ; |
|||
Head = s ; s += n ; |
|||
Elen = s ; s += n ; |
|||
Degree = s ; s += n ; |
|||
W = s ; s += n ; |
|||
Iw = s ; s += iwlen ; |
|||
|
|||
ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ; |
|||
|
|||
/* construct the pointers for A+A' */ |
|||
Sp = Nv ; /* use Nv and W as workspace for Sp and Tp [ */ |
|||
Tp = W ; |
|||
pfree = 0 ; |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
Pe [j] = pfree ; |
|||
Sp [j] = pfree ; |
|||
pfree += Len [j] ; |
|||
} |
|||
|
|||
/* Note that this restriction on iwlen is slightly more restrictive than |
|||
* what is strictly required in AMD_2. AMD_2 can operate with no elbow |
|||
* room at all, but it will be very slow. For better performance, at |
|||
* least size-n elbow room is enforced. */ |
|||
ASSERT (iwlen >= pfree + n) ; |
|||
|
|||
#ifndef NDEBUG |
|||
for (p = 0 ; p < iwlen ; p++) Iw [p] = EMPTY ; |
|||
#endif |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
AMD_DEBUG1 (("Construct row/column k= "ID" of A+A'\n", k)) ; |
|||
p1 = Ap [k] ; |
|||
p2 = Ap [k+1] ; |
|||
|
|||
/* construct A+A' */ |
|||
for (p = p1 ; p < p2 ; ) |
|||
{ |
|||
/* scan the upper triangular part of A */ |
|||
j = Ai [p] ; |
|||
ASSERT (j >= 0 && j < n) ; |
|||
if (j < k) |
|||
{ |
|||
/* entry A (j,k) in the strictly upper triangular part */ |
|||
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ; |
|||
ASSERT (Sp [k] < (k == n-1 ? pfree : Pe [k+1])) ; |
|||
Iw [Sp [j]++] = k ; |
|||
Iw [Sp [k]++] = j ; |
|||
p++ ; |
|||
} |
|||
else if (j == k) |
|||
{ |
|||
/* skip the diagonal */ |
|||
p++ ; |
|||
break ; |
|||
} |
|||
else /* j > k */ |
|||
{ |
|||
/* first entry below the diagonal */ |
|||
break ; |
|||
} |
|||
/* scan lower triangular part of A, in column j until reaching |
|||
* row k. Start where last scan left off. */ |
|||
ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ; |
|||
pj2 = Ap [j+1] ; |
|||
for (pj = Tp [j] ; pj < pj2 ; ) |
|||
{ |
|||
i = Ai [pj] ; |
|||
ASSERT (i >= 0 && i < n) ; |
|||
if (i < k) |
|||
{ |
|||
/* A (i,j) is only in the lower part, not in upper */ |
|||
ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ; |
|||
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ; |
|||
Iw [Sp [i]++] = j ; |
|||
Iw [Sp [j]++] = i ; |
|||
pj++ ; |
|||
} |
|||
else if (i == k) |
|||
{ |
|||
/* entry A (k,j) in lower part and A (j,k) in upper */ |
|||
pj++ ; |
|||
break ; |
|||
} |
|||
else /* i > k */ |
|||
{ |
|||
/* consider this entry later, when k advances to i */ |
|||
break ; |
|||
} |
|||
} |
|||
Tp [j] = pj ; |
|||
} |
|||
Tp [k] = p ; |
|||
} |
|||
|
|||
/* clean up, for remaining mismatched entries */ |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
for (pj = Tp [j] ; pj < Ap [j+1] ; pj++) |
|||
{ |
|||
i = Ai [pj] ; |
|||
ASSERT (i >= 0 && i < n) ; |
|||
/* A (i,j) is only in the lower part, not in upper */ |
|||
ASSERT (Sp [i] < (i == n-1 ? pfree : Pe [i+1])) ; |
|||
ASSERT (Sp [j] < (j == n-1 ? pfree : Pe [j+1])) ; |
|||
Iw [Sp [i]++] = j ; |
|||
Iw [Sp [j]++] = i ; |
|||
} |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
for (j = 0 ; j < n-1 ; j++) ASSERT (Sp [j] == Pe [j+1]) ; |
|||
ASSERT (Sp [n-1] == pfree) ; |
|||
#endif |
|||
|
|||
/* Tp and Sp no longer needed ] */ |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* order the matrix */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
AMD_2 (n, Pe, Iw, Len, iwlen, pfree, |
|||
Nv, Pinv, P, Head, Elen, Degree, W, Control, Info) ; |
|||
} |
|||
1842
src/maths/KLU/amd_2.c
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,185 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_aat ============================================================= */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* AMD_aat: compute the symmetry of the pattern of A, and count the number of |
|||
* nonzeros each column of A+A' (excluding the diagonal). Assumes the input |
|||
* matrix has no errors, with sorted columns and no duplicates |
|||
* (AMD_valid (n, n, Ap, Ai) must be AMD_OK, but this condition is not |
|||
* checked). |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
GLOBAL size_t AMD_aat /* returns nz in A+A' */ |
|||
( |
|||
Int n, |
|||
const Int Ap [ ], |
|||
const Int Ai [ ], |
|||
Int Len [ ], /* Len [j]: length of column j of A+A', excl diagonal*/ |
|||
Int Tp [ ], /* workspace of size n */ |
|||
double Info [ ] |
|||
) |
|||
{ |
|||
Int p1, p2, p, i, j, pj, pj2, k, nzdiag, nzboth, nz ; |
|||
double sym ; |
|||
size_t nzaat ; |
|||
|
|||
#ifndef NDEBUG |
|||
AMD_debug_init ("AMD AAT") ; |
|||
for (k = 0 ; k < n ; k++) Tp [k] = EMPTY ; |
|||
ASSERT (AMD_valid (n, n, Ap, Ai) == AMD_OK) ; |
|||
#endif |
|||
|
|||
if (Info != (double *) NULL) |
|||
{ |
|||
/* clear the Info array, if it exists */ |
|||
for (i = 0 ; i < AMD_INFO ; i++) |
|||
{ |
|||
Info [i] = EMPTY ; |
|||
} |
|||
Info [AMD_STATUS] = AMD_OK ; |
|||
} |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Len [k] = 0 ; |
|||
} |
|||
|
|||
nzdiag = 0 ; |
|||
nzboth = 0 ; |
|||
nz = Ap [n] ; |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
p1 = Ap [k] ; |
|||
p2 = Ap [k+1] ; |
|||
AMD_DEBUG2 (("\nAAT Column: "ID" p1: "ID" p2: "ID"\n", k, p1, p2)) ; |
|||
|
|||
/* construct A+A' */ |
|||
for (p = p1 ; p < p2 ; ) |
|||
{ |
|||
/* scan the upper triangular part of A */ |
|||
j = Ai [p] ; |
|||
if (j < k) |
|||
{ |
|||
/* entry A (j,k) is in the strictly upper triangular part, |
|||
* add both A (j,k) and A (k,j) to the matrix A+A' */ |
|||
Len [j]++ ; |
|||
Len [k]++ ; |
|||
AMD_DEBUG3 ((" upper ("ID","ID") ("ID","ID")\n", j,k, k,j)); |
|||
p++ ; |
|||
} |
|||
else if (j == k) |
|||
{ |
|||
/* skip the diagonal */ |
|||
p++ ; |
|||
nzdiag++ ; |
|||
break ; |
|||
} |
|||
else /* j > k */ |
|||
{ |
|||
/* first entry below the diagonal */ |
|||
break ; |
|||
} |
|||
/* scan lower triangular part of A, in column j until reaching |
|||
* row k. Start where last scan left off. */ |
|||
ASSERT (Tp [j] != EMPTY) ; |
|||
ASSERT (Ap [j] <= Tp [j] && Tp [j] <= Ap [j+1]) ; |
|||
pj2 = Ap [j+1] ; |
|||
for (pj = Tp [j] ; pj < pj2 ; ) |
|||
{ |
|||
i = Ai [pj] ; |
|||
if (i < k) |
|||
{ |
|||
/* A (i,j) is only in the lower part, not in upper. |
|||
* add both A (i,j) and A (j,i) to the matrix A+A' */ |
|||
Len [i]++ ; |
|||
Len [j]++ ; |
|||
AMD_DEBUG3 ((" lower ("ID","ID") ("ID","ID")\n", |
|||
i,j, j,i)) ; |
|||
pj++ ; |
|||
} |
|||
else if (i == k) |
|||
{ |
|||
/* entry A (k,j) in lower part and A (j,k) in upper */ |
|||
pj++ ; |
|||
nzboth++ ; |
|||
break ; |
|||
} |
|||
else /* i > k */ |
|||
{ |
|||
/* consider this entry later, when k advances to i */ |
|||
break ; |
|||
} |
|||
} |
|||
Tp [j] = pj ; |
|||
} |
|||
/* Tp [k] points to the entry just below the diagonal in column k */ |
|||
Tp [k] = p ; |
|||
} |
|||
|
|||
/* clean up, for remaining mismatched entries */ |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
for (pj = Tp [j] ; pj < Ap [j+1] ; pj++) |
|||
{ |
|||
i = Ai [pj] ; |
|||
/* A (i,j) is only in the lower part, not in upper. |
|||
* add both A (i,j) and A (j,i) to the matrix A+A' */ |
|||
Len [i]++ ; |
|||
Len [j]++ ; |
|||
AMD_DEBUG3 ((" lower cleanup ("ID","ID") ("ID","ID")\n", |
|||
i,j, j,i)) ; |
|||
} |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* compute the symmetry of the nonzero pattern of A */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
/* Given a matrix A, the symmetry of A is: |
|||
* B = tril (spones (A), -1) + triu (spones (A), 1) ; |
|||
* sym = nnz (B & B') / nnz (B) ; |
|||
* or 1 if nnz (B) is zero. |
|||
*/ |
|||
|
|||
if (nz == nzdiag) |
|||
{ |
|||
sym = 1 ; |
|||
} |
|||
else |
|||
{ |
|||
sym = (2 * (double) nzboth) / ((double) (nz - nzdiag)) ; |
|||
} |
|||
|
|||
nzaat = 0 ; |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
nzaat += Len [k] ; |
|||
} |
|||
|
|||
AMD_DEBUG1 (("AMD nz in A+A', excluding diagonal (nzaat) = %g\n", |
|||
(double) nzaat)) ; |
|||
AMD_DEBUG1 ((" nzboth: "ID" nz: "ID" nzdiag: "ID" symmetry: %g\n", |
|||
nzboth, nz, nzdiag, sym)) ; |
|||
|
|||
if (Info != (double *) NULL) |
|||
{ |
|||
Info [AMD_STATUS] = AMD_OK ; |
|||
Info [AMD_N] = n ; |
|||
Info [AMD_NZ] = nz ; |
|||
Info [AMD_SYMMETRY] = sym ; /* symmetry of pattern of A */ |
|||
Info [AMD_NZDIAG] = nzdiag ; /* nonzeros on diagonal of A */ |
|||
Info [AMD_NZ_A_PLUS_AT] = nzaat ; /* nonzeros in A+A' */ |
|||
} |
|||
|
|||
return (nzaat) ; |
|||
} |
|||
@ -0,0 +1,64 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_control ========================================================= */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* User-callable. Prints the control parameters for AMD. See amd.h |
|||
* for details. If the Control array is not present, the defaults are |
|||
* printed instead. |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
GLOBAL void AMD_control |
|||
( |
|||
double Control [ ] |
|||
) |
|||
{ |
|||
double alpha ; |
|||
Int aggressive ; |
|||
|
|||
if (Control != (double *) NULL) |
|||
{ |
|||
alpha = Control [AMD_DENSE] ; |
|||
aggressive = Control [AMD_AGGRESSIVE] != 0 ; |
|||
} |
|||
else |
|||
{ |
|||
alpha = AMD_DEFAULT_DENSE ; |
|||
aggressive = AMD_DEFAULT_AGGRESSIVE ; |
|||
} |
|||
|
|||
PRINTF (("\nAMD version %d.%d.%d, %s: approximate minimum degree ordering\n" |
|||
" dense row parameter: %g\n", AMD_MAIN_VERSION, AMD_SUB_VERSION, |
|||
AMD_SUBSUB_VERSION, AMD_DATE, alpha)) ; |
|||
|
|||
if (alpha < 0) |
|||
{ |
|||
PRINTF ((" no rows treated as dense\n")) ; |
|||
} |
|||
else |
|||
{ |
|||
PRINTF (( |
|||
" (rows with more than max (%g * sqrt (n), 16) entries are\n" |
|||
" considered \"dense\", and placed last in output permutation)\n", |
|||
alpha)) ; |
|||
} |
|||
|
|||
if (aggressive) |
|||
{ |
|||
PRINTF ((" aggressive absorption: yes\n")) ; |
|||
} |
|||
else |
|||
{ |
|||
PRINTF ((" aggressive absorption: no\n")) ; |
|||
} |
|||
|
|||
PRINTF ((" size of AMD integer: %d\n\n", sizeof (Int))) ; |
|||
} |
|||
@ -0,0 +1,38 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_defaults ======================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* User-callable. Sets default control parameters for AMD. See amd.h |
|||
* for details. |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
/* ========================================================================= */ |
|||
/* === AMD defaults ======================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
GLOBAL void AMD_defaults |
|||
( |
|||
double Control [ ] |
|||
) |
|||
{ |
|||
Int i ; |
|||
|
|||
if (Control != (double *) NULL) |
|||
{ |
|||
for (i = 0 ; i < AMD_CONTROL ; i++) |
|||
{ |
|||
Control [i] = 0 ; |
|||
} |
|||
Control [AMD_DENSE] = AMD_DEFAULT_DENSE ; |
|||
Control [AMD_AGGRESSIVE] = AMD_DEFAULT_AGGRESSIVE ; |
|||
} |
|||
} |
|||
@ -0,0 +1,180 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_dump ============================================================ */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* Debugging routines for AMD. Not used if NDEBUG is not defined at compile- |
|||
* time (the default). See comments in amd_internal.h on how to enable |
|||
* debugging. Not user-callable. |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
#ifndef NDEBUG |
|||
|
|||
/* This global variable is present only when debugging */ |
|||
GLOBAL Int AMD_debug = -999 ; /* default is no debug printing */ |
|||
|
|||
/* ========================================================================= */ |
|||
/* === AMD_debug_init ====================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* Sets the debug print level, by reading the file debug.amd (if it exists) */ |
|||
|
|||
GLOBAL void AMD_debug_init ( char *s ) |
|||
{ |
|||
FILE *f ; |
|||
f = fopen ("debug.amd", "r") ; |
|||
if (f == (FILE *) NULL) |
|||
{ |
|||
AMD_debug = -999 ; |
|||
} |
|||
else |
|||
{ |
|||
fscanf (f, ID, &AMD_debug) ; |
|||
fclose (f) ; |
|||
} |
|||
if (AMD_debug >= 0) |
|||
{ |
|||
printf ("%s: AMD_debug_init, D= "ID"\n", s, AMD_debug) ; |
|||
} |
|||
} |
|||
|
|||
/* ========================================================================= */ |
|||
/* === AMD_dump ============================================================ */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* Dump AMD's data structure, except for the hash buckets. This routine |
|||
* cannot be called when the hash buckets are non-empty. |
|||
*/ |
|||
|
|||
GLOBAL void AMD_dump ( |
|||
Int n, /* A is n-by-n */ |
|||
Int Pe [ ], /* pe [0..n-1]: index in iw of start of row i */ |
|||
Int Iw [ ], /* workspace of size iwlen, iwlen [0..pfree-1] |
|||
* holds the matrix on input */ |
|||
Int Len [ ], /* len [0..n-1]: length for row i */ |
|||
Int iwlen, /* length of iw */ |
|||
Int pfree, /* iw [pfree ... iwlen-1] is empty on input */ |
|||
Int Nv [ ], /* nv [0..n-1] */ |
|||
Int Next [ ], /* next [0..n-1] */ |
|||
Int Last [ ], /* last [0..n-1] */ |
|||
Int Head [ ], /* head [0..n-1] */ |
|||
Int Elen [ ], /* size n */ |
|||
Int Degree [ ], /* size n */ |
|||
Int W [ ], /* size n */ |
|||
Int nel |
|||
) |
|||
{ |
|||
Int i, pe, elen, nv, len, e, p, k, j, deg, w, cnt, ilast ; |
|||
|
|||
if (AMD_debug < 0) return ; |
|||
ASSERT (pfree <= iwlen) ; |
|||
AMD_DEBUG3 (("\nAMD dump, pfree: "ID"\n", pfree)) ; |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
pe = Pe [i] ; |
|||
elen = Elen [i] ; |
|||
nv = Nv [i] ; |
|||
len = Len [i] ; |
|||
w = W [i] ; |
|||
|
|||
if (elen >= EMPTY) |
|||
{ |
|||
if (nv == 0) |
|||
{ |
|||
AMD_DEBUG3 (("\nI "ID": nonprincipal: ", i)) ; |
|||
ASSERT (elen == EMPTY) ; |
|||
if (pe == EMPTY) |
|||
{ |
|||
AMD_DEBUG3 ((" dense node\n")) ; |
|||
ASSERT (w == 1) ; |
|||
} |
|||
else |
|||
{ |
|||
ASSERT (pe < EMPTY) ; |
|||
AMD_DEBUG3 ((" i "ID" -> parent "ID"\n", i, FLIP (Pe[i]))); |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
AMD_DEBUG3 (("\nI "ID": active principal supervariable:\n",i)); |
|||
AMD_DEBUG3 ((" nv(i): "ID" Flag: %d\n", nv, (nv < 0))) ; |
|||
ASSERT (elen >= 0) ; |
|||
ASSERT (nv > 0 && pe >= 0) ; |
|||
p = pe ; |
|||
AMD_DEBUG3 ((" e/s: ")) ; |
|||
if (elen == 0) AMD_DEBUG3 ((" : ")) ; |
|||
ASSERT (pe + len <= pfree) ; |
|||
for (k = 0 ; k < len ; k++) |
|||
{ |
|||
j = Iw [p] ; |
|||
AMD_DEBUG3 ((" "ID"", j)) ; |
|||
ASSERT (j >= 0 && j < n) ; |
|||
if (k == elen-1) AMD_DEBUG3 ((" : ")) ; |
|||
p++ ; |
|||
} |
|||
AMD_DEBUG3 (("\n")) ; |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
e = i ; |
|||
if (w == 0) |
|||
{ |
|||
AMD_DEBUG3 (("\nE "ID": absorbed element: w "ID"\n", e, w)) ; |
|||
ASSERT (nv > 0 && pe < 0) ; |
|||
AMD_DEBUG3 ((" e "ID" -> parent "ID"\n", e, FLIP (Pe [e]))) ; |
|||
} |
|||
else |
|||
{ |
|||
AMD_DEBUG3 (("\nE "ID": unabsorbed element: w "ID"\n", e, w)) ; |
|||
ASSERT (nv > 0 && pe >= 0) ; |
|||
p = pe ; |
|||
AMD_DEBUG3 ((" : ")) ; |
|||
ASSERT (pe + len <= pfree) ; |
|||
for (k = 0 ; k < len ; k++) |
|||
{ |
|||
j = Iw [p] ; |
|||
AMD_DEBUG3 ((" "ID"", j)) ; |
|||
ASSERT (j >= 0 && j < n) ; |
|||
p++ ; |
|||
} |
|||
AMD_DEBUG3 (("\n")) ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* this routine cannot be called when the hash buckets are non-empty */ |
|||
AMD_DEBUG3 (("\nDegree lists:\n")) ; |
|||
if (nel >= 0) |
|||
{ |
|||
cnt = 0 ; |
|||
for (deg = 0 ; deg < n ; deg++) |
|||
{ |
|||
if (Head [deg] == EMPTY) continue ; |
|||
ilast = EMPTY ; |
|||
AMD_DEBUG3 ((ID": \n", deg)) ; |
|||
for (i = Head [deg] ; i != EMPTY ; i = Next [i]) |
|||
{ |
|||
AMD_DEBUG3 ((" "ID" : next "ID" last "ID" deg "ID"\n", |
|||
i, Next [i], Last [i], Degree [i])) ; |
|||
ASSERT (i >= 0 && i < n && ilast == Last [i] && |
|||
deg == Degree [i]) ; |
|||
cnt += Nv [i] ; |
|||
ilast = i ; |
|||
} |
|||
AMD_DEBUG3 (("\n")) ; |
|||
} |
|||
ASSERT (cnt == n - nel) ; |
|||
} |
|||
|
|||
} |
|||
|
|||
#endif |
|||
@ -0,0 +1,84 @@ |
|||
/* ========================================================================= */ |
|||
/* === amd_global ========================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
#include <stdlib.h> |
|||
|
|||
#ifdef MATLAB_MEX_FILE |
|||
#include "mex.h" |
|||
#include "matrix.h" |
|||
#endif |
|||
|
|||
#ifndef NULL |
|||
#define NULL 0 |
|||
#endif |
|||
|
|||
/* ========================================================================= */ |
|||
/* === Default AMD memory manager ========================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* The user can redefine these global pointers at run-time to change the memory |
|||
* manager used by AMD. AMD only uses malloc and free; realloc and calloc are |
|||
* include for completeness, in case another package wants to use the same |
|||
* memory manager as AMD. |
|||
* |
|||
* If compiling as a MATLAB mexFunction, the default memory manager is mxMalloc. |
|||
* You can also compile AMD as a standard ANSI-C library and link a mexFunction |
|||
* against it, and then redefine these pointers at run-time, in your |
|||
* mexFunction. |
|||
* |
|||
* If -DNMALLOC is defined at compile-time, no memory manager is specified at |
|||
* compile-time. You must then define these functions at run-time, before |
|||
* calling AMD, for AMD to work properly. |
|||
*/ |
|||
|
|||
#ifndef NMALLOC |
|||
#ifdef MATLAB_MEX_FILE |
|||
/* MATLAB mexFunction: */ |
|||
void *(*amd_malloc) (size_t) = mxMalloc ; |
|||
void (*amd_free) (void *) = mxFree ; |
|||
void *(*amd_realloc) (void *, size_t) = mxRealloc ; |
|||
void *(*amd_calloc) (size_t, size_t) = mxCalloc ; |
|||
#else |
|||
/* standard ANSI-C: */ |
|||
void *(*amd_malloc) (size_t) = malloc ; |
|||
void (*amd_free) (void *) = free ; |
|||
void *(*amd_realloc) (void *, size_t) = realloc ; |
|||
void *(*amd_calloc) (size_t, size_t) = calloc ; |
|||
#endif |
|||
#else |
|||
/* no memory manager defined at compile-time; you MUST define one at run-time */ |
|||
void *(*amd_malloc) (size_t) = NULL ; |
|||
void (*amd_free) (void *) = NULL ; |
|||
void *(*amd_realloc) (void *, size_t) = NULL ; |
|||
void *(*amd_calloc) (size_t, size_t) = NULL ; |
|||
#endif |
|||
|
|||
/* ========================================================================= */ |
|||
/* === Default AMD printf routine ========================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* The user can redefine this global pointer at run-time to change the printf |
|||
* routine used by AMD. If NULL, no printing occurs. |
|||
* |
|||
* If -DNPRINT is defined at compile-time, stdio.h is not included. Printing |
|||
* can then be enabled at run-time by setting amd_printf to a non-NULL function. |
|||
*/ |
|||
|
|||
#ifndef NPRINT |
|||
#ifdef MATLAB_MEX_FILE |
|||
int (*amd_printf) (const char *, ...) = mexPrintf ; |
|||
#else |
|||
#include <stdio.h> |
|||
int (*amd_printf) (const char *, ...) = printf ; |
|||
#endif |
|||
#else |
|||
int (*amd_printf) (const char *, ...) = NULL ; |
|||
#endif |
|||
@ -0,0 +1,120 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_info ============================================================ */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* User-callable. Prints the output statistics for AMD. See amd.h |
|||
* for details. If the Info array is not present, nothing is printed. |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
#define PRI(format,x) { if (x >= 0) { PRINTF ((format, x)) ; }} |
|||
|
|||
GLOBAL void AMD_info |
|||
( |
|||
double Info [ ] |
|||
) |
|||
{ |
|||
double n, ndiv, nmultsubs_ldl, nmultsubs_lu, lnz, lnzd ; |
|||
|
|||
PRINTF (("\nAMD version %d.%d.%d, %s, results:\n", |
|||
AMD_MAIN_VERSION, AMD_SUB_VERSION, AMD_SUBSUB_VERSION, AMD_DATE)) ; |
|||
|
|||
if (!Info) |
|||
{ |
|||
return ; |
|||
} |
|||
|
|||
n = Info [AMD_N] ; |
|||
ndiv = Info [AMD_NDIV] ; |
|||
nmultsubs_ldl = Info [AMD_NMULTSUBS_LDL] ; |
|||
nmultsubs_lu = Info [AMD_NMULTSUBS_LU] ; |
|||
lnz = Info [AMD_LNZ] ; |
|||
lnzd = (n >= 0 && lnz >= 0) ? (n + lnz) : (-1) ; |
|||
|
|||
/* AMD return status */ |
|||
PRINTF ((" status: ")) ; |
|||
if (Info [AMD_STATUS] == AMD_OK) |
|||
{ |
|||
PRINTF (("OK\n")) ; |
|||
} |
|||
else if (Info [AMD_STATUS] == AMD_OUT_OF_MEMORY) |
|||
{ |
|||
PRINTF (("out of memory\n")) ; |
|||
} |
|||
else if (Info [AMD_STATUS] == AMD_INVALID) |
|||
{ |
|||
PRINTF (("invalid matrix\n")) ; |
|||
} |
|||
else if (Info [AMD_STATUS] == AMD_OK_BUT_JUMBLED) |
|||
{ |
|||
PRINTF (("OK, but jumbled\n")) ; |
|||
} |
|||
else |
|||
{ |
|||
PRINTF (("unknown\n")) ; |
|||
} |
|||
|
|||
/* statistics about the input matrix */ |
|||
PRI (" n, dimension of A: %.20g\n", n); |
|||
PRI (" nz, number of nonzeros in A: %.20g\n", |
|||
Info [AMD_NZ]) ; |
|||
PRI (" symmetry of A: %.4f\n", |
|||
Info [AMD_SYMMETRY]) ; |
|||
PRI (" number of nonzeros on diagonal: %.20g\n", |
|||
Info [AMD_NZDIAG]) ; |
|||
PRI (" nonzeros in pattern of A+A' (excl. diagonal): %.20g\n", |
|||
Info [AMD_NZ_A_PLUS_AT]) ; |
|||
PRI (" # dense rows/columns of A+A': %.20g\n", |
|||
Info [AMD_NDENSE]) ; |
|||
|
|||
/* statistics about AMD's behavior */ |
|||
PRI (" memory used, in bytes: %.20g\n", |
|||
Info [AMD_MEMORY]) ; |
|||
PRI (" # of memory compactions: %.20g\n", |
|||
Info [AMD_NCMPA]) ; |
|||
|
|||
/* statistics about the ordering quality */ |
|||
PRINTF (("\n" |
|||
" The following approximate statistics are for a subsequent\n" |
|||
" factorization of A(P,P) + A(P,P)'. They are slight upper\n" |
|||
" bounds if there are no dense rows/columns in A+A', and become\n" |
|||
" looser if dense rows/columns exist.\n\n")) ; |
|||
|
|||
PRI (" nonzeros in L (excluding diagonal): %.20g\n", |
|||
lnz) ; |
|||
PRI (" nonzeros in L (including diagonal): %.20g\n", |
|||
lnzd) ; |
|||
PRI (" # divide operations for LDL' or LU: %.20g\n", |
|||
ndiv) ; |
|||
PRI (" # multiply-subtract operations for LDL': %.20g\n", |
|||
nmultsubs_ldl) ; |
|||
PRI (" # multiply-subtract operations for LU: %.20g\n", |
|||
nmultsubs_lu) ; |
|||
PRI (" max nz. in any column of L (incl. diagonal): %.20g\n", |
|||
Info [AMD_DMAX]) ; |
|||
|
|||
/* total flop counts for various factorizations */ |
|||
|
|||
if (n >= 0 && ndiv >= 0 && nmultsubs_ldl >= 0 && nmultsubs_lu >= 0) |
|||
{ |
|||
PRINTF (("\n" |
|||
" chol flop count for real A, sqrt counted as 1 flop: %.20g\n" |
|||
" LDL' flop count for real A: %.20g\n" |
|||
" LDL' flop count for complex A: %.20g\n" |
|||
" LU flop count for real A (with no pivoting): %.20g\n" |
|||
" LU flop count for complex A (with no pivoting): %.20g\n\n", |
|||
n + ndiv + 2*nmultsubs_ldl, |
|||
ndiv + 2*nmultsubs_ldl, |
|||
9*ndiv + 8*nmultsubs_ldl, |
|||
ndiv + 2*nmultsubs_lu, |
|||
9*ndiv + 8*nmultsubs_lu)) ; |
|||
} |
|||
} |
|||
@ -0,0 +1,350 @@ |
|||
/* ========================================================================= */ |
|||
/* === amd_internal.h ====================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* This file is for internal use in AMD itself, and does not normally need to |
|||
* be included in user code (it is included in UMFPACK, however). All others |
|||
* should use amd.h instead. |
|||
* |
|||
* The following compile-time definitions affect how AMD is compiled. |
|||
* |
|||
* -DNPRINT |
|||
* |
|||
* Disable all printing. stdio.h will not be included. Printing can |
|||
* be re-enabled at run-time by setting the global pointer amd_printf |
|||
* to printf (or mexPrintf for a MATLAB mexFunction). |
|||
* |
|||
* -DNMALLOC |
|||
* |
|||
* No memory manager is defined at compile-time. You MUST define the |
|||
* function pointers amd_malloc, amd_free, amd_realloc, and |
|||
* amd_calloc at run-time for AMD to work properly. |
|||
*/ |
|||
|
|||
/* ========================================================================= */ |
|||
/* === NDEBUG ============================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* |
|||
* Turning on debugging takes some work (see below). If you do not edit this |
|||
* file, then debugging is always turned off, regardless of whether or not |
|||
* -DNDEBUG is specified in your compiler options. |
|||
* |
|||
* If AMD is being compiled as a mexFunction, then MATLAB_MEX_FILE is defined, |
|||
* and mxAssert is used instead of assert. If debugging is not enabled, no |
|||
* MATLAB include files or functions are used. Thus, the AMD library libamd.a |
|||
* can be safely used in either a stand-alone C program or in another |
|||
* mexFunction, without any change. |
|||
*/ |
|||
|
|||
/* |
|||
AMD will be exceedingly slow when running in debug mode. The next three |
|||
lines ensure that debugging is turned off. |
|||
*/ |
|||
#ifndef NDEBUG |
|||
#define NDEBUG |
|||
#endif |
|||
|
|||
/* |
|||
To enable debugging, uncomment the following line: |
|||
#undef NDEBUG |
|||
*/ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* ANSI include files */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* from stdlib.h: size_t, malloc, free, realloc, and calloc */ |
|||
#include <stdlib.h> |
|||
|
|||
#if !defined(NPRINT) || !defined(NDEBUG) |
|||
/* from stdio.h: printf. Not included if NPRINT is defined at compile time. |
|||
* fopen and fscanf are used when debugging. */ |
|||
#include <stdio.h> |
|||
#endif |
|||
|
|||
/* from limits.h: INT_MAX and LONG_MAX */ |
|||
#include <limits.h> |
|||
|
|||
/* from math.h: sqrt */ |
|||
#include <math.h> |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* MATLAB include files (only if being used in or via MATLAB) */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
#ifdef MATLAB_MEX_FILE |
|||
#include "matrix.h" |
|||
#include "mex.h" |
|||
#endif |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* basic definitions */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
#ifdef FLIP |
|||
#undef FLIP |
|||
#endif |
|||
|
|||
#ifdef MAX |
|||
#undef MAX |
|||
#endif |
|||
|
|||
#ifdef MIN |
|||
#undef MIN |
|||
#endif |
|||
|
|||
#ifdef EMPTY |
|||
#undef EMPTY |
|||
#endif |
|||
|
|||
#ifdef GLOBAL |
|||
#undef GLOBAL |
|||
#endif |
|||
|
|||
#ifdef PRIVATE |
|||
#undef PRIVATE |
|||
#endif |
|||
|
|||
/* FLIP is a "negation about -1", and is used to mark an integer i that is |
|||
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY |
|||
* is negative, and FLIP of a number < EMTPY is positive. FLIP (FLIP (i)) = i |
|||
* for all integers i. UNFLIP (i) is >= EMPTY. */ |
|||
#define EMPTY (-1) |
|||
#define FLIP(i) (-(i)-2) |
|||
#define UNFLIP(i) ((i < EMPTY) ? FLIP (i) : (i)) |
|||
|
|||
/* for integer MAX/MIN, or for doubles when we don't care how NaN's behave: */ |
|||
#define MAX(a,b) (((a) > (b)) ? (a) : (b)) |
|||
#define MIN(a,b) (((a) < (b)) ? (a) : (b)) |
|||
|
|||
/* logical expression of p implies q: */ |
|||
#define IMPLIES(p,q) (!(p) || (q)) |
|||
|
|||
/* Note that the IBM RS 6000 xlc predefines TRUE and FALSE in <types.h>. */ |
|||
/* The Compaq Alpha also predefines TRUE and FALSE. */ |
|||
#ifdef TRUE |
|||
#undef TRUE |
|||
#endif |
|||
#ifdef FALSE |
|||
#undef FALSE |
|||
#endif |
|||
|
|||
#define TRUE (1) |
|||
#define FALSE (0) |
|||
#define PRIVATE static |
|||
#define GLOBAL |
|||
#define EMPTY (-1) |
|||
|
|||
/* Note that Linux's gcc 2.96 defines NULL as ((void *) 0), but other */ |
|||
/* compilers (even gcc 2.95.2 on Solaris) define NULL as 0 or (0). We */ |
|||
/* need to use the ANSI standard value of 0. */ |
|||
#ifdef NULL |
|||
#undef NULL |
|||
#endif |
|||
|
|||
#define NULL 0 |
|||
|
|||
/* largest value of size_t */ |
|||
#ifndef SIZE_T_MAX |
|||
#define SIZE_T_MAX ((size_t) (-1)) |
|||
#endif |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* integer type for AMD: int or UF_long */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* define UF_long */ |
|||
#include "UFconfig.h" |
|||
|
|||
#if defined (DLONG) || defined (ZLONG) |
|||
|
|||
#define Int UF_long |
|||
#define ID UF_long_id |
|||
#define Int_MAX UF_long_max |
|||
|
|||
#define AMD_order amd_l_order |
|||
#define AMD_defaults amd_l_defaults |
|||
#define AMD_control amd_l_control |
|||
#define AMD_info amd_l_info |
|||
#define AMD_1 amd_l1 |
|||
#define AMD_2 amd_l2 |
|||
#define AMD_valid amd_l_valid |
|||
#define AMD_aat amd_l_aat |
|||
#define AMD_postorder amd_l_postorder |
|||
#define AMD_post_tree amd_l_post_tree |
|||
#define AMD_dump amd_l_dump |
|||
#define AMD_debug amd_l_debug |
|||
#define AMD_debug_init amd_l_debug_init |
|||
#define AMD_preprocess amd_l_preprocess |
|||
|
|||
#else |
|||
|
|||
#define Int int |
|||
#define ID "%d" |
|||
#define Int_MAX INT_MAX |
|||
|
|||
#define AMD_order amd_order |
|||
#define AMD_defaults amd_defaults |
|||
#define AMD_control amd_control |
|||
#define AMD_info amd_info |
|||
#define AMD_1 amd_1 |
|||
#define AMD_2 amd_2 |
|||
#define AMD_valid amd_valid |
|||
#define AMD_aat amd_aat |
|||
#define AMD_postorder amd_postorder |
|||
#define AMD_post_tree amd_post_tree |
|||
#define AMD_dump amd_dump |
|||
#define AMD_debug amd_debug |
|||
#define AMD_debug_init amd_debug_init |
|||
#define AMD_preprocess amd_preprocess |
|||
|
|||
#endif |
|||
|
|||
/* ========================================================================= */ |
|||
/* === PRINTF macro ======================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* All output goes through the PRINTF macro. */ |
|||
#define PRINTF(params) { if (amd_printf != NULL) (void) amd_printf params ; } |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD routine definitions (user-callable) */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
#include "amd.h" |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD routine definitions (not user-callable) */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
GLOBAL size_t AMD_aat |
|||
( |
|||
Int n, |
|||
const Int Ap [ ], |
|||
const Int Ai [ ], |
|||
Int Len [ ], |
|||
Int Tp [ ], |
|||
double Info [ ] |
|||
) ; |
|||
|
|||
GLOBAL void AMD_1 |
|||
( |
|||
Int n, |
|||
const Int Ap [ ], |
|||
const Int Ai [ ], |
|||
Int P [ ], |
|||
Int Pinv [ ], |
|||
Int Len [ ], |
|||
Int slen, |
|||
Int S [ ], |
|||
double Control [ ], |
|||
double Info [ ] |
|||
) ; |
|||
|
|||
GLOBAL void AMD_postorder |
|||
( |
|||
Int nn, |
|||
Int Parent [ ], |
|||
Int Npiv [ ], |
|||
Int Fsize [ ], |
|||
Int Order [ ], |
|||
Int Child [ ], |
|||
Int Sibling [ ], |
|||
Int Stack [ ] |
|||
) ; |
|||
|
|||
GLOBAL Int AMD_post_tree |
|||
( |
|||
Int root, |
|||
Int k, |
|||
Int Child [ ], |
|||
const Int Sibling [ ], |
|||
Int Order [ ], |
|||
Int Stack [ ] |
|||
#ifndef NDEBUG |
|||
, Int nn |
|||
#endif |
|||
) ; |
|||
|
|||
GLOBAL void AMD_preprocess |
|||
( |
|||
Int n, |
|||
const Int Ap [ ], |
|||
const Int Ai [ ], |
|||
Int Rp [ ], |
|||
Int Ri [ ], |
|||
Int W [ ], |
|||
Int Flag [ ] |
|||
) ; |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* debugging definitions */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
#ifndef NDEBUG |
|||
|
|||
/* from assert.h: assert macro */ |
|||
#include <assert.h> |
|||
|
|||
#ifndef EXTERN |
|||
#define EXTERN extern |
|||
#endif |
|||
|
|||
EXTERN Int AMD_debug ; |
|||
|
|||
GLOBAL void AMD_debug_init ( char *s ) ; |
|||
|
|||
GLOBAL void AMD_dump |
|||
( |
|||
Int n, |
|||
Int Pe [ ], |
|||
Int Iw [ ], |
|||
Int Len [ ], |
|||
Int iwlen, |
|||
Int pfree, |
|||
Int Nv [ ], |
|||
Int Next [ ], |
|||
Int Last [ ], |
|||
Int Head [ ], |
|||
Int Elen [ ], |
|||
Int Degree [ ], |
|||
Int W [ ], |
|||
Int nel |
|||
) ; |
|||
|
|||
#ifdef ASSERT |
|||
#undef ASSERT |
|||
#endif |
|||
|
|||
/* Use mxAssert if AMD is compiled into a mexFunction */ |
|||
#ifdef MATLAB_MEX_FILE |
|||
#define ASSERT(expression) (mxAssert ((expression), "")) |
|||
#else |
|||
#define ASSERT(expression) (assert (expression)) |
|||
#endif |
|||
|
|||
#define AMD_DEBUG0(params) { PRINTF (params) ; } |
|||
#define AMD_DEBUG1(params) { if (AMD_debug >= 1) PRINTF (params) ; } |
|||
#define AMD_DEBUG2(params) { if (AMD_debug >= 2) PRINTF (params) ; } |
|||
#define AMD_DEBUG3(params) { if (AMD_debug >= 3) PRINTF (params) ; } |
|||
#define AMD_DEBUG4(params) { if (AMD_debug >= 4) PRINTF (params) ; } |
|||
|
|||
#else |
|||
|
|||
/* no debugging */ |
|||
#define ASSERT(expression) |
|||
#define AMD_DEBUG0(params) |
|||
#define AMD_DEBUG1(params) |
|||
#define AMD_DEBUG2(params) |
|||
#define AMD_DEBUG3(params) |
|||
#define AMD_DEBUG4(params) |
|||
|
|||
#endif |
|||
@ -0,0 +1,200 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_order =========================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* User-callable AMD minimum degree ordering routine. See amd.h for |
|||
* documentation. |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
/* ========================================================================= */ |
|||
/* === AMD_order =========================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
GLOBAL Int AMD_order |
|||
( |
|||
Int n, |
|||
const Int Ap [ ], |
|||
const Int Ai [ ], |
|||
Int P [ ], |
|||
double Control [ ], |
|||
double Info [ ] |
|||
) |
|||
{ |
|||
Int *Len, *S, nz, i, *Pinv, info, status, *Rp, *Ri, *Cp, *Ci, ok ; |
|||
size_t nzaat, slen ; |
|||
double mem = 0 ; |
|||
|
|||
#ifndef NDEBUG |
|||
AMD_debug_init ("amd") ; |
|||
#endif |
|||
|
|||
/* clear the Info array, if it exists */ |
|||
info = Info != (double *) NULL ; |
|||
if (info) |
|||
{ |
|||
for (i = 0 ; i < AMD_INFO ; i++) |
|||
{ |
|||
Info [i] = EMPTY ; |
|||
} |
|||
Info [AMD_N] = n ; |
|||
Info [AMD_STATUS] = AMD_OK ; |
|||
} |
|||
|
|||
/* make sure inputs exist and n is >= 0 */ |
|||
if (Ai == (Int *) NULL || Ap == (Int *) NULL || P == (Int *) NULL || n < 0) |
|||
{ |
|||
if (info) Info [AMD_STATUS] = AMD_INVALID ; |
|||
return (AMD_INVALID) ; /* arguments are invalid */ |
|||
} |
|||
|
|||
if (n == 0) |
|||
{ |
|||
return (AMD_OK) ; /* n is 0 so there's nothing to do */ |
|||
} |
|||
|
|||
nz = Ap [n] ; |
|||
if (info) |
|||
{ |
|||
Info [AMD_NZ] = nz ; |
|||
} |
|||
if (nz < 0) |
|||
{ |
|||
if (info) Info [AMD_STATUS] = AMD_INVALID ; |
|||
return (AMD_INVALID) ; |
|||
} |
|||
|
|||
/* check if n or nz will cause size_t overflow */ |
|||
if (((size_t) n) >= SIZE_T_MAX / sizeof (Int) |
|||
|| ((size_t) nz) >= SIZE_T_MAX / sizeof (Int)) |
|||
{ |
|||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; |
|||
return (AMD_OUT_OF_MEMORY) ; /* problem too large */ |
|||
} |
|||
|
|||
/* check the input matrix: AMD_OK, AMD_INVALID, or AMD_OK_BUT_JUMBLED */ |
|||
status = AMD_valid (n, n, Ap, Ai) ; |
|||
|
|||
if (status == AMD_INVALID) |
|||
{ |
|||
if (info) Info [AMD_STATUS] = AMD_INVALID ; |
|||
return (AMD_INVALID) ; /* matrix is invalid */ |
|||
} |
|||
|
|||
/* allocate two size-n integer workspaces */ |
|||
Len = amd_malloc (n * sizeof (Int)) ; |
|||
Pinv = amd_malloc (n * sizeof (Int)) ; |
|||
mem += n ; |
|||
mem += n ; |
|||
if (!Len || !Pinv) |
|||
{ |
|||
/* :: out of memory :: */ |
|||
amd_free (Len) ; |
|||
amd_free (Pinv) ; |
|||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; |
|||
return (AMD_OUT_OF_MEMORY) ; |
|||
} |
|||
|
|||
if (status == AMD_OK_BUT_JUMBLED) |
|||
{ |
|||
/* sort the input matrix and remove duplicate entries */ |
|||
AMD_DEBUG1 (("Matrix is jumbled\n")) ; |
|||
Rp = amd_malloc ((n+1) * sizeof (Int)) ; |
|||
Ri = amd_malloc (MAX (nz,1) * sizeof (Int)) ; |
|||
mem += (n+1) ; |
|||
mem += MAX (nz,1) ; |
|||
if (!Rp || !Ri) |
|||
{ |
|||
/* :: out of memory :: */ |
|||
amd_free (Rp) ; |
|||
amd_free (Ri) ; |
|||
amd_free (Len) ; |
|||
amd_free (Pinv) ; |
|||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; |
|||
return (AMD_OUT_OF_MEMORY) ; |
|||
} |
|||
/* use Len and Pinv as workspace to create R = A' */ |
|||
AMD_preprocess (n, Ap, Ai, Rp, Ri, Len, Pinv) ; |
|||
Cp = Rp ; |
|||
Ci = Ri ; |
|||
} |
|||
else |
|||
{ |
|||
/* order the input matrix as-is. No need to compute R = A' first */ |
|||
Rp = NULL ; |
|||
Ri = NULL ; |
|||
Cp = (Int *) Ap ; |
|||
Ci = (Int *) Ai ; |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* determine the symmetry and count off-diagonal nonzeros in A+A' */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
nzaat = AMD_aat (n, Cp, Ci, Len, P, Info) ; |
|||
AMD_DEBUG1 (("nzaat: %g\n", (double) nzaat)) ; |
|||
ASSERT ((MAX (nz-n, 0) <= nzaat) && (nzaat <= 2 * (size_t) nz)) ; |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* allocate workspace for matrix, elbow room, and 6 size-n vectors */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
S = NULL ; |
|||
slen = nzaat ; /* space for matrix */ |
|||
ok = ((slen + nzaat/5) >= slen) ; /* check for size_t overflow */ |
|||
slen += nzaat/5 ; /* add elbow room */ |
|||
for (i = 0 ; ok && i < 7 ; i++) |
|||
{ |
|||
ok = ((slen + n) > slen) ; /* check for size_t overflow */ |
|||
slen += n ; /* size-n elbow room, 6 size-n work */ |
|||
} |
|||
mem += slen ; |
|||
ok = ok && (slen < SIZE_T_MAX / sizeof (Int)) ; /* check for overflow */ |
|||
ok = ok && (slen < Int_MAX) ; /* S[i] for Int i must be OK */ |
|||
if (ok) |
|||
{ |
|||
S = amd_malloc (slen * sizeof (Int)) ; |
|||
} |
|||
AMD_DEBUG1 (("slen %g\n", (double) slen)) ; |
|||
if (!S) |
|||
{ |
|||
/* :: out of memory :: (or problem too large) */ |
|||
amd_free (Rp) ; |
|||
amd_free (Ri) ; |
|||
amd_free (Len) ; |
|||
amd_free (Pinv) ; |
|||
if (info) Info [AMD_STATUS] = AMD_OUT_OF_MEMORY ; |
|||
return (AMD_OUT_OF_MEMORY) ; |
|||
} |
|||
if (info) |
|||
{ |
|||
/* memory usage, in bytes. */ |
|||
Info [AMD_MEMORY] = mem * sizeof (Int) ; |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* order the matrix */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
AMD_1 (n, Cp, Ci, P, Pinv, Len, slen, S, Control, Info) ; |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* free the workspace */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
amd_free (Rp) ; |
|||
amd_free (Ri) ; |
|||
amd_free (Len) ; |
|||
amd_free (Pinv) ; |
|||
amd_free (S) ; |
|||
if (info) Info [AMD_STATUS] = status ; |
|||
return (status) ; /* successful ordering */ |
|||
} |
|||
@ -0,0 +1,121 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_post_tree ======================================================= */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* Post-ordering of a supernodal elimination tree. */ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
GLOBAL Int AMD_post_tree |
|||
( |
|||
Int root, /* root of the tree */ |
|||
Int k, /* start numbering at k */ |
|||
Int Child [ ], /* input argument of size nn, undefined on |
|||
* output. Child [i] is the head of a link |
|||
* list of all nodes that are children of node |
|||
* i in the tree. */ |
|||
const Int Sibling [ ], /* input argument of size nn, not modified. |
|||
* If f is a node in the link list of the |
|||
* children of node i, then Sibling [f] is the |
|||
* next child of node i. |
|||
*/ |
|||
Int Order [ ], /* output order, of size nn. Order [i] = k |
|||
* if node i is the kth node of the reordered |
|||
* tree. */ |
|||
Int Stack [ ] /* workspace of size nn */ |
|||
#ifndef NDEBUG |
|||
, Int nn /* nodes are in the range 0..nn-1. */ |
|||
#endif |
|||
) |
|||
{ |
|||
Int f, head, h, i ; |
|||
|
|||
#if 0 |
|||
/* --------------------------------------------------------------------- */ |
|||
/* recursive version (Stack [ ] is not used): */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
/* this is simple, but can caouse stack overflow if nn is large */ |
|||
i = root ; |
|||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) |
|||
{ |
|||
k = AMD_post_tree (f, k, Child, Sibling, Order, Stack, nn) ; |
|||
} |
|||
Order [i] = k++ ; |
|||
return (k) ; |
|||
#endif |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* non-recursive version, using an explicit stack */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
/* push root on the stack */ |
|||
head = 0 ; |
|||
Stack [0] = root ; |
|||
|
|||
while (head >= 0) |
|||
{ |
|||
/* get head of stack */ |
|||
ASSERT (head < nn) ; |
|||
i = Stack [head] ; |
|||
AMD_DEBUG1 (("head of stack "ID" \n", i)) ; |
|||
ASSERT (i >= 0 && i < nn) ; |
|||
|
|||
if (Child [i] != EMPTY) |
|||
{ |
|||
/* the children of i are not yet ordered */ |
|||
/* push each child onto the stack in reverse order */ |
|||
/* so that small ones at the head of the list get popped first */ |
|||
/* and the biggest one at the end of the list gets popped last */ |
|||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) |
|||
{ |
|||
head++ ; |
|||
ASSERT (head < nn) ; |
|||
ASSERT (f >= 0 && f < nn) ; |
|||
} |
|||
h = head ; |
|||
ASSERT (head < nn) ; |
|||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) |
|||
{ |
|||
ASSERT (h > 0) ; |
|||
Stack [h--] = f ; |
|||
AMD_DEBUG1 (("push "ID" on stack\n", f)) ; |
|||
ASSERT (f >= 0 && f < nn) ; |
|||
} |
|||
ASSERT (Stack [h] == i) ; |
|||
|
|||
/* delete child list so that i gets ordered next time we see it */ |
|||
Child [i] = EMPTY ; |
|||
} |
|||
else |
|||
{ |
|||
/* the children of i (if there were any) are already ordered */ |
|||
/* remove i from the stack and order it. Front i is kth front */ |
|||
head-- ; |
|||
AMD_DEBUG1 (("pop "ID" order "ID"\n", i, k)) ; |
|||
Order [i] = k++ ; |
|||
ASSERT (k <= nn) ; |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
AMD_DEBUG1 (("\nStack:")) ; |
|||
for (h = head ; h >= 0 ; h--) |
|||
{ |
|||
Int j = Stack [h] ; |
|||
AMD_DEBUG1 ((" "ID, j)) ; |
|||
ASSERT (j >= 0 && j < nn) ; |
|||
} |
|||
AMD_DEBUG1 (("\n\n")) ; |
|||
ASSERT (head < nn) ; |
|||
#endif |
|||
|
|||
} |
|||
return (k) ; |
|||
} |
|||
@ -0,0 +1,207 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_postorder ======================================================= */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* Perform a postordering (via depth-first search) of an assembly tree. */ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
GLOBAL void AMD_postorder |
|||
( |
|||
/* inputs, not modified on output: */ |
|||
Int nn, /* nodes are in the range 0..nn-1 */ |
|||
Int Parent [ ], /* Parent [j] is the parent of j, or EMPTY if root */ |
|||
Int Nv [ ], /* Nv [j] > 0 number of pivots represented by node j, |
|||
* or zero if j is not a node. */ |
|||
Int Fsize [ ], /* Fsize [j]: size of node j */ |
|||
|
|||
/* output, not defined on input: */ |
|||
Int Order [ ], /* output post-order */ |
|||
|
|||
/* workspaces of size nn: */ |
|||
Int Child [ ], |
|||
Int Sibling [ ], |
|||
Int Stack [ ] |
|||
) |
|||
{ |
|||
Int i, j, k, parent, frsize, f, fprev, maxfrsize, bigfprev, bigf, fnext ; |
|||
|
|||
for (j = 0 ; j < nn ; j++) |
|||
{ |
|||
Child [j] = EMPTY ; |
|||
Sibling [j] = EMPTY ; |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* place the children in link lists - bigger elements tend to be last */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
for (j = nn-1 ; j >= 0 ; j--) |
|||
{ |
|||
if (Nv [j] > 0) |
|||
{ |
|||
/* this is an element */ |
|||
parent = Parent [j] ; |
|||
if (parent != EMPTY) |
|||
{ |
|||
/* place the element in link list of the children its parent */ |
|||
/* bigger elements will tend to be at the end of the list */ |
|||
Sibling [j] = Child [parent] ; |
|||
Child [parent] = j ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
{ |
|||
Int nels, ff, nchild ; |
|||
AMD_DEBUG1 (("\n\n================================ AMD_postorder:\n")); |
|||
nels = 0 ; |
|||
for (j = 0 ; j < nn ; j++) |
|||
{ |
|||
if (Nv [j] > 0) |
|||
{ |
|||
AMD_DEBUG1 (( ""ID" : nels "ID" npiv "ID" size "ID |
|||
" parent "ID" maxfr "ID"\n", j, nels, |
|||
Nv [j], Fsize [j], Parent [j], Fsize [j])) ; |
|||
/* this is an element */ |
|||
/* dump the link list of children */ |
|||
nchild = 0 ; |
|||
AMD_DEBUG1 ((" Children: ")) ; |
|||
for (ff = Child [j] ; ff != EMPTY ; ff = Sibling [ff]) |
|||
{ |
|||
AMD_DEBUG1 ((ID" ", ff)) ; |
|||
ASSERT (Parent [ff] == j) ; |
|||
nchild++ ; |
|||
ASSERT (nchild < nn) ; |
|||
} |
|||
AMD_DEBUG1 (("\n")) ; |
|||
parent = Parent [j] ; |
|||
if (parent != EMPTY) |
|||
{ |
|||
ASSERT (Nv [parent] > 0) ; |
|||
} |
|||
nels++ ; |
|||
} |
|||
} |
|||
} |
|||
AMD_DEBUG1 (("\n\nGo through the children of each node, and put\n" |
|||
"the biggest child last in each list:\n")) ; |
|||
#endif |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* place the largest child last in the list of children for each node */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
for (i = 0 ; i < nn ; i++) |
|||
{ |
|||
if (Nv [i] > 0 && Child [i] != EMPTY) |
|||
{ |
|||
|
|||
#ifndef NDEBUG |
|||
Int nchild ; |
|||
AMD_DEBUG1 (("Before partial sort, element "ID"\n", i)) ; |
|||
nchild = 0 ; |
|||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) |
|||
{ |
|||
ASSERT (f >= 0 && f < nn) ; |
|||
AMD_DEBUG1 ((" f: "ID" size: "ID"\n", f, Fsize [f])) ; |
|||
nchild++ ; |
|||
ASSERT (nchild <= nn) ; |
|||
} |
|||
#endif |
|||
|
|||
/* find the biggest element in the child list */ |
|||
fprev = EMPTY ; |
|||
maxfrsize = EMPTY ; |
|||
bigfprev = EMPTY ; |
|||
bigf = EMPTY ; |
|||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) |
|||
{ |
|||
ASSERT (f >= 0 && f < nn) ; |
|||
frsize = Fsize [f] ; |
|||
if (frsize >= maxfrsize) |
|||
{ |
|||
/* this is the biggest seen so far */ |
|||
maxfrsize = frsize ; |
|||
bigfprev = fprev ; |
|||
bigf = f ; |
|||
} |
|||
fprev = f ; |
|||
} |
|||
ASSERT (bigf != EMPTY) ; |
|||
|
|||
fnext = Sibling [bigf] ; |
|||
|
|||
AMD_DEBUG1 (("bigf "ID" maxfrsize "ID" bigfprev "ID" fnext "ID |
|||
" fprev " ID"\n", bigf, maxfrsize, bigfprev, fnext, fprev)) ; |
|||
|
|||
if (fnext != EMPTY) |
|||
{ |
|||
/* if fnext is EMPTY then bigf is already at the end of list */ |
|||
|
|||
if (bigfprev == EMPTY) |
|||
{ |
|||
/* delete bigf from the element of the list */ |
|||
Child [i] = fnext ; |
|||
} |
|||
else |
|||
{ |
|||
/* delete bigf from the middle of the list */ |
|||
Sibling [bigfprev] = fnext ; |
|||
} |
|||
|
|||
/* put bigf at the end of the list */ |
|||
Sibling [bigf] = EMPTY ; |
|||
ASSERT (Child [i] != EMPTY) ; |
|||
ASSERT (fprev != bigf) ; |
|||
ASSERT (fprev != EMPTY) ; |
|||
Sibling [fprev] = bigf ; |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
AMD_DEBUG1 (("After partial sort, element "ID"\n", i)) ; |
|||
for (f = Child [i] ; f != EMPTY ; f = Sibling [f]) |
|||
{ |
|||
ASSERT (f >= 0 && f < nn) ; |
|||
AMD_DEBUG1 ((" "ID" "ID"\n", f, Fsize [f])) ; |
|||
ASSERT (Nv [f] > 0) ; |
|||
nchild-- ; |
|||
} |
|||
ASSERT (nchild == 0) ; |
|||
#endif |
|||
|
|||
} |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* postorder the assembly tree */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
for (i = 0 ; i < nn ; i++) |
|||
{ |
|||
Order [i] = EMPTY ; |
|||
} |
|||
|
|||
k = 0 ; |
|||
|
|||
for (i = 0 ; i < nn ; i++) |
|||
{ |
|||
if (Parent [i] == EMPTY && Nv [i] > 0) |
|||
{ |
|||
AMD_DEBUG1 (("Root of assembly tree "ID"\n", i)) ; |
|||
k = AMD_post_tree (i, k, Child, Sibling, Order, Stack |
|||
#ifndef NDEBUG |
|||
, nn |
|||
#endif |
|||
) ; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,119 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_preprocess ====================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* Sorts, removes duplicate entries, and transposes from the nonzero pattern of |
|||
* a column-form matrix A, to obtain the matrix R. The input matrix can have |
|||
* duplicate entries and/or unsorted columns (AMD_valid (n,Ap,Ai) must not be |
|||
* AMD_INVALID). |
|||
* |
|||
* This input condition is NOT checked. This routine is not user-callable. |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
/* ========================================================================= */ |
|||
/* === AMD_preprocess ====================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* AMD_preprocess does not check its input for errors or allocate workspace. |
|||
* On input, the condition (AMD_valid (n,n,Ap,Ai) != AMD_INVALID) must hold. |
|||
*/ |
|||
|
|||
GLOBAL void AMD_preprocess |
|||
( |
|||
Int n, /* input matrix: A is n-by-n */ |
|||
const Int Ap [ ], /* size n+1 */ |
|||
const Int Ai [ ], /* size nz = Ap [n] */ |
|||
|
|||
/* output matrix R: */ |
|||
Int Rp [ ], /* size n+1 */ |
|||
Int Ri [ ], /* size nz (or less, if duplicates present) */ |
|||
|
|||
Int W [ ], /* workspace of size n */ |
|||
Int Flag [ ] /* workspace of size n */ |
|||
) |
|||
{ |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* local variables */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
Int i, j, p, p2 ; |
|||
|
|||
ASSERT (AMD_valid (n, n, Ap, Ai) != AMD_INVALID) ; |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* count the entries in each row of A (excluding duplicates) */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
W [i] = 0 ; /* # of nonzeros in row i (excl duplicates) */ |
|||
Flag [i] = EMPTY ; /* Flag [i] = j if i appears in column j */ |
|||
} |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
p2 = Ap [j+1] ; |
|||
for (p = Ap [j] ; p < p2 ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
if (Flag [i] != j) |
|||
{ |
|||
/* row index i has not yet appeared in column j */ |
|||
W [i]++ ; /* one more entry in row i */ |
|||
Flag [i] = j ; /* flag row index i as appearing in col j*/ |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* compute the row pointers for R */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
Rp [0] = 0 ; |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
Rp [i+1] = Rp [i] + W [i] ; |
|||
} |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
W [i] = Rp [i] ; |
|||
Flag [i] = EMPTY ; |
|||
} |
|||
|
|||
/* --------------------------------------------------------------------- */ |
|||
/* construct the row form matrix R */ |
|||
/* --------------------------------------------------------------------- */ |
|||
|
|||
/* R = row form of pattern of A */ |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
p2 = Ap [j+1] ; |
|||
for (p = Ap [j] ; p < p2 ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
if (Flag [i] != j) |
|||
{ |
|||
/* row index i has not yet appeared in column j */ |
|||
Ri [W [i]++] = j ; /* put col j in row i */ |
|||
Flag [i] = j ; /* flag row index i as appearing in col j*/ |
|||
} |
|||
} |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
ASSERT (AMD_valid (n, n, Rp, Ri) == AMD_OK) ; |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
ASSERT (W [j] == Rp [j+1]) ; |
|||
} |
|||
#endif |
|||
} |
|||
@ -0,0 +1,93 @@ |
|||
/* ========================================================================= */ |
|||
/* === AMD_valid =========================================================== */ |
|||
/* ========================================================================= */ |
|||
|
|||
/* ------------------------------------------------------------------------- */ |
|||
/* AMD, Copyright (c) Timothy A. Davis, */ |
|||
/* Patrick R. Amestoy, and Iain S. Duff. See ../README.txt for License. */ |
|||
/* email: davis at cise.ufl.edu CISE Department, Univ. of Florida. */ |
|||
/* web: http://www.cise.ufl.edu/research/sparse/amd */ |
|||
/* ------------------------------------------------------------------------- */ |
|||
|
|||
/* Check if a column-form matrix is valid or not. The matrix A is |
|||
* n_row-by-n_col. The row indices of entries in column j are in |
|||
* Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are: |
|||
* |
|||
* n_row >= 0 |
|||
* n_col >= 0 |
|||
* nz = Ap [n_col] >= 0 number of entries in the matrix |
|||
* Ap [0] == 0 |
|||
* Ap [j] <= Ap [j+1] for all j in the range 0 to n_col. |
|||
* Ai [0 ... nz-1] must be in the range 0 to n_row-1. |
|||
* |
|||
* If any of the above conditions hold, AMD_INVALID is returned. If the |
|||
* following condition holds, AMD_OK_BUT_JUMBLED is returned (a warning, |
|||
* not an error): |
|||
* |
|||
* row indices in Ai [Ap [j] ... Ap [j+1]-1] are not sorted in ascending |
|||
* order, and/or duplicate entries exist. |
|||
* |
|||
* Otherwise, AMD_OK is returned. |
|||
* |
|||
* In v1.2 and earlier, this function returned TRUE if the matrix was valid |
|||
* (now returns AMD_OK), or FALSE otherwise (now returns AMD_INVALID or |
|||
* AMD_OK_BUT_JUMBLED). |
|||
*/ |
|||
|
|||
#include "amd_internal.h" |
|||
|
|||
GLOBAL Int AMD_valid |
|||
( |
|||
/* inputs, not modified on output: */ |
|||
Int n_row, /* A is n_row-by-n_col */ |
|||
Int n_col, |
|||
const Int Ap [ ], /* column pointers of A, of size n_col+1 */ |
|||
const Int Ai [ ] /* row indices of A, of size nz = Ap [n_col] */ |
|||
) |
|||
{ |
|||
Int nz, j, p1, p2, ilast, i, p, result = AMD_OK ; |
|||
|
|||
if (n_row < 0 || n_col < 0 || Ap == NULL || Ai == NULL) |
|||
{ |
|||
return (AMD_INVALID) ; |
|||
} |
|||
nz = Ap [n_col] ; |
|||
if (Ap [0] != 0 || nz < 0) |
|||
{ |
|||
/* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */ |
|||
AMD_DEBUG0 (("column 0 pointer bad or nz < 0\n")) ; |
|||
return (AMD_INVALID) ; |
|||
} |
|||
for (j = 0 ; j < n_col ; j++) |
|||
{ |
|||
p1 = Ap [j] ; |
|||
p2 = Ap [j+1] ; |
|||
AMD_DEBUG2 (("\nColumn: "ID" p1: "ID" p2: "ID"\n", j, p1, p2)) ; |
|||
if (p1 > p2) |
|||
{ |
|||
/* column pointers must be ascending */ |
|||
AMD_DEBUG0 (("column "ID" pointer bad\n", j)) ; |
|||
return (AMD_INVALID) ; |
|||
} |
|||
ilast = EMPTY ; |
|||
for (p = p1 ; p < p2 ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
AMD_DEBUG3 (("row: "ID"\n", i)) ; |
|||
if (i < 0 || i >= n_row) |
|||
{ |
|||
/* row index out of range */ |
|||
AMD_DEBUG0 (("index out of range, col "ID" row "ID"\n", j, i)); |
|||
return (AMD_INVALID) ; |
|||
} |
|||
if (i <= ilast) |
|||
{ |
|||
/* row index unsorted, or duplicate entry present */ |
|||
AMD_DEBUG1 (("index unsorted/dupl col "ID" row "ID"\n", j, i)); |
|||
result = AMD_OK_BUT_JUMBLED ; |
|||
} |
|||
ilast = i ; |
|||
} |
|||
} |
|||
return (result) ; |
|||
} |
|||
@ -0,0 +1,263 @@ |
|||
/* ========================================================================== */ |
|||
/* === BTF package ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* BTF_MAXTRANS: find a column permutation Q to give A*Q a zero-free diagonal |
|||
* BTF_STRONGCOMP: find a symmetric permutation P to put P*A*P' into block |
|||
* upper triangular form. |
|||
* BTF_ORDER: do both of the above (btf_maxtrans then btf_strongcomp). |
|||
* |
|||
* Copyright (c) 2004-2007. Tim Davis, University of Florida, |
|||
* with support from Sandia National Laboratories. All Rights Reserved. |
|||
*/ |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === BTF_MAXTRANS ========================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* BTF_MAXTRANS: finds a permutation of the columns of a matrix so that it has a |
|||
* zero-free diagonal. The input is an m-by-n sparse matrix in compressed |
|||
* column form. The array Ap of size n+1 gives the starting and ending |
|||
* positions of the columns in the array Ai. Ap[0] must be zero. The array Ai |
|||
* contains the row indices of the nonzeros of the matrix A, and is of size |
|||
* Ap[n]. The row indices of column j are located in Ai[Ap[j] ... Ap[j+1]-1]. |
|||
* Row indices must be in the range 0 to m-1. Duplicate entries may be present |
|||
* in any given column. The input matrix is not checked for validity (row |
|||
* indices out of the range 0 to m-1 will lead to an undeterminate result - |
|||
* possibly a core dump, for example). Row indices in any given column need |
|||
* not be in sorted order. However, if they are sorted and the matrix already |
|||
* has a zero-free diagonal, then the identity permutation is returned. |
|||
* |
|||
* The output of btf_maxtrans is an array Match of size n. If row i is matched |
|||
* with column j, then A(i,j) is nonzero, and then Match[i] = j. If the matrix |
|||
* is structurally nonsingular, all entries in the Match array are unique, and |
|||
* Match can be viewed as a column permutation if A is square. That is, column |
|||
* k of the original matrix becomes column Match[k] of the permuted matrix. In |
|||
* MATLAB, this can be expressed as (for non-structurally singular matrices): |
|||
* |
|||
* Match = maxtrans (A) ; |
|||
* B = A (:, Match) ; |
|||
* |
|||
* except of course here the A matrix and Match vector are all 0-based (rows |
|||
* and columns in the range 0 to n-1), not 1-based (rows/cols in range 1 to n). |
|||
* The MATLAB dmperm routine returns a row permutation. See the maxtrans |
|||
* mexFunction for more details. |
|||
* |
|||
* If row i is not matched to any column, then Match[i] is == -1. The |
|||
* btf_maxtrans routine returns the number of nonzeros on diagonal of the |
|||
* permuted matrix. |
|||
* |
|||
* In the MATLAB mexFunction interface to btf_maxtrans, 1 is added to the Match |
|||
* array to obtain a 1-based permutation. Thus, in MATLAB where A is m-by-n: |
|||
* |
|||
* q = maxtrans (A) ; % has entries in the range 0:n |
|||
* q % a column permutation (only if sprank(A)==n) |
|||
* B = A (:, q) ; % permuted matrix (only if sprank(A)==n) |
|||
* sum (q > 0) ; % same as "sprank (A)" |
|||
* |
|||
* This behaviour differs from p = dmperm (A) in MATLAB, which returns the |
|||
* matching as p(j)=i if row i and column j are matched, and p(j)=0 if column j |
|||
* is unmatched. |
|||
* |
|||
* p = dmperm (A) ; % has entries in the range 0:m |
|||
* p % a row permutation (only if sprank(A)==m) |
|||
* B = A (p, :) ; % permuted matrix (only if sprank(A)==m) |
|||
* sum (p > 0) ; % definition of sprank (A) |
|||
* |
|||
* This algorithm is based on the paper "On Algorithms for obtaining a maximum |
|||
* transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1, |
|||
* pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal", |
|||
* same issue, pp. 387-390. Algorithm 575 is MC21A in the Harwell Subroutine |
|||
* Library. This code is not merely a translation of the Fortran code into C. |
|||
* It is a completely new implementation of the basic underlying method (depth |
|||
* first search over a subgraph with nodes corresponding to columns matched so |
|||
* far, and cheap matching). This code was written with minimal observation of |
|||
* the MC21A/B code itself. See comments below for a comparison between the |
|||
* maxtrans and MC21A/B codes. |
|||
* |
|||
* This routine operates on a column-form matrix and produces a column |
|||
* permutation. MC21A uses a row-form matrix and produces a row permutation. |
|||
* The difference is merely one of convention in the comments and interpretation |
|||
* of the inputs and outputs. If you want a row permutation, simply pass a |
|||
* compressed-row sparse matrix to this routine and you will get a row |
|||
* permutation (just like MC21A). Similarly, you can pass a column-oriented |
|||
* matrix to MC21A and it will happily return a column permutation. |
|||
*/ |
|||
|
|||
#ifndef _BTF_H |
|||
#define _BTF_H |
|||
|
|||
/* make it easy for C++ programs to include BTF */ |
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
#include "UFconfig.h" |
|||
|
|||
int btf_maxtrans /* returns # of columns matched */ |
|||
( |
|||
/* --- input, not modified: --- */ |
|||
int nrow, /* A is nrow-by-ncol in compressed column form */ |
|||
int ncol, |
|||
int Ap [ ], /* size ncol+1 */ |
|||
int Ai [ ], /* size nz = Ap [ncol] */ |
|||
double maxwork, /* maximum amount of work to do is maxwork*nnz(A); no limit |
|||
* if <= 0 */ |
|||
|
|||
/* --- output, not defined on input --- */ |
|||
double *work, /* work = -1 if maxwork > 0 and the total work performed |
|||
* reached the maximum of maxwork*nnz(A). |
|||
* Otherwise, work = the total work performed. */ |
|||
|
|||
int Match [ ], /* size nrow. Match [i] = j if column j matched to row i |
|||
* (see above for the singular-matrix case) */ |
|||
|
|||
/* --- workspace, not defined on input or output --- */ |
|||
int Work [ ] /* size 5*ncol */ |
|||
) ; |
|||
|
|||
/* long integer version (all "int" parameters become "UF_long") */ |
|||
UF_long btf_l_maxtrans (UF_long, UF_long, UF_long *, UF_long *, double, |
|||
double *, UF_long *, UF_long *) ; |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === BTF_STRONGCOMP ======================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* BTF_STRONGCOMP finds the strongly connected components of a graph, returning |
|||
* a symmetric permutation. The matrix A must be square, and is provided on |
|||
* input in compressed-column form (see BTF_MAXTRANS, above). The diagonal of |
|||
* the input matrix A (or A*Q if Q is provided on input) is ignored. |
|||
* |
|||
* If Q is not NULL on input, then the strongly connected components of A*Q are |
|||
* found. Q may be flagged on input, where Q[k] < 0 denotes a flagged column k. |
|||
* The permutation is j = BTF_UNFLIP (Q [k]). On output, Q is modified (the |
|||
* flags are preserved) so that P*A*Q is in block upper triangular form. |
|||
* |
|||
* If Q is NULL, then the permutation P is returned so that P*A*P' is in upper |
|||
* block triangular form. |
|||
* |
|||
* The vector R gives the block boundaries, where block b is in rows/columns |
|||
* R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 1 to the |
|||
* number of strongly connected components found. |
|||
*/ |
|||
|
|||
int btf_strongcomp /* return # of strongly connected components */ |
|||
( |
|||
/* input, not modified: */ |
|||
int n, /* A is n-by-n in compressed column form */ |
|||
int Ap [ ], /* size n+1 */ |
|||
int Ai [ ], /* size nz = Ap [n] */ |
|||
|
|||
/* optional input, modified (if present) on output: */ |
|||
int Q [ ], /* size n, input column permutation */ |
|||
|
|||
/* output, not defined on input */ |
|||
int P [ ], /* size n. P [k] = j if row and column j are kth row/col |
|||
* in permuted matrix. */ |
|||
|
|||
int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */ |
|||
|
|||
/* workspace, not defined on input or output */ |
|||
int Work [ ] /* size 4n */ |
|||
) ; |
|||
|
|||
UF_long btf_l_strongcomp (UF_long, UF_long *, UF_long *, UF_long *, UF_long *, |
|||
UF_long *, UF_long *) ; |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === BTF_ORDER ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* BTF_ORDER permutes a square matrix into upper block triangular form. It |
|||
* does this by first finding a maximum matching (or perhaps a limited matching |
|||
* if the work is limited), via the btf_maxtrans function. If a complete |
|||
* matching is not found, BTF_ORDER completes the permutation, but flags the |
|||
* columns of P*A*Q to denote which columns are not matched. If the matrix is |
|||
* structurally rank deficient, some of the entries on the diagonal of the |
|||
* permuted matrix will be zero. BTF_ORDER then calls btf_strongcomp to find |
|||
* the strongly-connected components. |
|||
* |
|||
* On output, P and Q are the row and column permutations, where i = P[k] if |
|||
* row i of A is the kth row of P*A*Q, and j = BTF_UNFLIP(Q[k]) if column j of |
|||
* A is the kth column of P*A*Q. If Q[k] < 0, then the (k,k)th entry in P*A*Q |
|||
* is structurally zero. |
|||
* |
|||
* The vector R gives the block boundaries, where block b is in rows/columns |
|||
* R[b] to R[b+1]-1 of the permuted matrix, and where b ranges from 1 to the |
|||
* number of strongly connected components found. |
|||
*/ |
|||
|
|||
int btf_order /* returns number of blocks found */ |
|||
( |
|||
/* --- input, not modified: --- */ |
|||
int n, /* A is n-by-n in compressed column form */ |
|||
int Ap [ ], /* size n+1 */ |
|||
int Ai [ ], /* size nz = Ap [n] */ |
|||
double maxwork, /* do at most maxwork*nnz(A) work in the maximum |
|||
* transversal; no limit if <= 0 */ |
|||
|
|||
/* --- output, not defined on input --- */ |
|||
double *work, /* return value from btf_maxtrans */ |
|||
int P [ ], /* size n, row permutation */ |
|||
int Q [ ], /* size n, column permutation */ |
|||
int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */ |
|||
int *nmatch, /* # nonzeros on diagonal of P*A*Q */ |
|||
|
|||
/* --- workspace, not defined on input or output --- */ |
|||
int Work [ ] /* size 5n */ |
|||
) ; |
|||
|
|||
UF_long btf_l_order (UF_long, UF_long *, UF_long *, double , double *, |
|||
UF_long *, UF_long *, UF_long *, UF_long *, UF_long *) ; |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === BTF marking of singular columns ====================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* BTF_FLIP is a "negation about -1", and is used to mark an integer j |
|||
* that is normally non-negative. BTF_FLIP (-1) is -1. BTF_FLIP of |
|||
* a number > -1 is negative, and BTF_FLIP of a number < -1 is positive. |
|||
* BTF_FLIP (BTF_FLIP (j)) = j for all integers j. UNFLIP (j) acts |
|||
* like an "absolute value" operation, and is always >= -1. You can test |
|||
* whether or not an integer j is "flipped" with the BTF_ISFLIPPED (j) |
|||
* macro. |
|||
*/ |
|||
|
|||
#define BTF_FLIP(j) (-(j)-2) |
|||
#define BTF_ISFLIPPED(j) ((j) < -1) |
|||
#define BTF_UNFLIP(j) ((BTF_ISFLIPPED (j)) ? BTF_FLIP (j) : (j)) |
|||
|
|||
/* ========================================================================== */ |
|||
/* === BTF version ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* All versions of BTF include these definitions. |
|||
* As an example, to test if the version you are using is 1.2 or later: |
|||
* |
|||
* if (BTF_VERSION >= BTF_VERSION_CODE (1,2)) ... |
|||
* |
|||
* This also works during compile-time: |
|||
* |
|||
* #if (BTF >= BTF_VERSION_CODE (1,2)) |
|||
* printf ("This is version 1.2 or later\n") ; |
|||
* #else |
|||
* printf ("This is an early version\n") ; |
|||
* #endif |
|||
*/ |
|||
|
|||
#define BTF_DATE "Dec 7, 2011" |
|||
#define BTF_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) |
|||
#define BTF_MAIN_VERSION 1 |
|||
#define BTF_SUB_VERSION 1 |
|||
#define BTF_SUBSUB_VERSION 3 |
|||
#define BTF_VERSION BTF_VERSION_CODE(BTF_MAIN_VERSION,BTF_SUB_VERSION) |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
#endif |
|||
@ -0,0 +1,64 @@ |
|||
/* ========================================================================== */ |
|||
/* === btf_internal include file ============================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
#ifndef _BTF_INTERNAL_H |
|||
#define _BTF_INTERNAL_H |
|||
|
|||
/* |
|||
* Copyright (c) 2004-2007. Tim Davis, University of Florida, |
|||
* with support from Sandia National Laboratories. All Rights Reserved. |
|||
*/ |
|||
|
|||
/* Not to be included in any user program. */ |
|||
|
|||
#ifdef DLONG |
|||
#define Int UF_long |
|||
#define Int_id UF_long_id |
|||
#define BTF(name) btf_l_ ## name |
|||
#else |
|||
#define Int int |
|||
#define Int_id "%d" |
|||
#define BTF(name) btf_ ## name |
|||
#endif |
|||
|
|||
/* ========================================================================== */ |
|||
/* make sure debugging and printing is turned off */ |
|||
|
|||
#ifndef NDEBUG |
|||
#define NDEBUG |
|||
#endif |
|||
#ifndef NPRINT |
|||
#define NPRINT |
|||
#endif |
|||
|
|||
/* To enable debugging and assertions, uncomment this line: |
|||
#undef NDEBUG |
|||
*/ |
|||
/* To enable diagnostic printing, uncomment this line: |
|||
#undef NPRINT |
|||
*/ |
|||
|
|||
/* ========================================================================== */ |
|||
|
|||
#include <stdio.h> |
|||
#include <assert.h> |
|||
#define ASSERT(a) assert(a) |
|||
|
|||
#undef TRUE |
|||
#undef FALSE |
|||
#undef PRINTF |
|||
#undef MIN |
|||
|
|||
#ifndef NPRINT |
|||
#define PRINTF(s) { printf s ; } ; |
|||
#else |
|||
#define PRINTF(s) |
|||
#endif |
|||
|
|||
#define TRUE 1 |
|||
#define FALSE 0 |
|||
#define EMPTY (-1) |
|||
#define MIN(a,b) (((a) < (b)) ? (a) : (b)) |
|||
|
|||
#endif |
|||
@ -0,0 +1,387 @@ |
|||
/* ========================================================================== */ |
|||
/* === BTF_MAXTRANS ========================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Finds a column permutation that maximizes the number of entries on the |
|||
* diagonal of a sparse matrix. See btf.h for more information. |
|||
* |
|||
* This function is identical to cs_maxtrans in CSparse, with the following |
|||
* exceptions: |
|||
* |
|||
* (1) cs_maxtrans finds both jmatch and imatch, where jmatch [i] = j and |
|||
* imatch [j] = i if row i is matched to column j. This function returns |
|||
* just jmatch (the Match array). The MATLAB interface to cs_maxtrans |
|||
* (the single-output cs_dmperm) returns imatch, not jmatch to the MATLAB |
|||
* caller. |
|||
* |
|||
* (2) cs_maxtrans includes a pre-pass that counts the number of non-empty |
|||
* rows and columns (m2 and n2, respectively), and computes the matching |
|||
* using the transpose of A if m2 < n2. cs_maxtrans also returns quickly |
|||
* if the diagonal of the matrix is already zero-free. This pre-pass |
|||
* allows cs_maxtrans to be much faster than maxtrans, if the use of the |
|||
* transpose is warranted. |
|||
* |
|||
* However, for square structurally non-singular matrices with one or more |
|||
* zeros on the diagonal, the pre-pass is a waste of time, and for these |
|||
* matrices, maxtrans can be twice as fast as cs_maxtrans. Since the |
|||
* maxtrans function is intended primarily for square matrices that are |
|||
* typically structurally nonsingular, the pre-pass is not included here. |
|||
* If this maxtrans function is used on a matrix with many more columns |
|||
* than rows, consider passing the transpose to this function, or use |
|||
* cs_maxtrans instead. |
|||
* |
|||
* (3) cs_maxtrans can operate as a randomized algorithm, to help avoid |
|||
* rare cases of excessive run-time. |
|||
* |
|||
* (4) this maxtrans function includes an option that limits the total work |
|||
* performed. If this limit is reached, the maximum transveral might not |
|||
* be found. |
|||
* |
|||
* Thus, for general usage, cs_maxtrans is preferred. For square matrices that |
|||
* are typically structurally non-singular, maxtrans is preferred. A partial |
|||
* maxtrans can still be very useful when solving a sparse linear system. |
|||
* |
|||
* Copyright (c) 2004-2007. Tim Davis, University of Florida, |
|||
* with support from Sandia National Laboratories. All Rights Reserved. |
|||
*/ |
|||
|
|||
#include "btf.h" |
|||
#include "btf_internal.h" |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === augment ============================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Perform a depth-first-search starting at column k, to find an augmenting |
|||
* path. An augmenting path is a sequence of row/column pairs (i1,k), (i2,j1), |
|||
* (i3,j2), ..., (i(s+1), js), such that all of the following properties hold: |
|||
* |
|||
* * column k is not matched to any row |
|||
* * entries in the path are nonzero |
|||
* * the pairs (i1,j1), (i2,j2), (i3,j3) ..., (is,js) have been |
|||
* previously matched to each other |
|||
* * (i(s+1), js) is nonzero, and row i(s+1) is not matched to any column |
|||
* |
|||
* Once this path is found, the matching can be changed to the set of pairs |
|||
* path. An augmenting path is a sequence of row/column pairs |
|||
* |
|||
* (i1,k), (i2,j1), (i3,j2), ..., (i(s+1), js) |
|||
* |
|||
* Once a row is matched with a column it remains matched with some column, but |
|||
* not necessarily the column it was first matched with. |
|||
* |
|||
* In the worst case, this function can examine every nonzero in A. Since it |
|||
* is called n times by maxtrans, the total time of maxtrans can be as high as |
|||
* O(n*nnz(A)). To limit this work, pass a value of maxwork > 0. Then at |
|||
* most O((maxwork+1)*nnz(A)) work will be performed; the maximum matching might |
|||
* not be found, however. |
|||
* |
|||
* This routine is very similar to the dfs routine in klu_kernel.c, in the |
|||
* KLU sparse LU factorization package. It is essentially identical to the |
|||
* cs_augment routine in CSparse, and its recursive version (augment function |
|||
* in cs_maxtransr_mex.c), except that this routine allows for the search to be |
|||
* terminated early if too much work is being performed. |
|||
* |
|||
* The algorithm is based on the paper "On Algorithms for obtaining a maximum |
|||
* transversal" by Iain Duff, ACM Trans. Mathematical Software, vol 7, no. 1, |
|||
* pp. 315-330, and "Algorithm 575: Permutations for a zero-free diagonal", |
|||
* same issue, pp. 387-390. The code here is a new implementation of that |
|||
* algorithm, with different data structures and control flow. After writing |
|||
* this code, I carefully compared my algorithm with MC21A/B (ACM Algorithm 575) |
|||
* Some of the comparisons are partial because I didn't dig deeply into all of |
|||
* the details of MC21A/B, such as how the stack is maintained. The following |
|||
* arguments are essentially identical between this code and MC21A: |
|||
* |
|||
* maxtrans MC21A,B |
|||
* -------- ------- |
|||
* n N identical |
|||
* k JORD identical |
|||
* Ap IP column / row pointers |
|||
* Ai ICN row / column indices |
|||
* Ap[n] LICN length of index array (# of nonzeros in A) |
|||
* Match IPERM output column / row permutation |
|||
* nmatch NUMNZ # of nonzeros on diagonal of permuted matrix |
|||
* Flag CV mark a node as visited by the depth-first-search |
|||
* |
|||
* The following are different, but analogous: |
|||
* |
|||
* Cheap ARP indicates what part of the a column / row has |
|||
* already been matched. |
|||
* |
|||
* The following arguments are very different: |
|||
* |
|||
* - LENR # of entries in each row/column (unused in maxtrans) |
|||
* Pstack OUT Pstack keeps track of where we are in the depth- |
|||
* first-search scan of column j. I think that OUT |
|||
* plays a similar role in MC21B, but I'm unsure. |
|||
* Istack PR keeps track of the rows in the path. PR is a link |
|||
* list, though, whereas Istack is a stack. Maxtrans |
|||
* does not use any link lists. |
|||
* Jstack OUT? PR? the stack for nodes in the path (unsure) |
|||
* |
|||
* The following control structures are roughly comparable: |
|||
* |
|||
* maxtrans MC21B |
|||
* -------- ----- |
|||
* for (k = 0 ; k < n ; k++) DO 100 JORD=1,N |
|||
* while (head >= 0) DO 70 K=1,JORD |
|||
* for (p = Cheap [j] ; ...) DO 20 II=IN1,IN2 |
|||
* for (p = head ; ...) DO 90 K=1,JORD |
|||
*/ |
|||
|
|||
static Int augment |
|||
( |
|||
Int k, /* which stage of the main loop we're in */ |
|||
Int Ap [ ], /* column pointers, size n+1 */ |
|||
Int Ai [ ], /* row indices, size nz = Ap [n] */ |
|||
Int Match [ ], /* size n, Match [i] = j if col j matched to i */ |
|||
Int Cheap [ ], /* rows Ai [Ap [j] .. Cheap [j]-1] alread matched */ |
|||
Int Flag [ ], /* Flag [j] = k if j already visited this stage */ |
|||
Int Istack [ ], /* size n. Row index stack. */ |
|||
Int Jstack [ ], /* size n. Column index stack. */ |
|||
Int Pstack [ ], /* size n. Keeps track of position in adjacency list */ |
|||
double *work, /* work performed by the depth-first-search */ |
|||
double maxwork /* maximum work allowed */ |
|||
) |
|||
{ |
|||
/* local variables, but "global" to all DFS levels: */ |
|||
Int found ; /* true if match found. */ |
|||
Int head ; /* top of stack */ |
|||
|
|||
/* variables that are purely local to any one DFS level: */ |
|||
Int j2 ; /* the next DFS goes to node j2 */ |
|||
Int pend ; /* one past the end of the adjacency list for node j */ |
|||
Int pstart ; |
|||
Int quick ; |
|||
|
|||
/* variables that need to be pushed then popped from the stack: */ |
|||
Int i ; /* the row tentatively matched to i if DFS successful */ |
|||
Int j ; /* the DFS is at the current node j */ |
|||
Int p ; /* current index into the adj. list for node j */ |
|||
/* the variables i, j, and p are stacked in Istack, Jstack, and Pstack */ |
|||
|
|||
quick = (maxwork > 0) ; |
|||
|
|||
/* start a DFS to find a match for column k */ |
|||
found = FALSE ; |
|||
i = EMPTY ; |
|||
head = 0 ; |
|||
Jstack [0] = k ; |
|||
ASSERT (Flag [k] != k) ; |
|||
|
|||
while (head >= 0) |
|||
{ |
|||
j = Jstack [head] ; |
|||
pend = Ap [j+1] ; |
|||
|
|||
if (Flag [j] != k) /* a node is not yet visited */ |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* prework for node j */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
/* first time that j has been visited */ |
|||
Flag [j] = k ; |
|||
/* cheap assignment: find the next unmatched row in col j. This |
|||
* loop takes at most O(nnz(A)) time for the sum total of all |
|||
* calls to augment. */ |
|||
for (p = Cheap [j] ; p < pend && !found ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
found = (Match [i] == EMPTY) ; |
|||
} |
|||
Cheap [j] = p ; |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
/* prepare for DFS */ |
|||
if (found) |
|||
{ |
|||
/* end of augmenting path, column j matched with row i */ |
|||
Istack [head] = i ; |
|||
break ; |
|||
} |
|||
/* set Pstack [head] to the first entry in column j to scan */ |
|||
Pstack [head] = Ap [j] ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* quick return if too much work done */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (quick && *work > maxwork) |
|||
{ |
|||
/* too much work has been performed; abort the search */ |
|||
return (EMPTY) ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* DFS for nodes adjacent to j */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
/* If cheap assignment not made, continue the depth-first search. All |
|||
* rows in column j are already matched. Add the adjacent nodes to the |
|||
* stack by iterating through until finding another non-visited node. |
|||
* |
|||
* It is the following loop that can force maxtrans to take |
|||
* O(n*nnz(A)) time. */ |
|||
|
|||
pstart = Pstack [head] ; |
|||
for (p = pstart ; p < pend ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
j2 = Match [i] ; |
|||
ASSERT (j2 != EMPTY) ; |
|||
if (Flag [j2] != k) |
|||
{ |
|||
/* Node j2 is not yet visited, start a depth-first search on |
|||
* node j2. Keep track of where we left off in the scan of adj |
|||
* list of node j so we can restart j where we left off. */ |
|||
Pstack [head] = p + 1 ; |
|||
/* Push j2 onto the stack and immediately break so we can |
|||
* recurse on node j2. Also keep track of row i which (if this |
|||
* search for an augmenting path works) will be matched with the |
|||
* current node j. */ |
|||
Istack [head] = i ; |
|||
Jstack [++head] = j2 ; |
|||
break ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* determine how much work was just performed */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
*work += (p - pstart + 1) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* node j is done, but the postwork is postponed - see below */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (p == pend) |
|||
{ |
|||
/* If all adjacent nodes of j are already visited, pop j from |
|||
* stack and continue. We failed to find a match. */ |
|||
head-- ; |
|||
} |
|||
} |
|||
|
|||
/* postwork for all nodes j in the stack */ |
|||
/* unwind the path and make the corresponding matches */ |
|||
if (found) |
|||
{ |
|||
for (p = head ; p >= 0 ; p--) |
|||
{ |
|||
j = Jstack [p] ; |
|||
i = Istack [p] ; |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* postwork for node j */ |
|||
/* -------------------------------------------------------------- */ |
|||
/* if found, match row i with column j */ |
|||
Match [i] = j ; |
|||
} |
|||
} |
|||
return (found) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === maxtrans ============================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
Int BTF(maxtrans) /* returns # of columns in the matching */ |
|||
( |
|||
/* --- input --- */ |
|||
Int nrow, /* A is nrow-by-ncol in compressed column form */ |
|||
Int ncol, |
|||
Int Ap [ ], /* size ncol+1 */ |
|||
Int Ai [ ], /* size nz = Ap [ncol] */ |
|||
double maxwork, /* do at most maxwork*nnz(A) work; no limit if <= 0. This |
|||
* work limit excludes the O(nnz(A)) cheap-match phase. */ |
|||
|
|||
/* --- output --- */ |
|||
double *work, /* work = -1 if maxwork > 0 and the total work performed |
|||
* reached the maximum of maxwork*nnz(A)). |
|||
* Otherwise, work = the total work performed. */ |
|||
|
|||
Int Match [ ], /* size nrow. Match [i] = j if column j matched to row i */ |
|||
|
|||
/* --- workspace --- */ |
|||
Int Work [ ] /* size 5*ncol */ |
|||
) |
|||
{ |
|||
Int *Cheap, *Flag, *Istack, *Jstack, *Pstack ; |
|||
Int i, j, k, nmatch, work_limit_reached, result ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get workspace and initialize */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Cheap = Work ; Work += ncol ; |
|||
Flag = Work ; Work += ncol ; |
|||
|
|||
/* stack for non-recursive depth-first search in augment function */ |
|||
Istack = Work ; Work += ncol ; |
|||
Jstack = Work ; Work += ncol ; |
|||
Pstack = Work ; |
|||
|
|||
/* in column j, rows Ai [Ap [j] .. Cheap [j]-1] are known to be matched */ |
|||
for (j = 0 ; j < ncol ; j++) |
|||
{ |
|||
Cheap [j] = Ap [j] ; |
|||
Flag [j] = EMPTY ; |
|||
} |
|||
|
|||
/* all rows and columns are currently unmatched */ |
|||
for (i = 0 ; i < nrow ; i++) |
|||
{ |
|||
Match [i] = EMPTY ; |
|||
} |
|||
|
|||
if (maxwork > 0) |
|||
{ |
|||
maxwork *= Ap [ncol] ; |
|||
} |
|||
*work = 0 ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* find a matching row for each column k */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
nmatch = 0 ; |
|||
work_limit_reached = FALSE ; |
|||
for (k = 0 ; k < ncol ; k++) |
|||
{ |
|||
/* find an augmenting path to match some row i to column k */ |
|||
result = augment (k, Ap, Ai, Match, Cheap, Flag, Istack, Jstack, Pstack, |
|||
work, maxwork) ; |
|||
if (result == TRUE) |
|||
{ |
|||
/* we found it. Match [i] = k for some row i has been done. */ |
|||
nmatch++ ; |
|||
} |
|||
else if (result == EMPTY) |
|||
{ |
|||
/* augment gave up because of too much work, and no match found */ |
|||
work_limit_reached = TRUE ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* return the Match, and the # of matches made */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* At this point, row i is matched to j = Match [i] if j >= 0. i is an |
|||
* unmatched row if Match [i] == EMPTY. */ |
|||
|
|||
if (work_limit_reached) |
|||
{ |
|||
/* return -1 if the work limit of maxwork*nnz(A) was reached */ |
|||
*work = EMPTY ; |
|||
} |
|||
|
|||
return (nmatch) ; |
|||
} |
|||
@ -0,0 +1,132 @@ |
|||
/* ========================================================================== */ |
|||
/* === BTF_ORDER ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Find a permutation P and Q to permute a square sparse matrix into upper block |
|||
* triangular form. A(P,Q) will contain a zero-free diagonal if A has |
|||
* structural full-rank. Otherwise, the number of nonzeros on the diagonal of |
|||
* A(P,Q) will be maximized, and will equal the structural rank of A. |
|||
* |
|||
* Q[k] will be "flipped" if a zero-free diagonal was not found. Q[k] will be |
|||
* negative, and j = BTF_UNFLIP (Q [k]) gives the corresponding permutation. |
|||
* |
|||
* R defines the block boundaries of A(P,Q). The kth block consists of rows |
|||
* and columns R[k] to R[k+1]-1. |
|||
* |
|||
* If maxwork > 0 on input, then the work performed in btf_maxtrans is limited |
|||
* to maxwork*nnz(A) (excluding the "cheap match" phase, which can take another |
|||
* nnz(A) work). On output, the work parameter gives the actual work performed, |
|||
* or -1 if the limit was reached. In the latter case, the diagonal of A(P,Q) |
|||
* might not be zero-free, and the number of nonzeros on the diagonal of A(P,Q) |
|||
* might not be equal to the structural rank. |
|||
* |
|||
* See btf.h for more details. |
|||
* |
|||
* Copyright (c) 2004-2007. Tim Davis, University of Florida, |
|||
* with support from Sandia National Laboratories. All Rights Reserved. |
|||
*/ |
|||
|
|||
#include "btf.h" |
|||
#include "btf_internal.h" |
|||
|
|||
/* This function only operates on square matrices (either structurally full- |
|||
* rank, or structurally rank deficient). */ |
|||
|
|||
Int BTF(order) /* returns number of blocks found */ |
|||
( |
|||
/* input, not modified: */ |
|||
Int n, /* A is n-by-n in compressed column form */ |
|||
Int Ap [ ], /* size n+1 */ |
|||
Int Ai [ ], /* size nz = Ap [n] */ |
|||
double maxwork, /* do at most maxwork*nnz(A) work in the maximum |
|||
* transversal; no limit if <= 0 */ |
|||
|
|||
/* output, not defined on input */ |
|||
double *work, /* work performed in maxtrans, or -1 if limit reached */ |
|||
Int P [ ], /* size n, row permutation */ |
|||
Int Q [ ], /* size n, column permutation */ |
|||
Int R [ ], /* size n+1. block b is in rows/cols R[b] ... R[b+1]-1 */ |
|||
Int *nmatch, /* # nonzeros on diagonal of P*A*Q */ |
|||
|
|||
/* workspace, not defined on input or output */ |
|||
Int Work [ ] /* size 5n */ |
|||
) |
|||
{ |
|||
Int *Flag ; |
|||
Int nblocks, i, j, nbadcol ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute the maximum matching */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* if maxwork > 0, then a maximum matching might not be found */ |
|||
|
|||
*nmatch = BTF(maxtrans) (n, n, Ap, Ai, maxwork, work, Q, Work) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* complete permutation if the matrix is structurally singular */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* Since the matrix is square, ensure BTF_UNFLIP(Q[0..n-1]) is a |
|||
* permutation of the columns of A so that A has as many nonzeros on the |
|||
* diagonal as possible. |
|||
*/ |
|||
|
|||
if (*nmatch < n) |
|||
{ |
|||
/* get a size-n work array */ |
|||
Flag = Work + n ; |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
Flag [j] = 0 ; |
|||
} |
|||
|
|||
/* flag all matched columns */ |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
j = Q [i] ; |
|||
if (j != EMPTY) |
|||
{ |
|||
/* row i and column j are matched to each other */ |
|||
Flag [j] = 1 ; |
|||
} |
|||
} |
|||
|
|||
/* make a list of all unmatched columns, in Work [0..nbadcol-1] */ |
|||
nbadcol = 0 ; |
|||
for (j = n-1 ; j >= 0 ; j--) |
|||
{ |
|||
if (!Flag [j]) |
|||
{ |
|||
/* j is matched to nobody */ |
|||
Work [nbadcol++] = j ; |
|||
} |
|||
} |
|||
ASSERT (*nmatch + nbadcol == n) ; |
|||
|
|||
/* make an assignment for each unmatched row */ |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
if (Q [i] == EMPTY && nbadcol > 0) |
|||
{ |
|||
/* get an unmatched column j */ |
|||
j = Work [--nbadcol] ; |
|||
/* assign j to row i and flag the entry by "flipping" it */ |
|||
Q [i] = BTF_FLIP (j) ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* The permutation of a square matrix can be recovered as follows: Row i is |
|||
* matched with column j, where j = BTF_UNFLIP (Q [i]) and where j |
|||
* will always be in the valid range 0 to n-1. The entry A(i,j) is zero |
|||
* if BTF_ISFLIPPED (Q [i]) is true, and nonzero otherwise. nmatch |
|||
* is the number of entries in the Q array that are non-negative. */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* find the strongly connected components */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
nblocks = BTF(strongcomp) (n, Ap, Ai, Q, P, R, Work) ; |
|||
return (nblocks) ; |
|||
} |
|||
@ -0,0 +1,593 @@ |
|||
/* ========================================================================== */ |
|||
/* === BTF_STRONGCOMP ======================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Finds the strongly connected components of a graph, or equivalently, permutes |
|||
* the matrix into upper block triangular form. See btf.h for more details. |
|||
* Input matrix and Q are not checked on input. |
|||
* |
|||
* Copyright (c) 2004-2007. Tim Davis, University of Florida, |
|||
* with support from Sandia National Laboratories. All Rights Reserved. |
|||
*/ |
|||
|
|||
#include "btf.h" |
|||
#include "btf_internal.h" |
|||
|
|||
#define UNVISITED (-2) /* Flag [j] = UNVISITED if node j not visited yet */ |
|||
#define UNASSIGNED (-1) /* Flag [j] = UNASSIGNED if node j has been visited, |
|||
* but not yet assigned to a strongly-connected |
|||
* component (aka block). Flag [j] = k (k in the |
|||
* range 0 to nblocks-1) if node j has been visited |
|||
* (and completed, with its postwork done) and |
|||
* assigned to component k. */ |
|||
|
|||
/* This file contains two versions of the depth-first-search, a recursive one |
|||
* and a non-recursive one. By default, the non-recursive one is used. */ |
|||
|
|||
#ifndef RECURSIVE |
|||
|
|||
/* ========================================================================== */ |
|||
/* === dfs: non-recursive version (default) ================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Perform a depth-first-search of a graph, stored in an adjacency-list form. |
|||
* The row indices of column j (equivalently, the out-adjacency list of node j) |
|||
* are stored in Ai [Ap[j] ... Ap[j+1]-1]. Self-edge (diagonal entries) are |
|||
* ignored. Ap[0] must be zero, and thus nz = Ap[n] is the number of entries |
|||
* in the matrix (or edges in the graph). The row indices in each column need |
|||
* not be in any particular order. If an input column permutation is given, |
|||
* node j (in the permuted matrix A*Q) is located in |
|||
* Ai [Ap[Q[j]] ... Ap[Q[j]+1]-1]. This Q can be the same as the Match array |
|||
* output from the maxtrans routine, for a square matrix that is structurally |
|||
* full rank. |
|||
* |
|||
* The algorithm is from the paper by Robert E. Tarjan, "Depth-first search and |
|||
* linear graph algorithms," SIAM Journal on Computing, vol. 1, no. 2, |
|||
* pp. 146-160, 1972. The time taken by strongcomp is O(nnz(A)). |
|||
* |
|||
* See also MC13A/B in the Harwell subroutine library (Iain S. Duff and John |
|||
* K. Reid, "Algorithm 529: permutations to block triangular form," ACM Trans. |
|||
* on Mathematical Software, vol. 4, no. 2, pp. 189-192, 1978, and "An |
|||
* implementation of Tarjan's algorithm for the block triangular form of a |
|||
* matrix," same journal, pp. 137-147. This code is implements the same |
|||
* algorithm as MC13A/B, except that the data structures are very different. |
|||
* Also, unlike MC13A/B, the output permutation preserves the natural ordering |
|||
* within each block. |
|||
*/ |
|||
|
|||
static void dfs |
|||
( |
|||
/* inputs, not modified on output: */ |
|||
Int j, /* start the DFS at node j */ |
|||
Int Ap [ ], /* size n+1, column pointers for the matrix A */ |
|||
Int Ai [ ], /* row indices, size nz = Ap [n] */ |
|||
Int Q [ ], /* input column permutation */ |
|||
|
|||
/* inputs, modified on output (each array is of size n): */ |
|||
Int Time [ ], /* Time [j] = "time" that node j was first visited */ |
|||
Int Flag [ ], /* Flag [j]: see above */ |
|||
Int Low [ ], /* Low [j]: see definition below */ |
|||
Int *p_nblocks, /* number of blocks (aka strongly-connected-comp.)*/ |
|||
Int *p_timestamp, /* current "time" */ |
|||
|
|||
/* workspace, not defined on input or output: */ |
|||
Int Cstack [ ], /* size n, output stack to hold nodes of components */ |
|||
Int Jstack [ ], /* size n, stack for the variable j */ |
|||
Int Pstack [ ] /* size n, stack for the variable p */ |
|||
) |
|||
{ |
|||
/* ---------------------------------------------------------------------- */ |
|||
/* local variables, and initializations */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* local variables, but "global" to all DFS levels: */ |
|||
Int chead ; /* top of Cstack */ |
|||
Int jhead ; /* top of Jstack and Pstack */ |
|||
|
|||
/* variables that are purely local to any one DFS level: */ |
|||
Int i ; /* edge (j,i) considered; i can be next node to traverse */ |
|||
Int parent ; /* parent of node j in the DFS tree */ |
|||
Int pend ; /* one past the end of the adjacency list for node j */ |
|||
Int jj ; /* column j of A*Q is column jj of the input matrix A */ |
|||
|
|||
/* variables that need to be pushed then popped from the stack: */ |
|||
Int p ; /* current index into the adj. list for node j */ |
|||
/* the variables j and p are stacked in Jstack and Pstack */ |
|||
|
|||
/* local copies of variables in the calling routine */ |
|||
Int nblocks = *p_nblocks ; |
|||
Int timestamp = *p_timestamp ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* start a DFS at node j (same as the recursive call dfs (EMPTY, j)) */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
chead = 0 ; /* component stack is empty */ |
|||
jhead = 0 ; /* Jstack and Pstack are empty */ |
|||
Jstack [0] = j ; /* put the first node j on the Jstack */ |
|||
ASSERT (Flag [j] == UNVISITED) ; |
|||
|
|||
while (jhead >= 0) |
|||
{ |
|||
j = Jstack [jhead] ; /* grab the node j from the top of Jstack */ |
|||
|
|||
/* determine which column jj of the A is column j of A*Q */ |
|||
jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ; |
|||
pend = Ap [jj+1] ; /* j's row index list ends at Ai [pend-1] */ |
|||
|
|||
if (Flag [j] == UNVISITED) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* prework at node j */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
/* node j is being visited for the first time */ |
|||
Cstack [++chead] = j ; /* push j onto the stack */ |
|||
timestamp++ ; /* get a timestamp */ |
|||
Time [j] = timestamp ; /* give the timestamp to node j */ |
|||
Low [j] = timestamp ; |
|||
Flag [j] = UNASSIGNED ; /* flag node j as visited */ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* set Pstack [jhead] to the first entry in column j to scan */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
Pstack [jhead] = Ap [jj] ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* DFS rooted at node j (start it, or continue where left off) */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (p = Pstack [jhead] ; p < pend ; p++) |
|||
{ |
|||
i = Ai [p] ; /* examine the edge from node j to node i */ |
|||
if (Flag [i] == UNVISITED) |
|||
{ |
|||
/* Node i has not been visited - start a DFS at node i. |
|||
* Keep track of where we left off in the scan of adjacency list |
|||
* of node j so we can restart j where we left off. */ |
|||
Pstack [jhead] = p + 1 ; |
|||
/* Push i onto the stack and immediately break |
|||
* so we can recurse on node i. */ |
|||
Jstack [++jhead] = i ; |
|||
ASSERT (Time [i] == EMPTY) ; |
|||
ASSERT (Low [i] == EMPTY) ; |
|||
/* break here to do what the recursive call dfs (j,i) does */ |
|||
break ; |
|||
} |
|||
else if (Flag [i] == UNASSIGNED) |
|||
{ |
|||
/* Node i has been visited, but still unassigned to a block |
|||
* this is a back or cross edge if Time [i] < Time [j]. |
|||
* Note that i might equal j, in which case this code does |
|||
* nothing. */ |
|||
ASSERT (Time [i] > 0) ; |
|||
ASSERT (Low [i] > 0) ; |
|||
Low [j] = MIN (Low [j], Time [i]) ; |
|||
} |
|||
} |
|||
|
|||
if (p == pend) |
|||
{ |
|||
/* If all adjacent nodes of j are already visited, pop j from |
|||
* Jstack and do the post work for node j. This also pops p |
|||
* from the Pstack. */ |
|||
jhead-- ; |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* postwork at node j */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
/* determine if node j is the head of a component */ |
|||
if (Low [j] == Time [j]) |
|||
{ |
|||
/* pop all nodes in this SCC from Cstack */ |
|||
while (TRUE) |
|||
{ |
|||
ASSERT (chead >= 0) ; /* stack not empty (j in it) */ |
|||
i = Cstack [chead--] ; /* pop a node from the Cstack */ |
|||
ASSERT (i >= 0) ; |
|||
ASSERT (Flag [i] == UNASSIGNED) ; |
|||
Flag [i] = nblocks ; /* assign i to current block */ |
|||
if (i == j) break ; /* current block ends at j */ |
|||
} |
|||
nblocks++ ; /* one more block has been found */ |
|||
} |
|||
/* update Low [parent], if the parent exists */ |
|||
if (jhead >= 0) |
|||
{ |
|||
parent = Jstack [jhead] ; |
|||
Low [parent] = MIN (Low [parent], Low [j]) ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* cleanup: update timestamp and nblocks */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
*p_timestamp = timestamp ; |
|||
*p_nblocks = nblocks ; |
|||
} |
|||
|
|||
#else |
|||
|
|||
/* ========================================================================== */ |
|||
/* === dfs: recursive version (only for illustration) ======================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* The following is a recursive version of dfs, which computes identical results |
|||
* as the non-recursive dfs. It is included here because it is easier to read. |
|||
* Compare the comments in the code below with the identical comments in the |
|||
* non-recursive code above, and that will help you see the correlation between |
|||
* the two routines. |
|||
* |
|||
* This routine can cause stack overflow, and is thus not recommended for heavy |
|||
* usage, particularly for large matrices. To help in delaying stack overflow, |
|||
* global variables are used, reducing the amount of information each call to |
|||
* dfs places on the call/return stack (the integers i, j, p, parent, and the |
|||
* return address). Note that this means the recursive code is not thread-safe. |
|||
* To try this version, compile the code with -DRECURSIVE or include the |
|||
* following line at the top of this file: |
|||
|
|||
#define RECURSIVE |
|||
|
|||
*/ |
|||
|
|||
static Int /* for recursive illustration only, not for production use */ |
|||
chead, timestamp, nblocks, n, *Ap, *Ai, *Flag, *Cstack, *Time, *Low, |
|||
*P, *R, *Q ; |
|||
|
|||
static void dfs |
|||
( |
|||
Int parent, /* came from parent node */ |
|||
Int j /* at node j in the DFS */ |
|||
) |
|||
{ |
|||
Int p ; /* current index into the adj. list for node j */ |
|||
Int i ; /* edge (j,i) considered; i can be next node to traverse */ |
|||
Int jj ; /* column j of A*Q is column jj of the input matrix A */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* prework at node j */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* node j is being visited for the first time */ |
|||
Cstack [++chead] = j ; /* push j onto the stack */ |
|||
timestamp++ ; /* get a timestamp */ |
|||
Time [j] = timestamp ; /* give the timestamp to node j */ |
|||
Low [j] = timestamp ; |
|||
Flag [j] = UNASSIGNED ; /* flag node j as visited */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* DFS rooted at node j */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* determine which column jj of the A is column j of A*Q */ |
|||
jj = (Q == (Int *) NULL) ? (j) : (BTF_UNFLIP (Q [j])) ; |
|||
for (p = Ap [jj] ; p < Ap [jj+1] ; p++) |
|||
{ |
|||
i = Ai [p] ; /* examine the edge from node j to node i */ |
|||
if (Flag [i] == UNVISITED) |
|||
{ |
|||
/* Node i has not been visited - start a DFS at node i. */ |
|||
dfs (j, i) ; |
|||
} |
|||
else if (Flag [i] == UNASSIGNED) |
|||
{ |
|||
/* Node i has been visited, but still unassigned to a block |
|||
* this is a back or cross edge if Time [i] < Time [j]. |
|||
* Note that i might equal j, in which case this code does |
|||
* nothing. */ |
|||
Low [j] = MIN (Low [j], Time [i]) ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* postwork at node j */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* determine if node j is the head of a component */ |
|||
if (Low [j] == Time [j]) |
|||
{ |
|||
/* pop all nodes in this strongly connected component from Cstack */ |
|||
while (TRUE) |
|||
{ |
|||
i = Cstack [chead--] ; /* pop a node from the Cstack */ |
|||
Flag [i] = nblocks ; /* assign node i to current block */ |
|||
if (i == j) break ; /* current block ends at node j */ |
|||
} |
|||
nblocks++ ; /* one more block has been found */ |
|||
} |
|||
/* update Low [parent] */ |
|||
if (parent != EMPTY) |
|||
{ |
|||
/* Note that this could be done with Low[j] = MIN(Low[j],Low[i]) just |
|||
* after the dfs (j,i) statement above, and then parent would not have |
|||
* to be an input argument. Putting it here places all the postwork |
|||
* for node j in one place, thus making the non-recursive DFS easier. */ |
|||
Low [parent] = MIN (Low [parent], Low [j]) ; |
|||
} |
|||
} |
|||
|
|||
#endif |
|||
|
|||
/* ========================================================================== */ |
|||
/* === btf_strongcomp ======================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
#ifndef RECURSIVE |
|||
|
|||
Int BTF(strongcomp) /* return # of strongly connected components */ |
|||
( |
|||
/* input, not modified: */ |
|||
Int n, /* A is n-by-n in compressed column form */ |
|||
Int Ap [ ], /* size n+1 */ |
|||
Int Ai [ ], /* size nz = Ap [n] */ |
|||
|
|||
/* optional input, modified (if present) on output: */ |
|||
Int Q [ ], /* size n, input column permutation. The permutation Q can |
|||
* include a flag which indicates an unmatched row. |
|||
* jold = BTF_UNFLIP (Q [jnew]) is the permutation; |
|||
* this function ingnores these flags. On output, it is |
|||
* modified according to the permutation P. */ |
|||
|
|||
/* output, not defined on input: */ |
|||
Int P [ ], /* size n. P [k] = j if row and column j are kth row/col |
|||
* in permuted matrix. */ |
|||
Int R [ ], /* size n+1. kth block is in rows/cols R[k] ... R[k+1]-1 |
|||
* of the permuted matrix. */ |
|||
|
|||
/* workspace, not defined on input or output: */ |
|||
Int Work [ ] /* size 4n */ |
|||
) |
|||
|
|||
#else |
|||
|
|||
Int BTF(strongcomp) /* recursive version - same as above except for Work size */ |
|||
( |
|||
Int n_in, |
|||
Int Ap_in [ ], |
|||
Int Ai_in [ ], |
|||
Int Q_in [ ], |
|||
Int P_in [ ], |
|||
Int R_in [ ], |
|||
Int Work [ ] /* size 2n */ |
|||
) |
|||
|
|||
#endif |
|||
|
|||
{ |
|||
Int j, k, b ; |
|||
|
|||
#ifndef RECURSIVE |
|||
Int timestamp, nblocks, *Flag, *Cstack, *Time, *Low, *Jstack, *Pstack ; |
|||
#else |
|||
n = n_in ; |
|||
Ap = Ap_in ; |
|||
Ai = Ai_in ; |
|||
Q = Q_in ; |
|||
P = P_in ; |
|||
R = R_in ; |
|||
chead = EMPTY ; |
|||
#endif |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get and initialize workspace */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* timestamp is incremented each time a new node is visited. |
|||
* |
|||
* Time [j] is the timestamp given to node j. |
|||
* |
|||
* Low [j] is the lowest timestamp of any node reachable from j via either |
|||
* a path to any descendent of j in the DFS tree, or via a single edge to |
|||
* an either an ancestor (a back edge) or another node that's neither an |
|||
* ancestor nor a descendant (a cross edge). If Low [j] is equal to |
|||
* the timestamp of node j (Time [j]), then node j is the "head" of a |
|||
* strongly connected component (SCC). That is, it is the first node |
|||
* visited in its strongly connected component, and the DFS subtree rooted |
|||
* at node j spans all the nodes of the strongly connected component. |
|||
* |
|||
* The term "block" and "component" are used interchangebly in this code; |
|||
* "block" being a matrix term and "component" being a graph term for the |
|||
* same thing. |
|||
* |
|||
* When a node is visited, it is placed on the Cstack (for "component" |
|||
* stack). When node j is found to be an SCC head, all the nodes from the |
|||
* top of the stack to node j itself form the nodes in the SCC. This Cstack |
|||
* is used for both the recursive and non-recursive versions. |
|||
*/ |
|||
|
|||
Time = Work ; Work += n ; |
|||
Flag = Work ; Work += n ; |
|||
Low = P ; /* use output array P as workspace for Low */ |
|||
Cstack = R ; /* use output array R as workspace for Cstack */ |
|||
|
|||
#ifndef RECURSIVE |
|||
/* stack for non-recursive dfs */ |
|||
Jstack = Work ; Work += n ; /* stack for j */ |
|||
Pstack = Work ; /* stack for p */ |
|||
#endif |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
Flag [j] = UNVISITED ; |
|||
Low [j] = EMPTY ; |
|||
Time [j] = EMPTY ; |
|||
#ifndef NDEBUG |
|||
Cstack [j] = EMPTY ; |
|||
#ifndef RECURSIVE |
|||
Jstack [j] = EMPTY ; |
|||
Pstack [j] = EMPTY ; |
|||
#endif |
|||
#endif |
|||
} |
|||
|
|||
timestamp = 0 ; /* each node given a timestamp when it is visited */ |
|||
nblocks = 0 ; /* number of blocks found so far */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* find the connected components via a depth-first-search */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* node j is unvisited or assigned to a block. Cstack is empty. */ |
|||
ASSERT (Flag [j] == UNVISITED || (Flag [j] >= 0 && Flag [j] < nblocks)); |
|||
if (Flag [j] == UNVISITED) |
|||
{ |
|||
#ifndef RECURSIVE |
|||
/* non-recursive dfs (default) */ |
|||
dfs (j, Ap, Ai, Q, Time, Flag, Low, &nblocks, ×tamp, |
|||
Cstack, Jstack, Pstack) ; |
|||
#else |
|||
/* recursive dfs (for illustration only) */ |
|||
ASSERT (chead == EMPTY) ; |
|||
dfs (EMPTY, j) ; |
|||
ASSERT (chead == EMPTY) ; |
|||
#endif |
|||
} |
|||
} |
|||
ASSERT (timestamp == n) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* construct the block boundary array, R */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (b = 0 ; b < nblocks ; b++) |
|||
{ |
|||
R [b] = 0 ; |
|||
} |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* node j has been assigned to block b = Flag [j] */ |
|||
ASSERT (Time [j] > 0 && Time [j] <= n) ; |
|||
ASSERT (Low [j] > 0 && Low [j] <= n) ; |
|||
ASSERT (Flag [j] >= 0 && Flag [j] < nblocks) ; |
|||
R [Flag [j]]++ ; |
|||
} |
|||
/* R [b] is now the number of nodes in block b. Compute cumulative sum |
|||
* of R, using Time [0 ... nblocks-1] as workspace. */ |
|||
Time [0] = 0 ; |
|||
for (b = 1 ; b < nblocks ; b++) |
|||
{ |
|||
Time [b] = Time [b-1] + R [b-1] ; |
|||
} |
|||
for (b = 0 ; b < nblocks ; b++) |
|||
{ |
|||
R [b] = Time [b] ; |
|||
} |
|||
R [nblocks] = n ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* construct the permutation, preserving the natural order */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
P [k] = EMPTY ; |
|||
} |
|||
#endif |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* place column j in the permutation */ |
|||
P [Time [Flag [j]]++] = j ; |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
ASSERT (P [k] != EMPTY) ; |
|||
} |
|||
#endif |
|||
|
|||
/* Now block b consists of the nodes k1 to k2-1 in the permuted matrix, |
|||
* where k1 = R [b] and k2 = R [b+1]. Row and column j of the original |
|||
* matrix becomes row and column P [k] of the permuted matrix. The set of |
|||
* of rows/columns (nodes) in block b is given by P [k1 ... k2-1], and this |
|||
* set is sorted in ascending order. Thus, if the matrix consists of just |
|||
* one block, P is the identity permutation. */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* if Q is present on input, set Q = Q*P' */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Q != (Int *) NULL) |
|||
{ |
|||
/* We found a symmetric permutation P for the matrix A*Q. The overall |
|||
* permutation is thus P*(A*Q)*P'. Set Q=Q*P' so that the final |
|||
* permutation is P*A*Q. Use Time as workspace. Note that this |
|||
* preserves the negative values of Q if the matrix is structurally |
|||
* singular. */ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Time [k] = Q [P [k]] ; |
|||
} |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Q [k] = Time [k] ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* how to traverse the permuted matrix */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* If Q is not present, the following code can be used to traverse the |
|||
* permuted matrix P*A*P' |
|||
* |
|||
* // compute the inverse of P |
|||
* for (knew = 0 ; knew < n ; knew++) |
|||
* { |
|||
* // row and column kold in the old matrix is row/column knew |
|||
* // in the permuted matrix P*A*P' |
|||
* kold = P [knew] ; |
|||
* Pinv [kold] = knew ; |
|||
* } |
|||
* for (b = 0 ; b < nblocks ; b++) |
|||
* { |
|||
* // traverse block b of the permuted matrix P*A*P' |
|||
* k1 = R [b] ; |
|||
* k2 = R [b+1] ; |
|||
* nk = k2 - k1 ; |
|||
* for (jnew = k1 ; jnew < k2 ; jnew++) |
|||
* { |
|||
* jold = P [jnew] ; |
|||
* for (p = Ap [jold] ; p < Ap [jold+1] ; p++) |
|||
* { |
|||
* iold = Ai [p] ; |
|||
* inew = Pinv [iold] ; |
|||
* // Entry in the old matrix is A (iold, jold), and its |
|||
* // position in the new matrix P*A*P' is (inew, jnew). |
|||
* // Let B be the bth diagonal block of the permuted |
|||
* // matrix. If inew >= k1, then this entry is in row/ |
|||
* // column (inew-k1, jnew-k1) of the nk-by-nk matrix B. |
|||
* // Otherwise, the entry is in the upper block triangular |
|||
* // part, not in any diagonal block. |
|||
* } |
|||
* } |
|||
* } |
|||
* |
|||
* If Q is present replace the above statement |
|||
* jold = P [jnew] ; |
|||
* with |
|||
* jold = Q [jnew] ; |
|||
* or |
|||
* jold = BTF_UNFLIP (Q [jnew]) ; |
|||
* |
|||
* then entry A (iold,jold) in the old (unpermuted) matrix is at (inew,jnew) |
|||
* in the permuted matrix P*A*Q. Everything else remains the same as the |
|||
* above (simply replace P*A*P' with P*A*Q in the above comments). |
|||
*/ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* return # of blocks / # of strongly connected components */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
return (nblocks) ; |
|||
} |
|||
3611
src/maths/KLU/colamd.c
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,255 @@ |
|||
/* ========================================================================== */ |
|||
/* === colamd/symamd prototypes and definitions ============================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* COLAMD / SYMAMD include file |
|||
|
|||
You must include this file (colamd.h) in any routine that uses colamd, |
|||
symamd, or the related macros and definitions. |
|||
|
|||
Authors: |
|||
|
|||
The authors of the code itself are Stefan I. Larimore and Timothy A. |
|||
Davis (davis at cise.ufl.edu), University of Florida. The algorithm was |
|||
developed in collaboration with John Gilbert, Xerox PARC, and Esmond |
|||
Ng, Oak Ridge National Laboratory. |
|||
|
|||
Acknowledgements: |
|||
|
|||
This work was supported by the National Science Foundation, under |
|||
grants DMS-9504974 and DMS-9803599. |
|||
|
|||
Notice: |
|||
|
|||
Copyright (c) 1998-2007, Timothy A. Davis, All Rights Reserved. |
|||
|
|||
THIS MATERIAL IS PROVIDED AS IS, WITH ABSOLUTELY NO WARRANTY |
|||
EXPRESSED OR IMPLIED. ANY USE IS AT YOUR OWN RISK. |
|||
|
|||
Permission is hereby granted to use, copy, modify, and/or distribute |
|||
this program, provided that the Copyright, this License, and the |
|||
Availability of the original version is retained on all copies and made |
|||
accessible to the end-user of any code or package that includes COLAMD |
|||
or any modified version of COLAMD. |
|||
|
|||
Availability: |
|||
|
|||
The colamd/symamd library is available at |
|||
|
|||
http://www.cise.ufl.edu/research/sparse/colamd/ |
|||
|
|||
This is the http://www.cise.ufl.edu/research/sparse/colamd/colamd.h |
|||
file. It is required by the colamd.c, colamdmex.c, and symamdmex.c |
|||
files, and by any C code that calls the routines whose prototypes are |
|||
listed below, or that uses the colamd/symamd definitions listed below. |
|||
|
|||
*/ |
|||
|
|||
#ifndef COLAMD_H |
|||
#define COLAMD_H |
|||
|
|||
/* make it easy for C++ programs to include COLAMD */ |
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
/* ========================================================================== */ |
|||
/* === Include files ======================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
#include <stdlib.h> |
|||
|
|||
/* ========================================================================== */ |
|||
/* === COLAMD version ======================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* COLAMD Version 2.4 and later will include the following definitions. |
|||
* As an example, to test if the version you are using is 2.4 or later: |
|||
* |
|||
* #ifdef COLAMD_VERSION |
|||
* if (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4)) ... |
|||
* #endif |
|||
* |
|||
* This also works during compile-time: |
|||
* |
|||
* #if defined(COLAMD_VERSION) && (COLAMD_VERSION >= COLAMD_VERSION_CODE (2,4)) |
|||
* printf ("This is version 2.4 or later\n") ; |
|||
* #else |
|||
* printf ("This is an early version\n") ; |
|||
* #endif |
|||
* |
|||
* Versions 2.3 and earlier of COLAMD do not include a #define'd version number. |
|||
*/ |
|||
|
|||
#define COLAMD_DATE "Dec 7, 2011" |
|||
#define COLAMD_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) |
|||
#define COLAMD_MAIN_VERSION 2 |
|||
#define COLAMD_SUB_VERSION 7 |
|||
#define COLAMD_SUBSUB_VERSION 4 |
|||
#define COLAMD_VERSION \ |
|||
COLAMD_VERSION_CODE(COLAMD_MAIN_VERSION,COLAMD_SUB_VERSION) |
|||
|
|||
/* ========================================================================== */ |
|||
/* === Knob and statistics definitions ====================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* size of the knobs [ ] array. Only knobs [0..1] are currently used. */ |
|||
#define COLAMD_KNOBS 20 |
|||
|
|||
/* number of output statistics. Only stats [0..6] are currently used. */ |
|||
#define COLAMD_STATS 20 |
|||
|
|||
/* knobs [0] and stats [0]: dense row knob and output statistic. */ |
|||
#define COLAMD_DENSE_ROW 0 |
|||
|
|||
/* knobs [1] and stats [1]: dense column knob and output statistic. */ |
|||
#define COLAMD_DENSE_COL 1 |
|||
|
|||
/* knobs [2]: aggressive absorption */ |
|||
#define COLAMD_AGGRESSIVE 2 |
|||
|
|||
/* stats [2]: memory defragmentation count output statistic */ |
|||
#define COLAMD_DEFRAG_COUNT 2 |
|||
|
|||
/* stats [3]: colamd status: zero OK, > 0 warning or notice, < 0 error */ |
|||
#define COLAMD_STATUS 3 |
|||
|
|||
/* stats [4..6]: error info, or info on jumbled columns */ |
|||
#define COLAMD_INFO1 4 |
|||
#define COLAMD_INFO2 5 |
|||
#define COLAMD_INFO3 6 |
|||
|
|||
/* error codes returned in stats [3]: */ |
|||
#define COLAMD_OK (0) |
|||
#define COLAMD_OK_BUT_JUMBLED (1) |
|||
#define COLAMD_ERROR_A_not_present (-1) |
|||
#define COLAMD_ERROR_p_not_present (-2) |
|||
#define COLAMD_ERROR_nrow_negative (-3) |
|||
#define COLAMD_ERROR_ncol_negative (-4) |
|||
#define COLAMD_ERROR_nnz_negative (-5) |
|||
#define COLAMD_ERROR_p0_nonzero (-6) |
|||
#define COLAMD_ERROR_A_too_small (-7) |
|||
#define COLAMD_ERROR_col_length_negative (-8) |
|||
#define COLAMD_ERROR_row_index_out_of_bounds (-9) |
|||
#define COLAMD_ERROR_out_of_memory (-10) |
|||
#define COLAMD_ERROR_internal_error (-999) |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === Prototypes of user-callable routines ================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* define UF_long */ |
|||
#include "UFconfig.h" |
|||
|
|||
size_t colamd_recommended /* returns recommended value of Alen, */ |
|||
/* or 0 if input arguments are erroneous */ |
|||
( |
|||
int nnz, /* nonzeros in A */ |
|||
int n_row, /* number of rows in A */ |
|||
int n_col /* number of columns in A */ |
|||
) ; |
|||
|
|||
size_t colamd_l_recommended /* returns recommended value of Alen, */ |
|||
/* or 0 if input arguments are erroneous */ |
|||
( |
|||
UF_long nnz, /* nonzeros in A */ |
|||
UF_long n_row, /* number of rows in A */ |
|||
UF_long n_col /* number of columns in A */ |
|||
) ; |
|||
|
|||
void colamd_set_defaults /* sets default parameters */ |
|||
( /* knobs argument is modified on output */ |
|||
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ |
|||
) ; |
|||
|
|||
void colamd_l_set_defaults /* sets default parameters */ |
|||
( /* knobs argument is modified on output */ |
|||
double knobs [COLAMD_KNOBS] /* parameter settings for colamd */ |
|||
) ; |
|||
|
|||
int colamd /* returns (1) if successful, (0) otherwise*/ |
|||
( /* A and p arguments are modified on output */ |
|||
int n_row, /* number of rows in A */ |
|||
int n_col, /* number of columns in A */ |
|||
int Alen, /* size of the array A */ |
|||
int A [], /* row indices of A, of size Alen */ |
|||
int p [], /* column pointers of A, of size n_col+1 */ |
|||
double knobs [COLAMD_KNOBS],/* parameter settings for colamd */ |
|||
int stats [COLAMD_STATS] /* colamd output statistics and error codes */ |
|||
) ; |
|||
|
|||
UF_long colamd_l /* returns (1) if successful, (0) otherwise*/ |
|||
( /* A and p arguments are modified on output */ |
|||
UF_long n_row, /* number of rows in A */ |
|||
UF_long n_col, /* number of columns in A */ |
|||
UF_long Alen, /* size of the array A */ |
|||
UF_long A [], /* row indices of A, of size Alen */ |
|||
UF_long p [], /* column pointers of A, of size n_col+1 */ |
|||
double knobs [COLAMD_KNOBS],/* parameter settings for colamd */ |
|||
UF_long stats [COLAMD_STATS]/* colamd output statistics and error codes */ |
|||
) ; |
|||
|
|||
int symamd /* return (1) if OK, (0) otherwise */ |
|||
( |
|||
int n, /* number of rows and columns of A */ |
|||
int A [], /* row indices of A */ |
|||
int p [], /* column pointers of A */ |
|||
int perm [], /* output permutation, size n_col+1 */ |
|||
double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ |
|||
int stats [COLAMD_STATS], /* output statistics and error codes */ |
|||
void * (*allocate) (size_t, size_t), |
|||
/* pointer to calloc (ANSI C) or */ |
|||
/* mxCalloc (for MATLAB mexFunction) */ |
|||
void (*release) (void *) |
|||
/* pointer to free (ANSI C) or */ |
|||
/* mxFree (for MATLAB mexFunction) */ |
|||
) ; |
|||
|
|||
UF_long symamd_l /* return (1) if OK, (0) otherwise */ |
|||
( |
|||
UF_long n, /* number of rows and columns of A */ |
|||
UF_long A [], /* row indices of A */ |
|||
UF_long p [], /* column pointers of A */ |
|||
UF_long perm [], /* output permutation, size n_col+1 */ |
|||
double knobs [COLAMD_KNOBS], /* parameters (uses defaults if NULL) */ |
|||
UF_long stats [COLAMD_STATS], /* output statistics and error codes */ |
|||
void * (*allocate) (size_t, size_t), |
|||
/* pointer to calloc (ANSI C) or */ |
|||
/* mxCalloc (for MATLAB mexFunction) */ |
|||
void (*release) (void *) |
|||
/* pointer to free (ANSI C) or */ |
|||
/* mxFree (for MATLAB mexFunction) */ |
|||
) ; |
|||
|
|||
void colamd_report |
|||
( |
|||
int stats [COLAMD_STATS] |
|||
) ; |
|||
|
|||
void colamd_l_report |
|||
( |
|||
UF_long stats [COLAMD_STATS] |
|||
) ; |
|||
|
|||
void symamd_report |
|||
( |
|||
int stats [COLAMD_STATS] |
|||
) ; |
|||
|
|||
void symamd_l_report |
|||
( |
|||
UF_long stats [COLAMD_STATS] |
|||
) ; |
|||
|
|||
#ifndef EXTERN |
|||
#define EXTERN extern |
|||
#endif |
|||
|
|||
EXTERN int (*colamd_printf) (const char *, ...) ; |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
|
|||
#endif /* COLAMD_H */ |
|||
@ -0,0 +1,24 @@ |
|||
/* ========================================================================== */ |
|||
/* === colamd_global.c ====================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* ---------------------------------------------------------------------------- |
|||
* COLAMD, Copyright (C) 2007, Timothy A. Davis. |
|||
* See License.txt for the Version 2.1 of the GNU Lesser General Public License |
|||
* http://www.cise.ufl.edu/research/sparse |
|||
* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Global variables for COLAMD */ |
|||
|
|||
#ifndef NPRINT |
|||
#ifdef MATLAB_MEX_FILE |
|||
#include "mex.h" |
|||
int (*colamd_printf) (const char *, ...) = mexPrintf ; |
|||
#else |
|||
#include <stdio.h> |
|||
int (*colamd_printf) (const char *, ...) = printf ; |
|||
#endif |
|||
#else |
|||
int (*colamd_printf) (const char *, ...) = ((void *) 0) ; |
|||
#endif |
|||
|
|||
@ -0,0 +1,773 @@ |
|||
/* ========================================================================== */ |
|||
/* === klu ================================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* KLU: factorizes P*A into L*U, using the Gilbert-Peierls algorithm [1], with |
|||
* optional symmetric pruning by Eisenstat and Liu [2]. The code is by Tim |
|||
* Davis. This algorithm is what appears as the default sparse LU routine in |
|||
* MATLAB version 6.0, and still appears in MATLAB 6.5 as [L,U,P] = lu (A). |
|||
* Note that no column ordering is provided (see COLAMD or AMD for suitable |
|||
* orderings). SuperLU is based on this algorithm, except that it adds the |
|||
* use of dense matrix operations on "supernodes" (adjacent columns with |
|||
* identical). This code doesn't use supernodes, thus its name ("Kent" LU, |
|||
* as in "Clark Kent", in contrast with Super-LU...). This algorithm is slower |
|||
* than SuperLU and UMFPACK for large matrices with lots of nonzeros in their |
|||
* factors (such as for most finite-element problems). However, for matrices |
|||
* with very sparse LU factors, this algorithm is typically faster than both |
|||
* SuperLU and UMFPACK, since in this case there is little chance to exploit |
|||
* dense matrix kernels (the BLAS). |
|||
* |
|||
* Only one block of A is factorized, in the BTF form. The input n is the |
|||
* size of the block; k1 is the first row and column in the block. |
|||
* |
|||
* NOTE: no error checking is done on the inputs. This version is not meant to |
|||
* be called directly by the user. Use klu_factor instead. |
|||
* |
|||
* No fill-reducing ordering is provided. The ordering quality of |
|||
* klu_kernel_factor is the responsibility of the caller. The input A must |
|||
* pre-permuted to reduce fill-in, or fill-reducing input permutation Q must |
|||
* be provided. |
|||
* |
|||
* The input matrix A must be in compressed-column form, with either sorted |
|||
* or unsorted row indices. Row indices for column j of A is in |
|||
* Ai [Ap [j] ... Ap [j+1]-1] and the same range of indices in Ax holds the |
|||
* numerical values. No duplicate entries are allowed. |
|||
* |
|||
* Copyright 2004-2009, Tim Davis. All rights reserved. See the README |
|||
* file for details on permitted use. Note that no code from The MathWorks, |
|||
* Inc, or from SuperLU, or from any other source appears here. The code is |
|||
* written from scratch, from the algorithmic description in Gilbert & Peierls' |
|||
* and Eisenstat & Liu's journal papers [1,2]. |
|||
* |
|||
* If an input permutation Q is provided, the factorization L*U = A (P,Q) |
|||
* is computed, where P is determined by partial pivoting, and Q is the input |
|||
* ordering. If the pivot tolerance is less than 1, the "diagonal" entry that |
|||
* KLU attempts to choose is the diagonal of A (Q,Q). In other words, the |
|||
* input permutation is applied symmetrically to the input matrix. The output |
|||
* permutation P includes both the partial pivoting ordering and the input |
|||
* permutation. If Q is NULL, then it is assumed to be the identity |
|||
* permutation. Q is not modified. |
|||
* |
|||
* [1] Gilbert, J. R. and Peierls, T., "Sparse Partial Pivoting in Time |
|||
* Proportional to Arithmetic Operations," SIAM J. Sci. Stat. Comp., |
|||
* vol 9, pp. 862-874, 1988. |
|||
* [2] Eisenstat, S. C. and Liu, J. W. H., "Exploiting Structural Symmetry in |
|||
* Unsymmetric Sparse Symbolic Factorization," SIAM J. Matrix Analysis & |
|||
* Applic., vol 13, pp. 202-211, 1992. |
|||
*/ |
|||
|
|||
/* ========================================================================== */ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int n, /* A is n-by-n. n must be > 0. */ |
|||
Int Ap [ ], /* size n+1, column pointers for A */ |
|||
Int Ai [ ], /* size nz = Ap [n], row indices for A */ |
|||
Entry Ax [ ], /* size nz, values of A */ |
|||
Int Q [ ], /* size n, optional column permutation */ |
|||
double Lsize, /* estimate of number of nonzeros in L */ |
|||
|
|||
/* outputs, not defined on input */ |
|||
Unit **p_LU, /* row indices and values of L and U */ |
|||
Entry Udiag [ ], /* size n, diagonal of U */ |
|||
Int Llen [ ], /* size n, column length of L */ |
|||
Int Ulen [ ], /* size n, column length of U */ |
|||
Int Lip [ ], /* size n, column pointers for L */ |
|||
Int Uip [ ], /* size n, column pointers for U */ |
|||
Int P [ ], /* row permutation, size n */ |
|||
Int *lnz, /* size of L */ |
|||
Int *unz, /* size of U */ |
|||
|
|||
/* workspace, undefined on input */ |
|||
Entry *X, /* size n double's, zero on output */ |
|||
Int *Work, /* size 5n Int's */ |
|||
|
|||
/* inputs, not modified on output */ |
|||
Int k1, /* the block of A is from k1 to k2-1 */ |
|||
Int PSinv [ ], /* inverse of P from symbolic factorization */ |
|||
double Rs [ ], /* scale factors for A */ |
|||
|
|||
/* inputs, modified on output */ |
|||
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ |
|||
Int Offi [ ], |
|||
Entry Offx [ ], |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double maxlnz, dunits ; |
|||
Unit *LU ; |
|||
Int *Pinv, *Lpend, *Stack, *Flag, *Ap_pos, *W ; |
|||
Int lsize, usize, anz, ok ; |
|||
size_t lusize ; |
|||
ASSERT (Common != NULL) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get control parameters, or use defaults */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
n = MAX (1, n) ; |
|||
anz = Ap [n+k1] - Ap [k1] ; |
|||
|
|||
if (Lsize <= 0) |
|||
{ |
|||
Lsize = -Lsize ; |
|||
Lsize = MAX (Lsize, 1.0) ; |
|||
lsize = Lsize * anz + n ; |
|||
} |
|||
else |
|||
{ |
|||
lsize = Lsize ; |
|||
} |
|||
|
|||
usize = lsize ; |
|||
|
|||
lsize = MAX (n+1, lsize) ; |
|||
usize = MAX (n+1, usize) ; |
|||
|
|||
maxlnz = (((double) n) * ((double) n) + ((double) n)) / 2. ; |
|||
maxlnz = MIN (maxlnz, ((double) INT_MAX)) ; |
|||
lsize = MIN (maxlnz, lsize) ; |
|||
usize = MIN (maxlnz, usize) ; |
|||
|
|||
PRINTF (("Welcome to klu: n %d anz %d k1 %d lsize %d usize %d maxlnz %g\n", |
|||
n, anz, k1, lsize, usize, maxlnz)) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* allocate workspace and outputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* return arguments are not yet assigned */ |
|||
*p_LU = (Unit *) NULL ; |
|||
|
|||
/* these computations are safe from size_t overflow */ |
|||
W = Work ; |
|||
Pinv = (Int *) W ; W += n ; |
|||
Stack = (Int *) W ; W += n ; |
|||
Flag = (Int *) W ; W += n ; |
|||
Lpend = (Int *) W ; W += n ; |
|||
Ap_pos = (Int *) W ; W += n ; |
|||
|
|||
dunits = DUNITS (Int, lsize) + DUNITS (Entry, lsize) + |
|||
DUNITS (Int, usize) + DUNITS (Entry, usize) ; |
|||
lusize = (size_t) dunits ; |
|||
ok = !INT_OVERFLOW (dunits) ; |
|||
LU = ok ? KLU_malloc (lusize, sizeof (Unit), Common) : NULL ; |
|||
if (LU == NULL) |
|||
{ |
|||
/* out of memory, or problem too large */ |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
lusize = 0 ; |
|||
return (lusize) ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* factorize */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* with pruning, and non-recursive depth-first-search */ |
|||
lusize = KLU_kernel (n, Ap, Ai, Ax, Q, lusize, |
|||
Pinv, P, &LU, Udiag, Llen, Ulen, Lip, Uip, lnz, unz, |
|||
X, Stack, Flag, Ap_pos, Lpend, |
|||
k1, PSinv, Rs, Offp, Offi, Offx, Common) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* return LU factors, or return nothing if an error occurred */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
LU = KLU_free (LU, lusize, sizeof (Unit), Common) ; |
|||
lusize = 0 ; |
|||
} |
|||
*p_LU = LU ; |
|||
PRINTF ((" in klu noffdiag %d\n", Common->noffdiag)) ; |
|||
return (lusize) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_lsolve =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Solve Lx=b. Assumes L is unit lower triangular and where the unit diagonal |
|||
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs |
|||
* and is stored in ROW form with row dimension nrhs. nrhs must be in the |
|||
* range 1 to 4. */ |
|||
|
|||
void KLU_lsolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Lip [ ], |
|||
Int Llen [ ], |
|||
Unit LU [ ], |
|||
Int nrhs, |
|||
/* right-hand-side on input, solution to Lx=b on output */ |
|||
Entry X [ ] |
|||
) |
|||
{ |
|||
Entry x [4], lik ; |
|||
Int *Li ; |
|||
Entry *Lx ; |
|||
Int k, p, len, i ; |
|||
|
|||
switch (nrhs) |
|||
{ |
|||
|
|||
case 1: |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
x [0] = X [k] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
/* unit diagonal of L is not stored*/ |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
/* X [Li [p]] -= Lx [p] * x [0] ; */ |
|||
MULT_SUB (X [Li [p]], Lx [p], x [0]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
x [0] = X [2*k ] ; |
|||
x [1] = X [2*k + 1] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
lik = Lx [p] ; |
|||
MULT_SUB (X [2*i], lik, x [0]) ; |
|||
MULT_SUB (X [2*i + 1], lik, x [1]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
x [0] = X [3*k ] ; |
|||
x [1] = X [3*k + 1] ; |
|||
x [2] = X [3*k + 2] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
lik = Lx [p] ; |
|||
MULT_SUB (X [3*i], lik, x [0]) ; |
|||
MULT_SUB (X [3*i + 1], lik, x [1]) ; |
|||
MULT_SUB (X [3*i + 2], lik, x [2]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
x [0] = X [4*k ] ; |
|||
x [1] = X [4*k + 1] ; |
|||
x [2] = X [4*k + 2] ; |
|||
x [3] = X [4*k + 3] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
lik = Lx [p] ; |
|||
MULT_SUB (X [4*i], lik, x [0]) ; |
|||
MULT_SUB (X [4*i + 1], lik, x [1]) ; |
|||
MULT_SUB (X [4*i + 2], lik, x [2]) ; |
|||
MULT_SUB (X [4*i + 3], lik, x [3]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
} |
|||
} |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_usolve =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Solve Ux=b. Assumes U is non-unit upper triangular and where the diagonal |
|||
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs |
|||
* and is stored in ROW form with row dimension nrhs. nrhs must be in the |
|||
* range 1 to 4. */ |
|||
|
|||
void KLU_usolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Uip [ ], |
|||
Int Ulen [ ], |
|||
Unit LU [ ], |
|||
Entry Udiag [ ], |
|||
Int nrhs, |
|||
/* right-hand-side on input, solution to Ux=b on output */ |
|||
Entry X [ ] |
|||
) |
|||
{ |
|||
Entry x [4], uik, ukk ; |
|||
Int *Ui ; |
|||
Entry *Ux ; |
|||
Int k, p, len, i ; |
|||
|
|||
switch (nrhs) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
/* x [0] = X [k] / Udiag [k] ; */ |
|||
DIV (x [0], X [k], Udiag [k]) ; |
|||
X [k] = x [0] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
/* X [Ui [p]] -= Ux [p] * x [0] ; */ |
|||
MULT_SUB (X [Ui [p]], Ux [p], x [0]) ; |
|||
|
|||
} |
|||
} |
|||
|
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
ukk = Udiag [k] ; |
|||
/* x [0] = X [2*k ] / ukk ; |
|||
x [1] = X [2*k + 1] / ukk ; */ |
|||
DIV (x [0], X [2*k], ukk) ; |
|||
DIV (x [1], X [2*k + 1], ukk) ; |
|||
|
|||
X [2*k ] = x [0] ; |
|||
X [2*k + 1] = x [1] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Ui [p] ; |
|||
uik = Ux [p] ; |
|||
/* X [2*i ] -= uik * x [0] ; |
|||
X [2*i + 1] -= uik * x [1] ; */ |
|||
MULT_SUB (X [2*i], uik, x [0]) ; |
|||
MULT_SUB (X [2*i + 1], uik, x [1]) ; |
|||
} |
|||
} |
|||
|
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
ukk = Udiag [k] ; |
|||
|
|||
DIV (x [0], X [3*k], ukk) ; |
|||
DIV (x [1], X [3*k + 1], ukk) ; |
|||
DIV (x [2], X [3*k + 2], ukk) ; |
|||
|
|||
X [3*k ] = x [0] ; |
|||
X [3*k + 1] = x [1] ; |
|||
X [3*k + 2] = x [2] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Ui [p] ; |
|||
uik = Ux [p] ; |
|||
MULT_SUB (X [3*i], uik, x [0]) ; |
|||
MULT_SUB (X [3*i + 1], uik, x [1]) ; |
|||
MULT_SUB (X [3*i + 2], uik, x [2]) ; |
|||
} |
|||
} |
|||
|
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
ukk = Udiag [k] ; |
|||
|
|||
DIV (x [0], X [4*k], ukk) ; |
|||
DIV (x [1], X [4*k + 1], ukk) ; |
|||
DIV (x [2], X [4*k + 2], ukk) ; |
|||
DIV (x [3], X [4*k + 3], ukk) ; |
|||
|
|||
X [4*k ] = x [0] ; |
|||
X [4*k + 1] = x [1] ; |
|||
X [4*k + 2] = x [2] ; |
|||
X [4*k + 3] = x [3] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Ui [p] ; |
|||
uik = Ux [p] ; |
|||
|
|||
MULT_SUB (X [4*i], uik, x [0]) ; |
|||
MULT_SUB (X [4*i + 1], uik, x [1]) ; |
|||
MULT_SUB (X [4*i + 2], uik, x [2]) ; |
|||
MULT_SUB (X [4*i + 3], uik, x [3]) ; |
|||
} |
|||
} |
|||
|
|||
break ; |
|||
|
|||
} |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_ltsolve ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Solve L'x=b. Assumes L is unit lower triangular and where the unit diagonal |
|||
* entry is NOT stored. Overwrites B with the solution X. B is n-by-nrhs |
|||
* and is stored in ROW form with row dimension nrhs. nrhs must in the |
|||
* range 1 to 4. */ |
|||
|
|||
void KLU_ltsolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Lip [ ], |
|||
Int Llen [ ], |
|||
Unit LU [ ], |
|||
Int nrhs, |
|||
#ifdef COMPLEX |
|||
Int conj_solve, |
|||
#endif |
|||
/* right-hand-side on input, solution to L'x=b on output */ |
|||
Entry X [ ] |
|||
) |
|||
{ |
|||
Entry x [4], lik ; |
|||
Int *Li ; |
|||
Entry *Lx ; |
|||
Int k, p, len, i ; |
|||
|
|||
switch (nrhs) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
x [0] = X [k] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
/* x [0] -= CONJ (Lx [p]) * X [Li [p]] ; */ |
|||
MULT_SUB_CONJ (x [0], X [Li [p]], Lx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
/*x [0] -= Lx [p] * X [Li [p]] ;*/ |
|||
MULT_SUB (x [0], Lx [p], X [Li [p]]) ; |
|||
} |
|||
} |
|||
X [k] = x [0] ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
x [0] = X [2*k ] ; |
|||
x [1] = X [2*k + 1] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (lik, Lx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
lik = Lx [p] ; |
|||
} |
|||
MULT_SUB (x [0], lik, X [2*i]) ; |
|||
MULT_SUB (x [1], lik, X [2*i + 1]) ; |
|||
} |
|||
X [2*k ] = x [0] ; |
|||
X [2*k + 1] = x [1] ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
x [0] = X [3*k ] ; |
|||
x [1] = X [3*k + 1] ; |
|||
x [2] = X [3*k + 2] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (lik, Lx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
lik = Lx [p] ; |
|||
} |
|||
MULT_SUB (x [0], lik, X [3*i]) ; |
|||
MULT_SUB (x [1], lik, X [3*i + 1]) ; |
|||
MULT_SUB (x [2], lik, X [3*i + 2]) ; |
|||
} |
|||
X [3*k ] = x [0] ; |
|||
X [3*k + 1] = x [1] ; |
|||
X [3*k + 2] = x [2] ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = n-1 ; k >= 0 ; k--) |
|||
{ |
|||
x [0] = X [4*k ] ; |
|||
x [1] = X [4*k + 1] ; |
|||
x [2] = X [4*k + 2] ; |
|||
x [3] = X [4*k + 3] ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (lik, Lx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
lik = Lx [p] ; |
|||
} |
|||
MULT_SUB (x [0], lik, X [4*i]) ; |
|||
MULT_SUB (x [1], lik, X [4*i + 1]) ; |
|||
MULT_SUB (x [2], lik, X [4*i + 2]) ; |
|||
MULT_SUB (x [3], lik, X [4*i + 3]) ; |
|||
} |
|||
X [4*k ] = x [0] ; |
|||
X [4*k + 1] = x [1] ; |
|||
X [4*k + 2] = x [2] ; |
|||
X [4*k + 3] = x [3] ; |
|||
} |
|||
break ; |
|||
} |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_utsolve ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Solve U'x=b. Assumes U is non-unit upper triangular and where the diagonal |
|||
* entry is stored (and appears last in each column of U). Overwrites B |
|||
* with the solution X. B is n-by-nrhs and is stored in ROW form with row |
|||
* dimension nrhs. nrhs must be in the range 1 to 4. */ |
|||
|
|||
void KLU_utsolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Uip [ ], |
|||
Int Ulen [ ], |
|||
Unit LU [ ], |
|||
Entry Udiag [ ], |
|||
Int nrhs, |
|||
#ifdef COMPLEX |
|||
Int conj_solve, |
|||
#endif |
|||
/* right-hand-side on input, solution to Ux=b on output */ |
|||
Entry X [ ] |
|||
) |
|||
{ |
|||
Entry x [4], uik, ukk ; |
|||
Int k, p, len, i ; |
|||
Int *Ui ; |
|||
Entry *Ux ; |
|||
|
|||
switch (nrhs) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
x [0] = X [k] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
/* x [0] -= CONJ (Ux [p]) * X [Ui [p]] ; */ |
|||
MULT_SUB_CONJ (x [0], X [Ui [p]], Ux [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
/* x [0] -= Ux [p] * X [Ui [p]] ; */ |
|||
MULT_SUB (x [0], Ux [p], X [Ui [p]]) ; |
|||
} |
|||
} |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (ukk, Udiag [k]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
ukk = Udiag [k] ; |
|||
} |
|||
DIV (X [k], x [0], ukk) ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
x [0] = X [2*k ] ; |
|||
x [1] = X [2*k + 1] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Ui [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (uik, Ux [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
uik = Ux [p] ; |
|||
} |
|||
MULT_SUB (x [0], uik, X [2*i]) ; |
|||
MULT_SUB (x [1], uik, X [2*i + 1]) ; |
|||
} |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (ukk, Udiag [k]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
ukk = Udiag [k] ; |
|||
} |
|||
DIV (X [2*k], x [0], ukk) ; |
|||
DIV (X [2*k + 1], x [1], ukk) ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
x [0] = X [3*k ] ; |
|||
x [1] = X [3*k + 1] ; |
|||
x [2] = X [3*k + 2] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Ui [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (uik, Ux [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
uik = Ux [p] ; |
|||
} |
|||
MULT_SUB (x [0], uik, X [3*i]) ; |
|||
MULT_SUB (x [1], uik, X [3*i + 1]) ; |
|||
MULT_SUB (x [2], uik, X [3*i + 2]) ; |
|||
} |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (ukk, Udiag [k]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
ukk = Udiag [k] ; |
|||
} |
|||
DIV (X [3*k], x [0], ukk) ; |
|||
DIV (X [3*k + 1], x [1], ukk) ; |
|||
DIV (X [3*k + 2], x [2], ukk) ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, len) ; |
|||
x [0] = X [4*k ] ; |
|||
x [1] = X [4*k + 1] ; |
|||
x [2] = X [4*k + 2] ; |
|||
x [3] = X [4*k + 3] ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Ui [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (uik, Ux [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
uik = Ux [p] ; |
|||
} |
|||
MULT_SUB (x [0], uik, X [4*i]) ; |
|||
MULT_SUB (x [1], uik, X [4*i + 1]) ; |
|||
MULT_SUB (x [2], uik, X [4*i + 2]) ; |
|||
MULT_SUB (x [3], uik, X [4*i + 3]) ; |
|||
} |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (ukk, Udiag [k]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
ukk = Udiag [k] ; |
|||
} |
|||
DIV (X [4*k], x [0], ukk) ; |
|||
DIV (X [4*k + 1], x [1], ukk) ; |
|||
DIV (X [4*k + 2], x [2], ukk) ; |
|||
DIV (X [4*k + 3], x [3], ukk) ; |
|||
} |
|||
break ; |
|||
} |
|||
} |
|||
@ -0,0 +1,831 @@ |
|||
/* ========================================================================== */ |
|||
/* === klu include file ===================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Include file for user programs that call klu_* routines */ |
|||
|
|||
#ifndef _KLU_H |
|||
#define _KLU_H |
|||
|
|||
/* make it easy for C++ programs to include KLU */ |
|||
#ifdef __cplusplus |
|||
extern "C" { |
|||
#endif |
|||
|
|||
#include "amd.h" |
|||
#include "colamd.h" |
|||
#include "btf.h" |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* Symbolic object - contains the pre-ordering computed by klu_analyze */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
typedef struct |
|||
{ |
|||
/* A (P,Q) is in upper block triangular form. The kth block goes from |
|||
* row/col index R [k] to R [k+1]-1. The estimated number of nonzeros |
|||
* in the L factor of the kth block is Lnz [k]. |
|||
*/ |
|||
|
|||
/* only computed if the AMD ordering is chosen: */ |
|||
double symmetry ; /* symmetry of largest block */ |
|||
double est_flops ; /* est. factorization flop count */ |
|||
double lnz, unz ; /* estimated nz in L and U, including diagonals */ |
|||
double *Lnz ; /* size n, but only Lnz [0..nblocks-1] is used */ |
|||
|
|||
/* computed for all orderings: */ |
|||
int |
|||
n, /* input matrix A is n-by-n */ |
|||
nz, /* # entries in input matrix */ |
|||
*P, /* size n */ |
|||
*Q, /* size n */ |
|||
*R, /* size n+1, but only R [0..nblocks] is used */ |
|||
nzoff, /* nz in off-diagonal blocks */ |
|||
nblocks, /* number of blocks */ |
|||
maxblock, /* size of largest block */ |
|||
ordering, /* ordering used (AMD, COLAMD, or GIVEN) */ |
|||
do_btf ; /* whether or not BTF preordering was requested */ |
|||
|
|||
/* only computed if BTF preordering requested */ |
|||
int structural_rank ; /* 0 to n-1 if the matrix is structurally rank |
|||
* deficient. -1 if not computed. n if the matrix has |
|||
* full structural rank */ |
|||
|
|||
} klu_symbolic ; |
|||
|
|||
typedef struct /* 64-bit version (otherwise same as above) */ |
|||
{ |
|||
double symmetry, est_flops, lnz, unz ; |
|||
double *Lnz ; |
|||
UF_long n, nz, *P, *Q, *R, nzoff, nblocks, maxblock, ordering, do_btf, |
|||
structural_rank ; |
|||
|
|||
} klu_l_symbolic ; |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* Numeric object - contains the factors computed by klu_factor */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
typedef struct |
|||
{ |
|||
/* LU factors of each block, the pivot row permutation, and the |
|||
* entries in the off-diagonal blocks */ |
|||
|
|||
int n ; /* A is n-by-n */ |
|||
int nblocks ; /* number of diagonal blocks */ |
|||
int lnz ; /* actual nz in L, including diagonal */ |
|||
int unz ; /* actual nz in U, including diagonal */ |
|||
int max_lnz_block ; /* max actual nz in L in any one block, incl. diag */ |
|||
int max_unz_block ; /* max actual nz in U in any one block, incl. diag */ |
|||
int *Pnum ; /* size n. final pivot permutation */ |
|||
int *Pinv ; /* size n. inverse of final pivot permutation */ |
|||
|
|||
/* LU factors of each block */ |
|||
int *Lip ; /* size n. pointers into LUbx[block] for L */ |
|||
int *Uip ; /* size n. pointers into LUbx[block] for U */ |
|||
int *Llen ; /* size n. Llen [k] = # of entries in kth column of L */ |
|||
int *Ulen ; /* size n. Ulen [k] = # of entries in kth column of U */ |
|||
void **LUbx ; /* L and U indices and entries (excl. diagonal of U) */ |
|||
size_t *LUsize ; /* size of each LUbx [block], in sizeof (Unit) */ |
|||
void *Udiag ; /* diagonal of U */ |
|||
|
|||
/* scale factors; can be NULL if no scaling */ |
|||
double *Rs ; /* size n. Rs [i] is scale factor for row i */ |
|||
|
|||
/* permanent workspace for factorization and solve */ |
|||
size_t worksize ; /* size (in bytes) of Work */ |
|||
void *Work ; /* workspace */ |
|||
void *Xwork ; /* alias into Numeric->Work */ |
|||
int *Iwork ; /* alias into Numeric->Work */ |
|||
|
|||
/* off-diagonal entries in a conventional compressed-column sparse matrix */ |
|||
int *Offp ; /* size n+1, column pointers */ |
|||
int *Offi ; /* size nzoff, row indices */ |
|||
void *Offx ; /* size nzoff, numerical values */ |
|||
int nzoff ; |
|||
|
|||
} klu_numeric ; |
|||
|
|||
typedef struct /* 64-bit version (otherwise same as above) */ |
|||
{ |
|||
UF_long n, nblocks, lnz, unz, max_lnz_block, max_unz_block, *Pnum, *Pinv, |
|||
*Lip, *Uip, *Llen, *Ulen ; |
|||
void **LUbx ; |
|||
size_t *LUsize ; |
|||
void *Udiag ; |
|||
double *Rs ; |
|||
size_t worksize ; |
|||
void *Work, *Xwork ; |
|||
UF_long *Iwork ; |
|||
UF_long *Offp, *Offi ; |
|||
void *Offx ; |
|||
UF_long nzoff ; |
|||
|
|||
} klu_l_numeric ; |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* KLU control parameters and statistics */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Common->status values */ |
|||
#define KLU_OK 0 |
|||
#define KLU_SINGULAR (1) /* status > 0 is a warning, not an error */ |
|||
#define KLU_OUT_OF_MEMORY (-2) |
|||
#define KLU_INVALID (-3) |
|||
#define KLU_TOO_LARGE (-4) /* integer overflow has occured */ |
|||
|
|||
typedef struct klu_common_struct |
|||
{ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* parameters */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
double tol ; /* pivot tolerance for diagonal preference */ |
|||
double memgrow ; /* realloc memory growth size for LU factors */ |
|||
double initmem_amd ; /* init. memory size with AMD: c*nnz(L) + n */ |
|||
double initmem ; /* init. memory size: c*nnz(A) + n */ |
|||
double maxwork ; /* maxwork for BTF, <= 0 if no limit */ |
|||
|
|||
int btf ; /* use BTF pre-ordering, or not */ |
|||
int ordering ; /* 0: AMD, 1: COLAMD, 2: user P and Q, |
|||
* 3: user function */ |
|||
int scale ; /* row scaling: -1: none (and no error check), |
|||
* 0: none, 1: sum, 2: max */ |
|||
|
|||
/* memory management routines */ |
|||
void *(*malloc_memory) (size_t) ; /* pointer to malloc */ |
|||
void *(*realloc_memory) (void *, size_t) ; /* pointer to realloc */ |
|||
void (*free_memory) (void *) ; /* pointer to free */ |
|||
void *(*calloc_memory) (size_t, size_t) ; /* pointer to calloc */ |
|||
|
|||
/* pointer to user ordering function */ |
|||
int (*user_order) (int, int *, int *, int *, struct klu_common_struct *) ; |
|||
|
|||
/* pointer to user data, passed unchanged as the last parameter to the |
|||
* user ordering function (optional, the user function need not use this |
|||
* information). */ |
|||
void *user_data ; |
|||
|
|||
int halt_if_singular ; /* how to handle a singular matrix: |
|||
* FALSE: keep going. Return a Numeric object with a zero U(k,k). A |
|||
* divide-by-zero may occur when computing L(:,k). The Numeric object |
|||
* can be passed to klu_solve (a divide-by-zero will occur). It can |
|||
* also be safely passed to klu_refactor. |
|||
* TRUE: stop quickly. klu_factor will free the partially-constructed |
|||
* Numeric object. klu_refactor will not free it, but will leave the |
|||
* numerical values only partially defined. This is the default. */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* statistics */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
int status ; /* KLU_OK if OK, < 0 if error */ |
|||
int nrealloc ; /* # of reallocations of L and U */ |
|||
|
|||
int structural_rank ; /* 0 to n-1 if the matrix is structurally rank |
|||
* deficient (as determined by maxtrans). -1 if not computed. n if the |
|||
* matrix has full structural rank. This is computed by klu_analyze |
|||
* if a BTF preordering is requested. */ |
|||
|
|||
int numerical_rank ; /* First k for which a zero U(k,k) was found, |
|||
* if the matrix was singular (in the range 0 to n-1). n if the matrix |
|||
* has full rank. This is not a true rank-estimation. It just reports |
|||
* where the first zero pivot was found. -1 if not computed. |
|||
* Computed by klu_factor and klu_refactor. */ |
|||
|
|||
int singular_col ; /* n if the matrix is not singular. If in the |
|||
* range 0 to n-1, this is the column index of the original matrix A that |
|||
* corresponds to the column of U that contains a zero diagonal entry. |
|||
* -1 if not computed. Computed by klu_factor and klu_refactor. */ |
|||
|
|||
int noffdiag ; /* # of off-diagonal pivots, -1 if not computed */ |
|||
|
|||
double flops ; /* actual factorization flop count, from klu_flops */ |
|||
double rcond ; /* crude reciprocal condition est., from klu_rcond */ |
|||
double condest ; /* accurate condition est., from klu_condest */ |
|||
double rgrowth ; /* reciprocal pivot rgrowth, from klu_rgrowth */ |
|||
double work ; /* actual work done in BTF, in klu_analyze */ |
|||
|
|||
size_t memusage ; /* current memory usage, in bytes */ |
|||
size_t mempeak ; /* peak memory usage, in bytes */ |
|||
|
|||
} klu_common ; |
|||
|
|||
typedef struct klu_l_common_struct /* 64-bit version (otherwise same as above)*/ |
|||
{ |
|||
|
|||
double tol, memgrow, initmem_amd, initmem, maxwork ; |
|||
UF_long btf, ordering, scale ; |
|||
void *(*malloc_memory) (size_t) ; |
|||
void *(*realloc_memory) (void *, size_t) ; |
|||
void (*free_memory) (void *) ; |
|||
void *(*calloc_memory) (size_t, size_t) ; |
|||
UF_long (*user_order) (UF_long, UF_long *, UF_long *, UF_long *, |
|||
struct klu_l_common_struct *) ; |
|||
void *user_data ; |
|||
UF_long halt_if_singular ; |
|||
UF_long status, nrealloc, structural_rank, numerical_rank, singular_col, |
|||
noffdiag ; |
|||
double flops, rcond, condest, rgrowth, work ; |
|||
size_t memusage, mempeak ; |
|||
|
|||
} klu_l_common ; |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_defaults: sets default control parameters */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_defaults |
|||
( |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_defaults (klu_l_common *Common) ; |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_analyze: orders and analyzes a matrix */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Order the matrix with BTF (or not), then order each block with AMD, COLAMD, |
|||
* a natural ordering, or with a user-provided ordering function */ |
|||
|
|||
klu_symbolic *klu_analyze |
|||
( |
|||
/* inputs, not modified */ |
|||
int n, /* A is n-by-n */ |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
klu_l_symbolic *klu_l_analyze (UF_long, UF_long *, UF_long *, |
|||
klu_l_common *Common) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_analyze_given: analyzes a matrix using given P and Q */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Order the matrix with BTF (or not), then use natural or given ordering |
|||
* P and Q on the blocks. P and Q are interpretted as identity |
|||
* if NULL. */ |
|||
|
|||
klu_symbolic *klu_analyze_given |
|||
( |
|||
/* inputs, not modified */ |
|||
int n, /* A is n-by-n */ |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
int P [ ], /* size n, user's row permutation (may be NULL) */ |
|||
int Q [ ], /* size n, user's column permutation (may be NULL) */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
klu_l_symbolic *klu_l_analyze_given (UF_long, UF_long *, UF_long *, UF_long *, |
|||
UF_long *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_factor: factors a matrix using the klu_analyze results */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
klu_numeric *klu_factor /* returns KLU_OK if OK, < 0 if error */ |
|||
( |
|||
/* inputs, not modified */ |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], /* size nz, numerical values */ |
|||
klu_symbolic *Symbolic, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
klu_numeric *klu_z_factor /* returns KLU_OK if OK, < 0 if error */ |
|||
( |
|||
/* inputs, not modified */ |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], /* size 2*nz, numerical values (real,imag pairs) */ |
|||
klu_symbolic *Symbolic, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
/* long / real version */ |
|||
klu_l_numeric *klu_l_factor (UF_long *, UF_long *, double *, klu_l_symbolic *, |
|||
klu_l_common *) ; |
|||
|
|||
/* long / complex version */ |
|||
klu_l_numeric *klu_zl_factor (UF_long *, UF_long *, double *, klu_l_symbolic *, |
|||
klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_solve: solves Ax=b using the Symbolic and Numeric objects */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_solve |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
int ldim, /* leading dimension of B */ |
|||
int nrhs, /* number of right-hand-sides */ |
|||
|
|||
/* right-hand-side on input, overwritten with solution to Ax=b on output */ |
|||
double B [ ], /* size ldim*nrhs */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_solve |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
int ldim, /* leading dimension of B */ |
|||
int nrhs, /* number of right-hand-sides */ |
|||
|
|||
/* right-hand-side on input, overwritten with solution to Ax=b on output */ |
|||
double B [ ], /* size 2*ldim*nrhs */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, |
|||
double *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_solve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, |
|||
double *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_tsolve: solves A'x=b using the Symbolic and Numeric objects */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_tsolve |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
int ldim, /* leading dimension of B */ |
|||
int nrhs, /* number of right-hand-sides */ |
|||
|
|||
/* right-hand-side on input, overwritten with solution to Ax=b on output */ |
|||
double B [ ], /* size ldim*nrhs */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_tsolve |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
int ldim, /* leading dimension of B */ |
|||
int nrhs, /* number of right-hand-sides */ |
|||
|
|||
/* right-hand-side on input, overwritten with solution to Ax=b on output */ |
|||
double B [ ], /* size 2*ldim*nrhs */ |
|||
int conj_solve, /* TRUE: conjugate solve, FALSE: solve A.'x=b */ |
|||
klu_common *Common |
|||
|
|||
) ; |
|||
|
|||
UF_long klu_l_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, |
|||
double *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_tsolve (klu_l_symbolic *, klu_l_numeric *, UF_long, UF_long, |
|||
double *, UF_long, klu_l_common * ) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_refactor: refactorizes matrix with same ordering as klu_factor */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_refactor /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs, not modified */ |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], /* size nz, numerical values */ |
|||
klu_symbolic *Symbolic, |
|||
/* input, and numerical values modified on output */ |
|||
klu_numeric *Numeric, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_refactor /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs, not modified */ |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], /* size 2*nz, numerical values */ |
|||
klu_symbolic *Symbolic, |
|||
/* input, and numerical values modified on output */ |
|||
klu_numeric *Numeric, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *, |
|||
klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_refactor (UF_long *, UF_long *, double *, klu_l_symbolic *, |
|||
klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_free_symbolic: destroys the Symbolic object */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_free_symbolic |
|||
( |
|||
klu_symbolic **Symbolic, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_free_symbolic (klu_l_symbolic **, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_free_numeric: destroys the Numeric object */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Note that klu_free_numeric and klu_z_free_numeric are identical; each can |
|||
* free both kinds of Numeric objects (real and complex) */ |
|||
|
|||
int klu_free_numeric |
|||
( |
|||
klu_numeric **Numeric, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_free_numeric |
|||
( |
|||
klu_numeric **Numeric, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_free_numeric (klu_l_numeric **, klu_l_common *) ; |
|||
UF_long klu_zl_free_numeric (klu_l_numeric **, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_sort: sorts the columns of the LU factorization */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* this is not needed except for the MATLAB interface */ |
|||
|
|||
int klu_sort |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
/* input/output */ |
|||
klu_numeric *Numeric, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_sort |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
/* input/output */ |
|||
klu_numeric *Numeric, |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; |
|||
UF_long klu_zl_sort (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_flops: determines # of flops performed in numeric factorzation */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_flops |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
/* input/output */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_flops |
|||
( |
|||
/* inputs, not modified */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
/* input/output */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; |
|||
UF_long klu_zl_flops (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_rgrowth : compute the reciprocal pivot growth */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Pivot growth is computed after the input matrix is permuted, scaled, and |
|||
* off-diagonal entries pruned. This is because the LU factorization of each |
|||
* block takes as input the scaled diagonal blocks of the BTF form. The |
|||
* reciprocal pivot growth in column j of an LU factorization of a matrix C |
|||
* is the largest entry in C divided by the largest entry in U; then the overall |
|||
* reciprocal pivot growth is the smallest such value for all columns j. Note |
|||
* that the off-diagonal entries are not scaled, since they do not take part in |
|||
* the LU factorization of the diagonal blocks. |
|||
* |
|||
* In MATLAB notation: |
|||
* |
|||
* rgrowth = min (max (abs ((R \ A(p,q)) - F)) ./ max (abs (U))) */ |
|||
|
|||
int klu_rgrowth |
|||
( |
|||
int Ap [ ], |
|||
int Ai [ ], |
|||
double Ax [ ], |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
klu_common *Common /* Common->rgrowth = reciprocal pivot growth */ |
|||
) ; |
|||
|
|||
int klu_z_rgrowth |
|||
( |
|||
int Ap [ ], |
|||
int Ai [ ], |
|||
double Ax [ ], |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
klu_common *Common /* Common->rgrowth = reciprocal pivot growth */ |
|||
) ; |
|||
|
|||
UF_long klu_l_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *, |
|||
klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_rgrowth (UF_long *, UF_long *, double *, klu_l_symbolic *, |
|||
klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_condest */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Computes a reasonably accurate estimate of the 1-norm condition number, using |
|||
* Hager's method, as modified by Higham and Tisseur (same method as used in |
|||
* MATLAB's condest */ |
|||
|
|||
int klu_condest |
|||
( |
|||
int Ap [ ], /* size n+1, column pointers, not modified */ |
|||
double Ax [ ], /* size nz = Ap[n], numerical values, not modified*/ |
|||
klu_symbolic *Symbolic, /* symbolic analysis, not modified */ |
|||
klu_numeric *Numeric, /* numeric factorization, not modified */ |
|||
klu_common *Common /* result returned in Common->condest */ |
|||
) ; |
|||
|
|||
int klu_z_condest |
|||
( |
|||
int Ap [ ], |
|||
double Ax [ ], /* size 2*nz */ |
|||
klu_symbolic *Symbolic, |
|||
klu_numeric *Numeric, |
|||
klu_common *Common /* result returned in Common->condest */ |
|||
) ; |
|||
|
|||
UF_long klu_l_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *, |
|||
klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_condest (UF_long *, double *, klu_l_symbolic *, klu_l_numeric *, |
|||
klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_rcond: compute min(abs(diag(U))) / max(abs(diag(U))) */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_rcond |
|||
( |
|||
klu_symbolic *Symbolic, /* input, not modified */ |
|||
klu_numeric *Numeric, /* input, not modified */ |
|||
klu_common *Common /* result in Common->rcond */ |
|||
) ; |
|||
|
|||
int klu_z_rcond |
|||
( |
|||
klu_symbolic *Symbolic, /* input, not modified */ |
|||
klu_numeric *Numeric, /* input, not modified */ |
|||
klu_common *Common /* result in Common->rcond */ |
|||
) ; |
|||
|
|||
UF_long klu_l_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_rcond (klu_l_symbolic *, klu_l_numeric *, klu_l_common *) ; |
|||
|
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_scale */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_scale /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs, not modified */ |
|||
int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */ |
|||
int n, |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], |
|||
/* outputs, not defined on input */ |
|||
double Rs [ ], |
|||
/* workspace, not defined on input or output */ |
|||
int W [ ], /* size n, can be NULL */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
int klu_z_scale /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs, not modified */ |
|||
int scale, /* <0: none, no error check; 0: none, 1: sum, 2: max */ |
|||
int n, |
|||
int Ap [ ], /* size n+1, column pointers */ |
|||
int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], |
|||
/* outputs, not defined on input */ |
|||
double Rs [ ], |
|||
/* workspace, not defined on input or output */ |
|||
int W [ ], /* size n, can be NULL */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_scale (UF_long, UF_long, UF_long *, UF_long *, double *, |
|||
double *, UF_long *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_scale (UF_long, UF_long, UF_long *, UF_long *, double *, |
|||
double *, UF_long *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* klu_extract */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
int klu_extract /* returns TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs: */ |
|||
klu_numeric *Numeric, |
|||
klu_symbolic *Symbolic, |
|||
|
|||
/* outputs, either allocated on input, or ignored otherwise */ |
|||
|
|||
/* L */ |
|||
int *Lp, /* size n+1 */ |
|||
int *Li, /* size Numeric->lnz */ |
|||
double *Lx, /* size Numeric->lnz */ |
|||
|
|||
/* U */ |
|||
int *Up, /* size n+1 */ |
|||
int *Ui, /* size Numeric->unz */ |
|||
double *Ux, /* size Numeric->unz */ |
|||
|
|||
/* F */ |
|||
int *Fp, /* size n+1 */ |
|||
int *Fi, /* size Numeric->nzoff */ |
|||
double *Fx, /* size Numeric->nzoff */ |
|||
|
|||
/* P, row permutation */ |
|||
int *P, /* size n */ |
|||
|
|||
/* Q, column permutation */ |
|||
int *Q, /* size n */ |
|||
|
|||
/* Rs, scale factors */ |
|||
double *Rs, /* size n */ |
|||
|
|||
/* R, block boundaries */ |
|||
int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */ |
|||
|
|||
klu_common *Common |
|||
) ; |
|||
|
|||
|
|||
int klu_z_extract /* returns TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs: */ |
|||
klu_numeric *Numeric, |
|||
klu_symbolic *Symbolic, |
|||
|
|||
/* outputs, all of which must be allocated on input */ |
|||
|
|||
/* L */ |
|||
int *Lp, /* size n+1 */ |
|||
int *Li, /* size nnz(L) */ |
|||
double *Lx, /* size nnz(L) */ |
|||
double *Lz, /* size nnz(L) for the complex case, ignored if real */ |
|||
|
|||
/* U */ |
|||
int *Up, /* size n+1 */ |
|||
int *Ui, /* size nnz(U) */ |
|||
double *Ux, /* size nnz(U) */ |
|||
double *Uz, /* size nnz(U) for the complex case, ignored if real */ |
|||
|
|||
/* F */ |
|||
int *Fp, /* size n+1 */ |
|||
int *Fi, /* size nnz(F) */ |
|||
double *Fx, /* size nnz(F) */ |
|||
double *Fz, /* size nnz(F) for the complex case, ignored if real */ |
|||
|
|||
/* P, row permutation */ |
|||
int *P, /* size n */ |
|||
|
|||
/* Q, column permutation */ |
|||
int *Q, /* size n */ |
|||
|
|||
/* Rs, scale factors */ |
|||
double *Rs, /* size n */ |
|||
|
|||
/* R, block boundaries */ |
|||
int *R, /* size Symbolic->nblocks+1 (nblocks is at most n) */ |
|||
|
|||
klu_common *Common |
|||
) ; |
|||
|
|||
UF_long klu_l_extract (klu_l_numeric *, klu_l_symbolic *, |
|||
UF_long *, UF_long *, double *, |
|||
UF_long *, UF_long *, double *, |
|||
UF_long *, UF_long *, double *, |
|||
UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ; |
|||
|
|||
UF_long klu_zl_extract (klu_l_numeric *, klu_l_symbolic *, |
|||
UF_long *, UF_long *, double *, double *, |
|||
UF_long *, UF_long *, double *, double *, |
|||
UF_long *, UF_long *, double *, double *, |
|||
UF_long *, UF_long *, double *, UF_long *, klu_l_common *) ; |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* KLU memory management routines */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
void *klu_malloc /* returns pointer to the newly malloc'd block */ |
|||
( |
|||
/* ---- input ---- */ |
|||
size_t n, /* number of items */ |
|||
size_t size, /* size of each item */ |
|||
/* --------------- */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
void *klu_free /* always returns NULL */ |
|||
( |
|||
/* ---- in/out --- */ |
|||
void *p, /* block of memory to free */ |
|||
size_t n, /* number of items */ |
|||
size_t size, /* size of each item */ |
|||
/* --------------- */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
void *klu_realloc /* returns pointer to reallocated block */ |
|||
( |
|||
/* ---- input ---- */ |
|||
size_t nnew, /* requested # of items in reallocated block */ |
|||
size_t nold, /* current size of block, in # of items */ |
|||
size_t size, /* size of each item */ |
|||
/* ---- in/out --- */ |
|||
void *p, /* block of memory to realloc */ |
|||
/* --------------- */ |
|||
klu_common *Common |
|||
) ; |
|||
|
|||
void *klu_l_malloc (size_t, size_t, klu_l_common *) ; |
|||
void *klu_l_free (void *, size_t, size_t, klu_l_common *) ; |
|||
void *klu_l_realloc (size_t, size_t, size_t, void *, klu_l_common *) ; |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU version ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* All versions of KLU include these definitions. |
|||
* As an example, to test if the version you are using is 1.2 or later: |
|||
* |
|||
* if (KLU_VERSION >= KLU_VERSION_CODE (1,2)) ... |
|||
* |
|||
* This also works during compile-time: |
|||
* |
|||
* #if (KLU >= KLU_VERSION_CODE (1,2)) |
|||
* printf ("This is version 1.2 or later\n") ; |
|||
* #else |
|||
* printf ("This is an early version\n") ; |
|||
* #endif |
|||
*/ |
|||
|
|||
#define KLU_DATE "Dec 7, 2011" |
|||
#define KLU_VERSION_CODE(main,sub) ((main) * 1000 + (sub)) |
|||
#define KLU_MAIN_VERSION 1 |
|||
#define KLU_SUB_VERSION 1 |
|||
#define KLU_SUBSUB_VERSION 3 |
|||
#define KLU_VERSION KLU_VERSION_CODE(KLU_MAIN_VERSION,KLU_SUB_VERSION) |
|||
|
|||
#ifdef __cplusplus |
|||
} |
|||
#endif |
|||
#endif |
|||
@ -0,0 +1,488 @@ |
|||
/* ========================================================================== */ |
|||
/* === klu_analyze ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Order the matrix using BTF (or not), and then AMD, COLAMD, the natural |
|||
* ordering, or the user-provided-function on the blocks. Does not support |
|||
* using a given ordering (use klu_analyze_given for that case). */ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* === analyze_worker ======================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
static Int analyze_worker /* returns KLU_OK or < 0 if error */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int n, /* A is n-by-n */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
Int nblocks, /* # of blocks */ |
|||
Int Pbtf [ ], /* BTF row permutation */ |
|||
Int Qbtf [ ], /* BTF col permutation */ |
|||
Int R [ ], /* size n+1, but only Rbtf [0..nblocks] is used */ |
|||
Int ordering, /* what ordering to use (0, 1, or 3 for this routine) */ |
|||
|
|||
/* output only, not defined on input */ |
|||
Int P [ ], /* size n */ |
|||
Int Q [ ], /* size n */ |
|||
double Lnz [ ], /* size n, but only Lnz [0..nblocks-1] is used */ |
|||
|
|||
/* workspace, not defined on input or output */ |
|||
Int Pblk [ ], /* size maxblock */ |
|||
Int Cp [ ], /* size maxblock+1 */ |
|||
Int Ci [ ], /* size MAX (nz+1, Cilen) */ |
|||
Int Cilen, /* nz+1, or COLAMD_recommend(nz,n,n) for COLAMD */ |
|||
Int Pinv [ ], /* size maxblock */ |
|||
|
|||
/* input/output */ |
|||
KLU_symbolic *Symbolic, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double amd_Info [AMD_INFO], lnz, lnz1, flops, flops1 ; |
|||
Int k1, k2, nk, k, block, oldcol, pend, newcol, result, pc, p, newrow, |
|||
maxnz, nzoff, cstats [COLAMD_STATS], ok, err = KLU_INVALID ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* initializations */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* compute the inverse of Pbtf */ |
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
P [k] = EMPTY ; |
|||
Q [k] = EMPTY ; |
|||
Pinv [k] = EMPTY ; |
|||
} |
|||
#endif |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
ASSERT (Pbtf [k] >= 0 && Pbtf [k] < n) ; |
|||
Pinv [Pbtf [k]] = k ; |
|||
} |
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ; |
|||
#endif |
|||
nzoff = 0 ; |
|||
lnz = 0 ; |
|||
maxnz = 0 ; |
|||
flops = 0 ; |
|||
Symbolic->symmetry = EMPTY ; /* only computed by AMD */ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* order each block */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* the block is from rows/columns k1 to k2-1 */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* construct the kth block, C */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
Lnz [block] = EMPTY ; |
|||
pc = 0 ; |
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
newcol = k-k1 ; |
|||
Cp [newcol] = pc ; |
|||
oldcol = Qbtf [k] ; |
|||
pend = Ap [oldcol+1] ; |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
newrow = Pinv [Ai [p]] ; |
|||
if (newrow < k1) |
|||
{ |
|||
nzoff++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* (newrow,newcol) is an entry in the block */ |
|||
ASSERT (newrow < k2) ; |
|||
newrow -= k1 ; |
|||
Ci [pc++] = newrow ; |
|||
} |
|||
} |
|||
} |
|||
Cp [nk] = pc ; |
|||
maxnz = MAX (maxnz, pc) ; |
|||
ASSERT (KLU_valid (nk, Cp, Ci, NULL)) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* order the block C */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (nk <= 3) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* use natural ordering for tiny blocks (3-by-3 or less) */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
Pblk [k] = k ; |
|||
} |
|||
lnz1 = nk * (nk + 1) / 2 ; |
|||
flops1 = nk * (nk - 1) / 2 + (nk-1)*nk*(2*nk-1) / 6 ; |
|||
ok = TRUE ; |
|||
|
|||
} |
|||
else if (ordering == 0) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* order the block with AMD (C+C') */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
result = AMD_order (nk, Cp, Ci, Pblk, NULL, amd_Info) ; |
|||
ok = (result >= AMD_OK) ; |
|||
if (result == AMD_OUT_OF_MEMORY) |
|||
{ |
|||
err = KLU_OUT_OF_MEMORY ; |
|||
} |
|||
|
|||
/* account for memory usage in AMD */ |
|||
Common->mempeak = MAX (Common->mempeak, |
|||
Common->memusage + amd_Info [AMD_MEMORY]) ; |
|||
|
|||
/* get the ordering statistics from AMD */ |
|||
lnz1 = (Int) (amd_Info [AMD_LNZ]) + nk ; |
|||
flops1 = 2 * amd_Info [AMD_NMULTSUBS_LU] + amd_Info [AMD_NDIV] ; |
|||
if (pc == maxnz) |
|||
{ |
|||
/* get the symmetry of the biggest block */ |
|||
Symbolic->symmetry = amd_Info [AMD_SYMMETRY] ; |
|||
} |
|||
|
|||
} |
|||
else if (ordering == 1) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* order the block with COLAMD (C) */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
/* order (and destroy) Ci, returning column permutation in Cp. |
|||
* COLAMD "cannot" fail since the matrix has already been checked, |
|||
* and Ci allocated. */ |
|||
|
|||
ok = COLAMD (nk, nk, Cilen, Ci, Cp, NULL, cstats) ; |
|||
lnz1 = EMPTY ; |
|||
flops1 = EMPTY ; |
|||
|
|||
/* copy the permutation from Cp to Pblk */ |
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
Pblk [k] = Cp [k] ; |
|||
} |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* pass the block to the user-provided ordering function */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
lnz1 = (Common->user_order) (nk, Cp, Ci, Pblk, Common) ; |
|||
flops1 = EMPTY ; |
|||
ok = (lnz1 != 0) ; |
|||
} |
|||
|
|||
if (!ok) |
|||
{ |
|||
return (err) ; /* ordering method failed */ |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* keep track of nnz(L) and flops statistics */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
Lnz [block] = lnz1 ; |
|||
lnz = (lnz == EMPTY || lnz1 == EMPTY) ? EMPTY : (lnz + lnz1) ; |
|||
flops = (flops == EMPTY || flops1 == EMPTY) ? EMPTY : (flops + flops1) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* combine the preordering with the BTF ordering */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
PRINTF (("Pblk, 1-based:\n")) ; |
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
ASSERT (k + k1 < n) ; |
|||
ASSERT (Pblk [k] + k1 < n) ; |
|||
Q [k + k1] = Qbtf [Pblk [k] + k1] ; |
|||
} |
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
ASSERT (k + k1 < n) ; |
|||
ASSERT (Pblk [k] + k1 < n) ; |
|||
P [k + k1] = Pbtf [Pblk [k] + k1] ; |
|||
} |
|||
} |
|||
|
|||
PRINTF (("nzoff %d Ap[n] %d\n", nzoff, Ap [n])) ; |
|||
ASSERT (nzoff >= 0 && nzoff <= Ap [n]) ; |
|||
|
|||
/* return estimates of # of nonzeros in L including diagonal */ |
|||
Symbolic->lnz = lnz ; /* EMPTY if COLAMD used */ |
|||
Symbolic->unz = lnz ; |
|||
Symbolic->nzoff = nzoff ; |
|||
Symbolic->est_flops = flops ; /* EMPTY if COLAMD or user-ordering used */ |
|||
return (KLU_OK) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === order_and_analyze ==================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Orders the matrix with or with BTF, then orders each block with AMD, COLAMD, |
|||
* or the user ordering function. Does not handle the natural or given |
|||
* ordering cases. */ |
|||
|
|||
static KLU_symbolic *order_and_analyze /* returns NULL if error, or a valid |
|||
KLU_symbolic object if successful */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int n, /* A is n-by-n */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
/* --------------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double work ; |
|||
KLU_symbolic *Symbolic ; |
|||
double *Lnz ; |
|||
Int *Qbtf, *Cp, *Ci, *Pinv, *Pblk, *Pbtf, *P, *Q, *R ; |
|||
Int nblocks, nz, block, maxblock, k1, k2, nk, do_btf, ordering, k, Cilen, |
|||
*Work ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* allocate the Symbolic object, and check input matrix */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ; |
|||
if (Symbolic == NULL) |
|||
{ |
|||
return (NULL) ; |
|||
} |
|||
P = Symbolic->P ; |
|||
Q = Symbolic->Q ; |
|||
R = Symbolic->R ; |
|||
Lnz = Symbolic->Lnz ; |
|||
nz = Symbolic->nz ; |
|||
|
|||
ordering = Common->ordering ; |
|||
if (ordering == 1) |
|||
{ |
|||
/* COLAMD */ |
|||
Cilen = COLAMD_recommended (nz, n, n) ; |
|||
} |
|||
else if (ordering == 0 || (ordering == 3 && Common->user_order != NULL)) |
|||
{ |
|||
/* AMD or user ordering function */ |
|||
Cilen = nz+1 ; |
|||
} |
|||
else |
|||
{ |
|||
/* invalid ordering */ |
|||
Common->status = KLU_INVALID ; |
|||
KLU_free_symbolic (&Symbolic, Common) ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
/* AMD memory management routines */ |
|||
amd_malloc = Common->malloc_memory ; |
|||
amd_free = Common->free_memory ; |
|||
amd_calloc = Common->calloc_memory ; |
|||
amd_realloc = Common->realloc_memory ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* allocate workspace for BTF permutation */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Pbtf = KLU_malloc (n, sizeof (Int), Common) ; |
|||
Qbtf = KLU_malloc (n, sizeof (Int), Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
KLU_free (Pbtf, n, sizeof (Int), Common) ; |
|||
KLU_free (Qbtf, n, sizeof (Int), Common) ; |
|||
KLU_free_symbolic (&Symbolic, Common) ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the common parameters for BTF and ordering method */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
do_btf = Common->btf ; |
|||
do_btf = (do_btf) ? TRUE : FALSE ; |
|||
Symbolic->ordering = ordering ; |
|||
Symbolic->do_btf = do_btf ; |
|||
Symbolic->structural_rank = EMPTY ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* find the block triangular form (if requested) */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Common->work = 0 ; |
|||
|
|||
if (do_btf) |
|||
{ |
|||
Work = KLU_malloc (5*n, sizeof (Int), Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory */ |
|||
KLU_free (Pbtf, n, sizeof (Int), Common) ; |
|||
KLU_free (Qbtf, n, sizeof (Int), Common) ; |
|||
KLU_free_symbolic (&Symbolic, Common) ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
nblocks = BTF_order (n, Ap, Ai, Common->maxwork, &work, Pbtf, Qbtf, R, |
|||
&(Symbolic->structural_rank), Work) ; |
|||
Common->structural_rank = Symbolic->structural_rank ; |
|||
Common->work += work ; |
|||
|
|||
KLU_free (Work, 5*n, sizeof (Int), Common) ; |
|||
|
|||
/* unflip Qbtf if the matrix does not have full structural rank */ |
|||
if (Symbolic->structural_rank < n) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Qbtf [k] = BTF_UNFLIP (Qbtf [k]) ; |
|||
} |
|||
} |
|||
|
|||
/* find the size of the largest block */ |
|||
maxblock = 1 ; |
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("block %d size %d\n", block, nk)) ; |
|||
maxblock = MAX (maxblock, nk) ; |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
/* BTF not requested */ |
|||
nblocks = 1 ; |
|||
maxblock = n ; |
|||
R [0] = 0 ; |
|||
R [1] = n ; |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Pbtf [k] = k ; |
|||
Qbtf [k] = k ; |
|||
} |
|||
} |
|||
|
|||
Symbolic->nblocks = nblocks ; |
|||
|
|||
PRINTF (("maxblock size %d\n", maxblock)) ; |
|||
Symbolic->maxblock = maxblock ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* allocate more workspace, for analyze_worker */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Pblk = KLU_malloc (maxblock, sizeof (Int), Common) ; |
|||
Cp = KLU_malloc (maxblock + 1, sizeof (Int), Common) ; |
|||
Ci = KLU_malloc (MAX (Cilen, nz+1), sizeof (Int), Common) ; |
|||
Pinv = KLU_malloc (n, sizeof (Int), Common) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* order each block of the BTF ordering, and a fill-reducing ordering */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common->status == KLU_OK) |
|||
{ |
|||
PRINTF (("calling analyze_worker\n")) ; |
|||
Common->status = analyze_worker (n, Ap, Ai, nblocks, Pbtf, Qbtf, R, |
|||
ordering, P, Q, Lnz, Pblk, Cp, Ci, Cilen, Pinv, Symbolic, Common) ; |
|||
PRINTF (("analyze_worker done\n")) ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* free all workspace */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
KLU_free (Pblk, maxblock, sizeof (Int), Common) ; |
|||
KLU_free (Cp, maxblock+1, sizeof (Int), Common) ; |
|||
KLU_free (Ci, MAX (Cilen, nz+1), sizeof (Int), Common) ; |
|||
KLU_free (Pinv, n, sizeof (Int), Common) ; |
|||
KLU_free (Pbtf, n, sizeof (Int), Common) ; |
|||
KLU_free (Qbtf, n, sizeof (Int), Common) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* return the symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
KLU_free_symbolic (&Symbolic, Common) ; |
|||
} |
|||
return (Symbolic) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_analyze ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
KLU_symbolic *KLU_analyze /* returns NULL if error, or a valid |
|||
KLU_symbolic object if successful */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int n, /* A is n-by-n */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
/* -------------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the control parameters for BTF and ordering method */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (NULL) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
Common->structural_rank = EMPTY ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* order and analyze */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common->ordering == 2) |
|||
{ |
|||
/* natural ordering */ |
|||
return (KLU_analyze_given (n, Ap, Ai, NULL, NULL, Common)) ; |
|||
} |
|||
else |
|||
{ |
|||
/* order with P and Q */ |
|||
return (order_and_analyze (n, Ap, Ai, Common)) ; |
|||
} |
|||
} |
|||
@ -0,0 +1,369 @@ |
|||
/* ========================================================================== */ |
|||
/* === klu_analyze_given ==================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Given an input permutation P and Q, create the Symbolic object. BTF can |
|||
* be done to modify the user's P and Q (does not perform the max transversal; |
|||
* just finds the strongly-connected components). */ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* === klu_alloc_symbolic =================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Allocate Symbolic object, and check input matrix. Not user callable. */ |
|||
|
|||
KLU_symbolic *KLU_alloc_symbolic |
|||
( |
|||
Int n, |
|||
Int *Ap, |
|||
Int *Ai, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
KLU_symbolic *Symbolic ; |
|||
Int *P, *Q, *R ; |
|||
double *Lnz ; |
|||
Int nz, i, j, p, pend ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (NULL) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* A is n-by-n, with n > 0. Ap [0] = 0 and nz = Ap [n] >= 0 required. |
|||
* Ap [j] <= Ap [j+1] must hold for all j = 0 to n-1. Row indices in Ai |
|||
* must be in the range 0 to n-1, and no duplicate entries can be present. |
|||
* The list of row indices in each column of A need not be sorted. |
|||
*/ |
|||
|
|||
if (n <= 0 || Ap == NULL || Ai == NULL) |
|||
{ |
|||
/* Ap and Ai must be present, and n must be > 0 */ |
|||
Common->status = KLU_INVALID ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
nz = Ap [n] ; |
|||
if (Ap [0] != 0 || nz < 0) |
|||
{ |
|||
/* nz must be >= 0 and Ap [0] must equal zero */ |
|||
Common->status = KLU_INVALID ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
if (Ap [j] > Ap [j+1]) |
|||
{ |
|||
/* column pointers must be non-decreasing */ |
|||
Common->status = KLU_INVALID ; |
|||
return (NULL) ; |
|||
} |
|||
} |
|||
P = KLU_malloc (n, sizeof (Int), Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory */ |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
return (NULL) ; |
|||
} |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
P [i] = EMPTY ; |
|||
} |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
pend = Ap [j+1] ; |
|||
for (p = Ap [j] ; p < pend ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
if (i < 0 || i >= n || P [i] == j) |
|||
{ |
|||
/* row index out of range, or duplicate entry */ |
|||
KLU_free (P, n, sizeof (Int), Common) ; |
|||
Common->status = KLU_INVALID ; |
|||
return (NULL) ; |
|||
} |
|||
/* flag row i as appearing in column j */ |
|||
P [i] = j ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* allocate the Symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Symbolic = KLU_malloc (sizeof (KLU_symbolic), 1, Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory */ |
|||
KLU_free (P, n, sizeof (Int), Common) ; |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
Q = KLU_malloc (n, sizeof (Int), Common) ; |
|||
R = KLU_malloc (n+1, sizeof (Int), Common) ; |
|||
Lnz = KLU_malloc (n, sizeof (double), Common) ; |
|||
|
|||
Symbolic->n = n ; |
|||
Symbolic->nz = nz ; |
|||
Symbolic->P = P ; |
|||
Symbolic->Q = Q ; |
|||
Symbolic->R = R ; |
|||
Symbolic->Lnz = Lnz ; |
|||
|
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory */ |
|||
KLU_free_symbolic (&Symbolic, Common) ; |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
return (Symbolic) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_analyze_given ==================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
KLU_symbolic *KLU_analyze_given /* returns NULL if error, or a valid |
|||
KLU_symbolic object if successful */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int n, /* A is n-by-n */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
Int Puser [ ], /* size n, user's row permutation (may be NULL) */ |
|||
Int Quser [ ], /* size n, user's column permutation (may be NULL) */ |
|||
/* -------------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
KLU_symbolic *Symbolic ; |
|||
double *Lnz ; |
|||
Int nblocks, nz, block, maxblock, *P, *Q, *R, nzoff, p, pend, do_btf, k ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* determine if input matrix is valid, and get # of nonzeros */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Symbolic = KLU_alloc_symbolic (n, Ap, Ai, Common) ; |
|||
if (Symbolic == NULL) |
|||
{ |
|||
return (NULL) ; |
|||
} |
|||
P = Symbolic->P ; |
|||
Q = Symbolic->Q ; |
|||
R = Symbolic->R ; |
|||
Lnz = Symbolic->Lnz ; |
|||
nz = Symbolic->nz ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* Q = Quser, or identity if Quser is NULL */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Quser == (Int *) NULL) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Q [k] = k ; |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Q [k] = Quser [k] ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the control parameters for BTF and ordering method */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
do_btf = Common->btf ; |
|||
do_btf = (do_btf) ? TRUE : FALSE ; |
|||
Symbolic->ordering = 2 ; |
|||
Symbolic->do_btf = do_btf ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* find the block triangular form, if requested */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (do_btf) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* get workspace for BTF_strongcomp */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
Int *Pinv, *Work, *Bi, k1, k2, nk, oldcol ; |
|||
|
|||
Work = KLU_malloc (4*n, sizeof (Int), Common) ; |
|||
Pinv = KLU_malloc (n, sizeof (Int), Common) ; |
|||
if (Puser != (Int *) NULL) |
|||
{ |
|||
Bi = KLU_malloc (nz+1, sizeof (Int), Common) ; |
|||
} |
|||
else |
|||
{ |
|||
Bi = Ai ; |
|||
} |
|||
|
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory */ |
|||
KLU_free (Work, 4*n, sizeof (Int), Common) ; |
|||
KLU_free (Pinv, n, sizeof (Int), Common) ; |
|||
if (Puser != (Int *) NULL) |
|||
{ |
|||
KLU_free (Bi, nz+1, sizeof (Int), Common) ; |
|||
} |
|||
KLU_free_symbolic (&Symbolic, Common) ; |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* B = Puser * A */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (Puser != (Int *) NULL) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Pinv [Puser [k]] = k ; |
|||
} |
|||
for (p = 0 ; p < nz ; p++) |
|||
{ |
|||
Bi [p] = Pinv [Ai [p]] ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* find the strongly-connected components */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
/* modifies Q, and determines P and R */ |
|||
nblocks = BTF_strongcomp (n, Ap, Bi, Q, P, R, Work) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* P = P * Puser */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (Puser != (Int *) NULL) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Work [k] = Puser [P [k]] ; |
|||
} |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
P [k] = Work [k] ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* Pinv = inverse of P */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Pinv [P [k]] = k ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* analyze each block */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
nzoff = 0 ; /* nz in off-diagonal part */ |
|||
maxblock = 1 ; /* size of the largest block */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* the block is from rows/columns k1 to k2-1 */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1, k2-1, nk)) ; |
|||
maxblock = MAX (maxblock, nk) ; |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* scan the kth block, C */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
oldcol = Q [k] ; |
|||
pend = Ap [oldcol+1] ; |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
if (Pinv [Ai [p]] < k1) |
|||
{ |
|||
nzoff++ ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* fill-in not estimated */ |
|||
Lnz [block] = EMPTY ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* free all workspace */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
KLU_free (Work, 4*n, sizeof (Int), Common) ; |
|||
KLU_free (Pinv, n, sizeof (Int), Common) ; |
|||
if (Puser != (Int *) NULL) |
|||
{ |
|||
KLU_free (Bi, nz+1, sizeof (Int), Common) ; |
|||
} |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* BTF not requested */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
nzoff = 0 ; |
|||
nblocks = 1 ; |
|||
maxblock = n ; |
|||
R [0] = 0 ; |
|||
R [1] = n ; |
|||
Lnz [0] = EMPTY ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* P = Puser, or identity if Puser is NULL */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
P [k] = (Puser == NULL) ? k : Puser [k] ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* return the symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Symbolic->nblocks = nblocks ; |
|||
Symbolic->maxblock = maxblock ; |
|||
Symbolic->lnz = EMPTY ; |
|||
Symbolic->unz = EMPTY ; |
|||
Symbolic->nzoff = nzoff ; |
|||
|
|||
return (Symbolic) ; |
|||
} |
|||
@ -0,0 +1,60 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_defaults ========================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Sets default parameters for KLU */ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_defaults |
|||
( |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
/* parameters */ |
|||
Common->tol = 0.001 ; /* pivot tolerance for diagonal */ |
|||
Common->memgrow = 1.2; /* realloc size ratio increase for LU factors */ |
|||
Common->initmem_amd = 1.2 ; /* init. mem with AMD: c*nnz(L) + n */ |
|||
Common->initmem = 10 ; /* init. mem otherwise: c*nnz(A) + n */ |
|||
Common->btf = TRUE ; /* use BTF pre-ordering, or not */ |
|||
Common->maxwork = 0 ; /* no limit to work done by btf_order */ |
|||
Common->ordering = 0 ; /* 0: AMD, 1: COLAMD, 2: user-provided P and Q, |
|||
* 3: user-provided function */ |
|||
Common->scale = 2 ; /* scale: -1: none, and do not check for errors |
|||
* in the input matrix in KLU_refactor. |
|||
* 0: none, but check for errors, |
|||
* 1: sum, 2: max */ |
|||
Common->halt_if_singular = TRUE ; /* quick halt if matrix is singular */ |
|||
|
|||
/* memory management routines */ |
|||
Common->malloc_memory = malloc ; |
|||
Common->calloc_memory = calloc ; |
|||
Common->free_memory = free ; |
|||
Common->realloc_memory = realloc ; |
|||
|
|||
/* user ordering function and optional argument */ |
|||
Common->user_order = NULL ; |
|||
Common->user_data = NULL ; |
|||
|
|||
/* statistics */ |
|||
Common->status = KLU_OK ; |
|||
Common->nrealloc = 0 ; |
|||
Common->structural_rank = EMPTY ; |
|||
Common->numerical_rank = EMPTY ; |
|||
Common->noffdiag = EMPTY ; |
|||
Common->flops = EMPTY ; |
|||
Common->rcond = EMPTY ; |
|||
Common->condest = EMPTY ; |
|||
Common->rgrowth = EMPTY ; |
|||
Common->work = 0 ; /* work done by btf_order */ |
|||
|
|||
Common->memusage = 0 ; |
|||
Common->mempeak = 0 ; |
|||
|
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,570 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_diagnostics ====================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Linear algebraic diagnostics: |
|||
* KLU_rgrowth: reciprocal pivot growth, takes O(|A|+|U|) time |
|||
* KLU_condest: condition number estimator, takes about O(|A|+5*(|L|+|U|)) time |
|||
* KLU_flops: compute # flops required to factorize A into L*U |
|||
* KLU_rcond: compute a really cheap estimate of the reciprocal of the |
|||
* condition number, min(abs(diag(U))) / max(abs(diag(U))). |
|||
* Takes O(n) time. |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_rgrowth ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Compute the reciprocal pivot growth factor. In MATLAB notation: |
|||
* |
|||
* rgrowth = min (max (abs ((R \ A (p,q)) - F))) ./ max (abs (U))) |
|||
*/ |
|||
|
|||
Int KLU_rgrowth /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
Int *Ap, |
|||
Int *Ai, |
|||
double *Ax, |
|||
KLU_symbolic *Symbolic, |
|||
KLU_numeric *Numeric, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double temp, max_ai, max_ui, min_block_rgrowth ; |
|||
Entry aik ; |
|||
Int *Q, *Ui, *Uip, *Ulen, *Pinv ; |
|||
Unit *LU ; |
|||
Entry *Aentry, *Ux, *Ukk ; |
|||
double *Rs ; |
|||
Int i, newrow, oldrow, k1, k2, nk, j, oldcol, k, pend, len ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
if (Symbolic == NULL || Ap == NULL || Ai == NULL || Ax == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
if (Numeric == NULL) |
|||
{ |
|||
/* treat this as a singular matrix */ |
|||
Common->rgrowth = 0 ; |
|||
Common->status = KLU_SINGULAR ; |
|||
return (TRUE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute the reciprocal pivot growth */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Aentry = (Entry *) Ax ; |
|||
Pinv = Numeric->Pinv ; |
|||
Rs = Numeric->Rs ; |
|||
Q = Symbolic->Q ; |
|||
Common->rgrowth = 1 ; |
|||
|
|||
for (i = 0 ; i < Symbolic->nblocks ; i++) |
|||
{ |
|||
k1 = Symbolic->R[i] ; |
|||
k2 = Symbolic->R[i+1] ; |
|||
nk = k2 - k1 ; |
|||
if (nk == 1) |
|||
{ |
|||
continue ; /* skip singleton blocks */ |
|||
} |
|||
LU = (Unit *) Numeric->LUbx[i] ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
Ukk = ((Entry *) Numeric->Udiag) + k1 ; |
|||
min_block_rgrowth = 1 ; |
|||
for (j = 0 ; j < nk ; j++) |
|||
{ |
|||
max_ai = 0 ; |
|||
max_ui = 0 ; |
|||
oldcol = Q[j + k1] ; |
|||
pend = Ap [oldcol + 1] ; |
|||
for (k = Ap [oldcol] ; k < pend ; k++) |
|||
{ |
|||
oldrow = Ai [k] ; |
|||
newrow = Pinv [oldrow] ; |
|||
if (newrow < k1) |
|||
{ |
|||
continue ; /* skip entry outside the block */ |
|||
} |
|||
ASSERT (newrow < k2) ; |
|||
if (Rs != NULL) |
|||
{ |
|||
/* aik = Aentry [k] / Rs [oldrow] */ |
|||
SCALE_DIV_ASSIGN (aik, Aentry [k], Rs [newrow]) ; |
|||
} |
|||
else |
|||
{ |
|||
aik = Aentry [k] ; |
|||
} |
|||
/* temp = ABS (aik) */ |
|||
ABS (temp, aik) ; |
|||
if (temp > max_ai) |
|||
{ |
|||
max_ai = temp ; |
|||
} |
|||
} |
|||
|
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, j, len) ; |
|||
for (k = 0 ; k < len ; k++) |
|||
{ |
|||
/* temp = ABS (Ux [k]) */ |
|||
ABS (temp, Ux [k]) ; |
|||
if (temp > max_ui) |
|||
{ |
|||
max_ui = temp ; |
|||
} |
|||
} |
|||
/* consider the diagonal element */ |
|||
ABS (temp, Ukk [j]) ; |
|||
if (temp > max_ui) |
|||
{ |
|||
max_ui = temp ; |
|||
} |
|||
|
|||
/* if max_ui is 0, skip the column */ |
|||
if (SCALAR_IS_ZERO (max_ui)) |
|||
{ |
|||
continue ; |
|||
} |
|||
temp = max_ai / max_ui ; |
|||
if (temp < min_block_rgrowth) |
|||
{ |
|||
min_block_rgrowth = temp ; |
|||
} |
|||
} |
|||
|
|||
if (min_block_rgrowth < Common->rgrowth) |
|||
{ |
|||
Common->rgrowth = min_block_rgrowth ; |
|||
} |
|||
} |
|||
return (TRUE) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_condest ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Estimate the condition number. Uses Higham and Tisseur's algorithm |
|||
* (A block algorithm for matrix 1-norm estimation, with applications to |
|||
* 1-norm pseudospectra, SIAM J. Matrix Anal. Appl., 21(4):1185-1201, 2000. |
|||
*/ |
|||
|
|||
Int KLU_condest /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
Int Ap [ ], |
|||
double Ax [ ], |
|||
KLU_symbolic *Symbolic, |
|||
KLU_numeric *Numeric, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double xj, Xmax, csum, anorm, ainv_norm, est_old, est_new, abs_value ; |
|||
Entry *Udiag, *Aentry, *X, *S ; |
|||
Int *R ; |
|||
Int nblocks, i, j, jmax, jnew, pend, n ; |
|||
#ifndef COMPLEX |
|||
Int unchanged ; |
|||
#endif |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
if (Symbolic == NULL || Ap == NULL || Ax == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
abs_value = 0 ; |
|||
if (Numeric == NULL) |
|||
{ |
|||
/* treat this as a singular matrix */ |
|||
Common->condest = 1 / abs_value ; |
|||
Common->status = KLU_SINGULAR ; |
|||
return (TRUE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
n = Symbolic->n ; |
|||
nblocks = Symbolic->nblocks ; |
|||
R = Symbolic->R ; |
|||
Udiag = Numeric->Udiag ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check if diagonal of U has a zero on it */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
ABS (abs_value, Udiag [i]) ; |
|||
if (SCALAR_IS_ZERO (abs_value)) |
|||
{ |
|||
Common->condest = 1 / abs_value ; |
|||
Common->status = KLU_SINGULAR ; |
|||
return (TRUE) ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute 1-norm (maximum column sum) of the matrix */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
anorm = 0.0 ; |
|||
Aentry = (Entry *) Ax ; |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
pend = Ap [i + 1] ; |
|||
csum = 0.0 ; |
|||
for (j = Ap [i] ; j < pend ; j++) |
|||
{ |
|||
ABS (abs_value, Aentry [j]) ; |
|||
csum += abs_value ; |
|||
} |
|||
if (csum > anorm) |
|||
{ |
|||
anorm = csum ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute estimate of 1-norm of inv (A) */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* get workspace (size 2*n Entry's) */ |
|||
X = Numeric->Xwork ; /* size n space used in KLU_solve, tsolve */ |
|||
X += n ; /* X is size n */ |
|||
S = X + n ; /* S is size n */ |
|||
|
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
CLEAR (S [i]) ; |
|||
CLEAR (X [i]) ; |
|||
REAL (X [i]) = 1.0 / ((double) n) ; |
|||
} |
|||
jmax = 0 ; |
|||
|
|||
ainv_norm = 0.0 ; |
|||
for (i = 0 ; i < 5 ; i++) |
|||
{ |
|||
if (i > 0) |
|||
{ |
|||
/* X [jmax] is the largest entry in X */ |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* X [j] = 0 ;*/ |
|||
CLEAR (X [j]) ; |
|||
} |
|||
REAL (X [jmax]) = 1 ; |
|||
} |
|||
|
|||
KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ; |
|||
est_old = ainv_norm ; |
|||
ainv_norm = 0.0 ; |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* ainv_norm += ABS (X [j]) ;*/ |
|||
ABS (abs_value, X [j]) ; |
|||
ainv_norm += abs_value ; |
|||
} |
|||
|
|||
#ifndef COMPLEX |
|||
unchanged = TRUE ; |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
double s = (X [j] >= 0) ? 1 : -1 ; |
|||
if (s != (Int) REAL (S [j])) |
|||
{ |
|||
S [j] = s ; |
|||
unchanged = FALSE ; |
|||
} |
|||
} |
|||
|
|||
if (i > 0 && (ainv_norm <= est_old || unchanged)) |
|||
{ |
|||
break ; |
|||
} |
|||
#else |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
if (IS_NONZERO (X [j])) |
|||
{ |
|||
ABS (abs_value, X [j]) ; |
|||
SCALE_DIV_ASSIGN (S [j], X [j], abs_value) ; |
|||
} |
|||
else |
|||
{ |
|||
CLEAR (S [j]) ; |
|||
REAL (S [j]) = 1 ; |
|||
} |
|||
} |
|||
|
|||
if (i > 0 && ainv_norm <= est_old) |
|||
{ |
|||
break ; |
|||
} |
|||
#endif |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
X [j] = S [j] ; |
|||
} |
|||
|
|||
#ifndef COMPLEX |
|||
/* do a transpose solve */ |
|||
KLU_tsolve (Symbolic, Numeric, n, 1, X, Common) ; |
|||
#else |
|||
/* do a conjugate transpose solve */ |
|||
KLU_tsolve (Symbolic, Numeric, n, 1, (double *) X, 1, Common) ; |
|||
#endif |
|||
|
|||
/* jnew = the position of the largest entry in X */ |
|||
jnew = 0 ; |
|||
Xmax = 0 ; |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* xj = ABS (X [j]) ;*/ |
|||
ABS (xj, X [j]) ; |
|||
if (xj > Xmax) |
|||
{ |
|||
Xmax = xj ; |
|||
jnew = j ; |
|||
} |
|||
} |
|||
if (i > 0 && jnew == jmax) |
|||
{ |
|||
/* the position of the largest entry did not change |
|||
* from the previous iteration */ |
|||
break ; |
|||
} |
|||
jmax = jnew ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute another estimate of norm(inv(A),1), and take the largest one */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
CLEAR (X [j]) ; |
|||
if (j % 2) |
|||
{ |
|||
REAL (X [j]) = 1 + ((double) j) / ((double) (n-1)) ; |
|||
} |
|||
else |
|||
{ |
|||
REAL (X [j]) = -1 - ((double) j) / ((double) (n-1)) ; |
|||
} |
|||
} |
|||
|
|||
KLU_solve (Symbolic, Numeric, n, 1, (double *) X, Common) ; |
|||
|
|||
est_new = 0.0 ; |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* est_new += ABS (X [j]) ;*/ |
|||
ABS (abs_value, X [j]) ; |
|||
est_new += abs_value ; |
|||
} |
|||
est_new = 2 * est_new / (3 * n) ; |
|||
ainv_norm = MAX (est_new, ainv_norm) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute estimate of condition number */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Common->condest = ainv_norm * anorm ; |
|||
return (TRUE) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_flops ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Compute the flop count for the LU factorization (in Common->flops) */ |
|||
|
|||
Int KLU_flops /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
KLU_symbolic *Symbolic, |
|||
KLU_numeric *Numeric, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double flops = 0 ; |
|||
Int *R, *Ui, *Uip, *Llen, *Ulen ; |
|||
Unit **LUbx ; |
|||
Unit *LU ; |
|||
Int k, ulen, p, n, nk, block, nblocks, k1 ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
Common->flops = EMPTY ; |
|||
if (Numeric == NULL || Symbolic == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
n = Symbolic->n ; |
|||
R = Symbolic->R ; |
|||
nblocks = Symbolic->nblocks ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Numeric object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute the flop count */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
k1 = R [block] ; |
|||
nk = R [block+1] - k1 ; |
|||
if (nk > 1) |
|||
{ |
|||
Llen = Numeric->Llen + k1 ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
LU = LUbx [block] ; |
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
/* compute kth column of U, and update kth column of A */ |
|||
GET_I_POINTER (LU, Uip, Ui, k) ; |
|||
ulen = Ulen [k] ; |
|||
for (p = 0 ; p < ulen ; p++) |
|||
{ |
|||
flops += 2 * Llen [Ui [p]] ; |
|||
} |
|||
/* gather and divide by pivot to get kth column of L */ |
|||
flops += Llen [k] ; |
|||
} |
|||
} |
|||
} |
|||
Common->flops = flops ; |
|||
return (TRUE) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_rcond ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Compute a really cheap estimate of the reciprocal of the condition number, |
|||
* condition number, min(abs(diag(U))) / max(abs(diag(U))). If U has a zero |
|||
* pivot, or a NaN pivot, rcond will be zero. Takes O(n) time. |
|||
*/ |
|||
|
|||
Int KLU_rcond /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
KLU_symbolic *Symbolic, /* input, not modified */ |
|||
KLU_numeric *Numeric, /* input, not modified */ |
|||
KLU_common *Common /* result in Common->rcond */ |
|||
) |
|||
{ |
|||
double ukk, umin = 0, umax = 0 ; |
|||
Entry *Udiag ; |
|||
Int j, n ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
if (Symbolic == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
if (Numeric == NULL) |
|||
{ |
|||
Common->rcond = 0 ; |
|||
Common->status = KLU_SINGULAR ; |
|||
return (TRUE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* compute rcond */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
n = Symbolic->n ; |
|||
Udiag = Numeric->Udiag ; |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
/* get the magnitude of the pivot */ |
|||
ABS (ukk, Udiag [j]) ; |
|||
if (SCALAR_IS_NAN (ukk) || SCALAR_IS_ZERO (ukk)) |
|||
{ |
|||
/* if NaN, or zero, the rcond is zero */ |
|||
Common->rcond = 0 ; |
|||
Common->status = KLU_SINGULAR ; |
|||
return (TRUE) ; |
|||
} |
|||
if (j == 0) |
|||
{ |
|||
/* first pivot entry */ |
|||
umin = ukk ; |
|||
umax = ukk ; |
|||
} |
|||
else |
|||
{ |
|||
/* subsequent pivots */ |
|||
umin = MIN (umin, ukk) ; |
|||
umax = MAX (umax, ukk) ; |
|||
} |
|||
} |
|||
|
|||
Common->rcond = umin / umax ; |
|||
if (SCALAR_IS_NAN (Common->rcond) || SCALAR_IS_ZERO (Common->rcond)) |
|||
{ |
|||
/* this can occur if umin or umax are Inf or NaN */ |
|||
Common->rcond = 0 ; |
|||
Common->status = KLU_SINGULAR ; |
|||
} |
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,142 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_dump ============================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Debug routines for klu. Only used when NDEBUG is not defined at |
|||
* compile-time. |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
#ifndef NDEBUG |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_valid ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Check if a column-form matrix is valid or not. The matrix A is |
|||
* n-by-n. The row indices of entries in column j are in |
|||
* Ai [Ap [j] ... Ap [j+1]-1]. Required conditions are: |
|||
* |
|||
* n >= 0 |
|||
* nz = Ap [n_col] >= 0 number of entries in the matrix |
|||
* Ap [0] == 0 |
|||
* Ap [j] <= Ap [j+1] for all j in the range 0 to n_col. |
|||
* row indices in Ai [Ap [j] ... Ap [j+1]-1] |
|||
* must be in the range 0 to n_row-1, |
|||
* and no duplicate entries can exist (duplicates not checked here). |
|||
* |
|||
* Not user-callable. Only used when debugging. |
|||
*/ |
|||
|
|||
Int KLU_valid (Int n, Int Ap [ ], Int Ai [ ], Entry Ax [ ]) |
|||
{ |
|||
Int nz, j, p1, p2, i, p ; |
|||
PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ; |
|||
if (n <= 0) |
|||
{ |
|||
PRINTF (("n must be >= 0: %d\n", n)) ; |
|||
return (FALSE) ; |
|||
} |
|||
nz = Ap [n] ; |
|||
if (Ap [0] != 0 || nz < 0) |
|||
{ |
|||
/* column pointers must start at Ap [0] = 0, and Ap [n] must be >= 0 */ |
|||
PRINTF (("column 0 pointer bad or nz < 0\n")) ; |
|||
return (FALSE) ; |
|||
} |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
p1 = Ap [j] ; |
|||
p2 = Ap [j+1] ; |
|||
PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ; |
|||
if (p1 > p2) |
|||
{ |
|||
/* column pointers must be ascending */ |
|||
PRINTF (("column %d pointer bad\n", j)) ; |
|||
return (FALSE) ; |
|||
} |
|||
for (p = p1 ; p < p2 ; p++) |
|||
{ |
|||
i = Ai [p] ; |
|||
PRINTF (("row: %d", i)) ; |
|||
if (i < 0 || i >= n) |
|||
{ |
|||
/* row index out of range */ |
|||
PRINTF (("index out of range, col %d row %d\n", j, i)) ; |
|||
return (FALSE) ; |
|||
} |
|||
if (Ax != (Entry *) NULL) |
|||
{ |
|||
PRINT_ENTRY (Ax [p]) ; |
|||
} |
|||
PRINTF (("\n")) ; |
|||
} |
|||
} |
|||
return (TRUE) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_valid_LU ========================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* This function does the same validity tests as KLU_valid but for the |
|||
* LU factor storage format. The flag flag_test_start_ptr is used to |
|||
* test if Xip [0] = 0. This is not applicable for U. So when calling this |
|||
* function for U, the flag should be set to false. Only used when debugging. |
|||
*/ |
|||
|
|||
Int KLU_valid_LU (Int n, Int flag_test_start_ptr, Int Xip [ ], |
|||
Int Xlen [ ], Unit LU [ ]) |
|||
{ |
|||
Int *Xi ; |
|||
Entry *Xx ; |
|||
Int j, p1, p2, i, p, len ; |
|||
|
|||
PRINTF (("\ncolumn oriented matrix, n = %d\n", n)) ; |
|||
if (n <= 0) |
|||
{ |
|||
PRINTF (("n must be >= 0: %d\n", n)) ; |
|||
return (FALSE) ; |
|||
} |
|||
if (flag_test_start_ptr && Xip [0] != 0) |
|||
{ |
|||
/* column pointers must start at Xip [0] = 0*/ |
|||
PRINTF (("column 0 pointer bad\n")) ; |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
p1 = Xip [j] ; |
|||
p2 = Xip [j+1] ; |
|||
PRINTF (("\nColumn: %d p1: %d p2: %d\n", j, p1, p2)) ; |
|||
if (p1 > p2) |
|||
{ |
|||
/* column pointers must be ascending */ |
|||
PRINTF (("column %d pointer bad\n", j)) ; |
|||
return (FALSE) ; |
|||
} |
|||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
i = Xi [p] ; |
|||
PRINTF (("row: %d", i)) ; |
|||
if (i < 0 || i >= n) |
|||
{ |
|||
/* row index out of range */ |
|||
PRINTF (("index out of range, col %d row %d\n", j, i)) ; |
|||
return (FALSE) ; |
|||
} |
|||
if (Xx != (Entry *) NULL) |
|||
{ |
|||
PRINT_ENTRY (Xx [p]) ; |
|||
} |
|||
PRINTF (("\n")) ; |
|||
} |
|||
} |
|||
|
|||
return (TRUE) ; |
|||
} |
|||
#endif |
|||
@ -0,0 +1,290 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_extract ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Extract KLU factorization into conventional compressed-column matrices. |
|||
* If any output array is NULL, that part of the LU factorization is not |
|||
* extracted (this is not an error condition). |
|||
* |
|||
* nnz(L) = Numeric->lnz, nnz(U) = Numeric->unz, and nnz(F) = Numeric->Offp [n] |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_extract /* returns TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs: */ |
|||
KLU_numeric *Numeric, |
|||
KLU_symbolic *Symbolic, |
|||
|
|||
/* outputs, all of which must be allocated on input */ |
|||
|
|||
/* L */ |
|||
Int *Lp, /* size n+1 */ |
|||
Int *Li, /* size nnz(L) */ |
|||
double *Lx, /* size nnz(L) */ |
|||
#ifdef COMPLEX |
|||
double *Lz, /* size nnz(L) for the complex case, ignored if real */ |
|||
#endif |
|||
|
|||
/* U */ |
|||
Int *Up, /* size n+1 */ |
|||
Int *Ui, /* size nnz(U) */ |
|||
double *Ux, /* size nnz(U) */ |
|||
#ifdef COMPLEX |
|||
double *Uz, /* size nnz(U) for the complex case, ignored if real */ |
|||
#endif |
|||
|
|||
/* F */ |
|||
Int *Fp, /* size n+1 */ |
|||
Int *Fi, /* size nnz(F) */ |
|||
double *Fx, /* size nnz(F) */ |
|||
#ifdef COMPLEX |
|||
double *Fz, /* size nnz(F) for the complex case, ignored if real */ |
|||
#endif |
|||
|
|||
/* P, row permutation */ |
|||
Int *P, /* size n */ |
|||
|
|||
/* Q, column permutation */ |
|||
Int *Q, /* size n */ |
|||
|
|||
/* Rs, scale factors */ |
|||
double *Rs, /* size n */ |
|||
|
|||
/* R, block boundaries */ |
|||
Int *R, /* size nblocks+1 */ |
|||
|
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
Int *Lip, *Llen, *Uip, *Ulen, *Li2, *Ui2 ; |
|||
Unit *LU ; |
|||
Entry *Lx2, *Ux2, *Ukk ; |
|||
Int i, k, block, nblocks, n, nz, k1, k2, nk, len, kk, p ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
if (Symbolic == NULL || Numeric == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
Common->status = KLU_OK ; |
|||
n = Symbolic->n ; |
|||
nblocks = Symbolic->nblocks ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract scale factors */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Rs != NULL) |
|||
{ |
|||
if (Numeric->Rs != NULL) |
|||
{ |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
Rs [i] = Numeric->Rs [i] ; |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
/* no scaling */ |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
Rs [i] = 1 ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract block boundaries */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (R != NULL) |
|||
{ |
|||
for (block = 0 ; block <= nblocks ; block++) |
|||
{ |
|||
R [block] = Symbolic->R [block] ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract final row permutation */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (P != NULL) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
P [k] = Numeric->Pnum [k] ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract column permutation */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Q != NULL) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Q [k] = Symbolic->Q [k] ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract each block of L */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Lp != NULL && Li != NULL && Lx != NULL |
|||
#ifdef COMPLEX |
|||
&& Lz != NULL |
|||
#endif |
|||
) |
|||
{ |
|||
nz = 0 ; |
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
k1 = Symbolic->R [block] ; |
|||
k2 = Symbolic->R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
if (nk == 1) |
|||
{ |
|||
/* singleton block */ |
|||
Lp [k1] = nz ; |
|||
Li [nz] = k1 ; |
|||
Lx [nz] = 1 ; |
|||
#ifdef COMPLEX |
|||
Lz [nz] = 0 ; |
|||
#endif |
|||
nz++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* non-singleton block */ |
|||
LU = Numeric->LUbx [block] ; |
|||
Lip = Numeric->Lip + k1 ; |
|||
Llen = Numeric->Llen + k1 ; |
|||
for (kk = 0 ; kk < nk ; kk++) |
|||
{ |
|||
Lp [k1+kk] = nz ; |
|||
/* add the unit diagonal entry */ |
|||
Li [nz] = k1 + kk ; |
|||
Lx [nz] = 1 ; |
|||
#ifdef COMPLEX |
|||
Lz [nz] = 0 ; |
|||
#endif |
|||
nz++ ; |
|||
GET_POINTER (LU, Lip, Llen, Li2, Lx2, kk, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
Li [nz] = k1 + Li2 [p] ; |
|||
Lx [nz] = REAL (Lx2 [p]) ; |
|||
#ifdef COMPLEX |
|||
Lz [nz] = IMAG (Lx2 [p]) ; |
|||
#endif |
|||
nz++ ; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
Lp [n] = nz ; |
|||
ASSERT (nz == Numeric->lnz) ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract each block of U */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Up != NULL && Ui != NULL && Ux != NULL |
|||
#ifdef COMPLEX |
|||
&& Uz != NULL |
|||
#endif |
|||
) |
|||
{ |
|||
nz = 0 ; |
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
k1 = Symbolic->R [block] ; |
|||
k2 = Symbolic->R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
Ukk = ((Entry *) Numeric->Udiag) + k1 ; |
|||
if (nk == 1) |
|||
{ |
|||
/* singleton block */ |
|||
Up [k1] = nz ; |
|||
Ui [nz] = k1 ; |
|||
Ux [nz] = REAL (Ukk [0]) ; |
|||
#ifdef COMPLEX |
|||
Uz [nz] = IMAG (Ukk [0]) ; |
|||
#endif |
|||
nz++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* non-singleton block */ |
|||
LU = Numeric->LUbx [block] ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
for (kk = 0 ; kk < nk ; kk++) |
|||
{ |
|||
Up [k1+kk] = nz ; |
|||
GET_POINTER (LU, Uip, Ulen, Ui2, Ux2, kk, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
Ui [nz] = k1 + Ui2 [p] ; |
|||
Ux [nz] = REAL (Ux2 [p]) ; |
|||
#ifdef COMPLEX |
|||
Uz [nz] = IMAG (Ux2 [p]) ; |
|||
#endif |
|||
nz++ ; |
|||
} |
|||
/* add the diagonal entry */ |
|||
Ui [nz] = k1 + kk ; |
|||
Ux [nz] = REAL (Ukk [kk]) ; |
|||
#ifdef COMPLEX |
|||
Uz [nz] = IMAG (Ukk [kk]) ; |
|||
#endif |
|||
nz++ ; |
|||
} |
|||
} |
|||
} |
|||
Up [n] = nz ; |
|||
ASSERT (nz == Numeric->unz) ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* extract the off-diagonal blocks, F */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Fp != NULL && Fi != NULL && Fx != NULL |
|||
#ifdef COMPLEX |
|||
&& Fz != NULL |
|||
#endif |
|||
) |
|||
{ |
|||
for (k = 0 ; k <= n ; k++) |
|||
{ |
|||
Fp [k] = Numeric->Offp [k] ; |
|||
} |
|||
nz = Fp [n] ; |
|||
for (k = 0 ; k < nz ; k++) |
|||
{ |
|||
Fi [k] = Numeric->Offi [k] ; |
|||
} |
|||
for (k = 0 ; k < nz ; k++) |
|||
{ |
|||
Fx [k] = REAL (((Entry *) Numeric->Offx) [k]) ; |
|||
#ifdef COMPLEX |
|||
Fz [k] = IMAG (((Entry *) Numeric->Offx) [k]) ; |
|||
#endif |
|||
} |
|||
} |
|||
|
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,545 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_factor =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Factor the matrix, after ordering and analyzing it with KLU_analyze |
|||
* or KLU_analyze_given. |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_factor2 ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
static void factor2 |
|||
( |
|||
/* inputs, not modified */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
Entry Ax [ ], |
|||
KLU_symbolic *Symbolic, |
|||
|
|||
/* inputs, modified on output: */ |
|||
KLU_numeric *Numeric, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double lsize ; |
|||
double *Lnz, *Rs ; |
|||
Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Pblock, *Pinv, *Iwork, |
|||
*Lip, *Uip, *Llen, *Ulen ; |
|||
Entry *Offx, *X, s, *Udiag ; |
|||
Unit **LUbx ; |
|||
Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, lnz, unz, p, newrow, |
|||
nblocks, poff, nzoff, lnz_block, unz_block, scale, max_lnz_block, |
|||
max_unz_block ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* initializations */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* get the contents of the Symbolic object */ |
|||
n = Symbolic->n ; |
|||
P = Symbolic->P ; |
|||
Q = Symbolic->Q ; |
|||
R = Symbolic->R ; |
|||
Lnz = Symbolic->Lnz ; |
|||
nblocks = Symbolic->nblocks ; |
|||
nzoff = Symbolic->nzoff ; |
|||
|
|||
Pnum = Numeric->Pnum ; |
|||
Offp = Numeric->Offp ; |
|||
Offi = Numeric->Offi ; |
|||
Offx = (Entry *) Numeric->Offx ; |
|||
|
|||
Lip = Numeric->Lip ; |
|||
Uip = Numeric->Uip ; |
|||
Llen = Numeric->Llen ; |
|||
Ulen = Numeric->Ulen ; |
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
Udiag = Numeric->Udiag ; |
|||
|
|||
Rs = Numeric->Rs ; |
|||
Pinv = Numeric->Pinv ; |
|||
X = (Entry *) Numeric->Xwork ; /* X is of size n */ |
|||
Iwork = Numeric->Iwork ; /* 5*maxblock for KLU_factor */ |
|||
/* 1*maxblock for Pblock */ |
|||
Pblock = Iwork + 5*((size_t) Symbolic->maxblock) ; |
|||
Common->nrealloc = 0 ; |
|||
scale = Common->scale ; |
|||
max_lnz_block = 1 ; |
|||
max_unz_block = 1 ; |
|||
|
|||
/* compute the inverse of P from symbolic analysis. Will be updated to |
|||
* become the inverse of the numerical factorization when the factorization |
|||
* is done, for use in KLU_refactor */ |
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Pinv [k] = EMPTY ; |
|||
} |
|||
#endif |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
ASSERT (P [k] >= 0 && P [k] < n) ; |
|||
Pinv [P [k]] = k ; |
|||
} |
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ; |
|||
#endif |
|||
|
|||
lnz = 0 ; |
|||
unz = 0 ; |
|||
Common->noffdiag = 0 ; |
|||
Offp [0] = 0 ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* optionally check input matrix and compute scale factors */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (scale >= 0) |
|||
{ |
|||
/* use Pnum as workspace. NOTE: scale factors are not yet permuted |
|||
* according to the final pivot row ordering, so Rs [oldrow] is the |
|||
* scale factor for A (oldrow,:), for the user's matrix A. Pnum is |
|||
* used as workspace in KLU_scale. When the factorization is done, |
|||
* the scale factors are permuted according to the final pivot row |
|||
* permutation, so that Rs [k] is the scale factor for the kth row of |
|||
* A(p,q) where p and q are the final row and column permutations. */ |
|||
KLU_scale (scale, n, Ap, Ai, (double *) Ax, Rs, Pnum, Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* matrix is invalid */ |
|||
return ; |
|||
} |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
if (scale > 0) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) PRINTF (("Rs [%d] %g\n", k, Rs [k])) ; |
|||
} |
|||
#endif |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* factor each block using klu */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* the block is from rows/columns k1 to k2-1 */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("FACTOR BLOCK %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; |
|||
|
|||
if (nk == 1) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* singleton case */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
poff = Offp [k1] ; |
|||
oldcol = Q [k1] ; |
|||
pend = Ap [oldcol+1] ; |
|||
CLEAR (s) ; |
|||
|
|||
if (scale <= 0) |
|||
{ |
|||
/* no scaling */ |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
oldrow = Ai [p] ; |
|||
newrow = Pinv [oldrow] ; |
|||
if (newrow < k1) |
|||
{ |
|||
Offi [poff] = oldrow ; |
|||
Offx [poff] = Ax [p] ; |
|||
poff++ ; |
|||
} |
|||
else |
|||
{ |
|||
ASSERT (newrow == k1) ; |
|||
PRINTF (("singleton block %d", block)) ; |
|||
PRINT_ENTRY (Ax [p]) ; |
|||
s = Ax [p] ; |
|||
} |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
/* row scaling. NOTE: scale factors are not yet permuted |
|||
* according to the pivot row permutation, so Rs [oldrow] is |
|||
* used below. When the factorization is done, the scale |
|||
* factors are permuted, so that Rs [newrow] will be used in |
|||
* klu_solve, klu_tsolve, and klu_rgrowth */ |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
oldrow = Ai [p] ; |
|||
newrow = Pinv [oldrow] ; |
|||
if (newrow < k1) |
|||
{ |
|||
Offi [poff] = oldrow ; |
|||
/* Offx [poff] = Ax [p] / Rs [oldrow] ; */ |
|||
SCALE_DIV_ASSIGN (Offx [poff], Ax [p], Rs [oldrow]) ; |
|||
poff++ ; |
|||
} |
|||
else |
|||
{ |
|||
ASSERT (newrow == k1) ; |
|||
PRINTF (("singleton block %d ", block)) ; |
|||
PRINT_ENTRY (Ax[p]) ; |
|||
SCALE_DIV_ASSIGN (s, Ax [p], Rs [oldrow]) ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
Udiag [k1] = s ; |
|||
|
|||
if (IS_ZERO (s)) |
|||
{ |
|||
/* singular singleton */ |
|||
Common->status = KLU_SINGULAR ; |
|||
Common->numerical_rank = k1 ; |
|||
Common->singular_col = oldcol ; |
|||
if (Common->halt_if_singular) |
|||
{ |
|||
return ; |
|||
} |
|||
} |
|||
|
|||
Offp [k1+1] = poff ; |
|||
Pnum [k1] = P [k1] ; |
|||
lnz++ ; |
|||
unz++ ; |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* construct and factorize the kth block */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
if (Lnz [block] < 0) |
|||
{ |
|||
/* COLAMD was used - no estimate of fill-in */ |
|||
/* use 10 times the nnz in A, plus n */ |
|||
lsize = -(Common->initmem) ; |
|||
} |
|||
else |
|||
{ |
|||
lsize = Common->initmem_amd * Lnz [block] + nk ; |
|||
} |
|||
|
|||
/* allocates 1 arrays: LUbx [block] */ |
|||
Numeric->LUsize [block] = KLU_kernel_factor (nk, Ap, Ai, Ax, Q, |
|||
lsize, &LUbx [block], Udiag + k1, Llen + k1, Ulen + k1, |
|||
Lip + k1, Uip + k1, Pblock, &lnz_block, &unz_block, |
|||
X, Iwork, k1, Pinv, Rs, Offp, Offi, Offx, Common) ; |
|||
|
|||
if (Common->status < KLU_OK || |
|||
(Common->status == KLU_SINGULAR && Common->halt_if_singular)) |
|||
{ |
|||
/* out of memory, invalid inputs, or singular */ |
|||
return ; |
|||
} |
|||
|
|||
PRINTF (("\n----------------------- L %d:\n", block)) ; |
|||
ASSERT (KLU_valid_LU (nk, TRUE, Lip+k1, Llen+k1, LUbx [block])) ; |
|||
PRINTF (("\n----------------------- U %d:\n", block)) ; |
|||
ASSERT (KLU_valid_LU (nk, FALSE, Uip+k1, Ulen+k1, LUbx [block])) ; |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* get statistics */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
lnz += lnz_block ; |
|||
unz += unz_block ; |
|||
max_lnz_block = MAX (max_lnz_block, lnz_block) ; |
|||
max_unz_block = MAX (max_unz_block, unz_block) ; |
|||
|
|||
if (Lnz [block] == EMPTY) |
|||
{ |
|||
/* revise estimate for subsequent factorization */ |
|||
Lnz [block] = MAX (lnz_block, unz_block) ; |
|||
} |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* combine the klu row ordering with the symbolic pre-ordering */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
PRINTF (("Pnum, 1-based:\n")) ; |
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
ASSERT (k + k1 < n) ; |
|||
ASSERT (Pblock [k] + k1 < n) ; |
|||
Pnum [k + k1] = P [Pblock [k] + k1] ; |
|||
PRINTF (("Pnum (%d + %d + 1 = %d) = %d + 1 = %d\n", |
|||
k, k1, k+k1+1, Pnum [k+k1], Pnum [k+k1]+1)) ; |
|||
} |
|||
|
|||
/* the local pivot row permutation Pblock is no longer needed */ |
|||
} |
|||
} |
|||
ASSERT (nzoff == Offp [n]) ; |
|||
PRINTF (("\n------------------- Off diagonal entries:\n")) ; |
|||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; |
|||
|
|||
Numeric->lnz = lnz ; |
|||
Numeric->unz = unz ; |
|||
Numeric->max_lnz_block = max_lnz_block ; |
|||
Numeric->max_unz_block = max_unz_block ; |
|||
|
|||
/* compute the inverse of Pnum */ |
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Pinv [k] = EMPTY ; |
|||
} |
|||
#endif |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
ASSERT (Pnum [k] >= 0 && Pnum [k] < n) ; |
|||
Pinv [Pnum [k]] = k ; |
|||
} |
|||
#ifndef NDEBUG |
|||
for (k = 0 ; k < n ; k++) ASSERT (Pinv [k] != EMPTY) ; |
|||
#endif |
|||
|
|||
/* permute scale factors Rs according to pivotal row order */ |
|||
if (scale > 0) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
REAL (X [k]) = Rs [Pnum [k]] ; |
|||
} |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Rs [k] = REAL (X [k]) ; |
|||
} |
|||
} |
|||
|
|||
PRINTF (("\n------------------- Off diagonal entries, old:\n")) ; |
|||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; |
|||
|
|||
/* apply the pivot row permutations to the off-diagonal entries */ |
|||
for (p = 0 ; p < nzoff ; p++) |
|||
{ |
|||
ASSERT (Offi [p] >= 0 && Offi [p] < n) ; |
|||
Offi [p] = Pinv [Offi [p]] ; |
|||
} |
|||
|
|||
PRINTF (("\n------------------- Off diagonal entries, new:\n")) ; |
|||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; |
|||
|
|||
#ifndef NDEBUG |
|||
{ |
|||
PRINTF (("\n ############# KLU_BTF_FACTOR done, nblocks %d\n",nblocks)); |
|||
Entry ss, *Udiag = Numeric->Udiag ; |
|||
for (block = 0 ; block < nblocks && Common->status == KLU_OK ; block++) |
|||
{ |
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("\n======================KLU_factor output: k1 %d k2 %d nk %d\n",k1,k2,nk)) ; |
|||
if (nk == 1) |
|||
{ |
|||
PRINTF (("singleton ")) ; |
|||
/* ENTRY_PRINT (singleton [block]) ; */ |
|||
ss = Udiag [k1] ; |
|||
PRINT_ENTRY (ss) ; |
|||
} |
|||
else |
|||
{ |
|||
Int *Lip, *Uip, *Llen, *Ulen ; |
|||
Unit *LU ; |
|||
Lip = Numeric->Lip + k1 ; |
|||
Llen = Numeric->Llen + k1 ; |
|||
LU = (Unit *) Numeric->LUbx [block] ; |
|||
PRINTF (("\n---- L block %d\n", block)); |
|||
ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
PRINTF (("\n---- U block %d\n", block)) ; |
|||
ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ; |
|||
} |
|||
} |
|||
} |
|||
#endif |
|||
} |
|||
|
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_factor =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
KLU_numeric *KLU_factor /* returns NULL if error, or a valid |
|||
KLU_numeric object if successful */ |
|||
( |
|||
/* --- inputs --- */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], |
|||
KLU_symbolic *Symbolic, |
|||
/* -------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
Int n, nzoff, nblocks, maxblock, k, ok = TRUE ; |
|||
Int *R ; |
|||
KLU_numeric *Numeric ; |
|||
size_t n1, nzoff1, s, b6, n3 ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (NULL) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
Common->numerical_rank = EMPTY ; |
|||
Common->singular_col = EMPTY ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* check for a valid Symbolic object */ |
|||
if (Symbolic == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
n = Symbolic->n ; |
|||
nzoff = Symbolic->nzoff ; |
|||
nblocks = Symbolic->nblocks ; |
|||
maxblock = Symbolic->maxblock ; |
|||
R = Symbolic->R ; |
|||
PRINTF (("KLU_factor: n %d nzoff %d nblocks %d maxblock %d\n", |
|||
n, nzoff, nblocks, maxblock)) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get control parameters and make sure they are in the proper range */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Common->initmem_amd = MAX (1.0, Common->initmem_amd) ; |
|||
Common->initmem = MAX (1.0, Common->initmem) ; |
|||
Common->tol = MIN (Common->tol, 1.0) ; |
|||
Common->tol = MAX (0.0, Common->tol) ; |
|||
Common->memgrow = MAX (1.0, Common->memgrow) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* allocate the Numeric object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* this will not cause size_t overflow (already checked by KLU_symbolic) */ |
|||
n1 = ((size_t) n) + 1 ; |
|||
nzoff1 = ((size_t) nzoff) + 1 ; |
|||
|
|||
Numeric = KLU_malloc (sizeof (KLU_numeric), 1, Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory */ |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
return (NULL) ; |
|||
} |
|||
Numeric->n = n ; |
|||
Numeric->nblocks = nblocks ; |
|||
Numeric->nzoff = nzoff ; |
|||
Numeric->Pnum = KLU_malloc (n, sizeof (Int), Common) ; |
|||
Numeric->Offp = KLU_malloc (n1, sizeof (Int), Common) ; |
|||
Numeric->Offi = KLU_malloc (nzoff1, sizeof (Int), Common) ; |
|||
Numeric->Offx = KLU_malloc (nzoff1, sizeof (Entry), Common) ; |
|||
|
|||
Numeric->Lip = KLU_malloc (n, sizeof (Int), Common) ; |
|||
Numeric->Uip = KLU_malloc (n, sizeof (Int), Common) ; |
|||
Numeric->Llen = KLU_malloc (n, sizeof (Int), Common) ; |
|||
Numeric->Ulen = KLU_malloc (n, sizeof (Int), Common) ; |
|||
|
|||
Numeric->LUsize = KLU_malloc (nblocks, sizeof (size_t), Common) ; |
|||
|
|||
Numeric->LUbx = KLU_malloc (nblocks, sizeof (Unit *), Common) ; |
|||
if (Numeric->LUbx != NULL) |
|||
{ |
|||
for (k = 0 ; k < nblocks ; k++) |
|||
{ |
|||
Numeric->LUbx [k] = NULL ; |
|||
} |
|||
} |
|||
|
|||
Numeric->Udiag = KLU_malloc (n, sizeof (Entry), Common) ; |
|||
|
|||
if (Common->scale > 0) |
|||
{ |
|||
Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ; |
|||
} |
|||
else |
|||
{ |
|||
/* no scaling */ |
|||
Numeric->Rs = NULL ; |
|||
} |
|||
|
|||
Numeric->Pinv = KLU_malloc (n, sizeof (Int), Common) ; |
|||
|
|||
/* allocate permanent workspace for factorization and solve. Note that the |
|||
* solver will use an Xwork of size 4n, whereas the factorization codes use |
|||
* an Xwork of size n and integer space (Iwork) of size 6n. KLU_condest |
|||
* uses an Xwork of size 2n. Total size is: |
|||
* |
|||
* n*sizeof(Entry) + max (6*maxblock*sizeof(Int), 3*n*sizeof(Entry)) |
|||
*/ |
|||
s = KLU_mult_size_t (n, sizeof (Entry), &ok) ; |
|||
n3 = KLU_mult_size_t (n, 3 * sizeof (Entry), &ok) ; |
|||
b6 = KLU_mult_size_t (maxblock, 6 * sizeof (Int), &ok) ; |
|||
Numeric->worksize = KLU_add_size_t (s, MAX (n3, b6), &ok) ; |
|||
Numeric->Work = KLU_malloc (Numeric->worksize, 1, Common) ; |
|||
Numeric->Xwork = Numeric->Work ; |
|||
Numeric->Iwork = (Int *) ((Entry *) Numeric->Xwork + n) ; |
|||
if (!ok || Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory or problem too large */ |
|||
Common->status = ok ? KLU_OUT_OF_MEMORY : KLU_TOO_LARGE ; |
|||
KLU_free_numeric (&Numeric, Common) ; |
|||
return (NULL) ; |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* factorize the blocks */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
factor2 (Ap, Ai, (Entry *) Ax, Symbolic, Numeric, Common) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* return or free the Numeric object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
/* out of memory or inputs invalid */ |
|||
KLU_free_numeric (&Numeric, Common) ; |
|||
} |
|||
else if (Common->status == KLU_SINGULAR) |
|||
{ |
|||
if (Common->halt_if_singular) |
|||
{ |
|||
/* Matrix is singular, and the Numeric object is only partially |
|||
* defined because we halted early. This is the default case for |
|||
* a singular matrix. */ |
|||
KLU_free_numeric (&Numeric, Common) ; |
|||
} |
|||
} |
|||
else if (Common->status == KLU_OK) |
|||
{ |
|||
/* successful non-singular factorization */ |
|||
Common->numerical_rank = n ; |
|||
Common->singular_col = n ; |
|||
} |
|||
return (Numeric) ; |
|||
} |
|||
@ -0,0 +1,71 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_free_numeric ===================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Free the KLU Numeric object. */ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_free_numeric |
|||
( |
|||
KLU_numeric **NumericHandle, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
KLU_numeric *Numeric ; |
|||
Unit **LUbx ; |
|||
size_t *LUsize ; |
|||
Int block, n, nzoff, nblocks ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
if (NumericHandle == NULL || *NumericHandle == NULL) |
|||
{ |
|||
return (TRUE) ; |
|||
} |
|||
|
|||
Numeric = *NumericHandle ; |
|||
|
|||
n = Numeric->n ; |
|||
nzoff = Numeric->nzoff ; |
|||
nblocks = Numeric->nblocks ; |
|||
LUsize = Numeric->LUsize ; |
|||
|
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
if (LUbx != NULL) |
|||
{ |
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
KLU_free (LUbx [block], LUsize ? LUsize [block] : 0, |
|||
sizeof (Unit), Common) ; |
|||
} |
|||
} |
|||
|
|||
KLU_free (Numeric->Pnum, n, sizeof (Int), Common) ; |
|||
KLU_free (Numeric->Offp, n+1, sizeof (Int), Common) ; |
|||
KLU_free (Numeric->Offi, nzoff+1, sizeof (Int), Common) ; |
|||
KLU_free (Numeric->Offx, nzoff+1, sizeof (Entry), Common) ; |
|||
|
|||
KLU_free (Numeric->Lip, n, sizeof (Int), Common) ; |
|||
KLU_free (Numeric->Llen, n, sizeof (Int), Common) ; |
|||
KLU_free (Numeric->Uip, n, sizeof (Int), Common) ; |
|||
KLU_free (Numeric->Ulen, n, sizeof (Int), Common) ; |
|||
|
|||
KLU_free (Numeric->LUsize, nblocks, sizeof (size_t), Common) ; |
|||
|
|||
KLU_free (Numeric->LUbx, nblocks, sizeof (Unit *), Common) ; |
|||
|
|||
KLU_free (Numeric->Udiag, n, sizeof (Entry), Common) ; |
|||
|
|||
KLU_free (Numeric->Rs, n, sizeof (double), Common) ; |
|||
KLU_free (Numeric->Pinv, n, sizeof (Int), Common) ; |
|||
|
|||
KLU_free (Numeric->Work, Numeric->worksize, 1, Common) ; |
|||
|
|||
KLU_free (Numeric, 1, sizeof (KLU_numeric), Common) ; |
|||
|
|||
*NumericHandle = NULL ; |
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,34 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_free_symbolic ==================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Free the KLU Symbolic object. */ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_free_symbolic |
|||
( |
|||
KLU_symbolic **SymbolicHandle, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
KLU_symbolic *Symbolic ; |
|||
Int n ; |
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
if (SymbolicHandle == NULL || *SymbolicHandle == NULL) |
|||
{ |
|||
return (TRUE) ; |
|||
} |
|||
Symbolic = *SymbolicHandle ; |
|||
n = Symbolic->n ; |
|||
KLU_free (Symbolic->P, n, sizeof (Int), Common) ; |
|||
KLU_free (Symbolic->Q, n, sizeof (Int), Common) ; |
|||
KLU_free (Symbolic->R, n+1, sizeof (Int), Common) ; |
|||
KLU_free (Symbolic->Lnz, n, sizeof (double), Common) ; |
|||
KLU_free (Symbolic, 1, sizeof (KLU_symbolic), Common) ; |
|||
*SymbolicHandle = NULL ; |
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,243 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU/Include/klu_internal.h =========================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* For internal use in KLU routines only, not for user programs */ |
|||
|
|||
#ifndef _KLU_INTERNAL_H |
|||
#define _KLU_INTERNAL_H |
|||
|
|||
#include "klu.h" |
|||
#include "btf.h" |
|||
#include "klu_version.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* make sure debugging and printing is turned off */ |
|||
|
|||
#ifndef NDEBUG |
|||
#define NDEBUG |
|||
#endif |
|||
#ifndef NPRINT |
|||
#define NPRINT |
|||
#endif |
|||
|
|||
/* To enable debugging and assertions, uncomment this line: |
|||
#undef NDEBUG |
|||
*/ |
|||
|
|||
/* To enable diagnostic printing, uncomment this line: |
|||
#undef NPRINT |
|||
*/ |
|||
|
|||
/* ========================================================================== */ |
|||
|
|||
#include <stdio.h> |
|||
#include <assert.h> |
|||
#include <limits.h> |
|||
#include <stdlib.h> |
|||
#include <math.h> |
|||
|
|||
#undef ASSERT |
|||
#ifndef NDEBUG |
|||
#define ASSERT(a) assert(a) |
|||
#else |
|||
#define ASSERT(a) |
|||
#endif |
|||
|
|||
#define SCALAR_IS_NAN(x) ((x) != (x)) |
|||
|
|||
/* true if an integer (stored in double x) would overflow (or if x is NaN) */ |
|||
#define INT_OVERFLOW(x) ((!((x) * (1.0+1e-8) <= (double) INT_MAX)) \ |
|||
|| SCALAR_IS_NAN (x)) |
|||
|
|||
#undef TRUE |
|||
#undef FALSE |
|||
#undef MAX |
|||
#undef MIN |
|||
#undef PRINTF |
|||
#undef FLIP |
|||
|
|||
#ifndef NPRINT |
|||
#define PRINTF(s) { printf s ; } ; |
|||
#else |
|||
#define PRINTF(s) |
|||
#endif |
|||
|
|||
#define TRUE 1 |
|||
#define FALSE 0 |
|||
#define MAX(a,b) (((a) > (b)) ? (a) : (b)) |
|||
#define MIN(a,b) (((a) < (b)) ? (a) : (b)) |
|||
|
|||
/* FLIP is a "negation about -1", and is used to mark an integer i that is |
|||
* normally non-negative. FLIP (EMPTY) is EMPTY. FLIP of a number > EMPTY |
|||
* is negative, and FLIP of a number < EMTPY is positive. FLIP (FLIP (i)) = i |
|||
* for all integers i. UNFLIP (i) is >= EMPTY. */ |
|||
#define EMPTY (-1) |
|||
#define FLIP(i) (-(i)-2) |
|||
#define UNFLIP(i) (((i) < EMPTY) ? FLIP (i) : (i)) |
|||
|
|||
|
|||
size_t KLU_kernel /* final size of LU on output */ |
|||
( |
|||
/* input, not modified */ |
|||
Int n, /* A is n-by-n */ |
|||
Int Ap [ ], /* size n+1, column pointers for A */ |
|||
Int Ai [ ], /* size nz = Ap [n], row indices for A */ |
|||
Entry Ax [ ], /* size nz, values of A */ |
|||
Int Q [ ], /* size n, optional input permutation */ |
|||
size_t lusize, /* initial size of LU */ |
|||
|
|||
/* output, not defined on input */ |
|||
Int Pinv [ ], /* size n */ |
|||
Int P [ ], /* size n */ |
|||
Unit **p_LU, /* size lusize on input, size Uxp[n] on output*/ |
|||
Entry Udiag [ ], /* size n, diagonal of U */ |
|||
Int Llen [ ], /* size n, column length of L */ |
|||
Int Ulen [ ], /* size n, column length of U */ |
|||
Int Lip [ ], /* size n+1 */ |
|||
Int Uip [ ], /* size n+1 */ |
|||
Int *lnz, /* size of L */ |
|||
Int *unz, /* size of U */ |
|||
|
|||
/* workspace, not defined on input */ |
|||
Entry X [ ], /* size n, zero on output */ |
|||
|
|||
/* workspace, not defined on input or output */ |
|||
Int Stack [ ], /* size n */ |
|||
Int Flag [ ], /* size n */ |
|||
Int adj_pos [ ], /* size n */ |
|||
|
|||
/* workspace for pruning only */ |
|||
Int Lpend [ ], /* size n workspace */ |
|||
|
|||
/* inputs, not modified on output */ |
|||
Int k1, /* the block of A is from k1 to k2-1 */ |
|||
Int PSinv [ ], /* inverse of P from symbolic factorization */ |
|||
double Rs [ ], /* scale factors for A */ |
|||
|
|||
/* inputs, modified on output */ |
|||
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ |
|||
Int Offi [ ], |
|||
Entry Offx [ ], |
|||
KLU_common *Common /* the control input/output structure */ |
|||
) ; |
|||
|
|||
|
|||
size_t KLU_kernel_factor /* 0 if failure, size of LU if OK */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int n, /* A is n-by-n. n must be > 0. */ |
|||
Int Ap [ ], /* size n+1, column pointers for A */ |
|||
Int Ai [ ], /* size nz = Ap [n], row indices for A */ |
|||
Entry Ax [ ], /* size nz, values of A */ |
|||
Int Q [ ], /* size n, optional column permutation */ |
|||
double Lsize, /* initial size of L and U */ |
|||
|
|||
/* outputs, not defined on input */ |
|||
Unit **p_LU, /* row indices and values of L and U */ |
|||
Entry Udiag [ ], /* size n, diagonal of U */ |
|||
Int Llen [ ], /* size n, column length of L */ |
|||
Int Ulen [ ], /* size n, column length of U */ |
|||
Int Lip [ ], /* size n+1, column pointers of L */ |
|||
Int Uip [ ], /* size n+1, column pointers of U */ |
|||
Int P [ ], /* row permutation, size n */ |
|||
Int *lnz, /* size of L */ |
|||
Int *unz, /* size of U */ |
|||
|
|||
/* workspace, undefined on input */ |
|||
Entry *X, /* size n entries. Zero on output */ |
|||
Int *Work, /* size 5n Int's */ |
|||
|
|||
/* inputs, not modified on output */ |
|||
Int k1, /* the block of A is from k1 to k2-1 */ |
|||
Int PSinv [ ], /* inverse of P from symbolic factorization */ |
|||
double Rs [ ], /* scale factors for A */ |
|||
|
|||
/* inputs, modified on output */ |
|||
Int Offp [ ], /* off-diagonal matrix (modified by this routine) */ |
|||
Int Offi [ ], |
|||
Entry Offx [ ], |
|||
KLU_common *Common /* the control input/output structure */ |
|||
) ; |
|||
|
|||
void KLU_lsolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Lp [ ], |
|||
Int Li [ ], |
|||
Unit LU [ ], |
|||
Int nrhs, |
|||
/* right-hand-side on input, solution to Lx=b on output */ |
|||
Entry X [ ] |
|||
) ; |
|||
|
|||
void KLU_ltsolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Lp [ ], |
|||
Int Li [ ], |
|||
Unit LU [ ], |
|||
Int nrhs, |
|||
#ifdef COMPLEX |
|||
Int conj_solve, |
|||
#endif |
|||
/* right-hand-side on input, solution to L'x=b on output */ |
|||
Entry X [ ] |
|||
) ; |
|||
|
|||
|
|||
void KLU_usolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Up [ ], |
|||
Int Ui [ ], |
|||
Unit LU [ ], |
|||
Entry Udiag [ ], |
|||
Int nrhs, |
|||
/* right-hand-side on input, solution to Ux=b on output */ |
|||
Entry X [ ] |
|||
) ; |
|||
|
|||
void KLU_utsolve |
|||
( |
|||
/* inputs, not modified: */ |
|||
Int n, |
|||
Int Up [ ], |
|||
Int Ui [ ], |
|||
Unit LU [ ], |
|||
Entry Udiag [ ], |
|||
Int nrhs, |
|||
#ifdef COMPLEX |
|||
Int conj_solve, |
|||
#endif |
|||
/* right-hand-side on input, solution to U'x=b on output */ |
|||
Entry X [ ] |
|||
) ; |
|||
|
|||
Int KLU_valid |
|||
( |
|||
Int n, |
|||
Int Ap [ ], |
|||
Int Ai [ ], |
|||
Entry Ax [ ] |
|||
) ; |
|||
|
|||
Int KLU_valid_LU |
|||
( |
|||
Int n, |
|||
Int flag_test_start_ptr, |
|||
Int Xip [ ], |
|||
Int Xlen [ ], |
|||
Unit LU [ ] |
|||
); |
|||
|
|||
size_t KLU_add_size_t (size_t a, size_t b, Int *ok) ; |
|||
|
|||
size_t KLU_mult_size_t (size_t a, size_t k, Int *ok) ; |
|||
|
|||
KLU_symbolic *KLU_alloc_symbolic (Int n, Int *Ap, Int *Ai, KLU_common *Common) ; |
|||
|
|||
#endif |
|||
1009
src/maths/KLU/klu_kernel.c
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -0,0 +1,225 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_memory =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* KLU memory management routines: |
|||
* |
|||
* KLU_malloc malloc wrapper |
|||
* KLU_free free wrapper |
|||
* KLU_realloc realloc wrapper |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_add_size_t ======================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Safely compute a+b, and check for size_t overflow */ |
|||
|
|||
size_t KLU_add_size_t (size_t a, size_t b, Int *ok) |
|||
{ |
|||
(*ok) = (*ok) && ((a + b) >= MAX (a,b)) ; |
|||
return ((*ok) ? (a + b) : ((size_t) -1)) ; |
|||
} |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_mult_size_t ====================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Safely compute a*k, where k should be small, and check for size_t overflow */ |
|||
|
|||
size_t KLU_mult_size_t (size_t a, size_t k, Int *ok) |
|||
{ |
|||
size_t i, s = 0 ; |
|||
for (i = 0 ; i < k ; i++) |
|||
{ |
|||
s = KLU_add_size_t (s, a, ok) ; |
|||
} |
|||
return ((*ok) ? s : ((size_t) -1)) ; |
|||
} |
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_malloc =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Wrapper around malloc routine (mxMalloc for a mexFunction). Allocates |
|||
* space of size MAX(1,n)*size, where size is normally a sizeof (...). |
|||
* |
|||
* This routine and KLU_realloc do not set Common->status to KLU_OK on success, |
|||
* so that a sequence of KLU_malloc's or KLU_realloc's can be used. If any of |
|||
* them fails, the Common->status will hold the most recent error status. |
|||
* |
|||
* Usage, for a pointer to Int: |
|||
* |
|||
* p = KLU_malloc (n, sizeof (Int), Common) |
|||
* |
|||
* Uses a pointer to the malloc routine (or its equivalent) defined in Common. |
|||
*/ |
|||
|
|||
void *KLU_malloc /* returns pointer to the newly malloc'd block */ |
|||
( |
|||
/* ---- input ---- */ |
|||
size_t n, /* number of items */ |
|||
size_t size, /* size of each item */ |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
void *p ; |
|||
size_t s ; |
|||
Int ok = TRUE ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
p = NULL ; |
|||
} |
|||
else if (size == 0) |
|||
{ |
|||
/* size must be > 0 */ |
|||
Common->status = KLU_INVALID ; |
|||
p = NULL ; |
|||
} |
|||
else if (n >= INT_MAX) |
|||
{ |
|||
/* object is too big to allocate; p[i] where i is an Int will not |
|||
* be enough. */ |
|||
Common->status = KLU_TOO_LARGE ; |
|||
p = NULL ; |
|||
} |
|||
else |
|||
{ |
|||
/* call malloc, or its equivalent */ |
|||
s = KLU_mult_size_t (MAX (1,n), size, &ok) ; |
|||
p = ok ? ((Common->malloc_memory) (s)) : NULL ; |
|||
if (p == NULL) |
|||
{ |
|||
/* failure: out of memory */ |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
} |
|||
else |
|||
{ |
|||
Common->memusage += s ; |
|||
Common->mempeak = MAX (Common->mempeak, Common->memusage) ; |
|||
} |
|||
} |
|||
return (p) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_free ============================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Wrapper around free routine (mxFree for a mexFunction). Returns NULL, |
|||
* which can be assigned to the pointer being freed, as in: |
|||
* |
|||
* p = KLU_free (p, n, sizeof (int), Common) ; |
|||
*/ |
|||
|
|||
void *KLU_free /* always returns NULL */ |
|||
( |
|||
/* ---- in/out --- */ |
|||
void *p, /* block of memory to free */ |
|||
/* ---- input --- */ |
|||
size_t n, /* size of block to free, in # of items */ |
|||
size_t size, /* size of each item */ |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
size_t s ; |
|||
Int ok = TRUE ; |
|||
if (p != NULL && Common != NULL) |
|||
{ |
|||
/* only free the object if the pointer is not NULL */ |
|||
/* call free, or its equivalent */ |
|||
(Common->free_memory) (p) ; |
|||
s = KLU_mult_size_t (MAX (1,n), size, &ok) ; |
|||
Common->memusage -= s ; |
|||
} |
|||
/* return NULL, and the caller should assign this to p. This avoids |
|||
* freeing the same pointer twice. */ |
|||
return (NULL) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_realloc ========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Wrapper around realloc routine (mxRealloc for a mexFunction). Given a |
|||
* pointer p to a block allocated by KLU_malloc, it changes the size of the |
|||
* block pointed to by p to be MAX(1,nnew)*size in size. It may return a |
|||
* pointer different than p. This should be used as (for a pointer to Int): |
|||
* |
|||
* p = KLU_realloc (nnew, nold, sizeof (Int), p, Common) ; |
|||
* |
|||
* If p is NULL, this is the same as p = KLU_malloc (...). |
|||
* A size of nnew=0 is treated as nnew=1. |
|||
* |
|||
* If the realloc fails, p is returned unchanged and Common->status is set |
|||
* to KLU_OUT_OF_MEMORY. If successful, Common->status is not modified, |
|||
* and p is returned (possibly changed) and pointing to a large block of memory. |
|||
* |
|||
* Uses a pointer to the realloc routine (or its equivalent) defined in Common. |
|||
*/ |
|||
|
|||
void *KLU_realloc /* returns pointer to reallocated block */ |
|||
( |
|||
/* ---- input ---- */ |
|||
size_t nnew, /* requested # of items in reallocated block */ |
|||
size_t nold, /* old # of items */ |
|||
size_t size, /* size of each item */ |
|||
/* ---- in/out --- */ |
|||
void *p, /* block of memory to realloc */ |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
void *pnew ; |
|||
size_t snew, sold ; |
|||
Int ok = TRUE ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
p = NULL ; |
|||
} |
|||
else if (size == 0) |
|||
{ |
|||
/* size must be > 0 */ |
|||
Common->status = KLU_INVALID ; |
|||
p = NULL ; |
|||
} |
|||
else if (p == NULL) |
|||
{ |
|||
/* A fresh object is being allocated. */ |
|||
p = KLU_malloc (nnew, size, Common) ; |
|||
} |
|||
else if (nnew >= INT_MAX) |
|||
{ |
|||
/* failure: nnew is too big. Do not change p */ |
|||
Common->status = KLU_TOO_LARGE ; |
|||
} |
|||
else |
|||
{ |
|||
/* The object exists, and is changing to some other nonzero size. */ |
|||
/* call realloc, or its equivalent */ |
|||
snew = KLU_mult_size_t (MAX (1,nnew), size, &ok) ; |
|||
sold = KLU_mult_size_t (MAX (1,nold), size, &ok) ; |
|||
pnew = ok ? ((Common->realloc_memory) (p, snew)) : NULL ; |
|||
if (pnew == NULL) |
|||
{ |
|||
/* Do not change p, since it still points to allocated memory */ |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
} |
|||
else |
|||
{ |
|||
/* success: return the new p and change the size of the block */ |
|||
Common->memusage += (snew - sold) ; |
|||
Common->mempeak = MAX (Common->mempeak, Common->memusage) ; |
|||
p = pnew ; |
|||
} |
|||
} |
|||
return (p) ; |
|||
} |
|||
@ -0,0 +1,478 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_refactor ========================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Factor the matrix, after ordering and analyzing it with KLU_analyze, and |
|||
* factoring it once with KLU_factor. This routine cannot do any numerical |
|||
* pivoting. The pattern of the input matrix (Ap, Ai) must be identical to |
|||
* the pattern given to KLU_factor. |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_refactor ========================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
Int KLU_refactor /* returns TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], |
|||
KLU_symbolic *Symbolic, |
|||
|
|||
/* input/output */ |
|||
KLU_numeric *Numeric, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
Entry ukk, ujk, s ; |
|||
Entry *Offx, *Lx, *Ux, *X, *Az, *Udiag ; |
|||
double *Rs ; |
|||
Int *P, *Q, *R, *Pnum, *Offp, *Offi, *Ui, *Li, *Pinv, *Lip, *Uip, *Llen, |
|||
*Ulen ; |
|||
Unit **LUbx ; |
|||
Unit *LU ; |
|||
Int k1, k2, nk, k, block, oldcol, pend, oldrow, n, p, newrow, scale, |
|||
nblocks, poff, i, j, up, ulen, llen, maxblock, nzoff ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
if (Numeric == NULL) |
|||
{ |
|||
/* invalid Numeric object */ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
|
|||
Common->numerical_rank = EMPTY ; |
|||
Common->singular_col = EMPTY ; |
|||
|
|||
Az = (Entry *) Ax ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
n = Symbolic->n ; |
|||
P = Symbolic->P ; |
|||
Q = Symbolic->Q ; |
|||
R = Symbolic->R ; |
|||
nblocks = Symbolic->nblocks ; |
|||
maxblock = Symbolic->maxblock ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Numeric object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Pnum = Numeric->Pnum ; |
|||
Offp = Numeric->Offp ; |
|||
Offi = Numeric->Offi ; |
|||
Offx = (Entry *) Numeric->Offx ; |
|||
|
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
|
|||
scale = Common->scale ; |
|||
if (scale > 0) |
|||
{ |
|||
/* factorization was not scaled, but refactorization is scaled */ |
|||
if (Numeric->Rs == NULL) |
|||
{ |
|||
Numeric->Rs = KLU_malloc (n, sizeof (double), Common) ; |
|||
if (Common->status < KLU_OK) |
|||
{ |
|||
Common->status = KLU_OUT_OF_MEMORY ; |
|||
return (FALSE) ; |
|||
} |
|||
} |
|||
} |
|||
else |
|||
{ |
|||
/* no scaling for refactorization; ensure Numeric->Rs is freed. This |
|||
* does nothing if Numeric->Rs is already NULL. */ |
|||
Numeric->Rs = KLU_free (Numeric->Rs, n, sizeof (double), Common) ; |
|||
} |
|||
Rs = Numeric->Rs ; |
|||
|
|||
Pinv = Numeric->Pinv ; |
|||
X = (Entry *) Numeric->Xwork ; |
|||
Common->nrealloc = 0 ; |
|||
Udiag = Numeric->Udiag ; |
|||
nzoff = Symbolic->nzoff ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check the input matrix compute the row scale factors, Rs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
/* do no scale, or check the input matrix, if scale < 0 */ |
|||
if (scale >= 0) |
|||
{ |
|||
/* check for out-of-range indices, but do not check for duplicates */ |
|||
if (!KLU_scale (scale, n, Ap, Ai, Ax, Rs, NULL, Common)) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* clear workspace X */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (k = 0 ; k < maxblock ; k++) |
|||
{ |
|||
/* X [k] = 0 */ |
|||
CLEAR (X [k]) ; |
|||
} |
|||
|
|||
poff = 0 ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* factor each block */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (scale <= 0) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* no scaling */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* the block is from rows/columns k1 to k2-1 */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
|
|||
if (nk == 1) |
|||
{ |
|||
|
|||
/* ---------------------------------------------------------- */ |
|||
/* singleton case */ |
|||
/* ---------------------------------------------------------- */ |
|||
|
|||
oldcol = Q [k1] ; |
|||
pend = Ap [oldcol+1] ; |
|||
CLEAR (s) ; |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
newrow = Pinv [Ai [p]] - k1 ; |
|||
if (newrow < 0 && poff < nzoff) |
|||
{ |
|||
/* entry in off-diagonal block */ |
|||
Offx [poff] = Az [p] ; |
|||
poff++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* singleton */ |
|||
s = Az [p] ; |
|||
} |
|||
} |
|||
Udiag [k1] = s ; |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
/* ---------------------------------------------------------- */ |
|||
/* construct and factor the kth block */ |
|||
/* ---------------------------------------------------------- */ |
|||
|
|||
Lip = Numeric->Lip + k1 ; |
|||
Llen = Numeric->Llen + k1 ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
LU = LUbx [block] ; |
|||
|
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------ */ |
|||
/* scatter kth column of the block into workspace X */ |
|||
/* ------------------------------------------------------ */ |
|||
|
|||
oldcol = Q [k+k1] ; |
|||
pend = Ap [oldcol+1] ; |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
newrow = Pinv [Ai [p]] - k1 ; |
|||
if (newrow < 0 && poff < nzoff) |
|||
{ |
|||
/* entry in off-diagonal block */ |
|||
Offx [poff] = Az [p] ; |
|||
poff++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* (newrow,k) is an entry in the block */ |
|||
X [newrow] = Az [p] ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------ */ |
|||
/* compute kth column of U, and update kth column of A */ |
|||
/* ------------------------------------------------------ */ |
|||
|
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; |
|||
for (up = 0 ; up < ulen ; up++) |
|||
{ |
|||
j = Ui [up] ; |
|||
ujk = X [j] ; |
|||
/* X [j] = 0 */ |
|||
CLEAR (X [j]) ; |
|||
Ux [up] = ujk ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; |
|||
for (p = 0 ; p < llen ; p++) |
|||
{ |
|||
/* X [Li [p]] -= Lx [p] * ujk */ |
|||
MULT_SUB (X [Li [p]], Lx [p], ujk) ; |
|||
} |
|||
} |
|||
/* get the diagonal entry of U */ |
|||
ukk = X [k] ; |
|||
/* X [k] = 0 */ |
|||
CLEAR (X [k]) ; |
|||
/* singular case */ |
|||
if (IS_ZERO (ukk)) |
|||
{ |
|||
/* matrix is numerically singular */ |
|||
Common->status = KLU_SINGULAR ; |
|||
if (Common->numerical_rank == EMPTY) |
|||
{ |
|||
Common->numerical_rank = k+k1 ; |
|||
Common->singular_col = Q [k+k1] ; |
|||
} |
|||
if (Common->halt_if_singular) |
|||
{ |
|||
/* do not continue the factorization */ |
|||
return (FALSE) ; |
|||
} |
|||
} |
|||
Udiag [k+k1] = ukk ; |
|||
/* gather and divide by pivot to get kth column of L */ |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ; |
|||
for (p = 0 ; p < llen ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
DIV (Lx [p], X [i], ukk) ; |
|||
CLEAR (X [i]) ; |
|||
} |
|||
|
|||
} |
|||
} |
|||
} |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* scaling */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* the block is from rows/columns k1 to k2-1 */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
|
|||
if (nk == 1) |
|||
{ |
|||
|
|||
/* ---------------------------------------------------------- */ |
|||
/* singleton case */ |
|||
/* ---------------------------------------------------------- */ |
|||
|
|||
oldcol = Q [k1] ; |
|||
pend = Ap [oldcol+1] ; |
|||
CLEAR (s) ; |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
oldrow = Ai [p] ; |
|||
newrow = Pinv [oldrow] - k1 ; |
|||
if (newrow < 0 && poff < nzoff) |
|||
{ |
|||
/* entry in off-diagonal block */ |
|||
/* Offx [poff] = Az [p] / Rs [oldrow] */ |
|||
SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]) ; |
|||
poff++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* singleton */ |
|||
/* s = Az [p] / Rs [oldrow] */ |
|||
SCALE_DIV_ASSIGN (s, Az [p], Rs [oldrow]) ; |
|||
} |
|||
} |
|||
Udiag [k1] = s ; |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
/* ---------------------------------------------------------- */ |
|||
/* construct and factor the kth block */ |
|||
/* ---------------------------------------------------------- */ |
|||
|
|||
Lip = Numeric->Lip + k1 ; |
|||
Llen = Numeric->Llen + k1 ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
LU = LUbx [block] ; |
|||
|
|||
for (k = 0 ; k < nk ; k++) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------ */ |
|||
/* scatter kth column of the block into workspace X */ |
|||
/* ------------------------------------------------------ */ |
|||
|
|||
oldcol = Q [k+k1] ; |
|||
pend = Ap [oldcol+1] ; |
|||
for (p = Ap [oldcol] ; p < pend ; p++) |
|||
{ |
|||
oldrow = Ai [p] ; |
|||
newrow = Pinv [oldrow] - k1 ; |
|||
if (newrow < 0 && poff < nzoff) |
|||
{ |
|||
/* entry in off-diagonal part */ |
|||
/* Offx [poff] = Az [p] / Rs [oldrow] */ |
|||
SCALE_DIV_ASSIGN (Offx [poff], Az [p], Rs [oldrow]); |
|||
poff++ ; |
|||
} |
|||
else |
|||
{ |
|||
/* (newrow,k) is an entry in the block */ |
|||
/* X [newrow] = Az [p] / Rs [oldrow] */ |
|||
SCALE_DIV_ASSIGN (X [newrow], Az [p], Rs [oldrow]) ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------ */ |
|||
/* compute kth column of U, and update kth column of A */ |
|||
/* ------------------------------------------------------ */ |
|||
|
|||
GET_POINTER (LU, Uip, Ulen, Ui, Ux, k, ulen) ; |
|||
for (up = 0 ; up < ulen ; up++) |
|||
{ |
|||
j = Ui [up] ; |
|||
ujk = X [j] ; |
|||
/* X [j] = 0 */ |
|||
CLEAR (X [j]) ; |
|||
Ux [up] = ujk ; |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, j, llen) ; |
|||
for (p = 0 ; p < llen ; p++) |
|||
{ |
|||
/* X [Li [p]] -= Lx [p] * ujk */ |
|||
MULT_SUB (X [Li [p]], Lx [p], ujk) ; |
|||
} |
|||
} |
|||
/* get the diagonal entry of U */ |
|||
ukk = X [k] ; |
|||
/* X [k] = 0 */ |
|||
CLEAR (X [k]) ; |
|||
/* singular case */ |
|||
if (IS_ZERO (ukk)) |
|||
{ |
|||
/* matrix is numerically singular */ |
|||
Common->status = KLU_SINGULAR ; |
|||
if (Common->numerical_rank == EMPTY) |
|||
{ |
|||
Common->numerical_rank = k+k1 ; |
|||
Common->singular_col = Q [k+k1] ; |
|||
} |
|||
if (Common->halt_if_singular) |
|||
{ |
|||
/* do not continue the factorization */ |
|||
return (FALSE) ; |
|||
} |
|||
} |
|||
Udiag [k+k1] = ukk ; |
|||
/* gather and divide by pivot to get kth column of L */ |
|||
GET_POINTER (LU, Lip, Llen, Li, Lx, k, llen) ; |
|||
for (p = 0 ; p < llen ; p++) |
|||
{ |
|||
i = Li [p] ; |
|||
DIV (Lx [p], X [i], ukk) ; |
|||
CLEAR (X [i]) ; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* permute scale factors Rs according to pivotal row order */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (scale > 0) |
|||
{ |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
REAL (X [k]) = Rs [Pnum [k]] ; |
|||
} |
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Rs [k] = REAL (X [k]) ; |
|||
} |
|||
} |
|||
|
|||
#ifndef NDEBUG |
|||
ASSERT (Offp [n] == poff) ; |
|||
ASSERT (Symbolic->nzoff == poff) ; |
|||
PRINTF (("\n------------------- Off diagonal entries, new:\n")) ; |
|||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; |
|||
if (Common->status == KLU_OK) |
|||
{ |
|||
PRINTF (("\n ########### KLU_BTF_REFACTOR done, nblocks %d\n",nblocks)); |
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (( |
|||
"\n================KLU_refactor output: k1 %d k2 %d nk %d\n", |
|||
k1, k2, nk)) ; |
|||
if (nk == 1) |
|||
{ |
|||
PRINTF (("singleton ")) ; |
|||
PRINT_ENTRY (Udiag [k1]) ; |
|||
} |
|||
else |
|||
{ |
|||
Lip = Numeric->Lip + k1 ; |
|||
Llen = Numeric->Llen + k1 ; |
|||
LU = (Unit *) Numeric->LUbx [block] ; |
|||
PRINTF (("\n---- L block %d\n", block)) ; |
|||
ASSERT (KLU_valid_LU (nk, TRUE, Lip, Llen, LU)) ; |
|||
Uip = Numeric->Uip + k1 ; |
|||
Ulen = Numeric->Ulen + k1 ; |
|||
PRINTF (("\n---- U block %d\n", block)) ; |
|||
ASSERT (KLU_valid_LU (nk, FALSE, Uip, Ulen, LU)) ; |
|||
} |
|||
} |
|||
} |
|||
#endif |
|||
|
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,159 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_scale ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Scale a matrix and check to see if it is valid. Can be called by the user. |
|||
* This is called by KLU_factor and KLU_refactor. Returns TRUE if the input |
|||
* matrix is valid, FALSE otherwise. If the W input argument is non-NULL, |
|||
* then the input matrix is checked for duplicate entries. |
|||
* |
|||
* scaling methods: |
|||
* <0: no scaling, do not compute Rs, and do not check input matrix. |
|||
* 0: no scaling |
|||
* 1: the scale factor for row i is sum (abs (A (i,:))) |
|||
* 2 or more: the scale factor for row i is max (abs (A (i,:))) |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_scale /* return TRUE if successful, FALSE otherwise */ |
|||
( |
|||
/* inputs, not modified */ |
|||
Int scale, /* 0: none, 1: sum, 2: max */ |
|||
Int n, |
|||
Int Ap [ ], /* size n+1, column pointers */ |
|||
Int Ai [ ], /* size nz, row indices */ |
|||
double Ax [ ], |
|||
/* outputs, not defined on input */ |
|||
double Rs [ ], /* size n, can be NULL if scale <= 0 */ |
|||
/* workspace, not defined on input or output */ |
|||
Int W [ ], /* size n, can be NULL */ |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
double a ; |
|||
Entry *Az ; |
|||
Int row, col, p, pend, check_duplicates ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
if (scale < 0) |
|||
{ |
|||
/* return without checking anything and without computing the |
|||
* scale factors */ |
|||
return (TRUE) ; |
|||
} |
|||
|
|||
Az = (Entry *) Ax ; |
|||
|
|||
if (n <= 0 || Ap == NULL || Ai == NULL || Az == NULL || |
|||
(scale > 0 && Rs == NULL)) |
|||
{ |
|||
/* Ap, Ai, Ax and Rs must be present, and n must be > 0 */ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
if (Ap [0] != 0 || Ap [n] < 0) |
|||
{ |
|||
/* nz = Ap [n] must be >= 0 and Ap [0] must equal zero */ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
for (col = 0 ; col < n ; col++) |
|||
{ |
|||
if (Ap [col] > Ap [col+1]) |
|||
{ |
|||
/* column pointers must be non-decreasing */ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
} |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* scale */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (scale > 0) |
|||
{ |
|||
/* initialize row sum or row max */ |
|||
for (row = 0 ; row < n ; row++) |
|||
{ |
|||
Rs [row] = 0 ; |
|||
} |
|||
} |
|||
|
|||
/* check for duplicates only if W is present */ |
|||
check_duplicates = (W != (Int *) NULL) ; |
|||
if (check_duplicates) |
|||
{ |
|||
for (row = 0 ; row < n ; row++) |
|||
{ |
|||
W [row] = EMPTY ; |
|||
} |
|||
} |
|||
|
|||
for (col = 0 ; col < n ; col++) |
|||
{ |
|||
pend = Ap [col+1] ; |
|||
for (p = Ap [col] ; p < pend ; p++) |
|||
{ |
|||
row = Ai [p] ; |
|||
if (row < 0 || row >= n) |
|||
{ |
|||
/* row index out of range, or duplicate entry */ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
if (check_duplicates) |
|||
{ |
|||
if (W [row] == col) |
|||
{ |
|||
/* duplicate entry */ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
/* flag row i as appearing in column col */ |
|||
W [row] = col ; |
|||
} |
|||
/* a = ABS (Az [p]) ;*/ |
|||
ABS (a, Az [p]) ; |
|||
if (scale == 1) |
|||
{ |
|||
/* accumulate the abs. row sum */ |
|||
Rs [row] += a ; |
|||
} |
|||
else if (scale > 1) |
|||
{ |
|||
/* find the max abs. value in the row */ |
|||
Rs [row] = MAX (Rs [row], a) ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
if (scale > 0) |
|||
{ |
|||
/* do not scale empty rows */ |
|||
for (row = 0 ; row < n ; row++) |
|||
{ |
|||
/* matrix is singular */ |
|||
PRINTF (("Rs [%d] = %g\n", row, Rs [row])) ; |
|||
|
|||
if (Rs [row] == 0.0) |
|||
{ |
|||
PRINTF (("Row %d of A is all zero\n", row)) ; |
|||
Rs [row] = 1.0 ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,396 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_solve ============================================================ */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Solve Ax=b using the symbolic and numeric objects from KLU_analyze |
|||
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is |
|||
* performed. Uses Numeric->Xwork as workspace (undefined on input and output), |
|||
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with |
|||
* Numeric->Iwork). |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_solve |
|||
( |
|||
/* inputs, not modified */ |
|||
KLU_symbolic *Symbolic, |
|||
KLU_numeric *Numeric, |
|||
Int d, /* leading dimension of B */ |
|||
Int nrhs, /* number of right-hand-sides */ |
|||
|
|||
/* right-hand-side on input, overwritten with solution to Ax=b on output */ |
|||
double B [ ], /* size n*nrhs, in column-oriented form, with |
|||
* leading dimension d. */ |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
Entry x [4], offik, s ; |
|||
double rs, *Rs ; |
|||
Entry *Offx, *X, *Bz, *Udiag ; |
|||
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ; |
|||
Unit **LUbx ; |
|||
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 || |
|||
B == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Bz = (Entry *) B ; |
|||
n = Symbolic->n ; |
|||
nblocks = Symbolic->nblocks ; |
|||
Q = Symbolic->Q ; |
|||
R = Symbolic->R ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Numeric object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
ASSERT (nblocks == Numeric->nblocks) ; |
|||
Pnum = Numeric->Pnum ; |
|||
Offp = Numeric->Offp ; |
|||
Offi = Numeric->Offi ; |
|||
Offx = (Entry *) Numeric->Offx ; |
|||
|
|||
Lip = Numeric->Lip ; |
|||
Llen = Numeric->Llen ; |
|||
Uip = Numeric->Uip ; |
|||
Ulen = Numeric->Ulen ; |
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
Udiag = Numeric->Udiag ; |
|||
|
|||
Rs = Numeric->Rs ; |
|||
X = (Entry *) Numeric->Xwork ; |
|||
|
|||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* solve in chunks of 4 columns at a time */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (chunk = 0 ; chunk < nrhs ; chunk += 4) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* get the size of the current chunk */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
nr = MIN (nrhs - chunk, 4) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* scale and permute the right hand side, X = P*(R\B) */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (Rs == NULL) |
|||
{ |
|||
|
|||
/* no scaling */ |
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
X [k] = Bz [Pnum [k]] ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
X [2*k ] = Bz [i ] ; |
|||
X [2*k + 1] = Bz [i + d ] ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
X [3*k ] = Bz [i ] ; |
|||
X [3*k + 1] = Bz [i + d ] ; |
|||
X [3*k + 2] = Bz [i + d*2] ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
X [4*k ] = Bz [i ] ; |
|||
X [4*k + 1] = Bz [i + d ] ; |
|||
X [4*k + 2] = Bz [i + d*2] ; |
|||
X [4*k + 3] = Bz [i + d*3] ; |
|||
} |
|||
break ; |
|||
} |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
SCALE_DIV_ASSIGN (X [k], Bz [Pnum [k]], Rs [k]) ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
rs = Rs [k] ; |
|||
SCALE_DIV_ASSIGN (X [2*k], Bz [i], rs) ; |
|||
SCALE_DIV_ASSIGN (X [2*k + 1], Bz [i + d], rs) ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
rs = Rs [k] ; |
|||
SCALE_DIV_ASSIGN (X [3*k], Bz [i], rs) ; |
|||
SCALE_DIV_ASSIGN (X [3*k + 1], Bz [i + d], rs) ; |
|||
SCALE_DIV_ASSIGN (X [3*k + 2], Bz [i + d*2], rs) ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
rs = Rs [k] ; |
|||
SCALE_DIV_ASSIGN (X [4*k], Bz [i], rs) ; |
|||
SCALE_DIV_ASSIGN (X [4*k + 1], Bz [i + d], rs) ; |
|||
SCALE_DIV_ASSIGN (X [4*k + 2], Bz [i + d*2], rs) ; |
|||
SCALE_DIV_ASSIGN (X [4*k + 3], Bz [i + d*3], rs) ; |
|||
} |
|||
break ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* solve X = (L*U + Off)\X */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (block = nblocks-1 ; block >= 0 ; block--) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* the block of size nk is from rows/columns k1 to k2-1 */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("solve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; |
|||
|
|||
/* solve the block system */ |
|||
if (nk == 1) |
|||
{ |
|||
s = Udiag [k1] ; |
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
DIV (X [k1], X [k1], s) ; |
|||
break ; |
|||
|
|||
case 2: |
|||
DIV (X [2*k1], X [2*k1], s) ; |
|||
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; |
|||
break ; |
|||
|
|||
case 3: |
|||
DIV (X [3*k1], X [3*k1], s) ; |
|||
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; |
|||
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; |
|||
break ; |
|||
|
|||
case 4: |
|||
DIV (X [4*k1], X [4*k1], s) ; |
|||
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; |
|||
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; |
|||
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; |
|||
break ; |
|||
|
|||
} |
|||
} |
|||
else |
|||
{ |
|||
KLU_lsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr, |
|||
X + nr*k1) ; |
|||
KLU_usolve (nk, Uip + k1, Ulen + k1, LUbx [block], |
|||
Udiag + k1, nr, X + nr*k1) ; |
|||
} |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* block back-substitution for the off-diagonal-block entries */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
if (block > 0) |
|||
{ |
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [k] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
MULT_SUB (X [Offi [p]], Offx [p], x [0]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [2*k ] ; |
|||
x [1] = X [2*k + 1] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
i = Offi [p] ; |
|||
offik = Offx [p] ; |
|||
MULT_SUB (X [2*i], offik, x [0]) ; |
|||
MULT_SUB (X [2*i + 1], offik, x [1]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [3*k ] ; |
|||
x [1] = X [3*k + 1] ; |
|||
x [2] = X [3*k + 2] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
i = Offi [p] ; |
|||
offik = Offx [p] ; |
|||
MULT_SUB (X [3*i], offik, x [0]) ; |
|||
MULT_SUB (X [3*i + 1], offik, x [1]) ; |
|||
MULT_SUB (X [3*i + 2], offik, x [2]) ; |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [4*k ] ; |
|||
x [1] = X [4*k + 1] ; |
|||
x [2] = X [4*k + 2] ; |
|||
x [3] = X [4*k + 3] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
i = Offi [p] ; |
|||
offik = Offx [p] ; |
|||
MULT_SUB (X [4*i], offik, x [0]) ; |
|||
MULT_SUB (X [4*i + 1], offik, x [1]) ; |
|||
MULT_SUB (X [4*i + 2], offik, x [2]) ; |
|||
MULT_SUB (X [4*i + 3], offik, x [3]) ; |
|||
} |
|||
} |
|||
break ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* permute the result, Bz = Q*X */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Bz [Q [k]] = X [k] ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Q [k] ; |
|||
Bz [i ] = X [2*k ] ; |
|||
Bz [i + d ] = X [2*k + 1] ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Q [k] ; |
|||
Bz [i ] = X [3*k ] ; |
|||
Bz [i + d ] = X [3*k + 1] ; |
|||
Bz [i + d*2] = X [3*k + 2] ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Q [k] ; |
|||
Bz [i ] = X [4*k ] ; |
|||
Bz [i + d ] = X [4*k + 1] ; |
|||
Bz [i + d*2] = X [4*k + 2] ; |
|||
Bz [i + d*3] = X [4*k + 3] ; |
|||
} |
|||
break ; |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* go to the next chunk of B */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
Bz += d*4 ; |
|||
} |
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,156 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_sort ============================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* sorts the columns of L and U so that the row indices appear in strictly |
|||
* increasing order. |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
/* ========================================================================== */ |
|||
/* === sort ================================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Sort L or U using a double-transpose */ |
|||
|
|||
static void sort (Int n, Int *Xip, Int *Xlen, Unit *LU, Int *Tp, Int *Tj, |
|||
Entry *Tx, Int *W) |
|||
{ |
|||
Int *Xi ; |
|||
Entry *Xx ; |
|||
Int p, i, j, len, nz, tp, xlen, pend ; |
|||
|
|||
ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ; |
|||
|
|||
/* count the number of entries in each row of L or U */ |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
W [i] = 0 ; |
|||
} |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
W [Xi [p]]++ ; |
|||
} |
|||
} |
|||
|
|||
/* construct the row pointers for T */ |
|||
nz = 0 ; |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
Tp [i] = nz ; |
|||
nz += W [i] ; |
|||
} |
|||
Tp [n] = nz ; |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
W [i] = Tp [i] ; |
|||
} |
|||
|
|||
/* transpose the matrix into Tp, Ti, Tx */ |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; |
|||
for (p = 0 ; p < len ; p++) |
|||
{ |
|||
tp = W [Xi [p]]++ ; |
|||
Tj [tp] = j ; |
|||
Tx [tp] = Xx [p] ; |
|||
} |
|||
} |
|||
|
|||
/* transpose the matrix back into Xip, Xlen, Xi, Xx */ |
|||
for (j = 0 ; j < n ; j++) |
|||
{ |
|||
W [j] = 0 ; |
|||
} |
|||
for (i = 0 ; i < n ; i++) |
|||
{ |
|||
pend = Tp [i+1] ; |
|||
for (p = Tp [i] ; p < pend ; p++) |
|||
{ |
|||
j = Tj [p] ; |
|||
GET_POINTER (LU, Xip, Xlen, Xi, Xx, j, len) ; |
|||
xlen = W [j]++ ; |
|||
Xi [xlen] = i ; |
|||
Xx [xlen] = Tx [p] ; |
|||
} |
|||
} |
|||
|
|||
ASSERT (KLU_valid_LU (n, FALSE, Xip, Xlen, LU)) ; |
|||
} |
|||
|
|||
|
|||
/* ========================================================================== */ |
|||
/* === KLU_sort ============================================================= */ |
|||
/* ========================================================================== */ |
|||
|
|||
Int KLU_sort |
|||
( |
|||
KLU_symbolic *Symbolic, |
|||
KLU_numeric *Numeric, |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
Int *R, *W, *Tp, *Ti, *Lip, *Uip, *Llen, *Ulen ; |
|||
Entry *Tx ; |
|||
Unit **LUbx ; |
|||
Int n, nk, nz, block, nblocks, maxblock, k1 ; |
|||
size_t m1 ; |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
n = Symbolic->n ; |
|||
R = Symbolic->R ; |
|||
nblocks = Symbolic->nblocks ; |
|||
maxblock = Symbolic->maxblock ; |
|||
|
|||
Lip = Numeric->Lip ; |
|||
Llen = Numeric->Llen ; |
|||
Uip = Numeric->Uip ; |
|||
Ulen = Numeric->Ulen ; |
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
|
|||
m1 = ((size_t) maxblock) + 1 ; |
|||
|
|||
/* allocate workspace */ |
|||
nz = MAX (Numeric->max_lnz_block, Numeric->max_unz_block) ; |
|||
W = KLU_malloc (maxblock, sizeof (Int), Common) ; |
|||
Tp = KLU_malloc (m1, sizeof (Int), Common) ; |
|||
Ti = KLU_malloc (nz, sizeof (Int), Common) ; |
|||
Tx = KLU_malloc (nz, sizeof (Entry), Common) ; |
|||
|
|||
PRINTF (("\n======================= Start sort:\n")) ; |
|||
|
|||
if (Common->status == KLU_OK) |
|||
{ |
|||
/* sort each block of L and U */ |
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
k1 = R [block] ; |
|||
nk = R [block+1] - k1 ; |
|||
if (nk > 1) |
|||
{ |
|||
PRINTF (("\n-------------------block: %d nk %d\n", block, nk)) ; |
|||
sort (nk, Lip + k1, Llen + k1, LUbx [block], Tp, Ti, Tx, W) ; |
|||
sort (nk, Uip + k1, Ulen + k1, LUbx [block], Tp, Ti, Tx, W) ; |
|||
} |
|||
} |
|||
} |
|||
|
|||
PRINTF (("\n======================= sort done.\n")) ; |
|||
|
|||
/* free workspace */ |
|||
KLU_free (W, maxblock, sizeof (Int), Common) ; |
|||
KLU_free (Tp, m1, sizeof (Int), Common) ; |
|||
KLU_free (Ti, nz, sizeof (Int), Common) ; |
|||
KLU_free (Tx, nz, sizeof (Entry), Common) ; |
|||
return (Common->status == KLU_OK) ; |
|||
} |
|||
@ -0,0 +1,465 @@ |
|||
/* ========================================================================== */ |
|||
/* === KLU_tsolve =========================================================== */ |
|||
/* ========================================================================== */ |
|||
|
|||
/* Solve A'x=b using the symbolic and numeric objects from KLU_analyze |
|||
* (or KLU_analyze_given) and KLU_factor. Note that no iterative refinement is |
|||
* performed. Uses Numeric->Xwork as workspace (undefined on input and output), |
|||
* of size 4n Entry's (note that columns 2 to 4 of Xwork overlap with |
|||
* Numeric->Iwork). |
|||
*/ |
|||
|
|||
#include "klu_internal.h" |
|||
|
|||
Int KLU_tsolve |
|||
( |
|||
/* inputs, not modified */ |
|||
KLU_symbolic *Symbolic, |
|||
KLU_numeric *Numeric, |
|||
Int d, /* leading dimension of B */ |
|||
Int nrhs, /* number of right-hand-sides */ |
|||
|
|||
/* right-hand-side on input, overwritten with solution to Ax=b on output */ |
|||
double B [ ], /* size n*nrhs, in column-oriented form, with |
|||
* leading dimension d. */ |
|||
#ifdef COMPLEX |
|||
Int conj_solve, /* TRUE for conjugate transpose solve, FALSE for |
|||
* array transpose solve. Used for the complex |
|||
* case only. */ |
|||
#endif |
|||
/* --------------- */ |
|||
KLU_common *Common |
|||
) |
|||
{ |
|||
Entry x [4], offik, s ; |
|||
double rs, *Rs ; |
|||
Entry *Offx, *X, *Bz, *Udiag ; |
|||
Int *Q, *R, *Pnum, *Offp, *Offi, *Lip, *Uip, *Llen, *Ulen ; |
|||
Unit **LUbx ; |
|||
Int k1, k2, nk, k, block, pend, n, p, nblocks, chunk, nr, i ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* check inputs */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
if (Common == NULL) |
|||
{ |
|||
return (FALSE) ; |
|||
} |
|||
if (Numeric == NULL || Symbolic == NULL || d < Symbolic->n || nrhs < 0 || |
|||
B == NULL) |
|||
{ |
|||
Common->status = KLU_INVALID ; |
|||
return (FALSE) ; |
|||
} |
|||
Common->status = KLU_OK ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Symbolic object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
Bz = (Entry *) B ; |
|||
n = Symbolic->n ; |
|||
nblocks = Symbolic->nblocks ; |
|||
Q = Symbolic->Q ; |
|||
R = Symbolic->R ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* get the contents of the Numeric object */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
ASSERT (nblocks == Numeric->nblocks) ; |
|||
Pnum = Numeric->Pnum ; |
|||
Offp = Numeric->Offp ; |
|||
Offi = Numeric->Offi ; |
|||
Offx = (Entry *) Numeric->Offx ; |
|||
|
|||
Lip = Numeric->Lip ; |
|||
Llen = Numeric->Llen ; |
|||
Uip = Numeric->Uip ; |
|||
Ulen = Numeric->Ulen ; |
|||
LUbx = (Unit **) Numeric->LUbx ; |
|||
Udiag = Numeric->Udiag ; |
|||
|
|||
Rs = Numeric->Rs ; |
|||
X = (Entry *) Numeric->Xwork ; |
|||
ASSERT (KLU_valid (n, Offp, Offi, Offx)) ; |
|||
|
|||
/* ---------------------------------------------------------------------- */ |
|||
/* solve in chunks of 4 columns at a time */ |
|||
/* ---------------------------------------------------------------------- */ |
|||
|
|||
for (chunk = 0 ; chunk < nrhs ; chunk += 4) |
|||
{ |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* get the size of the current chunk */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
nr = MIN (nrhs - chunk, 4) ; |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* permute the right hand side, X = Q'*B */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
X [k] = Bz [Q [k]] ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Q [k] ; |
|||
X [2*k ] = Bz [i ] ; |
|||
X [2*k + 1] = Bz [i + d ] ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Q [k] ; |
|||
X [3*k ] = Bz [i ] ; |
|||
X [3*k + 1] = Bz [i + d ] ; |
|||
X [3*k + 2] = Bz [i + d*2] ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Q [k] ; |
|||
X [4*k ] = Bz [i ] ; |
|||
X [4*k + 1] = Bz [i + d ] ; |
|||
X [4*k + 2] = Bz [i + d*2] ; |
|||
X [4*k + 3] = Bz [i + d*3] ; |
|||
} |
|||
break ; |
|||
|
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* solve X = (L*U + Off)'\X */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
for (block = 0 ; block < nblocks ; block++) |
|||
{ |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* the block of size nk is from rows/columns k1 to k2-1 */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
k1 = R [block] ; |
|||
k2 = R [block+1] ; |
|||
nk = k2 - k1 ; |
|||
PRINTF (("tsolve %d, k1 %d k2-1 %d nk %d\n", block, k1,k2-1,nk)) ; |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* block back-substitution for the off-diagonal-block entries */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
if (block > 0) |
|||
{ |
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
MULT_SUB_CONJ (X [k], X [Offi [p]], |
|||
Offx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
MULT_SUB (X [k], Offx [p], X [Offi [p]]) ; |
|||
} |
|||
} |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [2*k ] ; |
|||
x [1] = X [2*k + 1] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
i = Offi [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (offik, Offx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
offik = Offx [p] ; |
|||
} |
|||
MULT_SUB (x [0], offik, X [2*i]) ; |
|||
MULT_SUB (x [1], offik, X [2*i + 1]) ; |
|||
} |
|||
X [2*k ] = x [0] ; |
|||
X [2*k + 1] = x [1] ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [3*k ] ; |
|||
x [1] = X [3*k + 1] ; |
|||
x [2] = X [3*k + 2] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
i = Offi [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (offik, Offx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
offik = Offx [p] ; |
|||
} |
|||
MULT_SUB (x [0], offik, X [3*i]) ; |
|||
MULT_SUB (x [1], offik, X [3*i + 1]) ; |
|||
MULT_SUB (x [2], offik, X [3*i + 2]) ; |
|||
} |
|||
X [3*k ] = x [0] ; |
|||
X [3*k + 1] = x [1] ; |
|||
X [3*k + 2] = x [2] ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = k1 ; k < k2 ; k++) |
|||
{ |
|||
pend = Offp [k+1] ; |
|||
x [0] = X [4*k ] ; |
|||
x [1] = X [4*k + 1] ; |
|||
x [2] = X [4*k + 2] ; |
|||
x [3] = X [4*k + 3] ; |
|||
for (p = Offp [k] ; p < pend ; p++) |
|||
{ |
|||
i = Offi [p] ; |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ(offik, Offx [p]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
offik = Offx [p] ; |
|||
} |
|||
MULT_SUB (x [0], offik, X [4*i]) ; |
|||
MULT_SUB (x [1], offik, X [4*i + 1]) ; |
|||
MULT_SUB (x [2], offik, X [4*i + 2]) ; |
|||
MULT_SUB (x [3], offik, X [4*i + 3]) ; |
|||
} |
|||
X [4*k ] = x [0] ; |
|||
X [4*k + 1] = x [1] ; |
|||
X [4*k + 2] = x [2] ; |
|||
X [4*k + 3] = x [3] ; |
|||
} |
|||
break ; |
|||
} |
|||
} |
|||
|
|||
/* -------------------------------------------------------------- */ |
|||
/* solve the block system */ |
|||
/* -------------------------------------------------------------- */ |
|||
|
|||
if (nk == 1) |
|||
{ |
|||
#ifdef COMPLEX |
|||
if (conj_solve) |
|||
{ |
|||
CONJ (s, Udiag [k1]) ; |
|||
} |
|||
else |
|||
#endif |
|||
{ |
|||
s = Udiag [k1] ; |
|||
} |
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
DIV (X [k1], X [k1], s) ; |
|||
break ; |
|||
|
|||
case 2: |
|||
DIV (X [2*k1], X [2*k1], s) ; |
|||
DIV (X [2*k1 + 1], X [2*k1 + 1], s) ; |
|||
break ; |
|||
|
|||
case 3: |
|||
DIV (X [3*k1], X [3*k1], s) ; |
|||
DIV (X [3*k1 + 1], X [3*k1 + 1], s) ; |
|||
DIV (X [3*k1 + 2], X [3*k1 + 2], s) ; |
|||
break ; |
|||
|
|||
case 4: |
|||
DIV (X [4*k1], X [4*k1], s) ; |
|||
DIV (X [4*k1 + 1], X [4*k1 + 1], s) ; |
|||
DIV (X [4*k1 + 2], X [4*k1 + 2], s) ; |
|||
DIV (X [4*k1 + 3], X [4*k1 + 3], s) ; |
|||
break ; |
|||
|
|||
} |
|||
} |
|||
else |
|||
{ |
|||
KLU_utsolve (nk, Uip + k1, Ulen + k1, LUbx [block], |
|||
Udiag + k1, nr, |
|||
#ifdef COMPLEX |
|||
conj_solve, |
|||
#endif |
|||
X + nr*k1) ; |
|||
KLU_ltsolve (nk, Lip + k1, Llen + k1, LUbx [block], nr, |
|||
#ifdef COMPLEX |
|||
conj_solve, |
|||
#endif |
|||
X + nr*k1) ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* scale and permute the result, Bz = P'(R\X) */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
if (Rs == NULL) |
|||
{ |
|||
|
|||
/* no scaling */ |
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
Bz [Pnum [k]] = X [k] ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
Bz [i ] = X [2*k ] ; |
|||
Bz [i + d ] = X [2*k + 1] ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
Bz [i ] = X [3*k ] ; |
|||
Bz [i + d ] = X [3*k + 1] ; |
|||
Bz [i + d*2] = X [3*k + 2] ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
Bz [i ] = X [4*k ] ; |
|||
Bz [i + d ] = X [4*k + 1] ; |
|||
Bz [i + d*2] = X [4*k + 2] ; |
|||
Bz [i + d*3] = X [4*k + 3] ; |
|||
} |
|||
break ; |
|||
} |
|||
|
|||
} |
|||
else |
|||
{ |
|||
|
|||
switch (nr) |
|||
{ |
|||
|
|||
case 1: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
SCALE_DIV_ASSIGN (Bz [Pnum [k]], X [k], Rs [k]) ; |
|||
} |
|||
break ; |
|||
|
|||
case 2: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
rs = Rs [k] ; |
|||
SCALE_DIV_ASSIGN (Bz [i], X [2*k], rs) ; |
|||
SCALE_DIV_ASSIGN (Bz [i + d], X [2*k + 1], rs) ; |
|||
} |
|||
break ; |
|||
|
|||
case 3: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
rs = Rs [k] ; |
|||
SCALE_DIV_ASSIGN (Bz [i], X [3*k], rs) ; |
|||
SCALE_DIV_ASSIGN (Bz [i + d], X [3*k + 1], rs) ; |
|||
SCALE_DIV_ASSIGN (Bz [i + d*2], X [3*k + 2], rs) ; |
|||
} |
|||
break ; |
|||
|
|||
case 4: |
|||
|
|||
for (k = 0 ; k < n ; k++) |
|||
{ |
|||
i = Pnum [k] ; |
|||
rs = Rs [k] ; |
|||
SCALE_DIV_ASSIGN (Bz [i], X [4*k], rs) ; |
|||
SCALE_DIV_ASSIGN (Bz [i + d], X [4*k + 1], rs) ; |
|||
SCALE_DIV_ASSIGN (Bz [i + d*2], X [4*k + 2], rs) ; |
|||
SCALE_DIV_ASSIGN (Bz [i + d*3], X [4*k + 3], rs) ; |
|||
} |
|||
break ; |
|||
} |
|||
} |
|||
|
|||
/* ------------------------------------------------------------------ */ |
|||
/* go to the next chunk of B */ |
|||
/* ------------------------------------------------------------------ */ |
|||
|
|||
Bz += d*4 ; |
|||
} |
|||
return (TRUE) ; |
|||
} |
|||
@ -0,0 +1,694 @@ |
|||
#ifndef _KLU_VERSION_H |
|||
#define _KLU_VERSION_H |
|||
|
|||
#ifdef DLONG |
|||
#define Int UF_long |
|||
#define Int_id UF_long_id |
|||
#define Int_MAX UF_long_max |
|||
#else |
|||
#define Int int |
|||
#define Int_id "%d" |
|||
#define Int_MAX INT_MAX |
|||
#endif |
|||
|
|||
#define NPRINT |
|||
|
|||
#define BYTES(type,n) (sizeof (type) * (n)) |
|||
#define CEILING(b,u) (((b)+(u)-1) / (u)) |
|||
#define UNITS(type,n) (CEILING (BYTES (type,n), sizeof (Unit))) |
|||
#define DUNITS(type,n) (ceil (BYTES (type, (double) n) / sizeof (Unit))) |
|||
|
|||
#define GET_I_POINTER(LU, Xip, Xi, k) \ |
|||
{ \ |
|||
Xi = (Int *) (LU + Xip [k]) ; \ |
|||
} |
|||
|
|||
#define GET_X_POINTER(LU, Xip, Xlen, Xx, k) \ |
|||
{ \ |
|||
Xx = (Entry *) (LU + Xip [k] + UNITS (Int, Xlen [k])) ; \ |
|||
} |
|||
|
|||
#define GET_POINTER(LU, Xip, Xlen, Xi, Xx, k, xlen) \ |
|||
{ \ |
|||
Unit *xp = LU + Xip [k] ; \ |
|||
xlen = Xlen [k] ; \ |
|||
Xi = (Int *) xp ; \ |
|||
Xx = (Entry *) (xp + UNITS (Int, xlen)) ; \ |
|||
} |
|||
|
|||
/* function names */ |
|||
#ifdef COMPLEX |
|||
|
|||
#ifdef DLONG |
|||
|
|||
#define KLU_scale klu_zl_scale |
|||
#define KLU_solve klu_zl_solve |
|||
#define KLU_tsolve klu_zl_tsolve |
|||
#define KLU_free_numeric klu_zl_free_numeric |
|||
#define KLU_factor klu_zl_factor |
|||
#define KLU_refactor klu_zl_refactor |
|||
#define KLU_kernel_factor klu_zl_kernel_factor |
|||
#define KLU_lsolve klu_zl_lsolve |
|||
#define KLU_ltsolve klu_zl_ltsolve |
|||
#define KLU_usolve klu_zl_usolve |
|||
#define KLU_utsolve klu_zl_utsolve |
|||
#define KLU_kernel klu_zl_kernel |
|||
#define KLU_valid klu_zl_valid |
|||
#define KLU_valid_LU klu_zl_valid_LU |
|||
#define KLU_sort klu_zl_sort |
|||
#define KLU_rgrowth klu_zl_rgrowth |
|||
#define KLU_rcond klu_zl_rcond |
|||
#define KLU_extract klu_zl_extract |
|||
#define KLU_condest klu_zl_condest |
|||
#define KLU_flops klu_zl_flops |
|||
|
|||
#else |
|||
|
|||
#define KLU_scale klu_z_scale |
|||
#define KLU_solve klu_z_solve |
|||
#define KLU_tsolve klu_z_tsolve |
|||
#define KLU_free_numeric klu_z_free_numeric |
|||
#define KLU_factor klu_z_factor |
|||
#define KLU_refactor klu_z_refactor |
|||
#define KLU_kernel_factor klu_z_kernel_factor |
|||
#define KLU_lsolve klu_z_lsolve |
|||
#define KLU_ltsolve klu_z_ltsolve |
|||
#define KLU_usolve klu_z_usolve |
|||
#define KLU_utsolve klu_z_utsolve |
|||
#define KLU_kernel klu_z_kernel |
|||
#define KLU_valid klu_z_valid |
|||
#define KLU_valid_LU klu_z_valid_LU |
|||
#define KLU_sort klu_z_sort |
|||
#define KLU_rgrowth klu_z_rgrowth |
|||
#define KLU_rcond klu_z_rcond |
|||
#define KLU_extract klu_z_extract |
|||
#define KLU_condest klu_z_condest |
|||
#define KLU_flops klu_z_flops |
|||
|
|||
#endif |
|||
|
|||
#else |
|||
|
|||
#ifdef DLONG |
|||
|
|||
#define KLU_scale klu_l_scale |
|||
#define KLU_solve klu_l_solve |
|||
#define KLU_tsolve klu_l_tsolve |
|||
#define KLU_free_numeric klu_l_free_numeric |
|||
#define KLU_factor klu_l_factor |
|||
#define KLU_refactor klu_l_refactor |
|||
#define KLU_kernel_factor klu_l_kernel_factor |
|||
#define KLU_lsolve klu_l_lsolve |
|||
#define KLU_ltsolve klu_l_ltsolve |
|||
#define KLU_usolve klu_l_usolve |
|||
#define KLU_utsolve klu_l_utsolve |
|||
#define KLU_kernel klu_l_kernel |
|||
#define KLU_valid klu_l_valid |
|||
#define KLU_valid_LU klu_l_valid_LU |
|||
#define KLU_sort klu_l_sort |
|||
#define KLU_rgrowth klu_l_rgrowth |
|||
#define KLU_rcond klu_l_rcond |
|||
#define KLU_extract klu_l_extract |
|||
#define KLU_condest klu_l_condest |
|||
#define KLU_flops klu_l_flops |
|||
|
|||
#else |
|||
|
|||
#define KLU_scale klu_scale |
|||
#define KLU_solve klu_solve |
|||
#define KLU_tsolve klu_tsolve |
|||
#define KLU_free_numeric klu_free_numeric |
|||
#define KLU_factor klu_factor |
|||
#define KLU_refactor klu_refactor |
|||
#define KLU_kernel_factor klu_kernel_factor |
|||
#define KLU_lsolve klu_lsolve |
|||
#define KLU_ltsolve klu_ltsolve |
|||
#define KLU_usolve klu_usolve |
|||
#define KLU_utsolve klu_utsolve |
|||
#define KLU_kernel klu_kernel |
|||
#define KLU_valid klu_valid |
|||
#define KLU_valid_LU klu_valid_LU |
|||
#define KLU_sort klu_sort |
|||
#define KLU_rgrowth klu_rgrowth |
|||
#define KLU_rcond klu_rcond |
|||
#define KLU_extract klu_extract |
|||
#define KLU_condest klu_condest |
|||
#define KLU_flops klu_flops |
|||
|
|||
#endif |
|||
|
|||
#endif |
|||
|
|||
|
|||
#ifdef DLONG |
|||
|
|||
#define KLU_analyze klu_l_analyze |
|||
#define KLU_analyze_given klu_l_analyze_given |
|||
#define KLU_alloc_symbolic klu_l_alloc_symbolic |
|||
#define KLU_free_symbolic klu_l_free_symbolic |
|||
#define KLU_defaults klu_l_defaults |
|||
#define KLU_free klu_l_free |
|||
#define KLU_malloc klu_l_malloc |
|||
#define KLU_realloc klu_l_realloc |
|||
#define KLU_add_size_t klu_l_add_size_t |
|||
#define KLU_mult_size_t klu_l_mult_size_t |
|||
|
|||
#define KLU_symbolic klu_l_symbolic |
|||
#define KLU_numeric klu_l_numeric |
|||
#define KLU_common klu_l_common |
|||
|
|||
#define BTF_order btf_l_order |
|||
#define BTF_strongcomp btf_l_strongcomp |
|||
|
|||
#define AMD_order amd_l_order |
|||
#define COLAMD colamd_l |
|||
#define COLAMD_recommended colamd_l_recommended |
|||
|
|||
#else |
|||
|
|||
#define KLU_analyze klu_analyze |
|||
#define KLU_analyze_given klu_analyze_given |
|||
#define KLU_alloc_symbolic klu_alloc_symbolic |
|||
#define KLU_free_symbolic klu_free_symbolic |
|||
#define KLU_defaults klu_defaults |
|||
#define KLU_free klu_free |
|||
#define KLU_malloc klu_malloc |
|||
#define KLU_realloc klu_realloc |
|||
#define KLU_add_size_t klu_add_size_t |
|||
#define KLU_mult_size_t klu_mult_size_t |
|||
|
|||
#define KLU_symbolic klu_symbolic |
|||
#define KLU_numeric klu_numeric |
|||
#define KLU_common klu_common |
|||
|
|||
#define BTF_order btf_order |
|||
#define BTF_strongcomp btf_strongcomp |
|||
|
|||
#define AMD_order amd_order |
|||
#define COLAMD colamd |
|||
#define COLAMD_recommended colamd_recommended |
|||
|
|||
#endif |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* Numerical relop macros for correctly handling the NaN case */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* |
|||
SCALAR_IS_NAN(x): |
|||
True if x is NaN. False otherwise. The commonly-existing isnan(x) |
|||
function could be used, but it's not in Kernighan & Ritchie 2nd edition |
|||
(ANSI C). It may appear in <math.h>, but I'm not certain about |
|||
portability. The expression x != x is true if and only if x is NaN, |
|||
according to the IEEE 754 floating-point standard. |
|||
|
|||
SCALAR_IS_ZERO(x): |
|||
True if x is zero. False if x is nonzero, NaN, or +/- Inf. |
|||
This is (x == 0) if the compiler is IEEE 754 compliant. |
|||
|
|||
SCALAR_IS_NONZERO(x): |
|||
True if x is nonzero, NaN, or +/- Inf. False if x zero. |
|||
This is (x != 0) if the compiler is IEEE 754 compliant. |
|||
|
|||
SCALAR_IS_LTZERO(x): |
|||
True if x is < zero or -Inf. False if x is >= 0, NaN, or +Inf. |
|||
This is (x < 0) if the compiler is IEEE 754 compliant. |
|||
*/ |
|||
|
|||
/* These all work properly, according to the IEEE 754 standard ... except on */ |
|||
/* a PC with windows. Works fine in Linux on the same PC... */ |
|||
#define SCALAR_IS_NAN(x) ((x) != (x)) |
|||
#define SCALAR_IS_ZERO(x) ((x) == 0.) |
|||
#define SCALAR_IS_NONZERO(x) ((x) != 0.) |
|||
#define SCALAR_IS_LTZERO(x) ((x) < 0.) |
|||
|
|||
|
|||
/* scalar absolute value macro. If x is NaN, the result is NaN: */ |
|||
#define SCALAR_ABS(x) ((SCALAR_IS_LTZERO (x)) ? -(x) : (x)) |
|||
|
|||
/* print a scalar (avoid printing "-0" for negative zero). */ |
|||
#ifdef NPRINT |
|||
#define PRINT_SCALAR(a) |
|||
#else |
|||
#define PRINT_SCALAR(a) \ |
|||
{ \ |
|||
if (SCALAR_IS_NONZERO (a)) \ |
|||
{ \ |
|||
PRINTF ((" (%g)", (a))) ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
PRINTF ((" (0)")) ; \ |
|||
} \ |
|||
} |
|||
#endif |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* Real floating-point arithmetic */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
#ifndef COMPLEX |
|||
|
|||
typedef double Unit ; |
|||
#define Entry double |
|||
|
|||
#define SPLIT(s) (1) |
|||
#define REAL(c) (c) |
|||
#define IMAG(c) (0.) |
|||
#define ASSIGN(c,s1,s2,p,split) { (c) = (s1)[p] ; } |
|||
#define CLEAR(c) { (c) = 0. ; } |
|||
#define CLEAR_AND_INCREMENT(p) { *p++ = 0. ; } |
|||
#define IS_NAN(a) SCALAR_IS_NAN (a) |
|||
#define IS_ZERO(a) SCALAR_IS_ZERO (a) |
|||
#define IS_NONZERO(a) SCALAR_IS_NONZERO (a) |
|||
#define SCALE_DIV(c,s) { (c) /= (s) ; } |
|||
#define SCALE_DIV_ASSIGN(a,c,s) { a = c / s ; } |
|||
#define SCALE(c,s) { (c) *= (s) ; } |
|||
#define ASSEMBLE(c,a) { (c) += (a) ; } |
|||
#define ASSEMBLE_AND_INCREMENT(c,p) { (c) += *p++ ; } |
|||
#define DECREMENT(c,a) { (c) -= (a) ; } |
|||
#define MULT(c,a,b) { (c) = (a) * (b) ; } |
|||
#define MULT_CONJ(c,a,b) { (c) = (a) * (b) ; } |
|||
#define MULT_SUB(c,a,b) { (c) -= (a) * (b) ; } |
|||
#define MULT_SUB_CONJ(c,a,b) { (c) -= (a) * (b) ; } |
|||
#define DIV(c,a,b) { (c) = (a) / (b) ; } |
|||
#define RECIPROCAL(c) { (c) = 1.0 / (c) ; } |
|||
#define DIV_CONJ(c,a,b) { (c) = (a) / (b) ; } |
|||
#define APPROX_ABS(s,a) { (s) = SCALAR_ABS (a) ; } |
|||
#define ABS(s,a) { (s) = SCALAR_ABS (a) ; } |
|||
#define PRINT_ENTRY(a) PRINT_SCALAR (a) |
|||
#define CONJ(a,x) a = x |
|||
|
|||
/* for flop counts */ |
|||
#define MULTSUB_FLOPS 2. /* c -= a*b */ |
|||
#define DIV_FLOPS 1. /* c = a/b */ |
|||
#define ABS_FLOPS 0. /* c = abs (a) */ |
|||
#define ASSEMBLE_FLOPS 1. /* c += a */ |
|||
#define DECREMENT_FLOPS 1. /* c -= a */ |
|||
#define MULT_FLOPS 1. /* c = a*b */ |
|||
#define SCALE_FLOPS 1. /* c = a/s */ |
|||
|
|||
#else |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
/* Complex floating-point arithmetic */ |
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* |
|||
Note: An alternative to this Double_Complex type would be to use a |
|||
struct { double r ; double i ; }. The problem with that method |
|||
(used by the Sun Performance Library, for example) is that ANSI C provides |
|||
no guarantee about the layout of a struct. It is possible that the sizeof |
|||
the struct above would be greater than 2 * sizeof (double). This would |
|||
mean that the complex BLAS could not be used. The method used here avoids |
|||
that possibility. ANSI C *does* guarantee that an array of structs has |
|||
the same size as n times the size of one struct. |
|||
|
|||
The ANSI C99 version of the C language includes a "double _Complex" type. |
|||
It should be possible in that case to do the following: |
|||
|
|||
#define Entry double _Complex |
|||
|
|||
and remove the Double_Complex struct. The macros, below, could then be |
|||
replaced with instrinsic operators. Note that the #define Real and |
|||
#define Imag should also be removed (they only appear in this file). |
|||
|
|||
For the MULT, MULT_SUB, MULT_SUB_CONJ, and MULT_CONJ macros, |
|||
the output argument c cannot be the same as any input argument. |
|||
|
|||
*/ |
|||
|
|||
typedef struct |
|||
{ |
|||
double component [2] ; /* real and imaginary parts */ |
|||
|
|||
} Double_Complex ; |
|||
|
|||
typedef Double_Complex Unit ; |
|||
#define Entry Double_Complex |
|||
#define Real component [0] |
|||
#define Imag component [1] |
|||
|
|||
/* for flop counts */ |
|||
#define MULTSUB_FLOPS 8. /* c -= a*b */ |
|||
#define DIV_FLOPS 9. /* c = a/b */ |
|||
#define ABS_FLOPS 6. /* c = abs (a), count sqrt as one flop */ |
|||
#define ASSEMBLE_FLOPS 2. /* c += a */ |
|||
#define DECREMENT_FLOPS 2. /* c -= a */ |
|||
#define MULT_FLOPS 6. /* c = a*b */ |
|||
#define SCALE_FLOPS 2. /* c = a/s or c = a*s */ |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* real part of c */ |
|||
#define REAL(c) ((c).Real) |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* imag part of c */ |
|||
#define IMAG(c) ((c).Imag) |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* Return TRUE if a complex number is in split form, FALSE if in packed form */ |
|||
#define SPLIT(sz) ((sz) != (double *) NULL) |
|||
|
|||
/* c = (s1) + (s2)*i, if s2 is null, then X is in "packed" format (compatible |
|||
* with Entry and ANSI C99 double _Complex type). */ |
|||
/*#define ASSIGN(c,s1,s2,p,split) \ |
|||
{ \ |
|||
if (split) \ |
|||
{ \ |
|||
(c).Real = (s1)[p] ; \ |
|||
(c).Imag = (s2)[p] ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
(c) = ((Entry *)(s1))[p] ; \ |
|||
} \ |
|||
}*/ |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
#define CONJ(a, x) \ |
|||
{ \ |
|||
a.Real = x.Real ; \ |
|||
a.Imag = -x.Imag ; \ |
|||
} |
|||
|
|||
/* c = 0 */ |
|||
#define CLEAR(c) \ |
|||
{ \ |
|||
(c).Real = 0. ; \ |
|||
(c).Imag = 0. ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* *p++ = 0 */ |
|||
#define CLEAR_AND_INCREMENT(p) \ |
|||
{ \ |
|||
p->Real = 0. ; \ |
|||
p->Imag = 0. ; \ |
|||
p++ ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* True if a == 0 */ |
|||
#define IS_ZERO(a) \ |
|||
(SCALAR_IS_ZERO ((a).Real) && SCALAR_IS_ZERO ((a).Imag)) |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* True if a is NaN */ |
|||
#define IS_NAN(a) \ |
|||
(SCALAR_IS_NAN ((a).Real) || SCALAR_IS_NAN ((a).Imag)) |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* True if a != 0 */ |
|||
#define IS_NONZERO(a) \ |
|||
(SCALAR_IS_NONZERO ((a).Real) || SCALAR_IS_NONZERO ((a).Imag)) |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* a = c/s */ |
|||
#define SCALE_DIV_ASSIGN(a,c,s) \ |
|||
{ \ |
|||
a.Real = c.Real / s ; \ |
|||
a.Imag = c.Imag / s ; \ |
|||
} |
|||
|
|||
/* c /= s */ |
|||
#define SCALE_DIV(c,s) \ |
|||
{ \ |
|||
(c).Real /= (s) ; \ |
|||
(c).Imag /= (s) ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c *= s */ |
|||
#define SCALE(c,s) \ |
|||
{ \ |
|||
(c).Real *= (s) ; \ |
|||
(c).Imag *= (s) ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c += a */ |
|||
#define ASSEMBLE(c,a) \ |
|||
{ \ |
|||
(c).Real += (a).Real ; \ |
|||
(c).Imag += (a).Imag ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c += *p++ */ |
|||
#define ASSEMBLE_AND_INCREMENT(c,p) \ |
|||
{ \ |
|||
(c).Real += p->Real ; \ |
|||
(c).Imag += p->Imag ; \ |
|||
p++ ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c -= a */ |
|||
#define DECREMENT(c,a) \ |
|||
{ \ |
|||
(c).Real -= (a).Real ; \ |
|||
(c).Imag -= (a).Imag ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c = a*b, assert because c cannot be the same as a or b */ |
|||
#define MULT(c,a,b) \ |
|||
{ \ |
|||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ |
|||
(c).Real = (a).Real * (b).Real - (a).Imag * (b).Imag ; \ |
|||
(c).Imag = (a).Imag * (b).Real + (a).Real * (b).Imag ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c = a*conjugate(b), assert because c cannot be the same as a or b */ |
|||
#define MULT_CONJ(c,a,b) \ |
|||
{ \ |
|||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ |
|||
(c).Real = (a).Real * (b).Real + (a).Imag * (b).Imag ; \ |
|||
(c).Imag = (a).Imag * (b).Real - (a).Real * (b).Imag ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c -= a*b, assert because c cannot be the same as a or b */ |
|||
#define MULT_SUB(c,a,b) \ |
|||
{ \ |
|||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ |
|||
(c).Real -= (a).Real * (b).Real - (a).Imag * (b).Imag ; \ |
|||
(c).Imag -= (a).Imag * (b).Real + (a).Real * (b).Imag ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c -= a*conjugate(b), assert because c cannot be the same as a or b */ |
|||
#define MULT_SUB_CONJ(c,a,b) \ |
|||
{ \ |
|||
ASSERT (&(c) != &(a) && &(c) != &(b)) ; \ |
|||
(c).Real -= (a).Real * (b).Real + (a).Imag * (b).Imag ; \ |
|||
(c).Imag -= (a).Imag * (b).Real - (a).Real * (b).Imag ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c = a/b, be careful to avoid underflow and overflow */ |
|||
#ifdef MATHWORKS |
|||
#define DIV(c,a,b) \ |
|||
{ \ |
|||
(void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (b).Imag, \ |
|||
&((c).Real), &((c).Imag)) ; \ |
|||
} |
|||
#else |
|||
/* This uses ACM Algo 116, by R. L. Smith, 1962. */ |
|||
/* c can be the same variable as a or b. */ |
|||
/* Ignore NaN case for double relop br>=bi. */ |
|||
#define DIV(c,a,b) \ |
|||
{ \ |
|||
double r, den, ar, ai, br, bi ; \ |
|||
br = (b).Real ; \ |
|||
bi = (b).Imag ; \ |
|||
ar = (a).Real ; \ |
|||
ai = (a).Imag ; \ |
|||
if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \ |
|||
{ \ |
|||
r = bi / br ; \ |
|||
den = br + r * bi ; \ |
|||
(c).Real = (ar + ai * r) / den ; \ |
|||
(c).Imag = (ai - ar * r) / den ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
r = br / bi ; \ |
|||
den = r * br + bi ; \ |
|||
(c).Real = (ar * r + ai) / den ; \ |
|||
(c).Imag = (ai * r - ar) / den ; \ |
|||
} \ |
|||
} |
|||
#endif |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c = 1/c, be careful to avoid underflow and overflow */ |
|||
/* Not used if MATHWORKS is defined. */ |
|||
/* This uses ACM Algo 116, by R. L. Smith, 1962. */ |
|||
/* Ignore NaN case for double relop cr>=ci. */ |
|||
#define RECIPROCAL(c) \ |
|||
{ \ |
|||
double r, den, cr, ci ; \ |
|||
cr = (c).Real ; \ |
|||
ci = (c).Imag ; \ |
|||
if (SCALAR_ABS (cr) >= SCALAR_ABS (ci)) \ |
|||
{ \ |
|||
r = ci / cr ; \ |
|||
den = cr + r * ci ; \ |
|||
(c).Real = 1.0 / den ; \ |
|||
(c).Imag = - r / den ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
r = cr / ci ; \ |
|||
den = r * cr + ci ; \ |
|||
(c).Real = r / den ; \ |
|||
(c).Imag = - 1.0 / den ; \ |
|||
} \ |
|||
} |
|||
|
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* c = a/conjugate(b), be careful to avoid underflow and overflow */ |
|||
#ifdef MATHWORKS |
|||
#define DIV_CONJ(c,a,b) \ |
|||
{ \ |
|||
(void) utDivideComplex ((a).Real, (a).Imag, (b).Real, (-(b).Imag), \ |
|||
&((c).Real), &((c).Imag)) ; \ |
|||
} |
|||
#else |
|||
/* This uses ACM Algo 116, by R. L. Smith, 1962. */ |
|||
/* c can be the same variable as a or b. */ |
|||
/* Ignore NaN case for double relop br>=bi. */ |
|||
#define DIV_CONJ(c,a,b) \ |
|||
{ \ |
|||
double r, den, ar, ai, br, bi ; \ |
|||
br = (b).Real ; \ |
|||
bi = (b).Imag ; \ |
|||
ar = (a).Real ; \ |
|||
ai = (a).Imag ; \ |
|||
if (SCALAR_ABS (br) >= SCALAR_ABS (bi)) \ |
|||
{ \ |
|||
r = (-bi) / br ; \ |
|||
den = br - r * bi ; \ |
|||
(c).Real = (ar + ai * r) / den ; \ |
|||
(c).Imag = (ai - ar * r) / den ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
r = br / (-bi) ; \ |
|||
den = r * br - bi; \ |
|||
(c).Real = (ar * r + ai) / den ; \ |
|||
(c).Imag = (ai * r - ar) / den ; \ |
|||
} \ |
|||
} |
|||
#endif |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* approximate absolute value, s = |r|+|i| */ |
|||
#define APPROX_ABS(s,a) \ |
|||
{ \ |
|||
(s) = SCALAR_ABS ((a).Real) + SCALAR_ABS ((a).Imag) ; \ |
|||
} |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* exact absolute value, s = sqrt (a.real^2 + amag^2) */ |
|||
#ifdef MATHWORKS |
|||
#define ABS(s,a) \ |
|||
{ \ |
|||
(s) = utFdlibm_hypot ((a).Real, (a).Imag) ; \ |
|||
} |
|||
#else |
|||
/* Ignore NaN case for the double relops ar>=ai and ar+ai==ar. */ |
|||
#define ABS(s,a) \ |
|||
{ \ |
|||
double r, ar, ai ; \ |
|||
ar = SCALAR_ABS ((a).Real) ; \ |
|||
ai = SCALAR_ABS ((a).Imag) ; \ |
|||
if (ar >= ai) \ |
|||
{ \ |
|||
if (ar + ai == ar) \ |
|||
{ \ |
|||
(s) = ar ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
r = ai / ar ; \ |
|||
(s) = ar * sqrt (1.0 + r*r) ; \ |
|||
} \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
if (ai + ar == ai) \ |
|||
{ \ |
|||
(s) = ai ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
r = ar / ai ; \ |
|||
(s) = ai * sqrt (1.0 + r*r) ; \ |
|||
} \ |
|||
} \ |
|||
} |
|||
#endif |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
/* print an entry (avoid printing "-0" for negative zero). */ |
|||
#ifdef NPRINT |
|||
#define PRINT_ENTRY(a) |
|||
#else |
|||
#define PRINT_ENTRY(a) \ |
|||
{ \ |
|||
if (SCALAR_IS_NONZERO ((a).Real)) \ |
|||
{ \ |
|||
PRINTF ((" (%g", (a).Real)) ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
PRINTF ((" (0")) ; \ |
|||
} \ |
|||
if (SCALAR_IS_LTZERO ((a).Imag)) \ |
|||
{ \ |
|||
PRINTF ((" - %gi)", -(a).Imag)) ; \ |
|||
} \ |
|||
else if (SCALAR_IS_ZERO ((a).Imag)) \ |
|||
{ \ |
|||
PRINTF ((" + 0i)")) ; \ |
|||
} \ |
|||
else \ |
|||
{ \ |
|||
PRINTF ((" + %gi)", (a).Imag)) ; \ |
|||
} \ |
|||
} |
|||
#endif |
|||
|
|||
/* -------------------------------------------------------------------------- */ |
|||
|
|||
#endif /* #ifndef COMPLEX */ |
|||
|
|||
#endif |
|||
Write
Preview
Loading…
Cancel
Save
Reference in new issue