Browse Source
convert float to double
convert float to double
9 changed files with 3548 additions and 3501 deletions
-
5ChangeLog
-
40src/frontend/com_fft.c
-
239src/maths/fft/fftext.c
-
14src/maths/fft/fftext.h
-
6162src/maths/fft/fftlib.c
-
12src/maths/fft/fftlib.h
-
569src/maths/fft/matlib.c
-
6src/maths/fft/matlib.h
-
2src/winmain.c
6162
src/maths/fft/fftlib.c
File diff suppressed because it is too large
View File
File diff suppressed because it is too large
View File
@ -1,297 +1,300 @@ |
|||||
/* a few routines from a vector/matrix library */ |
/* a few routines from a vector/matrix library */ |
||||
#include "matlib.h" |
#include "matlib.h" |
||||
|
|
||||
void xpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){ |
|
||||
/* not in-place matrix transpose */ |
|
||||
/* INPUTS */ |
|
||||
/* *indata = input data array */ |
|
||||
/* iRsiz = offset to between rows of input data array */ |
|
||||
/* oRsiz = offset to between rows of output data array */ |
|
||||
/* Nrows = number of rows in input data array */ |
|
||||
/* Ncols = number of columns in input data array */ |
|
||||
/* OUTPUTS */ |
|
||||
/* *outdata = output data array */ |
|
||||
|
void xpose(double *indata, long iRsiz, double *outdata, long oRsiz, long Nrows, long Ncols) |
||||
|
{ |
||||
|
/* not in-place matrix transpose */ |
||||
|
/* INPUTS */ |
||||
|
/* *indata = input data array */ |
||||
|
/* iRsiz = offset to between rows of input data array */ |
||||
|
/* oRsiz = offset to between rows of output data array */ |
||||
|
/* Nrows = number of rows in input data array */ |
||||
|
/* Ncols = number of columns in input data array */ |
||||
|
/* OUTPUTS */ |
||||
|
/* *outdata = output data array */ |
||||
|
|
||||
float *irow; /* pointer to input row start */ |
|
||||
float *ocol; /* pointer to output col start */ |
|
||||
float *idata; /* pointer to input data */ |
|
||||
float *odata; /* pointer to output data */ |
|
||||
long RowCnt; /* row counter */ |
|
||||
long ColCnt; /* col counter */ |
|
||||
float T0; /* data storage */ |
|
||||
float T1; /* data storage */ |
|
||||
float T2; /* data storage */ |
|
||||
float T3; /* data storage */ |
|
||||
float T4; /* data storage */ |
|
||||
float T5; /* data storage */ |
|
||||
float T6; /* data storage */ |
|
||||
float T7; /* data storage */ |
|
||||
const long inRsizd1 = iRsiz; |
|
||||
const long inRsizd2 = 2*iRsiz; |
|
||||
const long inRsizd3 = inRsizd2+iRsiz; |
|
||||
const long inRsizd4 = 4*iRsiz; |
|
||||
const long inRsizd5 = inRsizd3+inRsizd2; |
|
||||
const long inRsizd6 = inRsizd4+inRsizd2; |
|
||||
const long inRsizd7 = inRsizd4+inRsizd3; |
|
||||
const long inRsizd8 = 8*iRsiz; |
|
||||
|
double *irow; /* pointer to input row start */ |
||||
|
double *ocol; /* pointer to output col start */ |
||||
|
double *idata; /* pointer to input data */ |
||||
|
double *odata; /* pointer to output data */ |
||||
|
long RowCnt; /* row counter */ |
||||
|
long ColCnt; /* col counter */ |
||||
|
double T0; /* data storage */ |
||||
|
double T1; /* data storage */ |
||||
|
double T2; /* data storage */ |
||||
|
double T3; /* data storage */ |
||||
|
double T4; /* data storage */ |
||||
|
double T5; /* data storage */ |
||||
|
double T6; /* data storage */ |
||||
|
double T7; /* data storage */ |
||||
|
const long inRsizd1 = iRsiz; |
||||
|
const long inRsizd2 = 2*iRsiz; |
||||
|
const long inRsizd3 = inRsizd2+iRsiz; |
||||
|
const long inRsizd4 = 4*iRsiz; |
||||
|
const long inRsizd5 = inRsizd3+inRsizd2; |
||||
|
const long inRsizd6 = inRsizd4+inRsizd2; |
||||
|
const long inRsizd7 = inRsizd4+inRsizd3; |
||||
|
const long inRsizd8 = 8*iRsiz; |
||||
|
|
||||
ocol = outdata; |
|
||||
irow = indata; |
|
||||
for (RowCnt=Nrows/8; RowCnt>0; RowCnt--){ |
|
||||
idata = irow; |
|
||||
odata = ocol; |
|
||||
for (ColCnt=Ncols; ColCnt>0; ColCnt--){ |
|
||||
T0 = *idata; |
|
||||
T1 = *(idata+inRsizd1); |
|
||||
T2 = *(idata+inRsizd2); |
|
||||
T3 = *(idata+inRsizd3); |
|
||||
T4 = *(idata+inRsizd4); |
|
||||
T5 = *(idata+inRsizd5); |
|
||||
T6 = *(idata+inRsizd6); |
|
||||
T7 = *(idata+inRsizd7); |
|
||||
*odata = T0; |
|
||||
*(odata+1) = T1; |
|
||||
*(odata+2) = T2; |
|
||||
*(odata+3) = T3; |
|
||||
*(odata+4) = T4; |
|
||||
*(odata+5) = T5; |
|
||||
*(odata+6) = T6; |
|
||||
*(odata+7) = T7; |
|
||||
idata++; |
|
||||
odata += oRsiz; |
|
||||
} |
|
||||
irow += inRsizd8; |
|
||||
ocol += 8; |
|
||||
} |
|
||||
if (Nrows%8 != 0){ |
|
||||
for (ColCnt=Ncols; ColCnt>0; ColCnt--){ |
|
||||
idata = irow++; |
|
||||
odata = ocol; |
|
||||
ocol += oRsiz; |
|
||||
for (RowCnt=Nrows%8; RowCnt>0; RowCnt--){ |
|
||||
T0 = *idata; |
|
||||
*odata++ = T0; |
|
||||
idata += iRsiz; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
ocol = outdata; |
||||
|
irow = indata; |
||||
|
for (RowCnt=Nrows/8; RowCnt>0; RowCnt--) { |
||||
|
idata = irow; |
||||
|
odata = ocol; |
||||
|
for (ColCnt=Ncols; ColCnt>0; ColCnt--) { |
||||
|
T0 = *idata; |
||||
|
T1 = *(idata+inRsizd1); |
||||
|
T2 = *(idata+inRsizd2); |
||||
|
T3 = *(idata+inRsizd3); |
||||
|
T4 = *(idata+inRsizd4); |
||||
|
T5 = *(idata+inRsizd5); |
||||
|
T6 = *(idata+inRsizd6); |
||||
|
T7 = *(idata+inRsizd7); |
||||
|
*odata = T0; |
||||
|
*(odata+1) = T1; |
||||
|
*(odata+2) = T2; |
||||
|
*(odata+3) = T3; |
||||
|
*(odata+4) = T4; |
||||
|
*(odata+5) = T5; |
||||
|
*(odata+6) = T6; |
||||
|
*(odata+7) = T7; |
||||
|
idata++; |
||||
|
odata += oRsiz; |
||||
|
} |
||||
|
irow += inRsizd8; |
||||
|
ocol += 8; |
||||
|
} |
||||
|
if (Nrows%8 != 0) { |
||||
|
for (ColCnt=Ncols; ColCnt>0; ColCnt--) { |
||||
|
idata = irow++; |
||||
|
odata = ocol; |
||||
|
ocol += oRsiz; |
||||
|
for (RowCnt=Nrows%8; RowCnt>0; RowCnt--) { |
||||
|
T0 = *idata; |
||||
|
*odata++ = T0; |
||||
|
idata += iRsiz; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
} |
} |
||||
|
|
||||
void cxpose(float *indata, long iRsiz, float *outdata, long oRsiz, long Nrows, long Ncols){ |
|
||||
/* not in-place complex float matrix transpose */ |
|
||||
/* INPUTS */ |
|
||||
/* *indata = input data array */ |
|
||||
/* iRsiz = offset to between rows of input data array */ |
|
||||
/* oRsiz = offset to between rows of output data array */ |
|
||||
/* Nrows = number of rows in input data array */ |
|
||||
/* Ncols = number of columns in input data array */ |
|
||||
/* OUTPUTS */ |
|
||||
/* *outdata = output data array */ |
|
||||
|
void cxpose(double *indata, long iRsiz, double *outdata, long oRsiz, long Nrows, long Ncols) |
||||
|
{ |
||||
|
/* not in-place complex double matrix transpose */ |
||||
|
/* INPUTS */ |
||||
|
/* *indata = input data array */ |
||||
|
/* iRsiz = offset to between rows of input data array */ |
||||
|
/* oRsiz = offset to between rows of output data array */ |
||||
|
/* Nrows = number of rows in input data array */ |
||||
|
/* Ncols = number of columns in input data array */ |
||||
|
/* OUTPUTS */ |
||||
|
/* *outdata = output data array */ |
||||
|
|
||||
float *irow; /* pointer to input row start */ |
|
||||
float *ocol; /* pointer to output col start */ |
|
||||
float *idata; /* pointer to input data */ |
|
||||
float *odata; /* pointer to output data */ |
|
||||
long RowCnt; /* row counter */ |
|
||||
long ColCnt; /* col counter */ |
|
||||
float T0r; /* data storage */ |
|
||||
float T0i; /* data storage */ |
|
||||
float T1r; /* data storage */ |
|
||||
float T1i; /* data storage */ |
|
||||
float T2r; /* data storage */ |
|
||||
float T2i; /* data storage */ |
|
||||
float T3r; /* data storage */ |
|
||||
float T3i; /* data storage */ |
|
||||
const long inRsizd1 = 2*iRsiz; |
|
||||
const long inRsizd1i = 2*iRsiz + 1; |
|
||||
const long inRsizd2 = 4*iRsiz; |
|
||||
const long inRsizd2i = 4*iRsiz + 1; |
|
||||
const long inRsizd3 = inRsizd2+inRsizd1; |
|
||||
const long inRsizd3i = inRsizd2+inRsizd1 + 1; |
|
||||
const long inRsizd4 = 8*iRsiz; |
|
||||
|
double *irow; /* pointer to input row start */ |
||||
|
double *ocol; /* pointer to output col start */ |
||||
|
double *idata; /* pointer to input data */ |
||||
|
double *odata; /* pointer to output data */ |
||||
|
long RowCnt; /* row counter */ |
||||
|
long ColCnt; /* col counter */ |
||||
|
double T0r; /* data storage */ |
||||
|
double T0i; /* data storage */ |
||||
|
double T1r; /* data storage */ |
||||
|
double T1i; /* data storage */ |
||||
|
double T2r; /* data storage */ |
||||
|
double T2i; /* data storage */ |
||||
|
double T3r; /* data storage */ |
||||
|
double T3i; /* data storage */ |
||||
|
const long inRsizd1 = 2*iRsiz; |
||||
|
const long inRsizd1i = 2*iRsiz + 1; |
||||
|
const long inRsizd2 = 4*iRsiz; |
||||
|
const long inRsizd2i = 4*iRsiz + 1; |
||||
|
const long inRsizd3 = inRsizd2+inRsizd1; |
||||
|
const long inRsizd3i = inRsizd2+inRsizd1 + 1; |
||||
|
const long inRsizd4 = 8*iRsiz; |
||||
|
|
||||
ocol = outdata; |
|
||||
irow = indata; |
|
||||
for (RowCnt=Nrows/4; RowCnt>0; RowCnt--){ |
|
||||
idata = irow; |
|
||||
odata = ocol; |
|
||||
for (ColCnt=Ncols; ColCnt>0; ColCnt--){ |
|
||||
T0r = *idata; |
|
||||
T0i = *(idata +1); |
|
||||
T1r = *(idata+inRsizd1); |
|
||||
T1i = *(idata+inRsizd1i); |
|
||||
T2r = *(idata+inRsizd2); |
|
||||
T2i = *(idata+inRsizd2i); |
|
||||
T3r = *(idata+inRsizd3); |
|
||||
T3i = *(idata+inRsizd3i); |
|
||||
*odata = T0r; |
|
||||
*(odata+1) = T0i; |
|
||||
*(odata+2) = T1r; |
|
||||
*(odata+3) = T1i; |
|
||||
*(odata+4) = T2r; |
|
||||
*(odata+5) = T2i; |
|
||||
*(odata+6) = T3r; |
|
||||
*(odata+7) = T3i; |
|
||||
idata+=2; |
|
||||
odata += 2*oRsiz; |
|
||||
} |
|
||||
irow += inRsizd4; |
|
||||
ocol += 8; |
|
||||
} |
|
||||
if (Nrows%4 != 0){ |
|
||||
for (ColCnt=Ncols; ColCnt>0; ColCnt--){ |
|
||||
idata = irow; |
|
||||
odata = ocol; |
|
||||
for (RowCnt=Nrows%4; RowCnt>0; RowCnt--){ |
|
||||
T0r = *idata; |
|
||||
T0i = *(idata+1); |
|
||||
*odata = T0r; |
|
||||
*(odata+1) = T0i; |
|
||||
odata+=2; |
|
||||
idata += 2*iRsiz; |
|
||||
} |
|
||||
irow+=2; |
|
||||
ocol += 2*oRsiz; |
|
||||
} |
|
||||
} |
|
||||
|
ocol = outdata; |
||||
|
irow = indata; |
||||
|
for (RowCnt=Nrows/4; RowCnt>0; RowCnt--) { |
||||
|
idata = irow; |
||||
|
odata = ocol; |
||||
|
for (ColCnt=Ncols; ColCnt>0; ColCnt--) { |
||||
|
T0r = *idata; |
||||
|
T0i = *(idata +1); |
||||
|
T1r = *(idata+inRsizd1); |
||||
|
T1i = *(idata+inRsizd1i); |
||||
|
T2r = *(idata+inRsizd2); |
||||
|
T2i = *(idata+inRsizd2i); |
||||
|
T3r = *(idata+inRsizd3); |
||||
|
T3i = *(idata+inRsizd3i); |
||||
|
*odata = T0r; |
||||
|
*(odata+1) = T0i; |
||||
|
*(odata+2) = T1r; |
||||
|
*(odata+3) = T1i; |
||||
|
*(odata+4) = T2r; |
||||
|
*(odata+5) = T2i; |
||||
|
*(odata+6) = T3r; |
||||
|
*(odata+7) = T3i; |
||||
|
idata+=2; |
||||
|
odata += 2*oRsiz; |
||||
|
} |
||||
|
irow += inRsizd4; |
||||
|
ocol += 8; |
||||
|
} |
||||
|
if (Nrows%4 != 0) { |
||||
|
for (ColCnt=Ncols; ColCnt>0; ColCnt--) { |
||||
|
idata = irow; |
||||
|
odata = ocol; |
||||
|
for (RowCnt=Nrows%4; RowCnt>0; RowCnt--) { |
||||
|
T0r = *idata; |
||||
|
T0i = *(idata+1); |
||||
|
*odata = T0r; |
||||
|
*(odata+1) = T0i; |
||||
|
odata+=2; |
||||
|
idata += 2*iRsiz; |
||||
|
} |
||||
|
irow+=2; |
||||
|
ocol += 2*oRsiz; |
||||
|
} |
||||
|
} |
||||
} |
} |
||||
|
|
||||
void cvprod(float *a, float *b, float *out, long N){ |
|
||||
/* complex vector product, can be in-place */ |
|
||||
/* product of complex vector *a times complex vector *b */ |
|
||||
/* INPUTS */ |
|
||||
/* N vector length */ |
|
||||
/* *a complex vector length N complex numbers */ |
|
||||
/* *b complex vector length N complex numbers */ |
|
||||
/* OUTPUTS */ |
|
||||
/* *out complex vector length N */ |
|
||||
|
void cvprod(double *a, double *b, double *out, long N) |
||||
|
{ |
||||
|
/* complex vector product, can be in-place */ |
||||
|
/* product of complex vector *a times complex vector *b */ |
||||
|
/* INPUTS */ |
||||
|
/* N vector length */ |
||||
|
/* *a complex vector length N complex numbers */ |
||||
|
/* *b complex vector length N complex numbers */ |
||||
|
/* OUTPUTS */ |
||||
|
/* *out complex vector length N */ |
||||
|
|
||||
long OutCnt; /* output counter */ |
|
||||
float A0R; /* A storage */ |
|
||||
float A0I; /* A storage */ |
|
||||
float A1R; /* A storage */ |
|
||||
float A1I; /* A storage */ |
|
||||
float A2R; /* A storage */ |
|
||||
float A2I; /* A storage */ |
|
||||
float A3R; /* A storage */ |
|
||||
float A3I; /* A storage */ |
|
||||
float B0R; /* B storage */ |
|
||||
float B0I; /* B storage */ |
|
||||
float B1R; /* B storage */ |
|
||||
float B1I; /* B storage */ |
|
||||
float B2R; /* B storage */ |
|
||||
float B2I; /* B storage */ |
|
||||
float B3R; /* B storage */ |
|
||||
float B3I; /* B storage */ |
|
||||
float T0R; /* TMP storage */ |
|
||||
float T0I; /* TMP storage */ |
|
||||
float T1R; /* TMP storage */ |
|
||||
float T1I; /* TMP storage */ |
|
||||
float T2R; /* TMP storage */ |
|
||||
float T2I; /* TMP storage */ |
|
||||
float T3R; /* TMP storage */ |
|
||||
float T3I; /* TMP storage */ |
|
||||
|
long OutCnt; /* output counter */ |
||||
|
double A0R; /* A storage */ |
||||
|
double A0I; /* A storage */ |
||||
|
double A1R; /* A storage */ |
||||
|
double A1I; /* A storage */ |
||||
|
double A2R; /* A storage */ |
||||
|
double A2I; /* A storage */ |
||||
|
double A3R; /* A storage */ |
||||
|
double A3I; /* A storage */ |
||||
|
double B0R; /* B storage */ |
||||
|
double B0I; /* B storage */ |
||||
|
double B1R; /* B storage */ |
||||
|
double B1I; /* B storage */ |
||||
|
double B2R; /* B storage */ |
||||
|
double B2I; /* B storage */ |
||||
|
double B3R; /* B storage */ |
||||
|
double B3I; /* B storage */ |
||||
|
double T0R; /* TMP storage */ |
||||
|
double T0I; /* TMP storage */ |
||||
|
double T1R; /* TMP storage */ |
||||
|
double T1I; /* TMP storage */ |
||||
|
double T2R; /* TMP storage */ |
||||
|
double T2I; /* TMP storage */ |
||||
|
double T3R; /* TMP storage */ |
||||
|
double T3I; /* TMP storage */ |
||||
|
|
||||
if (N>=4){ |
|
||||
A0R = *a; |
|
||||
B0R = *b; |
|
||||
A0I = *(a +1); |
|
||||
B0I = *(b +1); |
|
||||
A1R = *(a +2); |
|
||||
B1R = *(b +2); |
|
||||
A1I = *(a +3); |
|
||||
B1I = *(b +3); |
|
||||
A2R = *(a +4); |
|
||||
B2R = *(b +4); |
|
||||
A2I = *(a +5); |
|
||||
B2I = *(b +5); |
|
||||
A3R = *(a +6); |
|
||||
B3R = *(b +6); |
|
||||
A3I = *(a +7); |
|
||||
B3I = *(b +7); |
|
||||
T0R = A0R * B0R; |
|
||||
T0I = (A0R * B0I); |
|
||||
T1R = A1R * B1R; |
|
||||
T1I = (A1R * B1I); |
|
||||
T2R = A2R * B2R; |
|
||||
T2I = (A2R * B2I); |
|
||||
T3R = A3R * B3R; |
|
||||
T3I = (A3R * B3I); |
|
||||
T0R -= (A0I * B0I); |
|
||||
T0I = A0I * B0R + T0I; |
|
||||
T1R -= (A1I * B1I); |
|
||||
T1I = A1I * B1R + T1I; |
|
||||
T2R -= (A2I * B2I); |
|
||||
T2I = A2I * B2R + T2I; |
|
||||
T3R -= (A3I * B3I); |
|
||||
T3I = A3I * B3R + T3I; |
|
||||
for (OutCnt=N/4-1; OutCnt > 0; OutCnt--){ |
|
||||
a += 8; |
|
||||
b += 8; |
|
||||
A0R = *a; |
|
||||
B0R = *b; |
|
||||
A0I = *(a +1); |
|
||||
B0I = *(b +1); |
|
||||
A1R = *(a +2); |
|
||||
B1R = *(b +2); |
|
||||
A1I = *(a +3); |
|
||||
B1I = *(b +3); |
|
||||
A2R = *(a +4); |
|
||||
B2R = *(b +4); |
|
||||
A2I = *(a +5); |
|
||||
B2I = *(b +5); |
|
||||
A3R = *(a +6); |
|
||||
B3R = *(b +6); |
|
||||
A3I = *(a +7); |
|
||||
B3I = *(b +7); |
|
||||
*out = T0R; |
|
||||
*(out +1) = T0I; |
|
||||
*(out +2) = T1R; |
|
||||
*(out +3) = T1I; |
|
||||
*(out +4) = T2R; |
|
||||
*(out +5) = T2I; |
|
||||
*(out +6) = T3R; |
|
||||
*(out +7) = T3I; |
|
||||
T0R = A0R * B0R; |
|
||||
T0I = (A0R * B0I); |
|
||||
T1R = A1R * B1R; |
|
||||
T1I = (A1R * B1I); |
|
||||
T2R = A2R * B2R; |
|
||||
T2I = (A2R * B2I); |
|
||||
T3R = A3R * B3R; |
|
||||
T3I = (A3R * B3I); |
|
||||
T0R -= (A0I * B0I); |
|
||||
T0I = A0I * B0R + T0I; |
|
||||
T1R -= (A1I * B1I); |
|
||||
T1I = A1I * B1R + T1I; |
|
||||
T2R -= (A2I * B2I); |
|
||||
T2I = A2I * B2R + T2I; |
|
||||
T3R -= (A3I * B3I); |
|
||||
T3I = A3I * B3R + T3I; |
|
||||
out += 8; |
|
||||
} |
|
||||
a += 8; |
|
||||
b += 8; |
|
||||
*out = T0R; |
|
||||
*(out +1) = T0I; |
|
||||
*(out +2) = T1R; |
|
||||
*(out +3) = T1I; |
|
||||
*(out +4) = T2R; |
|
||||
*(out +5) = T2I; |
|
||||
*(out +6) = T3R; |
|
||||
*(out +7) = T3I; |
|
||||
out += 8; |
|
||||
} |
|
||||
for (OutCnt=N%4; OutCnt > 0; OutCnt--){ |
|
||||
A0R = *a++; |
|
||||
B0R = *b++; |
|
||||
A0I = *a++; |
|
||||
B0I = *b++; |
|
||||
T0R = A0R * B0R; |
|
||||
T0I = (A0R * B0I); |
|
||||
T0R -= (A0I * B0I); |
|
||||
T0I = A0I * B0R + T0I; |
|
||||
*out++ = T0R; |
|
||||
*out++ = T0I; |
|
||||
} |
|
||||
|
if (N>=4) { |
||||
|
A0R = *a; |
||||
|
B0R = *b; |
||||
|
A0I = *(a +1); |
||||
|
B0I = *(b +1); |
||||
|
A1R = *(a +2); |
||||
|
B1R = *(b +2); |
||||
|
A1I = *(a +3); |
||||
|
B1I = *(b +3); |
||||
|
A2R = *(a +4); |
||||
|
B2R = *(b +4); |
||||
|
A2I = *(a +5); |
||||
|
B2I = *(b +5); |
||||
|
A3R = *(a +6); |
||||
|
B3R = *(b +6); |
||||
|
A3I = *(a +7); |
||||
|
B3I = *(b +7); |
||||
|
T0R = A0R * B0R; |
||||
|
T0I = (A0R * B0I); |
||||
|
T1R = A1R * B1R; |
||||
|
T1I = (A1R * B1I); |
||||
|
T2R = A2R * B2R; |
||||
|
T2I = (A2R * B2I); |
||||
|
T3R = A3R * B3R; |
||||
|
T3I = (A3R * B3I); |
||||
|
T0R -= (A0I * B0I); |
||||
|
T0I = A0I * B0R + T0I; |
||||
|
T1R -= (A1I * B1I); |
||||
|
T1I = A1I * B1R + T1I; |
||||
|
T2R -= (A2I * B2I); |
||||
|
T2I = A2I * B2R + T2I; |
||||
|
T3R -= (A3I * B3I); |
||||
|
T3I = A3I * B3R + T3I; |
||||
|
for (OutCnt=N/4-1; OutCnt > 0; OutCnt--) { |
||||
|
a += 8; |
||||
|
b += 8; |
||||
|
A0R = *a; |
||||
|
B0R = *b; |
||||
|
A0I = *(a +1); |
||||
|
B0I = *(b +1); |
||||
|
A1R = *(a +2); |
||||
|
B1R = *(b +2); |
||||
|
A1I = *(a +3); |
||||
|
B1I = *(b +3); |
||||
|
A2R = *(a +4); |
||||
|
B2R = *(b +4); |
||||
|
A2I = *(a +5); |
||||
|
B2I = *(b +5); |
||||
|
A3R = *(a +6); |
||||
|
B3R = *(b +6); |
||||
|
A3I = *(a +7); |
||||
|
B3I = *(b +7); |
||||
|
*out = T0R; |
||||
|
*(out +1) = T0I; |
||||
|
*(out +2) = T1R; |
||||
|
*(out +3) = T1I; |
||||
|
*(out +4) = T2R; |
||||
|
*(out +5) = T2I; |
||||
|
*(out +6) = T3R; |
||||
|
*(out +7) = T3I; |
||||
|
T0R = A0R * B0R; |
||||
|
T0I = (A0R * B0I); |
||||
|
T1R = A1R * B1R; |
||||
|
T1I = (A1R * B1I); |
||||
|
T2R = A2R * B2R; |
||||
|
T2I = (A2R * B2I); |
||||
|
T3R = A3R * B3R; |
||||
|
T3I = (A3R * B3I); |
||||
|
T0R -= (A0I * B0I); |
||||
|
T0I = A0I * B0R + T0I; |
||||
|
T1R -= (A1I * B1I); |
||||
|
T1I = A1I * B1R + T1I; |
||||
|
T2R -= (A2I * B2I); |
||||
|
T2I = A2I * B2R + T2I; |
||||
|
T3R -= (A3I * B3I); |
||||
|
T3I = A3I * B3R + T3I; |
||||
|
out += 8; |
||||
|
} |
||||
|
a += 8; |
||||
|
b += 8; |
||||
|
*out = T0R; |
||||
|
*(out +1) = T0I; |
||||
|
*(out +2) = T1R; |
||||
|
*(out +3) = T1I; |
||||
|
*(out +4) = T2R; |
||||
|
*(out +5) = T2I; |
||||
|
*(out +6) = T3R; |
||||
|
*(out +7) = T3I; |
||||
|
out += 8; |
||||
|
} |
||||
|
for (OutCnt=N%4; OutCnt > 0; OutCnt--) { |
||||
|
A0R = *a++; |
||||
|
B0R = *b++; |
||||
|
A0I = *a++; |
||||
|
B0I = *b++; |
||||
|
T0R = A0R * B0R; |
||||
|
T0I = (A0R * B0I); |
||||
|
T0R -= (A0I * B0I); |
||||
|
T0I = A0I * B0R + T0I; |
||||
|
*out++ = T0R; |
||||
|
*out++ = T0I; |
||||
|
} |
||||
} |
} |
||||
Write
Preview
Loading…
Cancel
Save
Reference in new issue