62 #define NFSFT_DEFAULT_NFFT_CUTOFF 6
69 #define NFSFT_DEFAULT_THRESHOLD 1000
76 #define NFSFT_BREAK_EVEN 5
108 static inline void c2e(nfsft_plan *plan)
112 double _Complex last;
122 memset(plan->f_hat_intern,0U,(2*plan->N+2)*
sizeof(
double _Complex));
125 lowe = -plan->N + (plan->N%2);
129 for (n = lowe; n <= upe; n += 2)
133 xm = &(plan->f_hat_intern[NFSFT_INDEX(-1,n,plan)]);
134 xp = &(plan->f_hat_intern[NFSFT_INDEX(+1,n,plan)]);
135 for(k = 1; k <= plan->N; k++)
146 low = -plan->N + (1-plan->N%2);
151 for (n = low; n <= up; n += 2)
156 plan->f_hat_intern[NFSFT_INDEX(0,n,plan)] *= 2.0;
157 xp = &(plan->f_hat_intern[NFSFT_INDEX(-plan->N-1,n,plan)]);
161 xm = &(plan->f_hat_intern[NFSFT_INDEX(plan->N,n,plan)]);
163 *xm = 0.5 * _Complex_I * (0.5*xm[-1]);
165 for (k = plan->N-1; k > 0; k--)
168 *xm = 0.5 * _Complex_I * (0.5*(xm[-1] - last));
190 double _Complex last;
200 lowe = -plan->N + (plan->N%2);
204 for (n = lowe; n <= upe; n += 2)
208 xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
209 xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
210 for(k = 1; k <= plan->N; k++)
218 low = -plan->N + (1-plan->N%2);
222 for (n = low; n <= up; n += 2)
226 xm = &(plan->f_hat[NFSFT_INDEX(-1,n,plan)]);
227 xp = &(plan->f_hat[NFSFT_INDEX(+1,n,plan)]);
228 for(k = 1; k <= plan->N; k++)
233 plan->f_hat[NFSFT_INDEX(0,n,plan)] =
234 -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(1,n,plan)];
235 last = plan->f_hat[NFSFT_INDEX(1,n,plan)];
236 plan->f_hat[NFSFT_INDEX(1,n,plan)] =
237 -0.25*_Complex_I*plan->f_hat[NFSFT_INDEX(2,n,plan)];
239 xp = &(plan->f_hat[NFSFT_INDEX(2,n,plan)]);
240 for (k = 2; k < plan->N; k++)
243 *xp = -0.25 * _Complex_I * (xp[1] - last);
247 *xp = 0.25 * _Complex_I * last;
249 plan->f_hat[NFSFT_INDEX(0,n,plan)] *= 2.0;
253 void nfsft_init(nfsft_plan *plan,
int N,
int M)
256 nfsft_init_advanced(plan, N, M, NFSFT_MALLOC_X | NFSFT_MALLOC_F |
260 void nfsft_init_advanced(nfsft_plan* plan,
int N,
int M,
264 nfsft_init_guru(plan, N, M, flags, PRE_PHI_HUT | PRE_PSI | FFTW_INIT | NFFT_OMP_BLOCKWISE_ADJOINT |
268 void nfsft_init_guru(nfsft_plan *plan,
int N,
int M,
unsigned int flags,
269 unsigned int nfft_flags,
int nfft_cutoff)
287 plan->N_total = (2*plan->N+2)*(2*plan->N+2);
291 if (plan->flags & NFSFT_PRESERVE_F_HAT)
293 plan->f_hat_intern = (
double _Complex*) nfft_malloc(plan->N_total*
294 sizeof(
double _Complex));
298 if (plan->flags & NFSFT_MALLOC_F_HAT)
300 plan->f_hat = (
double _Complex*) nfft_malloc(plan->N_total*
301 sizeof(
double _Complex));
305 if (plan->flags & NFSFT_MALLOC_F)
307 plan->f = (
double _Complex*) nfft_malloc(plan->M_total*
sizeof(
double _Complex));
311 if (plan->flags & NFSFT_MALLOC_X)
313 plan->x = (
double*) nfft_malloc(plan->M_total*2*
sizeof(
double));
317 if (plan->flags & NFSFT_NO_FAST_ALGORITHM)
322 nfft_size = (
int*)nfft_malloc(2*
sizeof(
int));
323 fftw_size = (
int*)nfft_malloc(2*
sizeof(
int));
326 nfft_size[0] = 2*plan->N+2;
327 nfft_size[1] = 2*plan->N+2;
328 fftw_size[0] = 4*plan->N;
329 fftw_size[1] = 4*plan->N;
332 nfft_init_guru(&plan->plan_nfft, 2, nfft_size, plan->M_total, fftw_size,
333 nfft_cutoff, nfft_flags,
334 FFTW_ESTIMATE | FFTW_DESTROY_INPUT);
337 plan->plan_nfft.x = plan->x;
339 plan->plan_nfft.f = plan->f;
341 plan->plan_nfft.f_hat = plan->f_hat;
349 nfft_free(nfft_size);
350 nfft_free(fftw_size);
353 plan->mv_trafo = (void (*) (
void* ))nfsft_trafo;
354 plan->mv_adjoint = (void (*) (
void* ))nfsft_adjoint;
357 void nfsft_precompute(
int N,
double kappa,
unsigned int nfsft_flags,
358 unsigned int fpt_flags)
369 #pragma omp parallel default(shared)
371 int nthreads = omp_get_num_threads();
372 int threadid = omp_get_thread_num();
375 wisdom.nthreads = nthreads;
381 wisdom.flags = nfsft_flags;
388 if (
wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
412 if (
wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
423 #pragma omp parallel default(shared) private(n)
425 int nthreads = omp_get_num_threads();
426 int threadid = omp_get_thread_num();
429 wisdom.nthreads = nthreads;
430 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*
sizeof(fpt_set));
434 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
444 fpt_flags | FPT_AL_SYMMETRY | FPT_PERSISTENT_DATA);
458 #pragma omp parallel default(shared) private(n)
461 int nthreads = omp_get_num_threads();
462 int threadid = omp_get_thread_num();
465 wisdom.nthreads = nthreads;
466 wisdom.set_threads = (fpt_set*) nfft_malloc(nthreads*
sizeof(fpt_set));
473 fpt_flags | FPT_AL_SYMMETRY);
496 fpt_flags | FPT_AL_SYMMETRY);
526 void nfsft_forget(
void)
536 if (
wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
551 if (
wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
558 for (k = 0; k <
wisdom.nthreads; k++)
559 fpt_finalize(
wisdom.set_threads[k]);
560 nfft_free(
wisdom.set_threads);
572 void nfsft_finalize(nfsft_plan *plan)
578 nfft_finalize(&plan->plan_nfft);
582 if (plan->flags & NFSFT_PRESERVE_F_HAT)
584 nfft_free(plan->f_hat_intern);
588 if (plan->flags & NFSFT_MALLOC_F_HAT)
591 nfft_free(plan->f_hat);
595 if (plan->flags & NFSFT_MALLOC_F)
602 if (plan->flags & NFSFT_MALLOC_X)
609 void nfsft_trafo_direct(nfsft_plan *plan)
624 double _Complex temp;
631 plan->MEASURE_TIME_t[0] = 0.0;
632 plan->MEASURE_TIME_t[1] = 0.0;
633 plan->MEASURE_TIME_t[2] = 0.0;
636 if (
wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
642 if (plan->flags & NFSFT_PRESERVE_F_HAT)
644 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
645 sizeof(
double _Complex));
649 plan->f_hat_intern = plan->f_hat;
655 if (plan->flags & NFSFT_NORMALIZED)
659 #pragma omp parallel for default(shared) private(k,n)
661 for (k = 0; k <= plan->N; k++)
663 for (n = -k; n <= k; n++)
666 plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
667 sqrt((2*k+1)/(4.0*KPI));
678 for (m = 0; m < plan->M_total; m++)
680 plan->f[m] = plan->f_hat_intern[NFSFT_INDEX(0,0,plan)];
693 #pragma omp parallel for default(shared) private(m,stheta,sphi,f_m,n,a,n_abs,alpha,gamma,it2,it1,k,temp)
695 for (m = 0; m < plan->M_total; m++)
698 stheta = cos(2.0*KPI*plan->x[2*m+1]);
700 sphi = 2.0*KPI*plan->x[2*m];
709 for (n = -plan->N; n <= plan->N; n++)
712 a = &(plan->f_hat_intern[NFSFT_INDEX(0,n,plan)]);
724 for (k = plan->N; k > n_abs + 1; k--)
726 temp = a[k-2] + it2 *
gamma[k];
727 it2 = it1 + it2 *
alpha[k] * stheta;
734 it2 = it1 + it2 *
wisdom.
alpha[ROWK(n_abs)+1] * stheta;
744 pow(1- stheta * stheta, 0.5*n_abs) * cexp(_Complex_I*n*sphi);
753 void nfsft_adjoint_direct(nfsft_plan *plan)
767 double _Complex temp;
772 plan->MEASURE_TIME_t[0] = 0.0;
773 plan->MEASURE_TIME_t[1] = 0.0;
774 plan->MEASURE_TIME_t[2] = 0.0;
777 if (
wisdom.flags & NFSFT_NO_DIRECT_ALGORITHM)
783 memset(plan->f_hat,0U,plan->N_total*
sizeof(
double _Complex));
791 for (m = 0; m < plan->M_total; m++)
793 plan->f_hat[NFSFT_INDEX(0,0,plan)] += plan->f[m];
802 #pragma omp parallel for default(shared) private(n,n_abs,alpha,gamma,m,stheta,sphi,it2,it1,k,temp)
803 for (n = -plan->N; n <= plan->N; n++)
813 for (m = 0; m < plan->M_total; m++)
816 stheta = cos(2.0*KPI*plan->x[2*m+1]);
818 sphi = 2.0*KPI*plan->x[2*m];
824 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
825 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
831 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
835 for (k = n_abs+2; k <= plan->N; k++)
838 it2 =
alpha[k] * stheta * it2 +
gamma[k] * it1;
840 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
846 for (m = 0; m < plan->M_total; m++)
849 stheta = cos(2.0*KPI*plan->x[2*m+1]);
851 sphi = 2.0*KPI*plan->x[2*m];
854 for (n = -plan->N; n <= plan->N; n++)
867 pow(1 - stheta * stheta, 0.5*n_abs) * cexp(-_Complex_I*n*sphi);
868 plan->f_hat[NFSFT_INDEX(n_abs,n,plan)] += it1;
874 plan->f_hat[NFSFT_INDEX(n_abs+1,n,plan)] += it2;
878 for (k = n_abs+2; k <= plan->N; k++)
881 it2 =
alpha[k] * stheta * it2 +
gamma[k] * it1;
883 plan->f_hat[NFSFT_INDEX(k,n,plan)] += it2;
893 if (plan->flags & NFSFT_NORMALIZED)
897 #pragma omp parallel for default(shared) private(k,n)
899 for (k = 0; k <= plan->N; k++)
901 for (n = -k; n <= k; n++)
904 plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
905 sqrt((2*k+1)/(4.0*KPI));
911 if (plan->flags & NFSFT_ZERO_F_HAT)
913 for (n = -plan->N; n <= plan->N+1; n++)
915 memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
916 (plan->N+1+abs(n))*
sizeof(
double _Complex));
921 void nfsft_trafo(nfsft_plan *plan)
929 double t, t_pre, t_nfft, t_fpt, t_c2e, t_norm;
938 plan->MEASURE_TIME_t[0] = 0.0;
939 plan->MEASURE_TIME_t[1] = 0.0;
940 plan->MEASURE_TIME_t[2] = 0.0;
943 if (
wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
960 nfsft_trafo_direct(plan);
967 if (plan->flags & NFSFT_PRESERVE_F_HAT)
969 memcpy(plan->f_hat_intern,plan->f_hat,plan->N_total*
970 sizeof(
double _Complex));
974 plan->f_hat_intern = plan->f_hat;
980 plan->plan_nfft.x = plan->x;
981 plan->plan_nfft.f = plan->f;
982 plan->plan_nfft.f_hat = plan->f_hat_intern;
987 if (plan->flags & NFSFT_NORMALIZED)
991 #pragma omp parallel for default(shared) private(k,n)
993 for (k = 0; k <= plan->N; k++)
995 for (n = -k; n <= k; n++)
998 plan->f_hat_intern[NFSFT_INDEX(k,n,plan)] *=
999 sqrt((2*k+1)/(4.0*KPI));
1008 if (plan->flags & NFSFT_USE_DPT)
1011 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1012 for (n = -plan->N; n <= plan->N; n++)
1013 fpt_trafo_direct(
wisdom.set_threads[omp_get_thread_num()],abs(n),
1014 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1015 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1019 for (n = -plan->N; n <= plan->N; n++)
1024 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1025 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1033 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1034 for (n = -plan->N; n <= plan->N; n++)
1035 fpt_trafo(
wisdom.set_threads[omp_get_thread_num()],abs(n),
1036 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1037 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1041 for (n = -plan->N; n <= plan->N; n++)
1046 &plan->f_hat_intern[NFSFT_INDEX(abs(n),n,plan)],
1047 &plan->f_hat_intern[NFSFT_INDEX(0,n,plan)],
1073 if (plan->flags & NFSFT_USE_NDFT)
1076 nfft_trafo_direct(&plan->plan_nfft);
1082 nfft_trafo_2d(&plan->plan_nfft);
1091 void nfsft_adjoint(nfsft_plan *plan)
1100 plan->MEASURE_TIME_t[0] = 0.0;
1101 plan->MEASURE_TIME_t[1] = 0.0;
1102 plan->MEASURE_TIME_t[2] = 0.0;
1105 if (
wisdom.flags & NFSFT_NO_FAST_ALGORITHM)
1122 nfsft_adjoint_direct(plan);
1132 plan->plan_nfft.x = plan->x;
1133 plan->plan_nfft.f = plan->f;
1134 plan->plan_nfft.f_hat = plan->f_hat;
1142 if (plan->flags & NFSFT_USE_NDFT)
1147 nfft_adjoint_direct(&plan->plan_nfft);
1155 nfft_adjoint_2d(&plan->plan_nfft);
1178 if (plan->flags & NFSFT_USE_DPT)
1181 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1182 for (n = -plan->N; n <= plan->N; n++)
1183 fpt_transposed_direct(
wisdom.set_threads[omp_get_thread_num()],abs(n),
1184 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1185 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1189 for (n = -plan->N; n <= plan->N; n++)
1193 fpt_transposed_direct(
wisdom.
set,abs(n),
1194 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1195 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1203 #pragma omp parallel for default(shared) private(n) num_threads(wisdom.nthreads)
1204 for (n = -plan->N; n <= plan->N; n++)
1205 fpt_transposed(
wisdom.set_threads[omp_get_thread_num()],abs(n),
1206 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1207 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1212 for (n = -plan->N; n <= plan->N; n++)
1217 &plan->f_hat[NFSFT_INDEX(abs(n),n,plan)],
1218 &plan->f_hat[NFSFT_INDEX(0,n,plan)],
1231 if (plan->flags & NFSFT_NORMALIZED)
1237 #pragma omp parallel for default(shared) private(k,n)
1239 for (k = 0; k <= plan->N; k++)
1241 for (n = -k; n <= k; n++)
1244 plan->f_hat[NFSFT_INDEX(k,n,plan)] *=
1245 sqrt((2*k+1)/(4.0*KPI));
1251 if (plan->flags & NFSFT_ZERO_F_HAT)
1255 for (n = -plan->N; n <= plan->N+1; n++)
1257 memset(&plan->f_hat[NFSFT_INDEX(-plan->N-1,n,plan)],0U,
1258 (plan->N+1+abs(n))*
sizeof(
double _Complex));
1266 void nfsft_precompute_x(nfsft_plan *plan)
1269 plan->plan_nfft.x = plan->x;
1272 if (plan->plan_nfft.flags & PRE_ONE_PSI)
1273 nfft_precompute_one_psi(&plan->plan_nfft);
#define X(name)
Include header for C99 complex datatype.
R nfft_elapsed_seconds(ticks t1, ticks t0)
Return number of elapsed seconds between two time points.
static struct nfsft_wisdom wisdom
The global wisdom structure for precomputed data.
static void c2e_transposed(nfsft_plan *plan)
Transposed version of the function c2e.
double * gamma
Precomputed recursion coefficients /f$\gamma_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
fpt_set set
Structure for discrete polynomial transform (DPT)
#define NFSFT_DEFAULT_NFFT_CUTOFF
The default NFFT cutoff parameter.
static void c2e(nfsft_plan *plan)
Converts coefficients with , from a linear combination of Chebyshev polynomials.
#define NFSFT_BREAK_EVEN
The break-even bandwidth .
void alpha_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
double * beta
Precomputed recursion coefficients /f$\beta_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
double * alpha
Precomputed recursion coefficients /f$\alpha_k^n/f$ for /f$k = 0,/ldots, N_{\text{max}}; n=-k,...
void gamma_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
bool initialized
Indicates wether the structure has been initialized.
int N_MAX
Stores precomputation flags.
int T_MAX
The logarithm /f$t = \log_2 N_{\text{max}}/f$ of the maximum bandwidth.
void beta_al_all(R *alpha, const int N)
Compute three-term-recurrence coefficients of associated Legendre functions for .
Header file with internal API of the NFSFT module.