Source code for cuvarbase.tests.test_nfft

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from builtins import zip
from builtins import range
from builtins import object
import pytest
import numpy as np
from numpy.testing import assert_allclose
from scipy import fftpack

from pycuda.tools import mark_cuda_test
from pycuda import gpuarray

import skcuda.fft as cufft

from nfft import nfft_adjoint as nfft_adjoint_cpu
from nfft.utils import nfft_matrix
from nfft.kernels import KERNELS

from ..cunfft import NFFTAsyncProcess

nfft_sigma = 5
nfft_m = 8
nfft_rtol = 5E-3
nfft_atol = 5E-3
spp = 1


def direct_sums(t, y, freqs):
    # Direct (O(N * Nf)) evaluation of the adjoint NFT, used as ground truth
    def sfunc(func):
        return [np.sum(y * func(2 * np.pi * t * f)) for f in freqs]

    return np.asarray(sfunc(np.cos)) + 1j * np.asarray(sfunc(np.sin))

def scale_time(t, samples_per_peak):
    # Rescale times so the baseline spans 1 / samples_per_peak, starting at -0.5
    return (t - min(t)) / (samples_per_peak * (max(t) - min(t))) - 0.5

def data(seed=100, sigma=0.1, ndata=100, samples_per_peak=spp):
    # Plain helper rather than a pytest fixture: the tests below call data()
    # directly, which newer pytest versions do not allow for fixtures.
    rand = np.random.RandomState(seed)
    t = np.sort(rand.rand(ndata))
    y = np.cos(2 * np.pi * (3. / (max(t) - min(t))) * t)

    tscl = scale_time(t, samples_per_peak=samples_per_peak)
    y += sigma * rand.randn(len(t))

    err = sigma * np.ones_like(y)

    return t, tscl, y, err

def get_b(sigma, m):
    # Shape parameter of the Gaussian gridding kernel
    # (e.g. b = 80 / (9 * pi) ~= 2.83 for sigma=5, m=8)
    return (2. * sigma * m) / ((2 * sigma - 1) * np.pi)

def precomp_psi(t, b, n, m):
    # Precompute the per-point (q1, q2) and per-offset (q3) factors of the
    # Gaussian window used by the fast gridding step
    xg = m + n * t - np.floor(n * t)

    q1 = np.exp(-xg ** 2 / b) / np.sqrt(np.pi * b)
    q2 = np.exp(2 * xg / b)
    q3 = np.exp(-np.arange(2 * m + 1) ** 2 / b)

    return q1, q2, q3

def gpu_grid_scalar(t, y, sigma, m, N):
    # Pure-Python (scalar) reference implementation of the fast gridding step
    b = get_b(sigma, m)
    n = int(sigma * N)

    q1, q2, q3 = precomp_psi(t, b, n, m)

    # np.int is deprecated/removed in recent NumPy; use the builtin int
    u = (np.floor(n * (t + 0.5) - m)).astype(int)

    grid = np.zeros(n)

    inds = np.arange(2 * m + 1)
    for i, (U, Y) in enumerate(zip(u, y)):
        q2vals = np.array([pow(q2[i], j) for j in inds])
        grid[(U + inds) % len(grid)] += Y * q1[i] * q2vals * q3

    return grid

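# Note (added for clarity; not part of the original module): precomp_psi and
# gpu_grid_scalar implement standard fast Gaussian gridding, which relies on
# the factorization
#
#     exp(-(xg - j) ** 2 / b) / sqrt(pi * b) == q1 * q2 ** j * q3[j]
#
# so only a handful of exponentials are needed per data point. A quick,
# hypothetical spot check of that identity:
#
#     t, tsc, y, err = data()
#     nf = int(nfft_sigma * len(t))
#     n = int(nfft_sigma * nf)
#     b = get_b(nfft_sigma, nfft_m)
#     q1, q2, q3 = precomp_psi(tsc, b, n, nfft_m)
#     xg = nfft_m + n * tsc - np.floor(n * tsc)
#     j = 3
#     assert_allclose(np.exp(-(xg - j) ** 2 / b) / np.sqrt(np.pi * b),
#                     q1 * q2 ** j * q3[j])
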
def simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, use_double=False,
                    m=nfft_m, samples_per_peak=spp, **kwargs):
    proc = NFFTAsyncProcess(sigma=sigma, m=m, autoset_m=False,
                            use_double=use_double)

    for stream in proc.streams:
        stream.synchronize()

    nfft_kwargs = dict(samples_per_peak=samples_per_peak)
    nfft_kwargs.update(kwargs)

    results = proc.run([(t, y, nf)], **nfft_kwargs)
    proc.finish()

    return results[0]

def get_cpu_grid(t, y, nf, sigma=nfft_sigma, m=nfft_m):
    kernel = KERNELS.get('gaussian', 'gaussian')
    mat = nfft_matrix(t, int(nf * sigma), m, sigma, kernel, truncated=True)
    return mat.T.dot(y)

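# Illustrative sketch (not in the original tests): the helpers above could also
# be used to compare the GPU adjoint NFFT directly against the pure-CPU
# reference imported as nfft_adjoint_cpu, assuming pre-scaled times and the
# same centered frequency grid k = -nf//2, ..., nf//2 - 1 used elsewhere here:
#
#     t, tsc, y, err = data()
#     nf = int(nfft_sigma * len(t))
#     gpu_hat = simple_gpu_nfft(tsc, y, nf, sigma=nfft_sigma, m=nfft_m,
#                               minimum_frequency=-int(nf / 2),
#                               samples_per_peak=spp)
#     cpu_hat = nfft_adjoint_cpu(tsc, y, nf, sigma=nfft_sigma, m=nfft_m)
#     assert_allclose(gpu_hat.real, np.real(cpu_hat),
#                     rtol=nfft_rtol, atol=nfft_atol)
#     assert_allclose(gpu_hat.imag, np.imag(cpu_hat),
#                     rtol=nfft_rtol, atol=nfft_atol)
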
#@mark_cuda_test
class TestNFFT(object):

    def test_fast_gridding_with_jvdp_nfft(self):
        t, tsc, y, err = data()
        nf = int(nfft_sigma * len(t))
        gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m,
                                   just_return_gridded_data=True,
                                   fast_grid=True,
                                   minimum_frequency=-int(nf / 2),
                                   samples_per_peak=spp)

        # get CPU grid
        cpu_grid = get_cpu_grid(tsc, y, nf, sigma=nfft_sigma, m=nfft_m)

        assert_allclose(gpu_grid, cpu_grid, atol=1E-4, rtol=0)

    def test_fast_gridding_against_scalar_version(self):
        t, tsc, y, err = data()
        nf = int(nfft_sigma * len(t))
        gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m,
                                   just_return_gridded_data=True,
                                   fast_grid=True,
                                   minimum_frequency=-int(nf / 2),
                                   samples_per_peak=spp)

        # get python version of gpu grid calculation
        cpu_grid = gpu_grid_scalar(tsc, y, nfft_sigma, nfft_m, nf)

        tols = dict(rtol=nfft_rtol, atol=nfft_atol)
        assert_allclose(gpu_grid, cpu_grid, **tols)

    def test_slow_gridding_against_scalar_fast_gridding(self):
        t, tsc, y, err = data()
        nf = int(nfft_sigma * len(t))
        gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m,
                                   just_return_gridded_data=True,
                                   fast_grid=False,
                                   minimum_frequency=-int(nf / 2),
                                   samples_per_peak=spp)

        # get python version of gpu grid calculation
        cpu_grid = gpu_grid_scalar(tsc, y, nfft_sigma, nfft_m, nf)

        tols = dict(rtol=nfft_rtol, atol=nfft_atol)
        assert_allclose(gpu_grid, cpu_grid, **tols)

    def test_slow_gridding_against_jvdp_nfft(self):
        t, tsc, y, err = data()
        nf = int(nfft_sigma * len(t))
        gpu_grid = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m,
                                   just_return_gridded_data=True,
                                   fast_grid=False,
                                   minimum_frequency=-int(nf / 2),
                                   samples_per_peak=spp)

        # get CPU grid
        cpu_grid = get_cpu_grid(tsc, y, nf, sigma=nfft_sigma, m=nfft_m)

        # print the ten largest discrepancies before asserting
        diffs = np.absolute(gpu_grid - cpu_grid)
        inds = (np.argsort(diffs)[::-1])[:10]

        for i, gpug, cpug, d in zip(inds, gpu_grid[inds],
                                    cpu_grid[inds], diffs[inds]):
            print(i, gpug, cpug, d)

        tols = dict(rtol=nfft_rtol, atol=nfft_atol)
        assert_allclose(gpu_grid, cpu_grid, **tols)

    def test_ffts(self):
        t, tsc, y, err = data()

        yhat = np.empty(len(y))

        yg = gpuarray.to_gpu(y.astype(np.complex128))
        yghat = gpuarray.to_gpu(yhat.astype(np.complex128))

        plan = cufft.Plan(len(y), np.complex128, np.complex128)
        cufft.ifft(yg, yghat, plan)

        # skcuda's ifft is unnormalized, so scale the scipy result by N
        yhat = fftpack.ifft(y) * len(y)

        tols = dict(rtol=nfft_rtol, atol=nfft_atol)
        assert_allclose(yhat, yghat.get(), **tols)

    def nfft_against_direct_sums(self, samples_per_peak=spp, f0=None,
                                 scaled=True):
        # Helper, not collected by pytest itself (no 'test_' prefix);
        # used by the parametrized tests below.
        t, tsc, y, err = data(samples_per_peak=samples_per_peak)
        nf = int(nfft_sigma * len(t))
        df = 1. / (samples_per_peak * (max(t) - min(t)))
        if f0 is None:
            f0 = -0.5 * nf * df

        k0 = int(f0 / df)
        f0 = k0 if scaled else k0 * df
        tg = tsc if scaled else t
        sppg = samples_per_peak

        gpu_nfft = simple_gpu_nfft(tg, y, nf, sigma=nfft_sigma, m=nfft_m,
                                   minimum_frequency=f0,
                                   samples_per_peak=sppg)

        freqs = (float(k0) + np.arange(nf))
        if not scaled:
            freqs *= df

        direct_dft = direct_sums(tg, y, freqs)

        tols = dict(rtol=nfft_rtol, atol=nfft_atol)

        def dsort(arr0, arr):
            d = np.absolute(arr0 - arr)
            return np.argsort(-d)

        # print the few worst disagreements before asserting
        inds = dsort(np.real(direct_dft), np.real(gpu_nfft))

        npr = 5
        q = list(zip(inds[:npr],
                     direct_dft[inds[:npr]],
                     gpu_nfft[inds[:npr]]))
        for i, dft, gnfft in q:
            print(i, dft, gnfft)

        assert_allclose(np.real(direct_dft), np.real(gpu_nfft), **tols)
        assert_allclose(np.imag(direct_dft), np.imag(gpu_nfft), **tols)

    def test_nfft_against_existing_impl_scaled_centered_spp1(self):
        self.nfft_against_direct_sums(samples_per_peak=1, scaled=True,
                                      f0=None)

    def test_nfft_against_existing_impl_scaled_centered_spp5(self):
        self.nfft_against_direct_sums(samples_per_peak=5, scaled=True,
                                      f0=None)

    def test_nfft_against_existing_impl_scaled_uncentered_spp1(self):
        self.nfft_against_direct_sums(samples_per_peak=1, scaled=True,
                                      f0=10.)

    def test_nfft_against_existing_impl_unscaled_centered_spp1(self):
        self.nfft_against_direct_sums(samples_per_peak=1, scaled=False,
                                      f0=None)

    def test_nfft_against_existing_impl_unscaled_uncentered_spp5(self):
        self.nfft_against_direct_sums(samples_per_peak=5, scaled=False,
                                      f0=0.)

    def test_nfft_adjoint_async(self, f0=0., ndata=10, batch_size=3,
                                use_double=False):
        datas = []
        for i in range(ndata):
            t, tsc, y, err = data()
            nf = int(nfft_sigma * len(t))
            datas.append((t, y, nf))

        kwargs = dict(minimum_frequency=f0, samples_per_peak=spp)

        proc = NFFTAsyncProcess(sigma=nfft_sigma, m=nfft_m, autoset_m=False,
                                use_double=use_double)

        # run each lightcurve separately, then all at once, then in batches
        single_nffts = []
        for t, y, nf in datas:
            nfft = simple_gpu_nfft(t, y, nf, sigma=nfft_sigma, m=nfft_m,
                                   use_double=use_double, **kwargs)
            single_nffts.append(nfft)

        multi_nffts = proc.run(datas, **kwargs)
        batch_nffts = proc.batched_run(datas, batch_size=batch_size,
                                       **kwargs)
        proc.finish()

        tols = dict(rtol=nfft_rtol, atol=nfft_atol)

        for ghat_m, ghat_s, ghat_b in zip(multi_nffts, single_nffts,
                                          batch_nffts):
            assert_allclose(ghat_s.real, ghat_m.real, **tols)
            assert_allclose(ghat_s.imag, ghat_m.imag, **tols)

            assert_allclose(ghat_s.real, ghat_b.real, **tols)
            assert_allclose(ghat_s.imag, ghat_b.imag, **tols)
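
# Usage note (assumed, not part of the original module): these tests require a
# CUDA-capable GPU along with PyCUDA, scikit-cuda, and the nfft package, and
# can be run directly with pytest, e.g.
#
#     pytest cuvarbase/tests/test_nfft.py -v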