Source code for cuvarbase.lombscargle

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from builtins import zip
from builtins import map
from builtins import range
from builtins import object
import resource

import numpy as np
from scipy.special import gamma, gammaln

import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
# import pycuda.autoinit

from .core import GPUAsyncProcess
from .utils import weights, find_kernel, _module_reader
from .utils import autofrequency as utils_autofreq
from .cunfft import NFFTAsyncProcess, nfft_adjoint_async, NFFTMemory


[docs]def get_k0(freqs): return max([1, int(round(freqs[0] / (freqs[1] - freqs[0])))])
[docs]def check_k0(freqs, k0=None, rtol=1E-2, atol=1E-7): k0 = k0 if k0 is not None else get_k0(freqs) df = freqs[1] - freqs[0] f0 = k0 * df assert(abs(f0 - freqs[0]) < rtol * df + atol)
[docs]class LombScargleMemory(object): """ Container class for allocating memory and transferring data between the GPU and CPU for Lomb-Scargle computations Parameters ---------- sigma: int The ``sigma`` parameter for the NFFT stream: :class:`pycuda.driver.Stream` instance The CUDA stream used for calculations/data transfer m: int The ``m`` parameter for the NFFT """ def __init__(self, sigma, stream, m, **kwargs): self.sigma = sigma self.stream = stream self.m = m self.k0 = kwargs.get('k0', 0) self.precomp_psi = kwargs.get('precomp_psi', True) self.amplitude_prior = kwargs.get('amplitude_prior', None) self.window = kwargs.get('window', False) self.nharmonics = kwargs.get('nharmonics', 1) self.use_fft = kwargs.get('use_fft', True) self.other_settings = {} self.other_settings.update(kwargs) self.floating_mean = kwargs.get('floating_mean', True) self.use_double = kwargs.get('use_double', False) self.mode = 1 if self.floating_mean else 0 if self.window: self.mode = 2 self.n0 = kwargs.get('n0', None) self.nf = kwargs.get('nf', None) self.t_g = kwargs.get('t_g', None) self.yw_g = kwargs.get('yw_g', None) self.w_g = kwargs.get('w_g', None) self.lsp_g = kwargs.get('lsp_g', None) if self.use_fft: self.nfft_mem_yw = kwargs.get('nfft_mem_yw', None) self.nfft_mem_w = kwargs.get('nfft_mem_w', None) if self.nfft_mem_yw is None: self.nfft_mem_yw = NFFTMemory(self.sigma, self.stream, self.m, **kwargs) if self.nfft_mem_w is None: self.nfft_mem_w = NFFTMemory(self.sigma, self.stream, self.m, **kwargs) self.real_type = self.nfft_mem_yw.real_type self.complex_type = self.nfft_mem_yw.complex_type else: self.real_type = np.float32 self.complex_type = np.complex64 if self.use_double: self.real_type = np.float64 self.complex_type = np.complex128 # Set up regularization self.reg_g = gpuarray.zeros(2 * self.nharmonics + 1, dtype=self.real_type) self.reg = np.zeros(2 * self.nharmonics + 1, dtype=self.real_type) if self.amplitude_prior is not None: lmbda = np.power(self.amplitude_prior, -2) if isinstance(lmbda, float): lmbda = lmbda * np.ones(self.nharmonics) for i, l in enumerate(lmbda): self.reg[2 * i] = self.real_type(l) self.reg[1 + 2 * i] = self.real_type(l) self.reg_g.set_async(self.reg, stream=self.stream) self.buffered_transfer = kwargs.get('buffered_transfer', False) self.n0_buffer = kwargs.get('n0_buffer', None) self.lsp_c = kwargs.get('lsp_c', None) self.t = kwargs.get('t', None) self.yw = kwargs.get('yw', None) self.w = kwargs.get('w', None)
[docs] def allocate_data(self, **kwargs): """ Allocates memory for lightcurve """ n0 = kwargs.get('n0', self.n0) if self.buffered_transfer: n0 = kwargs.get('n0_buffer', self.n0_buffer) assert(n0 is not None) self.t_g = gpuarray.zeros(n0, dtype=self.real_type) self.yw_g = gpuarray.zeros(n0, dtype=self.real_type) self.w_g = gpuarray.zeros(n0, dtype=self.real_type) if self.use_fft: self.nfft_mem_w.t_g = self.t_g self.nfft_mem_w.y_g = self.w_g self.nfft_mem_yw.t_g = self.t_g self.nfft_mem_yw.y_g = self.yw_g self.nfft_mem_yw.n0 = n0 self.nfft_mem_w.n0 = n0 return self
[docs] def allocate_grids(self, **kwargs): """ Allocates memory for NFFT grids, NFFT precomputation vectors, and the GPU vector for the Lomb-Scargle power """ k0 = kwargs.get('k0', self.k0) n0 = kwargs.get('n0', self.n0) if self.buffered_transfer: n0 = kwargs.get('n0_buffer', self.n0_buffer) assert(n0 is not None) self.nf = kwargs.get('nf', self.nf) assert(self.nf is not None) if self.use_fft: if self.nfft_mem_yw.precomp_psi: self.nfft_mem_yw.allocate_precomp_psi(n0=n0) # Only one precomp psi needed self.nfft_mem_w.precomp_psi = False self.nfft_mem_w.q1 = self.nfft_mem_yw.q1 self.nfft_mem_w.q2 = self.nfft_mem_yw.q2 self.nfft_mem_w.q3 = self.nfft_mem_yw.q3 fft_size = self.nharmonics * (self.nf + k0) self.nfft_mem_yw.allocate_grid(nf=fft_size - k0) self.nfft_mem_w.allocate_grid(nf=2 * fft_size - k0) self.lsp_g = gpuarray.zeros(self.nf, dtype=self.real_type) return self
[docs] def allocate_pinned_cpu(self, **kwargs): """ Allocates pinned CPU memory for asynchronous transfer of result """ nf = kwargs.get('nf', self.nf) assert(nf is not None) self.lsp_c = cuda.aligned_zeros(shape=(nf,), dtype=self.real_type, alignment=resource.getpagesize()) self.lsp_c = cuda.register_host_memory(self.lsp_c) return self
[docs] def is_ready(self): """ don't use this. """ raise NotImplementedError()
[docs] def allocate_buffered_data_arrays(self, **kwargs): """ Allocates pinned memory for lightcurves if we're reusing this container """ n0 = kwargs.get('n0', self.n0) if self.buffered_transfer: n0 = kwargs.get('n0_buffer', self.n0_buffer) assert(n0 is not None) self.t = cuda.aligned_zeros(shape=(n0,), dtype=self.real_type, alignment=resource.getpagesize()) self.t = cuda.register_host_memory(self.t) self.yw = cuda.aligned_zeros(shape=(n0,), dtype=self.real_type, alignment=resource.getpagesize()) self.yw = cuda.register_host_memory(self.yw) self.w = cuda.aligned_zeros(shape=(n0,), dtype=self.real_type, alignment=resource.getpagesize()) self.w = cuda.register_host_memory(self.w) return self
[docs] def allocate(self, **kwargs): """ Allocate all memory necessary """ self.nf = kwargs.get('nf', self.nf) assert(self.nf is not None) self.allocate_data(**kwargs) self.allocate_grids(**kwargs) self.allocate_pinned_cpu(**kwargs) if self.buffered_transfer: self.allocate_buffered_data_arrays(**kwargs) return self
[docs] def setdata(self, **kwargs): """ Sets the value of the data arrays. """ t = kwargs.get('t', self.t) yw = kwargs.get('yw', self.yw) w = kwargs.get('w', self.w) y = kwargs.get('y', None) dy = kwargs.get('dy', None) self.ybar = 0. self.yy = kwargs.get('yy', 1.) self.n0 = kwargs.get('n0', len(t)) if dy is not None: assert('w' not in kwargs) w = weights(dy) if y is not None: assert('yw' not in kwargs) self.ybar = np.dot(y, w) yw = np.multiply(w, y - self.ybar) y2 = np.power(y - self.ybar, 2) self.yy = np.dot(w, y2) t = np.asarray(t).astype(self.real_type) yw = np.asarray(yw).astype(self.real_type) w = np.asarray(w).astype(self.real_type) if self.buffered_transfer: if any([arr is None for arr in [self.t, self.yw, self.w]]): if self.buffered_transfer: self.allocate_buffered_data_arrays(**kwargs) assert(self.n0 <= len(self.t)) self.t[:self.n0] = t[:self.n0] self.yw[:self.n0] = yw[:self.n0] self.w[:self.n0] = w[:self.n0] else: self.t = np.asarray(t).astype(self.real_type) self.yw = np.asarray(yw).astype(self.real_type) self.w = np.asarray(w).astype(self.real_type) # Set minimum and maximum t values (needed to scale things # for the NFFT) self.tmin = min(t) self.tmax = max(t) if self.use_fft: self.nfft_mem_yw.tmin = self.tmin self.nfft_mem_w.tmin = self.tmin self.nfft_mem_yw.tmax = self.tmax self.nfft_mem_w.tmax = self.tmax self.nfft_mem_w.n0 = len(t) self.nfft_mem_yw.n0 = len(t) return self
[docs] def transfer_data_to_gpu(self, **kwargs): """ Transfers the lightcurve to the GPU """ t, yw, w = self.t, self.yw, self.w assert(not any([arr is None for arr in [t, yw, w]])) # Do asynchronous data transfer self.t_g.set_async(t, stream=self.stream) self.yw_g.set_async(yw, stream=self.stream) self.w_g.set_async(w, stream=self.stream)
[docs] def transfer_lsp_to_cpu(self, **kwargs): """ Asynchronous transfer of LSP result to CPU """ self.lsp_g.get_async(ary=self.lsp_c, stream=self.stream)
[docs] def fromdata(self, **kwargs): """ Sets and (optionally) allocates memory for data """ self.setdata(**kwargs) if kwargs.get('allocate', True): self.allocate(**kwargs) return self
[docs] def set_gpu_arrays_to_zero(self, **kwargs): """ Sets all gpu arrays to zero """ for x in [self.t_g, self.yw_g, self.w_g]: if x is not None: x.fill(self.real_type(0), stream=self.stream) for x in [self.t, self.yw, self.w]: if x is not None: x[:] = 0. if hasattr(self, 'nfft_mem_yw'): self.nfft_mem_yw.ghat_g.fill(self.complex_type(0), stream=self.stream) if hasattr(self, 'nfft_mem_w'): self.nfft_mem_w.ghat_g.fill(self.complex_type(0), stream=self.stream)
[docs]def mhdirect_sums(t, yw, w, freq, YY, nharms=1): """ Compute the set of frequency-dependent sums for (multi-harmonic) Lomb Scargle at a given frequency Parameters ---------- t: array_like Observation times. yw: array_like Observations multiplied by their corresponding weights. `sum(yw)` is the weighted mean. w: array_like Weights for each of the observations. Usually proportional to `1/sigma ** 2` where `sigma` is the observation uncertainties. Normalized so that `sum(w) = 1`. freq: float Signal frequency. YY: float Weighted variance of the observations. nharms: int, optional (default: 1) Number of harmonics to compute. This is 1 for the standard Lomb-Scargle Returns ------- C, S, CC, CS, SS, YC, YS: array_like The set of sums with regularization added. See also -------- :func:`mhdirect_sums` """ phase = 2 * np.pi * ((t * freq) % 1.0).astype(np.float64) ns = np.arange(2 * nharms + 1) def sgn(n): return 1 if n == 0 else np.sign(n) c = [np.dot(w, np.cos(n * phase)) for n in ns] s = [np.dot(w, np.sin(n * phase)) for n in ns] yc = [np.dot(yw, np.cos(n * phase)) for n in ns[1:nharms+1]] ys = [np.dot(yw, np.sin(n * phase)) for n in ns[1:nharms+1]] cc = [[0.5 * (c[n+m] + c[abs(n-m)]) for m in ns[1:nharms+1]] for n in ns[1:nharms+1]] cs = [[0.5 * (s[n+m] - sgn(n-m) * s[abs(n-m)]) for m in ns[1:nharms+1]] for n in ns[1:nharms+1]] ss = [[0.5 * (c[abs(n-m)] - c[n+m]) for m in ns[1:nharms+1]] for n in ns[1:nharms+1]] C = np.asarray(c)[1:nharms+1] S = np.asarray(s)[1:nharms+1] ybar = sum(yw) YC = np.asarray(yc) - ybar * C YS = np.asarray(ys) - ybar * S CC = np.asarray(cc) - np.outer(C, C) CS = np.asarray(cs) - np.outer(C, S) SS = np.asarray(ss) - np.outer(S, S) return C, S, CC, CS, SS, YC, YS
[docs]def add_regularization(sums, amplitude_priors=None, cn0=None, sn0=None): """ Add regularization to sums. See Zechmeister & Kuerster 2009 for details about notation. Parameters ---------- sums: tuple of array_like C, S, CC, CS, SS, YC, YS. See `mhdirect_sums` for more information. amplitude_priors: float or array_like, optional Corresponds to standard deviation of a Gaussian prior on the amplitudes of all harmonics (if its a `float`), or for each of the harmonics (if it's an `array_like`). cn0: array_like Location of the centroid of the Gaussian amplitude prior on each of the cosine amplitudes sn0: array_like Location of the centroid of the Gaussian amplitude prior on each of the sine amplitudes Returns ------- C, S, CC, CS, SS, YC, YS: array_like The set of sums with regularization added. See also -------- :func:`mhdirect_sums` """ C, S, CC, CS, SS, YC, YS = sums D = np.zeros_like(C) if amplitude_priors is not None: D = np.ones_like(C) * np.power(amplitude_priors, -2) cn0 = np.zeros(len(C)) if cn0 is None else cn0 sn0 = np.zeros(len(S)) if sn0 is None else sn0 CCreg = CC + np.diag(D) SSreg = SS + np.diag(D) YCreg = YC + D * cn0 YSreg = YS + D * sn0 return C, S, CCreg, CS, SSreg, YCreg, YSreg
[docs]def mhgls_params_from_sums(sums, YY, ybar): """ Compute optimal amplitudes and offset from set of sums. See Zechmeister & Kuerster 2009 for details about notation. Parameters ---------- sums: tuple of array_like C, S, CC, CS, SS, YC, YS. See `mhdirect_sums` for more information. YY: float Weighted variance of `y - ybar`, where `ybar` is the weighted mean and `y` are the observations ybar: float Weighted mean of the data (`np.dot(w, y)`), where `w` are the weights and `y` are the observations. Returns ------- cn: array_like Cosine amplitudes of each harmonic sn: array_like Sine amplitudes of each harmonic offset: float Constant offset See also -------- :func:`mhdirect_sums` """ C, S, CC, CS, SS, YC, YS = sums nharms = len(C) A = np.block([[CC, CS], [CS.T, SS]]) b = np.concatenate((YC, YS)) theta = np.linalg.solve(A, b) cn = theta[:nharms] sn = theta[nharms:] offset = ybar - (np.dot(cn, C) + np.dot(sn, S)) return cn, sn, offset
[docs]def mhgls_from_sums(sums, YY, ybar): """ Compute multiharmonic periodogram power from set of sums. See Zechmeister & Kuerster 2009 for details about notation. Parameters ---------- sums: tuple of array_like C, S, CC, CS, SS, YC, YS. See `mhdirect_sums` for more information. YY: float Weighted variance of `y - ybar`, where `ybar` is the weighted mean and `y` are the observations ybar: float Weighted mean of the data (`np.dot(w, y)`), where `w` are the weights and `y` are the observations. Returns ------- power: float periodogram power See also -------- :func:`mhdirect_sums` """ C, S, CC, CS, SS, YC, YS = sums cn, sn, offset = mhgls_params_from_sums(sums, YY, ybar) XX = np.outer(cn, cn) * CC XX += 2 * np.outer(cn, sn) * CS XX += np.outer(sn, sn) * SS YX = 2 * (np.dot(cn, YC) + np.dot(sn, YS)) P = (YX - np.sum(XX)) / YY return P
[docs]def lomb_scargle_direct_sums(t, yw, w, freqs, YY, nharms=1, **kwargs): """ Compute Lomb-Scargle periodogram using direct summations. This is usually only useful for debugging and/or small numbers of frequencies. Parameters ---------- t: array_like Observation times. yw: array_like Observations multiplied by their corresponding weights. `sum(yw)` is the weighted mean. w: array_like Weights for each of the observations. Usually proportional to `1/sigma ** 2` where `sigma` is the observation uncertainties. Normalized so that `sum(w) = 1`. freqs: array_like Trial frequencies to evaluate the periodogram YY: float Weighted variance of the observations. nharms: int, optional (default: 1) Number of harmonics to use in the model. Lomb Scargle only uses 1, but more harmonics allow for greater model flexibility at the cost of higher complexity and therefore reduced signal- to-noise. Returns ------- power: array_like The periodogram powers at each of the trial frequencies """ def sfunc(f): return mhdirect_sums(t, yw, w, f, YY, nharms=nharms) sums = [add_regularization(s, **kwargs) for s in list(map(sfunc, freqs))] ybar = sum(yw) return np.array([mhgls_from_sums(s, YY, ybar) for s in sums])
[docs]def lomb_scargle_async(memory, functions, freqs, block_size=256, use_fft=True, python_dir_sums=False, transfer_to_device=True, transfer_to_host=True, window=False, **kwargs): """ Asynchronous Lomb Scargle periodogram Use the ``LombScargleAsyncProcess`` class and related subroutines when possible. Parameters ---------- memory: ``LombScargleMemory`` Allocated memory, must have data already set (see, e.g., ``LombScargleAsyncProcess.allocate()``) functions: tuple (lombscargle_functions, nfft_functions) Tuple of compiled functions from ``SourceModule``. Must be prepared with their appropriate dtype. freqs: array_like, optional (default: 0) Linearly-spaced frequencies starting at an integer multiple of the frequency spacing (i.e. freqs = df * (k0 + np.arange(nf))) block_size: int, optional Number of CUDA threads per block use_fft: bool, optional (default: True) If False, uses direct sums. python_dir_sums: bool, optional (default: False) If True, performs direct sums with Python on the CPU transfer_to_device: bool, optional, (default: True) If the data is already on the gpu, set as False transfer_to_host: bool, optional, (default: True) If False, will not transfer the resulting periodogram to CPU memory window: bool, optional (default: False) If True, computes the window function for the data Returns ------- lsp_c: ``np.array`` The resulting periodgram (``memory.lsp_c``) """ (lomb, lomb_dirsum), nfft_funcs = functions df = freqs[1] - freqs[0] samples_per_peak = 1./((memory.tmax - memory.tmin) * df) assert(get_k0(freqs) == memory.k0) stream = memory.stream block = (block_size, 1, 1) grid = (int(np.ceil(memory.nf / float(block_size))), 1) # lightcurve -> gpu if transfer_to_device: memory.transfer_data_to_gpu() # do direct summations with python on the CPU (for debugging) if python_dir_sums: t = memory.t_g.get() yw = memory.yw_g.get() w = memory.w_g.get() return lomb_scargle_direct_sums(t, yw, w, freqs, memory.yy) # Use direct sums (on GPU) if not use_fft: args = (grid, block, stream, memory.t_g.ptr, memory.yw_g.ptr, memory.w_g.ptr, memory.lsp_g.ptr, memory.reg_g.ptr, np.int32(memory.nf), np.int32(memory.n0), memory.real_type(memory.yy), memory.real_type(memory.ybar), memory.real_type(df), memory.real_type(min(freqs)), memory.mode) lomb_dirsum.prepared_async_call(*args) if transfer_to_device: memory.transfer_lsp_to_cpu() return memory.lsp_c else: # NFFT nfft_kwargs = dict(transfer_to_host=False, transfer_to_device=False) nfft_kwargs.update(kwargs) nfft_kwargs['minimum_frequency'] = freqs[0] nfft_kwargs['samples_per_peak'] = samples_per_peak # if not memory.window: # NFFT(w * (y - ybar)) nfft_adjoint_async(memory.nfft_mem_yw, nfft_funcs, **nfft_kwargs) # NFFT(w) nfft_adjoint_async(memory.nfft_mem_w, nfft_funcs, **nfft_kwargs) args = (grid, block, stream) args += (memory.nfft_mem_w.ghat_g.ptr, memory.nfft_mem_yw.ghat_g.ptr) args += (memory.lsp_g.ptr, memory.reg_g.ptr, np.int32(memory.nf)) args += (memory.real_type(memory.yy), memory.real_type(memory.ybar)) args += (np.int32(memory.k0), np.int32(memory.mode)) lomb.prepared_async_call(*args) if transfer_to_host: memory.transfer_lsp_to_cpu() return memory.lsp_c
[docs]class LombScargleAsyncProcess(GPUAsyncProcess): """ GPUAsyncProcess for the Lomb Scargle periodogram Parameters ---------- **kwargs: passed to ``NFFTAsyncProcess`` Example ------- >>> proc = LombScargleAsyncProcess() >>> Ndata = 1000 >>> t = np.sort(365 * np.random.rand(N)) >>> y = 12 + 0.01 * np.cos(2 * np.pi * t / 5.0) >>> y += 0.01 * np.random.randn(len(t)) >>> dy = 0.01 * np.ones_like(y) >>> freqs, powers = proc.run([(t, y, dy)]) >>> proc.finish() >>> ls_freqs, ls_powers = freqs[0], powers[0] """ def __init__(self, *args, **kwargs): super(LombScargleAsyncProcess, self).__init__(*args, **kwargs) self.nfft_proc = NFFTAsyncProcess(*args, **kwargs) self._cpp_defs = self.nfft_proc._cpp_defs self.real_type = self.nfft_proc.real_type self.complex_type = self.nfft_proc.complex_type self.block_size = self.nfft_proc.block_size self.module_options = self.nfft_proc.module_options self.use_double = self.nfft_proc.use_double self.memory = None self.nharmonics = kwargs.get('nharmonics', 1) if self.nharmonics > 1: raise Exception("Only 1 harmonic is supported right now") def _compile_and_prepare_functions(self, **kwargs): module_text = _module_reader(find_kernel('lomb'), self._cpp_defs) self.module = SourceModule(module_text, options=self.module_options) self.dtypes = dict( lomb=[np.intp, np.intp, np.intp, np.intp, np.int32, self.real_type, self.real_type, np.int32, np.int32], lomb_dirsum=[np.intp, np.intp, np.intp, np.intp, np.intp, np.int32, np.int32, self.real_type, self.real_type, self.real_type, self.real_type, np.int32] ) self.nfft_proc._compile_and_prepare_functions(**kwargs) for fname, dtype in self.dtypes.items(): func = self.module.get_function(fname) self.prepared_functions[fname] = func.prepare(dtype) self.function_tuple = tuple(self.prepared_functions[fname] for fname in sorted(self.dtypes.keys()))
[docs] def memory_requirement(self, n0, nf, k0, nbatch=1, autoadjust_sigma=False, **kwargs): """ return an approximate GPU memory requirement in bytes """ H = self.nharmonics sigma = self.nfft_proc.sigma m = self.nfft_proc.get_m(nf) if autoadjust_sigma: sigma = int(np.round(float(sigma * (nf + k0)) / nf)) fft_size = H * (nf + k0) # data mem += 3 * n0 # final result mem += nf rsize = self.real_type(1).nbytes csize = self.complex_type(1).nbytes c = int(np.ceil(float(csize) / rsize)) if kwargs.get('use_fft', True): # yw grid / fft (doubled because complex) mem = c * sigma * (fft_size - k0) # w grid / fft (doubled because complex) mem += c * sigma * (2 * fft_size - k0) # precomputation (q1 = n0, q2 = n0, q3 = 2m + 1) mem += 2 * n0 + 2 * m + 1 # inverse of design matrix if H > 1: # sparse matrix A (block-diagonal) mem += (2 * H) ** 2 * nbatch # vector b (Ax = b) mem += nbatch # size of float mem *= rsize return mem
[docs] def allocate_for_single_lc(self, t, y, dy, nf, k0=0, stream=None, **kwargs): """ Allocate GPU (and possibly CPU) memory for single lightcurve Parameters ---------- t: array_like Observation times y: array_like Observations dy: array_like Observation uncertainties nf: int Number of frequencies k0: int The starting index for the Fourier transform. The minimum frequency ``f0 = k0 * df``, where ``df`` is the frequency spacing stream: pycuda.driver.Stream CUDA stream you want this to run on **kwargs Returns ------- mem: LombScargleMemory Memory object. """ m = self.nfft_proc.get_m(nf) sigma = self.nfft_proc.sigma kwargs_lsmem = dict(use_double=self.use_double, nharmonics=self.nharmonics) kwargs_lsmem.update(kwargs) mem = LombScargleMemory(sigma, stream, m, k0=k0, **kwargs_lsmem) mem.fromdata(t=t, y=y, dy=dy, nf=nf, allocate=True, **kwargs) return mem
[docs] def preallocate(self, max_nobs, nlcs=1, nf=None, k0=None, freqs=None, streams=None, **kwargs): if freqs is not None: k0 = get_k0(freqs) nf = len(freqs) if nf is not None: assert k0 is not None m = self.nfft_proc.get_m(nf) sigma = self.nfft_proc.sigma self.memory = [] for i in range(nlcs): stream = None if streams is None else streams[i] mem = LombScargleMemory(sigma, stream, m, k0=k0, buffered_transfer=True, n0_buffer=max_nobs, nf=nf, use_double=self.use_double, nharmonics=self.nharmonics, **kwargs) mem.allocate(**kwargs) self.memory.append(mem) return self.memory
[docs] def autofrequency(self, *args, **kwargs): return utils_autofreq(*args, **kwargs)
def _nfreqs(self, *args, **kwargs): return len(self.autofrequency(*args, **kwargs))
[docs] def allocate(self, data, nfreqs=None, k0s=None, **kwargs): """ Allocate GPU memory for Lomb Scargle computations Parameters ---------- data: list of (t, y, dy) tuples List of data, ``[(t_1, y_1, dy_1), ...]`` * ``t``: Observation times * ``y``: Observations * ``dy``: Observation uncertainties **kwargs Returns ------- allocated_memory: list of ``LombScargleMemory`` list of allocated memory objects for each lightcurve """ if len(data) > len(self.streams): self._create_streams(len(data) - len(self.streams)) allocated_memory = [] nfrqs = nfreqs k0 = k0s if nfrqs is None: nfrqs = [self._nfreqs(t, **kwargs) for (t, y, dy) in data] elif isinstance(nfreqs, int): nfrqs = nfrqs * np.ones(len(data)) if k0s is None: k0s = [1] * len(nfrqs) elif isinstance(k0s, float): k0s = [k0s] * len(nfrqs) for i, ((t, y, dy), nf, k0) in enumerate(zip(data, nfrqs, k0s)): mem = self.allocate_for_single_lc(t, y, dy, nf, k0=k0, stream=self.streams[i], **kwargs) allocated_memory.append(mem) return allocated_memory
[docs] def run(self, data, use_fft=True, memory=None, freqs=None, **kwargs): """ Run Lomb Scargle on a batch of data. Parameters ---------- data: list of tuples list of [(t, y, dy), ...] containing * ``t``: observation times * ``y``: observations * ``dy``: observation uncertainties freqs: optional, list of ``np.ndarray`` frequencies List of custom frequencies. Right now, this has to be linearly spaced with ``freqs[0] / (freqs[1] - freqs[0])`` being an integer. memory: optional, list of ``LombScargleMemory`` objects List of memory objects, length of list must be ``>= len(data)`` use_fft: optional, bool (default: True) Uses the NFFT, otherwise just does direct summations (which are quite slow...) floating_mean: optional, bool (default: True) Add a floating mean to the model (see Zechmeister & Kurster 2009) window: optional, bool (default: False) If true, computes the window function for the data instead of Lomb-Scargle amplitude_prior: optional, float (default: None) If not None, sets the variance of a Gaussian prior on the amplitude (sometimes useful for suppressing aliases) **kwargs Returns ------- results: list of lists list of (freqs, pows) for each LS periodogram """ # compile module if not compiled already if not hasattr(self, 'prepared_functions') or \ not all([func in self.prepared_functions for func in ['lomb', 'lomb_dirsum']]): self._compile_and_prepare_functions(**kwargs) # create and/or check frequencies frqs = freqs if frqs is None: frqs = [self.autofrequency(d[0], **kwargs) for d in data] elif isinstance(frqs[0], float): frqs = [frqs] * len(data) assert(len(frqs) == len(data)) dfs = [frq[1] - frq[0] for frq in frqs] k0s = [get_k0(frq) for frq in frqs] # make sure k0 * df is the minimum frequency [check_k0(frq, k0=k0) for frq, k0 in zip(frqs, k0s)] if memory is None: memory = self.memory if memory is None: nfreqs = [len(frq) for frq in frqs] memory = self.allocate(data, nfreqs=nfreqs, k0s=k0s, use_fft=use_fft, **kwargs) else: for i, (t, y, dy) in enumerate(data): memory[i].set_gpu_arrays_to_zero(**kwargs) memory[i].setdata(t=t, y=y, dy=dy, **kwargs) ls_kwargs = dict(block_size=self.block_size, use_fft=use_fft) ls_kwargs.update(kwargs) funcs = (self.function_tuple, self.nfft_proc.function_tuple) results = [lomb_scargle_async(memory[i], funcs, frqs[i], **ls_kwargs) for i in range(len(data))] results = [(f, r) for f, r in zip(frqs, results)] return results
[docs] def batched_run_const_nfreq(self, data, batch_size=10, use_fft=True, freqs=None, **kwargs): """ Same as ``batched_run`` but is more efficient when the frequencies are the same for each lightcurve. Doesn't reallocate memory for each batch. Notes ----- To get best efficiency, make sure the maximum number of observations is not much larger than the typical number of observations """ # compile and prepare module functions if not already done if not hasattr(self, 'prepared_functions') or \ not all([func in self.prepared_functions for func in ['lomb', 'lomb_dirsum']]): self._compile_and_prepare_functions(**kwargs) # create streams if needed bsize = min([len(data), batch_size]) if len(self.streams) < bsize: self._create_streams(bsize - len(self.streams)) streams = [self.streams[i] for i in range(bsize)] max_ndata = max([len(t) for t, y, dy in data]) if freqs is None: data_with_max_baseline = max(data, key=lambda d: max(d[0]) - min(d[0])) freqs = self.autofrequency(data_with_max_baseline[0], **kwargs) # now correct frequencies df = freqs[1] - freqs[0] k0 = get_k0(freqs) # nf = len(freqs) nf = int(round(max(freqs) / df)) - k0 freqs = df * (k0 + np.arange(nf)) df = freqs[1] - freqs[0] k0 = get_k0(freqs) nf = len(freqs) check_k0(freqs, k0=k0) lsps = [] # make data batches batches = [] while len(batches) * batch_size < len(data): start = len(batches) * batch_size finish = start + min([batch_size, len(data) - start]) batches.append([data[i] for i in range(start, finish)]) # set up memory containers for gpu and cpu (pinned) memory m = self.nfft_proc.get_m(nf) sigma = self.nfft_proc.sigma kwargs_lsmem = dict(buffered_transfer=True, n0_buffer=max_ndata, use_double=self.use_double, nharmonics=self.nharmonics, use_fft=use_fft) kwargs_lsmem.update(kwargs) memory = [LombScargleMemory(sigma, stream, m, k0=k0, **kwargs_lsmem) for stream in streams] # allocate memory [mem.allocate(nf=nf, **kwargs) for mem in memory] funcs = (self.function_tuple, self.nfft_proc.function_tuple) for b, batch in enumerate(batches): results = self.run(batch, memory=memory, freqs=freqs, use_fft=use_fft, **kwargs) self.finish() for i, (f, p) in enumerate(results): lsps.append(np.copy(p)) return [(freqs, lsp) for lsp in lsps]
[docs]def fap_baluev(t, dy, z, fmax, d_K=3, d_H=1, use_gamma=True): """ False alarm probability for periodogram peak based on Baluev (2008) [2008MNRAS.385.1279B] Parameters ---------- t: array_like Observation times. dy: array_like Observation uncertainties. z: array_like or float Periodogram value(s) fmax: float Maximum frequency searched d_K: int, optional (default: 3) Number of degrees of fredom for periodgram model. 2H - 1 where H is the number of harmonics d_H: int, optional (default: 1) Number of degrees of freedom for default model. use_gamma: bool, optional (default: True) Use gamma function for computation of numerical coefficient; replaced with scipy.special.gammaln and should be stable now Returns ------- fap: float False alarm probability Example ------- >>> rand = np.random.RandomState(100) >>> t = np.sort(rand.rand(100)) >>> y = 12 + 0.01 * np.cos(2 * np.pi * 10. * t) >>> dy = 0.01 * np.ones_like(y) >>> y += dy * rand.rand(len(t)) >>> proc = LombScargleAsyncProcess() >>> results = proc.run([(t, y, dy)]) >>> freqs, powers = results[0] >>> fap_baluev(t, dy, powers, max(freqs)) """ N = len(t) d = d_K - d_H N_K = N - d_K N_H = N - d_H g = (0.5 * N_H) ** (0.5 * (d - 1)) if use_gamma: g = np.exp(gammaln(0.5 * N_H) - gammaln(0.5 * (N_K + 1))) w = np.power(dy, -2) tbar = np.dot(w, t) / sum(w) Dt = np.dot(w, np.power(t - tbar, 2)) / sum(w) Teff = np.sqrt(4 * np.pi * Dt) W = fmax * Teff A = (2 * np.pi ** 1.5) * W eZ1 = (z / np.pi) ** 0.5 * (d - 1) eZ2 = (1 - z) ** (0.5 * (N_K - 1)) tau = (g * A / (2 * np.pi)) * eZ1 * eZ2 Psing = 1 - (1 - z) ** (0.5 * N_K) return 1 - Psing * np.exp(-tau)
[docs]def lomb_scargle_simple(t, y, dy, **kwargs): """ Simple lomb-scargle interface for testing that things work on the GPU. Note: This will be substantially slower than working with the ``LombScargleAsyncProcess`` interface. """ w = np.power(dy, -2) w /= sum(w) proc = LombScargleAsyncProcess() results = proc.run([(t, y, w)], **kwargs) freqs, powers = results[0] proc.finish() return freqs, powers