from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from builtins import zip
from builtins import map
from builtins import range
from builtins import object
import resource
import numpy as np
from scipy.special import gamma, gammaln
import pycuda.driver as cuda
import pycuda.gpuarray as gpuarray
from pycuda.compiler import SourceModule
# import pycuda.autoinit
from .core import GPUAsyncProcess
from .utils import weights, find_kernel, _module_reader
from .utils import autofrequency as utils_autofreq
from .cunfft import NFFTAsyncProcess, nfft_adjoint_async, NFFTMemory
def get_k0(freqs):
    """Return the integer grid index of the minimum frequency.

    Assumes ``freqs`` is linearly spaced so that
    ``freqs[0] == k0 * df`` with ``df = freqs[1] - freqs[0]``.
    The result is clipped below at 1.
    """
    df = freqs[1] - freqs[0]
    k0 = int(round(freqs[0] / df))
    return k0 if k0 > 1 else 1
def check_k0(freqs, k0=None, rtol=1E-2, atol=1E-7):
    """Assert that ``freqs[0]`` is consistent with ``k0 * df``.

    ``df`` is the frequency spacing ``freqs[1] - freqs[0]``; if ``k0``
    is not given it is derived from ``freqs`` via :func:`get_k0`.
    Raises ``AssertionError`` when the minimum frequency is not
    (within tolerance) an integer multiple of the spacing.
    """
    if k0 is None:
        k0 = get_k0(freqs)
    df = freqs[1] - freqs[0]
    f0 = k0 * df
    assert(abs(f0 - freqs[0]) < rtol * df + atol)
class LombScargleMemory(object):
    """
    Container class for allocating memory and transferring
    data between the GPU and CPU for Lomb-Scargle computations

    Parameters
    ----------
    sigma: int
        The ``sigma`` parameter for the NFFT
    stream: :class:`pycuda.driver.Stream` instance
        The CUDA stream used for calculations/data transfer
    m: int
        The ``m`` parameter for the NFFT
    """
    def __init__(self, sigma, stream, m, **kwargs):
        self.sigma = sigma
        self.stream = stream
        self.m = m
        # k0 * df is the minimum frequency of the (linear) frequency grid
        self.k0 = kwargs.get('k0', 0)
        self.precomp_psi = kwargs.get('precomp_psi', True)
        # amplitude_prior: std. dev. of a Gaussian prior on harmonic
        # amplitudes (None disables regularization)
        self.amplitude_prior = kwargs.get('amplitude_prior', None)
        self.window = kwargs.get('window', False)
        self.nharmonics = kwargs.get('nharmonics', 1)
        self.use_fft = kwargs.get('use_fft', True)
        # keep all of the keyword arguments around for later inspection
        self.other_settings = {}
        self.other_settings.update(kwargs)
        self.floating_mean = kwargs.get('floating_mean', True)
        self.use_double = kwargs.get('use_double', False)
        # mode is consumed by the CUDA kernels:
        #   0 = fixed mean, 1 = floating mean, 2 = window function
        self.mode = 1 if self.floating_mean else 0
        if self.window:
            self.mode = 2
        self.n0 = kwargs.get('n0', None)
        self.nf = kwargs.get('nf', None)
        # GPU arrays (lazily allocated): times, weighted obs, weights, power
        self.t_g = kwargs.get('t_g', None)
        self.yw_g = kwargs.get('yw_g', None)
        self.w_g = kwargs.get('w_g', None)
        self.lsp_g = kwargs.get('lsp_g', None)
        if self.use_fft:
            # two NFFT workspaces: one for w * (y - ybar), one for w
            self.nfft_mem_yw = kwargs.get('nfft_mem_yw', None)
            self.nfft_mem_w = kwargs.get('nfft_mem_w', None)
            if self.nfft_mem_yw is None:
                self.nfft_mem_yw = NFFTMemory(self.sigma, self.stream,
                                              self.m, **kwargs)
            if self.nfft_mem_w is None:
                self.nfft_mem_w = NFFTMemory(self.sigma, self.stream,
                                             self.m, **kwargs)
            # inherit precision from the NFFT memory container
            self.real_type = self.nfft_mem_yw.real_type
            self.complex_type = self.nfft_mem_yw.complex_type
        else:
            self.real_type = np.float32
            self.complex_type = np.complex64
            if self.use_double:
                self.real_type = np.float64
                self.complex_type = np.complex128
        # Set up regularization
        # reg holds interleaved (cos, sin) regularization terms per harmonic
        self.reg_g = gpuarray.zeros(2 * self.nharmonics + 1,
                                    dtype=self.real_type)
        self.reg = np.zeros(2 * self.nharmonics + 1,
                            dtype=self.real_type)
        if self.amplitude_prior is not None:
            # lambda = 1 / prior_variance
            lmbda = np.power(self.amplitude_prior, -2)
            if isinstance(lmbda, float):
                # scalar prior applies to all harmonics
                lmbda = lmbda * np.ones(self.nharmonics)
            for i, l in enumerate(lmbda):
                self.reg[2 * i] = self.real_type(l)
                self.reg[1 + 2 * i] = self.real_type(l)
            self.reg_g.set_async(self.reg, stream=self.stream)
        # buffered_transfer: reuse pinned host buffers across lightcurves
        self.buffered_transfer = kwargs.get('buffered_transfer', False)
        self.n0_buffer = kwargs.get('n0_buffer', None)
        self.lsp_c = kwargs.get('lsp_c', None)
        self.t = kwargs.get('t', None)
        self.yw = kwargs.get('yw', None)
        self.w = kwargs.get('w', None)

    def allocate_data(self, **kwargs):
        """ Allocates memory for lightcurve """
        n0 = kwargs.get('n0', self.n0)
        if self.buffered_transfer:
            # use the (fixed) buffer size instead of the lightcurve length
            n0 = kwargs.get('n0_buffer', self.n0_buffer)
        assert(n0 is not None)
        self.t_g = gpuarray.zeros(n0, dtype=self.real_type)
        self.yw_g = gpuarray.zeros(n0, dtype=self.real_type)
        self.w_g = gpuarray.zeros(n0, dtype=self.real_type)
        if self.use_fft:
            # the NFFT containers share the GPU arrays allocated above
            self.nfft_mem_w.t_g = self.t_g
            self.nfft_mem_w.y_g = self.w_g
            self.nfft_mem_yw.t_g = self.t_g
            self.nfft_mem_yw.y_g = self.yw_g
            self.nfft_mem_yw.n0 = n0
            self.nfft_mem_w.n0 = n0
        return self

    def allocate_grids(self, **kwargs):
        """
        Allocates memory for NFFT grids, NFFT precomputation vectors,
        and the GPU vector for the Lomb-Scargle power
        """
        k0 = kwargs.get('k0', self.k0)
        n0 = kwargs.get('n0', self.n0)
        if self.buffered_transfer:
            n0 = kwargs.get('n0_buffer', self.n0_buffer)
        assert(n0 is not None)
        self.nf = kwargs.get('nf', self.nf)
        assert(self.nf is not None)
        if self.use_fft:
            if self.nfft_mem_yw.precomp_psi:
                self.nfft_mem_yw.allocate_precomp_psi(n0=n0)
                # Only one precomp psi needed; the w-container shares
                # the q1/q2/q3 vectors of the yw-container
                self.nfft_mem_w.precomp_psi = False
                self.nfft_mem_w.q1 = self.nfft_mem_yw.q1
                self.nfft_mem_w.q2 = self.nfft_mem_yw.q2
                self.nfft_mem_w.q3 = self.nfft_mem_yw.q3
            # the w-transform needs twice the bandwidth of the
            # yw-transform (sums involve frequencies up to 2 * nharmonics)
            fft_size = self.nharmonics * (self.nf + k0)
            self.nfft_mem_yw.allocate_grid(nf=fft_size - k0)
            self.nfft_mem_w.allocate_grid(nf=2 * fft_size - k0)
        self.lsp_g = gpuarray.zeros(self.nf, dtype=self.real_type)
        return self

    def allocate_pinned_cpu(self, **kwargs):
        """ Allocates pinned CPU memory for asynchronous transfer of result """
        nf = kwargs.get('nf', self.nf)
        assert(nf is not None)
        # page-aligned + registered memory enables async DMA transfers
        self.lsp_c = cuda.aligned_zeros(shape=(nf,), dtype=self.real_type,
                                        alignment=resource.getpagesize())
        self.lsp_c = cuda.register_host_memory(self.lsp_c)
        return self

    def is_ready(self):
        """ don't use this. """
        raise NotImplementedError()

    def allocate_buffered_data_arrays(self, **kwargs):
        """
        Allocates pinned memory for lightcurves if we're reusing
        this container
        """
        n0 = kwargs.get('n0', self.n0)
        if self.buffered_transfer:
            n0 = kwargs.get('n0_buffer', self.n0_buffer)
        assert(n0 is not None)
        self.t = cuda.aligned_zeros(shape=(n0,),
                                    dtype=self.real_type,
                                    alignment=resource.getpagesize())
        self.t = cuda.register_host_memory(self.t)
        self.yw = cuda.aligned_zeros(shape=(n0,),
                                     dtype=self.real_type,
                                     alignment=resource.getpagesize())
        self.yw = cuda.register_host_memory(self.yw)
        self.w = cuda.aligned_zeros(shape=(n0,),
                                    dtype=self.real_type,
                                    alignment=resource.getpagesize())
        self.w = cuda.register_host_memory(self.w)
        return self

    def allocate(self, **kwargs):
        """ Allocate all memory necessary """
        self.nf = kwargs.get('nf', self.nf)
        assert(self.nf is not None)
        self.allocate_data(**kwargs)
        self.allocate_grids(**kwargs)
        self.allocate_pinned_cpu(**kwargs)
        if self.buffered_transfer:
            self.allocate_buffered_data_arrays(**kwargs)
        return self

    def setdata(self, **kwargs):
        """ Sets the value of the data arrays. """
        t = kwargs.get('t', self.t)
        yw = kwargs.get('yw', self.yw)
        w = kwargs.get('w', self.w)
        y = kwargs.get('y', None)
        dy = kwargs.get('dy', None)
        # ybar: weighted mean; yy: weighted variance of the observations
        self.ybar = 0.
        self.yy = kwargs.get('yy', 1.)
        self.n0 = kwargs.get('n0', len(t))
        if dy is not None:
            # derive normalized weights from uncertainties;
            # passing both 'dy' and 'w' is ambiguous and disallowed
            assert('w' not in kwargs)
            w = weights(dy)
        if y is not None:
            # derive weighted, mean-subtracted observations from raw y;
            # passing both 'y' and 'yw' is ambiguous and disallowed
            assert('yw' not in kwargs)
            self.ybar = np.dot(y, w)
            yw = np.multiply(w, y - self.ybar)
            y2 = np.power(y - self.ybar, 2)
            self.yy = np.dot(w, y2)
        t = np.asarray(t).astype(self.real_type)
        yw = np.asarray(yw).astype(self.real_type)
        w = np.asarray(w).astype(self.real_type)
        if self.buffered_transfer:
            if any([arr is None for arr in [self.t, self.yw, self.w]]):
                if self.buffered_transfer:
                    self.allocate_buffered_data_arrays(**kwargs)
            # lightcurve must fit in the preallocated pinned buffers
            assert(self.n0 <= len(self.t))
            self.t[:self.n0] = t[:self.n0]
            self.yw[:self.n0] = yw[:self.n0]
            self.w[:self.n0] = w[:self.n0]
        else:
            self.t = np.asarray(t).astype(self.real_type)
            self.yw = np.asarray(yw).astype(self.real_type)
            self.w = np.asarray(w).astype(self.real_type)
        # Set minimum and maximum t values (needed to scale things
        # for the NFFT)
        self.tmin = min(t)
        self.tmax = max(t)
        if self.use_fft:
            self.nfft_mem_yw.tmin = self.tmin
            self.nfft_mem_w.tmin = self.tmin
            self.nfft_mem_yw.tmax = self.tmax
            self.nfft_mem_w.tmax = self.tmax
            self.nfft_mem_w.n0 = len(t)
            self.nfft_mem_yw.n0 = len(t)
        return self

    def transfer_data_to_gpu(self, **kwargs):
        """ Transfers the lightcurve to the GPU """
        t, yw, w = self.t, self.yw, self.w
        assert(not any([arr is None for arr in [t, yw, w]]))
        # Do asynchronous data transfer
        self.t_g.set_async(t, stream=self.stream)
        self.yw_g.set_async(yw, stream=self.stream)
        self.w_g.set_async(w, stream=self.stream)

    def transfer_lsp_to_cpu(self, **kwargs):
        """ Asynchronous transfer of LSP result to CPU """
        self.lsp_g.get_async(ary=self.lsp_c, stream=self.stream)

    def fromdata(self, **kwargs):
        """ Sets and (optionally) allocates memory for data """
        self.setdata(**kwargs)
        if kwargs.get('allocate', True):
            self.allocate(**kwargs)
        return self

    def set_gpu_arrays_to_zero(self, **kwargs):
        """ Sets all gpu arrays to zero """
        for x in [self.t_g, self.yw_g, self.w_g]:
            if x is not None:
                x.fill(self.real_type(0), stream=self.stream)
        for x in [self.t, self.yw, self.w]:
            if x is not None:
                x[:] = 0.
        # clear the NFFT output grids as well (present only when use_fft)
        if hasattr(self, 'nfft_mem_yw'):
            self.nfft_mem_yw.ghat_g.fill(self.complex_type(0),
                                         stream=self.stream)
        if hasattr(self, 'nfft_mem_w'):
            self.nfft_mem_w.ghat_g.fill(self.complex_type(0),
                                        stream=self.stream)
def mhdirect_sums(t, yw, w, freq, YY, nharms=1):
    """
    Compute the set of frequency-dependent sums
    for (multi-harmonic) Lomb Scargle at a given frequency

    Parameters
    ----------
    t: array_like
        Observation times.
    yw: array_like
        Observations multiplied by their corresponding weights.
        `sum(yw)` is the weighted mean.
    w: array_like
        Weights for each of the observations. Usually proportional
        to `1/sigma ** 2` where `sigma` is the observation uncertainties.
        Normalized so that `sum(w) = 1`.
    freq: float
        Signal frequency.
    YY: float
        Weighted variance of the observations. Unused here; kept
        for interface symmetry with the other sum-based routines.
    nharms: int, optional (default: 1)
        Number of harmonics to compute. This is 1 for the standard
        Lomb-Scargle

    Returns
    -------
    C, S, CC, CS, SS, YC, YS: array_like
        The set of (mean-subtracted) sums, *without* regularization.
        Use :func:`add_regularization` to add an amplitude prior.

    See also
    --------
    :func:`add_regularization`
    :func:`lomb_scargle_direct_sums`
    """
    # reduce t * freq mod 1 before converting to phase to keep precision
    phase = 2 * np.pi * ((t * freq) % 1.0).astype(np.float64)
    ns = np.arange(2 * nharms + 1)

    def sgn(n):
        # sign convention for the cross terms; note sgn(0) = 1, not 0
        return 1 if n == 0 else np.sign(n)

    # moments of cos/sin at harmonics 0..2*nharms (c[0] = sum(w) = 1)
    c = [np.dot(w, np.cos(n * phase)) for n in ns]
    s = [np.dot(w, np.sin(n * phase)) for n in ns]
    yc = [np.dot(yw, np.cos(n * phase)) for n in ns[1:nharms+1]]
    ys = [np.dot(yw, np.sin(n * phase)) for n in ns[1:nharms+1]]

    # product-to-sum identities express the quadratic sums in terms of
    # the single-frequency moments computed above
    cc = [[0.5 * (c[n+m] + c[abs(n-m)]) for m in ns[1:nharms+1]]
          for n in ns[1:nharms+1]]
    cs = [[0.5 * (s[n+m] - sgn(n-m) * s[abs(n-m)]) for m in ns[1:nharms+1]]
          for n in ns[1:nharms+1]]
    ss = [[0.5 * (c[abs(n-m)] - c[n+m]) for m in ns[1:nharms+1]]
          for n in ns[1:nharms+1]]

    C = np.asarray(c)[1:nharms+1]
    S = np.asarray(s)[1:nharms+1]

    # subtract the weighted means (Zechmeister & Kuerster 2009 notation)
    ybar = sum(yw)
    YC = np.asarray(yc) - ybar * C
    YS = np.asarray(ys) - ybar * S
    CC = np.asarray(cc) - np.outer(C, C)
    CS = np.asarray(cs) - np.outer(C, S)
    SS = np.asarray(ss) - np.outer(S, S)

    return C, S, CC, CS, SS, YC, YS
def add_regularization(sums, amplitude_priors=None, cn0=None, sn0=None):
    """
    Add regularization to sums. See Zechmeister & Kuerster 2009
    for details about notation.

    Parameters
    ----------
    sums: tuple of array_like
        C, S, CC, CS, SS, YC, YS. See `mhdirect_sums`
        for more information.
    amplitude_priors: float or array_like, optional
        Corresponds to standard deviation of a Gaussian
        prior on the amplitudes of all harmonics (if its a `float`),
        or for each of the harmonics (if it's an `array_like`).
    cn0: array_like
        Location of the centroid of the Gaussian amplitude prior
        on each of the cosine amplitudes
    sn0: array_like
        Location of the centroid of the Gaussian amplitude prior
        on each of the sine amplitudes
    cn0: array_like
    sn0: array_like

    Returns
    -------
    C, S, CC, CS, SS, YC, YS: array_like
        The set of sums with regularization added.

    See also
    --------
    :func:`mhdirect_sums`
    """
    C, S, CC, CS, SS, YC, YS = sums

    # D = 1 / prior_variance per harmonic (all zeros when no prior given)
    if amplitude_priors is None:
        D = np.zeros_like(C)
    else:
        D = np.power(amplitude_priors, -2) * np.ones_like(C)

    # prior centroids default to zero amplitude
    if cn0 is None:
        cn0 = np.zeros(len(C))
    if sn0 is None:
        sn0 = np.zeros(len(S))

    CCreg = CC + np.diag(D)
    SSreg = SS + np.diag(D)
    YCreg = YC + D * cn0
    YSreg = YS + D * sn0

    return C, S, CCreg, CS, SSreg, YCreg, YSreg
def mhgls_params_from_sums(sums, YY, ybar):
    """
    Compute optimal amplitudes and offset from
    set of sums. See Zechmeister & Kuerster 2009
    for details about notation.

    Parameters
    ----------
    sums: tuple of array_like
        C, S, CC, CS, SS, YC, YS. See `mhdirect_sums`
        for more information.
    YY: float
        Weighted variance of `y - ybar`, where `ybar`
        is the weighted mean and `y` are the observations.
        Unused here; kept for interface symmetry.
    ybar: float
        Weighted mean of the data (`np.dot(w, y)`), where
        `w` are the weights and `y` are the observations.

    Returns
    -------
    cn: array_like
        Cosine amplitudes of each harmonic
    sn: array_like
        Sine amplitudes of each harmonic
    offset: float
        Constant offset

    See also
    --------
    :func:`mhdirect_sums`
    """
    C, S, CC, CS, SS, YC, YS = sums
    nharms = len(C)

    # solve the (2H x 2H) normal equations for the harmonic amplitudes
    design = np.block([[CC, CS], [CS.T, SS]])
    rhs = np.concatenate((YC, YS))
    solution = np.linalg.solve(design, rhs)

    cn, sn = solution[:nharms], solution[nharms:]

    # constant offset absorbs the model's weighted mean
    offset = ybar - (np.dot(cn, C) + np.dot(sn, S))
    return cn, sn, offset
def mhgls_from_sums(sums, YY, ybar):
    """
    Compute multiharmonic periodogram power from
    set of sums. See Zechmeister & Kuerster 2009
    for details about notation.

    Parameters
    ----------
    sums: tuple of array_like
        C, S, CC, CS, SS, YC, YS. See `mhdirect_sums`
        for more information.
    YY: float
        Weighted variance of `y - ybar`, where `ybar`
        is the weighted mean and `y` are the observations
    ybar: float
        Weighted mean of the data (`np.dot(w, y)`), where
        `w` are the weights and `y` are the observations.

    Returns
    -------
    power: float
        periodogram power

    See also
    --------
    :func:`mhdirect_sums`
    """
    C, S, CC, CS, SS, YC, YS = sums

    # best-fit amplitudes for this frequency
    cn, sn, offset = mhgls_params_from_sums(sums, YY, ybar)

    # model variance: a^T M a, with a = (cn, sn) and M the sum matrix
    XX = (np.outer(cn, cn) * CC
          + 2 * np.outer(cn, sn) * CS
          + np.outer(sn, sn) * SS)

    # model-data covariance term
    YX = 2 * (np.dot(cn, YC) + np.dot(sn, YS))

    # normalized chi2 reduction = periodogram power
    return (YX - np.sum(XX)) / YY
def lomb_scargle_direct_sums(t, yw, w, freqs, YY, nharms=1, **kwargs):
    """
    Compute Lomb-Scargle periodogram using direct summations. This
    is usually only useful for debugging and/or small numbers of
    frequencies.

    Parameters
    ----------
    t: array_like
        Observation times.
    yw: array_like
        Observations multiplied by their corresponding weights.
        `sum(yw)` is the weighted mean.
    w: array_like
        Weights for each of the observations. Usually proportional
        to `1/sigma ** 2` where `sigma` is the observation uncertainties.
        Normalized so that `sum(w) = 1`.
    freqs: array_like
        Trial frequencies to evaluate the periodogram
    YY: float
        Weighted variance of the observations.
    nharms: int, optional (default: 1)
        Number of harmonics to use in the model. Lomb Scargle only uses
        1, but more harmonics allow for greater model flexibility
        at the cost of higher complexity and therefore reduced signal-
        to-noise.
    **kwargs
        Passed on to :func:`add_regularization`.

    Returns
    -------
    power: array_like
        The periodogram powers at each of the trial frequencies
    """
    ybar = sum(yw)
    powers = []
    for frequency in freqs:
        raw_sums = mhdirect_sums(t, yw, w, frequency, YY, nharms=nharms)
        reg_sums = add_regularization(raw_sums, **kwargs)
        powers.append(mhgls_from_sums(reg_sums, YY, ybar))
    return np.array(powers)
def lomb_scargle_async(memory, functions, freqs,
                       block_size=256, use_fft=True,
                       python_dir_sums=False,
                       transfer_to_device=True,
                       transfer_to_host=True,
                       window=False, **kwargs):
    """
    Asynchronous Lomb Scargle periodogram

    Use the ``LombScargleAsyncProcess`` class and
    related subroutines when possible.

    Parameters
    ----------
    memory: ``LombScargleMemory``
        Allocated memory, must have data already set (see, e.g.,
        ``LombScargleAsyncProcess.allocate()``)
    functions: tuple (lombscargle_functions, nfft_functions)
        Tuple of compiled functions from ``SourceModule``. Must be
        prepared with their appropriate dtype.
    freqs: array_like, optional (default: 0)
        Linearly-spaced frequencies starting at an integer multiple
        of the frequency spacing (i.e. freqs = df * (k0 + np.arange(nf)))
    block_size: int, optional
        Number of CUDA threads per block
    use_fft: bool, optional (default: True)
        If False, uses direct sums.
    python_dir_sums: bool, optional (default: False)
        If True, performs direct sums with Python on the CPU
    transfer_to_device: bool, optional, (default: True)
        If the data is already on the gpu, set as False
    transfer_to_host: bool, optional, (default: True)
        If False, will not transfer the resulting periodogram to
        CPU memory
    window: bool, optional (default: False)
        If True, computes the window function for the data

    Returns
    -------
    lsp_c: ``np.array``
        The resulting periodogram (``memory.lsp_c``)
    """
    (lomb, lomb_dirsum), nfft_funcs = functions

    df = freqs[1] - freqs[0]
    samples_per_peak = 1. / ((memory.tmax - memory.tmin) * df)

    # the frequency grid must start at memory.k0 * df
    assert(get_k0(freqs) == memory.k0)

    stream = memory.stream

    block = (block_size, 1, 1)
    grid = (int(np.ceil(memory.nf / float(block_size))), 1)

    # lightcurve -> gpu
    if transfer_to_device:
        memory.transfer_data_to_gpu()

    # do direct summations with python on the CPU (for debugging)
    if python_dir_sums:
        t = memory.t_g.get()
        yw = memory.yw_g.get()
        w = memory.w_g.get()
        return lomb_scargle_direct_sums(t, yw, w, freqs, memory.yy)

    # Use direct sums (on GPU)
    if not use_fft:
        args = (grid, block, stream,
                memory.t_g.ptr, memory.yw_g.ptr, memory.w_g.ptr,
                memory.lsp_g.ptr, memory.reg_g.ptr,
                np.int32(memory.nf),
                np.int32(memory.n0),
                memory.real_type(memory.yy),
                memory.real_type(memory.ybar),
                memory.real_type(df),
                memory.real_type(min(freqs)),
                memory.mode)
        lomb_dirsum.prepared_async_call(*args)

        # BUGFIX: the device->host result transfer must be gated on
        # ``transfer_to_host`` (as in the NFFT branch below); it was
        # previously gated on ``transfer_to_device``.
        if transfer_to_host:
            memory.transfer_lsp_to_cpu()
        return memory.lsp_c
    else:
        # NFFT
        nfft_kwargs = dict(transfer_to_host=False,
                           transfer_to_device=False)
        nfft_kwargs.update(kwargs)
        nfft_kwargs['minimum_frequency'] = freqs[0]
        nfft_kwargs['samples_per_peak'] = samples_per_peak

        # NFFT(w * (y - ybar))
        nfft_adjoint_async(memory.nfft_mem_yw, nfft_funcs, **nfft_kwargs)

        # NFFT(w)
        nfft_adjoint_async(memory.nfft_mem_w, nfft_funcs, **nfft_kwargs)

        # combine the two adjoint NFFTs into the periodogram power
        args = (grid, block, stream)
        args += (memory.nfft_mem_w.ghat_g.ptr, memory.nfft_mem_yw.ghat_g.ptr)
        args += (memory.lsp_g.ptr, memory.reg_g.ptr, np.int32(memory.nf))
        args += (memory.real_type(memory.yy),
                 memory.real_type(memory.ybar))
        args += (np.int32(memory.k0),
                 np.int32(memory.mode))
        lomb.prepared_async_call(*args)

        if transfer_to_host:
            memory.transfer_lsp_to_cpu()

        return memory.lsp_c
class LombScargleAsyncProcess(GPUAsyncProcess):
    """
    GPUAsyncProcess for the Lomb Scargle periodogram

    Parameters
    ----------
    **kwargs: passed to ``NFFTAsyncProcess``

    Example
    -------
    >>> proc = LombScargleAsyncProcess()
    >>> Ndata = 1000
    >>> t = np.sort(365 * np.random.rand(Ndata))
    >>> y = 12 + 0.01 * np.cos(2 * np.pi * t / 5.0)
    >>> y += 0.01 * np.random.randn(len(t))
    >>> dy = 0.01 * np.ones_like(y)
    >>> freqs, powers = proc.run([(t, y, dy)])
    >>> proc.finish()
    >>> ls_freqs, ls_powers = freqs[0], powers[0]
    """
    def __init__(self, *args, **kwargs):
        super(LombScargleAsyncProcess, self).__init__(*args, **kwargs)

        # reuse the NFFT process' precision / compilation settings
        self.nfft_proc = NFFTAsyncProcess(*args, **kwargs)
        self._cpp_defs = self.nfft_proc._cpp_defs

        self.real_type = self.nfft_proc.real_type
        self.complex_type = self.nfft_proc.complex_type

        self.block_size = self.nfft_proc.block_size
        self.module_options = self.nfft_proc.module_options
        self.use_double = self.nfft_proc.use_double

        self.memory = None
        self.nharmonics = kwargs.get('nharmonics', 1)
        if self.nharmonics > 1:
            raise Exception("Only 1 harmonic is supported right now")

    def _compile_and_prepare_functions(self, **kwargs):
        """ Compile the 'lomb' kernel module and prepare its functions. """
        module_text = _module_reader(find_kernel('lomb'), self._cpp_defs)

        self.module = SourceModule(module_text, options=self.module_options)

        # argument signatures for the prepared (async) kernel calls
        self.dtypes = dict(
            lomb=[np.intp, np.intp, np.intp, np.intp, np.int32,
                  self.real_type, self.real_type, np.int32, np.int32],
            lomb_dirsum=[np.intp, np.intp, np.intp, np.intp, np.intp,
                         np.int32, np.int32, self.real_type, self.real_type,
                         self.real_type, self.real_type, np.int32]
        )
        self.nfft_proc._compile_and_prepare_functions(**kwargs)
        for fname, dtype in self.dtypes.items():
            func = self.module.get_function(fname)
            self.prepared_functions[fname] = func.prepare(dtype)
        # sorted keys -> (lomb, lomb_dirsum)
        self.function_tuple = tuple(self.prepared_functions[fname]
                                    for fname in sorted(self.dtypes.keys()))

    def memory_requirement(self, n0, nf, k0, nbatch=1,
                           autoadjust_sigma=False, **kwargs):
        """ return an approximate GPU memory requirement in bytes """
        H = self.nharmonics
        sigma = self.nfft_proc.sigma
        m = self.nfft_proc.get_m(nf)

        if autoadjust_sigma:
            sigma = int(np.round(float(sigma * (nf + k0)) / nf))

        fft_size = H * (nf + k0)

        # accumulate the element count, then convert to bytes at the end.
        # BUGFIX: ``mem`` was previously used before assignment and then
        # clobbered by ``mem = ...`` in the FFT branch.
        mem = 0

        # data
        mem += 3 * n0

        # final result
        mem += nf

        rsize = self.real_type(1).nbytes
        csize = self.complex_type(1).nbytes
        # complex elements cost this many real elements
        c = int(np.ceil(float(csize) / rsize))

        if kwargs.get('use_fft', True):
            # yw grid / fft (doubled because complex)
            mem += c * sigma * (fft_size - k0)

            # w grid / fft (doubled because complex)
            mem += c * sigma * (2 * fft_size - k0)

            # precomputation (q1 = n0, q2 = n0, q3 = 2m + 1)
            mem += 2 * n0 + 2 * m + 1

        # inverse of design matrix
        if H > 1:
            # sparse matrix A (block-diagonal)
            mem += (2 * H) ** 2 * nbatch

            # vector b (Ax = b)
            mem += nbatch

        # size of float
        mem *= rsize

        return mem

    def allocate_for_single_lc(self, t, y, dy, nf, k0=0,
                               stream=None, **kwargs):
        """
        Allocate GPU (and possibly CPU) memory for single lightcurve

        Parameters
        ----------
        t: array_like
            Observation times
        y: array_like
            Observations
        dy: array_like
            Observation uncertainties
        nf: int
            Number of frequencies
        k0: int
            The starting index for the Fourier transform. The minimum
            frequency ``f0 = k0 * df``, where ``df`` is the frequency
            spacing
        stream: pycuda.driver.Stream
            CUDA stream you want this to run on
        **kwargs

        Returns
        -------
        mem: LombScargleMemory
            Memory object.
        """
        m = self.nfft_proc.get_m(nf)

        sigma = self.nfft_proc.sigma

        kwargs_lsmem = dict(use_double=self.use_double,
                            nharmonics=self.nharmonics)
        kwargs_lsmem.update(kwargs)
        mem = LombScargleMemory(sigma, stream, m, k0=k0,
                                **kwargs_lsmem)

        mem.fromdata(t=t, y=y, dy=dy, nf=nf, allocate=True,
                     **kwargs)

        return mem

    def preallocate(self, max_nobs, nlcs=1, nf=None, k0=None,
                    freqs=None, streams=None, **kwargs):
        """ Preallocate ``nlcs`` reusable memory containers. """
        if freqs is not None:
            k0 = get_k0(freqs)
            nf = len(freqs)

        if nf is not None:
            assert k0 is not None

        m = self.nfft_proc.get_m(nf)
        sigma = self.nfft_proc.sigma

        self.memory = []
        for i in range(nlcs):
            stream = None if streams is None else streams[i]
            mem = LombScargleMemory(sigma, stream, m,
                                    k0=k0,
                                    buffered_transfer=True,
                                    n0_buffer=max_nobs,
                                    nf=nf,
                                    use_double=self.use_double,
                                    nharmonics=self.nharmonics,
                                    **kwargs)
            mem.allocate(**kwargs)
            self.memory.append(mem)

        return self.memory

    def autofrequency(self, *args, **kwargs):
        """ Generate a frequency grid (see ``utils.autofrequency``). """
        return utils_autofreq(*args, **kwargs)

    def _nfreqs(self, *args, **kwargs):
        # number of frequencies the default grid would contain
        return len(self.autofrequency(*args, **kwargs))

    def allocate(self, data, nfreqs=None, k0s=None, **kwargs):
        """
        Allocate GPU memory for Lomb Scargle computations

        Parameters
        ----------
        data: list of (t, y, dy) tuples
            List of data, ``[(t_1, y_1, dy_1), ...]``
            * ``t``: Observation times
            * ``y``: Observations
            * ``dy``: Observation uncertainties
        **kwargs

        Returns
        -------
        allocated_memory: list of ``LombScargleMemory``
            list of allocated memory objects for each lightcurve
        """
        if len(data) > len(self.streams):
            self._create_streams(len(data) - len(self.streams))

        allocated_memory = []

        nfrqs = nfreqs
        if nfrqs is None:
            nfrqs = [self._nfreqs(t, **kwargs) for (t, y, dy) in data]
        elif isinstance(nfreqs, int):
            # broadcast a scalar frequency count (keep integer dtype;
            # previously this produced a float ndarray via np.ones)
            nfrqs = [nfreqs] * len(data)

        if k0s is None:
            k0s = [1] * len(nfrqs)
        elif isinstance(k0s, (int, float)):
            # broadcast a scalar k0 (an int scalar previously failed
            # at the zip below)
            k0s = [k0s] * len(nfrqs)

        for i, ((t, y, dy), nf, k0) in enumerate(zip(data, nfrqs, k0s)):
            mem = self.allocate_for_single_lc(t, y, dy, nf, k0=k0,
                                              stream=self.streams[i],
                                              **kwargs)
            allocated_memory.append(mem)

        return allocated_memory

    def run(self, data,
            use_fft=True, memory=None,
            freqs=None,
            **kwargs):
        """
        Run Lomb Scargle on a batch of data.

        Parameters
        ----------
        data: list of tuples
            list of [(t, y, dy), ...] containing
            * ``t``: observation times
            * ``y``: observations
            * ``dy``: observation uncertainties
        freqs: optional, list of ``np.ndarray`` frequencies
            List of custom frequencies. Right now, this has to be linearly
            spaced with ``freqs[0] / (freqs[1] - freqs[0])`` being an integer.
        memory: optional, list of ``LombScargleMemory`` objects
            List of memory objects, length of list must be ``>= len(data)``
        use_fft: optional, bool (default: True)
            Uses the NFFT, otherwise just does direct summations (which
            are quite slow...)
        floating_mean: optional, bool (default: True)
            Add a floating mean to the model (see Zechmeister & Kurster 2009)
        window: optional, bool (default: False)
            If true, computes the window function for the data instead of
            Lomb-Scargle
        amplitude_prior: optional, float (default: None)
            If not None, sets the variance of a Gaussian prior on
            the amplitude (sometimes useful for suppressing aliases)
        **kwargs

        Returns
        -------
        results: list of lists
            list of (freqs, pows) for each LS periodogram
        """
        # compile module if not compiled already
        if not hasattr(self, 'prepared_functions') or \
            not all([func in self.prepared_functions for func in
                     ['lomb', 'lomb_dirsum']]):
            self._compile_and_prepare_functions(**kwargs)

        # create and/or check frequencies
        frqs = freqs
        if frqs is None:
            frqs = [self.autofrequency(d[0], **kwargs) for d in data]
        elif isinstance(frqs[0], float):
            # a single frequency grid is shared by all lightcurves
            frqs = [frqs] * len(data)

        assert(len(frqs) == len(data))

        k0s = [get_k0(frq) for frq in frqs]

        # make sure k0 * df is the minimum frequency
        [check_k0(frq, k0=k0) for frq, k0 in zip(frqs, k0s)]

        if memory is None:
            memory = self.memory

        if memory is None:
            nfreqs = [len(frq) for frq in frqs]
            memory = self.allocate(data, nfreqs=nfreqs, k0s=k0s,
                                   use_fft=use_fft, **kwargs)
        else:
            # reuse preallocated containers: wipe and refill them
            for i, (t, y, dy) in enumerate(data):
                memory[i].set_gpu_arrays_to_zero(**kwargs)
                memory[i].setdata(t=t, y=y, dy=dy, **kwargs)

        ls_kwargs = dict(block_size=self.block_size,
                         use_fft=use_fft)
        ls_kwargs.update(kwargs)

        funcs = (self.function_tuple, self.nfft_proc.function_tuple)
        results = [lomb_scargle_async(memory[i], funcs, frqs[i],
                                      **ls_kwargs)
                   for i in range(len(data))]

        results = [(f, r) for f, r in zip(frqs, results)]
        return results

    def batched_run_const_nfreq(self, data, batch_size=10,
                                use_fft=True, freqs=None,
                                **kwargs):
        """
        Same as ``batched_run`` but is more efficient when the frequencies are
        the same for each lightcurve. Doesn't reallocate memory for each batch.

        Notes
        -----
        To get best efficiency, make sure the maximum number of observations
        is not much larger than the typical number of observations
        """
        # compile and prepare module functions if not already done
        if not hasattr(self, 'prepared_functions') or \
            not all([func in self.prepared_functions for func in
                     ['lomb', 'lomb_dirsum']]):
            self._compile_and_prepare_functions(**kwargs)

        # create streams if needed
        bsize = min([len(data), batch_size])
        if len(self.streams) < bsize:
            self._create_streams(bsize - len(self.streams))

        streams = [self.streams[i] for i in range(bsize)]
        max_ndata = max([len(t) for t, y, dy in data])

        if freqs is None:
            data_with_max_baseline = max(data,
                                         key=lambda d: max(d[0]) - min(d[0]))
            freqs = self.autofrequency(data_with_max_baseline[0], **kwargs)

        # snap the grid onto df * (k0 + arange(nf)) so it satisfies
        # the integer-multiple constraint required by the kernels
        df = freqs[1] - freqs[0]
        k0 = get_k0(freqs)
        nf = int(round(max(freqs) / df)) - k0
        freqs = df * (k0 + np.arange(nf))

        df = freqs[1] - freqs[0]
        k0 = get_k0(freqs)
        nf = len(freqs)

        check_k0(freqs, k0=k0)

        lsps = []

        # make data batches
        batches = []
        while len(batches) * batch_size < len(data):
            start = len(batches) * batch_size
            finish = start + min([batch_size, len(data) - start])
            batches.append([data[i] for i in range(start, finish)])

        # set up memory containers for gpu and cpu (pinned) memory
        m = self.nfft_proc.get_m(nf)
        sigma = self.nfft_proc.sigma
        kwargs_lsmem = dict(buffered_transfer=True,
                            n0_buffer=max_ndata,
                            use_double=self.use_double,
                            nharmonics=self.nharmonics,
                            use_fft=use_fft)
        kwargs_lsmem.update(kwargs)
        memory = [LombScargleMemory(sigma, stream, m, k0=k0,
                                    **kwargs_lsmem)
                  for stream in streams]

        # allocate memory
        [mem.allocate(nf=nf, **kwargs) for mem in memory]

        funcs = (self.function_tuple, self.nfft_proc.function_tuple)

        for b, batch in enumerate(batches):
            results = self.run(batch, memory=memory, freqs=freqs,
                               use_fft=use_fft,
                               **kwargs)
            self.finish()

            # copy out of the (reused) pinned buffers before next batch
            for i, (f, p) in enumerate(results):
                lsps.append(np.copy(p))

        return [(freqs, lsp) for lsp in lsps]
def fap_baluev(t, dy, z, fmax, d_K=3, d_H=1, use_gamma=True):
    """
    False alarm probability for periodogram peak
    based on Baluev (2008) [2008MNRAS.385.1279B]

    Parameters
    ----------
    t: array_like
        Observation times.
    dy: array_like
        Observation uncertainties.
    z: array_like or float
        Periodogram value(s)
    fmax: float
        Maximum frequency searched
    d_K: int, optional (default: 3)
        Number of degrees of fredom for periodgram model.
        2H - 1 where H is the number of harmonics
    d_H: int, optional (default: 1)
        Number of degrees of freedom for default model.
    use_gamma: bool, optional (default: True)
        Use gamma function for computation of numerical
        coefficient; replaced with scipy.special.gammaln
        and should be stable now

    Returns
    -------
    fap: float
        False alarm probability

    Example
    -------
    >>> rand = np.random.RandomState(100)
    >>> t = np.sort(rand.rand(100))
    >>> y = 12 + 0.01 * np.cos(2 * np.pi * 10. * t)
    >>> dy = 0.01 * np.ones_like(y)
    >>> y += dy * rand.rand(len(t))
    >>> proc = LombScargleAsyncProcess()
    >>> results = proc.run([(t, y, dy)])
    >>> freqs, powers = results[0]
    >>> fap_baluev(t, dy, powers, max(freqs))
    """
    N = len(t)
    d = d_K - d_H
    N_K = N - d_K
    N_H = N - d_H

    # numerical coefficient g; the gammaln path evaluates
    # Gamma(N_H / 2) / Gamma((N_K + 1) / 2) in log space for stability
    if use_gamma:
        g = np.exp(gammaln(0.5 * N_H) - gammaln(0.5 * (N_K + 1)))
    else:
        g = (0.5 * N_H) ** (0.5 * (d - 1))

    # weighted effective baseline of the observations
    wts = np.power(dy, -2)
    tbar = np.dot(wts, t) / sum(wts)
    Dt = np.dot(wts, np.power(t - tbar, 2)) / sum(wts)
    Teff = np.sqrt(4 * np.pi * Dt)

    # normalized bandwidth
    W = fmax * Teff
    A = (2 * np.pi ** 1.5) * W

    # NOTE(review): the grouping here is ((z/pi)**0.5) * (d - 1), i.e.
    # (d - 1) is a multiplicative factor, not part of the exponent;
    # reproduced as-is (equivalent for the default d_K=3, d_H=1).
    eZ1 = (z / np.pi) ** 0.5 * (d - 1)
    eZ2 = (1 - z) ** (0.5 * (N_K - 1))

    # expected number of up-crossings of level z
    tau = (g * A / (2 * np.pi)) * eZ1 * eZ2

    # single-frequency probability that the power stays below z
    Psing = 1 - (1 - z) ** (0.5 * N_K)

    return 1 - Psing * np.exp(-tau)
def lomb_scargle_simple(t, y, dy, **kwargs):
    """
    Simple lomb-scargle interface for testing that
    things work on the GPU. Note: This will be
    substantially slower than working with the
    ``LombScargleAsyncProcess`` interface.

    Parameters
    ----------
    t: array_like
        Observation times.
    y: array_like
        Observations.
    dy: array_like
        Observation uncertainties.

    Returns
    -------
    freqs, powers: tuple of array_like
        Frequency grid and the corresponding periodogram powers.
    """
    proc = LombScargleAsyncProcess()

    # BUGFIX: pass the raw uncertainties through; ``run`` treats the
    # third tuple element as ``dy`` and derives normalized weights
    # internally (via ``weights(dy)``). Previously the normalized
    # weights themselves were passed as ``dy``, which re-inverted the
    # weighting for heteroscedastic uncertainties.
    results = proc.run([(t, y, dy)], **kwargs)

    freqs, powers = results[0]

    proc.finish()

    return freqs, powers