"""
Fast Lomb-Scargle Algorithm, following Press & Rybicki 1989
"""
from __future__ import print_function, division
__all__ = ['LombScargleFast']
import warnings
import numpy as np
from .lomb_scargle import LombScargle
# Precomputed factorials
FACTORIALS = [1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800]
def factorial(N):
"""Compute the factorial of N.
If N <= 10, use a fast lookup table; otherwise use scipy.special.factorial
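Example
-------
A quick illustrative check covering both the lookup table and the scipy
fallback:
>>> factorial(5), factorial(12)
(120, 479001600)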
"""
if N < len(FACTORIALS):
return FACTORIALS[N]
else:
from scipy import special
return int(special.factorial(N))
def bitceil(N):
"""
Find the bit (i.e. power of 2) immediately greater than or equal to N
Note: this works for numbers up to 2 ** 64.
Roughly equivalent to int(2 ** np.ceil(np.log2(N)))
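Example
-------
Illustrative values (powers of two map to themselves):
>>> [bitceil(N) for N in [1, 7, 8, 9, 100]]
[1, 8, 8, 16, 128]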
"""
# Note: for Python 2.7 and 3.x, this is faster:
# return 1 << int(N - 1).bit_length()
N = int(N) - 1
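# Smear the highest set bit of N (already decremented above) into all lower
# bits, producing a value of the form 2**k - 1; adding 1 then yields the
# power of two at or above the original input.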
for i in [1, 2, 4, 8, 16, 32]:
N |= N >> i
return N + 1
def extirpolate(x, y, N=None, M=4):
"""
Extirpolate the values (x, y) onto an integer grid range(N),
using Lagrange polynomial weights on the M nearest points.
Parameters
----------
x : array_like
array of abscissas
y : array_like
array of ordinates
N : int
number of integer bins to use. For best performance, N should be larger
than the maximum of x
M : int
number of adjoining points on which to extirpolate.
Returns
-------
yN : ndarray
N extirpolated values associated with range(N)
Example
-------
>>> rng = np.random.RandomState(0)
>>> x = 100 * rng.rand(20)
>>> y = np.sin(x)
>>> y_hat = extirpolate(x, y)
>>> x_hat = np.arange(len(y_hat))
>>> f = lambda x: np.sin(x / 10)
>>> np.allclose(np.sum(y * f(x)), np.sum(y_hat * f(x_hat)))
True
Notes
-----
This code is based on the C implementation of spread() presented in
Numerical Recipes in C, Second Edition (Press et al. 1992; p.583).
"""
x, y = map(np.ravel, np.broadcast_arrays(x, y))
if N is None:
N = int(np.max(x) + 0.5 * M + 1)
# Now use Lagrange polynomial weights to populate the results array;
# This is an efficient recursive implementation (See Press et al. 1989)
result = np.zeros(N, dtype=y.dtype)
# first take care of the easy cases where x is an integer
integers = (x % 1 == 0)
np.add.at(result, x[integers].astype(int), y[integers])
x, y = x[~integers], y[~integers]
# For each remaining x, find the index describing the extirpolation range.
# i.e. ilo[i] < x[i] < ilo[i] + M with x[i] in the center,
# adjusted so that the limits are within the range 0...N
ilo = np.clip((x - M // 2).astype(int), 0, N - M)
numerator = y * np.prod(x - ilo - np.arange(M)[:, np.newaxis], 0)
denominator = factorial(M - 1)
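# The loop below accumulates the Lagrange basis weights recursively: at
# iteration j the running denominator equals (-1)**j * j! * (M - 1 - j)!,
# the Lagrange denominator for the grid node ind = ilo + (M - 1 - j).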
for j in range(M):
if j > 0:
denominator *= j / (j - M)
ind = ilo + (M - 1 - j)
np.add.at(result, ind, numerator / (denominator * (x - ind)))
return result
def trig_sum(t, h, df, N, f0=0, freq_factor=1,
oversampling=5, use_fft=True, Mfft=4):
"""Compute (approximate) trigonometric sums for a number of frequencies
This routine computes weighted sine and cosine sums:
S_j = sum_i { h_i * sin(2 pi * f_j * t_i) }
C_j = sum_i { h_i * cos(2 pi * f_j * t_i) }
Where f_j = freq_factor * (f0 + j * df) for the values j in 0 ... N - 1.
The sums can be computed either by a brute force O[N^2] method, or
by an FFT-based O[Nlog(N)] method.
Parameters
----------
t : array_like
array of input times
h : array_like
array weights for the sum
df : float
frequency spacing
N : int
number of frequency bins to return
f0 : float (optional, default=0)
The lowest frequency to use
freq_factor : float (optional, default=1)
Factor which multiplies the frequency
use_fft : bool
if True, use the approximate FFT algorithm to compute the result.
This uses the FFT with Press & Rybicki's Lagrangian extirpolation.
oversampling : int (default = 5)
oversampling factor for the approximation; roughly the number of
time samples across the highest-frequency sinusoid. This parameter
controls the tradeoff between accuracy and speed. Not referenced
if use_fft is False.
Mfft : int
The number of adjacent points to use in the FFT approximation.
Not referenced if use_fft is False.
Returns
-------
S, C : ndarrays
summation arrays for frequencies f = freq_factor * (f0 + df * np.arange(N))
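Example
-------
Illustrative sketch with arbitrary synthetic inputs; only the output
shapes are asserted here:
>>> rng = np.random.RandomState(0)
>>> t = 10 * rng.rand(50)
>>> h = np.sin(t)
>>> S, C = trig_sum(t, h, df=0.1, N=100)
>>> S.shape, C.shape
((100,), (100,))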
"""
df *= freq_factor
f0 *= freq_factor
assert df > 0
t, h = map(np.ravel, np.broadcast_arrays(t, h))
if use_fft:
Mfft = int(Mfft)
assert(Mfft > 0)
# required size of fft is the power of 2 above the oversampling rate
Nfft = bitceil(N * oversampling)
t0 = t.min()
if f0 > 0:
h = h * np.exp(2j * np.pi * f0 * (t - t0))
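# Map each time onto the length-Nfft grid: one grid step corresponds to a
# frequency spacing of df, i.e. grid index ~ (t - t0) * Nfft * df (mod Nfft).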
tnorm = ((t - t0) * Nfft * df) % Nfft
grid = extirpolate(tnorm, h, Nfft, Mfft)
fftgrid = np.fft.ifft(grid)
if t0 != 0:
f = f0 + df * np.arange(Nfft)
fftgrid *= np.exp(2j * np.pi * t0 * f)
fftgrid = fftgrid[:N]
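# np.fft.ifft includes a 1/Nfft normalization; multiplying by Nfft recovers
# sum_i h_i * exp(2j * pi * f_j * t_i), whose real and imaginary parts are
# the cosine and sine sums C_j and S_j.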
C = Nfft * fftgrid.real
S = Nfft * fftgrid.imag
else:
f = f0 + df * np.arange(N)
C = np.dot(h, np.cos(2 * np.pi * f * t[:, np.newaxis]))
S = np.dot(h, np.sin(2 * np.pi * f * t[:, np.newaxis]))
return S, C
def lomb_scargle_fast(t, y, dy=1, f0=0, df=None, Nf=None,
center_data=True, fit_offset=True,
use_fft=True, freq_oversampling=5, nyquist_factor=2,
trig_sum_kwds=None):
"""Compute a lomb-scargle periodogram for the given data
This implements both an O[N^2] method if use_fft==False, or an
O[NlogN] method if use_fft==True.
Parameters
----------
t, y, dy : array_like
times, values, and errors of the data points. These should be
broadcastable to the same shape. If dy is not specified, a
constant error will be used.
f0, df, Nf : (float, float, int)
parameters describing the frequency grid, f = f0 + df * arange(Nf).
Defaults, with T = t.max() - t.min():
- f0 = 0
- df is set such that there are ``freq_oversampling`` points per
peak width. ``freq_oversampling`` defaults to 5.
- Nf is set such that the highest frequency is ``nyquist_factor``
times the so-called "average Nyquist frequency".
``nyquist_factor`` defaults to 2.
Note that for unevenly-spaced data, the periodogram can be sensitive
to frequencies far higher than the average Nyquist frequency.
center_data : bool (default=True)
Specify whether to subtract the mean of the data before the fit
fit_offset : bool (default=True)
If True, then compute the floating-mean periodogram; i.e. let the mean
vary with the fit.
use_fft : bool (default=True)
If True, then use the Press & Rybicki O[NlogN] algorithm to compute
the result. Otherwise, use a slower O[N^2] algorithm
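Returns
-------
freq : ndarray
the frequency grid, f = f0 + df * np.arange(Nf)
power : ndarray
the Lomb-Scargle power at each frequency in ``freq``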
Other Parameters
----------------
freq_oversampling : float (default=5)
Oversampling factor for the frequency bins. Only referenced if
``df`` is not specified
nyquist_factor : float (default=2)
Parameter controlling the highest probed frequency. Only referenced
if ``Nf`` is not specified.
trig_sum_kwds : dict or None (optional)
extra keyword arguments to pass to the ``trig_sum`` utility.
Options are ``oversampling`` and ``Mfft``. See documentation
of ``trig_sum`` for details.
Notes
-----
Note that the ``use_fft=True`` algorithm is an approximation to the true
Lomb-Scargle periodogram, and as the number of points grows this
approximation improves. On the other hand, for very small datasets
(<~50 points or so) this approximation may not be useful.
References
----------
.. [1] Press, W.H. and Rybicki, G.B., "Fast algorithm for spectral analysis
of unevenly sampled data". ApJ 338, 277, 1989
.. [2] M. Zechmeister and M. Kürster, A&A 496, 577-584 (2009)
.. [3] W. Press et al, Numerical Recipes in C (2002)
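Example
-------
Illustrative sketch with arbitrary synthetic data; only the shapes of the
outputs are asserted here:
>>> rng = np.random.RandomState(0)
>>> t = 100 * rng.rand(100)
>>> y = np.sin(2 * np.pi * t) + 0.1 * rng.randn(100)
>>> freq, power = lomb_scargle_fast(t, y, dy=0.1)
>>> freq.shape == power.shape
True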
"""
# Validate and setup input data
t, y, dy = map(np.ravel, np.broadcast_arrays(t, y, dy))
w = 1. / (dy ** 2)
w /= w.sum()
# Validate and setup frequency grid
if df is None:
peak_width = 1. / (t.max() - t.min())
df = peak_width / freq_oversampling
if Nf is None:
avg_Nyquist = 0.5 * len(t) / (t.max() - t.min())
Nf = max(16, (nyquist_factor * avg_Nyquist - f0) / df)
Nf = int(Nf)
assert(df > 0)
assert(Nf > 0)
freq = f0 + df * np.arange(Nf)
# Center the data. Even if we're fitting the offset,
# this step makes the expressions below more succinct
if center_data or fit_offset:
y = y - np.dot(w, y)
# set up arguments to trig_sum
kwargs = dict.copy(trig_sum_kwds or {})
kwargs.update(f0=f0, df=df, use_fft=use_fft, N=Nf)
#----------------------------------------------------------------------
# 1. compute functions of the time-shift tau at each frequency
Sh, Ch = trig_sum(t, w * y, **kwargs)
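# The doubled-frequency sums S2, C2 (freq_factor=2) are needed both for the
# time-shift tau and for the CC/SS normalization terms computed below.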
S2, C2 = trig_sum(t, w, freq_factor=2, **kwargs)
if fit_offset:
S, C = trig_sum(t, w, **kwargs)
with warnings.catch_warnings():
# Filter "invalid value in divide" warnings for zero-frequency
if f0 == 0:
warnings.simplefilter("ignore")
tan_2omega_tau = (S2 - 2 * S * C) / (C2 - (C * C - S * S))
# fix NaN at zero frequency
if np.isnan(tan_2omega_tau[0]):
tan_2omega_tau[0] = 0
else:
tan_2omega_tau = S2 / C2
# slower/less stable way: we'll use trig identities instead
# omega_tau = 0.5 * np.arctan(tan_2omega_tau)
# S2w, C2w = np.sin(2 * omega_tau), np.cos(2 * omega_tau)
# Sw, Cw = np.sin(omega_tau), np.cos(omega_tau)
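# Here C2w = cos(2 omega tau) and S2w = sin(2 omega tau); Cw and Sw follow
# from the half-angle identities, with the branch chosen so that Cw >= 0.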
S2w = tan_2omega_tau / np.sqrt(1 + tan_2omega_tau * tan_2omega_tau)
C2w = 1 / np.sqrt(1 + tan_2omega_tau * tan_2omega_tau)
Cw = np.sqrt(0.5) * np.sqrt(1 + C2w)
Sw = np.sqrt(0.5) * np.sign(S2w) * np.sqrt(1 - C2w)
#----------------------------------------------------------------------
# 2. Compute the periodogram, following Zechmeister & Kurster
# and using tricks from Press & Rybicki.
YY = np.dot(w, y ** 2)
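# YC, YS are the weighted data-cosine and data-sine cross terms at the
# shifted phase omega * (t - tau); CC and SS are the corresponding weighted
# cos^2 and sin^2 sums, built from the double-frequency sums C2 and S2.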
YC = Ch * Cw + Sh * Sw
YS = Sh * Cw - Ch * Sw
CC = 0.5 * (1 + C2 * C2w + S2 * S2w)
SS = 0.5 * (1 - C2 * C2w - S2 * S2w)
if fit_offset:
CC -= (C * Cw + S * Sw) ** 2
SS -= (S * Cw - C * Sw) ** 2
with warnings.catch_warnings():
# Filter "invalid value in divide" warnings for zero-frequency
if fit_offset and f0 == 0:
warnings.simplefilter("ignore")
power = (YC * YC / CC + YS * YS / SS) / YY
# fix NaN and INF at zero frequency
if np.isnan(power[0]) or np.isinf(power[0]):
power[0] = 0
return freq, power
class LombScargleFast(LombScargle):
"""Fast FFT-based Lomb-Scargle Periodogram Implementation
This implements the O[N log N] Lomb-Scargle periodogram, described in
Press & Rybicki (1989) [1].
To compute the periodogram via the fast algorithm, use the
``score_frequency_grid()`` method. The ``score()`` method and
``periodogram()`` method will default to the slower algorithm.
See Notes below for more information about the algorithm.
Parameters
----------
optimizer : PeriodicOptimizer instance
Optimizer to use to find the best period. If not specified, the
LinearScanOptimizer will be used.
center_data : boolean (default = True)
If True, then compute the weighted mean of the input data and subtract
before fitting the model.
fit_offset : boolean (default = True)
If True, then fit a floating-mean sinusoid model.
use_fft : boolean (default = True)
Specify whether to use the Press & Rybicki FFT algorithm to compute
the result
ls_kwds : dict
Dictionary of keywords to pass to the ``lomb_scargle_fast`` routine.
fit_period : bool (optional)
If True, then fit for the best period when fit() method is called.
optimizer_kwds : dict (optional)
Dictionary of keyword arguments for constructing the optimizer. For
example, silence optimizer output with ``optimizer_kwds={"quiet": True}``.
silence_warnings : bool (default=False)
If False, then warn the user when doing silly things, like calling
``score()`` rather than ``score_frequency_grid()`` or fitting this to
small datasets (fewer than 50 points).
Examples
--------
>>> rng = np.random.RandomState(0)
>>> t = 100 * rng.rand(100)
>>> dy = 0.1
>>> omega = 10
>>> y = np.sin(omega * t) + dy * rng.randn(100)
>>> ls = LombScargleFast().fit(t, y, dy)
>>> ls.optimizer.period_range = (0.2, 1.2)
>>> ls.best_period
Finding optimal frequency:
- Estimated peak width = 0.0639
- Using 5 steps per peak; omega_step = 0.0128
- User-specified period range: 0.2 to 1.2
- Computing periods at 2051 steps
Zooming-in on 5 candidate peaks:
- Computing periods at 1000 steps
0.62826265739259146
>>> ls.predict([0, 0.5])
array([-0.02019474, -0.92910567])
Notes
-----
Currently, a NotImplementedError will be raised if both center_data
and fit_offset are False.
Note also that the fast algorithm is only an approximation to the true
Lomb-Scargle periodogram, and as the number of points grows this
approximation improves. On the other hand, for very small datasets
(<~50 points or so) this approximation may produce incorrect results
for some datasets.
See Also
--------
LombScargle
LombScargleAstroML
References
----------
.. [1] Press, W.H. and Rybicki, G.B., "Fast algorithm for spectral analysis
of unevenly sampled data". ApJ 338, 277, 1989
"""
def __init__(self, optimizer=None, center_data=True, fit_offset=True,
use_fft=True, ls_kwds=None, Nterms=1,
fit_period=False, optimizer_kwds=None,
silence_warnings=False):
self.use_fft = use_fft
self.ls_kwds = ls_kwds
self.silence_warnings = silence_warnings
if Nterms != 1:
raise ValueError("LombScargleFast supports only Nterms = 1")
LombScargle.__init__(self, optimizer=optimizer,
center_data=center_data, fit_offset=fit_offset,
Nterms=1, regularization=None,
fit_period=fit_period,
optimizer_kwds=optimizer_kwds)
def _score_frequency_grid(self, f0, df, N):
if not self.silence_warnings and self.t.size < 50:
warnings.warn("For smaller datasets, the approximation used by "
"LombScargleFast may not be suitable.\n"
"It is recommended to use LombScargle instead.\n"
"To silence this warning, set "
"``silence_warnings=True``")
freq, P = lomb_scargle_fast(self.t, self.y, self.dy,
f0=f0, df=df, Nf=N,
center_data=self.center_data,
fit_offset=self.fit_offset,
use_fft=self.use_fft,
**(self.ls_kwds or {}))
return P
def _score(self, periods):
if not self.silence_warnings:
warnings.warn("The score() method defaults to a slower O[N^2] "
"algorithm.\nUse the score_frequency_grid() method "
"to access the fast FFT-based algorithm.\n"
"To silence this warning, set "
"``silence_warnings=True``")
return LombScargle._score(self, periods)