Source code for astroML.density_estimation.empirical

import numpy as np
from scipy import interpolate
from sklearn.utils import check_random_state


[docs]class FunctionDistribution:
    """Generate random variables distributed according to an arbitrary function

    Parameters
    ----------
    func : function
        func should take an array of x values, and return an array
        proportional to the probability density at each value
    xmin : float
        minimum value of interest
    xmax : float
        maximum value of interest
    Nx : int (optional)
        number of samples to draw.  Default is 1000
    random_state : None, int, or np.random.RandomState instance
        random seed or random number generator
    func_args : dictionary (optional)
        additional keyword arguments to be passed to func
    """
[docs]    def __init__(self, func, xmin, xmax, Nx=1000,
                 random_state=None, func_args=None):
        self.random_state = check_random_state(random_state)

        if func_args is None:
            func_args = {}

        x = np.linspace(xmin, xmax, Nx)
        Px = func(x, **func_args)

        # if there are too many zeros, interpolation will fail
        positive = (Px > 1E-10 * Px.max())
        x = x[positive]
        Px = Px[positive].cumsum()
        Px /= Px[-1]

        self._tck = interpolate.splrep(Px, x)

    def rvs(self, shape):
        """Draw random variables from the distribution

        Parameters
        ----------
        shape : integer or tuple
            shape of desired array

        Returns
        -------
        rv : ndarray, shape=shape
            random variables
        """
        # generate uniform variables between 0 and 1
        y = self.random_state.random_sample(shape)
        return interpolate.splev(y, self._tck)


[docs]class EmpiricalDistribution:
    """Empirically learn a distribution from one-dimensional data

    Parameters
    ----------
    data : one-dimensional array
        input data

    Examples
    --------
    >>> import numpy as np
    >>> np.random.seed(0)
    >>> x = np.random.normal(size=10000)  # normally-distributed variables
    >>> x.mean(), x.std()
    (-0.018433720158265783, 0.98755656817612003)
    >>> x2 = EmpiricalDistribution(x).rvs(10000)
    >>> x2.mean(), x2.std()
    (-0.020293716681613363, 1.0039249294845276)

    Notes
    -----
    This function works by approximating the inverse of the cumulative
    distribution using an efficient spline fit to the sorted values.
    """
[docs]    def __init__(self, data):
        # copy, because we'll need to sort in-place
        data = np.array(data, copy=True)
        if data.ndim != 1:
            raise ValueError("data should be one-dimensional")
        data.sort()

        # set up spline
        y = np.linspace(0, 1, data.size)
        self._tck = interpolate.splrep(y, data)

    def rvs(self, shape):
        """Draw random variables from the distribution

        Parameters
        ----------
        shape : integer or tuple
            shape of desired array

        Returns
        -------
        rv : ndarray, shape=shape
            random variables
        """
        # generate uniform variables between 0 and 1
        y = np.random.random(shape)
        return interpolate.splev(y, self._tck)