Source code for astroML.lumfunc

import numpy as np


def _sorted_interpolate(x, y, x_eval):
    """utility function for binned_Cminus"""
    # note that x should be sorted
    N = len(x)
    ind = x.searchsorted(x_eval)
    ind[ind == N] = N - 1

    y_eval = np.zeros(x_eval.shape)

    # find perfect matches
    match = (x[ind] == x_eval) | (x_eval > x[-1]) | (x_eval < x[0])
    y_eval[match] = y[ind[match]]

    ind = ind[~match]

    # take care of extrapolation
    ind[ind == 0] = 1

    x_lo = x[ind - 1]
    x_up = x[ind]

    y_lo = y[ind - 1]
    y_up = y[ind]

    # take care of places where x_lo = x_up

    y_eval[~match] = (y_lo + (x_eval[~match] - x_lo)
                      * (y_up - y_lo) / (x_up - x_lo))

    return y_eval


[docs]def Cminus(x, y, xmax, ymax):
    """Lynden-Bell's C-minus method

    Parameters
    ----------
    x : array_like
        array of x values
    y : array_like
        array of y values
    xmax : array_like
        array of maximum x values for each y value
    ymax : array_like
        array of maximum y values for each x value

    Returns
    -------
    Nx, Ny, cuml_x, cuml_y: ndarrays
        Nx and cuml_x are in the order of the sorted x array
        Ny and cuml_y are in the order of the sorted y array
    """
    # make copies of input
    x, y, xmax, ymax = map(np.array, (x, y, xmax, ymax))

    Nall = len(x)

    cuml_x = np.zeros(x.shape)
    cuml_y = np.zeros(y.shape)
    Nx = np.zeros(x.shape)
    Ny = np.zeros(y.shape)

    # first the y direction.
    i_sort = np.argsort(y)
    x = x[i_sort]
    y = y[i_sort]
    xmax = xmax[i_sort]
    ymax = ymax[i_sort]

    for j in range(1, Nall):
        # Making sure we don't divide with 0 later
        objects = np.sum(x[:j] < xmax[j])
        if objects:
            Ny[j] = objects
        else:
            Ny[j] = np.inf

    Ny[0] = np.inf
    cuml_y = np.cumprod(1. + 1. / Ny)

    Ny[np.isinf(Ny)] = 0

    # renormalize
    cuml_y *= Nall / cuml_y[-1]

    # now the x direction
    i_sort = np.argsort(x)
    x = x[i_sort]
    y = y[i_sort]
    xmax = xmax[i_sort]
    ymax = ymax[i_sort]

    for i in range(1, Nall):
        # Making sure we don't divide with 0 later
        objects = np.sum(y[:i] < ymax[i])
        if objects:
            Nx[i] = objects
        else:
            Nx[i] = np.inf

    Nx[0] = np.inf
    cuml_x = np.cumprod(1. + 1. / Nx)

    Nx[np.isinf(Nx)] = 0

    # renormalize
    cuml_x *= Nall / cuml_x[-1]

    return Nx, Ny, cuml_x, cuml_y


[docs]def binned_Cminus(x, y, xmax, ymax, xbins, ybins, normalize=False):
    """Compute the binned distributions using the Cminus method

    Parameters
    ----------
    x : array_like
        array of x values
    y : array_like
        array of y values
    xmax : array_like
        array of maximum x values for each y value
    ymax : array_like
        array of maximum y values for each x value
    xbins : array_like
        array of bin edges for the x function: size=Nbins_x + 1
    ybins : array_like
        array of bin edges for the y function: size=Nbins_y + 1
    normalize : boolean
        if true, then returned distributions are normalized.  Default
        is False.

    Returns
    -------
    dist_x, dist_y : ndarrays
        distributions of size Nbins_x and Nbins_y
    """
    Nx, Ny, cuml_x, cuml_y = Cminus(x, y, xmax, ymax)

    # simple linear interpolation using a binary search
    # interpolate the cumulative distributions
    x_sort = np.sort(x)
    y_sort = np.sort(y)

    Ix_edges = _sorted_interpolate(x_sort, cuml_x, xbins)
    Iy_edges = _sorted_interpolate(y_sort, cuml_y, ybins)

    if xbins[0] < x_sort[0]:
        Ix_edges[0] = cuml_x[0]
    if xbins[-1] > x_sort[-1]:
        Ix_edges[-1] = cuml_x[-1]

    if ybins[0] < y_sort[0]:
        Iy_edges[0] = cuml_y[0]
    if ybins[-1] > y_sort[-1]:
        Iy_edges[-1] = cuml_y[-1]

    x_dist = np.diff(Ix_edges) / np.diff(xbins)
    y_dist = np.diff(Iy_edges) / np.diff(ybins)

    if normalize:
        x_dist /= len(x)
        y_dist /= len(y)

    return x_dist, y_dist


[docs]def bootstrap_Cminus(x, y, xmax, ymax, xbins, ybins,
                     Nbootstraps=10, normalize=False):
    """
    Compute the binned distributions using the Cminus method, with
    bootstrapped estimates of the errors

    Parameters
    ----------
    x : array_like
        array of x values
    y : array_like
        array of y values
    xmax : array_like
        array of maximum x values for each y value
    ymax : array_like
        array of maximum y values for each x value
    xbins : array_like
        array of bin edges for the x function: size=Nbins_x + 1
    ybins : array_like
        array of bin edges for the y function: size=Nbins_y + 1
    Nbootstraps : int
        number of bootstrap resamplings to perform
    normalize : boolean
        if true, then returned distributions are normalized.  Default
        is False.

    Returns
    -------
    dist_x, err_x, dist_y, err_y : ndarrays
        distributions of size Nbins_x and Nbins_y
    """
    x, y, xmax, ymax = map(np.asarray, (x, y, xmax, ymax))

    x_dist = np.zeros((Nbootstraps, len(xbins) - 1))
    y_dist = np.zeros((Nbootstraps, len(ybins) - 1))

    for i in range(Nbootstraps):
        ind = np.random.randint(0, len(x), len(x))
        x_dist[i], y_dist[i] = binned_Cminus(x[ind], y[ind],
                                             xmax[ind], ymax[ind],
                                             xbins, ybins,
                                             normalize=normalize)

    return (x_dist.mean(0), x_dist.std(0, ddof=1),
            y_dist.mean(0), y_dist.std(0, ddof=1))