Source code for astroML.datasets.sdss_specgals

import os

import numpy as np
from astropy.cosmology import FlatLambdaCDM
from astropy.table import Table, vstack

from . import get_data_home

# We store the data in two parts to comply with GitHub 100Mb file size limit
DATA_URL1 = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
             "SDSSspecgalsDR8_1.fit.gz")
DATA_URL2 = ("https://github.com/astroML/astroML-data/raw/main/datasets/"
             "SDSSspecgalsDR8_2.fit.gz")


[docs]def fetch_sdss_specgals(data_home=None, download_if_missing=True): """Loader for SDSS Galaxies with spectral information Parameters ---------- data_home : optional, default=None Specify another download and cache folder for the datasets. By default all astroML data is stored in '~/astroML_data'. download_if_missing : optional, default=True If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site. Returns ------- data : recarray, shape = (661598,) record array containing pipeline parameters Notes ----- These were compiled from the SDSS database using the following SQL query:: SELECT G.ra, G.dec, S.mjd, S.plate, S.fiberID, --- basic identifiers --- basic spectral data S.z, S.zErr, S.rChi2, S.velDisp, S.velDispErr, --- some useful imaging parameters G.extinction_r, G.petroMag_r, G.psfMag_r, G.psfMagErr_r, G.modelMag_u, modelMagErr_u, G.modelMag_g, modelMagErr_g, G.modelMag_r, modelMagErr_r, G.modelMag_i, modelMagErr_i, G.modelMag_z, modelMagErr_z, G.petroR50_r, G.petroR90_r, --- line fluxes for BPT diagram and other derived spec. parameters GSL.nii_6584_flux, GSL.nii_6584_flux_err, GSL.h_alpha_flux, GSL.h_alpha_flux_err, GSL.oiii_5007_flux, GSL.oiii_5007_flux_err, GSL.h_beta_flux, GSL.h_beta_flux_err, GSL.h_delta_flux, GSL.h_delta_flux_err, GSX.d4000, GSX.d4000_err, GSE.bptclass, GSE.lgm_tot_p50, GSE.sfr_tot_p50, G.objID, GSI.specObjID INTO mydb.SDSSspecgalsDR8 FROM SpecObj S CROSS APPLY dbo.fGetNearestObjEQ(S.ra, S.dec, 0.06) N, Galaxy G, GalSpecInfo GSI, GalSpecLine GSL, GalSpecIndx GSX, GalSpecExtra GSE WHERE N.objID = G.objID AND GSI.specObjID = S.specObjID AND GSL.specObjID = S.specObjID AND GSX.specObjID = S.specObjID AND GSE.specObjID = S.specObjID --- add some quality cuts to get rid of obviously bad measurements AND (G.petroMag_r > 10 AND G.petroMag_r < 18) AND (G.modelMag_u-G.modelMag_r) > 0 AND (G.modelMag_u-G.modelMag_r) < 6 AND (modelMag_u > 10 AND modelMag_u < 25) AND (modelMag_g > 10 AND modelMag_g < 25) AND (modelMag_r > 10 AND modelMag_r < 25) AND (modelMag_i > 10 AND modelMag_i < 25) AND (modelMag_z > 10 AND modelMag_z < 25) AND S.rChi2 < 2 AND (S.zErr > 0 AND S.zErr < 0.01) AND S.z > 0.02 --- end of query --- Examples -------- >>> from astroML.datasets import fetch_sdss_specgals >>> data = fetch_sdss_specgals() # doctest: +IGNORE_OUTPUT +REMOTE_DATA >>> # number of objects in dataset >>> data.shape # doctest: +REMOTE_DATA (661598,) >>> # first five column names >>> data.dtype.names[:5] # doctest: +REMOTE_DATA ('ra', 'dec', 'mjd', 'plate', 'fiberID') >>> # first three RA values >>> print(data['ra'][:3]) # doctest: +REMOTE_DATA [ 146.71419105 146.74414186 146.62857334] >>> # first three declination values >>> print(data['dec'][:3]) # doctest: +REMOTE_DATA [-1.04127639 -0.6522198 -0.7651468 ] """ data_home = get_data_home(data_home) archive_file1 = os.path.join(data_home, os.path.basename(DATA_URL1)) archive_file2 = os.path.join(data_home, os.path.basename(DATA_URL2)) if not (os.path.exists(archive_file1) and os.path.exists(archive_file2)): if not download_if_missing: raise IOError('data not present on disk. ' 'set download_if_missing=True to download') for url, name in zip([DATA_URL1, DATA_URL2], [archive_file1, archive_file2]): data = Table.read(url) data.write(name) data1 = Table.read(archive_file1) data2 = Table.read(archive_file2) data = vstack([data1, data2]) return np.asarray(data)
[docs]def fetch_great_wall(data_home=None, download_if_missing=True, xlim=(-375, -175), ylim=(-300, 200), cosmo=None): """Get the 2D SDSS "Great Wall" distribution, following Cowan et al 2008 Parameters ---------- data_home : optional, default=None Specify another download and cache folder for the datasets. By default all astroML data is stored in '~/astroML_data'. download_if_missing : optional, default=True If False, raise a IOError if the data is not locally available instead of trying to download the data from the source site. xlim, ylim : tuples or None the limits in Mpc of the data: default values are the same as that used for the plots in Cowan 2008. If set to None, no cuts will be performed. cosmo : `astropy.cosmology` instance specifying cosmology to use when generating the sample. If not provided, a Flat Lambda CDM model with H0=73.2, Om0=0.27, Tcmb0=0 is used. Returns ------- data : ndarray, shape = (Ngals, 2) grid of projected (x, y) locations of galaxies in Mpc """ # local imports so we don't need dependencies for loading module from scipy.interpolate import interp1d # We need some cosmological information to compute the r-band # absolute magnitudes. if cosmo is None: cosmo = FlatLambdaCDM(H0=73.2, Om0=0.27, Tcmb0=0) data = fetch_sdss_specgals(data_home, download_if_missing) # cut to the part of the sky with the "great wall" data = data[(data['dec'] > -7) & (data['dec'] < 7)] data = data[(data['ra'] > 80) & (data['ra'] < 280)] # do a redshift cut, following Cowan et al 2008 z = data['z'] data = data[(z > 0.01) & (z < 0.12)] # first sample the distance modulus on a grid zgrid = np.linspace(min(data['z']), max(data['z']), 100) mugrid = cosmo.distmod(zgrid).value f = interp1d(zgrid, mugrid) mu = f(data['z']) # do an absolute magnitude cut at -20 Mr = data['petroMag_r'] + data['extinction_r'] - mu data = data[Mr < -21] # compute distances in the equatorial plane # first sample comoving distance Dcgrid = cosmo.comoving_distance(zgrid).value f = interp1d(zgrid, Dcgrid) dist = f(data['z']) locs = np.vstack([dist * np.cos(data['ra'] * np.pi / 180.), dist * np.sin(data['ra'] * np.pi / 180.)]).T # cut on x and y limits if specified if xlim is not None: locs = locs[(locs[:, 0] > xlim[0]) & (locs[:, 0] < xlim[1])] if ylim is not None: locs = locs[(locs[:, 1] > ylim[0]) & (locs[:, 1] < ylim[1])] return locs