Source code for TheCannon.apogee

""" Functions for reading in APOGEE spectra and training labels """

from __future__ import (absolute_import, division, print_function,)
import numpy as np
import scipy.optimize as opt
import os
import sys
import matplotlib.pyplot as plt
from astropy.io import ascii
from TheCannon import *

# python 3 special
PY3 = sys.version_info[0] > 2
if not PY3:
    range = xrange

try:
    from astropy.io import fits as pyfits
except ImportError:
    import pyfits

[docs]def get_pixmask(fluxes, flux_errs):
    """ Create and return a bad pixel mask for an APOGEE spectrum

    Bad pixels are defined as follows: fluxes or errors are not finite, or 
    reported errors are <= 0, or fluxes are 0

    Parameters
    ----------
    fluxes: ndarray
        Flux data values 

    flux_errs: ndarray
        Flux uncertainty data values 

    Returns
    -------
    mask: ndarray
        Bad pixel mask, value of True corresponds to bad pixels
    """
    bad_flux = np.logical_or(~np.isfinite(fluxes), fluxes == 0)
    bad_err = (~np.isfinite(flux_errs)) | (flux_errs <= 0)
    bad_pix = bad_err | bad_flux
    return bad_pix


[docs]def get_starmask(ids, labels, aspcapflag, paramflag):
    """ Identifies which APOGEE objects have unreliable physical parameters,
    as laid out in Holzman et al 2015 and on the APOGEE DR12 website

    Parameters
    ----------
    data: np array
        all APOGEE DR12 IDs and labels

    Returns
    -------
    bad: np array
        mask where 1 corresponds to a star with unreliable parameters
    """
    # teff outside range (4000,6000) K and logg < 0
    teff = labels[0,:]
    bad_teff = np.logical_or(teff < 4000, teff > 6000)
    logg = labels[1,:]
    bad_logg = logg < 0
    cuts = bad_teff | bad_logg

    # STAR_WARN flag set (TEFF, LOGG, CHI2, COLORTE, ROTATION, SN)
    # M_H_WARN, ALPHAFE_WARN not included in the above, so do them separately
    star_warn = np.bitwise_and(aspcapflag, 2**7) != 0
    star_bad = np.bitwise_and(aspcapflag, 2**23) != 0
    feh_warn = np.bitwise_and(aspcapflag, 2**3) != 0
    alpha_warn = np.bitwise_and(aspcapflag, 2**4) != 0
    aspcapflag_bad = star_warn | star_bad | feh_warn | alpha_warn

    # separate element flags
    teff_flag = paramflag[:,0] != 0
    logg_flag = paramflag[:,1] != 0
    feh_flag = paramflag[:,3] != 0
    alpha_flag = paramflag[:,4] != 0
    paramflag_bad = teff_flag | logg_flag | feh_flag | alpha_flag

    return cuts | aspcapflag_bad | paramflag_bad 


[docs]def load_spectra(data_dir):
    """ Reads wavelength, flux, and flux uncertainty data from apogee fits files

    Parameters
    ----------
    data_dir: str
        Name of the directory containing all of the data files

    Returns
    -------
    wl: ndarray
        Rest-frame wavelength vector

    fluxes: ndarray
        Flux data values

    ivars: ndarray
        Inverse variance values corresponding to flux values
    """
    print("Loading spectra from directory %s" %data_dir)
    files = list(sorted([data_dir + "/" + filename
             for filename in os.listdir(data_dir) if filename.endswith('fits')]))
    nstars = len(files)  
    for jj, fits_file in enumerate(files):
        file_in = pyfits.open(fits_file)
        flux = np.array(file_in[1].data)
        if jj == 0:
            npixels = len(flux)
            fluxes = np.zeros((nstars, npixels), dtype=float)
            ivars = np.zeros(fluxes.shape, dtype=float)
            start_wl = file_in[1].header['CRVAL1']
            diff_wl = file_in[1].header['CDELT1']
            val = diff_wl * (npixels) + start_wl
            wl_full_log = np.arange(start_wl,val, diff_wl)
            wl_full = [10 ** aval for aval in wl_full_log]
            wl = np.array(wl_full)
        flux_err = np.array((file_in[2].data))
        badpix = get_pixmask(flux, flux_err)
        ivar = np.zeros(npixels)
        ivar[~badpix] = 1. / flux_err[~badpix]**2
        fluxes[jj,:] = flux
        ivars[jj,:] = ivar
    # convert filenames to actual IDs
    names = np.array([f.split('v304-')[1].split('.fits')[0] for f in files])
    print("Spectra loaded")
    # make sure they are numpy arrays
    return np.array(names), np.array(wl), np.array(fluxes), np.array(ivars)


[docs]def load_labels(filename):
    """ Extracts reference labels from a file

    Parameters
    ----------
    filename: str
        Name of the file containing the table of reference labels

    ids: array
        The IDs of stars to retrieve labels for

    Returns
    -------
    labels: ndarray
        Reference label values for all reference objects
    """
    print("Loading reference labels from file %s" %filename)
    data = ascii.read(filename)
    ids = data['ID']
    inds = ids.argsort()
    ids = ids[inds]
    teff = data['Teff_{corr}']
    teff = teff[inds]
    logg = data['logg_{corr}']
    logg = logg[inds]
    mh = data['[M/H]_{corr}']
    mh = mh[inds]
    return np.vstack((teff,logg,mh)).T 


[docs]def continuum_normalize_training(ds):
    """ 
    Continuum normalize the training set, using an iterative Cannon approach

    Parameters
    ----------
    ds: dataset object

    Returns
    -------
    updated dataset object
    
    """

    # To initialize the continuum-pixel determination, we define a
    # preliminary pseudo-continuum-normalization by using a polynomial
    # fit to an upper quantile (in this case 90%) of the spectra, determined
    # from a running median
    # this is SNR-dependent
    pseudo_tr_flux, pseudo_tr_ivar = ds.continuum_normalize_training_q(
            q=0.90, delta_lambda=50)
    ds.tr_flux = pseudo_tr_flux
    ds.tr_ivar = pseudo_tr_ivar

    # Run the training step
    m = model.CannonModel(2)
    m.fit(ds)

    # Baseline spectrum
    baseline_spec = m.coeffs[:,0]

    # Flux cut: 1 +/- 0.15 (0.985 - 1.015)
    fcut = (np.abs(baseline_spec - 1) <= 0.15)

    # Smallest percentiles of the first order coefficients
    ccut_1 = np.logical_and(
            np.abs(m.coeffs[:,1]) < 1.0e-5, np.abs(m.coeffs[:,1] > 0))
    ccut_2 = np.logical_and(
            np.abs(m.coeffs[:,2]) < 0.0045, np.abs(m.coeffs[:,2] > 0))
    ccut_3 = np.logical_and(
            np.abs(m.coeffs[:,3]) < 0.0085, np.abs(m.coeffs[:,3] > 0))
    ccut12 = np.logical_and(ccut_1, ccut_2)
    ccut = np.logical_and(ccut12, ccut_3)

    # Choose the continuum pixels
    contpix = np.logical_and(fcut, ccut)
    ds.set_continuum(contpix)

    # Fit a sinusoid to these pixels, using the inverse variance weighting
    # Adding an additional error term that is set to 0 for continuum pixels
    # and a large error value for all other pixels so that the new error 
    # term becomes 
    #err2 = err1 + err[0 OR LARGE]
    cont = dataset.fit_continuum(3, "sinusoid")
    norm_tr_flux, norm_tr_ivar, norm_test_flux, norm_test_ivar = \
            dataset.continuum_normalize(cont)
   
    ds.tr_flux = norm_tr_flux
    ds.tr_ivar = norm_tr_ivar
    ds.test_flux = norm_test_flux
    ds.test_ivar = norm_test_ivar

    return ds

if  __name__ =='__main__':
    make_apogee_label_file()