Source code for TheCannon.model

from .dataset import Dataset
from .train_model import _train_model 
from .train_model import _train_model_new
from .train_model import _get_lvec
from .infer_labels import _infer_labels
from .helpers.corner import corner
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
from copy import deepcopy

plt.rc('text', usetex=True)
plt.rc('font', family='serif')

[docs]class CannonModel(object): def __init__(self, order, useErrors): self.coeffs = None self.scatters = None self.chisqs = None self.pivots = None self.scales = None self.new_tr_labels = None self.order = order self.model_spectra = None self.useErrors = useErrors
[docs] def model(self): """ Return the model definition or raise an error if not trained """ if self.coeffs is None: raise RuntimeError('Model not trained') else: return self.coeffs
[docs] def train(self, ds): """ Run training step: solve for best-fit spectral model """ if self.useErrors: self.coeffs, self.scatters, self.new_tr_labels, self.chisqs, self.pivots, self.scales = _train_model_new(ds) else: self.coeffs, self.scatters, self.chisqs, self.pivots, self.scales = _train_model(ds)
[docs] def diagnostics(self): """ Produce a set of diagnostics plots about the model. """ _model_diagnostics(self.dataset, self.model)
[docs] def infer_labels(self, ds, starting_guess = None): """ Uses the model to solve for labels of the test set, updates Dataset Then use those inferred labels to set the model.test_spectra attribute Parameters ---------- ds: Dataset Dataset that needs label inference Returns ------- errs_all: ndarray Covariance matrix of the fit """ return _infer_labels(self, ds, starting_guess)
[docs] def infer_spectra(self, ds): """ After inferring labels for the test spectra, infer the model spectra and update the dataset model_spectra attribute. Parameters ---------- ds: Dataset object """ lvec_all = _get_lvec(ds.test_label_vals, self.pivots, self.scales, derivs=False) self.model_spectra = np.dot(lvec_all, self.coeffs.T)
[docs] def plot_contpix(self, x, y, contpix_x, contpix_y, figname): """ Plot baseline spec with continuum pix overlaid Parameters ---------- """ fig, axarr = plt.subplots(2, sharex=True) plt.xlabel(r"Wavelength $\lambda (\AA)$") plt.xlim(min(x), max(x)) ax = axarr[0] ax.step(x, y, where='mid', c='k', linewidth=0.3, label=r'$\theta_0$' + "= the leading fit coefficient") ax.scatter(contpix_x, contpix_y, s=1, color='r', label="continuum pixels") ax.legend(loc='lower right', prop={'family':'serif', 'size':'small'}) ax.set_title("Baseline Spectrum with Continuum Pixels") ax.set_ylabel(r'$\theta_0$') ax = axarr[1] ax.step(x, y, where='mid', c='k', linewidth=0.3, label=r'$\theta_0$' + "= the leading fit coefficient") ax.scatter(contpix_x, contpix_y, s=1, color='r', label="continuum pixels") ax.set_title("Baseline Spectrum with Continuum Pixels, Zoomed") ax.legend(loc='upper right', prop={'family':'serif', 'size':'small'}) ax.set_ylabel(r'$\theta_0$') ax.set_ylim(0.95, 1.05) print("Diagnostic plot: fitted 0th order spec w/ cont pix") print("Saved as %s.png" % (figname)) plt.savefig(figname) plt.close()
[docs] def diagnostics_contpix(self, data, nchunks=10, fig = "baseline_spec_with_cont_pix"): """ Call plot_contpix once for each nth of the spectrum """ if data.contmask is None: print("No contmask set") else: coeffs_all = self.coeffs wl = data.wl baseline_spec = coeffs_all[:,0] contmask = data.contmask contpix_x = wl[contmask] contpix_y = baseline_spec[contmask] rem = len(wl)%nchunks wl_split = np.array(np.split(wl[0:len(wl)-rem],nchunks)) baseline_spec_split = np.array( np.split(baseline_spec[0:len(wl)-rem],nchunks)) nchunks = wl_split.shape[0] for i in range(nchunks): fig_chunk = fig + "_%s" %str(i) wl_chunk = wl_split[i,:] baseline_spec_chunk = baseline_spec_split[i,:] take = np.logical_and( contpix_x>wl_chunk[0], contpix_x<wl_chunk[-1]) self.plot_contpix( wl_chunk, baseline_spec_chunk, contpix_x[take], contpix_y[take], fig_chunk)
def diagnostics_leading_coeffs(self, ds): label_names = ds.get_plotting_labels() lams = ds.wl npixels = len(lams) pivots = self.pivots nlabels = len(pivots) chisqs = self.chisqs coeffs = self.coeffs scatters = self.scatters # Leading coefficients for each label & scatter fig, axarr = plt.subplots(nlabels+1, figsize=(8,8), sharex=True) ax1 = axarr[0] plt.subplots_adjust(hspace=0.001) nbins = len(ax1.get_xticklabels()) for i in range(1,nlabels+1): axarr[i].yaxis.set_major_locator( MaxNLocator(nbins=nbins, prune='upper')) plt.xlabel(r"Wavelength $\lambda (\AA)$", fontsize=14) plt.xlim(np.min(lams), np.max(lams)) plt.tick_params(axis='x', labelsize=14) axarr[0].set_title( "First-Order Fit Coeffs and Scatter from the Spectral Model", fontsize=14) axarr[0].locator_params(axis='x', nbins=10) first_order = np.zeros((len(coeffs[:,0]), nlabels)) for i in range(0, nlabels): ax = axarr[i] lbl = r'$%s$'%label_names[i] ax.set_ylabel(lbl, fontsize=14) ax.tick_params(axis='y', labelsize=14) ax.xaxis.grid(True) y = coeffs[:,i+1] first_order[:, i] = y ax.step(lams, y, where='mid', linewidth=0.5, c='k') ax.locator_params(axis='y', nbins=4) ax = axarr[nlabels] ax.tick_params(axis='y', labelsize=14) ax.set_ylabel("scatter", fontsize=14) top = np.max(scatters[scatters < 0.8]) stretch = np.std(scatters[scatters < 0.8]) ax.set_ylim(0, top + stretch) ax.step(lams, scatters, where='mid', c='k', linewidth=0.7) ax.xaxis.grid(True) ax.locator_params(axis='y', nbins=4) print("Diagnostic plot: leading coeffs and scatters across wavelength.") return fig def diagnostics_leading_coeffs_triangle(self, ds, figname = "leading_coeffs_triangle.png"): label_names = ds.get_plotting_labels() lams = ds.wl pivots = self.pivots npixels = len(lams) nlabels = len(pivots) chisqs = self.chisqs coeffs = self.coeffs first_order = coeffs[:,1:1+nlabels] scatters = self.scatters # triangle plot of the higher-order coefficients labels = [r"$%s$" % l for l in label_names] fig = corner(first_order, labels=labels, show_titles=True, title_args = {"fontsize":12}) filename = "leading_coeffs_triangle.png" print("Diagnostic plot: triangle plot of leading coefficients") fig.savefig(figname) print("Saved as %s" %figname) plt.close(fig)
[docs] def diagnostics_plot_chisq(self, ds, figname = "modelfit_chisqs.png"): """ Produce a set of diagnostic plots for the model Parameters ---------- (optional) chisq_dist_plot_name: str Filename of output saved plot """ label_names = ds.get_plotting_labels() lams = ds.wl pivots = self.pivots npixels = len(lams) nlabels = len(pivots) chisqs = self.chisqs coeffs = self.coeffs scatters = self.scatters # Histogram of the chi squareds of ind. stars plt.hist(np.sum(chisqs, axis=0), color='lightblue', alpha=0.7, bins=int(np.sqrt(len(chisqs)))) dof = len(lams) - coeffs.shape[1] # for one star plt.axvline(x=dof, c='k', linewidth=2, label="DOF") plt.legend() plt.title("Distribution of " + r"$\chi^2$" + " of the Model Fit") plt.ylabel("Count") plt.xlabel(r"$\chi^2$" + " of Individual Star") print("Diagnostic plot: histogram of the red chi squareds of the fit") print("Saved as %s" %figname) plt.savefig(figname) plt.close()
# convenient namings to match existing packages predict = _infer_labels fit = train