def main():
    """Fetch and store photometry for every observation with a TGAS source.

    Connects to the hard-coded SQLite database, logs in to the Gaia archive,
    and then, for each ``Observation`` that has no ``Photometry`` row yet and
    does have a ``tgas_source``, calls ``get_photometry`` and stores the
    columns listed in ``result_columns`` as a new ``Photometry`` row.
    """
    # TODO: bad, hard-coded...
    # base_path = '/Volumes/ProjectData/gaia-comoving-followup/'
    base_path = '../data/'
    db_path = path.join(base_path, 'db.sqlite')

    # The return value is not used below; the call is kept for whatever
    # connection setup db_connect() performs.
    engine = db_connect(db_path)
    session = Session()

    Gaia.login(user='******', password='******')

    for obs in session.query(Observation).all():
        # Skip observations that already have a photometry row attached
        n_existing = (session.query(Photometry)
                      .join(Observation)
                      .filter(Observation.id == obs.id)
                      .count())
        if n_existing > 0:
            logger.debug('Photometry already exists')
            continue

        # Nothing to look up when the observation has no TGAS counterpart
        source = obs.tgas_source
        if source is None:
            continue

        res = get_photometry(source.source_id)

        phot = Photometry(**{col: res[col] for col in result_columns})
        phot.observation = obs
        session.add(phot)

        # NOTE(review): the original formatting was lost; the commit is assumed
        # to happen once per observation -- confirm against the upstream file.
        session.commit()
def get_corrected_rv(self, obs):
    """Compute a corrected radial velocity for the given observation.

    The first entry of ``obs.measurements`` is used as the Halpha measurement
    and every following entry as a sky-line measurement.

    Parameters
    ----------
    obs
        Observation providing ``measurements``, ``night`` and ``utc_hour``.

    Returns
    -------
    rv
        Radial velocity after the sky-line, per-night polynomial and
        per-night median-offset corrections.
    err
        Quadrature sum of ``self._abs_err`` and the centroid precision.
    flag : int
        0 when at least one sky line passed the quality cuts, 1 when no
        sky-line offset could be applied.
    """
    halpha_meas = obs.measurements[0]
    sky_measurements = obs.measurements[1:]

    # Compute the raw offset: difference between Halpha centroid and true
    # wavelength value
    x0 = halpha_meas.x0 * u.angstrom
    raw_offset = (x0 - self.Halpha)

    # precision estimate from line centroid error
    precision = (halpha_meas.x0_error * u.angstrom) / self.Halpha * c.to(u.km / u.s)

    # For each sky line (that passes certain quality checks), compute the
    # offset between the predicted wavelength and measured centroid
    # TODO: generalize these quality cuts - see also above in
    #   _compute_offset_corrections
    # The buffer is sized from the data (it used to be hard-coded to 3, which
    # raised IndexError for observations with more than three sky lines).
    sky_offsets = np.full(len(sky_measurements), np.nan) * u.angstrom
    for j, meas in enumerate(sky_measurements):
        sky_offset = meas.x0 * u.angstrom - meas.info.wavelength
        if (meas.amp > 16 and meas.std_G < 2 and meas.std_G > 0.3 and
                np.abs(sky_offset) < 3.3 * u.angstrom):  # MAGIC NUMBER: quality cuts
            sky_offsets[j] = sky_offset

    # final sky offset to apply; NaN means no line survived the cuts
    flag = 0
    sky_offset = np.nanmean(sky_offsets)
    if np.isnan(sky_offset.value):
        logger.debug("not correcting with sky line for {0}".format(obs))
        sky_offset = 0 * u.angstrom
        flag = 1

    # apply global sky offset correction - see _compute_offset_corrections()
    sky_offset -= self._night_polys[obs.night](obs.utc_hour) * u.angstrom

    # compute radial velocity and correct for sky line
    rv = (raw_offset - sky_offset) / self.Halpha * c.to(u.km / u.s)

    # correct for offset of median of ∆RV distribution from targets with
    # prior/known RV's
    rv -= self._night_final_offsets[obs.night]

    # rv error
    err = np.sqrt(self._abs_err**2 + precision**2)

    return rv, err, flag
def main(db_path, run_name, overwrite=False, pool=None):
    """Compute and store corrected radial velocities for one observing run.

    Parameters
    ----------
    db_path : str
        Path to the database file; the plot directory is created next to it
        under ``plots/<run_name>``.
    run_name : str
        Name of the ``Run`` whose observations are processed.
    overwrite : bool, optional
        When True, an existing RV measurement for an observation is deleted
        and recomputed instead of being skipped.
    pool : optional
        Pool object that is closed when the function finishes. A
        ``schwimmbad.SerialPool`` is created when omitted. The pool is not
        otherwise used in this function.

    Raises
    ------
    RuntimeError
        If an observation has more than one stored RV measurement while
        ``overwrite`` is True.
    """
    if pool is None:
        pool = schwimmbad.SerialPool()

    try:
        # connect to the database
        engine = db_connect(db_path)
        # engine.echo = True
        logger.debug("Connected to database at '{}'".format(db_path))

        # create a new session for interacting with the database
        session = Session()

        root_path, _ = path.split(db_path)
        plot_path = path.join(root_path, 'plots', run_name)
        if not path.exists(plot_path):
            os.makedirs(plot_path, exist_ok=True)

        # get object to correct the observed RV's
        rv_corr = RVCorrector(session, run_name)

        observations = session.query(Observation).join(Run)\
            .filter(Run.name == run_name).all()

        for obs in observations:
            q = session.query(RVMeasurement).join(Observation)\
                .filter(Observation.id == obs.id)

            # Count once per observation instead of re-running the same query
            # for every branch below.
            n_existing = q.count()

            if n_existing > 0 and not overwrite:
                logger.debug('RV measurement already complete for object '
                             '{0} in file {1}'.format(obs.object,
                                                      obs.filename_raw))
                continue

            if n_existing > 1:
                raise RuntimeError(
                    'Multiple RV measurements found for object {0}'.format(obs))

            if len(obs.measurements) == 0:
                logger.debug(
                    'Observation {0} has no line measurements.'.format(obs))
                continue

            corrected_rv, err, flag = rv_corr.get_corrected_rv(obs)

            # remove previous RV measurements (at most one can exist here)
            if n_existing > 0:
                session.delete(q.one())
                session.commit()

            rv_meas = RVMeasurement(rv=corrected_rv, err=err, flag=flag)
            rv_meas.observation = obs
            session.add(rv_meas)
            session.commit()
    finally:
        # Close the pool even when processing fails part-way through
        pool.close()
def work(self, id):
    """Sample the star-model posterior for one observation with emcee.

    Loads the observation with the given database ``id``, builds its model
    via ``obs_to_starmodel``, and runs three emcee stages: an initial run of
    ``self.ninit`` steps, a burn-in of ``self.nburn`` steps restarted from a
    tight ball around the best initial sample, and a final run of
    ``self.niter`` steps. The sampler is attached to the model before
    returning.

    Returns
    -------
    tuple
        ``(id, model)``.
    """
    engine = db_connect(self.db_path)
    session = Session()

    obs = session.query(Observation).filter(Observation.id == id).one()
    model = obs_to_starmodel(obs)

    n_walkers = self.nwalkers

    # initial conditions for emcee walkers
    m0, age0, feh0 = model.ic.random_points(n_walkers,
                                            minmass=0.01, maxmass=10.,
                                            minfeh=-1, maxfeh=1)
    _, max_distance = model.bounds('distance')
    _, max_AV = model.bounds('AV')

    # distance is drawn uniformly in log10, extinction uniformly in AV
    log_max_distance = np.log10(max_distance)
    d0 = 10**(np.random.uniform(0, log_max_distance, size=n_walkers))
    AV0 = np.random.uniform(0, max_AV, size=n_walkers)

    # one row per walker, columns ordered (mass, age, feh, distance, AV)
    p0 = np.array([m0, age0, feh0, d0, AV0]).T
    npars = p0.shape[1]

    logger.debug('Running emcee - initial sampling...')
    sampler = emcee.EnsembleSampler(n_walkers, npars, model.lnpost)
    _pos, _prob, _ = sampler.run_mcmc(p0, self.ninit)

    # cull the weak walkers: restart everyone near the best sample so far
    best_ix = sampler.flatlnprobability.argmax()
    best_point = sampler.flatchain[best_ix]
    jitter = np.random.normal(0, 1E-5, size=(n_walkers, npars))
    best_p0 = best_point[None] + jitter

    sampler.reset()
    logger.debug('burn-in...')
    pos, _prob, _ = sampler.run_mcmc(best_p0, self.nburn)

    sampler.reset()
    logger.debug('sampling...')
    sampler.run_mcmc(pos, self.niter)

    model._sampler = sampler
    model._make_samples(0.01)

    return id, model
def main(db_path, run_root_path, drop_all=False, overwrite=False, **kwargs):
    """Initialize or update the database with one observing run.

    Loads the spectral line definitions, creates the ``Run`` entry, and then
    walks every processed night directory (``processed/<run>/n?``) to create
    ``Observation`` rows from the ``1d_*.fit`` headers, together with the
    associated ``SimbadInfo``, ``TGASSource`` and ``PriorRV`` rows. Finally
    the Simbad info is cross-matched against the Bensby catalog.

    Parameters
    ----------
    db_path : str
        Path to the database file. Its parent directory must exist.
    run_root_path : str
        Path to the run directory; its last component is the run name and
        its parent is the data root.
    drop_all : bool, optional
        Drop and re-create all tables before loading.
    overwrite : bool, optional
        Delete and reload observations whose raw filename is already in the
        database instead of skipping them.
    **kwargs
        Accepted but not used in this function.

    Raises
    ------
    ValueError
        If the database directory or the run path does not exist.
    RuntimeError
        If more than one ``Run`` with this name is already stored.
    """
    # Make sure the specified paths actually exist
    db_path = path.abspath(db_path)
    run_root_path = path.abspath(run_root_path)
    for path_ in [path.dirname(db_path), run_root_path]:
        if not path.exists(path_):
            raise ValueError("Path '{0}' doesn't exist!".format(path_))

    # --------------------------------------------------------------------------
    # These are relative paths, so the script needs to be run from the
    # scripts path...

    # ID table for mapping group index to TGAS row
    ID_tbl = Table.read('../data/star_identifier.csv')

    # TGAS table
    logger.debug("Loading TGAS data...")
    tgas = Table.read('../../gaia-comoving-stars/data/stacked_tgas.fits')

    # Catalog of velocities for Bensby's HIP stars:
    bensby = Table.read('../data/bensbyrv_bestunique.csv')

    # --------------------------------------------------------------------------

    # connect to the database
    engine = db_connect(db_path, ensure_db_exists=True)
    # engine.echo = True
    logger.debug("Connected to database at '{}'".format(db_path))

    if drop_all:
        # remove all tables and replace
        Base.metadata.drop_all()
        Base.metadata.create_all()

    # create a new session for interacting with the database
    session = Session()

    logger.debug("Loading SpectralLineInfo table")

    line_info = OrderedDict()

    # air wavelength of Halpha -- wavelength calibration from comp lamp is done
    # at air wavelengths, so this is where Halpha should be, right?
    line_info['Halpha'] = 6562.8*u.angstrom

    # [OI] emission lines -- wavelengths from:
    #   http://physics.nist.gov/PhysRefData/ASD/lines_form.html
    line_info['[OI] 5577'] = 5577.3387*u.angstrom
    line_info['[OI] 6300'] = 6300.304*u.angstrom
    line_info['[OI] 6364'] = 6363.776*u.angstrom

    for name, wvln in line_info.items():
        n = session.query(SpectralLineInfo).filter(SpectralLineInfo.name == name).count()
        if n == 0:
            logger.debug('Loading line {0} at {1}'.format(name, wvln))
            line = SpectralLineInfo(name=name, wavelength=wvln)
            session.add(line)
            session.commit()
        else:
            logger.debug('Line {0} already loaded'.format(name))

    # Create an entry for this observing run
    data_path, run_name = path.split(run_root_path)
    logger.info("Path to night paths: {0}".format(data_path))

    n = session.query(Run).filter(Run.name == run_name).count()
    if n == 0:
        logger.debug('Adding run {0} to database'.format(run_name))
        run = Run(name=run_name)
        session.add(run)
        session.commit()
    elif n == 1:
        # The original format string had no placeholder for the run name
        logger.debug('Loading run {0} from database'.format(run_name))
        run = session.query(Run).filter(Run.name == run_name).limit(1).one()
    else:
        raise RuntimeError("Found {0} runs named '{1}' in the database; "
                           "expected at most one.".format(n, run_name))

    # Now we need to go through each processed night of data and load all of the
    # relevant observations of sources.

    # First we get the column names for the Observation and TGASSource tables
    obs_columns = [str(c).split('.')[1] for c in Observation.__table__.columns]
    tgassource_columns = [str(c).split('.')[1]
                          for c in TGASSource.__table__.columns]

    # Here's where there's a bit of hard-coded bewitchery - the nights (within
    # each run) have to be labeled 'n1', 'n2', and etc. Sorry.
    glob_pattr_proc = path.join(data_path, 'processed', run_name, 'n?')
    for proc_night_path in glob.glob(glob_pattr_proc):
        night = path.basename(proc_night_path)
        night_id = int(night[1])
        logger.debug('Loading night {0}...'.format(night_id))

        observations = []
        tgas_sources = []
        prior_rvs = []

        glob_pattr_1d = path.join(proc_night_path, '1d_*.fit')
        for path_1d in ProgressBar(glob.glob(glob_pattr_1d)):
            hdr = fits.getheader(path_1d)

            # skip all except OBJECT observations
            if hdr['IMAGETYP'] != 'OBJECT':
                continue

            # strip the leading '1d_' to recover the raw file name
            basename = path.basename(path_1d)[3:]
            logger.log(1, 'loading row for {0}'.format(basename))

            kw = dict()

            # construct filenames using the hard-coded prefixes
            kw['filename_raw'] = basename
            kw['filename_p'] = 'p_' + basename
            kw['filename_1d'] = '1d_' + basename

            # check if this filename is already in the database, if so, drop it
            base_query = session.query(Observation)\
                .filter(Observation.filename_raw == kw['filename_raw'])
            already_loaded = base_query.count() > 0

            if already_loaded and overwrite:
                base_query.delete()
                session.commit()
            elif already_loaded:
                logger.debug('Object {0} [{1}] already loaded'
                             .format(hdr['OBJECT'],
                                     path.basename(kw['filename_raw'])))
                continue

            # read in header of 1d file and store keywords that exist as columns
            kw.update(fits_header_to_cols(hdr, obs_columns))

            # HACK: skip empty object name
            if len(str(hdr['OBJECT'])) == 0:
                logger.warning('SKIPPING - empty OBJECT key')
                continue

            # get group id from object name
            if '-' in str(hdr['OBJECT']):
                # Per APW and SMOH's convention
                split_name = hdr['OBJECT'].split('-')
                kw['group_id'] = int(split_name[0])

                # group 10 stores the TGAS row directly; other groups go
                # through the identifier table
                if kw['group_id'] == 10:
                    tgas_row_idx = int(split_name[1])
                else:
                    smoh_idx = int(split_name[1])
                    tgas_row_idx = ID_tbl[smoh_idx]['tgas_row']
                tgas_row = tgas[tgas_row_idx]

                # query Simbad to get all possible names for this target
                if tgas_row['hip'] > 0:
                    object_name = 'HIP{0}'.format(tgas_row['hip'])
                else:
                    object_name = 'TYC {0}'.format(tgas_row['tycho2_id'])
                logger.log(1, 'common name: {0}'.format(object_name))

                try:
                    all_ids = Simbad.query_objectids(object_name)['ID'].astype(str)
                except Exception as e:
                    # best effort: continue without alternate names
                    logger.warning('Simbad query_objectids failed for "{0}" '
                                   'with error: {1}'
                                   .format(object_name, str(e)))
                    all_ids = []

                logger.log(1, 'this is a group object')

                if len(all_ids) > 0:
                    logger.log(1, 'other names for this object: {0}'
                               .format(', '.join(all_ids)))
                else:
                    logger.log(1, 'simbad names for this object could not be '
                               'retrieved')

            elif (isinstance(hdr['OBJECT'], int) or
                    str(hdr['OBJECT']).startswith('k') or
                    hdr['OBJECT'][0].isdigit()):
                # Assume it's a KIC number - per Ruth and Dan's convention
                if isinstance(hdr['OBJECT'], int):
                    object_name = 'KIC {0:d}'.format(hdr['OBJECT'])
                elif hdr['OBJECT'].startswith('k'):
                    object_name = 'KIC {0}'.format(hdr['OBJECT'][1:])
                else:
                    object_name = 'KIC {0}'.format(hdr['OBJECT'])

                # query Simbad to get all possible names for this target
                logger.log(1, 'common name: {0}'.format(object_name))

                try:
                    all_ids = Simbad.query_objectids(object_name)['ID'].astype(str)
                except Exception as e:
                    # best effort: continue without alternate names
                    logger.warning('Simbad query_objectids failed for "{0}" '
                                   'with error: {1}'
                                   .format(object_name, str(e)))
                    all_ids = []

                logger.log(1, 'this is a KIC object')

                if len(all_ids) > 0:
                    logger.log(1, 'other names for this object: {0}'
                               .format(', '.join(all_ids)))
                else:
                    logger.log(1, 'simbad names for this object could not be '
                               'retrieved')

                # get the Tycho 2 ID, if it has one
                hip_id = [id_ for id_ in all_ids if 'HIP' in id_]
                tyc_id = [id_ for id_ in all_ids if 'TYC' in id_]
                if hip_id:
                    hip_id = int(hip_id[0].replace('HIP', '').strip())
                    logger.log(1, 'source has HIP id: {0}'.format(hip_id))
                    tgas_row_idx = np.where(tgas['hip'] == hip_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                elif tyc_id:
                    tyc_id = tyc_id[0].replace('TYC', '').strip()
                    logger.log(1, 'source has tycho 2 id: {0}'.format(tyc_id))
                    tgas_row_idx = np.where(tgas['tycho2_id'] == tyc_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                else:
                    logger.log(1, 'source has no HIP or TYC id.')
                    tgas_row_idx = None

                # result_table = Simbad.query_object(object_name)

            else:
                object_name = hdr['OBJECT']
                logger.log(1, 'common name: {0}'.format(object_name))
                logger.log(1, 'this is not a group object')

                # query Simbad to get all possible names for this target
                try:
                    all_ids = Simbad.query_objectids(object_name)['ID'].astype(str)
                except Exception as e:
                    logger.warning('SKIPPING: Simbad query_objectids failed for '
                                   '"{0}" with error: {1}'
                                   .format(object_name, str(e)))
                    continue

                # get the Tycho 2 ID, if it has one
                hip_id = [id_ for id_ in all_ids if 'HIP' in id_]
                tyc_id = [id_ for id_ in all_ids if 'TYC' in id_]
                if hip_id:
                    hip_id = int(hip_id[0].replace('HIP', '').strip())
                    logger.log(1, 'source has HIP id: {0}'.format(hip_id))
                    tgas_row_idx = np.where(tgas['hip'] == hip_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                elif tyc_id:
                    tyc_id = tyc_id[0].replace('TYC', '').strip()
                    logger.log(1, 'source has tycho 2 id: {0}'.format(tyc_id))
                    tgas_row_idx = np.where(tgas['tycho2_id'] == tyc_id)[0]

                    if len(tgas_row_idx) == 0:
                        tgas_row_idx = None
                    else:
                        tgas_row = tgas[tgas_row_idx]

                else:
                    logger.log(1, 'source has no tycho 2 id.')
                    tgas_row_idx = None

            # store relevant names / IDs
            simbad_info_kw = dict()
            for id_ in all_ids:
                if id_.lower().startswith('hd'):
                    simbad_info_kw['hd_id'] = id_[2:]

                elif id_.lower().startswith('hip'):
                    simbad_info_kw['hip_id'] = id_[3:]

                elif id_.lower().startswith('tyc'):
                    simbad_info_kw['tyc_id'] = id_[3:]

                elif id_.lower().startswith('2mass'):
                    simbad_info_kw['twomass_id'] = id_[5:]

            for k, v in simbad_info_kw.items():
                simbad_info_kw[k] = v.strip()
            simbad_info = SimbadInfo(**simbad_info_kw)

            # Compute barycenter velocity given coordinates of where the
            # telescope was pointing and observation time
            t = Time(hdr['JD'], format='jd', scale='utc')
            sc = coord.SkyCoord(ra=hdr['RA'], dec=hdr['DEC'],
                                unit=(u.hourangle, u.degree))
            kw['v_bary'] = bary_vel_corr(t, sc, location=kitt_peak)

            obs = Observation(night=night_id, **kw)
            obs.run = run

            # Get the TGAS data if the source is in TGAS
            if tgas_row_idx is not None:
                logger.log(1, 'TGAS row: {0}'.format(tgas_row_idx))

                tgas_kw = dict()
                tgas_kw['row_index'] = tgas_row_idx
                for name in tgas.colnames:
                    if name in tgassource_columns:
                        tgas_kw[name] = tgas_row[name]

                # NOTE(review): in the group-object branch tgas_row comes from
                # an integer index, so 'source_id' may be a scalar there and
                # the [0] below could fail -- confirm against real data.
                job = Gaia.launch_job(gaia_query.format(tgas_kw['source_id'][0]),
                                      dump_to_file=False)
                res = job.get_results()

                if len(res) == 0:
                    logger.warning("No 2MASS data found for: {0}"
                                   .format(tgas_kw['source_id']))

                elif len(res) == 1:
                    tgas_kw['J'] = res['j_m'][0]
                    tgas_kw['J_err'] = res['j_msigcom'][0]
                    tgas_kw['H'] = res['h_m'][0]
                    tgas_kw['H_err'] = res['h_msigcom'][0]
                    tgas_kw['Ks'] = res['ks_m'][0]
                    tgas_kw['Ks_err'] = res['ks_msigcom'][0]

                tgas_source = TGASSource(**tgas_kw)
                tgas_sources.append(tgas_source)

                obs.tgas_source = tgas_source

            else:
                logger.log(1, 'TGAS row could not be found.')

            obs.simbad_info = simbad_info
            observations.append(obs)

            # retrieve a previous measurement from the literature
            result = get_best_rv(obs)
            if result is not None:
                rv, rv_err, rv_qual, rv_bibcode, rv_source = result

                prv = PriorRV(rv=rv*u.km/u.s, err=rv_err*u.km/u.s,
                              qual=rv_qual, bibcode=rv_bibcode,
                              source=rv_source)
                obs.prior_rv = prv
                prior_rvs.append(prv)

            logger.log(1, '-'*68)

        session.add_all(observations)
        session.add_all(tgas_sources)
        session.add_all(prior_rvs)
        session.commit()

    # Last thing to do is cross-match with the Bensby catalog to
    # replace velocities when they are better
    for sim_info in session.query(SimbadInfo)\
                           .filter(SimbadInfo.hip_id != None).all():  # noqa: E711
        hip_id = 'HIP' + str(sim_info.hip_id)
        row = bensby[bensby['OBJECT'] == hip_id]
        if len(row) > 0:
            sim_info.rv = row['velValue']
            sim_info.rv_qual = row['quality']
            sim_info.rv_bibcode = row['bibcode']

    # Commit (rather than only flush) so the cross-matched velocities are
    # persisted before the session is closed.
    session.commit()
    session.close()
def main(db_path, run_name, data_root_path=None, filename=None,
         overwrite=False, pool=None):
    """Fit Halpha and the [OI] sky lines for every observation in a run.

    For each selected observation the 1D spectrum is read, Halpha is fit by
    maximum likelihood with a Voigt profile and then sampled with emcee, and
    each [OI] sky line is fit with a Gaussian. The results are stored as
    ``SpectralLineMeasurement`` rows and diagnostic plots are written to
    ``<db directory>/plots/<run_name>``.

    Parameters
    ----------
    db_path : str
        Path to the database file.
    run_name : str
        Name of the ``Run`` to process.
    data_root_path : str, optional
        Root passed to ``obs.path_1d``; defaults to the directory that
        contains the database.
    filename : str, optional
        When given, only the observation with this raw filename is processed.
    overwrite : bool, optional
        Re-fit observations that already have a measurement for every line.
    pool : optional
        Pool handed to emcee and closed at the end. A
        ``schwimmbad.SerialPool`` is created when omitted.
    """
    if pool is None:
        pool = schwimmbad.SerialPool()

    # connect to the database
    engine = db_connect(db_path)
    # engine.echo = True
    logger.debug("Connected to database at '{}'".format(db_path))

    # create a new session for interacting with the database
    session = Session()

    root_path, _ = path.split(db_path)
    if data_root_path is None:
        data_root_path = root_path

    plot_path = path.join(root_path, 'plots', run_name)
    if not path.exists(plot_path):
        os.makedirs(plot_path, exist_ok=True)

    # TODO: there might be some bugs here...
    n_lines = session.query(SpectralLineInfo).count()
    Halpha = session.query(SpectralLineInfo)\
        .filter(SpectralLineInfo.name == 'Halpha').one()
    OI_lines = session.query(SpectralLineInfo)\
        .filter(SpectralLineInfo.name.contains('[OI]')).all()

    if filename is None:
        # grab all unfinished sources
        observations = session.query(Observation).join(Run)\
            .filter(Run.name == run_name).all()
    else:
        # only process the observation corresponding to this filename
        observations = session.query(Observation).join(Run)\
            .filter(Run.name == run_name)\
            .filter(Observation.filename_raw == filename).all()

    for obs in observations:
        measurements = session.query(SpectralLineMeasurement)\
            .join(Observation)\
            .filter(Observation.id == obs.id).all()

        if len(measurements) == n_lines and not overwrite:
            logger.debug('All line measurements already complete for object '
                         '{0} in file {1}'.format(obs.object, obs.filename_raw))
            continue

        # Read the spectrum data and get wavelength solution
        filebase, _ = path.splitext(obs.filename_1d)
        filename_1d = obs.path_1d(data_root_path)
        spec = Table.read(filename_1d)
        logger.debug('Loaded 1D spectrum for object {0} from file {1}'.format(
            obs.object, filename_1d))

        # Extract region around Halpha
        x, (flux, ivar) = extract_region(
            spec['wavelength'],
            center=Halpha.wavelength.value,
            width=100,
            arrs=[spec['source_flux'], spec['source_ivar']])

        # We start by doing maximum likelihood estimation to fit the line, then
        # use the best-fit parameters to initialize an MCMC run.
        # TODO: need to figure out if it's emission or absorption...for now just
        #   assume absorption
        absorp_emiss = -1.
        lf = VoigtLineFitter(x, flux, ivar, absorp_emiss=absorp_emiss)
        lf.fit()
        fit_pars = lf.get_gp_mean_pars()

        if (not lf.success or
                abs(fit_pars['x0'] - Halpha.wavelength.value) > 16. or  # 16 Å = ~700 km/s
                abs(fit_pars['amp']) < 10):  # minimum amplitude - MAGIC NUMBER
            # TODO: should try again with emission line
            logger.error('absorption line has tiny amplitude! did '
                         'auto-determination of absorption/emission fail?')
            # TODO: what now?
            continue

        fig = lf.plot_fit()
        fig.savefig(path.join(plot_path, '{}_maxlike.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)

        # ----------------------------------------------------------------------
        # Run `emcee` instead to sample over GP model parameters:
        if fit_pars['std_G'] < 1E-2:
            lf.gp.freeze_parameter('mean:ln_std_G')

        initial = np.array(lf.gp.get_parameter_vector())
        if initial[4] < -10:  # TODO: ???
            initial[4] = -8.
        if initial[5] < -10:  # TODO: ???
            initial[5] = -8.
        ndim, nwalkers = len(initial), 64

        sampler = emcee.EnsembleSampler(nwalkers, ndim, log_probability,
                                        pool=pool, args=(lf.gp, flux))

        logger.debug("Running burn-in...")
        p0 = initial + 1e-6 * np.random.randn(nwalkers, ndim)
        p0, lp, _ = sampler.run_mcmc(p0, 128)

        # restart all walkers in a ball around the best burn-in position
        logger.debug("Running 2nd burn-in...")
        sampler.reset()
        p0 = p0[lp.argmax()] + 1e-3 * np.random.randn(nwalkers, ndim)
        p0, lp, _ = sampler.run_mcmc(p0, 512)

        logger.debug("Running production...")
        sampler.reset()
        pos, lp, _ = sampler.run_mcmc(p0, 1024)

        # Summarize each mean-model parameter by its median and scaled MAD
        fit_kw = dict()
        for i, par_name in enumerate(lf.gp.get_parameter_names()):
            if 'kernel' in par_name:
                continue

            # remove 'mean:'
            par_name = par_name[5:]

            # skip bg
            if par_name.startswith('bg'):
                continue

            samples = sampler.flatchain[:, i]

            # parameters sampled in log space are stored in linear units
            if par_name.startswith('ln_'):
                par_name = par_name[3:]
                samples = np.exp(samples)

            MAD = np.median(np.abs(samples - np.median(samples)))
            fit_kw[par_name] = np.median(samples)
            fit_kw[par_name + '_error'] = 1.5 * MAD  # convert to ~stddev

        # remove all previous line measurements
        q = session.query(SpectralLineMeasurement).join(Observation)\
            .filter(Observation.id == obs.id)
        if q.count() > 0:
            for meas in q.all():
                session.delete(meas)
            session.commit()

        slm = SpectralLineMeasurement(**fit_kw)
        slm.info = Halpha
        slm.observation = obs
        session.add(slm)
        session.commit()

        # --------------------------------------------------------------------
        # plot MCMC traces
        fig, axes = plt.subplots(2, 4, figsize=(18, 6))
        for i in range(sampler.dim):
            for walker in sampler.chain[..., i]:
                axes.flat[i].plot(walker, marker='',
                                  drawstyle='steps-mid', alpha=0.2)
            axes.flat[i].set_title(lf.gp.get_parameter_names()[i], fontsize=12)
        fig.tight_layout()
        fig.savefig(path.join(plot_path, '{}_mcmc_trace.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)
        # --------------------------------------------------------------------

        # --------------------------------------------------------------------
        # plot samples
        fig, axes = plt.subplots(3, 1, figsize=(10, 10), sharex=True)

        samples = sampler.flatchain
        for s in samples[np.random.randint(len(samples), size=32)]:
            lf.gp.set_parameter_vector(s)
            lf.plot_fit(axes=axes, fit_alpha=0.2)

        fig.tight_layout()
        fig.savefig(path.join(plot_path, '{}_mcmc_fits.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)
        # --------------------------------------------------------------------

        # --------------------------------------------------------------------
        # corner plot
        fig = corner.corner(
            sampler.flatchain[::10, :],
            labels=[x.split(':')[1] for x in lf.gp.get_parameter_names()])
        fig.savefig(path.join(plot_path, '{}_corner.png'.format(filebase)),
                    dpi=256)
        plt.close(fig)
        # --------------------------------------------------------------------

        # compute centroids for sky lines
        sky_centroids = []
        for j, sky_line in enumerate(OI_lines):
            wvln = sky_line.wavelength.value
            x, (flux, ivar) = extract_region(
                spec['wavelength'],
                center=wvln,
                width=32.,  # angstroms
                arrs=[spec['background_flux'], spec['background_ivar']])

            lf = GaussianLineFitter(x, flux, ivar,
                                    absorp_emiss=1.)  # all emission lines

            try:
                lf.fit()
                fit_pars = lf.get_gp_mean_pars()

            except Exception as e:
                # logger.warning replaces the deprecated logger.warn alias
                logger.warning("Failed to fit sky line {0}:\n{1}".format(
                    sky_line, e))

                lf.success = False
                fit_pars = lf.get_init()

                # HACK: fall back to the initial guess with placeholder values
                # so that a (failed) measurement row can still be stored
                fit_pars['amp'] = 0.
                fit_pars['bg_coef'] = None
                fit_pars['x0'] = 0.

            # HACK: hackish signal-to-noise
            max_ = fit_pars['amp'] / np.sqrt(2 * np.pi * fit_pars['std']**2)
            SNR = max_ / np.median(1 / np.sqrt(ivar))

            if (not lf.success or abs(fit_pars['x0'] - wvln) > 4 or
                    fit_pars['amp'] < 10 or fit_pars['std'] > 4 or
                    SNR < 2.5):
                # failed
                x0 = np.nan * u.angstrom
                title = 'f****d'
                fit_pars['amp'] = 0.

            else:
                x0 = fit_pars['x0'] * u.angstrom
                title = '{:.2f}'.format(fit_pars['amp'])

            if lf.success:
                fig = lf.plot_fit()
                fig.suptitle(title, y=0.95)
                fig.subplots_adjust(top=0.8)
                fig.savefig(path.join(
                    plot_path,
                    '{}_maxlike_sky_{:.0f}.png'.format(filebase, wvln)),
                    dpi=256)
                plt.close(fig)

            # store the sky line measurements
            fit_pars['std_G'] = fit_pars.pop('std')  # HACK
            fit_pars.pop('bg_coef')  # HACK
            slm = SpectralLineMeasurement(**fit_pars)
            slm.info = sky_line
            slm.observation = obs
            session.add(slm)
            session.commit()

            sky_centroids.append(x0)
        sky_centroids = u.Quantity(sky_centroids)

        logger.info('{} [{}]: x0={x0:.3f} σ={err:.3f}\n--------'.format(
            obs.object, filebase, x0=fit_kw['x0'], err=fit_kw['x0_error']))

        session.commit()

    pool.close()
def _compute_offset_corrections(self):
    """Derive the per-night wavelength corrections and the RV error floor.

    Uses every observation of ``self.run_name`` that has a measured line
    centroid and a prior RV. Sets three attributes:

    * ``self._night_polys`` -- night id -> degree-1 ``np.poly1d`` fit of the
      sky-line residual against observation time (decimal hours).
    * ``self._night_final_offsets`` -- night id -> median of
      (corrected RV - true RV) for that night.
    * ``self._abs_err`` -- 1.5 x the median absolute deviation of
      (corrected RV - true RV) over all observations.
    """
    session = self.session
    run_name = self.run_name

    q = session.query(Observation).join(Run, SpectralLineMeasurement, PriorRV)
    q = q.filter(Run.name == run_name)
    q = q.filter(SpectralLineMeasurement.x0 != None)
    q = q.filter(PriorRV.rv != None)

    logger.debug('{0} observations with prior RV measurements'.format(
        q.distinct().count()))

    # retrieve all observations with measured centroids and previous RV's
    observations = q.all()

    # What we do below is look at the residual offsets between applying a naïve
    # sky-line correction and the true RV (with the barycentric velocity
    # applied)
    raw_offsets = np.zeros(len(observations)) * u.angstrom
    # NOTE(review): the second axis is hard-coded to 3 sky lines; an
    # observation with more sky-line measurements would index out of bounds.
    all_sky_offsets = np.full((len(observations), 3), np.nan) * u.angstrom
    true_rv = np.zeros(len(observations)) * u.km / u.s
    obs_time = np.zeros(len(observations))
    night_id = np.zeros(len(observations), dtype=int)
    corrected_rv = np.zeros(len(observations)) * u.km / u.s

    for i, obs in enumerate(observations):
        # convert obstime into decimal hour
        # (the ':'-separated fields are divided by 1, 60 and 3600 and summed)
        obs_time[i] = np.sum(
            np.array(list(map(float, obs.time_obs.split(':')))) /
            np.array([1., 60., 3600.]))

        # Compute the raw offset: difference between Halpha centroid and true
        # wavelength value
        # NOTE(review): assumes measurements[0] is the Halpha measurement and
        # the rest are sky lines -- confirm the relationship ordering.
        x0 = obs.measurements[0].x0 * u.angstrom
        offset = (x0 - self.Halpha)
        raw_offsets[i] = offset

        night_id[i] = obs.night

        # For each sky line (that passes certain quality checks), compute the
        # offset between the predicted wavelength and measured centroid
        # TODO: generalize these quality cuts - see also below in
        #   get_corrected_rv
        sky_offsets = []
        for j, meas in enumerate(obs.measurements[1:]):
            sky_offset = meas.x0 * u.angstrom - meas.info.wavelength
            if (meas.amp > 16 and meas.std_G < 2 and meas.std_G > 0.3 and
                    np.abs(sky_offset) < 4 * u.angstrom):  # MAGIC NUMBER: quality cuts
                sky_offsets.append(sky_offset)
                all_sky_offsets[i, j] = sky_offset

        sky_offsets = u.Quantity(sky_offsets)

        # The mean computed here is only used to decide whether to log; the
        # per-line values kept in all_sky_offsets drive the fits below.
        if len(sky_offsets) > 0:
            sky_offset = np.mean(sky_offsets)
        else:
            sky_offset = np.nan * u.angstrom
            logger.debug(
                "not correcting with sky line for {0}".format(obs))

        true_rv[i] = obs.prior_rv.rv - obs.v_bary

    raw_rv = raw_offsets / self.Halpha * c.to(u.km / u.s)

    # unique night ID's
    unq_night_id = np.unique(night_id)
    unq_night_id.sort()

    # Now we do a totally insane thing. From visualizing the residual
    # differences, there seems to be a trend with the observation time. We
    # fit a line to these residuals and use this to further correct the
    # wavelength solutions using just the (strongest) [OI] 5577 Å line.
    # Column 0 of all_sky_offsets is used as that line.
    diff = all_sky_offsets[:, 0] - (
        (raw_rv - true_rv) / c * 5577 * u.angstrom).decompose()
    diff[np.abs(diff) > 2 * u.angstrom] = np.nan * u.angstrom  # reject BIG offsets

    self._night_polys = dict()
    self._night_final_offsets = dict()
    for n in unq_night_id:
        # only observations from this night with a usable residual
        mask = (night_id == n) & np.isfinite(diff)
        # NOTE(review): a night with fewer than two usable points gives an
        # under-determined linear fit -- confirm this cannot happen.
        coef = np.polyfit(obs_time[mask], diff[mask], deg=1,
                          w=np.full(mask.sum(), 1 / 0.1))
        poly = np.poly1d(coef)
        self._night_polys[n] = poly

        # Only the first two sky-line columns are averaged here.
        sky_offset = np.nanmean(all_sky_offsets[mask, :2], axis=1)
        sky_offset[np.isnan(sky_offset)] = 0. * u.angstrom
        sky_offset -= self._night_polys[n](obs_time[mask]) * u.angstrom

        corrected_rv[mask] = (raw_offsets[mask] -
                              sky_offset) / self.Halpha * c.to(u.km / u.s)

        # Finally, we align the median of each night's ∆RV distribution with 0
        drv = corrected_rv[mask] - true_rv[mask]
        self._night_final_offsets[n] = np.nanmedian(drv)

    # now estimate the std. dev. uncertainty using the MAD
    all_drv = corrected_rv - true_rv
    self._abs_err = 1.5 * np.nanmedian(
        np.abs(all_drv - np.nanmedian(all_drv)))
def main(night_path, skip_list_file, mask_file, overwrite=False, plot=False):
    """Reduce one night of raw CCD frames and extract 1D spectra.

    See argparse block at bottom of script for description of parameters.

    The steps are: build (or load) the master bias and master flat, process
    every OBJECT frame and extract it to 1D, then do the same for every COMP
    (comparison lamp) frame. Outputs are written to
    ``<data root>/processed/<run>/<night>``.

    Parameters
    ----------
    night_path : str
        Directory holding the raw frames of one night.
    skip_list_file : str or None
        Text file with one filename per line to exclude from processing.
    mask_file : str or None
        YAML file mapping raw filenames to pixel-mask specifications.
    overwrite : bool, optional
        Re-create outputs that already exist.
    plot : bool, optional
        Save diagnostic plots into a ``plots`` sub-directory of the output.

    Raises
    ------
    IOError
        If ``night_path`` does not exist.
    """
    night_path = path.realpath(path.expanduser(night_path))
    if not path.exists(night_path):
        raise IOError("Path '{}' doesn't exist".format(night_path))
    logger.info("Reading data from path: {}".format(night_path))

    base_path, night_name = path.split(night_path)
    data_path, run_name = path.split(base_path)
    output_path = path.realpath(
        path.join(data_path, 'processed', run_name, night_name))
    os.makedirs(output_path, exist_ok=True)
    logger.info("Saving processed files to path: {}".format(output_path))

    if plot:
        # if we're making plots
        plot_path = path.realpath(path.join(output_path, 'plots'))
        logger.debug("Will make and save plots to: {}".format(plot_path))
        os.makedirs(plot_path, exist_ok=True)
    else:
        plot_path = None

    # check for files to skip (e.g., saturated or errored exposures)
    if skip_list_file is not None:
        # a file containing a list of filenames to skip
        with open(skip_list_file, 'r') as f:
            skip_list = [x.strip() for x in f if x.strip()]
    else:
        skip_list = None

    # look for pixel mask file
    if mask_file is not None:
        # load YAML file specifying pixel masks for nearby sources.
        # safe_load is used because yaml.load() without an explicit Loader is
        # rejected by current PyYAML and can construct arbitrary objects.
        with open(mask_file, 'r') as f:
            pixel_mask_spec = yaml.safe_load(f)
    else:
        pixel_mask_spec = None

    # generate the raw image file collection to process
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)
    logger.info("Frames to process:")
    logger.info("- Bias frames: {}".format(
        len(ic.files_filtered(imagetyp='BIAS'))))
    logger.info("- Flat frames: {}".format(
        len(ic.files_filtered(imagetyp='FLAT'))))
    logger.info("- Comparison lamp frames: {}".format(
        len(ic.files_filtered(imagetyp='COMP'))))
    logger.info("- Object frames: {}".format(
        len(ic.files_filtered(imagetyp='OBJECT'))))

    # HACK: the collection is re-created before each processing stage
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    # ============================
    # Create the master bias frame
    # ============================

    # overscan region of the CCD, using FITS index notation
    oscan_fits_section = "[{}:{},:]".format(oscan_idx, oscan_idx + oscan_size)

    master_bias_file = path.join(output_path, 'master_bias.fits')

    if not os.path.exists(master_bias_file) or overwrite:
        # get list of overscan-subtracted bias frames as 2D image arrays
        bias_list = []
        for hdu, fname in ic.hdus(return_fname=True, imagetyp='BIAS'):
            logger.debug('Processing Bias frame: {0}'.format(fname))
            ccd = CCDData.read(path.join(ic.location, fname), unit='adu')
            ccd = ccdproc.gain_correct(ccd, gain=ccd_gain)
            ccd = ccdproc.subtract_overscan(ccd, overscan=ccd[:, oscan_idx:])
            ccd = ccdproc.trim_image(ccd,
                                     fits_section="[1:{},:]".format(oscan_idx))
            bias_list.append(ccd)

        # combine all bias frames into a master bias frame
        logger.info("Creating master bias frame")
        master_bias = ccdproc.combine(bias_list, method='average',
                                      clip_extrema=True, nlow=1, nhigh=1,
                                      error=True)
        master_bias.write(master_bias_file, overwrite=True)

    else:
        logger.info("Master bias frame file already exists: {}".format(
            master_bias_file))
        master_bias = CCDData.read(master_bias_file)

    if plot:
        # TODO: this assumes vertical CCD
        assert master_bias.shape[0] > master_bias.shape[1]

        aspect_ratio = master_bias.shape[1] / master_bias.shape[0]
        fig, ax = plt.subplots(1, 1, figsize=(10, 12 * aspect_ratio))
        vmin, vmax = zscaler.get_limits(master_bias.data)
        # 'lower' is the documented spelling; 'bottom' is rejected by current
        # matplotlib releases
        cs = ax.imshow(master_bias.data.T, origin='lower', cmap=cmap,
                       vmin=max(0, vmin), vmax=vmax)
        ax.set_title('master bias frame [zscale]')

        fig.colorbar(cs)
        fig.tight_layout()
        fig.savefig(path.join(plot_path, 'master_bias.png'))
        plt.close(fig)

    # ============================
    # Create the master flat field
    # ============================
    # HACK: the collection is re-created before each processing stage
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    master_flat_file = path.join(output_path, 'master_flat.fits')

    if not os.path.exists(master_flat_file) or overwrite:
        # create a list of flat frames
        flat_list = []
        for hdu, fname in ic.hdus(return_fname=True, imagetyp='FLAT'):
            logger.debug('Processing Flat frame: {0}'.format(fname))
            ccd = CCDData.read(path.join(ic.location, fname), unit='adu')
            ccd = ccdproc.gain_correct(ccd, gain=ccd_gain)
            ccd = ccdproc.ccd_process(ccd, oscan=oscan_fits_section,
                                      trim="[1:{},:]".format(oscan_idx),
                                      master_bias=master_bias)
            flat_list.append(ccd)

        # combine into a single master flat - use 3*sigma sigma-clipping
        logger.info("Creating master flat frame")
        master_flat = ccdproc.combine(flat_list, method='average',
                                      sigma_clip=True,
                                      low_thresh=3, high_thresh=3)
        master_flat.write(master_flat_file, overwrite=True)

        # TODO: make plot if requested?

    else:
        logger.info("Master flat frame file already exists: {}".format(
            master_flat_file))
        master_flat = CCDData.read(master_flat_file)

    if plot:
        # TODO: this assumes vertical CCD
        assert master_flat.shape[0] > master_flat.shape[1]

        aspect_ratio = master_flat.shape[1] / master_flat.shape[0]
        fig, ax = plt.subplots(1, 1, figsize=(10, 12 * aspect_ratio))
        vmin, vmax = zscaler.get_limits(master_flat.data)
        cs = ax.imshow(master_flat.data.T, origin='lower', cmap=cmap,
                       vmin=max(0, vmin), vmax=vmax)
        ax.set_title('master flat frame [zscale]')

        fig.colorbar(cs)
        fig.tight_layout()
        fig.savefig(path.join(plot_path, 'master_flat.png'))
        plt.close(fig)

    # =====================
    # Process object frames
    # =====================
    # HACK: the collection is re-created before each processing stage
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    logger.info("Beginning object frame processing...")
    for hdu, fname in ic.hdus(return_fname=True, imagetyp='OBJECT'):
        new_fname = path.join(output_path, 'p_{}'.format(fname))

        # -------------------------------------------
        # First do the simple processing of the frame
        # -------------------------------------------

        logger.debug("Processing '{}' [{}]".format(hdu.header['OBJECT'], fname))
        if path.exists(new_fname) and not overwrite:
            logger.log(1, "\tAlready processed! {}".format(new_fname))
            ext = SourceCCDExtractor(filename=path.join(ic.location,
                                                        new_fname),
                                     plot_path=plot_path,
                                     zscaler=zscaler, cmap=cmap,
                                     **ccd_props)
            nccd = ext.ccd

            # HACK: drop the first two characters of the base name
            # (presumably the 'p_' prefix of the processed file)
            ext._filename_base = ext._filename_base[2:]

        else:
            # process the frame!
            ext = SourceCCDExtractor(filename=path.join(ic.location, fname),
                                     plot_path=plot_path,
                                     zscaler=zscaler, cmap=cmap,
                                     unit='adu', **ccd_props)

            _pix_mask = pixel_mask_spec.get(
                fname, None) if pixel_mask_spec is not None else None

            nccd = ext.process_raw_frame(pixel_mask_spec=_pix_mask,
                                         master_bias=master_bias,
                                         master_flat=master_flat)
            nccd.write(new_fname, overwrite=overwrite)

        # -------------------------------------------
        # Now do the 1D extraction
        # -------------------------------------------

        fname_1d = path.join(output_path, '1d_{0}'.format(fname))
        if path.exists(fname_1d) and not overwrite:
            logger.log(1, "\tAlready extracted! {}".format(fname_1d))
            continue

        else:
            logger.debug("\tExtracting to 1D")

            # first step is to fit a voigt profile to a middle-ish row to determine LSF
            lsf_p = ext.get_lsf_pars()  # MAGIC NUMBER

            try:
                tbl = ext.extract_1d(lsf_p)
            except Exception as e:
                # a failed extraction skips this frame, not the whole night
                logger.error('Failed! {}: {}'.format(e.__class__.__name__,
                                                     str(e)))
                continue

            hdu0 = fits.PrimaryHDU(header=nccd.header)
            hdu1 = fits.table_to_hdu(tbl)
            hdulist = fits.HDUList([hdu0, hdu1])

            hdulist.writeto(fname_1d, overwrite=overwrite)

        del ext

    # ==============================
    # Process comparison lamp frames
    # ==============================
    # HACK: the collection is re-created before each processing stage
    ic = GlobImageFileCollection(night_path, skip_filenames=skip_list)

    logger.info("Beginning comp. lamp frame processing...")
    for hdu, fname in ic.hdus(return_fname=True, imagetyp='COMP'):
        new_fname = path.join(output_path, 'p_{}'.format(fname))

        logger.debug("\tProcessing '{}'".format(hdu.header['OBJECT']))

        if path.exists(new_fname) and not overwrite:
            logger.log(1, "\tAlready processed! {}".format(new_fname))
            ext = CompCCDExtractor(filename=path.join(ic.location, new_fname),
                                   plot_path=plot_path,
                                   zscaler=zscaler, cmap=cmap,
                                   **ccd_props)
            nccd = ext.ccd

            # HACK: drop the first two characters of the base name
            # (presumably the 'p_' prefix of the processed file)
            ext._filename_base = ext._filename_base[2:]

        else:
            # process the frame!
            ext = CompCCDExtractor(filename=path.join(ic.location, fname),
                                   plot_path=plot_path,
                                   unit='adu', **ccd_props)

            _pix_mask = pixel_mask_spec.get(
                fname, None) if pixel_mask_spec is not None else None

            nccd = ext.process_raw_frame(
                pixel_mask_spec=_pix_mask,
                master_bias=master_bias,
                master_flat=master_flat,
            )
            nccd.write(new_fname, overwrite=overwrite)

        # -------------------------------------------
        # Now do the 1D extraction
        # -------------------------------------------

        fname_1d = path.join(output_path, '1d_{0}'.format(fname))
        if path.exists(fname_1d) and not overwrite:
            logger.log(1, "\tAlready extracted! {}".format(fname_1d))
            continue

        else:
            logger.debug("\tExtracting to 1D")

            try:
                tbl = ext.extract_1d()
            except Exception as e:
                # a failed extraction skips this frame, not the whole night
                logger.error('Failed! {}: {}'.format(e.__class__.__name__,
                                                     str(e)))
                continue

            hdu0 = fits.PrimaryHDU(header=nccd.header)
            hdu1 = fits.table_to_hdu(tbl)
            hdulist = fits.HDUList([hdu0, hdu1])

            hdulist.writeto(fname_1d, overwrite=overwrite)
def auto_identify(self):
    """Automatically fit the lines in ``self.line_list`` not yet identified.

    The (wavelength, pixel) pairs already stored in ``self._map_dict`` are
    used to build a piecewise-linear wavelength -> pixel mapping. For every
    entry of ``self.line_list`` the mapping predicts a pixel position, a
    window of +/- 5 approximate Voigt FWHM around it is passed to
    ``self.get_line_props``, and fits that pass the amplitude cut get a
    marker drawn via ``self.draw_line_marker``.

    Returns
    -------
    None
        Also returned early (after reporting the problem to the logger and
        the UI text box) when fewer than 4 lines have been identified.

    Raises
    ------
    ValueError
        If no line list is available.
    """
    if self.line_list is None:
        raise ValueError("Can't auto-identify lines without a line list.")

    if len(self._map_dict['wavel']) < 4:
        msg = "Please identify at least 4 lines before trying auto-identify."
        logger.error(msg)
        self._ui['textbox'].setText("ERROR: {}".format(msg))
        return None

    _idx = np.argsort(self._map_dict['wavel'])
    wvln = np.array(self._map_dict['wavel'])[_idx]
    pixl = np.array(self._map_dict['pixel'])[_idx]

    # build an approximate wavelength solution to predict where lines are
    spl = InterpolatedUnivariateSpline(wvln, pixl, k=1)  # use linear interp.
    predicted_pixels = spl(self.line_list)

    new_wavels = []
    new_pixels = []

    # Approximate Voigt FWHM from the Gaussian and Lorentzian widths
    # from Wikipedia: https://en.wikipedia.org/wiki/Voigt_profile
    fG = 2*self._line_std_G*np.sqrt(2*np.log(2))
    fL = 2*self._line_hwhm_L
    lw = 0.5346*fL + np.sqrt(0.2166*fL**2 + fG**2)

    for wave_idx, (pix_ctr, wave) in enumerate(zip(predicted_pixels,
                                                   self.line_list)):
        if pix_ctr < 200 or pix_ctr > 1600:  # skip if outside good rows
            continue

        elif wave_idx in self._done_wavel_idx:  # skip if already fit
            continue

        # fitting window: +/- 5 line widths around the predicted position
        xmin = pix_ctr - 5*lw
        xmax = pix_ctr + 5*lw

        logger.debug("Fitting line at predicted pix={:.2f}, λ={:.2f}"
                     .format(pix_ctr, wave))

        try:
            lp, gp = self.get_line_props(xmin, xmax,
                                         std_G0=self._line_std_G,
                                         hwhm_L0=self._line_hwhm_L)
        except Exception as e:
            logger.error("Failed to auto-fit line at {} ({msg})"
                         .format(wave, msg=str(e)))
            continue

        # Guard *before* touching the fit result: the original code indexed
        # ``lp`` (in a debug print) ahead of this check, so a ``None``
        # result raised TypeError instead of being skipped.
        if lp is None:
            continue

        logger.debug("Fit amp={}, x0={}".format(lp['amp'], lp['x0']))

        if lp['amp'] < 100.:  # HACK: hard-coded minimum amplitude
            continue

        # TODO: a check that rejects fits landing within ~3 pixels of an
        # already identified line was sketched here but never enabled.

        self.draw_line_marker(lp, wave, xmin, xmax, gp=gp)
        new_wavels.append(wave)
        # NOTE(review): this stores the *predicted* pixel, not the fitted
        # centroid lp['x0'] -- confirm that is intended.
        new_pixels.append(pix_ctr)
        self._done_wavel_idx.append(wave_idx)

    self.fig.canvas.draw()

    # NOTE(review): the map is replaced by the newly fit lines only; the
    # previously identified entries are not carried over here -- confirm
    # they are preserved elsewhere (e.g. by draw_line_marker).
    _idx = np.argsort(new_wavels)
    self._map_dict['wavel'] = np.array(new_wavels)[_idx]
    self._map_dict['pixel'] = np.array(new_pixels)[_idx]
def _advance_sampler(sampler, start, nsteps):
    """Step ``sampler`` ``nsteps`` times from ``start`` behind a progress bar.

    Returns the walker positions yielded by the final step.
    """
    last_pos = None
    for last_pos, _, _ in tqdm(sampler.sample(start, iterations=nsteps),
                               total=nsteps):
        pass
    return last_pos


def main():
    """Sample isochrone-model posteriors for a hard-coded set of stars.

    For every star in the photometry table below, the matching
    ``Observation`` row supplies the parallax, a ``StarModel`` is sampled
    with emcee (initial run, restart around the best sample, burn-in,
    production run), and the resulting samples are written to
    ``./isochrone_chains/<key>.hdf5``. Stars whose output file already
    exists are skipped.
    """
    # TODO: bad, hard-coded...
    # base_path = '/Volumes/ProjectData/gaia-comoving-followup/'
    data_root = '../../data/'
    # return value is not used below; the call is kept for its setup effect
    engine = db_connect(path.join(data_root, 'db.sqlite'))
    session = Session()

    chain_path = path.abspath('./isochrone_chains')
    os.makedirs(chain_path, exist_ok=True)

    # Check out the bottom of "Color-magnitude diagram.ipynb":
    # the interesting group ids are 1500, 1229, 1515

    # (object key, ((band, (magnitude, uncertainty)), ...))
    photometry_table = (
        ('1500-8455', (
            ('J', (6.8379998, 0.021)),
            ('H', (6.4640002, 0.017000001)),
            ('K', (6.3369999, 0.017999999)),
            ('W1', (6.2950001, 0.093000002)),
            ('W2', (6.2490001, 0.026000001)),
            ('W3', (6.3330002, 0.015)),
            ('B', (9.5950003, 0.022)),
            ('V', (8.5120001, 0.014)),
        )),
        ('1500-1804', (
            ('J', (6.9039998, 0.041000001)),
            ('H', (6.8559999, 0.027000001)),
            ('K', (6.7989998, 0.017000001)),
            ('W1', (6.803, 0.064999998)),
            ('W2', (6.7600002, 0.018999999)),
            ('W3', (6.8270001, 0.016000001)),
            ('B', (7.4980001, 0.015)),
            ('V', (7.289, 0.011)),
        )),
        ('1229-1366', (
            ('J', (6.7290001, 0.024)),
            ('H', (6.2449999, 0.02)),
            ('K', (6.1529999, 0.023)),
            ('W1', (6.1799998, 0.096000001)),
            ('W2', (6.04, 0.035)),
            ('W3', (6.132, 0.016000001)),
            ('B', (9.5539999, 0.021)),
            ('V', (8.4619999, 0.014)),
        )),
        ('1229-7470', (
            ('J', (9.1709995, 0.024)),
            ('H', (8.7959995, 0.026000001)),
            ('K', (8.7299995, 0.022)),
            ('W1', (8.6669998, 0.023)),
            ('W2', (8.7189999, 0.02)),
            ('W3', (8.6680002, 0.025)),
            ('B', (11.428, 0.054000001)),
            ('V', (10.614, 0.039999999)),
        )),
        ('1515-3584', (
            ('J', (5.363999843597412, 0.024000000208616257)),
            ('H', (4.965000152587891, 0.035999998450279236)),
            ('K', (4.815999984741211, 0.032999999821186066)),
            ('W1', (4.758, 0.215)),
            ('W2', (4.565, 0.115)),
            ('W3', (4.771, 0.015)),
            ('B', (8.347999572753906, 0.01600000075995922)),
            ('V', (7.182000160217285, 0.009999999776482582)),
        )),
        ('1515-1834', (
            ('J', (8.855999946594238, 0.024000000208616257)),
            ('H', (8.29699993133545, 0.020999999716877937)),
            ('K', (8.178999900817871, 0.017999999225139618)),
            ('W1', (8.117, 0.022)),
            ('W2', (8.15, 0.019)),
            ('W3', (8.065, 0.02)),
            ('B', (12.309000015258789, 0.11999999731779099)),
            ('V', (11.069999694824219, 0.054999999701976776)),
        )),
    )
    all_photometry = OrderedDict(
        (star_key, OrderedDict(bands)) for star_key, bands in photometry_table)

    # emcee configuration
    nwalkers = 128
    ninit = 256
    nburn = 1024
    niter = 4096

    for star_key, phot in all_photometry.items():
        samples_file = path.join(chain_path, '{0}.hdf5'.format(star_key))

        if path.exists(samples_file):
            logger.info("skipping {0} - samples exist at {1}".format(
                star_key, samples_file))
            continue

        obs = (session.query(Observation)
               .filter(Observation.object == star_key)
               .one())
        tgas = obs.tgas_source
        plx = (tgas.parallax, tgas.parallax_error)

        # fit an isochrone
        model = StarModel(iso, use_emcee=True, parallax=plx, **phot)
        model.set_bounds(mass=(0.01, 20), feh=(-1, 1),
                         distance=(0, 300), AV=(0, 1))

        # initial conditions for emcee walkers: mass/age/[Fe/H] come from
        # the isochrone grid, distance is log-uniform, AV is uniform
        m0, age0, feh0 = model.ic.random_points(nwalkers,
                                                minmass=0.01, maxmass=10.,
                                                minfeh=-1, maxfeh=1)
        _, max_distance = model.bounds('distance')
        _, max_AV = model.bounds('AV')
        d0 = 10**(np.random.uniform(0, np.log10(max_distance),
                                    size=nwalkers))
        AV0 = np.random.uniform(0, max_AV, size=nwalkers)

        p0 = np.array([m0, age0, feh0, d0, AV0]).T
        npars = p0.shape[1]

        # run emcee
        logger.debug('Running emcee - initial sampling...')
        sampler = emcee.EnsembleSampler(nwalkers, npars, model.lnpost)
        _advance_sampler(sampler, p0, ninit)

        # cull the weak walkers: restart every walker in a tiny Gaussian
        # ball around the highest-probability sample found so far
        best_ix = sampler.flatlnprobability.argmax()
        jitter = np.random.normal(0, 1E-5, size=(nwalkers, npars))
        best_p0 = sampler.flatchain[best_ix][None] + jitter

        sampler.reset()
        logger.debug('burn-in...')
        pos = _advance_sampler(sampler, best_p0, nburn)

        sampler.reset()
        logger.debug('sampling...')
        _advance_sampler(sampler, pos, niter)

        # hand the finished sampler to the model so it can build its
        # samples table, then persist that table
        model._sampler = sampler
        model._make_samples(0.08)
        model.samples.to_hdf(samples_file, key='samples')
        logger.debug('...done and saved!')
def _plot_line_fit(header, wave, flux, ivar, fit_pars):
    """Plot one spectral window, its error bars, and the fitted line model."""
    _grid = np.linspace(wave.min(), wave.max(), 512)
    fit_flux = voigt_polynomial(_grid, **fit_pars)

    plt.figure(figsize=(14, 8))
    plt.title("OBJECT: {}, EXPTIME: {}".format(header['OBJECT'],
                                               header['EXPTIME']))
    plt.plot(wave, flux, marker='', drawstyle='steps-mid', alpha=0.5)
    plt.errorbar(wave, flux, 1 / np.sqrt(ivar), linestyle='none',
                 marker='', ecolor='#666666', alpha=0.75, zorder=-10)
    plt.plot(_grid, fit_flux, marker='', alpha=0.75)


def solve_radial_velocity(filename, wavelength_coef, done_list=None,
                          plot=False):
    """Fit H-alpha and the [OI] sky lines in a 1D extracted spectrum.

    The fit diagnostics for the sky lines are printed; no radial velocity
    is computed or returned yet (see the TODO at the end of the loop).

    Parameters
    ----------
    filename : str
        Path to a FITS file whose primary header has 'OBJECT' (and, when
        plotting, 'EXPTIME') and whose first extension is a table with
        'pix', 'source_flux', 'source_ivar', 'background_flux' and
        'background_ivar' columns.
    wavelength_coef : array_like
        Polynomial coefficients, lowest order first, mapping pixel to
        wavelength (passed to ``numpy.polynomial.polynomial.polyval``).
    done_list : container (optional)
        Object names to skip; the function returns immediately when the
        file's OBJECT is in it.
    plot : bool (optional)
        Show diagnostic plots of every fit.

    Returns
    -------
    None
    """
    # The context manager closes the file on every exit path, including
    # the early return for already-processed objects.
    with fits.open(filename) as hdulist:
        # read both hdu's
        hdu0 = hdulist[0]
        hdu1 = hdulist[1]

        name = hdu0.header['OBJECT']
        logger.debug("\tObject: {}".format(name))

        if done_list is not None and name in done_list:
            return

        # extract just the middle part of the CCD (we only really care
        # about Halpha), and reverse the row order
        tbl = hdu1.data[200:1600][::-1]

        # compute wavelength array for the pixels
        wvln = np.polynomial.polynomial.polyval(tbl['pix'], wavelength_coef)

        # ==============================
        # Fit a Voigt profile to H-alpha
        # ==============================

        # extract region of SOURCE spectrum around Halpha
        i1 = np.argmin(np.abs(wvln - 6460))
        i2 = np.argmin(np.abs(wvln - 6665))

        wave = wvln[i1:i2 + 1]
        flux = tbl['source_flux'][i1:i2 + 1]
        ivar = tbl['source_ivar'][i1:i2 + 1]

        halpha_fit_p = fit_spec_line(wave, flux, ivar,
                                     n_bg_coef=2, target_x=6563.,
                                     absorp_emiss=-1.)

        if plot:
            _plot_line_fit(hdu0.header, wave, flux, ivar, halpha_fit_p)

        # =========================================
        # Fit a Voigt profile to [OI] 6300 and 5577
        # =========================================

        # needed for Barycenter correction
        # earth_loc = coord.EarthLocation.of_site('KPNO')

        for target_wave in [5577.3387, 6300.3]:
            # extract region of SKY spectrum around line (+/- 25 in the
            # wavelength units of ``wvln``)
            i1 = np.argmin(np.abs(wvln - (target_wave - 25)))
            i2 = np.argmin(np.abs(wvln - (target_wave + 25)))

            wave = wvln[i1:i2 + 1]
            flux = tbl['background_flux'][i1:i2 + 1]
            ivar = tbl['background_ivar'][i1:i2 + 1]

            OI_fit_p = fit_spec_line(wave, flux, ivar,
                                     std_G0=1., n_bg_coef=2,
                                     target_x=target_wave,
                                     absorp_emiss=1.)

            print('[OI] {:.2f}'.format(target_wave))
            print('∆x0: {:.3f}'.format(OI_fit_p['x0'] - target_wave))
            print('amp: {:.3e}'.format(OI_fit_p['amp']))

            chi2 = np.sum((voigt_polynomial(wave, **OI_fit_p) - flux)**2
                          * ivar)
            print('chi2: {}'.format(chi2))

            if plot:
                _plot_line_fit(hdu0.header, wave, flux, ivar, OI_fit_p)

            # TODO: the radial-velocity computation itself is not enabled.
            # The sketch that was left here:
            # from: http://www.star.ucl.ac.uk/~msw/lines.html
            # Halpha = 6562.80 # air, STP
            # OI_5577 = 5577.3387 # air, STP
            # OI_6300 = 6300.30 # air, STP
            # dOI = OI_fit_p['x0'] - OI_5577
            # dHalpha = halpha_fit_p['x0'] - Halpha
            # dlambda = dHalpha - dOI
            # RV = dlambda / Halpha * c
            # print("Radial velocity: ", RV.to(u.km/u.s))
            # sc = coord.SkyCoord(ra=hdu0.header['RA'], dec=hdu0.header['DEC'],
            #                     unit=(u.hourangle, u.degree))
            # time = hdu0.header['JD']*u.day + hdu0.header['EXPTIME']/2.*u.second
            # time = Time(time.to(u.day), format='jd', scale='utc')
            # v_bary = bary_vel_corr(time, sc, location=earth_loc)
            # RV_corr = (RV + v_bary).to(u.km/u.s)
            # print("Bary. correction: ", v_bary.to(u.km/u.s))
            # print("Radial velocity (bary. corrected): ", RV_corr)
            # print()

    if plot:
        plt.show()
def generate_wavelength_model(comp_lamp_path, night_path, plot_path):
    """
    Fit a line + Gaussian Process model to the pixel vs. wavelength relation
    for identified and centroided comp. lamp spectrum emission lines.

    Two diagnostic figures are written to ``plot_path``:
    ``wavelength_mean_subtracted.png`` (data and GP prediction relative to
    the mean model) and ``wavelength_residuals.png`` (data relative to the
    full GP prediction). Both figures are closed after saving.

    Parameters
    ----------
    comp_lamp_path : str
        Path to the 1D comp. lamp spectrum table; its 'pix', 'flux' and
        'ivar' columns are used.
    night_path : str
        Path to the night's directory. The line guesses are read from
        ``wavelength_guess.csv`` in its parent directory ('wavelength' and
        'pixel' columns).
    plot_path : str
        Directory the diagnostic figures are saved to.

    Returns
    -------
    model : GPModel
        The model with its GP parameter vector set to the optimizer result.
    """

    # read 1D comp lamp spectrum
    spec = Table.read(comp_lamp_path)

    # read wavelength guess file
    guess_path = path.abspath(
        path.join(night_path, '..', 'wavelength_guess.csv'))
    pix_wav = np.genfromtxt(guess_path, delimiter=',', names=True)

    # get emission line centroids at the guessed positions of the lines
    pix_x0s = fit_all_lines(spec['pix'], spec['flux'], spec['ivar'],
                            pix_wav['wavelength'], pix_wav['pixel'])

    # only keep successful ones:
    mask = np.isfinite(pix_x0s)
    logger.debug("Successfully fit {}/{} comp. lamp lines".format(
        mask.sum(), len(mask)))
    pix_wav = pix_wav[mask]
    pix_x0s = pix_x0s[mask]

    # --------------------------------------------------------------------------
    # fit a gaussian process to determine the pixel-to-wavelength transformation
    #
    idx = np.argsort(pix_x0s)
    med_x = np.median(pix_x0s[idx])
    x = pix_x0s[idx] - med_x  # centroids, shifted to the median pixel
    y = pix_wav['wavelength'][idx]

    # NOTE(review): ``n_bg_coef`` is not defined in this function; it is
    # presumably a module-level setting -- confirm it exists at call time.
    model = GPModel(x=x, y=y, n_bg_coef=n_bg_coef, x_shift=med_x)

    # Fit for the maximum likelihood parameters (the model object itself is
    # the objective; the log line below treats its value as -log-likelihood)
    bounds = model.gp.get_parameter_bounds()
    init_params = model.gp.get_parameter_vector()
    soln = minimize(model, init_params, method="L-BFGS-B", bounds=bounds)
    model.gp.set_parameter_vector(soln.x)
    logger.debug("Success: {}, Final log-likelihood: {}".format(
        soln.success, -soln.fun))

    # GP prediction on a pixel grid; reused by both figures below
    x_grid = np.linspace(0, 1600, 1024) - med_x
    mu, var = model.gp.predict(y, x_grid, return_var=True)
    std = np.sqrt(var)
    gp_color = "#ff7f0e"

    # ---
    # residuals to the mean model
    _y_mean = model.mean_model.get_value(x)
    _mu_mean = model.mean_model.get_value(x_grid)

    # Plot the maximum likelihood model
    fig, ax = plt.subplots(1, 1, figsize=(8, 8))

    # data
    ax.scatter(x + med_x, y - _y_mean, marker='o')

    # full GP model
    ax.plot(x_grid + med_x, mu - _mu_mean, color=gp_color, marker='')
    ax.fill_between(x_grid + med_x,
                    mu + std - _mu_mean, mu - std - _mu_mean,
                    color=gp_color, alpha=0.3, edgecolor="none")

    ax.set_xlabel('pixel')
    ax.set_ylabel(r'wavelength [$\AA$]')
    ax.set_title(path.basename(comp_lamp_path))

    fig.tight_layout()
    fig.savefig(path.join(plot_path, 'wavelength_mean_subtracted.png'),
                dpi=200)
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)
    # ---

    # ---
    # residuals to full GP model
    y_mu, _ = model.gp.predict(y, x, return_var=True)

    # Plot the maximum likelihood model
    fig, ax = plt.subplots(1, 1, figsize=(12, 8))

    # data
    ax.scatter(x + med_x, y - y_mu, marker='o')

    # the GP prediction relative to itself is the zero line; the band is
    # its +/- 1 sigma predictive uncertainty
    ax.plot(x_grid + med_x, np.zeros_like(mu), color=gp_color, marker='')
    ax.fill_between(x_grid + med_x, std, -std,
                    color=gp_color, alpha=0.3, edgecolor="none")

    ax.set_xlabel('pixel')
    ax.set_ylabel(r'wavelength residual [$\AA$]')
    ax.set_title(path.basename(comp_lamp_path))

    ax.set_ylim(-1, 1)
    # MAGIC NUMBER: reference pixel -- presumably where H-alpha falls on
    # the detector; confirm
    ax.axvline(683., zorder=-10, color='#666666', alpha=0.5)

    # second y axis: the same residual limits expressed as a velocity at
    # 6563 Angstrom, using c ~ 300000 km/s
    ax2 = ax.twinx()
    ax2.set_ylim([lim / 6563 * 300000 for lim in ax.get_ylim()])
    ax2.set_ylabel(r'velocity error at ${{\rm H}}_\alpha$ [{}]'.format(
        (u.km / u.s).to_string(format='latex_inline')))

    fig.tight_layout()
    fig.savefig(path.join(plot_path, 'wavelength_residuals.png'), dpi=200)
    plt.close(fig)
    # --------------------------------------------------------------------------

    return model
def add_wavelength(filename, model, std_tol, overwrite=False, plot_path=None):
    """
    Given an extracted, 1D spectrum FITS file, add ``wavelength`` and
    ``wavelength_err`` columns to the table in its first extension and
    write the file back in place.

    Parameters
    ----------
    filename : str
        Path to a 1D extracted spectrum file.
    model : `comoving_rv.longslit.GPModel`
        Wavelength model; its ``gp``, ``y`` and ``x_shift`` attributes are
        used to predict wavelength from the 'pix' column.
    std_tol : quantity_like
        Set the wavelength grid to NaN when the root-variance of the
        prediction from the Gaussian process is larger than this tolerance.
    overwrite : bool (optional)
        Overwrite any existing wavelength information. When False and the
        table already has a 'wavelength' column, the file is left untouched.
    plot_path : str (optional)
        If given, a ``<file base name>_1d_wvln.png`` figure of the source
        and background spectra vs. wavelength is saved in this directory.

    Returns
    -------
    None
    """
    # The context manager guarantees the input file is closed on both the
    # early return and the normal path.
    with fits.open(filename) as hdulist:
        # read both hdu's
        logger.debug("\tObject: {}".format(hdulist[0].header['OBJECT']))

        # load the full extracted-spectrum table
        tbl = Table(hdulist[1].data)

        if 'wavelength' in tbl.colnames and not overwrite:
            logger.debug("\tTable already contains wavelength values!")
            return

        # compute wavelength array for the pixels
        wavelength, var = model.gp.predict(model.y,
                                           tbl['pix'] - model.x_shift,
                                           return_var=True)
        wavelength_err = np.sqrt(var)

        # blank out pixels where the prediction is too uncertain
        bad_idx = wavelength_err > std_tol.to(u.angstrom).value
        wavelength[bad_idx] = np.nan

        tbl['wavelength'] = wavelength
        tbl['wavelength_err'] = wavelength_err

        new_hdu1 = fits.table_to_hdu(tbl)
        new_hdulist = fits.HDUList([hdulist[0], new_hdu1])

        logger.debug("\tWriting out file with wavelength array.")
        # always overwrite here: the output replaces the input file
        new_hdulist.writeto(filename, overwrite=True)

    if plot_path is not None:
        # plot the spectrum vs. wavelength
        fig, axes = plt.subplots(2, 1, figsize=(12, 8), sharex=True)

        axes[0].plot(tbl['wavelength'], tbl['source_flux'],
                     marker='', drawstyle='steps-mid', linewidth=1.)
        axes[0].errorbar(tbl['wavelength'], tbl['source_flux'],
                         1 / np.sqrt(tbl['source_ivar']),
                         linestyle='none', marker='', ecolor='#666666',
                         alpha=1., zorder=-10)
        # lower limit is a quarter of the flux at row 200 (hard-coded row)
        axes[0].set_ylim(tbl['source_flux'][200] / 4,
                         np.nanmax(tbl['source_flux']))
        axes[0].set_yscale('log')

        axes[1].plot(tbl['wavelength'], tbl['background_flux'],
                     marker='', drawstyle='steps-mid', linewidth=1.)
        axes[1].errorbar(tbl['wavelength'], tbl['background_flux'],
                         1 / np.sqrt(tbl['background_ivar']),
                         linestyle='none', marker='', ecolor='#666666',
                         alpha=1., zorder=-10)
        axes[1].set_ylim(1e-1, np.nanmax(tbl['background_flux']))
        axes[1].set_yscale('log')

        fig.tight_layout()
        _filename_base = path.splitext(path.basename(filename))[0]
        fig.savefig(
            path.join(plot_path, '{0}_1d_wvln.png'.format(_filename_base)))

        plt.close(fig)