def process(self, inputs):
  """Reads the light curve of a particular Kepler ID.

  Args:
    inputs: Dict containing the key "kepler_id".

  Yields:
    The input dict with key "raw_light_curve" set to a RawLightCurve proto
    containing one segment per (time, flux) file read. Emits nothing if no
    .fits files exist for this Kepler ID.

  Raises:
    ValueError: If the light curve files could not be read.
  """
  kep_id = inputs["kepler_id"]
  filenames = kepler_io.kepler_filenames(
      base_dir=self.kepler_data_dir,
      kep_id=kep_id,
      long_cadence=self.long_cadence,
      quarters=self.quarters,
      injected_group=self.injected_group)
  if not filenames:
    # No .fits files for this target: count it and emit nothing.
    # BUG FIX: previously execution fell through to zip(None, None) below,
    # raising TypeError; we must stop here.
    Metrics.counter(self.__class__.__name__, "no-fits-%s" % kep_id).inc()
    return

  try:
    all_time, all_flux = kepler_io.read_kepler_light_curve(
        filenames,
        light_curve_extension=self.extension,
        scramble_type=self.scramble_type,
        invert=self.invert_light_curves)
  except (IOError, ValueError) as e:
    # Re-raise with the Kepler ID attached for easier debugging in the pipeline.
    raise ValueError("Kepler ID: {}, {}".format(kep_id, e))

  raw_lc = light_curve_pb2.RawLightCurve()
  for time, flux in zip(all_time, all_flux):
    raw_lc.segments.add(time=time, flux=flux)
  inputs["raw_light_curve"] = raw_lc
  yield inputs
def process(self, inputs):
  """Reads the light curve of a particular Kepler ID."""
  kepler_id = inputs["kepler_id"]
  Metrics.counter(self.__class__.__name__, "inputs").inc()

  # Locate the .fits files for this target.
  filenames = kepler_io.kepler_filenames(
      base_dir=self.kepler_data_dir,
      kep_id=kepler_id,
      injected_group=self.injected_group)
  if not filenames:
    Metrics.counter(self.__class__.__name__,
                    "no-fits-{}".format(kepler_id)).inc()
    return

  cadence_no, time, flux = self._read_kepler_light_curve(filenames)

  # Robust stddev estimate: 1.4826 * MAD (the flux median is zero here).
  stddev = 1.4826 * np.median(np.abs(flux))
  if self.upward_outlier_clipping:
    # Cap values more than n estimated stddevs above the (zero) median.
    flux = np.minimum(flux, self.upward_outlier_clipping * stddev)
  if self.downward_outlier_clipping:
    # pylint: disable=invalid-unary-operand-type
    flux = np.maximum(flux, -self.downward_outlier_clipping * stddev)
  if self.clip_lowest_n_values:
    # Floor the flux at its n-th smallest value.
    floor_value = _nth_smallest(flux, self.clip_lowest_n_values)
    flux = np.maximum(flux, floor_value)
  if self.normalize_stddev:
    flux = flux / stddev

  # Re-grid onto a uniform cadence; mask marks filled-in gaps.
  cadence_no, time, flux, mask = util.uniform_cadence_light_curve(
      cadence_no, time, flux)

  example = tf.train.Example()
  example_util.set_int64_feature(example, "kepler_id", [kepler_id])
  example_util.set_bytes_feature(example, "flux_column", [self.flux_column])
  example_util.set_bytes_feature(example, "injected_group",
                                 [self.injected_group])
  example_util.set_bytes_feature(example, "scramble_type",
                                 [self.scramble_type])
  example_util.set_float_feature(example, "time", time)
  example_util.set_float_feature(example, "flux", flux)
  example_util.set_int64_feature(example, "cadence_no", cadence_no)
  example_util.set_int64_feature(example, "mask", mask)

  inputs["example"] = example
  Metrics.counter(self.__class__.__name__, "outputs").inc()
  yield inputs
def read_light_curve(kepid, kepler_data_dir):
  """Reads the raw light curve of a Kepler target star.

  Args:
    kepid: Kepler id of the target star.
    kepler_data_dir: Base directory containing Kepler data. See
      kepler_io.kepler_filenames().

  Returns:
    all_time: A list of numpy arrays; the time values of the raw light curve.
    all_flux: A list of numpy arrays corresponding to the time arrays in
      all_time.

  Raises:
    IOError: If the light curve files for this Kepler ID cannot be found.
  """
  fits_files = kepler_io.kepler_filenames(kepler_data_dir, kepid)
  if fits_files:
    return kepler_io.read_kepler_light_curve(fits_files)
  raise IOError(
      "Failed to find .fits files in {} for Kepler ID {}".format(
          kepler_data_dir, kepid))
def testKeplerFilenames(self):
  """Checks generated filenames for all cadence/quarter/injection options."""
  prefix = "/my/dir/0012/001234567/kplr001234567-"

  # All quarters.
  all_quarter_stamps = [
      "2009131105131", "2009166043257", "2009259160929", "2009350155506",
      "2010078095331", "2010009091648", "2010174085026", "2010265121752",
      "2010355172524", "2011073133259", "2011177032512", "2011271113734",
      "2012004120508", "2012088054726", "2012179063303", "2012277125453",
      "2013011073258", "2013098041711", "2013131215648"
  ]
  filenames = kepler_io.kepler_filenames(
      "/my/dir/", 1234567, check_existence=False)
  self.assertCountEqual(
      [prefix + stamp + "_llc.fits" for stamp in all_quarter_stamps],
      filenames)

  # Subset of quarters.
  q34_stamps = ["2009350155506", "2010078095331", "2010009091648"]
  filenames = kepler_io.kepler_filenames(
      "/my/dir/", 1234567, quarters=[3, 4], check_existence=False)
  self.assertCountEqual(
      [prefix + stamp + "_llc.fits" for stamp in q34_stamps], filenames)

  # Injected group.
  filenames = kepler_io.kepler_filenames(
      "/my/dir/", 1234567, quarters=[3, 4], injected_group="inj1",
      check_existence=False)
  self.assertCountEqual(
      [prefix + stamp + "_INJECTED-inj1_llc.fits" for stamp in q34_stamps],
      filenames)

  # Short cadence.
  filenames = kepler_io.kepler_filenames(
      "/my/dir/", 1234567, long_cadence=False, quarters=[0, 1],
      check_existence=False)
  self.assertCountEqual(
      [prefix + stamp + "_slc.fits"
       for stamp in ["2009131110544", "2009166044711"]],
      filenames)

  # Check existence.
  filenames = kepler_io.kepler_filenames(
      self.data_dir, 11442793, check_existence=True)
  expected_filenames = [
      os.path.join(self.data_dir,
                   "0114/011442793/kplr011442793-{}_llc.fits".format(q))
      for q in ["2009350155506", "2010009091648", "2010174085026"]
  ]
  self.assertCountEqual(expected_filenames, filenames)