def test_aperturephotometry(SHARED_INPUT_DIR, datasource):
    with TemporaryDirectory() as OUTPUT_DIR:
        with AperturePhotometry(DUMMY_TARGET,
                                SHARED_INPUT_DIR,
                                OUTPUT_DIR,
                                plot=True,
                                datasource=datasource,
                                **DUMMY_KWARG) as pho:

            pho.photometry()
            filepath = pho.save_lightcurve()
            print(pho.lightcurve)

            # It should set the status to one of these:
            assert (pho.status in (STATUS.OK, STATUS.WARNING))

            # Check the sumimage:
            plt.figure()
            plot_image(pho.sumimage, title=datasource)

            assert not anynan(pho.sumimage), "There are NaNs in the SUMIMAGE"

            # They shouldn't be exactly zero:
            assert not np.all(pho.lightcurve['flux'] == 0)
            assert not np.all(pho.lightcurve['flux_err'] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 0] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 1] == 0)

            # They shouldn't be NaN (in this case!):
            assert not allnan(pho.lightcurve['flux'])
            assert not allnan(pho.lightcurve['flux_err'])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 0])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 1])

            assert not np.any(~np.isfinite(pho.lightcurve['time']))
            assert not np.any(pho.lightcurve['time'] == 0)

            # Test the outputted FITS file:
            with fits.open(filepath, mode='readonly') as hdu:
                # Should be the same vectors in FITS as returned in Table:
                np.testing.assert_allclose(pho.lightcurve['time'],
                                           hdu[1].data['TIME'])
                np.testing.assert_allclose(pho.lightcurve['timecorr'],
                                           hdu[1].data['TIMECORR'])
                np.testing.assert_allclose(pho.lightcurve['flux'],
                                           hdu[1].data['FLUX_RAW'])
                np.testing.assert_allclose(pho.lightcurve['flux_err'],
                                           hdu[1].data['FLUX_RAW_ERR'])
                np.testing.assert_allclose(pho.lightcurve['cadenceno'],
                                           hdu[1].data['CADENCENO'])

                # Test FITS aperture image:
                ap = hdu['APERTURE'].data
                print(ap)
                assert np.all(pho.aperture == ap), "Aperture image mismatch"
                assert not anynan(ap), "NaN in aperture image"
                assert np.all(ap >= 0), "Negative values in aperture image"
                assert np.any(ap & 2 != 0), "No photometric mask set"
                assert np.any(ap & 8 != 0), "No position mask set"
Example #2
def remove_whole_nan_ys(x, ys):
    """Remove whole NaN columns of ys with corresponding x coordinates."""
    whole_nan_columns = bottleneck.allnan(ys, axis=0)
    if np.any(whole_nan_columns):
        x = x[~whole_nan_columns]
        ys = ys[:, ~whole_nan_columns]
    return x, ys
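A minimal usage sketch for the helper above, with hypothetical data (the numpy/bottleneck imports the function relies on are shown explicitly):

import numpy as np
import bottleneck

# Column 1 is entirely NaN and is dropped together with its x coordinate:
x = np.array([0.0, 1.0, 2.0])
ys = np.array([[1.0, np.nan, 3.0],
               [4.0, np.nan, 6.0]])
x_clean, ys_clean = remove_whole_nan_ys(x, ys)
# x_clean -> array([0., 2.]); ys_clean -> array([[1., 3.], [4., 6.]])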
Example #3
    def fit(self, X, y):
        X_y = self._check_params(X, y)
        self.X = X_y[0]
        self.y = X_y[1].reshape((-1, 1))
        n, p = X.shape

        S = []    # list of selected features
        F = list(range(p))    # list of unselected features

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # Find the first feature
        k_min = 3
        range_k = 7
        xy_MI = np.empty((range_k, p))
        for i in range(range_k):
            xy_MI[i, :] = self._get_first_mi_vector(i + k_min)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        if self.verbose > 0:
            self._info_print(S, S_mi)

        # Find the next features
        if self.n_features == 'auto':
            n_features = np.inf
        else:
            n_features = self.n_features

        while len(S) < n_features:
            s = len(S) - 1
            feature_mi_matrix[s, F] = self._get_mi_vector(F, S[-1])
            fmm = feature_mi_matrix[:len(S), F]
            if bn.allnan(bn.nanmean(fmm, axis=0)):
                break
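            # mRMR criterion: relevance of each remaining candidate (its MI with y,
            # xy_MI) minus redundancy (its mean MI with the already selected set):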
            MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
            if np.isnan(MRMR).all():
                break
            selected = F[bn.nanargmax(MRMR)]
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)
            if self.verbose > 0:
                self._info_print(S, S_mi)
            if self.n_features == 'auto' and len(S) > 10:
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break
        self.n_features_ = len(S)
        self.ranking_ = S
        self.mi_ = S_mi

        return self
Example #4
def ptp(lc):
    """
	Compute robust Point-To-Point scatter.

	Parameters:
		lc (``lightkurve.TessLightCurve`` object): Lightcurve to calculate PTP for.

	Returns:
		float: Robust PTP.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""
    if len(lc.flux) == 0 or allnan(lc.flux):
        return np.nan
    if len(lc.time) == 0 or allnan(lc.time):
        raise ValueError("Invalid time-vector specified. No valid timestamps.")
    return nanmedian(np.abs(np.diff(lc.flux)))
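A quick sanity check of ptp on synthetic data. A duck-typed stand-in with plain arrays is used here in place of a real lightkurve.TessLightCurve, since ptp only reads .time and .flux; allnan and nanmedian are assumed to come from bottleneck as in the rest of these examples:

import numpy as np
from types import SimpleNamespace

np.random.seed(42)
time = np.arange(0, 27.0, 1800/86400)
flux = 1000 + np.random.normal(0, 2, size=len(time))
lc = SimpleNamespace(time=time, flux=flux)
print(ptp(lc))  # roughly the median absolute point-to-point difference of the noise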
Example #5
def test_halo(SHARED_INPUT_DIR, datasource):
	with TemporaryDirectory() as OUTPUT_DIR:
		with HaloPhotometry(267211065, SHARED_INPUT_DIR, OUTPUT_DIR, plot=True, datasource=datasource, sector=1, camera=3, ccd=2) as pho:

			pho.photometry()
			filepath = pho.save_lightcurve()
			print( pho.lightcurve )

			# It should set the status to one of these:
			print(pho.status)
			assert pho.status in (STATUS.OK, STATUS.WARNING)

			# They shouldn't be exactly zero:
			assert not np.all(pho.lightcurve['flux'] == 0)
			assert not np.all(pho.lightcurve['flux_err'] == 0)
			assert not np.all(pho.lightcurve['pos_centroid'][:,0] == 0)
			assert not np.all(pho.lightcurve['pos_centroid'][:,1] == 0)

			# They shouldn't be NaN (in this case!):
			assert not allnan(pho.lightcurve['flux'])
			assert not allnan(pho.lightcurve['flux_err'])
			assert not allnan(pho.lightcurve['pos_centroid'][:,0])
			assert not allnan(pho.lightcurve['pos_centroid'][:,1])

			# Test the outputted FITS file:
			with fits.open(filepath, mode='readonly') as hdu:
				# Should be the same vectors in FITS as returned in Table:
				np.testing.assert_allclose(pho.lightcurve['time'], hdu[1].data['TIME'])
				np.testing.assert_allclose(pho.lightcurve['timecorr'], hdu[1].data['TIMECORR'])
				np.testing.assert_allclose(pho.lightcurve['flux'], hdu[1].data['FLUX_RAW'])
				np.testing.assert_allclose(pho.lightcurve['flux_err'], hdu[1].data['FLUX_RAW_ERR'])
				np.testing.assert_allclose(pho.lightcurve['cadenceno'], hdu[1].data['CADENCENO'])

				# Test FITS aperture image:
				ap = hdu['APERTURE'].data
				print(ap)
				assert np.all(pho.aperture == ap), "Aperture image mismatch"
				assert not anynan(ap), "NaN in aperture image"
				assert np.all(ap >= 0), "Negative values in aperture image"
				assert np.any(ap & 2 != 0), "No photometric mask set"
				#assert np.any(ap & 8 != 0), "No position mask set"

	print("Passed Tests for %s" % datasource)
Example #6
def rms_timescale(time, flux, timescale=3600 / 86400):
    """
	Compute robust RMS on specified timescale. Using MAD scaled to RMS.

	Parameters:
		time (ndarray): Timestamps in days.
		flux (ndarray): Flux to calculate RMS for.
		timescale (float, optional): Timescale to bin timeseries before calculating RMS. Default=1 hour.

	Returns:
		float: Robust RMS on specified timescale.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

    time = np.asarray(time)
    flux = np.asarray(flux)
    if len(flux) == 0 or allnan(flux):
        return np.nan
    if len(time) == 0 or allnan(time):
        raise ValueError("Invalid time-vector specified. No valid timestamps.")

    time_min = np.nanmin(time)
    time_max = np.nanmax(time)
    if not np.isfinite(time_min) or not np.isfinite(
            time_max) or time_max - time_min <= 0:
        raise ValueError("Invalid time-vector specified")

    # Construct the bin edges separated by the timescale:
    bins = np.arange(time_min, time_max, timescale)
    bins = np.append(bins, time_max)

    # Bin the timeseries on the specified timescale:
    indx = np.isfinite(flux)
    flux_bin, _, _ = binned_statistic(time[indx],
                                      flux[indx],
                                      nanmean,
                                      bins=bins)

    # Compute robust RMS value (MAD scaled to RMS)
    return mad_to_sigma * nanmedian(np.abs(flux_bin - nanmedian(flux_bin)))
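A minimal usage sketch for rms_timescale with a hypothetical white-noise series (mad_to_sigma is assumed to be the usual 1.4826 MAD-to-sigma factor defined alongside the function, with nanmean/nanmedian/allnan from bottleneck and binned_statistic from scipy.stats):

import numpy as np

np.random.seed(1)
time = np.arange(0, 27.0, 1800/86400)   # 27 days at 30-minute sampling
flux = np.random.normal(0, 100, size=len(time))
print(rms_timescale(time, flux))                           # RMS on the default 1-hour bins
print(rms_timescale(time, flux, timescale=6*3600/86400))   # RMS on 6-hour bins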
Example #7
def lc_matrix_calc(Nstars, mat0):
    logger = logging.getLogger(__name__)

    logger.info("Calculating correlations...")

    indx_nancol = allnan(mat0, axis=0)
    mat1 = mat0[:, ~indx_nancol]

    mat1[np.isnan(mat1)] = 0
    correlations = np.abs(AlmightyCorrcoefEinsumOptimized(mat1.T, mat1.T))
    np.fill_diagonal(correlations, np.nan)

    return correlations
Example #8
    def __call__(self, data):
        """
        Remove columns with constant values from the dataset and return
        the resulting data table.

        Parameters
        ----------
        data : an input dataset
        """

        oks = np.logical_and(~bn.allnan(data.X, axis=0),
                             bn.nanmin(data.X, axis=0) != bn.nanmax(data.X, axis=0))
        atts = [data.domain.attributes[i] for i, ok in enumerate(oks) if ok]
        domain = Orange.data.Domain(atts, data.domain.class_vars,
                                    data.domain.metas)
        return data.transform(domain)
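The column test above can be sketched with plain NumPy/Bottleneck, using a small hypothetical array X in place of data.X:

import numpy as np
import bottleneck as bn

X = np.array([[1.0, 5.0, np.nan],
              [2.0, 5.0, np.nan],
              [3.0, 5.0, np.nan]])

# Keep columns that are neither all-NaN nor constant:
oks = np.logical_and(~bn.allnan(X, axis=0),
                     bn.nanmin(X, axis=0) != bn.nanmax(X, axis=0))
# oks -> array([ True, False, False])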
Example #10
def lightcurve_correlation_matrix(mat):
    """
	Calculate the correlation matrix between all lightcurves in matrix.

	Parameters:
		mat (numpy.ndarray): Matrix of lightcurves (NxM); one lightcurve per row.

	Returns:
		numpy.array: Correlation matrix (NxN).
	"""

    indx_nancol = allnan(mat, axis=0)
    mat1 = mat[:, ~indx_nancol]

    mat1[np.isnan(mat1)] = 0
    correlations = np.abs(AlmightyCorrcoefEinsumOptimized(mat1.T, mat1.T))
    np.fill_diagonal(correlations, np.nan)

    return correlations
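A small usage sketch with a hypothetical random matrix of lightcurves as rows (assumes allnan comes from bottleneck and AlmightyCorrcoefEinsumOptimized is importable from the same module as the function above):

import numpy as np

np.random.seed(0)
mat = np.random.randn(3, 100)   # 3 lightcurves, 100 timestamps
mat[:, 10] = np.nan             # one timestamp that is NaN in every lightcurve
corr = lightcurve_correlation_matrix(mat)
print(corr.shape)               # (3, 3), with NaN on the diagonal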
Example #11
    def __call__(self, data):
        data = self.transform_domain(data)

        if "edge_jump" in data.domain:
            edges = data.transform(
                Orange.data.Domain([data.domain["edge_jump"]]))
            I_jumps = edges.X[:, 0]
        else:
            raise NoEdgejumpProvidedException(
                'Invalid meta data: Intensity jump at edge is missing')

        # order X by wavenumbers:
        # xs - non-ordered energies
        # xsind - indices corresponding to the ordered energies
        # mon = True
        # X - spectra corresponding to the ordered energies
        xs, xsind, mon, X = transform_to_sorted_features(data)

        # for the missing data
        X, nans = nan_extend_edges_and_interpolate(xs[xsind], X)
        # TODO notify the user if some unknown values were interpolated

        # Replace remaining NaNs (where whole rows were NaN) with
        # some values so that the function does not crash.
        # Results are going to be discarded later.
        nan_rows = bottleneck.allnan(X, axis=1)
        if np.any(nan_rows):  # if there were no NaNs, X is a view, so do not modify
            X[nan_rows] = 1.

        # do the transformation
        X = self.transformed(X, xs[xsind], I_jumps)

        # discard nan rows
        X[nan_rows] = np.nan

        # k scores are always ordered, so do not restore order
        return X
Example #12
def test_freqextr_onlynoise():

	np.random.seed(42)
	time = np.arange(0, 27.0, 1800/86400)
	flux = np.random.normal(0, 2, size=len(time))
	lc = lk.TessLightCurve(time=time, flux=flux)

	tab = freqextr(lc, n_peaks=5, n_harmonics=2)
	_summary(lc, tab)

	assert tab.meta['n_peaks'] == 5
	assert tab.meta['n_harmonics'] == 2

	#print(tab.loc[1])

	assert allnan(tab['frequency'])
	assert allnan(tab['amplitude'])
	assert allnan(tab['phase'])
	assert allnan(tab['alpha'])
	assert allnan(tab['beta'])
	assert allnan(tab['deviation'])
Example #13
def test_known_star(SHARED_INPUT_DIR, corrector, starid, cadence, var_goal, rms_goal, ptp_goal):
	""" Check that the ensemble returns values that are reasonable and within expected bounds """

	# All stars we check here come from the same sector and camera.
	# Define these here for the future where we may test on other combinations of these:
	sector = 1
	camera = 1

	__dir__ = os.path.abspath(os.path.dirname(__file__))
	logger = logging.getLogger(__name__)
	logger.info("-------------------------------------------------------------")
	logger.info("CORRECTOR = %s, SECTOR=%d, CADENCE=%s, STARID=%d", corrector, sector, cadence, starid)

	# All stars are from the same CCD, find the task for it:
	with corrections.TaskManager(SHARED_INPUT_DIR) as tm:
		task = tm.get_task(starid=starid, sector=sector, camera=camera, cadence=cadence)

	# Check that task was actually found:
	assert task is not None, "Task could not be found"

	# Load lightcurve that will also be plotted together with the result:
	# This lightcurve is of the same objects, at a state where it was deemed that the
	# corrections were doing a good job.
	compare_lc_path = os.path.join(__dir__, 'compare', f'compare-{corrector}-s{sector:04d}-c{cadence:04d}-tic{starid:011d}.ecsv.gz')
	compare_lc = None
	if os.path.isfile(compare_lc_path):
		compare_lc = Table.read(compare_lc_path, format='ascii.ecsv')
	else:
		warnings.warn("Comparison data does not exist: " + compare_lc_path)

	# Initiate the class
	CorrClass = corrections.corrclass(corrector)
	with tempfile.TemporaryDirectory() as tmpdir:
		with CorrClass(SHARED_INPUT_DIR, plot=True) as corr:
			# Check basic parameters of object (from BaseCorrector):
			assert corr.input_folder == SHARED_INPUT_DIR, "Incorrect input folder"
			assert corr.plot, "Plot parameter not passed appropriately"
			assert os.path.isdir(corr.data_folder), "DATA_FOLDER doesn't exist"

			# Load the input lightcurve:
			inlc = corr.load_lightcurve(task)

			# Print input lightcurve properties:
			print( inlc.show_properties() )
			assert inlc.sector == sector
			assert inlc.camera == camera

			# Run correction:
			tmplc = inlc.copy()
			outlc, status = corr.do_correction(tmplc)

			# Check status
			assert outlc is not None, "Correction fails"
			assert isinstance(outlc, TessLightCurve), "Should return TessLightCurve object"
			assert isinstance(status, corrections.STATUS), "Should return a STATUS object"
			assert status in (corrections.STATUS.OK, corrections.STATUS.WARNING), "STATUS was not set appropriately"

			# Print output lightcurve properties:
			print( outlc.show_properties() )

			# Save the lightcurve to FITS file to be tested later on:
			save_file = corr.save_lightcurve(outlc, output_folder=tmpdir)

		# Check contents
		assert len(outlc) == len(inlc), "Input flux is a different length than output flux"
		assert isinstance(outlc.flux, np.ndarray), "FLUX is not a ndarray"
		assert isinstance(outlc.flux_err, np.ndarray), "FLUX_ERR is not a ndarray"
		assert isinstance(outlc.quality, np.ndarray), "QUALITY is not a ndarray"
		assert outlc.flux.dtype.type is inlc.flux.dtype.type, "FLUX changes dtype"
		assert outlc.flux_err.dtype.type is inlc.flux_err.dtype.type, "FLUX_ERR changes dtype"
		assert outlc.quality.dtype.type is inlc.quality.dtype.type, "QUALITY changes dtype"
		assert outlc.flux.shape == inlc.flux.shape, "FLUX changes shape"
		assert outlc.flux_err.shape == inlc.flux_err.shape, "FLUX_ERR changes shape"
		assert outlc.quality.shape == inlc.quality.shape, "QUALITY changes shape"

		# Plot output lightcurves:
		fig, (ax1, ax2, ax3) = plt.subplots(3, 1, squeeze=True, figsize=[10, 10])
		ax1.plot(inlc.time, inlc.flux, lw=0.5)
		ax1.set_title(f"{corrector} - Sector {sector:d} - {cadence}s - TIC {starid:d}")
		if compare_lc:
			ax2.plot(compare_lc['time'], compare_lc['flux'], label='Compare', lw=0.5)
			ax3.axhline(0, lw=0.5, ls=':', color='0.7')
			ax3.plot(outlc.time, outlc.flux - compare_lc['flux'], lw=0.5)
		ax2.plot(outlc.time, outlc.flux, label='New', lw=0.5)
		ax1.set_ylabel('Flux [e/s]')
		ax1.minorticks_on()
		ax2.set_ylabel('Relative Flux [ppm]')
		ax2.minorticks_on()
		ax2.legend()
		ax3.set_ylabel('New - Compare [ppm]')
		ax3.set_xlabel('Time [TBJD]')
		ax3.minorticks_on()
		fig.savefig(os.path.join(__dir__, f'test-{corrector}-s{sector:04d}-c{cadence:04d}-tic{starid:011d}.png'), bbox_inches='tight')
		plt.close(fig)

		# Check things that are allowed to change:
		assert all(outlc.flux != inlc.flux), "Input and output flux are identical."
		assert not np.any(np.isinf(outlc.flux)), "FLUX contains Infinite"
		assert not np.any(np.isinf(outlc.flux_err)), "FLUX_ERR contains Infinite"
		assert np.sum(np.isnan(outlc.flux)) < 0.5*len(outlc), "More than half the lightcurve is NaN"
		assert allnan(outlc.flux_err[np.isnan(outlc.flux)]), "FLUX_ERR should be NaN where FLUX is"

		# TODO: Check that quality hasn't changed in ways that are not allowed:
		# - Only values defined in CorrectorQualityFlags
		# - No removal of flags already set
		assert all(outlc.quality >= 0)
		assert all(outlc.quality <= 128)
		assert all(outlc.quality >= inlc.quality)

		# Things that shouldn't change from the corrections:
		assert outlc.targetid == inlc.targetid, "TARGETID has changed"
		assert outlc.label == inlc.label, "LABEL has changed"
		assert outlc.sector == inlc.sector, "SECTOR has changed"
		assert outlc.camera == inlc.camera, "CAMERA has changed"
		assert outlc.ccd == inlc.ccd, "CCD has changed"
		assert outlc.quality_bitmask == inlc.quality_bitmask, "QUALITY_BITMASK has changed"
		assert outlc.ra == inlc.ra, "RA has changed"
		assert outlc.dec == inlc.dec, "DEC has changed"
		assert outlc.mission == 'TESS', "MISSION has changed"
		assert outlc.time_format == 'btjd', "TIME_FORMAT has changed"
		assert outlc.time_scale == 'tdb', "TIME_SCALE has changed"
		assert_array_equal(outlc.time, inlc.time, "TIME has changed")
		assert_array_equal(outlc.timecorr, inlc.timecorr, "TIMECORR has changed")
		assert_array_equal(outlc.cadenceno, inlc.cadenceno, "CADENCENO has changed")
		assert_array_equal(outlc.pixel_quality, inlc.pixel_quality, "PIXEL_QUALITY has changed")
		assert_array_equal(outlc.centroid_col, inlc.centroid_col, "CENTROID_COL has changed")
		assert_array_equal(outlc.centroid_row, inlc.centroid_row, "CENTROID_ROW has changed")

		# Check metadata
		assert tmplc.meta == inlc.meta, "Correction changed METADATA in-place"
		assert outlc.meta['task'] == inlc.meta['task'], "Metadata is incomplete"
		assert isinstance(outlc.meta['additional_headers'], fits.Header)

		# Check performance metrics:
		#logger.warning("VAR: %e", nanvar(outlc.flux))
		if var_goal is not None:
			var_in = nanvar(inlc.flux)
			var_out = nanvar(outlc.flux)
			var_diff = np.abs(var_out - var_goal) / var_goal
			logger.info("VAR: %f - %f - %f", var_in, var_out, var_diff)
			assert_array_less(var_diff, 0.05, "VARIANCE changed outside interval")

		#logger.warning("RMS: %e", rms_timescale(outlc))
		if rms_goal is not None:
			rms_in = rms_timescale(inlc)
			rms_out = rms_timescale(outlc)
			rms_diff = np.abs(rms_out - rms_goal) / rms_goal
			logger.info("RMS: %f - %f - %f", rms_in, rms_out, rms_diff)
			assert_array_less(rms_diff, 0.05, "RMS changed outside interval")

		#logger.warning("PTP: %e", ptp(outlc))
		if ptp_goal is not None:
			ptp_in = ptp(inlc)
			ptp_out = ptp(outlc)
			ptp_diff = np.abs(ptp_out - ptp_goal) / ptp_goal
			logger.info("PTP: %f - %f - %f", ptp_in, ptp_out, ptp_diff)
			assert_array_less(ptp_diff, 0.05, "PTP changed outside interval")

		# Check FITS file:
		with fits.open(os.path.join(tmpdir, save_file), mode='readonly') as hdu:
			# Lightcurve FITS table:
			fitslc = hdu['LIGHTCURVE'].data
			hdr = hdu['LIGHTCURVE'].header

			# Simple checks of header values:
			assert hdu[0].header['TICID'] == starid

			# Checks of things in FITS table that should not have changed at all:
			assert_array_equal(fitslc['TIME'], inlc.time, "FITS: TIME has changed")
			assert_array_equal(fitslc['TIMECORR'], inlc.timecorr, "FITS: TIMECORR has changed")
			assert_array_equal(fitslc['CADENCENO'], inlc.cadenceno, "FITS: CADENCENO has changed")
			assert_array_equal(fitslc['FLUX_RAW'], inlc.flux, "FITS: FLUX_RAW has changed")
			assert_array_equal(fitslc['FLUX_RAW_ERR'], inlc.flux_err, "FITS: FLUX_RAW_ERR has changed")
			assert_array_equal(fitslc['MOM_CENTR1'], inlc.centroid_col, "FITS: CENTROID_COL has changed")
			assert_array_equal(fitslc['MOM_CENTR2'], inlc.centroid_row, "FITS: CENTROID_ROW has changed")

			# Some things are allowed to change, but still within some requirements:
			assert all(fitslc['FLUX_CORR'] != inlc.flux), "FITS: Input and output flux are identical."
			assert np.sum(np.isnan(fitslc['FLUX_CORR'])) < 0.5*len(fitslc['TIME']), "FITS: More than half the lightcurve is NaN"
			assert allnan(fitslc['FLUX_CORR_ERR'][np.isnan(fitslc['FLUX_CORR'])]), "FITS: FLUX_ERR should be NaN where FLUX is"

			if corrector == 'ensemble':
				# Check special headers:
				assert np.isfinite(hdr['ENS_MED']) and hdr['ENS_MED'] > 0
				assert isinstance(hdr['ENS_NUM'], int) and hdr['ENS_NUM'] > 0
				assert hdr['ENS_DLIM'] == 1.0
				assert hdr['ENS_DREL'] == 10.0
				assert hdr['ENS_RLIM'] == 0.4

				# Special extension for ensemble:
				tic = hdu['ENSEMBLE'].data['TIC']
				bzeta = hdu['ENSEMBLE'].data['BZETA']
				assert len(tic) == len(bzeta)
				assert len(np.unique(tic)) == len(tic), "TIC numbers in ENSEMBLE table are not unique"
				assert len(tic) == hdr['ENS_NUM'], "Not the same number of targets in ENSEMBLE table as specified in header"

			elif corrector == 'cbv':
				# Check special headers:
				assert isinstance(hdr['CBV_NUM'], int) and hdr['CBV_NUM'] > 0

				# Check coefficients:
				for k in range(0, hdr['CBV_NUM']+1):
					assert np.isfinite(hdr['CBV_C%d' % k])
				for k in range(1, hdr['CBV_NUM']+1):
					assert np.isfinite(hdr['CBVS_C%d' % k])
				# Check that no other coefficients are present
				assert 'CBV_C%d' % (hdr['CBV_NUM']+1) not in hdr
				assert 'CBVS_C%d' % (hdr['CBV_NUM']+1) not in hdr

			elif corrector == 'kasoc_filter':
				# Check special headers:
				assert hdr['KF_POSS'] == 'None'
				assert np.isfinite(hdr['KF_LONG']) and hdr['KF_LONG'] > 0
				assert np.isfinite(hdr['KF_SHORT']) and hdr['KF_SHORT'] > 0
				assert hdr['KF_SCLIP'] == 4.5
				assert hdr['KF_TCLIP'] == 5.0
				assert hdr['KF_TWDTH'] == 1.0
				assert hdr['KF_PSMTH'] == 200

				assert isinstance(hdr['NUM_PER'], int) and hdr['NUM_PER'] >= 0
				for k in range(1, hdr['NUM_PER']+1):
					assert np.isfinite(hdr['PER_%d' % k]) and hdr['PER_%d' % k] > 0
				# Check that no other periods are present
				assert 'PER_%d' % (hdr['NUM_PER'] + 1) not in hdr

		# Test that the Gzip FITS file has the correct uncompressed file name, by simply
		# decompressing the Gzip file, asking to keep the original file name.
		# This uses the system GZIP utility, since there doesn't seem to be a way to do this
		# through the Python gzip module:
		fpath = os.path.join(tmpdir, save_file)
		fpath_uncompressed = fpath.replace('.fits.gz', '.fits')
		assert not os.path.exists(fpath_uncompressed), "Uncompressed file already exists"
		gzip_output = subprocess.check_output(['gzip', '-dkNv', os.path.basename(fpath)],
			cwd=os.path.dirname(fpath),
			stderr=subprocess.STDOUT,
			encoding='utf8')
		print("Gzip output:")
		print(gzip_output)
		assert os.path.isfile(fpath_uncompressed), "Incorrect uncompressed file name"

		# Just see if we can in fact also open the uncompressed FITS file and get a simple header:
		with fits.open(fpath_uncompressed, mode='readonly') as hdu:
			assert hdu[0].header['TICID'] == starid
Example #14
    def lightcurve_matrix(self):
        """
		Load matrix filled with light curves.

		The steps performed are the following:

		#. Only targets with a variability below a threshold are included.

		#. Computes correlation matrix for light curves in a given cbv-area and only includes the
		   :meth:`threshold_correlation` most correlated light curves.

		#. Performs gap-filling of light curves and removes time stamps where all flux values are NaN.

		Returns:
			tuple:

			- :class:`numpy.ndarray`: matrix of light curves to be used in CBV calculation.
			- :class:`numpy.ndarray`: the indices for the timestamps with nans in all light curves.
			- `int`: Number of timestamps.

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		.. codeauthor:: Mikkel N. Lund <*****@*****.**>
		"""

        logger = logging.getLogger(__name__)
        tqdm_settings = {
            'disable': None if logger.isEnabledFor(logging.INFO) else True
        }

        logger.info('Running matrix clean')
        if logger.isEnabledFor(
                logging.DEBUG) and 'matrix' in self.hdf:  # pragma: no cover
            logger.info("Loading existing file...")
            return self.hdf['matrix'], self.hdf['nancols'], self.hdf.attrs[
                'Ntimes']

        logger.info("We are running CBV_AREA=%d", self.cbv_area)

        # Set up search parameters for database:
        search_params = [
            f'status={STATUS.OK.value:d}',  # Only including targets with status=OK from photometry
            "method_used='aperture'",  # Only including aperature photometry targets
            f'cadence={self.cadence:d}',
            f'cbv_area={self.cbv_area:d}',
            f'sector={self.sector:d}'
        ]

        # Find the median of the variabilities:
        variability = np.array([
            float(row['variability'])
            for row in self.search_database(search=search_params,
                                            select='variability')
        ],
                               dtype='float64')
        if len(variability) == 0:
            raise ValueError(
                "No lightcurves found for this CBV_AREA that have VARIABILITY defined"
            )
        median_variability = nanmedian(variability)

        # Plot the distribution of variability for all stars:
        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.hist(variability / median_variability,
                bins=np.logspace(np.log10(0.1), np.log10(1000.0), 50))
        ax.axvline(self.threshold_variability, color='r')
        ax.set_xscale('log')
        ax.set_xlabel('Variability')
        fig.savefig(
            os.path.join(
                self.cbv_plot_folder,
                f'variability-s{self.sector:04d}-c{self.cadence:04d}-a{self.cbv_area}.png'
            ))
        plt.close(fig)

        # Get the list of star that we are going to load in the lightcurves for:
        search_params.append('variability < %f' %
                             (self.threshold_variability * median_variability))
        stars = self.search_database(
            select=['lightcurve', 'mean_flux', 'variance'],
            search=search_params)

        # Number of stars returned:
        Nstars = len(stars)

        # Load the very first timeseries only to find the number of timestamps.
        lc = self.load_lightcurve(stars[0])
        Ntimes = len(lc.time)

        # Save aux information about this CBV to a separate file.
        self.hdf.create_dataset('time', data=lc.time - lc.timecorr)
        self.hdf.create_dataset('cadenceno', data=lc.cadenceno)
        self.hdf.attrs['camera'] = lc.camera
        self.hdf.attrs['ccd'] = lc.ccd
        self.hdf.attrs['data_rel'] = lc.meta['data_rel']
        self.hdf.flush()

        logger.info("Matrix size: %d x %d", Nstars, Ntimes)

        # Make the matrix that will hold all the lightcurves:
        logger.info("Loading in lightcurves...")
        mat = np.full((Nstars, Ntimes), np.nan, dtype='float64')
        varis = np.empty(Nstars, dtype='float64')

        # Loop over stars, filling the matrix:
        for k, star in tqdm(enumerate(stars), total=Nstars, **tqdm_settings):
            # Load lightkurve object
            lc = self.load_lightcurve(star)

            # Remove bad data based on quality
            flag_good = TESSQualityFlags.filter(
                lc.pixel_quality,
                TESSQualityFlags.CBV_BITMASK) & CorrectorQualityFlags.filter(
                    lc.quality, CorrectorQualityFlags.CBV_BITMASK)
            lc.flux[~flag_good] = np.nan

            # Normalize the data and store it in the rows of the matrix:
            mat[k, :] = lc.flux / star['mean_flux'] - 1.0

            # Store the standard deviations of each lightcurve:
            varis[k] = np.NaN if star['variance'] is None else star['variance']

        # Only start calculating correlations if we are actually filtering using them:
        if self.threshold_correlation < 1.0:
            # Calculate the correlation matrix between all lightcurves:
            logger.info("Calculating correlations...")
            correlations = lightcurve_correlation_matrix(mat)

            # If running in DEBUG mode, save the correlations matrix to file:
            if logger.isEnabledFor(logging.DEBUG):  # pragma: no cover
                self.hdf.create_dataset('correlations', data=correlations)

            # Find the median absolute correlation between each lightcurve and all other lightcurves:
            c = nanmedian(correlations, axis=0)

            # Indices that would sort the lightcurves by correlations in descending order:
            indx = np.argsort(c)[::-1]
            indx = indx[:int(self.threshold_correlation * Nstars)]
            #TODO: remove based on threshold value? rather than just % of stars

            # Only keep the top "threshold_correlation"% of the lightcurves that are most correlated:
            mat = mat[indx, :]
            varis = varis[indx]

            # Clean up a bit:
            del correlations, c, indx

        # Print the final shape of the matrix:
        Nstars = mat.shape[0]
        Ntimes = mat.shape[1]

        # Find columns where all stars have NaNs and remove them:
        indx_nancol = allnan(mat, axis=0)
        mat = mat[:, ~indx_nancol]

        logger.info("Matrix size: %d x %d", mat.shape[0], mat.shape[1])

        logger.info("Gap-filling lightcurves...")
        cadenceno = np.arange(mat.shape[1])
        count_interp = 0
        for k in tqdm(range(Nstars), total=Nstars, **tqdm_settings):
            # Normalize the lightcurves by their variances:
            mat[k, :] /= varis[k]

            # Fill out missing values by interpolating the lightcurve:
            ibad = ~np.isfinite(mat[k, :])
            Ninterp = int(np.sum(ibad))
            count_interp += Ninterp
            if Ninterp > 0:
                mat[k, ibad] = pchip_interpolate(cadenceno[~ibad], mat[k,
                                                                       ~ibad],
                                                 cadenceno[ibad])

        # Print the average number of interpolated points:
        avg_interp = count_interp / Nstars
        logger.info("Average interpolated per star: %f points, %.3f%%",
                    avg_interp, 100 * avg_interp / Ntimes)

        # Save something for debugging:
        self.hdf.attrs['Ntimes'] = Ntimes
        self.hdf.attrs['Nstars'] = Nstars
        self.hdf.attrs['average_interpolated_points'] = avg_interp
        if logger.isEnabledFor(logging.DEBUG):  # pragma: no cover
            self.hdf.create_dataset('matrix', data=mat)
            self.hdf.create_dataset('nancols', data=indx_nancol)

        return mat, indx_nancol, Ntimes
Example #15
File: cbv.py  Project: tasoc/corrections
    def fit(self,
            lc,
            use_bic=True,
            use_prior=False,
            cbvs=None,
            alpha=1.3,
            WS_lim=0.5,
            N_neigh=1000):
        """
		Fit the CBV object to a lightcurve, and return the fitted cotrending-lightcurve
		and the fitting coefficients.

		Parameters:
			lc (:class:`LightCurve`): Lightcurve to be cotrended.
			use_bic (bool, optional): Use the Bayesian Information Criterion to find the
				optimal number of CBVs to fit. Default=True.
			use_prior (bool, optional):
			cbvs (int, optional): Number of CBVs to fit to lightcurve. If `use_bic=True`, this
				indicates the maximum number of CBVs to fit.

		Returns:
			- `numpy.array`: Fitted lightcurve with the same length as `lc`.
			- list: Coefficients for each CBV.
			- dict: Diagnostics information about the fitting.

		"""

        logger = logging.getLogger(__name__)

        # If no uncertainties are provided, fill it with ones:
        if allnan(lc.flux_err):
            lc.flux_err[:] = 1

        # Remove bad data based on quality
        if not allnan(lc.quality):
            flag_good = CorrectorQualityFlags.filter(lc.quality)
            lc.flux[~flag_good] = np.nan
            lc.flux_err[~flag_good] = np.nan

        # Diagnostics to return at the end about what was
        # actually used in the fitting:
        diagnostics = {
            'method': None,
            'use_bic': use_bic,
            'use_prior': use_prior
        }

        # Fit the CBV to the flux:
        if use_prior:
            # Do fits including prior information from the initial fits
            # allow switching to a simple LSSQ fit depending on
            # variability measures (not fully implemented yet!)

            # Position of target in multidimensional prior space:
            row = lc.meta['task']['pos_row']
            col = lc.meta['task']['pos_column']
            tmag = np.clip(lc.meta['task']['tmag'], 2, 20)
            pos = np.array([row, col, tmag])

            # Prior curve
            n_components = self.cbs.shape[1]
            pc0, opts = self._priorcurve(pos, n_components, N_neigh)
            pc = pc0 * lc.meta['task']['mean_flux']

            # Compute new variability measure
            idx = np.isfinite(lc.flux)
            polyfit = np.polyval(np.polyfit(lc.time[idx], lc.flux[idx], 3),
                                 lc.time)
            residual = MAD_model(lc.flux - pc)
            #residual_ratio = MAD_model(lc.flux-lc.meta['task']['mean_flux'])/residual
            #WS = np.min([1, residual_ratio])

            AA = 2
            GRAW = np.std((pc - polyfit) / MAD_model2(lc.flux - polyfit) - 1)
            GPR = 0 + (1 - (GRAW / AA)**2) * (GRAW < AA)

            beta1 = 1
            beta2 = 1
            VAR = np.nanstd(lc.flux - polyfit)
            WS = np.min([1, (VAR**beta1) * (GPR**beta2)])

            if WS > WS_lim:
                logger.debug('Fitting using LLSQ')
                flux_filter, res = self._fit(
                    lc, Numcbvs=5,
                    use_bic=use_bic)  # use smaller number of CBVs
                diagnostics['method'] = 'LS'
                diagnostics['use_prior'] = False
                diagnostics['use_bic'] = False

            else:
                logger.debug('Fitting using Priors')

                # Define multi-dimensional prior:
                dist, ind = self.priors.query(pos, k=N_neigh + 1)
                W = 1 / dist[0][1:]**2
                V = self.inifit[ind[1:], :]
                KDE = gaussian_kde(V, weights=W.flatten(), bw_method='scott')
                wscale = 1.0

                def logprior(coeff):
                    return wscale * KDE.logpdf(coeff)

                flux_filter, res = self._fit(lc,
                                             err=residual,
                                             use_bic=use_bic,
                                             logprior=logprior,
                                             start_guess=opts)

                diagnostics.update({
                    'method': 'MAP',
                    'residual': residual,
                    'WS': WS,
                    'pc': pc
                })

        else:
            # Do "simple" LSSQ fits using BIC to decide on number of CBVs to include
            logger.debug('Fitting TIC %d using LLSQ', lc.targetid)
            flux_filter, res = self._fit(lc, Numcbvs=cbvs, use_bic=use_bic)
            diagnostics['method'] = 'LS'

        return flux_filter, res, diagnostics
Example #16
                del correlations, c, indx

            # Save something for debugging:
            np.savez('mat-sector%02d-%d.npz' % (sector, cbv_area),
                     mat=mat,
                     priorities=priorities,
                     stds=stds)

        # Print the final shape of the matrix:
        print("Matrix size: %d x %d" % mat.shape)

        # Simple low-pass filter of the individual targets:
        #mat = move_median_central(mat, 48, axis=1)

        # Find columns where all stars have NaNs and remove them:
        indx_nancol = allnan(mat, axis=0)
        Ntimes = mat.shape[1]
        mat = mat[:, ~indx_nancol]

        cadenceno = np.arange(mat.shape[1])

        # TODO: Is this even needed? Or should it be done earlier?
        print("Gap-filling lightcurves...")
        for k in tqdm(range(mat.shape[0]), total=mat.shape[0]):

            mat[k, :] /= stds[k]

            # Fill out missing values by interpolating the lightcurve:
            indx = np.isfinite(mat[k, :])
            mat[k, ~indx] = pchip_interpolate(cadenceno[indx], mat[k, indx],
                                              cadenceno[~indx])
Example #17
    def time_allnan(self, dtype, shape, order, axis, case):
        bn.allnan(self.arr, axis=axis)
Example #18
def filtered_extinction(inv, msl_altitudes, min_alt, window_od, t_window_length,
                        max_z_window_length, order, adaptive, telescope_pointing=None):
    """filtered_extinction(inv,msl_altitudes,telescope_pointing, min_alt
                        window_od,max_window_length,order)
       Stravitzky-Golay fitting which can use a fixed window or
       an adaptive widow which restricts the length of the
       fit to a user specified optical depth interval estimated from the
       integrated backscatter cross section divided by a p180/4pi = 0.025
       inv                = dictionary containing beta_a_backscat_par, beta_a_backscat_perp
                            and beta_r_backscat, the Rayliegh backscatter
       msl_altitudes      = vector of bin altitudes (m)
       min_alt            = smooth alititudes > (min_alt + max_window_length/2.0)
                             (this is ignored  in current code)
       window_od          = max estimated od within fit window
       t_window_length    = length of time window (seconds)
       max_z_window_length= max length of altitude fit window (m)
       order              = order of polynomial to use in fit
       adaptive           = 0, fixed length window
                          = 1, window length can not exceed od limit"""

    if not hasattr(inv,'Nm'):
      print 'ERROR: XXXXXXXXXXXXXXXXXX Filtered extinction missing Nm in inv. Nothing to do.'
      return inv

    data_len = len(inv.Nm[0,:])    
    ntimes = len(inv.Nm[:,0])
    
    #distance between altitude bins
    dz=0
    for dzi in range(len(msl_altitudes)-1):
      ndz = msl_altitudes[dzi+1]-msl_altitudes[dzi]
      if ndz>0 and abs(dz-ndz)<.01:
        break
      dz=ndz
    z_window_pts = int(max_z_window_length/dz)
    #must be at least order +1
    if z_window_pts < order +1:
        z_window_pts = order +1
    #must be odd     
    if z_window_pts%2==0:
        z_window_pts = z_window_pts + 1

    if ntimes > order: 
        dt = inv.delta_t.copy()#(inv.times[2] - inv.times[1]).seconds
        dt[dt<1.0] = 1.0
        t_window_pts = np.array(t_window_length/dt,dtype='int')
        #must be at least order +1
        t_window_pts[t_window_pts < order +1]=order +1
        #must be odd     
        t_window_pts[t_window_pts%2==0]+=1

        filtered_Nm = inv.Nm.copy()
        integrated_backscat = inv.integrated_backscat.copy()
       
        #filter in time
        for k in range(data_len-1):
            if not allnan(filtered_Nm[:,k]) and  ntimes > t_window_pts:
               filtered_Nm[:,k] = sg.savitzky_golay(filtered_Nm[:,k],t_window_pts[k],order)
    else:
        filtered_Nm = inv.Nm.copy()
        integrated_backscat = inv.integrated_backscat.copy()

    if telescope_pointing is None :
        #if not provided assume zenith pointing
        t_pointing = np.ones_like(inv.Nm[:,0])
    else:    
        t_pointing = telescope_pointing.copy()
        t_pointing[t_pointing < 0.1] = -1.0
    extinction = np.zeros(filtered_Nm.shape)
    
    #if (data_len-min_alt/dz)  <  z_window_pts*5.0:
    if data_len  <  z_window_pts*5.0:
        print
        print 'WARNING---filtered_extinction--filter window length too long'
        print 'resetting z_window_pts from ',z_window_pts,
        #z_window_pts = int((data_len-min_alt/dz)/5.0)
        z_window_pts = int(data_len/5.0)
        z_window_pts = 2*(z_window_pts/2)+1
        print 'to ', z_window_pts
        if z_window_pts < order +2:
            print
            #raise ValueError, 'number of altitude resolution elements two few for filter length'
            print 'WARNING-----Savitzky_golay---number of altitude resolution elements too few for filter length'
            print '            inv.extinction returned as NaN'
            print
            inv.extinction = np.NaN * inv.Nm
            return inv
        print
    #start_pt = int(np.ceil(z_window_pts/2 + min_alt/dz))
    start_pt = int(np.ceil(z_window_pts/2))
    end_pt   = data_len -z_window_pts/2 -1
    
    if adaptive == 0:
        slope_Nm = np.zeros_like(filtered_Nm[0,:])
        inv.p180 = np.zeros_like(integrated_backscat)
                             
        dbeta_dr = np.zeros_like(filtered_Nm[0,:])
        dbeta_dr[1:] =  + 0.5*(1/inv.beta_r_backscat[1:])\
                    *(inv.beta_r_backscat[1:]
                    -inv.beta_r_backscat[:-1])/dz 
        for i in range(ntimes):
            #compute extinction from the first derivative of a filtered Nm
            slope_Nm[start_pt:end_pt] = -sg.savitzky_golay(
                  filtered_Nm[i,start_pt:end_pt],z_window_pts,order,deriv = 1) / dz
            extinction[i,start_pt:end_pt]= \
               (-0.5*(1/filtered_Nm[i,start_pt:end_pt])
               *slope_Nm[start_pt:end_pt] 
               +dbeta_dr[start_pt:end_pt])*t_pointing[i]
        if 0:
          import matplotlib.pyplot as plt
          bin_vec = np.arange(len(filtered_Nm[0,:]))
          bin_vec2 = np.arange(len(slope_Nm))
          plt.figure(777777)
          plt.plot(slope_Nm,bin_vec2,'b',extinction[0,:],bin_vec2,'r',filtered_Nm[0,:],bin_vec2,'g'
                 ,-dbeta_dr,bin_vec2,'k',inv.p180[0,:],bin_vec2,'c')
          ax=plt.gca()
          ax.set_xscale('log')
          ax.grid(True)
         
    #when adaptive ==1,  window length derived from integrated backscat
    else:    
        #pick a intermediate value of p180/4pi for od estimate   
        p180 = 0.025

        #half of the maximum fit window in bins
        max_half_win =int((max_z_window_length/dz)/2)    
        wind_od = window_od/2.0
        
            
        #reflect data at end for padding
        yy=np.zeros((ntimes,data_len+max_half_win+1))
        end_range = range(data_len-1,(data_len-max_half_win-1),-1)
        yy[0:ntimes,0:data_len]=inv.Nm[0:ntimes,0:data_len]           
        yy[0:ntimes,data_len:data_len+max_half_win] = inv.Nm[0:ntimes,end_range]
        filtered_Nm = inv.Nm.copy()
        #extinction = np.zeros_like(inv.Nm)
        extinction = np.zeros(inv.Nm.shape)
        low_limit = np.zeros(data_len)
        high_limit = np.zeros_like(low_limit)
        beta_a=np.zeros(data_len+max_half_win+1)
    
        for k in range(ntimes):
            beta_a[0:data_len]=(inv.beta_a_backscat_par[k,0:data_len]\
                      +inv.beta_a_backscat_perp[k,0:data_len])
            beta_a[range((data_len-max_half_win),data_len)] \
                  =(inv.beta_a_backscat_par[k,end_range] \
                        +inv.beta_a_backscat_perp[k,end_range])
    
            #compute integrated backscatter
            beta_a[np.isnan(beta_a)]=0
            int_bs_od=np.cumsum(beta_a)*dz/(2*p180)
            int_bs_od[data_len:data_len+max_half_win+1]=int_bs_od[data_len]
            #find end points of fit for each data point
            #use the optical depth estimated from the integrated backscatter to compute
            #limits for polynomial fit at each data point
            derivative_coefs=np.arange(order,0,-1)
            order_lmt = order/2 +1
                        
            for i in range(start_pt,data_len):
            
                lo_lmt=np.max([i-max_half_win,start_pt])
                while (int_bs_od[i] -int_bs_od[lo_lmt] > wind_od) and i-lo_lmt > order_lmt:
                   lo_lmt = lo_lmt + 1

                hi_lmt = i+max_half_win
                while (int_bs_od[hi_lmt] - int_bs_od[i] > wind_od) and hi_lmt-i > order_lmt:
                    hi_lmt = hi_lmt -1
                
                #make fitting interval symmetric around i
                if i - lo_lmt < hi_lmt-i :
                    hi_lmt = 2*i -lo_lmt
            
                elif i - lo_lmt > hi_lmt -i :
                   lo_lmt = 2*i - hi_lmt
            
    

                ylocal = yy[k,lo_lmt:hi_lmt+1]
                x=np.arange(len(ylocal))
                #print x
                pc = np.polyfit(x,ylocal,order)
                filtered_Nm[k,i]= np.polyval(pc,i-lo_lmt)
    
                slope_Nm = np.polyval(derivative_coefs*pc[0:order],i-lo_lmt)/dz

                #print 'ext',i,lo_lmt,hi_lmt,ylocal,slope_Nm,pc,pc[0:order]\
                #         ,dz,np.polyval(pc,[0,1,2,3]),t_pointing[k]
       
                extinction[k,i]=(-0.5*(1/filtered_Nm[k,i])*slope_Nm \
                    + 0.5*(1/inv.beta_r_backscat[i])\
                    *(inv.beta_r_backscat[i]-inv.beta_r_backscat[i-1])/dz)\
                    *t_pointing[k]

                #print 'ext',i,lo_lmt,hi_lmt,extinction[k,i],slope_Nm\
                #      ,(inv.beta_r_backscat[i]-inv.beta_r_backscat[i-1])/dz\
                #      ,inv.beta_r_backscat[i],inv.beta_r_backscat[i-1],t_pointing[k]
                      
                xx=np.arange(len(x)*10)/10.0
           
       
            
    inv.extinction = type(filtered_Nm)(extinction)
    time_vec = np.ones_like(inv.extinction[:,0])
    beta_r_array = 8 * np.pi * (time_vec[:,np.newaxis] * inv.beta_r_backscat[np.newaxis,:])/3.0 
    inv.extinction_aerosol = inv.extinction - beta_r_array

    #compute p180 from integrated backscatter and optical depth over window segments
    inv.p180 = np.zeros_like(inv.extinction_aerosol)
    start_pt = int(np.ceil(z_window_pts/2))
    delta_tau = np.zeros_like(inv.extinction)
   
    delta_tau[:,start_pt:] = -0.5 * np.log(
        inv.Nm[:,start_pt:]/inv.Nm[:,:-start_pt]\
        *(beta_r_array[:,:-start_pt]/beta_r_array[:,start_pt:]))
    
    delta_tau[:,start_pt:] = delta_tau[:,start_pt:] \
          - (beta_r_array[:,start_pt:]+beta_r_array[:,:-start_pt]) * msl_altitudes[start_pt]/2.0
    
    inv.p180[:,start_pt:] = (\
        integrated_backscat[:,start_pt:] - integrated_backscat[:,:-start_pt])\
        /delta_tau[:,start_pt:]
   
    return inv       
Example #19
    def do_photometry(self):
        """Perform photometry on the given target.

		This function needs to set
			* self.lightcurve
		"""

        logger = logging.getLogger(__name__)
        logger.info("Running aperture photometry...")

        k2p2_settings = {
            'thresh': 0.8,
            'min_no_pixels_in_mask': 4,
            'min_for_cluster': 4,
            'cluster_radius': np.sqrt(2) + np.finfo(np.float64).eps,
            'segmentation': True,
            'ws_blur': 0.5,
            'ws_thres': 0,
            'ws_footprint': 3,
            'extend_overflow': True
        }

        for retries in range(5):
            # Delete any plots left over in the plots folder from an earlier iteration:
            self.delete_plots()

            # Create the sum-image:
            SumImage = self.sumimage

            logger.info(self.stamp)
            logger.info("Target position in stamp: (%f, %f)",
                        self.target_pos_row_stamp,
                        self.target_pos_column_stamp)

            cat = np.column_stack(
                (self.catalog['column_stamp'], self.catalog['row_stamp'],
                 self.catalog['tmag']))

            logger.info("Creating new masks...")
            try:
                masks, background_bandwidth = k2p2.k2p2FixFromSum(
                    SumImage,
                    plot_folder=self.plot_folder,
                    show_plot=False,
                    catalog=cat,
                    **k2p2_settings)
                masks = np.asarray(masks, dtype='bool')
            except k2p2.K2P2NoStars:
                self.report_details(error='No flux above threshold.')
                masks = np.asarray(0, dtype='bool')

            using_minimum_mask = False
            if len(masks.shape) == 0:
                logger.warning("No masks found")
                self.report_details(
                    error='No masks found. Using minimum aperture.')
                mask_main = self._minimum_aperture()
                using_minimum_mask = True

            else:
                # Look at the central pixel where the target should be:
                indx_main = masks[:,
                                  int(round(self.target_pos_row_stamp)),
                                  int(round(self.target_pos_column_stamp)
                                      )].flatten()

                if not np.any(indx_main):
                    logger.warning(
                        'No mask found for main target. Using minimum aperture.'
                    )
                    self.report_details(
                        error=
                        'No mask found for main target. Using minimum aperture.'
                    )
                    mask_main = self._minimum_aperture()
                    using_minimum_mask = True

                elif np.sum(indx_main) > 1:
                    logger.error('Too many masks')
                    self.report_details(error='Too many masks')
                    return STATUS.ERROR

                else:
                    # Mask of the main target:
                    mask_main = masks[indx_main, :, :].reshape(SumImage.shape)

            # Find out if we are touching any of the edges:
            resize_args = {}
            if np.any(mask_main[0, :]):
                resize_args['down'] = 10
            if np.any(mask_main[-1, :]):
                resize_args['up'] = 10
            if np.any(mask_main[:, 0]):
                resize_args['left'] = 10
            if np.any(mask_main[:, -1]):
                resize_args['right'] = 10

            if resize_args:
                logger.warning("Touching the edges! Retrying")
                logger.info(resize_args)
                if not self.resize_stamp(**resize_args):
                    resize_args = {}
                    logger.warning("Could not resize stamp any further")
                    break
            else:
                break

        # If we reached the last retry but still needed a resize, give up:
        if resize_args:
            self.report_details(error='Too many stamp resizes')
            return STATUS.ERROR

        # XY of pixels in frame
        cols, rows = self.get_pixel_grid()
        members = np.column_stack((cols[mask_main], rows[mask_main]))

        # Loop through the images and backgrounds together:
        for k, (img, imgerr, bck) in enumerate(
                zip(self.images, self.images_err, self.backgrounds)):

            flux_in_cluster = img[mask_main]

            # Calculate flux in mask:
            if allnan(flux_in_cluster) or np.all(flux_in_cluster == 0):
                self.lightcurve['flux'][k] = np.NaN
                self.lightcurve['flux_err'][k] = np.NaN
                self.lightcurve['pos_centroid'][k, :] = np.NaN
                #self.lightcurve['quality']
            else:
                self.lightcurve['flux'][k] = np.sum(flux_in_cluster)
                self.lightcurve['flux_err'][k] = np.sqrt(
                    np.sum(imgerr[mask_main]**2))

                # Calculate flux centroid:
                finite_vals = (flux_in_cluster > 0)
                if np.any(finite_vals):
                    self.lightcurve['pos_centroid'][k, :] = np.average(
                        members[finite_vals],
                        weights=flux_in_cluster[finite_vals],
                        axis=0)
                else:
                    self.lightcurve['pos_centroid'][k, :] = np.NaN

            if allnan(bck[mask_main]):
                self.lightcurve['flux_background'][k] = np.NaN
            else:
                self.lightcurve['flux_background'][k] = np.nansum(
                    bck[mask_main])

        # Save the mask to be stored in the output file:
        self.final_mask = mask_main

        # Add additional headers specific to this method:
        #self.additional_headers['KP_SUBKG'] = (bool(subtract_background), 'K2P2 subtract background?')
        self.additional_headers['KP_THRES'] = (k2p2_settings['thresh'],
                                               'K2P2 sum-image threshold')
        self.additional_headers['KP_MIPIX'] = (
            k2p2_settings['min_no_pixels_in_mask'], 'K2P2 min pixels in mask')
        self.additional_headers['KP_MICLS'] = (
            k2p2_settings['min_for_cluster'], 'K2P2 min pix. for cluster')
        self.additional_headers['KP_CLSRA'] = (k2p2_settings['cluster_radius'],
                                               'K2P2 cluster radius')
        self.additional_headers['KP_WS'] = (bool(
            k2p2_settings['segmentation']), 'K2P2 watershed segmentation')
        #self.additional_headers['KP_WSALG'] = (k2p2_settings['ws_alg'], 'K2P2 watershed weighting')
        self.additional_headers['KP_WSBLR'] = (k2p2_settings['ws_blur'],
                                               'K2P2 watershed blur')
        self.additional_headers['KP_WSTHR'] = (k2p2_settings['ws_thres'],
                                               'K2P2 watershed threshold')
        self.additional_headers['KP_WSFOT'] = (k2p2_settings['ws_footprint'],
                                               'K2P2 watershed footprint')
        self.additional_headers['KP_EX'] = (bool(
            k2p2_settings['extend_overflow']), 'K2P2 extend overflow')

        # Targets that are in the mask:
        target_in_mask = [
            k for k, t in enumerate(self.catalog)
            if np.any(mask_main & (rows == np.round(t['row']) + 1)
                      & (cols == np.round(t['column']) + 1))
        ]

        # Figure out which status to report back:
        my_status = STATUS.OK

        # Calculate contamination from the other targets in the mask:
        if len(target_in_mask) == 0:
            logger.error("No targets in mask")
            self.report_details(error='No targets in mask')
            contamination = np.nan
            my_status = STATUS.ERROR
        elif len(target_in_mask) == 1 and self.catalog[target_in_mask][0][
                'starid'] == self.starid:
            contamination = 0
        else:
            # Calculate contamination metric as defined in Lund & Handberg (2014):
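            # In flux terms this is contamination = 1 - F_target/F_total, where
            # F ~ 10^(-0.4*m) and mags_total below is the combined magnitude of
            # all catalog stars falling inside the photometric mask: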
            mags_in_mask = self.catalog[target_in_mask]['tmag']
            mags_total = -2.5 * np.log10(np.nansum(10**(-0.4 * mags_in_mask)))
            contamination = 1.0 - 10**(0.4 * (mags_total - self.target_tmag))
            contamination = np.abs(
                contamination)  # Avoid stupid signs due to round-off errors

        logger.info("Contamination: %f", contamination)
        if not np.isnan(contamination):
            self.additional_headers['AP_CONT'] = (contamination,
                                                  'AP contamination')

        # Check if there are other targets in the mask that could then be skipped from
        # processing, and report this back to the TaskManager. The TaskManager will decide
        # if this means that this target or the other targets should be skipped in the end.
        skip_targets = [
            t['starid'] for t in self.catalog[target_in_mask]
            if t['starid'] != self.starid
        ]
        if skip_targets:
            logger.info("These stars could be skipped: %s", skip_targets)
            self.report_details(skip_targets=skip_targets)

        # Figure out which status to report back:
        if using_minimum_mask:
            my_status = STATUS.WARNING

        # Return whether you think it went well:
        return my_status
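# The contamination metric computed above follows Lund & Handberg (2014): all catalog
# magnitudes inside the mask are combined into one total magnitude, which is compared
# to the target magnitude. A minimal, self-contained sketch with made-up numbers
# (a 10.0 mag target plus a 12.5 mag neighbour in the mask):
import numpy as np

mags_in_mask = np.array([10.0, 12.5])   # hypothetical TESS magnitudes inside the mask
target_tmag = 10.0                      # hypothetical magnitude of the main target
mags_total = -2.5 * np.log10(np.nansum(10**(-0.4 * mags_in_mask)))
contamination = abs(1.0 - 10**(0.4 * (mags_total - target_tmag)))
print(round(contamination, 3))          # ~0.091: the neighbour contributes roughly 9% of the flux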
Example #20
    def do_photometry(self):
        """Linear PSF Photometry
		TODO: add description of method and what A and b are
		"""

        logger = logging.getLogger(__name__)

        # Load catalog to determine what stars to fit:
        cat = self.catalog
        staridx = np.squeeze(np.where(cat['starid'] == self.starid))

        # Log full catalog for current stamp:
        logger.debug(cat)

        # Calculate distance from main target:
        cat['dist'] = np.sqrt(
            (cat['row_stamp'][staridx] - cat['row_stamp'])**2 +
            (cat['column_stamp'][staridx] - cat['column_stamp'])**2)

        # Find indices of stars in catalog to fit:
        # (only include stars that are close to the main target and that are
        # not much fainter)
        indx = (cat['dist'] < 5) & (cat['tmag'][staridx] - cat['tmag'] > -5)
        nstars = int(np.sum(indx))

        # Get target star index in the reduced catalog of stars to fit:
        staridx = np.squeeze(np.where(cat[indx]['starid'] == self.starid))
        logger.debug('Target star index: %s', staridx)

        # Preallocate flux sum array for contamination calculation:
        fluxes_sum = np.zeros(nstars, dtype='float64')

        # Start looping through the images (time domain):
        for k, img in enumerate(self.images):
            # Get catalog at current time in MJD:
            cat = self.catalog_attime(self.lightcurve['time'][k] -
                                      self.lightcurve['timecorr'][k])

            # Reduce catalog to only include stars that should be fitted:
            cat = cat[indx]
            logger.debug(cat)

            # Get the number of pixels in the image:
            good_pixels = np.isfinite(img)
            npx = int(np.sum(good_pixels))

            # Create A, the 2D design matrix whose columns are reshaped 1D PRF arrays:
            A = np.empty([npx, nstars], dtype='float64')
            for col, target in enumerate(cat):
                # Get star parameters with flux set to 1 and reshape:
                params0 = np.atleast_2d(
                    [target['row_stamp'], target['column_stamp'], 1.])

                # Fill out column of A with reshaped PRF array from one star:
                A[:, col] = self.psf.integrate_to_image(
                    params0, cutoff_radius=20)[good_pixels].flatten()

            # Create b, the data vector, by reshaping the image to a 1D array:
            b = img[good_pixels].flatten()

            # Do linear least squares fit to solve Ax=b:
            try:
                # Linear least squares:
                res = np.linalg.lstsq(A, b, rcond=None)
                fluxes = res[0]

                # Non-negative linear least squares:
                #fluxes, rnorm = scipy.optimize.nnls(A, b)
            except np.linalg.LinAlgError:
                logger.debug("Linear PSF Fitting failed")
                fluxes = None

            # Store the result, unless the fit failed:
            if fluxes is None:
                logger.warning("We should flag that this has not gone well.")
                self.lightcurve['flux'][k] = np.NaN
                self.lightcurve['quality'][k] = 1  # FIXME: Use the real flag!

            else:
                # Get flux of target star:
                result = fluxes[staridx]

                logger.debug('Fluxes are: %s', fluxes)
                logger.debug('Result is: %f', result)

                # Add the result of the main star to the lightcurve:
                self.lightcurve['flux'][k] = result

                # Add current fitted fluxes for contamination calculation:
                fluxes_sum += fluxes

                # Make plots for debugging:
                if self.plot and logger.isEnabledFor(logging.DEBUG):
                    fig = plt.figure()
                    result4plot = []
                    for star, target in enumerate(cat):
                        result4plot.append(
                            np.array([
                                target['row_stamp'], target['column_stamp'],
                                fluxes[star]
                            ]))

                    # Add subplots with the image, fit and residuals:
                    ax_list = plot_image_fit_residuals(
                        fig=fig,
                        image=img,
                        fit=self.psf.integrate_to_image(result4plot,
                                                        cutoff_radius=20))

                    # Add star position to the first plot:
                    ax_list[0].scatter(result4plot[staridx][1],
                                       result4plot[staridx][0],
                                       c='r',
                                       alpha=0.5)

                    # Save figure to file:
                    fig_name = 'tess_{0:011d}_linpsf_{1:05d}'.format(
                        self.starid, k)
                    save_figure(os.path.join(self.plot_folder, fig_name))
                    plt.close(fig)

        # Abort with an error if all target flux values are NaN:
        if allnan(self.lightcurve['flux']):
            self.report_details(error='All target flux values are NaN.')
            return STATUS.ERROR

        # Divide by number of added fluxes to get the mean flux:
        fluxes_mean = fluxes_sum / np.sum(~np.isnan(self.lightcurve['flux']))
        logger.debug('Mean fluxes are: %s', fluxes_mean)

        # Calculate contamination from other stars in target PSF using latest A:
        not_target_star = np.arange(len(fluxes_mean)) != staridx
        contamination = np.sum(
            A[:, not_target_star].dot(fluxes_mean[not_target_star]) *
            A[:, staridx]) / fluxes_mean[staridx]

        logger.info("Contamination: %f", contamination)
        self.additional_headers['PSF_CONT'] = (contamination,
                                               'PSF contamination')

        # If contamination is high, return a warning:
        if contamination > 0.1:
            self.report_details(error='High contamination')
            return STATUS.WARNING

        # Return whether you think it went well:
        return STATUS.OK
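# A minimal, self-contained sketch of the linear least-squares step used above,
# with a hypothetical two-star design matrix (the PRF columns are made-up numbers):
import numpy as np

A = np.array([[0.8, 0.1],
              [0.2, 0.3],
              [0.0, 0.6]])              # columns: unit-flux models of star 0 and star 1
true_fluxes = np.array([1000.0, 250.0])
b = A.dot(true_fluxes)                  # noise-free "observed" pixel values
fluxes, *_ = np.linalg.lstsq(A, b, rcond=None)
print(fluxes)                           # recovers approximately [1000., 250.]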
Example #21
def plot_image(image,
               ax=None,
               scale='log',
               cmap=None,
               origin='lower',
               xlabel=None,
               ylabel=None,
               cbar=None,
               clabel='Flux ($e^{-}s^{-1}$)',
               cbar_ticks=None,
               cbar_ticklabels=None,
               cbar_pad=None,
               cbar_size='5%',
               title=None,
               percentile=95.0,
               vmin=None,
               vmax=None,
               offset_axes=None,
               color_bad='k',
               **kwargs):
    """
	Utility function to plot a 2D image.

	Parameters:
		image (2d array): Image data.
		ax (matplotlib.pyplot.axes, optional): Axes in which to plot.
			Default (None) is to use current active axes.
		scale (str or :py:class:`astropy.visualization.ImageNormalize` object, optional):
			Normalization used to stretch the colormap.
			Options: ``'linear'``, ``'sqrt'``, ``'log'``, ``'asinh'``, ``'histeq'``, ``'sinh'``
			and ``'squared'``.
			Can also be a :py:class:`astropy.visualization.ImageNormalize` object.
			Default is ``'log'``.
		origin (str, optional): The origin of the coordinate system.
		xlabel (str, optional): Label for the x-axis.
		ylabel (str, optional): Label for the y-axis.
		cbar (string, optional): Location of color bar.
			Choices are ``'right'``, ``'left'``, ``'top'``, ``'bottom'``.
			Default is not to create colorbar.
		clabel (str, optional): Label for the color bar.
		cbar_size (str or float, optional): Fractional size of colorbar compared to axes. Default='5%'.
		cbar_pad (float, optional): Padding between axes and colorbar.
		title (str or None, optional): Title for the plot.
		percentile (float, optional): The fraction of pixels to keep in color-trim.
			If single float given, the same fraction of pixels is eliminated from both ends.
			If tuple of two floats is given, the two are used as the percentiles.
			Default=95.
		cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap.
		vmin (float, optional): Lower limit to use for colormap.
		vmax (float, optional): Upper limit to use for colormap.
		color_bad (str, optional): Color to apply to bad pixels (NaN). Default is black.
		kwargs (dict, optional): Keyword arguments to be passed to :py:func:`matplotlib.pyplot.imshow`.

	Returns:
		:py:class:`matplotlib.image.AxesImage`: Image returned
			by :py:func:`matplotlib.pyplot.imshow`.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

    logger = logging.getLogger(__name__)

    # Backward compatible settings:
    make_cbar = kwargs.pop('make_cbar', None)
    if make_cbar:
        raise FutureWarning("'make_cbar' is deprecated. Use 'cbar' instead.")

    # Special treatment for boolean arrays:
    if isinstance(image, np.ndarray) and image.dtype == 'bool':
        if vmin is None: vmin = 0
        if vmax is None: vmax = 1
        if cbar_ticks is None: cbar_ticks = [0, 1]
        if cbar_ticklabels is None: cbar_ticklabels = ['False', 'True']

    # Calculate limits of color scaling:
    interval = None
    if vmin is None or vmax is None:
        if allnan(image):
            logger.warning("Image is all NaN")
            vmin = 0
            vmax = 1
            if cbar_ticks is None:
                cbar_ticks = []
            if cbar_ticklabels is None:
                cbar_ticklabels = []
        elif isinstance(percentile, (list, tuple, np.ndarray)):
            interval = viz.AsymmetricPercentileInterval(
                percentile[0], percentile[1])
        else:
            interval = viz.PercentileInterval(percentile)

    # Create ImageNormalize object with extracted limits:
    if scale in ('log', 'linear', 'sqrt', 'asinh', 'histeq', 'sinh',
                 'squared'):
        if scale == 'log':
            stretch = viz.LogStretch()
        elif scale == 'linear':
            stretch = viz.LinearStretch()
        elif scale == 'sqrt':
            stretch = viz.SqrtStretch()
        elif scale == 'asinh':
            stretch = viz.AsinhStretch()
        elif scale == 'histeq':
            stretch = viz.HistEqStretch(image[np.isfinite(image)])
        elif scale == 'sinh':
            stretch = viz.SinhStretch()
        elif scale == 'squared':
            stretch = viz.SquaredStretch()

        # Create ImageNormalize object. Very important to use clip=False if the image contains
        # NaNs, otherwise NaN points will not be plotted correctly.
        norm = viz.ImageNormalize(data=image[np.isfinite(image)],
                                  interval=interval,
                                  vmin=vmin,
                                  vmax=vmax,
                                  stretch=stretch,
                                  clip=not anynan(image))

    elif isinstance(scale, (viz.ImageNormalize, matplotlib.colors.Normalize)):
        norm = scale
    else:
        raise ValueError("scale {} is not available.".format(scale))

    if offset_axes:
        extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5,
                  offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5)
    else:
        extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5)

    if ax is None:
        ax = plt.gca()

    # Set up the colormap to use. If a bad color is defined,
    # add it to the colormap:
    if cmap is None:
        cmap = copy.copy(plt.get_cmap('Blues'))
    elif isinstance(cmap, str):
        cmap = copy.copy(plt.get_cmap(cmap))

    if color_bad:
        cmap.set_bad(color_bad, 1.0)

    # Plotting the image using all the settings set above:
    im = ax.imshow(image,
                   cmap=cmap,
                   norm=norm,
                   origin=origin,
                   extent=extent,
                   interpolation='nearest',
                   **kwargs)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if title is not None:
        ax.set_title(title)
    ax.set_xlim([extent[0], extent[1]])
    ax.set_ylim([extent[2], extent[3]])

    if cbar:
        colorbar(im,
                 ax=ax,
                 loc=cbar,
                 size=cbar_size,
                 pad=cbar_pad,
                 label=clabel,
                 ticks=cbar_ticks,
                 ticklabels=cbar_ticklabels)

    # Settings for ticks:
    # Use separate locator instances; a single Locator should not be shared between axes:
    ax.xaxis.set_major_locator(MaxNLocator(nbins=10, integer=True))
    ax.xaxis.set_minor_locator(MaxNLocator(nbins=10, integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=10, integer=True))
    ax.yaxis.set_minor_locator(MaxNLocator(nbins=10, integer=True))
    ax.tick_params(which='both', direction='out', pad=5)
    ax.xaxis.tick_bottom()
    ax.yaxis.tick_left()

    return im
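# A minimal usage sketch of the plot_image function defined above (assuming it and
# its colorbar helper are importable from the surrounding module):
import numpy as np
import matplotlib.pyplot as plt

img = np.random.lognormal(mean=2.0, sigma=1.0, size=(32, 32))   # synthetic flux image
img[5, 5] = np.nan                                              # one "bad" pixel, drawn in black
fig, ax = plt.subplots()
plot_image(img, ax=ax, scale='log', cbar='right', title='Synthetic image')
plt.show()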
Example #22
def plot_image(image,
               scale='log',
               origin='lower',
               xlabel='Pixel Column Number',
               ylabel='Pixel Row Number',
               make_cbar=False,
               clabel='Flux ($e^{-}s^{-1}$)',
               title=None,
               percentile=95.0,
               ax=None,
               cmap=plt.cm.Blues,
               offset_axes=None,
               **kwargs):
    """
	Utility function to plot a 2D image.

	Parameters:
		image (2d array): Image data.
		scale (str or astropy.visualization.ImageNormalize object, optional): Normalization used to stretch the colormap. Options: ``'linear'``, ``'sqrt'``, or ``'log'``. Can also be a `astropy.visualization.ImageNormalize` object. Default is ``'log'``.
		origin (str, optional): The origin of the coordinate system.
		xlabel (str, optional): Label for the x-axis.
		ylabel (str, optional): Label for the y-axis.
		make_cbar (boolean, optional): Create colorbar? Default is ``False``.
		clabel (str, optional): Label for the color bar.
		title (str or None, optional): Title for the plot.
		percentile (float, optional): The fraction of pixels to keep in color-trim. The same fraction of pixels is eliminated from both ends. Default=95.
		ax (matplotlib.pyplot.axes, optional): Axes in which to plot. Default (None) is to use current active axes.
		cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap.
		kwargs (dict, optional): Keyword arguments to be passed to `matplotlib.pyplot.imshow`.
	"""

    # Negative values will throw warnings, so add offset so we are above zero:
    # TODO: Something weird is going on, and this doesn't work, so for now we ignore warnings?! (see above)
    if scale == 'log' or scale == 'sqrt':
        img_min = np.nanmin(image)
        if img_min <= 0:
            image = image.copy()
            image += np.abs(img_min) + 1.0

    #print(scale, np.all(np.isfinite(image)), np.all(image > 0), np.min(image), np.max(image))

    if allnan(image):
        logger = logging.getLogger(__name__)
        logger.error("Image is all NaN")
        return None

    # Calculate limits of color scaling:
    vmin, vmax = PercentileInterval(percentile).get_limits(image)

    # Create ImageNormalize object with extracted limits:
    if scale == 'log':
        norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LogStretch())
    elif scale == 'linear':
        norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LinearStretch())
    elif scale == 'sqrt':
        norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=SqrtStretch())
    elif isinstance(scale, matplotlib.colors.Normalize) or isinstance(
            scale, ImageNormalize):
        norm = scale
    else:
        raise ValueError("scale {} is not available.".format(scale))

    if offset_axes:
        extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5,
                  offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5)
    else:
        extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5)

    if ax is None:
        ax = plt.gca()

    if isinstance(cmap, six.string_types):
        cmap = plt.get_cmap(cmap)

    im = ax.imshow(image,
                   origin=origin,
                   norm=norm,
                   extent=extent,
                   cmap=cmap,
                   interpolation='nearest',
                   **kwargs)
    if xlabel is not None: ax.set_xlabel(xlabel)
    if ylabel is not None: ax.set_ylabel(ylabel)
    if title is not None: ax.set_title(title)
    ax.set_xlim([extent[0], extent[1]])
    ax.set_ylim([extent[2], extent[3]])

    if make_cbar:
        # TODO: In cases where image was rescaled, should we change something here?
        cbar = plt.colorbar(im, norm=norm)
        cbar.set_label(clabel)

    # Settings for ticks (to make Mikkel happy):
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.xaxis.set_minor_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_minor_locator(MaxNLocator(integer=True))
    ax.tick_params(direction='out', which='both', pad=5)
    ax.xaxis.tick_bottom()
    #ax.set_aspect(aspect)

    return im
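# The older variant above also accepts a ready-made normalization object instead of a
# scale name. A small sketch with hypothetical values:
import numpy as np
from astropy.visualization import ImageNormalize, LogStretch

img = np.abs(np.random.randn(16, 16)) + 1.0
norm = ImageNormalize(vmin=1.0, vmax=5.0, stretch=LogStretch())
plot_image(img, scale=norm, title='Custom normalization')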
Example #23
    def _fit(self, X, y):
        self.X, y = self._check_params(X, y)
        n, p = X.shape
        self.y = y.reshape((n, 1))

        # list of selected features
        S = []
        # list of all features
        F = list(range(p))

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # ---------------------------------------------------------------------
        # FIND FIRST FEATURE
        # ---------------------------------------------------------------------

        # check a range of ks (3-10), and choose the one with the max median MI
        k_min = 3
        k_max = 11
        xy_MI = np.zeros((k_max - k_min, p))
        xy_MI[:] = np.nan
        for i, k in enumerate(range(k_min, k_max)):
            xy_MI[i, :] = mi.get_first_mi_vector(self, k)
        xy_MI = bn.nanmedian(xy_MI, axis=0)

        # choose the best, add it to S, remove it from F
        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # ---------------------------------------------------------------------
        # FIND SUBSEQUENT FEATURES
        # ---------------------------------------------------------------------
        if self.n_features == 'auto': n_features = np.inf
        else: n_features = self.n_features

        while len(S) < n_features:
            # loop through the remaining unselected features and calculate MI
            s = len(S) - 1
            feature_mi_matrix[s, F] = mi.get_mi_vector(self, F, S[-1])

            # make decision based on the chosen FS algorithm
            fmm = feature_mi_matrix[:len(S), F]
            if self.method == 'JMI':
                selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
            elif self.method == 'JMIM':
                if bn.allnan(bn.nanmin(fmm, axis=0)):
                    break
                selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
            elif self.method == 'MRMR':
                if bn.allnan(bn.nanmean(fmm, axis=0)):
                    break
                MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
                selected = F[bn.nanargmax(MRMR)]

            # record the JMIM of the newly selected feature and add it to S
            S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)

            # notify user
            if self.verbose > 0:
                self._print_results(S, S_mi)

            # if n_features == 'auto', let's check the S_mi to stop
            if self.n_features == 'auto' and len(S) > 10:
                # smooth the 1st derivative of the MI values of previously sel
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                # does the mean of the last 5 converge to 0?
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break

        # ---------------------------------------------------------------------
        # SAVE RESULTS
        # ---------------------------------------------------------------------

        self.n_features_ = len(S)
        self.support_ = np.zeros(p, dtype=bool)
        self.support_[S] = 1
        self.ranking_ = S
        self.mi_ = S_mi

        return self
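# A small, self-contained illustration of the JMIM selection rule used above: for every
# remaining feature (column of fmm) take the minimum MI against the already-selected
# features, then pick the feature whose minimum is largest. Values are made up.
import numpy as np
import bottleneck as bn

F = [3, 7, 9]                                   # hypothetical remaining feature indices
fmm = np.array([[0.20, 0.05, np.nan],
                [0.15, 0.30, 0.10]])            # rows: selected features, columns: F
selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
print(selected)                                 # 3, since min(0.20, 0.15) is the largest minimum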
Example #24
def remove_jumps(t, x, jumps, width=3, return_flags=False):
	"""
	Remove jumps from timeseries.

	Parameters:
		t (ndarray): Time vector (days). Must be sorted in time.
		x (ndarray): Flux vector. Can contain invalid points (NaN).
		jumps (list): Vector of timestamps where jumps are to be corrected.
		width (float): Width of the region on each side of jumps to compare (default=3 days).
		return_flags (boolean): Return two additional arrays with location of corrected jumps.
	"""

	# Get the logger to use for printing messages:
	logger = logging.getLogger(__name__)

	# Number of points:
	N = len(t)

	dt = nanmedian(diff(t))

	# Convert a simple list of times to a jumps-dictionary:
	jumps = np.atleast_1d(jumps)
	for k,jump in enumerate(jumps):
		if np.isscalar(jump):
			jumps[k] = {'time': jump}
		elif not isinstance(jump, dict):
			raise Exception("Invalid input in JUMPS")

	# Important that we correct the jumps in the right order:
	jumps = sorted(jumps, key=lambda k: k['time'])

	# Arrays needed for the following:
	correction = empty(2, dtype='float64')
	if return_flags:
		flag_jumps = [False]*len(jumps)
		flag_jumps2 = zeros(N, dtype='int64')

	# Correct jumps one after the other:
	kj = 0
	for k,jump in enumerate(jumps):
		logger.debug(jump)
		# Extract information about jump:
		tjump = jump.get('time')
		jumptype = jump.get('type', 'multiplicative')
		jumpforce = jump.get('force', False)

		# Make maps to central region and region after jump:
		kj_pre = kj
		kj = searchsorted(t, tjump)
		if kj == 0 or kj == N or kj == kj_pre: continue # Skip if first, last or same point as previous
		central1 = searchsorted(t, t[kj-1]-width)
		central2 = searchsorted(t, t[kj]+width)


		gapsize = t[kj] - t[kj-1] # The length of the jump

		# Make small timeseries around the gap:
		tcen = t[central1:central2]
		xcen = x[central1:central2]
		xmdl = np.empty_like(xcen)
		indx = searchsorted(tcen, tjump)

		# Do simple check to see if all datapoints are NaN:
		if allnan(x[central1:kj]) or allnan(x[kj:central2]):
			continue

		# Run LOWESS filter on two halves to eliminate effects of transit:
		if (kj-central1 < 0.5*int(width/dt)):
			w1 = np.column_stack((t[central1:kj], x[central1:kj]))
		else:
			w1 = lowess(x[central1:kj], t[central1:kj], frac=1./3, is_sorted=True)

		if (central2-kj < 0.5*int(width/dt)):
			w2 = np.column_stack((t[kj:central2], x[kj:central2]))
		else:
			w2 = lowess(x[kj:central2], t[kj:central2], frac=1./3, is_sorted=True)

		# Calculate median levels before and after jump
		# and make these match up:
		level1_const = nanmedian(w1[:,1])
		level2_const = nanmedian(w2[:,1])

		# Do not try to use linear relation on very long gaps
		# it will in many cases not work.
		if gapsize < 2*width:
			# Do robust linear fit of part before and after jump:
			res1 = theil_sen(w1[:,0], w1[:,1], n_samples=1e5)
			res2 = theil_sen(w2[:,0], w2[:,1], n_samples=1e5)

			# Evaluate fitted lines at midpoint in the gap:
			tmid = (t[kj] + t[kj-1])/2 # Midpoint in gap
			level1_linear = np.polyval(res1, tmid)
			level2_linear = np.polyval(res2, tmid)
		else:
			level1_linear = NaN
			level2_linear = NaN


		# Calculate Bayesian Information Criterion (BIC) for the different
		# models of the jump to decide which one should be applied to the data:
		if jumptype == 'additive':
			# Constant model:
			correction[0] = level1_const - level2_const
			if isfinite(correction[0]):
				# Calculate model:
				xmdl[:indx] = level1_const
				xmdl[indx:] = level2_const
				# Calculate BIC:
				s1 = BIC(xcen, xmdl, 2)
			else:
				s1 = Inf

			# Linear model:
			correction[1] = level1_linear - level2_linear
			if isfinite(correction[1]):
				# Calculate model:
				xmdl[:indx] = np.polyval(res1, tcen[:indx])
				xmdl[indx:] = np.polyval(res2, tcen[indx:])
				# Calculate BIC:
				s2 = BIC(xcen, xmdl, 4)
			else:
				s2 = Inf

		elif jumptype == 'multiplicative':
			# Constant model:
			correction[0] = level1_const / level2_const
			if isfinite(correction[0]) and correction[0] > 0:
				# Correct data:
				xcen2 = dc(xcen) # take a deep copy, so that the correction doesn't affect xcen
				xcen2[indx:] *= correction[0]
				# Calculate model:
				xmdl[:] = level1_const
				# Calculate BIC:
				s1 = BIC(xcen2, xmdl, 2)
			else:
				s1 = Inf

			# Linear model:
			correction[1] = level1_linear / level2_linear
			if isfinite(correction[1]) and correction[1] > 0:
				# Correct data:
				xcen2 = dc(xcen) # take a deep copy, so that the correction doesn't affect xcen
				xcen2[indx:] *= correction[1]
				# Calculate model:
				xmdl[:indx] = np.polyval(res1, tcen[:indx])
				xmdl[indx:] = np.polyval(res2, tcen[indx:]) * correction[1]
				# Calculate BIC:
				s2 = BIC(xcen2, xmdl, 4)
			else:
				s2 = Inf

		else:
			raise Exception('Unknown jump type')

		# Apply correction to entire timeseries if the standard deviation improves:
		if jumpforce:
			i = np.argmin([s1, s2]) + 1
		else:
			# Calculate BIC of uncorrected central part:
			s0 = BIC(xcen, nanmedian(xcen), 1)
			i = np.argmin([s0, s1, s2])
		logger.debug(i)

		if i != 0: # Do not correct if unaltered data gives the best
			# Apply the best correction to everything to the right of the jump:
			if jumptype == 'additive':
				x[kj:] += correction[i-1]
			else:
				x[kj:] *= correction[i-1]
			# Set the flags, if required:
			if return_flags:
				flag_jumps[k] = True
				flag_jumps2[kj] = 2**i # Returns 2 (mean) or 4 (linear) when correction was made, zero otherwise

	if return_flags:
		return x, flag_jumps, flag_jumps2
	else:
		return x
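# A minimal usage sketch of remove_jumps on synthetic data (purely illustrative; it
# assumes remove_jumps and its helpers such as lowess, theil_sen and BIC are importable
# from the surrounding module):
import numpy as np

t = np.arange(0, 20, 0.02)                 # 20 days of evenly sampled timestamps
x = np.ones_like(t) * 1000.0
x[t >= 10.0] *= 1.05                       # a 5% multiplicative jump at day 10
x_corr, flag_jumps, flag_jumps2 = remove_jumps(
	t, x.copy(), jumps=[{'time': 10.0, 'type': 'multiplicative'}], return_flags=True)
# flag_jumps should come back as [True] if the correction was accepted by the BIC test.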
Example #25
    def correct(self, task, output_folder=None):
        """
		Run correction.

		Parameters:
			task (dict): Dictionary defining a task/lightcurve to process.
			output_folder (str, optional): Path to directory where lightcurve should be saved.

		Returns:
			dict: Result dictionary containing information about the processing.

		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""

        logger = logging.getLogger(__name__)

        t1 = default_timer()

        error_msg = []
        details = {}
        save_file = None
        result = task.copy()
        try:
            # Load the lightcurve
            lc = self.load_lightcurve(task)

            # Run the correction on this lightcurve:
            lc_corr, status = self.do_correction(lc)

        except (KeyboardInterrupt, SystemExit):  # pragma: no cover
            status = STATUS.ABORT
            logger.warning("Correction was aborted (priority=%d)",
                           task['priority'])
        except:  # noqa: E722 pragma: no cover
            status = STATUS.ERROR
            logger.exception("Correction failed (priority=%d)",
                             task['priority'])

        # Check that the status has been changed:
        if status == STATUS.UNKNOWN:  # pragma: no cover
            raise ValueError("STATUS was not set by do_correction")

        # Do sanity checks:
        if status in (STATUS.OK, STATUS.WARNING):
            # Make sure all NaN fluxes have corresponding NaN errors:
            lc_corr.flux_err[np.isnan(lc_corr.flux)] = np.NaN

            # Simple check that entire lightcurve is not NaN:
            if allnan(lc_corr.flux):
                logger.error("Final lightcurve is all NaNs")
                status = STATUS.ERROR
            if allnan(lc_corr.flux_err):
                logger.error("Final lightcurve errors are all NaNs")
                status = STATUS.ERROR
            if np.any(np.isinf(lc_corr.flux)):
                logger.error("Final lightcurve contains Inf")
                status = STATUS.ERROR
            if np.any(np.isinf(lc_corr.flux_err)):
                logger.error("Final lightcurve errors contains Inf")
                status = STATUS.ERROR

        # Calculate diagnostics:
        if status in (STATUS.OK, STATUS.WARNING):
            # Calculate diagnostics:
            details['variance'] = nanvar(lc_corr.flux, ddof=1)
            details['rms_hour'] = rms_timescale(lc_corr,
                                                timescale=3600 / 86400)
            details['ptp'] = ptp(lc_corr)

            # Diagnostics specific to the method:
            if self.CorrMethod == 'cbv':
                details['cbv_num'] = lc_corr.meta['additional_headers'][
                    'CBV_NUM']
            elif self.CorrMethod == 'ensemble':
                details['ens_num'] = lc_corr.meta['additional_headers'][
                    'ENS_NUM']
                details['ens_fom'] = lc_corr.meta['FOM']

            # Save the lightcurve to file:
            try:
                save_file = self.save_lightcurve(lc_corr,
                                                 output_folder=output_folder)
            except (KeyboardInterrupt, SystemExit):  # pragma: no cover
                status = STATUS.ABORT
                logger.warning("Correction was aborted (priority=%d)",
                               task['priority'])
            except:  # noqa: E722 pragma: no cover
                status = STATUS.ERROR
                logger.exception(
                    "Could not save lightcurve file (priority=%d)",
                    task['priority'])

            # Plot the final lightcurve:
            if self.plot:
                fig = plt.figure(dpi=200)
                ax = fig.add_subplot(111)
                ax.scatter(lc.time,
                           1e6 * (lc.flux / nanmedian(lc.flux) - 1),
                           s=2,
                           alpha=0.3,
                           marker='o',
                           label="Original")
                ax.scatter(lc_corr.time,
                           lc_corr.flux,
                           s=2,
                           alpha=0.3,
                           marker='o',
                           label="Corrected")
                ax.set_xlabel('Time (TBJD)')
                ax.set_ylabel('Relative flux (ppm)')
                ax.legend()
                save_figure(os.path.join(self.plot_folder(lc),
                                         self.CorrMethod + '_final'),
                            fig=fig)
                plt.close(fig)

        # Unpack any errors or warnings that were sent to the logger during the correction:
        if self.message_queue:
            error_msg += self.message_queue
            self.message_queue.clear()
        if not error_msg:
            error_msg = None

        # Update results:
        t2 = default_timer()
        details['errors'] = error_msg
        result.update({
            'corrector': self.CorrMethod,
            'status_corr': status,
            'elaptime_corr': t2 - t1,
            'lightcurve_corr': save_file,
            'details': details
        })

        return result
Example #26
def plot_image(image, scale='log', origin='lower', xlabel='Pixel Column Number',
	ylabel='Pixel Row Number', make_cbar=False, clabel='Flux ($e^{-}s^{-1}$)', cbar_ticks=None, cbar_ticklabels=None,
	title=None, percentile=95.0, vmin=None, vmax=None, ax=None, cmap=plt.cm.Blues, offset_axes=None, **kwargs):
	"""
	Utility function to plot a 2D image.

	Parameters:
		image (2d array): Image data.
		scale (str or astropy.visualization.ImageNormalize object, optional): Normalization used to stretch the colormap. Options: ``'linear'``, ``'sqrt'``, or ``'log'``. Can also be a `astropy.visualization.ImageNormalize` object. Default is ``'log'``.
		origin (str, optional): The origin of the coordinate system.
		xlabel (str, optional): Label for the x-axis.
		ylabel (str, optional): Label for the y-axis.
		make_cbar (boolean, optional): Create colorbar? Default is ``False``.
		clabel (str, optional): Label for the color bar.
		title (str or None, optional): Title for the plot.
		percentile (float, optional): The fraction of pixels to keep in color-trim. The same fraction of pixels is eliminated from both ends. Default=95.
		ax (matplotlib.pyplot.axes, optional): Axes in which to plot. Default (None) is to use current active axes.
		cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap.
		kwargs (dict, optional): Keyword arguments to be passed to `matplotlib.pyplot.imshow`.
	"""

	if allnan(image):
		logger = logging.getLogger(__name__)
		logger.error("Image is all NaN")
		return None

	# Calculate limits of color scaling:
	if vmin is None or vmax is None:
		vmin1, vmax1 = PercentileInterval(percentile).get_limits(image)
		if vmin is None: vmin = vmin1
		if vmax is None: vmax = vmax1

	# Create ImageNormalize object with extracted limits:
	if scale == 'log':
		norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LogStretch())
	elif scale == 'linear':
		norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=LinearStretch())
	elif scale == 'sqrt':
		norm = ImageNormalize(vmin=vmin, vmax=vmax, stretch=SqrtStretch())
	elif isinstance(scale, matplotlib.colors.Normalize) or isinstance(scale, ImageNormalize):
		norm = scale
	else:
		raise ValueError("scale {} is not available.".format(scale))

	if offset_axes:
		extent = (offset_axes[0]-0.5, offset_axes[0] + image.shape[1]-0.5, offset_axes[1]-0.5, offset_axes[1] + image.shape[0]-0.5)
	else:
		extent = (-0.5, image.shape[1]-0.5, -0.5, image.shape[0]-0.5)

	if ax is None:
		ax = plt.gca()

	if isinstance(cmap, str):
		cmap = plt.get_cmap(cmap)

	im = ax.imshow(image, origin=origin, norm=norm, extent=extent, cmap=cmap, interpolation='nearest', **kwargs)
	if xlabel is not None: ax.set_xlabel(xlabel)
	if ylabel is not None: ax.set_ylabel(ylabel)
	if title is not None: ax.set_title(title)
	ax.set_xlim([extent[0], extent[1]])
	ax.set_ylim([extent[2], extent[3]])

	if make_cbar:
		cbar = plt.colorbar(im, norm=norm, ax=ax, orientation='horizontal', pad=0.02)
		cbar.set_label(clabel)
		if cbar_ticks is not None: cbar.set_ticks(cbar_ticks)
		if cbar_ticklabels is not None: cbar.set_ticklabels(cbar_ticklabels)

	# Settings for ticks (to make Mikkel happy):
	ax.xaxis.set_major_locator(MaxNLocator(integer=True))
	ax.xaxis.set_minor_locator(MaxNLocator(integer=True))
	ax.yaxis.set_major_locator(MaxNLocator(integer=True))
	ax.yaxis.set_minor_locator(MaxNLocator(integer=True))
	ax.tick_params(direction='out', which='both', pad=5)
	ax.xaxis.tick_bottom()
	#ax.set_aspect(aspect)

	return im
Example #27
    def lc_matrix_clean(self, cbv_area):
        """
		Performs gap-filling of light curves returned by :py:func:`CBVCorrector.lc_matrix`, and
		removes time stamps where all flux values are nan

		Parameters:
			cbv_area: the cbv area to calculate light curve matrix for

		Returns:
			mat: matrix from :py:func:`CBVCorrector.lc_matrix` that has been gap-filled and with nans removed, to be used in CBV calculation
			varis: variances of light curves in "mat"
			indx_nancol: the indices for the timestamps with nans in all light curves
			Ntimes: Number of timestamps in light curves contained in mat before removing nans

		.. codeauthor:: Mikkel N. Lund <*****@*****.**>
		"""

        logger = logging.getLogger(__name__)

        logger.info('Running matrix clean')
        tmpfile = os.path.join(
            self.data_folder,
            'mat-%s-%d_clean.npz' % (self.datasource, cbv_area))
        if logger.isEnabledFor(logging.DEBUG) and os.path.exists(tmpfile):
            logger.info("Loading existing file...")
            data = np.load(tmpfile)
            mat = data['mat']
            varis = data['varis']

            Ntimes = data['Ntimes']
            indx_nancol = data['indx_nancol']

        else:
            # Compute light curve correlation matrix
            mat0, varis = self.lc_matrix(cbv_area)

            # Print the final shape of the matrix:
            logger.info("Matrix size: %d x %d" % mat0.shape)

            # Find columns where all stars have NaNs and remove them:
            indx_nancol = allnan(mat0, axis=0)
            Ntimes = mat0.shape[1]
            mat = mat0[:, ~indx_nancol]
            cadenceno = np.arange(mat.shape[1])

            logger.info("Gap-filling lightcurves...")
            for k in tqdm(range(mat.shape[0]),
                          total=mat.shape[0],
                          disable=not logger.isEnabledFor(logging.INFO)):

                mat[k, :] /= varis[k]
                # Fill out missing values by interpolating the lightcurve:
                indx = np.isfinite(mat[k, :])
                mat[k, ~indx] = pchip_interpolate(cadenceno[indx], mat[k,
                                                                       indx],
                                                  cadenceno[~indx])

            # Save something for debugging:
            if logger.isEnabledFor(logging.DEBUG):
                np.savez(tmpfile,
                         mat=mat,
                         varis=varis,
                         indx_nancol=indx_nancol,
                         Ntimes=Ntimes)

        return mat, varis, indx_nancol, Ntimes
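# A small, self-contained sketch of the gap-filling step used above: missing cadences
# in a light curve are reconstructed with PCHIP interpolation over the finite points.
import numpy as np
from scipy.interpolate import pchip_interpolate

flux = np.array([1.0, 1.1, np.nan, np.nan, 1.4, 1.5])
cadenceno = np.arange(flux.size)
indx = np.isfinite(flux)
flux[~indx] = pchip_interpolate(cadenceno[indx], flux[indx], cadenceno[~indx])
print(flux)   # the two NaNs are replaced by smoothly interpolated values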
Example #28
    def fit(self, X, y):
        """
        Fits the MI_FS feature selection with the chosen MI_FS method.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            The training input samples.

        y : array-like, shape = [n_samples]
            The target values.
        """

        # Check if n_jobs is negative
        if self.n_jobs < 0:
            self.n_jobs = NUM_CORES - self.n_jobs

        self.X, y = self._check_params(X, y)
        n, p = X.shape
        self.y = y.reshape((n, 1))

        # list of selected features
        S = []
        # list of all features
        F = list(range(p))

        if self.n_features != 'auto':
            feature_mi_matrix = np.zeros((self.n_features, p))
        else:
            feature_mi_matrix = np.zeros((n, p))
        feature_mi_matrix[:] = np.nan
        S_mi = []

        # ---------------------------------------------------------------------
        # FIND FIRST FEATURE
        # ---------------------------------------------------------------------
        xy_MI = np.array(mimy.get_first_mi_vector(self, self.k))
        #print(xy_MI)
        #xy_MI[np.where(np.isnan(xy_MI))]=0.
        #print("first", sorted(enumerate(xy_MI), key=lambda x:x[1], reverse=True)[0])

        # choose the best, add it to S, remove it from F
        S, F = self._add_remove(S, F, bn.nanargmax(xy_MI))
        S_mi.append(bn.nanmax(xy_MI))

        # notify user
        if self.verbose > 0:
            self._print_results(S, S_mi)

        # ---------------------------------------------------------------------
        # FIND SUBSEQUENT FEATURES
        # ---------------------------------------------------------------------
        if self.n_features == 'auto': n_features = np.inf
        else: n_features = self.n_features

        while len(S) < n_features:
            # loop through the remaining unselected features and calculate MI
            s = len(S) - 1
            # Calculate s-th row of feature_mi_matrix which contains the JMI score of the last element in S
            # with all remaining features in F
            feature_mi_matrix[s, F] = mimy.get_mi_vector(self, F, S[-1])

            # make decision based on the chosen FS algorithm
            fmm = feature_mi_matrix[:len(S), F]
            if self.method == 'JMI':
                # JMI: pick the feature in F with the largest cumulative score, \sum_{s in S} I(X_f, X_s; y)
                selected = F[bn.nanargmax(bn.nansum(fmm, axis=0))]
                # Find out which pair of features is the jmim for
                if self.verbose > 0:
                    jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                    jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                    jmi_idx = np.where(jmi_vals == jmim)[0]
                    print(jmim, S[jmi_idx[0]], selected)
            elif self.method == 'JMIM':
                if bn.allnan(bn.nanmin(fmm, axis=0)):
                    break
                selected = F[bn.nanargmax(bn.nanmin(fmm, axis=0))]
                # Find out which pair of features is the jmim for
                if self.verbose > 0:
                    jmim = bn.nanmax(bn.nanmin(fmm, axis=0))
                    jmi_vals = fmm[:, bn.nanargmax(bn.nanmin(fmm, axis=0))]
                    jmi_idx = np.where(jmi_vals == jmim)[0]
                    print(jmim, S[jmi_idx[0]], selected)
            elif self.method == 'MRMR':
                if bn.allnan(bn.nanmean(fmm, axis=0)):
                    break
                MRMR = xy_MI[F] - bn.nanmean(fmm, axis=0)
                selected = F[bn.nanargmax(MRMR)]
                S_mi.append(bn.nanmax(MRMR))

            # record the JMIM of the newly selected feature and add it to S
            if self.method != 'MRMR':
                S_mi.append(bn.nanmax(bn.nanmin(fmm, axis=0)))
            S, F = self._add_remove(S, F, selected)

            # notify user
            if self.verbose > 0:
                self._print_results(S, S_mi)

            # if n_features == 'auto', let's check the S_mi to stop
            if self.n_features == 'auto' and len(S) > 10:
                # smooth the 1st derivative of the MI values of previously sel
                MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)
                # does the mean of the last 5 converge to 0?
                if np.abs(np.mean(MI_dd[-5:])) < 1e-3:
                    break

        # ---------------------------------------------------------------------
        # SAVE RESULTS
        # ---------------------------------------------------------------------

        self.n_features_ = len(S)
        self._support_mask = np.zeros(p, dtype=bool)
        self._support_mask[S] = True
        self.ranking_ = S
        self.mi_ = S_mi

        return self
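# A self-contained sketch of the automatic stopping rule used above when
# n_features == 'auto': smooth the first derivative of the running selection scores
# with a Savitzky-Golay filter and stop once it has converged towards zero.
# The S_mi values below are made up for illustration.
import numpy as np
from scipy import signal

S_mi = [0.90, 0.60, 0.40, 0.30, 0.25, 0.22, 0.21, 0.21, 0.21, 0.21, 0.21, 0.21]
MI_dd = signal.savgol_filter(S_mi[1:], 9, 2, 1)     # smoothed 1st derivative
print(np.abs(np.mean(MI_dd[-5:])))                  # selection stops once this drops below 1e-3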
Example #29
File: Meta.py Project: tasoc/starclass
    def train(self, tset, savecl=True, overwrite=False):
        """
		Train the Meta-classifier.

		Parameters:
			tset (:class:`TrainingSet`): Training set to train classifier on.
			savecl (bool, optional): Save the classifier to file?
			overwrite (bool, optional): Overwrite existing classifier save file.

		.. codeauthor:: James S. Kuszlewicz <*****@*****.**>
		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""
        # Start a logger that should be used to output e.g. debug information:
        logger = logging.getLogger(__name__)

        # Parse the training-set labels into fitting labels:
        fitlabels = self.parse_labels(tset.labels())

        # First create list of all possible classifiers:
        all_classifiers = list(classifier_list)
        all_classifiers.remove('meta')

        # Create list of all features:
        # Save this to object, we are using it to keep track of which features were used
        # to train the classifier:
        self.features_used = list(
            itertools.product(all_classifiers, self.StellarClasses))
        self.features_names = [
            f'{classifier:s}_{stcl.name:s}'
            for classifier, stcl in self.features_used
        ]

        # Create table of features:
        # Create as float32, since that is what RandomForestClassifier converts it to anyway.
        logger.info("Importing features...")
        features = self.build_features_table(tset.features(), total=len(tset))

        # Remove columns that are all NaN:
        # This can be classifiers that never returns a given class or a classifier that
        # has not been run at all.
        keepcols = ~allnan(features, axis=0)
        features = features[:, keepcols]
        self.features_used = [
            x for i, x in enumerate(self.features_used) if keepcols[i]
        ]
        self.features_names = [
            x for i, x in enumerate(self.features_names) if keepcols[i]
        ]

        # Throw an error if a classifier is not run at all:
        run_classifiers = set([fu[0] for fu in self.features_used])
        if run_classifiers != set(all_classifiers):
            raise RuntimeError(
                "Classifier did not contribute at all: %s" %
                set(all_classifiers).difference(run_classifiers))

        # Raise an exception if there are NaNs left in the features:
        if anynan(features):
            raise ValueError("Features contains NaNs")

        logger.info("Features imported. Shape = %s", features.shape)

        # Run actual training:
        self.classifier.oob_score = True
        logger.info("Fitting model.")
        self.classifier.fit(features, fitlabels)
        logger.info('Trained. OOB Score = %s', self.classifier.oob_score_)
        self.classifier.trained = True

        if savecl and self.classifier.trained and self.clfile is not None:
            if overwrite or not os.path.exists(self.clfile):
                logger.info("Saving pickled classifier instance to '%s'",
                            self.clfile)
                self.save(self.clfile)