Example #1
    def _filter_is_defined(self, columns=None, negate=False):
        """
        Extract rows without undefined values.

        :param columns: optional list of columns that are checked for unknowns
        :type columns: sequence of ints, variable names or descriptors
        :param negate: invert the selection
        :type negate: bool
        :return: a new Table
        :rtype: Orange.data.Table
        """
        if columns is None:
            if sp.issparse(self.X):
                remove = (self.X.indptr[1:] !=
                          self.X.indptr[:-1] + self.X.shape[1])
            else:
                remove = bn.anynan(self.X, axis=1)
            if sp.issparse(self.Y):
                remove = np.logical_or(remove, self.Y.indptr[1:] !=
                                       self.Y.indptr[:-1] + self.Y.shape[1])
            else:
                remove = np.logical_or(remove, bn.anynan(self.Y, axis=1))
        else:
            remove = np.zeros(len(self), dtype=bool)
            for column in columns:
                col, sparse = self.get_column_view(column)
                if sparse:
                    remove = np.logical_or(remove, col == 0)
                else:
                    remove = np.logical_or(remove, bn.anynan(col))
        retain = remove if negate else np.logical_not(remove)
        return Table.from_table_rows(self, retain)
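
A note on the sparse branch above: a CSR row is fully defined exactly when the number of values stored for that row equals the number of columns, i.e. indptr[1:] == indptr[:-1] + shape[1]. A minimal sketch (not part of the example) illustrating the test:

import numpy as np
import scipy.sparse as sp

# The second row stores only one of its two values, so it has a missing entry.
X = sp.csr_matrix(np.array([[1., 2.], [3., 0.]]))
full = X.indptr[1:] == X.indptr[:-1] + X.shape[1]
print(full)  # [ True False]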
Example #2
def test_aperturephotometry(SHARED_INPUT_DIR, datasource):
    with TemporaryDirectory() as OUTPUT_DIR:
        with AperturePhotometry(DUMMY_TARGET,
                                SHARED_INPUT_DIR,
                                OUTPUT_DIR,
                                plot=True,
                                datasource=datasource,
                                **DUMMY_KWARG) as pho:

            pho.photometry()
            filepath = pho.save_lightcurve()
            print(pho.lightcurve)

            # It should set the status to one of these:
            assert pho.status in (STATUS.OK, STATUS.WARNING)

            # Check the sumimage:
            plt.figure()
            plot_image(pho.sumimage, title=datasource)

            assert not anynan(pho.sumimage), "There are NaNs in the SUMIMAGE"

            # They shouldn't be exactly zero:
            assert not np.all(pho.lightcurve['flux'] == 0)
            assert not np.all(pho.lightcurve['flux_err'] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 0] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 1] == 0)

            # They shouldn't be NaN (in this case!):
            assert not allnan(pho.lightcurve['flux'])
            assert not allnan(pho.lightcurve['flux_err'])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 0])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 1])

            assert not np.any(~np.isfinite(pho.lightcurve['time']))
            assert not np.any(pho.lightcurve['time'] == 0)

            # Test the outputted FITS file:
            with fits.open(filepath, mode='readonly') as hdu:
                # Should be the same vectors in FITS as returned in Table:
                np.testing.assert_allclose(pho.lightcurve['time'],
                                           hdu[1].data['TIME'])
                np.testing.assert_allclose(pho.lightcurve['timecorr'],
                                           hdu[1].data['TIMECORR'])
                np.testing.assert_allclose(pho.lightcurve['flux'],
                                           hdu[1].data['FLUX_RAW'])
                np.testing.assert_allclose(pho.lightcurve['flux_err'],
                                           hdu[1].data['FLUX_RAW_ERR'])
                np.testing.assert_allclose(pho.lightcurve['cadenceno'],
                                           hdu[1].data['CADENCENO'])

                # Test FITS aperture image:
                ap = hdu['APERTURE'].data
                print(ap)
                assert np.all(pho.aperture == ap), "Aperture image mismatch"
                assert not anynan(ap), "NaN in aperture image"
                assert np.all(ap >= 0), "Negative values in aperture image"
                assert np.any(ap & 2 != 0), "No photometric mask set"
                assert np.any(ap & 8 != 0), "No position mask set"
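
The last two asserts above test individual bits of the FITS aperture image; per the assert messages, value 2 (bit 1) flags the photometric mask and value 8 (bit 3) the position mask. A quick sketch of how such bitmask tests behave on a toy array:

import numpy as np

ap = np.array([[0, 2], [10, 8]])  # 10 == 2 | 8: that pixel carries both flags
print(np.any(ap & 2 != 0))  # True -> at least one pixel in the photometric mask
print(np.any(ap & 8 != 0))  # True -> at least one pixel in the position mask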
Example #3
    def __call__(self, data):
        if isinstance(data, Instance):
            return self.negate == bn.anynan(data._y)
        if isinstance(data, Storage):
            try:
                return data._filter_has_class(self.negate)
            except NotImplementedError:
                pass

        r = np.fromiter((not bn.anynan(inst._y) for inst in data), bool, len(data))
        if self.negate:
            r = np.logical_not(r)
        return data[r]
Example #4
    def __call__(self, data):
        if isinstance(data, Instance):
            return self.negate == bn.anynan(data._x)
        if isinstance(data, Storage):
            try:
                return data._filter_is_defined(self.columns, self.negate)
            except NotImplementedError:
                pass

        r = np.fromiter((not bn.anynan(inst._x) for inst in data), dtype=bool, count=len(data))
        if self.negate:
            r = np.logical_not(r)
        return data[r]
Example #5
    def __call__(self, data):
        if isinstance(data, Instance):
            return self.negate == bn.anynan(data._y)
        if isinstance(data, Storage):
            try:
                return data._filter_has_class(self.negate)
            except NotImplementedError:
                pass

        r = np.fromiter((not bn.anynan(inst._y) for inst in data), bool, len(data))
        if self.negate:
            r = np.logical_not(r)
        return data[r]
Example #6
    def __call__(self, data):
        if isinstance(data, Instance):
            return self.negate == bn.anynan(data._x)
        if isinstance(data, Storage):
            try:
                return data._filter_is_defined(self.columns, self.negate)
            except NotImplementedError:
                pass

        r = np.fromiter((not bn.anynan(inst._x) for inst in data),
                        dtype=bool, count=len(data))
        if self.negate:
            r = np.logical_not(r)
        return data[r]
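
Examples #3-#6 share the same fallback: when the input is not a Storage with a native filter, a boolean row mask is built with np.fromiter and used to index the container. A minimal sketch of that pattern, using plain arrays in place of Instance objects:

import numpy as np
import bottleneck as bn

rows = [np.array([1.0, 2.0]), np.array([np.nan, 2.0])]
mask = np.fromiter((not bn.anynan(r) for r in rows), dtype=bool, count=len(rows))
print(mask)  # [ True False]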
Example #7
def process_stack(data,
                  xat,
                  yat,
                  upsample_factor=100,
                  use_sobel=False,
                  ref_frame_num=0):
    hypercube, lsx, lsy = get_hypercube(data, xat, yat)
    if bn.anynan(hypercube):
        raise NanInsideHypercube(True)

    calculate_shift = RegisterTranslation(upsample_factor=upsample_factor)
    filterfn = sobel if use_sobel else lambda x: x
    shifts, aligned_stack = alignstack(hypercube.T,
                                       shiftfn=calculate_shift,
                                       ref_frame_num=ref_frame_num,
                                       filterfn=filterfn)

    xmin, ymin = shifts[:, 0].min(), shifts[:, 1].min()
    xmax, ymax = shifts[:, 0].max(), shifts[:, 1].max()
    xmin, xmax = int(round(xmin)), int(round(xmax))
    ymin, ymax = int(round(ymin)), int(round(ymax))

    shape = hypercube.shape
    slicex = slice(max(xmax, 0), min(shape[1], shape[1] + xmin))
    slicey = slice(max(ymax, 0), min(shape[0], shape[0] + ymin))
    cropped = np.array(aligned_stack).T[slicey, slicex]

    # transform numpy array back to Orange.data.Table
    return shifts, build_spec_table(
        *_spectra_from_image(cropped, getx(data),
                             np.linspace(*lsx)[slicex],
                             np.linspace(*lsy)[slicey]))
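
The crop at the end of process_stack keeps only the region covered by every shifted frame. A toy run of the same slice arithmetic, assuming integer pixel shifts:

import numpy as np

shifts = np.array([[0, 0], [2, -1], [-1, 3]])  # (x, y) shift per frame
xmin, xmax = int(round(shifts[:, 0].min())), int(round(shifts[:, 0].max()))
ymin, ymax = int(round(shifts[:, 1].min())), int(round(shifts[:, 1].max()))

shape = (10, 12)  # (rows, cols) of each frame
slicex = slice(max(xmax, 0), min(shape[1], shape[1] + xmin))
slicey = slice(max(ymax, 0), min(shape[0], shape[0] + ymin))
print(slicex, slicey)  # slice(2, 11, None) slice(3, 9, None)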
Example #8
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    if sp.issparse(X):
        minlength = max_val + 1
        bin_weights = weights[X.indices] if weights is not None else None
        return (np.bincount(
            X.data.astype(int),
            weights=bin_weights,
            minlength=minlength,
        ), _count_nans_per_row_sparse(X, weights))

    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        X = X[nonnan]
        if weights is not None:
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if X.ndim == 1 else np.zeros(X.shape[1], dtype=float)
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    bc = np.array([]) if minlength is not None and minlength <= 0 else \
        np.bincount(X.astype(np.int32, copy=False),
                    weights=weights, minlength=minlength).astype(float)
    return bc, nans
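
A quick check of the NaN handling documented above, calling the bincount defined in this example on a dense float array (NaNs are dropped from the counts and reported separately):

import numpy as np

counts, nans = bincount(np.array([0., 1., 1., np.nan]), max_val=2)
print(counts, nans)  # [1. 2. 0.] 1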
Example #9
        def checks(metric, data):
            if data is None:
                return

            if issparse(data.X) and not metric.supports_sparse:
                self.Error.dense_metric_sparse_data()
                return

            if not any(a.is_continuous for a in data.domain.attributes):
                self.Error.no_continuous_features()
                return

            needs_preprocessing = False
            if any(a.is_discrete for a in self.data.domain.attributes):
                self.Warning.ignoring_discrete()
                needs_preprocessing = True

            if not issparse(data.X) and bn.anynan(data.X):
                self.Warning.imputing_data()
                needs_preprocessing = True

            if needs_preprocessing:
                # removes discrete features and imputes data
                data = distance._preprocess(data)

            if not data.X.size:
                self.Error.empty_data()
                return

            return data
Example #10
 def transformed(self, X, x):
     newd = np.zeros_like(X)
     for rowi, row in enumerate(X):
         # remove NaNs which ConvexHull can not handle
         source = np.column_stack((x, row))
         source = source[~bottleneck.anynan(source, axis=1)]
         try:
             v = ConvexHull(source).vertices
         except (QhullError, ValueError):
             # FIXME notify user
             baseline = np.zeros_like(row)
         else:
             if self.peak_dir == RubberbandBaseline.PeakPositive:
                 v = np.roll(v, -v.argmin())
                 v = v[:v.argmax() + 1]
             elif self.peak_dir == RubberbandBaseline.PeakNegative:
                 v = np.roll(v, -v.argmax())
                 v = v[:v.argmin() + 1]
             # If there are NaN values at the edges of data then convex hull
             # does not include the endpoints. Because the same values are also
             # NaN in the current row, we can fill them with NaN (bounds_error
             # achieves this).
             baseline = interp1d(source[v, 0],
                                 source[v, 1],
                                 bounds_error=False)(x)
         finally:
             if self.sub == 0:
                 newd[rowi] = row - baseline
             else:
                 newd[rowi] = baseline
     return newd
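
A self-contained sketch of the rubber-band idea used above, on synthetic data with a positive peak: the lower edge of the convex hull of the (x, y) points is interpolated and subtracted as the baseline.

import numpy as np
from scipy.spatial import ConvexHull
from scipy.interpolate import interp1d

x = np.linspace(0, 10, 101)
y = 0.1 * x + np.exp(-(x - 5) ** 2)  # sloped baseline plus a peak
v = ConvexHull(np.column_stack((x, y))).vertices
v = np.roll(v, -v.argmin())  # start the vertex list at the leftmost point
v = v[:v.argmax() + 1]  # keep only the lower edge of the hull
baseline = interp1d(x[v], y[v], bounds_error=False)(x)
corrected = y - baseline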
Example #11
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    if sp.issparse(X):
        minlength = max_val + 1
        bin_weights = weights[X.indices] if weights is not None else None
        return (np.bincount(X.data.astype(int),
                            weights=bin_weights,
                            minlength=minlength, ),
                _count_nans_per_row_sparse(X, weights))

    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        X = X[nonnan]
        if weights is not None:
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if X.ndim == 1 else np.zeros(X.shape[1], dtype=float)
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    return (np.bincount(X.astype(np.int32, copy=False),
                        weights=weights,
                        minlength=minlength).astype(float),
            nans)
Example #12
def feature_extract(features,
                    featnames,
                    total=None,
                    linflatten=False,
                    recalc=False):

    if isinstance(features, dict):
        features = [features]
    if total is None:
        total = len(features)

    featout = np.empty([total, len(featnames)], dtype='float32')
    for k, obj in enumerate(features):
        # Load features from the provided (cached) features if they exist:
        featout[k, :] = np.array([obj.get(key, np.NaN) for key in featnames],
                                 dtype='float32')

        if recalc or anynan(featout[k, :]):
            # TODO: Why is it needed to re-normalize the lightcurve here?
            lc = RF_GC_featcalc.prepLCs(obj['lightcurve'], linflatten)

            featout[k, 0] = ss.skew(lc.flux)  # Skewness
            featout[k, 1] = ss.kurtosis(lc.flux)  # Kurtosis
            featout[k, 2] = ss.shapiro(lc.flux)[0]  # Shapiro-Wilk test statistic for normality
            featout[k, 3] = calculate_eta(lc)

            periods, n_usedfreqs, usedfreqs = get_periods(
                obj, 6, lc.time, ignore_harmonics=False)
            amp21, amp31 = RF_GC_featcalc.freq_ampratios(
                obj, n_usedfreqs, usedfreqs)
            pd21, pd31 = RF_GC_featcalc.freq_phasediffs(
                obj, n_usedfreqs, usedfreqs)

            featout[k, 4] = periods[0]

            if n_usedfreqs > 0:
                featout[k, 5] = obj['frequencies'][
                    (obj['frequencies']['num'] == 1)
                    & (obj['frequencies']['harmonic'] == 0)]['amplitude']
            else:
                featout[k, 5] = 0.

            featout[k, 6] = amp21
            featout[k, 7] = amp31
            featout[k, 8] = pd21
            featout[k, 9] = pd31

            # phase-fold lightcurve on dominant period
            folded_lc = lc.fold(period=periods[0])

            # Compute phi_rcs and rcs features
            featout[k, 10] = Rcs(lc)
            featout[k, 11] = Rcs(folded_lc)

        # If the amp1 feature is NaN, replace it with zero:
        if np.isnan(featout[k, 5]):
            featout[k, 5] = 0

    return featout
Example #13
 def _filter_is_defined(self, columns=None, negate=False):
     if columns is None:
         if sp.issparse(self.X):
             remove = self.X.indptr[1:] != self.X.indptr[:-1] + self.X.shape[1]
         else:
             remove = bn.anynan(self.X, axis=1)
         if sp.issparse(self.Y):
             remove = np.logical_or(remove, self.Y.indptr[1:] != self.Y.indptr[:-1] + self.Y.shape[1])
         else:
             remove = np.logical_or(remove, bn.anynan(self.Y, axis=1))
     else:
         remove = np.zeros(len(self), dtype=bool)
         for column in columns:
             col, sparse = self.get_column_view(column)
             if sparse:
                 remove = np.logical_or(remove, col == 0)
             else:
                 remove = np.logical_or(remove, bn.anynan(col))
     retain = remove if negate else np.logical_not(remove)
     return Table.from_table_rows(self, retain)
Example #14
 def _filter_has_class(self, negate=False):
     if sp.issparse(self.Y):
         if negate:
             retain = self.Y.indptr[1:] != self.Y.indptr[:-1] + self.Y.shape[1]
         else:
             retain = self.Y.indptr[1:] == self.Y.indptr[:-1] + self.Y.shape[1]
     else:
         retain = bn.anynan(self.Y, axis=1)
         if not negate:
             retain = np.logical_not(retain)
     return Table.from_table_rows(self, retain)
Example #15
def nan_extend_edges_and_interpolate(xs, X):
    """
    NaNs at the edges are handled as with savgol_filter mode nearest:
    the edge values are interpolated. NaNs in the middle are interpolated
    so that they do not propagate.
    """
    nans = None
    if bottleneck.anynan(X):
        nans = np.isnan(X)
        X = X.copy()
        xs, xsind, mon, X = transform_to_sorted_wavenumbers(xs, X)
        fill_edges(X)
        X = interp1d_with_unknowns_numpy(xs[xsind], X, xs[xsind])
        X = transform_back_to_features(xsind, mon, X)
    return X, nans
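
The edge handling described in the docstring can be illustrated for a single row with np.interp, which fills leading and trailing NaNs with the nearest valid value and interior NaNs by linear interpolation (a sketch, not the vectorized implementation used above):

import numpy as np

xs = np.arange(6.0)
row = np.array([np.nan, 1.0, np.nan, 3.0, 4.0, np.nan])
ok = ~np.isnan(row)
print(np.interp(xs, xs[ok], row[ok]))  # [1. 1. 2. 3. 4. 4.]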
Example #16
    def featcalc(self, features, total=None, recalc=False):
        """
		Calculates features for a set of lightcurves.
		"""

        if isinstance(features, dict):  # trick for single features
            features = [features]
        if total is None:
            total = len(features)

        featout = np.empty([total, len(self.features_names)], dtype='float32')
        for k, obj in enumerate(features):
            # Load features from the provided (cached) features if they exist:
            featout[k, :] = [
                obj.get(key, np.NaN) for key in self.features_names
            ]

            # If not all features are already populated, we are going to recalculate them all:
            if recalc or anynan(featout[k, :]):
                lc = fc.prepLCs(obj['lightcurve'], linflatten=False)

                periods, _, _ = get_periods(obj,
                                            NFREQUENCIES,
                                            lc.time,
                                            in_days=False)
                featout[k, :NFREQUENCIES] = periods

                #EBper = EBperiod(lc.time, lc.flux, periods[0], linflatten=linflatten-1)
                #featout[k, 0] = EBper # overwrites top period

                featout[k, NFREQUENCIES:NFREQUENCIES + 2] = fc.compute_varrat(obj)
                #featout[k, NFREQUENCIES+1:NFREQUENCIES+2] = fc.compute_lpf1pa11(obj)
                featout[k, NFREQUENCIES + 2:NFREQUENCIES + 3] = stat.skew(lc.flux)
                featout[k, NFREQUENCIES + 3:NFREQUENCIES + 4] = fc.compute_flux_ratio(lc.flux)
                featout[k, NFREQUENCIES + 4:NFREQUENCIES + 5] = fc.compute_differential_entropy(lc.flux)
                featout[k, NFREQUENCIES + 5:NFREQUENCIES + 6] = fc.compute_differential_entropy(
                    obj['powerspectrum'].standard[1])
                featout[k, NFREQUENCIES + 6:NFREQUENCIES + 10] = fc.compute_multiscale_entropy(lc.flux)
                #featout[k, NFREQUENCIES+10:NFREQUENCIES+11] = fc.compute_max_lyapunov_exponent(lc.flux)

        return featout
Example #17
    def compute_distances(self, metric, data):
        self.clear_messages()

        if data is None:
            return

        if issparse(data.X) and not metric.supports_sparse:
            self.Error.dense_metric_sparse_data()
            return

        if not any(a.is_continuous for a in data.domain.attributes):
            self.Error.no_continuous_features()
            return

        needs_preprocessing = False
        if any(a.is_discrete for a in self.data.domain.attributes):
            self.Warning.ignoring_discrete()
            needs_preprocessing = True

        if not issparse(data.X) and bn.anynan(data.X):
            self.Warning.imputing_data()
            needs_preprocessing = True

        if needs_preprocessing:
            # removes discrete features and imputes data
            data = distance._preprocess(data)

        if not data.X.size:
            self.Error.empty_data()
            return

        if isinstance(metric, distance.MahalanobisDistance):
            n, m = data.X.shape
            if self.axis == 1:
                n, m = m, n

        if isinstance(metric, distance.MahalanobisDistance):
            # Mahalanobis distance has to be trained before it can be used
            # to compute distances
            try:
                metric.fit(data, axis=1 - self.axis)
            except (ValueError, MemoryError) as e:
                self.Error.mahalanobis_error(e)
                return

        return metric(data, data, 1 - self.axis, impute=True)
Example #18
    def compute_distances(self, metric, data):
        self.clear_messages()

        if data is None:
            return

        if issparse(data.X) and not metric.supports_sparse:
            self.Error.dense_metric_sparse_data()
            return

        if not any(a.is_continuous for a in data.domain.attributes):
            self.Error.no_continuous_features()
            return

        needs_preprocessing = False
        if any(a.is_discrete for a in self.data.domain.attributes):
            self.Warning.ignoring_discrete()
            needs_preprocessing = True

        if not issparse(data.X) and bn.anynan(data.X):
            self.Warning.imputing_data()
            needs_preprocessing = True

        if needs_preprocessing:
            # removes discrete features and imputes data
            data = distance._preprocess(data)

        if not data.X.size:
            self.Error.empty_data(data.X.shape)
            return

        if isinstance(metric, distance.MahalanobisDistance):
            n, m = data.X.shape
            if self.axis == 1:
                n, m = m, n
            if n <= m:
                self.Error.too_few_observations()
                return

        if isinstance(metric, distance.MahalanobisDistance):
            # Mahalanobis distance has to be trained before it can be used
            # to compute distances
            metric.fit(data, axis=1 - self.axis)

        return metric(data, data, 1 - self.axis, impute=True)
Example #19
def test_halo(SHARED_INPUT_DIR, datasource):
	with TemporaryDirectory() as OUTPUT_DIR:
		with HaloPhotometry(267211065, SHARED_INPUT_DIR, OUTPUT_DIR, plot=True, datasource=datasource, sector=1, camera=3, ccd=2) as pho:

			pho.photometry()
			filepath = pho.save_lightcurve()
			print(pho.lightcurve)

			# It should set the status to one of these:
			print(pho.status)
			assert pho.status in (STATUS.OK, STATUS.WARNING)

			# They shouldn't be exactly zero:
			assert not np.all(pho.lightcurve['flux'] == 0)
			assert not np.all(pho.lightcurve['flux_err'] == 0)
			assert not np.all(pho.lightcurve['pos_centroid'][:,0] == 0)
			assert not np.all(pho.lightcurve['pos_centroid'][:,1] == 0)

			# They shouldn't be NaN (in this case!):
			assert not allnan(pho.lightcurve['flux'])
			assert not allnan(pho.lightcurve['flux_err'])
			assert not allnan(pho.lightcurve['pos_centroid'][:,0])
			assert not allnan(pho.lightcurve['pos_centroid'][:,1])

			# Test the outputted FITS file:
			with fits.open(filepath, mode='readonly') as hdu:
				# Should be the same vectors in FITS as returned in Table:
				np.testing.assert_allclose(pho.lightcurve['time'], hdu[1].data['TIME'])
				np.testing.assert_allclose(pho.lightcurve['timecorr'], hdu[1].data['TIMECORR'])
				np.testing.assert_allclose(pho.lightcurve['flux'], hdu[1].data['FLUX_RAW'])
				np.testing.assert_allclose(pho.lightcurve['flux_err'], hdu[1].data['FLUX_RAW_ERR'])
				np.testing.assert_allclose(pho.lightcurve['cadenceno'], hdu[1].data['CADENCENO'])

				# Test FITS aperture image:
				ap = hdu['APERTURE'].data
				print(ap)
				assert np.all(pho.aperture == ap), "Aperture image mismatch"
				assert not anynan(ap), "NaN in aperture image"
				assert np.all(ap >= 0), "Negative values in aperture image"
				assert np.any(ap & 2 != 0), "No photometric mask set"
				#assert np.any(ap & 8 != 0), "No position mask set"

	print("Passed Tests for %s" % datasource)
Example #20
    def freq_counts(self, arrs, lens):
        """
        Calculates frequencies of samples.

        Parameters
        ----------
        arrs
            A sequence of arrays.
        lens
            A sequence of the numbers of distinct values in the arrays.
        Returns
        -------
        numpy.ndarray
            A 1D numpy array of frequencies.

        """
        no_nans = reduce(np.logical_and,
                         [~np.isnan(a) if bn.anynan(a) else np.ones(self.m).astype(bool)
                          for a in arrs])
        combined = reduce(add,
                          [arrs[i][no_nans] * reduce(mul, lens[:i]) for i in range(1, len(arrs))],
                          arrs[0][no_nans])
        return np.bincount(combined.astype(np.int32, copy=False),
                           minlength=reduce(mul, lens)).astype(float)
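
The key step above is the mixed-radix combination: each sample's values across several discrete columns are collapsed into one integer code so that a single np.bincount call yields the joint frequency table. A minimal sketch with two columns of 2 and 3 distinct values:

import numpy as np

a = np.array([0, 1, 1, 0])  # 2 distinct values
b = np.array([2, 0, 2, 1])  # 3 distinct values
combined = a + b * 2  # code = a + b * lens[0]
joint = np.bincount(combined, minlength=2 * 3).astype(float)
print(joint.reshape(3, 2))  # joint counts indexed by [b, a]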
Example #21
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        nans = (~nonnan).sum(axis=0)
        X = X[nonnan]
        if weights is not None:
            weights = weights[nonnan]
    else:
        nans = 0. if X.ndim == 1 else np.zeros(X.shape[1], dtype=float)
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    return (np.bincount(X.astype(np.int32, copy=False),
                        weights=weights,
                        minlength=minlength).astype(float), nans)
Example #22
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        nans = (~nonnan).sum(axis=0)
        X = X[nonnan]
        if weights is not None:
            weights = weights[nonnan]
    else:
        nans = 0 if X.ndim == 1 else np.zeros(X.shape[1])
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    return (np.bincount(X.astype(np.int32, copy=False),
                        weights=weights,
                        minlength=minlength),
            nans)
Example #23
def argnanmedoid(x, axis=1):
    """
    Return the indices of the medoid

    :param x: input array
    :param axis: axis to medoid along
    :return: indices of the medoid
    """
    if axis == 0:
        x = x.T

    invalid = anynan(x, axis=0)
    band, time = x.shape
    diff = x.reshape(band, time, 1) - x.reshape(band, 1, time)
    # dist = np.linalg.norm(diff, axis=0) is slower somehow...
    dist = np.sqrt(np.sum(diff * diff, axis=0))
    dist_sum = nansum(dist, axis=0)
    dist_sum[invalid] = np.inf
    i = np.argmin(dist_sum)

    return i
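
Toy usage of argnanmedoid, assuming anynan and nansum are imported from bottleneck as in the example's module. One of the three points contains a NaN and is therefore excluded:

import numpy as np

x = np.array([[0.0, 1.0, 10.0],
              [0.0, 1.0, np.nan]])  # shape (band, time): three 2-D points
print(argnanmedoid(x))  # 0 (tied with point 1; argmin returns the first)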
Example #24
def medoid_indices(arr, invalid=None):
    """
    The indices of the medoid.

    :arg arr: input array
    :arg invalid: mask for invalid data containing NaNs
    """
    # vectorized version of `argnanmedoid`
    bands, times, ys, xs = arr.shape

    diff = (arr.reshape(bands, times, 1, ys, xs) -
            arr.reshape(bands, 1, times, ys, xs))

    dist = np.linalg.norm(diff, axis=0)
    dist_sum = nansum(dist, axis=0)

    if invalid is None:
        # compute it in case it's not already available
        invalid = anynan(arr, axis=0)

    dist_sum[invalid] = np.inf
    return np.argmin(dist_sum, axis=0)
Example #25
    def _filter_has_class(self, negate=False):
        """
        Return rows with known class attribute. If there are multiple classes,
        all must be defined.

        :param negate: invert the selection
        :type negate: bool
        :return: new table
        :rtype: Orange.data.Table
        """
        if sp.issparse(self.Y):
            if negate:
                retain = (self.Y.indptr[1:] !=
                          self.Y.indptr[:-1] + self.Y.shape[1])
            else:
                retain = (self.Y.indptr[1:] ==
                          self.Y.indptr[:-1] + self.Y.shape[1])
        else:
            retain = bn.anynan(self.Y, axis=1)
            if not negate:
                retain = np.logical_not(retain)
        return Table.from_table_rows(self, retain)
Example #26
 def __call__(self, data):
     # convert to data domain if any conversion is possible,
     # otherwise we use the interpolator directly to make domains compatible
     if self.domain is not None and data.domain != self.domain \
             and any(at.compute_value for at in self.domain.attributes):
         data = data.from_table(self.domain, data)
     x = getx(data)
     # removing whole NaN columns from the data will effectively replace
     # NaNs that are not on the edges with interpolated values
     ys = data.X
     if self.handle_nans:
         x, ys = remove_whole_nan_ys(x, ys)  # relatively fast
     if len(x) == 0:
         return np.ones((len(data), len(self.points))) * np.nan
     interpfn = self.interpfn
     if interpfn is None:
         if self.handle_nans and bottleneck.anynan(ys):
             if self.kind == "linear":
                 interpfn = interp1d_with_unknowns_numpy
             else:
                 interpfn = interp1d_with_unknowns_scipy
         else:
             interpfn = interp1d_wo_unknowns_scipy
     return interpfn(x, ys, self.points, kind=self.kind)
Example #27
    def do_classify(self, features):
        """
		Classify a single lightcurve.

		Parameters:
			features (dict): Dictionary of features.

		Returns:
			dict: Dictionary of stellar classifications.
		"""
        # Start a logger that should be used to output e.g. debug information:
        logger = logging.getLogger(__name__)

        if not self.classifier.trained:
            logger.error('Classifier has not been trained. Exiting.')
            raise ValueError('Classifier has not been trained. Exiting.')

        # Build features array from the probabilities from the other classifiers:
        # TODO: What about NaN values?
        logger.debug("Importing features...")
        featarray = self.build_features_table([features], total=1)

        if anynan(featarray):
            raise ValueError("Features contains NaNs")

        logger.debug("We are starting the magic...")
        # Comes out with shape (1,8), but instead want shape (8,) so squeeze
        classprobs = self.classifier.predict_proba(featarray).squeeze()
        logger.debug("Classification complete")

        # Format the output:
        result = {}
        for c, cla in enumerate(self.classifier.classes_):
            key = self.StellarClasses(cla)
            result[key] = classprobs[c]
        return result, featarray
Example #28
 def _fix_missing():
     nonlocal data
     if not metric.supports_missing and bn.anynan(data.X):
         self.Warning.imputing_data()
         data = distance.impute(data)
Example #29
    def __call__(self, raw, cdf_attr):
        """input_translator convert raw netcdf variables into form
           used by the hsrl processing code and preforms pileup
           correction on photon counts"""

        if hasattr(raw, 'wfov_counts') and self.constants['wfov_type'] == 'molecular':
            raw.molecular_wfov_counts = raw.wfov_counts.copy()
        elif hasattr(raw, 'wfov_counts'):
            raw.combined_wfov_hi_counts = raw.wfov_counts.copy()

        if hasattr(raw, 'op_mode'):
            #extract i2 lock bit from operating mode
            #this will allow testing of bit even after averaging
            raw.i2_locked = (raw.op_mode[:].astype(int) & 4) / 4

        if hasattr(raw, 'seeded_shots'):
            setattr(
                raw, 'delta_t', raw.seeded_shots[:, 0] /
                float(self.constants['laser_rep_rate']))
        else:
            setattr(raw, 'delta_t', np.zeros([0]))
        #for i in np.arange(raw.times.size):
        #    raw.times[i]-=timedelta(seconds=raw.delta_t[i])

        if hasattr(raw, 'transmitted_energy'):
            # convert to mJ per preaveraged accumulation interval
            raw.transmitted_energy[:] = raw.transmitted_energy \
                *self.constants['transmitted_energy_monitor'][0]\
                +self.constants['transmitted_energy_monitor'][1]\
                *raw.seeded_shots[:,0]
            #compute transmitted power
            setattr(raw, 'transmitted_power',
                    raw.transmitted_energy / raw.delta_t)

        if hasattr(raw, 'transmitted_1064_energy'):
            # convert to mJ per preaveraged accumulation interval
            raw.transmitted_1064_energy[:] = raw.transmitted_1064_energy \
                *self.constants['transmitted_1064_energy_monitor'][0]\
                +self.constants['transmitted_1064_energy_monitor'][1]\
                *raw.seeded_shots[:,0]
            #compute transmitted 1064 power
            setattr(raw, 'transmitted_1064_power',
                    raw.transmitted_1064_energy / raw.delta_t)

        if hasattr(raw, 'filtered_energy'):
            if raw.filtered_energy.dtype == 'int32':
                raw.nonfiltered_energy = raw.nonfiltered_energy.astype(
                    'float64')
                raw.filtered_energy = raw.filtered_energy.astype('float64')
            if len(raw.filtered_energy.shape) == 1:
                raw.filtered_energy = raw.filtered_energy[:, np.newaxis]
                raw.nonfiltered_energy = raw.nonfiltered_energy[:, np.newaxis]
            raw.filtered_energy[raw.filtered_energy > 1e10] = np.NaN
            raw.nonfiltered_energy[raw.nonfiltered_energy > 1e10] = np.NaN

        if hasattr(raw, 'builduptime') and raw.builduptime.size > 0:
            raw.qswitch_buildup_time = raw.builduptime[:, 0]
            raw.min_qswitch_buildup_time = raw.builduptime[:, 1]
            raw.max_qswitch_buildup_time = raw.builduptime[:, 2]

        if hasattr(raw, 'superseedlasercontrollog'):
            raw.superseedlasercontrollog[
                raw.superseedlasercontrollog > 1e10] = np.NaN

        if hasattr(raw,'energyRatioLockPoint') \
               and raw.energyRatioLockPoint.size>0:
            if len(raw.energyRatioLockPoint.shape) == 2:
                raw.filtered_lockpoint = raw.energyRatioLockPoint[:, 0]
                raw.nonfiltered_lockpoint = raw.energyRatioLockPoint[:, 1]
            else:
                clarray = hau.T_Array(np.ones([raw.filtered_energy.shape[0]]))
                raw.filtered_lockpoint = clarray * raw.energyRatioLockPoint[0]
                raw.nonfiltered_lockpoint = clarray * raw.energyRatioLockPoint[1]

        if hasattr(raw, 'raw_analog_interferometertemperature'):
            thermistor_cal = self.constants['interferometer_temp_cal']
            R = np.abs(raw.raw_analog_interferometertemperature / 0.000250)
            raw.interferometer_temp = 1/(thermistor_cal[0] + thermistor_cal[1] \
                    * np.log(R) + thermistor_cal[2] * np.log(R)** 3) - 273.15

            ntemps = len(raw.interferometer_temp)
            if 0:  #ntemps > 5:
                #do eleven point median filter
                temps = np.zeros((ntemps, 11))
                temps[0:ntemps - 5, 0] = raw.interferometer_temp[5:]
                temps[0:ntemps - 4, 1] = raw.interferometer_temp[4:]
                temps[0:ntemps - 3, 2] = raw.interferometer_temp[3:]
                temps[0:ntemps - 2, 3] = raw.interferometer_temp[2:]
                temps[0:ntemps - 1, 4] = raw.interferometer_temp[1:]
                temps[:, 5] = raw.interferometer_temp
                temps[1:, 6] = raw.interferometer_temp[:ntemps - 1]
                temps[2:, 7] = raw.interferometer_temp[:ntemps - 2]
                temps[3:, 8] = raw.interferometer_temp[:ntemps - 3]
                temps[4:, 9] = raw.interferometer_temp[:ntemps - 4]
                temps[5:, 10] = raw.interferometer_temp[:ntemps - 5]
                raw.interferometer_temp = hau.T_Array(np.median(temps, 1))
            else:
                raw.interferometer_temp = hau.T_Array(raw.interferometer_temp)

        if hasattr(raw, 'raw_analog_etalontemperature'):
            # convert etalon thermistor voltage to thermistor resistance
            # T(degC) = 1/(a + b*ln(R) + c*ln(R)^3) - 273.15
            # (Steinhart-Hart equation)
            # Where:
            # a = 0.000862448
            # b = 0.000258456
            # c = 0.000000142
            # and
            # R = (Volts ADC Reading)/(0.000250 amps)
            thermistor_cal = self.constants['interferometer_temp_cal']
            R = np.abs(raw.raw_analog_etalontemperature / 0.000250)
            raw.etalon_temp = (hau.T_Array(
                np.array(
                    (1.0 / (thermistor_cal[0] + thermistor_cal[1] * np.log(R) +
                            thermistor_cal[2] * np.log(R)**3) - 273.15),
                    dtype=np.float32,
                    ndmin=1)))
        if hasattr(raw, 'raw_analog_coolanttemperature'):
            # convert coolant thermistor voltage to thermistor resistance
            # T(degC) = 1/(a + b*ln(R) + c*ln(R)^3) - 273.15
            # (Steinhart-Hart equation)
            # Where:
            # a = 0.000862448
            # b = 0.000258456
            # c = 0.000000142
            # and
            # R = (Volts ADC Reading)/(0.000250 amps)
            thermistor_cal = self.constants['interferometer_temp_cal']

            R = np.abs(raw.raw_analog_coolanttemperature / 0.000250)


            raw.coolant_temperature = \
                  (hau.T_Array(np.array((1.0 / (thermistor_cal[0] + thermistor_cal[1]
                  * np.log(R) + thermistor_cal[2] * np.log(R)** 3)
                  - 273.15),dtype=np.float32,ndmin=1)))

        if hasattr(raw, 'telescope_pointing'):
            if not hasattr(raw, 'telescope_locked'):
                setattr(raw, 'telescope_locked',
                        np.ones_like(raw.telescope_pointing))
            raw.telescope_pointing = raw.telescope_pointing.astype('float64')
            raw.telescope_pointing[raw.telescope_locked == 0] = .5
            #roll component of telescope mounting angle in degrees measured relative
            #to platform (zero degrees = vertical)
            #roll angle is + in clockwise direction
            if not hasattr(raw, 'telescope_roll_angle_offset'):
                setattr(raw, 'telescope_roll_angle_offset',
                        np.ones_like(raw.telescope_pointing))
            raw.telescope_roll_angle_offset[:] = self.constants[
                'telescope_roll_angle_offset']
            raw.telescope_roll_angle_offset[raw.telescope_pointing == 0] = \
                                                180.0 - self.constants['telescope_roll_angle_offset']

        if hasattr(raw, 'raw_analog_telescope_temperature'):
            # convert telescope thermistor voltage to thermistor resistance
            # T(degC) = 1/(a + b*ln(R) + c*ln(R)^3) - 273.15
            # (Steinhart-Hart equation)
            # Where:
            # a = 0.000862448
            # b = 0.000258456
            # c = 0.000000142
            # and
            # R = (Volts ADC Reading)/(0.000250 amps)
            thermistor_cal = self.constants['interferometer_temp_cal']

            R = np.abs(raw.raw_analog_telescope_temperature / 0.000250)


            raw.telescope_temperature = \
                  (hau.T_Array(np.array((1.0 / (thermistor_cal[0] + thermistor_cal[1]
                  * np.log(R) + thermistor_cal[2] * np.log(R)** 3)
                  - 273.15),dtype=np.float32,ndmin=1)))
        if hasattr(raw,'OutgoingBeamPosition_centermass')\
               and raw.OutgoingBeamPosition_centermass.size > 0 :
            raw.cg_xs = raw.OutgoingBeamPosition_centermass[:, 0]
            raw.cg_ys = raw.OutgoingBeamPosition_centermass[:, 1]

        if hasattr(raw,'OutgoingBeamPosition2_centermass')\
                  and raw.OutgoingBeamPosition2_centermass.size > 0 :
            raw.cg_xs2 = raw.OutgoingBeamPosition2_centermass[:, 0]
            raw.cg_ys2 = raw.OutgoingBeamPosition2_centermass[:, 1]

        if hasattr(raw,'interferometer_intensity') \
               and raw.interferometer_intensity.size > 0:
            interf_peak = \
                self.constants['interferometer_spectral_peak']
            phase_to_freq = \
                self.constants['interferometer_phase_to_freq']
            npixels = self.constants['interferometer_fft_npixels']
            xform = np.fft.rfft(raw.interferometer_intensity[:, :npixels],
                                axis=1)
            tmp = np.concatenate(
                ([self.unwrap_firstangle], np.angle(xform[:, interf_peak])))
            newlast = tmp[-1]
            tmp = np.unwrap(tmp)
            tmp = (self.unwrap_firstangle_atmagnitude - tmp[0]) + tmp
            if np.isfinite(tmp[-1]):
                self.unwrap_firstangle_atmagnitude = tmp[-1]
                self.unwrap_firstangle = newlast
            raw.interf_freq = tmp[1:]
            raw.interf_freq = hau.T_Array(-raw.interf_freq * phase_to_freq[0])

        #compute temperature compensated interferometer freq
        if 0 and hasattr(raw,'interferometer_temp') \
               and hasattr(raw,'interf_freq')\
               and 'interf_temp_coef' in self.constants:
            raw.tcomp_interf_freq = raw.interf_freq \
                        - (raw.interferometer_temp-raw.interferometer_temp[0])\
                         * self.constants['interf_temp_coef']*1e9
        for imagetime in ('interferometer_snapshot_time',
                          'outgoingbeamalignment_snapshot_time',
                          'overhead_snapshot_time', 'snowscope_snapshot_time'):
            if hasattr(raw, imagetime):
                setattr(
                    raw, imagetime,
                    hru.convert_to_python_times(
                        getattr(raw, imagetime)[np.newaxis, :]))

        #replace missing values with NaNs
        if hasattr(raw, 'seedvoltage'):
            raw.seedvoltage[raw.seedvoltage > 100] = np.NaN
        if hasattr(raw, 'latitude'):
            raw.latitude[raw.latitude > 100] = np.NaN
        if hasattr(raw, 'longitude'):
            raw.longitude[raw.longitude > 200] = np.NaN

        if hasattr(raw, 'laserpowervalues') and raw.laserpowervalues.size > 0:
            raw.laser_current = raw.laserpowervalues[:, 0]
            raw.laser_voltage = raw.laserpowervalues[:, 1]
            if raw.laserpowervalues.shape[1] > 2:
                raw.laser_current_setpoint = raw.laserpowervalues[:, 2]
                raw.laser_diode_temp = raw.laserpowervalues[:, 3]
                raw.laser_diode_temp_setpoint = raw.laserpowervalues[:, 4]
            if raw.laserpowervalues.shape[1] > 6:
                raw.ktp_temp = raw.laserpowervalues[:, 5]
                raw.ktp_temp_setpoint = raw.laserpowervalues[:, 6]

        #remove spikes from tcs records
        for fiel in ('tcsopticstop_', 'tcsoptics_', 'tcstelescope_',
                     'thermal1_', 'thermal2_', 'tcsaft_', 'tcsfore_'):
            for f in vars(raw).keys():
                if f.startswith(fiel):
                    v = getattr(raw, f)
                    v[v > 1000] = np.NaN

        if hasattr(raw,'one_wire_temperatures') \
                and raw.one_wire_temperatures.size >0 :

            #raw.one_wire_attrib = cdf_attr['one_wire_temperatures']
            raw.one_wire_attrib = []
            [ntime, ntemps] = raw.one_wire_temperatures.shape
            for i in range(ntemps):
                string = 'field' + str(i) + '_name'
                try:
                    raw.one_wire_attrib.append(
                        cdf_attr['one_wire_temperatures'][string])
                except KeyError:
                    print "Couldn't find attribute for ", string
                    raw.one_wire_attrib.append(None)
            #remove spikes of 1e37 that appear in temperatures
            raw.one_wire_temperatures[raw.one_wire_temperatures>1000.]\
                         =np.NaN
        if hasattr(raw, 'RemoveLongI2Cell'):
            servo_range = cdf_attr['RemoveLongI2Cell']['range']
            raw.i2_cell_out = np.abs(raw.RemoveLongI2Cell-servo_range[1]) \
                     > np.abs(raw.RemoveLongI2Cell-servo_range[0])

        if hasattr(raw, 'RemoveLongI2ArCell'):
            servo_range = cdf_attr['RemoveLongI2ArCell']['range']
            raw.i2a_cell_out = np.abs(raw.RemoveLongI2ArCell-servo_range[1]) \
                     > np.abs(raw.RemoveLongI2ArCell-servo_range[0])

        if hasattr(raw, 'shot_count'):
            if raw.shot_count.size > 0:
                raw.shot_count = raw.shot_count[:, 0]
            else:
                raw.shot_count = raw.shot_count.reshape([0])

        if hasattr(raw, 'seeded_shots'):
            if raw.seeded_shots.size > 0:
                raw.seeded_shots = raw.seeded_shots[:, 0]
            else:
                raw.seeded_shots = raw.seeded_shots.reshape([0])

        #extract average dark counts from profiles and add dark counts to raw
        #dark count extracted from 'first_bins' or 'last_bins' as specified in constants
        #pu.extract_dark_count(raw,self.constants) #moved to after PILEUP 20140805

        #extract cal pulse from light scattered as laser pulse exits system
        #and place in raw
        #pu.extract_cal_pulse(raw,self.constants)

        if hasattr(raw, 'l3cavityvoltage') and raw.l3cavityvoltage.size > 0:
            raw.piezo_voltage_ave = raw.l3cavityvoltage[:, 0]
            raw.piezo_voltage_min = raw.l3cavityvoltage[:, 1]
            raw.piezo_voltage_max = raw.l3cavityvoltage[:, 2]
        if hasattr(raw, 'l3locking_stats') and 'l3slope_to_frequency' in self.constants:
            raw.l3frequency_offset = raw.l3locking_stats.copy()
            for x in range(0, 3):
                raw.l3frequency_offset[:, x] = np.polyval(
                    self.constants['l3slope_to_frequency'],
                    raw.l3locking_stats[:, x])

        if hasattr(raw, 'GPS_MSL_Alt'):
            #replace any spikes in altitude with the base altitude
            #this allows the code to run but produces garbage data
            if np.any(raw.GPS_MSL_Alt > 20000.0):
                raw.GPS_MSL_Alt[raw.GPS_MSL_Alt > 20000.0] = \
                           self.constants['lidar_altitude']
        if hasattr(raw, 'roll_angle'):
            if anynan(raw.roll_angle):
                raw.roll_angle[np.isnan(raw.roll_angle)] = 0.0
            if anynan(raw.pitch_angle):
                raw.pitch_angle[np.isnan(raw.pitch_angle)] = 0.0
        if hasattr(raw, 'opticalbenchairpressure'):
            #convert psi to mb
            #print 'pre--opticalbenchairpressure ',raw.opticalbenchairpressure.shape
            raw.opticalbenchairpressure=hau.T_Array(np.array((raw.opticalbenchairpressure\
                    *self.constants['optical_bench_air_pressure_cal']),ndmin=1,dtype=np.float32))
            #print 'optical_bench_air_pressure.size',raw.opticalbenchairpressure.size,raw.times.size
        if hasattr(raw,
                   'chillertemperature') and raw.chillertemperature.size > 0:
            raw.chiller_temp = raw.chillertemperature[:, 0]
            raw.chiller_setpt = raw.chillertemperature[:, 1]

        if hasattr(raw, 'etalon_pressure'):
            raw.etalon_pressure = raw.etalon_pressure * self.constants[
                'etalon_pressure']

        if hasattr(raw, 'qw_rotation_angle'):
            #convert gv quarter wave plate rotation angle from radians to deg
            raw.qw_rotation_angle = raw.qw_rotation_angle * 180.0 / np.pi

        if hasattr(raw, 'GPS_MSL_Alt') or self.constants['installation'] in (
                'airborne', 'shipborne'):
            #do quality check on aircraft GPS and INS data
            pu.gps_quality_check(raw, self.constants)

        if hasattr(raw, 'molecular_counts'):
            for k, v in vars(raw).items():
                if '_counts' in k:
                    if raw.molecular_counts.shape[1] != v.shape[1]:
                        print('raw field', k, 'is messed up. size difference')
                        tmp = copy.deepcopy(raw.molecular_counts)
                        minidx = min(tmp.shape[1], v.shape[1])
                        tmp[:, :] = 0
                        tmp[:, :minidx] = v[:, :minidx]
                        setattr(raw, k, tmp)

        #do pileup correction on signals before any averaging is done
        pu.pileup_correction(raw, self.constants, self.corr_adjusts)

        #extract average dark counts from profiles and add dark counts to raw
        #dark count extracted from 'first_bins' or 'last_bins' as specified in constants
        if hasattr(raw, 'molecular_counts'):
            pu.extract_dark_count(
                raw, self.constants)  #relocated from above 20140805

            #extract cal pulse from light scattered as laser pulse exits system
            #and place in raw

            pu.extract_cal_pulse(raw, self.constants)

            if 0:
                import hsrl.simulation.rb_simulation as sim
                #rescale for new energy
                sim.rb_simulation(raw, self.constants)
                #redo dark count
                pu.extract_dark_count(raw, self.constants)
Example #30
def polynomial_smoothing(array, delta_z, smoothing):
    """If smoothing[0]>0, smooth with running polynomial fit of order smoothing[1]
       width of smoothing width in meters increases linearly from
       smoothing[2] at lowest altitude  too smoothing[3] highest altitude.

       array        = data array to smooth with running polynomial 
       smoothing[0] = enable smoothing if True
       smoothing[1] = order of polynominal to use for local fit
       smoothing[2] = width of smoothing at lowest range (m)
       smoothing[3] = width of smoothing at highest range (m)
       smoothing[4] = first range to smooth
       delta_z      = bin width (m)"""

    if not smoothing[0]:
        print('no smoothing')
        return array

    #check to see if array is 2-d
    try:
        [ntimes, nbins] = array.shape
    except ValueError:
        nbins = len(array)
        ntimes = 1

    #compute how much to increment smoothing half-width per altitude index
    delta_w = (smoothing[3] - smoothing[2]) / (2.0 * delta_z * nbins)

    #initial half-width in bins--note this is float
    w0 = smoothing[2] / (2.0 * delta_z)

    #start at larger of polynomial order, or specified start range
    first_bin = int(float(smoothing[4]) / delta_z)
    #first_bin = np.max(first_bin,np.int(smoothing[1]))

    for i in range(ntimes):
        #smooth profiles with local 2nd order polynomial fit
        if ntimes > 1:
            temp = array[i, :].copy()
        else:
            temp = array.copy()
            #set NaNs to 0.0
            temp = np.nan_to_num(temp)

        #loop over ranges limited by number of points needed to fit polynomial
        #where the polynomial order = smoothing[1]

        for bin in range(first_bin + 1, nbins - int(smoothing[1]) - 1):
            w = int(w0 + bin * delta_w)

            if bin >= w and bin <= nbins - w - 2:
                start = bin - w
                end = bin + w
                #print 'w1 ',start,j,end

            elif bin < w:  #bin is less than half_width
                start = 0
                end = 2 * bin

            else:  #bin + half_width bumping against nbins
                #top of profile not smoothed to prevent introduction of extra NaNs
                break

            x = range(start, end + 1)
            p = np.polyfit(x, temp[x], int(smoothing[1]))

            if not anynan(p):
                if ntimes > 1:
                    array[i, bin] = np.polyval(p, bin)
                else:
                    array[bin] = np.polyval(p, bin)

    return array
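
A minimal sketch of the running polynomial fit at the core of the routine above, with a fixed window half-width in place of the altitude-dependent one:

import numpy as np

profile = np.sin(np.linspace(0, 3, 50)) + 0.1 * np.random.randn(50)
order, w = 2, 5  # polynomial order, window half-width in bins
smoothed = profile.copy()
for b in range(w, len(profile) - w):
    xw = np.arange(b - w, b + w + 1)
    smoothed[b] = np.polyval(np.polyfit(xw, profile[xw], order), b)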
Example #31
 def time_anynan(self, dtype, shape, order, axis, case):
     bn.anynan(self.arr, axis=axis)
Example #32
 def has_missing_class(self):
     """Return `True` if there are any missing class values."""
     return bn.anynan(self.Y)
Example #33
 def has_missing(self):
     """Return `True` if there are any missing attribute or class values."""
     return bn.anynan(self.X) or bn.anynan(self.Y)
Example #34
    def featcalc(self,
                 features,
                 total=None,
                 cardinality=64,
                 linflatten=False,
                 recalc=False):
        """
		Calculates features for a set of features.
		"""

        if isinstance(features, dict):  # trick for single features
            features = [features]
        if total is None:
            total = len(features)

        # Loop through the provided features and build feature table:
        featout = np.empty([total, len(self.features_names)], dtype='float32')
        for k, obj in enumerate(features):
            # Load features from the provided (cached) features if they exist:
            featout[k, :] = [
                obj.get(key, np.NaN) for key in self.features_names
            ]

            # If not all features are already populated, we are going to recalculate them all:
            if recalc or anynan(featout[k, :]):

                lc = fc.prepLCs(obj['lightcurve'], linflatten=linflatten)

                periods, n_usedfreqs, usedfreqs = get_periods(
                    obj, NFREQUENCIES, lc.time, ignore_harmonics=True)
                featout[k, :NFREQUENCIES] = periods

                EBper = fc.EBperiod(lc.time,
                                    lc.flux,
                                    periods[0],
                                    linflatten=True)
                featout[k, 0] = EBper  # overwrites top period

                featout[k, NFREQUENCIES:NFREQUENCIES + 2] = fc.freq_ampratios(
                    obj, n_usedfreqs, usedfreqs)

                featout[k, NFREQUENCIES + 2:NFREQUENCIES +
                        4] = fc.freq_phasediffs(obj, n_usedfreqs, usedfreqs)

                # Self Organising Map
                featout[k, NFREQUENCIES + 4:NFREQUENCIES + 6] = fc.SOMloc(
                    self.classifier.som, lc.time, lc.flux, EBper, cardinality)

                featout[k, NFREQUENCIES + 6:NFREQUENCIES + 8] = fc.phase_features(
                    lc.time, lc.flux, EBper)

                featout[k, NFREQUENCIES + 8:NFREQUENCIES + 10] = fc.p2p_features(lc.flux)

                # Higher Order Crossings:
                psi, zc = fc.compute_hocs(lc.time, lc.flux, 5)
                featout[k, NFREQUENCIES + 10:NFREQUENCIES + 12] = psi, zc[0]

                # FliPer:
                featout[k, NFREQUENCIES + 12:NFREQUENCIES + 16] = \
                    obj['Fp07'], obj['Fp7'], obj['Fp20'], obj['Fp50']

                # If we are running with linfit enabled, add an extra feature
                # which is the absolute value of the fitted linear trend, divided
                # by the point-to-point scatter:
                if self.linfit:
                    slope_feature = np.abs(
                        obj['detrend_coeff'][0]) / obj['ptp']
                    featout[k, NFREQUENCIES + 16] = slope_feature

        return featout
Example #35
def bincount(x, weights=None, max_val=None, minlength=0):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.

    Parameters
    ----------
    x : array_like, 1 dimension, nonnegative ints
        Input array.
    weights : array_like, optional
        Weights, array of the same shape as x.
    max_val : int, optional
        Indicates the maximum value we expect to find in X and sets the result
        array size accordingly. E.g. if we set `max_val=2` yet the largest
        value in X is 1, the result will contain a bin for the value 2, and
        will be set to 0. See examples for usage.
    minlength : int, optional
        A minimum number of bins for the output array. See numpy docs for info.

    Returns
    -------
    Tuple[np.ndarray, int]
        Returns the bincounts and the number of NaN values.

    Examples
    --------
    In case `max_val` is provided, the return shape includes bins for these
    values as well, even if they do not appear in the data. However, this will
    not truncate the bincount if values larger than `max_val` are found.

    >>> bincount([0, 0, 1, 1, 2], max_val=4)
    (array([ 2.,  2.,  1.,  0.,  0.]), 0.0)
    >>> bincount([0, 1, 2, 3, 4], max_val=2)
    (array([ 1.,  1.,  1.,  1.,  1.]), 0.0)

    """
    # Store the original matrix before any manipulation to check for sparse
    x_original = x
    if sp.issparse(x):
        if weights is not None:
            # Match weights and x axis so `indices` will be set appropriately
            if x.shape[0] == weights.shape[0]:
                x = x.tocsc()
            elif x.shape[1] == weights.shape[0]:
                x = x.tocsr()

            zero_weights = sparse_implicit_zero_weights(x, weights).sum()
            weights = weights[x.indices]
        else:
            zero_weights = sparse_count_implicit_zeros(x)

        x = x.data

    x = np.asanyarray(x)
    if x.dtype.kind == 'f' and bn.anynan(x):
        nonnan = ~np.isnan(x)
        x = x[nonnan]
        if weights is not None:
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if x.ndim == 1 else np.zeros(x.shape[1], dtype=float)

    if minlength == 0 and max_val is not None:
        minlength = max_val + 1

    bc = np.bincount(
        x.astype(np.int32, copy=False), weights=weights, minlength=minlength
    ).astype(float)
    # Since `csr_matrix.data` only contains non-zero values or explicit
    # zeros, we must count implicit zeros separately and add them to the
    # explicit ones found above:
    if sp.issparse(x_original):
        # If x contains only NaNs, then bc will be an empty array
        if zero_weights and bc.size == 0:
            bc = [zero_weights]
        elif zero_weights:
            bc[0] += zero_weights

    return bc, nans
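# A minimal usage sketch of the NaN handling above (assuming numpy as np):
# the NaN is dropped from the counts and reported separately as the second
# return value, while max_val pads the result with an empty bin for 2.
counts, n_nans = bincount(np.array([0.0, 1.0, np.nan, 1.0]), max_val=2)
print(counts)   # [1. 2. 0.]
print(n_nans)   # 1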
Example #36
0
def plot_image(image,
               ax=None,
               scale='log',
               cmap=None,
               origin='lower',
               xlabel=None,
               ylabel=None,
               cbar=None,
               clabel='Flux ($e^{-}s^{-1}$)',
               cbar_ticks=None,
               cbar_ticklabels=None,
               cbar_pad=None,
               cbar_size='5%',
               title=None,
               percentile=95.0,
               vmin=None,
               vmax=None,
               offset_axes=None,
               color_bad='k',
               **kwargs):
    """
	Utility function to plot a 2D image.

	Parameters:
		image (2d array): Image data.
		ax (matplotlib.pyplot.axes, optional): Axes in which to plot.
			Default (None) is to use current active axes.
		scale (str or :py:class:`astropy.visualization.ImageNormalize` object, optional):
			Normalization used to stretch the colormap.
			Options: ``'linear'``, ``'sqrt'``, ``'log'``, ``'asinh'``, ``'histeq'``, ``'sinh'``
			and ``'squared'``.
			Can also be a :py:class:`astropy.visualization.ImageNormalize` object.
			Default is ``'log'``.
		origin (str, optional): The origin of the coordinate system.
		xlabel (str, optional): Label for the x-axis.
		ylabel (str, optional): Label for the y-axis.
		cbar (str, optional): Location of color bar.
			Choices are ``'right'``, ``'left'``, ``'top'``, ``'bottom'``.
			Default is not to create a colorbar.
		clabel (str, optional): Label for the color bar.
		cbar_size (str, optional): Fractional size of colorbar compared to axes. Default='5%'.
		cbar_pad (float, optional): Padding between axes and colorbar.
		title (str or None, optional): Title for the plot.
		percentile (float, optional): The fraction of pixels to keep in color-trim.
			If single float given, the same fraction of pixels is eliminated from both ends.
			If tuple of two floats is given, the two are used as the percentiles.
			Default=95.
		cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap.
		vmin (float, optional): Lower limit to use for colormap.
		vmax (float, optional): Upper limit to use for colormap.
		color_bad (str, optional): Color to apply to bad pixels (NaN). Default is black.
		kwargs (dict, optional): Keyword arguments to be passed to :py:func:`matplotlib.pyplot.imshow`.

	Returns:
		:py:class:`matplotlib.image.AxesImage`: Image returned
			by :py:func:`matplotlib.pyplot.imshow`.

	.. codeauthor:: Rasmus Handberg <*****@*****.**>
	"""

    logger = logging.getLogger(__name__)

    # Backward compatible settings:
    make_cbar = kwargs.pop('make_cbar', None)
    if make_cbar:
        # Warn (don't raise), so the deprecated argument keeps working:
        warnings.warn("'make_cbar' is deprecated. Use 'cbar' instead.",
                      FutureWarning)
        if not cbar:
            cbar = make_cbar

    # Special treatment for boolean arrays:
    if isinstance(image, np.ndarray) and image.dtype == 'bool':
        if vmin is None: vmin = 0
        if vmax is None: vmax = 1
        if cbar_ticks is None: cbar_ticks = [0, 1]
        if cbar_ticklabels is None: cbar_ticklabels = ['False', 'True']

    # Calculate limits of color scaling:
    interval = None
    if vmin is None or vmax is None:
        if allnan(image):
            logger.warning("Image is all NaN")
            vmin = 0
            vmax = 1
            if cbar_ticks is None:
                cbar_ticks = []
            if cbar_ticklabels is None:
                cbar_ticklabels = []
        elif isinstance(percentile, (list, tuple, np.ndarray)):
            interval = viz.AsymmetricPercentileInterval(
                percentile[0], percentile[1])
        else:
            interval = viz.PercentileInterval(percentile)

    # Create ImageNormalize object with extracted limits:
    if scale in ('log', 'linear', 'sqrt', 'asinh', 'histeq', 'sinh',
                 'squared'):
        if scale == 'log':
            stretch = viz.LogStretch()
        elif scale == 'linear':
            stretch = viz.LinearStretch()
        elif scale == 'sqrt':
            stretch = viz.SqrtStretch()
        elif scale == 'asinh':
            stretch = viz.AsinhStretch()
        elif scale == 'histeq':
            stretch = viz.HistEqStretch(image[np.isfinite(image)])
        elif scale == 'sinh':
            stretch = viz.SinhStretch()
        elif scale == 'squared':
            stretch = viz.SquaredStretch()

        # Create ImageNormalize object. Very important to use clip=False if the image contains
        # NaNs, otherwise NaN points will not be plotted correctly.
        norm = viz.ImageNormalize(data=image[np.isfinite(image)],
                                  interval=interval,
                                  vmin=vmin,
                                  vmax=vmax,
                                  stretch=stretch,
                                  clip=not anynan(image))

    elif isinstance(scale, (viz.ImageNormalize, matplotlib.colors.Normalize)):
        norm = scale
    else:
        raise ValueError("scale {} is not available.".format(scale))

    if offset_axes:
        extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5,
                  offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5)
    else:
        extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5)

    if ax is None:
        ax = plt.gca()

    # Set up the colormap to use. If a bad color is defined,
    # add it to the colormap:
    if cmap is None:
        cmap = copy.copy(plt.get_cmap('Blues'))
    elif isinstance(cmap, str):
        cmap = copy.copy(plt.get_cmap(cmap))

    if color_bad:
        cmap.set_bad(color_bad, 1.0)

    # Plotting the image using all the settings set above:
    im = ax.imshow(image,
                   cmap=cmap,
                   norm=norm,
                   origin=origin,
                   extent=extent,
                   interpolation='nearest',
                   **kwargs)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if title is not None:
        ax.set_title(title)
    ax.set_xlim([extent[0], extent[1]])
    ax.set_ylim([extent[2], extent[3]])

    if cbar:
        colorbar(im,
                 ax=ax,
                 loc=cbar,
                 size=cbar_size,
                 pad=cbar_pad,
                 label=clabel,
                 ticks=cbar_ticks,
                 ticklabels=cbar_ticklabels)

    # Settings for ticks. Create a separate locator per axis, since a
    # matplotlib locator stores a reference to the axis it is attached to:
    ax.xaxis.set_major_locator(MaxNLocator(nbins=10, integer=True))
    ax.xaxis.set_minor_locator(MaxNLocator(nbins=10, integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(nbins=10, integer=True))
    ax.yaxis.set_minor_locator(MaxNLocator(nbins=10, integer=True))
    ax.tick_params(which='both', direction='out', pad=5)
    ax.xaxis.tick_bottom()
    ax.yaxis.tick_left()

    return im
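# A minimal usage sketch (the image data and figure handling are illustrative
# assumptions, not from the source): plot_image renders the NaN pixel in the
# `color_bad` color and computes the color scale from the finite pixels only.
img = np.random.rand(64, 64)
img[32, 32] = np.nan

fig, ax = plt.subplots()
plot_image(img, ax=ax, scale='linear', cbar='right', title='Random image')
plt.show()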
Example #37
0
    def _fix_missing():
        nonlocal data
        if not metric.supports_missing and bn.anynan(data.X):
            self.Warning.imputing_data()
            data = distance.impute(data)
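# For context, a minimal standalone sketch of the imputation the guard above
# delegates to distance.impute; `impute_mean` is an illustrative stand-in,
# not the actual Orange API (assumes numpy as np, bottleneck as bn):
def impute_mean(X):
    """Replace NaNs with the mean of their column."""
    if not bn.anynan(X):
        return X
    col_means = bn.nanmean(X, axis=0)
    rows, cols = np.where(np.isnan(X))
    X = X.copy()
    X[rows, cols] = np.take(col_means, cols)
    return X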
Example #38
0
	def update(self):
		#get merge-extract
		for n,m in enumerate(self.show_merge):
			if self.show_merge_as_density[m]:
				self.merge_extract = self.densityMatrix[m][tuple(self.basis_dim_plot_range)]
			else:
				self.merge_extract = self.mergeMatrix[m][tuple(self.basis_dim_plot_range)]
			for b in range(len(self._basis_dim)-1,-1,-1):
				#basis dim to concentrate
				if b not in self.show_basis:
					pos_corr = self.concentrate_basis_dim[:b].count("pos")
					if self.concentrate_basis_dim[b] == "sum":
						self.merge_extract = bn.nansum(self.merge_extract,b-pos_corr)
					elif self.concentrate_basis_dim[b] == "mean":
						self.merge_extract = bn.nanmean(self.merge_extract,b-pos_corr)
					elif self.concentrate_basis_dim[b] == "max":
						self.merge_extract = bn.nanmax(self.merge_extract,b-pos_corr)
					elif self.concentrate_basis_dim[b] == "min":
						self.merge_extract = bn.nanmin(self.merge_extract,b-pos_corr)

			# Check from end to start whether to roll an axis: the time axis
			# has to be the last one. Don't roll the last basis dim
			# (start at len(self._basis_dim)-2). Initialize the index outside
			# the loop so it is defined even if no dim matches:
			basis_time_index = None
			for b in range(len(self._basis_dim)-2, -1, -1):
				if b not in self.show_basis and self.concentrate_basis_dim[b] == "time":
					# reshape the matrix
					self.merge_extract = np.rollaxis(self.merge_extract, b, 0)
					basis_time_index = b
					break # no need to continue; only one dim can be 'time'

			if len(self.show_basis) == 1:
				basis_extract = self.basisMatrix[self.show_basis[0]][self.basis_dim_plot_range[self.show_basis[0]]]

				if self.scale_plot:
					self.plot.enableAutoRange('xy', True)
				else:
					if self.enableAutoRangeX:
						self.plot.enableAutoRange('x', True)
						#self.plot.setXRange(
							#self._basis_dim[self.show_basis[0]]._include_range[0],
							#self._basis_dim[self.show_basis[0]]._include_range[1])
					if self.enableAutoRangeY:
						self.plot.enableAutoRange('y', True)

				if self.transpose_axes:
					self.curves[n].setData(self.merge_extract, basis_extract)
				else:
					self.curves[n].setData(basis_extract, self.merge_extract)

			elif len(self.show_basis) >= 2:
				# Calculate scale and zero position for the axes ticks:
				x0 = self._basis_dim[self.show_basis[0]]._include_range[0]
				x1 = self._basis_dim[self.show_basis[0]]._include_range[1]
				y0 = self._basis_dim[self.show_basis[1]]._include_range[0]
				y1 = self._basis_dim[self.show_basis[1]]._include_range[1]
				xscale = (x1-x0) / self._basis_dim[self.show_basis[0]].resolution
				yscale = (y1-y0) / self._basis_dim[self.show_basis[1]].resolution
				args = {'pos': [x0, y0], 'scale': [xscale, yscale]}
				if self.transpose_axes:
					args = {'pos': [y0, x0], 'scale': [yscale, xscale]}

				# Set time ticks:
				if basis_time_index is not None:
					args["xvals"] = self.basisMatrix[basis_time_index]

				if self.enableAutoRangeX:
					self.view.enableAutoRange('x', True)
					#self.view.setXRange(**tuple(self._basis_dim[self.show_basis[0]]._include_range))#[0],
						#self._basis_dim[self.show_basis[0]]._include_range[1])
				if self.enableAutoRangeY:
					self.view.enableAutoRange('y', True)

				# By default autoLevels (the color level of the merge dims) is True
				# and calculated by pyqtgraph, which can only handle arrays without
				# NaN values: the calculated color level is wrong when the real
				# values are beyond the NaN replacement (zero). Therefore calculate
				# the color level manually in case NaNs are in the array:
				anynan = bn.anynan(self.merge_extract)
				if anynan:
					mmin = bn.nanmin(self.merge_extract)
					mmax = bn.nanmax(self.merge_extract)
					if np.isnan(mmin):
						mmin, mmax = 0, 0
					self.plot.setLevels(mmin, mmax)
					args["autoLevels"] = False
					# The following doesn't work with this version of pyqtgraph:
					#args["levels"] = [mmin, mmax]
				self.merge_extract = _utils.nanToZeros(self.merge_extract)

				if self.transpose_axes:
					self.plot.setImage(self.merge_extract.transpose(),
						autoRange=self.scale_plot,**args)
				else:
					self.plot.setImage(self.merge_extract,
						autoRange=self.scale_plot,**args)
				if anynan: # scale the histogram to the new range
					self.plot.ui.histogram.vb.setYRange(mmin, mmax)

		self.scale_plot = False
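# The NaN-aware level computation above can be distilled into a small helper
# (a sketch under the same assumptions: numpy as np, bottleneck as bn):
def safe_levels(arr):
    """Color levels that ignore NaNs; fall back to (0, 0) for all-NaN input."""
    if bn.anynan(arr):
        mmin, mmax = bn.nanmin(arr), bn.nanmax(arr)
        if np.isnan(mmin):  # every value was NaN
            mmin, mmax = 0, 0
        return mmin, mmax
    return arr.min(), arr.max()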
Example #40
0
    def train(self, tset, savecl=True, overwrite=False):
        """
		Train the Meta-classifier.

		Parameters:
			tset (:class:`TrainingSet`): Training set to train classifier on.
			savecl (bool, optional): Save the classifier to file?
			overwrite (bool, optional): Overwrite existing classifier save file.

		.. codeauthor:: James S. Kuszlewicz <*****@*****.**>
		.. codeauthor:: Rasmus Handberg <*****@*****.**>
		"""
        # Start a logger that should be used to output e.g. debug information:
        logger = logging.getLogger(__name__)

        # Check for pre-calculated features
        fitlabels = self.parse_labels(tset.labels())

        # First create list of all possible classifiers:
        all_classifiers = list(classifier_list)
        all_classifiers.remove('meta')

        # Create list of all features:
        # Save this to object, we are using it to keep track of which features were used
        # to train the classifier:
        self.features_used = list(
            itertools.product(all_classifiers, self.StellarClasses))
        self.features_names = [
            f'{classifier:s}_{stcl.name:s}'
            for classifier, stcl in self.features_used
        ]

        # Create table of features:
        # Create as float32, since that is what RandomForestClassifier converts it to anyway.
        logger.info("Importing features...")
        features = self.build_features_table(tset.features(), total=len(tset))

        # Remove columns that are all NaN:
        # These can be classifiers that never return a given class, or
        # classifiers that have not been run at all.
        keepcols = ~allnan(features, axis=0)
        features = features[:, keepcols]
        self.features_used = [
            x for i, x in enumerate(self.features_used) if keepcols[i]
        ]
        self.features_names = [
            x for i, x in enumerate(self.features_names) if keepcols[i]
        ]

        # Throw an error if a classifier is not run at all:
        run_classifiers = {fu[0] for fu in self.features_used}
        if run_classifiers != set(all_classifiers):
            raise RuntimeError(
                "Classifier did not contribute at all: %s" %
                set(all_classifiers).difference(run_classifiers))

        # Raise an exception if there are NaNs left in the features:
        if anynan(features):
            raise ValueError("Features contains NaNs")

        logger.info("Features imported. Shape = %s", features.shape)

        # Run actual training:
        self.classifier.oob_score = True
        logger.info("Fitting model.")
        self.classifier.fit(features, fitlabels)
        logger.info('Trained. OOB Score = %s', self.classifier.oob_score_)
        self.classifier.trained = True

        if savecl and self.classifier.trained and self.clfile is not None:
            if overwrite or not os.path.exists(self.clfile):
                logger.info("Saving pickled classifier instance to '%s'",
                            self.clfile)
                self.save(self.clfile)
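# The all-NaN column filtering in train() can be illustrated in isolation
# (a minimal sketch; assumes numpy as np and bottleneck's allnan, as used
# throughout these examples):
features = np.array([
    [0.1, np.nan, 1.0],
    [0.2, np.nan, np.nan],
])
keepcols = ~allnan(features, axis=0)   # column 1 is all-NaN and gets dropped
features = features[:, keepcols]
print(features.shape)   # (2, 2)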