def _filter_is_defined(self, columns=None, negate=False):
    """
    Extract rows without undefined values.

    :param columns: optional list of columns that are checked for unknowns
    :type columns: sequence of ints, variable names or descriptors
    :param negate: invert the selection
    :type negate: bool
    :return: a new Table
    :rtype: Orange.data.Table
    """
    if columns is None:
        if sp.issparse(self.X):
            # a sparse row is fully defined when it stores shape[1] values
            remove = (self.X.indptr[1:] !=
                      self.X.indptr[:-1] + self.X.shape[1])
        else:
            remove = bn.anynan(self.X, axis=1)
        if sp.issparse(self.Y):
            remove = np.logical_or(remove, self.Y.indptr[1:] !=
                                   self.Y.indptr[:-1] + self.Y.shape[1])
        else:
            remove = np.logical_or(remove, bn.anynan(self.Y, axis=1))
    else:
        remove = np.zeros(len(self), dtype=bool)
        for column in columns:
            col, sparse = self.get_column_view(column)
            if sparse:
                remove = np.logical_or(remove, col == 0)
            else:
                remove = np.logical_or(remove, bn.anynan(col))
    retain = remove if negate else np.logical_not(remove)
    return Table.from_table_rows(self, retain)
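# A minimal sketch of the dense branch above, in isolation: rows containing
# any NaN are flagged with bn.anynan(axis=1) and dropped. Plain numpy, not
# the Orange Table API.
import numpy as np
import bottleneck as bn

X = np.array([[1.0, 2.0],
              [np.nan, 3.0],
              [4.0, 5.0]])
remove = bn.anynan(X, axis=1)          # array([False,  True, False])
retained = X[np.logical_not(remove)]   # keeps rows 0 and 2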
def test_aperturephotometry(SHARED_INPUT_DIR, datasource):
    with TemporaryDirectory() as OUTPUT_DIR:
        with AperturePhotometry(DUMMY_TARGET, SHARED_INPUT_DIR, OUTPUT_DIR,
                                plot=True, datasource=datasource,
                                **DUMMY_KWARG) as pho:
            pho.photometry()
            filepath = pho.save_lightcurve()
            print(pho.lightcurve)

            # It should set the status to one of these:
            assert pho.status in (STATUS.OK, STATUS.WARNING)

            # Check the sumimage:
            plt.figure()
            plot_image(pho.sumimage, title=datasource)
            assert not anynan(pho.sumimage), "There are NaNs in the SUMIMAGE"

            # They shouldn't be exactly zero:
            assert not np.all(pho.lightcurve['flux'] == 0)
            assert not np.all(pho.lightcurve['flux_err'] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 0] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 1] == 0)

            # They shouldn't be NaN (in this case!):
            assert not allnan(pho.lightcurve['flux'])
            assert not allnan(pho.lightcurve['flux_err'])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 0])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 1])

            assert not np.any(~np.isfinite(pho.lightcurve['time']))
            assert not np.any(pho.lightcurve['time'] == 0)

            # Test the outputted FITS file:
            with fits.open(filepath, mode='readonly') as hdu:
                # Should be the same vectors in FITS as returned in Table:
                np.testing.assert_allclose(pho.lightcurve['time'], hdu[1].data['TIME'])
                np.testing.assert_allclose(pho.lightcurve['timecorr'], hdu[1].data['TIMECORR'])
                np.testing.assert_allclose(pho.lightcurve['flux'], hdu[1].data['FLUX_RAW'])
                np.testing.assert_allclose(pho.lightcurve['flux_err'], hdu[1].data['FLUX_RAW_ERR'])
                np.testing.assert_allclose(pho.lightcurve['cadenceno'], hdu[1].data['CADENCENO'])

                # Test FITS aperture image:
                ap = hdu['APERTURE'].data
                print(ap)
                assert np.all(pho.aperture == ap), "Aperture image mismatch"
                assert not anynan(ap), "NaN in aperture image"
                assert np.all(ap >= 0), "Negative values in aperture image"
                assert np.any(ap & 2 != 0), "No photometric mask set"
                assert np.any(ap & 8 != 0), "No position mask set"
def __call__(self, data):
    if isinstance(data, Instance):
        return self.negate == bn.anynan(data._y)
    if isinstance(data, Storage):
        try:
            return data._filter_has_class(self.negate)
        except NotImplementedError:
            pass
    r = np.fromiter((not bn.anynan(inst._y) for inst in data),
                    bool, len(data))
    if self.negate:
        r = np.logical_not(r)
    return data[r]
def __call__(self, data):
    if isinstance(data, Instance):
        return self.negate == bn.anynan(data._x)
    if isinstance(data, Storage):
        try:
            return data._filter_is_defined(self.columns, self.negate)
        except NotImplementedError:
            pass
    r = np.fromiter((not bn.anynan(inst._x) for inst in data),
                    dtype=bool, count=len(data))
    if self.negate:
        r = np.logical_not(r)
    return data[r]
def process_stack(data, xat, yat, upsample_factor=100, use_sobel=False,
                  ref_frame_num=0):
    hypercube, lsx, lsy = get_hypercube(data, xat, yat)
    if bn.anynan(hypercube):
        raise NanInsideHypercube(True)

    calculate_shift = RegisterTranslation(upsample_factor=upsample_factor)
    filterfn = sobel if use_sobel else lambda x: x
    shifts, aligned_stack = alignstack(hypercube.T,
                                       shiftfn=calculate_shift,
                                       ref_frame_num=ref_frame_num,
                                       filterfn=filterfn)

    xmin, ymin = shifts[:, 0].min(), shifts[:, 1].min()
    xmax, ymax = shifts[:, 0].max(), shifts[:, 1].max()
    xmin, xmax = int(round(xmin)), int(round(xmax))
    ymin, ymax = int(round(ymin)), int(round(ymax))

    shape = hypercube.shape
    slicex = slice(max(xmax, 0), min(shape[1], shape[1] + xmin))
    slicey = slice(max(ymax, 0), min(shape[0], shape[0] + ymin))
    cropped = np.array(aligned_stack).T[slicey, slicex]

    # transform numpy array back to Orange.data.Table
    return shifts, build_spec_table(
        *_spectra_from_image(cropped, getx(data),
                             np.linspace(*lsx)[slicex],
                             np.linspace(*lsy)[slicey]))
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    if sp.issparse(X):
        minlength = max_val + 1
        bin_weights = weights[X.indices] if weights is not None else None
        return (np.bincount(X.data.astype(int),
                            weights=bin_weights,
                            minlength=minlength),
                _count_nans_per_row_sparse(X, weights))

    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        X = X[nonnan]
        if weights is not None:
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if X.ndim == 1 else np.zeros(X.shape[1], dtype=float)
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    bc = np.array([]) if minlength is not None and minlength <= 0 else \
        np.bincount(X.astype(np.int32, copy=False),
                    weights=weights, minlength=minlength).astype(float)
    return bc, nans
def checks(metric, data):
    if data is None:
        return
    if issparse(data.X) and not metric.supports_sparse:
        self.Error.dense_metric_sparse_data()
        return
    if not any(a.is_continuous for a in data.domain.attributes):
        self.Error.no_continuous_features()
        return
    needs_preprocessing = False
    if any(a.is_discrete for a in self.data.domain.attributes):
        self.Warning.ignoring_discrete()
        needs_preprocessing = True
    if not issparse(data.X) and bn.anynan(data.X):
        self.Warning.imputing_data()
        needs_preprocessing = True
    if needs_preprocessing:
        # removes discrete features and imputes data
        data = distance._preprocess(data)
    if not data.X.size:
        self.Error.empty_data()
        return
    return data
def transformed(self, X, x):
    newd = np.zeros_like(X)
    for rowi, row in enumerate(X):
        # remove NaNs which ConvexHull can not handle
        source = np.column_stack((x, row))
        source = source[~bottleneck.anynan(source, axis=1)]
        try:
            v = ConvexHull(source).vertices
        except (QhullError, ValueError):
            # FIXME notify user
            baseline = np.zeros_like(row)
        else:
            if self.peak_dir == RubberbandBaseline.PeakPositive:
                v = np.roll(v, -v.argmin())
                v = v[:v.argmax() + 1]
            elif self.peak_dir == RubberbandBaseline.PeakNegative:
                v = np.roll(v, -v.argmax())
                v = v[:v.argmin() + 1]
            # If there are NaN values at the edges of data then convex hull
            # does not include the endpoints. Because the same values are also
            # NaN in the current row, we can fill them with NaN (bounds_error
            # achieves this).
            baseline = interp1d(source[v, 0], source[v, 1],
                                bounds_error=False)(x)
        finally:
            if self.sub == 0:
                newd[rowi] = row - baseline
            else:
                newd[rowi] = baseline
    return newd
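# A minimal standalone sketch of the rubberband idea above (assumed setup,
# not the add-on's exact pipeline): for a spectrum with positive peaks, the
# lower convex hull traced from the leftmost to the rightmost point gives
# the baseline, which is then interpolated back onto the original grid.
import numpy as np
from scipy.spatial import ConvexHull
from scipy.interpolate import interp1d

x = np.linspace(0, 10, 200)
row = 0.1 * x + np.exp(-(x - 5) ** 2)   # sloped baseline plus one peak
source = np.column_stack((x, row))

v = ConvexHull(source).vertices         # hull vertex indices, ccw order
v = np.roll(v, -v.argmin())             # start at the leftmost point
v = v[:v.argmax() + 1]                  # keep only the lower hull
baseline = interp1d(source[v, 0], source[v, 1], bounds_error=False)(x)
corrected = row - baseline              # peak on a flat baseline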
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    if sp.issparse(X):
        minlength = max_val + 1
        bin_weights = weights[X.indices] if weights is not None else None
        return (np.bincount(X.data.astype(int),
                            weights=bin_weights,
                            minlength=minlength),
                _count_nans_per_row_sparse(X, weights))

    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        X = X[nonnan]
        if weights is not None:
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if X.ndim == 1 else np.zeros(X.shape[1], dtype=float)
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    return (np.bincount(X.astype(np.int32, copy=False),
                        weights=weights, minlength=minlength).astype(float),
            nans)
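# Usage sketch for the variant above (assuming the function is in scope):
# NaNs are counted separately and never reach the integer bins.
import numpy as np

counts, nans = bincount(np.array([0.0, 1.0, 1.0, np.nan]), max_val=2)
# counts -> array([1., 2., 0.])   (max_val pads the unseen value 2)
# nans   -> 1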
def feature_extract(features, featnames, total=None, linflatten=False,
                    recalc=False):
    if isinstance(features, dict):
        features = [features]
    if total is None:
        total = len(features)

    featout = np.empty([total, len(featnames)], dtype='float32')
    for k, obj in enumerate(features):
        # Load features from the provided (cached) features if they exist:
        featout[k, :] = np.array([obj.get(key, np.NaN) for key in featnames],
                                 dtype='float32')

        if recalc or anynan(featout[k, :]):
            # TODO: Why is it needed to re-normalize the lightcurve here?
            lc = RF_GC_featcalc.prepLCs(obj['lightcurve'], linflatten)

            featout[k, 0] = ss.skew(lc.flux)        # Skewness
            featout[k, 1] = ss.kurtosis(lc.flux)    # Kurtosis
            featout[k, 2] = ss.shapiro(lc.flux)[0]  # Shapiro-Wilk test statistic for normality
            featout[k, 3] = calculate_eta(lc)

            periods, n_usedfreqs, usedfreqs = get_periods(obj, 6, lc.time,
                                                          ignore_harmonics=False)
            amp21, amp31 = RF_GC_featcalc.freq_ampratios(obj, n_usedfreqs, usedfreqs)
            pd21, pd31 = RF_GC_featcalc.freq_phasediffs(obj, n_usedfreqs, usedfreqs)

            featout[k, 4] = periods[0]
            if n_usedfreqs > 0:
                featout[k, 5] = obj['frequencies'][
                    (obj['frequencies']['num'] == 1)
                    & (obj['frequencies']['harmonic'] == 0)]['amplitude']
            else:
                featout[k, 5] = 0.
            featout[k, 6] = amp21
            featout[k, 7] = amp31
            featout[k, 8] = pd21
            featout[k, 9] = pd31

            # phase-fold lightcurve on dominant period
            folded_lc = lc.fold(period=periods[0])

            # Compute phi_rcs and rcs features
            featout[k, 10] = Rcs(lc)
            featout[k, 11] = Rcs(folded_lc)

        # If the amp1 feature is NaN, replace it with zero:
        if np.isnan(featout[k, 5]):
            featout[k, 5] = 0

    return featout
def _filter_is_defined(self, columns=None, negate=False):
    if columns is None:
        if sp.issparse(self.X):
            # a sparse row is fully defined when it stores shape[1] values
            remove = (self.X.indptr[1:] !=
                      self.X.indptr[:-1] + self.X.shape[1])
        else:
            remove = bn.anynan(self.X, axis=1)
        if sp.issparse(self.Y):
            remove = np.logical_or(remove, self.Y.indptr[1:] !=
                                   self.Y.indptr[:-1] + self.Y.shape[1])
        else:
            remove = np.logical_or(remove, bn.anynan(self.Y, axis=1))
    else:
        remove = np.zeros(len(self), dtype=bool)
        for column in columns:
            col, sparse = self.get_column_view(column)
            if sparse:
                remove = np.logical_or(remove, col == 0)
            else:
                remove = np.logical_or(remove, bn.anynan(col))
    retain = remove if negate else np.logical_not(remove)
    return Table.from_table_rows(self, retain)
def _filter_has_class(self, negate=False):
    if sp.issparse(self.Y):
        if negate:
            retain = (self.Y.indptr[1:] !=
                      self.Y.indptr[:-1] + self.Y.shape[1])
        else:
            retain = (self.Y.indptr[1:] ==
                      self.Y.indptr[:-1] + self.Y.shape[1])
    else:
        retain = bn.anynan(self.Y, axis=1)
        if not negate:
            retain = np.logical_not(retain)
    return Table.from_table_rows(self, retain)
def nan_extend_edges_and_interpolate(xs, X):
    """
    NaNs at the edges are handled as with savgol_filter mode nearest:
    the edge values are interpolated. NaNs in the middle are interpolated
    so that they do not propagate.
    """
    nans = None
    if bottleneck.anynan(X):
        nans = np.isnan(X)
        X = X.copy()
        xs, xsind, mon, X = transform_to_sorted_wavenumbers(xs, X)
        fill_edges(X)
        X = interp1d_with_unknowns_numpy(xs[xsind], X, xs[xsind])
        X = transform_back_to_features(xsind, mon, X)
    return X, nans
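# The per-row effect, sketched with plain numpy: np.interp fills interior
# NaNs from the neighbouring known points and holds the edge values
# constant, matching the 'nearest' edge behaviour described above.
import numpy as np

xs = np.arange(6.0)
row = np.array([np.nan, 1.0, np.nan, 3.0, 4.0, np.nan])
known = ~np.isnan(row)
filled = np.interp(xs, xs[known], row[known])
# filled -> array([1., 1., 2., 3., 4., 4.])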
def featcalc(self, features, total=None, recalc=False):
    """Calculates features for a set of lightcurves."""
    if isinstance(features, dict):  # trick for single features
        features = [features]
    if total is None:
        total = len(features)

    featout = np.empty([total, len(self.features_names)], dtype='float32')
    for k, obj in enumerate(features):
        # Load features from the provided (cached) features if they exist:
        featout[k, :] = [obj.get(key, np.NaN) for key in self.features_names]

        # If not all features are already populated, we are going to recalculate them all:
        if recalc or anynan(featout[k, :]):
            lc = fc.prepLCs(obj['lightcurve'], linflatten=False)

            periods, _, _ = get_periods(obj, NFREQUENCIES, lc.time, in_days=False)
            featout[k, :NFREQUENCIES] = periods
            #EBper = EBperiod(lc.time, lc.flux, periods[0], linflatten=linflatten-1)
            #featout[k, 0] = EBper  # overwrites top period
            featout[k, NFREQUENCIES:NFREQUENCIES + 2] = fc.compute_varrat(obj)
            #featout[k, NFREQUENCIES+1:NFREQUENCIES+2] = fc.compute_lpf1pa11(obj)
            featout[k, NFREQUENCIES + 2:NFREQUENCIES + 3] = stat.skew(lc.flux)
            featout[k, NFREQUENCIES + 3:NFREQUENCIES + 4] = fc.compute_flux_ratio(lc.flux)
            featout[k, NFREQUENCIES + 4:NFREQUENCIES + 5] = fc.compute_differential_entropy(lc.flux)
            featout[k, NFREQUENCIES + 5:NFREQUENCIES + 6] = fc.compute_differential_entropy(
                obj['powerspectrum'].standard[1])
            featout[k, NFREQUENCIES + 6:NFREQUENCIES + 10] = fc.compute_multiscale_entropy(lc.flux)
            #featout[k, NFREQUENCIES+10:NFREQUENCIES+11] = fc.compute_max_lyapunov_exponent(lc.flux)
    return featout
def compute_distances(self, metric, data):
    self.clear_messages()
    if data is None:
        return
    if issparse(data.X) and not metric.supports_sparse:
        self.Error.dense_metric_sparse_data()
        return
    if not any(a.is_continuous for a in data.domain.attributes):
        self.Error.no_continuous_features()
        return
    needs_preprocessing = False
    if any(a.is_discrete for a in self.data.domain.attributes):
        self.Warning.ignoring_discrete()
        needs_preprocessing = True
    if not issparse(data.X) and bn.anynan(data.X):
        self.Warning.imputing_data()
        needs_preprocessing = True
    if needs_preprocessing:
        # removes discrete features and imputes data
        data = distance._preprocess(data)
    if not data.X.size:
        self.Error.empty_data()
        return
    if isinstance(metric, distance.MahalanobisDistance):
        n, m = data.X.shape
        if self.axis == 1:
            n, m = m, n
    if isinstance(metric, distance.MahalanobisDistance):
        # Mahalanobis distance has to be trained before it can be used
        # to compute distances
        try:
            metric.fit(data, axis=1 - self.axis)
        except (ValueError, MemoryError) as e:
            self.Error.mahalanobis_error(e)
            return
    return metric(data, data, 1 - self.axis, impute=True)
def compute_distances(self, metric, data):
    self.clear_messages()
    if data is None:
        return
    if issparse(data.X) and not metric.supports_sparse:
        self.Error.dense_metric_sparse_data()
        return
    if not any(a.is_continuous for a in data.domain.attributes):
        self.Error.no_continuous_features()
        return
    needs_preprocessing = False
    if any(a.is_discrete for a in self.data.domain.attributes):
        self.Warning.ignoring_discrete()
        needs_preprocessing = True
    if not issparse(data.X) and bn.anynan(data.X):
        self.Warning.imputing_data()
        needs_preprocessing = True
    if needs_preprocessing:
        # removes discrete features and imputes data
        data = distance._preprocess(data)
    if not data.X.size:
        self.Error.empty_data(data.X.shape)
        return
    if isinstance(metric, distance.MahalanobisDistance):
        n, m = data.X.shape
        if self.axis == 1:
            n, m = m, n
        if n <= m:
            self.Error.too_few_observations()
            return
    if isinstance(metric, distance.MahalanobisDistance):
        # Mahalanobis distance has to be trained before it can be used
        # to compute distances
        metric.fit(data, axis=1 - self.axis)
    return metric(data, data, 1 - self.axis, impute=True)
def test_halo(SHARED_INPUT_DIR, datasource):
    with TemporaryDirectory() as OUTPUT_DIR:
        with HaloPhotometry(267211065, SHARED_INPUT_DIR, OUTPUT_DIR,
                            plot=True, datasource=datasource,
                            sector=1, camera=3, ccd=2) as pho:
            pho.photometry()
            filepath = pho.save_lightcurve()
            print(pho.lightcurve)

            # It should set the status to one of these:
            print(pho.status)
            assert pho.status in (STATUS.OK, STATUS.WARNING)

            # They shouldn't be exactly zero:
            assert not np.all(pho.lightcurve['flux'] == 0)
            assert not np.all(pho.lightcurve['flux_err'] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 0] == 0)
            assert not np.all(pho.lightcurve['pos_centroid'][:, 1] == 0)

            # They shouldn't be NaN (in this case!):
            assert not allnan(pho.lightcurve['flux'])
            assert not allnan(pho.lightcurve['flux_err'])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 0])
            assert not allnan(pho.lightcurve['pos_centroid'][:, 1])

            # Test the outputted FITS file:
            with fits.open(filepath, mode='readonly') as hdu:
                # Should be the same vectors in FITS as returned in Table:
                np.testing.assert_allclose(pho.lightcurve['time'], hdu[1].data['TIME'])
                np.testing.assert_allclose(pho.lightcurve['timecorr'], hdu[1].data['TIMECORR'])
                np.testing.assert_allclose(pho.lightcurve['flux'], hdu[1].data['FLUX_RAW'])
                np.testing.assert_allclose(pho.lightcurve['flux_err'], hdu[1].data['FLUX_RAW_ERR'])
                np.testing.assert_allclose(pho.lightcurve['cadenceno'], hdu[1].data['CADENCENO'])

                # Test FITS aperture image:
                ap = hdu['APERTURE'].data
                print(ap)
                assert np.all(pho.aperture == ap), "Aperture image mismatch"
                assert not anynan(ap), "NaN in aperture image"
                assert np.all(ap >= 0), "Negative values in aperture image"
                assert np.any(ap & 2 != 0), "No photometric mask set"
                #assert np.any(ap & 8 != 0), "No position mask set"

    print("Passed Tests for %s" % datasource)
def freq_counts(self, arrs, lens):
    """
    Calculates frequencies of samples.

    Parameters
    ----------
    arrs
        A sequence of arrays.
    lens
        A sequence of the numbers of distinct values per array.

    Returns
    -------
    numpy.ndarray
        A 1D numpy array of frequencies.
    """
    no_nans = reduce(np.logical_and,
                     [~np.isnan(a) if bn.anynan(a)
                      else np.ones(self.m).astype(bool) for a in arrs])
    combined = reduce(add, [arrs[i][no_nans] * reduce(mul, lens[:i])
                            for i in range(1, len(arrs))],
                      arrs[0][no_nans])
    return np.bincount(combined.astype(np.int32, copy=False),
                       minlength=reduce(mul, lens)).astype(float)
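# Sketch of the mixed-radix encoding used above: two discrete columns with
# 3 and 4 levels are combined into a single index a0 + a1*3, which a single
# np.bincount then counts; rows with NaNs are masked out first. The arrays
# here are made-up illustration data.
import numpy as np

a0 = np.array([0.0, 1.0, 2.0, 0.0, np.nan])   # 3 levels
a1 = np.array([0.0, 3.0, 1.0, 2.0, 1.0])      # 4 levels
no_nans = ~np.isnan(a0) & ~np.isnan(a1)
combined = a0[no_nans] + a1[no_nans] * 3
freqs = np.bincount(combined.astype(np.int32), minlength=3 * 4).astype(float)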
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        nans = (~nonnan).sum(axis=0)
        X = X[nonnan]
        if weights is not None:
            weights = weights[nonnan]
    else:
        nans = 0. if X.ndim == 1 else np.zeros(X.shape[1], dtype=float)
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    return (np.bincount(X.astype(np.int32, copy=False),
                        weights=weights, minlength=minlength).astype(float),
            nans)
def bincount(X, max_val=None, weights=None, minlength=None):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.
    """
    X = np.asanyarray(X)
    if X.dtype.kind == 'f' and bn.anynan(X):
        nonnan = ~np.isnan(X)
        nans = (~nonnan).sum(axis=0)
        X = X[nonnan]
        if weights is not None:
            weights = weights[nonnan]
    else:
        nans = 0 if X.ndim == 1 else np.zeros(X.shape[1])
    if minlength is None and max_val is not None:
        minlength = max_val + 1
    return (np.bincount(X.astype(np.int32, copy=False),
                        weights=weights, minlength=minlength),
            nans)
def argnanmedoid(x, axis=1):
    """
    Return the indices of the medoid.

    :param x: input array
    :param axis: axis to medoid along
    :return: indices of the medoid
    """
    if axis == 0:
        x = x.T

    invalid = anynan(x, axis=0)
    band, time = x.shape
    diff = x.reshape(band, time, 1) - x.reshape(band, 1, time)
    dist = np.sqrt(np.sum(diff * diff, axis=0))
    # dist = np.linalg.norm(diff, axis=0) is slower somehow...
    dist_sum = nansum(dist, axis=0)
    dist_sum[invalid] = np.inf
    i = np.argmin(dist_sum)

    return i
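# Toy check of the medoid definition (assuming argnanmedoid above is in
# scope, with its bottleneck anynan/nansum imports): the medoid is the
# observation minimising the summed distance to all other observations,
# here the middle column.
import numpy as np

x = np.array([[0.0, 1.0, 10.0],
              [0.0, 1.0, 10.0]])   # bands x time
i = argnanmedoid(x)                # -> 1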
def medoid_indices(arr, invalid=None):
    """
    The indices of the medoid.

    :arg arr: input array
    :arg invalid: mask for invalid data containing NaNs
    """
    # vectorized version of `argnanmedoid`
    bands, times, ys, xs = arr.shape

    diff = (arr.reshape(bands, times, 1, ys, xs) -
            arr.reshape(bands, 1, times, ys, xs))
    dist = np.linalg.norm(diff, axis=0)
    dist_sum = nansum(dist, axis=0)

    if invalid is None:
        # compute it in case it's not already available
        invalid = anynan(arr, axis=0)

    dist_sum[invalid] = np.inf
    return np.argmin(dist_sum, axis=0)
def _filter_has_class(self, negate=False):
    """
    Return rows with known class attribute. If there are multiple classes,
    all must be defined.

    :param negate: invert the selection
    :type negate: bool
    :return: new table
    :rtype: Orange.data.Table
    """
    if sp.issparse(self.Y):
        if negate:
            retain = (self.Y.indptr[1:] !=
                      self.Y.indptr[:-1] + self.Y.shape[1])
        else:
            retain = (self.Y.indptr[1:] ==
                      self.Y.indptr[:-1] + self.Y.shape[1])
    else:
        retain = bn.anynan(self.Y, axis=1)
        if not negate:
            retain = np.logical_not(retain)
    return Table.from_table_rows(self, retain)
def __call__(self, data):
    # convert to data domain if any conversion is possible,
    # otherwise we use the interpolator directly to make domains compatible
    if self.domain is not None and data.domain != self.domain \
            and any(at.compute_value for at in self.domain.attributes):
        data = data.from_table(self.domain, data)

    x = getx(data)
    # removing whole NaN columns from the data will effectively replace
    # NaNs that are not on the edges with interpolated values
    ys = data.X
    if self.handle_nans:
        x, ys = remove_whole_nan_ys(x, ys)  # relatively fast
    if len(x) == 0:
        return np.ones((len(data), len(self.points))) * np.nan

    interpfn = self.interpfn
    if interpfn is None:
        if self.handle_nans and bottleneck.anynan(ys):
            if self.kind == "linear":
                interpfn = interp1d_with_unknowns_numpy
            else:
                interpfn = interp1d_with_unknowns_scipy
        else:
            interpfn = interp1d_wo_unknowns_scipy
    return interpfn(x, ys, self.points, kind=self.kind)
def do_classify(self, features):
    """
    Classify a single lightcurve.

    Parameters:
        features (dict): Dictionary of features.

    Returns:
        dict: Dictionary of stellar classifications.
    """
    # Start a logger that should be used to output e.g. debug information:
    logger = logging.getLogger(__name__)

    if not self.classifier.trained:
        logger.error('Classifier has not been trained. Exiting.')
        raise ValueError('Classifier has not been trained. Exiting.')

    # Build features array from the probabilities from the other classifiers:
    # TODO: What about NaN values?
    logger.debug("Importing features...")
    featarray = self.build_features_table([features], total=1)

    if anynan(featarray):
        raise ValueError("Features contain NaNs")

    logger.debug("We are starting the magic...")
    # Comes out with shape (1, 8); we want shape (8,) instead, so squeeze:
    classprobs = self.classifier.predict_proba(featarray).squeeze()
    logger.debug("Classification complete")

    # Format the output:
    result = {}
    for c, cla in enumerate(self.classifier.classes_):
        key = self.StellarClasses(cla)
        result[key] = classprobs[c]
    return result, featarray
def _fix_missing():
    nonlocal data
    if not metric.supports_missing and bn.anynan(data.X):
        self.Warning.imputing_data()
        data = distance.impute(data)
def __call__(self, raw, cdf_attr):
    """input_translator converts raw netcdf variables into the form used by
       the hsrl processing code and performs pileup correction on photon counts"""

    if hasattr(raw, 'wfov_counts') and self.constants['wfov_type'] == 'molecular':
        raw.molecular_wfov_counts = raw.wfov_counts.copy()
    elif hasattr(raw, 'wfov_counts'):
        raw.combined_wfov_hi_counts = raw.wfov_counts.copy()

    if hasattr(raw, 'op_mode'):
        #extract i2 lock bit from operating mode
        #this will allow testing of bit even after averaging
        raw.i2_locked = (raw.op_mode[:].astype(int) & 4) / 4

    if hasattr(raw, 'seeded_shots'):
        setattr(raw, 'delta_t',
                raw.seeded_shots[:, 0] / float(self.constants['laser_rep_rate']))
    else:
        setattr(raw, 'delta_t', np.zeros([0]))

    #for i in np.arange(raw.times.size):
    #    raw.times[i] -= timedelta(seconds=raw.delta_t[i])

    if hasattr(raw, 'transmitted_energy'):
        # convert to mJ per preaveraged accumulation interval
        raw.transmitted_energy[:] = raw.transmitted_energy \
            * self.constants['transmitted_energy_monitor'][0] \
            + self.constants['transmitted_energy_monitor'][1] \
            * raw.seeded_shots[:, 0]
        #compute transmitted power
        setattr(raw, 'transmitted_power', raw.transmitted_energy / raw.delta_t)

    if hasattr(raw, 'transmitted_1064_energy'):
        # convert to mJ per preaveraged accumulation interval
        raw.transmitted_1064_energy[:] = raw.transmitted_1064_energy \
            * self.constants['transmitted_1064_energy_monitor'][0] \
            + self.constants['transmitted_1064_energy_monitor'][1] \
            * raw.seeded_shots[:, 0]
        #compute transmitted 1064 power
        setattr(raw, 'transmitted_1064_power',
                raw.transmitted_1064_energy / raw.delta_t)

    if hasattr(raw, 'filtered_energy'):
        if raw.filtered_energy.dtype == 'int32':
            raw.nonfiltered_energy = raw.nonfiltered_energy.astype('float64')
            raw.filtered_energy = raw.filtered_energy.astype('float64')
        if len(raw.filtered_energy.shape) == 1:
            raw.filtered_energy = raw.filtered_energy[:, np.newaxis]
            raw.nonfiltered_energy = raw.nonfiltered_energy[:, np.newaxis]
        raw.filtered_energy[raw.filtered_energy > 1e10] = np.NaN
        raw.nonfiltered_energy[raw.nonfiltered_energy > 1e10] = np.NaN

    if hasattr(raw, 'builduptime') and raw.builduptime.size > 0:
        raw.qswitch_buildup_time = raw.builduptime[:, 0]
        raw.min_qswitch_buildup_time = raw.builduptime[:, 1]
        raw.max_qswitch_buildup_time = raw.builduptime[:, 2]

    if hasattr(raw, 'superseedlasercontrollog'):
        raw.superseedlasercontrollog[raw.superseedlasercontrollog > 1e10] = np.NaN

    if hasattr(raw, 'energyRatioLockPoint') \
            and raw.energyRatioLockPoint.size > 0:
        if len(raw.energyRatioLockPoint.shape) == 2:
            raw.filtered_lockpoint = raw.energyRatioLockPoint[:, 0]
            raw.nonfiltered_lockpoint = raw.energyRatioLockPoint[:, 1]
        else:
            clarray = hau.T_Array(np.ones([raw.filtered_energy.shape[0]]))
            raw.filtered_lockpoint = clarray * raw.energyRatioLockPoint[0]
            raw.nonfiltered_lockpoint = clarray * raw.energyRatioLockPoint[1]

    if hasattr(raw, 'raw_analog_interferometertemperature'):
        thermistor_cal = self.constants['interferometer_temp_cal']
        R = np.abs(raw.raw_analog_interferometertemperature / 0.000250)
        raw.interferometer_temp = 1 / (thermistor_cal[0]
                                       + thermistor_cal[1] * np.log(R)
                                       + thermistor_cal[2] * np.log(R) ** 3) - 273.15
        ntemps = len(raw.interferometer_temp)
        if 0:  # ntemps > 5:
            #do eleven point median filter
            temps = np.zeros((ntemps, 11))
            temps[0:ntemps - 5, 0] = raw.interferometer_temp[5:]
            temps[0:ntemps - 4, 1] = raw.interferometer_temp[4:]
            temps[0:ntemps - 3, 2] = raw.interferometer_temp[3:]
            temps[0:ntemps - 2, 3] = raw.interferometer_temp[2:]
            temps[0:ntemps - 1, 4] = raw.interferometer_temp[1:]
            temps[:, 5] = raw.interferometer_temp
            temps[1:, 6] = raw.interferometer_temp[:ntemps - 1]
            temps[2:, 7] = raw.interferometer_temp[:ntemps - 2]
            temps[3:, 8] = raw.interferometer_temp[:ntemps - 3]
            temps[4:, 9] = raw.interferometer_temp[:ntemps - 4]
            temps[5:, 10] = raw.interferometer_temp[:ntemps - 5]
            raw.interferometer_temp = hau.T_Array(np.median(temps, 1))
        else:
            raw.interferometer_temp = hau.T_Array(raw.interferometer_temp)

    if hasattr(raw, 'raw_analog_etalontemperature'):
        # convert etalon thermistor voltage to thermistor resistance
        # T(degC) = 1/(a + b*Ln(R) + c*(Ln(R))^3) - 273.15  (Steinhart-Hart equation)
        # Where:
        #   a = 0.000862448
        #   b = 0.000258456
        #   c = 0.000000142
        # and
        #   R = (Volts ADC Reading)/(0.000250 amps)
        thermistor_cal = self.constants['interferometer_temp_cal']
        R = np.abs(raw.raw_analog_etalontemperature / 0.000250)
        raw.etalon_temp = (hau.T_Array(np.array(
            (1.0 / (thermistor_cal[0] + thermistor_cal[1] * np.log(R)
                    + thermistor_cal[2] * np.log(R) ** 3) - 273.15),
            dtype=np.float32, ndmin=1)))

    if hasattr(raw, 'raw_analog_coolanttemperature'):
        # convert coolant thermistor voltage to thermistor resistance
        # (same Steinhart-Hart conversion as above)
        thermistor_cal = self.constants['interferometer_temp_cal']
        R = np.abs(raw.raw_analog_coolanttemperature / 0.000250)
        raw.coolant_temperature = \
            (hau.T_Array(np.array(
                (1.0 / (thermistor_cal[0] + thermistor_cal[1] * np.log(R)
                        + thermistor_cal[2] * np.log(R) ** 3) - 273.15),
                dtype=np.float32, ndmin=1)))

    if hasattr(raw, 'telescope_pointing'):
        if not hasattr(raw, 'telescope_locked'):
            setattr(raw, 'telescope_locked', np.ones_like(raw.telescope_pointing))
        raw.telescope_pointing = raw.telescope_pointing.astype('float64')
        raw.telescope_pointing[raw.telescope_locked == 0] = .5
        #roll component of telescope mounting angle in degrees measured relative
        #to platform (zero degrees = vertical)
        #roll angle is + in clockwise direction
        if not hasattr(raw, 'telescope_roll_angle_offset'):
            setattr(raw, 'telescope_roll_angle_offset',
                    np.ones_like(raw.telescope_pointing))
        raw.telescope_roll_angle_offset[:] = \
            self.constants['telescope_roll_angle_offset']
        raw.telescope_roll_angle_offset[raw.telescope_pointing == 0] = \
            180.0 - self.constants['telescope_roll_angle_offset']

    if hasattr(raw, 'raw_analog_telescope_temperature'):
        # convert telescope thermistor voltage to thermistor resistance
        # (same Steinhart-Hart conversion as above)
        thermistor_cal = self.constants['interferometer_temp_cal']
        R = np.abs(raw.raw_analog_telescope_temperature / 0.000250)
        raw.telescope_temperature = \
            (hau.T_Array(np.array(
                (1.0 / (thermistor_cal[0] + thermistor_cal[1] * np.log(R)
                        + thermistor_cal[2] * np.log(R) ** 3) - 273.15),
                dtype=np.float32, ndmin=1)))

    if hasattr(raw, 'OutgoingBeamPosition_centermass') \
            and raw.OutgoingBeamPosition_centermass.size > 0:
        raw.cg_xs = raw.OutgoingBeamPosition_centermass[:, 0]
        raw.cg_ys = raw.OutgoingBeamPosition_centermass[:, 1]

    if hasattr(raw, 'OutgoingBeamPosition2_centermass') \
            and raw.OutgoingBeamPosition2_centermass.size > 0:
        raw.cg_xs2 = raw.OutgoingBeamPosition2_centermass[:, 0]
        raw.cg_ys2 = raw.OutgoingBeamPosition2_centermass[:, 1]

    if hasattr(raw, 'interferometer_intensity') \
            and raw.interferometer_intensity.size > 0:
        interf_peak = self.constants['interferometer_spectral_peak']
        phase_to_freq = self.constants['interferometer_phase_to_freq']
        npixels = self.constants['interferometer_fft_npixels']
        xform = np.fft.rfft(raw.interferometer_intensity[:, :npixels], axis=1)
        tmp = np.concatenate(([self.unwrap_firstangle],
                              np.angle(xform[:, interf_peak])))
        newlast = tmp[-1]
        tmp = np.unwrap(tmp)
        tmp = (self.unwrap_firstangle_atmagnitude - tmp[0]) + tmp
        if np.isfinite(tmp[-1]):
            self.unwrap_firstangle_atmagnitude = tmp[-1]
        self.unwrap_firstangle = newlast
        raw.interf_freq = tmp[1:]
        raw.interf_freq = hau.T_Array(-raw.interf_freq * phase_to_freq[0])

    #compute temperature compensated interferometer freq
    if 0 and hasattr(raw, 'interferometer_temp') \
            and hasattr(raw, 'interf_freq') \
            and 'interf_temp_coef' in self.constants:
        raw.tcomp_interf_freq = raw.interf_freq \
            - (raw.interferometer_temp - raw.interferometer_temp[0]) \
            * self.constants['interf_temp_coef'] * 1e9

    for imagetime in ('interferometer_snapshot_time',
                      'outgoingbeamalignment_snapshot_time',
                      'overhead_snapshot_time',
                      'snowscope_snapshot_time'):
        if hasattr(raw, imagetime):
            setattr(raw, imagetime, hru.convert_to_python_times(
                getattr(raw, imagetime)[np.newaxis, :]))

    #replace missing values with NaN's
    if hasattr(raw, 'seedvoltage'):
        raw.seedvoltage[raw.seedvoltage > 100] = np.NaN
    if hasattr(raw, 'latitude'):
        raw.latitude[raw.latitude > 100] = np.NaN
    if hasattr(raw, 'longitude'):
        raw.longitude[raw.longitude > 200] = np.NaN

    if hasattr(raw, 'laserpowervalues') and raw.laserpowervalues.size > 0:
        raw.laser_current = raw.laserpowervalues[:, 0]
        raw.laser_voltage = raw.laserpowervalues[:, 1]
        if raw.laserpowervalues.shape[1] > 2:
            raw.laser_current_setpoint = raw.laserpowervalues[:, 2]
            raw.laser_diode_temp = raw.laserpowervalues[:, 3]
            raw.laser_diode_temp_setpoint = raw.laserpowervalues[:, 4]
        if raw.laserpowervalues.shape[1] > 6:
            raw.ktp_temp = raw.laserpowervalues[:, 5]
            raw.ktp_temp_setpoint = raw.laserpowervalues[:, 6]

    #remove spikes from tcs records
    for fiel in ('tcsopticstop_', 'tcsoptics_', 'tcstelescope_',
                 'thermal1_', 'thermal2_', 'tcsaft_', 'tcsfore_'):
        for f in vars(raw).keys():
            if f.startswith(fiel):
                v = getattr(raw, f)
                v[v > 1000] = np.NaN

    if hasattr(raw, 'one_wire_temperatures') \
            and raw.one_wire_temperatures.size > 0:
        #raw.one_wire_attrib = cdf_attr['one_wire_temperatures']
        raw.one_wire_attrib = []
        [ntime, ntemps] = raw.one_wire_temperatures.shape
        for i in range(ntemps):
            string = 'field' + str(i) + '_name'
            try:
                raw.one_wire_attrib.append(
                    cdf_attr['one_wire_temperatures'][string])
            except KeyError:
                print("Couldn't find attribute for ", string)
                raw.one_wire_attrib.append(None)
        #remove spikes of 1e37 that appear in temperatures
        raw.one_wire_temperatures[raw.one_wire_temperatures > 1000.] = np.NaN

    if hasattr(raw, 'RemoveLongI2Cell'):
        servo_range = cdf_attr['RemoveLongI2Cell']['range']
        raw.i2_cell_out = np.abs(raw.RemoveLongI2Cell - servo_range[1]) \
            > np.abs(raw.RemoveLongI2Cell - servo_range[0])

    if hasattr(raw, 'RemoveLongI2ArCell'):
        servo_range = cdf_attr['RemoveLongI2ArCell']['range']
        raw.i2a_cell_out = np.abs(raw.RemoveLongI2ArCell - servo_range[1]) \
            > np.abs(raw.RemoveLongI2ArCell - servo_range[0])

    if hasattr(raw, 'shot_count'):
        if raw.shot_count.size > 0:
            raw.shot_count = raw.shot_count[:, 0]
        else:
            raw.shot_count = raw.shot_count.reshape([0])

    if hasattr(raw, 'seeded_shots'):
        if raw.seeded_shots.size > 0:
            raw.seeded_shots = raw.seeded_shots[:, 0]
        else:
            raw.seeded_shots = raw.seeded_shots.reshape([0])

    #extract average dark counts from profiles and add dark counts to raw
    #dark count extracted from 'first_bins' or 'last_bins' as specified in constants
    #pu.extract_dark_count(raw, self.constants)  # moved to after PILEUP 20140805

    #extract cal pulse from light scattered as laser pulse exits system
    #and place in raw
    #pu.extract_cal_pulse(raw, self.constants)

    if hasattr(raw, 'l3cavityvoltage') and raw.l3cavityvoltage.size > 0:
        raw.piezo_voltage_ave = raw.l3cavityvoltage[:, 0]
        raw.piezo_voltage_min = raw.l3cavityvoltage[:, 1]
        raw.piezo_voltage_max = raw.l3cavityvoltage[:, 2]

    if hasattr(raw, 'l3locking_stats') \
            and 'l3slope_to_frequency' in self.constants:
        raw.l3frequency_offset = raw.l3locking_stats.copy()
        for x in range(0, 3):
            raw.l3frequency_offset[:, x] = np.polyval(
                self.constants['l3slope_to_frequency'],
                raw.l3locking_stats[:, x])

    if hasattr(raw, 'GPS_MSL_Alt'):
        #replace any spikes in altitude with base altitude
        #this allows the code to run but produces garbage data
        if np.any(raw.GPS_MSL_Alt > 20000.0):
            raw.GPS_MSL_Alt[raw.GPS_MSL_Alt > 20000.0] = \
                self.constants['lidar_altitude']

    if hasattr(raw, 'roll_angle'):
        if anynan(raw.roll_angle):
            raw.roll_angle[np.isnan(raw.roll_angle)] = 0.0
        if anynan(raw.pitch_angle):
            raw.pitch_angle[np.isnan(raw.pitch_angle)] = 0.0

    if hasattr(raw, 'opticalbenchairpressure'):
        #convert psi to mb
        #print 'pre--opticalbenchairpressure ', raw.opticalbenchairpressure.shape
        raw.opticalbenchairpressure = hau.T_Array(np.array(
            (raw.opticalbenchairpressure
             * self.constants['optical_bench_air_pressure_cal']),
            ndmin=1, dtype=np.float32))
        #print 'optical_bench_air_pressure.size', raw.opticalbenchairpressure.size, raw.times.size

    if hasattr(raw, 'chillertemperature') and raw.chillertemperature.size > 0:
        raw.chiller_temp = raw.chillertemperature[:, 0]
        raw.chiller_setpt = raw.chillertemperature[:, 1]

    if hasattr(raw, 'etalon_pressure'):
        raw.etalon_pressure = raw.etalon_pressure * self.constants['etalon_pressure']

    if hasattr(raw, 'qw_rotation_angle'):
        #convert gv quarter wave plate rotation angle from radians to deg
        raw.qw_rotation_angle = raw.qw_rotation_angle * 180.0 / np.pi

    if hasattr(raw, 'GPS_MSL_Alt') or self.constants['installation'] in (
            'airborne', 'shipborne'):
        #do quality check on aircraft GPS and INS data
        pu.gps_quality_check(raw, self.constants)

    if hasattr(raw, 'molecular_counts'):
        for k, v in vars(raw).items():
            if '_counts' in k:
                if raw.molecular_counts.shape[1] != v.shape[1]:
                    print('raw field ', k, ' is messed up. size difference')
                    tmp = copy.deepcopy(raw.molecular_counts)
                    minidx = min(tmp.shape[1], v.shape[1])
                    tmp[:, :] = 0
                    tmp[:, :minidx] = v[:, :minidx]
                    setattr(raw, k, tmp)

    #do pileup correction on signals before any averaging is done
    pu.pileup_correction(raw, self.constants, self.corr_adjusts)

    #extract average dark counts from profiles and add dark counts to raw
    #dark count extracted from 'first_bins' or 'last_bins' as specified in constants
    if hasattr(raw, 'molecular_counts'):
        pu.extract_dark_count(raw, self.constants)  # relocated from above 20140805

    #extract cal pulse from light scattered as laser pulse exits system
    #and place in raw
    pu.extract_cal_pulse(raw, self.constants)

    if 0:
        import hsrl.simulation.rb_simulation as sim
        #rescale for new energy
        sim.rb_simulation(raw, self.constants)
        #redo dark count
        pu.extract_dark_count(raw, self.constants)
def polynomial_smoothing(array, delta_z, smoothing):
    """If smoothing[0] > 0, smooth with a running polynomial fit of order
       smoothing[1]; the smoothing width in meters increases linearly from
       smoothing[2] at the lowest altitude to smoothing[3] at the highest
       altitude.
           array        = data array to smooth with running polynomial
           smoothing[0] = enable smoothing if True
           smoothing[1] = order of polynomial to use for local fit
           smoothing[2] = width of smoothing at lowest range (m)
           smoothing[3] = width of smoothing at highest range (m)
           smoothing[4] = first range to smooth
           delta_z      = bin width (m)"""

    if smoothing[0] == False:
        print('no smoothing')
        return array

    #check to see if array is 2-d
    try:
        [ntimes, nbins] = array.shape
    except:
        nbins = len(array)
        ntimes = 1

    #compute how much to increment smoothing half-width per altitude index
    delta_w = (smoothing[3] - smoothing[2]) / (2.0 * delta_z * nbins)
    #initial half-width in bins--note this is float
    w0 = smoothing[2] / (2.0 * delta_z)

    #start at larger of polynomial order, or specified start range
    first_bin = np.int(np.float(smoothing[4]) / delta_z)
    #first_bin = np.max(first_bin, np.int(smoothing[1]))

    for i in range(ntimes):
        #smooth profiles with local 2nd order polynomial fit
        if ntimes > 1:
            temp = array[i, :].copy()
        else:
            temp = array.copy()
        #set NaNs to 0.0
        np.nan_to_num(temp)
        #loop over ranges limited by number of points needed to fit polynomial
        #where the polynomial order = smoothing[1]
        for bin in range(first_bin + 1, nbins - np.int(smoothing[1]) - 1):
            w = int(w0 + bin * delta_w)
            if bin >= w and bin <= nbins - w - 2:
                start = bin - w
                end = bin + w
                #print 'w1 ', start, j, end
            elif bin < w:
                #bin is less than half_width
                start = 0
                end = 2 * bin
            else:
                #bin + half_width bumping against nbins
                #"top of profile not smoothed to prevent introduction of extra NaN's"
                a = 1
            x = range(start, end + 1)
            p = np.polyfit(x, temp[x], np.int(smoothing[1]))
            if not anynan(p):
                if ntimes > 1:
                    array[i, bin] = np.polyval(p, bin)
                else:
                    array[bin] = np.polyval(p, bin)
    return array
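# The core of the running-polynomial fit in isolation, a sketch assuming a
# fixed half-width w and order 2 (the function above instead grows w
# linearly with range): each point is replaced by the value of a local
# polynomial fitted over its window.
import numpy as np

prof = np.sin(np.linspace(0, 3, 100))
smoothed = prof.copy()
w, order = 5, 2
for i in range(w, len(prof) - w):
    idx = np.arange(i - w, i + w + 1)
    p = np.polyfit(idx, prof[idx], order)
    smoothed[i] = np.polyval(p, i)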
def time_anynan(self, dtype, shape, order, axis, case):
    bn.anynan(self.arr, axis=axis)
def has_missing_class(self):
    """Return `True` if there are any missing class values."""
    return bn.anynan(self.Y)
def has_missing(self):
    """Return `True` if there are any missing attribute or class values."""
    return bn.anynan(self.X) or bn.anynan(self.Y)
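# bn.anynan short-circuits at the first NaN it finds, which is why these
# checks stay cheap on large tables; the equivalent (but slower) numpy
# spelling for comparison:
import numpy as np
import bottleneck as bn

X = np.random.rand(1000, 50)
assert bn.anynan(X) == bool(np.isnan(X).any())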
def featcalc(self, features, total=None, cardinality=64, linflatten=False,
             recalc=False):
    """Calculates features for a set of lightcurves."""
    if isinstance(features, dict):  # trick for single features
        features = [features]
    if total is None:
        total = len(features)

    # Loop through the provided features and build feature table:
    featout = np.empty([total, len(self.features_names)], dtype='float32')
    for k, obj in enumerate(features):
        # Load features from the provided (cached) features if they exist:
        featout[k, :] = [obj.get(key, np.NaN) for key in self.features_names]

        # If not all features are already populated, we are going to recalculate them all:
        if recalc or anynan(featout[k, :]):
            lc = fc.prepLCs(obj['lightcurve'], linflatten=linflatten)

            periods, n_usedfreqs, usedfreqs = get_periods(obj, NFREQUENCIES, lc.time,
                                                          ignore_harmonics=True)
            featout[k, :NFREQUENCIES] = periods
            EBper = fc.EBperiod(lc.time, lc.flux, periods[0], linflatten=True)
            featout[k, 0] = EBper  # overwrites top period
            featout[k, NFREQUENCIES:NFREQUENCIES + 2] = fc.freq_ampratios(obj, n_usedfreqs, usedfreqs)
            featout[k, NFREQUENCIES + 2:NFREQUENCIES + 4] = fc.freq_phasediffs(obj, n_usedfreqs, usedfreqs)

            # Self Organising Map
            featout[k, NFREQUENCIES + 4:NFREQUENCIES + 6] = fc.SOMloc(
                self.classifier.som, lc.time, lc.flux, EBper, cardinality)
            featout[k, NFREQUENCIES + 6:NFREQUENCIES + 8] = fc.phase_features(lc.time, lc.flux, EBper)
            featout[k, NFREQUENCIES + 8:NFREQUENCIES + 10] = fc.p2p_features(lc.flux)

            # Higher Order Crossings:
            psi, zc = fc.compute_hocs(lc.time, lc.flux, 5)
            featout[k, NFREQUENCIES + 10:NFREQUENCIES + 12] = psi, zc[0]

            # FliPer:
            featout[k, NFREQUENCIES + 12:NFREQUENCIES + 16] = \
                obj['Fp07'], obj['Fp7'], obj['Fp20'], obj['Fp50']

            # If we are running with linfit enabled, add an extra feature
            # which is the absolute value of the fitted linear trend, divided
            # by the point-to-point scatter:
            if self.linfit:
                slope_feature = np.abs(obj['detrend_coeff'][0]) / obj['ptp']
                featout[k, NFREQUENCIES + 16] = slope_feature
    return featout
def bincount(x, weights=None, max_val=None, minlength=0):
    """Return counts of values in array X.

    Works kind of like np.bincount(), except that it also supports floating
    arrays with nans.

    Parameters
    ----------
    x : array_like, 1 dimension, nonnegative ints
        Input array.
    weights : array_like, optional
        Weights, array of the same shape as x.
    max_val : int, optional
        Indicates the maximum value we expect to find in X and sets the result
        array size accordingly. E.g. if we set `max_val=2` yet the largest value
        in X is 1, the result will contain a bin for the value 2, and will be
        set to 0. See examples for usage.
    minlength : int, optional
        A minimum number of bins for the output array. See numpy docs for info.

    Returns
    -------
    Tuple[np.ndarray, int]
        Returns the bincounts and the number of NaN values.

    Examples
    --------
    In case `max_val` is provided, the return shape includes bins for these
    values as well, even if they do not appear in the data. However, this will
    not truncate the bincount if values larger than `max_val` are found.

    >>> bincount([0, 0, 1, 1, 2], max_val=4)
    (array([ 2.,  2.,  1.,  0.,  0.]), 0.0)
    >>> bincount([0, 1, 2, 3, 4], max_val=2)
    (array([ 1.,  1.,  1.,  1.,  1.]), 0.0)

    """
    # Store the original matrix before any manipulation to check for sparse
    x_original = x
    if sp.issparse(x):
        if weights is not None:
            # Match weights and x axis so `indices` will be set appropriately
            if x.shape[0] == weights.shape[0]:
                x = x.tocsc()
            elif x.shape[1] == weights.shape[0]:
                x = x.tocsr()

            zero_weights = sparse_implicit_zero_weights(x, weights).sum()
            weights = weights[x.indices]
        else:
            zero_weights = sparse_count_implicit_zeros(x)

        x = x.data

    x = np.asanyarray(x)
    if x.dtype.kind == 'f' and bn.anynan(x):
        nonnan = ~np.isnan(x)
        x = x[nonnan]
        if weights is not None:
            nans = (~nonnan * weights).sum(axis=0)
            weights = weights[nonnan]
        else:
            nans = (~nonnan).sum(axis=0)
    else:
        nans = 0. if x.ndim == 1 else np.zeros(x.shape[1], dtype=float)

    if minlength == 0 and max_val is not None:
        minlength = max_val + 1

    bc = np.bincount(x.astype(np.int32, copy=False),
                     weights=weights, minlength=minlength).astype(float)
    # Since `csr_matrix.values` only contain non-zero values or explicit
    # zeros, we must count implicit zeros separately and add them to the
    # explicit ones found before
    if sp.issparse(x_original):
        # If x contains only NaNs, then bc will be an empty array
        if zero_weights and bc.size == 0:
            bc = [zero_weights]
        elif zero_weights:
            bc[0] += zero_weights

    return bc, nans
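# Why implicit zeros need the separate count above, sketched directly with
# scipy: CSR `.data` holds only stored values, so zeros that were never
# stored do not reach np.bincount and are added to bin 0 afterwards.
import numpy as np
import scipy.sparse as sp

x = sp.csr_matrix(np.array([[0.0, 1.0, 0.0],
                            [2.0, 0.0, 0.0]]))
counts = np.bincount(x.data.astype(np.int32), minlength=3).astype(float)
counts[0] += np.prod(x.shape) - x.nnz   # 4 implicit zeros
# counts -> array([4., 1., 1.])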
def plot_image(image, ax=None, scale='log', cmap=None, origin='lower',
               xlabel=None, ylabel=None, cbar=None,
               clabel='Flux ($e^{-}s^{-1}$)', cbar_ticks=None,
               cbar_ticklabels=None, cbar_pad=None, cbar_size='5%',
               title=None, percentile=95.0, vmin=None, vmax=None,
               offset_axes=None, color_bad='k', **kwargs):
    """
    Utility function to plot a 2D image.

    Parameters:
        image (2d array): Image data.
        ax (matplotlib.pyplot.axes, optional): Axes in which to plot.
            Default (None) is to use current active axes.
        scale (str or :py:class:`astropy.visualization.ImageNormalize` object, optional):
            Normalization used to stretch the colormap.
            Options: ``'linear'``, ``'sqrt'``, ``'log'``, ``'asinh'``, ``'histeq'``, ``'sinh'``
            and ``'squared'``.
            Can also be a :py:class:`astropy.visualization.ImageNormalize` object.
            Default is ``'log'``.
        origin (str, optional): The origin of the coordinate system.
        xlabel (str, optional): Label for the x-axis.
        ylabel (str, optional): Label for the y-axis.
        cbar (string, optional): Location of color bar.
            Choices are ``'right'``, ``'left'``, ``'top'``, ``'bottom'``.
            Default is not to create colorbar.
        clabel (str, optional): Label for the color bar.
        cbar_size (str, optional): Fractional size of colorbar compared to axes. Default='5%'.
        cbar_pad (float, optional): Padding between axes and colorbar.
        title (str or None, optional): Title for the plot.
        percentile (float, optional): The fraction of pixels to keep in color-trim.
            If single float given, the same fraction of pixels is eliminated from both ends.
            If tuple of two floats is given, the two are used as the percentiles.
            Default=95.
        cmap (matplotlib colormap, optional): Colormap to use. Default is the ``Blues`` colormap.
        vmin (float, optional): Lower limit to use for colormap.
        vmax (float, optional): Upper limit to use for colormap.
        color_bad (str, optional): Color to apply to bad pixels (NaN). Default is black.
        kwargs (dict, optional): Keyword arguments to be passed to :py:func:`matplotlib.pyplot.imshow`.

    Returns:
        :py:class:`matplotlib.image.AxesImage`: Image returned by :py:func:`matplotlib.pyplot.imshow`.

    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """

    logger = logging.getLogger(__name__)

    # Backward compatible settings:
    make_cbar = kwargs.pop('make_cbar', None)
    if make_cbar:
        raise FutureWarning("'make_cbar' is deprecated. Use 'cbar' instead.")
        if not cbar:
            cbar = make_cbar

    # Special treatment for boolean arrays:
    if isinstance(image, np.ndarray) and image.dtype == 'bool':
        if vmin is None: vmin = 0
        if vmax is None: vmax = 1
        if cbar_ticks is None: cbar_ticks = [0, 1]
        if cbar_ticklabels is None: cbar_ticklabels = ['False', 'True']

    # Calculate limits of color scaling:
    interval = None
    if vmin is None or vmax is None:
        if allnan(image):
            logger.warning("Image is all NaN")
            vmin = 0
            vmax = 1
            if cbar_ticks is None:
                cbar_ticks = []
            if cbar_ticklabels is None:
                cbar_ticklabels = []
        elif isinstance(percentile, (list, tuple, np.ndarray)):
            interval = viz.AsymmetricPercentileInterval(percentile[0], percentile[1])
        else:
            interval = viz.PercentileInterval(percentile)

    # Create ImageNormalize object with extracted limits:
    if scale in ('log', 'linear', 'sqrt', 'asinh', 'histeq', 'sinh', 'squared'):
        if scale == 'log':
            stretch = viz.LogStretch()
        elif scale == 'linear':
            stretch = viz.LinearStretch()
        elif scale == 'sqrt':
            stretch = viz.SqrtStretch()
        elif scale == 'asinh':
            stretch = viz.AsinhStretch()
        elif scale == 'histeq':
            stretch = viz.HistEqStretch(image[np.isfinite(image)])
        elif scale == 'sinh':
            stretch = viz.SinhStretch()
        elif scale == 'squared':
            stretch = viz.SquaredStretch()

        # Create ImageNormalize object. Very important to use clip=False if the image contains
        # NaNs, otherwise NaN points will not be plotted correctly.
        norm = viz.ImageNormalize(data=image[np.isfinite(image)],
                                  interval=interval,
                                  vmin=vmin, vmax=vmax,
                                  stretch=stretch,
                                  clip=not anynan(image))

    elif isinstance(scale, (viz.ImageNormalize, matplotlib.colors.Normalize)):
        norm = scale
    else:
        raise ValueError("scale {} is not available.".format(scale))

    if offset_axes:
        extent = (offset_axes[0] - 0.5, offset_axes[0] + image.shape[1] - 0.5,
                  offset_axes[1] - 0.5, offset_axes[1] + image.shape[0] - 0.5)
    else:
        extent = (-0.5, image.shape[1] - 0.5, -0.5, image.shape[0] - 0.5)

    if ax is None:
        ax = plt.gca()

    # Set up the colormap to use. If a bad color is defined,
    # add it to the colormap:
    if cmap is None:
        cmap = copy.copy(plt.get_cmap('Blues'))
    elif isinstance(cmap, str):
        cmap = copy.copy(plt.get_cmap(cmap))

    if color_bad:
        cmap.set_bad(color_bad, 1.0)

    # Plotting the image using all the settings set above:
    im = ax.imshow(image, cmap=cmap, norm=norm, origin=origin,
                   extent=extent, interpolation='nearest', **kwargs)

    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)
    if title is not None:
        ax.set_title(title)
    ax.set_xlim([extent[0], extent[1]])
    ax.set_ylim([extent[2], extent[3]])

    if cbar:
        colorbar(im, ax=ax, loc=cbar, size=cbar_size, pad=cbar_pad,
                 label=clabel, ticks=cbar_ticks, ticklabels=cbar_ticklabels)

    # Settings for ticks:
    integer_locator = MaxNLocator(nbins=10, integer=True)
    ax.xaxis.set_major_locator(integer_locator)
    ax.xaxis.set_minor_locator(integer_locator)
    ax.yaxis.set_major_locator(integer_locator)
    ax.yaxis.set_minor_locator(integer_locator)
    ax.tick_params(which='both', direction='out', pad=5)
    ax.xaxis.tick_bottom()
    ax.yaxis.tick_left()

    return im
def update(self):
    # get merge-extract
    for n, m in enumerate(self.show_merge):
        if self.show_merge_as_density[m]:
            self.merge_extract = self.densityMatrix[m][tuple(self.basis_dim_plot_range)]
        else:
            self.merge_extract = self.mergeMatrix[m][tuple(self.basis_dim_plot_range)]
        for b in range(len(self._basis_dim) - 1, -1, -1):
            # basis dim to concentrate
            if b not in self.show_basis:
                pos_corr = self.concentrate_basis_dim[:b].count("pos")
                if self.concentrate_basis_dim[b] == "sum":
                    self.merge_extract = bn.nansum(self.merge_extract, b - pos_corr)
                elif self.concentrate_basis_dim[b] == "mean":
                    self.merge_extract = bn.nanmean(self.merge_extract, b - pos_corr)
                elif self.concentrate_basis_dim[b] == "max":
                    self.merge_extract = bn.nanmax(self.merge_extract, b - pos_corr)
                elif self.concentrate_basis_dim[b] == "min":
                    self.merge_extract = bn.nanmin(self.merge_extract, b - pos_corr)
        for b in range(len(self._basis_dim) - 2, -1, -1):
            # check from end to start whether to roll-axis
            # the time-axis has to be the last one
            # don't roll the last basis-dim (start with len(self._basis_dim)-2)
            basis_time_index = None
            if b not in self.show_basis and self.concentrate_basis_dim[b] == "time":
                # reshape the matrix
                self.merge_extract = np.rollaxis(self.merge_extract, b, 0)
                basis_time_index = b
                break  # don't need to continue iterating: only one dim can be 'time'
        if len(self.show_basis) == 1:
            basis_extract = self.basisMatrix[self.show_basis[0]][
                self.basis_dim_plot_range[self.show_basis[0]]]
            if self.scale_plot == True:
                self.plot.enableAutoRange('xy', True)
            else:
                if self.enableAutoRangeX:
                    self.plot.enableAutoRange('x', True)
                    #self.plot.setXRange(
                    #    self._basis_dim[self.show_basis[0]]._include_range[0],
                    #    self._basis_dim[self.show_basis[0]]._include_range[1])
                if self.enableAutoRangeY:
                    self.plot.enableAutoRange('y', True)
            if self.transpose_axes:
                self.curves[n].setData(self.merge_extract, basis_extract)
            else:
                self.curves[n].setData(basis_extract, self.merge_extract)
        elif len(self.show_basis) >= 2:
            # calc scale and zero-position for axes-tics
            x0 = self._basis_dim[self.show_basis[0]]._include_range[0]
            x1 = self._basis_dim[self.show_basis[0]]._include_range[1]
            y0 = self._basis_dim[self.show_basis[1]]._include_range[0]
            y1 = self._basis_dim[self.show_basis[1]]._include_range[1]
            xscale = (x1 - x0) / self._basis_dim[self.show_basis[0]].resolution
            yscale = (y1 - y0) / self._basis_dim[self.show_basis[1]].resolution
            args = {'pos': [x0, y0], 'scale': [xscale, yscale]}
            if self.transpose_axes:
                args = {'pos': [y0, x0], 'scale': [yscale, xscale]}
            # set time-ticks
            if basis_time_index != None:
                args["xvals"] = self.basisMatrix[basis_time_index]
            if self.enableAutoRangeX:
                self.view.enableAutoRange('x', True)
                #self.view.setXRange(*self._basis_dim[self.show_basis[0]]._include_range)
            if self.enableAutoRangeY:
                self.view.enableAutoRange('y', True)
            # by default autoLevels (the color level of the merge-dims) == True
            # (calculated by pyqtgraph), but it can only process arrays without
            # NaN values: the calculated color level is wrong when the real
            # values lie beyond the NaN replacement (zero). Therefore compute
            # the color level here in case there are NaNs in the array:
            anynan = bn.anynan(self.merge_extract)
            if anynan:
                mmin = bn.nanmin(self.merge_extract)
                mmax = bn.nanmax(self.merge_extract)
                if np.isnan(mmin):
                    mmin, mmax = 0, 0
                self.plot.setLevels(mmin, mmax)
                args["autoLevels"] = False
                ## the following line doesn't work with my version of pyQtGraph
                #args["levels"] = [mmin, mmax]
                self.merge_extract = _utils.nanToZeros(self.merge_extract)
            if self.transpose_axes:
                self.plot.setImage(self.merge_extract.transpose(),
                                   autoRange=self.scale_plot, **args)
            else:
                self.plot.setImage(self.merge_extract,
                                   autoRange=self.scale_plot, **args)
            if anynan:
                # scale the histogram to the new range
                self.plot.ui.histogram.vb.setYRange(mmin, mmax)
        self.scale_plot = False
def train(self, tset, savecl=True, overwrite=False):
    """
    Train the Meta-classifier.

    Parameters:
        tset (:class:`TrainingSet`): Training set to train classifier on.
        savecl (bool, optional): Save the classifier to file?
        overwrite (bool, optional): Overwrite existing classifier save file.

    .. codeauthor:: James S. Kuszlewicz <*****@*****.**>
    .. codeauthor:: Rasmus Handberg <*****@*****.**>
    """
    # Start a logger that should be used to output e.g. debug information:
    logger = logging.getLogger(__name__)

    # Check for pre-calculated features
    fitlabels = self.parse_labels(tset.labels())

    # First create list of all possible classifiers:
    all_classifiers = list(classifier_list)
    all_classifiers.remove('meta')

    # Create list of all features:
    # Save this to object, we are using it to keep track of which features were used
    # to train the classifier:
    self.features_used = list(itertools.product(all_classifiers, self.StellarClasses))
    self.features_names = [f'{classifier:s}_{stcl.name:s}'
                           for classifier, stcl in self.features_used]

    # Create table of features:
    # Create as float32, since that is what RandomForestClassifier converts it to anyway.
    logger.info("Importing features...")
    features = self.build_features_table(tset.features(), total=len(tset))

    # Remove columns that are all NaN:
    # This can be classifiers that never return a given class or a classifier that
    # has not been run at all.
    keepcols = ~allnan(features, axis=0)
    features = features[:, keepcols]
    self.features_used = [x for i, x in enumerate(self.features_used) if keepcols[i]]
    self.features_names = [x for i, x in enumerate(self.features_names) if keepcols[i]]

    # Throw an error if a classifier is not run at all:
    run_classifiers = set([fu[0] for fu in self.features_used])
    if run_classifiers != set(all_classifiers):
        raise RuntimeError("Classifier did not contribute at all: %s"
                           % set(all_classifiers).difference(run_classifiers))

    # Raise an exception if there are NaNs left in the features:
    if anynan(features):
        raise ValueError("Features contain NaNs")

    logger.info("Features imported. Shape = %s", features.shape)

    # Run actual training:
    self.classifier.oob_score = True
    logger.info("Fitting model.")
    self.classifier.fit(features, fitlabels)
    logger.info('Trained. OOB Score = %s', self.classifier.oob_score_)
    self.classifier.trained = True

    if savecl and self.classifier.trained and self.clfile is not None:
        if overwrite or not os.path.exists(self.clfile):
            logger.info("Saving pickled classifier instance to '%s'", self.clfile)
            self.save(self.clfile)
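# The column-pruning step above in isolation, using bottleneck's allnan:
# feature columns that no classifier ever filled are all-NaN and get
# dropped before training. Illustration data only.
import numpy as np
from bottleneck import allnan

features = np.array([[0.2, np.nan],
                     [0.8, np.nan]], dtype='float32')
keepcols = ~allnan(features, axis=0)   # array([ True, False])
features = features[:, keepcols]       # shape (2, 1)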