def from_string(urn_string):
    """Parse a colon-delimited URN string into an ``IoosUrn``.

    The text before the first ``#`` is split on ``:`` and read by position:
    field 2 is the asset type, field 3 the authority and field 4 the label.
    Fields 5 and 6, when present, supply the component and/or version.
    Everything from the first ``#`` onward is kept verbatim and appended to
    the component.

    Args:
        urn_string: The URN text to parse.

    Returns:
        An ``IoosUrn`` with the parsed fields set, or a blank ``IoosUrn``
        when fewer than five colon-separated fields are present.
    """
    # partition() keeps the whole fragment even when it contains more '#'
    # characters; split('#')[1] would silently drop all but the first piece.
    base, marker, fragment = urn_string.partition('#')
    extras = marker + fragment

    parts = base.split(':')
    if len(parts) < 5:
        return IoosUrn()

    urn = IoosUrn()
    urn.asset_type = parts[2]
    urn.authority = parts[3]
    urn.label = parts[4]

    if len(parts) > 5:
        if urn.asset_type == 'station':
            # For a station the sixth field is read as the version.
            urn.version = parts[5]
        else:
            if len(parts) == 6:
                # No seventh field follows, so treating this one as the
                # component rather than the version is a guess.
                logger.debug(
                    "Assuming that {0} is the 'component' piece of the URN (not the 'version')".format(
                        parts[5] + extras))
            urn.component = parts[5] + extras

    if len(parts) > 6:
        urn.version = parts[6]

    if len(parts) > 7:
        logger.warning("The URN is too long stripping off '{}'".format(':'.join(parts[7:])))

    return urn
def from_string(urn_string):
    """Build an ``IoosUrn`` from its colon-delimited string form.

    Only the portion before the first ``#`` is split on ``:``.  Positions 2,
    3 and 4 become the asset type, authority and label; positions 5 and 6,
    if present, become the component and/or version.  The ``#`` and all text
    after it are preserved as-is and attached to the component.

    Args:
        urn_string: The URN text to parse.

    Returns:
        The populated ``IoosUrn``.  A blank ``IoosUrn`` is returned when the
        string has fewer than five colon-separated fields.
    """
    # Keep the entire fragment: indexing the result of split('#') with [1]
    # would lose anything that follows a second '#'.
    head, hash_mark, tail = urn_string.partition('#')
    extras = hash_mark + tail

    parts = head.split(':')
    if len(parts) < 5:
        return IoosUrn()

    urn = IoosUrn()
    urn.asset_type = parts[2]
    urn.authority = parts[3]
    urn.label = parts[4]

    if len(parts) > 5:
        if urn.asset_type == 'station':
            # For a station the sixth field is read as the version.
            urn.version = parts[5]
        else:
            if len(parts) == 6:
                # Without a seventh field the role of this one is ambiguous;
                # it is taken to be the component.
                logger.debug(
                    "Assuming that {0} is the 'component' piece of the URN (not the 'version')".format(
                        parts[5] + extras))
            urn.component = parts[5] + extras

    if len(parts) > 6:
        urn.version = parts[6]

    if len(parts) > 7:
        logger.warning("The URN is too long stripping off '{}'".format(':'.join(parts[7:])))

    return urn
def valid(self):
    """Report whether this URN's fields satisfy the validity rules.

    The rules checked, in order, are: ``authority`` is set, ``label`` is
    set, ``asset_type`` is one of ``station``, ``network``, ``sensor`` or
    ``survey``, and a ``station`` has no ``component``.  The first failed
    rule is logged at debug level.

    Returns:
        ``True`` when every rule passes, otherwise ``False``.
    """
    asset_types = ('station', 'network', 'sensor', 'survey')

    # Plain conditionals rather than ``assert``: assertions are stripped when
    # Python runs with -O, which would make every URN report as valid.
    if self.authority is None:
        logger.debug('URN not valid - An "authority" is required')
        return False

    if self.label is None:
        logger.debug('URN not valid - A "label" is required')
        return False

    if self.asset_type not in asset_types:
        logger.debug(
            'URN not valid - asset_type {0} is unknown. Must be one of: {1}'
            .format(self.asset_type, ', '.join(asset_types)))
        return False

    if self.asset_type == 'station' and self.component is not None:
        logger.debug(
            'URN not valid - An asset_type of "station" may not have a "component".'
        )
        return False

    return True
def load(cls, path):
    """Open ``path`` as the first subclass of ``cls`` that claims it.

    ``path`` is opened once as a plain ``cls`` instance, and each subclass
    returned by ``all_subclasses(cls)`` that defines ``is_mine`` is asked
    whether it recognises that instance.  The probe instance is closed
    before the file is reopened with the matching subclass.

    Args:
        path: Location of the dataset to open.

    Returns:
        An instance of the first matching subclass, constructed from ``path``.

    Raises:
        ValueError: If no subclass recognises the dataset.
    """
    fpath = os.path.realpath(path)
    subs = list(all_subclasses(cls))

    dsg = cls(fpath)
    matched = None
    try:
        for klass in subs:
            logger.debug('Trying {}...'.format(klass.__name__))
            if hasattr(klass, 'is_mine') and klass.is_mine(dsg):
                matched = klass
                break
    finally:
        # Close the probe exactly once, on every path.  The previous code
        # closed it before returning and then again here.
        dsg.close()

    if matched is not None:
        return matched(path)

    subnames = ', '.join(s.__name__ for s in subs)
    raise ValueError('Could not open {} as any type of CF Dataset. Tried: {}.'.format(fpath, subnames))
def to_dataframe(self, clean_cols=True, clean_rows=True):
    """Flatten this trajectory dataset into a pandas DataFrame.

    The frame has the columns ``t``, ``x``, ``y``, ``z``, ``trajectory`` and
    ``distance`` (cumulative sum of ``great_distance`` between consecutive
    points), plus one column per variable returned by ``data_vars()`` and
    ``ancillary_vars()``.

    Args:
        clean_cols: When true, drop columns whose values are all missing.
        clean_rows: When true, drop rows in which every data/ancillary
            variable is masked.

    Returns:
        The assembled ``pandas.DataFrame``.
    """
    # Z
    zvar = self.z_axes()[0]
    z = np.ma.fix_invalid(np.ma.MaskedArray(zvar[:]))
    z = z.flatten().round(5)
    logger.debug(['z data size: ', z.size])

    # T
    tvar = self.t_axes()[0]
    t = np.ma.MaskedArray(
        nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))
    ).flatten()
    # Patch the time variable back to its original mask, since num2date
    # breaks any missing/fill values
    if hasattr(tvar[0], 'mask'):
        t.mask = tvar[:].mask
    logger.debug(['time data size: ', t.size])

    # X
    xvar = self.x_axes()[0]
    x = np.ma.fix_invalid(np.ma.MaskedArray(xvar[:])).flatten().round(5)
    logger.debug(['x data size: ', x.size])

    # Y
    yvar = self.y_axes()[0]
    y = np.ma.fix_invalid(np.ma.MaskedArray(yvar[:])).flatten().round(5)
    logger.debug(['y data size: ', y.size])

    # Trajectories
    pvar = self.get_variables_by_attributes(cf_role='trajectory_id')[0]
    try:
        p = normalize_array(pvar)
    except Exception:
        # ``Exception`` rather than ``BaseException`` so that
        # KeyboardInterrupt and SystemExit still propagate.
        logger.exception('Could not pull trajectory values from the variable, using indexes.')
        # arange yields the default integer dtype; passing the abstract
        # ``np.integer`` as a dtype is deprecated by NumPy.
        p = np.arange(len(pvar))

    # The Dimension that the trajectory id variable doesn't have is what
    # the trajectory data needs to be repeated by
    missing_dims = set(tvar.dimensions).difference(set(pvar.dimensions))
    dim_diff = self.dimensions[list(missing_dims)[0]]
    if dim_diff:
        p = p.repeat(dim_diff.size)
    logger.debug(['trajectory data size: ', p.size])

    # Distance
    d = np.append([0], great_distance(start_latitude=y[0:-1], end_latitude=y[1:],
                                      start_longitude=x[0:-1], end_longitude=x[1:])['distance'])
    d = np.ma.fix_invalid(np.ma.MaskedArray(np.cumsum(d)).astype(np.float64).round(2))
    logger.debug(['distance data size: ', d.size])

    df_data = {
        't': t,
        'x': x,
        'y': y,
        'z': z,
        'trajectory': p,
        'distance': d
    }

    # Starts all-True and is AND-ed with each variable's mask, so a row stays
    # marked only while every variable seen so far is masked there.
    building_index_to_drop = np.ones(t.size, dtype=bool)
    extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
    for dvar in extract_vars:
        vdata = np.ma.fix_invalid(np.ma.MaskedArray(dvar[:].round(3).flatten()))
        building_index_to_drop = building_index_to_drop & vdata.mask
        df_data[dvar.name] = vdata

    df = pd.DataFrame(df_data)

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    if clean_rows:
        df = df.iloc[~building_index_to_drop]

    return df
def to_dataframe(self, clean_cols=True, clean_rows=True):
    """Flatten this profile dataset into a pandas DataFrame.

    Profile ids, times and x/y positions are each repeated by the length of
    the vertical axis' first dimension.  The frame has the columns ``t``,
    ``x``, ``y``, ``z``, ``profile`` and ``distance`` (cumulative sum of
    ``great_distance`` between consecutive points), plus one column per
    variable returned by ``data_vars()`` and ``ancillary_vars()``.

    Args:
        clean_cols: When true, drop columns whose values are all missing.
        clean_rows: When true, drop rows in which every data/ancillary
            variable is masked.

    Returns:
        The assembled ``pandas.DataFrame``.
    """
    zvar = self.z_axes()[0]
    zs = len(self.dimensions[zvar.dimensions[0]])

    # Profiles
    pvar = self.get_variables_by_attributes(cf_role='profile_id')[0]
    try:
        p = normalize_array(pvar)
    except ValueError:
        # Fall back to positional indexes.  arange yields the default integer
        # dtype; passing the abstract ``np.integer`` as a dtype is deprecated
        # by NumPy.
        p = np.arange(len(pvar))
    ps = p.size
    p = p.repeat(zs)
    logger.debug(['profile data size: ', p.size])

    # Z
    z = generic_masked(zvar[:], attrs=self.vatts(zvar.name)).round(5)
    try:
        z = np.tile(z, ps)
    except ValueError:
        z = z.flatten()
    logger.debug(['z data size: ', z.size])

    # T
    tvar = self.t_axes()[0]
    t = nc4.num2date(tvar[:], tvar.units, getattr(tvar, 'calendar', 'standard'))
    if isinstance(t, datetime):
        # Size one
        t = np.array([t.isoformat()], dtype='datetime64')
    t = t.repeat(zs)
    logger.debug(['time data size: ', t.size])

    # X
    xvar = self.x_axes()[0]
    x = generic_masked(xvar[:].repeat(zs), attrs=self.vatts(xvar.name)).round(5)
    logger.debug(['x data size: ', x.size])

    # Y
    yvar = self.y_axes()[0]
    y = generic_masked(yvar[:].repeat(zs), attrs=self.vatts(yvar.name)).round(5)
    logger.debug(['y data size: ', y.size])

    # Distance
    d = np.ma.zeros(y.size, dtype=np.float64)
    d[1:] = great_distance(start_latitude=y[0:-1], end_latitude=y[1:],
                           start_longitude=x[0:-1], end_longitude=x[1:])['distance']
    d = generic_masked(np.cumsum(d), minv=0).round(2)
    logger.debug(['distance data size: ', d.size])

    df_data = {
        't': t,
        'x': x,
        'y': y,
        'z': z,
        'profile': p,
        'distance': d
    }

    # Starts all-True and is AND-ed with each variable's mask, so a row stays
    # marked only while every variable seen so far is masked there.
    building_index_to_drop = np.ones(t.size, dtype=bool)
    extract_vars = list(set(self.data_vars() + self.ancillary_vars()))
    for dvar in extract_vars:
        vdata = np.ma.fix_invalid(np.ma.MaskedArray(dvar[:].round(3).flatten()))
        building_index_to_drop = building_index_to_drop & vdata.mask
        df_data[dvar.name] = vdata

    df = pd.DataFrame(df_data)

    # Drop all data columns with no data
    if clean_cols:
        df = df.dropna(axis=1, how='all')

    # Drop all data rows with no data variable data
    if clean_rows:
        df = df.iloc[~building_index_to_drop]

    return df
def setup_times_and_verticals(self, times, verticals):
    """Create the time and vertical coordinate variables and their metadata.

    Times are sorted (duplicates are kept) and written to a ``time``
    dimension/variable on ``self._nc``.  Verticals are masked against
    ``self.vertical_fill`` and, unless every value is falsy or masked,
    reduced to their unique values.  At most one unique vertical produces a
    scalar vertical variable and ``featureType`` ``timeSeries``; more than
    one produces a ``z`` dimension and ``featureType`` ``timeSeriesProfile``.
    Time-coverage and geospatial-vertical global attributes are set on the
    way.

    Sets ``self.time_indexes``, ``self.vertical_indexes``,
    ``self.time_chunk``, ``self.time`` and ``self.z``; ``self.vertical_fill``
    is re-cast to the verticals' dtype when an ndarray is supplied.

    Args:
        times: list, tuple or ndarray of times.  They are passed to
            ``datetime.utcfromtimestamp`` and stored with units of seconds
            since 1970-01-01.
        verticals: list, tuple or ndarray of vertical positions.  A falsy
            non-array value means "none supplied" and is replaced by a single
            masked fill value.
    """
    if isinstance(times, (list, tuple,)):
        times = np.asarray(times)

    # Create time as int32 or float64 because DAP does not support int64 until DAP4.
    if get_type(times) == np.int64:
        # Check the largest value, not the last one: the times have not been
        # sorted yet, so the last element need not be the maximum.
        if np.max(times) < 2147483647:
            # We can fit inside of an int32
            times = times.astype(np.int32)
        else:
            # Too large for int32, so store as float64 instead
            times = times.astype(np.float64)

    # If nothing is passed in, set to the vertical_fill value.
    if not isinstance(verticals, np.ndarray) and not verticals:
        verticals = np.ma.masked_values([self.vertical_fill], self.vertical_fill)

    # Convert to masked array
    if isinstance(verticals, (list, tuple)):
        verticals = np.ma.masked_values(verticals, self.vertical_fill)
    elif isinstance(verticals, np.ndarray):
        self.vertical_fill = verticals.dtype.type(self.vertical_fill)
        verticals = np.ma.masked_values(verticals, self.vertical_fill)

    if get_type(verticals) == np.int64:
        # Create verticals as int32 because DAP does not support int64 until DAP4.
        verticals = verticals.astype(np.int32)

    # Don't unique Time... rely on the person submitting the data correctly.
    # That means we allow duplicate times, as long as the data contains duplicate times as well.
    self.time_indexes = np.argsort(times)
    full_times = times[self.time_indexes]

    # Unique the vertical values.  All-zero input is tested explicitly
    # because ``any()`` is False for it, yet it still has to be de-duplicated.
    if np.isclose(verticals, 0).all() or verticals.any():
        save_mask = verticals.mask
        # Unmask before uniquing, then restore the mask of the kept elements.
        verticals.mask = False
        unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
        if save_mask.size > 1:
            unique_verticals.mask = save_mask[self.vertical_indexes]
    else:
        unique_verticals = verticals
        self.vertical_indexes = np.arange(len(verticals))

    # Calculate time stats based on a unique time array
    unique_times = np.unique(full_times)
    starting = datetime.utcfromtimestamp(unique_times[0])
    ending = datetime.utcfromtimestamp(unique_times[-1])

    logger.debug("Setting up time...")

    # Time extents
    self._nc.setncattr("time_coverage_start", starting.isoformat())
    self._nc.setncattr("time_coverage_end", ending.isoformat())
    # duration (ISO8601 format)
    self._nc.setncattr("time_coverage_duration", "PT{0:d}S".format(int(round((ending - starting).total_seconds()))))
    # resolution (ISO8601 format)
    # subtract adjacent times to produce an array of differences, then get the most common occurrence
    diffs = unique_times[1:] - unique_times[:-1]
    uniqs, inverse = np.unique(diffs, return_inverse=True)
    # NOTE(review): a perfectly regular series has exactly one distinct
    # difference, so this guard skips it and no resolution attribute is
    # written - confirm that is intended.
    if uniqs.size > 1:
        # bincount(inverse) counts occurrences per entry of ``uniqs``, so its
        # argmax must index ``uniqs``; indexing ``diffs`` picked an arbitrary element.
        time_diffs = uniqs[np.bincount(inverse).argmax()]
        self._nc.setncattr("time_coverage_resolution", "PT{0:d}S".format(int(round(time_diffs))))

    # Time
    self.time_chunk = min(full_times.size, 1000)
    self._nc.createDimension("time", full_times.size)
    self.time = self._nc.createVariable(self.time_axis_name, get_type(full_times), ("time",), chunksizes=(self.time_chunk,))
    self.time.units = "seconds since 1970-01-01T00:00:00Z"
    self.time.standard_name = "time"
    self.time.long_name = "time of measurement"
    self.time.calendar = "gregorian"
    self.time.axis = "T"
    self.time[:] = full_times

    logger.debug("Setting up {}...".format(self.vertical_axis_name))
    # Figure out if we are creating a Profile or just a TimeSeries
    self._nc.setncattr("geospatial_vertical_units", "meters")
    self._nc.setncattr("geospatial_vertical_positive", self.vertical_positive)
    if unique_verticals.size <= 1:
        # TIMESERIES
        self._nc.setncattr("featureType", "timeSeries")
        # Fill in variable if we have an actual height. Else, the fillvalue remains.
        self._nc.setncattr("geospatial_vertical_resolution", '0')
        self.z = self._nc.createVariable(self.vertical_axis_name, get_type(unique_verticals), fill_value=self.vertical_fill)
        if unique_verticals.size == 1 and not np.isnan(unique_verticals[0]) and unique_verticals[0] != self.vertical_fill:
            # Vertical extents
            self._nc.setncattr("geospatial_vertical_min", unique_verticals[0])
            self._nc.setncattr("geospatial_vertical_max", unique_verticals[0])
            self.z.valid_min = unique_verticals[0]
            self.z.valid_max = unique_verticals[0]

    elif unique_verticals.size > 1:
        # TIMESERIES PROFILE
        self._nc.setncattr("featureType", "timeSeriesProfile")
        # Vertical extents
        non_nan_verticals = unique_verticals[
            (~np.isnan(unique_verticals)) & (unique_verticals != self.vertical_fill)
        ]
        minvertical = float(np.min(non_nan_verticals))
        maxvertical = float(np.max(non_nan_verticals))
        vertical_diffs = non_nan_verticals[1:] - non_nan_verticals[:-1]
        self._nc.setncattr("geospatial_vertical_min", minvertical)
        self._nc.setncattr("geospatial_vertical_max", maxvertical)
        if vertical_diffs.size >= 1:
            self._nc.setncattr(
                "geospatial_vertical_resolution",
                " ".join([str(x) for x in list(vertical_diffs) if not np.isnan(x)])
            )
        else:
            self._nc.setncattr("geospatial_vertical_resolution", '0')
        # There is more than one vertical value for this variable, we need to create a vertical dimension
        self._nc.createDimension("z", unique_verticals.size)
        self.z = self._nc.createVariable(self.vertical_axis_name, get_type(unique_verticals), ("z", ), fill_value=self.vertical_fill)
        self.z.valid_min = minvertical
        self.z.valid_max = maxvertical

    self.z.grid_mapping = 'crs'
    self.z.long_name = "{} of the sensor relative to the water surface".format(self.vertical_axis_name)
    if self.vertical_positive == 'up':
        self.z.standard_name = 'height'
    elif self.vertical_positive == 'down':
        self.z.standard_name = 'depth'
    self.z.positive = self.vertical_positive
    self.z.units = "m"
    self.z.axis = "Z"
    self.z[:] = unique_verticals
    self._nc.sync()
def setup_times_and_verticals(self, times, verticals):
    """Create the time and vertical coordinate variables and their metadata.

    Times are sorted (duplicates are kept) and written to an unlimited
    ``time`` dimension/variable on ``self.nc``.  Verticals are masked against
    ``self.vertical_fill`` and, unless every value is falsy or masked,
    reduced to their unique values.  At most one unique vertical produces a
    scalar vertical variable and ``featureType`` ``timeSeries``; more than
    one produces a ``z`` dimension and ``featureType`` ``timeSeriesProfile``.
    Time-coverage and geospatial-vertical global attributes are set on the
    way.

    Sets ``self.time_indexes``, ``self.vertical_indexes``, ``self.time`` and
    ``self.z``.

    Args:
        times: list, tuple or ndarray of times.  They are passed to
            ``datetime.utcfromtimestamp`` and stored with units of seconds
            since 1970-01-01.
        verticals: list, tuple or ndarray of vertical positions.  A falsy
            non-array value means "none supplied" and is replaced by a single
            masked fill value.
    """
    if isinstance(times, (list, tuple,)):
        times = np.asarray(times)

    # If nothing is passed in, set to the vertical_fill value.
    if not isinstance(verticals, np.ndarray) and not verticals:
        verticals = np.ma.masked_values([self.vertical_fill], self.vertical_fill)

    # Convert to masked array
    if isinstance(verticals, (list, tuple, np.ndarray)):
        verticals = np.ma.masked_values(verticals, self.vertical_fill)

    # Don't unique Time... rely on the person submitting the data correctly.
    # That means we allow duplicate times, as long as the data contains duplicate times as well.
    self.time_indexes = np.argsort(times)
    unique_times = times[self.time_indexes]

    # Unique the vertical values.  All-zero input is tested explicitly
    # because ``any()`` is False for it, yet it still has to be de-duplicated.
    if np.isclose(verticals, 0).all() or verticals.any():
        save_mask = verticals.mask
        # Unmask before uniquing, then restore the mask of the kept elements.
        verticals.mask = False
        unique_verticals, self.vertical_indexes = np.ma.unique(verticals, return_index=True)
        if save_mask.size > 1:
            unique_verticals.mask = save_mask[self.vertical_indexes]
    else:
        unique_verticals = verticals
        self.vertical_indexes = np.arange(len(verticals))

    starting = datetime.utcfromtimestamp(unique_times[0])
    ending = datetime.utcfromtimestamp(unique_times[-1])

    logger.debug("Setting up time...")

    # Time extents
    self.nc.setncattr("time_coverage_start", starting.isoformat())
    self.nc.setncattr("time_coverage_end", ending.isoformat())
    # duration (ISO8601 format).  A duration given in seconds needs the "T"
    # time designator ("PT60S"); "P60S" is not a valid ISO 8601 duration.
    self.nc.setncattr("time_coverage_duration", "PT{0:d}S".format(int(round((ending - starting).total_seconds()))))
    # resolution (ISO8601 format)
    # subtract adjacent times to produce an array of differences, then get the most common occurrence
    diffs = unique_times[1:] - unique_times[:-1]
    uniqs, inverse = np.unique(diffs, return_inverse=True)
    # NOTE(review): a perfectly regular series has exactly one distinct
    # difference, so this guard skips it and no resolution attribute is
    # written - confirm that is intended.
    if uniqs.size > 1:
        # bincount(inverse) counts occurrences per entry of ``uniqs``, so its
        # argmax must index ``uniqs``; indexing ``diffs`` picked an arbitrary element.
        time_diffs = uniqs[np.bincount(inverse).argmax()]
        self.nc.setncattr("time_coverage_resolution", "PT{0:d}S".format(int(round(time_diffs))))

    # Time - stored as 64-bit float ("f8") on an unlimited dimension
    self.nc.createDimension("time")
    self.time = self.nc.createVariable(self.time_axis_name, "f8", ("time",), chunksizes=(1000,))
    self.time.units = "seconds since 1970-01-01T00:00:00Z"
    self.time.standard_name = "time"
    self.time.long_name = "time of measurement"
    self.time.calendar = "gregorian"
    self.time[:] = unique_times

    logger.debug("Setting up {}...".format(self.vertical_axis_name))
    # Figure out if we are creating a Profile or just a TimeSeries
    if unique_verticals.size <= 1:
        # TIMESERIES
        self.nc.setncattr("featureType", "timeSeries")
        # Fill in variable if we have an actual height. Else, the fillvalue remains.
        if unique_verticals.any() and unique_verticals.size == 1:
            # Vertical extents
            self.nc.setncattr("geospatial_vertical_positive", self.vertical_positive)
            self.nc.setncattr("geospatial_vertical_min", unique_verticals[0])
            self.nc.setncattr("geospatial_vertical_max", unique_verticals[0])
        self.z = self.nc.createVariable(self.vertical_axis_name, "f8", fill_value=self.vertical_fill)

    elif unique_verticals.size > 1:
        # TIMESERIES PROFILE
        self.nc.setncattr("featureType", "timeSeriesProfile")
        # Vertical extents
        minvertical = float(np.min(unique_verticals))
        maxvertical = float(np.max(unique_verticals))
        vertical_diffs = unique_verticals[1:] - unique_verticals[:-1]
        self.nc.setncattr("geospatial_vertical_positive", self.vertical_positive)
        self.nc.setncattr("geospatial_vertical_min", minvertical)
        self.nc.setncattr("geospatial_vertical_max", maxvertical)
        # str() replaces the Python-2-only ``unicode`` builtin.
        self.nc.setncattr("geospatial_vertical_resolution", " ".join(str(v) for v in list(vertical_diffs)))
        # There is more than one vertical value for this variable, we need to create a vertical dimension
        self.nc.createDimension("z", unique_verticals.size)
        self.z = self.nc.createVariable(self.vertical_axis_name, "f8", ("z", ), fill_value=self.vertical_fill)

    self.z.grid_mapping = 'crs'
    self.z.long_name = "{} of the sensor relative to the water surface".format(self.vertical_axis_name)
    self.z.standard_name = self.vertical_axis_name
    self.z.positive = self.vertical_positive
    self.z.units = "m"
    self.z.axis = "Z"
    self.z[:] = unique_verticals
    self.nc.sync()