def fuse_datetime_columns(file):
    """Fuse a file's "DATE" and "TIME" columns into a "_DATETIME" column.

    There are three cases:

    1. DATE and TIME both exist
       A datetime object is inserted representing the combination of the
       two objects.
    2. DATE exists and TIME does not
       A date object is inserted only representing the date.
    3. DATE does not exist but TIME does
       None is inserted because date is required.

    Arg:
        file - a DataFile object
    """
    try:
        dates = file['DATE'].values
    except KeyError:
        # Case 3: date is required, so leave the file untouched.
        log.error(u'No DATE column is present.')
        return
    try:
        times = file['TIME'].values
        has_time = True
    except KeyError:
        # Case 2: previously `times` was left unbound here, so the zip()
        # below raised NameError. Substitute None for every missing time
        # so only the date portion is used.
        # NOTE(review): assumes strptime_woce_date_time accepts a None
        # time and returns a date-only value -- confirm against its impl.
        log.warn(u'No TIME column is present.')
        times = [None] * len(dates)
        has_time = False
    file['_DATETIME'] = Column('_DATETIME')
    file['_DATETIME'].values = [
        strptime_woce_date_time(*x) for x in zip(dates, times)]
    del file['DATE']
    # Only delete TIME when it actually existed; unconditionally deleting
    # it raised KeyError in case 2.
    if has_time:
        del file['TIME']
def test_decimal_places_requires_decimal(self):
    """decimal_places() must refuse a column mixing Decimal and float."""
    column = Column('test')
    column.values = [
        _decimal('-999.0000'),
        20.12355,
        _decimal('-999.00'),
    ]
    with self.assertRaises(ValueError):
        column.decimal_places()
def test_decimal_places(self):
    """A column's decimal places is the max number of places after a
    decimal in the column.
    """
    column = Column('test')
    # -999.0000 carries the most fractional digits (4) of the group.
    column.values = [
        _decimal(raw) for raw in
        ('-999.0000', '19.0', '-999.000', '-999.00')]
    self.assertEqual(4, column.decimal_places())
def _read_oliver_sun(dfc, fileobj, cfg):
    """Read HRP2 format from Oliver Sun.

    Loads a MATLAB .mat file of casts and appends one DataFile per cast
    to the collection ``dfc``.

    Args:
        dfc - DataFileCollection to append the resulting DataFiles to
        fileobj - open file object containing the .mat data
        cfg - dict with at least 'expocode' and 'global_mapping' keys
    """
    mat = loadmat(fileobj)
    # Py2: dict.keys() returns a list; the .mat is assumed to hold a
    # single top-level variable whose first row is the array of casts.
    filekey = mat.keys()[0]
    casts = mat[filekey][0]
    for cast in casts:
        dfile = DataFile()
        dfc.append(dfile)
        dfile.globals['EXPOCODE'] = cfg['expocode']
        # TODO
        dfile.globals['DEPTH'] = 0
        # Each record field is either a (1, 1) scalar (a global) or a
        # vector (a data column).
        for key, item in zip(cast.dtype.names, cast):
            if item.shape == (1, 1):
                # Scalar: only keep it if the config maps it to a
                # recognized global name.
                key = cfg['global_mapping'].get(key, None)
                if key:
                    dfile.globals[key] = item[0, 0]
            else:
                try:
                    dfile[key] = Column(key)
                    dfile[key].values = list(item.flatten())
                    # Act as if all files had QC and assign it to OceanSITES 1.
                    # Assuming that someone has already gone through level 0
                    # data and we are receiving level 1 or higher.
                    dfile[key].flags_woce = [2] * len(dfile[key].values)
                except KeyError:
                    # presumably Column() rejects unknown parameter names
                    # with KeyError; those fields are skipped -- confirm.
                    pass
        # Guarantee a station number so downstream identifiers work.
        try:
            dfile.globals['STNNBR']
        except KeyError:
            dfile.globals['STNNBR'] = '999'
        woce.fuse_datetime(dfile)
def read(self, handle):
    """How to read a Bottle Exchange file.

    Args:
        handle - open file object positioned at the start of the file

    Raises:
        ValueError - when the column and unit counts disagree, or when no
            unique sample identifier combination can be found.
    """
    read_identifier_line(self, handle, 'BOTTLE')
    # read_comments returns the first non-comment line: the column header.
    l = read_comments(self, handle)

    # Read columns and units
    columns = [x.strip() for x in l.strip().split(',')]
    units = [x.strip() for x in handle.readline().strip().split(',')]

    # Check columns and units to match length
    if len(columns) != len(units):
        raise ValueError(
            ("Expected as many columns as units in file. "
             "Found %d columns and %d units.") % (len(columns), len(units)))

    # Check for unique identifer
    identifier = []
    if 'EXPOCODE' in columns and \
       'STNNBR' in columns and \
       'CASTNO' in columns:
        identifier = ['STNNBR', 'CASTNO']
        if 'SAMPNO' in columns:
            identifier.append('SAMPNO')
            if 'BTLNBR' in columns:
                identifier.append('BTLNBR')
        elif 'BTLNBR' in columns:
            identifier.append('BTLNBR')
        else:
            raise ValueError(("No unique identifer found for file. "
                              "(STNNBR,CASTNO,SAMPNO,BTLNBR),"
                              "(STNNBR,CASTNO,SAMPNO),"
                              "(STNNBR,CASTNO,BTLNBR)"))

    self.create_columns(columns, units)

    read_data(self, handle, columns)

    # Format all data to be what it is
    # (Py2 map returns a list, so the column values stay list-typed.)
    try:
        self['EXPOCODE'].values = map(str, self['EXPOCODE'].values)
    except KeyError:
        pass
    try:
        self['LATITUDE'].values = map(_decimal, self['LATITUDE'].values)
    except KeyError:
        pass
    try:
        self['LONGITUDE'].values = map(_decimal, self['LONGITUDE'].values)
    except KeyError:
        pass
    # Guarantee DATE/TIME columns exist (filled with None) so that
    # fuse_datetime can always run.
    try:
        self['DATE']
    except KeyError:
        self['DATE'] = Column('DATE')
        self['DATE'].values = [None] * len(self)
    try:
        self['TIME']
    except KeyError:
        self['TIME'] = Column('TIME')
        self['TIME'].values = [None] * len(self)

    woce.fuse_datetime(self)

    self.check_and_replace_parameters()
def australian_navy_ctd(args):
    """Download and convert Australian Navy CTD data.

    Crawls a THREDDS catalog of RAN CTD profiles, converts each profile
    into a DataFile, groups consecutive profiles by EXPOCODE into
    DataFileCollections, and writes them out as zipped CTD Exchange
    files.

    Arg:
        args - argparse namespace; args.output is the open output file.
    """
    from pydap.client import open_url
    from libcchdo.thredds import crawl
    from libcchdo.formats.ctd.zip import exchange as ctdzipex
    from libcchdo.formats.zip import write as zwrite

    dfcs = []

    # CF standard_name -> CCHDO parameter mnemonic
    cf_param_to_cchdo_param = {
        'sea_water_pressure': 'CTDPRS',
        'sea_water_temperature': 'CTDTMP',
        'sea_water_practical_salinity': 'CTDSAL',
    }
    ignored_qc_flags = [
        'time_qc_flag',
        'position_qc_flag',
    ]
    # Source QC scheme -> WOCE flag mapping
    qc_conventions = {
        'Proposed IODE qc scheme March 2012': {
            1: 2,   # good
            2: 5,   # not_evaluated_or_unknown
            3: 3,   # suspect
            4: 4,   # bad
            9: 9,   # missing
        },
    }

    dfc = DataFileCollection()
    catalog = "http://www.metoc.gov.au/thredds/catalog/RAN_CTD_DATA/catalog.xml"
    for url in crawl(catalog):
        df = DataFile()

        log.info(u'Reading %s', url)
        dset = open_url(url)
        vars = dset.keys()
        for vname in vars:
            var = dset[vname]
            attrs = var.attributes
            if 'standard_name' in attrs:
                std_name = attrs['standard_name']
                if std_name == 'time':
                    # NOTE(review): time appears to be days since
                    # 1950-01-01 (timedelta's positional arg is days) --
                    # confirm against the dataset's time units attribute.
                    df.globals['_DATETIME'] = \
                        datetime(1950, 1, 1) + timedelta(var[:])
                elif std_name == 'latitude':
                    df.globals['LATITUDE'] = var[:]
                elif std_name == 'longitude':
                    df.globals['LONGITUDE'] = var[:]
                elif std_name in cf_param_to_cchdo_param:
                    cparam = cf_param_to_cchdo_param[std_name]
                    if '_FillValue' in attrs:
                        # Mask fill values to None.
                        fill_value = attrs['_FillValue']
                        values = []
                        for x in var[:]:
                            if equal_with_epsilon(x, fill_value):
                                values.append(None)
                            else:
                                values.append(x)
                    else:
                        values = var[:]
                    try:
                        df[cparam].values = values
                    except KeyError:
                        df[cparam] = Column(cparam)
                        df[cparam].values = values
                elif 'status_flag' in std_name:
                    # e.g. "sea_water_pressure status_flag" flags CTDPRS.
                    flagged_param = std_name.replace('status_flag', '').strip()
                    cparam = cf_param_to_cchdo_param[flagged_param]
                    qc_convention = attrs['quality_control_convention']
                    if qc_convention in qc_conventions:
                        qc_map = qc_conventions[qc_convention]
                        df[cparam].flags_woce = [qc_map[x] for x in var[:]]
                else:
                    log.debug('unhandled standard_name %s', std_name)
            elif ('long_name' in attrs and
                    attrs['long_name'] == 'profile identifier'):
                # Profile id encodes cruise id * 10^4 + station number.
                profile_id = var[:]
                cruise_id = profile_id / 10 ** 4
                profile_id = profile_id - cruise_id * 10 ** 4
                df.globals['EXPOCODE'] = str(cruise_id)
                df.globals['STNNBR'] = str(profile_id)
                df.globals['CASTNO'] = str(1)
            elif vname in ignored_qc_flags:
                df.globals['_' + vname] = var[:]
            elif (vname.endswith('whole_profile_flag') or
                    vname.endswith('sd_test')):
                pass
            else:
                log.debug('unhandled variable %s', vname)

        # attach new file to appropriate collection
        if dfc.files:
            if dfc.files[0].globals['EXPOCODE'] != df.globals['EXPOCODE']:
                dfcs.append(dfc)
                dfc = DataFileCollection()
        dfc.append(df)
    # Flush the final collection; previously it was dropped because a
    # collection was only appended when the EXPOCODE changed mid-loop.
    if dfc.files:
        dfcs.append(dfc)

    with closing(args.output) as out_file:
        # Mutable cell instead of plain int: the original rebound next_id
        # inside the closure, which raises UnboundLocalError in Python 2
        # (no `nonlocal`) whenever the IndexError fallback fires.
        next_id = [0]

        def get_filename(dfc):
            # Name each zip after the cruise EXPOCODE; fall back to a
            # running counter for collections with no files.
            try:
                return '{0}_ct1.zip'.format(dfc.files[0].globals['EXPOCODE'])
            except IndexError:
                next_id[0] += 1
                return '{0}_ct1.zip'.format(next_id[0])

        zwrite(dfcs, out_file, ctdzipex, get_filename)