def write(self, handle):
    """How to write a CTD Exchange file."""
    pre_write(self)
    write_identifier(self, handle, 'CTD')
    if self.globals['header']:
        handle.write(self.globals['header'].encode('utf8'))

    # Collect headers
    headers = OrderedDict()
    headers['NUMBER_HEADERS'] = 1
    woce.split_datetime(self)
    for key in REQUIRED_HEADERS:
        try:
            headers[key] = self.globals[key]
        except KeyError:
            log.error('Missing required header %s' % key)
    keys_less_required = OrderedSet(self.globals.keys()) - \
        set(['stamp', 'header']) - set(REQUIRED_HEADERS)
    for key in keys_less_required:
        headers[key] = self.globals[key]
    headers['NUMBER_HEADERS'] = len(headers)
    woce.fuse_datetime(self)

    # Write headers
    for key in headers:
        handle.write(u'{key} = {val}\n'.format(
            key=key, val=decimal_to_str(headers[key])))
    write_data(self, handle)
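# Usage sketch, not from the original source: how this writer pairs with the
# matching reader. The libcchdo-style import paths below are assumptions, not
# confirmed by this file.
def _example_rewrite_ctd_exchange(in_path, out_path):
    from libcchdo.model.datafile import DataFile  # assumed import path
    from libcchdo.formats.ctd import exchange as ctdex  # assumed import path
    dfile = DataFile()
    with open(in_path) as in_handle:
        ctdex.read(dfile, in_handle)
    # write() emits the headers (NUMBER_HEADERS counts itself) then the data.
    with open(out_path, 'wb') as out_handle:
        ctdex.write(dfile, out_handle)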
def write(self, handle):
    """How to write a Bottle Exchange file."""
    write_identifier(self, handle, 'BOTTLE')
    if self.globals['header']:
        handle.write('# Original header:\n')
        handle.write(self.globals['header'].encode('utf8'))
    woce.split_datetime(self)

    # Convert all float STNNBR, CASTNO, SAMPNO, BTLNBR values to ints
    def if_float_then_int(x):
        if type(x) is float:
            return int(x)
        return x

    def convert_column_floats_to_ints(dfile, param, required=True):
        try:
            column = dfile[param]
            column.values = [if_float_then_int(vvv) for vvv in column.values]
        except KeyError:
            if required:
                log.warn(u'Missing {0} column'.format(param))
            else:
                log.warn(u'Missing optional {0} column'.format(param))

    convert_column_floats_to_ints(self, 'STNNBR')
    convert_column_floats_to_ints(self, 'CASTNO')
    convert_column_floats_to_ints(self, 'SAMPNO', required=False)
    convert_column_floats_to_ints(self, 'BTLNBR')

    self.check_and_replace_parameters()
    write_data(self, handle)
    woce.fuse_datetime(self)
def write(self, handle):
    '''How to write a Summary file for WOCE.'''
    woce.split_datetime(self)
    ship = self.globals.get('_SHIP', None) or '__SHIP__'
    leg = self.globals.get('_LEG', None) or '__LEG__'
    uniq_sects = uniquify(self['SECT_ID'].values)
    handle.write('%s LEG %s WHP-ID %s %s\n' % (
        ship, leg, ','.join(uniq_sects), config.stamp()))
    header_one = 'SHIP/CRS WOCE CAST UTC POSITION UNC COR ABOVE WIRE MAX NO. OF\n'
    header_two = ('EXPOCODE SECT STNNBR CASTNO TYPE DATE TIME CODE LATITUDE '
                  'LONGITUDE NAV DEPTH DEPTH BOTTOM OUT PRESS BOTTLES '
                  'PARAMETERS COMMENTS \n')
    header_sep = ('-' * (len(header_two) - 1)) + '\n'
    handle.write(header_one)
    handle.write(header_two)
    handle.write(header_sep)
    for i in range(0, len(self)):
        # DATE is stored as YYYYMMDD; the Summary format wants MMDDYY.
        exdate = self.columns['DATE'][i]
        date_str = exdate[4:6] + exdate[6:8] + exdate[2:4]
        row = '%-14s %-5s %5s ' % (
            self['EXPOCODE'][i], self['SECT_ID'][i], self['STNNBR'][i])
        row += '%3d %3s %-6s %04s ' % (
            self['CASTNO'][i], self['_CAST_TYPE'][i], date_str,
            self['TIME'][i])
        row += '%2s %-10s %-11s %3s %5d ' % (
            self['_CODE'][i],
            woce.dec_lat_to_woce_lat(self['LATITUDE'][i]),
            woce.dec_lng_to_woce_lng(self['LONGITUDE'][i]),
            self['_NAV'][i], self['DEPTH'][i])
        row += '%-6d ' % self['_ABOVE_BOTTOM'][i]
        row += '%5d %7d %-15s %-20s' % (
            self['_MAX_PRESSURE'][i], self['_NUM_BOTTLES'][i],
            self['_PARAMETERS'][i], self['_COMMENTS'][i])
        handle.write(row + '\n')
    woce.fuse_datetime(self)
def read(self, handle, retain_order=False, header_only=False):
    """How to read a CTD Exchange file.

    header_only - only read the CTD headers, not the data

    """
    read_identifier_line(self, handle, 'CTD')
    l = read_comments(self, handle)

    # Read NUMBER_HEADERS
    num_headers = re_compile(r'NUMBER_HEADERS\s*=\s*(\d+)')
    m = num_headers.match(l)
    if m:
        # NUMBER_HEADERS counts itself as a header
        num_headers = int(m.group(1)) - 1
    else:
        raise ValueError(
            u'Expected NUMBER_HEADERS as the second non-comment line.')

    header = re_compile(r'(\w+)\s*=\s*(-?[\w\.]*)')
    for i in range(0, num_headers):
        m = header.match(handle.readline())
        if m:
            if m.group(1) in REQUIRED_HEADERS and m.group(1) in [
                    'LATITUDE', 'LONGITUDE']:
                self.globals[m.group(1)] = Decimal(m.group(2))
            else:
                self.globals[m.group(1)] = m.group(2)
        else:
            raise ValueError(('Expected %d continuous headers '
                              'but only saw %d') % (num_headers, i))
    woce.fuse_datetime(self)

    if header_only:
        return

    # Read parameters and units
    columns = handle.readline().strip().split(',')
    units = handle.readline().strip().split(',')

    # Check columns and units to match length
    if len(columns) != len(units):
        raise ValueError(
            ("Expected as many columns as units in file. "
             "Found %d columns and %d units.") % (len(columns), len(units)))

    # Check all parameters are non-trivial
    if not all(columns):
        log.warn(
            ("Stripped blank parameter from MALFORMED EXCHANGE FILE\n"
             "This may be caused by an extra comma at the end of a line."))
        columns = filter(None, columns)

    self.create_columns(columns, units, retain_order)
    read_data(self, handle, columns)
    self.check_and_replace_parameters()
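# Usage sketch, not from the original source: header_only=True stops after the
# global headers, enough to index a cruise without loading profile data.
# Import paths are assumed, as in the sketch above.
def _example_read_ctd_headers(path):
    from libcchdo.model.datafile import DataFile  # assumed import path
    from libcchdo.formats.ctd import exchange as ctdex  # assumed import path
    dfile = DataFile()
    with open(path) as handle:
        ctdex.read(dfile, handle, header_only=True)
    # LATITUDE/LONGITUDE come back as Decimal per the reader above; other
    # headers stay strings.
    return dfile.globals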
def setUp(self):
    self._infile = open(
        os.path.join(
            os.path.dirname(__file__),
            'samples/nc_hyd/i08s_33RR20070204_00001_00001_hy1.nc'), 'r')
    self.datafile = DataFile()
    self._outfile = NamedTemporaryFile()
    g = self.datafile.globals
    g['DATE'] = '12341231'
    g['TIME'] = '2359'
    g['LATITUDE'] = 90
    g['LONGITUDE'] = 180
    g['DEPTH'] = -1
    g['EXPOCODE'] = 'test'
    g['STNNBR'] = '20'
    g['CASTNO'] = '5'
    g['_OS_ID'] = 'OS1'
    fuse_datetime(self.datafile)
def read(self, handle):
    """How to read a Summary file for HOT."""
    header = True
    header_delimiter = re.compile('^-+$')
    for line in handle:
        if header:
            if header_delimiter.match(line):
                header = False
            else:
                self.globals['header'] += line
        else:
            # TODO Reimplement by finding ASCII column edges in header and
            # reading that way. Spacing is unreliable.
            tokens = line.split()
            if len(tokens) == 0:
                continue
            self.columns['EXPOCODE'].append(tokens[0].replace('/', '_'))
            self.columns['SECT_ID'].append(tokens[1])
            self.columns['STNNBR'].append(tokens[2])
            self.columns['CASTNO'].append(int_or_none(tokens[3]))
            self.columns['_CAST_TYPE'].append(tokens[4])
            date = datetime.strptime(tokens[5], '%m%d%y')
            self.columns['DATE'].append(
                "%4d%02d%02d" % (date.year, date.month, date.day))
            self.columns['TIME'].append(int_or_none(tokens[6]))
            self.columns['_CODE'].append(tokens[7])
            lat = woce.woce_lat_to_dec_lat(tokens[8:11])
            self.columns['LATITUDE'].append(lat)
            lng = woce.woce_lng_to_dec_lng(tokens[11:14])
            self.columns['LONGITUDE'].append(lng)
            self.columns['_NAV'].append(tokens[14])
            self.columns['DEPTH'].append(int_or_none(tokens[15]))
            self.columns['_ABOVE_BOTTOM'].append(int_or_none(tokens[16]))
            self.columns['_MAX_PRESSURE'].append(int_or_none(tokens[17]))
            self.columns['_NUM_BOTTLES'].append(int_or_none(tokens[18]))
            if len(tokens) > 19:
                self.columns['_PARAMETERS'].append(tokens[19])
            if len(tokens) > 20:
                self.columns['_COMMENTS'].append(' '.join(tokens[20:]))
    woce.fuse_datetime(self)
    self.check_and_replace_parameters()
def _read_oliver_sun(dfc, fileobj, cfg):
    """Read HRP2 format from Oliver Sun."""
    mat = loadmat(fileobj)
    filekey = mat.keys()[0]
    casts = mat[filekey][0]
    for cast in casts:
        dfile = DataFile()
        dfc.append(dfile)
        dfile.globals['EXPOCODE'] = cfg['expocode']
        # TODO
        dfile.globals['DEPTH'] = 0
        for key, item in zip(cast.dtype.names, cast):
            if item.shape == (1, 1):
                # (1, 1) items are MATLAB scalars; map them to globals.
                key = cfg['global_mapping'].get(key, None)
                if key:
                    dfile.globals[key] = item[0, 0]
            else:
                try:
                    dfile[key] = Column(key)
                    dfile[key].values = list(item.flatten())
                    # Act as if all files had QC and assign it to OceanSITES 1.
                    # Assuming that someone has already gone through level 0
                    # data and we are receiving level 1 or higher.
                    dfile[key].flags_woce = [2] * len(dfile[key].values)
                except KeyError:
                    pass
        try:
            dfile.globals['STNNBR']
        except KeyError:
            dfile.globals['STNNBR'] = '999'
        woce.fuse_datetime(dfile)
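# Usage sketch, not from the original source: driving the HRP2 reader. The
# DataFileCollection import path is assumed and the cfg values are
# illustrative; only the 'expocode' and 'global_mapping' keys are actually
# consulted by the reader above.
def _example_read_hrp2(path):
    from libcchdo.model.datafile import DataFileCollection  # assumed path
    dfc = DataFileCollection()
    cfg = {'expocode': 'EXAMPLE00001', 'global_mapping': {}}  # illustrative
    with open(path, 'rb') as fileobj:
        _read_oliver_sun(dfc, fileobj, cfg)
    return dfc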
def read(self, handle):
    """How to read a Bottle Exchange file."""
    read_identifier_line(self, handle, 'BOTTLE')
    l = read_comments(self, handle)

    # Read columns and units
    columns = [x.strip() for x in l.strip().split(',')]
    units = [x.strip() for x in handle.readline().strip().split(',')]

    # Check columns and units to match length
    if len(columns) != len(units):
        raise ValueError(
            ("Expected as many columns as units in file. "
             "Found %d columns and %d units.") % (len(columns), len(units)))

    # Check for unique identifier
    identifier = []
    if 'EXPOCODE' in columns and \
            'STNNBR' in columns and \
            'CASTNO' in columns:
        identifier = ['STNNBR', 'CASTNO']
        if 'SAMPNO' in columns:
            identifier.append('SAMPNO')
            if 'BTLNBR' in columns:
                identifier.append('BTLNBR')
        elif 'BTLNBR' in columns:
            identifier.append('BTLNBR')
        else:
            raise ValueError(("No unique identifier found for file. "
                              "(STNNBR,CASTNO,SAMPNO,BTLNBR),"
                              "(STNNBR,CASTNO,SAMPNO),"
                              "(STNNBR,CASTNO,BTLNBR)"))

    self.create_columns(columns, units)
    read_data(self, handle, columns)

    # Format all data to be what it is
    try:
        self['EXPOCODE'].values = map(str, self['EXPOCODE'].values)
    except KeyError:
        pass
    try:
        self['LATITUDE'].values = map(_decimal, self['LATITUDE'].values)
    except KeyError:
        pass
    try:
        self['LONGITUDE'].values = map(_decimal, self['LONGITUDE'].values)
    except KeyError:
        pass
    try:
        self['DATE']
    except KeyError:
        self['DATE'] = Column('DATE')
        self['DATE'].values = [None] * len(self)
    try:
        self['TIME']
    except KeyError:
        self['TIME'] = Column('TIME')
        self['TIME'].values = [None] * len(self)

    woce.fuse_datetime(self)
    self.check_and_replace_parameters()
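# Usage sketch, not from the original source: reading Bottle Exchange. Note
# the reader raises ValueError unless EXPOCODE, STNNBR and CASTNO columns are
# present along with SAMPNO and/or BTLNBR to form a unique identifier. Import
# paths are assumed.
def _example_read_bottle_exchange(path):
    from libcchdo.model.datafile import DataFile  # assumed import path
    from libcchdo.formats.bottle import exchange as btlex  # assumed path
    dfile = DataFile()
    with open(path) as handle:
        btlex.read(dfile, handle)
    return dfile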
def read(dfile, fileobj, data_type=None):
    """Read a French CSV file.

    data_type (optional) if given, must be 'bottle' or 'ctd'. This changes
    the columns that are created (adds BTLNBR for bottle data).

    NOTE: French CSV used for CTD contains all the CTD casts in one file.
    Split them into a DataFileCollection.

    """
    assert data_type is None or data_type in ['bottle', 'ctd']
    reader = csv_reader(fileobj, dialect=FrCSVDialect())

    # Read header line that contains parameters and units. Convert them to
    # WOCE.
    r_param = re_compile(r'(.*)\s\[(.*)\]')
    params = []
    units = []
    header = reader.next()
    for param in header:
        matches = r_param.match(param)
        unit = None
        if matches:
            param = matches.group(1)
            unit = matches.group(2)
        elif param == 'Flag':
            param = params[-1] + FLAG_F
        try:
            param = frparam_to_param[param]
        except KeyError:
            pass
        params.append(param)
        try:
            unit = frunit_to_unit[unit]
        except KeyError:
            pass
        units.append(unit)

    non_flag_paramunits = []
    for paramunit in zip(params, units):
        if paramunit[0].endswith(FLAG_F):
            continue
        non_flag_paramunits.append(paramunit)

    # Create all the columns.
    dfile.create_columns(*zip(*non_flag_paramunits))
    columns_id = ['EXPOCODE', 'STNNBR', 'CASTNO']
    col_exp, col_stn, col_cast = dfile.create_columns(columns_id)
    if data_type == 'bottle':
        (col_btln,) = dfile.create_columns(['BTLNBR'])
    dfile.check_and_replace_parameters()

    # Read data. Flag columns follow immediately after data columns.
    flags = set()
    flag_values = {}
    for rowi, row in enumerate(reader):
        for param, value in zip(params, row):
            if param == 'LATITUDE':
                # Hemisphere prefix moves to the end for the WOCE converter,
                # e.g. 'N47 38.96' -> ['47', '38.96', 'N'].
                lattoks = value[1:].split() + [value[0]]
                value = woce_lat_to_dec_lat(lattoks)
            elif param == 'LONGITUDE':
                lngtoks = value[1:].split() + [value[0]]
                value = woce_lng_to_dec_lng(lngtoks)
            if param.endswith(FLAG_F):
                param = param[:-len(FLAG_F)]
                col = dfile[param]
                if value not in flags:
                    flag_values[value] = [param, rowi, col.values[rowi]]
                    flags.add(value)
                if value == '':
                    value = 9
                try:
                    value = int(value)
                    value = frflag_to_woce_flag[value]
                except (ValueError, KeyError):
                    value = 9
                col.set(rowi, col.get(rowi), flag_woce=value)
            else:
                col = dfile[param]
                if value == '' or value is None:
                    col.set(rowi, None)
                else:
                    col.set(rowi, _decimal(value))
    fuse_datetime(dfile)

    # French CSV does not include cast identifying information. Generate that
    # by watching for coordinate changes.
    # While looping through and finding station changes, also populate the
    # bottom depth column from the _DEPTH column by estimating it as the
    # bottom-most depth.
    dfile.create_columns(['DEPTH'])

    last_coord = None
    last_dt = None
    last_depths = []
    stnnbr = 0
    castno = 0
    btlnbr = 1
    col_lat = dfile['LATITUDE']
    col_lng = dfile['LONGITUDE']
    col_dt = dfile['_DATETIME']
    col_bot = dfile['DEPTH']
    try:
        col_depth = dfile['_DEPTH']
    except KeyError:
        # rowi is still bound to the last data row index from the read loop
        # above.
        method, col_depth = dfile.calculate_depths(col_lat[rowi])
        col_depth = [xxx.to_integral_value() if xxx else xxx
                     for xxx in col_depth]
    for rowi in range(len(dfile)):
        coord = (col_lat[rowi], col_lng[rowi])
        # location changed => station change
        if last_coord != coord:
            stnnbr += 1
            castno = 0
            btlnbr = 1
            last_coord = coord
        # time changed => cast changed
        dtime = col_dt[rowi]
        if last_dt != dtime:
            castno += 1
            btlnbr = 1
            if last_depths:
                # Record the estimated bottom depth for the cast that ended.
                col_bot.set_length(rowi, max(last_depths))
                last_depths = []
        else:
            # normal measurement row
            btlnbr += 1
        last_dt = dtime
        col_exp.set(rowi, '')
        col_stn.set(rowi, stnnbr)
        col_cast.set(rowi, castno)
        last_depths.append(col_depth[rowi])
        if data_type == 'bottle':
            col_btln.set(rowi, btlnbr)
    col_bot.set_length(len(dfile), col_depth[len(dfile) - 1])

    try:
        del dfile['_DEPTH']
    except KeyError:
        pass
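# Usage sketch, not from the original source: because the reader above tags
# every row with synthetic STNNBR/CASTNO, splitting the one French CSV
# DataFile into casts reduces to grouping row indices by that pair. Only
# DataFile behaviors already used above (len(), column indexing) are assumed.
def _example_cast_row_groups(dfile):
    from collections import OrderedDict
    groups = OrderedDict()
    col_stn = dfile['STNNBR']
    col_cast = dfile['CASTNO']
    for rowi in range(len(dfile)):
        key = (col_stn[rowi], col_cast[rowi])
        groups.setdefault(key, []).append(rowi)
    return groups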
        except IndexError:
            lng = None
        self['LONGITUDE'].append(lng)
        try:
            self['_NAV'].append(tokens[10])
            self['DEPTH'].append(int_or_none(tokens[11]))
            self['_ABOVE_BOTTOM'].append(int_or_none(tokens[12]))
            self['_WIRE_OUT'].append(int_or_none(tokens[13]))
            self['_MAX_PRESSURE'].append(int_or_none(tokens[14]))
            self['_NUM_BOTTLES'].append(int_or_none(tokens[15]))
            self['_PARAMETERS'].append(identity_or_none(tokens[16]))
            self['_COMMENTS'].append(identity_or_none(tokens[17]))
        except IndexError:
            pass
    woce.fuse_datetime(self)
    self.check_and_replace_parameters()