def test_read_err_flag_col_no_data_col(self):
    # Hand read_data a lone flag column (no CTDSAL parameter column) on an
    # empty stream, then check that the complaint below was recorded.
    # NOTE(review): ensure_lines presumably inspects captured log output --
    # confirm against the test base class.
    expected_messages = [
        "Flag column CTDSAL_FLAG_W exists without parameter column CTDSAL",
    ]
    column_names = ['CTDSAL_FLAG_W']
    with closing(StringIO()) as empty_stream:
        exchange.read_data(DataFile(), empty_stream, column_names)
        found = self.ensure_lines(expected_messages)
        self.assertTrue(found)
def read(self, handle, retain_order=False, header_only=False):
    """How to read a CTD Exchange file.

    Consumes, in order, the identifier line, the comments, the
    NUMBER_HEADERS block and (unless ``header_only`` is set) the parameter
    line, the units line and the data rows from ``handle``, storing the
    results on ``self``.

    retain_order - passed through unchanged to ``self.create_columns``
    header_only - only read the CTD headers, not the data

    Raises ValueError when the NUMBER_HEADERS line is missing, when one of
    the announced header lines does not look like ``NAME = VALUE``, or when
    the parameter and unit lines contain a different number of fields.

    """
    read_identifier_line(self, handle, 'CTD')
    first_line = read_comments(self, handle)

    # Read NUMBER_HEADERS
    count_match = re_compile(r'NUMBER_HEADERS\s*=\s*(\d+)').match(first_line)
    if not count_match:
        raise ValueError(
            u'Expected NUMBER_HEADERS as the second non-comment line.')
    # NUMBER_HEADERS counts itself as a header
    num_headers = int(count_match.group(1)) - 1

    header_re = re_compile(r'(\w+)\s*=\s*(-?[\w\.]*)')
    for i in range(num_headers):
        header_match = header_re.match(handle.readline())
        if not header_match:
            raise ValueError(('Expected %d continuous headers '
                              'but only saw %d') % (num_headers, i))
        key, value = header_match.group(1), header_match.group(2)
        # Only the required position headers are converted to Decimal;
        # every other header keeps the matched text.
        if key in REQUIRED_HEADERS and key in ['LATITUDE', 'LONGITUDE']:
            self.globals[key] = Decimal(value)
        else:
            self.globals[key] = value

    woce.fuse_datetime(self)

    if header_only:
        return

    # Read parameters and units
    columns = handle.readline().strip().split(',')
    units = handle.readline().strip().split(',')

    # Check columns and units to match length.  This must be a value
    # comparison: ``is not`` compares object identity, which is only
    # accidentally true for equal small integers.
    if len(columns) != len(units):
        raise ValueError(("Expected as many columns as units in file. "
                          "Found %d columns and %d units.") %
                         (len(columns), len(units)))

    # Check all parameters are non-trivial
    if not all(columns):
        log.warn(
            ("Stripped blank parameter from MALFORMED EXCHANGE FILE\n"
             "This may be caused by an extra comma at the end of a line."))
        # Build a real list so later consumers can take len() of it or
        # iterate it more than once.
        columns = [column for column in columns if column]
        # NOTE(review): units is left untouched here, so it can now be
        # longer than columns -- confirm create_columns tolerates that.

    self.create_columns(columns, units, retain_order)

    read_data(self, handle, columns)

    self.check_and_replace_parameters()
def test_read_warn_bad_flag(self):
    # One data row whose flag field is the letter 'a'; after read_data the
    # message below is expected to have been recorded.
    # NOTE(review): ensure_lines presumably inspects captured log output --
    # confirm against the test base class.
    column_names = ['CTDSAL', 'CTDSAL_FLAG_W']
    expected_messages = [
        "Bad WOCE flag 'a' for CTDSAL on data row 0",
    ]
    datafile = DataFile()
    datafile['CTDSAL'] = Column('CTDSAL')
    with closing(StringIO()) as stream:
        stream.name = 'testfile'
        stream.write('123,a\n')
        stream.flush()
        stream.seek(0)
        exchange.read_data(datafile, stream, column_names)
        found = self.ensure_lines(expected_messages)
        self.assertTrue(found)
def test_read_data_btlnbr_as_string(self):
    # Feed read_data three lines ('BTLNBR', a blank line, '12') with BTLNBR
    # as the only column and check the first stored value is a string.
    raw_lines = ('BTLNBR\n', '\n', '12\n')
    with closing(StringIO()) as stream:
        for raw_line in raw_lines:
            stream.write(raw_line)
        stream.flush()
        stream.seek(0)

        datafile = DataFile()
        exchange.read_data(datafile, stream, ['BTLNBR'])

        first_value = datafile['BTLNBR'].values[0]
        self.assertTrue(isinstance(first_value, basestring))
def test_read_btlnbr_as_string(self):
    # Two rows of BTLNBR,CTDSAL: bottle numbers must come back as the
    # original text (including the leading zero of '01') while the CTDSAL
    # values come back as Decimals.
    column_names = ['BTLNBR', 'CTDSAL']
    datafile = DataFile()
    for column_name in column_names:
        datafile[column_name] = Column(column_name)

    with closing(StringIO()) as stream:
        for row in ('SIO1,33.24\n', '01,32.10\n'):
            stream.write(row)
        stream.flush()
        stream.seek(0)
        exchange.read_data(datafile, stream, column_names)

        bottle_numbers = datafile['BTLNBR'].values
        salinities = datafile['CTDSAL'].values
        self.assertEqual(bottle_numbers, ['SIO1', '01'])
        self.assertEqual(
            salinities, [Decimal('33.24'), Decimal('32.10')])
def test_read_unknown_parameter_fillvalue(self):
    """Unknown-format parameters must still get the out-of-band check.

    A -999 field is expected to be stored as None both for CTDPRS and for
    UNKPARAM, while BTLNBR values keep their exact text.

    """
    column_names = ['CTDPRS', 'CTDPRS_FLAG_W', 'UNKPARAM', 'BTLNBR']
    datafile = DataFile()
    for column_name in ('CTDPRS', 'UNKPARAM', 'BTLNBR'):
        datafile[column_name] = Column(column_name)

    with closing(StringIO()) as stream:
        stream.name = 'testfile'
        for row in ('-999,9,1,012\n', '11,2,-999,123\n'):
            stream.write(row)
        stream.flush()
        stream.seek(0)
        exchange.read_data(datafile, stream, column_names)

        pressures = datafile['CTDPRS'].values
        bottle_numbers = datafile['BTLNBR'].values
        unknowns = datafile['UNKPARAM'].values
        self.assertEqual(None, pressures[0])
        self.assertEqual('012', bottle_numbers[0])
        self.assertEqual('123', bottle_numbers[1])
        self.assertEqual(None, unknowns[1])
def read(self, handle):
    """How to read a Bottle Exchange file.

    Consumes the identifier line, the comments, the parameter line, the
    units line and the data rows from ``handle`` and stores them on
    ``self``.  Afterwards EXPOCODE, LATITUDE and LONGITUDE values are
    converted when those columns exist, and DATE and TIME columns are
    created (filled with None) when they do not.

    Raises ValueError when the parameter and unit lines differ in length,
    or when EXPOCODE, STNNBR and CASTNO are all present but neither SAMPNO
    nor BTLNBR is.

    """
    read_identifier_line(self, handle, 'BOTTLE')
    parameter_line = read_comments(self, handle)

    # The line handed back by read_comments is treated as the parameter
    # names; the line that follows it holds the units.
    columns = [name.strip() for name in parameter_line.strip().split(',')]
    unit_line = handle.readline()
    units = [unit.strip() for unit in unit_line.strip().split(',')]

    n_columns = len(columns)
    n_units = len(units)
    if n_columns != n_units:
        raise ValueError(
            ("Expected as many columns as units in file. "
             "Found %d columns and %d units.") % (n_columns, n_units))

    # Check for unique identifier.  The list is assembled here but not
    # consumed by anything further down in this function.
    identifier = []
    if all(key in columns for key in ('EXPOCODE', 'STNNBR', 'CASTNO')):
        has_sampno = 'SAMPNO' in columns
        has_btlnbr = 'BTLNBR' in columns
        if not has_sampno and not has_btlnbr:
            raise ValueError(("No unique identifer found for file. "
                              "(STNNBR,CASTNO,SAMPNO,BTLNBR),"
                              "(STNNBR,CASTNO,SAMPNO),"
                              "(STNNBR,CASTNO,BTLNBR)"))
        identifier = ['STNNBR', 'CASTNO']
        if has_sampno:
            identifier.append('SAMPNO')
        if has_btlnbr:
            identifier.append('BTLNBR')

    self.create_columns(columns, units)

    read_data(self, handle, columns)

    # Format all data to be what it is; columns that are absent are
    # simply skipped.
    conversions = (
        ('EXPOCODE', str),
        ('LATITUDE', _decimal),
        ('LONGITUDE', _decimal),
    )
    for column_name, convert in conversions:
        try:
            self[column_name].values = map(convert, self[column_name].values)
        except KeyError:
            pass

    # Guarantee DATE and TIME exist before fusing them, padding a missing
    # column with one None per len(self).
    for column_name in ('DATE', 'TIME'):
        try:
            self[column_name]
        except KeyError:
            self[column_name] = Column(column_name)
            self[column_name].values = [None] * len(self)

    woce.fuse_datetime(self)

    self.check_and_replace_parameters()