def test_merge_datafiles_does_not_create_extra_columns(self):
    """Merge datafiles but don't create extra columns.

    When merging data files, create columns only if they exist in the
    derivative and were requested to be merged in.

    Thanks to sescher for finding this.

    """
    df0 = DataFile()
    df0.create_columns(['CTDPRS', 'CTDOXY'])
    df0['CTDPRS'].append(1, 2)
    df0['CTDPRS'].append(2, 2)
    df0['CTDOXY'].append(40, 2)
    df0['CTDOXY'].append(41, 3)

    df1 = DataFile()
    df1.create_columns(['CTDPRS', 'CTDOXY', 'CTDSAL'])
    df1['CTDPRS'].append(2, 2)
    df1['CTDPRS'].append(3, 2)
    df1['CTDOXY'].append(50, 2)
    df1['CTDOXY'].append(51, 3)
    df1['CTDSAL'].append(20, 2)
    df1['CTDSAL'].append(21, 2)

    mdf = merge_datafiles(df0, df1, ['CTDPRS'], ['CTDOXY'])
    with self.assertRaises(KeyError):
        mdf['CTDSAL']

def test_write_fill_value_decimal_places_follow_column(self):
    """Fill values should follow the column's data's lead for decimal places.

    E.g. if the column has data [10.001, 11.123], the normal fill value
    -999 should be written -999.000, i.e. with as many decimal places as
    the data.

    If the column has no data in it, default to the old-style C format
    string for how many decimal places to show.

    """
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.create_columns([
            'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS', 'CTDOXY'])
        dfile['STNNBR'].values = [None, None]
        dfile['CASTNO'].values = [None, None]
        dfile['BTLNBR'].values = [None, None]
        dfile['_DATETIME'].values = [None, None]
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]
        dfile['CTDOXY'].values = [None, _decimal('243.23')]

        btlex.write(dfile, buff)

        result = buff.getvalue().split('\n')
        # CTDPRS default decplaces is 1 but the data has 4
        self.assertEqual('-999.0000', result[4].split(',')[5].lstrip())
        # CTDOXY default decplaces is 4 but the data has 2
        self.assertEqual('-999.00', result[3].split(',')[6].lstrip())

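# A minimal sketch (not the library's implementation) of the fill-value
# alignment exercised above. fill_value_for_column is a hypothetical helper;
# it assumes column values are Decimals whose exponents encode the number of
# decimal places, and falls back to a C format string for empty columns.
from decimal import Decimal


def fill_value_for_column(values, c_format='%9.1f'):
    """Render the -999 fill value with the column data's decimal places."""
    decplaces = [
        -val.as_tuple().exponent
        for val in values if isinstance(val, Decimal)]
    if decplaces:
        # E.g. data with 4 decimal places yields '-999.0000'.
        return '%.{0}f'.format(max(decplaces)) % -999
    # No data: defer to the old-style C format string.
    return c_format % -999
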
def test_write_exchange_decimal_places(self):
    """Decimal places should be kept from the original data."""
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.globals['LONGITUDE'] = _decimal('0.0000000')
        dfile.create_columns(['CTDPRS'])
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]

        ctdex.write(dfile, buff)

        result = buff.getvalue().split('\n')
        # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
        # has to be done manually.
        self.assertEqual('0.0000000', result[2].split(' = ')[1].lstrip())

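# Sketch of the str() behavior the comment above refers to, using only the
# standard decimal module: a zero Decimal with a negative exponent
# stringifies to scientific notation, so a plain rendering has to be built
# with a format spec instead.
from decimal import Decimal

assert str(Decimal('0.0000000')) == '0E-7'
assert '{0:f}'.format(Decimal('0.0000000')) == '0.0000000'
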
def test_functional_write(self):
    dfile = DataFile()
    dfile.create_columns(['CTDPRS', 'CTDOXY'])
    dfile['CTDPRS'].parameter.display_order = 0
    dfile['CTDOXY'].parameter.display_order = 1
    dfile['CTDPRS'].values = map(_decimal, ['2.0', '4.0'])
    dfile['CTDOXY'].values = map(_decimal, ['254.0', '253.1'])
    dfile['CTDOXY'].flags_woce = [2, 3]

    with closing(StringIO()) as buff:
        ctdex.write(dfile, buff)
        result = buff.getvalue().split('\n')
        self.assertEqual(
            [u' 2.0', u' 254.0', u'2'], result[4].split(','))

def test_merge_collections(self):
    """When merging collections, map files, then merge mapped files."""
    odfc = DataFileCollection()
    ddfc = DataFileCollection()

    df0 = DataFile()
    df0.globals['EXPOCODE'] = 'a'
    df0.globals['STNNBR'] = 1
    df0.globals['CASTNO'] = 1
    df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
    df0['CTDPRS'].append(1, 2)
    df0['CTDPRS'].append(2, 2)
    df0['NITRAT'].append(10, 2)
    df0['NITRAT'].append(11, 2)
    df0['NITRIT'].append(10, 2)
    df0['NITRIT'].append(11, 2)
    odfc.append(df0)

    df1 = DataFile()
    df1.globals['EXPOCODE'] = 'a'
    df1.globals['STNNBR'] = 1
    df1.globals['CASTNO'] = 1
    df1.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
    df1['CTDPRS'].append(1, 2)
    df1['CTDPRS'].append(3, 2)
    df1['NITRAT'].append(20, 2)
    df1['NITRAT'].append(21, 2)
    df1['NITRIT'].append(10, 2)
    df1['NITRIT'].append(11, 2)
    ddfc.append(df1)

    def merger(origin, deriv):
        return merge_datafiles(
            origin, deriv, ['CTDPRS'], ['NITRAT', 'NITRIT'])

    merged_dfc = merge_collections(odfc, ddfc, merger)

    self.assertEqual(merged_dfc.files[0]['CTDPRS'].values, [1, 2])
    self.assertEqual(merged_dfc.files[0]['NITRAT'].values, [20, 11])
    self.assertEqual(merged_dfc.files[0]['NITRIT'].values, [10, 11])

    lines = [
        # df1 has a different CTDPRS record (3)
        'Key (3,) does not exist in origin from derivative rows',
        # NITRIT columns are the same
        "Instructed to merge parameters that are not different: ['NITRIT']",
    ]
    self.assertTrue(self.ensure_lines(lines))

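# A minimal sketch of the "map files, then merge mapped files" step, under
# the assumption that files are paired on the (EXPOCODE, STNNBR, CASTNO)
# globals set above. map_collections_sketch is hypothetical, not the
# library function.
def map_collections_sketch(odfc, ddfc):
    def key(dfile):
        return (dfile.globals['EXPOCODE'], dfile.globals['STNNBR'],
                dfile.globals['CASTNO'])
    deriv_index = dict((key(dfile), dfile) for dfile in ddfc.files)
    # Pair each origin file with its derivative, if one exists.
    return [(dfile, deriv_index.get(key(dfile))) for dfile in odfc.files]
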
def test_diff_decplaces(self):
    """Derivative is still different when decimal places are different."""
    dfo = DataFile()
    dfo.create_columns(['CTDPRS', 'CTDOXY'])
    dfo['CTDPRS'].append(_decimal('1'))
    dfo['CTDOXY'].append(_decimal('0.140'))

    dfd = DataFile()
    dfd.create_columns(['CTDPRS', 'CTDOXY'])
    dfd['CTDPRS'].append(_decimal('1'))
    dfd['CTDOXY'].append(_decimal('0.14'))

    p_different, p_not_in_orig, p_not_in_deriv, p_common = \
        different_columns(dfo, dfd, ['CTDPRS'])
    self.assertEqual(p_different, ['CTDOXY'])

    dfile = merge_datafiles(dfo, dfd, ['CTDPRS'], ['CTDOXY'])
    self.assertEqual(decimal_to_str(dfile['CTDOXY'][0]), '0.14')

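# Sketch of why the derivative above counts as different: two Decimals can
# be numerically equal while carrying different precision, so comparing
# values alone would miss a change in reported decimal places. Uses only
# the standard decimal module.
from decimal import Decimal

assert Decimal('0.14') == Decimal('0.140')  # numerically equal
# ...but the exponents differ: 2 vs. 3 decimal places.
assert Decimal('0.14').as_tuple().exponent != \
    Decimal('0.140').as_tuple().exponent
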
def test_write_btl_date_time_no_decimals(self):
    """BTL_DATE and BTL_TIME should not have decimal places."""
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.create_columns([
            'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS',
            'BTL_DATE', 'BTL_TIME'])
        dfile['STNNBR'].values = [None, None]
        dfile['CASTNO'].values = [None, None]
        dfile['BTLNBR'].values = [None, None]
        dfile['_DATETIME'].values = [None, None]
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]
        dfile['BTL_DATE'].values = [
            _decimal('19700101'), _decimal('19700102')]
        dfile['BTL_TIME'].values = [_decimal('0000'), _decimal('1234')]

        btlex.write(dfile, buff)

        result = buff.getvalue().split('\n')
        self.assertEqual('19700101', result[3].split(',')[6].lstrip())
        self.assertEqual('1234', result[4].split(',')[7].lstrip())

def test_write_exchange_decimal_places(self):
    """Decimal places should be kept from the original data."""
    with closing(StringIO()) as buff:
        dfile = DataFile()
        dfile.create_columns([
            'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS',
            'LONGITUDE'])
        dfile['STNNBR'].values = [None, None]
        dfile['CASTNO'].values = [None, None]
        dfile['BTLNBR'].values = [None, None]
        dfile['_DATETIME'].values = [None, None]
        dfile['CTDPRS'].values = [_decimal('10.0001'), None]
        dfile['LONGITUDE'].values = [
            _decimal('0.0000000'), _decimal('1.000000')]

        btlex.write(dfile, buff)

        result = buff.getvalue().split('\n')
        # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
        # has to be done manually.
        self.assertEqual('0.0000000', result[3].split(',')[5].lstrip())

def test_merge_datafiles_no_column(self):
    """It is an error to merge columns that are in neither datafile."""
    df0 = DataFile()
    df0.create_columns(['CTDPRS', 'NITRAT'])
    df0['CTDPRS'].append(1, 2)
    df0['CTDPRS'].append(2, 2)
    df0['NITRAT'].append(10, 2)
    df0['NITRAT'].append(11, 2)

    df1 = DataFile()
    df1.create_columns(['CTDPRS', 'NITRAT'])
    df1['CTDPRS'].append(1, 2)
    df1['CTDPRS'].append(2, 2)
    df1['NITRAT'].append(20, 3)
    df1['NITRAT'].append(21, 4)

    with self.assertRaisesRegexp(
            ValueError, 'No columns selected to merge are different.'):
        merge_datafiles(df0, df1, ['CTDPRS'], ['CTDSAL'])
    lines = [
        "Instructed to merge parameters that are not in either "
        "datafile: ['CTDSAL']",
    ]
    self.assertTrue(self.ensure_lines(lines))

def test_merge_datafiles_flags(self):
    """It should be possible to only merge flag "columns".

    This includes updating and adding flags. If adding flags and the
    original column does not exist, warn and fail.

    """
    df0 = DataFile()
    df0.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
    df0['CTDPRS'].append(1, 2)
    df0['CTDPRS'].append(2, 2)
    df0['CTDPRS'].append(3, 2)
    df0['NITRAT'].append(10, 2)
    df0['NITRAT'].append(11, 2)
    df0['NITRAT'].append(12, 2)
    df0['FLUOR'].append(100)
    df0['FLUOR'].append(101)
    df0['FLUOR'].append(102)

    df1 = DataFile()
    df1.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
    df1['CTDPRS'].append(1, 2)
    df1['CTDPRS'].append(2, 2)
    df1['CTDPRS'].append(4, 2)
    df1['NITRAT'].append(20, 3)
    df1['NITRAT'].append(21, 4)
    df1['NITRAT'].append(22, 4)
    df1['FLUOR'].append(200, 2)
    df1['FLUOR'].append(201, 3)
    df1['FLUOR'].append(202, 3)

    mdf = merge_datafiles(
        df0, df1, ['CTDPRS'], ['NITRAT_FLAG_W', 'FLUOR_FLAG_W'])
    self.assertEqual(mdf['NITRAT'].values, [10, 11, 12])
    self.assertEqual(mdf['NITRAT'].flags_woce, [3, 4, 2])
    self.assertEqual(mdf['FLUOR'].values, [100, 101, 102])
    self.assertEqual(mdf['FLUOR'].flags_woce, [2, 3, 9])

def test_merge_datafiles(self):
    """Merge datafiles.

    When merging data files, there are two cases to consider:

    Case 1: Adding a new column
        If the derivative file has fewer records, fill in missing records
        with fill values and missing flags.

    Case 2: Updating column data
        It should also be possible to merge only flags. Make sure that
        when only merging flags, the data are not merged as well.

    Parameter units should be updated from the derivative.

    """
    df0 = DataFile()
    df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT', 'CTDOXY'])
    df0['CTDPRS'].append(1, 2)
    df0['CTDPRS'].append(2, 2)
    df0['NITRAT'].append(10, 2)
    df0['NITRAT'].append(11, 2)
    df0['NITRIT'].append(30, 5)
    df0['NITRIT'].append(31, 6)
    df0['CTDOXY'].append(40, 2)
    df0['CTDOXY'].append(41, 3)

    df1 = DataFile()
    df1.create_columns(['CTDPRS', 'NITRAT', 'CTDSAL', 'CTDOXY'])
    df1['CTDPRS'].append(2, 2)
    df1['CTDPRS'].append(3, 2)
    df1['CTDSAL'].append(20, 2)
    df1['CTDSAL'].append(21, 2)
    df1['NITRAT'].append(12, 4)
    df1['NITRAT'].append(13, 4)
    df1['CTDOXY'].append(40, 2)
    df1['CTDOXY'].append(41, 3)
    df1['CTDOXY'].parameter.units = Unit('UMOL/KG')

    # Case 1: column add
    mdf = merge_datafiles(
        df0, df1, ['CTDPRS'],
        ['NITRAT', 'NITRAT_FLAG_W', 'CTDSAL', 'CTDSAL_FLAG_W', 'CTDOXY'])
    self.assertEqual(mdf['CTDPRS'].values, [1, 2])
    # Make sure missing values and flags are filled in.
    self.assertEqual(mdf['CTDSAL'].values, [None, 20])
    self.assertEqual(mdf['CTDSAL'].flags_woce, [9, 2])
    # Case 2: data update
    self.assertEqual(mdf['NITRAT'].values, [10, 12])
    self.assertEqual(mdf['NITRAT'].flags_woce, [2, 4])
    # Columns in origin should be kept
    self.assertEqual(mdf['NITRIT'].values, [30, 31])
    self.assertEqual(mdf['NITRIT'].flags_woce, [5, 6])
    # Units should be overwritten for merged columns
    self.assertEqual(
        mdf['CTDOXY'].parameter.units, df1['CTDOXY'].parameter.units)

    # Make sure a warning is printed regarding the unit overwrite. This
    # doubles as a check that derivative columns do not wholesale
    # overwrite the origin column; they must be merged using the row match
    # algorithm.
    lines = [
        "Changed units for CTDOXY from '' to 'UMOL/KG'",
    ]
    self.assertTrue(self.ensure_lines(lines))

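# A minimal sketch (hypothetical, not the library's merge_datafiles) of the
# row-match algorithm the assertions above depend on: rows are keyed on the
# "on" columns, derivative values overwrite matches, and origin rows with
# no match receive fill values (None; the flag counterpart would be 9) for
# newly added columns.
def merge_rows_sketch(origin_rows, deriv_rows, keys, params):
    """origin_rows/deriv_rows are lists of dicts keyed by column name."""
    index = dict(
        (tuple(row[k] for k in keys), row) for row in deriv_rows)
    merged = []
    for row in origin_rows:
        out = dict(row)
        match = index.get(tuple(row[k] for k in keys))
        for param in params:
            if match is not None and param in match:
                out[param] = match[param]
            elif param not in out:
                out[param] = None  # fill value for the unmatched row
        merged.append(out)
    return merged
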
def test_different_columns(self):
    """Columns between two datafiles differ under a wide variety of cases.

    Case 1: Column values are different
    Case 1 corollary: Flag values are different
    Case 2: Units are different
    Case 3: Column not in original
    Case 4: Column not in derivative

    """
    with TemporaryFile() as origin, TemporaryFile() as deriv:
        origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,NITRAT_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,UMOL/KG,/MILLE,
316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,2,10.0,-999.000,9
316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,2,10.0,-999.000,9
END_DATA
""")
        origin.flush()
        origin.seek(0)
        deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,TDN_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,NMOL/KG,/MILLE,,,
316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,3,10.0,-999.000,1,-999.0,9
316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,3,10.0, 10.000,9,-999.0,9
END_DATA
""")
        deriv.flush()
        deriv.seek(0)

        dforigin = DataFile()
        dfderiv = DataFile()
        btlex.read(dforigin, origin)
        btlex.read(dfderiv, deriv)
        self.assertEqual(
            # NITRIT comes after because NMOL/KG is not an expected unit
            # and gets pushed to the end when sorting
            (['DELC14', 'DELC14_FLAG_W', 'NITRIT'],
             # PH_SWS_FLAG_W has underscores inside the parameter name.
             # All parts need to be included.
             ['PH_SWS', 'PH_SWS_FLAG_W', 'TDN', 'TDN_FLAG_W'],
             ['NITRAT', 'NITRAT_FLAG_W'],
             ['EXPOCODE', 'SECT_ID', 'STNNBR', 'CASTNO', 'SAMPNO',
              'BTLNBR', 'BTLNBR_FLAG_W', 'LATITUDE', 'LONGITUDE',
              'DEPTH', '_DATETIME']),
            different_columns(dforigin, dfderiv, (
                'EXPOCODE', 'SECT_ID', 'STNNBR', 'CASTNO', 'SAMPNO',
                'BTLNBR',)))
        lines = [
            "DELC14 differs at origin row 1:\t(None, Decimal('10.000'))",
            "DELC14_FLAG_W differs at origin row 0:\t(9, 1)",
        ]
        self.assertTrue(self.ensure_lines(lines))

    # Columns are not different if merged results are not different.
    dfo = DataFile()
    dfd = DataFile()
    dfo.create_columns(['CTDPRS', 'CTDOXY'])
    dfo.check_and_replace_parameters()
    dfd.create_columns(['CTDPRS', 'CTDOXY'])
    dfd.check_and_replace_parameters()
    dfo['CTDPRS'].values = [1, 2, 3]
    dfo['CTDOXY'].values = [10, 20, 30]
    dfd['CTDPRS'].values = [3, 2, 1]
    dfd['CTDOXY'].values = [30, 20, 10]
    self.assertEqual(
        ([], [], [], ['CTDPRS', 'CTDOXY']),
        different_columns(dfo, dfd, ('CTDPRS',)))

class TestDataFile(TestCase):
    def setUp(self):
        self.file = DataFile()
        self.c = self.file.columns['EXPOCODE'] = Column('EXPOCODE')

    def tearDown(self):
        self.file = None

    def test_init(self):
        self.assertEqual(len(self.file.columns), 1)
        self.assertEqual(self.file.footer, None)
        self.assertEqual(self.file.globals, {'stamp': '', 'header': ''})

    def test_expocodes(self):
        self.c.append('A')
        self.assertEqual(['A'], self.file.expocodes())
        self.c.append('B')
        self.assertEqual(['A', 'B'], self.file.expocodes())
        self.c.append('A')
        # expocodes() returns unique expocodes.
        self.assertEqual(['A', 'B'], self.file.expocodes())

    def test_len(self):
        c = self.file.columns['EXPOCODE']
        del self.file.columns['EXPOCODE']
        self.assertEqual(len(self.file), 0)
        self.file.columns['EXPOCODE'] = c
        self.assertEqual(len(self.file), 0)
        self.c.append('A')
        self.assertEqual(len(self.file), 1)
        self.c.append('A')
        self.assertEqual(len(self.file), 2)

    def test_sorted_columns(self):
        self.file.columns['CASTNO'] = Column('CASTNO')
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR', 'CASTNO']
        received = map(
            lambda c: c.parameter.mnemonic_woce(),
            self.file.sorted_columns())
        # If lengths are equal and all expected in received, assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all([x in received for x in expected]))

    def test_get_property_for_columns(self):
        pass  # This is tested by the following tests.

    def test_column_headers(self):
        self.assertEqual(['EXPOCODE'], self.file.column_headers())
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR']
        received = self.file.column_headers()
        # If lengths are equal and all expected in received, assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all([x in received for x in expected]))

    def test_formats(self):
        self.file.columns['CTDOXY'] = Column('CTDOXY')
        self.file.check_and_replace_parameters()
        # Order of columns may be wrong
        self.assertEqual(['%11s', '%9.4f'], self.file.formats())

    def test_to_dict(self):
        self.file.to_dict()
        pass  # TODO

    def test_str(self):
        str(self.file)

    def test_create_columns(self):
        parameters = ['CTDOXY']
        units = ['UMOL/KG']
        self.file.create_columns(parameters, units)

    def test_column_append(self):
        self.assertEqual(self.c.values, [])
        self.c.set(2, 'test')
        self.assertEqual(self.c.values, [None, None, 'test'])
        self.assertEqual(self.c.flags_woce, [])
        self.c.append('test2', 'flag2')
        self.assertEqual(self.c.values, [None, None, 'test', 'test2'])
        self.assertEqual(self.c.flags_woce, [None, None, None, 'flag2'])

    def test_calculate_depths(self):
        self.file['_ACTUAL_DEPTH'] = Column('_ACTUAL_DEPTH')
        self.assertEqual(('actual', []), self.file.calculate_depths())

        del self.file['_ACTUAL_DEPTH']
        self.file.globals['LATITUDE'] = 0
        self.file.create_columns(['CTDPRS', 'CTDSAL', 'CTDTMP'])
        self.assertEqual(('unesco1983', []), self.file.calculate_depths())

        self.file['CTDPRS'].values = [1]
        self.file['CTDSAL'].values = [1]
        self.file['CTDTMP'].values = [1]
        self.assertEqual(
            ('sverdrup', [_decimal('1.021723814950101286444879340E-8')]),
            self.file.calculate_depths())

    def test_check_and_replace_parameter_contrived(self):
        """Contrived parameters are not checked."""
        col = Column('_DATETIME')
        col.check_and_replace_parameter(self.file, convert=False)

def read(self, handle, metadata=None):
    """How to read a Bottle Bermuda Atlantic Time-Series Study file.

    This function reads bats_bottle.txt.

    Arguments:
    self - (special case, see NOTE) dictionary
    metadata - (optional) BATS cruise metadata to be used to find port dates

    NOTE: The result for this method is a special case. The bottle file
    format contains the entire BATS holdings while the internal data format
    splits data up by cruise. Because timeseries cruises are split into one
    file per cruise, the end result is a dictionary with cruise_ids as keys
    to DatafileCollections (cruises) containing Datafiles (casts).

    """
    sections = _read_header_sections(self, handle)
    _read_variables(self, handle)
    parameters = _get_variables(self, handle, sections)

    # Add DON. A note in the Variables list states DON is reported for TON
    # prior to BATS 121.
    parameters.append(['DON', None, 'umol/kg'])

    manual_parameters = [
        ['BTLNBR', ''],
        ['_DATETIME', ''],
        ['LATITUDE', ''],
        ['LONGITUDE', ''],
        ['_ACTUAL_DEPTH', 'METERS'],
    ]
    columns = [x[0] for x in manual_parameters]
    units = [x[1] for x in manual_parameters]

    s = None
    for i, (var, d, u) in enumerate(parameters):
        if var == 'Depth':
            s = i + 1
            continue
        # Only add parameters after Depth. The others were done manually.
        if s is None:
            continue
        try:
            var = bats_to_param[var]
        except KeyError:
            pass
        columns.append(var)
        units.append(u)

    template_df = DataFile()
    template_df.create_columns(columns, units)
    template_df.check_and_replace_parameters(convert=False)

    for sec, lines in sections.items():
        if sec == 'Variables list':
            continue
        if sec != 'Comments':
            continue
        template_df.globals['_{0}'.format(sec)] = '\n'.join(lines)

    df = None
    params_auto = parameters[s:]
    dfi = 0
    for i, l in enumerate(handle):
        parts = l.split()

        id = parts[0]
        (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id,
         nisk_id) = _parse_bats_id(id)
        ship = _ship_from_cruise_num(cruise_num)
        if not ship:
            ship = 'R/V Atlantic Explorer'

        if (df is None or df.globals['_OS_ID'] != cruise_id or
                df.globals['STNNBR'] != cruise_type or
                df.globals['CASTNO'] != cast_id):
            if df is not None:
                # Done reading one cast. Finalize it.
                log.info(u'finalizing cast {0} {1} {2}'.format(
                    df.globals['_OS_ID'], df.globals['STNNBR'],
                    df.globals['CASTNO']))
                try:
                    meta = metadata[cruise_id]
                    port_date = meta['dates'][0]
                except (TypeError, KeyError):
                    port_date = None
                if not port_date:
                    port_date = min(df['_DATETIME'])
                df.globals['EXPOCODE'] = create_expocode(
                    ship_code(ship, raise_on_unknown=False), port_date)
                log.info(df.globals['EXPOCODE'])
                df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH'])
                collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])
                # Normalize all the parameter column lengths. There may be
                # columns that did not get data written to them so make
                # sure they are just as long as the rest.
                length = len(df)
                for c in df.columns.values():
                    c.set_length(length)
                try:
                    dfc = self[df.globals['_OS_ID']]
                except KeyError:
                    dfc = self[df.globals['_OS_ID']] = DataFileCollection()
                dfc.files.append(df)
                dfi = 0

            # Create a new cast
            df = copy(template_df)
            df.globals['SECT_ID'] = BATS_SECT_ID
            df.globals['_SHIP'] = ship
            df.globals['_OS_ID'] = cruise_id
            df.globals['STNNBR'] = cruise_type
            df.globals['CASTNO'] = cast_id

        df['BTLNBR'].set(dfi, nisk_id)

        dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M')
        dt_deci = bats_time_to_dt(parts[2])
        #if dt_ascii != dt_deci:
        #    log.warn(
        #        u'Dates differ on data row {0}: {5} {1!r}={2} '
        #        '{3!r}={4}'.format(i, parts[1] + parts[3], dt_ascii,
        #                           parts[2], dt_deci, dt_deci - dt_ascii))
        df['_DATETIME'].set(dfi, dt_ascii)
        df['LATITUDE'].set(dfi, Decimal(parts[4]))
        df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5])))
        df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6]))

        parts_auto = parts[s:]
        for p, v in zip(params_auto, parts_auto):
            param = p[0]
            try:
                param = bats_to_param[param]
            except KeyError:
                pass
            if cruise_num < 121 and param == 'TON':
                param = 'DON'
            if (equal_with_epsilon(v, -9) or
                    equal_with_epsilon(v, -9.9) or
                    equal_with_epsilon(v, -9.99)):
                df[param].set_check_range(dfi, None)
            # TODO determine whether -10 is just bad formatting for -9.9
            elif equal_with_epsilon(v, -10):
                #log.warn(u'Possible missing data value {0}'.format(v))
                df[param].set_check_range(dfi, None)
            elif v == 0:
                log.warn(u'Data under detection limit, set flag to '
                         'WOCE water sample questionable measurement')
                df[param].set_check_range(dfi, None, flag=3)
            else:
                df[param].set_check_range(dfi, Decimal(v))
        dfi += 1

        # Since this is a super long file that contains multiple cruises
        # and casts, as the file is processed it is split apart into a
        # list of DataFileCollection(s) containing DataFile objects for
        # each cast.
        if i % 100 == 0:
            log.info(u'processed {0} lines'.format(i))

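# Sketch of the fill-value test used in the reader above, assuming
# equal_with_epsilon is a simple absolute-difference comparison; the exact
# tolerance is an assumption, not the library's value.
def equal_with_epsilon_sketch(a, b, epsilon=1e-6):
    return abs(float(a) - float(b)) < epsilon

# E.g. the BATS fill values -9, -9.9, and -9.99 all map to missing data.
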
def read(dfc, fileobj, cfg):
    """Read generic HRP matlab file."""
    mat, hrp = load_mat_hrp(fileobj)
    data = hrp_data_as_dict(hrp)

    coords = zip(data['lon'][0], data['lat'][0])
    del data['lat']
    del data['lon']

    for key in data.keys():
        log.info(u'parameter shape: {0} {1}'.format(key, data[key].shape))

    param_map = cfg["parameter_mapping"]
    for param in data.keys():
        if param not in param_map:
            del data[param]
        else:
            new_key = param_map[param]
            if new_key != param:
                data[new_key] = data[param]
                del data[param]

    for coord in coords:
        dfile = DataFile()
        dfc.append(dfile)
        dfile.globals['LONGITUDE'] = _decimal(coord[0])
        dfile.globals['LATITUDE'] = _decimal(coord[1])
        # create the columns after extraneous keys have been deleted
        dfile.create_columns(data.keys())

    for dep, dfile in enumerate(dfc):
        dfile.globals['STNNBR'] = dep + 1
        ref_range = ndarray_data_slice(data['PRESSURE'][:, dep])
        for param, pdata in data.items():
            col = dfile[param]
            data_col = pdata[:, dep]
            drange = ndarray_data_slice(data_col)
            if ref_range is None:
                ref_range = drange
                determiner = param
            elif drange != ref_range:
                if drange[0] == drange[1]:
                    log.info(u'No data for {0}. Skip.'.format(param))
                    continue
                if not is_data_range_inside(drange, ref_range):
                    log.error(u'{0} has data range {1} outside {2}. '
                              'Skip.'.format(param, drange, ref_range))
                    continue
            col.values = map(
                _decimal, list(data_col[ref_range[0]:ref_range[1]]))
            # Act as if all files had QC and assign it to OceanSITES 1.
            # Assuming that someone has already gone through level 0 data
            # and we are receiving level 1 or higher, we can set all flags
            # to 2.
            col.flags_woce = [9 if isnan(val) else 2 for val in col.values]

    # Somehow, HRP matlab data can have nans in the coordinate arrays. We
    # can't recalculate depth from that or make other assumptions so we
    # can only delete them.
    for iii, dfile in reversed(list(enumerate(dfc))):
        if (isnan(dfile.globals['LATITUDE']) or
                isnan(dfile.globals['LONGITUDE'])):
            log.warn(u'Unable to determine coordinate for matlab row '
                     '{0}. Discarding.'.format(iii))
            dfc.files.remove(dfile)

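# A hypothetical re-implementation of ndarray_data_slice as used above,
# assuming it returns the (start, stop) half-open index range bracketing
# the non-NaN portion of a 1-D profile, with start == stop meaning no data
# (matching the drange[0] == drange[1] check in the reader).
import numpy as np


def ndarray_data_slice_sketch(arr):
    valid = np.where(~np.isnan(arr))[0]
    if valid.size == 0:
        return (0, 0)  # no data
    return (int(valid[0]), int(valid[-1]) + 1)
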