Example #1
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles_does_not_create_extra_columns(self):
        """Merge datafiles but don't create extra columns.

        When merging data files, create columns only if they exist in derivative
        and were requested to be merged in.

        Thanks to sescher for finding this.

        """
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'CTDOXY'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['CTDOXY'].append(40, 2)
        df0['CTDOXY'].append(41, 3)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'CTDOXY', 'CTDSAL'])
        df1['CTDPRS'].append(2, 2)
        df1['CTDPRS'].append(3, 2)
        df1['CTDOXY'].append(50, 2)
        df1['CTDOXY'].append(51, 3)
        df1['CTDSAL'].append(20, 2)
        df1['CTDSAL'].append(21, 2)

        mdf = merge_datafiles(df0, df1, ['CTDPRS'], ['CTDOXY'])

        with self.assertRaises(KeyError):
            mdf['CTDSAL']
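
The rule this test pins down, as a standalone sketch: the merged file keeps the origin's columns and gains a derivative column only if it both exists in the derivative and was requested. The helper below is illustrative, not libcchdo's implementation.

def columns_to_create(origin_cols, deriv_cols, keys_on, parameters):
    """Return the column names a merged file should end up with."""
    requested = set(keys_on) | set(parameters)
    added = [col for col in deriv_cols
             if col in requested and col not in origin_cols]
    return list(origin_cols) + added

# CTDSAL exists in the derivative but was never requested, so it is absent:
assert 'CTDSAL' not in columns_to_create(
    ['CTDPRS', 'CTDOXY'], ['CTDPRS', 'CTDOXY', 'CTDSAL'],
    ['CTDPRS'], ['CTDOXY'])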
Example #2
    def test_write_fill_value_decimal_places_follow_column(self):
        """Fill values should follow the column's data's lead for decimal places.

        E.g. if the column has data [10.001, 11.123], the normal fill value -999
        should be written -999.000. I.e. as many trailing zeros as the data has.

        If the column has no data in it, default to the old-style C format
        string for how many decimal places to show.

        """
        with closing(StringIO()) as buff:
            dfile = DataFile()
            dfile.create_columns([
                'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS', 'CTDOXY'
            ])
            dfile['STNNBR'].values = [None, None]
            dfile['CASTNO'].values = [None, None]
            dfile['BTLNBR'].values = [None, None]
            dfile['_DATETIME'].values = [None, None]
            dfile['CTDPRS'].values = [_decimal('10.0001'), None]
            dfile['CTDOXY'].values = [None, _decimal('243.23')]
            btlex.write(dfile, buff)

            result = buff.getvalue().split('\n')
            # CTDPRS default decplaces is 1 but the data has 4
            self.assertEqual('-999.0000', result[4].split(',')[5].lstrip())
            # CTDOXY default decplaces is 4 but the data has 2
            self.assertEqual('-999.00', result[3].split(',')[6].lstrip())
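
The formatting rule the docstring describes, sketched with Decimal's as_tuple() exponent; format_fill is a hypothetical helper, not btlex's writer.

from decimal import Decimal

def format_fill(values, fill=-999, default_decplaces=1):
    """Write the fill value with as many decimal places as the data has,
    falling back to a default when the column is empty."""
    places = [-v.as_tuple().exponent
              for v in values if isinstance(v, Decimal)]
    decplaces = max(places) if places else default_decplaces
    return '{0:.{1}f}'.format(fill, decplaces)

assert format_fill([Decimal('10.0001'), None]) == '-999.0000'
assert format_fill([None, None], default_decplaces=4) == '-999.0000'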
Example #3
    def test_write_exchange_decimal_places(self):
        """Decimal places should be kept from the original data."""
        with closing(StringIO()) as buff:
            dfile = DataFile()
            dfile.globals['LONGITUDE'] = _decimal('0.0000000')
            dfile.create_columns(['CTDPRS'])
            dfile['CTDPRS'].values = [_decimal('10.0001'), None]
            ctdex.write(dfile, buff)

            result = buff.getvalue().split('\n')
            # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
            # has to be done manually.
            self.assertEqual('0.0000000', result[2].split(' = ')[1].lstrip())
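
The manual formatting the comment refers to can be done with Decimal's fixed-point format, which keeps the stored places instead of str's scientific notation (standard decimal module behavior, not libcchdo-specific):

from decimal import Decimal

zero = Decimal('0.0000000')
assert str(zero) == '0E-7'                   # scientific notation from str
assert '{0:f}'.format(zero) == '0.0000000'   # fixed-point keeps all places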
Example #4
    def test_functional_write(self):
        dfile = DataFile()
        dfile.create_columns(['CTDPRS', 'CTDOXY'])
        dfile['CTDPRS'].parameter.display_order = 0
        dfile['CTDOXY'].parameter.display_order = 1
        dfile['CTDPRS'].values = map(_decimal, ['2.0', '4.0'])
        dfile['CTDOXY'].values = map(_decimal, ['254.0', '253.1'])
        dfile['CTDOXY'].flags_woce = [2, 3]

        with closing(StringIO()) as buff:
            ctdex.write(dfile, buff)
            result = buff.getvalue().split('\n')
            self.assertEqual([u'        2.0', u'      254.0', u'2'],
                             result[4].split(','))
Example #5
File: merge.py Project: cberys/libcchdo
    def test_merge_collections(self):
        """When merging collections, map files, then merge mapped files.

        """
        odfc = DataFileCollection()
        ddfc = DataFileCollection()

        df0 = DataFile()
        df0.globals['EXPOCODE'] = 'a'
        df0.globals['STNNBR'] = 1
        df0.globals['CASTNO'] = 1
        df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRIT'].append(10, 2)
        df0['NITRIT'].append(11, 2)
        odfc.append(df0)

        df1 = DataFile()
        df1.globals['EXPOCODE'] = 'a'
        df1.globals['STNNBR'] = 1
        df1.globals['CASTNO'] = 1
        df1.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(3, 2)
        df1['NITRAT'].append(20, 2)
        df1['NITRAT'].append(21, 2)
        df1['NITRIT'].append(10, 2)
        df1['NITRIT'].append(11, 2)
        ddfc.append(df1)

        def merger(origin, deriv):
            return merge_datafiles(origin, deriv, ['CTDPRS'],
                                   ['NITRAT', 'NITRIT'])

        merged_dfc = merge_collections(odfc, ddfc, merger)

        self.assertEqual(merged_dfc.files[0]['CTDPRS'].values, [1, 2])
        self.assertEqual(merged_dfc.files[0]['NITRAT'].values, [20, 11])
        self.assertEqual(merged_dfc.files[0]['NITRIT'].values, [10, 11])

        lines = [
            # df1 has a different CTDPRS record (3)
            'Key (3,) does not exist in origin from derivative rows',
            # NITRIT columns are the same
            "Instructed to merge parameters that are not different: ['NITRIT']"
        ]
        self.assertTrue(self.ensure_lines(lines))
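
A sketch of the map-then-merge flow the docstring describes: pair origin and derivative files on their identifying globals, then hand each pair to the merger. This is illustrative; libcchdo's merge_collections may key and report differently.

def merge_collections_sketch(origin_files, deriv_files, merger):
    """Pair files on (EXPOCODE, STNNBR, CASTNO) and merge each pair."""
    def key(dfile):
        return (dfile.globals['EXPOCODE'], dfile.globals['STNNBR'],
                dfile.globals['CASTNO'])
    deriv_map = dict((key(dfile), dfile) for dfile in deriv_files)
    merged = []
    for origin in origin_files:
        deriv = deriv_map.get(key(origin))
        merged.append(merger(origin, deriv) if deriv is not None else origin)
    return merged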
Example #6
File: merge.py Project: cberys/libcchdo
    def test_diff_decplaces(self):
        """Derivative is still different when decimal places are different."""
        dfo = DataFile()
        dfo.create_columns(['CTDPRS', 'CTDOXY'])
        dfo['CTDPRS'].append(_decimal('1'))
        dfo['CTDOXY'].append(_decimal('0.140'))

        dfd = DataFile()
        dfd.create_columns(['CTDPRS', 'CTDOXY'])
        dfd['CTDPRS'].append(_decimal('1'))
        dfd['CTDOXY'].append(_decimal('0.14'))

        p_different, p_not_in_orig, p_not_in_deriv, p_common = \
            different_columns(dfo, dfd, ['CTDPRS'])
        self.assertEqual(p_different, ['CTDOXY'])

        dfile = merge_datafiles(dfo, dfd, ['CTDPRS'], ['CTDOXY'])
        self.assertEqual(decimal_to_str(dfile['CTDOXY'][0]), '0.14')
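
Why Decimal('0.140') still counts as different from Decimal('0.14'): == compares numeric value only, while the exponent records the stored decimal places (standard decimal module behavior):

from decimal import Decimal

a, b = Decimal('0.140'), Decimal('0.14')
assert a == b                                          # equal in value
assert a.as_tuple().exponent != b.as_tuple().exponent  # -3 vs -2 places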
Example #7
    def test_write_btl_date_time_no_decimals(self):
        """BTL_DATE and BTL_TIME should not have decimal places."""
        with closing(StringIO()) as buff:
            dfile = DataFile()
            dfile.create_columns([
                'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS',
                'BTL_DATE', 'BTL_TIME'
            ])
            dfile['STNNBR'].values = [None, None]
            dfile['CASTNO'].values = [None, None]
            dfile['BTLNBR'].values = [None, None]
            dfile['_DATETIME'].values = [None, None]
            dfile['CTDPRS'].values = [_decimal('10.0001'), None]
            dfile['BTL_DATE'].values = [
                _decimal('19700101'),
                _decimal('19700102')
            ]
            dfile['BTL_TIME'].values = [_decimal('0000'), _decimal('1234')]
            btlex.write(dfile, buff)

            result = buff.getvalue().split('\n')
            self.assertEqual('19700101', result[3].split(',')[6].lstrip())
            self.assertEqual('1234', result[4].split(',')[7].lstrip())
Example #8
    def test_write_exchange_decimal_places(self):
        """Decimal places should be kept from the original data."""
        with closing(StringIO()) as buff:
            dfile = DataFile()
            dfile.create_columns([
                'STNNBR', 'CASTNO', 'BTLNBR', '_DATETIME', 'CTDPRS',
                'LONGITUDE'
            ])
            dfile['STNNBR'].values = [None, None]
            dfile['CASTNO'].values = [None, None]
            dfile['BTLNBR'].values = [None, None]
            dfile['_DATETIME'].values = [None, None]
            dfile['CTDPRS'].values = [_decimal('10.0001'), None]
            dfile['LONGITUDE'].values = [
                _decimal('0.0000000'),
                _decimal('1.000000')
            ]
            btlex.write(dfile, buff)

            result = buff.getvalue().split('\n')
            # Decimal('0.0000000') is converted to 0E-7 by str. The formatting
            # has to be done manually.
            self.assertEqual('0.0000000', result[3].split(',')[5].lstrip())
Example #9
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles_no_column(self):
        """Error to merge columns in neither datafile."""
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(2, 2)
        df1['NITRAT'].append(20, 3)
        df1['NITRAT'].append(21, 4)

        with self.assertRaisesRegexp(
                ValueError, 'No columns selected to merge are different.'):
            merge_datafiles(df0, df1, ['CTDPRS'], ['CTDSAL'])
        lines = [
            "Instructed to merge parameters that are not in either datafile: ['CTDSAL']",
        ]
        self.assertTrue(self.ensure_lines(lines))
Example #10
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles_flags(self):
        """It should be possible to only merge flag "columns".

        This includes updating and adding flags.
        If adding flags and the original column does not exist, warn and fail.

        """
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['CTDPRS'].append(3, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRAT'].append(12, 2)
        df0['FLUOR'].append(100)
        df0['FLUOR'].append(101)
        df0['FLUOR'].append(102)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(2, 2)
        df1['CTDPRS'].append(4, 2)
        df1['NITRAT'].append(20, 3)
        df1['NITRAT'].append(21, 4)
        df1['NITRAT'].append(22, 4)
        df1['FLUOR'].append(200, 2)
        df1['FLUOR'].append(201, 3)
        df1['FLUOR'].append(202, 3)

        mdf = merge_datafiles(df0, df1, ['CTDPRS'],
                              ['NITRAT_FLAG_W', 'FLUOR_FLAG_W'])
        self.assertEqual(mdf['NITRAT'].values, [10, 11, 12])
        self.assertEqual(mdf['NITRAT'].flags_woce, [3, 4, 2])
        self.assertEqual(mdf['FLUOR'].values, [100, 101, 102])
        self.assertEqual(mdf['FLUOR'].flags_woce, [2, 3, 9])
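
The _FLAG_W names select flag "columns" by a suffix convention; resolving such a name back to its data column might look like this (hypothetical helper, not libcchdo's):

def base_parameter(name, suffix='_FLAG_W'):
    """Map a WOCE flag column name back to its data column name."""
    if name.endswith(suffix):
        return name[:-len(suffix)]
    return name

assert base_parameter('NITRAT_FLAG_W') == 'NITRAT'
assert base_parameter('CTDPRS') == 'CTDPRS'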
Example #11
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles(self):
        """Merge datafiles.

        When merging data files, there are two cases to consider:

        Case 1: Adding new column

            If the derivative file has fewer records, fill in the missing
            records with fill values and missing flags.

        Case 2: Updating column data

        It should also be possible to merge only flags. Make sure that when
        only flags are merged, the data itself is left untouched.

        Parameter units should be updated from the derivative.

        """
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT', 'CTDOXY'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRIT'].append(30, 5)
        df0['NITRIT'].append(31, 6)
        df0['CTDOXY'].append(40, 2)
        df0['CTDOXY'].append(41, 3)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT', 'CTDSAL', 'CTDOXY'])
        df1['CTDPRS'].append(2, 2)
        df1['CTDPRS'].append(3, 2)
        df1['CTDSAL'].append(20, 2)
        df1['CTDSAL'].append(21, 2)
        df1['NITRAT'].append(12, 4)
        df1['NITRAT'].append(13, 4)
        df1['CTDOXY'].append(40, 2)
        df1['CTDOXY'].append(41, 3)

        df1['CTDOXY'].parameter.units = Unit('UMOL/KG')

        # Case 1 column add
        mdf = merge_datafiles(
            df0, df1, ['CTDPRS'],
            ['NITRAT', 'NITRAT_FLAG_W', 'CTDSAL', 'CTDSAL_FLAG_W', 'CTDOXY'])
        self.assertEqual(mdf['CTDPRS'].values, [1, 2])
        # Make sure missing values and flags are filled in.
        self.assertEqual(mdf['CTDSAL'].values, [None, 20])
        self.assertEqual(mdf['CTDSAL'].flags_woce, [9, 2])
        # Case 2 data update
        self.assertEqual(mdf['NITRAT'].values, [10, 12])
        self.assertEqual(mdf['NITRAT'].flags_woce, [2, 4])

        # Columns in origin should be kept
        self.assertEqual(mdf['NITRIT'].values, [30, 31])
        self.assertEqual(mdf['NITRIT'].flags_woce, [5, 6])

        # Units should be overwritten for merged columns
        self.assertEqual(mdf['CTDOXY'].parameter.units,
                         df1['CTDOXY'].parameter.units)

        # Make sure a warning is printed regarding the unit overwrite.
        # This doubles as a check that derivative columns do not wholesale
        # overwrite the origin column; they must be merged using the
        # row-match algorithm.
        lines = [
            "Changed units for CTDOXY from '' to 'UMOL/KG'",
        ]
        self.assertTrue(self.ensure_lines(lines))
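
The row-match behavior asserted above, reduced to a sketch: align on the key column, take derivative values where keys match, and fall back to a fill value where they do not. merge_rows is illustrative, not the real algorithm.

def merge_rows(origin_keys, deriv_keys, deriv_values, fill=None):
    """Return a merged column aligned to the origin's rows."""
    lookup = dict(zip(deriv_keys, deriv_values))
    return [lookup.get(key, fill) for key in origin_keys]

# Mirrors the CTDSAL case: origin CTDPRS [1, 2], derivative CTDPRS [2, 3].
assert merge_rows([1, 2], [2, 3], [20, 21]) == [None, 20]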
Example #12
File: merge.py Project: cberys/libcchdo
    def test_different_columns(self):
        """Columns between two datafiles differ under a wide variety of cases.

        Case 1: Column values are different
        Case 1 corollary: Flag values are different
        Case 2: Units are different
        Case 3: Column not in original
        Case 4: Column not in derivative

        """
        with TemporaryFile() as origin, TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,NITRAT_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,2,10.0,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,2,10.0,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,TDN_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,NMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,3,10.0,-999.000,1,-999.0,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,3,10.0,  10.000,9,-999.0,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dforigin = DataFile()
            dfderiv = DataFile()
            btlex.read(dforigin, origin)
            btlex.read(dfderiv, deriv)
            self.assertEqual(
                # NITRIT comes after because NMOL/KG is not an expected unit and
                # gets pushed to the end when sorting
                (
                    ['DELC14', 'DELC14_FLAG_W', 'NITRIT'],
                    # PH_SWS_FLAG_W has underscores inside the parameter name. All
                    # parts need to be included
                    ['PH_SWS', 'PH_SWS_FLAG_W', 'TDN', 'TDN_FLAG_W'],
                    ['NITRAT', 'NITRAT_FLAG_W'],
                    [
                        'EXPOCODE', 'SECT_ID', 'STNNBR', 'CASTNO', 'SAMPNO',
                        'BTLNBR', 'BTLNBR_FLAG_W', 'LATITUDE', 'LONGITUDE',
                        'DEPTH', '_DATETIME'
                    ]),
                different_columns(dforigin, dfderiv, (
                    'EXPOCODE',
                    'SECT_ID',
                    'STNNBR',
                    'CASTNO',
                    'SAMPNO',
                    'BTLNBR',
                )))

            lines = [
                "DELC14 differs at origin row 1:\t(None, Decimal('10.000'))",
                "DELC14_FLAG_W differs at origin row 0:\t(9, 1)",
            ]
            self.assertTrue(self.ensure_lines(lines))

            # Columns are not different if merged results are not different.
            dfo = DataFile()
            dfd = DataFile()

            dfo.create_columns(['CTDPRS', 'CTDOXY'])
            dfo.check_and_replace_parameters()
            dfd.create_columns(['CTDPRS', 'CTDOXY'])
            dfd.check_and_replace_parameters()

            dfo['CTDPRS'].values = [1, 2, 3]
            dfo['CTDOXY'].values = [10, 20, 30]
            dfd['CTDPRS'].values = [3, 2, 1]
            dfd['CTDOXY'].values = [30, 20, 10]

            self.assertEqual(([], [], [], ['CTDPRS', 'CTDOXY']),
                             different_columns(dfo, dfd, ('CTDPRS', )))
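
The 4-tuple shape different_columns returns, sketched as a plain set classification. Ordering details (such as unexpected units pushing NITRIT to the end) are glossed over, and differs is a stand-in predicate:

def classify_columns(origin_cols, deriv_cols, differs):
    """Split names into (different, not_in_origin, not_in_deriv, common)."""
    origin, deriv = set(origin_cols), set(deriv_cols)
    shared = origin & deriv
    return (sorted(col for col in shared if differs(col)),
            sorted(deriv - origin),
            sorted(origin - deriv),
            sorted(col for col in shared if not differs(col)))

assert classify_columns(
    ['CTDPRS', 'CTDOXY'], ['CTDPRS', 'CTDOXY'],
    lambda col: False) == ([], [], [], ['CTDOXY', 'CTDPRS'])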
Example #13
class TestDataFile(TestCase):
    def setUp(self):
        self.file = DataFile()
        self.c = self.file.columns['EXPOCODE'] = Column('EXPOCODE')

    def tearDown(self):
        self.file = None

    def test_init(self):
        self.assertEqual(len(self.file.columns), 1)
        self.assertEqual(self.file.footer, None)
        self.assertEqual(self.file.globals, {'stamp': '', 'header': ''})

    def test_expocodes(self):
        self.c.append('A')
        self.assertEqual(['A'], self.file.expocodes())
        self.c.append('B')
        self.assertEqual(['A', 'B'], self.file.expocodes())
        self.c.append('A')
        self.assertEqual(
            ['A', 'B'],
            self.file.expocodes())  # Expocodes returns unique expocodes.

    def test_len(self):
        c = self.file.columns['EXPOCODE']
        del self.file.columns['EXPOCODE']
        self.assertEqual(len(self.file), 0)
        self.file.columns['EXPOCODE'] = c
        self.assertEqual(len(self.file), 0)
        self.c.append('A')
        self.assertEqual(len(self.file), 1)
        self.c.append('A')
        self.assertEqual(len(self.file), 2)

    def test_sorted_columns(self):
        self.file.columns['CASTNO'] = Column('CASTNO')
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR', 'CASTNO']
        received = map(lambda c: c.parameter.mnemonic_woce(),
                       self.file.sorted_columns())
        # If lengths are equal and all expected in received, then assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all([x in received for x in expected]))

    def test_get_property_for_columns(self):
        pass  # This is tested by the following tests.

    def test_column_headers(self):
        self.assertEqual(['EXPOCODE'], self.file.column_headers())
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR']
        received = self.file.column_headers()
        # If lengths are equal and all expected in received, then assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all([x in received for x in expected]))

    def test_formats(self):
        self.file.columns['CTDOXY'] = Column('CTDOXY')
        self.file.check_and_replace_parameters()
        # Order of columns may be wrong
        self.assertEqual(['%11s', '%9.4f'], self.file.formats())

    def test_to_dict(self):
        self.file.to_dict()
        pass  # TODO

    def test_str(self):
        str(self.file)

    def test_create_columns(self):
        parameters = ['CTDOXY']
        units = ['UMOL/KG']
        self.file.create_columns(parameters, units)

    def test_column_append(self):
        self.assertEqual(self.c.values, [])
        self.c.set(2, 'test')
        self.assertEqual(self.c.values, [None, None, 'test'])
        self.assertEqual(self.c.flags_woce, [])
        self.c.append('test2', 'flag2')
        self.assertEqual(self.c.values, [None, None, 'test', 'test2'])
        self.assertEqual(self.c.flags_woce, [None, None, None, 'flag2'])

    def test_calculate_depths(self):
        self.file['_ACTUAL_DEPTH'] = Column('_ACTUAL_DEPTH')
        self.assertEqual(('actual', []), self.file.calculate_depths())

        del self.file['_ACTUAL_DEPTH']
        self.file.globals['LATITUDE'] = 0
        self.file.create_columns(['CTDPRS', 'CTDSAL', 'CTDTMP'])
        self.assertEqual(('unesco1983', []), self.file.calculate_depths())

        self.file['CTDPRS'].values = [1]
        self.file['CTDSAL'].values = [1]
        self.file['CTDTMP'].values = [1]

        self.assertEqual(
            ('sverdrup', [_decimal('1.021723814950101286444879340E-8')]),
            self.file.calculate_depths())

    def test_check_and_replace_parameter_contrived(self):
        """Contrived parameters are not checked."""
        col = Column('_DATETIME')
        col.check_and_replace_parameter(self.file, convert=False)
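
A minimal sketch of the set/append padding behavior that test_column_append (above) exercises; MiniColumn is hypothetical, not libcchdo's Column.

class MiniColumn(object):
    def __init__(self):
        self.values = []
        self.flags_woce = []

    def set(self, index, value):
        # Pad with None so the value lands at the requested row.
        while len(self.values) < index + 1:
            self.values.append(None)
        self.values[index] = value

    def append(self, value, flag=None):
        self.values.append(value)
        if flag is not None:
            # Backfill missing flags so flags stay row-aligned.
            self.flags_woce += [None] * (
                len(self.values) - 1 - len(self.flags_woce))
            self.flags_woce.append(flag)

col = MiniColumn()
col.set(2, 'test')
assert col.values == [None, None, 'test']
assert col.flags_woce == []
col.append('test2', 'flag2')
assert col.values == [None, None, 'test', 'test2']
assert col.flags_woce == [None, None, None, 'flag2']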
Example #14
def read(self, handle, metadata=None):
    """How to read a Bottle Bermuda Atlantic Time-Series Study file.

    This function reads bats_bottle.txt.

    Arguments:
    self - (special case, see NOTE) dictionary
    handle - file object containing the bats_bottle.txt data
    metadata - (optional) BATS cruise metadata to be used to find port dates

    NOTE: The result of this method is a special case. The bottle file format
    contains the entire BATS holdings, while the internal data format splits
    data up by cruise. Because timeseries cruises are stored one file per
    cruise, the end result is a dictionary with cruise_ids as keys to
    DataFileCollections (cruises) containing DataFiles (casts).

    """
    sections = _read_header_sections(self, handle)
    _read_variables(self, handle)
    parameters = _get_variables(self, handle, sections)

    # Add DON because of the note in the Variables list stating DON is
    # reported in place of TON prior to BATS 121
    parameters.append(['DON', None, 'umol/kg'])

    manual_parameters = [
        ['BTLNBR', ''],
        ['_DATETIME', ''],
        ['LATITUDE', ''],
        ['LONGITUDE', ''],
        ['_ACTUAL_DEPTH', 'METERS'],
    ]
    columns = [x[0] for x in manual_parameters]
    units = [x[1] for x in manual_parameters]

    s = None
    for i, (var, d, u) in enumerate(parameters):
        if var == 'Depth':
            s = i + 1
            continue
        # Only want to add parameters after Depth. The others were done manually.
        if s is None:
            continue
        try:
            var = bats_to_param[var]
        except KeyError:
            pass
        columns.append(var)
        units.append(u)

    template_df = DataFile()
    template_df.create_columns(columns, units)
    template_df.check_and_replace_parameters(convert=False)

    for sec, lines in sections.items():
        if sec == 'Variables list':
            continue
        if sec != 'Comments':
            continue
        template_df.globals['_{0}'.format(sec)] = '\n'.join(lines)

    df = None
    params_auto = parameters[s:]
    dfi = 0
    for i, l in enumerate(handle):
        parts = l.split()

        id = parts[0]
        (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id,
         nisk_id) = _parse_bats_id(id)
        ship = _ship_from_cruise_num(cruise_num)
        if not ship:
            ship = 'R/V Atlantic Explorer'

        if (df is None or df.globals['_OS_ID'] != cruise_id
                or df.globals['STNNBR'] != cruise_type
                or df.globals['CASTNO'] != cast_id):
            if df is not None:
                # Done reading one cast. Finalize it.
                log.info(u'finalizing cast {0} {1} {2}'.format(
                    df.globals['_OS_ID'], df.globals['STNNBR'],
                    df.globals['CASTNO']))
                try:
                    meta = metadata[cruise_id]
                    port_date = meta['dates'][0]
                except (TypeError, KeyError):
                    port_date = None
                if not port_date:
                    port_date = min(df['_DATETIME'])
                df.globals['EXPOCODE'] = create_expocode(
                    ship_code(ship, raise_on_unknown=False), port_date)
                log.info(df.globals['EXPOCODE'])
                df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH'])
                collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])
                # Normalize all the parameter column lengths. There may be
                # columns that did not get data written to them so make sure
                # they are just as long as the rest
                length = len(df)
                for c in df.columns.values():
                    c.set_length(length)
                try:
                    dfc = self[df.globals['_OS_ID']]
                except KeyError:
                    dfc = self[df.globals['_OS_ID']] = DataFileCollection()
                dfc.files.append(df)
                dfi = 0

            # Create a new cast
            df = copy(template_df)
            df.globals['SECT_ID'] = BATS_SECT_ID
            df.globals['_SHIP'] = ship
            df.globals['_OS_ID'] = cruise_id
            df.globals['STNNBR'] = cruise_type
            df.globals['CASTNO'] = cast_id

        df['BTLNBR'].set(dfi, nisk_id)

        dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M')
        dt_deci = bats_time_to_dt(parts[2])
        #if dt_ascii != dt_deci:
        #    log.warn(
        #        u'Dates differ on data row {0}: {5} {1!r}={2} '
        #        '{3!r}={4}'.format(i, parts[1] + parts[3], dt_ascii, parts[2],
        #                           dt_deci, dt_deci - dt_ascii))
        df['_DATETIME'].set(dfi, dt_ascii)

        df['LATITUDE'].set(dfi, Decimal(parts[4]))
        df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5])))
        df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6]))

        parts_auto = parts[s:]
        for p, v in zip(params_auto, parts_auto):
            param = p[0]
            try:
                param = bats_to_param[param]
            except KeyError:
                pass
            if cruise_num < 121 and param == 'TON':
                param = 'DON'

            if (equal_with_epsilon(v, -9) or equal_with_epsilon(v, -9.9)
                    or equal_with_epsilon(v, -9.99)):
                df[param].set_check_range(dfi, None)
            # TODO determine whether -10 is just bad formatting for -9.9
            elif equal_with_epsilon(v, -10):
                #log.warn(u'Possible missing data value {0}'.format(v))
                df[param].set_check_range(dfi, None)
            elif v == 0:
                log.warn(u'Data under detection limit, set flag to '
                         'WOCE water sample questionable measurement')
                df[param].set_check_range(dfi, None, flag=3)
            else:
                df[param].set_check_range(dfi, Decimal(v))

        dfi += 1
        # Since this is a very long file that contains multiple cruises and
        # casts, it is split apart as it is processed into
        # DataFileCollections containing a DataFile for each cast
        if i % 100 == 0:
            log.info(u'processed {0} lines'.format(i))
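
Since the NOTE explains that read populates self as a dictionary of cruise_ids to DataFileCollections of cast DataFiles, a consuming sketch under that assumption (summarize_bats is illustrative):

def summarize_bats(results):
    """Walk the structure: cruise_id -> DataFileCollection -> DataFile."""
    for cruise_id in sorted(results):
        for cast in results[cruise_id].files:
            print('{0} station {1} cast {2}: {3} rows'.format(
                cruise_id, cast.globals['STNNBR'],
                cast.globals['CASTNO'], len(cast)))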
Example #15
def read(dfc, fileobj, cfg):
    """Read generic HRP matlab file."""
    mat, hrp = load_mat_hrp(fileobj)
    data = hrp_data_as_dict(hrp)

    coords = zip(data['lon'][0], data['lat'][0])
    del data['lat']
    del data['lon']

    for key in data.keys():
        log.info(u'parameter shape: {0} {1}'.format(key, data[key].shape))

    param_map = cfg["parameter_mapping"]
    for param in data.keys():
        if param not in param_map:
            del data[param]
        else:
            new_key = param_map[param]
            if new_key != param:
                data[new_key] = data[param]
                del data[param]

    for coord in coords:
        dfile = DataFile()
        dfc.append(dfile)
        dfile.globals['LONGITUDE'] = _decimal(coord[0])
        dfile.globals['LATITUDE'] = _decimal(coord[1])

        # create the columns after extraneous keys have been deleted
        dfile.create_columns(data.keys())

    for dep, dfile in enumerate(dfc):
        dfile.globals['STNNBR'] = dep + 1
        ref_range = ndarray_data_slice(data['PRESSURE'][:, dep])
        for param, pdata in data.items():
            col = dfile[param]
            data_col = pdata[:, dep]

            drange = ndarray_data_slice(data_col)
            if ref_range is None:
                ref_range = drange
                determiner = param
            elif drange != ref_range:
                if drange[0] == drange[1]:
                    log.info(u'No data for {0}. Skip.'.format(param))
                    continue
                if not is_data_range_inside(drange, ref_range):
                    log.error(u'{0} has data range {1} outside {2}. '
                              'Skip.'.format(param, drange, ref_range))
                    continue

            col.values = map(_decimal,
                             list(data_col[ref_range[0]:ref_range[1]]))
            # Act as if all files had QC and assign OceanSITES level 1.
            # Assuming someone has already gone through the level 0 data and
            # we are receiving level 1 or higher, we can set all flags to 2.
            col.flags_woce = [9 if isnan(val) else 2 for val in col.values]

    # Somehow, HRP matlab data can have nans in the coordinate arrays. We can't
    # recalculate depth from that or make other assumptions so we can only
    # delete them.
    for iii, dfile in reversed(list(enumerate(dfc))):
        if (isnan(dfile.globals['LATITUDE'])
                or isnan(dfile.globals['LONGITUDE'])):
            log.warn(u'Unable to determine coordinate for matlab row '
                     '{0}. Discarding.'.format(iii))
            dfc.files.remove(dfile)
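
ndarray_data_slice is used above to find where a column actually has data. A plausible stand-in, assuming it returns the half-open (start, stop) range of non-NaN samples in a 1-D array; the real function may differ:

import numpy as np

def data_slice(column):
    """Hypothetical ndarray_data_slice: (start, stop) of non-NaN data."""
    valid = np.flatnonzero(~np.isnan(column))
    if valid.size == 0:
        # Empty range; matches the drange[0] == drange[1] no-data check.
        return (0, 0)
    return (int(valid[0]), int(valid[-1]) + 1)

assert data_slice(np.array([np.nan, 1.0, 2.0, np.nan])) == (1, 3)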