Example #1
    def test_read_btlnbr_as_string(self):
        with closing(StringIO()) as fff:
            fff.write('SIO1,33.24\n')
            fff.write('01,32.10\n')
            fff.flush()
            fff.seek(0)
            dfile = DataFile()
            dfile['BTLNBR'] = Column('BTLNBR')
            dfile['CTDSAL'] = Column('CTDSAL')
            exchange.read_data(dfile, fff, ['BTLNBR', 'CTDSAL'])
            self.assertEqual(dfile['BTLNBR'].values, ['SIO1', '01'])
            self.assertEqual(
                dfile['CTDSAL'].values,
                [Decimal('33.24'), Decimal('32.10')])
Example #2
def read(self, fileobj):
    """How to read LDEO ASEP files from an NODC accession."""
    def is_fname_ok(fname):
        if '.csv' not in fname:
            return False
        if fname.find('/') > -1:
            raise ValueError(
                u'CTD Exchange Zip files should not contain directories.')
        return True

    def reader(dfile, fileobj, retain_order, header_only):
        ctdex.read(dfile, fileobj, retain_order, header_only)
        dfile.globals['_FILENAME'] = fileobj.name

    dfiles = []

    datapath = None
    datadirname = '0-data'
    with tarfile_open(mode='r:gz', fileobj=fileobj) as fff:
        for member in fff.getmembers():
            if datapath is None:
                if datadirname in member.name:
                    datapath = member.name.split(
                        datadirname)[0] + datadirname + '/'
                    log.info('NODC accession data path: {0}'.format(datapath))
                else:
                    continue

            if not member.name.startswith(datapath):
                continue
            bname = os.path.basename(member.name)
            if bname.endswith('pdf'):
                continue
            if '_ros.' in bname:
                continue
            # don't want upcasts
            if '_ctd_U.' in bname:
                continue

            dfile = DataFile()
            ggg = fff.extractfile(member)
            if ggg is None:
                log.error(u'Unable to extract file {0!r}'.format(member))
            else:
                ldeo_asep.read(dfile, ggg)
                dfiles.append(dfile)

    self.files = sorted(dfiles,
                        key=lambda dfile: lexico(dfile.globals['STNNBR']))
Example #3
def guess_ftype_dftype_format(fileobj, file_type=None, file_name=None):
    """Return a tuple of guessed file type, Datafile or DatafileCollection, and 
    the format module.

    """
    from libcchdo.model.datafile import (
        DataFile, SummaryFile, DataFileCollection)
    file_type = guess_file_type_from_file(fileobj, file_type, file_name)
    if 'zip' in file_type or file_type.startswith('archive'):
        dfile = DataFileCollection()
    elif file_type.startswith('sum'):
        dfile = SummaryFile()
    else:
        dfile = DataFile()
    format_module = guess_format_module(fileobj, file_type)
    return (file_type, dfile, format_module)
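A minimal usage sketch (hedged: the wrapper and filename below are made up; it assumes the guesser leaves the file object readable afterwards and that the returned format module exposes the read(dfile, fileobj) entry point seen throughout these examples):

def read_any(path):
    with open(path) as fobj:
        ftype, dfile, fmt = guess_ftype_dftype_format(fobj, file_name=path)
        # Dispatch to whichever format module was guessed.
        fmt.read(dfile, fobj)
        return ftype, dfile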
Example #4
File: merge.py Project: cberys/libcchdo
    def test_diff_decplaces(self):
        """Derivative is still different when decimal places are different."""
        dfo = DataFile()
        dfo.create_columns(['CTDPRS', 'CTDOXY'])
        dfo['CTDPRS'].append(_decimal('1'))
        dfo['CTDOXY'].append(_decimal('0.140'))

        dfd = DataFile()
        dfd.create_columns(['CTDPRS', 'CTDOXY'])
        dfd['CTDPRS'].append(_decimal('1'))
        dfd['CTDOXY'].append(_decimal('0.14'))

        p_different, p_not_in_orig, p_not_in_deriv, p_common = \
            different_columns(dfo, dfd, ['CTDPRS'])
        self.assertEqual(p_different, ['CTDOXY'])

        dfile = merge_datafiles(dfo, dfd, ['CTDPRS'], ['CTDOXY'])
        self.assertEqual(decimal_to_str(dfile['CTDOXY'][0]), '0.14')
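For context, the two Decimal literals above compare equal numerically but carry different exponents, which is exactly the distinction this test guards:

from decimal import Decimal

Decimal('0.140') == Decimal('0.14')             # True: equal as numbers
str(Decimal('0.140')) == str(Decimal('0.14'))   # False: '0.140' vs '0.14'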
Example #5
def read(self, handle):
    """How to read CTD WOCE EGEE files from a Zip."""
    zfile = Zip.ZeroCommentZipFile(handle, 'r')
    try:
        for fname in zfile.namelist():
            tempstream = StringIO(zfile.read(fname))
            ctdfile = DataFile()
            try:
                woce_egee.read(ctdfile, tempstream)
            except Exception:
                log.info('Failed to read file %s in %s' % (fname, handle))
                print_exc()
                # Re-raise with the original traceback intact.
                raise
            self.append(ctdfile)
            tempstream.close()
    finally:
        zfile.close()
Example #6
File: tools.py Project: cberys/libcchdo
def sbe_asc_to_ctd_exchange(args):
    output, expo = (sys.stdout, '')
    if args.expo:
        expo = args.expo
    if args.output:
        output = args.output
    if len(args.files) == 1:
        if output is not sys.stdout:
            output = output + '_ct1.csv'
        _single_file(asc, args.files, output, expo=expo)
    elif len(args.files) > 1:
        if output is not sys.stdout:
            output = output + '_ct1.zip'
        _multi_file(asc, args.files, output, expo=expo)
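A hedged sketch of the argparse wiring this handler expects; the flag names are assumptions inferred from the attribute accesses above:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('files', nargs='+')
parser.add_argument('--expo', default='')
parser.add_argument('--output')
sbe_asc_to_ctd_exchange(parser.parse_args())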
Example #7
    def test_read_unknown_parameter_fillvalue(self):
        """Reading data for a parameter with unknown format should still check
           for out of band.

        """
        with closing(StringIO()) as fff:
            fff.name = 'testfile'
            fff.write('-999,9,1,012\n')
            fff.write('11,2,-999,123\n')
            fff.flush()
            fff.seek(0)
            dfile = DataFile()
            dfile['CTDPRS'] = Column('CTDPRS')
            dfile['UNKPARAM'] = Column('UNKPARAM')
            dfile['BTLNBR'] = Column('BTLNBR')
            exchange.read_data(
                dfile, fff, ['CTDPRS', 'CTDPRS_FLAG_W', 'UNKPARAM', 'BTLNBR'])
        self.assertEqual(None, dfile['CTDPRS'].values[0])
        self.assertEqual('012', dfile['BTLNBR'].values[0])
        self.assertEqual('123', dfile['BTLNBR'].values[1])
        self.assertEqual(None, dfile['UNKPARAM'].values[1])
Example #8
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles_no_column(self):
        """Error to merge columns in neither datafile."""
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(2, 2)
        df1['NITRAT'].append(20, 3)
        df1['NITRAT'].append(21, 4)

        with self.assertRaisesRegexp(
                ValueError, 'No columns selected to merge are different.'):
            merge_datafiles(df0, df1, ['CTDPRS'], ['CTDSAL'])
        lines = [
            "Instructed to merge parameters that are not in either datafile: ['CTDSAL']",
        ]
        self.assertTrue(self.ensure_lines(lines))
Example #9
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles_flags(self):
        """It should be possible to only merge flag "columns".

        This includes updating and adding flags.
        If adding flags and the original column does not exist, warn and fail.

        """
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['CTDPRS'].append(3, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRAT'].append(12, 2)
        df0['FLUOR'].append(100)
        df0['FLUOR'].append(101)
        df0['FLUOR'].append(102)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT', 'FLUOR'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(2, 2)
        df1['CTDPRS'].append(4, 2)
        df1['NITRAT'].append(20, 3)
        df1['NITRAT'].append(21, 4)
        df1['NITRAT'].append(22, 4)
        df1['FLUOR'].append(200, 2)
        df1['FLUOR'].append(201, 3)
        df1['FLUOR'].append(202, 3)

        mdf = merge_datafiles(df0, df1, ['CTDPRS'],
                              ['NITRAT_FLAG_W', 'FLUOR_FLAG_W'])
        self.assertEqual(mdf['NITRAT'].values, [10, 11, 12])
        self.assertEqual(mdf['NITRAT'].flags_woce, [3, 4, 2])
        self.assertEqual(mdf['FLUOR'].values, [100, 101, 102])
        self.assertEqual(mdf['FLUOR'].flags_woce, [2, 3, 9])
Example #10
def _read_oliver_sun(dfc, fileobj, cfg):
    """Read HRP2 format from Oliver Sun."""
    mat = loadmat(fileobj)
    filekey = mat.keys()[0]
    casts = mat[filekey][0]

    for cast in casts:
        dfile = DataFile()
        dfc.append(dfile)

        dfile.globals['EXPOCODE'] = cfg['expocode']

        # TODO
        dfile.globals['DEPTH'] = 0

        for key, item in zip(cast.dtype.names, cast):
            if item.shape == (1, 1):
                key = cfg['global_mapping'].get(key, None)
                if key:
                    dfile.globals[key] = item[0, 0]
            else:
                try:
                    dfile[key] = Column(key)
                    dfile[key].values = list(item.flatten())
                    # Act as if all files had QC and assign it to OceanSITES 1.
                    # Assuming that someone has already gone through level 0
                    # data and we are receiving level 1 or higher.
                    dfile[key].flags_woce = [2] * len(dfile[key].values)
                except KeyError:
                    pass

        try:
            dfile.globals['STNNBR']
        except KeyError:
            dfile.globals['STNNBR'] = '999'

        woce.fuse_datetime(dfile)
Example #11
def read(self, handle, metadata=None):
    """How to read a Bottle Bermuda Atlantic Time-Series Study file.

    This function reads bats_bottle.txt.

    Arguments:
    self - (special case, see NOTE) dictionary
    handle - file-like object for bats_bottle.txt
    metadata - (optional) BATS cruise metadata to be used to find port dates

    NOTE: The result of this method is a special case. The bottle file format
    contains the entire BATS holdings, while the internal data format splits
    data up by cruise. Because timeseries data is split into one file per
    cruise, the end result is a dictionary with cruise_ids as keys to
    DataFileCollections (cruises) containing DataFiles (casts).

    """
    sections = _read_header_sections(self, handle)
    _read_variables(self, handle)
    parameters = _get_variables(self, handle, sections)

    # Add DON for note in Variables list stating DON is reported for TON prior
    # to BATS 121
    parameters.append(['DON', None, 'umol/kg'])

    manual_parameters = [
        ['BTLNBR', ''],
        ['_DATETIME', ''],
        ['LATITUDE', ''],
        ['LONGITUDE', ''],
        ['_ACTUAL_DEPTH', 'METERS'],
    ]
    columns = [x[0] for x in manual_parameters]
    units = [x[1] for x in manual_parameters]

    s = None
    for i, (var, d, u) in enumerate(parameters):
        if var == 'Depth':
            s = i + 1
            continue
        # Only want to add parameters after Depth. The others were done manually.
        if s is None:
            continue
        try:
            var = bats_to_param[var]
        except KeyError:
            pass
        columns.append(var)
        units.append(u)

    template_df = DataFile()
    template_df.create_columns(columns, units)
    template_df.check_and_replace_parameters(convert=False)

    for sec, lines in sections.items():
        if sec == 'Variables list':
            continue
        if sec != 'Comments':
            continue
        template_df.globals['_{0}'.format(sec)] = '\n'.join(lines)

    df = None
    params_auto = parameters[s:]
    dfi = 0
    for i, l in enumerate(handle):
        parts = l.split()

        id = parts[0]
        (cruise_type, type_id, cruise_num, cruise_id, cast_type, cast_id,
         nisk_id) = _parse_bats_id(id)
        ship = _ship_from_cruise_num(cruise_num)
        if not ship:
            ship = 'R/V Atlantic Explorer'

        if (df is None or df.globals['_OS_ID'] != cruise_id
                or df.globals['STNNBR'] != cruise_type
                or df.globals['CASTNO'] != cast_id):
            if df is not None:
                # Done reading one cast. Finalize it.
                log.info(u'finalizing cast {0} {1} {2}'.format(
                    df.globals['_OS_ID'], df.globals['STNNBR'],
                    df.globals['CASTNO']))
                try:
                    meta = metadata[cruise_id]
                    port_date = meta['dates'][0]
                except (TypeError, KeyError):
                    port_date = None
                if not port_date:
                    port_date = min(df['_DATETIME'])
                df.globals['EXPOCODE'] = create_expocode(
                    ship_code(ship, raise_on_unknown=False), port_date)
                log.info(df.globals['EXPOCODE'])
                df.globals['DEPTH'] = max(df['_ACTUAL_DEPTH'])
                collapse_globals(df, ['_DATETIME', 'LATITUDE', 'LONGITUDE'])
                # Normalize all the parameter column lengths. There may be
                # columns that did not get data written to them so make sure
                # they are just as long as the rest
                length = len(df)
                for c in df.columns.values():
                    c.set_length(length)
                try:
                    dfc = self[df.globals['_OS_ID']]
                except KeyError:
                    dfc = self[df.globals['_OS_ID']] = DataFileCollection()
                dfc.files.append(df)
                dfi = 0

            # Create a new cast
            df = copy(template_df)
            df.globals['SECT_ID'] = BATS_SECT_ID
            df.globals['_SHIP'] = ship
            df.globals['_OS_ID'] = cruise_id
            df.globals['STNNBR'] = cruise_type
            df.globals['CASTNO'] = cast_id

        df['BTLNBR'].set(dfi, nisk_id)

        dt_ascii = datetime.strptime(parts[1] + parts[3], '%Y%m%d%H%M')
        dt_deci = bats_time_to_dt(parts[2])
        #if dt_ascii != dt_deci:
        #    log.warn(
        #        u'Dates differ on data row {0}: {5} {1!r}={2} '
        #        '{3!r}={4}'.format(i, parts[1] + parts[3], dt_ascii, parts[2],
        #                           dt_deci, dt_deci - dt_ascii))
        df['_DATETIME'].set(dfi, dt_ascii)

        df['LATITUDE'].set(dfi, Decimal(parts[4]))
        df['LONGITUDE'].set(dfi, Decimal(correct_longitude(parts[5])))
        df['_ACTUAL_DEPTH'].set_check_range(dfi, Decimal(parts[6]))

        parts_auto = parts[s:]
        for p, v in zip(params_auto, parts_auto):
            param = p[0]
            try:
                param = bats_to_param[param]
            except KeyError:
                pass
            if cruise_num < 121 and param == 'TON':
                param = 'DON'

            if (equal_with_epsilon(v, -9) or equal_with_epsilon(v, -9.9)
                    or equal_with_epsilon(v, -9.99)):
                df[param].set_check_range(dfi, None)
            # TODO determine whether -10 is just bad formatting for -9.9
            elif equal_with_epsilon(v, -10):
                #log.warn(u'Possible missing data value {0}'.format(v))
                df[param].set_check_range(dfi, None)
            elif v == 0:
                log.warn(u'Data under detection limit, set flag to '
                         'WOCE water sample questionable measurement')
                df[param].set_check_range(dfi, None, flag=3)
            else:
                df[param].set_check_range(dfi, Decimal(v))

        dfi += 1
        # Since this is a very long file that contains multiple cruises and
        # casts, it is split apart as it is processed into
        # DataFileCollection(s) containing DataFile objects for each cast
        if i % 100 == 0:
            log.info(u'processed {0} lines'.format(i))
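The NOTE in the docstring describes the shape of the result: a dictionary keyed by cruise id whose values are DataFileCollections of casts. A sketch of walking that structure, where result stands in for the dict this method populates as self:

for cruise_id, dfc in sorted(result.items()):
    for cast in dfc.files:
        print cruise_id, cast.globals['STNNBR'], cast.globals['CASTNO']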
Example #12
    def test_read(self):
        self.file = DataFile()
        self.bufr = StringIO(self.input)
        ctdwoce.read(self.file, self.bufr)
        self.bufr.close()
Example #13
File: zip.py Project: cberys/libcchdo
def read(self, fileobj, is_fname_ok, reader, *args, **kwargs):
    """Generic zip file reader for zip files with multiple datafiles inside."""
    for tempfile in generate_files(fileobj, is_fname_ok):
        dfile = DataFile()
        reader(dfile, tempfile, *args, **kwargs)
        self.append(dfile)
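A hedged sketch of driving this generic reader, modeled on the is_fname_ok/reader pair from Example #2 (the names below are illustrative):

def is_csv(fname):
    return '.csv' in fname and '/' not in fname

def read_one(dfile, tempfile):
    ctdex.read(dfile, tempfile)

# dfc = DataFileCollection()
# read(dfc, zip_fileobj, is_csv, read_one)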
Example #14
File: tools.py Project: cberys/libcchdo
def australian_navy_ctd(args):
    """Download and convert Australian Navy CTD data."""
    from pydap.client import open_url
    from libcchdo.thredds import crawl
    from libcchdo.formats.ctd.zip import exchange as ctdzipex
    from libcchdo.formats.zip import write as zwrite

    dfcs = []

    cf_param_to_cchdo_param = {
        'sea_water_pressure': 'CTDPRS',
        'sea_water_temperature': 'CTDTMP',
        'sea_water_practical_salinity': 'CTDSAL',
    }
    ignored_qc_flags = [
        'time_qc_flag',
        'position_qc_flag',
    ]
    qc_conventions = {
        'Proposed IODE qc scheme March 2012': {
            1: 2,  # good
            2: 5,  # not_evaluated_or_unknown
            3: 3,  # suspect
            4: 4,  # bad
            9: 9,  # missing
        },
    }

    dfc = DataFileCollection()
    catalog = "http://www.metoc.gov.au/thredds/catalog/RAN_CTD_DATA/catalog.xml"
    for url in crawl(catalog):
        df = DataFile()

        log.info(u'Reading %s', url)
        dset = open_url(url)
        vars = dset.keys()
        for vname in vars:
            var = dset[vname]
            attrs = var.attributes
            if 'standard_name' in attrs:
                std_name = attrs['standard_name']
                if std_name == 'time':
                    df.globals['_DATETIME'] = \
                        datetime(1950, 1, 1) + timedelta(var[:])
                elif std_name == 'latitude':
                    df.globals['LATITUDE'] = var[:]
                elif std_name == 'longitude':
                    df.globals['LONGITUDE'] = var[:]
                elif std_name in cf_param_to_cchdo_param:
                    cparam = cf_param_to_cchdo_param[std_name]
                    if '_FillValue' in attrs:
                        fill_value = attrs['_FillValue']
                        values = []
                        for x in var[:]:
                            if equal_with_epsilon(x, fill_value):
                                values.append(None)
                            else:
                                values.append(x)
                    else:
                        values = var[:]

                    try:
                        df[cparam].values = values
                    except KeyError:
                        df[cparam] = Column(cparam)
                        df[cparam].values = values
                elif 'status_flag' in std_name:
                    flagged_param = std_name.replace('status_flag', '').strip()
                    cparam = cf_param_to_cchdo_param[flagged_param]
                    qc_convention = attrs['quality_control_convention']
                    if qc_convention in qc_conventions:
                        qc_map = qc_conventions[qc_convention]
                        df[cparam].flags_woce = [qc_map[x] for x in var[:]]
                else:
                    log.debug('unhandled standard_name %s', std_name)
            elif ('long_name' in attrs
                  and attrs['long_name'] == 'profile identifier'):
                profile_id = var[:]
                cruise_id = profile_id / 10**4
                profile_id = profile_id - cruise_id * 10**4
                df.globals['EXPOCODE'] = str(cruise_id)
                df.globals['STNNBR'] = str(profile_id)
                df.globals['CASTNO'] = str(1)
            elif vname in ignored_qc_flags:
                df.globals['_' + vname] = var[:]
            elif (vname.endswith('whole_profile_flag')
                  or vname.endswith('sd_test')):
                pass
            else:
                log.debug('unhandled variable %s', vname)

        # attach new file to appropriate collection
        if dfc.files:
            if dfc.files[0].globals['EXPOCODE'] != df.globals['EXPOCODE']:
                dfcs.append(dfc)
                dfc = DataFileCollection()
        dfc.append(df)

    # The loop only appends a collection when the expocode changes, so make
    # sure the final collection is not dropped.
    if dfc.files:
        dfcs.append(dfc)

    with closing(args.output) as out_file:
        next_id = [0]

        def get_filename(dfc):
            try:
                return '{0}_ct1.zip'.format(dfc.files[0].globals['EXPOCODE'])
            except IndexError:
                # Use a one-element list as a mutable counter; Python 2 has no
                # nonlocal, so rebinding an outer integer here would fail.
                next_id[0] += 1
                return '{0}_ct1.zip'.format(next_id[0])

        zwrite(dfcs, out_file, ctdzipex, get_filename)
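The profile identifier handled above packs a cruise id and a station number into one integer; the split is plain integer arithmetic (values illustrative):

profile_id = 1230042
cruise_id = profile_id / 10**4              # 123 (integer division in Python 2)
station = profile_id - cruise_id * 10**4    # 42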
Example #15
def get_ctdex_name(input_file):
    dfile = DataFile()
    ctdex.read(dfile, input_file, header_only=True)
    return ctdex.get_datafile_filename(dfile)
Example #16
File: tools.py Project: cberys/libcchdo
def _single_file(reader, files, output, **kwargs):
    d = DataFile()
    reader.read(d, files[0], **kwargs)
    if output is not sys.stdout:
        output = open(output, 'w')
    ctdex.write(d, output)
Example #17
class TestDataFile(TestCase):
    def setUp(self):
        self.file = DataFile()
        self.c = self.file.columns['EXPOCODE'] = Column('EXPOCODE')

    def tearDown(self):
        self.file = None

    def test_init(self):
        self.assertEqual(len(self.file.columns), 1)
        self.assertEqual(self.file.footer, None)
        self.assertEqual(self.file.globals, {'stamp': '', 'header': ''})

    def test_expocodes(self):
        self.c.append('A')
        self.assertEqual(['A'], self.file.expocodes())
        self.c.append('B')
        self.assertEqual(['A', 'B'], self.file.expocodes())
        self.c.append('A')
        self.assertEqual(
            ['A', 'B'],
            self.file.expocodes())  # Expocodes returns unique expocodes.

    def test_len(self):
        c = self.file.columns['EXPOCODE']
        del self.file.columns['EXPOCODE']
        self.assertEqual(len(self.file), 0)
        self.file.columns['EXPOCODE'] = c
        self.assertEqual(len(self.file), 0)
        self.c.append('A')
        self.assertEqual(len(self.file), 1)
        self.c.append('A')
        self.assertEqual(len(self.file), 2)

    def test_sorted_columns(self):
        self.file.columns['CASTNO'] = Column('CASTNO')
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR', 'CASTNO']
        received = map(lambda c: c.parameter.mnemonic_woce(),
                       self.file.sorted_columns())
        # If lengths are equal and all expected in received, then assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all([x in received for x in expected]))

    def test_get_property_for_columns(self):
        pass  # This is tested by the following tests.

    def test_column_headers(self):
        self.assertEqual(['EXPOCODE'], self.file.column_headers())
        self.file.columns['STNNBR'] = Column('STNNBR')
        expected = ['EXPOCODE', 'STNNBR']
        received = self.file.column_headers()
        # If lengths are equal and all expected in received, then assume equal
        self.assertEqual(len(expected), len(received))
        self.assertTrue(all([x in received for x in expected]))

    def test_formats(self):
        self.file.columns['CTDOXY'] = Column('CTDOXY')
        self.file.check_and_replace_parameters()
        # Order of columns may be wrong
        self.assertEqual(['%11s', '%9.4f'], self.file.formats())

    def test_to_dict(self):
        self.file.to_dict()
        pass  # TODO

    def test_str(self):
        str(self.file)

    def test_create_columns(self):
        parameters = ['CTDOXY']
        units = ['UMOL/KG']
        self.file.create_columns(parameters, units)

    def test_column_append(self):
        self.assertEqual(self.c.values, [])
        self.c.set(2, 'test')
        self.assertEqual(self.c.values, [None, None, 'test'])
        self.assertEqual(self.c.flags_woce, [])
        self.c.append('test2', 'flag2')
        self.assertEqual(self.c.values, [None, None, 'test', 'test2'])
        self.assertEqual(self.c.flags_woce, [None, None, None, 'flag2'])

    def test_calculate_depths(self):
        self.file['_ACTUAL_DEPTH'] = Column('_ACTUAL_DEPTH')
        self.assertEqual(('actual', []), self.file.calculate_depths())

        del self.file['_ACTUAL_DEPTH']
        self.file.globals['LATITUDE'] = 0
        self.file.create_columns(['CTDPRS', 'CTDSAL', 'CTDTMP'])
        self.assertEqual(('unesco1983', []), self.file.calculate_depths())

        self.file['CTDPRS'].values = [1]
        self.file['CTDSAL'].values = [1]
        self.file['CTDTMP'].values = [1]

        self.assertEqual(
            ('sverdrup', [_decimal('1.021723814950101286444879340E-8')]),
            self.file.calculate_depths())

    def test_check_and_replace_parameter_contrived(self):
        """Contrived parameters are not checked."""
        col = Column('_DATETIME')
        col.check_and_replace_parameter(self.file, convert=False)
Example #18
    def setUp(self):
        self.file = DataFile()
Example #19
    def setUp(self):
        self.file = DataFile()
        self.c = self.file.columns['EXPOCODE'] = Column('EXPOCODE')
Example #20
class TestBottleNetCDF(unittest.TestCase):
    def setUp(self):
        self.infile = open(
            sample_file('nc_hyd', 'i08s_33RR20070204_00001_00001_hy1.nc'), 'r')

    def tearDown(self):
        self.infile.close()

    def assertAlmostEqualOrNones(self, x, y):
        if x is None:
            self.assert_(y is None)
        else:
            self.assertAlmostEqual(x, y)

    def test_read(self):
        self.file = DataFile()
        botnc.read(self.file, self.infile)

        nitrite_values = (0.11, None, 0.08, 0.08, 0.08, 0.08, 0.06, 0.03, 0.06,
                          0.04, 0.03, None, 0.03, None, 0.03, None)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(nitrite_values, self.file.columns['NITRIT'].values))

        freon11_values = (6.063, 6.055, 5.795, 5.619, 5.486, 5.508, 5.487,
                          5.683, 5.422, 5.190, 5.222, None, 5.289, None, 5.250,
                          5.254)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon11_values, self.file.columns['CFC-11'].values))

        freon113_values = (None, ) * 16
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon113_values, self.file.columns['CFC113'].values))

        expocodes = ['33RR20070204'] * 16
        self.assertEqual(expocodes, self.file.columns['EXPOCODE'].values)

    def test_read_multiple(self):
        self.file = DataFile()
        botnc.read(self.file, self.infile)

        nitrite_values = (0.11, None, 0.08, 0.08, 0.08, 0.08, 0.06, 0.03, 0.06,
                          0.04, 0.03, None, 0.03, None, 0.03, None)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(nitrite_values, self.file.columns['NITRIT'].values))

        freon11_values = (6.063, 6.055, 5.795, 5.619, 5.486, 5.508, 5.487,
                          5.683, 5.422, 5.190, 5.222, None, 5.289, None, 5.250,
                          5.254)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon11_values, self.file.columns['CFC-11'].values))

        freon113_values = (None, ) * 16
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon113_values, self.file.columns['CFC113'].values))

        expocodes = ['33RR20070204'] * 16
        self.assertEqual(expocodes, self.file.columns['EXPOCODE'].values)

        # Read second file
        infile2 = open(sample_file('nc_hyd', 'p03a_00199_00001_hy1.nc'), 'r')
        botnc.read(self.file, infile2)

        # Make sure all columns have the same length
        length = None
        for c in self.file.columns.values():
            if not length:
                length = len(c.values)
            else:
                self.assertEquals(len(c.values), length)
                if c.is_flagged_woce():
                    self.assertEquals(len(c.flags_woce), length)
                if c.is_flagged_igoss():
                    self.assertEquals(len(c.flags_igoss), length)

        # Test parameter in first file not in second is filled with None.
        freon113_values += (None, ) * 36
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon113_values, self.file.columns['CFC113'].values))

        # Test parameter in both files are filled in correctly.
        freon11_values += (1.437, 1.501, 1.515, 1.525, 1.578, 1.596, 1.602,
                           1.725, 1.650, 1.703, 1.694, 1.437, 1.059, 0.702,
                           0.303, 0.130, 0.040, 0.015, -0.001, 0.002, 0.000,
                           None, None, 0.012, None, 0.006, None, None, None,
                           0.014, None, 0.000, None, 0.014, None, -0.001)
        map(lambda x: self.assertAlmostEqualOrNones(*x),
            zip(freon11_values, self.file.columns['CFC-11'].values))

        infile2.close()

    def test_write(self):
        self.file = DataFile()

        g = self.file.globals

        self.file['EXPOCODE'] = Column('EXPOCODE')
        self.file['EXPOCODE'].append('TESTEXPO')

        self.file['SECT_ID'] = Column('SECT_ID')
        self.file['SECT_ID'].append('TEST')

        self.file['STNNBR'] = Column('STNNBR')
        self.file['STNNBR'].append(5)

        self.file['CASTNO'] = Column('CASTNO')
        self.file['CASTNO'].append(20)

        self.file['DEPTH'] = Column('DEPTH')
        self.file['DEPTH'].append(-1)

        self.file['LATITUDE'] = Column('LATITUDE')
        self.file['LATITUDE'].append(90)

        self.file['LONGITUDE'] = Column('LONGITUDE')
        self.file['LONGITUDE'].append(180)

        self.file['_DATETIME'] = Column('_DATETIME')
        self.file['_DATETIME'].append(datetime.utcnow())

        self.file['BTLNBR'] = Column('BTLNBR')
        self.file['BTLNBR'].append(5, 9)

        self.file['CTDOXY'] = Column('CTDOXY')
        self.file['CTDOXY'].append(1, 2)
        self.file.check_and_replace_parameters()
        p = self.file['CTDOXY'].parameter
        p.description = 'ctd oxygen'
        p.bound_lower = 0
        p.bound_upper = 200

        botnc.write(self.file, NamedTemporaryFile())
Example #21
File: merge.py Project: cberys/libcchdo
    def test_functional_scripts_ctdex(self):
        """Test merging CTD Exchange files."""
        from argparse import Namespace
        from libcchdo.scripts import merge_ctdex_and_ctdex
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv, \
                NamedTemporaryFile(delete=False) as output:
            origin.write("""\
CTD,20120515ODF
# REPORTED CAST DEPTH IS CTD_DEPTH + DISTANCE_ABOVE_BOTTOM AT MAX PRESSURE
NUMBER_HEADERS = 11
EXPOCODE = 33AT20120419
SECT_ID = A20
STNNBR = 1
CASTNO = 1
DATE = 20120421
TIME = 1552
LATITUDE =   6.8682
LONGITUDE =  -53.4793
DEPTH =    66
INSTRUMENT_ID = 796
CTDPRS,CTDPRS_FLAG_W,CTDTMP,CTDTMP_FLAG_W,CTDSAL,CTDSAL_FLAG_W,CTDOXY,CTDOXY_FLAG_W,CTDNOBS,CTDETIME
DBAR,,ITS-90,,PSS-78,,UMOL/KG,,,
      0.0,6,  27.7514,6,  31.2862,6,    229.5,6,        1,    629.9
      2.0,2,  27.7223,2,  31.3925,2,    229.5,2,       11,    640.0
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
CTD,20120515ODF
# REPORTED CAST DEPTH IS CTD_DEPTH + DISTANCE_ABOVE_BOTTOM AT MAX PRESSURE
NUMBER_HEADERS = 11
EXPOCODE = 33AT20120419
SECT_ID = A20
STNNBR = 1
CASTNO = 1
DATE = 20120421
TIME = 1552
LATITUDE =   6.8682
LONGITUDE =  -53.4793
DEPTH =    66
INSTRUMENT_ID = 796
CTDPRS,CTDPRS_FLAG_W,CTDTMP,CTDTMP_FLAG_W,CTDSAL,CTDSAL_FLAG_W,CTDOXY,CTDOXY_FLAG_W,TRANSM,TRANSM_FLAG_W,CTDNOBS,CTDETIME
DBAR,,ITS-90,,PSS-78,,UMOL/KG,,0-5VDC,,,
      0.0,6,  27.7514,6,  31.2862,2,    222.2,6,   4.3348,1,        1,    629.9
      2.0,2,  27.7223,2,  31.3925,2,    229.5,2,   4.3334,1,       11,    640.0
""")
            deriv.flush()
            deriv.seek(0)

            args = Namespace()
            args.origin = origin
            args.derivative = deriv
            args.parameters_to_merge = None
            args.merge_different = True
            args.output = output
            args.guess_key = True
            merge_ctdex_and_ctdex(args)

            with open(output.name) as fff:
                dfile = DataFile()
                ctdex.read(dfile, fff)
                self.assertEqual(dfile['CTDSAL'].flags_woce, [2, 2])
                self.assertEqual(map(str, dfile['TRANSM'].values),
                                 ['4.3348', '4.3334'])
                self.assertEqual(dfile['TRANSM'].flags_woce, [1, 1])
            unlink(output.name)
Example #22
File: merge.py Project: cberys/libcchdo
    def test_integration_merge_btl(self):
        with    TemporaryFile() as origin, \
                TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,-999.000,9,11,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,9,22,9
 316N145_9, TRNS1, 574, 1, 34, 34,2,1000,5,-999.000,9,33,9
 316N145_9, TRNS1, 574, 1, 32, 32,2,1000,5,-999.000,9,44,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,DEPTH,TDN,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,METERS,UMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 36, 36,2,1000,5,  10.000,9,-999.0,9
 316N145_9, TRNS1, 574, 1, 35, 35,2,1000,5,-999.000,1,-999.0,9
 316N145_9, TRNS1, 574, 1, 34, 34,2,1000,5,-999.000,9,-999.0,9
 316N145_9, TRNS1, 600, 1,  1,  1,2,1000,5,-999.000,9,-999.0,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dfo = DataFile()
            dfd = DataFile()
            btlex.read(dfo, origin)
            btlex.read(dfd, deriv)
            p_different, p_not_in_orig, p_not_in_deriv, p_common = \
                different_columns(dfo, dfd, BOTTLE_KEY_COLS)
            parameters = p_different + p_not_in_orig
            keys = determine_bottle_keys(dfo, dfd)
            self.assertEqual(
                keys, ('EXPOCODE', 'STNNBR', 'CASTNO', 'SAMPNO', 'BTLNBR'))
            parameters = list(OrderedSet(parameters) - OrderedSet(keys))

            # Parameters with underscores in them may be confused when matching
            # flags with them. E.g. PH_SWS_FLAG_W should be matched with PH_SWS
            # not PH.
            dfile = merge_datafiles(dfo, dfd, keys, parameters)

            self.assertEqual(dfile['DELC14'][0], _decimal('10.000'))
            self.assertEqual(dfile['DELC14'].flags_woce[1], 1)

            # Header should be the origin file's header
            self.assertNotIn('header 2', dfile.globals['header'])
            self.assertIn('header 1', dfile.globals['header'])
            # Header should contain the merged parameters
            self.assertIn('Merged parameters: PH_SWS, DELC14, DELC14_FLAG_W',
                          dfile.globals['header'])
            # No double new lines
            self.assertNotIn('\n\n', dfile.globals['header'])
            # new line for header is not included in the writers
            self.assertEqual('\n', dfile.globals['header'][-1])

            # Key columns should not have been converted to floats. This happens
            # for some reason if pandas combine/update have been used.
            self.assertEqual(str(dfile['STNNBR'][0]), '574')
            self.assertEqual(str(dfile['CASTNO'][0]), '1')
            self.assertEqual(str(dfile['SAMPNO'][0]), '36')
            self.assertEqual(str(dfile['BTLNBR'][0]), '36')
            self.assertEqual(str(dfile['PH_SWS'][0]), 'None')

            # Extra keys in derivative file should not be merged in.
            self.assertNotIn(600, dfile['STNNBR'])

            # Make sure warning is printed regarding extra key in deriv file.
            lines = [[
                'Key ', 'does not exist in origin from derivative rows', '600'
            ]]
            self.assertTrue(self.ensure_lines(lines))
Example #23
def read(dfc, fileobj, cfg):
    """Read generic HRP matlab file."""
    mat, hrp = load_mat_hrp(fileobj)
    data = hrp_data_as_dict(hrp)

    coords = zip(data['lon'][0], data['lat'][0])
    del data['lat']
    del data['lon']

    for key in data.keys():
        log.info(u'parameter shape: {0} {1}'.format(key, data[key].shape))

    param_map = cfg["parameter_mapping"]
    for param in data.keys():
        if param not in param_map:
            del data[param]
        else:
            new_key = param_map[param]
            if new_key != param:
                data[new_key] = data[param]
                del data[param]

    for coord in coords:
        dfile = DataFile()
        dfc.append(dfile)
        dfile.globals['LONGITUDE'] = _decimal(coord[0])
        dfile.globals['LATITUDE'] = _decimal(coord[1])

        # create the columns after extraneous keys have been deleted
        dfile.create_columns(data.keys())

    for dep, dfile in enumerate(dfc):
        dfile.globals['STNNBR'] = dep + 1
        ref_range = ndarray_data_slice(data['PRESSURE'][:, dep])
        for param, pdata in data.items():
            col = dfile[param]
            data_col = pdata[:, dep]

            drange = ndarray_data_slice(data_col)
            if ref_range is None:
                ref_range = drange
                determiner = param
            elif drange != ref_range:
                if drange[0] == drange[1]:
                    log.info(u'No data for {0}. Skip.'.format(param))
                    continue
                if not is_data_range_inside(drange, ref_range):
                    log.error(u'{0} has data range {1} outside {2}. '
                              'Skip.'.format(param, drange, ref_range))
                    continue

            col.values = map(_decimal,
                             list(data_col[ref_range[0]:ref_range[1]]))
            # Act as if all files had QC and assign it to OceanSITES 1. Assuming
            # that someone has already gone through level 0 data and we are
            # receiving level 1 or higher. We can set all flags to 2.
            col.flags_woce = [9 if isnan(val) else 2 for val in col.values]

    # Somehow, HRP matlab data can have nans in the coordinate arrays. We can't
    # recalculate depth from that or make other assumptions so we can only
    # delete them.
    for iii, dfile in reversed(list(enumerate(dfc))):
        if (isnan(dfile.globals['LATITUDE'])
                or isnan(dfile.globals['LONGITUDE'])):
            log.warn(u'Unable to determine coordinate for matlab row '
                     '{0}. Discarding.'.format(iii))
            dfc.files.remove(dfile)
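ndarray_data_slice is not shown in this excerpt; from its use above it plausibly returns the (start, stop) bounds of the contiguous non-NaN run of a 1-D array, or None when there is no data. A stand-in under that assumption:

import numpy as np

def ndarray_data_slice(arr):
    idx = np.where(~np.isnan(arr))[0]
    if len(idx) == 0:
        return None  # mirrors the ``ref_range is None`` branch above
    return (idx[0], idx[-1] + 1)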
Example #24
File: merge.py Project: cberys/libcchdo
    def test_merge_datafiles(self):
        """Merge datafiles.

        When merging data files, there are two cases to consider:

        Case 1: Adding new column

            If the derivative file has less records, fill in missing records
            with fill values and missing flags.
            
        Case 2: Updating column data

        It should also be possible to specifically only merge flags. Make sure
        if only merging flags to not merge the data.

        Parameter units should be updated from the derivative.

        """
        df0 = DataFile()
        df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT', 'CTDOXY'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRIT'].append(30, 5)
        df0['NITRIT'].append(31, 6)
        df0['CTDOXY'].append(40, 2)
        df0['CTDOXY'].append(41, 3)

        df1 = DataFile()
        df1.create_columns(['CTDPRS', 'NITRAT', 'CTDSAL', 'CTDOXY'])
        df1['CTDPRS'].append(2, 2)
        df1['CTDPRS'].append(3, 2)
        df1['CTDSAL'].append(20, 2)
        df1['CTDSAL'].append(21, 2)
        df1['NITRAT'].append(12, 4)
        df1['NITRAT'].append(13, 4)
        df1['CTDOXY'].append(40, 2)
        df1['CTDOXY'].append(41, 3)

        df1['CTDOXY'].parameter.units = Unit('UMOL/KG')

        # Case 1 column add
        mdf = merge_datafiles(
            df0, df1, ['CTDPRS'],
            ['NITRAT', 'NITRAT_FLAG_W', 'CTDSAL', 'CTDSAL_FLAG_W', 'CTDOXY'])
        self.assertEqual(mdf['CTDPRS'].values, [1, 2])
        # Make sure missing values and flags are filled in.
        self.assertEqual(mdf['CTDSAL'].values, [None, 20])
        self.assertEqual(mdf['CTDSAL'].flags_woce, [9, 2])
        # Case 2 data update
        self.assertEqual(mdf['NITRAT'].values, [10, 12])
        self.assertEqual(mdf['NITRAT'].flags_woce, [2, 4])

        # Columns in origin should be kept
        self.assertEqual(mdf['NITRIT'].values, [30, 31])
        self.assertEqual(mdf['NITRIT'].flags_woce, [5, 6])

        # Units should be overwritten for merged columns
        self.assertEqual(mdf['CTDOXY'].parameter.units,
                         df1['CTDOXY'].parameter.units)

        # Make sure warning is printed regarding unit overwrite.
        # This also ensures that derivative columns do not wholesale overwrite
        # the origin column; they must be merged using the row match algorithm.
        lines = [
            "Changed units for CTDOXY from '' to 'UMOL/KG'",
        ]
        self.assertTrue(self.ensure_lines(lines))
Example #25
File: merge.py Project: cberys/libcchdo
    def test_merge_collections(self):
        """When merging collections, map files, then merge mapped files.

        """
        odfc = DataFileCollection()
        ddfc = DataFileCollection()

        df0 = DataFile()
        df0.globals['EXPOCODE'] = 'a'
        df0.globals['STNNBR'] = 1
        df0.globals['CASTNO'] = 1
        df0.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
        df0['CTDPRS'].append(1, 2)
        df0['CTDPRS'].append(2, 2)
        df0['NITRAT'].append(10, 2)
        df0['NITRAT'].append(11, 2)
        df0['NITRIT'].append(10, 2)
        df0['NITRIT'].append(11, 2)
        odfc.append(df0)

        df1 = DataFile()
        df1.globals['EXPOCODE'] = 'a'
        df1.globals['STNNBR'] = 1
        df1.globals['CASTNO'] = 1
        df1.create_columns(['CTDPRS', 'NITRAT', 'NITRIT'])
        df1['CTDPRS'].append(1, 2)
        df1['CTDPRS'].append(3, 2)
        df1['NITRAT'].append(20, 2)
        df1['NITRAT'].append(21, 2)
        df1['NITRIT'].append(10, 2)
        df1['NITRIT'].append(11, 2)
        ddfc.append(df1)

        def merger(origin, deriv):
            return merge_datafiles(origin, deriv, ['CTDPRS'],
                                   ['NITRAT', 'NITRIT'])

        merged_dfc = merge_collections(odfc, ddfc, merger)

        self.assertEqual(merged_dfc.files[0]['CTDPRS'].values, [1, 2])
        self.assertEqual(merged_dfc.files[0]['NITRAT'].values, [20, 11])
        self.assertEqual(merged_dfc.files[0]['NITRIT'].values, [10, 11])

        lines = [
            # df1 has a different CTDPRS record (3)
            'Key (3,) does not exist in origin from derivative rows',
            # NITRIT columns are the same
            "Instructed to merge parameters that are not different: ['NITRIT']"
        ]
        self.assertTrue(self.ensure_lines(lines))
Example #26
File: merge.py Project: cberys/libcchdo
    def test_different_columns(self):
        """Columns between two datafiles differ under a wide variety of cases.

        Case 1: Column values are different
        Case 1 corollary: Flag values are different
        Case 2: Units are different
        Case 3: Column not in original
        Case 4: Column not in derivative

        """
        with TemporaryFile() as origin, TemporaryFile() as deriv:
            origin.write("""\
BOTTLE,19700101CCHSIOYYY
# header 1
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,NITRAT,NITRAT_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,UMOL/KG,/MILLE,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,3.00,2,10.0,-999.000,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,4.00,2,10.0,-999.000,9
END_DATA
""")
            origin.flush()
            origin.seek(0)
            deriv.write("""\
BOTTLE,19700101CCHSIOYYY
# header 2
EXPOCODE,SECT_ID,STNNBR,CASTNO,SAMPNO,BTLNBR,BTLNBR_FLAG_W,LATITUDE,LONGITUDE,DATE,TIME,DEPTH,TDN,TDN_FLAG_W,NITRIT,DELC14,DELC14_FLAG_W,PH_SWS,PH_SWS_FLAG_W
,,,,,,,,,,,METERS,UMOL/KG,,NMOL/KG,/MILLE,,,
 316N145_9, TRNS1, 574, 1, 16, 36, 2, 0, 0, 19700101, 0000,1000,6.00,3,10.0,-999.000,1,-999.0,9
 316N145_9, TRNS1, 574, 1, 15, 35, 2, 0, 0, 19700101, 0000,1000,5.00,3,10.0,  10.000,9,-999.0,9
END_DATA
""")
            deriv.flush()
            deriv.seek(0)

            dforigin = DataFile()
            dfderiv = DataFile()
            btlex.read(dforigin, origin)
            btlex.read(dfderiv, deriv)
            self.assertEqual(
                # NITRIT comes after because NMOL/KG is not an expected unit and
                # gets pushed to the end when sorting
                (
                    ['DELC14', 'DELC14_FLAG_W', 'NITRIT'],
                    # PH_SWS_FLAG_W has underscores inside the parameter name. All
                    # parts need to be included
                    ['PH_SWS', 'PH_SWS_FLAG_W', 'TDN', 'TDN_FLAG_W'],
                    ['NITRAT', 'NITRAT_FLAG_W'],
                    [
                        'EXPOCODE', 'SECT_ID', 'STNNBR', 'CASTNO', 'SAMPNO',
                        'BTLNBR', 'BTLNBR_FLAG_W', 'LATITUDE', 'LONGITUDE',
                        'DEPTH', '_DATETIME'
                    ]),
                different_columns(dforigin, dfderiv, (
                    'EXPOCODE',
                    'SECT_ID',
                    'STNNBR',
                    'CASTNO',
                    'SAMPNO',
                    'BTLNBR',
                )))

            lines = [
                "DELC14 differs at origin row 1:\t(None, Decimal('10.000'))",
                "DELC14_FLAG_W differs at origin row 0:\t(9, 1)",
            ]
            self.assertTrue(self.ensure_lines(lines))

            # Columns are not different if merged results are not different.
            dfo = DataFile()
            dfd = DataFile()

            dfo.create_columns(['CTDPRS', 'CTDOXY'])
            dfo.check_and_replace_parameters()
            dfd.create_columns(['CTDPRS', 'CTDOXY'])
            dfd.check_and_replace_parameters()

            dfo['CTDPRS'].values = [1, 2, 3]
            dfo['CTDOXY'].values = [10, 20, 30]
            dfd['CTDPRS'].values = [3, 2, 1]
            dfd['CTDOXY'].values = [30, 20, 10]

            self.assertEqual(([], [], [], ['CTDPRS', 'CTDOXY']),
                             different_columns(dfo, dfd, ('CTDPRS', )))