Example #1
    def convert(self):
        # Copy to tempdir
        for f in self.matched_files:
            fname = os.path.basename(f)
            tmpf = os.path.join(self.tmpdir, fname)
            shutil.copy2(f, tmpf)

        safe_makedirs(self.destination_directory)

        # Run conversion script
        convert_binary_path = os.path.join(os.path.dirname(__file__), 'bin',
                                           'convertDbds.sh')
        pargs = [convert_binary_path, '-q', '-p', '-c', self.cache_directory]

        pargs.append(self.tmpdir)
        pargs.append(self.destination_directory)

        command_output, return_code = generate_stream(pargs)

        # Parse the script output and, every time a .dat (ASCII) file is
        # seen, emit it together with the binary files that produced it
        processed = []
        output_files = command_output.read().split('\n')
        binary_files = []
        for x in output_files:

            if x.startswith('Error'):
                L.error(x)
                continue

            if x.startswith('Skipping'):
                continue

            fname = os.path.basename(x)
            _, suff = os.path.splitext(fname)

            if suff == '.dat':
                ascii_file = os.path.join(self.destination_directory, fname)
                if os.path.isfile(ascii_file):
                    processed.append({
                        'ascii': ascii_file,
                        'binary': sorted(binary_files)
                    })
                    L.info("Converted {} to {}".format(
                        ','.join([
                            os.path.basename(x) for x in sorted(binary_files)
                        ]), fname))
                else:
                    L.warning("{} not an output file".format(x))

                binary_files = []
            else:
                bf = os.path.join(self.source_directory, fname)
                if os.path.isfile(x):
                    binary_files.append(bf)

        return processed
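
For orientation, here is a minimal sketch of how this converter might be driven. The class name and constructor signature below are assumptions inferred from the attributes the method uses (matched_files, tmpdir, source_directory, destination_directory, cache_directory); they are not shown in the example itself.

# Hypothetical usage sketch; SlocumMerger and its constructor arguments
# are assumed, not confirmed by the example above.
merger = SlocumMerger(
    source_directory='/data/binary/deployment-1',      # placeholder paths
    destination_directory='/data/ascii/deployment-1',
    cache_directory='/data/cache/deployment-1',
)
for result in merger.convert():
    # Each entry pairs one converted ASCII file with its source binaries
    print(result['ascii'], result['binary'])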
Example #2
def merge_profile_netcdf_files(folder, output):
    import pandas as pd
    from glob import glob

    new_fp, new_path = tempfile.mkstemp(suffix='.nc', prefix='gutils_merge_')

    try:
        # Get the number of profiles
        members = sorted(list(glob(os.path.join(folder, '*.nc'))))

        # Iterate over the netCDF files and create a dataframe for each
        dfs = []
        axes = {
            'trajectory': 'trajectory',
            't': 'time',
            'x': 'lon',
            'y': 'lat',
            'z': 'depth',
        }
        for ncf in members:
            with IncompleteMultidimensionalTrajectory(ncf) as old:
                df = old.to_dataframe(axes=axes, clean_cols=False)
                dfs.append(df)

        full_df = pd.concat(dfs, ignore_index=True)

        # Now add a profile axis
        axes = {
            'trajectory': 'trajectory',
            'profile': 'profile_id',
            't': 'profile_time',
            'x': 'profile_lon',
            'y': 'profile_lat',
            'z': 'depth',
        }

        newds = ContiguousRaggedTrajectoryProfile.from_dataframe(
            full_df,
            output=new_path,
            axes=axes,
            mode='a'
        )

        # Apply default metadata
        attrs = read_attrs(template='ioos_ngdac')
        newds.apply_meta(attrs, create_vars=False, create_dims=False)
        newds.close()

        safe_makedirs(os.path.dirname(output))
        shutil.move(new_path, output)
    finally:
        os.close(new_fp)
        if os.path.exists(new_path):
            os.remove(new_path)
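
A short usage sketch, assuming the per-profile netCDF files produced elsewhere in the pipeline all live in a single directory; the paths below are placeholders.

# Merge every *.nc profile file in one folder into a single
# ContiguousRaggedTrajectoryProfile file at the output path.
profiles_dir = '/data/netcdf/bass-20160909T1733'            # placeholder
merged_path = '/data/merged/bass-20160909T1733_merged.nc'   # placeholder
merge_profile_netcdf_files(profiles_dir, merged_path)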
Example #3
    def test_all_ascii(self):
        out_base = resource('slocum', 'real', 'netcdf', 'bass-20160909T1733')
        safe_makedirs(out_base)

        for f in glob(resource('slocum', 'usf_bass*.dat')):
            args = dict(
                file=f,
                reader_class=SlocumReader,
                config_path=resource('slocum', 'config', 'bass-20160909T1733'),
                output_path=out_base,
                subset=False,
                template='ioos_ngdac',
                profile_id_type=2,
                tsint=10,
                filter_distance=1,
                filter_points=5,
                filter_time=10,
                filter_z=1
            )
            create_dataset(**args)

        output_files = sorted(os.listdir(out_base))
        output_files = [os.path.join(out_base, o) for o in output_files]

        # First profile
        with nc4.Dataset(output_files[0]) as ncd:
            assert ncd.variables['profile_id'].ndim == 0
            assert ncd.variables['profile_id'][0] == 0

        # Last profile
        with nc4.Dataset(output_files[-1]) as ncd:
            assert ncd.variables['profile_id'].ndim == 0
            assert ncd.variables['profile_id'][0] == len(output_files) - 1

        # Check netCDF file for compliance
        ds = namedtuple('Arguments', ['file'])
        for o in output_files:
            assert check_dataset(ds(file=o)) == 0
Example #4
    def setUp(self):
        super().setUp()

        safe_makedirs(binary_path)
        safe_makedirs(ascii_path)
        safe_makedirs(netcdf_path)
        safe_makedirs(ftp_path)
        safe_makedirs(erddap_content_path)
        safe_makedirs(erddap_flag_path)
Example #5
    def convert(self):
        # Copy to tempdir
        for f in self.matched_files:
            fname = os.path.basename(f)
            tmpf = os.path.join(self.tmpdir, fname)
            shutil.copy2(f, tmpf)

        safe_makedirs(self.destination_directory)

        # Run conversion script
        convert_binary_path = os.path.join(os.path.dirname(__file__), 'bin',
                                           'convertDbds.sh')
        pargs = [convert_binary_path, '-q', '-p', '-c', self.cache_directory]

        # Perform pseudograms if this ASCII file matches the deployment
        # name of things we know to have the data. There needs to be a
        # better way to figure this out, but we don't have any understanding
        # of a deployment config object at this point.

        # Ideally this code isn't tacked into convertDbds.sh to output separate
        # files and can be done using the ASCII files exported from SlocumMerger
        # using pandas. Maybe Rob Cermack will take a look and integrate the code
        # more tightly into GUTILS? For now we are going to keep it separate
        # so UAF can iterate on the code and we can just plop their updated files
        # into the "ecotools" folder of GUTILS.
        for d in PSEUDOGRAM_DEPLOYMENTS:
            if d in self.matched_files[0]:
                pargs = pargs + [
                    '-y',
                    sys.executable,
                    '-g',  # Makes the pseudogram ASCII
                    '-i',  # Makes the pseudogram images. This is slow!
                    '-r',
                    "60.0"
                ]

        pargs.append(self.tmpdir)
        pargs.append(self.destination_directory)

        command_output, return_code = generate_stream(pargs)

        # Parse the script output and, every time a .dat (ASCII) file is
        # seen, emit it together with the binary files that produced it
        processed = []
        output_files = command_output.read().split('\n')
        binary_files = []
        for x in output_files:

            if x.startswith('Error'):
                L.error(x)
                continue

            if x.startswith('Skipping'):
                continue

            fname = os.path.basename(x)
            _, suff = os.path.splitext(fname)

            if suff == '.dat':
                ascii_file = os.path.join(self.destination_directory, fname)
                if os.path.isfile(ascii_file):
                    processed.append({
                        'ascii': ascii_file,
                        'binary': sorted(binary_files)
                    })
                    L.info("Converted {} to {}".format(
                        ','.join([
                            os.path.basename(x) for x in sorted(binary_files)
                        ]), fname))
                else:
                    L.warning("{} not an output file".format(x))

                binary_files = []
            else:
                bf = os.path.join(self.source_directory, fname)
                if os.path.isfile(x):
                    binary_files.append(bf)

        return processed
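
The pseudogram branch above is keyed off simple substring matches against PSEUDOGRAM_DEPLOYMENTS. A sketch of what that module-level constant might look like follows; the deployment names are placeholders, not real deployments.

# Hypothetical example of the constant consulted by convert(). Any
# matched file whose path contains one of these strings gets the extra
# pseudogram flags (-y <python>, -g, -i, -r 60.0) passed to convertDbds.sh.
PSEUDOGRAM_DEPLOYMENTS = [
    'example_glider-2021-123',  # placeholder deployment name
    'example_glider-2022-045',  # placeholder deployment name
]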
Example #6
def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH):
    try:
        # Path to hold file while we create it
        tmp_handle, tmp_path = tempfile.mkstemp(suffix='.nc', prefix='gutils_glider_netcdf_')

        profile_time = profile.t.dropna().iloc[0]

        # Figure out which profile index to use (epoch or integer)
        if profile_id_type == ProfileIdTypes.EPOCH:
            # We are using the epoch as the profile_index!
            profile_index = calendar.timegm(profile_time.utctimetuple())
        elif profile_id_type == ProfileIdTypes.COUNT:
            # Get all existing netCDF outputs and find out the index of this netCDF file. That
            # will be the profile_id of this file. This is effectively keeping a tally of netCDF
            # files that have been created and only works if NETCDF FILES ARE WRITTEN IN
            # ASCENDING ORDER.
            # There is a race condition here if files are being written in parallel, and one
            # should be sure that no more files are being written while this function runs.
            # The file being written is the last profile available.
            netcdf_files_same_mode = list(glob(
                os.path.join(
                    output_path,
                    '*_{}.nc'.format(mode)
                )
            ))
            profile_index = len(netcdf_files_same_mode)
        elif profile_id_type == ProfileIdTypes.FRAME:
            profile_index = profile.profile.iloc[0]
        else:
            raise ValueError('{} is not a valid profile type'.format(profile_id_type))

        # Create final filename
        filename = "{0}_{1:010d}_{2:%Y%m%dT%H%M%S}Z_{3}.nc".format(
            attrs['glider'],
            profile_index,
            profile_time,
            mode
        )
        output_file = os.path.join(output_path, filename)

        # Add in the trajectory dimension to make pocean happy
        traj_name = '{}-{}'.format(
            attrs['glider'],
            attrs['trajectory_date']
        )
        profile = profile.assign(trajectory=traj_name)

        # We add this back in later
        profile.drop('profile', axis=1, inplace=True)

        # Compute U/V scalar values
        uv_txy = get_uv_data(profile)
        if 'u_orig' in profile.columns and 'v_orig' in profile.columns:
            profile.drop(['u_orig', 'v_orig'], axis=1, inplace=True)

        # Compute profile scalar values
        profile_txy = get_profile_data(profile, method=None)

        # Calculate some geographic global attributes
        attrs = dict_update(attrs, get_geographic_attributes(profile))
        # Calculate some vertical global attributes
        attrs = dict_update(attrs, get_vertical_attributes(profile))
        # Calculate some temporal global attributes
        attrs = dict_update(attrs, get_temporal_attributes(profile))
        # Set the creation dates and history
        attrs = dict_update(attrs, get_creation_attributes(profile))

        # Changing column names here from the default 't z x y'
        axes = {
            't': 'time',
            'z': 'depth',
            'x': 'lon',
            'y': 'lat',
            'sample': 'time'
        }
        profile = profile.rename(columns=axes)

        # Use pocean to create NetCDF file
        with IncompleteMultidimensionalTrajectory.from_dataframe(
                profile,
                tmp_path,
                axes=axes,
                reduce_dims=True,
                mode='a') as ncd:

            # We only want to apply metadata from the `attrs` map if the variable is already in
            # the netCDF file or it is a scalar variable (no shape defined). This avoids
            # creating measured variables that were not measured in this profile.
            prof_attrs = attrs.copy()

            vars_to_update = OrderedDict()
            for vname, vobj in prof_attrs['variables'].items():
                if vname in ncd.variables or ('shape' not in vobj and 'type' in vobj):
                    if 'shape' in vobj:
                        # Assign coordinates
                        vobj['attributes']['coordinates'] = '{} {} {} {}'.format(
                            axes.get('t'),
                            axes.get('z'),
                            axes.get('x'),
                            axes.get('y'),
                        )
                    vars_to_update[vname] = vobj
                else:
                    # L.debug("Skipping missing variable: {}".format(vname))
                    pass

            prof_attrs['variables'] = vars_to_update
            ncd.apply_meta(prof_attrs)

            # Set trajectory value
            ncd.id = traj_name
            ncd.variables['trajectory'][0] = traj_name

            # Set profile_* data
            set_profile_data(ncd, profile_txy, profile_index)

            # Set *_uv data
            set_uv_data(ncd, uv_txy)

        # Move to final destination
        safe_makedirs(os.path.dirname(output_file))
        os.chmod(tmp_path, 0o664)
        shutil.move(tmp_path, output_file)
        L.info('Created: {}'.format(output_file))
        return output_file
    except BaseException:
        raise
    finally:
        os.close(tmp_handle)
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
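
For context, a hedged sketch of how a caller might invoke this per extracted profile. It assumes `data` is a pandas DataFrame carrying the 't' and 'profile' columns used above, that deployment attributes come from read_attrs() (as in Example #2), and that 'rt' is a valid mode string; those assumptions are not confirmed by the example itself.

# Sketch only: names and the 'rt' mode value are illustrative assumptions.
attrs = read_attrs(template='ioos_ngdac')  # deployment/global attributes
for _, profile_df in data.groupby('profile'):
    create_profile_netcdf(
        attrs,
        profile_df,
        output_path='/data/netcdf/deployment-1',  # placeholder path
        mode='rt',
        profile_id_type=ProfileIdTypes.EPOCH,
    )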