def convert(self):
    # Copy to tempdir
    for f in self.matched_files:
        fname = os.path.basename(f)
        tmpf = os.path.join(self.tmpdir, fname)
        shutil.copy2(f, tmpf)

    safe_makedirs(self.destination_directory)

    # Run conversion script
    convert_binary_path = os.path.join(
        os.path.dirname(__file__),
        'bin',
        'convertDbds.sh'
    )
    pargs = [convert_binary_path, '-q', '-p', '-c', self.cache_directory]
    pargs.append(self.tmpdir)
    pargs.append(self.destination_directory)

    command_output, return_code = generate_stream(pargs)

    # Parse the conversion script output and build the results
    processed = []
    output_files = command_output.read().split('\n')
    # Iterate and, every time we hit a .dat file, flush the accumulated binary files
    binary_files = []
    for x in output_files:
        if x.startswith('Error'):
            L.error(x)
            continue

        if x.startswith('Skipping'):
            continue

        fname = os.path.basename(x)
        _, suff = os.path.splitext(fname)

        if suff == '.dat':
            ascii_file = os.path.join(self.destination_directory, fname)
            if os.path.isfile(ascii_file):
                processed.append({
                    'ascii': ascii_file,
                    'binary': sorted(binary_files)
                })
                L.info("Converted {} to {}".format(
                    ','.join([os.path.basename(b) for b in sorted(binary_files)]),
                    fname
                ))
            else:
                L.warning("{} not an output file".format(x))

            binary_files = []
        else:
            bf = os.path.join(self.source_directory, fname)
            if os.path.isfile(x):
                binary_files.append(bf)

    return processed
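# A minimal sketch (not from the original source) of consuming convert()'s return value.
# Each entry pairs a converted ASCII *.dat file with the binary files that produced it.
# `merger` is assumed to be an already-configured instance of the enclosing class
# (source/destination/cache directories and matched files set up beforehand).
for result in merger.convert():
    ascii_path = result['ascii']            # path to the converted *.dat file
    source_binaries = result['binary']      # sorted list of contributing binary files
    print(ascii_path, '<-', ', '.join(source_binaries))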
def merge_profile_netcdf_files(folder, output):
    import pandas as pd
    from glob import glob

    new_fp, new_path = tempfile.mkstemp(suffix='.nc', prefix='gutils_merge_')

    try:
        # Gather the individual profile netCDF files
        members = sorted(list(glob(os.path.join(folder, '*.nc'))))

        # Iterate over the netCDF files and create a dataframe for each
        dfs = []
        axes = {
            'trajectory': 'trajectory',
            't': 'time',
            'x': 'lon',
            'y': 'lat',
            'z': 'depth',
        }
        for ncf in members:
            with IncompleteMultidimensionalTrajectory(ncf) as old:
                df = old.to_dataframe(axes=axes, clean_cols=False)
                dfs.append(df)

        full_df = pd.concat(dfs, ignore_index=True)

        # Now add the profile axes
        axes = {
            'trajectory': 'trajectory',
            'profile': 'profile_id',
            't': 'profile_time',
            'x': 'profile_lon',
            'y': 'profile_lat',
            'z': 'depth',
        }
        newds = ContiguousRaggedTrajectoryProfile.from_dataframe(
            full_df,
            output=new_path,
            axes=axes,
            mode='a'
        )

        # Apply default metadata
        attrs = read_attrs(template='ioos_ngdac')
        newds.apply_meta(attrs, create_vars=False, create_dims=False)
        newds.close()

        safe_makedirs(os.path.dirname(output))
        shutil.move(new_path, output)
    finally:
        os.close(new_fp)
        if os.path.exists(new_path):
            os.remove(new_path)
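# A minimal usage sketch (not from the original source): given a directory of
# per-profile netCDF files, merge them into a single ragged trajectory-profile file.
# The folder and output paths below are hypothetical placeholders.
profile_folder = '/data/deployments/example-deployment/netcdf'    # hypothetical input folder
merged_output = '/data/deployments/example-deployment/merged.nc'  # hypothetical output file
merge_profile_netcdf_files(profile_folder, merged_output)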
def test_all_ascii(self):
    out_base = resource('slocum', 'real', 'netcdf', 'bass-20160909T1733')
    safe_makedirs(out_base)

    for f in glob(resource('slocum', 'usf_bass*.dat')):
        args = dict(
            file=f,
            reader_class=SlocumReader,
            config_path=resource('slocum', 'config', 'bass-20160909T1733'),
            output_path=out_base,
            subset=False,
            template='ioos_ngdac',
            profile_id_type=2,
            tsint=10,
            filter_distance=1,
            filter_points=5,
            filter_time=10,
            filter_z=1
        )
        create_dataset(**args)

    output_files = sorted(os.listdir(out_base))
    output_files = [os.path.join(out_base, o) for o in output_files]

    # First profile
    with nc4.Dataset(output_files[0]) as ncd:
        assert ncd.variables['profile_id'].ndim == 0
        assert ncd.variables['profile_id'][0] == 0

    # Last profile
    with nc4.Dataset(output_files[-1]) as ncd:
        assert ncd.variables['profile_id'].ndim == 0
        assert ncd.variables['profile_id'][0] == len(output_files) - 1

    # Check each netCDF file for compliance
    ds = namedtuple('Arguments', ['file'])
    for o in output_files:
        assert check_dataset(ds(file=o)) == 0
def setUp(self):
    super().setUp()
    safe_makedirs(binary_path)
    safe_makedirs(ascii_path)
    safe_makedirs(netcdf_path)
    safe_makedirs(ftp_path)
    safe_makedirs(erddap_content_path)
    safe_makedirs(erddap_flag_path)
def convert(self):
    # Copy to tempdir
    for f in self.matched_files:
        fname = os.path.basename(f)
        tmpf = os.path.join(self.tmpdir, fname)
        shutil.copy2(f, tmpf)

    safe_makedirs(self.destination_directory)

    # Run conversion script
    convert_binary_path = os.path.join(
        os.path.dirname(__file__),
        'bin',
        'convertDbds.sh'
    )
    pargs = [convert_binary_path, '-q', '-p', '-c', self.cache_directory]

    # Perform pseudograms if this ASCII file matches the deployment
    # name of things we know to have the data. There needs to be a
    # better way to figure this out, but we don't have any understanding
    # of a deployment config object at this point.
    # Ideally this code wouldn't be tacked onto convertDbds.sh to output separate
    # files and could instead be done with pandas, using the ASCII files exported
    # from SlocumMerger. Maybe Rob Cermack will take a look and integrate the code
    # more tightly into GUTILS? For now we are going to keep it separate
    # so UAF can iterate on the code and we can just plop their updated files
    # into the "ecotools" folder of GUTILS.
    for d in PSEUDOGRAM_DEPLOYMENTS:
        if d in self.matched_files[0]:
            pargs = pargs + [
                '-y', sys.executable,
                '-g',       # Makes the pseudogram ASCII
                '-i',       # Makes the pseudogram images. This is slow!
                '-r', "60.0"
            ]

    pargs.append(self.tmpdir)
    pargs.append(self.destination_directory)

    command_output, return_code = generate_stream(pargs)

    # Parse the conversion script output and build the results
    processed = []
    output_files = command_output.read().split('\n')
    # Iterate and, every time we hit a .dat file, flush the accumulated binary files
    binary_files = []
    for x in output_files:
        if x.startswith('Error'):
            L.error(x)
            continue

        if x.startswith('Skipping'):
            continue

        fname = os.path.basename(x)
        _, suff = os.path.splitext(fname)

        if suff == '.dat':
            ascii_file = os.path.join(self.destination_directory, fname)
            if os.path.isfile(ascii_file):
                processed.append({
                    'ascii': ascii_file,
                    'binary': sorted(binary_files)
                })
                L.info("Converted {} to {}".format(
                    ','.join([os.path.basename(b) for b in sorted(binary_files)]),
                    fname
                ))
            else:
                L.warning("{} not an output file".format(x))

            binary_files = []
        else:
            bf = os.path.join(self.source_directory, fname)
            if os.path.isfile(x):
                binary_files.append(bf)

    return processed
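# For reference, a sketch (an assumption, not taken from the source) of the argument
# vector convert() assembles for a pseudogram-enabled deployment before handing it to
# generate_stream(). Every flag appears in the code above; the paths are hypothetical.
import sys

example_pargs = [
    'convertDbds.sh',         # resolved from <package>/bin/convertDbds.sh
    '-q', '-p',
    '-c', '/data/cache',      # hypothetical cache directory
    '-y', sys.executable,
    '-g',                     # write the pseudogram ASCII
    '-i',                     # write the pseudogram images (slow)
    '-r', '60.0',
    '/tmp/gutils_tmpdir',     # hypothetical tmpdir holding the copied binary files
    '/data/ascii',            # hypothetical destination directory
]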
def create_profile_netcdf(attrs, profile, output_path, mode, profile_id_type=ProfileIdTypes.EPOCH):
    try:
        # Path to hold file while we create it
        tmp_handle, tmp_path = tempfile.mkstemp(suffix='.nc', prefix='gutils_glider_netcdf_')

        profile_time = profile.t.dropna().iloc[0]

        # Figure out which profile index to use (epoch, count, or frame)
        if profile_id_type == ProfileIdTypes.EPOCH:
            # We are using the epoch as the profile_index!
            profile_index = calendar.timegm(profile_time.utctimetuple())
        elif profile_id_type == ProfileIdTypes.COUNT:
            # Get all existing netCDF outputs and find out the index of this netCDF file. That
            # will be the profile_id of this file. This is effectively keeping a tally of netCDF
            # files that have been created and only works if NETCDF FILES ARE WRITTEN IN
            # ASCENDING ORDER.
            # There is a race condition here if files are being written in parallel, and one
            # should be sure that no more files can be written while this function is running.
            # The file being written is the last profile available.
            netcdf_files_same_mode = list(glob(
                os.path.join(
                    output_path,
                    '*_{}.nc'.format(mode)
                )
            ))
            profile_index = len(netcdf_files_same_mode)
        elif profile_id_type == ProfileIdTypes.FRAME:
            profile_index = profile.profile.iloc[0]
        else:
            raise ValueError('{} is not a valid profile type'.format(profile_id_type))

        # Create final filename
        filename = "{0}_{1:010d}_{2:%Y%m%dT%H%M%S}Z_{3}.nc".format(
            attrs['glider'],
            profile_index,
            profile_time,
            mode
        )
        output_file = os.path.join(output_path, filename)

        # Add in the trajectory dimension to make pocean happy
        traj_name = '{}-{}'.format(
            attrs['glider'],
            attrs['trajectory_date']
        )
        profile = profile.assign(trajectory=traj_name)

        # We add this back in later
        profile.drop('profile', axis=1, inplace=True)

        # Compute U/V scalar values
        uv_txy = get_uv_data(profile)
        if 'u_orig' in profile.columns and 'v_orig' in profile.columns:
            profile.drop(['u_orig', 'v_orig'], axis=1, inplace=True)

        # Compute profile scalar values
        profile_txy = get_profile_data(profile, method=None)

        # Calculate some geographic global attributes
        attrs = dict_update(attrs, get_geographic_attributes(profile))
        # Calculate some vertical global attributes
        attrs = dict_update(attrs, get_vertical_attributes(profile))
        # Calculate some temporal global attributes
        attrs = dict_update(attrs, get_temporal_attributes(profile))
        # Set the creation dates and history
        attrs = dict_update(attrs, get_creation_attributes(profile))

        # Changing column names here from the default 't z x y'
        axes = {
            't': 'time',
            'z': 'depth',
            'x': 'lon',
            'y': 'lat',
            'sample': 'time'
        }
        profile = profile.rename(columns=axes)

        # Use pocean to create NetCDF file
        with IncompleteMultidimensionalTrajectory.from_dataframe(
                profile,
                tmp_path,
                axes=axes,
                reduce_dims=True,
                mode='a') as ncd:

            # We only want to apply metadata from the `attrs` map if the variable is already in
            # the netCDF file or it is a scalar variable (no shape defined). This avoids
            # creating measured variables that were not measured in this profile.
            prof_attrs = attrs.copy()

            vars_to_update = OrderedDict()
            for vname, vobj in prof_attrs['variables'].items():
                if vname in ncd.variables or ('shape' not in vobj and 'type' in vobj):
                    if 'shape' in vobj:
                        # Assign coordinates
                        vobj['attributes']['coordinates'] = '{} {} {} {}'.format(
                            axes.get('t'),
                            axes.get('z'),
                            axes.get('x'),
                            axes.get('y'),
                        )
                    vars_to_update[vname] = vobj
                else:
                    # L.debug("Skipping missing variable: {}".format(vname))
                    pass

            prof_attrs['variables'] = vars_to_update
            ncd.apply_meta(prof_attrs)

            # Set trajectory value
            ncd.id = traj_name
            ncd.variables['trajectory'][0] = traj_name

            # Set profile_* data
            set_profile_data(ncd, profile_txy, profile_index)

            # Set *_uv data
            set_uv_data(ncd, uv_txy)

        # Move to final destination
        safe_makedirs(os.path.dirname(output_file))
        os.chmod(tmp_path, 0o664)
        shutil.move(tmp_path, output_file)
        L.info('Created: {}'.format(output_file))
        return output_file
    except BaseException:
        raise
    finally:
        os.close(tmp_handle)
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
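# A minimal usage sketch (not from the original source). The attribute keys and
# DataFrame column names below are assumptions inferred from how the function reads
# them above ('glider', 'trajectory_date', 'variables' in attrs; t/x/y/z/profile
# columns); real deployments load attrs from a metadata template. The output path
# and mode string are hypothetical placeholders.
import pandas as pd

example_attrs = {
    'glider': 'example-glider',             # hypothetical glider name
    'trajectory_date': '20160909T1733',     # hypothetical deployment date
    'variables': {},                        # normally populated from a metadata template
}
example_profile = pd.DataFrame({
    't': pd.to_datetime(['2016-09-09T17:33:00', '2016-09-09T17:33:10']),
    'x': [-82.8, -82.8],                    # longitude
    'y': [27.5, 27.5],                      # latitude
    'z': [1.0, 5.0],                        # depth
    'profile': [0, 0],                      # profile (frame) index
})
create_profile_netcdf(
    example_attrs,
    example_profile,
    output_path='/tmp/gutils_output',       # hypothetical output directory
    mode='rt',                              # mode string used in the output filename
    profile_id_type=ProfileIdTypes.COUNT,
)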