def pygeons_merge(input_files, output_stem=None):
  '''
  Merge data files
  '''
  logger.info('Running pygeons merge ...')
  data_list = [dict_from_hdf5(i) for i in input_files]
  data_list = _common_context(data_list)
  out = data_list[0]
  for d in data_list[1:]:
    for dir in ['east', 'north', 'vertical']:
      # overwrite data in *out* with non-missing data in *d*
      missing_in_d = np.isinf(d[dir + '_std_dev'])
      missing_in_out = np.isinf(out[dir + '_std_dev'])
      if np.any(~missing_in_d & ~missing_in_out):
        warnings.warn(
          'Data for some stations and times exist in multiple '
          'datasets. Precedence is determined by the order the data '
          'files were specified in.')

      out[dir][~missing_in_d] = d[dir][~missing_in_d]
      out[dir + '_std_dev'][~missing_in_d] = d[dir + '_std_dev'][~missing_in_d]

  # set output file name
  if output_stem is None:
    output_stem = 'merged'

  output_file = output_stem + '.h5'
  hdf5_from_dict(output_file, out)
  logger.info('Merged data written to %s' % output_file)

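# Usage sketch for *pygeons_merge* (the file names below are
# hypothetical). Files listed later take precedence wherever datasets
# overlap, since each pass overwrites *out* with the non-missing data
# from the next file.
def _example_merge():
  pygeons_merge(['network_a.h5', 'network_b.h5'], output_stem='combined')
  # -> writes 'combined.h5'; where both files have data for the same
  #    station and time, the values from 'network_b.h5' are kept
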
def pygeons_autoclean(input_file,
                      network_model=('spwen12-se',),
                      network_params=(1.0, 0.1, 100.0),
                      station_model=('linear',),
                      station_params=(),
                      output_stem=None,
                      outlier_tol=4.0):
  '''
  Remove outliers with a data editing algorithm
  '''
  logger.info('Running pygeons autoclean ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # dictionary which will contain the edited data
  out = dict((k, np.copy(v)) for k, v in data.iteritems())
  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  station_params = _params_dict(station_params)
  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.autoclean'

  output_file = output_stem + '.h5'
  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'], data['latitude'])
  x, y = bm(data['longitude'], data['latitude'])
  xy = np.array([x, y]).T
  _log_autoclean(input_file,
                 network_model, network_params,
                 station_model, station_params,
                 outlier_tol,
                 output_file)
  for dir in ['east', 'north', 'vertical']:
    de, sde = autoclean(t=data['time'][:, None],
                        x=xy,
                        d=data[dir],
                        sd=data[dir + '_std_dev'],
                        network_model=network_model,
                        network_params=network_params[dir],
                        station_model=station_model,
                        station_params=station_params[dir],
                        tol=outlier_tol)
    out[dir] = de
    out[dir + '_std_dev'] = sde

  hdf5_from_dict(output_file, out)
  logger.info('Edited data written to %s' % output_file)
  return

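# Usage sketch for *pygeons_autoclean* (hypothetical file name). The
# model and hyperparameter values are the defaults from the signature
# above; a larger *outlier_tol* flags fewer observations as outliers.
def _example_autoclean():
  pygeons_autoclean('network_a.h5',
                    network_model=('spwen12-se',),
                    network_params=(1.0, 0.1, 100.0),
                    outlier_tol=4.0)
  # -> writes 'network_a.autoclean.h5'
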
def pygeons_fit(input_file,
                network_model=('spwen12-se',),
                network_params=(1.0, 0.1, 100.0),
                station_model=('linear',),
                station_params=(),
                output_stem=None):
  '''
  Condition the Gaussian process to the observations and evaluate the
  posterior at the observation points.
  '''
  logger.info('Running pygeons fit ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  # create output dictionary
  out = dict((k, np.copy(v)) for k, v in data.iteritems())
  # convert params to a dictionary of hyperparameters for each direction
  network_params = _params_dict(network_params)
  station_params = _params_dict(station_params)
  # make output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.fit'

  output_file = output_stem + '.h5'
  # convert geodetic positions to cartesian
  bm = make_basemap(data['longitude'], data['latitude'])
  x, y = bm(data['longitude'], data['latitude'])
  xy = np.array([x, y]).T
  _log_fit(input_file,
           network_model, network_params,
           station_model, station_params,
           output_file)
  for dir in ['east', 'north', 'vertical']:
    u, su = fit(t=data['time'][:, None],
                x=xy,
                d=data[dir],
                sd=data[dir + '_std_dev'],
                network_model=network_model,
                network_params=network_params[dir],
                station_model=station_model,
                station_params=station_params[dir])
    out[dir] = u
    out[dir + '_std_dev'] = su

  hdf5_from_dict(output_file, out)
  logger.info('Posterior fit written to %s' % output_file)
  return

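# Usage sketch for *pygeons_fit* (hypothetical file name), run on the
# output of *pygeons_autoclean*. With no *output_stem* the '.fit'
# suffix is appended to the input file stem.
def _example_fit():
  pygeons_fit('network_a.autoclean.h5')
  # -> writes 'network_a.autoclean.fit.h5' containing the posterior
  #    mean and standard deviation at the observation points
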
def pygeons_toh5(input_text_file, file_type='csv', output_stem=None):
  '''
  converts a text file to an hdf5 file
  '''
  logger.info('Running pygeons toh5 ...')
  data = dict_from_text(input_text_file, parser=file_type)
  if output_stem is None:
    output_stem = _remove_extension(input_text_file)

  output_file = output_stem + '.h5'
  hdf5_from_dict(output_file, data)
  logger.info('Data written to %s' % output_file)
  return

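# Usage sketch for *pygeons_toh5* (hypothetical file name). *file_type*
# selects the text parser; 'csv' is the default in the signature above.
def _example_toh5():
  pygeons_toh5('network_a.csv', file_type='csv')
  # -> writes 'network_a.h5'
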
def pygeons_clean(input_file, resolution='i',
                  input_edits_file=None,
                  break_lons=None, break_lats=None,
                  break_conn=None, no_display=False,
                  output_stem=None, **kwargs):
  '''
  runs the PyGeoNS Interactive Cleaner

  Parameters
  ----------
  input_file : str
    Name of the input HDF5 data file
  resolution : str
    basemap resolution
  input_edits_file : str
    Name of the file containing edits which will automatically be
    applied before opening up the interactive viewer.
  no_display : bool
    If True, the edits are applied without opening the interactive
    viewer.
  output_stem : str
    Stem for the output file names. The cleaned data are written to
    *output_stem*.h5 and all edits are recorded in *output_stem*.txt.
  **kwargs :
    gets passed to InteractiveCleaner
  '''
  logger.info('Running pygeons clean ...')
  data = dict_from_hdf5(input_file)
  out = dict((k, np.copy(v)) for k, v in data.iteritems())

  ts_fig, ts_ax = plt.subplots(3, 1, sharex=True,
                               num='Time Series View',
                               facecolor='white')
  _setup_ts_ax(ts_ax)
  map_fig, map_ax = plt.subplots(num='Map View', facecolor='white')
  bm = make_basemap(data['longitude'], data['latitude'],
                    resolution=resolution)
  _setup_map_ax(bm, map_ax)
  x, y = bm(data['longitude'], data['latitude'])
  pos = np.array([x, y]).T
  t = data['time']
  dates = [mjd_inv(ti, '%Y-%m-%d') for ti in t]
  units = _unit_string(data['space_exponent'], data['time_exponent'])
  conv = 1.0 / unit_conversion(units, time='day', space='m')
  u = conv * data['east']
  v = conv * data['north']
  z = conv * data['vertical']
  su = conv * data['east_std_dev']
  sv = conv * data['north_std_dev']
  sz = conv * data['vertical_std_dev']
  ic = InteractiveCleaner(t, pos, u=u, v=v, z=z, su=su, sv=sv, sz=sz,
                          map_ax=map_ax, ts_ax=ts_ax,
                          time_labels=dates,
                          units=units,
                          station_labels=data['id'],
                          **kwargs)

  # make edits to the data set prior to displaying it
  if input_edits_file is not None:
    with open(input_edits_file, 'r') as fin:
      for line in fin:
        # ignore blank lines
        if line.isspace():
          continue

        type, sta, a, b = line.strip().split()
        # set the current station in *ic* to the station for this edit
        xidx, = (data['id'] == sta).nonzero()
        if len(xidx) == 0:
          # continue because the station does not exist in this
          # dataset
          continue

        ic.xidx = xidx[0]
        if type == 'outliers':
          start_time = mjd(a, '%Y-%m-%d')
          stop_time = mjd(b, '%Y-%m-%d')
          ic.remove_outliers(start_time, stop_time)
        elif type == 'jump':
          jump_time = mjd(a, '%Y-%m-%d')
          delta = int(b)
          ic.remove_jump(jump_time, delta)
        else:
          raise ValueError('edit type must be either "outliers" or "jump"')

  if not no_display:
    ic.update()
    ic.connect()

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.clean'

  output_file = output_stem + '.h5'
  output_edits_file = output_stem + '.txt'
  with open(output_edits_file, 'w') as fout:
    for i in ic.log:
      type, xidx, a, b = i
      if type == 'outliers':
        station = data['id'][xidx]
        start_date = mjd_inv(a, '%Y-%m-%d')
        stop_date = mjd_inv(b, '%Y-%m-%d')
        fout.write('outliers %s %s %s\n' % (station, start_date, stop_date))
      elif type == 'jump':
        station = data['id'][xidx]
        jump_date = mjd_inv(a, '%Y-%m-%d')
        fout.write('jump %s %s %s\n' % (station, jump_date, b))
      else:
        raise ValueError('edit type must be either "outliers" or "jump"')

  logger.info('Edits saved to %s' % output_edits_file)
  clean_data = ic.get_data()
  out['east'] = clean_data[0] / conv
  out['north'] = clean_data[1] / conv
  out['vertical'] = clean_data[2] / conv
  out['east_std_dev'] = clean_data[3] / conv
  out['north_std_dev'] = clean_data[4] / conv
  out['vertical_std_dev'] = clean_data[5] / conv
  hdf5_from_dict(output_file, out)
  logger.info('Cleaned data written to %s' % output_file)
  logger.info('Edits written to %s' % output_edits_file)
  return

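# Sketch of the edits file consumed by *pygeons_clean* through
# *input_edits_file*, inferred from the parsing loop above. Each line
# holds one edit with four whitespace-separated fields, either
#
#   outliers STATION_ID START_DATE STOP_DATE   (dates as YYYY-MM-DD)
#   jump STATION_ID JUMP_DATE DELTA            (DELTA is an integer)
#
# The station name and file names below are hypothetical.
def _example_clean():
  with open('edits.txt', 'w') as f:
    f.write('outliers P403 2015-01-01 2015-02-01\n')
    f.write('jump P403 2015-06-15 3\n')

  pygeons_clean('network_a.h5', input_edits_file='edits.txt',
                no_display=True)
  # -> applies the edits without opening the viewer, then writes
  #    'network_a.clean.h5' and 'network_a.clean.txt'
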
#!/usr/bin/env python
# this script removes high sigma values from an HDF5 file
import sys

import numpy as np

from pygeons.io.convert import dict_from_hdf5, hdf5_from_dict

fname = sys.argv[1]
cutoff = 0.02  # 2 cm cutoff

data = dict_from_hdf5(fname)
mask = ((data['north_std_dev'] > cutoff) |
        (data['east_std_dev'] > cutoff) |
        (data['vertical_std_dev'] > cutoff))

data['north'][mask] = np.nan
data['north_std_dev'][mask] = np.inf
data['east'][mask] = np.nan
data['east_std_dev'][mask] = np.inf
data['vertical'][mask] = np.nan
data['vertical_std_dev'][mask] = np.inf

hdf5_from_dict(fname, data)

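# Usage sketch for the script above (the script file name is
# hypothetical). It edits the HDF5 file in place, so keep a copy if
# you need the original:
#
#   python remove_high_sigma.py network_a.h5
#
# Observations whose north, east, or vertical standard deviation
# exceeds 2 cm are flagged as missing (nan data, inf std. dev.),
# which is the missing-data convention used by *pygeons_merge* above.
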
def pygeons_crop(input_file, start_date=None, stop_date=None,
                 min_lat=-np.inf, max_lat=np.inf,
                 min_lon=-np.inf, max_lon=np.inf,
                 stations=None, output_stem=None):
  '''
  Sets the time span of the data set to be between *start_date* and
  *stop_date*. Sets the stations to be within the latitude and
  longitude bounds.

  Parameters
  ----------
  input_file : str
    Name of the input HDF5 data file
  start_date : str, optional
    Start date of the output data set in YYYY-MM-DD. Defaults to the
    earliest date in the input data set.
  stop_date : str, optional
    Stop date of the output data set in YYYY-MM-DD. Defaults to the
    latest date in the input data set.
  min_lon, max_lon, min_lat, max_lat : float, optional
    Spatial bounds on the output data set
  stations : str list, optional
    List of stations to be removed from the dataset. This is in
    addition to the stations removed by the lon/lat bounds.
  output_stem : str, optional
    Stem for the output file name. Defaults to the input file name
    with a '.crop' suffix.
  '''
  logger.info('Running pygeons crop ...')
  data = dict_from_hdf5(input_file)
  out = dict((k, np.copy(v)) for k, v in data.iteritems())

  if start_date is None:
    start_date = mjd.mjd_inv(data['time'].min(), '%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd.mjd_inv(data['time'].max(), '%Y-%m-%d')

  if stations is None:
    stations = []

  # remove times that are not within the bounds of *start_date* and
  # *stop_date*
  start_time = int(mjd.mjd(start_date, '%Y-%m-%d'))
  stop_time = int(mjd.mjd(stop_date, '%Y-%m-%d'))
  idx = ((data['time'] >= start_time) &
         (data['time'] <= stop_time))
  out['time'] = out['time'][idx]
  for dir in ['east', 'north', 'vertical']:
    out[dir] = out[dir][idx, :]
    out[dir + '_std_dev'] = out[dir + '_std_dev'][idx, :]

  # find stations that are within the bounds
  in_bounds = ((data['longitude'] > min_lon) &
               (data['longitude'] < max_lon) &
               (data['latitude'] > min_lat) &
               (data['latitude'] < max_lat))
  # find stations that are in the list of stations to be removed
  in_list = np.array([i in stations for i in data['id']])
  # keep stations that are in bounds and not in the list
  idx, = (in_bounds & ~in_list).nonzero()
  out['id'] = out['id'][idx]
  out['longitude'] = out['longitude'][idx]
  out['latitude'] = out['latitude'][idx]
  for dir in ['east', 'north', 'vertical']:
    out[dir] = out[dir][:, idx]
    out[dir + '_std_dev'] = out[dir + '_std_dev'][:, idx]

  # set output file name
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.crop'

  output_file = output_stem + '.h5'
  hdf5_from_dict(output_file, out)
  logger.info('Cropped data written to %s' % output_file)
  return

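# Usage sketch for *pygeons_crop* (hypothetical file name, bounds, and
# station): keep two years of data from stations inside a lon/lat box,
# and additionally drop station 'P403'.
def _example_crop():
  pygeons_crop('network_a.h5',
               start_date='2015-01-01', stop_date='2016-12-31',
               min_lon=-125.0, max_lon=-120.0,
               min_lat=45.0, max_lat=49.0,
               stations=['P403'])
  # -> writes 'network_a.crop.h5'
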
def pygeons_strain(input_file,
                   network_prior_model=('spwen12-se',),
                   network_prior_params=(1.0, 0.1, 100.0),
                   network_noise_model=(),
                   network_noise_params=(),
                   station_noise_model=('linear',),
                   station_noise_params=(),
                   start_date=None, stop_date=None,
                   positions=None, positions_file=None,
                   rate=True, vertical=True, covariance=False,
                   output_stem=None):
  '''
  calculates strain
  '''
  logger.info('Running pygeons strain ...')
  data = dict_from_hdf5(input_file)
  if data['time_exponent'] != 0:
    raise ValueError('input dataset must have units of displacement')

  if data['space_exponent'] != 1:
    raise ValueError('input dataset must have units of displacement')

  out_dx = dict((k, np.copy(v)) for k, v in data.iteritems())
  out_dy = dict((k, np.copy(v)) for k, v in data.iteritems())

  # convert params to a dictionary of hyperparameters for each direction
  network_prior_params = _params_dict(network_prior_params)
  network_noise_params = _params_dict(network_noise_params)
  station_noise_params = _params_dict(station_noise_params)

  # convert geodetic input positions to cartesian
  bm = make_basemap(data['longitude'], data['latitude'])
  x, y = bm(data['longitude'], data['latitude'])
  xy = np.array([x, y]).T

  # set output positions
  if (positions is None) & (positions_file is None):
    # no output positions were specified so return the solution at the
    # input data positions
    output_id = np.array(data['id'], copy=True)
    output_lon = np.array(data['longitude'], copy=True)
    output_lat = np.array(data['latitude'], copy=True)
  else:
    output_id = np.zeros((0,), dtype=str)
    output_lon = np.zeros((0,), dtype=float)
    output_lat = np.zeros((0,), dtype=float)
    if positions_file is not None:
      # if positions file was specified
      pos = np.loadtxt(positions_file, dtype=str, ndmin=2)
      if pos.shape[1] != 3:
        raise ValueError(
          'positions file must contain a column for IDs, longitudes, '
          'and latitudes')

      output_id = np.hstack((output_id, pos[:, 0]))
      output_lon = np.hstack((output_lon, pos[:, 1].astype(float)))
      output_lat = np.hstack((output_lat, pos[:, 2].astype(float)))

    if positions is not None:
      # if positions were specified via the command line
      pos = np.array(positions, dtype=str).reshape((-1, 3))
      output_id = np.hstack((output_id, pos[:, 0]))
      output_lon = np.hstack((output_lon, pos[:, 1].astype(float)))
      output_lat = np.hstack((output_lat, pos[:, 2].astype(float)))

  # convert geodetic output positions to cartesian
  output_x, output_y = bm(output_lon, output_lat)
  output_xy = np.array([output_x, output_y]).T

  # set output times
  if start_date is None:
    start_date = mjd_inv(np.min(data['time']), '%Y-%m-%d')

  if stop_date is None:
    stop_date = mjd_inv(np.max(data['time']), '%Y-%m-%d')

  start_time = mjd(start_date, '%Y-%m-%d')
  stop_time = mjd(stop_date, '%Y-%m-%d')
  output_time = np.arange(start_time, stop_time + 1)

  # set output file names
  if output_stem is None:
    output_stem = _remove_extension(input_file) + '.strain'

  output_dx_file = output_stem + '.dudx.h5'
  output_dy_file = output_stem + '.dudy.h5'

  _log_strain(input_file,
              network_prior_model, network_prior_params,
              network_noise_model, network_noise_params,
              station_noise_model, station_noise_params,
              start_date, stop_date, output_id, rate, vertical,
              covariance, output_dx_file, output_dy_file)

  for dir in ['east', 'north', 'vertical']:
    if (dir == 'vertical') & (not vertical):
      logger.debug('Not computing vertical deformation gradients')
      # do not compute the deformation gradients for vertical. Just
      # return zeros.
      dx = np.zeros((output_time.shape[0], output_xy.shape[0]))
      sdx = np.zeros((output_time.shape[0], output_xy.shape[0]))
      dy = np.zeros((output_time.shape[0], output_xy.shape[0]))
      sdy = np.zeros((output_time.shape[0], output_xy.shape[0]))
      if covariance:
        # if covariance is True then create an empty array of
        # covariances
        cdx = np.zeros((output_time.shape[0], output_xy.shape[0],
                        output_time.shape[0], output_xy.shape[0]))
        cdy = np.zeros((output_time.shape[0], output_xy.shape[0],
                        output_time.shape[0], output_xy.shape[0]))
        soln = (dx, sdx, cdx, dy, sdy, cdy)
      else:
        soln = (dx, sdx, dy, sdy)

    else:
      soln = strain(t=data['time'][:, None],
                    x=xy,
                    d=data[dir],
                    sd=data[dir + '_std_dev'],
                    network_prior_model=network_prior_model,
                    network_prior_params=network_prior_params[dir],
                    network_noise_model=network_noise_model,
                    network_noise_params=network_noise_params[dir],
                    station_noise_model=station_noise_model,
                    station_noise_params=station_noise_params[dir],
                    out_t=output_time[:, None],
                    out_x=output_xy,
                    rate=rate,
                    covariance=covariance)

    if covariance:
      # soln contains six entries when covariance is True
      dx, sdx, cdx, dy, sdy, cdy = soln
      out_dx[dir] = dx
      out_dx[dir + '_std_dev'] = sdx
      out_dx[dir + '_covariance'] = cdx
      out_dy[dir] = dy
      out_dy[dir + '_std_dev'] = sdy
      out_dy[dir + '_covariance'] = cdy
    else:
      # soln contains four entries when covariance is False
      dx, sdx, dy, sdy = soln
      out_dx[dir] = dx
      out_dx[dir + '_std_dev'] = sdx
      out_dy[dir] = dy
      out_dy[dir + '_std_dev'] = sdy

  out_dx['time'] = output_time
  out_dx['longitude'] = output_lon
  out_dx['latitude'] = output_lat
  out_dx['id'] = output_id
  out_dx['time_exponent'] = -int(rate)
  out_dx['space_exponent'] = 0

  out_dy['time'] = output_time
  out_dy['longitude'] = output_lon
  out_dy['latitude'] = output_lat
  out_dy['id'] = output_id
  out_dy['time_exponent'] = -int(rate)
  out_dy['space_exponent'] = 0

  hdf5_from_dict(output_dx_file, out_dx)
  hdf5_from_dict(output_dy_file, out_dy)
  if rate:
    logger.info('Posterior velocity gradients written to %s and %s' %
                (output_dx_file, output_dy_file))
  else:
    logger.info('Posterior displacement gradients written to %s and %s' %
                (output_dx_file, output_dy_file))

  return

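# Usage sketch for *pygeons_strain* (hypothetical file names and
# positions). The positions file must have three columns, ID,
# longitude, and latitude, as enforced by the shape check above. The
# output file names assume *_remove_extension* strips only the final
# '.h5' extension.
def _example_strain():
  with open('positions.txt', 'w') as f:
    f.write('OUT1 -122.5 47.0\n')
    f.write('OUT2 -121.0 46.5\n')

  pygeons_strain('network_a.fit.h5', positions_file='positions.txt')
  # -> writes 'network_a.fit.strain.dudx.h5' and
  #    'network_a.fit.strain.dudy.h5' with the posterior velocity
  #    gradients (rate=True by default) at OUT1 and OUT2
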