import re

import h5py
import numpy as np

import ATL11
# The remaining names used below (ATL06_data, geo_index, Qfit_data, point_data,
# read_DEM, WV_MatlabDate, read_indexed_h5_file, pt_blockmedian, f06,
# queryIndex) come from this project's own modules; their import paths are
# assumed to be available.


def read_ATL06_data(ATL06_files, beam_pair=2, cycles=[1, 12]):
    '''
    Read ATL06 data from a list of files for a specific beam pair

    required arguments:
        ATL06_files: a list of ATL06 files
        beam_pair: pair number to read from the files
        cycles: first and last cycles to include
    '''
    params_11 = ATL11.defaults()
    # read in the ATL06 data from all the repeats
    D6_list = []
    ATL06_re = re.compile(r'ATL06_\d+_\d\d\d\d(\d\d)\d\d_')
    for filename in ATL06_files:
        try:
            m = ATL06_re.search(filename)
            # skip files whose cycle number falls outside [cycles[0], cycles[1]]
            if (int(m.group(1)) < cycles[0]) or (int(m.group(1)) > cycles[1]):
                continue
        except Exception:
            pass
        try:
            D6_list.append(ATL06_data(field_dict=params_11.ATL06_field_dict,
                                      beam_pair=beam_pair).from_file(filename))
        except KeyError:
            pass
    if len(D6_list) == 0:
        return None
    D6 = ATL06_data(beam_pair=beam_pair).from_list(D6_list)
    if D6.size == 0:
        return None
    # reorder data rows from D6 by cycle
    D6.index(np.argsort(D6.cycle_number[:, 0], axis=0))
    # choose the hemisphere and project the data to polar stereographic
    if np.max(D6.latitude) < 0:
        D6.get_xy(None, EPSG=3031)  # Antarctic polar stereographic
    else:
        D6.get_xy(None, EPSG=3413)  # Arctic polar stereographic
    return D6
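# --- usage sketch (not from the source): reading one beam pair from a set of
# repeat-track ATL06 granules.  The glob pattern and directory are hypothetical;
# read_ATL06_data() returns None when no files survive the cycle filter.
#
# import glob
# ATL06_files = sorted(glob.glob('ATL06/*.h5'))  # hypothetical data directory
# D6 = read_ATL06_data(ATL06_files, beam_pair=2, cycles=[1, 12])
# if D6 is not None:
#     # rows are now sorted by cycle, and D6.x/D6.y hold projected coordinates
#     print(D6.size, D6.x.shape)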
def get_data_for_geo_index(query_results, delta=[10000., 10000.], fields=None,
                           data=None, group='index', dir_root=''):
    # read the data from a set of query results
    # Currently the function knows how to read:
    #   h5_geoindex
    #   indexed h5s
    #   Qfit data (waveform and plain)
    #   DEM data (filtered and not)
    #   ATL06 data
    # Append more cases as needed
    if len(dir_root) > 0:
        dir_root += '/'
    out_data = list()

    # some data types take a dictionary rather than a list of fields
    if isinstance(fields, dict):
        field_dict = fields
        field_list = None
    else:
        field_dict = None
        field_list = fields

    # if we are querying any DEM data, work out the bounds of the query so we
    # don't have to read the whole DEMs
    all_types = [query_results[key]['type'] for key in query_results]
    if 'DEM' in all_types or 'filtered_DEM' in all_types:
        all_x = list()
        all_y = list()
        for key, result in query_results.items():
            all_x += result['x'].tolist()
            all_y += result['y'].tolist()
        bounds = [[np.min(all_x) - delta[0] / 2, np.max(all_x) + delta[0] / 2],
                  [np.min(all_y) - delta[1] / 2, np.max(all_y) + delta[1] / 2]]

    for file_key, result in query_results.items():
        if dir_root is not None:
            try:
                this_file = dir_root + file_key
            except TypeError:
                # file keys read back from an h5 index may be bytes
                this_file = dir_root + file_key.decode()
        else:
            this_file = file_key
        if result['type'] == 'h5_geoindex':
            D = geo_index().from_file(this_file).query_xy(
                (result['x'], result['y']), fields=fields, get_data=True,
                dir_root=dir_root)
        if result['type'] == 'ATL06':
            if fields is None:
                fields = {None: (u'latitude', u'longitude', u'h_li',
                                 u'delta_time')}
            D6_file, pair = this_file.split(':pair')
            D = [ATL06_data(beam_pair=int(pair), list_of_fields=field_list,
                            field_dict=field_dict).from_file(
                                filename=D6_file, index_range=np.array(temp))
                 for temp in zip(result['offset_start'], result['offset_end'])]
        if result['type'] == 'ATL11':
            D11_file, pair = this_file.split(':pair')
            D = [ATL11.data(beam_pair=int(pair), list_of_fields=field_list,
                            field_dict=field_dict).from_file(
                                filename=D11_file, index_range=np.array(temp))
                 for temp in zip(result['offset_start'], result['offset_end'])]
        if result['type'] == 'ATM_Qfit':
            D = [Qfit_data(filename=this_file, index_range=np.array(temp))
                 for temp in zip(result['offset_start'], result['offset_end'])]
        if result['type'] == 'ATM_waveform':
            D = [Qfit_data(filename=this_file, index_range=np.array(temp),
                           waveform_format=True)
                 for temp in zip(result['offset_start'], result['offset_end'])]
        if result['type'] == 'DEM':
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(filename=this_file,
                                              asPoints=True, bounds=bounds)
            D['time'] = np.zeros_like(D['x']) + WV_MatlabDate(this_file)
            D = point_data().from_dict(D)
            D.index(np.isfinite(D.z))
        if result['type'] == 'filtered_DEM':
            D = dict()
            D['x'], D['y'], D['z'] = read_DEM(this_file, asPoints=True,
                                              band_num=1, keepAll=True,
                                              bounds=bounds)
            D['x'], D['y'], D['sigma'] = read_DEM(this_file, asPoints=True,
                                                  band_num=2, keepAll=True,
                                                  bounds=bounds)
            D['time'] = np.zeros_like(D['x']) + WV_MatlabDate(this_file)
            D = point_data().from_dict(D)
            D.index(np.isfinite(D.z) & np.isfinite(D.sigma))
            D.filename = this_file
        if result['type'] == 'indexed_h5':
            D = [read_indexed_h5_file(
                this_file, [result['x'], result['y']], fields=fields,
                index_range=[result['offset_start'], result['offset_end']])]
        if result['type'] == 'indexed_h5_from_matlab':
            D = [read_indexed_h5_file(
                this_file, [result['x'] / 1000, result['y'] / 1000],
                fields=fields,
                index_range=[result['offset_start'], result['offset_end']])]
        if result['type'] is None:
            D = [data.subset(np.arange(temp[0], temp[1]))
                 for temp in zip(result['offset_start'], result['offset_end'])]
        # add data to the list of results.  May be a list or a single result
        if isinstance(D, list):
            for Di in D:
                if Di.filename is None:
                    Di.filename = this_file
            out_data += D
        else:
            if D.filename is None:
                D.filename = this_file
            out_data.append(D)
    return out_data
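# --- usage sketch (assumed workflow): get_data_for_geo_index() consumes the
# per-file results dict produced by a geo_index query.  The index file name,
# query coordinates, and dir_root below are hypothetical, and the exact shape
# of the get_data=False return value is an assumption based on the loop above.
#
# GI = geo_index().from_file('master_index.h5')  # hypothetical index file
# results = GI.query_xy(([-100000.], [-2000000.]), get_data=False)
# if results is not None:
#     D_list = get_data_for_geo_index(
#         results, fields={None: ('latitude', 'longitude', 'h_li')},
#         dir_root='/data/ATL06')  # hypothetical data root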
# query ATL06 for the current bin, and index it (this excerpt runs inside a
# loop over query bins)
D6list = D6_GI.query_xy([[bin_xy[0]], [bin_xy[1]]], get_data=True,
                        fields=ATL06_fields)
if not isinstance(D6list, list):
    D6list = [D6list]
for item in D6list:
    item.BP = np.zeros_like(item.latitude) + item.beam_pair
    item.rgt = np.zeros_like(item.latitude) + item.rgt
    item.list_of_fields.append('BP')
KK = ATL06_field_dict.copy()
KK['Derived'] = ['BP']
D6sub = ATL06_data(field_dict=KK).from_list(D6list).get_xy(SRS_proj4)
# sort the segments primarily by x rounded to 100 m, breaking ties with
# normalized y, so that nearby segments end up adjacent in memory
x0 = np.round(np.nanmean(D6sub.x, axis=1) / 100.) * 100
y0 = np.round(np.nanmean(D6sub.y, axis=1) / 100.) * 100
iB = np.argsort(x0 + (y0 - y0.min()) / (y0.max() - y0.min()))
D6sub.index(iB)
# index the sorted ATL06 data
GI_D6sub = geo_index(delta=[100, 100]).from_xy(
    [np.nanmean(D6sub.x, axis=1), np.nanmean(D6sub.y, axis=1)])
# find the common bins at 100 m
GI_D6sub, GI_Qsub = GI_D6sub.intersect(GI_Qsub, pad=[0, 1])
if GI_D6sub is None:
    print("no intersections found for ATL06 bin %s\n" % bin_name)
    continue
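# --- minimal sketch of the binning/intersection pattern used above: build two
# 100 m geo_index objects from point coordinates and keep only their common
# bins.  The coordinates are synthetic; the from_xy() and intersect() calls
# mirror the signatures used in the excerpt.
#
# xA = np.arange(0., 1000., 10.); yA = np.zeros_like(xA)
# xB = np.arange(500., 1500., 10.); yB = np.zeros_like(xB)
# GI_A = geo_index(delta=[100, 100]).from_xy([xA, yA])
# GI_B = geo_index(delta=[100, 100]).from_xy([xB, yB])
# GI_A, GI_B = GI_A.intersect(GI_B, pad=[0, 1])
# if GI_A is None:
#     print("no common 100 m bins")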
def for_file(self, filename, file_type, number=0, dir_root=''):
    # make a geo_index for file 'filename'
    if dir_root is not None:
        # eliminate the string in 'dir_root' from the filename
        filename_out = filename.replace(dir_root, '')
    if file_type in ['ATL06']:
        temp = list()
        this_field_dict = {None: ('latitude', 'longitude', 'h_li',
                                  'delta_time')}
        for beam_pair in (1, 2, 3):
            D = ATL06_data(beam_pair=beam_pair,
                           field_dict=this_field_dict).from_file(
                               filename).get_xy(self.attrs['SRS_proj4'])
            if D.latitude.shape[0] > 0:
                temp.append(geo_index(
                    delta=self.attrs['delta'],
                    SRS_proj4=self.attrs['SRS_proj4']).from_xy(
                        [np.nanmean(D.x, axis=1), np.nanmean(D.y, axis=1)],
                        '%s:pair%d' % (filename_out, beam_pair), 'ATL06',
                        number=number))
        self.from_list(temp)
    if file_type in ['ATL11']:
        temp = list()
        this_field_dict = {'corrected_h': ('ref_pt_lat', 'ref_pt_lon')}
        for beam_pair in (1, 2, 3):
            D = ATL11.data().from_file(
                filename, pair=beam_pair,
                field_dict=this_field_dict).get_xy(self.attrs['SRS_proj4'])
            if D.x.shape[0] > 0:
                temp.append(geo_index(
                    delta=self.attrs['delta'],
                    SRS_proj4=self.attrs['SRS_proj4']).from_xy(
                        [D.x, D.y], '%s:pair%d' % (filename_out, beam_pair),
                        'ATL11', number=number))
        self.from_list(temp)
    if file_type in ['ATM_Qfit']:
        D = Qfit_data(filename=filename,
                      list_of_fields=['latitude', 'longitude', 'time'])
        if D.latitude.shape[0] > 0:
            self.from_latlon(D.latitude, D.longitude, filename_out,
                             'ATM_Qfit', number=number)
    if file_type in ['ATM_waveform']:
        D = Qfit_data(filename=filename, waveform_format=True)
        if D.latitude.shape[0] > 0:
            self.from_latlon(D.latitude, D.longitude, filename_out,
                             'ATM_waveform', number=number)
    if file_type in ['filtered_DEM', 'DEM']:
        D = dict()
        D['x'], D['y'], D['z'] = read_DEM(filename, asPoints=True)
        if D['x'].size > 0:
            self.from_xy((D['x'], D['y']), filename=filename_out,
                         file_type=file_type, number=number)
    if file_type in ['h5_geoindex']:
        # read the file as a collection of points
        temp_GI = geo_index().from_file(filename)
        xy_bin = temp_GI.bins_as_array()
        # loop over a minimal set of attributes:
        for attr in ['delta', 'SRS_proj4', 'dir_root']:
            if attr in temp_GI.attrs:
                self.attrs[attr] = temp_GI.attrs[attr]
        self.attrs['file_%d' % number] = filename_out
        self.attrs['type_%d' % number] = file_type
        if dir_root is not None:
            self.attrs['dir_root'] = dir_root
        self.attrs['n_files'] = 1
        self.from_xy(xy_bin, filename=filename_out, file_type=file_type,
                     number=number, fake_offset_val=-1)
    if file_type in ['indexed_h5']:
        h5f = h5py.File(filename, 'r')
        if 'INDEX' in h5f:
            xy = [np.array(h5f['INDEX']['bin_x']),
                  np.array(h5f['INDEX']['bin_y'])]
            if 'bin_index' in h5f['INDEX']:
                # this is the type of indexed h5 that has all of the data in
                # single datasets
                i0_i1 = h5f['INDEX']['bin_index']
                first_last = [i0_i1[0, :].ravel(), i0_i1[1, :].ravel()]
                fake_offset = None
            else:
                first_last = None
                fake_offset = -1
        else:
            # there is no index -- just a bunch of bins, maybe?  Parse the bin
            # centers from group names of the form '<x>E_<y>N'
            first_last = None
            fake_offset = -1
            bin_re = re.compile("(.*)E_(.*)N")
            xy = [[], []]
            for key in h5f:
                m = bin_re.match(key)
                if m is None:
                    continue
                xy[0].append(float(m.group(1)))
                xy[1].append(float(m.group(2)))
            xy[0] = np.array(xy[0])
            xy[1] = np.array(xy[1])
        self.from_xy(xy, filename=filename_out, file_type=file_type,
                     number=number, first_last=first_last,
                     fake_offset_val=fake_offset)
        if dir_root is not None:
            self.attrs['dir_root'] = dir_root
        h5f.close()
    if file_type in ['indexed_h5_from_matlab']:
        h5f = h5py.File(filename, 'r')
        xy = [np.array(h5f['INDEX']['bin_x']),
              np.array(h5f['INDEX']['bin_y'])]
        first_last = None
        fake_offset = -1
        self.from_xy(xy, filename_out, file_type, number=number,
                     first_last=first_last, fake_offset_val=fake_offset)
        h5f.close()
    return self
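# --- usage sketch (constructor arguments assumed from the calls above): build
# a geo_index for a single ATL06 granule.  The granule name is hypothetical,
# and the projection string is a standard Antarctic polar-stereographic
# definition used here only as an example.
#
# SRS_proj4 = ('+proj=stere +lat_0=-90 +lat_ts=-71 +lon_0=0 '
#              '+x_0=0 +y_0=0 +datum=WGS84 +units=m')
# GI = geo_index(delta=[1.e4, 1.e4], SRS_proj4=SRS_proj4)
# GI.for_file('ATL06_20190205041106_05910210_210_01.h5',  # hypothetical file
#             'ATL06', number=0)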
def readPointData(args):
    pointDataSets = dict()
    if args.dem:
        pointData = dict()
        pointData['x'], pointData['y'], pointData['h'] = read_DEM(
            args.pointFile, asPoints=True)
        pointDataSets[args.pointFile] = pointData
    elif args.index is True:
        pointDataSets = queryIndex(args.pointFile, args.demFile,
                                   verbose=args.verbose)
    elif args.ATL06 is False and args.ATL08 is False:
        # generic point file: read latitude/longitude or x/y, plus h
        pointData = dict()
        h5f = h5py.File(args.pointFile, 'r')
        try:
            pointData['latitude'] = np.array(h5f['latitude'])
            pointData['longitude'] = np.array(h5f['longitude'])
        except KeyError:
            pointData['x'] = np.array(h5f['x'])
            pointData['y'] = np.array(h5f['y'])
        pointData['h'] = np.array(h5f['h'])
        pointDataSets[args.pointFile] = pointData
        h5f.close()
    elif args.ATL06 is True:
        field_dict = {
            None: ['delta_time', 'h_li', 'h_li_sigma', 'latitude',
                   'longitude'],
            'geophysical': ['dac'],
            'fit_statistics': ['dh_fit_dx', 'h_rms_misfit', 'h_robust_sprd',
                               'n_fit_photons', 'signal_selection_source',
                               'snr_significance', 'w_surface_window_final',
                               'h_mean'],
            'derived': ['valid']}
        beamPairs = [1, 2, 3]
        beams = ['l', 'r']
        pointData = dict()
        for beamPair in beamPairs:
            pairName = 'gt%d' % beamPair
            try:
                D6 = ATL06_data(beam_pair=beamPair,
                                field_dict=field_dict).from_file(
                                    args.pointFile)
                f06.phDensityFilter(D6, toNaN=True, subset=True,
                                    minDensity={'weak': 0.5, 'strong': 2})
                #if D6.h_li.size == 0:
                #    continue
                #f06.segDifferenceFilter(D6, tol=10, toNaN=True, subset=True)
                if D6.h_li.size == 0:
                    continue
                for ind, beam in enumerate(beams):
                    if D6.longitude.size < 50:
                        continue
                    group = pairName + beam
                    pointData['longitude'] = D6.longitude[:, ind].ravel()
                    pointData['latitude'] = D6.latitude[:, ind].ravel()
                    pointData['h'] = D6.h_mean[:, ind].ravel()
                    pointData['delta_time'] = D6.delta_time[:, ind].ravel()
                    # flag ascending (+1) vs descending (-1) tracks from the
                    # change in latitude between the first and last valid
                    # segments
                    these = np.where(np.isfinite(D6.latitude[:, ind]))[0]
                    if these.size > 2:
                        first = np.min(these)
                        last = np.max(these)
                        pointData['AD'] = np.sign(
                            D6.latitude[last, ind] -
                            D6.latitude[first, ind]) * np.ones_like(
                                pointData['h'])
                    if pointData['h'].size > 0:
                        pointDataSets[args.pointFile + ":" + group] = \
                            pointData.copy()
            except KeyError:
                print("pair %s not in %s\n" % (pairName, args.pointFile))
    if args.blockMedian is not None:
        for key in list(pointDataSets):
            pointDataSets[key]['x'], pointDataSets[key]['y'], \
                pointDataSets[key]['h'] = pt_blockmedian(
                    pointDataSets[key]['x'], pointDataSets[key]['y'],
                    pointDataSets[key]['h'], delta=args.blockMedian)
    # assign the 'name' property of the dataset
    for key in pointDataSets:
        pointDataSets[key]['name'] = key
    return pointDataSets
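# --- usage sketch: readPointData() expects an argparse-style namespace; the
# attributes below mirror the flags the function tests (pointFile, dem, index,
# ATL06, ATL08, demFile, verbose, blockMedian).  The granule name is
# hypothetical.
#
# import argparse
# args = argparse.Namespace(
#     pointFile='ATL06_example.h5',  # hypothetical granule
#     dem=False, index=False, ATL06=True, ATL08=False,
#     demFile=None, verbose=False, blockMedian=None)
# pointDataSets = readPointData(args)
# for name, D in pointDataSets.items():
#     print(name, D['h'].size)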