def uhsas2sizedist(df):
    """Creates a size distribution time series instance from UHSAS data
    (as returned by the read_file function).

    Parameters
    ----------
    df : pandas.DataFrame
        As put out by the read_file function.

    Returns
    -------
    dist : SizeDist_TS
        Size distribution time series (number concentration).
    """
    ## make bins (based on what is mentioned in the header)
    bins = _np.linspace(40, 1000, 99)

    ## the size distribution data
    data = df.iloc[:, :-1].copy()
    data.columns = bins

    ### to my knowledge the UHSAS cannot measure below ~70 nm
    data_trunc = data.loc[:, 69:]

    # make the size distribution
    binned, _ = _db.bincenters2binsANDnames(data_trunc.columns.values)
    dist = _sd.SizeDist_TS(data_trunc, binned, 'numberConcentration')
    return dist
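# --- illustrative usage sketch (added; not part of the original module) ---
# Assumes `df` is the raw UHSAS DataFrame produced by this module's read_file
# function; the file path below is a hypothetical placeholder.
def _example_uhsas2sizedist(path2file):
    df = read_file(path2file)         # hypothetical UHSAS csv file
    dist = uhsas2sizedist(df)         # number concentration per bin
    return dist.convert2dNdlogDp()    # normalize to dN/dlogDp, as done elsewhere in this package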
def _concat(self, arm_data_objs, close_gaps=True):
    for att in self._concatable:
        first_object = getattr(arm_data_objs[0], att)
        which_type = type(first_object).__name__
        data_period = first_object._data_period
        if which_type == 'TimeSeries_2D':
            value = _timeseries.TimeSeries_2D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'TimeSeries':
            value = _timeseries.TimeSeries(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'AMS_Timeseries_lev01':
            value = _AMS.AMS_Timeseries_lev01(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'SizeDist_TS':
            # value = _AMS.AMS_Timeseries_lev01(pd.concat([getattr(i, att).data for i in arm_data_objs]))
            data = _pd.concat([getattr(i, att).data for i in arm_data_objs])
            value = _sizedistribution.SizeDist_TS(
                data, getattr(arm_data_objs[0], att).bins, 'dNdlogDp')
        elif which_type == 'TimeSeries_3D':
            value = _timeseries.TimeSeries_3D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        else:
            raise TypeError(
                '%s is not an allowed type here (allowed: TimeSeries, TimeSeries_2D, '
                'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)' % which_type)

        value._data_period = data_period
        if close_gaps:
            setattr(self, att, value.close_gaps())
        else:
            setattr(self, att, value)
def read_file(fn):
    out = {}
    df = pd.read_csv(fn)
    df.index = pd.to_datetime(df.DateTimeUTC)
    df.drop('DateTimeUTC', axis=1, inplace=True)
    # df.shape

    # size distribution: the 'Nn...' columns hold the data, the first row of the
    # 'Ns...' columns holds the bin centers (scaled by 1000)
    dist = df.loc[:, [i for i in df.columns if i[:2] == 'Nn']].copy().astype(float)
    dist.columns = df.loc[:, [i for i in df.columns if i[:2] == 'Ns']].iloc[0].astype(float) * 1000
    # dist.index = pd.to_datetime(df.DateTimeUTC)
    dist = sd.SizeDist_TS(dist,
                          db.bincenters2binsANDnames(dist.columns.values)[0],
                          'dNdlogDp')
    dist = dist.convert2dNdlogDp()
    out['size_distribution'] = dist

    # everything that is not part of the size distribution
    rest = df.drop([i for i in df.columns if i[:2] == 'Nn'], axis=1)
    rest = rest.drop([i for i in df.columns if i[:2] == 'Ns'], axis=1)
    # rest = rest.rename({h['cpd3']: h['mylabel'] for h in header_dict}, axis = 1)
    out['rest'] = rest
    return out
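# --- illustrative usage sketch (added; the file name is a hypothetical placeholder) ---
def _example_read_file(path2file):
    out = read_file(path2file)          # e.g. 'uhsas_export.csv'
    dist = out['size_distribution']     # SizeDist_TS in dN/dlogDp
    hk = out['rest']                    # remaining (housekeeping) columns
    return dist.particle_number_concentration, hk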
def _read_csv(fname, norm2time=True, norm2flow=True):
    uhsas = _readFromFakeXLS(fname)
    # return uhsas
    sd, hk = _separate_sizedist_and_housekeep(uhsas,
                                              norm2time=norm2time,
                                              norm2flow=norm2flow)
    hk = timeseries.TimeSeries(hk)
    # return size_distribution, hk
    bins = _get_bins(sd)
    # return bins
    dist = sizedistribution.SizeDist_TS(sd, bins, "numberConcentration")
    return dist, hk
def read_radiosonde_csv(fname, cal):
    """Reads a radiosonde csv file and returns a size distribution and a
    housekeeping TimeSeries.

    Parameters
    ----------
    fname : str
        Name of the file to be opened.
    cal : str or calibration instance
        Either the name of the file containing the calibration data, or a
        calibration instance.
    """
    df = pd.read_csv(fname, header=15)

    fkt = lambda x: x.lstrip(' ').replace(' ', '_')
    col_new = [fkt(i) for i in df.columns.values]
    df.columns = col_new

    time = df['date_[y-m-d_GMT]'] + df['time_[h:m:s_GMT]'] + '.' + df['milliseconds'].astype(str)
    df.index = pd.Series(pd.to_datetime(time, format=time_tools.get_time_formate()))

    df[df == 99999.000] = np.nan

    alt = df['GPS_altitude_[km]'].copy()
    df['Altitude'] = alt * 1e3
    df.rename(columns={'GPS_latitude': 'Lat',
                       'GPS_longitude': 'Lon'}, inplace=True)

    bins = []
    for k in df.keys():
        if 'Bin' in k:
            bins.append(k)
            # print(k)
    # print(bins)
    sd = df.loc[:, bins]
    hk = df.drop(bins, axis=1)

    hk = timeseries.TimeSeries(hk)
    hk.data.sort_index(inplace=True)
    hk.data.Altitude.interpolate(inplace=True)
    hk.data['temperature_K'] = hk.data['iMet_air_temperature_(corrected)_[deg_C]'] + 273.15
    hk.data['pressure_Pa'] = hk.data['iMet_pressure_[mb]'] * 100

    # fname_cal = '/Users/htelg/data/POPS_calibrations/150622_china_UAV.csv'
    cal = calibration.read_csv(cal)
    ib = cal.get_interface_bins(20)
    sd = sizedistribution.SizeDist_TS(sd,
                                      ib['binedges_v_int'].values.transpose()[0],
                                      'numberConcentration')
    return sd, hk
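# --- illustrative usage sketch (added; file names are hypothetical placeholders) ---
def _example_read_radiosonde_csv():
    sd, hk = read_radiosonde_csv('sounding.csv', 'pops_calibration.csv')
    # hk is a TimeSeries with derived temperature_K and pressure_Pa columns,
    # sd is a SizeDist_TS in number concentration
    return sd.convert2dNdlogDp(), hk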
def _parse_netCDF(self):
    super(ArmDatasetSub, self)._parse_netCDF()

    df = pd.DataFrame(self._read_variable('number_concentration'),
                      index=self.time_stamps)

    d = self._read_variable('diameter')
    bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

    self.size_distribution = sizedistribution.SizeDist_TS(df, bins, 'dNdlogDp')
    self.size_distribution._data_period = self._data_period
def _concat(self, arm_data_objs, close_gaps=True):
    for att in self._concatable:
        first_object = getattr(arm_data_objs[0], att)
        which_type = type(first_object).__name__
        data_period = first_object._data_period
        if which_type == 'TimeSeries_2D':
            value = _timeseries.TimeSeries_2D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'TimeSeries':
            value = _timeseries.TimeSeries(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'AMS_Timeseries_lev01':
            value = _AMS.AMS_Timeseries_lev01(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'SizeDist_TS':
            # value = _AMS.AMS_Timeseries_lev01(pd.concat([getattr(i, att).data for i in arm_data_objs]))
            data = _pd.concat([getattr(i, att).data for i in arm_data_objs])
            value = _sizedistribution.SizeDist_TS(
                data,
                getattr(arm_data_objs[0], att).bins,
                'dNdlogDp',
                ignore_data_gap_error=True,
            )
        elif which_type == 'TimeSeries_3D':
            value = _timeseries.TimeSeries_3D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        else:
            raise TypeError(
                '%s is not an allowed type here (allowed: TimeSeries, TimeSeries_2D, '
                'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)' % which_type)

        if hasattr(first_object, 'availability'):
            try:
                avail_concat = _pd.concat([
                    getattr(i, att).availability.availability
                    for i in arm_data_objs
                ])
                avail = Data_Quality(None, avail_concat, None,
                                     first_object.flag_info)
                value.availability = avail
            except:
                _warnings.warn(
                    'Availability could not be concatenated; make sure it was '
                    'converted to a pandas DataFrame at some point!')

        value._data_period = data_period
        if close_gaps:
            setattr(self, att, value.close_gaps())
        else:
            setattr(self, att, value)
def read_netCDF(fname):
    # fname = '/Volumes/HTelg_4TB_Backup/arm_data/OLI/uhsas/oliaosuhsasM1.a1.20170401.000008.nc'
    if type(fname) == str:
        fname = [fname]

    sds = []
    for fn in fname:
        data = _xr.open_dataset(fn)

        if data.sampling_interval.split()[1] != 'seconds':
            raise ValueError('This should be seconds, but it is {}.'.format(
                data.sampling_interval.split()[1]))

        if not _np.all((data.upper_size_limit.data[:-1] -
                        data.lower_size_limit.data[1:]) == 0):
            raise ValueError('Something is wrong with the bins')

        # flow rate variable name changed at some point
        if 'sample_flow_rate' in data.variables.keys():
            flowrate = data.sample_flow_rate
        elif 'sampling_volume' in data.variables.keys():
            flowrate = data.sampling_volume

        if flowrate.units not in ['sccm', 'cc/min']:
            raise ValueError('Units have to be sccm or cc/min, but are {}.'.format(
                flowrate.units))

        sd = data.size_distribution.to_pandas()

        # normalize total counts to numbers/cc
        ## normalize to the integration interval
        sd /= float(data.sampling_interval.split()[0])
        ## normalize to the flow rate (cc/min -> cc/s)
        flowrate = flowrate.values / 60.
        sd = sd.divide(flowrate, axis=0)
        sds.append(sd)

    sd = _pd.concat(sds).sort_index()
    binedges = _np.append(data.lower_size_limit.data,
                          data.upper_size_limit.data[-1])
    sdts = _sizedist.SizeDist_TS(sd, binedges, 'numberConcentration')
    sdts._data_period = float(data.sampling_interval.split()[0])
    sdts = sdts.convert2dNdlogDp()
    return sdts
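# --- illustrative usage sketch (added; the file names are hypothetical placeholders
#     following the pattern in the comment above) ---
def _example_read_netCDF():
    # a single file name or a list of file names can be passed; the result is
    # one concatenated SizeDist_TS in dN/dlogDp
    files = ['oliaosuhsasM1.a1.20170401.000008.nc',
             'oliaosuhsasM1.a1.20170402.000008.nc']
    return read_netCDF(files)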
def _parse_netCDF(self):
    super(ArmDatasetSub, self)._parse_netCDF()

    data = self._read_variable('number_concentration_DMA_APS')
    df = pd.DataFrame(data['data'], index=self.time_stamps)

    d = self._read_variable('diameter')['data']
    bins, colnames = diameter_binning.bincenters2binsANDnames(d[:] * 1000)

    self.size_distribution = sizedistribution.SizeDist_TS(
        df,
        bins,
        'dNdlogDp',
        ignore_data_gap_error=True,
        # fill_data_gaps_with = np.nan
    )
    self.size_distribution._data_period = self._data_period
    self.size_distribution.flag_info = self.flag_info

    availability = pd.DataFrame(data['availability'], index=self.time_stamps)
    self.size_distribution.availability = Data_Quality(
        self, availability, data['availability_type'], self.flag_info)
def extract_sizedistribution(df):
    #### get the size distribution data
    cols = df.columns
    cols = [i for i in cols if i.replace('.', '').isnumeric()]
    dist = df.loc[:, cols]
    if len(cols) == 0:
        return False

    # create bins for atmPy
    bins, _ = atmdb.bincenters2binsANDnames(np.array([float(i) for i in cols]))
    bins *= 2    # radius to diameter
    bins *= 1e3  # um to nm

    #### create sizedistribution instance
    #### todo: there is a scaling error since AERONET uses 'dVdlnDp' and I use 'dVdlogDp'
    dist_ts = atmsd.SizeDist_TS(
        dist,
        bins,
        'dVdlogDp',
        # fill_data_gaps_with=np.nan,
        ignore_data_gap_error=True,
    )
    return dist_ts
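# --- illustrative usage sketch (added; assumes an AERONET inversion product
#     loaded into a DataFrame whose size-distribution columns are labeled with
#     the numeric bin-center radii, as expected by the function above) ---
def _example_extract_sizedistribution(df_aeronet):
    dist_ts = extract_sizedistribution(df_aeronet)
    if dist_ts is False:
        raise ValueError('no size distribution columns found')
    return dist_ts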
def process(ipmatchrow, folders, test=False, raise_error=True, path2product_file=None):
    #### imet
    imet = open_iMet(folders['path2imet_folder'].joinpath(ipmatchrow.fn_imet))
    didit = set_altitude_column(imet, ipmatchrow.which_alt)
    if not didit:
        return False

    ## fill missing timestamps with nans
    imet = imet.resample('1s').mean()
    start_time, end_time = imet.index.min(), imet.index.max()

    #### POPS
    ds = xr.open_dataset(
        folders['path2pops_folder'].joinpath(ipmatchrow.fn_pops + '.nc'))

    ## size distribution
    dist = ds.size_distributions.to_pandas()
    dist = dist.resample('1s').mean()
    dist = dist.truncate(start_time, end_time)
    dist_ts = sd.SizeDist_TS(
        dist,
        size_distribution.diameter_binning.bincenters2binsANDnames(
            ds.bincenters.values)[0],
        'dNdlogDp')
    dist_ts._data_period = 1
    particle_no_concentration = dist_ts.particle_number_concentration.data.copy()
    particle_mean_d = dist_ts.particle_mean_diameter.data.copy()

    ## housekeeping
    df = ds.housekeeping.to_pandas()
    df = df.Altitude
    df = df.resample('1s').mean()
    df = df.truncate(start_time, end_time)

    #### merge
    tbs = imet.copy()
    tbs['pops_particle_number_concentration'] = particle_no_concentration
    tbs['pops_particle_mean_diameter'] = particle_mean_d
    tbs['test_POPS_altitude'] = df.copy()

    #### met
    ## ground pressure
    tbs['atm_pressure_ground'] = load_met_files(start_time, end_time, folders)

    #### retrievals
    ## equivalent potential temperature
    add_eqiv_potential_temp(tbs)

    #### create xarray dataset
    dstbs = xr.Dataset(tbs)
    dstbs['pops_size_distribution'] = dist

    #### additional retrievals that need the xarray dataset to work with
    try:
        add_cloud_base_distance_and_transit(dstbs)
    except:
        if raise_error:
            print('Error in adding cloud base stuff: ', sys.exc_info())
        else:
            return dstbs
    # add_sectioning(dstbs, folders=folders, path2product_file=path2product_file)
    add_uhsas_stuff(dstbs, dist_ts, uhsas_folder=folders['path2uhsas'])
    try:
        add_mwr_products(dstbs, folders['path2mwr'])
    except Exception as e:
        txt = e.__str__()
        warnings.warn(txt)
    add_cloud_top(dstbs, folders['path2cloudtop'])

    if test:
        out = {}
        out['tbs'] = tbs
        out['start'] = start_time
        out['end'] = end_time
        return out
    else:
        return dstbs
def _peak2Distribution(self, bins=defaultBins, distributionType='number',
                       differentialStyle=False):
    """Action required: clean up!

    Returns the particle size distribution normalized in various ways.

    distributionType
        dNdDp; should be fixed to that, change to other types later, once the
        distribution is created!
        old:
        \t calibration: this will create an intensity distribution instead of a
        size distribution. bins should only be a number of bins which will be
        logarithmically spaced
        \t number:\t numbers only, unit: $\mu m^{-1}\, cm^{-3}$
        \t surface:\t surface area distribution, unit: $\mu m\, cm^{-3}$
        \t volume:\t volume distribution, unit: $\mu m^{2}\, cm^{-3}$
    differentialStyle
        if False a raw histogram will be created, else:
        \t dNdDp:\t distribution normalized to the bin width, bin centers are
        given by (Dn + Dn+1)/2
        \t dNdlogDp:\t distribution normalized to the log of the bin width, bin
        centers are given by 10**((logDn + logDn+1)/2)
    """
    notMasked = np.where(self.data.Masked == 0)
    # too_big_condi = np.where(self.data.Masked == 2)
    unique = np.unique(self.data.index.values[notMasked])
    N = np.zeros((unique.shape[0], bins.shape[0] - 1))
    too_big = np.zeros(unique.shape[0])

    for e, i in enumerate(unique):
        condi = np.where(
            np.logical_and(self.data.Masked == 0, self.data.index.values == i))
        if distributionType == 'calibration':
            process = self.data.Amplitude.values[condi]
        else:
            process = self.data.Diameter.values[condi]
        n, edg = np.histogram(process, bins=bins)
        N[e] = n
        too_big[e] = np.logical_and(self.data.Masked == 2,
                                    self.data.index.values == i).sum()

    N = N.astype(float)
    too_big = too_big.astype(float)

    # normalize to the time between consecutive timestamps
    deltaT = (unique[1:] - unique[:-1]) / np.timedelta64(1, 's')
    deltaT_sl = np.append(deltaT[0], deltaT)
    deltaT = np.repeat(np.array([deltaT_sl]), bins.shape[0] - 1, axis=0)
    N /= deltaT.transpose()
    too_big /= deltaT_sl.transpose()

    binwidth = edg[1:] - edg[:-1]

    if not differentialStyle:
        pass
    elif differentialStyle == 'dNdDp':
        N = N / binwidth
    else:
        raise ValueError('wrong type for argument "differentialStyle"')

    binstr = bins.astype(int).astype(str)
    cols = []
    for e, i in enumerate(binstr[:-1]):
        cols.append(i + '-' + binstr[e + 1])
    dataFrame = pd.DataFrame(N, columns=cols, index=unique)
    # too_big = pd.DataFrame(too_big, columns=['# too big'])
    too_big = _timeseries.TimeSeries(
        pd.DataFrame(too_big, columns=['# too big'], index=unique))

    if distributionType == 'calibration':
        return sizedistribution.SizeDist_TS(dataFrame, bins, 'calibration')
    else:
        dist = sizedistribution.SizeDist_TS(dataFrame, bins, 'dNdDp')
        dist = dist.convert2dNdlogDp()
        dist.particle_number_concentration_outside_range = too_big
        return dist
def read_csv(fname):
    las = _readFromFakeXLS(fname)
    sd, hk = _separate_sizedist_and_housekeep(las)
    bins = _get_bins(sd)
    dist = sizedistribution.SizeDist_TS(sd, bins, "numberConcentration")
    return dist
def open_path(path,
              window=('2016-11-15', '2016-11-18'),
              average=None,
              verbose=True):
    """
    Parameters
    ----------
    path
    window: tuple of str
        Start and end time of the time window to open.
    average: tuple [None]
        The purpose of this is to keep the memory usage low in case a lower
        resolution suffices, e.g. (60, 's').
    verbose: bool

    Returns
    -------
    SizeDist_TS instance
    """

    def read_aosaps(file, verbose=False):
        ds = _xr.open_dataset(file, autoclose=True)
        data_dist = ds.N_TOF.to_pandas()
        data_dist = data_dist.iloc[:, :-1]
        bincenters = data_dist.columns.values * 1000
        # dist = sd.SizeDist_TS(data_dist, bincenters, 'numberConcentration')
        binedges = _np.unique(ds.aerodynamic_diameter_bound.data.flatten())[1:] * 1000

        # normalize to sample flow rate
        sample_flow_rate_cc_s = (ds.total_flow_rate.to_pandas() -
                                 ds.sheath_flow_rate.to_pandas()) * 1000 / 60
        data_dist = data_dist.divide(sample_flow_rate_cc_s, axis='index')

        out = {}
        out['data_dist'] = data_dist
        out['bincenters'] = bincenters
        out['binedges'] = binedges
        if verbose:
            print(file)
            print('shapes: {}, {}'.format(data_dist.shape, bincenters.shape))
        return out

    # start_time, end_time = window
    files = _tools.path2filelist(path=path, window=window, product='aosaps')
    if verbose:
        print('Opening {} files.'.format(len(files)))
        print(_tools.path2info(files[0]))

    data_dist = None
    binedges = None
    for file in files:
        out = read_aosaps(file)
        ddt = _ts.TimeSeries(out['data_dist'])
        if average:
            ddt = ddt.average_time(average)
        ddt = ddt.data
        if isinstance(data_dist, type(None)):
            data_dist = ddt
            binedges = out['binedges']
        else:
            data_dist = data_dist.append(ddt, sort=True)
            # make sure the bin edges did not change
            assert (_np.all(_np.equal(binedges, out['binedges'])))

    dist = _sd.SizeDist_TS(data_dist,
                           binedges,
                           'numberConcentration',
                           ignore_data_gap_error=True)
    return dist
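# --- illustrative usage sketch (added; the path is a hypothetical placeholder) ---
def _example_open_path():
    # average to 60 s to keep memory usage low, as suggested in the docstring
    dist = open_path('/path/to/aosaps/files',
                     window=('2016-11-15', '2016-11-18'),
                     average=(60, 's'))
    return dist.particle_number_concentration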
def read_file(path,
              version='BBB_02',
              pattern='HK',
              skip_histogram=False,
              size_bins=None,
              # calibration_file=None,
              ignore_colums=[],  # e.g. ['Flow_Rate_ccps', 'LED_P_MON', 'AI_4', 'AI_5', 'AI_7', 'AI_8', 'AI_9', 'AI_10', 'AI_11', 'LED_P_Mon_Therm', 'AO_Flow', 'AO_LaserPower', 'No_Pts', 'ValidParts', 'writeTime', 'currMax']
              verbose=False,
              ):
    """
    Parameters
    ----------
    path: string or list of strings
        This can either be a file name, a list of file names, or a folder.
    pattern: str
        If a folder is given, this is the pattern housekeeping files will be
        identified by.
    version: string ['BBB_02']
        BBB_02: Handix version, not sure since when. At least since 2022-06, but
            most likely way earlier ...
        BBB_01: BeagleBone (original)
        sbRio: sbRio
    skip_histogram: bool
        If True, only the housekeeping columns are read.
    size_bins: pathlib.Path or str
        Path to a file containing the bin edges (EDGES, not CENTERS!!).
        Structure: currently one value per line.
    ignore_colums: list
        Columns to drop from the housekeeping data.
    verbose: bool

    Returns
    -------
    dict with 'housekeeping' (POPSHouseKeeping instance) and 'sizedistribution'
    (SizeDist_TS instance) entries.
    """
    # test_data_folder = os.listdir()
    # test_data_folder = '20150419_000_POPS_HK.csv'

    def read_sbRio(fname, skip_histogram=False, verbose=False):
        """Reads a housekeeping file (csv format) and returns a POPSHouseKeeping
        instance wrapping a pandas DataFrame."""
        if verbose:
            print('reading %s' % fname)
        try:
            df = pd.read_csv(fname, error_bad_lines=False)
        except ValueError:
            return False
        # data = df.values
        # dateString = test_data_folder.split('_')[0]
        dt = (datetime.datetime.strptime('19700101', "%Y%m%d") -
              datetime.datetime.strptime('19040101', "%Y%m%d"))
        dts = dt.total_seconds()
        # todo: (low) what is that delta t for, looks fishy (Hagen)
        dtsPlus = datetime.timedelta(hours=0).total_seconds()
        # Time_s = data[:,0]
        # data = data[:,1:]
        df.index = pd.Series(pd.to_datetime(df.Time_s - dts - dtsPlus, unit='s'),
                             name='Time_UTC')
        # if 'P_Baro' in df.keys():
        #     df['barometric_pressure'] = df.P_Baro
        #     df.drop('P_Baro', 1, inplace=True)
        #     df['altitude'] = ct.p2h(df.barometric_pressure)
        return POPSHouseKeeping(df)

    def read_BBB(fname, skip_histogram=False, verbose=False):
        if verbose:
            print(f'read pops house keeping bbb file: {fname}')
        col_names = pd.read_csv(fname,
                                sep=',',
                                nrows=1,
                                header=None,
                                # index_col=1,
                                # usecols=np.arange()
                                ).values[0][:-1].astype(str)
        col_names = _np.char.strip(col_names)

        if skip_histogram:
            usecols = list(range(27))
        else:
            usecols = None
        data = pd.read_csv(fname,
                           sep=',',
                           skiprows=1,
                           header=None,
                           usecols=usecols,
                           # index_col=1,
                           # usecols=np.arange()
                           )
        data_hk = data.iloc[:, :27]
        data_hk.columns = col_names
        data_hk.index = pd.to_datetime(data_hk['DateTime'], unit='s')
        data_hk.drop('DateTime', axis=1, inplace=True)
        # hk = atmPy.general.timeseries.TimeSeries(data_hk, sampling_period=1)
        hk = POPSHouseKeeping(data_hk, sampling_period=1)
        hk.data['Barometric_pressure'] = hk.data['P']
        return hk

    def read_BBB_02(fname, skip_histogram=False, verbose=False):
        if verbose:
            print(f'read pops house keeping file: {fname}')
        if skip_histogram:
            usecols = list(range(27))
        else:
            usecols = None
        data = pd.read_csv(fname, sep=',', usecols=usecols)
        data.columns = [col.strip() for col in data.columns]
        data.index = pd.to_datetime(data['DateTime'], unit='s')
        data.drop('DateTime', axis=1, inplace=True)
        hk = POPSHouseKeeping(data, sampling_period=1)
        hk.data['Barometric_pressure'] = hk.data['P']
        return hk

    dist = f'Extraction of the sizedistribution is currently not implemented for the file_version {version}'

    #### assign version
    if version == 'sbRio':
        read = read_sbRio
    elif version == 'BBB_01':
        read = read_BBB
    elif version == 'BBB_02':
        read = read_BBB_02
    else:
        raise ValueError('Housekeeping version {} is unknown!'.format(version))

    #### workplan
    if isinstance(path, list):
        file_paths = path
    else:
        path = pl.Path(path)
        if path.is_dir():
            file_paths = sorted(list(path.glob('*{}*'.format(pattern))))
        elif path.is_file():
            file_paths = [path]
        else:
            raise TypeError('fname is of unknown type: {}'.format(type(path).__name__))
    file_paths.sort()

    #### read files
    hk_data = []
    for file in file_paths:
        hktmp = read(file, skip_histogram=skip_histogram, verbose=verbose)
        hk_data.append(hktmp.data)
    data = pd.concat(hk_data)

    #### generate POPSHouseKeeping instance and condition data
    hk = POPSHouseKeeping(data)
    hk.data = hk.data.dropna(how='all')  # this is necessary to avoid errors in further processing

    if ('P_Baro' in hk.data.keys()) or ('P_Ambient' in hk.data.keys()):
        if 'P_Baro' in hk.data.keys():
            hk.data['Barometric_pressure'] = hk.data.P_Baro
            hk.data.drop('P_Baro', axis=1, inplace=True)
        if 'P_Ambient' in hk.data.keys():
            hk.data['Barometric_pressure'] = hk.data.P_Ambient
            hk.data.drop('P_Ambient', axis=1, inplace=True)
        # try:
        #     hk.data['Altitude'] = ct.p2h(hk.data.barometric_pressure)

    if ignore_colums:
        hk.data = hk.data.drop(ignore_colums, axis=1)

    #### separate housekeeping and sizedistribution
    if version == 'BBB_02':
        data = hk.data
        hist_cols = [col for col in data.columns
                     if (col[0] == 'b' and col[1:].isnumeric())]
        dist = data.loc[:, hist_cols]
        data.drop(hist_cols, axis=1, inplace=True)

        #### read size bin file
        fn = pl.Path(size_bins)
        with open(fn, 'r') as rein:
            lines = rein.readlines()
        bins = _np.array([float(l) for l in lines])

        #### generate size distribution timeseries instance
        dist = atmsd.SizeDist_TS(dist, bins, 'numberConcentration')
        dist.housekeeping = hk

    return {'housekeeping': hk, 'sizedistribution': dist}
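# --- illustrative usage sketch (added; the folder and the bin-edge file are hypothetical placeholders) ---
def _example_read_pops_folder():
    out = read_file('/path/to/pops/files',
                    version='BBB_02',
                    pattern='HK',
                    size_bins='/path/to/pops_bin_edges.txt')
    hk = out['housekeeping']        # POPSHouseKeeping instance
    dist = out['sizedistribution']  # SizeDist_TS (numberConcentration)
    return hk, dist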
def process(ipmatchrow,
            folders,
            test=False,
            raise_error=True,
            path2product_file=None,
            log={}):
    #### imet
    imet = open_iMet(folders['path2imet_folder'].joinpath(ipmatchrow.fn_imet))
    didit = set_altitude_column(imet, ipmatchrow.which_alt, log=log)
    if not didit:
        return False

    ## fill missing timestamps with nans
    imet = imet.resample('1s').mean()
    start_time, end_time = imet.index.min(), imet.index.max()

    #### POPS
    logentry = {'success': True}
    logentry['problems'] = []
    log['open_pops_file'] = logentry

    fn_pops = folders['path2pops_folder'].joinpath(ipmatchrow.fn_pops + '.nc')
    if 'sn00' in fn_pops.name:
        fn_pops_old = fn_pops
        fn_pops = fn_pops.parent.joinpath(
            fn_pops.name.replace('sn00', f'sn{ipmatchrow.popssn}'))
        warnings.warn(
            f'replaced sn00 with sn{ipmatchrow.popssn}\n{fn_pops_old} -> \n{fn_pops}')
    if not fn_pops.is_file():
        logentry['success'] = False
        txt = f'POPS file not found ({fn_pops})'
        logentry['problems'].append(txt)
        warnings.warn(txt)
        return False

    ds = xr.open_dataset(fn_pops)

    ## size distribution
    dist = ds.size_distributions.to_pandas()
    dist = dist.resample('1s').mean()
    dist = dist.truncate(start_time, end_time)
    dist_ts = sd.SizeDist_TS(
        dist,
        size_distribution.diameter_binning.bincenters2binsANDnames(
            ds.bincenters.values)[0],
        'dNdlogDp')
    dist_ts._data_period = 1
    particle_no_concentration = dist_ts.particle_number_concentration.data.copy()
    particle_mean_d = dist_ts.particle_mean_diameter.data.copy()

    ## housekeeping
    df = ds.housekeeping.to_pandas()
    df = df.Altitude
    df = df.resample('1s').mean()
    df = df.truncate(start_time, end_time)

    #### merge
    tbs = imet.copy()
    tbs['pops_particle_number_concentration'] = particle_no_concentration
    tbs['pops_particle_mean_diameter'] = particle_mean_d
    tbs['test_POPS_altitude'] = df.copy()

    #### met
    met = load_met_files(start_time, end_time, folders)
    ## ground pressure and precipitation
    tbs['ground_atm_pressure'] = met['press']
    tbs['ground_precip_rate'] = met['precip']

    #### retrievals
    ## equivalent potential temperature
    add_eqiv_potential_temp(tbs)

    #### create xarray dataset
    dstbs = xr.Dataset(tbs)
    dstbs['pops_size_distribution'] = dist

    #### additional retrievals that need the xarray dataset to work with
    # try:
    add_cloud_base_distance_and_transit(dstbs, log=log)
    # except:
    #     if raise_error:
    #         print('Error in adding cloud base stuff: ', sys.exc_info())
    #     else:
    #         return dstbs
    # add_sectioning(dstbs, folders=folders, path2product_file=path2product_file)
    add_uhsas_stuff(dstbs, dist_ts, uhsas_folder=folders['path2uhsas'])
    # try:
    add_mwr_products(dstbs, folders['path2mwr'], log=log)
    # except Exception as e:
    #     txt = e.__str__()
    #     warnings.warn(txt)
    add_cloud_top(dstbs, folders['path2cloudtop'])

    #### rename a few variables
    change_list = {
        'temp': 'temperature',
        'rh': 'relative_humidity',
        'potential_temperature': 'temperature_potential',
        'equiv_potential_temperature': 'temperature_equiv_potential'
    }
    dstbs = dstbs.rename_vars(change_list)

    #### sort variables in the dataset ... could not find an attribute that does that?!?
    varlist = list(dstbs.variables)
    varlist.sort()
    # regenerate the dataset, sorted and cleaned
    dst = xr.Dataset()
    for var in varlist:
        dst[var] = dstbs[var]
    dstbs = dst

    #### standardize towards CF?
    # standardize time
    dstbs = dstbs.rename_dims({"datetime": 'time'})
    dstbs = dstbs.rename_vars({'datetime': 'time'})
    dstbs.time.attrs['long_name'] = 'Time offset from base_time'
    dstbs['time_offset'] = dstbs.time.copy()

    # adding base_time according to the ARM or CF standard
    td = pd.to_datetime(dstbs.time.values[0]) - pd.to_datetime('1970')
    dstbs['base_time'] = int(td.total_seconds())
    dstbs.base_time.attrs['string'] = pd.to_datetime(
        dstbs.time.values[0]).__str__() + ' 0:00'
    dstbs.base_time.attrs['long_name'] = 'Base time in Epoch'
    dstbs.base_time.attrs['units'] = 'seconds since 1970-1-1 0:00:00 0:00'

    #### test
    if test:
        out = {}
        out['tbs'] = tbs
        out['start'] = start_time
        out['end'] = end_time
        return out
    else:
        return dstbs