def uhsas2sizedist(df):
    """
    Build a size distribution time series from UHSAS data.

    Parameters
    ----------
    df : pandas.DataFrame
        Table as put out by the read_file function. Every column except the
        last one is used as a size bin; because 99 bin centers are assigned
        as column labels, df has to have exactly 100 columns.

    Returns
    -------
    dist : SizeDist_TS
        Size distribution time series created with the moment
        'numberConcentration', restricted to bin centers >= 69.

    """
    # Bin centers: 99 evenly spaced values between 40 and 1000, based on
    # what is mentioned in the instrument header (presumably nm -- confirm
    # against the header).
    centers = _np.linspace(40, 1000, num=99)

    # All but the last column carry the per-bin data.
    counts = df.iloc[:, :-1].copy()
    counts.columns = centers

    # To the author's knowledge the UHSAS can not measure below ~70 nm, so
    # the smaller bins are cut off with a label-based slice.
    measurable = counts.loc[:, 69:]

    # Derive the bin edges from the remaining centers and wrap everything up.
    edges, _names = _db.bincenters2binsANDnames(measurable.columns.values)
    return _sd.SizeDist_TS(measurable, edges, 'numberConcentration')
def read_file(fn):
    """
    Read a csv file and split it into a size distribution and the rest.

    Parameters
    ----------
    fn : str or path-like
        File handed to pandas.read_csv. It needs a 'DateTimeUTC' column,
        which becomes the (datetime) index and is then removed.

    Returns
    -------
    out : dict
        'size_distribution' : the SizeDist_TS built from the columns whose
            names start with 'Nn', after calling convert2dNdlogDp() on it.
        'rest' : pandas.DataFrame with all columns whose names start with
            neither 'Nn' nor 'Ns'.

    """
    table = pd.read_csv(fn)
    table.index = pd.to_datetime(table.DateTimeUTC)
    table.drop('DateTimeUTC', axis=1, inplace=True)

    # Column groups are identified by their two-letter name prefix.
    nn_cols = [name for name in table.columns if name.startswith('Nn')]
    ns_cols = [name for name in table.columns if name.startswith('Ns')]

    counts = table.loc[:, nn_cols].copy().astype(float)
    # The first row of the 'Ns' columns provides the column labels of the
    # distribution, multiplied by 1000 (presumably um -> nm; confirm).
    counts.columns = table.loc[:, ns_cols].iloc[0].astype(float) * 1000

    edges = db.bincenters2binsANDnames(counts.columns.values)[0]
    dist = sd.SizeDist_TS(counts, edges, 'dNdlogDp').convert2dNdlogDp()

    rest = table.drop(nn_cols, axis=1).drop(ns_cols, axis=1)

    return {'size_distribution': dist, 'rest': rest}
def _parse_netCDF(self):
    """
    Parse the netCDF file and attach the size distribution to the instance.

    The parent class parser runs first. Afterwards the variables
    'number_concentration_DMA_APS' and 'diameter' are read and combined
    into a SizeDist_TS (moment 'dNdlogDp'), which is stored in
    self.size_distribution together with the data period of this dataset.
    """
    super(ArmDatasetSub, self)._parse_netCDF()

    concentration = self._read_variable('number_concentration_DMA_APS')
    table = pd.DataFrame(concentration, index=self.time_stamps)

    # Bin centers are multiplied by 1000 before the edges are derived
    # (presumably um -> nm; confirm against the units of 'diameter').
    centers = self._read_variable('diameter')[:] * 1000
    edges, _names = diameter_binning.bincenters2binsANDnames(centers)

    self.size_distribution = sizedistribution.SizeDist_TS(
        table, edges, 'dNdlogDp')
    self.size_distribution._data_period = self._data_period
def _parse_netCDF(self):
    """
    Parse the netCDF file and create self.size_distribution.

    After the parent class parser has run, the 'number_concentration'
    variable (indexed by self.time_stamps) and the 'diameter' variable are
    turned into a SizeDist_TS with the moment 'dNdlogDp'. The data period
    of this dataset is copied onto the new size distribution.
    """
    super(ArmDatasetSub, self)._parse_netCDF()

    number_conc = pd.DataFrame(
        self._read_variable('number_concentration'),
        index=self.time_stamps,
    )

    diameters = self._read_variable('diameter')
    # The factor 1000 is applied to the bin centers before computing the
    # edges (presumably a um -> nm conversion; confirm with the file units).
    scaled_centers = diameters[:] * 1000
    bin_edges, _ = diameter_binning.bincenters2binsANDnames(scaled_centers)

    self.size_distribution = sizedistribution.SizeDist_TS(
        number_conc, bin_edges, 'dNdlogDp')
    self.size_distribution._data_period = self._data_period
def load_PMEL_APS(fname):
    """
    Load a tab-separated PMEL APS file into a size distribution.

    The reader relies on a fixed column layout: column 0 is
    'StartDateTime', the next 52 columns hold the bin centers and all
    remaining columns hold the distribution values (presumably named
    'Dp_1'..'Dp_52' and 'dNdlogDp_1'..'dNdlogDp_52' -- confirm against a
    sample file).

    Parameters
    ----------
    fname : str or path-like
        File handed to pandas.read_csv (tab separated).

    Returns
    -------
    dist : sizedistribution.aerosolSizeDistribution
        Created from the distribution columns with the moment 'dNdlogDp'.
        The bin centers are taken from the first remaining row and
        multiplied by 1000 (presumably um -> nm; confirm).
    """
    # Number of size bins; the header names below and the column slices
    # further down are all derived from this single value.
    n_bins = 52
    first_data_col = n_bins + 1

    # Cells that contain one of the header names are read as NaN, and rows
    # with any NaN are dropped afterwards (presumably to get rid of header
    # lines repeated inside the file -- confirm).
    bin_numbers = range(1, n_bins + 1)
    header_names = (
        ['StartDateTime']
        + ['Dp_%d' % i for i in bin_numbers]
        + ['dNdlogDp_%d' % i for i in bin_numbers]
    )

    tab = pd.read_csv(fname, sep='\t', na_values=header_names)
    tab = tab.dropna()
    tab.index = pd.to_datetime(tab.StartDateTime.values)

    # Bin centers come from the first row only.
    bincenters = tab.iloc[0, 1:first_data_col].values * 1000
    binedges, new_colnames = diameter_binning.bincenters2binsANDnames(
        bincenters)

    # Copy before relabelling so the rename acts on an independent frame
    # rather than on a slice of `tab`.
    reduced_tab = tab.iloc[:, first_data_col:].copy()
    reduced_tab.columns = new_colnames

    return sizedistribution.aerosolSizeDistribution(
        reduced_tab, binedges, 'dNdlogDp')
def _parse_netCDF(self):
    """
    Parse the netCDF file; create the size distribution and its availability.

    After the parent class parser has run, the variable
    'number_concentration_DMA_APS' is read. Its 'data' entry, together with
    the 'data' entry of the 'diameter' variable, is turned into a
    SizeDist_TS (moment 'dNdlogDp', data gap errors ignored) and stored in
    self.size_distribution. Data period and flag info of this dataset are
    copied onto it, and its 'availability' attribute is set to a
    Data_Quality instance built from the variable's 'availability' and
    'availability_type' entries.
    """
    super(ArmDatasetSub, self)._parse_netCDF()

    variable = self._read_variable('number_concentration_DMA_APS')
    values = pd.DataFrame(variable['data'], index=self.time_stamps)

    centers = self._read_variable('diameter')['data']
    # Bin centers are multiplied by 1000 before the edges are derived
    # (presumably um -> nm; confirm against the units of 'diameter').
    edges, _names = diameter_binning.bincenters2binsANDnames(
        centers[:] * 1000)

    self.size_distribution = sizedistribution.SizeDist_TS(
        values, edges, 'dNdlogDp', ignore_data_gap_error=True)
    self.size_distribution._data_period = self._data_period
    self.size_distribution.flag_info = self.flag_info

    availability = pd.DataFrame(variable['availability'],
                                index=self.time_stamps)
    quality = Data_Quality(self, availability,
                           variable['availability_type'], self.flag_info)
    self.size_distribution.availability = quality
def extract_sizedistribution(df):
    """
    Extract the size distribution columns of df as a SizeDist_TS.

    Size distribution columns are those whose (string) label is a number:
    the label has to consist of numeric characters once the dots are
    removed AND has to be parseable by float(). Labels such as '0.05.1'
    (the form pandas gives to a duplicated '0.05' header) pass the first
    check but are not valid numbers; they are skipped instead of raising
    a ValueError.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with string column labels.

    Returns
    -------
    dist_ts : SizeDist_TS or bool
        The size distribution time series (moment 'dVdlogDp'), or False if
        df has no size distribution columns.
    """
    # Collect the numeric column labels together with their parsed values.
    cols = []
    centers = []
    for label in df.columns:
        if not label.replace('.', '').isnumeric():
            continue
        try:
            value = float(label)
        except ValueError:
            # looks numeric without its dots, but is not a valid number
            continue
        cols.append(label)
        centers.append(value)

    if not cols:
        return False

    dist = df.loc[:, cols]

    # create bins for atmpy
    bins, _ = atmdb.bincenters2binsANDnames(np.array(centers))
    bins *= 2  # radius to diameter
    bins *= 1e3  # um to nm

    # TODO: there is a scaling error since AERONET uses 'dVdlnDp' and
    # 'dVdlogDp' is used here.
    dist_ts = atmsd.SizeDist_TS(
        dist,
        bins,
        'dVdlogDp',
        ignore_data_gap_error=True,
    )
    return dist_ts
def __init__(self, aod=0.1, diameter_range=(1e2, 2e4, 100)):
    """
    Aerosol models used by the ABI aerosol optical depth retrieval.

    From: GOES-R Advanced Baseline Imager (ABI) Algorithm Theoretical Basis
    Document For Suspended Matter/Aerosol Optical Depth and Aerosol Size
    Parameter
    https://www.goes-r.gov/resources/docs.html

    The aerosol models of satellite retrievals do not strictly follow my
    model class so this is not inheriting Model at this point ... maybe
    later?

    WARNING: I am pretty sure there is a normalization problem, since I
    substitute um with nm and r with d without doing proper normalizing.
    Also, the model is created for the natural logarithm while atmPy
    assumes a log_10! This requires a further normalization (see Seinfeld &
    Pandis). These things do not affect the general shape but will need to
    be addressed if absolute values are considered.

    For every model ('generic', 'urban', 'smoke', 'dust') the volume
    distribution is the sum of a fine and a coarse lognormal mode. Each
    resulting SizeDist is stored in the dict self.models and additionally
    set as an attribute named after the model. The parameter table is kept
    in self.model_parameters and the optical depth in self.aod.

    Parameters
    ----------
    aod : float
        Aerosol optical depth. The exact aerosol model depends on the
        aerosol optical depth.
    diameter_range : array-like, optional
        (smallest diameter, largest diameter, number of points); the
        diameters are in nanometer. The default is (1e2, 2e4, 100).

    Returns
    -------
    None.

    """
    models = pd.DataFrame([
        {
            'model': 'generic',
            'mode': 'fine',
            'rv': 0.145,
            'rv_scale': 0.0203,
            'sig': 0.3738,
            'sig_scale': 0.1365,
            'Cv': .1642,
            'Cv_scale': 0.7747,
            'n_r': 1.43,
            'n_i': 0.008,
            'n_scale': 0.002
        },
        {
            'model': 'generic',
            'mode': 'coarse',
            'rv': 3.1007,
            'rv_scale': 0.3364,
            'sig': 0.7292,
            'sig_scale': 0.098,
            'Cv': 0.1482,
            'Cv_scale': 0.6846,
            'n_r': 1.43,
            'n_i': 0.008,
            'n_scale': 0.002
        },
        {
            'model': 'urban',
            'mode': 'fine',
            'rv': 0.1604,
            # NOTE(review): more than an order of magnitude larger than the
            # fine mode rv_scale of 'generic' (0.0203) and 'smoke' (0.0096);
            # possibly a typo -- verify against the table in the ATBD.
            'rv_scale': 0.434,
            'sig': 0.3642,
            'sig_scale': 0.1529,
            'Cv': 0.1718,
            'Cv_scale': 0.8213,
            'n_r': 1.42,
            'n_i': 0.007,
            'n_scale': 0.0015
        },
        {
            'model': 'urban',
            'mode': 'coarse',
            'rv': 3.3252,
            'rv_scale': 0.1411,
            'sig': 0.7595,
            'sig_scale': 0.1638,
            'Cv': 0.0934,
            'Cv_scale': 0.6394,
            'n_r': 1.42,
            'n_i': 0.007,
            'n_scale': 0.0015
        },
        {
            'model': 'smoke',
            'mode': 'fine',
            'rv': 0.1335,
            'rv_scale': 0.0096,
            'sig': 0.3834,
            'sig_scale': 0.0794,
            'Cv': 0.1748,
            'Cv_scale': 0.8914,
            'n_r': 1.51,
            'n_i': 0.02,
            'n_scale': 0
        },
        {
            'model': 'smoke',
            'mode': 'coarse',
            'rv': 3.4479,
            'rv_scale': 0.9489,
            'sig': 0.7433,
            'sig_scale': 0.0409,
            'Cv': 0.1043,
            'Cv_scale': 0.6824,
            'n_r': 1.51,
            'n_i': 0.02,
            'n_scale': 0
        },
        {
            'model': 'dust',
            'mode': 'fine',
            'rv': 0.1416,
            'rv_scale': -0.0519,
            'sig': 0.7561,
            'sig_scale': 0.148,
            'Cv': 0.087,
            'Cv_scale': 1.026,
            'n_r': 1.48,
            'n_i': 0.0025,
            'n_scale': (-0.021, 0.132)
        },
        {
            'model': 'dust',
            'mode': 'coarse',
            'rv': 2.2,
            'rv_scale': 0,
            'sig': 0.554,
            'sig_scale': -0.0519,
            'Cv': 0.6786,
            'Cv_scale': 1.0569,
            'n_r': 1.48,
            'n_i': 0.0025,
            'n_scale': (-0.021, 0.132)
        },
    ])
    self.model_parameters = models
    self.aod = aod

    # The models are formulated for the radius in um, the input is a
    # diameter in nm: divide by 2 and by 1e3.
    r_range = np.array(diameter_range[:2]) / 2 / 1e3
    # int() so that a float point count (e.g. from a float array) is
    # accepted; np.logspace needs an integer number of samples.
    n_points = int(diameter_range[2])
    r = np.logspace(np.log10(r_range[0]), np.log10(r_range[1]), n_points)

    # Bin edges and column names are based on the diameter in nm again.
    bins, names = db.bincenters2binsANDnames(r * 2 * 1e3)

    # Loop invariants of the lognormal evaluation below.
    log_r = np.log(r)
    sqrt_2pi = np.sqrt(2 * np.pi)

    dists = {}
    for mo in models.model.unique():
        modes = models[models.model == mo]
        # The dust parameters scale with a power of the aod, those of all
        # other models scale linearly with the aod.
        is_dust = mo == 'dust'

        volume = np.zeros(r.shape)
        for _, row in modes.iterrows():
            if is_dust:
                rv = row.rv * aod**row.rv_scale
                sig = row.sig * aod**row.sig_scale
            else:
                rv = row.rv + (row.rv_scale * aod)
                sig = row.sig + (row.sig_scale * aod)
            # The volume concentration follows a power law for all models.
            Cv = row.Cv * aod**row.Cv_scale

            # Lognormal mode (natural logarithm of the radius).
            volume += Cv / (sqrt_2pi * sig) * np.exp(
                -(log_r - np.log(rv))**2 / (2 * sig**2))

        dists[mo] = sd.SizeDist(pd.DataFrame([volume], columns=names),
                                bins, 'dVdlogDp')

    self.models = dists
    # Expose every model as an attribute, e.g. self.generic, self.dust.
    for name, dist in dists.items():
        setattr(self, name, dist)