def read_csv(fname, temperature_limits=(-20, -0.5)):
    """
    Arguments
    ---------
    temperature_limits: tuple
        The temperature reading contains false values which can cause
        problems later; readings outside these limits are dropped.
    """
    df = pd.read_csv(fname, sep='\t')
    pandas_tools.ensure_column_exists(df, 'DateTime', _date_time_alts)
    pandas_tools.ensure_column_exists(df, 'Pressure_Pa', _pressure_alt)
    pandas_tools.ensure_column_exists(df, 'Temperature', _temp_alt)
    pandas_tools.ensure_column_exists(df, 'Relative_humidity', _RH_alt)

    df.index = pd.Series(pd.to_datetime(df.DateTime, format='%Y-%m-%d %H:%M:%S'))
    df = df.drop('DateTime', axis=1)
    df = df.sort_index()

    if temperature_limits:
        df = df[df.Temperature > temperature_limits[0]]
        df = df[df.Temperature < temperature_limits[1]]

    hk = timeseries.TimeSeries(df)
    return hk
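# Usage sketch for read_csv; the file name is a hypothetical placeholder.
# TimeSeries instances in this package wrap a pandas DataFrame in their
# .data attribute, so standard pandas operations apply.
def _example_read_csv():
    hk = read_csv('housekeeping_20150622.csv')  # hypothetical file
    # pass temperature_limits=None to skip the false-reading filter
    hk_raw = read_csv('housekeeping_20150622.csv', temperature_limits=None)
    return hk.data.Temperature.describe(), hk_raw.data.shape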
def extract_singlescatteringalbedo(df, version):
    """
    Extract the single scattering albedo for all aerosols (there is no
    separation into fine and coarse).

    Parameters
    ----------
    df : pandas.DataFrame
        Data frame containing the single scattering albedo columns.
    version : int
        File format version (2 or 3); determines the column naming scheme.

    Returns
    -------
    TimeSeries
    """
    if version == 2:
        ssa_txt = 'SSA'
    elif version == 3:
        ssa_txt = 'Single_Scattering_Albedo'
    else:
        raise ValueError('version has to be 2 or 3, not {}'.format(version))

    ssa = df.loc[:, [i for i in df.columns if ssa_txt in i]]
    # keep only the numeric part (the wavelength) of each column name
    ssa.columns = [''.join([e for e in i if e.isnumeric()]) for i in ssa.columns]
    ssa.columns.name = 'channel (nm)'
    return atmts.TimeSeries(ssa)
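# Minimal, self-contained sketch of the column selection and renaming that
# extract_singlescatteringalbedo performs. The column names and values below
# are made up for illustration; version-3 files are assumed to look similar.
def _example_extract_ssa():
    import pandas as pd
    idx = pd.date_range('2015-06-22', periods=2, freq='D')
    df = pd.DataFrame({'Single_Scattering_Albedo[440nm]': [0.91, 0.93],
                       'Single_Scattering_Albedo[675nm]': [0.88, 0.90],
                       'AOD_440nm': [0.2, 0.3]},  # filtered out, no SSA text
                      index=idx)
    ssa = extract_singlescatteringalbedo(df, version=3)
    return ssa  # columns are now '440' and '675', named 'channel (nm)'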
def _concat(self, arm_data_objs, close_gaps=True):
    for att in self._concatable:
        first_object = getattr(arm_data_objs[0], att)
        which_type = type(first_object).__name__
        data_period = first_object._data_period
        if which_type == 'TimeSeries_2D':
            value = _timeseries.TimeSeries_2D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'TimeSeries':
            value = _timeseries.TimeSeries(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'AMS_Timeseries_lev01':
            value = _AMS.AMS_Timeseries_lev01(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'SizeDist_TS':
            data = _pd.concat([getattr(i, att).data for i in arm_data_objs])
            value = _sizedistribution.SizeDist_TS(
                data, getattr(arm_data_objs[0], att).bins, 'dNdlogDp')
        elif which_type == 'TimeSeries_3D':
            value = _timeseries.TimeSeries_3D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        else:
            raise TypeError(
                '%s is not an allowed type here (TimeSeries, TimeSeries_2D, '
                'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)' % which_type)
        value._data_period = data_period
        if close_gaps:
            setattr(self, att, value.close_gaps())
        else:
            setattr(self, att, value)
def hemispheric_forwardscattering(self):
    if not self.__hemispheric_forwardscattering:
        out = hemispheric_forwardscattering(self.angular_scatt_func.data)
        out = timeseries.TimeSeries(out)
        out._data_period = self.angular_scatt_func._data_period
        self.__hemispheric_forwardscattering = out
    return self.__hemispheric_forwardscattering
def _read_files(folder, files, verbose):
    if len(files) == 0:
        raise ValueError('no files to open')
    if verbose:
        print('Reading files:')
    data_list = []
    header_first = _read_header(folder, files[0])
    for fname in files:
        if verbose:
            print('\t{}'.format(fname), end=' ... ')
        header = _read_header(folder, fname)
        # make sure that all the headers are identical
        if header_first['platform'] != header['platform']:
            raise ValueError('The site name changed from {} to {}!'.format(
                header_first['platform'], header['platform']))
        data = read_data(folder, fname, header=header)
        data_list.append(data)
        if verbose:
            print('done')

    # concatenate and sort the DataFrames and create a TimeSeries instance
    data = _pd.concat(data_list, sort=True)
    data[data == -999.0] = _np.nan
    data[data == -9.999] = _np.nan
    data = _timeseries.TimeSeries(data, sampling_period=1 * 60)
    data.header = header_first
    if verbose:
        print('done')
    return data
def get_oceanic_nino_index():
    # get the data from the internet (by parsing the table on the page)
    url = 'https://origin.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/ONI_v5.php'
    html = _ul.request.urlopen(url).read()
    tables = _pd.read_html(html)
    noi = tables[8]  # the ONI values are in the ninth table on the page

    # format the table
    noi_ts = _pd.DataFrame()
    for idx, row in noi.iterrows():
        if row.iloc[0] == 'Year':  # skip the repeated header rows
            continue
        year = row.iloc[0]
        values = _pd.DataFrame(row.iloc[1:])
        values.index = values.apply(
            lambda x: _pd.to_datetime(f'{year}-{x.name:02d}-15'), axis=1)
        values.index.name = 'datetime'
        values.columns = ['noi']
        # DataFrame.append is deprecated (removed in pandas 2); use concat
        noi_ts = _pd.concat([noi_ts, values.astype(float)])
    noi_ts = _ts.TimeSeries(noi_ts)
    return noi_ts
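# Usage sketch (requires network access). Slicing the .data DataFrame by
# date strings is standard pandas; the chosen period is just an example.
def _example_oni():
    oni = get_oceanic_nino_index()
    return oni.data.loc['2015':'2016']  # the 2015/16 El Niño as a slice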
def hemispheric_backscattering_ratio(self):
    if not self.__hemispheric_backscattering_ratio:
        if _np.any(self.back_scatt.data.index != self.scatt_coeff.data.index):
            raise IndexError(
                'The indices do not seem to match; that should not be possible!')
        bdf = self.back_scatt.data
        sdf = self.scatt_coeff.data
        # strip the variable prefixes so both frames share the same
        # wavelength column names
        bk = [i.replace('Bbs_', '') for i in bdf.keys()]
        sk = [i.replace('Bs_', '') for i in sdf.keys()]
        if bk != sk:
            raise KeyError(
                'These two data frames do not seem to be the right ones ... '
                'headers do not match (%s, %s)' % (bk, sk))
        new_col_names = bk
        bdf.columns = new_col_names
        sdf.columns = new_col_names
        out = _timeseries.TimeSeries(bdf.div(sdf))
        out._data_period = self.back_scatt._data_period
        self.__hemispheric_backscattering_ratio = out
    return self.__hemispheric_backscattering_ratio
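# Self-contained sketch of the align-and-divide step above: two frames with
# prefixed wavelength columns (made-up numbers) are renamed to a common set
# of column names so the element-wise division lines up correctly.
def _example_backscatter_ratio():
    import pandas as pd
    bdf = pd.DataFrame({'Bbs_R': [1.0, 2.0], 'Bbs_G': [2.0, 4.0]})
    sdf = pd.DataFrame({'Bs_R': [10.0, 20.0], 'Bs_G': [20.0, 40.0]})
    bdf.columns = [c.replace('Bbs_', '') for c in bdf.columns]
    sdf.columns = [c.replace('Bs_', '') for c in sdf.columns]
    return bdf.div(sdf)  # backscatter fraction, here 0.1 everywhere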
def add_sun_elevetion(self, picco):
    """
    This function uses telemetry data from the airplane (any timeseries
    including Lat and Lon) to calculate the sun's elevation. Based on the
    sun's elevation an airmass factor is calculated which the data is
    corrected for.

    Arguments
    ---------
    picco: any timeseries instance containing Lat, Lon and Altitude
    """
    picco_t = timeseries.TimeSeries(
        picco.data.loc[:, ['Lat', 'Lon', 'Altitude']])  # only Altitude, Lat and Lon
    sun_int_su = self.merge(picco_t)
    sun_int_su.get_sun_position()

    arrays = np.array([
        sun_int_su.data.index, sun_int_su.data.Altitude,
        sun_int_su.data.Solar_position_elevation
    ])
    tuples = list(zip(*arrays))
    index = pd.MultiIndex.from_tuples(
        tuples, names=['Time', 'Altitude', 'Sunelevation'])
    sun_int_su.data.index = index
    sun_int_su.data = sun_int_su.data.drop([
        'Altitude', 'Solar_position_elevation', 'Solar_position_azimuth',
        'Lon', 'Lat'
    ], axis=1)
    return sun_int_su
def load_skybrighness(fname):
    keys = [460.3, 860.7, 550.4, 671.2]  # channel wavelengths in nm
    out = SkyBrightDict()
    for k in keys:
        fn = fname + '_' + str(k) + '.csv'
        df = pd.read_csv(fn, index_col=0)
        df.columns = df.columns.astype(float)
        out[float(k)] = timeseries.TimeSeries(df)
    return out
def var2ts(self, var_list, column_name):
    """Extracts the listed variables from the file object and puts them all
    in one data frame, returned as a TimeSeries."""
    df = _pd.DataFrame(index=self.time_stamps)
    for var in var_list:
        data = self._read_variable(var)
        df[var] = _pd.Series(data, index=self.time_stamps)
    df.columns.name = column_name
    out = _timeseries.TimeSeries(df)
    out._data_period = self._data_period
    return out
def AOD(self):
    if not self._aod:
        if not self._aot:
            raise AttributeError('Make sure either AOD or AOT is set.')
        aod = self.AOT.data.div(self.sun_position.data.airmass, axis='rows')
        aod.columns.name = 'AOD@wavelength(nm)'
        aod = _timeseries.TimeSeries(aod)
        self._aod = aod
    return self._aod
def AOT(self):
    if not self._aot:
        if not self._aod:
            raise AttributeError('Make sure either AOD or AOT is set.')
        aot = self.AOD.data.mul(self.sun_position.data.airmass, axis='rows')
        aot.columns.name = 'AOT@wavelength(nm)'
        aot = _timeseries.TimeSeries(aot)
        self._aot = aot
    return self._aot
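# Self-contained sketch of the relation the two properties above implement:
# the optical thickness along the slant path (AOT) equals the vertical
# optical depth (AOD) times the airmass factor, so dividing/multiplying by
# the airmass converts between the two (numbers below are made up).
def _example_aod_aot():
    import pandas as pd
    aot = pd.DataFrame({500: [0.30, 0.42], 870: [0.15, 0.20]})
    airmass = pd.Series([1.5, 2.0])
    aod = aot.div(airmass, axis='rows')       # slant path -> vertical
    aot_back = aod.mul(airmass, axis='rows')  # vertical -> slant path
    return aod, aot_back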
def _read_csv(fname, norm2time=True, norm2flow=True):
    uhsas = _readFromFakeXLS(fname)
    sd, hk = _separate_sizedist_and_housekeep(
        uhsas, norm2time=norm2time, norm2flow=norm2flow)
    hk = timeseries.TimeSeries(hk)
    bins = _get_bins(sd)
    dist = sizedistribution.SizeDist_TS(sd, bins, 'numberConcentration')
    return dist, hk
def read_radiosonde_csv(fname, cal):
    """Reads a radiosonde csv file and returns a size distribution and a
    housekeeping TimeSeries.

    Parameters
    ----------
    fname: str
        Name of the file to be opened.
    cal: str or calibration instance
        Either pass the name of the file containing the calibration data,
        or a calibration instance.
    """
    df = pd.read_csv(fname, header=15)

    # clean up the column names
    fkt = lambda x: x.lstrip(' ').replace(' ', '_')
    col_new = [fkt(i) for i in df.columns.values]
    df.columns = col_new

    time = df['date_[y-m-d_GMT]'] + df['time_[h:m:s_GMT]'] + '.' + df['milliseconds'].astype(str)
    df.index = pd.Series(pd.to_datetime(time, format=time_tools.get_time_formate()))

    df[df == 99999.000] = np.nan

    alt = df['GPS_altitude_[km]'].copy()
    df['Altitude'] = alt * 1e3  # km -> m
    df.rename(columns={'GPS_latitude': 'Lat', 'GPS_longitude': 'Lon'}, inplace=True)

    # separate the size distribution (bin columns) from the housekeeping data
    bins = [k for k in df.keys() if 'Bin' in k]
    sd = df.loc[:, bins]
    hk = df.drop(bins, axis=1)

    hk = timeseries.TimeSeries(hk)
    hk.data.sort_index(inplace=True)
    hk.data.Altitude.interpolate(inplace=True)
    hk.data['temperature_K'] = hk.data['iMet_air_temperature_(corrected)_[deg_C]'] + 273.15
    hk.data['pressure_Pa'] = hk.data['iMet_pressure_[mb]'] * 100

    cal = calibration.read_csv(cal)
    ib = cal.get_interface_bins(20)
    sd = sizedistribution.SizeDist_TS(
        sd, ib['binedges_v_int'].values.transpose()[0], 'numberConcentration')
    return sd, hk
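# Usage sketch; both file names are hypothetical placeholders.
def _example_read_radiosonde():
    sd, hk = read_radiosonde_csv('sonde_20150622.csv',
                                 'pops_calibration_20150622.csv')
    # sd is a SizeDist_TS, hk a TimeSeries; e.g. temperature vs altitude:
    return hk.data[['Altitude', 'temperature_K']]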
def sun_position(self):
    if not self._sunposition:
        if self._timezone != 0:
            # remove the timezone offset before computing the sun position
            date = self._timestamp_index + _pd.to_timedelta(-1 * self._timezone, 'h')
        else:
            date = self._timestamp_index
        self._sunposition = _solar.get_sun_position(self.site.lat, self.site.lon, date)
        self._sunposition.index = self._timestamp_index
        self._sunposition = _timeseries.TimeSeries(self._sunposition)
    return self._sunposition
def extinction_coeff(self):
    if not np.any(self.__extinction_coeff_sum_along_d):
        data = self.extinction_coeff_per_bin.data.sum(axis=1)
        df = pd.DataFrame()
        df['ext_coeff_m^1'] = data
        if self._parent_type == 'SizeDist_TS':
            self.__extinction_coeff_sum_along_d = timeseries.TimeSeries(df)
        elif self._parent_type == 'SizeDist':
            self.__extinction_coeff_sum_along_d = df
        else:
            raise TypeError('not possible for this distribution type')
        self.__extinction_coeff_sum_along_d._data_period = self._data_period
    return self.__extinction_coeff_sum_along_d
def extinction_coeff_sum_along_d(self):
    _warnings.warn('extinction_coeff_sum_along_d is deprecated and will be '
                   'removed in future versions. Use extinction_coeff instead.',
                   DeprecationWarning)
    if not np.any(self.__extinction_coeff_sum_along_d):
        data = self.extinction_coeff_per_bin.data.sum(axis=1)
        df = pd.DataFrame()
        df['ext_coeff_m^1'] = data
        if self._parent_type == 'SizeDist_TS':
            self.__extinction_coeff_sum_along_d = timeseries.TimeSeries(df)
        elif self._parent_type == 'SizeDist':
            self.__extinction_coeff_sum_along_d = df
        else:
            raise TypeError('not possible for this distribution type')
        self.__extinction_coeff_sum_along_d._data_period = self._data_period
    return self.__extinction_coeff_sum_along_d
def split_revolutions(self, peaks='l', time_delta=(5, 20), revolution_period=26.):
    """This function reorganizes the miniSASP data in a way that all sun
    transits are stacked on top of each other and the time is translated
    to an angle."""
    ulr = self.copy()
    if peaks == 's':
        peaks_s = ulr.find_peaks()
    elif peaks == 'l':
        peaks_s = ulr.find_peaks(which='long')
    time_delta_back = time_delta[0]
    time_delta_forward = time_delta[1]
    photos = [ulr.data.PhotoA, ulr.data.PhotoB, ulr.data.PhotoC, ulr.data.PhotoD]
    out_dict = {}
    for u, i in enumerate(ulr.channels):
        centers = peaks_s.data[str(i)].dropna().index.values
        df = pd.DataFrame()
        PAl = photos[u]
        for e, center in enumerate(centers):
            start = center - np.timedelta64(time_delta_back, 's')
            end = center + np.timedelta64(time_delta_forward, 's')
            PAlt = PAl.truncate(before=start, after=end, copy=True)
            PAlt.index = PAlt.index - center
            # For some reason there are values equal to 0 which would screw
            # up the averaging below, so they are dropped.
            PAlt = PAlt[PAlt != 0]
            # .mean() is required on modern pandas; older versions averaged
            # implicitly when resampling
            df[center] = PAlt.resample('50ms').mean()
        # translate the time axis (seconds) into an angle (radians)
        df.index = (df.index.values -
                    np.datetime64('1970-01-01T00:00:00.000000000')) / np.timedelta64(1, 's')
        df.index = df.index.values / revolution_period * 2 * np.pi
        out = timeseries.TimeSeries(df.transpose())
        out_dict[i] = out
    return out_dict
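# Self-contained sketch of the time-to-angle translation used above: a time
# offset from the transit center, divided by the revolution period, maps one
# full revolution onto 2*pi radians (the offsets below are made up).
def _example_time_to_angle(revolution_period=26.):
    import numpy as np
    seconds_from_center = np.array([-5., 0., 13., 26.])
    angle = seconds_from_center / revolution_period * 2 * np.pi
    return angle  # 26 s -> 2*pi, i.e. one full revolution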
def _concat(self, arm_data_objs, close_gaps=True):
    for att in self._concatable:
        first_object = getattr(arm_data_objs[0], att)
        which_type = type(first_object).__name__
        data_period = first_object._data_period
        if which_type == 'TimeSeries_2D':
            value = _timeseries.TimeSeries_2D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'TimeSeries':
            value = _timeseries.TimeSeries(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'AMS_Timeseries_lev01':
            value = _AMS.AMS_Timeseries_lev01(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        elif which_type == 'SizeDist_TS':
            data = _pd.concat([getattr(i, att).data for i in arm_data_objs])
            value = _sizedistribution.SizeDist_TS(
                data, getattr(arm_data_objs[0], att).bins, 'dNdlogDp',
                ignore_data_gap_error=True)
        elif which_type == 'TimeSeries_3D':
            value = _timeseries.TimeSeries_3D(
                _pd.concat([getattr(i, att).data for i in arm_data_objs]))
        else:
            raise TypeError(
                '%s is not an allowed type here (TimeSeries, TimeSeries_2D, '
                'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)' % which_type)

        if hasattr(first_object, 'availability'):
            try:
                avail_concat = _pd.concat(
                    [getattr(i, att).availability.availability for i in arm_data_objs])
                avail = Data_Quality(None, avail_concat, None, first_object.flag_info)
                value.availability = avail
            except Exception:
                _warnings.warn('availability could not be concatenated; make sure '
                               'you converted it to a pandas frame at some point!')

        value._data_period = data_period
        if close_gaps:
            setattr(self, att, value.close_gaps())
        else:
            setattr(self, att, value)
def aod2angstrom_exponent(self, column_1=500, column_2=870,
                          use_wavelength_from_column_names=None):
    """
    Calculates the Angstrom exponent from the AOD data.

    Parameters
    ----------
    column_1: column name (typically int or str)
        Column name of one of the two wavelengths used for the calculation.
    column_2: column name (typically int or str)
        Column name of the other wavelength used for the calculation.
    use_wavelength_from_column_names: bool [None]
        When the wavelengths dictionary is set, wavelengths from the
        dictionary are used instead of the column names. Set this kwarg to
        True to ignore the wavelengths dictionary and use the column names
        instead.

    Returns
    -------
    TimeSeries
    """
    if isinstance(self.wavelengths, type(None)) or use_wavelength_from_column_names:
        wavelength_1 = column_1
        wavelength_2 = column_2
    else:
        wavelength_1 = self.wavelengths[column_1]
        wavelength_2 = self.wavelengths[column_2]
    c1 = column_1
    c2 = column_2
    c1ex = wavelength_1
    c2ex = wavelength_2
    # the ratio of two logarithms is base-independent, so log10 gives the
    # same exponent as the natural log
    out = -_np.log10(self.AOD.data.loc[:, c1] / self.AOD.data.loc[:, c2]) / _np.log10(c1ex / c2ex)
    out = _timeseries.TimeSeries(_pd.DataFrame(out))
    setattr(self, 'ang_exp_{}_{}'.format(column_1, column_2), out)
    return out
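# Self-contained sketch of the Angstrom exponent formula used above,
# alpha = -ln(tau_1/tau_2) / ln(lambda_1/lambda_2), with made-up AOD values:
def _example_angstrom_exponent():
    import numpy as np
    tau_500, tau_870 = 0.30, 0.15
    alpha = -np.log(tau_500 / tau_870) / np.log(500. / 870.)
    return alpha  # ~1.25, typical of fine-mode dominated aerosol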
def optical_depth_amf(self):
    """OD * airmass factor + unknown offset. After determining the offset
    you might want to set the property optical_depth_amf_offsets."""
    if not self.__od_amf_orig:
        self.__od_amf_orig = timeseries.TimeSeries(
            -1 * np.log(self.sun_intensities.data))
        self.__od_amf_orig._data_period = self.sun_intensities._data_period
    if self.optical_depth_amf_offsets != self.__od_afm_offset_last:
        # work on a copy so the offsets do not accumulate on the original
        self.__od_amf = self.__od_amf_orig.copy()
        cols = self.__od_amf.data.columns
        for e, col in enumerate(cols):
            self.__od_amf.data[col] += self.optical_depth_amf_offsets[col]
        # remember which offsets were applied so they are not applied twice
        self.__od_afm_offset_last = self.optical_depth_amf_offsets.copy()
    return self.__od_amf
def _read_file(fname):
    with open(fname, 'r') as picof:
        header = picof.readline()
    header = header.split(' ')
    header_cleaned = []
    for head in header:
        bla = head.replace('<', '').replace('>', '')
        where = bla.find('[')
        if where != -1:
            bla = bla[:where]
        header_cleaned.append(bla)

    data = pd.read_csv(fname, names=header_cleaned, sep=' ', skiprows=1, header=0)
    # drop the first 20 lines, since the time is often wrong there
    data.drop(range(20), inplace=True)

    time_series = (data.Year.astype(str) + '-' +
                   data.Month.apply(lambda x: '%02i' % x) + '-' +
                   data.Day.apply(lambda x: '%02i' % x) + ' ' +
                   data.Hours.apply(lambda x: '%02i' % x) + ':' +
                   data.Minutes.apply(lambda x: '%02i' % x) + ':' +
                   data.Seconds.apply(lambda x: '%05.2f' % x))
    data.index = pd.Series(
        pd.to_datetime(time_series, format=time_tools.get_time_formate()))
    _drop_some_columns(data)

    # convert from rad to deg
    data.Lat.values[:] = np.rad2deg(data.Lat.values)
    data.Lon.values[:] = np.rad2deg(data.Lon.values)

    data['Altitude'] = data['Height']
    data = data.drop('Height', axis=1)
    data.sort_index(inplace=True)
    return timeseries.TimeSeries(data, {'original header': header})
def _parse_netCDF(self):
    """Parses the netCDF variables and attaches them as timeseries attributes."""
    super(ArmDatasetSub, self)._parse_netCDF()

    size_bins = self._read_variable('size_bins')['data'] * 1000  # convert to nm
    df = pd.DataFrame(self._read_variable('RH_interDMA')['data'],
                      index=self.time_stamps,
                      columns=size_bins)
    df.columns.name = 'size_bin_center_nm'
    self.RH_interDMA = timeseries.TimeSeries(df)
    self.RH_interDMA._data_period = self._data_period

    data = self._read_variable('hyg_distributions')['data']
    growthfactors = self._read_variable('growthfactors')['data']
    data = pd.Panel(data, items=self.time_stamps,
                    major_axis=size_bins, minor_axis=growthfactors)
    data.major_axis.name = 'size_bin_center_nm'
    data.minor_axis.name = 'growthfactors'
    self.hyg_distributions = timeseries.TimeSeries_3D(data)
    self.hyg_distributions._data_period = self._data_period
def _read_files(folder, files, verbose):
    def read_data(folder, filename, header=None):
        """Reads a file, takes care of the timestamp, and returns a DataFrame."""
        if not header:
            header = _read_header(folder, filename)
        dateparse = lambda x: datetime.datetime.strptime(x, "%d:%m:%Y %H:%M:%S")
        df = _pd.read_csv(folder + '/' + filename,
                          skiprows=header['header_size'],
                          parse_dates={'times': [0, 1]},
                          date_parser=dateparse)
        df = df.set_index('times')
        return df

    if verbose:
        print('Reading files:')
    data_list = []
    header_first = _read_header(folder, files[0])
    for fname in files:
        if verbose:
            print('\t{}'.format(fname), end=' ... ')
        header = _read_header(folder, fname)
        # make sure that all the headers are identical
        assert header_first == header, 'the header changed between files'
        data = read_data(folder, fname, header=header)
        data_list.append(data)
        if verbose:
            print('done')

    # concatenate and sort the DataFrames and create a TimeSeries instance
    data = _pd.concat(data_list)
    data.sort_index(inplace=True)
    data[data == -999.0] = _np.nan
    data = _timeseries.TimeSeries(data, sampling_period=15 * 60)
    data.header = header_first
    if verbose:
        print('done')
    return data
def _read_variable2timeseries(self, variable, column_name=False, reverse_qc_flag=False):
    """
    Reads the specified variables and puts them into a timeseries.

    Parameters
    ----------
    variable: string or list of strings
        variable names
    column_name: bool or string
        This is a chance to give units. This will also be the y-label if
        the data is plotted.

    Returns
    -------
    TimeSeries
    """
    if type(variable).__name__ == 'str':
        variable = [variable]

    df = _pd.DataFrame(index=self.time_stamps)
    for var in variable:
        variable_out = self._read_variable(var, reverse_qc_flag=reverse_qc_flag)
        df[var] = _pd.Series(variable_out['data'], index=self.time_stamps)
    if column_name:
        df.columns.name = column_name

    out = _timeseries.TimeSeries(df)
    if column_name:
        out._y_label = column_name
    out._data_period = self._data_period
    # note: the availability of the last variable read is the one attached
    out.availability = Data_Quality(self, variable_out['availability'],
                                    variable_out['availability_type'])
    return out
def read_csv(fname, temperature_limits=(-20, -0.5)):
    """
    Arguments
    ---------
    temperature_limits: tuple
        The temperature reading contains false values which can cause
        problems later; readings outside these limits are dropped.
    """
    df = _pd.read_csv(fname, sep='\t')
    _pandas_tools.ensure_column_exists(df, 'DateTime', _date_time_alts)
    _pandas_tools.ensure_column_exists(df, 'Pressure_Pa', _pressure_alt)
    _pandas_tools.ensure_column_exists(df, 'Temperature', _temp_alt)
    _pandas_tools.ensure_column_exists(df, 'Relative_humidity', _RH_alt)
    _pandas_tools.ensure_column_exists(df, 'Temperature_instrument',
                                       _temp_payload_alt, raise_error=False)
    _pandas_tools.ensure_column_exists(df, 'CN_concentration',
                                       _cn_concentration_alt, raise_error=False)
    try:
        df.Temperature_instrument = _pd.to_numeric(df.Temperature_instrument,
                                                   errors='coerce')
        df.CN_concentration = _pd.to_numeric(df.CN_concentration, errors='coerce')
        df.CONCN = _pd.to_numeric(df.CONCN, errors='coerce')
        df.COUNT = _pd.to_numeric(df.COUNT, errors='coerce')
    except AttributeError:
        pass

    df.index = _pd.Series(_pd.to_datetime(df.DateTime, format='%Y-%m-%d %H:%M:%S'))
    df = df.drop('DateTime', axis=1)
    df = df.sort_index()

    if temperature_limits:
        df = df[df.Temperature > temperature_limits[0]]
        df = df[df.Temperature < temperature_limits[1]]

    hk = _timeseries.TimeSeries(df)
    hk._data_period = 2
    return hk
def f_RH_scatt_3p(self):
    """Note: f(RH) calculated with this function has a mysterious offset in
    it. If the plan is to calculate f(RH) between 80 and 40 you actually
    have to apply this function for both values and then divide."""
    if not self.__f_RH_scatt_3p:
        if not self.sup_RH:
            raise ValueError('please set the relative humidity in sup_RH')

        def applyfunk(value):
            # cells that do not hold a function cannot be evaluated
            if type(value).__name__ == 'function':
                return value(self.sup_RH)
            else:
                return _np.nan

        data = self.f_RH_scatt_funcs_3p.data.applymap(applyfunk)
        self.__f_RH_scatt_3p = _timeseries.TimeSeries(data)
        self.__f_RH_scatt_3p._data_period = self.f_RH_scatt_funcs_3p._data_period
    return self.__f_RH_scatt_3p
def zdanovskii_stokes_robinson(data, which='refractive_Index'):
    """Zdanovskii-Stokes-Robinson mixing rule (Stokes and Robinson, 1966).

    Arguments
    ---------
    data: pandas.DataFrame
        containing the chemical composition data
    which: str
        which property to mix ['refractive_Index', 'density', 'kappa_chem']
    """
    materials = _properties.get_commen()
    materials.index = materials.species_name

    essential_electrolytes = ['ammonium_sulfate', 'ammonium_nitrate',
                              'ammonium_chloride', 'sodium_chloride',
                              'sodium_sulfate', 'sodium_nitrate',
                              'calcium_nitrate', 'calcium_chloride',
                              'organic_aerosol']

    electrolytes = materials.loc[essential_electrolytes]
    electrolytes = electrolytes[['refractive_Index', 'density', 'kappa_chem']]

    _pandas_tools.ensure_column_exists(data, 'organic_aerosol',
                                       col_alt=['total_organics'])
    for e in essential_electrolytes:
        _pandas_tools.ensure_column_exists(data, e)

    tobemixed = electrolytes[which]
    # volume-weighted mean: mass concentrations are converted to volumes by
    # dividing by the density
    numerator = (data * tobemixed / electrolytes.density).sum(axis=1)
    denominator = (data / electrolytes.density).sum(axis=1)
    mixed = numerator / denominator
    df = _pd.DataFrame(mixed, columns=[which])
    ts = _timeseries.TimeSeries(df)
    return ts
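# Self-contained sketch of the volume-weighted mixing the ZSR rule reduces
# to here, with two made-up species (mass concentrations, densities, and
# refractive indices below are illustrative numbers only):
def _example_zsr_mixing():
    import numpy as np
    mass = np.array([2.0, 1.0])      # ug/m^3 of species A and B
    density = np.array([1.77, 1.4])  # g/cm^3
    n = np.array([1.53, 1.45])       # refractive indices to be mixed
    volume = mass / density
    n_mixed = (volume * n).sum() / volume.sum()
    return n_mixed  # ~1.50, between the two values, weighted by volume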
def _read_variable2timeseries(self, variable, column_name=False, reverse_qc_flag=False):
    """
    Reads the specified variables and puts them into a timeseries.

    Parameters
    ----------
    variable: string or list of strings
        variable names
    column_name: bool or string
        This is a chance to give units. This will also be the y-label if
        the data is plotted.

    Returns
    -------
    TimeSeries
    """
    if type(variable).__name__ == 'str':
        variable = [variable]

    df = _pd.DataFrame(index=self.time_stamps)
    for var in variable:
        data = self._read_variable(var, reverse_qc_flag=reverse_qc_flag)
        df[var] = _pd.Series(data, index=self.time_stamps)
    if column_name:
        df.columns.name = column_name

    out = _timeseries.TimeSeries(df)
    if column_name:
        out._y_label = column_name
    out._data_period = self._data_period
    return out
def _parse_netCDF(self):
    super(ArmDatasetSub, self)._parse_netCDF()

    # the 2-parameter function: Bsp(RH%)/Bsp(~40%) = a * [1 - (RH%/100)]^(-b)
    def ab_2_f_RH_func(ab):
        ab = ab.copy()
        a, b = ab
        # a = 1.  # reportedly a is supposed to be set to one
        f_RH = lambda RH: a * (1 - (RH / 100.))**(-b)
        return f_RH

    varies = ['fRH_Bs_R_10um_2p', 'fRH_Bs_G_10um_2p', 'fRH_Bs_B_10um_2p',
              'fRH_Bs_R_1um_2p', 'fRH_Bs_G_1um_2p', 'fRH_Bs_B_1um_2p']
    df = _pd.DataFrame(index=self.time_stamps)
    for key in varies:
        data = self._read_variable(key, reverse_qc_flag=8)
        dft = _pd.DataFrame(data['data'], index=self.time_stamps)
        df[key] = dft.apply(ab_2_f_RH_func, axis=1)
        if key == 'fRH_Bs_G_1um_2p':
            self.f_RH_scatt_2p_ab_G_1um = _timeseries.TimeSeries(_pd.DataFrame(dft))
            self.f_RH_scatt_2p_ab_G_1um._data_period = self._data_period
    self.f_RH_scatt_funcs_2p = _timeseries.TimeSeries(df)
    self.f_RH_scatt_funcs_2p._data_period = self._data_period

    # the 3-parameter function: f(RH) = a * [1 + b * (RH%/100)^c]
    def abc_2_f_RH_func(abc):
        abc = abc.copy()
        a, b, c = abc
        f_RH = lambda RH: a * (1 + (b * (RH / 100.)**c))
        return f_RH

    varies = ['fRH_Bs_R_10um_3p', 'fRH_Bs_G_10um_3p', 'fRH_Bs_B_10um_3p',
              'fRH_Bs_R_1um_3p', 'fRH_Bs_G_1um_3p', 'fRH_Bs_B_1um_3p']
    df = _pd.DataFrame(index=self.time_stamps)
    for key in varies:
        data = self._read_variable(key, reverse_qc_flag=8)
        dft = _pd.DataFrame(data['data'], index=self.time_stamps)
        df[key] = dft.apply(abc_2_f_RH_func, axis=1)
    self.f_RH_scatt_funcs_3p = _timeseries.TimeSeries(df)
    self.f_RH_scatt_funcs_3p._data_period = self._data_period

    # f(RH) at predefined points (85% over 40%)
    varies = ['ratio_85by40_Bs_R_10um_2p', 'ratio_85by40_Bs_G_10um_2p',
              'ratio_85by40_Bs_B_10um_2p', 'ratio_85by40_Bs_R_1um_2p',
              'ratio_85by40_Bs_G_1um_2p', 'ratio_85by40_Bs_B_1um_2p']
    self.f_RH_scatt_2p_85_40 = self._read_variable2timeseries(varies, reverse_qc_flag=8)

    varies = ['ratio_85by40_Bs_R_10um_3p', 'ratio_85by40_Bs_G_10um_3p',
              'ratio_85by40_Bs_B_10um_3p', 'ratio_85by40_Bs_R_1um_3p',
              'ratio_85by40_Bs_G_1um_3p', 'ratio_85by40_Bs_B_1um_3p']
    self.f_RH_scatt_3p_85_40 = self._read_variable2timeseries(varies, reverse_qc_flag=8)

    varies = ['ratio_85by40_Bbs_R_10um_2p', 'ratio_85by40_Bbs_G_10um_2p',
              'ratio_85by40_Bbs_B_10um_2p', 'ratio_85by40_Bbs_R_1um_2p',
              'ratio_85by40_Bbs_G_1um_2p', 'ratio_85by40_Bbs_B_1um_2p']
    self.f_RH_backscatt_2p_85_40 = self._read_variable2timeseries(varies, reverse_qc_flag=8)
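# Self-contained sketch evaluating the two f(RH) parameterizations above
# with made-up coefficients, to show how the stored functions are used:
def _example_f_rh():
    a2, b2 = 1.0, 0.6
    f_rh_2p = lambda RH: a2 * (1 - RH / 100.)**(-b2)
    a3, b3, c3 = 0.9, 1.5, 2.0
    f_rh_3p = lambda RH: a3 * (1 + b3 * (RH / 100.)**c3)
    # e.g. the 85%/40% enhancement ratio the ratio_85by40_* variables report
    return f_rh_2p(85.) / f_rh_2p(40.), f_rh_3p(85.) / f_rh_3p(40.)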