Example #1
def read_csv(fname, temperature_limits=(-20, -0.5)):
    """
    Arguments
    ---------
    temperature_limits: tuple
        The temperature channel contains false readings that can cause
        problems later; values outside these limits are dropped."""
    df = pd.read_csv(fname, sep='\t')

    pandas_tools.ensure_column_exists(df, 'DateTime', _date_time_alts)
    pandas_tools.ensure_column_exists(df, 'Pressure_Pa', _pressure_alt)
    pandas_tools.ensure_column_exists(df, 'Temperature', _temp_alt)
    pandas_tools.ensure_column_exists(df, 'Relative_humidity', _RH_alt)
    df.index = pd.Series(
        pd.to_datetime(df.DateTime, format='%Y-%m-%d %H:%M:%S'))
    df = df.drop('DateTime', axis=1)

    df = df.sort_index()

    if temperature_limits:
        df = df[df.Temperature > temperature_limits[0]]
        df = df[temperature_limits[1] > df.Temperature]

    hk = timeseries.TimeSeries(df)
    return hk
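
A minimal, self-contained sketch of the temperature screen above (the open-interval filter), on a plain DataFrame with hypothetical values:

import pandas as pd

df = pd.DataFrame({'Temperature': [-25.0, -10.0, 1.5]})
lo, hi = (-20, -0.5)
# equivalent to the two chained masks above, as a single boolean mask
df = df[(df.Temperature > lo) & (df.Temperature < hi)]  # keeps only -10.0
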
Example #2
def extract_singlescatteringalbedo(df, version):
    """
    Extract the single scattering albedo for all aerosols (there is no 
    seperation into fine and coarse).

    Parameters
    ----------
    df : TYPE
        DESCRIPTION.

    Returns
    -------
    None.


    """
    if version == 2:
        ssa_txt = 'SSA'
    elif version == 3:
        ssa_txt = 'Single_Scattering_Albedo'
    else:
        raise ValueError('version must be 2 or 3, not {}'.format(version))

    ssa = df.loc[:, [i for i in df.columns if ssa_txt in i]]
    ssa.columns = [
        ''.join([e for e in i if e.isnumeric()]) for i in ssa.columns
    ]
    ssa.columns.name = 'channel (nm)'

    return atmts.TimeSeries(ssa)
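
The column handling above rests on two small pandas idioms: select columns by substring, then strip everything but the digits to recover the wavelength channel. A minimal sketch with hypothetical column names:

import pandas as pd

df = pd.DataFrame(columns=['Single_Scattering_Albedo[440nm]',
                           'Single_Scattering_Albedo[675nm]',
                           'AOD[500nm]'])
ssa = df.loc[:, [c for c in df.columns if 'Single_Scattering_Albedo' in c]]
ssa.columns = [''.join(ch for ch in c if ch.isnumeric()) for c in ssa.columns]
print(list(ssa.columns))  # ['440', '675']
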
Example #3
    def _concat(self, arm_data_objs, close_gaps=True):
        for att in self._concatable:
            first_object = getattr(arm_data_objs[0], att)
            which_type = type(first_object).__name__
            data_period = first_object._data_period
            if which_type == 'TimeSeries_2D':
                value = _timeseries.TimeSeries_2D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'TimeSeries':
                value = _timeseries.TimeSeries(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'AMS_Timeseries_lev01':
                value = _AMS.AMS_Timeseries_lev01(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'SizeDist_TS':
                data = _pd.concat(
                    [getattr(i, att).data for i in arm_data_objs])
                value = _sizedistribution.SizeDist_TS(
                    data,
                    getattr(arm_data_objs[0], att).bins, 'dNdlogDp')
            elif which_type == 'TimeSeries_3D':
                value = _timeseries.TimeSeries_3D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            else:
                raise TypeError(
                    '%s is not an allowed type here (TimeSeries, TimeSeries_2D, '
                    'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)'
                    % which_type)

            value._data_period = data_period
            if close_gaps:
                setattr(self, att, value.close_gaps())
            else:
                setattr(self, att, value)
Example #4
 def hemispheric_forwardscattering(self):
     if not self.__hemispheric_forwardscattering:
         out = hemispheric_forwardscattering(self.angular_scatt_func.data)
         out = timeseries.TimeSeries(out)
         out._data_period = self.angular_scatt_func._data_period
         self.__hemispheric_forwardscattering = out
     return self.__hemispheric_forwardscattering
Example #5
def _read_files(folder, files, verbose):
    if len(files) == 0:
        raise ValueError('no files to open')

    if verbose:
        print('Reading files:')
    data_list = []
    header_first = _read_header(folder, files[0])
    for fname in files:
        if verbose:
            print('\t{}'.format(fname), end=' ... ')
        header = _read_header(folder, fname)
        # make sure that all the headers are identical
        if header_first['platform'] != header['platform']:
            raise ValueError('The site name changed from {} to {}!'.format(
                header_first['platform'], header['platform']))
        data = read_data(folder, fname, header=header)
        data_list.append(data)
        if verbose:
            print('done')

    # concatenate and sort the DataFrames and create a TimeSeries instance
    data = _pd.concat(data_list, sort=True)
    data[data == -999.0] = _np.nan
    data[data == -9.999] = _np.nan
    data = _timeseries.TimeSeries(data, sampling_period=1 * 60)
    data.header = header_first

    if verbose:
        print('done')
    return data
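
The two sentinel masks above (-999.0 and -9.999 mark missing data) can also be written as a single replace call; a sketch on a plain DataFrame with hypothetical values:

import numpy as np
import pandas as pd

df = pd.DataFrame({'T': [12.3, -999.0, 14.1], 'RH': [45.0, -9.999, 50.2]})
# equivalent to df[df == -999.0] = np.nan; df[df == -9.999] = np.nan
df = df.replace([-999.0, -9.999], np.nan)
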
Example #6
def get_oceanic_nino_index():
    # get the data from the internet (by parsing)
    url = 'https://origin.cpc.ncep.noaa.gov/products/analysis_monitoring/ensostuff/ONI_v5.php'

    html = _ul.request.urlopen(url).read()
    tables = _pd.read_html(html)

    # the ONI values are in the 9th table on the page
    noi = tables[8]

    # format the table
    monthly_frames = []

    for idx, row in noi.iterrows():
        if row.iloc[0] == 'Year':
            continue

        year = row.iloc[0]
        values = _pd.DataFrame(row.iloc[1:])
        values.index = values.apply(
            lambda x: _pd.to_datetime(f'{year}-{x.name:02d}-15'), axis=1)
        values.index.name = 'datetime'
        values.columns = ['noi']

        monthly_frames.append(values.astype(float))

    # DataFrame.append is deprecated; concatenate the monthly frames instead
    noi_ts = _pd.concat(monthly_frames, sort=True)

    noi_ts = _ts.TimeSeries(noi_ts)
    return noi_ts
Example #7
    def hemispheric_backscattering_ratio(self):
        if not self.__hemispheric_backscattering_ratio:
            if _np.any(self.back_scatt.data.index != self.scatt_coeff.data.index):
                raise IndexError(
                    'The indices do not seem to match; that should not be possible!')

            bdf = self.back_scatt.data
            sdf = self.scatt_coeff.data

            bk = [i.replace('Bbs_', '') for i in bdf.keys()]
            sk = [i.replace('Bs_', '') for i in sdf.keys()]
            if bk != sk:
                raise KeyError(
                    'These two data frames do not seem to be the right ones ... '
                    'headers do not match (%s, %s)' % (bk, sk))

            new_col_names = bk
            bdf.columns = new_col_names
            sdf.columns = new_col_names

            out = _timeseries.TimeSeries(bdf.div(sdf))
            out._data_period = self.back_scatt._data_period
            self.__hemispheric_backscattering_ratio = out

        return self.__hemispheric_backscattering_ratio
Example #8
    def add_sun_elevetion(self, picco):
        """
        doc is not correct!!!

        This function uses telemetry data from the airplain (any timeseries including Lat and Lon) to calculate
        the sun's elevation. Based on the sun's elevation an airmass factor is calculated which the data is corrected for.

        Arguments
        ---------
        sun_intensities: Sun_Intensities_TS instance
        picco: any timeseries instance containing Lat and Lon
        """

        picco_t = timeseries.TimeSeries(
            picco.data.loc[:, ['Lat', 'Lon', 'Altitude']]
        )  # only Altitude, Lat and Lon
        sun_int_su = self.merge(picco_t)
        out = sun_int_su.get_sun_position()
        arrays = np.array([
            sun_int_su.data.index, sun_int_su.data.Altitude,
            sun_int_su.data.Solar_position_elevation
        ])
        tuples = list(zip(*arrays))
        index = pd.MultiIndex.from_tuples(
            tuples, names=['Time', 'Altitude', 'Sunelevation'])
        sun_int_su.data.index = index
        sun_int_su.data = sun_int_su.data.drop(
            ['Altitude', 'Solar_position_elevation', 'Solar_position_azimuth',
             'Lon', 'Lat'],
            axis=1)
        return sun_int_su
Example #9
def load_skybrighness(fname):
    keys = [460.3, 860.7, 550.4, 671.2]
    outt = SkyBrightDict()
    for k in keys:
        fn = fname + '_' + str(k) + '.csv'
        df = pd.read_csv(fn, index_col=0)
        df.columns = df.columns.astype(float)
        outt[float(k)] = timeseries.TimeSeries(df)
    return outt
Example #10
 def var2ts(self, var_list, column_name):
     """extracts the list of variables from the file_obj and puts them all in one data frame"""
     df = _pd.DataFrame(index = self.time_stamps)
     for var in var_list:
         data = self._read_variable(var)
         df[var] = _pd.Series(data, index = self.time_stamps)
     df.columns.name = column_name
     out = _timeseries.TimeSeries(df)
     out._data_period = self._data_period
     return out
Example #11
 def AOD(self):
     if not self._aod:
         if not self._aot:
             raise AttributeError('Make sure either AOD or AOT is set.')
         aod = self.AOT.data.div(self.sun_position.data.airmass,
                                 axis='rows')
         aod.columns.name = 'AOD@wavelength(nm)'
         aod = _timeseries.TimeSeries(aod)
         self._aod = aod
     return self._aod
Example #12
 def AOT(self):
     if not self._aot:
         if not self._aod:
             raise AttributeError('Make sure either AOD or AOT is set.')
         aot = self.AOD.data.mul(self.sun_position.data.airmass,
                                 axis='rows')
         aot.columns.name = 'AOT@wavelength(nm)'
         aot = _timeseries.TimeSeries(aot)
         self._aot = aot
     return self._aot
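
The two properties above are each other's inverse via the airmass: AOD = AOT / airmass and AOT = AOD * airmass, applied row-wise. A minimal sketch with hypothetical values (airmasses chosen as powers of two so the floating-point round trip is exact):

import pandas as pd

aot = pd.DataFrame({500: [0.30, 0.42], 870: [0.15, 0.20]})
airmass = pd.Series([2.0, 4.0])
aod = aot.div(airmass, axis='rows')               # AOD = AOT / airmass
assert aod.mul(airmass, axis='rows').equals(aot)  # round trip back to AOT
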
Example #13
def _read_csv(fname, norm2time=True, norm2flow=True):
    uhsas = _readFromFakeXLS(fname)
    sd, hk = _separate_sizedist_and_housekeep(uhsas,
                                              norm2time=norm2time,
                                              norm2flow=norm2flow)
    hk = timeseries.TimeSeries(hk)
    bins = _get_bins(sd)
    dist = sizedistribution.SizeDist_TS(sd, bins, "numberConcentration")
    return dist, hk
Example #14
def read_radiosonde_csv(fname, cal):
    """reads a csv file and returns a TimeSeries

    Parameters
    ----------
    fname: str
        Name of the file to be opened
    cal: str or calibration instance
        Either the name of the file containing the calibration data, or a
        calibration instance.
    """

    df = pd.read_csv(fname, header=15)

    fkt = lambda x: x.lstrip(' ').replace(' ', '_')
    col_new = [fkt(i) for i in df.columns.values]
    df.columns = col_new

    time = df['date_[y-m-d_GMT]'] + df['time_[h:m:s_GMT]'] + '.' + df[
        'milliseconds'].astype(str)
    df.index = pd.Series(
        pd.to_datetime(time, format=time_tools.get_time_formate()))

    df[df == 99999.000] = np.nan

    alt = df['GPS_altitude_[km]'].copy()
    df['Altitude'] = alt * 1e3
    df.rename(columns={
        'GPS_latitude': 'Lat',
        'GPS_longitude': 'Lon'
    }, inplace=True)

    bins = []
    for k in df.keys():
        if 'Bin' in k:
            bins.append(k)
    sd = df.loc[:, bins]

    hk = df.drop(bins, axis=1)

    hk = timeseries.TimeSeries(hk)
    hk.data.sort_index(inplace=True)
    hk.data.Altitude.interpolate(inplace=True)
    hk.data['temperature_K'] = hk.data[
        'iMet_air_temperature_(corrected)_[deg_C]'] + 273.15
    hk.data['pressure_Pa'] = hk.data['iMet_pressure_[mb]'] * 100
    cal = calibration.read_csv(cal)
    ib = cal.get_interface_bins(20)
    sd = sizedistribution.SizeDist_TS(
        sd, ib['binedges_v_int'].values.transpose()[0], 'numberConcentration')
    return sd, hk
Example #15
 def sun_position(self):
     if not self._sunposition:
         if self._timezone != 0:
             date = self._timestamp_index + _pd.to_timedelta(
                 -1 * self._timezone, 'h')
         else:
             date = self._timestamp_index
         self._sunposition = _solar.get_sun_position(
             self.site.lat, self.site.lon, date)
         self._sunposition.index = self._timestamp_index
         self._sunposition = _timeseries.TimeSeries(self._sunposition)
     return self._sunposition
Example #16
 def extinction_coeff(self):
     if not np.any(self.__extinction_coeff_sum_along_d):
         data = self.extinction_coeff_per_bin.data.sum(axis=1)
         df = pd.DataFrame()
         df['ext_coeff_m^1'] = data
         if self._parent_type == 'SizeDist_TS':
             self.__extinction_coeff_sum_along_d = timeseries.TimeSeries(df)
         elif self._parent_type == 'SizeDist':
             self.__extinction_coeff_sum_along_d = df
         else:
             raise TypeError('not possible for this distribution type')
         self.__extinction_coeff_sum_along_d._data_period = self._data_period
     return self.__extinction_coeff_sum_along_d
Example #17
 def extinction_coeff_sum_along_d(self):
     _warnings.warn('extinction_coeff_sum_along_d is deprecated and will be removed in future versions. Use extinction_coeff instead.')
     if not np.any(self.__extinction_coeff_sum_along_d):
         data = self.extinction_coeff_per_bin.data.sum(axis = 1)
         df = pd.DataFrame()
         df['ext_coeff_m^1'] = data
         if self._parent_type == 'SizeDist_TS':
             self.__extinction_coeff_sum_along_d = timeseries.TimeSeries(df)
         elif self._parent_type == 'SizeDist':
             self.__extinction_coeff_sum_along_d = df
         else:
             raise TypeError('not possible for this distribution type')
         self.__extinction_coeff_sum_along_d._data_period = self._data_period
     return self.__extinction_coeff_sum_along_d
Example #18
    def split_revolutions(self,
                          peaks='l',
                          time_delta=(5, 20),
                          revolution_period=26.):
        """This function reorganizes the miniSASP data in a way that all sun transits are stacked on top of eachother
        and the time is translated to an angle"""

        ulr = self.copy()

        if peaks == 's':
            peaks_s = ulr.find_peaks()
        elif peaks == 'l':
            peaks_s = ulr.find_peaks(which='long')
        else:
            raise ValueError("peaks must be 's' or 'l', not {}".format(peaks))

        time_delta_back = time_delta[0]
        time_delta_forward = time_delta[1]

        photos = [
            ulr.data.PhotoA, ulr.data.PhotoB, ulr.data.PhotoC, ulr.data.PhotoD
        ]
        out_dict = {}
        for u, i in enumerate(ulr.channels):

            centers = peaks_s.data[str(i)].dropna().index.values

            df = pd.DataFrame()
            PAl = photos[u]
            for e, center in enumerate(centers):
                start = center - np.timedelta64(time_delta_back, 's')
                end = center + np.timedelta64(time_delta_forward, 's')
                PAlt = PAl.truncate(before=start, after=end, copy=True)
                PAlt.index = PAlt.index - center
                # for some reason there are values equal to 0, which would
                # screw up the averaging
                PAlt = PAlt[PAlt != 0]
                df[center] = PAlt.resample('50ms').mean()
            # note: the 'Z' timezone suffix is no longer accepted by np.datetime64
            df.index = (df.index.values -
                        np.datetime64('1970-01-01T00:00:00')
                        ) / np.timedelta64(1, 's')
            df.index = df.index.values / revolution_period * 2 * np.pi
            out = timeseries.TimeSeries(df.transpose())
            out_dict[i] = out
        return out_dict
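
The last index transformation above maps time to rotation angle: seconds since the transit center, divided by the revolution period, times 2*pi. A minimal numpy sketch with a hypothetical relative-time axis:

import numpy as np

revolution_period = 26.0                # seconds per instrument revolution
t_rel = np.array([-5.0, 0.0, 5.0])      # seconds relative to a transit center
angle = t_rel / revolution_period * 2 * np.pi  # position angle in radians
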
Example #19
    def _concat(self, arm_data_objs, close_gaps=True):
        for att in self._concatable:
            first_object = getattr(arm_data_objs[0], att)
            which_type = type(first_object).__name__
            data_period = first_object._data_period
            if which_type == 'TimeSeries_2D':
                value = _timeseries.TimeSeries_2D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'TimeSeries':
                value = _timeseries.TimeSeries(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'AMS_Timeseries_lev01':
                value = _AMS.AMS_Timeseries_lev01(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            elif which_type == 'SizeDist_TS':
                data = _pd.concat(
                    [getattr(i, att).data for i in arm_data_objs])
                value = _sizedistribution.SizeDist_TS(
                    data,
                    getattr(arm_data_objs[0], att).bins,
                    'dNdlogDp',
                    ignore_data_gap_error=True,
                )
            elif which_type == 'TimeSeries_3D':
                value = _timeseries.TimeSeries_3D(
                    _pd.concat([getattr(i, att).data for i in arm_data_objs]))
            else:
                raise TypeError(
                    '%s is not an allowed type here (TimeSeries, TimeSeries_2D, '
                    'TimeSeries_3D, AMS_Timeseries_lev01, SizeDist_TS)'
                    % which_type)

            if hasattr(first_object, 'availability'):
                try:
                    avail_concat = _pd.concat([
                        getattr(i, att).availability.availability
                        for i in arm_data_objs
                    ])
                    avail = Data_Quality(None, avail_concat, None,
                                         first_object.flag_info)
                    value.availability = avail
                except Exception:
                    _warnings.warn(
                        'availability could not be concatenated; make sure you '
                        'converted it to a pandas DataFrame at some point!'
                    )
            value._data_period = data_period
            if close_gaps:
                setattr(self, att, value.close_gaps())
            else:
                setattr(self, att, value)
Example #20
    def aod2angstrom_exponent(
        self,
        column_1=500,
        column_2=870,
        use_wavelength_from_column_names=None,
        # wavelength_1=None, wavelength_2=None
    ):
        """
        Calculates the angstrom exponents based on the AOD data.

        Parameters
        ----------
        column_1: column name
            Column name of one of the two wavelengths used for the
            calculation.
        column_2: column name
            Column name of the other wavelength.
        use_wavelength_from_column_names: bool, optional (default None)
            When the wavelengths dictionary is set, wavelengths from the
            dictionary are used instead of the column names. Set this kwarg
            to True to ignore the wavelengths dictionary and use the column
            names instead.

        Parameters (deprecated)
        -----------------------
        wavelength_1: float
            If the column name of column_1 is not accurate enough, set the
            wavelength used to calculate the AOD here.
        wavelength_2: float
            As above, for column_2.

        Returns
        -------
        TimeSeries
        """
        if self.wavelengths is None or use_wavelength_from_column_names:
            wavelength_1 = column_1
            wavelength_2 = column_2
        else:
            wavelength_1 = self.wavelengths[column_1]
            wavelength_2 = self.wavelengths[column_2]
        c1 = column_1
        c2 = column_2
        c1ex = wavelength_1
        c2ex = wavelength_2
        out = -_np.log10(self.AOD.data.loc[:, c1] /
                         self.AOD.data.loc[:, c2]) / _np.log10(c1ex / c2ex)
        out = _timeseries.TimeSeries(_pd.DataFrame(out))
        setattr(self, 'ang_exp_{}_{}'.format(column_1, column_2), out)
        return out
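
The expression above is the standard two-wavelength Angstrom exponent, alpha = -log(AOD_1 / AOD_2) / log(lambda_1 / lambda_2); the base of the logarithm cancels, so log10 works as well as ln. A minimal numpy sketch with hypothetical AOD readings:

import numpy as np

wl_1, wl_2 = 500.0, 870.0    # wavelengths in nm
aod_1, aod_2 = 0.30, 0.15    # hypothetical AOD readings
alpha = -np.log10(aod_1 / aod_2) / np.log10(wl_1 / wl_2)
print(round(alpha, 3))  # 1.251
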
Example #21
    def optical_depth_amf(self):
        """OD * airmassfactor + unkonwn offset. after determining the offset you might want to set the property sup_offsets"""
        if not self.__od_amf_orig:
            self.__od_amf_orig = timeseries.TimeSeries(
                -1 * np.log(self.sun_intensities.data))
            self.__od_amf_orig._data_period = self.sun_intensities._data_period

        if self.optical_depth_amf_offsets != self.__od_afm_offset_last:
            self.__od_amf = self.__od_amf_orig
            cols = self.__od_amf.data.columns
            for e, col in enumerate(cols):
                self.__od_amf.data[col] += self.optical_depth_amf_offsets[col]
        return self.__od_amf
Example #22
def _read_file(fname):
    with open(fname, 'r') as picof:
        header = picof.readline()

    header = header.split(' ')
    header_cleaned = []

    for head in header:
        bla = head.replace('<', '').replace('>', '')
        where = bla.find('[')
        if where != -1:
            bla = bla[:where]
        header_cleaned.append(bla)

    data = pd.read_csv(fname,
                       names=header_cleaned,
                       sep=' ',
                       skiprows=1,
                       header=0)

    # drop the first 20 lines, since the time there is often wrong
    data.drop(range(20), inplace=True)

    time_series = data.Year.astype(str) + '-' + data.Month.apply(
        lambda x: '%02i' % x) + '-' + data.Day.apply(
            lambda x: '%02i' % x) + ' ' + data.Hours.apply(
                lambda x: '%02i' % x) + ':' + data.Minutes.apply(
                    lambda x: '%02i' % x) + ':' + data.Seconds.apply(
                        lambda x: '%05.2f' % x)
    data.index = pd.Series(
        pd.to_datetime(time_series, format=time_tools.get_time_formate()))

    _drop_some_columns(data)

    # convert from rad to deg
    data.Lat.values[:] = np.rad2deg(data.Lat.values)
    data.Lon.values[:] = np.rad2deg(data.Lon.values)

    data['Altitude'] = data['Height']
    data = data.drop('Height', axis=1)

    data.sort_index(inplace=True)

    return timeseries.TimeSeries(data, {'original header': header})
Example #23
    def _parse_netCDF(self):
        "returns a dictionary, with panels in it"
        super(ArmDatasetSub, self)._parse_netCDF()

        size_bins = self._read_variable('size_bins')['data'] * 1000
        df = pd.DataFrame(self._read_variable('RH_interDMA')['data'], index = self.time_stamps, columns=size_bins)
        df.columns.name = 'size_bin_center_nm'
        self.RH_interDMA = timeseries.TimeSeries(df)
        self.RH_interDMA._data_period = self._data_period

        data = self._read_variable('hyg_distributions')['data']
        growthfactors = self._read_variable('growthfactors')['data']
        # note: pd.Panel was removed in pandas 1.0; this requires pandas < 0.25
        data = pd.Panel(data, items=self.time_stamps, major_axis=size_bins, minor_axis=growthfactors)
        data.major_axis.name = 'size_bin_center_nm'
        data.minor_axis.name = 'growthfactors'
        self.hyg_distributions = timeseries.TimeSeries_3D(data)
        self.hyg_distributions._data_period = self._data_period
Example #24
def _read_files(folder, files, verbose):
    def read_data(folder, filename, header=None):
        """Reads the file takes care of the timestamp and returns a Dataframe
        """
        if not header:
            header = _read_header(folder, filename)
        dateparse = lambda x: datetime.datetime.strptime(
            x, "%d:%m:%Y %H:%M:%S")
        df = _pd.read_csv(
            folder + '/' + filename,
            skiprows=header['header_size'],
            parse_dates={'times': [0, 1]},
            date_parser=dateparse)

        df = df.set_index('times')

        return df

    if verbose:
        print('Reading files:')
    data_list = []
    header_first = _read_header(folder, files[0])
    for fname in files:
        if verbose:
            print('\t{}'.format(fname), end=' ... ')
        header = _read_header(folder, fname)
        # make sure that all the headers are identical
        assert header_first == header, 'headers of the files to be concatenated differ'
        data = read_data(folder, fname, header=header)
        data_list.append(data)
        if verbose:
            print('done')

    # concatenate and sort the DataFrames and create a TimeSeries instance
    data = _pd.concat(data_list)
    data.sort_index(inplace=True)
    data[data == -999.0] = _np.nan
    data = _timeseries.TimeSeries(data, sampling_period=15 * 60)
    data.header = header_first
    if verbose:
        print('done')
    return data
Example #25
    def _read_variable2timeseries(self,
                                  variable,
                                  column_name=False,
                                  reverse_qc_flag=False):
        """
        Reads the specified variables and puts them into a timeseries.

        Parameters
        ----------
        variable: string or list of strings
            variable names
        column_name: bool or string
            this is a chance to give units; this will also be the y-label if
            the data is plotted

        Returns
        -------
        TimeSeries

        """

        if isinstance(variable, str):
            variable = [variable]

        df = _pd.DataFrame(index=self.time_stamps)
        for var in variable:
            variable_out = self._read_variable(var,
                                               reverse_qc_flag=reverse_qc_flag)
            df[var] = _pd.Series(variable_out['data'], index=self.time_stamps)
        if column_name:
            df.columns.name = column_name
        out = _timeseries.TimeSeries(df)
        if column_name:
            out._y_label = column_name

        out._data_period = self._data_period
        out.availability = Data_Quality(self, variable_out['availability'],
                                        variable_out['availability_type'])
        return out
Example #26
def read_csv(fname, temperature_limits=(-20, -0.5)):
    """
    Arguments
    ---------
    temperature_limits: tuple
        The temperature channel contains false readings that can cause
        problems later; values outside these limits are dropped."""
    df = _pd.read_csv(fname, sep='\t')
    _pandas_tools.ensure_column_exists(df, 'DateTime', _date_time_alts)
    _pandas_tools.ensure_column_exists(df, 'Pressure_Pa', _pressure_alt)
    _pandas_tools.ensure_column_exists(df, 'Temperature', _temp_alt)
    _pandas_tools.ensure_column_exists(df, 'Relative_humidity', _RH_alt)
    _pandas_tools.ensure_column_exists(df, 'Temperature_instrument', _temp_payload_alt, raise_error=False)
    _pandas_tools.ensure_column_exists(df, 'CN_concentration', _cn_concentration_alt, raise_error=False)
    try:
        df.Temperature_instrument = _pd.to_numeric(df.Temperature_instrument, errors='coerce')
        df.CN_concentration = _pd.to_numeric(df.CN_concentration, errors='coerce')
        df.CONCN = _pd.to_numeric(df.CONCN, errors='coerce')
        df.COUNT = _pd.to_numeric(df.COUNT, errors='coerce')
    except AttributeError:
        pass

    df.index = _pd.Series(_pd.to_datetime(df.DateTime, format='%Y-%m-%d %H:%M:%S'))

    df = df.drop('DateTime', axis=1)

    df = df.sort_index()

    if temperature_limits:
        df = df[df.Temperature > temperature_limits[0]]
        df = df[temperature_limits[1] > df.Temperature]
    hk = _timeseries.TimeSeries(df)
    hk._data_period = 2
    return hk
Example #27
    def f_RH_scatt_3p(self):
        """Note, when calculating a f(RH) with this function it has a mysterious off set in it.
        When you plan is to calculate f(RH) between 80 and 40 you actually have to apply this function for
        both values and than divide."""
        if not self.__f_RH_scatt_3p:
            if not self.sup_RH:
                raise ValueError('please set the relative humidity in sup_RH')

            def applyfunk(value):
                if type(value).__name__ == 'function':
                    return value(self.sup_RH)
                else:
                    return _np.nan

            data = self.f_RH_scatt_funcs_3p.data.applymap(applyfunk)
            self.__f_RH_scatt_3p = _timeseries.TimeSeries(data)
            self.__f_RH_scatt_3p._data_period = self.f_RH_scatt_funcs_3p._data_period
        return self.__f_RH_scatt_3p
Example #28
def zdanovskii_stokes_robinson(data, which='refractive_Index'):
    """Zdanovskii-Stokes-Robinson mixing rule (Stokes and Robinson, 1966).

    Arguments
    ---------
    data: pandas.DataFrame
        containing chemical composition data
    which: str
        which property to mix ['refractive_Index', 'density', 'kappa_chem']
    """
    materials = _properties.get_commen()
    materials.index = materials.species_name

    essential_elcts = ['ammonium_sulfate',
                       'ammonium_nitrate',
                       'ammonium_chloride',
                       'sodium_chloride',
                       'sodium_sulfate',
                       'sodium_nitrate',
                       'calcium_nitrate',
                       'calcium_chloride',
                       'organic_aerosol',
                       ]

    electrolytes = materials.loc[essential_elcts]
    electrolytes = electrolytes[['refractive_Index', 'density', 'kappa_chem']]

    _pandas_tools.ensure_column_exists(data, 'organic_aerosol', col_alt=['total_organics'])

    for e in essential_elcts:
        _pandas_tools.ensure_column_exists(data, e)

    tobemixed = electrolytes[which]
    numerator = (data * tobemixed / electrolytes.density).sum(axis=1)
    denominator = (data / electrolytes.density).sum(axis=1)
    mixed = numerator / denominator
    df = _pd.DataFrame(mixed, columns=[which])
    ts = _timeseries.TimeSeries(df)
    return ts
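
The Zdanovskii-Stokes-Robinson rule above is a volume-weighted mean: each species' property p_i is weighted by its volume m_i / rho_i (mass over density), i.e. mixed = sum(m_i * p_i / rho_i) / sum(m_i / rho_i). A minimal numpy sketch with hypothetical masses and approximate literature values for two salts:

import numpy as np

mass = np.array([2.0, 1.0])       # hypothetical species masses
density = np.array([1.77, 1.72])  # g/cm^3, approx. ammonium sulfate/nitrate
prop = np.array([1.53, 1.55])     # property to mix, e.g. refractive index
volume = mass / density
mixed = (volume * prop).sum() / volume.sum()  # volume-weighted mean
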
Example #29
    def _read_variable2timeseries(self,
                                  variable,
                                  column_name=False,
                                  reverse_qc_flag=False):
        """
        Reads the specified variables and puts them into a timeseries.

        Parameters
        ----------
        variable: string or list of strings
            variable names
        column_name: bool or string
            this is a chance to give units; this will also be the y-label if
            the data is plotted

        Returns
        -------
        TimeSeries

        """

        if isinstance(variable, str):
            variable = [variable]

        df = _pd.DataFrame(index=self.time_stamps)
        for var in variable:
            data = self._read_variable(var, reverse_qc_flag=reverse_qc_flag)
            df[var] = _pd.Series(data, index=self.time_stamps)
        if column_name:
            df.columns.name = column_name
        out = _timeseries.TimeSeries(df)
        if column_name:
            out._y_label = column_name

        out._data_period = self._data_period
        return out
Example #30
    def _parse_netCDF(self):
        super(ArmDatasetSub, self)._parse_netCDF()


        # for the 2 parameter function
        def ab_2_f_RH_func(ab):
            ab = ab.copy()
            a, b = ab
            # 'Bsp(RH%)/Bsp(~40%) = a*[1-(RH%/100)]^(-b)'
            f_RH = lambda RH: a * (1 - (RH / 100.))**(-b)
            return f_RH

        varies = [
            'fRH_Bs_R_10um_2p', 'fRH_Bs_G_10um_2p', 'fRH_Bs_B_10um_2p',
            'fRH_Bs_R_1um_2p', 'fRH_Bs_G_1um_2p', 'fRH_Bs_B_1um_2p'
        ]

        df = _pd.DataFrame(index=self.time_stamps)
        for key in varies:
            data = self._read_variable(key, reverse_qc_flag=8)
            dft = _pd.DataFrame(data['data'], index=self.time_stamps)
            df[key] = dft.apply(ab_2_f_RH_func, axis=1)
            if key == 'fRH_Bs_G_1um_2p':
                self.f_RH_scatt_2p_ab_G_1um = _timeseries.TimeSeries(
                    _pd.DataFrame(dft))
                self.f_RH_scatt_2p_ab_G_1um._data_period = self._data_period

        self.f_RH_scatt_funcs_2p = _timeseries.TimeSeries(df)
        self.f_RH_scatt_funcs_2p._data_period = self._data_period

        # for the 3 parameter function
        def abc_2_f_RH_func(abc):
            abc = abc.copy()
            a, b, c = abc
            f_RH = lambda RH: a * (1 + (b * (RH / 100.)**c))
            return f_RH

        varies = [
            'fRH_Bs_R_10um_3p', 'fRH_Bs_G_10um_3p', 'fRH_Bs_B_10um_3p',
            'fRH_Bs_R_1um_3p', 'fRH_Bs_G_1um_3p', 'fRH_Bs_B_1um_3p'
        ]

        df = _pd.DataFrame(index=self.time_stamps)
        for key in varies:
            data = self._read_variable(key, reverse_qc_flag=8)
            dft = _pd.DataFrame(data['data'], index=self.time_stamps)
            df[key] = dft.apply(abc_2_f_RH_func, axis=1)
        self.f_RH_scatt_funcs_3p = _timeseries.TimeSeries(df)
        self.f_RH_scatt_funcs_3p._data_period = self._data_period

        # f(RH) at predefined points
        varies = [
            'ratio_85by40_Bs_R_10um_2p', 'ratio_85by40_Bs_G_10um_2p',
            'ratio_85by40_Bs_B_10um_2p', 'ratio_85by40_Bs_R_1um_2p',
            'ratio_85by40_Bs_G_1um_2p', 'ratio_85by40_Bs_B_1um_2p'
        ]

        self.f_RH_scatt_2p_85_40 = self._read_variable2timeseries(
            varies, reverse_qc_flag=8)

        varies = [
            'ratio_85by40_Bs_R_10um_3p', 'ratio_85by40_Bs_G_10um_3p',
            'ratio_85by40_Bs_B_10um_3p', 'ratio_85by40_Bs_R_1um_3p',
            'ratio_85by40_Bs_G_1um_3p', 'ratio_85by40_Bs_B_1um_3p'
        ]

        self.f_RH_scatt_3p_85_40 = self._read_variable2timeseries(
            varies, reverse_qc_flag=8)

        varies = [
            'ratio_85by40_Bbs_R_10um_2p', 'ratio_85by40_Bbs_G_10um_2p',
            'ratio_85by40_Bbs_B_10um_2p', 'ratio_85by40_Bbs_R_1um_2p',
            'ratio_85by40_Bbs_G_1um_2p', 'ratio_85by40_Bbs_B_1um_2p'
        ]

        self.f_RH_backscatt_2p_85_40 = self._read_variable2timeseries(
            varies, reverse_qc_flag=8)
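
For reference, the two humidification parameterizations read in above, written out and evaluated directly; the fit coefficients here are hypothetical:

def f_rh_2p(rh, a, b):
    # 'Bsp(RH%)/Bsp(~40%) = a*[1-(RH%/100)]^(-b)'
    return a * (1 - rh / 100.0) ** (-b)

def f_rh_3p(rh, a, b, c):
    return a * (1 + b * (rh / 100.0) ** c)

print(f_rh_2p(85.0, 1.0, 0.6), f_rh_3p(85.0, 1.0, 1.2, 5.0))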