Пример #1
0
    def create_compound_test_data_set(response_variable, explanatory_variables,
                                      explanatory_ranges):
        """Build a merged test data set for CompoundLinearModel.

        A linear-model test data set of ten observations is generated for each
        entry of ``explanatory_variables``, paired by position with the entry
        of ``explanatory_ranges``. Every set after the first is re-indexed so
        its rows continue directly after the rows accumulated so far, and is
        then merged in with ``add_data_manager``.

        :param response_variable: passed through to
            create_linear_model_test_data_set
        :param explanatory_variables: sequence with one explanatory variable
            collection per segment
        :param explanatory_ranges: sequence with one explanatory range per
            segment
        :return: the merged data set
        """

        obs_per_segment = 10

        combined = create_linear_model_test_data_set(
            response_variable, explanatory_variables[0], obs_per_segment,
            explanatory_ranges[0])

        segment = 1
        while segment < len(explanatory_variables):
            segment_set = create_linear_model_test_data_set(
                response_variable, explanatory_variables[segment],
                obs_per_segment, explanatory_ranges[segment])

            # shift the segment's index so it starts right after the last
            # index value of the data collected so far
            first_label = combined.get_data().index[-1] + 1
            shifted_index = pd.RangeIndex(first_label,
                                          first_label + obs_per_segment)
            segment_df = segment_set.get_data().set_index(shifted_index)

            combined = combined.add_data_manager(
                DataManager(segment_df, segment_set.get_origin()))
            segment += 1

        return combined
Пример #2
0
 def test_deepcopy(self):
     """Check that copy.deepcopy() of a DataManager gives an equal but distinct instance"""
     original = DataManager(create_random_dataframe())
     duplicate = copy.deepcopy(original)

     # equal in content ...
     self.assertTrue(original.equals(duplicate))
     # ... but not the very same object
     self.assertIsNot(original, duplicate)
Пример #3
0
    def __init__(self,
                 constituent_df,
                 surrogate_df,
                 model_list,
                 min_samples=30,
                 max_extrapolation=0.1,
                 match_time=30,
                 p_thres=0.05):
        """Initialize a HierarchicalModel

        The two data sets are wrapped in DataManager instances, the settings
        are stored on the instance, and the models are then built by
        ``_create_models()``.

        :param constituent_df: constituent data, handed to the DataManager
            constructor
        :param surrogate_df: surrogate data, handed to the DataManager
            constructor
        :param model_list: stored unchanged as ``_model_list``
        :param min_samples: stored unchanged as ``min_samples``
        :param max_extrapolation: stored unchanged as ``max_extrapolation``
        :param match_time: stored unchanged as ``match_time``
        :param p_thres: stored unchanged as ``p_thres``
        """
        # NOTE: a HierarchicalModel.pad_data(surrogate_df) call used to run
        # here and is currently disabled.
        self._surrogate_data = DataManager(surrogate_df)
        self._constituent_data = DataManager(constituent_df)
        self._model_list = model_list

        # settings; all of them are in place before the models are created
        self.min_samples = min_samples
        self.max_extrapolation = max_extrapolation
        self.match_time = match_time
        self.p_thres = p_thres

        self._create_models()
Пример #4
0
    def calculate_acoustic_parameters(self, processing_parameters):
        """Run the backscatter processing chain and return the acoustic parameters.

        Each intermediate result (measured backscatter, water corrected
        backscatter, sediment attenuation coefficient, sediment corrected
        backscatter) is stored on the instance. The returned object combines
        the mean sediment corrected backscatter with the sediment attenuation
        coefficient and is also stored as ``_acoustic_parameters``.

        :param processing_parameters: passed to calc_measured_backscatter of
            the raw backscatter data
        :return: result of adding the sediment attenuation coefficient to a
            DataManager holding the mean sediment corrected backscatter
        """

        # step through the processing chain, keeping every stage on the instance
        measured = self._raw_backscatter_data.calc_measured_backscatter(processing_parameters)
        self._measured_backscatter_data = measured

        water_corrected = WaterCorrectedBackscatterData.calc_water_corrected_backscatter(measured)
        self._water_corrected_backscatter_data = water_corrected

        attenuation = SedimentAttenuationCoefficient.calc_sediment_attenuation_coefficient(water_corrected)
        self._sediment_attenuation_coefficient = attenuation

        sediment_corrected = SedimentCorrectedBackscatterData.calc_sediment_corrected_backscatter(
            water_corrected, attenuation)
        self._sediment_corrected_backscatter_data = sediment_corrected

        # wrap the mean sediment corrected backscatter in a DataManager
        mean_scb = sediment_corrected.calc_mean_sediment_corrected_backscatter()
        mean_scb_origin = create_origin_from_data_frame(mean_scb.get_data(), sediment_corrected.get_origin())
        mean_scb_manager = DataManager(mean_scb.get_data(), mean_scb_origin)

        # add the sediment attenuation coefficient to the mean backscatter
        self._acoustic_parameters = mean_scb_manager.add_data(attenuation.get_sac(), attenuation.get_origin())

        return self._acoustic_parameters
Пример #5
0
    def calc_measured_backscatter(self, processing_parameters):
        """Compute the measured backscatter for the given processing parameters.

        The 'Temp' and 'Vbeam' variables of this object are added to the
        computed backscatter before the result is wrapped.

        :param processing_parameters: forwarded to _calc_measured_backscatter
            and to the returned MeasuredBackscatterData
        :return: MeasuredBackscatterData built from the combined data, this
            object's configuration parameters, processing_parameters and this
            object's cell range
        """

        # compute the measured backscatter and describe where it came from
        backscatter_df = self._calc_measured_backscatter(
            self._data_manager.get_data(), self._configuration_parameters, processing_parameters)
        backscatter_origin = create_origin_from_data_frame(backscatter_df, self._data_manager.get_origin())
        combined = DataManager(backscatter_df, backscatter_origin)

        # carry the temperature and vertical beam data along
        for variable_name in ('Temp', 'Vbeam'):
            combined = combined.add_data(self.get_variable(variable_name),
                                         self.get_variable_origin(variable_name))

        return MeasuredBackscatterData(combined,
                                       self._configuration_parameters,
                                       processing_parameters,
                                       self.get_cell_range())
Пример #6
0
    def test_get_data_float_index_specify_step(self):
        """Test DataManager.get_data() with a float data index and an explicit index_step"""

        columns = ['x', 'y']
        data_start, data_stop = 0, 10
        index_start, index_stop = 0., 10.
        num_rows = 6

        coarse_df = create_linspace_dataframe(data_start, data_stop, index_start, index_stop, columns, num_rows)
        dm = DataManager(coarse_df)

        # asking for the step the data already has must give back an equal DataFrame
        coarse_step = (index_stop - index_start) / (num_rows - 1)
        pd.testing.assert_frame_equal(dm.get_data(index_step=coarse_step), coarse_df)

        # a linearly spaced DataFrame with a row added between every pair of rows is the expected outcome of
        # requesting the correspondingly smaller step
        fine_rows = 2 * num_rows - 1
        fine_df = create_linspace_dataframe(data_start, data_stop, index_start, index_stop, columns, fine_rows)
        fine_step = (index_stop - index_start) / (fine_rows - 1)
        pd.testing.assert_frame_equal(dm.get_data(index_step=fine_step), fine_df)
Пример #7
0
    def test_get_data(self):
        """Basic check of DataManager.get_data(): equal content, different object"""

        # wrap a random DataFrame in a DataManager
        source_df = create_random_dataframe()
        manager = DataManager(source_df)

        # fetch the data back
        returned_df = manager.get_data()

        # the content must match the source, but the returned frame must not be the source object itself
        pd.testing.assert_frame_equal(manager.get_data(), source_df)
        self.assertIsNot(returned_df, source_df)
Пример #8
0
def create_linear_model_test_data_set(response_variable,
                                      explanatory_variables,
                                      number_of_obs=50,
                                      explanatory_range=(0.01, 10)):
    """Create a random regression data set for linear model tests.

    Uniform random data is drawn for each distinct raw explanatory variable.
    The response is computed from the exogenous DataFrame, a random
    coefficient vector and a normally distributed error term, and is then
    passed through the inverse of the response variable's transform.

    :param response_variable: response variable name, resolved with
        find_raw_variable into a transform and a raw variable name
    :param explanatory_variables: iterable of explanatory variable names
    :param number_of_obs: number of rows to generate
    :param explanatory_range: (low, high) bounds for the uniform explanatory
        data
    :return: DataManager holding the response and explanatory columns, with
        an origin created from this file's path
    """

    # reduce the explanatory variables to their distinct raw variables
    raw_explanatory_variables = list({
        raw_name
        for _, raw_name in map(find_raw_variable, explanatory_variables)
    })

    # one uniformly distributed column per raw explanatory variable
    sample_shape = (number_of_obs, len(raw_explanatory_variables))
    explanatory_df = pd.DataFrame(
        data=np.random.uniform(explanatory_range[0], explanatory_range[1],
                               size=sample_shape),
        columns=raw_explanatory_variables)

    # exogenous DataFrame used to compute the response
    exog_df = get_exog_df(explanatory_df, explanatory_variables)

    # coefficient vector (one more entry than there are explanatory
    # variables) and the error term
    beta_vector = np.random.uniform(
        1, 10, size=(len(explanatory_variables) + 1, 1))
    error_term = np.random.normal(0, 0.1, size=(number_of_obs, 1))

    # compute the transformed response and map it back to the raw variable
    response_transform, raw_response_variable = find_raw_variable(
        response_variable)
    to_raw_response = INVERSE_TRANSFORM_FUNCTIONS[response_transform]
    linear_response = np.dot(exog_df, beta_vector) + error_term
    response_df = pd.DataFrame(data=to_raw_response(linear_response),
                               columns=[raw_response_variable])

    # response column(s) first, explanatory columns after
    regression_df = pd.concat([response_df, explanatory_df], axis=1)
    regression_origin = DataManager.create_data_origin(regression_df, __file__)

    return DataManager(regression_df, regression_origin)
Пример #9
0
    def add_data(self, other, keep_curr_obs=None):
        """Add another data set of the same type to this one and return the combined result.

        :param other: data set to add; it must be an instance of this object's type and both its configuration
            parameters and its processing parameters must be compatible with this object's
        :type other: ProcessedADVMSedimentData
        :param keep_curr_obs: forwarded to DataManager.add_data_manager
        :return: new instance of this object's type holding the combined data together with this object's
            configuration and processing parameters
        :raises BackscatterDataIncompatibleException: if any of the compatibility checks fails
        """

        # The data sets are compatible only when ALL of the checks pass. The previous condition
        # (`not A and B and C`) negated only the first check, so incompatible processing parameters or a
        # wrong type slipped through. The type check runs first so the getters are only called on an
        # object known to provide them.
        is_compatible = (
            isinstance(other, type(self))
            and self._configuration_parameters.is_compatible(other.get_configuration_parameters())
            and self._processing_parameters.is_compatible(other.get_processing_parameters()))
        if not is_compatible:
            raise BackscatterDataIncompatibleException(
                "Backscatter data sets are incompatible")

        # rebuild a DataManager from the other object's data and origin
        other_data = other.get_data()
        other_origin = other.get_origin()
        other_data_manager = DataManager(other_data, other_origin)

        combined_data_manager = self._data_manager.add_data_manager(
            other_data_manager, keep_curr_obs=keep_curr_obs)

        return type(self)(combined_data_manager,
                          self._configuration_parameters,
                          self._processing_parameters)
Пример #10
0
    def calc_sediment_corrected_backscatter(cls, water_corrected_backscatter,
                                            sediment_attenuation_coefficient):
        """Compute the sediment corrected backscatter from the water corrected backscatter.

        :param water_corrected_backscatter: supplies the backscatter data, cell range, origin, configuration
            parameters and processing parameters
        :param sediment_attenuation_coefficient: supplies the sediment attenuation coefficient via get_sac()
        :return: instance of cls built from the computed data and the configuration parameters, processing
            parameters and cell range of water_corrected_backscatter
        """

        # gather the inputs for the calculation
        backscatter_df = water_corrected_backscatter.get_backscatter_data()
        cell_range = water_corrected_backscatter.get_cell_range()
        attenuation_df = sediment_attenuation_coefficient.get_sac()
        config = water_corrected_backscatter.get_configuration_parameters()

        corrected_df = cls._calc_sediment_corrected_backscatter(
            backscatter_df, attenuation_df, config, cell_range)

        # the origin of the result is derived from the origin of the water corrected backscatter
        corrected_origin = create_origin_from_data_frame(
            corrected_df, water_corrected_backscatter.get_origin())
        corrected_manager = DataManager(corrected_df, corrected_origin)

        return cls(corrected_manager,
                   water_corrected_backscatter.get_configuration_parameters(),
                   water_corrected_backscatter.get_processing_parameters(),
                   water_corrected_backscatter.get_cell_range())
Пример #11
0
    def add_data(self, other, keep_curr_obs=None):
        """Adds other RawBackscatterData instance to self.

        Throws exception if other RawBackscatterData object is incompatible with self. An exception will be raised if
        keep_curr_obs=None and concurrent observations exist for variables.

        :param other: RawBackscatterData object to be added
        :type other: RawBackscatterData
        :param keep_curr_obs: {None, True, False} Flag to indicate whether or not to keep current observations.
        :return: Merged RawBackscatterData object
        :raises BackscatterDataIncompatibleException: if other is not an instance of this object's type or its
            configuration parameters are not compatible with this object's
        """

        # The data sets are compatible only when BOTH checks pass. The previous condition
        # (`not A and isinstance(...)`) negated only the configuration check, so an object of the wrong type
        # was never rejected. The type check runs first so the getter is only called on an object known to
        # provide it.
        is_compatible = (
            isinstance(other, type(self))
            and self._configuration_parameters.is_compatible(other.get_configuration_parameters()))
        if not is_compatible:
            raise BackscatterDataIncompatibleException(
                "ADVM data sets are incompatible")

        # rebuild a DataManager from the other object's data and origin
        other_data = other.get_data()
        other_origin = other.get_origin()

        other_data_manager = DataManager(other_data, other_origin)

        combined_data_manager = self._data_manager.add_data_manager(
            other_data_manager, keep_curr_obs=keep_curr_obs)

        return type(self)(combined_data_manager,
                          self._configuration_parameters)
Пример #12
0
    def test_get_data_datetime_index_specify_step(self):
        """Test DataManager.get_data() with a datetime data index and an explicit index_step"""

        columns = ['x', 'y']
        data_start, data_stop = 0., 10.
        num_rows = 7

        # the index spans num_rows steps of 15 minutes (expressed in milliseconds)
        index_start = np.datetime64('2018-01-01')
        source_step = np.timedelta64(15*60*1000, 'ms')
        index_stop = index_start + source_step * num_rows

        source_df = create_linspace_dataframe(data_start, data_stop, index_start, index_stop, columns, num_rows)
        dm = DataManager(source_df)

        # without a step the data comes back unchanged
        pd.testing.assert_frame_equal(dm.get_data(), source_df)

        # build the expected frame with a different row count and request the data at that frame's spacing
        expected_rows = 12
        expected_df = create_linspace_dataframe(data_start, data_stop, index_start, index_stop, columns,
                                                expected_rows)
        expected_step = expected_df.index[1] - expected_df.index[0]
        pd.testing.assert_frame_equal(dm.get_data(index_step=expected_step), expected_df)
Пример #13
0
    def calc_water_corrected_backscatter(cls, measured_backscatter):
        """Compute the water corrected backscatter from the measured backscatter.

        The 'Temp' variable of measured_backscatter is added to the computed data before it is wrapped.

        :param measured_backscatter: supplies the input data, origin, 'Temp' variable, configuration parameters,
            processing parameters and cell range
        :return: instance of cls built from the combined data and the configuration parameters, processing
            parameters and cell range of measured_backscatter
        """

        corrected_df = cls._calc_water_corrected_backscatter(measured_backscatter)
        corrected_origin = create_origin_from_data_frame(corrected_df, measured_backscatter.get_origin())

        # carry the temperature data along with the corrected backscatter
        data_manager = DataManager(corrected_df, corrected_origin).add_data(
            measured_backscatter.get_variable('Temp'),
            measured_backscatter.get_variable_origin('Temp'))

        return cls(data_manager,
                   measured_backscatter.get_configuration_parameters(),
                   measured_backscatter.get_processing_parameters(),
                   measured_backscatter.get_cell_range())
Пример #14
0
    def test_init_read_tab_delimited_datetime_index_datetime(self):
        """Test DataManager.read_tab_delimited_data() against an expected frame indexed by its 'DateTime' column"""
        data_dir = os.path.join(current_path, 'data', 'model', 'TestDataframeTimestamp')
        expected_path = os.path.join(data_dir, 'test_timestamp_results.txt')
        input_path = os.path.join(data_dir, 'test_timestamp_datetime.txt')

        # expected frame: index built from the 'DateTime' column, first column removed
        expected_df = pd.read_table(expected_path, sep='\t')
        expected_df = expected_df.set_index(pd.DatetimeIndex(expected_df['DateTime']))
        expected_df = expected_df.drop(columns=expected_df.columns[0])

        loaded = DataManager.read_tab_delimited_data(input_path)

        pd.testing.assert_frame_equal(loaded.get_data(), expected_df)
Пример #15
0
def create_origin_from_data_frame(acoustic_df, data_origin):
    """Create a new origin from information in data_origin describing the variables in acoustic_df

    For every distinct value in ``data_origin['origin']`` an origin frame is created with
    ``DataManager.create_data_origin(acoustic_df, source)``; the frames are stacked into one DataFrame
    with a fresh 0..n-1 index.

    :param acoustic_df: DataFrame whose variables the new origin describes
    :param data_origin: object with an 'origin' entry listing the data sources
    :return: DataFrame of the stacked origin frames; an empty DataFrame with the columns 'variable' and
        'origin' when data_origin lists no sources
    """

    data_sources = set(data_origin['origin'])

    # DataFrame.append() was removed in pandas 2.0; collect the pieces and concatenate them once instead.
    origin_frames = [
        DataManager.create_data_origin(acoustic_df, source)
        for source in data_sources
    ]

    if not origin_frames:
        # nothing to stack: keep the documented columns and a fresh index
        return pd.DataFrame(columns=['variable', 'origin']).reset_index(drop=True)

    return pd.concat(origin_frames, ignore_index=True)
Пример #16
0
    def test_to_hdf_path(self):
        """Round-trip a DataManager through to_hdf() / read_hdf() using a file path"""

        hdf_key = '/dm'

        # DataManager with random data and a matching origin
        frame = create_random_dataframe()
        written = DataManager(frame, DataManager.create_data_origin(frame, 'test'))

        # write it out and read it back from the same path and key
        written.to_hdf(self.temp_hdf_path, hdf_key)
        restored = DataManager.read_hdf(self.temp_hdf_path, hdf_key)

        self.assertTrue(written.equals(restored))
Пример #17
0
    def test_add_data_manager_simple(self):
        """Splitting a data set in two and re-adding the halves must reproduce the original"""

        # reference DataManager holding all 50 rows
        full_df = create_random_dataframe(number_of_rows=50)
        expected = DataManager(full_df)

        # one DataManager per half, then add the second to the first
        first_half = DataManager(full_df.iloc[:25])
        second_half = DataManager(full_df.iloc[25:])
        combined = first_half.add_data_manager(second_half)

        self.assertTrue(expected.equals(combined))
Пример #18
0
    def _init_test_model(self, test_case_parameters):
        """Build the model under test from the test case data.

        :param test_case_parameters: mapping with the model class under 'test_class' and the keyword arguments
            for its constructor under 'init_kwargs'
        :return: instance of the model class, created from the test case data without the fitted-value columns
        """

        # load the test case data and find the columns that hold expected fitted values
        case_df = self._load_test_case_data()
        fitted_columns = case_df.filter(regex='Fitted *').keys()

        model_class = test_case_parameters['test_class']

        # the model only gets the data that is left after removing the fitted values
        model_input = DataManager(case_df.drop(fitted_columns, axis=1))

        return model_class(model_input, **test_case_parameters['init_kwargs'])
Пример #19
0
    def from_advm_data(cls, advm_data):
        """Create an instance from the backscatter columns of an ADVM data set.

        The columns of the ADVM data matching ``cls._bs_data_columns_regex`` are kept, and the cell range is
        divided by the cosine of the configured 'Slant Angle' (converted from degrees to radians).

        :param advm_data: supplies the data, origin, configuration parameters and cell range
        :return: instance of cls built from the backscatter data, the configuration parameters and the
            corrected cell range
        """

        # keep only the backscatter columns; the origin is taken over as it is
        backscatter_df = advm_data.get_data().filter(regex=cls._bs_data_columns_regex)
        data_manager = DataManager(backscatter_df, advm_data.get_origin())

        # apply slant angle correction
        config = advm_data.get_configuration_parameters()
        slant_angle_rad = np.radians(config['Slant Angle'])
        corrected_cell_range = advm_data.get_cell_range() / np.cos(slant_angle_rad)

        return cls(data_manager, config, corrected_cell_range)
Пример #20
0
    def test_to_hdf_buf(self):
        """Round-trip a DataManager through to_hdf() / read_hdf() using a pd.HDFStore instance"""

        hdf_key = '/dm/'

        # DataManager with random data and a matching origin
        frame = create_random_dataframe()
        written = DataManager(frame, DataManager.create_data_origin(frame, 'test'))

        # write through one store ...
        with pd.HDFStore(self.temp_hdf_path) as out_store:
            written.to_hdf(out_store, hdf_key)

        # ... and read back through a freshly opened one
        with pd.HDFStore(self.temp_hdf_path) as in_store:
            restored = DataManager.read_hdf(in_store, hdf_key)

        self.assertTrue(written.equals(restored))
Пример #21
0
    def test_init_read_tab_delimited_file_no_datetime(self):
        """Test creating a DataManager with read_tab_delimited_data() from a file that has no date/time
        information
        """

        # a data file from the model test data set
        source_path = os.path.join(current_path, 'data', 'model',
                                   'TestMultipleOLSModelInit', 'test_model_init.txt')

        # load the file through the DataManager
        loaded = DataManager.read_tab_delimited_data(source_path)

        # load the same file directly and order its columns by label
        expected_df = pd.read_table(source_path, sep='\t').sort_index(axis=1)

        # both routes must produce equal data
        pd.testing.assert_frame_equal(loaded.get_data(), expected_df)
Пример #22
0
    def calc_mean_sediment_corrected_backscatter(self):
        """Compute the row-wise mean of the sediment corrected backscatter.

        :return: ProcessedData wrapping a DataManager with a single float column 'MeanSCB' (the mean across
            the columns of this object's data), together with this object's configuration and processing
            parameters
        """

        sediment_corrected_backscatter = self.get_data()

        # `np.float` was only an alias of the builtin `float` and has been removed from NumPy (1.24);
        # the builtin gives the same dtype on every NumPy version.
        mean_sediment_corrected_backscatter = pd.DataFrame(
            sediment_corrected_backscatter.mean(axis=1),
            columns=['MeanSCB'],
            dtype=float)

        # the origin of the mean is derived from this object's origin
        data_origin = create_origin_from_data_frame(
            mean_sediment_corrected_backscatter, self.get_origin())
        data_manager = DataManager(mean_sediment_corrected_backscatter,
                                   data_origin)

        return ProcessedData(data_manager, self.get_configuration_parameters(),
                             self.get_processing_parameters())
Пример #23
0
    def test_equals(self):
        """Test DataManager.equals() and the == / != operators"""
        # two managers built from the same DataFrame compare equal
        shared_df = create_random_dataframe()
        first = DataManager(shared_df)
        twin = DataManager(shared_df)

        self.assertTrue(first.equals(twin))
        self.assertTrue(first == twin)

        # a manager built from another random DataFrame does not
        different = DataManager(create_random_dataframe())

        self.assertFalse(first.equals(different))
        self.assertFalse(first == different)
        self.assertTrue(first != different)
Пример #24
0
    def test_init_without_origin(self):
        """Test creating a DataManager when no origin DataFrame is supplied"""
        data_df = create_random_dataframe()

        # expected origin: one row per variable with NaN as its origin
        expected_origin = pd.DataFrame(
            data=[[name, np.nan] for name in data_df.keys()],
            columns=['variable', 'origin'])

        manager = DataManager(data_df)

        # the stored data and the generated origin must have the expected content
        pd.testing.assert_frame_equal(manager.get_data(), data_df)
        pd.testing.assert_frame_equal(manager.get_origin(), expected_origin)

        # the returned DataFrames must be different objects from the ones built here
        self.assertIsNot(manager.get_data(), data_df)
        self.assertIsNot(manager.get_origin(), expected_origin)
Пример #25
0
    def calc_sediment_attenuation_coefficient(cls,
                                              water_corrected_backscatter):
        """Compute the sediment attenuation coefficient from the water corrected backscatter.

        :param water_corrected_backscatter: supplies the backscatter data, cell range, origin, configuration
            parameters and processing parameters
        :return: instance of cls built from the computed coefficient and the configuration and processing
            parameters of water_corrected_backscatter
        """

        # inputs for the calculation
        backscatter_df = water_corrected_backscatter.get_backscatter_data()
        cell_range = water_corrected_backscatter.get_cell_range()

        sac_df = cls._calc_sediment_attenuation_coefficient(backscatter_df, cell_range)

        # the origin of the result is derived from the origin of the water corrected backscatter
        sac_origin = create_origin_from_data_frame(sac_df, water_corrected_backscatter.get_origin())
        sac_manager = DataManager(sac_df, sac_origin)

        return cls(sac_manager,
                   water_corrected_backscatter.get_configuration_parameters(),
                   water_corrected_backscatter.get_processing_parameters())