示例#1
0
    def test_logistic_regression_wrong_type(self):
        """
        Negative test

        data: plain Python list (unsupported type)

        Checks that logistic_regression() raises a TypeError when the data
        is handed over as a list.
        """
        # 1. Arrange
        unsupported_input = [2, 4, np.nan, 1]
        # 2. Act & 3. Assert
        self.assertRaises(TypeError, logistic_regression, unsupported_input)
示例#2
0
    def test_logistic_regression_wrong_dependent(self):
        """
        Negative test

        data: Correct data frame (df_breast_cancer)
        dependent: 'z' (not a column of df_breast_cancer)

        Checks that logistic_regression() raises a ValueError when the
        column named as the dependent variable is absent from the data.
        """
        # 1. Arrange
        frame = generate_df_breast_cancer()
        unknown_dependent = 'z'
        predictors = ['thickness', 'uniformity']
        # 2. Act & 3. Assert
        with self.assertRaises(ValueError):
            logistic_regression(frame, unknown_dependent, predictors)
示例#3
0
    def test_logistic_regression_wrong_predictor(self):
        """
        Negative test

        data: Correct data frame (df_breast_cancer)
        predictors: ['thickness', 'z'] ('z' is not a column of
        df_breast_cancer)

        Checks that logistic_regression() raises a ValueError when one of
        the columns named as predictor variables is absent from the data.
        """
        # 1. Arrange
        frame = generate_df_breast_cancer()
        predictors_with_unknown = ['thickness', 'z']
        # 2. Act & 3. Assert
        with self.assertRaises(ValueError):
            logistic_regression(frame, 'class', predictors_with_unknown)
示例#4
0
    def test_logistic_regression_wrong_regressions(self):
        """
        Negative test

        data: Correct data frame (df_breast_cancer)
        regressions: 'z' (not a valid value)

        Checks that logistic_regression() raises a ValueError when the
        regressions parameter receives a value that is not valid.
        """
        # 1. Arrange
        frame = generate_df_breast_cancer()
        predictors = ['thickness', 'uniformity']
        # 2. Act & 3. Assert
        self.assertRaises(
            ValueError,
            logistic_regression, frame, 'class', predictors,
            regressions='z')
示例#5
0
    def test_logistic_regression_inplace(self):
        """
        Positive test

        data: Correct data frame (df_breast_cancer)

        The data frame (df_breast_cancer) is expected to hold 15 NA values,
        of which logistic_regression() should impute 7.

        Checks that, after an in-place call, the very same data frame is
        left with 8 NA values.
        """
        # 1. Arrange
        frame = generate_df_breast_cancer()
        predictors = ['thickness', 'uniformity']
        # 2. Act
        logistic_regression(frame, 'class', predictors, inplace=True)
        # 3. Assert
        remaining_na = frame.isna().sum().sum()
        self.assertEqual(remaining_na, 8)
示例#6
0
def mice_one_imputation(data):
    """Auxiliary function that runs a single MICE pass over ``data``.

    The columns that contain missing values are visited in a random order.
    Each of them first receives a provisional fill (mean substitution for
    numeric columns, random sample imputation for the rest). Afterwards, one
    column at a time, the cells that were originally missing are blanked
    again and re-imputed with a regression: linear for numeric columns,
    logistic for the others.

    :param data: The data on which to perform the imputation. It is left
        untouched; all the work is done on a copy.
    :type data: pandas.DataFrame
    :return: The dataframe with one MICE imputation performed.
    :rtype: pandas.DataFrame
    """
    # Work on a copy so the caller's frame is never modified:
    imputed = data.copy()
    # Remember where the gaps were; the provisional fill below hides them:
    missing = pd.isna(data)
    # Columns that need imputing, in their original order for now:
    pending = [name for name in data.columns if data[name].isna().any()]
    # Randomise the order in which the columns are imputed:
    shuffle(pending)
    # Provisional fill so that every column can later act as a predictor:
    for name in pending:
        if not is_numeric_dtype(data[name]):
            random_sample_imputation(imputed, columns=[name], inplace=True)
            continue
        mean_substitution(imputed, columns=[name], inplace=True)
    # Numeric columns are the predictors handed to the linear regression:
    numeric_columns = [
        name for name in data.columns if is_numeric_dtype(data[name])
    ]
    # Re-impute each column from the others:
    for name in pending:
        gaps = missing[name]
        if is_numeric_dtype(data[name]):
            # Restore the original gaps before regressing on this column.
            imputed.loc[gaps, name] = np.nan
            # NOTE(review): numeric_columns also contains `name` itself, so
            # the dependent column is passed among its own predictors;
            # confirm linear_regression() copes with that as intended.
            linear_regression(
                imputed, name, predictors=numeric_columns, inplace=True)
        else:
            imputed.loc[gaps, name] = None
            logistic_regression(imputed, name, inplace=True)
    return imputed
示例#7
0
    def test_logistic_regression_returning(self):
        """
        Positive test

        data: Correct data frame (df_breast_cancer)

        The data frame (df_breast_cancer) is expected to hold 15 NA values,
        of which logistic_regression() should impute 7.

        Checks that the original data frame remains unmodified and that the
        returned data frame contains 8 NA values.
        """
        # 1. Arrange
        original = generate_df_breast_cancer()
        predictors = ['thickness', 'uniformity']
        # 2. Act
        imputed = logistic_regression(original, 'class', predictors)
        # 3. Assert
        na_before = original.isna().sum().sum()
        na_after = imputed.isna().sum().sum()
        self.assertEqual(na_before, 15)
        self.assertEqual(na_after, 8)