Exemplo n.º 1
0
    def test_cluster_numbers(self, tmp_clustered_files):
        """Check that clustering assigns the expected cluster name per tool.

        Runs ``Cluster`` on the directory given as the first element of the
        ``tmp_clustered_files`` fixture, then, for every file returned by
        ``Base.get_files``, compares the cluster name of each row against the
        reference publications from ``BaseTestCase.get_test_publications()``.
        Rows are matched on their ``"Tools"`` value.
        """
        # Arrange
        tmpdir = tmp_clustered_files[0]
        test_pubs = BaseTestCase.get_test_publications()

        # Act
        Cluster().run(tmpdir)
        files = Base.get_files(tmpdir, include_clustered_files=True)

        # Assert
        # TODO: This assertion is anti-pattern; must be re-implemented in a much better way.
        # Flatten the reference rows once instead of re-walking every
        # reference frame for each produced row.
        expected = [
            (ref_row.get("Tools"), ref_row.get(CLUSTER_NAME_COLUMN_LABEL))
            for test_pub in test_pubs
            for _, ref_row in test_pub[0].iterrows()
        ]

        files_checked = 0
        for file in files:
            publications = Base.get_publications(file)

            # Becomes True once at least one row of this file was matched
            # against a reference row (and its cluster name verified).
            checked = False
            for _, row in publications.iterrows():
                tools = row.get("Tools")
                for expected_tools, expected_cluster in expected:
                    if tools == expected_tools:
                        assert row.get(CLUSTER_NAME_COLUMN_LABEL) == expected_cluster, \
                            "unexpected cluster name for tool {!r} in {}".format(
                                tools, file)
                        checked = True

            assert checked, \
                "no row of {} matched any reference publication".format(file)
            files_checked += 1

        # Without this guard the test would pass vacuously if no files were
        # returned, since none of the assertions above would ever run.
        assert files_checked > 0, "no files were found to check"
Exemplo n.º 2
0
    def test_ttest_delta(self, tmp_clustered_files):
        """Check the output file written by ``TTest.ttest_delta``.

        Uses the ``tmp_clustered_files`` fixture, which is unpacked into a
        temporary directory and a list of input descriptors; each descriptor
        provides a ``"filename"`` and the expected ``"avg_sum_pre"`` /
        ``"avg_sum_post"`` values under ``"exp_values"``.

        Verifies that the output file is created, that its header equals
        ``TTest.TTEST_HEADER``, that the average pre/post citation columns
        match the expected values, and that every remaining column holds
        finite, strictly positive ``float64`` values.
        """
        # Arrange
        tmpdir, inputs = tmp_clustered_files

        input_files = [x["filename"] for x in inputs]
        exp_avg_pre = [x["exp_values"]["avg_sum_pre"] for x in inputs]
        exp_avg_post = [x["exp_values"]["avg_sum_post"] for x in inputs]
        output_file = os.path.join(tmpdir, "test_ttest_delta_output.csv")

        # Pre-act Assert
        # The output file should not exist before the test runs.
        assert not os.path.exists(output_file), \
            "output file already exists before the test ran"

        # Act
        TTest().ttest_delta(input_files, output_file)

        # Assert
        assert os.path.exists(output_file), "output file was not created"

        output_info = Base.get_publications(output_file)

        # Check if files have header
        assert BaseTestCase.assert_str_list_equal(list(output_info), TTest.TTEST_HEADER)

        # check the value of avg pre citations
        assert BaseTestCase.assert_lists_equal(output_info["Average Pre Citations"], exp_avg_pre)

        # check the value of avg post citations
        assert BaseTestCase.assert_lists_equal(output_info["Average Post Citations"], exp_avg_post)

        # Check that the values exist, are finite and are strictly positive;
        # the correctness of their values is asserted via other tests.
        skipped_columns = ("Repository", "Interpretation", "Growth")
        for column in output_info:
            if column in skipped_columns:
                continue

            assert output_info[column].dtype == 'float64', \
                "column {!r} is not float64".format(column)
            for cell in output_info[column]:
                # NaN/inf are checked before the sign: NaN compares False
                # against everything, so testing `cell > 0.0` first would
                # report a NaN as "not positive" and the NaN check could
                # never be the one that fails.
                assert not math.isnan(cell), \
                    "NaN found in column {!r}".format(column)
                assert not math.isinf(cell), \
                    "infinite value found in column {!r}".format(column)
                assert cell > 0.0, \
                    "non-positive value {!r} in column {!r}".format(cell, column)