Exemplo n.º 1
0
    def test_indirect_matrices_shape_with_missing_sources(self):
        """
        Builds SAMPLE_WARC and checks that every trace matrix ("0-1", "0-2", "1-2") has the shape
        (number of artifacts in its upper level, number of artifacts in its lower level).
        :return: None
        """
        builder = DatasetBuilder("SAMPLE_WARC")
        builder.build()

        # Sizes of the three artifact levels, top to bottom.
        n_top, n_middle, n_bottom = (
            len(builder.artifacts[level]) for level in range(3)
        )

        # Insertion order matters: it fixes the order in which matrices are checked.
        expected_shapes = {
            "0-1": (n_top, n_middle),
            "0-2": (n_top, n_bottom),
            "1-2": (n_middle, n_bottom),
        }
        for trace_id, expected_shape in expected_shapes.items():
            assert_shapes_equal(
                builder.trace_matrix_map[trace_id].matrix, expected_shape
            )

        # The middle-bottom matrix is additionally checked through its own shape attribute.
        self.assertEqual(
            builder.trace_matrix_map["1-2"].matrix.shape,
            expected_shapes["1-2"],
        )
Exemplo n.º 2
0
    def test_creating_missing_matrices(self):
        """
        Drops trace matrix 0-2 from the MockDataset structure definition and verifies that the
        builder still produces it, using transitive traces through the remaining matrices.
        :return:
        """
        builder = DatasetBuilder("MockDataset")
        builder.structure_definition["traces"]["0-2"] = None
        builder.build()

        matrix_builder = builder.trace_matrix_builder
        dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()
        matrix_map = matrix_builder.trace_matrix_map

        # All three matrices must exist, including the one removed from the definition.
        self.assertEqual(3, len(matrix_map.get_trace_ids()))
        self.assertIsNotNone(matrix_map["0-2"])

        inferred_matrix = matrix_map["0-2"].matrix
        self.assertIsNotNone(inferred_matrix)
        self.assertEqual(1, inferred_matrix[0, 0])
        # Row sums added together give the total of all entries in the matrix.
        self.assertEqual(2, inferred_matrix.sum(axis=1).sum())

        self.assertEqual(3, len(dependency_graph.es))
Exemplo n.º 3
0
 def test_create_transitive_trace_matrices_with_warc(self):
     """
     Builds SAMPLE_WARC and checks that trace matrix "1-2" is present and that the trace matrix
     dependency graph has three edges.
     :return: None
     """
     builder = DatasetBuilder("SAMPLE_WARC")
     builder.build()

     matrix_builder = builder.trace_matrix_builder
     dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()

     self.assertIsNotNone(matrix_builder.trace_matrix_map["1-2"])
     self.assertEqual(3, len(dependency_graph.es))
Exemplo n.º 4
0
    def test_get_paths_to_missing_traces_with_fakedataset(self):
        """
        Removes trace matrix 0-2 from MockDataset, builds only the matrices named in the
        definition, and checks that the single path proposed for "0-2" is [0, 1, 2].
        :return: None
        """
        builder = DatasetBuilder("MockDataset")
        builder.structure_definition["traces"]["0-2"] = None
        builder.artifact_builder.build()

        matrix_builder = builder.trace_matrix_builder
        # TODO: clean up levels dependency
        matrix_builder.set_artifact_levels(builder.artifact_builder.artifacts)
        matrix_builder.build_matrices_defined_in_dataset()

        defined_matrices = matrix_builder.trace_matrix_map
        dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()
        defined_ids: List[str] = defined_matrices.get_trace_ids()
        missing_paths = get_paths_to_complete_graph(
            defined_ids, dependency_graph, 3
        )

        self.assertEqual(1, len(missing_paths.get_trace_ids()))
        candidate_paths = missing_paths["0-2"]
        self.assertEqual(1, len(candidate_paths))
        self.assertEqual([0, 1, 2], candidate_paths[0])
Exemplo n.º 5
0
    def test_create_trace_matrices_in_definition(self):
        """
        Removes trace matrix 0-2 from MockDataset and checks that building only the matrices
        named in the definition yields exactly "0-1" and "1-2", and that "0-1" reports
        top index 0 and bottom index 1.
        :return: None
        """
        builder = DatasetBuilder("MockDataset")
        builder.structure_definition["traces"]["0-2"] = None
        builder.artifact_builder.build()

        matrix_builder = builder.trace_matrix_builder
        matrix_builder.set_artifact_levels(builder.artifact_builder.artifacts)
        matrix_builder.build_matrices_defined_in_dataset()

        built_ids = matrix_builder.trace_matrix_map.get_trace_ids()
        self.assertEqual(2, len(built_ids))
        for defined_id in ("0-1", "1-2"):
            self.assertIn(defined_id, built_ids)
        self.assertNotIn("0-2", built_ids)

        top_to_middle = matrix_builder["0-1"]
        self.assertEqual(0, top_to_middle.top_index)
        self.assertEqual(1, top_to_middle.bottom_index)
Exemplo n.º 6
0
 def run(self) -> Table:
     """
     Builds and exports every dataset listed in DATASET_COLUMN_ORDER, printing a line after
     each export.
     :return: a new, empty Table
     """
     loading_bar = create_loading_bar(
         EXPERIMENT_NAME, DATASET_COLUMN_ORDER, len(DATASET_COLUMN_ORDER)
     )
     with loading_bar as names_with_progress:
         for dataset_name in names_with_progress:
             exporter = DatasetBuilder(dataset_name)
             exporter.build()
             exporter.export()
             print(f"{dataset_name} exported.")
     return Table()
Exemplo n.º 7
0
    def test_export_dataset(self):
        """
        Exports MockDataset and checks that each required folder has been updated.
        :return:
        """
        # Setup
        dataset_name = "MockDataset"
        builder = DatasetBuilder(dataset_name)
        builder.build()

        # Work
        builder.export()
        for required_folder in ("Artifacts", "Oracles"):
            check_folder_has_updated(
                os.path.join(
                    PATH_TO_SAMPLE_DATASETS, dataset_name, required_folder
                )
            )
Exemplo n.º 8
0
 def test_get_paths_to_missing_traces_with_warc(self):
     """
     Verifies that the trace matrix missing from WARC can be covered by a transitive path
     through the trace matrices that the dataset does define.
     :return: None
     """
     builder = DatasetBuilder("SAMPLE_WARC")
     builder.artifact_builder.build()

     matrix_builder = builder.trace_matrix_builder
     matrix_builder.set_artifact_levels(builder.artifact_builder.artifacts)
     matrix_builder.build_matrices_defined_in_dataset()
     dependency_graph = matrix_builder.create_trace_matrix_dependency_graph()

     defined_ids: List[str] = [
         to_string(trace_id)
         for trace_id in matrix_builder.trace_matrix_map.get_trace_ids()
     ]
     missing_paths = get_paths_to_complete_graph(
         defined_ids, dependency_graph, 3
     )

     self.assertEqual(1, len(missing_paths.get_trace_ids()))
     candidate_paths = missing_paths["0-1"]
     self.assertEqual(1, len(candidate_paths))
     self.assertEqual([0, 2, 1], candidate_paths[0])
Exemplo n.º 9
0
class TestTechniqueHelper(SmartTest):
    """
    Shared fixtures for technique tests.

    The class body builds and exports MockDataset when the class is defined (i.e. at import
    time), then declares technique definitions (nested lists of command symbol, parameters and
    components) together with the technique-name strings used alongside them. The methods wrap
    those fixtures in the corresponding technique definition objects.
    """

    # Executed once, when the class body is evaluated: build, export, then load MockDataset.
    d_name = "MockDataset"
    d_builder = DatasetBuilder(d_name)
    d_builder.build()
    d_builder.export()
    dataset = Dataset(d_name)
    """
    Direct
    """
    # Direct technique between artifact levels "0" and "2".
    direct_algebraic_model = AlgebraicModel.VSM
    direct_trace_type = TraceType.NOT_TRACED
    direct_parameters = [direct_algebraic_model.value, direct_trace_type.value]
    direct_components = ["0", "2"]
    direct_definition = [
        DIRECT_COMMAND_SYMBOL, direct_parameters, direct_components
    ]
    """
    Intermediate
    """
    # Transitive technique made of two direct components: levels 0 -> 1 and 1 -> 2.
    transitive_algebraic_model = AlgebraicModel.VSM
    transitive_aggregation_type = AggregationMethod.SUM
    transitive_component_scaling_type = ScalingMethod.GLOBAL
    transitive_component_trace_type = TraceType.NOT_TRACED

    # Upper component (levels "0" and "1") as a nested definition list.
    transitive_component_a = [
        DIRECT_COMMAND_SYMBOL,
        [
            transitive_algebraic_model.value,
            transitive_component_trace_type.value
        ],
        ["0", "1"],
    ]
    # String form of the upper component, built from the same values.
    transitive_upper_comp = "(%s (%s %s) (%s %s))" % (
        DIRECT_COMMAND_SYMBOL,
        transitive_algebraic_model.value,
        transitive_component_trace_type.value,
        "0",
        "1",
    )
    # Lower component (levels "1" and "2") as a nested definition list.
    transitive_component_b = [
        DIRECT_COMMAND_SYMBOL,
        [
            transitive_algebraic_model.value,
            transitive_component_trace_type.value
        ],
        ["1", "2"],
    ]
    # String form of the lower component, built from the same values.
    transitive_component_b_name = "(%s (%s %s) (%s %s))" % (
        DIRECT_COMMAND_SYMBOL,
        transitive_algebraic_model.value,
        transitive_component_trace_type.value,
        "1",
        "2",
    )

    transitive_parameters = [
        transitive_aggregation_type.value,
        transitive_component_scaling_type.value,
    ]
    transitive_components = [transitive_component_a, transitive_component_b]
    transitive_technique_definition = [
        TRANSITIVE_COMMAND_SYMBOL,
        transitive_parameters,
        transitive_components,
    ]
    """
    Traced Components
    """
    # Same two components as above, but with TraceType.TRACED and MAX aggregation.
    traced_component_type = TraceType.TRACED
    traced_aggregation_value = AggregationMethod.MAX
    traced_direct_component_a = [
        DIRECT_COMMAND_SYMBOL,
        [transitive_algebraic_model.value, traced_component_type.value],
        ["0", "1"],
    ]
    traced_direct_component_b = [
        DIRECT_COMMAND_SYMBOL,
        [transitive_algebraic_model.value, traced_component_type.value],
        ["1", "2"],
    ]
    traced_components = [traced_direct_component_a, traced_direct_component_b]
    traced_parameters = [
        traced_aggregation_value.value,
        transitive_component_scaling_type.value,
    ]
    """
    Sampled Artifacts
    """
    # Sampled techniques reuse the transitive parameters plus the sample percentage as text.
    sample_percentage = 0.5
    # NOTE(review): `[str]` is a list literal, not a typing construct; presumably List[str]
    # was intended. repr(0.5) yields "0.5".
    sampled_parameters: [str
                         ] = transitive_parameters + [repr(sample_percentage)]
    # Alias, not a copy: this is the same list object as transitive_components.
    sampled_components = transitive_components
    sampled_artifacts_definition = [
        SAMPLED_COMMAND_SYMBOL,
        sampled_parameters,
        sampled_components,
    ]
    sampled_traces_definition = [
        SAMPLED_TRACED_COMMAND_SYMBOL,
        sampled_parameters,
        sampled_components,
    ]
    """
    Combined
    """
    # Combined technique: direct definition plus transitive definition.
    combined_aggregation_type = AggregationMethod.SUM
    combined_parameters = ["SUM"]
    combined_components = [direct_definition, transitive_technique_definition]
    """
    Combined (with sampled transitive)
    """
    combined_sampled_artifacts_components = [
        direct_definition,
        sampled_artifacts_definition,
    ]
    combined_sampled_traces_components = [
        direct_definition, sampled_traces_definition
    ]

    # Hard-coded technique names; these literals do not read the *_COMMAND_SYMBOL constants.
    direct_technique_name = "(. (VSM NT) (0 2))"
    transitive_technique_name = (
        "(x (SUM GLOBAL) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))")
    # NOTE(review): "%f" renders 0.5 as "0.500000", whereas sampled_parameters stores
    # repr(0.5) == "0.5" — confirm both spellings are meant to refer to the same technique.
    transitive_sampled_artifacts_technique_name = (
        "(~ (SUM GLOBAL %f) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))" %
        sample_percentage)
    transitive_sampled_traces_technique_name = (
        "($ (SUM GLOBAL %f) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))" %
        sample_percentage)
    combined_technique_name = "(o (%s) (%s %s))" % (
        "SUM",
        direct_technique_name,
        transitive_technique_name,
    )
    combined_sampled_artifacts_technique_name = "(o (%s) (%s %s))" % (
        "SUM",
        direct_technique_name,
        transitive_sampled_artifacts_technique_name,
    )
    combined_sampled_traces_technique_name = "(o (%s) (%s %s))" % (
        "SUM",
        direct_technique_name,
        transitive_sampled_traces_technique_name,
    )

    def get_direct_definition(self) -> DirectTechniqueDefinition:
        """Return a DirectTechniqueDefinition built from the direct parameters and components."""
        return DirectTechniqueDefinition(self.direct_parameters,
                                         self.direct_components)

    def get_transitive_definition(self) -> TransitiveTechniqueDefinition:
        """Return a TransitiveTechniqueDefinition built from the (not traced) transitive fixtures."""
        return TransitiveTechniqueDefinition(self.transitive_parameters,
                                             self.transitive_components)

    def get_traced_transitive_definition(
            self) -> TransitiveTechniqueDefinition:
        """Return a TransitiveTechniqueDefinition built from the traced parameters and components."""
        return TransitiveTechniqueDefinition(self.traced_parameters,
                                             self.traced_components)

    def get_combined_definition(self) -> HybridTechniqueDefinition:
        """Return a HybridTechniqueDefinition combining the direct and transitive definitions."""
        return HybridTechniqueDefinition(self.combined_parameters,
                                         self.combined_components)

    def get_sampled_technique_definition(self) -> SampledTechniqueDefinition:
        """Return a SampledTechniqueDefinition built from the sampled parameters and components."""
        return SampledTechniqueDefinition(self.sampled_parameters,
                                          self.sampled_components)

    def get_combined_sampled_artifacts_definition(
            self) -> HybridTechniqueDefinition:
        """Return a HybridTechniqueDefinition combining the direct and sampled-artifacts definitions."""
        return HybridTechniqueDefinition(
            self.combined_parameters,
            self.combined_sampled_artifacts_components)

    def assert_valid_fake_dataset_similarity_matrix(
            self, similarity_matrix: SimilarityMatrix):
        """Assert that `similarity_matrix` has shape (1, 3)."""
        self.assertEqual((1, 3), similarity_matrix.shape)

    def create_counter_func(self, t_name: str):
        """
        Create a callback that validates the TechniqueData it receives and counts its calls.

        :param t_name: technique name each received TechniqueData is expected to report
        :return: tuple of (callback, dict whose "value" entry holds the number of calls so far)
        """
        # A dict is used so the nested function can update the count without `nonlocal`.
        n_function_calls = {"value": 0}

        def counter_func(data: TechniqueData):
            # Each call must be for this fixture's dataset and the expected technique.
            self.assertEqual(self.d_name, data.dataset.name)
            self.assertEqual(t_name, data.technique.get_name())
            n_function_calls["value"] = n_function_calls["value"] + 1

        return counter_func, n_function_calls
Exemplo n.º 10
0
 def test_normalize_original_matrices(self):
     """
     Builds MockDataset and checks that entry [0][2] of trace matrix "0-2" equals 1.
     :return: None
     """
     builder = DatasetBuilder("MockDataset")
     builder.build()

     top_to_bottom = builder.trace_matrix_builder["0-2"].matrix
     self.assertEqual(1, top_to_bottom[0][2])
Exemplo n.º 11
0
from api.datasets.builder.dataset_builder import DatasetBuilder
from api.datasets.dataset import Dataset
from api.tracer import Tracer

# Dataset and technique names used by the script below.
DATASET_NAME = "IllustrativeExample"
TOP_TECHNIQUE_NAME = "(. (VSM NT) (0 1))"
BOTTOM_TECHNIQUE_NAME = "(. (VSM NT) (1 2))"
DIRECT_TECHNIQUE_NAME = "(. (VSM NT) (0 2))"
# NOTE(review): TECHNIQUE_NAME is not referenced in the visible part of this script.
TECHNIQUE_NAME = "(x (MAX INDEPENDENT) ((. (VSM NT) (0 1)) (. (VSM NT) (1 2))))"
# Set to True to rebuild and re-export the dataset before tracing.
REBUILD = False

if __name__ == "__main__":
    if REBUILD:
        dataset_builder = DatasetBuilder(DATASET_NAME)
        dataset_builder.build()
        dataset_builder.export()

    # NOTE(review): `dataset` is not used in the visible part of this script.
    dataset = Dataset(DATASET_NAME)

    # Fetch technique data for the two component techniques and the direct technique.
    tracer = Tracer()
    top_technique_data = tracer.get_technique_data(DATASET_NAME,
                                                   TOP_TECHNIQUE_NAME)
    bottom_technique_data = tracer.get_technique_data(DATASET_NAME,
                                                      BOTTOM_TECHNIQUE_NAME)
    direct_technique_data = tracer.get_technique_data(DATASET_NAME,
                                                      DIRECT_TECHNIQUE_NAME)

    # Compare entry [0][0] of each similarity matrix: the product of the top and bottom
    # scores versus the direct score.
    # NOTE(review): the visible code neither prints nor stores these results.
    top_score = top_technique_data.similarity_matrix[0][0]
    bottom_score = bottom_technique_data.similarity_matrix[0][0]
    transitive_score = top_score * bottom_score
    direct_score = direct_technique_data.similarity_matrix[0][0]