def test_all(self):
    """Generate two results from a small hand-built prototype and print them.

    The generator is configured with four cost models and a random
    operation source whose delete, insert and edit probabilities are 0.25
    each and whose move probability is 0. The prototype has a root with
    three children; the first child has four children, the last of which
    again has four children. Nothing is asserted - the distances of both
    generated results are only printed.
    """
    cost_models = [FanoutWeightedTreeEditDistanceCost(),
                   TreeEditDistanceCost(),
                   SubtreeWeightedTreeEditDistanceCost(),
                   SubtreeHeightWeightedTreeEditDistanceCost()]
    operations = RandomOperation(delete_probability=0.25,
                                 insert_probability=0.25,
                                 edit_probability=0.25,
                                 move_probability=0)
    generator = TEDGenerator(costs=cost_models,
                             operation_generator=operations,
                             probability=.5)

    # Build the prototype level by level, keeping the insertion order fixed.
    prototype = Prototype()
    root = prototype.add_node("root", pid=1, ppid=0)
    first_child = root.add_node("test1", pid=2, ppid=1)
    for name, pid in (("test2", 3), ("test3", 4)):
        root.add_node(name, pid=pid, ppid=1)

    for name, pid in (("test1.1", 5), ("test1.2", 6), ("test1.3", 7)):
        first_child.add_node(name, pid=pid, ppid=2)
    deepest_parent = first_child.add_node("test1.4", pid=8, ppid=2)

    for name, pid in (("test2.1", 9), ("test2.2", 10),
                      ("test2.3", 11), ("test2.4", 12)):
        deepest_parent.add_node(name, pid=pid, ppid=8)

    # Both generations run before anything is printed.
    first_result = generator.generate(tree=prototype)
    second_result = generator.generate(tree=prototype)
    for outcome in (first_result, second_result):
        print("received %s" % outcome.distance)
Exemplo n.º 2
0
    def test_ensemble_result(self):
        """
        Compare values measured via an ensemble with single-signature runs.

        Values produced by ensemble methods were observed to sometimes differ
        from the results of the corresponding single calculations. This test
        targets exactly that: the same disturbed tree is measured once with
        an ``EnsembleSignature`` wrapping both signatures and once with each
        signature on its own, and the collected values are compared.
        """
        tree = real_tree()
        tree_generator = TEDGenerator(
            operation_generator=RandomOperation(
                insert_probability=.5, delete_probability=.5),
            costs=[TreeEditDistanceCost()],
            seed=1234)
        disturbed_tree = tree_generator.generate(tree)

        signatures = [ParentChildByNameTopologySignature(),
                      ParentCountedChildrenByNameTopologySignature(count=2)]

        def measure(signature, make_prototype):
            # One complete run over the disturbed tree; the prototype is
            # only created once the algorithm has been wrapped.
            matrix = DistanceMatrixDecorator(normalized=False)
            runner = IncrementalDistanceAlgorithm(
                signature=signature,
                distance=StartDistance
            )
            matrix.wrap_algorithm(runner)
            runner.prototypes = [make_prototype()]

            runner.start_tree()
            for event in disturbed_tree.event_iter(supported=runner.supported):
                try:
                    runner.add_event(event)
                except EventNotSupportedException:
                    # Events the algorithm rejects are skipped on purpose.
                    continue
            runner.finish_tree()
            return matrix.data()

        # Ensemble run: measured against the tree the disturbance started from.
        ensemble_data = measure(
            EnsembleSignature(signatures=signatures), lambda: tree)

        # Reference runs: one per signature, each against a fresh real_tree().
        single_data = {
            index: measure(signature, real_tree)
            for index, signature in enumerate(signatures)
        }

        for index, reference in single_data.items():
            self.assertEqual(ensemble_data[0][index][0], reference[0][0][0])
def _generate_perturbated_tree(kwargs):
    """
    Build the tree stored at ``filepath`` and generate perturbated variants.

    All settings are read from the ``kwargs`` dictionary; the entries below
    are its keys, not separate function arguments.

    :param kwargs: Dictionary holding the settings described below
    :param filepath: Path handed to the CSV tree builder (default ``None``)
    :param probabilities: Probabilities to generate variants for (default ``[]``)
    :param repeat: Number of variants generated per probability (default 1)
    :param insert_probability: Probability to insert item (default 0)
    :param delete_probability: Probability to delete item (default 0)
    :param change_probability: Probability to change item, passed on as
        ``edit_probability`` (default 0)
    :param move_probability: Probability to move item (default 0)
    :param leaf_nodes_only: Select ``skip_inner_node`` as node filter; only
        consulted when ``internal_nodes_only`` is false (default ``False``)
    :param internal_nodes_only: Select ``skip_leaf`` as node filter
        (default ``False``)
    :param attribute_nodes_only: Use a generator without costs that only
        applies ``DeleteAttributeTreeEditOperation`` with the current
        probability; all other operation, filter and cost settings are then
        ignored (default ``False``)
    :param cost: Whether the generator is given the list of cost models or
        an empty list (default ``True``)
    :return: ``MulticoreResult`` where ``result[filepath]["tree"]`` is the
        tree built first and ``result[filepath]["perturbated_tree"]`` maps
        each probability to the list of generated variants. The result is
        empty when the builder returns ``None``.
    """
    option = kwargs.get
    filepath = option("filepath", None)
    probabilities = option("probabilities", [])
    repeat = option("repeat", 1)
    insert_probability = option("insert_probability", 0)
    delete_probability = option("delete_probability", 0)
    change_probability = option("change_probability", 0)
    move_probability = option("move_probability", 0)
    leaf_nodes_only = option("leaf_nodes_only", False)
    internal_nodes_only = option("internal_nodes_only", False)
    attribute_nodes_only = option("attribute_nodes_only", False)
    cost = option("cost", True)

    result = MulticoreResult()
    tree_builder = CSVTreeBuilder()
    tree = tree_builder.build(filepath)
    if tree is None:
        return result

    result.setdefault(filepath, {})
    entry = result[filepath]
    entry["tree"] = tree
    entry.setdefault("perturbated_tree", {})

    for probability in probabilities:
        # A new generator (with new cost/operation objects) per probability.
        if attribute_nodes_only:
            operations = RandomOperation(
                delete_probability=1,
                delete_operation=DeleteAttributeTreeEditOperation(
                    probability=probability))
            ted_generator = TEDGenerator(
                costs=[],
                operation_generator=operations,
                probability=1,
                skip_node=skip_all_but_attribute_nodes)
        else:
            if cost:
                cost_models = [
                    TreeEditDistanceCost(),
                    FanoutWeightedTreeEditDistanceCost(),
                    SubtreeWeightedTreeEditDistanceCost(),
                    SubtreeHeightWeightedTreeEditDistanceCost(),
                    SubtreeWeightedTreeEditDistanceCostWithMove()
                ]
            else:
                cost_models = []
            operations = RandomOperation(
                insert_probability=insert_probability,
                delete_probability=delete_probability,
                edit_probability=change_probability,
                move_probability=move_probability)
            # internal_nodes_only wins over leaf_nodes_only if both are set.
            if internal_nodes_only:
                node_filter = skip_leaf
            elif leaf_nodes_only:
                node_filter = skip_inner_node
            else:
                node_filter = skip_no_node
            ted_generator = TEDGenerator(
                costs=cost_models,
                operation_generator=operations,
                probability=probability,
                skip_node=node_filter)

        for _ in range(repeat):
            variant = ted_generator.generate(tree)
            entry["perturbated_tree"].setdefault(probability, []).append(variant)
            # Rebuild the tree from file before the next generation
            # (presumably generate() modifies its input - TODO confirm).
            tree = tree_builder.build(filepath)
    return result
 def test_edit(self):
     """Generate from ``self.prototype`` with edit probability 1 and print the distance."""
     cost_models = [FanoutWeightedTreeEditDistanceCost(), TreeEditDistanceCost()]
     edit_only = RandomOperation(edit_probability=1)
     generator = TEDGenerator(costs=cost_models, operation_generator=edit_only)
     outcome = generator.generate(tree=self.prototype)
     # Nothing is asserted; the distance is only printed.
     print("received %s" % outcome.distance)
 def test_move(self):
     """Generate from ``self.prototype`` with move probability 1 and print the result."""
     move_only = RandomOperation(move_probability=1)
     # No cost models are configured for this run.
     generator = TEDGenerator(costs=[], operation_generator=move_only)
     outcome = generator.generate(tree=self.prototype)
     print(outcome)