Exemplo n.º 1
0
    def _process(self, input_pack: DataPack):
        """Remove duplicated ``WikiAnchor`` entries that share the same span.

        Anchors are grouped by their ``(span.begin, span.end)`` pair. For
        each group, the first anchor returned by ``input_pack.get`` is kept
        and every other anchor in the group is deleted from the pack. Groups
        holding more than two anchors are reported through ``logging.error``
        before their extra copies are removed.

        Args:
            input_pack: The pack whose ``WikiAnchor`` entries are
                de-duplicated in place.
        """
        # Collect every anchor first so that no entry is deleted while the
        # pack is still being iterated.
        anchors_by_span = defaultdict(list)
        anchor: WikiAnchor
        for anchor in input_pack.get(WikiAnchor):
            span_key = (anchor.span.begin, anchor.span.end)
            anchors_by_span[span_key].append(anchor)

        for copies in anchors_by_span.values():
            if len(copies) > 2:
                # Report the anomaly and carry on; a debugger breakpoint
                # here would stall any non-interactive run.
                logging.error(
                    "There are links that have more than 2 copies.")
                logging.error(
                    "Pack %s: anchor targeting %s appears %d times.",
                    input_pack.pack_name, copies[0].target_page_name,
                    len(copies))
            # Keep the first anchor of the span; the slice is empty when
            # the span has a single anchor, so nothing is deleted then.
            for duplicate in copies[1:]:
                input_pack.delete_entry(duplicate)
Exemplo n.º 2
0
 def _process(self, input_pack: DataPack):
     """Delete every ``Hopper`` entry returned by ``input_pack.get``.

     Args:
         input_pack: The pack from which the ``Hopper`` entries are
             removed in place.
     """
     # Snapshot the entries before deleting anything; presumably this keeps
     # the deletions from disturbing the pack's own iteration (confirm
     # against the semantics of ``DataPack.get``).
     to_remove = tuple(input_pack.get(Hopper))
     for entry in to_remove:
         input_pack.delete_entry(entry)