示例#1
0
 def test_literal_multiline_quotes(self):
     # Split the multi-line-quotes fixture and compare every component with
     # its expected text; the expected object still carries the backslash
     # escapes for the newlines and the inner double quotes.
     subject, predicate, obj = split_nt_line(self.literal_multiline_quotes)
     expected_object = (
         "\"This is a multi-line\\nliteral with many quotes (\\\"\\\"\\\"\\\"\\\")"
         "\\nand two apostrophes ('').\""
     )
     self.assertEqual(subject, "http://example.org/#spiderman")
     self.assertEqual(predicate, "http://example.org/text")
     self.assertEqual(obj, expected_object)
示例#2
0
 def test_literal_xmls(self):
     # The expected object is the quoted literal followed by its
     # ^^<datatype IRI> suffix, exactly as spelled out below.
     subject, predicate, obj = split_nt_line(self.literal_xmls)
     expected_object = (
         """"That Seventies Show"^^<http://www.w3.org/2001/XMLSchema#string>"""
     )
     self.assertEqual(subject, "http://example.org/show/218")
     self.assertEqual(predicate, "http://www.w3.org/2000/01/rdf-schema#label")
     self.assertEqual(obj, expected_object)
示例#3
0
 def lines_to_triples(self, triples: List[str]) -> List[Tuple[str, str, str]]:
     """
     Split every given line into a triple, dropping lines that fail to split.

     A line for which ``split_nt_line`` raises ``ValueError`` is counted and
     left out of the result; the number of such lines is printed once all
     lines have been handled.

     :param triples: Lines that are each expected to hold one triple.
     :return: The results of ``split_nt_line`` for all lines that could be
         split, in input order.
     """
     converted = []
     errors = 0
     progress = tqdm(triples, desc='Converting triples', unit=' triples')
     for line in progress:
         try:
             parts = split_nt_line(line)
         except ValueError:
             # Not splittable into a triple: only count it.
             errors += 1
         else:
             converted.append(parts)
     print(f'{errors} occurred during triple conversion')
     return converted
示例#4
0
 def test_literal_double(self):
     # The expected object keeps the quoted lexical form together with the
     # xsd:double datatype suffix.
     subject, predicate, obj = split_nt_line(self.literal_double)
     expected_object = "\"1.663E-4\"^^<http://www.w3.org/2001/XMLSchema#double>"
     self.assertEqual(subject, "http://en.wikipedia.org/wiki/Helium")
     self.assertEqual(predicate, "http://example.org/elements/specificGravity")
     self.assertEqual(obj, expected_object)
示例#5
0
 def test_literal_integer(self):
     # The expected object keeps the quoted lexical form together with the
     # xsd:integer datatype suffix.
     subject, predicate, obj = split_nt_line(self.literal_integer)
     expected_object = "\"2\"^^<http://www.w3.org/2001/XMLSchema#integer>"
     self.assertEqual(subject, "http://en.wikipedia.org/wiki/Helium")
     self.assertEqual(predicate, "http://example.org/elements/atomicNumber")
     self.assertEqual(obj, expected_object)
示例#6
0
 def test_literal_region(self):
     # The expected object ends in a language tag with a region subtag
     # ("@fr-be") and contains non-ASCII characters.
     subject, predicate, obj = split_nt_line(self.literal_region)
     expected_object = "\"Cette Série des Années Septante\"@fr-be"
     self.assertEqual(subject, "http://example.org/show/218")
     self.assertEqual(predicate, "http://example.org/show/localName")
     self.assertEqual(obj, expected_object)
示例#7
0
 def test_literal_language(self):
     # The expected object is the quoted literal followed by the "@en"
     # language tag.
     subject, predicate, obj = split_nt_line(self.literal_language)
     expected_object = "\"That Seventies Show\"@en"
     self.assertEqual(subject, "http://example.org/show/218")
     self.assertEqual(predicate, "http://example.org/show/localName")
     self.assertEqual(obj, expected_object)
示例#8
0
 def test_literal_untyped(self):
     # The expected object is the literal including its surrounding double
     # quotes, with no datatype or language suffix.
     subject, predicate, obj = split_nt_line(self.literal_untyped)
     expected_object = "\"That Seventies Show\""
     self.assertEqual(subject, "http://example.org/show/218")
     self.assertEqual(predicate, "http://www.w3.org/2000/01/rdf-schema#label")
     self.assertEqual(obj, expected_object)
示例#9
0
 def test_split_simple_tabs(self):
     # All three expected components are plain IRI strings without
     # surrounding angle brackets.
     subject, predicate, obj = split_nt_line(self.simple_line_tabs)
     expected_predicate = "http://www.perceive.net/schemas/relationship/enemyOf"
     self.assertEqual(subject, "http://example.org/#spiderman")
     self.assertEqual(predicate, expected_predicate)
     self.assertEqual(obj, "http://example.org/#green-goblin")
示例#10
0
    def map_triple_lines(self, triple_lines: List[str]):
        """
        Assign each entity and each relation an integer id and map the triples to those ids.

        Ids are handed out in order of first appearance, starting at 0.
        Subjects and objects share one id space (entities); predicates get
        their own id space (relations).

        :param triple_lines: List of lines, each expected to hold one triple
            that ``split_nt_line`` can split.
        :return: Tuple ``(dict_ent, dict_rel, list_triples)``: the mapping
            entity -> id, the mapping relation -> id, and one
            ``[subject_id, object_id, relation_id]`` list per mapped line.
        :raises ValueError: Re-raised from ``split_nt_line`` when a line
            cannot be split and ``self.fail_silently`` is falsy.
        """
        dict_ent = {}
        dict_rel = {}
        list_triples = []
        skipped = 0

        sys.stdout.write("Processing Triple lines ... ")
        sys.stdout.flush()
        for triple_line in tqdm(triple_lines,
                                desc='Processing',
                                unit=' triples'):
            # Split the line; lines that are not triples are either skipped
            # (fail_silently) or abort the whole mapping.
            try:
                triple = split_nt_line(triple_line)
            except ValueError:
                if self.fail_silently:
                    skipped += 1
                    continue
                raise
            subject, predicate, obj = triple

            # len(...) is evaluated before setdefault inserts, so an unseen
            # key receives the next free id while a known key keeps its id.
            # Ids are therefore never negative and need no validity check.
            idx_sub = dict_ent.setdefault(subject, len(dict_ent))
            idx_rel = dict_rel.setdefault(predicate, len(dict_rel))
            idx_obj = dict_ent.setdefault(obj, len(dict_ent))

            # careful: OpenKE format is "subject object relation"
            list_triples.append([idx_sub, idx_obj, idx_rel])

        # Output results. Note that the "Distinct Triples" figure is the
        # number of mapped lines; repeated triples are not de-duplicated.
        print("")
        print(f"{len(dict_ent)} Distinct Entities")
        print(f"{len(dict_rel)} Distinct Relations")
        print(f"{len(list_triples)} Distinct Triples")
        print(f"Skipped {skipped} lines")
        return dict_ent, dict_rel, list_triples