Exemplo n.º 1
0
    def test_advanced_corpus_parsing(self):
        """Parse a training and a dev slice of one corpus file and check them.

        Both slices come from the same ``.sdp`` file: the training slice is
        bounded by ``last_id``, the dev slice by ``start_id``. For every
        parsed graph, ``dog.cyclic()`` is evaluated; the test expects no
        graph to report a cycle and pins the number of graphs per slice.
        Finally the dev slice is handed to ``export_corpus``.
        """
        corpus_path = 'res/osdp-12/sdp/2015/en.dm.sdp'

        def tally(graphs):
            """Walk ``graphs`` once; return (graphs seen, graphs with a cycle)."""
            seen = 0
            with_cycle = 0
            for graph in graphs:
                seen += 1
                with_cycle += 1 if graph.dog.cyclic() else 0
            return seen, with_cycle

        # Both slices are requested before either one is iterated.
        training_corpus = parse_file(corpus_path,
                                     last_id=21999042,
                                     max_n=500)
        dev_corpus = parse_file(corpus_path,
                                start_id=22000001,
                                max_n=100)

        training_seen, training_cyclic = tally(training_corpus)
        self.assertEqual(training_seen, 500)
        self.assertEqual(training_cyclic, 0)

        dev_seen, dev_cyclic = tally(dev_corpus)
        # NOTE(review): the dev slice is requested with max_n=100, yet 1692
        # graphs are expected here -- confirm how parse_file treats max_n
        # when start_id is given.
        self.assertEqual(dev_seen, 1692)
        self.assertEqual(dev_cyclic, 0)

        export_corpus(dev_corpus, '/tmp/dev_corpus_export.dm.sdp')
Exemplo n.º 2
0
 def __test_sdp_parsing_full(self):
     """Run every partitioning strategy over the full DM corpus.

     The corpus size is printed once. Graphs whose ``sentence`` has more
     than 50 entries are skipped; every other graph is passed to
     ``__process_single_dsg`` together with its index in the corpus.

     NOTE(review): the name does not start with ``test``, so a test loader
     that discovers methods by that prefix would presumably not run this
     -- confirm that it is disabled on purpose.
     """
     def first_component(token):
         # Terminal labeling: keep only the first component of the token.
         return token[0]

     corpus = parse_file('res/osdp-12/sdp/2015/en.dm.sdp')
     print(len(corpus))

     for strategy in self.rec_part_strategies:
         for index, graph in enumerate(corpus):
             if len(graph.sentence) <= 50:
                 self.__process_single_dsg(
                     index,
                     graph,
                     strategy,
                     terminal_labeling=first_component)
Exemplo n.º 3
0
    def test_sdp_parsing(self):
        """Process each trial corpus with each partitioning strategy.

        For every combination of annotation style ('dm', 'pas', 'psd') and
        entry of ``self.rec_part_strategies``, the matching trial file is
        parsed, its size is printed, and every graph is passed to
        ``__process_single_dsg`` together with its index in the corpus.
        """
        def first_component(token):
            # Terminal labeling: keep only the first component of the token.
            return token[0]

        styles = ['dm', 'pas', 'psd']
        for style, strategy in product(styles, self.rec_part_strategies):
            # The file is re-parsed for every (style, strategy) pair.
            corpus = parse_file(''.join(['res/sdp/trial/', style, '.sdp']))
            print(len(corpus))

            for index, graph in enumerate(corpus):
                self.__process_single_dsg(
                    index,
                    graph,
                    strategy,
                    terminal_labeling=first_component)
Exemplo n.º 4
0
 def read_corpus(self, resource):
     """Parse the corpus described by ``resource`` and return the result.

     ``resource`` must provide ``path``, ``start``, ``end`` and ``limit``;
     they are forwarded to ``parse_file`` as the file path, ``start_id``,
     ``last_id`` and ``max_n`` respectively.
     """
     source_path = resource.path
     first_id = resource.start
     final_id = resource.end
     graph_limit = resource.limit
     return parse_file(source_path,
                       start_id=first_id,
                       last_id=final_id,
                       max_n=graph_limit)