Пример #1
0
    def process_training_fragments(self):
        """Build training feature vectors for every fragment in self.fragments.

        Changes the working directory to self.DIR_CLASSIFIER, then for each
        fragment:

        1. parses the fragment file ``<DIR_DATA>/<base>.<CREATION_EXTENSION>``
           (``base`` is ``fragment[0]``) and sets its DCT timex from
           ``self.document.get_dct()``;
        2. loads the feature index dictionary from
           ``<DICT_DATA>/feature_index.dict``, hands it to
           ``tree_vectors.create_vectors`` together with the parsed document
           stored in ``self.auxillary[PARSED_DOCUMENT]`` and the '.EE' / '.ET'
           vector file names, then dumps the dictionary back to the same file;
        3. calls ``feature_recollect`` with the '.EE' / '.ET' names and the
           '.train.EE' / '.train.ET' names.

        The '.train.*' files are presumably the input used for training the
        classifier -- confirm against feature_recollect.

        Progress messages are printed to stdout. Returns None.
        """
        os.chdir(self.DIR_CLASSIFIER)

        # NOTE(review): the perl option is fetched but never used in this
        # method; the call is kept only in case the getter has side effects --
        # confirm and remove.
        perl = self.tarsqi_instance.getopt_perl()

        # Feature index for the variant WITHOUT narrative scheme. The
        # narrative-scheme variant would use
        # 'feature_index_with_narrative_scheme.dict' and
        # tree_vectors_with_narrative.create_vectors instead.
        dictionary_file = os.path.join(self.DICT_DATA, 'feature_index.dict')

        for fragment in self.fragments:
            base = fragment[0]
            fin = os.path.join(self.DIR_DATA,
                               base + '.' + self.CREATION_EXTENSION)

            # Only event-event (EE) and event-timex (ET) vectors are produced;
            # timex-timex (TT) vectors are currently disabled.
            ee_vectors = fin + '.EE'
            et_vectors = fin + '.ET'
            ee_train_vectors = fin + '.train.EE'
            et_train_vectors = fin + '.train.ET'

            # Close the fragment file explicitly instead of leaking one
            # handle per fragment. try/finally is used rather than `with` so
            # the code runs on any Python 2 version.
            # NOTE(review): assumes parse_file consumes the whole file before
            # returning -- confirm against Parser.
            parser = Parser()
            fragment_file = open(fin, "r")
            try:
                fragment_doc = parser.parse_file(fragment_file)
            finally:
                fragment_file.close()
            fragment_doc.set_dct_timex(self.document.get_dct())

            # The dictionary is reloaded and written back for every fragment,
            # so the on-disk index is refreshed after each fragment.
            feature_index_dict = Feature_Index_Dict()
            feature_index_dict.load_from_file(dictionary_file)
            tree_vectors.create_vectors(fragment_doc,
                                        self.auxillary[PARSED_DOCUMENT],
                                        feature_index_dict, ee_vectors,
                                        et_vectors)
            feature_index_dict.dump_to_file(dictionary_file)
            # Parenthesized single-argument prints behave identically under
            # the Python 2 print statement and the Python 3 print function.
            print('done create vectors')

            feature_recollect(self.document, ee_vectors, et_vectors,
                              ee_train_vectors, et_train_vectors)
            print('done collect training label and features')
            print('======================================================')