def process_training_fragments(self):
    """Parse each TRAINING fragment document and extract training features.

    For every fragment in ``self.fragments`` this method:
      1. parses the fragment XML file and attaches the document creation
         time (DCT) so timex-relative features can be computed,
      2. creates event-event (.EE) and event-timex (.ET) feature-vector
         files via ``tree_vectors.create_vectors`` using the shared
         feature-index dictionary,
      3. writes the (possibly extended) dictionary back to disk,
      4. recollects vectors plus gold labels into the training files
         (.train.EE / .train.ET) via ``feature_recollect``.

    Side effects: changes the process working directory to
    ``self.DIR_CLASSIFIER`` and writes vector/dictionary files under
    ``self.DIR_DATA`` / ``self.DICT_DATA``.
    """
    os.chdir(self.DIR_CLASSIFIER)
    for fragment in self.fragments:
        base = fragment[0]
        fin = os.path.join(self.DIR_DATA,
                           base + '.' + self.CREATION_EXTENSION)
        # Output paths derived from the fragment file name.
        ee_vectors = fin + '.EE'
        et_vectors = fin + '.ET'
        ee_train_vectors = fin + '.train.EE'
        et_train_vectors = fin + '.train.ET'

        # Parse the fragment; 'with' guarantees the handle is closed
        # (the previous version leaked the open file object).
        with open(fin, "r") as fragment_file:
            fragment_doc = Parser().parse_file(fragment_file)
        fragment_doc.set_dct_timex(self.document.get_dct())

        # Feature-index dictionary maps feature strings to integer ids.
        # It is loaded, extended during vector creation, and dumped back,
        # so it accumulates features across fragments.
        # NOTE(review): a narrative-scheme variant exists — use
        # 'feature_index_with_narrative_scheme.dict' together with
        # tree_vectors_with_narrative.create_vectors to enable it.
        dictionary_file = os.path.join(self.DICT_DATA, 'feature_index.dict')
        feature_index_dict = Feature_Index_Dict()
        feature_index_dict.load_from_file(dictionary_file)

        # Create EE/ET vectors (without the narrative scheme).
        tree_vectors.create_vectors(fragment_doc,
                                    self.auxillary[PARSED_DOCUMENT],
                                    feature_index_dict,
                                    ee_vectors, et_vectors)
        feature_index_dict.dump_to_file(dictionary_file)
        print('done create vectors')

        # Pair the vectors with gold labels to build the training files.
        feature_recollect(self.document, ee_vectors, et_vectors,
                          ee_train_vectors, et_train_vectors)
        print('done collect training label and features')
        print('======================================================')