def main(history=1, tiny='.tiny', tbank=None):
    """Run the whole pipeline: load data, train the perceptron, validate.

    Steps, in order:
      1. obtain a tree bank (``dts.tbankparser()`` unless one is passed in),
      2. process the training and validation files with ``dp.process``,
      3. initialise ``sp`` and report ``flaws(...)`` before any training,
      4. train with ``sp.train_perceptron`` and save the weights to
         ``'weights' + str(history) + tiny + '.npy'`` via ``np.save``,
      5. report ``flaws(...)`` again using the trained weights, followed by
         a timing summary.

    Parameters:
        history: forwarded to ``dp.process``, ``flaws`` and
            ``sp.train_perceptron``; must be >= 1.
        tiny: string inserted into the name of the saved weights file.
        tbank: an already constructed tree bank, or None to build one.

    Returns:
        The tuple ``(feature_dict, weights)``.

    Raises:
        AssertionError: if ``history`` is smaller than 1 (only while
            assertions are enabled).
    """
    assert history >= 1, "use at least some history"
    t1 = time()
    TRAIN_FILE = '../release3.2/final_data/train-data.pre'
    VAL_FILE = '../release3.2/final_data/validate-data.pre'

    print('loading tree bank')
    if tbank is None:
        tbank = dts.tbankparser()
    # Taken *after* the tree bank is available.  Previously this was measured
    # before the load, so it was always ~0 and the tree bank loading time was
    # attributed to t3 instead.
    t2 = time() - t1

    print('loading sentences')
    dp._init_(tbank)
    all_sentences, feature_dict = dp.process(TRAIN_FILE, history)
    val_sentences, _val_feat = dp.process(VAL_FILE, history)
    # Time spent since the tree bank finished loading.
    t3 = time() - t1 - t2
    print("features has been made")

    print("init perceptron")
    sp._init_(len(feature_dict), dts, False)
    print("end init")

    # Baseline scores before any training has happened.
    out(('SSE random weights, only Ne-tags',
         flaws(dts, val_sentences, feature_dict, tbank, history,
               with_tags=False)))
    print("SSE random weights, only Ne-tags")
    out(('SSE random weights',
         flaws(dts, val_sentences, feature_dict, tbank, history)))
    print("SSE random weight")

    t4 = time()
    print("learning")
    weights = sp.train_perceptron(all_sentences, feature_dict, tbank, history)
    np.save('weights' + str(history) + tiny + '.npy', weights)
    t4 = time() - t4  # training time, including saving the weights
    print(weights.shape)

    # The total is taken here, so it does not include the validation below.
    t1 = time() - t1

    print("validating")
    out(('after %d sentences, only Ne-tags' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights,
               False)))
    out(('after %d sentences' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights)))
    out(('total %f sec (loading: %f, %f; training: %f' % (t1, t2, t3, t4)))
    return feature_dict, weights
def main(history=1, tiny='.tiny', tbank=None):
    """Run the whole process: load data, train the perceptron, validate.

    The tree bank is built with ``dts.tbankparser()`` when ``tbank`` is None.
    Training and validation files are processed with ``dp.process``, the
    result of ``flaws(...)`` is reported through ``out`` before and after
    training, and the trained weights are stored with ``np.save`` in
    ``'weights' + str(history) + tiny + '.npy'``.

    Parameters:
        history: forwarded to ``dp.process``, ``flaws`` and
            ``sp.train_perceptron``; must be >= 1.
        tiny: string inserted into the name of the saved weights file.
        tbank: an already constructed tree bank, or None to build one.

    Returns:
        The tuple ``(feature_dict, weights)``.

    Raises:
        AssertionError: if ``history`` is smaller than 1 (only while
            assertions are enabled).
    """
    # The message used to be written with four opening quotes, which left a
    # stray '"' at the start of the assertion text.
    assert history >= 1, "use at least some history"
    t1 = time()
    TRAIN_FILE = '../release3.2/final_data/train-data.pre'
    VAL_FILE = '../release3.2/final_data/validate-data.pre'

    print('loading tree bank')
    if tbank is None:
        tbank = dts.tbankparser()
    # Measure once the tree bank is actually loaded; measuring before the
    # load (as was done previously) always yielded ~0 and pushed the tree
    # bank loading time into t3.
    t2 = time() - t1

    print('loading sentences')
    dp._init_(tbank)
    all_sentences, feature_dict = dp.process(TRAIN_FILE, history)
    val_sentences, _val_feat = dp.process(VAL_FILE, history)
    # Time spent since the tree bank finished loading.
    t3 = time() - t1 - t2
    print("features has been made")

    print("init perceptron")
    sp._init_(len(feature_dict), dts, False)
    print("end init")

    # Scores reported before training, for comparison with the later ones.
    out(('SSE random weights, only Ne-tags',
         flaws(dts, val_sentences, feature_dict, tbank, history,
               with_tags=False)))
    print("SSE random weights, only Ne-tags")
    out(('SSE random weights',
         flaws(dts, val_sentences, feature_dict, tbank, history)))
    print("SSE random weight")

    t4 = time()
    print("learning")
    weights = sp.train_perceptron(all_sentences, feature_dict, tbank, history)
    np.save('weights' + str(history) + tiny + '.npy', weights)
    t4 = time() - t4  # training time, including saving the weights
    print(weights.shape)

    # Total elapsed time up to this point; validation below is not included.
    t1 = time() - t1

    print("validating")
    out(('after %d sentences, only Ne-tags' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights,
               False)))
    out(('after %d sentences' % (len(all_sentences)),
         flaws(dts, val_sentences, feature_dict, tbank, history, weights)))
    out(('total %f sec (loading: %f, %f; training: %f' % (t1, t2, t3, t4)))
    return feature_dict, weights
def test():
    """Time ``dp.process`` against ``dp.process_multi`` on the tiny data file.

    Both calls read the same file; the two elapsed times (as measured with
    ``time()``) are printed on one line, ``dp.process`` first.
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'
    print('loading sentences')

    started = time()
    all_sentences, feature_dict = dp.process(TRAIN_FILE)
    single_elapsed = time() - started

    started = time()
    all_sentences, feature_dict = dp.process_multi(TRAIN_FILE, 6)
    multi_elapsed = time() - started

    # Same text as printing the two values separated by a comma.
    print('%s %s' % (single_elapsed, multi_elapsed))
def test():
    """Compare the run time of ``dp.process`` and ``dp.process_multi``.

    Each function is called once on the tiny training file and the two
    durations (differences of ``time()`` readings) are printed together,
    the ``dp.process`` duration first.
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'
    print('loading sentences')

    before_single = time()
    all_sentences, feature_dict = dp.process(TRAIN_FILE)
    after_single = time()

    before_multi = time()
    all_sentences, feature_dict = dp.process_multi(TRAIN_FILE, 6)
    after_multi = time()

    # Formats exactly like a two-item print statement: "<first> <second>".
    durations = (after_single - before_single, after_multi - before_multi)
    print('%s %s' % durations)
def test():
    """Time one ``dp.process`` run on the tiny training file.

    Prints the elapsed time (difference of two ``time()`` readings) of the
    call.
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'
    print('loading sentences')
    t1 = time()
    all_sentences, feature_dict = dp.process(TRAIN_FILE)
    t1 = time() - t1
    # A second timer ``t2`` used to be read here without ever being started,
    # which raised UnboundLocalError on every call; only the measurement that
    # actually exists is reported.
    print(t1)
def test():
    """Time one ``dp.process`` run on the tiny training file.

    Prints the elapsed time (difference of two ``time()`` readings) of the
    call.
    """
    TRAIN_FILE = '../release3.2/data/train.data.tiny'
    print('loading sentences')
    start = time()
    all_sentences, feature_dict = dp.process(TRAIN_FILE)
    elapsed = time() - start
    # The earlier ``t2 = time() - t2`` read ``t2`` before it was assigned and
    # therefore always failed with UnboundLocalError; it has been dropped
    # because no second measurement is taken in this function.
    print(elapsed)
parw = '-START-' sentence.insert(0,parw) sentence.insert(0,parw) sentence.insert(0,parw) return sentence else: parw = parent.orth_ current_word = parent sentence.insert(0,parw) return recursive_tree_climb(current_word, sentence) if __name__ == '__main__': print 'start' TRAIN_FILE = 'test_data/test_linear.txt' #'../release3.2/data/test.txt' all_sentences, feature_dict = dp.process(TRAIN_FILE,1) tbank = dts.tbankparser() text_file = open("preprocessed-4gram-sentences2.txt", "w") print "start looping through sentece" for sentence in all_sentences: try: seen_mistakes = [] parsed_sentence = tbank.parse(sentence.raw_sentence) context_tags = [word_tag[1] for word_tag in sentence.words_tags] for i in range(0,len(sentence.raw_sentence.split(' '))): if context_tags[i] != "Ne": cur = parsed_sentence[i] sentence_array = [] sentence_array.insert(0,cur.orth_) result = recursive_tree_climb(cur, sentence_array)
sentence.insert(0, parw) sentence.insert(0, parw) sentence.insert(0, parw) return sentence else: parw = parent.orth_ current_word = parent sentence.insert(0, parw) return recursive_tree_climb(current_word, sentence) if __name__ == '__main__': print 'start' TRAIN_FILE = 'test_data/test_linear.txt' #'../release3.2/data/test.txt' all_sentences, feature_dict = dp.process(TRAIN_FILE, 1) tbank = dts.tbankparser() text_file = open("preprocessed-4gram-sentences2.txt", "w") print "start looping through sentece" for sentence in all_sentences: try: seen_mistakes = [] parsed_sentence = tbank.parse(sentence.raw_sentence) context_tags = [word_tag[1] for word_tag in sentence.words_tags] for i in range(0, len(sentence.raw_sentence.split(' '))): if context_tags[i] != "Ne": cur = parsed_sentence[i] sentence_array = [] sentence_array.insert(0, cur.orth_) result = recursive_tree_climb(cur, sentence_array)