from TestDocument import TestDocument
from UnlabelledDoc import UnlabelledDoc
import pickle


def _read_text(path):
    """Return the full contents of the file at *path*, closing it afterwards."""
    with open(path) as handle:
        return handle.read()


def _dump_pickle(obj, path):
    """Pickle *obj* into the file at *path*, closing the file when done."""
    # Binary mode keeps the pickle bytes free of newline translation, and the
    # `with` block guarantees the data is flushed before the script moves on.
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


if __name__ == '__main__':
    # NOTE(review): `sys`, `cp` and `TrainDocument` are used below but are not
    # imported in the visible part of this file -- confirm they are imported
    # earlier (presumably `import sys`, a ConfigParser alias and
    # `from TrainDocument import TrainDocument`).
    reload(sys)
    sys.setdefaultencoding('utf8')

    # Parse the configuration
    config = cp.RawConfigParser()
    config.read('config.py')

    # Build the training document and persist both the document and its
    # sentences for later stages.
    trainfile = config.get('init', 'trainfile')
    content = _read_text(trainfile)
    doc = TrainDocument(content)
    sent = doc.get_sentences()[0]
    print(sent.pos_tags)
    _dump_pickle(doc, 'TrainDoc.pickle')
    _dump_pickle(doc.sentences, 'Sentences.pickle')

    # Open the test document as well; only its sentences are persisted.
    testfile = config.get('init', 'testfile')
    testcontent = _read_text(testfile)
    testdoc = TestDocument(testcontent)
    _dump_pickle(testdoc.sentences, 'SentencesTest.pickle')

    # If LU learning is enabled, then read the unlabelled corpus also.
    # NOTE(review): only the flag lookup and the print are visible here; the
    # corpus-reading step itself is not part of the visible code.
    lu = int(config.get('LU', 'lu'))
    print('LU learning configuration : ' + str(lu))
__author__ = 'vignesh'

from TrainDocument import TrainDocument

# Export every label of the second kappa annotation file, one label per line,
# to 'labels2forkappa.txt'.

f2 = '/Users/vignesh/Documents/Phd/Courses/fall15/CS521SNLP/ClassProject/corpus/kappa/sahisnukappafull.txt'

# Read the annotated corpus; the `with` block closes the handle right away.
with open(f2) as infile:
    c2 = infile.read()

doc2 = TrainDocument(c2)
sent2 = doc2.get_sentences()

# Flatten the per-sentence label lists into one list, in sentence order.
labels2 = []
for sentence in sent2:
    labels2 += sentence.labels
print(len(labels2))

# write them
# The `with` block closes the output file even if a write fails part-way.
with open('labels2forkappa.txt', 'w') as outfile:
    for label in labels2:
        outfile.write(label)
        outfile.write('\n')