# Load the treebank: from a directory of XML files, from a single XML file,
# or from standard input when no input file was given.
if input_file is not None:
    if os.path.isdir(input_file):
        treebank = reader.read_dir_xml(input_file)
    else:
        # NOTE(review): closing the file as soon as read_xml() returns assumes
        # the reader consumes the stream eagerly -- confirm against the
        # reader's implementation.
        with open(input_file) as instream:
            treebank = reader.read_xml(instream)
else:
    treebank = reader.read_xml(sys.stdin)

# Perform transformations
treebank = transform_treebank(treebank)

# Check for eval mode (skipped when xfold is set)
if eval_file is not None and not xfold:
    # Split the corpus into three parts; `cross` selects which split
    # routine of the reader is used.
    if cross:
        (treebank, dev, test) = reader.build_eval_data(treebank)
    else:
        (treebank, dev, test) = reader.build_std_eval_data(treebank)

    # Prints the additional dev and test files if eval mode is on.
    # Part 1 (the remaining training treebank) is written in POS format.
    trainposfile = eval_file + '_1.pos'
    with open(trainposfile, 'w') as pstream:
        printout_pos_corpus(treebank, pstream)

    # Part 2 (dev) is written twice: as a .mrg file and as a .pos file.
    # The .mrg file is opened first, as before; both streams are closed
    # even if one of the printers raises.
    posdevfile = eval_file + '_2.pos'
    treedevfile = eval_file + '_2.mrg'
    with open(treedevfile, 'w') as gstream:
        with open(posdevfile, 'w') as pstream:
            printout_ptb_corpus(dev, gstream)
            printout_pos_corpus(dev, pstream)
treebank = []
test = []
gold = []
reader = XmlReader()

# Load the treebank: from a directory of XML files, from a single XML file,
# or from standard input when no input file was given.
if input_file is not None:
    if os.path.isdir(input_file):
        treebank = reader.read_dir_xml(input_file)
    else:
        # NOTE(review): closing the file as soon as read_xml() returns assumes
        # the reader consumes the stream eagerly -- confirm against XmlReader.
        with open(input_file) as instream:
            treebank = reader.read_xml(instream)
else:
    treebank = reader.read_xml(sys.stdin)

# Check for eval mode: split off the test and gold parts, keeping the
# remainder as the treebank that is printed below.
if eval_file is not None:
    (treebank, test, gold) = reader.build_eval_data(treebank)

# Prints the corpus (or the training treebank) on standard output,
# in raw form when `raw` is set and in POS form otherwise.
if raw:
    printout_raw_corpus(treebank, sys.stdout)
else:
    printout_pos_corpus(treebank, sys.stdout)

# Prints the additional eval files if eval mode is on:
# <eval_file>.gld gets the gold part in POS form and <eval_file>.tst gets
# the test part in raw form.
if eval_file is not None:
    testfile = eval_file + '.tst'
    goldfile = eval_file + '.gld'
    # The gold file is opened first, as before; both streams are closed
    # (and therefore flushed) even if one of the printers raises.
    with open(goldfile, 'w') as gstream:
        with open(testfile, 'w') as tstream:
            printout_pos_corpus(gold, gstream)
            printout_raw_corpus(test, tstream)