Exemplo n.º 1
0
     # Load the treebank: from a directory of XML files, from a single XML
     # file, or from standard input when no input path was given.
     if input_file is not None:
          if os.path.isdir(input_file):
               treebank = reader.read_dir_xml(input_file)
          else:
               # Close the input file as soon as it has been parsed.
               # NOTE(review): assumes read_xml consumes the stream before
               # returning (the result is transformed/split right after) - confirm.
               with open(input_file) as instream:
                    treebank = reader.read_xml(instream)
     else:
          treebank = reader.read_xml(sys.stdin)

# Perform transformations
treebank = transform_treebank(treebank)

# Eval mode: only when an eval file prefix was given and xfold is off.
if eval_file is not None and not xfold:
     # Split the treebank into train / dev / test portions; `cross` selects
     # which of the reader's two split routines is used.
     if cross:
          (treebank, dev, test) = reader.build_eval_data(treebank)
     else:
          (treebank, dev, test) = reader.build_std_eval_data(treebank)

     # Write the training portion as POS data to <eval_file>_1.pos.
     # The stream is closed before the name `pstream` is reused below.
     trainposfile = eval_file + '_1.pos'
     with open(trainposfile, 'w') as pstream:
          printout_pos_corpus(treebank, pstream)

     # Write the dev portion twice: trees to <eval_file>_2.mrg and POS data
     # to <eval_file>_2.pos. Both files are opened before anything is
     # written, and both are closed even if one of the printers raises.
     # (`test` is not written anywhere in this section.)
     posdevfile = eval_file + '_2.pos'
     treedevfile = eval_file + '_2.mrg'
     with open(treedevfile, 'w') as gstream:
          with open(posdevfile, 'w') as pstream:
               printout_ptb_corpus(dev, gstream)
               printout_pos_corpus(dev, pstream)
     
Exemplo n.º 2
0
# Defaults, so that all three names exist even when eval mode is off.
treebank = []
test = []
gold = []
reader = XmlReader()

# Load the treebank: from a directory of XML files, from a single XML file,
# or from standard input when no input path was given.
if input_file is not None:
     if os.path.isdir(input_file):
          treebank = reader.read_dir_xml(input_file)
     else:
          # Close the input file as soon as it has been parsed.
          # NOTE(review): assumes read_xml consumes the stream before
          # returning - confirm against XmlReader.
          with open(input_file) as instream:
               treebank = reader.read_xml(instream)
else:
     treebank = reader.read_xml(sys.stdin)

# Eval mode: replace the treebank with its training portion and keep the
# test and gold portions for the eval files.
if eval_file is not None:
     (treebank, test, gold) = reader.build_eval_data(treebank)

# Print the corpus (or, in eval mode, the training treebank) to stdout,
# either raw or as POS data depending on `raw`.
if raw:
     printout_raw_corpus(treebank, sys.stdout)
else:
     printout_pos_corpus(treebank, sys.stdout)

# Eval mode: write the gold portion as POS data to <eval_file>.gld and the
# test portion as raw text to <eval_file>.tst.
if eval_file is not None:
     testfile = eval_file + '.tst'
     goldfile = eval_file + '.gld'
     # Both files are opened (gold first) before anything is written, and
     # both are flushed and closed when the block exits, even on error.
     with open(goldfile, 'w') as gstream:
          with open(testfile, 'w') as tstream:
               printout_pos_corpus(gold, gstream)
               printout_raw_corpus(test, tstream)