def testLabels():
    """
        Loads a single hard-coded SciDoc and writes it with AZPrimeWriter,
        POS tags enabled, to a "<guid>.pos.xml" file in the output dir of
        the corpus
    """
    test_guid = "f7921eed-89bc-4f38-a794-7c9a5878a7ee"

    xml_writer = AZPrimeWriter()
    xml_writer.save_pos_tags = True

    scidoc = cp.Corpus.loadSciDoc(test_guid)

    # The file name comes from the guid stored in the document's own metadata
    out_name = scidoc.metadata["guid"] + ".pos.xml"
    out_path = os.path.join(cp.Corpus.paths.output, out_name)
    xml_writer.write(scidoc, out_path)
def exportSciXML():
    """
        Exports the scidocs listed by the corpus to AZPrime XML in the output
        dir of the corpus

        Every guid returned by `cp.Corpus.listPapers` is loaded, written with
        POS tags enabled to "<output dir>/<guid>.pos.xml", and then saved back
        to the corpus. Documents that contain no sentences are skipped.
    """
    # sys.maxint no longer exists in Python 3; sys.maxsize is available in
    # both Python 2.6+ and Python 3 and serves the same "no limit" purpose
    papers = cp.Corpus.listPapers(max_results=sys.maxsize)

    writer = AZPrimeWriter()
    writer.save_pos_tags = True

    progress = ProgressIndicator(True, len(papers), False)
    print("Exporting SciXML files")

    for guid in papers:
        doc = cp.Corpus.loadSciDoc(guid)
        if len(doc.allsentences) < 1:
            # Nothing to export for an empty document.
            # NOTE(review): skipped documents never reach the progress report
            # below, although they are counted in len(papers) -- confirm this
            # is intended
            continue

        out_path = os.path.join(cp.Corpus.paths.output,
                                doc.metadata["guid"] + ".pos.xml")
        writer.write(doc, out_path)

        # NOTE(review): presumably write() annotates the doc (e.g. with POS
        # tags), which is why it is saved back to the corpus -- confirm
        cp.Corpus.saveSciDoc(doc)
        progress.showProgressReport("Exporting -- %s" % guid)