def TrainUsingLR(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Train a LogisticRegressor on annotated XML files and save its weights.

    For every name in ``xmls`` the raw XML is parsed to build a font
    dictionary and the matching annotated file is read; the resulting pairs
    are handed to ``LogisticRegressor.domaintrain`` in a single call. The
    trained weights are printed and then written, one per line, to a file
    named ``TrainedWeightsLR`` in the current working directory.

    NOTE(review): another definition named ``TrainUsingLR`` follows this one
    in the file; if both are in the same module the later one shadows this
    one -- confirm which is intended.

    Args:
        xmls: Iterable of document base names (without the ``.xml`` suffix).
        preprocessor: Object providing ``getFontDictionary(tree)``.
        trainer: Object providing ``readAnnotatedXml(path)``.
        xmlloc: Path prefix of the raw XML files. It is concatenated directly
            with the name, so it must already end with a path separator.
        annotatedxmlloc: Path prefix of the annotated files, concatenated the
            same way; the file name is ``<name>_annotated``.

    Returns:
        None.
    """
    LRImpl = LogisticRegressor()
    annotatedxmllist = []
    for xmlname in xmls:
        fontdict = preprocessor.getFontDictionary(
            ET.parse(xmlloc + xmlname + ".xml"))
        # Original author's note on the annotated structure (unverified here):
        # list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotatedxml = trainer.readAnnotatedXml(
            annotatedxmlloc + xmlname + "_annotated")
        annotatedxmllist.append([annotatedxml, fontdict])

    LRImpl.domaintrain(annotatedxmllist)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(LRImpl.trainedweights)

    # The context manager closes the file even if formatting/writing a
    # weight raises part-way through.
    with open("TrainedWeightsLR", 'w') as f:
        for weight in LRImpl.trainedweights:
            f.write(str(weight) + "\n")
def TrainUsingLR(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Train a LogisticRegressor on annotated XML files and save its weights.

    Each name in ``xmls`` contributes one ``[annotatedxml, fontdict]`` pair:
    the font dictionary comes from the parsed raw XML, the annotated data
    from the corresponding ``<name>_annotated`` file. All pairs are passed to
    ``LogisticRegressor.domaintrain`` at once; the resulting
    ``trainedweights`` are printed and written, one per line, to
    ``TrainedWeightsLR`` in the current working directory.

    NOTE(review): an earlier definition with the same name precedes this one
    in the file; if both are in the same module this one is the binding that
    callers see -- confirm the duplicate is intentional.

    Args:
        xmls: Iterable of document base names (without the ``.xml`` suffix).
        preprocessor: Object providing ``getFontDictionary(tree)``.
        trainer: Object providing ``readAnnotatedXml(path)``.
        xmlloc: Path prefix of the raw XML files; joined by plain string
            concatenation, so it must already end with a path separator.
        annotatedxmlloc: Path prefix of the annotated files, joined the same
            way.

    Returns:
        None.
    """
    LRImpl = LogisticRegressor()
    annotatedxmllist = []
    for xmlname in xmls:
        tree = ET.parse(xmlloc + xmlname + ".xml")
        fontdict = preprocessor.getFontDictionary(tree)
        # Original author's note on the annotated structure (unverified here):
        # list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotatedxml = trainer.readAnnotatedXml(
            annotatedxmlloc + xmlname + "_annotated")
        annotatedxmllist.append([annotatedxml, fontdict])

    LRImpl.domaintrain(annotatedxmllist)

    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(LRImpl.trainedweights)

    # Use a context manager so the handle is released even when a write or
    # str() conversion fails.
    with open("TrainedWeightsLR", 'w') as f:
        for weight in LRImpl.trainedweights:
            f.write(str(weight) + "\n")