def TrainUsingLR(xmls, annotatedxmlloc):
    """Train a LogisticRegressor for table decomposition.

    For every entry of ``xmls`` the annotated file at
    ``annotatedxmlloc + xml + "_TD_ANNOTATED"`` is read through
    ``Utils.Trainer.Trainer.readAnnotatedxmlforTableDecomposition``; the
    collected results are handed to
    ``LogisticRegressor.domaintrainforTableDecomposition``.

    Args:
        xmls: iterable of name fragments, concatenated between the location
            prefix and the ``"_TD_ANNOTATED"`` suffix.
        annotatedxmlloc: prefix prepended to each name (presumably a
            directory path ending in a separator -- confirm with callers).

    Returns:
        The LogisticRegressor instance after training.
    """
    regressor = LogisticRegressor()
    reader = Utils.Trainer.Trainer()
    annotated_tables = [
        reader.readAnnotatedxmlforTableDecomposition(
            annotatedxmlloc + name + "_TD_ANNOTATED"
        )
        for name in xmls
    ]
    regressor.domaintrainforTableDecomposition(annotated_tables)
    return regressor
def TrainUsingLR(xmls, annotatedxmlloc):
    """Build and train a table-decomposition LogisticRegressor.

    Each name in ``xmls`` is expanded to
    ``annotatedxmlloc + name + "_TD_ANNOTATED"``, read with
    ``Trainer.readAnnotatedxmlforTableDecomposition`` and the resulting list
    is passed to ``domaintrainforTableDecomposition``.

    Args:
        xmls: iterable of name fragments identifying the annotated files.
        annotatedxmlloc: string prefix placed in front of every name.

    Returns:
        The trained LogisticRegressor.
    """
    model = LogisticRegressor()
    table_reader = Utils.Trainer.Trainer()

    def annotated_path(name):
        # Same concatenation the training data layout relies on.
        return annotatedxmlloc + name + "_TD_ANNOTATED"

    tables = [
        table_reader.readAnnotatedxmlforTableDecomposition(annotated_path(entry))
        for entry in xmls
    ]
    model.domaintrainforTableDecomposition(tables)
    return model
def TrainUsingLR(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Train a LogisticRegressor and write its weights to "TrainedWeightsLR".

    For every name in ``xmls`` the XML at ``xmlloc + name + ".xml"`` is parsed
    to obtain a font dictionary, and the annotation file at
    ``annotatedxmlloc + name + "_annotated"`` is read. The
    ``[annotatedxml, fontdict]`` pairs are passed to
    ``LogisticRegressor.domaintrain``. The trained weights are printed and
    written one per line to the file "TrainedWeightsLR" in the current
    working directory.

    Args:
        xmls: iterable of document names (without extension).
        preprocessor: object providing ``getFontDictionary(parsed_xml)``.
        trainer: object providing ``readAnnotatedXml(path)``.
        xmlloc: string prefix for the raw ``.xml`` files.
        annotatedxmlloc: string prefix for the ``_annotated`` files.

    Returns:
        None.
    """
    LRImpl = LogisticRegressor()
    annotatedxmllist = []
    for xmlname in xmls:
        fontdict = preprocessor.getFontDictionary(
            ET.parse(xmlloc + xmlname + ".xml")
        )
        # list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotatedxml = trainer.readAnnotatedXml(
            annotatedxmlloc + xmlname + "_annotated"
        )
        annotatedxmllist.append([annotatedxml, fontdict])

    LRImpl.domaintrain(annotatedxmllist)
    # Parenthesised single-argument form prints the same under Python 2's
    # print statement and is also valid Python 3.
    print(LRImpl.trainedweights)

    # The context manager closes the file even if a write or str() fails.
    with open("TrainedWeightsLR", 'w') as f:
        for weight in LRImpl.trainedweights:
            f.write(str(weight) + "\n")
def TrainUsingLR(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    """Train a LogisticRegressor and persist its weights to "TrainedWeightsLR".

    Each name in ``xmls`` contributes one ``[annotatedxml, fontdict]`` pair:
    the font dictionary comes from ``preprocessor.getFontDictionary`` applied
    to the parsed ``xmlloc + name + ".xml"``, and the annotation comes from
    ``trainer.readAnnotatedXml(annotatedxmlloc + name + "_annotated")``.
    After ``domaintrain`` runs, the weights are printed and written, one per
    line, to "TrainedWeightsLR" in the current working directory.

    Args:
        xmls: iterable of document names (without extension).
        preprocessor: object providing ``getFontDictionary(parsed_xml)``.
        trainer: object providing ``readAnnotatedXml(path)``.
        xmlloc: string prefix for the raw ``.xml`` files.
        annotatedxmlloc: string prefix for the ``_annotated`` files.

    Returns:
        None.
    """
    LRImpl = LogisticRegressor()
    annotatedxmllist = []
    for xmlname in xmls:
        fontdict = preprocessor.getFontDictionary(
            ET.parse(xmlloc + xmlname + ".xml")
        )
        # list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotatedxml = trainer.readAnnotatedXml(
            annotatedxmlloc + xmlname + "_annotated"
        )
        annotatedxmllist.append([annotatedxml, fontdict])

    LRImpl.domaintrain(annotatedxmllist)
    # Single parenthesised argument: identical output with the Python 2
    # print statement, and valid as a Python 3 call.
    print(LRImpl.trainedweights)

    # Use a context manager so the handle is released on any failure.
    with open("TrainedWeightsLR", 'w') as f:
        for weight in LRImpl.trainedweights:
            f.write(str(weight) + "\n")
def getModelwithTrainedWeights(isCRF=True):
    """Load saved weights from disk and return a model built from them.

    Reads one float per line from "TrainedWeightsCRF" (when ``isCRF`` is
    truthy) or "TrainedWeightsLR" (otherwise), both looked up in the current
    working directory.

    Args:
        isCRF: selects the CRF model when truthy, the LogisticRegressor
            otherwise. Defaults to True.

    Returns:
        ``CRF(trainedweights)`` or ``LogisticRegressor(trainedweights)``,
        where ``trainedweights`` is the list of parsed floats.

    Raises:
        IOError/OSError: if the weights file cannot be opened.
        ValueError: if a line cannot be parsed as a float.
    """
    weightsfile = "TrainedWeightsCRF" if isCRF else "TrainedWeightsLR"

    # The context manager closes the file even when float() rejects a line.
    with open(weightsfile, "r") as f:
        trainedweights = [float(weight) for weight in f]

    if isCRF:
        return CRF(trainedweights)
    return LogisticRegressor(trainedweights)