def main(DU_BAR):
    """
    Command-line driver: train, test, cross-validate or run a model.

    The command line is parsed with the parser returned by
    DU_CRF_Task.getBasicTrnTstRunOptionParser, extended with a --docid
    option. Exactly two positional arguments are expected: the model folder
    and the model name. Otherwise a message is traced and _exit(usage, 1, e)
    is called.

    DU_BAR: callable instantiated as
            DU_BAR(sModelName, sModelDir, C=, tol=, njobs=, max_iter=,
                   inference_cache=)

    The process is terminated with sys.exit(0) when options.rm is set, and
    after any of the stepwise n-fold actions (options.iFoldInitNum,
    options.iFoldRunNum, options.bFoldFinish).
    """
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(
        sys.argv[0], version)
    # NOTE: --docid is accepted on the command line, but its value is not
    # used anywhere else in this function.
    parser.add_option("--docid", dest='docid', action="store", default=None,
                      help="only process docid")

    # ---
    # parse the command line
    (options, args) = parser.parse_args()

    # ---
    try:
        sModelDir, sModelName = args
    except Exception as e:
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)

    doer = DU_BAR(sModelName, sModelDir,
                  C=options.crf_C,
                  tol=options.crf_tol,
                  njobs=options.crf_njobs,
                  max_iter=options.max_iter,
                  inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        sys.exit(0)

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    # use the a_mpxml files: the labeled filename pattern replaces the default
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation (the returned values are
            # not needed here)
            doer._nfold_Init(lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm)
            traceln(oReport)
        else:
            # only options.bFoldFinish can have triggered the outer test
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        # no more processing!!
        sys.exit(0)
    # -------------------

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        import graph.GraphModel
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            # for GridSearch
            traceln("Baseline best estimator: %s" % doer.bsln_mdl.best_params_)
        except Exception:
            # best effort: the trace is simply skipped when it cannot be
            # produced; Ctrl-C and sys.exit are no longer swallowed
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                pass  # we only need the transformer
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)
        traceln("Done, see in:\n %s" % lsOutputFilename)
if __name__ == "__main__":
    # Script entry point: parse the command line, build a DU_BL_V1 model
    # wrapper and, when the options carry file lists, read the training list.
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(
        sys.argv[0], version)

    # ---
    # parse the command line
    (options, args) = parser.parse_args()

    # ---
    # exactly two positional arguments are expected: model folder, model name
    try:
        sModelDir, sModelName = args
    except Exception as e:
        _exit(usage, 1, e)

    doer = DU_BL_V1(sModelName, sModelDir, 'logit_5')

    if options.rm:
        doer.rm()
        sys.exit(0)

    traceln("- classes: ", DU_GRAPH.getLabelNameList())

    if hasattr(options, 'l_train_files') and hasattr(options, 'l_test_files'):
        # One entry per line, trailing whitespace stripped. The file is read
        # inside a `with` block so that it is closed once the list is built.
        # NOTE(review): options.l_test_files is checked above but never read
        # in the code visible here -- confirm nothing is missing.
        with open(options.l_train_files) as f:
            lTrn = [sLine.rstrip() for sLine in f]
def main_command_line(TableSkewedRowCut_CLASS):
    """
    Command-line entry point for a skewed-row-cut task class.

    Extends the parser returned by
    DU_CRF_Task.getBasicTrnTstRunOptionParser with the cut-related options,
    parses sys.argv and then either:
    - with --graph: treats every positional argument as an input file, writes
      a "graph-"-prefixed copy next to it, reports statistics through
      SkewedCutAnnotator.gtStatReport() and terminates with sys.exit(0); or
    - otherwise: expects two positional arguments (model folder, model name)
      and hands over to main(...).

    TableSkewedRowCut_CLASS: class instantiated in --graph mode and forwarded
                             to main(...) otherwise.
    """
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(
        sys.argv[0], version)

    # FOR GCN
    parser.add_option("--revertEdges", dest='bRevertEdges', action="store_true",
                      help="Revert the direction of the edges")
    parser.add_option("--detail", dest='bDetailedReport', action="store_true",
                      default=False,
                      help="Display detailed reporting (score per document)")
    parser.add_option("--baseline", dest='bBaseline', action="store_true",
                      default=False, help="report baseline method")
    parser.add_option("--line_see_line", dest='iLineVisibility', action="store",
                      type=int, default=GraphSkewedCut.iLineVisibility,
                      help="seeline2line: how far in pixel can a line see another cut line?")
    parser.add_option("--block_see_line", dest='iBlockVisibility', action="store",
                      type=int, default=GraphSkewedCut.iBlockVisibility,
                      help="seeblock2line: how far in pixel can a block see a cut line?")
    parser.add_option("--height", dest="fCutHeight",
                      default=GraphSkewedCut.fCutHeight,
                      action="store", type=float,
                      help="Minimal height of a cut")
    parser.add_option("--cut-above", dest='bCutAbove', action="store_true",
                      default=False,
                      help="Each object defines one or several cuts above it (instead of below as by default)")
    parser.add_option("--angle", dest='lsAngle',
                      action="store", type="string", default="-1,0,+1",
                      help="Allowed cutting angles, in degree, comma-separated")
    parser.add_option("--graph", dest='bGraph', action="store_true",
                      help="Store the graph in the XML for displaying it")

    # ---
    # parse the command line
    (options, args) = parser.parse_args()

    if options.bGraph:
        import os.path

        # the comma-separated angles in degrees, parsed once and converted
        # to radians
        lRadAngle = [math.radians(float(s)) for s in options.lsAngle.split(",")]

        # hack: the settings are also stored as class attributes
        TableSkewedRowCut_CLASS.bCutAbove = options.bCutAbove
        traceln("\t%s.bCutAbove=" % TableSkewedRowCut_CLASS.__name__,
                TableSkewedRowCut_CLASS.bCutAbove)
        TableSkewedRowCut_CLASS.lRadAngle = lRadAngle
        traceln("\t%s.lRadAngle=" % TableSkewedRowCut_CLASS.__name__,
                TableSkewedRowCut_CLASS.lRadAngle)

        for sInputFilename in args:
            sp, sf = os.path.split(sInputFilename)
            sOutFilename = os.path.join(sp, "graph-" + sf)
            doer = TableSkewedRowCut_CLASS(
                "debug", ".",
                iBlockVisibility=options.iBlockVisibility,
                iLineVisibility=options.iLineVisibility,
                fCutHeight=options.fCutHeight,
                bCutAbove=options.bCutAbove,
                # each instance gets its own list, as before
                lRadAngle=list(lRadAngle))
            o = doer.cGraphClass()
            o.parseDocFile(sInputFilename, 9)
            o.parseDocLabels()
            o.addParsedLabelToDom()
            # NOTE(review): this line used to be the bare expression
            # `o.addEdgaddEdgeToDoc`, which calls nothing. addEdgeToDoc() is
            # assumed to be the intended method -- confirm on the graph class.
            o.addEdgeToDoc()
            print('Graph edges added to %s' % sOutFilename)
            o.doc.write(sOutFilename, encoding='utf-8', pretty_print=True,
                        xml_declaration=True)
        SkewedCutAnnotator.gtStatReport()
        # sys.exit rather than the site-provided exit() helper
        sys.exit(0)

    # ---
    try:
        sModelDir, sModelName = args
    except Exception as e:
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)

    # NOTE(review): the main() in scope must accept these four arguments --
    # confirm which main() this module actually binds.
    main(TableSkewedRowCut_CLASS, sModelDir, sModelName, options)