示例#1
0
def main(DU_BAR):
    """Command-line driver for a ``DU_BAR`` task.

    Parses the command line with the basic train/test/run option parser
    returned by ``DU_CRF_Task.getBasicTrnTstRunOptionParser`` (extended with
    ``--docid``), builds a ``DU_BAR`` instance from the two positional
    arguments (model folder, model name), then acts on the options in this
    order:

    * ``options.rm``: call ``doer.rm()`` and exit.
    * fold init / fold run / fold finish: perform that single
      cross-validation step and exit.
    * ``lFold``: run ``nfold_Eval`` and dump the reports to
      ``<model folder>/<model name>__report.txt``.
    * else ``lTrn``: ``train_save_test``.
    * else ``lTst``: ``load`` then ``test``.
    * finally, if ``lRun``: ``predict`` (or ``runForExternalMLMethod`` when
      ``storeX``/``applyY`` is set).

    :param DU_BAR: callable invoked as
        ``DU_BAR(sModelName, sModelDir, C=..., tol=..., njobs=...,
        max_iter=..., inference_cache=...)`` (presumably a task class).
    """
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(
        sys.argv[0], version)
    # NOTE(review): --docid is accepted but its value is not used below.
    parser.add_option("--docid",
                      dest='docid',
                      action="store",
                      default=None,
                      help="only process docid")

    # parse the command line
    (options, args) = parser.parse_args()

    try:
        sModelDir, sModelName = args
    except ValueError as e:
        # wrong number of positional arguments
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)

    doer = DU_BAR(sModelName,
                  sModelDir,
                  C=options.crf_C,
                  tol=options.crf_tol,
                  njobs=options.crf_njobs,
                  max_iter=options.max_iter,
                  inference_cache=options.crf_inference_cache)

    if options.rm:
        doer.rm()
        sys.exit(0)

    lTrn, lTst, lRun, lFold = [
        _checkFindColDir(lsDir)
        for lsDir in [options.lTrn, options.lTst, options.lRun, options.lFold]
    ]

    # use the labelled-XML filename pattern (a_mpxml files)
    doer.sXmlFilenamePattern = doer.sLabeledXmlFilenamePattern

    if options.iFoldInitNum or options.iFoldRunNum or options.bFoldFinish:
        if options.iFoldInitNum:
            # initialization of a cross-validation
            doer._nfold_Init(lFold, options.iFoldInitNum, bStoreOnDisk=True)
        elif options.iFoldRunNum:
            # run one fold
            oReport = doer._nfold_RunFoldFromDisk(options.iFoldRunNum,
                                                  options.warm)
            traceln(oReport)
        else:
            # only options.bFoldFinish can be set here
            tstReport = doer._nfold_Finish()
            traceln(tstReport)
        # no more processing!!
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. python -S)
        sys.exit(0)

    if lFold:
        loTstRpt = doer.nfold_Eval(lFold, 3, .25, None, options.pkl)
        import graph.GraphModel
        sReportPickleFilename = os.path.join(sModelDir,
                                             sModelName + "__report.txt")
        traceln("Results are in %s" % sReportPickleFilename)
        graph.GraphModel.GraphModel.gzip_cPickle_dump(sReportPickleFilename,
                                                      loTstRpt)
    elif lTrn:
        doer.train_save_test(lTrn, lTst, options.warm, options.pkl)
        try:
            traceln("Baseline best estimator: %s" %
                    doer.bsln_mdl.best_params_)  # for GridSearch
        except Exception:
            # best effort: the baseline model may not expose best_params_
            pass
        traceln(" --- CRF Model ---")
        traceln(doer.getModel().getModelInfo())
    elif lTst:
        doer.load()
        tstReport = doer.test(lTst)
        traceln(tstReport)

    if lRun:
        if options.storeX or options.applyY:
            try:
                doer.load()
            except Exception:
                # best effort: we only need the transformer
                pass
            lsOutputFilename = doer.runForExternalMLMethod(
                lRun, options.storeX, options.applyY)
        else:
            doer.load()
            lsOutputFilename = doer.predict(lRun)
        traceln("Done, see in:\n  %s" % lsOutputFilename)
示例#2
0


if __name__ == "__main__":
    # Script entry point: parse the command line, build the baseline task
    # (DU_BL_V1 with the 'logit_5' setting) and, when the options carry
    # file-list attributes, read the list of training files.

    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(sys.argv[0], version)

    # parse the command line
    (options, args) = parser.parse_args()

    try:
        sModelDir, sModelName = args
    except Exception as e:
        # wrong number of positional arguments: show usage and quit
        _exit(usage, 1, e)

    doer = DU_BL_V1(sModelName, sModelDir,'logit_5')

    if options.rm:
        doer.rm()
        sys.exit(0)

    traceln("- classes: ", DU_GRAPH.getLabelNameList())

    if hasattr(options,'l_train_files') and hasattr(options,'l_test_files'):
        # One filename per line; the context manager guarantees the file is
        # closed (the original left the handle open).
        with open(options.l_train_files) as f:
            lTrn = [sLine.rstrip() for sLine in f]
        # NOTE(review): options.l_test_files is checked above but is not read
        # in the visible part of this script.
def main_command_line(TableSkewedRowCut_CLASS):
    """Parse the command line and run the skewed-row-cut task.

    Extends the basic train/test/run option parser from ``DU_CRF_Task`` with
    cut-specific options (visibility distances, cut height, cut angles, ...).

    Two modes:

    * ``--graph``: every positional argument is treated as an input file; its
      graph is built, labels and edges are added to the DOM, and the result
      is written next to the input as ``graph-<filename>``. A ground-truth
      statistics report is then produced and the process exits with status 0.
    * otherwise: the two positional arguments are the model folder and the
      model name, and processing is delegated to ``main``.

    :param TableSkewedRowCut_CLASS: task class; it is instantiated in graph
        mode and passed on to ``main`` otherwise.
    """
    version = "v.01"
    usage, description, parser = DU_CRF_Task.getBasicTrnTstRunOptionParser(sys.argv[0], version)

    # FOR GCN
    parser.add_option("--revertEdges", dest='bRevertEdges',  action="store_true", help="Revert the direction of the edges")
    parser.add_option("--detail", dest='bDetailedReport',  action="store_true", default=False,help="Display detailed reporting (score per document)")
    parser.add_option("--baseline", dest='bBaseline',  action="store_true", default=False, help="report baseline method")
    parser.add_option("--line_see_line", dest='iLineVisibility',  action="store",
                      type=int, default=GraphSkewedCut.iLineVisibility,
                      help="seeline2line: how far in pixel can a line see another cut line?")
    parser.add_option("--block_see_line", dest='iBlockVisibility',  action="store",
                      type=int, default=GraphSkewedCut.iBlockVisibility,
                      help="seeblock2line: how far in pixel can a block see a cut line?")
    parser.add_option("--height", dest="fCutHeight", default=GraphSkewedCut.fCutHeight
                      , action="store", type=float, help="Minimal height of a cut")
    parser.add_option("--cut-above", dest='bCutAbove',  action="store_true", default=False
                        ,help="Each object defines one or several cuts above it (instead of below as by default)")
    parser.add_option("--angle", dest='lsAngle'
                      ,  action="store", type="string", default="-1,0,+1"
                        ,help="Allowed cutting angles, in degree, comma-separated")

    parser.add_option("--graph", dest='bGraph',  action="store_true", help="Store the graph in the XML for displaying it")

    # parse the command line
    (options, args) = parser.parse_args()

    if options.bGraph:
        import os.path

        # The angles are given in degrees on the command line; convert once.
        lRadAngle = [math.radians(float(s)) for s in options.lsAngle.split(",")]

        # hack: also set the values at class level
        TableSkewedRowCut_CLASS.bCutAbove = options.bCutAbove
        traceln("\t%s.bCutAbove=" % TableSkewedRowCut_CLASS.__name__, TableSkewedRowCut_CLASS.bCutAbove)
        TableSkewedRowCut_CLASS.lRadAngle = list(lRadAngle)
        traceln("\t%s.lRadAngle=" % TableSkewedRowCut_CLASS.__name__, TableSkewedRowCut_CLASS.lRadAngle)

        for sInputFilename in args:
            sp, sf = os.path.split(sInputFilename)
            sOutFilename = os.path.join(sp, "graph-" + sf)
            doer = TableSkewedRowCut_CLASS("debug", "."
                                           , iBlockVisibility=options.iBlockVisibility
                                           , iLineVisibility=options.iLineVisibility
                                           , fCutHeight=options.fCutHeight
                                           , bCutAbove=options.bCutAbove
                                           # fresh list per instance, not shared with the class
                                           , lRadAngle=list(lRadAngle))
            o = doer.cGraphClass()
            o.parseDocFile(sInputFilename, 9)
            o.parseDocLabels()
            o.addParsedLabelToDom()
            # NOTE(review): this line was garbled in the source
            # ("o.addEdgaddEdgeToDoc"); reconstructed as a call to
            # addEdgeToDoc() - confirm against the graph class.
            o.addEdgeToDoc()
            print('Graph edges added to %s'%sOutFilename)
            o.doc.write(sOutFilename, encoding='utf-8',pretty_print=True,xml_declaration=True)
        SkewedCutAnnotator.gtStatReport()
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. python -S)
        sys.exit(0)

    try:
        sModelDir, sModelName = args
    except ValueError as e:
        # wrong number of positional arguments
        traceln("Specify a model folder and a model name!")
        _exit(usage, 1, e)

    main(TableSkewedRowCut_CLASS, sModelDir, sModelName, options)