def __init__(self, sModelName, sModelDir,
             iBlockVisibility=None, iLineVisibility=None,
             sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    DU_ABPTableRCut.iBlockVisibility = iBlockVisibility
    DU_ABPTableRCut.iLineVisibility  = iLineVisibility

    DU_CRF_Task.__init__(self
                         , sModelName, sModelDir
                         , dFeatureConfig = {'row_row'  : {}, 'row_hdr'  : {}, 'row_sepH'  : {},
                                             'hdr_row'  : {}, 'hdr_hdr'  : {}, 'hdr_sepH'  : {},
                                             'sepH_row' : {}, 'sepH_hdr' : {}, 'sepH_sepH' : {},
                                             'row'      : {}, 'hdr'      : {}, 'sepH'      : {}}
                         , dLearnerConfig = {
                               'C'               : .1  if C               is None else C
                             , 'njobs'           : 4   if njobs           is None else njobs
                             , 'inference_cache' : 50  if inference_cache is None else inference_cache
                             #, 'tol'             : .1
                             , 'tol'             : .05 if tol             is None else tol
                             , 'save_every'      : 50   # save every 50 iterations, for warm start
                             , 'max_iter'        : 10  if max_iter        is None else max_iter
                             }
                         , sComment=sComment
                         #, cFeatureDefinition=FeatureDefinition_PageXml_StandardOnes_noText
                         , cFeatureDefinition=My_FeatureDefinition_v2
                         )
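# Illustration, not part of the original code: the dLearnerConfig dictionary above is
# rebuilt, with different defaults, in most of the __init__ methods below. A hypothetical
# helper (make_learner_config is an assumed name) could factor out the "default-if-None"
# pattern; a minimal, self-contained sketch:
def make_learner_config(C=None, tol=None, njobs=None, max_iter=None, inference_cache=None,
                        dDefaults=None):
    """Build a learner configuration, falling back to per-task defaults for every
    hyper-parameter that was left as None."""
    dDefaults = dDefaults or {'C': .1, 'tol': .05, 'njobs': 4,
                              'max_iter': 10, 'inference_cache': 50}
    return {
        'C'               : dDefaults['C']               if C               is None else C,
        'njobs'           : dDefaults['njobs']           if njobs           is None else njobs,
        'inference_cache' : dDefaults['inference_cache'] if inference_cache is None else inference_cache,
        'tol'             : dDefaults['tol']             if tol             is None else tol,
        'save_every'      : 50,    # save every 50 iterations, for warm start
        'max_iter'        : dDefaults['max_iter']        if max_iter        is None else max_iter,
    }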
def __init__(self, sModelName, sModelDir, sComment=None):
    DU_CRF_Task.__init__(self
                         , sModelName, sModelDir
                         , dFeatureConfig = {'n_tfidf_node'     : 10
                                             , 't_ngrams_node'    : (2, 2)
                                             , 'b_tfidf_node_lc'  : False
                                             , 'n_tfidf_edge'     : 10
                                             , 't_ngrams_edge'    : (2, 2)
                                             , 'b_tfidf_edge_lc'  : False
                                             }
                         , dLearnerConfig = {'C'                : .1
                                             , 'njobs'            : 2
                                             , 'inference_cache'  : 10
                                             , 'tol'              : .1
                                             , 'save_every'       : 5     # save every 5 iterations, for warm start
                                             #, 'max_iter'         : 1000
                                             , 'max_iter'         : 2
                                             }
                         , sComment=sComment
                         )
    self.addBaseline_LogisticRegression()    # use a logistic regression model as baseline
def __init__(self, sModelName, sModelDir, sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    # Another way to specify the graph class; defining a getConfiguredGraphClass method is preferred.
    self.configureGraphClass(self.DU_GRAPH)

    DU_CRF_Task.__init__(self
                         , sModelName, sModelDir
                         , dLearnerConfig = {
                               'C'               : .1   if C               is None else C
                             , 'njobs'           : 8    if njobs           is None else njobs
                             , 'inference_cache' : 50   if inference_cache is None else inference_cache
                             #, 'tol'             : .1
                             , 'tol'             : .05  if tol             is None else tol
                             , 'save_every'      : 50   # save every 50 iterations, for warm start
                             , 'max_iter'        : 1000 if max_iter        is None else max_iter
                             }
                         , sComment=sComment
                         , cFeatureDefinition=FeatureDefinition_T_PageXml_StandardOnes_noText_v3
                         , dFeatureConfig = {
                             # config for the extractor of nodes of each type
                               "text"        : None
                             , "sprtr"       : None
                             # config for the extractor of edges of each type
                             , "text_text"   : None
                             , "text_sprtr"  : None
                             , "sprtr_text"  : None
                             , "sprtr_sprtr" : None
                             }
                         )
    traceln("- classes: ", self.DU_GRAPH.getLabelNameList())

    self.bsln_mdl = self.addBaseline_LogisticRegression()    # use a LR model trained by GridSearch as baseline
def __init__(self, sModelName, sModelDir,
             iBlockVisibility=None, iLineVisibility=None,
             fCutHeight=None, bCutAbove=None, lRadAngle=None, bTxt=None,
             sComment=None,
             cFeatureDefinition=None, dFeatureConfig={},
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    DU_ABPTableSkewedRowCutLine.iBlockVisibility = iBlockVisibility
    DU_ABPTableSkewedRowCutLine.iLineVisibility  = iLineVisibility
    DU_ABPTableSkewedRowCutLine.fCutHeight       = fCutHeight
    DU_ABPTableSkewedRowCutLine.bCutAbove        = bCutAbove
    DU_ABPTableSkewedRowCutLine.lRadAngle        = lRadAngle
    DU_ABPTableSkewedRowCutLine.bTxt             = bTxt

    # NB: the cFeatureDefinition and dFeatureConfig arguments accepted above are overridden below.
    DU_CRF_Task.__init__(self, sModelName, sModelDir,
                         dFeatureConfig={'row_row'  : {}, 'row_sepH'  : {},
                                         'sepH_row' : {}, 'sepH_sepH' : {},
                                         'sepH'     : {}, 'row'       : {}},
                         dLearnerConfig={
                             'C'               : .1  if C               is None else C,
                             'njobs'           : 4   if njobs           is None else njobs,
                             'inference_cache' : 50  if inference_cache is None else inference_cache,
                             #'tol'             : .1,
                             'tol'             : .05 if tol             is None else tol,
                             'save_every'      : 50,   # save every 50 iterations, for warm start
                             'max_iter'        : 10  if max_iter        is None else max_iter
                         },
                         sComment=sComment,
                         #cFeatureDefinition=FeatureDefinition_PageXml_StandardOnes_noText,
                         cFeatureDefinition=My_FeatureDefinition_v3_txt if self.bTxt else My_FeatureDefinition_v3)
def __init__(self, sModelName, sModelDir, sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    # NOTE: C, tol, max_iter and inference_cache may each be a list of values (in case of grid search)
    DU_CRF_Task.__init__(self, sModelName, sModelDir, DU_GRAPH,
                         dFeatureConfig={
                             'nbClass'        : nbClass,
                             't_ngrams_node'  : (2, 4),
                             'b_node_lc'      : False,
                             't_ngrams_edge'  : (2, 4),
                             'b_edge_lc'      : False,
                             'n_jobs'         : 5   # n_jobs when fitting the internal Logit feature extractor model by grid search
                         },
                         dLearnerConfig={
                             'C'               : .1   if C               is None else C,
                             'njobs'           : 5    if njobs           is None else njobs,
                             'inference_cache' : 50   if inference_cache is None else inference_cache,
                             #'tol'             : .1,
                             'tol'             : .05  if tol             is None else tol,
                             'save_every'      : 50,   # save every 50 iterations, for warm start
                             'max_iter'        : 1000 if max_iter        is None else max_iter
                         },
                         sComment=sComment,
                         cFeatureDefinition=FeatureDefinition_PageXml_LogitExtractorV2)
    self.setNbClass(nbClass)    # so that we check whether all classes are represented in the training set
    self.bsln_mdl = self.addBaseline_LogisticRegression()    # use a LR model trained by GridSearch as baseline
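# Illustration, not part of the original code: as the NOTE above says, C, tol, max_iter
# and inference_cache may arrive as lists of candidate values for a grid search. The
# "default if value is None else value" expressions pass such lists through untouched.
# A minimal, self-contained sketch (the _resolve helper is an assumed name):
def _resolve(value, default):
    """Keep the caller's value (scalar or list of grid-search candidates);
    use the default only when no value was given."""
    return default if value is None else value

assert _resolve(None, .1) == .1                        # nothing given -> task default
assert _resolve(.5, .1) == .5                          # scalar given  -> kept as-is
assert _resolve([.01, .1, 1.], .1) == [.01, .1, 1.]    # list given    -> kept for grid search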
def __init__(self, sModelName, sModelDir,
             iGridVisibility=None, iBlockVisibility=None,
             sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    DU_ABPTableRG.iGridVisibility  = iGridVisibility
    DU_ABPTableRG.iBlockVisibility = iBlockVisibility

    DU_CRF_Task.__init__(self, sModelName, sModelDir,
                         dFeatureConfig={'row_row' : {}, 'row_gh' : {},
                                         'gh_row'  : {}, 'gh_gh'  : {},
                                         'gh'      : {}, 'row'    : {}},
                         dLearnerConfig={
                             'C'               : .1  if C               is None else C,
                             'njobs'           : 4   if njobs           is None else njobs,
                             'inference_cache' : 50  if inference_cache is None else inference_cache,
                             #'tol'             : .1,
                             'tol'             : .05 if tol             is None else tol,
                             'save_every'      : 50,   # save every 50 iterations, for warm start
                             'max_iter'        : 10  if max_iter        is None else max_iter
                         },
                         sComment=sComment,
                         #cFeatureDefinition=FeatureDefinition_PageXml_StandardOnes_noText,
                         cFeatureDefinition=My_FeatureDefinition)
    self.cModelClass.setBalancedWeights(True)
def __init__(self, sModelName, sModelDir, sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    DU_CRF_Task.__init__(self
                         , sModelName, sModelDir
                         , dFeatureConfig = {}
                         , dLearnerConfig = {
                               'C'               : .1  if C               is None else C
                             , 'njobs'           : 4   if njobs           is None else njobs
                             , 'inference_cache' : 50  if inference_cache is None else inference_cache
                             #, 'tol'             : .1
                             , 'tol'             : .05 if tol             is None else tol
                             , 'save_every'      : 50   # save every 50 iterations, for warm start
                             , 'max_iter'        : 10  if max_iter        is None else max_iter
                             }
                         , sComment=sComment
                         #, cFeatureDefinition=FeatureDefinition_PageXml_StandardOnes_noText
                         , cFeatureDefinition=My_FeatureDefinition_v2
                         )
def __init__(self, sModelName, sModelDir, feat_select=None, sComment=None):
    if feat_select == 'chi2':
        DU_CRF_Task.__init__(self, sModelName, sModelDir, DU_GRAPH,
                             dFeatureConfig=dFeatureConfig_FeatSelect,
                             dLearnerConfig=dLearnerConfig,
                             sComment=sComment)
    else:
        DU_CRF_Task.__init__(self, sModelName, sModelDir, DU_GRAPH,
                             dFeatureConfig=dFeatureConfig_Baseline,
                             dLearnerConfig=dLearnerConfig,
                             sComment=sComment)
    self.addBaseline_LogisticRegression()    # use a logistic regression model as baseline
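# Illustration, not part of the original code: the if/else above duplicates the whole
# DU_CRF_Task.__init__ call only to switch dFeatureConfig. One possible refactoring is
# to pick the configuration first and call the parent once. A self-contained sketch;
# the helper name and the placeholder dictionaries are assumptions, not the original
# module-level configurations:
dFeatureConfig_FeatSelect = {'feat_select': 'chi2'}    # placeholder for the real chi2 config
dFeatureConfig_Baseline   = {}                         # placeholder for the real baseline config

def pick_feature_config(feat_select):
    """Map the feat_select option to a feature-extraction configuration,
    defaulting to the baseline configuration."""
    return {'chi2': dFeatureConfig_FeatSelect}.get(feat_select, dFeatureConfig_Baseline)

assert pick_feature_config('chi2') is dFeatureConfig_FeatSelect
assert pick_feature_config(None)   is dFeatureConfig_Baseline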
def __init__(self, sModelName, sModelDir, sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    if sComment is None:
        sComment = sModelName

    DU_CRF_Task.__init__(self, sModelName, sModelDir,
                         dFeatureConfig={},
                         dLearnerConfig={
                             'C'               : .1  if C               is None else C,
                             'njobs'           : 2   if njobs           is None else njobs,
                             'inference_cache' : 50  if inference_cache is None else inference_cache,
                             #'tol'             : .1,
                             'tol'             : .05 if tol             is None else tol,
                             'save_every'      : 50,   # save every 50 iterations, for warm start
                             'max_iter'        : 200 if max_iter        is None else max_iter
                         },
                         sComment=sComment,
                         #cFeatureDefinition=FeatureDefinition_PageXml_StandardOnes_noText,
                         cFeatureDefinition=FeatureDefinition_PageXml_StandardOnes_noText_v3
                         )

    #self.setNbClass(3)    # so that we check whether all classes are represented in the training set
    if options.bBaseline:
        self.bsln_mdl = self.addBaseline_LogisticRegression()    # use a LR model trained by GridSearch as baseline
def __init__(self, sModelName, sModelDir, sComment=None,
             C=None, tol=None, njobs=None, max_iter=None, inference_cache=None):
    if self.bHTR:
        cFeatureDefinition = FeatureDefinition_PageXml_StandardOnes
        dFeatureConfig = {'n_tfidf_node': 100, 't_ngrams_node': (1, 2), 'b_tfidf_node_lc': False,
                          'n_tfidf_edge': 100, 't_ngrams_edge': (1, 2), 'b_tfidf_edge_lc': False}
    else:
        cFeatureDefinition = FeatureDefinition_PageXml_StandardOnes_noText
        dFeatureConfig = {}   # no text-based features

    DU_CRF_Task.__init__(self
                         , sModelName, sModelDir
                         , dFeatureConfig = dFeatureConfig
                         , dLearnerConfig = {
                               'C'               : .1   if C               is None else C
                             , 'njobs'           : 16   if njobs           is None else njobs
                             , 'inference_cache' : 50   if inference_cache is None else inference_cache
                             #, 'tol'             : .1
                             , 'tol'             : .05  if tol             is None else tol
                             , 'save_every'      : 50   # save every 50 iterations, for warm start
                             , 'max_iter'        : 1000 if max_iter        is None else max_iter
                             }
                         , sComment=sComment
                         , cFeatureDefinition=cFeatureDefinition
                         #, cFeatureDefinition=FeatureDefinition_T_PageXml_StandardOnes_noText
                         #, dFeatureConfig = {   # config per node type ("text", "sprtr") and per edge type
                         #      "text": None, "sprtr": None,
                         #      "text_text": None, "text_sprtr": None,
                         #      "sprtr_text": None, "sprtr_sprtr": None}
                         )
    traceln("- classes: ", self.getGraphClass().getLabelNameList())

    self.bsln_mdl = self.addBaseline_LogisticRegression()    # use a LR model trained by GridSearch as baseline