Example #1
def TrainUsingCRF(xmls, preprocessor, trainer):
    # Train a CRF on the annotated training XMLs and persist the learned weights.
    CRFImpl = CRF()
    annotatedxmllist = list()
    for xmlname in xmls:
        # fontdict structure: list(pages), page -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        fontdict = preprocessor.getFontDictionary(ET.parse("../TrainingData/xmls/cs/" + xmlname + ".xml"))
        annotatedxml = trainer.readAnnotatedXml('../TrainingData/annotated/' + xmlname + "_annotated")
        annotatedxmllist.append([annotatedxml, fontdict])
    CRFImpl.domaintrain(annotatedxmllist)
    # One weight per line, so getModelwithTrainedWeights() can read them back.
    f = open("TrainedWeightsCRF", 'w')
    for weight in CRFImpl.trainedweights:
        f.write(str(weight) + "\n")
    f.close()
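
A minimal call sketch for the example above. Only the module-level preprocessor and trainer objects appear in these examples, so the Preprocessor and Trainer constructor names and the document names below are assumptions for illustration:

# Hypothetical driver; Preprocessor/Trainer are assumed class names and
# "paper1"/"paper2" are placeholder document names.
preprocessor = Preprocessor()
trainer = Trainer()
# Expects ../TrainingData/xmls/cs/<name>.xml and
# ../TrainingData/annotated/<name>_annotated for every name in the list.
TrainUsingCRF(["paper1", "paper2"], preprocessor, trainer)
# The learned weights end up in the file TrainedWeightsCRF, one float per line.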
Example #2
def TestUsingCRF(predictxmlname, location):
    CRF = getModelwithTrainedWeights()
    fontdict = preprocessor.getFontDictionary(ET.parse(location + predictxmlname + ".xml"))
    # preprocessedxml structure: list(pages), page -> list(cols), col -> list(<Sparse/NonSparse, tag>)
    preprocessedxml = preprocessor.preprocessxml(location + predictxmlname + ".xml")
    alltables = list()
    for page in preprocessedxml:
        for col in page:
            if(len(col) < 2):
                continue
            # Append each line's index within the column as an extra feature.
            for lineno in xrange(len(col)):
                col[lineno].append(lineno)
            predicted = CRF.predict(col, fontdict)
            for r in predicted:
#                if(r[0] == SparseType.OTHERSPARSE):
                print r[1].text + " *** Line no *** " + str(r[2]) + " --  " + str(r[0])
            tables = postprocessor.findTables(predicted)
            if(len(tables) == 0):
                continue
            for t in tables:
                alltables.append(t)

    for table in alltables:
        print "============================================="
        for row in table:
            print row[1].text + " " + str(row[0])
def TestUsingCRF(predictxmlname, location, TDsvm=None):
    CRF = getModelwithTrainedWeights()
    fontdict = preprocessor.getFontDictionary(
        ET.parse(location + predictxmlname + ".xml"))
    preprocessedxml = preprocessor.preprocessxml(
        location + predictxmlname + ".xml"
    )  #list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)

    alltables = list()
    errorcount = 0
    sparseerror = 0
    ntlafterpostproc = 0
    for page in preprocessedxml:
        for col in page:
            if (len(col) < 2):
                continue
            # Filter out empty lines; rebuilding the list avoids the bug of
            # removing items from `col` while iterating over it.
            col[:] = [tup for tup in col
                      if tup[1].text is not None and tup[1].text.strip() != '']
            for lineno in xrange(len(col)):
                col[lineno].append(lineno)

            result = CRF.predict(col, fontdict)
            predicted = result[0]
            errorcount += result[1]
            sparseerror += result[2]
            #            for r in predicted:
            #                if(r[0] == SparseType.OTHERSPARSE):
            #                    print r[1].text.encode('ascii','ignore') + " *** Line no *** " + str(r[2])
            tables = postprocessor.findTables(predicted)
            if (len(tables) == 0):
                continue
            for t in tables:
                alltables.append(t)

    if TDsvm is None:
        for table in alltables:
            print "============================================="
            for row in table:
                if (int(row[0]) == SparseType.NONTABLELINE):
                    ntlafterpostproc += 1
                print row[1].text.encode('ascii', 'ignore')
            print "=============================================="

    else:
        for t in alltables:
            predicted = TDsvm.domainpredictforTableDecomposition(t)
            print "=============================================="
            for r in predicted[0]:
                if (r[0] == SparseType.HEADER):
                    print r[1].text + "  ---> HEADER "
                else:
                    print r[1].text + "  ---> DATA "
            print "=============================================="

    return [errorcount, sparseerror, ntlafterpostproc]
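
A sketch of how this variant might be driven once the weights file exists; the location and document name are placeholders, and tdsvm stands for an already trained table-decomposition SVM:

# Hypothetical call; "sample" and "../TestData/xmls/" are placeholder names.
errorcount, sparseerror, ntlafterpostproc = TestUsingCRF("sample", "../TestData/xmls/")
print "CRF label errors:      " + str(errorcount)
print "Sparse errors:         " + str(sparseerror)
print "Non-table lines after postprocessing: " + str(ntlafterpostproc)

# With a trained table-decomposition SVM the detected tables are also split
# into HEADER and DATA rows instead of being printed verbatim:
# stats = TestUsingCRF("sample", "../TestData/xmls/", TDsvm=tdsvm)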
def TrainUsingCRF(xmls, preprocessor, trainer, xmlloc, annotatedxmlloc):
    CRFImpl = CRF()
    annotatedxmllist = list()
    for xmlname in xmls:
        fontdict = preprocessor.getFontDictionary(
            ET.parse(xmlloc + xmlname + ".xml")
        )  #list(pages), pages -> list(cols), col -> list(<Sparse/NonSparse, tag>)
        annotatedxml = trainer.readAnnotatedXml(annotatedxmlloc + xmlname +
                                                "_annotated")
        annotatedxmllist.append([annotatedxml, fontdict])

    CRFImpl.domaintrain(annotatedxmllist)
    print CRFImpl.trainedweights
    f = open("TrainedWeightsCRF", 'w')
    for weight in CRFImpl.trainedweights:
        f.write(str(weight) + "\n")

    f.close()
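
The same kind of call for this parameterized variant, which takes the XML and annotation directories explicitly; the directories below mirror the paths hard-coded in Example #1, and the document names are placeholders:

# Hypothetical call with explicit data locations.
TrainUsingCRF(["paper1", "paper2"], preprocessor, trainer,
              "../TrainingData/xmls/cs/", "../TrainingData/annotated/")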
def getModelwithTrainedWeights(isCRF=True):
    # Reload the weights written to disk during training, one float per line,
    # and rebuild the corresponding model (CRF by default, else LogisticRegressor).
    trainedweights = list()
    if (isCRF):
        f = open("TrainedWeightsCRF", "r")
        for weight in f:
            trainedweights.append(float(weight))

        f.close()
        CRFImpl = CRF(trainedweights)
        return CRFImpl
    else:
        f = open("TrainedWeightsLR", "r")
        for weight in f:
            trainedweights.append(float(weight))

        f.close()
        LR = LogisticRegressor(trainedweights)
        return LR
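
A sketch of the weight-file round trip these helpers rely on: TrainUsingCRF writes one float per line to TrainedWeightsCRF, and getModelwithTrainedWeights reads the same file back. The weight values here are made up purely for illustration:

# Write a tiny weight file in the expected format (one float per line);
# the values are illustrative only.
f = open("TrainedWeightsCRF", 'w')
for w in [0.5, -1.25, 2.0]:
    f.write(str(w) + "\n")
f.close()

# Rebuild the CRF from it; getModelwithTrainedWeights(isCRF=False) would
# instead read TrainedWeightsLR and return a LogisticRegressor.
model = getModelwithTrainedWeights()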