Пример #1
0
def templateFiler(clf, ptemplates):
    """Classify candidate templates and return the accepted ones, best first.

    Each element of ``ptemplates`` is a sequence of terms. Every term is
    turned into a feature vector and labelled by ``clf``. A template is kept
    only when the predicted labels are exactly ``0 .. len(terms) - 1`` (each
    label used once) and ``checkTemplate`` accepts the resulting dict.

    Args:
        clf: Classifier exposing ``predict`` and ``predict_proba``.
        ptemplates: Iterable of term sequences (candidate templates).

    Returns:
        A list of dicts sorted by descending ``'score'``. Each dict maps a
        field name ('name', 'address', 'phone') to a dict with the keys
        'term', 'score', 'preprocessed' and 'features', plus a top-level
        'score' holding the sum of the log of the per-field scores.
    """
    templates = []

    m = {0: 'name', 1: 'address', 2: 'phone'}
    ft = fe.getFeatureNames()
    dft = {name: index for index, name in enumerate(ft)}

    for terms in ptemplates:
        # When preprocessing is switched off, fall back to the raw term.
        # Previously nothing was collected in that case and the classifier
        # was handed an empty feature matrix.
        preprocessedTerms = [
            fe.preprocess(term) if bpreprocessing else term for term in terms
        ]

        # A term that is empty after preprocessing invalidates the template.
        if any(len(term) == 0 for term in preprocessedTerms):
            continue

        # The feature names are loop-invariant, so reuse the list fetched
        # above instead of asking for it again for every term.
        _X = [fe.feature(term, ft) for term in preprocessedTerms]
        X = np.asarray(_X)
        cls = clf.predict(X)

        # Keep the template only if every label 0..n-1 was predicted once.
        labels = sorted(cls.reshape((1, cls.shape[0])).tolist()[0])
        if labels != list(range(len(terms))):
            continue

        dct = {}
        probs = clf.predict_proba(X)
        for (term, cl, prob, prepTerm, _x) in zip(terms, cls, probs,
                                                  preprocessedTerms, _X):
            # NOTE(review): indexing prob by the label assumes the
            # probability columns are ordered as labels 0, 1, 2 - confirm.
            label = int(cl)
            dct[m[label]] = {
                'term': term,
                'score': prob[label],
                'preprocessed': prepTerm,
                'features': _x
            }
        # NOTE(review): a zero probability makes log() fail or yield -inf,
        # depending on which log is imported in this module - confirm.
        dct['score'] = sum(log(dct[key]['score']) for key in dct)
        if checkTemplate(dct, dft):
            templates.append(dct)

    templates.sort(key=lambda k: k['score'], reverse=True)

    return templates
Пример #2
0
def templateFiler(clf, ptemplates):
    """Classify candidate templates and return the accepted ones, best first.

    Each element of ``ptemplates`` is a sequence of terms. In this variant
    the feature vectors are built from the raw terms; the preprocessed text
    is only stored alongside the result. A template is kept only when the
    predicted labels are exactly ``0 .. len(terms) - 1`` (each label used
    once).

    Args:
        clf: Classifier exposing ``predict`` and ``predict_proba``.
        ptemplates: Iterable of term sequences (candidate templates).

    Returns:
        A list of dicts sorted by descending ``'score'``. Each dict maps a
        field name ('name', 'address', 'phone') to a dict with the keys
        'term', 'score', 'preprocessed' and 'features', plus a top-level
        'score' holding the sum of the log of the per-field scores.
    """
    templates = []

    m = {0: 'name', 1: 'address', 2: 'phone'}

    for terms in ptemplates:
        # When preprocessing is switched off, fall back to the raw term.
        # Previously the list stayed empty in that case, the zip below
        # produced nothing and a bogus {'score': 0} template was emitted.
        preprocessedTerms = [
            fe.preprocess(term) if bpreprocessing else term for term in terms
        ]

        _X = [fe.feature(term) for term in terms]

        X = np.asarray(_X)
        cls = clf.predict(X)

        # Keep the template only if every label 0..n-1 was predicted once.
        labels = sorted(cls.reshape((1, cls.shape[0])).tolist()[0])
        if labels != list(range(len(terms))):
            continue

        dct = {}
        probs = clf.predict_proba(X)
        for (term, cl, prob, prepTerm, _x) in zip(terms, cls, probs,
                                                  preprocessedTerms, _X):
            # NOTE(review): indexing prob by the label assumes the
            # probability columns are ordered as labels 0, 1, 2 - confirm.
            label = int(cl)
            dct[m[label]] = {
                'term': term,
                'score': prob[label],
                'preprocessed': prepTerm,
                'features': _x
            }
        # NOTE(review): a zero probability makes log() fail or yield -inf,
        # depending on which log is imported in this module - confirm.
        dct['score'] = sum(log(dct[key]['score']) for key in dct)
        templates.append(dct)

    templates.sort(key=lambda k: k['score'], reverse=True)

    return templates
Пример #3
0
def templateFiler(clf, ptemplates):
    """Classify candidate templates and return the accepted ones, best first.

    Each element of ``ptemplates`` is a sequence of terms. Every term is
    turned into a feature vector and labelled by ``clf``. A template is
    skipped when any of its terms is empty, and kept only when the predicted
    labels are exactly ``0 .. len(terms) - 1`` (each label used once).

    Args:
        clf: Classifier exposing ``predict`` and ``predict_proba``.
        ptemplates: Iterable of term sequences (candidate templates).

    Returns:
        A list of dicts sorted by descending ``'score'``. Each dict maps a
        field name ('name', 'address', 'phone') to a dict with the keys
        'term', 'score', 'preprocessed' and 'features', plus a top-level
        'score' holding the sum of the log of the per-field scores.
    """
    templates = []

    m = {0: 'name', 1: 'address', 2: 'phone'}

    for terms in ptemplates:
        # When preprocessing is switched off, fall back to the raw term.
        # Previously nothing was collected in that case and the classifier
        # was handed an empty feature matrix.
        preprocessedTerms = [
            fe.preprocess(term) if bpreprocessing else term for term in terms
        ]

        # A term that is empty after preprocessing invalidates the template.
        if any(len(term) == 0 for term in preprocessedTerms):
            continue

        _X = [fe.feature(term) for term in preprocessedTerms]
        X = np.asarray(_X)
        cls = clf.predict(X)

        # Keep the template only if every label 0..n-1 was predicted once.
        labels = sorted(cls.reshape((1, cls.shape[0])).tolist()[0])
        if labels != list(range(len(terms))):
            continue

        dct = {}
        probs = clf.predict_proba(X)
        for (term, cl, prob, prepTerm, _x) in zip(terms, cls, probs,
                                                  preprocessedTerms, _X):
            # NOTE(review): indexing prob by the label assumes the
            # probability columns are ordered as labels 0, 1, 2 - confirm.
            label = int(cl)
            dct[m[label]] = {
                'term': term,
                'score': prob[label],
                'preprocessed': prepTerm,
                'features': _x
            }
        # NOTE(review): a zero probability makes log() fail or yield -inf,
        # depending on which log is imported in this module - confirm.
        dct['score'] = sum(log(dct[key]['score']) for key in dct)
        templates.append(dct)

    templates.sort(key=lambda k: k['score'], reverse=True)

    return templates
Пример #4
0
# from libs.segment import templateSegment
#
# X = templateSegment('81 Duong 16, P. Binh Tri Dong B, Q.Binh Tan, 0909218877, Dinh Thi Bich Phuong', 3)

# X = templateSegment('a,cb         b', 3)

# from libs.features import feature
#
# X = feature('nguyen thi thanh thuy')
#
# None

# from execute.test_address_segment import exc
#
# exc()
# None

from libs.features import preprocess, feature

# Run the preprocess -> feature pipeline on one sample string; the result
# is bound to X.
X = feature(preprocess('(+84 )342-1+ Du.ong 16'))
Пример #5
0
__author__ = 'Thong_Le'

import libs.features as fe

# Each sample string is padded with one leading and one trailing space
# before it is handed to fe.feature.
test_1 = fe.feature(' ' + 'so le may le nhanh line lines' + ' ')

test_2 = fe.feature(' ' + 'duong so khu cong nghiep lo' + ' ')
Пример #6
0
def printData(text):
    """Print *text*, then one ``name : value`` line per extracted feature.

    The feature names come from ``getFeatureNames()`` and the values from
    ``feature(preprocess(text))``; the two are paired positionally. A dashed
    separator line closes the output.
    """
    print('Text : ', text)

    names = getFeatureNames()
    values = feature(preprocess(text))
    for name, value in zip(names, values):
        print(name, ' : ', value)

    print('---------------------------------')
Пример #7
0
# from libs.segment import templateSegment
#
# X = templateSegment('81 Duong 16, P. Binh Tri Dong B, Q.Binh Tan, 0909218877, Dinh Thi Bich Phuong', 3)

# X = templateSegment('a,cb         b', 3)

# from libs.features import feature
#
# X = feature('nguyen thi thanh thuy')
#
# None

# from execute.test_address_segment import exc
#
# exc()
# None


from libs.features import preprocess, feature

# Run the preprocess -> feature pipeline on one sample string; the result
# is bound to X.
X = feature(preprocess('(+84 )342-1+ Du.ong 16'))