def exc():
    """Classify every stored term, save a per-term report and return the accuracy.

    Loads (term, label) rows from ``store``, runs the stored classifier on the
    feature vector of each term and hands the collected rows to
    ``store.saveTermTestResults``.

    Returns:
        The fraction of terms whose first predicted value equals the stored
        label, or ``0.0`` when no terms were loaded.
    """
    # 5. Test
    rows = store.loadTermData()
    termList = {'X': [row[0] for row in rows], 'y': [int(row[1]) for row in rows]}

    print('=======================================================')
    print('=> Term Classifying...')

    # Use the explicitly configured model file when one is set.
    clf = store.loadClassifier(file=file_model) if file_model else store.loadClassifier()

    results = []
    for term in termList['X']:
        preprocessd_term = preprocess(term)
        X = np.asarray([extractFeatureText(term)])
        # Row layout: prediction, class probabilities, an empty spacer column,
        # the preprocessed term, then the raw feature values.
        # NOTE(review): predict(X)[0] is concatenated as a list, so it is
        # presumably array-like rather than a scalar - confirm against the
        # stored classifier.
        results.append(
            clf.predict(X)[0].tolist()
            + clf.predict_proba(X)[0].tolist()
            + ['', preprocessd_term]
            + X[0].tolist()
        )

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
              'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + feature_names

    labels = termList['y']
    hits = sum(1 for expected, row in zip(labels, results) if expected == row[0])
    # Guard the division: an empty data set used to raise ZeroDivisionError.
    tacc = hits / len(labels) if labels else 0.0

    # The report file is prefixed with the model name, or a timestamp when no
    # model file is configured.
    prefix = file_model if file_model else timeManage.getTime()
    store.saveTermTestResults(tacc, titles, termList, results,
                              file=prefix + '_' + file_term_classify_result)
    return tacc
def templateFiler(clf, ptemplates):
    """Keep templates whose terms fall one-per-class and rank them by score.

    Args:
        clf: classifier exposing ``predict`` and ``predict_proba``.
        ptemplates: iterable of candidate templates, each a sequence of terms.

    Returns:
        A list of dicts sorted by descending ``'score'``. Each dict maps a
        field name ('name', 'address' or 'phone') to
        ``{'term': term, 'score': probability}`` and carries a top-level
        ``'score'`` equal to the sum of the logs of those probabilities.
        Templates whose predicted labels are not exactly ``0..len(terms)-1``
        (in any order) are dropped.
    """
    field_names = {0: 'name', 1: 'address', 2: 'phone'}
    templates = []

    for terms in ptemplates:
        X = np.asarray([fe.extractFeatureText(term) for term in terms])
        predicted = clf.predict(X)

        # Flatten the predictions to a plain list; tolist() already returns a
        # fresh list, so no extra copy is needed before sorting.
        labels = sorted(predicted.reshape((1, predicted.shape[0])).tolist()[0])
        if labels != list(range(len(terms))):
            # Some class is missing or duplicated: not a usable template.
            continue

        probs = clf.predict_proba(X)
        dct = {}
        for term, cl, prob in zip(terms, predicted, probs):
            # The former ``except ValueError`` fallback evaluated int(cl)
            # again, so it could never recover from a failing conversion.
            label = int(cl)
            dct[field_names[label]] = {'term': term, 'score': prob[label]}

        # A zero probability has no finite log; score it as -inf so the
        # template sorts last instead of aborting the whole ranking.
        dct['score'] = sum(
            log(entry['score']) if entry['score'] > 0 else float('-inf')
            for entry in dct.values()
        )
        templates.append(dct)

    templates.sort(key=lambda template: template['score'], reverse=True)
    return templates
def exc():
    """Classify every stored term, save a per-term report and return the accuracy.

    Loads (term, label) rows from ``store``, runs the stored classifier on the
    feature vector of each term, flags misclassified terms in an 'Error'
    column and hands the collected rows to ``store.saveTermTestResults``.

    Returns:
        The fraction of terms whose first predicted value equals the stored
        label, or ``0.0`` when no terms were loaded.
    """
    # 5. Test
    rows = store.loadTermData()
    termList = {'X': [row[0] for row in rows], 'y': [int(row[1]) for row in rows]}

    print('=======================================================')
    print('=> Term Classifying...')

    # Use the explicitly configured model file when one is set.
    clf = store.loadClassifier(file=file_model) if file_model else store.loadClassifier()

    # Fetched once instead of once per term plus once for the header.
    names = getFeatureNames()

    results = []
    for term, label in zip(termList['X'], termList['y']):
        preprocessd_term = preprocess(term)
        X = np.asarray([extractFeatureText(term, names)])

        # Predict once and reuse the result for both the row and the error
        # flag; the original ran clf.predict(X) twice per term.
        predicted = clf.predict(X)[0].tolist()
        y_hat = predicted[0]

        # Row layout: prediction, class probabilities, error flag (1 when the
        # prediction differs from the label), preprocessed term, features.
        results.append(
            predicted
            + clf.predict_proba(X)[0].tolist()
            + [1 if y_hat != label else 0, preprocessd_term]
            + X[0].tolist()
        )

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
              'Address Score', 'Phone Score', 'Error', 'Preprocessed_Term'] + names

    labels = termList['y']
    hits = sum(1 for expected, row in zip(labels, results) if expected == row[0])
    # Guard the division: an empty data set used to raise ZeroDivisionError.
    tacc = hits / len(labels) if labels else 0.0

    # The report file is prefixed with the model name, or a timestamp when no
    # model file is configured.
    prefix = file_model if file_model else timeManage.getTime()
    store.saveTermTestResults(tacc, titles, termList, results,
                              file=prefix + '_' + file_term_classify_result)
    return tacc
def test(feature_func, preprocessing_func):
    """Classify every stored term and save a per-term report.

    Args:
        feature_func: forwarded unchanged to ``extractFeatureText``.
        preprocessing_func: forwarded unchanged to ``extractFeatureText``.

    Returns:
        None. The collected rows are passed to ``store.saveTermTestResults``.
    """
    # 4. Test
    termList = store.loadTermData()

    print('=======================================================')
    print('=> Term Classifying...')

    clf = store.loadClassifier()

    results = []
    for term in termList['X']:
        # Direct call instead of eval() on a constant string; same lookup of
        # the module-level ``preprocessing`` name, without the eval.
        # NOTE(review): this uses ``preprocessing``, not the
        # ``preprocessing_func`` argument - confirm that is intended.
        preprocessd_term = preprocessing(term)
        X = np.asarray([extractFeatureText(feature_func, preprocessing_func, term)])
        # Row layout: prediction, class probabilities, an empty spacer column,
        # the preprocessed term, then the raw feature values.
        results.append(
            clf.predict(X)[0].tolist()
            + clf.predict_proba(X)[0].tolist()
            + ['', preprocessd_term]
            + X[0].tolist()
        )

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
              'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + feature_names

    store.saveTermTestResults(titles, termList, results)
__author__ = 'Thong_Le'

import libs.features as fe
from libs.config import *

# Sample term fed to the feature extractor.
text = 'Thon Xa Huyen'

# feature_func and preprocessing_func are not defined in this script;
# presumably they come from the star import of libs.config - TODO confirm.
X = fe.extractFeatureText(feature_func, preprocessing_func, text)

# Bare expression statement; it has no effect when run as a script.
None