def exc():
    """Classify every stored test term, save a result sheet, return the accuracy.

    Reads the module-level names ``file_model``, ``file_term_classify_result``
    and ``feature_names``. Each record returned by ``store.loadTermData()`` is
    indexed as ``(term, label)``; the label is converted with ``int``.

    Returns:
        The number of terms whose first predicted value equals the label,
        divided by the number of terms.

    Raises:
        ZeroDivisionError: if ``store.loadTermData()`` yields no records.
    """
    # 5. Test
    records = store.loadTermData()
    termList = {
        'X': [record[0] for record in records],
        'y': [int(record[1]) for record in records],
    }

    print('=======================================================')
    print('=> Term Classifying...')

    # Load the named model when one is configured, the default one otherwise.
    clf = store.loadClassifier(file=file_model) if file_model else store.loadClassifier()

    results = []
    for term in termList['X']:
        cleaned_term = preprocess(term)
        X = np.asarray([extractFeatureText(term)])
        # NOTE(review): assumes clf.predict(X)[0] is array-like with the
        # predicted label first -- confirm against the classifier in use.
        predicted = clf.predict(X)[0].tolist()
        scores = clf.predict_proba(X)[0].tolist()
        # Row layout: prediction, class scores, empty spacer column,
        # preprocessed term, raw feature values.
        results.append(predicted + scores + ['', cleaned_term] + X[0].tolist())

    fixed_titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
                    'Address Score', 'Phone Score', '', 'Preprocessed_Term']
    titles = fixed_titles + feature_names

    # Count rows whose first column (the prediction) matches the true label.
    hits = 0
    for expected, row in zip(termList['y'], results):
        if expected == row[0]:
            hits += 1
    tacc = hits / len(termList['y'])

    # Prefix the output file with the model name, or a timestamp without one.
    prefix = file_model if file_model else timeManage.getTime()
    store.saveTermTestResults(tacc, titles, termList, results,
                              file=prefix + '_' + file_term_classify_result)

    return tacc
# Example #2
0
def exc():
    """Classify every stored test term, save a result sheet, return the accuracy.

    Reads the module-level names ``file_model`` and
    ``file_term_classify_result``. Each record returned by
    ``store.loadTermData()`` is indexed as ``(term, label)``; the label is
    converted with ``int``. Every result row carries an ``Error`` flag that is
    1 when the predicted label differs from the true label and 0 otherwise.

    Returns:
        The number of terms whose predicted label equals the true label,
        divided by the number of terms.

    Raises:
        ZeroDivisionError: if ``store.loadTermData()`` yields no records.
    """
    # 5. Test
    tmp = store.loadTermData()
    termList = {'X': [i[0] for i in tmp], 'y': [int(i[1]) for i in tmp]}

    print('=======================================================')
    print('=> Term Classifying...')

    if file_model:
        clf = store.loadClassifier(file=file_model)
    else:
        clf = store.loadClassifier()
    results = []

    for term, label in zip(termList['X'], termList['y']):
        preprocessd_term = preprocess(term)
        X = np.asarray([extractFeatureText(term, getFeatureNames())])
        # Predict once and reuse the value for both the row and the error
        # flag; the previous code ran the classifier twice on the same input.
        # NOTE(review): assumes clf.predict(X)[0] is array-like with the
        # predicted label first -- confirm against the classifier in use.
        predicted = clf.predict(X)[0].tolist()
        y_hat = predicted[0]
        results.append(predicted + clf.predict_proba(X)[0].tolist() +
                       [1 if (y_hat != label) else 0, preprocessd_term] + X[0].tolist())

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score', 'Address Score', 'Phone Score', 'Error', 'Preprocessed_Term'] + \
            getFeatureNames()

    # The first column of every row is the predicted label.
    tacc = sum(1 for y1, row in zip(termList['y'], results) if y1 == row[0]) / len(termList['y'])

    # Prefix the output file with the model name, or a timestamp without one.
    if file_model:
        store.saveTermTestResults(tacc, titles, termList, results, file=file_model + '_' + file_term_classify_result)
    else:
        store.saveTermTestResults(tacc, titles, termList, results, file=timeManage.getTime() + '_' + file_term_classify_result)

    return tacc
# Example #3
0
def test(feature_func, preprocessing_func):
    """Classify every stored test term and save the per-term results.

    Args:
        feature_func: forwarded unchanged to ``extractFeatureText``.
        preprocessing_func: forwarded unchanged to ``extractFeatureText``.

    The term data returned by ``store.loadTermData()`` is indexed with the key
    ``'X'``. The module-level name ``feature_names`` supplies the trailing
    column titles. Nothing is returned; the rows are handed to
    ``store.saveTermTestResults``.
    """
    # 4. Test
    termList = store.loadTermData()

    print('=======================================================')
    print('=> Term Classifying...')
    # _time, templateList = sg.parseAddress(termList, feature_func, preprocessing_func)

    clf = store.loadClassifier()
    results = []

    for term in termList['X']:
        # Plain call instead of eval() on a constant string: the name is
        # resolved from module scope exactly as the eval did.
        # NOTE(review): this calls the module-level `preprocessing`, not the
        # `preprocessing_func` parameter -- confirm which one is intended.
        preprocessd_term = preprocessing(term)
        X = np.asarray([
            extractFeatureText(feature_func, preprocessing_func, term)
        ])
        # Row layout: prediction, class scores, empty spacer column,
        # preprocessed term, raw feature values.
        results.append(
            clf.predict(X)[0].tolist() + clf.predict_proba(X)[0].tolist() +
            ['', preprocessd_term] + X[0].tolist())

    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score', 'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + \
            feature_names

    store.saveTermTestResults(titles, termList, results)
def exc():
    """Write a convergence report for every model group with at least two models.

    For each qualifying group the feature list is switched to the group's
    features, the stored terms are turned into a feature matrix, and
    ``models.checkModelConvergence`` supplies a mean distance ``d`` and a
    variance ``v``. The ratio ``d / sqrt(v)`` is compared with the critical
    value ``norm.isf(alpha / 2)`` to label the group 'Accept' or 'Reject'.
    The collected rows are written to the sheet 'original' of the workbook
    ``folder_model + '/' + file_model_result``.
    """
    alpha = 0.05
    # NOTE(review): presumably scipy.stats.norm, which would make this the
    # two-sided critical value -- confirm the import.
    tt = norm.isf(alpha / 2)

    modelInfos, modelDict = store.loadAllModel()
    groups = models.groupModels(modelInfos, modelDict)

    header = ['#', 'Learning Rate', 'Learning Rule', 'N_Iter', 'Features',
              'Avg_Mean_Distance', 'Avg_Var_Distance',
              'alpha', 'H0: Avg_Mean_Distance = 0']
    data = [header]

    records = store.loadTermData()
    termList = {
        'X': [record[0] for record in records],
        'y': [int(record[1]) for record in records],
    }

    for i, igroup in enumerate(groups):
        # Groups with fewer than two models are skipped.
        if len(igroup['models'].keys()) < 2:
            continue

        info = igroup['group-info']
        # The feature list must match the group before features are extracted.
        feature_manager.updateFeatureList(info['features'])
        _X = np.asarray([extractFeatureText(term, getFeatureNames())
                         for term in termList['X']])

        d, v = models.checkModelConvergence(igroup['models'], _X)
        t = d / math.sqrt(v)

        data.append([
            i,
            info['learning_rate'],
            info['learning_rule'],
            info['n_iter'],
            str(info['features']),
            d,
            v,
            alpha,
            'Accept' if abs(t) < tt else 'Reject',
        ])

    # ================================
    workbook = xlsxwriter.Workbook(folder_model + '/' + file_model_result)
    sheet = workbook.add_worksheet('original')
    store.writeSheet(sheet, data)
    workbook.close()
# Example #5
0
from libs import store, models
from libs.features import *
from config import *
import xlsxwriter

# Load every stored model and group them with models.groupModels.
modelInfos, modelDict = store.loadAllModel()
groups = models.groupModels(modelInfos, modelDict)

# Header row of the summary table; one column title embeds delta_threshold.
data = [[
    '#',
    'Learning Rate',
    'Learning Rule',
    'N_Iter',
    'Avg_Mean_Distance',
    'Avg_Var_Distance',
    'alpha',
    'H0: Avg_Mean_Distance = 0',
    'P(distance < ' + str(delta_threshold) + ')',
    'p(Accept H0: mean_1 = mean_2)',
]]

# Stored records are indexed as (term, label); labels are converted with int.
tmp = store.loadTermData()
termList = {
    'X': [i[0] for i in tmp],
    'y': [int(i[1]) for i in tmp],
}
# One feature vector per term, built from the current feature names.
_X = np.asarray([
    extractFeatureText(term, getFeatureNames())
    for term in termList['X']
])

workbook_d = xlsxwriter.Workbook(folder_model + '/' + file_model_details)

# 'GroupInfo' sheet: a title row followed by one row per group.
store.writeSheet(
    workbook_d.add_worksheet('GroupInfo'),
    [['Group', 'Learning_Rate', 'Learning_Rule', 'n_Iter']] + [
        [
            i,
            g['group-info']['learning_rate'],
            g['group-info']['learning_rule'],
            g['group-info']['n_iter'],
        ]
        for i, g in enumerate(groups)
    ]
)

# One detail sheet per group, named after the group's position.
for i, igroup in enumerate(groups):
    results = models.checkModelConvergence(igroup['models'], _X)

    store.writeSheet(workbook_d.add_worksheet('Group' + str(i)), results['data'])
# Example #6
0
from libs.features import *
from config import *
import xlsxwriter

# Load every stored model and group them with models.groupModels.
modelInfos, modelDict = store.loadAllModel()
groups = models.groupModels(modelInfos, modelDict)

# Header row of the summary table; one column title embeds delta_threshold.
data = [[
    '#',
    'Learning Rate',
    'Learning Rule',
    'N_Iter',
    'Avg_Mean_Distance',
    'Avg_Var_Distance',
    'alpha',
    'H0: Avg_Mean_Distance = 0',
    'P(distance < ' + str(delta_threshold) + ')',
    'p(Accept H0: mean_1 = mean_2)',
]]

# Stored records are indexed as (term, label); labels are converted with int.
tmp = store.loadTermData()
termList = {
    'X': [i[0] for i in tmp],
    'y': [int(i[1]) for i in tmp],
}
# One feature vector per term, built from the current feature names.
_X = np.asarray([
    extractFeatureText(term, getFeatureNames())
    for term in termList['X']
])

workbook_d = xlsxwriter.Workbook(folder_model + '/' + file_model_details)

# 'GroupInfo' sheet: a title row followed by one row per group.
store.writeSheet(
    workbook_d.add_worksheet('GroupInfo'),
    [['Group', 'Learning_Rate', 'Learning_Rule', 'n_Iter']] + [
        [
            i,
            g['group-info']['learning_rate'],
            g['group-info']['learning_rule'],
            g['group-info']['n_iter'],
        ]
        for i, g in enumerate(groups)
    ]
)

# Run the convergence check for each group in turn.
for i, igroup in enumerate(groups):
    results = models.checkModelConvergence(igroup['models'], _X)