def exc():
    """Run the term-classification test pass and return its accuracy.

    Loads labelled terms, classifies each with the stored model, writes the
    per-term result sheet via ``store.saveTermTestResults`` and returns the
    overall accuracy.
    """
    # 5. Test
    raw = store.loadTermData()
    termList = {'X': [row[0] for row in raw], 'y': [int(row[1]) for row in raw]}
    print('=======================================================')
    print('=> Term Classifying...')
    # Use the explicitly configured model file when one is set.
    clf = store.loadClassifier(file=file_model) if file_model else store.loadClassifier()
    results = []
    for term in termList['X']:
        cleaned = preprocess(term)
        X = np.asarray([extractFeatureText(term)])
        row = (clf.predict(X)[0].tolist()
               + clf.predict_proba(X)[0].tolist()
               + ['', cleaned]
               + X[0].tolist())
        results.append(row)
    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
              'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + \
        feature_names
    guesses = [row[0] for row in results]
    hits = sum(1 for truth, guess in zip(termList['y'], guesses) if truth == guess)
    tacc = hits / len(termList['y'])
    # Result file is prefixed with the model name, or a timestamp when the
    # default model was used.
    prefix = file_model if file_model else timeManage.getTime()
    store.saveTermTestResults(tacc, titles, termList, results,
                              file=prefix + '_' + file_term_classify_result)
    return tacc
def exc():
    """Evaluate the stored classifier on the labelled term data set.

    For every term the sheet row holds: predicted label, class probabilities,
    an Error flag (1 when the prediction differs from the label), the
    preprocessed term and the raw feature vector. Results are persisted via
    ``store.saveTermTestResults``; the overall accuracy is returned.
    """
    # 5. Test
    tmp = store.loadTermData()
    termList = {'X': [i[0] for i in tmp], 'y': [int(i[1]) for i in tmp]}
    print('=======================================================')
    print('=> Term Classifying...')
    # Use the explicitly configured model file when one is set.
    if file_model:
        clf = store.loadClassifier(file=file_model)
    else:
        clf = store.loadClassifier()
    # Hoisted out of the loop: the active feature list does not change while
    # classifying (it is also reused for the title row below).
    fnames = getFeatureNames()
    results = []
    for term, label in zip(termList['X'], termList['y']):
        preprocessd_term = preprocess(term)
        X = np.asarray([extractFeatureText(term, fnames)])
        # FIX: predict once and reuse — the original called clf.predict(X)
        # twice per iteration (once for y_hat, once for the row).
        pred = clf.predict(X)[0].tolist()
        y_hat = pred[0]
        results.append(pred
                       + clf.predict_proba(X)[0].tolist()
                       + [1 if y_hat != label else 0, preprocessd_term]
                       + X[0].tolist())
    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
              'Address Score', 'Phone Score', 'Error', 'Preprocessed_Term'] + \
        fnames
    tacc = sum(1 for y1, y2 in zip(termList['y'], [r[0] for r in results])
               if y1 == y2) / len(termList['y'])
    if file_model:
        store.saveTermTestResults(tacc, titles, termList, results,
                                  file=file_model + '_' + file_term_classify_result)
    else:
        store.saveTermTestResults(tacc, titles, termList, results,
                                  file=timeManage.getTime() + '_' + file_term_classify_result)
    return tacc
def test(feature_func, preprocessing_func):
    """Classify the stored term list and save the per-term results.

    Args:
        feature_func: feature-extraction callable forwarded to
            ``extractFeatureText``.
        preprocessing_func: preprocessing callable applied to each raw term.
    """
    # 4. Test
    termList = store.loadTermData()
    print('=======================================================')
    print('=> Term Classifying...')
    # _time, templateList = sg.parseAddress(termList, feature_func, preprocessing_func)
    clf = store.loadClassifier()
    results = []
    for i in range(len(termList['X'])):
        # FIX: the original did eval('preprocessing(termList[\'X\'][i])'),
        # which ignored the preprocessing_func parameter and depended on a
        # global named `preprocessing`. Call the supplied function directly.
        preprocessd_term = preprocessing_func(termList['X'][i])
        X = np.asarray([
            extractFeatureText(feature_func, preprocessing_func,
                               termList['X'][i])
        ])
        results.append(clf.predict(X)[0].tolist()
                       + clf.predict_proba(X)[0].tolist()
                       + ['', preprocessd_term]
                       + X[0].tolist())
    # NOTE(review): elsewhere in this file loadTermData() output is converted
    # to a dict before being indexed with 'X' — confirm termList has that
    # shape here.
    titles = ['TestCase', 'Term', 'Label', 'Predicted Label', 'Name Score',
              'Address Score', 'Phone Score', '', 'Preprocessed_Term'] + \
        feature_names
    store.saveTermTestResults(titles, termList, results)
def exc():
    """Write a per-group model-convergence report workbook.

    For every hyper-parameter group with at least two trained models, computes
    the average mean/variance of pairwise model distance, runs a two-sided
    z-test of H0: Avg_Mean_Distance = 0 at alpha = 0.05, and writes one summary
    row per group to the 'original' sheet of the report workbook.
    """
    alpha = 0.05
    # Two-sided critical value from the standard normal.
    tt = norm.isf(alpha / 2)
    modelInfos, modelDict = store.loadAllModel()
    groups = models.groupModels(modelInfos, modelDict)
    data = [['#', 'Learning Rate', 'Learning Rule', 'N_Iter', 'Features',
             'Avg_Mean_Distance', 'Avg_Var_Distance', 'alpha',
             'H0: Avg_Mean_Distance = 0']]
    tmp = store.loadTermData()
    termList = {'X': [i[0] for i in tmp], 'y': [int(i[1]) for i in tmp]}
    for i, igroup in enumerate(groups):
        # Convergence is only meaningful with at least two models to compare.
        if len(igroup['models'].keys()) >= 2:
            feature_manager.updateFeatureList(igroup['group-info']['features'])
            # FIX: fetch the (per-group constant) feature list once instead of
            # calling getFeatureNames() for every term.
            fnames = getFeatureNames()
            _X = np.asarray([extractFeatureText(term, fnames)
                             for term in termList['X']])
            d, v = models.checkModelConvergence(igroup['models'], _X)
            # z statistic; NOTE(review): assumes v > 0 — a zero variance would
            # raise ZeroDivisionError, as in the original.
            t = d / math.sqrt(v)
            data.append([
                i,
                igroup['group-info']['learning_rate'],
                igroup['group-info']['learning_rule'],
                igroup['group-info']['n_iter'],
                str(igroup['group-info']['features']),
                d, v, alpha,
                'Accept' if abs(t) < tt else 'Reject'
            ])
    # ================================
    workbook = xlsxwriter.Workbook(folder_model + '/' + file_model_result)
    store.writeSheet(workbook.add_worksheet('original'), data)
    workbook.close()
from libs import store, models
from libs.features import *
from config import *
import xlsxwriter

# Load every stored model and bucket the runs by hyper-parameter group.
modelInfos, modelDict = store.loadAllModel()
groups = models.groupModels(modelInfos, modelDict)
# NOTE(review): `data` is built but never written in this script — it looks
# like a leftover header from the summary report; kept to avoid breaking any
# external reader of this module.
data = [['#', 'Learning Rate', 'Learning Rule', 'N_Iter', 'Avg_Mean_Distance',
         'Avg_Var_Distance', 'alpha', 'H0: Avg_Mean_Distance = 0',
         'P(distance < ' + str(delta_threshold) + ')',
         'p(Accept H0: mean_1 = mean_2)']]
tmp = store.loadTermData()
termList = {'X': [i[0] for i in tmp], 'y': [int(i[1]) for i in tmp]}
# Feature matrix shared by every group's convergence check.
_X = np.asarray([extractFeatureText(term, getFeatureNames())
                 for term in termList['X']])
workbook_d = xlsxwriter.Workbook(folder_model + '/' + file_model_details)
store.writeSheet(
    workbook_d.add_worksheet('GroupInfo'),
    [['Group', 'Learning_Rate', 'Learning_Rule', 'n_Iter']] +
    [[i, g['group-info']['learning_rate'], g['group-info']['learning_rule'],
      g['group-info']['n_iter']] for i, g in enumerate(groups)]
)
# One detail sheet per model group with the raw convergence measurements.
for i, igroup in enumerate(groups):
    results = models.checkModelConvergence(igroup['models'], _X)
    store.writeSheet(workbook_d.add_worksheet('Group' + str(i)),
                     results['data'])
# FIX: XlsxWriter only writes the .xlsx file to disk on close(); the original
# never closed the workbook, so the details file was never produced.
workbook_d.close()
from libs.features import *
from config import *
import xlsxwriter

modelInfos, modelDict = store.loadAllModel()
groups = models.groupModels(modelInfos, modelDict)
# NOTE(review): `data` is built but never written in this script; kept as-is.
data = [[
    '#', 'Learning Rate', 'Learning Rule', 'N_Iter', 'Avg_Mean_Distance',
    'Avg_Var_Distance', 'alpha', 'H0: Avg_Mean_Distance = 0',
    'P(distance < ' + str(delta_threshold) + ')',
    'p(Accept H0: mean_1 = mean_2)'
]]
tmp = store.loadTermData()
termList = {'X': [i[0] for i in tmp], 'y': [int(i[1]) for i in tmp]}
# Feature matrix shared by every group's convergence check.
_X = np.asarray(
    [extractFeatureText(term, getFeatureNames()) for term in termList['X']])
workbook_d = xlsxwriter.Workbook(folder_model + '/' + file_model_details)
store.writeSheet(workbook_d.add_worksheet('GroupInfo'),
                 [['Group', 'Learning_Rate', 'Learning_Rule', 'n_Iter']] +
                 [[i, g['group-info']['learning_rate'],
                   g['group-info']['learning_rule'],
                   g['group-info']['n_iter']]
                  for i, g in enumerate(groups)])
for i, igroup in enumerate(groups):
    # NOTE(review): `results` is computed but never written here — a newer
    # variant of this script adds a per-group sheet; confirm intent.
    results = models.checkModelConvergence(igroup['models'], _X)
# FIX: XlsxWriter only writes the .xlsx file to disk on close(); without this
# the GroupInfo sheet was never flushed to the details file.
workbook_d.close()