def evaluate(model, page_tuples, page_labels, page_num_labels):
    """Score a model on labelled pages and print the weighted Matthews result.

    Feeds the pages through ``score_errors`` to obtain TP/TN/FP/FN, converts
    those into Matthews values with ``score_matthews``, and combines them with
    ``weighted_average`` using the totals from ``get_page_count``.  The result
    is printed to stdout; nothing is returned.

    Args:
        model: Model handed unchanged to ``score_errors``.
        page_tuples: Page data handed unchanged to ``score_errors``.
        page_labels: Page labels; used by ``score_errors`` and also by
            ``get_page_count``.
        page_num_labels: Handed unchanged to ``score_errors``.
    """
    # NOTE(review): the literal True is score_errors' fifth positional
    # argument; its meaning is defined there -- confirm before changing it.
    TP, TN, FP, FN = score_errors(
        model, page_tuples, page_labels, page_num_labels,
        True, TVDBG_P, TVDBG_L)
    matthews = score_matthews(TP, TN, FP, FN, TVDBG_P, TVDBG_L)

    page_total, page_count = get_page_count(page_labels)
    wgt_avg = weighted_average(page_total, page_count, matthews)

    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('weighted average = {} matthews={}'.format(wgt_avg, matthews))
def main():
    """Print the positive score for each label, plus its weighted average.

    Command Line Inputs:
        1. Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n
           and associated text for page
        2. Input Model file with labels and array of weak learners

    Output the positive score TP / (TP + FP) for each label, followed by the
    weighted average of those scores.

    Exits with a usage message when fewer than two arguments are supplied.
    """
    if len(sys.argv) < 3:
        # Give a readable error instead of an IndexError traceback.
        sys.exit('usage: <script> DATA_FILE MODEL_FILE')

    model = read_model(sys.argv[2])
    # debug: print('model has {} labels and {} tuples'.format(
    #     len(model['labels']), len(model['learners'])))
    page_tuples, page_labels, page_num_labels = read_data_labels(
        sys.argv[1], model)
    # debug: print('read {} pages'.format(len(page_labels)))

    # NOTE(review): False is score_errors' fifth positional argument; its
    # meaning is defined there -- confirm before changing it.
    TP, TN, FP, FN = score_errors(
        model, page_tuples, page_labels, page_num_labels,
        False, TVDBG_P, TVDBG_L)
    score = score_positive(TP, TN, FP, FN, TVDBG_P, TVDBG_L)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('score = {}'.format(score))

    page_total, page_count = get_page_count(page_labels)
    wgt_avg = weighted_average(page_total, page_count, score)
    print('weighted average = {}'.format(wgt_avg))
def main():
    """Print the Matthews coefficient for each label, plus its weighted average.

    Command Line Inputs:
        1. Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n
           and associated text for page
        2. Input Model file with labels and array of weak learners

    Output the matthews correlation coefficient for each label, followed by
    the weighted average of those coefficients.

    Exits with a usage message when fewer than two arguments are supplied.
    """
    if len(sys.argv) < 3:
        # Give a readable error instead of an IndexError traceback.
        sys.exit('usage: <script> DATA_FILE MODEL_FILE')

    model = read_model(sys.argv[2])
    # debug: print('model has {} labels and {} tuples'.format(
    #     len(model['labels']), len(model['learners'])))
    page_tuples, page_labels, page_num_labels = read_data_labels(
        sys.argv[1], model)
    # debug: print('read {} pages'.format(len(page_labels)))

    # NOTE(review): False is score_errors' fifth positional argument; its
    # meaning is defined there -- confirm before changing it.
    TP, TN, FP, FN = score_errors(
        model, page_tuples, page_labels, page_num_labels, False)
    matthews = score_matthews(TP, TN, FP, FN)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('score = {}'.format(matthews))

    page_total, page_count = get_page_count(page_labels)
    wgt_avg = weighted_average(page_total, page_count, matthews)
    print('weighted average = {}'.format(wgt_avg))
def main():
    """Print the positive score for each label, plus its weighted average.

    Command Line Inputs:
        1. Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n
           and associated text for page
        2. Input Model file with labels and array of weak learners

    Output the positive score TP / (TP + FP) for each label, followed by the
    weighted average of those scores.

    Exits with a usage message when fewer than two arguments are supplied.
    """
    if len(sys.argv) < 3:
        # Give a readable error instead of an IndexError traceback.
        sys.exit('usage: <script> DATA_FILE MODEL_FILE')

    model = read_model(sys.argv[2])
    # debug: print('model has {} labels and {} tuples'.format(
    #     len(model['labels']), len(model['learners'])))
    page_tuples, page_labels, page_num_labels = read_data_labels(
        sys.argv[1], model)
    # debug: print('read {} pages'.format(len(page_labels)))

    # NOTE(review): False is score_errors' fifth positional argument; its
    # meaning is defined there -- confirm before changing it.
    TP, TN, FP, FN = score_errors(
        model, page_tuples, page_labels, page_num_labels, False)
    score = score_positive(TP, TN, FP, FN)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('score = {}'.format(score))

    page_total, page_count = get_page_count(page_labels)
    wgt_avg = weighted_average(page_total, page_count, score)
    print('weighted average = {}'.format(wgt_avg))
def main():
    """Print the Matthews coefficient for each label, plus its weighted average.

    Command Line Inputs:
        1. Data file with lines: PAGE URL LABEL_1 LABEL_2 ... LABEL_n
           and associated text for page
        2. Input Model file with labels and array of weak learners

    Output the matthews correlation coefficient for each label, followed by
    the weighted average of those coefficients.

    Exits with a usage message when fewer than two arguments are supplied.
    """
    if len(sys.argv) < 3:
        # Give a readable error instead of an IndexError traceback.
        sys.exit('usage: <script> DATA_FILE MODEL_FILE')

    model = read_model(sys.argv[2])
    # debug: print('model has {} labels and {} tuples'.format(
    #     len(model['labels']), len(model['learners'])))
    page_tuples, page_labels, page_num_labels = read_data_labels(
        sys.argv[1], model)
    # debug: print('read {} pages'.format(len(page_labels)))

    # NOTE(review): True is score_errors' fifth positional argument; its
    # meaning is defined there -- confirm before changing it.
    TP, TN, FP, FN = score_errors(
        model, page_tuples, page_labels, page_num_labels,
        True, TVDBG_P, TVDBG_L)
    matthews = score_matthews(TP, TN, FP, FN, TVDBG_P, TVDBG_L)
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('score = {}'.format(matthews))

    page_total, page_count = get_page_count(page_labels)
    wgt_avg = weighted_average(page_total, page_count, matthews)
    print('weighted average = {}'.format(wgt_avg))