'AND (gunning_fog_index >= %s AND gunning_fog_index <= %s) ' 'AND (smog_index >= %s AND smog_index <= %s) ' 'AND (coleman_liau_index >= %s AND coleman_liau_index <= %s) ' 'AND (lix >= %s AND lix <= %s) ' 'AND (rix >= %s AND rix <= %s) ' 'ORDER BY RAND() LIMIT 1000', (limits['ari'][0], limits['ari'][1], limits['flesch_reading_ease'][0], limits['flesch_reading_ease'][1], limits['flesch_kincaid_grade_level'][0], limits['flesch_kincaid_grade_level'][1], limits['gunning_fog_index'][0], limits['gunning_fog_index'][1], limits['smog_index'][0], limits['smog_index'][1], limits['coleman_liau_index'][0], limits['coleman_liau_index'][1], limits['lix'][0], limits['lix'][1], limits['rix'][0], limits['rix'][1])) seen = [] for i in indices: for j in indices: if i == j: continue key = tuple(sorted([i, j])) if key in seen: continue print(key) seen.append(key) x = [float(v[i]) for v in result] y = [float(v[j]) for v in result] graph.scatter('data/%s-%s' % (i, j), x, y, True, limits[i], limits[j], i, j)
def scatter_ab():
    """Plot one point for A and one for B through ``ga.scatter``.

    Each point is element 1 of the corresponding module-level ``fpr_*`` /
    ``tpr_*`` sequences, labelled 'A' or 'B', drawn with the 'o' marker and
    coloured with ``colors[0]`` and ``colors[1]`` respectively.
    """
    marker = 'o'
    # The values returned by ga.scatter are not needed here, so they are
    # discarded instead of being bound to unused locals.
    ga.scatter(fpr_a[1], tpr_a[1], 'A', marker, colors[0])
    ga.scatter(fpr_b[1], tpr_b[1], 'B', marker, colors[1])
# Fetch up to 1000 randomly ordered rows of comment_feature_read for the
# 'netsec' reddit whose readability scores all lie inside the configured
# [low, high] bounds, then scatter-plot every unordered pair of metrics.

# Metrics constrained in the WHERE clause. Both the SQL clauses and the
# parameter tuple are derived from this one sequence so that the placeholders
# and their bound values cannot drift out of step.
range_metrics = (
    'ari',
    'flesch_reading_ease',
    'flesch_kincaid_grade_level',
    'gunning_fog_index',
    'smog_index',
    'coleman_liau_index',
    'lix',
    'rix',
)
# One "AND (m >= %s AND m <= %s) " clause per metric, in order.
range_clauses = ''.join(
    'AND ({0} >= %s AND {0} <= %s) '.format(metric) for metric in range_metrics
)
# Matching (low, high) pair per metric, flattened in the same order.
range_params = tuple(
    limits[metric][bound] for metric in range_metrics for bound in (0, 1)
)

result = corpus.run_sql(
    'SELECT * FROM comment_feature_read '
    'LEFT JOIN comment ON (comment.id=comment_feature_read.id) '
    'LEFT JOIN submission ON (submission.id=comment.submission_id) '
    'LEFT JOIN reddit ON (reddit.id=submission.reddit_id) '
    'WHERE reddit.name = \'netsec\' '
    + range_clauses +
    'ORDER BY RAND() LIMIT 1000',
    range_params,
)

# Convert each plotted column to floats once, rather than once for every
# pair the column takes part in.
# NOTE(review): the same list object is now passed to graph.scatter for
# several plots; this assumes graph.scatter does not modify its inputs.
metric_columns = {
    name: [float(row[name]) for row in result] for name in indices
}

# Order-independent keys of the pairs already plotted; a set keeps the
# membership test constant-time.
seen = set()
for i in indices:
    for j in indices:
        if i == j:
            continue
        # (a, b) and (b, a) share one sorted key, so each pair is drawn once.
        key = tuple(sorted([i, j]))
        if key in seen:
            continue
        print(key)
        seen.add(key)
        x = metric_columns[i]
        y = metric_columns[j]
        graph.scatter('data/%s-%s' % (i, j), x, y, True,
                      limits[i], limits[j], i, j)
# Results of ca.roc for the predictions of A and B; each call yields
# (fpr, tpr, auc, thresholds).
fpr_a, tpr_a, auc_a, thresholds_a = ca.roc(a_predict, label)
fpr_b, tpr_b, auc_b, thresholds_b = ca.roc(b_predict, label)
renew(fpr_a, tpr_a, auc_a)
renew(fpr_b, tpr_b, auc_b)

# Same for C, plus one 'C <threshold>' label per threshold.
fpr_c, tpr_c, auc_c, thresholds_c = ca.roc(c_value, label)
c_label = ['C ' + str(value) for value in thresholds_c]

# Scatter the series of C: one '>' marker per (fpr, tpr, threshold), each
# with its own colour. zip stops at the shortest of the four sequences.
for f, t, thres, color in zip(fpr_c, tpr_c, thresholds_c, colors):
    ga.scatter(f, t, 'C: ' + str(thres), '>', color)


# Scatter A and B
def scatter_ab():
    """Plot element 1 of the A and B (fpr, tpr) sequences as 'o' markers."""
    marker = 'o'
    ga.scatter(fpr_a[1], tpr_a[1], 'A', marker, colors[0])
    ga.scatter(fpr_b[1], tpr_b[1], 'B', marker, colors[1])


# Convex hull for A, B and C: stack every C point, the A and B points and
# the (0, 0) / (1, 1) corners into one (n, 2) array of (fpr, tpr) rows.
points = np.vstack((
    np.array([fpr_c, tpr_c]).T,
    np.array([fpr_a[1], tpr_a[1]]),
    np.array([fpr_b[1], tpr_b[1]]),
    np.array([0, 0]),
    np.array([1, 1]),
))
hull = ConvexHull(points)
# Draw the hull as black line segments, one per simplex.
for edge in hull.simplices:
    plt.plot(points[edge, 0], points[edge, 1], 'k-')
# Histograms: one per (column, display title) pair. The output path and the
# last text argument are both derived from the pair, replacing four
# copy-pasted stanzas that differed only in these two values.
histogram_specs = [
    ('smog_index', 'Smog Index'),
    ('coleman_liau_index', 'Coleman Liau Index'),
    ('lix', 'LIX'),
    ('rix', 'RIX'),
]
for column, title in histogram_specs:
    values = [float(v[column]) for v in result]
    graph.hist('data/%s_hist' % column, values, title, 'Frequency',
               'Frequency of %s values' % title)

indices = ['ari', 'flesch_reading_ease', 'flesch_kincaid_grade_level',
           'gunning_fog_index', 'smog_index', 'coleman_liau_index',
           'lix', 'rix']

# Convert each column to floats once, rather than once for every pair the
# column takes part in.
# NOTE(review): the same list object is now passed to graph.scatter for
# several plots; this assumes graph.scatter does not modify its inputs.
metric_columns = {
    name: [float(v[name]) for v in result] for name in indices
}

# Order-independent keys of the pairs already plotted; a set keeps the
# membership test constant-time.
seen = set()
for i in indices:
    for j in indices:
        if i == j:
            continue
        # (a, b) and (b, a) share one sorted key, so each pair is drawn once.
        key = tuple(sorted([i, j]))
        if key in seen:
            continue
        seen.add(key)
        x = metric_columns[i]
        y = metric_columns[j]
        graph.scatter('data/%s-%s' % (i, j), x, y, True,
                      x_title=i, y_title=j)