def RF_model(X, y):
    RF_clf = RandomForestClassifier(n_estimators=150, max_depth=10,
                                    min_samples_split=100, min_samples_leaf=100,
                                    n_jobs=-1)
    # Re-split the dataset, keeping 1/3 of the data as the training set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.66,
                                                        random_state=42)
    # Fit the model
    RF_clf.fit(X_train, y_train)
    # Make predictions
    y_pred = RF_clf.predict(X_test)
    print("Accuracy: {0:.3f}\n".format(accuracy_score(y_test, y_pred)))
    print('Kappa:', skll.kappa(y_test, y_pred))
    print("\nConfusion matrix:\n{}\n".format(confusion_matrix(y_test, y_pred)))
    labels = np.unique(np.concatenate((y_test, y_pred)))
    scores_image = mglearn.tools.heatmap(
        confusion_matrix(y_test, y_pred), xlabel='Predicted label',
        ylabel='True label', xticklabels=labels, yticklabels=labels,
        cmap='viridis', fmt='%d')
    plt.title("Confusion matrix")
    plt.gca().invert_yaxis()
    print(classification_report(y_test, y_pred))
    print("The training of the model ends!")
    return RF_clf
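A minimal sketch of how RF_model might be driven, assuming X and y were loaded elsewhere and matplotlib's pyplot is imported as plt; plt.show() is worth noting because the function draws the confusion-matrix heatmap but never displays it:

# Hypothetical driver: X is the feature matrix, y the labels.
rf = RF_model(X, y)
plt.show()  # render the heatmap built inside RF_model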
def kappa_score(events, data_student, data_RE):
    for event in events:
        label_student = check_label_event(event, data_student, 'student')
        label_RE = check_label_event(event, data_RE, 'RE')
        print(event + '\t' + str(kappa(label_RE, label_student)))
def cal_cohen_kappa_by_hit(results):
    # Collect predicates by hit_id.
    preds_by_hit = {}
    for sent in results.values():
        for pred in sent.predicates.values():
            if pred.hit_id not in preds_by_hit:
                preds_by_hit[pred.hit_id] = []
            preds_by_hit[pred.hit_id].append(pred)
    kappas = []
    # Calculate kappa per HIT.
    for preds in preds_by_hit.values():
        # Build one answer list per worker.
        worker_answers_by_hit = {worker_id: [] for worker_id in preds[0].workers.keys()}
        for pred in preds:
            for worker in pred.workers.values():
                worker_answers_by_hit[worker.workerid].append(worker.answer)
        # Calculate kappa for each pair of answer lists.
        worker_answers = worker_answers_by_hit.values()
        pairs_combinations = itertools.combinations(worker_answers, 2)
        for l1, l2 in pairs_combinations:
            kappa_value = skll.kappa(l1, l2)  # weights='quadratic' is another option
            kappas.append(kappa_value)
    avg_kappa = sum(kappas) / len(kappas)
    print(len(preds_by_hit))
    print("The average Cohen's kappa of all HITs is %f." % avg_kappa)
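To make the pairwise step concrete, a toy run of the same combinations logic, assuming the skll.kappa used throughout these snippets; the worker answer lists are invented:

import itertools
import skll

worker_answers = [
    [0, 1, 1, 0, 1],  # worker A
    [0, 1, 0, 0, 1],  # worker B
    [1, 1, 1, 0, 1],  # worker C
]
# combinations() yields each unordered pair once: (A, B), (A, C), (B, C).
kappas = [skll.kappa(l1, l2)
          for l1, l2 in itertools.combinations(worker_answers, 2)]
print(sum(kappas) / len(kappas))  # average pairwise Cohen's kappa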
def kapp_sentiment_event(path_sent, path_event, sentiment, event):
    load_sent = load_file(path_sent, 'allTweets_ver3_sentLabel_' + sentiment + '.csv')
    load_event = load_file(path_event, event + '.csv')
    list_sent, list_gt = list(), list()
    for i in range(len(load_sent)):
        split_sent, split_gt = load_sent[i].split('\t'), load_event[i].split('\t')
        label, gt = int(split_sent[0]), int(split_gt[1])
        list_sent.append(label)
        list_gt.append(gt)
    print('Kappa score of ' + sentiment + ' and ' + event + ':' + '\t'
          + str(kappa(list_gt, list_sent)))
def Kappa(y_true, y_pred, **kwargs):
    if not KAGGLE:
        from skll import kappa
        # from kappa import kappa
    return kappa(y_true, y_pred, **kwargs)
# Vectorize the test set with the same transformer used for training.
ftTest = d.transform(test.T.to_dict().values())

# Random 80/20 train / cross-validation split.
msk = np.random.rand(ftData.shape[0]) < 0.8
ftTrain = ftData[msk, 0:ftData.shape[1]]
ftCv = ftData[~msk, 0:ftData.shape[1]]
yTrain = dataPred[msk]
yCv = dataPred[~msk]

clf = GradientBoostingRegressor(n_estimators=3000, max_depth=8,
                                min_samples_split=10, min_samples_leaf=2,
                                max_features="auto", verbose=1,
                                random_state=1988)
clf.fit(ftTrain, yTrain)

trainPred = clf.predict(ftTrain)
trainPred = [adjustResponse(resp) for resp in trainPred]
kTrain = kappa(yTrain, trainPred, weights="quadratic")
print("Ktrain : " + str(kTrain))

cvPred = clf.predict(ftCv)
cvPred = [adjustResponse(resp) for resp in cvPred]
kCv = kappa(yCv, cvPred, weights="quadratic")
print("cvPred : " + str(kCv))

joblib.dump(clf, "GBMModel3000_sameCvMat/GBMModel3000_sameCvMat")
# Keep only rows where both raters gave a score.
filtered_data = [datum for datum in data
                 if datum['kevin'] != '' and datum['amy'] != '']
filtered_kevin, filtered_amy = zip(*[(int(datum['kevin']), int(datum['amy']))
                                     for datum in filtered_data
                                     if datum['kevin'] != '' and datum['amy'] != ''])
ratings = [str(r) for r in range(4)]
kevin_amy_contingency = np.array(
    [[len([item for item in filtered_data
           if item['kevin'] == kevin_rating and item['amy'] == amy_rating])
      for kevin_rating in ratings]
     for amy_rating in ratings])
print('Did Kevin and Amy rate differently? %s'
      % fisher_test(numpy2ri(kevin_amy_contingency), simulate_p_value=True, B=6000), file=f)
print('---For all data------', file=f)
print('Chance agreement: %.02f'
      % (np.trace(contingency_table) / float(contingency_table.sum())), file=f)
print("Cohen's kappa: %.02f" % skll.kappa(kevin, amy), file=f)
print('---------', file=f)
print('---For full fields-----', file=f)
print('Chance agreement: %.02f'
      % (np.trace(filtered_contingency_table) / float(filtered_contingency_table.sum())), file=f)
print("Cohen's kappa: %.02f" % skll.kappa(filtered_kevin, filtered_amy), file=f)
print('---------', file=f)

# -- Does reflection relate to grade?
unfiltered_distribution_grades = [float(item['exam']) for item in data if item['exam'] != '']
filtered_distribution_grades = [float(item['exam']) for item in filtered_data if item['exam'] != '']
quartiles = [(0, 70), (70, 80), (80, 90), (90, 100)]
unfiltered_quartiles = [(np.percentile(unfiltered_distribution_grades, lower),
                         np.percentile(unfiltered_distribution_grades, upper))
                        for lower, upper in quartiles]
data = []
sentiment_r1_5_scale = []
sentiment_r2_5_scale = []
try:
    for r1, r2 in zip(sentences_r1, sentences_r2):
        sentiment_r1_5_scale.append(int(r1[5]))
        data.append((6, r1[0], r1[5]))  # (coder id, item id, label) for rater 1
        sentiment_r2_5_scale.append(int(r2[5]))
        data.append((7, r2[0], r2[5]))  # (coder id, item id, label) for rater 2
        if r1[0] != r2[0]:
            print(r1[0])
except Exception as e:
    print(e)

# Disconnect from the server.
db.close()
print(i)
print(skll.kappa(sentiment_r1_5_scale, sentiment_r2_5_scale))

annotation = AnnotationTask(data=data)
print(annotation.kappa())
print(annotation.alpha())
def evalerror(preds, dtrain):
    # Custom XGBoost evaluation: quadratic weighted kappa on the true labels.
    labels = dtrain.get_label()
    k = kappa(labels, preds, weights="quadratic")
    return 'error', k
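A sketch of how such a custom metric is typically wired into XGBoost's training loop, assuming the classic xgb.train API; params, dtrain, and dvalid are placeholders, and maximize=True matters because kappa is a higher-is-better score despite the 'error' name:

import xgboost as xgb

# params, dtrain, dvalid defined elsewhere; shown only to illustrate the hook.
bst = xgb.train(params, dtrain, num_boost_round=100,
                evals=[(dvalid, 'valid')],
                feval=evalerror,  # the function above
                maximize=True)    # kappa: larger is better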
def compute_kappa(data):
    print("kappa:", skll.kappa(data[0], data[1]))
    # print("linear weighted kappa:", skll.kappa(data[0], data[1], weights='linear'))
    print("quadratic weighted kappa:", skll.kappa(data[0], data[1], weights='quadratic'))
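Since several of these snippets lean on skll.kappa(..., weights='quadratic'), a minimal NumPy sketch of what that score computes can serve as a cross-check; the function name and the tiny example ratings are made up, and it assumes integer ratings spanning at least two categories:

import numpy as np

def quadratic_weighted_kappa(y_true, y_pred):
    """Reference quadratic weighted kappa, for sanity-checking library output."""
    y_true = np.asarray(y_true, dtype=int)
    y_pred = np.asarray(y_pred, dtype=int)
    # Map ratings onto 0..n-1 over the union of observed categories.
    lo = min(y_true.min(), y_pred.min())
    n = max(y_true.max(), y_pred.max()) - lo + 1
    # Observed agreement matrix.
    observed = np.zeros((n, n))
    for t, p in zip(y_true - lo, y_pred - lo):
        observed[t, p] += 1
    # Expected matrix under independent marginals, scaled to the same total.
    hist_true = np.bincount(y_true - lo, minlength=n)
    hist_pred = np.bincount(y_pred - lo, minlength=n)
    expected = np.outer(hist_true, hist_pred) / len(y_true)
    # Quadratic disagreement weights: (i - j)^2 / (n - 1)^2.
    i, j = np.indices((n, n))
    weights = ((i - j) ** 2) / ((n - 1) ** 2)
    return 1.0 - (weights * observed).sum() / (weights * expected).sum()

print(quadratic_weighted_kappa([1, 2, 3, 3], [1, 2, 2, 3]))  # 0.8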
# Collect the data needed for the accuracy assessment.
xys = []
classes = []
with open(accuracy_fn) as fp:
    reader = csv.reader(fp)
    next(reader)
    for row in reader:
        xys.append([float(n) for n in row[:2]])
        classes.append(int(row[2]))

# Convert map coordinates to pixel offsets and sample the prediction raster.
ds = gdal.Open(prediction_fn)
pixel_trans = gdal.Transformer(ds, None, [])
offset, ok = pixel_trans.TransformPoints(True, xys)
cols, rows, z = zip(*offset)
data = ds.GetRasterBand(1).ReadAsArray()
# Cast the float pixel offsets to int for NumPy fancy indexing.
sample = data[np.array(rows, dtype=int), np.array(cols, dtype=int)]
del ds

# Compute kappa.
print('Kappa:', skll.kappa(classes, sample))

# Create the confusion matrix.
labels = np.unique(np.concatenate((classes, sample)))
matrix = metrics.confusion_matrix(classes, sample, labels=labels)

# Add labels to the matrix and save it.
matrix = np.insert(matrix, 0, labels, 0)
matrix = np.insert(matrix, 0, np.insert(labels, 0, 0), 1)
np.savetxt(matrix_fn, matrix, fmt='%1.0f', delimiter=',')
def Kappa(y_true, y_pred, **kwargs):
    from numpy import clip
    from skll import kappa
    return kappa(clip(y_true, 1, 8), clip(y_pred, 1, 8), **kwargs)
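A quick illustration of what the clipping buys, assuming the 1..8 rating scale implied by the wrapper; the example values are invented:

# Out-of-range predictions (0 and 9) are snapped to the 1..8 scale before
# scoring, so they count as 1 and 8 rather than as extra categories.
print(Kappa([1, 4, 8], [0, 4, 9], weights='quadratic'))  # 1.0 after clipping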
valid_df = pd.read_csv(valid_fn)
# Land-cover class names: bare land, buildings, cropland, forest, industrial
# land, tidal flat, paddy field, reed grassland, salt pan, water.
class_type = ['裸地', '建筑', '农田', '林地', '工业用地', '潮滩', '稻田', '芦苇草地', '盐田', '水域']
# Get the image coordinates (xoff, yoff)
xys = np.array(valid_df[['xoff', 'yoff']])
# Extract the class labels
y = np.array(valid_df['class'], dtype=int)
del valid_df
cols, rows = zip(*xys)
del xys
data = tif_ds.GetRasterBand(1).ReadAsArray()
y_hat = data[rows, cols]
print('Kappa:', skll.kappa(y, y_hat))
labels = np.unique(np.concatenate((y, y_hat)))
matrix = confusion_matrix(y, y_hat, labels=labels)
print("\nConfusion matrix:\n{}\n".format(matrix))
scores_image = mglearn.tools.heatmap(matrix, xlabel='Predicted label',
                                     ylabel='True label', xticklabels=labels,
                                     yticklabels=labels, cmap='viridis', fmt='%d')
plt.title("RF_opt_Confusion matrix")
plt.gca().invert_yaxis()
report = classification_report(y, y_hat, target_names=class_type)