def plot_time_boxplot(dataset_id):
    # data: 1st level = technique, 2nd = list of revisions, 3rd = list of observations
    data = []
    for i, technique_id in enumerate(technique_list):
        technique_results = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        for revision in range(len(history) - 1):
            un_mov = Metrics.compute_unavoidable_movement(
                history[revision], history[revision + 1])
            delta_vis = Metrics.compute_delta_vis(history[revision],
                                                  history[revision + 1])
            diff = 1 - (delta_vis - un_mov)
            technique_results.append(diff)
        data.append(technique_results)
    TimeBoxplot.plot(data, technique_list,
                     title="Unavoidable Movement - " + dataset_id)
    TimeBoxplot.plot(data, technique_list, median_sorted=True,
                     title="Unavoidable Movement - " + dataset_id)
def do_eval(self, phase):
    eval_batch_gen = self.pipe.batch_gen(phase=phase)
    eval_size, eval_n_acc = 0, 0.0
    y_list = []
    y_list_ = []
    with torch.no_grad():
        for eval_batch_dict in eval_batch_gen:
            eval_result = self.model(eval_batch_dict, 0, phase)
            eval_batch_y, eval_batch_y_ = eval_result["y"], eval_result["y_"]
            eval_batch_n_acc = metrics.n_accurate(eval_batch_y, eval_batch_y_)
            eval_n_acc += eval_batch_n_acc
            eval_size += float(eval_batch_dict["batch_size"])
            y_list.extend(eval_batch_y)
            y_list_.extend(eval_batch_y_)
    y_list = torch.stack(y_list, 0).detach().cpu().numpy()
    y_list_ = torch.stack(y_list_, 0).detach().cpu().numpy()
    acc = metrics.eval_acc(eval_n_acc, eval_size)
    tp, fp, tn, fn = metrics.create_confusion_matrix(y_list, y_list_, True)
    self.train_logger.info(
        "eval tp = {}, fp = {}, tn = {}, fn = {}".format(tp, fp, tn, fn))
    mcc = metrics.eval_mcc(tp, fp, tn, fn)
    res = {"acc": acc, "mcc": mcc, "size": eval_size}
    return res
def run_clustering_city(filepath, filename, k, eps, latitude, longitude):
    """
    Clusters the data for a given city and draws the result on a map.
    :param filepath: path of the .csv file
    :param filename: name of the .csv file
    :param k: the value of k
    :param eps: the value of eps
    :param latitude: latitude of the city
    :param longitude: longitude of the city
    :return: None
    """
    d = Cluster.ClusterGreatCircles(filepath, filename)
    # use the k and eps passed in; the original hard-coded loops over
    # [7] and [50] shadowed both parameters
    c = Clustering.K_MXTGreatCircle(eps, k, d)
    c()
    m = Metrics.Modularity(c)
    print(f'k-MXT k={k} eps={eps} Modularity={m()}')
    c.cluster.view_at_map(latitude=latitude, longitude=longitude,
                          filename_of_map=f'{k}-MXT-eps{eps}')
    c = Clustering.K_MXTGaussGreatCircle(eps, k, d)
    c()
    c.cluster.view_at_map(latitude=latitude, longitude=longitude,
                          filename_of_map=f'{k}-MXTGauss-eps{eps}')
    m = Metrics.Modularity(c)
    print(f'k-MXT-Gauss k={k} eps={eps} Modularity={m()}')
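# Illustrative call of run_clustering_city (hypothetical file name and
# coordinates, here roughly Saint Petersburg; adjust to your data):
#
# run_clustering_city('data/', 'spb.csv', k=7, eps=50,
#                     latitude=59.93, longitude=30.31)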
def plot_mean_boxplot(dataset_id):
    data = []
    for i, technique_id in enumerate(technique_list):
        print(Globals.acronyms[technique_id], end=' ', flush=True)
        technique_data = []
        history = Parser.parse_rectangles(technique_id, dataset_id)
        for revision in range(len(history) - 1):
            delta_vis = Metrics.compute_delta_vis(history[revision],
                                                  history[revision + 1])
            delta_data = Metrics.compute_delta_data(history[revision],
                                                    history[revision + 1])
            un_mov = Metrics.compute_unavoidable_movement(
                history[revision], history[revision + 1])
            ratios = (1 - delta_vis) / (1 - delta_data)
            diffs = 1 - abs(delta_vis - delta_data)
            unavoidable = 1 - (delta_vis - un_mov)
            mean = (ratios + diffs + unavoidable) / 3
            technique_data.append(mean)
        data.append(technique_data)
    TimeBoxplot.plot(data, technique_list, title='Mean - ' + dataset_id)
    TimeBoxplot.plot(data, technique_list, median_sorted=True,
                     title='Mean - ' + dataset_id)
def evaluate_sentences(self, test_data, test_relations):
    gold = []
    detects = []
    for i, data in enumerate(test_data):
        if self.use_dependency_features:
            _, ne, _, _ = data
        else:
            _, ne, _ = data
        # Get all combinations of named entities:
        ne_combinations = map(list, itertools.product(ne, repeat=2))
        disc = [get_match(n[0], n[1], test_relations[i])
                for n in ne_combinations]
        gold.extend(disc)
        detects.append([g != 0 for g in disc])
    sentence_pred = self.predict_sentences(test_data, detects)
    pred = list(itertools.chain(*sentence_pred))
    return (Metrics.precision(pred, gold, 2),
            Metrics.recall(pred, gold, 2),
            Metrics.f1(pred, gold, 2))
def process_tweet_credibility_vectors(rows):
    tweets = Metrics.groupby_element(rows, 1, 2, 5)
    score_vectors = get_credibility_scores(tweets)
    score_matrix = get_score_matrix(score_vectors)
    scores_to_csv('Output/scores_t2.csv', score_matrix)
    tweet_vectors = [(tweet, Metrics.get_group_vector(tweet_matrix))
                     for tweet, tweet_matrix in tweets]
    return append_rows_with_cosine(rows, 1, tweet_vectors, 2, 5)
def _loss(y_true, y_pred):
    embeds_ao = []
    _len = 0
    for i in range(len(e_len)):
        embed_a = y_pred[:, _len:(_len + e_len[i])]
        embed_o = y_pred[:, (_len + e_len[i]):(_len + (e_len[i] * 2))]
        embeds_ao.append((embed_a, embed_o))
        _len += e_len[i] * 2
    output_a = y_pred[:, _len:(_len + n_cls)]
    output_o = y_pred[:, (_len + n_cls):(_len + (n_cls * 2))]
    true_a = y_true[:, :n_cls]
    true_o = y_true[:, n_cls:(n_cls * 2)]

    def __loss(anc, oth):
        # _dist_l2 = Metrics.squared_l2_distance(anc, oth)
        """
        Symmetrised Kullback-Leibler divergence
        Kullback, S.; Leibler, R.A. (1951). "On information and
        sufficiency". Annals of Mathematical Statistics. 22 (1): 79-86.
        doi:10.1214/aoms/1177729694. MR 0039968.
        """
        # _dist_kl = Metrics.kullback_leibler(anc, oth) +\
        #     Metrics.kullback_leibler(oth, anc)
        """
        Jensen-Shannon distance (square root of the JS divergence)
        Endres, D. M.; J. E. Schindelin (2003). "A new metric for
        probability distributions". IEEE Trans. Inf. Theory. 49 (7):
        1858-1860. doi:10.1109/TIT.2003.813506.
        """
        _dist_js = K.sqrt(Metrics.jensen_shannon(anc, oth))
        """
        Squared Hellinger distance
        Nikulin, M.S. (2001) [1994], "Hellinger distance" in Hazewinkel,
        Michiel, Encyclopedia of Mathematics, Springer Science+Business
        Media B.V. / Kluwer Academic Publishers, ISBN 978-1-55608-010-4
        """
        # _dist_hl = Metrics.squared_hellinger(anc, oth)
        _loss = \
            -K.tanh(_dist_js) * K.log(K.maximum(K.tanh(_dist_js),
                                                K.epsilon()))
        return _loss

    loss = 0
    for i in range(len(e_len)):
        loss += __loss(*embeds_ao[i])
    loss += \
        Metrics.cross_entropy(true_a, output_a) +\
        Metrics.cross_entropy(true_o, output_o)
    return loss
def _loss(y_true, y_pred):
    output_a = y_pred[:, :e_len]
    output_p = y_pred[:, e_len:(e_len * 2)]
    output_n = y_pred[:, (e_len * 2):(e_len * 3)]
    loss = \
        K.sqrt(Metrics.jensen_shannon(output_a, output_p)) +\
        -K.log(K.tanh(K.sqrt(Metrics.jensen_shannon(output_a, output_n))))
    return loss
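# A minimal sketch of what the Metrics.jensen_shannon helper used in the
# losses above could look like in Keras backend ops, assuming both inputs
# are probability distributions along the last axis. The project's actual
# Metrics module is not shown here, so the name and behavior are assumptions.
from keras import backend as K

def jensen_shannon(p, q):
    # JS divergence: mean of the KL divergences of p and q to their midpoint m
    p = K.clip(p, K.epsilon(), 1.0)
    q = K.clip(q, K.epsilon(), 1.0)
    m = 0.5 * (p + q)
    kl_pm = K.sum(p * K.log(p / m), axis=-1)
    kl_qm = K.sum(q * K.log(q / m), axis=-1)
    return 0.5 * (kl_pm + kl_qm)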
def pearson_matrix(dataset_ids):
    matrix = []
    for dataset_id in dataset_ids:
        dataset_values = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            # Compute all delta_vis and delta_data values for a dataset
            # (one pair per cell)
            all_delta_data = np.array([])
            all_delta_vis = np.array([])
            for revision in range(len(history) - 1):
                delta_data = Metrics.compute_delta_data(
                    history[revision], history[revision + 1])
                all_delta_data = np.append(all_delta_data, delta_data)
                delta_vis = Metrics.compute_delta_vis(history[revision],
                                                      history[revision + 1])
                all_delta_vis = np.append(all_delta_vis, delta_vis)
            # Compute linear regression statistics
            slope, intercept, r_value, p_value, std_err = stats.linregress(
                all_delta_data, all_delta_vis)
            dataset_values.append(r_value)
            print(Globals.acronyms[technique_id], dataset_id, r_value)
        matrix.append(dataset_values)
    matrix = np.array(matrix).transpose()
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=False,
                    cell_text=True, title='Pearson')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=True,
                    cell_text=True, title='Pearson')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=False,
                    cell_text=False, title='Pearson')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=True,
                    cell_text=False, title='Pearson')
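# Sanity check on toy data: for simple linear regression, the r_value
# returned by stats.linregress is exactly the Pearson correlation
# coefficient, which is what pearson_matrix stores per cell.
import numpy as np
from scipy import stats

_xs = np.array([0.1, 0.2, 0.3, 0.4])
_ys = np.array([0.12, 0.19, 0.33, 0.41])
_slope, _intercept, _r, _p, _err = stats.linregress(_xs, _ys)
assert np.isclose(_r, np.corrcoef(_xs, _ys)[0, 1])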
def evalFitness(self, predictions, expected):
    if self.isClassification:
        # use accuracy as the fitness measure
        result = mt.Metrics().confusion_matrix(expected, predictions)
        fitness = result[0]
    else:
        # negate RMSE to turn minimization into a maximization problem
        result = mt.Metrics().RootMeanSquareError(expected, predictions)
        fitness = -result
    return fitness
def CalcMCCF1(pred=None, truth=None, probCutoff=0.5, contactCutoff=8.0):
    if pred is None:
        print 'please provide a predicted contact matrix'
        exit(-1)
    if truth is None:
        print 'please provide a true distance matrix'
        exit(-1)
    assert pred.shape == truth.shape

    ## in case the matrix is not square, e.g., an interfacial contact matrix
    seqLen = pred.shape[0]
    seqLen2 = pred.shape[1]

    pred_binary = (pred > probCutoff)
    truth_binary = (0 < truth) & (truth < contactCutoff)
    pred_truth = pred_binary * 2 + truth_binary

    numPredicted = np.sum(pred_binary)
    numTruths = np.sum(truth_binary)
    #print "#predicted=", numPredicted, "#natives=", numTruths

    mask_LR = np.triu_indices(seqLen, 24, m=seqLen2)
    mask_MLR = np.triu_indices(seqLen, 12, m=seqLen2)
    mask_SMLR = np.triu_indices(seqLen, 6, m=seqLen2)

    metrics = []
    for mask in [mask_LR, mask_MLR, mask_SMLR]:
        res = pred_truth[mask]
        total = res.shape[0]
        count = np.bincount(res, minlength=4)
        assert (total == np.sum(count))

        TN = count[0]  ## pred=0, truth=0
        FN = count[1]  ## pred=0, truth=1
        FP = count[2]  ## pred=1, truth=0
        TP = count[3]  ## pred=1, truth=1
        #print TP, FP, TN, FN

        MCC = Metrics.MCC(TP, FP, TN, FN)
        F1, precision, recall = Metrics.F1(TP, FP, TN, FN)
        metrics.extend([MCC, TP, FP, TN, FN, F1, precision, recall])

    return np.array(metrics)
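# Hedged sketch of the Metrics.MCC and Metrics.F1 helpers referenced above,
# assuming the standard definitions; the real module may differ, e.g. in
# how it handles a zero denominator.
import math

def MCC(TP, FP, TN, FN):
    denom = math.sqrt(float(TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))
    return (TP * TN - FP * FN) / denom if denom > 0 else 0.0

def F1(TP, FP, TN, FN):
    precision = TP / float(TP + FP) if (TP + FP) > 0 else 0.0
    recall = TP / float(TP + FN) if (TP + FN) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return f1, precision, recall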
def _loss(y_true, y_pred):
    embeds_apn = []
    for i in range(len(e_len)):
        # NOTE: this offset assumes every e_len[i] is equal; with mixed
        # embedding sizes a running offset would be needed
        _len = i * (e_len[i] * 3)
        embed_a = y_pred[:, _len:(_len + e_len[i])]
        embed_p = y_pred[:, (_len + e_len[i]):(_len + (e_len[i] * 2))]
        embed_n = y_pred[:, (_len + (e_len[i] * 2)):(_len + (e_len[i] * 3))]
        embeds_apn.append((embed_a, embed_p, embed_n))
    out_len = 0
    for i in range(len(e_len)):
        out_len += (e_len[i] * 3)
    output_a = y_pred[:, out_len:(out_len + n_cls)]
    output_p = y_pred[:, (out_len + n_cls):(out_len + (n_cls * 2))]
    output_n = y_pred[:, (out_len + (n_cls * 2)):(out_len + (n_cls * 3))]
    true_a = y_true[:, :n_cls]
    true_p = y_true[:, n_cls:(n_cls * 2)]
    true_n = y_true[:, (n_cls * 2):(n_cls * 3)]
    zero = K.constant(0, dtype=K.floatx())
    one = K.constant(1, dtype=K.floatx())

    def __loss(anc, pos, neg):
        pos_dist_l2 = Metrics.squared_l2_distance(anc, pos)
        neg_dist_l2 = Metrics.squared_l2_distance(anc, neg)
        pos_dist_kl = Metrics.kullback_leibler(anc, pos) +\
            Metrics.kullback_leibler(pos, anc)
        neg_dist_kl = Metrics.kullback_leibler(anc, neg) +\
            Metrics.kullback_leibler(neg, anc)
        _loss = \
            Metrics.entropy(K.tanh(pos_dist_kl)) +\
            Metrics.entropy(K.tanh(neg_dist_kl)) +\
            Metrics.entropy(K.tanh(pos_dist_l2)) +\
            Metrics.entropy(K.tanh(neg_dist_l2)) +\
            Metrics.cross_entropy(zero, K.tanh(pos_dist_kl)) +\
            Metrics.cross_entropy(one, K.tanh(neg_dist_kl)) +\
            Metrics.cross_entropy(zero, K.tanh(pos_dist_l2)) +\
            Metrics.cross_entropy(one, K.tanh(neg_dist_l2))
        return _loss

    loss = 0
    for i in range(len(e_len)):
        loss += __loss(*embeds_apn[i])
    loss += \
        Metrics.cross_entropy(true_a, output_a) +\
        Metrics.cross_entropy(true_p, output_p) +\
        Metrics.cross_entropy(true_n, output_n)
    return loss
def make_prediction(self):
    x, y, pred = self.OpticDiscPrediction()
    self.x = x
    self.y = y
    copy = self.currentImg.copy()
    drawCopy = self.currentImg.copy()
    drawCopy = moil.stackImageChannels(drawCopy)
    w, h, c = moil.getWidthHeightChannels(copy)
    xShift = int(80 * w / 600)
    yShift = int(80 * (w * 0.75) / 450)
    xExitShift = int(40 * w / 600)
    yExitShift = int(40 * (w * 0.75) / 450)
    roi = moil.getRegionOfInterest(copy, x, y, xShift, yShift)
    roiExit = moil.getRegionOfInterest(copy, x, y, xExitShift, yExitShift)
    atrophyRate, atrophyMap = self.AtrophyPrediction(roi)
    self.atrophyRate = atrophyRate
    # Polish UI text: "Atrophy rate (arteriovenous phase only): ..."
    self.label.configure(text="Stopień zaniku (tylko faza tętniczo-żylna): " +
                              str(atrophyRate))
    xExit, yExit = self.ExitPrediction(roiExit, xExitShift, yExitShift, x, y)
    self.xOut = xExit
    self.yOut = yExit
    dist = np.linalg.norm(
        np.asarray([xExit / w * 600, yExit / (w * 0.75) * 450]) -
        np.asarray([x / w * 600, y / (w * 0.75) * 450]))
    # Polish UI text: "Vessel displacement (arteriovenous or late phase): ...",
    # with ", SIGNIFICANT!" appended when the distance exceeds the threshold
    if dist > 16:
        self.labelExit.configure(
            text='Przesunięcie naczyń (faza tętniczo-żylna lub późna) : ' +
                 str(dist) + ', ZNACZNE!')
    else:
        self.labelExit.configure(
            text='Przesunięcie naczyń (faza tętniczo-żylna lub późna) : ' +
                 str(dist))
    wA, hA, cA = moil.getWidthHeightChannels(atrophyMap)
    mask = np.zeros((h, w), drawCopy.dtype)
    mask = moil.addToRegionOfInterest(mask, x, y, round(wA / 2 + 0.00001),
                                      round(hA / 2 + 0.00001), atrophyMap)
    # overlay the atrophy mask in red on the drawing copy
    redImg = np.zeros(drawCopy.shape, drawCopy.dtype)
    redImg[:, :] = (255, 0, 0)
    redMask = cv2.bitwise_and(redImg, redImg, mask=mask)
    drawCopy = cv2.addWeighted(redMask, 1, drawCopy, 1, 0)
    cv2.rectangle(drawCopy, (x - xShift, y - yShift),
                  (x + xShift, y + yShift), (127, 0, 127),
                  int(5 / 1387 * w))
    cv2.circle(drawCopy, (x, y), int(12 / 1387 * w), (127, 0, 127),
               thickness=int(5 / 1387 * w))
    met.draw(pred, drawCopy, thickness=int(4 / 1387 * w))
    cv2.circle(drawCopy, (xExit, yExit), int(12 / 1387 * w), (0, 127, 0),
               thickness=int(5 / 1387 * w))
    self.updateGuiImage(drawCopy)
    self.predicted = True
def unavoidable_matrix(dataset_ids):
    matrix = []
    for dataset_id in dataset_ids:
        dataset_values = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            all_unavoidable = np.array([])
            for revision in range(len(history) - 1):
                un_mov = Metrics.compute_unavoidable_movement(
                    history[revision], history[revision + 1])
                delta_vis = Metrics.compute_delta_vis(history[revision],
                                                      history[revision + 1])
                diff = 1 - (delta_vis - un_mov)
                all_unavoidable = np.append(all_unavoidable, diff.values)
            dataset_values.append(all_unavoidable.mean())
            print(Globals.acronyms[technique_id], dataset_id,
                  all_unavoidable.mean())
        matrix.append(dataset_values)
    matrix = np.array(matrix).transpose()
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=False,
                    cell_text=True, title='Unavoidable')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=True,
                    cell_text=True, title='Unavoidable')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=False,
                    cell_text=False, title='Unavoidable')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=True,
                    cell_text=False, title='Unavoidable')
def testModel(name, mode, XS, YS, YS_multi):
    print('Model Testing Started ...', time.ctime())
    print('TIMESTEP_IN, TIMESTEP_OUT', TIMESTEP_IN, TIMESTEP_OUT)
    XS_torch = torch.Tensor(XS).to(device)
    YS_torch = torch.Tensor(YS).to(device)
    test_data = torch.utils.data.TensorDataset(XS_torch, YS_torch)
    test_iter = torch.utils.data.DataLoader(test_data, BATCHSIZE,
                                            shuffle=False)
    model = getModel(name)
    model.load_state_dict(torch.load(PATH + '/' + name + '.pt'))
    criterion = nn.MSELoss()
    torch_score = evaluateModel(model, criterion, test_iter)
    YS_pred_multi = predictModel_multi(model, test_iter)
    print('YS_multi.shape, YS_pred_multi.shape,', YS_multi.shape,
          YS_pred_multi.shape)
    YS_multi, YS_pred_multi = np.squeeze(YS_multi), np.squeeze(YS_pred_multi)
    for i in range(YS_multi.shape[1]):
        YS_multi[:, i, :] = scaler.inverse_transform(YS_multi[:, i, :])
        YS_pred_multi[:, i, :] = scaler.inverse_transform(
            YS_pred_multi[:, i, :])
    print('YS_multi.shape, YS_pred_multi.shape,', YS_multi.shape,
          YS_pred_multi.shape)
    np.save(PATH + '/' + MODELNAME + '_prediction.npy', YS_pred_multi)
    np.save(PATH + '/' + MODELNAME + '_groundtruth.npy', YS_multi)
    MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS_multi, YS_pred_multi)
    print('*' * 40)
    print("%s, %s, Torch MSE, %.10e, %.10f\n" %
          (name, mode, torch_score, torch_score))
    f = open(PATH + '/' + name + '_prediction_scores.txt', 'a')
    f.write("%s, %s, Torch MSE, %.10e, %.10f\n" %
            (name, mode, torch_score, torch_score))
    print("all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, "
          "%.10f, %.10f, %.10f, %.10f\n" %
          (name, mode, MSE, RMSE, MAE, MAPE))
    f.write("all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, "
            "%.10f, %.10f, %.10f, %.10f\n" %
            (name, mode, MSE, RMSE, MAE, MAPE))
    # report scores at selected prediction horizons
    for i in [2, 5, 11]:
        MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS_multi[:, i, :],
                                                YS_pred_multi[:, i, :])
        print("%d step, %s, %s, MSE, RMSE, MAE, MAPE, "
              "%.10f, %.10f, %.10f, %.10f\n" %
              (i, name, mode, MSE, RMSE, MAE, MAPE))
        f.write("%d step, %s, %s, MSE, RMSE, MAE, MAPE, "
                "%.10f, %.10f, %.10f, %.10f\n" %
                (i, name, mode, MSE, RMSE, MAE, MAPE))
    f.close()
    print('Model Testing Ended ...', time.ctime())
def testModel(name, mode, XS, YS):
    if LOSS == "GraphWaveNetLoss":
        criterion = Metrics.masked_mae
    if LOSS == 'MSE':
        criterion = nn.MSELoss()
    if LOSS == 'MAE':
        criterion = nn.L1Loss()
    print('Model Testing Started ...', time.ctime())
    print('TIMESTEP_IN, TIMESTEP_OUT', TIMESTEP_IN, TIMESTEP_OUT)
    XS_torch = torch.Tensor(XS).to(device)
    YS_torch = torch.Tensor(YS).to(device)
    test_data = torch.utils.data.TensorDataset(XS_torch, YS_torch)
    test_iter = torch.utils.data.DataLoader(test_data, BATCHSIZE,
                                            shuffle=False)
    model = getModel(name)
    model.load_state_dict(torch.load(PATH + '/' + name + '.pt'))
    torch_score = evaluateModel(model, criterion, test_iter)
    YS_pred = predictModel(model, test_iter)
    print('YS.shape, YS_pred.shape,', YS.shape, YS_pred.shape)
    YS, YS_pred = scaler.inverse_transform(
        np.squeeze(YS)), scaler.inverse_transform(np.squeeze(YS_pred))
    print('YS.shape, YS_pred.shape,', YS.shape, YS_pred.shape)
    np.save(PATH + '/' + MODELNAME + '_prediction.npy', YS_pred)
    np.save(PATH + '/' + MODELNAME + '_groundtruth.npy', YS)
    MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS, YS_pred)
    print('*' * 40)
    print("%s, %s, Torch MSE, %.10e, %.10f" %
          (name, mode, torch_score, torch_score))
    f = open(PATH + '/' + name + '_prediction_scores.txt', 'a')
    f.write("%s, %s, Torch MSE, %.10e, %.10f\n" %
            (name, mode, torch_score, torch_score))
    print("all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, "
          "%.10f, %.10f, %.10f, %.10f" %
          (name, mode, MSE, RMSE, MAE, MAPE))
    f.write("all pred steps, %s, %s, MSE, RMSE, MAE, MAPE, "
            "%.10f, %.10f, %.10f, %.10f\n" %
            (name, mode, MSE, RMSE, MAE, MAPE))
    for i in range(TIMESTEP_OUT):
        MSE, RMSE, MAE, MAPE = Metrics.evaluate(YS[:, i, :], YS_pred[:, i, :])
        print("%d step, %s, %s, MSE, RMSE, MAE, MAPE, "
              "%.10f, %.10f, %.10f, %.10f" %
              (i + 1, name, mode, MSE, RMSE, MAE, MAPE))
        f.write("%d step, %s, %s, MSE, RMSE, MAE, MAPE, "
                "%.10f, %.10f, %.10f, %.10f\n" %
                (i + 1, name, mode, MSE, RMSE, MAE, MAPE))
    f.close()
    print('Model Testing Ended ...', time.ctime())
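# Hedged sketch of the Metrics.masked_mae criterion selected above for
# GraphWaveNetLoss, following the common convention of masking out null
# (e.g. zero) readings before averaging; the project's actual
# implementation may differ in its null value or NaN handling.
import torch

def masked_mae(preds, labels, null_val=0.0):
    mask = (labels != null_val).float()
    mask = mask / torch.mean(mask)  # rescale so the mask averages to 1
    mask = torch.where(torch.isnan(mask), torch.zeros_like(mask), mask)
    loss = torch.abs(preds - labels) * mask
    loss = torch.where(torch.isnan(loss), torch.zeros_like(loss), loss)
    return torch.mean(loss)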
def delta_ratio_matrix(dataset_ids):
    matrix = []
    for dataset_id in dataset_ids:
        dataset_values = []
        for technique_id in technique_list:
            history = Parser.parse_rectangles(technique_id, dataset_id)
            all_ratios = np.array([])
            for revision in range(len(history) - 1):
                delta_vis = Metrics.compute_delta_vis(history[revision],
                                                      history[revision + 1])
                delta_data = Metrics.compute_delta_data(
                    history[revision], history[revision + 1])
                ratio = (1 - delta_vis) / (1 - delta_data)
                all_ratios = np.append(all_ratios, ratio.values)
            dataset_values.append(all_ratios.mean())
            print(Globals.acronyms[technique_id], dataset_id,
                  all_ratios.mean())
        matrix.append(dataset_values)
    matrix = np.array(matrix).transpose()
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=False,
                    cell_text=True, title='Delta ratio')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=True,
                    cell_text=True, title='Delta ratio')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=False,
                    cell_text=False, title='Delta ratio')
    MatrixPlot.plot(matrix, dataset_ids, technique_list, shared_cm=True,
                    cell_text=False, title='Delta ratio')
def get_score_matrix(score_vectors):
    matrix = []
    for tweet_combi, vector in score_vectors:
        tweet_split = tweet_combi.split('/')
        matrix.append([tweet_split[0], (tweet_split[1], sum(vector))])
    grouped = Metrics.groupby_one_element(matrix, 0, 1)
    table = []
    for group in grouped:
        aggr = Metrics.groupby_one_element(group[1], 0, 1)
        # avoid shadowing the builtin id()
        aggr = [(tweet_id, sum(counts)) for (tweet_id, counts) in aggr]
        aggr.append((group[0], 'x'))
        table.append([group[0], aggr])
    return table
def validate(model, device, dataset, batch_size=64):
    batches = len(dataset)
    model.train(False)
    ground_truths = []
    predictions = []
    loss = 0
    criterion = nn.CrossEntropyLoss()
    with torch.no_grad():
        for data in tqdm(dataset):
            X = data['data'].to(device).float()
            Y = data['labels'].to(device).long()
            output = model(X)
            del X
            loss += criterion(output, Y)
            classification_predictions = torch.argmax(output, dim=1).squeeze()
            for pred in classification_predictions:
                predictions.append(pred.cpu().numpy())
            for truth in Y:
                ground_truths.append(truth.cpu().numpy())
            del output
    ground_truths = np.asarray(ground_truths)
    torch.cuda.empty_cache()
    val_loss = (loss / batches).cpu()
    predictions = np.asarray(predictions)
    # Collapse class 2 into class 1 for a binary view of the task
    binary_predictions = predictions.copy()
    binary_predictions[binary_predictions == 2] = 1
    binary_ground_truths = ground_truths.copy()
    binary_ground_truths[binary_ground_truths == 2] = 1
    val_acc = Metrics.accuracy(predictions, ground_truths)
    val_sens = Metrics.sensitivity(predictions, ground_truths)
    val_spec = Metrics.specificity(predictions, ground_truths)
    # sklearn metrics expect (y_true, y_pred)
    val_precision = precision_score(ground_truths, predictions)
    val_recall = recall_score(ground_truths, predictions)
    binary_acc = (np.sum(binary_predictions == binary_ground_truths) /
                  len(binary_ground_truths))
    val_f1 = f1_score(ground_truths, predictions)
    val_mcc = matthews_corrcoef(ground_truths, predictions)
    print('Accuracy: {}, Binary Accuracy: {} Val F1: {} Val Loss: {}'.format(
        val_acc, binary_acc, val_f1, val_loss))
    return val_loss, val_acc, val_precision, val_recall, val_f1, val_mcc
def train(train_loader, model, criterion, optimizer, epoch, use_cuda):
    batch_time = AverageMeter()
    data_time = AverageMeter()
    losses = AverageMeter()
    top5 = AverageMeter()

    # switch to train mode
    model.train()

    end = time.time()
    for i, (input, target) in enumerate(train_loader):
        # measure data loading time
        data_time.update(time.time() - end)
        if use_cuda:
            target = target.cuda()
            input = input.cuda()
        input_var = torch.autograd.Variable(input)
        target_var = torch.autograd.Variable(target)

        # compute output
        output = model(input_var)
        loss = criterion(output, target_var)

        # measure accuracy and record loss
        prec = Metrics.match_accuracy(output.data, target)
        top5.update(prec, input.size(0))
        losses.update(loss.data[0], input.size(0))

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_time.update(time.time() - end)
        end = time.time()

        if i % args.print_freq == 0:
            print('Epoch: [{0}][{1}/{2}]\t'
                  'Time {batch_time.val:.3f} ({batch_time.avg:.3f})\t'
                  'Data {data_time.val:.3f} ({data_time.avg:.3f})\t'
                  'LR {lr:.3f}\t'
                  'Loss {loss.val:.4f} ({loss.avg:.4f})\t'
                  'Prec {mAP.val:.3f} ({mAP.avg:.3f})\t'.format(
                      epoch, i, len(train_loader), batch_time=batch_time,
                      data_time=data_time,
                      lr=optimizer.param_groups[0]['lr'],
                      loss=losses, mAP=top5))
    return top5.avg, losses.avg
def setupMetric(self, metric_string, dataset):
    currentMetric = None
    if metric_string == 'Constant':
        currentMetric = Metrics.ConstantMetric(dataset, self.rankingSize, 1.0)
    elif metric_string == 'Revenue':
        currentMetric = Metrics.Revenue(dataset, self.rankingSize)
    else:
        print("Experiment:setupMetric [ERR] Metric ", metric_string,
              "currently not supported.", flush=True)
        sys.exit(0)
    self.metric = currentMetric
    print("Experiment:setupMetric [INFO] ", metric_string, flush=True)
def __loss(anc, oth):
    # _dist_l2 = Metrics.squared_l2_distance(anc, oth)
    """
    Symmetrised Kullback-Leibler divergence
    Kullback, S.; Leibler, R.A. (1951). "On information and sufficiency".
    Annals of Mathematical Statistics. 22 (1): 79-86.
    doi:10.1214/aoms/1177729694. MR 0039968.
    """
    # _dist_kl = Metrics.kullback_leibler(anc, oth) +\
    #     Metrics.kullback_leibler(oth, anc)
    """
    Jensen-Shannon distance (square root of the JS divergence)
    Endres, D. M.; J. E. Schindelin (2003). "A new metric for probability
    distributions". IEEE Trans. Inf. Theory. 49 (7): 1858-1860.
    doi:10.1109/TIT.2003.813506.
    """
    _dist_js = K.sqrt(Metrics.jensen_shannon(anc, oth))
    """
    Squared Hellinger distance
    Nikulin, M.S. (2001) [1994], "Hellinger distance" in Hazewinkel,
    Michiel, Encyclopedia of Mathematics, Springer Science+Business
    Media B.V. / Kluwer Academic Publishers, ISBN 978-1-55608-010-4
    """
    # _dist_hl = Metrics.squared_hellinger(anc, oth)
    _loss = \
        -K.tanh(_dist_js) * K.log(K.maximum(K.tanh(_dist_js), K.epsilon()))
    return _loss
def predict_doses(self, similarity, data):
    flip_args = Metrics.get_flip_args()
    adjacency = TJaccardModel().get_adjacency_lists(
        data.organ_distances, np.arange(Constants.num_organs))
    normal_distances = data.tumor_distances
    flipped_distances = data.tumor_distances[:, flip_args]
    flipped_doses = data.doses[:, flip_args]
    dose_predictions = np.zeros(
        (data.get_num_patients(), Constants.num_organs))
    for p1 in range(data.get_num_patients()):
        matches = []
        for p2 in range(data.get_num_patients()):
            if p1 == p2:
                continue
            match = self.get_patient_similarity(p1, p2, normal_distances,
                                                flipped_distances,
                                                data.doses, flipped_doses,
                                                adjacency)
            matches.append(match)
        matches = sorted(matches, key=lambda x: -x[0])
        n_matches = self.get_num_matches(p1, matches, data.classes)
        prediction = np.array([x[1] for x in matches[0:n_matches]])
        weights = np.array([x[0] for x in matches[0:n_matches]]).reshape(-1, 1)
        if weights.mean() <= 0:
            print(weights, p1, [x[0] for x in matches])
        # similarity-weighted average of the matched patients' doses:
        # mean(w * x) / mean(w) == sum(w * x) / sum(w)
        dose_predictions[p1, :] = (np.mean(prediction * weights, axis=0) /
                                   np.mean(weights))
    return dose_predictions
def AtrophyPrediction(self, roi):
    img = self.ModAtrophy.predict(roi)
    atrophyRate = met.atrophyRate(img)
    w, h, c = moil.getWidthHeightChannels(self.currentImg)
    img = cv2.resize(img, (round(160 * w / 600),
                           round(160 * (w * 0.75) / 450)))
    img = moil.getBinaryThreshold(img)
    return atrophyRate, img
def tsim(self, d1, d2, adjacency):
    scores = []
    if d2.sum() == np.inf:
        return 0
    for organ_set in adjacency:
        scores.append(Metrics.jaccard_distance(d1[organ_set], d2[organ_set]))
    return np.mean(scores)
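# Assumption-labeled sketch of Metrics.jaccard_distance for the
# real-valued distance vectors compared in tsim above, using the weighted
# (Ruzicka) Jaccard similarity sum(min)/sum(max); the actual helper may
# use a different definition.
import numpy as np

def jaccard_distance(x, y):
    denom = np.sum(np.maximum(x, y))
    if denom == 0:
        return 0.0
    return np.sum(np.minimum(x, y)) / denom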
def feed_forward(self, input_, biases=[], weights=[]):
    if not biases and not weights:
        biases = self.biases
        weights = self.weights
    # for an input, run the network and return the output activation vector
    activation = input_
    for b, w in zip(biases, weights):
        activation = mt.Metrics().sigmoid(np.dot(w, activation) + b)
    return activation
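# For reference, a plausible mt.Metrics().sigmoid as used in feed_forward
# above (assumed to be the standard logistic function; the real class is
# not shown here):
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))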
def performance_measure(self, predicted, labels, dataset, isClassification,
                        k, method):
    mtrx = Metrics.Metrics()
    if isClassification:
        acc, prec, recall = mtrx.confusion_matrix(labels.values, predicted)
        self.update_result(dataset, isClassification, k, method, acc, prec,
                           recall, 0)
    else:
        rmse = mtrx.RootMeanSquareError(labels.values, predicted)
        self.update_result(dataset, isClassification, k, method, 0, 0, 0,
                           rmse)
def find_best_move(board, history, player):
    if board.tostring() not in history:
        history[board.tostring()] = Metrics()
    print("Deciding best move...")
    run_simulations(board, history, player)
    boards = get_possible_moves(board, player)
    # wrap in a list so the counts are printed, not a generator object
    print([history[b.tostring()].count for b in boards])
    values = [history[b.tostring()].get_expected_value(player)
              for b in boards]
    return boards[np.argmax(values)]
def test_validation(self, validate_set, isClassification, epoch):
    # test the model with the validation set and return accuracy or RMSE
    # depending on classification/regression
    predicted = []
    label = []
    if isClassification:
        for x, y in validate_set:
            predicted.append(np.argmax(self.feed_forward(x)))
            label.append(np.argmax(y))
        acc, prec, recall = mt.Metrics().confusion_matrix(label, predicted)
        print('Epoch {0} completed with acc::: {1}'.format(epoch, acc))
        return acc
    else:
        for x, y in validate_set:
            predicted.append(self.feed_forward(x)[0][0])
            label.append(y)
        rmse = mt.Metrics().RootMeanSquareError(np.asarray(label), predicted)
        print('Epoch {0} completed with rmse::: {1}'.format(epoch, rmse))
        return rmse
def __init__(self, sizeOfBuffer, timeout, numberOfThreads, numberOfCores,
             timeQuantum, contextSwitchTime, numberOfClients, randomSeed,
             arrivalTimeDistributionLambda, thinkTimeDistribution,
             serviceTimeDistribution, paramThinkTime1, paramServiceTime1,
             paramThinkTime2=None, paramServiceTime2=None):
    random.seed(randomSeed)
    self.eventList = EventList.EventList()
    Request.Request.initRequestId()
    self.simulationTime = 0
    self.departureCount = 0
    self.clients = []
    for y in range(numberOfClients):
        self.clients.append(
            Client.Client(y, thinkTimeDistribution, paramThinkTime1, 0,
                          paramThinkTime2))  # 0 - thinking
    self.requestList = RequestList.RequestList()
    for index in range(numberOfClients):
        if serviceTimeDistribution == 1 or serviceTimeDistribution == 2:
            # Uniform or Normal distribution
            request = Request.Request(index, arrivalTimeDistributionLambda,
                                      serviceTimeDistribution, timeout,
                                      paramServiceTime1, paramServiceTime2)
        else:
            request = Request.Request(index, arrivalTimeDistributionLambda,
                                      serviceTimeDistribution, timeout,
                                      paramServiceTime1)
        self.requestList.addToRequestList(request)
        # schedule arrival of the request (event type 0)
        newEvent = Event.Event(self.simulationTime + request.arrivalTime, 0,
                               request.requestId)
        self.eventList.enqueueEvent(newEvent)
        # schedule timeout of the request (event type 4 - timeout)
        newEvent1 = Event.Event(
            self.simulationTime + request.arrivalTime + request.timeout, 4,
            request.requestId)
        self.eventList.enqueueEvent(newEvent1)
    self.system = System.System(sizeOfBuffer, numberOfCores, numberOfThreads,
                                timeQuantum, contextSwitchTime)
    self.metrics = Metrics.Metrics()
def process_documents():
    # Read documents
    documents = Utilities.read_from_time_all()
    # Tokenize and stem documents
    documents = Utilities.tokenize_stem_docs(documents)
    # Calculate document lengths
    doc_len = Utilities.calculate_doc_len(documents)
    # Term frequency
    tf = TFIDF.term_frequency(documents)
    # Calculate tf-idf
    tfidf = TFIDF.TFIDF(len(documents), tf)
    # Read queries
    queries = Utilities.read_from_time_que()
    # Tokenize and stem queries
    queries = Utilities.tokenize_stem_docs(queries)

    cosine_result = []
    rsv_result = []
    BM25_1_5 = []  # b=1, k=0.5
    BM25_1_1 = []  # b=1, k=1
    BM25_2_5 = []  # b=2, k=0.5
    BM25_2_1 = []  # b=2, k=1
    for query in queries:
        cosine_result.append(Search.search_by_cosine(tfidf, len(documents),
                                                     query))
        rsv_result.append(Search.search_by_rsv(tf, len(documents), query))
        BM25_1_5.append(Search.search_by_BM25(tf, doc_len, query, 1.0, 0.5))
        BM25_1_1.append(Search.search_by_BM25(tf, doc_len, query, 1.0, 1.0))
        BM25_2_5.append(Search.search_by_BM25(tf, doc_len, query, 2.0, 0.5))
        BM25_2_1.append(Search.search_by_BM25(tf, doc_len, query, 2.0, 1.0))

    # Read relevance judgements from time.rel
    rel_dict = Utilities.read_from_time_rel()

    # Print results (limited to the top 20 search results)
    result = []
    result.append(('System', 'Precision', 'Recall', 'F1', 'MAP'))
    result.append(('cosine ',) +
                  Metrics.getMetrics(cosine_result, rel_dict, 20))
    result.append(('RSV ',) +
                  Metrics.getMetrics(rsv_result, rel_dict, 20))
    result.append(('BM25 (1, .5) ',) +
                  Metrics.getMetrics(BM25_1_5, rel_dict, 20))
    result.append(('BM25 (1, 1) ',) +
                  Metrics.getMetrics(BM25_1_1, rel_dict, 20))
    result.append(('BM25 (2, .5) ',) +
                  Metrics.getMetrics(BM25_2_5, rel_dict, 20))
    result.append(('BM25 (2, 1) ',) +
                  Metrics.getMetrics(BM25_2_1, rel_dict, 20))
    Utilities.tabulate(result)
    Utilities.plot_graph(result)
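# A minimal sketch of the BM25 scoring behind Search.search_by_BM25 above,
# using the standard Okapi BM25 formula with parameters k1 and b; how these
# map onto the (b, k) arguments annotated on the result lists, and the
# exact smoothing used by the project's Search module, are assumptions.
import math

def bm25_score(query_terms, doc_tf, doc_len, avg_doc_len, df, n_docs,
               k1=1.0, b=0.5):
    score = 0.0
    for term in query_terms:
        tf_t = doc_tf.get(term, 0)
        if tf_t == 0 or term not in df:
            continue
        idf = math.log((n_docs - df[term] + 0.5) / (df[term] + 0.5))
        norm = (tf_t * (k1 + 1)) / \
               (tf_t + k1 * (1 - b + b * doc_len / float(avg_doc_len)))
        score += idf * norm
    return score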
def computeMetricsAverageOverFold(self):
    RMSE = []
    AAE = []
    PEARSON = []
    for f in range(0, self.completedFolds):
        metric = Metrics(self.aT[f], self.aP[f])
        metric.computeRMSE()
        RMSE.append(metric.RMSE)
        metric.computeAAE()
        AAE.append(metric.AAE)
        if len(self.aT[f]) > 1:
            metric.computePEARSON()
            PEARSON.append(metric.PEARSON)
            print "Fold"+str(f)+"\nRMSE:"+str(RMSE[f])+" AAE:"+str(AAE[f])+" PEARSON:"+str(PEARSON[f])
        else:
            print "Fold"+str(f)+"\nRMSE:"+str(RMSE[f])+" AAE:"+str(AAE[f])
    averageRMSE = 0.0
    averagePEARSON = 0.0
    averageAAE = 0.0
    for f in range(0, self.completedFolds):
        averageRMSE += RMSE[f]
        averageAAE += AAE[f]
        averagePEARSON += PEARSON[f]
    averageRMSE = averageRMSE/float(self.completedFolds)
    averageAAE = averageAAE/float(self.completedFolds)
    averagePEARSON = averagePEARSON/float(self.completedFolds)
    stdRMSE = 0.0
    stdPEARSON = 0.0
    stdAAE = 0.0
    for f in range(0, self.completedFolds):
        stdRMSE += math.pow((RMSE[f]-averageRMSE), 2)
        stdAAE += math.pow((AAE[f]-averageAAE), 2)
        stdPEARSON += math.pow((PEARSON[f]-averagePEARSON), 2)
    stdRMSE = math.sqrt(stdRMSE/float(self.completedFolds))
    stdAAE = math.sqrt(stdAAE/float(self.completedFolds))
    stdPEARSON = math.sqrt(stdPEARSON/float(self.completedFolds))
    print "FOLD AVERAGE...\nRMSE:"+str(averageRMSE)+" "+str(stdRMSE)+" AAE:"+str(averageAAE)+" "+str(stdAAE)+" PEARSON:"+str(averagePEARSON)+" "+str(stdPEARSON)
def cross_validation_score(x, y, classifier, folds=10, class_value=1.0):
    """
    Creates #folds folds in the dataset, runs the <classifier> on them,
    and computes the average recall, precision, and F1 score.
    """
    if len(x) != len(y):
        raise Exception("Lists are not the same size")

    x = __ensure_np_array__(x)
    y = __ensure_np_array__(y)

    edges = cross_validation_edges(len(x), folds)
    recall, precision, f1_score = 0.0, 0.0, 0.0
    for i in range(folds):
        l, r = edges[i]
        # Note these are numpy objects and cannot be treated as lists
        td_x = np.concatenate((x[:l], x[r:]))
        td_y = np.concatenate((y[:l], y[r:]))
        vd_x = x[l:r]
        vd_y = y[l:r]

        classifier.fit(td_x, td_y)
        pred_y = classifier.predict(vd_x)
        r, p, f1 = Metrics.rpf1(vd_y, pred_y, class_value)
        recall += r
        precision += p
        f1_score += f1

    recall = recall / folds
    precision = precision / folds
    f1_score = f1_score / folds
    return (recall, precision, f1_score)
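# A minimal sketch of the Metrics.rpf1 helper used above, assuming the
# standard recall/precision/F1 definitions for a single positive class
# value; the project's Metrics module is not shown here.
def rpf1(expected, actual, class_value=1.0):
    tp = sum(1 for e, a in zip(expected, actual)
             if e == class_value and a == class_value)
    fp = sum(1 for e, a in zip(expected, actual)
             if e != class_value and a == class_value)
    fn = sum(1 for e, a in zip(expected, actual)
             if e == class_value and a != class_value)
    recall = tp / float(tp + fn) if (tp + fn) > 0 else 0.0
    precision = tp / float(tp + fp) if (tp + fp) > 0 else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if (precision + recall) > 0 else 0.0)
    return recall, precision, f1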
def RunStacked(self, results_file, cv_folds=10, min_word_count=5,
               stem=True, lemmatize=False, remove_stop_words=True,
               layers=2):
    # SETTINGS
    logging.basicConfig(format='%(asctime)s : %(levelname)s : %(message)s',
                        level=logging.INFO)
    print "Results filename: " + results_file
    settings = Settings.Settings()
    results_dir = settings.results_directory + self.sub_dir() + "\\"
    fName = results_dir + results_file

    # TOKENIZE
    data = self.get_data(settings)
    tokenized_docs = WordTokenizer.tokenize(
        data.documents, min_word_count=min_word_count, stem=stem,
        lemmatize=lemmatize, remove_stop_words=remove_stop_words,
        spelling_correct=True, number_fn=NumberStrategy.collapse_num)

    empty_ixs = set([i for i, doc in enumerate(tokenized_docs)
                     if len(doc) < StackedExperimentRunner.__MIN_DOC_LENGTH__])
    tokenized_docs = [t for i, t in enumerate(tokenized_docs)
                      if i not in empty_ixs]

    # TRAINING DATA
    # TODO Make this one call from docs -> td
    (distance_matrix, id2word) = self.get_vector_space(tokenized_docs)
    xs = self.get_training_data(distance_matrix, id2word)
    matrix_mapper = self.matrix_value_mapper()
    if matrix_mapper:
        xs = MatrixHelper.map_matrix(matrix_mapper, xs)

    all_results = self.get_params() + "\n"
    print all_results,

    MIN_CODE_COUNT = 3
    codes = set(self.get_codes(data.sm_codes))
    label_mapper = self.label_mapper()

    # Stop logging now
    logging.disable(logging.INFO)

    xs = ensure_np_array(xs)
    edges = cross_validation_edges(len(xs), cv_folds)

    ys_by_code = {}
    positive_count_by_code = {}
    for code in codes.copy():
        ys = self.get_ys(code, data, empty_ixs, label_mapper, xs)
        ys_by_code[code] = ys
        positive_count = len([item for item in ys if item == 1])
        positive_count_by_code[code] = positive_count
        if positive_count < MIN_CODE_COUNT:
            codes.remove(code)

    dct_td_predictions_by_fold = {}
    dct_vd_predictions_by_fold = {}
    dct_actual_by_fold = {}

    for layer in range(layers):
        print("Layer: {0}".format(layer))
        vd_metrics_for_layer, td_metrics_for_layer = [], []
        vd_metrics_by_code = defaultdict(lambda: [])
        td_metrics_by_code = defaultdict(lambda: [])

        for fold in range(cv_folds):
            l, r = edges[fold]
            # Note these are numpy objects and cannot be treated as lists
            td_x = np.concatenate((xs[:l], xs[r:]))
            vd_x = xs[l:r]

            if layer > 0:
                # feed the previous layer's per-code predictions in as
                # extra stacked features
                lst_td_preds = self.__extract_predictions__(
                    codes, dct_td_predictions_by_fold[fold], td_x)
                td_x = np.concatenate((td_x, np.array(lst_td_preds)), 1)
                lst_vd_preds = self.__extract_predictions__(
                    codes, dct_vd_predictions_by_fold[fold], vd_x)
                vd_x = np.concatenate((vd_x, np.array(lst_vd_preds)), 1)

            dct_td_predictions_per_code = {}
            dct_vd_predictions_per_code = {}
            dct_actual_per_code = {}
            dct_td_predictions_by_fold[fold] = dct_td_predictions_per_code
            dct_vd_predictions_by_fold[fold] = dct_vd_predictions_per_code
            dct_actual_by_fold[fold] = dct_actual_per_code

            class_value = self.get_class_value()
            for code in codes:
                total_codes = positive_count_by_code[code]
                ys = ys_by_code[code]
                td_y = np.concatenate((ys[:l], ys[r:]))
                vd_y = ys[l:r]
                if min(td_y) == max(td_y):
                    # degenerate fold: a single class, so predict it outright
                    val = td_y[0]
                    td_predictions = np.array([val for y in td_y])
                    vd_predictions = np.array([val for y in vd_y])
                else:
                    create_classifier_func = self.create_classifier(code)
                    classify_func = self.classify()
                    classifier = create_classifier_func(td_x, td_y)
                    td_predictions = classify_func(classifier, td_x)
                    vd_predictions = classify_func(classifier, vd_x)

                dct_td_predictions_per_code[code] = td_predictions
                dct_vd_predictions_per_code[code] = vd_predictions
                dct_actual_per_code[code] = td_y

                td_r, td_p, td_f1, td_a = Metrics.rpf1a(
                    td_y, td_predictions, class_value=class_value)
                vd_r, vd_p, vd_f1, vd_a = Metrics.rpf1a(
                    vd_y, vd_predictions, class_value=class_value)
                vd_metric, td_metric = \
                    self.rpfa(vd_r, vd_p, vd_f1, vd_a, total_codes), \
                    self.rpfa(td_r, td_p, td_f1, td_a, total_codes)

                vd_metrics_for_layer.append(vd_metric)
                td_metrics_for_layer.append(td_metric)
                vd_metrics_by_code[code].append(vd_metric)
                td_metrics_by_code[code].append(td_metric)
            # End for code in codes
        # End for fold in folds

        for code in sorted(codes):
            positive_count = positive_count_by_code[code]
            vd_metric, td_metric = \
                self.mean_rpfa(vd_metrics_by_code[code]), \
                self.mean_rpfa(td_metrics_by_code[code])
            results = "Code: {0} Count: {1} VD[ {2} ]\tTD[ {3} ]\n".format(
                code.ljust(7), str(positive_count).rjust(4),
                vd_metric.to_str(), td_metric.to_str())
            print results,

        mean_vd_metrics, mean_td_metrics = \
            self.mean_rpfa(vd_metrics_for_layer), \
            self.mean_rpfa(td_metrics_for_layer)
        wt_mean_vd_metrics, wt_mean_td_metrics = \
            self.weighted_mean_rpfa(vd_metrics_for_layer), \
            self.weighted_mean_rpfa(td_metrics_for_layer)

        aggregate_results = "\n"
        aggregate_results += "VALIDATION DATA -\n"
        aggregate_results += "\tMEAN\n\t\t {0}\n".format(
            mean_vd_metrics.to_str(True))
        aggregate_results += "\tWEIGHTED MEAN\n\t\t {0}\n".format(
            wt_mean_vd_metrics.to_str(True))
        aggregate_results += "\n"
        aggregate_results += "TRAINING DATA -\n"
        aggregate_results += "\tMEAN\n\t\t {0}\n".format(
            mean_td_metrics.to_str(True))
        aggregate_results += "\tWEIGHTED MEAN\n\t\t {0}\n".format(
            wt_mean_td_metrics.to_str(True))
        print aggregate_results
    # End for layer in layers

    # Dump results to file in case of crash
    """
    print "Writing results to: " + fName
    handle = open(fName, mode="w+")
    handle.write(all_results)
    handle.close()
    """
    # return (mean_vd_metrics, wt_mean_vd_metrics)
__author__ = 'bharathipriyaa'

import Metrics

total_df, coke_df, pepsi_df = Metrics.readFiles()
Metrics.calculate_viewability(total_df, coke_df, pepsi_df)
total_df, coke_df, pepsi_df = Metrics.calculateAdStickiness(total_df,
                                                            coke_df, pepsi_df)
Metrics.calculateCPM(total_df, coke_df, pepsi_df)
def computeMetricsTestFixed(self):
    foldLength = len(self.tT)/self.folds
    tTFixed = []
    pTFixed = []
    pTFixedAv = []
    tTFixedAv = []
    for i in range(0, foldLength):
        pTFixedAv.append(0.0)
        tTFixedAv.append(0.0)
    averageRMSE = 0.0
    averageAAE = 0.0
    averagePEARSON = 0.0
    averageStdAAE = 0.0
    for f in range(0, self.folds):
        first = f*foldLength
        last = first+foldLength
        tTFixed.append(self.tT[first:last])
        pTFixed.append(self.pT[first:last])
        for i in range(0, foldLength):
            pTFixedAv[i] = pTFixedAv[i]+pTFixed[f][i]
            tTFixedAv[i] = tTFixedAv[i]+tTFixed[f][i]
        metric = Metrics(tTFixed[f], pTFixed[f])
        metric.computeRMSE()
        metric.computeAAE()
        if len(tTFixed) > 1:
            metric.computePEARSON()
            metric.computeStdAAE()
            print "Fold"+str(f)+"\nRMSE:"+str(metric.RMSE)+" AAE:"+str(metric.AAE)+" PEARSON:"+str(metric.PEARSON)+" STD_R:"+str(metric.stdAAE)
            averageRMSE += metric.RMSE
            averageAAE += metric.AAE
            averagePEARSON += metric.PEARSON
            averageStdAAE += metric.stdAAE
            print "FOLD AVERAGE...\nRMSE:"+str(averageRMSE/float(self.folds))+" AAE:"+str(averageAAE/float(self.folds))+" PEARSON:"+str(averagePEARSON/float(self.folds))+" STD_R:"+str(averageStdAAE/float(self.folds))
        else:
            print "Fold"+str(f)+"\nRMSE:"+str(metric.RMSE)+" AAE:"+str(metric.AAE)
    pTFixedAv = [x/float(self.folds) for x in pTFixedAv]
    tTFixedAv = [x/float(self.folds) for x in tTFixedAv]
    if len(pTFixedAv) > 1:
        metric = Metrics(tTFixedAv, pTFixedAv)
        metric.computeRMSE()
        metric.computeAAE()
        metric.computePEARSON()
        metric.computeStdAAE()
        print "AVERAGE...\nRMSE:"+str(metric.RMSE)+" AAE:"+str(metric.AAE)+" PEARSON:"+str(metric.PEARSON)+" STD_R:"+str(metric.stdAAE)
def train(self, tokenized_docs, ys, epochs, batch_size=500):
    if self.activation_fn == "tanh":
        def to_tanh_val(y):
            if y > 0:
                return 1
            else:
                return -1
        ys = [map(to_tanh_val, y) for y in ys]
    elif self.activation_fn == "sigmoid":
        def to_sigmoid_val(y):
            if y > 0:
                return 1
            else:
                return 0
        ys = [map(to_sigmoid_val, y) for y in ys]

    if not self.init:
        self.__init_learners__(tokenized_docs, ys)

    outputs = np.array(ys)
    num_rows = len(tokenized_docs)
    assert num_rows == outputs.shape[0]

    num_batches = num_rows / batch_size
    if num_rows % batch_size > 0:
        num_batches += 1

    batch_leaf_nodes = {}
    for epoch in range(epochs):
        top_level_inputs = []
        recon_errors = None
        cls_errors = None
        print ""
        print "EPOCH: ", epoch
        for batch in range(num_batches):
            print batch,
            start = batch * batch_size
            end = start + batch_size
            mini_batch_in = tokenized_docs[start:end]
            mini_batch_out = outputs[start:end]

            """ Leaf level input data will NOT change thru learning """
            if batch not in batch_leaf_nodes:
                leaf_nodes, word_pairs, indices = \
                    self.__construct_leaf_nodes__(mini_batch_in)
                batch_leaf_nodes[batch] = (leaf_nodes, word_pairs, indices)
            else:
                leaf_nodes, word_pairs, indices = batch_leaf_nodes[batch]

            reconstruction_errors, classification_errors, top_nts = \
                self.__train_mini_batch__(leaf_nodes, word_pairs, indices,
                                          mini_batch_out)
            top_level_inputs.extend(top_nts)
            if recon_errors is None:
                recon_errors = reconstruction_errors
                cls_errors = classification_errors
            else:
                recon_errors = np.append(recon_errors,
                                         reconstruction_errors, 0)
                cls_errors = np.append(cls_errors, classification_errors, 0)

        recon_mse = np.mean(np.square(recon_errors))
        # use the accumulated cls_errors, not just the last batch's errors
        cls_mse = np.mean(np.square(cls_errors))
        recon_mae = np.mean(np.abs(recon_errors))
        cls_mae = np.mean(np.abs(cls_errors))

        print ""
        print "[AE]   MSE for EPOCH: " + str(recon_mse)
        print "[AE]   MAE for EPOCH: " + str(recon_mae)
        print ""
        print "[NNet] MSE for EPOCH: " + str(cls_mse)
        print "[NNet] MAE for EPOCH: " + str(cls_mae)
        print ""

        a3, a2, err = self.nnet.prop_up(top_level_inputs, outputs)
        a3sorted = np.argsort(a3, 1)
        if self.activation_fn == "tanh":
            """ If tanh, adjust labels to be in [-1, 1] """
            a3sorted = ((2 * a3sorted) - 1)

        expected = outputs[:, 1]
        actual = a3sorted[:, 1].flatten().tolist()[0]
        r, p, f1 = Metrics.rpf1(expected, actual, class_value=1)

        mse = np.mean(np.square(err))
        mae = np.mean(np.abs(err))
        print "Top-Level Classification Results:"
        print "\tMSE for EPOCH: " + str(mse)
        print "\tMAE for EPOCH: " + str(mae)
        print ""
        print "\tRecall:    " + str(r)
        print "\tPrecision: " + str(p)
        print "\tF1:        " + str(f1)

        if epoch > 0 and epoch % 5 == 0:
            self.__run_classifier__(top_level_inputs, expected)
    for col in range(dim):
        p_single = int(p[col])
        a_single = int(a[col])
        str_p += str(p_single) + " "
        str_a += str(a_single) + " "
        predict_single[col].append(p_single)
        actual_single[col].append(a_single)
    str_p = str_p[0:-1]
    str_a = str_a[0:-1]
    predict_complete.append(str_p)
    actual_complete.append(str_a)

import Metrics

matrix_complete = Metrics.ConfusionMatrix(predict_complete, actual_complete)
matrix_single = []
for col in range(dim):
    matrix_single.append(Metrics.ConfusionMatrix(predict_single[col],
                                                 actual_single[col]))

# Hardcoded for better formatting
#Metrics.printConfusionMatrix("COMPLETE", matrix_complete)
#Metrics.printConfusionMatrix("EXPLORATION ORDER", matrix_single[0])
#Metrics.printConfusionMatrix("SATURATION STRATEGY", matrix_single[1])
#Metrics.printConfusionMatrix("SATURATION GRANULARITY", matrix_single[2])

# Simple printing
Metrics.printConfusionMatrixCompact(matrix_complete)
for col in range(dim):
    Metrics.printConfusionMatrixCompact(matrix_single[col])
print ""
def computeMetrics(self):
    if self.testFixed and self.completedFolds == self.folds:
        self.computeMetricsTestFixed()
    elif self.averageOverFold:
        self.computeMetricsAverageOverFold()
    else:
        for m in range(0, self.models):
            metric = Metrics(self.tTM[m], self.pTM[m])
            metric.computeRMSE()
            metric.computeAAE()
            if len(self.tTM[m]) > 1:
                metric.computePEARSON()
                print "Model"+str(m)+"\nRMSE:"+str(metric.RMSE)+" AAE:"+str(metric.AAE)+" PEARSON:"+str(metric.PEARSON)
            else:
                print "Model"+str(m)+"\nRMSE:"+str(metric.RMSE)+" AAE:"+str(metric.AAE)
        metric = Metrics(self.tT, self.pT)
        metric.computeRMSE()
        metric.computeAAE()
        if len(self.tT) > 1:
            metric.computePEARSON()
            metric.computeStdAAE()
            metric.computeStd()
            print "AVERAGE...\nRMSE:"+str(metric.RMSE)+" AAE:"+str(metric.AAE)+" PEARSON:"+str(metric.PEARSON)
import KernelKMeans
import numpy as np
import Metrics
import scipy.io as sio

# parameters
fileData = 'G:/Dropbox/Universidad/Machine Learning/Robustes/Abalone/abalone.npz'
epocs = sio.loadmat('G:/Dropbox/Universidad/Machine Learning/Robustes/Abalone/parameters.mat')['epocs']
n_clusters = 3
gamma_logscale = [1, 2, 3, 4]

clustering_accuracy = np.zeros(epocs*4)
calculate_purity = np.zeros(epocs*4)
calculate_nmi = np.zeros(epocs*4)
cont = 0
for gamma in gamma_logscale:
    # the loop variable must not reuse the name 'epocs', or every gamma
    # after the first would run fewer iterations
    for epoc in xrange(epocs):
        print epoc
        labels_true, labels_pred, features = KernelKMeans.get_kernelKMeans(
            fileData, n_clusters, normalized_axis=0, norm='l1',
            gamma=2**-gamma)
        clustering_accuracy[cont] = Metrics.calculate_clusteringAccuracy(
            labels_true, labels_pred)
        calculate_purity[cont], vector = Metrics.calculate_purity(
            labels_true, labels_pred)
        calculate_nmi[cont] = Metrics.calculate_nmi(labels_true, labels_pred)
        cont += 1

sio.savemat('results', {'clusteringAccuracy': clustering_accuracy,
                        'purityvec': calculate_purity,
                        'nmivec': calculate_nmi})
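# Hedged sketch of a purity computation in the spirit of
# Metrics.calculate_purity above, assuming the usual definition (fraction
# of points matching their cluster's majority true label, with integer
# labels); the per-cluster vector mirrors the second return value used above.
import numpy as np

def calculate_purity(labels_true, labels_pred):
    per_cluster = []
    correct = 0
    for c in np.unique(labels_pred):
        members = labels_true[labels_pred == c]
        counts = np.bincount(members)
        per_cluster.append(counts.max() / float(members.size))
        correct += counts.max()
    return correct / float(labels_true.size), per_cluster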
for i in range(0, len(vector)):
    vector[i] = list(map(int, vector[i]))
for i in range(0, len(vector_words)):
    vector_words[i] = list(map(int, vector_words[i]))

for x in range(0, len(worker)):
    if worker[x] in workerdict:
        current_worker_annotations = workerdict[worker[x]]
    else:
        current_worker_annotations = {}
    current_worker_annotations[tweet[x]] = vector[x]
    workerdict[worker[x]] = current_worker_annotations

for x in range(0, len(worker_words)):
    if worker_words[x] in workerdict_words:
        current_worker_annotations = workerdict_words[worker_words[x]]
    else:
        current_worker_annotations = {}
    current_worker_annotations[tweet_words[x]] = vector_words[x]
    workerdict_words[worker_words[x]] = current_worker_annotations

worker_agreement = Metrics.get_worker_agreement(workerdict)
cosine = Metrics.get_cosine_similarity(workerdict)
worker_unique = list(set(worker))

write_counts_to_csv('Novelty_Cosine.csv', cosine)
write_counts_to_csv('Novelty_Worker_Disagreement.csv', worker_agreement)
# Test classifier
testSet = DataUtils.read_dataset(directory + "Set-" + setNo + "-validate.csv")
predictionsRaw = classifier.predict(testSet.features)

# Outbox labels
predictions = []
predictionsExpl = []
for row in range(len(predictionsRaw)):
    # Convert float to int
    predictions.append(int(predictionsRaw[row]))
    # Splice the int into its 3 digit-encoded values
    order = predictions[row] / 100
    sat = (predictions[row] / 10) % 10
    gran = predictions[row] % 10
    predictionsExpl.append([order, sat, gran])

# Write results to csv file
results = DataUtils.ResultSet(testSet.id, predictionsExpl, testSet.labels)
DataUtils.write_resultset(results, directory + "Result-" + setNo + ".csv")

# Metrics of classifier
import Metrics

matrix_complete = Metrics.ConfusionMatrix(predictions,
                                          inboxLabels(testSet.labels))

# Readable printing
#Metrics.printConfusionMatrix("COMPLETE", matrix_complete)

# Simple printing
Metrics.printConfusionMatrixCompact(matrix_complete)
print ""
def cross_validation_score_generic(x, y, fn_create_classifier, fn_classify,
                                   folds=10, class_value=1.0,
                                   one_fold=False):
    """
    Creates #folds folds in the dataset, runs the <classifier> on them,
    and computes the average recall, precision, and F1 score.

    fn_create_classifier : a function that takes a list of training data
                           and returns a classifier
    fn_classify          : a function that takes a classifier and a list of
                           inputs and returns a list of classifications
    folds                : number of folds
    class_value          : positive class value
    one_fold             : run for one fold only (for quick testing)
    """
    if len(x) != len(y):
        raise Exception("Lists are not the same size")

    npx = __ensure_np_array__(x)
    npy = __ensure_np_array__(y)

    edges = cross_validation_edges(len(x), folds)

    td_recall, td_precision, td_f1_score, td_accuracy = 0.0, 0.0, 0.0, 0.0
    vd_recall, vd_precision, vd_f1_score, vd_accuracy = 0.0, 0.0, 0.0, 0.0

    td_tp_ix, td_fp_ix, td_fn_ix, td_tn_ix = [], [], [], []
    vd_tp_ix, vd_fp_ix, vd_fn_ix, vd_tn_ix = [], [], [], []

    for i in range(folds):
        l, r = edges[i]
        # Note these are numpy objects and cannot be treated as lists
        td_x = np.concatenate((npx[:l], npx[r:]))
        td_y = np.concatenate((npy[:l], npy[r:]))
        vd_x = np.array(npx[l:r])
        vd_y = np.array(npy[l:r])

        classifier = fn_create_classifier(td_x, td_y)

        pred_td_y = fn_classify(classifier, td_x)
        td_r, td_p, td_f1, td_a, tp_ix, fp_ix, fn_ix, tn_ix = \
            Metrics.rpf1a_with_indices(td_y, pred_td_y, class_value)
        td_recall += td_r
        td_precision += td_p
        td_f1_score += td_f1
        td_accuracy += td_a
        td_tp_ix.extend(tp_ix)
        td_fp_ix.extend(fp_ix)
        td_fn_ix.extend(fn_ix)
        td_tn_ix.extend(tn_ix)

        pred_vd_y = fn_classify(classifier, vd_x)
        vd_r, vd_p, vd_f1, vd_a, tp_ix, fp_ix, fn_ix, tn_ix = \
            Metrics.rpf1a_with_indices(vd_y, pred_vd_y, class_value)
        vd_recall += vd_r
        vd_precision += vd_p
        vd_f1_score += vd_f1
        vd_accuracy += vd_a
        vd_tp_ix.extend(tp_ix)
        vd_fp_ix.extend(fp_ix)
        vd_fn_ix.extend(fn_ix)
        vd_tn_ix.extend(tn_ix)

        if one_fold:
            folds = 1
            break

    # Compute mean scores across all folds
    mean_td_recall = td_recall / folds
    mean_td_precision = td_precision / folds
    mean_td_f1_score = td_f1_score / folds
    mean_td_accuracy = td_accuracy / folds

    mean_vd_recall = vd_recall / folds
    mean_vd_precision = vd_precision / folds
    mean_vd_f1_score = vd_f1_score / folds
    mean_vd_accuracy = vd_accuracy / folds

    return (
        mean_vd_recall, mean_vd_precision, mean_vd_f1_score,
        mean_vd_accuracy,
        mean_td_recall, mean_td_precision, mean_td_f1_score,
        mean_td_accuracy,
        # indices for different groupings
        vd_tp_ix, vd_fp_ix, vd_fn_ix, vd_tn_ix,
        td_tp_ix, td_fp_ix, td_fn_ix, td_tn_ix
    )
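# Illustrative call with hypothetical sklearn-style factory and classify
# functions (names are placeholders, not part of the module):
#
# create = lambda X, y: LogisticRegression().fit(X, y)
# classify = lambda clf, X: clf.predict(X)
# scores = cross_validation_score_generic(xs, ys, create, classify,
#                                         folds=10, one_fold=True)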
for line in worker_label_vectors_file.readlines():
    info = line.strip().split('|')
    worker_id = info[0]
    label = info[1]
    vector = info[2:]
    vector = [int(i) for i in vector]
    if worker_id in worker_dict:
        cur_label_vectors = worker_dict[worker_id]
    else:
        cur_label_vectors = {}
    cur_label_vectors[label] = vector
    worker_dict[worker_id] = cur_label_vectors
worker_label_vectors_file.close()

# worker agreement
worker_agreement_dict = Metrics.get_worker_agreement(worker_dict)

# average worker sentence agreement
avg_worker_sentence_agreement_dict = \
    Metrics.get_avg_worker_sentence_agreement(worker_dict)

# average amount of annotations per label
avg_worker_annotations = {}
for worker in worker_dict:
    total_annotations = 0
    labels = worker_dict[worker]
    for label in labels:
        vector = labels[label]
        total_annotations += sum(vector)
    avg_annotations = float(total_annotations) / len(labels)
    avg_worker_annotations[worker] = avg_annotations