print "\rpredicting {} / {}".format(i, test_data_gen.steps), ret = test_data_gen.next(return_y_true=True) (x, y_processed, y) = ret["data"] cur_names = ret["names"] cur_ts = ret["ts"] x = np.array(x) pred = model.predict_on_batch(x) predictions += list(pred) labels += list(y) names += list(cur_names) ts += list(cur_ts) if args.partition == 'log': predictions = [metrics.get_estimate_log(x, 10) for x in predictions] metrics.print_metrics_log_bins(labels, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] metrics.print_metrics_custom_bins(labels, predictions) if args.partition == 'none': metrics.print_metrics_regression(labels, predictions) predictions = [x[0] for x in predictions] path = os.path.join("test_predictions", os.path.basename( args.load_state)) + ".csv" utils.save_results(names, ts, predictions, labels, path) else: raise ValueError("Wrong value for args.mode")
print("\n ================= 48h mortality ================") ihm_pred = np.array(ihm_pred) ihm_ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred) # decomp if args.decomp_C > 0: print("\n ================ decompensation ================") decomp_pred = np.array(decomp_pred) decomp_ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred) # los if args.los_C > 0: print("\n ================ length of stay ================") if args.partition == 'log': los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred] los_ret = metrics.print_metrics_log_bins(los_y_true, los_pred) if args.partition == 'custom': los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred] los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred) if args.partition == 'none': los_ret = metrics.print_metrics_regression(los_y_true, los_pred) # pheno if args.pheno_C > 0: print("\n =================== phenotype ==================") pheno_pred = np.array(pheno_pred) pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred) print("Saving the predictions in test_predictions/task directories ...") # ihm
def calc_metrics(self, data_gen, history, dataset, logs):
    """Run the model over ``data_gen``, print per-task metrics and log them.

    Predictions for the four heads (48h mortality, decompensation, length of
    stay, phenotype) are gathered batch by batch. For the first three heads
    only positions whose mask entry equals 1 are kept. Every metric returned
    by the ``metrics.print_metrics_*`` helpers is stored in ``logs`` under
    ``dataset + '_<task>_' + metric_name`` and ``logs`` is finally appended
    to ``history``.

    Args:
        data_gen: batch source exposing ``steps``, ``target_repl`` and
            ``next(return_y_true=True)``, which yields ``(X, y, los_y_reg)``.
        history: container with ``append``; receives ``logs`` at the end.
        dataset: string prefix for the keys written into ``logs``.
        logs: mapping updated in place with the computed metrics.

    Raises:
        ValueError: if ``self.partition`` is not 'log', 'custom' or 'none'.
    """
    if self.partition not in ('log', 'custom', 'none'):
        # Without this check an unknown partition skips every length-of-stay
        # branch below and the stale decompensation metrics would be logged
        # under the '_los_' keys.
        raise ValueError(
            "Wrong value for self.partition: {!r}".format(self.partition))

    def gather(mask, truth, pred_rows, truth_out, pred_out):
        # Keep only the (truth, prediction) pairs whose mask entry equals 1.
        for (m, t, p) in zip(mask.flatten(), truth.flatten(), pred_rows):
            if np.equal(m, 1):
                truth_out.append(t)
                pred_out.append(p)

    ihm_y_true = []
    decomp_y_true = []
    los_y_true = []
    pheno_y_true = []

    ihm_pred = []
    decomp_pred = []
    los_pred = []
    pheno_pred = []

    for i in range(data_gen.steps):
        if self.verbose == 1:
            print("\tdone {}/{}".format(i, data_gen.steps), end='\r')
        (X, y, los_y_reg) = data_gen.next(return_y_true=True)
        outputs = self.model.predict(X, batch_size=self.batch_size)

        # X[1], X[2] and X[3] are used as the masks of the ihm, decomp and
        # los heads respectively.
        ihm_M = X[1]
        decomp_M = X[2]
        los_M = X[3]

        if not data_gen.target_repl:  # no target replication
            (ihm_p, decomp_p, los_p, pheno_p) = outputs
            (ihm_t, decomp_t, los_t, pheno_t) = y
        else:  # target replication: the extra outputs/targets are ignored
            (ihm_p, _, decomp_p, los_p, pheno_p, _) = outputs
            (ihm_t, _, decomp_t, los_t, pheno_t, _) = y

        los_t = los_y_reg  # real value not the label

        # ihm
        gather(ihm_M, ihm_t, ihm_p.flatten(), ihm_y_true, ihm_pred)

        # decomp
        gather(decomp_M, decomp_t, decomp_p.flatten(),
               decomp_y_true, decomp_pred)

        # los
        if los_p.shape[-1] == 1:  # regression: one value per position
            los_rows = los_p.flatten()
        else:  # classification: one row of 10 values per position
            los_rows = los_p.reshape((-1, 10))
        gather(los_M, los_t, los_rows, los_y_true, los_pred)

        # pheno (no mask; rows of 25 values)
        for (t, p) in zip(pheno_t.reshape((-1, 25)),
                          pheno_p.reshape((-1, 25))):
            pheno_y_true.append(t)
            pheno_pred.append(p)
    print('\n')

    # ihm
    print("\n ================= 48h mortality ================")
    ihm_pred = np.array(ihm_pred)
    # Two columns per sample: [1 - p, p].
    ihm_pred = np.stack([1 - ihm_pred, ihm_pred], axis=1)
    ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred)
    for k, v in ret.items():
        logs[dataset + '_ihm_' + k] = v

    # decomp
    print("\n ================ decompensation ================")
    decomp_pred = np.array(decomp_pred)
    decomp_pred = np.stack([1 - decomp_pred, decomp_pred], axis=1)
    ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred)
    for k, v in ret.items():
        logs[dataset + '_decomp_' + k] = v

    # los
    print("\n ================ length of stay ================")
    if self.partition == 'log':
        los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred]
        ret = metrics.print_metrics_log_bins(los_y_true, los_pred)
    elif self.partition == 'custom':
        los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred]
        ret = metrics.print_metrics_custom_bins(los_y_true, los_pred)
    else:  # 'none' -- the only remaining value after the check at the top
        ret = metrics.print_metrics_regression(los_y_true, los_pred)
    for k, v in ret.items():
        logs[dataset + '_los_' + k] = v

    # pheno
    print("\n =================== phenotype ==================")
    pheno_pred = np.array(pheno_pred)
    ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred)
    for k, v in ret.items():
        logs[dataset + '_pheno_' + k] = v

    history.append(logs)
print "\n ================= 48h mortality ================" ihm_pred = np.array(ihm_pred) ihm_ret = metrics.print_metrics_binary(ihm_y_true, ihm_pred) # decomp if args.decomp_C > 0: print "\n ================ decompensation ================" decomp_pred = np.array(decomp_pred) decomp_ret = metrics.print_metrics_binary(decomp_y_true, decomp_pred) # los if args.los_C > 0: print "\n ================ length of stay ================" if args.partition == 'log': los_pred = [metrics.get_estimate_log(x, 10) for x in los_pred] los_ret = metrics.print_metrics_log_bins(los_y_true, los_pred) if args.partition == 'custom': los_pred = [metrics.get_estimate_custom(x, 10) for x in los_pred] los_ret = metrics.print_metrics_custom_bins(los_y_true, los_pred) if args.partition == 'none': los_ret = metrics.print_metrics_regression(los_y_true, los_pred) # pheno if args.pheno_C > 0: print "\n =================== phenotype ==================" pheno_pred = np.array(pheno_pred) pheno_ret = metrics.print_metrics_multilabel(pheno_y_true, pheno_pred) print "Saving the predictions in test_predictions/task directories ..." # ihm
x = torch.tensor(x, dtype=torch.float).to(device) pred = model(x) pred = pred.cpu().data.numpy() if isinstance(x, list) and len(x) == 2: # deep supervision pass else: if pred.shape[-1] == 1: y_true += list(y.flatten()) predictions += list(pred.flatten()) else: y_true += list(y) predictions += list(pred) print('\n') if args.partition == 'log': predictions = [metrics.get_estimate_log(x, 10) for x in predictions] ret = metrics.print_metrics_log_bins(y_true, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] ret = metrics.print_metrics_custom_bins(y_true, predictions) if args.partition == 'none': ret = metrics.print_metrics_regression(y_true, predictions) cur_val = ret['mse'] scheduler.step(cur_val) current_lr = optimizer.param_groups[0]['lr'] if current_lr < 1e-5: with open(os.path.join(save_path, 'log.txt'), 'a') as fout: print('Early stop at step {}'.format(step), file=fout) exit() with open(os.path.join(save_path, 'log.txt'), 'a') as fout:
def process_one_chunk(mode, chunk_index):
    """Run the network over one chunk in ``mode`` and return its mean loss.

    A chunk of ``chunk_size`` is read from ``train_reader`` (mode "train") or
    ``val_reader`` (mode "test"), preprocessed, handed to ``network`` and then
    stepped through batch by batch. Progress is printed every
    ``args.log_every`` batches, and the regression / binned metrics selected
    by ``args.network`` are printed at the end.

    Args:
        mode: either "train" or "test".
        chunk_index: index of the chunk; only used in the progress line.

    Returns:
        The sum of the per-batch losses divided by the number of batches.

    Raises:
        AssertionError: if ``mode`` is neither "train" nor "test".
        Exception: if a batch loss is NaN.
    """
    assert (mode == "train" or mode == "test")

    if mode == "train":
        reader = train_reader
    if mode == "test":
        reader = val_reader

    (data, ts, ys, header) = utils.read_chunk(reader, chunk_size)
    data = utils.preprocess_chunk(data, ts, discretizer, normalizer)

    if mode == "train":
        network.set_datasets((data, ys), None)
    if mode == "test":
        network.set_datasets(None, (data, ys))

    # Called for both modes, as in the original flow.
    network.shuffle_train_set()

    y_true = []
    predictions = []

    avg_loss = 0.0  # running sum since the last progress line
    sum_loss = 0.0  # running sum over the whole chunk
    prev_time = time.time()

    n_batches = network.get_batches_per_epoch(mode)

    for i in range(0, n_batches):
        step_data = network.step(mode)
        prediction = step_data["prediction"]
        answers = step_data["answers"]
        current_loss = step_data["current_loss"]
        current_loss_mse = step_data["loss_mse"]
        current_loss_reg = step_data["loss_reg"]
        log = step_data["log"]

        avg_loss += current_loss
        sum_loss += current_loss

        y_true.extend(answers)
        predictions.extend(prediction)

        if (i + 1) % args.log_every == 0:
            cur_time = time.time()
            print(" %sing: %d.%d / %d \t loss: %.3f = %.3f + %.3f \t avg_loss: %.3f \t"
                  "%s \t time: %.2fs" % (mode, chunk_index, i * args.batch_size,
                                         n_batches * args.batch_size,
                                         current_loss, current_loss_mse,
                                         current_loss_reg,
                                         avg_loss / args.log_every,
                                         log, cur_time - prev_time))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= n_batches
    # Parenthesised single-argument form: prints the same text under
    # Python 2 and is valid syntax under Python 3.
    print("\n %s loss = %.5f" % (mode, sum_loss))

    if args.network in ['lstm', 'lstm_log']:
        metrics.print_metrics_regression(y_true, predictions)
    if args.network == 'lstm_cf_log':
        metrics.print_metrics_log_bins(y_true, predictions)
    if args.network == 'lstm_cf_custom':
        metrics.print_metrics_custom_bins(y_true, predictions)

    return sum_loss
predictions.append(x) if ((i + 1) % args.log_every == 0): cur_time = time.time() print (" testing: %d / %d \t loss: %.3f \t avg_loss: %.3f \t"\ " time: %.2fs" % ((i+1) * args.batch_size, n_batches * args.batch_size, current_loss, avg_loss / args.log_every, cur_time - prev_time)) avg_loss = 0 prev_time = cur_time if np.isnan(current_loss): raise Exception("current loss IS NaN. This should never happen :)") sum_loss /= n_batches print "\n test loss = %.5f" % sum_loss if args.network in ['lstm', 'lstm_log']: metrics.print_metrics_regression(y_true, predictions) if args.network == 'lstm_cf_log': metrics.print_metrics_log_bins(y_true, predictions) if args.network == 'lstm_cf_custom': metrics.print_metrics_custom_bins(y_true, predictions) with open("activations.txt", "w") as fout: fout.write("prediction, y_true") for (x, y) in zip(predictions, y_true): fout.write("%.6f, %.6f\n" % (x, y)) else: raise Exception("unknown mode")
for i in range(test_data_gen.steps): print "\rpredicting {} / {}".format(i, test_data_gen.steps), ret = test_data_gen.next(return_y_true=True) (x, y_processed, y) = ret["data"] cur_names = ret["names"] cur_ts = ret["ts"] x = np.array(x) pred = model.predict_on_batch(x) predictions += list(pred) labels += list(y) names += list(cur_names) ts += list(cur_ts) if args.partition == 'log': predictions = [metrics.get_estimate_log(x, 10) for x in predictions] metrics.print_metrics_log_bins(labels, predictions) if args.partition == 'custom': predictions = [metrics.get_estimate_custom(x, 10) for x in predictions] metrics.print_metrics_custom_bins(labels, predictions) if args.partition == 'none': metrics.print_metrics_regression(labels, predictions) predictions = [x[0] for x in predictions] path = os.path.join("test_predictions", os.path.basename(args.load_state)) + ".csv" utils.save_results(names, ts, predictions, labels, path) else: raise ValueError("Wrong value for args.mode")
def do_epoch(mode, epoch):
    """Run one multitask epoch in ``mode`` and return the mean batch loss.

    For every batch the predictions of the four heads are paired with the
    labels found in ``step_data["data"]``. For the ihm, los and decomp heads
    only entries whose mask equals 1 are kept. Progress is printed every
    ``args.log_every`` batches; at the end, metrics are printed for every
    task whose weight (``args.ihm_C``, ``args.los_C``, ``args.ph_C``,
    ``args.decomp_C``) is greater than ``1e-13``.

    Args:
        mode: passed to ``network.step`` and used in the printed messages
            ('train' or 'test').
        epoch: epoch number; only used in the progress line.

    Returns:
        The sum of the per-batch losses divided by the number of batches.

    Raises:
        Exception: if a batch loss is NaN (the loss breakdown is printed
            first).
    """
    ihm_predictions = []
    ihm_answers = []

    los_predictions = []
    los_answers = []

    ph_predictions = []
    ph_answers = []

    decomp_predictions = []
    decomp_answers = []

    avg_loss = 0.0  # running sum since the last progress line
    sum_loss = 0.0  # running sum over the whole epoch
    prev_time = time.time()

    batches_per_epoch = network.get_batches_per_epoch(mode)

    for i in range(0, batches_per_epoch):
        step_data = network.step(mode)

        ihm_pred = step_data["ihm_prediction"]
        los_pred = step_data["los_prediction"]
        ph_pred = step_data["ph_prediction"]
        decomp_pred = step_data["decomp_prediction"]

        current_loss = step_data["loss"]
        ihm_loss = step_data["ihm_loss"]
        los_loss = step_data["los_loss"]
        ph_loss = step_data["ph_loss"]
        decomp_loss = step_data["decomp_loss"]
        reg_loss = step_data["reg_loss"]

        data = step_data["data"]

        # ihm entries carry the mask at index 1 and the label at index 2.
        ihm_data = data[1]
        ihm_mask = [x[1] for x in ihm_data]
        ihm_label = [x[2] for x in ihm_data]

        # los / decomp entries carry the mask at index 0, the label at index 1.
        los_data = data[2]
        los_mask = [x[0] for x in los_data]
        los_label = [x[1] for x in los_data]

        ph_data = data[3]
        ph_label = ph_data

        decomp_data = data[4]
        decomp_mask = [x[0] for x in decomp_data]
        decomp_label = [x[1] for x in decomp_data]

        avg_loss += current_loss
        sum_loss += current_loss

        for (x, mask, y) in zip(ihm_pred, ihm_mask, ihm_label):
            if mask == 1:
                ihm_predictions.append(x)
                ihm_answers.append(y)

        for (sx, smask, sy) in zip(los_pred, los_mask, los_label):
            for (x, mask, y) in zip(sx, smask, sy):
                if mask == 1:
                    los_predictions.append(x)
                    los_answers.append(y)

        for (x, y) in zip(ph_pred, ph_label):
            ph_predictions.append(x)
            ph_answers.append(y)

        for (sx, smask, sy) in zip(decomp_pred, decomp_mask, decomp_label):
            for (x, mask, y) in zip(sx, smask, sy):
                if mask == 1:
                    decomp_predictions.append(x)
                    decomp_answers.append(y)

        # Loss terms in the order both messages below print them.
        loss_parts = (float(current_loss), float(ihm_loss), float(los_loss),
                      float(ph_loss), float(decomp_loss), float(reg_loss))

        if (i + 1) % args.log_every == 0:
            cur_time = time.time()
            # Parenthesised single-argument print: same output under
            # Python 2, valid syntax under Python 3.
            print(" {}ing {}.{} / {} loss: {:8.4f} = {:1.2f} + {:8.2f} + {:1.2f} + "
                  "{:1.2f} + {:.2f} avg_loss: {:6.4f} time: {:6.4f}".format(
                      mode, epoch, i * args.batch_size,
                      batches_per_epoch * args.batch_size,
                      loss_parts[0], loss_parts[1], loss_parts[2],
                      loss_parts[3], loss_parts[4], loss_parts[5],
                      float(avg_loss / args.log_every),
                      float(cur_time - prev_time)))
            avg_loss = 0
            prev_time = cur_time

        if np.isnan(current_loss):
            print("loss: {:6.4f} = {:1.2f} + {:8.2f} + {:1.2f} + {:1.2f} + {:.2f}".format(
                *loss_parts))
            raise Exception("current loss IS NaN. This should never happen :)")

    sum_loss /= batches_per_epoch
    print("\n %s loss = %.5f" % (mode, sum_loss))

    # A task is reported only when its loss weight is effectively non-zero.
    eps = 1e-13

    if args.ihm_C > eps:
        print("\n ================= 48h mortality ================")
        metrics.print_metrics_binary(ihm_answers, ihm_predictions)

    if args.los_C > eps:
        print("\n ================ length of stay ================")
        if args.partition == 'log':
            metrics.print_metrics_log_bins(los_answers, los_predictions)
        else:
            metrics.print_metrics_custom_bins(los_answers, los_predictions)

    if args.ph_C > eps:
        print("\n =================== phenotype ==================")
        metrics.print_metrics_multilabel(ph_answers, ph_predictions)

    if args.decomp_C > eps:
        print("\n ================ decompensation ================")
        metrics.print_metrics_binary(decomp_answers, decomp_predictions)

    return sum_loss
float(current_loss), float(ihm_loss), float(los_loss), float(ph_loss), float(decomp_loss), float(reg_loss)) raise Exception("current loss IS NaN. This should never happen :)") sum_loss /= batches_per_epoch print "\n %s loss = %.5f" % (args.mode, sum_loss) eps = 1e-13 if args.ihm_C > eps: print "\n ================= 48h mortality ================" metrics.print_metrics_binary(ihm_answers, ihm_predictions) if args.los_C > eps: print "\n ================ length of stay ================" if args.partition == 'log': metrics.print_metrics_log_bins(los_answers, los_predictions) else: metrics.print_metrics_custom_bins(los_answers, los_predictions) if args.ph_C > eps: print "\n =================== phenotype ==================" metrics.print_metrics_multilabel(ph_answers, ph_predictions) if args.decomp_C > eps: print "\n ================ decompensation ================" metrics.print_metrics_binary(decomp_answers, decomp_predictions) with open("los_activations.txt", "w") as fout: fout.write("prediction, y_true") for (x, y) in zip(los_predictions, los_answers): fout.write("%.6f, %.6f\n" % (x, y))