def experiment(exp_name, device, eval_range='all', plot=True):
    config, _, _, _ = load_config(exp_name)
    net, loss_fn = build_model(config, device, train=False)
    state_dict = torch.load(get_model_name(config), map_location=device)
    if config['mGPUs']:
        net.module.load_state_dict(state_dict)
    else:
        net.load_state_dict(state_dict)
    train_loader, val_loader = get_data_loader(
        config['batch_size'], config['use_npy'],
        geometry=config['geometry'], frame_range=config['frame_range'])

    # Train set
    train_metrics, train_precisions, train_recalls, _ = eval_batch(
        config, net, loss_fn, train_loader, device, eval_range)
    print("Training mAP", train_metrics['AP'])
    fig_name = "PRCurve_train_" + config['name']
    legend = "AP={:.1%} @IOU=0.5".format(train_metrics['AP'])
    plot_pr_curve(train_precisions, train_recalls, legend, name=fig_name)

    # Validation set
    val_metrics, val_precisions, val_recalls, _ = eval_batch(
        config, net, loss_fn, val_loader, device, eval_range)
    print("Validation mAP", val_metrics['AP'])
    print("Net Fwd Pass Time on average {:.4f}s".format(
        val_metrics['Forward Pass Time']))
    print("Nms Time on average {:.4f}s".format(
        val_metrics['Postprocess Time']))
    fig_name = "PRCurve_val_" + config['name']
    legend = "AP={:.1%} @IOU=0.5".format(val_metrics['AP'])
    plot_pr_curve(val_precisions, val_recalls, legend, name=fig_name)
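# The plot_pr_curve helper called by experiment() is defined elsewhere in this
# repo. Below is a minimal matplotlib sketch of what it is assumed to do here
# (draw a single PR curve with an AP legend and save it under the given name);
# the signature matches the call above, but the body is illustrative, not the
# repo's actual implementation.
import matplotlib.pyplot as plt

def plot_pr_curve(precisions, recalls, legend, name="PRCurve"):
    plt.figure(figsize=(6, 6))
    plt.plot(recalls, precisions, label=legend)
    plt.xlabel("Recall")
    plt.ylabel("Precision")
    plt.xlim(0, 1)
    plt.ylim(0, 1)
    plt.legend(loc="lower left")
    plt.savefig(name + ".png")
    plt.close()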
def test():
    test_data = get_test_data()
    x = test_data[0]
    y = test_data[1]

    # Recreate the model.
    model = DeepSEA()
    model.compile(optimizer=tf.keras.optimizers.SGD(momentum=0.9),
                  loss=tf.keras.losses.BinaryCrossentropy())
    model.build(input_shape=(None, 1000, 4))
    model.summary()

    # Load the weights of the trained model. The checkpoint contains both the
    # model weights and the optimizer state. Because TensorFlow delays variable
    # creation in the model and the optimizer, the optimizer state can only be
    # restored after the model has been trained at least once, e.g. via
    # model.train_on_batch(x[0:1], y[0:1]).
    model.load_weights('./result/model/ckpt')
    # model.load_weights('./result/model/bestmodel.h5')

    result = model.predict(x)  # shape = (455024, 919)
    np.savez('./result/test_result.npz', result=result, label=y)

    # Average the predictions over the two halves of the test set
    # (forward and reverse-complement strands).
    result = np.mean((result[0:227512], result[227512:]), axis=0)
    result_shape = np.shape(result)
    y = y[0:227512]

    fpr_list, tpr_list, auroc_list = [], [], []
    precision_list, recall_list, aupr_list = [], [], []
    for i in tqdm(range(result_shape[1]), ascii=True):
        fpr_temp, tpr_temp, auroc_temp = calculate_auroc(result[:, i], y[:, i])
        precision_temp, recall_temp, aupr_temp = calculate_aupr(
            result[:, i], y[:, i])
        fpr_list.append(fpr_temp)
        tpr_list.append(tpr_temp)
        precision_list.append(precision_temp)
        recall_list.append(recall_temp)
        auroc_list.append(auroc_temp)
        aupr_list.append(aupr_temp)

    plot_roc_curve(fpr_list, tpr_list, './result/')
    plot_pr_curve(precision_list, recall_list, './result/')

    header = np.array([['auroc', 'aupr']])
    content = np.stack((auroc_list, aupr_list), axis=1)
    content = np.concatenate((header, content), axis=0)
    write2csv(content, './result/result.csv')
    write2txt(content, './result/result.txt')
    avg_auroc = np.nanmean(auroc_list)
    avg_aupr = np.nanmean(aupr_list)
    print('AVG-AUROC:{:.3f}, AVG-AUPR:{:.3f}.\n'.format(avg_auroc, avg_aupr))
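# calculate_auroc / calculate_aupr are repo helpers used by both test()
# functions. A plausible sklearn-based sketch matching the call signature above
# (scores first, labels second; returns the curve plus its area). This is an
# assumed implementation, not the verified source; the NaN guard for columns
# with a single class matches the np.nanmean aggregation used downstream.
import numpy as np
from sklearn.metrics import roc_curve, precision_recall_curve, auc

def calculate_auroc(predictions, labels):
    # AUC is undefined when only one class is present in this column.
    if len(np.unique(labels)) < 2:
        return [], [], np.nan
    fpr, tpr, _ = roc_curve(labels, predictions)
    return fpr, tpr, auc(fpr, tpr)

def calculate_aupr(predictions, labels):
    if len(np.unique(labels)) < 2:
        return [], [], np.nan
    precision, recall, _ = precision_recall_curve(labels, predictions)
    return precision, recall, auc(recall, precision)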
def test():
    dataset_test = get_test_data(64)
    model = DanQ()
    loss_object = keras.losses.BinaryCrossentropy()
    optimizer = keras.optimizers.Adam()
    trainer = Trainer(model=model,
                      loss_object=loss_object,
                      optimizer=optimizer,
                      experiment_dir='./result/DanQ')
    result, label = trainer.test(dataset_test,
                                 test_steps=int(np.ceil(455024 / 64)),
                                 dis_show_bar=True)

    # Average the predictions over the two halves of the test set
    # (forward and reverse-complement strands).
    result = np.mean((result[0:227512], result[227512:]), axis=0)
    result_shape = np.shape(result)
    label = label[0:227512]

    fpr_list, tpr_list, auroc_list = [], [], []
    precision_list, recall_list, aupr_list = [], [], []
    for i in tqdm(range(result_shape[1]), ascii=True):
        fpr_temp, tpr_temp, auroc_temp = calculate_auroc(
            result[:, i], label[:, i])
        precision_temp, recall_temp, aupr_temp = calculate_aupr(
            result[:, i], label[:, i])
        fpr_list.append(fpr_temp)
        tpr_list.append(tpr_temp)
        precision_list.append(precision_temp)
        recall_list.append(recall_temp)
        auroc_list.append(auroc_temp)
        aupr_list.append(aupr_temp)

    plot_roc_curve(fpr_list, tpr_list, './result/DanQ/')
    plot_pr_curve(precision_list, recall_list, './result/DanQ/')

    header = np.array([['auroc', 'aupr']])
    content = np.stack((auroc_list, aupr_list), axis=1)
    content = np.concatenate((header, content), axis=0)
    write2csv(content, './result/DanQ/result.csv')
    write2txt(content, './result/DanQ/result.txt')
    avg_auroc = np.nanmean(auroc_list)
    avg_aupr = np.nanmean(aupr_list)
    print('AVG-AUROC:{:.3f}, AVG-AUPR:{:.3f}.\n'.format(avg_auroc, avg_aupr))
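# write2csv / write2txt are small repo utilities called above with a 2-D array
# whose first row is a string header and whose remaining rows are floats
# (NumPy promotes the whole array to strings on concatenation). A minimal
# sketch consistent with that usage; again an assumed implementation.
import numpy as np

def write2csv(content, path):
    np.savetxt(path, content, fmt="%s", delimiter=",")

def write2txt(content, path):
    np.savetxt(path, content, fmt="%s", delimiter="\t")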
def run_main(args):
    # Define parameters
    epochs = args.epochs
    dim_au_out = args.bottleneck  # 8, 16, 32, 64, 128, 256, 512
    dim_dnn_in = dim_au_out
    dim_dnn_out = 1
    select_drug = args.drug
    na = args.missing_value
    data_path = args.data_path
    label_path = args.label_path
    test_size = args.test_size
    valid_size = args.valid_size
    g_disperson = args.var_genes_disp
    model_path = args.source_model_path
    encoder_path = args.encoder_path
    log_path = args.logging_file
    batch_size = args.batch_size
    encoder_hdims = args.encoder_h_dims.split(",")
    preditor_hdims = args.predictor_h_dims.split(",")
    reduce_model = args.dimreduce
    prediction = args.predition
    sampling = args.sampling
    PCA_dim = args.PCA_dim

    encoder_hdims = list(map(int, encoder_hdims))
    preditor_hdims = list(map(int, preditor_hdims))
    load_model = bool(args.load_source_model)

    preditor_path = model_path + reduce_model + args.predictor + prediction + select_drug + '.pkl'

    # Read data
    data_r = pd.read_csv(data_path, index_col=0)
    label_r = pd.read_csv(label_path, index_col=0)
    label_r = label_r.fillna(na)

    now = time.strftime("%Y-%m-%d-%H-%M-%S")
    ut.save_arguments(args, now)

    # Initialize logging and stdout
    out_path = log_path + now + ".err"
    log_path = log_path + now + ".log"
    out = open(out_path, "w")
    sys.stderr = out
    logging.basicConfig(
        level=logging.INFO,  # log level printed to the console
        filename=log_path,
        filemode='a',  # 'w' rewrites the log file on every run; 'a' (the default) appends to it
        format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s')
    logging.getLogger('matplotlib.font_manager').disabled = True
    logging.info(args)
    # data = data_r

    # Filter out rows whose label is still the missing-value placeholder
    selected_idx = label_r.loc[:, select_drug] != na

    if g_disperson is not None:
        # Select highly variable genes by dispersion
        hvg, adata = ut.highly_variable_genes(data_r, min_disp=g_disperson)
        # Rename columns in case duplicates exist
        data_r.columns = adata.var_names
        # Extract HVGs
        data = data_r.loc[selected_idx, hvg]
    else:
        data = data_r.loc[selected_idx, :]

    # Reduce dimensionality with PCA if requested
    if PCA_dim != 0:
        data = PCA(n_components=PCA_dim).fit_transform(data)

    # Extract labels
    label = label_r.loc[selected_idx, select_drug]

    # Scale data
    mmscaler = preprocessing.MinMaxScaler()
    lbscaler = preprocessing.MinMaxScaler()
    data = mmscaler.fit_transform(data)
    label = label.values.reshape(-1, 1)

    if prediction == "regression":
        label = lbscaler.fit_transform(label)
        dim_model_out = 1
    else:
        le = LabelEncoder()
        label = le.fit_transform(label)
        dim_model_out = 2
        # label = label.values.reshape(-1, 1)

    logging.info(np.std(data))
    logging.info(np.mean(data))

    # Split into training, validation and test sets
    X_train_all, X_test, Y_train_all, Y_test = train_test_split(
        data, label, test_size=test_size, random_state=42)
    X_train, X_valid, Y_train, Y_valid = train_test_split(
        X_train_all, Y_train_all, test_size=valid_size, random_state=42)

    # Apply the selected sampling method to the training set
    if sampling is None:
        X_train, Y_train = sam.nosampling(X_train, Y_train)
        logging.info("nosampling")
    elif sampling == "upsampling":
        X_train, Y_train = sam.upsampling(X_train, Y_train)
        logging.info("upsampling")
    elif sampling == "downsampling":
        X_train, Y_train = sam.downsampling(X_train, Y_train)
        logging.info("downsampling")
    elif sampling == "SMOTE":
        X_train, Y_train = sam.SMOTEsampling(X_train, Y_train)
        logging.info("SMOTE")
    else:
        logging.info("not a legal sampling method")

    logging.info(data.shape)
    logging.info(label.shape)
    # logging.info(X_train.shape, Y_train.shape)
    # logging.info(X_test.shape, Y_test.shape)
    logging.info(X_train.max())
    logging.info(X_train.min())

    # Select the training device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # Assuming we are on a CUDA machine, this should print a CUDA device
    logging.info(device)
    if torch.cuda.is_available():
        torch.cuda.set_device(device)

    # Construct datasets and data loaders
    X_trainTensor = torch.FloatTensor(X_train).to(device)
    X_validTensor = torch.FloatTensor(X_valid).to(device)
    X_testTensor = torch.FloatTensor(X_test).to(device)
    X_allTensor = torch.FloatTensor(data).to(device)

    if prediction == "regression":
        Y_trainTensor = torch.FloatTensor(Y_train).to(device)
        Y_trainallTensor = torch.FloatTensor(Y_train_all).to(device)
        Y_validTensor = torch.FloatTensor(Y_valid).to(device)
    else:
        Y_trainTensor = torch.LongTensor(Y_train).to(device)
        Y_trainallTensor = torch.LongTensor(Y_train_all).to(device)
        Y_validTensor = torch.LongTensor(Y_valid).to(device)

    # Autoencoder datasets reconstruct their own input
    train_dataset = TensorDataset(X_trainTensor, X_trainTensor)
    valid_dataset = TensorDataset(X_validTensor, X_validTensor)
    test_dataset = TensorDataset(X_testTensor, X_testTensor)
    all_dataset = TensorDataset(X_allTensor, X_allTensor)

    X_trainDataLoader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
    X_validDataLoader = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=True)
    X_allDataLoader = DataLoader(dataset=all_dataset, batch_size=batch_size, shuffle=True)

    # Predictor datasets map expression to drug response
    trainreducedDataset = TensorDataset(X_trainTensor, Y_trainTensor)
    validreducedDataset = TensorDataset(X_validTensor, Y_validTensor)
    trainDataLoader_p = DataLoader(dataset=trainreducedDataset, batch_size=batch_size, shuffle=True)
    validDataLoader_p = DataLoader(dataset=validreducedDataset, batch_size=batch_size, shuffle=True)
    dataloaders_train = {'train': trainDataLoader_p, 'val': validDataLoader_p}

    if bool(args.pretrain):
        dataloaders_pretrain = {'train': X_trainDataLoader, 'val': X_validDataLoader}

    # Build and pretrain the autoencoder
    if reduce_model == "VAE":
        encoder = VAEBase(input_dim=data.shape[1], latent_dim=dim_au_out, h_dims=encoder_hdims)
    else:
        encoder = AEBase(input_dim=data.shape[1], latent_dim=dim_au_out, h_dims=encoder_hdims)
    if torch.cuda.is_available():
        encoder.cuda()
    logging.info(encoder)
    encoder.to(device)

    optimizer_e = optim.Adam(encoder.parameters(), lr=1e-2)
    loss_function_e = nn.MSELoss()
    exp_lr_scheduler_e = lr_scheduler.ReduceLROnPlateau(optimizer_e)

    if reduce_model == "AE":
        encoder, loss_report_en = t.train_AE_model(
            net=encoder,
            data_loaders=dataloaders_pretrain,
            optimizer=optimizer_e,
            loss_function=loss_function_e,
            n_epochs=epochs,
            scheduler=exp_lr_scheduler_e,
            save_path=encoder_path)
    elif reduce_model == "VAE":
        encoder, loss_report_en = t.train_VAE_model(
            net=encoder,
            data_loaders=dataloaders_pretrain,
            optimizer=optimizer_e,
            n_epochs=epochs,
            scheduler=exp_lr_scheduler_e,
            save_path=encoder_path)
    logging.info("Pretraining finished")

    # Train the predictor model
    if args.predictor == "DNN":
        if reduce_model == "AE":
            model = PretrainedPredictor(
                input_dim=X_train.shape[1],
                latent_dim=dim_au_out,
                h_dims=encoder_hdims,
                hidden_dims_predictor=preditor_hdims,
                output_dim=dim_model_out,
                pretrained_weights=encoder_path,
                freezed=bool(args.freeze_pretrain))
        elif reduce_model == "VAE":
            model = PretrainedVAEPredictor(
                input_dim=X_train.shape[1],
                latent_dim=dim_au_out,
                h_dims=encoder_hdims,
                hidden_dims_predictor=preditor_hdims,
                output_dim=dim_model_out,
                pretrained_weights=encoder_path,
                freezed=bool(args.freeze_pretrain),
                z_reparam=bool(args.VAErepram))
    elif args.predictor == "GCN":
        if reduce_model == "VAE":
            gcn_encoder = VAEBase(input_dim=data.shape[1], latent_dim=dim_au_out, h_dims=encoder_hdims)
        else:
            gcn_encoder = AEBase(input_dim=data.shape[1], latent_dim=dim_au_out, h_dims=encoder_hdims)
        gcn_encoder.load_state_dict(torch.load(args.GCNreduce_path))
        gcn_encoder.to(device)

        # Encode all splits into the latent space
        train_embeddings = gcn_encoder.encode(X_trainTensor)
        zOut_tr = train_embeddings.cpu().detach().numpy()
        valid_embeddings = gcn_encoder.encode(X_validTensor)
        zOut_va = valid_embeddings.cpu().detach().numpy()
        test_embeddings = gcn_encoder.encode(X_testTensor)
        zOut_te = test_embeddings.cpu().detach().numpy()

        # Build KNN graphs on the latent embeddings
        adj_tr, edgeList_tr = g.generateAdj(
            zOut_tr, graphType='KNNgraphStatsSingleThread',
            para='euclidean:10', adjTag=True)
        adj_va, edgeList_va = g.generateAdj(
            zOut_va, graphType='KNNgraphStatsSingleThread',
            para='euclidean:10', adjTag=True)
        adj_te, edgeList_te = g.generateAdj(
            zOut_te, graphType='KNNgraphStatsSingleThread',
            para='euclidean:10', adjTag=True)
        Adj_trainTensor = preprocess_graph(adj_tr)
        Adj_validTensor = preprocess_graph(adj_va)
        Adj_testTensor = preprocess_graph(adj_te)

        Z_trainTensor = torch.FloatTensor(zOut_tr).to(device)
        Z_validTensor = torch.FloatTensor(zOut_va).to(device)
        Z_testTensor = torch.FloatTensor(zOut_te).to(device)

        if args.binarizied == 0:
            # Binarize the embeddings against their per-dimension means
            zDiscret_tr = 1.0 * (zOut_tr > np.mean(zOut_tr, axis=0))
            zDiscret_va = 1.0 * (zOut_va > np.mean(zOut_va, axis=0))
            zDiscret_te = 1.0 * (zOut_te > np.mean(zOut_te, axis=0))
            Z_trainTensor = torch.FloatTensor(zDiscret_tr).to(device)
            Z_validTensor = torch.FloatTensor(zDiscret_va).to(device)
            Z_testTensor = torch.FloatTensor(zDiscret_te).to(device)

        ZTensors_train = {'train': Z_trainTensor, 'val': Z_validTensor}
        XTensors_train = {'train': X_trainTensor, 'val': X_validTensor}
        YTensors_train = {'train': Y_trainTensor, 'val': Y_validTensor}
        AdjTensors_train = {'train': Adj_trainTensor, 'val': Adj_validTensor}

        # Feed either the raw features or the latent embeddings to the GCN
        if args.GCNfeature == "x":
            dim_GCNin = X_allTensor.shape[1]
            GCN_trainTensors = XTensors_train
            GCN_testTensor = X_testTensor
        else:
            dim_GCNin = Z_testTensor.shape[1]
            GCN_trainTensors = ZTensors_train
            GCN_testTensor = Z_testTensor

        model = GCNPredictor(input_feat_dim=dim_GCNin,
                             hidden_dim1=encoder_hdims[0],
                             hidden_dim2=dim_au_out,
                             dropout=0.5,
                             hidden_dims_predictor=preditor_hdims,
                             output_dim=dim_model_out,
                             pretrained_weights=encoder_path,
                             freezed=bool(args.freeze_pretrain))

        # model2 = GAEBase(input_dim=X_train_all.shape[1], latent_dim=128, h_dims=[512])
        # model2.to(device)
        # test = model2((X_trainTensor, Adj_trainTensor))

    logging.info(model)
    if torch.cuda.is_available():
        model.cuda()
    model.to(device)

    # Define optimizer and loss
    optimizer = optim.Adam(model.parameters(), lr=1e-2)
    if prediction == "regression":
        loss_function = nn.MSELoss()
    else:
        loss_function = nn.CrossEntropyLoss()
    exp_lr_scheduler = lr_scheduler.ReduceLROnPlateau(optimizer)

    if args.predictor == "GCN":
        model, report = t.train_GCNpreditor_model(
            model=model, z=GCN_trainTensors, y=YTensors_train,
            adj=AdjTensors_train, optimizer=optimizer,
            loss_function=loss_function, n_epochs=epochs,
            scheduler=exp_lr_scheduler, save_path=preditor_path)
    else:
        model, report = t.train_predictor_model(
            model, dataloaders_train, optimizer, loss_function, epochs,
            exp_lr_scheduler, load=load_model, save_path=preditor_path)

    if args.predictor != 'GCN':
        dl_result = model(X_testTensor).detach().cpu().numpy()
    else:
        dl_result = model(GCN_testTensor, Adj_testTensor).detach().cpu().numpy()

    # torch.save(model.feature_extractor.state_dict(), preditor_path + "encoder.pkl")

    logging.info('Performances: R/Pearson/Mse/')
    if prediction == "regression":
        logging.info(r2_score(Y_test, dl_result))
        logging.info(pearsonr(dl_result.flatten(), Y_test.flatten()))
        logging.info(mean_squared_error(Y_test, dl_result))
    else:
        lb_results = np.argmax(dl_result, axis=1)
        # pb_results = np.max(dl_result, axis=1)
        pb_results = dl_result[:, 1]

        report_dict = classification_report(Y_test, lb_results, output_dict=True)
        report_df = pd.DataFrame(report_dict).T
        ap_score = average_precision_score(Y_test, pb_results)
        auroc_score = roc_auc_score(Y_test, pb_results)
        report_df['auroc_score'] = auroc_score
        report_df['ap_score'] = ap_score
        report_df.to_csv("saved/logs/" + reduce_model + args.predictor +
                         prediction + select_drug + now + '_report.csv')

        logging.info(classification_report(Y_test, lb_results))
        logging.info(average_precision_score(Y_test, pb_results))
        logging.info(roc_auc_score(Y_test, pb_results))

        # Baseline: a stratified dummy classifier for the ROC comparison
        model = DummyClassifier(strategy='stratified')
        model.fit(X_train, Y_train)
        yhat = model.predict_proba(X_test)
        naive_probs = yhat[:, 1]

        ut.plot_roc_curve(Y_test, naive_probs, pb_results,
                          title=str(roc_auc_score(Y_test, pb_results)),
                          path="saved/figures/" + reduce_model + args.predictor +
                          prediction + select_drug + now + '_roc.pdf')
        ut.plot_pr_curve(Y_test, pb_results,
                         title=average_precision_score(Y_test, pb_results),
                         path="saved/figures/" + reduce_model + args.predictor +
                         prediction + select_drug + now + '_prc.pdf')
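# run_main dispatches on sampling via the sam.* helpers, which live elsewhere
# in this repo. A hedged sketch of what such helpers typically look like, built
# on sklearn and imbalanced-learn; the module name `sam` and the exact
# resampling behavior are assumptions, not the repo's verified code.
import numpy as np
from sklearn.utils import resample
from imblearn.over_sampling import SMOTE

def nosampling(X, y):
    return X, y

def _rebalance(X, y, target):
    # Resample every class (with replacement when growing) to `target` rows.
    y = np.ravel(y)
    X_parts, y_parts = [], []
    for c in np.unique(y):
        Xc, yc = X[y == c], y[y == c]
        Xc, yc = resample(Xc, yc, replace=len(yc) < target,
                          n_samples=target, random_state=42)
        X_parts.append(Xc)
        y_parts.append(yc)
    return np.concatenate(X_parts), np.concatenate(y_parts)

def upsampling(X, y):
    counts = np.bincount(np.ravel(y).astype(int))
    return _rebalance(X, y, counts.max())

def downsampling(X, y):
    counts = np.bincount(np.ravel(y).astype(int))
    return _rebalance(X, y, counts[counts > 0].min())

def SMOTEsampling(X, y):
    # SMOTE synthesizes new minority-class samples instead of duplicating rows.
    return SMOTE(random_state=42).fit_resample(X, np.ravel(y))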
print('loading data...')
cc_dataset = pk.load(open('datasets/cc_web_video.pickle', 'rb'))
cc_features = np.load(args['evaluation_set'])

# Load the trained DML model and embed the evaluation features
model = DNN(cc_features.shape[1],
            None,
            args['model_path'],
            load_model=True,
            trainable=False)
cc_embeddings = model.embeddings(cc_features)

print('Evaluation set file: ', args['evaluation_set'])
print('Path to DML model: ', args['model_path'])
print('Positive labels: ', args['positive_labels'])

print('\nEvaluation Results')
print('==================')
similarities = calculate_similarities(cc_dataset['queries'], cc_embeddings)
mAP, pr_curve = evaluate(cc_dataset['ground_truth'], similarities,
                         positive_labels=args['positive_labels'],
                         all_videos=False)
print('CC_WEB_VIDEO mAP: ', mAP)
plot_pr_curve(pr_curve, 'CC_WEB_VIDEO')

mAP, pr_curve = evaluate(cc_dataset['ground_truth'], similarities,
                         positive_labels=args['positive_labels'],
                         all_videos=True)
print('CC_WEB_VIDEO* mAP: ', mAP)
plot_pr_curve(pr_curve, 'CC_WEB_VIDEO*')
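# evaluate() above reports mAP over the CC_WEB_VIDEO queries. A generic sketch
# of average precision for a single ranked result list, for reference; the
# repo's evaluate() also builds the PR curve and filters by positive_labels,
# which this sketch does not attempt to reproduce.
import numpy as np

def average_precision(ranked_relevance):
    """ranked_relevance: 0/1 array of videos sorted by descending similarity."""
    ranked_relevance = np.asarray(ranked_relevance)
    hits = np.cumsum(ranked_relevance)
    ranks = np.arange(1, len(ranked_relevance) + 1)
    precisions = hits / ranks
    n_rel = ranked_relevance.sum()
    # Mean of precision values at the ranks where relevant items occur.
    return (precisions * ranked_relevance).sum() / n_rel if n_rel else 0.0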
print('\nEvaluation Results')
print('==================')
similarities = calculate_similarities(cc_dataset['queries'], cc_embeddings)
baseline_similarities = calculate_similarities(cc_dataset['queries'], cc_features)

mAP_dml, pr_curve_dml = evaluate(cc_dataset['ground_truth'], similarities,
                                 positive_labels=args['positive_labels'],
                                 all_videos=False)
mAP_base, pr_curve_base = evaluate(cc_dataset['ground_truth'], baseline_similarities,
                                   positive_labels=args['positive_labels'],
                                   all_videos=False)
print('CC_WEB_VIDEO')
print('baseline mAP: ', mAP_base)
print('DML mAP: ', mAP_dml)
plot_pr_curve(pr_curve_dml, pr_curve_base, 'CC_WEB_VIDEO')

mAP_dml, pr_curve_dml = evaluate(cc_dataset['ground_truth'], similarities,
                                 positive_labels=args['positive_labels'],
                                 all_videos=True)
mAP_base, pr_curve_base = evaluate(cc_dataset['ground_truth'], baseline_similarities,
                                   positive_labels=args['positive_labels'],
                                   all_videos=True)
print('\nCC_WEB_VIDEO*')
print('baseline mAP: ', mAP_base)
print('DML mAP: ', mAP_dml)
plot_pr_curve(pr_curve_dml, pr_curve_base, 'CC_WEB_VIDEO*')
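# calculate_similarities, used by both evaluation scripts above, is repo code.
# A hedged sketch assuming cosine similarity between each query video's
# embedding and every video embedding, returning per-query lists of
# (video_index, similarity) sorted by decreasing similarity. The assumption
# that `queries` holds row indices into the embedding matrix is illustrative.
import numpy as np

def calculate_similarities(queries, embeddings):
    # L2-normalize so the dot product equals cosine similarity.
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    normed = embeddings / np.maximum(norms, 1e-12)
    similarities = []
    for q in queries:
        sims = normed.dot(normed[q])
        ranked = sorted(enumerate(sims), key=lambda pair: pair[1], reverse=True)
        similarities.append(ranked)
    return similarities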
# Collect every per-fold prediction dump under root_dir
flist = [os.path.join(root_dir, _dir) for _dir in os.listdir(root_dir)
         if 'preds_labels' in _dir]

aucs, aps = [], []
preds, trues = [], []
for fname in flist:
    with open(fname, 'rb') as f:
        data = pickle.load(f)  # (pred, label) tuple
    aucs.append(get_auroc(data[1], data[0]))
    aps.append(get_ap(data[1], data[0]))
    preds.append(data[0])
    trues.append(data[1])

    # Draw each fold as a faint gray curve
    plot_args = {'lw': 1, 'alpha': 0.5, 'color': 'gray', 'ls': '-'}
    plot_roc_curve(0, data[1], data[0], **plot_args)
    plot_pr_curve(1, data[1], data[0], **plot_args)

# Overlay the pooled predictions as a solid black curve
plot_args = {'lw': 1, 'alpha': 0.9, 'color': 'black', 'ls': '-'}
preds = np.concatenate(preds, axis=0)
trues = np.concatenate(trues, axis=0)

# 95% confidence intervals over folds
auc_cint = np.std(aucs) / np.sqrt(len(aucs)) * 1.96
ap_cint = np.std(aps) / np.sqrt(len(aps)) * 1.96
aucstr = '{} AUC: {:.4f} ({} {:.4f})'.format(model, np.mean(aucs), u"\u00B1", auc_cint)
apstr = '{} AP: {:.4f} ({} {:.4f})'.format(model, np.mean(aps), u"\u00B1", ap_cint)

plot_roc_curve(0, trues, preds, legend=aucstr, **plot_args)
plt.savefig(os.path.join(root_dir, 'auc'))
plot_pr_curve(1, trues, preds, legend=apstr, **plot_args)
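# get_auroc / get_ap above are assumed to be thin wrappers over sklearn,
# matching the (labels, scores) call order used in the loop; a minimal sketch
# under that assumption.
from sklearn.metrics import roc_auc_score, average_precision_score

def get_auroc(labels, scores):
    return roc_auc_score(labels, scores)

def get_ap(labels, scores):
    return average_precision_score(labels, scores)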