def main(argv=None):
    """Train a GCN on TCGA clinical task #113 over several trials.

    Loads (or creates) a checkpointed results frame, normalizes the task's
    expression matrix once, then for each trial fits a fresh GCN on a
    stratified 50-sample training split and records the model's accuracy
    metric. Finally reports the mean metric across trials.

    Args:
        argv: optional argument list forwarded to ``parse_args``.
    """
    opt = parse_args(argv)
    tasks = TCGAMeta(download=True, preload=True)
    task = tasks[113]

    # Setup the results dictionary; fall back to a fresh frame when no
    # usable checkpoint exists. Broad except is deliberate best-effort
    # checkpoint loading (missing file, stale pickle format, ...).
    filename = "experiments/results/clinical-tasks.pkl"
    try:
        # 'with' guarantees the checkpoint handle is closed
        # (the original `pickle.load(open(...))` leaked it).
        with open(filename, "rb") as handle:
            results = pickle.load(handle, encoding='latin1')
        print("Loaded Checkpointed Results")
    except Exception as e:
        print(e)
        results = pd.DataFrame(columns=[
            'task', 'acc_metric', 'model', 'graph', 'trial', 'train_size',
            'time_elapsed'
        ])
        print("Created a New Results Dictionary")

    train_size = 50
    trials = 3
    cuda = True

    # Normalize ONCE, before the trial loop: mean-center per gene, then
    # scale by the global variance. The original re-applied this inside
    # the loop, dividing by a new (ever-shrinking) variance each trial,
    # so later trials trained on differently-scaled data.
    task._samples = task._samples - task._samples.mean(axis=0)
    task._samples = task._samples / task._samples.var()

    exp = []
    for trial in range(trials):
        # seed=trial makes each trial's model initialization reproducible.
        model = GCN(cuda=cuda,
                    dropout=opt.dropout,
                    num_layer=opt.num_layer,
                    channels=opt.channels,
                    embedding=opt.embedding,
                    aggregation=opt.aggregation,
                    lr=opt.lr,
                    agg_reduce=opt.agg_reduce,
                    seed=trial)

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            task._samples,
            task._labels,
            stratify=task._labels,
            train_size=train_size,
            test_size=len(task._labels) - train_size)

        adj = sparse.csr_matrix(nx.to_numpy_matrix(GeneManiaGraph().nx_graph))
        model.fit(X_train, y_train, adj=adj)

        # Predict in chunks of 10 samples to bound memory usage.
        y_hat = []
        for chunk in get_every_n(X_test, 10):
            y_hat.extend(np.argmax(model.predict(chunk), axis=1).numpy())

        exp.append(model.metric(y_test, y_hat))
        print(exp)

    report_results([{
        "name": "acc_metric",
        "type": "objective",
        "value": np.array(exp).mean()
    }])
def main(argv=None):
    """Sweep graph-neighborhood sizes around gene RPL4 and report mean AUC.

    For each neighborhood size, fits a GCN to predict whether RPL4's
    (mean-centered) expression is positive, using only the genes in a
    BFS-sampled neighborhood of RPL4 in the GeneMania graph (or the full
    graph for the largest setting), then reports the mean test AUC.

    Args:
        argv: optional argument list forwarded to ``parse_args``.
    """
    opt = parse_args(argv)
    dataset = datasets.TCGADataset()
    # Mean-center every gene's expression so the >0 label below is
    # "above-average expression".
    dataset.df = dataset.df - dataset.df.mean(axis=0)

    gene_graph = GeneManiaGraph()
    search_num_genes = [50, 100, 200, 300, 500, 1000, 2000, 4000, 8000, 16300]
    cuda = torch.cuda.is_available()
    gene = "RPL4"  # loop-invariant target gene, hoisted out of the sweep

    exp = []
    for num_genes in search_num_genes:
        model = GCN(cuda=cuda,
                    dropout=opt.dropout,
                    num_layer=opt.num_layer,
                    channels=opt.channels,
                    embedding=opt.embedding,
                    aggregation=opt.aggregation,
                    lr=opt.lr,
                    agg_reduce=opt.agg_reduce)

        # Binary label: 1 if the gene's centered expression is positive.
        dataset.labels = dataset.df[gene].where(
            dataset.df[gene] > 0).notnull().astype("int")
        if isinstance(dataset.labels, pd.Series):
            dataset.labels = dataset.labels.values

        X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
            dataset.df,
            dataset.labels,
            stratify=dataset.labels,
            train_size=opt.train_size,
            test_size=opt.test_size,
            random_state=opt.seed)

        # 16300 means "all genes": use the full graph instead of sampling.
        if num_genes == 16300:
            neighbors = gene_graph.nx_graph
        else:
            neighbors = gene_graph.bfs_sample_neighbors(gene, num_genes)

        X_train = X_train[list(neighbors.nodes)].copy()
        X_test = X_test[list(neighbors.nodes)].copy()
        # Mask the target gene's own expression so the model cannot
        # trivially read the label off its input.
        X_train[gene] = 1
        X_test[gene] = 1

        adj = sparse.csr_matrix(nx.to_numpy_matrix(neighbors))
        model.fit(X_train, y_train, adj=adj)

        y_hat = model.predict(X_test)
        y_hat = np.argmax(y_hat, axis=1)
        auc = sklearn.metrics.roc_auc_score(y_test, np.asarray(y_hat).flatten())
        del model  # free GPU/CPU memory before the next, larger sweep step
        exp.append(auc)

    report_results([{
        "name": "auc",
        "type": "objective",
        "value": np.array(exp).mean()
    }])
lr=0.001, num_epochs=100, patience=30, verbose=True, seed=seed, train_valid_split=0.8 ) elif model_name == 'MLP64': model = MLP(name="MLP_lay2_chan64", cuda=cuda, dropout=True, num_layer=2, channels=64, train_valid_split=0.8, patience=30, lr=0.001) elif model_name == 'MLP64_lr4': model = MLP(name="MLP_lay2_chan64_lr.0.0001_nodropout", cuda=cuda, dropout=False, num_layer=2, channels=64, train_valid_split=0.8, patience=30, lr=0.0001) try: # print(x_train.shape, y_train.shape, adj.shape) model.fit(x_train, y_train, adj=adj) with torch.no_grad(): model.eval() y_hat = model.predict(x_test) y_hat = np.argmax(y_hat, axis=1) # auc = sklearn.metrics.roc_auc_score(y_test, np.asarray(y_hat).flatten(), multi_class='ovo') acc = sklearn.metrics.accuracy_score(y_test, np.asarray(y_hat).flatten()) f1 = sklearn.metrics.f1_score(y_test, np.asarray(y_hat).flatten(), average='macro') experiment["model"] = model.name experiment["auc"] = 0 experiment["acc"] = acc experiment["f1"] = f1 experiment["num_genes"] = len(x_train.columns)
neighbors = list(gene_graph.first_degree(gene)[0]) neighbors = [n for n in neighbors if n in X_train.columns.values] X_train = X_train.loc[:, neighbors].copy() X_test = X_test.loc[:, neighbors].copy() else: X_train = X_train.copy() X_test = X_test.copy() try: # Don't include expression of enquired gene? # X_train[gene] = 1 # X_test[gene] = 1 with warnings.catch_warnings(): warnings.simplefilter("ignore") model.fit(X_train, y_train, adj) model.eval() with torch.no_grad(): y_hat = model.predict(X_test) auc = sklearn.metrics.roc_auc_score(y_test, np.argmax(y_hat, axis=1)) acc = sklearn.metrics.accuracy_score(y_test, np.argmax(y_hat, axis=1)) print("auc:", auc, " acc: ", acc) experiment["auc"] = auc experiment["acc"] = acc results.append(experiment) if auc > best_auc: best_auc = copy.deepcopy(auc) best_auc_model = copy.deepcopy(model) if acc > best_acc:
patience=30, lr=0.001) elif model_name == 'MLP64_lr4': model = MLP(name="MLP_lay2_chan64_lr.0.0001_nodropout", cuda=cuda, dropout=False, num_layer=2, channels=64, train_valid_split=0.8, patience=30, lr=0.0001) try: # print(x_train.shape, y_train.shape, adj.shape) model.fit(x_train, y_train, adj=adj, ontology_vectors=emb_vectors) with torch.no_grad(): model.eval() y_hat = model.predict(x_test) y_hat = np.argmax(y_hat, axis=1) # auc = sklearn.metrics.roc_auc_score(y_test, np.asarray(y_hat).flatten(), multi_class='ovo') acc = sklearn.metrics.accuracy_score( y_test, np.asarray(y_hat).flatten()) f1 = sklearn.metrics.f1_score(y_test, np.asarray(y_hat).flatten(), average='macro') experiment["model"] = model.name experiment["auc"] = 0