# --- CLI arguments (the `parser` object is created earlier in the file, outside this chunk) ---
parser.add_argument("--epochs", type=int, default=10, help="Number of epochs during training")
parser.add_argument("--classifier", type=str, default='SVM', choices={'NN', 'SVM'}, help="Downstream Classifier")
parser.add_argument("--seed", type=int, default=0, help="Random Seed")
args = parser.parse_args()

# Seed both NumPy and PyTorch RNGs so sampling/training is reproducible per --seed.
np.random.seed(args.seed)
torch.manual_seed(args.seed)

# Load the relational dataset from its CSV directory.
data_path = f'Datasets/{args.data_name}'
db = Database.load_csv(data_path)

# One artifact directory per full hyperparameter combination, so runs never collide.
model_dir = f'models/{args.data_name}/{args.kernel}_{args.depth}_{args.dim}_{args.num_samples}_{args.epochs}_{args.batch_size}_{args.seed}'
os.makedirs(model_dir, exist_ok=True)

# Pick the sampling routine by kernel type: 'EK' uses the EK sampler, anything else the MMD sampler.
# NOTE(review): module name 'ek_utlis' looks like a typo of 'ek_utils' — it must match the
# actual import at the top of the file; confirm before renaming anywhere.
sample_fct = ek_utlis.ek_sample_fct if args.kernel == 'EK' else mmd_utils.mmd_sample_fct

# Labels and the row identifiers they belong to.
Y, rows = db.get_labels()
scores = []

# NOTE(review): random_state=0 is hard-coded, so the 10 stratified 90/10 splits are
# identical across --seed values — confirm this is intentional (fixed folds, varied model seed).
split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
for i, (train_index, test_index) in enumerate(split.split(rows, Y)):
    # Fresh samples each fold; the arguments are loop-invariant, so this is only
    # meaningful if sample_fct is stochastic — presumably it is; verify.
    samples = get_samples(db, args.depth, args.num_samples, sample_fct)
    # Dense integer index for every row and every sampling scheme.
    row_idx = {r: i for i, r in enumerate(rows)}
    scheme_idx = {s: i for i, s in enumerate(samples.keys())}
    # NOTE(review): loop body appears truncated here — scheme_idx is never used
    # in the visible source before the next top-level definition begins.
def compute_embedding(db):
    """Compute a node2vec embedding of the database's row-value graph.

    Returns (embedding, model). Usage below indexes `embedding` by row
    identifiers (`embedding[rows[j]]`), so it behaves as a mapping from
    graph nodes to vectors — confirm against get_node2vec_embedding_new.
    """
    G = db.get_row_val_graph_reg()
    # epochs=5 is hard-coded here, independent of any CLI --epochs setting.
    embedding, model = get_node2vec_embedding_new(G, epochs=5)
    return embedding, model


if __name__ == "__main__":
    # Fixed experiment configuration for the mutagenesis dataset.
    name = 'mutagenesis'
    embedding_name = 'testn3.pckl'
    path = f'Datasets/{name}'
    embedding_path = f'Embeddings/{name}/{embedding_name}'

    db = Database.load_csv(path)
    Y, rows = db.get_labels()
    scores = []

    # 10 stratified 90/10 train/test splits with a fixed random_state for reproducible folds.
    split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
    for i, (train_index, test_index) in enumerate(split.split(rows, Y)):
        # Cache one embedding file per fold on disk; recompute only when missing.
        # NOTE(review): compute_embedding(db) does not depend on the fold, so all
        # per-fold caches would hold the same embedding — confirm this is intended.
        embedding, _ = io_utils.load_or_compute(f'{embedding_path}_{i}', lambda: compute_embedding(db))
        # Row embeddings as float32 feature matrices for this fold.
        X_train = np.float32([embedding[rows[j]] for j in train_index])
        X_test = np.float32([embedding[rows[j]] for j in test_index])
        Y_train, Y_test = [Y[i] for i in train_index], [Y[i] for i in test_index]
        clf = SVC(kernel='rbf', C=1.0)
        # NOTE(review): source appears truncated here — clf is never fitted or
        # scored in the visible chunk, and `scores` is never appended to.