def main(args):
    """Load a ptcl-level CSV sample, select features, and write train/test .npy splits.

    The split files are written next to the source CSV as
    ``train_<stem>.npy`` and ``test_<stem>.npy``.
    """
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)
    fix_seed(args.seed)

    # Fall back to the conventional csv/ path built from the sample identifiers.
    if args.csv_path is None:
        args.csv_path = "csv/%s.%s.%s.%s.csv" % (
            args.dsid, args.level, args.preselection, args.systematic)
    logging.info(f'training file: {args.csv_path}')

    data = pd.read_csv(args.csv_path, delimiter=',', names=PTCL_HEADER)

    # For the 'tail' task, keep only events above the dijet-mass cut.
    if args.task == 'tail':
        tail_cut = 1500
        data = data[data['jj_M'] > tail_cut]

    data = data[PTCL_FEATURES]
    logging.info(f'input features: {list(data.columns)}')
    logging.info(f'total number of input features: {len(data.columns)}')

    items = data.values
    train_set, test_set = train_test_split(items,
                                           random_state=args.seed,
                                           shuffle=args.random_split,
                                           train_size=args.train_split)

    # Derive output paths from the source file's location and stem.
    source_path = Path(args.csv_path)
    train_file = source_path.parent / f'train_{source_path.stem}'
    test_file = source_path.parent / f'test_{source_path.stem}'
    logging.info(
        f'saving train/test files to {str(train_file)} and {str(test_file)}...'
    )
    np.save(train_file, train_set)
    np.save(test_file, test_set)
def main():
    """Train the adversarial autoencoder (AAE) on the anime face dataset.

    Alternates, per batch, between (a) a reconstruction + real-critic pass and
    (b) a critic pass on latent codes drawn from the Gaussian prior.
    Checkpoints every ``save_freq`` epochs.
    """
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25
    batch_size = 64
    shuffle = True
    num_loader_workers = 2
    std_dev = 1.
    mu = 0.
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    aae_net = aae.AAE(latent_dim, hidden_dim)
    if cuda:
        aae_net.cuda()

    # Set loss fn.
    loss_fn = aae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(aae_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle, num_loader_workers)

    # Critic targets: 1 for real batches, 0 for fake (prior) batches.
    ones = torch.Tensor(np.ones(batch_size))
    if cuda:
        ones = ones.cuda()
    zeroes = torch.Tensor(np.zeros(batch_size))
    if cuda:
        zeroes = zeroes.cuda()

    # Epoch loop
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch + 1, num_epochs))
        # BUG FIX: `start` was never assigned, so `end - start` below raised
        # NameError; start the epoch timer here.
        start = time.time()

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch + 1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            # Train autoencoder and gan on real batch.
            x_reconst, real_critic = aae_net.forward(x)
            loss = loss_fn(x, x_reconst, real_critic, ones)
            loss.backward()
            optimizer.step()

            # Train gan on fake batch sampled from the N(mu, std_dev) prior.
            fake_z = torch.Tensor(std_dev * np.random.randn(batch_size, latent_dim) + mu)
            if cuda:
                fake_z = fake_z.cuda()
            fake_critic = aae_net.gan_fake_forward(fake_z)
            loss = F.binary_cross_entropy(fake_critic, zeroes, reduction='sum')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            # BUG FIX: previously saved `vae_net`, which is undefined in this
            # script; save the AAE network that was actually trained.
            util.save_weights(
                aae_net, os.path.join(save_dir, 'aae_{}.pth'.format(epoch)))

        end = time.time()
        # NOTE(review): this prints only the last batch's fake-critic loss,
        # not an epoch average — confirm that is the intent.
        print('loss: ', loss)
        print('Took {}'.format(end - start))
def main():
    """Train the VQ-VAE on the anime face dataset, checkpointing every ``save_freq`` epochs."""
    # Set variables.
    img_dim = [64, 64]
    codebook_size = 256
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25
    batch_size = 64
    shuffle = True
    num_loader_workers = 4
    # NOTE(review): beta is unused — loss_fn is called without it; confirm intent.
    beta = 1.0
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    model_net = vqvae.VQVAE(latent_dim, hidden_dim, codebook_size)
    if cuda:
        model_net.cuda()

    # Set loss fn.
    loss_fn = vqvae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(model_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle, num_loader_workers)

    # Epoch loop
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        # BUG FIX: `train_loss` was never initialized or accumulated, so the
        # per-epoch loss print below raised NameError.
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, embed_loss, _ = model_net.forward(x)
            loss = loss_fn(x, x_reconst, embed_loss)
            train_loss += loss.item()  # accumulate for the per-epoch average
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            # BUG FIX: previously saved `vae_net`, which is undefined in this
            # script; save the VQ-VAE network that was actually trained.
            util.save_weights(model_net,
                              os.path.join(save_dir, 'vqvae_{}.pth'.format(epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))
def main():
    """Train the VAE on the anime face dataset.

    Uses Adam when ``adaptive`` is True, otherwise SGD with a one-cycle
    learning-rate schedule. Checkpoints every ``save_freq`` epochs and once
    more at the end if the final epoch was not already checkpointed.
    """
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 20
    save_freq = 5
    batch_size = 128
    shuffle = True
    num_loader_workers = 3
    beta = 1.
    cuda = True
    learning_rate = 0.001
    adaptive = False  # True
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    vae_net = vae.VAE(latent_dim, hidden_dim)
    if cuda:
        vae_net.cuda()

    # Set loss fn.
    loss_fn = vae.loss_fn

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle, num_loader_workers)

    # Load optimizer.
    if adaptive:
        optimizer = optim.Adam(vae_net.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(vae_net.parameters(), lr=learning_rate)
        # NOTE(review): steps_per_epoch=10 is hard-coded; OneCycleLR expects the
        # actual number of scheduler.step() calls per epoch — confirm against
        # len(anime_data.data_loader).
        scheduler = optim.lr_scheduler.OneCycleLR(optimizer, max_lr=1e-1,
                                                  epochs=num_epochs,
                                                  steps_per_epoch=10)

    # Checkpoint-name suffix, computed once (was duplicated in two places).
    suffix = 'adaptive' if adaptive else 'cyclic'

    # Epoch loop
    for epoch in range(1, num_epochs + 1):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            # print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, mu, logvar = vae_net.forward(x)
            loss = loss_fn(x, x_reconst, mu, logvar, beta)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if not adaptive:
                scheduler.step()

        if epoch % save_freq == 0:
            util.save_weights(
                vae_net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(suffix, epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))

    # Save final weights unless the last epoch was already checkpointed above
    # (FIX: previously this unconditionally re-wrote the checkpoint just saved
    # at the final epoch, since num_epochs is a multiple of save_freq).
    if num_epochs % save_freq != 0:
        util.save_weights(
            vae_net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(suffix, num_epochs)))
def main_train(args):
    """Build data, models, and optimizers, run GAN training, then evaluate and save.

    Results are written under a timestamped subdirectory of ``args.save_to``
    (or the current working directory when none is given).
    """
    now = datetime.now()
    base_dir = Path(args.save_to) if args.save_to is not None else Path().cwd()
    save_dir = base_dir / f'{now:%Y%m%d-%H%M-%S}'

    fix_seed(args.seed)
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s', level=logging.INFO)

    # Prefer the first CUDA device when one is available.
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    dataset_train, dataset_test, scaler = get_data(args)
    logging.info(f'training level: {args.level}')

    n_features = dataset_train.items.shape[1]
    generator, discriminator = get_models(args, n_features, device)

    # Pick the trainer matching the requested GAN flavour.
    if args.gan_type == 'vanilla':
        trainer = GANTrainer(generator, discriminator, device)
    elif args.gan_type == 'wgp':
        trainer = WGPGANTrainer(generator, discriminator, device, lambda_=args.lambda_)
    else:
        raise ValueError(f'Unknown gan type: {args.gan_type}')

    optimizer_d = setup_optimizer(discriminator, args.learning_rate, weight_decay=0, args=args)
    optimizer_g = setup_optimizer(generator, args.learning_rate, weight_decay=0, args=args)

    # Optionally resume from a previous checkpoint.
    if args.load_from is not None:
        load_model(Path(args.load_from), generator, discriminator,
                   optimizer_g, optimizer_d, device)

    experiment = Experiment(args.comet_api_key,
                            project_name=args.comet_project_name,
                            workspace=args.comet_workspace)
    experiment.log_parameters(vars(args))

    # A tenth of the test items serves as the monitoring set during training.
    monitor_items = dataset_test.items[:len(dataset_test) // 10]
    iterations_total = trainer.train(args, dataset_train, optimizer_g, optimizer_d,
                                     scaler=scaler, save_dir=save_dir,
                                     test_dataset=monitor_items,
                                     experiment=experiment)

    n_events = len(dataset_test)
    steps = (args.gan_test_ratio * n_events) // args.eval_batch_size
    evaluate_model(generator, experiment, dataset_test, args.eval_batch_size,
                   steps, args, device, scaler, iterations_total)
    experiment.end()

    save_model(save_dir, generator, discriminator, optimizer_g, optimizer_d,
               iterations_total)
def plot(g, n_edges_to_change, fig_num=0, verbose=False, legend=False, n_eig=20,
         energy_=False, eigen=False, action='drop', coef=False, name='cora'):
    """Plot spectral quantities of a graph before/after perturbing its edges.

    Drops (or up-weights) ``n_edges_to_change`` edges of ``g``, builds the
    symmetric-normalized Laplacians of the original and perturbed graphs, and
    draws onto ``ax[fig_num]`` one of: eigenvalues (``eigen``), smoothed-signal
    coefficients (``coef``), or Dirichlet energies (``energy_``).

    Relies on module-level globals: ``ax``, ``args``, ``n_node``, ``helper``.
    ``verbose`` is currently unused.
    """
    fix_seed()
    if action == 'drop':
        g_drop = helper.rm_pyG_edges(g, n=n_edges_to_change)
        # process_nx_graph(g_drop, add_weight=True)
    elif action == 'increase':
        g_drop = helper.increase_random_edge_w(g, n=n_edges_to_change, w=10000)
    else:
        # BUG FIX: the original evaluated the bare class `NotImplementedError`
        # without raising it, then crashed later on the unbound `g_drop`.
        raise NotImplementedError(f'Unknown action: {action}')

    L1 = get_laplacian_mat(g.edge_index, g.edge_weight, n_node, normalization='sym')
    L2 = get_laplacian_mat(g_drop.edge_index, g_drop.edge_weight, n_node, normalization='sym')

    if eigen or coef:  # no need to compute eigenvector for energy
        w1, v1 = eig(L1)
        w2, v2 = eig(L2)

    if eigen:  # eig value
        if args.middle:
            diff = w2 - w1
            ax[fig_num].scatter(range(len(diff)), diff, marker='o')
            middle = n_node // 2  # NOTE(review): unused — likely leftover
        else:
            if n_eig != len(w1):
                # Show both the smallest and the largest n_eig eigenvalues.
                ax[fig_num].plot(w1[:n_eig], marker='o', label=f'$w_1$. First {n_eig}')
                ax[fig_num].plot(w2[:n_eig], marker='o', label=f'$w_2$. First {n_eig}')
                ax[fig_num].plot(w1[-n_eig:], marker='o', label=f'$w_1$. Last {n_eig}')
                ax[fig_num].plot(w2[-n_eig:], marker='o', label=f'$w_2$. Last {n_eig}')
            else:
                ax[fig_num].plot(w1[:n_eig], marker='o', label=f'$w_1$.')
                ax[fig_num].plot(w2[:n_eig], marker='o', label=f'$w_2$.')
        # Round the changed-edge percentage to the nearest multiple of 10.
        percent = int(100 * n_edges_to_change / (g.num_edges // 2))
        if percent % 10 == 1:
            percent -= 1
        if percent % 10 == 9:
            percent += 1
        # FIX: '\%' is an invalid escape sequence; '\\%' yields the same
        # literal backslash-percent (TeX-escaped '%') without the warning.
        title = f'{action} {percent}\\% edges.'
        ax[fig_num].set_title(title)
        ax[fig_num].set_ylim([0, 2])
        # ax[fig_num].set_yscale('log', basey=2)
        if legend:
            ax[fig_num].legend(loc='center right')

    if coef:
        fix_seed()
        # Random 3-column signal on the nodes.
        if args.gaussian:
            v = np.random.normal(0, 1, (g.num_nodes, 3))
        else:
            v = np.random.random((g.num_nodes, 3))
        # Iteratively smooth the signal with (I - L) on both graphs.
        v_sm = {'v1_sm0': v, 'v2_sm0': v}
        for i in range(3):
            tmp1 = np.dot(np.identity(len(w1)) - tonp(L1), v_sm[f'v1_sm{i}'])
            tmp2 = np.dot(np.identity(len(w2)) - tonp(L2), v_sm[f'v2_sm{i}'])
            v_sm[f'v1_sm{i + 1}'] = tmp1
            v_sm[f'v2_sm{i + 1}'] = tmp2
        # Project the smoothed signals onto the eigenbases.
        # NOTE(review): only the *_coef1 projections are plotted; the others
        # appear to be retained for inspection/debugging.
        v2_coef0 = np.dot(v2.T, v_sm['v2_sm0'])
        v2_coef1 = np.dot(v2.T, v_sm['v2_sm1'])
        v2_coef2 = np.dot(v2.T, v_sm['v2_sm2'])
        v1_coef0 = np.dot(v1.T, v_sm['v1_sm0'])
        v1_coef1 = np.dot(v1.T, v_sm['v1_sm1'])
        v1_coef2 = np.dot(v1.T, v_sm['v1_sm2'])
        ax[fig_num].scatter(range(n_eig - 1), v1_coef1[1:, 0], s=5, label=f"$c_1$")
        ax[fig_num].scatter(range(n_eig - 1), v2_coef1[1:, 0], s=5, label=f"$c_1'$")
        ax[fig_num].set_ylim([-1, 1])
        percent = int(100 * n_edges_to_change / (g.num_edges // 2))
        if percent % 10 == 1:
            percent -= 1
        if percent % 10 == 9:
            percent += 1
        title = f'{action} {percent}\\% edges.'
        ax[fig_num].set_title(title)
        if legend:
            ax[fig_num].legend(loc='center right')

    if energy_:  # signal
        fix_seed()
        # Build a 20-column test signal: Gaussian, low-eigenvector mix, or uniform.
        if args.gaussian:
            v = np.random.normal(0, 1, (g.num_nodes, 20))
        elif args.loweig:
            k = 30
            if name == 'cora':
                k = 400
            if name == 'citeseer':
                k = 400
            print(k)
            v = low_eig(g).mix_low_eig(k=k, n_vec=20, mode='full')
        else:
            v = np.random.random((g.num_nodes, 20))
        # v = normalize(v, axis=0)
        # Two smoothing passes with (I - L) on both graphs.
        v_sm = {'v1_sm0': v, 'v2_sm0': v}
        for i in range(2):
            tmp1 = np.dot(np.identity(n_node) - tonp(L1), v_sm[f'v1_sm{i}'])
            tmp2 = np.dot(np.identity(n_node) - tonp(L2), v_sm[f'v2_sm{i}'])
            v_sm[f'v1_sm{i + 1}'] = tmp1
            v_sm[f'v2_sm{i + 1}'] = tmp2
        # Dirichlet energy of the raw and smoothed signals on each Laplacian.
        D1_sm0 = energy(v_sm['v1_sm0'], L1)
        D2_sm0 = energy(v_sm['v2_sm0'], L2)
        D1_sm = energy(v_sm['v1_sm1'], L1)
        D2_sm = energy(v_sm['v2_sm1'], L2)
        D1_sm2 = energy(v_sm['v1_sm2'], L1)
        D2_sm2 = energy(v_sm['v2_sm2'], L2)
        ax[fig_num].scatter(range(n_eig), D1_sm0[:n_eig], label=f'$E_0$')
        ax[fig_num].scatter(range(n_eig), D2_sm0[:n_eig], label=f"$E'_0$")
        ax[fig_num].scatter(range(n_eig), D1_sm[:n_eig], label=f'$E_1$')
        ax[fig_num].scatter(range(n_eig), D2_sm[:n_eig], label=f"$E'_1$")
        ax[fig_num].scatter(range(n_eig), D1_sm2[:n_eig], label=f'$E_2$')
        ax[fig_num].scatter(range(n_eig), D2_sm2[:n_eig], label=f"$E'_2$")
        percent = int(100 * n_edges_to_change / (g.num_edges // 2))
        if percent % 10 == 1:
            percent -= 1
        if percent % 10 == 9:
            percent += 1
        title = f'{action} {percent}\\% edges.'
        ax[fig_num].set_title(title)
        # NOTE(review): `basey` was removed in newer matplotlib (use `base=`);
        # kept as-is to match the pinned version this file targets — confirm.
        ax[fig_num].set_yscale('log', basey=10)  # set y axis scale
        # Per-dataset y-limits, tuned separately for each signal type.
        if args.gaussian:
            if name not in ['cora', 'citeseer']:
                ax[fig_num].set_ylim([10 ** (-3), 10 ** 3])
            if name in ['cora']:
                ax[fig_num].set_ylim([10 ** (2.5), 10 ** 4])
            if name in ['citeseer']:
                ax[fig_num].set_ylim([10 ** (2.9), 10 ** 4])
        elif args.loweig and name in ['cora', 'citeseer']:
            pass
        else:
            if name not in ['cora', 'citeseer']:
                ax[fig_num].set_ylim([10 ** (-3), 10 ** 1.5])
            if name in ['cora']:
                ax[fig_num].set_ylim([10 ** (1.5), 10 ** 3])
            if name in ['citeseer']:
                ax[fig_num].set_ylim([10 ** (1.9), 10 ** 3])
        if legend:
            ax[fig_num].legend(loc='center right')
if name not in ['cora', 'citeseer']: ax[fig_num].set_ylim([10 ** (-3), 10 ** 3]) if name in ['cora']: ax[fig_num].set_ylim([10 ** (2.5), 10 ** 4]) if name in ['citeseer']: ax[fig_num].set_ylim([10 ** (2.9), 10 ** 4]) elif args.loweig and name in ['cora', 'citeseer']: pass else: if name not in ['cora', 'citeseer']: ax[fig_num].set_ylim([10 ** (-3), 10 ** 1.5]) if name in ['cora']: ax[fig_num].set_ylim([10 ** (1.5), 10 ** 3]) if name in ['citeseer']: ax[fig_num].set_ylim([10 ** (1.9), 10 ** 3]) if legend: ax[fig_num].legend(loc='center right') if __name__ == '__main__': args = parser.parse_args() fix_seed() helper = random_pyG() n_node = 200 if args.graph == 'GEO': g_nx = nx.random_geometric_graph(n_node, 0.2, seed=42) g = helper.process_nx_graph(g_nx, add_weight=True) name = 'Random Geometric Graph' elif args.graph == 'ER': g_nx = nx.erdos_renyi_graph(n_node, 0.05, seed=42) g = helper.process_nx_graph(g_nx, add_weight=True) name = f'Erdos Renyi Graph G({n_node}, 0.05)' elif args.graph == 'WS': g_nx = nx.watts_strogatz_graph(n_node, 20, p=0.05, seed=42) g = helper.process_nx_graph(g_nx, add_weight=True) name = 'Watts-Strogatz Graph'