Example #1
import logging
from pathlib import Path

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

# fix_seed, PTCL_HEADER and PTCL_FEATURES are assumed to be defined in the
# surrounding project module.


def main(args):
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)

    fix_seed(args.seed)
    if args.csv_path is None:
        args.csv_path = "csv/%s.%s.%s.%s.csv" % (
            args.dsid, args.level, args.preselection, args.systematic)
    logging.info(f'training file: {args.csv_path}')

    data = pd.read_csv(args.csv_path, delimiter=',', names=PTCL_HEADER)
    if args.task == 'tail':
        tail_cut = 1500
        data = data[data['jj_M'] > tail_cut]
    data = data[PTCL_FEATURES]
    items = data.values

    logging.info(f'input features: {list(data.columns)}')
    logging.info(f'total number of input features: {len(data.columns)}')

    train_set, test_set = train_test_split(items,
                                           random_state=args.seed,
                                           shuffle=args.random_split,
                                           train_size=args.train_split)

    source_path = Path(args.csv_path)
    parent_path = source_path.parent
    file_name = source_path.stem

    train_file = parent_path / f'train_{file_name}'
    test_file = parent_path / f'test_{file_name}'

    logging.info(
        f'saving train/test files to {str(train_file)} and {str(test_file)}...'
    )

    np.save(train_file, train_set)
    np.save(test_file, test_set)
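
Every example in this listing calls a module-level fix_seed helper that is never shown. A minimal sketch, assuming it seeds the Python, NumPy, and PyTorch RNGs (the real project helper may differ; the optional seed argument matches the fix_seed(args.seed) call above):

import random

import numpy as np
import torch


def fix_seed(seed=42):
    # Hypothetical helper: seed every RNG the examples rely on.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)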
Example #2
import os
import time

import numpy as np
import torch
import torch.nn.functional as F
from torch import optim

import aae        # project module defining AAE and loss_fn
import data_util  # project module defining AnimeFaceData
import util       # project module defining fix_seed and save_weights


def main():
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25
    batch_size = 64
    shuffle = True
    num_loader_workers = 2
    std_dev = 1.
    mu = 0.
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    aae_net = aae.AAE(latent_dim, hidden_dim)
    if cuda:
        aae_net.cuda()

    # Set loss fn.
    loss_fn = aae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(aae_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Critic targets: ones for real samples, zeros for generated ones.
    ones = torch.ones(batch_size)
    zeroes = torch.zeros(batch_size)
    if cuda:
        ones = ones.cuda()
        zeroes = zeroes.cuda()

    # Epoch loop.
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch + 1, num_epochs))
        start = time.time()

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch + 1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            # Train autoencoder and gan on real batch.
            x_reconst, real_critic = aae_net(x)
            loss = loss_fn(x, x_reconst, real_critic, ones)
            loss.backward()
            optimizer.step()

            # Train gan on fake batch.
            fake_z = torch.Tensor(std_dev *
                                  np.random.randn(batch_size, latent_dim) + mu)
            if cuda:
                fake_z = fake_z.cuda()
            fake_critic = aae_net.gan_fake_forward(fake_z)
            loss = F.binary_cross_entropy(fake_critic, zeroes, reduction='sum')
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            util.save_weights(
                aae_net, os.path.join(save_dir, 'aae_{}.pth'.format(epoch)))

        end = time.time()
        print('loss: ', loss.item())  # last-batch loss
        print('Took {}'.format(end - start))
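
aae.loss_fn above is defined elsewhere in the project. A sketch that is consistent with the call loss_fn(x, x_reconst, real_critic, ones), assuming a summed reconstruction term plus a critic binary cross-entropy term mirroring the fake-batch loss in the loop (the actual code may weight these terms differently):

import torch.nn.functional as F


def loss_fn(x, x_reconst, critic_out, target):
    # Hypothetical: reconstruction error plus critic BCE, both summed.
    reconst_loss = F.binary_cross_entropy(x_reconst, x, reduction='sum')
    critic_loss = F.binary_cross_entropy(critic_out, target, reduction='sum')
    return reconst_loss + critic_loss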
Example #3
import os
import time

from torch import optim

import data_util  # project module defining AnimeFaceData
import util       # project module defining fix_seed and save_weights
import vqvae      # project module defining VQVAE and loss_fn


def main():
    # Set variables.
    img_dim = [64, 64]
    codebook_size = 256
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 100
    save_freq = 25
    batch_size = 64
    shuffle = True
    num_loader_workers = 4
    beta = 1.0
    cuda = True
    learning_rate = 0.001
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    model_net = vqvae.VQVAE(latent_dim, hidden_dim, codebook_size)
    if cuda:
        model_net.cuda()

    # Set loss fn.
    loss_fn = vqvae.loss_fn

    # Load optimizer.
    optimizer = optim.Adam(model_net.parameters(), lr=learning_rate)

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Epoch loop
    for epoch in range(num_epochs):
        print('Epoch {} of {}'.format(epoch + 1, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            print('Batch {}'.format(i_batch + 1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, embed_loss, _ = model_net(x)
            loss = loss_fn(x, x_reconst, embed_loss)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()

        if epoch % save_freq == 0:
            util.save_weights(
                model_net, os.path.join(save_dir, 'vqvae_{}.pth'.format(epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))
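
Likewise, vqvae.loss_fn is not shown. A sketch consistent with the call loss_fn(x, x_reconst, embed_loss), assuming the standard VQ-VAE objective of a reconstruction term plus the already-computed codebook/commitment loss:

import torch.nn.functional as F


def loss_fn(x, x_reconst, embed_loss):
    # Hypothetical: summed reconstruction error plus the VQ embedding loss.
    return F.mse_loss(x_reconst, x, reduction='sum') + embed_loss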
Example #4
import os
import time

from torch import optim

import data_util  # project module defining AnimeFaceData
import util       # project module defining fix_seed and save_weights
import vae        # project module defining VAE and loss_fn


def main():
    # Set variables.
    img_dim = [64, 64]
    latent_dim = 32
    hidden_dim = 1024
    num_epochs = 20
    save_freq = 5
    batch_size = 128
    shuffle = True
    num_loader_workers = 3
    beta = 1.
    cuda = True
    learning_rate = 0.001
    adaptive = False  # True: Adam; False: SGD + OneCycleLR
    save_dir = os.path.dirname(os.path.realpath(__file__))

    # fix seed for experiment.
    util.fix_seed()

    # Load Encoder, Decoder.
    vae_net = vae.VAE(latent_dim, hidden_dim)
    if cuda:
        vae_net.cuda()

    # Set loss fn.
    loss_fn = vae.loss_fn

    # Load Dataset.
    anime_data = data_util.AnimeFaceData(img_dim, batch_size, shuffle,
                                         num_loader_workers)

    # Load optimizer.
    if adaptive:
        optimizer = optim.Adam(vae_net.parameters(), lr=learning_rate)
    else:
        optimizer = optim.SGD(vae_net.parameters(), lr=learning_rate)
        # scheduler.step() runs once per batch below, so steps_per_epoch must
        # match the number of batches per epoch; a fixed value like 10 would
        # exhaust the one-cycle schedule early and raise an error.
        scheduler = optim.lr_scheduler.OneCycleLR(
            optimizer,
            max_lr=1e-1,
            epochs=num_epochs,
            steps_per_epoch=len(anime_data.data_loader))

    # Epoch loop
    for epoch in range(1, num_epochs + 1):
        print('Epoch {} of {}'.format(epoch, num_epochs))
        start = time.time()
        train_loss = 0

        # Batch loop.
        for i_batch, batch_data in enumerate(anime_data.data_loader, 0):
            # print('Batch {}'.format(i_batch+1))

            # Load batch.
            x, _ = batch_data
            if cuda:
                x = x.cuda()

            # Reset gradient.
            optimizer.zero_grad()

            # Run batch, calculate loss, and backprop.
            x_reconst, mu, logvar = vae_net(x)
            loss = loss_fn(x, x_reconst, mu, logvar, beta)
            train_loss += loss.item()
            loss.backward()
            optimizer.step()
            if not adaptive:
                scheduler.step()

        if epoch % save_freq == 0:
            if adaptive:
                o = 'adaptive'
            else:
                o = 'cyclic'
            util.save_weights(
                vae_net,
                os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))

        end = time.time()
        print('loss: ', train_loss / len(anime_data.img_folder))
        print('Took {}'.format(end - start))

    # Save final weights once training completes.
    if adaptive:
        o = 'adaptive'
    else:
        o = 'cyclic'
    util.save_weights(
        vae_net, os.path.join(save_dir, 'vae_{}_{}.pth'.format(o, epoch)))
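
util.save_weights appears in several of these examples. A one-line sketch, assuming it simply serializes the model's state dict (the real helper might also checkpoint optimizer state):

import torch


def save_weights(model, path):
    # Hypothetical helper: persist only the model parameters.
    torch.save(model.state_dict(), path)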
Example #5
import logging
from datetime import datetime
from pathlib import Path

import torch
from comet_ml import Experiment

# fix_seed, get_data, get_models, GANTrainer, WGPGANTrainer, setup_optimizer,
# load_model, evaluate_model and save_model are assumed project-level imports.


def main_train(args):
    now = datetime.now()
    save_to = Path(args.save_to) if args.save_to is not None else Path().cwd()
    save_dir = save_to / f'{now:%Y%m%d-%H%M-%S}'
    fix_seed(args.seed)
    logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
                        level=logging.INFO)
    device = torch.device(
        'cuda:0') if torch.cuda.is_available() else torch.device('cpu')

    dataset_train, dataset_test, scaler = get_data(args)

    logging.info(f'training level: {args.level}')

    n_features = dataset_train.items.shape[1]
    generator, discriminator = get_models(args, n_features, device)

    if args.gan_type == 'vanilla':
        trainer = GANTrainer(generator, discriminator, device)
    elif args.gan_type == 'wgp':
        trainer = WGPGANTrainer(generator,
                                discriminator,
                                device,
                                lambda_=args.lambda_)
    else:
        raise ValueError(f'Unknown gan type: {args.gan_type}')

    optimizer_d = setup_optimizer(discriminator,
                                  args.learning_rate,
                                  weight_decay=0,
                                  args=args)
    optimizer_g = setup_optimizer(generator,
                                  args.learning_rate,
                                  weight_decay=0,
                                  args=args)

    if args.load_from is not None:
        load_model(Path(args.load_from), generator, discriminator, optimizer_g,
                   optimizer_d, device)

    experiment = Experiment(args.comet_api_key,
                            project_name=args.comet_project_name,
                            workspace=args.comet_workspace)
    experiment.log_parameters(vars(args))
    iterations_total = trainer.train(
        args,
        dataset_train,
        optimizer_g,
        optimizer_d,
        scaler=scaler,
        save_dir=save_dir,
        test_dataset=dataset_test.items[:len(dataset_test) // 10],
        experiment=experiment)

    n_events = len(dataset_test)
    steps = (args.gan_test_ratio * n_events) // args.eval_batch_size

    evaluate_model(generator, experiment, dataset_test, args.eval_batch_size,
                   steps, args, device, scaler, iterations_total)
    experiment.end()

    save_model(save_dir, generator, discriminator, optimizer_g, optimizer_d,
               iterations_total)
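
setup_optimizer is another project helper. A minimal sketch consistent with how it is called above; the args.optimizer field used to pick the class is a hypothetical name, not confirmed by the source:

from torch import optim


def setup_optimizer(model, learning_rate, weight_decay, args):
    # Hypothetical: choose the optimizer class from a CLI flag.
    if getattr(args, 'optimizer', 'adam') == 'adam':
        return optim.Adam(model.parameters(), lr=learning_rate,
                          weight_decay=weight_decay)
    return optim.SGD(model.parameters(), lr=learning_rate,
                     weight_decay=weight_decay)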
Example #6
import numpy as np

# args, helper, n_node and the plot axes `ax` are globals created in the
# __main__ block of this script (Example #7); eig, get_laplacian_mat, tonp,
# energy, low_eig and fix_seed are assumed project-level imports.


def plot(g, n_edges_to_change, fig_num=0, verbose=False, legend=False, n_eig=20,
         energy_=False, eigen=False, action='drop', coef=False, name='cora'):
    fix_seed()

    if action == 'drop':
        g_drop = helper.rm_pyG_edges(g, n=n_edges_to_change)
    elif action == 'increase':
        g_drop = helper.increase_random_edge_w(g, n=n_edges_to_change, w=10000)
    else:
        raise NotImplementedError(f'Unknown action: {action}')

    L1 = get_laplacian_mat(g.edge_index, g.edge_weight, n_node,
                           normalization='sym')
    L2 = get_laplacian_mat(g_drop.edge_index, g_drop.edge_weight, n_node,
                           normalization='sym')

    if eigen or coef:  # no need to compute eigenvector for energy
        w1, v1 = eig(L1)
        w2, v2 = eig(L2)

    if eigen:
        # eig value
        if args.middle:
            diff = w2 - w1
            ax[fig_num].scatter(range(len(diff)), diff, marker='o')
            middle = n_node // 2
        else:
            if n_eig != len(w1):
                ax[fig_num].plot(w1[:n_eig], marker='o', label=f'$w_1$. First {n_eig}')
                ax[fig_num].plot(w2[:n_eig], marker='o', label=f'$w_2$. First {n_eig}')
                ax[fig_num].plot(w1[-n_eig:], marker='o', label=f'$w_1$. Last {n_eig}')
                ax[fig_num].plot(w2[-n_eig:], marker='o', label=f'$w_2$. Last {n_eig}')
            else:
                ax[fig_num].plot(w1[:n_eig], marker='o', label='$w_1$.')
                ax[fig_num].plot(w2[:n_eig], marker='o', label='$w_2$.')

        # Round percentages such as 9 or 11 to the nearest multiple of 10.
        percent = int(100 * n_edges_to_change / (g.num_edges // 2))
        if percent % 10 == 1:
            percent -= 1
        if percent % 10 == 9:
            percent += 1

        title = rf'{action} {percent}\% edges.'
        ax[fig_num].set_title(title)
        ax[fig_num].set_ylim([0, 2])
        # ax[fig_num].set_yscale('log', base=2)
        if legend:
            ax[fig_num].legend(loc='center right')

    if coef:
        fix_seed()

        if args.gaussian:
            v = np.random.normal(0, 1, (g.num_nodes, 3))
        else:
            v = np.random.random((g.num_nodes, 3))

        v_sm = {'v1_sm0': v, 'v2_sm0': v}
        for i in range(3):
            tmp1 = np.dot(np.identity(len(w1)) - tonp(L1), v_sm[f'v1_sm{i}'])
            tmp2 = np.dot(np.identity(len(w2)) - tonp(L2), v_sm[f'v2_sm{i}'])
            v_sm[f'v1_sm{i + 1}'] = tmp1
            v_sm[f'v2_sm{i + 1}'] = tmp2

        v2_coef0 = np.dot(v2.T, v_sm['v2_sm0'])
        v2_coef1 = np.dot(v2.T, v_sm['v2_sm1'])
        v2_coef2 = np.dot(v2.T, v_sm['v2_sm2'])

        v1_coef0 = np.dot(v1.T, v_sm['v1_sm0'])
        v1_coef1 = np.dot(v1.T, v_sm['v1_sm1'])
        v1_coef2 = np.dot(v1.T, v_sm['v1_sm2'])

        # Plot coefficients 1..n_eig-1 (skipping the constant mode); slice
        # the y values so their length matches the x range.
        ax[fig_num].scatter(range(n_eig - 1), v1_coef1[1:n_eig, 0], s=5,
                            label='$c_1$')
        ax[fig_num].scatter(range(n_eig - 1), v2_coef1[1:n_eig, 0], s=5,
                            label="$c_1'$")

        ax[fig_num].set_ylim([-1, 1])
        percent = int(100 * n_edges_to_change / (g.num_edges // 2))
        if percent % 10 == 1:
            percent -= 1
        if percent % 10 == 9:
            percent += 1
        title = rf'{action} {percent}\% edges.'
        ax[fig_num].set_title(title)
        if legend:
            ax[fig_num].legend(loc='center right')

    if energy_:
        # signal
        fix_seed()
        if args.gaussian:
            v = np.random.normal(0, 1, (g.num_nodes, 20))
        elif args.loweig:
            k = 30
            if name in ('cora', 'citeseer'):
                k = 400
            print(k)

            v = low_eig(g).mix_low_eig(k=k, n_vec=20, mode='full')
        else:
            v = np.random.random((g.num_nodes, 20))
        # v = normalize(v, axis=0)
        v_sm = {'v1_sm0': v, 'v2_sm0': v}
        for i in range(2):
            tmp1 = np.dot(np.identity(n_node) - tonp(L1), v_sm[f'v1_sm{i}'])
            tmp2 = np.dot(np.identity(n_node) - tonp(L2), v_sm[f'v2_sm{i}'])

            v_sm[f'v1_sm{i + 1}'] = tmp1
            v_sm[f'v2_sm{i + 1}'] = tmp2

        D1_sm0 = energy(v_sm['v1_sm0'], L1)
        D2_sm0 = energy(v_sm['v2_sm0'], L2)

        D1_sm = energy(v_sm['v1_sm1'], L1)
        D2_sm = energy(v_sm['v2_sm1'], L2)

        D1_sm2 = energy(v_sm['v1_sm2'], L1)
        D2_sm2 = energy(v_sm['v2_sm2'], L2)

        ax[fig_num].scatter(range(n_eig), D1_sm0[:n_eig], label='$E_0$')
        ax[fig_num].scatter(range(n_eig), D2_sm0[:n_eig], label="$E'_0$")

        ax[fig_num].scatter(range(n_eig), D1_sm[:n_eig], label='$E_1$')
        ax[fig_num].scatter(range(n_eig), D2_sm[:n_eig], label="$E'_1$")

        ax[fig_num].scatter(range(n_eig), D1_sm2[:n_eig], label='$E_2$')
        ax[fig_num].scatter(range(n_eig), D2_sm2[:n_eig], label="$E'_2$")

        percent = int(100 * n_edges_to_change / (g.num_edges // 2))
        if percent % 10 == 1:
            percent -= 1
        if percent % 10 == 9:
            percent += 1

        title = rf'{action} {percent}\% edges.'
        ax[fig_num].set_title(title)
        # `basey` was renamed to `base` in newer matplotlib releases.
        ax[fig_num].set_yscale('log', base=10)

        # Set per-dataset y-axis limits.
        if args.gaussian:
            if name not in ['cora', 'citeseer']:
                ax[fig_num].set_ylim([10 ** -3, 10 ** 3])
            if name in ['cora']:
                ax[fig_num].set_ylim([10 ** 2.5, 10 ** 4])
            if name in ['citeseer']:
                ax[fig_num].set_ylim([10 ** 2.9, 10 ** 4])
        elif args.loweig and name in ['cora', 'citeseer']:
            pass
        else:
            if name not in ['cora', 'citeseer']:
                ax[fig_num].set_ylim([10 ** -3, 10 ** 1.5])
            if name in ['cora']:
                ax[fig_num].set_ylim([10 ** 1.5, 10 ** 3])
            if name in ['citeseer']:
                ax[fig_num].set_ylim([10 ** 1.9, 10 ** 3])
        if legend:
            ax[fig_num].legend(loc='center right')
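
get_laplacian_mat is also defined outside this snippet. One plausible sketch, assuming it wraps torch_geometric.utils.get_laplacian into a dense matrix, which would be consistent with the eig and tonp calls above:

import torch
from torch_geometric.utils import get_laplacian


def get_laplacian_mat(edge_index, edge_weight, n_node, normalization='sym'):
    # Hypothetical: dense (normalized) graph Laplacian from PyG edge data.
    index, weight = get_laplacian(edge_index, edge_weight,
                                  normalization=normalization,
                                  num_nodes=n_node)
    L = torch.zeros(n_node, n_node)
    L[index[0], index[1]] = weight
    return L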
Example #7
import networkx as nx

# parser (an argparse.ArgumentParser) and random_pyG are assumed to be
# defined earlier in the source module.

if __name__ == '__main__':
    args = parser.parse_args()
    fix_seed()
    helper = random_pyG()
    n_node = 200

    if args.graph == 'GEO':
        g_nx = nx.random_geometric_graph(n_node, 0.2, seed=42)
        g = helper.process_nx_graph(g_nx, add_weight=True)
        name = 'Random Geometric Graph'
    elif args.graph == 'ER':
        g_nx = nx.erdos_renyi_graph(n_node, 0.05, seed=42)
        g = helper.process_nx_graph(g_nx, add_weight=True)
        name = f'Erdos Renyi Graph G({n_node}, 0.05)'
    elif args.graph == 'WS':
        g_nx = nx.watts_strogatz_graph(n_node, 20, p=0.05, seed=42)
        g = helper.process_nx_graph(g_nx, add_weight=True)
        name = 'Watts-Strogatz Graph'