Example No. 1
def get_trainer(trial, dataloader):
    # Sample the number of hidden layers and the width of each layer
    n_layers = trial.suggest_categorical('n_layer', [2, 3, 4])
    hidden_dims = []
    for i in range(n_layers):
        hidden_dim = int(
            trial.suggest_loguniform('hidden_dim_{}'.format(i), 4, 256))
        hidden_dims.append(hidden_dim)
    model = GAE(39, hidden_dims)  # 39 = input feature dimension
    # Sample the learning rate on a log scale
    lr = trial.suggest_loguniform('lr', 1e-6, 1e-2)
    optim = torch.optim.Adam(model.parameters(), lr=lr)
    trainer = Trainer(model, optim, dataloader)
    return trainer
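Note: get_trainer builds one candidate model per Optuna trial. A minimal sketch of how such a function is typically driven by an Optuna study follows; the objective body and the Trainer.train interface are assumptions for illustration, not part of the example above.

import optuna

def objective(trial):
    trainer = get_trainer(trial, dataloader)  # dataloader assumed to exist in scope
    # hypothetical: train for a few epochs and return the validation loss to minimize
    val_loss = trainer.train(n_epochs=5)
    return val_loss

study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=50)
print(study.best_params)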
Example No. 2
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    model = GAE(args.in_dim, args.hidden_dims)
    model.to(device)

    print('Loading data')
    with open(args.data_file, 'rb') as f:
        graphs = dill.load(f)
    print('Loaded {} molecules'.format(len(graphs)))
    train_graphs, val_graphs = train_test_split(graphs, test_size=10000)
    train_dataset = MolDataset(train_graphs)
    val_dataset = MolDataset(val_graphs)
    del train_graphs, val_graphs

    train_loader = DataLoader(train_dataset,
                              batch_size=args.batch_size,
                              shuffle=True,
                              collate_fn=collate)
    val_loader = DataLoader(val_dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            collate_fn=collate)
    trainer = Trainer(model, args)
    train_losses, val_losses = [], []
    print('Training Start')
    for epoch in tqdm(range(args.n_epochs)):
        train_loss = 0
        model.train()
        for bg in tqdm(train_loader):
            bg.set_e_initializer(dgl.init.zero_initializer)
            bg.set_n_initializer(dgl.init.zero_initializer)
            train_loss += trainer.iteration(bg)
        train_loss /= len(train_loader)
        train_losses.append(train_loss)
        trainer.save(epoch, args.save_dir)

        val_loss = 0
        model.eval()
        for bg in val_loader:
            bg.set_e_initializer(dgl.init.zero_initializer)
            bg.set_n_initializer(dgl.init.zero_initializer)
            val_loss += trainer.iteration(bg, train=False)
        val_loss /= len(val_loader)
        val_losses.append(val_loss)
        print('Epoch: {:02d} | Train Loss: {:.4f} | Validation Loss: {:.4f}'.
              format(epoch, train_loss, val_loss))
    plot(train_losses, val_losses)
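Note: the plot helper is not shown in this example. A minimal sketch of what it might look like, assuming it saves the train/validation loss curves to disk (the zinc250k.png filename is borrowed from the directory-creation line and is an assumption):

import os
import matplotlib.pyplot as plt

def plot(train_losses, val_losses, save_dir='.', filename='zinc250k.png'):
    # Hypothetical sketch: plot per-epoch train/validation losses and save the figure
    plt.figure()
    plt.plot(train_losses, label='train')
    plt.plot(val_losses, label='validation')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend()
    plt.grid()
    plt.savefig(os.path.join(save_dir, filename))
    plt.close()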
Example No. 3
def main():
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # TODO: train test split
    # load and preprocess dataset
    data = load_data(args)
    features = torch.FloatTensor(data.features)
    in_feats = features.shape[1]
    print(features.shape)
    model = GAE(in_feats, [32, 16])
    model.train()
    optim = torch.optim.Adam(model.parameters(), lr=1e-2)
    # Presumably a BCE-with-logits-style loss: it is called below with raw
    # logits and a pos_weight keyword to offset the sparsity of the adjacency
    loss_function = BCELoss

    g = DGLGraph(data.graph)
    g.ndata['h'] = features


    n_epochs = 500
    losses = []
    print('Training Start')
    for epoch in tqdm(range(n_epochs)):
        g.ndata['h'] = features
        # symmetric normalization: D^{-1/2}, with isolated nodes set to 0
        degs = g.in_degrees().float()
        norm = torch.pow(degs, -0.5)
        norm[torch.isinf(norm)] = 0
        g.ndata['norm'] = norm.unsqueeze(1)
        adj = g.adjacency_matrix().to_dense()
        # weight positive (existing) edges by the negative-to-positive ratio
        pos_weight = torch.Tensor(
            [float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum()])

        adj_logits = model(g)  # node features are read from g.ndata['h']
        loss = loss_function(adj_logits, adj, pos_weight=pos_weight)
        optim.zero_grad()
        loss.backward()
        optim.step()
        losses.append(loss.item())
        print('Epoch: {:02d} | Loss: {:.5f}'.format(epoch, loss.item()))

    plt.plot(losses)
    plt.xlabel('iteration')
    plt.ylabel('train loss')
    plt.grid()
    plt.show()
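Note: the pos_weight computed above compensates for the sparsity of the adjacency matrix by up-weighting the rare positive entries. A minimal self-contained sketch of the same weighted reconstruction loss using PyTorch's built-in binary_cross_entropy_with_logits (the toy adjacency and logits here are stand-ins, not data from the example):

import torch
import torch.nn.functional as F

# toy stand-ins for the dense adjacency matrix and the model's raw reconstruction scores
adj = torch.zeros(5, 5)
adj[0, 1] = adj[1, 0] = 1.0
adj_logits = torch.randn(5, 5, requires_grad=True)

# up-weight the (rare) positive entries by the negative-to-positive ratio
pos_weight = torch.tensor([(adj.numel() - adj.sum()) / adj.sum()])

loss = F.binary_cross_entropy_with_logits(adj_logits, adj, pos_weight=pos_weight)
loss.backward()
print(loss.item())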
Example No. 4
    def build_model(self):
        self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
        self.action = tf.placeholder(tf.float32, [None, self.action_size])
        self.advantage = tf.placeholder(tf.float32, [None])

        #Mean of old action distribution
        self.old_action_dist_mu = tf.placeholder(tf.float32,
                                                 [None, self.action_size])
        self.old_action_dist_logstd = tf.placeholder(tf.float32,
                                                     [None, self.action_size])

        #NN framework for action distribution
        self.action_dist_mu, action_dist_logstd = self.build_policy(self.obs)

        # Get trainable variables for the policy (NN weights)
        tr_vrbs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                    scope='Policy')
        for i in tr_vrbs:
            print(i.op.name)

        # Construct the distribution by repeating action_dist_logstd across the batch
        self.action_dist_logstd = tf.tile(action_dist_logstd,
                                          (tf.shape(self.obs)[0], 1))

        # Log-probability of the action under the new vs. old policy
        self.log_policy = LOG_POLICY(self.action_dist_mu,
                                     self.action_dist_logstd, self.action)
        self.log_old_policy = LOG_POLICY(self.old_action_dist_mu,
                                         self.old_action_dist_logstd,
                                         self.action)
        policy_ratio = tf.exp(self.log_policy - self.log_old_policy)

        #Number of observations in batch
        batch_size = tf.cast(tf.shape(self.obs)[0], tf.float32)
        '''
        Equation (14) in paper
        Contribution of a single s_n : Expectation over a~q[ (new policy / q(is)) * advantage_old]
        '''
        surr_single_state = -tf.reduce_mean(policy_ratio * self.advantage)

        # Define KL divergence and Shannon entropy, averaged over the batch of inputs
        kl = GAUSS_KL(self.old_action_dist_mu, self.old_action_dist_logstd,
                      self.action_dist_mu,
                      self.action_dist_logstd) / batch_size
        ent = GAUSS_ENTROPY(self.action_dist_mu,
                            self.action_dist_logstd) / batch_size

        #Define 'loss' quantities to constrain or maximize
        self.losses = [surr_single_state, kl, ent]

        # Maximize surrogate function over policy parameter 'theta' represented by neural network weights
        self.pg = FLAT_GRAD(surr_single_state, tr_vrbs)

        #KL divergence where first argument is fixed
        kl_first_fixed = GAUSS_KL_FIRST_FIX(
            self.action_dist_mu, self.action_dist_logstd) / batch_size

        #Gradient of KL divergence w.r.t. theta (NN policy weights)
        first_kl_grads = tf.gradients(kl_first_fixed, tr_vrbs)

        self.flat_tangent = tf.placeholder(tf.float32, [None])
        tangent = list()
        start = 0
        for vrbs in tr_vrbs:
            variable_size = np.prod(vrbs.get_shape().as_list())
            param = tf.reshape(
                self.flat_tangent[start:(start + variable_size)],
                vrbs.get_shape())
            tangent.append(param)
            start += variable_size
        '''
            Gradient of KL with tangent vector
            gradient_w_tangent : list of KL_prime*y for each variable
        '''
        gradient_w_tangent = [
            tf.reduce_sum(kl_g * t)
            for (kl_g, t) in zip(first_kl_grads, tangent)
        ]
        '''
            From the derivative of KL_prime*y : [dKL/dx1, dKL/dx2, ...]*y
                y -> Ay, where A is an n-by-n matrix that is hard to form explicitly
                (numerically solving (n*n)*(n*1)), so first multiply the target 'y'
                into the gradient and then take the derivative
            'self.FVP' returns : [d2KL/dx1dx1 + d2KL/dx1dx2 + ..., d2KL/dx1dx2 + d2KL/dx2dx2 + ..., ...]*y
            i.e. (second derivative of KL divergence)*y for each variable => y -> JMJy (Fisher vector product)
        '''
        self.FVP = FLAT_GRAD(gradient_w_tangent, tr_vrbs)

        #Get actual parameter value
        self.get_value = GetValue(self.sess, tr_vrbs, name='Policy')

        #Set parameter values
        self.set_value = SetValue(self.sess, tr_vrbs, name='Policy')

        #Estimate of the advantage function
        self.gae = GAE(self.sess, self.observation_size, self.args.gamma,
                       self.args.lamda, self.args.vf_constraint)

        # Initialization of the barrier function compensator
        self.bar_comp = BARRIER(self.args, self.sess, self.observation_size,
                                self.action_size)

        #Variable initializers
        self.sess.run(tf.global_variables_initializer())
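Note: the two docstrings above describe the Fisher-vector-product trick: differentiate the KL once, dot the flattened gradient with a tangent vector y, then differentiate that scalar again. The same double-backprop idea can be shown outside TensorFlow 1.x; below is a minimal PyTorch sketch with a toy quadratic standing in for the KL divergence (all names are illustrative, not taken from the example):

import torch

def hessian_vector_product(f, params, vector):
    # (d^2 f / d params^2) @ vector via double backprop:
    # multiply 'vector' into the first gradient, then differentiate again
    grads = torch.autograd.grad(f, params, create_graph=True)
    flat_grad = torch.cat([g.reshape(-1) for g in grads])
    grad_dot_v = (flat_grad * vector).sum()        # KL_prime * y
    hvp = torch.autograd.grad(grad_dot_v, params)  # d/dtheta (KL_prime * y) = H y
    return torch.cat([h.reshape(-1) for h in hvp])

# toy check: f(x) = 0.5 * x^T A x has Hessian A, so the result should equal A @ v
A = torch.tensor([[3.0, 1.0], [1.0, 2.0]])
x = torch.zeros(2, requires_grad=True)
v = torch.tensor([1.0, -1.0])
f = 0.5 * x @ A @ x
print(hessian_vector_product(f, [x], v))  # tensor([ 2., -1.]) == A @ v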
Example No. 5
    def build_model(self):
        self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
        self.action = tf.placeholder(tf.float32, [None, self.action_size])
        self.advantage = tf.placeholder(tf.float32, [None])
        # Mean of old action distribution
        self.old_action_dist_mu = tf.placeholder(tf.float32, [None, self.action_size])
        self.old_action_dist_logstd = tf.placeholder(tf.float32, [None, self.action_size])
        '''
        Mean value for each action : each action has a Gaussian distribution with a mean and a standard deviation
        With a continuous state and action space, use a GAUSSIAN DISTRIBUTION: the network maps the input features to the mean of the Gaussian for each action
        A separate set of parameters specifies the log standard deviation of each action
        => The policy is defined by the normal distribution (mean=NeuralNet(states), stddev=exp(r))
        '''
        self.action_dist_mu, action_dist_logstd = self.build_policy(self.obs)
        # Make the log std shape go from [1, action size] => [batch size, action size]
        # tf.tile(A, reps) : construct a tensor by repeating A as given by 'reps'
        # Use tf.shape instead of tf.get_shape() when 'None' is used in the placeholder
        self.action_dist_logstd = tf.tile(action_dist_logstd, (tf.shape(self.obs)[0], 1))

        # outputs the log-probability of taking 'self.action'
        # new distribution
        self.log_policy = LOG_POLICY(self.action_dist_mu, self.action_dist_logstd, self.action)
        # old distribution
        self.log_old_policy = LOG_POLICY(self.old_action_dist_mu, self.old_action_dist_logstd, self.action)

        # Exponentiate the difference of log-probabilities to get the policy ratio
        '''
        Equation (14) in paper
        Contribution of a single s_n : Expectation over a~q[(new policy / q(is)) * advantage_old]
        The sampling distribution q is normally the old policy
        '''
        batch_size = tf.shape(self.obs)[0]
        # print('Batch size %d' % batch_size)
        policy_ratio = tf.exp(self.log_policy - self.log_old_policy)
        surr_single_state = -tf.reduce_mean(policy_ratio * self.advantage)
        # tf.shape returns dtype=int32, tensor conversion requested dtype float32
        batch_size = tf.cast(batch_size, tf.float32)
        # Average KL divergence and Shannon entropy, averaged over the batch of inputs to mu
        kl = GAUSS_KL(self.old_action_dist_mu, self.old_action_dist_logstd, self.action_dist_mu, self.action_dist_logstd) / batch_size
        ent = GAUSS_ENTROPY(self.action_dist_mu, self.action_dist_logstd) / batch_size

        self.losses = [surr_single_state, kl, ent]
        #tr_vrbs = tf.trainable_variables()
        tr_vrbs = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope='Policy')
        for i in tr_vrbs:
            print(i.op.name)

        '''
            Compute a search direction using a linear approx to the objective and a quadratic approx to the constraint
            => The search direction is computed by approximately solving 'Ax=g' where A is the FIM
                (a quadratic approximation to the KL divergence constraint)
        '''
        # Maximize surrogate function over policy parameter 'theta'
        self.pg = FLAT_GRAD(surr_single_state, tr_vrbs)
        # KL divergence where first argument is fixed
        # First argument would be old policy parameters, so keep it constant
        kl_first_fixed = GAUSS_KL_FIRST_FIX(self.action_dist_mu, self.action_dist_logstd) / batch_size
        # Gradient of KL divergence
        first_kl_grads = tf.gradients(kl_first_fixed, tr_vrbs)
        # Vectors we are going to multiply
        self.flat_tangent = tf.placeholder(tf.float32, [None])
        tangent = list()
        start = 0
        for vrbs in tr_vrbs:
            variable_size = np.prod(vrbs.get_shape().as_list())
            param = tf.reshape(self.flat_tangent[start:(start+variable_size)], vrbs.get_shape())
            tangent.append(param)
            start += variable_size
        '''
            Gradient of KL with tangent vector
            gradient_w_tangent : list of KL_prime*y for each variable
        '''
        gradient_w_tangent = [tf.reduce_sum(kl_g*t) for (kl_g, t) in zip(first_kl_grads, tangent)]
        '''
            From the derivative of KL_prime*y : [dKL/dx1, dKL/dx2, ...]*y
                y -> Ay, where A is an n-by-n matrix that is hard to form explicitly
                (numerically solving (n*n)*(n*1)), so first multiply the target 'y'
                into the gradient and then take the derivative
            'self.FVP' returns : [d2KL/dx1dx1 + d2KL/dx1dx2 + ..., d2KL/dx1dx2 + d2KL/dx2dx2 + ..., ...]*y
            i.e. (second derivative of KL divergence)*y for each variable => y -> JMJy (Fisher vector product)
        '''
        self.FVP = FLAT_GRAD(gradient_w_tangent, tr_vrbs)
        # Get actual parameter value
        self.get_value = GetValue(self.sess, tr_vrbs, name='Policy')
        # To set parameter values
        self.set_value = SetValue(self.sess, tr_vrbs, name='Policy')
        # GAE
        self.gae = GAE(self.sess, self.observation_size, self.args.gamma, self.args.lamda, self.args.vf_constraint)
    
        self.sess.run(tf.global_variables_initializer())        
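Note: the docstring above says the search direction is obtained by approximately solving Ax = g, where A is the Fisher information matrix accessed only through self.FVP. A minimal NumPy sketch of that conjugate-gradient step follows; the fvp callable, iteration count, and tolerance are illustrative assumptions, not values from the example:

import numpy as np

def conjugate_gradient(fvp, g, iters=10, tol=1e-10):
    # Approximately solve A x = g using only Fisher-vector products A @ v
    x = np.zeros_like(g)
    r = g.copy()   # residual g - A x (x starts at 0)
    p = r.copy()   # search direction
    r_dot = r @ r
    for _ in range(iters):
        Ap = fvp(p)
        alpha = r_dot / (p @ Ap)
        x += alpha * p
        r -= alpha * Ap
        new_r_dot = r @ r
        if new_r_dot < tol:
            break
        p = r + (new_r_dot / r_dot) * p
        r_dot = new_r_dot
    return x

# toy check with an explicit SPD matrix standing in for the FIM
A = np.array([[4.0, 1.0], [1.0, 3.0]])
g = np.array([1.0, 2.0])
x = conjugate_gradient(lambda v: A @ v, g)
print(np.allclose(A @ x, g))  # True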
Example No. 6
def main():
    # Get arguments parsed
    args = get_args()

    # Setup for logging
    output_dir = 'output/{}'.format(
        datetime.now(
            timezone('Asia/Hong_Kong')).strftime('%Y-%m-%d_%H-%M-%S-%f')[:-3])
    create_dir(output_dir)
    LogHelper.setup(log_path='{}/training.log'.format(output_dir),
                    level_str='INFO')
    _logger = logging.getLogger(__name__)

    # Save the configuration for logging purpose
    save_yaml_config(args, path='{}/config.yaml'.format(output_dir))

    # Reproducibility
    set_seed(args.seed)

    # Get dataset
    dataset = SyntheticDataset(args.n, args.d, args.graph_type, args.degree,
                               args.sem_type, args.noise_scale,
                               args.dataset_type, args.x_dim)
    _logger.info('Finished generating dataset')

    model = GAE(args.n, args.d, args.x_dim, args.seed, args.num_encoder_layers,
                args.num_decoder_layers, args.hidden_size, args.latent_dim,
                args.l1_graph_penalty, args.use_float64)
    model.print_summary(print_func=model.logger.info)

    trainer = ALTrainer(args.init_rho, args.rho_thres, args.h_thres,
                        args.rho_multiply, args.init_iter, args.learning_rate,
                        args.h_tol, args.early_stopping,
                        args.early_stopping_thres)
    W_est = trainer.train(model, dataset.X, dataset.W, args.graph_thres,
                          args.max_iter, args.iter_step, output_dir)
    _logger.info('Finished training model')

    # Save raw recovered graph, ground truth and observational data after training
    np.save('{}/true_graph.npy'.format(output_dir), dataset.W)
    np.save('{}/observational_data.npy'.format(output_dir), dataset.X)
    np.save('{}/final_raw_recovered_graph.npy'.format(output_dir), W_est)

    # Plot raw recovered graph
    plot_recovered_graph(
        W_est,
        dataset.W,
        save_name='{}/raw_recovered_graph.png'.format(output_dir))

    _logger.info('Filter by constant threshold')
    W_est = W_est / np.max(np.abs(W_est))  # Normalize

    # Plot thresholded recovered graph
    W_est[np.abs(W_est) < args.graph_thres] = 0  # Thresholding
    plot_recovered_graph(
        W_est,
        dataset.W,
        save_name='{}/thresholded_recovered_graph.png'.format(output_dir))
    results_thresholded = count_accuracy(dataset.W, W_est)
    _logger.info('Results after thresholding by {}: {}'.format(
        args.graph_thres, results_thresholded))
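Note: plot_recovered_graph is not shown in this example. A minimal sketch of what such a side-by-side comparison might look like, assuming it draws the estimated and ground-truth weighted adjacency matrices as heatmaps (the layout and colormap are assumptions based only on how the function is called above):

import numpy as np
import matplotlib.pyplot as plt

def plot_recovered_graph(W_est, W_true, save_name):
    # Hypothetical sketch: show estimated and ground-truth adjacency matrices side by side
    fig, axes = plt.subplots(1, 2, figsize=(8, 4))
    for ax, W, title in zip(axes, (W_est, W_true), ('Recovered graph', 'Ground truth')):
        scale = np.abs(W).max() or 1.0
        im = ax.imshow(W, cmap='RdBu', vmin=-scale, vmax=scale)
        ax.set_title(title)
        fig.colorbar(im, ax=ax)
    fig.savefig(save_name, bbox_inches='tight')
    plt.close(fig)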