Пример #1
0
def main():
    """Train a GCN shaping signal on an n-by-m grid world, then run
    actor-critic twice (with and without the GCN potential) and plot both."""
    n, m = args.n, args.m
    nt, mt = args.nt, args.mt

    # Constant -0.1 step penalty per (cell, action); the two actions that
    # enter the goal cell (nt, mt) are free.
    reward = -0.1 * np.ones((n, m, 4))
    reward[nt - 2, mt - 1, 2] = 0
    reward[nt - 1, mt - 2, 1] = 0

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    gcn_model = GCN(nfeat=n * m, nhid=args.hidden)
    gcn_model.to(device)
    optimizer = optim.Adam(gcn_model.parameters(),
                           lr=args.lr,
                           weight_decay=args.weight_decay)

    adj, features, _, _ = shortest_dist(n, m, [n, m])

    # Row-normalize the feature matrix and densify it for torch.
    features = torch.FloatTensor(
        np.array(normalize(sp.csr_matrix(features)).todense()))

    # Symmetrize the adjacency, add self-loops, normalize, and convert to a
    # torch sparse tensor.
    adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
    adj = sparse_mx_to_torch_sparse_tensor(
        normalize(sp.csr_matrix(adj) + sp.eye(adj.shape[0])))

    for episode in range(args.gcn_epi):
        update_graph(n, m, args, gcn_model, optimizer)
        print("{} episode done :".format(episode + 1))

    if args.cuda and torch.cuda.is_available():
        features = features.cuda()
        adj = adj.cuda()

    # The model emits log-probabilities; exp() recovers probabilities and
    # column 1 reshaped to the grid is the shaping potential phi.
    output = gcn_model(features, adj).cpu()
    gcn_phi = torch.exp(output).detach().numpy()[:, 1].reshape(n, m)

    # alpha=0 uses the GCN potential, alpha=1 is the plain baseline.
    param1 = Params(n, m, nt, mt, gamma=args.gamma, qstep=args.qstep,
                    pstep=args.pstep, alpha=0, noepi=args.noepi)
    param2 = Params(n, m, nt, mt, gamma=args.gamma, qstep=args.qstep,
                    pstep=args.pstep, alpha=1, noepi=args.noepi)

    regcn, valgcn = ACPhi(param1, reward, gcn_phi)
    reg, val = ACPhi(param2, reward, gcn_phi)

    PlotAnalysis(param1.noepi, reg, val, regcn, valgcn, gcn_phi)
Пример #2
0
def gcn():
    """Train and evaluate a GCN edge-weight regressor.

    For each requested dataset (all six benchmarks when
    ``args.dataset == 'all'``) this runs ``args.exp_number`` independent
    experiments: a random 80/20 train/test split with a further 8%%
    validation split, ``args.epochs`` epochs of MSE training, then test-set
    MSE, Pearson correlation and MAE.  Per-dataset means and standard
    deviations are printed at the end, after which the process exits.
    """
    print('Training GCN mode!')

    # Single source of truth for tensor placement.  Previously the
    # feature/label/adjacency tensors were hard-coded to .cuda() while the
    # model was moved with model.to(args.device); using args.device for
    # everything keeps them consistent (identical behavior on a CUDA device).
    device = args.device

    # Per-experiment metric buffers, reset after every dataset.
    total_mse = np.zeros(args.exp_number)
    total_pcc = np.zeros(args.exp_number)
    total_mae = np.zeros(args.exp_number)

    mse_datasets = {}
    std_datasets = {}
    pcc_datasets = {}
    pcc_std_datasets = {}
    mae_datasets = {}
    mae_std_datasets = {}

    t_total = time.time()

    if args.dataset == 'all':
        datasets = [
            'airport', 'collaboration', 'congress', 'forum', 'geom', 'astro'
        ]
    else:
        datasets = [args.dataset]

    for dataset in datasets:
        for exp_number in range(args.exp_number):
            print("%s: experiment number %d" % (dataset, exp_number + 1))

            data = preprocess_dataset.clean_data(dataset)
            # 'usair' weights are used as-is; all other datasets get
            # L2-normalized edge weights.
            if dataset != 'usair':
                data['weights'] = preprocessing.normalize([data['weights']])[0]

            # Random 80/20 train/test split, then carve a validation set
            # out of the training portion.
            data_train, data_test = train_test_split(data, test_size=0.2)
            data_train, data_val = train_test_split(data_train, test_size=0.08)

            data = data.reset_index()
            data_train = data_train.reset_index()
            data_val = data_val.reset_index()
            data_test = data_test.reset_index()

            G = preprocess_dataset.create_graph_gcn(dataset, data, data_train)
            val_G = preprocess_dataset.create_graph_gcn(
                dataset, data, data_val)
            test_G = preprocess_dataset.create_graph_gcn(
                dataset, data, data_test)

            nodes_len = len(G.nodes)
            # Map original node ids to contiguous row indices.
            node_ids_to_index = {
                node_id: i for i, node_id in enumerate(G.nodes)
            }

            train_A = nx.adjacency_matrix(G)
            val_A = nx.adjacency_matrix(val_G)
            test_A = nx.adjacency_matrix(test_G)

            # Identity features are shared by the train/val/test graphs.
            I = identity(G.number_of_nodes(), dtype='int8', format='csr')
            features = torch.FloatTensor(np.array(I.todense())).to(device)

            train_labels = torch.FloatTensor(
                data_train['weights'].values).to(device)
            val_labels = torch.FloatTensor(
                data_val['weights'].values).to(device)
            test_labels = torch.FloatTensor(
                data_test['weights'].values).to(device)

            train_A = sparse_mx_to_torch_sparse_tensor(train_A).to(device)
            val_A = sparse_mx_to_torch_sparse_tensor(val_A).to(device)
            test_A = sparse_mx_to_torch_sparse_tensor(test_A).to(device)

            model = GCN(nfeat=nodes_len,
                        nhid=args.nhid,
                        nclass=args.nclass,
                        dropout=args.dropout)
            optimizer = optim.Adam(model.parameters(), lr=args.lr)
            model = model.to(device)

            # train
            for epoch in range(args.epochs):
                t = time.time()
                model.train()
                optimizer.zero_grad()

                output = model(features, train_A,
                               torch.tensor(data_train['A'].values).to(device),
                               torch.tensor(data_train['B'].values).to(device),
                               node_ids_to_index)

                # Flatten predictions so they match the 1-D label tensor;
                # without this F.mse_loss broadcasts (N,1) against (N,) and
                # averages over an (N,N) matrix (the original test-set loss
                # already flattened, confirming the shape mismatch).
                loss_train = F.mse_loss(torch.flatten(output), train_labels)
                loss_train.backward()
                optimizer.step()

                # validation
                model.eval()
                output = model(features, val_A,
                               torch.tensor(data_val['A'].values).to(device),
                               torch.tensor(data_val['B'].values).to(device),
                               node_ids_to_index)
                loss_val = F.mse_loss(torch.flatten(output), val_labels)

                if args.verbose:
                    print('Epoch: {:04d}'.format(epoch + 1),
                          'loss_train: {:.4f}'.format(loss_train.item()),
                          'loss_val: {:.4f}'.format(loss_val.item()),
                          'time: {:.4f}s'.format(time.time() - t))

            # test
            model.eval()
            with torch.no_grad():
                output = model(features, test_A,
                               torch.tensor(data_test['A'].values).to(device),
                               torch.tensor(data_test['B'].values).to(device),
                               node_ids_to_index)
                # Flatten once so MSE, PCC and MAE all see matching shapes.
                output = torch.flatten(output)

                loss_test = F.mse_loss(output, test_labels)
                pcc_test = pearson_correlation(test_labels, output)
                mae_test = F.l1_loss(output, test_labels)
                print("Test set results:",
                      "loss= {:.10f}".format(loss_test.item()),
                      "pcc= {:.10f}".format(pcc_test),
                      "mae= {:.10f}".format(mae_test.item()))

                total_mse[exp_number] = loss_test
                total_pcc[exp_number] = pcc_test
                total_mae[exp_number] = mae_test

        # Aggregate this dataset's experiments and reset the buffers.
        mse_datasets[dataset] = np.mean(total_mse)
        std_datasets[dataset] = np.std(total_mse)
        total_mse = np.zeros(args.exp_number)

        # PCC can be NaN (e.g. a constant prediction); exclude those runs.
        pcc_datasets[dataset] = np.mean(total_pcc[~np.isnan(total_pcc)])
        pcc_std_datasets[dataset] = np.std(total_pcc[~np.isnan(total_pcc)])
        total_pcc = np.zeros(args.exp_number)

        mae_datasets[dataset] = np.mean(total_mae)
        mae_std_datasets[dataset] = np.std(total_mae)
        total_mae = np.zeros(args.exp_number)

    for dataset in datasets:
        # Single-pass formatting; output text is identical to the previous
        # .format(...) % dataset double-substitution.
        print("MSE {}: {:,f}".format(dataset, mse_datasets[dataset]))
        print("MSE_STD {}: {:,f}".format(dataset, std_datasets[dataset]))

        print("PCC {}: {:,f}".format(dataset, pcc_datasets[dataset]))
        print("PCC_STD {}: {:,f}".format(dataset, pcc_std_datasets[dataset]))

        print("MAE {}: {:,f}".format(dataset, mae_datasets[dataset]))
        print("MAE_STD {}: {:,f}".format(dataset, mae_std_datasets[dataset]))

    print("Total time elapsed: {:.4f}s".format(time.time() - t_total))

    # NOTE(review): terminates the whole process; kept so callers that rely
    # on gcn() never returning are unaffected.
    exit()
Пример #3
0
        soft_out = torch.unsqueeze(
            torch.nn.functional.softmax(output, dim=1)[:, 1], 1)
        loss_reg = torch.mm(torch.mm(soft_out.T, laplacian), soft_out)
        print('Epoch: {:04d}'.format(epoch + 1),
              'loss_train: {:.4f}'.format(loss_train.item()),
              'time: {:.4f}s'.format(time.time() - t))
        loss_train += args.gcn_lambda * loss_reg.squeeze()
        loss_train.backward()
        optimizer.step()


# Keep torch single-threaded; pick the GPU only when requested.
torch.set_num_threads(1)
device = torch.device("cuda:0" if args.cuda else "cpu")

# Fixed 15x15 grid world.
n, m = 15, 15

gcn_model = GCN(nfeat=n * m, nhid=args.hidden)
gcn_model.to(device)
optimizer = optim.Adam(gcn_model.parameters(), lr=args.lr,
                       weight_decay=args.weight_decay)

adj, features, _, _ = shortest_dist(n, m, [n, m])

# Row-normalized dense feature tensor.
features = torch.FloatTensor(
    np.array(normalize(sp.csr_matrix(features)).todense()))

# Symmetrize the adjacency, add self-loops, normalize, and convert to a
# torch sparse tensor.
adj = adj + adj.T.multiply(adj.T > adj) - adj.multiply(adj.T > adj)
adj = sparse_mx_to_torch_sparse_tensor(
    normalize(sp.csr_matrix(adj) + sp.eye(adj.shape[0])))

# 40 warm-up episodes of GCN training.
for episodes in range(40):
    update_graph(n, m, args, gcn_model, optimizer)
    print("{} episode done :".format(episodes + 1))
Пример #4
0
def main():
    """Train an actor-critic agent (A2C/PPO/ACKTR) on vectorized envs while
    simultaneously fitting a per-process GCN over each episode's transition
    graph; the GCN is passed into the return computation weighted by
    ``args.gcn_alpha``.  Periodically saves both models and logs episode
    statistics.  Reads ``args`` and ``num_updates`` from module scope.
    """
    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    run_id = "alpha{}".format(args.gcn_alpha)
    if args.use_logger:
        from utils import Logger
        folder = "{}/{}".format(args.folder, run_id)
        logger = Logger(algo_name=args.algo,
                        environment_name=args.env_name,
                        folder=folder,
                        seed=args.seed)
        logger.save_args(args)

        print("---------------------------------------")
        print('Saving to', logger.save_folder)
        print("---------------------------------------")

    else:
        print("---------------------------------------")
        print('NOTE : NOT SAVING RESULTS')
        print("---------------------------------------")
    all_rewards = []

    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, args.log_dir, args.add_timestep, device,
                         False)

    actor_critic = Policy(envs.observation_space.shape,
                          envs.action_space,
                          args.env_name,
                          base_kwargs={'recurrent': args.recurrent_policy})
    actor_critic.to(device)

    # Build the agent for the chosen algorithm; a2c and acktr share one class.
    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               acktr=True)

    rollouts = RolloutStorage(args.num_steps, args.num_processes,
                              envs.observation_space.shape, envs.action_space,
                              actor_critic.recurrent_hidden_state_size,
                              actor_critic.base.output_size)

    obs = envs.reset()
    rollouts.obs[0].copy_(obs)
    rollouts.to(device)

    ############################
    # GCN Model and optimizer
    from pygcn.train import update_graph
    from pygcn.models import GCN
    gcn_model = GCN(nfeat=actor_critic.base.output_size, nhid=args.gcn_hidden)
    gcn_model.to(device)
    gcn_optimizer = optim.Adam(gcn_model.parameters(),
                               lr=args.gcn_lr,
                               weight_decay=args.gcn_weight_decay)
    gcn_loss = nn.NLLLoss()
    # Per-process episode buffers: hidden states (graph node features), the
    # transition graph itself, the next node index, and (node, reward) pairs.
    gcn_states = [[] for _ in range(args.num_processes)]
    Gs = [nx.Graph() for _ in range(args.num_processes)]
    node_ptrs = [0 for _ in range(args.num_processes)]
    rew_states = [[] for _ in range(args.num_processes)]
    ############################

    episode_rewards = deque(maxlen=100)
    avg_fwdloss = deque(maxlen=100)
    # NOTE(review): rew_rms appears unused in this function — confirm before
    # removing.
    rew_rms = RunningMeanStd(shape=())
    # Rewards are withheld here and released in bulk every args.reward_freq
    # steps or at episode end (see the per-step loop below).
    delay_rew = torch.zeros([args.num_processes, 1])
    delay_step = torch.zeros([args.num_processes])

    start = time.time()
    for j in range(num_updates):

        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            update_linear_schedule(
                agent.optimizer, j, num_updates,
                agent.optimizer.lr if args.algo == "acktr" else args.lr)

        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob,\
                 recurrent_hidden_states, hidden_states = actor_critic.act(
                        rollouts.obs[step],
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])

            # Obser reward and next obs
            obs, reward, done, infos = envs.step(action)
            delay_rew += reward
            delay_step += 1

            for idx, (info, hid,
                      eps_done) in enumerate(zip(infos, hidden_states, done)):

                # Release the accumulated reward at episode end or every
                # reward_freq steps; otherwise the agent sees 0 this step.
                if eps_done or delay_step[idx] == args.reward_freq:
                    reward[idx] = delay_rew[idx]
                    delay_rew[idx] = delay_step[idx] = 0
                else:
                    reward[idx] = 0

                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])

                # gcn_alpha == 1.0 disables the GCN entirely; below that,
                # grow this process's episode graph and train on it when
                # the episode finishes.
                if args.gcn_alpha < 1.0:
                    gcn_states[idx].append(hid)
                    node_ptrs[idx] += 1
                    if not eps_done:
                        Gs[idx].add_edge(node_ptrs[idx] - 1, node_ptrs[idx])
                    if reward[idx] != 0. or eps_done:
                        rew_states[idx].append(
                            [node_ptrs[idx] - 1, reward[idx]])
                    if eps_done:
                        # Empty graph (single-step episode) degenerates to a
                        # 1x1 identity adjacency.
                        adj = nx.adjacency_matrix(Gs[idx]) if len(Gs[idx].nodes)\
                                        else sp.csr_matrix(np.eye(1,dtype='int64'))
                        update_graph(gcn_model, gcn_optimizer,
                                     torch.stack(gcn_states[idx]), adj,
                                     rew_states[idx], gcn_loss, args, envs)
                        # Reset this process's episode buffers.
                        gcn_states[idx] = []
                        Gs[idx] = nx.Graph()
                        node_ptrs[idx] = 0
                        rew_states[idx] = []

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks,
                            hidden_states)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        # Returns are mixed with the GCN signal according to gcn_alpha.
        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau, gcn_model, args.gcn_alpha)
        agent.update(rollouts)
        rollouts.after_update()

        ####################### Saving and book-keeping #######################
        # Save five times over the run plus once at the very end.
        if (j % int(num_updates / 5.) == 0
                or j == num_updates - 1) and args.save_dir != "":
            print('Saving model')
            print()

            save_dir = "{}/{}/{}".format(args.save_dir, args.folder, run_id)
            save_path = os.path.join(save_dir, args.algo, 'seed' +
                                     str(args.seed)) + '_iter' + str(j)
            try:
                os.makedirs(save_path)
            except OSError:
                pass

            # A really ugly way to save a model to CPU
            save_model = actor_critic
            save_gcn = gcn_model
            if args.cuda:
                save_model = copy.deepcopy(actor_critic).cpu()
                save_gcn = copy.deepcopy(gcn_model).cpu()

            # Bundle [gcn, policy, obs-normalization stats (or None)].
            save_model = [
                save_gcn, save_model,
                hasattr(envs.venv, 'ob_rms') and envs.venv.ob_rms or None
            ]

            torch.save(save_model,
                       os.path.join(save_path, args.env_name + "ac.pt"))

        total_num_steps = (j + 1) * args.num_processes * args.num_steps

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            end = time.time()
            print("Updates {}, num timesteps {}, FPS {} \n Last {}\
             training episodes: mean/median reward {:.2f}/{:.2f},\
              min/max reward {:.2f}/{:.2f}, success rate {:.2f}, avg fwdloss {:.2f}\n"
                  .format(
                      j,
                      total_num_steps,
                      int(total_num_steps / (end - start)),
                      len(episode_rewards),
                      np.mean(episode_rewards),
                      np.median(episode_rewards),
                      np.min(episode_rewards),
                      np.max(episode_rewards),
                      np.count_nonzero(np.greater(episode_rewards, 0)) /
                      len(episode_rewards),
                      np.mean(avg_fwdloss),
                  ))

            all_rewards.append(np.mean(episode_rewards))
            if args.use_logger:
                logger.save_task_results(all_rewards)
        ####################### Saving and book-keeping #######################

    envs.close()