Example #1
                    # build forward (r0) and inverse (r1) edge indices from
                    # each batch element's selected chromosome
                    edge_index_r0_batch.append(
                        build_edge_index(selected_chrom_batch[i_batch],
                                         inverse=False))
                    edge_index_r1_batch.append(
                        build_edge_index(selected_chrom_batch[i_batch],
                                         inverse=True))

                # wrap each instance's tensors into a torch_geometric Data object
                dataset = []
                for i_batch in range(CONF.batch_size):
                    data = Data(
                        x=torch.tensor(env_batch[i_batch].nodes_embedding,
                                       dtype=torch.float32).to(CONF.device),
                        edge_index_r0=torch.tensor(
                            edge_index_r0_batch[i_batch]).to(CONF.device),
                        edge_index_r1=torch.tensor(
                            edge_index_r1_batch[i_batch]).to(CONF.device),
                        edge_index_n=torch.tensor(
                            env_batch[i_batch].edge_index_n).to(CONF.device))
                    dataset.append(data)
                # one loader batch contains all CONF.batch_size graphs
                loader = DataLoader(dataset,
                                    batch_size=CONF.batch_size,
                                    shuffle=False)

                # the loader yields a single batched graph
                model(env_batch,
                      next(iter(loader)), selected_route_batch, n_nodes)


if __name__ == '__main__':
    args = args()
    train(args)
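
For context, a minimal sketch of what build_edge_index might look like, assuming the chromosome is a visiting order (a permutation of node indices) and the function returns a 2 x E edge index in COO format as torch_geometric expects; the project's actual implementation may differ:

import numpy as np

def build_edge_index(chromosome, inverse=False):
    # hypothetical sketch: connect each node of the tour to its successor
    order = np.asarray(chromosome)
    src = order
    dst = np.roll(order, -1)      # successor of each node along the tour
    if inverse:
        src, dst = dst, src       # reverse the edge direction
    return np.stack([src, dst])   # shape (2, n_nodes), COO edge index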
Example #2
import torch
import copy
import time
import numpy as np
from torch_geometric.data import Data, DataLoader
from arguments import args
from memory import Memory
from ppo_model import Agent
from envs.tsp_env import TSP_Env

config = args()

if __name__ == '__main__':

    agent = Agent(config)

    for epoch in range(config.n_epoch):
        memory = Memory()
        # one independent TSP instance per parallel agent
        envs = [TSP_Env(config.n_nodes) for _ in range(config.n_agents)]

        time1 = time.time()
        next_value = []
        for i_rollout in range(config.n_rollout):
            # reset all envs and record the mean initial tour length
            for env in envs:
                env.reset()
            cost = np.array([env.get_cost() for env in envs]).mean()
            # if i_rollout % 5 == 0:
            #     print("i_rollout %2d/%d | init length: %f" % (i_rollout, config.n_rollout, cost))

            record_reward = []
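
The Memory class imported above is not shown in this example; as an assumption, a minimal PPO-style rollout buffer it could correspond to looks like:

class Memory:
    # hypothetical sketch of a rollout buffer; the project's memory.Memory
    # may store different fields
    def __init__(self):
        self.states, self.actions = [], []
        self.log_probs, self.rewards, self.dones = [], [], []

    def push(self, state, action, log_prob, reward, done):
        self.states.append(state)
        self.actions.append(action)
        self.log_probs.append(log_prob)
        self.rewards.append(reward)
        self.dones.append(done)

    def clear(self):
        self.__init__()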
Example #3
import gym
import torch
import torch.multiprocessing as mp
import arguments
import models
import utils
# assumed project-local modules for the chief and worker processes
from chief import chief_worker
from dppo_agent import dppo_workers

def main():
    # get arguments
    args = arguments.args()

    # build environment
    env = gym.make(args.env_name)
    num_observations = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]

    # define the global network...
    critic_shared_model = models.Critic_Network(num_observations)
    critic_shared_model.share_memory()

    actor_shared_model = models.Actor_Network(num_observations, num_actions)
    actor_shared_model.share_memory()

    # define the traffic signal...
    traffic_signal = utils.TrafficLight()
    # define the counter
    critic_counter = utils.Counter()
    actor_counter = utils.Counter()
    # define the shared gradient buffer...
    critic_shared_grad_buffer = utils.Shared_grad_buffers(critic_shared_model)
    actor_shared_grad_buffer = utils.Shared_grad_buffers(actor_shared_model)
    # define shared observation state...
    shared_obs_state = utils.Running_mean_filter(num_observations)
    # define shared reward...
    shared_reward = utils.RewardCounter()
    # define the optimizer...
    critic_optimizer = torch.optim.Adam(critic_shared_model.parameters(),
                                        lr=args.value_lr)
    actor_optimizer = torch.optim.Adam(actor_shared_model.parameters(),
                                       lr=args.policy_lr)

    # prepare multiprocessing
    # find how many are available
    total_workers = mp.cpu_count() - 1
    print(f'.....total available processes: {total_workers}')
    num_of_workers = total_workers
    print(f'.....we set num_of_workers to {num_of_workers}')

    processors = []
    workers = []

    # load model from checkpoint
    pass

    # the chief process gathers worker gradients and updates the shared models
    p = mp.Process(target=chief_worker,
                   args=(num_of_workers, traffic_signal, critic_counter,
                         actor_counter, critic_shared_model,
                         actor_shared_model, critic_shared_grad_buffer,
                         actor_shared_grad_buffer, critic_optimizer,
                         actor_optimizer, shared_reward, shared_obs_state,
                         args.policy_update_step, args.env_name))

    processors.append(p)

    for _ in range(num_of_workers):
        workers.append(dppo_workers(args))

    for worker in workers:
        p = mp.Process(target=worker.train_network,
                       args=(traffic_signal, critic_counter, actor_counter,
                             critic_shared_model, actor_shared_model,
                             shared_obs_state, critic_shared_grad_buffer,
                             actor_shared_grad_buffer, shared_reward))
        processors.append(p)

    for p in processors:
        p.start()

    for p in processors:
        p.join()
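
utils.TrafficLight and utils.Counter act as shared-memory synchronization primitives between the chief and the workers. A sketch of how such helpers are commonly built on torch.multiprocessing, offered as an assumption about this particular utils module:

import torch.multiprocessing as mp

class TrafficLight:
    # hypothetical sketch: a flag workers poll to learn that the chief
    # has finished applying the accumulated gradients
    def __init__(self):
        self.val = mp.Value('b', False)
        self.lock = mp.Lock()

    def get(self):
        with self.lock:
            return self.val.value

    def switch(self):
        with self.lock:
            self.val.value = not self.val.value

class Counter:
    # hypothetical sketch: counts how many workers have pushed gradients
    def __init__(self):
        self.val = mp.Value('i', 0)
        self.lock = mp.Lock()

    def increment(self):
        with self.lock:
            self.val.value += 1

    def get(self):
        with self.lock:
            return self.val.value

    def reset(self):
        with self.lock:
            self.val.value = 0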
Example #4
import math
import os
import sys
import subprocess
import operator
import pickle
import multiprocessing as mp
import matplotlib.pyplot as plt
#from functions import extract_mfcc
from lib.utils import h5read
from arguments import parse_arguments as args
from pdb import set_trace as bp

param, path = args()

print("Arguments passed: ", sys.argv)

print('Dimensions:\n'
      ' Features- D, Number of frames- N\n'
      ' features shape: D X N\n'
      ' GMM:\n'
      ' mixtures- C\n'
      ' Weights: 1 X C, Means shape: D X C, Variances: D X C\n'
      ' T-matrix: R X CD')
withsad = param.featswithsad          # whether features were extracted with SAD applied
loopprob = param.loopprob             # self-loop (stay-in-state) probability
stat = param.beta
vec_dim = param.ivec_dim              # i-vector dimensionality R
max_iters = param.max_iters
vstat = 'hardasgn'                    # hard-assignment mode for statistics
maxSpeakers = 10                      # upper bound on the number of speakers
featdim = 20                          # feature dimensionality D
feats_foldername = path.feats_folder_name
SAD = path.sad_marks                  # speech activity detection marks
initrttm_folder = path.initrttm_folder_name   # initial RTTM segmentations
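
To make the shape conventions from the print above concrete, here is a small numpy illustration (N and C below are placeholder sizes, not values used by the project):

import numpy as np

D, R = featdim, vec_dim     # feature dim and i-vector dim from the config
N, C = 1000, 512            # placeholder frame count and mixture count
features = np.zeros((D, N))        # features: D x N
weights = np.zeros((1, C))         # GMM weights: 1 x C
means = np.zeros((D, C))           # GMM means: D x C
variances = np.zeros((D, C))       # GMM variances: D x C
T_matrix = np.zeros((R, C * D))    # total-variability matrix: R x CD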