edge_index_r0_batch.append( build_edge_index(selected_chrom_batch[i_batch], inverse=False)) edge_index_r1_batch.append( build_edge_index(selected_chrom_batch[i_batch], inverse=True)) dataset = [] for i_batch in range(CONF.batch_size): data = Data( x=torch.tensor(env_batch[i_batch].nodes_embedding, dtype=torch.float32).to(CONF.device), edge_index_r0=torch.tensor( edge_index_r0_batch[i_batch]).to(CONF.device), edge_index_r1=torch.tensor( edge_index_r1_batch[i_batch]).to(CONF.device), edge_index_n=torch.tensor( env_batch[i_batch].edge_index_n).to(CONF.device)) dataset.append(data) loader = DataLoader(dataset, batch_size=CONF.batch_size, shuffle=False) model(env_batch, list(loader)[0], selected_route_batch, n_nodes) if __name__ == '__main__': args = args() train(args)
import torch import copy import time import numpy as np from torch_geometric.data import Data, DataLoader from arguments import args from memory import Memory from ppo_model import Agent from envs.tsp_env import TSP_Env config = args() if __name__ == '__main__': agent = Agent(config) for epoch in range(config.n_epoch): memory = Memory() envs = [TSP_Env(config.n_nodes) for i in range(config.n_agents)] time1 = time.time() next_value = [] for i_rollout in range(config.n_rollout): # reset envs for i, env in enumerate(envs): env.reset() cost = np.array([env.get_cost() for env in envs]).mean() # if i_rollout % 5 == 0: # print("i_rollout %2d/%d | init length: %f" % (i_rollout, config.n_rollout, cost)) record_reward = []
def main():
    """Entry point for distributed PPO (DPPO) training.

    Builds the gym environment, the globally shared actor/critic networks
    (moved into shared memory so every subprocess updates the same
    parameters), the cross-process synchronisation helpers, then spawns one
    chief process plus one training worker per available CPU core, starts
    them all and blocks until they finish.

    NOTE(review): the original source formatting was mangled; comment/code
    boundaries were reconstructed.  In particular the chief-worker
    ``mp.Process`` launch sat next to a stray ``#`` — it is assumed to be
    live code (``processors.append(p)`` right after it would otherwise raise
    NameError) — confirm against upstream.
    """
    # get arguments
    args = arguments.args()
    # build environment
    env = gym.make(args.env_name)
    num_observations = env.observation_space.shape[0]
    num_actions = env.action_space.shape[0]
    # define the global network... share_memory() places the parameters in
    # shared memory so the forked workers all see/update the same tensors
    critic_shared_model = models.Critic_Network(num_observations)
    critic_shared_model.share_memory()
    actor_shared_model = models.Actor_Network(num_observations, num_actions)
    actor_shared_model.share_memory()
    # define the traffic signal... (cross-process synchronisation helper;
    # semantics defined in utils — not visible here)
    traffic_signal = utils.TrafficLight()
    # define the counters (presumably let the chief know when every worker
    # has reported its gradients — verify in utils.Counter)
    critic_counter = utils.Counter()
    actor_counter = utils.Counter()
    # define the shared gradient buffers... (workers write, chief applies)
    critic_shared_grad_buffer = utils.Shared_grad_buffers(critic_shared_model)
    actor_shared_grad_buffer = utils.Shared_grad_buffers(actor_shared_model)
    # define shared observation state... (running-mean filter over obs)
    shared_obs_state = utils.Running_mean_filter(num_observations)
    # define shared reward...
    shared_reward = utils.RewardCounter()
    # define the optimizers for the two shared networks (separate LRs)
    critic_optimizer = torch.optim.Adam(critic_shared_model.parameters(), lr=args.value_lr)
    actor_optimizer = torch.optim.Adam(actor_shared_model.parameters(), lr=args.policy_lr)
    # prepare multiprocessing: find how many cores are available
    # (one core is kept free for the chief/main process)
    total_works = mp.cpu_count() - 1
    print(f'.....total available process is {total_works}')
    num_of_workers = total_works
    print(f'.....we set num_of_processes to {num_of_workers}')
    processors = []
    workers = []
    # load model from check point -- not implemented yet
    pass
    # the chief worker aggregates worker gradients and steps the optimizers
    p = mp.Process(target=chief_worker,
                   args=(num_of_workers, traffic_signal, critic_counter, actor_counter,
                         critic_shared_model, actor_shared_model,
                         critic_shared_grad_buffer, actor_shared_grad_buffer,
                         critic_optimizer, actor_optimizer, shared_reward,
                         shared_obs_state, args.policy_update_step, args.env_name))
    processors.append(p)
    # one training worker per remaining core
    for idx in range(num_of_workers):
        workers.append(dppo_workers(args))
    for worker in workers:
        p = mp.Process(target=worker.train_network,
                       args=(traffic_signal, critic_counter, actor_counter,
                             critic_shared_model, actor_shared_model,
                             shared_obs_state, critic_shared_grad_buffer,
                             actor_shared_grad_buffer, shared_reward))
        processors.append(p)
    # launch everything, then wait for all processes to finish
    for p in processors:
        p.start()
    for p in processors:
        p.join()
"""Module-level setup for the diarization/i-vector pipeline.

Parses command-line arguments into hyper-parameters and data paths, and
prints a short legend of the matrix dimensions used downstream.

Fixes vs. original: duplicate ``import os`` / ``import sys`` removed, dead
commented-out import dropped, imports grouped per PEP 8 (stdlib /
third-party / project-local).  Runtime behavior (prints, names, values) is
unchanged.
"""
# --- standard library ---
import math
import multiprocessing as mp
import operator
import os
import pickle
import subprocess
import sys
from pdb import set_trace as bp

# --- third-party ---
import matplotlib.pyplot as plt

# --- project-local ---
from arguments import parse_arguments as args
from lib.utils import h5read

# parse CLI into (hyper-parameter namespace, path namespace)
param, path = args()
print("Arguments passed: ", sys.argv)
print('Dimensions:\n Features- D, Number of frames- N \n features shape: D X N \n GMM:\n mixtures- C \n Weights: 1 X C ,Means shape: D X C, Variances: D X C \n T-matrix: R X CD ')

# hyper-parameters taken from the parsed arguments
withsad = param.featswithsad      # whether features come with SAD applied
loopprob = param.loopprob
stat = param.beta
vec_dim = param.ivec_dim          # i-vector dimensionality
max_iters = param.max_iters
# fixed settings (hard-coded in the original — presumably tuning constants;
# confirm before exposing them as CLI options)
vstat = 'hardasgn'
maxSpeakers = 10
featdim = 20

# input/output locations from the parsed path namespace
feats_foldername = path.feats_folder_name
SAD = path.sad_marks
initrttm_folder = path.initrttm_folder_name