def __init__(self, env, args):
    self.args = args
    self.agent = Neurosmash.Agent()
    self.env = env
    # extract background
    self.extr = BE.Background_Extractor(self.env, self.agent, args)
    self.background = self.extr.get_background(oned=True)
    self.background_im = self.extr.get_background(oned=False)
def set_up_env(args):
    # Initialize agent and environment
    controller = Neurosmash.Agent()  # This is an example agent.
    if args.use_controller:
        controller = Controller(args)

    # This is the main environment.
    try:
        environment = Neurosmash.Environment(args)
    except Exception:
        print("Connecting to environment failed. "
              "Please make sure Neurosmash is running and check your settings.")
        print(f"Settings from world model: ip={args.ip}, port={args.port}, "
              f"size={args.size}, timescale={args.timescale}")
        raise  # re-raise so callers never receive an unbound `environment`
    else:
        print("Successfully connected to Neurosmash!")
    return controller, environment
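# A minimal usage sketch of set_up_env(); the SimpleNamespace below stands in
# for the project's parsed arguments, and its attribute values are
# illustrative assumptions, not settings taken from the source.
from types import SimpleNamespace

args = SimpleNamespace(ip="127.0.0.1", port=13000, size=64,
                       timescale=10, use_controller=False)
controller, environment = set_up_env(args)
end, reward, state = environment.reset()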
def __init__(self, vision, env, args, agent=None):
    self.args = args
    self.vision = vision
    self.env = env
    if not agent:
        self.agent = Neurosmash.Agent()
    else:
        self.agent = agent
    # extract background
    self.extr = BE.Background_Extractor(self.env, self.agent, args)
    self.background = self.extr.get_background(oned=True)
TARGET_UPDATE = 10
n_actions = 3
learning_rate = 1e-2
# gamma = 0.99

# Setup
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
weighted_loss = 1

ip = "127.0.0.1"  # IP address that the TCP/IP interface listens to
port = 13000      # Port number that the TCP/IP interface listens to
size = 64         # Please check the Updates section above for more details
timescale = 10    # Please check the Updates section above for more details

env = Neurosmash.Environment(timescale=timescale, size=size, port=port, ip=ip)

# Load VAE weights
vae = VAE(device, image_channels=3).to(device)
vae.load_state_dict(
    torch.load("./data_folder_vae/vae_v3_weighted_loss_{}.torch".format(weighted_loss)))
vae.eval()

# Load RNN weights
rnn = MDNRNN(32, 256, 5, 1).to(device)
rnn.load_state_dict(
    torch.load("./weights/rnn_29dec_{}.torch".format(weighted_loss)))
rnn.eval()

# Load controller; if vanilla DQN, replace DQN_VAE with DQN2
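# The original file continues after the comment above; the lines below are a
# hedged sketch of how the controller might be loaded, mirroring the load
# pattern used for the VAE and RNN. The DQN_VAE constructor arguments and the
# checkpoint path are assumptions, not taken from the source.
controller = DQN_VAE(n_actions).to(device)  # assumed constructor signature
controller.load_state_dict(
    torch.load("./weights/dqn_vae_{}.torch".format(weighted_loss)))  # assumed path
controller.eval()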
def main(episodes, episode_length):
    env = Neurosmash.Environment(timescale=timescale, size=size, port=port, ip=ip)  # This is the main environment.
    end, reward, state = env.reset()
    ep_cnt = 0
    all_states = []
    all_actions = []
    all_dones = []
    all_rewards = []
    file_cnt = 0

    while len(all_states) < episodes:
        print(len(all_states))
        episode_states = []
        episode_actions = []
        episode_dones = []
        episode_rewards = []
        end, reward, state = env.reset()  # Reset environment and record the starting state
        done = False
        ep_cnt += 1

        # for time in range(episode_length):
        while True:
            action = select_action()
            # episode_actions.append(action)

            # Step through environment using chosen action
            done, reward, state = env.step(action)
            state = torch.FloatTensor(state).reshape(size, size, 3) / 255.0
            state = state.permute(2, 0, 1)
            episode_states.append(state.reshape(3, 64, 64))  # vae.encode(state.reshape(1, 3, 64, 64).cuda())[0])
            episode_actions.append(action)
            episode_dones.append(done)
            episode_rewards.append(reward)
            if done:
                break

        # Keep only episodes that lasted at least 20 steps; store their final 20 frames
        if len(episode_states) >= 20:
            all_states.append(torch.stack(episode_states)[-20:])
            all_actions.append(torch.Tensor(episode_actions[-20:]))
            all_dones.append(torch.Tensor(episode_dones[-20:]))
            all_rewards.append(torch.Tensor(episode_rewards[-20:]))
            print(episode_states[-1])
            print(episode_actions)
            print(episode_dones)
            print(episode_rewards)

    torch.save(torch.stack(all_states), 'training_data.pt')
    torch.save(torch.stack(all_actions), 'training_actions.pt')
    torch.save(torch.stack(all_dones), 'training_dones.pt')
    torch.save(torch.stack(all_rewards), 'training_rewards.pt')
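# select_action() is called in main() but not defined in this snippet. A
# minimal stand-in, assuming a uniformly random policy over the three discrete
# actions (n_actions = 3 elsewhere in the repo); hypothetical, included only
# to make the snippet self-contained.
import random

def select_action():
    # Pick one of the three discrete actions (0, 1, 2) uniformly at random.
    return random.randint(0, 2)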
# Imports
from convvae import ConvVae
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import matplotlib.pyplot as plt

import Neurosmash

ip = "127.0.0.1"
port = 13000
size = 128  # 96, 192
timescale = 1

agent = Neurosmash.Agent()
environment = Neurosmash.Environment(ip, port, size, timescale)

# end    (true if the episode has ended, false otherwise)
# reward (10 if won, 0 otherwise)
# state  (flattened size x size x 3 vector of pixel values)
#
# The state can be converted into an image as follows:
#     image = np.array(state, "uint8").reshape(size, size, 3)
# You can also use the Neurosmash.Environment.state2image(state) function,
# which returns the state as a PIL image.

shape = (3, size, size)


def roam_and_collect(nr_images=10):
    # The following steps through an entire episode from start to finish with random actions (by default)
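    # ------------------------------------------------------------------
    # The original function body is cut off here. What follows is a hedged
    # sketch of how such a random-action collection loop could continue,
    # using only the environment API shown above (reset()/step() returning
    # end, reward, state); it is illustrative, not the authors' code.
    # ------------------------------------------------------------------
    images = []
    end, reward, state = environment.reset()
    while len(images) < nr_images:
        action = np.random.randint(0, 3)                 # random action
        end, reward, state = environment.step(action)
        images.append(np.array(state, "uint8").reshape(size, size, 3))
        if end:
            end, reward, state = environment.reset()
    return np.stack(images)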
from settings import *
from utils.argparser import get_args
from utils.background_extractor import Background_Extractor
from mxnet import nd
import numpy as np
import matplotlib.pyplot as plt

args = get_args()

vision = Agent_Location_Classifier()
vision.load_parameters(path_to_clf_params)

rnn = mdn_rnn(input_dim=7, interface_dim=10, output_dim=4)
rnn.load_parameters(path_to_rnn_params)

env = Neurosmash.Environment(args)
agent = Neurosmash.Agent()
end, reward, previous_state = env.reset()

n_steps = 30

extr = Background_Extractor(env, agent, args)
background = extr.get_background(oned=True)

h, c = (nd.zeros((1, rnn.RNN.h_dim)), nd.zeros((1, rnn.RNN.c_dim)))
eye = nd.eye(args.move_dim)
prev_pred = nd.zeros((1, 4))

while end == 0:
    # Get latent representation (agent locations) from the vision model
    z = vision(extr.clean_and_reshape(previous_state, args.size) / 255)
    # Make random step
    a = np.random.randint(0, 3)
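    # ------------------------------------------------------------------
    # The snippet is truncated after the random action is drawn. The lines
    # below are a hedged sketch of how one step of the rollout could
    # continue: the env.step() convention is taken from the code above, but
    # the mdn_rnn forward signature and the concatenation of z with a
    # one-hot action (4 + 3 matching the RNN's input_dim of 7) are
    # assumptions, not confirmed by the source.
    # ------------------------------------------------------------------
    end, reward, state = env.step(a)
    rnn_input = nd.concat(z, eye[a].reshape((1, -1)), dim=1)
    prev_pred, h, c = rnn(rnn_input, h, c)   # assumed forward signature
    previous_state = state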
def __init__(self, env, args):
    self.args = args
    self.agent = Neurosmash.Agent()
    self.env = env
    extr = BE.Background_Extractor(self.env, self.agent, args)
    self.background = extr.get_background().reshape(-1)