Example No. 1
def init(device_name, model_name):
    # if model_name is None:
    #     raise ValueError("No model given!")
    global device, net, last_h, last_c
    device = torch.device(device_name)
    # net = cnn_fc.CNN_FC_DQN(NUM_ACTIONS)
    net = fc_lstm.FC_LSTM(INPUT_SHAPE, NUM_ACTIONS)
    net.train(False)  # set training mode to false to deactivate dropout layer
    net.load_state_dict(torch.load(model_name, map_location=device))
    net.to(device)
    reset_hidden()
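Example No. 1 keeps the LSTM hidden and cell state in the module-level variables last_h and last_c and clears them through reset_hidden(), which is not shown. A minimal sketch of such a helper, assuming a single-layer LSTM and a HIDDEN_SIZE constant (both are assumptions, not taken from the source):

import torch

HIDDEN_SIZE = 64              # assumed hidden state size, not from the source
device = torch.device("cpu")  # stand-in for the device set up in init()

def reset_hidden():
    # zero the recurrent state before a new episode so nothing leaks across
    # episode boundaries; shape follows the (num_layers, batch, hidden_size)
    # convention of torch.nn.LSTM
    global last_h, last_c
    last_h = torch.zeros(1, 1, HIDDEN_SIZE, device=device)
    last_c = torch.zeros(1, 1, HIDDEN_SIZE, device=device)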
Example No. 2
 def __init__(self, device_name, model_name, num_observations, num_envs,
              num_threads, training_data_path):
     assert (num_envs == 1)
     self.device = torch.device(device_name)
     if USE_GRU:
         self.net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
     else:
         self.net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)
     self.net.train(False)  # set training mode to false to deactivate dropout layer
     self.net.to(self.device)
     if model_name is not None:
         self.net.load_state_dict(
             torch.load(model_name, map_location=self.device))
     self.reset_hidden()
Example No. 3
	def __init__(self, device_name, model_name, num_observations, num_envs, num_threads, training_data_path):
		assert(num_envs == 1)
		### test stuff ###
		self.episode_idx = 0
		if model_name is None:
			print("WARNING: No Model given! Add \'--model <model_name>\' to specifie a path to a model file to be loaded as initial model weights by the agent.")
		self.model_name = model_name
		### --- ###
		self.device = torch.device(device_name)
		if USE_GRU:
			self.net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
		else:
			self.net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)
		self.net.train(False)  # set training mode to false to deactivate dropout layer
		self.net.to(self.device)
		if model_name is not None:
			self.net.load_state_dict(torch.load(model_name, map_location=self.device))
		self.reset_hidden()
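Examples No. 2 and No. 3 build the same recurrent Q-network, switch it to eval mode with net.train(False), and reset its hidden state per episode. A hedged sketch of the kind of inference step such an agent presumably performs, using plain torch.nn modules as stand-ins for the repository's FC_GRU/FC_LSTM classes (layer sizes and the network layout are assumptions, not taken from the source):

import torch
import torch.nn as nn

num_observations, num_actions, hidden_size = 64, 5, 64

# stand-in for FC_LSTM: fully-connected encoder, LSTM layer, linear Q-head (assumed layout)
encoder = nn.Linear(num_observations, hidden_size)
lstm = nn.LSTM(hidden_size, hidden_size, batch_first=True)
q_head = nn.Linear(hidden_size, num_actions)
for m in (encoder, lstm, q_head):
    m.train(False)                   # mirrors net.train(False) in the examples

# reset_hidden() analogue: zeroed recurrent state at the start of an episode
h = torch.zeros(1, 1, hidden_size)
c = torch.zeros(1, 1, hidden_size)

obs = torch.zeros(1, 1, num_observations)     # one observation, batch size 1
with torch.no_grad():
    x = torch.relu(encoder(obs))
    out, (h, c) = lstm(x, (h, c))    # hidden state is carried to the next step
    q_values = q_head(out.squeeze(1))
    action = int(torch.argmax(q_values, dim=1).item())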
Example No. 4
    def __init__(self, device_name, model_name, num_observations, num_envs,
                 num_threads, training_data_path):
        assert (num_envs == 1)
        self.device = torch.device(device_name)
        self.training_data_path = training_data_path

        # creating experience buffers on the gpu for faster sampling
        self.tensor_state_buffer = torch.zeros(
            MEMORY_SIZE, num_observations,
            dtype=torch.float).to(self.device)  # state
        # rewards with N-step returns applied (see the sketch after this example):
        # buffer[t] = reward_t + discount*buffer[t+1] + ... + discount^(N-1)*buffer[t+N-1]
        self.tensor_reward_buffer = torch.zeros(
            MEMORY_SIZE, dtype=torch.float).to(self.device)
        self.tensor_action_buffer = torch.zeros(
            MEMORY_SIZE, dtype=torch.long).to(self.device)  # the action that was chosen
        self.tensor_done_buffer = torch.zeros(
            MEMORY_SIZE, dtype=torch.bool).to(self.device)  # episode has ended
        self.tensor_step_buffer = torch.zeros(
            MEMORY_SIZE, dtype=torch.int16).to(self.device)  # step index in episode (starting at 0)

        # creating net and target net
        if USE_GRU:
            self.net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
            self.tgt_net = fc_gru.FC_GRU(num_observations, NUM_ACTIONS)
        else:
            self.net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)
            self.tgt_net = fc_lstm.FC_LSTM(num_observations, NUM_ACTIONS)

        # copy to device
        self.net.to(self.device)
        self.tgt_net.to(self.device)

        # showing net to user
        print(self.net)

        self.episode_start = 0

        # load model parameters from file if given
        if model_name is not None:
            self.net.load_state_dict(
                torch.load(model_name, map_location=self.device))
            self.tgt_net.load_state_dict(self.net.state_dict())

        # initialize epsilon for epsilon-greedy algorithm
        self.epsilon = EPSILON_START

        # create optimizer
        self.optimizer = optim.Adam(self.net.parameters(), lr=LEARNING_RATE)

        # mean rewards
        self.best_mean_reward = None
        self.mean_reward = 0

        self.new_state_discount = DISCOUNT_FACTOR**N_STEPS
        # time metrics
        self.sampling_times = deque(maxlen=100)
        self.batch_forward_times = deque(maxlen=100)
        self.loss_calc_times = deque(maxlen=100)
        self.backward_times = deque(maxlen=100)
        self.optimize_times = deque(maxlen=100)
        self.gpu_pre_copy_times = deque(maxlen=100)
        self.gpu_copy_times = deque(maxlen=100)
        self.measure_gpu_times = False

        # training metrics
        self.mean_loss_buffer = deque(maxlen=100)
        self.mean_value_buffer = deque(maxlen=100)
        self.mean_loss = 0
        self.mean_value = 0

        # initializing frame indices
        self.frame_idx = 0
        self.last_episode_frame = 0
        self.episode_idx = self.episode_start
        self.training_done = False

        # getting current time
        self.last_time = time.perf_counter()
        self.start_time = time.time()

        # reset state
        self.reset()
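The comment on the reward buffer in Example No. 4 describes N-step returns: buffer[t] = reward_t + discount*buffer[t+1] + ... + discount^(N-1)*buffer[t+N-1]. A minimal sketch of how such values could be computed for one finished episode, assuming the raw per-step rewards are available as a plain list (the helper name and its arguments are assumptions, not taken from the source):

def n_step_rewards(rewards, discount, n_steps):
    # rewards: raw per-step rewards of one episode
    # returns a list where entry t sums the next n_steps rewards,
    # each weighted by discount^k for its distance k from t
    out = [0.0] * len(rewards)
    for t in range(len(rewards)):
        acc = 0.0
        for k in range(n_steps):
            if t + k >= len(rewards):
                break
            acc += (discount ** k) * rewards[t + k]
        out[t] = acc
    return out

# e.g. n_step_rewards([1.0, 0.0, 0.0, 1.0], discount=0.99, n_steps=3)
# -> [1.0, 0.9801, 0.99, 1.0]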