Example #1
    def _generator_run(self, input_):
        self.game_no += 1
        self.init_fn(input_)

        self.engine = simulator.Simulator(feature_name='unit_test1',
                                          actionspace_name='lattice1',
                                          canvas=self.canvas)

        self.reset()

        while self.engine.get_time() < 200:
            self.i += 1

            # (state, reward, is_end) tuple for the Dire side
            dire_state = self.engine.get_state_tup("Dire", 0)

            # scripted step supplied by the engine; element [1] is the move
            dire_predefine_step = self.engine.predefined_step("Dire", 0)
            predefine_move = torch.LongTensor([dire_predefine_step[1]])

            is_end = dire_state[2]
            if is_end:
                break

            self.predefined_steps.append(predefine_move)
            state_now = dire_state[0]
            self.states.append(state_now)
            action_out, value_out = self.a3c_model(state_now)

            prob = F.softmax(action_out, dim=1)
            self.raw_probs.append(prob)
            log_prob = F.log_softmax(action_out, dim=1)
            self.raw_log_probs.append(log_prob)

            entropy = -(log_prob * prob).sum(1, keepdim=True)
            self.entropies.append(entropy)

            if self.rank != 0:
                # worker processes follow the scripted move
                action = predefine_move.view(1, -1).data
            else:
                # the rank-0 process acts greedily from the policy
                action = torch.argmax(log_prob, 1).data.view(-1, 1)
            self.actions.append(action)
            log_prob = log_prob.gather(1, Variable(action))

            self.engine.set_order("Dire", 0, (1, action))

            self.engine.loop()

            reward = dire_state[1]
            self.rewards.append(reward)
            self.values.append(value_out)
            self.log_probs.append(log_prob)

            yield
        print("rank %d os.pid %d" % (self.rank, os.getpid()))
        if self.rank != 0:

            self.train()
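
train() itself is not shown in any of these listings. A minimal sketch of an A3C-style update consistent with the buffers filled above (self.rewards, self.values, self.log_probs, self.entropies); self.optimizer and the gamma/entropy_coef hyperparameters are assumptions, not part of the source:

    def train(self, gamma=0.99, entropy_coef=0.01):
        R = torch.zeros(1, 1)
        policy_loss = 0
        value_loss = 0
        # walk the rollout backwards, accumulating discounted returns
        for t in reversed(range(len(self.rewards))):
            R = self.rewards[t] + gamma * R
            advantage = R - self.values[t]
            value_loss = value_loss + 0.5 * advantage.pow(2)
            # detach the advantage so the policy term does not
            # backpropagate into the value head
            policy_loss = policy_loss \
                - self.log_probs[t] * advantage.detach() \
                - entropy_coef * self.entropies[t]
        self.optimizer.zero_grad()
        (policy_loss + value_loss).sum().backward()
        self.optimizer.step()
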
Example #2
    def _generator_run(self, input_):
        self.init_fn(input_)

        self.engine = simulator.Simulator(feature_name='unit_test1', actionspace_name='lattice1', canvas=self.canvas)

        while True:
            # fetch the scripted step, advance the engine one tick,
            # then issue that step as the next order
            dire_predefine_step = self.engine.predefined_step("Dire", 0)
            self.engine.loop()
            self.engine.set_order("Dire", 0, dire_predefine_step)

            yield

            if self.stop_cond_fn(self):
                break

        self.cleanup_fn()
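
Each _generator_run above is a coroutine that yields once per engine tick; the surrounding harness is expected to iterate it to drive the simulation. A minimal driver sketch, where run_to_completion and runner are placeholder names rather than part of the API shown here:

    def run_to_completion(runner, input_):
        # each yield corresponds to one engine.loop() tick; the harness
        # may inspect or checkpoint the runner between ticks
        for _ in runner._generator_run(input_):
            pass
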
Example #3
    def _generator_run(self, input_):
        self.init_fn(input_)

        self.engine = simulator.Simulator(feature_name='unit_test1',
                                          actionspace_name='lattice1',
                                          canvas=self.canvas)

        self.reset()

        while self.engine.get_time() < 200:
            self.i += 1

            dire_state = self.engine.get_state_tup("Dire", 0)

            dire_predefine_step = self.engine.predefined_step("Dire", 0)
            predefine_move = torch.LongTensor([dire_predefine_step[1]])

            is_end = dire_state[2]
            if is_end:
                break

            self.predefined_steps.append(predefine_move)
            action_out, value_out = self.a3c_model(dire_state[0])

            prob = F.softmax(action_out, dim=1)
            log_prob = F.log_softmax(action_out, dim=1)
            self.raw_log_probs.append(log_prob)

            entropy = -(log_prob * prob).sum(1, keepdim=True)
            self.entropies.append(entropy)

            # always sample the action from the policy distribution
            action = prob.multinomial(num_samples=1).data
            log_prob = log_prob.gather(1, Variable(action))

            self.engine.set_order("Dire", 0, (1, action))

            self.engine.loop()

            reward = 0  # this variant ignores the engine reward in dire_state[1]
            self.rewards.append(reward)
            self.values.append(value_out)
            self.log_probs.append(log_prob)

            yield

        self.train()
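
Compared with Example #1, this variant always samples the action from the policy (there is no rank-based gating) and records a constant reward of 0, so the returns it trains on carry no signal from the engine; given the 'unit_test1' feature set, this reads as a plumbing test of the rollout-and-train loop rather than a real training run.
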
Example #4
    def _generator_run(self, input_):
        self.init_fn(input_)

        self.engine = simulator.Simulator(feature_name='unit_test1',
                                          canvas=self.canvas)

        while True:
            self.i += 1

            self.engine.loop()

            # scripted move used as the regression target
            dire_predefine_step = self.engine.predefined_step("Dire", 0)
            predefine_move = torch.FloatTensor(dire_predefine_step[1])
            dire_state = self.engine.get_state_tup("Dire", 0)
            is_end = dire_state[2]

            out = self.lstm_module(dire_state[0])
            loss = torch.mean((out - predefine_move) ** 2)  # MSE against the scripted move
            self.losses.append(loss)

            self.engine.set_order("Dire", 0, (1, tuple(out.detach().numpy()[0])))

            if self.i % self.batch_size == 0:
                # keep it simple: backprop through the mean of a randomly
                # sampled batch of buffered losses, then drop the rest
                self.lstm_module.zero_grad()
                random.shuffle(self.losses)
                self.losses = self.losses[:self.buffer_size]
                buf = self.losses[:self.batch_size]
                avg_loss = sum(buf) / self.batch_size
                print(avg_loss.item(), out, predefine_move)
                avg_loss.backward()
                self.optimizer.step()
                self.losses = []

            yield

            if is_end:
                break
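
The update rule in this example is intentionally crude: per-step MSE losses are buffered, shuffled, and only the first batch_size of them feed the averaged backward pass; everything else is thrown away when self.losses is cleared. A replay-buffer-style variant would retain the unsampled remainder instead of resetting the list.
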
Example #5
    def _generator_run(self, input_):
        self.game_no += 1
        self.init_fn(input_)

        self.engine = simulator.Simulator(feature_name='Lattice1',
                                          actionspace_name='lattice1',
                                          canvas=self.canvas)

        # per-game trajectory buffers (see the StatePkg sketch after this example)
        state_pkg = StatePkg()

        while self.engine.get_time() < 30:
            self.i += 1

            dire_state = self.engine.get_state_tup("Dire", 0)

            dire_predefine_step = self.engine.predefined_step("Dire", 0)
            predefine_move = torch.LongTensor([dire_predefine_step[1]])

            is_end = dire_state[2]
            if is_end:
                break

            state_pkg.predefined_steps.append(predefine_move)
            state_now = dire_state[0]
            state_pkg.states.append(state_now)
            action_out, value_out = self.a3c_model(state_now)

            prob = F.softmax(action_out, dim=1)
            state_pkg.raw_probs.append(prob)
            log_prob = F.log_softmax(action_out, dim=1)
            state_pkg.raw_log_probs.append(log_prob)

            entropy = -(log_prob * prob).sum(1, keepdim=True)
            state_pkg.entropies.append(entropy)

            # act greedily when the policy is confident, otherwise sample
            max_prob = torch.max(prob).item()

            if max_prob > 0.9:
                action = torch.argmax(log_prob, 1).data.view(-1, 1)
            else:
                action = prob.multinomial(num_samples=1).data

            state_pkg.actions.append(action)
            log_prob = log_prob.gather(1, Variable(action))

            self.engine.set_order("Dire", 0, (1, action))

            self.engine.loop()

            reward = dire_state[1]
            state_pkg.rewards.append(reward)
            state_pkg.values.append(value_out)
            state_pkg.log_probs.append(log_prob)

            yield
        print("rank %d os.pid %d" % (self.rank, os.getpid()))

        self.state_buffer.append(state_pkg)

        self.train(self.state_buffer)

        # keep only the two most recent games in the buffer
        self.state_buffer = self.state_buffer[-2:]

        torch.save(self.a3c_model.state_dict(),
                   "./tmp/model_%d_%d" % (self.game_no, os.getpid()))