Example #1
            step += 1

            #spatialize the action and then concatenate with state
            action_spatio = action_spa(action)

            v_new_state = Variable(
                torch.from_numpy(new_state))  # variable of new state
            v_action_spatio = Variable(torch.from_numpy(
                action_spatio))  # variable of spatialized action

            v_sa = torch.cat((v_state, v_action_spatio),
                             2)  # variable of state-action
            reward = getReward(
                new_state
            )  # reward is not used in section 3.1; here it is only used to end an episode
            memory.push(v_state.data, v_action_spatio.data, v_sa.data, action,
                        v_new_state.data, reward)

            if (len(memory) < buffer):  # replay buffer not yet filled: just keep adding to it
                state = new_state
                if reward != -1:  # reached a terminal state, end the episode
                    break
                if step > 31:  # cap the episode length
                    break
                else:
                    continue
            #print('**************      starting here     ****************')
            transitions = memory.sample(BATCH_SIZE)
            batch = Transition(*zip(*transitions))  # Transition of batched fields; each state is 11x11x5 = 605 values

            state_batch = Variable(torch.stack(batch.state))  #batchx11x11x5
            # action_batch =batch.action
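
# The fragments in this example call memory.push(...), memory.sample(BATCH_SIZE),
# len(memory) and Transition(*zip(*transitions)) but never show the replay buffer
# itself. Below is a minimal sketch of the structure they appear to assume (the
# usual PyTorch-style replay memory); the field names match the four-argument
# push() used further down, while the fragment above pushes extra fields
# (spatialized action and state-action pair), so its Transition would carry more.
import random
from collections import namedtuple, deque

Transition = namedtuple('Transition', ('state', 'action', 'new_state', 'reward'))

class ReplayMemory:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)  # oldest samples are dropped once full

    def push(self, *args):
        self.buffer.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.buffer, batch_size)

    def __len__(self):
        return len(self.buffer)
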
        amplifier.WriteAnalogScalarF64(1, 10.0, input_voltage, None)

        laser.append(dist)
        print('second %.1f  -- voltage %.1f -- distance %.1f -- reward %.2f\n'
              % ((i + 1) * timeout, input_voltage, dist, reward))
        i = i + 1
        if i == n:
            sched.shutdown(wait=False)


    sched = BlockingScheduler()
    sched.add_job(iterate, 'interval', seconds=0.1)
    sched.start()

    for i in range(len(state_list) - 1):
        memory.push(state_list[i], action_list[i], state_list[i + 1], torch.Tensor([reward_list[i]]))

    amplifier.StopTask()
    laser_sensor.StopTask()
    # ========================================================================================
    # plot and save readings
    laser = np.asarray(laser, dtype='float')
    plt.plot(range(n), laser)
    plt.plot(range(n), desired_traj)
    plt.legend(['laser output', 'desired output'])

    error = np.abs(desired - laser[10:-10]).sum() / len(desired)
    plt.title('error: %f' % error)
    plt.savefig('result/epoch_%d.png' % epoch)
    plt.close()
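
# The control loop above is driven by APScheduler rather than a plain while-loop:
# iterate() runs every 0.1 s and shuts the scheduler down from inside itself once
# n samples have been taken. A stripped-down sketch of that pattern (do_step and
# n_steps are illustrative names, not from the original code):
from apscheduler.schedulers.blocking import BlockingScheduler

def run_fixed_steps(do_step, n_steps, period=0.1):
    count = 0
    sched = BlockingScheduler()

    def tick():
        nonlocal count
        do_step(count)            # one control / measurement step
        count += 1
        if count == n_steps:
            sched.shutdown(wait=False)  # stop the blocking scheduler from within a job

    sched.add_job(tick, 'interval', seconds=period)
    sched.start()                 # blocks until shutdown() is called
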
 step = 0
 #while game still in progress
 while (status == 1):
     v_state = Variable(torch.from_numpy(state)).view(1, -1)
     qval = model(v_state)
     if (np.random.random() < epsilon):  #choose random action
         action = np.random.randint(0, 4)
     else:  #choose best action from Q(s,a) values
         action = np.argmax(qval.data)
     #Take action, observe new state S'
     new_state = makeMove(state, action)
     step += 1
     v_new_state = Variable(torch.from_numpy(new_state)).view(1, -1)
     #Observe reward
     reward = getReward(new_state)
     memory.push(v_state.data, action, v_new_state.data, reward)
     if (len(memory) < buffer):  # replay buffer not yet filled: just keep adding to it
         state = new_state
         if reward != -1:  # reached a terminal state, end the episode
             break
         else:
             continue
     transitions = memory.sample(BATCH_SIZE)
     batch = Transition(*zip(*transitions))
     state_batch = Variable(torch.cat(batch.state))
     action_batch = Variable(torch.LongTensor(batch.action)).view(-1, 1)
     new_state_batch = Variable(torch.cat(batch.new_state))
     reward_batch = Variable(torch.FloatTensor(batch.reward))
     non_final_mask = (reward_batch == -1)
     #Let's run our Q function on S to get Q values for all possible actions
     qval_batch = model(state_batch)
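
# The fragment stops right after computing Q(s, a) for the sampled batch. A
# standard DQN update would continue by building the Bellman target from the
# next-state Q-values (bootstrapping only on non-terminal transitions) and taking
# a gradient step. The sketch below is a generic illustration written against
# current PyTorch tensors rather than the deprecated Variable API used above; the
# optimizer, criterion and gamma are assumed, not taken from the original code.
def dqn_update(model, optimizer, criterion, state_batch, action_batch,
               new_state_batch, reward_batch, non_final_mask, gamma=0.9):
    qval_batch = model(state_batch)                            # Q(s, a) for all actions
    state_action_values = qval_batch.gather(1, action_batch)   # Q(s, a_taken), shape (B, 1)

    next_q = model(new_state_batch).max(1)[0].detach()         # max_a' Q(s', a')
    target = reward_batch.clone()
    # only non-terminal transitions bootstrap from the next state
    target[non_final_mask] = reward_batch[non_final_mask] + gamma * next_q[non_final_mask]

    loss = criterion(state_action_values.squeeze(1), target)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()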