Пример #1
0
def main():
    """Run the learned policy interactively in V-REP, printing transition
    probabilities and checking each observed successor state against the
    DTMC model built from the t-table.

    Runs forever; the finally block closes the V-REP connection on
    interrupt (the original ``Utils.endVREP()`` call sat after the
    infinite loop and was unreachable).
    """
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    temperature = 2
    ttable_path = os.path.join(BASE_DIR, 't-table.pkl')
    qtable_path = os.path.join(BASE_DIR,
                               'q-table-{}.pkl'.format(Q_TABLE_VERSION))
    dtmc_generator = DTMCGenerator(ttable_path, qtable_path, temperature)
    dtmc_generator.load_policy(POLICY_PREFIX + 'sm10-policy.pkl', BASE_DIR)

    # Re-index the transition table by (state, action) -> [(next_state, count)].
    # 'ttable_file' avoids shadowing the builtin 'file'.
    with open(ttable_path, 'rb') as ttable_file:
        ttable = pickle.load(ttable_file)
    trans_prob_dict = {}
    for key, value in ttable.items():
        trans_prob_dict.setdefault((key[0], key[1]), []).append((key[2], value))

    try:
        while True:
            n1 = dt.datetime.now()
            state = task.getObservation()[0]

            # Greedy action straight from the Q-table, for comparison.
            action = numpy.argmax(dtmc_generator.Q[state])
            print('trans probs {}'.format(trans_prob_dict.get((state, action))))

            # Action actually taken, selected from the loaded policy.
            action = select_action(dtmc_generator.policy, state, 'argmax')
            print('State {} Action {} Prob {}'.format(
                state, action, dtmc_generator.policy[state][action]))
            print('pol trans probs {}'.format(trans_prob_dict.get(
                (state, action))))

            if action == 729:  # presumably the terminal/"stop" action — confirm
                task.reset()
            else:
                task.performAction(action)

            successors = dtmc_generator.get_successor_states(state, action)
            new_state = task.getObservation()[0]
            found = False
            # Distinct loop variable: the original clobbered 'state' here.
            for succ_state, prob in successors:
                if new_state == succ_state:
                    print('{} state found, prob {}'.format(succ_state, prob))
                    found = True
            if not found:
                print('{} state not found! successors: {}'.format(
                    new_state, successors))
            n2 = dt.datetime.now()
            print('elapsed time: {} s'.format((n2 - n1).microseconds / 1e6))
    finally:
        # Previously unreachable; now runs on interrupt or error.
        Utils.endVREP()
Пример #2
0
    def run(self):
        """Run ``self.n_episodes`` episodes in a dedicated V-REP instance,
        recording (state, action, next_state) transitions in ``self.t_table``
        and tallying each episode's terminal state in ``self.counters``.

        Cleanup is guarded: in the original code ``proc`` and ``client_id``
        were first bound inside the ``try``, so a failure in
        ``Utils.exec_vrep`` or ``Utils.connectToVREP`` made the ``finally``
        block raise NameError and mask the real error.
        """
        proc = None
        client_id = None
        try:
            proc = Utils.exec_vrep(self.port)
            time.sleep(60)  # give V-REP time to start before connecting

            client_id = Utils.connectToVREP(self.port)
            environment = StandingUpEnvironment(client_id)
            task = StandingUpTask(environment)

            for episode in range(self.n_episodes):

                old_state = current_state = task.getObservation()[0]
                action = self.select_action(self.policy, current_state)
                print('State {} Action {} Prob {}'.format(
                    current_state, action, self.policy[current_state][action]))
                task.performAction(action)
                # Action 729 terminates the episode.
                while action != 729:
                    old_state = current_state
                    # Test hook to verify Monitor capability (kept disabled).
                    #if action == 579 or action == 337:
                    #    current_state = task.state_mapper.self_collided_state
                    #else:
                    current_state = task.getObservation()[0]
                    self.t_table.incrementValue(
                        (old_state, action, current_state))
                    action = self.select_action(self.policy, current_state)
                    print('State {} Action {} Prob {}'.format(
                        current_state, action,
                        self.policy[current_state][action]))
                    task.performAction(action)
                task.reset()

                # Classify how the episode ended.
                if current_state == task.state_mapper.goal_state:
                    self.counters['goal'] += 1
                elif current_state == task.state_mapper.fallen_state:
                    self.counters['fallen'] += 1
                elif current_state == task.state_mapper.too_far_state:
                    self.counters['far'] += 1
                elif current_state == task.state_mapper.self_collided_state:
                    self.counters['collided'] += 1
                else:
                    self.counters['unknown'] += 1
        finally:
            self.barrier.wait()
            # Only clean up resources that were actually acquired.
            if client_id is not None:
                Utils.endVREP(client_id)
            if proc is not None:
                proc.kill()
    def run(self):
        """Launch a V-REP instance and evaluate the policy for
        MAX_ITERATIONS episodes, logging each episode's terminal state."""
        proc = Utils.exec_vrep(self.port)
        time.sleep(10)
        # connect to V-REP server
        try:
            client_id = Utils.connectToVREP(self.port)
            env = StandingUpEnvironment(client_id)
            task = StandingUpTask(env)
            counter = 0
            while counter < MAX_ITERATIONS:
                state = task.getObservation()[0]
                action = select_action(self.policy, state, 'prob')
                # print('State {} Action {} Prob {}'.format(state, action, self.policy[state][action]))
                if action != 729:
                    task.performAction(action)
                    continue

                # Action 729 ends the episode: log how it terminated.
                mapper = task.state_mapper
                outcomes = (
                    (mapper.goal_state, 'Goal!'),
                    (mapper.fallen_state, 'Fallen!'),
                    (mapper.too_far_state, 'Far!'),
                    (mapper.self_collided_state, 'Collided!'),
                )
                for terminal, message in outcomes:
                    if state == terminal:
                        self.logger.info(message)
                        break
                else:
                    self.logger.info(state)

                counter += 1
                print('Iteration {}'.format(counter))
                task.reset()
        finally:
            proc.kill()
Пример #4
0
def main():
    """Replay a previously learned Q-table in V-REP, running episodes
    forever with a pure learning-free agent.

    Returns -1 when the remote API connection cannot be established.
    The finally block closes the connection (the original ``simxFinish``
    call was unreachable after the infinite loop).
    """
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000,
                               5)  # Connect to V-REP

    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1

    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)

    task = StandingUpTask(environment)

    controller = ActionValueTable(task.get_state_space_size(),
                                  task.get_action_space_size())
    controller.initialize(1.)

    # Context manager closes the pickle even on error; 'qtable_file' also
    # avoids shadowing the builtin 'file'.
    with open('standing-up-q.pkl', 'rb') as qtable_file:
        controller._params = pickle.load(qtable_file)

    # learner = Q()
    agent = LearningAgent(controller)

    experiment = EpisodicExperiment(task, agent)

    i = 0
    try:
        while True:
            i += 1
            print('Iteration n° ' + str(i))
            experiment.doEpisodes(1)
    finally:
        # Previously unreachable; now runs on interrupt or error.
        vrep.simxFinish(client_id)
Пример #5
0
def main():
    """Record 50 rollouts of the scripted standing-up action sequence and
    pickle them (each ending with a terminal marker entry) for offline use."""

    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    #print('Initial State: ')
    #print(environment.bioloid.read_state())

    trajectory_data = []
    for iteration in range(50):
        print('Iteration {}'.format(iteration))
        trajectory = []
        for raw_action in Utils.standingUpActions:
            # Snapshot state before acting, full state after.
            obs = task.getObservation()[0]
            vec = task.env.bioloid.read_state()
            action_id = Utils.vecToInt(raw_action)
            task.performAction(action_id)
            step_reward = task.getReward()
            trajectory.append({
                'state': obs,
                'state_vector': vec,
                'action': action_id,
                'reward': step_reward,
                'full_state': task.env.bioloid.read_full_state(),
            })
        # Terminal entry (action -1) capturing the final pose.
        trajectory.append({
            'state': task.getObservation()[0],
            'state_vector': task.env.bioloid.read_state(),
            'action': -1,
            'reward': 0,
            'full_state': task.env.bioloid.read_full_state(),
        })
        trajectory_data.append(trajectory)
        task.reset()

    with open('../data/trajectory.pkl', 'wb') as out_file:
        pickle.dump(trajectory_data, out_file)
    Utils.endVREP()
Пример #6
0
def main():
    """Interactive driver: read an action vector from stdin, apply it in
    V-REP, and print the robot's collision/fall status.

    Loops until interrupted; the environment is reset on exit (the
    original ``environment.reset()`` call sat after the infinite loop and
    was unreachable). Malformed input no longer kills the session.
    """
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    try:
        while True:
            action_str = input("Insert next action: ")
            try:
                action = [int(x) for x in action_str.split(' ')]
            except ValueError:
                print('Invalid action: expected space-separated integers')
                continue

            # Called for its side effects; the return value was never used.
            task.getObservation()
            print(task.current_sensors)
            a = Utils.vecToInt(action)
            task.performAction(a)
            task.getReward()
            # NOTE(review): both lines call is_fallen(); the 'self-collided'
            # label probably should call a self-collision check instead —
            # confirm the bioloid API before changing.
            print('self-collided: ' + str(environment.bioloid.is_fallen()))
            print('is-fallen: ' + str(environment.bioloid.is_fallen()))
    finally:
        # Previously unreachable; now runs on interrupt or error.
        environment.reset()
def main():
    """Train a Q-learning agent on the standing-up task, checkpointing the
    Q-table every 500 episodes and saving everything on interrupt."""
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    controller.initialize(agent)

    episode = 0
    try:
        while True:
            episode += 1
            print('Episode ' + str(episode))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()

            # Quick sanity stats on the Q-table parameters.
            for label, stat in (('mean', numpy.mean),
                                ('max', numpy.max),
                                ('min', numpy.min)):
                print(label + ': ' + str(stat(controller.params)))

            if episode % 500 == 0:  # Save q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()

    except (KeyboardInterrupt, SystemExit):
        # Final checkpoint before shutting down.
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()

    vrep.simxFinish(client_id)
def _col_letter(index):
    """Return the spreadsheet column name for a 0-based index
    (0 -> 'A', 25 -> 'Z', 26 -> 'AA'), valid past the 26-column limit of
    the original single-character ``chr(ord('A') + j)``."""
    name = ''
    index += 1
    while index:
        index, rem = divmod(index - 1, 26)
        name = chr(ord('A') + rem) + name
    return name


def main():
    """Run the scripted stand-up sequence n times, writing the discretized
    state vector after each action to one worksheet per action, plus a
    'Results' sheet with AVERAGE/VAR.P formulas over every component.
    """
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    state_vector_length = len(environment.bioloid.read_state())

    n = int(input('Number of iterations: '))

    # xlsxwriter always emits XLSX content, so the original '.xls' name
    # produced a file Excel refuses to open; use the matching extension.
    workbook = xlsxwriter.Workbook('data/reports/trajectory-trials.xlsx')
    try:
        worksheets = []
        for i in range(len(Utils.standingUpActions)):
            worksheets.append(workbook.add_worksheet('t' + str(i + 1)))

        for i in range(n):
            print('Iteration ' + str(i + 1))
            print('Initial State: ')
            # task.getObservation()
            print(task.state_mapper.sd.discretize(environment.getSensors()))
            print(task.getObservation()[0])

            for j, action in enumerate(Utils.standingUpActions):
                environment.performAction(action)
                state_vector = environment.getSensors()
                discretized_state = task.state_mapper.sd.discretize(state_vector)
                for k, s in enumerate(discretized_state):
                    worksheets[j].write(i, k, s)
                state_n = task.update_current_state()

                # Distance to the mapped state (0 when already at the goal).
                if state_n != task.state_mapper.goal_state:
                    state_distance = euclidean(
                        task.state_mapper.state_space[state_n], discretized_state)
                else:
                    state_distance = 0
                goal_distance = task.state_mapper.get_goal_distance(
                    discretized_state)

                print(discretized_state)
                print('---------------------')

                worksheets[j].write(i, state_vector_length + 1, state_n)
                worksheets[j].write(i, state_vector_length + 2, state_distance)
                worksheets[j].write(i, state_vector_length + 3, goal_distance)

            environment.reset()

        res_worksheet = workbook.add_worksheet('Results')

        row = 0
        for i in range(len(Utils.standingUpActions)):
            sheet_name = 't' + str(i + 1)
            res_worksheet.write(row, 0, sheet_name)
            res_worksheet.write(row, 1, 'mean')
            res_worksheet.write(row + 1, 1, 'var')

            for j in range(state_vector_length):
                col_name = _col_letter(j)
                # Excel sheet references use '!'; the original '.' separator
                # (OpenDocument syntax) produced broken formulas — likely
                # the cause of the old "range is made lowercase" TODO.
                data_range = sheet_name + '!' + col_name + '1:' + col_name + str(n)
                res_worksheet.write_formula(row, 2 + j,
                                            '=AVERAGE(' + data_range + ')')
                res_worksheet.write_formula(row + 1, 2 + j,
                                            '=VAR.P(' + data_range + ')')
            row += 2
    finally:
        # Flush the workbook to disk even if a V-REP call fails mid-run.
        workbook.close()