Пример #1
0
def main():
    """Run the learned policy interactively in V-REP, printing transition
    probabilities and checking each observed successor state against the
    DTMC model built from the t-table.

    Runs forever; the finally block closes the V-REP connection on
    interrupt (the original ``Utils.endVREP()`` call sat after the
    infinite loop and was unreachable).
    """
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    temperature = 2
    ttable_path = os.path.join(BASE_DIR, 't-table.pkl')
    qtable_path = os.path.join(BASE_DIR,
                               'q-table-{}.pkl'.format(Q_TABLE_VERSION))
    dtmc_generator = DTMCGenerator(ttable_path, qtable_path, temperature)
    dtmc_generator.load_policy(POLICY_PREFIX + 'sm10-policy.pkl', BASE_DIR)

    # Re-index the transition table by (state, action) -> [(next_state, count)].
    # 'ttable_file' avoids shadowing the builtin 'file'.
    with open(ttable_path, 'rb') as ttable_file:
        ttable = pickle.load(ttable_file)
    trans_prob_dict = {}
    for key, value in ttable.items():
        trans_prob_dict.setdefault((key[0], key[1]), []).append((key[2], value))

    try:
        while True:
            n1 = dt.datetime.now()
            state = task.getObservation()[0]

            # Greedy action straight from the Q-table, for comparison.
            action = numpy.argmax(dtmc_generator.Q[state])
            print('trans probs {}'.format(trans_prob_dict.get((state, action))))

            # Action actually taken, selected from the loaded policy.
            action = select_action(dtmc_generator.policy, state, 'argmax')
            print('State {} Action {} Prob {}'.format(
                state, action, dtmc_generator.policy[state][action]))
            print('pol trans probs {}'.format(trans_prob_dict.get(
                (state, action))))

            if action == 729:  # presumably the terminal/"stop" action — confirm
                task.reset()
            else:
                task.performAction(action)

            successors = dtmc_generator.get_successor_states(state, action)
            new_state = task.getObservation()[0]
            found = False
            # Distinct loop variable: the original clobbered 'state' here.
            for succ_state, prob in successors:
                if new_state == succ_state:
                    print('{} state found, prob {}'.format(succ_state, prob))
                    found = True
            if not found:
                print('{} state not found! successors: {}'.format(
                    new_state, successors))
            n2 = dt.datetime.now()
            print('elapsed time: {} s'.format((n2 - n1).microseconds / 1e6))
    finally:
        # Previously unreachable; now runs on interrupt or error.
        Utils.endVREP()
Пример #2
0
    def run(self):
        """Run ``self.n_episodes`` episodes in a dedicated V-REP instance,
        recording (state, action, next_state) transitions in ``self.t_table``
        and tallying each episode's terminal state in ``self.counters``.

        Cleanup is guarded: in the original code ``proc`` and ``client_id``
        were first bound inside the ``try``, so a failure in
        ``Utils.exec_vrep`` or ``Utils.connectToVREP`` made the ``finally``
        block raise NameError and mask the real error.
        """
        proc = None
        client_id = None
        try:
            proc = Utils.exec_vrep(self.port)
            time.sleep(60)  # give V-REP time to start before connecting

            client_id = Utils.connectToVREP(self.port)
            environment = StandingUpEnvironment(client_id)
            task = StandingUpTask(environment)

            for episode in range(self.n_episodes):

                old_state = current_state = task.getObservation()[0]
                action = self.select_action(self.policy, current_state)
                print('State {} Action {} Prob {}'.format(
                    current_state, action, self.policy[current_state][action]))
                task.performAction(action)
                # Action 729 terminates the episode.
                while action != 729:
                    old_state = current_state
                    # Test hook to verify Monitor capability (kept disabled).
                    #if action == 579 or action == 337:
                    #    current_state = task.state_mapper.self_collided_state
                    #else:
                    current_state = task.getObservation()[0]
                    self.t_table.incrementValue(
                        (old_state, action, current_state))
                    action = self.select_action(self.policy, current_state)
                    print('State {} Action {} Prob {}'.format(
                        current_state, action,
                        self.policy[current_state][action]))
                    task.performAction(action)
                task.reset()

                # Classify how the episode ended.
                if current_state == task.state_mapper.goal_state:
                    self.counters['goal'] += 1
                elif current_state == task.state_mapper.fallen_state:
                    self.counters['fallen'] += 1
                elif current_state == task.state_mapper.too_far_state:
                    self.counters['far'] += 1
                elif current_state == task.state_mapper.self_collided_state:
                    self.counters['collided'] += 1
                else:
                    self.counters['unknown'] += 1
        finally:
            self.barrier.wait()
            # Only clean up resources that were actually acquired.
            if client_id is not None:
                Utils.endVREP(client_id)
            if proc is not None:
                proc.kill()
    def run(self):
        """Launch a V-REP instance and evaluate the policy for
        MAX_ITERATIONS episodes, logging each episode's terminal state."""
        proc = Utils.exec_vrep(self.port)
        time.sleep(10)
        # connect to V-REP server
        try:
            client_id = Utils.connectToVREP(self.port)
            env = StandingUpEnvironment(client_id)
            task = StandingUpTask(env)
            counter = 0
            while counter < MAX_ITERATIONS:
                state = task.getObservation()[0]
                action = select_action(self.policy, state, 'prob')
                # print('State {} Action {} Prob {}'.format(state, action, self.policy[state][action]))
                if action != 729:
                    task.performAction(action)
                    continue

                # Action 729 ends the episode: log how it terminated.
                mapper = task.state_mapper
                outcomes = (
                    (mapper.goal_state, 'Goal!'),
                    (mapper.fallen_state, 'Fallen!'),
                    (mapper.too_far_state, 'Far!'),
                    (mapper.self_collided_state, 'Collided!'),
                )
                for terminal, message in outcomes:
                    if state == terminal:
                        self.logger.info(message)
                        break
                else:
                    self.logger.info(state)

                counter += 1
                print('Iteration {}'.format(counter))
                task.reset()
        finally:
            proc.kill()
Пример #4
0
def main():
    """Replay a previously learned Q-table in V-REP, running episodes
    forever with a pure learning-free agent.

    Returns -1 when the remote API connection cannot be established.
    The finally block closes the connection (the original ``simxFinish``
    call was unreachable after the infinite loop).
    """
    vrep.simxFinish(-1)  # just in case, close all opened connections
    client_id = vrep.simxStart('127.0.0.1', 19997, True, True, 5000,
                               5)  # Connect to V-REP

    if client_id < 0:
        print('Failed connecting to remote API server')
        return -1

    print('Connected to remote API server')

    # Define RL elements
    environment = StandingUpEnvironment(client_id)

    task = StandingUpTask(environment)

    controller = ActionValueTable(task.get_state_space_size(),
                                  task.get_action_space_size())
    controller.initialize(1.)

    # Context manager closes the pickle even on error; 'qtable_file' also
    # avoids shadowing the builtin 'file'.
    with open('standing-up-q.pkl', 'rb') as qtable_file:
        controller._params = pickle.load(qtable_file)

    # learner = Q()
    agent = LearningAgent(controller)

    experiment = EpisodicExperiment(task, agent)

    i = 0
    try:
        while True:
            i += 1
            print('Iteration n° ' + str(i))
            experiment.doEpisodes(1)
    finally:
        # Previously unreachable; now runs on interrupt or error.
        vrep.simxFinish(client_id)
Пример #5
0
def main():
    """Record 50 rollouts of the scripted standing-up action sequence and
    pickle them (each ending with a terminal marker entry) for offline use."""

    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    #print('Initial State: ')
    #print(environment.bioloid.read_state())

    trajectory_data = []
    for iteration in range(50):
        print('Iteration {}'.format(iteration))
        trajectory = []
        for raw_action in Utils.standingUpActions:
            # Snapshot state before acting, full state after.
            obs = task.getObservation()[0]
            vec = task.env.bioloid.read_state()
            action_id = Utils.vecToInt(raw_action)
            task.performAction(action_id)
            step_reward = task.getReward()
            trajectory.append({
                'state': obs,
                'state_vector': vec,
                'action': action_id,
                'reward': step_reward,
                'full_state': task.env.bioloid.read_full_state(),
            })
        # Terminal entry (action -1) capturing the final pose.
        trajectory.append({
            'state': task.getObservation()[0],
            'state_vector': task.env.bioloid.read_state(),
            'action': -1,
            'reward': 0,
            'full_state': task.env.bioloid.read_full_state(),
        })
        trajectory_data.append(trajectory)
        task.reset()

    with open('../data/trajectory.pkl', 'wb') as out_file:
        pickle.dump(trajectory_data, out_file)
    Utils.endVREP()
Пример #6
0
def main():
    """Interactive driver: read an action vector from stdin, apply it in
    V-REP, and print the robot's collision/fall status.

    Loops until interrupted; the environment is reset on exit (the
    original ``environment.reset()`` call sat after the infinite loop and
    was unreachable). Malformed input no longer kills the session.
    """
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)

    try:
        while True:
            action_str = input("Insert next action: ")
            try:
                action = [int(x) for x in action_str.split(' ')]
            except ValueError:
                print('Invalid action: expected space-separated integers')
                continue

            # Called for its side effects; the return value was never used.
            task.getObservation()
            print(task.current_sensors)
            a = Utils.vecToInt(action)
            task.performAction(a)
            task.getReward()
            # NOTE(review): both lines call is_fallen(); the 'self-collided'
            # label probably should call a self-collision check instead —
            # confirm the bioloid API before changing.
            print('self-collided: ' + str(environment.bioloid.is_fallen()))
            print('is-fallen: ' + str(environment.bioloid.is_fallen()))
    finally:
        # Previously unreachable; now runs on interrupt or error.
        environment.reset()
def main():
    """Train a Q-learning agent on the standing-up task, checkpointing the
    Q-table every 500 episodes and saving everything on interrupt."""
    client_id = Utils.connectToVREP()

    # Define RL elements
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    controller = MyActionValueTable()
    learner = Q(0.5, 0.9)
    learner.explorer = EpsilonGreedyExplorer(0.15, 1)  # EpsilonGreedyBoltzmannExplorer()
    agent = LearningAgent(controller, learner)
    experiment = EpisodicExperiment(task, agent)

    controller.initialize(agent)

    episode = 0
    try:
        while True:
            episode += 1
            print('Episode ' + str(episode))
            experiment.doEpisodes()
            agent.learn()
            agent.reset()

            # Quick sanity stats on the Q-table parameters.
            for label, stat in (('mean', numpy.mean),
                                ('max', numpy.max),
                                ('min', numpy.min)):
                print(label + ': ' + str(stat(controller.params)))

            if episode % 500 == 0:  # Save q-table every 500 episodes
                print('Save q-table')
                controller.save()
                task.t_table.save()

    except (KeyboardInterrupt, SystemExit):
        # Final checkpoint before shutting down.
        with open('../data/standing-up-q.pkl', 'wb') as handle:
            pickle.dump(controller.params, handle)
        task.t_table.save()
        controller.save()

    vrep.simxFinish(client_id)
def _col_letter(index):
    """Return the spreadsheet column name for a 0-based index
    (0 -> 'A', 25 -> 'Z', 26 -> 'AA'), valid past the 26-column limit of
    the original single-character ``chr(ord('A') + j)``."""
    name = ''
    index += 1
    while index:
        index, rem = divmod(index - 1, 26)
        name = chr(ord('A') + rem) + name
    return name


def main():
    """Run the scripted stand-up sequence n times, writing the discretized
    state vector after each action to one worksheet per action, plus a
    'Results' sheet with AVERAGE/VAR.P formulas over every component.
    """
    client_id = Utils.connectToVREP()
    environment = StandingUpEnvironment(client_id)
    task = StandingUpTask(environment)
    state_vector_length = len(environment.bioloid.read_state())

    n = int(input('Number of iterations: '))

    # xlsxwriter always emits XLSX content, so the original '.xls' name
    # produced a file Excel refuses to open; use the matching extension.
    workbook = xlsxwriter.Workbook('data/reports/trajectory-trials.xlsx')
    try:
        worksheets = []
        for i in range(len(Utils.standingUpActions)):
            worksheets.append(workbook.add_worksheet('t' + str(i + 1)))

        for i in range(n):
            print('Iteration ' + str(i + 1))
            print('Initial State: ')
            # task.getObservation()
            print(task.state_mapper.sd.discretize(environment.getSensors()))
            print(task.getObservation()[0])

            for j, action in enumerate(Utils.standingUpActions):
                environment.performAction(action)
                state_vector = environment.getSensors()
                discretized_state = task.state_mapper.sd.discretize(state_vector)
                for k, s in enumerate(discretized_state):
                    worksheets[j].write(i, k, s)
                state_n = task.update_current_state()

                # Distance to the mapped state (0 when already at the goal).
                if state_n != task.state_mapper.goal_state:
                    state_distance = euclidean(
                        task.state_mapper.state_space[state_n], discretized_state)
                else:
                    state_distance = 0
                goal_distance = task.state_mapper.get_goal_distance(
                    discretized_state)

                print(discretized_state)
                print('---------------------')

                worksheets[j].write(i, state_vector_length + 1, state_n)
                worksheets[j].write(i, state_vector_length + 2, state_distance)
                worksheets[j].write(i, state_vector_length + 3, goal_distance)

            environment.reset()

        res_worksheet = workbook.add_worksheet('Results')

        row = 0
        for i in range(len(Utils.standingUpActions)):
            sheet_name = 't' + str(i + 1)
            res_worksheet.write(row, 0, sheet_name)
            res_worksheet.write(row, 1, 'mean')
            res_worksheet.write(row + 1, 1, 'var')

            for j in range(state_vector_length):
                col_name = _col_letter(j)
                # Excel sheet references use '!'; the original '.' separator
                # (OpenDocument syntax) produced broken formulas — likely
                # the cause of the old "range is made lowercase" TODO.
                data_range = sheet_name + '!' + col_name + '1:' + col_name + str(n)
                res_worksheet.write_formula(row, 2 + j,
                                            '=AVERAGE(' + data_range + ')')
                res_worksheet.write_formula(row + 1, 2 + j,
                                            '=VAR.P(' + data_range + ')')
            row += 2
    finally:
        # Flush the workbook to disk even if a V-REP call fails mid-run.
        workbook.close()