Python take_action示例，actionutils.take_action Python示例

示例#1

0

显示文件

文件： random_walker.py 项目： fohria/maia

def run_episode(config, parameters, actions, counters, ws, gw, mc, episode):
    """Run one episode of uniformly random actions until a reward is received.

    Each step picks a random key of ``actions``, executes it through
    ``actionutils.take_action``, updates the counters and, if
    ``config['graph_server']`` is truthy, sends a JSON status message
    through ``ws``.  The loop ends on the first non-zero reward.

    Args:
        config: mapping read for ``'walk_stride'``, ``'look_angle'`` and
            ``'graph_server'``.
        parameters: not used by this function.
        actions: mapping from action name to an object exposing ``.value``.
        counters: mutable mapping; ``'actioncount'`` and ``'flowercount'``
            are incremented in place, ``'roundcount'`` is read.
        ws: object whose ``send`` method receives the JSON-encoded message.
        gw: not used by this function.
        mc: passed through unchanged to ``actionutils.take_action``.
        episode: value reported under the ``'episode'`` key of the message.

    Returns:
        None.  Results are reported through ``counters`` and ``ws``.
    """
    logging.debug("starting new episode")

    # An explicit flag (rather than testing the reward in the loop header)
    # keeps the termination condition easy to follow.
    flower_picked = False

    # number of actions taken in this episode only
    ep_actioncount = 0

    # episode will continue until a flower is picked
    while not flower_picked:

        # Select a random action.  The keys are materialised into a list
        # because np.random.choice rejects a Python 3 dict view.
        action = np.random.choice(list(actions))
        logging.debug("ACTION:  %s", action)

        # per the original author's note: reward is 10 if a flower was
        # picked, else 0
        reward = actionutils.take_action(
            action, config['walk_stride'], config['look_angle'], mc)

        # increase the global number of actions taken
        counters['actioncount'] += 1

        # allow action animation to finish before continuing
        time.sleep(0.8)

        # send q-value to websocket api server if that setting is enabled
        message = {
            'action': action,
            'q_value': actions[action].value,
            'step': counters['actioncount'],
            'episode': episode,
            'round': counters['roundcount'],
        }
        if config['graph_server']:
            ws.send(json.dumps(message))

        # episode is over as soon as any non-zero reward comes back
        if reward != 0:
            flower_picked = True
            counters['flowercount'] += 1

        ep_actioncount += 1
        logging.debug("actions taken this episode: %s", ep_actioncount)

示例#2

0

显示文件

文件： qlearner_v2.py 项目： fohria/maia

def run_episode(config, parameters, actions, counters, ws, gw, mc, episode):
    """Run one Q-learning episode until an action yields a non-zero reward.

    Every step chooses an action by one of three strategies, executes it,
    observes the resulting state and lets the chosen action object update
    itself:

    * ``"innate"``  - ``'chop'`` is forced with probability
      ``parameters['chop_prob']`` when ``is_it_red`` accepts the current
      state;
    * ``"explore"`` - a uniformly random key of ``actions``, taken with
      probability ``parameters['epsilon']``;
    * ``"exploit"`` - whatever ``select_optimal_action(actions)`` returns.

    Args:
        config: mapping read for ``'walk_stride'``, ``'look_angle'`` and
            ``'graph_server'``.
        parameters: mapping read for ``'chop_prob'`` and ``'epsilon'``.
        actions: mapping from action name to an object exposing ``.value``
            and ``.update(states, reward, actions)``.
        counters: mutable mapping; ``'actioncount'`` and ``'flowercount'``
            are incremented in place, ``'roundcount'`` is read.
        ws: object whose ``send`` method receives the JSON-encoded message.
        gw: passed through unchanged to ``observe_state``.
        mc: passed through unchanged to ``actionutils.take_action``.
        episode: value reported in the message and the action datastream.

    Returns:
        None.  Results are reported through ``counters``, ``actions`` and
        ``ws``.
    """
    logging.debug("starting new episode")

    # An explicit flag (rather than testing the reward in the loop header)
    # keeps the termination condition easy to follow.
    flower_picked = False

    # Sliding window of observations: states[0] is the state the action is
    # chosen in, states[1] (appended after acting) is the resulting state.
    states = [observe_state(gw)]

    # number of actions taken in this episode only
    ep_actioncount = 0

    # episode will continue until a flower is picked
    while not flower_picked:

        # start every step without a chosen action
        action = None
        action_strategy = ""

        # innate chop: only roll the dice when enough red is in the frame
        if (is_it_red(states[0])
                and np.random.uniform(0, 1) < parameters['chop_prob']):
            action = 'chop'
            action_strategy = "innate"
            logging.debug("INNATE:  chop")

        # if we still don't have an action we should explore/exploit
        if action is None:

            # select action depending on explore/exploit probability epsilon
            if np.random.uniform(0, 1) < parameters['epsilon']:
                # explore; the keys are materialised into a list because
                # np.random.choice rejects a Python 3 dict view
                action = np.random.choice(list(actions))
                action_strategy = "explore"
                logging.debug("EXPLORE:  %s", action)
            else:
                # exploit
                action = select_optimal_action(actions)
                action_strategy = "exploit"
                logging.debug("EXPLOIT:  %s", action)

        # per the original author's note: reward is 10 if a flower was
        # picked, else 0
        reward = actionutils.take_action(action, config['walk_stride'],
                                         config['look_angle'], mc)

        # increase the global number of actions taken
        counters['actioncount'] += 1

        # allow action animation to finish before observing new state
        time.sleep(0.8)

        # observe new state, states[1]
        states.append(observe_state(gw))

        # per the original author's note: update does the forward pass and
        # backprop for the weights
        actions[action].update(states, reward, actions)

        # update action datastream
        update_action_datastream(action, actions, counters, action_strategy,
                                 episode)

        # send q-value to websocket api server if that setting is enabled
        message = {
            'action': action,
            'q_value': actions[action].value,
            'step': counters['actioncount'],
            'episode': episode,
            'round': counters['roundcount'],
        }
        if config['graph_server']:
            ws.send(json.dumps(message))

        # new state, states[1], becomes states[0] by deleting states[0]
        del states[0]

        # episode is over as soon as any non-zero reward comes back
        if reward != 0:
            flower_picked = True
            counters['flowercount'] += 1

        ep_actioncount += 1
        logging.debug("actions taken this episode: %s", ep_actioncount)

示例#3

0

显示文件

文件： qlearner_v2.py 项目： fohria/maia

def run_episode(config, parameters, actions, counters, ws, gw, mc, episode):
    """Play a single Q-learning episode; it ends on the first non-zero reward.

    Per step, an action is chosen (innate chop, random exploration, or the
    result of ``select_optimal_action``), executed with
    ``actionutils.take_action``, and the new observation plus the reward
    are handed to the chosen action object's ``update`` method.

    Args:
        config: mapping read for ``'walk_stride'``, ``'look_angle'`` and
            ``'graph_server'``.
        parameters: mapping read for ``'chop_prob'`` (probability of the
            innate chop when ``is_it_red`` accepts the current state) and
            ``'epsilon'`` (probability of exploring).
        actions: mapping from action name to an object exposing ``.value``
            and ``.update(states, reward, actions)``.
        counters: mutable mapping; ``'actioncount'`` and ``'flowercount'``
            are incremented in place, ``'roundcount'`` is read.
        ws: object whose ``send`` method receives the JSON-encoded message
            when ``config['graph_server']`` is truthy.
        gw: passed through unchanged to ``observe_state``.
        mc: passed through unchanged to ``actionutils.take_action``.
        episode: value reported in the message and the action datastream.

    Returns:
        None.  Results are reported through ``counters``, ``actions`` and
        ``ws``.
    """
    logging.debug("starting new episode")

    # Kept as an explicit flag so the loop condition reads clearly, even
    # though the reward value alone could drive the loop.
    flower_picked = False

    # observe initial state; it becomes states[0]
    states = [observe_state(gw)]

    # episode action count
    ep_actioncount = 0

    # episode will continue until a flower is picked
    while not flower_picked:

        # no action has been chosen yet for this step
        action = None
        action_strategy = ""

        # check if enough red is in the frame for innate chop; the random
        # draw only happens when the frame is red (short-circuit `and`)
        red_in_frame = is_it_red(states[0])
        if red_in_frame and np.random.uniform(0, 1) < parameters['chop_prob']:
            action = 'chop'
            action_strategy = "innate"
            logging.debug("INNATE:  chop")

        # if we still don't have an action we should explore/exploit
        if action is None:
            explore = np.random.uniform(0, 1) < parameters['epsilon']
            if explore:
                # np.random.choice needs a real sequence: on Python 3
                # dict.keys() is a view and is rejected, so build a list
                action = np.random.choice(list(actions))
                action_strategy = "explore"
                logging.debug("EXPLORE:  %s", action)
            else:
                action = select_optimal_action(actions)
                action_strategy = "exploit"
                logging.debug("EXPLOIT:  %s", action)

        # per the original author's note: reward is 10 if a flower was
        # picked, else 0
        reward = actionutils.take_action(
            action, config['walk_stride'], config['look_angle'], mc)

        # increase number of actions taken
        counters['actioncount'] += 1

        # allow action animation to finish before observing new state
        time.sleep(0.8)

        # observe new state, states[1]
        states.append(observe_state(gw))

        # per the original author's note: update does the forward pass and
        # backprop for the weights
        actions[action].update(states, reward, actions)

        # update action datastream
        update_action_datastream(
            action, actions, counters, action_strategy, episode)

        # send q-value to websocket api server if that setting is enabled
        message = {
            'action': action,
            'q_value': actions[action].value,
            'step': counters['actioncount'],
            'episode': episode,
            'round': counters['roundcount'],
        }
        if config['graph_server']:
            ws.send(json.dumps(message))

        # new state, states[1], becomes states[0] by deleting states[0]
        del states[0]

        # episode is over if flower was picked (any non-zero reward)
        if reward != 0:
            flower_picked = True
            counters['flowercount'] += 1

        ep_actioncount += 1
        logging.debug("actions taken this episode: %s", ep_actioncount)