Example #1
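# NOTE: this snippet assumes the original module's imports and globals, roughly:
#   import numpy as np
#   import cv2
#   import tensorflow as tf
# plus the project's DoomSimulator and Agent classes, a getMaxColourPos() colour-blob
# helper, an 'edge' convolution kernel, and module-level resolution globals
# widthIn/heightIn (simulator resolution) and width/height (network input resolution).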
def main():
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (widthIn, heightIn)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'RGB24'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
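    # scale pixel values (0-255) and measurements (roughly 0-100) into about the [-0.5, 0.5] range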
    preprocess_input_images = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['resolution'] = (width, height)
    # just use grayscale for nnet inputs
    agent_args['num_channels'] = 1

    # net parameters: structured arrays with one row per layer - conv layers are
    # (out_channels, kernel, stride), fully-connected layers are (out_dims,)
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])
    agent_args['n_actions'] = 7

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['history_length_ico'] = 3
    historyLen = agent_args['history_length']
    print("HistoryLen: ", historyLen)

    print('starting simulator')
    simulator = DoomSimulator(simulator_args)
    num_channels = simulator.num_channels

    print('started simulator')

    agent_args['state_imgs_shape'] = (historyLen * num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    agent_args['n_ffnet_input'] = (agent_args['resolution'][0] *
                                   agent_args['resolution'][1])
    agent_args['n_ffnet_hidden'] = np.array([50, 5])
    agent_args['n_ffnet_output'] = 1
    agent_args['n_ffnet_act'] = 7
    agent_args['n_ffnet_meas'] = simulator.num_meas
    agent_args['learning_rate'] = 1E-4

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(historyLen):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(historyLen * simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (historyLen - 1)
            for i in agent_args['meas_for_manual_init']
        ])  # current timestep is the last in the stack
    else:
        agent_args['meas_for_manual'] = []

    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    #    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    #    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
    #    agent = Agent(sess, agent_args)
    #    agent.load('/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/ICO-8600')
    #    print("model loaded..")

    img_buffer = np.zeros((historyLen, simulator.resolution[1],
                           simulator.resolution[0], num_channels),
                          dtype='uint8')

    meas_buffer = np.zeros((historyLen, simulator.num_meas))
    act_buffer = np.zeros((historyLen, 7))
    act_buffer_ico = np.zeros((agent_args['history_length_ico'], 7))
    curr_step = 0
    old_step = -1
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))
    ag = Agent(sess, agent_args)

    diff_y = 0
    diff_x = 0
    diff_z = 0
    diff_theta = 0
    iter = 1
    epoch = 200
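    # reflex (ICO) steering parameters: the radial-flow estimate for each visual field is
    # low-pass filtered (radialFlowInertia) and compared with the flow expected from the
    # forward/turn actions (radialGain, rotationGain); errors below errorThresh are ignored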
    radialFlowLeft = 30.
    radialFlowRight = 30.
    radialFlowInertia = 0.4
    radialGain = 4.
    rotationGain = 50.
    errorThresh = 10.
    updatePtsFreq = 50
    skipImage = 1
    skipImageICO = 5
    reflexGain = 0.01
    oldHealth = 0.

    # create masks for the left and right visual fields, intended to keep the tracker off
    # the floor pattern; note that the slices below select rows from half_height downwards,
    # i.e. the lower half of the image rather than the upper half
    half_height = round(height / 2)
    half_width = round(width / 2)

    maskLeft = np.zeros([height, width], np.uint8)
    maskLeft[half_height:, :half_width] = 1.
    maskRight = np.zeros([height, width], np.uint8)
    maskRight[half_height:, half_width:] = 1.

    # pyramidal Lucas-Kanade optical-flow parameters (cv2.calcOpticalFlowPyrLK) and
    # Shi-Tomasi corner-detection parameters (cv2.goodFeaturesToTrack)
    lk_params = dict(winSize=(15, 15),
                     maxLevel=2,
                     criteria=(cv2.TERM_CRITERIA_EPS | cv2.TERM_CRITERIA_COUNT,
                               10, 0.03))
    feature_params = dict(maxCorners=500,
                          qualityLevel=0.03,
                          minDistance=7,
                          blockSize=7)

    imgCentre = np.array([
        simulator_args['resolution'][0] / 2,
        simulator_args['resolution'][1] / 2
    ])
    print("Image centre: ", imgCentre)
    simpleInputs1 = np.zeros((width, height))
    simpleInputs2 = np.zeros((width, height))
    input_buff = np.zeros((1, width * height))
    target_buff = np.zeros((1, 1))
    meas_buff = np.zeros((1, simulator.num_meas))
    netOut = 0.
    netErr = np.zeros((width, height))
    delta = 0.

    while not term:
        if curr_step < historyLen:
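            # warm-up: fill the image/measurement/action history buffers using a null action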
            curr_act = np.zeros(7).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)
            print("Image: ", img.shape, " max: ", np.amax(img), " min: ",
                  np.amin(img))

            if curr_step == 0:
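                # initialise the feature points to track in the left and right visual fields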
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            img_buffer[curr_step % historyLen] = img
            meas_buffer[curr_step % historyLen] = meas
            act_buffer[curr_step % historyLen] = curr_act[:7]

        else:
            img1 = img_buffer[(curr_step - 2) % historyLen, :, :, :]
            img2 = img_buffer[(curr_step - 1) % historyLen, :, :, :]
            state = simulator._game.get_state()

            stateImg = state.screen_buffer
            # grayscale versions of the previous buffered frame and the current screen buffer
            # (greyImg1 is not used further below)
            greyImg1 = np.array(np.sum(img1, axis=2) / 3, dtype='uint8')
            greyImg2 = cv2.resize(stateImg, (width, height))
            greyImg2 = np.array(np.sum(greyImg2, axis=2) / 3, dtype='uint8')

            if (curr_step % updatePtsFreq == 0):
                p0Left = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                 mask=maskLeft,
                                                 **feature_params)
                p0Right = cv2.goodFeaturesToTrack(img[:, :, 0],
                                                  mask=maskRight,
                                                  **feature_params)

            p1Left, st, err = cv2.calcOpticalFlowPyrLK(img1[:, :, 0],
                                                       img2[:, :, 0], p0Left,
                                                       None, **lk_params)
            p1Right, st, err = cv2.calcOpticalFlowPyrLK(
                img1[:, :, 0], img2[:, :, 0], p0Right, None, **lk_params)
            flowLeft = (p1Left - p0Left)[:, 0, :]
            flowRight = (p1Right - p0Right)[:, 0, :]
            # accumulate the mean radial flow component in each visual field: the dot product
            # of each tracked point's flow with its offset from the image centre
            radialFlowTmpLeft = 0
            radialFlowTmpRight = 0

            for i in range(0, len(p0Left)):
                radialFlowTmpLeft += ((p0Left[i, 0, :] - imgCentre)).dot(
                    flowLeft[i, :]) / float(len(p0Left))
            for i in range(0, len(p0Right)):
                radialFlowTmpRight += ((p0Right[i, 0, :] - imgCentre)).dot(
                    flowRight[i, :]) / float(len(p0Right))

            rotation = act_buffer[(curr_step - 1) % historyLen][6]
            forward = act_buffer[(curr_step - 1) % historyLen][3]
            # keep separate radial errors for left and right fields
            radialFlowLeft = radialFlowLeft + radialFlowInertia * (
                radialFlowTmpLeft - radialFlowLeft)
            radialFlowRight = radialFlowRight + radialFlowInertia * (
                radialFlowTmpRight - radialFlowRight)
            expectFlowLeft = radialGain * forward + (rotationGain * rotation
                                                     if rotation < 0. else 0.)
            expectFlowRight = radialGain * forward - (rotationGain * rotation
                                                      if rotation > 0. else 0.)

            flowErrorLeft = forward * (expectFlowLeft - radialFlowLeft) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorRight = forward * (expectFlowRight - radialFlowRight) / (
                1. + rotationGain * np.abs(rotation))
            flowErrorLeft = flowErrorLeft if flowErrorLeft > 0. else 0.
            flowErrorRight = flowErrorRight if flowErrorRight > 0. else 0.
            icoSteer = 0.

            if curr_step > 100:
                health = meas[1]

                # Don't run any networks when the player is dead!
                if 0. < health < 101.:
                    # rectified flow errors feeding the ICO learner; in the expressions this
                    # replaces, the trailing '/ reflexGain' bound only to the else-branch
                    # constant 0., so these reduce to plain thresholded errors, with
                    # icoInSteer as their difference (right minus left)
                    icoInLeft = max(flowErrorLeft - errorThresh, 0.)
                    icoInRight = max(flowErrorRight - errorThresh, 0.)
                    icoInSteer = icoInRight - icoInLeft

                    centre, bottomLeft, topRight, colourStrength = getMaxColourPos(
                        stateImg, [255, 0, 0])

                    # steer towards the red ([255, 0, 0]) colour target; fall back to the
                    # image centre when no sufficiently compact blob is found
                    colourSteer = imgCentre[0]
                    if (len(bottomLeft) > 0 and len(topRight) > 0
                            and ((topRight[0] - bottomLeft[0]) < width / 3)
                            and ((topRight[1] - bottomLeft[1]) < height / 2)):
                        colourSteer = bottomLeft[0] + int(
                            0.5 * (topRight[0] - bottomLeft[0]))

                    # setpoint: horizontal offset of the target from the image centre,
                    # normalised by the image width (computed after colourSteer is updated)
                    delta = (colourSteer - imgCentre[0]) / width

                    simpleInputs1[:, :] = 0.1 * np.random.rand(width, height)
                    simpleInputs2[:, :] = 0.1 * np.random.rand(width, height)

                    # 'edge' is a module-level edge-detection kernel
                    greyImg2 = cv2.filter2D(greyImg2, -1, edge)
                    input_buff[0, :] = np.ndarray.flatten(
                        preprocess_input_images(greyImg2))
                    target_buff[...] = delta + netOut
                    meas_buff[0, :] = meas

                    ag.act_ffnet(input_buff, meas, target_buff)
                    netOut = ag.ext_ffnet_output[0]

                    #if (False):
                    #net_output = np.ndarray.flatten(agent.test_ffnet(input_buff))[0]
                    #else:
                    #net_output = np.ndarray.flatten(agent.learn_ffnet(input_buff, target_buff))[0]

                    netErr[:, :] = 0.
                    diff_theta = diff_theta + 0.01 * colourStrength * (
                        colourSteer - imgCentre[0]) / width

                    curr_act = np.zeros(7).tolist()
                    curr_act[3] = 0.  # forward speed (earlier experiments used curr_act[3] + diff_z)
                    curr_act[6] = curr_act[6] + diff_theta  # steering command

            img, meas, rwrd, term = simulator.step(curr_act)
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.

            if not term:
                img_buffer[curr_step % historyLen] = img
                meas_buffer[curr_step % historyLen] = meas
                act_buffer[curr_step % historyLen] = curr_act[:7]
        curr_step += 1

    simulator.close_game()
    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(iter))
Example #2
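# NOTE: as in Example #1, this snippet assumes the module's imports (os, numpy as np,
# tensorflow as tf) and the project's DoomSimulator and Agent classes.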
def main():
    ## Simulator
    simulator_args = {}
    simulator_args['config'] = 'config/config.cfg'
    simulator_args['resolution'] = (160, 120)
    simulator_args['frame_skip'] = 1
    simulator_args['color_mode'] = 'GRAY'
    simulator_args['game_args'] = "+name ICO +colorset 7"

    ## Agent
    agent_args = {}

    # preprocessing
    agent_args['preprocess_input_images'] = lambda x: x / 255. - 0.5
    agent_args['preprocess_input_measurements'] = lambda x: x / 100. - 0.5
    agent_args['num_future_steps'] = 6
    pred_scale_coeffs = np.expand_dims(
        (np.expand_dims(np.array([8., 40., 1.]), 1) * np.ones(
            (1, agent_args['num_future_steps']))).flatten(), 0)
    agent_args['postprocess_predictions'] = lambda x: x * pred_scale_coeffs
    agent_args['discrete_controls_manual'] = range(6, 12)
    agent_args['meas_for_net_init'] = range(3)
    agent_args['meas_for_manual_init'] = range(3, 16)
    agent_args['opposite_button_pairs'] = [(0, 1), (2, 3)]

    # net parameters: structured arrays with one row per layer - conv layers are
    # (out_channels, kernel, stride), fully-connected layers are (out_dims,)
    agent_args['conv_params'] = np.array([(16, 5, 4), (32, 3, 2), (64, 3, 2),
                                          (128, 3, 2)],
                                         dtype=[('out_channels', int),
                                                ('kernel', int),
                                                ('stride', int)])
    agent_args['fc_img_params'] = np.array([(128, )],
                                           dtype=[('out_dims', int)])
    agent_args['fc_meas_params'] = np.array([(128, ), (128, ), (128, )],
                                            dtype=[('out_dims', int)])
    agent_args['fc_joint_params'] = np.array([(256, ), (256, ), (-1, )],
                                             dtype=[('out_dims', int)])
    agent_args['target_dim'] = agent_args['num_future_steps'] * len(
        agent_args['meas_for_net_init'])

    # efference copy

    # experiment arguments
    agent_args['test_objective_params'] = (np.array([5, 11, 17]),
                                           np.array([1., 1., 1.]))
    agent_args['history_length'] = 3
    agent_args['test_checkpoint'] = 'model'

    print('starting simulator')

    simulator = DoomSimulator(simulator_args)

    print('started simulator')

    agent_args['discrete_controls'] = simulator.discrete_controls
    agent_args['continuous_controls'] = simulator.continuous_controls
    agent_args['state_imgs_shape'] = (agent_args['history_length'] *
                                      simulator.num_channels,
                                      simulator.resolution[1],
                                      simulator.resolution[0])

    agent_args['n_ffnet_hidden'] = np.array([50, 50])

    if 'meas_for_net_init' in agent_args:
        agent_args['meas_for_net'] = []
        for ns in range(agent_args['history_length']):
            agent_args['meas_for_net'] += [
                i + simulator.num_meas * ns
                for i in agent_args['meas_for_net_init']
            ]
        agent_args['meas_for_net'] = np.array(agent_args['meas_for_net'])
    else:
        agent_args['meas_for_net'] = np.arange(agent_args['history_length'] *
                                               simulator.num_meas)
    if len(agent_args['meas_for_manual_init']) > 0:
        agent_args['meas_for_manual'] = np.array([
            i + simulator.num_meas * (agent_args['history_length'] - 1)
            for i in agent_args['meas_for_manual_init']
        ])  # current timestep is the last in the stack
    else:
        agent_args['meas_for_manual'] = []
    agent_args['state_meas_shape'] = (len(agent_args['meas_for_net']), )

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.1)
    sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options,
                                            log_device_placement=False))

    img_buffer = np.zeros(
        (agent_args['history_length'], simulator.num_channels,
         simulator.resolution[1], simulator.resolution[0]))
    meas_buffer = np.zeros((agent_args['history_length'], simulator.num_meas))
    act_buffer = np.zeros((agent_args['history_length'], 6))
    curr_step = 0
    term = False

    print("state_meas_shape: ", meas_buffer.shape, " == ",
          agent_args['state_meas_shape'])
    print("act_buffer_shape: ", act_buffer.shape)
    agent_args['n_ffnet_meas'] = len(np.ndarray.flatten(meas_buffer))
    agent_args['n_ffnet_act'] = len(np.ndarray.flatten(act_buffer))

    ag = Agent(sess, agent_args)
    ag.load('./checkpoints')

    # replace degenerate random actions: any combination of the first six buttons in which
    # MOVE_FORWARD/MOVE_BACKWARD are equal and TURN_LEFT/TURN_RIGHT are equal (with ATTACK
    # off) is swapped for a fixed replacement action (button order in the comment below)
    acts_to_replace = [
        a + b + d + e for a in [[0, 0], [1, 1]] for b in [[0, 0], [1, 1]]
        for d in [[0]] for e in [[0], [1]]
    ]
    print(acts_to_replace)
    replacement_act = [0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0]
    # MOVE_FORWARD   MOVE_BACKWARD   TURN_LEFT   TURN_RIGHT  ATTACK  SPEED   SELECT_WEAPON2  SELECT_WEAPON3  SELECT_WEAPON4  SELECT_WEAPON5  SELECT_WEAPON6  SELECT_WEAPON7

    #    img, meas, rwrd, term = simulator.step(np.squeeze(ag.random_actions(1)).tolist())

    diff_y = 0
    diff_x = 0
    diff_z = 0
    inertia = 0.5
    iter = 1
    epoch = 200

    userdoc = os.path.join(os.path.expanduser("~"), "Documents")

    while not term:
        if curr_step < agent_args['history_length']:
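            # warm-up: fill the history buffers using random actions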
            curr_act = np.squeeze(ag.random_actions(1)).tolist()
            img, meas, rwrd, term = simulator.step(curr_act)

        else:
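            # stack the last history_length frames and move channels last: (1, C*hist, H, W) -> (1, H, W, C*hist)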
            state_imgs = np.transpose(
                np.reshape(
                    img_buffer[np.arange(
                        curr_step - agent_args['history_length'], curr_step) %
                               agent_args['history_length']],
                    (1, ) + agent_args['state_imgs_shape']), [0, 2, 3, 1])
            state_meas = np.reshape(
                meas_buffer[np.arange(curr_step - agent_args['history_length'],
                                      curr_step) %
                            agent_args['history_length']],
                (1, agent_args['history_length'] * simulator.num_meas))

            #            print ("imgs shape: ", state_imgs.shape, " meas shape: ", state_meas.shape)
            #            print ("flat imgs shape: ", np.ndarray.flatten(state_imgs).shape, " flat meas shape: ", np.ndarray.flatten(state_meas).shape)
            #            print ("meas shape: ", state_meas.shape)

            curr_act = np.squeeze(ag.random_actions(1)[0]).tolist()
            if curr_act[:6] in acts_to_replace:
                curr_act = replacement_act
            # 'hack' collects the visual-difference based action offsets, but only curr_act
            # is actually passed to the simulator below
            hack = [0] * len(curr_act)
            hack[6] = diff_x
            hack[8] = -diff_y * 0.2
            hack[3] = 0  # diff_z
            #            hack[6] = 1
            #            hack[8] = 1
            curr_act[2] = 0
            curr_act[3] = 10

            img, meas, rwrd, term = simulator.step(curr_act)
            if meas is not None and meas[0] > 30.:
                meas[0] = 30.
            if img is not None:

                #                print ("state_imgs: ", np.shape(state_imgs), "state_meas: ", np.shape(state_meas), "curr_act: ", np.shape(curr_act))
                #                print ("img type: ", np.ndarray.flatten(ag.preprocess_input_images(img)).dtype, "state_img type: ", state_imgs.dtype, "state_meas type: ", state_meas.dtype)

                # feed the stacked images, measurements and action history to the
                # feed-forward net; its output is compared to the preprocessed frame below
                ag.act_ffnet(
                    np.ndarray.flatten(state_imgs),
                    np.ndarray.flatten(state_meas),
                    np.array(np.ndarray.flatten(act_buffer), dtype='float64'),
                    np.ndarray.flatten(ag.preprocess_input_images(img)))
                # NOTE: the first two diff_image computations are immediately overwritten;
                # only the final assignment (the preprocessed current frame) is used below
                diff_image = np.absolute(
                    np.reshape(np.array(ag.ext_ffnet_output),
                               [img.shape[0], img.shape[1]]) -
                    ag.preprocess_input_images(img))
                diff_image = np.absolute(
                    ag.preprocess_input_images(
                        img_buffer[(curr_step - 1) %
                                   agent_args['history_length']] -
                        ag.preprocess_input_images(img)))
                diff_image = ag.preprocess_input_images(img)

                # low-pass filter the normalised x/y position of the brightest column/row
                diff_x = diff_x + inertia * (
                    (np.argmax(diff_image.sum(axis=0)) /
                     float(diff_image.shape[1])) - 0.5 - diff_x)
                diff_y = diff_y + inertia * (
                    (np.argmax(diff_image.sum(axis=1)) /
                     float(diff_image.shape[0])) - 0.5 - diff_y)

                #                print ("diff_x: ", diff_x, " diff_y: ", hack[6], "centre_x: ", np.argmax(diff_image.sum(axis=0)), "centre_y: ", np.argmax(diff_image.sum(axis=1)))

                if (curr_step % epoch == 0):
                    print("saving...")
                    np.save(
                        os.path.join('/home/paul', "hack"),
                        np.reshape(np.array(ag.ext_ffnet_output),
                                   [img.shape[0], img.shape[1]]))
                    np.save(os.path.join('/home/paul', "target"),
                            ag.preprocess_input_images(img))
                    np.save(os.path.join('/home/paul', "diff"), diff_image)
                    diff_x = np.random.normal(0, 2)
                    diff_z = np.random.normal(10, 2)

        if not term:
            img_buffer[curr_step % agent_args['history_length']] = img
            meas_buffer[curr_step % agent_args['history_length']] = meas
            act_buffer[curr_step % agent_args['history_length']] = curr_act[:6]
            curr_step += 1

    simulator.close_game()
    ag.save(
        '/home/paul/Dev/GameAI/vizdoom_cig2017/icolearner/ICO1/checkpoints/' +
        'hack-' + str(iter))