Example #1
    args = parser.parse_args()

    image_size = args.image_size
    number_of_games = args.number_of_games
    batch_size = args.batch_size
    gpu_flag = args.gpu

    learning_rate = args.learning_rate
    # epsilon is the exploration parameter: with probability epsilon the action is
    # chosen randomly instead of taking the actor's choice. Start fully random
    # (epsilon = 1) and decay it over time as the actor learns and its actions improve
    # (see the epsilon_greedy_action sketch below the FakeActor class).
    epsilon = 1
    epsilon_decay = args.epsilon_decay
    display_steps = args.display_iterations
    sim = Simulator(1)
    if gpu_flag > -1:
        device_string = '/gpu:{}'.format(gpu_flag)
    else:
        device_string = "/cpu:0"
    with tf.Graph().as_default(), tf.device(device_string):
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False))
        with sess.as_default():
            learner = ActionLearner(
                image_size=sim.image_size,
                n_filters=args.number_of_filters,
                n_hidden=args.number_of_hidden,
                n_out=sim.number_of_actions
                )
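            # ActionLearner is presumably a conv net: n_filters convolutional
            # features and n_hidden fully connected units mapping the screen to
            # n_out action scores; its definition is not shown in this snippet.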
        # Discounted-reward update (currently disabled): fold the game's final reward
        # back into each earlier state's score, scaled by discount_factor ** discount_iterator
        # (see the discount_rewards sketch below).
        #     previous_state[0][1] += -1 * reward * np.power(discount_factor, discount_iterator)
        #     discount_iterator += 1
        #
        #     break

    return score_list  # + left_state_list
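
# A minimal sketch of the discounted-reward propagation that the disabled lines
# above describe: walk backward from the end of a game and fold the final reward
# into each earlier state's score, scaled by discount_factor ** k. The helper
# name and the default discount_factor are illustrative assumptions; the
# state[0][1] score slot mirrors the commented-out code above.
def discount_rewards(state_list, reward, discount_factor=0.99):
    for k, state in enumerate(reversed(state_list)):
        state[0][1] += -1 * reward * np.power(discount_factor, k)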

class FakeActor:
    # Stub actor that ignores the screen and returns a uniformly random action.
    def __init__(self, num_actions):
        self.number_of_actions = num_actions
        self.display_output = [0, 0]

    def return_action(self, simulator_screen):
        return np.random.randint(self.number_of_actions)
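

# A minimal sketch of the epsilon-greedy choice described by the epsilon comments
# above: with probability epsilon act randomly (explore), otherwise trust the
# learner (exploit), and shrink epsilon between games. The learner.return_action
# call and the 0.05 floor are illustrative assumptions, not the original code.
def epsilon_greedy_action(learner, screen, epsilon, number_of_actions):
    if np.random.rand() < epsilon:
        return np.random.randint(number_of_actions)  # explore
    return learner.return_action(screen)             # exploit

# Typical per-game decay: epsilon = max(0.05, epsilon * epsilon_decay)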




if __name__ == "__main__":
    # Quick smoke test: play a few games with a random actor, time state
    # collection, then display and save the captured screens.
    sim = Simulator(10)
    actor = FakeActor(3)
    start_time = time.time()
    game_state_list = make_states(sim, actor, 1, 200, 10, winners_only=False)
    print(time.time() - start_time)
    for state_list in game_state_list:
        for state in state_list:
            print(state[0][1:4], np.mean(state[0][4]))
            cv2.imshow('Phong!', cv2.resize(state[0][4], (0, 0), fx=2, fy=2))
            np.save('phong_screen', state[0][4])
            cv2.waitKey(400)
    args = parser.parse_args()

    image_size = args.image_size
    number_of_games = args.number_of_games
    batch_size = args.batch_size
    gpu_flag = args.gpu

    learning_rate = args.learning_rate
    # epsilon is the exploration parameter: with probability epsilon the action is
    # chosen randomly instead of taking the actor's choice. Start fully random
    # (epsilon = 1) and decay it over time as the actor learns and its actions improve.
    epsilon = 1
    epsilon_decay = args.epsilon_decay
    display_steps = args.display_iterations
    sim = Simulator(1, screen_size=args.image_size, state_space=args.state_space)
    if gpu_flag > -1:
        device_string = '/gpu:{}'.format(gpu_flag)
    else:
        device_string = "/cpu:0"
    with tf.Graph().as_default(), tf.device(device_string):
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=False))
        with sess.as_default():
            learner = ActionLearner(
                image_size=sim.screen_size,
                n_filters=args.number_of_filters,
                n_hidden=args.number_of_hidden,
                n_out=sim.number_of_actions
                )
Example #4
    args = parser.parse_args()

    image_size = args.image_size
    number_of_games = args.number_of_games
    batch_size = args.batch_size
    gpu_flag = args.gpu

    learning_rate = args.learning_rate
    # epsilon is the exploration parameter: with probability epsilon the action is
    # chosen randomly instead of taking the actor's choice. Start fully random
    # (epsilon = 1) and decay it over time as the actor learns and its actions improve.
    epsilon = 1
    epsilon_decay = args.epsilon_decay
    display_steps = args.display_iterations
    sim = Simulator(1,
                    screen_size=args.image_size,
                    state_space=args.state_space)
    if gpu_flag > -1:
        device_string = '/gpu:{}'.format(gpu_flag)
    else:
        device_string = "/cpu:0"
    with tf.Graph().as_default(), tf.device(device_string):
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False))
        with sess.as_default():
            learner = ActionLearner(image_size=sim.screen_size,
                                    n_filters=args.number_of_filters,
                                    n_hidden=args.number_of_hidden,
                                    n_out=sim.number_of_actions)
            learner.set_sess(sess)
    args = parser.parse_args()

    image_size = args.image_size
    number_of_games = args.number_of_games
    batch_size = args.batch_size
    gpu_flag = args.gpu

    learning_rate = args.learning_rate
    # epsilon is the exploration parameter: with probability epsilon the action is
    # chosen randomly instead of taking the actor's choice. Start fully random
    # (epsilon = 1) and decay it over time as the actor learns and its actions improve.
    epsilon = 1
    epsilon_decay = args.epsilon_decay
    display_steps = args.display_iterations
    sim = Simulator(1)
    if gpu_flag > -1:
        device_string = '/gpu:{}'.format(gpu_flag)
    else:
        device_string = "/cpu:0"
    with tf.Graph().as_default(), tf.device(device_string):
        sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True,
                                                log_device_placement=False))
        with sess.as_default():
            learner = ActionLearner(image_size=sim.image_size,
                                    n_filters=args.number_of_filters,
                                    n_hidden=args.number_of_hidden,
                                    n_out=sim.number_of_actions)
            learner.set_sess(sess)

            global_step = tf.Variable(0, name="global_step", trainable=False)
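            # global_step is the usual TF1 step counter: handing it to an
            # optimizer's minimize() increments it once per training update, e.g.
            # (learner.loss is an assumed attribute, not shown in this snippet):
            #   train_op = tf.train.AdamOptimizer(learning_rate).minimize(
            #       learner.loss, global_step=global_step)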