def savegame(config):
    # Step 1: init Game
    env = Environment(config.game_num)  # 1 is the main game, 2 is the evaluation game
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())

    brain = DQN(config)

    # checkStates = None
    # build the id -> word mapping from this game's symbol file
    dic = {}
    with open("symbolMapping" + str(sys.argv[1]) + ".txt", 'r') as fp:
        data = fp.read().split('\n')
        for i in range(len(data) - 1):
            splitdata = data[i].split(' ')
            dic[int(splitdata[1])] = splitdata[0]
    dic[0] = "NULL"

    fp = open("teacher" + str(sys.argv[1]) + "_embeddings.txt", "w")
    for i in range(config.vocab_size - 1):
        state = np.zeros([config.batch_size, config.seq_length])
        state[:, 0] = i
        embedding = brain.output_embedT.eval(
            feed_dict={brain.stateInputT: state}, session=brain.session)[0,
                                                                         0, :]
        print >> fp, dic[i]
        for element in embedding:
            print >> fp, element,
        print >> fp
    brain.session.close()
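
# A minimal sketch (not from the original repo) of how the alternating
# word / embedding lines written by savegame above could be read back.
# The helper name load_teacher_embeddings and the example file name are
# illustrative assumptions; numpy is assumed imported as np, as in the
# snippets above.
def load_teacher_embeddings(path):
    embeddings = {}
    with open(path, 'r') as fp:
        lines = [line for line in fp.read().split('\n') if line.strip()]
    # each record is a word line followed by a line of space-separated floats
    for word_line, vector_line in zip(lines[0::2], lines[1::2]):
        embeddings[word_line.strip()] = np.array(
            [float(x) for x in vector_line.split()])
    return embeddings

# e.g. embeddings = load_teacher_embeddings("teacher1_embeddings.txt")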
Example #2
def main(train):
    LOGGER.info('=== main started ===')
    environment = Environment(ticker,
                              from_date=datetime(2004, 1, 1),
                              to_date=datetime(2010, 1, 1))
    agent = Agent(environment.state_size(),
                  environment.action_size(),
                  epochs=epochs)

    environment.set_agent(agent)

    if train:
        for i in range(epochs):
            environment.reset()
            environment.run()
            agent.decrease_epsilon()
            LOGGER.info('#### {}/{} game finished ####\nBalance: {}'.format(
                str(i + 1), epochs, environment.cerebro.broker.get_value()))

        agent.save(environment.ticker + '.h5')
    else:
        agent.load(environment.ticker + '.h5')

    # Test on!
    test_environment = Environment(ticker,
                                   from_date=datetime(2010, 1, 1),
                                   to_date=datetime(2013, 1, 1),
                                   scaler=environment.scaler)
    test_environment.set_agent(agent)

    test_environment.reset()
    test_environment.run()
    LOGGER.info('Backtest balance: {}'.format(
        test_environment.cerebro.broker.get_value()))
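
# Minimal sketch of the scaler-reuse pattern above: the test Environment is
# given scaler=environment.scaler so test-period features are transformed
# with statistics fitted on the 2004-2010 training window. The scikit-learn
# StandardScaler here is an assumption about what that scaler could be.
from sklearn.preprocessing import StandardScaler
import numpy as np

train_features = np.random.rand(100, 5)  # stand-in for training-period features
test_features = np.random.rand(30, 5)    # stand-in for test-period features

scaler = StandardScaler().fit(train_features)  # fit on training data only
train_scaled = scaler.transform(train_features)
test_scaled = scaler.transform(test_features)  # reuse, never refit, on test data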
Example #3
def savegame(config):
    # Step 1: init Game
    env = Environment(config.game_num)  # 1 is the main game, 2 is the evaluation game
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())

    brain = DQN(config)

    # checkStates = None
    # build the local id -> word mapping from this game's symbol file
    dic = {}
    with open("symbolMapping" + str(sys.argv[1]) + ".txt", 'r') as fp:
        data = fp.read().split('\n')
        for i in range(len(data) - 1):
            splitdata = data[i].split(' ')
            dic[int(splitdata[1])] = splitdata[0]
    dic[0] = "NULL"

    dic_trans = {}
    with open("symbolMapping1236.txt", 'r') as fp:
        data = fp.read().split('\n')
        for i in range(len(data) - 1):
            splitdata = data[i].split(' ')
            dic_trans[splitdata[0]] = int(splitdata[1])
    dic_trans["NULL"] = 0

    dic_embedding = {}
    # first, initialise every embedding randomly (entries known to the teacher are overwritten below)
    sess = tf.InteractiveSession()
    stateInput = tf.placeholder(tf.int32, [len(dic_trans.keys())])
    embed = tf.Variable(tf.random_uniform([len(dic_trans.keys()), 20], -1, 1),
                        name="embed")
    word_embeds = tf.nn.embedding_lookup(embed, stateInput)
    tf.initialize_all_variables().run()
    state = sorted(dic_trans.values())
    state_map = word_embeds.eval(feed_dict={stateInput: state})

    for i in range(len(state)):
        dic_embedding[state[i]] = state_map[i]
    sess.close()

    for i in range(config.vocab_size - 1):
        state = np.zeros([config.batch_size, config.seq_length])
        state[:, 0] = i
        embedding = brain.word_embeds.eval(feed_dict={brain.stateInput: state},
                                           session=brain.session)[0, 0]
        dic_embedding[dic_trans[dic[i]]] = embedding
    brain.session.close()

    with open("embedTeacher" + str(sys.argv[1]) + ".p", "wb") as fp:
        cpickle.dump(dic_embedding, fp)
Example #4
def main(train, action_bias=0):
    environment = Environment(tickers,
                              initial_deposit=100000,
                              from_date=datetime(2004, 1, 1),
                              to_date=datetime(2010, 1, 1),
                              min_days_to_hold=min_days_to_hold,
                              max_days_to_hold=max_days_to_hold)
    agent = Agent(environment.state_size(),
                  environment.action_size(),
                  epochs=epochs,
                  gamma=0.2,
                  replay_buffer=64,
                  memory_queue_length=32)

    if train:
        for i in range(epochs):
            state = environment.reset()
            done = False

            while not done:
                action = agent.act(state)
                next_state, reward, done = environment.step(action)
                agent.remember(state, action, reward, next_state, done)
                state = next_state
            agent.decrease_epsilon()
            LOGGER.info('Balance for current game: %d', environment.deposit)

        pprint(environment.actions)
        agent.save(environment.main_ticker + '.h5')
    else:
        agent.load(environment.main_ticker + '.h5')

    # Test on!
    test_environment = Environment(tickers,
                                   initial_deposit=100000,
                                   from_date=datetime(2010, 1, 1),
                                   to_date=datetime(2013, 1, 1),
                                   min_days_to_hold=min_days_to_hold,
                                   max_days_to_hold=max_days_to_hold,
                                   scaler=environment.scaler)

    state = test_environment.reset()
    done = False

    while not done:
        action = agent.act(state, False, action_bias)
        next_state, _, done = test_environment.step(action)
        state = next_state
    print_results_on_test_environment(test_environment)
    export_to_file(test_environment.actions)
def main():
    config = Config()
    env = Environment(config)  # for training
    eval_env = Eval_Environment(config)  # for testing
    num_actions = env.action_size()
    config.setaction_set_size(num_actions)
    brain = Control(config)
    plt = Plotter()
    plt.writesummary(0)
    #adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')


    episode_buffer = Buffer(config)
    episode_length = 0

    eval_count = 1
    while env.frame_history <= config.MAX_FRAMES:
        if env.frame_history / (config.EVAL_FREQ * eval_count) == 1:
            evaluate(eval_env, config, brain, env.frame_history, plt)  # testing happens now
            eval_count += 1
        past_num_frames = env.frame_history
        # the algorithm begins now

        if episode_length == 0:
            env.reset()
            s, a, r, t = env.act(0)
            episode_buffer.add(s, a, r)
            episode_length += 1

        s, a, r, t = env.act(brain.getaction(s))
        episode_length += 1
        episode_buffer.add(s, a, r)

        if (env.START_NEW_GAME or episode_length >= config.T) and not episode_buffer.isempty():  # then the episode ends
            episode_values = episode_buffer.get_returns()
            brain.update_table(episode_values)
            episode_buffer.reset()
            episode_length = 0

        pbar.update(env.frame_history - past_num_frames)

    env.close_render()
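
# Minimal sketch of the Monte Carlo return computation that
# episode_buffer.get_returns() is assumed to perform above; the Buffer
# internals and the discount value are assumptions, not taken from the code.
def discounted_returns(rewards, gamma=0.99):
    # G_t = r_t + gamma * G_{t+1}, accumulated backwards over one episode
    returns = [0.0] * len(rewards)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

# discounted_returns([0, 0, 1]) -> [0.9801, 0.99, 1.0] with gamma = 0.99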
Example #6
def savegame(config):
    # local id -> word mapping for this game's symbols
    fp = open('symbolMapping' + str(sys.argv[1]) + '.txt', 'r')
    data = fp.read().split('\n')
    spd = [data_.split(' ')[::-1] for data_ in data]
    dic_local = dict(spd[0:-1])
    dic_local['0'] = 'NULL'
    fp.close()

    # word -> id mapping (the "global" vocabulary used by convert_state below)
    fp = open('symbolMapping5.txt', 'r')
    data = fp.read().split('\n')
    spd = [data_.split(' ') for data_ in data]
    dic_global = dict(spd[0:-1])

    dic_global['NULL'] = '0'
    fp.close()

    # Step 1: init Game
    env = Environment(config.game_num)  # 1 is the main game, 2 is the evaluation game
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())
    brain = DQN(config)

    # checkStates = None
    #adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')
    episode_length = 0
    num_episodes = 0
    total_reward = 0
    MAX_STEPS = 105000
    totalSteps = 0
    MEM_STEPS = 10000
    memory = []
    while totalSteps < MAX_STEPS:
        totalSteps += 1
        if env.START_NEW_GAME:
            episode_length = 0
            env.START_NEW_GAME = False
            state, reward, terminal, availableObjects = env.newGame()
            brain.history.add(state)
        action_indicator = np.zeros(actions)
        object_indicator = np.zeros(objects)
        #predict
        action_index, object_index = brain.getAction(availableObjects, True)
        Qactions, Qobjects = brain.getQValues(availableObjects)
        action_indicator[action_index] = 1
        object_indicator[object_index] = 1
        # print state
        memory.append((convert_state(state, dic_local, dic_global), Qactions, Qobjects))
        #act
        nextstate, reward, terminal, availableObjects = env.step(action_index, object_index)
        total_reward += reward
        episode_length += 1
        #observe
        brain.setPerception(state, reward, action_indicator, object_indicator, nextstate, terminal, True)
        state = nextstate

        if (totalSteps % MEM_STEPS == 0):
            fileName = str(config.game_num) + "_mem.txt"
            with open(fileName, "a") as fp:
                for i in range(len(memory)):
                    for j in memory[i][0]:
                        print >> fp, j,
                    print >> fp
                    for j in memory[i][1]:
                        print >> fp, j,
                    print >> fp
                    for j in memory[i][2]:
                        print >> fp, j,
                    print >> fp
            memory = []


        if ((terminal) or ((episode_length % config.max_episode_length) == 0)):
            num_episodes += 1
            with open("saver_reward.txt", "a") as fp:
                print >> fp, (total_reward / (num_episodes * 1.0))
            env.START_NEW_GAME = True

        pbar.update(1)


        if (brain.timeStep) > config.MAX_FRAMES:
            brain.train_writer.close()
            break

    brain.session.close()
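
# Each record appended to <game_num>_mem.txt above spans three lines: the
# converted state tokens, the action Q-values, and the object Q-values.
# learnstudent below loads these files via reader(); this is a minimal sketch
# of such a parser under that three-lines-per-record assumption (the original
# reader may differ).
def read_memory_file(path):
    records = []
    with open(path, 'r') as fp:
        lines = [line for line in fp.read().split('\n') if line.strip()]
    for i in range(0, len(lines) - 2, 3):
        state_tokens = lines[i].split()  # symbol ids as written; cast as needed
        action_q = [float(x) for x in lines[i + 1].split()]
        object_q = [float(x) for x in lines[i + 2].split()]
        records.append((state_tokens, action_q, object_q))
    return records
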
def playgame(config):
    # Step 1: init Game
    env = Environment(config.game_num)  # 1 is the main game, 2 is the evaluation game
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())

    brain = DQN(config)

    # checkStates = None
    #adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')
    episode_length = 0
    num_episodes = 0
    total_reward = 0
    while True:
        if env.START_NEW_GAME:
            episode_length = 0
            env.START_NEW_GAME = False
            state, reward, terminal, availableObjects = env.newGame()
            brain.history.add(state)
        action_indicator = np.zeros(actions)
        object_indicator = np.zeros(objects)
        #predict
        action_index, object_index = brain.getAction(availableObjects)
        action_indicator[action_index] = 1
        object_indicator[object_index] = 1
        #act
        nextstate, reward, terminal, availableObjects = env.step(
            action_index, object_index)
        total_reward += reward
        episode_length += 1
        #observe
        brain.setPerception(state, reward, action_indicator, object_indicator,
                            nextstate, terminal, False)
        state = nextstate

        if ((terminal) or ((episode_length % config.max_episode_length) == 0)):
            num_episodes += 1
            with open("train_reward.txt", "a") as fp:
                print >> fp, (total_reward / (num_episodes * 1.0))
            env.START_NEW_GAME = True
#####################################################################
# periodically evaluate Q-values on a fixed set of sampled states
        if (brain.timeStep % config.EVAL == 0) and (brain.timeStep != 0):
            if (brain.timeStep / config.EVAL == 1):
                if not ((os.path.exists("checkStates.txt")) and
                        (os.path.getsize("checkStates.txt") > 0)):
                    assert config.SAMPLE_STATES % config.BATCH_SIZE == 0
                    assert config.SAMPLE_STATES < brain.memory.count
                    checkStates, _1, _2, _3, _4, _5 = brain.memory.sample()
                    with open("checkStates.txt", "w") as fp:
                        cpickle.dump(checkStates, fp)
                else:
                    with open("checkStates.txt", 'r') as fp:
                        checkStates = cpickle.load(fp)

            evalQValues_a = brain.action_valueT.eval(
                feed_dict={brain.stateInputT: checkStates},
                session=brain.session)
            maxEvalQValues_a = np.max(evalQValues_a, axis=1)
            avgEvalQValues_a = np.mean(maxEvalQValues_a)

            with open("evalQValue_a.txt", "a") as fp:
                print >> fp, avgEvalQValues_a

            evalQValues_o = brain.object_valueT.eval(
                feed_dict={brain.stateInputT: checkStates},
                session=brain.session)
            maxEvalQValues_o = np.max(evalQValues_o, axis=1)
            avgEvalQValues_o = np.mean(maxEvalQValues_o)

            with open("evalQValue_o.txt", "a") as fp:
                print >> fp, avgEvalQValues_o
#####################################################################
# save current history before starting evaluation
# temp_history_data = brain.history.copy()
# now evaluate the average reward
# an alternate environment could be created for evaluation
# env_eval = Environment(2)
            env_eval = env
            if config.TUTORIAL_WORLD:
                total_reward, nrewards, nepisodes, quest1_reward_cnt, quest2_reward_cnt, quest3_reward_cnt = evaluate(
                    brain, env_eval, config)
            else:
                total_reward, nrewards, nepisodes, quest1_reward_cnt = evaluate(
                    brain, env_eval, config)

            with open("test_reward.txt", "a") as fp:
                print >> fp, total_reward

            # save the network if this evaluation is the best so far
            if len(env_eval.reward_history) == 0 or total_reward > max(
                    env_eval.reward_history):
                # save best network
                if not os.path.exists(os.getcwd() + '/Savednetworks'):
                    os.makedirs(os.getcwd() + '/Savednetworks')
                brain.saver.save(brain.session,
                                 os.getcwd() + '/Savednetworks/' + 'network' +
                                 '-dqn',
                                 global_step=brain.timeStep)

            env_eval.reward_history.append(
                total_reward)  # track rewards to identify the best network

            #go back to saved frame after evaluation completed
            # brain.history.add(temp_history_data)
            #####################################################################
            if config.TUTORIAL_WORLD:
                brain.inject_summary({
                    'average.q_a': avgEvalQValues_a,
                    'average.q_o': avgEvalQValues_o,
                    'average.q': 0.5 * avgEvalQValues_o + 0.5 * avgEvalQValues_a,
                    'average_reward': total_reward,
                    'average_num_pos_reward': nrewards,
                    'number_of_episodes': nepisodes,
                    'quest1_average_reward_cnt': quest1_reward_cnt,
                    'quest2_average_reward_cnt': quest2_reward_cnt,
                    'quest3_average_reward_cnt': quest3_reward_cnt
                }, brain.timeStep)
            else:
                brain.inject_summary({
                    'average.q_a': avgEvalQValues_a,
                    'average.q_o': avgEvalQValues_o,
                    'average.q': 0.5 * avgEvalQValues_o + 0.5 * avgEvalQValues_a,
                    'average_reward': total_reward,
                    'average_numrewards': nrewards,
                    'number_of_episodes': nepisodes,
                    'quest1_average_reward_cnt': quest1_reward_cnt
                }, brain.timeStep)


#####################################################################
        pbar.update(1)

        if (brain.timeStep) > config.MAX_FRAMES:
            brain.train_writer.close()
            break

    brain.session.close()
def learnstudent(config):
    # Step 1: init Game
    env = Environment(config.game_num)  # 1 is the main game, 2 is the evaluation game
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())

    brain = student(config)
    brain.data[1] = reader('1_mem.txt')
    brain.data[2] = reader('2_mem.txt')
    brain.data[3] = reader('3_mem.txt')

    #adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')
    while True:
        for game_id in range(1, 4):
            brain.train(game_id)
        brain.timeStep += 1
        #####################################################################
        # periodic checkpointing and evaluation
        if (brain.timeStep % 100) == 0 and (brain.timeStep != 0):
            env_eval = env

            # save last network
            if not os.path.exists(os.getcwd() + '/StudentSavednetworks'):
                os.makedirs(os.getcwd() + '/StudentSavednetworks')
            brain.saver.save(brain.session,
                             os.getcwd() + '/StudentSavednetworks/' +
                             'network' + '-student',
                             global_step=brain.timeStep)

            if config.TUTORIAL_WORLD:
                total_reward, nrewards, nepisodes, quest1_reward_cnt, quest2_reward_cnt, quest3_reward_cnt = evaluate(
                    brain, env_eval, config)
            else:
                total_reward, nrewards, nepisodes, quest1_reward_cnt = evaluate(
                    brain, env_eval, config)

#####################################################################
            if config.TUTORIAL_WORLD:
                brain.inject_summary(
                    {
                        'average_reward': total_reward,
                        'average_num_pos_reward': nrewards,
                        'number_of_episodes': nepisodes,
                        'quest1_average_reward_cnt': quest1_reward_cnt,
                        'quest2_average_reward_cnt': quest2_reward_cnt,
                        'quest3_average_reward_cnt': quest3_reward_cnt
                    }, brain.timeStep)
            else:
                brain.inject_summary(
                    {
                        'average_reward': total_reward,
                        'average_numrewards': nrewards,
                        'number_of_episodes': nepisodes,
                        'quest1_average_reward_cnt': quest1_reward_cnt
                    }, brain.timeStep)


#####################################################################
        pbar.update(1)

        if (brain.timeStep) > config.MAX_FRAMES:
            brain.train_writer.close()
            break
    brain.session.close()