Example #1
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params = parse_args()

    env = gym.make(agent_params['environment'])
    episodes = agent_params['episodes']
    steps = agent_params['steps']
    steps_to_update = agent_params['steps_to_update']
    num_actions = env.action_space.n
    observation_shape = env.observation_space.shape

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params)

    env.monitor.start('./outputs/cartpole-experiment-' +
                      agent_params['run_id'])
    last_100 = deque(maxlen=100)

    total_steps = 0
    for i_episode in range(episodes):
        observation = env.reset()
        reward_sum = 0

        # cartpole solved
        if np.mean(last_100) > 200:
            break

        for t in range(steps):
            env.render()

            # select action based on the model
            action = dqn.select_action(observation)
            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)
            # update the state
            dqn.update_state(action, observation, new_observation, reward,
                             done)
            observation = new_observation

            # train the model
            dqn.train_step()

            reward_sum += reward
            if done:
                print "Episode ", i_episode
                print "Finished after {} timesteps".format(t + 1)
                print "Reward for this episode: ", reward_sum
                last_100.append(reward_sum)
                print "Average reward for last 100 episodes: ", np.mean(
                    last_100)
                break

            if total_steps % steps_to_update == 0:
                print "updating target network..."
                dqn.update_target()

            total_steps += 1
    env.monitor.close()
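All of these DQN examples unpack `parse_args()` into grouped dictionaries (`agent_params`, `dqn_params`, `cnn_params`) whose definition lives in each project's utils.py and is not shown on this page. A rough, hypothetical argparse-based sketch of such a helper follows; the flags beyond the keys actually read above, and all defaults, are assumptions rather than the original project's values.

import argparse


def parse_args():
    # Hypothetical sketch of the utils.py helper used above; defaults are placeholders.
    parser = argparse.ArgumentParser()
    parser.add_argument('--environment', default='CartPole-v0')
    parser.add_argument('--episodes', type=int, default=2000)
    parser.add_argument('--steps', type=int, default=1000)
    parser.add_argument('--steps_to_update', type=int, default=100)
    parser.add_argument('--run_id', default='0')
    parser.add_argument('--lr', type=float, default=1e-3)       # assumed DQN option
    parser.add_argument('--batch_size', type=int, default=32)   # assumed DQN option
    args = parser.parse_args()

    agent_params = {'environment': args.environment,
                    'episodes': args.episodes,
                    'steps': args.steps,
                    'steps_to_update': args.steps_to_update,
                    'run_id': args.run_id}
    dqn_params = {'lr': args.lr, 'batch_size': args.batch_size}
    cnn_params = {}  # network-architecture options would go here
    return agent_params, dqn_params, cnn_params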
Example #2
def main():
    general_params, a2c_params, \
        pref_interface_params, rew_pred_training_params = parse_args()

    if general_params['debug']:
        logging.getLogger().setLevel(logging.DEBUG)

    run(general_params, a2c_params, pref_interface_params,
        rew_pred_training_params)
Example #3
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params = parse_args()

    steps_to_update = 10  #agent_params['steps_to_update']

    current_time = datetime.utcnow()
    start_time = current_time - timedelta(days=10)
    end_time = start_time - timedelta(days=7)

    ticker_str = ('AUD_CAD', 'AUD_CHF', 'AUD_HKD')

    SQL = database()
    env = fxEnviroment(SQL, start_time, end_time, ticker_str)

    observation_shape = env.current_state.shape
    num_actions = 3

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params)

    episode_starttime = env.starttime
    total_steps = 0
    while episode_starttime < env.endtime:

        # each episode covers a 60-minute window starting at episode_starttime
        episode_endtime = episode_starttime + timedelta(minutes=60)
        observation = env.reset(episode_starttime, episode_endtime)
        reward_sum = 0
        done = 0

        while not done:

            # select action based on the model
            action = dqn.select_action(observation)
            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)
            # update the state
            dqn.update_state(action, observation, new_observation, reward,
                             done)
            observation = new_observation

            # train the model
            dqn.train_step()

            reward_sum += reward
            if done:
                print("episode completed")
                print("Reward for this episode: ", reward_sum)

                episode_starttime = episode_endtime
                break

            if total_steps % steps_to_update == 0:
                print("updating target network...")
                dqn.update_target()
Example #4
File: train.py  Project: gilzamir/pacman
def main():
    args, lr_args, log_dir, preprocess_wrapper = parse_args()  # parse_args() is imported from params
    easy_tf_log.set_dir(log_dir)  # set the directory easy_tf_log writes logs to

    utils_tensorflow.set_random_seeds(args.seed)  # initialize the random seeds
    sess = tf.Session()  # A Session object encapsulates the environment in which Operation objects are executed and Tensor objects are evaluated.

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops, args.n_workers,
                     args.seed, args.debug, log_dir)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    # Create the set of networks, one per worker thread
    networks = make_networks(n_workers=args.n_workers, obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n, value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus, max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer, detailed_logs=args.detailed_logs,
                             debug=args.debug)

    # Returns all variables created with trainable=True.
    # scope: (optional) a string; if supplied, the list is filtered to items whose name attribute matches scope using re.match
    global_vars = tf.trainable_variables('global')


    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file uses relative paths,
    # allowing us to restore from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars, max_to_keep=1, save_relative_paths=True)

    # If there is a checkpoint to load, restore it and resume from where it left off; otherwise start from scratch
    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt), end='', flush=True)
        saver.restore(sess, args.load_ckpt)  # restore (load) the session from the specified checkpoint
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    # Create the workers
    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)

    # start a thread for each worker created
    worker_threads = start_worker_threads(workers, args.n_steps, args.steps_per_update,
                                          step_counter, update_counter)

    # Execution manager for the worker threads
    run_manager(worker_threads, sess, lr, step_counter, update_counter, log_dir, saver,
                args.manager_wake_interval_seconds, args.ckpt_interval_seconds)

    for env in envs:
        env.close()
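The comment above about `tf.trainable_variables('global')` describes TF1 scope filtering: only variables created with trainable=True whose names match the given scope (via re.match) are returned. A tiny self-contained illustration, with made-up scope and variable names:

import tensorflow as tf

with tf.variable_scope('global'):
    w = tf.get_variable('w', shape=[2, 2])
with tf.variable_scope('worker_0'):
    v = tf.get_variable('v', shape=[2, 2])

# Only the variable under the 'global' scope is returned.
print([x.name for x in tf.trainable_variables('global')])  # ['global/w:0']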
Example #5
def main():
    args, lr_args, log_dir, preprocess_wrapper = parse_args()
    easy_tf_log.set_dir(log_dir)

    utils_tensorflow.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.TensorFlowCounter(sess)
    update_counter = utils.TensorFlowCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             obs_shape=envs[0].observation_space.shape,
                             n_actions=envs[0].action_space.n,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             detailed_logs=args.detailed_logs,
                             debug=args.debug)

    global_vars = tf.trainable_variables('global')
    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths, so that we can restore
    # from checkpoints created on another machine.
    saver = tf.train.Saver(global_vars,
                           max_to_keep=1,
                           save_relative_paths=True)
    if args.load_ckpt:
        print("Restoring from checkpoint '{}'...".format(args.load_ckpt),
              end='',
              flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess, envs, networks, args.n_workers, log_dir)

    worker_threads = start_worker_threads(workers, args.n_steps,
                                          args.steps_per_update, step_counter,
                                          update_counter)

    run_manager(worker_threads, sess, lr, step_counter, update_counter,
                log_dir, saver, args.manager_wake_interval_seconds,
                args.ckpt_interval_seconds)

    for env in envs:
        env.close()
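Examples #4 and #5 rely on a `TensorFlowCounter` helper from the project's utils module (example #10 uses a similar `GraphCounter`), whose code is not shown here. Judging from the usage, `.value` is a tensor fed to `make_lr` and `int(counter)` works in example #10; a minimal sketch under those assumptions might look like the following. This is not the project's actual implementation.

import tensorflow as tf


class TensorFlowCounter:
    # Sketch only: a counter backed by a graph variable, with an assumed API.
    def __init__(self, sess):
        self.sess = sess
        self.value = tf.Variable(0, trainable=False, dtype=tf.int64)
        self.increment_amount = tf.placeholder(tf.int64, shape=[])
        self.increment_op = self.value.assign_add(self.increment_amount)

    def increment(self, n=1):
        # Run the increment op in the shared session so all worker threads see it.
        self.sess.run(self.increment_op, feed_dict={self.increment_amount: n})

    def __int__(self):
        return int(self.sess.run(self.value))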
Example #6
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params = parse_args()

    env = gym.make(agent_params['environment'])
    episodes = agent_params['episodes']
    steps = agent_params['steps']
    num_actions = env.action_space.n
    observation_shape = env.observation_space.shape

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params)

    last_100 = deque(maxlen=100)

    for i_episode in range(episodes):
        observation = env.reset()
        reward_sum = 0

        if np.mean(last_100) > 200:
            break

        for t in range(steps):
            env.render()
            #print observation

            # select action based on the model
            action = dqn.select_action(observation)
            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)
            # update the state
            dqn.update_state(action, observation, new_observation, reward,
                             done)
            observation = new_observation

            # train the model
            dqn.train_step()

            reward_sum += reward
            if done:
                print("Episode ", i_episode)
                print("Finished after {} timesteps".format(t + 1))
                print("Reward for this episode: ", reward_sum)
                last_100.append(reward_sum)
                print("Average reward for last 100 episodes: ",
                      np.mean(last_100))
                # mark in CartPole_DQN.png
                plt.plot(i_episode, np.mean(last_100), 'bo-')
                plt.savefig("./save_graph/CartPole_DQN.png")
                break
Example #7
                total_reward = 0
                for i in range(10):
                    state = env.reset()
                    for j in range(300):
                        # env.render()
                        feed = {mainQN.inputs_: [state]}
                        Qs = sess.run(mainQN.output, feed_dict=feed)
                        action = np.argmax(Qs) # direct action for test
                        state, reward, done, _ = env.step(action)
                        total_reward += reward
                        if done:
                            break
                ave_reward = total_reward / 10
                if ave_reward > max_reward:
                    max_reward = ave_reward
                    saver.save(sess, "model/dqn_ep" + str(ep) + "-" + str(ave_reward), total_step_count)
                print('episode: ', ep, 'Evaluation Average Reward:', ave_reward)
                with open("model/dqn.csv", "a") as savefile:
                    wr = csv.writer(savefile, dialect="excel")
                    wr.writerow([ep, ave_reward])

            # Save model.
            # if config.save_model and total_step_count > config.pretrain_steps and \
            #         ep % config.save_model_interval == 0:
            #     print('Saving model...')
            #     saver.save(sess, config.model_path +'/model' + str(ep) + '.ckpt', total_step_count)

if __name__ == '__main__':
    config = params.parse_args()
    train(config)
Example #8
import params
import torch
from tqdm import tqdm

global logf


def myprint(s):
    global logf
    if args.log:
        print(s)
    logf.write(str(s) + '\n')
    logf.flush()
    return


parser = params.parse_args()
args = parser.parse_args()
args = add_config(args) if args.config_file is not None else args
assert args.mode in ("train", "resume")

set_all_seeds_to(args.seed)

MAX_VOCAB_SIZE = 25000 if (args.cap_vocab) else 100000
print(MAX_VOCAB_SIZE)

device = torch.device(
    'cuda:{0}'.format(args.gpu_id) if torch.cuda.is_available() else 'cpu')
if args.pool == 'last1' or args.pool == 'max1' or args.pool == 'mean1':
    custom_lstm.forget_bias = args.forget_bias

args.model_path = get_model_path(args)
Example #9
def run_dqn():
    # get command line arguments, defaults set in utils.py
    agent_params, dqn_params, cnn_params, prog_params = parse_args()

    env = gym.make(agent_params['environment'])
    episodes = agent_params['episodes']
    steps = agent_params['steps']
    steps_to_update = agent_params['steps_to_update']
    skipping = agent_params['skipping']
    num_actions = env.action_space.n
    observation_shape = env.observation_space.shape
    display = prog_params['display']
    monitor = prog_params['monitor']
    verbose = prog_params['verbose']

    if verbose > 0:
        print("num actions: ", num_actions)
        print("observation_shape: ", observation_shape)

    # initialize dqn learning
    dqn = DQN(num_actions, observation_shape, dqn_params, cnn_params, prog_params)

    if monitor:
        env.monitor.start('./outputs/experiment-' + agent_params['run_id'])
    last_100 = deque(maxlen=100)

    total_steps = 0
    for i_episode in range(episodes):
        observation = env.reset()
        reward_sum = 0

        for t in range(steps):
            if display:
                env.render()

            # Use the previous action if in a skipping frame
            if total_steps % skipping == 0:
                # select action based on the model
                action = dqn.select_action(observation)

            # execute action in emulator
            new_observation, reward, done, _ = env.step(action)
            new_observation = new_observation.ravel()

            # Only update the network if not in a skipping frame
            if total_steps % skipping == 0:
                # update the state
                dqn.update_state(action, new_observation, reward, done)

                # train the model
                dqn.train_step()

            observation = new_observation

            reward_sum += reward

            if done:
                if verbose > 0:
                    print("Episode ", i_episode)
                if verbose > 1:
                    print("Finished after {} timesteps".format(t + 1))
                    print("Reward for this episode: ", reward_sum)
                if verbose > 0:
                    last_100.append(reward_sum)
                    print("Average reward for last 100 episodes: ",
                          np.mean(last_100))
                break

            if total_steps % steps_to_update == 0:
                if verbose > 0:
                    print("Total steps : ", total_steps)
                    print("Updating target network...")
                dqn.update_target()

            total_steps += 1
    if monitor:
        env.monitor.close()
Example #10
def main():
    args, lr_args, log_dir, preprocess_wrapper, ckpt_timer = parse_args()
    easy_tf_log.set_dir(log_dir)

    utils.set_random_seeds(args.seed)
    sess = tf.Session()

    envs = make_envs(args.env_id, preprocess_wrapper, args.max_n_noops,
                     args.n_workers, args.seed, args.debug, log_dir)

    step_counter = utils.GraphCounter(sess)
    update_counter = utils.GraphCounter(sess)
    lr = make_lr(lr_args, step_counter.value)
    optimizer = make_optimizer(lr)

    networks = make_networks(n_workers=args.n_workers,
                             n_actions=envs[0].action_space.n,
                             weight_inits=args.weight_inits,
                             value_loss_coef=args.value_loss_coef,
                             entropy_bonus=args.entropy_bonus,
                             max_grad_norm=args.max_grad_norm,
                             optimizer=optimizer,
                             debug=args.debug)

    # Why save_relative_paths=True?
    # So that the plain-text 'checkpoint' file written uses relative paths,
    # which seems to be needed in order to avoid confusing saver.restore()
    # when restoring from FloydHub runs.
    global_vars = tf.trainable_variables('global')
    saver = tf.train.Saver(global_vars,
                           max_to_keep=1,
                           save_relative_paths=True)
    checkpoint_dir = osp.join(log_dir, 'checkpoints')
    os.makedirs(checkpoint_dir)
    checkpoint_file = osp.join(checkpoint_dir, 'network.ckpt')

    if args.load_ckpt:
        print("Restoring from checkpoint '%s'..." % args.load_ckpt,
              end='',
              flush=True)
        saver.restore(sess, args.load_ckpt)
        print("done!")
    else:
        sess.run(tf.global_variables_initializer())

    workers = make_workers(sess=sess,
                           envs=envs,
                           networks=networks,
                           n_workers=args.n_workers,
                           log_dir=log_dir)

    worker_threads = start_workers(n_steps=args.n_steps,
                                   steps_per_update=args.steps_per_update,
                                   step_counter=step_counter,
                                   update_counter=update_counter,
                                   workers=workers)
    ckpt_timer.reset()
    step_rate = utils.RateMeasure()
    step_rate.reset(int(step_counter))
    while True:
        time.sleep(args.wake_interval_seconds)

        steps_per_second = step_rate.measure(int(step_counter))
        easy_tf_log.tflog('misc/steps_per_second', steps_per_second)
        easy_tf_log.tflog('misc/steps', int(step_counter))
        easy_tf_log.tflog('misc/updates', int(update_counter))
        easy_tf_log.tflog('misc/lr', sess.run(lr))

        alive = [t.is_alive() for t in worker_threads]

        if ckpt_timer.done() or not any(alive):
            saver.save(sess, checkpoint_file, int(step_counter))
            print("Checkpoint saved to '{}'".format(checkpoint_file))
            ckpt_timer.reset()

        if not any(alive):
            break

    for env in envs:
        env.close()
Example #11
import tensorflow as tf
from tensorflow.models.rnn import rnn, rnn_cell
from get_babi_data import get_task_6_train
from get_babi_data import get_task_6_test
from get_glove import load_glove_vectors
from get_babi_data import get_task_1_train
from get_babi_data import get_task_1_test
from tensorflow.python.ops.seq2seq import sequence_loss
from format_data import split_training_data, format_data, batch_data, convert_to_vectors_with_sentences, get_word_vector
from random import shuffle
from params import parse_args

#### MODEL PARAMETERS ####

params = parse_args()

WORD_VECTOR_LENGTH = 50
NUM_CLASSES = 2
MAX_EPISODES = 3
MAX_INPUT_SENTENCES = 40
EARLY_STOPPING = 2
MAX_INPUT_LENGTH = 200
MAX_QUESTION_LENGTH = 20

LEARNING_RATE = params['LEARNING_RATE']
HIDDEN_SIZE = params['HIDDEN_SIZE']
ATTENTION_GATE_HIDDEN_SIZE = params['ATTENTION_GATE_HIDDEN_SIZE']
MAX_EPOCHS = params['MAX_EPOCHS']
REG = params['REG']
DROPOUT = params['DROPOUT']
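The uppercase lookups above (`params['LEARNING_RATE']`, `params['HIDDEN_SIZE']`, ...) imply that this project's `parse_args()` returns a plain dict rather than an argparse Namespace. One way to get that shape, shown only as a hedged sketch with placeholder defaults rather than the project's real values, is:

import argparse


def parse_args():
    parser = argparse.ArgumentParser()
    # Uppercase option names keep the uppercase dict keys used by the model code.
    parser.add_argument('--LEARNING_RATE', type=float, default=0.001)
    parser.add_argument('--HIDDEN_SIZE', type=int, default=100)
    parser.add_argument('--ATTENTION_GATE_HIDDEN_SIZE', type=int, default=100)
    parser.add_argument('--MAX_EPOCHS', type=int, default=20)
    parser.add_argument('--REG', type=float, default=0.0)
    parser.add_argument('--DROPOUT', type=float, default=0.0)
    return vars(parser.parse_args())  # dict keyed by the uppercase names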
Example #12
        cv_types = ["spherical", "diag", "full", "tied"]
        for cv_type in cv_types:
            gmm = mixture.GaussianMixture(n_components=10,
                                          covariance_type=cv_type)
            gmm.fit(train_data)
            clusters = gmm.predict(train_data)
            labels = np.zeros_like(clusters)
            for i in range(10):
                mask = clusters == i
                labels[mask] = mode(train_labels[mask])[0]

            correct1 = np.equal(clusters, train_labels).sum()
            correct2 = np.equal(labels, train_labels).sum()
            print("%d/49000 (%0.2f%%)" % (correct1, 100.0 * correct1 / 49000))
            print("%d/49000 (%0.2f%%)" % (correct2, 100.0 * correct2 / 49000))


if __name__ == "__main__":
    args = params.parse_args()
    utils.set_random_seed(args.seed, args.cuda)
    trainer = ModelTrainer(args=args)
    if args.eval is False:
        if args.training_mode == "supervised":
            trainer.train_val_test()
        elif args.training_mode == "semi-supervised":
            trainer.ssl_train_val_test()
        elif args.training_mode == "gmm":
            trainer.gmm_train_val_test()
    if args.eval is True:
        trainer.evaluate("Test", verbose=True)
Example #14
File: main.py  Project: djreiss/pynkey
    ##funcs.checkpoint( '%s/%s.pkl' % (params.output_dir, params.organism) )
    do_saveall()

    tmp = np.array( bicluster.get_all_cluster_row_counts( glb.clusters, glb.all_genes ).values() )
    print np.sum(tmp==0), 'genes in no clusters'
    print np.sum(tmp==np.max(tmp)), 'genes in', np.max(tmp), 'clusters'

# println( @sprintf( "%.3f", (endTime - startTime)/60 ), " minutes since initialization" )

# #genes = rownames(ratios)[clusters[kInd].rows] ##rows]
# #seqs = get_sequences(genes);
# #@time gibbs_out = gibbs_site_sampler(seqs[:,2])     ## run gibbs sampler on most "flagellar-enriched" cluster
# #@time gibbs_out2 = gibbs_site_sampler(seqs, gibbs_out["pssm"])

if __name__ == '__main__':
    params.parse_args()
    params.init_args()

    if not init.IS_INITED:
        init.init()

    from Bicluster import fill_all_cluster_scores_par

    #clusters = fill_all_cluster_scores( clusters, all_genes, ratios, string_net, ratios.columns.values )    
    ## weird - if I move this to glb.py, then it gets locked up.
    glb.clusters = fill_all_cluster_scores_par(glb.clusters, threads=params.nthreads)
    stats_tmp = funcs.print_cluster_stats(glb.clusters, glb.ratios, 1, glb.startTime)
    glb.stats_df = glb.stats_df.append( stats_tmp )

    # NOTE: run_pynkey() which calls floc.get_floc_scores_all() fills all the cluster scores at the beginning    
    glb.iter = run_pynkey(glb.iter) ## Note this function can be run like this to restart from current iter