Example No. 1
def main():
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))

    possible_actions = list(doers.keys()) + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)

    if args.action is None:
        print "No action"
        sys.exit(1)

    apiclient = None
    if args.no_api is False:
        os_options = arguments.OpenstackOptions(args, os.environ)
        if args.debug:
            print(os_options)
        apiclient = client.Client(username=os_options.username,
                                  password=os_options.password,
                                  tenant_name=os_options.tenant_name,
                                  endpoint=os_options.endpoint,
                                  auth_url=os_options.auth_url)
        if args.client_id:
            apiclient.client_id = args.client_id

    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print ('ERROR {0}'.format(e))
            return 1

    create_dir(args.jobs_dir, do_log=False)

    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    return os.EX_OK
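The dispatcher above depends on a `_get_doers` helper that the snippet does not show. A minimal sketch of what such a helper could look like, assuming the convention of exposing actions as `do_*` functions on a module (a hypothetical implementation, not the project's own):

import inspect

def _get_doers(module):
    # collect "do_*" callables, e.g. shell.do_job_list -> doers['job_list']
    return {name[len('do_'):]: func
            for name, func in inspect.getmembers(module, inspect.isfunction)
            if name.startswith('do_')}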
Example No. 2
def main():
    """
    Main entry point for the program.
    """
    args = get_args(get_version())

    if not args.solution_file:
        args.solution_file = default_solution()

    # create a new MonoTool object using our project path.
    mt = MonoTool(args.solution_file)

    method = getattr(mt, args.method)
    res = method(**args.__dict__)
    if res:
        print(res)
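The `getattr` dispatch combined with `method(**args.__dict__)` above only works if every `MonoTool` method tolerates the extra argparse keys. A minimal standalone sketch of the same pattern; the `Greeter` class and its flags are made up for illustration:

import argparse

class Greeter:
    def hello(self, name="world", **_ignored):   # **_ignored absorbs unrelated argparse keys
        return "hello " + name

parser = argparse.ArgumentParser()
parser.add_argument("method", choices=["hello"])
parser.add_argument("--name", default="world")
args = parser.parse_args(["hello", "--name", "mono"])

method = getattr(Greeter(), args.method)
print(method(**vars(args)))   # prints: hello mono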
Example No. 3
def main():
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))

    possible_actions = list(doers.keys()) + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)

    if args.action is None:
        print ('No action')
        return os.EX_DATAERR

    apiclient = None

    if args.no_api is False:
        apiclient = client.Client(opts=args)
        if args.client_id:
            apiclient.client_id = args.client_id

    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print ('ERROR {0}'.format(e))
            return os.EX_SOFTWARE

    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    if args.no_daemon:
        print ('Freezer Scheduler running in no-daemon mode')
        daemon = NoDaemon(daemonizable=freezer_scheduler)
    else:
        daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    return os.EX_OK
Example No. 4
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False
    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()
    args.mem_length = args.mem_length if args.transformer_xl else 0
    if args.load:
        args.experiment_name = os.path.basename(os.path.normpath(args.load))
    else:
        args.experiment_name = args.experiment_name + datetime.now().strftime(
            "%m-%d-%H-%M")
    if args.save:
        args.save = os.path.join(args.save, args.experiment_name)
    # Pytorch distributed.
    initialize_distributed(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Data stuff.
    train_data, val_data, test_data, args.vocab_size, \
        args.eod_token = get_train_val_test_data(args)

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler = setup_model_and_optimizer(args)

    if args.load is not None:
        with FileLock("/root/checkpoint_lock", timeout=-1):
            args.iteration = load_checkpoint(model, optimizer, lr_scheduler,
                                             args)
    else:
        args.iteration = 0
    torch.distributed.barrier()

    summary_writer = None
    if torch.distributed.get_rank() == 0:
        print('Pretrain GPT2 model')
        print_args(args)
        summary_writer = get_sample_writer(base=args.summary_dir,
                                           name=args.experiment_name,
                                           iteration=args.iteration)

    # Resume data loader if necessary.
    if args.resume_dataloader:
        if train_data is not None:
            train_data.batch_sampler.start_iter = args.iteration % \
                                                  len(train_data)
        if val_data is not None:
            start_iter_val = (args.train_iters // args.save_interval) * \
                             args.eval_interval
            val_data.batch_sampler.start_iter = start_iter_val % \
                                                len(val_data)
    if train_data is not None:
        train_data_iterator = iter(train_data)
    else:
        train_data_iterator = None
    if val_data is not None:
        val_data_iterator = iter(val_data)
    else:
        val_data_iterator = None

    # TODO: figure out how to properly set this especially when resuming training
    iteration = 0
    if args.train_iters > 0:
        if args.do_train:
            with ExitStack() as stack:

                def save_on_exit(args_, model_, optimizer_, lr_scheduler_):
                    save_checkpoint(args_.iteration, model_, optimizer_,
                                    lr_scheduler_, args_)

                # stack.callback(save_on_exit, args, model, optimizer, lr_scheduler)
                iteration, skipped = train(model,
                                           optimizer,
                                           lr_scheduler,
                                           train_data_iterator,
                                           val_data_iterator,
                                           timers,
                                           args,
                                           summary_writer=summary_writer)

        if args.do_valid:
            prefix = 'the end of training for val data'
            val_loss = evaluate_and_print_results(prefix, val_data_iterator,
                                                  model, args, timers, False)

    if args.save and iteration != 0:
        save_checkpoint(iteration, model, optimizer, lr_scheduler, args)

    if test_data is not None:
        test_data_iterator = iter(test_data)
    else:
        test_data_iterator = None

    if args.do_test:
        # Run on test data.
        prefix = 'the end of training for test data'
        evaluate_and_print_results(prefix, test_data_iterator, model, args,
                                   timers, True)
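The commented-out `stack.callback(save_on_exit, ...)` line above would register a checkpoint hook with `contextlib.ExitStack`. A small sketch of that mechanism in isolation (the `tag` argument is hypothetical):

from contextlib import ExitStack

def save_on_exit(tag):
    print("saving checkpoint for", tag)

with ExitStack() as stack:
    # registered callbacks run when the with-block exits, even on exceptions
    stack.callback(save_on_exit, "demo-run")
    print("training...")
# output: training...  then  saving checkpoint for demo-run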
Example No. 5
import os

import torch
import torch.nn.functional as F
import torch.optim as optim

import algo
from arguments import get_args
from envs import make_vec_envs
from model_nomodulation import Policy
from storage import RolloutStorage
from utils import get_vec_normalize
from visualize import visdom_plot
from tensorboardX import SummaryWriter

#####################################
# prepare

args = get_args()

assert args.algo in ['a2c', 'ppo', 'acktr']
if args.recurrent_policy:
    assert args.algo in ['a2c', 'ppo'], \
        'Recurrent policy is not implemented for ACKTR'

num_updates = int(args.num_frames) // args.num_steps // args.num_processes

torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

try:
    os.makedirs(args.log_dir)
except OSError:
Example No. 6
def load_populations(
        dataset_name,
        base_path="../logs-2020-neurips/log_when_to_stop/_recorded",
        filter_func=None):
    result_info_dict = {}
    data_path = os.path.join(base_path, dataset_name)
    for i, exp_dir in enumerate(os.listdir(data_path)):

        if filter_func is not None and not filter_func(exp_dir):
            continue

        full_path = os.path.join(data_path, exp_dir)
        exp_tuple = exp_dir.split("+")[0].split("-")[:3]
        try:
            args_kwargs = {
                "model_name":
                exp_tuple[0],
                "dataset_class":
                "Planetoid" if exp_tuple[1] != "PPI" else "PPI",
                "dataset_name":
                exp_tuple[1],
                "custom_key":
                exp_tuple[2] + ("" if exp_tuple[1] != "PubMed" else "-500") +
                "-ES",
            }
            args = get_args(**args_kwargs)

            result_info = {"full_path": full_path, "args": args}
            for perf_file in os.listdir(full_path):
                if "val_loss" in perf_file:
                    result_info["val_loss"] = os.path.join(
                        full_path, perf_file)
                elif "val_perf" in perf_file:
                    result_info["val_perf"] = os.path.join(
                        full_path, perf_file)
                elif "test_perf" in perf_file:
                    result_info["test_perf"] = os.path.join(
                        full_path, perf_file)
            result_info_dict[exp_dir] = result_info
        except Exception as e:
            cprint(f"Exception in {full_path} and {exp_tuple}, {e}", "red")

    for exp_dir, v in result_info_dict.items():
        args = v["args"]
        val_loss_matrix = np.load(v["val_loss"], allow_pickle=True)
        val_perf_matrix = np.load(v["val_perf"], allow_pickle=True)
        test_perf_matrix = np.load(v["test_perf"], allow_pickle=True)

        if "PPI" in exp_dir:
            val_loss_matrix = val_loss_matrix[:30, :]
            val_perf_matrix = val_perf_matrix[:30, :]
            test_perf_matrix = test_perf_matrix[:30, :]

        test_perf_at_best_val_list = simulate_early_stop(
            val_loss_matrix, val_perf_matrix, test_perf_matrix,
            args.early_stop_patience, args.early_stop_queue_length,
            args.early_stop_threshold_loss, args.early_stop_threshold_perf,
            args.epochs)
        result_info_dict[exp_dir][
            "test_perf_at_best_val_list"] = test_perf_at_best_val_list

    return result_info_dict
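`simulate_early_stop` is not defined in the snippet above. A minimal sketch of its core idea, reporting the test performance at each run's best-validation epoch; the real function also takes patience and threshold arguments that this sketch ignores:

import numpy as np

def test_perf_at_best_val(val_perf_matrix, test_perf_matrix):
    best_epochs = np.argmax(val_perf_matrix, axis=1)       # best validation epoch per run (row)
    return [float(test_perf_matrix[run, ep]) for run, ep in enumerate(best_epochs)]

val = np.array([[0.60, 0.80, 0.70],
                [0.50, 0.55, 0.90]])
test = np.array([[0.58, 0.79, 0.71],
                 [0.52, 0.50, 0.88]])
print(test_perf_at_best_val(val, test))   # [0.79, 0.88]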
Example No. 7
def main():
    # load hyper parameters
    args = get_args()
    num_updates = int(args.num_frames // args.num_steps)
    start = time.time()
    record = {'steps': [0], 'max': [0], 'mean': [0], 'min': [0], 'query': [0]}

    config = configparser.ConfigParser()
    config.read('config.ini', encoding='utf-8')
    data_index = config.getint('data', 'data_index')

    actions = [0, 1, 2, 3, 4, 5, 11, 12]
    key_map = {0: 0, 1: 1, 2: 2, 3: 3, 4: 4, 5: 5, 6: 11, 7: 12, 8: -1}

    # query_cnt counts queries to the expert
    query_cnt = data_index

    # environment initial
    envs = Env(args.env_name, args.num_stacks)
    # action_shape is the size of the discrete action set, here is 18
    # Most of the 18 actions are useless, find important actions
    # in the tips of the homework introduction document
    action_shape = envs.action_space.n
    # observation_shape is the shape of the observation
    # here is (210,160,3)=(height, width, channels)
    observation_shape = envs.observation_space.shape
    print(action_shape, observation_shape)

    # agent initial
    # you should finish your agent with DaggerAgent
    # e.g. agent = MyDaggerAgent()
    agent = ExampleAgent()

    # You can play this game yourself for fun
    if args.play_game:
        obs = envs.reset()
        while True:
            im = Image.fromarray(obs)
            im.save('imgs/' + str('screen') + '.jpeg')
            action = int(input('input action'))
            while action < 0 or action >= action_shape:
                action = int(input('re-input action'))
            obs_next, reward, done, _ = envs.step(action)
            obs = obs_next
            if done:
                obs = envs.reset()

    data_set = {'data': [], 'label': []}
    # start train your agent
    for i in range(data_index):
        data_path = 'data/data_batch_' + str(i) + '/'
        for j in range(args.num_steps):
            pic_path = data_path + str(j) + '.jpeg'
            data_set['data'].append(cv2.imread(pic_path))
        with open(data_path + 'label.txt', 'r') as f:
            for label_tmp in f.readlines():
                data_set['label'].append(int(label_tmp))
    agent.update(data_set['data'], data_set['label'])
    with open('performance.txt') as f:
        record_temp = eval(f.readline())
        if record_temp is not None:
            record = record_temp

    for i in range(data_index, num_updates):
        # an example of interacting with the environment
        # we init the environment and receive the initial observation
        obs = envs.reset()
        # we get a trajectory with the length of args.num_steps
        for step in range(args.num_steps):
            # Sample actions
            epsilon = 0.05
            if np.random.rand() < epsilon:
                # we choose a random action
                action = envs.action_space.sample()
            else:
                # we choose a special action according to our model
                action = agent.select_action(obs)

            # interact with the environment
            # we input the action to the environments and it returns some information
            # obs_next: the next observation after we do the action
            # reward: (float) the reward achieved by the action
            # done: (boolean) whether it's time to reset the environment again.
            #           done being True indicates the episode has terminated.
            obs_next, reward, done, _ = envs.step(action)
            # we view the new observation as current observation
            obs = obs_next
            # if the episode has terminated, we need to reset the environment.
            if done:
                envs.reset()

            # an example of saving observations
            if args.save_img:
                im = Image.fromarray(obs)
                im.save('imgs/' + str(step) + '.jpeg')
            data_set['data'].append(obs)

        # You need to label the images in 'imgs/' by recording the right actions in label.txt
        with open('imgs/label.txt', 'w+') as f:
            img_set = data_set['data'][-args.num_steps:]
            for img in img_set:
                cv2.imshow('Current Frame', img)
                cmd_in = cv2.waitKey(0) - 48
                # keep reading keys until a valid one is pressed
                while cmd_in not in key_map:
                    cmd_in = cv2.waitKey(0) - 48
                cmd_in = key_map.get(cmd_in)
                print(cmd_in)
                if cmd_in == -1:
                    f.write(str(actions[random.randint(0, 7)]) + '\n')
                else:
                    f.write(str(cmd_in) + '\n')

        if not os.path.exists('data/data_batch_' + str(data_index) + '/'):
            shutil.copytree('./imgs', 'data/data_batch_' + str(data_index))
        data_index += 1
        config.set('data', 'data_index', str(data_index))
        config.write(open('config.ini', 'w'))
        # After you have labeled all the images, you can load the labels
        # for training a model
        with open('imgs/label.txt', 'r') as f:
            for label_tmp in f.readlines():
                data_set['label'].append(int(label_tmp))

        # design how to train your model with labeled data
        agent.update(data_set['data'], data_set['label'])
        query_cnt += 1

        if (i + 1) % args.log_interval == 0:
            total_num_steps = (i + 1) * args.num_steps
            obs = envs.reset()
            reward_episode_set = []
            reward_episode = 0
            # evaluate your model by testing in the environment
            for step in range(args.test_steps):
                action = agent.select_action(obs)
                # you can render to get visual results
                # envs.render()
                obs_next, reward, done, _ = envs.step(action)
                reward_episode += reward
                obs = obs_next
                if done:
                    reward_episode_set.append(reward_episode)
                    reward_episode = 0
                    envs.reset()
            if len(reward_episode_set) == 0:
                reward_episode_set.append(0)
            end = time.time()
            print(
                "TIME {} Updates {}, num timesteps {}, FPS {} \n query {}, avrage/min/max reward {:.1f}/{:.1f}/{:.1f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start)), i,
                    total_num_steps, int(total_num_steps / (end - start)),
                    query_cnt, np.mean(reward_episode_set),
                    np.min(reward_episode_set), np.max(reward_episode_set)))
            record['steps'].append(total_num_steps)
            record['mean'].append(np.mean(reward_episode_set))
            record['max'].append(np.max(reward_episode_set))
            record['min'].append(np.min(reward_episode_set))
            record['query'].append(query_cnt)
            plot(record)
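The training loop above only assumes an agent exposing `update(data, labels)` and `select_action(obs)`. A minimal placeholder that satisfies this interface (a random-action stand-in, not the homework's `DaggerAgent`):

import numpy as np

class RandomDaggerAgent:
    def __init__(self, num_actions=8):
        self.num_actions = num_actions

    def update(self, data_batch, label_batch):
        # a real DAgger agent would fit a classifier on (frame, expert-action) pairs
        pass

    def select_action(self, observation):
        # placeholder policy: pick a random action index
        return int(np.random.randint(self.num_actions))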
Example No. 8
import os
import time
import random
import pickle

#import visdom

from utils import *
from loader import *
from model import BiLSTM_CRF

from arguments import get_args
from processor import generate_batch_data, generate_batch_para, generate_batch_rep


t = time.time()

opts, parameters = get_args()

experiment = None

models_path = "models/"
use_gpu = parameters['use_gpu']

mapping_file = 'models/mapping.pkl'

name = parameters['name']

model_name = models_path + name #get_name(parameters)
tmp_model = model_name + '.tmp'

if not os.path.exists(models_path):
    os.makedirs(models_path)
Example No. 9
def initialize_agent_and_env(is_test=False):
    """
    Read and parse commandline arguments to the args variable.
    Initiate an agent and environment based on the arguments.

    :return: agent, env, args
    """
    args = get_args(is_test)

    # set schema
    cfg.schema = args.schema

    # set dataset number
    cfg.dataset_number = args.dataset_number

    # set number of steps in session
    cfg.MAX_NUM_OF_STEPS = args.episode_length
    # hack to change the default value of the max_steps argument in the __init__ of ATENAEnvCont to cfg.MAX_NUM_OF_STEPS
    atena_init_default_args = list(ATENAEnvCont.__init__.__defaults__)
    atena_init_default_args[0] = cfg.MAX_NUM_OF_STEPS
    ATENAEnvCont.__init__.__defaults__ = tuple(atena_init_default_args)

    # set env settings
    cfg.stack_obs_num = args.stack_obs_num
    cfg.obs_with_step_num = args.obs_with_step_num
    cfg.no_back = args.no_back
    cfg.bins_sizes = args.bins_sizes
    #filter_terms_bins_sizes_helper(FilterTermsBinsSizes(cfg.bins_sizes))
    #paremetric_softmax_idx_action_maps_helper()

    # set reward types to use
    cfg.no_diversity = args.no_diversity
    cfg.no_interestingness = args.no_inter
    cfg.use_humans_reward = args.use_humans_reward
    cfg.humans_reward_interval = args.humans_reward_interval
    cfg.count_data_driven = args.count_data_driven

    # set number of hidden units for gaussian policy
    cfg.n_hidden_channels = args.n_hidden_channels

    # set architecture type
    cfg.arch = args.arch
    cfg.beta = args.beta

    # optimization settings
    cfg.max_nn_tokens = args.max_nn_tokens
    cfg.cache_dfs_size = args.cache_dfs_size
    cfg.cache_tokenization_size = args.cache_tokenization_size
    cfg.cache_distances_size = args.cache_distances_size

    # set reward coefficients
    cfg.humanity_coeff = args.humanity_coeff
    cfg.diversity_coeff = args.diversity_coeff
    cfg.kl_coeff = args.kl_coeff
    cfg.compaction_coeff = args.compaction_coeff

    args.outdir = chainerrl.experiments.prepare_output_dir(args, args.outdir)
    cfg.outdir = args.outdir

    # https://stackoverflow.com/questions/13479295/python-using-basicconfig-method-to-log-to-console-and-file
    # logging file path
    log_path = os.path.join(args.outdir, 'training_results.log')
    # set up logging to file
    logging.basicConfig(
        level=logging.INFO,
        format='[%(asctime)s] {%(pathname)s:%(lineno)d} %(levelname)s - %(message)s',
        filename=log_path,
        datefmt='%H:%M:%S'
    )

    # set up logging to console
    console = logging.StreamHandler()
    console.setLevel(args.logger_level)
    # add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # set logging of the entire episode every LOG_INTERVAL steps
    cfg.log_interval = args.log_interval
    cfg.num_envs = args.num_envs
    ATENAEnvCont.LOG_INTERVAL = int(args.log_interval / args.num_envs)

    # TODO (baelo): delete it
    # Set filter term bins
    #filter_terms_bins_sizes_helper(FilterTermsBinsSizes(cfg.bins_sizes))
    #paremetric_softmax_idx_action_maps_helper()

    # Set random seed
    chainerrl.misc.set_random_seed(args.seed, gpus=(args.gpu,))

    # create environment
    env = make_env(args, args.env, args.seed, args.render, args.outdir)

    # choose algorithm
    args.algo = AlgoName(args.algo)
    if args.algo is AlgoName.CAPG_PPO:  # capg
        model = PPOModel(env,
                         args.gpu,
                         args.n_hidden_channels,
                         args.adam_lr,
                         args.ppo_update_interval,
                         args.outdir,
                         args.load,
                         args.use_clipped_gaussian)
    elif args.algo is AlgoName.CAPG_TRPO:  # capg
        model = TRPOModel(env,
                          args.gpu,
                          args.n_hidden_channels,
                          args.trpo_update_interval,
                          args.outdir,
                          args.load,
                          args.use_clipped_gaussian)
    elif args.algo == AlgoName.CHAINERRL_PPO:
        model = PPOchianerrl(args, env)
    else:
        raise NotImplementedError

    agent = model.agent

    return agent, env, args
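The `__defaults__` manipulation for `ATENAEnvCont.__init__` above works because `__defaults__` holds the default values of a function's rightmost positional parameters. A tiny standalone demonstration with a hypothetical function:

def make_env(max_steps=10):
    return max_steps

defaults = list(make_env.__defaults__)   # (10,)
defaults[0] = 50
make_env.__defaults__ = tuple(defaults)
print(make_env())   # 50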
Example No. 10
def load_ocnli_data(data_path, data_type, tokenizer):
    args = get_args()

    filename = os.path.join(data_path, data_type+'.json')
    objs = []
    with open(filename) as fin:
        for line in fin:
            objs.append(json.loads(line.strip()))

    pad_id = tokenizer.encoder['<pad>']
    args.eod_token = tokenizer.encoder['<eod>']

    all_tokens_1 = []
    all_masks_1 = []
    all_tokens_2 = []
    all_masks_2 = []    
    all_tokens_3 = []
    all_masks_3 = [] 
    all_labels = []
    for obj in objs:

        if obj['label'] == '-':
            continue

        prompt = "{}?对,".format(obj['sentence1'])
        prompt_tokens = tokenizer.encode(prompt)
        prompt_len = len(prompt_tokens)
        tokens = prompt_tokens + tokenizer.encode(obj['sentence2'])
        second_mask = [0] * (args.seq_length-1)
        for idx in range(prompt_len-1, len(tokens)-1):
            second_mask[idx] = 1
        all_masks_1.append(second_mask)
        token_length = len(tokens)
        assert token_length < args.seq_length
        tokens.extend([pad_id] * (args.seq_length - token_length))
        all_tokens_1.append(tokens)

        prompt = "{}?错,".format(obj['sentence1'])
        prompt_tokens = tokenizer.encode(prompt)
        prompt_len = len(prompt_tokens)
        tokens = prompt_tokens + tokenizer.encode(obj['sentence2'])
        second_mask = [0] * (args.seq_length-1)
        for idx in range(prompt_len-1, len(tokens)-1):
            second_mask[idx] = 1
        all_masks_2.append(second_mask)
        token_length = len(tokens)
        assert token_length < args.seq_length
        tokens.extend([pad_id] * (args.seq_length - token_length))
        all_tokens_2.append(tokens)

        prompt = "{}?也许,".format(obj['sentence1'])
        prompt_tokens = tokenizer.encode(prompt)
        prompt_len = len(prompt_tokens)
        tokens = prompt_tokens + tokenizer.encode(obj['sentence2'])
        second_mask = [0] * (args.seq_length-1)
        for idx in range(prompt_len-1, len(tokens)-1):
            second_mask[idx] = 1
        all_masks_3.append(second_mask)
        token_length = len(tokens)
        assert token_length < args.seq_length
        tokens.extend([pad_id] * (args.seq_length - token_length))
        all_tokens_3.append(tokens)

        if obj['label'] == 'entailment':
            all_labels.append([0])
        elif obj['label'] == 'contradiction':
            all_labels.append([1])
        else:
            all_labels.append([2])

    all_tokens_1 = torch.tensor(all_tokens_1, dtype=torch.long)
    all_masks_1 = torch.tensor(all_masks_1, dtype=torch.float)
    all_tokens_2 = torch.tensor(all_tokens_2, dtype=torch.long)
    all_masks_2 = torch.tensor(all_masks_2, dtype=torch.float)
    all_tokens_3 = torch.tensor(all_tokens_3, dtype=torch.long)
    all_masks_3 = torch.tensor(all_masks_3, dtype=torch.float)
    all_labels = torch.tensor(all_labels, dtype=torch.long)
    dataset = TensorDataset(all_tokens_1, all_masks_1, all_tokens_2, all_masks_2, all_tokens_3, all_masks_3, all_labels)

    # Data parallel arguments.
    world_size = mpu.get_data_parallel_world_size()
    rank = mpu.get_data_parallel_rank()
    global_batch_size = args.batch_size * world_size
    num_workers = args.num_workers

    # Use a random sampler with distributed batch sampler.
    if data_type == 'train':
        sampler = RandomSampler(dataset)
    else:
        sampler = torch.utils.data.SequentialSampler(dataset)
    batch_sampler = DistributedBatchSampler(sampler=sampler,
                                            batch_size=global_batch_size,
                                            drop_last=True,
                                            rank=rank,
                                            world_size=world_size)
    
    # Torch dataloader.
    return torch.utils.data.DataLoader(dataset,
                                       batch_sampler=batch_sampler,
                                       num_workers=num_workers,
                                       pin_memory=True)
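Each `second_mask` built above marks the positions where the model scores the second sentence, shifted left by one for next-token prediction. A tiny worked example of the same construction; the token ids and `seq_length` are made up:

seq_length = 8
prompt_tokens = [11, 12, 13]              # hypothetical ids for "{sentence1}?对,"
continuation = [21, 22]                   # hypothetical ids for sentence2
tokens = prompt_tokens + continuation
second_mask = [0] * (seq_length - 1)
for idx in range(len(prompt_tokens) - 1, len(tokens) - 1):
    second_mask[idx] = 1
print(second_mask)                        # [0, 0, 1, 1, 0, 0, 0]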
Example No. 11
def main(args):
    # NOTE: Here's where you can set hyperparameters for PPO. I don't include them as part of
    # the ArgumentParser because it's too annoying to type them every time at the command line.
    # Instead, you can change them here. To see a list of hyperparameters, look at
    # _init_hyperparameters in ppo.py.
    hyperparameters = {
        'timesteps_per_batch': 2048,
        'max_timesteps_per_episode': 200,
        'gamma': 0.99,
        'n_updates_per_iteration': 10,
        'lr': 3e-4,
        'clip': 0.2
    }

    # Creates the environment we'll be running. If you want to replace with your own
    # custom environment, note that it must inherit Gym and have both continuous
    # observation and action spaces.
    env = gym.make('Pendulum-v0')

    # Train or test, depending on the mode specified
    if args.mode == 'train':
        train(env=env,
              hyperparameters=hyperparameters,
              actor_model=args.actor_model,
              critic_model=args.critic_model)
    else:
        test(env=env, actor_model=args.actor_model)


if __name__ == '__main__':
    args = get_args()  # Parse arguments from command line
    main(args)
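The fragment above reads `args.mode`, `args.actor_model`, and `args.critic_model`. A minimal `get_args` that would be compatible with it, as an assumption; the project's `arguments.py` likely defines more options:

import argparse

def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--mode', type=str, default='train', choices=['train', 'test'])
    parser.add_argument('--actor_model', type=str, default='')
    parser.add_argument('--critic_model', type=str, default='')
    return parser.parse_args()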
Example No. 12
def main():
    """Main training program."""

    print('Pretrain BERT model')

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False
    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)

    set_random_seed(args.seed)
    print(args)
    # Data stuff.
    data_config = configure_data()
    data_config.set_defaults(data_set_type='BERT', transpose=False)
    (train_data, val_data), tokenizer = data_config.apply(args)

    args.train_iters = len(train_data)
    evaluate.best_val_loss = float("inf")

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler, criterion = setup_model_and_optimizer(
        args, tokenizer)
    # evaluate(val_data, model, tokenizer, criterion, args)
    # At any point you can hit Ctrl + C to break out of training early.
    try:
        total_iters = 0
        skipped_iters = 0
        start_epoch = 1
        best_val_loss = float('inf')
        # Resume data loader if necessary.
        if args.resume_dataloader:
            start_epoch = args.epoch
            total_iters = args.total_iters
        # For all epochs.
        for epoch in range(start_epoch, args.epochs + 1):
            timers = Timers()
            # if args.shuffle:
            #     train_data.batch_sampler.sampler.set_epoch(epoch + args.seed)
            timers('epoch time').start()
            iteration, skipped = train_epoch(epoch, model, tokenizer,
                                             optimizer, train_data, val_data,
                                             lr_scheduler, criterion, timers,
                                             args)
            elapsed_time = timers('epoch time').elapsed()
            total_iters += iteration
            skipped_iters += skipped
            lm_loss, nsp_loss = evaluate(val_data, model, tokenizer, criterion,
                                         args)
            val_loss = lm_loss + nsp_loss
            print('-' * 100)
            print(
                '| end of epoch {:3d} | time: {:.3f}s | valid loss {:.3f} | '
                'valid LM Loss {:.3f} | valid LM PPL {:.3f} | valid NSP Loss {:.3f}'
                .format(epoch, elapsed_time, val_loss, lm_loss,
                        math.exp(lm_loss), nsp_loss))
            print('-' * 100)
            if val_loss < evaluate.best_val_loss:
                evaluate.best_val_loss = val_loss
                if args.save:
                    best_path = 'checkpoints-best.pt'
                    print('saving best model to:',
                          os.path.join(args.save, best_path))
                    save_checkpoint(best_path, epoch + 1, 0, model, optimizer,
                                    lr_scheduler, args)
    except KeyboardInterrupt:
        print('-' * 100)
        print('Exiting from training early')
        if args.save:
            cur_path = 'checkpoints-last.pt'
            print('saving current model to:',
                  os.path.join(args.save, cur_path))
            save_checkpoint(cur_path, epoch, args.cur_iteration, model,
                            optimizer, lr_scheduler, args)
        exit()
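`evaluate.best_val_loss` above stores state on the `evaluate` function object itself, so the best loss survives across calls without a global variable. A tiny demonstration of the trick with a toy function, not the training code's `evaluate`:

def evaluate(loss):
    # keep the best (lowest) loss seen so far on the function object
    if loss < evaluate.best_val_loss:
        evaluate.best_val_loss = loss
    return evaluate.best_val_loss

evaluate.best_val_loss = float('inf')
print(evaluate(3.2), evaluate(4.0), evaluate(2.5))   # 3.2 3.2 2.5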
Example No. 13
import torch
from torch.utils.tensorboard import SummaryWriter
import numpy as np
from preprocess import *  #Preprocess, ChampDataset
from model import *  #CNNModel, MUCNNModel, RNNModel, MURNNModel, OHModel, MUOHModel
from arguments import get_args
import train
from datetime import datetime
now = datetime.now()
import os
import pdb

preprocess = Preprocess()
arg = get_args()
'''
def oneHotEncodding(labels):
    onehot_encoded = list()
    for value in labels:
        target = [0 for _ in range(2)]
        target[value] = 1
        onehot_encoded.append(target)

    return onehot_encoded
'''


def main():

    allChamp, matchComp, blueWin = preprocess.lolDataSet(arg, "train")
    _, test_x, test_y = preprocess.lolDataSet(arg, "test")
Example No. 14
def visualize_attention_metric_for_multiple_models(
        name_prefix_and_kwargs: List[Tuple[str, Dict]],
        unit_width_per_name=3,
        extension="png"):
    res = None
    total_args, num_layers, custom_key_list, name_prefix_list = None, None, [], []
    kld1_list, kld2_list, jsd_list, ent_list = [], [], [], []  # [L * M, N]
    for name_prefix, kwargs in name_prefix_and_kwargs:
        args = get_args(**kwargs)
        custom_key_list.append(args.custom_key)
        num_layers = args.num_layers

        train_d, val_d, test_d = get_dataset_or_loader(
            args.dataset_class,
            args.dataset_name,
            args.data_root,
            batch_size=args.batch_size,
            seed=args.seed,
        )
        if val_d is None and test_d is None:
            data_list = [train_d[0]]
        else:
            data_list = []
            for _data in chain(train_d, val_d, test_d):
                if _data.x.size(0) != len(_data.agreement_dist):
                    _data.agreement_dist = [
                        _ad for _ad in _data.agreement_dist[0]
                    ]
                    _data.uniform_att_dist = [
                        _uad for _uad in _data.uniform_att_dist[0]
                    ]
                data_list.append(_data)

        gpu_id = [
            int(
                np.random.choice([
                    g for g in range(args.num_gpus_total)
                    if g not in args.gpu_deny_list
                ], 1))
        ][0]

        if args.verbose >= 1:
            pprint_args(args)
            cprint("Use GPU the ID of which is {}".format(gpu_id), "yellow")

        device = "cpu" if gpu_id is None \
            else torch.device('cuda:{}'.format(gpu_id) if torch.cuda.is_available() else 'cpu')

        model, ret = run(args, gpu_id=gpu_id, return_model=True)

        kld1_layer, kld2_layer, jsd_layer, ent_layer, *res = \
            get_attention_metric_for_single_model_and_multiple_data(model, data_list, device)
        kld1_list += kld1_layer
        kld2_list += kld2_layer
        jsd_list += jsd_layer
        ent_list += ent_layer
        name_prefix_list.append(name_prefix)
        total_args = args

        torch.cuda.empty_cache()

    total_args.custom_key = "-".join(sorted(custom_key_list))
    plot_kld_jsd_ent(kld1_list,
                     kld2_list,
                     jsd_list,
                     ent_list,
                     *res,
                     num_layers=num_layers,
                     model_args=total_args,
                     epoch=-1,
                     name_prefix_list=name_prefix_list,
                     unit_width_per_name=unit_width_per_name,
                     extension=extension,
                     flierprops={
                         "marker": "x",
                         "markersize": 12
                     })
Example No. 15
def eval_networks():
    # get args
    args = get_args()
    seed = set_seed(args.seed, args.use_cuda)

    _, testset, nr_channels, mlp_input_neurons, classes = get_dataset(args)

    testloader = torch.utils.data.DataLoader(testset,
                                             batch_size=5,
                                             shuffle=False,
                                             num_workers=1)

    # get student and teacher models
    student_model_class = get_model_class(args.student_model)
    teacher_model_class = get_model_class(args.teacher_model)
    if "MLP" in args.student_model:
        stud_model_simple = student_model_class(mlp_input_neurons, 10,
                                                args.dropout)
        stud_model_teacher = student_model_class(mlp_input_neurons, 10,
                                                 args.dropout)
        teacher_model = teacher_model_class(mlp_input_neurons, 10,
                                            args.dropout)
    else:
        stud_model_simple = student_model_class(nr_channels, 10, args.dropout)
        stud_model_teacher = student_model_class(nr_channels, 10, args.dropout)
        teacher_model = teacher_model_class(nr_channels, 10, args.dropout)

    if torch.cuda.is_available() and args.use_cuda:
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")

    with open(
            args.dataset + "_teacher_network_" + args.teacher_model + "_" +
            str(seed), "rb") as f:
        teacher_model.load_state_dict(torch.load(f))

    with open(
            args.dataset + "_student_network_simple" + args.student_model +
            str(seed) + "_10", "rb") as f:
        stud_model_simple.load_state_dict(torch.load(f))

    with open(
            args.dataset + "_student_network_teacher" + args.student_model +
            str(seed) + "_10", "rb") as f:
        stud_model_teacher.load_state_dict(torch.load(f))

    stud_model_simple.to(device)
    stud_model_teacher.to(device)
    teacher_model.to(device)

    stud_model_simple.eval()
    stud_model_teacher.eval()
    teacher_model.eval()

    print("Eval teacher model")
    show_results(testloader, teacher_model, classes, use_cuda=True)

    print("Eval student model simple")
    #show_results(testloader, stud_model_simple, classes, use_cuda=True)

    print("Eval student model twacher")
Example No. 16
def analyze_rpg_by_degree_and_homophily(degree_list: List[float],
                                        homophily_list: List[float],
                                        legend_list: List[str],
                                        model_list: List[str],
                                        custom_key_list: List[str],
                                        att_lambda_list: List[float],
                                        l2_lambda_list: List[float],
                                        num_total_runs: int,
                                        num_nodes_per_class: int = 500,
                                        num_classes: int = 10,
                                        verbose=2,
                                        is_test=False,
                                        plot_part_by_part=False,
                                        draw_plot=True,
                                        draw_diff_between_first=False,
                                        extension="pdf"):
    def to_log10(v, eps=1e-5):
        return float(np.log10(v + eps))

    base_key = "analysis_rpg" + ("" if not is_test else "_test")
    base_path = os.path.join("../figs", base_key)

    best_meta_dict = defaultdict(dict)

    deg_and_legend_to_mean_over_hp_list, deg_and_legend_to_std_over_hp_list = {}, {}

    for deg in degree_list:

        avg_deg_ratio = deg / num_nodes_per_class

        for legend, model, key in zip(legend_list, model_list,
                                      custom_key_list):

            base_kwargs = {
                "model_name": model,
                "dataset_class": "RandomPartitionGraph",
                "dataset_name": f"rpg-{num_classes}-{num_nodes_per_class}-h-d",
                "custom_key": key,
            }
            args = get_args(**base_kwargs)
            args.verbose = verbose
            deg_and_legend = (deg, legend)

            if is_test:
                args.epochs = 2

            mean_over_hp_list, std_over_hp_list = [], []
            for hp in homophily_list:

                args.dataset_name = f"rpg-{num_classes}-{num_nodes_per_class}-{hp}-{avg_deg_ratio}"
                model_key, model_path = _get_key_and_makedirs(
                    args=args, base_path=base_path, args_prefix=legend)

                max_mean_perf = -1

                for att_lambda in att_lambda_list:
                    for l2_lambda in l2_lambda_list:
                        args.att_lambda = att_lambda
                        args.l2_lambda = l2_lambda
                        pprint_args(args)

                        result_key = (att_lambda, l2_lambda)
                        result_path = os.path.join(
                            model_path,
                            "ms_result_{}.pkl".format(s_join("-", result_key)))

                        try:
                            many_seeds_result = pickle.load(
                                open(result_path, "rb"))
                            cprint("Load: {}".format(result_path), "blue")

                        except FileNotFoundError:
                            many_seeds_result = run_with_many_seeds_with_gpu(
                                args, num_total_runs)
                            with open(result_path, "wb") as f:
                                pickle.dump(many_seeds_result, f)
                                cprint("Dump: {}".format(result_path), "green")
                                garbage_collection_cuda()
                                cprint("Garbage collected", "green")

                        cur_mean_perf = float(
                            np.mean(
                                many_seeds_result["test_perf_at_best_val"]))
                        cur_std_perf = float(
                            np.std(many_seeds_result["test_perf_at_best_val"]))
                        if cur_mean_perf > max_mean_perf:
                            max_mean_perf = cur_mean_perf
                            best_meta_dict[model_key][
                                "mean_perf"] = cur_mean_perf
                            best_meta_dict[model_key][
                                "std_perf"] = cur_std_perf
                            best_meta_dict[model_key][
                                "att_lambda"] = att_lambda
                            best_meta_dict[model_key]["l2_lambda"] = l2_lambda
                            best_meta_dict[model_key][
                                "many_seeds_result"] = many_seeds_result

                    if not args.is_super_gat:
                        break

                mean_over_hp_list.append(
                    best_meta_dict[model_key]["mean_perf"])
                std_over_hp_list.append(best_meta_dict[model_key]["std_perf"])

            deg_and_legend_to_mean_over_hp_list[
                deg_and_legend] = mean_over_hp_list
            deg_and_legend_to_std_over_hp_list[
                deg_and_legend] = std_over_hp_list

    pprint(deg_and_legend_to_mean_over_hp_list)

    if not draw_plot:
        return

    plot_line_with_std(
        tuple_to_mean_list=
        deg_and_legend_to_mean_over_hp_list,  # (deg, legend) -> List[perf] by homophily
        tuple_to_std_list=deg_and_legend_to_std_over_hp_list,
        x_label="Homophily",
        y_label="Test Accuracy",
        name_label_list=["Avg. Degree", "Model"],
        x_list=homophily_list,
        hue="Model",
        style="Model",
        col="Avg. Degree",
        hue_order=legend_list,
        x_lim=(0, None),
        custom_key=base_key,
        extension=extension,
    )

    hp_and_legend_to_mean_over_deg_list, hp_and_legend_to_std_over_deg_list = defaultdict(
        list), defaultdict(list)
    legend_to_mean_std_num_agreed_neighbors_list = defaultdict(list)

    for deg, legend in deg_and_legend_to_mean_over_hp_list.keys():
        mean_over_hp_list = deg_and_legend_to_mean_over_hp_list[(deg, legend)]
        std_over_hp_list = deg_and_legend_to_std_over_hp_list[(deg, legend)]
        for hp, mean_of_hp, std_of_hp in zip(homophily_list, mean_over_hp_list,
                                             std_over_hp_list):
            hp_and_legend = (hp, legend)
            hp_and_legend_to_mean_over_deg_list[hp_and_legend].append(
                mean_of_hp)
            hp_and_legend_to_std_over_deg_list[hp_and_legend].append(std_of_hp)

            legend_to_mean_std_num_agreed_neighbors_list[legend].append(
                (mean_of_hp, std_of_hp, hp * deg))

    mean_perf_list = []
    num_agreed_neighbors_list = []
    model_legend_list = []
    for legend, mean_std_num_agr_neighbors_list in legend_to_mean_std_num_agreed_neighbors_list.items(
    ):
        for mean_perf, std_perf, num_agr_neighbors in sorted(
                mean_std_num_agr_neighbors_list, key=lambda t: t[2]):
            mean_perf_list.append(mean_perf)
            model_legend_list.append(legend)
            num_agreed_neighbors_list.append(num_agr_neighbors)

    plot_scatter(
        xs=num_agreed_neighbors_list,
        ys=mean_perf_list,
        hues=model_legend_list,
        xlabel="Avg. Number of Agreed Neighbors",
        ylabel="Test Performance (Acc.)",
        hue_name="Model",
        custom_key=base_key,
    )

    plot_line_with_std(
        tuple_to_mean_list=hp_and_legend_to_mean_over_deg_list,
        tuple_to_std_list=hp_and_legend_to_std_over_deg_list,
        x_label="Avg. Degree (Log10)",  # Log
        y_label="Test Accuracy",
        name_label_list=["Homophily", "Model"],
        x_list=[to_log10(d) for d in degree_list],  # Log
        hue="Model",
        style="Model",
        col="Homophily",
        aspect=0.75,
        hue_order=legend_list,
        x_lim=(None, None),
        custom_key=base_key,
        extension=extension,
    )

    if plot_part_by_part:  # manual.

        # deg: [2.5, 5.0, 10.0, 25.0, 50.0, 75.0, 100.0]
        def filtered_by_hp(hp_list, num_deg=None):
            return ({
                (hp, legend):
                (mean_list if not num_deg else mean_list[:num_deg])
                for (hp, legend), mean_list in
                hp_and_legend_to_mean_over_deg_list.items() if hp in hp_list
            }, {(hp, legend): (std_list if not num_deg else std_list[:num_deg])
                for (hp, legend
                     ), std_list in hp_and_legend_to_std_over_deg_list.items()
                if hp in hp_list})

        def get_mean_diff(h_and_l_to_m_over_d_list, first_legend, x100=True):
            h_and_l_to_mean_diff_over_d_list = dict()
            for (hp, legend), mean_list in h_and_l_to_m_over_d_list.items():
                if legend == first_legend:
                    continue
                mean_list_of_first = h_and_l_to_m_over_d_list[(hp,
                                                               first_legend)]
                mean_diff_list = (np.asarray(mean_list) -
                                  np.asarray(mean_list_of_first))
                if x100:
                    mean_diff_list = 100 * mean_diff_list
                mean_diff_list = mean_diff_list.tolist()
                h_and_l_to_mean_diff_over_d_list[(hp, legend)] = mean_diff_list
            return h_and_l_to_mean_diff_over_d_list

        if 0.1 in degree_list:
            b1, b2, b3, b4 = [0.1, 0.3, 0.5], [0.7], [0.9], [0.7, 0.9]
        else:
            b1, b2, b3, b4 = [0.2, 0.4], [0.6], [0.8], [0.6, 0.8]

        hp135_and_legend_to_mean_over_deg_list, hp135_and_legend_to_std_over_deg_list = filtered_by_hp(
            b1)
        hp7_and_legend_to_mean_over_deg_list, hp7_and_legend_to_std_over_deg_list = filtered_by_hp(
            b2)
        hp9_and_legend_to_mean_over_deg_list, hp9_and_legend_to_std_over_deg_list = filtered_by_hp(
            b3)
        hp79_and_legend_to_mean_over_deg_list, hp79_and_legend_to_std_over_deg_list = filtered_by_hp(
            b4)

        if draw_diff_between_first:
            lf = legend_list[0]
            hp135_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp135_and_legend_to_mean_over_deg_list, lf)
            hp7_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp7_and_legend_to_mean_over_deg_list, lf)
            hp79_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp79_and_legend_to_mean_over_deg_list, lf)
            hp9_and_legend_to_mean_over_deg_list = get_mean_diff(
                hp9_and_legend_to_mean_over_deg_list, lf)
            hp135_and_legend_to_std_over_deg_list = None
            hp7_and_legend_to_std_over_deg_list = None
            hp79_and_legend_to_std_over_deg_list = None
            hp9_and_legend_to_std_over_deg_list = None
            legend_list = legend_list[1:]
            y_lim = None
            y_label = "Diff. of Test Acc. vs. GO (%p)"
        else:
            y_lim = None
            y_label = "Test Accuracy",

        degree_list = np.log10(degree_list).tolist()

        palette = ["grey", "#1976D2", "#D32F2F"]

        plot_line_with_std(
            tuple_to_mean_list=hp135_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp135_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label=y_label,
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=0.9,
            hue_order=legend_list,
            legend=False,
            x_lim=(0, None),
            y_lim=y_lim,
            palette=palette,
            custom_key=base_key + "_part135",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp79_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp79_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=0.9,
            hue_order=legend_list,
            legend="full",
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part79",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp7_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp7_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=1.0,
            hue_order=legend_list,
            legend=False,
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part7",
            extension=extension,
        )
        plot_line_with_std(
            tuple_to_mean_list=hp9_and_legend_to_mean_over_deg_list,
            tuple_to_std_list=hp9_and_legend_to_std_over_deg_list,
            x_label="Avg. Degree (Log10)",
            y_label="Test Accuracy",
            name_label_list=["Homophily", "Model"],
            x_list=degree_list,
            hue="Model",
            style="Model",
            col="Homophily",
            aspect=1.0,
            hue_order=legend_list,
            legend="full",
            x_lim=(0, None),
            y_lim=y_lim,
            use_ylabel=False,
            palette=palette,
            custom_key=base_key + "_part9",
            extension=extension,
        )
Example No. 17
def main():
    args = get_args()
    device = torch.device('cuda' if args.cuda else 'cpu')

    env = gym.make(args.env_name)

    input_size = env.observation_space.shape  # 4
    output_size = env.action_space.n  # 2

    if 'Breakout' in args.env_name:
        output_size -= 1

    env.close()

    is_render = False
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)
    model_path = os.path.join(args.save_dir, args.env_name + '.model')
    predictor_path = os.path.join(args.save_dir, args.env_name + '.pred')
    target_path = os.path.join(args.save_dir, args.env_name + '.target')    

    writer = SummaryWriter(log_dir=args.log_dir)

    reward_rms = RunningMeanStd()
    obs_rms = RunningMeanStd(shape=(1, 1, 84, 84))
    discounted_reward = RewardForwardFilter(args.ext_gamma)

    model = CnnActorCriticNetwork(input_size, output_size, args.use_noisy_net)
    rnd = RNDModel(input_size, output_size)
    model = model.to(device)
    rnd = rnd.to(device)
    optimizer = optim.Adam(list(model.parameters()) + list(rnd.predictor.parameters()), lr=args.lr)
   
    if args.load_model:
        if args.cuda:
            model.load_state_dict(torch.load(model_path))
        else:
            model.load_state_dict(torch.load(model_path, map_location='cpu'))

    works = []
    parent_conns = []
    child_conns = []
    for idx in range(args.num_worker):
        parent_conn, child_conn = Pipe()
        work = AtariEnvironment(
            args.env_name,
            is_render,
            idx,
            child_conn,
            sticky_action=args.sticky_action,
            p=args.sticky_action_prob,
            max_episode_steps=args.max_episode_steps)
        work.start()
        works.append(work)
        parent_conns.append(parent_conn)
        child_conns.append(child_conn)

    states = np.zeros([args.num_worker, 4, 84, 84])

    sample_env_index = 0   # Sample Environment index to log
    sample_episode = 0
    sample_rall = 0
    sample_step = 0
    sample_i_rall = 0
    global_update = 0
    global_step = 0

    # normalize observation
    print('Initializes observation normalization...')
    next_obs = []
    for step in range(args.num_step * args.pre_obs_norm_steps):
        actions = np.random.randint(0, output_size, size=(args.num_worker,))

        for parent_conn, action in zip(parent_conns, actions):
            parent_conn.send(action)

        for parent_conn in parent_conns:
            next_state, reward, done, realdone, log_reward = parent_conn.recv()
            next_obs.append(next_state[3, :, :].reshape([1, 84, 84]))

        if len(next_obs) % (args.num_step * args.num_worker) == 0:
            next_obs = np.stack(next_obs)
            obs_rms.update(next_obs)
            next_obs = []

    print('Training...')
    while True:
        total_state, total_reward, total_done, total_next_state, total_action, total_int_reward, total_next_obs, total_ext_values, total_int_values, total_action_probs = [], [], [], [], [], [], [], [], [], []
        global_step += (args.num_worker * args.num_step)
        global_update += 1

        # Step 1. n-step rollout
        for _ in range(args.num_step):
            actions, value_ext, value_int, action_probs = get_action(model, device, np.float32(states) / 255.)

            for parent_conn, action in zip(parent_conns, actions):
                parent_conn.send(action)

            next_states, rewards, dones, real_dones, log_rewards, next_obs = [], [], [], [], [], []
            for parent_conn in parent_conns:
                next_state, reward, done, real_done, log_reward = parent_conn.recv()
                next_states.append(next_state)
                rewards.append(reward)
                dones.append(done)
                real_dones.append(real_done)
                log_rewards.append(log_reward)
                next_obs.append(next_state[3, :, :].reshape([1, 84, 84]))

            next_states = np.stack(next_states)
            rewards = np.hstack(rewards)
            dones = np.hstack(dones)
            real_dones = np.hstack(real_dones)
            next_obs = np.stack(next_obs)

            # total reward = int reward + ext Reward
            intrinsic_reward = compute_intrinsic_reward(rnd, device, 
                ((next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5))
            intrinsic_reward = np.hstack(intrinsic_reward)
            sample_i_rall += intrinsic_reward[sample_env_index]

            total_next_obs.append(next_obs)
            total_int_reward.append(intrinsic_reward)
            total_state.append(states)
            total_reward.append(rewards)
            total_done.append(dones)
            total_action.append(actions)
            total_ext_values.append(value_ext)
            total_int_values.append(value_int)
            total_action_probs.append(action_probs)

            states = next_states[:, :, :, :]

            sample_rall += log_rewards[sample_env_index]

            sample_step += 1
            if real_dones[sample_env_index]:
                sample_episode += 1
                writer.add_scalar('data/reward_per_epi', sample_rall, sample_episode)
                writer.add_scalar('data/reward_per_rollout', sample_rall, global_update)
                writer.add_scalar('data/step', sample_step, sample_episode)
                sample_rall = 0
                sample_step = 0
                sample_i_rall = 0

        # calculate last next value
        _, value_ext, value_int, _ = get_action(model, device, np.float32(states) / 255.)
        total_ext_values.append(value_ext)
        total_int_values.append(value_int)
        # --------------------------------------------------

        total_state = np.stack(total_state).transpose([1, 0, 2, 3, 4]).reshape([-1, 4, 84, 84])
        total_reward = np.stack(total_reward).transpose().clip(-1, 1)
        total_action = np.stack(total_action).transpose().reshape([-1])
        total_done = np.stack(total_done).transpose()
        total_next_obs = np.stack(total_next_obs).transpose([1, 0, 2, 3, 4]).reshape([-1, 1, 84, 84])
        total_ext_values = np.stack(total_ext_values).transpose()
        total_int_values = np.stack(total_int_values).transpose()
        total_logging_action_probs = np.vstack(total_action_probs)

        # Step 2. calculate intrinsic reward
        # running mean intrinsic reward
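        # discounted_reward tracks a per-environment discounted intrinsic return;
        # its batch statistics update reward_rms, whose std normalizes the intrinsic reward below.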
        total_int_reward = np.stack(total_int_reward).transpose()
        total_reward_per_env = np.array([discounted_reward.update(reward_per_step) for reward_per_step in total_int_reward.T])
        mean, std, count = np.mean(total_reward_per_env), np.std(total_reward_per_env), len(total_reward_per_env)
        reward_rms.update_from_moments(mean, std ** 2, count)

        # normalize intrinsic reward
        total_int_reward /= np.sqrt(reward_rms.var)
        writer.add_scalar('data/int_reward_per_epi', np.sum(total_int_reward) / args.num_worker, sample_episode)
        writer.add_scalar('data/int_reward_per_rollout', np.sum(total_int_reward) / args.num_worker, global_update)
        # -------------------------------------------------------------------------------------------

        # log the maximum action probability
        writer.add_scalar('data/max_prob', total_logging_action_probs.max(1).mean(), sample_episode)

        # Step 3. make target and advantage
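        # GAE targets/advantages are computed separately for the extrinsic stream
        # (episodic, gamma=ext_gamma) and the intrinsic stream (non-episodic, gamma=int_gamma).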
        # extrinsic reward calculate
        ext_target, ext_adv = make_train_data(total_reward,
                                              total_done,
                                              total_ext_values,
                                              args.ext_gamma,
                                              args.gae_lambda,
                                              args.num_step,
                                              args.num_worker,
                                              args.use_gae)

        # intrinsic reward calculate
        # Non-episodic: intrinsic returns ignore episode boundaries, so the done flags are all zeros
        int_target, int_adv = make_train_data(total_int_reward,
                                              np.zeros_like(total_int_reward),
                                              total_int_values,
                                              args.int_gamma,
                                              args.gae_lambda,
                                              args.num_step,
                                              args.num_worker,
                                              args.use_gae)

        # add ext adv and int adv
        total_adv = int_adv * args.int_coef + ext_adv * args.ext_coef
        # -----------------------------------------------

        # Step 4. update obs normalize param
        obs_rms.update(total_next_obs)
        # -----------------------------------------------

        # Step 5. Training!
        train_model(args, device, output_size, model, rnd, optimizer, 
                        np.float32(total_state) / 255., ext_target, int_target, total_action,
                        total_adv, ((total_next_obs - obs_rms.mean) / np.sqrt(obs_rms.var)).clip(-5, 5),
                        total_action_probs)

        if global_step % (args.num_worker * args.num_step * args.save_interval) == 0:
            print('Now Global Step :{}'.format(global_step))
            torch.save(model.state_dict(), model_path)
            torch.save(rnd.predictor.state_dict(), predictor_path)
            torch.save(rnd.target.state_dict(), target_path)
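The rollout above leans on obs_rms, reward_rms and discounted_reward helpers that are defined outside this excerpt. As a rough sketch of the kind of running-moments tracker such code usually pairs with (the class name and exact update rule are assumptions, not taken from this example), something compatible with the update / update_from_moments / mean / var usage seen here could look like:

import numpy as np

class RunningMeanStd:
    """Track the running mean and variance of a stream of batches (sketch only)."""

    def __init__(self, shape=()):
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        self.count = 1e-4  # small prior count avoids division by zero on the first update

    def update(self, batch):
        # reduce the incoming batch to its moments, then merge them into the running stats
        self.update_from_moments(np.mean(batch, axis=0),
                                 np.var(batch, axis=0),
                                 batch.shape[0])

    def update_from_moments(self, batch_mean, batch_var, batch_count):
        # parallel-variance (Chan et al.) formula for combining two sets of moments
        delta = batch_mean - self.mean
        total = self.count + batch_count
        new_mean = self.mean + delta * batch_count / total
        m_a = self.var * self.count
        m_b = batch_var * batch_count
        m2 = m_a + m_b + np.square(delta) * self.count * batch_count / total
        self.mean, self.var, self.count = new_mean, m2 / total, total

Observations and intrinsic rewards are then whitened with (x - rms.mean) / np.sqrt(rms.var), which matches how obs_rms and reward_rms are used in the loop above.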
Exemplo n.º 18
0
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # get the tokenizer
    tokenizer = GPT2Tokenizer(
        os.path.join(args.tokenizer_path, 'vocab.json'),
        os.path.join(args.tokenizer_path, 'chinese_vocab.model'))

    # load train data
    if args.do_train:
        train_dataloader, _ = load_data(args, 'train', tokenizer, 1)
        dev_dataloader, dev_dataset = load_data(args, 'dev', tokenizer, 1)

        with open(args.deepspeed_config, "r") as f:
            deepspeed_conf = json.load(f)

        epoch = args.epoch
        grad_acc = deepspeed_conf["gradient_accumulation_steps"]
        args.train_iters = len(train_dataloader) * epoch / grad_acc

        # Model, optimizer, and learning rate.
        # TODO: maybe need to reinitialize optimizer
    elif args.do_eval:
        # Set an arbitrary positive integer, since the optimizer and the scheduler are not used when only evaluating.
        args.train_iters = 1

    model, optimizer, lr_scheduler = setup_model_and_optimizer(args)
    device = torch.cuda.current_device()

    # add a timestamp to the results directory
    cur_time = time.strftime("%Y-%m-%d-%H:%M:%S", time.localtime())
    results_dir = os.path.join(args.results_dir,
                               "{}-{}".format(args.model_name, cur_time))
    os.makedirs(results_dir, exist_ok=True)

    if args.do_train and torch.distributed.get_rank() == 0:

        with open(os.path.join(results_dir, "train_log.txt"), "w") as f:
            f.write("Train losses:\n")

        with open(os.path.join(results_dir, "dev_log.txt"), "w") as f:
            f.write("Dev accs:\n")

    torch.distributed.barrier()

    if args.do_train:
        cand_ids = torch.tensor(dev_dataset.cand_ids).to(device)
        total_loss, logging_loss, best_acc = 0.0, 0.0, 0.0
        global_step, total_step, best_step = 0, 0, 0

        for e in range(epoch):
            model.train()
            for batch, no_model_batch in tqdm(
                    train_dataloader,
                    disable=(torch.distributed.get_rank() != 0)):
                for k in batch:
                    batch[k] = batch[k].to(device)
                for k in no_model_batch:
                    no_model_batch[k] = no_model_batch[k].to(device)

                output = model(**batch)
                # get the loss of the last token
                output = torch.sum(
                    output * no_model_batch["loss_mask"].unsqueeze(-1),
                    1) / torch.sum(no_model_batch["loss_mask"],
                                   -1).unsqueeze(-1)
                # get the label of the last token
                labels = no_model_batch["labels"].float()
                labels = (torch.sum(labels * no_model_batch["loss_mask"], 1) /
                          torch.sum(no_model_batch["loss_mask"], -1)).long()
                # cross_entropy loss
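                # mpu.vocab_parallel_cross_entropy computes the cross-entropy with the
                # vocabulary dimension sharded across model-parallel ranks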
                losses = mpu.vocab_parallel_cross_entropy(
                    output.unsqueeze(1).contiguous().float(),
                    labels.unsqueeze(1))
                loss = torch.mean(losses)

                model.backward(loss)
                model.step()

                torch.distributed.all_reduce(
                    loss.data, group=mpu.get_data_parallel_group())
                loss.data = loss.data / mpu.get_data_parallel_world_size()
                total_loss += loss.item() / grad_acc

                if total_step % grad_acc == 0:
                    global_step += 1
                    if global_step != 0 and global_step % args.log_interval == 0:
                        # logging
                        if torch.distributed.get_rank() == 0:
                            train_log = "Epoch {}, global step {}, total step {}, train lm loss: {}".format(
                                e, global_step, epoch * len(train_dataloader),
                                (total_loss - logging_loss) /
                                args.log_interval)
                            yprint(train_log)
                            with open(
                                    os.path.join(results_dir, "train_log.txt"),
                                    "a") as f:
                                f.write(train_log + "\n")

                        logging_loss = total_loss

                    if global_step != 0 and global_step % args.eval_interval == 0:
                        # evaluate on the dev
                        acc, _, _ = evaluate(args,
                                             model,
                                             dev_dataloader,
                                             cand_ids,
                                             device,
                                             mode="dev")
                        dev_results_dir = os.path.join(
                            results_dir, "dev_step-{}".format(global_step))

                        if acc > best_acc:
                            best_acc = acc
                            best_step = global_step

                        if torch.distributed.get_rank() == 0:
                            # we will only write the log file once
                            dev_log = "Epoch: {}, Global step: {}, Acc: {}".format(
                                e, global_step, acc)
                            yprint(dev_log)
                            os.makedirs(dev_results_dir, exist_ok=True)
                            with open(
                                    os.path.join(dev_results_dir,
                                                 "dev_result.txt"), "w") as f:
                                f.write(dev_log + "\n")
                            with open(os.path.join(results_dir, "dev_log.txt"),
                                      "a") as f:
                                f.write(dev_log + "\n")

                        torch.distributed.barrier()

                        args.save = dev_results_dir
                        save_checkpoint(global_step, model, optimizer,
                                        lr_scheduler, args)

                total_step += 1

        with open(os.path.join(dev_results_dir, "dev_log.txt"), "a") as f:
            f.write("Best acc: {} Best step: {}\n".format(best_acc, best_step))

    if args.do_eval:
        # evaluate on the test
        test_dataloader, test_dataset = load_data(args, 'test', tokenizer, 1)
        cand_ids = torch.tensor(test_dataset.cand_ids).to(device)

        if args.do_train:
            # if training was done, evaluate the checkpoint with the best acc on the dev set.
            eval_ckpt_path = os.path.join(results_dir,
                                          "dev_step-{}".format(best_step))
            args.load = eval_ckpt_path
        else:
            # if only evaluating, use the checkpoint specified by the user.
            args.load = args.eval_ckpt_path

        load_checkpoint(model=model,
                        optimizer=None,
                        lr_scheduler=None,
                        args=args)
        acc, _, _ = evaluate(args,
                             model,
                             test_dataloader,
                             cand_ids,
                             device,
                             mode="test")

        if torch.distributed.get_rank() == 0:
            eval_log = "Checkpoint from {}: Acc: {}".format(args.load, acc)
            yprint(eval_log)
            with open(os.path.join(results_dir, "eval_log"), "w") as f:
                f.write(eval_log + "\n")

        torch.distributed.barrier()
Exemplo n.º 19
0
        if opts.expert_trajectories:
            save_state['T_sup'] = agent.T_sup
        torch.save(save_state, os.path.join(opts.save_path, 'model_latest.net'))

        print('Epoch %d : Train loss: %9.6f    Val loss: %9.6f'%(epoch+1, train_err, val_err))

        # Reduce supervision gradually
        if opts.expert_trajectories and opts.hybrid_train:
            if (epoch+1) % opts.hybrid_schedule == 0 and agent.T_sup > 0:
                agent.T_sup -= 1
            # Save the model after the first schedule is over
            if epoch+1 == opts.hybrid_schedule:
                torch.save(save_state, os.path.join(opts.save_path, 'model_after_one_schedule.net'))

        # Decay expert reward gradually
        if opts.expert_rewards and (epoch+1) % opts.expert_rewards_decay == 0:
            agent.reward_scale_expert /= opts.expert_rewards_decay_factor

        # Display three randomly selected batches of panoramas every 10 epochs
        if (epoch+1) % 10 == 0 or epoch == 0:
            for choice in rng_choices:
                for pano_count in range(decoded_images[choice].size(0)):
                    x = vutils.make_grid(decoded_images[choice][pano_count], padding=5, normalize=True, scale_each=True, nrow=opts.T//2+1) 
                    writer.add_image('Validation batch # : %d  image # : %d'%(choice, pano_count), x, 0) # global step fixed to 0 to save disk space; ideally this should be the epoch

if __name__ == '__main__': 
    opts = get_args()
    assert not(opts.expert_rewards and opts.expert_trajectories), "Cannot use both sidekicks at once!"

    train(opts)
Exemplo n.º 20
0
import numpy as np

from autooed.problem import build_problem
from autooed.mobo import build_algorithm
from autooed.utils.seed import set_seed
from autooed.utils.initialization import generate_random_initial_samples
from autooed.utils.plot import plot_performance_space, plot_performance_metric

from arguments import get_args


if __name__ == '__main__':

    # load arguments
    args, module_cfg = get_args()

    # set random seed
    set_seed(args.seed)

    # build problem
    problem = build_problem(args.problem)
    print(problem)

    # build algorithm
    algorithm = build_algorithm(args.algo, problem, module_cfg)
    print(algorithm)

    # generate initial random samples
    X = generate_random_initial_samples(problem, args.n_init_sample)
    Y = np.array([problem.evaluate_objective(x) for x in X])
Exemplo n.º 21
0
def main():
    import copy
    import glob
    import os
    import time
    import matplotlib.pyplot as plt

    import gym
    import numpy as np
    import torch
    torch.multiprocessing.set_start_method('spawn')

    import torch.nn as nn
    import torch.nn.functional as F
    import torch.optim as optim
    from gym.spaces import Discrete

    from arguments import get_args
    from baselines.common.vec_env.dummy_vec_env import DummyVecEnv
    from baselines.common.vec_env.subproc_vec_env import SubprocVecEnv
    from baselines.common.vec_env.vec_normalize import VecNormalize
    from envs import make_env
    from img_env import ImgEnv, IMG_ENVS
    from model import Policy
    from storage import RolloutStorage
    from utils import update_current_obs, eval_episode
    from torchvision import transforms
    from visdom import Visdom

    import algo

    viz = Visdom(port=8097)

    print("#######")
    print(
        "WARNING: All rewards are clipped or normalized so you need to use a monitor (see envs.py) or visdom plot to get true rewards"
    )
    print("#######")

    plot_rewards = []
    plot_policy_loss = []
    plot_value_loss = []
    # x = np.array([0])
    # y = np.array([0])
    # counter = 0
    # win = viz.line(
    #     X=x,
    #     Y=y,
    #     win="test1",
    #     name='Line1',
    #     opts=dict(
    #         title='Reward',
    #     )
    #     )
    # win2 = viz.line(
    #     X=x,
    #     Y=y,
    #     win="test2",
    #     name='Line2',
    #     opts=dict(
    #         title='Policy Loss',
    #     )
    #     )
    # win3 = viz.line(
    #     X=x,
    #     Y=y,
    #     win="test3",
    #     name='Line3',
    #     opts=dict(
    #         title='Value Loss',
    #     )
    #     )

    args = get_args()
    if args.no_cuda:
        args.cuda = False
    print(args)
    assert args.algo in ['a2c', 'ppo', 'acktr']
    if args.recurrent_policy:
        assert args.algo in ['a2c', 'ppo'], \
            'Recurrent policy is not implemented for ACKTR'

    num_updates = int(args.num_frames) // args.num_steps // args.num_processes

    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    toprint = ['seed', 'lr', 'nat', 'resnet']
    if args.env_name in IMG_ENVS:
        toprint += ['window', 'max_steps']
    toprint.sort()
    name = args.tag
    args_param = vars(args)
    os.makedirs(os.path.join(args.out_dir, args.env_name), exist_ok=True)
    for arg in toprint:
        if arg in args_param and (args_param[arg] or arg in ['gamma', 'seed']):
            if args_param[arg] is True:
                name += '{}_'.format(arg)
            else:
                name += '{}{}_'.format(arg, args_param[arg])
    model_dir = os.path.join(args.out_dir, args.env_name, args.algo)
    os.makedirs(model_dir, exist_ok=True)

    results_dict = {'episodes': [], 'rewards': [], 'args': args}
    torch.set_num_threads(1)
    eval_env = make_env(args,
                        'cifar10',
                        args.seed,
                        1,
                        None,
                        args.add_timestep,
                        natural=args.nat,
                        train=False)
    envs = make_env(args,
                    'cifar10',
                    args.seed,
                    1,
                    None,
                    args.add_timestep,
                    natural=args.nat,
                    train=True)

    #print(envs)
    # envs = envs[0]

    # if args.num_processes > 1:
    #     envs = SubprocVecEnv(envs)
    # else:
    #     envs = DummyVecEnv(envs)
    # eval_env = DummyVecEnv(eval_env)
    # if len(envs.observation_space.shape) == 1:
    #     envs = VecNormalize(envs, gamma=args.gamma)

    obs_shape = envs.observation_space.shape
    obs_shape = (obs_shape[0] * args.num_stack, *obs_shape[1:])

    actor_critic = Policy(obs_shape,
                          envs.action_space,
                          args.recurrent_policy,
                          dataset=args.env_name,
                          resnet=args.resnet,
                          pretrained=args.pretrained)
    if envs.action_space.__class__.__name__ == "Discrete":
        action_shape = 1
    else:
        action_shape = envs.action_space.shape[0]

    if args.cuda:
        actor_critic.cuda()

    if args.algo == 'a2c':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               lr=args.lr,
                               eps=args.eps,
                               alpha=args.alpha,
                               max_grad_norm=args.max_grad_norm)
    elif args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm)
    elif args.algo == 'acktr':
        agent = algo.A2C_ACKTR(actor_critic,
                               args.value_loss_coef,
                               args.entropy_coef,
                               acktr=True)

    action_space = envs.action_space
    if args.env_name in IMG_ENVS:
        action_space = np.zeros(2)
    # obs_shape = envs.observation_space.shape
    rollouts = RolloutStorage(args.num_steps, args.num_processes, obs_shape,
                              action_space, actor_critic.state_size)
    current_obs = torch.zeros(args.num_processes, *obs_shape)

    obs = envs.reset()
    update_current_obs(obs, current_obs, obs_shape, args.num_stack)
    rollouts.observations[0].copy_(current_obs)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros([args.num_processes, 1])
    final_rewards = torch.zeros([args.num_processes, 1])

    if args.cuda:
        current_obs = current_obs.cuda()
        rollouts.cuda()

    start = time.time()
    for j in range(num_updates):
        # envs.display_original(j)
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, states = actor_critic.act(
                    rollouts.observations[step], rollouts.states[step],
                    rollouts.masks[step])
            cpu_actions = action.squeeze(1).cpu().numpy()

            # Observe reward and next obs
            obs, reward, done, info = envs.step(cpu_actions)

            # envs.display_step(step, j)

            # print("OBS", obs)

            # print("REWARD", reward)
            # print("DONE", done)
            # print("INFO", info)

            reward = torch.from_numpy(np.expand_dims(np.stack([reward]),
                                                     1)).float()
            episode_rewards += reward

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in [done]])
            final_rewards *= masks
            final_rewards += (1 - masks) * episode_rewards
            episode_rewards *= masks

            if args.cuda:
                masks = masks.cuda()

            if current_obs.dim() == 4:
                current_obs *= masks.unsqueeze(2).unsqueeze(2)
            else:
                current_obs *= masks

            update_current_obs(obs, current_obs, obs_shape, args.num_stack)
            rollouts.insert(current_obs, states, action, action_log_prob,
                            value, reward, masks)

            # print("envs.curr_img SHAPE: ", envs.curr_img.shape)
            #display_state = envs.curr_img
            # display_state[:, envs.pos[0]:envs.pos[0]+envs.window, envs.pos[1]:envs.pos[1]+envs.window] = 5
            # display_state = custom_replace(display_state, 1, 0)
            # display_state[:, envs.pos[0]:envs.pos[0]+envs.window, envs.pos[1]:envs.pos[1]+envs.window] = \
            #     envs.curr_img[:, envs.pos[0]:envs.pos[0]+envs.window, envs.pos[1]:envs.pos[1]+envs.window]
            # img = transforms.ToPILImage()(display_state)
            # img.save("state_cifar/"+"state"+str(j)+"_"+str(step)+".png")

        with torch.no_grad():
            next_value = actor_critic.get_value(rollouts.observations[-1],
                                                rollouts.states[-1],
                                                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.tau)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        if j % args.save_interval == 0:
            torch.save((actor_critic.state_dict(), results_dict),
                       os.path.join(model_dir,
                                    name + 'cifar_model_ppo_ex1_center.pt'))

        if j % args.log_interval == 0:
            end = time.time()
            total_reward = eval_episode(eval_env, actor_critic, args)

            results_dict['rewards'].append(total_reward)
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            print(
                "Updates {}, num timesteps {}, FPS {}, reward {:.1f} entropy {:.5f}, value loss {:.5f}, policy loss {:.5f}"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        np.mean(results_dict['rewards'][-10:]), dist_entropy,
                        value_loss, action_loss))

            plot_rewards.append(np.mean(results_dict['rewards'][-10:]))
            plot_policy_loss.append(action_loss)
            plot_value_loss.append(value_loss)

    plt.plot(range(len(plot_rewards)), plot_rewards)
    plt.savefig("rewards_center.png")
    plt.close()

    plt.plot(range(len(plot_policy_loss)), plot_policy_loss)
    plt.savefig("policyloss_center.png")
    plt.close()

    plt.plot(range(len(plot_value_loss)), plot_value_loss)
    plt.savefig("valueloss_center.png")
    plt.close()
Exemplo n.º 22
0
def load_tnews_data(data_path, data_type, tokenizer, few_shot=False):
    args = get_args()

    filename = os.path.join(data_path, data_type+'.json')
    objs = []
    with open(filename) as fin:
        for line in fin:
            objs.append(json.loads(line.strip()))

    pad_id = tokenizer.encoder['<pad>']
    args.eod_token = tokenizer.encoder['<eod>']

    labels = []
    label_map = {}
    label_reverse = {}
    with open(os.path.join(data_path, 'labels.json')) as fin:
        for i, line in enumerate(fin):
            obj = json.loads(line.strip())
            labels.append(obj['label_desc'])
            label_map[obj['label_desc']] = i
            label_reverse[obj['label']] = obj['label_desc']

    all_tokens = []
    all_masks = []
    all_labels = []
    for _, obj in enumerate(objs):
        sentence = obj['sentence']
        tokenized_sentence = tokenizer.encode(sentence)[:args.seq_length-20]
        obj['label_desc'] = label_reverse[obj['label']]

        if few_shot:
            cur_labels = random.sample(labels, 3)
            while obj['label_desc'] in cur_labels:
                cur_labels = random.sample(labels, 3)
            cur_labels.append(obj['label_desc'])
            cur_label = cur_labels.index(obj['label_desc'])
            assert cur_label != -1
        else:
            cur_labels = labels
            cur_label = label_map[obj['label_desc']]

        all_labels.append(cur_label)

        for _, label in enumerate(cur_labels):
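            # the Chinese prompt reads "This is an article about {}:" for each candidate label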
            prompt = "这是关于{}的文章:".format(label)
            prompt_tokens = tokenizer.encode(prompt)
            prompt_len = len(prompt_tokens)
            tokens = prompt_tokens + tokenized_sentence
            second_mask = [0] * (args.seq_length-1)
            for idx in range(prompt_len-1, len(tokens)-1):
                second_mask[idx] = 1
            all_masks.append(second_mask)
            token_length = len(tokens)
            assert token_length < args.seq_length
            tokens.extend([pad_id] * (args.seq_length - token_length))
            all_tokens.append(tokens)
    
    all_tokens = torch.tensor(all_tokens, dtype=torch.long)
    all_masks = torch.tensor(all_masks, dtype=torch.float)
    dataset = TensorDataset(all_tokens, all_masks)

    # Data parallel arguments.
    world_size = mpu.get_data_parallel_world_size()
    rank = mpu.get_data_parallel_rank()
    global_batch_size = args.batch_size * world_size
    num_workers = args.num_workers

    sampler = torch.utils.data.SequentialSampler(dataset)
    batch_sampler = DistributedBatchSampler(sampler=sampler,
                                            batch_size=global_batch_size,
                                            drop_last=True,
                                            rank=rank,
                                            world_size=world_size)
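    # the batch sampler hands each data-parallel rank its shard of every global batch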
    
    # Torch dataloader.
    return torch.utils.data.DataLoader(dataset,
                                       batch_sampler=batch_sampler,
                                       num_workers=num_workers,
                                       pin_memory=True), all_labels
Exemplo n.º 23
0
def main():
    args = get_args()
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    log_dir = os.path.expanduser(args.log_dir)
    eval_log_dir = log_dir + "_eval"
    utils.cleanup_log_dir(log_dir)
    utils.cleanup_log_dir(eval_log_dir)

    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")


    base = SEVN

    actor_critic, obs_rms = torch.load(save_dir, map_location=device)
    actor_critic.to(device)
    actor_critic.max_eval_success_rate = 0
    print("Passed!")
    num_processes = args.num_processes
    eval_recurrent_hidden_states = torch.zeros(
        args.num_processes, actor_critic.recurrent_hidden_state_size, device=device)
    eval_masks = torch.zeros(num_processes, 1, device=device)
    x = 0
    while x < 10:
        torch.manual_seed(args.seed + x)
        torch.cuda.manual_seed_all(args.seed + x)
        eval_envs = make_vec_envs(args.env_name, args.seed + x, args.num_processes,
                         args.gamma, args.log_dir, device, False, args.custom_gym)
        eval_episode_rewards = []
        eval_episode_length = []
        eval_episode_success_rate = []
        obs = eval_envs.reset()
        while len(eval_episode_rewards) < num_processes*100:
            with torch.no_grad():
                _, action, _, eval_recurrent_hidden_states = actor_critic.act(
                    obs,
                    eval_recurrent_hidden_states,
                    eval_masks,
                    deterministic=True)
            eval_envs.render()
            obs, _, done, infos = eval_envs.step(action)

            eval_masks = torch.tensor(
                [[0.0] if done_ else [1.0] for done_ in done],
                dtype=torch.float32,
                device=device)

            for info in infos:
                if 'episode' in info.keys():
                    if info['was_successful_trajectory'] and args.mod:
                        # Modified reward function: count a successful episode as a fixed reward of 10
                        eval_episode_rewards.append(10)
                    else:
                        eval_episode_rewards.append(info['episode']['r'])
                    eval_episode_length.append(info['episode']['l'])
                    eval_episode_success_rate.append(info['was_successful_trajectory'])
        x+=1
        print(" Evaluation using {} episodes: mean reward {:.5f}, mean_length {:.2f}, mean_success {:.2f} \n".format(
        len(eval_episode_rewards), np.mean(eval_episode_rewards), np.mean(eval_episode_length), np.mean(eval_episode_success_rate)))    

    eval_envs.close()

    print(eval_episode_rewards)
    print(eval_episode_success_rate)
Exemplo n.º 24
0
    e = 0
    while stats['total_samples'] < args.max_samples:
        train(args, env, model, opt, opt_v, kf, stats, ep=e)
        avg_eval = eval(args, env, model, stats)
        log_writer.writerow([
            stats['total_samples'], stats['max_reward'], stats['avg_reward'],
            avg_eval
        ])
        log_file.flush()
        e += 1
        print("total samples: ", stats['total_samples'],
              stats['total_samples'] - last_iter_samples)
        last_iter_samples = stats['total_samples']
        # save when evaluation improves, or when at least 10000 samples have
        # passed since the last checkpoint
        if avg_eval > best_eval or stats['total_samples'] - last_save_step > 10000:
            best_eval = avg_eval
            last_save_step = stats['total_samples']
            # save model if evaluation was better
            torch.save(
                model.state_dict(),
                os.path.join(
                    args.log_dir, "model_ep" + str(e) + "_samples" +
                    str(stats['total_samples']) + "_eval" + str(avg_eval) +
                    ".pth"))
    log_file.close()


if __name__ == '__main__':
    import arguments
    optimize(arguments.get_args())
Exemplo n.º 25
0
def main():
    """Main training program."""

    global global_example_count, global_token_count, event_writer, logdir, train_step, train_loss, best_val_loss, eval_start_time, log_start_time, epoch

    global_token_count = 0

    # Arguments.
    args = get_args()

    # global global_example_count, global_token_count, event_writer, logdir
    logdir = f'{args.logdir}'
    os.system(f'mkdir -p {logdir}')

    event_writer = SummaryWriter(logdir)
    log_tb("first", time.time())
    print('Pretrain BERT model')

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Pytorch distributed.
    initialize_distributed(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # Data stuff.
    data_config = configure_data()
    data_config.set_defaults(data_set_type='BERT', transpose=False)
    (train_data, val_data, test_data), tokenizer = data_config.apply(args)
    args.data_size = tokenizer.num_tokens

    # Model, optimizer, and learning rate.
    model, optimizer, lr_scheduler, criterion = setup_model_and_optimizer(
        args, tokenizer)

    # At any point you can hit Ctrl + C to break out of training early.
    try:
        total_iters = 0
        skipped_iters = 0
        start_epoch = 1
        best_val_loss = float('inf')
        # Resume data loader if necessary.
        if args.resume_dataloader:
            start_epoch = args.epoch
            total_iters = args.total_iters
            train_data.batch_sampler.start_iter = total_iters % len(train_data)
        # For all epochs.
        for epoch in range(start_epoch, args.epochs + 1):
            timers('epoch time').start()
            iteration, skipped = train_epoch(epoch, model, optimizer,
                                             train_data, lr_scheduler,
                                             criterion, timers, args)
            elapsed_time = timers('epoch time').elapsed()
            total_iters += iteration
            skipped_iters += skipped
            lm_loss, nsp_loss = evaluate(val_data, model, criterion, args)
            val_loss = lm_loss + nsp_loss
            print('-' * 100)
            print(
                '| end of epoch {:3d} | time: {:5.2f}s | valid loss {:.4E} | '
                'valid LM Loss {:.4E} | valid NSP Loss {:.4E}'.format(
                    epoch, elapsed_time, val_loss, lm_loss, nsp_loss))
            print('-' * 100)
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                if args.save:
                    best_path = 'best/model.pt'
                    print('saving best model to:',
                          os.path.join(args.save, best_path))
                    save_checkpoint(best_path, epoch + 1, total_iters, model,
                                    optimizer, lr_scheduler, args)

    except KeyboardInterrupt:
        print('-' * 100)
        print('Exiting from training early')
        if args.save:
            cur_path = 'current/model.pt'
            print('saving current model to:',
                  os.path.join(args.save, cur_path))
            save_checkpoint(cur_path, epoch, total_iters, model, optimizer,
                            lr_scheduler, args)
        exit()

    if args.save:
        final_path = 'final/model.pt'
        print('saving final model to:', os.path.join(args.save, final_path))
        save_checkpoint(final_path, args.epochs, total_iters, model, optimizer,
                        lr_scheduler, args)

    if test_data is not None:
        # Run on test data.
        print('entering test')
        lm_loss, nsp_loss = evaluate(test_data, model, criterion, args)
        test_loss = lm_loss + nsp_loss
        print('=' * 100)
        print('| End of training | test loss {:5.4f} | valid LM Loss {:.4E} |'
              ' valid NSP Loss {:.4E}'.format(test_loss, lm_loss, nsp_loss))
        print('=' * 100)
Exemplo n.º 26
0
    np.save('MRR' + str(opt.num_qbots) + str(opt.num_abots),
            mean_rec_rank_final.cpu().numpy())
    np.save('r1' + str(opt.num_qbots) + str(opt.num_abots),
            r1_final.cpu().numpy())
    np.save('r5' + str(opt.num_qbots) + str(opt.num_abots),
            r5_final.cpu().numpy())
    np.save('r10' + str(opt.num_qbots) + str(opt.num_abots),
            r10_final.cpu().numpy())
    return


##############################
# Main Code Execution Starts Here
##############################

opt = get_args()
opt.manualSeed = random.randint(1, 10000)
random.seed(opt.manualSeed)
torch.manual_seed(opt.manualSeed)
np.random.seed(opt.manualSeed)
if opt.cuda:
    torch.cuda.manual_seed_all(opt.manualSeed)

if torch.cuda.is_available() and not opt.cuda:
    print(
        "WARNING: You have a CUDA device, so you should probably run with --cuda"
    )

t = datetime.datetime.now()
cur_time = '%s-%s-%s' % (t.day, t.month, t.hour)
save_path = os.path.join(opt.outf, cur_time)
Exemplo n.º 27
0
def apt_select():
    """Run apt-select: Ubuntu archive mirror reporting tool"""
    parser = get_args()
    args = parser.parse_args()
    top_number = args.top_number[0]
    ping_only = args.ping_only
    list_only = args.list_only
    choose = args.choose
    min_status = args.min_status[0].replace('-', ' ')

    if not ping_only and (min_status != 'unknown'):
        # Convert status argument to format used by Launchpad
        min_status = min_status[0].upper() + min_status[1:]

    if choose and (not top_number or top_number < 2):
        parser.print_usage()
        exit((
            "error: -c/--choose option requires -t/--top-number NUMBER "
            "where NUMBER is greater than 1."
        ))

    try:
        release = check_output(["lsb_release", "-ics"])
    except OSError:
        not_ubuntu()
    else:
        release = [s.strip() for s in release.decode('utf-8').split()]

    if release[0] == 'Debian':
        exit("Debian is not currently supported")
    elif release[0] != 'Ubuntu':
        not_ubuntu()

    directory = '/etc/apt/'
    apt_file = 'sources.list'
    sources_path = directory + apt_file
    if not path.isfile(sources_path):
        exit("%s must exist as file" % sources_path)

    mirrors_loc = "mirrors.ubuntu.com"
    mirrors_url = "http://%s/mirrors.txt" % mirrors_loc
    stderr.write("Getting list of mirrors...")
    try:
        mirrors_list = get_html(mirrors_url)
    except HTMLGetError as err:
        exit("Error getting list from %s:\n\t%s" % (mirrors_url, err))
    stderr.write("done.\n")
    mirrors_list = mirrors_list.splitlines()

    codename = release[1][0].upper() + release[1][1:]
    hardware = check_output(["uname", "-m"]).strip().decode('utf-8')
    if hardware == 'x86_64':
        hardware = 'amd64'
    else:
        hardware = 'i386'

    archives = Mirrors(mirrors_list, ping_only, min_status)
    archives.get_rtts()
    if archives.got["ping"] < top_number:
        top_number = archives.got["ping"]

    if top_number == 0:
        exit("Cannot connect to any mirrors in %s.\n" % mirrors_url)

    if not ping_only:
        archives.get_launchpad_urls()
        if not archives.abort_launch:
            # Mirrors needs a limit to stop launching threads
            archives.status_num = top_number
            stderr.write("Looking up %d status(es)\n" % top_number)
            archives.lookup_statuses(min_status, codename, hardware)

        if top_number > 1:
            stderr.write('\n')

    repo_name = ""
    found = False
    skip_gen_msg = "Skipping file generation."
    with open(sources_path, 'r') as sources_file:
        lines = sources_file.readlines()
        repos = []
        required_repo = "main"
        for line in lines:
            fields = line.split()
            if confirm_mirror(fields):
                if (not found and
                        (release[1] in fields[2]) and
                        (fields[3] == required_repo)):
                    repos += [fields[1]]
                    found = True
                    continue
                elif fields[2] == '%s-security' % (release[1]):
                    repos += [fields[1]]
                    break

        if not repos:
            stderr.write((
                "Error finding current %s repository in %s\n%s\n" %
                (required_repo, sources_path, skip_gen_msg)
            ))
        else:
            repo_name = repos[0]

    rank = 0
    current_key = -1
    if ping_only:
        archives.top_list = archives.ranked[:top_number+1]

    for url in archives.top_list:
        info = archives.urls[url]
        host = info["Host"]
        if url == repo_name:
            host += " (current)"
            current_key = rank

        if not ping_only and not archives.abort_launch:
            if "Status" in info:
                assign_defaults(info, ("Org", "Speed"), "N/A")
                print((
                    "%(rank)d. %(mirror)s\n%(tab)sLatency: %(ms)d ms\n"
                    "%(tab)sOrg:     %(org)s\n%(tab)sStatus:  %(status)s\n"
                    "%(tab)sSpeed:   %(speed)s" % {
                        'tab': '    ',
                        'rank': rank + 1,
                        'mirror': host,
                        'ms': info["Latency"],
                        'org': info["Organisation"],
                        'status': info["Status"],
                        'speed': info["Speed"]
                    }
                ))
        else:
            print("%d. %s: %d ms" % (rank+1, info["Host"], info["Latency"]))

        rank += 1
        if rank == top_number:
            break

    key = 0
    if choose:
        key = ask((
            "Choose a mirror (1 - %d)\n'q' to quit " %
            len(archives.top_list)
        ))
        while True:
            try:
                key = int(key)
            except ValueError:
                if key == 'q':
                    exit()

            if (type(key) is not str) and (key >= 1) and (key <= rank):
                break

            key = ask("Invalid entry ")

        key -= 1

    if list_only:
        exit()

    # Avoid generating duplicate sources.list
    if current_key == key:
        exit((
            "%s is the currently used mirror.\n%s" %
            (archives.urls[repo_name]["Host"], skip_gen_msg)
        ))

    mirror = archives.top_list[key]
    lines = ''.join(lines)
    for repo in repos:
        lines = lines.replace(repo, mirror)

    work_dir = getcwd()
    if work_dir == directory[0:-1]:
        query = (
            "'%(dir)s' is the current directory.\n"
            "Generating a new '%(apt)s' file will "
            "overwrite the current file.\n"
            "You should copy or backup '%(apt)s' before replacing it.\n"
            "Continue?\n[yes|no] " % {
                'dir': directory,
                'apt': apt_file
            }
        )
        yes_or_no(query)

    write_file = work_dir.rstrip('/') + '/' + apt_file
    try:
        with open(write_file, 'w') as sources_file:
            sources_file.write(lines)
    except IOError as err:
        exit("Unable to generate sources.list:\n\t%s\n" % err)
    else:
        print("New config file saved to %s" % write_file)

    exit()
Exemplo n.º 28
0
import torch
import torch.utils.data as td
import numpy as np
import scipy.io as sio

import data_handler
import networks
import trainer
import arguments
# import deepspeed
from sklearn.utils import shuffle
from sklearn.metrics import roc_auc_score
from tqdm import tqdm

args = arguments.get_args()

torch.set_default_tensor_type('torch.cuda.FloatTensor')

torch.backends.cudnn.benchmark = True
torch.backends.cudnn.deterministic = True
dataset = data_handler.DatasetFactory.get_dataset(args.dataset)

#loader = dataset.loader
seed = args.seed
m = args.memory_budget

# Fix the seed.
args.seed = seed
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
Exemplo n.º 29
0
def main():
    """Main training program."""

    # Disable CuDNN.
    torch.backends.cudnn.enabled = False

    # Timer.
    timers = Timers()

    # Arguments.
    args = get_args()

    # Pytorch distributed.
    initialize_distributed(args)
    if torch.distributed.get_rank() == 0:
        print('Pretrain GPT2 model')
        print_args(args)

    # Random seeds for reproducibility.
    set_random_seed(args.seed)

    # prepare log file
    os.makedirs(args.save, exist_ok=True)
    with open(args.log_file, "w") as f:
        f.write("Logging:\n")

    # Model, optimizer, and learning rate.
    with open(args.student_config_path, "r") as f:
        student_config = json.load(f)

    student_model, optimizer, lr_scheduler, student_iteration = setup_model_and_optimizer(
        args,
        student_config,
        need_optim=True,
        ckpt_path=args.student_load,
        do_fp16=args.fp16)

    args.iteration = student_iteration

    teacher_model = None
    if args.teacher_config_path is not None:
        with open(args.teacher_config_path, "r") as f:
            teacher_config = json.load(f)
        teacher_model, _, _, _ = setup_model_and_optimizer(
            args,
            teacher_config,
            need_optim=True,
            ckpt_path=args.teacher_load,
            do_fp16=(args.fp16 or args.teacher_fp16))

    if torch.distributed.get_rank() == 0:
        print(student_iteration)

    train_data_iterator, val_data_iterator, test_data_iterator = \
        build_train_valid_test_data_iterators(
            train_valid_test_dataset_provider, args)

    iteration = 0
    if args.do_train:
        iteration, skipped = train(student_model, teacher_model, optimizer,
                                   lr_scheduler, train_data_iterator,
                                   val_data_iterator, timers, args)

        prefix = 'the end of training for val data'
        evaluate_and_print_results(prefix, val_data_iterator, student_model,
                                   teacher_model, args, timers, False)

    if args.save and iteration != 0:
        save_checkpoint(iteration, student_model, optimizer, lr_scheduler,
                        args)

    if args.do_test:
        # Run on test data.
        prefix = 'the end of training for test data'
        evaluate_and_print_results(prefix, test_data_iterator, student_model,
                                   teacher_model, args, timers, True)
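The train routine that consumes student_model and teacher_model is not part of this excerpt. For orientation only, a common knowledge-distillation objective (a sketch under assumed names and hyperparameters, not the loss used by this script) blends the ordinary LM loss with a temperature-softened KL term against the teacher:

import torch.nn.functional as F

def distillation_loss(student_logits, teacher_logits, lm_loss, temperature=2.0, alpha=0.5):
    """Blend the hard-label LM loss with a softened teacher/student KL term (sketch)."""
    t = temperature
    soft_loss = F.kl_div(
        F.log_softmax(student_logits / t, dim=-1),
        F.softmax(teacher_logits / t, dim=-1),
        reduction='batchmean',
    ) * (t * t)  # rescale so the gradient magnitude is comparable across temperatures
    return alpha * soft_loss + (1.0 - alpha) * lm_loss

In this pattern the teacher forward pass runs under torch.no_grad(), so only the student receives gradients.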
Exemplo n.º 30
0
    if is_value_list:
        for rk, rv in sorted(results_dict.items()):
            if keys_to_print is not None and rk not in keys_to_print:
                continue
            cprint_and_append("{}: {}".format(rk, rv))
    return line_list


if __name__ == '__main__':

    num_total_runs = 7

    main_args = get_args(
        model_name="GAT",  # GAT, GCN
        dataset_class="Planetoid",  # Planetoid, FullPlanetoid, RandomPartitionGraph
        dataset_name="Cora",  # Cora, CiteSeer, PubMed, rpg-10-500-0.1-0.025
        custom_key="EV13NSO8",  # NEO8, NEDPO8, EV13NSO8, EV9NSO8, EV1O8, EV2O8, -500, -Link, -ES, -ATT
    )
    pprint_args(main_args)

    if len(main_args.gpu_deny_list) == main_args.num_gpus_total:
        alloc_gpu = [None]
        cprint("Use CPU", "yellow")
    else:
        alloc_gpu = blind_other_gpus(num_gpus_total=main_args.num_gpus_total,
                                     num_gpus_to_use=main_args.num_gpus_to_use,
                                     gpu_deny_list=main_args.gpu_deny_list)
        if not alloc_gpu:
            alloc_gpu = [
                int(
Exemplo n.º 31
0
def main():
    config = None
    args = get_args()
    config, checkpoint = get_config_and_checkpoint(args)

    set_random_seeds(args, config)
    eval_log_dir = args.save_dir + "_eval"
    try:
        os.makedirs(args.save_dir)
        os.makedirs(eval_log_dir)
    except OSError:
        pass

    now = datetime.datetime.now()
    experiment_name = args.experiment_name + '_' + now.strftime("%Y-%m-%d_%H-%M-%S")

    # Create checkpoint file
    save_dir_model = os.path.join(args.save_dir, 'model', experiment_name)
    save_dir_config = os.path.join(args.save_dir, 'config', experiment_name)
    try:
        os.makedirs(save_dir_model)
        os.makedirs(save_dir_config)
    except OSError as e:
        # the carla logger is created further below, so report the error directly
        print(e)
        exit()

    if args.config:
        shutil.copy2(args.config, save_dir_config)

    curriculum = args.follow_curriculum
    if args.follow_curriculum:
        print('Using preset curriculum')

    # Tensorboard Logging
    writer = SummaryWriter(os.path.join(args.save_dir, 'tensorboard', experiment_name))

    # Logger that writes to STDOUT and a file in the save_dir
    logger = setup_carla_logger(args.save_dir, experiment_name)

    device = torch.device("cuda:0" if args.cuda else "cpu")
    norm_reward = not config.no_reward_norm
    norm_obs = not config.no_obs_norm

    assert not (config.num_virtual_goals > 0) or (config.reward_class == 'SparseReward'), "Can't use HER with a dense reward"
    obs_converter = CarlaObservationConverter(h=84, w=84, rel_coord_system=config.rel_coord_system)
    action_converter = CarlaActionsConverter(config.action_type)
    envs = make_vec_envs(obs_converter, action_converter, args.starting_port, config.seed, config.num_processes,
                                config.gamma, device, config.reward_class, num_frame_stack=1, subset=config.experiments_subset,
                                norm_reward=norm_reward, norm_obs=norm_obs, apply_her=config.num_virtual_goals > 0,
                                video_every=args.video_interval,
                                video_dir=os.path.join(args.save_dir, 'video',
                                    experiment_name),
                                curriculum=curriculum)

    if config.agent == 'forward':
        agent = agents.ForwardCarla()

    if config.agent == 'vpg':
        agent = agents.VPGCarla(obs_converter,
                                action_converter,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps, alpha=config.alpha,
                                gamma=config.gamma,
                                max_grad_norm=config.max_grad_norm)

    if config.agent == 'a2c':
        agent = agents.A2CCarla(obs_converter,
                                action_converter,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps, alpha=config.alpha,
                                max_grad_norm=config.max_grad_norm)

    elif config.agent == 'acktr':
        agent = agents.A2CCarla(obs_converter,
                                action_converter,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps, alpha=config.alpha,
                                max_grad_norm=config.max_grad_norm,
                                acktr=True)

    elif config.agent == 'ppo':
        agent = agents.PPOCarla(obs_converter,
                                action_converter,
                                config.clip_param,
                                config.ppo_epoch,
                                config.num_mini_batch,
                                config.value_loss_coef,
                                config.entropy_coef,
                                lr=config.lr,
                                eps=config.eps,
                                max_grad_norm=config.max_grad_norm)

    if checkpoint is not None:
        load_modules(agent.optimizer, agent.model, checkpoint)

    rollouts = RolloutStorage(config.num_steps, config.num_processes,
                        envs.observation_space, envs.action_space, 20,
                        config.num_virtual_goals, config.rel_coord_system, obs_converter)

    obs = envs.reset()
    # Save the first observation
    obs = obs_to_dict(obs)
    rollouts.obs = obs_to_dict(rollouts.obs)
    for k in rollouts.obs:
        rollouts.obs[k][rollouts.step + 1].copy_(obs[k])
    rollouts.obs = dict_to_obs(rollouts.obs)
    rollouts.to(device)

    start = time.time()


    total_steps = 0
    total_episodes = 0
    total_reward = 0

    episode_reward = torch.zeros(config.num_processes)


    for j in range(config.num_updates):

        for step in range(config.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = agent.act(
                        rollouts.get_obs(step),
                        rollouts.recurrent_hidden_states[step],
                        rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, info = envs.step(action)

            # For logging purposes
            carla_rewards = torch.tensor([i['carla-reward'] for i in info], dtype=torch.float)
            episode_reward += carla_rewards
            total_reward += carla_rewards.sum().item()
            total_steps += config.num_processes * config.num_steps

            if done.any():
                total_episodes += done.sum()
                torch_done = torch.tensor(done.astype(int)).byte()
                mean_episode_reward = episode_reward[torch_done].mean().item()
                logger.info('{} episode(s) finished with reward {}'.format(done.sum(), mean_episode_reward))
                writer.add_scalar('train/mean_ep_reward_vs_steps', mean_episode_reward, total_steps)
                writer.add_scalar('train/mean_ep_reward_vs_episodes', mean_episode_reward, total_episodes)
                episode_reward[torch_done] = 0

            # If done then clean the history of observations.
            masks = torch.FloatTensor(1-done)

            rollouts.insert(obs, recurrent_hidden_states, action, action_log_prob, value, reward, masks.unsqueeze(-1))

        # Hindsight Experience Replay: relabel the stored rollout with achieved
        # states as virtual goals, so sparse-reward episodes still provide signal
        if config.num_virtual_goals > 0:
            rollouts.apply_her(config.num_virtual_goals, device, beta=config.beta)

        with torch.no_grad():
            next_value = agent.get_value(rollouts.get_obs(-1), # Get last observation
                                         rollouts.recurrent_hidden_states[-1],
                                         rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, config.use_gae, config.gamma, config.tau)


        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        if j % args.save_interval == 0 and args.save_dir != "" and config.agent !='forward':
            save_path = os.path.join(save_dir_model, str(j) + '.pth.tar')
            save_modules(agent.optimizer, agent.model, args, config, save_path)

        total_num_steps = (j + 1) * config.num_processes * config.num_steps

        if j % args.log_interval == 0:

            # Logging to the stdout/our logs
            end = time.time()
            logger.info('------------------------------------')
            logger.info('Episodes {}, Updates {}, num timesteps {}, FPS {}'\
                .format(total_episodes, j + 1, total_num_steps, total_num_steps / (end - start)))
            logger.info('------------------------------------')


            # Logging to tensorboard
            writer.add_scalar('train/cum_reward_vs_steps', total_reward, total_steps)
            writer.add_scalar('train/cum_reward_vs_updates', total_reward, j+1)

            if config.agent in ['a2c', 'acktr', 'ppo']:
                writer.add_scalar('debug/value_loss_vs_steps', value_loss, total_steps)
                writer.add_scalar('debug/value_loss_vs_updates', value_loss, j+1)
                writer.add_scalar('debug/action_loss_vs_steps', action_loss, total_steps)
                writer.add_scalar('debug/action_loss_vs_updates', action_loss, j+1)
                writer.add_scalar('debug/dist_entropy_vs_steps', dist_entropy, total_steps)
                writer.add_scalar('debug/dist_entropy_vs_updates', dist_entropy, j+1)

            # Sample the last reward
            writer.add_scalar('debug/sampled_normalized_reward_vs_steps', reward.mean(), total_steps)
            writer.add_scalar('debug/sampled_normalized_reward_vs_updates', reward.mean(), j+1)
            writer.add_scalar('debug/sampled_carla_reward_vs_steps', carla_rewards.mean(), total_steps)
            writer.add_scalar('debug/sampled_carla_reward_vs_updates', carla_rewards.mean(), j+1)

        if (args.eval_interval is not None and j % args.eval_interval == 0):
            eval_envs = make_vec_envs(
                args.env_name, args.starting_port, obs_converter, args.x + config.num_processes, config.num_processes,
                config.gamma, eval_log_dir, config.add_timestep, device, True,
                curriculum)

            vec_norm = get_vec_normalize(eval_envs)
            if vec_norm is not None:
                vec_norm.ob_rms = get_vec_normalize(envs).ob_rms

            eval_episode_rewards = []

            obs = eval_envs.reset()
            eval_recurrent_hidden_states = torch.zeros(config.num_processes,
                            20, device=device)
            eval_masks = torch.zeros(config.num_processes, 1, device=device)

            while len(eval_episode_rewards) < 10:
                with torch.no_grad():
                    _, action, _, eval_recurrent_hidden_states = agent.act(
                        obs, eval_recurrent_hidden_states, eval_masks, deterministic=True)

                # Observe reward and next obs
                carla_obs, reward, done, infos = eval_envs.step(action)

                eval_masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                                for done_ in done])
                for info in infos:
                    if 'episode' in info.keys():
                        eval_episode_rewards.append(info['episode']['r'])

            eval_envs.close()

            logger.info(" Evaluation using {} episodes: mean reward {:.5f}\n".
                format(len(eval_episode_rewards),
                       np.mean(eval_episode_rewards)))
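The evaluation block above copies the observation-normalization statistics from the training envs into the eval envs via a get_vec_normalize helper that is not shown. A minimal sketch of such a helper, assuming the baselines-style VecNormalize/VecEnvWrapper classes typically used with this kind of training code (the import paths are an assumption), could look like this:

# Hypothetical sketch: walk the vec-env wrapper chain until a VecNormalize
# layer is found, so its running ob_rms statistics can be shared.
from baselines.common.vec_env import VecEnvWrapper
from baselines.common.vec_env.vec_normalize import VecNormalize


def get_vec_normalize(venv):
    if isinstance(venv, VecNormalize):
        return venv
    elif isinstance(venv, VecEnvWrapper):
        return get_vec_normalize(venv.venv)
    return None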
Exemplo n.º 32
0
import os
import random
import sys
from pathlib import Path
from shutil import copyfile

from arguments import get_args

random.seed(1)

args = get_args('args for datasplit (cityscapes)', mode='data')

current_path = os.path.abspath('')
train_path = "/home/himanshu/cityscape/leftImg8bit/train"
train_path_lb = "/home/himanshu/cityscape/gtFine/train"
perc = args.percentage

if perc < 0 or perc > 100:
    print('-p must be between 0 and 100')
    sys.exit(1)  # exit with a non-zero status on invalid input

destination = Path("/home/akshay/cityscape/frac" + str(perc) +
                   "/leftImg8bit/train")
os.makedirs(str(destination), exist_ok=True)

destination_lb = Path("/home/akshay/cityscape/frac" + str(perc) +
                      "/gtFine/train")
os.makedirs(str(destination_lb), exist_ok=True)

for folder in os.listdir(train_path):
    des1 = Path(destination / folder)
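The example above is truncated inside the per-city loop. A minimal sketch of how the remaining copy step might look, assuming the standard Cityscapes layout in which each leftImg8bit image <city>_<seq>_<frame>_leftImg8bit.png has matching gtFine label files sharing the same prefix (everything beyond the loop header is an assumption):

# Hypothetical continuation: sample `perc` percent of the images in each city
# folder and copy both the image and its gtFine label files.
for folder in os.listdir(train_path):
    des1 = Path(destination / folder)
    des1_lb = Path(destination_lb / folder)
    os.makedirs(str(des1), exist_ok=True)
    os.makedirs(str(des1_lb), exist_ok=True)

    src = Path(train_path) / folder
    src_lb = Path(train_path_lb) / folder

    files = sorted(os.listdir(str(src)))
    n_keep = max(1, int(len(files) * perc / 100))
    for fname in random.sample(files, n_keep):
        copyfile(str(src / fname), str(des1 / fname))
        # gtFine files share the '<city>_<seq>_<frame>_' prefix.
        prefix = fname.replace('leftImg8bit.png', '')
        for lb_name in os.listdir(str(src_lb)):
            if lb_name.startswith(prefix):
                copyfile(str(src_lb / lb_name), str(des1_lb / lb_name))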
Exemplo n.º 33
0
                feature = model(images.to(args.device))

            preds = classifier(feature)

            loss = F.cross_entropy(preds, labels.to(args.device))

            loss.backward()
            optimizer.step()
            loss_meter.update(loss.item())
            lr = lr_scheduler.step()
            local_progress.set_postfix({
                'lr': lr,
                "loss": loss_meter.val,
                'loss_avg': loss_meter.avg
            })

    # Evaluate the trained linear classifier on frozen features.
    classifier.eval()
    acc_meter.reset()
    for idx, (images, labels) in enumerate(test_loader):
        with torch.no_grad():
            feature = model(images.to(args.device))
            preds = classifier(feature).argmax(dim=1)
            # Per-batch accuracy; acc_meter averages it across batches.
            correct = (preds == labels.to(args.device)).sum().item()
            acc_meter.update(correct / preds.shape[0])
    print(f'Accuracy = {acc_meter.avg * 100:.2f}')


if __name__ == "__main__":
    main(args=get_args())
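The linear-evaluation snippet above assumes loss_meter and acc_meter are running-average trackers exposing .val, .avg, .update() and .reset(). The class itself is not shown in the example; a minimal sketch of such an AverageMeter:

class AverageMeter():
    """Tracks the most recent value and the running average of a metric."""

    def __init__(self):
        self.reset()

    def reset(self):
        self.val = 0.0
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, val, n=1):
        self.val = val
        self.sum += val * n
        self.count += n
        self.avg = self.sum / self.count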
Exemplo n.º 34
0
def main():
    doers = _get_doers(shell)
    doers.update(_get_doers(utils))

    possible_actions = list(doers.keys()) + ['start', 'stop', 'status']

    args = arguments.get_args(possible_actions)

    if args.action is None:
        print ('No action')
        return 65  # os.EX_DATAERR

    apiclient = None
    verify = True
    if args.insecure:
        verify = False

    if args.no_api is False:
        apiclient = client.Client(opts=args, verify=verify)
        if args.client_id:
            apiclient.client_id = args.client_id
    else:
        if winutils.is_windows():
            print("--no-api mode is not available on windows")
            return 69  # os.EX_UNAVAILABLE

    if args.action in doers:
        try:
            return doers[args.action](apiclient, args)
        except Exception as e:
            print ('ERROR {0}'.format(e))
            return 70  # os.EX_SOFTWARE

    freezer_scheduler = FreezerScheduler(apiclient=apiclient,
                                         interval=int(args.interval),
                                         job_path=args.jobs_dir)

    if args.no_daemon:
        print ('Freezer Scheduler running in no-daemon mode')
        daemon = NoDaemon(daemonizable=freezer_scheduler)
    else:
        if winutils.is_windows():
            daemon = Daemon(daemonizable=freezer_scheduler,
                            interval=int(args.interval),
                            job_path=args.jobs_dir,
                            insecure=args.insecure)
        else:
            daemon = Daemon(daemonizable=freezer_scheduler)

    if args.action == 'start':
        daemon.start(log_file=args.log_file)
    elif args.action == 'stop':
        daemon.stop()
    elif args.action == 'reload':
        daemon.reload()
    elif args.action == 'status':
        daemon.status()

    # The os.EX_* return codes are only available on POSIX-like systems, so
    # on Windows we return the equivalent numeric value directly.
    return 0  # os.EX_OK
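The _get_doers helper used at the top of this example is assumed to collect the do_* callables exposed by the shell and utils modules and key them by action name; the exact key format (e.g. mapping underscores to dashes for the CLI) may differ in the real code. A minimal sketch:

# Hypothetical sketch: map every attribute named 'do_<action>' in a module to
# its action name, so it can be dispatched via doers[args.action](apiclient, args).
def _get_doers(module):
    doers = {}
    for attr in (a for a in dir(module) if a.startswith('do_')):
        doers[attr[len('do_'):]] = getattr(module, attr)
    return doers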
Exemplo n.º 35
0
def main():
    args = get_args()

    model_types = ['conv_net', 'conv_net_attn', 'transformer']
    assert args.model_type in model_types

    raw_data = data_utils.read_smiles_ring_data('%s/raw.csv' % args.data)

    atom_predictor, optimizer = init_model(args, args.n_classes)
    data_utils.load_shortest_paths(args)  # Shortest paths includes all splits

    agg_stats = ['loss', 'nei_score', 'acc', 'auc', 'gnorm', 'gnorm_clip']

    selection_stat = 'acc'
    select_higher = True

    if args.test_mode:
        dataset_loaders = load_datasets(raw_data, 0, args)
        test_model(
            dataset_loaders=dataset_loaders,
            model=atom_predictor,
            stat_names=agg_stats,
            train_func=run_epoch,
            args=args,
        )
        return  # test mode: evaluation only, skip the training rounds below

    all_stats = {}
    for name in agg_stats:
        all_stats[name] = []
    output_dir = args.output_dir
    all_model_paths = []

    for round_idx in range(args.n_rounds):
        dataset_loaders = load_datasets(raw_data, round_idx, args, n_workers=0)
        atom_predictor, optimizer = init_model(args, args.n_classes)

        cur_output_dir = '%s/run_%d' % (output_dir, round_idx)
        args.output_dir = cur_output_dir
        create_dirs(args, cur_output_dir)

        test_stats, best_model_path = train_model(
            dataset_loaders=dataset_loaders,
            model=atom_predictor,
            optimizer=optimizer,
            stat_names=agg_stats,
            selection_stat=selection_stat,
            train_func=run_epoch,
            args=args,
            select_higher=select_higher,
        )

        # Aggregate stats of interest
        for name in agg_stats:
            all_stats[name].append(test_stats[name])
        all_model_paths.append(best_model_path)

    # Write summary file
    with open('%s/summary.txt' % output_dir, 'w') as summary_file:
        for name, stats_arr in all_stats.items():
            stats = np.array(stats_arr)
            mean, std = np.mean(stats), np.std(stats)
            stats_string = '%s: %s, mean: %.3f, std: %.3f' % (
                name, str(stats_arr), mean, std)
            print(stats_string)
            summary_file.write('%s\n' % stats_string)

        for model_path in all_model_paths:
            summary_file.write('%s\n' % model_path)