Example #1
    def __init__(self, args, env_params):
        self.o_dim = env_params['o_dim']
        self.a_dim = env_params['a_dim']
        self.r_dim = args.r_dim

        self.lr = args.lr
        self.gamma_e = args.gamma_e
        self.gamma_i = args.gamma_i
        self.lamda = args.lamda
        self.entropy_coef = args.entropy_coef
        self.ex_coef = args.ex_coef
        self.in_coef = args.in_coef
        self.clip_eps = args.clip_eps
        self.update_epoch = args.update_epoch
        self.batch_size = args.batch_size
        self.initialize_episode = args.initialize_episode
        self.update_proportion = args.update_proportion
        self.rollout_len = args.rollout_len
        self.obs_clip = args.obs_clip

        self.device = torch.device(args.device)

        self.actor_critic = CNNActorCritic(in_channel=self.o_dim[0],
                                           a_dim=self.a_dim).to(self.device)
        self.RND = RNDNetwork(in_channel=1).to(self.device)

        self.optimizer = optim.Adam(list(self.actor_critic.parameters()) +
                                    list(self.RND.predictor.parameters()),
                                    lr=self.lr)

        self.buffer = Buffer(capacity=self.rollout_len, o_dim=self.o_dim)

        self.normalizer_obs = Normalizer(shape=self.o_dim, clip=self.obs_clip)
        self.normalizer_ri = Normalizer(shape=1, clip=np.inf)
Example #2
def get_features(arch, size, pooling=True):

    print("## Starting extracting features...")
    if pooling:
        print("## Using pooling..")
    else:
        print("## Not using pooling..")

    # Declare the features extractor
    extractor = FeaturesExtractor(arch)

    normalizer = Normalizer()

    starting = time.time()

    results_features = dict()

    with open('./info/project-info.csv', 'r') as csvfile:

        f_csv = csv.reader(csvfile, delimiter=str(','), quotechar=str('|'))
        next(f_csv)

        for row in f_csv:
            tissue = row[1]
            dye = row[2]
            original_name = row[6]

            if tissue not in results_features:
                results_features[tissue] = dict()
            if dye not in results_features[tissue]:
                results_features[tissue][dye] = None

            patches = get_patches_from_landmarks(tissue,
                                                 original_name,
                                                 size=size)

            nb_of_landmarks = len(patches)

            for landmark_nb, (_, _, patch) in enumerate(patches):

                normalize = normalizer.get(tissue, dye)
                extractor.set_normalize(normalize)

                img = Image.fromarray(patch)

                features = extractor.get_features_from_img(
                    img, size, pooling).cpu().numpy().astype(np.float32)

                if landmark_nb == 0:
                    results_features[tissue][dye] = np.zeros(
                        (nb_of_landmarks, features.shape[0]), dtype=np.float32)

                results_features[tissue][dye][landmark_nb] = features

    print("   Elapsed time : {}".format(time.time() - starting))

    return results_features
Example #3
def process(config, functions, thread_no, bpe):
    print('thread #%d start' % (thread_no))
    thread_start = time.time()
    index = 0
    token_tokenized = ''
    stmt_tokenized = ''
    bpe_tokenized = ''
    ori_untokenized = ''
    preprocessor = PreProcessor()
    special_cutter = SpecialCharCutter(config)
    brace_cutter = BracesCutter()
    normalizer = Normalizer(config)
    extractor = RelationExtractor(config)
    bcb_base = config.get('IO', 'BCB_CODE_BASE')

    for info_str in functions:
        index += 1
        infos = info_str.split(',')
        file_path = os.path.join(bcb_base, os.path.join(infos[0], os.path.join(infos[1], infos[2])))
        start_loc = infos[3]
        end_loc = infos[4]
        with open(file_path, 'r', encoding='iso8859-1') as reader:
            j = 1
            f = ''
            for line in reader.readlines():
                if int(start_loc) <= j <= int(end_loc):
                    f += line.strip() + '\n'
                j += 1
                if j > int(end_loc):
                    break
        f = preprocessor.remove_comments(f)
        f = extract_function_body(f)
        f4ori = f
        f = normalizer.normalize_literal_values(f)
        f = special_cutter.cut(f)
        f = brace_cutter.cut(f)
        _, _, function_bpe, _, bpe_node_list, _ = extractor.extract(f)
        stmt_tokenized += function_bpe + '\nc -1\nh -1\n'
        token_tokenized += re.sub(r'\$\$', ' ', function_bpe) + '\nc -1\nh -1\n'
        function_bpe = bpe.process_line(function_bpe)
        bpe_tokenized += re.sub(r'@@', ' ', function_bpe) + '\nc -1\nh -1\n'
        extractor.reset()
        ori_untokenized += info_str.strip() + '\n\n'
        token_tokenized += info_str.strip() + '\n\n'
        stmt_tokenized += info_str.strip() + '\n\n'
        bpe_tokenized += info_str.strip() + '\n\n'

        if index % 100 == 0:
            print('thread #%d progress %d / %d = %.2f' % (thread_no, index, len(functions), index / len(functions)))

    thread_end = time.time()
    print('thread #%d end in %.2f ' % (thread_no, (thread_end - thread_start)))
    return (ori_untokenized, token_tokenized, stmt_tokenized, bpe_tokenized)
Example #4
    def readLangs(self, reverse=False):
        print("Reading lines...")
        # Read the file and split into lines
        lines = open(self.file, encoding='utf-8').read().strip().split('\n')
        # Split every line into pairs and normalize
        pairs = [[
            Norm.ch_normalizeString(l.split('\t')[0]),
            Norm.ch_normalizeString(l.split('\t')[1])
        ] for l in lines]
        # Reverse pairs, make Lang instances
        if reverse:
            pairs = [list(reversed(p)) for p in pairs]
        return pairs
Example #5
def main():
    global args, model_args, best_mae_error

    # load data
    dataset = CIFData(args.cifpath)
    collate_fn = collate_pool
    test_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.workers,
                             collate_fn=collate_fn,
                             pin_memory=args.cuda)

    # build model
    structures, _, _ = dataset[0]
    orig_atom_fea_len = structures[0].shape[-1]
    nbr_fea_len = structures[1].shape[-1]
    model = CrystalGraphConvNet(
        orig_atom_fea_len,
        nbr_fea_len,
        atom_fea_len=model_args.atom_fea_len,
        n_conv=model_args.n_conv,
        h_fea_len=model_args.h_fea_len,
        n_h=model_args.n_h,
        classification=True if model_args.task == 'classification' else False)
    if args.cuda:
        model.cuda()

    # define loss func and optimizer
    if model_args.task == 'classification':
        criterion = nn.NLLLoss()
    else:
        criterion = nn.MSELoss()

    normalizer = Normalizer(torch.zeros(3))

    # optionally resume from a checkpoint
    if os.path.isfile(args.modelpath):
        print("=> loading model '{}'".format(args.modelpath))
        checkpoint = torch.load(args.modelpath,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
        normalizer.load_state_dict(checkpoint['normalizer'])
        print("=> loaded model '{}' (epoch {}, validation {})".format(
            args.modelpath, checkpoint['epoch'], checkpoint['best_mae_error']))
    else:
        print("=> no model found at '{}'".format(args.modelpath))

    validate(test_loader, model, criterion, normalizer, test=True)
Example #6
class AdaptiveTransformation():
    def __init__(self):
        self.normalizer = Normalizer()

    def transform(self, tissue, dye):
        return transforms.Compose(
            [transforms.ToTensor(),
             self.normalizer.get(tissue, dye)])
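Example #6 composes the object returned by normalizer.get(tissue, dye) directly after transforms.ToTensor(), which suggests that get() returns a torchvision transform such as transforms.Normalize built from per-(tissue, dye) channel statistics. The class itself is not shown in these snippets; a minimal sketch under that assumption (the statistics below are placeholders, not real values) could look like:

from torchvision import transforms

class Normalizer:
    def __init__(self):
        # hypothetical per-(tissue, dye) channel means and stds
        self.stats = {
            ('lung', 'HE'): ((0.8, 0.6, 0.7), (0.15, 0.2, 0.15)),
        }
        self.default = ((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))

    def get(self, tissue, dye):
        # fall back to generic statistics when the pair is unknown
        mean, std = self.stats.get((tissue, dye), self.default)
        return transforms.Normalize(mean=mean, std=std)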
Example #7
    def __init__(self,
                 force_factor=5,
                 initial_theta=0.0001,
                 max_offset=3,
                 max_angle=0.25):
        self.norm_x = Normalizer(-max_offset, max_offset)
        self.norm_xdot = Normalizer(-10, 10)
        self.norm_theta = Normalizer(-max_angle, max_angle)
        self.norm_thetadot = Normalizer(-10, 10)
        self.reset()
Example #8
def load_env(env_name, encoder_path):
    normalizer = Normalizer(0, 499)
    sae = StateAutoEncoder(1, 1, 12, normalize=True, normalizer=normalizer)
    sae.use_checkpoints(encoder_path)

    train_py_env = StateEncoder(suite_gym.load(env_name), sae)
    eval_py_env = StateEncoder(suite_gym.load(env_name), sae)

    train_env = tf_py_environment.TFPyEnvironment(train_py_env)
    eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)

    return (train_env, eval_env)
Example #9
evaluate = False
test = True
evaluate_times = 10
if_train_have_answer = True
if_eval_have_answer = True

if (os.path.exists(save_path)):
    model.load_state_dict(pt.load(save_path))

# TODO: need to confirm whether the session contains the answer

if (train):
    for i in range(disConfig.num_epoch):
        for j, item in enumerate(mydata):
            if (if_train_have_answer):
                answer = Normalizer.ch_normalizeAString(item.context[-1])
                item.context = item.context[:-1]
            # optimizer.zero_grad()
            score = []
            outputs, features = myselect.get_add_features(
                item, disConfig.batch_size)
            print(answer)
            print(outputs)
            print(features)
            for output in outputs:
                output = Normalizer.ch_normalizeAString(output)
                score.append(
                    sentence_bleu([output.split(' ')],
                                  answer.split(' '),
                                  weights=(0.25, 0.25, 0.25, 0.25),
                                  smoothing_function=chencherry.method1))
Example #10
    def __init__(self,
                 normalizer=None,
                 backbone_type=None,
                 backbone_params=None,
                 backbone_to_grad_type=None,
                 backbone_to_grad_params=None,
                 ignore_grad_scale_mismatch=False,
                 checkpoint_path=None):
        super().__init__()

        if normalizer is None:
            normalizer = Normalizer.make('vgg')

        if backbone_params is None:
            backbone_params = {}
        if backbone_to_grad_params is None:
            backbone_to_grad_params = {}

        logging.debug('Args contain the following parameters:\n' + '\n'.join([
            f'    backbone_type: {backbone_type}',
            f'    backbone_params: {backbone_params}',
            f'    backbone_to_grad_type: {backbone_to_grad_type}',
            f'    backbone_to_grad_params: {backbone_to_grad_params}',
        ]))

        model_state_dict = None
        if checkpoint_path is not None:
            checkpoint = read_checkpoint(checkpoint_path, backbone_type)

            logging.debug(
                'Read checkpoint with the following parameters:\n' +
                '\n'.join([
                    f'    backbone_type: {checkpoint["backbone_type"]}',
                    f'    backbone_params: {checkpoint["backbone_params"]}',
                    f'    backbone_to_grad_type: {checkpoint["backbone_to_grad_type"]}',
                    f'    backbone_to_grad_params: {checkpoint["backbone_to_grad_params"]}',
                ]))

            if backbone_type is None:
                backbone_type = checkpoint['backbone_type']
            elif checkpoint['backbone_type'] is not None:
                assert backbone_type == checkpoint['backbone_type'], (
                    backbone_type, checkpoint['backbone_type'])

            if backbone_to_grad_type is None:
                backbone_to_grad_type = checkpoint['backbone_to_grad_type']
            elif checkpoint['backbone_to_grad_type'] is not None:
                assert backbone_to_grad_type == checkpoint[
                    'backbone_to_grad_type'], (
                        backbone_to_grad_type,
                        checkpoint['backbone_to_grad_type'])

            for key in (set(checkpoint['backbone_params'].keys())
                        & set(backbone_params)):
                value_ckpt = checkpoint['backbone_params'][key]
                value_args = backbone_params[key]
                assert value_args == value_ckpt, (key, value_args, value_ckpt)
            backbone_params.update(checkpoint['backbone_params'])

            for key in (set(checkpoint['backbone_to_grad_params'].keys())
                        & set(backbone_to_grad_params)):
                value_ckpt = checkpoint['backbone_to_grad_params'][key]
                value_args = backbone_to_grad_params[key]
                if key == 'grad_scale' and value_args != value_ckpt and ignore_grad_scale_mismatch:
                    logging.warning(
                        f'grad_scale mismatch: provided {value_args}, but checkpoint has {value_ckpt}'
                    )
                    checkpoint['backbone_to_grad_params'].pop(
                        'grad_scale')  # safe since we're iterating over a copy
                else:
                    assert value_args == value_ckpt, (key, value_args,
                                                      value_ckpt)
            backbone_to_grad_params.update(
                checkpoint['backbone_to_grad_params'])

            logging.debug('Final checkpoint parameters:\n' + '\n'.join([
                f'    backbone_type: {backbone_type}',
                f'    backbone_params: {backbone_params}',
                f'    backbone_to_grad_type: {backbone_to_grad_type}',
                f'    backbone_to_grad_params: {backbone_to_grad_params}',
            ]))

            model_state_dict = checkpoint['state_dict']

        assert backbone_type is not None
        assert backbone_to_grad_type is not None

        self.backbone = {
            'unet': UNetCustom,
            'resnet': ResnetGenerator,
        }[backbone_type](**backbone_params)

        proxy_type = backbone_to_grad_params['type']
        proxy_params = backbone_to_grad_params[proxy_type]
        make_proxy = {
            'raw': ProxyRaw,
            'sigmoid': ProxyAsSigmoid,
            'warped_target': ProxyAsWarpedTarget,
        }[proxy_type](normalizer, **proxy_params)

        if backbone_to_grad_type == 'direct':
            self.backbone_to_grad = PgnPredictGrad(
                make_proxy,
                backbone_to_grad_params['out_scale'],
                backbone_to_grad_params['grad_scale'],
            )
        elif backbone_to_grad_type == 'proxy':
            batchwise_loss_func = {
                'mse': mse_loss_batchwise,
                'l1': l1_loss_batchwise,
                'logcosh': logcosh_loss_batchwise,
                'mse_logit': MseLogitLossBatchwise(normalizer),
                'logcosh_logit': LogcoshLogitLossBatchwise(normalizer),
            }[backbone_to_grad_params['grad_type']]

            self.backbone_to_grad = PgnProxyToGrad(
                make_proxy,
                batchwise_loss_func,
                backbone_to_grad_params['grad_scale'],
            )
        else:
            assert False

        self.backbone_type = backbone_type
        self.backbone_params = backbone_params
        self.backbone_to_grad_type = backbone_to_grad_type
        self.backbone_to_grad_params = backbone_to_grad_params

        if model_state_dict is not None:
            self.backbone.load_state_dict(model_state_dict)
Example #11
def train(batch_size=2, learning_rate=1e-2, train_epoch=100):
    # Normalizer(), Augmenter(), Resizer() transforms are applied sequentially, in this order
    transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
    dataset = CocoDataset('./data/coco/', 'train2017', transform)
    data_loader = Data.DataLoader(dataset, batch_size, num_workers=2, shuffle=True,
                                  collate_fn=collater, pin_memory=True)
    dataset_size = len(dataset)
    print('sample number:', dataset_size)
    print('epoch size:', dataset_size / batch_size)

    retinanet = RetinaNet()
    anchor = Anchor()
    focal_loss = FocalLoss()

    if cuda:
        retinanet = torch.nn.DataParallel(retinanet).cuda()
        anchor = anchor.cuda()
        focal_loss = focal_loss.cuda()
    retinanet.module.freeze_bn()

    optimizer = torch.optim.SGD(retinanet.parameters(),
                                lr=learning_rate,
                                momentum=0.9,
                                weight_decay=1e-4)
    '''
    class torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
        factor=0.1, patience=10, verbose=False, threshold=0.0001,
        threshold_mode='rel', cooldown=0, min_lr=0, eps=1e-08)
    :param optimizer: the optimizer of the network
    :param mode: (str), 'min' or 'max'. With 'min' the learning rate is reduced when the
                 monitored quantity stops decreasing; with 'max' it is reduced when the
                 monitored quantity stops increasing. Default: 'min'
    :param factor: how much the learning rate is reduced each time, new_lr = old_lr * factor
    :param patience: number of epochs with no improvement to tolerate before reducing the
                     learning rate. Default: 10
    :param verbose: (bool), if True, prints a message to stdout for each update. Default: False
    :param threshold: (float), threshold for measuring a new best value, focusing only on
                      significant changes. Default: 1e-4
    :param cooldown: number of epochs to wait before resuming normal operation after the
                     learning rate has been reduced. Default: 0
    :param min_lr: lower bound on the learning rate
    :param eps: minimal decay applied to the learning rate. If the difference between the old
                and new lr is smaller than eps, the update is ignored. Default: 1e-8
    Copyright notice: original article by the CSDN blogger 「张叫张大卫」, licensed under CC 4.0 BY-SA;
    source: https://blog.csdn.net/weixin_40100431/article/details/84311430
    '''
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           verbose=True)

    for epoch_num in range(train_epoch):
        epoch_loss = []

        for iter_num, data in enumerate(data_loader):
            iter_time = time.time()
            images, annots, scales = data
            if cuda:
                images = images.cuda()
                annots = annots.cuda()
                scales = scales.cuda()

            total_anchors = anchor(images)
            classification, localization = retinanet(images)

            cls_loss, loc_loss = \
                focal_loss(classification, localization, total_anchors, annots)
            loss = cls_loss + loc_loss
            epoch_loss.append(float(loss))

            optimizer.zero_grad()
            loss.backward()
            '''
            About torch.nn.utils.clip_grad_norm_():
            In some cases you may find that each layer of your net amplifies the 
            gradient it receives. This causes a problem because the lower layers of 
            the net then get huge gradients and their updates will be far too large 
            to allow the model to learn anything.

            This function 'clips' the norm of the gradients by scaling the gradients 
            down by the same amount in order to reduce the norm to an acceptable 
            level. In practice this places a limit on the size of the parameter 
            updates.

            The hope is that this will ensure that your model gets reasonably 
            sized gradients and that the corresponding updates will allow the 
            model to learn.
            Quoted from https://discuss.pytorch.org/t/about-torch-nn-utils-clip-grad-norm/13873
            (An amusing back-and-forth from the PyTorch discussion forum, lol.)
            '''
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            print('|', 'Epoch:', epoch_num + 1, '|', 'Iter:', iter_num + 1,
                  '|', 'cls loss:', float(cls_loss), '|', 'loc loss:',
                  float(loc_loss), '|', 'loss:', float(loss), '|', 'lr:',
                  optimizer.param_groups[0]['lr'], '|', 'time:',
                  time.time() - iter_time)

        scheduler.step(np.mean(epoch_loss))

        print('Saving parameters in model on epoch', epoch_num + 1)
        torch.save(
            retinanet.state_dict(),
            './param/param_epoch' + str(epoch_num + 1).zfill(3) + '.pkl')
Example #12
def main():
    global args, best_mae_error

    # Dataset from CIF files
    dataset = CIFData(*args.data_options)
    print(f'Dataset size: {len(dataset)}')

    # Dataloader from dataset
    train_loader, val_loader, test_loader = get_train_val_test_loader(
        dataset=dataset,
        collate_fn=collate_pool,
        batch_size=args.batch_size,
        train_size=args.train_size,
        num_workers=args.workers,
        val_size=args.val_size,
        test_size=args.test_size,
        pin_memory=args.cuda,
        return_test=True)

    # Initialize data normalizer with sample of 500 points
    if args.task == 'classification':
        normalizer = Normalizer(torch.zeros(2))
        normalizer.load_state_dict({'mean': 0., 'std': 1.})
    elif args.task == 'regression':
        if len(dataset) < 500:
            warnings.warn('Dataset has less than 500 data points. '
                          'Lower accuracy is expected. ')
            sample_data_list = [dataset[i] for i in range(len(dataset))]
        else:
            sample_data_list = [
                dataset[i] for i in sample(range(len(dataset)), 500)
            ]
        _, sample_target, _ = collate_pool(sample_data_list)
        normalizer = Normalizer(sample_target)
    else:
        raise NameError('task argument must be regression or classification')

    # Build model
    structures, _, _ = dataset[0]
    orig_atom_fea_len = structures[0].shape[-1]
    nbr_fea_len = structures[1].shape[-1]
    model = CrystalGraphConvNet(orig_atom_fea_len,
                                nbr_fea_len,
                                atom_fea_len=args.atom_fea_len,
                                n_conv=args.n_conv,
                                h_fea_len=args.h_fea_len,
                                n_h=args.n_h,
                                classification=(args.task == 'classification'))

    # GPU
    if args.cuda:
        model.cuda()

    # Loss function
    criterion = nn.NLLLoss() if args.task == 'classification' else nn.MSELoss()

    # Optimizer
    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               args.lr,
                               weight_decay=args.weight_decay)
    else:
        raise NameError('optim argument must be SGD or Adam')

    # Scheduler
    scheduler = MultiStepLR(optimizer,
                            milestones=args.lr_milestones,
                            gamma=0.1)

    # Resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mae_error = checkpoint['best_mae_error']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            normalizer.load_state_dict(checkpoint['normalizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Train
    for epoch in range(args.start_epoch, args.epochs):

        # Train (one epoch)
        train(train_loader, model, criterion, optimizer, epoch, normalizer)

        # Validate
        mae_error = validate(val_loader, model, criterion, normalizer)
        assert mae_error == mae_error, 'NaN :('

        # Step learning rate scheduler
        scheduler.step()

        # Save checkpoint
        if args.task == 'regression':
            is_best = mae_error < best_mae_error
            best_mae_error = min(mae_error, best_mae_error)
        else:
            is_best = mae_error > best_mae_error
            best_mae_error = max(mae_error, best_mae_error)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_mae_error': best_mae_error,
                'optimizer': optimizer.state_dict(),
                'normalizer': normalizer.state_dict(),
                'args': vars(args)
            }, is_best)

    # Evaluate best model on test set
    print('--------- Evaluate model on test set ---------------')
    best_checkpoint = torch.load('model_best.pth.tar')
    model.load_state_dict(best_checkpoint['state_dict'])
    validate(test_loader, model, criterion, normalizer, test=True)
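Examples #5 and #12 never show the Normalizer class itself, but they build it from a tensor of sample targets, call load_state_dict with a dict holding 'mean' and 'std', and save normalizer.state_dict() into checkpoints. A minimal sketch consistent with those calls (the norm/denorm method names are assumptions taken from typical CGCNN-style training loops, not from the snippets above):

import torch

class Normalizer:
    def __init__(self, tensor):
        # mean/std computed from a sample of target values
        self.mean = torch.mean(tensor)
        self.std = torch.std(tensor)

    def norm(self, tensor):
        return (tensor - self.mean) / self.std

    def denorm(self, normed_tensor):
        return normed_tensor * self.std + self.mean

    def state_dict(self):
        return {'mean': self.mean, 'std': self.std}

    def load_state_dict(self, state_dict):
        self.mean = state_dict['mean']
        self.std = state_dict['std']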
Example #13
    def __init__(self, args, env_params):
        self.s_dim = env_params['o_dim'] + env_params['g_dim']
        self.a_dim = env_params['a_dim']
        self.f_dim = args.f_dim
        self.action_bound = env_params['action_max']
        self.max_timestep = env_params['max_timestep']
        self.max_episode = args.max_episode
        self.evaluate_episode = args.evaluate_episode
        self.evaluate_interval = args.evaluate_interval
        self.log_interval = args.log_interval
        self.save_model_interval = args.save_model_interval
        self.save_model_start = args.save_model_start

        self.lr = args.lr
        self.lr_model = args.lr_model
        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.eta = args.eta
        self.noise_eps = args.noise_eps
        self.device = torch.device(args.device)

        self.normalizer_s = Normalizer(size=self.s_dim,
                                       eps=1e-2,
                                       clip_range=1.)

        self.memory = Memory(size=args.memory_size,
                             s_dim=self.s_dim,
                             a_dim=self.a_dim)

        self.policy = Policy(s_dim=self.s_dim,
                             a_dim=self.a_dim).to(self.device)
        self.policy_target = Policy(s_dim=self.s_dim,
                                    a_dim=self.a_dim).to(self.device)
        self.Q = QFunction(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
        self.Q_target = QFunction(s_dim=self.s_dim,
                                  a_dim=self.a_dim).to(self.device)

        self.optimizer_p = optim.Adam(self.policy.parameters(), lr=self.lr)
        self.optimizer_q = optim.Adam(self.Q.parameters(), lr=self.lr)

        self.encoder = StateEncoder(s_dim=self.s_dim,
                                    f_dim=self.f_dim).to(self.device)
        self.EnvForward = ForwardModel(f_dim=self.f_dim,
                                       a_dim=self.a_dim).to(self.device)
        self.EnvInverse = InverseModel(f_dim=self.f_dim,
                                       a_dim=self.a_dim).to(self.device)

        self.optimizer_forward = optim.Adam(
            [{
                'params': self.EnvForward.parameters()
            }, {
                'params': self.encoder.parameters()
            }],
            lr=self.lr_model)
        self.optimizer_inverse = optim.Adam(
            [{
                'params': self.EnvInverse.parameters()
            }, {
                'params': self.encoder.parameters()
            }],
            lr=self.lr_model)

        self.hard_update()

        self.update_num = 0
Example #14
    def __init__(self):
        self.normalizer = Normalizer()
Example #15
def main():
    """main function"""

    args = get_args()

    shift = args.shift

    normalizer = Normalizer()

    print("\n#################")
    print("### Arguments ###")
    print("#################")
    for arg in vars(args):
        print(f"{arg} : {getattr(args, arg)}")
    print("#################\n")

    # creating the pairs of dyes to analyze
    pairs = generate_pairs()

    for i, (tissue, dye1, dye2, images_path, original_name1, original_name2,
            extension) in enumerate(pairs):

        # Each element of the pairs will play the role of the target
        for s in range(2):

            if s == 1:
                dye1, dye2 = dye2, dye1
                original_name1, original_name2 = original_name2, original_name1

            start_time = time()

            output_filename = os.path.join(
                args.output,
                f"data/{args.distance}/{args.size}/{args.arch}/{tissue}_{dye1}_{dye2}_{args.arch}_{args.size}_{args.pool}_{args.resize}.data"
            )
            if not os.path.exists(output_filename):
                print(f"File {output_filename} does not exist")
                mkdir(os.path.dirname(output_filename))
            else:
                print(f"File {output_filename} exists\n")
                continue

            # filename1 : reference (comparison from its annotations to those from filename2) -> get the patches
            # filename2 : image to cut

            print(tissue, dye1, dye2, Paths.PATH_TO_IMAGES, original_name1,
                  original_name2)

            # Get patches from image 1
            start_time_get_patches1 = time()
            patches_img1_landmarks = get_patches_from_landmarks(
                tissue, original_name1, size=get_args().size)
            time_get_patches1 = time() - start_time_get_patches1

            # Get patches from image 2
            start_time_get_patches2 = time()
            patches_img2 = segment_image(os.path.join(
                images_path, original_name2 + extension),
                                         size=get_args().size,
                                         shift=shift)
            time_get_patches2 = time() - start_time_get_patches2

            #################
            # # Useful to get the results for a pair whose target has to be rotated

            # angle = -75
            # img = img = Image.open(images_path + original_name2 + extension)
            # im2 = img.convert('RGBA')
            # # rotated image
            # rot = im2.rotate(angle, expand=1)
            # # a white image same size as rotated image
            # fff = Image.new('RGBA', rot.size, (255,)*4)
            # # create a composite image using the
            # out = Image.composite(rot, fff, rot)
            # out = out.convert(img.mode)
            # patches_img2 = segment_image(img=out, size=get_args().size, shift=shift)
            # time_get_patches2 = time() - start_time_get_patches2
            ##################

            # get the features
            # number of available landmarks for the particular tissue
            nb_of_landmarks = len(patches_img1_landmarks)
            print("==> Img1 ({} {}) : {}".format(tissue, dye1,
                                                 nb_of_landmarks))
            print("==> Img2 ({} {}) : {}".format(tissue, dye2,
                                                 len(patches_img2)))

            start_time_features_img1_landmarks = time()
            normalize_dye1 = normalizer.get(tissue, dye1)
            features_img1_landmarks = get_features(patches_img1_landmarks,
                                                   normalize_dye1)
            time_get_features1 = time() - start_time_features_img1_landmarks
            patches_img1_landmarks = ""
            del patches_img1_landmarks
            gc.collect()

            start_time_features_img2_landmarks = time()
            normalize_dye2 = normalizer.get(tissue, dye2)
            features_img2 = get_features(patches_img2, normalize_dye2)
            time_get_features2 = time() - start_time_features_img2_landmarks
            feature_size = features_img1_landmarks.shape[1]
            print("===> Features size : {}".format(feature_size))

            # Keep only the center and coordinates of patches_img2

            patches_img2 = [(x[0], x[1]) for x in patches_img2]
            gc.collect()

            # Compare

            start_time_comparison = time()
            results_comparison = compare(features_img1_landmarks,
                                         features_img2, args.distance)
            time_comparison = time() - start_time_comparison
            features_img2 = ""
            del features_img2
            features_img1_landmarks = ""
            del features_img1_landmarks
            gc.collect()

            # Get the position of the landmarks of dye2

            start_time_position_landmarks = time()
            position_landmarks_dye2 = get_position_landmarks(
                tissue, original_name2)
            time_position_landmarks = time() - start_time_position_landmarks

            # Get top-k accuracy

            start_time_get_accuracy = time()

            k_list = [1, 5]
            # count the landmarks respecting the condition
            counter = [0] * len(k_list)

            for i in range(nb_of_landmarks):

                array = [(k, x) for k, x in enumerate(results_comparison[i])]
                array.sort(key=lambda x: x[1], reverse=True)

                for c, k in enumerate(k_list):

                    indices_of_best_matches = None
                    if args.distance == "cos":
                        indices_of_best_matches = [x[0] for x in array[:k]]
                    elif args.distance == "eucl" or args.distance == "eucl-norm":
                        indices_of_best_matches = [x[0] for x in array[-k:]]

                    # get the position of the k centers that best matches
                    centers = [
                        patches_img2[ind][1] for ind in indices_of_best_matches
                    ]
                    true_position = position_landmarks_dye2[i]

                    distances = [
                        euclidean_distance(np.array(center),
                                           np.array(true_position))
                        for center in centers
                    ]
                    distances = np.array(distances)

                    # if at least a patch center is within a certain radius around the true landmark
                    if distances[distances <= args.size / 2].shape[0] != 0:
                        counter[c] += 1

            table = []
            top_accuracy_list = []
            for c, k in enumerate(k_list):
                acc = round(counter[c] / nb_of_landmarks, 4)
                top_accuracy_list.append((k, acc))
                table.append([str(k), str(acc)])
            t = tabulate(table, headers=['k', 'Top-k accuracy'])
            print("\n", t, "\n")

            time_get_accuracy = time() - start_time_get_accuracy
            elapsed_time = time() - start_time

            table = [
                [
                    'Patches image 1',
                    str(datetime.timedelta(seconds=time_get_patches1))
                ],
                [
                    'Patches image 2',
                    str(datetime.timedelta(seconds=time_get_patches2))
                ],
                [
                    'Features image 1',
                    str(datetime.timedelta(seconds=time_get_features1))
                ],
                [
                    'Features image 2',
                    str(datetime.timedelta(seconds=time_get_features2))
                ],
                [
                    'Position landmarks image 2',
                    str(datetime.timedelta(seconds=time_position_landmarks))
                ],
                [
                    'Comparison',
                    str(datetime.timedelta(seconds=time_comparison))
                ],
                [
                    'Compute accuracy',
                    str(datetime.timedelta(seconds=time_get_accuracy))
                ],
                [
                    'Elapsed time',
                    str(datetime.timedelta(seconds=elapsed_time))
                ]
            ]
            t = tabulate(table, headers=['', 'Time (h:m:s)'])
            print(t, "\n")

            info = {
                "args": vars(args),
                "pair": (tissue, dye1, dye2, images_path, original_name1,
                         original_name2, extension),
                "results_comparison": results_comparison,
                "nb_of_landmarks": nb_of_landmarks,
                "feature_size": feature_size,
                "counter": counter,
                "top_accuracy_list": top_accuracy_list,
                "time": elapsed_time,
                "time_get_patches1": time_get_patches1,
                "time_get_patches2": time_get_patches2,
                "time_get_features1": time_get_features1,
                "time_get_features2": time_get_features2,
                "time_position_landmarks": time_position_landmarks,
                "time_comparison": time_comparison,
                "time_get_accuracy": time_get_accuracy,
            }

            with open(output_filename, 'wb') as output_file:
                pickle.dump(info, output_file)
Example #16
File: train.py Project: yinxx/ProteinGCN
def main():
    global args, best_error_global, best_error_local, savepath, dataset

    parser = buildParser()
    args = parser.parse_args()

    print('Torch Device being used: ', cfg.device)

    # create the savepath
    savepath = args.save_dir + str(args.name) + '/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # Writes to file and also to terminal
    sys.stdout = Logger(savepath)
    print(vars(args))

    best_error_global, best_error_local = 1e10, 1e10

    randomSeed(args.seed)

    # create train/val/test dataset separately
    assert os.path.exists(args.protein_dir), '{} does not exist!'.format(
        args.protein_dir)
    all_dirs = [
        d for d in os.listdir(args.protein_dir)
        if not d.startswith('.DS_Store')
    ]
    dir_len = len(all_dirs)
    indices = list(range(dir_len))
    random.shuffle(indices)

    train_size = math.floor(args.train * dir_len)
    val_size = math.floor(args.val * dir_len)
    test_size = math.floor(args.test * dir_len)
    test_dirs = all_dirs[:test_size]
    train_dirs = all_dirs[test_size:test_size + train_size]
    val_dirs = all_dirs[test_size + train_size:test_size + train_size +
                        val_size]
    print('Testing on {} protein directories:'.format(len(test_dirs)))

    dataset = ProteinDataset(args.pkl_dir,
                             args.id_prop,
                             args.atom_init,
                             random_seed=args.seed)

    print('Dataset length: ', len(dataset))

    # load all model args from pretrained model
    if args.pretrained is not None and os.path.isfile(args.pretrained):
        print("=> loading model params '{}'".format(args.pretrained))
        model_checkpoint = torch.load(
            args.pretrained, map_location=lambda storage, loc: storage)
        model_args = argparse.Namespace(**model_checkpoint['args'])
        # override all args value with model_args
        args.h_a = model_args.h_a
        args.h_g = model_args.h_g
        args.n_conv = model_args.n_conv
        args.random_seed = model_args.seed
        args.lr = model_args.lr

        print("=> loaded model params '{}'".format(args.pretrained))
    else:
        print("=> no model params found at '{}'".format(args.pretrained))

    # build model
    kwargs = {
        'pkl_dir': args.pkl_dir,  # Root directory for data
        'atom_init': args.atom_init,  # Atom Init filename
        'h_a': args.h_a,  # Dim of the hidden atom embedding learnt
        'h_g': args.h_g,  # Dim of the hidden graph embedding after pooling
        'n_conv': args.n_conv,  # Number of GCN layers
        'random_seed': args.seed,  # Seed to fix the simulation
        'lr': args.lr,  # Learning rate for optimizer
    }

    structures, _, _ = dataset[0]
    h_b = structures[1].shape[-1]
    kwargs['h_b'] = h_b  # Dim of the bond embedding initialization

    # Use DataParallel for faster training
    print("Let's use", torch.cuda.device_count(),
          "GPUs and Data Parallel Model.")
    model = ProteinGCN(**kwargs)
    model = torch.nn.DataParallel(model)
    model.cuda()

    print('Trainable Model Parameters: ', count_parameters(model))

    # Create dataloader to iterate through the dataset in batches
    train_loader, val_loader, test_loader = get_train_val_test_loader(
        dataset,
        train_dirs,
        val_dirs,
        test_dirs,
        collate_fn=collate_pool,
        num_workers=args.workers,
        batch_size=args.batch_size,
        pin_memory=False)

    try:
        print('Training data    : ', len(train_loader.sampler))
        print('Validation data  : ', len(val_loader.sampler))
        print('Testing data     : ', len(test_loader.sampler))
    except Exception as e:
        # sometimes test may not be defined
        print('\nException Cause: {}'.format(e.args[0]))

    # obtain target value normalizer
    if len(dataset) < args.avg_sample:
        sample_data_list = [dataset[i] for i in tqdm(range(len(dataset)))]
    else:
        sample_data_list = [
            dataset[i]
            for i in tqdm(random.sample(range(len(dataset)), args.avg_sample))
        ]

    _, _, sample_target = collate_pool(sample_data_list)
    normalizer_global = Normalizer(sample_target[0])
    normalizer_local = Normalizer(sample_target[1])

    # load the model state dict from given pretrained model
    if args.pretrained is not None and os.path.isfile(args.pretrained):
        print("=> loading model '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained,
                                map_location=lambda storage, loc: storage)

        print('Best error global: ', checkpoint['best_error_global'])
        print('Best error local: ', checkpoint['best_error_local'])

        best_error_global = checkpoint['best_error_global']
        best_error_local = checkpoint['best_error_local']

        model.module.load_state_dict(checkpoint['state_dict'])
        model.module.optimizer.load_state_dict(checkpoint['optimizer'])
        normalizer_local.load_state_dict(checkpoint['normalizer_local'])
        normalizer_global.load_state_dict(checkpoint['normalizer_global'])
    else:
        print("=> no model found at '{}'".format(args.pretrained))

    # Main training loop
    for epoch in range(args.epochs):
        # Training
        [train_error_global, train_error_local,
         train_loss] = trainModel(train_loader,
                                  model,
                                  normalizer_global,
                                  normalizer_local,
                                  epoch=epoch)
        # Validation
        [val_error_global, val_error_local,
         val_loss] = trainModel(val_loader,
                                model,
                                normalizer_global,
                                normalizer_local,
                                epoch=epoch,
                                evaluation=True)

        # check for error overflow
        if (val_error_global != val_error_global) or (val_error_local !=
                                                      val_error_local):
            print('Exit due to NaN')
            sys.exit(1)

        # remember the best error and possibly save checkpoint
        is_best = val_error_global < best_error_global
        best_error_global = min(val_error_global, best_error_global)
        best_error_local = val_error_local

        # save best model
        if args.save_checkpoints:
            model.module.save(
                {
                    'epoch': epoch,
                    'state_dict': model.module.state_dict(),
                    'best_error_global': best_error_global,
                    'best_error_local': best_error_local,
                    'optimizer': model.module.optimizer.state_dict(),
                    'normalizer_global': normalizer_global.state_dict(),
                    'normalizer_local': normalizer_local.state_dict(),
                    'args': vars(args)
                }, is_best, savepath)

    # test best model using saved checkpoints
    if args.save_checkpoints and len(test_loader):
        print('---------Evaluate Model on Test Set---------------')
        # this try/except allows testing either right after training or from a separately specified pretrained path
        try:
            best_checkpoint = torch.load(savepath + 'model_best.pth.tar')
        except Exception as e:
            best_checkpoint = torch.load(args.pretrained)

        model.module.load_state_dict(best_checkpoint['state_dict'])
        [test_error_global, test_error_local,
         test_loss] = trainModel(test_loader,
                                 model,
                                 normalizer_global,
                                 normalizer_local,
                                 testing=True)
Example #17
class BaseCartPoleEnvironment(Environment):
    @save_args
    def __init__(self,
                 force_factor=5,
                 initial_theta=0.0001,
                 max_offset=3,
                 max_angle=0.25):
        self.norm_x = Normalizer(-max_offset, max_offset)
        self.norm_xdot = Normalizer(-10, 10)
        self.norm_theta = Normalizer(-max_angle, max_angle)
        self.norm_thetadot = Normalizer(-10, 10)
        self.reset()

    def reset(self, agent_ids=None):
        self.pendulum = PendulumDynamics(0, 0, self.initial_theta, 0)

    @property
    def number_of_agents(self):
        return 1

    @property
    def state_size(self):
        return 4

    @property
    def action_size(self):
        raise NotImplementedError()

    @property
    def state(self):
        return (
            self.norm_x(self.pendulum.x),
            self.norm_xdot(self.pendulum.xdot),
            self.norm_theta(self.pendulum.theta),
            self.norm_thetadot(self.pendulum.thetadot),
        )

    def denormalize_state(self, state):
        x, xdot, theta, thetadot = state
        return (
            self.norm_x.denormalize(x),
            self.norm_xdot.denormalize(xdot),
            self.norm_theta.denormalize(theta),
            self.norm_thetadot.denormalize(thetadot),
        )

    @property
    def is_terminal(self):
        return (not self.norm_x.is_inside(self.pendulum.x)
                or not self.norm_xdot.is_inside(self.pendulum.xdot)
                or not self.norm_theta.is_inside(self.pendulum.theta)
                or not self.norm_thetadot.is_inside(self.pendulum.thetadot))

    @staticmethod
    def _get_force(action):
        raise NotImplementedError()

    def apply_action(self, agent, action):
        act = self._get_force(action)
        self.pendulum.step_simulate(self.force_factor * act)
        return self.is_terminal, self.state
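Examples #7 and #17 construct Normalizer(low, high) for each state variable and then call it like a function, call denormalize, and call is_inside to detect termination. A minimal sketch consistent with those three calls (mapping into [0, 1] is an assumption; the snippets only fix the interface, not the output range):

class Normalizer:
    def __init__(self, low, high):
        self.low = low
        self.high = high

    def __call__(self, value):
        # map a raw value from [low, high] onto the assumed [0, 1] range
        return (value - self.low) / (self.high - self.low)

    def denormalize(self, normed_value):
        # inverse of __call__
        return normed_value * (self.high - self.low) + self.low

    def is_inside(self, value):
        return self.low <= value <= self.high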
Example #18
class PPOAgent():
    def __init__(self, args, env_params):
        self.o_dim = env_params['o_dim']
        self.a_dim = env_params['a_dim']
        self.r_dim = args.r_dim

        self.lr = args.lr
        self.gamma_e = args.gamma_e
        self.gamma_i = args.gamma_i
        self.lamda = args.lamda
        self.entropy_coef = args.entropy_coef
        self.ex_coef = args.ex_coef
        self.in_coef = args.in_coef
        self.clip_eps = args.clip_eps
        self.update_epoch = args.update_epoch
        self.batch_size = args.batch_size
        self.initialize_episode = args.initialize_episode
        self.update_proportion = args.update_proportion
        self.rollout_len = args.rollout_len
        self.obs_clip = args.obs_clip

        self.device = torch.device(args.device)

        self.actor_critic = CNNActorCritic(in_channel=self.o_dim[0],
                                           a_dim=self.a_dim).to(self.device)
        self.RND = RNDNetwork(in_channel=1).to(self.device)

        self.optimizer = optim.Adam(list(self.actor_critic.parameters()) +
                                    list(self.RND.predictor.parameters()),
                                    lr=self.lr)

        self.buffer = Buffer(capacity=self.rollout_len, o_dim=self.o_dim)

        self.normalizer_obs = Normalizer(shape=self.o_dim, clip=self.obs_clip)
        self.normalizer_ri = Normalizer(shape=1, clip=np.inf)

    def choose_action(self, obs):
        obs = torch.from_numpy(obs).float().to(self.device) / 255.
        with torch.no_grad():
            action_logits = self.actor_critic.act(obs)

        dist = Categorical(action_logits)
        action = dist.sample()
        log_prob = dist.log_prob(action)

        action, log_prob = action.cpu().detach().numpy(), log_prob.cpu(
        ).detach().numpy()
        return action, log_prob

    def compute_intrinsic_reward(self, obs_):
        obs_ = self.normalizer_obs.normalize(obs_)
        obs_ = torch.from_numpy(obs_[:, 3:, :, :]).float().to(self.device)
        with torch.no_grad():
            pred_feature, tar_feature = self.RND(obs_)
        reward_in = F.mse_loss(pred_feature, tar_feature,
                               reduction='none').mean(dim=-1)
        reward_in = reward_in.cpu().detach().numpy()
        return reward_in

    def GAE_caculate(self, rewards, masks, values, gamma, lamda):
        returns = np.zeros(shape=len(rewards), dtype=np.float32)
        deltas = np.zeros(shape=len(rewards), dtype=np.float32)
        advantages = np.zeros(shape=len(rewards), dtype=np.float32)

        pre_return = 0.
        pre_advantage = 0.
        pre_value = 0.
        for i in reversed(range(len(rewards))):
            returns[i] = rewards[i] + masks[i] * gamma * pre_return
            deltas[i] = rewards[i] + masks[i] * gamma * pre_value - values[i]
            advantages[i] = deltas[i] + gamma * lamda * pre_advantage

            pre_return = returns[i]
            pre_value = values[i]
            pre_advantage = advantages[i]

        return returns, deltas, advantages

    def update(self, o, a, r_i, r_e, mask, o_, log_prob):
        self.normalizer_obs.update(o_.reshape(-1, 4, 84, 84).copy())
        self.normalizer_ri.update(r_i.reshape(-1).copy())

        r_i = self.normalizer_ri.normalize(r_i)
        o_ = self.normalizer_obs.normalize(o_)
        o = torch.from_numpy(o).to(self.device).float() / 255.

        returns_ex = np.zeros_like(r_e)
        returns_in = np.zeros_like(r_e)
        advantage_ex = np.zeros_like(r_e)
        advantage_in = np.zeros_like(r_e)
        for i in range(r_e.shape[0]):
            action_logits, value_ex, value_in = self.actor_critic(o[i])
            value_ex, value_in = value_ex.cpu().detach().numpy(), value_in.cpu(
            ).detach().numpy()
            returns_ex[i], _, advantage_ex[i] = self.GAE_caculate(
                r_e[i], mask[i], value_ex, self.gamma_e, self.lamda)  #episodic
            returns_in[i], _, advantage_in[i] = self.GAE_caculate(
                r_i[i], np.ones_like(mask[i]), value_in, self.gamma_i,
                self.lamda)  #non_episodic

        o = o.reshape((-1, 4, 84, 84))
        a = np.reshape(a, -1)
        o_ = np.reshape(o_[:, :, 3, :, :], (-1, 1, 84, 84))
        log_prob = np.reshape(log_prob, -1)
        returns_ex = np.reshape(returns_ex, -1)
        returns_in = np.reshape(returns_in, -1)
        advantage_ex = np.reshape(advantage_ex, -1)
        advantage_in = np.reshape(advantage_in, -1)

        a = torch.from_numpy(a).float().to(self.device)
        o_ = torch.from_numpy(o_).float().to(self.device).float()
        log_prob = torch.from_numpy(log_prob).float().to(self.device)
        returns_ex = torch.from_numpy(returns_ex).float().to(
            self.device).unsqueeze(dim=1)
        returns_in = torch.from_numpy(returns_in).float().to(
            self.device).unsqueeze(dim=1)
        advantage_ex = torch.from_numpy(advantage_ex).float().to(self.device)
        advantage_in = torch.from_numpy(advantage_in).float().to(self.device)

        sample_range = list(range(len(o)))

        for i_update in range(self.update_epoch):
            np.random.shuffle(sample_range)
            for j in range(int(len(o) / self.batch_size)):
                idx = sample_range[self.batch_size * j:self.batch_size *
                                   (j + 1)]
                #update RND
                pred_RND, tar_RND = self.RND(o_[idx])
                loss_RND = F.mse_loss(pred_RND,
                                      tar_RND.detach(),
                                      reduction='none').mean(-1)
                mask = torch.randn(len(loss_RND)).to(self.device)
                mask = (mask < self.update_proportion).type(
                    torch.FloatTensor).to(self.device)
                loss_RND = (loss_RND * mask).sum() / torch.max(
                    mask.sum(),
                    torch.Tensor([1]).to(self.device))

                #update actor-critic
                action_logits, value_ex, value_in = self.actor_critic(o[idx])
                advantage = self.ex_coef * advantage_ex[
                    idx] + self.in_coef * advantage_in[idx]
                dist = Categorical(action_logits)
                new_log_prob = dist.log_prob(a[idx])

                ratio = torch.exp(new_log_prob - log_prob[idx])
                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1 - self.clip_eps,
                                    1 + self.clip_eps) * advantage
                loss_actor = torch.min(
                    surr1,
                    surr2).mean() - self.entropy_coef * dist.entropy().mean()
                loss_critic = F.mse_loss(value_ex,
                                         returns_ex[idx]) + F.mse_loss(
                                             value_in, returns_in[idx])

                loss_ac = loss_actor + 0.5 * loss_critic

                loss = loss_RND + loss_ac
                self.optimizer.zero_grad()
                loss.backward()
                global_grad_norm_(
                    list(self.actor_critic.parameters()) +
                    list(self.RND.predictor.parameters()))
                self.optimizer.step()

        return loss_RND.cpu().detach().numpy(), loss_actor.cpu().detach(
        ).numpy(), loss_critic.cpu().detach().numpy()

    def save_model(self, remark):
        if not os.path.exists('pretrained_models_PPO/'):
            os.mkdir('pretrained_models_PPO/')
        path = 'pretrained_models_PPO/{}.pt'.format(remark)
        print('Saving model to {}'.format(path))
        torch.save(self.actor_critic.state_dict(), path)

    def load_model(self, load_model_remark):
        print('Loading models with remark {}'.format(load_model_remark))
        model = torch.load(
            'pretrained_models_PPO/{}.pt'.format(load_model_remark),
            map_location=lambda storage, loc: storage)
        self.actor_critic.load_state_dict(model)
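Example #18 (whose constructor also appears as Example #1) builds Normalizer(shape=..., clip=...), feeds it batches through update(), and then calls normalize(). A minimal sketch of a running mean/std normalizer consistent with those calls (the Welford-style batch update is an assumption; the snippets do not show how the statistics are maintained):

import numpy as np

class Normalizer:
    def __init__(self, shape, clip=np.inf):
        self.clip = clip
        self.n = 0
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)

    def update(self, batch):
        # combine running statistics with those of the new batch (Chan's parallel formula)
        batch = np.asarray(batch, dtype=np.float64)
        batch_mean, batch_var, batch_n = batch.mean(axis=0), batch.var(axis=0), batch.shape[0]
        delta = batch_mean - self.mean
        total_n = self.n + batch_n
        self.mean = self.mean + delta * batch_n / total_n
        m_a, m_b = self.var * self.n, batch_var * batch_n
        self.var = (m_a + m_b + delta ** 2 * self.n * batch_n / total_n) / total_n
        self.n = total_n

    def normalize(self, x):
        # standardize and clip to [-clip, clip]
        std = np.sqrt(self.var) + 1e-8
        return np.clip((x - self.mean) / std, -self.clip, self.clip)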
Example #19
        # finish, print:
        print('episode', episode, 'reward_evaluation', reward_evaluation)
        fitness.append(reward_evaluation)
    return fitness


if __name__ == '__main__':
    hp = Hp()

    work_dir = mkdir('exp', 'brs')
    monitor_dir = mkdir(work_dir, 'monitor')
    env = gym.make(hp.env_name)

    env.seed(hp.seed)
    torch.manual_seed(hp.seed)
    env = wrappers.Monitor(env, monitor_dir, force=True)

    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]

    policy = nn.Linear(num_inputs, num_outputs, bias=True)
    policy.weight.data.fill_(0)
    policy.bias.data.fill_(0)

    pso = PSO(policy, hp.lr, hp.std, hp.b, hp.n_directions)
    normalizer = Normalizer(num_inputs)
    fitness = train(env, pso, normalizer, hp)
    import matplotlib.pyplot as plt
    plt.plot(fitness)
    plt.show()
Example #20
def train():
    normalizer = Normalizer(0, 499)
    sae = StateAutoEncoder(1,
                           1,
                           num_state_bits,
                           normalize=True,
                           normalizer=normalizer)
    sae.use_checkpoints(encoder_path)

    train_env, _ = load_env(env_name, sae)

    master_action_spec = array_spec.BoundedArraySpec(shape=((num_options, )),
                                                     dtype=np.float32,
                                                     minimum=0,
                                                     maximum=1,
                                                     name='master_action')

    options_observation_spec = array_spec.BoundedArraySpec(
        shape=((num_options + num_state_bits), ),
        dtype=np.float32,
        minimum=0,
        maximum=1,
        name='option_observation')
    options_action_spec = array_spec.BoundedArraySpec(shape=(num_state_bits,
                                                             2),
                                                      dtype=np.float32,
                                                      minimum=0,
                                                      maximum=1,
                                                      name='option_action')
    options_time_step_spec = ts.TimeStep(
        step_type=train_env.time_step_spec().step_type,
        reward=train_env.time_step_spec().reward,
        discount=train_env.time_step_spec().discount,
        observation=options_observation_spec)

    num_actions = train_env.action_spec().maximum - train_env.action_spec(
    ).minimum + 1
    low_level_model, callbacks = setup_model(num_actions, num_state_bits, sae,
                                             low_level_model_path)

    low_level_env = LowLevelEnv(train_env, low_level_model)

    options_env = OptionsEnv(low_level_env, options_observation_spec,
                             options_action_spec)
    option_train_env = tf_py_environment.TFPyEnvironment(options_env)

    master_env = MasterEnv(low_level_env, master_action_spec)
    master_train_env = tf_py_environment.TFPyEnvironment(master_env)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    global_step = tf.compat.v1.train.get_or_create_global_step()

    master_value_network = value_network.ValueNetwork(
        master_train_env.time_step_spec().observation, fc_layer_params=(100, ))

    master_actor_network = actor_distribution_network.ActorDistributionNetwork(
        master_train_env.time_step_spec().observation,
        master_train_env.action_spec(),
        fc_layer_params=(100, ))

    master_agent = ppo_agent.PPOAgent(master_train_env.time_step_spec(),
                                      master_train_env.action_spec(),
                                      optimizer=optimizer,
                                      actor_net=master_actor_network,
                                      value_net=master_value_network,
                                      train_step_counter=tf.Variable(0))
    master_agent.initialize()
    master_agent.train = common.function(master_agent.train)
    options_env.set_master_policy(master_agent.policy)

    options_critic_net = critic_network.CriticNetwork(
        (option_train_env.observation_spec(), option_train_env.action_spec()),
        observation_fc_layer_params=None,
        action_fc_layer_params=None,
        joint_fc_layer_params=(100, ),
        kernel_initializer='glorot_uniform',
        last_kernel_initializer='glorot_uniform')

    options_actor_net = OptionsNetwork(option_train_env.observation_spec(),
                                       option_train_env.action_spec(), 4)

    options_agent = sac_agent.SacAgent(
        option_train_env.time_step_spec(),
        option_train_env.action_spec(),
        actor_network=options_actor_net,
        critic_network=options_critic_net,
        actor_optimizer=tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate),
        critic_optimizer=tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate),
        alpha_optimizer=tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate),
        target_update_tau=target_update_tau,
        target_update_period=target_update_period,
        td_errors_loss_fn=tf.math.squared_difference,
        gamma=gamma,
        reward_scale_factor=reward_scale_factor,
        train_step_counter=tf.Variable(0))
    options_agent.initialize()
    options_agent.train = common.function(options_agent.train)
    master_env.set_options_policy(options_agent.policy)

    master_rb = create_replay_buffer(master_agent, batch_size,
                                     replay_buffer_max_length)
    options_rb = create_replay_buffer(options_agent, batch_size,
                                      replay_buffer_max_length)

    master_ds = master_rb.as_dataset(num_parallel_calls=3,
                                     sample_batch_size=batch_size,
                                     num_steps=2)
    master_iter = iter(master_ds)
    options_ds = options_rb.as_dataset(num_parallel_calls=3,
                                       sample_batch_size=batch_size,
                                       num_steps=2)
    options_iter = iter(options_ds)

    master_checkpointer = create_train_checkpointer(checkpoint_dir + "master/",
                                                    master_agent, master_rb,
                                                    global_step)
    options_checkpointer = create_train_checkpointer(
        checkpoint_dir + "options/", options_agent, options_rb, global_step)

    master_saver = policy_saver.PolicySaver(master_agent.policy)
    options_saver = policy_saver.PolicySaver(options_agent.policy)

    def check_interval(interval):
        return global_step % interval == 0

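    # Alternating training scheme: first warm up the master (PPO) agent on
    # freshly collected experience, then interleave updates of the options
    # (SAC) agent and the master agent while both replay buffers keep being
    # refilled.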
    while (global_step < num_iterations):
        populate_buffer(master_train_env, master_rb,
                        master_agent.collect_policy,
                        master_agent.time_step_spec, master_collect_steps,
                        batch_size)
        for _ in range(warmup_period):
            experience, unused_info = next(master_iter)
            master_loss = master_agent.train(experience)

        for _ in range(joint_update_period):
            populate_buffer(master_train_env, master_rb,
                            master_agent.collect_policy,
                            master_agent.time_step_spec, 2, batch_size)
            populate_buffer(option_train_env, options_rb,
                            options_agent.collect_policy,
                            options_agent.time_step_spec, 2, batch_size)
            option_exp, unused_info = next(options_iter)
            options_loss = options_agent.train(option_exp)
            master_exp, unused_info = next(master_iter)
            master_loss = master_agent.train(master_exp)

        global_step.assign_add(1)

        if check_interval(log_interval):
            print('step = {0}: master loss = {1}, options loss = {2}'.format(
                global_step.numpy(), master_loss, options_loss))

        if check_interval(checkpoint_interval):
            master_checkpointer.save(global_step)
            options_checkpointer.save(global_step)
            print('Checkpoint saved!')

        # Reset master here

    master_saver.save(save_dir + "master/")
    options_saver.save(save_dir + "options/")
    print("Policies Saved!")
示例#21
0
            # args: image, text to draw, top-left corner, font, font scale, color, thickness
            font = cv2.FONT_HERSHEY_SIMPLEX
            image = cv2.putText(image, str(int(c)), (x1, y1), font, 0.5, (0,255,0), 1)
        else:
            break

    image = image.get()
    print(image.shape)
    plt.figure() 
    image = image[:,:,[2,1,0]]
    plt.imshow(image)
    plt.show()


if __name__ == '__main__':
    transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()]) 
    dataset = CocoDataset('./data/coco/', 'train2017', transform)
    dataset_size = len(dataset)
    print(dataset_size)

    data_loader = Data.DataLoader(dataset, 2, num_workers=2, shuffle=True, \
                                  collate_fn=collater, pin_memory=True)

    for epoch_num in range(2):
        for iter_num, data in enumerate(data_loader):
            print(
                'epoch:', epoch_num, 
                'iter_num:', iter_num
            )
            print('image:', data['img'].size())
            print('annot:', data['annot'].size())
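The Normalizer() transform composed above is defined elsewhere in that repository. A minimal sketch of the per-channel image normalization step commonly used in such COCO detection pipelines (the mean/std values and the sample-dict keys 'img'/'annot' are assumptions based on the usage above) might be:

import numpy as np

class Normalizer(object):
    """Normalize the image of a sample dict with fixed per-channel statistics."""

    def __init__(self, mean=None, std=None):
        self.mean = np.array(mean if mean is not None else [0.485, 0.456, 0.406],
                             dtype=np.float32)
        self.std = np.array(std if std is not None else [0.229, 0.224, 0.225],
                            dtype=np.float32)

    def __call__(self, sample):
        image, annots = sample['img'], sample['annot']
        # broadcast per-channel statistics over the H x W x 3 image
        image = (image.astype(np.float32) - self.mean) / self.std
        return {'img': image, 'annot': annots}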
示例#22
0
    def __init__(self, params):
        """Implementation of DDPG agent with Hindsight Experience Replay (HER) sampler.

        @param params: dict containing all necessary parameters:
        dims, buffer_size, tau (= 1-polyak), batch_size, lr_critic, lr_actor, norm_eps, norm_clip, clip_obs,
        clip_action, T (episode length), num_workers, clip_return, sample_her_transitions, gamma, replay_strategy
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.input_dims = params['dims']
        self.buffer_size = params['buffer_size']
        self.tau = params['tau']
        self.batch_size = params['batch_size']
        self.critic_lr = params['lr_critic']
        self.actor_lr = params['lr_actor']
        self.norm_eps = params['norm_eps']
        self.norm_clip = params['norm_clip']
        self.clip_obs = params['clip_obs']
        self.clip_action = params['clip_action']

        self.T = params['T']
        self.rollout_batch_size = params['num_workers']
        self.clip_return = params['clip_return']
        self.sample_transitions = params['sample_her_transitions']
        self.gamma = params['gamma']
        self.replay_strategy = params['replay_strategy']

        self.dimo = self.input_dims['o']
        self.dimg = self.input_dims['g']
        self.dimu = self.input_dims['u']

        stage_shapes = OrderedDict()
        for key in sorted(self.input_dims.keys()):
            if key.startswith('info_'):
                continue
            stage_shapes[key] = (None, self.input_dims[key])
        stage_shapes['o_2'] = stage_shapes['o']
        stage_shapes['r'] = (None,)
        self.stage_shapes = stage_shapes

        # normalizer
        self.obs_normalizer = Normalizer(size=self.dimo, eps=self.norm_eps, clip_range=self.norm_clip)
        self.goal_normalizer = Normalizer(size=self.dimg, eps=self.norm_eps, clip_range=self.norm_clip)

        # networks
        self.actor_local = Actor(self.input_dims).to(self.device)
        self.critic_local = Critic(self.input_dims).to(self.device)
        self.actor_target = copy.deepcopy(self.actor_local)
        self.critic_target = copy.deepcopy(self.critic_local)

        # optimizers
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.actor_lr)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.critic_lr)

        # Configuring the replay buffer
        buffer_shapes = {key: (self.T-1 if key != 'o' else self.T, self.input_dims[key])
                         for key, val in self.input_dims.items()}
        buffer_shapes['g'] = (buffer_shapes['g'][0], self.dimg)
        buffer_shapes['ag'] = (self.T, self.dimg)
        buffer_size = (self.buffer_size // self.rollout_batch_size) * self.rollout_batch_size

        self.buffer = ReplayBuffer(buffer_shapes, buffer_size, self.T, self.sample_transitions)
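A hypothetical params dict covering the keys listed in the docstring (all values illustrative, not taken from the original project) could be passed to this constructor:

params = {
    'dims': {'o': 25, 'g': 3, 'u': 4},  # observation, goal and action sizes (illustrative)
    'buffer_size': int(1e6),
    'tau': 0.05,                        # 1 - polyak
    'batch_size': 256,
    'lr_critic': 1e-3,
    'lr_actor': 1e-3,
    'norm_eps': 1e-2,
    'norm_clip': 5.0,
    'clip_obs': 200.0,
    'clip_action': 1.0,
    'T': 50,                            # episode length
    'num_workers': 2,
    'clip_return': 50.0,
    'sample_her_transitions': None,     # HER sampler function, created elsewhere
    'gamma': 0.98,
    'replay_strategy': 'future',
}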
示例#23
0
    def preprocess(self, train_texts):
        normalizer = Normalizer.Normalizer()
        return normalizer.clean_text(train_texts)
示例#24
0
class DDPG_Agent():
    def __init__(self, args, env_params):
        self.s_dim = env_params['o_dim'] + env_params['g_dim']
        self.a_dim = env_params['a_dim']
        self.f_dim = args.f_dim
        self.action_bound = env_params['action_max']
        self.max_timestep = env_params['max_timestep']
        self.max_episode = args.max_episode
        self.evaluate_episode = args.evaluate_episode
        self.evaluate_interval = args.evaluate_interval
        self.log_interval = args.log_interval
        self.save_model_interval = args.save_model_interval
        self.save_model_start = args.save_model_start

        self.lr = args.lr
        self.lr_model = args.lr_model
        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.eta = args.eta
        self.noise_eps = args.noise_eps
        self.device = torch.device(args.device)

        self.normalizer_s = Normalizer(size=self.s_dim,
                                       eps=1e-2,
                                       clip_range=1.)

        self.memory = Memory(size=args.memory_size,
                             s_dim=self.s_dim,
                             a_dim=self.a_dim)

        self.policy = Policy(s_dim=self.s_dim,
                             a_dim=self.a_dim).to(self.device)
        self.policy_target = Policy(s_dim=self.s_dim,
                                    a_dim=self.a_dim).to(self.device)
        self.Q = QFunction(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
        self.Q_target = QFunction(s_dim=self.s_dim,
                                  a_dim=self.a_dim).to(self.device)

        self.optimizer_p = optim.Adam(self.policy.parameters(), lr=self.lr)
        self.optimizer_q = optim.Adam(self.Q.parameters(), lr=self.lr)

        self.encoder = StateEncoder(s_dim=self.s_dim,
                                    f_dim=self.f_dim).to(self.device)
        self.EnvForward = ForwardModel(f_dim=self.f_dim,
                                       a_dim=self.a_dim).to(self.device)
        self.EnvInverse = InverseModel(f_dim=self.f_dim,
                                       a_dim=self.a_dim).to(self.device)

        self.optimizer_forward = optim.Adam(
            [{
                'params': self.EnvForward.parameters()
            }, {
                'params': self.encoder.parameters()
            }],
            lr=self.lr_model)
        self.optimizer_inverse = optim.Adam(
            [{
                'params': self.EnvInverse.parameters()
            }, {
                'params': self.encoder.parameters()
            }],
            lr=self.lr_model)

        self.hard_update()

        self.update_num = 0

    def select_action(self, state, train_mode=True):
        s = self.normalize_input(state)
        s = torch.tensor(s, dtype=torch.float32).to(self.device)  # use the normalized state
        with torch.no_grad():
            action = self.policy(s).cpu().numpy()

        if train_mode:
            action += np.random.randn(
                self.a_dim
            ) * self.noise_eps * self.action_bound  #Gaussian Noise
        else:
            pass

        action = np.clip(action,
                         a_min=-self.action_bound,
                         a_max=self.action_bound)
        return action

    def get_intrinsic_reward(self, s, a, s_):
        s = torch.from_numpy(s).to(self.device).float()
        a = torch.from_numpy(a).to(self.device).float()
        s_ = torch.from_numpy(s_).to(self.device).float()
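        # curiosity-style intrinsic reward: the prediction error of the
        # forward model in the learned feature space, scaled by eta and
        # clamped to stay small relative to the extrinsic reward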
        with torch.no_grad():
            feature = self.encoder(s)
            next_feature_pred = self.EnvForward(feature, a)
            next_feature = self.encoder(s_)
        r_i = self.eta * torch.norm(next_feature_pred - next_feature)
        r_i = torch.clamp(r_i, min=-0.1, max=0.1)
        return r_i.cpu().detach().numpy()

    def train(self, env, logger=None):
        total_step = 0
        loss_pi, loss_q, loss_forward, loss_inverse = 0., 0., 0., 0.
        for i_episode in range(self.max_episode):
            obs = env.reset()
            s = get_state(obs)

            cumulative_r = 0.
            for i_step in range(self.max_timestep):
                a = self.select_action(s)
                obs_, r_e, done, info = env.step(a)
                s_ = get_state(obs_)

                r_i = self.get_intrinsic_reward(s, a, s_)
                r = r_e + r_i

                self.memory.store(s, a, r, s_)
                s = s_

                if len(self.memory) > self.batch_size:
                    loss_pi, loss_q, loss_forward, loss_inverse = self.learn()
                cumulative_r += r_e
                total_step += 1

            print(
                'i_episode: {} total step: {} cumulative reward: {:.4f} is_success: {} '
                .format(i_episode, total_step, cumulative_r,
                        info['is_success']))
            if logger is not None and i_episode % self.log_interval == 0:
                logger.add_scalar('Indicator/cumulative reward', cumulative_r,
                                  i_episode)
                logger.add_scalar('Loss/pi_loss', loss_pi, i_episode)
                logger.add_scalar('Loss/q_loss', loss_q, i_episode)
                logger.add_scalar('Loss/forward_loss', loss_forward, i_episode)
                logger.add_scalar('Loss/inverse_loss', loss_inverse, i_episode)
            if i_episode % self.evaluate_interval == 0:
                success_rate = self.evaluate(env)
                if logger is not None:
                    logger.add_scalar('Indicator/success rate', success_rate,
                                      i_episode)

            if i_episode > self.save_model_start and i_episode % self.save_model_interval == 0:
                self.save_model(remarks='{}_{}'.format(env.spec.id, i_episode))

    def evaluate(self, env, render=False):
        success_count = 0
        for i_episode in range(self.evaluate_episode):
            obs = env.reset()
            s = get_state(obs)
            for i_step in range(self.max_timestep):
                if render:
                    env.render()
                a = self.select_action(s, train_mode=False)
                obs_, r_e, done, info = env.step(a)
                s_ = get_state(obs_)
                s = s_
            success_count += info['is_success']

        return success_count / self.evaluate_episode

    def learn(self):
        s, a, r, s_ = self.memory.sample_batch(batch_size=self.batch_size)
        self.normalizer_s.update(s)

        s, s_ = self.normalize_input(s, s_)
        s = torch.from_numpy(s).to(self.device)
        a = torch.from_numpy(a).to(self.device)
        r = torch.from_numpy(r).to(self.device).unsqueeze(dim=1)
        s_ = torch.from_numpy(s_).to(self.device)

        #update policy and Q
        with torch.no_grad():
            a_next_tar = self.policy_target(s_)
            Q_next_tar = self.Q_target(s_, a_next_tar)
            loss_q_tar = r + self.gamma * Q_next_tar
        loss_q_pred = self.Q(s, a)
        loss_q = F.mse_loss(loss_q_pred, loss_q_tar.detach())
        self.optimizer_q.zero_grad()
        loss_q.backward()
        self.optimizer_q.step()

        loss_p = -self.Q(s, self.policy(s)).mean()
        self.optimizer_p.zero_grad()
        loss_p.backward()
        self.optimizer_p.step()

        self.soft_update()

        #update env model and encoder
        feature = self.encoder(s)
        next_feature = self.encoder(s_)
        a_pred = self.EnvInverse(feature, next_feature)
        loss_inverse = F.mse_loss(a_pred, a)

        next_feature_pred = self.EnvForward(feature, a)
        with torch.no_grad():
            next_feature_tar = self.encoder(s_)
        loss_forward = F.mse_loss(next_feature_pred, next_feature_tar.detach())

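        # both losses backpropagate through the shared encoder, so the first
        # backward call keeps the graph alive (retain_graph=True) for the second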
        self.optimizer_forward.zero_grad()
        self.optimizer_inverse.zero_grad()
        loss_forward.backward(retain_graph=True)
        loss_inverse.backward()
        self.optimizer_forward.step()
        self.optimizer_inverse.step()

        self.update_num += 1
        return loss_p.cpu().detach().numpy(), loss_q.cpu().detach().numpy(
        ), loss_forward.cpu().detach().numpy(), loss_inverse.cpu().detach(
        ).numpy()

    def update_normalizer(self, states):
        states = np.array(states, dtype=np.float32)
        self.normalizer_s.update(states)

    def hard_update(self):
        self.policy_target.load_state_dict(self.policy.state_dict())
        self.Q_target.load_state_dict(self.Q.state_dict())

    def soft_update(self):
        for param, param_target in zip(self.policy.parameters(),
                                       self.policy_target.parameters()):
            param_target.data.copy_(param.data * self.tau + param_target.data *
                                    (1 - self.tau))
        for param, param_target in zip(self.Q.parameters(),
                                       self.Q_target.parameters()):
            param_target.data.copy_(param.data * self.tau + param_target.data *
                                    (1 - self.tau))

    def normalize_input(self, s, s_=None):
        s = self.normalizer_s.normalize(s)
        if s_ is not None:
            s_ = self.normalizer_s.normalize(s_)
            return s, s_
        else:
            return s

    def save_model(self, remarks):
        if not os.path.exists('pretrained_models_DDPG/'):
            os.mkdir('pretrained_models_DDPG/')
        path = 'pretrained_models_DDPG/{}.pt'.format(remarks)
        print('Saving model to {}'.format(path))
        torch.save([
            self.normalizer_s.mean, self.normalizer_s.std,
            self.policy.state_dict()
        ], path)

    def load_model(self, remark):
        print('Loading models with remark {}'.format(remark))
        self.normalizer_s.mean, self.normalizer_s.std, policy_model = torch.load(
            'pretrained_models_DDPG/{}.pt'.format(remark),
            map_location=lambda storage, loc: storage)
        self.policy.load_state_dict(policy_model)
示例#25
0
        filepath=checkpoint_file,
        save_weights_only=True
    )

    if (len(os.listdir(checkpoint_dir)) > 0):
        print("Restoring from", checkpoint_file)
        low_level_action_model.load_weights(checkpoint_file)

    return low_level_action_model, [
        checkpoint_callback,
        low_level_action_model.gumbel_callback
    ]


if __name__ == "__main__":
    normalizer = Normalizer(0, 499)
    sae = StateAutoEncoder(
        1, 1,
        num_state_bits, normalize=True,
        normalizer=normalizer
    )
    sae.use_checkpoints(sae_path)

    num_actions, train_ds = setup_env(
        env_name, num_collect_episodes, sae)

    low_level_action_model, callbacks = setup_model(
        num_actions,
        num_state_bits,
        sae,
        checkpoint_dir
示例#26
0
class ddpgAgent(object):
    def __init__(self, params):
        """Implementation of DDPG agent with Hindsight Experience Replay (HER) sampler.

        @param params: dict containing all necessary parameters:
        dims, buffer_size, tau (= 1-polyak), batch_size, lr_critic, lr_actor, norm_eps, norm_clip, clip_obs,
        clip_action, T (episode length), num_workers, clip_return, sample_her_transitions, gamma, replay_strategy
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

        self.input_dims = params['dims']
        self.buffer_size = params['buffer_size']
        self.tau = params['tau']
        self.batch_size = params['batch_size']
        self.critic_lr = params['lr_critic']
        self.actor_lr = params['lr_actor']
        self.norm_eps = params['norm_eps']
        self.norm_clip = params['norm_clip']
        self.clip_obs = params['clip_obs']
        self.clip_action = params['clip_action']

        self.T = params['T']
        self.rollout_batch_size = params['num_workers']
        self.clip_return = params['clip_return']
        self.sample_transitions = params['sample_her_transitions']
        self.gamma = params['gamma']
        self.replay_strategy = params['replay_strategy']

        self.dimo = self.input_dims['o']
        self.dimg = self.input_dims['g']
        self.dimu = self.input_dims['u']

        stage_shapes = OrderedDict()
        for key in sorted(self.input_dims.keys()):
            if key.startswith('info_'):
                continue
            stage_shapes[key] = (None, self.input_dims[key])
        stage_shapes['o_2'] = stage_shapes['o']
        stage_shapes['r'] = (None,)
        self.stage_shapes = stage_shapes

        # normalizer
        self.obs_normalizer = Normalizer(size=self.dimo, eps=self.norm_eps, clip_range=self.norm_clip)
        self.goal_normalizer = Normalizer(size=self.dimg, eps=self.norm_eps, clip_range=self.norm_clip)

        # networks
        self.actor_local = Actor(self.input_dims).to(self.device)
        self.critic_local = Critic(self.input_dims).to(self.device)
        self.actor_target = copy.deepcopy(self.actor_local)
        self.critic_target = copy.deepcopy(self.critic_local)

        # optimizers
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.actor_lr)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.critic_lr)

        # Configuring the replay buffer
        buffer_shapes = {key: (self.T-1 if key != 'o' else self.T, self.input_dims[key])
                         for key, val in self.input_dims.items()}
        buffer_shapes['g'] = (buffer_shapes['g'][0], self.dimg)
        buffer_shapes['ag'] = (self.T, self.dimg)
        buffer_size = (self.buffer_size // self.rollout_batch_size) * self.rollout_batch_size

        self.buffer = ReplayBuffer(buffer_shapes, buffer_size, self.T, self.sample_transitions)


    def act(self, o, g, noise_eps=0., random_eps=0., testing=False):
        """Choose action from observations with probability 'random_eps' at random,
        else use actor output and add noise 'noise_eps'

        @param o: observation
        @param g: desired goal
        @param noise_eps: noise added to action
        @param random_eps: random action probability
        @param testing: (bool) set to 'True' if testing a single environment
        """

        obs = self.obs_normalizer.normalize(o)
        goals = self.goal_normalizer.normalize(g)

        obs = torch.tensor(obs).to(self.device)
        goals = torch.tensor(goals).to(self.device)

        # for testing single environment
        if testing:
            with torch.no_grad():
                action = self.actor_local(torch.cat([obs, goals], dim=0)).cpu().data.numpy()
            return action

        actions = self.actor_local(torch.cat([obs, goals], dim=1))

        noise = (noise_eps * np.random.randn(actions.shape[0], self.dimu)).astype(np.float32)
        actions += torch.tensor(noise).to(self.device)

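        # epsilon-greedy exploration: with probability random_eps replace the
        # (noisy) policy action with a uniformly random action per environment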
        eps_greedy_noise = np.random.binomial(1, random_eps, actions.shape[0]).reshape(-1, 1)

        random_action = torch.tensor(np.random.uniform(
            low=-1., high=1., size=(actions.shape[0], self.dimu)).astype(np.float32)).to(self.device)

        actions += torch.tensor(eps_greedy_noise.astype(np.float32)).to(self.device) * (
                    random_action - actions)  # eps-greedy

        actions = torch.clamp(actions, -self.clip_action, self.clip_action)

        return actions

    def store_episode(self, episode_batch):
        """Store episodes to replay buffer.

        @param episode_batch: array of batch_size x (T or T+1) x dim_key.
        Observation 'o' is of size T+1, others are of size T
        """
        self.buffer.store_episode(episode_batch)

        # add transitions to normalizer
        episode_batch['o_2'] = episode_batch['o'][:, 1:, :]
        episode_batch['ag_2'] = episode_batch['ag'][:, 1:, :]
        shape = episode_batch['u'].shape
        num_normalizing_transitions = shape[0] * shape[1]  # num_rollouts * (T - 1), steps every cycle
        transitions = self.sample_transitions(episode_batch, num_normalizing_transitions)

        self.obs_normalizer.update(transitions['o'])
        self.goal_normalizer.update(transitions['g'])

        self.obs_normalizer.recompute_stats()
        self.goal_normalizer.recompute_stats()

    def sample_batch(self):
        """Sample random transitions from replay buffer (which also contains HER samples).

        @return: transitions
        """
        transitions = self.buffer.sample(self.batch_size)
        return [transitions[key] for key in self.stage_shapes.keys()]

    def learn(self):
        """learning step i.e. optimizing the network.
        """
        batch = self.sample_batch()
        batch_dict = OrderedDict([(key, batch[i].astype(np.float32).copy())
                                  for i, key in enumerate(self.stage_shapes.keys())])
        batch_dict['r'] = np.reshape(batch_dict['r'], [-1, 1])

        # prepare state, action, reward, next state
        obs = torch.tensor(self.obs_normalizer.normalize(batch_dict['o'])).to(self.device)
        goal = torch.tensor(self.goal_normalizer.normalize(batch_dict['g'])).to(self.device)
        actions = torch.tensor(batch_dict['u']).to(self.device)
        rewards = torch.tensor(batch_dict['r'].astype(np.float32)).to(self.device)
        obs_2 = torch.tensor(self.obs_normalizer.normalize(batch_dict['o_2'])).to(self.device)

        # update critic --------------------------------------------------------------

        # compute predicted Q values
        next_actions = self.actor_target(torch.cat([obs_2, goal], dim=1))
        next_Q_targets = self.critic_target(torch.cat([obs_2, goal], dim=1), next_actions)

        # compute Q values for current states and clip them
        Q_targets = rewards + self.gamma * next_Q_targets          # Note: last experience of episode is not included
        Q_targets = torch.clamp(Q_targets, -self.clip_return, 0.)  # clipping
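        # assuming the sparse {-1, 0} rewards typical of HER benchmarks,
        # returns are non-positive, hence the clip to [-clip_return, 0]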

        # compute loss
        Q_expected = self.critic_local(torch.cat([obs, goal], dim=1), actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)

        # update weights critic
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # update actor -------------------------------------------------------------

        # compute loss
        pred_actions = self.actor_local(torch.cat([obs, goal], dim=1))
        actor_loss = -self.critic_local(torch.cat([obs, goal], dim=1), pred_actions).mean()
        actor_loss += (pred_actions ** 2).mean()  # minimize action moments

        # update weights actor
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

    def soft_update_target_networks(self):
        """Soft update model parameters:
            θ_target = τ*θ_local + (1 - τ)*θ_target
        """
        # update critic net
        for target_param, local_param in zip(self.critic_target.parameters(), self.critic_local.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)
        # update actor net
        for target_param, local_param in zip(self.actor_target.parameters(), self.actor_local.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)

    def save_checkpoint(self, path, name):
        """Save actor, critic networks and the stats for normalization to the path.

        @param path: path to store checkpoints
        @param name: (str) name of environment, for naming files
        """
        torch.save(self.actor_local.state_dict(), path + '/'+name+'_checkpoint_actor_her.pth')
        torch.save(self.critic_local.state_dict(), path + '/'+name+'_checkpoint_critic_her.pth')
        self.obs_normalizer.save_normalizer(path + '/'+name+'_obs_normalizer.pth')
        self.goal_normalizer.save_normalizer(path + '/'+name+'_goal_normalizer.pth')