def __init__(self, args, env_params):
    self.o_dim = env_params['o_dim']
    self.a_dim = env_params['a_dim']
    self.r_dim = args.r_dim
    self.lr = args.lr
    self.gamma_e = args.gamma_e
    self.gamma_i = args.gamma_i
    self.lamda = args.lamda
    self.entropy_coef = args.entropy_coef
    self.ex_coef = args.ex_coef
    self.in_coef = args.in_coef
    self.clip_eps = args.clip_eps
    self.update_epoch = args.update_epoch
    self.batch_size = args.batch_size
    self.initialize_episode = args.initialize_episode
    self.update_proportion = args.update_proportion
    self.rollout_len = args.rollout_len
    self.obs_clip = args.obs_clip
    self.device = torch.device(args.device)

    self.actor_critic = CNNActorCritic(in_channel=self.o_dim[0],
                                       a_dim=self.a_dim).to(self.device)
    self.RND = RNDNetwork(in_channel=1).to(self.device)
    self.optimizer = optim.Adam(list(self.actor_critic.parameters()) +
                                list(self.RND.predictor.parameters()),
                                lr=self.lr)

    self.buffer = Buffer(capacity=self.rollout_len, o_dim=self.o_dim)
    self.normalizer_obs = Normalizer(shape=self.o_dim, clip=self.obs_clip)
    self.normalizer_ri = Normalizer(shape=1, clip=np.inf)
def get_features(arch, size, pooling=True):
    print("## Starting extracting features...")
    if pooling:
        print("## Using pooling..")
    else:
        print("## Not using pooling..")

    # Declare the features extractor
    extractor = FeaturesExtractor(arch)
    normalizer = Normalizer()

    starting = time.time()
    results_features = dict()
    with open('./info/project-info.csv', 'r') as csvfile:
        f_csv = csv.reader(csvfile, delimiter=str(','), quotechar=str('|'))
        next(f_csv)
        for row in f_csv:
            tissue = row[1]
            dye = row[2]
            original_name = row[6]
            if tissue not in results_features:
                results_features[tissue] = dict()
            if dye not in results_features[tissue]:
                results_features[tissue][dye] = None

            patches = get_patches_from_landmarks(tissue, original_name, size=size)
            nb_of_landmarks = len(patches)
            for landmark_nb, (_, _, patch) in enumerate(patches):
                normalize = normalizer.get(tissue, dye)
                extractor.set_normalize(normalize)
                img = Image.fromarray(patch)
                features = extractor.get_features_from_img(
                    img, size, pooling).cpu().numpy().astype(np.float32)
                if landmark_nb == 0:
                    results_features[tissue][dye] = np.zeros(
                        (nb_of_landmarks, features.shape[0]), dtype=np.float32)
                results_features[tissue][dye][landmark_nb] = features

    print(" Elapsed time : {}".format(time.time() - starting))
    return results_features
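# Hedged usage sketch for the get_features() helper above: the 'resnet50'
# architecture name and the 300-pixel patch size are illustrative assumptions,
# not values taken from the source; the project-info CSV is read internally.
features_by_tissue = get_features('resnet50', size=300, pooling=True)
for tissue, per_dye in features_by_tissue.items():
    for dye, feats in per_dye.items():
        # feats is a (nb_of_landmarks, feature_dim) float32 array
        print(tissue, dye, feats.shape)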
def process(config, functions, thread_no, bpe):
    print('thread #%d start' % (thread_no))
    thread_start = time.time()
    index = 0
    token_tokenized = ''
    stmt_tokenized = ''
    bpe_tokenized = ''
    ori_untokenized = ''

    preprocessor = PreProcessor()
    special_cutter = SpecialCharCutter(config)
    brace_cutter = BracesCutter()
    normalizer = Normalizer(config)
    extractor = RelationExtractor(config)
    bcb_base = config.get('IO', 'BCB_CODE_BASE')

    for info_str in functions:
        index += 1
        infos = info_str.split(',')
        file_path = os.path.join(
            bcb_base, os.path.join(infos[0], os.path.join(infos[1], infos[2])))
        start_loc = infos[3]
        end_loc = infos[4]
        with open(file_path, 'r', encoding='iso8859-1') as reader:
            j = 1
            f = ''
            for line in reader.readlines():
                if int(start_loc) <= j <= int(end_loc):
                    f += line.strip() + '\n'
                j += 1
                if j > int(end_loc):
                    break

        f = preprocessor.remove_comments(f)
        f = extract_function_body(f)
        f4ori = f
        f = normalizer.normalize_literal_values(f)
        f = special_cutter.cut(f)
        f = brace_cutter.cut(f)
        _, _, function_bpe, _, bpe_node_list, _ = extractor.extract(f)

        stmt_tokenized += function_bpe + '\nc -1\nh -1\n'
        token_tokenized += re.sub(r'\$\$', ' ', function_bpe) + '\nc -1\nh -1\n'
        function_bpe = bpe.process_line(function_bpe)
        bpe_tokenized += re.sub(r'@@', ' ', function_bpe) + '\nc -1\nh -1\n'
        extractor.reset()

        ori_untokenized += info_str.strip() + '\n\n'
        token_tokenized += info_str.strip() + '\n\n'
        stmt_tokenized += info_str.strip() + '\n\n'
        bpe_tokenized += info_str.strip() + '\n\n'

        if index % 100 == 0:
            print('thread #%d progress %d / %d = %.2f' %
                  (thread_no, index, len(functions), index / len(functions)))

    thread_end = time.time()
    print('thread #%d end in %.2f ' % (thread_no, (thread_end - thread_start)))
    return (ori_untokenized, token_tokenized, stmt_tokenized, bpe_tokenized)
def readLangs(self, reverse=False):
    print("Reading lines...")

    # Read the file and split into lines
    lines = open(self.file, encoding='utf-8').read().strip().split('\n')

    # Split every line into pairs and normalize
    pairs = [[
        Norm.ch_normalizeString(l.split('\t')[0]),
        Norm.ch_normalizeString(l.split('\t')[1])
    ] for l in lines]

    # Reverse pairs, make Lang instances
    if reverse:
        pairs = [list(reversed(p)) for p in pairs]

    return pairs
def main():
    global args, model_args, best_mae_error

    # load data
    dataset = CIFData(args.cifpath)
    collate_fn = collate_pool
    test_loader = DataLoader(dataset,
                             batch_size=args.batch_size,
                             shuffle=True,
                             num_workers=args.workers,
                             collate_fn=collate_fn,
                             pin_memory=args.cuda)

    # build model
    structures, _, _ = dataset[0]
    orig_atom_fea_len = structures[0].shape[-1]
    nbr_fea_len = structures[1].shape[-1]
    model = CrystalGraphConvNet(
        orig_atom_fea_len,
        nbr_fea_len,
        atom_fea_len=model_args.atom_fea_len,
        n_conv=model_args.n_conv,
        h_fea_len=model_args.h_fea_len,
        n_h=model_args.n_h,
        classification=True if model_args.task == 'classification' else False)
    if args.cuda:
        model.cuda()

    # define loss func and optimizer
    if model_args.task == 'classification':
        criterion = nn.NLLLoss()
    else:
        criterion = nn.MSELoss()
    normalizer = Normalizer(torch.zeros(3))

    # optionally resume from a checkpoint
    if os.path.isfile(args.modelpath):
        print("=> loading model '{}'".format(args.modelpath))
        checkpoint = torch.load(args.modelpath,
                                map_location=lambda storage, loc: storage)
        model.load_state_dict(checkpoint['state_dict'])
        normalizer.load_state_dict(checkpoint['normalizer'])
        print("=> loaded model '{}' (epoch {}, validation {})".format(
            args.modelpath, checkpoint['epoch'], checkpoint['best_mae_error']))
    else:
        print("=> no model found at '{}'".format(args.modelpath))

    validate(test_loader, model, criterion, normalizer, test=True)
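# Hedged sketch of the target-value Normalizer that the prediction/training
# scripts above rely on: it is seeded with a sample tensor (e.g. torch.zeros(3)
# or a 500-point sample of targets) and exposes state_dict()/load_state_dict()
# with 'mean' and 'std' keys, as the checkpoint handling shows. The norm()/denorm()
# helper names are assumptions for illustration, not confirmed by the source.
import torch


class Normalizer(object):
    """Normalize a tensor of targets and restore it later (minimal sketch)."""

    def __init__(self, tensor):
        self.mean = torch.mean(tensor)
        self.std = torch.std(tensor)

    def norm(self, tensor):
        return (tensor - self.mean) / self.std

    def denorm(self, normed_tensor):
        return normed_tensor * self.std + self.mean

    def state_dict(self):
        return {'mean': self.mean, 'std': self.std}

    def load_state_dict(self, state_dict):
        self.mean = state_dict['mean']
        self.std = state_dict['std']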
class AdaptiveTransformation():
    def __init__(self):
        self.normalizer = Normalizer()

    def transform(self, tissue, dye):
        return transforms.Compose(
            [transforms.ToTensor(),
             self.normalizer.get(tissue, dye)])
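# Hedged usage sketch for AdaptiveTransformation: assumes Normalizer().get(tissue, dye)
# returns a transforms.Normalize instance and that 'patch_img' is a PIL image already
# loaded elsewhere; the tissue/dye names below are illustrative only.
adaptive = AdaptiveTransformation()
pipeline = adaptive.transform('lung-lesion', 'HE')
tensor = pipeline(patch_img)  # (C, H, W) float tensor, normalized per tissue/dye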
def __init__(self,
             force_factor=5,
             initial_theta=0.0001,
             max_offset=3,
             max_angle=0.25):
    self.norm_x = Normalizer(-max_offset, max_offset)
    self.norm_xdot = Normalizer(-10, 10)
    self.norm_theta = Normalizer(-max_angle, max_angle)
    self.norm_thetadot = Normalizer(-10, 10)
    self.reset()
def load_env(env_name, encoder_path):
    normalizer = Normalizer(0, 499)
    sae = StateAutoEncoder(1, 1, 12, normalize=True, normalizer=normalizer)
    sae.use_checkpoints(encoder_path)

    train_py_env = StateEncoder(suite_gym.load(env_name), sae)
    eval_py_env = StateEncoder(suite_gym.load(env_name), sae)
    train_env = tf_py_environment.TFPyEnvironment(train_py_env)
    eval_env = tf_py_environment.TFPyEnvironment(eval_py_env)
    return (train_env, eval_env)
evaluate = False
test = True
evaluate_times = 10
if_train_have_answer = True
if_eval_have_answer = True

if (os.path.exists(save_path)):
    model.load_state_dict(pt.load(save_path))

# todo: need to confirm whether the session contains the answer
if (train):
    for i in range(disConfig.num_epoch):
        for j, item in enumerate(mydata):
            if (if_train_have_answer):
                answer = Normalizer.ch_normalizeAString(item.context[-1])
                item.context = item.context[:-1]
            # optimizer.zero_grad()
            score = []
            outputs, features = myselect.get_add_features(
                item, disConfig.batch_size)
            print(answer)
            print(outputs)
            print(features)
            for output in outputs:
                output = Normalizer.ch_normalizeAString(output)
                score.append(
                    sentence_bleu([output.split(' ')],
                                  answer.split(' '),
                                  weights=(0.25, 0.25, 0.25, 0.25),
                                  smoothing_function=chencherry.method1))
def __init__(self,
             normalizer=None,
             backbone_type=None,
             backbone_params=None,
             backbone_to_grad_type=None,
             backbone_to_grad_params=None,
             ignore_grad_scale_mismatch=False,
             checkpoint_path=None):
    super().__init__()
    if normalizer is None:
        normalizer = Normalizer.make('vgg')
    if backbone_params is None:
        backbone_params = {}
    if backbone_to_grad_params is None:
        backbone_to_grad_params = {}

    logging.debug('Args contain the following parameters:\n' + '\n'.join([
        f' backbone_type: {backbone_type}',
        f' backbone_params: {backbone_params}',
        f' backbone_to_grad_type: {backbone_to_grad_type}',
        f' backbone_to_grad_params: {backbone_to_grad_params}',
    ]))

    model_state_dict = None
    if checkpoint_path is not None:
        checkpoint = read_checkpoint(checkpoint_path, backbone_type)
        logging.debug(
            'Read checkpoint with the following parameters:\n' + '\n'.join([
                f' backbone_type: {checkpoint["backbone_type"]}',
                f' backbone_params: {checkpoint["backbone_params"]}',
                f' backbone_to_grad_type: {checkpoint["backbone_to_grad_type"]}',
                f' backbone_to_grad_params: {checkpoint["backbone_to_grad_params"]}',
            ]))

        if backbone_type is None:
            backbone_type = checkpoint['backbone_type']
        elif checkpoint['backbone_type'] is not None:
            assert backbone_type == checkpoint['backbone_type'], (
                backbone_type, checkpoint['backbone_type'])

        if backbone_to_grad_type is None:
            backbone_to_grad_type = checkpoint['backbone_to_grad_type']
        elif checkpoint['backbone_to_grad_type'] is not None:
            assert backbone_to_grad_type == checkpoint[
                'backbone_to_grad_type'], (
                    backbone_to_grad_type, checkpoint['backbone_to_grad_type'])

        for key in (set(checkpoint['backbone_params'].keys())
                    & set(backbone_params)):
            value_ckpt = checkpoint['backbone_params'][key]
            value_args = backbone_params[key]
            assert value_args == value_ckpt, (key, value_args, value_ckpt)
        backbone_params.update(checkpoint['backbone_params'])

        for key in (set(checkpoint['backbone_to_grad_params'].keys())
                    & set(backbone_to_grad_params)):
            value_ckpt = checkpoint['backbone_to_grad_params'][key]
            value_args = backbone_to_grad_params[key]
            if key == 'grad_scale' and value_args != value_ckpt and ignore_grad_scale_mismatch:
                logging.warning(
                    f'grad_scale mismatch: provided {value_args}, but checkpoint has {value_ckpt}'
                )
                checkpoint['backbone_to_grad_params'].pop(
                    'grad_scale')  # safe since we're iterating over a copy
            else:
                assert value_args == value_ckpt, (key, value_args, value_ckpt)
        backbone_to_grad_params.update(checkpoint['backbone_to_grad_params'])

        logging.debug('Final checkpoint parameters:\n' + '\n'.join([
            f' backbone_type: {backbone_type}',
            f' backbone_params: {backbone_params}',
            f' backbone_to_grad_type: {backbone_to_grad_type}',
            f' backbone_to_grad_params: {backbone_to_grad_params}',
        ]))
        model_state_dict = checkpoint['state_dict']

    assert backbone_type is not None
    assert backbone_to_grad_type is not None

    self.backbone = {
        'unet': UNetCustom,
        'resnet': ResnetGenerator,
    }[backbone_type](**backbone_params)

    proxy_type = backbone_to_grad_params['type']
    proxy_params = backbone_to_grad_params[proxy_type]
    make_proxy = {
        'raw': ProxyRaw,
        'sigmoid': ProxyAsSigmoid,
        'warped_target': ProxyAsWarpedTarget,
    }[proxy_type](normalizer, **proxy_params)

    if backbone_to_grad_type == 'direct':
        self.backbone_to_grad = PgnPredictGrad(
            make_proxy,
            backbone_to_grad_params['out_scale'],
            backbone_to_grad_params['grad_scale'],
        )
    elif backbone_to_grad_type == 'proxy':
        batchwise_loss_func = {
            'mse': mse_loss_batchwise,
            'l1': l1_loss_batchwise,
            'logcosh': logcosh_loss_batchwise,
            'mse_logit': MseLogitLossBatchwise(normalizer),
            'logcosh_logit': LogcoshLogitLossBatchwise(normalizer),
        }[backbone_to_grad_params['grad_type']]
        self.backbone_to_grad = PgnProxyToGrad(
            make_proxy,
            batchwise_loss_func,
            backbone_to_grad_params['grad_scale'],
        )
    else:
        assert False

    self.backbone_type = backbone_type
    self.backbone_params = backbone_params
    self.backbone_to_grad_type = backbone_to_grad_type
    self.backbone_to_grad_params = backbone_to_grad_params

    if model_state_dict is not None:
        self.backbone.load_state_dict(model_state_dict)
def train(batch_size=2, learning_rate=1e-2, train_epoch=100):
    # Normalizer(), Augmenter(), Resizer() are applied in this order
    transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
    dataset = CocoDataset('./data/coco/', 'train2017', transform)
    data_loader = Data.DataLoader(dataset, batch_size, num_workers=2, shuffle=True,
                                  collate_fn=collater, pin_memory=True)
    dataset_size = len(dataset)
    print('sample number:', dataset_size)
    print('epoch size:', dataset_size / batch_size)

    retinanet = RetinaNet()
    anchor = Anchor()
    focal_loss = FocalLoss()
    if cuda:
        retinanet = torch.nn.DataParallel(retinanet).cuda()
        anchor = anchor.cuda()
        focal_loss = focal_loss.cuda()
        retinanet.module.freeze_bn()

    optimizer = torch.optim.SGD(retinanet.parameters(), lr=learning_rate,
                                momentum=0.9, weight_decay=1e-4)
    # torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.1,
    #     patience=10, verbose=False, threshold=0.0001, threshold_mode='rel',
    #     cooldown=0, min_lr=0, eps=1e-08)
    # mode: 'min' lowers the lr when the monitored quantity stops decreasing,
    #       'max' when it stops increasing (default 'min')
    # factor: multiplicative decay, new_lr = old_lr * factor
    # patience: epochs with no improvement tolerated before decaying the lr
    # verbose: if True, print a message on every update (default False)
    # threshold: only significant changes count as improvement (default 1e-4)
    # cooldown: epochs to wait after a decay before resuming monitoring (default 0)
    # min_lr: lower bound on the learning rate
    # eps: minimal decay applied to lr; smaller updates are ignored (default 1e-8)
    # (parameter notes adapted from
    #  https://blog.csdn.net/weixin_40100431/article/details/84311430)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, patience=3,
                                                           verbose=True)

    for epoch_num in range(train_epoch):
        epoch_loss = []
        for iter_num, data in enumerate(data_loader):
            iter_time = time.time()
            # collater returns a dict batch (cf. the visualization script below);
            # the 'scale' key is assumed here
            images, annots, scales = data['img'], data['annot'], data['scale']
            if cuda:
                images = images.cuda()
                annots = annots.cuda()
                scales = scales.cuda()

            total_anchors = anchor(images)
            classification, localization = retinanet(images)
            cls_loss, loc_loss = focal_loss(classification, localization,
                                            total_anchors, annots)
            loss = cls_loss + loc_loss
            epoch_loss.append(float(loss))

            optimizer.zero_grad()
            loss.backward()
            # torch.nn.utils.clip_grad_norm_() rescales gradients so their norm
            # stays below the given threshold, which bounds the size of parameter
            # updates when some layers amplify the gradient they receive.
            # (see https://discuss.pytorch.org/t/about-torch-nn-utils-clip-grad-norm/13873)
            torch.nn.utils.clip_grad_norm_(retinanet.parameters(), 0.1)
            optimizer.step()

            print('|', 'Epoch:', epoch_num + 1, '|', 'Iter:', iter_num + 1,
                  '|', 'cls loss:', float(cls_loss),
                  '|', 'loc loss:', float(loc_loss),
                  '|', 'loss:', float(loss),
                  '|', 'lr:', float(optimizer.param_groups[0]['lr']),
                  '|', 'time:', time.time() - iter_time)

        scheduler.step(np.mean(epoch_loss))
        print('Saving parameters in model on epoch', epoch_num + 1)
        torch.save(retinanet.state_dict(),
                   './param/param_epoch' + str(epoch_num + 1).zfill(3) + '.pkl')
def main():
    global args, best_mae_error

    # Dataset from CIF files
    dataset = CIFData(*args.data_options)
    print(f'Dataset size: {len(dataset)}')

    # Dataloader from dataset
    train_loader, val_loader, test_loader = get_train_val_test_loader(
        dataset=dataset,
        collate_fn=collate_pool,
        batch_size=args.batch_size,
        train_size=args.train_size,
        num_workers=args.workers,
        val_size=args.val_size,
        test_size=args.test_size,
        pin_memory=args.cuda,
        return_test=True)

    # Initialize data normalizer with a sample of 500 points
    if args.task == 'classification':
        normalizer = Normalizer(torch.zeros(2))
        normalizer.load_state_dict({'mean': 0., 'std': 1.})
    elif args.task == 'regression':
        if len(dataset) < 500:
            warnings.warn('Dataset has less than 500 data points. '
                          'Lower accuracy is expected. ')
            sample_data_list = [dataset[i] for i in range(len(dataset))]
        else:
            sample_data_list = [
                dataset[i] for i in sample(range(len(dataset)), 500)
            ]
        _, sample_target, _ = collate_pool(sample_data_list)
        normalizer = Normalizer(sample_target)
    else:
        raise NameError('task argument must be regression or classification')

    # Build model
    structures, _, _ = dataset[0]
    orig_atom_fea_len = structures[0].shape[-1]
    nbr_fea_len = structures[1].shape[-1]
    model = CrystalGraphConvNet(orig_atom_fea_len,
                                nbr_fea_len,
                                atom_fea_len=args.atom_fea_len,
                                n_conv=args.n_conv,
                                h_fea_len=args.h_fea_len,
                                n_h=args.n_h,
                                classification=(args.task == 'classification'))

    # GPU
    if args.cuda:
        model.cuda()

    # Loss function
    criterion = nn.NLLLoss() if args.task == 'classification' else nn.MSELoss()

    # Optimizer
    if args.optim == 'SGD':
        optimizer = optim.SGD(model.parameters(),
                              args.lr,
                              momentum=args.momentum,
                              weight_decay=args.weight_decay)
    elif args.optim == 'Adam':
        optimizer = optim.Adam(model.parameters(),
                               args.lr,
                               weight_decay=args.weight_decay)
    else:
        raise NameError('optim argument must be SGD or Adam')

    # Scheduler
    scheduler = MultiStepLR(optimizer, milestones=args.lr_milestones, gamma=0.1)

    # Resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_mae_error = checkpoint['best_mae_error']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            normalizer.load_state_dict(checkpoint['normalizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    # Train
    for epoch in range(args.start_epoch, args.epochs):
        # Train (one epoch)
        train(train_loader, model, criterion, optimizer, epoch, normalizer)

        # Validate
        mae_error = validate(val_loader, model, criterion, normalizer)
        assert mae_error == mae_error, 'NaN :('

        # Step learning rate scheduler (MultiStepLR takes no metric argument)
        scheduler.step()

        # Save checkpoint
        if args.task == 'regression':
            is_best = mae_error < best_mae_error
            best_mae_error = min(mae_error, best_mae_error)
        else:
            is_best = mae_error > best_mae_error
            best_mae_error = max(mae_error, best_mae_error)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_mae_error': best_mae_error,
                'optimizer': optimizer.state_dict(),
                'normalizer': normalizer.state_dict(),
                'args': vars(args)
            }, is_best)

    # Evaluate best model on test set
    print('--------- Evaluate model on test set ---------------')
    best_checkpoint = torch.load('model_best.pth.tar')
    model.load_state_dict(best_checkpoint['state_dict'])
    validate(test_loader, model, criterion, normalizer, test=True)
def __init__(self, args, env_params):
    self.s_dim = env_params['o_dim'] + env_params['g_dim']
    self.a_dim = env_params['a_dim']
    self.f_dim = args.f_dim
    self.action_bound = env_params['action_max']
    self.max_timestep = env_params['max_timestep']
    self.max_episode = args.max_episode
    self.evaluate_episode = args.evaluate_episode
    self.evaluate_interval = args.evaluate_interval
    self.log_interval = args.log_interval
    self.save_model_interval = args.save_model_interval
    self.save_model_start = args.save_model_start
    self.lr = args.lr
    self.lr_model = args.lr_model
    self.gamma = args.gamma
    self.batch_size = args.batch_size
    self.tau = args.tau
    self.eta = args.eta
    self.noise_eps = args.noise_eps
    self.device = torch.device(args.device)

    self.normalizer_s = Normalizer(size=self.s_dim, eps=1e-2, clip_range=1.)
    self.memory = Memory(size=args.memory_size, s_dim=self.s_dim, a_dim=self.a_dim)

    self.policy = Policy(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
    self.policy_target = Policy(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
    self.Q = QFunction(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
    self.Q_target = QFunction(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
    self.optimizer_p = optim.Adam(self.policy.parameters(), lr=self.lr)
    self.optimizer_q = optim.Adam(self.Q.parameters(), lr=self.lr)

    self.encoder = StateEncoder(s_dim=self.s_dim, f_dim=self.f_dim).to(self.device)
    self.EnvForward = ForwardModel(f_dim=self.f_dim, a_dim=self.a_dim).to(self.device)
    self.EnvInverse = InverseModel(f_dim=self.f_dim, a_dim=self.a_dim).to(self.device)
    self.optimizer_forward = optim.Adam(
        [{'params': self.EnvForward.parameters()},
         {'params': self.encoder.parameters()}],
        lr=self.lr_model)
    self.optimizer_inverse = optim.Adam(
        [{'params': self.EnvInverse.parameters()},
         {'params': self.encoder.parameters()}],
        lr=self.lr_model)

    self.hard_update()
    self.update_num = 0
def __init__(self):
    self.normalizer = Normalizer()
def main(): """main function""" args = get_args() shift = args.shift normalizer = Normalizer() print("\n#################") print("### Arguments ###") print("#################") for arg in vars(args): print(f"{arg} : {getattr(args, arg)}") print("#################\n") # creating the pairs of dyes to analyze pairs = generate_pairs() for i, (tissue, dye1, dye2, images_path, original_name1, original_name2, extension) in enumerate(pairs): # Each element of the pairs will play the role of the target for s in range(2): if s == 1: dye1, dye2 = dye2, dye1 original_name1, original_name2 = original_name2, original_name1 start_time = time() output_filename = os.path.join( args.output, f"data/{args.distance}/{args.size}/{args.arch}/{tissue}_{dye1}_{dye2}_{args.arch}_{args.size}_{args.pool}_{args.resize}.data" ) if not os.path.exists(output_filename): print(f"File {output_filename} does not exist") mkdir(os.path.dirname(output_filename)) else: print(f"File {output_filename} exists\n") continue # filename1 : reference (comparison from its annotation to those from filename1) -> get the patches # filename2 : image to cut print(tissue, dye1, dye2, Paths.PATH_TO_IMAGES, original_name1, original_name2) # Get patches from image 1 start_time_get_patches1 = time() patches_img1_landmarks = get_patches_from_landmarks( tissue, original_name1, size=get_args().size) time_get_patches1 = time() - start_time_get_patches1 # Get patches from image 2 start_time_get_patches2 = time() patches_img2 = segment_image(os.path.join( images_path, original_name2 + extension), size=get_args().size, shift=shift) time_get_patches2 = time() - start_time_get_patches2 ################# # # Is useful to make to have the results for one pair whose target has to be rotated # angle = -75 # img = img = Image.open(images_path + original_name2 + extension) # im2 = img.convert('RGBA') # # rotated image # rot = im2.rotate(angle, expand=1) # # a white image same size as rotated image # fff = Image.new('RGBA', rot.size, (255,)*4) # # create a composite image using the # out = Image.composite(rot, fff, rot) # out = out.convert(img.mode) # patches_img2 = segment_image(img=out, size=get_args().size, shift=shift) # time_get_patches2 = time() - start_time_get_patches2 ################## # get the features # number of available landmarks for the particular tissue nb_of_landmarks = len(patches_img1_landmarks) print("==> Img1 ({} {}) : {}".format(tissue, dye1, nb_of_landmarks)) print("==> Img2 ({} {}) : {}".format(tissue, dye2, len(patches_img2))) start_time_features_img1_landmarks = time() normalize_dye1 = normalizer.get(tissue, dye1) features_img1_landmarks = get_features(patches_img1_landmarks, normalize_dye1) time_get_features1 = time() - start_time_features_img1_landmarks patches_img1_landmarks = "" del patches_img1_landmarks gc.collect() start_time_features_img2_landmarks = time() normalize_dye2 = normalizer.get(tissue, dye2) features_img2 = get_features(patches_img2, normalize_dye2) time_get_features2 = time() - start_time_features_img2_landmarks feature_size = features_img1_landmarks.shape[1] print("===> Features size : {}".format(feature_size)) # Keep only the center and coordinates of patches_img2 patches_img2 = [(x[0], x[1]) for x in patches_img2] gc.collect() # Compare start_time_comparison = time() results_comparison = compare(features_img1_landmarks, features_img2, args.distance) time_comparison = time() - start_time_comparison features_img2 = "" del features_img2 features_img1_landmarks = "" del features_img1_landmarks gc.collect() # Get the 
position of the landmarks of dye2 start_time_position_landmarks = time() position_landmarks_dye2 = get_position_landmarks( tissue, original_name2) time_position_landmarks = time() - start_time_position_landmarks # Get top-k accuracy start_time_get_accuracy = time() k_list = [1, 5] # count the landmarks respecting the condition counter = [0] * len(k_list) for i in range(nb_of_landmarks): array = [(k, x) for k, x in enumerate(results_comparison[i])] array.sort(key=lambda x: x[1], reverse=True) for c, k in enumerate(k_list): indices_of_best_matches = None if args.distance == "cos": indices_of_best_matches = [x[0] for x in array[:k]] elif args.distance == "eucl" or args.distance == "eucl-norm": indices_of_best_matches = [x[0] for x in array[-k:]] # get the position of the k centers that best matches centers = [ patches_img2[ind][1] for ind in indices_of_best_matches ] true_position = position_landmarks_dye2[i] distances = [ euclidean_distance(np.array(center), np.array(true_position)) for center in centers ] distances = np.array(distances) # if at least a patch center is within a certain radius around the true landmark if distances[distances <= args.size / 2].shape[0] != 0: counter[c] += 1 table = [] top_accuracy_list = [] for c, k in enumerate(k_list): acc = round(counter[c] / nb_of_landmarks, 4) top_accuracy_list.append((k, acc)) table.append([str(k), str(acc)]) t = tabulate(table, headers=['k', 'Top-k accuracy']) print("\n", t, "\n") time_get_accuracy = time() - start_time_get_accuracy elapsed_time = time() - start_time table = [ [ 'Patches image 1', str(datetime.timedelta(seconds=time_get_patches1)) ], [ 'Patches image 2', str(datetime.timedelta(seconds=time_get_patches2)) ], [ 'Features image 1', str(datetime.timedelta(seconds=time_get_features1)) ], [ 'Features image 2', str(datetime.timedelta(seconds=time_get_features2)) ], [ 'Position landmarks image 2', str(datetime.timedelta(seconds=time_position_landmarks)) ], [ 'Comparison', str(datetime.timedelta(seconds=time_comparison)) ], [ 'Compute accuracy', str(datetime.timedelta(seconds=time_get_accuracy)) ], [ 'Elapsed time', str(datetime.timedelta(seconds=elapsed_time)) ] ] t = tabulate(table, headers=['', 'Time (h:m:s)']) print(t, "\n") info = { "args": vars(args), "pair": (tissue, dye1, dye2, images_path, original_name1, original_name2, extension), "results_comparison": results_comparison, "nb_of_landmarks": nb_of_landmarks, "feature_size": feature_size, "counter": counter, "top_accuracy_list": top_accuracy_list, "time": elapsed_time, "time_get_patches1": time_get_patches1, "time_get_patches2": time_get_patches2, "time_get_features1": time_get_features1, "time_get_features2": time_get_features2, "time_position_landmarks": time_position_landmarks, "time_comparison": time_comparison, "time_get_accuracy": time_get_accuracy, } with open(output_filename, 'wb') as output_file: pickle.dump(info, output_file)
def main():
    global args, best_error_global, best_error_local, savepath, dataset

    parser = buildParser()
    args = parser.parse_args()

    print('Torch Device being used: ', cfg.device)

    # create the savepath
    savepath = args.save_dir + str(args.name) + '/'
    if not os.path.exists(savepath):
        os.makedirs(savepath)

    # Writes to file and also to terminal
    sys.stdout = Logger(savepath)
    print(vars(args))

    best_error_global, best_error_local = 1e10, 1e10
    randomSeed(args.seed)

    # create train/val/test dataset separately
    assert os.path.exists(args.protein_dir), '{} does not exist!'.format(
        args.protein_dir)
    all_dirs = [
        d for d in os.listdir(args.protein_dir) if not d.startswith('.DS_Store')
    ]
    dir_len = len(all_dirs)
    indices = list(range(dir_len))
    random.shuffle(indices)

    train_size = math.floor(args.train * dir_len)
    val_size = math.floor(args.val * dir_len)
    test_size = math.floor(args.test * dir_len)

    test_dirs = all_dirs[:test_size]
    train_dirs = all_dirs[test_size:test_size + train_size]
    val_dirs = all_dirs[test_size + train_size:test_size + train_size + val_size]
    print('Testing on {} protein directories:'.format(len(test_dirs)))

    dataset = ProteinDataset(args.pkl_dir, args.id_prop, args.atom_init,
                             random_seed=args.seed)
    print('Dataset length: ', len(dataset))

    # load all model args from pretrained model
    if args.pretrained is not None and os.path.isfile(args.pretrained):
        print("=> loading model params '{}'".format(args.pretrained))
        model_checkpoint = torch.load(args.pretrained,
                                      map_location=lambda storage, loc: storage)
        model_args = argparse.Namespace(**model_checkpoint['args'])
        # override all args value with model_args
        args.h_a = model_args.h_a
        args.h_g = model_args.h_g
        args.n_conv = model_args.n_conv
        args.random_seed = model_args.seed
        args.lr = model_args.lr
        print("=> loaded model params '{}'".format(args.pretrained))
    else:
        print("=> no model params found at '{}'".format(args.pretrained))

    # build model
    kwargs = {
        'pkl_dir': args.pkl_dir,      # Root directory for data
        'atom_init': args.atom_init,  # Atom Init filename
        'h_a': args.h_a,              # Dim of the hidden atom embedding learnt
        'h_g': args.h_g,              # Dim of the hidden graph embedding after pooling
        'n_conv': args.n_conv,        # Number of GCN layers
        'random_seed': args.seed,     # Seed to fix the simulation
        'lr': args.lr,                # Learning rate for optimizer
    }
    structures, _, _ = dataset[0]
    h_b = structures[1].shape[-1]
    kwargs['h_b'] = h_b               # Dim of the bond embedding initialization

    # Use DataParallel for faster training
    print("Let's use", torch.cuda.device_count(), "GPUs and Data Parallel Model.")
    model = ProteinGCN(**kwargs)
    model = torch.nn.DataParallel(model)
    model.cuda()
    print('Trainable Model Parameters: ', count_parameters(model))

    # Create dataloader to iterate through the dataset in batches
    train_loader, val_loader, test_loader = get_train_val_test_loader(
        dataset,
        train_dirs,
        val_dirs,
        test_dirs,
        collate_fn=collate_pool,
        num_workers=args.workers,
        batch_size=args.batch_size,
        pin_memory=False)
    try:
        print('Training data    : ', len(train_loader.sampler))
        print('Validation data  : ', len(val_loader.sampler))
        print('Testing data     : ', len(test_loader.sampler))
    except Exception as e:
        # sometimes test may not be defined
        print('\nException Cause: {}'.format(e.args[0]))

    # obtain target value normalizer
    if len(dataset) < args.avg_sample:
        sample_data_list = [dataset[i] for i in tqdm(range(len(dataset)))]
    else:
        sample_data_list = [
            dataset[i]
            for i in tqdm(random.sample(range(len(dataset)), args.avg_sample))
        ]
    _, _, sample_target = collate_pool(sample_data_list)
    normalizer_global = Normalizer(sample_target[0])
    normalizer_local = Normalizer(sample_target[1])

    # load the model state dict from given pretrained model
    if args.pretrained is not None and os.path.isfile(args.pretrained):
        print("=> loading model '{}'".format(args.pretrained))
        checkpoint = torch.load(args.pretrained,
                                map_location=lambda storage, loc: storage)
        print('Best error global: ', checkpoint['best_error_global'])
        print('Best error local: ', checkpoint['best_error_local'])
        best_error_global = checkpoint['best_error_global']
        best_error_local = checkpoint['best_error_local']
        model.module.load_state_dict(checkpoint['state_dict'])
        model.module.optimizer.load_state_dict(checkpoint['optimizer'])
        normalizer_local.load_state_dict(checkpoint['normalizer_local'])
        normalizer_global.load_state_dict(checkpoint['normalizer_global'])
    else:
        print("=> no model found at '{}'".format(args.pretrained))

    # Main training loop
    for epoch in range(args.epochs):
        # Training
        [train_error_global, train_error_local, train_loss] = trainModel(
            train_loader, model, normalizer_global, normalizer_local, epoch=epoch)
        # Validation
        [val_error_global, val_error_local, val_loss] = trainModel(
            val_loader, model, normalizer_global, normalizer_local,
            epoch=epoch, evaluation=True)

        # check for error overflow
        if (val_error_global != val_error_global) or (val_error_local !=
                                                      val_error_local):
            print('Exit due to NaN')
            sys.exit(1)

        # remember the best error and possibly save checkpoint
        is_best = val_error_global < best_error_global
        best_error_global = min(val_error_global, best_error_global)
        best_error_local = val_error_local

        # save best model
        if args.save_checkpoints:
            model.module.save(
                {
                    'epoch': epoch,
                    'state_dict': model.module.state_dict(),
                    'best_error_global': best_error_global,
                    'best_error_local': best_error_local,
                    'optimizer': model.module.optimizer.state_dict(),
                    'normalizer_global': normalizer_global.state_dict(),
                    'normalizer_local': normalizer_local.state_dict(),
                    'args': vars(args)
                }, is_best, savepath)

    # test best model using saved checkpoints
    if args.save_checkpoints and len(test_loader):
        print('---------Evaluate Model on Test Set---------------')
        # this try/except allows the code to test on the go or by defining a pretrained path separately
        try:
            best_checkpoint = torch.load(savepath + 'model_best.pth.tar')
        except Exception as e:
            best_checkpoint = torch.load(args.pretrained)
        model.module.load_state_dict(best_checkpoint['state_dict'])
        [test_error_global, test_error_local, test_loss] = trainModel(
            test_loader, model, normalizer_global, normalizer_local, testing=True)
class BaseCartPoleEnvironment(Environment):
    @save_args
    def __init__(self,
                 force_factor=5,
                 initial_theta=0.0001,
                 max_offset=3,
                 max_angle=0.25):
        self.norm_x = Normalizer(-max_offset, max_offset)
        self.norm_xdot = Normalizer(-10, 10)
        self.norm_theta = Normalizer(-max_angle, max_angle)
        self.norm_thetadot = Normalizer(-10, 10)
        self.reset()

    def reset(self, agent_ids=None):
        self.pendulum = PendulumDynamics(0, 0, self.initial_theta, 0)

    @property
    def number_of_agents(self):
        return 1

    @property
    def state_size(self):
        return 4

    @property
    def action_size(self):
        raise NotImplementedError()

    @property
    def state(self):
        return (
            self.norm_x(self.pendulum.x),
            self.norm_xdot(self.pendulum.xdot),
            self.norm_theta(self.pendulum.theta),
            self.norm_thetadot(self.pendulum.thetadot),
        )

    def denormalize_state(self, state):
        x, xdot, theta, thetadot = state
        return (
            self.norm_x.denormalize(x),
            self.norm_xdot.denormalize(xdot),
            self.norm_theta.denormalize(theta),
            self.norm_thetadot.denormalize(thetadot),
        )

    @property
    def is_terminal(self):
        return (not self.norm_x.is_inside(self.pendulum.x)
                or not self.norm_xdot.is_inside(self.pendulum.xdot)
                or not self.norm_theta.is_inside(self.pendulum.theta)
                or not self.norm_thetadot.is_inside(self.pendulum.thetadot))

    @staticmethod
    def _get_force(action):
        raise NotImplementedError()

    def apply_action(self, agent, action):
        act = self._get_force(action)
        self.pendulum.step_simulate(self.force_factor * act)
        return self.is_terminal, self.state
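# Hedged sketch of a concrete subclass: BaseCartPoleEnvironment leaves action_size
# and _get_force abstract, so a minimal discrete variant could map action 0/1 to a
# left/right unit force. The class name and mapping below are illustrative only,
# not taken from the source.
class BinaryCartPoleEnvironment(BaseCartPoleEnvironment):
    @property
    def action_size(self):
        return 2

    @staticmethod
    def _get_force(action):
        # action 0 -> push left, action 1 -> push right
        return -1.0 if action == 0 else 1.0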
class PPOAgent():
    def __init__(self, args, env_params):
        self.o_dim = env_params['o_dim']
        self.a_dim = env_params['a_dim']
        self.r_dim = args.r_dim
        self.lr = args.lr
        self.gamma_e = args.gamma_e
        self.gamma_i = args.gamma_i
        self.lamda = args.lamda
        self.entropy_coef = args.entropy_coef
        self.ex_coef = args.ex_coef
        self.in_coef = args.in_coef
        self.clip_eps = args.clip_eps
        self.update_epoch = args.update_epoch
        self.batch_size = args.batch_size
        self.initialize_episode = args.initialize_episode
        self.update_proportion = args.update_proportion
        self.rollout_len = args.rollout_len
        self.obs_clip = args.obs_clip
        self.device = torch.device(args.device)

        self.actor_critic = CNNActorCritic(in_channel=self.o_dim[0],
                                           a_dim=self.a_dim).to(self.device)
        self.RND = RNDNetwork(in_channel=1).to(self.device)
        self.optimizer = optim.Adam(list(self.actor_critic.parameters()) +
                                    list(self.RND.predictor.parameters()),
                                    lr=self.lr)

        self.buffer = Buffer(capacity=self.rollout_len, o_dim=self.o_dim)
        self.normalizer_obs = Normalizer(shape=self.o_dim, clip=self.obs_clip)
        self.normalizer_ri = Normalizer(shape=1, clip=np.inf)

    def choose_action(self, obs):
        obs = torch.from_numpy(obs).float().to(self.device) / 255.
        with torch.no_grad():
            action_logits = self.actor_critic.act(obs)
        dist = Categorical(action_logits)
        action = dist.sample()
        log_prob = dist.log_prob(action)
        action, log_prob = action.cpu().detach().numpy(), log_prob.cpu().detach().numpy()
        return action, log_prob

    def compute_intrinsic_reward(self, obs_):
        obs_ = self.normalizer_obs.normalize(obs_)
        obs_ = torch.from_numpy(obs_[:, 3:, :, :]).float().to(self.device)
        with torch.no_grad():
            pred_feature, tar_feature = self.RND(obs_)
            reward_in = F.mse_loss(pred_feature, tar_feature,
                                   reduction='none').mean(dim=-1)
        reward_in = reward_in.cpu().detach().numpy()
        return reward_in

    def GAE_caculate(self, rewards, masks, values, gamma, lamda):
        returns = np.zeros(shape=len(rewards), dtype=np.float32)
        deltas = np.zeros(shape=len(rewards), dtype=np.float32)
        advantages = np.zeros(shape=len(rewards), dtype=np.float32)
        pre_return = 0.
        pre_advantage = 0.
        pre_value = 0.
        for i in reversed(range(len(rewards))):
            returns[i] = rewards[i] + masks[i] * gamma * pre_return
            deltas[i] = rewards[i] + masks[i] * gamma * pre_value - values[i]
            advantages[i] = deltas[i] + gamma * lamda * pre_advantage
            pre_return = returns[i]
            pre_value = values[i]
            pre_advantage = advantages[i]
        return returns, deltas, advantages

    def update(self, o, a, r_i, r_e, mask, o_, log_prob):
        self.normalizer_obs.update(o_.reshape(-1, 4, 84, 84).copy())
        self.normalizer_ri.update(r_i.reshape(-1).copy())
        r_i = self.normalizer_ri.normalize(r_i)
        o_ = self.normalizer_obs.normalize(o_)
        o = torch.from_numpy(o).to(self.device).float() / 255.

        returns_ex = np.zeros_like(r_e)
        returns_in = np.zeros_like(r_e)
        advantage_ex = np.zeros_like(r_e)
        advantage_in = np.zeros_like(r_e)
        for i in range(r_e.shape[0]):
            action_logits, value_ex, value_in = self.actor_critic(o[i])
            value_ex, value_in = value_ex.cpu().detach().numpy(), value_in.cpu().detach().numpy()
            returns_ex[i], _, advantage_ex[i] = self.GAE_caculate(
                r_e[i], mask[i], value_ex, self.gamma_e, self.lamda)  # episodic
            returns_in[i], _, advantage_in[i] = self.GAE_caculate(
                r_i[i], np.ones_like(mask[i]), value_in, self.gamma_i,
                self.lamda)  # non_episodic

        o = o.reshape((-1, 4, 84, 84))
        a = np.reshape(a, -1)
        o_ = np.reshape(o_[:, :, 3, :, :], (-1, 1, 84, 84))
        log_prob = np.reshape(log_prob, -1)
        returns_ex = np.reshape(returns_ex, -1)
        returns_in = np.reshape(returns_in, -1)
        advantage_ex = np.reshape(advantage_ex, -1)
        advantage_in = np.reshape(advantage_in, -1)

        a = torch.from_numpy(a).float().to(self.device)
        o_ = torch.from_numpy(o_).float().to(self.device)
        log_prob = torch.from_numpy(log_prob).float().to(self.device)
        returns_ex = torch.from_numpy(returns_ex).float().to(self.device).unsqueeze(dim=1)
        returns_in = torch.from_numpy(returns_in).float().to(self.device).unsqueeze(dim=1)
        advantage_ex = torch.from_numpy(advantage_ex).float().to(self.device)
        advantage_in = torch.from_numpy(advantage_in).float().to(self.device)

        sample_range = list(range(len(o)))
        for i_update in range(self.update_epoch):
            np.random.shuffle(sample_range)
            for j in range(int(len(o) / self.batch_size)):
                idx = sample_range[self.batch_size * j:self.batch_size * (j + 1)]

                # update RND (only a random proportion of samples contributes;
                # uniform noise so update_proportion is the expected fraction)
                pred_RND, tar_RND = self.RND(o_[idx])
                loss_RND = F.mse_loss(pred_RND, tar_RND.detach(),
                                      reduction='none').mean(-1)
                mask_rnd = torch.rand(len(loss_RND)).to(self.device)
                mask_rnd = (mask_rnd < self.update_proportion).type(
                    torch.FloatTensor).to(self.device)
                loss_RND = (loss_RND * mask_rnd).sum() / torch.max(
                    mask_rnd.sum(), torch.Tensor([1]).to(self.device))

                # update actor-critic
                action_logits, value_ex, value_in = self.actor_critic(o[idx])
                advantage = self.ex_coef * advantage_ex[idx] + self.in_coef * advantage_in[idx]
                dist = Categorical(action_logits)
                new_log_prob = dist.log_prob(a[idx])
                ratio = torch.exp(new_log_prob - log_prob[idx])
                surr1 = ratio * advantage
                surr2 = torch.clamp(ratio, 1 - self.clip_eps,
                                    1 + self.clip_eps) * advantage
                # PPO maximizes the clipped surrogate, so the loss is its negative
                loss_actor = -torch.min(surr1, surr2).mean() - \
                    self.entropy_coef * dist.entropy().mean()
                loss_critic = F.mse_loss(value_ex, returns_ex[idx]) + \
                    F.mse_loss(value_in, returns_in[idx])
                loss_ac = loss_actor + 0.5 * loss_critic

                loss = loss_RND + loss_ac
                self.optimizer.zero_grad()
                loss.backward()
                global_grad_norm_(list(self.actor_critic.parameters()) +
                                  list(self.RND.predictor.parameters()))
                self.optimizer.step()

        return (loss_RND.cpu().detach().numpy(),
                loss_actor.cpu().detach().numpy(),
                loss_critic.cpu().detach().numpy())

    def save_model(self, remark):
        if not os.path.exists('pretrained_models_PPO/'):
            os.mkdir('pretrained_models_PPO/')
        path = 'pretrained_models_PPO/{}.pt'.format(remark)
        print('Saving model to {}'.format(path))
        torch.save(self.actor_critic.state_dict(), path)

    def load_model(self, load_model_remark):
        print('Loading models with remark {}'.format(load_model_remark))
        model = torch.load(
            'pretrained_models_PPO/{}.pt'.format(load_model_remark),
            map_location=lambda storage, loc: storage)
        self.actor_critic.load_state_dict(model)
        # finish, print:
        print('episode', episode, 'reward_evaluation', reward_evaluation)
        fitness.append(reward_evaluation)
    return fitness


if __name__ == '__main__':
    hp = Hp()
    work_dir = mkdir('exp', 'brs')
    monitor_dir = mkdir(work_dir, 'monitor')

    env = gym.make(hp.env_name)
    env.seed(hp.seed)
    torch.manual_seed(hp.seed)
    env = wrappers.Monitor(env, monitor_dir, force=True)

    num_inputs = env.observation_space.shape[0]
    num_outputs = env.action_space.shape[0]
    policy = nn.Linear(num_inputs, num_outputs, bias=True)
    policy.weight.data.fill_(0)
    policy.bias.data.fill_(0)

    pso = PSO(policy, hp.lr, hp.std, hp.b, hp.n_directions)
    normalizer = Normalizer(num_inputs)
    fitness = train(env, pso, normalizer, hp)

    import matplotlib.pyplot as plt
    plt.plot(fitness)
    plt.show()
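# Hedged sketch of the running-statistics observation normalizer that the
# Normalizer(num_inputs) call above suggests (as in common ARS-style scripts);
# the observe()/normalize() method names and internals here are assumptions,
# not confirmed by the source.
import numpy as np


class RunningNormalizer():
    def __init__(self, num_inputs):
        self.n = np.zeros(num_inputs)
        self.mean = np.zeros(num_inputs)
        self.mean_diff = np.zeros(num_inputs)
        self.var = np.zeros(num_inputs)

    def observe(self, x):
        # Welford-style online update of mean and variance
        self.n += 1.
        last_mean = self.mean.copy()
        self.mean += (x - self.mean) / self.n
        self.mean_diff += (x - last_mean) * (x - self.mean)
        self.var = (self.mean_diff / self.n).clip(min=1e-2)

    def normalize(self, inputs):
        return (inputs - self.mean) / np.sqrt(self.var)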
def train():
    normalizer = Normalizer(0, 499)
    sae = StateAutoEncoder(1, 1, num_state_bits, normalize=True,
                           normalizer=normalizer)
    sae.use_checkpoints(encoder_path)

    train_env, _ = load_env(env_name, sae)

    master_action_spec = array_spec.BoundedArraySpec(shape=((num_options, )),
                                                     dtype=np.float32,
                                                     minimum=0,
                                                     maximum=1,
                                                     name='master_action')
    options_observation_spec = array_spec.BoundedArraySpec(
        shape=((num_options + num_state_bits), ),
        dtype=np.float32,
        minimum=0,
        maximum=1,
        name='option_observation')
    options_action_spec = array_spec.BoundedArraySpec(shape=(num_state_bits, 2),
                                                      dtype=np.float32,
                                                      minimum=0,
                                                      maximum=1,
                                                      name='option_action')
    options_time_step_spec = ts.TimeStep(
        step_type=train_env.time_step_spec().step_type,
        reward=train_env.time_step_spec().reward,
        discount=train_env.time_step_spec().discount,
        observation=options_observation_spec)

    num_actions = train_env.action_spec().maximum - \
        train_env.action_spec().minimum + 1
    low_level_model, callbacks = setup_model(num_actions, num_state_bits, sae,
                                             low_level_model_path)
    low_level_env = LowLevelEnv(train_env, low_level_model)

    options_env = OptionsEnv(low_level_env, options_observation_spec,
                             options_action_spec)
    option_train_env = tf_py_environment.TFPyEnvironment(options_env)

    master_env = MasterEnv(low_level_env, master_action_spec)
    master_train_env = tf_py_environment.TFPyEnvironment(master_env)

    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)
    global_step = tf.compat.v1.train.get_or_create_global_step()

    master_value_network = value_network.ValueNetwork(
        master_train_env.time_step_spec().observation, fc_layer_params=(100, ))
    master_actor_network = actor_distribution_network.ActorDistributionNetwork(
        master_train_env.time_step_spec().observation,
        master_train_env.action_spec(),
        fc_layer_params=(100, ))
    master_agent = ppo_agent.PPOAgent(master_train_env.time_step_spec(),
                                      master_train_env.action_spec(),
                                      optimizer=optimizer,
                                      actor_net=master_actor_network,
                                      value_net=master_value_network,
                                      train_step_counter=tf.Variable(0))
    master_agent.initialize()
    master_agent.train = common.function(master_agent.train)
    options_env.set_master_policy(master_agent.policy)

    options_critic_net = critic_network.CriticNetwork(
        (option_train_env.observation_spec(), option_train_env.action_spec()),
        observation_fc_layer_params=None,
        action_fc_layer_params=None,
        joint_fc_layer_params=(100, ),
        kernel_initializer='glorot_uniform',
        last_kernel_initializer='glorot_uniform')
    options_actor_net = OptionsNetwork(option_train_env.observation_spec(),
                                       option_train_env.action_spec(), 4)
    options_agent = sac_agent.SacAgent(
        option_train_env.time_step_spec(),
        option_train_env.action_spec(),
        actor_network=options_actor_net,
        critic_network=options_critic_net,
        actor_optimizer=tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate),
        critic_optimizer=tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate),
        alpha_optimizer=tf.compat.v1.train.AdamOptimizer(
            learning_rate=learning_rate),
        target_update_tau=target_update_tau,
        target_update_period=target_update_period,
        td_errors_loss_fn=tf.math.squared_difference,
        gamma=gamma,
        reward_scale_factor=reward_scale_factor,
        train_step_counter=tf.Variable(0))
    options_agent.initialize()
    options_agent.train = common.function(options_agent.train)
    master_env.set_options_policy(options_agent.policy)

    master_rb = create_replay_buffer(master_agent, batch_size,
                                     replay_buffer_max_length)
    options_rb = create_replay_buffer(options_agent, batch_size,
                                      replay_buffer_max_length)

    master_ds = master_rb.as_dataset(num_parallel_calls=3,
                                     sample_batch_size=batch_size,
                                     num_steps=2)
    master_iter = iter(master_ds)
    options_ds = options_rb.as_dataset(num_parallel_calls=3,
                                       sample_batch_size=batch_size,
                                       num_steps=2)
    options_iter = iter(options_ds)

    master_checkpointer = create_train_checkpointer(checkpoint_dir + "master/",
                                                    master_agent, master_rb,
                                                    global_step)
    options_checkpointer = create_train_checkpointer(
        checkpoint_dir + "options/", options_agent, options_rb, global_step)

    master_saver = policy_saver.PolicySaver(master_agent.policy)
    options_saver = policy_saver.PolicySaver(options_agent.policy)

    def check_interval(interval):
        return global_step % interval == 0

    while (global_step < num_iterations):
        populate_buffer(master_train_env, master_rb,
                        master_agent.collect_policy,
                        master_agent.time_step_spec, master_collect_steps,
                        batch_size)
        for _ in range(warmup_period):
            experience, unused_info = next(master_iter)
            master_loss = master_agent.train(experience)

        for _ in range(joint_update_period):
            populate_buffer(master_train_env, master_rb,
                            master_agent.collect_policy,
                            master_agent.time_step_spec, 2, batch_size)
            populate_buffer(option_train_env, options_rb,
                            options_agent.collect_policy,
                            options_agent.time_step_spec, 2, batch_size)
            option_exp, unused_info = next(options_iter)
            options_loss = options_agent.train(option_exp)
            master_exp, unused_info = next(master_iter)
            master_loss = master_agent.train(master_exp)

        global_step.assign_add(1)

        if check_interval(log_interval):
            print('step = {0}: master loss = {1}, options loss = {2}'.format(
                global_step.value, master_loss, options_loss))

        if check_interval(checkpoint_interval):
            master_checkpointer.save(global_step)
            options_checkpointer.save(global_step)
            print('Checkpoint saved!')

        # Reset master here

    master_saver.save(save_dir + "master/")
    options_saver.save(save_dir + "options/")
    print("Policies Saved!")
            # cv2.putText args: image, text to add, top-left coordinate, font,
            # font size, color, thickness
            font = cv2.FONT_HERSHEY_SIMPLEX
            image = cv2.putText(image, str(int(c)), (x1, y1), font, 0.5,
                                (0, 255, 0), 1)
        else:
            break

    image = image.get()
    print(image.shape)
    plt.figure()
    image = image[:, :, [2, 1, 0]]
    plt.imshow(image)
    plt.show()


if __name__ == '__main__':
    transform = transforms.Compose([Normalizer(), Augmenter(), Resizer()])
    dataset = CocoDataset('./data/coco/', 'train2017', transform)
    dataset_size = len(dataset)
    print(dataset_size)
    data_loader = Data.DataLoader(dataset, 2, num_workers=2, shuffle=True,
                                  collate_fn=collater, pin_memory=True)
    for epoch_num in range(2):
        for iter_num, data in enumerate(data_loader):
            print('epoch:', epoch_num, 'iter_num:', iter_num)
            print('image:', data['img'].size())
            print('annot:', data['annot'].size())
def __init__(self, params):
    """Implementation of DDPG agent with Hindsight Experience Replay (HER) sampler.

    @param params: dict containing all necessary parameters:
        dims, buffer_size, tau (= 1-polyak), batch_size, lr_critic, lr_actor,
        norm_eps, norm_clip, clip_obs, clip_action, T (episode length),
        num_workers, clip_return, sample_her_transitions, gamma, replay_strategy
    """
    self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    self.input_dims = params['dims']
    self.buffer_size = params['buffer_size']
    self.tau = params['tau']
    self.batch_size = params['batch_size']
    self.critic_lr = params['lr_critic']
    self.actor_lr = params['lr_actor']
    self.norm_eps = params['norm_eps']
    self.norm_clip = params['norm_clip']
    self.clip_obs = params['clip_obs']
    self.clip_action = params['clip_action']
    self.T = params['T']
    self.rollout_batch_size = params['num_workers']
    self.clip_return = params['clip_return']
    self.sample_transitions = params['sample_her_transitions']
    self.gamma = params['gamma']
    self.replay_strategy = params['replay_strategy']

    self.dimo = self.input_dims['o']
    self.dimg = self.input_dims['g']
    self.dimu = self.input_dims['u']

    stage_shapes = OrderedDict()
    for key in sorted(self.input_dims.keys()):
        if key.startswith('info_'):
            continue
        stage_shapes[key] = (None, self.input_dims[key])
    stage_shapes['o_2'] = stage_shapes['o']
    stage_shapes['r'] = (None,)
    self.stage_shapes = stage_shapes

    # normalizer
    self.obs_normalizer = Normalizer(size=self.dimo, eps=self.norm_eps,
                                     clip_range=self.norm_clip)
    self.goal_normalizer = Normalizer(size=self.dimg, eps=self.norm_eps,
                                      clip_range=self.norm_clip)

    # networks
    self.actor_local = Actor(self.input_dims).to(self.device)
    self.critic_local = Critic(self.input_dims).to(self.device)
    self.actor_target = copy.deepcopy(self.actor_local)
    self.critic_target = copy.deepcopy(self.critic_local)

    # optimizers
    self.actor_optimizer = optim.Adam(self.actor_local.parameters(),
                                      lr=self.actor_lr)
    self.critic_optimizer = optim.Adam(self.critic_local.parameters(),
                                       lr=self.critic_lr)

    # Configuring the replay buffer
    buffer_shapes = {key: (self.T - 1 if key != 'o' else self.T,
                           self.input_dims[key])
                     for key, val in self.input_dims.items()}
    buffer_shapes['g'] = (buffer_shapes['g'][0], self.dimg)
    buffer_shapes['ag'] = (self.T, self.dimg)
    buffer_size = (self.buffer_size //
                   self.rollout_batch_size) * self.rollout_batch_size
    self.buffer = ReplayBuffer(buffer_shapes, buffer_size, self.T,
                               self.sample_transitions)
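# Hedged sketch of the running-statistics Normalizer these DDPG/HER agents
# construct with Normalizer(size=..., eps=..., clip_range=...): usage elsewhere
# in this file shows update(), normalize(), and .mean/.std attributes, so a
# minimal NumPy version could look like the following. The internal bookkeeping
# (sum/sumsq counters) is an assumption, not taken from the source.
import numpy as np


class RunningMeanStdNormalizer:
    def __init__(self, size, eps=1e-2, clip_range=np.inf):
        self.size = size
        self.eps = eps
        self.clip_range = clip_range
        self.sum = np.zeros(size, dtype=np.float32)
        self.sumsq = np.zeros(size, dtype=np.float32)
        self.count = 0.
        self.mean = np.zeros(size, dtype=np.float32)
        self.std = np.ones(size, dtype=np.float32)

    def update(self, v):
        # accumulate batch statistics and refresh mean/std
        v = v.reshape(-1, self.size)
        self.sum += v.sum(axis=0)
        self.sumsq += np.square(v).sum(axis=0)
        self.count += v.shape[0]
        self.mean = self.sum / self.count
        self.std = np.sqrt(np.maximum(
            np.square(self.eps),
            self.sumsq / self.count - np.square(self.mean)))

    def normalize(self, v):
        return np.clip((v - self.mean) / self.std,
                       -self.clip_range, self.clip_range)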
def preprocess(self, train_texts):
    normalizer = Normalizer.Normalizer()
    return normalizer.clean_text(train_texts)
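# Hedged usage sketch for preprocess(): assumes the surrounding class is already
# instantiated (the 'pipeline' name is hypothetical) and that
# Normalizer.Normalizer().clean_text accepts the raw training texts directly.
raw_texts = ["An Example SENTENCE!!", "another    raw   line"]
cleaned_texts = pipeline.preprocess(raw_texts)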
class DDPG_Agent():
    def __init__(self, args, env_params):
        self.s_dim = env_params['o_dim'] + env_params['g_dim']
        self.a_dim = env_params['a_dim']
        self.f_dim = args.f_dim
        self.action_bound = env_params['action_max']
        self.max_timestep = env_params['max_timestep']
        self.max_episode = args.max_episode
        self.evaluate_episode = args.evaluate_episode
        self.evaluate_interval = args.evaluate_interval
        self.log_interval = args.log_interval
        self.save_model_interval = args.save_model_interval
        self.save_model_start = args.save_model_start
        self.lr = args.lr
        self.lr_model = args.lr_model
        self.gamma = args.gamma
        self.batch_size = args.batch_size
        self.tau = args.tau
        self.eta = args.eta
        self.noise_eps = args.noise_eps
        self.device = torch.device(args.device)

        self.normalizer_s = Normalizer(size=self.s_dim, eps=1e-2, clip_range=1.)
        self.memory = Memory(size=args.memory_size, s_dim=self.s_dim,
                             a_dim=self.a_dim)

        self.policy = Policy(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
        self.policy_target = Policy(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
        self.Q = QFunction(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
        self.Q_target = QFunction(s_dim=self.s_dim, a_dim=self.a_dim).to(self.device)
        self.optimizer_p = optim.Adam(self.policy.parameters(), lr=self.lr)
        self.optimizer_q = optim.Adam(self.Q.parameters(), lr=self.lr)

        self.encoder = StateEncoder(s_dim=self.s_dim, f_dim=self.f_dim).to(self.device)
        self.EnvForward = ForwardModel(f_dim=self.f_dim, a_dim=self.a_dim).to(self.device)
        self.EnvInverse = InverseModel(f_dim=self.f_dim, a_dim=self.a_dim).to(self.device)
        self.optimizer_forward = optim.Adam(
            [{'params': self.EnvForward.parameters()},
             {'params': self.encoder.parameters()}],
            lr=self.lr_model)
        self.optimizer_inverse = optim.Adam(
            [{'params': self.EnvInverse.parameters()},
             {'params': self.encoder.parameters()}],
            lr=self.lr_model)

        self.hard_update()
        self.update_num = 0

    def select_action(self, state, train_mode=True):
        # normalize the observation before feeding it to the policy
        s = self.normalize_input(state)
        s = torch.tensor(s, dtype=torch.float32).to(self.device)
        with torch.no_grad():
            action = self.policy(s).cpu().numpy()
        if train_mode:
            # Gaussian noise
            action += np.random.randn(self.a_dim) * self.noise_eps * self.action_bound
        action = np.clip(action, a_min=-self.action_bound, a_max=self.action_bound)
        return action

    def get_intrisic_reward(self, s, a, s_):
        s, a, s_ = (torch.from_numpy(s).to(self.device).float(),
                    torch.from_numpy(a).to(self.device).float(),
                    torch.from_numpy(s_).to(self.device).float())
        with torch.no_grad():
            feature = self.encoder(s)
            next_feature_pred = self.EnvForward(feature, a)
            next_feature = self.encoder(s_)
        r_i = self.eta * torch.norm(next_feature_pred - next_feature)
        r_i = torch.clamp(r_i, min=-0.1, max=0.1)
        return r_i.cpu().detach().numpy()

    def train(self, env, logger=None):
        total_step = 0
        loss_pi, loss_q, loss_forward, loss_inverse = 0., 0., 0., 0.
        for i_episode in range(self.max_episode):
            obs = env.reset()
            s = get_state(obs)
            cumulative_r = 0.
            for i_step in range(self.max_timestep):
                a = self.select_action(s)
                obs_, r_e, done, info = env.step(a)
                s_ = get_state(obs_)
                r_i = self.get_intrisic_reward(s, a, s_)
                r = r_e + r_i
                self.memory.store(s, a, r, s_)
                s = s_
                if len(self.memory) > self.batch_size:
                    loss_pi, loss_q, loss_forward, loss_inverse = self.learn()
                cumulative_r += r_e
                total_step += 1
            print('i_episode: {} total step: {} cumulative reward: {:.4f} is_success: {} '
                  .format(i_episode, total_step, cumulative_r, info['is_success']))

            if logger is not None and i_episode % self.log_interval == 0:
                logger.add_scalar('Indicator/cumulative reward', cumulative_r, i_episode)
                logger.add_scalar('Loss/pi_loss', loss_pi, i_episode)
                logger.add_scalar('Loss/q_loss', loss_q, i_episode)
                logger.add_scalar('Loss/forward_loss', loss_forward, i_episode)
                logger.add_scalar('Loss/inverse_loss', loss_inverse, i_episode)
            if i_episode % self.evaluate_interval == 0:
                success_rate = self.evaluate(env)
                if logger is not None:
                    logger.add_scalar('Indicator/success rate', success_rate, i_episode)
            if i_episode > self.save_model_start and i_episode % self.save_model_interval == 0:
                self.save_model(remarks='{}_{}'.format(env.spec.id, i_episode))

    def evaluate(self, env, render=False):
        success_count = 0
        for i_episode in range(self.evaluate_episode):
            obs = env.reset()
            s = get_state(obs)
            for i_step in range(self.max_timestep):
                if render:
                    env.render()
                a = self.select_action(s, train_mode=False)
                obs_, r_e, done, info = env.step(a)
                s_ = get_state(obs_)
                s = s_
            success_count += info['is_success']
        return success_count / self.evaluate_episode

    def learn(self):
        s, a, r, s_ = self.memory.sample_batch(batch_size=self.batch_size)
        self.normalizer_s.update(s)
        s, s_ = self.normalize_input(s, s_)
        s = torch.from_numpy(s).to(self.device)
        a = torch.from_numpy(a).to(self.device)
        r = torch.from_numpy(r).to(self.device).unsqueeze(dim=1)
        s_ = torch.from_numpy(s_).to(self.device)

        # update policy and Q
        with torch.no_grad():
            a_next_tar = self.policy_target(s_)
            Q_next_tar = self.Q_target(s_, a_next_tar)
            loss_q_tar = r + self.gamma * Q_next_tar
        loss_q_pred = self.Q(s, a)
        loss_q = F.mse_loss(loss_q_pred, loss_q_tar.detach())
        self.optimizer_q.zero_grad()
        loss_q.backward()
        self.optimizer_q.step()

        loss_p = -self.Q(s, self.policy(s)).mean()
        self.optimizer_p.zero_grad()
        loss_p.backward()
        self.optimizer_p.step()

        self.soft_update()

        # update env model and encoder
        feature = self.encoder(s)
        next_feature = self.encoder(s_)
        a_pred = self.EnvInverse(feature, next_feature)
        loss_inverse = F.mse_loss(a_pred, a)

        next_feature_pred = self.EnvForward(feature, a)
        with torch.no_grad():
            next_feature_tar = self.encoder(s_)
        loss_forward = F.mse_loss(next_feature_pred, next_feature_tar.detach())

        self.optimizer_forward.zero_grad()
        self.optimizer_inverse.zero_grad()
        loss_forward.backward(retain_graph=True)
        loss_inverse.backward()
        self.optimizer_forward.step()
        self.optimizer_inverse.step()

        self.update_num += 1
        return (loss_p.cpu().detach().numpy(), loss_q.cpu().detach().numpy(),
                loss_forward.cpu().detach().numpy(),
                loss_inverse.cpu().detach().numpy())

    def update_normalizer(self, states):
        states = np.array(states, dtype=np.float32)
        self.normalizer_s.update(states)

    def hard_update(self):
        self.policy_target.load_state_dict(self.policy.state_dict())
        self.Q_target.load_state_dict(self.Q.state_dict())

    def soft_update(self):
        for param, param_target in zip(self.policy.parameters(),
                                       self.policy_target.parameters()):
            param_target.data.copy_(param.data * self.tau +
                                    param_target.data * (1 - self.tau))
        for param, param_target in zip(self.Q.parameters(),
                                       self.Q_target.parameters()):
            param_target.data.copy_(param.data * self.tau +
                                    param_target.data * (1 - self.tau))

    def normalize_input(self, s, s_=None):
        s = self.normalizer_s.normalize(s)
        if s_ is not None:
            s_ = self.normalizer_s.normalize(s_)
            return s, s_
        else:
            return s

    def save_model(self, remarks):
        if not os.path.exists('pretrained_models_DDPG/'):
            os.mkdir('pretrained_models_DDPG/')
        path = 'pretrained_models_DDPG/{}.pt'.format(remarks)
        print('Saving model to {}'.format(path))
        torch.save([
            self.normalizer_s.mean, self.normalizer_s.std,
            self.policy.state_dict()
        ], path)

    def load_model(self, remark):
        print('Loading models with remark {}'.format(remark))
        self.normalizer_s.mean, self.normalizer_s.std, policy_model = torch.load(
            'pretrained_models_DDPG/{}.pt'.format(remark),
            map_location=lambda storage, loc: storage)
        self.policy.load_state_dict(policy_model)
        filepath=checkpoint_file,
        save_weights_only=True
    )
    if len(os.listdir(checkpoint_dir)) > 0:
        print("Restoring from", checkpoint_file)
        low_level_action_model.load_weights(checkpoint_file)
    return low_level_action_model, [
        checkpoint_callback,
        low_level_action_model.gumbel_callback
    ]


if __name__ == "__main__":
    normalizer = Normalizer(0, 499)
    sae = StateAutoEncoder(
        1, 1, num_state_bits,
        normalize=True,
        normalizer=normalizer
    )
    sae.use_checkpoints(sae_path)
    num_actions, train_ds = setup_env(
        env_name, num_collect_episodes, sae)
    low_level_action_model, callbacks = setup_model(
        num_actions, num_state_bits, sae, checkpoint_dir
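
# --- Illustrative sketch (not part of the snippet above) --------------------
# Unlike the running normalizers elsewhere in this document, Normalizer(0, 499)
# above appears to be built from a fixed value range rather than from data.
# A minimal fixed-range (min-max) normalizer with that constructor signature
# might look like the class below; the name MinMaxNormalizer and the
# denormalize helper are assumptions, not the actual implementation.
import numpy as np

class MinMaxNormalizer:
    def __init__(self, low, high):
        self.low = float(low)
        self.high = float(high)

    def normalize(self, x):
        # map values in [low, high] to [0, 1]
        return (np.asarray(x, dtype=np.float32) - self.low) / (self.high - self.low)

    def denormalize(self, x):
        # map values in [0, 1] back to [low, high]
        return np.asarray(x, dtype=np.float32) * (self.high - self.low) + self.low
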
class ddpgAgent(object):
    def __init__(self, params):
        """Implementation of a DDPG agent with a Hindsight Experience Replay (HER) sampler.

        @param params: dict containing all necessary parameters:
            dims, buffer_size, tau (= 1 - polyak), batch_size, lr_critic, lr_actor,
            norm_eps, norm_clip, clip_obs, clip_action, T (episode length), num_workers,
            clip_return, sample_her_transitions, gamma, replay_strategy
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.input_dims = params['dims']
        self.buffer_size = params['buffer_size']
        self.tau = params['tau']
        self.batch_size = params['batch_size']
        self.critic_lr = params['lr_critic']
        self.actor_lr = params['lr_actor']
        self.norm_eps = params['norm_eps']
        self.norm_clip = params['norm_clip']
        self.clip_obs = params['clip_obs']
        self.clip_action = params['clip_action']
        self.T = params['T']
        self.rollout_batch_size = params['num_workers']
        self.clip_return = params['clip_return']
        self.sample_transitions = params['sample_her_transitions']
        self.gamma = params['gamma']
        self.replay_strategy = params['replay_strategy']

        self.dimo = self.input_dims['o']
        self.dimg = self.input_dims['g']
        self.dimu = self.input_dims['u']

        stage_shapes = OrderedDict()
        for key in sorted(self.input_dims.keys()):
            if key.startswith('info_'):
                continue
            stage_shapes[key] = (None, self.input_dims[key])
        stage_shapes['o_2'] = stage_shapes['o']
        stage_shapes['r'] = (None,)
        self.stage_shapes = stage_shapes

        # normalizers
        self.obs_normalizer = Normalizer(size=self.dimo, eps=self.norm_eps, clip_range=self.norm_clip)
        self.goal_normalizer = Normalizer(size=self.dimg, eps=self.norm_eps, clip_range=self.norm_clip)

        # networks
        self.actor_local = Actor(self.input_dims).to(self.device)
        self.critic_local = Critic(self.input_dims).to(self.device)
        self.actor_target = copy.deepcopy(self.actor_local)
        self.critic_target = copy.deepcopy(self.critic_local)

        # optimizers
        self.actor_optimizer = optim.Adam(self.actor_local.parameters(), lr=self.actor_lr)
        self.critic_optimizer = optim.Adam(self.critic_local.parameters(), lr=self.critic_lr)

        # configure the replay buffer
        buffer_shapes = {key: (self.T - 1 if key != 'o' else self.T, self.input_dims[key])
                         for key, val in self.input_dims.items()}
        buffer_shapes['g'] = (buffer_shapes['g'][0], self.dimg)
        buffer_shapes['ag'] = (self.T, self.dimg)
        buffer_size = (self.buffer_size // self.rollout_batch_size) * self.rollout_batch_size
        self.buffer = ReplayBuffer(buffer_shapes, buffer_size, self.T, self.sample_transitions)

    def act(self, o, g, noise_eps=0., random_eps=0., testing=False):
        """With probability 'random_eps' choose a random action; otherwise use the actor
        output with Gaussian noise scaled by 'noise_eps'.

        @param o: observation
        @param g: desired goal
        @param noise_eps: scale of the noise added to the action
        @param random_eps: probability of taking a random action
        @param testing: (bool) set to 'True' when testing a single environment
        """
        obs = self.obs_normalizer.normalize(o)
        goals = self.goal_normalizer.normalize(g)
        obs = torch.tensor(obs).to(self.device)
        goals = torch.tensor(goals).to(self.device)

        # for testing a single environment
        if testing:
            with torch.no_grad():
                action = self.actor_local(torch.cat([obs, goals], dim=0)).cpu().data.numpy()
            return action

        actions = self.actor_local(torch.cat([obs, goals], dim=1))
        # note: the noise dimension is hard-coded to 4 here rather than self.dimu
        noise = (noise_eps * np.random.randn(actions.shape[0], 4)).astype(np.float32)
        actions += torch.tensor(noise).to(self.device)
        eps_greedy_noise = np.random.binomial(1, random_eps, actions.shape[0]).reshape(-1, 1)
        random_action = torch.tensor(np.random.uniform(
            low=-1., high=1.,
            size=(actions.shape[0], self.dimu)).astype(np.float32)).to(self.device)
        actions += torch.tensor(eps_greedy_noise.astype(np.float32)).to(self.device) * (
            random_action - actions)  # eps-greedy
        actions = torch.clamp(actions, -self.clip_action, self.clip_action)
        return actions

    def store_episode(self, episode_batch):
        """Store episodes in the replay buffer.

        @param episode_batch: array of batch_size x (T or T+1) x dim_key.
            Observation 'o' is of size T+1, the others are of size T.
        """
        self.buffer.store_episode(episode_batch)

        # add transitions to the normalizers
        episode_batch['o_2'] = episode_batch['o'][:, 1:, :]
        episode_batch['ag_2'] = episode_batch['ag'][:, 1:, :]
        shape = episode_batch['u'].shape
        num_normalizing_transitions = shape[0] * shape[1]  # num_rollouts * (T - 1), steps every cycle
        transitions = self.sample_transitions(episode_batch, num_normalizing_transitions)
        self.obs_normalizer.update(transitions['o'])
        self.goal_normalizer.update(transitions['g'])
        self.obs_normalizer.recompute_stats()
        self.goal_normalizer.recompute_stats()

    def sample_batch(self):
        """Sample random transitions from the replay buffer (which also contains HER samples).

        @return: transitions
        """
        transitions = self.buffer.sample(self.batch_size)
        return [transitions[key] for key in self.stage_shapes.keys()]

    def learn(self):
        """Learning step, i.e. one optimization step for the critic and actor networks."""
        batch = self.sample_batch()
        batch_dict = OrderedDict([(key, batch[i].astype(np.float32).copy())
                                  for i, key in enumerate(self.stage_shapes.keys())])
        batch_dict['r'] = np.reshape(batch_dict['r'], [-1, 1])

        # prepare state, action, reward, next state
        obs = torch.tensor(self.obs_normalizer.normalize(batch_dict['o'])).to(self.device)
        goal = torch.tensor(self.goal_normalizer.normalize(batch_dict['g'])).to(self.device)
        actions = torch.tensor(batch_dict['u']).to(self.device)
        rewards = torch.tensor(batch_dict['r'].astype(np.float32)).to(self.device)
        obs_2 = torch.tensor(self.obs_normalizer.normalize(batch_dict['o_2'])).to(self.device)

        # update critic --------------------------------------------------------------
        # compute predicted Q values for the next states
        next_actions = self.actor_target(torch.cat([obs_2, goal], dim=1))
        next_Q_targets = self.critic_target(torch.cat([obs_2, goal], dim=1), next_actions)
        # compute Q targets for the current states and clip them
        Q_targets = rewards + self.gamma * next_Q_targets  # note: last experience of episode is not included
        Q_targets = torch.clamp(Q_targets, -self.clip_return, 0.)  # return clipping
        # compute loss
        Q_expected = self.critic_local(torch.cat([obs, goal], dim=1), actions)
        critic_loss = F.mse_loss(Q_expected, Q_targets)
        # update critic weights
        self.critic_optimizer.zero_grad()
        critic_loss.backward()
        # torch.nn.utils.clip_grad_norm_(self.critic_local.parameters(), 1)
        self.critic_optimizer.step()

        # update actor -------------------------------------------------------------
        # compute loss
        pred_actions = self.actor_local(torch.cat([obs, goal], dim=1))
        actor_loss = -self.critic_local(torch.cat([obs, goal], dim=1), pred_actions).mean()
        actor_loss += (pred_actions ** 2).mean()  # L2 penalty on action magnitude
        # update actor weights
        self.actor_optimizer.zero_grad()
        actor_loss.backward()
        self.actor_optimizer.step()

    def soft_update_target_networks(self):
        """Soft-update the target network parameters:
        θ_target = τ * θ_local + (1 - τ) * θ_target
        """
        # update critic target network
        for target_param, local_param in zip(self.critic_target.parameters(), self.critic_local.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)
        # update actor target network
        for target_param, local_param in zip(self.actor_target.parameters(), self.actor_local.parameters()):
            target_param.data.copy_(self.tau * local_param.data + (1.0 - self.tau) * target_param.data)

    def save_checkpoint(self, path, name):
        """Save the actor and critic networks and the normalization statistics to 'path'.

        @param path: directory in which to store the checkpoints
        @param name: (str) name of the environment, used for naming the files
        """
        torch.save(self.actor_local.state_dict(), path + '/' + name + '_checkpoint_actor_her.pth')
        torch.save(self.critic_local.state_dict(), path + '/' + name + '_checkpoint_critic_her.pth')
        self.obs_normalizer.save_normalizer(path + '/' + name + '_obs_normalizer.pth')
        self.goal_normalizer.save_normalizer(path + '/' + name + '_goal_normalizer.pth')
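
# --- Illustrative check (not part of the agent above) -----------------------
# The critic update clamps Q_targets to [-clip_return, 0]. With the sparse
# {-1, 0} rewards typical of HER goal-reaching tasks, the discounted return can
# never exceed 0 and is bounded below by -1 / (1 - gamma), which is why
# clip_return is often chosen on that order. The numbers below are only an
# example; gamma, T, and clip_return are not specified in the excerpt.
gamma = 0.98
T = 50
worst_case_return = sum(-1.0 * gamma ** t for t in range(T))  # all-failure episode of length T
print(worst_case_return)       # roughly -31.8 for 50 steps
print(-1.0 / (1.0 - gamma))    # -50.0, the infinite-horizon lower bound
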