Example #1
File: ppo.py Project: youngleox/nero
    def __init__(self,
                 actor_critic,
                 clip_param,
                 ppo_epoch,
                 num_mini_batch,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 eps=None,
                 max_grad_norm=None,
                 use_clipped_value_loss=True,
                 optimizer='adam',
                 beta1=0.0,
                 beta2=0.999):
        # note: beta1/beta2 are accepted here but not forwarded; the betas below are hard-coded
        self.actor_critic = actor_critic

        self.clip_param = clip_param
        self.ppo_epoch = ppo_epoch
        self.num_mini_batch = num_mini_batch

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef

        self.max_grad_norm = max_grad_norm
        self.use_clipped_value_loss = use_clipped_value_loss
        if optimizer == 'adam':
            print("using adam optimizer!")
            self.optimizer = optim.Adam(actor_critic.parameters(),
                                        lr=lr,
                                        eps=eps,
                                        betas=(0.0, 0.999))

        elif optimizer == 'lamb':
            print("using lamb optimizer!")
            self.optimizer = Lamb(actor_critic.parameters(),
                                  lr=lr,
                                  eps=eps,
                                  betas=(0.0, 0.999))

        elif optimizer == 'sgd':
            print("using SGD optimizer!")
            self.optimizer = optim.SGD(actor_critic.parameters(),
                                       lr=lr,
                                       momentum=0.0)

        elif optimizer == 'nero':
            print("using nero optimizer!")
            self.optimizer = Nero(actor_critic.parameters(), lr=lr)
Example #2
File: train.py Project: jxbz/nero
                           lr=args.lr,
                           betas=(args.momentum, args.beta),
                           weight_decay=args.wd,
                           constraints=True)

    elif args.optimizer == 'madam':
        print("using madam!")
        optimizer = Madam(net.parameters(), lr=args.lr)

    elif args.optimizer == 'madamcs':
        print("using madamcs!")
        optimizer = MadamCS(net.parameters(), lr=args.lr, constraints=True)

    elif args.optimizer == 'nero':
        print("using nero!")
        optimizer = Nero(net.parameters(), lr=args.lr, constraints=True)

    elif args.optimizer == 'neroabl':
        print("using nero ablated!")
        optimizer = Nero_abl(net.parameters(),
                             lr=args.lr,
                             c1=args.c1,
                             c2=args.c2)

    train_scheduler = optim.lr_scheduler.MultiStepLR(
        optimizer, milestones=settings.MILESTONES,
        gamma=args.gamma)  # learning rate decay
    iter_per_epoch = len(cifar_training_loader)
    warmup_scheduler = WarmUpLR(optimizer, iter_per_epoch * args.warm)

    args.prefix = "seed" + str(args.seed) + args.prefix
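
For reference, a minimal sketch of how the two schedulers built above are usually driven (an assumption, not code from train.py): WarmUpLR is stepped once per batch during the first args.warm warm-up epochs, and the MultiStepLR scheduler once per epoch afterwards; args.epochs and the inner loop body are hypothetical placeholders.

for epoch in range(1, args.epochs + 1):            # args.epochs is assumed, not shown above
    if epoch > args.warm:
        train_scheduler.step()                     # epoch-level milestone decay
    for images, labels in cifar_training_loader:
        if epoch <= args.warm:
            warmup_scheduler.step()                # per-batch warm-up
        # forward pass, loss.backward() and optimizer.step() go here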
Example #3
File: ppo.py Project: youngleox/nero
class PPO():
    def __init__(self,
                 actor_critic,
                 clip_param,
                 ppo_epoch,
                 num_mini_batch,
                 value_loss_coef,
                 entropy_coef,
                 lr=None,
                 eps=None,
                 max_grad_norm=None,
                 use_clipped_value_loss=True,
                 optimizer='adam',
                 beta1=0.0,
                 beta2=0.999):
        # note: beta1/beta2 are accepted here but not forwarded; the betas below are hard-coded
        self.actor_critic = actor_critic

        self.clip_param = clip_param
        self.ppo_epoch = ppo_epoch
        self.num_mini_batch = num_mini_batch

        self.value_loss_coef = value_loss_coef
        self.entropy_coef = entropy_coef

        self.max_grad_norm = max_grad_norm
        self.use_clipped_value_loss = use_clipped_value_loss
        if optimizer == 'adam':
            print("using adam optimizer!")
            self.optimizer = optim.Adam(actor_critic.parameters(),
                                        lr=lr,
                                        eps=eps,
                                        betas=(0.0, 0.999))

        elif optimizer == 'lamb':
            print("using lamb optimizer!")
            self.optimizer = Lamb(actor_critic.parameters(),
                                  lr=lr,
                                  eps=eps,
                                  betas=(0.0, 0.999))

        elif optimizer == 'sgd':
            print("using SGD optimizer!")
            self.optimizer = optim.SGD(actor_critic.parameters(),
                                       lr=lr,
                                       momentum=0.0)

        elif optimizer == 'nero':
            print("using nero optimizer!")
            self.optimizer = Nero(actor_critic.parameters(), lr=lr)

    def update(self, rollouts):
        advantages = rollouts.returns[:-1] - rollouts.value_preds[:-1]
        advantages = (advantages - advantages.mean()) / (advantages.std() +
                                                         1e-5)

        value_loss_epoch = 0
        action_loss_epoch = 0
        dist_entropy_epoch = 0

        for e in range(self.ppo_epoch):
            if self.actor_critic.is_recurrent:
                data_generator = rollouts.recurrent_generator(
                    advantages, self.num_mini_batch)
            else:
                data_generator = rollouts.feed_forward_generator(
                    advantages, self.num_mini_batch)

            for sample in data_generator:
                obs_batch, recurrent_hidden_states_batch, actions_batch, \
                   value_preds_batch, return_batch, masks_batch, old_action_log_probs_batch, \
                        adv_targ = sample

                # Reshape to do in a single forward pass for all steps
                values, action_log_probs, dist_entropy, _ = self.actor_critic.evaluate_actions(
                    obs_batch, recurrent_hidden_states_batch, masks_batch,
                    actions_batch)

                ratio = torch.exp(action_log_probs -
                                  old_action_log_probs_batch)
                surr1 = ratio * adv_targ
                surr2 = torch.clamp(ratio, 1.0 - self.clip_param,
                                    1.0 + self.clip_param) * adv_targ
                action_loss = -torch.min(surr1, surr2).mean()

                if self.use_clipped_value_loss:
                    value_pred_clipped = value_preds_batch + \
                        (values - value_preds_batch).clamp(-self.clip_param, self.clip_param)
                    value_losses = (values - return_batch).pow(2)
                    value_losses_clipped = (value_pred_clipped -
                                            return_batch).pow(2)
                    value_loss = 0.5 * torch.max(value_losses,
                                                 value_losses_clipped).mean()
                else:
                    value_loss = 0.5 * (return_batch - values).pow(2).mean()

                self.optimizer.zero_grad()
                (value_loss * self.value_loss_coef + action_loss -
                 dist_entropy * self.entropy_coef).backward()
                nn.utils.clip_grad_norm_(self.actor_critic.parameters(),
                                         self.max_grad_norm)
                self.optimizer.step()

                value_loss_epoch += value_loss.item()
                action_loss_epoch += action_loss.item()
                dist_entropy_epoch += dist_entropy.item()

        num_updates = self.ppo_epoch * self.num_mini_batch

        value_loss_epoch /= num_updates
        action_loss_epoch /= num_updates
        dist_entropy_epoch /= num_updates

        return value_loss_epoch, action_loss_epoch, dist_entropy_epoch
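
A minimal usage sketch of the class above (an assumption, not code from the repository): MyActorCritic and rollouts are hypothetical stand-ins for the policy/value network and the rollout storage that update() expects.

actor_critic = MyActorCritic()   # hypothetical network exposing evaluate_actions() and is_recurrent
agent = PPO(actor_critic,
            clip_param=0.2,
            ppo_epoch=4,
            num_mini_batch=32,
            value_loss_coef=0.5,
            entropy_coef=0.01,
            lr=2.5e-4,
            eps=1e-5,
            max_grad_norm=0.5,
            optimizer='nero')

# rollouts: hypothetical storage providing returns, value_preds and the
# feed_forward_generator / recurrent_generator methods used by update()
value_loss, action_loss, dist_entropy = agent.update(rollouts)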
Example #4
criterion = nn.NLLLoss()

if args.optim == 'sgd':
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=args.lr,
                                momentum=args.momentum)
elif args.optim == 'adam':
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr,
                                 betas=(args.momentum, args.beta))
elif args.optim == 'lamb':
    optimizer = Lamb(model.parameters(),
                     lr=args.lr,
                     betas=(args.momentum, args.beta))
elif args.optim == 'nero':
    optimizer = Nero(model.parameters(), lr=args.lr)

###############################################################################
# Training code
###############################################################################


def repackage_hidden(h):
    """Wraps hidden states in new Tensors, to detach them from their history."""

    if isinstance(h, torch.Tensor):
        return h.detach()
    else:
        return tuple(repackage_hidden(v) for v in h)
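
A minimal sketch of where repackage_hidden typically sits in a truncated-BPTT training loop (an assumption, not code from this file): the hidden state is detached before each forward pass so gradients stop at batch boundaries. model, train_batches and batch_size are hypothetical placeholders.

hidden = model.init_hidden(batch_size)        # hypothetical; an LSTM returns an (h, c) tuple here
for data, targets in train_batches:           # hypothetical batch iterator
    hidden = repackage_hidden(hidden)         # detach from the previous graph
    model.zero_grad()
    output, hidden = model(data, hidden)
    loss = criterion(output, targets)         # criterion = nn.NLLLoss() from above
    loss.backward()
    optimizer.step()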

Example #5
File: main.py Project: youngleox/nero
def main_worker(gpu, ngpus_per_node, args):
    global best_acc1
    args.gpu = gpu

    lr_decay_epoch = [int(i) for i in args.lr_decay_epoch.split(',')]

    if args.gpu is not None:
        print("Use GPU: {} for training".format(args.gpu))

    if args.distributed:
        if args.dist_url == "env://" and args.rank == -1:
            args.rank = int(os.environ["RANK"])
        if args.multiprocessing_distributed:
            # For multiprocessing distributed training, rank needs to be the
            # global rank among all the processes
            args.rank = args.rank * ngpus_per_node + gpu
        dist.init_process_group(backend=args.dist_backend, init_method=args.dist_url,
                                world_size=args.world_size, rank=args.rank)
    
    # create model
    if args.pretrained:
        print("=> using pre-trained model '{}'".format(args.arch))
        model = models.__dict__[args.arch](pretrained=True)
    else:
        print("=> creating model '{}'".format(args.arch))
        model = models.__dict__[args.arch]()

    if not torch.cuda.is_available():
        print('using CPU, this will be slow')
    elif args.distributed:
        # For multiprocessing distributed, DistributedDataParallel constructor
        # should always set the single device scope, otherwise,
        # DistributedDataParallel will use all available devices.
        if args.gpu is not None:
            torch.cuda.set_device(args.gpu)
            model.cuda(args.gpu)
            # When using a single GPU per process and per
            # DistributedDataParallel, we need to divide the batch size
            # ourselves based on the total number of GPUs we have
            args.batch_size = int(args.batch_size / ngpus_per_node)
            args.workers = int((args.workers + ngpus_per_node - 1) / ngpus_per_node)
            model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[args.gpu])
        else:
            model.cuda()
            # DistributedDataParallel will divide and allocate batch_size to all
            # available GPUs if device_ids are not set
            model = torch.nn.parallel.DistributedDataParallel(model)
    elif args.gpu is not None:
        torch.cuda.set_device(args.gpu)
        model = model.cuda(args.gpu)
    else:
        # DataParallel will divide and allocate batch_size to all available GPUs
        if args.arch.startswith('alexnet') or args.arch.startswith('vgg'):
            model.features = torch.nn.DataParallel(model.features)
            model.cuda()
        else:
            model = torch.nn.DataParallel(model).cuda()

    criterion = nn.CrossEntropyLoss().cuda(args.gpu)
    logname = args.prefix
    if args.optimizer == 'sgd':
        logname += "SGD_"
        print("sgd")
        optimizer = torch.optim.SGD(model.parameters(), args.lr, momentum=args.momentum, weight_decay=args.weight_decay)
    elif args.optimizer == 'nero':
        logname += "Nero_"
        print("Nero")
        optimizer = Nero(model.parameters(), lr=args.lr, constraints=True)

    cos_sch = False   
    scheduler = None
    T_max = math.ceil(1281167.0/float(args.batch_size)) * (args.epochs)
    if args.sch == 'cos' or args.sch == 'cosine':
        print("cosine scheduler")
        cos_sch = True
        scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer,T_max=T_max,eta_min=0.0)
    
    
    logname += args.arch + "_sch_" +str(args.sch)+ "_lr" +str(args.lr) + \
            '_epoch' + str(args.epochs) + \
            "_opt_" + args.optimizer + \
            "_b" + str(args.batch_size) + \
            '_momentum' + str(args.momentum) + "_beta" + str(args.beta) + \
            '_wd' + str(args.weight_decay)

    writer = SummaryWriter(args.logdir + '/' + logname)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            if args.gpu is None:
                checkpoint = torch.load(args.resume)
            else:
                # Map model to be loaded to specified single gpu.
                loc = 'cuda:{}'.format(args.gpu)
                checkpoint = torch.load(args.resume, map_location=loc)
            args.start_epoch = checkpoint['epoch']
            best_acc1 = checkpoint['best_acc1']
            if args.gpu is not None:
                # best_acc1 may be from a checkpoint from a different GPU
                best_acc1 = best_acc1.to(args.gpu)
            model.load_state_dict(checkpoint['state_dict'], strict=False)
            optimizer.load_state_dict(checkpoint['optimizer']) 
            for group in optimizer.param_groups:
                group["lr"] = args.lr
            if args.sch == 'cos':
                for i in range(checkpoint['epoch']*math.ceil(1281167.0/float(args.batch_size))):
                    scheduler.step()
             
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    traindir = os.path.join(args.data, 'train')
    valdir = os.path.join(args.data, 'val')
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])

    train_dataset = datasets.ImageFolder(
        traindir,
        transforms.Compose([
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ]))

    if args.distributed:
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    else:
        train_sampler = None

    train_loader = torch.utils.data.DataLoader(
        train_dataset, batch_size=args.batch_size, shuffle=(train_sampler is None),
        num_workers=args.workers, pin_memory=True, sampler=train_sampler)

    val_loader = torch.utils.data.DataLoader(
        datasets.ImageFolder(valdir, transforms.Compose([
            transforms.Resize(256),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize
        ])),
        batch_size=args.batch_size, shuffle=False,
        num_workers=args.workers, pin_memory=True)

    if args.evaluate:
        validate(val_loader, model, criterion, args)
        return

    lr = args.lr
    for epoch in range(args.start_epoch, args.epochs):
        if args.distributed:
            train_sampler.set_epoch(epoch)

        current_lr = optimizer.param_groups[0]['lr']
        print("current learning rate: {}".format(current_lr))
        writer.add_scalar('lr', current_lr, epoch)
        
        # train for one epoch
        top1_train, top5_train, losses_train, batch_time_train, scheduler = train(
            train_loader, model, criterion, optimizer, epoch, args, writer,
            scheduler=scheduler)
        
        if not cos_sch:
            lr = adjust_learning_rate(optimizer, epoch, lr, lr_decay_epoch, args.lr_decay)
        
        writer.add_scalar('train/batch_time_mean', batch_time_train, epoch)
        writer.add_scalar('train/loss_mean', losses_train, epoch)
        writer.add_scalar('train/top1_mean', top1_train, epoch)
        writer.add_scalar('train/top5_mean', top5_train, epoch)

        # evaluate on validation set
        top1_val, top5_val, losses_val, batch_time_val = validate(val_loader, model, criterion, args, epoch, writer)
        
        writer.add_scalar('val/batch_time_mean', batch_time_val, epoch)
        writer.add_scalar('val/loss_mean', losses_val, epoch)
        writer.add_scalar('val/top1_mean', top1_val, epoch)
        writer.add_scalar('val/top5_mean', top5_val, epoch)
        # remember best acc@1 and save checkpoint
        is_best = top1_val > best_acc1
        best_acc1 = max(top1_val, best_acc1)

        if not args.multiprocessing_distributed or (args.multiprocessing_distributed
                and args.rank % ngpus_per_node == 0):

            if ((epoch + 1) % 5 == 0) :
                save_checkpoint({
                    'epoch': epoch + 1,
                    'arch': args.arch,
                    'state_dict': model.state_dict(),
                    'best_acc1': best_acc1,
                    'optimizer' : optimizer.state_dict(),
                }, is_best,
                    args.logdir + '/' + logname + '/epoch' + str(epoch+1) + '_checkpoint.pth.tar')
    writer.close()
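
For context, a minimal sketch of how a main_worker with this signature is usually launched (an assumption, not code from main.py), following the standard torch.multiprocessing.spawn pattern; in the single-process case it is called directly.

import torch
import torch.multiprocessing as mp

ngpus_per_node = torch.cuda.device_count()
if args.multiprocessing_distributed:
    # one worker process per GPU; spawn passes the process index as the gpu argument
    args.world_size = ngpus_per_node * args.world_size
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(ngpus_per_node, args))
else:
    main_worker(args.gpu, ngpus_per_node, args)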
Example #6
                      lr=args.initial_lr,
                      betas=(0.0, 0.999))
    optD = optim.Adam(netD.parameters(),
                      lr=args.initial_lr,
                      betas=(0.9, 0.999))

elif args.optim == 'lamb':
    optG = Lamb(netG.parameters(), lr=args.initial_lr, betas=(0.0, 0.999))
    optD = Lamb(netD.parameters(), lr=args.initial_lr, betas=(0.0, 0.999))

elif args.optim == 'sgd':
    optG = optim.SGD(netG.parameters(), lr=args.initial_lr, momentum=0.0)
    optD = optim.SGD(netD.parameters(), lr=args.initial_lr, momentum=0.0)

elif args.optim == 'nero':
    optG = Nero(netG.parameters(), lr=args.initial_lr)
    optD = Nero(netD.parameters(), lr=args.initial_lr)

else:
    raise Exception("Unsupported optim")

#########################################
#### Train ##############################
#########################################


def train():
    print("Training...")

    netG.train()
    netD.train()