Example #1
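This excerpt omits its module-level imports. From the names used, it relies on the standard library (os, sys, pickle), numpy as np, torch with torch.nn as nn, from datetime import datetime as dt, torch.utils.data.DataLoader, presumably NVIDIA apex's amp for mixed precision, and project-local modules (utils, dataset, models, ctf, Lattice, PoseTracker).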
def main(args):
    t1 = dt.now()
    if args.outdir is not None and not os.path.exists(args.outdir):
        os.makedirs(args.outdir)
    LOG = f'{args.outdir}/run.log'

    def flog(msg):  # HACK: switch to logging module
        return utils.flog(msg, LOG)
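    # utils.flog is not shown in this excerpt; a minimal sketch consistent
    # with its use here (echo to stdout, append to the run log) might be:
    #
    #     def flog(msg, outfile):
    #         print(msg)
    #         with open(outfile, 'a') as f:
    #             f.write(str(msg) + '\n')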

    if args.load == 'latest':
        args = get_latest(args, flog)
    flog(' '.join(sys.argv))
    flog(args)

    # set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    ## set the device
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    flog('Use cuda {}'.format(use_cuda))
    if use_cuda:
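        # make CUDA tensors the default so tensors created later (e.g.
        # lattice coordinates) are allocated on the GPU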
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
    else:
        flog('WARNING: No GPUs detected')

    # load the particles
    if args.ind is not None:
        flog('Filtering image dataset with {}'.format(args.ind))
        with open(args.ind, 'rb') as f:
            ind = pickle.load(f)
    else:
        ind = None
    if args.lazy:
        data = dataset.LazyMRCData(args.particles,
                                   norm=args.norm,
                                   invert_data=args.invert_data,
                                   ind=ind,
                                   window=args.window,
                                   datadir=args.datadir,
                                   relion31=args.relion31)
    else:
        data = dataset.MRCData(args.particles,
                               norm=args.norm,
                               invert_data=args.invert_data,
                               ind=ind,
                               window=args.window,
                               datadir=args.datadir,
                               relion31=args.relion31)
    D = data.D
    Nimg = data.N
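    # data.D is evidently the particle box size plus one (note the D - 1 used
    # when loading CTF parameters below)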

    # instantiate model
    if args.pe_type != 'none':
        assert args.l_extent == 0.5
    lattice = Lattice(D, extent=args.l_extent)

    activation = {"relu": nn.ReLU, "leaky_relu": nn.LeakyReLU}[args.activation]
    model = models.get_decoder(3,
                               D,
                               args.layers,
                               args.dim,
                               args.domain,
                               args.pe_type,
                               enc_dim=args.pe_dim,
                               activation=activation)
    flog(model)
    flog('{} parameters in model'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))

    # optimizer
    optim = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.wd)

    # load weights
    if args.load:
        flog('Loading model weights from {}'.format(args.load))
        checkpoint = torch.load(args.load)
        model.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        assert start_epoch < args.num_epochs
    else:
        start_epoch = 0

    # load poses
    if args.do_pose_sgd:
        assert args.domain == 'hartley', "Need to use --domain hartley if doing pose SGD"
        posetracker = PoseTracker.load(args.poses, Nimg, D, args.emb_type, ind)
        pose_optimizer = torch.optim.SparseAdam(posetracker.parameters(),
                                                lr=args.pose_lr)
    else:
        posetracker = PoseTracker.load(args.poses, Nimg, D, None, ind)

    # load CTF
    if args.ctf is not None:
        flog('Loading ctf params from {}'.format(args.ctf))
        ctf_params = ctf.load_ctf_for_training(D - 1, args.ctf)
        if args.ind is not None: ctf_params = ctf_params[ind]
        ctf_params = torch.tensor(ctf_params)
    else:
        ctf_params = None
    Apix = ctf_params[0, 0] if ctf_params is not None else 1

    # save configuration
    out_config = f'{args.outdir}/config.pkl'
    save_config(args, data, lattice, model, out_config)

    # Mixed precision training with AMP
    if args.amp:
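        # tensor cores want dimensions that are multiples of 8 for fp16 speedups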
        assert args.batch_size % 8 == 0
        assert (D - 1) % 8 == 0
        assert args.dim % 8 == 0
        # Also check zdim, enc_mask dim?
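        # NVIDIA apex AMP; opt level O1 patches torch functions to cast to
        # fp16 where it is considered safe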
        model, optim = amp.initialize(model, optim, opt_level='O1')

    # parallelize
    if args.multigpu and torch.cuda.device_count() > 1:
        flog(f'Using {torch.cuda.device_count()} GPUs!')
        args.batch_size *= torch.cuda.device_count()
        flog(f'Increasing batch size to {args.batch_size}')
        model = nn.DataParallel(model)
    elif args.multigpu:
        flog(
            f'WARNING: --multigpu selected, but {torch.cuda.device_count()} GPUs detected'
        )

    # train
    data_generator = DataLoader(data, batch_size=args.batch_size, shuffle=True)
    for epoch in range(start_epoch, args.num_epochs):
        t2 = dt.now()
        loss_accum = 0
        batch_it = 0
        for batch, ind in data_generator:
            batch_it += len(ind)
            y = batch.to(device)
            ind = ind.to(device)
            if args.do_pose_sgd:
                pose_optimizer.zero_grad()
            r, t = posetracker.get_pose(ind)
            c = ctf_params[ind] if ctf_params is not None else None
            loss_item = train(model,
                              lattice,
                              optim,
                              y,
                              r,
                              t,
                              c,
                              use_amp=args.amp)
            if args.do_pose_sgd and epoch >= args.pretrain:
                pose_optimizer.step()
            loss_accum += loss_item * len(ind)
            if batch_it % args.log_interval == 0:
                flog(
                    '# [Train Epoch: {}/{}] [{}/{} images] loss={:.6f}'.format(
                        epoch + 1, args.num_epochs, batch_it, Nimg, loss_item))
        flog('# =====> Epoch: {} Average loss = {:.6f}; Finished in {}'.format(
            epoch + 1, loss_accum / Nimg,
            dt.now() - t2))
        if args.checkpoint and epoch % args.checkpoint == 0:
            out_mrc = '{}/reconstruct.{}.mrc'.format(args.outdir, epoch)
            out_weights = '{}/weights.{}.pkl'.format(args.outdir, epoch)
            save_checkpoint(model, lattice, optim, epoch, data.norm, Apix,
                            out_mrc, out_weights)
            if args.do_pose_sgd and epoch >= args.pretrain:
                out_pose = '{}/pose.{}.pkl'.format(args.outdir, epoch)
                posetracker.save(out_pose)

    ## save model weights and evaluate the model on 3D lattice
    out_mrc = '{}/reconstruct.mrc'.format(args.outdir)
    out_weights = '{}/weights.pkl'.format(args.outdir)
    save_checkpoint(model, lattice, optim, epoch, data.norm, Apix, out_mrc,
                    out_weights)
    if args.do_pose_sgd and epoch >= args.pretrain:
        out_pose = '{}/pose.pkl'.format(args.outdir)
        posetracker.save(out_pose)

    td = dt.now() - t1
    flog('Finished in {} ({} per epoch)'.format(
        td, td / (args.num_epochs - start_epoch)))
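
A hypothetical command-line wrapper for this main (flag names are inferred from the args attributes used above; the real parser defines more options, e.g. flags backing args.wd, args.layers, args.dim, args.domain and args.pe_type, and likely different defaults):

if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Train a 3D reconstruction decoder')
    parser.add_argument('particles', help='Particle stack (.mrcs)')
    parser.add_argument('--poses', required=True, help='Image poses (.pkl)')
    parser.add_argument('-o', '--outdir', required=True, help='Output directory')
    parser.add_argument('--ctf', help='CTF parameters (.pkl)')
    parser.add_argument('--seed', type=int, default=0, help='Random seed')
    parser.add_argument('--num-epochs', type=int, default=20)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--lr', type=float, default=1e-4)
    parser.add_argument('--amp', action='store_true', help='Use mixed-precision training')
    parser.add_argument('--multigpu', action='store_true', help='Parallelize across GPUs')
    main(parser.parse_args())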
Example #2
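This variant reads as an earlier, leaner version of Example #1: it logs through a module-level log() rather than a per-run log file, always builds the decoder with nn.ReLU, and omits the relion31, AMP, and multi-GPU handling. The same imports are assumed, minus utils and apex's amp.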
def main(args):
    log(args)
    t1 = dt.now()
    if not os.path.exists(args.outdir):
        os.makedirs(args.outdir)

    # set the random seed
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    ## set the device
    use_cuda = torch.cuda.is_available()
    device = torch.device('cuda' if use_cuda else 'cpu')
    log('Use cuda {}'.format(use_cuda))
    if use_cuda:
        torch.set_default_tensor_type(torch.cuda.FloatTensor)

    # load the particles
    if args.ind is not None:
        log('Filtering image dataset with {}'.format(args.ind))
        with open(args.ind, 'rb') as f:
            ind = pickle.load(f)
    else:
        ind = None
    if args.lazy:
        data = dataset.LazyMRCData(args.particles,
                                   norm=args.norm,
                                   invert_data=args.invert_data,
                                   ind=ind,
                                   window=args.window,
                                   datadir=args.datadir)
    else:
        data = dataset.MRCData(args.particles,
                               norm=args.norm,
                               invert_data=args.invert_data,
                               ind=ind,
                               window=args.window,
                               datadir=args.datadir)
    D = data.D
    Nimg = data.N

    # instantiate model
    if args.pe_type != 'none':
        assert args.l_extent == 0.5
    lattice = Lattice(D, extent=args.l_extent)

    model = models.get_decoder(3, D, args.layers, args.dim, args.domain,
                               args.pe_type, nn.ReLU)
    log(model)
    log('{} parameters in model'.format(
        sum(p.numel() for p in model.parameters() if p.requires_grad)))

    # optimizer
    optim = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.wd)

    # load weights
    if args.load:
        log('Loading model weights from {}'.format(args.load))
        checkpoint = torch.load(args.load)
        model.load_state_dict(checkpoint['model_state_dict'])
        optim.load_state_dict(checkpoint['optimizer_state_dict'])
        start_epoch = checkpoint['epoch'] + 1
        assert start_epoch < args.num_epochs
    else:
        start_epoch = 0

    # load poses
    if args.do_pose_sgd:
        posetracker = PoseTracker.load(args.poses, Nimg, D, args.emb_type, ind)
        pose_optimizer = torch.optim.SparseAdam(posetracker.parameters(),
                                                lr=args.pose_lr)
    else:
        posetracker = PoseTracker.load(args.poses, Nimg, D, None, ind)

    # load CTF
    if args.ctf is not None:
        log('Loading ctf params from {}'.format(args.ctf))
        ctf_params = ctf.load_ctf_for_training(D - 1, args.ctf)
        if args.ind is not None: ctf_params = ctf_params[ind]
        ctf_params = torch.tensor(ctf_params)
    else:
        ctf_params = None
    Apix = ctf_params[0, 0] if ctf_params is not None else 1

    # train
    data_generator = DataLoader(data, batch_size=args.batch_size, shuffle=True)
    for epoch in range(start_epoch, args.num_epochs):
        t2 = dt.now()
        loss_accum = 0
        batch_it = 0
        for batch, ind in data_generator:
            batch_it += len(ind)
            y = batch.to(device)
            ind = ind.to(device)
            if args.do_pose_sgd:
                pose_optimizer.zero_grad()
            r, t = posetracker.get_pose(ind)
            c = ctf_params[ind] if ctf_params is not None else None
            loss_item = train(model, lattice, optim, y, r, t, c)
            if args.do_pose_sgd and epoch >= args.pretrain:
                pose_optimizer.step()
            loss_accum += loss_item * len(ind)
            if batch_it % args.log_interval == 0:
                log('# [Train Epoch: {}/{}] [{}/{} images] loss={:.6f}'.format(
                    epoch + 1, args.num_epochs, batch_it, Nimg, loss_item))
        log('# =====> Epoch: {} Average loss = {:.6f}; Finished in {}'.format(
            epoch + 1, loss_accum / Nimg,
            dt.now() - t2))
        if args.checkpoint and epoch % args.checkpoint == 0:
            out_mrc = '{}/reconstruct.{}.mrc'.format(args.outdir, epoch)
            out_weights = '{}/weights.{}.pkl'.format(args.outdir, epoch)
            save_checkpoint(model, lattice, optim, epoch, data.norm, Apix,
                            out_mrc, out_weights)
            if args.do_pose_sgd and epoch >= args.pretrain:
                out_pose = '{}/pose.{}.pkl'.format(args.outdir, epoch)
                posetracker.save(out_pose)

    ## save model weights and evaluate the model on 3D lattice
    out_mrc = '{}/reconstruct.mrc'.format(args.outdir)
    out_weights = '{}/weights.pkl'.format(args.outdir)
    save_checkpoint(model, lattice, optim, epoch, data.norm, Apix, out_mrc,
                    out_weights)
    if args.do_pose_sgd and epoch >= args.pretrain:
        out_pose = '{}/pose.pkl'.format(args.outdir)
        posetracker.save(out_pose)

    td = dt.now() - t1
    log('Finished in {} ({} per epoch)'.format(
        td, td / (args.num_epochs - start_epoch)))
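
The train helper invoked by both examples is defined elsewhere. Below is a minimal sketch of one training step consistent with the call train(model, lattice, optim, y, rot, trans, ctf_params); the coordinate slicing, loss, and CTF/translation handling are assumptions for illustration, not the original implementation:

import torch.nn.functional as F

def train(model, lattice, optim, y, rot, trans=None, ctf_params=None,
          use_amp=False):
    # (AMP loss scaling is omitted from this sketch)
    model.train()
    optim.zero_grad()
    B = y.size(0)
    # rotate the central-slice lattice coordinates into each image's pose
    coords = lattice.coords @ rot      # (B, D*D, 3)
    yhat = model(coords).view(B, -1)   # predicted slices
    # a full implementation would shift y by `trans` and modulate yhat by the
    # CTF computed from `ctf_params` before comparing
    loss = F.mse_loss(yhat, y.view(B, -1))
    loss.backward()
    optim.step()
    return loss.item()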