Exemplo n.º 1
0
def train_net(solver_prototxt, roidb, output_dir, pretrained_model, max_iter,
              gpus):
    """
    Train the network on several GPUs, one ``worker`` process per GPU.

    Blocks until every worker process has exited, then prints a completion
    message.

    :param solver_prototxt: the network prototxt
    :param roidb: the training roidb
    :param output_dir: the output directory to be used for saving the models
    :param pretrained_model: the pre-trained model for fine-tuning
    :param max_iter: maximum number of iterations for solver
    :param gpus: the GPU ids to be used for solving
    :return: None
    """
    # A single NCCL uid is created here and handed to every worker.
    uid = caffe.NCCL.new_uid()
    caffe.init_log(0, True)
    caffe.log('Using devices %s' % str(gpus))

    # Everything except the leading rank is identical for all workers.
    shared_args = (uid, gpus, solver_prototxt, roidb, pretrained_model,
                   max_iter, output_dir)

    workers = []
    for rank in range(len(gpus)):
        proc = Process(target=worker, args=(rank,) + shared_args)
        proc.daemon = True
        proc.start()
        workers.append(proc)

    for proc in workers:
        proc.join()
    print('done solving!')
def train_net(solver_prototxt, roidb, output_dir, pretrained_model, max_iter, gpus):
    """
    Run multi-GPU training by spawning one ``worker`` process per GPU.

    The call returns only after all spawned processes have been joined.

    :param solver_prototxt: the network prototxt
    :param roidb: the training roidb
    :param output_dir: the output directory to be used for saving the models
    :param pretrained_model: the pre-trained model for fine-tuning
    :param max_iter: maximum number of iterations for solver
    :param gpus: the GPU ids to be used for solving
    :return: None
    """
    # The uid created here is passed unchanged to every worker.
    uid = caffe.NCCL.new_uid()
    caffe.init_log(0, True)
    caffe.log('Using devices %s' % str(gpus))

    def spawn(rank):
        """Create, daemonize and start the worker process for ``rank``."""
        child = Process(
            target=worker,
            args=(rank, uid, gpus, solver_prototxt, roidb, pretrained_model,
                  max_iter, output_dir),
        )
        child.daemon = True
        child.start()
        return child

    # Workers are started one after another, in rank order.
    children = [spawn(rank) for rank in range(len(gpus))]
    for child in children:
        child.join()
    print('done solving!')
Exemplo n.º 3
0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        use_cpu,  # whether to use the CPU
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    """Run training in child processes, either on the CPU or one per GPU.

    When ``use_cpu`` is truthy a single ``cpu_solve`` process is started and
    joined. Otherwise one ``solve`` process is started per entry in ``gpus``,
    all sharing one NCCL uid, and every process is joined before returning.

    Returns None.
    """
    caffe.init_log(0, True)
    caffe.log('Using devices %s' % str(gpus))

    # Truthiness test instead of ``== True`` so that any truthy flag value
    # selects the CPU path rather than silently falling through to NCCL.
    if use_cpu:
        p = Process(target=cpu_solve, args=(solver, snapshot, timing))
        p.daemon = True
        p.start()
        p.join()
        return

    # NCCL uses a uid to identify a session
    uid = caffe.NCCL.new_uid()

    procs = []
    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(solver, snapshot, gpus, timing, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join()
Exemplo n.º 4
0
 def show_time():
     """Log the forward and backward time of each layer and the solver total.

     Reads ``net``, ``fprop``, ``bprop`` and ``total`` from outside this
     function and emits one multi-line message through ``caffe.log``.
     """
     layer_ids = range(len(net.layers))
     parts = ['\n']
     # Forward timings, first layer to last.
     for idx in layer_ids:
         parts.append('forw %3d %8s ' % (idx, net._layer_names[idx]))
         parts.append(': %.2f\n' % fprop[idx].ms)
     # Backward timings, last layer to first.
     for idx in reversed(layer_ids):
         parts.append('back %3d %8s ' % (idx, net._layer_names[idx]))
         parts.append(': %.2f\n' % bprop[idx].ms)
     parts.append('solver total: %.2f\n' % total.ms)
     caffe.log(''.join(parts))
Exemplo n.º 5
0
def train(
        solver,  # solver proto definition
        pretrained_model,  # pretrained model for initialization
        snapshot,  # solver snapshot to restore
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    """Log the device list and run ``solve`` directly in the current process.

    No child process is spawned here; ``solve`` is called once with ``0`` as
    its final argument. Returns None.
    """
    device_message = 'Using devices %s' % str(gpus)
    caffe.log(device_message)
    solve(solver, pretrained_model, snapshot, gpus, timing, 0)
Exemplo n.º 6
0
 def show_time():
     """Log per-layer and aggregate timings on every ``display``-th iteration.

     Reads ``solver``, ``display``, ``fprop``, ``bprop``, ``total`` and
     ``allrd`` from outside this function. Does nothing when
     ``solver.iter`` is not a multiple of ``display``.
     """
     if solver.iter % display != 0:
         return

     layer_ids = range(len(solver.net.layers))
     parts = ['\n']
     # Forward timings, first layer to last.
     for idx in layer_ids:
         parts.append('forw %3d %8s ' % (idx, solver.net._layer_names[idx]))
         parts.append(': %.2f\n' % fprop[idx].ms)
     # Backward timings, last layer to first.
     for idx in reversed(layer_ids):
         parts.append('back %3d %8s ' % (idx, solver.net._layer_names[idx]))
         parts.append(': %.2f\n' % bprop[idx].ms)
     parts.append('solver total: %.2f\n' % total.ms)
     parts.append('allreduce: %.2f\n' % allrd.ms)
     caffe.log(''.join(parts))
Exemplo n.º 7
0
Arquivo: train.py Projeto: BVLC/caffe
 def show_time():
     """Emit a timing report through ``caffe.log`` every ``display`` iterations.

     The report lists forward times per layer, backward times per layer in
     reverse order, the solver total and the allreduce time. ``solver``,
     ``display``, ``fprop``, ``bprop``, ``total`` and ``allrd`` come from
     outside this function.
     """
     if solver.iter % display != 0:
         # Not a reporting iteration.
         return

     layer_count = len(solver.net.layers)
     forward_rows = [
         'forw %3d %8s ' % (k, solver.net._layer_names[k])
         + ': %.2f\n' % fprop[k].ms
         for k in range(layer_count)
     ]
     backward_rows = [
         'back %3d %8s ' % (k, solver.net._layer_names[k])
         + ': %.2f\n' % bprop[k].ms
         for k in reversed(range(layer_count))
     ]
     report = '\n' + ''.join(forward_rows) + ''.join(backward_rows)
     report += 'solver total: %.2f\n' % total.ms
     report += 'allreduce: %.2f\n' % allrd.ms
     caffe.log(report)
def train_model_multi_gpu(solver_prototxt, pretrained_model, gpus, timing=False):
    """Start one daemon ``solve`` process per entry in ``gpus`` and join them.

    Every process receives the same NCCL uid plus its own rank (its index in
    ``gpus``). Returns None once all processes have exited.
    """
    uid = caffe.NCCL.new_uid()
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    workers = []
    for rank in range(len(gpus)):
        solve_args = (solver_prototxt, pretrained_model, gpus, timing, uid, rank)
        proc = Process(target=solve, args=solve_args)
        proc.daemon = True
        proc.start()
        workers.append(proc)

    for proc in workers:
        proc.join()
def train_net_multi_gpu(solver_prototxt, roidb, output_dir, pretrained_model, max_iter, gpus):
    """Train a Fast R-CNN network with one non-daemon ``solve`` process per GPU.

    All processes share one NCCL uid; each gets its index in ``gpus`` as rank.
    Returns None after every process has been joined.
    """
    uid = caffe.NCCL.new_uid()
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    def launch(rank):
        """Create and start the non-daemon solver process for ``rank``."""
        child = Process(
            target=solve,
            args=(solver_prototxt, roidb, pretrained_model, gpus, uid, rank,
                  output_dir, max_iter),
        )
        child.daemon = False
        child.start()
        return child

    # Processes are started sequentially, in rank order.
    children = [launch(rank) for rank in range(len(gpus))]
    for child in children:
        child.join()
Exemplo n.º 10
0
def train(
        solver,  # solver proto definition
        initialization,  # weights or solver snapshot to restore from
        datasets,
        gpus  # list of device ids
):
    """Spawn one daemon ``solve`` process per GPU and wait for all of them.

    Each process is given the shared NCCL uid and its rank (index in
    ``gpus``). ``caffe.init_log`` is not called here. Returns None.
    """
    # A single uid identifies the NCCL session shared by all ranks.
    uid = caffe.NCCL.new_uid()

    caffe.log('Using devices %s' % str(gpus))

    workers = []
    for rank in range(len(gpus)):
        solve_args = (solver, initialization, datasets, gpus, uid, rank)
        proc = Process(target=solve, args=solve_args)
        proc.daemon = True
        proc.start()
        workers.append(proc)

    for proc in workers:
        proc.join()
Exemplo n.º 11
0
def train_net_multi_gpu(solver_prototxt, roidb, output_dir, pretrained_model,
                        max_iters, gpus):
    """Train a Fast R-CNN network on several GPUs and return a path list.

    A ``multiprocessing.Queue`` seeded with ``{'path_list': []}`` is passed to
    every ``solve`` process. After all processes have been joined, one item is
    taken from the queue and its ``'path_list'`` entry is returned.
    """
    uid = caffe.NCCL.new_uid()
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    # Seed the shared queue before any worker starts.
    queue = multiprocessing.Queue()
    queue.put({'path_list': []})

    workers = []
    for rank in range(len(gpus)):
        solve_args = (solver_prototxt, roidb, pretrained_model, gpus, uid,
                      rank, output_dir, max_iters, queue)
        proc = Process(target=solve, args=solve_args)
        proc.daemon = False
        proc.start()
        workers.append(proc)

    # NOTE(review): the queue is read only after every worker has been joined;
    # presumably workers put small items - confirm this cannot block a worker
    # at exit.
    for proc in workers:
        proc.join()

    final_state = queue.get()
    return final_state['path_list']
Exemplo n.º 12
0
Arquivo: train.py Projeto: BVLC/caffe
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    """Launch one daemon ``solve`` process per GPU and block until all exit.

    Every process receives the same NCCL uid and its own rank, which is its
    index in ``gpus``. Returns None.
    """
    # One uid identifies the NCCL session shared by all ranks.
    uid = caffe.NCCL.new_uid()

    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))

    def launch(rank):
        """Create, daemonize and start the solver process for ``rank``."""
        child = Process(target=solve,
                        args=(solver, snapshot, gpus, timing, uid, rank))
        child.daemon = True
        child.start()
        return child

    # Processes are started sequentially, in rank order.
    children = [launch(rank) for rank in range(len(gpus))]
    for child in children:
        child.join()
Exemplo n.º 13
0
def train(
        solver,  # solver proto definition
        snapshot,  # solver snapshot to restore
        weight,  # caffemodel to load
        gpus,  # list of device ids
        timing=False,  # show timing info for compute and communications
):
    """Start one daemon ``solve`` process per GPU and join them all.

    Each process gets the shared NCCL uid and its rank (index in ``gpus``).
    ``caffe.init_log`` is not called here. Returns None.
    """
    # One uid identifies the NCCL session shared by all ranks.
    uid = caffe.NCCL.new_uid()

    caffe.log('Using devices %s' % str(gpus))

    workers = []
    for rank in range(len(gpus)):
        solve_args = (solver, snapshot, weight, gpus, timing, uid, rank)
        proc = Process(target=solve, args=solve_args)
        proc.daemon = True
        proc.start()
        workers.append(proc)

    for proc in workers:
        proc.join()
Exemplo n.º 14
0
        solver.net.copy_from(_weights)
    
    solver.net.layers[0].get_gpu_id(gpus[rank])

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    for _ in range(max_iter):
        solver.step(1)


if __name__ == '__main__':
    # Script entry point: start one non-daemon `solve` process per GPU and
    # wait for all of them to finish.
    # NOTE(review): `gpus`, `solver_prototxt` and `max_iter` are not assigned
    # in this block; they must already exist at module level - confirm.

    # One uid is created here and passed to every rank.
    uid = caffe.NCCL.new_uid()
    caffe.init_log()
    caffe.log('Using devices %s' % str(gpus))
    procs = []

    # rank is the index of the process's GPU within `gpus`.
    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(solver_prototxt, gpus, uid, rank, max_iter))
        p.daemon = False
        p.start()
        procs.append(p)
    # Block until every solver process has exited.
    for p in procs:
        p.join()

Exemplo n.º 15
0
    assert cfg.TEST.HAS_RPN

    base_imgs = '../DATA_imgs/'
    folders = ['scene_img_abstract_v002_train2015',
               'scene_img_abstract_v002_val2015']
    
    for data_path in folders:
        image_ids = load_image_ids(base_imgs+data_path+'/')
        print(len(image_ids), 'len_image_ids')
        random.seed(10)
        random.shuffle(image_ids)
        # Split image ids between gpus
        image_ids = [image_ids[i::len(gpus)] for i in range(len(gpus))]

        caffe.init_log()
        caffe.log('Using devices %s' % str(gpus))
        procs = []    

        for i,gpu_id in enumerate(gpus):
            outfile = base_p+'outputs/'+data_path+'-features.tsv'
            outfile = '%s.%d' % (outfile, gpu_id)
            p = Process(target=generate_tsv,
                        args=(gpu_id, prototxt, caffemodel, image_ids[i], outfile))
            p.daemon = True
            p.start()
            procs.append(p)
        for p in procs:
            p.join()            


Exemplo n.º 16
0
def beam_decode(
        model,  # net proto definition
        vocab_file,  # model vocab text file
        weights,  # pretrained weights to use
        gpu,  # device id
        outfile,  # json output
):
    """Decode captions with a beam-search net, save them as JSON and score them.

    The vocabulary is read from ``vocab_file`` (one word per line). The net is
    run forward repeatedly; for every image id the caption of beam 0 is kept.
    Decoding stops after the first batch that contains an image id that was
    already recorded. The results are written to ``outfile`` as a list of
    ``{'image_id': ..., 'caption': ...}`` dicts sorted by image id, and the
    file is then passed to ``CaptionScorer().score``.

    Two blob layouts are handled:

    * compact net: ``captions.shape[0] == batch_size`` and the beams are
      indexed as ``captions[n][0][b]``;
    * unrolled net: the beams of image ``n`` are the rows
      ``captions[n * beam_size + b]``.

    Returns None.
    """
    with open(vocab_file) as f:
        vocab = [word.strip() for word in f]
    print('Loaded {:,} words into caption vocab'.format(len(vocab)))

    caffe.init_log(0, 1)
    caffe.log('Using device %s' % str(gpu))
    caffe.set_device(int(gpu))
    caffe.set_mode_gpu()

    net = caffe.Net(model, weights, caffe.TEST)
    print('Loaded proto {} with weights {}'.format(model, weights))
    net.layers[0].load_dataset()

    id_to_caption = {}
    iteration = 0
    while True:
        ending = False
        net.forward()
        image_ids = net.blobs['image_id'].data
        captions = net.blobs['caption'].data
        scores = net.blobs['log_prob'].data
        batch_size = image_ids.shape[0]

        compact = captions.shape[0] == batch_size
        if compact:
            # Decoding a compact net
            beam_size = captions.shape[2]
        else:
            # Decoding an unrolled net. Floor division keeps beam_size an int
            # so that range() below works on Python 3 as well.
            beam_size = captions.shape[0] // batch_size
            if iteration == 0:
                print("Beam size: %d" % beam_size)

        for n in range(batch_size):
            image_id = int(image_ids[n][0])
            if iteration == 0:
                print("\nhttp://mscoco.org/explore/?id=%d" % image_id)
            for b in range(beam_size):
                if compact:
                    cap = translate(vocab, captions[n][0][b])
                    score = scores[n][0][b]
                else:
                    cap = translate(vocab, captions[n * beam_size + b])
                    score = scores[n * beam_size + b]
                if b == 0:
                    # Record the top beam for both layouts. Previously the
                    # compact layout recorded nothing and never set `ending`,
                    # so the loop could not terminate and the output stayed
                    # empty.
                    if image_id in id_to_caption:
                        ending = True
                    else:
                        id_to_caption[image_id] = cap
                if iteration == 0:
                    print('[%d] %.2f %s' % (b, score, cap))

        iteration += 1
        if iteration % 1000 == 0:
            print('Iteration: %d' % iteration)
        if ending:
            break

    output = [
        {'image_id': image_id, 'caption': id_to_caption[image_id]}
        for image_id in sorted(id_to_caption)
    ]
    with open(outfile, 'w') as f:
        json.dump(output, f)
    print('Generated %d outputs, saving to %s' % (len(output), outfile))
    s = CaptionScorer()
    s.score(outfile)
Exemplo n.º 17
0
def partseg_train(network, exp_dir, category, args):
    """Build the net and solver definitions, then train with one process per GPU.

    :param network: either the string ``'seq'`` (a train net and a deploy net
        are generated through ``models.partseg_seq``) or a path that must end
        with ``.prototxt``
    :param exp_dir: directory used for the generated prototxt files and for
        resolving ``args.init_model`` / ``args.init_state`` iteration names
    :param category: name used as prefix of the generated file names
    :param args: options object; this function reads ``cpu``, ``gpus``,
        ``init_model``, ``init_state``, ``arch``, ``skips``, ``dataset``,
        ``dataset_params``, ``feat``, ``lattice``, ``sample_size``,
        ``batch_size``, ``prefix``, ``base_lr``, ``lr_decay``, ``stepsize``,
        ``test_iter``, ``test_interval``, ``num_iter``, ``snapshot_interval``,
        ``solver_type``, ``weight_decay`` and ``iter_size``
    :return: None; returns after all worker processes have been joined
    """
    def solve2(solver, args, uid, rank):
        """Worker body for one rank: configure Caffe, load state, run the solver.

        ``solver`` arrives as whatever ``create_solver.standard_solver``
        returned and is rebound to the object returned by ``caffe.get_solver``.
        """
        if args.cpu:
            caffe.set_mode_cpu()
        else:
            caffe.set_mode_gpu()
        # NOTE(review): set_device is called even when args.cpu is set -
        # confirm this is intended.
        caffe.set_device(args.gpus[rank])
        caffe.set_solver_count(len(args.gpus))
        caffe.set_solver_rank(rank)
        caffe.set_multiprocess(True)

        solver = caffe.get_solver(solver)

        # args.init_model is either a .caffemodel path or a value that is
        # expanded to '<category>_iter_<value>.caffemodel' inside exp_dir.
        if args.init_model:
            if args.init_model.endswith('.caffemodel'):
                solver.net.copy_from(args.init_model)
            else:
                solver.net.copy_from(os.path.join(exp_dir, '{}_iter_{}.caffemodel'.format(category, args.init_model)))

        # Same convention for the solver state ('.solverstate').
        if args.init_state:
            if args.init_state.endswith('.solverstate'):
                solver.restore(args.init_state)
            else:
                solver.restore(os.path.join(exp_dir, '{}_iter_{}.solverstate'.format(category, args.init_state)))

        nccl = caffe.NCCL(solver, uid)
        nccl.bcast()
        # NOTE(review): nccl is only attached via after_backward when
        # layer_wise_reduce is set; there is no solver.add_callback(nccl)
        # call here - confirm gradients are still reduced otherwise.
        if solver.param.layer_wise_reduce:
            solver.net.after_backward(nccl)
        print(rank)
        #pdb.set_trace()
        solver.step(solver.param.max_iter)
        #solver.solve()

        #caffe.set_device(0)

    if network == 'seq':
        batch_norm = True
        conv_weight_filler = 'xavier'
        # Training net; `network` is rebound to the value returned by
        # models.partseg_seq and later handed to the solver factory.
        network = models.partseg_seq(arch_str=args.arch,
                                     skip_str=args.skips,
                                     dataset=args.dataset,
                                     dataset_params=args.dataset_params,
                                     category=category,
                                     feat_dims_str=args.feat,
                                     lattice_dims_str=args.lattice,
                                     sample_size=args.sample_size,
                                     batch_size=args.batch_size,
                                     batchnorm=batch_norm,
                                     conv_weight_filler=conv_weight_filler,
                                     save_path=os.path.join(exp_dir, category + '_net.prototxt'))

        # Deploy variant; only called for its save_path output, the return
        # value is discarded.
        models.partseg_seq(deploy=True,
                           arch_str=args.arch,
                           skip_str=args.skips,
                           dataset=args.dataset,
                           dataset_params=args.dataset_params,
                           category=category,
                           feat_dims_str=args.feat,
                           lattice_dims_str=args.lattice,
                           sample_size=args.sample_size,
                           batchnorm=batch_norm,
                           save_path=os.path.join(exp_dir, category + '_net_deploy.prototxt'))
    else:
        assert network.endswith('.prototxt'), 'Please provide a valid prototxt file'
        print('Using network defined at {}'.format(network))

    random_seed = 0
    debug_info = False

    # `network` is passed for both of the first two positional arguments.
    # The third argument is presumably the snapshot prefix - TODO confirm
    # against create_solver.standard_solver.
    solver = create_solver.standard_solver(network,
                                           network,
                                           os.path.join(exp_dir, category)+'_' +args.prefix,
                                           base_lr=args.base_lr,
                                           gamma=args.lr_decay,
                                           stepsize=args.stepsize,
                                           test_iter=args.test_iter,
                                           test_interval=args.test_interval,
                                           max_iter=args.num_iter,
                                           snapshot=args.snapshot_interval,
                                           solver_type=args.solver_type,
                                           weight_decay=args.weight_decay,
                                           iter_size=args.iter_size,
                                           debug_info=debug_info,
                                           random_seed=random_seed,
                                           save_path=os.path.join(exp_dir, category+'_solver.prototxt'))
    ## Multiple GPUs
    # One uid is created here and passed to every rank.
    uid = caffe.NCCL.new_uid()

    caffe.init_log(0, True)
    caffe.log('Using devices %s' % str(args.gpus))
    procs = []

    # One daemon process per entry in args.gpus; rank is the index into it.
    for rank in range(len(args.gpus)):
        p = Process(target=solve2,
                    args=(solver, args, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    # Block until every worker has exited.
    for p in procs:
        p.join()
Exemplo n.º 18
0
    total.stop()
    show_time()


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()

    parser.add_argument("net_file", help="network model proto definition.")
    parser.add_argument("-g", "--gpu", type=int, default=0, help="Gpu Id.")
    parser.add_argument("-i", "--iters", type=int, default=10, help="Number of test iterations")
    parser.add_argument("-d", "--dataset", help="ImageDataset JSON file")
    args = parser.parse_args()

    # caffe.init_log()
    caffe.log('Using GPU# %s' % str(args.gpu))

    # init caffe
    caffe.set_device(args.gpu)
    caffe.set_mode_gpu()

    net = caffe.Net(args.net_file, caffe.TRAIN)

    if args.dataset is not None:
        print 'Loading dataset from {}'.format(args.dataset)
        dataset = rac.datasets.ImageDataset.from_json(args.dataset)
        print 'Loaded {} dataset with {} annotations'.format(dataset.name(), dataset.num_of_images())
        net.layers[0].add_dataset(dataset)
        net.layers[0].generate_datum_ids()

    print 'Will now run Fwd and Bkwd for {} times'.format(args.iters)
        force_boxes_json = json.load(open(_A.force_boxes))["annotations"]

        # Keep a map of image ID to force boxes.
        force_boxes_map = {}
        for annotation in force_boxes_json:
            if annotation["image_id"] not in force_boxes_map:
                force_boxes_map[annotation["image_id"]] = [annotation]
            else:
                force_boxes_map[annotation["image_id"]].append(annotation)

        # Make an H5 dataset to also store predicted classes if external boxes are provided.
        classes_dset = output_h5.create_dataset(
            "classes", (len(image_ids), ), h5py.special_dtype(vlen=np.uint32))

    caffe.init_log()
    caffe.log("Using device {}".format(_A.gpu_id))
    caffe.set_mode_gpu()
    caffe.set_device(_A.gpu_id)

    net = caffe.Net(_A.prototxt, caffe.TEST, weights=_A.caffemodel)

    for index, (image_id, image_file) in enumerate(tqdm(image_ids)):

        if _A.force_boxes is not None:
            # Get force_boxes if provided through args.
            force_boxes_annotations = force_boxes_map[image_id]
            force_boxes = np.asarray(
                [a["bbox"] for a in force_boxes_annotations], dtype=np.float32)
        else:
            force_boxes = None
Exemplo n.º 20
0
    # gpus = [int(i) for i in gpu_list]

    print('Using config:')
    pprint.pprint(cfg)
    assert cfg.TEST.HAS_RPN

    image_dict = load_image_dict(args.data_split)
    #import ipdb; ipdb.set_trace()

    # # Split image dictionary between gpus
    # image_dicts = []
    # for x in range(len(gpus)):
    #     image_dicts.append( dict(image_dict.items()[x::len(gpus)]) )

    caffe.init_log()
    caffe.log('Using device %s' % str(gpu_id))
    generate_h5(gpu_id, args.prototxt, args.caffemodel, image_dict,
                args.outfile)  # 74 seconds, 48MB
    # Time required:  ~ 17 days
    # Memory require: ~ 984 GB

    # procs = []

    # for i,gpu_id in enumerate(gpus):
    #     outfile = '%s.%d' % (args.outfile, gpu_id)
    #     p = Process(target=generate_h5,
    #                 args=(gpu_id, args.prototxt, args.caffemodel, image_dicts[i], outfile))
    #     p.daemon = True
    #     p.start()
    #     procs.append(p)
    # for p in procs:
Exemplo n.º 21
0
        if rank == 0:
            logging.info('curr_iter: {}, step_iters: {}'.format(
                curr_iter, step_iters))
            solver.snapshot()
            curr_iter += step_iters


if __name__ == "__main__":
    solver_proto = 'models/multigpu/solver.prototxt'
    weights_file = 'data/imagenet_models/VGG16.v2.caffemodel'
    cfg_file = 'experiments/faster_rcnn_end2end.yml'
    gpus = [3, 4, 5, 6]

    # caffe
    caffe.init_log(0, True)
    caffe.log('Using device {}'.format(str(gpus)))
    uid = caffe.NCCL.new_uid()

    # cfg
    cfg_from_file(cfg_file)
    assert (cfg.TRAIN.HAS_RPN                \
        and cfg.TRAIN.BBOX_REG               \
        and cfg.TRAIN.BBOX_NORMALIZE_TARGETS \
        and cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED)

    # roidb
    imdb_name = 'ftdata_train'
    imdb = get_imdb(imdb_name)
    if cfg.TRAIN.USE_FLIPPED:
        print 'Appending horizontally-flipped training examples...'
        imdb.append_flipped_images()