def solve(proto, snapshot, weight, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)
    elif weight and len(weight) != 0:
        solver.net.copy_from(weight)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
def solve(proto, snapshot, gpus, uid, rank):
    print 'Loading solver to GPU: ' + str(rank)
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        print 'Loading snapshot from : ' + snapshot + ' to GPU: ' + str(rank)
        # solver.restore(snapshot)
        solver.net.copy_from(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        print 'Timing ON'
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if plotting and rank == 0:
        print 'Plotting ON'
        plot(solver, nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    print 'Starting solver for GPU: ' + str(rank)
    solver.step(solver.param.max_iter)
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    # if solver.param.layer_wise_reduce:
    #     solver.net.after_backward(nccl)

    while solver.iter < solver.param.max_iter:
        solver.step(100)
        sys.stderr.write("rank: {} iter: {}\n".format(rank, solver.iter))
        if rank == 1:
            sleep(1)
def solve(proto, roidb, pretrained_model, gpus, uid, rank, output_dir, max_iter, reload):
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(solver_prototxt=proto,
                            roidb=roidb,
                            output_dir=output_dir,
                            gpu_id=rank,
                            pretrained_model=pretrained_model,
                            reload=reload)
    solver = solverW.get_solver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    while solver.iter < max_iter:
        solver.step(1)
        if solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0 and rank == 0:
            solverW.snapshot()
def solve2(solver, args, uid, rank):
    if args.cpu:
        caffe.set_mode_cpu()
    else:
        caffe.set_mode_gpu()
        caffe.set_device(args.gpus[rank])
        caffe.set_solver_count(len(args.gpus))
        caffe.set_solver_rank(rank)
        caffe.set_multiprocess(True)

    solver = caffe.get_solver(solver)
    if args.init_model:
        if args.init_model.endswith('.caffemodel'):
            solver.net.copy_from(args.init_model)
        else:
            solver.net.copy_from(os.path.join(
                exp_dir, '{}_iter_{}.caffemodel'.format(category, args.init_model)))
    if args.init_state:
        if args.init_state.endswith('.solverstate'):
            solver.restore(args.init_state)
        else:
            solver.restore(os.path.join(
                exp_dir, '{}_iter_{}.solverstate'.format(category, args.init_state)))

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    print(rank)
    # pdb.set_trace()
    solver.step(solver.param.max_iter)
def solve(gpus, uid, rank, solver_proto, roidb, weights=None, snapshot=None):
    cfg.GPU_ID = gpus[rank]  # setting for current process
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(solver_proto)
    logging.info('uid: {}, rank: {}, layer_wise_reduce: {}'.format(
        uid, rank, solver.param.layer_wise_reduce))
    max_iter = solver.param.max_iter
    snapshot_iters = solver.param.snapshot
    if snapshot:
        solver.restore(snapshot)
    if weights:
        solver.net.copy_from(weights)
    solver.net.layers[0].set_roidb(roidb, rank)

    nccl = caffe.NCCL(solver, uid)
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    nccl.bcast()

    curr_iter = solver.iter
    while curr_iter < max_iter:
        step_iters = snapshot_iters - curr_iter % snapshot_iters
        solver.step(step_iters)
        if rank == 0:
            logging.info('curr_iter: {}, step_iters: {}'.format(
                curr_iter, step_iters))
            solver.snapshot()
        curr_iter += step_iters
def solve(proto, snapshot, weights, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)
    elif weights and len(weights) != 0:
        print('Loading pretrained model '
              'weights from {:s}').format(weights)
        solver.net.copy_from(weights)

    # For RCNNDataLayer, split the dataset across gpus
    if solver.net.layers[0].type == "Python":
        solver.net.layers[0].load_dataset(rank, len(gpus))

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
def solve(proto, roidb, pretrained_model, gpus, uid, rank, output_dir, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(proto, roidb, output_dir, rank, pretrained_model)
    solver = solverW.getSolver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    count = 0
    timer = Timer()
    while count < max_iter:
        timer.tic()
        solver.step(1)
        timer.toc()
        count += 1
        # if count % (solver.param.display) == 0:
        if count % 200 == 0:
            if rank == 0:
                print 'iter: {}, speed: {:.3f}s / iter'.format(count, timer.average_time)
        if count % cfg.TRAIN.SNAPSHOT_ITERS == 0:
            if rank == 0:
                solverW.snapshot()
def solve(proto, pretrained_model, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solverW = SolverWrapper(proto, rank, pretrained_model)
    solver = solverW.getSolver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    print 'timing:', timing, rank, solver.param.layer_wise_reduce
    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    cnt = 0
    while cnt < solver.param.max_iter:
        solver.step(1)
        print 'rank', rank, ' conv5_3:', solver.net.params[
            'conv_stage3_block2_branch2c'][0].data[0][0][0]
        cnt += 1
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
def train_net(solver_prototxt, roidb, output_dir, nccl_uid, gpus, rank, queue,
              bbox_means, bbox_stds, pretrained_model=None, max_iters=40000):
    """Train a Fast R-CNN network."""
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    caffe.set_random_seed(cfg.RNG_SEED)

    sw = SolverWrapper(solver_prototxt, roidb, output_dir, nccl_uid, rank,
                       bbox_means, bbox_stds, pretrained_model=pretrained_model)
    model_paths = sw.train_model(max_iters)
    if rank == 0:
        queue.put(model_paths)
def solve(proto, roidb, pretrained_model, gpus, uid, rank, output_dir, max_iter,
          previous_state=None):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(proto, roidb, output_dir, rank, pretrained_model,
                            previous_state)
    solver = solverW.getSolver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    count = 0
    while count < max_iter:
        solver.step(cfg.TRAIN.SNAPSHOT_ITERS)
        if rank == 0:
            solverW.snapshot()
        count = count + cfg.TRAIN.SNAPSHOT_ITERS
def __init__(self, options):
    '''
    Constructor
    '''
    from fast_rcnn.config import cfg
    from fast_rcnn.test import im_detect
    from fast_rcnn.nms_wrapper import nms
    import caffe

    print("Starting worker process:", multiprocessing.current_process())
    global MYNET, cfg, WORKER_INDEX, OPTIONS
    cfg.TEST.HAS_RPN = True  # Use RPN for proposals

    proc = multiprocessing.current_process()
    WORKER_INDEX = (int(proc.name.split('-')[-1]) - 1) % options.worker_count
    OPTIONS = options
    assert WORKER_INDEX >= 0

    prototxt = os.path.join(options.storage_dir, 'models', 'test.prototxt')
    # 'models/face/VGG16/faster_rcnn_end2end/test.prototxt'
    net_path = os.path.join(options.storage_dir, 'models', 'face_vgg16_faster_rcnn.caffemodel')
    # caffemodel = NETS[options.net][1]
    if not os.path.isfile(net_path):
        raise IOError((
            '{:s} not found. Was the network downloaded to the {:s} directory?'
        ).format(net_path, options.storage_dir))

    if options.cpu_mode:
        print("Setting CPU Mode")
        caffe.set_mode_cpu()
        caffe.set_multiprocess(True)
    else:
        # GPU_ID = options.gpu_ids[WORKER_INDEX % len(options.gpu_ids)]
        # cfg.GPU_ID = GPU_ID
        # print("Setting GPU:", GPU_ID)
        caffe.set_mode_gpu()
        # caffe.set_device(GPU_ID)

    print("Loading Network:", net_path)
    MYNET = caffe.Net(prototxt, net_path, caffe.TEST)
    print("Worker Process Ready:", WORKER_INDEX, multiprocessing.current_process())
    # print(proc.ident, proc.name, proc.pid)
    sys.stdout.flush()
    sys.stderr.flush()
def worker(rank, uid, gpus, solver_prototxt, roidb, pretrained_model, max_iter, output_dir):
    """
    Training worker
    :param rank: The process rank
    :param uid: The caffe NCCL uid
    :param solver_prototxt: Solver prototxt
    :param roidb: Training roidb
    :param pretrained_model: Pretrained model
    :param gpus: GPUs to be used for training
    :param max_iter: Maximum number of training iterations
    :param output_dir: Output directory used for saving models
    :return:
    """
    # Setup caffe
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    # Setup Solver
    solverW = SolverWrapper(solver_prototxt=solver_prototxt,
                            roidb=roidb,
                            output_dir=output_dir,
                            gpu_id=rank,
                            pretrained_model=pretrained_model)
    solver = solverW.get_solver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    # Train the model for the specified number of iterations
    while solver.iter < max_iter:
        solver.step(1)
        # A = solver.net.blobs['conv4_3']
        # print(A.data.shape)
        # [(k, v.data.shape) for k, v in solver.net.blobs.items()]
        if (solver.iter % cfg.TRAIN.SNAPSHOT == 0 or solver.iter == max_iter - 1) and rank == 0:
            # Snapshot only in the main process
            solverW.snapshot()
def solve(proto, roidb, pretrained_model, gpus, uid, rank, output_dir, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(proto, roidb, output_dir, rank, pretrained_model)
    solver = solverW.getSolver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    net = solver.net
    count = 0
    rpn_loss_cls = 0
    rpn_loss_bbox = 0
    frcn_loss_cls = 0
    frcn_loss_bbox = 0
    accuarcy = 0
    timer = Timer()
    display_step = 500
    while count < max_iter:
        timer.tic()
        solver.step(cfg.TRAIN.SNAPSHOT_ITERS / display_step)
        timer.toc()
        rpn_loss_cls = net.blobs['rpn_cls_loss'].data
        rpn_loss_bbox = net.blobs['rpn_loss_bbox'].data
        frcn_loss_cls = net.blobs['loss_cls'].data
        frcn_loss_bbox = net.blobs['loss_bbox'].data
        accuarcy = net.blobs['accuarcy'].data
        if solver.iter % (cfg.TRAIN.SNAPSHOT_ITERS / display_step) == 0:
            print 'speed: {:.3f}s / iter'.format(
                timer.average_time / (cfg.TRAIN.SNAPSHOT_ITERS / display_step))
            print 'rpn_loss_cls:' + str(
                rpn_loss_cls) + ',rpn_loss_bbox:' + str(
                    rpn_loss_bbox) + ',frcn_loss_cls:' + str(
                        frcn_loss_cls) + ',frcn_loss_bbox:' + str(
                            frcn_loss_bbox) + ',accuarcy' + str(accuarcy)
        if (rank == 0) and (solver.iter % cfg.TRAIN.SNAPSHOT_ITERS == 0):
            solverW.snapshot()
        count = count + cfg.TRAIN.SNAPSHOT_ITERS
def solve(proto, initialization, datasets, gpus, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if initialization is not None:
        assert osp.exists(initialization), \
            'Path to weights/solverstate does not exist: {}'.format(initialization)
        if initialization.endswith('.solverstate'):
            print 'Restoring solverstate from {}'.format(initialization)
            solver.restore(initialization)
        elif initialization.endswith('.caffemodel'):
            print 'Initializing weights from {}'.format(initialization)
            solver.net.copy_from(initialization)
        else:
            raise ValueError(
                'ERROR: {} is not supported for initialization'.format(initialization))
    else:
        warnings.warn("Warning: No initialization provided. Training from scratch.")

    for dataset in datasets:
        solver.net.layers[0].add_dataset(dataset)
    solver.net.layers[0].print_params()
    solver.net.layers[0].generate_datum_ids()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
def solve_step(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    niter = solver.param.max_iter
    display = solver.param.display
    test_iter = 950
    test_interval = 200

    # Initialization
    train_loss = zeros(int(ceil(niter // display)))
    test_loss = zeros(int(ceil(niter // test_interval)))
    test_acc = zeros(int(ceil(niter // test_interval)))

    # Auxiliary variables
    _train_loss = 0
    _test_loss = 0
    _accuracy = 0
    _max_accuracy = 0
    _max_accuracy_iter = 0

    # Run the solver
    for it in range(niter):
        solver.step(1)
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
def solve(proto, pretrained_model, snapshot, gpus, timing, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(False)

    solver = caffe.SGDSolver(proto)
    if pretrained_model and len(pretrained_model) != 0:
        solver.net.copy_from(pretrained_model)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    # nccl = caffe.NCCL(solver, uid)
    # nccl.bcast()
    if timing and rank == 0:
        time(solver)
    solver.step(solver.param.max_iter)
def start(self, rank):
    self.rank = rank
    if len(self.gpus) > 0:
        self.device = self.gpus[rank]
        if debug:
            s = 'solver gpu %d' % self.gpus[self.rank] + \
                ' pid %d' % os.getpid() + ' size %d' % self.size + \
                ' rank %d' % self.rank
            print(s, file=sys.stderr)
        caffe.set_mode_gpu()
        caffe.set_device(self.device)
        caffe.set_solver_count(self.size)
        caffe.set_solver_rank(self.rank)
        caffe.set_multiprocess(True)
    else:
        print('solver cpu', file=sys.stderr)
        caffe.set_mode_cpu()

    if self.cmd.graph.endswith('.json'):
        with open(self.cmd.graph, mode='r') as f:
            graph = caffe_pb2.SolverParameter()
            text_format.Merge(f.read(), graph)
            self.graph = graph
    else:
        self.graph = self.solver_graph()

    import tempfile
    with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
        text_format.PrintMessage(self.graph, f)
        tmp = f.name

    self.caffe = caffe.AdamSolver(tmp)
    if self.uid:
        self.nccl = caffe.NCCL(self.caffe, self.uid)
        self.nccl.bcast()
        self.caffe.add_callback(self.nccl)
        if self.caffe.param.layer_wise_reduce:
            self.caffe.net.after_backward(self.nccl)
def worker(rank, uid, gpus, solver_prototxt, roidb, pretrained_model, max_iter, output_dir):
    """
    Training worker
    :param rank: The process rank
    :param uid: The caffe NCCL uid
    :param solver_prototxt: Solver prototxt
    :param roidb: Training roidb
    :param pretrained_model: Pretrained model
    :param gpus: GPUs to be used for training
    :param max_iter: Maximum number of training iterations
    :param output_dir: Output directory used for saving models
    :return:
    """
    # Setup caffe
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    # Setup Solver
    solverW = SolverWrapper(solver_prototxt=solver_prototxt,
                            roidb=roidb,
                            output_dir=output_dir,
                            gpu_id=rank,
                            pretrained_model=pretrained_model)
    solver = solverW.get_solver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    # Train the model for the specified number of iterations
    while solver.iter < max_iter:
        solver.step(1)
        if (solver.iter % cfg.TRAIN.SNAPSHOT == 0 or solver.iter == max_iter - 1) and rank == 0:
            # Snapshot only in the main process
            solverW.snapshot()
def solve(proto, gpus, uid, rank, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if rank == 0:
        # solver.restore(_snapshot)
        solver.net.copy_from(_weights)
    solver.net.layers[0].get_gpu_id(gpus[rank])

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    for _ in range(max_iter):
        solver.step(1)
def worker(rank, uid, gpus, solver_prototxt, roidb, pretrained_model, max_iter, output_dir):
    """
    Training worker
    :param rank: The process rank
    :param uid: The caffe NCCL uid
    :param solver_prototxt: Solver prototxt
    :param roidb: Training roidb
    :param pretrained_model: Pretrained model
    :param gpus: GPUs to be used for training
    :param max_iter: Maximum number of training iterations
    :param output_dir: Output directory used for saving models
    :return:
    """
    # Setup caffe
    cfg.RANK = rank
    cfg.GPU_ID = gpus[rank]  # Will be used in gpu_nms
    caffe.set_device(cfg.GPU_ID)
    caffe.set_random_seed(cfg.RNG_SEED + rank)
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    # Setup Solver
    solverW = SolverWrapper(
        solver_prototxt=str(solver_prototxt),
        roidb=roidb,
        output_dir=str(output_dir),
        rank=rank,
        pretrained_model=str(pretrained_model))
    solver = solverW.get_solver()

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    # Train the model for the specified number of iterations
    target_layers = filter(lambda x: x.startswith('target_layer'),
                           solver.net.layer_dict.keys())
    if rank == 0:
        t = Timer()
    while solver.iter < max_iter:
        for n in target_layers:
            solver.net.layer_dict[n].set_iter(solver.iter)
        if rank == 0:
            t.tic()
        solver.step(1)
        if (solver.iter % cfg.TRAIN.SNAPSHOT == 0 or solver.iter == max_iter) and rank == 0:
            # Snapshot only in the main process
            solverW.snapshot(solver.iter == max_iter)
        if rank == 0:
            t.toc()
            eta_in_s = int((max_iter - solver.iter) * t.average_time)
            try:
                for loss_name, loss_val in solver.net.blobs.items():
                    if 'loss' not in loss_name:
                        continue
                    tb.sess.add_scalar_value(
                        loss_name, float(loss_val.data), step=solver.iter)
                for n in target_layers:
                    tb.sess.add_scalar_value(
                        n + '_accuracy',
                        float(solver.net.layer_dict[n].accuracy),
                        step=solver.iter)
                tb.sess.add_scalar_value(
                    "speed", 1. / t.average_time, step=solver.iter)
                tb.sess.add_scalar_value(
                    "ETA (min)", eta_in_s / 60., step=solver.iter)
            except:
                logger.warning('Failed to submit data to Tensorboard')
            sys.stdout.write('\r{}, Speed: {:5f} iter/sec, ETA: {:8s}'.format(
                ', '.join([
                    '{}: {:5f}'.format(i[0], i[1].data)
                    for i in solver.net.blobs.items() if 'loss' in i[0]
                ] + [
                    '{}: {:5f}'.format(
                        n + '_accuracy',
                        float(solver.net.layer_dict[n].accuracy))
                    for n in target_layers
                ]), 1. / t.average_time,
                str(datetime.timedelta(seconds=eta_in_s))))
            sys.stdout.flush()
def solve_step(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)
    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    # solver = caffe.SGDSolver('/home/zhujiagang/temporal-segment-networks/models/ucf101/gating_three_solver.prototxt')
    # solver.restore('/home/zhujiagang/temporal-segment-networks/models/ucf101_split_1_gating_three_iter_200.solverstate')

    # Equivalent to max_iter in the solver file, i.e. the maximum number of iterations
    niter = solver.param.max_iter
    display = solver.param.display
    test_iter = 950
    test_interval = 200

    # Initialization
    train_loss = zeros(int(ceil(niter // display)))
    test_loss = zeros(int(ceil(niter // test_interval)))
    test_acc = zeros(int(ceil(niter // test_interval)))

    # Auxiliary variables
    _train_loss = 0
    _test_loss = 0
    _accuracy = 0
    _max_accuracy = 0
    _max_accuracy_iter = 0

    # Run the solver
    for it in range(niter):
        solver.step(1)
        _train_loss += solver.net.blobs['rgb_flow_gating_loss'].data
        if it % display == 0:
            train_loss[it // display] = _train_loss / display
            _train_loss = 0

        if it % test_interval == 0:
            print '\n my test, train iteration', it
            for test_it in range(test_iter):
                # print '\n my test, test iteration \n', test_it
                solver.test_nets[0].forward()
                _test_loss += solver.test_nets[0].blobs['rgb_flow_gating_loss'].data
                _accuracy += solver.test_nets[0].blobs['rgb_flow_gating_accuracy'].data
            test_loss[it / test_interval] = _test_loss / test_iter
            test_acc[it / test_interval] = _accuracy / test_iter
            if _max_accuracy < test_acc[it / test_interval]:
                _max_accuracy = test_acc[it / test_interval]
                _max_accuracy_iter = it
                solver.net.save('/home/zhujiagang/temporal-segment-networks/models/ucf101_split_1_gating_three_iter_'
                                + str(it) + '.caffemodel')
                print '\nnewly max: _max_accuracy and _max_accuracy_iter', _max_accuracy, _max_accuracy_iter
            print '\n_max_accuracy and _max_accuracy_iter', _max_accuracy, _max_accuracy_iter
            _test_loss = 0
            _accuracy = 0

    print '\nplot the train loss and test accuracy\n'
    print '\n_max_accuracy and _max_accuracy_iter\n', _max_accuracy, _max_accuracy_iter
    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()
    # train loss -> green
    ax1.plot(display * arange(len(train_loss)), train_loss, 'g')
    # test loss -> yellow
    ax1.plot(test_interval * arange(len(test_loss)), test_loss, 'y')
    # test accuracy -> red
    ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')
    ax1.set_xlabel('iteration')
    ax1.set_ylabel('loss')
    ax2.set_ylabel('accuracy')
    plt.show()
def pipeline_init(model_type, poj_path):
    if model_type == 'upper':
        num_points = 6
        model_stage1 = poj_path + '/fashion-landmarks/models/FLD_upper/stage1.prototxt'
        weights_stage1 = poj_path + '/fashion-landmarks/models/FLD_upper/stage1.caffemodel'
        model_stage2 = poj_path + '/fashion-landmarks/models/FLD_upper/cascade.prototxt'
        weights_stage2 = poj_path + '/fashion-landmarks/models/FLD_upper/stage2.caffemodel'
        model_stage3 = poj_path + '/fashion-landmarks/models/FLD_upper/cascade.prototxt'
        weights_stage3_easy = poj_path + '/fashion-landmarks/models/FLD_upper/stage3_easy.caffemodel'
        weights_stage3_hard = poj_path + '/fashion-landmarks/models/FLD_upper/stage3_hard.caffemodel'
    elif model_type == 'lower':
        num_points = 4
        model_stage1 = poj_path + '/fashion-landmarks/models/FLD_lower/stage1.prototxt'
        weights_stage1 = poj_path + '/fashion-landmarks/models/FLD_lower/stage1.caffemodel'
        model_stage2 = poj_path + '/fashion-landmarks/models/FLD_lower/cascade.prototxt'
        weights_stage2 = poj_path + '/fashion-landmarks/models/FLD_lower/stage2.caffemodel'
        model_stage3 = poj_path + '/fashion-landmarks/models/FLD_lower/cascade.prototxt'
        weights_stage3_easy = poj_path + '/fashion-landmarks/models/FLD_lower/stage3_easy.caffemodel'
        weights_stage3_hard = poj_path + '/fashion-landmarks/models/FLD_lower/stage3_hard.caffemodel'
    elif model_type == 'full':
        num_points = 8
        model_stage1 = poj_path + '/fashion-landmarks/models/FLD_full/stage1.prototxt'
        weights_stage1 = poj_path + '/fashion-landmarks/models/FLD_full/stage1.caffemodel'
        model_stage2 = poj_path + '/fashion-landmarks/models/FLD_full/cascade.prototxt'
        weights_stage2 = poj_path + '/fashion-landmarks/models/FLD_full/stage2.caffemodel'
        model_stage3 = poj_path + '/fashion-landmarks/models/FLD_full/cascade.prototxt'
        weights_stage3_easy = poj_path + '/fashion-landmarks/models/FLD_full/stage3_easy.caffemodel'
        weights_stage3_hard = poj_path + '/fashion-landmarks/models/FLD_full/stage3_hard.caffemodel'
    else:
        print('Undefined Model Type')

    # caffe.reset_all()
    caffe.set_mode_gpu()
    caffe.set_device(0)
    caffe.set_multiprocess(True)

    # create net and load weights
    net_stage1 = caffe.Net(model_stage1, weights_stage1, caffe.TEST)
    net_stage2 = caffe.Net(model_stage2, weights_stage2, caffe.TEST)
    net_stage3_easy = caffe.Net(model_stage3, weights_stage3_easy, caffe.TEST)
    net_stage3_hard = caffe.Net(model_stage3, weights_stage3_hard, caffe.TEST)

    pipeline = {
        'num_points': num_points,
        'net_stage1': net_stage1,
        'net_stage2': net_stage2,
        'net_stage3_easy': net_stage3_easy,
        'net_stage3_hard': net_stage3_hard
    }
    return pipeline
def caffe_loop(gpus, uid, rank, avg_guys, proc_comm):
    """Main loop for each GPU process.

    At the bottom is the main process which creates each GPU process (this guy).
    We set up all the parameters here and then run the Caffe loop. NCCL links each
    GPU process implicitly. So, you will not see semaphores or other similars, but
    NCCL is doing this in the background when Caffe is called. So for example, all
    processes will sync up when Caffe step is called (in PrefetchTrain).
    """
    global MP_COND
    global TRAIN_LOOP
    global FINISH_EXIT

    # Where is this project located?
    project_home = '/home/mundhenk/selfsupervised/'
    # Path to training image set
    path_prefix = '/home/mundhenk/images/patches_84h_110x110_13x13-blur-ab_compact/'
    # Condition is a label used for graphing, display purposes and saving snap shots.
    # This can be any valid string, but must be file name friendly.
    condition = 'my_awesome_selfsupervised_run'
    # Base for where a lot of files are kept or go such as network files
    caffe_data_dir = project_home + '/caffe_data/'
    # Where to save figures
    fig_root = project_home + '/figures/'
    # where to save this project
    proj_snapshot_dir = project_home + '/py_proj/'
    # where to save moab files
    log_dir = project_home + '/moab_output/'
    # extra profile to run to set environment on node
    profile = project_home + '/scripts/profile.sh'
    # Your caffe network prototxt file
    network_file_name = caffe_data_dir + '/train_val_AlexNet-Custom_triple.prototxt'
    # Name of a caffemodel to use to initialize our weights from
    weight_file = ''
    # Alexnet layer names from the network prototxt file
    start_layer_vis = 'conv1'        # Visualize This layer
    softmax_layer = 'softmax_plain'  # For testing, we need this guy
    loss_layer = 'loss'              # Your loss layer
    # Are we using a batch normalized network schedule. For plain CaffeNet, set to False
    use_batch_norm_sched = True
    # Re-init project files?
    init_new = False
    # ImageNet mean gray
    image_mean = [104.0, 117.0, 123.0]  # ImageNET
    # Given a 110x110 size patch, what are the range of scales we can resize it to before cropping out 96x96?
    ra_max_size = 128  # Goes to a max size corresponding to an image of 448x448
    ra_min_size = 96   # Goes to a min size corresponding to an image of 171x171
    # Training batch size. The script will auto resize this when using more than one GPU
    train_batch_size = 128
    # Testing batch size.
    test_batch_size = 20
    # How many classes you will test over.
    bin_num = 20
    # The actual size of the patches (96x96)
    patch_size = 96
    # Tells us where to center crop during testing
    patch_marg_1 = 7
    patch_marg_2 = 110
    # How many iters should we wait to display info?
    display_iters = 20
    # How many iters should we wait to test the network
    test_iters = 5000
    # Smoothing parameter over displayed loss
    loss_lambda = 20
    # Stride over the testing data set so we only use a subset.
    test_skip = 199
    # How often to snapshot the solver state
    snaphot_interval = 5000
    # training and testing list files
    test_list_file = path_prefix + 'val/val_list.nfl.npz'
    train_list_file = path_prefix + 'train/train_list.nfl.npz'

    # *****************************************************************************
    # Dont edit after here
    # *****************************************************************************

    # check to make sure files and dirs exist
    if PrefetchTrain.check_file(train_list_file, rank) == 0:
        return
    if PrefetchTrain.check_file(test_list_file, rank) == 0:
        return
    if PrefetchTrain.check_file(profile, rank) == 0:
        return
    if PrefetchTrain.check_dir(path_prefix, rank) == 0:
        return
    if PrefetchTrain.check_dir(project_home, rank) == 0:
        return
    if PrefetchTrain.check_dir(caffe_data_dir, rank) == 0:
        return

    # Create some directories if needed
    PrefetchTrain.check_create_dir(log_dir, rank)
    PrefetchTrain.check_create_dir(fig_root, rank)

    solver_file_name, snapshot_file, do_exit = PrefetchTrain.instantiate_slurm(
        proj_snapshot_dir, network_file_name, condition, log_dir, profile,
        snaphot_interval, use_batch_norm_sched, rank, MP_COND, proc_comm,
        init_new=init_new)

    # We just init-ed the whole thing. Now we exit
    if do_exit:
        return

    fig_model = condition
    fig_name_err = fig_root + fig_model + '.err.png'
    fig_name_sqr = fig_root + fig_model + '.sqr.jpg'
    fig_prop = 'b--'

    '''
    We will now configure a bunch of things before we run the main loop.
    NCCL needs some things to be in a particular order. Some tasks are reserved
    for a single process alone. These always run on the first GPU in the list.
    '''
    batch_toggle = 0

    print('GPU:{} Set Caffe Device'.format(gpus[rank]))
    print('GPU:{} Set Device'.format(gpus[rank]))
    caffe.set_device(gpus[rank])  ### THIS ALWAYS HAS TO COME BEFORE OTHER CAFFE SETTERS!!!

    # Set up multi processing
    if uid:
        print('GPU:{} Set Solver Count to {}'.format(gpus[rank], len(gpus)))
        caffe.set_solver_count(len(gpus))
        print('GPU:{} Set Solver Rank to {}'.format(gpus[rank], rank))
        caffe.set_solver_rank(rank)
        print('GPU:{} Set Multiprocess'.format(gpus[rank]))
        caffe.set_multiprocess(True)

    # Use GPU like a civilized human being
    print('GPU:{} Set to Use GPU'.format(gpus[rank]))
    caffe.set_mode_gpu()

    # resize the training batch size by number of GPU's we are using
    train_batch_size /= len(gpus)
    print('GPU:{} New Train Batch Size {}'.format(gpus[rank], train_batch_size))

    print('GPU:{} Load Network and Files'.format(gpus[rank]))
    print("GPU:{} Solver: {}".format(gpus[rank], solver_file_name))

    # Create the Caffe solver and read the solver file so we can use some of its parameters
    solver = caffe.SGDSolver(solver_file_name)
    solver_params = PrefetchTrain.read_proto_solver_file(solver_file_name)
    max_iters = solver_params.max_iter
    print("GPU:{} Adjusted Batch Size For Each GPU : {}".format(
        gpus[rank], train_batch_size))

    # This script does not support iters.
    assert (solver_params.iter_size < 2)

    # Open our training and testing lists, but don't do anything with them yet.
    print("GPU:{} Loading: {}".format(gpus[rank], test_list_file))
    if rank == 0:
        test_list_in = open(test_list_file)
    print("GPU:{} Loading: {}".format(gpus[rank], train_list_file))
    train_list_in = open(train_list_file)

    # Do we have a weight file? If so, use it.
    if weight_file != '':
        print('GPU:{} Loading weight file: {} '.format(gpus[rank], weight_file))
        solver.net.copy_from(weight_file)

    # Do we have a snapshot file? If so, use it.
    if snapshot_file != '':
        print('GPU:{} Loading Snapshot file: {}'.format(gpus[rank], snapshot_file))
        solver.restore(snapshot_file)

    if uid:
        # Create NCCL callback
        nccl = caffe.NCCL(solver, uid)
        nccl.bcast()
        solver.add_callback(nccl)
        if solver.param.layer_wise_reduce:
            solver.net.after_backward(nccl)

    print("GPU:{} Network and Files Loaded".format(gpus[rank]))

    # reshape our training blobs
    solver.net.blobs['data_1'].reshape(train_batch_size, 3, patch_size, patch_size)
    solver.net.blobs['data_2'].reshape(train_batch_size, 3, patch_size, patch_size)
    solver.net.blobs['data_3'].reshape(train_batch_size, 3, patch_size, patch_size)
    solver.net.blobs['label'].reshape(train_batch_size, 1, 1, 1)

    print("GPU:{} Network Train Blobs Set".format(gpus[rank]))

    # reshape testing blobs, but only one process will do this.
    if rank == 0:
        solver.test_nets[0].blobs['data_1'].reshape(test_batch_size, 3, patch_size, patch_size)
        solver.test_nets[0].blobs['data_2'].reshape(test_batch_size, 3, patch_size, patch_size)
        solver.test_nets[0].blobs['data_3'].reshape(test_batch_size, 3, patch_size, patch_size)
        solver.test_nets[0].blobs['label'].reshape(test_batch_size, 1, 1, 1)

        print("GPU:{} Network Test Blobs Set".format(gpus[rank]))

        test_transformer_1 = caffe.io.Transformer(
            {'data_1': solver.test_nets[0].blobs['data_1'].data.shape})
        test_transformer_1.set_transpose('data_1', (2, 0, 1))
        test_transformer_1.set_mean('data_1', np.float32(image_mean))  # mean pixel

        test_transformer_2 = caffe.io.Transformer(
            {'data_2': solver.test_nets[0].blobs['data_2'].data.shape})
        test_transformer_2.set_transpose('data_2', (2, 0, 1))
        test_transformer_2.set_mean('data_2', np.float32(image_mean))  # mean pixel

        test_transformer_3 = caffe.io.Transformer(
            {'data_3': solver.test_nets[0].blobs['data_3'].data.shape})
        test_transformer_3.set_transpose('data_3', (2, 0, 1))
        test_transformer_3.set_mean('data_3', np.float32(image_mean))  # mean pixel

        print("GPU:{} Network Test Transformer Set".format(gpus[rank]))

    # Set up our training parameters object
    tp = PrefetchTrain.TrainParams(solver, patch_size, patch_marg_1, patch_marg_2,
                                   train_batch_size, test_batch_size, bin_num,
                                   image_mean, loss_layer, softmax_layer)

    # copy a few more items over into our training parameters object
    tp.path_prefix = path_prefix
    tp.test_skip = test_skip
    tp.test_iters = test_iters
    tp.ra_patch_size = patch_size
    tp.ra_max_size = ra_max_size
    tp.ra_min_size = ra_min_size

    # Process and load our training data set
    print("GPU:{} Parse nfl context train list".format(gpus[rank]))
    NFL = NumpyFileList.CompactList()
    NFL.load(train_list_in)
    train_image_file = NFL
    train_list_in.close()

    # process and load our testing data set. Only one GPU will do this.
    if rank == 0:
        print("GPU:{} Parse nfl context test list".format(gpus[rank]))
        NFL = NumpyFileList.CompactList()
        NFL.load(test_list_in)
        test_image_file = NFL
        test_list_in.close()

    print("GPU:{} Lists Parsed".format(gpus[rank]))

    # Once we launch the threads, we need to exit gently
    TRAIN_LOOP = True

    # Init the two main loader threads and return handles
    f, r = PrefetchTrain.train_batch_triple_init(train_image_file, tp)

    # set some things we need to set.
    loss_avg = 0.0
    cstart = 0.0

    print("GPU:{} PREFETCH TRAIN".format(gpus[rank]))

    start_iter = True
    layer_loss = 0
    vis_fig = False
    vis_ax = False
    plot_fig = False
    plot_ax = False

    print("GPU:{} START LOOP".format(gpus[rank]))

    '''
    This is our main training loop. From here on out we will stay in this loop
    until exit. Most of the code here is for display and control.
    train_batch_triple is the only thing that needs to be called to train the network.
    '''
    while True:
        i = int(solver.iter)
        display = False

        # Do we compute display timing data this iteration?
        if (i % display_iters == 0 or start_iter):
            cend = time.time()
            timer = cend - cstart
            cstart = cend
            # It's annoying and useless to print stats like this on the first iter
            if not start_iter:
                t = timer / float(display_iters)
                # Only one process prints this stuff out.
                if rank == 0:
                    print("GPU:{} ({}) {} ".format(gpus[rank], i, condition))
                    print("GPU:{} Average TIME {}".format(gpus[rank], t))
                display = True

        # run the actual training step on Caffe. Get back a run handle r and performance data
        layer_loss, _, _, batch_toggle, r, do_exit = PrefetchTrain.train_batch_triple(
            batch_toggle, f, tp, r)
        if do_exit == True:
            proc_comm[2] = True

        # compute a running average over loss
        if start_iter:
            loss_avg = layer_loss
        else:
            loss_avg = (layer_loss + loss_avg * loss_lambda) / (1.0 + loss_lambda)
        avg_guys[rank] = loss_avg

        # Update the figure showing the first layer filters. Only one process does this.
        if display and rank == 0:
            # check if we have an x server connection to output to
            if PrefetchTrain.check_X_is_running():
                vis_fig, vis_ax = PrefetchTrain.vis_square(
                    solver.net.params[start_layer_vis][0].data, condition,
                    vis_fig, vis_ax, True, fig_name_sqr)

        # when we reach the right iteration, we will test the network and plot the performance
        if (rank == 0) or i == int(max_iters):
            if (i != 0 and i % test_iters == 0) or i == int(max_iters):
                print("TESTING")
                # Get weights over
                solver.test_nets[0].share_with(solver.net)
                # Run the test network
                correct_p, do_exit = PrefetchTrain.test_batch_context_triple(
                    test_image_file, test_transformer_1, test_transformer_2,
                    test_transformer_3, tp)
                # Plot the results of the test.
                plot_fig, plot_ax = PrefetchTrain.mr_plot(
                    correct_p, i, fig_prop, plot_fig, plot_ax, fig_name_err,
                    condition, tp=tp)
                if do_exit == True:
                    proc_comm[2] = True

        # one process will collect and display loss over all GPU processes.
        if display:
            # print("GPU:{} Average LOSS {}".format(gpus[rank], loss_avg))
            if rank == 0:
                avg = 0.0
                for ar in avg_guys:
                    avg += ar
                avg /= len(avg_guys)
                print("GPU:{} ALL Average LOSS {}".format(gpus[rank], avg))

        # Exit when maximum iteration is reached.
        if i == int(max_iters):
            print("GPU:{} Reaches Maxed Iters".format(gpus[rank]))
            break

        # Exit on ctrl-c
        if FINISH_EXIT:
            print("GPU:{} Got CTRL-C. Exiting ...".format(gpus[rank]))
            break

        if proc_comm[2] == True:
            print("GPU:{} Got ERROR. Exiting ...".format(gpus[rank]))
            return

        start_iter = False

    # When we exit, we always save the current state. Only one process does this.
    if rank == 0:
        # just in case
        solver.snapshot()
    print('done : Saving and exiting ...')