Example #1
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
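These solve() workers are not run directly: a parent process generates one NCCL uid and forks one worker per GPU. A minimal launcher in the style of Caffe's own python/train.py (a sketch; the train() name and argument defaults are ours):

from multiprocessing import Process

import caffe

def train(proto, gpus, snapshot='', timing=False):
    # one shared uid lets the per-rank caffe.NCCL objects find each other
    uid = caffe.NCCL.new_uid()
    procs = []
    for rank in range(len(gpus)):
        p = Process(target=solve,
                    args=(proto, snapshot, gpus, timing, uid, rank))
        p.daemon = True
        p.start()
        procs.append(p)
    for p in procs:
        p.join()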
Example #2
def solve(proto, pretrained_model, gpus, uid, rank, output_dir, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    solverW = SolverWrapper(proto, output_dir, rank, pretrained_model)
    solver = solverW.getSolver()
    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    count = 0
    while count < max_iter:
        solver.step(cfg.TRAIN.SNAPSHOT_ITERS)
        if rank == 0:
            solverW.snapshot()
    count += cfg.TRAIN.SNAPSHOT_ITERS
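SolverWrapper here comes from a Faster R-CNN style codebase and is not shown (Examples #7 and #12 use a variant with a get_solver() method). A hypothetical skeleton, inferred only from the call sites above:

import os

import caffe

class SolverWrapper(object):
    # hypothetical sketch; real implementations also do things like
    # unnormalizing bbox regression weights before saving
    def __init__(self, solver_prototxt, output_dir, rank, pretrained_model=None):
        self.output_dir = output_dir
        self.rank = rank
        self.solver = caffe.SGDSolver(solver_prototxt)
        if pretrained_model:
            self.solver.net.copy_from(pretrained_model)

    def getSolver(self):
        return self.solver

    def snapshot(self):
        # called on rank 0 only; writes the current weights to output_dir
        path = os.path.join(self.output_dir,
                            'iter_{}.caffemodel'.format(self.solver.iter))
        self.solver.net.save(path)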
Example #3
    def start(self, rank):
        self.rank = rank

        if len(self.gpus) > 0:
            self.device = self.gpus[rank]
            if debug:
                s = 'solver gpu %d' % self.gpus[self.rank] + \
                    ' pid %d' % os.getpid() + ' size %d' % self.size + \
                    ' rank %d' % self.rank
                print(s, file=sys.stderr)
            caffe.set_mode_gpu()
            caffe.set_device(self.device)
            caffe.set_solver_count(self.size)
            caffe.set_solver_rank(self.rank)
            caffe.set_multiprocess(True)
        else:
            print('solver cpu', file=sys.stderr)
            caffe.set_mode_cpu()

        if self.cmd.graph.endswith('.json'):
            with open(self.cmd.graph, mode='r') as f:
                graph = caffe_pb2.SolverParameter()
                text_format.Merge(f.read(), graph)
                self.graph = graph
        else:
            self.graph = self.solver_graph()

        # caffe.AdamSolver expects a filename, so round-trip the message through a temp file
        import tempfile
        with tempfile.NamedTemporaryFile(mode='w+', delete=False) as f:
            text_format.PrintMessage(self.graph, f)
            tmp = f.name
        self.caffe = caffe.AdamSolver(tmp)

        if self.uid:
            self.nccl = caffe.NCCL(self.caffe, self.uid)
            self.nccl.bcast()
            self.caffe.add_callback(self.nccl)
            if self.caffe.param.layer_wise_reduce:
                self.caffe.net.after_backward(self.nccl)
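The temporary-file round trip above exists only because caffe.AdamSolver takes a filename. If the build exposes get_solver_from_string, as Example #10 below relies on, the detour can be skipped (a sketch, assuming the SolverParameter carries type: "Adam"):

self.caffe = caffe.get_solver_from_string(self.graph.SerializeToString())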
Example #4
def solve(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)
    solver.step(solver.param.max_iter)
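The time(solver, nccl) helper that Examples #1, #4, #6 and #11 hand to rank 0 lives in Caffe's python/train.py; it attaches per-layer timers while keeping the NCCL callback registered, so the timing rank still participates in the gradient all-reduce. A simplified stand-in (the real version reports per-layer numbers):

import caffe

def time(solver, nccl):
    fprop = [caffe.Timer() for _ in solver.net.layers]
    bprop = [caffe.Timer() for _ in solver.net.layers]
    display = solver.param.display or 1

    def show_time():
        if solver.iter % display == 0:
            print('iter %d: forward %.2f ms, backward %.2f ms' %
                  (solver.iter,
                   sum(t.ms for t in fprop),
                   sum(t.ms for t in bprop)))

    solver.net.before_forward(lambda layer: fprop[layer].start())
    solver.net.after_forward(lambda layer: fprop[layer].stop())
    solver.net.before_backward(lambda layer: bprop[layer].start())
    solver.net.after_backward(lambda layer: bprop[layer].stop())
    solver.add_callback(nccl)  # rank 0 must still drive the all-reduce
    solver.add_callback(lambda: None, show_time)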
Example #5
def solve(proto, pretrained_model, snapshot, gpus, timing, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(False)

    solver = caffe.SGDSolver(proto)

    if pretrained_model and len(pretrained_model) != 0:
        solver.net.copy_from(pretrained_model)

    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    # nccl = caffe.NCCL(solver, uid)
    # nccl.bcast()

    if timing and rank == 0:
        time(solver)

    solver.step(solver.param.max_iter)
Example #6
from math import ceil

from numpy import zeros

def solve_step(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    niter = solver.param.max_iter
    display = solver.param.display
    test_iter = 950
    test_interval = 200
    # initialization
    train_loss = zeros(int(ceil(niter // display)))
    test_loss = zeros(int(ceil(niter // test_interval)))
    test_acc = zeros(int(ceil(niter // test_interval)))
    # helper variables
    _train_loss = 0
    _test_loss = 0
    _accuracy = 0
    _max_accuracy = 0
    _max_accuracy_iter = 0
    # run the solver (this example stops short of filling the stat arrays
    # above; Example #11 below shows the complete version of the same loop)
    for it in range(niter):
        solver.step(1)
Example #7
def worker(rank, uid, gpus, solver_prototxt, roidb, pretrained_model, max_iter, output_dir):
    """
    Training worker
    :param rank: The process rank
    :param uid: The caffe NCCL uid
    :param solver_prototxt: Solver prototxt
    :param roidb: Training roidb
    :param pretrained_model: Pretrained model
    :param gpus: GPUs to be used for training
    :param max_iter: Maximum number of training iterations
    :param output_dir: Output directory used for saving models
    :return:
    """

    # Setup caffe
    caffe.set_device(gpus[rank])
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)
    cfg.GPU_ID = gpus[rank]

    # Setup Solver
    solverW = SolverWrapper(solver_prototxt=solver_prototxt, roidb=roidb,
                            output_dir=output_dir, gpu_id=rank,
                            pretrained_model=pretrained_model)
    solver = solverW.get_solver()
    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    # Train the model for the specified number of iterations
    while solver.iter < max_iter:
        solver.step(1)
        if (solver.iter % cfg.TRAIN.SNAPSHOT == 0 or solver.iter == max_iter - 1) and rank == 0:
            # Snapshot only in the main process
            solverW.snapshot()
Example #8
def solve(proto, gpus, uid, rank, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if rank == 0:
        # solver.restore(_snapshot)
        solver.net.copy_from(_weights)

    solver.net.layers[0].get_gpu_id(gpus[rank])

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    for _ in range(max_iter):
        solver.step(1)
Example #9
def solve(proto, gpus, uid, rank, max_iter):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if rank == 0:
        # solver.restore(_snapshot)
        solver.net.copy_from(_weights)
    
    solver.net.layers[0].get_gpu_id(gpus[rank])

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    for _ in range(max_iter):
        solver.step(1)
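The solver.net.layers[0].get_gpu_id(gpus[rank]) call in Examples #8 and #9 only works if layer 0 is a custom Python data layer exposing that method. A hypothetical sketch of such a layer (the class name and sharding logic are assumptions; only the method name comes from the call site):

import caffe

class ShardedDataLayer(caffe.Layer):  # hypothetical
    def setup(self, bottom, top):
        self.gpu_id = 0  # overwritten via get_gpu_id() before stepping starts

    def get_gpu_id(self, gpu_id):
        # despite the name, the call site uses this as a setter: each rank
        # tells its data layer which GPU (and hence which data shard) it owns
        self.gpu_id = gpu_id

    def reshape(self, bottom, top):
        pass

    def forward(self, bottom, top):
        pass  # load and emit this rank's shard here

    def backward(self, top, propagate_down, bottom):
        pass  # data layers have nothing to backpropagate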
Example #10
    def __init__(self,
                 solver_def,
                 solver_state=None,
                 weights=None,
                 gpus=[0],
                 log_dir='log',
                 log_db_prefix='log_db'):
        """
        Acts as a driver for training with smart logging.
        Logs will be stored to a MySQL database.
        All paths can be set relative to the location of the solver prototxt.

        :param solver_def:      prototxt that defines the solver
        :param solver_state:    optional: a .solverstate file from which to resume training \  NEVER SET THESE TWO
        :param weights:         optional: a .caffemodel file from which to begin finetuning /   AT THE SAME TIME
        :param gpus:            optional: a list of GPU IDs to use for (multi-)GPU training
                                if set to None, caffe will operate in CPU mode
        :param log_dir:         optional: will log into this directory under solver.prototxt
        :param log_db_prefix:   prefix for both SQLite db names and table names

        The following parameters should be set in the solver prototxt file:
        log_interval            log per this number of iterations (simple log) [default = 20]
        viz_interval            log visualization per this number of iterations (net blobs snapshot) [default = 100]
        test_iter:              The number of iterations for each test net.
        """
        if not os.path.isabs(solver_def):
            if not os.path.isfile(os.path.join(os.getcwd(), solver_def)):
                os.chdir('..')
                solver_def = os.path.join(os.getcwd(), solver_def)
            else:
                solver_def = os.path.join(os.getcwd(), solver_def)

        self.solver_dir = solver_def[:solver_def.rfind('/')]
        os.chdir(self.solver_dir)
        self.log_dir = os.path.join(self.solver_dir, log_dir)
        if not os.path.exists(self.log_dir):
            os.mkdir(self.log_dir)
        self.logprint("Logging to {}".format(self.log_dir))
        self.log_db_prefix = log_db_prefix  # used for db name and as a prefix for tables
        self.solver_param = caffe_pb2.SolverParameter()
        text_format.Merge(open(solver_def).read(), self.solver_param)

        # read params from solver definition
        self.iterations = self.solver_param.max_iter
        self.log_interval = self.solver_param.log_interval
        self.viz_interval = self.solver_param.viz_interval
        self.test_interval = self.solver_param.test_interval
        self.se_index_blob = self.solver_param.se_index_blob
        self.se_error_blob = self.solver_param.se_error_blob
        self.se_interval = self.solver_param.se_interval
        self.se_indices = []
        self.se_errors = []

        # make solver param for net fail safe
        if not os.path.isabs(self.solver_param.net):
            self.solver_param.net = os.path.join(self.solver_dir,
                                                 self.solver_param.net)
            if not os.path.isfile(self.solver_param.net):
                raise Exception(
                    'could not find net definition from solver prototxt!')

        self.gpus = gpus
        if gpus:
            self.solver_param.device_id = gpus[0]
            caffe.set_device(gpus[0])
            caffe.set_mode_gpu()
            caffe.set_solver_count(len(gpus))

        self.solver = caffe.get_solver_from_string(
            self.solver_param.SerializeToString())

        if solver_state:
            # check if this file is in the current (or parent) directory or if the solver path needs to be prepended
            if not os.path.isfile(os.path.join('..', solver_state)):
                if not os.path.isfile(solver_state):
                    solver_state = os.path.join(self.solver_dir, solver_state)
                    if not os.path.isfile(solver_state):
                        raise Exception(
                            'could not find solver state specified!')
            else:
                solver_state = os.path.join('..', solver_state)
            self.solver.restore(solver_state)

            if weights:
                raise Exception(
                    'should not specify both solverstate and caffemodel! Preference will be given to solverstate.'
                )

        if weights and not solver_state:
            self.solver.net.copy_from(weights)

        self.sync = None
        self.viz_thread = None
        self.log_thread = None
        self.test_input = None
        self.test_out_blobs = None
        self.test_start_layer = None
        self.iteration = 0

        # check if blobs update_sample_errors exist
        if self.se_interval and self.se_index_blob not in self.solver.net.blobs:
            self.logprint(
                "WARNING: index_blob not found in net! Won't send errors to Net."
            )
            self.se_interval = 0

        if self.se_interval and self.se_error_blob not in self.solver.net.blobs:
            self.logprint(
                "WARNING: error_blob not found in net! Won't send errors to Net."
            )
            self.se_interval = 0
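A hypothetical instantiation of this driver (the class name is not shown in the snippet, so SmartSolver is assumed; per the docstring, solver_state and weights must never be set together):

driver = SmartSolver('models/mynet/solver.prototxt',
                     weights='models/mynet/init.caffemodel',
                     gpus=[0, 1])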
Example #11
from math import ceil

import matplotlib.pyplot as plt
from numpy import arange, zeros

def solve_step(proto, snapshot, gpus, timing, uid, rank):
    caffe.set_mode_gpu()
    caffe.set_device(gpus[rank])
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    solver = caffe.SGDSolver(proto)
    if snapshot and len(snapshot) != 0:
        solver.restore(snapshot)

    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()

    if timing and rank == 0:
        time(solver, nccl)
    else:
        solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)


    #solver = caffe.SGDSolver('/home/zhujiagang/temporal-segment-networks/models/ucf101/gating_three_solver.prototxt')
    #solver.restore('/home/zhujiagang/temporal-segment-networks/models/ucf101_split_1_gating_three_iter_200.solverstate')
    # equal to max_iter in the solver file, i.e. the maximum number of solver iterations
    niter = solver.param.max_iter
    display = solver.param.display
    test_iter = 950
    test_interval = 200
    # initialization
    train_loss = zeros(int(ceil(niter // display)))
    test_loss = zeros(int(ceil(niter // test_interval)))
    test_acc = zeros(int(ceil(niter // test_interval)))
    # helper variables
    _train_loss = 0
    _test_loss = 0
    _accuracy = 0
    _max_accuracy = 0
    _max_accuracy_iter = 0
    # run the solver
    for it in range(niter):
        solver.step(1)
        _train_loss += solver.net.blobs['rgb_flow_gating_loss'].data
        if it % display == 0:
            train_loss[it // display] = _train_loss / display
            _train_loss = 0

        if it % test_interval == 0:
            print('\n my test, train iteration', it)
            for test_it in range(test_iter):
                # print('\n my test, test iteration \n', test_it)
                solver.test_nets[0].forward()
                _test_loss += solver.test_nets[0].blobs['rgb_flow_gating_loss'].data
                _accuracy += solver.test_nets[0].blobs['rgb_flow_gating_accuracy'].data
            # integer division: these are array indices
            test_loss[it // test_interval] = _test_loss / test_iter
            test_acc[it // test_interval] = _accuracy / test_iter
            if _max_accuracy < test_acc[it // test_interval]:
                _max_accuracy = test_acc[it // test_interval]
                _max_accuracy_iter = it
                solver.net.save('/home/zhujiagang/temporal-segment-networks/models/ucf101_split_1_gating_three_iter_' + str(it) + '.caffemodel')
                print('\nnewly max: _max_accuracy and _max_accuracy_iter', _max_accuracy, _max_accuracy_iter)
            print('\n_max_accuracy and _max_accuracy_iter', _max_accuracy, _max_accuracy_iter)
            _test_loss = 0
            _accuracy = 0

    print('\nplot the train loss and test accuracy\n')
    print('\n_max_accuracy and _max_accuracy_iter\n', _max_accuracy, _max_accuracy_iter)

    _, ax1 = plt.subplots()
    ax2 = ax1.twinx()

    # train loss -> green
    ax1.plot(display * arange(len(train_loss)), train_loss, 'g')
    # test loss -> yellow
    ax1.plot(test_interval * arange(len(test_loss)), test_loss, 'y')
    # test accuracy -> red
    ax2.plot(test_interval * arange(len(test_acc)), test_acc, 'r')

    ax1.set_xlabel('iteration')
    ax1.set_ylabel('loss')
    ax2.set_ylabel('accuracy')
    plt.show()
Example #12
def worker(rank, uid, gpus, solver_prototxt, roidb, pretrained_model, max_iter,
           output_dir):
    """
    Training worker
    :param rank: The process rank
    :param uid: The caffe NCCL uid
    :param solver_prototxt: Solver prototxt
    :param roidb: Training roidb
    :param pretrained_model: Pretrained model
    :param gpus: GPUs to be used for training
    :param max_iter: Maximum number of training iterations
    :param output_dir: Output directory used for saving models
    :return:
    """

    # Setup caffe
    cfg.RANK = rank
    cfg.GPU_ID = gpus[rank]  # Will be used in gpu_nms
    caffe.set_device(cfg.GPU_ID)
    caffe.set_random_seed(cfg.RNG_SEED + rank)
    caffe.set_mode_gpu()
    caffe.set_solver_count(len(gpus))
    caffe.set_solver_rank(rank)
    caffe.set_multiprocess(True)

    # Setup Solver
    solverW = SolverWrapper(
        solver_prototxt=str(solver_prototxt),
        roidb=roidb,
        output_dir=str(output_dir),
        rank=rank,
        pretrained_model=str(pretrained_model))
    solver = solverW.get_solver()
    nccl = caffe.NCCL(solver, uid)
    nccl.bcast()
    solver.add_callback(nccl)

    if solver.param.layer_wise_reduce:
        solver.net.after_backward(nccl)

    # Train the model for the specified number of iterations
    # materialize the list: a lazy filter object would be exhausted after the
    # first pass of the training loop below (Python 3)
    target_layers = [name for name in solver.net.layer_dict.keys()
                     if name.startswith('target_layer')]

    if rank == 0:
        t = Timer()

    while solver.iter < max_iter:
        for n in target_layers:
            solver.net.layer_dict[n].set_iter(solver.iter)
        if rank == 0:
            t.tic()
        solver.step(1)
        if (solver.iter % cfg.TRAIN.SNAPSHOT == 0
                or solver.iter == max_iter) and rank == 0:
            # Snapshot only in the main process
            solverW.snapshot(solver.iter == max_iter)
        if rank == 0:
            t.toc()
            eta_in_s = int((max_iter - solver.iter) * t.average_time)
            try:
                for loss_name, loss_val in solver.net.blobs.items():
                    if 'loss' not in loss_name:
                        continue
                    tb.sess.add_scalar_value(
                        loss_name, float(loss_val.data), step=solver.iter)
                for n in target_layers:
                    tb.sess.add_scalar_value(
                        n + '_accuracy',
                        float(solver.net.layer_dict[n].accuracy),
                        step=solver.iter)
                tb.sess.add_scalar_value(
                    "speed", 1. / t.average_time, step=solver.iter)
                tb.sess.add_scalar_value(
                    "ETA (min)", eta_in_s / 60., step=solver.iter)
            except Exception:
                logger.warning('Failed to submit data to Tensorboard')
            sys.stdout.write('\r{}, Speed: {:5f} iter/sec, ETA: {:8s}'.format(
                ', '.join([
                    '{}: {:5f}'.format(i[0], i[1].data)
                    for i in solver.net.blobs.items() if 'loss' in i[0]
                ] + [
                    '{}: {:5f}'.format(
                        n +
                        '_accuracy', float(solver.net.layer_dict[n].accuracy))
                    for n in target_layers
                ]), 1. / t.average_time,
                str(datetime.timedelta(seconds=eta_in_s))))
            sys.stdout.flush()
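The Timer used above is imported from elsewhere in that codebase (py-faster-rcnn ships one in utils/timer.py). A minimal stand-in matching the tic()/toc()/average_time interface the loop relies on:

import time

class Timer(object):
    def __init__(self):
        self.total_time = 0.0
        self.calls = 0
        self.start_time = 0.0
        self.average_time = 0.0

    def tic(self):
        self.start_time = time.time()

    def toc(self):
        self.total_time += time.time() - self.start_time
        self.calls += 1
        self.average_time = self.total_time / self.calls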
Example #13
    def __init__(self, solver_def, solver_state=None, weights=None, gpus=[0], log_dir='log', log_db_prefix='log_db'):
        """
        Acts as a driver for training with smart logging.
        Logs will be stored to a MySQL database.
        All paths can be set relative to the location of the solver prototxt.

        :param solver_def:      prototxt that defines the solver
        :param solver_state:    optional: a .solverstate file from which to resume training \  NEVER SET THESE TWO
        :param weights:         optional: a .caffemodel file from which to begin finetuning /   AT THE SAME TIME
        :param gpus:            optional: a list of GPU IDs to use for (multi-)GPU training
                                if set to None, caffe will operate in CPU mode
        :param log_dir:         optional: will log into this directory under solver.prototxt
        :param log_db_prefix:   prefix for both SQLite db names and table names

        The following parameters should be set in the solver prototxt file:
        log_interval            log per this number of iterations (simple log) [default = 20]
        viz_interval            log visualization per this number of iterations (net blobs snapshot) [default = 100]
        test_iter:              The number of iterations for each test net.
        """
        if not os.path.isabs(solver_def):
            if not os.path.isfile(os.path.join(os.getcwd(), solver_def)):
                os.chdir('..')
                solver_def = os.path.join(os.getcwd(), solver_def)
            else:
                solver_def = os.path.join(os.getcwd(), solver_def)

        self.solver_dir = solver_def[:solver_def.rfind('/')]
        os.chdir(self.solver_dir)
        self.log_dir = os.path.join(self.solver_dir, log_dir)
        if not os.path.exists(self.log_dir):
            os.mkdir(self.log_dir)
        self.logprint("Logging to {}".format(self.log_dir))
        self.log_db_prefix = log_db_prefix  # used for db name and as a prefix for tables
        self.solver_param = caffe_pb2.SolverParameter()
        text_format.Merge(open(solver_def).read(), self.solver_param)

        # read params from solver definition
        self.iterations = self.solver_param.max_iter
        self.log_interval = self.solver_param.log_interval
        self.viz_interval = self.solver_param.viz_interval
        self.test_interval = self.solver_param.test_interval
        self.se_index_blob = self.solver_param.se_index_blob
        self.se_error_blob = self.solver_param.se_error_blob
        self.se_interval = self.solver_param.se_interval
        self.se_indices = []
        self.se_errors = []

        # make solver param for net fail safe
        if not os.path.isabs(self.solver_param.net):
            self.solver_param.net = os.path.join(self.solver_dir, self.solver_param.net)
            if not os.path.isfile(self.solver_param.net):
                raise Exception('could not find net definition from solver prototxt!')

        self.gpus = gpus
        if gpus:
            self.solver_param.device_id = gpus[0]
            caffe.set_device(gpus[0])
            caffe.set_mode_gpu()
            caffe.set_solver_count(len(gpus))

        self.solver = caffe.get_solver_from_string(self.solver_param.SerializeToString())

        if solver_state:
            # check if this file is in the current (or parent) directory or if the solver path needs to be prepended
            if not os.path.isfile(os.path.join('..', solver_state)):
                if not os.path.isfile(solver_state):
                    solver_state = os.path.join(self.solver_dir, solver_state)
                    if not os.path.isfile(solver_state):
                        raise Exception('could not find solver state specified!')
            else:
                solver_state = os.path.join('..', solver_state)
            self.solver.restore(solver_state)

            if weights:
                raise Exception(
                    'should not specify both solverstate and caffemodel! Preference will be given to solverstate.')

        if weights and not solver_state:
            self.solver.net.copy_from(weights)

        self.sync = None
        self.viz_thread = None
        self.log_thread = None
        self.test_input = None
        self.test_out_blobs = None
        self.test_start_layer = None
        self.iteration = 0

        # check if blobs update_sample_errors exist
        if self.se_interval and self.se_index_blob not in self.solver.net.blobs:
            self.logprint("WARNING: index_blob not found in net! Won't send errors to Net.")
            self.se_interval = 0

        if self.se_interval and self.se_error_blob not in self.solver.net.blobs:
            self.logprint("WARNING: error_blob not found in net! Won't send errors to Net.")
            self.se_interval = 0
Example #14
def caffe_loop(gpus, uid, rank, avg_guys, proc_comm):
    """Main loop for each GPU process.

    At the bottom is the main process, which creates each GPU process (this one). We set up all the parameters
    here and then run the Caffe loop. NCCL links the GPU processes implicitly, so you will not see semaphores or
    similar primitives; NCCL does the synchronization in the background when Caffe is called. For example, all
    processes sync up when Caffe's step is called (in PrefetchTrain).
    """
    global MP_COND
    global TRAIN_LOOP
    global FINISH_EXIT

    # Where is this project located?
    project_home = '/home/mundhenk/selfsupervised/'
    # Path to training image set
    path_prefix = '/home/mundhenk/images/patches_84h_110x110_13x13-blur-ab_compact/'

    # Condition is a label used for graphing, display purposes and saving snapshots.
    # It can be any valid string, but it must be file-name friendly.
    condition = 'my_awesome_selfsupervised_run'
    # Base for where a lot of files are kept or go such as network files
    caffe_data_dir = project_home + '/caffe_data/'
    # Where to save figures
    fig_root = project_home + '/figures/'
    # where to save this project
    proj_snapshot_dir = project_home + '/py_proj/'
    # where to save moab files
    log_dir = project_home + '/moab_output/'
    # extra profile script to run to set up the environment on the node
    profile = project_home + '/scripts/profile.sh'
    # Your caffe network prototxt file
    network_file_name = caffe_data_dir + '/train_val_AlexNet-Custom_triple.prototxt'

    # Name of a caffemodel to use to initialize our weights from
    weight_file = ''

    # Alexnet layer names from the network prototxt file
    start_layer_vis = 'conv1'  # Visualize This layer
    softmax_layer = 'softmax_plain'  # For testing, we need this guy
    loss_layer = 'loss'  # Your loss layer
    # Are we using a batch normalized network schedule. For plain CaffeNet, set to False
    use_batch_norm_sched = True
    # Re-init project files?
    init_new = False

    # ImageNet mean gray
    image_mean = [104.0, 117.0, 123.0]  # ImageNET
    # Given a 110x110 size patch, what are the range of scales we can resize it to before cropping out 96x96?
    ra_max_size = 128  # Goes to a max size corresponding to an image of 448x448
    ra_min_size = 96  # Goes to a min size corresponding to an image of 171x171
    # Training batch size. The script will auto resize this when using more than one GPU
    train_batch_size = 128
    # Testing batch size.
    test_batch_size = 20
    # How many classes you will test over.
    bin_num = 20
    # The actual size of the patches (96x96)
    patch_size = 96
    # Tells us where to center crop during testing
    patch_marg_1 = 7
    patch_marg_2 = 110
    # How many iters should we wait to display info?
    display_iters = 20
    # How many iters should we wait to test the network
    test_iters = 5000
    # Smoothing parameter over displayed loss
    loss_lambda = 20
    # Stride over the testing data set so we only use a subset.
    test_skip = 199
    # How often to snapshot the solver state
    snapshot_interval = 5000

    # training and testing list files
    test_list_file = path_prefix + 'val/val_list.nfl.npz'
    train_list_file = path_prefix + 'train/train_list.nfl.npz'

    # *******************************************************************************************************************
    # *******************************************************************************************************************
    # Don't edit after here
    # *******************************************************************************************************************
    # *******************************************************************************************************************

    # check to make sure files and dirs exist
    if PrefetchTrain.check_file(train_list_file, rank) == 0: return
    if PrefetchTrain.check_file(test_list_file, rank) == 0: return
    if PrefetchTrain.check_file(profile, rank) == 0: return

    if PrefetchTrain.check_dir(path_prefix, rank) == 0: return
    if PrefetchTrain.check_dir(project_home, rank) == 0: return
    if PrefetchTrain.check_dir(caffe_data_dir, rank) == 0: return

    # Create some directories if needed
    PrefetchTrain.check_create_dir(log_dir, rank)
    PrefetchTrain.check_create_dir(fig_root, rank)

    solver_file_name, snapshot_file, do_exit = PrefetchTrain.instantiate_slurm(
        proj_snapshot_dir,
        network_file_name,
        condition,
        log_dir,
        profile,
        snapshot_interval,
        use_batch_norm_sched,
        rank,
        MP_COND,
        proc_comm,
        init_new=init_new)

    # We just init-ed the whole thing. Now we exit
    if do_exit:
        return

    fig_model = condition
    fig_name_err = fig_root + fig_model + '.err.png'
    fig_name_sqr = fig_root + fig_model + '.sqr.jpg'
    fig_prop = 'b--'
    '''
    We will now configure a bunch of things before we run the main loop. NCCL needs some things to be in a
    particular order. Some tasks are reserved for a single process alone. These always run on the first GPU
    in the list.
    '''

    batch_toggle = 0

    print('GPU:{} Set Caffe Device'.format(gpus[rank]))

    print('GPU:{} Set Device'.format(gpus[rank]))
    caffe.set_device(
        gpus[rank])  ### THIS ALWAYS HAS TO COME BEFORE OTHER CAFFE SETTERS!!!

    # Set up multi processing
    if uid:
        print('GPU:{} Set Solver Count to {}'.format(gpus[rank], len(gpus)))
        caffe.set_solver_count(len(gpus))
        print('GPU:{} Set Solver Rank to {}'.format(gpus[rank], rank))
        caffe.set_solver_rank(rank)
        print('GPU:{} Set Multiprocess'.format(gpus[rank]))
        caffe.set_multiprocess(True)

    # Use GPU like a civilized human being
    print('GPU:{} Set to Use GPU'.format(gpus[rank]))
    caffe.set_mode_gpu()

    # resize the training batch size by the number of GPUs we are using
    # (integer division: the blob reshape below needs an int)
    train_batch_size //= len(gpus)

    print('GPU:{} New Train Batch Size {}'.format(gpus[rank],
                                                  train_batch_size))

    print('GPU:{} Load Network and Files'.format(gpus[rank]))
    print("GPU:{} Solver: {}".format(gpus[rank], solver_file_name))

    # Create the Caffe solver and read the solver file so we can use some of its parameters
    solver = caffe.SGDSolver(solver_file_name)
    solver_params = PrefetchTrain.read_proto_solver_file(solver_file_name)
    max_iters = solver_params.max_iter

    print("GPU:{} Adjusted Batch Size For Each GPU : {}".format(
        gpus[rank], train_batch_size))

    # This script does not support iter_size > 1.
    assert solver_params.iter_size < 2

    # Open our training and testing lists, but don't do anything with them yet.
    print("GPU:{} Loading: {}".format(gpus[rank], test_list_file))
    if rank == 0:
        test_list_in = open(test_list_file)
    print("GPU:{} Loading: {}".format(gpus[rank], train_list_file))
    train_list_in = open(train_list_file)

    # Do we have a weight file? If so, use it.
    if weight_file != '':
        print('GPU:{} Loading weight file: {} '.format(gpus[rank],
                                                       weight_file))
        solver.net.copy_from(weight_file)

    # Do we have a snapshot file? If so, use it.
    if snapshot_file != '':
        print('GPU:{} Loading Snapshot file: {}'.format(
            gpus[rank], snapshot_file))
        solver.restore(snapshot_file)

    if uid:
        # Create NCCL callback
        nccl = caffe.NCCL(solver, uid)
        nccl.bcast()
        solver.add_callback(nccl)

        if solver.param.layer_wise_reduce:
            solver.net.after_backward(nccl)

    print("GPU:{} Network and Files Loaded".format(gpus[rank]))

    # reshape our training blobs
    solver.net.blobs['data_1'].reshape(train_batch_size, 3, patch_size,
                                       patch_size)
    solver.net.blobs['data_2'].reshape(train_batch_size, 3, patch_size,
                                       patch_size)
    solver.net.blobs['data_3'].reshape(train_batch_size, 3, patch_size,
                                       patch_size)
    solver.net.blobs['label'].reshape(train_batch_size, 1, 1, 1)

    print("GPU:{} Network Train Blobs Set".format(gpus[rank]))

    # reshape testing blobs, but only one process will do this.
    if rank == 0:
        solver.test_nets[0].blobs['data_1'].reshape(test_batch_size, 3,
                                                    patch_size, patch_size)
        solver.test_nets[0].blobs['data_2'].reshape(test_batch_size, 3,
                                                    patch_size, patch_size)
        solver.test_nets[0].blobs['data_3'].reshape(test_batch_size, 3,
                                                    patch_size, patch_size)
        solver.test_nets[0].blobs['label'].reshape(test_batch_size, 1, 1, 1)

        print("GPU:{} Network Test Blobs Set".format(gpus[rank]))

        test_transformer_1 = caffe.io.Transformer(
            {'data_1': solver.test_nets[0].blobs['data_1'].data.shape})
        test_transformer_1.set_transpose('data_1', (2, 0, 1))
        test_transformer_1.set_mean('data_1',
                                    np.float32(image_mean))  # mean pixel
        test_transformer_2 = caffe.io.Transformer(
            {'data_2': solver.test_nets[0].blobs['data_2'].data.shape})
        test_transformer_2.set_transpose('data_2', (2, 0, 1))
        test_transformer_2.set_mean('data_2',
                                    np.float32(image_mean))  # mean pixel
        test_transformer_3 = caffe.io.Transformer(
            {'data_3': solver.test_nets[0].blobs['data_3'].data.shape})
        test_transformer_3.set_transpose('data_3', (2, 0, 1))
        test_transformer_3.set_mean('data_3',
                                    np.float32(image_mean))  # mean pixel

        print("GPU:{} Network Test Transformer Set".format(gpus[rank]))

    # Set up our training parameters object
    tp = PrefetchTrain.TrainParams(solver, patch_size, patch_marg_1,
                                   patch_marg_2, train_batch_size,
                                   test_batch_size, bin_num, image_mean,
                                   loss_layer, softmax_layer)

    # copy a few more items over into our training parameters object
    tp.path_prefix = path_prefix
    tp.test_skip = test_skip
    tp.test_iters = test_iters
    tp.ra_patch_size = patch_size
    tp.ra_max_size = ra_max_size
    tp.ra_min_size = ra_min_size

    # Process and load our training data set
    print("GPU:{} Parse nfl context train list".format(gpus[rank]))
    NFL = NumpyFileList.CompactList()
    NFL.load(train_list_in)
    train_image_file = NFL

    train_list_in.close()

    # process and load our testing data set. Only one GPU will do this.
    if rank == 0:
        print("GPU:{} Parse nfl context test list".format(gpus[rank]))
        NFL = NumpyFileList.CompactList()
        NFL.load(test_list_in)
        test_image_file = NFL

        test_list_in.close()

    print("GPU:{} Lists Parsed".format(gpus[rank]))

    # Once we launch the threads, we need to exit gently
    TRAIN_LOOP = True

    # Init the two main loader threads and return handles
    f, r = PrefetchTrain.train_batch_triple_init(train_image_file, tp)

    # set some things we need to set.
    loss_avg = 0.0
    cstart = 0.0

    print("GPU:{} PREFETCH TRAIN".format(gpus[rank]))

    start_iter = True
    layer_loss = 0

    vis_fig = False
    vis_ax = False

    plot_fig = False
    plot_ax = False

    print("GPU:{} START LOOP".format(gpus[rank]))
    '''
    This is our main training loop. From here on out we will stay in this loop until exit. Most of the code here is for
    display and control. train_batch_triple is the only thing that needs to be called to train the network. 
    '''
    while True:

        i = int(solver.iter)
        display = False

        # Do we compute display timing data this iteration?
        if (i % display_iters == 0 or start_iter):
            cend = time.time()
            timer = cend - cstart
            cstart = cend

            # It's annoying and useless to print stats like this on the first iter
            if not start_iter:
                t = timer / float(display_iters)
                # Only one process prints this stuff out.
                if rank == 0:
                    print("GPU:{} ({}) {} ".format(gpus[rank], i, condition))
                    print("GPU:{} Average TIME {}".format(gpus[rank], t))

                display = True

        # run the actual training step on Caffe. Get back a run handle r and performance data
        layer_loss, _, _, batch_toggle, r, do_exit = PrefetchTrain.train_batch_triple(
            batch_toggle, f, tp, r)

        if do_exit: proc_comm[2] = True

        # compute a running average over loss
        if start_iter:
            loss_avg = layer_loss
        else:
            loss_avg = (layer_loss + loss_avg * loss_lambda) / (1.0 +
                                                                loss_lambda)

        avg_guys[rank] = loss_avg

        # Update the figure showing the first layer filters. Only one process does this.
        if display and rank == 0:
            # check if we have an x server connection to output to
            if PrefetchTrain.check_X_is_running():
                vis_fig, vis_ax = PrefetchTrain.vis_square(
                    solver.net.params[start_layer_vis][0].data, condition,
                    vis_fig, vis_ax, True, fig_name_sqr)

        # when we reach the right iteration, test the network and plot the performance;
        # only rank 0 does this, since only it loaded the test set above
        if rank == 0:

            if (i != 0 and i % test_iters == 0) or i == int(max_iters):
                print("TESTING")
                # Get weights over
                solver.test_nets[0].share_with(solver.net)

                # Run the test network
                correct_p, do_exit = PrefetchTrain.test_batch_context_triple(
                    test_image_file, test_transformer_1, test_transformer_2,
                    test_transformer_3, tp)

                # Plot the results of the test.
                plot_fig, plot_ax = PrefetchTrain.mr_plot(correct_p,
                                                          i,
                                                          fig_prop,
                                                          plot_fig,
                                                          plot_ax,
                                                          fig_name_err,
                                                          condition,
                                                          tp=tp)

                if do_exit: proc_comm[2] = True

        # one process will collect and display loss over all GPU processes.
        if display:
            #print("GPU:{} Average LOSS {}".format(gpus[rank],loss_avg))
            if rank == 0:
                avg = 0.0
                for ar in avg_guys:
                    avg += ar

                avg /= len(avg_guys)

                print("GPU:{} ALL Average LOSS {}".format(gpus[rank], ar))

        # Exit when maximum iteration is reached.
        if i == int(max_iters):
            print("GPU:{} Reaches Maxed Iters".format(gpus[rank]))
            break

        # Exit on ctrl-c
        if FINISH_EXIT:
            print("GPU:{} Got CTRL-C. Exiting ...".format(gpus[rank]))
            break

        if proc_comm[2]:
            print("GPU:{} Got ERROR. Exiting ...".format(gpus[rank]))
            return

        start_iter = False

    # When we exit, we always save the current state. Only one process does this.
    if rank == 0:
        # just in case
        solver.snapshot()

        print('done : Saving and exiting ...')
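The docstring of caffe_loop mentions the parent that creates each GPU process. A hedged sketch of that parent, inferred from the signature and the uses of avg_guys and proc_comm above (everything not in the snippet is an assumption):

from multiprocessing import Array, Process

import caffe

def main(gpus=(0, 1)):
    # the shared uid only matters for the multi-GPU case; caffe_loop treats
    # a falsy uid as "single process"
    uid = caffe.NCCL.new_uid() if len(gpus) > 1 else ''
    avg_guys = Array('d', len(gpus))  # per-rank running loss, read by rank 0
    proc_comm = Array('b', 3)         # flags; index 2 signals "error, exit"
    procs = [Process(target=caffe_loop,
                     args=(list(gpus), uid, rank, avg_guys, proc_comm))
             for rank in range(len(gpus))]
    for p in procs:
        p.start()
    for p in procs:
        p.join()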