def train_faster_rcnn_e2e(cfg):
    """Train Faster R-CNN end-to-end and return the evaluation model.

    Creates the image, ground-truth-roi and dims input variables, builds the
    training graph with ``create_faster_rcnn_model``, runs ``train_model`` and
    wraps the trained loss with ``create_faster_rcnn_eval_model``.

    Args:
        cfg: configuration object, read by attribute (e.g. ``cfg.OUTPUT_PATH``)
            and by section key (``cfg["CNTK"]``, ``cfg["MODEL"]``).

    Returns:
        The value returned by ``create_faster_rcnn_eval_model``.
    """
    # Network inputs: image, labeled ground truth rois (5 values per roi)
    # and a 6-element dims vector.
    image_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
    image_input = input_variable(
        shape=image_shape,
        dynamic_axes=[Axis.default_batch_axis()],
        name=cfg["MODEL"].FEATURE_NODE_NAME,
    )
    roi_input = input_variable(
        (cfg.INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[Axis.default_batch_axis()],
    )
    dims_input = input_variable(6, dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Build the training graph: loss plus prediction error.
    loss, pred_error = create_faster_rcnn_model(image_input, roi_input, dims_node, cfg)

    cntk_cfg = cfg["CNTK"]
    if cntk_cfg.DEBUG_OUTPUT:
        out_dir = cfg.OUTPUT_PATH
        print("Storing graphs and models to %s." % out_dir)
        plot(loss, os.path.join(out_dir, "graph_frcn_train_e2e." + cntk_cfg.GRAPH_TYPE))

    # Per-sample learning rates, scaled by the model-specific factor.
    lr_scale = cfg["MODEL"].E2E_LR_FACTOR
    scaled_lr_per_sample = [base_lr * lr_scale for base_lr in cntk_cfg.E2E_LR_PER_SAMPLE]
    momentum = momentum_schedule(cntk_cfg.MOMENTUM_PER_MB)

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(scaled_lr_per_sample))

    train_model(
        image_input, roi_input, dims_input, loss, pred_error,
        scaled_lr_per_sample, momentum,
        cntk_cfg.L2_REG_WEIGHT, cntk_cfg.E2E_MAX_EPOCHS, cfg,
    )

    return create_faster_rcnn_eval_model(loss, image_input, dims_input, cfg)
def train_faster_rcnn(cfg):
    """Load or train the Faster R-CNN eval model, then upload it.

    If a model file already exists at ``cfg['MODEL_PATH']`` and
    ``cfg["CNTK"].MAKE_MODE`` is set, that model is loaded and returned.
    Otherwise the model is trained (end-to-end or alternating, depending on
    ``cfg["CNTK"].TRAIN_E2E``), saved, optionally plotted, and handed to
    ``upload_checkpoint_file``.

    Args:
        cfg: configuration object (attribute and section-key access).

    Returns:
        The loaded or freshly trained evaluation model.
    """
    model_path = cfg['MODEL_PATH']

    # Train only if no model exists yet.
    reuse_existing = os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE
    if reuse_existing:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    train_fn = train_faster_rcnn_e2e if cfg["CNTK"].TRAIN_E2E else train_faster_rcnn_alternating
    eval_model = train_fn(cfg)

    eval_model.save(model_path)
    print("Stored eval model at %s" % model_path)

    if cfg["CNTK"].DEBUG_OUTPUT:
        graph_file = "graph_frcn_eval_{}_{}.{}".format(
            cfg["MODEL"].BASE_MODEL,
            "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage",
            cfg["CNTK"].GRAPH_TYPE)
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, graph_file))

    # NOTE(review): the original indentation was lost; the upload is assumed
    # to belong to the training branch (not to the DEBUG_OUTPUT branch and not
    # to the load-existing path) - confirm against the original file.
    share_dir = os.environ['AZUREML_NATIVE_SHARE_DIRECTORY']
    checkpoint_path = os.path.join(share_dir, 'output', cfg['OUTPUT_MODEL_NAME'])
    upload_checkpoint_file(checkpoint_path, cfg['OUTPUT_MODEL_NAME'], add_timestamp=True)
    print("Model saved to Azure blob")

    return eval_model
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    """Build the Fast R-CNN evaluation graph from a trained model.

    Clones (frozen) the part of ``model`` between the feature / "roi_proposals"
    inputs and the "cls_score" / "bbox_regr" outputs, optionally rescales the
    regression output by the configured stds and means, and applies a softmax
    to the class scores.

    Args:
        model: trained model to clone from.
        image_input: input variable fed to the cloned predictor as the image.
        roi_proposals: input variable fed to the cloned predictor as the rois.
        cfg: configuration object.

    Returns:
        ``combine([cls_pred, bbox_regr])``.
    """
    print("creating eval model")

    input_names = [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"]
    output_names = ["cls_score", "bbox_regr"]
    frozen_predictor = clone_model(model, input_names, output_names, CloneMethod.freeze)
    pred_net = frozen_predictor(image_input, roi_proposals)
    class_scores, box_regr = pred_net.outputs[0], pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # Tile the per-box means / stds across all boxes (4 values per box).
        box_count = int(box_regr.shape[1] / 4)
        tiled_means = np.array(cfg.BBOX_NORMALIZE_MEANS * box_count)
        tiled_stds = np.array(cfg.BBOX_NORMALIZE_STDS * box_count)
        scaled = element_times(box_regr, tiled_stds)
        box_regr = plus(scaled, tiled_means, name='bbox_regr')

    class_probs = softmax(class_scores, axis=1, name='cls_pred')
    eval_model = combine([class_probs, box_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        graph_path = os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE)
        plot(eval_model, graph_path)

    return eval_model
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
    """Train Faster R-CNN end-to-end using module-level configuration.

    Reads image dimensions, ``cfg`` and ``globalvars`` from module scope,
    builds the training graph via ``create_faster_rcnn_predictor``, trains it
    with ``train_model`` and returns ``create_eval_model`` of the result.

    Args:
        base_model_file_name: passed through to ``create_faster_rcnn_predictor``.
        debug_output: when true, print the output path and plot the loss graph.

    Returns:
        The value returned by ``create_eval_model``.
    """
    cntk_cfg = cfg["CNTK"]

    # Network inputs: image, labeled ground truth rois (5 values per roi)
    # and a 6-element dims vector.
    image_input = input_variable(
        (num_channels, image_height, image_width),
        dynamic_axes=[Axis.default_batch_axis()],
        name=feature_node_name,
    )
    roi_input = input_variable(
        (cntk_cfg.INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[Axis.default_batch_axis()],
    )
    dims_input = input_variable(6, dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Prediction model and loss function.
    loss, pred_error = create_faster_rcnn_predictor(
        base_model_file_name, image_input, roi_input, dims_node)

    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])
        graph_path = os.path.join(
            globalvars['output_path'],
            "graph_frcn_train_e2e." + cntk_cfg.GRAPH_TYPE)
        plot(loss, graph_path)

    # Learning parameters.
    lr_scale = globalvars['e2e_lr_factor']
    scaled_lr_per_sample = [base_lr * lr_scale for base_lr in cntk_cfg.E2E_LR_PER_SAMPLE]
    momentum = momentum_schedule(cntk_cfg.MOMENTUM_PER_MB)

    print("Using base model: {}".format(cntk_cfg.BASE_MODEL))
    print("lr_per_sample: {}".format(scaled_lr_per_sample))

    train_model(
        image_input, roi_input, dims_input, loss, pred_error,
        scaled_lr_per_sample, momentum,
        cntk_cfg.L2_REG_WEIGHT, globalvars['e2e_epochs'],
    )

    return create_eval_model(loss, image_input, dims_input)
def train_faster_rcnn(cfg):
    """Load or train the Faster R-CNN eval model (distributed-aware variant).

    An existing model at ``cfg['MODEL_PATH']`` is loaded when
    ``cfg["CNTK"].MAKE_MODE`` is set; otherwise the model is trained, saved and
    optionally plotted, and the distributed communicator is finalized when
    ``cfg.DISTRIBUTED_FLG`` is set.

    Args:
        cfg: configuration object (attribute and section-key access).

    Returns:
        The loaded or freshly trained evaluation model.
    """
    model_path = cfg['MODEL_PATH']

    # Train only if no model exists yet.
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    if cfg["CNTK"].TRAIN_E2E:
        trained = train_faster_rcnn_e2e(cfg)
    else:
        trained = train_faster_rcnn_alternating(cfg)

    trained.save(model_path)
    if cfg["CNTK"].DEBUG_OUTPUT:
        scheme = "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"
        graph_file = "graph_frcn_eval_{}_{}.{}".format(
            cfg["MODEL"].BASE_MODEL, scheme, cfg["CNTK"].GRAPH_TYPE)
        plot(trained, os.path.join(cfg.OUTPUT_PATH, graph_file))

    print("Stored eval model at %s" % model_path)

    # NOTE(review): the original indentation was lost; finalizing the
    # communicator is assumed to happen only after training - confirm.
    if cfg.DISTRIBUTED_FLG:
        distributed.Communicator.finalize()

    return trained
def train_faster_rcnn_e2e(base_model_file_name, debug_output=False):
    """Train Faster R-CNN end-to-end using module-level configuration.

    Reads image dimensions, ``cfg`` and ``globalvars`` from module scope,
    builds the training graph via ``create_faster_rcnn_predictor``, trains it
    with ``train_model`` and returns ``create_eval_model`` of the result.

    Args:
        base_model_file_name: passed through to ``create_faster_rcnn_predictor``.
        debug_output: when true, print the output path and plot the loss graph.

    Returns:
        The value returned by ``create_eval_model``.
    """
    cntk_cfg = cfg["CNTK"]

    # Network inputs: image, labeled ground truth rois (5 values per roi)
    # and a 6-element dims vector.
    image_input = input_variable(
        (num_channels, image_height, image_width),
        dynamic_axes=[Axis.default_batch_axis()],
        name=feature_node_name,
    )
    roi_input = input_variable(
        (cntk_cfg.INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[Axis.default_batch_axis()],
    )
    dims_input = input_variable(6, dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Prediction model and loss function.
    loss, pred_error = create_faster_rcnn_predictor(
        base_model_file_name, image_input, roi_input, dims_node)

    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])
        graph_path = os.path.join(
            globalvars['output_path'],
            "graph_frcn_train_e2e." + cntk_cfg.GRAPH_TYPE)
        plot(loss, graph_path)

    # Learning parameters.
    lr_scale = globalvars['e2e_lr_factor']
    scaled_lr_per_sample = [base_lr * lr_scale for base_lr in cntk_cfg.E2E_LR_PER_SAMPLE]
    momentum = momentum_schedule(cntk_cfg.MOMENTUM_PER_MB)

    print("Using base model: {}".format(cntk_cfg.BASE_MODEL))
    print("lr_per_sample: {}".format(scaled_lr_per_sample))

    train_model(
        image_input, roi_input, dims_input, loss, pred_error,
        scaled_lr_per_sample, momentum,
        cntk_cfg.L2_REG_WEIGHT, globalvars['e2e_epochs'],
    )

    return create_eval_model(loss, image_input, dims_input)
def train_faster_rcnn_e2e(cfg):
    """Train Faster R-CNN end-to-end and return the evaluation model.

    Creates the image, ground-truth-roi and dims input variables, builds the
    training graph with ``create_faster_rcnn_model``, runs ``train_model`` and
    wraps the trained loss with ``create_faster_rcnn_eval_model``.

    Args:
        cfg: configuration object, read by attribute (e.g. ``cfg.OUTPUT_PATH``)
            and by section key (``cfg["CNTK"]``, ``cfg["MODEL"]``).

    Returns:
        The value returned by ``create_faster_rcnn_eval_model``.
    """
    # Network inputs: image, labeled ground truth rois (5 values per roi)
    # and a 6-element dims vector.
    image_shape = (cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH)
    image_input = input_variable(
        shape=image_shape,
        dynamic_axes=[Axis.default_batch_axis()],
        name=cfg["MODEL"].FEATURE_NODE_NAME,
    )
    roi_input = input_variable(
        (cfg.INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[Axis.default_batch_axis()],
    )
    dims_input = input_variable(6, dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')

    # Build the training graph: loss plus prediction error.
    loss, pred_error = create_faster_rcnn_model(image_input, roi_input, dims_node, cfg)

    cntk_cfg = cfg["CNTK"]
    if cntk_cfg.DEBUG_OUTPUT:
        out_dir = cfg.OUTPUT_PATH
        print("Storing graphs and models to %s." % out_dir)
        plot(loss, os.path.join(out_dir, "graph_frcn_train_e2e." + cntk_cfg.GRAPH_TYPE))

    # Per-sample learning rates, scaled by the model-specific factor.
    lr_scale = cfg["MODEL"].E2E_LR_FACTOR
    scaled_lr_per_sample = [base_lr * lr_scale for base_lr in cntk_cfg.E2E_LR_PER_SAMPLE]
    momentum = momentum_schedule(cntk_cfg.MOMENTUM_PER_MB)

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(scaled_lr_per_sample))

    train_model(
        image_input, roi_input, dims_input, loss, pred_error,
        scaled_lr_per_sample, momentum,
        cntk_cfg.L2_REG_WEIGHT, cntk_cfg.E2E_MAX_EPOCHS, cfg,
    )

    return create_faster_rcnn_eval_model(loss, image_input, dims_input, cfg)
def plot_model(root, output_file="model.svg"):
    """Render the CNTK model graph rooted at ``root`` into ``output_file``.

    Prerequisites:
        * graphviz executables from graphviz.org, with their directory added
          to the PATH environment variable
        * ``pip install graphviz``
        * ``pip install pydot_ng``

    Args:
        root: root node the plot starts from.
        output_file: destination file name for the rendered graph.
    """
    graph.plot(root, output_file)
    message = "Model graph plotted to: {}".format(output_file)
    _logger.info(message)
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Train the Fast R-CNN model on the "train" minibatch source.

    Image dimensions, class / roi counts, epoch settings and paths are read
    from module-level names.

    Args:
        debug_output: when true, plot the network graph and save the model
            after every epoch under ``<abs_path>/Output``.
        model_path: forwarded to ``frcn_predictor``.

    Returns:
        The trained network returned by ``frcn_predictor``.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Minibatch reader for the training split.
    reader = create_mb_source(image_height, image_width, num_channels,
                              num_classes, num_rois, base_path, "train")

    # Inputs for features, rois and labels.
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Map network inputs to reader streams.
    streams = reader.streams
    input_map = {
        image_input: streams.features,
        roi_input: streams.rois,
        label_input: streams.roiLabels,
    }

    # Prediction model, loss and error metric.
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    loss_fn = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    error_fn = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: stepwise decreasing per-sample learning rate.
    l2_weight = 0.0005
    lr_steps = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_sched = learning_rate_schedule(lr_steps, unit=UnitType.sample)
    mm_sched = momentum_as_time_constant_schedule(momentum_time_constant)

    # Learner, progress writer and trainer.
    sgd = momentum_sgd(frcn_output.parameters, lr_sched, mm_sched,
                       l2_regularization_weight=l2_weight)
    printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (loss_fn, error_fn), sgd, printer)

    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)

    for epoch in range(max_epochs):
        seen = 0
        # Feed minibatches until one epoch worth of samples was processed.
        while seen < epoch_size:
            request = min(mb_size, epoch_size - seen)
            batch = reader.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(batch)
            seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            checkpoint = "frcn_py_%s.model" % (epoch + 1)
            frcn_output.save(os.path.join(abs_path, "Output", checkpoint))

    return frcn_output
def train_fast_rcnn(debug_output=False):
    """Train the Fast R-CNN model on the "train" minibatch source.

    Image dimensions, class / roi counts, epoch settings and paths are read
    from module-level names.

    Args:
        debug_output: when true, plot the network graph and save the model
            after every epoch under ``<abs_path>/Output``.

    Returns:
        The trained network returned by ``frcn_predictor``.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Minibatch reader for the training split.
    reader = create_mb_source(image_height, image_width, num_channels,
                              num_classes, num_rois, base_path, "train")

    # Inputs for features, rois and labels.
    # NOTE(review): ``input`` is presumably the CNTK input factory imported at
    # module level (it shadows the builtin) - confirm.
    image_input = input((num_channels, image_height, image_width))
    roi_input = input((num_rois, 4))
    label_input = input((num_rois, num_classes))

    # Map network inputs to reader streams.
    streams = reader.streams
    input_map = {
        image_input: streams.features,
        roi_input: streams.rois,
        label_input: streams.roiLabels,
    }

    # Prediction model, loss and error metric.
    frcn_output = frcn_predictor(image_input, roi_input, num_classes)
    loss_fn = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    error_fn = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: stepwise decreasing per-sample learning rate.
    l2_weight = 0.0005
    lr_steps = [0.00001] * 10 + [0.000001] * 5 + [0.0000001]
    lr_sched = learning_rate_schedule(lr_steps, unit=UnitType.sample)
    mm_sched = momentum_as_time_constant_schedule(momentum_time_constant)

    # Learner, progress writer and trainer.
    sgd = momentum_sgd(frcn_output.parameters, lr_sched, mm_sched,
                       l2_regularization_weight=l2_weight)
    printer = ProgressPrinter(tag='Training', num_epochs=max_epochs)
    trainer = Trainer(frcn_output, (loss_fn, error_fn), sgd, printer)

    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)

    for epoch in range(max_epochs):
        seen = 0
        # Feed minibatches until one epoch worth of samples was processed.
        while seen < epoch_size:
            request = min(mb_size, epoch_size - seen)
            batch = reader.next_minibatch(request, input_map=input_map)
            trainer.train_minibatch(batch)
            seen += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            checkpoint = "frcn_py_%s.model" % (epoch + 1)
            frcn_output.save(os.path.join(abs_path, "Output", checkpoint))

    return frcn_output
def train(self, train_file, output_resources_pickle_file,
          network_type='unidirectional',
          num_epochs=1, batch_size=50,
          dropout=0.2, reg_alpha=0.0,
          num_hidden_units=150, num_layers=1):
    """Train the sequence model on the data in ``train_file``.

    Reads and parses the training data through ``self.reader``, builds the
    model with ``self.__create_model`` (stored in ``self.model``), plots it to
    ``./lstm_model.png`` and trains the cross-entropy loss with an fsadagrad
    learner.

    Args:
        train_file: training data file handed to the reader.
        output_resources_pickle_file: handed to the reader alongside
            ``train_file``.
        network_type: only printed here; not otherwise used in this method.
        num_epochs: passed to the progress printer.
        batch_size: minibatch size used for the momentum schedule.
        dropout: dropout rate forwarded to ``self.__create_model``.
        reg_alpha: only printed here; not otherwise used in this method.
        num_hidden_units: forwarded to ``self.__create_model``.
        num_layers: only printed here; not otherwise used in this method.
    """
    train_X, train_Y = self.reader.read_and_parse_training_data(
        train_file, output_resources_pickle_file)

    print("Data Shape: ")
    print(train_X.shape)  # e.g. (15380, 613)
    print(train_Y.shape)  # e.g. (15380, 613, 8)
    # self.wordvecs.shape is e.g. (66962, 50)

    print("Hyper parameters:")
    print("output_resources_pickle_file = {}".format(output_resources_pickle_file))
    print("network_type = {}".format(network_type))
    print("num_epochs= {}".format(num_epochs))
    print("batch_size = {}".format(batch_size))
    # Fixed: the placeholder was missing, so the value was never printed.
    print("dropout = {}".format(dropout))
    print("reg_alpha = {}".format(reg_alpha))
    print("num_hidden_units = {}".format(num_hidden_units))
    print("num_layers = {}".format(num_layers))

    # Instantiate the model function.
    features = C.sequence.input_variable(self.wordvecs.shape[0])
    labels = C.input_variable(train_Y.shape[2], dynamic_axes=[C.Axis.default_batch_axis()])
    self.model = self.__create_model(features, train_Y.shape[2], num_hidden_units, dropout)
    plot_path = "./lstm_model.png"
    plot(self.model, plot_path)

    # Instantiate the loss and error function.
    loss = C.cross_entropy_with_softmax(self.model, labels)
    # NOTE(review): ``error`` is built but not passed to the training call
    # below - confirm whether it was meant to be reported as a metric.
    error = C.classification_error(self.model, labels)

    # LR schedule
    learning_rate = 0.02
    lr_schedule = C.learning_parameter_schedule(learning_rate)
    momentum_schedule = C.momentum_schedule(0.9, minibatch_size=batch_size)
    learner = C.fsadagrad(self.model.parameters,
                          lr=lr_schedule,
                          momentum=momentum_schedule,
                          unit_gain=True)

    # Setup the progress updater
    progress_printer = C.logging.ProgressPrinter(
        freq=100, first=10, tag='Training', num_epochs=num_epochs)

    # Train directly from the in-memory arrays. See
    # https://github.com/Microsoft/CNTK/blob/master/Manual/Manual_How_to_feed_data.ipynb
    print('Start training')
    loss.train((train_X.astype('float32'), train_Y.astype('float32')),
               parameter_learners=[learner],
               callbacks=[progress_printer])
def train_faster_rcnn(cfg):
    """Load the Faster R-CNN eval model if present, otherwise train and save it.

    An existing model at ``cfg['MODEL_PATH']`` is loaded when
    ``cfg["CNTK"].MAKE_MODE`` is set. Otherwise training runs end-to-end or
    with the alternating scheme depending on ``cfg["CNTK"].TRAIN_E2E``.

    Args:
        cfg: configuration object (attribute and section-key access).

    Returns:
        The loaded or freshly trained evaluation model.
    """
    model_path = cfg['MODEL_PATH']

    # Train only if no model exists yet.
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    trainer_fn = train_faster_rcnn_e2e if cfg["CNTK"].TRAIN_E2E else train_faster_rcnn_alternating
    eval_model = trainer_fn(cfg)

    eval_model.save(model_path)
    if cfg["CNTK"].DEBUG_OUTPUT:
        scheme = "e2e" if cfg["CNTK"].TRAIN_E2E else "4stage"
        graph_file = "graph_frcn_eval_{}_{}.{}".format(
            cfg["MODEL"].BASE_MODEL, scheme, cfg["CNTK"].GRAPH_TYPE)
        plot(eval_model, os.path.join(cfg.OUTPUT_PATH, graph_file))

    print("Stored eval model at %s" % model_path)
    return eval_model
def create_fast_rcnn_eval_model(model, image_input, roi_proposals, cfg):
    """Build the Fast R-CNN evaluation graph from a trained model.

    Clones (frozen) the part of ``model`` between the feature / "roi_proposals"
    inputs and the "cls_score" / "bbox_regr" outputs, optionally rescales the
    regression output by the configured stds and means, and applies a softmax
    to the class scores.

    Args:
        model: trained model to clone from.
        image_input: input variable fed to the cloned predictor as the image.
        roi_proposals: input variable fed to the cloned predictor as the rois.
        cfg: configuration object.

    Returns:
        ``combine([cls_pred, bbox_regr])``.
    """
    print("creating eval model")

    input_names = [cfg["MODEL"].FEATURE_NODE_NAME, "roi_proposals"]
    output_names = ["cls_score", "bbox_regr"]
    frozen_predictor = clone_model(model, input_names, output_names, CloneMethod.freeze)
    pred_net = frozen_predictor(image_input, roi_proposals)
    class_scores, box_regr = pred_net.outputs[0], pred_net.outputs[1]

    if cfg.BBOX_NORMALIZE_TARGETS:
        # Tile the per-box means / stds across all boxes (4 values per box).
        box_count = int(box_regr.shape[1] / 4)
        tiled_means = np.array(cfg.BBOX_NORMALIZE_MEANS * box_count)
        tiled_stds = np.array(cfg.BBOX_NORMALIZE_STDS * box_count)
        scaled = element_times(box_regr, tiled_stds)
        box_regr = plus(scaled, tiled_means, name='bbox_regr')

    class_probs = softmax(class_scores, axis=1, name='cls_pred')
    eval_model = combine([class_probs, box_regr])

    if cfg["CNTK"].DEBUG_OUTPUT:
        graph_path = os.path.join(cfg.OUTPUT_PATH, "graph_frcn_eval." + cfg["CNTK"].GRAPH_TYPE)
        plot(eval_model, graph_path)

    return eval_model
def create_model(base_model_file, input_features, params):
    """Build a classifier on top of a pretrained base network.

    Loads the base model, clones it up to and including node "z.x.x.r" (right
    before the pooling layer), feeds it the shifted input, and appends global
    average pooling, optional dropout and a new dense prediction layer.

    Args:
        base_model_file: path of the pretrained model to load.
        input_features: input variable for the images.
        params: mapping with keys ``'num_classes'``, ``'dropout_rate'`` and
            ``'freeze_weights'``.

    Returns:
        The final model ending in the new "Dense" layer.
    """
    num_classes = params['num_classes']
    dropout_rate = params['dropout_rate']
    freeze_weights = params['freeze_weights']

    # Load the pretrained classification net and find nodes
    base_model = load_model(base_model_file)
    log = logging.getLogger("neuralnets1.utils.create_model")
    log.info('Loaded base model - %s with layers:', base_model_file)
    # Plain loop instead of a throw-away list comprehension; logging arguments
    # are passed lazily so formatting only happens when the record is emitted.
    for layer in get_node_outputs(base_model):
        log.info('%s , %s', layer.name, layer.shape)
    graph.plot(base_model, filename="base_model.pdf")  # Write graph visualization
    feature_node = find_by_name(base_model, 'features')
    before_pooling_node = find_by_name(base_model, "z.x.x.r")

    # Clone model until right before the pooling layer, ie. until including z.x.x.r
    clone_method = CloneMethod.freeze if freeze_weights else CloneMethod.clone
    model_cloned = combine([before_pooling_node.owner]).clone(
        clone_method,
        {feature_node: placeholder(name='features')})

    # Center the input around zero and set model input.
    # Do this early, to avoid CNTK bug with wrongly estimated layer shapes
    feat_norm = input_features - constant(114)
    model = model_cloned(feat_norm)

    # Add pool layer
    avg_pool = GlobalAveragePooling(name="poolingLayer")(model)

    # Add drop out layer only when a positive rate is requested
    if dropout_rate > 0:
        avg_pool_drop = Dropout(dropout_rate)(avg_pool)
    else:
        avg_pool_drop = avg_pool

    # Add new dense layer for class prediction
    return Dense(num_classes, activation=None, name="Dense")(avg_pool_drop)
def plot_model(root, output_file="model.png"):
    """Render the CNTK model graph rooted at ``root`` and print the result.

    Prerequisites:
        * graphviz executables from graphviz.org, with their directory added
          to the PATH environment variable
        * ``pip install graphviz``
        * ``pip install pydot_ng``

    Args:
        root: root node the plot starts from.
        output_file: destination image file for the rendered graph.
    """
    # graph.plot returns a value, which is echoed to stdout.
    print(graph.plot(root, output_file))
def train_fast_rcnn(cfg):
    """Load the Fast R-CNN eval model if present, otherwise train and save it.

    When a model file exists at ``cfg['MODEL_PATH']`` and
    ``cfg["CNTK"].MAKE_MODE`` is set, that model is loaded and returned.
    Otherwise the training graph is created, trained on minibatches from
    ``ObjectDetectionMinibatchSource`` with separate learners for bias and
    non-bias parameters, converted to an eval model and saved.

    Args:
        cfg: configuration object (attribute and section-key access).

    Returns:
        The loaded or freshly trained evaluation model.
    """
    # Train only if no model exists yet
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    # Input variables denoting features and labeled ground truth rois (as 5-tuples per roi)
    image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_proposals = input_variable((cfg.NUM_ROI_PROPOSALS, 4),
                                   dynamic_axes=[Axis.default_batch_axis()],
                                   name="roi_proposals")
    label_targets = input_variable((cfg.NUM_ROI_PROPOSALS, cfg["DATA"].NUM_CLASSES),
                                   dynamic_axes=[Axis.default_batch_axis()])
    bbox_targets = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                  dynamic_axes=[Axis.default_batch_axis()])
    bbox_inside_weights = input_variable((cfg.NUM_ROI_PROPOSALS, 4 * cfg["DATA"].NUM_CLASSES),
                                         dynamic_axes=[Axis.default_batch_axis()])

    # Instantiate the Fast R-CNN prediction model and loss function
    loss, pred_error = create_fast_rcnn_model(image_input, roi_proposals, label_targets,
                                              bbox_targets, bbox_inside_weights, cfg)
    if isinstance(loss, cntk.Variable):
        # Wrap a bare Variable so ``loss.parameters`` below is available.
        loss = combine([loss])

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        plot(loss, os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE))

    # Set learning parameters
    lr_factor = cfg["CNTK"].LR_FACTOR
    lr_per_sample_scaled = [x * lr_factor for x in cfg["CNTK"].LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    epochs_to_train = cfg["CNTK"].MAX_EPOCHS

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(lr_per_sample_scaled))

    # --- train ---
    # Instantiate the learners and the trainer object; bias parameters are
    # identified by name and get their own learning-rate schedule.
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample_scaled)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    # NOTE(review): the bias schedule scales the unscaled LR_PER_SAMPLE (it does
    # not apply LR_FACTOR) - kept as in the original; confirm this is intended.
    bias_lr_per_sample = [v * bias_lr_mult for v in cfg["CNTK"].LR_PER_SAMPLE]
    bias_lr_schedule = learning_parameter_schedule_per_sample(bias_lr_per_sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    # Get minibatches of images and perform model training
    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Create the minibatch source
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE, cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=True,
        proposal_iou_threshold=cfg.BBOX_THRESH,
        normalize_means=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_MEANS,
        normalize_stds=None if not cfg.BBOX_NORMALIZE_TARGETS else cfg.BBOX_NORMALIZE_STDS)

    # define mapping from reader streams to network inputs
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.proposals_si: roi_proposals,
        od_minibatch_source.label_targets_si: label_targets,
        od_minibatch_source.bbox_targets_si: bbox_targets,
        od_minibatch_source.bbiw_si: bbox_inside_weights
    }

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)
    for epoch in range(epochs_to_train):  # loop over epochs
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:  # loop over minibatches in the epoch
            data = od_minibatch_source.next_minibatch(
                min(cfg.MB_SIZE, cfg["DATA"].NUM_TRAIN_IMAGES - sample_count),
                input_map=input_map)
            trainer.train_minibatch(data)  # update model with it
            sample_count += trainer.previous_minibatch_sample_count  # count samples processed so far
            progress_printer.update_with_trainer(trainer, with_metric=True)  # log progress

        progress_printer.epoch_summary(with_metric=True)

    eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
    eval_model.save(model_path)
    return eval_model
model_path = os.path.join(globalvars['output_path'], "faster_rcnn_eval_{}_{}.model" .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage")) # Train only if no model exists yet if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE: print("Loading existing model from %s" % model_path) eval_model = load_model(model_path) else: if globalvars['train_e2e']: eval_model = train_faster_rcnn_e2e(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT) else: eval_model = train_faster_rcnn_alternating(base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT) eval_model.save(model_path) if cfg["CNTK"].DEBUG_OUTPUT: plot(eval_model, os.path.join(globalvars['output_path'], "graph_frcn_eval_{}_{}.{}" .format(cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE))) print("Stored eval model at %s" % model_path) # Compute mean average precision on test set eval_faster_rcnn_mAP(eval_model) # Plot results on test set if cfg["CNTK"].VISUALIZE_RESULTS: from plot_helpers import eval_and_plot_faster_rcnn num_eval = min(num_test_images, 100) img_shape = (num_channels, image_height, image_width) results_folder = os.path.join(globalvars['output_path'], cfg["CNTK"].DATASET) eval_and_plot_faster_rcnn(eval_model, num_eval, globalvars['test_map_file'], img_shape, results_folder, feature_node_name, globalvars['classes'], drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
def train_faster_rcnn_alternating(cfg):
    '''
    4-Step Alternating Training scheme from the Faster R-CNN paper:

    1a. Create initial network, only rpn, without detection network
        --> train only the rpn (and conv3_1 and up for VGG16);
        buffer region proposals from rpn
    1b. Create full network, initialize conv layers with imagenet, use buffered proposals
        --> train only detection network (and conv3_1 and up for VGG16)
    2a. Keep conv weights from detection network and fix them
        --> train only rpn;
        buffer region proposals from rpn
    2b. Keep conv and rpn weights from step 3 and fix them
        --> train only detection network

    While training runs, ``cfg["TEST"].RPN_PRE_NMS_TOP_N`` and
    ``cfg["TEST"].RPN_POST_NMS_TOP_N`` are temporarily set to their TRAIN
    values; they are restored afterwards, also when training raises.

    Args:
        cfg: configuration object (attribute and section-key access).

    Returns:
        The value returned by ``create_faster_rcnn_eval_model`` for the stage 2
        detection and rpn networks.
    '''
    # setting pre- and post-nms top N to training values since buffered proposals are used for further training
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N
    try:
        cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

        # Learning parameters
        rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
        rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
        frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
        frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

        l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
        mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
        rpn_epochs = cfg["CNTK"].RPN_EPOCHS
        frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

        feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
        last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
        print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
        print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
        print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))

        debug_output = cfg["CNTK"].DEBUG_OUTPUT
        if debug_output:
            print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

        # Input variables denoting features, labeled ground truth rois (as 5-tuples per roi) and image dimensions
        image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=feature_node_name)
        feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
        roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5), dynamic_axes=[Axis.default_batch_axis()])
        scaled_gt_boxes = alias(roi_input, name='roi_input')
        dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
        dims_node = alias(dims_input, name='dims_input')
        rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4),
                                        dynamic_axes=[Axis.default_batch_axis()])
        rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

        # base image classification model (e.g. VGG16 or AlexNet)
        base_model = load_model(cfg['BASE_MODEL_PATH'])

        print("stage 1a - rpn")
        # Create initial network, only rpn, without detection network
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  init new            yes
        # frcn: -                   -

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

        print("stage 1a - buffering rpn proposals")
        buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input,
                                                      roi_input, dims_input, cfg)

        print("stage 1b - frcn")
        # Create full network, initialize conv layers with imagenet, fix rpn weights
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  stage1a rpn model   no --> use buffered proposals
        # frcn: base_model + new    yes

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME],
                                [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(cls_score, label_targets, bbox_pred, rois,
                                                   bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        # Drop the reference to the stage 1 proposals; they are not used again.
        buffered_proposals_s1 = None

        print("stage 2a - rpn")
        # Keep conv weights from detection network and fix them
        #       initial weights     train?
        # conv: stage1b frcn model  no
        # rpn:  stage1a rpn model   yes
        # frcn: -                   -

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name],
                                  [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network, [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        # NOTE(review): the call passes dims before gt boxes, while the name
        # list above names "roi_input" before "dims_input" - confirm this
        # matches the argument order of the cloned function.
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

        print("stage 2a - buffering rpn proposals")
        buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input,
                                                      roi_input, dims_input, cfg)

        print("stage 2b - frcn")
        # Keep conv and rpn weights from step 3 and fix them
        #       initial weights     train?
        # conv: stage2a rpn model   no
        # rpn:  stage2a rpn model   no --> use buffered proposals
        # frcn: stage1b frcn model  yes

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name],
                                  [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network, [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois", "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network,
                 os.path.join(cfg.OUTPUT_PATH, "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        # Drop the reference to the stage 2 proposals; they are not used again.
        buffered_proposals_s2 = None
    finally:
        # resetting config values to original test values, even if training failed
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    # The eval model is created after the test values have been restored,
    # as in the original statement order.
    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg,
                                         rpn_model=stage2_rpn_network)
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
    """Run the 4-step alternating Faster R-CNN training and return the eval model.

    The four steps follow the scheme from the Faster R-CNN paper:

    1a. Build a network that holds only the RPN (no detection network),
        train it and buffer its region proposals. According to the original
        author's notes the conv layers from conv3_1 upwards are trained as
        well for VGG16.
    1b. Build the full network on freshly cloned base-model conv layers and
        train the detection network on the buffered proposals.
    2a. Freeze the conv layers cloned from the stage 1b network, train only
        the RPN and buffer its proposals again.
    2b. Freeze the conv layers cloned from the stage 2a network and train
        only the detection network on the newly buffered proposals.

    Hyper-parameters, node names and image dimensions are read from the
    module-level ``cfg`` / ``globalvars`` objects and module globals.

    Args:
        base_model_file_name: File name handed to ``load_model`` to obtain
            the base image classification model (e.g. VGG16 or AlexNet).
        debug_output: When true, the graph of every stage is plotted into
            ``globalvars['output_path']``.

    Returns:
        The result of ``create_eval_model`` for the stage 2b detection
        network combined with the stage 2a RPN network.
    """

    def _plot_if_debug(network, file_stem):
        # Graph files are only produced when debug output was requested.
        if debug_output:
            plot(network,
                 os.path.join(globalvars['output_path'], file_stem + cfg["CNTK"].GRAPH_TYPE))

    # --- learning parameters -------------------------------------------------
    rpn_lr_factor = globalvars['rpn_lr_factor']
    rpn_lr_per_sample_scaled = [lr * rpn_lr_factor for lr in cfg["CNTK"].RPN_LR_PER_SAMPLE]
    frcn_lr_factor = globalvars['frcn_lr_factor']
    frcn_lr_per_sample_scaled = [lr * frcn_lr_factor for lr in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
    rpn_epochs = globalvars['rpn_epochs']
    frcn_epochs = globalvars['frcn_epochs']

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])

    # --- network inputs ------------------------------------------------------
    # Image features, labeled ground truth rois (5-tuples per roi), image
    # dimensions and the buffered RPN proposals.
    image_input = input_variable((num_channels, image_height, image_width),
                                 dynamic_axes=[Axis.default_batch_axis()],
                                 name=feature_node_name)
    feat_norm = image_input - normalization_const
    roi_input = input_variable((cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
                               dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4),
                                    dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # Base image classification model (e.g. VGG16 or AlexNet).
    base_model = load_model(base_model_file_name)

    # --- stage 1a: RPN only, no detection network ------------------------------
    #       initial weights     train?
    # conv: base_model          only conv3_1 and up
    # rpn:  init new            yes
    # frcn: -                   -
    print("stage 1a - rpn")
    s1a_conv = clone_conv_layers(base_model)
    s1a_conv_out = s1a_conv(feat_norm)

    s1a_rois, s1a_rpn_losses = create_rpn(
        s1a_conv_out, scaled_gt_boxes, dims_node,
        proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
    stage1_rpn_network = combine([s1a_rois, s1a_rpn_losses])

    _plot_if_debug(stage1_rpn_network, "graph_frcn_train_stage1a_rpn.")
    train_model(image_input, roi_input, dims_input, s1a_rpn_losses, s1a_rpn_losses,
                rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=rpn_epochs)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(stage1_rpn_network, image_input,
                                                  roi_input, dims_input)

    # --- stage 1b: full network on buffered proposals --------------------------
    #       initial weights     train?
    # conv: base_model          only conv3_1 and up
    # rpn:  stage1a rpn model   no --> use buffered proposals
    # frcn: base_model + new    yes
    print("stage 1b - frcn")
    s1b_conv = clone_conv_layers(base_model)
    s1b_conv_out = s1b_conv(feat_norm)

    # The buffered proposals feed the proposal target layer.
    rois, label_targets, bbox_targets, bbox_inside_weights = create_proposal_target_layer(
        rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])

    fc_layers = clone_model(base_model, [pool_node_name], [last_hidden_node_name],
                            CloneMethod.clone)
    cls_score, bbox_pred = create_fast_rcnn_predictor(s1b_conv_out, rois, fc_layers)
    s1b_det_losses = create_detection_losses(cls_score, label_targets, rois, bbox_pred,
                                             bbox_targets, bbox_inside_weights)
    s1b_pred_error = classification_error(cls_score, label_targets, axis=1,
                                          name="pred_error")
    stage1_frcn_network = combine([rois, cls_score, bbox_pred, s1b_det_losses,
                                   s1b_pred_error])

    _plot_if_debug(stage1_frcn_network, "graph_frcn_train_stage1b_frcn.")
    train_model(image_input, roi_input, dims_input, s1b_det_losses, s1b_pred_error,
                frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=frcn_epochs, rpn_rois_input=rpn_rois_input,
                buffered_rpn_proposals=buffered_proposals_s1)
    # Drop the reference so the stage 1 proposal buffer can be reclaimed.
    buffered_proposals_s1 = None

    # --- stage 2a: frozen conv layers, train the RPN ---------------------------
    #       initial weights     train?
    # conv: stage1b frcn model  no
    # rpn:  stage1a rpn model   yes
    # frcn: -                   -
    print("stage 2a - rpn")
    s2a_conv = clone_model(stage1_frcn_network, [feature_node_name],
                           [last_conv_node_name], CloneMethod.freeze)
    s2a_conv_out = s2a_conv(image_input)

    rpn = clone_model(stage1_rpn_network,
                      [last_conv_node_name, "roi_input", "dims_input"],
                      ["rpn_rois", "rpn_losses"], CloneMethod.clone)
    rpn_net = rpn(s2a_conv_out, dims_node, scaled_gt_boxes)
    s2a_rois = rpn_net.outputs[0]
    s2a_rpn_losses = rpn_net.outputs[1]
    stage2_rpn_network = combine([s2a_rois, s2a_rpn_losses])

    _plot_if_debug(stage2_rpn_network, "graph_frcn_train_stage2a_rpn.")
    train_model(image_input, roi_input, dims_input, s2a_rpn_losses, s2a_rpn_losses,
                rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=rpn_epochs)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(stage2_rpn_network, image_input,
                                                  roi_input, dims_input)

    # --- stage 2b: frozen conv layers, train the detection network -------------
    #       initial weights     train?
    # conv: stage2a rpn model   no
    # rpn:  stage2a rpn model   no --> use buffered proposals
    # frcn: stage1b frcn model  yes
    print("stage 2b - frcn")
    s2b_conv = clone_model(stage2_rpn_network, [feature_node_name],
                           [last_conv_node_name], CloneMethod.freeze)
    s2b_conv_out = s2b_conv(image_input)

    frcn = clone_model(stage1_frcn_network,
                       [last_conv_node_name, "rpn_rois", "roi_input"],
                       ["cls_score", "bbox_regr", "rpn_target_rois",
                        "detection_losses", "pred_error"],
                       CloneMethod.clone)
    stage2_frcn_network = frcn(s2b_conv_out, rpn_rois_buf, scaled_gt_boxes)
    s2b_det_losses = stage2_frcn_network.outputs[3]
    s2b_pred_error = stage2_frcn_network.outputs[4]

    _plot_if_debug(stage2_frcn_network, "graph_frcn_train_stage2b_frcn.")
    train_model(image_input, roi_input, dims_input, s2b_det_losses, s2b_pred_error,
                frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=frcn_epochs, rpn_rois_input=rpn_rois_input,
                buffered_rpn_proposals=buffered_proposals_s2)
    # Drop the reference so the stage 2 proposal buffer can be reclaimed.
    buffered_proposals_s2 = None

    return create_eval_model(stage2_frcn_network, image_input, dims_input,
                             rpn_model=stage2_rpn_network)
def train_faster_rcnn_alternating(base_model_file_name, debug_output=False):
    """Train Faster R-CNN with the 4-step alternating scheme of the paper.

    Steps performed, in order:

    * stage 1a - create a network with only the RPN (no detection network),
      train it, then buffer its region proposals;
    * stage 1b - create the full network on conv layers cloned from the
      base model and train the detection network on the buffered proposals;
    * stage 2a - clone and freeze the conv layers of the stage 1b network,
      train only the RPN, then buffer its region proposals;
    * stage 2b - clone and freeze the conv layers of the stage 2a network and
      train only the detection network on the new buffered proposals.

    The original author's notes state that for VGG16 the conv layers from
    conv3_1 upwards are also trained in stages 1a and 1b. Settings come from
    the module-level ``cfg`` and ``globalvars`` objects.

    Args:
        base_model_file_name: Passed to ``load_model`` to load the base image
            classification model (e.g. VGG16 or AlexNet).
        debug_output: If true, plot the graph of each stage into
            ``globalvars['output_path']``.

    Returns:
        Whatever ``create_eval_model`` returns for the stage 2b detection
        network together with the stage 2a RPN network.
    """
    # Learning parameters
    rpn_factor = globalvars['rpn_lr_factor']
    rpn_lr_per_sample_scaled = [
        rate * rpn_factor for rate in cfg["CNTK"].RPN_LR_PER_SAMPLE
    ]
    frcn_factor = globalvars['frcn_lr_factor']
    frcn_lr_per_sample_scaled = [
        rate * frcn_factor for rate in cfg["CNTK"].FRCN_LR_PER_SAMPLE
    ]

    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    mm_schedule = momentum_schedule(globalvars['momentum_per_mb'])
    rpn_epochs = globalvars['rpn_epochs']
    frcn_epochs = globalvars['frcn_epochs']

    print("Using base model: {}".format(cfg["CNTK"].BASE_MODEL))
    print("rpn_lr_per_sample: {}".format(rpn_lr_per_sample_scaled))
    print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))
    if debug_output:
        print("Storing graphs and models to %s." % globalvars['output_path'])

    # Inputs: image features, labeled ground truth rois (as 5-tuples per roi),
    # image dimensions and buffered RPN proposals.
    image_input = input_variable(
        (num_channels, image_height, image_width),
        dynamic_axes=[Axis.default_batch_axis()],
        name=feature_node_name)
    feat_norm = image_input - normalization_const
    roi_input = input_variable(
        (cfg["CNTK"].INPUT_ROIS_PER_IMAGE, 5),
        dynamic_axes=[Axis.default_batch_axis()])
    scaled_gt_boxes = alias(roi_input, name='roi_input')
    dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
    dims_node = alias(dims_input, name='dims_input')
    rpn_rois_input = input_variable(
        (cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4),
        dynamic_axes=[Axis.default_batch_axis()])
    rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

    # base image classification model (e.g. VGG16 or AlexNet)
    base_model = load_model(base_model_file_name)

    # ------------------------------------------------------------------
    # Stage 1a: initial network with only the RPN.
    #   conv: from base_model, trained (only conv3_1 and up)
    #   rpn:  newly initialized, trained
    #   frcn: not present
    # ------------------------------------------------------------------
    print("stage 1a - rpn")
    conv_1a = clone_conv_layers(base_model)
    conv_out_1a = conv_1a(feat_norm)

    rpn_rois_1a, rpn_losses_1a = create_rpn(
        conv_out_1a,
        scaled_gt_boxes,
        dims_node,
        proposal_layer_param_string=cfg["CNTK"].PROPOSAL_LAYER_PARAMS)
    stage1_rpn_network = combine([rpn_rois_1a, rpn_losses_1a])

    if debug_output:
        plot(stage1_rpn_network,
             os.path.join(globalvars['output_path'],
                          "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
    train_model(image_input, roi_input, dims_input,
                rpn_losses_1a, rpn_losses_1a,
                rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=rpn_epochs)

    print("stage 1a - buffering rpn proposals")
    buffered_proposals_s1 = compute_rpn_proposals(
        stage1_rpn_network, image_input, roi_input, dims_input)

    # ------------------------------------------------------------------
    # Stage 1b: full network, RPN replaced by the buffered proposals.
    #   conv: from base_model, trained (only conv3_1 and up)
    #   rpn:  stage 1a model, not trained --> use buffered proposals
    #   frcn: base_model + new layers, trained
    # ------------------------------------------------------------------
    print("stage 1b - frcn")
    conv_1b = clone_conv_layers(base_model)
    conv_out_1b = conv_1b(feat_norm)

    # use buffered proposals in target layer
    target_layer = create_proposal_target_layer(
        rpn_rois_buf, scaled_gt_boxes, num_classes=globalvars['num_classes'])
    rois, label_targets, bbox_targets, bbox_inside_weights = target_layer

    fc_layers = clone_model(base_model, [pool_node_name],
                            [last_hidden_node_name], CloneMethod.clone)
    cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out_1b, rois, fc_layers)
    det_losses_1b = create_detection_losses(
        cls_score, label_targets, rois, bbox_pred, bbox_targets,
        bbox_inside_weights)
    pred_error_1b = classification_error(
        cls_score, label_targets, axis=1, name="pred_error")
    stage1_frcn_network = combine(
        [rois, cls_score, bbox_pred, det_losses_1b, pred_error_1b])

    if debug_output:
        plot(stage1_frcn_network,
             os.path.join(globalvars['output_path'],
                          "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
    train_model(image_input, roi_input, dims_input,
                det_losses_1b, pred_error_1b,
                frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=frcn_epochs,
                rpn_rois_input=rpn_rois_input,
                buffered_rpn_proposals=buffered_proposals_s1)
    buffered_proposals_s1 = None  # release the stage 1 proposal buffer

    # ------------------------------------------------------------------
    # Stage 2a: conv weights from the detection network, frozen.
    #   conv: stage 1b frcn model, not trained
    #   rpn:  stage 1a rpn model, trained
    #   frcn: not present
    # ------------------------------------------------------------------
    print("stage 2a - rpn")
    conv_2a = clone_model(stage1_frcn_network, [feature_node_name],
                          [last_conv_node_name], CloneMethod.freeze)
    conv_out_2a = conv_2a(image_input)

    rpn = clone_model(
        stage1_rpn_network,
        [last_conv_node_name, "roi_input", "dims_input"],
        ["rpn_rois", "rpn_losses"],
        CloneMethod.clone)
    rpn_net = rpn(conv_out_2a, dims_node, scaled_gt_boxes)
    rpn_rois_2a = rpn_net.outputs[0]
    rpn_losses_2a = rpn_net.outputs[1]
    stage2_rpn_network = combine([rpn_rois_2a, rpn_losses_2a])

    if debug_output:
        plot(stage2_rpn_network,
             os.path.join(globalvars['output_path'],
                          "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
    train_model(image_input, roi_input, dims_input,
                rpn_losses_2a, rpn_losses_2a,
                rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=rpn_epochs)

    print("stage 2a - buffering rpn proposals")
    buffered_proposals_s2 = compute_rpn_proposals(
        stage2_rpn_network, image_input, roi_input, dims_input)

    # ------------------------------------------------------------------
    # Stage 2b: conv and rpn weights from stage 2a, frozen.
    #   conv: stage 2a rpn model, not trained
    #   rpn:  stage 2a rpn model, not trained --> use buffered proposals
    #   frcn: stage 1b frcn model, trained
    # ------------------------------------------------------------------
    print("stage 2b - frcn")
    conv_2b = clone_model(stage2_rpn_network, [feature_node_name],
                          [last_conv_node_name], CloneMethod.freeze)
    conv_out_2b = conv_2b(image_input)

    frcn_output_names = [
        "cls_score", "bbox_regr", "rpn_target_rois", "detection_losses",
        "pred_error"
    ]
    frcn = clone_model(stage1_frcn_network,
                       [last_conv_node_name, "rpn_rois", "roi_input"],
                       frcn_output_names, CloneMethod.clone)
    stage2_frcn_network = frcn(conv_out_2b, rpn_rois_buf, scaled_gt_boxes)
    det_losses_2b = stage2_frcn_network.outputs[3]
    pred_error_2b = stage2_frcn_network.outputs[4]

    if debug_output:
        plot(stage2_frcn_network,
             os.path.join(globalvars['output_path'],
                          "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
    train_model(image_input, roi_input, dims_input,
                det_losses_2b, pred_error_2b,
                frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight,
                epochs_to_train=frcn_epochs,
                rpn_rois_input=rpn_rois_input,
                buffered_rpn_proposals=buffered_proposals_s2)
    buffered_proposals_s2 = None  # release the stage 2 proposal buffer

    return create_eval_model(stage2_frcn_network, image_input, dims_input,
                             rpn_model=stage2_rpn_network)
print("Loading existing model from %s" % model_path) eval_model = load_model(model_path) else: if globalvars['train_e2e']: eval_model = train_faster_rcnn_e2e( base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT) else: eval_model = train_faster_rcnn_alternating( base_model_file, debug_output=cfg["CNTK"].DEBUG_OUTPUT) eval_model.save(model_path) if cfg["CNTK"].DEBUG_OUTPUT: plot( eval_model, os.path.join( globalvars['output_path'], "graph_frcn_eval_{}_{}.{}".format( cfg["CNTK"].BASE_MODEL, "e2e" if globalvars['train_e2e'] else "4stage", cfg["CNTK"].GRAPH_TYPE))) print("Stored eval model at %s" % model_path) # Compute mean average precision on test set eval_faster_rcnn_mAP(eval_model) # Plot results on test set if cfg["CNTK"].VISUALIZE_RESULTS: from plot_helpers import eval_and_plot_faster_rcnn num_eval = min(num_test_images, 100) img_shape = (num_channels, image_height, image_width) results_folder = os.path.join(globalvars['output_path'],
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Train the Fast R-CNN model with a data-parallel distributed learner.

    Image size, class/roi counts, epoch and minibatch sizes and the
    distributed-training settings are read from module-level globals.

    Args:
        debug_output: When true, plot the model graph and save a model file
            after every epoch under ``<abs_path>/Output``.
        model_path: Forwarded to ``frcn_predictor`` as its last argument.

    Returns:
        The trained ``frcn_output`` function created by ``frcn_predictor``.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." % os.path.join(abs_path, "Output"))

    # Reader for the training data.
    minibatch_source = create_mb_source(image_height, image_width, num_channels,
                                        num_classes, num_rois, base_path, "train")

    # Network inputs: image features, rois and roi labels.
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Tie each network input to its reader stream.
    streams = minibatch_source.streams
    input_map = {
        image_input: streams.features,
        roi_input: streams.rois,
        label_input: streams.roiLabels,
    }

    # Prediction model, loss and error metric.
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters: 10 steps at 1e-5, 5 at 1e-6, then 1e-7.
    l2_reg_weight = 0.0005
    lr_per_sample = [1e-05] * 10 + [1e-06] * 5 + [1e-07]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

    # Plain momentum SGD learner, then wrapped for distributed training; the
    # original notes say the wrapper is compatible with normal training too.
    local_learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                                 l2_regularization_weight=l2_reg_weight)
    learner = distributed.data_parallel_distributed_learner(
        learner=local_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=warm_up)
    progress_printer = ProgressPrinter(tag='Training', num_epochs=max_epochs,
                                       rank=distributed.Communicator.rank())
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Training loop.
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0
        while sample_count < epoch_size:
            # Request a minibatch scaled by the worker count, capped at what
            # is left of the epoch; each worker reads its own partition.
            request_size = min(mb_size * C.Communicator.num_workers(),
                               epoch_size - sample_count)
            data = minibatch_source.next_minibatch(
                request_size,
                input_map=input_map,
                num_data_partitions=C.Communicator.num_workers(),
                partition_index=C.Communicator.rank())
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            checkpoint_name = "frcn_py_%s.model" % (epoch + 1)
            frcn_output.save(os.path.join(abs_path, "Output", checkpoint_name))

    if distributed_flg:
        distributed.Communicator.finalize()

    return frcn_output
def train_fast_rcnn(cfg):
    """Train a Fast R-CNN model described by ``cfg`` and return its eval model.

    If a file already exists at ``cfg['MODEL_PATH']`` and
    ``cfg["CNTK"].MAKE_MODE`` is set, that model is loaded and returned
    without training. Otherwise the model is trained, converted with
    ``create_fast_rcnn_eval_model``, saved to ``cfg['MODEL_PATH']`` and
    returned.

    Args:
        cfg: Configuration object; it is read both by attribute
            (``cfg.IMAGE_WIDTH``) and by section key (``cfg["CNTK"]``,
            ``cfg["DATA"]``, ``cfg["MODEL"]``, ``cfg["TRAIN"]``).

    Returns:
        The loaded or freshly trained evaluation model.
    """
    # Train only if no model exists yet.
    model_path = cfg['MODEL_PATH']
    if os.path.exists(model_path) and cfg["CNTK"].MAKE_MODE:
        print("Loading existing model from %s" % model_path)
        return load_model(model_path)

    num_classes = cfg["DATA"].NUM_CLASSES

    # Inputs: image features, roi proposals and the per-proposal targets.
    image_input = input_variable(
        shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
        dynamic_axes=[Axis.default_batch_axis()],
        name=cfg["MODEL"].FEATURE_NODE_NAME)
    roi_proposals = input_variable(
        (cfg.NUM_ROI_PROPOSALS, 4),
        dynamic_axes=[Axis.default_batch_axis()],
        name="roi_proposals")
    label_targets = input_variable(
        (cfg.NUM_ROI_PROPOSALS, num_classes),
        dynamic_axes=[Axis.default_batch_axis()])
    bbox_targets = input_variable(
        (cfg.NUM_ROI_PROPOSALS, 4 * num_classes),
        dynamic_axes=[Axis.default_batch_axis()])
    bbox_inside_weights = input_variable(
        (cfg.NUM_ROI_PROPOSALS, 4 * num_classes),
        dynamic_axes=[Axis.default_batch_axis()])

    # Prediction model and loss; a bare Variable is wrapped by combine().
    loss, pred_error = create_fast_rcnn_model(
        image_input, roi_proposals, label_targets, bbox_targets,
        bbox_inside_weights, cfg)
    if isinstance(loss, cntk.Variable):
        loss = combine([loss])

    if cfg["CNTK"].DEBUG_OUTPUT:
        print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)
        graph_file = "graph_frcn_train." + cfg["CNTK"].GRAPH_TYPE
        plot(loss, os.path.join(cfg.OUTPUT_PATH, graph_file))

    # Learning parameters.
    lr_factor = cfg["CNTK"].LR_FACTOR
    lr_per_sample_scaled = [rate * lr_factor for rate in cfg["CNTK"].LR_PER_SAMPLE]
    mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
    l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
    epochs_to_train = cfg["CNTK"].MAX_EPOCHS

    print("Using base model: {}".format(cfg["MODEL"].BASE_MODEL))
    print("lr_per_sample: {}".format(lr_per_sample_scaled))

    # --- train ---
    # Bias parameters (named 'b' or containing '.b') get their own learner
    # with a separately scaled learning rate.
    params = loss.parameters
    biases = [p for p in params if '.b' in p.name or 'b' == p.name]
    others = [p for p in params if p not in biases]
    bias_lr_mult = cfg["CNTK"].BIAS_LR_MULT

    lr_schedule = learning_rate_schedule(lr_per_sample_scaled, unit=UnitType.sample)
    learner = momentum_sgd(others, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight,
                           unit_gain=False, use_mean_gradient=True)

    bias_lr_per_sample = [rate * bias_lr_mult for rate in cfg["CNTK"].LR_PER_SAMPLE]
    bias_lr_schedule = learning_rate_schedule(bias_lr_per_sample, unit=UnitType.sample)
    bias_learner = momentum_sgd(biases, bias_lr_schedule, mm_schedule,
                                l2_regularization_weight=l2_reg_weight,
                                unit_gain=False, use_mean_gradient=True)
    trainer = Trainer(None, (loss, pred_error), [learner, bias_learner])

    print("Training model for %s epochs." % epochs_to_train)
    log_number_of_parameters(loss)

    # Minibatch source, fed by precomputed or config-generated proposals.
    if cfg.USE_PRECOMPUTED_PROPOSALS:
        proposal_provider = ProposalProvider.fromfile(
            cfg["DATA"].TRAIN_PRECOMPUTED_PROPOSALS_FILE, cfg.NUM_ROI_PROPOSALS)
    else:
        proposal_provider = ProposalProvider.fromconfig(cfg)

    od_minibatch_source = ObjectDetectionMinibatchSource(
        cfg["DATA"].TRAIN_MAP_FILE,
        cfg["DATA"].TRAIN_ROI_FILE,
        max_annotations_per_image=cfg.INPUT_ROIS_PER_IMAGE,
        pad_width=cfg.IMAGE_WIDTH,
        pad_height=cfg.IMAGE_HEIGHT,
        pad_value=cfg["MODEL"].IMG_PAD_COLOR,
        randomize=True,
        use_flipping=cfg["TRAIN"].USE_FLIPPED,
        max_images=cfg["DATA"].NUM_TRAIN_IMAGES,
        num_classes=cfg["DATA"].NUM_CLASSES,
        proposal_provider=proposal_provider,
        provide_targets=True,
        proposal_iou_threshold=cfg.BBOX_THRESH,
        normalize_means=cfg.BBOX_NORMALIZE_MEANS if cfg.BBOX_NORMALIZE_TARGETS else None,
        normalize_stds=cfg.BBOX_NORMALIZE_STDS if cfg.BBOX_NORMALIZE_TARGETS else None)

    # Map reader streams to network inputs.
    input_map = {
        od_minibatch_source.image_si: image_input,
        od_minibatch_source.proposals_si: roi_proposals,
        od_minibatch_source.label_targets_si: label_targets,
        od_minibatch_source.bbox_targets_si: bbox_targets,
        od_minibatch_source.bbiw_si: bbox_inside_weights,
    }

    progress_printer = ProgressPrinter(tag='Training', num_epochs=epochs_to_train,
                                       gen_heartbeat=True)
    for epoch in range(epochs_to_train):
        sample_count = 0
        while sample_count < cfg["DATA"].NUM_TRAIN_IMAGES:
            # Never request more samples than remain in this epoch.
            request_size = min(cfg.MB_SIZE,
                               cfg["DATA"].NUM_TRAIN_IMAGES - sample_count)
            data = od_minibatch_source.next_minibatch(request_size, input_map=input_map)
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
            progress_printer.update_with_trainer(trainer, with_metric=True)
            if sample_count % 100 == 0:
                print("Processed {} samples".format(sample_count))

        progress_printer.epoch_summary(with_metric=True)

    eval_model = create_fast_rcnn_eval_model(loss, image_input, roi_proposals, cfg)
    eval_model.save(cfg['MODEL_PATH'])
    return eval_model
def train_fast_rcnn(debug_output=False, model_path=model_file):
    """Build and train the Fast R-CNN network, returning the trained function.

    All sizes (image dimensions, number of classes/rois, epoch and minibatch
    size, number of epochs) and the distributed-learning options come from
    module-level globals.

    Args:
        debug_output: If true, the graph is plotted and the model is saved
            after each epoch into the ``Output`` folder below ``abs_path``.
        model_path: Passed through to ``frcn_predictor``.

    Returns:
        ``frcn_output``, the network function returned by ``frcn_predictor``
        after training.
    """
    if debug_output:
        print("Storing graphs and intermediate models to %s." %
              os.path.join(abs_path, "Output"))

    # Create the minibatch source
    minibatch_source = create_mb_source(
        image_height, image_width, num_channels, num_classes, num_rois,
        base_path, "train")

    # Input variables denoting features, rois and label data
    image_input = C.input_variable((num_channels, image_height, image_width))
    roi_input = C.input_variable((num_rois, 4))
    label_input = C.input_variable((num_rois, num_classes))

    # Mapping from network inputs to reader streams
    input_map = {}
    input_map[image_input] = minibatch_source.streams.features
    input_map[roi_input] = minibatch_source.streams.rois
    input_map[label_input] = minibatch_source.streams.roiLabels

    # Prediction model plus cross-entropy loss and classification error
    frcn_output = frcn_predictor(image_input, roi_input, num_classes, model_path)
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, os.path.join(abs_path, "Output", "graph_frcn.png"))

    # Learning parameters
    l2_reg_weight = 0.0005
    lr_per_sample = [0.00001] * 10
    lr_per_sample = lr_per_sample + [0.000001] * 5 + [0.0000001]
    lr_schedule = learning_parameter_schedule_per_sample(lr_per_sample)
    mm_schedule = momentum_schedule_per_sample(momentum_per_sample)

    # The momentum SGD learner is wrapped in a data-parallel distributed
    # learner configured from the module-level quantization / warm-up values.
    sgd_learner = momentum_sgd(
        frcn_output.parameters, lr_schedule, mm_schedule,
        l2_regularization_weight=l2_reg_weight)
    learner = distributed.data_parallel_distributed_learner(
        learner=sgd_learner,
        num_quantization_bits=num_quantization_bits,
        distributed_after=warm_up)
    progress_printer = ProgressPrinter(
        tag='Training',
        num_epochs=max_epochs,
        rank=distributed.Communicator.rank())
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_printer)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch_number in range(1, max_epochs + 1):
        sample_count = 0
        while sample_count < epoch_size:
            # The global minibatch is split across workers by partition index.
            data = minibatch_source.next_minibatch(
                min(mb_size * C.Communicator.num_workers(),
                    epoch_size - sample_count),
                input_map=input_map,
                num_data_partitions=C.Communicator.num_workers(),
                partition_index=C.Communicator.rank())
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count

        trainer.summarize_training_progress()
        if debug_output:
            # Checkpoint file names are numbered from 1.
            frcn_output.save(
                os.path.join(abs_path, "Output",
                             "frcn_py_%s.model" % epoch_number))

    if distributed_flg:
        distributed.Communicator.finalize()

    return frcn_output
def init_train_fast_rcnn(image_height, image_width, num_classes, num_rois, mb_size,
                         max_epochs, cntk_lr_per_image, l2_reg_weight,
                         momentum_time_constant, base_path, boSkipTraining=False,
                         debug_output=False, tensorboardLogDir=None):
    """Create the Fast R-CNN network and, unless skipped, train it.

    Args:
        image_height: Height of the (3-channel) image input.
        image_width: Width of the image input.
        num_classes: Number of classes per roi label.
        num_rois: Number of rois per image.
        mb_size: Maximum number of samples requested per minibatch.
        max_epochs: Number of training epochs.
        cntk_lr_per_image: Learning rates per image; each is divided by
            ``num_rois`` to obtain the per-sample schedule.
        l2_reg_weight: L2 regularization weight for the learner.
        momentum_time_constant: Passed to
            ``momentum_as_time_constant_schedule``.
        base_path: Forwarded to ``frcn_predictor`` and ``create_mb_source``.
        boSkipTraining: If true, no training happens and the penultimate
            layer returned by ``frcn_predictor`` is returned instead.
        debug_output: If true, plot the graph and save the model after each
            epoch into the current working directory.
        tensorboardLogDir: Optional log directory; when not ``None`` a
            ``TensorBoardProgressWriter`` is attached and per-parameter
            statistics are written after each epoch.

    Returns:
        ``frcn_penultimateLayer`` when ``boSkipTraining`` is true, otherwise
        the trained ``frcn_output``.
    """
    # Make sure we use GPU for training (device type 0 is reported as CPU).
    if use_default_device().type() == 0:
        print("WARNING: using CPU for training.")
    else:
        print("Using GPU for training.")

    # Instantiate the Fast R-CNN prediction model
    image_input = input_variable((3, image_height, image_width))
    roi_input = input_variable((num_rois, 4))
    label_input = input_variable((num_rois, num_classes))
    frcn_output, frcn_penultimateLayer = frcn_predictor(
        image_input, roi_input, num_classes, base_path)

    if boSkipTraining:
        print("Using pre-trained DNN without refinement")
        return frcn_penultimateLayer

    # Create the minibatch source and map network inputs to reader streams
    minibatch_source, epoch_size = create_mb_source(
        "train", image_height, image_width, num_classes, num_rois, base_path,
        randomize=True)
    input_map = {
        image_input: minibatch_source.streams.features,
        roi_input: minibatch_source.streams.rois,
        label_input: minibatch_source.streams.roiLabels
    }

    # Set loss / error functions
    ce = cross_entropy_with_softmax(frcn_output, label_input, axis=1)
    pe = classification_error(frcn_output, label_input, axis=1)
    if debug_output:
        plot(frcn_output, "graph_frcn.png")

    # Set the progress writer(s); TensorBoard logging is optional.
    progress_writers = [ProgressPrinter(tag='Training', num_epochs=max_epochs)]
    tensorboard_writer = None
    if tensorboardLogDir is not None:
        tensorboard_writer = TensorBoardProgressWriter(
            freq=10, log_dir=tensorboardLogDir, model=frcn_output)
        progress_writers.append(tensorboard_writer)

    # Set learning parameters and instantiate the trainer object
    lr_per_sample = [f / float(num_rois) for f in cntk_lr_per_image]
    lr_schedule = learning_rate_schedule(lr_per_sample, unit=UnitType.sample)
    mm_schedule = momentum_as_time_constant_schedule(momentum_time_constant)
    learner = momentum_sgd(frcn_output.parameters, lr_schedule, mm_schedule,
                           l2_regularization_weight=l2_reg_weight)
    trainer = Trainer(frcn_output, (ce, pe), learner, progress_writers)

    # Get minibatches of images and perform model training
    print("Training Fast R-CNN model for %s epochs." % max_epochs)
    log_number_of_parameters(frcn_output)
    for epoch in range(max_epochs):
        sample_count = 0

        # Loop over minibatches in the epoch
        while sample_count < epoch_size:
            data = minibatch_source.next_minibatch(
                min(mb_size, epoch_size - sample_count), input_map=input_map)
            if sample_count % 100 == 1:
                print("Training in progress: epoch {} of {}, sample count {} of {}"
                      .format(epoch, max_epochs, sample_count, epoch_size))
            trainer.train_minibatch(data)
            sample_count += trainer.previous_minibatch_sample_count
        trainer.summarize_training_progress()

        # Log statistics of each parameter tensor, so that we can confirm
        # that the parameters do change during training.
        if tensorboard_writer is not None:
            for parameter in frcn_output.parameters:
                # Read the tensor once instead of once per statistic.
                value = parameter.value
                tensorboard_writer.write_value(parameter.uid + "/mean",
                                               np.mean(value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/std",
                                               np.std(value), epoch)
                tensorboard_writer.write_value(parameter.uid + "/absSum",
                                               np.sum(np.abs(value)), epoch)

        if debug_output:
            frcn_output.save_model("frcn_py_%s.model" % (epoch + 1))
    return frcn_output
def train_faster_rcnn_alternating(cfg):
    '''
    Train Faster R-CNN with the 4-step alternating scheme from the Faster R-CNN paper.

    Stages:
      1a. Create initial network with only the RPN (no detection network),
          train the RPN, then buffer its region proposals.
      1b. Create the full network with freshly cloned conv layers and train
          only the detection network on the buffered proposals.
      2a. Freeze the conv layers taken from the stage 1b network, train only
          the RPN cloned from stage 1a, then buffer its region proposals.
      2b. Freeze the conv layers taken from the stage 2a network and train
          only the detection network cloned from stage 1b on the buffered
          proposals.

    While the stages run, cfg["TEST"].RPN_PRE_NMS_TOP_N and
    cfg["TEST"].RPN_POST_NMS_TOP_N are temporarily set to the corresponding
    cfg["TRAIN"] values. The original values are restored before the eval
    model is created, and also when a stage raises.

    Args:
        cfg: Configuration object supporting both attribute and item access;
            read for model, training and path settings.

    Returns:
        The result of create_faster_rcnn_eval_model for the stage 2b
        detection network combined with the stage 2a RPN.
    '''
    # Setting pre- and post-nms top N to training values since buffered
    # proposals are used for further training. The test values are remembered
    # first so they can be restored unconditionally.
    test_pre = cfg["TEST"].RPN_PRE_NMS_TOP_N
    test_post = cfg["TEST"].RPN_POST_NMS_TOP_N

    try:
        cfg["TEST"].RPN_PRE_NMS_TOP_N = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
        cfg["TEST"].RPN_POST_NMS_TOP_N = cfg["TRAIN"].RPN_POST_NMS_TOP_N

        # Learning parameters
        rpn_lr_factor = cfg["MODEL"].RPN_LR_FACTOR
        rpn_lr_per_sample_scaled = [x * rpn_lr_factor for x in cfg["CNTK"].RPN_LR_PER_SAMPLE]
        frcn_lr_factor = cfg["MODEL"].FRCN_LR_FACTOR
        frcn_lr_per_sample_scaled = [x * frcn_lr_factor for x in cfg["CNTK"].FRCN_LR_PER_SAMPLE]

        l2_reg_weight = cfg["CNTK"].L2_REG_WEIGHT
        mm_schedule = momentum_schedule(cfg["CNTK"].MOMENTUM_PER_MB)
        rpn_epochs = cfg["CNTK"].RPN_EPOCHS
        frcn_epochs = cfg["CNTK"].FRCN_EPOCHS

        feature_node_name = cfg["MODEL"].FEATURE_NODE_NAME
        last_conv_node_name = cfg["MODEL"].LAST_CONV_NODE_NAME
        print("Using base model:   {}".format(cfg["MODEL"].BASE_MODEL))
        print("rpn_lr_per_sample:  {}".format(rpn_lr_per_sample_scaled))
        print("frcn_lr_per_sample: {}".format(frcn_lr_per_sample_scaled))

        debug_output = cfg["CNTK"].DEBUG_OUTPUT
        if debug_output:
            print("Storing graphs and models to %s." % cfg.OUTPUT_PATH)

        # Input variables denoting features, labeled ground truth rois
        # (as 5-tuples per roi) and image dimensions
        image_input = input_variable(shape=(cfg.NUM_CHANNELS, cfg.IMAGE_HEIGHT, cfg.IMAGE_WIDTH),
                                     dynamic_axes=[Axis.default_batch_axis()],
                                     name=feature_node_name)
        feat_norm = image_input - Constant([[[v]] for v in cfg["MODEL"].IMG_PAD_COLOR])
        roi_input = input_variable((cfg.INPUT_ROIS_PER_IMAGE, 5),
                                   dynamic_axes=[Axis.default_batch_axis()])
        scaled_gt_boxes = alias(roi_input, name='roi_input')
        dims_input = input_variable((6), dynamic_axes=[Axis.default_batch_axis()])
        dims_node = alias(dims_input, name='dims_input')
        rpn_rois_input = input_variable((cfg["TRAIN"].RPN_POST_NMS_TOP_N, 4),
                                        dynamic_axes=[Axis.default_batch_axis()])
        rpn_rois_buf = alias(rpn_rois_input, name='rpn_rois')

        # base image classification model (e.g. VGG16 or AlexNet)
        base_model = load_model(cfg['BASE_MODEL_PATH'])

        # ------------------------------------------------------------------
        # Stage 1a: create initial network, only rpn, without detection network
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  init new            yes
        # frcn: -                   -
        # ------------------------------------------------------------------
        print("stage 1a - rpn")

        # conv layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # RPN and losses
        rpn_rois, rpn_losses = create_rpn(conv_out, scaled_gt_boxes, dims_node, cfg)
        stage1_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage1_rpn_network,
                 os.path.join(cfg.OUTPUT_PATH,
                              "graph_frcn_train_stage1a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

        print("stage 1a - buffering rpn proposals")
        buffered_proposals_s1 = compute_rpn_proposals(
            stage1_rpn_network, image_input, roi_input, dims_input, cfg)

        # ------------------------------------------------------------------
        # Stage 1b: create full network, initialize conv layers with imagenet,
        # fix rpn weights
        #       initial weights     train?
        # conv: base_model          only conv3_1 and up
        # rpn:  stage1a rpn model   no --> use buffered proposals
        # frcn: base_model + new    yes
        # ------------------------------------------------------------------
        print("stage 1b - frcn")

        # conv_layers
        conv_layers = clone_conv_layers(base_model, cfg)
        conv_out = conv_layers(feat_norm)

        # use buffered proposals in target layer
        rois, label_targets, bbox_targets, bbox_inside_weights = \
            create_proposal_target_layer(rpn_rois_buf, scaled_gt_boxes, cfg)

        # Fast RCNN and losses
        fc_layers = clone_model(base_model, [cfg["MODEL"].POOL_NODE_NAME],
                                [cfg["MODEL"].LAST_HIDDEN_NODE_NAME], CloneMethod.clone)
        cls_score, bbox_pred = create_fast_rcnn_predictor(conv_out, rois, fc_layers, cfg)
        detection_losses = create_detection_losses(
            cls_score, label_targets, bbox_pred, rois, bbox_targets, bbox_inside_weights, cfg)
        pred_error = classification_error(cls_score, label_targets, axis=1, name="pred_error")
        stage1_frcn_network = combine([rois, cls_score, bbox_pred, detection_losses, pred_error])

        # train
        if debug_output:
            plot(stage1_frcn_network,
                 os.path.join(cfg.OUTPUT_PATH,
                              "graph_frcn_train_stage1b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s1)
        # Drop the reference to the stage 1 proposals; they are not used again.
        buffered_proposals_s1 = None

        # ------------------------------------------------------------------
        # Stage 2a: keep conv weights from detection network and fix them
        #       initial weights     train?
        # conv: stage1b frcn model  no
        # rpn:  stage1a rpn model   yes
        # frcn: -                   -
        # ------------------------------------------------------------------
        print("stage 2a - rpn")

        # conv_layers
        conv_layers = clone_model(stage1_frcn_network, [feature_node_name],
                                  [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # RPN and losses
        rpn = clone_model(stage1_rpn_network,
                          [last_conv_node_name, "roi_input", "dims_input"],
                          ["rpn_rois", "rpn_losses"], CloneMethod.clone)
        rpn_net = rpn(conv_out, dims_node, scaled_gt_boxes)
        rpn_rois = rpn_net.outputs[0]
        rpn_losses = rpn_net.outputs[1]
        stage2_rpn_network = combine([rpn_rois, rpn_losses])

        # train
        if debug_output:
            plot(stage2_rpn_network,
                 os.path.join(cfg.OUTPUT_PATH,
                              "graph_frcn_train_stage2a_rpn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, rpn_losses, rpn_losses,
                    rpn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, rpn_epochs, cfg)

        print("stage 2a - buffering rpn proposals")
        buffered_proposals_s2 = compute_rpn_proposals(
            stage2_rpn_network, image_input, roi_input, dims_input, cfg)

        # ------------------------------------------------------------------
        # Stage 2b: keep conv and rpn weights from step 3 and fix them
        #       initial weights     train?
        # conv: stage2a rpn model   no
        # rpn:  stage2a rpn model   no --> use buffered proposals
        # frcn: stage1b frcn model  yes
        # ------------------------------------------------------------------
        print("stage 2b - frcn")

        # conv_layers
        conv_layers = clone_model(stage2_rpn_network, [feature_node_name],
                                  [last_conv_node_name], CloneMethod.freeze)
        conv_out = conv_layers(image_input)

        # Fast RCNN and losses
        frcn = clone_model(stage1_frcn_network,
                           [last_conv_node_name, "rpn_rois", "roi_input"],
                           ["cls_score", "bbox_regr", "rpn_target_rois",
                            "detection_losses", "pred_error"],
                           CloneMethod.clone)
        stage2_frcn_network = frcn(conv_out, rpn_rois_buf, scaled_gt_boxes)
        # Outputs are picked by position: index 3 and 4 are used as the
        # detection loss and the prediction error respectively.
        detection_losses = stage2_frcn_network.outputs[3]
        pred_error = stage2_frcn_network.outputs[4]

        # train
        if debug_output:
            plot(stage2_frcn_network,
                 os.path.join(cfg.OUTPUT_PATH,
                              "graph_frcn_train_stage2b_frcn." + cfg["CNTK"].GRAPH_TYPE))
        train_model(image_input, roi_input, dims_input, detection_losses, pred_error,
                    frcn_lr_per_sample_scaled, mm_schedule, l2_reg_weight, frcn_epochs, cfg,
                    rpn_rois_input=rpn_rois_input, buffered_rpn_proposals=buffered_proposals_s2)
        # Drop the reference to the stage 2 proposals; they are not used again.
        buffered_proposals_s2 = None
    finally:
        # Resetting config values to original test values, even if a stage
        # failed, so the shared cfg is never left with training-time settings.
        cfg["TEST"].RPN_PRE_NMS_TOP_N = test_pre
        cfg["TEST"].RPN_POST_NMS_TOP_N = test_post

    return create_faster_rcnn_eval_model(stage2_frcn_network, image_input, dims_input, cfg,
                                         rpn_model=stage2_rpn_network)