Example #1
def generate_output(mode):
    """
    Generate annotated images, videos, or sample images, based on mode
    """
    # First, load mapping from integer class ID to sign name string
    sign_map = {}
    with open('signnames.csv', 'r') as f:
        for line in f:
            line = line.rstrip('\n')  # strip trailing newline, if any
            sign_id, sign_name = line.split(',')
            sign_map[int(sign_id)] = sign_name
    sign_map[0] = 'background'  # class ID 0 reserved for background class

    # Launch the graph
    path = 'model/model.ckpt'
    with tf.Graph().as_default(), tf.Session() as sess:
        # "Instantiate" neural network, get relevant tensors
        model = SSDModel()

        # Load trained model
        saver = tf.train.Saver()
        print('Restoring previously trained model at %s' % path)
        saver.restore(sess, path)
        image_orig = cv2.imread('test.jpg', cv2.IMREAD_COLOR)
        t = time.time()
        image_orig = cv2.resize(
            image_orig,
            (int(image_orig.shape[1] / 2), int(image_orig.shape[0] / 2)))
        image = run_inference(image_orig, model, sess, mode, sign_map)
        print(image.shape)
        print(time.time() - t)
        show(image)
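

# NOTE: show() is not defined in this snippet. A minimal sketch of such a helper
# (an assumption, not part of the original code), displaying the OpenCV BGR image
# with matplotlib after converting it to RGB:
def show(image):
    import matplotlib.pyplot as plt
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()
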
def run_training():
    """
    Load training and test data
    Run training process
    Plot train/validation losses
    Report test loss
    Save model
    """
    # Load training and test data
    with open('data_prep_%sx%s.p' % (IMG_W, IMG_H), mode='rb') as f:
        train = pickle.load(f)
    # with open('test.p', mode='rb') as f:
    #	test = pickle.load(f)

    # Format the data
    X_train = []
    y_train_conf = []
    y_train_loc = []
    for image_file in train.keys():
        X_train.append(image_file)
        y_train_conf.append(train[image_file]['y_true_conf'])
        y_train_loc.append(train[image_file]['y_true_loc'])
    X_train = np.array(X_train)
    y_train_conf = np.array(y_train_conf)
    y_train_loc = np.array(y_train_loc)

    # Train/validation split
    X_train, X_valid, y_train_conf, y_valid_conf, y_train_loc, y_valid_loc = train_test_split(
        X_train, y_train_conf, y_train_loc, test_size=VALIDATION_SIZE, random_state=1)

    # Launch the graph
    with tf.Graph().as_default(), tf.Session() as sess:
        # "Instantiate" neural network, get relevant tensors
        model = SSDModel()
        x = model['x']
        y_true_conf = model['y_true_conf']
        y_true_loc = model['y_true_loc']
        conf_loss_mask = model['conf_loss_mask']
        is_training = model['is_training']
        optimizer = model['optimizer']
        reported_loss = model['loss']

        # Training process
        # TF saver to save/restore trained model
        saver = tf.train.Saver()

        if RESUME:
            print('Restoring previously trained model at %s' % MODEL_SAVE_PATH)
            saver.restore(sess, MODEL_SAVE_PATH)

            # Restore previous loss history
            with open('loss_history.p', 'rb') as f:
                loss_history = pickle.load(f)
        else:
            print('Training model from scratch')
            # Variable initialization
            sess.run(tf.global_variables_initializer())

            # For book-keeping, keep track of training and validation loss over epochs, like such:
            # [(train_acc_epoch1, valid_acc_epoch1), (train_acc_epoch2, valid_acc_epoch2), ...]
            loss_history = []

        # Record time elapsed for performance check
        last_time = time.time()
        train_start_time = time.time()

        # Run NUM_EPOCH epochs of training
        for epoch in range(NUM_EPOCH):
            train_gen = next_batch(X_train, y_train_conf, y_train_loc,
                                   BATCH_SIZE)
            num_batches_train = math.ceil(X_train.shape[0] / BATCH_SIZE)
            losses = []  # list of loss values for book-keeping

            # Run training on each batch
            for _ in range(num_batches_train):
                # Obtain the training data and labels from generator
                images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(
                    train_gen)

                # Perform gradient update (i.e. training step) on current batch
                # DEBUG variant (uses the same feed_dict as below):
                # _, loss, loc_loss_dbg, loc_loss_mask, loc_loss = sess.run(
                #     [optimizer, reported_loss, model['loc_loss_dbg'], model['loc_loss_mask'], model['loc_loss']], feed_dict={...})
                _, loss = sess.run(
                    [optimizer, reported_loss],
                    feed_dict={
                        x: images,
                        y_true_conf: y_true_conf_gen,
                        y_true_loc: y_true_loc_gen,
                        conf_loss_mask: conf_loss_mask_gen,
                        is_training: True
                    })

                losses.append(loss)  # TODO: need mAP metric instead of raw loss

            # A rough estimate of loss for this epoch (overweights the last batch)
            train_loss = np.mean(losses)

            # Calculate validation loss at the end of the epoch
            valid_gen = next_batch(X_valid, y_valid_conf, y_valid_loc,
                                   BATCH_SIZE)
            num_batches_valid = math.ceil(X_valid.shape[0] / BATCH_SIZE)
            losses = []
            for _ in range(num_batches_valid):
                images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(
                    valid_gen)

                # Perform forward pass and calculate loss
                loss = sess.run(reported_loss,
                                feed_dict={
                                    x: images,
                                    y_true_conf: y_true_conf_gen,
                                    y_true_loc: y_true_loc_gen,
                                    conf_loss_mask: conf_loss_mask_gen,
                                    is_training: False
                                })
                losses.append(loss)
            valid_loss = np.mean(losses)

            # Record and report train/validation/test losses for this epoch
            loss_history.append((train_loss, valid_loss))

            # Print accuracy every epoch
            print('Epoch %d -- Train loss: %.4f, Validation loss: %.4f, Elapsed time: %.2f sec' %
                  (epoch + 1, train_loss, valid_loss, time.time() - last_time))
            last_time = time.time()

        total_time = time.time() - train_start_time
        print('Total elapsed time: %d min %d sec' %
              (total_time / 60, total_time % 60))

        test_loss = 0.  # TODO: Add test set
        '''
        # After training is complete, evaluate accuracy on test set
        print('Calculating test accuracy...')
        test_gen = next_batch(X_test, y_test, BATCH_SIZE)
        test_size = X_test.shape[0]
        test_acc = calculate_accuracy(test_gen, test_size, BATCH_SIZE, accuracy, x, y, keep_prob, sess)
        print('Test acc.: %.4f' % (test_acc,))
        '''

        if SAVE_MODEL:
            # Save model to disk
            save_path = saver.save(sess, MODEL_SAVE_PATH)
            print('Trained model saved at: %s' % save_path)

            # Also save loss history
            print('Loss history saved at loss_history.p')
            with open('loss_history.p', 'wb') as f:
                pickle.dump(loss_history, f)

    # Return final test loss and loss history
    return test_loss, loss_history
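
Example #1 calls a next_batch generator that is not shown in the snippet. The sketch below is only an illustration of the expected interface, inferred from the call sites: X holds image file paths, and each yielded batch supplies the loaded images together with the matching confidence targets, localization targets, and a confidence-loss mask. The real next_batch (in particular how conf_loss_mask is built, e.g. via hard-negative mining) may differ.

import numpy as np
import cv2


def next_batch(X, y_conf, y_loc, batch_size):
    """Hypothetical batch generator yielding (images, y_conf, y_loc, conf_loss_mask)."""
    indices = np.random.permutation(X.shape[0])
    for start in range(0, X.shape[0], batch_size):
        batch_idx = indices[start:start + batch_size]
        # Load the images referenced by the file-path array and scale to [0, 1]
        images = np.array([cv2.imread(f).astype(np.float32) / 255. for f in X[batch_idx]])
        y_conf_batch = y_conf[batch_idx]
        y_loc_batch = y_loc[batch_idx]
        # Simplest possible mask: weight every default box equally; the original
        # implementation likely keeps all positives plus a sample of negatives
        conf_loss_mask = np.ones_like(y_conf_batch, dtype=np.float32)
        yield images, y_conf_batch, y_loc_batch, conf_loss_mask
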
Example #3
        y_valid_loc.append(ys_valid_loc)
        y_test_loc.append(ys_test_loc)
    X_train = np.squeeze(np.array(X_train), axis=0)
    X_valid = np.squeeze(np.array(X_valid), axis=0)
    X_test = np.squeeze(np.array(X_test), axis=0)
    y_train_conf = np.squeeze(np.array(y_train_conf), axis=0)
    y_valid_conf = np.squeeze(np.array(y_valid_conf), axis=0)
    y_test_conf = np.squeeze(np.array(y_test_conf), axis=0)
    y_train_loc = np.squeeze(np.array(y_train_loc), axis=0)
    y_valid_loc = np.squeeze(np.array(y_valid_loc), axis=0)
    y_test_loc = np.squeeze(np.array(y_test_loc), axis=0)

    # Launch the graph
    with tf.Graph().as_default(), tf.Session() as sess:
        # "Instantiate" neural network, get relevant tensors
        model = SSDModel()
        x = model['x']
        y_true_conf = model['y_true_conf']
        y_true_loc = model['y_true_loc']
        conf_loss_mask = model['conf_loss_mask']
        is_training = model['is_training']
        optimizer = model['optimizer']
        reported_loss = model['loss']
        num_pos = model['num_pos']

        # Training process
        # TF saver to save/restore trained model
        saver = tf.train.Saver()

        if RESUME:
            print('Restoring previously trained model at %s' % MODEL_SAVE_PATH)
Example #4
                    if top is not None and len(res) > 2 * top:
                        res = cut_top(res)

        return cut_top(res)

    def restore_rects(self, tensors, threshold=None, top=None):

        # print(len(tensors))
        # print(tensors.shape)

        def cut_top(res):
            res = sorted(res, reverse=True, key=lambda val: val[0])
            if top is not None:
                res = res[:top]
            return res

        lr, tb, cls = tensors

        result = self._restore_rects(lr, tb, cls, self.model.num_poolings,
                                     threshold, top)
        if self.verbose:
            print(result)

        result = cut_top(result)
        return tuple(r[0] for r in result), tuple(r[1] for r in result)


if __name__ == '__main__':
    ssd_model = SSDModel()
def generate_output(input_files, mode):
    """
    Generate annotated images, videos, or sample images, based on mode
    """
    # First, load mapping from integer class ID to sign name string
    sign_map = {}
    with open('signnames.csv', 'r') as f:
        for line in f:
            line = line.rstrip('\n')  # strip trailing newline, if any
            sign_id, sign_name = line.split(',')
            sign_map[int(sign_id)] = sign_name
    sign_map[0] = 'background'  # class ID 0 reserved for background class
    logging.info(sign_map)

    # Create output directory 'inference_out/' if needed
    if mode == 'image' or mode == 'video':
        if not os.path.isdir('./inference_out'):
            try:
                os.mkdir('./inference_out')
            except FileExistsError:
                print('Error: Cannot mkdir ./inference_out')
                return

    # Launch the graph
    with tf.Graph().as_default(), tf.Session() as sess:
        # "Instantiate" neural network, get relevant tensors
        model = SSDModel()
        # logging.info(model)

        # Load trained model
        saver = tf.train.Saver()
        logging.critical('Restoring previously trained model at %s' % MODEL_SAVE_PATH)
        saver.restore(sess, MODEL_SAVE_PATH)

        if mode == 'image':
            for image_file in input_files:
                print('Running inference on %s' % image_file)
                image_orig = np.asarray(Image.open(image_file))
                image = run_inference(image_orig, model, sess, mode, sign_map)

                head, tail = os.path.split(image_file)
                plt.imsave('./inference_out/%s' % tail, image)
            print('Output saved in inference_out/')

        elif mode == 'video':
            for video_file in input_files:
                print('Running inference on %s' % video_file)
                video = VideoFileClip(video_file)
                video = video.fl_image(
                    lambda x: run_inference(x, model, sess, mode, sign_map))

                head, tail = os.path.split(video_file)
                video.write_videofile('./inference_out/%s' % tail, audio=False)
            print('Output saved in inference_out/')

        elif mode == 'demo':
            print('Demo mode: Running inference on images in sample_images/')
            image_files = os.listdir('sample_images/')

            print("-" * 30)
            for image_file in image_files:
                print('Running inference on sample_images/%s' % image_file)
                # image_orig = np.asarray(Image.open('sample_images/' + image_file))
                image_orig = Image.open('sample_images/' + image_file)
                image = run_inference(image_orig, model, sess, mode, sign_map)
                plt.imshow(image)
                plt.show()
                print("-" * 30)

        else:
            raise ValueError('Invalid mode: %s' % mode)
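
The functions in this example are typically driven from the command line. A hypothetical wrapper is sketched below; the flag names, defaults and argument handling are assumptions for illustration, not part of the original project.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Run SSD inference on images or videos')
    parser.add_argument('--mode', choices=['image', 'video', 'demo'], default='demo',
                        help='Type of output to generate')
    parser.add_argument('files', nargs='*',
                        help='Input image/video files (ignored in demo mode)')
    cli_args = parser.parse_args()
    generate_output(cli_args.files, cli_args.mode)
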
Example #6
def main(async_executor=None):
    # Setup MLPerf logger
    mllog.config()
    mllogger = mllog.get_mllogger()
    mllogger.logger.propagate = False
    # Start MLPerf benchmark
    log_start(key=mlperf_constants.INIT_START, uniq=False)

    # Parse args
    args = parse_args()

    ############################################################################
    # Initialize various libraries (horovod, logger, amp ...)
    ############################################################################
    # Initialize async executor
    if args.async_val:
        assert async_executor is not None, 'Please use ssd_main_async.py to launch with async support'
    else:
        # (Force) disable async validation
        async_executor = None

    # Initialize horovod
    hvd.init()

    # Initialize AMP
    if args.precision == 'amp':
        amp.init(layout_optimization=True)

    # Set MXNET_SAFE_ACCUMULATION=1 if necessary
    if args.precision == 'fp16':
        os.environ["MXNET_SAFE_ACCUMULATION"] = "1"

    # Results folder
    network_name = f'ssd_{args.backbone}_{args.data_layout}_{args.dataset}_{args.data_shape}'
    save_prefix = None
    if args.results:
        save_prefix = os.path.join(args.results, network_name)
    else:
        logging.info(
            "No results folder was provided. The script will not write logs or save weights to disk"
        )

    # Initialize logger
    log_file = None
    if args.results:
        log_file = f'{save_prefix}_{args.mode}_{hvd.rank()}.log'
    setup_logger(level=args.log_level
                 if hvd.local_rank() in args.log_local_ranks else 'CRITICAL',
                 log_file=log_file)

    # Set seed
    args.seed = set_seed_distributed(args.seed)
    ############################################################################

    ############################################################################
    # Validate arguments and print some useful information
    ############################################################################
    logging.info(args)

    assert not (args.resume_from and args.pretrained_backbone), (
        "--resume-from and --pretrained_backbone are "
        "mutually exclusive.")
    assert args.data_shape == 300, "only data_shape=300 is supported at the moment."
    assert args.input_batch_multiplier >= 1, "input_batch_multiplier must be >= 1"
    assert not (hvd.size() == 1 and args.gradient_predivide_factor > 1), (
        "Gradient predivide factor is not supported "
        "with a single GPU")
    if args.data_layout == 'NCHW' or args.precision == 'fp32':
        assert args.bn_group == 1, "Group batch norm doesn't support FP32 data format or NCHW data layout."
        if not args.no_fuse_bn_relu:
            logging.warning((
                "WARNING: fused batch norm relu is only supported with NHWC layout. "
                "A non fused version will be forced."))
            args.no_fuse_bn_relu = True
        if not args.no_fuse_bn_add_relu:
            logging.warning((
                "WARNING: fused batch norm add relu is only supported with NHWC layout. "
                "A non fused version will be forced."))
            args.no_fuse_bn_add_relu = True
    if args.profile_no_horovod and hvd.size() > 1:
        logging.warning(
            "WARNING: hvd.size() > 1, ignoring the requested --profile-no-horovod"
        )
        args.profile_no_horovod = False

    logging.info(f'Seed: {args.seed}')
    logging.info(f'precision: {args.precision}')
    if args.precision == 'fp16':
        logging.info(f'loss scaling: {args.fp16_loss_scale}')
    logging.info(f'network name: {network_name}')
    logging.info(f'fuse bn relu: {not args.no_fuse_bn_relu}')
    logging.info(f'fuse bn add relu: {not args.no_fuse_bn_add_relu}')
    logging.info(f'bn group: {args.bn_group}')
    logging.info(f'bn all reduce fp16: {args.bn_fp16}')
    logging.info(f'MPI size: {hvd.size()}')
    logging.info(f'MPI global rank: {hvd.rank()}')
    logging.info(f'MPI local rank: {hvd.local_rank()}')
    logging.info(f'async validation: {args.async_val}')
    ############################################################################

    # TODO(ahmadki): load network and anchors based on args.backbone (JoC)
    # Load network
    net = ssd_300_resnet34_v1_mlperf_coco(
        pretrained_base=False,
        nms_overlap_thresh=args.nms_overlap_thresh,
        nms_topk=args.nms_topk,
        nms_valid_thresh=args.nms_valid_thresh,
        post_nms=args.post_nms,
        layout=args.data_layout,
        fuse_bn_add_relu=not args.no_fuse_bn_add_relu,
        fuse_bn_relu=not args.no_fuse_bn_relu,
        bn_fp16=args.bn_fp16,
        norm_kwargs={'bn_group': args.bn_group})

    # precomputed anchors
    anchors_np = mlperf_xywh_anchors(image_size=args.data_shape,
                                     clip=True,
                                     normalize=True)
    if args.test_anchors and hvd.rank() == 0:
        logging.info(f'Normalized anchors: {anchors_np}')

    # Training mode
    train_net = None
    train_pipeline = None
    trainer_fn = None
    lr_scheduler = None
    if args.mode in ['train', 'train_val']:
        # Training iterator
        num_cropping_iterations = 1
        if args.use_tfrecord:
            tfrecord_files = glob.glob(
                os.path.join(args.tfrecord_root, 'train.*.tfrecord'))
            index_files = glob.glob(
                os.path.join(args.tfrecord_root, 'train.*.idx'))
            tfrecords = [(tfrecord, index)
                         for tfrecord, index in zip(tfrecord_files, index_files)]
        train_pipeline = get_training_pipeline(
            coco_root=args.coco_root if not args.use_tfrecord else None,
            tfrecords=tfrecords if args.use_tfrecord else None,
            anchors=anchors_np,
            num_shards=hvd.size(),
            shard_id=hvd.rank(),
            device_id=hvd.local_rank(),
            batch_size=args.batch_size * args.input_batch_multiplier,
            dataset_size=args.dataset_size,
            data_layout=args.data_layout,
            data_shape=args.data_shape,
            num_cropping_iterations=num_cropping_iterations,
            num_workers=args.dali_workers,
            fp16=args.precision == 'fp16',
            input_jpg_decode=args.input_jpg_decode,
            hw_decoder_load=args.hw_decoder_load,
            decoder_cache_size=(min((100 * 1024 + hvd.size() - 1) // hvd.size(), 12 * 1024)
                                if args.input_jpg_decode == 'cache' else 0),
            seed=args.seed)
        log_event(key=mlperf_constants.TRAIN_SAMPLES,
                  value=train_pipeline.epoch_size)
        log_event(key=mlperf_constants.MAX_SAMPLES,
                  value=num_cropping_iterations)

        # Training network
        train_net = SSDMultiBoxLoss(net=net,
                                    local_batch_size=args.batch_size,
                                    bulk_last_wgrad=args.bulk_last_wgrad)

        # Trainer function. SSDModel expects a function that takes 1 parameter - HybridBlock
        trainer_fn = functools.partial(
            sgd_trainer,
            learning_rate=args.lr,
            weight_decay=args.weight_decay,
            momentum=args.momentum,
            precision=args.precision,
            fp16_loss_scale=args.fp16_loss_scale,
            gradient_predivide_factor=args.gradient_predivide_factor,
            num_groups=args.horovod_num_groups,
            profile_no_horovod=args.profile_no_horovod)

        # Learning rate scheduler
        lr_scheduler = MLPerfLearningRateScheduler(
            learning_rate=args.lr,
            decay_factor=args.lr_decay_factor,
            decay_epochs=args.lr_decay_epochs,
            warmup_factor=args.lr_warmup_factor,
            warmup_epochs=args.lr_warmup_epochs,
            epoch_size=train_pipeline.epoch_size,
            global_batch_size=args.batch_size * hvd.size())

    # Validation mode
    infer_net = None
    val_iterator = None
    if args.mode in ['infer', 'val', 'train_val']:
        # Validation iterator
        tfrecord_files = glob.glob(
            os.path.join(args.tfrecord_root, 'val.*.tfrecord'))
        index_files = glob.glob(os.path.join(args.tfrecord_root, 'val.*.idx'))
        tfrecords = [(tfrecord, index)
                     for tfrecord, index in zip(tfrecord_files, index_files)]
        val_pipeline = get_inference_pipeline(
            coco_root=args.coco_root if not args.use_tfrecord else None,
            tfrecords=tfrecords if args.use_tfrecord else None,
            num_shards=hvd.size(),
            shard_id=hvd.rank(),
            device_id=hvd.local_rank(),
            batch_size=args.eval_batch_size,
            dataset_size=args.eval_dataset_size,
            data_layout=args.data_layout,
            data_shape=args.data_shape,
            num_workers=args.dali_workers,
            fp16=args.precision == 'fp16')
        log_event(key=mlperf_constants.EVAL_SAMPLES,
                  value=val_pipeline.epoch_size)

        # Inference network
        infer_net = COCOInference(net=net,
                                  ltrb=False,
                                  scale_bboxes=True,
                                  score_threshold=0.0)

        # annotations file
        cocoapi_annotation_file = os.path.join(
            args.coco_root, 'annotations', 'bbox_only_instances_val2017.json')

    # Prepare model
    model = SSDModel(net=net,
                     anchors_np=anchors_np,
                     precision=args.precision,
                     fp16_loss_scale=args.fp16_loss_scale,
                     train_net=train_net,
                     trainer_fn=trainer_fn,
                     lr_scheduler=lr_scheduler,
                     metric=mx.metric.Loss(),
                     infer_net=infer_net,
                     async_executor=async_executor,
                     save_prefix=save_prefix,
                     ctx=mx.gpu(hvd.local_rank()))

    # Do training and validation dry runs on fake data.
    # This will set layer shapes (needed before loading the pre-trained backbone),
    # allocate tensors, and cache the optimized graph.
    # Training dry run:
    logging.info('Running training dry runs')
    dummy_train_pipeline = get_training_pipeline(
        coco_root=None,
        tfrecords=[('dummy.tfrecord', 'dummy.idx')],
        anchors=anchors_np,
        num_shards=1,
        shard_id=0,
        device_id=hvd.local_rank(),
        batch_size=args.batch_size * args.input_batch_multiplier,
        dataset_size=None,
        data_layout=args.data_layout,
        data_shape=args.data_shape,
        num_workers=args.dali_workers,
        fp16=args.precision == 'fp16',
        seed=args.seed)
    dummy_train_iterator = get_training_iterator(pipeline=dummy_train_pipeline,
                                                 batch_size=args.batch_size)
    for images, box_targets, cls_targets in dummy_train_iterator:
        model.train_step(images=images,
                         box_targets=box_targets,
                         cls_targets=cls_targets)
    # Freeing memory is disabled due to a bug in CUDA graphs
    # del dummy_train_pipeline
    # del dummy_train_iterator
    mx.ndarray.waitall()
    logging.info('Done')
    # Validation dry run:
    logging.info('Running inference dry runs')
    dummy_val_pipeline = get_inference_pipeline(
        coco_root=None,
        tfrecords=[('dummy.tfrecord', 'dummy.idx')],
        num_shards=1,
        shard_id=0,
        device_id=hvd.local_rank(),
        batch_size=args.eval_batch_size,
        dataset_size=None,
        data_layout=args.data_layout,
        data_shape=args.data_shape,
        num_workers=args.dali_workers,
        fp16=args.precision == 'fp16')
    dummy_val_iterator = get_inference_iterator(pipeline=dummy_val_pipeline)
    model.infer(data_iterator=dummy_val_iterator, log_interval=None)
    # Freeing memory is disabled due to a bug in CUDA graphs
    # del dummy_val_pipeline
    # del dummy_val_iterator
    mx.ndarray.waitall()
    logging.info('Done')

    # re-initialize the model as a precaution in case the dry runs changed the parameters
    model.init_model(force_reinit=True)
    model.zero_grads()
    mx.ndarray.waitall()

    # load saved model or pretrained backbone
    if args.resume_from:
        model.load_parameters(filename=args.resume_from)
    elif args.pretrained_backbone:
        model.load_pretrain_backbone(picklefile_name=args.pretrained_backbone)

    # broadcast parameters
    model.broadcast_params()
    mx.ndarray.waitall()

    if args.test_initialization and hvd.rank() == 0:
        model.print_params_stats(net)

    log_end(key=mlperf_constants.INIT_STOP)

    # Main MLPerf loop (training+validation)
    mpiwrapper.barrier()
    log_start(key=mlperf_constants.RUN_START)
    mpiwrapper.barrier()
    # Real data iterators
    train_iterator = None
    val_iterator = None
    if train_pipeline:
        train_iterator = get_training_iterator(pipeline=train_pipeline,
                                               batch_size=args.batch_size,
                                               synthetic=args.synthetic)
    if val_pipeline:
        val_iterator = get_inference_iterator(pipeline=val_pipeline)
    model_map, epoch = model.train_val(train_iterator=train_iterator,
                                       start_epoch=args.start_epoch,
                                       end_epoch=args.epochs,
                                       val_iterator=val_iterator,
                                       val_interval=args.val_interval,
                                       val_epochs=args.val_epochs,
                                       annotation_file=cocoapi_annotation_file,
                                       target_map=args.target_map,
                                       train_log_interval=args.log_interval,
                                       val_log_interval=args.log_interval,
                                       save_interval=args.save_interval,
                                       cocoapi_threads=args.cocoapi_threads,
                                       profile_start=args.profile_start,
                                       profile_stop=args.profile_stop)
    status = 'success' if (model_map
                           and model_map >= args.target_map) else 'aborted'
    mx.ndarray.waitall()
    log_end(key=mlperf_constants.RUN_STOP, metadata={"status": status})

    logging.info(f'Rank {hvd.rank()} done. map={model_map} @ epoch={epoch}')
    mx.nd.waitall()
    hvd.shutdown()
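
Example #6 defines main(async_executor=None) but does not show how it is launched. A minimal synchronous entry point is sketched below as an assumption; per the assert near the top of main, asynchronous validation requires launching through ssd_main_async.py instead.

if __name__ == '__main__':
    # Synchronous launch: async validation needs an external executor (ssd_main_async.py)
    main(async_executor=None)
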
Example #7
def run_training():
    """
    Load training and test data
    Run training process
    Plot train/validation losses
    Report test loss
    Save model
    """
    # Load training data - recall training data could be chunked
    # Training data in dict train, which is a merge of data_prep_*.p
    data_prep_list = []
    data_prep = {}
    for dp_file in glob.glob('data_prep_%sx%s__*.p' % (IMG_W, IMG_H)):
        with open(dp_file, mode='rb') as f:
            dp = pickle.load(f)
            data_prep_list.append(dp)

    for dp in data_prep_list:
        data_prep = {**data_prep, **dp}

    # Manually do the train/validation split (sklearn train_test_split runs out of memory)
    train = {}
    valid = {}
    num_valid = int(len(data_prep.keys()) * VALIDATION_SIZE)
    random_keys = list(data_prep.keys())
    random.shuffle(random_keys)  # random.shuffle() shuffles list *in place*
    for i, k in enumerate(random_keys):
        if i < num_valid:
            valid[k] = data_prep[k]
        else:
            train[k] = data_prep[k]

    # Format the data, for both train and validation data
    X_train = []
    y_train_conf = []
    y_train_loc = []
    for image_file in train.keys():
        X_train.append(image_file)
        y_train_conf.append(train[image_file]['y_true_conf'])
        y_train_loc.append(train[image_file]['y_true_loc'])
    X_train = np.array(X_train)
    y_train_conf = np.array(y_train_conf)
    y_train_loc = np.array(y_train_loc)

    X_valid = []
    y_valid_conf = []
    y_valid_loc = []
    for image_file in valid.keys():
        X_valid.append(image_file)
        y_valid_conf.append(valid[image_file]['y_true_conf'])
        y_valid_loc.append(valid[image_file]['y_true_loc'])
    X_valid = np.array(X_valid)
    y_valid_conf = np.array(y_valid_conf)
    y_valid_loc = np.array(y_valid_loc)

    # Launch the graph
    with tf.Graph().as_default(), tf.Session() as sess:
        # "Instantiate" neural network, get relevant tensors
        model = SSDModel()
        x = model['x']
        y_true_conf = model['y_true_conf']
        y_true_loc = model['y_true_loc']
        conf_loss_mask = model['conf_loss_mask']
        is_training = model['is_training']
        optimizer = model['optimizer']
        reported_loss = model['loss']

        # Training process
        # TF saver to save/restore trained model
        saver = tf.train.Saver()

        if RESUME:
            print('Restoring previously trained model at %s' % MODEL_SAVE_PATH)
            saver.restore(sess, MODEL_SAVE_PATH)

            # Restore previous loss history
            with open('loss_history.p', 'rb') as f:
                loss_history = pickle.load(f)
        else:
            print('Training model from scratch')
            # Variable initialization
            sess.run(tf.global_variables_initializer())

            # For book-keeping, keep track of training and validation loss over epochs, like such:
            # [(train_acc_epoch1, valid_acc_epoch1), (train_acc_epoch2, valid_acc_epoch2), ...]
            loss_history = []

        # Record time elapsed for performance check
        last_time = time.time()
        train_start_time = time.time()

        # Run NUM_EPOCH epochs of training
        for epoch in range(NUM_EPOCH):
            train_gen = next_batch(X_train, y_train_conf, y_train_loc,
                                   BATCH_SIZE)
            num_batches_train = math.ceil(X_train.shape[0] / BATCH_SIZE)
            losses = []  # list of loss values for book-keeping

            # Run training on each batch
            for _ in range(num_batches_train):
                # Obtain the training data and labels from generator
                images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(
                    train_gen)

                # Perform gradient update (i.e. training step) on current batch
                _, loss = sess.run(
                    [optimizer, reported_loss],
                    feed_dict={
                        x: images,
                        y_true_conf: y_true_conf_gen,
                        y_true_loc: y_true_loc_gen,
                        conf_loss_mask: conf_loss_mask_gen,
                        is_training: True
                    })

                losses.append(loss)  # TODO: need mAP metric instead of raw loss

            # A rough estimate of loss for this epoch (overweights the last batch)
            train_loss = np.mean(losses)

            # Calculate validation loss at the end of the epoch
            valid_gen = next_batch(X_valid, y_valid_conf, y_valid_loc,
                                   BATCH_SIZE)
            num_batches_valid = math.ceil(X_valid.shape[0] / BATCH_SIZE)
            losses = []
            for _ in range(num_batches_valid):
                images, y_true_conf_gen, y_true_loc_gen, conf_loss_mask_gen = next(
                    valid_gen)

                # Perform forward pass and calculate loss
                loss = sess.run(reported_loss,
                                feed_dict={
                                    x: images,
                                    y_true_conf: y_true_conf_gen,
                                    y_true_loc: y_true_loc_gen,
                                    conf_loss_mask: conf_loss_mask_gen,
                                    is_training: False
                                })
                losses.append(loss)
            valid_loss = np.mean(losses)

            # Record and report train/validation/test losses for this epoch
            loss_history.append((train_loss, valid_loss))

            # Print accuracy every epoch
            print('Epoch %d -- Train loss: %.4f, Validation loss: %.4f, Elapsed time: %.2f sec' %
                  (epoch + 1, train_loss, valid_loss, time.time() - last_time))
            last_time = time.time()

            if SAVE_MODEL and SAVE_MODEL_EVERY_EPOCH:
                _ = saver.save(sess, MODEL_SAVE_PATH)

        total_time = time.time() - train_start_time
        print('Total elapsed time: %d min %d sec' %
              (total_time / 60, total_time % 60))

        test_loss = 0.  # TODO: Add test set

        if SAVE_MODEL:
            # Save model to disk
            save_path = saver.save(sess, MODEL_SAVE_PATH)
            print('Trained model saved at: %s' % save_path)

            # Also save loss history
            print('Loss history saved at loss_history.p')
            with open('loss_history.p', 'wb') as f:
                pickle.dump(loss_history, f)

    # Return final test loss and loss history
    return test_loss, loss_history
Example #8
'''
Visualize the model using TensorBoard
'''
import tensorflow as tf
from settings import *
from model import SSDModel

FM_ONLY = False  # Only want to see feature map sizes?

with tf.Graph().as_default(), tf.Session() as sess:
    if FM_ONLY:
        # Only want to see feature map sizes (e.g. loss function and vector concatenation not yet set up)
        if MODEL == 'AlexNet':
            from model import AlexNet as MyModel
        else:
            raise NotImplementedError('Model %s not supported' % MODEL)
        _ = MyModel()
    else:
        # This includes the entire graph, e.g. loss function, optimizer, etc.
        _ = SSDModel()

    tf.summary.merge_all()
    writer = tf.summary.FileWriter('./tensorboard_out', sess.graph)
    tf.global_variables_initializer().run()
    writer.close()  # ensure the event file is flushed to disk
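
Once the script has been run, the exported graph can be inspected by pointing TensorBoard at the output directory (tensorboard --logdir ./tensorboard_out) and opening the reported URL in a browser.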