Example #1
def create_generic_db(jobs_dir, dataset_id, stage):
    """
    Create a generic DB
    """

    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, dataset_id)
    if not os.path.isdir(dataset_dir):
        raise IOError("Dataset dir %s does not exist" % dataset_dir)
    dataset = Job.load(dataset_dir)

    # create instance of extension
    extension_id = dataset.extension_id
    extension_class = extensions.data.get_extension(extension_id)
    extension = extension_class(**dataset.extension_userdata)
    # encoding
    feature_encoding = dataset.feature_encoding
    label_encoding = dataset.label_encoding

    batch_size = dataset.batch_size
    num_threads = dataset.num_threads

    force_same_shape = dataset.force_same_shape

    # create main DB creator object and execute main method
    db_creator = DbCreator()
    db_creator.create_db(extension, stage, dataset_dir, batch_size,
                         num_threads, feature_encoding, label_encoding,
                         force_same_shape)

    logger.info('Generic DB creation Done')
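
A minimal invocation sketch (an assumption, not part of the example above): the function can be called directly once DIGITS is importable. Passing 'none' for jobs_dir falls back to the jobs_dir value from the DIGITS config; the dataset ID and stage value shown here are hypothetical placeholders.

if __name__ == '__main__':
    create_generic_db(
        jobs_dir='none',                    # fall back to the DIGITS config value
        dataset_id='20170101-000000-abcd',  # hypothetical dataset job ID
        stage='train',                      # assumed stage name
    )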
Example #2
def create_generic_db(jobs_dir, dataset_id, stage):
    """
    Create a generic DB
    """

    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, dataset_id)
    if not os.path.isdir(dataset_dir):
        raise IOError("Dataset dir %s does not exist" % dataset_dir)
    dataset = Job.load(dataset_dir)

    # create instance of extension
    extension_id = dataset.extension_id
    extension_class = extensions.data.get_extension(extension_id)
    extension = extension_class(**dataset.extension_userdata)

    # encoding
    feature_encoding = dataset.feature_encoding
    label_encoding = dataset.label_encoding

    batch_size = dataset.batch_size
    num_threads = dataset.num_threads

    force_same_shape = dataset.force_same_shape

    # create main DB creator object and execute main method
    db_creator = DbCreator()
    db_creator.create_db(
        extension,
        stage,
        dataset_dir,
        batch_size,
        num_threads,
        feature_encoding,
        label_encoding,
        force_same_shape)

    logger.info('Generic DB creation Done')
Example #3
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size,
          layers, gpu, input_is_db, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(
        dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" %
                             repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset,
                                                 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []  # indices of samples within file list
    input_data = []  # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                if resize:
                    image = utils.image.resize_image(image,
                                                     height,
                                                     width,
                                                     channels=channels,
                                                     resize_mode=resize_mode)
                else:
                    image = utils.image.image_to_array(image,
                                                       channels=channels)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" %
                             repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(
            input_data[0],
            snapshot_epoch=epoch,
            layers=layers,
            gpu=gpu,
            resize=resize)
    else:
        if layers != 'none':
            raise InferenceError(
                "Layer visualization is not supported for multiple inference")
        outputs = model.train_task().infer_many(input_data,
                                                snapshot_epoch=epoch,
                                                gpu=gpu,
                                                resize=resize)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
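
A read-back sketch (an assumption, not part of DIGITS): open the inference.hdf5 file written above and recover the outputs in their original order by decoding the urlsafe-base64 dataset names and sorting on the 'id' attribute that the writer attached.

import base64
import h5py

with h5py.File('inference.hdf5', 'r') as db:
    print('samples: %d' % len(db['input_ids']))
    # outputs were stored under urlsafe-base64-encoded names and tagged with
    # an 'id' attribute, so sort on that attribute to restore their order
    for dset in sorted(db['outputs'].values(), key=lambda d: d.attrs['id']):
        name = base64.urlsafe_b64decode(dset.name.split('/')[-1])
        print(name, dset.shape)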
Example #4
def infer(jobs_dir, model_id, epoch, batch_size, gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(
        dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" %
                             repr(epoch))

    input_data = []  # sample data
    input_labels = []  # sample labels

    # load images from database
    feature_db_path = dataset.get_feature_db_path(utils.constants.TRAIN_DB)
    feature_reader = DbReader(feature_db_path)

    label_db_path = dataset.get_label_db_path(utils.constants.TRAIN_DB)
    label_reader = DbReader(label_db_path)

    embeddings = {'count': 0, 'images': None, 'zs': None}

    def aggregate(images, labels, attributes, embeddings):
        # perform inference
        outputs = model.train_task().infer_many(images,
                                                snapshot_epoch=epoch,
                                                gpu=gpu,
                                                resize=False)
        z_vectors = outputs['output'][:, :100]
        for image, label, z in zip(images, labels, z_vectors):
            if embeddings['images'] is None:
                embeddings['images'] = np.empty((N_EMBEDDINGS, ) + image.shape)
            if embeddings['zs'] is None:
                embeddings['zs'] = np.empty((N_EMBEDDINGS, ) + z.shape)
            if embeddings['count'] < N_EMBEDDINGS:
                embeddings['images'][embeddings['count']] = image
                embeddings['zs'][embeddings['count']] = z
                embeddings['count'] += 1
                if embeddings['count'] == N_EMBEDDINGS:
                    save_embeddings(embeddings)

            for attribute in range(attributes['n_attributes']):
                if label[attribute] > 0:
                    attributes['positive_attribute_z'][attribute] += z
                    attributes['positive_count'][attribute] += 1
                else:
                    attributes['negative_attribute_z'][attribute] += z
                    attributes['negative_count'][attribute] += 1
        # save
        save_attributes(attributes)

    n_input_samples = 0
    label_len = None
    z_dim = 100
    for key, value in feature_reader.entries():
        img = parse_datum(value)
        label = parse_datum(label_reader.entry(key))[0]
        if label_len is None:
            label_len = len(label)
            attributes = {
                'n_attributes': label_len,
                'negative_count': np.zeros(label_len),
                'positive_count': np.zeros(label_len),
                'negative_attribute_z': np.zeros((label_len, z_dim)),
                'positive_attribute_z': np.zeros((label_len, z_dim)),
            }
        elif label_len != len(label):
            raise ValueError("label len differs: %d vs %d" %
                             (label_len, len(label)))
        input_data.append(img)
        input_labels.append(label)
        n_input_samples = n_input_samples + 1
        if n_input_samples % batch_size == 0:
            aggregate(input_data, input_labels, attributes, embeddings)
            print("######## %d processed ########" % n_input_samples)
            input_data = []  # sample data
            input_labels = []  # sample labels

    if n_input_samples % batch_size != 0:
        aggregate(input_data, input_labels, attributes, embeddings)
        print("######## %d processed ########" % n_input_samples)
Example #5
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(self.epoch))

    # retrieve image dimensions and resize mode
    if isinstance(dataset, ImageClassificationDatasetJob):
        height = dataset.image_dims[0]
        width = dataset.image_dims[1]
        channels = dataset.image_dims[2]
        resize_mode = dataset.resize_mode
    elif isinstance(dataset, GenericImageDatasetJob):
        db_task = dataset.analyze_db_tasks()[0]
        height = db_task.image_height
        width = db_task.image_width
        channels = db_task.image_channels
        resize_mode = 'squash'
    else:
        raise InferenceError("Unknown dataset type")

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()
    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())
            image = utils.image.resize_image(image,
                        height, width,
                        channels    = channels,
                        resize_mode = resize_mode,
                        )
            input_ids.append(idx)
            input_data.append(image)
            n_input_samples = n_input_samples + 1
        except utils.errors.LoadImageError as e:
            print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
    else:
        assert layers == 'none'
        outputs = model.train_task().infer_many(input_data, snapshot_epoch=epoch, gpu=gpu)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data = input_ids)
    db.create_dataset("input_data", data = input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations)>0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
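
The snapshot lookup appears verbatim in every example; a factored-out sketch (assumption) of the same logic:

def find_snapshot(snapshots, epoch):
    # snapshots is a list of (filename, epoch) tuples from the training task;
    # epoch == -1 means "use the most recent snapshot"
    if epoch == -1 and snapshots:
        return snapshots[-1]
    for filename, e in snapshots:
        if e == epoch:
            return filename, epoch
    raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))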
Example #6
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size,
          layers, gpu, input_is_db, label_file, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(
        dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" %
                             repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset,
                                                 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []  # indices of samples within file list
    input_data = []  # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                if resize:
                    image = utils.image.resize_image(image,
                                                     height,
                                                     width,
                                                     channels=channels,
                                                     resize_mode=resize_mode)
                else:
                    image = utils.image.image_to_array(image,
                                                       channels=channels)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    labels = np.loadtxt(label_file, dtype='object')
    if fig is not None:
        # Plot original images to grid
        for row in range(NUM_ROWS):
            for col in range(NUM_COLS):
                idx = row * NUM_COLS + col
                pl.subplot(NUM_ROWS * 2, NUM_COLS,
                           row * 2 * NUM_COLS + col + 1)
                pl.xticks([])
                pl.yticks([])
                pl.imshow(input_data[idx], interpolation='nearest')

    # perform inference
    visualizations = None

    logger.info('Inference')
    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" %
                             repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        logger.info('Start')
        outputs, visualizations = model.train_task().infer_one(
            input_data[0],
            snapshot_epoch=epoch,
            layers=layers,
            gpu=gpu,
            resize=resize)
        logger.info('Done!')
    else:
        if layers != 'none':
            raise InferenceError(
                "Layer visualization is not supported for multiple inference")
        outputs = model.train_task().infer_many(input_data,
                                                snapshot_epoch=epoch,
                                                gpu=gpu,
                                                resize=resize)

    logger.info('Now it\'s time to pass results to write')

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        if fig is not None:
            # Plot top-K inferences on grids
            for elem_id, elem_data in enumerate(output_data):
                row = elem_id // NUM_COLS
                col = elem_id % NUM_COLS
                img_labels = sorted(zip(elem_data, labels),
                                    key=lambda x: x[0])[-NUM_TOPK_CLASSES:]
                ax = pl.subplot(NUM_ROWS * 2,
                                NUM_COLS, (row * 2 + 1) * NUM_COLS + col + 1,
                                aspect='equal')
                ax.yaxis.set_label_position("right")
                ax.yaxis.set_label_coords(1.25, 0.5)
                pl.ylabel('Confidence score', rotation=-90, fontsize=16)

                height = 0.5
                ylocs = np.array(range(NUM_TOPK_CLASSES)) * height + 0.1
                width = max(ylocs)
                top_class = img_labels[-1][1]
                pl.barh(ylocs, [l[0]*width for l in img_labels], height=height, \
                        color=['r' if l[1] == top_class else 'b' for l in img_labels]) #color=['r' if l[1] == labels[true_label] else 'b' for l in img_labels])
                pl.yticks(ylocs + height / 2, [l[1] for l in img_labels],
                          fontsize=14)
                pl.xticks([0, width / 2.0, width], ['0%', '50%', '100%'])
                pl.ylim(0, ylocs[-1] + height + 0.1)
            pl.tight_layout()
            pl.show()
            fig.savefig('./test.pdf', dpi=300)
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
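
A standalone sketch (assumption) of the top-K selection used for the bar plots above:

def top_k(scores, labels, k):
    # pair each confidence score with its label, sort ascending by score and
    # keep the last k entries, so the most confident class ends up last
    ranked = sorted(zip(scores, labels), key=lambda x: x[0])
    return ranked[-k:]

# e.g. top_k(output_data[0], labels, NUM_TOPK_CLASSES)[-1][1] is the top class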
Example #7
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu, input_is_db):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1,2,0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:,:,0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:,:,[2,1,0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                image = utils.image.resize_image(image,
                            height, width,
                            channels    = channels,
                            resize_mode = resize_mode,
                            )
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
    else:
        assert layers == 'none'
        outputs = model.train_task().infer_many(input_data, snapshot_epoch=epoch, gpu=gpu)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data = input_ids)
    db.create_dataset("input_data", data = input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations)>0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
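
The datum decoding above is repeated in several examples; a factored-out sketch (assumption) of the same conversion from a serialized caffe Datum to an HWC numpy array:

def datum_to_image(value):
    datum = caffe_pb2.Datum()
    datum.ParseFromString(value)
    if datum.encoded:
        # encoded images are decoded via PIL
        s = StringIO()
        s.write(datum.data)
        s.seek(0)
        return np.array(PIL.Image.open(s))
    import caffe.io
    arr = caffe.io.datum_to_array(datum).transpose((1, 2, 0))  # CHW -> HWC
    if arr.shape[2] == 1:
        return arr[:, :, 0]          # single channel -> HW
    if arr.shape[2] == 3:
        return arr[:, :, [2, 1, 0]]  # BGR -> RGB (see issue #59)
    return arr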
Example #8
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(self.epoch))

    # retrieve image dimensions and resize mode
    if isinstance(dataset, ImageClassificationDatasetJob):
        height = dataset.image_dims[0]
        width = dataset.image_dims[1]
        channels = dataset.image_dims[2]
        resize_mode = dataset.resize_mode
    elif isinstance(dataset, GenericImageDatasetJob):
        db_task = dataset.analyze_db_tasks()[0]
        height = db_task.image_height
        width = db_task.image_width
        channels = db_task.image_channels
        resize_mode = 'squash'
    else:
        raise InferenceError("Unknown dataset type")

    # retrieve batch size (unless specified on command line)
    if batch_size is None:
        batch_size = task.get_test_batch_size()

    n_loaded_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    # create hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')
    db.create_group("outputs")

    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()
    n_input_paths = len(paths)

    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())
            image = utils.image.resize_image(image,
                        height, width,
                        channels    = channels,
                        resize_mode = resize_mode,
                        )
            input_ids.append(idx)
            input_data.append(image)
            n_loaded_samples = n_loaded_samples + 1
        except utils.errors.LoadImageError as e:
            print e
        # do we have a full batch, or have we reached the last item?
        if (not n_loaded_samples % batch_size) or (idx == n_input_paths - 1):
            # any item(s) left to save?
            if len(input_ids) > 0:
                # perform inference
                outputs, visualizations = infer_batch(model, input_data, epoch, layers, gpu)
                # save visualizations
                if visualizations is not None and len(visualizations)>0:
                    save_visualizations(db, visualizations)
                # save other data
                save_data(db, n_input_paths, n_loaded_samples, input_ids, input_data, outputs)
                # empty input lists
                input_ids = []
                input_data = []
                logger.info('Processed %d/%d images', idx+1, n_input_paths)

    if n_loaded_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))

    db.close()
    logger.info('Saved data to %s', db_path)
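
A generic chunking sketch (assumption), equivalent to the batch-flush condition above when every image loads successfully:

def batches(items, batch_size):
    batch = []
    for item in items:
        batch.append(item)
        if len(batch) == batch_size:
            yield batch
            batch = []
    if batch:
        yield batch  # flush the final, possibly smaller batch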
Example #9
def infer(input_list,
          output_dir,
          jobs_dir,
          model_id,
          epoch,
          batch_size,
          layers,
          gpu,
          input_is_db,
          resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))
    
    # Set color dataset
    kwargs = {'colormap': 'dataset'}
    vis = Visualization(dataset, **kwargs)
    
    # Delete existing png segmented images
    for filename in glob.glob("/home/scania/Scania/Agneev/Tmp/*"):
        os.remove(filename) 

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data
    input_filename = []

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        try:
            if input_list.endswith('.h264') or input_list.endswith('.raw'):
                logging.info('Reading video...')
                ## http://stackoverflow.com/questions/33650974/opencv-python-read-specific-frame-using-videocapture
                cap = cv2.VideoCapture(input_list) #'/home/scania/Scania/Glantan_Recordings/2017-03-24_DrivePX2/dw_20170324_115921_0.000000_0.000000/video_front.h264')
                print cap
                frame_no = 0
                while frame_no < sys.maxint:
                    cap.set(1,frame_no);
                    ret, cv2_im = cap.read()
                    #if not ret:
                    #    break
                    cv2_im = cv2.cvtColor(cv2_im,cv2.COLOR_BGR2RGB)
                    image = PIL.Image.fromarray(cv2_im)
                    # print image
                    if resize:
                        image = utils.image.resize_image(
                            image,
                            height,
                            width,
                            channels=channels,
                            resize_mode=resize_mode)
                    else:
                        image = utils.image.image_to_array(
                            image,
                            channels=channels)
                    # single image inference
                    outputs, visualizations = model.train_task().infer_one(
                        image,
                        snapshot_epoch=epoch,
                        layers=layers,
                        gpu=gpu,
                        resize=resize)

                    out = dict([outputs.items()][0])
                    out['score'] = out.items()[0][1][0]
                    vis.process_data(n_input_samples, image, out, 'Video_file')
                    n_input_samples = n_input_samples + 1
                    frame_no = frame_no + 30

            elif input_list.endswith('.txt'):
    
                logging.info('Reading images...')
                with open(input_list) as infile:
                    paths = infile.readlines()
                # load and resize images
                for idx, path in enumerate(paths):
                    path = path.strip()
                    try:
                        image = utils.image.load_image(path.strip())
                        if resize:
                            image = utils.image.resize_image(
                                image,
                                height,
                                width,
                                channels=channels,
                                resize_mode=resize_mode)
                        else:
                            image = utils.image.image_to_array(
                                image,
                                channels=channels)

                        # single image inference
                        outputs, visualizations = model.train_task().infer_one(
                            image,
                            snapshot_epoch=epoch,
                            layers=layers,
                            gpu=gpu,
                            resize=resize)

                        # Find filename
                        head, tail = os.path.split(path)
                        filename = tail.split('.')[0]
                        out = dict([outputs.items()][0])
                        out['score'] = out.items()[0][1][0]
                        vis.process_data(n_input_samples, image, out, filename)
                        n_input_samples = n_input_samples + 1

                    except utils.errors.LoadImageError as e:
                        print e
            else:
                print 'Cannot read image or video file. \nPlease provide .h264, .raw or .txt file only.'
        except cv2.error as e:
            print e
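
The video loop above relies on the outer cv2.error handler to stop at the end of the stream because the `ret` check is commented out; a safer frame-stepping sketch (assumption):

cap = cv2.VideoCapture(input_list)
frame_no = 0
while True:
    cap.set(1, frame_no)  # property 1 is CAP_PROP_POS_FRAMES
    ret, cv2_im = cap.read()
    if not ret:
        break  # end of stream reached
    cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
    frame_no += 30  # step roughly one second at 30 fps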
Example #10
def infer(jobs_dir,
          model_id,
          epoch,
          batch_size,
          gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    input_data = []      # sample data
    input_labels = []    # sample labels

    # load images from database
    feature_db_path = dataset.get_feature_db_path(utils.constants.TRAIN_DB)
    feature_reader = DbReader(feature_db_path)

    label_db_path = dataset.get_label_db_path(utils.constants.TRAIN_DB)
    label_reader = DbReader(label_db_path)

    embeddings = {'count': 0, 'images': None, 'zs': None}

    def aggregate(images, labels, attributes, embeddings):
        # perform inference
        outputs = model.train_task().infer_many(
            images,
            snapshot_epoch=epoch,
            gpu=gpu,
            resize=False)
        z_vectors = outputs['output'][:, :100]
        for image, label, z in zip(images, labels, z_vectors):
            if embeddings['images'] is None:
                embeddings['images'] = np.empty((N_EMBEDDINGS,) + image.shape)
            if embeddings['zs'] is None:
                embeddings['zs'] = np.empty((N_EMBEDDINGS,) + z.shape)
            if embeddings['count'] < N_EMBEDDINGS:
                embeddings['images'][embeddings['count']] = image
                embeddings['zs'][embeddings['count']] = z
                embeddings['count'] += 1
                if embeddings['count'] == N_EMBEDDINGS:
                    save_embeddings(embeddings)

            for attribute in range(attributes['n_attributes']):
                if label[attribute] > 0:
                    attributes['positive_attribute_z'][attribute] += z
                    attributes['positive_count'][attribute] += 1
                else:
                    attributes['negative_attribute_z'][attribute] += z
                    attributes['negative_count'][attribute] += 1
        # save
        save_attributes(attributes)

    n_input_samples = 0
    label_len = None
    z_dim = 100
    for key, value in feature_reader.entries():
        img = parse_datum(value)
        label = parse_datum(label_reader.entry(key))[0]
        if label_len is None:
            label_len = len(label)
            attributes = {
                'n_attributes': label_len,
                'negative_count': np.zeros(label_len),
                'positive_count': np.zeros(label_len),
                'negative_attribute_z': np.zeros((label_len, z_dim)),
                'positive_attribute_z': np.zeros((label_len, z_dim)),
            }
        elif label_len != len(label):
            raise ValueError("label len differs: %d vs %d" % (label_len, len(label)))
        input_data.append(img)
        input_labels.append(label)
        n_input_samples = n_input_samples + 1
        if n_input_samples % batch_size == 0:
            aggregate(input_data, input_labels, attributes, embeddings)
            print("######## %d processed ########" % n_input_samples)
            input_data = []      # sample data
            input_labels = []    # sample labels

    if n_input_samples % batch_size != 0:
        aggregate(input_data, input_labels, attributes, embeddings)
        print("######## %d processed ########" % n_input_samples)