示例#1
0
    def path_to_datum(self, path, label,
            image_sum = None):
        """
        Creates a Datum from a path and a label
        May also update image_sum, if computing mean

        Arguments:
        path -- path to the image (filesystem path or URL)
        label -- numeric label for this image's category

        Keyword arguments:
        image_sum -- numpy array that stores a running sum of added images

        Returns the populated Datum.

        Raises:
        Exception -- if the resized image is neither 2-D nor 3-D (raw mode)
        ValueError -- if self.encoding is set but is neither 'png' nor 'jpg'
        """
        # prepend path with image_folder, if appropriate
        if not utils.is_url(path) and self.image_folder and not os.path.isabs(path):
            path = os.path.join(self.image_folder, path)

        image = utils.image.load_image(path)
        image = utils.image.resize_image(image,
                self.height, self.width,
                channels    = self.channels,
                resize_mode = self.resize_mode,
                )

        if self.compute_mean and image_sum is not None:
            # accumulate into the caller's running sum
            image_sum += image

        if not self.encoding or self.encoding == 'none':
            # Transform to caffe's format requirements
            if image.ndim == 3:
                # Transpose to (channels, height, width)
                image = image.transpose((2,0,1))
                if image.shape[0] == 3:
                    # channel swap
                    # XXX see issue #59
                    image = image[[2,1,0],...]
            elif image.ndim == 2:
                # Add a channels axis
                image = image[np.newaxis,:,:]
            else:
                # Wrap the shape in a 1-tuple: "%" would otherwise try to
                # spread the shape tuple over the single placeholder and
                # raise TypeError instead of this message.
                raise Exception('Image has unrecognized shape: "%s"' % (image.shape,))
            return caffe.io.array_to_datum(image, label)

        # Validate the encoding before building the Datum so that an
        # unsupported value cannot silently produce an "encoded" Datum
        # whose data is empty.
        if self.encoding == 'png':
            save_kwargs = {'format': 'PNG'}
        elif self.encoding == 'jpg':
            save_kwargs = {'format': 'JPEG', 'quality': 90}
        else:
            raise ValueError('Unsupported encoding: "%s"' % (self.encoding,))

        datum = caffe_pb2.Datum()
        if image.ndim == 3:
            datum.channels = image.shape[2]
        else:
            datum.channels = 1
        datum.height = image.shape[0]
        datum.width = image.shape[1]
        datum.label = label

        # Serialize the image into an in-memory buffer
        s = StringIO()
        PIL.Image.fromarray(image).save(s, **save_kwargs)
        datum.data = s.getvalue()
        datum.encoded = True
        return datum
示例#2
0
 def validate_folder_path(form, field):
     """
     Validate that the field holds a reachable URL or an existing folder.

     An empty field is accepted and returns None. Otherwise returns True,
     or raises validators.ValidationError when the URL request fails, the
     URL answers with a status other than ok/moved/found, or the
     filesystem path is not an existing directory.
     """
     if not field.data:
         return None

     if utils.is_url(field.data):
         # make sure the URL exists
         try:
             r = requests.get(field.data,
                              allow_redirects=False,
                              timeout=utils.HTTP_TIMEOUT)
         except Exception as e:
             raise validators.ValidationError(
                 'Caught %s while checking URL: %s' % (type(e).__name__, e))
         # The status check lives outside the try block so that its own
         # ValidationError is not caught and re-wrapped as
         # "Caught ValidationError while checking URL".
         if r.status_code not in [
                 requests.codes.ok, requests.codes.moved,
                 requests.codes.found
         ]:
             raise validators.ValidationError('URL not found')
         return True

     # make sure the filesystem path exists (isdir is False for missing paths)
     if not os.path.isdir(field.data):
         raise validators.ValidationError('Folder does not exist')
     return True
示例#3
0
def _load_thread(load_queue,
                 write_queue,
                 summary_queue,
                 image_width,
                 image_height,
                 image_channels,
                 resize_mode,
                 image_folder,
                 compute_mean,
                 backend=None,
                 encoding=None):
    """
    Worker loop that turns (path, label) entries into resized images.

    Entries are pulled from load_queue until it reports empty. Each image
    is loaded, resized and pushed to write_queue, as a datum built by
    _array_to_datum when backend is 'lmdb' and as an (image, label) tuple
    otherwise. Images that raise LoadImageError are logged and skipped.
    When the loop ends, (images_added, image_sum) is put on summary_queue;
    image_sum is None unless compute_mean is set.
    """
    image_sum = None
    if compute_mean:
        image_sum = _initial_image_sum(image_width, image_height,
                                       image_channels)
    images_added = 0

    while not load_queue.empty():
        try:
            entry = load_queue.get(True, 0.05)
        except Queue.Empty:
            # another consumer emptied the queue first; re-check the loop condition
            continue
        path, label = entry

        # relative filesystem paths are resolved against image_folder
        leave_as_is = (utils.is_url(path) or not image_folder
                       or os.path.isabs(path))
        if not leave_as_is:
            path = os.path.join(image_folder, path)

        try:
            loaded = utils.image.load_image(path)
        except utils.errors.LoadImageError as e:
            logger.warning('[%s %s] %s: %s' %
                           (path, label, type(e).__name__, e))
            continue

        resized = utils.image.resize_image(
            loaded,
            image_height,
            image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )

        if compute_mean:
            image_sum += resized

        if backend == 'lmdb':
            payload = _array_to_datum(resized, label, encoding)
        else:
            payload = (resized, label)
        write_queue.put(payload)

        images_added += 1

    summary_queue.put((images_added, image_sum))
示例#4
0
    def path_to_datum(self, path, label, image_sum=None):
        """
        Creates a Datum from a path and a label
        May also update image_sum, if computing mean

        Arguments:
        path -- path to the image (filesystem path or URL)
        label -- numeric label for this image's category

        Keyword arguments:
        image_sum -- numpy array that stores a running sum of added images

        Returns the Datum, or None when load_image returns None.

        Raises:
        Exception -- if the resized image is neither 2-D nor 3-D (raw mode)
        """
        # prepend path with image_folder, if appropriate
        if not utils.is_url(path) and self.image_folder and not os.path.isabs(
                path):
            path = os.path.join(self.image_folder, path)

        image = utils.image.load_image(path)
        if image is None:
            return None

        # Resize
        image = utils.image.resize_image(
            image,
            self.height,
            self.width,
            channels=self.channels,
            resize_mode=self.resize_mode,
        )

        if self.compute_mean and image_sum is not None:
            # accumulate into the caller's running sum
            image_sum += image

        if self.encode:
            datum = caffe_pb2.Datum()
            if image.ndim == 3:
                datum.channels = image.shape[2]
            else:
                datum.channels = 1
            datum.height = image.shape[0]
            datum.width = image.shape[1]
            datum.label = label
            datum.encoded = True

            # Serialize as JPEG into an in-memory buffer
            s = StringIO()
            PIL.Image.fromarray(image).save(s, format='JPEG', quality=90)
            datum.data = s.getvalue()
        else:
            # Transform to caffe's format requirements
            if image.ndim == 3:
                # Transpose to (channels, height, width)
                image = image.transpose((2, 0, 1))
            elif image.ndim == 2:
                # Add a channels axis
                image = image[np.newaxis, :, :]
            else:
                # Wrap the shape in a 1-tuple: "%" would otherwise try to
                # spread the shape tuple over the single placeholder and
                # raise TypeError instead of this message.
                raise Exception('Image has unrecognized shape: "%s"' %
                                (image.shape,))
            datum = caffe.io.array_to_datum(image, label)
        return datum
示例#5
0
def validate_folder(folder):
    """
    Check that folder is a usable image source.

    For a URL, a HEAD request must answer with an ok/moved/found status.
    For a filesystem path, it must exist, be a directory and be readable.
    Returns True when the check passes; otherwise logs the reason and
    returns False.
    """
    if utils.is_url(folder):
        try:
            response = requests.head(folder, timeout=utils.HTTP_TIMEOUT)
            status = response.status_code
            reachable = status in [
                requests.codes.ok, requests.codes.moved,
                requests.codes.found
            ]
            if not reachable:
                logger.error('"%s" returned status_code %s' %
                             (folder, response.status_code))
        except Exception as e:
            logger.error('%s: %s' % (type(e).__name__, e))
            reachable = False
        return reachable

    # Filesystem path: report the first failing check
    problem = None
    if not os.path.exists(folder):
        problem = 'folder "%s" does not exist' % folder
    elif not os.path.isdir(folder):
        problem = '"%s" is not a directory' % folder
    elif not os.access(folder, os.R_OK):
        problem = 'you do not have read access to folder "%s"' % folder

    if problem is not None:
        logger.error(problem)
        return False
    return True
示例#6
0
def read_image_list(image_list, image_folder, num_test_images):
    """
    Parse an image list into parallel lists of paths and ground truths.

    Each non-blank line is a path, optionally followed by whitespace and
    an integer label. Relative filesystem paths are joined onto
    image_folder when one is given. Reading stops once num_test_images
    paths have been collected (no limit when it is None).

    Returns (paths, ground_truths); a ground truth is None for lines
    without a trailing label.
    """
    labelled_line = re.compile(r'(.*\S)\s+(\d+)$')
    paths, ground_truths = [], []

    for raw_line in image_list.readlines():
        entry = raw_line.strip()
        if not entry:
            continue

        # might contain a numerical label at the end
        found = labelled_line.match(entry)
        if found:
            path, ground_truth = found.group(1), int(found.group(2))
        else:
            path, ground_truth = entry, None

        leave_as_is = (utils.is_url(path) or not image_folder
                       or os.path.isabs(path))
        if not leave_as_is:
            path = os.path.join(image_folder, path)

        paths.append(path)
        ground_truths.append(ground_truth)

        limit_reached = (num_test_images is not None
                         and len(paths) >= num_test_images)
        if limit_reached:
            break

    return paths, ground_truths
示例#7
0
def _load_thread(load_queue, write_queue, summary_queue, image_width,
                 image_height, image_channels, resize_mode, image_folder,
                 compute_mean, get_bboxes, scale_factor):
    """
    Worker loop that turns queued entries into resized images.

    Entries are pulled from load_queue until it reports empty: (path,
    bbox, label) when get_bboxes is set, (path, label) otherwise. Each
    image is loaded, cropped through a BoundingBoxExtractor when
    get_bboxes is set, resized, and pushed to write_queue as
    (image, label). Entries that raise LoadImageError are logged and
    skipped. When the loop ends, (images_added, image_sum) is put on
    summary_queue; image_sum is None unless compute_mean is set.
    """
    if get_bboxes:
        extract_bbox_patches = BoundingBoxExtractor(scale_factor=scale_factor)

    image_sum = None
    if compute_mean:
        image_sum = _initial_image_sum(image_width, image_height,
                                       image_channels)
    images_added = 0

    while not load_queue.empty():
        try:
            entry = load_queue.get(True, 0.05)
        except Queue.Empty:
            # another consumer emptied the queue first; re-check the loop condition
            continue

        if get_bboxes:
            path, bbox, label = entry
        else:
            path, label = entry

        # relative filesystem paths are resolved against image_folder
        leave_as_is = (utils.is_url(path) or not image_folder
                       or os.path.isabs(path))
        if not leave_as_is:
            path = os.path.join(image_folder, path)

        try:
            patch = utils.image.load_image(path)
            if get_bboxes:
                ## TODO - Make more efficient - currently loads image for each bbox in that image.
                patch = extract_bbox_patches.extract(patch, bbox)
        except utils.errors.LoadImageError as e:
            logger.warning('[%s] %s: %s' % (path, type(e).__name__, e))
            continue

        resized = utils.image.resize_image(
            patch,
            image_height,
            image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )

        if compute_mean:
            image_sum += resized

        write_queue.put((resized, label))
        images_added += 1

    summary_queue.put((images_added, image_sum))
示例#8
0
def _load_thread(load_queue, write_queue, summary_queue,
        image_width, image_height, image_channels,
        resize_mode, image_folder, compute_mean,
        backend=None, encoding=None):
    """
    Consumes items in load_queue
    Produces items to write_queue
    Stores cumulative results in summary_queue

    Each (path, label) item is loaded with up to 100 attempts; every
    LoadImageError is logged, and an item that never loads is skipped.
    Loaded images are resized and queued, as a datum built by
    _array_to_datum when backend is 'lmdb' and as an (image, label)
    tuple otherwise. Finally (images_added, image_sum) is put on
    summary_queue; image_sum is None unless compute_mean is set.
    """
    max_retries = 100

    images_added = 0
    if compute_mean:
        image_sum = _initial_image_sum(image_width, image_height, image_channels)
    else:
        image_sum = None

    while not load_queue.empty():
        try:
            path, label = load_queue.get(True, 0.05)
        except Queue.Empty:
            continue

        # prepend path with image_folder, if appropriate
        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)

        # Reset per item: otherwise a failed load would leave `image`
        # unbound on the first item, or still holding the previous item's
        # image, which would then be written with this item's label.
        image = None
        for attempt in xrange(max_retries):
            try:
                image = utils.image.load_image(path)
            except utils.errors.LoadImageError as e:
                logger.warning('[%s (%d)] %s: %s' % (path, attempt, type(e).__name__, e))
            else:
                break
        if image is None:
            logger.warning('[%s]: Failed to load even in %d retries' % (path, max_retries))
            continue

        image = utils.image.resize_image(image,
                image_height, image_width,
                channels    = image_channels,
                resize_mode = resize_mode,
                )

        if compute_mean:
            image_sum += image

        if backend == 'lmdb':
            datum = _array_to_datum(image, label, encoding)
            write_queue.put(datum)
        else:
            write_queue.put((image, label))

        images_added += 1

    summary_queue.put((images_added, image_sum))
示例#9
0
def _load_thread(load_queue, write_queue, summary_queue,
        image_width, image_height, image_channels,
        resize_mode, image_folder, compute_mean, get_bboxes, scale_factor):
    """
    Worker loop that turns queued entries into resized images.

    Entries are pulled from load_queue until it reports empty: (path,
    bbox, label) when get_bboxes is set, (path, label) otherwise. Each
    image is loaded, cropped through a BoundingBoxExtractor when
    get_bboxes is set, resized, and pushed to write_queue as
    (image, label). Entries that raise LoadImageError are logged and
    skipped. When the loop ends, (images_added, image_sum) is put on
    summary_queue; image_sum is None unless compute_mean is set.
    """
    if get_bboxes:
        extract_bbox_patches = BoundingBoxExtractor(scale_factor=scale_factor)

    image_sum = None
    if compute_mean:
        image_sum = _initial_image_sum(image_width, image_height, image_channels)
    images_added = 0

    while not load_queue.empty():
        try:
            entry = load_queue.get(True, 0.05)
        except Queue.Empty:
            # another consumer emptied the queue first; re-check the loop condition
            continue

        if get_bboxes:
            path, bbox, label = entry
        else:
            path, label = entry

        # relative filesystem paths are resolved against image_folder
        leave_as_is = utils.is_url(path) or not image_folder or os.path.isabs(path)
        if not leave_as_is:
            path = os.path.join(image_folder, path)

        try:
            patch = utils.image.load_image(path)
            if get_bboxes:
                ## TODO - Make more efficient - currently loads image for each bbox in that image.
                patch = extract_bbox_patches.extract(patch, bbox)
        except utils.errors.LoadImageError as e:
            logger.warning('[%s] %s: %s' % (path, type(e).__name__, e) )
            continue

        resized = utils.image.resize_image(patch,
                image_height, image_width,
                channels    = image_channels,
                resize_mode = resize_mode,
                )

        if compute_mean:
            image_sum += resized

        write_queue.put((resized, label))
        images_added += 1

    summary_queue.put((images_added, image_sum))
示例#10
0
文件: forms.py 项目: dchall88/DIGITS
 def validate_folder_path(form, field):
     """
     Validate that the field holds a reachable URL or an existing folder.

     An empty field is accepted and returns None. Otherwise returns True,
     or raises validators.ValidationError when the URL request fails, the
     URL answers with a status other than ok/moved/found, or the
     filesystem path is not an existing directory.
     """
     if not field.data:
         return None

     if utils.is_url(field.data):
         # make sure the URL exists
         try:
             r = requests.get(field.data, allow_redirects=False, timeout=utils.HTTP_TIMEOUT)
         except Exception as e:
             raise validators.ValidationError("Caught %s while checking URL: %s" % (type(e).__name__, e))
         # The status check lives outside the try block so that its own
         # ValidationError is not caught and re-wrapped as
         # "Caught ValidationError while checking URL".
         if r.status_code not in [requests.codes.ok, requests.codes.moved, requests.codes.found]:
             raise validators.ValidationError("URL not found")
         return True

     # make sure the filesystem path exists (isdir is False for missing paths)
     if not os.path.isdir(field.data):
         raise validators.ValidationError("Folder does not exist")
     return True
示例#11
0
def _load_thread(load_queue, write_queue, summary_queue,
        image_width, image_height, image_channels,
        resize_mode, image_folder, compute_mean):
    """
    Worker loop that turns (path, label) entries into resized images.

    Entries are pulled from load_queue until it reports empty. Each image
    is loaded, resized and pushed to write_queue as (image, label).
    Images that raise LoadImageError are logged and skipped. When the
    loop ends, (images_added, image_sum) is put on summary_queue;
    image_sum is None unless compute_mean is set.
    """
    image_sum = None
    if compute_mean:
        image_sum = _initial_image_sum(image_width, image_height, image_channels)
    images_added = 0

    while not load_queue.empty():
        try:
            entry = load_queue.get(True, 0.05)
        except Queue.Empty:
            # another consumer emptied the queue first; re-check the loop condition
            continue
        path, label = entry

        # relative filesystem paths are resolved against image_folder
        leave_as_is = utils.is_url(path) or not image_folder or os.path.isabs(path)
        if not leave_as_is:
            path = os.path.join(image_folder, path)

        try:
            loaded = utils.image.load_image(path)
        except utils.errors.LoadImageError as e:
            logger.warning('[%s] %s: %s' % (path, type(e).__name__, e) )
            continue

        resized = utils.image.resize_image(loaded,
                image_height, image_width,
                channels    = image_channels,
                resize_mode = resize_mode,
                )

        if compute_mean:
            image_sum += resized

        write_queue.put((resized, label))
        images_added += 1

    summary_queue.put((images_added, image_sum))
示例#12
0
def validate_folder(folder):
    """
    Check that folder is a usable image source.

    For a URL, a HEAD request must answer with an ok/moved/found status.
    For a filesystem path, it must exist, be a directory and be readable.
    Returns True when the check passes; otherwise logs the reason and
    returns False.
    """
    if utils.is_url(folder):
        try:
            response = requests.head(folder, timeout=utils.HTTP_TIMEOUT)
            status = response.status_code
            reachable = status in [requests.codes.ok, requests.codes.moved, requests.codes.found]
            if not reachable:
                logger.error('"%s" returned status_code %s' % (folder, response.status_code))
        except Exception as e:
            logger.error('%s: %s' % (type(e).__name__, e))
            reachable = False
        return reachable

    # Filesystem path: report the first failing check
    problem = None
    if not os.path.exists(folder):
        problem = 'folder "%s" does not exist' % folder
    elif not os.path.isdir(folder):
        problem = '"%s" is not a directory' % folder
    elif not os.access(folder, os.R_OK):
        problem = 'you do not have read access to folder "%s"' % folder

    if problem is not None:
        logger.error(problem)
        return False
    return True
示例#13
0
def classify_many():
    """
    Classify many images and return the top 5 classifications for each

    Reads the uploaded 'image_list' file (one path per line, optionally
    followed by a numeric ground-truth label) and the optional
    'image_folder' and 'snapshot_epoch' form fields. Images that raise
    LoadImageError are printed and skipped.

    Returns JSON when requested: {classifications: {filename: [[category,confidence],...],...}}
    Otherwise renders the classify_many template.

    Raises:
    werkzeug.exceptions.BadRequest -- missing image_list, non-existent
        image_folder, or no image could be loaded
    RuntimeError -- if inference returns no scores
    """
    job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest('image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        epoch = float(flask.request.form['snapshot_epoch'])

    paths = []
    images = []
    ground_truths = []
    dataset = job.train_task().dataset

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        try:
            if not utils.is_url(path) and image_folder and not os.path.isabs(path):
                path = os.path.join(image_folder, path)
            image = utils.image.load_image(path)
            image = utils.image.resize_image(image,
                    dataset.image_dims[0], dataset.image_dims[1],
                    channels    = dataset.image_dims[2],
                    resize_mode = dataset.resize_mode,
                    )
            # only record the entry once the image is known to be usable,
            # so the three lists stay aligned
            paths.append(path)
            images.append(image)
            ground_truths.append(ground_truth)
        except utils.errors.LoadImageError as e:
            # best-effort: report and skip images that cannot be loaded
            print(e)

    if not images:
        raise werkzeug.exceptions.BadRequest(
                'Unable to load any images from the file')

    labels, scores = job.train_task().infer_many(images, snapshot_epoch=epoch)
    if scores is None:
        raise RuntimeError('An error occurred while processing the images')

    # take top 5
    indices = (-scores).argsort()[:, :5]

    classifications = []
    for image_index, index_list in enumerate(indices):
        result = []
        for i in index_list:
            # `i` is a category in labels and also an index into scores
            result.append((labels[i], round(100.0*scores[image_index, i],2)))
        classifications.append(result)

    # replace ground truth indices with labels
    ground_truths = [labels[x] if x is not None and (0 <= x < len(labels)) else None for x in ground_truths]

    if request_wants_json():
        joined = dict(zip(paths, classifications))
        return flask.jsonify({'classifications': joined})
    else:
        return flask.render_template('models/images/classification/classify_many.html',
                job             = job,
                paths           = paths,
                classifications = classifications,
                show_ground_truth= not(ground_truths == [None]*len(ground_truths)),
                ground_truths   = ground_truths
                )
示例#14
0
文件: views.py 项目: zjucsxxd/DIGITS
def infer_many():
    """
    Infer many images

    Reads the uploaded 'image_list' file (one path per line, optionally
    followed by a numeric label that is ignored) and the optional
    'image_folder', 'num_test_images', 'snapshot_epoch' and 'dont_resize'
    form fields. Runs an ImageInferenceJob through the scheduler, waits
    for it to complete and deletes it afterwards.

    Returns (response, status_code): JSON of per-path outputs when
    requested, otherwise the rendered infer_many template. status_code is
    500 when the inference job ended in status 'E', else 200.

    Raises:
    werkzeug.exceptions.BadRequest -- missing image_list or non-existent
        image_folder
    """
    model_job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form[
            'image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    if 'num_test_images' in flask.request.form and flask.request.form[
            'num_test_images'].strip():
        num_test_images = int(flask.request.form['num_test_images'])
    else:
        num_test_images = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        epoch = float(flask.request.form['snapshot_epoch'])

    if 'dont_resize' in flask.request.form and flask.request.form[
            'dont_resize']:
        resize = False
    else:
        resize = True

    paths = []

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+\d+$', line)
        if match:
            path = match.group(1)
        else:
            path = line

        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)
        paths.append(path)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    # create inference job
    inference_job = ImageInferenceJob(
        username=utils.auth.get_username(),
        name="Infer Many Images",
        model=model_job,
        images=paths,
        epoch=epoch,
        layers='none',
        resize=resize,
    )

    # schedule tasks
    scheduler.add_job(inference_job)

    # wait for job to complete
    inference_job.wait_completion()

    # retrieve inference data
    inputs, outputs, _ = inference_job.get_data()

    # set return status code
    status_code = 500 if inference_job.status == 'E' else 200

    # delete job folder and remove from scheduler list
    scheduler.delete_job(inference_job)

    if outputs is not None and len(outputs) < 1:
        # an error occurred
        outputs = None

    if inputs is not None:
        # keep only the paths that were actually processed, in input order
        paths = [paths[idx] for idx in inputs['ids']]
        inference_views_html, header_html, app_begin_html, app_end_html = get_inference_visualizations(
            model_job.dataset, inputs, outputs)
    else:
        inference_views_html = None
        header_html = None
        app_begin_html = None
        app_end_html = None

    if request_wants_json():
        result = {}
        # outputs is None when inference failed; return an empty mapping
        # with the error status instead of crashing on None.iteritems()
        if outputs is not None:
            for i, path in enumerate(paths):
                result[path] = dict(
                    (name, blob[i].tolist())
                    for name, blob in outputs.iteritems())
        return flask.jsonify({'outputs': result}), status_code
    else:
        return flask.render_template(
            'models/images/generic/infer_many.html',
            model_job=model_job,
            job=inference_job,
            paths=paths,
            inference_views_html=inference_views_html,
            header_html=header_html,
            app_begin_html=app_begin_html,
            app_end_html=app_end_html,
        ), status_code
示例#15
0
def parse_folder(
    folder,
    labels_file,
    train_file=None,
    percent_train=None,
    val_file=None,
    percent_val=None,
    test_file=None,
    percent_test=None,
    min_per_category=2,
    max_per_category=None,
):
    """
    Parses a folder of images into three textfiles
    Returns True on success, False when the folder is missing, has fewer
    than two subdirectories, or (when creating labels) yields fewer than
    two valid categories

    Arguments:
    folder -- a folder containing folders of images (can be a filesystem path or a url)
    labels_file -- file for labels; written when percent_train > 0,
        otherwise read to obtain the existing labels

    Keyword Arguments:
    train_file -- output file for training images
    percent_train -- percentage of images to use in the training set
    val_file -- output file for validation images
    percent_val -- percentage of images to use in the validation set
    test_file -- output file for test images
    percent_test -- percentage of images to use in the test set
    min_per_category -- minimum number of images per category
    max_per_category -- maximum number of images per category
    """
    # Labels are generated from the subdirectory names only when a training
    # split is requested; otherwise they are read from labels_file.
    create_labels = bool(percent_train) and percent_train > 0
    labels = []

    # Read the labels from labels_file

    if not create_labels:
        with open(labels_file) as infile:
            for line in infile:
                line = line.strip()
                if line:
                    labels.append(line)

    # Verify that at least two category folders exist

    folder_is_url = utils.is_url(folder)
    if folder_is_url:
        if not folder.endswith('/'):
            folder += '/'
        subdirs, _ = parse_web_listing(folder)
    else:
        if os.path.exists(folder) and os.path.isdir(folder):
            subdirs = []
            for filename in os.listdir(folder):
                subdir = os.path.join(folder, filename)
                if os.path.isdir(subdir):
                    subdirs.append(subdir)
        else:
            logger.error('folder does not exist')
            return False

    subdirs.sort()

    if len(subdirs) < 2:
        logger.error('folder must contain at least two subdirectories')
        return False

    # A category needs at least one image per requested split; this does
    # not depend on the category, so compute it once.
    required_categories = sum(
        1 for percent in (percent_train, percent_val, percent_test)
        if percent and percent > 0)

    # Parse the folder

    train_count = 0
    val_count = 0
    test_count = 0

    train_outfile = None
    val_outfile = None
    test_outfile = None

    try:
        if percent_train:
            train_outfile = open(train_file, 'w')
        if percent_val:
            val_outfile = open(val_file, 'w')
        if percent_test:
            test_outfile = open(test_file, 'w')

        subdir_index = 0
        label_index = 0
        for subdir in subdirs:
            # Use the directory name as the label
            label_name = subdir
            if folder_is_url:
                label_name = unescape(label_name)
            else:
                label_name = os.path.basename(label_name)
            label_name = label_name.replace('_', ' ')
            if label_name.endswith('/'):
                # Remove trailing slash
                label_name = label_name[0:-1]

            if create_labels:
                labels.append(label_name)
                label_index = len(labels) - 1
            else:
                found = False
                for i, l in enumerate(labels):
                    if label_name == l:
                        found = True
                        label_index = i
                        break
                if not found:
                    logger.warning(
                        'Category "%s" not found in labels_file. Skipping.' %
                        label_name)
                    continue

            logger.debug('Category - %s' % label_name)

            lines = []

            # Read all images in the folder

            if folder_is_url:
                urls, _ = web_listing_all_files(folder + subdir,
                                                max_count=max_per_category)
                for url in urls:
                    lines.append('%s %d' % (url, label_index))
            else:
                # subdir already includes folder, and os.walk's dirpath
                # already includes subdir, so neither is joined again;
                # re-joining doubled the prefix for relative folders.
                for dirpath, dirnames, filenames in os.walk(
                        subdir, followlinks=True):
                    for filename in filenames:
                        if filename.lower().endswith(
                                utils.image.SUPPORTED_EXTENSIONS):
                            lines.append('%s %d' % (os.path.join(
                                dirpath, filename), label_index))
                            if max_per_category is not None and len(
                                    lines) >= max_per_category:
                                break
                    if max_per_category is not None and len(
                            lines) >= max_per_category:
                        logger.warning(
                            'Reached maximum limit for this category')
                        break

            # Split up the lines

            train_lines = []
            val_lines = []
            test_lines = []

            if not lines or len(lines) < required_categories or len(
                    lines) < min_per_category:
                logger.warning('Not enough images for this category')
                # Only undo the label appended above; labels read from
                # labels_file must stay intact for later lookups.
                if create_labels:
                    labels.pop()
            else:
                random.shuffle(lines)
                a, b = three_way_split_indices(len(lines), percent_val,
                                               percent_test)
                train_lines = lines[:a]
                val_lines = lines[a:b]
                test_lines = lines[b:]

            if train_lines:
                train_outfile.write('\n'.join(train_lines) + '\n')
                train_count += len(train_lines)
            if val_lines:
                val_outfile.write('\n'.join(val_lines) + '\n')
                val_count += len(val_lines)
            if test_lines:
                test_outfile.write('\n'.join(test_lines) + '\n')
                test_count += len(test_lines)

            subdir_index += 1
            logger.debug('Progress: %0.2f' %
                         (float(subdir_index) / len(subdirs)))
    finally:
        # Close whatever was opened, even if parsing raised
        for outfile in (train_outfile, val_outfile, test_outfile):
            if outfile is not None:
                outfile.close()

    if create_labels:
        if len(labels) < 2:
            logger.error('Did not find two valid categories')
            return False
        else:
            with open(labels_file, 'w') as labels_outfile:
                labels_outfile.write('\n'.join(labels) + '\n')

    logger.info('Found %d images in %d categories.' %
                (train_count + val_count + test_count, len(labels)))
    logger.info('Selected %d for training.' % train_count)
    logger.info('Selected %d for validation.' % val_count)
    logger.info('Selected %d for testing.' % test_count)
    return True
示例#16
0
def infer_many():
    """
    Infer many images

    Reads the uploaded image list (one path or URL per line, optionally
    followed by a numeric label which is ignored), loads every image and
    resizes it ('squash' mode) to the dimensions reported by the dataset's
    first analyze-db task, then runs all loaded images through the model.

    Form fields:
    image_list -- (required) uploaded text file listing the images
    image_folder -- (optional) folder prepended to relative, non-URL paths
    snapshot_epoch -- (optional) epoch of the snapshot to use

    Returns JSON when requested: {outputs: {path: {output_name: [values,...]},...}}
    Otherwise renders models/images/generic/infer_many.html

    Raises werkzeug.exceptions.BadRequest when image_list is missing, when
    image_folder does not exist, when snapshot_epoch is not a number or when
    no image could be loaded. Raises RuntimeError when inference returns None.
    """
    job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest('image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        try:
            epoch = float(flask.request.form['snapshot_epoch'])
        except ValueError:
            # malformed client input is a bad request, not a server error
            raise werkzeug.exceptions.BadRequest(
                    'snapshot_epoch "%s" is not a valid number'
                    % flask.request.form['snapshot_epoch'])

    paths = []
    images = []

    db_task = job.train_task().dataset.analyze_db_tasks()[0]
    height = db_task.image_height
    width = db_task.image_width
    channels = db_task.image_channels

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+\d+$', line)
        path = match.group(1) if match else line

        try:
            if not utils.is_url(path) and image_folder and not os.path.isabs(path):
                path = os.path.join(image_folder, path)
            image = utils.image.load_image(path)
            image = utils.image.resize_image(image, height, width,
                    channels = channels,
                    resize_mode = 'squash',
                    )
            paths.append(path)
            images.append(image)
        except utils.errors.LoadImageError as e:
            # best effort: report the failure and keep going with the
            # remaining images
            print(e)

    if not images:
        raise werkzeug.exceptions.BadRequest(
                'Unable to load any images from the file')

    outputs = job.train_task().infer_many(images, snapshot_epoch=epoch)
    if outputs is None:
        raise RuntimeError('An error occured while processing the images')

    if request_wants_json():
        # one entry per image: the i-th row of every output blob
        result = {}
        for i, path in enumerate(paths):
            result[path] = dict((name, blob[i].tolist()) for name, blob in outputs.items())
        return flask.jsonify({'outputs': result})
    else:
        return flask.render_template('models/images/generic/infer_many.html',
                job             = job,
                paths           = paths,
                network_outputs = outputs,
                )
示例#17
0
文件: views.py 项目: bygreencn/DIGITS
def infer_many():
    """
    Infer many images

    Collects image paths from the uploaded image list (one path or URL per
    line, optionally followed by a numeric label which is ignored), runs them
    through an ImageInferenceJob on the scheduler, waits for it to complete
    and deletes the job again before building the response.

    Form fields:
    image_list -- (required) uploaded text file listing the images
    image_folder -- (optional) folder prepended to relative, non-URL paths
    num_test_images -- (optional) stop after this many listed images
    snapshot_epoch -- (optional) epoch of the snapshot to use
    dont_resize -- (optional) any non-empty value disables resizing

    Returns a (body, status_code) pair; status_code is 500 when the inference
    job ended in the 'E' status and 200 otherwise. The body is JSON when
    requested: {outputs: {path: {output_name: [values,...]},...}} (an empty
    mapping when inference produced no outputs), otherwise the rendered
    models/images/generic/infer_many.html template.

    Raises werkzeug.exceptions.BadRequest when image_list is missing, when
    image_folder does not exist, or when num_test_images / snapshot_epoch are
    not numbers.
    """
    model_job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest('image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    if 'num_test_images' in flask.request.form and flask.request.form['num_test_images'].strip():
        try:
            num_test_images = int(flask.request.form['num_test_images'])
        except ValueError:
            # malformed client input is a bad request, not a server error
            raise werkzeug.exceptions.BadRequest(
                'num_test_images "%s" is not a valid integer'
                % flask.request.form['num_test_images'])
    else:
        num_test_images = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        try:
            epoch = float(flask.request.form['snapshot_epoch'])
        except ValueError:
            raise werkzeug.exceptions.BadRequest(
                'snapshot_epoch "%s" is not a valid number'
                % flask.request.form['snapshot_epoch'])

    # resize unless the client explicitly asked not to
    resize = not ('dont_resize' in flask.request.form and flask.request.form['dont_resize'])

    paths = []

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+\d+$', line)
        path = match.group(1) if match else line

        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)
        paths.append(path)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    # create inference job
    inference_job = ImageInferenceJob(
        username=utils.auth.get_username(),
        name="Infer Many Images",
        model=model_job,
        images=paths,
        epoch=epoch,
        layers='none',
        resize=resize,
        )

    # schedule tasks
    scheduler.add_job(inference_job)

    # wait for job to complete
    inference_job.wait_completion()

    # retrieve inference data
    inputs, outputs, _ = inference_job.get_data()

    # set return status code
    status_code = 500 if inference_job.status == 'E' else 200

    # delete job folder and remove from scheduler list
    scheduler.delete_job(inference_job)

    if outputs is not None and len(outputs) < 1:
        # an error occurred
        outputs = None

    if inputs is not None:
        # keep only the paths of the images the job reports in inputs['ids']
        paths = [paths[idx] for idx in inputs['ids']]
        inference_views_html, header_html, app_begin_html, app_end_html = get_inference_visualizations(
            model_job.dataset,
            inputs,
            outputs)
    else:
        inference_views_html = None
        header_html = None
        app_begin_html = None
        app_end_html = None

    if request_wants_json():
        result = {}
        # outputs is None when inference failed; report an empty mapping with
        # the computed status code instead of crashing on None
        if outputs is not None:
            for i, path in enumerate(paths):
                result[path] = dict((name, blob[i].tolist()) for name, blob in outputs.items())
        return flask.jsonify({'outputs': result}), status_code
    else:
        return flask.render_template(
            'models/images/generic/infer_many.html',
            model_job=model_job,
            job=inference_job,
            paths=paths,
            inference_views_html=inference_views_html,
            header_html=header_html,
            app_begin_html=app_begin_html,
            app_end_html=app_end_html,
            ), status_code
示例#18
0
def classify_many():
    """
    Classify many images and return the top 5 classifications for each

    Reads the uploaded image list (one path or URL per line, optionally
    followed by a numeric ground-truth label), loads every image and resizes
    it to the dataset's image_dims using the dataset's resize_mode, then
    classifies all loaded images in one call.

    Form fields:
    image_list -- (required) uploaded text file listing the images
    image_folder -- (optional) folder prepended to relative, non-URL paths
    snapshot_epoch -- (optional) epoch of the snapshot to use

    Returns JSON when requested: {classifications: {filename: [[category,confidence],...],...}}
    Otherwise renders models/images/classification/classify_many.html

    Raises werkzeug.exceptions.BadRequest when image_list is missing, when
    image_folder does not exist, when snapshot_epoch is not a number or when
    no image could be loaded. Raises RuntimeError when inference returns no
    scores.
    """
    job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form[
            'image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        try:
            epoch = float(flask.request.form['snapshot_epoch'])
        except ValueError:
            # malformed client input is a bad request, not a server error
            raise werkzeug.exceptions.BadRequest(
                'snapshot_epoch "%s" is not a valid number'
                % flask.request.form['snapshot_epoch'])

    paths = []
    images = []
    ground_truths = []
    dataset = job.train_task().dataset

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        try:
            if not utils.is_url(path) and image_folder and not os.path.isabs(
                    path):
                path = os.path.join(image_folder, path)
            image = utils.image.load_image(path)
            image = utils.image.resize_image(
                image,
                dataset.image_dims[0],
                dataset.image_dims[1],
                channels=dataset.image_dims[2],
                resize_mode=dataset.resize_mode,
            )
            paths.append(path)
            images.append(image)
            ground_truths.append(ground_truth)
        except utils.errors.LoadImageError as e:
            # best effort: report the failure and keep going with the
            # remaining images
            print(e)

    if not images:
        raise werkzeug.exceptions.BadRequest(
            'Unable to load any images from the file')

    labels, scores = job.train_task().infer_many(images, snapshot_epoch=epoch)
    if scores is None:
        raise RuntimeError('An error occured while processing the images')

    # take top 5
    indices = (-scores).argsort()[:, :5]

    classifications = []
    for image_index, index_list in enumerate(indices):
        # `i` is a category in labels and also an index into scores
        classifications.append([
            (labels[i], round(100.0 * scores[image_index, i], 2))
            for i in index_list
        ])

    # replace ground truth indices with labels
    ground_truths = [
        labels[x] if x is not None and (0 <= x < len(labels)) else None
        for x in ground_truths
    ]

    if request_wants_json():
        joined = dict(zip(paths, classifications))
        return flask.jsonify({'classifications': joined})
    else:
        return flask.render_template(
            'models/images/classification/classify_many.html',
            job=job,
            paths=paths,
            classifications=classifications,
            # only show the column when at least one image has a usable label
            show_ground_truth=any(gt is not None for gt in ground_truths),
            ground_truths=ground_truths)
示例#19
0
文件: views.py 项目: lgx900730/DIGITS
def classify_many():
    """
    Classify many images and return the top 5 classifications for each

    Collects image paths (and optional numeric ground-truth labels) from the
    uploaded image list, runs them through an ImageInferenceJob on the
    scheduler, waits for it to complete, deletes the job and converts the
    last network output into per-image top-5 (label, confidence) pairs.

    Form fields:
    image_list -- (required) uploaded text file listing the images
    image_folder -- (optional) folder prepended to relative, non-URL paths
    num_test_images -- (optional) stop after this many listed images
    snapshot_epoch -- (optional) epoch of the snapshot to use

    Returns JSON when requested: {classifications: {filename: [[category,confidence],...],...}}
    Otherwise renders models/images/classification/classify_many.html
    (classifications is None in the template when inference produced no outputs)

    Raises werkzeug.exceptions.BadRequest when image_list is missing, when
    image_folder does not exist, when num_test_images / snapshot_epoch are not
    numbers or when the last output is empty. Raises RuntimeError when JSON
    was requested but inference produced no outputs.
    """
    model_job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest('image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    if 'num_test_images' in flask.request.form and flask.request.form['num_test_images'].strip():
        try:
            num_test_images = int(flask.request.form['num_test_images'])
        except ValueError:
            # malformed client input is a bad request, not a server error
            raise werkzeug.exceptions.BadRequest(
                    'num_test_images "%s" is not a valid integer'
                    % flask.request.form['num_test_images'])
    else:
        num_test_images = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        try:
            epoch = float(flask.request.form['snapshot_epoch'])
        except ValueError:
            raise werkzeug.exceptions.BadRequest(
                    'snapshot_epoch "%s" is not a valid number'
                    % flask.request.form['snapshot_epoch'])

    paths = []
    ground_truths = []

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)
        paths.append(path)
        ground_truths.append(ground_truth)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    # create inference job
    inference_job = ImageInferenceJob(
                username    = utils.auth.get_username(),
                name        = "Classify Many Images",
                model       = model_job,
                images      = paths,
                epoch       = epoch,
                layers      = 'none'
                )

    # schedule tasks
    scheduler.add_job(inference_job)

    # wait for job to complete
    inference_job.wait_completion()

    # retrieve inference data
    inputs, outputs, _ = inference_job.get_data()

    # delete job
    scheduler.delete_job(inference_job)

    if outputs is not None and len(outputs) < 1:
        # an error occurred
        outputs = None

    if inputs is not None:
        # retrieve path and ground truth of images that were successfully processed
        paths = [paths[idx] for idx in inputs['ids']]
        ground_truths = [ground_truths[idx] for idx in inputs['ids']]

    classifications = None
    if outputs is not None:
        # convert to class probabilities for viewing
        # (list() keeps the lookup valid where items() returns a view)
        last_output_name, last_output_data = list(outputs.items())[-1]
        if len(last_output_data) < 1:
            raise werkzeug.exceptions.BadRequest(
                    'Unable to classify any image from the file')

        scores = last_output_data
        # take top 5
        indices = (-scores).argsort()[:, :5]

        labels = model_job.train_task().get_labels()
        classifications = []
        for image_index, index_list in enumerate(indices):
            # `i` is a category in labels and also an index into scores
            classifications.append([
                (labels[i], round(100.0*scores[image_index, i],2))
                for i in index_list
                ])

        # replace ground truth indices with labels
        ground_truths = [labels[x] if x is not None and (0 <= x < len(labels)) else None for x in ground_truths]

    if request_wants_json():
        if classifications is None:
            # inference failed: fail with a meaningful message rather than
            # the TypeError zip() would raise on None
            raise RuntimeError('An error occurred while processing the images')
        joined = dict(zip(paths, classifications))
        return flask.jsonify({'classifications': joined})
    else:
        return flask.render_template('models/images/classification/classify_many.html',
                model_job       = model_job,
                job             = inference_job,
                paths           = paths,
                classifications = classifications,
                # only show the column when at least one image has a ground truth
                show_ground_truth= any(gt is not None for gt in ground_truths),
                ground_truths   = ground_truths
                )
示例#20
0
def parse_folder(folder, labels_file,
        train_file=None, percent_train=None,
        val_file=None, percent_val=None,
        test_file=None, percent_test=None,
        min_per_category=2,
        max_per_category=None,
        ):
    """
    Parses a folder of images into three textfiles
    Returns True on success, False when the folder does not exist, contains
    fewer than two subdirectories, or (when creating labels) yields fewer
    than two valid categories

    Every output line has the form "<image path or url> <label index>".
    When percent_train > 0 the labels are derived from the subdirectory names
    and written to labels_file; otherwise labels are read from labels_file and
    subdirectories whose name is not listed there are skipped.

    Arguments:
    folder -- a folder containing folders of images (can be a filesystem path or a url)
    labels_file -- file for labels

    Keyword Arguments:
    train_file -- output file for training images
    percent_train -- percentage of images to use in the training set
    val_file -- output file for validation images
    percent_val -- percentage of images to use in the validation set
    test_file -- output file for test images
    percent_test -- percentage of images to use in the test set
    min_per_category -- minimum number of images per category
    max_per_category -- maximum number of images per category
    """
    # explicit None checks: the percent_* defaults are None, which cannot be
    # compared with an int on Python 3
    create_labels = percent_train is not None and percent_train > 0
    labels = []

    ### Read the labels from labels_file

    if not create_labels:
        with open(labels_file) as infile:
            for line in infile:
                line = line.strip()
                if line:
                    labels.append(line)

    ### Verify that at least two category folders exist

    folder_is_url = utils.is_url(folder)
    if folder_is_url:
        if not folder.endswith('/'):
            folder += '/'
        subdirs, _ = parse_web_listing(folder)
    else:
        if os.path.exists(folder) and os.path.isdir(folder):
            subdirs = []
            for filename in os.listdir(folder):
                subdir = os.path.join(folder, filename)
                if os.path.isdir(subdir):
                    subdirs.append(subdir)
        else:
            logger.error('folder does not exist')
            return False

    subdirs.sort()

    if len(subdirs) < 2:
        logger.error('folder must contain at least two subdirectories')
        return False

    ### Parse the folder

    train_count = 0
    val_count = 0
    test_count = 0

    # a category needs at least one image per requested output set
    required_categories = sum(
            1 for percent in (percent_train, percent_val, percent_test)
            if percent is not None and percent > 0)

    train_outfile = None
    val_outfile = None
    test_outfile = None

    try:
        if percent_train:
            train_outfile = open(train_file, 'w')
        if percent_val:
            val_outfile = open(val_file, 'w')
        if percent_test:
            test_outfile = open(test_file, 'w')

        subdir_index = 0
        label_index = 0
        for subdir in subdirs:
            # Use the directory name as the label
            label_name = subdir
            if folder_is_url:
                label_name = unescape(label_name)
            else:
                label_name = os.path.basename(label_name)
            label_name = label_name.replace('_',' ')
            if label_name.endswith('/'):
                # Remove trailing slash
                label_name = label_name[0:-1]

            if create_labels:
                labels.append(label_name)
                label_index = len(labels)-1
            else:
                try:
                    label_index = labels.index(label_name)
                except ValueError:
                    logger.warning('Category "%s" not found in labels_file. Skipping.' % label_name)
                    continue

            logger.debug('Category - %s' % label_name)

            lines = []

            ### Read all images in the folder

            if folder_is_url:
                urls, _ = web_listing_all_files(folder + subdir, max_count=max_per_category)
                for url in urls:
                    lines.append('%s %d' % (url, label_index))
            else:
                # `subdir` already includes `folder`, and os.walk yields
                # `dirpath` rooted at `subdir`; joining `folder` in again
                # would double the prefix for relative folders
                for dirpath, dirnames, filenames in os.walk(subdir, followlinks=True):
                    for filename in filenames:
                        if filename.lower().endswith(utils.image.SUPPORTED_EXTENSIONS):
                            lines.append('%s %d' % (os.path.join(dirpath, filename), label_index))
                            if max_per_category is not None and len(lines) >= max_per_category:
                                break
                    if max_per_category is not None and len(lines) >= max_per_category:
                        logger.warning('Reached maximum limit for this category')
                        break

            ### Split up the lines

            train_lines = []
            val_lines = []
            test_lines = []

            if not lines or len(lines) < required_categories or len(lines) < min_per_category:
                logger.warning('Not enough images for this category')
                if create_labels:
                    # drop the label appended above for this category; labels
                    # read from labels_file must stay untouched
                    labels.pop()
            else:
                random.shuffle(lines)
                a, b = three_way_split_indices(len(lines), percent_val, percent_test)
                train_lines = lines[:a]
                val_lines = lines[a:b]
                test_lines = lines[b:]

            if train_lines:
                train_outfile.write('\n'.join(train_lines) + '\n')
                train_count += len(train_lines)
            if val_lines:
                val_outfile.write('\n'.join(val_lines) + '\n')
                val_count += len(val_lines)
            if test_lines:
                test_outfile.write('\n'.join(test_lines) + '\n')
                test_count += len(test_lines)

            subdir_index += 1
            logger.debug('Progress: %0.2f' % (float(subdir_index)/len(subdirs)))
    finally:
        # close whatever was opened, even if parsing failed part-way
        for outfile in (train_outfile, val_outfile, test_outfile):
            if outfile is not None:
                outfile.close()

    if create_labels:
        if len(labels) < 2:
            logger.error('Did not find two valid categories')
            return False
        else:
            with open(labels_file, 'w') as labels_outfile:
                labels_outfile.write('\n'.join(labels) + '\n')

    logger.info('Found %d images in %d categories.' % (train_count + val_count + test_count, len(labels)))
    logger.info('Selected %d for training.' % train_count)
    logger.info('Selected %d for validation.' % val_count)
    logger.info('Selected %d for testing.' % test_count)
    return True