# --- CLI argument definitions ---
parser.add_argument(
    '-c', '--count', type=int, default=100,
    help='Amount of query result images to fetch; images without '
         'metadata will be skipped so fewer images may be downloaded')
parser.add_argument(
    '-u', '--uris',
    help='Download images and metadata from existing list of Wikimedia '
         'Commons uris rather than querying them first')
parser.add_argument(
    '-d', '--directory', default='data/fetch/<timestamp>-commons',
    help='Directory to download images into; gets created if not exists')
args = parser.parse_args()

# Expand the <timestamp> placeholder so each run downloads into its own
# folder. strftime() already returns a str, so no extra conversion needed.
timestamp = datetime.now().strftime('%y-%m-%d-%H-%M')
directory = args.directory.replace('<timestamp>', timestamp)

# Resolve the image uris from exactly one of the three possible sources.
uris = []
if args.uris:
    name = os.path.basename(args.uris)
    uris = read_lines(args.uris)
elif args.metadata_query:
    name = args.metadata_query[0]
    uris = fetch_uris_from_metadata(args.metadata_query, args.count)
elif args.article_query:
    name = args.article_query[0]
    uris = fetch_uris_from_articles(args.article_query, args.count)
else:
    # parser.error() prints usage and exits with status 2. Unlike the
    # previous `assert False`, it is not stripped when running with -O
    # and gives the user a proper CLI error instead of a traceback.
    parser.error('One of parameters --uris, --metadata-query and '
                 '--article-query is required')

ensure_directory(directory)
print('Download', len(uris), 'images and metadata into', directory)
images_and_metadata(uris, directory)
# --- CLI argument definitions ---
parser.add_argument(
    '-c', '--count', type=int, default=100,
    help='Amount of query result images to fetch; images without '
         'metadata will be skipped so fewer images may be downloaded')
parser.add_argument(
    '-u', '--uris',
    help='Download images and metadata from existing list of Wikimedia '
         'Commons uris rather than querying them first')
parser.add_argument(
    '-d', '--directory', default='data/fetch/<timestamp>-commons',
    help='Directory to download images into; gets created if not exists')
args = parser.parse_args()

# Expand the <timestamp> placeholder so each run downloads into its own
# folder. strftime() already returns a str, so no extra conversion needed.
timestamp = datetime.now().strftime('%y-%m-%d-%H-%M')
directory = args.directory.replace('<timestamp>', timestamp)

# Resolve the image uris from exactly one of the three possible sources.
uris = []
if args.uris:
    name = os.path.basename(args.uris)
    uris = read_lines(args.uris)
elif args.metadata_query:
    name = args.metadata_query[0]
    uris = fetch_uris_from_metadata(args.metadata_query, args.count)
elif args.article_query:
    name = args.article_query[0]
    uris = fetch_uris_from_articles(args.article_query, args.count)
else:
    # parser.error() prints usage and exits with status 2. Unlike the
    # previous `assert False`, it is not stripped when running with -O
    # and gives the user a proper CLI error instead of a traceback.
    parser.error('One of parameters --uris, --metadata-query and '
                 '--article-query is required')

ensure_directory(directory)
print('Download', len(uris), 'images and metadata into', directory)
images_and_metadata(uris, directory)
def classify_images(self, keywords=None, limit=25):
    """
    Worker task to retrieve a list of automatically categorized images
    from Wikimedia Commons from a given list of keywords.

    keywords -- non-empty list of search terms; required despite the
                default (None/empty raises, same as the original [])
    limit -- maximum number of images to fetch, capped at QUERY_LIMIT

    Returns a dict {'current': 100, 'total': 100, 'result': [...]} where
    each result entry has 'thumbnail', 'image', 'label' and 'title' keys.
    Raises AssertionError when no keywords are given.
    """
    # None replaces the mutable default argument []; both are falsy, so
    # the validation below behaves exactly as before. The explicit raise
    # (rather than an `assert`) survives `python -O`; the exception type
    # is kept as AssertionError so existing callers' handlers still match.
    if not keywords:
        raise AssertionError('at least one keyword is required')
    with app.app_context():

        def supported_extractors():
            # Extractor set must match the one used to train the model.
            return [
                SizeFeature(),
                ColorFeature(),
                HistogramFeature(),
                GradientFeature(),
                FaceFeature(app.config['FACE_CLASSIFIER']),
                GeoFeature(),
                FormatFeature(),
                WordsFeature.create_from(app.config['WORDS_CONFIG']),
            ]

        def create_response_entry(label, sample):
            return {
                'thumbnail': sample.thumbnail,
                'image': sample.url,
                'label': label,
                'title': sample.url
            }

        def create_response(entries):
            return {'current': 100, 'total': 100, 'result': entries}

        # keep track of progress
        progress_observer = ProgressObserver(self)
        progress_observer.update(5)
        # query dbpedia for related images based on given keywords
        if limit > app.config['QUERY_LIMIT']:
            limit = app.config['QUERY_LIMIT']
        searchterm = ' '.join(keywords)
        uris = fetch_uris_from_metadata(searchterm, limit, multiple=False)
        progress_observer.update(20)
        # download images and metadata into temp folder with unique task id
        temp_folder = os.path.join(app.config['DOWNLOAD_DIRECTORY'],
                                   classify_images.request.id)
        try:
            images_and_metadata(uris, temp_folder, False,
                                observer=progress_observer)
            progress_observer.update(80)
            # load dataset and extract features
            dataset = Dataset(logging=True)
            dataset.read(root=temp_folder,
                         extractors=supported_extractors(),
                         unlabeled_data=True)
            # open via context manager so the config handle is closed
            # deterministically (json.load(open(...)) leaked it)
            with open(app.config['DATASET_CONFIG']) as config_file:
                dataset_config = json.load(config_file)
            dataset.means = dataset_config['means']
            dataset.stds = dataset_config['stds']
            dataset.normalize()
            progress_observer.update(90)
            # predict labels using the trained classifier
            classifier = joblib.load(app.config['WIKIMEDIA_CLASSIFIER'])
            predictions = classifier.predict(dataset.data)
            progress_observer.update(95)
            # build response; ndarray.item() replaces np.asscalar, which
            # was deprecated in NumPy 1.16 and removed in 1.23
            suggestions = []
            for index, sample in enumerate(dataset.samples):
                label = predictions[index].item()
                entry = create_response_entry(label, sample)
                suggestions.append(entry)
            result = create_response(suggestions)
        finally:
            # cleanup temporary directory even when a step above fails,
            # so failed tasks do not leak disk space
            delete_directory(temp_folder)
        progress_observer.update(100)
        return result
def classify_images(self, keywords=None, limit=25):
    """
    Worker task to retrieve a list of automatically categorized images
    from Wikimedia Commons from a given list of keywords.

    keywords -- non-empty list of search terms; required despite the
                default (None/empty raises, same as the original [])
    limit -- maximum number of images to fetch, capped at QUERY_LIMIT

    Returns a dict {"current": 100, "total": 100, "result": [...]} where
    each result entry has "thumbnail", "image", "label" and "title" keys.
    Raises AssertionError when no keywords are given.
    """
    # None replaces the mutable default argument []; both are falsy, so
    # the validation below behaves exactly as before. The explicit raise
    # (rather than an `assert`) survives `python -O`; the exception type
    # is kept as AssertionError so existing callers' handlers still match.
    if not keywords:
        raise AssertionError("at least one keyword is required")
    with app.app_context():

        def supported_extractors():
            # Extractor set must match the one used to train the model.
            return [
                SizeFeature(),
                ColorFeature(),
                HistogramFeature(),
                GradientFeature(),
                FaceFeature(app.config["FACE_CLASSIFIER"]),
                GeoFeature(),
                FormatFeature(),
                WordsFeature.create_from(app.config["WORDS_CONFIG"]),
            ]

        def create_response_entry(label, sample):
            return {
                "thumbnail": sample.thumbnail,
                "image": sample.url,
                "label": label,
                "title": sample.url,
            }

        def create_response(entries):
            return {"current": 100, "total": 100, "result": entries}

        # keep track of progress
        progress_observer = ProgressObserver(self)
        progress_observer.update(5)
        # query dbpedia for related images based on given keywords
        if limit > app.config["QUERY_LIMIT"]:
            limit = app.config["QUERY_LIMIT"]
        searchterm = " ".join(keywords)
        uris = fetch_uris_from_metadata(searchterm, limit, multiple=False)
        progress_observer.update(20)
        # download images and metadata into temp folder with unique task id
        temp_folder = os.path.join(app.config["DOWNLOAD_DIRECTORY"],
                                   classify_images.request.id)
        try:
            images_and_metadata(uris, temp_folder, False,
                                observer=progress_observer)
            progress_observer.update(80)
            # load dataset and extract features
            dataset = Dataset(logging=True)
            dataset.read(root=temp_folder,
                         extractors=supported_extractors(),
                         unlabeled_data=True)
            # open via context manager so the config handle is closed
            # deterministically (json.load(open(...)) leaked it)
            with open(app.config["DATASET_CONFIG"]) as config_file:
                dataset_config = json.load(config_file)
            dataset.means = dataset_config["means"]
            dataset.stds = dataset_config["stds"]
            dataset.normalize()
            progress_observer.update(90)
            # predict labels using the trained classifier
            classifier = joblib.load(app.config["WIKIMEDIA_CLASSIFIER"])
            predictions = classifier.predict(dataset.data)
            progress_observer.update(95)
            # build response; ndarray.item() replaces np.asscalar, which
            # was deprecated in NumPy 1.16 and removed in 1.23
            suggestions = []
            for index, sample in enumerate(dataset.samples):
                label = predictions[index].item()
                entry = create_response_entry(label, sample)
                suggestions.append(entry)
            result = create_response(suggestions)
        finally:
            # cleanup temporary directory even when a step above fails,
            # so failed tasks do not leak disk space
            delete_directory(temp_folder)
        progress_observer.update(100)
        return result