示例#1
0
def get_labels(keys, name=None):
    """Return a mapping of entity id to label for the given keys.

    Entities are fetched with ``mediawiki.get_entities`` in batches of 50
    and the raw result is cached as JSON in ``cache/{name}_labels.json``.
    A cached file is reused only when its stored ``keys`` list equals the
    sorted keys requested here; an empty cached label list is treated as a
    miss and refetched.

    Args:
        keys: Iterable of entity id strings. They are sorted by
            ``int(key[1:])``, so everything after the first character
            must parse as an integer.
        name: Stem of the cache file name. Defaults to the sorted keys
            joined with ``_``.

    Returns:
        dict mapping ``entity['id']`` to
        ``wikibase.get_entity_label(entity)`` for every fetched entity.
    """
    keys = sorted(keys, key=lambda i: int(i[1:]))
    if name is None:
        name = '_'.join(keys)
    filename = f'cache/{name}_labels.json'
    labels = []
    if os.path.exists(filename):
        # Use a context manager so the handle is closed as soon as the
        # cache has been parsed instead of whenever it is garbage-collected.
        with open(filename) as cache_file:
            from_cache = json.load(cache_file)
        if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
            labels = from_cache['labels']
    if not labels:
        for cur in utils.chunk(keys, 50):
            labels += mediawiki.get_entities(cur, props='labels')

        # Closing the file explicitly guarantees the JSON is flushed to
        # disk before the function returns.
        with open(filename, 'w') as cache_file:
            json.dump({'keys': keys, 'labels': labels}, cache_file, indent=2)

    return {
        entity['id']: wikibase.get_entity_label(entity)
        for entity in labels
    }
示例#2
0
def get_labels(keys, name=None):
    """Return a mapping of entity id to label for the given keys.

    Entities are fetched with ``mediawiki.get_entities`` in batches and the
    raw result is cached as JSON in ``cache/{name}_labels.json``. A cached
    file is reused only when its stored ``keys`` list equals the sorted
    keys requested here; an empty cached label list is treated as a miss
    and refetched. Progress is printed to stdout while fetching.

    Args:
        keys: Iterable of entity id strings. They are sorted by
            ``int(key[1:])``, so everything after the first character
            must parse as an integer.
        name: Stem of the cache file name. Defaults to the MD5 hex digest
            of the sorted keys joined with ``_``, which keeps the file
            name at a fixed length regardless of how many keys are given.

    Returns:
        dict mapping ``entity['id']`` to
        ``wikibase.get_entity_label(entity)`` for every fetched entity.
    """
    keys = sorted(keys, key=lambda i: int(i[1:]))
    if name is None:
        name = hashlib.md5('_'.join(keys).encode('utf-8')).hexdigest()
    filename = f'cache/{name}_labels.json'
    labels = []
    if os.path.exists(filename):
        # Use a context manager so the handle is closed as soon as the
        # cache has been parsed instead of whenever it is garbage-collected.
        with open(filename) as cache_file:
            from_cache = json.load(cache_file)
        if isinstance(from_cache, dict) and from_cache.get('keys') == keys:
            labels = from_cache['labels']
    if not labels:
        # Single source of truth for the batch size so the progress output
        # cannot drift from the actual chunking.
        page_size = 50
        print(len(keys))
        for num, cur in enumerate(utils.chunk(keys, page_size)):
            print(f'{num * page_size} / {len(keys)}')
            labels += mediawiki.get_entities(cur, props='labels')

        # Closing the file explicitly guarantees the JSON is flushed to
        # disk before the function returns.
        with open(filename, 'w') as cache_file:
            json.dump({'keys': keys, 'labels': labels}, cache_file, indent=2)

    return {
        entity['id']: wikibase.get_entity_label(entity)
        for entity in labels
    }
示例#3
0
def get_labels_db(keys):
    """Return a dict of labels keyed by id, backed by the ``Item`` table.

    Each distinct key is looked up with ``Item.query.get`` using the key
    minus its first character. Keys with no stored item are requested from
    ``mediawiki.get_entities`` in batches of 50; every returned entity that
    has no ``'redirects'`` key is added to the database session as a new
    ``Item`` and its label recorded. The session is committed once per
    batch, and progress is printed to stdout.
    """
    labels = {}
    missing = set()
    for qid in set(keys):
        stored = Item.query.get(qid[1:])
        if not stored:
            missing.add(qid)
            continue
        labels[qid] = stored.label

    total_missing = len(missing)
    print(total_missing)

    page_size = 50
    timestamp_format = "%Y-%m-%dT%H:%M:%SZ"
    offset = 0
    for batch in utils.chunk(missing, page_size):
        print(f'{offset} / {total_missing}')
        offset += page_size

        for entity in mediawiki.get_entities(batch):
            if 'redirects' in entity:
                continue

            qid = entity['id']
            modified = datetime.strptime(entity['modified'], timestamp_format)

            # FIXME: check if the item is an artwork and set is_artwork correctly
            new_item = Item(
                item_id=qid[1:],
                entity=entity,
                lastrevid=entity['lastrevid'],
                modified=modified,
                is_artwork=False,
            )
            database.session.add(new_item)
            labels[qid] = new_item.label

        # One commit per batch of fetched entities.
        database.session.commit()

    return labels
示例#4
0
def get_labels_db(keys):
    """Return a dict of labels keyed by id, backed by the ``Item`` table.

    Each distinct key that matches ``re_qid`` is looked up with
    ``Item.query.get`` using the integer from the first regex group. Keys
    that do not match, or that have no stored item, are requested from
    ``mediawiki.get_entities`` in batches of 50. Every returned entity
    without a ``'redirects'`` key is added to the database session as a new
    ``Item`` and its label recorded; the session is committed once per
    batch.

    Fetching is best effort: if the request raises
    ``requests.exceptions.ReadTimeout`` the remaining batches are skipped,
    a warning is logged, and the labels collected so far are returned.

    Args:
        keys: Iterable of id strings.

    Returns:
        dict mapping id to label for every key that could be resolved.
    """
    import logging

    keys = set(keys)
    labels = {}
    missing = set()
    for qid in keys:
        m = re_qid.match(qid)
        if m:
            item_id = int(m.group(1))
            item = Item.query.get(item_id)
            if item:
                labels[qid] = item.label
                continue

        missing.add(qid)

    page_size = 50
    try:
        for cur in utils.chunk(missing, page_size):
            for entity in mediawiki.get_entities(cur):
                if 'redirects' in entity:
                    continue

                qid = entity['id']

                modified = datetime.strptime(entity['modified'],
                                             "%Y-%m-%dT%H:%M:%SZ")
                # FIXME: check if the item is an artwork and set is_artwork correctly
                item = Item(item_id=qid[1:],
                            entity=entity,
                            lastrevid=entity['lastrevid'],
                            modified=modified,
                            is_artwork=False)
                database.session.add(item)
                labels[qid] = item.label
            database.session.commit()
    except requests.exceptions.ReadTimeout:
        # Deliberately best effort: keep whatever was resolved, but leave a
        # trace so truncated results are not mistaken for complete ones.
        logging.getLogger(__name__).warning(
            'read timeout while fetching entities; returning %d labels '
            'for %d requested keys',
            len(labels),
            len(keys),
        )

    return labels