Пример #1
0
def get_user_info(user_id, host):
    """Fetch the ``globaluserinfo`` record for a user id from *host*.

    Opens a ``Session`` for *host* and issues an ``action=query`` request
    with ``meta=globaluserinfo``, selecting the account through ``guiid``.

    Returns:
        The ``query.globaluserinfo`` mapping of the response. An empty dict
        is returned when the response contains an ``error`` key, or when
        either level of that path is missing.
    """
    response = Session(host).get(
        action='query',
        meta='globaluserinfo',
        guiid=user_id,
    )
    if 'error' not in response:
        query_section = response.get('query', {})
        return query_section.get('globaluserinfo', {})
    # The API reported an error: callers get an empty record, not a raise.
    return {}
Пример #2
0
def format_value(session: mwapi.Session, property_id: str,
                 value: dict) -> flask.Markup:
    """Format a data value as plain text via ``wbformatvalue``.

    Args:
        session: API session used to perform the request.
        property_id: Sent as the ``property`` parameter of the request.
        value: Data value; serialized with ``json.dumps`` and sent as
            ``datavalue``.

    Returns:
        The ``result`` field of the response, passed through
        ``flask.Markup.escape`` because the requested output format is
        ``text/plain`` rather than markup.
    """
    api_result = session.get(
        action='wbformatvalue',
        datavalue=json.dumps(value),
        property=property_id,
        generate='text/plain',
    )
    plain_text = api_result['result']
    return flask.Markup.escape(plain_text)
Пример #3
0
def get_entities(session: mwapi.Session, entity_ids: Iterable[str]) -> dict:
    """Load entities via ``wbgetentities`` and merge them into one dict.

    Duplicate ids are removed first, then the ids are requested in batches
    of at most 50 per API call, asking for the ``info`` and ``claims``
    props each time.

    Args:
        session: API session used to perform the requests.
        entity_ids: Ids to load; duplicates are tolerated.

    Returns:
        The union of the ``entities`` mappings of all responses. Empty when
        *entity_ids* is empty (no request is made in that case).
    """
    batch_size = 50
    unique_ids = list(set(entity_ids))
    merged = {}
    for start in range(0, len(unique_ids), batch_size):
        batch = unique_ids[start:start + batch_size]
        api_result = session.get(
            action='wbgetentities',
            ids=batch,
            props=['info', 'claims'],
            formatversion=2,
        )
        merged.update(api_result['entities'])
    return merged
Пример #4
0
def save_entity(entity_data: dict,
                summary: str,
                base_revision_id: Union[int, str],
                session: mwapi.Session) -> int:
    """Write *entity_data* back through ``wbeditentity``.

    Args:
        entity_data: Entity to save; its ``'id'`` entry is sent as the
            ``id`` parameter and the whole dict, JSON-encoded, as ``data``.
        summary: Edit summary sent with the request.
        base_revision_id: Sent as ``baserevid``.
        session: API session used to obtain the token and post the edit.

    Returns:
        The ``lastrevid`` reported under ``entity`` in the API response.
    """
    # Obtain the token before assembling the request, so that it is fetched
    # even if building the payload below fails.
    csrf = edit_token(session)

    edit_request = {
        'action': 'wbeditentity',
        'id': entity_data['id'],
        'data': json.dumps(entity_data),
        'summary': summary,
        'baserevid': base_revision_id,
        'token': csrf,
    }
    saved = session.post(**edit_request)
    return saved['entity']['lastrevid']
Пример #5
0
def edit_token(session: mwapi.Session) -> str:
    """Return an edit token / CSRF token for the MediaWiki API.

    Tokens are memoized in a dict kept on ``flask.g`` under
    ``'edit_tokens'``, keyed by ``session.host``; the API is only queried
    (``action=query``, ``meta=tokens``, ``type=csrf``) when no token is
    stored for that host yet.

    Not to be confused with csrf_token,
    which gets a token for use within the tool.
    """
    cache = flask.g.setdefault('edit_tokens', {})
    host = session.host
    if host not in cache:
        api_result = session.get(action='query',
                                 meta='tokens',
                                 type='csrf')
        cache[host] = api_result['query']['tokens']['csrftoken']
    return cache[host]
Пример #6
0
def _get_sitematrix(session: mwapi.Session) -> dict:
    """Fetch the site matrix and index every site by dbname and by URL.

    Issues a single ``action=sitematrix`` request. In the response, the
    ``count`` entry is only inspected (a warning is emitted when it exceeds
    5000, since no continuation is performed here); the ``specials`` entry
    is used directly as a list of sites; every other entry contributes the
    list under its ``'site'`` key.

    Returns:
        A dict with two lookups, ``'by_dbname'`` and ``'by_url'``, each
        mapping to the site dicts as returned by the API.
    """
    by_dbname = {}  # type: Dict[str, dict]
    by_url = {}  # type: Dict[str, dict]

    api_result = session.get(action='sitematrix', formatversion=2)
    for key, entry in api_result['sitematrix'].items():
        if key == 'count':
            # Not a site group: only sanity-check the reported total.
            if entry > 5000:
                warnings.warn('sitematrix reports more than 5000 sites (%d), '
                              'continuation might be necessary' % entry)
            continue
        site_list = entry if key == 'specials' else entry['site']
        for site in site_list:
            by_dbname[site['dbname']] = site
            by_url[site['url']] = site

    return {
        'by_dbname': by_dbname,
        'by_url': by_url,
    }
Пример #7
0
def prefetch_entities(session: mwapi.Session, entity_ids: AbstractSet[str]):
    """Fill ``format_entity_cache`` for the given ids that are not cached.

    While holding ``format_entity_cache_lock``: ids whose cache key (from
    ``format_entity_key``) is already present are skipped, the remaining
    ids are requested from ``wbformatentities`` in batches of at most 50,
    and each returned value is stored in the cache as ``flask.Markup``.

    Args:
        session: API session used for the requests and for building keys.
        entity_ids: Ids to make available in the cache.
    """
    with format_entity_cache_lock:
        missing_ids: List[str] = [
            candidate for candidate in entity_ids
            if format_entity_key(session, candidate) not in format_entity_cache
        ]
        for offset in range(0, len(missing_ids), 50):
            batch = missing_ids[offset:offset + 50]
            formatted = session.get(
                action='wbformatentities',
                ids=batch,
                formatversion=2,
            )['wbformatentities']
            for returned_id in formatted:
                cache_key = format_entity_key(session, returned_id)
                markup = flask.Markup(formatted[returned_id])
                format_entity_cache[cache_key] = markup
Пример #8
0
import numpy as np

from maya.nltk import util
from pandarallel import pandarallel

from script.TrustScore import TrustScore

sys.path.append("..")
from mwapi import Session
from maya.extractors.api import Extractor
import pandas as pd
from matplotlib import pyplot

import os.path

# Module-level API session against the English Wikipedia endpoint; created at
# import time and shared by everything in this module.
session = Session("https://en.wikipedia.org/w/api.php", user_agent="test")
# Extractor wrapping the shared session (used by getAllUsers below).
api_extractor = Extractor(session)

# Initialize pandarallel with its default settings. The commented-out line is
# the variant with an explicit worker count.
# pandarallel.initialize(nb_workers=10)
pandarallel.initialize()


def getAllUsers():
    """
    Fetch all users through the module-level ``api_extractor``.

    Takes no arguments. Calls ``api_extractor.get_all_user()`` and binds the
    result to the local name ``uc``.

    NOTE(review): the visible body stops after that call and has no
    ``return`` statement, so as shown the function returns ``None``. The
    original docstring described the result as a dataframe of the users and
    their edit counts -- confirm against the complete function.
    """
    uc = api_extractor.get_all_user()
Пример #9
0
def format_entity(session: mwapi.Session, entity_id: str) -> flask.Markup:
    """Format a single entity id via ``wbformatentities``.

    Args:
        session: API session used to perform the request.
        entity_id: Id to format; sent as a one-element ``ids`` list.

    Returns:
        The response's ``wbformatentities`` entry for *entity_id*, wrapped
        in ``flask.Markup`` as-is (the API output is not escaped again).
    """
    formatted = session.get(
        action='wbformatentities',
        ids=[entity_id],
        formatversion=2,
    )['wbformatentities']
    return flask.Markup(formatted[entity_id])