Пример #1
0
class GetRevisionContent(GetCurrentContent):
    """
    Fetch revision content addressed by revision ID instead of page title.

    Overrides the input field, the field list and the examples of
    `GetCurrentContent`; note that the field list declared here carries no
    'redirects' parameter.
    """
    # Input is sent under the unprefixed 'revids' key.
    input_field = SingleParam('revids', key_prefix=False, attr='rev_id')
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False)]
    examples = [OperationExample('539916351')]
Пример #2
0
class GetPageHTML(Operation):
    """
    Fetch the response text of a page by title through the web client.

    The request URL is the base URL followed by the input title. The base
    URL is derived from the client's `source_info` when available, and
    falls back to `DEFAULT_BASE_URL` otherwise.
    """
    input_field = SingleParam('title')
    examples = [OperationExample('Africa', limit=1)]
    output_type = Operation
    _limit = 1

    def __init__(self, *a, **kw):
        """
        Accepts the same arguments as `Operation`, plus an optional
        `raise_exc` keyword (default True) controlling whether request
        exceptions propagate out of `process()`.
        """
        super(GetPageHTML, self).__init__(*a, **kw)
        self.web_client = getattr(self.client,
                                  'web_client',
                                  DEFAULT_WEB_CLIENT)
        # `kw` was already forwarded to the parent above, so removing the
        # key here would have no effect; a plain read is sufficient.
        self.raise_exc = kw.get('raise_exc', True)
        source_info = getattr(self.client, 'source_info', None)
        if source_info:
            main_title = source_info.mainpage
            main_url = source_info.base
            if main_title:
                # Strip the main page title off the end of its URL to get
                # the prefix shared by all page URLs.
                self.base_url = main_url[:-len(main_title)]
            else:
                # An empty title would make the slice `main_url[:0]`,
                # i.e. '', discarding the whole URL.
                self.base_url = main_url
        else:
            self.base_url = DEFAULT_BASE_URL
        self.url = self.base_url + self.input_param
        self.results = {}

    def process(self):
        """
        Perform the GET request for `self.url`.

        On success the response text is stored in `self.results` keyed by
        URL and `NoMoreResults` is raised. On failure the exception is
        recorded on `self.exception` and either re-raised (`raise_exc`
        true) or swallowed, in which case `self` is returned.
        """
        try:
            resp = self.web_client.get(self.url)
        except Exception as e:
            self.exception = e
            if self.raise_exc:
                raise
            return self
        self.results[self.url] = resp.text
        raise NoMoreResults()
Пример #3
0
class GetExternalLinks(QueryOperation):
    """
    Fetch page outgoing links to URLs outside of source wiki.
    """
    field_prefix = 'el'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'extlinks')]
    output_type = [ExternalLink]
    examples = [OperationExample('Croatian War of Independence')]

    def extract_results(self, query_resp):
        """
        Build one `ExternalLink` per 'extlinks' entry of every page.

        Each link is created from a copy of its page's dict, extended with
        the operation's source and the link URL (the entry's '*' value).
        """
        links = []
        page_dicts = query_resp.get('pages', {})
        for page_dict in page_dicts.values():
            for ext_link in page_dict.get('extlinks', []):
                link_dict = dict(page_dict)
                link_dict['source'] = self.source
                link_dict['url'] = ext_link.get('*')
                links.append(ExternalLink.from_query(link_dict))
        return links

    def prepare_params(self, **kw):
        """
        Prepare parameters as the parent does, then move a truthy
        'elcontinue' value over to the 'eloffset' key.
        """
        params = super(GetExternalLinks, self).prepare_params(**kw)
        continue_value = params.get('elcontinue')
        if continue_value:
            del params['elcontinue']
            params['eloffset'] = continue_value
        return params
Пример #4
0
class WebRequestOperation(Operation):
    """
    Issue a single web request against the input URL and keep the
    response text.
    """
    input_field = SingleParam('url')
    output_type = Operation
    _limit = 1

    def __init__(self, input_param, **kw):
        """
        Recognized keywords: `client` (default None), `action` (default
        'get') and `raise_exc` (default True). Any other keyword raises
        `ValueError`.
        """
        self.client = kw.pop('client', None)
        self.web_client = getattr(
            self.client, 'web_client', DEFAULT_WEB_CLIENT)
        self.action = kw.pop('action', 'get')
        self.raise_exc = kw.pop('raise_exc', True)
        if kw:
            # Everything known has been popped; the rest is unsupported.
            raise ValueError('got unexpected keyword arguments: %r'
                             % kw.keys())
        self.set_input_param(input_param)
        self.url = self._input_param
        self.kwargs = kw
        self.results = {}

    def process(self):
        """
        Send the request through the web client.

        On success the response text is stored in `self.results` keyed by
        URL and `NoMoreResults` is raised. On failure the exception is
        stored on `self.exception`; it is re-raised when `raise_exc` is
        true, otherwise `self` is returned.
        """
        try:
            response = self.web_client.req(self.action, self.url)
        except Exception as exc:
            self.exception = exc
            if not self.raise_exc:
                return self
            raise
        self.results[self.url] = response.text
        raise NoMoreResults()
Пример #5
0
class GeoSearch(QueryOperation):
    """
    Search for pages around a coordinate using the 'geosearch' list.
    """
    field_prefix = 'gs'
    input_field = MultiParam('coord')
    fields = [
        StaticParam('list', 'geosearch'),
        SingleParam('radius', 10000),  # must be within 10 and 10000
        #SingleParam('maxdim', 1000),  # does not work?
        SingleParam('globe', 'earth'),  # celestial body being searched
        SingleParam('namespace'),
        StaticParam('gsprop', DEFAULT_COORD_PROPS)
    ]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(('37.8197', '-122.479'), 1)]

    def extract_results(self, query_resp):
        """
        Return one `CoordinateIdentifier` per 'geosearch' entry.

        A response without a 'geosearch' key yields an empty list rather
        than a KeyError, matching the other list-based operations.
        """
        ret = []
        for pid_dict in query_resp.get('geosearch', []):
            page_ident = PageIdentifier.from_query(pid_dict,
                                                   source=self.source)
            ret.append(CoordinateIdentifier(pid_dict, page_ident))
        return ret
Пример #6
0
class GetCurrentContent(QueryOperation):
    """
    Fetch full content for current (top) revision.
    """
    input_field = MultiParam('titles', key_prefix=False, attr='title')
    field_prefix = 'rv'
    fields = [
        StaticParam('prop', 'revisions'),
        MultiParam('prop', DEFAULT_PROPS + '|content'),
        SingleParam('parse', False),
        SingleParam('redirects', True, key_prefix=False)
    ]
    examples = [
        OperationExample('This page does not exist'),
        OperationExample('Coffee')
    ]
    output_type = Revision

    def extract_results(self, query_resp):
        """
        Return a `Revision` for the first listed revision of each page.

        Entries with a negative page ID are skipped, as are entries that
        carry no revisions. Every returned revision gets `req_title` set
        to this operation's input parameter.
        """
        ret = []
        # TODO: use query_resp['redirects'] ('from'/'to' pairs) to relate
        # each result back to the title that was actually requested.
        requested_title = self.input_param
        is_parsed = self.kwargs.get('rvparse', False)

        pages = query_resp.get('pages', {})
        # .items() works on both Python 2 and 3, unlike .iteritems().
        for page_id, pid_dict in pages.items():
            if int(page_id) < 0:
                continue
            revisions = pid_dict.get('revisions')
            if not revisions:
                # Nothing to build a Revision from; avoid a
                # KeyError/IndexError on the lookup below.
                continue
            rev_dict = dict(pid_dict)
            rev_dict.update(revisions[0])
            revision = Revision.from_query(rev_dict,
                                           source=self.source,
                                           is_parsed=is_parsed)
            revision.req_title = requested_title
            ret.append(revision)
        return ret
Пример #7
0
class GetFeedbackV5(QueryOperation):
    """
    Article feedback v5 breaks standards in a couple of ways:
      * the various v5 APIs use different prefixes (af/afvf)
      * it doesn't put its results under 'query', requiring a custom
        post_process_response()
    """
    field_prefix = 'afvf'
    input_field = SingleParam('pageid')
    fields = [StaticParam('list', 'articlefeedbackv5-view-feedback'),
              SingleParam('filter', default='featured')]
    output_type = list
    examples = [OperationExample('604727')]

    def post_process_response(self, response):
        """
        Return the response's results as a plain dict, or an empty dict
        when there are none.
        """
        if response.results:
            return dict(response.results)
        return {}

    def extract_results(self, query_resp):
        """
        Return a list of 'TODO' placeholders, one per reported feedback
        item (the 'count' value of the feedback section).
        """
        feedback = query_resp['articlefeedbackv5-view-feedback']
        total = int(feedback['count'])
        return ['TODO'] * total
Пример #8
0
class GetTranscludes(QueryOperation):
    """
    Fetch info for pages returned by the 'embeddedin' generator for a
    template title.
    """
    input_field = SingleParam('title', val_prefix='Template:')
    field_prefix = 'gei'
    fields = [StaticParam('generator', 'embeddedin'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Template:ArticleHistory')]

    def extract_results(self, query_resp):
        """
        Return a `PageInfo` for every entry under 'pages' (an empty list
        when the key is absent).
        """
        page_dicts = query_resp.get('pages', {})
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in page_dicts.values()]
Пример #9
0
class GetUserContribs(QueryOperation):
    """
    Fetch revision info from the 'usercontribs' list for a user.
    """
    field_prefix = 'uc'
    input_field = SingleParam('user')
    fields = [StaticParam('list', 'usercontribs'),
              StaticParam('ucprop', DEFAULT_PROPS)]
    output_type = [RevisionInfo]
    examples = [OperationExample('Jimbo Wales')]

    def extract_results(self, query_resp):
        """
        Return a `RevisionInfo` for every entry under 'usercontribs' (an
        empty list when the key is absent).
        """
        contribs = query_resp.get('usercontribs', [])
        return [RevisionInfo.from_query(contrib, source=self.source)
                for contrib in contribs]
Пример #10
0
class GetCoordinates(QueryOperation):
    """
    Fetch the coordinates attached to pages.
    """
    field_prefix = 'co'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'coordinates'),
        SingleParam('primary', 'all'),  # primary, secondary, all
        MultiParam('prop', DEFAULT_COORD_PROPS)
    ]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(['White House', 'Mount Everest'])]

    def extract_results(self, query_resp):
        """
        Return a `CoordinateIdentifier` for every coordinate of every page.

        Pages without a 'coordinates' entry contribute nothing, and a
        response without 'pages' yields an empty list.
        """
        ret = []
        for pid_dict in query_resp.get('pages', {}).values():
            page_ident = PageIdentifier.from_query(pid_dict,
                                                   source=self.source)
            for coord in pid_dict.get('coordinates', []):
                # Append inside the loop so every coordinate is kept, and
                # so a page with no coordinates cannot re-append a stale
                # (or unbound) identifier from an earlier iteration.
                ret.append(CoordinateIdentifier(coord, page_ident))
        return ret
Пример #11
0
class GetBacklinks(QueryOperation):
    """
    Fetch page's incoming links from other pages on source wiki.
    """
    field_prefix = 'gbl'
    input_field = SingleParam('title')
    fields = [
        StaticParam('generator', 'backlinks'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Return a `PageInfo` for every entry under 'pages' (an empty list
        when the key is absent).
        """
        ret = []
        # .values() is available on both Python 2 and 3; .iteritems() is
        # Python 2 only and the page-ID key was unused anyway.
        for pid_dict in query_resp.get('pages', {}).values():
            page_info = PageInfo.from_query(pid_dict, source=self.source)
            ret.append(page_info)
        return ret
Пример #12
0
class GetLinks(QueryOperation):
    """
    Fetch page's outgoing links to other pages on source wiki.
    """
    field_prefix = 'gpl'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [
        StaticParam('generator', 'links'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection'),
        MultiParam('namespace')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee'), OperationExample('Aabach')]

    def extract_results(self, query_resp):
        """
        Return a `PageInfo` for every entry under 'pages'.

        A response without a 'pages' key yields an empty list instead of
        raising KeyError.
        """
        ret = []
        # .values() works on both Python 2 and 3, unlike .iteritems().
        for pid_dict in query_resp.get('pages', {}).values():
            page_info = PageInfo.from_query(pid_dict, source=self.source)
            ret.append(page_info)
        return ret
Пример #13
0
class GetInterwikiLinks(QueryOperation):
    """
    Fetch pages' interwiki links.
    """
    field_prefix = 'iw'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'iwlinks'),
        SingleParam('url', True),
    ]
    output_type = [InterwikiLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Build one `InterwikiLink` per 'iwlinks' entry of every page.

        Each link is created from a copy of its page's dict, extended with
        the operation's source and the entry's 'url' and 'prefix' values.
        """
        links = []
        page_dicts = query_resp.get('pages', {})
        for page_dict in page_dicts.values():
            for iw_link in page_dict.get('iwlinks', []):
                link_dict = dict(page_dict)
                link_dict['source'] = self.source
                link_dict['url'] = iw_link.get('url')
                link_dict['prefix'] = iw_link.get('prefix')
                links.append(InterwikiLink.from_query(link_dict))
        return links
Пример #14
0
class GetCategory(QueryOperation):
    """
    Fetch the members in category.
    """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [
        StaticParam('generator', 'categorymembers'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection'),
        MultiParam('namespace')
    ]
    output_type = [PageInfo]
    examples = [OperationExample('Featured_articles')]

    def extract_results(self, query_resp):
        """
        Return a `PageInfo` for every entry under 'pages'.

        A response without a 'pages' key yields an empty list instead of
        raising KeyError.
        """
        ret = []
        # .values() works on both Python 2 and 3, unlike .iteritems().
        for pid_dict in query_resp.get('pages', {}).values():
            page_info = PageInfo.from_query(pid_dict, source=self.source)
            ret.append(page_info)
        return ret
Пример #15
0
class GetCategoryList(QueryOperation):
    """
    Fetch the categories containing pages.
    """
    field_prefix = 'gcl'
    input_field = MultiParam('titles', key_prefix=False)
    # NOTE(review): 'gclshow' already carries the 'gcl' prefix; confirm
    # that SingleParam does not prepend field_prefix a second time.
    fields = [
        StaticParam('generator', 'categories'),
        StaticParam('prop', 'categoryinfo'),
        SingleParam('gclshow', '')
    ]  # hidden, !hidden
    output_type = [CategoryInfo]
    examples = [OperationExample('Physics')]

    def extract_results(self, query_resp):
        """
        Return a `CategoryInfo` for every entry under 'pages', skipping
        those whose page ID is negative.

        A response without a 'pages' key yields an empty list instead of
        raising KeyError.
        """
        ret = []
        # .values() works on both Python 2 and 3, unlike .iteritems().
        for pid_dict in query_resp.get('pages', {}).values():
            cat_info = CategoryInfo.from_query(pid_dict, source=self.source)
            if cat_info.page_id < 0:
                continue
            ret.append(cat_info)
        return ret
Пример #16
0
class GetQueryPage(QueryOperation):
    """
    Fetch info for pages returned by the 'querypage' generator.
    """
    field_prefix = 'gqp'
    input_field = SingleParam('page')
    fields = [
        StaticParam('generator', 'querypage'),
        StaticParam('prop', 'info'),
        StaticParam('inprop', 'subjectid|talkid|protection')
    ]
    # NOTE(review): sibling generator operations declare [PageInfo] here
    # while extract_results() returns a list; confirm which is intended.
    output_type = PageInfo

    def extract_results(self, query_resp):
        """
        Return a `PageInfo` for every entry under 'pages'.

        A response without a 'pages' key yields an empty list instead of
        raising KeyError.
        """
        ret = []
        # .values() works on both Python 2 and 3, unlike .iteritems().
        for pid_dict in query_resp.get('pages', {}).values():
            page = PageInfo.from_query(pid_dict, source=self.source)
            ret.append(page)
        return ret

    def prepare_params(self, **kw):
        """
        Prepare parameters as the parent does, then move a truthy
        'gqpcontinue' value over to the 'gqpoffset' key.
        """
        params = super(GetQueryPage, self).prepare_params(**kw)
        if params.get('gqpcontinue'):
            # Pop the same key that was just tested; the previous
            # 'ggqpcontinue' spelling raised KeyError on every
            # continued query.
            params['gqpoffset'] = params.pop('gqpcontinue')
        return params
Пример #17
0
class GetSubcategoryInfos(QueryOperation):
    """
    Fetch `CategoryInfo` for category, used to count the members of
    sub-categories.
    """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [
        StaticParam('generator', 'categorymembers'),
        StaticParam('prop', 'categoryinfo'),
        StaticParam('gcmtype', 'subcat')
    ]
    output_type = [CategoryInfo]
    examples = [OperationExample('FA-Class_articles')]

    def extract_results(self, query_resp):
        """
        Return a `CategoryInfo` for every entry under 'pages', skipping
        those whose page ID is negative.

        The nested 'categoryinfo' values are flattened into a copy of the
        page dict before construction. A response without a 'pages' key
        yields an empty list instead of raising KeyError.
        """
        ret = []
        # .values() works on both Python 2 and 3, unlike .iteritems().
        for pid_dict in query_resp.get('pages', {}).values():
            # Work on a copy so the caller's response is not mutated.
            cat_dict = dict(pid_dict)
            cat_dict.update(pid_dict.get('categoryinfo', {}))
            cat_info = CategoryInfo.from_query(cat_dict, source=self.source)
            if cat_info.page_id < 0:
                continue
            ret.append(cat_info)
        return ret
Пример #18
0
class GetLanguageLinks(QueryOperation):
    """
    Fetch pages' interlanguage links (aka "Language Links" in the MediaWiki
    API). Interlanguage links should correspond to pages on another language
    wiki. Mostly useful on a source wiki with a family of similar multilingual
    projects, such as Wikipedias.
    """
    field_prefix = 'll'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [
        StaticParam('prop', 'langlinks'),
        SingleParam('url', True),
    ]
    output_type = [LanguageLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        """
        Build one `LanguageLink` per 'langlinks' entry of every page.

        Each link is created from a copy of its page's dict, extended with
        the operation's source, a 'url' taken from the entry's '*' value
        and a 'language' taken from its 'lang' value.
        """
        links = []
        page_dicts = query_resp.get('pages', {})
        for page_dict in page_dicts.values():
            for lang_link in page_dict.get('langlinks', []):
                link_dict = dict(page_dict)
                link_dict['source'] = self.source
                # NOTE(review): 'url' is filled from '*' although the
                # 'url' field is requested above; confirm against
                # LanguageLink whether the entry's 'url' was intended.
                link_dict['url'] = lang_link.get('*')
                link_dict['language'] = lang_link.get('lang')
                links.append(LanguageLink.from_query(link_dict))
        return links
Пример #19
0
class GetRandom(QueryOperation):
    """
    Fetch random pages using MediaWiki's Special:Random.
    """
    field_prefix = 'grn'
    fields = [StaticParam('generator', 'random'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              SingleParam('namespace', default='', coerce=coerce_namespace)]
    input_field = None
    output_type = [PageInfo]
    per_query_limit = QueryLimit(10, 20)
    examples = [OperationExample(doc='basic random')]

    def extract_results(self, query_resp):
        """
        Return a `PageInfo` for every entry under 'pages'.

        A response without a 'pages' key yields an empty list instead of
        raising KeyError.
        """
        ret = []
        # .values() works on both Python 2 and 3, unlike .iteritems().
        for pid_dict in query_resp.get('pages', {}).values():
            page_info = PageInfo.from_query(pid_dict,
                                            source=self.source)
            ret.append(page_info)
        return ret

    def get_cont_str(self, *a, **kw):
        """
        Always return an empty continue string, whatever the arguments.
        """
        return ''
Пример #20
0
class MediaWikiCall(Operation):
    """
    Sets up actual API HTTP request, makes the request, encapsulates
    error handling, and stores results.
    """
    input_field = SingleParam('url_params')  # param_type=dict)
    output_type = Operation

    _limit = 1

    def __init__(self, params, **kw):
        """
        Build the request parameters from `BASE_API_PARAMS` overlaid with
        `params`.

        Recognized keywords: `client` (required), `raise_exc` (default
        True), `raise_err` (default True) and `raise_warn` (default
        False). Any other keyword raises `ValueError`.
        """
        # These settings will all go on the WapitiClient
        self.raise_exc = kw.pop('raise_exc', True)
        self.raise_err = kw.pop('raise_err', True)
        self.raise_warn = kw.pop('raise_warn', False)
        self.client = kw.pop('client')
        self.web_client = getattr(
            self.client, 'web_client', DEFAULT_WEB_CLIENT)
        if kw:
            # Everything known has been popped; the rest is unsupported.
            raise ValueError('got unexpected keyword arguments: %r'
                             % kw.keys())
        self.api_url = self.client.api_url
        params = params or {}
        self.params = dict(BASE_API_PARAMS)
        self.params.update(params)
        self.action = params['action']

        # Outcome of the call, filled in by process().
        self.url = ''
        self.results = None
        self.servedby = None
        self.exception = None
        self.error = None
        self.error_code = None
        self.warnings = []

        self._input_param = params

    def process(self):
        """
        Perform the GET request, decode the JSON body and record the
        server name, error and warnings found in it.

        Request and decoding exceptions are stored on `self.exception`
        and re-raised only when `raise_exc` is true. An API error raises
        `WapitiException` when `raise_err` is true, and warnings do so
        when `raise_warn` is true. Returns `self` otherwise.
        """
        # TODO: add URL to all exceptions
        response = None
        try:
            response = self.web_client.get(self.api_url, self.params)
        except Exception as exc:
            # TODO: log
            self.exception = exc  # TODO: wrap
            if not self.raise_exc:
                return self
            raise
        finally:
            # Runs on success and failure alike; '' when no response.
            self.url = getattr(response, 'url', '')

        try:
            self.results = json.loads(response.text)
        except Exception as exc:
            self.exception = exc  # TODO: wrap
            if not self.raise_exc:
                return self
            raise

        payload = self.results
        self.servedby = payload.get('servedby')

        api_error = payload.get('error')
        if api_error:
            self.error = api_error.get('info')
            self.error_code = api_error.get('code')

        for mod_name, warn_dict in payload.get('warnings', {}).items():
            # Prefer the '*' text; fall back to the whole warning dict.
            warn_text = warn_dict.get('*', warn_dict)
            self.warnings.append('%s: %s' % (mod_name, warn_text))

        if self.error and self.raise_err:
            raise WapitiException(self.error_code)
        if self.warnings and self.raise_warn:
            raise WapitiException('warnings: %r' % self.warnings)
        return self

    @property
    def notices(self):
        """
        List of whatever went wrong: the stored exception, the API error
        message and all warnings, in that order, omitting empty ones.
        """
        found = [item for item in (self.exception, self.error) if item]
        if self.warnings:
            found.extend(self.warnings)
        return found

    @property
    def remaining(self):
        """
        0 once the operation is done, 1 otherwise.
        """
        return 0 if self.done else 1