class GetPageRevisionInfos(QueryOperation):
    """Fetch revision metadata for one or more pages."""
    field_prefix = 'rv'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS)]
    output_type = [RevisionInfo]
    examples = [OperationExample('Coffee', 10)]

    def extract_results(self, query_resp):
        results = []
        for page_dict in query_resp.get('pages', {}).values():
            # pages flagged as missing carry no usable revisions
            if 'missing' in page_dict:
                continue
            for revision in page_dict.get('revisions', []):
                # merge page-level fields with the revision-level ones
                merged = dict(page_dict)
                merged.update(revision)
                results.append(RevisionInfo.from_query(merged,
                                                       source=self.source))
        return results
class GetPageInfo(QueryOperation):
    """Fetch basic page metadata (subject/talk ids, protection)."""
    field_prefix = 'in'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'info'),
              MultiParam('prop', 'subjectid|talkid|protection')]
    output_type = PageInfo
    examples = [OperationExample(['Coffee', 'Category:Africa'])]

    def extract_results(self, query_resp):
        # the page-id keys of the 'pages' mapping are not needed here
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in query_resp['pages'].itervalues()]
class GetRevisionContent(GetCurrentContent):
    """
    Fetch full content for specific revisions, selected by revision id
    rather than by title. Result extraction is inherited from
    GetCurrentContent.
    """
    input_field = SingleParam('revids', key_prefix=False, attr='rev_id')
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False)]
    examples = [OperationExample('539916351')]
class GetCurrentTalkContent(GetCurrentContent):
    """
    Fetch current content for pages' Talk: counterparts by prefixing
    each requested title with 'Talk:'.

    The simple prefix behavior means this operation will only work on
    namespace 0 pages. I wouldn't rely on this operation being
    around for long.
    """
    input_field = MultiParam('titles', val_prefix='Talk:', key_prefix=False)
    examples = [OperationExample('This page does not exist'),
                OperationExample('Coffee')]
class GetCoordinates(QueryOperation):
    """Fetch geographic coordinates attached to pages."""
    field_prefix = 'co'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'coordinates'),
              SingleParam('primary', 'all'),  # primary, secondary, all
              MultiParam('prop', DEFAULT_COORD_PROPS)]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(['White House', 'Mount Everest'])]

    def extract_results(self, query_resp):
        ret = []
        for k, pid_dict in query_resp['pages'].iteritems():
            page_ident = PageIdentifier.from_query(pid_dict,
                                                   source=self.source)
            # pages without coordinates lack the 'coordinates' key
            # entirely, so use .get() to avoid a KeyError and simply
            # yield no identifiers for them
            for coord in pid_dict.get('coordinates', []):
                coord_ident = CoordinateIdentifier(coord, page_ident)
                ret.append(coord_ident)
        return ret
class GetRevisionInfos(GetPageRevisionInfos):
    """ Fetch information about specific revision. """
    input_field = MultiParam('revids', attr='rev_id', key_prefix=False)
    output_type = RevisionInfo
    examples = [OperationExample(['538903663', '539916351', '531458383'])]

    def prepare_params(self, *a, **kw):
        # NOTE(review): *a/**kw are accepted but not forwarded to the
        # parent call -- confirm the base prepare_params takes no
        # required arguments
        ret = super(GetRevisionInfos, self).prepare_params()
        # drop the limit param the parent adds ('rvlimit'); presumably
        # the API rejects it alongside revids -- TODO confirm
        ret.pop(self.field_prefix + 'limit', None)
        return ret
class GetCurrentContent(QueryOperation):
    """ Fetch full content for current (top) revision. """
    input_field = MultiParam('titles', key_prefix=False, attr='title')
    field_prefix = 'rv'
    fields = [StaticParam('prop', 'revisions'),
              MultiParam('prop', DEFAULT_PROPS + '|content'),
              SingleParam('parse', False),
              SingleParam('redirects', True, key_prefix=False)]
    examples = [OperationExample('This page does not exist'),
                OperationExample('Coffee')]
    output_type = Revision

    def extract_results(self, query_resp):
        # TODO: resolve the 'redirects' list in query_resp
        # (list of {'from': ..., 'to': ...} mappings)
        req_title = self.input_param
        parsed = self.kwargs.get('rvparse', False)
        revisions = []
        for page_id, page_dict in query_resp.get('pages', {}).iteritems():
            # negative page ids mark missing/invalid pages
            if int(page_id) < 0:
                continue
            # merge page fields with the top (current) revision's fields
            merged = dict(page_dict)
            merged.update(page_dict['revisions'][0])
            rev = Revision.from_query(merged,
                                      source=self.source,
                                      is_parsed=parsed)
            rev.req_title = req_title
            revisions.append(rev)
        return revisions
class GetProtections(QueryOperation):
    """Fetch protection settings for pages."""
    field_prefix = 'in'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'info'),
              StaticParam('inprop', 'protection')]
    output_type = ProtectionInfo
    examples = [OperationExample('Coffee'),
                OperationExample('Coffee|House'),
                OperationExample(['Coffee', 'House'])]

    def extract_results(self, query_resp):
        # page ids are irrelevant; each page carries a 'protection' list
        return [ProtectionInfo(page['protection'])
                for page in query_resp['pages'].itervalues()]
class GetTemplates(QueryOperation):
    """Fetch the templates transcluded on pages."""
    field_prefix = 'gtl'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'templates'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in query_resp['pages'].itervalues()]
class GetSourceInfo(QueryOperation):
    """
    Fetch meta site information about the source wiki.

    The default properties include:

    - General source information: Main Page, base, sitename, generator,
      phpversion, phpsapi, dbtype, dbversion, case, rights, lang,
      fallback, fallback8bitEncoding, writeapi, timezone, timeoffset,
      articlepath, scriptpath, script, variantarticlepath, server,
      wikiid, time, misermode, maxuploadsize
    - Namespace map
    - Interwiki map
    - Statistics: pages, articles, edits, images, users, activeusers,
      admins, jobs
    """
    field_prefix = 'si'
    input_field = None
    fields = [StaticParam('meta', 'siteinfo'),
              MultiParam('prop', DEFAULT_PROPS)]
    output_type = SourceInfo

    def extract_results(self, query_resp):
        info = query_resp['general']
        # flatten namespace descriptors out of the namespaces mapping
        ns_map = tuple(NamespaceDescriptor(ns.get('id'),
                                           ns.get('*'),
                                           ns.get('canonical'))
                       for ns in query_resp.get('namespaces',
                                                {}).itervalues())
        iw_map = tuple(InterwikiDescriptor(iw.get('prefix'),
                                           iw.get('url'),
                                           iw.get('language'))
                       for iw in query_resp.get('interwikimap', {}))
        info['namespace_map'] = ns_map
        info['interwiki_map'] = iw_map
        info.update(query_resp['statistics'])
        return [SourceInfo(**info)]
class GetLinks(QueryOperation):
    """ Fetch page's outgoing links to other pages on source wiki. """
    field_prefix = 'gpl'
    input_field = SingleParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'links'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              MultiParam('namespace')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee'),
                OperationExample('Aabach')]

    def extract_results(self, query_resp):
        return [PageInfo.from_query(page_dict, source=self.source)
                for page_dict in query_resp['pages'].itervalues()]
class GetImages(QueryOperation):
    """ Fetch the images embedded on pages. """
    field_prefix = 'gim'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'images'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection')]
    output_type = [PageInfo]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        pages = []
        for page_id, page_dict in query_resp['pages'].iteritems():
            # negative ids mark pages without a real pageid
            if page_id.startswith('-'):
                page_dict['pageid'] = None  # TODO: breaks consistency :/
            pages.append(PageInfo.from_query(page_dict,
                                             source=self.source))
        return pages
class GetCategory(QueryOperation):
    """ Fetch the members in category. """
    field_prefix = 'gcm'
    input_field = SingleParam('title', val_prefix='Category:')
    fields = [StaticParam('generator', 'categorymembers'),
              StaticParam('prop', 'info'),
              StaticParam('inprop', 'subjectid|talkid|protection'),
              MultiParam('namespace')]
    output_type = [PageInfo]
    examples = [OperationExample('Featured_articles')]

    def extract_results(self, query_resp):
        return [PageInfo.from_query(member_dict, source=self.source)
                for member_dict in query_resp['pages'].itervalues()]
class GetInterwikiLinks(QueryOperation):
    """ Fetch pages' interwiki links. """
    field_prefix = 'iw'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'iwlinks'),
              SingleParam('url', True)]
    output_type = [InterwikiLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        links = []
        for page_dict in query_resp.get('pages', {}).values():
            for iw in page_dict.get('iwlinks', []):
                # combine page fields with this link's url and prefix
                merged = dict(page_dict)
                merged['source'] = self.source
                merged['url'] = iw.get('url')
                merged['prefix'] = iw.get('prefix')
                links.append(InterwikiLink.from_query(merged))
        return links
class GetImageInfos(QueryOperation):
    """Fetch metadata (from 'imageinfo') for image pages."""
    field_prefix = 'ii'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'imageinfo'),
              StaticParam('iiprop', IMAGE_INFO_PROPS)]
    output_type = [ImageInfo]

    def extract_results(self, query_resp):
        ret = []
        for k, pid_dict in query_resp['pages'].iteritems():
            # negative page id + non-local repository: presumably the
            # image lives on a shared repo (e.g. Commons) -- substitute
            # placeholder ids so construction below can proceed
            if int(k) < 0 and pid_dict['imagerepository'] != 'local':
                pid_dict['pageid'] = 'shared'
                pid_dict['revid'] = 'shared'
            try:
                # fold the first imageinfo entry into the page dict;
                # missing/empty imageinfo degrades to a no-op update
                pid_dict.update(pid_dict.get('imageinfo', [{}])[0])
                image_info = ImageInfo.from_query(pid_dict,
                                                  source=self.source)
            except ValueError as e:
                # NOTE(review): failures are printed to stdout and the
                # page skipped -- consider proper logging instead
                print e
                continue
            ret.append(image_info)
        return ret
class GetCategoryList(QueryOperation):
    """ Fetch the categories containing pages. """
    field_prefix = 'gcl'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('generator', 'categories'),
              StaticParam('prop', 'categoryinfo'),
              SingleParam('gclshow', '')]  # hidden, !hidden
    output_type = [CategoryInfo]
    examples = [OperationExample('Physics')]

    def extract_results(self, query_resp):
        cats = []
        for page_dict in query_resp['pages'].itervalues():
            cat_info = CategoryInfo.from_query(page_dict,
                                               source=self.source)
            # skip entries without a real page id
            if cat_info.page_id < 0:
                continue
            cats.append(cat_info)
        return cats
class GeoSearch(QueryOperation):
    """Search for pages near a given coordinate."""
    field_prefix = 'gs'
    input_field = MultiParam('coord')
    fields = [StaticParam('list', 'geosearch'),
              SingleParam('radius', 10000),  # must be within 10 and 10000
              #SingleParam('maxdim', 1000),  # does not work?
              SingleParam('globe', 'earth'),  # which planet? donno...
              SingleParam('namespace'),
              StaticParam('gsprop', DEFAULT_COORD_PROPS)]
    output_type = [CoordinateIdentifier]
    examples = [OperationExample(('37.8197', '-122.479'), 1)]

    def extract_results(self, query_resp):
        found = []
        for result_dict in query_resp['geosearch']:
            ident = PageIdentifier.from_query(result_dict,
                                              source=self.source)
            found.append(CoordinateIdentifier(result_dict, ident))
        return found
class GetLanguageLinks(QueryOperation):
    """
    Fetch pages' interlanguage links (aka "Language Links" in the
    MediaWiki API). Interlanguage links should correspond to pages on
    another language wiki. Mostly useful on a source wiki with a family
    of similar multilingual projects, such as Wikipedias.
    """
    field_prefix = 'll'
    input_field = MultiParam('titles', key_prefix=False)
    fields = [StaticParam('prop', 'langlinks'),
              SingleParam('url', True)]
    output_type = [LanguageLink]
    examples = [OperationExample('Coffee')]

    def extract_results(self, query_resp):
        links = []
        for page_dict in query_resp.get('pages', {}).values():
            for lang_link in page_dict.get('langlinks', []):
                # combine page fields with this link's url and language
                merged = dict(page_dict)
                merged['source'] = self.source
                merged['url'] = lang_link.get('*')
                merged['language'] = lang_link.get('lang')
                links.append(LanguageLink.from_query(merged))
        return links