def login(self, username, password, domain=None):
    """Login via MediaWiki API

    Posts an C{action=login} request to the wiki's API endpoint. If the wiki
    answers with C{NeedToken} (two-step login handshake, see the MediaWiki
    API:Login documentation), the request is repeated once with the token
    the wiki returned as C{lgtoken}.

    NOTE(review): the second request presumably only succeeds if
    C{self.opener} keeps the session cookie from the first one -- confirm.

    @param username: username
    @type username: unicode

    @param password: password
    @type password: unicode

    @param domain: optional domain
    @type domain: unicode

    @returns: True if login succeeded, False otherwise
    @rtype: bool
    """

    args = {
        'action': 'login',
        'lgname': username.encode('utf-8'),
        'lgpassword': password.encode('utf-8'),
        'format': 'json',
    }
    if domain is not None:
        args['lgdomain'] = domain.encode('utf-8')
    api_url = '%sapi%s' % (self.base_url, self.script_extension)

    # At most two round trips: the initial request and, if the wiki asks for
    # it, one confirmation request that carries the login token.
    for _ in range(2):
        result = utils.fetch_url(api_url,
            post_data=args,
            ignore_errors=False,
            opener=self.opener,
        )
        result = json.loads(result)
        if 'login' not in result:
            return False
        status = result['login'].get('result')
        if status == 'Success':
            return True
        token = result['login'].get('token')
        if status != 'NeedToken' or not token:
            return False
        # Send the token back exactly once; encoded like the other arguments.
        args['lgtoken'] = token.encode('utf-8')
    return False
def getDiskPath(self, name, size=None):
    """Download the image with the given name and return its local filename

    The image URL is looked up with C{self.getURL(name, size=size)} and the
    file is stored below C{self.tmpdir}. The local file name is built from
    the image name, the requested size (if any) and the extension taken from
    the URL.

    @param name: image name (without namespace, i.e. without 'Image:')
    @type name: unicode

    @param size: if given, the image is converted to the given maximum width
    @type size: int or NoneType

    @returns: filename of image or None if image could not be found
    @rtype: basestring
    """

    assert isinstance(name, unicode), 'name must be of type unicode'

    image_url = self.getURL(name, size=size)
    if image_url is None:
        return None

    # Everything after the last dot of the URL is used as file extension.
    extension = image_url.rsplit('.')[-1]
    if size is None:
        suffix = '.%s' % extension
    else:
        suffix = '%dpx.%s' % (size, extension)

    target = os.path.join(self.tmpdir, utils.fsescape(name + suffix))
    fetched = utils.fetch_url(image_url, ignore_errors=True, output_filename=target)
    if not fetched:
        return None
    return target
def addLicense(mbook):
    """Fetch the license text from LICENSE_URL and store it in the metabook

    The text is fetched with errors not ignored, decoded as UTF-8 and stored
    as the only entry of C{mbook['licenses']}, replacing any previous value
    of that key.

    @param mbook: metabook to modify in place
    @type mbook: dict-like (supports item assignment)
    """

    raw_text = utils.fetch_url(
        LICENSE_URL,
        ignore_errors=False,
        expected_content_type='text/x-wiki',
    )
    mbook['licenses'] = [{
        'mw_rights_text': unicode(raw_text, 'utf-8'),
        'name': 'GNU Free Documentation License',
    }]
def query(self, ignore_errors=True, num_tries=2, **kwargs):
    """Send an C{action=query} request to the MediaWiki API

    @param ignore_errors: passed on to C{utils.fetch_url}; if true, None is
        returned instead of raising RuntimeError when no data was received
        or the response could not be decoded/parsed
    @type ignore_errors: bool

    @param num_tries: number of attempts made to fetch the URL
    @type num_tries: int

    @param kwargs: additional query arguments; unicode values are encoded
        as UTF-8 before being put into the URL

    @returns: value of the 'query' key of the JSON response, or None if the
        response has no such key (or on errors, if ignore_errors is true)

    @raises RuntimeError: if ignore_errors is false and the response could
        not be decoded or parsed. An exception raised by the last fetch
        attempt is re-raised unchanged.
    """

    args = {
        'action': 'query',
        'format': 'json',
    }
    args.update(**kwargs)
    for k, v in args.items():
        if isinstance(v, unicode):
            args[k] = v.encode('utf-8')
    q = urllib.urlencode(args)
    q = q.replace('%3A', ':') # fix for wrong quoting of url for images
    q = q.replace('%7C', '|') # fix for wrong quoting of API queries (relevant for redirects)
    url = '%sapi%s?%s' % (self.base_url, self.script_extension, q)

    # Pre-set so that the checks below also work if the loop body never runs
    # (num_tries < 1) instead of failing with a NameError.
    data = None
    for i in range(num_tries):
        try:
            s = time.time()
            data = utils.fetch_url(url,
                ignore_errors=ignore_errors,
                opener=self.opener,
            )
            elapsed = time.time() - s
            if elapsed > self.long_request:
                log.warn('Long request: HTTP request took %f s' % elapsed)
            if data is not None:
                break
        except Exception:
            # Only real errors are retried; KeyboardInterrupt and SystemExit
            # propagate immediately.
            if i == num_tries - 1:
                raise
            log.warn('Fetching failed. Trying again.')
            time.sleep(0.5)

    if ignore_errors and data is None:
        log.error('Got no data from api%s' % self.script_extension)
        return None
    try:
        data = unicode(data, 'utf-8')
        if data and data[0] == u'\ufeff': # strip off BOM
            # Note that a BOM is actually *not allowed* at the beginning of a JSON string
            # see http://www.ietf.org/rfc/rfc4627.txt, section "3. Encoding"
            data = data[1:]
        return json.loads(data)['query']
    except KeyError:
        log.error('Response from api%s did not contain a query result' % self.script_extension)
        return None
    except Exception as e:
        log.error('Got exception: %r' % e)
        if ignore_errors:
            return None
        raise RuntimeError('api%s query failed. Are you sure you specified the correct base URL?' % self.script_extension)
def get_licenses(self):
    """Return list of licenses

    For every entry of C{self.metabook['licenses']} the wikitext is taken
    from C{mw_rights_text} if set, otherwise it is fetched from
    C{mw_license_url} (if set). C{mw_rights_page} and C{mw_rights_url} are
    appended as a wiki link resp. plain URL. Entries that end up without any
    wikitext are skipped.

    @returns: list of dicts with license info (keys 'title' and 'wikitext')
    @rtype: [dict]
    """

    if 'licenses' not in self.metabook:
        return []

    licenses = []
    for info in self.metabook['licenses']:
        wikitext = ''

        if info.get('mw_license_url'):
            wikitext = utils.fetch_url(
                info['mw_license_url'],
                ignore_errors=True,
                expected_content_type='text/x-wiki',
            )
            if wikitext:
                try:
                    wikitext = unicode(wikitext, 'utf-8')
                except UnicodeError:
                    # Keep a string here: the "+=" below would raise a
                    # TypeError if wikitext were None.
                    wikitext = ''
            else:
                wikitext = ''

        # Explicitly given text takes precedence over the fetched one.
        if info.get('mw_rights_text'):
            wikitext = info['mw_rights_text']
        if info.get('mw_rights_page'):
            wikitext += '\n\n[[%s]]' % info['mw_rights_page']
        if info.get('mw_rights_url'):
            wikitext += '\n\n' + info['mw_rights_url']

        if not wikitext:
            continue

        licenses.append({
            'title': info.get('name', u'License'),
            'wikitext': wikitext,
        })

    return licenses
def get_licenses(metabook):
    """Return list of licenses

    For every entry of C{metabook.licenses} the wikitext is taken from
    C{mw_rights_text} if set, otherwise it is fetched from
    C{mw_license_url} (if set). C{mw_rights_page} and C{mw_rights_url} are
    appended as a wiki link resp. plain URL. Entries that end up without any
    wikitext are skipped.

    @returns: list of objects created with
        C{license(title=..., wikitext=...)}, one per usable entry
    @rtype: list
    """

    import re
    from mwlib import utils

    retval = []
    for lic in metabook.licenses:
        wikitext = ''

        if lic.get('mw_license_url'):
            url = lic['mw_license_url']
            # Raw index.php URLs get templates=expand appended unless the
            # URL already contains it.
            if re.match(r'^.*/index\.php.*action=raw', url) and 'templates=expand' not in url:
                url += '&templates=expand'
            wikitext = utils.fetch_url(
                url,
                ignore_errors=True,
                expected_content_type='text/x-wiki',
            )
            if wikitext:
                try:
                    wikitext = unicode(wikitext, 'utf-8')
                except UnicodeError:
                    # Keep a string here: the "+=" below would raise a
                    # TypeError if wikitext were None.
                    wikitext = ''
            else:
                wikitext = ''

        # Explicitly given text takes precedence over the fetched one.
        if lic.get('mw_rights_text'):
            wikitext = lic['mw_rights_text']
        if lic.get('mw_rights_page'):
            wikitext += '\n\n[[%s]]' % lic['mw_rights_page']
        if lic.get('mw_rights_url'):
            wikitext += '\n\n' + lic['mw_rights_url']

        if not wikitext:
            continue

        retval.append(
            license(title=lic.get('name', u'License'), wikitext=wikitext))

    return retval
def get_licenses(metabook):
    """Return list of licenses

    Each entry of C{metabook.licenses} contributes at most one result. Its
    wikitext comes from C{mw_rights_text} if that is set, else from the page
    fetched from C{mw_license_url} (if set); C{mw_rights_page} is appended
    as a wiki link and C{mw_rights_url} as a plain URL. Entries with no
    resulting wikitext are left out.

    @returns: list of objects created with
        C{license(title=..., wikitext=...)}, one per usable entry
    @rtype: list
    """

    import re
    from mwlib import utils

    retval = []
    for entry in metabook.licenses:
        wikitext = ''

        if entry.get('mw_license_url'):
            url = entry['mw_license_url']
            # Ask for expanded templates when fetching raw index.php pages,
            # unless the URL already requests that.
            if re.match(r'^.*/index\.php.*action=raw', url) and 'templates=expand' not in url:
                url += '&templates=expand'
            wikitext = utils.fetch_url(url,
                ignore_errors=True,
                expected_content_type='text/x-wiki',
            )
            if wikitext:
                try:
                    wikitext = unicode(wikitext, 'utf-8')
                except UnicodeError:
                    # Must stay a string: appending to None further down
                    # would raise a TypeError.
                    wikitext = ''
            else:
                wikitext = ''

        # mw_rights_text overrides whatever was fetched above.
        if entry.get('mw_rights_text'):
            wikitext = entry['mw_rights_text']
        if entry.get('mw_rights_page'):
            wikitext += '\n\n[[%s]]' % entry['mw_rights_page']
        if entry.get('mw_rights_url'):
            wikitext += '\n\n' + entry['mw_rights_url']

        if not wikitext:
            continue

        retval.append(license(title=entry.get('name', u'License'), wikitext=wikitext))

    return retval