Example #1
def GetData(params, site = None, useAPI = True, retryCount = 5, encodeTitle = True, sysop = False, back_response = False):
    """Get data from the query api, and convert it into a data object
    """
    if not site:
        site = wikipedia.getSite()
    data = {}
    titlecount = 0

    for k,v in params.iteritems():
        if k == u'file':
            data[k] = v
        elif type(v) == list:
            if k in [u'titles', u'pageids', u'revids', u'ususers'] and len(v) > 10:
                # Titles param might be long; in that case convert it to a POST request
                titlecount = len(params[k])
                data[k] = unicode(ListToParam(v))
            else:
                params[k] = unicode(ListToParam(v))

        elif not isinstance(v,basestring):
            params[k] = unicode(v)
        elif type(v) == unicode:
            params[k] = ToUtf8(v)

    if 'format' not in params or params['format'] != u'json':
        params['format'] = u'json'

    if not useAPI:
        params['noprofile'] = ''

    if data:
        for k in data:
            del params[k]

    if wikipedia.verbose: #dump params info.
        wikipedia.output(u"==== API action:%s ====" % params[u'action'])
        if data and 'file' not in data:
            wikipedia.output(u"%s: (%d items)" % (data.keys()[0], titlecount ) )

        for k, v in params.iteritems():
            if k not in ['action', 'format', 'file', 'xml', 'text']:
                if k == 'lgpassword' and wikipedia.verbose == 1:
                    v = u'XXXXX'
                elif not isinstance(v, unicode):
                    v = v.decode('utf-8')
                wikipedia.output(u"%s: %s" % (k, v) )
        wikipedia.output(u'-' * 16 )


    postAC = [
        'edit', 'login', 'purge', 'rollback', 'delete', 'undelete', 'protect', 'parse',
        'block', 'unblock', 'move', 'emailuser','import', 'userrights', 'upload', 'patrol'
    ]
    if useAPI:
        if params['action'] in postAC:
            path = site.api_address()
            cont = ''
        else:
            path = site.api_address() + site.urlEncode(params.items())

    else:
        path = site.query_address() + site.urlEncode(params.items())

    if wikipedia.verbose:
        if titlecount > 1:
            wikipedia.output(u"Requesting %d %s from %s" % (titlecount, data.keys()[0], site))
        else:
            wikipedia.output(u"Requesting API query from %s" % site)

    lastError = None
    retry_idle_time = 1

    while retryCount >= 0:
        try:
            jsontext = "Nothing received"
            if params['action'] == 'upload' and ('file' in data):
                import upload
                res, jsontext = upload.post_multipart(site, path, params.items(),
                  (('file', params['filename'].encode(site.encoding()), data['file']),),
                  site.cookies(sysop=sysop)
                  )
            elif params['action'] in postAC:
                res, jsontext = site.postForm(path, params, sysop, site.cookies(sysop = sysop) )
            else:
                if back_response:
                    res, jsontext = site.getUrl( path, retry=True, data=data, sysop=sysop, back_response=True)
                else:
                    jsontext = site.getUrl( path, retry=True, sysop=sysop, data=data)

            # This will also work, but all unicode strings will need to be converted from \u notation
            # decodedObj = eval( jsontext )

            jsontext = json.loads( jsontext )

            if "error" in jsontext:
                errorDetails = jsontext["error"]
                if errorDetails["code"] == 'badtoken':
                    wikipedia.output('Received a bad login token error from the server.  Attempting to refresh.')
                    params['token'] = site.getToken(sysop = sysop, getagain = True)
                    continue

            if back_response:
                return res, jsontext
            else:
                return jsontext

        except ValueError, error:
            if "<title>Wiki does not exist</title>" in jsontext:
                raise wikipedia.NoSuchSite(u'Wiki %s does not exist yet' % site)

            if 'Wikimedia Error' in jsontext: #wikimedia server error
                raise wikipedia.ServerError

            retryCount -= 1
            wikipedia.output(u"Error downloading data: %s" % error)
            wikipedia.output(u"Request %s:%s" % (site.lang, path))
            lastError = error
            if retryCount >= 0:
                wikipedia.output(u"Retrying in %i minutes..." % retry_idle_time)
                time.sleep(retry_idle_time*60)
                # Next time wait longer, but not longer than half an hour
                retry_idle_time *= 2
                if retry_idle_time > 30:
                    retry_idle_time = 30
            else:
                wikipedia.debugDump('ApiGetDataParse', site, str(error) + '\n%s\n%s' % (site.hostname(), path), jsontext)
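A minimal usage sketch for the version above, assuming it runs in the same module context as GetData (Python 2, pywikibot compat); the meta=siteinfo request is a standard MediaWiki API query, but the exact response layout depends on the wiki:

# Minimal usage sketch, assuming the same module context as GetData above
# (Python 2, pywikibot compat).
site = wikipedia.getSite()
params = {
    'action': 'query',
    'meta': 'siteinfo',
}
result = GetData(params, site=site)
# GetData returns the decoded JSON as a plain dict; for this query the site
# name, if the wiki exposes it, sits under result['query']['general']['sitename'].
wikipedia.output(u"Site name: %s"
                 % result.get('query', {}).get('general', {}).get('sitename'))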
Example #2
def GetData(params, site=None, useAPI=True, retryCount=config.maxretries,
            encodeTitle=True, sysop=False, back_response=False):
    """Get data from the query api, and convert it into a data object
    """
    if ('action' in params) and pywikibot.simulate and \
       (params['action'] in pywikibot.config.actions_to_block):
        pywikibot.output(
            u'\03{lightyellow}SIMULATION: %s action blocked.\03{default}'
            % params['action'])
        jsontext_dummy = {params['action']: {u'result': u''}}
        if back_response:
            import StringIO
            res_dummy = StringIO.StringIO()
            res_dummy.__dict__.update({u'code': 0, u'msg': u''})
            return res_dummy, jsontext_dummy
        else:
            return jsontext_dummy

    if not site:
        site = pywikibot.getSite()
    data = {}
    titlecount = 0

    for k, v in params.iteritems():
        if k == u'file':
            data[k] = v
        elif type(v) == list:
            if k in [u'titles', u'pageids', u'revids',
                     u'ususers'] and len(v) > 10:
                # Titles param might be long; in that case convert it to a POST request
                titlecount = len(params[k])
                data[k] = unicode(ListToParam(v))
            else:
                params[k] = unicode(ListToParam(v))

        elif not isinstance(v, basestring):
            params[k] = unicode(v)
        elif type(v) == unicode:
            params[k] = ToUtf8(v)

    if 'format' not in params or params['format'] != u'json':
        params['format'] = u'json'

    if 'action' in params and params['action'] == 'query' and not (
            'continue' in params or 'rawcontinue' in params):
        params['rawcontinue'] = ''

    if not useAPI:
        params['noprofile'] = ''

    if data:
        for k in data:
            del params[k]

    if pywikibot.verbose:  # dump params info.
        pywikibot.output(u"==== API action:%s ====" % params[u'action'])
        if data and 'file' not in data:
            pywikibot.output(u"%s: (%d items)" % (data.keys()[0], titlecount))

        for k, v in params.iteritems():
            if k not in ['action', 'format', 'file', 'xml', 'text']:
                if k == 'lgpassword' and pywikibot.verbose == 1:
                    v = u'XXXXX'
                elif not isinstance(v, unicode):
                    v = v.decode('utf-8')
                pywikibot.output(u"%s: %s" % (k, v))
        pywikibot.output(u'-' * 16)

    postAC = [
        'edit', 'login', 'purge', 'rollback', 'delete', 'undelete', 'protect',
        'parse', 'block', 'unblock', 'move', 'emailuser', 'import',
        'userrights', 'upload', 'patrol', 'wbcreateclaim', 'wbeditentity',
        'wbremoveclaims'
    ]
    if site.versionnumber() >= 18:
        postAC.append('watch')
    if useAPI:
        if params['action'] in postAC or params['action'][:5] == 'wbset':
            path = site.api_address()
        else:
            path = site.api_address() + site.urlEncode(params.items())

    else:
        path = site.query_address() + site.urlEncode(params.items())

    if pywikibot.verbose:
        if titlecount > 1:
            pywikibot.output(u"Requesting %d %s from %s"
                             % (titlecount, data.keys()[0], site))
        else:
            pywikibot.output(u"Requesting API query from %s" % site)

    lastError = None
    retry_idle_time = 1

    while retryCount >= 0:
        try:
            jsontext = "Nothing received"
            if params['action'] == 'upload' and ('file' in data):
                import upload
                res, jsontext = upload.post_multipart(
                    site, path, params.items(),
                    (('file', params['filename'].encode(site.encoding()),
                      data['file']), ),
                    site.cookies(sysop=sysop))
            elif params['action'] in postAC or params['action'][:5] == 'wbset':
                res, jsontext = site.postForm(path, params, sysop,
                                              site.cookies(sysop=sysop))
            else:
                if back_response:
                    res, jsontext = site.getUrl(path, retry=True, data=data,
                                                sysop=sysop, back_response=True)
                else:
                    jsontext = site.getUrl(path, retry=True, sysop=sysop,
                                           data=data)

            # This will also work, but all unicode strings will need to be
            # converted from \u notation
##            decodedObj = eval(jsontext)

            jsontext = json.loads(jsontext)

            if "error" in jsontext:
                errorDetails = jsontext["error"]
                if errorDetails["code"] == 'badtoken':
                    pywikibot.output('Received a bad login token error from '
                                     'the server.  Attempting to refresh.')
                    params['token'] = site.getToken(sysop=sysop,
                                                    getagain=True)
                    continue

            if back_response:
                return res, jsontext
            else:
                return jsontext

        except ValueError, error:
            if "<title>Wiki does not exist</title>" in jsontext:
                raise pywikibot.NoSuchSite(u'Wiki %s does not exist yet' % site)

            if 'Wikimedia Error' in jsontext:  # wikimedia server error
                raise pywikibot.ServerError

            retryCount -= 1
            pywikibot.output(u"Error downloading data: %s" % error)
            pywikibot.output(u"Request %s:%s" % (site.lang, path))
            lastError = error
            if retryCount >= 0:
                pywikibot.output(u"Retrying in %i minutes..." % retry_idle_time)
                time.sleep(retry_idle_time * 60)
                # Next time wait longer, but not longer than half an hour
                retry_idle_time *= 2
                if retry_idle_time > 30:
                    retry_idle_time = 30
            else:
                pywikibot.debugDump('ApiGetDataParse', site,
                                    str(error) + '\n%s\n%s' % (site.hostname(),
                                                               path),
                                    jsontext)
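Example #2 is a later revision of the same function: it adds a simulation guard that short-circuits actions listed in pywikibot.config.actions_to_block, forces rawcontinue for plain query requests, and routes Wikibase actions (wbcreateclaim, wbeditentity, wbremoveclaims, wbset*) through POST. Below is a minimal sketch of how the simulation guard behaves, assuming the code runs in the same module context as GetData above and that 'edit' is listed in actions_to_block (both are configuration details of the compat framework, not guaranteed defaults):

# Hypothetical illustration of the simulation short-circuit (Python 2,
# pywikibot compat), assuming 'edit' is in pywikibot.config.actions_to_block.
pywikibot.simulate = True  # normally enabled via the -simulate command-line flag
params = {'action': 'edit', 'title': u'Sandbox', 'text': u'test', 'token': u'+\\'}
result = GetData(params)
# No HTTP request is made; GetData returns a dummy result instead:
# {'edit': {u'result': u''}}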