Example No. 1
def purgquery(falink):
    falink = falink.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([falink, 'purgquery'])):
        return total_cache[tuple([falink, 'purgquery'])]
    if falink:
        params = {'action': 'purge', 'titles': falink, 'forcelinkupdate': 1}

        try:
            categoryname = query.GetData(params, faSite)
            for item in categoryname[u'purge']:
                templateha = item[u'title']
                break
        except:
            pass
    enlink = englishdictionry(falink, 'fa', 'en')
    if enlink:
        enlink = enlink.replace(u' ', u'_')
        params = {'action': 'purge', 'titles': enlink, 'forcelinkupdate': 1}
        try:
            categoryname = query.GetData(params, enSite)
            for item in categoryname[u'purge']:
                templateha = item[u'title']
                break
        except:
            pass
    _cache[tuple([falink, 'purgquery'])] = True
Example No. 2
def getUserInfo(user):
    editcount = 0
    if not isIP(user):
        if users.has_key(user):
            editcount = users[user]['editcount']

        if editcount > preferences['newbie']:
            if not random.randint(0, 20):
                # avoid updating non-newbie users too often
                return editcount

        params = {
            'action': 'query',
            'list': 'users',
            'ususers': user,
            'usprop': 'editcount|groups',
        }
        data = query.GetData(params, site=preferences['site'])
        if 'error' not in data:
            # reuse the response fetched above instead of re-querying
            userdata = data['query']['users'][0]
            editcount = int(userdata.get('editcount', 0))
            groups = userdata.get('groups', [])
            users[user] = {
                'editcount': editcount,
                'groups': groups,
            }

        saveUserInfo()
Example No. 3
def loadGroups():
    #Info about groups: http://www.mediawiki.org/wiki/Manual:User_rights
    global groups

    groups = []
    params = {
        'action': 'query',
        'meta': 'siteinfo',
        'siprop': 'usergroups',
    }
    data = query.GetData(params, site=preferences['site'])
    if 'error' not in data:
        for item in data['query']['usergroups']:
            groups.append(item['name'])
Example No. 4
 def query(self):
     result = query.GetData(self.params, site=self.site)
     blocklist = result['query']['blocks']
     #Todo: handle possible errors (they will cause KeyError at this time)
     while 'query-continue' in result:
         self.params.update(result['query-continue']['blocks'])
         result = query.GetData(self.params)
         blocklist += result['query']['blocks']
     #Finally we remove possible duplicates. This piece of code may be
     #removed after successful closing of
     #https://bugzilla.wikimedia.org/show_bug.cgi?id=34029
     for b in blocklist:
         if blocklist.count(b) > 1:
             blocklist.pop(blocklist.index(b))
     return blocklist
Example No. 5
    def queryMonths(self):
        months_long = ['january', 'february', 'march', 'april', 'may_long', 'june',
                       'july', 'august', 'september', 'october', 'november', 'december']
        months_short = ['jan', 'feb', 'mar', 'apr', 'may', 'jun',
                        'jul', 'aug', 'sep', 'oct', 'nov', 'dec']

        #one query instead of multiple queries using site.mediawiki_message()
        #can be refactored to use site.mediawiki_message()
        params = {
            'action':     'query',
            'meta':       'allmessages',
            'ammessages': '|'.join(months_long) + '|' + '|'.join(months_short),
            'amlang':     self.site.lang,
        }

        monthsDict = query.GetData(params)['query']['allmessages']

        monthNum2origNames = dict((i, {'short': '', 'long': ''}) for i in range(1, 13))
        origNames2monthNum = dict()

        for el in monthsDict:
            orig, eng = el['*'], el['name']
            try:
                month_num = months_long.index(eng) + 1
                monthNum2origNames[month_num]['long'] = orig
            except ValueError:
                month_num = months_short.index(eng) + 1
                monthNum2origNames[month_num]['short'] = orig

            origNames2monthNum[orig] = month_num

        return monthNum2origNames, origNames2monthNum
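
A minimal usage sketch for queryMonths(); `bot` stands for an assumed instance of the class that defines the method, and the French month name is only illustrative:

num2names, names2num = bot.queryMonths()  # `bot` is a hypothetical instance
print num2names[3]['long']                # localized long name of March
print names2num[u'mars']                  # e.g. 3 on a French-language wiki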
Example No. 6
def query_api(data):
    predata = {
        'action': 'query',
        'prop': 'revisions',
    }
    predata = query.CombineParams(predata, data)
    return query.GetData(predata)
Example No. 7
def englishdictionry(enlink, firstsite, secondsite):
    try:
        enlink = unicode(str(enlink), 'UTF-8').replace(u'[[', u'').replace(
            u']]', u'').replace(u'en:', u'').replace(u'fa:', u'')
    except:
        enlink = enlink.replace(u'[[', u'').replace(u']]', u'').replace(
            u'en:', u'').replace(u'fa:', u'')
    enlink = enlink.split(u'#')[0].strip()
    enlink = enlink.replace(u' ', u'_')
    if _cache.get(tuple([enlink, 'englishdictionry'])):
        return _cache[tuple([enlink, 'englishdictionry'])]
    if enlink == u'':
        _cache[tuple([enlink, 'englishdictionry'])] = False
        return False

    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': enlink,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        _cache[tuple([enlink, 'englishdictionry'])] = result
        return result
    except:
        # assumed fallback: cache a negative result when no langlink is found
        # or the query fails
        _cache[tuple([enlink, 'englishdictionry'])] = False
        return False
Example No. 8
 def getdata(self):
     # Returns a dictionary with the result of the query
     # api.php?action=query&meta=siteinfo&siprop=statistics
     # View the data with:
     # api.php?action=query&meta=siteinfo&siprop=statistics&format=jsonfm
     params = {
         'action': 'query',
         'meta': 'siteinfo',
         'siprop': 'statistics',
     }
     pywikibot.output("\nQuerying api for json-formatted data...")
     data = None
     try:
         data = query.GetData(params, self.site, encodeTitle=False)
     except:
         url = (self.site.protocol() + '://' + self.site.hostname() +
                self.site.api_address())
         pywikibot.output(
             "The query has failed. Have you checked the API? Are cookies working?")
         pywikibot.output(u"\n>> \03{lightpurple}%s\03{default} <<" % url)
     if data is not None:
         pywikibot.output("Extracting statistics...")
         data = data['query']  # "query" entry of data.
         stats = data['statistics']  # "statistics" entry of the "query" dict.
         return stats
Example No. 9
    def RunQuery(self, params):
        while True:
            # Get data
            data = query.GetData(params)

            # Process received data
            yield data

            # Clear any continuations first
            if 'clcontinue' in params: del params['clcontinue']
            if 'plcontinue' in params: del params['plcontinue']

            if 'query-continue' not in data:
                if 'gapcontinue' in params: del params['gapcontinue']
                break

            qc = data['query-continue']
            # First continue properties only, once done, continue with allpages
            if 'categories' in qc or 'links' in qc:
                if 'categories' in qc: params.update(qc['categories'])
                if 'links' in qc: params.update(qc['links'])
            elif 'allpages' in qc:
                params.update(qc['allpages'])
            else:
                raise ValueError(u'Unexpected query-continue values: %s' % qc)
            continue
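
A hedged sketch of how a caller might drive RunQuery(); the parameters below are standard MediaWiki API query parameters, and `crawler` is only an assumed instance of the class that defines the method:

params = {
    'action': 'query',
    'generator': 'allpages',
    'gaplimit': '50',
    'prop': 'categories|links',
    'cllimit': 'max',
    'pllimit': 'max',
}
for data in crawler.RunQuery(params):  # `crawler` is a hypothetical instance
    for pageid, page in data['query']['pages'].items():
        print page['title'], len(page.get('links', []))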
Example No. 10
    def AddNoSuggestionTitle(self, title):
        if title in self.seenUnresolvedLinks:
            return True
        self.seenUnresolvedLinks.add(title)

        params = {
            'action': 'query',
            'list': 'backlinks',
            'bltitle': title,
            'bllimit': '50',
        }

        data = query.GetData(params)
        cl = 0
        redirs = 0
        if 'backlinks' in data['query']:
            bl = data['query']['backlinks']
            cl = len(bl)
            redirs = len([i for i in bl if 'redirect' in i])

        if cl > 0 and 'query-continue' in data:
            count = '50+'
        else:
            count = str(cl if cl > 0 else 'no backlinks')

        self.AppendLineToLog(
            self.nosuggestions,
            u'* %s (%s%s)' % (self.MakeLink(title), count, u', %d redirects' %
                              redirs if redirs > 0 else u''))
        return False
Example No. 11
    def get_contributions(self, max = -1, ns = None):
        predata = {
            'action': 'query',
            'list': 'usercontribs',
            'uclimit': '500',
            'ucuser': self.site.username(),
        }
        if ns: predata['ucnamespace'] = ns
        if max < 500 and max != -1: predata['uclimit'] = str(max)

        count = 0
        iterator = iter(xrange(0))
        never_continue = False
        while count != max or never_continue:
            try:
                item = iterator.next()
            except StopIteration:
                self.log(u'Fetching new batch of contributions')
                data = query.GetData(predata, self.site)
                if 'error' in data:
                    raise RuntimeError(data['error'])
                if 'query-continue' in data:
                    predata['uccontinue'] = data['query-continue']['usercontribs']
                else:
                    never_continue = True
                iterator = iter(data['query']['usercontribs'])
            else:
                count += 1
                yield item
Example No. 12
def uploadedYesterday(site=None):
    '''
    Return a pagegenerator containing all the pictures uploaded yesterday.
    Should probably be copied somewhere else.
    '''
    result = []
    dateformat = "%Y-%m-%dT00:00:00Z"
    today = datetime.utcnow()
    yesterday = today + timedelta(days=-1)

    params = {
        'action': 'query',
        'list': 'logevents',
        'leprop': 'title',
        'letype': 'upload',
        'ledir': 'newer',
        'lelimit': '5000',
        'lestart': yesterday.strftime(dateformat),
        'leend': today.strftime(dateformat)
    }

    data = query.GetData(params, site)
    try:
        for item in data['query']['logevents']:
            result.append(item['title'])
    except (IndexError, KeyError):
        raise NoPage(u'API Error, nothing found in the APIs')

    return pagegenerators.PagesFromTitlesGenerator(result, site)
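
A short usage sketch, assuming the same wikipedia and pagegenerators imports used elsewhere in these examples:

site = wikipedia.getSite()
for page in uploadedYesterday(site):
    wikipedia.output(page.title())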
Example No. 13
    def revert(self, item):
        predata = {
            'action': 'query',
            'titles': item['title'],
            'prop': 'revisions',
            'rvprop': 'ids|timestamp|user|content',
            'rvlimit': '2',
            'rvstart': item['timestamp'],
        }
        data = query.GetData(predata, self.site)

        if 'error' in data:
            raise RuntimeError(data['error'])

        pages = data['query'].get('pages', ())
        if not pages: return False
        page = pages.itervalues().next()
        if len(page.get('revisions', ())) != 2: return False
        rev = page['revisions'][1]

        comment = u'Reverted to revision %s by %s on %s' % (rev['revid'],
            rev['user'], rev['timestamp'])
        if self.comment: comment += ': ' + self.comment

        page = pywikibot.Page(self.site, item['title'])
        pywikibot.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                         % page.aslink(True, True))
        old = page.get()
        new = rev['*']
        pywikibot.showDiff(old, new)
        page.put(new, comment)
        return comment
Example No. 14
    def sendMail(self, subject=u'', text=u'', ccMe=False):
        if not hasattr(self, '_mailable'):
            self._load()
        if not self._mailable:
            raise UserActionRefuse("This user is not mailable")
        if not self.site().isAllowed('sendemail'):
            raise UserActionRefuse("You don't have permission to send mail")

        if not self.site().has_api() or self.site().versionnumber() < 14:
            return self.sendMailOld(subject, text, ccMe)

        params = {
            'action': 'emailuser',
            'target': self.name(),
            'token': self.site().getToken(),
            'subject': subject,
            'text': text,
        }
        if ccMe:
            params['ccme'] = 1
        result = query.GetData(params, self.site())
        if 'error' in result:
            code = result['error']['code']
            if code == 'usermaildisabled':
                wikipedia.output("User mail has been disabled")
            #elif code == '':
            #
        elif 'emailuser' in result:
            if result['emailuser']['result'] == 'Success':
                wikipedia.output(u'Email sent.')
                return True
        return False
Example No. 15
    def get_redirect_pageids_via_api(self):
        """Return generator that yields
        page IDs of Pages that are redirects.

        """
        params = {
            'action': 'query',
            'list': 'allpages',
            'apfilterredir': 'redirects',
            'aplimit': self.api_number,
            'apdir': 'ascending',
        }
        for ns in self.namespaces:
            params['apnamespace'] = ns
            if self.api_start:
                params['apfrom'] = self.api_start
            done = False
            while not done:
                pywikibot.output(u'\nRetrieving pages...', newline=False)
                data = query.GetData(params, self.site)
                if 'error' in data:
                    raise RuntimeError("API query error: %s" % data['error'])
                if "limits" in data:  # process aplimit = max
                    params['aplimit'] = int(data['limits']['allpages'])
                for x in data['query']['allpages']:
                    done = self.api_until and x['title'] >= self.api_until
                    if done: break
                    yield x['pageid']
                if not done and 'query-continue' in data:
                    params['apfrom'] = data['query-continue']['allpages'][
                        'apfrom']
                else:
                    break
Example No. 16
def englishdictionry(link, firstsite, secondsite):
    link = link.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([link, 'englishdictionry'])):
        return total_cache[tuple([link, 'englishdictionry'])]
    if link == u'':
        _cache[tuple([link, 'englishdictionry'])] = u''
        return u''
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': link,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        if result.find('#') != -1:
            _cache[tuple([link, 'englishdictionry'])] = u''
            return u''
        _cache[tuple([link, 'englishdictionry'])] = result
        return result
    except:
        # assumed fallback: cache the empty sentinel when no langlink is found
        # or the query fails
        _cache[tuple([link, 'englishdictionry'])] = u''
        return u''
Example No. 17
def namespacefinder(enlink, firstsite):
    try:
        enlink = unicode(str(enlink), 'UTF-8').replace(u'[[', u'').replace(
            u']]', u'').replace(u'en:', u'').replace(u'fa:', u'')
    except:
        enlink = enlink.replace(u'[[', u'').replace(u']]', u'').replace(
            u'en:', u'').replace(u'fa:', u'')
    enlink = enlink.replace(u' ', u'_')
    if _cache.get(tuple([enlink, firstsite, 'namespacefinder'])):
        return _cache[tuple([enlink, firstsite, 'namespacefinder'])]
    site = wikipedia.getSite(firstsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': enlink,
        'redirects': 1,
        'lllimit': 500,
    }
    a = 1
    if a:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            fanamespace = categoryname[u'query'][u'pages'][item]['ns']
        _cache[tuple([enlink, firstsite, 'namespacefinder'])] = fanamespace
        return fanamespace
    else:
        _cache[tuple([enlink, firstsite, 'namespacefinder'])] = False
        return False
Example No. 18
def englishdictionry(enlink, firstsite='fa', secondsite='en'):
    try:
        enlink = unicode(str(enlink), 'UTF-8').replace(u'[[', u'').replace(
            u']]', u'').replace(u'en:', u'').replace(u'fa:', u'')
    except:
        enlink = enlink.replace(u'[[', u'').replace(u']]', u'').replace(
            u'en:', u'').replace(u'fa:', u'')
    if enlink.find('#') != -1:
        return False
    if enlink == u'':
        return False
    enlink = enlink.replace(u' ', u'_')
    site = wikipedia.getSite(firstsite)
    sitesecond = wikipedia.getSite(secondsite)
    params = {
        'action': 'query',
        'prop': 'langlinks',
        'titles': enlink,
        'redirects': 1,
        'lllimit': 500,
    }
    try:
        categoryname = query.GetData(params, site)
        for item in categoryname[u'query'][u'pages']:
            case = categoryname[u'query'][u'pages'][item][u'langlinks']
        for item in case:
            if item[u'lang'] == secondsite:
                intersec = item[u'*']
                break
        result = intersec
        return result
    except:
        # assumed fallback: no interlanguage link found or the query failed
        return False
Example No. 19
def findDuplicateImages(filename):
    '''
    Takes the photo, calculates its SHA1 hash, and asks the MediaWiki API for a list of duplicates.

    TODO: Add exception handling, fix site thing
    '''
    f = open(filename, 'rb')

    result = []
    hashObject = hashlib.sha1()
    hashObject.update(f.read(-1))
    #f.close()
    sha1Hash = base64.b16encode(hashObject.digest())

    params = {
        'action': 'query',
        'list': 'allimages',
        'aisha1': sha1Hash,
        'aiprop': '',
    }
    data = query.GetData(params,
                         site=wikipedia.getSite(),
                         useAPI=True,
                         encodeTitle=False)

    for image in data['query']['allimages']:
        result.append(image['name'])
    return result
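
A hedged usage sketch; the file path is purely illustrative:

duplicates = findDuplicateImages('/tmp/example.jpg')
if duplicates:
    wikipedia.output(u'Already uploaded as: ' + u', '.join(duplicates))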
Example No. 20
def check_user_admin(username):
    username = username.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([username, 'check_user_admin'])):
        return total_cache[tuple([username, 'check_user_admin'])]
    params = {
        'action': 'query',
        'list': 'users',
        'ususers': username,
        'usprop': 'groups'
    }
    try:
        usernamequery = query.GetData(params, faSite)
        if not 'sysop' in usernamequery[u'query'][u'users'][0][u'groups']:
            _cache[tuple([username, 'check_user_admin'])] = True
            return True
        else:
            if not check_user(username):
                # a sysop bot account such as User:Rezabot does not matter here
                _cache[tuple([username, 'check_user_admin'])] = True
                return True
            _cache[tuple([username, 'check_user_admin'])] = False
            return False
    except:
        _cache[tuple([username, 'check_user_admin'])] = True
        return True
Example No. 21
def get_interwikis(link):
    link = link.replace(u' ', u'_')
    total_cache = dict(_cache_old, **_cache)
    if total_cache.get(tuple([link, 'get_interwikis'])):
        return total_cache[tuple([link, 'get_interwikis'])]
    if link.find('#') != -1 or link == u'':
        _cache[tuple([link, 'get_interwikis'])] = []
        return []
    case = []
    try:
        params = {
            'action': 'query',
            'prop': 'langlinks',
            'titles': link,
            'redirects': 1,
            'lllimit': 500,
        }
        pagename = query.GetData(params, faSite)
        for item in pagename[u'query'][u'pages']:
            case = pagename[u'query'][u'pages'][item][u'langlinks']
            _cache[tuple([link, 'get_interwikis'])] = case
            return case
    except:
        _cache[tuple([link, 'get_interwikis'])] = []
        return []
Example No. 22
def loadUsersFromGroup(group):
    global users

    users[group] = {}
    aufrom = '!'
    while aufrom:
        params = {
            'action': 'query',
            'list': 'allusers',
            'augroup': group,
            'aulimit': '500',
            'aufrom': aufrom,
        }
        data = query.GetData(params, site=preferences['site'])
        if not 'error' in data.keys():
            for item in data['query']['allusers']:
                user = item['name']
                users[group][user] = {
                    'editcount': getUserEditcount(user),
                    'groups': getUserGroups(user)
                }
                print user, users[group][user]

        if 'query-continue' in data.keys():
            aufrom = data['query-continue']['allusers']['aufrom']
        else:
            aufrom = ''
Example No. 23
def recentChanges(site=None, delay=0, block=70):
    """
    Return a pagegenerator containing all the images edited in a certain
    timespan. The delay is the amount of minutes to wait and the block is the
    timespan to return images in. Should probably be copied to somewhere else.

    """

    result = []
    dateformat = "%Y-%m-%dT%H:%M:%SZ"
    rcstart = datetime.utcnow() + timedelta(minutes=-delay-block)
    rcend = datetime.utcnow() + timedelta(minutes=-delay)

    params = {
        'action':      'query',
        'list':        'recentchanges',
        'rcstart':     rcstart.strftime(dateformat),
        'rcend':       rcend.strftime(dateformat),
        'rcdir':       'newer',
        'rcnamespace': '6',
        'rcprop':      'title',
        'rcshow':      '!bot',
        'rclimit':     '5000',
        'rctype':      'edit|log',
    }

    data = query.GetData(params, site)
    try:
        for item in data['query']['recentchanges']:
            result.append(item['title'])
    except (IndexError, KeyError):
        raise NoPage(u'API Error, nothing found in the APIs')
    return pagegenerators.PagesFromTitlesGenerator(result, site)
Example No. 24
 def query_results(self, **data):
     """Iterate results from API action=query, using data as parameters."""
     querydata = {'action': 'query', 'maxlag': str(pywikibot.config.maxlag)}
     querydata = query.CombineParams(querydata, data)
     if "action" not in querydata or querydata['action'] != 'query':
         raise ValueError(
             "query_results: 'action' set to value other than 'query'")
     waited = 0
     while True:
         try:
             result = query.GetData(querydata, self.site)
             #if data.startswith(u"unknown_action"):
             #    e = {'code': data[:14], 'info': data[16:]}
             #    raise APIError(e)
         except pywikibot.ServerError:
             pywikibot.output(u"Wikimedia Server Error; retrying...")
             time.sleep(5)
             continue
         except ValueError:
             # if the result isn't valid JSON, there must be a server
             # problem.  Wait a few seconds and try again
             # WARNING: if the server is down, this could
             # cause an infinite loop
             pywikibot.output(u"Invalid API response received; retrying...")
             time.sleep(5)
             continue
         if type(result) is dict and "error" in result:
             if result['error']['code'] == "maxlag":
                 print "Pausing due to server lag.\r",
                 time.sleep(5)
                 waited += 5
                 if waited % 30 == 0:
                     pywikibot.output(
                         u"(Waited %i seconds due to server lag.)" % waited)
                 continue
             else:
                 raise APIError(result['error'])
         waited = 0
         if type(result) is list:
             # query returned no results
             return
         assert type(result) is dict, (
             "Unexpected result of type '%s' received." % type(result))
         if "query" not in result:
             # query returned no results
             return
         yield result['query']
         if 'query-continue' in result:
             assert len(result['query-continue'].keys()) == 1, (
                 "More than one query-continue key returned: %s" %
                 result['query-continue'].keys())
             query_type = result['query-continue'].keys()[0]
             assert (query_type in querydata.keys() or
                     query_type in querydata.values()), \
                    "Site returned unknown query-continue type '%s'" \
                    % query_type
             querydata.update(result['query-continue'][query_type])
         else:
             return
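
A hedged sketch of consuming query_results(); `apisite` is an assumed instance of the wrapper class that defines it, and the category title is only an example:

for portion in apisite.query_results(list='categorymembers',
                                     cmtitle=u'Category:Living people',
                                     cmlimit='500'):
    for member in portion['categorymembers']:
        print member['title']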
Example No. 25
def query_old_api(data):

    predata = {
        'what': 'revisions',
        'rvlimit': '1',
    }
    predata = query.CombineParams(predata, data)
    return query.GetData(predata, useAPI = False)
Example No. 26
def Query_Abus(pasttime, abusnum):
    #http://en.wikipedia.org/w/api.php?action=query&list=abuselog&aflprop=details|ids&format=json&afllimit=1000
    params = {
        'action': 'query',
        'list': 'abuselog',
        'aflprop': 'details|ids',
        'format': 'json',
        'afllimit': 1000,
        'aflend': pasttime
    }
    abus_dict = {}
    abuslog = query.GetData(params, faSite)
    for item in abuslog[u'query'][u'abuselog']:
        abus_id = item['filter_id']
        log_id = str(item['id'])
        item = item['details']
        if abus_id == str(abusnum):  #abus number
            #wikipedia.output(u'-----------------------------------------')
            #wikipedia.output(str(item))
            user_editcount = item["user_editcount"]
            if not user_editcount.strip():
                user_editcount = 0
            else:
                user_editcount = int(user_editcount)
            user_name = item["user_name"].replace(u'\r', u'')
            user_age = 1000  #int(item["user_age"])
            user_groups = item["user_groups"]
            user_mobile = item["user_mobile"]
            article_articleid = item["article_articleid"]
            article_namespace = int(item["article_namespace"])
            article_text = item["article_text"].replace(u'\r', u'')
            article_prefixedtext = item["article_prefixedtext"]
            article_recent_contributors = item[
                "article_recent_contributors"].replace(u'\r', u'').split(u'\n')
            action = item["action"]
            summary = item["summary"].replace(u'\r', u'')
            old_wikitext = item["old_wikitext"].replace(u'\r', u'')
            new_wikitext = item["new_wikitext"].replace(u'\r', u'')
            edit_diff = item["edit_diff"].replace(u'\r', u'')
            new_size = int(item["new_size"])
            old_size = int(item["old_size"])
            edit_delta = int(item["edit_delta"])
            added_lines = item["added_lines"].replace(u'\r', u'')
            removed_lines = item["removed_lines"].replace(u'\r', u'')
            tor_exit_node = item["tor_exit_node"]
            timestamp = int(item["timestamp"])
            abus_dict[log_id] = [
                user_editcount, user_name, user_age, user_groups, user_mobile,
                article_articleid, article_namespace, article_text,
                article_prefixedtext, article_recent_contributors, action,
                summary, old_wikitext, new_wikitext, edit_diff, new_size,
                old_size, edit_delta, added_lines, removed_lines,
                tor_exit_node, timestamp, abus_id
            ]
    return abus_dict
Example No. 27
    def contributions(self, limit=500, namespace=[]):
        """ Yield tuples describing this user edits with an upper bound of
        'limit'. Each tuple is composed of a pywikibot.Page object,
        the revision id (int), the edit timestamp and the comment (unicode).
        Pages returned are not guaranteed to be unique.

        @param limit: limit result to this number of pages
        @type limit: int
        @param namespace: only iterate links in these namespaces
        @type namespace: list
        """
        if not self.site().has_api():
            raise NotImplementedError

        params = {
            'action': 'query',
            'list': 'usercontribs',
            'ucuser': self.name(),
            'ucprop': ['ids', 'title', 'timestamp',
                       'comment'],  # 'size','flags'],
            'uclimit': limit,
            'ucdir': 'older',
        }
        if limit > pywikibot.config.special_page_limit:
            params['uclimit'] = pywikibot.config.special_page_limit
            if limit > 5000 and self.site().isAllowed('apihighlimits'):
                params['uclimit'] = 5000
        if namespace:
            params['ucnamespace'] = namespace
        # A user is likely to contribute to several pages,
        # keeping track of titles
        nbresults = 0
        while True:
            pywikibot.output(u'Retrieving %s user contributions from %s...' %
                             (params['uclimit'], self.site()))
            result = query.GetData(params, self.site())
            if 'error' in result:
                pywikibot.output('%s' % result)
                raise pywikibot.Error
            for contrib in result['query']['usercontribs']:
                ts = pywikibot.parsetime2stamp(contrib['timestamp'])
                yield (pywikibot.Page(self.site(),
                                      contrib['title'],
                                      defaultNamespace=contrib['ns']),
                       contrib['revid'], ts, contrib.get('comment', None))
                nbresults += 1
                if nbresults >= limit:
                    break
            if 'query-continue' in result and nbresults < limit:
                params['ucstart'] = result['query-continue']['usercontribs'][
                    'ucstart']
            else:
                break
        return
Example No. 28
def Check_Page_Exists(page_link):
    page_link = page_link.replace(u' ', u'_')

    params = {'action': 'query', 'prop': 'info', 'titles': page_link}
    query_page = query.GetData(params, faSite)
    try:
        for i in query_page[u'query'][u'pages']:
            redirect_link = query_page[u'query'][u'pages'][i]['pageid']
            return False  # the page exists
    except:
        return True  # the page does not exist
def getRollbackToken():
    params = {'action': 'query', 'meta': 'tokens', 'type': 'rollback'}
    try:
        data = query.GetData(params)
        if 'error' in data:
            raise RuntimeError('%s' % data['error'])
        elif 'warnings' in data:
            raise RuntimeError('%s' % data['warnings'])
        return data['query']['tokens']['rollbacktoken']
    except KeyError:
        raise ServerError("The APIs don't return data, the site may be down")
def redirect_find(page_link):
    page_link = page_link.replace(u' ', u'_')
    params = {'action': 'query', 'redirects': "", 'titles': page_link}
    query_page = query.GetData(params, faSite)
    try:
        redirect_link = query_page[u'query'][u'redirects'][0]['to']
        return True
    except:
        if 'missing=""' in str(query_page):
            return True
        else:
            return False