Example #1
def iter_project_pages() -> Iterator[pywikibot.Page]:
    query = """
        SELECT page_namespace - 1, page_title
        FROM templatelinks
        JOIN page ON tl_from = page_id
        WHERE
            tl_title = "WikiProject_Essays"
            and tl_namespace = 10
            and page_namespace in (3, 5, 13)
        """
    conn = toolforge.connect("enwiki_p")
    with conn.cursor() as cur:
        rows = cur.execute(query)
        logger.info(f"{rows} pages found")
        data = cast(Iterable[Tuple[int, bytes]], cur.fetchall())

    # XXX: Work around pywikibot bug T67262
    namespaces = {2: "User:", 4: "Wikipedia:", 12: "Help:"}

    progress = -1
    for i, (ns, title) in enumerate(data):
        percent = math.floor(i / rows * 100)
        if (percent > progress) and (percent % 5 == 0):
            logger.info(f"Analyzing pages: {percent}% complete")
            progress = percent
        yield pywikibot.Page(site,
                             title=namespaces[ns] +
                             str(title, encoding="utf-8"))

    logger.info("Analyzing pages: 100% complete")
Example #2
def iter_pages_and_targets() -> Iterator[Tuple[pywikibot.Page, str]]:
    conn = toolforge.connect("enwiki_p")
    query = """
SELECT
  CONCAT("Talk:", com.page_title) as title,
  CONCAT("Talk:", target.page_title) as target
FROM
  page com
  JOIN page target ON target.page_title = REPLACE(com.page_title, "/Comments", "")
WHERE
  com.page_namespace = 1
  AND target.page_namespace = 1
  AND com.page_title LIKE "%/Comments"
  AND com.page_is_redirect = 0
  AND com.page_len = 0
  AND target.page_is_redirect = 0
"""
    with conn.cursor() as cur:
        cur.execute(query)
        data = cur.fetchall()

    for title, target in data:
        yield (
            pywikibot.Page(site, str(title, encoding="utf-8")),
            str(target, encoding="utf-8").replace("_", " "),
        )
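A minimal driver sketch for the generator above (it assumes the module-level pywikibot site object used throughout these examples):

for page, target in iter_pages_and_targets():
    # each pair is an empty /Comments subpage and its parent talk page
    print(page.title(), "->", target)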
Example #3
def db_get_usage(cat: pywikibot.Category, depth: int) -> UsageResult:
    query = """
SELECT page_title, count(*)
FROM categorylinks
JOIN page ON cl_from = page_id
LEFT JOIN globalimagelinks ON page_title = gil_to
JOIN image ON img_name = page_title
WHERE
    cl_to IN %(cats)s
    AND img_major_mime = "image"
    AND img_minor_mime != "svg+xml"
GROUP BY page_title
ORDER BY count(*) DESC
"""
    conn = toolforge.connect("commonswiki")
    with conn.cursor() as cur:
        total = cur.execute(
            query,
            args={
                "cats": [
                    cat.title(with_ns=False, underscore=True)
                    for cat in list_cats(cat, depth)
                ]
            },
        )
        data = cast(List[Tuple[bytes, int]], cur.fetchall())
    return UsageResult(
        [
            FileUsage(f"File:{str(page, encoding='utf-8')}", count)
            for page, count in data
        ][:200],
        total,
        [],
    )
Example #4
def users():
    conn = toolforge.connect('commonswiki')
    with conn.cursor() as cur:
        sql = ('SELECT actor_name, count(*) FROM actor_revision JOIN revision '
               'ON rev_actor=actor_id JOIN change_tag ON ct_rev_id=rev_id '
               'join change_tag_def ON ct_tag_id=ctd_id WHERE '
               'ctd_name="OAuth CID: 821" and actor_user>0 group'
               ' by actor_user order by count(*) desc')
        cur.execute(sql)
        data = cur.fetchall()
    users = []
    for user in data:
        rowres = []
        for item in user:
            if isinstance(item, bytes):
                rowres.append(item.decode('utf-8'))
            else:
                rowres.append(item)
        users.append(rowres)
    with conn.cursor() as cur:
        sql = ('SELECT count(*) FROM actor_revision JOIN revision ON '
               'rev_actor=actor_id JOIN change_tag ON ct_rev_id=rev_id'
               ' join change_tag_def ON ct_tag_id=ctd_id WHERE '
               'ctd_name="OAuth CID: 821" AND actor_id>0')
        cur.execute(sql)
        data = cur.fetchall()
    total = data[0][0]
    return flask.render_template('users.html', users=users, total=total)
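The inner loop above is the usual way to handle replica results, which arrive as bytes for text columns; a compact equivalent as a sketch (the helper name is ours, not the tool's):

def decode_row(row):
    # decode bytes fields to str, pass other values (ints, None) through
    return [item.decode('utf-8') if isinstance(item, bytes) else item
            for item in row]

users = [decode_row(user) for user in data]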
Example #5
def frwiki_query(countryname, country, language):
    printInfo('fr', countryname, country, language)
    if cached:
        query_res = get_quarry('30043', '0')
        frwikidata = {}

        for line in query_res:
            iws, fr, en, wd = line

            if en is None:
                frwikidata.update({wd: [iws, ['fr', fr.replace('_', ' ')]]})
            else:
                frwikidata.update({wd: [iws, ['en', en]]})

        final2 = sort_list(frwikidata)
        saveToDB(final2, language, country, 'fr')
        return frwikidata
    else:
        query_res = run_query(SQL_frwiki.format(language, country),
                              toolforge.connect('frwiki_p', 'analytics'))
        result_json = [encode_all_items(f) for f in query_res]
        frwikidata = {}

        for line in result_json:
            iws, fr, en, wd = line

            if en is None:
                frwikidata.update({wd: [iws, ['fr', fr.replace('_', ' ')]]})
            else:
                frwikidata.update({wd: [iws, ['en', en]]})

        final2 = sort_list(frwikidata)
        saveToDB(final2, language, countryname, 'fr')
        return frwikidata
Example #6
def enwiki_query(countryname, country, language):
    printInfo('en', countryname, country, language)
    if cached:
        query_res = get_quarry('30043', '1')

        results = {
            f[2]: [f[0], ['en', f[1].replace('_', ' ')]]
            for f in query_res
        }

        final2 = sort_list(results)
        saveToDB(final2, language, country, 'en')
        return results
    else:
        query_res = run_query(SQL_enwiki.format(country, language),
                              toolforge.connect('enwiki_p', 'analytics'))
        result_json = [encode_all_items(f) for f in query_res]

        results = {
            f[2]: [f[0], ['en', f[1].replace('_', ' ')]]
            for f in result_json
        }

        final2 = sort_list(results)
        saveToDB(final2, language, countryname, 'en')

        return results
Example #7
    def test_connect(self):
        tests = [
            (['enwiki_p'], {
                'database': 'enwiki_p',
                'host': 'enwiki.web.db.svc.eqiad.wmflabs',
            }),
            (['enwiki'], {
                'database': 'enwiki_p',
                'host': 'enwiki.web.db.svc.eqiad.wmflabs',
            }),
            (['enwiki', 'analytics'], {
                'database': 'enwiki_p',
                'host': 'enwiki.analytics.db.svc.eqiad.wmflabs',
            }),
            (['enwiki_p', 'labsdb'], {
                'database': 'enwiki_p',
                'host': 'enwiki.labsdb',
            }),
        ]
        common_expects = {
            'read_default_file': os.path.expanduser("~/replica.my.cnf"),
            'charset': 'utf8mb4',
        }

        for args, expects in tests:
            with unittest.mock.patch('toolforge._connect') as mm:
                mm.return_value = None
                exp = common_expects.copy()
                exp.update(expects)

                conn = toolforge.connect(*args)

                assert conn is None
                mm.assert_called_once_with(**exp)
Example #8
def suggest_articles(wiki, limit):
    w = Wiki.query.filter_by(dbname=wiki).first()
    conn = toolforge.connect(wiki)
    with conn.cursor() as cur:
        if len(w.excluded_articles_json) > 0:
            regexp = r'^(%s)' % "|".join(w.excluded_articles_json)
        else:
            regexp = r'^$'
        cur.execute(
            '''select
                page_id,
                page_title,
                page_len/count(*) as bytes_per_link
            from pagelinks
            join page on page_id=pl_from
            where page_len>%s and page_namespace=0
            and page_id not in (select tl_from from templatelinks where tl_title=%s)
            and page_id not in (select pp_page from page_props where pp_propname="disambiguation")
            and page_title not rlike %s
            group by page_id having bytes_per_link>%s
            limit %s;''',
            (w.minimum_length, w.template, regexp, w.treshold, limit))
        data = cur.fetchall()
    for row in data:
        if SuggestedArticle.query.filter_by(
                page_id=row[0]).first() is not None:
            continue
        s = SuggestedArticle(page_id=row[0],
                             bytes_per_link=row[2],
                             wiki_id=w.id)
        db.session.add(s)
        db.session.commit()
Example #9
def check_user_exists(username):
    message = 'User with username: ' + str(username) + ' exists'
    success = True
    try:
        conn = toolforge.connect('enwiki')
        with conn.cursor() as cursor:
            # finding how many users with the given username exist
            cursor.execute(
                'SELECT count(*) FROM user WHERE user_name'
                ' = %s ', [str(username)])
            result = cursor.fetchone()
            if result[0] == 0:
                success = False
                message = ('User with username: ' + str(username) +
                           ' does not exist')
    except Exception:
        message = 'Error fetching data from database for user: ' +\
                  str(username)
        success = False
    finally:
        # the with-block above already closed the cursor
        conn.close()

    logger.info(message)
    return success
Example #10
def run_query():
    query = """
SELECT
    actor_name as `username`,
    user_editcount as `edit_count`,
    user_registration as `registration_date`,
    NOT(ug_group IS NULL)  as `ext_conf`,
    COUNT(*) as `redwarn_edits`,
    COUNT(*)/user_editcount*100 as `redwarn_pct`,
    ipb_sitewide as `blocked`
FROM revision_userindex
JOIN change_tag ON ct_rev_id = rev_id
JOIN actor_revision ON rev_actor = actor_id
JOIN `user` ON actor_user = user_id
LEFT JOIN user_groups ON ug_user = user_id AND ug_group = "extendedconfirmed"
LEFT JOIN ipblocks ON user_id = ipb_user
WHERE ct_tag_id = 577 -- RedWarn
GROUP BY actor_name
ORDER BY user_registration DESC
"""
    conn = toolforge.connect("enwiki_p")
    with conn.cursor() as cur:
        cur.execute(query)
        data = cur.fetchall()
    for line in data:
        yield Row(
            username=f"[[User:{str(line[0], encoding='utf-8')}]]",
            edit_count=line[1],
            registration_date=datetime.datetime.strptime(
                str(line[2], encoding="utf-8"), "%Y%m%d%H%M%S"),
            ext_conf=bool(line[3]),
            redwarn_edits=line[4],
            redwarn_pct=line[5],
            blocked=bool(line[6]),
        )
Example #11
def get_dump(database=DEFAULT_DATABASE, start=None, end=None, mode=None):
    """Returns result object for bulk aggregation
    
    Args:
        database: name of database to use
        start: Start hour as "2018-10-10T01" or a duration like "1d"
        end: End hour as "2018-10-10T01" or None
        mode: How to manipulate results
            views: (default) Report raw views in ``views`` field
            logprobs: Estimate log probabilities in ``logprobs`` and ``default_logprob`` fields
    """
    logger = logging.getLogger(__name__)
    with toolforge.connect(database, cluster="tools") as cursor:
        (start, end) = convert_start_and_end(cursor, start, end)
        logger.info(f"start={start}, end={end}")
        hours = get_hours(cursor, start, end)
        logger.info(f"{len(hours)} hours")
        (max_qid, total_views) = get_summary(cursor, start, end)
        summary = dict(
            start=start,
            end=end,
            hours=hours,
            max_qid=max_qid,
            total_views=total_views,
        )
        data = list(aggregate_by_qid(cursor, start, end))

        return (summary, data)
Example #12
def iter_files():
    conn = toolforge.connect("commonswiki")
    query = """
        SELECT CONCAT("File:", page_title)
        FROM page
        JOIN categorylinks ON cl_from = page_id
        WHERE
            cl_to IN (
                SELECT page_title
                FROM page
                JOIN categorylinks ON cl_from = page_id
                WHERE
                    cl_to = "Images_from_Wiki_Loves_Africa_2020"
                    and cl_type = "subcat"
                    and cl_sortkey != " "
            )
            and page_id NOT IN (
                SELECT cl_from
                FROM categorylinks
                WHERE
                    cl_to = "Images_from_Wiki_Loves_Africa_2020"
            )
            and page_namespace = 6
    """
    with conn.cursor() as cur:
        cur.execute(query)
        for title in cur.fetchall():
            yield pywikibot.Page(site, str(title[0], encoding="utf-8"))
Example #13
def recentclaims(user, prop_name, limit):
    prop = "%%%s%%" % prop_name
    limit = int(limit)
    conn = toolforge.connect('wikidatawiki')
    with conn.cursor() as cur:
        sql = '''
		select rev_page, page_title, rev_id, rev_comment
		from revision_userindex
		join page on page_id=rev_page
		'''
        if user:
            sql += '''
			where rev_user_text=%s
			and rev_comment like "%%claim-%%" and rev_comment like %s order by rev_timestamp desc limit %s;
			'''
            parameters = (user, prop, limit)
        else:
            sql += 'where rev_comment like "%%claim-%%" and rev_comment like %s order by rev_timestamp desc limit %s;'
            parameters = (prop, limit)
        cur.execute(sql, parameters)
        data = cur.fetchall()
    result = []
    for row in data:
        mid_result = re.search(r"Property:P18\]\]: (.*)$",
                               row[3].decode('utf-8'))
        if not mid_result: continue
        value = mid_result.groups()[0]
        result.append({
            "page_id": row[0],
            "qid": row[1],
            "rev_id": row[2],
            "property": request.args.get('property'),
            "value": value
        })
    return result
Example #14
def main():
    parser = argparse.ArgumentParser(parents=[config.logging_cli()])
    parser.add_argument('--progress',
                        help='log progress every N suspects',
                        type=int,
                        metavar='N')
    parser.add_argument('--list_features',
                        action='store_true',
                        help='print all available features and exit')
    parser.add_argument('--feature',
                        action='append',
                        dest='features',
                        help='''Feature to evaluate.  May be repeated.
                        Features are evaluated in
                        the order given; it is the responsibility of
                        the user to include the required dependencies,
                        and in the right order.  If no features are
                        specified, defaults to all features.  See
                        --list_features to get a list of available
                        features.''')

    args = parser.parse_args()

    if args.list_features:
        print_features()
        return

    features = args.features or ['reg_time',
                                 'first_contrib_time',
                                 'first_contrib_interval',
                                 'live_edit_count',
                                 'deleted_edit_count',
                                 'block_count']
    check_valid_features(features)

    config.configure_logging(args)
    logger = logging.getLogger('get_features')

    logger.info("Starting work")
    logger.info("job-name = %s", args.job_name)
    logger.info("Using features: %s", features)
    start_time = datetime.datetime.now()

    db_connection = toolforge.connect('enwiki')

    count = 0
    for line in sys.stdin:
        initial_data = json.loads(line)

        suspect = Suspect(db_connection, initial_data)
        suspect.add_features(features)
        print(json.dumps(suspect.clean_data()))

        count += 1
        if args.progress and (count % args.progress == 0):
            logger.info("Processed %s suspects", count)
                
    finish_time = datetime.datetime.now()
    elapsed_time = finish_time - start_time
    logger.info("Processed %d suspects in %s", count, elapsed_time)
Example #15
def users():
    conn = toolforge.connect('commonswiki')
    with conn.cursor() as cur:
        sql = ('select rev_user_text, count(*) from change_tag join revision '
               'on ct_rev_id=rev_id where ct_tag="OAuth CID: 821" and '
               'rev_user>0 group by rev_user order by count(*) desc;')
        cur.execute(sql)
        data = cur.fetchall()
    users = []
    for user in data:
        rowres = []
        for item in user:
            if isinstance(item, bytes):
                rowres.append(item.decode('utf-8'))
            else:
                rowres.append(item)
        users.append(rowres)
    with conn.cursor() as cur:
        sql = ('select count(*) from change_tag join revision on ct_rev_id='
               'rev_id where ct_tag="OAuth CID: 821" and rev_user>0;')
        cur.execute(sql)
        data = cur.fetchall()
    total = data[0][0]
    return flask.render_template('users.html',
                                 users=users,
                                 total=total,
                                 logged=logged(),
                                 username=getusername())
Example #16
def connect_to_replicas_database(db_name, replicas_port=None):
    """
    Establishes connection to Wikimedia replicas database in Toolforge.
    :param db_name: name of the database.
    :param replicas_port: port for connecting to db through ssh tunneling, if used.
    :return: pymysql.connection to the database.
    """
    try:
        if replicas_port:
            conn = pymysql.connect(
                host="127.0.0.1",
                port=replicas_port,
                user=cfg['user_credits']['user'],
                password=cfg['user_credits']['password'],
                connect_timeout=1000,
            )
            if db_name[-2:] != "_p":
                db_name = db_name + "_p"
            with conn.cursor() as cur:
                cur.execute("use " + db_name)
            conn.commit()
        else:
            conn = toolforge.connect(
                dbname=db_name, connect_timeout=1000, cluster="analytics"
            )

        return conn
    except pymysql.err.OperationalError as err:
        print("Failure: Please establish connection to Toolforge")
        print("Error: ", err)
        exit(1)
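Hypothetical calls for both modes; outside Toolforge the function expects an SSH tunnel to the replicas, e.g. `ssh -N -L 4711:enwiki.analytics.db.svc.wikimedia.cloud:3306 login.toolforge.org`, and takes the local port:

conn = connect_to_replicas_database("enwiki")                      # on Toolforge
conn = connect_to_replicas_database("enwiki", replicas_port=4711)  # via SSH tunnel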
Example #17
def calculateUserContributionRank(username):
    results = {'success': True, 'user_rank': -1, 'complete_edit_count': -1}
    message = "User's contribution rank calculated successfully. Results:"
    try:
        conn = toolforge.connect('enwiki')
        with conn.cursor() as cursor:
            # finding how many users have greater editcount
            #  than that of given username
            cursor.execute(
                'SELECT count(*), useredits.user_editcount FROM user, '
                '(SELECT * FROM user WHERE user_name = %s ) AS useredits  '
                'WHERE user.user_editcount > useredits.user_editcount',
                [username])
            result = cursor.fetchone()

    except Exception:
        results['success'] = False
        message = 'Error occurred while calculating contribution ' \
                  'distribution for all users. Results:'
    finally:
        # the with-block above already closed the cursor
        conn.close()

    if results['success']:
        results['user_rank'] = result[0] + 1
        results['complete_edit_count'] = result[1]

    logger.info("{0}{1}".format(message, str(results)))
    return results
Example #19
def get_user_properties(user: str, dbname: str) -> UserProps:
    """Get signature and fancysig values for a user from the replica db"""
    logger.info("Getting user properties")
    conn = toolforge.connect(f"{dbname}_p")
    with conn.cursor() as cur:
        cur.execute(
            """
            SELECT up_property, up_value
            FROM
                user_properties
            WHERE
                up_user = (SELECT user_id
                           FROM `user`
                           WHERE user_name = %s)
            """,
            [user],
        )
        resultset = cast(List[Tuple[bytes, bytes]], cur.fetchall())
    logger.debug(resultset)

    data: Dict[str, str] = {
        key.decode("utf-8"): value.decode("utf-8")
        for key, value in resultset
    }
    return UserProps(nickname=data.get("nickname", ""),
                     fancysig=bool(int(data.get("fancysig", "0"))))
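Usage sketch (hypothetical username); when fancysig is true, nickname holds raw signature wikitext rather than a plain display name:

props = get_user_properties("Example", "enwiki")
if props.fancysig:
    signature_markup = props.nickname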
Example #20
def suggest_articles(wiki, limit):
    w = Wiki.query.filter_by(dbname=wiki).first()
    conn = toolforge.connect(wiki)
    with conn.cursor() as cur:
        treshold = round(w.bytes_per_link_avg, 2)  # math.floor() takes no precision argument
        cur.execute(
            '''select page_id, page_title, page_len/count(*) as bytes_per_link from pagelinks
            join page on page_id=pl_from where page_len>%s and page_namespace=0
            group by page_id having bytes_per_link>%s
            limit %s;''', (w.minimum_length, treshold, limit))
        data = cur.fetchall()
    for row in data:
        bpl_min = w.bytes_per_link_avg
        bpl_max = w.bytes_per_link_max + w.tolerance
        if row[2] < bpl_min:
            probability = 0
        elif row[2] > bpl_max:
            probability = 100
        else:
            probability = (row[2] - bpl_min) / bpl_max * 100
        s = SuggestedArticle(page_id=row[0],
                             bytes_per_link=row[2],
                             probability=probability,
                             wiki_id=w.id)
        db.session.add(s)
        db.session.commit()
Example #21
def revert():
    conn = toolforge.connect('wikidatawiki')
    rev_id = request.args.get('rev_id')
    with conn.cursor() as cur:
        cur.execute('select rev_page from revision where rev_id=%s', rev_id)
        rev_page = cur.fetchall()[0][0]  # fetch inside the with-block, before the cursor closes

    payload = {
        "action": "query",
        "format": "json",
        "meta": "tokens",
        "type": "csrf"
    }
    r = requests.get(app.config['API_MWURI'], params=payload, auth=get_auth())
    token = r.json()['query']['tokens']['csrftoken']
    payload = {
        "action": "edit",
        "format": "json",
        "pageid": rev_page,
        "undo": request.args.get('rev_id'),
        "token": token
    }
    r = requests.post(app.config['API_MWURI'], data=payload, auth=get_auth())
    return jsonify({
        'status': 'ok',
        'message': 'reverted',
        'rev_id': rev_id,
        "response": r.json(),
    })
Example #22
def iter_files_and_users(
        days,
        delay_mins=30) -> Iterator[Tuple[pywikibot.Page, pywikibot.Page]]:
    start_ts = ((datetime.datetime.utcnow() -
                 datetime.timedelta(days=days)).replace(
                     hour=0, minute=0, second=0).strftime("%Y%m%d%H%M%S"))
    end_ts = (datetime.datetime.utcnow() -
              datetime.timedelta(minutes=delay_mins)).strftime("%Y%m%d%H%M%S")
    query = """
SELECT
  p0.page_namespace,
  p0.page_title,
  CONCAT("User talk:", actor_name)
FROM
  categorylinks
  JOIN page p0 ON cl_from = p0.page_id
  JOIN logging_logindex
    ON log_page = p0.page_id AND log_type = "upload"
  JOIN actor_logging ON log_actor = actor_id
WHERE
  cl_to = "Files_with_no_machine-readable_license"
  AND log_timestamp > %(start_ts)s
  AND log_timestamp < %(end_ts)s
  AND "Deletion_template_tag" NOT IN (
    SELECT tl_title
    FROM templatelinks
    WHERE
      tl_namespace = 10
      AND tl_from = p0.page_id
  )
  AND (
    log_action = "upload"
    OR actor_id IN (
      SELECT log_actor
      FROM logging_logindex
      WHERE
        log_page = p0.page_id
        AND log_type = "upload"
        AND log_action = "upload"
    )
  )
GROUP BY page_id
ORDER BY actor_id
"""
    conn = toolforge.connect("commonswiki_p", cluster=cluster)
    with conn.cursor() as cur:
        cur.execute(query, args={"start_ts": start_ts, "end_ts": end_ts})
        data = cast(Iterator[Tuple[int, bytes, bytes]], cur.fetchall())
    for ns, title, user in data:
        page = pywikibot.Page(site, title=str(title, encoding="utf-8"), ns=ns)
        user_talk = pywikibot.Page(site, title=str(user, encoding="utf-8"))
        if not page.exists():
            continue
        if user_talk.isRedirectPage():
            user_talk = user_talk.getRedirectTarget()
        yield (
            page,
            user_talk,
        )
Example #23
def getAllWikis():
    conn_meta = toolforge.connect('meta_p')
    results = run_query(
        "SELECT dbname FROM wiki WHERE family='wikipedia' and is_closed=0",
        conn_meta)

    wikipediaList = [encode_if_necessary(f[0]) for f in results]

    return set(wikipediaList)
Example #24
def get_cat_files(category: str) -> int:
    conn = toolforge.connect("commonswiki_p")
    query = """
SELECT cat_files
FROM category
WHERE cat_title = %s
"""
    with conn.cursor() as cur:
        cur.execute(query, args=[category])
        return cur.fetchall()[0][0]
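Hypothetical call; note that category titles are stored with underscores in the replicas:

n_files = get_cat_files("Images_from_Wiki_Loves_Africa_2020")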
Example #25
def do_db_query(db_name: str, query: str, **kwargs) -> Any:
    """Uses the toolforge library to query the replica databases"""
    if not wmcs():
        raise ConnectionError("Not running on Toolforge, database unavailable")

    conn = toolforge.connect(db_name)
    with conn.cursor() as cur:
        cur.execute(query, kwargs)
        res = cur.fetchall()
    return res
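A usage sketch with hypothetical query and parameter names; the kwargs are passed to pymysql as %(name)s placeholders, so values are escaped rather than interpolated into the SQL string:

rows = do_db_query(
    "enwiki_p",
    "SELECT user_id FROM `user` WHERE user_name = %(name)s",
    name="Jimbo Wales",
)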
Example #26
    def _connect(self):
        """
        connect to DB
        """

        orig_conv = pymysql.converters.conversions
        # add support for the BIT data type
        orig_conv[FIELD_TYPE.BIT] = lambda data: data == b'\x01'

        args = {'host': self._HOST, 'conv': orig_conv}
        self.ctx = toolforge.connect(self._get_db_name(), **args)
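The same converter trick as a self-contained sketch; toolforge.connect forwards extra keyword arguments to pymysql.connect, which is what the method above relies on:

import pymysql
from pymysql.constants import FIELD_TYPE
import toolforge

conv = pymysql.converters.conversions.copy()
conv[FIELD_TYPE.BIT] = lambda data: data == b'\x01'  # decode BIT(1) as bool
conn = toolforge.connect("enwiki", conv=conv)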
Example #27
def getAllCurrentWikidataItemsInWikipedia():
    conn_lvwiki = toolforge.connect('etwiki_p', 'analytics')

    currWDitemsQuery = run_query(
        "select pp_value from page_props where pp_propname='wikibase_item'",
        conn_lvwiki)
    currWDitems = [
        int(encode_if_necessary(b[0])[1:]) for b in currWDitemsQuery
    ]

    return set(currWDitems)
Example #28
    def get_page_links(self) -> None:
        page = self.page
        query = """
        SELECT COUNT(pl_from)
        FROM pagelinks
        WHERE pl_title = %s and pl_namespace = %s"""
        conn = toolforge.connect("enwiki_p")
        with conn.cursor() as cur:
            cur.execute(query, (page.title(
                underscore=True, with_ns=False), page.namespace().id))
            self.links = cast(Tuple[Tuple[int]], cur.fetchall())[0][0]
Example #29
def getAllCurrentWikidataItemsInWikipedia():
    conn_lvwiki = toolforge.connect('lvwiki_p', 'analytics')

    currWDitemsQuery = run_query(
        "select pp_value from page_props where pp_propname='wikibase_item'",
        conn_lvwiki)
    currWDitems = [
        int(encode_if_necessary(b[0])[1:]) for b in currWDitemsQuery
    ]
    currWDitems.append(24575438)  #some bug with "Module:Wikidata2" item

    return set(currWDitems)
Example #30
def getDataFromDB(wikiLang):
    conn = toolforge.connect(wikiLang + 'wiki_p', 'analytics')

    queryRES = run_query(sqlQuery, conn)
    queryRES = [[encode_if_necessary(f) for f in fileSaver]
                for fileSaver in queryRES]

    with open('comcat_ex_' + wikiLang + '.txt', 'w',
              encoding='utf-8') as fileS:
        fileS.write(str(queryRES))

    return queryRES
Example #31
    def __init__(self, db="dewiki_p", revert_detector=None):
        self.logger = logging.getLogger("kokolores.creator")

        self.start = self.START_DEFAULT
        self.stop = self.START_DEFAULT

        if revert_detector is None:
            self.revert_detector = RevertDetector(db)
        else:
            self.revert_detector = revert_detector
        self.conn = toolforge.connect(db,
                                      cursorclass=pymysql.cursors.DictCursor)
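With DictCursor, rows come back as dicts keyed by column name instead of positional tuples; an illustrative fragment (the query is ours, not the tool's):

with self.conn.cursor() as cur:
    cur.execute("SELECT rev_id, rev_timestamp FROM revision LIMIT 1")
    row = cur.fetchone()
    # e.g. {'rev_id': 123456, 'rev_timestamp': b'20200101000000'}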