def user_iter(lang='en', paginate=10000000):
    """Yield the contribution rows stored for ``lang`` as plain dicts.

    The row count for ``lang`` is read once up front; the rows are then
    fetched ordered by ``id`` in LIMIT/OFFSET pages of at most ``paginate``
    rows each.

    Every yielded dict is the row minus its ``id`` and ``lang`` entries.
    A non-null ``namespace_edits`` value is base64-decoded, decompressed
    and deserialized; a null one stays ``None``.
    """
    table, connection = get_contributions_table()

    total_query = select([func.count(table.c.id)], table.c.lang == lang)
    page_query = select([table], table.c.lang == lang)
    page_query = page_query.order_by(table.c.id).limit(paginate)

    total = connection.execute(total_query).fetchall()[0][0]

    for start in xrange(0, total, paginate):
        page = connection.execute(page_query.offset(start))
        for row in page:
            ## a RowProxy supports both dict and list methods; hand out a
            ## real dict so the result can be used with csv.DictWriter
            record = dict(row)
            for unwanted in ('id', 'lang'):
                del record[unwanted]

            packed = record['namespace_edits']
            if packed is not None:
                record['namespace_edits'] = deserialize(
                    decompress(b64decode(packed)))
            yield record
Exemplo n.º 2
0
def user_iter(lang='en', paginate=10000000):
    """Yield the contribution rows stored for ``lang`` as plain dicts.

    The row count for ``lang`` is read once up front; the rows are then
    fetched ordered by ``id`` in LIMIT/OFFSET pages of at most ``paginate``
    rows each.

    Every yielded dict is the row minus its ``id`` and ``lang`` entries.
    A non-null ``namespace_edits`` value is base64-decoded, decompressed
    and deserialized; a null one stays ``None``.
    """
    table, connection = get_contributions_table()

    total_query = select([func.count(table.c.id)], table.c.lang == lang)
    page_query = select([table], table.c.lang == lang)
    page_query = page_query.order_by(table.c.id).limit(paginate)

    total = connection.execute(total_query).fetchall()[0][0]

    for start in xrange(0, total, paginate):
        page = connection.execute(page_query.offset(start))
        for row in page:
            ## a RowProxy supports both dict and list methods; hand out a
            ## real dict so the result can be used with csv.DictWriter
            record = dict(row)
            for unwanted in ('id', 'lang'):
                del record[unwanted]

            packed = record['namespace_edits']
            if packed is not None:
                record['namespace_edits'] = deserialize(
                    decompress(b64decode(packed)))
            yield record
    def __init__(self, namespaces):
        """Set up the namespace lookup, keyword regexes and DB insert.

        ``namespaces`` is a sequence of 2-item entries; the second item of
        each entry is a UTF-8 encoded namespace name (the first item is not
        used here).  Its length is also stored in the module-level
        ``ATTR_LEN``.
        """
        global ATTR_LEN
        super(ContribDict, self).__init__()
        self._namespaces = namespaces
        ATTR_LEN = len(namespaces)

        # decoded namespace name -> position of that entry in ``namespaces``
        index_by_name = {}
        for position, (_, raw_name) in enumerate(namespaces):
            index_by_name[raw_name.decode('utf-8')] = position
        self._d_namespaces = index_by_name

        def _ignore_case(pattern):
            # every keyword pattern below is compiled case-insensitively
            return re2_compile_with_fallback(pattern, flags=re.I)

        self._re_welcome = _ignore_case(r'well?come')
        self._re_npov = _ignore_case(r'[ n]pov')
        self._re_please = _ignore_case(r'pl(s|z|ease)')
        self._re_thanks = _ignore_case(r'th(ank|anx|x)')
        self._re_revert = _ignore_case(r'(revert| rev )')

        table, self.connection = get_contributions_table()
        self.insert = table.insert()
Exemplo n.º 4
0
    def __init__(self, namespaces):
        """Set up the namespace lookup, keyword regexes and DB insert.

        ``namespaces`` is a sequence of 2-item entries; the second item of
        each entry is a UTF-8 encoded namespace name (the first item is not
        used here).  Its length is also stored in the module-level
        ``ATTR_LEN``.
        """
        global ATTR_LEN
        super(ContribDict, self).__init__()
        self._namespaces = namespaces
        ATTR_LEN = len(namespaces)

        # decoded namespace name -> position of that entry in ``namespaces``
        index_by_name = {}
        for position, (_, raw_name) in enumerate(namespaces):
            index_by_name[raw_name.decode('utf-8')] = position
        self._d_namespaces = index_by_name

        def _ignore_case(pattern):
            # every keyword pattern below is compiled case-insensitively
            return re2_compile_with_fallback(pattern, flags=re.I)

        self._re_welcome = _ignore_case(r'well?come')
        self._re_npov = _ignore_case(r'[ n]pov')
        self._re_please = _ignore_case(r'pl(s|z|ease)')
        self._re_thanks = _ignore_case(r'th(ank|anx|x)')
        self._re_revert = _ignore_case(r'(revert| rev )')

        table, self.connection = get_contributions_table()
        self.insert = table.insert()