예제 #1
0
파일: fields.py 프로젝트: ywl/wiki-network
 def to_python(self, value):
     """Convert a stored field value into its Python form.

     Dicts are returned as they are, and so is any falsy value (``None``,
     an empty string, ...). Every other value is base64-decoded,
     decompressed and then handed to ``wbin.deserialize``.
     """
     # Nothing to decode: either already a dict or there is no payload.
     if isinstance(value, dict) or not value:
         return value

     raw = decompress(b64decode(value))
     return wbin.deserialize(raw)
예제 #2
0
def page_iter(lang='en', paginate=10000000, desired=None):
    events, conn = get_events_table()

    count_query = select([func.count(events.c.id)],
               events.c.lang == lang)
    s = select([events.c.title, events.c.data, events.c.talk,
                events.c.total_editors, events.c.bot_editors,
                events.c.anonymous_editors],
                events.c.lang == lang).order_by(
        events.c.title, events.c.talk).limit(paginate)

    ## searching only desired pages
    if desired:
        s = s.where(events.c.title.in_(desired))
        count_query = count_query.where(events.c.title.in_(desired))

    count = conn.execute(count_query).fetchall()[0][0]

    print 'PAGES:', count

    for offset in xrange(0, count, paginate):
        rs = conn.execute(s.offset(offset))
        for row in rs:
            yield (row[0],
                   deserialize(decompress(b64decode(row[1]))),
                   row[2], row[3], row[4], row[5])
def user_iter(lang='en', paginate=10000000):
    """Iterate over the contribution rows of one language as dicts.

    The contributions table is read in chunks of ``paginate`` rows ordered
    by id. Each row is turned into a plain dict with the ``id`` and
    ``lang`` keys removed; a non-None ``namespace_edits`` value is
    base64-decoded, decompressed and passed through ``deserialize``.

    lang     -- value matched against the ``lang`` column
    paginate -- LIMIT applied to each query, and step between OFFSETs
    """
    contrib, conn = get_contributions_table()

    count_query = select([func.count(contrib.c.id)], contrib.c.lang == lang)

    s = select([contrib], contrib.c.lang == lang)
    s = s.order_by(contrib.c.id)
    s = s.limit(paginate)

    count_rows = conn.execute(count_query).fetchall()
    count = count_rows[0][0]

    for offset in xrange(0, count, paginate):
        for row in conn.execute(s.offset(offset)):
            ## the row supports dict and list access; a real dict is
            ## needed so the result can be fed to csv.DictWriter
            record = dict(row)
            record.pop('id')
            record.pop('lang')

            packed = record['namespace_edits']
            if packed is not None:
                record['namespace_edits'] = deserialize(
                    decompress(b64decode(packed)))
            yield record
예제 #4
0
def user_iter(lang='en', paginate=10000000):
    """Yield one dict per contribution row stored for ``lang``.

    Rows are fetched ordered by id, ``paginate`` rows per query, using the
    row count computed before iteration starts. The ``id`` and ``lang``
    keys are dropped from every dict, and ``namespace_edits`` is replaced
    by its decoded form (base64 -> decompress -> ``deserialize``) unless
    it is None.

    lang     -- value matched against the ``lang`` column
    paginate -- LIMIT applied to each query, and step between OFFSETs
    """
    contrib, conn = get_contributions_table()

    count_query = select([func.count(contrib.c.id)], contrib.c.lang == lang)

    s = select([contrib], contrib.c.lang == lang)
    s = s.order_by(contrib.c.id)
    s = s.limit(paginate)

    count_rows = conn.execute(count_query).fetchall()
    count = count_rows[0][0]

    for offset in xrange(0, count, paginate):
        for row in conn.execute(s.offset(offset)):
            ## the row supports dict and list access; a real dict is
            ## needed so the result can be fed to csv.DictWriter
            record = dict(row)
            record.pop('id')
            record.pop('lang')

            packed = record['namespace_edits']
            if packed is not None:
                record['namespace_edits'] = deserialize(
                    decompress(b64decode(packed)))
            yield record
예제 #5
0
def page_iter(lang='en', paginate=10000000, desired=None):
    events, conn = get_events_table()

    count_query = select([func.count(events.c.id)], events.c.lang == lang)
    s = select([
        events.c.title, events.c.data, events.c.talk, events.c.total_editors,
        events.c.bot_editors, events.c.anonymous_editors
    ], events.c.lang == lang).order_by(events.c.title,
                                       events.c.talk).limit(paginate)

    ## searching only desired pages
    if desired:
        s = s.where(events.c.title.in_(desired))
        count_query = count_query.where(events.c.title.in_(desired))

    count = conn.execute(count_query).fetchall()[0][0]

    print 'PAGES:', count

    for offset in xrange(0, count, paginate):
        rs = conn.execute(s.offset(offset))
        for row in rs:
            yield (row[0], deserialize(decompress(b64decode(row[1]))), row[2],
                   row[3], row[4], row[5])
예제 #6
0
 def _deserialize(self, s):
     """Deserialize ``s`` through ``wbin.deserialize``.

     When ``self._des_func`` is set, it is forwarded to ``wbin.deserialize``
     together with the argument tuple ``(s,)`` and ``self._des_size``;
     otherwise ``wbin.deserialize`` is called with ``s`` alone.
     """
     # No custom function configured: plain call.
     if self._des_func is None:
         return wbin.deserialize(s)

     return wbin.deserialize(s, self._des_func, (s,), self._des_size)