def page_iter(lang='en', paginate=10000000, desired=None): events, conn = get_events_table() count_query = select([func.count(events.c.id)], events.c.lang == lang) s = select([events.c.title, events.c.data, events.c.talk, events.c.total_editors, events.c.bot_editors, events.c.anonymous_editors], events.c.lang == lang).order_by( events.c.title, events.c.talk).limit(paginate) ## searching only desired pages if desired: s = s.where(events.c.title.in_(desired)) count_query = count_query.where(events.c.title.in_(desired)) count = conn.execute(count_query).fetchall()[0][0] print 'PAGES:', count for offset in xrange(0, count, paginate): rs = conn.execute(s.offset(offset)) for row in rs: yield (row[0], deserialize(decompress(b64decode(row[1]))), row[2], row[3], row[4], row[5])
def page_iter(lang='en', paginate=10000000, desired=None): events, conn = get_events_table() count_query = select([func.count(events.c.id)], events.c.lang == lang) s = select([ events.c.title, events.c.data, events.c.talk, events.c.total_editors, events.c.bot_editors, events.c.anonymous_editors ], events.c.lang == lang).order_by(events.c.title, events.c.talk).limit(paginate) ## searching only desired pages if desired: s = s.where(events.c.title.in_(desired)) count_query = count_query.where(events.c.title.in_(desired)) count = conn.execute(count_query).fetchall()[0][0] print 'PAGES:', count for offset in xrange(0, count, paginate): rs = conn.execute(s.offset(offset)) for row in rs: yield (row[0], deserialize(decompress(b64decode(row[1]))), row[2], row[3], row[4], row[5])
def __init__(self, **kwargs):
    """Set up the processor with an empty queue and an events-table insert.

    All keyword arguments are forwarded unchanged to the parent class
    initializer.  Afterwards the instance holds:

    * ``queue`` -- a new empty list,
    * ``connection`` -- the connection returned by ``get_events_table()``,
    * ``insert`` -- the result of calling ``insert()`` on the events table.
    """
    super(HistoryEventsPageProcessor, self).__init__(**kwargs)

    self.queue = []

    table, connection = get_events_table()
    self.connection = connection
    self.insert = table.insert()