def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue): with Thread.run("alias_analysis", alias_analysis.main, settings=settings): end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id) start = nvl(settings.param.start, 0) if resume_from_last_run: start = nvl(settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment)) ############################################################# ## MAIN ETL LOOP ############################################################# #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD # with Multithread([run_both_etl, run_both_etl]) as workers: for min, max in Q.intervals(start, end, settings.param.increment): if settings.args.quick and min < end - settings.param.increment and min != 0: #--quick ONLY DOES FIRST AND LAST BLOCKS continue try: #GET LIST OF CHANGED BUGS with Timer("time to get {{min}}..{{max}} bug list", {"min":min, "max":max}): if param.allow_private_bugs: bug_list = Q.select(db.query(""" SELECT b.bug_id FROM bugs b WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) """, { "min": min, "max": max, "start_time_str": param.start_time_str }), u"bug_id") else: bug_list = Q.select(db.query(""" SELECT b.bug_id FROM bugs b LEFT JOIN bug_group_map m ON m.bug_id=b.bug_id WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND m.bug_id IS NULL """, { "min": min, "max": max, "start_time_str": param.start_time_str }), u"bug_id") if not bug_list: continue param.bug_list = bug_list run_both_etl(**{ "db": db, "output_queue": output_queue, "es_comments": es_comments, "param": param.copy() }) except Exception, e: Log.error("Problem with dispatch loop in range [{{min}}, {{max}})", { "min": min, "max": max }, e)
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue): with Thread.run("alias_analysis", alias_analysis.main, settings=settings): end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id) start = nvl(settings.param.start, 0) if resume_from_last_run: start = nvl( settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment)) ############################################################# ## MAIN ETL LOOP ############################################################# #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD # with Multithread([run_both_etl, run_both_etl]) as workers: for min, max in Q.intervals(start, end, settings.param.increment): if settings.args.quick and min < end - settings.param.increment and min != 0: #--quick ONLY DOES FIRST AND LAST BLOCKS continue try: #GET LIST OF CHANGED BUGS with Timer("time to get {{min}}..{{max}} bug list", { "min": min, "max": max }): if param.allow_private_bugs: bug_list = Q.select( db.query( """ SELECT b.bug_id FROM bugs b WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) """, { "min": min, "max": max, "start_time_str": param.start_time_str }), u"bug_id") else: bug_list = Q.select( db.query( """ SELECT b.bug_id FROM bugs b LEFT JOIN bug_group_map m ON m.bug_id=b.bug_id WHERE delta_ts >= {{start_time_str}} AND ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND m.bug_id IS NULL """, { "min": min, "max": max, "start_time_str": param.start_time_str }), u"bug_id") if not bug_list: continue param.bug_list = bug_list run_both_etl( **{ "db": db, "output_queue": output_queue, "es_comments": es_comments, "param": param.copy() }) except Exception, e: Log.error( "Problem with dispatch loop in range [{{min}}, {{max}})", { "min": min, "max": max }, e)