Пример #1
0
def run_both_etl(db, output_queue, es_comments, param):
    """
    Start the comment ETL and the main ETL as two threads, then wait for both.

    :param db: passed as the first argument to both `etl_comments` and `etl`
    :param output_queue: passed to `etl` only
    :param es_comments: passed to `etl_comments` only
    :param param: passed to both workers (the same object, not a copy)

    Each thread is joined in the order it was started; if the joined result
    carries an `exception`, it is reported through `Log.error`.

    NOTE(review): if `Log.error` raises (not visible here), a failure in the
    comment thread means the "etl" thread is never joined - confirm.
    """
    # BOTH WORKERS ARE STARTED BEFORE EITHER IS WAITED ON
    workers = [
        (Thread.run("etl comments", etl_comments, db, es_comments, param), "etl_comments had problems"),
        (Thread.run("etl", etl, db, output_queue, param), "etl had problems"),
    ]

    # WAIT IN START ORDER, REPORTING ANY FAILURE AS WE GO
    for worker, complaint in workers:
        outcome = worker.join()
        if outcome.exception:
            Log.error(complaint, outcome.exception)
Пример #2
0
def run_both_etl(db, output_queue, es_comments, param):
    """
    Run `etl_comments` and `etl` concurrently and block until both finish.

    :param db: first argument handed to both worker functions
    :param output_queue: handed to `etl` only
    :param es_comments: handed to `etl_comments` only
    :param param: handed to both workers (shared object, not copied here)

    The comment worker is joined first, then the main worker. A truthy
    `exception` on a join result is forwarded to `Log.error` together with a
    message naming the worker that failed.

    NOTE(review): whether `Log.error` raises is not visible from here; if it
    does, the second join is skipped when the first worker failed - confirm.
    """
    # LAUNCH BOTH BEFORE JOINING SO THEY OVERLAP
    comments_worker = Thread.run(
        "etl comments", etl_comments, db, es_comments, param
    )
    bugs_worker = Thread.run("etl", etl, db, output_queue, param)

    pending = (
        ("etl_comments had problems", comments_worker),
        ("etl had problems", bugs_worker),
    )
    for failure_message, worker in pending:
        finished = worker.join()
        if finished.exception:
            Log.error(failure_message, finished.exception)
Пример #3
0
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    Walk the bug_id space in blocks and run both ETLs on every block that has
    changed bugs.

    :param resume_from_last_run: when truthy, the default start is derived from
        `get_max_bug_id(es)` instead of 0 (an explicit `settings.param.start`
        still wins in both cases)
    :param settings: reads `settings.param.start`, `settings.param.end`,
        `settings.param.increment` and `settings.args.quick`; also passed to
        `alias_analysis.main`
    :param param: reads `allow_private_bugs` and `start_time_str`; `bug_list`
        is assigned on it for every block, and a `param.copy()` is handed to
        `run_both_etl`
    :param db: queried for the max bug_id and for each block's changed bugs
    :param es: only passed to `get_max_bug_id`
    :param es_comments: forwarded to `run_both_etl`
    :param output_queue: forwarded to `run_both_etl`

    The whole loop runs inside `with Thread.run("alias_analysis", ...)`.
    Any exception raised while handling a block is reported through
    `Log.error` together with the block's range.
    """
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        # NOTE: THE max(bug_id) QUERY IS AN ARGUMENT, SO IT RUNS EVEN WHEN settings.param.end IS SET
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
        start = nvl(settings.param.start, 0)
        if resume_from_last_run:
            # PRESUMABLY ROUNDS THE LAST LOADED bug_id DOWN TO A BLOCK BOUNDARY - TODO CONFIRM Math.floor SEMANTICS
            start = nvl(settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment))

        #############################################################
        ## MAIN ETL LOOP
        #############################################################

        #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD
        # with Multithread([run_both_etl, run_both_etl]) as workers:
        # block_min/block_max (NOT min/max) SO THE BUILTINS ARE NOT SHADOWED
        for block_min, block_max in Q.intervals(start, end, settings.param.increment):
            if settings.args.quick and block_min < end - settings.param.increment and block_min != 0:
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {"min": block_min, "max": block_max}):
                    # SAME TEMPLATE PARAMETERS FOR EITHER QUERY
                    query_params = {
                        "min": block_min,
                        "max": block_max,
                        "start_time_str": param.start_time_str
                    }
                    if param.allow_private_bugs:
                        bug_list = Q.select(db.query("""
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}})
                        """, query_params), u"bug_id")
                    else:
                        # LEFT JOIN + IS NULL KEEPS ONLY BUGS WITH NO ROW IN bug_group_map
                        bug_list = Q.select(db.query("""
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            LEFT JOIN
                                bug_group_map m ON m.bug_id=b.bug_id
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND
                                m.bug_id IS NULL
                        """, query_params), u"bug_id")

                if not bug_list:
                    continue

                # bug_list IS SET ON THE SHARED param; THE WORKERS GET A COPY
                param.bug_list = bug_list
                run_both_etl(
                    db=db,
                    output_queue=output_queue,
                    es_comments=es_comments,
                    param=param.copy()
                )

            except Exception as e:
                # "as" FORM IS VALID ON PYTHON 2.6+ AND PYTHON 3
                Log.error("Problem with dispatch loop in range [{{min}}, {{max}})", {
                    "min": block_min,
                    "max": block_max
                }, e)
Пример #4
0
                    b.bug_id
                FROM
                    bugs b
                LEFT JOIN
                    bug_group_map m ON m.bug_id=b.bug_id
                WHERE
                    delta_ts >= {{start_time_str}} AND
                    m.bug_id IS NULL
            """, {
                "start_time_str": param.start_time_str
            }), u"bug_id")

    if not bug_list:
        return

    with Thread.run("alias analysis", alias_analysis.main, settings=settings, bug_list=bug_list):
        Log.note("Updating {{num}} bugs:\n{{bug_list|indent}}", {
            "num": len(bug_list),
            "bug_list": bug_list
        })
        param.bug_list = bug_list
        run_both_etl(**{
            "db": db,
            "output_queue": output_queue,
            "es_comments": es_comments,
            "param": param.copy()
        })


def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
Пример #5
0
def full_etl(resume_from_last_run, settings, param, db, es, es_comments,
             output_queue):
    """
    Process every block of bug_ids between start and end, running both ETLs
    on each block that contains changed bugs.

    :param resume_from_last_run: when truthy, the fallback start comes from
        `get_max_bug_id(es)` rather than 0; `settings.param.start` overrides
        either fallback
    :param settings: source of `param.start`, `param.end`, `param.increment`
        and `args.quick`; also given to `alias_analysis.main`
    :param param: supplies `allow_private_bugs` and `start_time_str`; its
        `bug_list` attribute is overwritten per block and a `param.copy()`
        goes to `run_both_etl`
    :param db: used for the max bug_id lookup and the per-block bug queries
    :param es: only used as the argument to `get_max_bug_id`
    :param es_comments: forwarded unchanged to `run_both_etl`
    :param output_queue: forwarded unchanged to `run_both_etl`

    All work happens inside `with Thread.run("alias_analysis", ...)`.
    A failure while handling a block is passed to `Log.error` with the
    block's bounds and the original exception.
    """
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        # NOTE: THE max(bug_id) QUERY IS EVALUATED EVEN IF settings.param.end IS GIVEN
        end = nvl(settings.param.end,
                  db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
        start = nvl(settings.param.start, 0)
        if resume_from_last_run:
            # PRESUMABLY ALIGNS THE RESUME POINT TO A BLOCK BOUNDARY - TODO CONFIRM Math.floor SEMANTICS
            start = nvl(
                settings.param.start,
                Math.floor(get_max_bug_id(es), settings.param.increment))

        #############################################################
        ## MAIN ETL LOOP
        #############################################################

        #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD
        # with Multithread([run_both_etl, run_both_etl]) as workers:
        # range_min/range_max AVOID SHADOWING THE min/max BUILTINS
        for range_min, range_max in Q.intervals(start, end,
                                                settings.param.increment):
            if (settings.args.quick
                    and range_min < end - settings.param.increment
                    and range_min != 0):
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {
                        "min": range_min,
                        "max": range_max
                }):
                    # ONE PARAMETER SET SHARED BY BOTH QUERY VARIANTS
                    sql_params = {
                        "min": range_min,
                        "max": range_max,
                        "start_time_str": param.start_time_str
                    }
                    if param.allow_private_bugs:
                        changed = db.query(
                            """
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}})
                        """, sql_params)
                    else:
                        # LEFT JOIN + IS NULL KEEPS ONLY BUGS WITH NO ROW IN bug_group_map
                        changed = db.query(
                            """
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            LEFT JOIN
                                bug_group_map m ON m.bug_id=b.bug_id
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND
                                m.bug_id IS NULL
                        """, sql_params)
                    bug_list = Q.select(changed, u"bug_id")

                if not bug_list:
                    continue

                # THE SHARED param IS UPDATED, THEN A COPY IS HANDED TO THE WORKERS
                param.bug_list = bug_list
                run_both_etl(
                    db=db,
                    output_queue=output_queue,
                    es_comments=es_comments,
                    param=param.copy())

            except Exception as e:
                # "as" FORM PARSES ON PYTHON 2.6+ AND PYTHON 3
                Log.error(
                    "Problem with dispatch loop in range [{{min}}, {{max}})", {
                        "min": range_min,
                        "max": range_max
                    }, e)
Пример #6
0
                SELECT
                    b.bug_id
                FROM
                    bugs b
                LEFT JOIN
                    bug_group_map m ON m.bug_id=b.bug_id
                WHERE
                    delta_ts >= {{start_time_str}} AND
                    m.bug_id IS NULL
            """, {"start_time_str": param.start_time_str}), u"bug_id")

    if not bug_list:
        return

    with Thread.run("alias analysis",
                    alias_analysis.main,
                    settings=settings,
                    bug_list=bug_list):
        Log.note("Updating {{num}} bugs:\n{{bug_list|indent}}", {
            "num": len(bug_list),
            "bug_list": bug_list
        })
        param.bug_list = bug_list
        run_both_etl(
            **{
                "db": db,
                "output_queue": output_queue,
                "es_comments": es_comments,
                "param": param.copy()
            })