def verify_no_private_attachments(es, private_attachments):
    """
    Verify none of the given private attachments appear in the ES output.

    Scans every stored version of each bug that owns a private attachment
    and fails (via Log.error) if any private attach_id is found.

    @param es - ES index holding the public bug versions
    @param private_attachments - records with "bug_id" and "attach_id" properties
    """
    # BUILD THE ID SET ONCE: the original re-ran Q.select() for every
    # attachment of every version (O(n*m) work, O(n) list membership)
    private_attach_ids = set(Q.select(private_attachments, "attach_id"))

    #VERIFY ATTACHMENTS ARE NOT IN OUTPUT
    for b in Q.select(private_attachments, "bug_id"):
        versions = compare_es.get_all_bug_versions(es, b)
        #WE ASSUME THE ATTACHMENT, IF IT EXISTS, WILL BE SOMEWHERE IN THE BUG IT
        #BELONGS TO, IF AT ALL
        for v in versions:
            for a in v.attachments:
                if a.attach_id in private_attach_ids:
                    Log.error("Private attachment should not exist")
def get(es, esfilter, fields=None, limit=None):
    """
    Return documents from es matching esfilter.

    @param es - index to search
    @param esfilter - ES filter clause selecting the documents
    @param fields - optional list of field names; when given, only those
                    fields are returned (via the hit "fields" property)
    @param limit - max documents to return (default 200000)
    @return list of "fields" records when fields is given, else "_source" records
    """
    query = struct.wrap({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": nvl(limit, 200000),
        "sort": []
    })

    if fields:
        query.fields = fields

    # SINGLE SEARCH CALL; the original duplicated es.search() in both branches
    results = es.search(query)
    if fields:
        return Q.select(results.hits.hits, "fields")
    return Q.select(results.hits.hits, "_source")
def incremental_etl(settings, param, db, es, es_comments, output_queue):
    """
    Delta ETL pass: purge bugs/attachments that turned private since the
    last run, then queue the affected (still-public) bugs for re-extraction.

    @param settings - full application settings (unused directly here)
    @param param - run parameters (allow_private_bugs, start_time, ...)
    @param db - Bugzilla MySQL connection
    @param es - destination bug-version index
    @param es_comments - destination comments index
    @param output_queue - queue consumed by the ES writer thread
    """
    ####################################################################
    ## ES TAKES TIME TO DELETE RECORDS, DO DELETE FIRST WITH HOPE THE
    ## INDEX GETS A REWRITE DURING ADD OF NEW RECORDS
    ####################################################################

    #REMOVE PRIVATE BUGS
    private_bugs = get_private_bugs_for_delete(db, param)
    Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": private_bugs})
    # DELETE IN BATCHES OF 1000 SO THE terms FILTER STAYS A REASONABLE SIZE
    for g, delete_bugs in Q.groupby(private_bugs, size=1000):
        still_existing = get_bug_ids(es, {"terms": {"bug_id": delete_bugs}})
        if still_existing:
            Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": still_existing})
        # NOTE(review): deletes appear unconditional (issued even when
        # still_existing is empty) — confirm nesting against project history
        es.delete_record({"terms": {"bug_id": delete_bugs}})
        es_comments.delete_record({"terms": {"bug_id": delete_bugs}})

    #RECENT PUBLIC BUGS
    possible_public_bugs = get_recent_private_bugs(db, param)
    if param.allow_private_bugs:
        #PRIVATE BUGS
        #   A CHANGE IN PRIVACY INDICATOR MEANS THE WHITEBOARD IS AFFECTED, REDO
        es.delete_record({"terms": {"bug_id": possible_public_bugs}})
    else:
        #PUBLIC BUGS
        #   IF ADDING GROUP THEN private_bugs ALREADY DID THIS
        #   IF REMOVING GROUP THEN NO RECORDS TO DELETE
        pass

    #REMOVE **RECENT** PRIVATE ATTACHMENTS
    private_attachments = get_recent_private_attachments(db, param)
    bugs_to_refresh = set(Q.select(private_attachments, "bug_id"))
    es.delete_record({"terms": {"bug_id": bugs_to_refresh}})

    #REBUILD BUGS THAT GOT REMOVED
    bug_list = (possible_public_bugs | bugs_to_refresh) - private_bugs  # REMOVE PRIVATE BUGS
    if bug_list:
        # RE-EXTRACT FROM THE BEGINNING OF TIME SO FULL HISTORY IS REBUILT
        refresh_param = param.copy()
        refresh_param.bug_list = bug_list
        refresh_param.start_time = 0
        refresh_param.start_time_str = extract_bugzilla.milli2string(db, 0)

        try:
            etl(db, output_queue, refresh_param.copy(), please_stop=None)
            etl_comments(db, es_comments, refresh_param.copy(), please_stop=None)
        except Exception, e:
            Log.error("Problem with etl using parameters {{parameters}}", {"parameters": refresh_param}, e)
def get_screened_whiteboard(db):
    """
    Lazily populate the module-level SCREENED_BUG_GROUP_IDS cache with the
    ids of the groups named in SCREENED_WHITEBOARD_BUG_GROUPS.

    Subsequent calls are no-ops once the cache is non-empty.

    @param db - Bugzilla MySQL connection used to look up the group ids
    """
    # IDIOMATIC global STATEMENT INSTEAD OF globals()[...] ASSIGNMENT;
    # identical behavior for a module-level function
    global SCREENED_BUG_GROUP_IDS

    if not SCREENED_BUG_GROUP_IDS:
        groups = db.query(
            "SELECT id FROM groups WHERE {{where}}",
            {"where": esfilter2sqlwhere(db, {"terms": {"name": SCREENED_WHITEBOARD_BUG_GROUPS}})}
        )
        SCREENED_BUG_GROUP_IDS = Q.select(groups, "id")
def get(es, esfilter, fields=None, limit=None):
    """
    Run a filtered match_all query against es and return the hits'
    "fields" property (when specific fields were requested) or their
    "_source" property otherwise.  At most nvl(limit, 200000) documents.
    """
    size = nvl(limit, 200000)
    search_request = struct.wrap({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": esfilter
        }},
        "from": 0,
        "size": size,
        "sort": []
    })

    if fields:
        search_request.fields = fields
        hits = es.search(search_request).hits.hits
        return Q.select(hits, "fields")
    else:
        hits = es.search(search_request).hits.hits
        return Q.select(hits, "_source")
def analysis(settings, last_run, please_stop):
    """
    Resolve email aliases by fix-point iteration over the module-level
    `bugs` aggregation (bug_id -> Multiset of emails, where negative counts
    mark emails that appear to be missing/replaced).

    Repeatedly picks the email most in need of replacement, finds the most
    likely substitute among bugs exhibiting the problem, records the alias,
    and loops until no confident match remains, then persists the aliases.

    @param settings - used for saveAliases()
    @param last_run - truthy once all data is loaded; relaxes thresholds
    @param please_stop - cooperative cancellation flag
    """
    # MINIMUM SCORE GAP REQUIRED BETWEEN BEST AND SECOND-BEST SOLUTION
    DIFF = 7
    if last_run:
        DIFF = 4  #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING

    try_again = True

    while try_again and not please_stop:
        #FIND EMAIL MOST NEEDING REPLACEMENT
        problem_agg = Multiset(allow_negative=True)
        for bug_id, agg in bugs.iteritems():
            #ONLY COUNT NEGATIVE EMAILS
            for email, count in agg.dic.iteritems():
                if count < 0:
                    problem_agg.add(alias(email), amount=count)

        # MOST-NEGATIVE (WORST) PROBLEMS FIRST; IGNORED ALIASES EXCLUDED
        problems = Q.sort([
            {"email": e, "count": c}
            for e, c in problem_agg.dic.iteritems()
            if not aliases.get(e, Null).ignore and (c <= -(DIFF / 2) or last_run)
        ], ["count", "email"])

        try_again = False
        for problem in problems:
            if please_stop:
                break

            #FIND MOST LIKELY MATCH
            solution_agg = Multiset(allow_negative=True)
            for bug_id, agg in bugs.iteritems():
                if agg.dic.get(problem.email, 0) < 0:  #ONLY BUGS THAT ARE EXPERIENCING THIS problem
                    solution_agg += agg
            solutions = Q.sort([{"email": e, "count": c} for e, c in solution_agg.dic.iteritems()], [{"field": "count", "sort": -1}, "email"])

            if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
                #exact match
                pass
            elif len(solutions) <= 1 or (solutions[1].count + DIFF >= solutions[0].count):
                #not distinctive enough
                continue

            best_solution = solutions[0]
            Log.note("{{problem}} ({{score}}) -> {{solution}} {{matches}}", {
                "problem": problem.email,
                "score": problem.count,
                "solution": best_solution.email,
                "matches": CNV.object2JSON(Q.select(solutions, "count")[:10:])
            })
            # A NEW ALIAS MAY EXPOSE FURTHER PROBLEMS; ITERATE AGAIN
            try_again = True
            add_alias(problem.email, best_solution.email)

    # NOTE(review): saveAliases placed after the while loop — confirm
    # nesting, the collapsed source is ambiguous here
    saveAliases(settings)
def incremental_etl(settings, param, db, es, es_comments, output_queue):
    """
    Delta ETL pass: purge bugs/attachments that turned private since the
    last run, then queue the affected (still-public) bugs for re-extraction.

    @param settings - full application settings (unused directly here)
    @param param - run parameters (allow_private_bugs, start_time, ...)
    @param db - Bugzilla MySQL connection
    @param es - destination bug-version index
    @param es_comments - destination comments index
    @param output_queue - queue consumed by the ES writer thread
    """
    ####################################################################
    ## ES TAKES TIME TO DELETE RECORDS, DO DELETE FIRST WITH HOPE THE
    ## INDEX GETS A REWRITE DURING ADD OF NEW RECORDS
    ####################################################################

    #REMOVE PRIVATE BUGS
    private_bugs = get_private_bugs_for_delete(db, param)
    Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": private_bugs})
    # DELETE IN BATCHES OF 1000 SO THE terms FILTER STAYS A REASONABLE SIZE
    for g, delete_bugs in Q.groupby(private_bugs, size=1000):
        still_existing = get_bug_ids(es, {"terms": {"bug_id": delete_bugs}})
        if still_existing:
            Log.note("Ensure the following private bugs are deleted:\n{{private_bugs|indent}}", {"private_bugs": still_existing})
        # NOTE(review): deletes appear unconditional (issued even when
        # still_existing is empty) — confirm nesting against project history
        es.delete_record({"terms": {"bug_id": delete_bugs}})
        es_comments.delete_record({"terms": {"bug_id": delete_bugs}})

    #RECENT PUBLIC BUGS
    possible_public_bugs = get_recent_private_bugs(db, param)
    if param.allow_private_bugs:
        #PRIVATE BUGS
        #   A CHANGE IN PRIVACY INDICATOR MEANS THE WHITEBOARD IS AFFECTED, REDO
        es.delete_record({"terms": {"bug_id": possible_public_bugs}})
    else:
        #PUBLIC BUGS
        #   IF ADDING GROUP THEN private_bugs ALREADY DID THIS
        #   IF REMOVING GROUP THEN NO RECORDS TO DELETE
        pass

    #REMOVE **RECENT** PRIVATE ATTACHMENTS
    private_attachments = get_recent_private_attachments(db, param)
    bugs_to_refresh = set(Q.select(private_attachments, "bug_id"))
    es.delete_record({"terms": {"bug_id": bugs_to_refresh}})

    #REBUILD BUGS THAT GOT REMOVED
    bug_list = (possible_public_bugs | bugs_to_refresh) - private_bugs  # REMOVE PRIVATE BUGS
    if bug_list:
        # RE-EXTRACT FROM THE BEGINNING OF TIME SO FULL HISTORY IS REBUILT
        refresh_param = param.copy()
        refresh_param.bug_list = bug_list
        refresh_param.start_time = 0
        refresh_param.start_time_str = extract_bugzilla.milli2string(db, 0)

        try:
            etl(db, output_queue, refresh_param.copy(), please_stop=None)
            etl_comments(db, es_comments, refresh_param.copy(), please_stop=None)
        except Exception, e:
            Log.error("Problem with etl using parameters {{parameters}}", {"parameters": refresh_param}, e)
def verify_no_private_comments(es, private_comments):
    """
    Fail (via Log.error) if any of the given private comment ids are still
    present in the comments index.
    """
    comment_filter = {"and": [
        {"terms": {"comment_id": private_comments}}
    ]}

    response = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": comment_filter
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    })

    leaked = Q.select(response.hits.hits, "_source")
    if leaked:
        Log.error("Expecting no comments")
def get_all_bug_versions(es, bug_id, max_time=None):
    """
    Return the _source of every stored version of bug_id whose modified_ts
    is at or before max_time (defaults to datetime.max, i.e. everything).
    """
    cutoff = nvl(max_time, datetime.max)

    version_query = {
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"term": {"bug_id": bug_id}},
                {"range": {"modified_ts": {"lte": CNV.datetime2milli(cutoff)}}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    }

    response = es.search(version_query)
    return Q.select(response.hits.hits, "_source")
def verify_no_private_comments(es, private_comments):
    """
    Assert (via Log.error) that none of the given private comment ids can
    be found in the comments index.
    """
    hits = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [{"terms": {"comment_id": private_comments}}]}
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    }).hits.hits

    if Q.select(hits, "_source"):
        Log.error("Expecting no comments")
refresh_param.start_time = 0 refresh_param.start_time_str = extract_bugzilla.milli2string(db, 0) try: etl(db, output_queue, refresh_param.copy(), please_stop=None) etl_comments(db, es_comments, refresh_param.copy(), please_stop=None) except Exception, e: Log.error("Problem with etl using parameters {{parameters}}", {"parameters": refresh_param}, e) #REFRESH COMMENTS WITH PRIVACY CHANGE private_comments = get_recent_private_comments(db, param) comment_list = set(Q.select(private_comments, "comment_id")) | {0} es_comments.delete_record({"terms": {"comment_id": comment_list}}) changed_comments = get_comments_by_id(db, comment_list, param) es_comments.extend({ "id": c.comment_id, "value": c } for c in changed_comments) #GET LIST OF CHANGED BUGS with Timer("time to get changed bug list"): if param.allow_private_bugs: bug_list = Q.select( db.query( """ SELECT b.bug_id
def test_changes_to_private_bugs_still_have_bug_group(self):
    """
    Integration test: after an incremental ETL run, bugs that were marked
    private (given a bug group) must still carry that bug group in their
    latest ES version.

    Builds a fresh test Bugzilla instance, marks 3 random bugs private,
    runs a full ETL, modifies those bugs, runs an incremental ETL, then
    checks the current ES versions of the private bugs.
    """
    self.settings.param.allow_private_bugs = True
    # DELETE RUN MARKERS SO ETL STARTS FROM SCRATCH
    File(self.settings.param.first_run_time).delete()
    File(self.settings.param.last_run_time).delete()

    private_bugs = set(Random.sample(self.settings.param.bugs, 3))
    Log.note("The private bugs for this test are {{bugs}}", {"bugs": private_bugs})

    database.make_test_instance(self.settings.bugzilla)

    #MARK SOME BUGS PRIVATE
    with DB(self.settings.bugzilla) as db:
        for b in private_bugs:
            database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

    es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
    es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
    bz_etl.main(self.settings, es, es_c)

    # MAKE A CHANGE TO THE PRIVATE BUGS
    with DB(self.settings.bugzilla) as db:
        for b in private_bugs:
            old_bug = db.query("SELECT * FROM bugs WHERE bug_id={{bug_id}}", {"bug_id": b})[0]
            new_bug = old_bug.copy()
            new_bug.bug_status = "NEW STATUS"
            diff(db, "bugs", old_bug, new_bug)

    #RUN INCREMENTAL
    bz_etl.main(self.settings, es, es_c)

    #VERIFY BUG GROUP STILL EXISTS
    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
    now = datetime.utcnow()
    # ONLY CURRENT VERSIONS: expires_on IN THE FUTURE
    results = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"terms": {"bug_id": private_bugs}},
                {"range": {"expires_on": {"gte": CNV.datetime2milli(now)}}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    })
    latest_bugs = Q.select(results.hits.hits, "_source")
    latest_bugs_index = Q.unique_index(latest_bugs, "bug_id")  # IF NOT UNIQUE, THEN ETL IS WRONG

    for bug_id in private_bugs:
        if latest_bugs_index[bug_id] == None:
            Log.error("Expecting to find the private bug {{bug_id}}", {"bug_id": bug_id})

        bug_group = latest_bugs_index[bug_id].bug_group
        if not bug_group:
            Log.error("Expecting private bug ({{bug_id}}) to have a bug group", {"bug_id": bug_id})
        if BUG_GROUP_FOR_TESTING not in bug_group:
            Log.error("Expecting private bug ({{bug_id}}) to have a \"{{bug_group}}\" bug group", {
                "bug_id": bug_id,
                "bug_group": BUG_GROUP_FOR_TESTING
            })
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    Full ETL: sweep the whole bug_id range in fixed-size increments,
    extracting changed bugs and queuing them for ES insertion, while an
    alias analysis runs in a background thread.

    @param resume_from_last_run - when True, restart from the highest
           bug_id already present in es (rounded down to an increment)
    @param settings - application settings (param.start/end/increment, args.quick)
    @param param - per-run parameters (allow_private_bugs, start_time_str, ...)
    @param db - Bugzilla MySQL connection
    @param es - destination bug-version index (used only for resume point)
    @param es_comments - destination comments index
    @param output_queue - queue consumed by the ES writer thread
    """
    # ALIAS ANALYSIS RUNS CONCURRENTLY FOR THE DURATION OF THE ETL
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
        start = nvl(settings.param.start, 0)
        if resume_from_last_run:
            start = nvl(settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment))

        #############################################################
        ## MAIN ETL LOOP
        #############################################################

        #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD
        # with Multithread([run_both_etl, run_both_etl]) as workers:
        for min, max in Q.intervals(start, end, settings.param.increment):
            if settings.args.quick and min < end - settings.param.increment and min != 0:
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {"min": min, "max": max}):
                    if param.allow_private_bugs:
                        # ALL BUGS IN RANGE, PRIVATE INCLUDED
                        bug_list = Q.select(db.query("""
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}})
                        """, {
                            "min": min,
                            "max": max,
                            "start_time_str": param.start_time_str
                        }), u"bug_id")
                    else:
                        # PUBLIC ONLY: anti-join against bug_group_map
                        bug_list = Q.select(db.query("""
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            LEFT JOIN
                                bug_group_map m ON m.bug_id=b.bug_id
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND
                                m.bug_id IS NULL
                        """, {
                            "min": min,
                            "max": max,
                            "start_time_str": param.start_time_str
                        }), u"bug_id")

                if not bug_list:
                    continue

                param.bug_list = bug_list
                run_both_etl(**{
                    "db": db,
                    "output_queue": output_queue,
                    "es_comments": es_comments,
                    "param": param.copy()
                })
            except Exception, e:
                Log.error("Problem with dispatch loop in range [{{min}}, {{max}})", {
                    "min": min,
                    "max": max
                }, e)
refresh_param = param.copy() refresh_param.bug_list = bug_list refresh_param.start_time = 0 refresh_param.start_time_str = extract_bugzilla.milli2string(db, 0) try: etl(db, output_queue, refresh_param.copy(), please_stop=None) etl_comments(db, es_comments, refresh_param.copy(), please_stop=None) except Exception, e: Log.error("Problem with etl using parameters {{parameters}}", { "parameters": refresh_param }, e) #REFRESH COMMENTS WITH PRIVACY CHANGE private_comments = get_recent_private_comments(db, param) comment_list = set(Q.select(private_comments, "comment_id")) | {0} es_comments.delete_record({"terms": {"comment_id": comment_list}}) changed_comments = get_comments_by_id(db, comment_list, param) es_comments.extend({"id": c.comment_id, "value": c} for c in changed_comments) #GET LIST OF CHANGED BUGS with Timer("time to get changed bug list"): if param.allow_private_bugs: bug_list = Q.select(db.query(""" SELECT b.bug_id FROM bugs b WHERE delta_ts >= {{start_time_str}} """, {
def full_etl(resume_from_last_run, settings, param, db, es, es_comments, output_queue):
    """
    Full ETL: sweep the whole bug_id range in fixed-size increments,
    extracting changed bugs and queuing them for ES insertion, while an
    alias analysis runs in a background thread.

    @param resume_from_last_run - when True, restart from the highest
           bug_id already present in es (rounded down to an increment)
    @param settings - application settings (param.start/end/increment, args.quick)
    @param param - per-run parameters (allow_private_bugs, start_time_str, ...)
    @param db - Bugzilla MySQL connection
    @param es - destination bug-version index (used only for resume point)
    @param es_comments - destination comments index
    @param output_queue - queue consumed by the ES writer thread
    """
    # ALIAS ANALYSIS RUNS CONCURRENTLY FOR THE DURATION OF THE ETL
    with Thread.run("alias_analysis", alias_analysis.main, settings=settings):
        end = nvl(settings.param.end, db.query("SELECT max(bug_id)+1 bug_id FROM bugs")[0].bug_id)
        start = nvl(settings.param.start, 0)
        if resume_from_last_run:
            start = nvl(settings.param.start, Math.floor(get_max_bug_id(es), settings.param.increment))

        #############################################################
        ## MAIN ETL LOOP
        #############################################################

        #TWO WORKERS IS MORE THAN ENOUGH FOR A SINGLE THREAD
        # with Multithread([run_both_etl, run_both_etl]) as workers:
        for min, max in Q.intervals(start, end, settings.param.increment):
            if settings.args.quick and min < end - settings.param.increment and min != 0:
                #--quick ONLY DOES FIRST AND LAST BLOCKS
                continue

            try:
                #GET LIST OF CHANGED BUGS
                with Timer("time to get {{min}}..{{max}} bug list", {"min": min, "max": max}):
                    if param.allow_private_bugs:
                        # ALL BUGS IN RANGE, PRIVATE INCLUDED
                        bug_list = Q.select(db.query("""
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}})
                        """, {
                            "min": min,
                            "max": max,
                            "start_time_str": param.start_time_str
                        }), u"bug_id")
                    else:
                        # PUBLIC ONLY: anti-join against bug_group_map
                        bug_list = Q.select(db.query("""
                            SELECT
                                b.bug_id
                            FROM
                                bugs b
                            LEFT JOIN
                                bug_group_map m ON m.bug_id=b.bug_id
                            WHERE
                                delta_ts >= {{start_time_str}} AND
                                ({{min}} <= b.bug_id AND b.bug_id < {{max}}) AND
                                m.bug_id IS NULL
                        """, {
                            "min": min,
                            "max": max,
                            "start_time_str": param.start_time_str
                        }), u"bug_id")

                if not bug_list:
                    continue

                param.bug_list = bug_list
                run_both_etl(**{
                    "db": db,
                    "output_queue": output_queue,
                    "es_comments": es_comments,
                    "param": param.copy()
                })
            except Exception, e:
                Log.error("Problem with dispatch loop in range [{{min}}, {{max}})", {
                    "min": min,
                    "max": max
                }, e)
def get_screened_whiteboard(db):
    """
    Fill the SCREENED_BUG_GROUP_IDS module cache with the ids of the
    groups named in SCREENED_WHITEBOARD_BUG_GROUPS; no-op once populated.
    """
    if SCREENED_BUG_GROUP_IDS:
        return

    where_clause = esfilter2sqlwhere(db, {"terms": {"name": SCREENED_WHITEBOARD_BUG_GROUPS}})
    group_rows = db.query("SELECT id FROM groups WHERE {{where}}", {"where": where_clause})
    globals()["SCREENED_BUG_GROUP_IDS"] = Q.select(group_rows, "id")
def get_bugs(db, param):
    """
    Pull the base bug rows for param.bug_list from Bugzilla and convert
    them into delta records (one per field) via flatten_bugs_record.

    Sensitive fields (short_desc, bug_file_loc) and screened whiteboards
    are replaced with '[screened]' when param.allow_private_bugs is set or
    the bug belongs to a screened group.

    @param db - Bugzilla MySQL connection
    @param param - must provide bug_list and allow_private_bugs
    @return list of flattened delta records
    """
    try:
        get_bugs_table_columns(db, db.settings.schema)
        get_screened_whiteboard(db)

        #TODO: CF_LAST_RESOLVED IS IN PDT, FIX IT
        def lower(col):
            # LOWER-CASE varchar COLUMNS IN SQL, KEEPING THE ORIGINAL NAME
            if col.column_type.startswith("varchar"):
                return "lower(" + db.quote_column(col.column_name) + ") " + db.quote_column(col.column_name)
            else:
                return db.quote_column(col.column_name)

        param.bugs_columns = Q.select(bugs_columns, "column_name")
        param.bugs_columns_SQL = SQL(",\n".join([lower(c) for c in bugs_columns]))
        param.bug_filter = esfilter2sqlwhere(db, {"terms": {"b.bug_id": param.bug_list}})
        # TRUE WHEN THE BUG IS IN ANY SCREENED GROUP
        param.screened_whiteboard = esfilter2sqlwhere(db, {"and": [
            {"exists": "m.bug_id"},
            {"terms": {"m.group_id": SCREENED_BUG_GROUP_IDS}}
        ]})

        if param.allow_private_bugs:
            param.sensitive_columns = SQL("""
                '[screened]' short_desc,
                '[screened]' bug_file_loc
            """)
        else:
            param.sensitive_columns = SQL("""
                short_desc,
                bug_file_loc
            """)

        bugs = db.query("""
            SELECT
                b.bug_id,
                UNIX_TIMESTAMP(CONVERT_TZ(b.creation_ts, 'US/Pacific','UTC'))*1000 AS modified_ts,
                lower(pr.login_name) AS modified_by,
                UNIX_TIMESTAMP(CONVERT_TZ(b.creation_ts, 'US/Pacific','UTC'))*1000 AS created_ts,
                lower(pr.login_name) AS created_by,
                lower(pa.login_name) AS assigned_to,
                lower(pq.login_name) AS qa_contact,
                lower(prod.`name`) AS product,
                lower(comp.`name`) AS component,
                CASE WHEN {{screened_whiteboard}} AND b.status_whiteboard IS NOT NULL AND trim(b.status_whiteboard)<>'' THEN '[screened]' ELSE trim(lower(b.status_whiteboard)) END status_whiteboard,
                {{sensitive_columns}},
                {{bugs_columns_SQL}}
            FROM
                bugs b
            LEFT JOIN profiles pr ON b.reporter = pr.userid
            LEFT JOIN profiles pa ON b.assigned_to = pa.userid
            LEFT JOIN profiles pq ON b.qa_contact = pq.userid
            LEFT JOIN products prod ON prod.id = product_id
            LEFT JOIN components comp ON comp.id = component_id
            LEFT JOIN bug_group_map m ON m.bug_id = b.bug_id
            WHERE
                {{bug_filter}}
        """, param)

        #bugs IS LIST OF BUGS WHICH MUST BE CONVERTED TO THE DELTA RECORDS FOR ALL FIELDS
        output = []
        for r in bugs:
            flatten_bugs_record(r, output)

        return output
    except Exception, e:
        Log.error("can not get basic bug data", e)
def analysis(settings, last_run, please_stop):
    """
    Resolve email aliases by fix-point iteration over the module-level
    `bugs` aggregation (bug_id -> Multiset of emails, where negative counts
    mark emails that appear to be missing/replaced).

    Repeatedly picks the email most in need of replacement, finds the most
    likely substitute among bugs exhibiting the problem, records the alias,
    and loops until no confident match remains, then persists the aliases.

    @param settings - used for saveAliases()
    @param last_run - truthy once all data is loaded; relaxes thresholds
    @param please_stop - cooperative cancellation flag
    """
    # MINIMUM SCORE GAP REQUIRED BETWEEN BEST AND SECOND-BEST SOLUTION
    DIFF = 7
    if last_run:
        DIFF = 4  #ONCE WE HAVE ALL THE DATA IN WE CAN BE LESS DISCRIMINATING

    try_again = True

    while try_again and not please_stop:
        #FIND EMAIL MOST NEEDING REPLACEMENT
        problem_agg = Multiset(allow_negative=True)
        for bug_id, agg in bugs.iteritems():
            #ONLY COUNT NEGATIVE EMAILS
            for email, count in agg.dic.iteritems():
                if count < 0:
                    problem_agg.add(alias(email), amount=count)

        # MOST-NEGATIVE (WORST) PROBLEMS FIRST; IGNORED ALIASES EXCLUDED
        problems = Q.sort([
            {"email": e, "count": c}
            for e, c in problem_agg.dic.iteritems()
            if not aliases.get(e, Null).ignore and (c <= -(DIFF / 2) or last_run)
        ], ["count", "email"])

        try_again = False
        for problem in problems:
            if please_stop:
                break

            #FIND MOST LIKELY MATCH
            solution_agg = Multiset(allow_negative=True)
            for bug_id, agg in bugs.iteritems():
                if agg.dic.get(problem.email, 0) < 0:  #ONLY BUGS THAT ARE EXPERIENCING THIS problem
                    solution_agg += agg
            solutions = Q.sort([{"email": e, "count": c} for e, c in solution_agg.dic.iteritems()], [{"field": "count", "sort": -1}, "email"])

            if last_run and len(solutions) == 2 and solutions[0].count == -solutions[1].count:
                #exact match
                pass
            elif len(solutions) <= 1 or (solutions[1].count + DIFF >= solutions[0].count):
                #not distinctive enough
                continue

            best_solution = solutions[0]
            Log.note("{{problem}} ({{score}}) -> {{solution}} {{matches}}", {
                "problem": problem.email,
                "score": problem.count,
                "solution": best_solution.email,
                "matches": CNV.object2JSON(Q.select(solutions, "count")[:10:])
            })
            # A NEW ALIAS MAY EXPOSE FURTHER PROBLEMS; ITERATE AGAIN
            try_again = True
            add_alias(problem.email, best_solution.email)

    # NOTE(review): saveAliases placed after the while loop — confirm
    # nesting, the collapsed source is ambiguous here
    saveAliases(settings)
def test_changes_to_private_bugs_still_have_bug_group(self):
    """
    Integration test: after an incremental ETL run, bugs that were marked
    private (given a bug group) must still carry that bug group in their
    latest ES version.

    Builds a fresh test Bugzilla instance, marks 3 random bugs private,
    runs a full ETL, modifies those bugs, runs an incremental ETL, then
    checks the current ES versions of the private bugs.
    """
    self.settings.param.allow_private_bugs = True
    # DELETE RUN MARKERS SO ETL STARTS FROM SCRATCH
    File(self.settings.param.first_run_time).delete()
    File(self.settings.param.last_run_time).delete()

    private_bugs = set(Random.sample(self.settings.param.bugs, 3))
    Log.note("The private bugs for this test are {{bugs}}", {"bugs": private_bugs})

    database.make_test_instance(self.settings.bugzilla)

    #MARK SOME BUGS PRIVATE
    with DB(self.settings.bugzilla) as db:
        for b in private_bugs:
            database.add_bug_group(db, b, BUG_GROUP_FOR_TESTING)

    es = elasticsearch.make_test_instance("candidate", self.settings.real.bugs)
    es_c = elasticsearch.make_test_instance("candidate_comments", self.settings.real.comments)
    bz_etl.main(self.settings, es, es_c)

    # MAKE A CHANGE TO THE PRIVATE BUGS
    with DB(self.settings.bugzilla) as db:
        for b in private_bugs:
            old_bug = db.query("SELECT * FROM bugs WHERE bug_id={{bug_id}}", {"bug_id": b})[0]
            new_bug = old_bug.copy()
            new_bug.bug_status = "NEW STATUS"
            diff(db, "bugs", old_bug, new_bug)

    #RUN INCREMENTAL
    bz_etl.main(self.settings, es, es_c)

    #VERIFY BUG GROUP STILL EXISTS
    Thread.sleep(2)  # MUST SLEEP WHILE ES DOES ITS INDEXING
    now = datetime.utcnow()
    # ONLY CURRENT VERSIONS: expires_on IN THE FUTURE
    results = es.search({
        "query": {"filtered": {
            "query": {"match_all": {}},
            "filter": {"and": [
                {"terms": {"bug_id": private_bugs}},
                {"range": {"expires_on": {"gte": CNV.datetime2milli(now)}}}
            ]}
        }},
        "from": 0,
        "size": 200000,
        "sort": []
    })
    latest_bugs = Q.select(results.hits.hits, "_source")
    latest_bugs_index = Q.unique_index(latest_bugs, "bug_id")  # IF NOT UNIQUE, THEN ETL IS WRONG

    for bug_id in private_bugs:
        if latest_bugs_index[bug_id] == None:
            Log.error("Expecting to find the private bug {{bug_id}}", {"bug_id": bug_id})

        bug_group = latest_bugs_index[bug_id].bug_group
        if not bug_group:
            Log.error("Expecting private bug ({{bug_id}}) to have a bug group", {"bug_id": bug_id})
        if BUG_GROUP_FOR_TESTING not in bug_group:
            Log.error("Expecting private bug ({{bug_id}}) to have a \"{{bug_group}}\" bug group", {
                "bug_id": bug_id,
                "bug_group": BUG_GROUP_FOR_TESTING
            })
def search(self, query):
    """
    Mimic the ES search endpoint against the in-memory self.data dict:
    apply the request's filter, then return the matching documents (whole
    _source, or just the requested fields when query.fields is set).
    """
    query = wrap(query)
    matches = CNV.esfilter2where(query.query.filtered.filter)

    hit_list = []
    for doc_id, doc in self.data.items():
        if matches(doc):
            hit_list.append({"_id": doc_id, "_source": doc})
    hit_list = wrap(hit_list)

    if not query.fields:
        return wrap({"hits": {"total": len(hit_list), "hits": hit_list}})

    trimmed = [
        {"_id": h._id, "fields": unwrap(Q.select([unwrap(h._source)], query.fields)[0])}
        for h in hit_list
    ]
    return wrap({"hits": {"total": len(hit_list), "hits": trimmed}})
def get_bugs(db, param):
    """
    Pull the base bug rows for param.bug_list from Bugzilla and convert
    them into delta records (one per field) via flatten_bugs_record.

    Sensitive fields (short_desc, bug_file_loc) and screened whiteboards
    are replaced with '[screened]' when param.allow_private_bugs is set or
    the bug belongs to a screened group.

    @param db - Bugzilla MySQL connection
    @param param - must provide bug_list and allow_private_bugs
    @return list of flattened delta records
    """
    try:
        get_bugs_table_columns(db, db.settings.schema)
        get_screened_whiteboard(db)

        #TODO: CF_LAST_RESOLVED IS IN PDT, FIX IT
        def lower(col):
            # LOWER-CASE varchar COLUMNS IN SQL, KEEPING THE ORIGINAL NAME
            if col.column_type.startswith("varchar"):
                return "lower(" + db.quote_column(col.column_name) + ") " + db.quote_column(col.column_name)
            else:
                return db.quote_column(col.column_name)

        param.bugs_columns = Q.select(bugs_columns, "column_name")
        param.bugs_columns_SQL = SQL(",\n".join([lower(c) for c in bugs_columns]))
        param.bug_filter = esfilter2sqlwhere(db, {"terms": {"b.bug_id": param.bug_list}})
        # TRUE WHEN THE BUG IS IN ANY SCREENED GROUP
        param.screened_whiteboard = esfilter2sqlwhere(db, {"and": [
            {"exists": "m.bug_id"},
            {"terms": {"m.group_id": SCREENED_BUG_GROUP_IDS}}
        ]})

        if param.allow_private_bugs:
            param.sensitive_columns = SQL("""
                '[screened]' short_desc,
                '[screened]' bug_file_loc
            """)
        else:
            param.sensitive_columns = SQL("""
                short_desc,
                bug_file_loc
            """)

        bugs = db.query("""
            SELECT
                b.bug_id,
                UNIX_TIMESTAMP(CONVERT_TZ(b.creation_ts, 'US/Pacific','UTC'))*1000 AS modified_ts,
                lower(pr.login_name) AS modified_by,
                UNIX_TIMESTAMP(CONVERT_TZ(b.creation_ts, 'US/Pacific','UTC'))*1000 AS created_ts,
                lower(pr.login_name) AS created_by,
                lower(pa.login_name) AS assigned_to,
                lower(pq.login_name) AS qa_contact,
                lower(prod.`name`) AS product,
                lower(comp.`name`) AS component,
                CASE WHEN {{screened_whiteboard}} AND b.status_whiteboard IS NOT NULL AND trim(b.status_whiteboard)<>'' THEN '[screened]' ELSE trim(lower(b.status_whiteboard)) END status_whiteboard,
                {{sensitive_columns}},
                {{bugs_columns_SQL}}
            FROM
                bugs b
            LEFT JOIN profiles pr ON b.reporter = pr.userid
            LEFT JOIN profiles pa ON b.assigned_to = pa.userid
            LEFT JOIN profiles pq ON b.qa_contact = pq.userid
            LEFT JOIN products prod ON prod.id = product_id
            LEFT JOIN components comp ON comp.id = component_id
            LEFT JOIN bug_group_map m ON m.bug_id = b.bug_id
            WHERE
                {{bug_filter}}
        """, param)

        #bugs IS LIST OF BUGS WHICH MUST BE CONVERTED TO THE DELTA RECORDS FOR ALL FIELDS
        output = []
        for r in bugs:
            flatten_bugs_record(r, output)

        return output
    except Exception, e:
        Log.error("can not get basic bug data", e)