def main(settings):
    """
    RUN THE ETL FOR THE BUGS LISTED IN settings.param.bugs INTO A Fake_ES
    CANDIDATE, THEN COMPARE THE CANDIDATE TO THE REFERENCE ElasticSearch

    settings - READS bugzilla, fake_es, reference, param.alias_file AND
               param.bugs
    THE RESULT OF compare_both() IS NOT RETURNED
    """
    #MAKE HANDLES TO CONTAINERS
    with DB(settings.bugzilla) as db:
        #REAL ES (DISABLED, KEPT TO SHOW HOW TO TARGET A REAL INDEX)
        # if settings.candidate.alias is None:
        #     settings.candidate.alias=settings.candidate.index
        #     settings.candidate.index=settings.candidate.alias+CNV.datetime2string(datetime.utcnow(), "%Y%m%d_%H%M%S")
        # candidate=ElasticSearch.create_index(settings.candidate, File(settings.candidate.schema_file).read())
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(db, settings.bugzilla.schema)

        #COLUMNS ARE READ BACK THROUGH param, EXACTLY AS THE ORIGINAL DID
        quoted_columns = [
            "`" + c.column_name + "`"
            for c in param.BUGS_TABLE_COLUMNS
        ]
        param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(quoted_columns))
        param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS, "column_name")

        param.END_TIME = CNV.datetime2milli(datetime.utcnow())
        param.START_TIME = 0
        param.alias_file = settings.param.alias_file

        bug_filter_values = {"bugs": db.quote(settings.param.bugs)}
        param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}", bug_filter_values)

        etl(db, candidate, param)

        #COMPARE ALL BUGS
        compare_both(candidate, reference, settings, settings.param.bugs)
def main(settings):
    """
    ETL THE BUGS NAMED IN settings.param.bugs INTO A Fake_ES CANDIDATE AND
    COMPARE THAT CANDIDATE WITH THE REFERENCE ElasticSearch

    settings - READS bugzilla, fake_es, reference, param.alias_file AND
               param.bugs
    NOTHING IS RETURNED; THE RESULT OF compare_both() IS DISCARDED
    """
    #MAKE HANDLES TO CONTAINERS
    with DB(settings.bugzilla) as db:
        #REAL ES (DISABLED, KEPT TO SHOW HOW TO TARGET A REAL INDEX)
        # if settings.candidate.alias is None:
        #     settings.candidate.alias=settings.candidate.index
        #     settings.candidate.index=settings.candidate.alias+CNV.datetime2string(datetime.utcnow(), "%Y%m%d_%H%M%S")
        # candidate=ElasticSearch.create_index(settings.candidate, File(settings.candidate.schema_file).read())
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #SETUP RUN PARAMETERS
        param = Struct()
        param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(db, settings.bugzilla.schema)

        #BACKTICK-QUOTE EACH COLUMN NAME, READING THE COLUMNS BACK THROUGH param
        column_sql = ",\n".join(
            ["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]
        )
        param.BUGS_TABLE_COLUMNS_SQL = SQL(column_sql)
        param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS, "column_name")

        now = datetime.utcnow()
        param.END_TIME = CNV.datetime2milli(now)
        param.START_TIME = 0
        param.alias_file = settings.param.alias_file
        param.BUG_IDS_PARTITION = SQL(
            "bug_id in {{bugs}}",
            {"bugs": db.quote(settings.param.bugs)}
        )

        etl(db, candidate, param)

        #COMPARE ALL BUGS
        compare_both(candidate, reference, settings, settings.param.bugs)
def random_sample_of_bugs(settings):
    """
    REPEATEDLY ETL A RANDOM SAMPLE OF NON-PRIVATE BUGS INTO A Fake_ES CANDIDATE
    AND COMPARE IT TO THE REFERENCE ElasticSearch

    THE LOOP ENDS ONLY WHEN compare_both() REPORTS ERRORS FOR A SAMPLE.  AN
    EXCEPTION DURING A ROUND IS LOGGED AS A WARNING AND THE NEXT ROUND STARTS
    WITH A FRESH SAMPLE.

    settings - READS bugzilla, fake_es, reference, param.temp_dir AND
               param.alias_file
    """
    NUM_TO_TEST = 100
    MAX_BUG_ID = 900000

    with DB(settings.bugzilla) as db:
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #GO FASTER BY STORING LOCAL FILE
        local_cache = File(settings.param.temp_dir + "/private_bugs.json")
        if local_cache.exists:
            private_bugs = set(CNV.JSON2object(local_cache.read()))
        else:
            with Timer("get private bugs"):
                private_bugs = compare_es.get_private_bugs(reference)
                local_cache.write(CNV.object2JSON(private_bugs))

        while True:
            #DRAW NUM_TO_TEST RANDOM IDS, THEN DROP THE PRIVATE ONES
            #(THE SAMPLE MAY THEREFORE HOLD FEWER THAN NUM_TO_TEST IDS)
            drawn = [Random.int(MAX_BUG_ID) for _ in range(NUM_TO_TEST)]
            some_bugs = [b for b in drawn if b not in private_bugs]

            #SETUP RUN PARAMETERS
            param = Struct()
            param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(db, settings.bugzilla.schema)
            param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(
                ["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]
            ))
            param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS, "column_name")
            param.END_TIME = CNV.datetime2milli(datetime.utcnow())
            param.START_TIME = 0
            param.alias_file = settings.param.alias_file
            param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}", {"bugs": db.quote(some_bugs)})

            try:
                etl(db, candidate, param)

                #COMPARE ALL BUGS
                found_errors = compare_both(candidate, reference, settings, some_bugs)
                if found_errors:
                    D.println("Errors found")
                    break
            except Exception as e:
                #BEST EFFORT: REPORT THE FAILED SAMPLE AND KEEP SAMPLING
                D.warning("Total failure during compare of bugs {{bugs}}", {"bugs": some_bugs}, e)
def random_sample_of_bugs(settings):
    """
    REPEATEDLY ETL A RANDOM SAMPLE OF NON-PRIVATE BUGS INTO A Fake_ES CANDIDATE
    AND COMPARE IT TO THE REFERENCE ElasticSearch

    THE LOOP ENDS ONLY WHEN compare_both() REPORTS ERRORS FOR A SAMPLE.  AN
    EXCEPTION DURING A ROUND IS LOGGED AS A WARNING AND THE NEXT ROUND STARTS
    WITH A FRESH SAMPLE.

    settings - READS bugzilla, fake_es, reference, param.temp_dir AND
               param.alias_file
    """
    NUM_TO_TEST = 100
    MAX_BUG_ID = 900000

    with DB(settings.bugzilla) as db:
        candidate = Fake_ES(settings.fake_es)
        reference = ElasticSearch(settings.reference)

        #GO FASTER BY STORING LOCAL FILE
        local_cache = File(settings.param.temp_dir + "/private_bugs.json")
        if local_cache.exists:
            private_bugs = set(CNV.JSON2object(local_cache.read()))
        else:
            with Timer("get private bugs"):
                private_bugs = compare_es.get_private_bugs(reference)
                local_cache.write(CNV.object2JSON(private_bugs))

        while True:
            #DRAW NUM_TO_TEST RANDOM IDS, THEN DROP THE PRIVATE ONES
            #(THE SAMPLE MAY THEREFORE HOLD FEWER THAN NUM_TO_TEST IDS)
            drawn = [Random.int(MAX_BUG_ID) for _ in range(NUM_TO_TEST)]
            some_bugs = [b for b in drawn if b not in private_bugs]

            #SETUP RUN PARAMETERS
            param = Struct()
            param.BUGS_TABLE_COLUMNS = get_bugs_table_columns(db, settings.bugzilla.schema)
            param.BUGS_TABLE_COLUMNS_SQL = SQL(",\n".join(
                ["`" + c.column_name + "`" for c in param.BUGS_TABLE_COLUMNS]
            ))
            param.BUGS_TABLE_COLUMNS = Q.select(param.BUGS_TABLE_COLUMNS, "column_name")
            param.END_TIME = CNV.datetime2milli(datetime.utcnow())
            param.START_TIME = 0
            param.alias_file = settings.param.alias_file
            param.BUG_IDS_PARTITION = SQL("bug_id in {{bugs}}", {"bugs": db.quote(some_bugs)})

            try:
                etl(db, candidate, param)

                #COMPARE ALL BUGS
                found_errors = compare_both(candidate, reference, settings, some_bugs)
                if found_errors:
                    D.println("Errors found")
                    break
            except Exception as e:
                #BEST EFFORT: REPORT THE FAILED SAMPLE AND KEEP SAMPLING
                D.warning("Total failure during compare of bugs {{bugs}}", {"bugs": some_bugs}, e)