def main():
    """
    Script entry point: bulk-load a source file into the b2g_tests table.

    Reads configuration from file2db_settings.json, then drops and
    recreates the b2g_tests table before importing, so every run is a
    full, destructive reload of the table.

    All failures are logged via Log.error; nothing is re-raised.
    """
    try:
        settings = startup.read_settings(filename="file2db_settings.json")
        Log.start(settings.debug)

        # DB is a context manager, so the connection is released even if
        # the load fails partway through.
        with DB(settings.db) as db:
            # Destructive reload: discard any previous copy of the table.
            db.execute("""
                DROP TABLE IF EXISTS b2g_tests
            """)
            db.execute("""
                CREATE TABLE b2g_tests (
                    id INTEGER PRIMARY KEY NOT NULL,
                    branch VARCHAR(100),
                    name VARCHAR(100),
                    version VARCHAR(100),
                    suite varchar(200),
                    revision varchar(100),
                    `date` LONG
                )
            """)
            file2db(db, "b2g_tests", settings.source_file)
    # was `except Exception, e` — the `as` form works on Python 2.6+
    # (this file already uses the native `with` statement) and is the
    # only form accepted by Python 3
    except Exception as e:
        Log.error("can not seem to startup", e)
def main():
    """
    ETL entry point: pull test results out of DataZilla into ElasticSearch.

    Recognized command-line flags (all aliases map to one dest):
        --restart/--reset/--redo        force a full reprocessing into a new index
        --no_restart/--no_reset/...     forbid index creation (debugging guard)
        --file/--scan_file/...          scan local file for missing ids
        --nofile/--no_file/--no-file    do not scan the file

    Runs as a single instance (guarded by startup.SingleInstance) and
    logs any failure via Log.error without re-raising.
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--no_restart", "--no_reset", "--no_redo", "--norestart", "--noreset", "--noredo"],
            "help": "do not allow creation of new index (for debugging rouge resets)",
            "action": "store_true",
            "dest": "no_restart"
        }, {
            "name": ["--restart", "--reset", "--redo"],
            "help": "force a reprocessing of all data",
            "action": "store_true",
            "dest": "restart"
        }, {
            "name": ["--file", "--scan_file", "--scanfile", "--use_file", "--usefile"],
            "help": "scan file for missing ids",
            "action": "store_true",
            "dest": "scan_file"
        }, {
            "name": ["--nofile", "--no_file", "--no-file"],
            "help": "do not scan file for missing ids",
            "action": "store_false",
            "dest": "scan_file"
        }])
        Log.start(settings.debug)

        # Only one copy of this ETL may run at a time.
        with startup.SingleInstance(flavor_id=settings.args.filename):
            settings.production.threads = nvl(settings.production.threads, 1)
            settings.param.output_file = nvl(settings.param.output_file, "./results/raw_json_blobs.tab")

            transformer = DZ_to_ES(settings.pushlog)

            #RESET ONLY IF NEW Transform IS USED
            if settings.args.restart:
                es = Cluster(settings.elasticsearch).create_index(settings.elasticsearch)
                es.add_alias()
                es.delete_all_but_self()
            else:
                es = Cluster(settings.elasticsearch).get_or_create_index(settings.elasticsearch)
            # Hoisted out of the if/else: both branches ran the identical
            # extraction call, only the index setup differed.
            extract_from_datazilla_using_id(es, settings, transformer)
    # was `except Exception, e` — `as` form is Python 2.6+/3 compatible
    except Exception as e:
        Log.error("Problem with etl", e)
for k, v in [(k, v) for k, v in r.items()]: new_path = path + "[" + k + "]" arrays_add(id, new_path, v) elif isinstance(r, list): try: values = r.map(float) arrays.append([id, path, len(values), 1]) except Exception, e: for i, v in enumerate(r): r[i] = arrays_add(id, path + "[" + str(i) + "]", v) # return r except Exception, e: Log.warning("Can not summarize: {{json}}", {"json": CNV.object2JSON(r)}) settings = startup.read_settings() Log.settings(settings.debug) all = set() with open(settings.output_file, "r") as input_file: with open("good_talos.tab", "w") as output_file: for line in input_file: try: if len(line.strip()) == 0: continue col = line.split("\t") id = int(col[0]) if id < MINIMUM_ID: continue