def start():
    """Entry point: load settings, start logging, and run main()."""
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        main(settings)
    # was `except Exception, e` (Python-2-only syntax); `as` works on 2.6+ and 3.x
    except Exception as e:
        Log.error("Problems exist", e)
def main():
    """Load settings, start logging, and run the backfill."""
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        backfill(settings)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with backfill", e)
def main():
    """Start ETL worker threads, or process only the given id(s) when --id is set."""
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id(s) to process. Use \"..\" for a range.",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.id:
            # one-shot mode: process the requested id(s) and exit
            etl_one(settings)
            return

        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))

        stopper = Signal()
        for i in range(coalesce(settings.param.threads, 1)):
            ETL(
                name="ETL Loop " + unicode(i),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper
            )

        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    settings = startup.read_settings(defs=[
        {
            "name": ["--bucket"],
            "help": "bucket to scan",
            "type": str,
            "dest": "bucket",
            "required": True
        }
    ])
    Log.start(settings.debug)

    source = Connection(settings.aws).get_bucket(settings.args.bucket)

    # scan every key, looking for the given changeset hash in the content
    for k in qb.sort(source.keys()):
        try:
            data = source.read_bytes(k)
            if convert.ascii2unicode(data).find("2e2834fa7ecd8d3bb1ad49ec981fdb89eb4df95e18") >= 0:
                Log.note("Found at {{key}}", key=k)
        # was `except Exception, e` (Python-2-only syntax)
        except Exception as e:
            Log.warning("Problem with {{key}}", key=k, cause=e)
        # BUG FIX: original ended with a dangling `finally:` with no body,
        # which is a SyntaxError; removed (nothing to clean up per key)
def main():
    """Consume Pulse messages into a persistent queue and relay them to S3."""
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)
        constants.set(settings.constants)

        with startup.SingleInstance(flavor_id=settings.args.filename):
            with aws.s3.Bucket(settings.destination) as bucket:
                if settings.param.debug:
                    if settings.source.durable:
                        Log.error("Can not run in debug mode with a durable queue")
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                else:
                    synch = SynchState(bucket.get_key(SYNCHRONIZATION_KEY, must_exist=False))
                    if settings.source.durable:
                        synch.startup()

                queue = PersistentQueue(settings.param.queue_file)
                if queue:
                    # resume from the last item already in the local queue
                    last_item = queue[len(queue) - 1]
                    synch.source_key = last_item._meta.count + 1

                with pulse.Consumer(settings=settings.source, target=None, target_queue=queue, start=synch.source_key):
                    Thread.run("pulse log loop", log_loop, settings, synch, queue, bucket)
                    Thread.wait_for_shutdown_signal(allow_exit=True)
                    Log.warning("starting shutdown")

                queue.close()
                Log.note("write shutdown state to S3")
                synch.shutdown()
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Query ActiveData for recent unannotated test failures and classify them."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        some_failures = http.post_json("http://activedata.allizom.org/query", data={
            "from": "unittest",
            "select": [
                {"name": "branch", "value": "build.branch"},
                {"name": "revision", "value": "build.revision12"},
                {"name": "suite", "value": "run.suite"},
                {"name": "chunk", "value": "run.chunk"},
                {"name": "test", "value": "result.test"}
            ],
            "where": {"and": [
                {"eq": {"result.ok": False}},
                {"gt": {"run.timestamp": Date.today() - WEEK}},
                {"missing": "treeherder.job.note"}
            ]},
            "format": "list",
            "limit": 10
        })

        th = TreeHerder(settings={})

        # th.get_job_classification("mozilla-inbound", "7380457b8ba0")
        for f in some_failures.data:
            th.get_job_classification(f.branch, f.revision)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Start ETL worker threads, or process only the given id(s) when --id is set."""
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id(s) to process. Use \"..\" for a range.",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        if settings.args.id:
            # one-shot mode: process the requested id(s) and exit
            etl_one(settings)
            return

        hg = HgMozillaOrg(settings=settings.hg)
        resources = Dict(hg=dictwrap(hg))

        stopper = Signal()
        for i in range(coalesce(settings.param.threads, 1)):
            ETL(
                name="ETL Loop " + unicode(i),
                work_queue=settings.work_queue,
                resources=resources,
                workers=settings.workers,
                settings=settings.param,
                please_stop=stopper
            )

        Thread.wait_for_shutdown_signal(stopper, allow_exit=True)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Reset the ETL software on every managed EC2 instance; terminate instances that fail."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)
        for i in instances:
            # BUG FIX: kwarg was misspelled `insance_id`, leaving the
            # {{instance_id}} template slot unfilled in the log line
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            try:
                _refresh_etl()  # TODO: UPON FAILURE, TERMINATE INSTANCE AND SPOT REQUEST
            # was `except Exception, e` (Python-2-only syntax)
            except Exception as e:
                ec2_conn.terminate_instances([i.id])
                Log.warning("Problem resetting {{instance}}, terminated", instance=i.id, cause=e)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Query ActiveData for recent unannotated test failures and classify them."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        some_failures = http.post_json("http://activedata.allizom.org/query", data={
            "from": "unittest",
            "select": [
                {"name": "branch", "value": "build.branch"},
                {"name": "revision", "value": "build.revision12"},
                {"name": "suite", "value": "run.suite"},
                {"name": "chunk", "value": "run.chunk"},
                {"name": "test", "value": "result.test"}
            ],
            "where": {"and": [
                {"eq": {"result.ok": False}},
                {"gt": {"run.timestamp": Date.today() - WEEK}},
                {"missing": "treeherder.job.note"}
            ]},
            "format": "list",
            "limit": 10
        })

        th = TreeHerder(settings={})

        # th.get_job_classification("mozilla-inbound", "7380457b8ba0")
        for f in some_failures.data:
            th.get_job_classification(f.branch, f.revision)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Update every configured repo concurrently, visiting them in random order."""
    settings = startup.read_settings()
    Log.start(settings.debug)
    try:
        with Multithread(update_repo, threads=10, outbound=False) as pool:
            for r in Random.combination(settings.param.repos):
                pool.execute([{"repos": r, "settings": settings}])
    finally:
        # always flush/stop logging, even if a repo update raises
        Log.stop()
def main():
    """Run the full ETL as a single OS-level instance, stopping the log on exit."""
    settings = startup.read_settings()
    constants.set(settings.constants)
    Log.start(settings.debug)
    with startup.SingleInstance(flavor_id=settings.args.filename):
        try:
            full_etl(settings)
        finally:
            # guarantee the log is flushed even on failure
            Log.stop()
def main():
    """Load configuration, then block until a shutdown signal arrives."""
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", cause=e)
def setup(settings=None):
    """
    Construct and configure the ActiveData Flask app.

    :param settings: optional path to a settings file (passed to read_settings)
    :return: the configured Flask `app`
    """
    global config
    try:
        config = startup.read_settings(defs={
            "name": ["--process_num", "--process"],
            # fixed typo: help text was missing its closing parenthesis
            "help": "Additional port offset (for multiple Flask processes)",
            "type": int,
            "dest": "process_num",
            "default": 0,
            "required": False
        }, filename=settings)
        constants.set(config.constants)
        Log.start(config.debug)

        # each extra Flask process listens on its own port
        if config.args.process_num and config.flask.port:
            config.flask.port += config.args.process_num

        # PIPE REQUEST LOGS TO ES DEBUG
        if config.request_logs:
            request_logger = elasticsearch.Cluster(config.request_logs).get_or_create_index(config.request_logs)
            active_data.request_log_queue = request_logger.threaded_queue(max_size=2000)

        # SETUP DEFAULT CONTAINER, SO THERE IS SOMETHING TO QUERY
        containers.config.default = {
            "type": "elasticsearch",
            "settings": config.elasticsearch.copy()
        }

        # TURN ON /exit FOR WINDOWS DEBUGGING
        if config.flask.debug or config.flask.allow_exit:
            config.flask.allow_exit = None
            Log.warning("ActiveData is in debug mode")
            app.add_url_rule('/exit', 'exit', _exit)

        # TRIGGER FIRST INSTANCE
        FromESMetadata(config.elasticsearch)
        if config.saved_queries:
            setattr(save_query, "query_finder", SaveQueries(config.saved_queries))

        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])

        if config.flask.ssl_context:
            if config.args.process_num:
                Log.error("can not serve ssl and multiple Flask instances at once")
            setup_ssl()

        return app
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Serious problem with ActiveData service construction! Shutdown!", cause=e)
def main():
    """Copy ETL results from S3 into elasticsearch until told to stop."""
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        queries.config.default = {
            "type": "elasticsearch",
            "settings": settings.elasticsearch.copy()
        }

        if settings.args.id:
            # local, fixed work list built from the --id argument
            work_queue = Queue("local work queue")
            work_queue.extend(parse_id_argument(settings.args.id))
        else:
            work_queue = aws.Queue(settings=settings.work_queue)

        Log.note("Listen to queue {{queue}}, and read off of {{s3}}", queue=settings.work_queue.name, s3=settings.source.bucket)

        es = MultiDayIndex(settings.elasticsearch, queue_size=100000)

        threads = []
        please_stop = Signal()
        for _ in range(settings.threads):
            p = Thread.run("copy to es", copy2es, es, settings, work_queue, please_stop=please_stop)
            threads.append(p)

        def monitor_progress(please_stop):
            # periodic progress report until shutdown
            while not please_stop:
                Log.note("Remaining: {{num}}", num=len(work_queue))
                Thread.sleep(seconds=10)

        Thread.run(name="monitor progress", target=monitor_progress, please_stop=please_stop)

        aws.capture_termination_signal(please_stop)
        Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True)
        please_stop.go()
        Log.note("Shutdown started")
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Connect to TreeHerder and look for classification work."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        # NOTE(review): `hg` is never used below — the constructor may have
        # needed side effects (e.g. cache warm-up); confirm before removing
        hg = HgMozillaOrg(settings.hg)
        th = TreeHerder(settings=settings)
        find_some_work(th)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Fetch bugs, after raising the maximum allowed string size."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        # allow very large (100MB) strings while processing
        big_data.MAX_STRING_SIZE = 100 * 1000 * 1000
        # get_active_data(settings)
        get_bugs(settings)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Backfill the destination container from the source, via the work queue."""
    try:
        settings = startup.read_settings()
        Log.start(settings.debug)

        source = get_container(settings.source)
        destination = get_container(settings.destination)
        work_queue = aws.Queue(settings.work_queue)
        backfill(source, destination, work_queue, settings)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with backfill", e)
def main():
    # ETL: COPY BUG REVIEWS FROM SOURCE ES TO DESTINATION ES, IN PARALLEL BATCHES
    settings = startup.read_settings(defs={
        "name": ["--restart", "--reset", "--redo"],
        "help": "force a reprocessing of all data",
        "action": "store_true",
        "dest": "restart"
    })
    Log.start(settings.debug)
    try:
        with startup.SingleInstance(flavor_id=settings.args.filename):
            # --restart BUILDS A FRESH INDEX; OTHERWISE CONTINUE INTO THE EXISTING ONE
            if settings.args.restart:
                reviews = Cluster(settings.destination).create_index(settings.destination)
            else:
                reviews = Cluster(settings.destination).get_proto(settings.destination)

            bugs = Cluster(settings.source).get_index(settings.source)

            # HIGHEST BUG ID AVAILABLE IN THE SOURCE
            with FromES(bugs) as esq:
                es_max_bug = esq.query({
                    "from": "private_bugs",
                    "select": {"name": "max_bug", "value": "bug_id", "aggregate": "maximum"}
                })

            #PROBE WHAT RANGE OF BUGS IS LEFT TO DO (IN EVENT OF FAILURE)
            with FromES(reviews) as esq:
                es_min_bug = esq.query({
                    "from": "reviews",
                    "select": {"name": "min_bug", "value": "bug_id", "aggregate": "minimum"}
                })

            batch_size = coalesce(bugs.settings.batch_size, settings.size, 1000)
            threads = coalesce(settings.threads, 4)
            # NOTE(review): looks like leftover debug output — confirm before removing
            Log.note(str(settings.min_bug))
            min_bug = int(coalesce(settings.min_bug, 0))
            max_bug = int(coalesce(settings.max_bug, Math.min(es_min_bug + batch_size * threads, es_max_bug)))

            # FAN OUT THE ETL OVER `threads` WORKERS, SINKING RESULTS IN BATCHES
            with ThreadedQueue(reviews, batch_size=coalesce(reviews.settings.batch_size, 100)) as sink:
                func = functools.partial(full_etl, settings, sink)
                with Multithread(func, threads=threads) as m:
                    m.inbound.silent = True
                    Log.note("bugs from {{min}} to {{max}}, step {{step}}", {
                        "min": min_bug,
                        "max": max_bug,
                        "step": batch_size
                    })
                    # WORK NEWEST-FIRST OVER THE BUG-ID INTERVALS
                    m.execute(reversed([{"bugs": range(s, e)} for s, e in qb.intervals(min_bug, max_bug, size=1000)]))

            if settings.args.restart:
                reviews.add_alias()
                reviews.delete_all_but_self()
    finally:
        Log.stop()
def start():
    """Entry point: load config, optionally connect to hg.mozilla.org, then run main()."""
    global hg
    global config

    _ = wrap

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        if config.hg:
            hg = HgMozillaOrg(config.hg)
        main()
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problems exist", e)
def test_read_blobber_file(self):
    """Read a raw blobber log from S3, with tracing and cProfile output enabled."""
    Log.start({
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_read_blobber_file_profile.tab"
        }
    })
    verify_blobber_file(
        0,
        "jetpack-package_raw.log",
        "http://mozilla-releng-blobs.s3.amazonaws.com/blobs/try/sha512/2d6892a08b84499c0e8cc0b81a32c830f6505fc2812a61e136ae4eb2ecfde0aac3e6358e9d27b76171869e0cc4368418e4dfca9378e69982681213354a2057ac"
    )
    Log.stop()
def main():
    """List the first few messages on the configured queue."""
    try:
        settings = startup.read_settings(defs={
            "name": ["--num"],
            "help": "number to show",
            "type": int,
            # NOTE(review): string default for an int option — confirm
            # read_settings coerces it before use
            "default": '10',
            "dest": "num",
            "required": False
        })
        Log.start(settings.debug)
        list_queue(settings.source, settings.args.num)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """List S3 content matching the given ES filter."""
    try:
        settings = startup.read_settings(defs={
            "name": ["--filter", "--where"],
            "help": "ES filter",
            "type": str,
            "dest": "filter",
            "default": '{"match_all":{}}',
            "required": True
        })
        Log.start(settings.debug)
        list_s3(settings.source, convert.json2value(convert.ascii2unicode(settings.args.filter)))
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Rebalance shards on the ActiveData elasticsearch cluster."""
    try:
        settings = wrap({
            "elasticsearch": {
                "host": "http://activedata.allizom.org",
                "port": 9200,
                "debug": True
            }
        })
        # NOTE(review): passes the whole settings object to Log.start (other
        # mains pass settings.debug) — confirm this is intended
        Log.start(settings)
        move_shards(settings)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with assign of shards", e)
def main():
    """Start the MoDataSubmission Flask service."""
    global all_creds
    global config

    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)
        all_creds = config.users
        app.run(**config.flask)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Serious problem with MoDataSubmission service! Shutdown completed!", cause=e)
def main():
    """Run the diff over the given id prefix."""
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)
        diff(settings)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Scrape the branch list from hg.mozilla.org and index it in elasticsearch."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(settings=settings.hg.branches).get_or_create_index(settings=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Run the test-failure aggregator as a single-instance daemon."""
    global config

    try:
        config = startup.read_settings()
        with startup.SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            Log.start(config.debug)

            es = elasticsearch.Cluster(config.destination).get_or_create_index(config.destination)

            please_stop = Signal()
            Thread.run("aggregator", loop_all_days, es, please_stop=please_stop)
            Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Serious problem with Test Failure Aggregator service! Shutdown completed!", cause=e)
def main(num):
    """Benchmark several JSON encoders over `num` iterations and print a summary table."""
    try:
        Log.start()
        results = []
        test_json(results, "jsons.encoder", json_encoder, num)
        test_json(results, "jsons.encoder (again)", json_encoder, num)
        test_json(results, "scrub before json.dumps", cPythonJSONEncoder().encode, num)
        test_json(results, "override JSONEncoder.default()", EnhancedJSONEncoder().encode, num)
        test_json(results, "default json.dumps", json.dumps, num)

        # WILL CRASH, CAN NOT HANDLE DIVERSITY OF TYPES
        try:
            test_json(results, "scrubbed ujson", ujson.dumps, num)
        except Exception:
            # deliberate best-effort: keep going if ujson can not cope
            pass

        Log.note("\n{{summary}}", summary=convert.list2tab(results))
    finally:
        Log.stop()
def main():
    """List S3 content matching the given ES filter."""
    try:
        settings = startup.read_settings(defs={
            "name": ["--filter", "--where"],
            "help": "ES filter",
            "type": str,
            "dest": "filter",
            "default": '{"match_all":{}}',
            "required": True
        })
        Log.start(settings.debug)
        list_s3(settings.source, convert.json2value(convert.ascii2unicode(settings.args.filter)))
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Run the diff over the given id prefix."""
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)
        diff(settings)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def test_51586(self):
    """ETL the gzipped unittest log for key 51586_5124145.52, with profiling on."""
    Log.start({
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_51586_profile.tab"
        }
    })
    source_key = "51586_5124145.52"
    content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
    source = Dict(read_lines=lambda: GzipLines(content))
    with Accumulator(File("tests/results/51586_5124145.52.json")) as destination:
        with Timer("ETL file"):
            process_unittest_in_s3(source_key, source, destination, please_stop=None)
    Log.stop()
def main():
    """Scrape the branch list from hg and index it in elasticsearch."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = get_branches(settings.hg)

        es = elasticsearch.Cluster(settings=settings.hg.branches).get_or_create_index(settings=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Back up all saved queries to the given file."""
    try:
        config = startup.read_settings(defs=[{
            "name": ["--file"],
            "help": "file to save backup",
            "type": str,
            "dest": "file",
            "required": True
        }])
        constants.set(config.constants)
        Log.start(config.debug)

        sq = elasticsearch.Index(settings=config.saved_queries)
        result = sq.search({"query": {"match_all": {}}, "size": 200000})

        # BUG FIX: was convert.json2value, which deserializes and yields
        # objects that "".join can not concatenate; serialize each hit instead
        File(config.args.file).write("".join(map(convert.value2json, result.hits.hits)))
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def main():
    """Refresh the indexer software on every managed EC2 instance."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)
        for i in instances:
            # BUG FIX: kwarg was misspelled `insance_id`, leaving the
            # {{instance_id}} template slot unfilled in the log line
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            _refresh_indexer()
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def test_51586(self):
    """ETL the gzipped unittest log for key 51586_5124145.52, with profiling on."""
    Log.start({
        "trace": True,
        "cprofile": {
            "enabled": True,
            "filename": "tests/results/test_51586_profile.tab"
        }
    })
    source_key = "51586_5124145.52"
    content = File("tests/resources/51586_5124145.52.json.gz").read_bytes()
    source = Dict(read_lines=lambda: GzipLines(content))
    with Accumulator(File("tests/results/51586_5124145.52.json")) as destination:
        with Timer("ETL file"):
            process_unittest_in_s3(source_key, source, destination, please_stop=None)
    Log.stop()
def main():
    """Compute code-coverage summaries in a processing loop until shutdown."""
    try:
        config = startup.read_settings()
        with startup.SingleInstance(flavor_id=config.args.filename):
            constants.set(config.constants)
            Log.start(config.debug)

            please_stop = Signal("main stop signal")

            # destination index mirrors the source schema
            coverage_index = elasticsearch.Cluster(config.source).get_index(settings=config.source)
            config.destination.schema = coverage_index.get_schema()
            coverage_summary_index = elasticsearch.Cluster(config.destination).get_or_create_index(read_only=False, settings=config.destination)
            coverage_summary_index.add_alias(config.destination.index)

            Thread.run(
                "processing loop",
                loop,
                config.source,
                coverage_summary_index,
                config,
                please_stop=please_stop
            )
            Thread.wait_for_shutdown_signal(please_stop)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with code coverage score calculation", cause=e)
def main():
    """Refresh the indexer software on every managed EC2 instance."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        aws_args = dict(
            region_name=settings.aws.region,
            aws_access_key_id=unwrap(settings.aws.aws_access_key_id),
            aws_secret_access_key=unwrap(settings.aws.aws_secret_access_key)
        )
        ec2_conn = boto_ec2.connect_to_region(**aws_args)

        instances = _get_managed_instances(ec2_conn, settings.name)
        for i in instances:
            # BUG FIX: kwarg was misspelled `insance_id`, leaving the
            # {{instance_id}} template slot unfilled in the log line
            Log.note("Reset {{instance_id}} ({{name}}) at {{ip}}", instance_id=i.id, name=i.tags["Name"], ip=i.ip_address)
            _config_fabric(settings.fabric, i)
            _refresh_indexer()
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
@app.route('/<path:path>', methods=['GET', 'POST'])
def catch_all(path):
    """Reject every unrecognized route with an empty 400 response."""
    return Response(
        b"",
        status=400,
        headers={
            "access-control-allow-origin": "*",
            "content-type": "text/html"
        }
    )


if __name__ == "__main__":
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SETUP TREEHERDER CACHE
        hg = HgMozillaOrg(use_cache=True, settings=config.hg)
        th = TreeherderService(hg, settings=config.treeherder)
        app.add_url_rule('/treeherder', None, th.get_treeherder_job, methods=['GET'])

        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])
        app.run(**config.flask)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Serious problem with service construction! Shutdown!", cause=e)
    finally:
        Log.stop()
        sys.exit(0)
def setUp(self):
    """Load test configuration, start logging, and attach the config to the Flask app."""
    config = startup.read_settings(filename=CONFIG_FILE)
    Log.start(config.debug)
    constants.set(config.constants)
    app.config = config
def setUpClass(cls):
    """Start logging once for the whole test class."""
    Log.start()
test = float(10.0) output = pypy_json_encode(test) expecting='10' self.assertEqual(output, expecting, "expecting integer") def test_nan(self): test = float("nan") output = pypy_json_encode(test) expecting = cpython_json_encoder(jsons.scrub(test)) self.assertEqual(output, expecting, "expecting "+expecting) def test_inf(self): test = float("+inf") output = pypy_json_encode(test) expecting = cpython_json_encoder(jsons.scrub(test)) self.assertEqual(output, expecting, "expecting "+expecting) def test_minus_inf(self): test = float("-inf") output = pypy_json_encode(test) expecting = cpython_json_encoder(jsons.scrub(test)) self.assertEqual(output, expecting, "expecting "+expecting) if __name__ == '__main__': try: Log.start() unittest.main() finally: Log.stop()
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    try:
        settings = startup.read_settings(defs=[
            {
                "name": ["--bucket"],
                "help": "bucket to reprocess",
                "type": str,
                "dest": "bucket",
                "required": True
            },
            {
                "name": ["--begin", "--start"],
                "help": "lowest key (or prefix) to reprocess",
                "type": str,
                "dest": "start",
                "default": "1",
                "required": False
            },
            {
                "name": ["--end", "--stop"],
                "help": "highest key (or prefix) to reprocess",
                "type": str,
                "dest": "end",
                "default": None,
                "required": False
            },
            {
                "name": ["--file"],
                "help": "path to file with CR-delimited prefix list",
                "type": str,
                "dest": "file",
                "default": None,
                "required": False
            }
        ])
        Log.start(settings.debug)

        with aws.Queue(settings.work_queue) as work_queue:
            source = Connection(settings.aws).get_bucket(settings.args.bucket)

            if settings.args.file:
                # FILE MODE: queue every key under each prefix listed in the file
                now = Date.now()
                for prefix in File(settings.args.file):
                    all_keys = source.keys(prefix=key_prefix(prefix))
                    for k in all_keys:
                        Log.note("Adding {{key}}", key=k)
                        work_queue.add({
                            "bucket": settings.args.bucket,
                            "key": k,
                            "timestamp": now.unix,
                            "date/time": now.format()
                        })
                return

            # RANGE MODE: queue every key whose version falls in [start, end)
            if settings.args.end and settings.args.start:
                up_to = str(int(settings.args.end) - 1)
                prefix = strings.common_prefix(settings.args.start, up_to)
            else:
                prefix = None

            start = Version(settings.args.start)
            end = Version(settings.args.end)

            all_keys = source.keys(prefix=prefix)
            with Timer("filtering {{num}} keys", {"num": len(all_keys)}):
                all_keys = [(k, Version(k)) for k in all_keys if k.find("None") == -1]
                all_keys = [(k, p) for k, p in all_keys if start <= p < end]
            with Timer("sorting {{num}} keys", {"num": len(all_keys)}):
                all_keys = qb.sort(all_keys, 1)
            for k, p in all_keys:
                Log.note("Adding {{key}}", key=k)
                now = Date.now()
                work_queue.add({
                    "bucket": settings.args.bucket,
                    "key": k,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
def setUpClass(cls):
    """Start logging for the whole test class, with cProfile enabled."""
    Log.start({"cprofile": True})
def setUp(self):
    """Start logging before each test."""
    Log.start()
def main():
    """
    CLEAR OUT KEYS FROM BUCKET BY RANGE, OR BY FILE
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--bucket"],
            "help": "bucket to reprocess",
            "type": str,
            "dest": "bucket",
            "required": True
        }, {
            "name": ["--begin", "--start"],
            "help": "lowest key (or prefix) to reprocess",
            "type": str,
            "dest": "start",
            "default": "1",
            "required": False
        }, {
            "name": ["--end", "--stop"],
            "help": "highest key (or prefix) to reprocess",
            "type": str,
            "dest": "end",
            "default": None,
            "required": False
        }, {
            "name": ["--file"],
            "help": "path to file with CR-delimited prefix list",
            "type": str,
            "dest": "file",
            "default": None,
            "required": False
        }])
        Log.start(settings.debug)

        with aws.Queue(settings.work_queue) as work_queue:
            source = Connection(settings.aws).get_bucket(settings.args.bucket)

            if settings.args.file:
                # FILE MODE: queue every key under each prefix listed in the file
                now = Date.now()
                for prefix in File(settings.args.file):
                    all_keys = source.keys(prefix=key_prefix(prefix))
                    for k in all_keys:
                        Log.note("Adding {{key}}", key=k)
                        work_queue.add({
                            "bucket": settings.args.bucket,
                            "key": k,
                            "timestamp": now.unix,
                            "date/time": now.format()
                        })
                return

            # RANGE MODE: queue every key whose version falls in [start, end)
            if settings.args.end and settings.args.start:
                up_to = str(int(settings.args.end) - 1)
                prefix = strings.common_prefix(settings.args.start, up_to)
            else:
                prefix = None

            start = Version(settings.args.start)
            end = Version(settings.args.end)

            all_keys = source.keys(prefix=prefix)
            with Timer("filtering {{num}} keys", {"num": len(all_keys)}):
                all_keys = [(k, Version(k)) for k in all_keys if k.find("None") == -1]
                all_keys = [(k, p) for k, p in all_keys if start <= p < end]
            with Timer("sorting {{num}} keys", {"num": len(all_keys)}):
                all_keys = qb.sort(all_keys, 1)
            for k, p in all_keys:
                Log.note("Adding {{key}}", key=k)
                now = Date.now()
                work_queue.add({
                    "bucket": settings.args.bucket,
                    "key": k,
                    "timestamp": now.unix,
                    "date/time": now.format()
                })
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Problem with etl", e)
@app.route('/', defaults={'path': ''}, methods=['GET', 'POST'])
@app.route('/<path:path>', methods=['GET', 'POST'])
def catch_all(path):
    """Reject every unrecognized route with an empty 400 response."""
    return Response(
        b"",
        status=400,
        headers={
            "access-control-allow-origin": "*",
            "content-type": "text/html"
        }
    )


if __name__ == "__main__":
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        # SETUP TREEHERDER CACHE
        hg = HgMozillaOrg(use_cache=True, settings=config.hg)
        th = TreeherderService(hg, settings=config.treeherder)
        app.add_url_rule('/treeherder', None, th.get_treeherder_job, methods=['GET'])

        HeaderRewriterFix(app, remove_headers=['Date', 'Server'])
        app.run(**config.flask)
    # was `except Exception, e` (Python-2-only syntax)
    except Exception as e:
        Log.error("Serious problem with service construction! Shutdown!", cause=e)