def test_wrap_2():
    """Benchmark to_data() against baseline() over a mix of random input types."""
    Log.alert("Random types")
    generators = [
        lambda: Random.int(20),
        lambda: Random.string(20),
        lambda: {"i": Random.int(2000)},
        lambda: Data(i=Random.int(2000)),
        lambda: FlatList([{"i": Random.int(2000)}]),
        lambda: [{"i": Random.int(2000)}],
    ]
    # geometric distribution over the generator list, clamped to the last index
    inputs = [
        generators[min(len(generators) - 1, int(floor(-log(Random.float(), 2))))]()
        for _ in range(NUM_INPUT)
    ]

    for rep in range(NUM_REPEAT):
        results = []
        gc.collect()
        with Timer("more string: to_data"):
            for value in inputs:
                results.append(to_data(value))

        results = []
        gc.collect()
        with Timer("more string: baseline"):
            for value in inputs:
                results.append(baseline(value))

        Log.note("Done {{i}} of {{num}}", i=rep, num=NUM_REPEAT)
def not_monitor(self, please_stop):
    """Drain self.todo while scanning is disabled, clearing stale column stats."""
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(THREAD_STOP))

    while not please_stop:
        column = self.todo.pop()
        if column == THREAD_STOP:
            break
        # skip columns that have no timestamp, or that were refreshed recently
        if not column.last_updated or column.last_updated >= Date.now() - TOO_OLD:
            continue

        with self.meta.columns.locker:
            self.meta.columns.update({
                "set": {"last_updated": Date.now()},
                "clear": ["count", "cardinality", "partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}},
            })
        if DEBUG:
            Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=column)
def not_monitor(self, please_stop):
    """Scan disabled: only refresh cardinality for a small whitelist of columns."""
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(THREAD_STOP))

    while not please_stop:
        col = self.todo.pop()
        if col == THREAD_STOP:
            break

        if col.jx_type in STRUCT or split_field(col.es_column)[-1] == EXISTS_TYPE:
            DEBUG and Log.note("{{column.es_column}} is a struct", column=col)
            col.last_updated = Date.now()
            continue
        elif col.last_updated > Date.now() - TOO_OLD and col.cardinality is not None:
            # DO NOT UPDATE FRESH COLUMN METADATA
            DEBUG and Log.note(
                "{{column.es_column}} is still fresh ({{ago}} ago)",
                column=col,
                ago=(Date.now() - Date(col.last_updated)).seconds,
            )
            continue

        with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": col}, silent=not DEBUG, too_long=0.05):
            if untype_path(col.name) in ["build.type", "run.type"]:
                try:
                    self._update_cardinality(col)
                except Exception as e:
                    Log.warning("problem getting cardinality for {{column.name}}", column=col, cause=e)
            else:
                col.last_updated = Date.now()
def worker(please_stop):
    """Poll the EC2 metadata service for spot termination; fire please_stop on notice."""
    seen_problem = False
    while not please_stop:
        request_time = (time.time() - timer.START) / 60  # MINUTES
        try:
            response = requests.get(
                "http://169.254.169.254/latest/meta-data/spot/termination-time"
            )
            seen_problem = False
            if response.status_code not in [400, 404]:
                Log.alert(
                    "Shutdown AWS Spot Node {{name}} {{type}}",
                    name=machine_metadata.name,
                    type=machine_metadata.aws_instance_type,
                )
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            unreachable = (
                "Failed to establish a new connection: [Errno 10060]" in e
                or "A socket operation was attempted to an unreachable network" in e
            )
            if unreachable:
                Log.note(
                    "AWS Spot Detection has shutdown, this is probably not a spot node, (http://169.254.169.254 is unreachable)"
                )
                return
            elif seen_problem:
                # IGNORE THE FIRST PROBLEM
                Log.warning(
                    "AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)",
                    time=request_time,
                    cause=e,
                )
            seen_problem = True
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Block until the user types "exit" on stdin, or please_stop goes.
    """
    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    while not please_stop:
        if cr_count > 30:
            # stdin is spewing blanks (probably /dev/null); slow the poll down
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # BUGFIX: the wrapped exception was discarded; the `"…" in e`
            # containment test below requires the Except wrapper
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
            break

        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null

        if line.strip() == "exit":
            Log.alert("'exit' Detected!  Stopping...")
            return
def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    :param timeout: SECONDS TO WAIT BEFORE RAISING THREAD_TIMEOUT
    """
    wait_time = 5

    # BUGFIX: the template referenced {{name}} but no parameter was passed
    (DEBUG and len(self.queue) > 1 * 1000 * 1000) and Log.warning(
        "Queue {{name}} has over a million items", name=self.name
    )

    now = time()
    if timeout != None:
        time_to_stop_waiting = now + timeout
    else:
        time_to_stop_waiting = now + DEFAULT_WAIT_TIME

    if self.next_warning < now:
        self.next_warning = now + wait_time

    while not self.closed and len(self.queue) >= self.max:
        if now > time_to_stop_waiting:
            Log.error(THREAD_TIMEOUT)

        if self.silent:
            self.lock.wait(Till(till=time_to_stop_waiting))
        else:
            self.lock.wait(Till(seconds=wait_time))
            if len(self.queue) >= self.max:
                now = time()
                if self.next_warning < now:
                    self.next_warning = now + wait_time
                    Log.alert(
                        "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                        name=self.name,
                        num=len(self.queue),
                        wait_time=wait_time
                    )
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Block until the user types "exit" on stdin (or console on Windows),
    or please_stop goes.
    """
    try:
        import msvcrt  # only importable on Windows
        _wait_for_exit_on_windows(please_stop)
        # BUGFIX: without this return we fell through to the posix stdin
        # loop after the Windows wait finished (the L34-era variant has it)
        return
    except Exception:
        pass

    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    while not please_stop:
        if cr_count > 30:
            # stdin is spewing blanks (probably /dev/null); slow the poll down
            (Till(seconds=3) | please_stop).wait()
        try:
            line = sys.stdin.readline()
        except Exception as e:
            # BUGFIX: the wrapped exception was discarded; `"…" in e`
            # requires the Except wrapper
            e = Except.wrap(e)
            if "Bad file descriptor" in e:
                _wait_for_interrupt(please_stop)
            break

        if line == "":
            cr_count += 1
        else:
            cr_count = -1000000  # NOT /dev/null

        if line.strip() == "exit":
            Log.alert("'exit' Detected!  Stopping...")
            return
def not_monitor(self, please_stop):
    """While the metadata scan is disabled, clear stats on stale columns."""
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(THREAD_STOP))

    while not please_stop:
        column = self.todo.pop()
        if column == THREAD_STOP:
            break
        if column.last_updated >= Date.now() - TOO_OLD:
            continue  # still fresh

        with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
            self.meta.columns.update({
                "set": {"last_updated": Date.now()},
                "clear": ["count", "cardinality", "multi", "partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}},
            })
def setUpClass(self):
    """Wait for the ES backend to answer, then delete old testing_* indexes."""
    while True:
        try:
            es = test_jx.global_settings.backend_es
            http.get_json(URL(es.host, port=es.port))
            break
        except Exception as e:
            e = Except.wrap(e)
            refused = (
                "No connection could be made because the target machine actively refused it" in e
                or "Connection refused" in e
            )
            if refused:
                Log.alert("Problem connecting")
            else:
                Log.error("Server raised exception", e)

    # REMOVE OLD INDEXES
    cluster = elasticsearch.Cluster(test_jx.global_settings.backend_es)
    for a in cluster.get_aliases():
        try:
            if a.index.startswith("testing_"):
                # EXAMPLE testing_0ef53e45b320160118_180420
                create_time = Date(a.index[-15:], "%Y%m%d_%H%M%S")
                if create_time < Date.now() - 10 * MINUTE:
                    cluster.delete_index(a.index)
        except Exception as e:
            Log.warning("Problem removing {{index|quote}}", index=a.index, cause=e)
def _wait_for_queue_space(self, timeout=None):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    :param timeout: IN SECONDS
    """
    wait_time = 5

    # BUGFIX: the template referenced {{name}} but no parameter was passed
    (DEBUG and len(self.queue) > 1 * 1000 * 1000) and Log.warning(
        "Queue {{name}} has over a million items", name=self.name
    )

    start = time()
    stop_waiting = Till(till=start + coalesce(timeout, DEFAULT_WAIT_TIME))

    while not self.closed and len(self.queue) >= self.max:
        if stop_waiting:
            Log.error(THREAD_TIMEOUT)

        if self.silent:
            self.lock.wait(stop_waiting)
        else:
            self.lock.wait(Till(seconds=wait_time))
            if not stop_waiting and len(self.queue) >= self.max:
                now = time()
                Log.alert(
                    "Queue with name {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                    name=self.name,
                    num=len(self.queue),
                    wait_time=now - start
                )
def _wait_for_queue_space(self, timeout=DEFAULT_WAIT_TIME):
    """
    EXPECT THE self.lock TO BE HAD, WAITS FOR self.queue TO HAVE A LITTLE SPACE

    :param timeout: SECONDS TO WAIT; None MEANS NO DEADLINE
    """
    wait_time = 5

    now = time()
    if timeout != None:
        time_to_stop_waiting = now + timeout
    else:
        time_to_stop_waiting = Null  # Null compares falsey; no deadline

    if self.next_warning < now:
        self.next_warning = now + wait_time

    while not self.please_stop and len(self.queue) >= self.max:
        if now > time_to_stop_waiting:
            if not _Log:
                _late_import()
            _Log.error(THREAD_TIMEOUT)

        if self.silent:
            self.lock.wait(Till(till=time_to_stop_waiting))
        else:
            # BUGFIX: was Till(timeout=wait_time); Till takes `seconds`
            # (see the sibling _wait_for_queue_space variants)
            self.lock.wait(Till(seconds=wait_time))
            if len(self.queue) > self.max:
                now = time()
                if self.next_warning < now:
                    self.next_warning = now + wait_time
                    _Log.alert(
                        "Queue by name of {{name|quote}} is full with ({{num}} items), thread(s) have been waiting {{wait_time}} sec",
                        name=self.name,
                        num=len(self.queue),
                        wait_time=wait_time
                    )
def not_monitor(self, please_stop):
    """Scan disabled: clear cached stats for columns that have gone stale."""
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(THREAD_STOP))

    while not please_stop:
        column = self.todo.pop()
        if column == THREAD_STOP:
            break
        # nothing to do for untimestamped or recently-updated columns
        if not column.last_updated or column.last_updated >= Date.now() - TOO_OLD:
            continue

        with self.meta.columns.locker:
            self.meta.columns.update({
                "set": {"last_updated": Date.now()},
                "clear": ["count", "cardinality", "multi", "partitions"],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}},
            })
        if DEBUG:
            Log.note("Could not get {{col.es_index}}.{{col.es_column}} info", col=column)
def update_local_database(config, deviant_summary, candidates, since):
    """Refresh the local deviant_summary store for candidate series (missing first, then oldest)."""
    if isinstance(deviant_summary, bigquery.Table):
        Log.note("Only the ETL process should fill the bigquery table")
        return

    # GET EVERYTHING WE HAVE SO FAR
    exists = deviant_summary.query({
        "select": ["signature_hash", "last_updated"],
        "where": {"and": [
            {"in": {"signature_hash": candidates.signature_hash}},
            {"exists": "num_pushes"},
        ]},
        "sort": "last_updated",
        "limit": 100000,
        "format": "list",
    }).data

    # CHOOSE MISSING, THEN OLDEST, UP TO "RECENT"
    missing = list(set(candidates.signature_hash) - set(exists.signature_hash))
    too_old = Date.today() - parse(LOCAL_RETENTION)
    needs_update = missing + [
        e.signature_hash for e in exists if e.last_updated < too_old.unix
    ]
    Log.alert("{{num}} series are candidates for local update", num=len(needs_update))

    limited_update = Queue("sigs")
    limited_update.extend(left(needs_update, coalesce(config.display.download_limit, 100)))
    Log.alert("Updating local database with {{num}} series", num=len(limited_update))

    with Timer("Updating local database"):
        def loop(please_stop):
            while not please_stop:
                signature_hash = limited_update.pop_one()
                if not signature_hash:
                    return
                process(
                    signature_hash,
                    since,
                    source=config.database,
                    deviant_summary=deviant_summary,
                )

        threads = [Thread.run(text(i), loop) for i in range(3)]
        for t in threads:
            t.join()

    Log.note("Local database is up to date")
def create_index(
    self,
    index,
    alias=None,
    create_timestamp=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    kwargs=None
):
    """Create a timestamped ES index (PUT), wait for it to appear, return an Index."""
    if not alias:
        alias = kwargs.alias = kwargs.index
    index = kwargs.index = proto_name(alias, create_timestamp)

    if kwargs.alias == index:
        Log.error("Expecting index name to conform to pattern")
    if kwargs.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = mo_json.json2value(schema, leaves=True)
    else:
        schema = mo_json.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.put(
        "/" + index,
        data=schema,
        headers={"Content-Type": "application/json"}
    )

    # CONFIRM INDEX EXISTS
    while True:
        try:
            state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
            if index in state.metadata.indices:
                break
            Log.note("Waiting for index {{index}} to appear", index=index)
        except Exception as e:
            Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
        Till(seconds=1).wait()

    Log.alert("Made new index {{index|quote}}", index=index)
    return Index(kwargs=kwargs)
def create_index(
    self,
    index,
    alias=None,
    create_timestamp=None,
    schema=None,
    limit_replicas=None,
    read_only=False,
    tjson=False,
    kwargs=None
):
    """Create a timestamped ES index (POST), wait for it to appear, return an Index."""
    if not alias:
        alias = kwargs.alias = kwargs.index
    index = kwargs.index = proto_name(alias, create_timestamp)

    if kwargs.alias == index:
        Log.error("Expecting index name to conform to pattern")
    if kwargs.schema_file:
        Log.error('schema_file attribute not supported. Use {"$ref":<filename>} instead')

    if schema == None:
        Log.error("Expecting a schema")
    elif isinstance(schema, basestring):
        schema = mo_json.json2value(schema, leaves=True)
    else:
        schema = mo_json.json2value(convert.value2json(schema), leaves=True)

    if limit_replicas:
        # DO NOT ASK FOR TOO MANY REPLICAS
        health = self.get("/_cluster/health")
        if schema.settings.index.number_of_replicas >= health.number_of_nodes:
            Log.warning(
                "Reduced number of replicas: {{from}} requested, {{to}} realized",
                {"from": schema.settings.index.number_of_replicas},
                to=health.number_of_nodes - 1
            )
            schema.settings.index.number_of_replicas = health.number_of_nodes - 1

    self.post(
        "/" + index,
        data=schema,
        headers={"Content-Type": "application/json"}
    )

    # CONFIRM INDEX EXISTS
    while True:
        try:
            state = self.get("/_cluster/state", retry={"times": 5}, timeout=3)
            if index in state.metadata.indices:
                break
            Log.note("Waiting for index {{index}} to appear", index=index)
        except Exception as e:
            Log.warning("Problem while waiting for index {{index}} to appear", index=index, cause=e)
        Till(seconds=1).wait()

    Log.alert("Made new index {{index|quote}}", index=index)
    return Index(kwargs=kwargs)
def __init__(
    self,
    index,  # NAME OF THE INDEX, EITHER ALIAS NAME OR FULL VERSION NAME
    type=None,  # SCHEMA NAME, (DEFAULT TO TYPE IN INDEX, IF ONLY ONE)
    alias=None,
    explore_metadata=True,  # PROBING THE CLUSTER FOR METADATA IS ALLOWED
    read_only=True,
    tjson=False,  # STORED AS TYPED JSON
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    consistency="one",  # ES WRITE CONSISTENCY
    debug=False,  # DO NOT SHOW THE DEBUG STATEMENTS
    cluster=None,
    kwargs=None
):
    """Attach to one ES index: resolve the full index name and the schema type."""
    if index == None:
        Log.error("not allowed")
    if index == alias:
        Log.error("must have a unique index name")

    self.cluster_state = None
    self.debug = debug
    self.settings = kwargs
    self.cluster = cluster if cluster else Cluster(kwargs)

    try:
        full_index = self.get_index(index)
        if full_index and alias == None:
            kwargs.alias = kwargs.index
            kwargs.index = full_index
        if full_index == None:
            Log.error("not allowed")
        if type == None:
            # NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT?
            with self.cluster.metadata_locker:
                index_ = self.cluster._metadata.indices[self.settings.index]
            if not index_:
                indices = self.cluster.get_metadata().indices
                index_ = indices[self.settings.index]

            candidate_types = list(index_.mappings.keys())
            if len(candidate_types) != 1:
                Log.error("Expecting `type` parameter")
            self.settings.type = type = candidate_types[0]
    except Exception as e:
        # EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
        Log.error("not expected", cause=e)

    if not type:
        Log.error("not allowed")

    self.path = "/" + full_index + "/" + type

    if self.debug:
        Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
def __init__(
    self,
    index,  # NAME OF THE INDEX, EITHER ALIAS NAME OR FULL VERSION NAME
    type=None,  # SCHEMA NAME, (DEFAULT TO TYPE IN INDEX, IF ONLY ONE)
    alias=None,
    explore_metadata=True,  # PROBING THE CLUSTER FOR METADATA IS ALLOWED
    read_only=True,
    tjson=False,  # STORED AS TYPED JSON
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    consistency="one",  # ES WRITE CONSISTENCY
    debug=False,  # DO NOT SHOW THE DEBUG STATEMENTS
    cluster=None,
    kwargs=None
):
    """Bind to a single ES index, resolving its versioned name and schema type."""
    if index == None:
        Log.error("not allowed")
    if index == alias:
        Log.error("must have a unique index name")

    self.cluster_state = None
    self.debug = debug
    self.settings = kwargs
    if cluster:
        self.cluster = cluster
    else:
        self.cluster = Cluster(kwargs)

    try:
        full_index = self.get_index(index)
        if full_index and alias == None:
            kwargs.alias = kwargs.index
            kwargs.index = full_index
        if full_index == None:
            Log.error("not allowed")
        if type == None:
            # NO type PROVIDED, MAYBE THERE IS A SUITABLE DEFAULT?
            with self.cluster.metadata_locker:
                index_ = self.cluster._metadata.indices[self.settings.index]
            if not index_:
                indices = self.cluster.get_metadata().indices
                index_ = indices[self.settings.index]

            candidate_types = list(index_.mappings.keys())
            if len(candidate_types) != 1:
                Log.error("Expecting `type` parameter")
            self.settings.type = type = candidate_types[0]
    except Exception as e:
        # EXPLORING (get_metadata()) IS NOT ALLOWED ON THE PUBLIC CLUSTER
        Log.error("not expected", cause=e)

    if not type:
        Log.error("not allowed")

    self.path = "/" + full_index + "/" + type

    if self.debug:
        Log.alert("elasticsearch debugging for {{url}} is on", url=self.url)
def push_to_queue():
    """Flush _buffer into self.slow_queue, then run the deferred post-push hooks."""
    if self.slow_queue.__class__.__name__ == "Index":
        if self.slow_queue.settings.index.startswith("saved"):
            Log.alert("INSERT SAVED QUERY {{data|json}}", data=copy(_buffer))
    self.slow_queue.extend(_buffer)
    del _buffer[:]

    for hook in _post_push_functions:
        hook()
    del _post_push_functions[:]
def post_till_response(self, *args, **kwargs):
    """POST repeatedly until the server answers; retry on connection refusal."""
    while True:
        try:
            return self.server.post(*args, **kwargs)
        except Exception as e:
            e = Except.wrap(e)
            if "No connection could be made because the target machine actively refused it" in e:
                Log.alert("Problem connecting, retrying")
            else:
                Log.error("Server raised exception", e)
def deploy_all(parent_dir, prefix, config):
    """Deploy every child directory whose name starts with prefix; pip-upgrade each deployed package."""
    deployed = []
    for child in parent_dir.children:
        if child.name.lower().startswith(prefix):
            Log.alert("Process {{dir}}", dir=child.abspath)
            candidate = Deploy(child, kwargs=config)
            if candidate.deploy():
                deployed.append(candidate)

    for d in deployed:
        d.local("pip", ["pip", "install", d.name, "--upgrade"])

    return deployed
def try_till_response(self, *args, **kwargs):
    """GET repeatedly until the server answers; pause between refused attempts."""
    while True:
        try:
            return self.server.get(*args, **kwargs)
        except Exception as e:
            e = Except.wrap(e)
            refused = (
                "No connection could be made because the target machine actively refused it" in e
                or "Connection refused" in e
            )
            if refused:
                Log.alert("Problem connecting")
                Till(seconds=WAIT_AFTER_PROBLEM).wait()
            else:
                Log.error("Server raised exception", e)
def rollback(self):
    """Return pending (delivered-but-unacknowledged) messages to the queue."""
    if not self.pending:
        return
    returned, self.pending = self.pending, []
    for original in returned:
        duplicate = Message()
        duplicate.set_body(original.get_body())
        self.queue.write(duplicate)
    for original in returned:
        self.queue.delete_message(original)
    if self.settings.debug:
        Log.alert("{{num}} messages returned to queue", num=len(returned))
def __init__(
    self,
    alias,  # NAME OF THE ALIAS
    type=None,  # SCHEMA NAME, WILL HUNT FOR ONE IF None
    explore_metadata=True,  # IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
    debug=False,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    kwargs=None
):
    """Bind to an ES alias; hunt for the canonical schema type when not given."""
    self.debug = debug
    if self.debug:
        Log.alert("Elasticsearch debugging on {{index|quote}} is on", index=kwargs.index)
    if alias == None:
        Log.error("Alias can not be None")
    self.settings = kwargs
    self.cluster = Cluster(kwargs)

    if type == None:
        if not explore_metadata:
            Log.error("Alias() was given no `type` (aka schema) and not allowed to explore metadata. Do not know what to do now.")

        if not self.settings.alias or self.settings.alias == self.settings.index:
            alias_list = self.cluster.get("/_alias")
            candidates = (
                [(name, i) for name, i in alias_list.items() if self.settings.index in i.aliases.keys()] +
                [(name, Null) for name, i in alias_list.items() if self.settings.index == name]
            )
            full_name = jx.sort(candidates, 0).last()[0]
            if not full_name:
                Log.error("No index by name of {{name}}", name=self.settings.index)
            mappings = self.cluster.get("/" + full_name + "/_mapping")[full_name]
        else:
            mappings = self.cluster.get("/" + self.settings.index + "/_mapping")[self.settings.index]

        # FIND MAPPING WITH MOST PROPERTIES (AND ASSUME THAT IS THE CANONICAL TYPE)
        max_prop = -1
        for _type, mapping in mappings.mappings.items():
            if _type == "_default_":
                continue
            num_prop = len(mapping.properties.keys())
            if max_prop < num_prop:
                max_prop = num_prop
                self.settings.type = _type
                type = _type

        if type == None:
            Log.error("Can not find schema type for index {{index}}", index=coalesce(self.settings.alias, self.settings.index))

    self.path = "/" + alias + "/" + type
def wait_for_shutdown_signal(
    self,
    please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
    allow_exit=False,  # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
    wait_forever=True  # IGNORE CHILD THREADS, NEVER EXIT.  False => IF NO CHILD THREADS LEFT, THEN EXIT
):
    """
    FOR USE BY PROCESSES THAT NEVER DIE UNLESS EXTERNAL SHUTDOWN IS REQUESTED

    CALLING THREAD WILL SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

    :param please_stop:
    :param allow_exit:
    :param wait_forever:: Assume all needed threads have been launched. When done
    :return:
    """
    self_thread = Thread.current()
    if self_thread != MAIN_THREAD or self_thread != self:
        Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")

    if isinstance(please_stop, Signal):
        # MUTUAL SIGNALING MAKES THESE TWO EFFECTIVELY THE SAME SIGNAL
        self.please_stop.on_go(please_stop.go)
        please_stop.on_go(self.please_stop.go)
    else:
        please_stop = self.please_stop

    if not wait_forever:
        # TRIGGER SIGNAL WHEN ALL CHILDREN THEADS ARE DONE
        with self_thread.child_lock:
            pending = copy(self_thread.children)
        children_done = AndSignals(please_stop, len(pending))
        children_done.signal.on_go(self.please_stop.go)
        for p in pending:
            p.stopped.on_go(children_done.done)

    try:
        if allow_exit:
            _wait_for_exit(please_stop)
        else:
            _wait_for_interrupt(please_stop)
    except KeyboardInterrupt as _:
        Log.alert("SIGINT Detected!  Stopping...")
    except SystemExit as _:
        Log.alert("SIGTERM Detected!  Stopping...")
    finally:
        self.stop()
def _wait_for_exit_on_windows(please_stop):
    """Poll the Windows console for a typed "exit" command, or until please_stop."""
    import msvcrt

    line = ""
    while not please_stop:
        if msvcrt.kbhit():
            c = msvcrt.getche()  # renamed from `chr`, which shadowed the builtin
            if ord(c) == 13:  # ENTER
                if line == "exit":
                    Log.alert("'exit' Detected!  Stopping...")
                    return
                # BUGFIX: start a fresh line after ENTER; previously old input
                # accumulated forever, so "exit" could never match once any
                # other key had been pressed
                line = ""
            elif ord(c) > 32:  # printable characters only
                line += c
        else:
            sleep(1)
def worker(please_stop):
    """Watch EC2 metadata for a spot termination notice; fire please_stop when seen."""
    while not please_stop:
        try:
            response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
            if response.status_code not in [400, 404]:
                Log.alert(
                    "Shutdown AWS Spot Node {{name}} {{type}}",
                    name=machine_metadata.name,
                    type=machine_metadata.aws_instance_type
                )
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            unreachable = (
                "Failed to establish a new connection: [Errno 10060]" in e
                or "A socket operation was attempted to an unreachable network" in e
            )
            if unreachable:
                Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                return
            else:
                Log.warning("AWS shutdown detection has problems", cause=e)
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def __init__(
    self,
    alias,  # NAME OF THE ALIAS
    type=None,  # SCHEMA NAME, WILL HUNT FOR ONE IF None
    explore_metadata=True,  # IF PROBING THE CLUSTER FOR METADATA IS ALLOWED
    debug=False,
    timeout=None,  # NUMBER OF SECONDS TO WAIT FOR RESPONSE, OR SECONDS TO WAIT FOR DOWNLOAD (PASSED TO requests)
    kwargs=None
):
    """Bind to an ES alias; hunt for the canonical schema type when not given."""
    self.debug = debug
    if self.debug:
        Log.alert("Elasticsearch debugging on {{index|quote}} is on", index=kwargs.index)
    if alias == None:
        Log.error("Alias can not be None")
    self.settings = kwargs
    self.cluster = Cluster(kwargs)

    if type == None:
        if not explore_metadata:
            Log.error("Alias() was given no `type` (aka schema) and not allowed to explore metadata. Do not know what to do now.")

        if not self.settings.alias or self.settings.alias == self.settings.index:
            alias_list = self.cluster.get("/_alias")
            candidates = (
                [(name, i) for name, i in alias_list.items() if self.settings.index in i.aliases.keys()] +
                [(name, Null) for name, i in alias_list.items() if self.settings.index == name]
            )
            full_name = jx.sort(candidates, 0).last()[0]
            # BUGFIX: the sibling variant of this constructor validates
            # full_name; without this guard a missing index proceeds with
            # Null and fails obscurely on the _mapping request
            if not full_name:
                Log.error("No index by name of {{name}}", name=self.settings.index)
            mappings = self.cluster.get("/" + full_name + "/_mapping")[full_name]
        else:
            mappings = self.cluster.get("/" + self.settings.index + "/_mapping")[self.settings.index]

        # FIND MAPPING WITH MOST PROPERTIES (AND ASSUME THAT IS THE CANONICAL TYPE)
        max_prop = -1
        for _type, mapping in mappings.mappings.items():
            if _type == "_default_":
                continue
            num_prop = len(mapping.properties.keys())
            if max_prop < num_prop:
                max_prop = num_prop
                self.settings.type = _type
                type = _type

        if type == None:
            Log.error("Can not find schema type for index {{index}}", index=coalesce(self.settings.alias, self.settings.index))

    self.path = "/" + alias + "/" + type
def update_local_database():
    """Refresh the local summary_table for missing or stale candidate series."""
    # GET EVERYTHING WE HAVE SO FAR
    exists = summary_table.query({
        "select": ["id", "last_updated"],
        "where": {"and": [
            {"in": {"id": candidates.id}},
            {"exists": "num_pushes"},
        ]},
        "sort": "last_updated",
        "limit": 100000,
        "format": "list",
    }).data

    # CHOOSE MISSING, THEN OLDEST, UP TO "RECENT"
    missing = list(set(candidates.id) - set(exists.id))
    too_old = Date.today() - parse(LOCAL_RETENTION)
    needs_update = missing + [
        # BUGFIX: was `e` (the whole record), mixing records into a list of
        # ids; the sibling variant correctly collects the id field
        e.id
        for e in exists
        if e.last_updated < too_old.unix
    ]
    Log.alert("{{num}} series are candidates for local update", num=len(needs_update))

    limited_update = Queue("sigs")
    limited_update.extend(left(needs_update, coalesce(config.analysis.download_limit, 100)))
    Log.alert("Updating local database with {{num}} series", num=len(limited_update))

    with Timer("Updating local database"):
        def loop(please_stop):
            while not please_stop:
                sig_id = limited_update.pop_one()
                if not sig_id:
                    return
                process(sig_id)

        threads = [Thread.run(text(i), loop) for i in range(3)]
        for t in threads:
            t.join()

    Log.note("Local database is up to date")
def wait_for_shutdown_signal(
    self,
    please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
    allow_exit=False,  # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
    wait_forever=True  # IGNORE CHILD THREADS, NEVER EXIT.  False => IF NO CHILD THREADS LEFT, THEN EXIT
):
    """
    FOR USE BY PROCESSES THAT NEVER DIE UNLESS EXTERNAL SHUTDOWN IS REQUESTED

    CALLING THREAD WILL SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

    :param please_stop:
    :param allow_exit:
    :param wait_forever:: Assume all needed threads have been launched. When done
    :return:
    """
    self_thread = Thread.current()
    if self_thread != MAIN_THREAD or self_thread != self:
        Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")

    if isinstance(please_stop, Signal):
        # MUTUAL TRIGGERING, SO THEY ARE EFFECTIVELY THE SAME
        self.please_stop.on_go(please_stop.go)
        please_stop.on_go(self.please_stop.go)
    else:
        please_stop = self.please_stop

    if not wait_forever:
        # TRIGGER SIGNAL WHEN ALL CHILDREN THEADS ARE DONE
        with self_thread.child_lock:
            pending = copy(self_thread.children)
        # local renamed from `all` (shadowed the builtin)
        children_done = AndSignals(please_stop, len(pending))
        for p in pending:
            p.stopped.on_go(children_done.done)

    try:
        if allow_exit:
            _wait_for_exit(please_stop)
        else:
            _wait_for_interrupt(please_stop)
    except KeyboardInterrupt as _:
        Log.alert("SIGINT Detected!  Stopping...")
    except SystemExit as _:
        Log.alert("SIGTERM Detected!  Stopping...")
    finally:
        self.stop()
def rollback(self):
    """Best-effort return of pending messages to the queue; warn on failure."""
    if not self.pending:
        return
    returned, self.pending = self.pending, []
    try:
        for original in returned:
            duplicate = Message()
            duplicate.set_body(original.get_body())
            self.queue.write(duplicate)
        for original in returned:
            self.queue.delete_message(original)
        if self.settings.debug:
            Log.alert("{{num}} messages returned to queue", num=len(returned))
    except Exception as e:
        Log.warning("Failed to return {{num}} messages to the queue", num=len(returned), cause=e)
def main():
    """ETL entry point: pull branches from hg and push them into ES."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        es = elasticsearch.Cluster(kwargs=settings.hg.branches).get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
    finally:
        Log.stop()
def main():
    """Read config, fetch hg branches, and store them in a fresh ES index."""
    try:
        settings = startup.read_settings()
        constants.set(settings.constants)
        Log.start(settings.debug)

        branches = _get_branches_from_hg(settings.hg)

        cluster = elasticsearch.Cluster(kwargs=settings.hg.branches)
        es = cluster.get_or_create_index(kwargs=settings.hg.branches)
        es.add_alias()
        es.extend({"id": b.name + " " + b.locale, "value": b} for b in branches)
        Log.alert("DONE!")
    except Exception as e:
        Log.error("Problem with etl", e)
    finally:
        Log.stop()
def setup(
    self,
    instance,  # THE boto INSTANCE OBJECT FOR THE MACHINE TO SETUP
    utility,  # THE utility OBJECT FOUND IN CONFIG
    please_stop
):
    """Install the python indexer, ES, and supervisor onto a fresh instance."""
    try:
        with Connection(host=instance.ip_address, kwargs=self.settings.connect) as conn:
            gigabytes = mo_math.floor(utility.memory)
            Log.note("setup {{instance}}", instance=instance.id)

            _install_python_indexer(instance=instance, conn=conn)
            _install_es(gigabytes, instance=instance, conn=conn)
            _install_supervisor(instance=instance, conn=conn)
            _start_supervisor(conn=conn)
            Log.alert("Done install of {{host}}", host=instance.ip_address)
    except Exception as e:
        Log.error("could not setup ES at {{ip}}", ip=instance.ip_address, cause=e)
def _wait_for_exit(please_stop):
    """
    /dev/null PIPED TO sys.stdin SPEWS INFINITE LINES, DO NOT POLL AS OFTEN

    Block until the user types "exit" (console or stdin), or please_stop goes.
    """
    try:
        import msvcrt  # only importable on Windows
        _wait_for_exit_on_windows(please_stop)
        return
    except:
        pass

    cr_count = 0  # COUNT NUMBER OF BLANK LINES
    try:
        while not please_stop:
            if cr_count > 30:
                # stdin is spewing blanks (probably /dev/null); slow the poll down
                (Till(seconds=3) | please_stop).wait()
            try:
                line = STDIN.readline()
            except Exception as e:
                # BUGFIX: the wrapped exception was discarded; `"…" in e`
                # requires the Except wrapper
                e = Except.wrap(e)
                if "Bad file descriptor" in e:
                    Log.note("can not read from stdin")
                    _wait_for_interrupt(please_stop)
                break

            if not line:
                cr_count += 1
            else:
                cr_count = -1000000  # NOT /dev/null

            if line.strip() == b"exit":
                Log.alert("'exit' Detected! Stopping...")
                return
    except Exception as e:
        Log.warning("programming error", cause=e)
    finally:
        if please_stop:
            Log.note("please_stop has been requested")
        Log.note("done waiting for exit")
def wait_for_shutdown_signal(
    please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
    allow_exit=False,  # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
    wait_forever=True  # IGNORE CHILD THREADS, NEVER EXIT.  False -> IF NO CHILD THREADS LEFT, THEN EXIT
):
    """
    FOR USE BY PROCESSES NOT EXPECTED TO EVER COMPLETE UNTIL EXTERNAL
    SHUTDOWN IS REQUESTED

    SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

    :param please_stop:
    :param allow_exit:
    :param wait_forever:: Assume all needed threads have been launched. When done
    :return:
    """
    if not isinstance(please_stop, Signal):
        please_stop = Signal()

    please_stop.on_go(lambda: start_new_thread(_stop_main_thread, ()))

    self_thread = Thread.current()
    if self_thread != MAIN_THREAD:
        Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")

    if not wait_forever:
        # TRIGGER SIGNAL WHEN ALL EXITING THREADS ARE DONE
        pending = copy(self_thread.children)
        # local renamed from `all` (shadowed the builtin)
        children_done = AndSignals(please_stop, len(pending))
        for p in pending:
            p.stopped.on_go(children_done.done)

    try:
        if allow_exit:
            _wait_for_exit(please_stop)
        else:
            _wait_for_interrupt(please_stop)
    except (KeyboardInterrupt, SystemExit) as _:
        Log.alert("SIGINT Detected!  Stopping...")
    finally:
        please_stop.go()
def not_monitor(self, please_stop):
    """Scan disabled: only a whitelist of columns get cardinality refreshed."""
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(THREAD_STOP))

    while not please_stop:
        column = self.todo.pop()
        if column == THREAD_STOP:
            break

        if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
            DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
            column.last_updated = Date.now()
            continue
        elif column.last_updated > Date.now() - TOO_OLD and column.cardinality is not None:
            # DO NOT UPDATE FRESH COLUMN METADATA
            DEBUG and Log.note(
                "{{column.es_column}} is still fresh ({{ago}} ago)",
                column=column,
                ago=(Date.now() - Date(column.last_updated)).seconds
            )
            continue

        with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
            if untype_path(column.name) in ["build.type", "run.type"]:
                try:
                    self._update_cardinality(column)
                except Exception as e:
                    Log.warning(
                        "problem getting cardinality for {{column.name}}",
                        column=column,
                        cause=e
                    )
            else:
                column.last_updated = Date.now()
def worker(please_stop):
    """
    Poll the EC2 instance metadata endpoint for a spot-termination notice;
    trigger please_stop when termination is scheduled.  Exits quietly when
    the metadata endpoint is unreachable (not a spot node).
    """
    while not please_stop:
        try:
            response = requests.get(
                "http://169.254.169.254/latest/meta-data/spot/termination-time"
            )
            # 400/404 mean "no termination scheduled"; anything else is a notice
            if response.status_code not in [400, 404]:
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            unreachable = (
                "Failed to establish a new connection: [Errno 10060]" in e
                or "A socket operation was attempted to an unreachable network" in e
            )
            if unreachable:
                Log.note(
                    "AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)"
                )
                return
            Log.warning("AWS shutdown detection has problems", cause=e)
            # back off longer after an unexpected failure
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def __init__(
    self,
    service_url,  # location of the ActiveData server we are testing
    backend_es,  # the ElasticSearch settings for filling the backend
    sql_url=None,  # location of the SQL service
    fast_testing=False,  # True -> call ES directly, skipping the HTTP proxy
    kwargs=None  # full settings bundle, stored for later use
):
    """
    Configure a test harness around an ActiveData service and its
    ElasticSearch backend.
    """
    # NOTE(review): `==None` (not `is None`) looks deliberate — backend_es is
    # presumably a mo-dots Data, whose missing attributes are Null and compare
    # equal to None while not being identical to it; confirm before changing.
    if backend_es.schema==None:
        Log.error("Expecting backed_es to have a schema defined")

    # pick a letter that rotates every 30 seconds, to salt test resources
    letters = unicode(ascii_lowercase)
    self.random_letter = letters[int(Date.now().unix / 30) % 26]
    self.service_url = service_url
    self.sql_url = sql_url
    self.backend_es = backend_es
    self.settings = kwargs
    self._es_test_settings = None
    self._es_cluster = None
    self._index = None

    # install a default container config only if none is set yet
    if not containers.config.default:
        containers.config.default = {
            "type": "elasticsearch",
            "settings": backend_es
        }

    if not fast_testing:
        self.server = http
    else:
        Log.alert("TESTS WILL RUN FAST, BUT NOT ALL TESTS ARE RUN!\nEnsure the `file://tests/config/elasticsearch.json#fastTesting=true` to turn on the network response tests.")
        # WE WILL USE THE ActiveServer CODE, AND CONNECT TO ES DIRECTLY.
        # THIS MAKES FOR SLIGHTLY FASTER TEST TIMES BECAUSE THE PROXY IS
        # MISSING
        self.server = FakeHttp()
        containers.config.default = {
            "type": "elasticsearch",
            "settings": kwargs.backend_es.copy()
        }
def worker(please_stop):
    """
    Poll the EC2 metadata endpoint for a spot-termination notice, signalling
    please_stop when one appears.  Only the second consecutive failure is
    reported; the endpoint being unreachable ends the worker (not a spot node).
    """
    seen_problem = False
    while not please_stop:
        request_time = (time.time() - timer.START) / 60  # MINUTES
        try:
            response = requests.get("http://169.254.169.254/latest/meta-data/spot/termination-time")
            seen_problem = False  # a successful request clears the failure streak
            if response.status_code not in [400, 404]:
                Log.alert("Shutdown AWS Spot Node {{name}} {{type}}", name=machine_metadata.name, type=machine_metadata.aws_instance_type)
                please_stop.go()
        except Exception as e:
            e = Except.wrap(e)
            unreachable = (
                "Failed to establish a new connection: [Errno 10060]" in e
                or "A socket operation was attempted to an unreachable network" in e
            )
            if unreachable:
                Log.note("AWS Spot Detection has shutdown, probably not a spot node, (http://169.254.169.254 is unreachable)")
                return
            if seen_problem:
                # IGNORE THE FIRST PROBLEM
                Log.warning("AWS shutdown detection has more than one consecutive problem: (last request {{time|round(1)}} minutes since startup)", time=request_time, cause=e)
            seen_problem = True
            (Till(seconds=61) | please_stop).wait()
        (Till(seconds=11) | please_stop).wait()
def not_monitor(self, please_stop):
    """
    Reduced-work scanner used when the full metadata scan is disabled.
    Drains (column, after) pairs from self.todo; refreshes cardinality only
    for known multi-type columns, and clears cached stats on other stale
    columns so they are not trusted.
    """
    Log.alert("metadata scan has been disabled")
    please_stop.then(lambda: self.todo.add(THREAD_STOP))
    while not please_stop:
        pair = self.todo.pop()
        if pair is THREAD_STOP:
            break
        # `after` is a timestamp: columns updated since it are considered young
        column, after = pair

        with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": column}, silent=not DEBUG, too_long=0.05):
            # NOTE: `continue` inside the `with` still closes the Timer
            if column.jx_type in STRUCT or split_field(column.es_column)[-1] == EXISTS_TYPE:
                # structural columns carry no useful cardinality
                # DEBUG and Log.note("{{column.es_column}} is a struct", column=column)
                continue
            elif after and column.last_updated > after:
                continue  # COLUMN IS STILL YOUNG
            elif column.last_updated > Date.now() - TOO_OLD and column.cardinality > 0:
                # DO NOT UPDATE FRESH COLUMN METADATA
                DEBUG and Log.note("{{column.es_column}} is still fresh ({{ago}} ago)", column=column, ago=(Date.now()-Date(column.last_updated)).seconds)
                continue

            if untype_path(column.name) in KNOWN_MULTITYPES:
                # multi-type columns get a real cardinality measurement
                try:
                    self._update_cardinality(column)
                except Exception as e:
                    Log.warning("problem getting cardinality for {{column.name}}", column=column, cause=e)
                continue

            # stale, ordinary column: wipe cached stats so they are recomputed
            self.meta.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "multi",
                    "partitions",
                ],
                "where": {"eq": {"es_index": column.es_index, "es_column": column.es_column}}
            })
def not_monitor(self, please_stop):
    """
    Fallback scanner used when the metadata scan is disabled: drain columns
    from self.todo and clear cached stats on stale ones so they are not
    trusted, without measuring anything.
    """
    Log.alert("metadata scan has been disabled")
    please_stop.on_go(lambda: self.todo.add(THREAD_STOP))
    while not please_stop:
        c = self.todo.pop()
        if c == THREAD_STOP:
            break
        # `not c.last_updated` guards columns with no timestamp at all:
        # comparing None/Null against a Date is unsafe, and the sibling
        # not_monitor variant skips them the same way
        if not c.last_updated or c.last_updated >= Date.now() - TOO_OLD:
            continue

        with Timer("Update {{col.es_index}}.{{col.es_column}}", param={"col": c}, silent=not DEBUG, too_long=0.05):
            # wipe cached stats so downstream readers recompute them
            self.meta.columns.update({
                "set": {
                    "last_updated": Date.now()
                },
                "clear": [
                    "count",
                    "cardinality",
                    "multi",
                    "partitions",
                ],
                "where": {"eq": {"es_index": c.es_index, "es_column": c.es_column}}
            })
def test_shutdown(config, app):
    # SHOULD NOT ACCEPT A SHUTDOWN COMMAND, WILL BREAK OTHER TESTS
    port = text_type(config.flask.port)
    http.get("http://localhost:" + port + "/shutdown")
    Log.alert("/shutdown sent, should have no effect")
def update_spot_requests(self, utility_required):
    """
    Reconcile current AWS spot requests against the utility we need:
    tally spending on active requests, then trim or add instances so the
    fleet fits inside self.settings.budget.
    """
    spot_requests = self._get_managed_spot_requests()

    # ADD UP THE CURRENT REQUESTED INSTANCES
    all_instances = UniqueIndex("id", data=self._get_managed_instances())
    self.active = active = wrap([r for r in spot_requests if r.status.code in RUNNING_STATUS_CODES | PENDING_STATUS_CODES | PROBABLY_NOT_FOR_A_WHILE | MIGHT_HAPPEN])

    # drop cancelled requests whose instance is already gone
    for a in active.copy():
        if a.status.code == "request-canceled-and-instance-running" and all_instances[a.instance_id] == None:
            active.remove(a)

    used_budget = 0
    current_spending = 0
    for a in active:
        about = self.price_lookup[a.launch_specification.instance_type, a.launch_specification.placement]
        discount = coalesce(about.type.discount, 0)
        Log.note(
            "Active Spot Request {{id}}: {{type}} {{instance_id}} in {{zone}} @ {{price|round(decimal=4)}}",
            id=a.id,
            type=a.launch_specification.instance_type,
            zone=a.launch_specification.placement,
            instance_id=a.instance_id,
            price=a.price - discount
        )
        # exposure is bid price; spending uses current market price when known
        used_budget += a.price - discount
        current_spending += coalesce(about.current_price, a.price) - discount

    Log.note(
        "Total Exposure: ${{budget|round(decimal=4)}}/hour (current price: ${{current|round(decimal=4)}}/hour)",
        budget=used_budget,
        current=current_spending
    )

    remaining_budget = self.settings.budget - used_budget

    current_utility = coalesce(SUM(self.price_lookup[r.launch_specification.instance_type, r.launch_specification.placement].type.utility for r in active), 0)
    net_new_utility = utility_required - current_utility

    Log.note("have {{current_utility}} utility running; need {{need_utility}} more utility", current_utility=current_utility, need_utility=net_new_utility)

    # over budget: shed cost first, which may also change the utility target
    if remaining_budget < 0:
        remaining_budget, net_new_utility = self.save_money(remaining_budget, net_new_utility)

    # surplus utility: allow a configured overage before removing instances
    if net_new_utility < 0:
        if self.settings.allowed_overage:
            net_new_utility = Math.min(net_new_utility + self.settings.allowed_overage * utility_required, 0)
        net_new_utility = self.remove_instances(net_new_utility)

    # deficit: request more instances, capped by max_new_utility per pass
    if net_new_utility > 0:
        net_new_utility = Math.min(net_new_utility, self.settings.max_new_utility)
        net_new_utility, remaining_budget = self.add_instances(net_new_utility, remaining_budget)

    # still short after adding: nothing affordable was left
    if net_new_utility > 0:
        Log.alert(
            "Can not fund {{num|round(places=2)}} more utility (all utility costs more than ${{expected|round(decimal=2)}}/hour). Remaining budget is ${{budget|round(decimal=2)}} ",
            num=net_new_utility,
            expected=self.settings.max_utility_price,
            budget=remaining_budget
        )

    # Give EC2 a chance to notice the new requests before tagging them.
    Till(timeout=3).wait()
    with self.net_new_locker:
        for req in self.net_new_spot_requests:
            req.add_tag("Name", self.settings.ec2.instance.name)

    Log.note("All requests for new utility have been made")
    self.done_spot_requests.go()
def daemon(please_stop):
    """
    Background loop that fires Till timers.  Sleeps until the next ping,
    collects newly-registered timers under Till.locker, keeps them sorted by
    fire time, and triggers every timer whose time has passed.  On shutdown,
    all remaining timers are fired immediately.
    """
    from mo_logs import Log

    Till.enabled = True
    sorted_timers = []  # list of (timestamp, signal), kept sorted by timestamp

    try:
        while not please_stop:
            now = time()
            with Till.locker:
                later = Till.next_ping - now

            if later > 0:
                # nothing due yet; sleep until the ping (capped at INTERVAL)
                try:
                    sleep(min(later, INTERVAL))
                except Exception as e:
                    from mo_logs import Log
                    Log.warning(
                        "Call to sleep failed with ({{later}}, {{interval}})",
                        later=later,
                        interval=INTERVAL,
                        cause=e
                    )
                continue

            with Till.locker:
                Till.next_ping = now + INTERVAL
                # atomically take ownership of newly-registered timers
                new_timers, Till.new_timers = Till.new_timers, []

            if DEBUG and new_timers:
                Log.note("new timers: {{timers}}", timers=[t for t, s in new_timers])

            sorted_timers.extend(new_timers)

            if sorted_timers:
                sorted_timers.sort(key=lambda r: r[0])
                for i, (t, s) in enumerate(sorted_timers):
                    if now < t:
                        # split: everything before i is due now, the rest waits
                        work, sorted_timers = sorted_timers[:i], sorted_timers[i:]
                        Till.next_ping = min(Till.next_ping, sorted_timers[0][0])
                        break
                else:
                    # every timer is due
                    work, sorted_timers = sorted_timers, []

                if work:
                    if DEBUG:
                        Log.note(
                            "done: {{timers}}. Remaining {{pending}}",
                            timers=[t for t, s in work],
                            pending=[t for t, s in sorted_timers]
                        )
                    for t, s in work:
                        s.go()
    except Exception as e:
        Log.warning("timer shutdown", cause=e)
    finally:
        if DEBUG:
            Log.alert("TIMER SHUTDOWN")
        Till.enabled = False
        # TRIGGER ALL REMAINING TIMERS RIGHT NOW
        with Till.locker:
            new_work, Till.new_timers = Till.new_timers, []
        for t, s in new_work + sorted_timers:
            s.go()
def daemon(please_stop):
    """
    Background loop that fires Till timers.  Like the tuple-based variant,
    but timer records are resolved through a callable (`r()`, presumably a
    weakref — returns None when the timer was collected; verify against the
    Till registration code), and ordering uses actual_time(rec).
    """
    Till.enabled = True
    sorted_timers = []  # timer records, kept sorted by actual_time

    try:
        while not please_stop:
            now = time()
            with Till.locker:
                later = Till.next_ping - now

            if later > 0:
                # nothing due yet; sleep until the ping (capped at INTERVAL)
                try:
                    sleep(min(later, INTERVAL))
                except Exception as e:
                    Log.warning(
                        "Call to sleep failed with ({{later}}, {{interval}})",
                        later=later,
                        interval=INTERVAL,
                        cause=e
                    )
                continue

            with Till.locker:
                Till.next_ping = now + INTERVAL
                # atomically take ownership of newly-registered timers
                new_timers, Till.new_timers = Till.new_timers, []

            if DEBUG and new_timers:
                # summarize when many timers arrive at once
                if len(new_timers) > 5:
                    Log.note("{{num}} new timers", num=len(new_timers))
                else:
                    Log.note("new timers: {{timers}}", timers=[t for t, _ in new_timers])

            sorted_timers.extend(new_timers)

            if sorted_timers:
                sorted_timers.sort(key=actual_time)
                for i, rec in enumerate(sorted_timers):
                    t = actual_time(rec)
                    if now < t:
                        # split: everything before i is due now, the rest waits
                        work, sorted_timers = sorted_timers[:i], sorted_timers[i:]
                        Till.next_ping = min(Till.next_ping, sorted_timers[0].timestamp)
                        break
                else:
                    # every timer is due
                    work, sorted_timers = sorted_timers, []

                if work:
                    DEBUG and Log.note(
                        "done: {{timers}}. Remaining {{pending}}",
                        timers=[t for t, s in work] if len(work) <= 5 else len(work),
                        pending=[t for t, s in sorted_timers] if len(sorted_timers) <= 5 else len(sorted_timers)
                    )
                    for t, r in work:
                        # r() may return None when the target no longer exists
                        s = r()
                        if s is not None:
                            s.go()
    except Exception as e:
        Log.warning("unexpected timer shutdown", cause=e)
    finally:
        DEBUG and Log.alert("TIMER SHUTDOWN")
        Till.enabled = False
        # TRIGGER ALL REMAINING TIMERS RIGHT NOW
        with Till.locker:
            new_work, Till.new_timers = Till.new_timers, []
        for t, r in new_work + sorted_timers:
            s = r()
            if s is not None:
                s.go()