def __init__(self, name, params, cwd=None, env=None, debug=False):
    """
    Launch an external process and attach pump threads to its three pipes.

    :param name: human-readable label used for signals, queues and threads
    :param params: argv list passed straight to subprocess.Popen
    :param cwd: optional working directory for the child process
    :param env: optional environment mapping for the child process
    :param debug: force debug output (OR'd with the module-level DEBUG flag)
    """
    self.name = name
    # fired once the child process has fully stopped
    self.service_stopped = Signal("stopped signal for " + convert.string2quote(name))
    # queues decouple callers from the child's blocking pipe I/O
    self.stdin = Queue("stdin for process " + convert.string2quote(name), silent=True)
    self.stdout = Queue("stdout for process " + convert.string2quote(name), silent=True)
    self.stderr = Queue("stderr for process " + convert.string2quote(name), silent=True)

    try:
        self.debug = debug or DEBUG
        # bufsize=-1: system-default (fully buffered) pipes
        self.service = service = subprocess.Popen(
            params,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=-1,
            cwd=cwd,
            env=env
        )

        # firing `stopper` kills the child (see _kill) and stops the pump threads
        self.stopper = Signal()
        self.stopper.on_go(self._kill)
        self.thread_locker = Lock()
        # one monitor thread plus one pump thread per pipe
        self.children = [
            Thread.run(self.name + " waiter", self._monitor, parent_thread=self),
            Thread.run(self.name + " stdin", self._writer, service.stdin, self.stdin, please_stop=self.stopper, parent_thread=self),
            Thread.run(self.name + " stdout", self._reader, service.stdout, self.stdout, please_stop=self.stopper, parent_thread=self),
            Thread.run(self.name + " stderr", self._reader, service.stderr, self.stderr, please_stop=self.stopper, parent_thread=self),
        ]
    except Exception, e:
        Log.error("Can not call", e)
def __init__(self, logger):
    """
    Wrap `logger` so writes are buffered on a queue and drained by a
    dedicated background thread (about once per second).

    :param logger: destination log; its write(**kwargs) is called per message
    """
    # DELAYED LOAD FOR THREADS MODULE
    from pyLibrary.thread.threads import Queue

    self.queue = Queue("logs", max=10000, silent=True)
    self.logger = logger

    def worker(please_stop):
        # drain in batches; a Thread.STOP token on the queue ends the worker
        while not please_stop:
            Thread.sleep(1)
            logs = self.queue.pop_all()
            for log in logs:
                if log is Thread.STOP:
                    if DEBUG_LOGGING:
                        sys.stdout.write("Log_usingThread.worker() sees stop, filling rest of queue\n")
                    please_stop.go()
                else:
                    self.logger.write(**log)

    self.thread = Thread("log thread", worker)
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def __init__(self, functions):
    """
    Spin up one worker thread per callable; all workers share a single
    inbound queue and a single outbound queue.

    :param functions: iterable of callables, one worker thread each
    """
    self.outbound = Queue("out to process")
    # BUG FIX: the original assigned `self.inbound` twice in a row
    # (Queue("in from stdin"), then Queue("in from stderr")), so the first
    # queue was created and immediately discarded.  Only the surviving
    # assignment is kept; if a separate stderr queue was intended it needs
    # its own attribute name -- TODO confirm intent with callers.
    self.inbound = Queue("in from stderr")

    # MAKE THREADS
    self.threads = []
    for t, f in enumerate(functions):
        thread = worker(
            "worker " + unicode(t),
            f,
            self.inbound,
            self.outbound,
        )
        self.threads.append(thread)
def _late_import():
    """Import Log and its stats queue lazily, breaking a circular import."""
    global _Log

    from pyLibrary.debugs.logs import Log as _Log
    from pyLibrary.thread.threads import Queue

    # `== None` (not `is None`) follows this codebase's convention for
    # null tests, which tolerates wrapped Null values -- do not "fix"
    if _Log.cprofiler_stats == None:
        _Log.cprofiler_stats = Queue("cprofiler stats")  # ACCUMULATION OF STATS FROM ALL THREADS
def main():
    """
    ETL entry point: read settings, build a work queue (local when --id is
    given, otherwise AWS), fan out copy2es worker threads, and block until a
    shutdown signal arrives.
    """
    try:
        settings = startup.read_settings(defs=[{
            "name": ["--id"],
            "help": "id (prefix, really) to process",
            "type": str,
            "dest": "id",
            "required": False
        }])
        constants.set(settings.constants)
        Log.start(settings.debug)

        queries.config.default = {
            "type": "elasticsearch",
            "settings": settings.elasticsearch.copy()
        }

        if settings.args.id:
            # explicit ids: use an in-memory queue instead of AWS
            work_queue = Queue("local work queue")
            work_queue.extend(parse_id_argument(settings.args.id))
        else:
            work_queue = aws.Queue(settings=settings.work_queue)

        Log.note("Listen to queue {{queue}}, and read off of {{s3}}", queue=settings.work_queue.name, s3=settings.source.bucket)

        es = MultiDayIndex(settings.elasticsearch, queue_size=100000)

        threads = []
        please_stop = Signal()
        for _ in range(settings.threads):
            p = Thread.run("copy to es", copy2es, es, settings, work_queue, please_stop=please_stop)
            threads.append(p)

        def monitor_progress(please_stop):
            # periodic progress report; closes over work_queue
            while not please_stop:
                Log.note("Remaining: {{num}}", num=len(work_queue))
                Thread.sleep(seconds=10)

        Thread.run(name="monitor progress", target=monitor_progress, please_stop=please_stop)

        aws.capture_termination_signal(please_stop)
        Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True)
        please_stop.go()
        Log.note("Shutdown started")
    except Exception, e:
        Log.error("Problem with etl", e)
def __init__(self, db=None):
    """
    :param db: Optional, wrap a sqlite db in a thread
    :return: Multithread save database
    """
    # NOTE(review): the `db` parameter is accepted but never stored --
    # self.db is unconditionally None here; confirm _worker opens its own
    # connection, or this parameter is dead
    self.db = None
    self.queue = Queue("sql commands")  # HOLD (command, result, signal) PAIRS
    self.worker = Thread.run("sqlite db thread", self._worker)
    self.get_trace = DEBUG
def __init__(self, host, index, type="log", max_size=1000, batch_size=100, settings=None):
    """
    Buffer log messages on a queue and bulk-insert them into an
    Elasticsearch index from a background thread.

    settings ARE FOR THE ELASTICSEARCH INDEX
    """
    # NOTE(review): host/index/type/port-style args appear to be consumed via
    # `settings` by the enclosing framework -- confirm; they are unused here
    self.es = Cluster(settings).get_or_create_index(
        schema=convert.json2value(convert.value2json(SCHEMA), leaves=True),
        limit_replicas=True,
        tjson=True,
        settings=settings
    )
    self.batch_size = batch_size
    self.es.add_alias(coalesce(settings.alias, settings.index))
    self.queue = Queue("debug logs to es", max=max_size, silent=True)

    # default retry policy when none configured
    self.es.settings.retry.times = coalesce(self.es.settings.retry.times, 3)
    self.es.settings.retry.sleep = Duration(coalesce(self.es.settings.retry.sleep, MINUTE))
    Thread.run("add debug logs to es", self._insert_loop)
@classmethod
def stop(cls):
    """
    Flush accumulated cProfile stats (when profiling was enabled), write the
    simple-profiler output, then replace the threaded logger with a plain
    stdout stream so shutdown needs no worker threads.
    """
    from pyLibrary.debugs import profiles

    if cls.cprofiler and hasattr(cls, "settings"):
        if cls.cprofiler_stats == None:
            # BUG FIX: original tested `cls.cprofiler == None` here, which is
            # unreachable inside `if cls.cprofiler` -- the stats queue is what
            # may still be missing (same lazy-create guard as _late_import)
            from pyLibrary.thread.threads import Queue
            cls.cprofiler_stats = Queue("cprofiler stats")  # ACCUMULATION OF STATS FROM ALL THREADS
        import pstats
        cls.cprofiler_stats.add(pstats.Stats(cls.cprofiler))
        write_profile(cls.settings.cprofile, cls.cprofiler_stats.pop_all())
    if profiles.ON and hasattr(cls, "settings"):
        profiles.write(cls.settings.profile)
    cls.main_log.stop()
    cls.main_log = TextLog_usingStream(sys.stdout)
def etl_one(settings):
    """
    Run the ETL once over the ids given on the command line, using an
    in-memory queue instead of a persistent one.
    """
    queue = Queue("temp work queue")
    # give the in-memory queue no-op transaction methods so it can stand in
    # for a persistent (transactional) queue
    queue.__setattr__(b"commit", Null)
    queue.__setattr__(b"rollback", Null)

    settings.param.wait_forever = False
    already_in_queue = set()
    for w in settings.workers:
        source = get_container(w.source)
        # source.settings.fast_forward = True
        if id(source) in already_in_queue:
            continue
        try:
            for i in parse_id_argument(settings.args.id):
                data = source.get_key(i)
                # `!= None` is this codebase's Null-tolerant convention
                if data != None:
                    already_in_queue.add(id(source))
                    queue.add(Dict(bucket=w.source.bucket, key=i))
        except Exception, e:
            # NOTE(review): `in e` appears to match the exception's template
            # text (project Except supports containment) -- confirm; the
            # warning below is emitted even for the handled missing-key case
            if "Key {{key}} does not exist" in e:
                already_in_queue.add(id(source))
                queue.add(Dict(bucket=w.source.bucket, key=settings.args.id))
            Log.warning("Problem", cause=e)
def __init__(self, stream):
    """
    Log to a stream via a background thread (stream writes can be slow).

    :param stream: a file-like object, or a string naming one (eg "sys.stdout")
    """
    assert stream

    use_UTF8 = False

    if isinstance(stream, basestring):
        if stream.startswith("sys."):
            use_UTF8 = True  # sys.* ARE OLD AND CAN NOT HANDLE unicode
        # SECURITY NOTE(review): eval() of a config-supplied name; acceptable
        # only because settings are trusted -- do not feed user input here
        self.stream = eval(stream)
        name = stream
    else:
        self.stream = stream
        name = "stream"

    # WRITE TO STREAMS CAN BE *REALLY* SLOW, WE WILL USE A THREAD
    from pyLibrary.thread.threads import Queue

    if use_UTF8:
        def utf8_appender(value):
            # encode just-in-time for byte-oriented sys.* streams
            if isinstance(value, unicode):
                value = value.encode('utf8')
            self.stream.write(value)

        appender = utf8_appender
    else:
        appender = self.stream.write

    self.queue = Queue("log to stream", max=10000, silent=True)
    self.thread = Thread("log to " + name, time_delta_pusher, appender=appender, queue=self.queue, interval=timedelta(seconds=0.3))
    self.thread.parent.remove_child(self.thread)  # LOGGING WILL BE RESPONSIBLE FOR THREAD stop()
    self.thread.start()
def __init__(self, host, index, alias=None, name=None, port=9200, settings=None):
    """
    Build the metadata container (tables/columns) for an Elasticsearch
    cluster and start the background refresh thread.
    """
    global _elasticsearch
    # NOTE(review): looks like a re-entrant singleton guard -- a second call
    # on the same instance is a no-op; confirm against the framework
    if hasattr(self, "settings"):
        return

    # delayed imports to avoid circular dependencies
    from pyLibrary.queries.containers.list_usingPythonList import ListContainer
    from pyLibrary.env import elasticsearch as _elasticsearch

    self.settings = settings
    self.default_name = coalesce(name, alias, index)
    self.default_es = _elasticsearch.Cluster(settings=settings)
    # work queue of tables/columns whose metadata needs refreshing
    self.todo = Queue("refresh metadata", max=100000, unique=True)

    self.es_metadata = Null
    self.last_es_metadata = Date.now() - OLD_METADATA

    self.meta = Dict()
    table_columns = metadata_tables()
    column_columns = metadata_columns()
    self.meta.tables = ListContainer("meta.tables", [], wrap({c.name: c for c in table_columns}))
    self.meta.columns = ColumnList()
    self.meta.columns.insert(column_columns)
    self.meta.columns.insert(table_columns)
    # TODO: fix monitor so it does not bring down ES
    if ENABLE_META_SCAN:
        self.worker = Thread.run("refresh metadata", self.monitor)
    else:
        self.worker = Thread.run("refresh metadata", self.not_monitor)
    return
def __init__(self, name=None):
    """
    Collect log messages on an in-memory queue, optionally tagged with
    *name* for easier identification.
    """
    suffix = (" " + name) if name else ""
    self.queue = Queue("log messages to queue" + suffix)
def loop(source, coverage_summary_index, settings, please_stop):
    """
    Work newest-to-oldest through the indexes behind `source`'s alias,
    repeatedly querying for coverage records still missing summary fields and
    fanning each batch out to NUM_THREAD processor threads.
    """
    try:
        cluster = elasticsearch.Cluster(source)
        aliases = cluster.get_aliases()
        candidates = []
        for pairs in aliases:
            if pairs.alias == source.index:
                candidates.append(pairs.index)
        # newest index first
        candidates = jx.sort(candidates, {".": "desc"})

        for index_name in candidates:
            coverage_index = elasticsearch.Index(index=index_name, read_only=False, settings=source)
            # last 15 chars of the index name encode its date
            push_date_filter = unicode2Date(coverage_index.settings.index[-15::], elasticsearch.INDEX_DATE_FORMAT)

            while not please_stop:
                # IDENTIFY NEW WORK
                Log.note("Working on index {{index}}", index=index_name)
                coverage_index.refresh()
                todo = http.post_json(settings.url, json={
                    "from": "coverage",
                    "groupby": ["source.file.name", "build.revision12"],
                    "where": {"and": [
                        {"missing": "source.method.name"},
                        {"missing": "source.file.min_line_siblings"},
                        {"gte": {"repo.push.date": push_date_filter}}
                    ]},
                    "format": "list",
                    "limit": coalesce(settings.batch_size, 100)
                })
                if not todo.data:
                    break

                queue = Queue("pending source files to review")
                queue.extend(todo.data[0:coalesce(settings.batch_size, 100):])
                threads = [
                    Thread.run(
                        "processor" + unicode(i),
                        process_batch,
                        queue,
                        coverage_index,
                        coverage_summary_index,
                        settings,
                        please_stop=please_stop
                    )
                    for i in range(NUM_THREAD)
                ]

                # ADD STOP MESSAGE
                # NOTE(review): one STOP for NUM_THREAD consumers -- presumably
                # the project Queue re-broadcasts STOP to every reader; confirm
                queue.add(Thread.STOP)

                # WAIT FOR THEM TO COMPLETE
                for t in threads:
                    t.join()

        please_stop.go()
        return
    except Exception, e:
        Log.warning("Problem processing", cause=e)
def __init__(self):
    """Hold formatted log messages on a queue until a consumer drains them."""
    self.queue = Queue("log messages")
class Log(object):
    """
    FOR STRUCTURED LOGGING AND EXCEPTION CHAINING
    """
    trace = False
    main_log = None
    logging_multi = None
    profiler = None  # simple pypy-friendly profiler
    cprofiler = None  # screws up with pypy, but better than nothing
    cprofiler_stats = Queue("cprofiler stats")  # ACCUMULATION OF STATS FROM ALL THREADS
    error_mode = False  # prevent error loops

    @classmethod
    def start(cls, settings=None):
        """
        RUN ME FIRST TO SETUP THE THREADED LOGGING
        http://victorlin.me/2012/08/good-logging-practice-in-python/

        log - LIST OF PARAMETERS FOR LOGGER(S)
        trace - SHOW MORE DETAILS IN EVERY LOG LINE (default False)
        cprofile - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False)
                   USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"}
        profile - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):)
                  USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"}
        constants - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE)
        """
        if not settings:
            return
        settings = wrap(settings)
        cls.settings = settings
        cls.trace = cls.trace | coalesce(settings.trace, False)
        if cls.trace:
            from pyLibrary.thread.threads import Thread

        # normalize cprofile settings to the long {enabled, filename} form
        if settings.cprofile is False:
            settings.cprofile = {"enabled": False}
        elif settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled):
            if isinstance(settings.cprofile, bool):
                settings.cprofile = {"enabled": True, "filename": "cprofile.tab"}
            import cProfile
            cls.cprofiler = cProfile.Profile()
            cls.cprofiler.enable()

        if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled):
            from pyLibrary.debugs import profiles
            if isinstance(settings.profile, bool):
                profiles.ON = True
                settings.profile = {"enabled": True, "filename": "profile.tab"}
            if settings.profile.enabled:
                profiles.ON = True

        if settings.constants:
            constants.set(settings.constants)

        if settings.log:
            cls.logging_multi = TextLog_usingMulti()
            if cls.main_log:
                cls.main_log.stop()
            cls.main_log = TextLog_usingThread(cls.logging_multi)
            for log in listwrap(settings.log):
                Log.add_log(Log.new_instance(log))

        if settings.cprofile.enabled == True:
            # BUG FIX: the template placeholder was garbled ("{(unknown)}");
            # restored to {{filename}} to match the keyword argument below
            Log.alert("cprofiling is enabled, writing to {{filename}}", filename=os.path.abspath(settings.cprofile.filename))

    @classmethod
    def stop(cls):
        """Flush profiler output and fall back to an unthreaded stdout log."""
        from pyLibrary.debugs import profiles

        if cls.cprofiler and hasattr(cls, "settings"):
            import pstats
            cls.cprofiler_stats.add(pstats.Stats(cls.cprofiler))
            write_profile(cls.settings.cprofile, cls.cprofiler_stats.pop_all())

        if profiles.ON and hasattr(cls, "settings"):
            profiles.write(cls.settings.profile)
        cls.main_log.stop()
        cls.main_log = TextLog_usingStream(sys.stdout)

    @classmethod
    def new_instance(cls, settings):
        """Build a TextLog implementation from one log-settings mapping."""
        settings = wrap(settings)

        if settings["class"]:
            if settings["class"].startswith("logging.handlers."):
                from .log_usingLogger import TextLog_usingLogger
                return TextLog_usingLogger(settings)
            else:
                with suppress_exception:
                    from .log_usingLogger import make_log_from_settings
                    return make_log_from_settings(settings)
                # OH WELL :(

        # NOTE(review): when log_type=="file" the first branch always wins,
        # even if only `filename` was provided -- the second branch is then
        # unreachable for log_type=="file"; confirm intended precedence
        if settings.log_type == "file" or settings.file:
            return TextLog_usingFile(settings.file)
        if settings.log_type == "file" or settings.filename:
            return TextLog_usingFile(settings.filename)
        if settings.log_type == "console":
            from .log_usingThreadedStream import TextLog_usingThreadedStream
            return TextLog_usingThreadedStream(sys.stdout)
        if settings.log_type == "stream" or settings.stream:
            from .log_usingThreadedStream import TextLog_usingThreadedStream
            return TextLog_usingThreadedStream(settings.stream)
        # NOTE(review): `or settings.stream` here is dead (the stream branch
        # above already returned) -- possibly meant `settings.host`; confirm
        if settings.log_type == "elasticsearch" or settings.stream:
            from .log_usingElasticSearch import TextLog_usingElasticSearch
            return TextLog_usingElasticSearch(settings)
        if settings.log_type == "email":
            from .log_usingEmail import TextLog_usingEmail
            return TextLog_usingEmail(settings)
        if settings.log_type == "ses":
            from .log_usingSES import TextLog_usingSES
            return TextLog_usingSES(settings)

        Log.error("Log type of {{log_type|quote}} is not recognized", log_type=settings.log_type)

    @classmethod
    def add_log(cls, log):
        """Register another destination on the multiplexing logger."""
        cls.logging_multi.add_log(log)

    @classmethod
    def note(cls, template, default_params={}, stack_depth=0, log_context=None, **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if len(template) > 10000:
            template = template[:10000]

        params = dict(unwrap(default_params), **more_params)

        log_params = set_default({
            "template": template,
            "params": params,
            "timestamp": datetime.utcnow(),
            "machine": machine_metadata
        }, log_context, {"context": exceptions.NOTE})

        if not template.startswith("\n") and template.find("\n") > -1:
            template = "\n" + template

        if cls.trace:
            # prefix machine/thread/caller-location details when tracing
            log_template = "{{machine.name}} - {{timestamp|datetime}} - {{thread.name}} - \"{{location.file}}:{{location.line}}\" ({{location.method}}) - " + template.replace("{{", "{{params.")
            f = sys._getframe(stack_depth + 1)
            log_params.location = {
                "line": f.f_lineno,
                "file": f.f_code.co_filename.split(os.sep)[-1],
                "method": f.f_code.co_name
            }
            thread = Thread.current()
            log_params.thread = {"name": thread.name, "id": thread.id}
        else:
            log_template = "{{timestamp|datetime}} - " + template.replace("{{", "{{params.")

        cls.main_log.write(log_template, log_params)

    @classmethod
    def unexpected(cls, template, default_params={}, cause=None, stack_depth=0, log_context=None, **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        # allow calling with the exception as the first positional argument
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        if cause and not isinstance(cause, Except):
            cause = Except(exceptions.UNEXPECTED, unicode(cause), trace=exceptions._extract_traceback(0))

        trace = exceptions.extract_stack(1)
        e = Except(exceptions.UNEXPECTED, template, params, cause, trace)
        Log.note("{{error}}", error=e, log_context=set_default({"context": exceptions.WARNING}, log_context), stack_depth=stack_depth + 1)

    @classmethod
    def alarm(cls, template, default_params={}, stack_depth=0, log_context=None, **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        # USE replace() AS POOR MAN'S CHILD TEMPLATE
        template = ("*" * 80) + "\n" + indent(template, prefix="** ").strip() + "\n" + ("*" * 80)
        Log.note(template, default_params=default_params, stack_depth=stack_depth + 1, log_context=set_default({"context": exceptions.ALARM}, log_context), **more_params)

    @classmethod
    def alert(cls, template, default_params={}, stack_depth=0, log_context=None, **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        return Log.alarm(template, default_params=default_params, stack_depth=stack_depth + 1, log_context=set_default({"context": exceptions.ALARM}, log_context), **more_params)

    @classmethod
    def warning(cls, template, default_params={}, cause=None, stack_depth=0, log_context=None, **more_params):
        """
        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if isinstance(default_params, BaseException):
            cause = default_params
            default_params = {}

        if "values" in more_params.keys():
            Log.error("Can not handle a logging parameter by name `values`")

        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.WARNING, template, params, cause, trace)
        Log.note("{{error|unicode}}", error=e, log_context=set_default({"context": exceptions.WARNING}, log_context), stack_depth=stack_depth + 1)

    @classmethod
    def error(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # pausible cause
        stack_depth=0,
        **more_params
    ):
        """
        raise an exception with a trace for the cause too

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)

        # NOTE(review): add_to_trace is never set True, so the extend()
        # below is dead code -- left in place pending confirmation of intent
        add_to_trace = False
        cause = wrap(unwraplist([Except.wrap(c, stack_depth=1) for c in listwrap(cause)]))
        trace = exceptions.extract_stack(stack_depth + 1)
        if add_to_trace:
            cause[0].trace.extend(trace[1:])

        e = Except(exceptions.ERROR, template, params, cause, trace)
        raise e

    @classmethod
    def fatal(
        cls,
        template,  # human readable template
        default_params={},  # parameters for template
        cause=None,  # pausible cause
        stack_depth=0,
        log_context=None,
        **more_params
    ):
        """
        SEND TO STDERR

        :param template: *string* human readable string with placeholders for parameters
        :param default_params: *dict* parameters to fill in template
        :param cause: *Exception* for chaining
        :param stack_depth: *int* how many calls you want popped off the stack to report the *true* caller
        :param log_context: *dict* extra key:value pairs for your convenience
        :param more_params: *any more parameters (which will overwrite default_params)
        :return:
        """
        if default_params and isinstance(listwrap(default_params)[0], BaseException):
            cause = default_params
            default_params = {}

        params = dict(unwrap(default_params), **more_params)
        cause = unwraplist([Except.wrap(c) for c in listwrap(cause)])
        trace = exceptions.extract_stack(stack_depth + 1)

        e = Except(exceptions.ERROR, template, params, cause, trace)
        str_e = unicode(e)

        # suppress logging errors while reporting, to avoid error loops
        error_mode = cls.error_mode
        with suppress_exception:
            if not error_mode:
                cls.error_mode = True
                Log.note("{{error|unicode}}", error=e, log_context=set_default({"context": exceptions.FATAL}, log_context), stack_depth=stack_depth + 1)
        cls.error_mode = error_mode

        sys.stderr.write(str_e.encode('utf8'))

    def write(self):
        raise NotImplementedError
class Log(object): """ FOR STRUCTURED LOGGING AND EXCEPTION CHAINING """ trace = False # SHOW MACHINE AND LINE NUMBER main_log = None logging_multi = None profiler = None # simple pypy-friendly profiler cprofiler = None # screws up with pypy, but better than nothing cprofiler_stats = Queue( "cprofiler stats") # ACCUMULATION OF STATS FROM ALL THREADS error_mode = False # prevent error loops @classmethod def start(cls, settings=None): """ RUN ME FIRST TO SETUP THE THREADED LOGGING http://victorlin.me/2012/08/good-logging-practice-in-python/ log - LIST OF PARAMETERS FOR LOGGER(S) trace - SHOW MORE DETAILS IN EVERY LOG LINE (default False) cprofile - True==ENABLE THE C-PROFILER THAT COMES WITH PYTHON (default False) USE THE LONG FORM TO SET THE FILENAME {"enabled": True, "filename": "cprofile.tab"} profile - True==ENABLE pyLibrary SIMPLE PROFILING (default False) (eg with Profiler("some description"):) USE THE LONG FORM TO SET FILENAME {"enabled": True, "filename": "profile.tab"} constants - UPDATE MODULE CONSTANTS AT STARTUP (PRIMARILY INTENDED TO CHANGE DEBUG STATE) """ if not settings: return settings = wrap(settings) cls.settings = settings cls.trace = cls.trace | coalesce(settings.trace, False) if cls.trace: from pyLibrary.thread.threads import Thread if settings.cprofile is True or (isinstance(settings.cprofile, Mapping) and settings.cprofile.enabled): if isinstance(settings.cprofile, bool): settings.cprofile = { "enabled": True, "filename": "cprofile.tab" } import cProfile cls.cprofiler = cProfile.Profile() cls.cprofiler.enable() if settings.profile is True or (isinstance(settings.profile, Mapping) and settings.profile.enabled): from pyLibrary.debugs import profiles if isinstance(settings.profile, bool): profiles.ON = True settings.profile = {"enabled": True, "filename": "profile.tab"} if settings.profile.enabled: profiles.ON = True if settings.constants: constants.set(settings.constants) if not settings.log: return cls.logging_multi = Log_usingMulti() if 
cls.main_log: cls.main_log.stop() cls.main_log = Log_usingThread(cls.logging_multi) for log in listwrap(settings.log): Log.add_log(Log.new_instance(log)) @classmethod def stop(cls): from pyLibrary.debugs import profiles if cls.cprofiler and hasattr(cls, "settings"): import pstats cls.cprofiler_stats.add(pstats.Stats(cls.cprofiler)) write_profile(cls.settings.cprofile, cls.cprofiler_stats.pop_all()) if profiles.ON and hasattr(cls, "settings"): profiles.write(cls.settings.profile) cls.main_log.stop() cls.main_log = Log_usingStream(sys.stdout) @classmethod def new_instance(cls, settings): settings = wrap(settings) if settings["class"]: if settings["class"].startswith("logging.handlers."): from .log_usingLogger import Log_usingLogger return Log_usingLogger(settings) else: try: from .log_usingLogger import make_log_from_settings return make_log_from_settings(settings) except Exception, e: pass # OH WELL :( if settings.log_type == "file" or settings.file: return Log_usingFile(settings.file) if settings.log_type == "file" or settings.filename: return Log_usingFile(settings.filename) if settings.log_type == "console": from .log_usingThreadedStream import Log_usingThreadedStream return Log_usingThreadedStream(sys.stdout) if settings.log_type == "stream" or settings.stream: from .log_usingThreadedStream import Log_usingThreadedStream return Log_usingThreadedStream(settings.stream) if settings.log_type == "elasticsearch" or settings.stream: from .log_usingElasticSearch import Log_usingElasticSearch return Log_usingElasticSearch(settings) if settings.log_type == "email": from .log_usingEmail import Log_usingEmail return Log_usingEmail(settings)