def test_lock_and_till(self):
    locker = Lock("prime lock")
    got_lock = Signal()
    a_is_ready = Signal("a lock")
    b_is_ready = Signal("b lock")

    def loop(is_ready, please_stop):
        with locker:
            while not got_lock:
                # Log.note("{{thread}} is waiting", thread=Thread.current().name)
                locker.wait(till=Till(seconds=0))
                is_ready.go()
            locker.wait()
        Log.note("thread is expected to get here")

    thread_a = Thread.run("a", loop, a_is_ready)
    thread_b = Thread.run("b", loop, b_is_ready)

    a_is_ready.wait()
    b_is_ready.wait()
    with locker:
        got_lock.go()
        Till(seconds=0.1).wait()  # MUST WAIT FOR a AND b TO PERFORM locker.wait()
        Log.note("leaving")
        pass
    with locker:
        Log.note("leaving again")
        pass
    Till(seconds=1).wait()

    self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
    self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
def test_memory_cleanup_with_till(self):
    objgraph.growth()

    root = Signal()
    for i in range(100000):
        if i % 1000 == 0:
            Log.note("at {{num}} tills", num=i)
        root = root | Till(seconds=100000)
        mid_mem = psutil.Process(os.getpid()).memory_info().rss
        if mid_mem > 1000 * 1000 * 1000:
            Log.note("{{num}} Till triggers created", num=i)
            break
    trigger = Signal()
    root = root | trigger

    growth = objgraph.growth(limit=4)
    growth and Log.note("More objects\n{{growth}}", growth=growth)

    trigger.go()
    root.wait()  # THERE SHOULD BE NO DELAY HERE

    for _ in range(0, 20):
        try:
            Till(seconds=0.1).wait()  # LET TIMER DAEMON CLEANUP
            current = [(t, objgraph.count(t), objgraph.count(t) - c) for t, c, d in growth]
            Log.note("Object count\n{{current}}", current=current)

            # NUMBER OF OBJECTS CLEANED UP SHOULD MATCH NUMBER OF OBJECTS CREATED
            for (_, _, cd), (_, _, gd) in zip(current, growth):
                self.assertAlmostEqual(-cd, gd, places=2)
            return
        except Exception:
            pass
    Log.error("object counts did not go down")
def __init__(self, name, target, *args, **kwargs):
    self.id = -1
    self.name = name
    self.target = target
    self.end_of_thread = None
    self.synch_lock = Lock("response synch lock")
    self.args = args

    # ENSURE THERE IS A SHARED please_stop SIGNAL
    self.kwargs = copy(kwargs)
    self.kwargs["please_stop"] = self.kwargs.get(
        "please_stop", Signal("please_stop for " + self.name)
    )
    self.please_stop = self.kwargs["please_stop"]

    self.thread = None
    self.stopped = Signal("stopped signal for " + self.name)
    self.cprofiler = None
    self.children = []

    if "parent_thread" in kwargs:
        del self.kwargs["parent_thread"]
        self.parent = kwargs["parent_thread"]
    else:
        self.parent = Thread.current()
        self.parent.add_child(self)
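# A minimal sketch (an addition, not part of the class above) of why the shared
# please_stop Signal matters: Thread.run() injects it into kwargs, so every
# target can poll it and exit cleanly when asked. Assumes the mo_threads API
# used elsewhere in this file.
from mo_threads import Thread, Till

def worker(please_stop):
    while not please_stop:
        (Till(seconds=0.1) | please_stop).wait()  # sleep, but wake early on stop

t = Thread.run("worker", worker)
t.please_stop.go()  # the same Signal instance that __init__ placed in self.kwargs
t.join()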
def _setup():
    threads = Data()
    signals = Data()
    db = Sqlite()
    db.query("CREATE TABLE my_table (value TEXT)")
    for name in ["a", "b"]:
        signals[name] = [{"begin": Signal(), "done": Signal()} for _ in range(4)]
        threads[name] = Thread.run(name, _work, name, db, signals[name])
    return db, threads, signals
def test_loop(self):
    acc = []
    started = Signal()

    def work(please_stop):
        started.go()
        while not please_stop:
            acc.append(Date.now().unix)
            Till(seconds=0.1).wait()

    worker = Thread.run("loop", work)
    started.wait()
    while len(acc) < 10:
        Till(seconds=0.1).wait()
    worker.stop()
    worker.join()

    # We expect 10, but 9 is good enough
    num = len(acc)
    self.assertGreater(
        num,
        9,
        "Expecting some reasonable number of entries to prove there was looping, not " + text(num),
    )
def test_queue_speed(self):
    SCALE = 1000 * 10
    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", queue=slow)

    def empty(please_stop):
        while not please_stop:
            item = q.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)
        Log.note("Done insert")
        done.wait()

    self.assertLess(timer.duration.seconds, 1.5, "Expecting queue to be fast")
def main():
    try:
        settings = startup.read_settings()
        with startup.SingleInstance(settings.args.filename):
            constants.set(settings.constants)
            Log.start(settings.debug)

            extractor = Extract(settings)

            def extract(please_stop):
                with MySQL(**settings.snowflake.database) as db:
                    with db.transaction():
                        for kwargs in extractor.queue:
                            if please_stop:
                                break
                            try:
                                extractor.extract(db=db, please_stop=please_stop, **kwargs)
                            except Exception as e:
                                Log.warning("Could not extract", cause=e)
                                extractor.queue.add(kwargs)

            for i in range(settings.extract.threads):
                Thread.run("extract #" + text_type(i), extract)

            please_stop = Signal()
            Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True, wait_forever=False)
    except Exception as e:
        Log.warning("Problem with data extraction", cause=e)
    finally:
        Log.stop()
def __init__(self, conn=None, tuid_service=None, kwargs=None):
    try:
        self.config = kwargs
        self.conn = conn if conn else sql.Sql(self.config.database.name)
        self.hg_cache = HgMozillaOrg(kwargs=self.config.hg_cache, use_cache=True) if self.config.hg_cache else Null

        self.tuid_service = tuid_service if tuid_service else tuid.service.TUIDService(
            database=None, hg=None, kwargs=self.config, conn=self.conn, clogger=self
        )
        self.rev_locker = Lock()
        self.working_locker = Lock()

        self.init_db()
        self.next_revnum = coalesce(self.conn.get_one("SELECT max(revnum)+1 FROM csetLog")[0], 1)
        self.csets_todo_backwards = Queue(name="Clogger.csets_todo_backwards")
        self.deletions_todo = Queue(name="Clogger.deletions_todo")
        self.maintenance_signal = Signal(name="Clogger.maintenance_signal")
        self.config = self.config.tuid

        self.disable_backfilling = False
        self.disable_tipfilling = False
        self.disable_deletion = False
        self.disable_maintenance = False

        # Make sure we are filled before allowing queries
        numrevs = self.conn.get_one("SELECT count(revnum) FROM csetLog")[0]
        if numrevs < MINIMUM_PERMANENT_CSETS:
            Log.note("Filling in csets to hold {{minim}} csets.", minim=MINIMUM_PERMANENT_CSETS)
            oldest_rev = 'tip'
            with self.conn.transaction() as t:
                tmp = t.query("SELECT min(revnum), revision FROM csetLog").data[0][1]
                if tmp:
                    oldest_rev = tmp
            self._fill_in_range(MINIMUM_PERMANENT_CSETS - numrevs, oldest_rev, timestamp=False)

        Log.note(
            "Table is filled with at least {{minim}} entries. Starting workers...",
            minim=MINIMUM_PERMANENT_CSETS
        )

        Thread.run('clogger-tip', self.fill_forward_continuous)
        Thread.run('clogger-backfill', self.fill_backward_with_list)
        Thread.run('clogger-maintenance', self.csetLog_maintenance)
        Thread.run('clogger-deleter', self.csetLog_deleter)

        Log.note("Started clogger workers.")
    except Exception as e:
        Log.warning("Cannot setup clogger: {{cause}}", cause=str(e))
def test_daemon(service):
    from mo_threads import Signal

    temp_signal = Signal()

    # Run the daemon indefinitely to see if we can update all known files
    # to the latest revisions. This can take a while though.
    service._daemon(temp_signal)
def main():
    try:
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        please_stop = Signal("main stop signal")
        Thread.wait_for_shutdown_signal(please_stop)
    except Exception as e:
        Log.error("Problem with etl", cause=e)
def test_lock_and_till(self):
    locker = Lock("prime lock")
    got_signal = Signal()
    a_is_ready = Signal("a lock")
    b_is_ready = Signal("b lock")

    Log.note("begin")

    def loop(is_ready, please_stop):
        with locker:
            while not got_signal:
                locker.wait(till=Till(seconds=0.01))
                is_ready.go()
                Log.note("{{thread}} is ready", thread=Thread.current().name)
            Log.note("outside loop")
            locker.wait()
            Log.note("thread is expected to get here")

    thread_a = Thread.run("a", loop, a_is_ready).release()
    thread_b = Thread.run("b", loop, b_is_ready).release()

    a_is_ready.wait()
    b_is_ready.wait()

    timeout = Till(seconds=1)
    with locker:
        got_signal.go()
        while not thread_a.stopped:
            # WE MUST CONTINUE TO USE THE locker TO ENSURE THE OTHER THREADS ARE NOT ORPHANED IN THERE
            locker.wait(till=Till(seconds=0.1))
            Log.note("wait for a thread")
        while not thread_b.stopped:
            # WE MUST CONTINUE TO USE THE locker TO ENSURE THE OTHER THREADS ARE NOT ORPHANED IN THERE
            locker.wait(till=Till(seconds=0.1))
            Log.note("wait for b thread")

    thread_a.join()
    thread_b.join()
    if timeout:
        Log.error("Took too long")

    self.assertTrue(bool(thread_a.stopped), "Thread should be done by now")
    self.assertTrue(bool(thread_b.stopped), "Thread should be done by now")
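# A small sketch of the Lock idiom both tests above depend on: wait() releases
# the lock, sleeps until notified or till expires, then reacquires it, so the
# holder can poll a condition without starving other threads. The name
# condition_met is illustrative only.
from mo_threads import Lock, Till

locker = Lock("example lock")
condition_met = []  # filled by some other thread

with locker:
    while not condition_met:
        locker.wait(till=Till(seconds=0.1))  # release, sleep, reacquire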
def request(self, method, path, headers):
    now = Date.now()
    self.inbound_rate.add(now)
    ready = Signal(path)

    # TEST CACHE
    with self.cache_locker:
        pair = self.cache.get(path)
        if pair is None:
            self.cache[path] = (ready, None, None, now)

    if pair is not None:
        # REQUEST IS IN THE QUEUE ALREADY, WAIT
        ready, headers, response, then = pair
        if response is None:
            ready.wait()
            with self.cache_locker:
                ready, headers, response, timestamp = self.cache.get(path)
        with self.db.transaction() as t:
            t.execute(
                "UPDATE cache SET timestamp=" + quote_value(now) +
                " WHERE path=" + quote_value(path) +
                " AND timestamp<" + quote_value(now)
            )
        return Response(response, status=200, headers=json.loads(headers))

    # TEST DB
    db_response = self.db.query("SELECT headers, response FROM cache WHERE path=" + quote_value(path)).data
    if db_response:
        headers, response = db_response[0]
        with self.db.transaction() as t:
            t.execute(
                "UPDATE cache SET timestamp=" + quote_value(now) +
                " WHERE path=" + quote_value(path) +
                " AND timestamp<" + quote_value(now)
            )
        with self.cache_locker:
            self.cache[path] = (ready, headers, response.encode('latin1'), now)
        ready.go()
        return Response(response, status=200, headers=json.loads(headers))

    # MAKE A NETWORK REQUEST
    self.todo.add((ready, method, path, headers, now))
    ready.wait()
    with self.cache_locker:
        ready, headers, response, timestamp = self.cache[path]
    return Response(response, status=200, headers=json.loads(headers))
def test_job_queue_in_signal(self):
    gc.collect()
    start_mem = psutil.Process(os.getpid()).memory_info().rss
    Log.note("Start memory {{mem|comma}}", mem=start_mem)

    main = Signal()
    result = [main | Signal() for _ in range(10000)]

    mid_mem = psutil.Process(os.getpid()).memory_info().rss
    Log.note("Mid memory {{mem|comma}}", mem=mid_mem)

    del result
    gc.collect()
    end_mem = psutil.Process(os.getpid()).memory_info().rss
    Log.note("End memory {{mem|comma}}", mem=end_mem)

    main.go()  # NOT NEEDED, BUT INTERESTING

    self.assertLess(end_mem, (start_mem + mid_mem) / 2, "end memory should be closer to start")
def __init__(self, backing, database, kwargs=None):
    if backing.directory:
        self.backing = DirectoryBacking(kwargs=backing)
    else:
        self.backing = s3.Bucket(kwargs=backing)
    self.db = Sqlite(database)

    # ENSURE DATABASE IS SETUP
    if not self.db.about(VERSION_TABLE):
        schema.setup(self)
    self.next_id = id_generator(db=self.db, version_table=VERSION_TABLE)
    self.queues = []
    self.please_stop = Signal()
    self.cleaner = Thread.run("cleaner", self._cleaner)
def test_till_in_loop(self):
    def loop(please_stop):
        counter = 0
        while not please_stop:
            (Till(seconds=0.001) | please_stop).wait()
            counter += 1
            Log.note("{{count}}", count=counter)

    please_stop = Signal("please_stop")
    Thread.run("loop", loop, please_stop=please_stop)
    Till(seconds=1).wait()
    with please_stop.lock:
        self.assertLessEqual(len(please_stop.job_queue), 1, "Expecting only one pending job on go")
    please_stop.go()
def query(self, command):
    """
    WILL BLOCK CALLING THREAD UNTIL THE command IS COMPLETED
    :param command: COMMAND FOR SQLITE
    :return: list OF RESULTS
    """
    if not self.worker:
        self.worker = Thread.run("sqlite db thread", self._worker)

    signal = Signal()
    result = Data()
    self.queue.add((command, result, signal, None))
    signal.wait()
    if result.exception:
        Log.error("Problem with Sqlite call", cause=result.exception)
    return result
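# Hedged usage sketch for the blocking query() above: the caller parks on a
# fresh Signal until the worker thread completes the command and fires it.
# Construction mirrors the _setup() helper earlier in this file.
db = Sqlite()
db.query("CREATE TABLE t (x INTEGER)")
db.query("INSERT INTO t VALUES (1), (2)")
result = db.query("SELECT x FROM t")  # blocks until the db thread responds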
def _test_queue_speed(self, test=False):
    SCALE = 1000 * 10
    done = Signal("done")
    slow = Queue()
    q = ThreadedQueue("test queue", slow_queue=slow)

    def empty(please_stop):
        while not please_stop:
            item = slow.pop()
            if item is THREAD_STOP:
                break
        done.go()

    Thread.run("empty", empty)

    timer = Timer("add {{num}} to queue", param={"num": SCALE})
    with timer:
        for i in range(SCALE):
            q.add(i)
        q.add(THREAD_STOP)
        Log.note("Done insert")
        done.wait()

    Log.note(
        "{{num}} items through queue in {{seconds|round(3)}} seconds",
        num=SCALE,
        seconds=timer.duration.seconds,
    )
    if PY2 and "windows" not in platform.system().lower():
        expected_time = 15  # LINUX PY2 IS CRAZY SLOW
    elif PY3 and "windows" not in platform.system().lower():
        expected_time = 6  # LINUX PY3 IS SLOW
    else:
        expected_time = 6
    if test:
        self.assertLess(
            timer.duration.seconds,
            expected_time,
            "Expecting queue to be fast, not " + text(timer.duration.seconds) + " seconds",
        )
def wait_for_shutdown_signal(
    please_stop=False,  # ASSIGN SIGNAL TO STOP EARLY
    allow_exit=False,   # ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
    wait_forever=True   # IGNORE CHILD THREADS, NEVER EXIT. False -> IF NO CHILD THREADS LEFT, THEN EXIT
):
    """
    FOR USE BY PROCESSES NOT EXPECTED TO EVER COMPLETE UNTIL EXTERNAL SHUTDOWN IS REQUESTED

    SLEEP UNTIL keyboard interrupt, OR please_stop, OR "exit"

    :param please_stop: SIGNAL TO STOP EARLY
    :param allow_exit: ALLOW "exit" COMMAND ON CONSOLE TO ALSO STOP THE APP
    :param wait_forever: ASSUME ALL NEEDED THREADS HAVE BEEN LAUNCHED; WHEN False, EXIT AFTER CHILD THREADS ARE DONE
    :return:
    """
    if not isinstance(please_stop, Signal):
        please_stop = Signal()

    please_stop.on_go(lambda: start_new_thread(_stop_main_thread, ()))

    self_thread = Thread.current()
    if self_thread != MAIN_THREAD:
        Log.error("Only the main thread can sleep forever (waiting for KeyboardInterrupt)")

    if not wait_forever:
        # TRIGGER SIGNAL WHEN ALL EXITING THREADS ARE DONE
        pending = copy(self_thread.children)
        all = AndSignals(please_stop, len(pending))
        for p in pending:
            p.stopped.on_go(all.done)

    try:
        if allow_exit:
            _wait_for_exit(please_stop)
        else:
            _wait_for_interrupt(please_stop)
    except (KeyboardInterrupt, SystemExit) as _:
        Log.alert("SIGINT Detected! Stopping...")
    finally:
        please_stop.go()
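# Hedged usage sketch: a typical main() launches its workers, then parks the
# main thread here; Ctrl-C, please_stop.go() from any thread, or typing "exit"
# on the console (when allow_exit=True) all unwind it. The worker body is
# illustrative only.
from mo_threads import Signal, Thread, Till

def worker(please_stop):
    while not please_stop:
        (Till(seconds=1) | please_stop).wait()

please_stop = Signal("main stop signal")
Thread.run("worker", worker, please_stop=please_stop)
Thread.wait_for_shutdown_signal(please_stop=please_stop, allow_exit=True, wait_forever=False)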
def execute(self, command):
    """
    COMMANDS WILL BE EXECUTED IN THE ORDER THEY ARE GIVEN
    BUT CAN INTERLEAVE WITH OTHER THREAD COMMANDS
    :param command: COMMAND FOR SQLITE
    :return: Signal FOR IF YOU WANT TO BE NOTIFIED WHEN DONE
    """
    if DEBUG_EXECUTE:  # EXECUTE IMMEDIATELY FOR BETTER STACK TRACE
        self.query(command)
        return DONE

    if self.get_trace:
        trace = extract_stack(1)
    else:
        trace = None
    is_done = Signal()
    self.queue.add((command, None, is_done, trace))
    return is_done
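# Hedged usage sketch: execute() is fire-and-forget, and the returned Signal
# lets the caller rendezvous with completion only when ordering matters.
# `db` stands for an instance of the Sqlite wrapper this method belongs to.
done = db.execute("INSERT INTO t VALUES (3)")
# ... unrelated work proceeds while the db thread runs the command ...
done.wait()  # block only when the write must actually be finished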
def _execute(self, command):
    with self.lock:
        if self.current_task is not None:
            self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None

    self.process.stdin.add(value2json(command))
    self.current_task.wait()

    with self.lock:
        try:
            if self.current_error:
                Log.error("problem with process call", cause=Except.new_instance(self.current_error))
            else:
                return self.current_response
        finally:
            self.current_task = None
            self.current_response = None
            self.current_error = None
def __init__(self, _file):
    """
    file - USES FILE FOR PERSISTENCE
    """
    self.file = File.new_instance(_file)
    self.lock = Lock("lock for persistent queue using file " + self.file.name)
    self.please_stop = Signal()
    self.db = Data()
    self.pending = []

    if self.file.exists:
        for line in self.file:
            with suppress_exception:
                delta = mo_json.json2value(line)
                apply_delta(self.db, delta)
        if self.db.status.start == None:  # HAPPENS WHEN ONLY ADDED TO QUEUE, THEN CRASH
            self.db.status.start = 0
        self.start = self.db.status.start

        # SCRUB LOST VALUES
        lost = 0
        for k in self.db.keys():
            with suppress_exception:
                if k != "status" and int(k) < self.start:
                    self.db[k] = None
                    lost += 1
                # HAPPENS FOR self.db.status, BUT MAYBE OTHER PROPERTIES TOO
        if lost:
            Log.warning("queue file had {{num}} items lost", num=lost)
        if DEBUG:
            Log.note("Persistent queue {{name}} found with {{num}} items", name=self.file.abspath, num=len(self))
    else:
        self.db.status = Data(start=0, end=0)
        self.start = self.db.status.start
        if DEBUG:
            Log.note("New persistent queue {{name}}", name=self.file.abspath)
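# Hedged usage sketch for this persistent queue: every mutation is journaled
# to the backing file as a JSON delta, so a crash is recovered by replaying
# the log on startup. The add/pop/commit method names are assumptions based
# on the queue API used elsewhere in this file.
queue = PersistentQueue("my_queue_file.json")  # hypothetical file name
queue.add({"task": "example"})
item = queue.pop()
queue.commit()  # record consumption so the item is not replayed after restart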
def _execute(self, command):
    with self.lock:
        self.current_task.wait()
        self.current_task = Signal()
        self.current_response = None
        self.current_error = None

    if self.process.service_stopped:
        Log.error("python is not running")
    self.process.stdin.add(value2json(command))
    (self.current_task | self.process.service_stopped).wait()

    try:
        if self.current_error:
            Log.error("problem with process call", cause=Except.new_instance(self.current_error))
        else:
            return self.current_response
    finally:
        self.current_task = DONE
        self.current_response = None
        self.current_error = None
def __init__(self, name, max=None, silent=False, unique=False, allow_add_after_close=False):
    """
    max - LIMIT THE NUMBER IN THE QUEUE, IF TOO MANY add() AND extend() WILL BLOCK
    silent - SET True TO SUPPRESS COMPLAINTS WHEN READERS ARE TOO SLOW
    unique - SET True IF YOU WANT ONLY ONE INSTANCE IN THE QUEUE AT A TIME
    """
    if not _Log:
        _late_import()

    self.name = name
    self.max = coalesce(max, 2 ** 10)
    self.silent = silent
    self.allow_add_after_close = allow_add_after_close
    self.unique = unique
    self.please_stop = Signal("stop signal for " + name)
    self.lock = Lock("lock for queue " + name)
    self.queue = deque()
    self.next_warning = time()  # FOR DEBUGGING
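# Hedged usage sketch: a bounded queue applies backpressure, so add() blocks
# once `max` items are waiting; unique=True drops duplicates on insert. The
# item values are illustrative only.
q = Queue("work items", max=100, unique=True)
q.add("job-1")
q.add("job-1")  # duplicate is ignored when unique=True
job = q.pop()   # blocks until an item (or THREAD_STOP) arrives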
def __init__(self, instance_manager, disable_prices=False, kwargs=None):
    self.settings = kwargs
    self.instance_manager = instance_manager
    aws_args = dict(
        region_name=kwargs.aws.region,
        aws_access_key_id=unwrap(kwargs.aws.aws_access_key_id),
        aws_secret_access_key=unwrap(kwargs.aws.aws_secret_access_key)
    )
    self.ec2_conn = boto.ec2.connect_to_region(**aws_args)
    self.vpc_conn = boto.vpc.connect_to_region(**aws_args)
    self.price_locker = Lock()
    self.prices = None
    self.price_lookup = None
    self.no_capacity = {}
    self.no_capacity_file = File(kwargs.price_file).parent / "no capacity.json"
    self.done_making_new_spot_requests = Signal()
    self.net_new_locker = Lock()
    self.net_new_spot_requests = UniqueIndex(("id",))  # SPOT REQUESTS FOR THIS SESSION
    self.watcher = None
    self.active = None

    self.settings.uptime.bid_percentile = coalesce(self.settings.uptime.bid_percentile, self.settings.bid_percentile)
    self.settings.uptime.history = coalesce(Date(self.settings.uptime.history), DAY)
    self.settings.uptime.duration = coalesce(Duration(self.settings.uptime.duration), Date("5minute"))
    self.settings.max_percent_per_type = coalesce(self.settings.max_percent_per_type, 1)

    if ENABLE_SIDE_EFFECTS and instance_manager and instance_manager.setup_required():
        self._start_life_cycle_watcher()
    if not disable_prices:
        self.pricing()
from copy import copy

from mo_dots import set_default, listwrap, coalesce
from mo_files import File
from mo_future import is_text
from mo_future import text_type
from mo_json import json2value, value2json
from mo_logs import Log, constants, Except
from mo_threads import Signal
from mo_threads.threads import STDOUT, STDIN

context = copy(globals())
del context['copy']

DEBUG = False
DONE = value2json({"out": {}}).encode('utf8') + b"\n"
please_stop = Signal()


def command_loop(local):
    STDOUT.write(b'{"out":"ok"}\n')
    DEBUG and Log.note("python process running")

    file = File
    while not please_stop:
        line = STDIN.readline()
        try:
            command = json2value(line.decode('utf8'))
            DEBUG and Log.note("got {{command}}", command=command)

            if "import" in command:
                dummy = {}
now = Date.now().unix
if time_offset is None:
    time_offset = now - request.meta.request_time

next_request = request.meta.request_time + time_offset
if next_request > now:
    Log.note("Next request in {{wait_time}}", wait_time=Duration(seconds=next_request - now))
    Till(till=next_request).wait()

Thread.run("request " + text_type(request_count), one_request, request)
request_count += 1
queue.commit()


if __name__ == '__main__':
    try:
        tmp_signal = Signal()
        config = startup.read_settings()
        constants.set(config.constants)
        Log.start(config.debug)

        queue_consumer(kwargs=config, please_stop=tmp_signal)
        worker = Thread.run("sqs consumer", queue_consumer, kwargs=config)
        MAIN_THREAD.wait_for_shutdown_signal(allow_exit=True, please_stop=worker.stopped)
    except BaseException as e:
        Log.error("Serious problem with consumer construction! Shutdown!", cause=e)
def test_signal_is_boolean(self):
    a = Signal()
    self.assertEqual(bool(a), False)
    a.go()
    self.assertEqual(bool(a), True)
def test_signal_is_not_null(self):
    a = Signal()
    self.assertNotEqual(a, None)
    a.go()
    self.assertNotEqual(a, None)
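# A small sketch tying the two tests above to the `|` composition used
# throughout this suite: the OR of two signals fires as soon as either parent
# fires, and on_go() registers a callback for that moment.
from mo_threads import Signal

a = Signal("a")
b = Signal("b")
either = a | b
either.on_go(lambda: Log.note("fired"))
b.go()
assert bool(either)  # the composite is truthy once any parent has gone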
def __init__(self, kwargs=None):
    self.settings = kwargs
    self.schema = SnowflakeSchema(self.settings.snowflake)
    self._extract = extract = kwargs.extract

    # SOME PREP
    get_git_revision()

    # VERIFY WE DO NOT HAVE TOO MANY OTHER PROCESSES WORKING ON STUFF
    with MySQL(**kwargs.snowflake.database) as db:
        processes = None
        try:
            processes = jx.filter(
                db.query("show processlist"),
                {"and": [
                    {"neq": {"Command": "Sleep"}},
                    {"neq": {"Info": "show processlist"}}
                ]}
            )
        except Exception as e:
            Log.warning("no database", cause=e)

        if processes:
            if DEBUG:
                Log.warning("Processes are running\n{{list|json}}", list=processes)
            else:
                Log.error("Processes are running\n{{list|json}}", list=processes)

    extract.type = listwrap(extract.type)
    extract.start = listwrap(extract.start)
    extract.batch = listwrap(extract.batch)
    extract.field = listwrap(extract.field)
    if any(len(extract.type) != len(other) for other in [extract.start, extract.batch, extract.field]):
        Log.error("Expecting same number of dimensions for `type`, `start`, `batch`, and `field` in the `extract` inner object")
    for i, t in enumerate(extract.type):
        if t == "time":
            extract.start[i] = Date(extract.start[i])
            extract.batch[i] = Duration(extract.batch[i])
        elif t == "number":
            pass
        else:
            Log.error('Expecting `extract.type` to be "number" or "time"')

    extract.threads = coalesce(extract.threads, 1)
    self.done_pulling = Signal()
    self.queue = Queue("all batches", max=2 * coalesce(extract.threads, 1), silent=True)

    self.bucket = s3.Bucket(self.settings.destination)
    self.notify = aws.Queue(self.settings.notify)
    Thread.run("get records", self.pull_all_remaining)
def __init__(self, id, mail=None, app=None):
    self.id = int(id)
    self.mail = coalesce(self.mail, mail)
    self.app = app
    self._cache = None
    self._ready = Signal()