class StoppableThread(threading.Thread): """This thread can be stopped. Note: Thread does not return the target function's result by default, which is why I've implemented this workaround with the built-in Queue. """ def __init__(self, **kwargs): super(StoppableThread, self).__init__(**kwargs) self.__target = kwargs.get('target') self.__args = kwargs.get('args') if self.__args is None: self.__args = () self.__kwargs = kwargs.get('kwargs') if self.__kwargs is None: self.__kwargs = {} self.__result_queue = Queue() self.__stopped = threading.Event() def stop(self): """Stop the thread. It will not terminate code, but set the flag that should be handled in the executed function. """ self.__stopped.set() def is_stopped(self): """Check the status of the thread. It only monitors the flag state. If the task is stopped you still have to pay attention to `.is_alive()`. """ return self.__stopped.is_set() def run(self): """Run the target function, check the expected result and propagate exceptions. """ try: self.__kwargs['_is_stopped'] = self.__stopped.is_set func_result = None try: if self.__target: func_result = self.__target(*self.__args, **self.__kwargs) finally: # Avoid a refcycle if the thread is running a function with # an argument that has a member that points to the thread. del self.__target, self.__args, self.__kwargs if func_result is None: func_result = {} elif not isinstance(func_result, dict): raise TypeError("Task has to return a dict or None.") except Exception: # pylint: disable=W0703 self.__result_queue.put(traceback.format_exc()) else: self.__result_queue.put(func_result) def get_result(self): """Return the results of the target function execution. """ self.join() try: return self.__result_queue.get_nowait() except Empty: return None
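# Usage sketch (not part of the original class): a minimal, hedged example of
# driving StoppableThread above. It assumes the same imports the class needs
# (threading, traceback, time and Queue/Empty from the queue module). The target
# receives the injected `_is_stopped` callable and must return a dict or None,
# matching the contract enforced in run().
import time

def poll_until_stopped(interval, _is_stopped=None):
    ticks = 0
    while not _is_stopped():            # cooperative cancellation point
        time.sleep(interval)
        ticks += 1
    return {'ticks': ticks}

worker = StoppableThread(target=poll_until_stopped, args=(0.05,))
worker.start()
time.sleep(0.2)
worker.stop()                           # sets the Event; the loop exits on its next check
print(worker.get_result())              # joins, then e.g. {'ticks': 4}, or a traceback string on error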
def make_buffer_for_iterator_with_thread(gen, n_workers, buffer_size): wait_time = 0.02 generator_queue = Queue() _stop = threading.Event() def generator_task(): while not _stop.is_set(): try: if generator_queue.qsize() < buffer_size: generator_output = next(gen) generator_queue.put(generator_output) else: time.sleep(wait_time) except (StopIteration, KeyboardInterrupt): _stop.set() return generator_threads = [threading.Thread(target=generator_task) for _ in range(n_workers)] for thread in generator_threads: thread.start() while not _stop.is_set() or not generator_queue.empty(): if not generator_queue.empty(): yield generator_queue.get() else: time.sleep(wait_time)
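# Usage sketch for make_buffer_for_iterator_with_thread above: wrap a slow,
# I/O-bound generator so items are prefetched into a bounded buffer by background
# threads. `slow_numbers` is a stand-in producer, not part of the original code.
# Note that with n_workers > 1 the workers call next(gen) concurrently, so the
# wrapped generator must tolerate that; n_workers=1 is the safe choice.
import time

def slow_numbers(n):
    for i in range(n):
        time.sleep(0.05)    # simulate slow I/O per item
        yield i

for value in make_buffer_for_iterator_with_thread(slow_numbers(10), n_workers=1, buffer_size=4):
    print(value)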
def __init__(self, jobStore, toilState): logger.debug("Initializing service manager") self.jobStore = jobStore self.toilState = toilState self.jobGraphsWithServicesBeingStarted = set() self._terminate = Event() # This is used to terminate the thread associated # with the service manager self._jobGraphsWithServicesToStart = Queue() # This is the input queue of # jobGraphs that have services that need to be started self._jobGraphsWithServicesThatHaveStarted = Queue() # This is the output queue # of jobGraphs that have services that are already started self._serviceJobGraphsToStart = Queue() # This is the queue of services for the # batch system to start self.jobsIssuedToServiceManager = 0 # The number of jobs the service manager # is scheduling # Start a thread that starts the services of jobGraphs in the # jobsWithServicesToStart input queue and puts the jobGraphs whose services # are running on the jobGraphssWithServicesThatHaveStarted output queue self._serviceStarter = Thread(target=self._startServices, args=(self._jobGraphsWithServicesToStart, self._jobGraphsWithServicesThatHaveStarted, self._serviceJobGraphsToStart, self._terminate, self.jobStore))
class Pool(object): class Error(Exception): pass def __init__(self, threads, host, port, ssl, user, password): self._threads = [] self._queue = Queue(maxsize=1000) count = 0 while len(self._threads) < threads and count < 3 * threads: try: count += 1 w = Downloader(self._queue, host, port, ssl, user, password) w.start() self._threads.append(w) except SOFT_ERRORS as e: log.warning('Cannot create downloader thread: %s', e) if len(self._threads) != threads: log.error('Cannot create enough workers') raise Pool.Error('Cannot create enough workers') def wait_finish(self): self._queue.join() def stop(self): for t in self._threads: t.stop() def download(self, **kwargs): kwargs['retry'] = 0 self._queue.put(kwargs)
def run(self): args = list(islice(self.reqs, self.requests)) if self.shuffle: random.shuffle(args) print("Total requests: %d" % len(args)) print("Concurrency : %d" % self.concurrency) starttime = time.time() q, p = Queue(), Queue() for _ in six.moves.range(self.concurrency): t = Thread(target=worker, args=(self.host, q, p, self.verbose)) t.daemon = True t.start() for a in args: q.put(a) q.join() outputs = [] for _ in six.moves.range(self.requests): outputs.append(p.get()) elapsed = time.time() - starttime print() print("Total requests: %d" % len(args)) print("Concurrency : %d" % self.concurrency) print("Elapsed time : %.3fs" % elapsed) print("Avg time p/req: %.3fs" % (elapsed / len(args))) print("Received (per status code or error):") for c, n in Counter(outputs).items(): print(" %s: %d" % (c, n))
def lines(self, fuseki_process): """ Provides an iterator generating the encoded string representation of each member of this metarelate mapping translation. Returns: An iterator of string. """ msg = '\tGenerating phenomenon translation {!r}.' print(msg.format(self.mapping_name)) lines = ['\n%s = {\n' % self.mapping_name] # Retrieve encodings for the collection of mapping instances. # Retrieval is threaded as it is heavily bound by resource resolution # over http. # Queue for metarelate mapping instances mapenc_queue = Queue() for mapping in self.mappings: mapenc_queue.put(mapping) # deque to contain the results of the jobs processed from the queue mapencs = deque() # run worker threads for i in range(MAXTHREADS): MappingEncodeWorker(mapenc_queue, mapencs, fuseki_process).start() # block progress until the queue is empty mapenc_queue.join() # end of threaded retrieval process. # now sort the payload payload = [mapenc.encoding for mapenc in mapencs] payload.sort(key=self._key) lines.extend(payload) lines.append(' }\n') return iter(lines)
class Executor(object): _INTERRUPT = object() def __init__(self, num_workers=1): super(Executor, self).__init__() self._queue = Queue() self._workers = [] for _ in range(num_workers): th = Thread(target=self._work) th.start() self._workers.append(th) def submit(self, task): self._queue.put(task) def shutdown(self): for _ in self._workers: self._queue.put(self._INTERRUPT) def join(self): for worker in self._workers: worker.join() def _work(self): while True: task = self._queue.get(block=True) if task is self._INTERRUPT: break try: task() except BaseException as e: logger.exception(e)
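# A short sketch of driving the Executor above: submit a few callables, then
# shutdown() (which enqueues one interrupt sentinel per worker) and join().
# Because the queue is FIFO, tasks submitted before shutdown() are still executed.
results = []

def make_task(i):
    return lambda: results.append(i * i)

pool = Executor(num_workers=2)
for i in range(5):
    pool.submit(make_task(i))
pool.shutdown()
pool.join()
print(sorted(results))    # [0, 1, 4, 9, 16]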
class Search(TracePosterior): """ Trace and Poutine-based implementation of systematic search. :param callable model: Probabilistic model defined as a function. :param int max_tries: The maximum number of times to try completing a trace from the queue. """ def __init__(self, model, max_tries=1e6): """ Constructor. Defaults `max_tries` to something sensible: 1e6. :param callable model: Probabilistic model defined as a function. :param int max_tries: The maximum number of times to try completing a trace from the queue. """ self.model = model self.max_tries = int(max_tries) def _traces(self, *args, **kwargs): """ Algorithm entry point: running until the queue is empty and collecting the marginal histogram amounts to performing exact inference. :returns: Iterator of traces from the posterior. :rtype: Generator[:class:`pyro.Trace`] """ # currently only using the standard library queue self.queue = Queue() self.queue.put(poutine.Trace()) p = poutine.trace( poutine.queue(self.model, queue=self.queue, max_tries=self.max_tries)) while not self.queue.empty(): tr = p.get_trace(*args, **kwargs) yield (tr, tr.log_pdf())
def acquireResource(self, namespace, name, lockType, timeout=None): """ Acquire a resource synchronously. :returns: a reference to the resource. """ if timeout is not None: try: timeout = int(timeout) except ValueError: raise TypeError("'timeout' must be number") resource = Queue() def callback(req, res): resource.put(res) request = self.registerResource(namespace, name, lockType, callback) request.wait(timeout) if not request.granted(): try: request.cancel() raise RequestTimedOutError("Request timed out. Could not " "acquire resource '%s.%s'" % (namespace, name)) except RequestAlreadyProcessedError: # We might have acquired the resource between 'wait' and # 'cancel' if request.canceled(): raise se.ResourceAcqusitionFailed() return resource.get()
def test_acquire_contextmanager(self): class TestedClass(Monitor): def __init__(self, cqueue): self.cqueue = cqueue Monitor.__init__(self) @Monitor.synchronized def execute(self): self.cqueue.put(1) sleep(1) self.cqueue.get() class TesterThread(Thread): def __init__(self, tc): self.tc = tc Thread.__init__(self) def run(self): self.tc.execute() cq = Queue() cq.put(1) tc = TestedClass(cq) tt = TesterThread(tc) with Monitor.acquire(tc): tt.start() sleep(0.4) self.assertEqual(cq.qsize(), 1)
def parallel_execute_stream(objects, func, get_deps): if get_deps is None: get_deps = _no_deps results = Queue() state = State(objects) while not state.is_done(): for event in feed_queue(objects, func, get_deps, results, state): yield event try: event = results.get(timeout=0.1) except Empty: continue # See https://github.com/docker/compose/issues/189 except thread.error: raise ShutdownException() obj, _, exception = event if exception is None: log.debug('Finished processing: {}'.format(obj)) state.finished.add(obj) else: log.debug('Failed: {}'.format(obj)) state.failed.add(obj) yield event
def test_monitoring(self): class TestedClass(Monitor): def __init__(self, cqueue): self.cqueue = cqueue Monitor.__init__(self) @Monitor.synchronized def execute(self): self.cqueue.put(1) sleep(1) self.cqueue.get() class TesterThread(Thread): def __init__(self, tc): self.tc = tc Thread.__init__(self) def run(self): self.tc.execute() q = Queue() tc = TestedClass(q) a, b = TesterThread(tc), TesterThread(tc) a.start(), b.start() while a.is_alive() or b.is_alive(): sleep(0.1) self.assertNotEqual(q.qsize(), 2)
def run_with_timeout_and_stack(request, timeout): ''' interrupts evaluation after a given time period. provides a suitable stack environment. ''' # only use set_thread_stack_size if max recursion depth was changed via the environment variable # MATHICS_MAX_RECURSION_DEPTH. if it is set, we always use a thread, even if timeout is None, in # order to be able to set the thread stack size. if MAX_RECURSION_DEPTH > settings.DEFAULT_MAX_RECURSION_DEPTH: set_thread_stack_size(python_stack_size(MAX_RECURSION_DEPTH)) elif timeout is None: return request() queue = Queue(maxsize=1) # stores the result or exception thread = Thread(target=_thread_target, args=(request, queue)) thread.start() thread.join(timeout) if thread.is_alive(): raise TimeoutInterrupt() success, result = queue.get() if success: return result else: six.reraise(*result)
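# run_with_timeout_and_stack above relies on a _thread_target helper that is not
# shown in this excerpt; a plausible sketch is below. It records either
# (True, result) or (False, sys.exc_info()) in the queue, so the caller can
# return the value or re-raise the original exception via six.reraise.
import sys

def _thread_target(request, queue):
    try:
        result = request()
    except BaseException:
        queue.put((False, sys.exc_info()))
    else:
        queue.put((True, result))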
class ThreadTestCase(testtools.TestCase): def setUp(self): super(ThreadTestCase, self).setUp() self.got_items = Queue() self.got_args_kwargs = Queue() self.starting_thread_count = threading.active_count() def _func(self, conn, item, *args, **kwargs): self.got_items.put((conn, item)) self.got_args_kwargs.put((args, kwargs)) if item == 'sleep': sleep(1) if item == 'go boom': raise Exception('I went boom!') return 'success' def _create_conn(self): return "This is a connection" def _create_conn_fail(self): raise Exception("This is a failed connection") def assertQueueContains(self, queue, expected_contents): got_contents = [] try: while True: got_contents.append(queue.get(timeout=0.1)) except Empty: pass if isinstance(expected_contents, set): got_contents = set(got_contents) self.assertEqual(expected_contents, got_contents)
def inner(self, *args, **kwargs): if self.use_post_event: # create ephemeral queue q = Queue() # create an invocation that calls the decorated function class Invocation(object): def __call__(killme): # when the invocation is called, we call the function and stick the result into the queue try: res = func(self, *args, **kwargs) except Exception as e: # if we got an exception, just queue that instead res = e q.put(res) # post this invocation to be called on the main thread at the next opportunity gdb.post_event(Invocation()) # now we wait until there's something in the queue, which indicates that the invocation has run and return # the result that was pushed onto the queue by the invocation res = q.get() # if we got an exception back from the posted event, raise it if isinstance(res, Exception): raise res return res else: return func(self, *args, **kwargs)
class ThreadTestCase(testtools.TestCase): def setUp(self): super(ThreadTestCase, self).setUp() self.got_args_kwargs = Queue() self.starting_thread_count = threading.active_count() def _func(self, q_item, *args, **kwargs): self.got_items.put(q_item) self.got_args_kwargs.put((args, kwargs)) if q_item == 'go boom': raise Exception('I went boom!') if q_item == 'c boom': raise ClientException( 'Client Boom', http_scheme='http', http_host='192.168.22.1', http_port=80, http_path='/booze', http_status=404, http_reason='to much', http_response_content='no sir!') return 'best result EVAR!' def assertQueueContains(self, queue, expected_contents): got_contents = [] try: while True: got_contents.append(queue.get(timeout=0.1)) except Empty: pass if isinstance(expected_contents, set): got_contents = set(got_contents) self.assertEqual(expected_contents, got_contents)
def test_equipped(self): """ Create an equipped worker that will use an internal Counter resource to keep track of the job count. """ results = Queue() def toolbox_factory(): return Counter() def worker_factory(job_queue): return workerpool.EquippedWorker(job_queue, toolbox_factory) pool = workerpool.WorkerPool(1, worker_factory=worker_factory) # Run 10 jobs for i in range(10): j = CountJob(results) pool.put(j) # Get 10 results for i in range(10): r = results.get() # Each result should be an incremented value self.assertEqual(r, i) pool.shutdown()
def __init__(self, app, disk_image): super(ScsiDevice, self).__init__(app, None) self.disk_image = disk_image self.handlers = { ScsiCmds.INQUIRY: self.handle_inquiry, ScsiCmds.REQUEST_SENSE: self.handle_request_sense, ScsiCmds.TEST_UNIT_READY: self.handle_test_unit_ready, ScsiCmds.READ_CAPACITY_10: self.handle_read_capacity_10, # ScsiCmds.SEND_DIAGNOSTIC: self.handle_send_diagnostic, ScsiCmds.PREVENT_ALLOW_MEDIUM_REMOVAL: self.handle_prevent_allow_medium_removal, ScsiCmds.WRITE_10: self.handle_write_10, ScsiCmds.READ_10: self.handle_read_10, # ScsiCmds.WRITE_6: self.handle_write_6, # ScsiCmds.READ_6: self.handle_read_6, # ScsiCmds.VERIFY_10: self.handle_verify_10, ScsiCmds.MODE_SENSE_6: self.handle_mode_sense_6, ScsiCmds.MODE_SENSE_10: self.handle_mode_sense_10, ScsiCmds.READ_FORMAT_CAPACITIES: self.handle_read_format_capacities, ScsiCmds.SYNCHRONIZE_CACHE: self.handle_synchronize_cache, } self.tx = Queue() self.rx = Queue() self.stop_event = Event() self.thread = Thread(target=self.handle_data_loop) self.thread.daemon = True self.thread.start() self.is_write_in_progress = False self.write_cbw = None self.write_base_lba = 0 self.write_length = 0 self.write_data = b''
def generator_to_async_generator(get_iterable): """ Turn a generator or iterable into an async generator. This works by running the generator in a background thread. The new async generator will yield both `Future` objects as well as the original items. :param get_iterable: Function that returns a generator or iterable when called. """ q = Queue() f = Future() l = RLock() quitting = False def runner(): """ Consume the generator in background thread. When items are received, they'll be pushed to the queue and the Future is set. """ for item in get_iterable(): with l: q.put(item) if not f.done(): f.set_result(None) # When this async generator was cancelled (closed), stop this # thread. if quitting: break with l: if not f.done(): f.set_result(None) # Start background thread. done_f = run_in_executor(runner, _daemon=True) try: while not done_f.done(): # Wait for next item(s): yield Future. yield From(f) # Items received. Yield all items so far. with l: while not q.empty(): yield AsyncGeneratorItem(q.get()) f = Future() # Yield final items. while not q.empty(): yield q.get() finally: # When this async generator is closed (GeneratorExit exception, stop # the background thread as well. - we don't need that anymore.) quitting = True
class TagGroup( object ): ''' Process groups of tag reads and return the best time estimated using quadratic regression. Stray reads are also detected if there is no quiet period for the tag. The first read time of each stray read is returned. ''' def __init__( self ): self.q = Queue() self.tagInfo = {} def add( self, antenna, tag, t, db ): self.q.put((antenna, tag, t, db)) def flush( self ): # Process all waiting reads. while 1: try: antenna, tag, t, db = self.q.get(False) except Empty: break try: self.tagInfo[tag].add( antenna, t, db ) except KeyError: self.tagInfo[tag] = TagGroupEntry( antenna, t, db ) self.q.task_done() def getReadsStrays( self, tNow=None, method=QuadraticRegressionMethod, antennaChoice=MostReadsChoice, removeOutliers=True ): ''' Returns two lists: reads = [(tag1, t1, sampleSize1, antennaID1), (tag2, t2, sampleSize2, , antennaID2), ...] strays = [(tagA, tFirstReadA), (tagB, tFirstReadB), ...] Each stray will be reported as a read the first time it is detected. ''' self.flush() trNow = datetimeToTr( tNow or datetime.now() ) reads, strays = [], [] toDelete = [] for tag, tge in six.iteritems(self.tagInfo): if trNow - tge.lastReadMax >= tQuiet: # Tag has left read range. if not tge.isStray: t, sampleSize, antennaID = tge.getBestEstimate(method, antennaChoice, removeOutliers) reads.append( (tag, t, sampleSize, antennaID) ) toDelete.append( tag ) elif tge.lastReadMax - tge.firstReadMin >= tStray: # This is a stray. t = trToDatetime( tge.firstReadMin ) if not tge.isStray: tge.setStray() reads.append( (tag, t, 1, 0) ) # Report stray first read time. strays.append( (tag, t) ) for tag in toDelete: del self.tagInfo[tag] reads.sort( key=operator.itemgetter(1,0)) strays.sort( key=operator.itemgetter(1,0) ) return reads, strays
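# Usage sketch for TagGroup: feed it raw (antenna, tag, time, dB) reads as they
# arrive from the reader, then periodically ask for consolidated reads and strays.
# The helpers it relies on (TagGroupEntry, datetimeToTr, tQuiet, ...) come from
# the surrounding module; the tag value below is just a placeholder EPC.
from datetime import datetime, timedelta

group = TagGroup()
start = datetime.now()
for i in range(5):
    group.add(antenna=1, tag='E2801160600002054CE9', t=start + timedelta(seconds=0.1 * i), db=-55 + i)

# Later (e.g. on a timer), once the tag has gone quiet:
reads, strays = group.getReadsStrays()
for tag, t, sample_size, antenna_id in reads:
    print(tag, t, sample_size, antenna_id)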
def test_item_is_stop_without_cascade_stop(self): queue = Queue() for item in QueueItem.stop(), QueueItem.new('a'), QueueItem.new('b'): queue.put(item) generator = consume_queue(queue, False) assert next(generator) == 'a' assert next(generator) == 'b'
def test_item_is_stop_with_cascade_stop(self): """Return the name of the container that caused the cascade_stop""" queue = Queue() for item in QueueItem.stop('foobar-1'), QueueItem.new('a'), QueueItem.new('b'): queue.put(item) generator = consume_queue(queue, True) assert next(generator) == 'foobar-1'
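# consume_queue and QueueItem come from the module under test and are not shown
# here. A simplified sketch that is consistent with the two tests above (but is
# not the real implementation) would be: skip stop items unless cascade_stop is
# set, in which case yield the name carried by the stop item and finish.
def consume_queue_sketch(queue, cascade_stop):
    while True:
        item = queue.get()
        if item.is_stop:
            if cascade_stop:
                yield item.item    # e.g. the name of the container that stopped
                return
            continue
        if item.exc:
            raise item.exc
        yield item.item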
def daily_metadata(year, month, day, dst_folder, writers=[file_writer], geometry_check=None, num_worker_threads=1): """ Extra metadata for all products in a specific date """ threaded = False counter = { 'products': 0, 'saved_tiles': 0, 'skipped_tiles': 0, 'skipped_tiles_paths': [] } if num_worker_threads > 1: threaded = True queue = Queue() # create folders year_dir = os.path.join(dst_folder, str(year)) month_dir = os.path.join(year_dir, str(month)) day_dir = os.path.join(month_dir, str(day)) product_list = get_products_metadata_path(year, month, day) logger.info('There are %s products in %s-%s-%s' % (len(list(iterkeys(product_list))), year, month, day)) for name, product in iteritems(product_list): product_dir = os.path.join(day_dir, name) if threaded: queue.put([product, product_dir, counter, writers, geometry_check]) else: counter = product_metadata(product, product_dir, counter, writers, geometry_check) if threaded: def worker(): while not queue.empty(): args = queue.get() try: product_metadata(*args) except Exception: exc = sys.exc_info() logger.error('%s tile skipped due to error: %s' % (threading.current_thread().name, exc[1].__str__())) args[2]['skipped_tiles'] += 1 queue.task_done() threads = [] for i in range(num_worker_threads): t = threading.Thread(target=worker) t.start() threads.append(t) queue.join() return counter
def _put_and_notify(self): with self.not_empty: while self.delayed: when, item = heapq.heappop(self.delayed) if when <= time.time(): Queue._put(self, item) self.not_empty.notify() else: heapq.heappush(self.delayed, (when, item)) break
class AutocheckEventHandler(RegexMatchingEventHandler): def __init__(self, filepattern=DEFAULT_FILEPATTERN): self.queue = Queue() super(AutocheckEventHandler, self).__init__( regexes=[filepattern], ignore_directories=True, case_sensitive=False) def on_any_event(self, event): self.queue.put(event)
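# Wiring sketch for AutocheckEventHandler above: schedule it on a watchdog
# Observer and drain its queue. Observer comes from the same watchdog package
# that provides RegexMatchingEventHandler; the path being watched is arbitrary.
from watchdog.observers import Observer

handler = AutocheckEventHandler()
observer = Observer()
observer.schedule(handler, path='.', recursive=True)
observer.start()
try:
    while True:
        event = handler.queue.get()     # blocks until a matching file event arrives
        print(event.event_type, event.src_path)
except KeyboardInterrupt:
    pass
finally:
    observer.stop()
    observer.join()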
class FrameSaver( threading.Thread ): def __init__( self ): threading.Thread.__init__( self ) self.daemon = True self.name = 'FrameSaver' self.reset() def reset( self ): self.queue = Queue() def run( self ): self.reset() while 1: message = self.queue.get() if message[0] == 'Save': cmd, fileName, bib, t, frame = message #sys.stderr.write( 'save' ) PhotoFinish.SavePhoto( fileName, bib, t, frame ) self.queue.task_done() elif message[0] == 'Terminate': self.queue.task_done() self.reset() break def stop( self ): self.queue.put( ['Terminate'] ) self.join() def save( self, fileName, bib, t, frame ): self.queue.put( ['Save', fileName, bib, t, frame] )
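# Usage sketch for FrameSaver: the thread is driven entirely through its queue,
# so the lifecycle is start(), any number of save() calls, then stop() to drain
# and join. The arguments below are placeholders; PhotoFinish.SavePhoto from the
# surrounding module does the actual writing. Note that run() calls reset() and
# replaces the queue when the thread starts, so save() is only called once the
# thread is running.
saver = FrameSaver()
saver.start()
saver.save('bib-123.jpg', bib=123, t=race_time, frame=captured_frame)  # placeholder values
saver.stop()    # enqueues 'Terminate' and joins after queued saves are processed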
def parallel_execute(objects, obj_callable, msg_index, msg): """ For a given list of objects, call the callable passing in the first object we give it. """ stream = get_output_stream(sys.stdout) lines = [] errors = {} for obj in objects: write_out_msg(stream, lines, msg_index(obj), msg) q = Queue() def inner_execute_function(an_callable, parameter, msg_index): try: result = an_callable(parameter) except APIError as e: errors[msg_index] = e.explanation result = "error" except Exception as e: errors[msg_index] = e result = 'unexpected_exception' q.put((msg_index, result)) for an_object in objects: t = Thread( target=inner_execute_function, args=(obj_callable, an_object, msg_index(an_object)), ) t.daemon = True t.start() done = 0 total_to_execute = len(objects) while done < total_to_execute: try: msg_index, result = q.get(timeout=1) if result == 'unexpected_exception': raise errors[msg_index] if result == 'error': write_out_msg(stream, lines, msg_index, msg, status='error') else: write_out_msg(stream, lines, msg_index, msg) done += 1 except Empty: pass if errors: stream.write("\n") for error in errors: stream.write("ERROR: for {} {} \n".format(error, errors[error]))
def _put(self, item): delay, item = item if delay: if self.task.running: heapq.heappush(self.delayed, (time.time() + delay, item)) else: message = 'TimeDelayQueue.put called with a delay parameter without background task having been started' log.warning(message) warn(message) else: Queue._put(self, item)
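# Taken together with the _put_and_notify method shown earlier in this file, the
# _put override above implies a Queue subclass that accepts (delay, item) pairs
# and only releases delayed items once their time has arrived. Below is a
# self-contained sketch of that pattern using just the stdlib; the background
# task / warning machinery of the original TimeDelayQueue is deliberately omitted.
import heapq
import time
from six.moves.queue import Queue


class SimpleDelayQueue(Queue):
    def _init(self, maxsize):
        Queue._init(self, maxsize)
        self.delayed = []                                # heap of (release_time, item)

    def _put(self, item):
        delay, payload = item                            # put() is called with (delay, item)
        if delay:
            heapq.heappush(self.delayed, (time.time() + delay, payload))
        else:
            Queue._put(self, payload)

    def release_due(self):
        # Move every delayed item whose release time has passed into the live queue.
        with self.not_empty:
            while self.delayed and self.delayed[0][0] <= time.time():
                _, payload = heapq.heappop(self.delayed)
                Queue._put(self, payload)
                self.not_empty.notify()


dq = SimpleDelayQueue()
dq.put((0, 'now'))          # available immediately
dq.put((0.5, 'later'))      # held back for ~0.5s
print(dq.get())             # 'now'
time.sleep(0.6)
dq.release_due()
print(dq.get())             # 'later'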
def handle_reset(self): self.debug('handling reset') if self.is_write_in_progress and self.write_data: self.disk_image.put_sector_data(self.write_base_lba, self.write_data) self.is_write_in_progress = False self.write_cbw = None self.write_base_lba = 0 self.write_length = 0 self.write_data = b'' self.tx = Queue() self.rx = Queue()
def __init__(self, spider, cache): self.spider = spider self.cache = cache self.idle_event = Event() self.queue_size = 100 self.input_queue = Queue() self.result_queue = Queue() self.thread = Thread(target=self.thread_worker) self.thread.daemon = True self.thread.start()
def __init__(self): self.trade_queue = Queue() self.expired_cmds = set() self.s = requests.Session()
#!/usr/bin/env python from cbapi.response.models import Binary from cbapi.example_helpers import build_cli_parser, get_cb_response_object import sys from six.moves.queue import Queue import os import threading import json import logging worker_queue = Queue(maxsize=50) def get_path_for_md5(d, basepath=''): d = d.upper() return os.path.join(basepath, d[:3], d[3:6], d) def create_directory(pathname): try: os.makedirs(os.path.dirname(pathname)) except: pass class BinaryWorker(threading.Thread): def __init__(self, basepath): self.basepath = basepath threading.Thread.__init__(self)
from __future__ import unicode_literals import os from threading import Thread, RLock from six.moves.queue import Queue from carry import exc from carry.logger import logger from carry.task import TaskFactory, RDBToRDBTask, RDBToCSVTask from carry.utils import topological_find, topological_remove _thread_count = 0 _work_queue = Queue() class ThreadPoolManger(object): def __init__(self, thread_num): self.work_queue = _work_queue self.thread_num = thread_num self.__init_threading_pool(self.thread_num) def __init_threading_pool(self, thread_num): global _thread_count for i in range(_thread_count, thread_num): thread = ThreadManger(self.work_queue) thread.start() _thread_count = thread_num def add_job(self, func, *args): self.work_queue.put((func, args))
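# ThreadManger is referenced by ThreadPoolManger above but not included in this
# excerpt. A plausible minimal worker, consistent with how add_job() enqueues
# (func, args) tuples, might look like this (the real class may differ).
class ThreadManger(Thread):
    def __init__(self, work_queue):
        super(ThreadManger, self).__init__()
        self.work_queue = work_queue
        self.daemon = True

    def run(self):
        while True:
            func, args = self.work_queue.get()
            try:
                func(*args)
            except Exception as e:
                logger.error('task failed: %s', e)
            finally:
                self.work_queue.task_done()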
class BaseFollower(object): LOGIN_PAGE = '' LOGIN_API = '' TRANSACTION_API = '' CMD_CACHE_FILE = 'cmd_cache.pk' WEB_REFERER = '' def __init__(self): self.trade_queue = Queue() self.expired_cmds = set() self.s = requests.Session() def login(self, user, password, **kwargs): # mock headers headers = { 'Accept': 'application/json, text/javascript, */*; q=0.01', 'Accept-Encoding': 'gzip, deflate, br', 'Accept-Language': 'en-US,en;q=0.8', 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.100 Safari/537.36', 'Referer': self.WEB_REFERER, 'X-Requested-With': 'XMLHttpRequest', 'Origin': 'https://www.joinquant.com', 'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8', } self.s.headers.update(headers) # init cookie self.s.get(self.LOGIN_PAGE) # post for login params = self.create_login_params(user, password, **kwargs) rep = self.s.post(self.LOGIN_API, data=params) self.check_login_success(rep) log.info('登录成功') def check_login_success(self, rep): """检查登录状态是否成功 :param rep: post login 接口返回的 response 对象 :raise 如果登录失败应该抛出 NotLoginError """ pass def create_login_params(self, user, password, **kwargs): """生成 post 登录接口的参数 :param user: 用户名 :param password: 密码 :return dict 登录参数的字典 """ pass def follow(self, users, strategies, track_interval=1, trade_cmd_expire_seconds=120, cmd_cache=True, **kwargs): """跟踪joinquant对应的模拟交易,支持多用户多策略 :param users: 支持easytrader的用户对象,支持使用 [] 指定多个用户 :param strategies: 雪球组合名, 类似 ZH123450 :param total_assets: 雪球组合对应的总资产, 格式 [ 组合1对应资金, 组合2对应资金 ] 若 strategies=['ZH000001', 'ZH000002'] 设置 total_assets=[10000, 10000], 则表明每个组合对应的资产为 1w 元, 假设组合 ZH000001 加仓 价格为 p 股票 A 10%, 则对应的交易指令为 买入 股票 A 价格 P 股数 1w * 10% / p 并按 100 取整 :param initial_assets:雪球组合对应的初始资产, 格式 [ 组合1对应资金, 组合2对应资金 ] 总资产由 初始资产 × 组合净值 算得, total_assets 会覆盖此参数 :param track_interval: 轮询模拟交易时间,单位为秒 :param trade_cmd_expire_seconds: 交易指令过期时间, 单位为秒 :param cmd_cache: 是否读取存储历史执行过的指令,防止重启时重复执行已经交易过的指令 """ users = self.warp_list(users) strategies = self.warp_list(strategies) total_assets = self.warp_list(kwargs.get('total_assets')) initial_assets = self.warp_list(kwargs.get('initial_assets')) if cmd_cache: self.load_expired_cmd_cache() self.start_trader_thread(users, trade_cmd_expire_seconds) for strategy_url, strategy_total_assets, strategy_initial_assets in zip(strategies, total_assets, initial_assets): assets = self.calculate_assets(strategy_url, strategy_total_assets, strategy_initial_assets) try: strategy_id = self.extract_strategy_id(strategy_url) strategy_name = self.extract_strategy_name(strategy_url) except: log.error('抽取交易id和策略名失败, 无效的模拟交易url: {}'.format(strategy_url)) raise strategy_worker = Thread(target=self.track_strategy_worker, args=[strategy_id, strategy_name], kwargs={'interval': track_interval, 'assets': assets}) strategy_worker.start() log.info('开始跟踪策略: {}'.format(strategy_name)) def load_expired_cmd_cache(self): if os.path.exists(self.CMD_CACHE_FILE): with open(self.CMD_CACHE_FILE, 'rb') as f: self.expired_cmds = pickle.load(f) def start_trader_thread(self, users, trade_cmd_expire_seconds): trader = Thread(target=self.trade_worker, args=[users], kwargs={'expire_seconds': trade_cmd_expire_seconds}) trader.setDaemon(True) trader.start() @staticmethod def warp_list(value): if not isinstance(value, list): value = [value] return value @staticmethod def extract_strategy_id(strategy_url): """ 抽取 策略 id,一般用于获取策略相关信息 :param strategy_url: 策略 url :return: str 策略 id """ pass def extract_strategy_name(self, strategy_url): """ 抽取 策略名,主要用于日志打印,便于识别 :param strategy_url: 
:return: str 策略名 """ pass def track_strategy_worker(self, strategy, name, interval=10, **kwargs): """跟踪下单worker :param strategy: 策略id :param name: 策略名字 :param interval: 轮训策略的时间间隔,单位为秒""" while True: transactions = self.query_strategy_transaction(strategy, **kwargs) for t in transactions: trade_cmd = { 'strategy': strategy, 'strategy_name': name, 'action': t['action'], 'stock_code': t['stock_code'], 'amount': t['amount'], 'price': t['price'], 'datetime': t['datetime'] } if self.is_cmd_expired(trade_cmd): continue log.info('策略 [{}] 发送指令到交易队列, 股票: {} 动作: {} 数量: {} 价格: {} 信号产生时间: {}'.format( name, trade_cmd['stock_code'], trade_cmd['action'], trade_cmd['amount'], trade_cmd['price'], trade_cmd['datetime'] )) self.trade_queue.put(trade_cmd) self.add_cmd_to_expired_cmds(trade_cmd) try: for _ in range(interval): time.sleep(1) except KeyboardInterrupt: log.info('程序退出') break @staticmethod def generate_expired_cmd_key(cmd): return '{}_{}_{}_{}_{}_{}'.format( cmd['strategy_name'], cmd['stock_code'], cmd['action'], cmd['amount'], cmd['price'], cmd['datetime']) def is_cmd_expired(self, cmd): key = self.generate_expired_cmd_key(cmd) return key in self.expired_cmds def add_cmd_to_expired_cmds(self, cmd): key = self.generate_expired_cmd_key(cmd) self.expired_cmds.add(key) with open(self.CMD_CACHE_FILE, 'wb') as f: pickle.dump(self.expired_cmds, f) @staticmethod def _is_number(s): try: float(s) return True except ValueError: return False def trade_worker(self, users, expire_seconds=120): while True: trade_cmd = self.trade_queue.get() for user in users: # check expire now = datetime.now() expire = (now - trade_cmd['datetime']).total_seconds() if expire > expire_seconds: log.warning( '策略 [{}] 指令(股票: {} 动作: {} 数量: {} 价格: {})超时,指令产生时间: {} 当前时间: {}, 超过设置的最大过期时间 {} 秒, 被丢弃'.format( trade_cmd['strategy_name'], trade_cmd['stock_code'], trade_cmd['action'], trade_cmd['amount'], trade_cmd['price'], trade_cmd['datetime'], now, expire_seconds)) break # check price price = trade_cmd['price'] if not self._is_number(price) or price <= 0: log.warning( '策略 [{}] 指令(股票: {} 动作: {} 数量: {} 价格: {})超时,指令产生时间: {} 当前时间: {}, 价格无效 , 被丢弃'.format( trade_cmd['strategy_name'], trade_cmd['stock_code'], trade_cmd['action'], trade_cmd['amount'], trade_cmd['price'], trade_cmd['datetime'], now)) break # check amount if trade_cmd['amount'] <= 0: log.warning( '策略 [{}] 指令(股票: {} 动作: {} 数量: {} 价格: {})超时,指令产生时间: {} 当前时间: {}, 买入股数无效 , 被丢弃'.format( trade_cmd['strategy_name'], trade_cmd['stock_code'], trade_cmd['action'], trade_cmd['amount'], trade_cmd['price'], trade_cmd['datetime'], now)) break args = { 'stock_code': trade_cmd['stock_code'], 'price': trade_cmd['price'], 'amount': trade_cmd['amount'] } try: response = getattr(user, trade_cmd['action'])(**args) except Exception as e: trader_name = type(user).__name__ err_msg = '{}: {}'.format(type(e).__name__, e.message) log.error( '{} 执行 策略 [{}] 指令(股票: {} 动作: {} 数量: {} 价格: {} 指令产生时间: {}) 失败, 错误信息: {}'.format( trader_name, trade_cmd['strategy_name'], trade_cmd['stock_code'], trade_cmd['action'], trade_cmd['amount'], trade_cmd['price'], trade_cmd['datetime'], err_msg)) continue log.info( '策略 [{}] 指令(股票: {} 动作: {} 数量: {} 价格: {} 指令产生时间: {}) 执行成功, 返回: {}'.format( trade_cmd['strategy_name'], trade_cmd['stock_code'], trade_cmd['action'], trade_cmd['amount'], trade_cmd['price'], trade_cmd['datetime'], response)) def query_strategy_transaction(self, strategy, **kwargs): params = self.create_query_transaction_params(strategy) rep = self.s.get(self.TRANSACTION_API, params=params) history = rep.json() 
transactions = self.extract_transactions(history) self.project_transactions(transactions, **kwargs) return self.order_transactions_sell_first(transactions) def extract_transactions(self, history): """ 抽取接口返回中的调仓记录列表 :param history: 调仓接口返回信息的字典对象 :return: [] 调参历史记录的列表 """ pass def create_query_transaction_params(self, strategy): """ 生成用于查询调参记录的参数 :param strategy: 策略 id :return: dict 调参记录参数 """ pass @staticmethod def re_find(pattern, string, dtype=str): return dtype(re.search(pattern, string).group()) def project_transactions(self, transactions, **kwargs): """ 修证调仓记录为内部使用的统一格式 :param transactions: [] 调仓记录的列表 :return: [] 修整后的调仓记录 """ pass def order_transactions_sell_first(self, transactions): # 调整调仓记录的顺序为先卖再买 sell_first_transactions = [] for t in transactions: if t['action'] == 'sell': sell_first_transactions.insert(0, t) else: sell_first_transactions.append(t) return sell_first_transactions
def __init__(self): self._queue = TrQueue() self._thread = Thread(target=self._worker) self._thread.daemon = True self._thread.start()
pModel = patch_config_as_nothrow(pModel) pOpt = patch_config_as_nothrow(pOpt) pTest = patch_config_as_nothrow(pTest) sym = pModel.test_symbol sym.save(pTest.model.prefix + "_mask_test.json") image_sets = pDataset.image_set roidbs_all = [pkl.load(open("data/cache/{}.roidb".format(i), "rb"), encoding="latin1") for i in image_sets] roidbs_all = reduce(lambda x, y: x + y, roidbs_all) from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval coco = COCO(pTest.coco.annotation) data_queue = Queue(100) result_queue = Queue() execs = [] workers = [] coco_result = [] split_size = 1000 for index_split in range(int(math.ceil(len(roidbs_all) / split_size))): print("evaluating [%d, %d)" % (index_split * split_size, (index_split + 1) * split_size)) roidb = roidbs_all[index_split * split_size:(index_split + 1) * split_size] roidb = pTest.process_roidb(roidb) for i, x in enumerate(roidb): x["rec_id"] = i loader = Loader(roidb=roidb,
class WSClient(object): """Provides a basic means of testing pub/sub notifications with payloads similar to 'groups': {'jobs': ['status_changed', 'summary'], 'schedules': ['changed'], 'ad_hoc_command_events': [ids...], 'job_events': [ids...], 'workflow_events': [ids...], 'project_update_events': [ids...], 'inventory_update_events': [ids...], 'system_job_events': [ids...], 'control': ['limit_reached']} e.x: ``` ws = WSClient(token, port=8013, secure=False).connect() ws.job_details() ... # launch job job_messages = [msg for msg in ws] ws.ad_hoc_stdout() ... # launch ad hoc command ad_hoc_messages = [msg for msg in ws] ws.close() ``` """ # Subscription group types def __init__(self, token=None, hostname='', port=443, secure=True, session_id=None, csrftoken=None): # delay this import, because this is an optional dependency import websocket if not hostname: result = urlparse(config.base_url) secure = result.scheme == 'https' port = result.port if port is None: port = 80 if secure: port = 443 # should we be adding result.path here? hostname = result.hostname self.port = port self._use_ssl = secure self.hostname = hostname self.token = token self.session_id = session_id self.csrftoken = csrftoken self._recv_queue = Queue() self._ws_closed = False self._ws_connected_flag = threading.Event() if self.token is not None: auth_cookie = 'token="{0.token}";'.format(self) elif self.session_id is not None: auth_cookie = 'sessionid="{0.session_id}"'.format(self) if self.csrftoken: auth_cookie += ';csrftoken={0.csrftoken}'.format(self) else: auth_cookie = '' pref = 'wss://' if self._use_ssl else 'ws://' url = '{0}{1.hostname}:{1.port}/websocket/'.format(pref, self) self.ws = websocket.WebSocketApp(url, on_open=self._on_open, on_message=self._on_message, on_error=self._on_error, on_close=self._on_close, cookie=auth_cookie) self._message_cache = [] self._should_subscribe_to_pending_job = False def connect(self): wst = threading.Thread(target=self._ws_run_forever, args=(self.ws, { "cert_reqs": ssl.CERT_NONE })) wst.daemon = True wst.start() atexit.register(self.close) if not self._ws_connected_flag.wait(20): raise WSClientException( 'Failed to establish channel connection w/ AWX.') return self def close(self): log.info('close method was called, but ignoring') if not self._ws_closed: log.info('Closing websocket connection.') self.ws.close() def job_details(self, *job_ids): """subscribes to job status, summary, and, for the specified ids, job events""" self.subscribe(jobs=[status_changed, summary], job_events=list(job_ids)) def pending_job_details(self): """subscribes to job status and summary, with responsive job event subscription for an id provided by AWX """ self.subscribe_to_pending_events('job_events', [status_changed, summary]) def status_changes(self): self.subscribe(jobs=[status_changed]) def job_stdout(self, *job_ids): self.subscribe(jobs=[status_changed], job_events=list(job_ids)) def pending_job_stdout(self): self.subscribe_to_pending_events('job_events') # mirror page behavior def ad_hoc_stdout(self, *ahc_ids): self.subscribe(jobs=[status_changed], ad_hoc_command_events=list(ahc_ids)) def pending_ad_hoc_stdout(self): self.subscribe_to_pending_events('ad_hoc_command_events') def project_update_stdout(self, *project_update_ids): self.subscribe(jobs=[status_changed], project_update_events=list(project_update_ids)) def pending_project_update_stdout(self): self.subscribe_to_pending_events('project_update_events') def inventory_update_stdout(self, *inventory_update_ids): 
self.subscribe(jobs=[status_changed], inventory_update_events=list(inventory_update_ids)) def pending_inventory_update_stdout(self): self.subscribe_to_pending_events('inventory_update_events') def workflow_events(self, *wfjt_ids): self.subscribe(jobs=[status_changed], workflow_events=list(wfjt_ids)) def pending_workflow_events(self): self.subscribe_to_pending_events('workflow_events') def system_job_events(self, *system_job_ids): self.subscribe(jobs=[status_changed], system_job_events=list(system_job_ids)) def pending_system_job_events(self): self.subscribe_to_pending_events('system_job_events') def subscribe_to_pending_events(self, events, jobs=[status_changed]): self._should_subscribe_to_pending_job = dict(jobs=jobs, events=events) self.subscribe(jobs=jobs) # mirror page behavior def jobs_list(self): self.subscribe(jobs=[status_changed, summary], schedules=[changed]) # mirror page behavior def dashboard(self): self.subscribe(jobs=[status_changed]) def subscribe(self, **groups): """Sends a subscription request for the specified channel groups. ``` ws.subscribe(jobs=[ws.status_changed, ws.summary], job_events=[1,2,3]) ``` """ self._subscribe(groups=groups) def _subscribe(self, **payload): payload['xrftoken'] = self.csrftoken self._send(json.dumps(payload)) def unsubscribe(self): self._send(json.dumps(dict(groups={}, xrftoken=self.csrftoken))) # it takes time for the unsubscribe event to be recieved and consumed and for # messages to stop being put on the queue for daphne to send to us time.sleep(5) def _on_message(self, message): message = json.loads(message) log.debug('received message: {}'.format(message)) if all([ message.get('group_name') == 'jobs', message.get('status') == 'pending', message.get('unified_job_id'), self._should_subscribe_to_pending_job ]): if bool(message.get('project_id')) == ( self._should_subscribe_to_pending_job['events'] == 'project_update_events'): self._update_subscription(message['unified_job_id']) return self._recv_queue.put(message) def _update_subscription(self, job_id): subscription = dict(jobs=self._should_subscribe_to_pending_job['jobs']) events = self._should_subscribe_to_pending_job['events'] subscription[events] = [job_id] self.subscribe(**subscription) self._should_subscribe_to_pending_job = False def _on_open(self): self._ws_connected_flag.set() def _on_error(self, error): log.info('Error received: {}'.format(error)) def _on_close(self): log.info('Successfully closed ws.') self._ws_closed = True def _ws_run_forever(self, sockopt=None, sslopt=None): self.ws.run_forever(sslopt=sslopt) log.debug('ws.run_forever finished') def _recv(self, wait=False, timeout=10): try: msg = self._recv_queue.get(wait, timeout) except Empty: return None return msg def _send(self, data): self.ws.send(data) log.debug('successfully sent {}'.format(data)) def __iter__(self): while True: val = self._recv() if not val: return yield val
class Scheduler(object): def __init__(self,ROLE=None,QUEUE_TYPE='PYTHON'): if ROLE == None: if QUEUE_TYPE == 'PYTHON': from six.moves.queue import Queue from utils.myset import NormalFilterSet as Set print("ROLE is {},QUEUE_TYPE is {}".format(ROLE, QUEUE_TYPE)) elif QUEUE_TYPE == 'REDIS': from utils.redisqueue import Queue from utils.myset import RedisFilterSet as Set print("ROLE is {},QUEUE_TYPE is {}".format(ROLE, QUEUE_TYPE)) else: raise ImportError( "Not Support this Role : <{}> or Not Support this QUEUETYPE: <{}>".format(ROLE, QUEUE_TYPE)) elif ROLE in ['master', 'slave']: from utils.redisqueue import Queue from utils.myset import RedisFilterSet as Set print("ROLE is {},QUEUE_TYPE is {}".format(ROLE, QUEUE_TYPE)) else: raise ImportError( "Not Support this Role : <{}> or Not Support this QUEUETYPE: <{}>".format(ROLE, QUEUE_TYPE)) self.queue = Queue() self._filter_set = Set() self.total_request = 0 def add_request(self, request): # 请求是否去重的控制,如果不去重,直接将请求加入请求队列 if request.dont_filter: self.queue.put(request) self.total_request += 1 else: fp = self._get_fingerprint(request) if self._filter_request(fp, request): self._filter_set.add_fp(fp) self.queue.put(request) self.total_request += 1 def get_request(self): try: return self.queue.get(False) except: return False def get_batch_requests(self, batch:int): li_req=[] for _ in range(batch): req=self.get_request() if req: li_req.append(req) else: break return li_req def _filter_request(self, fp, request): """ 请求去重,并返回判断结果 """ # 如果请求的url地址不在去重集合中,那么返回True,表示允许添加到请求队列中 #if fp not in self._filter_set: if not self._filter_set.is_filter(fp): return True else: # 否则,表示重复, 不允许添加 print("Filter request: [{}] <{}>".format(request.method, request.url)) return False def _get_fingerprint(self, request): import w3lib.url from hashlib import sha1 # 对url地址进行规整排序处理 url = w3lib.url.canonicalize_url(request.url) # 将请求方法转为大写处理 method = request.method.upper() # 保证返回一个字典(不管用户有没有传参,面sha1生成数据出错) params = request.params if request.params else {} params = str(sorted(params.items(), key=lambda x : x[0])) formdata = request.formdata if request.formdata else {} formdata = str(sorted(formdata.items(), key=lambda x : x[0])) sha1_data = sha1() # update()必须接收一个字节码字符串 python2 str unicode, python3 bytes str sha1_data.update(self._get_utf8_str(url)) sha1_data.update(self._get_utf8_str(method)) sha1_data.update(self._get_utf8_str(params)) sha1_data.update(self._get_utf8_str(formdata)) # 生成一个16进制数的字符串,做为请求指纹 fp = sha1_data.hexdigest() return fp # 判断字符串的类型,如果是Unicode则转为utf-8 def _get_utf8_str(self, string): if six.PY2: if isinstance(string, str): return string else: return string.encode("utf-8") else: if isinstance(string, bytes): return string else: return string.encode("utf-8")
def github(args=None): port = _find_available_port() if not port: print("Github sign in requires an open port, please open port 3000.") # Signal when the HTTP server has started server_started_queue = Queue() # Signal when we have the access token access_token_queue = Queue() # Server that we will run in the background to accept a post-OAuth redirect from # the Hyperdash server which will contain the user's access token def start_server(): class OAuthRedirectHandler(BaseHTTPServer.BaseHTTPRequestHandler): def do_GET(self): parsed_path = urlparse(self.path) query = parse_qs(parsed_path.query) access_token = query["access_token"][ 0] if "access_token" in query else None if not access_token: print("Something went wrong! Please try again.") sys.exit() print("Access token auto-detected!") access_token_queue.put(access_token) # Redirect user's browser self.send_response(301) self.send_header( "Location", "{}/{}".format(get_base_http_url(), "/oauth/github/success")) self.end_headers() # Silence logs def log_message(self, _format, *args): return server = BaseHTTPServer.HTTPServer((LOOPBACK, port), OAuthRedirectHandler) server_started_queue.put(True) server.handle_request() server_thread = Thread(target=start_server) # Prevent server_thread from preventing program shutdown server_thread.setDaemon(True) server_thread.start() url = "{}/{}".format(get_base_http_url(), GITHUB_OAUTH_START) auto_login_query_args = { "state": "client_cli_auto:{}".format(port), } auto_login_url = "{}?{}".format(url, urlencode(auto_login_query_args)) # Copy manual_login_query_args = dict(auto_login_query_args) manual_login_query_args["state"] = "client_cli_manual" manual_login_url = "{}?{}".format(url, urlencode(manual_login_query_args)) print( "Opening browser, please wait. If something goes wrong, press CTRL+C to cancel." ) print( "\033[1m SSH'd into a remote machine, or just don't have access to a browser? Open this link in any browser and then copy/paste the provided access token: \033[4m{}\033[0m \033[0m" .format(manual_login_url)) # If the user doesn't have programatic access to a browser, then we need to give them # the option of opening a URL manually and copy-pasting the access token into the CLI. # We spin this up in a separate thread so that it doesn't block the happy path where # the browser is available and we're able to auto-detect the access token manual_entry_thread_started_queue = Queue() def manual_entry(): print("Waiting for Github OAuth to complete.") print("If something goes wrong, press CTRL+C to cancel.") manual_entry_thread_started_queue.put(True) access_token = get_input("Access token: ") access_token_queue.put(access_token) manual_entry_thread = Thread(target=manual_entry) # Prevent manual_entry_thread from preventing program shutdown manual_entry_thread.setDaemon(True) manual_entry_thread.start() # Wait until the server and manual entry threads have started before opening the # user's browser to prevent a race condition where the Hyperdash server # redirects with an access token but the Python server isn't ready yet. # # Also, we set the timeout to ONE_YEAR_IN_SECONDS because without a timeout, # the .get() call on the queue can not be interrupted with CTRL+C. 
server_started_queue.get(block=True, timeout=ONE_YEAR_IN_SECONDS) manual_entry_thread_started_queue.get(block=True, timeout=ONE_YEAR_IN_SECONDS) # Blocks until browser opens, but doesn't wait for user to close it webbrowser.open_new_tab(auto_login_url) # Wait for the Hyperdash server to redirect with the access token to our embedded # server, or for the user to manually enter an access token. Whichever happens # first. access_token = access_token_queue.get(block=True, timeout=ONE_YEAR_IN_SECONDS) # Use the access token to retrieve the user's API key and store a valid # hyperdash.json file success, default_api_key = _after_access_token_login(access_token) if success: print( "Successfully logged in! We also installed: {} as your default API key" .format(default_api_key))
class QueuedRunner(object): """\ Secondary backend runner that puts method calls on a bounded queue and drops them when the queue is full. A separate (non-main) thread works the queue. """ def __init__(self): self.q = Queue(maxsize=100) self.worker_running = False def start_worker(self): def worker(): while True: (func, args, kwargs) = self.q.get() try: func(*args, **kwargs) metrics.incr( 'tagstore.multi.runner.execute', instance='success', skip_internal=True, ) except Exception as e: logger.exception(e) metrics.incr( 'tagstore.multi.runner.execute', instance='fail', skip_internal=True, ) finally: self.q.task_done() t = Thread(target=worker) t.setDaemon(True) t.start() self.worker_running = True def run(self, f, *args, **kwargs): if random.random() <= options.get('tagstore.multi-sampling'): if not self.worker_running: self.start_worker() try: self.q.put((f, args, kwargs), block=False) metrics.incr( 'tagstore.multi.runner.schedule', instance='put', skip_internal=True, ) except Full: metrics.incr( 'tagstore.multi.runner.schedule', instance='full', skip_internal=True, ) return else: metrics.incr( 'tagstore.multi.runner.schedule', instance='sampled', skip_internal=True, )
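# Hypothetical driver for QueuedRunner above: run() samples calls (via the
# 'tagstore.multi-sampling' option), lazily starts the worker thread, and drops
# work when the bounded queue is full, so callers never block. The callable and
# arguments below are placeholders.
runner = QueuedRunner()

def record_tag(key, value):
    print('recording', key, value)

runner.run(record_tag, 'environment', 'production')   # executed asynchronously, or dropped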
def get_images(job_model, dataset, node, trainer): concurrent = 15 from PIL import ImageFile if hasattr(ImageFile, 'LOAD_TRUNCATED_IMAGES'): ImageFile.LOAD_TRUNCATED_IMAGES = True q = Queue(concurrent) config = dataset['config'] dir = job_model.get_dataset_downloads_dir(dataset) ensure_dir(dir) if 'classes' not in config or not config['classes']: trainer.logger.warning("Dataset %s does not contain any classes." % (dataset['id'], )) return { 'X_train': np.array([]), 'Y_train': np.array([]), 'X_test': np.array([]), 'Y_test': np.array([]) } classes = config['classes'] trainer.set_status('LOAD IMAGES') max = 0 images = {} dataset_path = job_model.get_dataset_downloads_dir(dataset) meta_information_file = dataset_path + '/meta.json' classes_changed = False config_changed = False had_previous = False classes_md5 = hashlib.md5( json.dumps(classes, default=invalid_json_values, sort_keys=True).encode('utf-8')).hexdigest() validationFactor = 0.2 meta = {} if os.path.isdir(dataset_path): if os.path.isfile(meta_information_file): with open(meta_information_file) as f: meta = json.load(f) if meta: had_previous = True if 'classes_md5' in meta and meta[ 'classes_md5'] != classes_md5: classes_changed = True trigger_changed = [ 'resize', 'resizeWidth', 'resizeHeight', 'resizeCompression' ] for i in trigger_changed: if i in meta['config'] and i in config and meta[ 'config'][i] != config[i]: config_changed = True else: config_changed = True else: config_changed = True need_download = classes_changed or config_changed if need_download: if had_previous: trainer.logger.info("Reset dataset and re-download images to " + dir) if classes_changed: trainer.logger.info(" .. because classes changed in", meta['classes_md5'], classes_md5, meta_information_file) if config_changed: trainer.logger.info(" .. because settings changed in", meta_information_file) else: trainer.logger.info("Download images to " + dir) resize = bool(get_option(config, 'resize', True)) if resize: resizeSize = (int(get_option(config, 'resizeWidth', 64)), int(get_option(config, 'resizeHeight', 64))) trainer.logger.info(" .. 
with resizing to %dx%d " % resizeSize) # # we need to donwload all images shutil.rmtree(dataset_path) controller = {'running': True} try: for category in classes: max += len(category['images']) progress = trainer.job_backend.create_progress( 'dataset-download-images', max) progress.label('Download dataset images') for i in range(concurrent): t = ImageDownloaderWorker(q, progress, dataset, max, images, controller) t.daemon = True t.start() for category_idx, category in enumerate(classes): for image in category['images']: local_name = image['id'] local_path = '%s/%s' % ( trainer.job_model.get_dataset_downloads_dir(dataset), local_name) q.put([image, category_idx, local_path]) q.join() controller['running'] = False def move_image(image, category='training'): if image['id'] in images and os.path.isfile( images[image['id']]): target_path = dataset_path + \ '/%s/category_%s/%s' % (category, category_idx, os.path.basename(images[image['id']])) ensure_dir(os.path.dirname(target_path)) os.rename(images[image['id']], target_path) for category_idx, category in enumerate(classes): random.shuffle(category['images']) position = int( math.ceil(len(category['images']) * validationFactor)) ensure_dir(dataset_path + '/training') ensure_dir(dataset_path + '/validation') for image in category['images'][position:]: # test data if image['id'] in images and os.path.isfile( images[image['id']]): move_image(image, 'training') for image in category['images'][:position]: # validation data if image['id'] in images and os.path.isfile( images[image['id']]): move_image(image, 'validation') with open(meta_information_file, 'w') as f: meta = { 'loaded_at': classes_md5, 'classes_md5': classes_md5, 'config': config } json.dump(meta, f, default=invalid_json_values) except KeyboardInterrupt: controller['running'] = False sys.exit(1) else: trainer.logger.info("Downloaded images up2date in " + dir) trainer.logger.info( " - Remove this directory if you want to re-download all images of your dataset and re-shuffle training/validation images." ) trainer.output_size = len(classes) # change to type local_images dataset_transformed = dataset.copy() dataset_transformed['config']['path'] = dir all_memory = get_option(dataset['config'], 'allMemory', False, 'bool') if all_memory: return read_images_in_memory(job_model, dataset_transformed, node, trainer) else: return read_images_keras_generator(job_model, dataset_transformed, node, trainer)
class USBTransport(object): '''Implement USB transport.''' def __init__(self, dev=None): '''Instantiate the first available PTP device over USB''' logger.debug('Init') self.__setup_constructors() # If no device is specified, find all devices claiming to be Cameras # and get the USB endpoints for the first one that works. cameras = find_usb_cameras() devs = [dev] if (dev is not None) else cameras for dev in devs: if self.__setup_device(dev): break else: message = 'No USB PTP device found.' logger.error(message) raise PTPError(message) if self.__dev.is_kernel_driver_active(self.__intf.bInterfaceNumber): try: self.__dev.detach_kernel_driver(self.__intf.bInterfaceNumber) usb.util.claim_interface(self.__dev, self.__intf) except usb.core.USBError: message = ( 'Could not detach kernel driver. ' 'Maybe the camera is mounted?' ) logger.error(message) raise PTPError(message) logger.debug('Claiming {}'.format(repr(dev))) usb.util.claim_interface(self.__dev, self.__intf) self.__event_queue = Queue() self.__event_shutdown = Event() # Locks for different end points. self.__inep_lock= RLock() self.__intep_lock= RLock() self.__outep_lock= RLock() self.__event_proc = Thread(name='EvtPolling', target=self.__poll_events) self.__event_proc.daemon = False atexit.register(self._shutdown) self.__event_proc.start() def _shutdown(self): logger.debug('Shutdown request') self.__event_shutdown.set() # Free USB resource on shutdown. # Only join a running thread. if self.__event_proc.is_alive(): self.__event_proc.join(2) logger.debug('Release {}'.format(repr(self.__dev))) usb.util.release_interface(self.__dev, self.__intf) # Helper methods. # --------------------- def __setup_device(self, dev): '''Get endpoints for a device. True on success.''' self.__inep = None self.__outep = None self.__intep = None self.__cfg = None self.__dev = None self.__intf = None # Attempt to find the USB in, out and interrupt endpoints for a PTP # interface. for cfg in dev: for intf in cfg: if intf.bInterfaceClass == PTP_USB_CLASS: for ep in intf: ep_type = endpoint_type(ep.bmAttributes) ep_dir = endpoint_direction(ep.bEndpointAddress) if ep_type == ENDPOINT_TYPE_BULK: if ep_dir == ENDPOINT_IN: self.__inep = ep elif ep_dir == ENDPOINT_OUT: self.__outep = ep elif ((ep_type == ENDPOINT_TYPE_INTR) and (ep_dir == ENDPOINT_IN)): self.__intep = ep if not (self.__inep and self.__outep and self.__intep): self.__inep = None self.__outep = None self.__intep = None else: logger.debug('Found {}'.format(repr(self.__inep))) logger.debug('Found {}'.format(repr(self.__outep))) logger.debug('Found {}'.format(repr(self.__intep))) self.__cfg = cfg self.__dev = dev self.__intf = intf return True return False def __setup_constructors(self): '''Set endianness and create transport-specific constructors.''' # Set endianness of constructors before using them. self._set_endian('little') self.__Length = Int32ul self.__Type = Enum( Int16ul, default=Pass, Undefined=0x0000, Command=0x0001, Data=0x0002, Response=0x0003, Event=0x0004, ) # This is just a convenience constructor to get the size of a header. self.__Code = Int16ul self.__Header = Struct( 'Length' / self.__Length, 'Type' / self.__Type, 'Code' / self.__Code, 'TransactionID' / self._TransactionID, ) # These are the actual constructors for parsing and building. 
self.__CommandHeader = Struct( 'Length' / self.__Length, 'Type' / self.__Type, 'OperationCode' / self._OperationCode, 'TransactionID' / self._TransactionID, ) self.__ResponseHeader = Struct( 'Length' / self.__Length, 'Type' / self.__Type, 'ResponseCode' / self._ResponseCode, 'TransactionID' / self._TransactionID, ) self.__EventHeader = Struct( 'Length' / self.__Length, 'Type' / self.__Type, 'EventCode' / self._EventCode, 'TransactionID' / self._TransactionID, ) # Apparently nobody uses the SessionID field. Even though it is # specified in ISO15740:2013(E), no device respects it and the session # number is implicit over USB. self.__Param = Range(0, 5, self._Parameter) self.__FullParam = Struct(Array(5, self._Parameter)) self.__FullEventParam = Struct(Array(3, self._Parameter)) self.__CommandTransactionBase = Struct( Embedded(self.__CommandHeader), 'Payload' / Bytes( lambda ctx, h=self.__Header: ctx.Length - h.sizeof() ) ) self.__CommandTransaction = ExprAdapter( self.__CommandTransactionBase, encoder=lambda obj, ctx, h=self.__Header: Container( Length=len(obj.Payload) + h.sizeof(), **obj ), decoder=lambda obj, ctx: obj, ) self.__ResponseTransactionBase = Struct( Embedded(self.__ResponseHeader), 'Payload' / Bytes( lambda ctx, h=self.__Header: ctx.Length - h.sizeof()) ) self.__ResponseTransaction = ExprAdapter( self.__ResponseTransactionBase, encoder=lambda obj, ctx, h=self.__Header: Container( Length=len(obj.Payload) + h.sizeof(), **obj ), decoder=lambda obj, ctx: obj, ) self.__EventTransactionBase = Struct( Embedded(self.__EventHeader), 'Payload' / Bytes( lambda ctx, h=self.__Header: ctx.Length - h.sizeof()), ) self.__EventTransaction = ExprAdapter( self.__EventTransactionBase, encoder=lambda obj, ctx, h=self.__Header: Container( Length=len(obj.Payload) + h.sizeof(), **obj ), decoder=lambda obj, ctx: obj, ) def __parse_response(self, usbdata): '''Helper method for parsing USB data.''' # Build up container with all PTP info. usbdata = bytearray(usbdata) transaction = self.__ResponseTransaction.parse(usbdata) response = Container( SessionID=self.session_id, TransactionID=transaction.TransactionID, ) if transaction.Type == 'Response': response['ResponseCode'] = transaction.ResponseCode response['Parameter'] = self.__Param.parse(transaction.Payload) elif transaction.Type == 'Event': event = self.__EventHeader.parse( usbdata[0:self.__Header.sizeof()] ) response['EventCode'] = event.EventCode response['Parameter'] = self.__Param.parse(transaction.Payload) else: command = self.__CommandHeader.parse( usbdata[0:self.__Header.sizeof()] ) response['OperationCode'] = command.OperationCode response['Data'] = transaction.Payload return response def __recv(self, event=False, wait=False, raw=False): '''Helper method for receiving data.''' # TODO: clear stalls automatically ep = self.__intep if event else self.__inep lock = self.__intep_lock if event else self.__inep_lock with lock: try: usbdata = ep.read( ep.wMaxPacketSize, timeout=0 if wait else 5 ) except usb.core.USBError as e: # Ignore timeout or busy device once. if e.errno == 110 or e.errno == 16: if event: return None else: usbdata = ep.read( ep.wMaxPacketSize, timeout=5000 ) else: raise e header = self.__ResponseHeader.parse( bytearray(usbdata[0:self.__Header.sizeof()]) ) if header.Type not in ['Response', 'Data', 'Event']: raise PTPError( 'Unexpected USB transfer type.' 
'Expected Response, Event or Data but received {}' .format(header.Type) ) while len(usbdata) < header.Length: usbdata += ep.read( ep.wMaxPacketSize, timeout=5000 ) if raw: return usbdata else: return self.__parse_response(usbdata) def __send(self, ptp_container, event=False): '''Helper method for sending data.''' ep = self.__intep if event else self.__outep lock = self.__intep_lock if event else self.__outep_lock transaction = self.__CommandTransaction.build(ptp_container) with lock: try: ep.write(transaction, timeout=1) except usb.core.USBError as e: # Ignore timeout or busy device once. if e.errno == 110 or e.errno == 16: ep.write(transaction, timeout=5000) def __send_request(self, ptp_container): '''Send PTP request without checking answer.''' # Don't modify original container to keep abstraction barrier. ptp = Container(**ptp_container) # Don't send unused parameters try: while not ptp.Parameter[-1]: ptp.Parameter.pop() if len(ptp.Parameter) == 0: break except IndexError: # The Parameter list is already empty. pass # Send request ptp['Type'] = 'Command' ptp['Payload'] = self.__Param.build(ptp.Parameter) self.__send(ptp) def __send_data(self, ptp_container, data): '''Send data without checking answer.''' # Don't modify original container to keep abstraction barrier. ptp = Container(**ptp_container) # Send data ptp['Type'] = 'Data' ptp['Payload'] = data self.__send(ptp) # Actual implementation # --------------------- def send(self, ptp_container, data): '''Transfer operation with dataphase from initiator to responder''' logger.debug('SEND {}{}'.format( ptp_container.OperationCode, ' ' + str(list(map(hex, ptp_container.Parameter))) if ptp_container.Parameter else '', )) self.__send_request(ptp_container) self.__send_data(ptp_container, data) # Get response and sneak in implicit SessionID and missing parameters. response = self.__recv() logger.debug('SEND {} {}{}'.format( ptp_container.OperationCode, response.ResponseCode, ' ' + str(list(map(hex, response.Parameter))) if ptp_container.Parameter else '', )) return response def recv(self, ptp_container): '''Transfer operation with dataphase from responder to initiator.''' logger.debug('RECV {}{}'.format( ptp_container.OperationCode, ' ' + str(list(map(hex, ptp_container.Parameter))) if ptp_container.Parameter else '', )) self.__send_request(ptp_container) dataphase = self.__recv() if hasattr(dataphase, 'Data'): response = self.__recv() if ( (ptp_container.OperationCode != dataphase.OperationCode) or (ptp_container.TransactionID != dataphase.TransactionID) or (ptp_container.SessionID != dataphase.SessionID) or (dataphase.TransactionID != response.TransactionID) or (dataphase.SessionID != response.SessionID) ): raise PTPError( 'Dataphase does not match with requested operation.' ) response['Data'] = dataphase.Data else: response = dataphase logger.debug('RECV {} {}{}'.format( ptp_container.OperationCode, response.ResponseCode, ' ' + str(list(map(hex, response.Parameter))) if response.Parameter else '', )) return response def mesg(self, ptp_container): '''Transfer operation without dataphase.''' logger.debug('MESG {}{}'.format( ptp_container.OperationCode, ' ' + str(list(map(hex, ptp_container.Parameter))) if ptp_container.Parameter else '', )) self.__send_request(ptp_container) # Get response and sneak in implicit SessionID and missing parameters # for FullResponse. 
response = self.__recv() logger.debug('MESG {} {}{}'.format( ptp_container.OperationCode, response.ResponseCode, ' ' + str(list(map(hex, response.Parameter))) if response.Parameter else '', )) return response def event(self, wait=False): '''Check event. If `wait` this function is blocking. Otherwise it may return None. ''' evt = None usbdata = None timeout = None if wait else 0.001 if not self.__event_queue.empty(): usbdata = self.__event_queue.get(block=not wait, timeout=timeout) if usbdata is not None: evt = self.__parse_response(usbdata) return evt def __poll_events(self): '''Poll events, adding them to a queue.''' while not self.__event_shutdown.is_set() and _main_thread_alive(): evt = self.__recv(event=True, wait=False, raw=True) if evt is not None: logger.debug('Event queued') self.__event_queue.put(evt)
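# A minimal, hardware-free sketch of the event-polling pattern USBTransport uses
# above: a background thread reads a (simulated) interrupt endpoint and queues raw
# events, while the consumer drains the queue without blocking. `FakeEndpoint` is
# a hypothetical stand-in for the USB endpoint and is not part of the original code.
import random
import time
from queue import Queue, Empty
from threading import Event, Thread


class FakeEndpoint(object):
    def read(self):
        time.sleep(0.05)
        return b'event' if random.random() < 0.3 else None


def poll_events(endpoint, event_queue, shutdown):
    # Mirrors USBTransport.__poll_events: push anything received onto the queue.
    while not shutdown.is_set():
        data = endpoint.read()
        if data is not None:
            event_queue.put(data)


if __name__ == '__main__':
    event_queue = Queue()
    shutdown = Event()
    poller = Thread(name='EvtPolling', target=poll_events,
                    args=(FakeEndpoint(), event_queue, shutdown))
    poller.start()
    time.sleep(0.5)
    shutdown.set()
    poller.join()
    # Drain whatever was collected, mirroring USBTransport.event(wait=False).
    while True:
        try:
            print('got', event_queue.get_nowait())
        except Empty:
            break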
class Scheduler():
    def __init__(self):
        self.q = Queue()
        self.fp_set = set()
        self.total_repeat_nums = 0

    def add_request(self, request):
        # Put the request on the request queue, but only if its fingerprint is
        # not already in the seen set.
        if self._filter_request(request):
            self.q.put(request)

    def get_request(self):
        # Take one request off the queue; return None if nothing is available.
        try:
            request = self.q.get_nowait()
        except:
            request = None
        return request

    def _filter_request(self, request):
        '''Request deduplication: if the fingerprint is not in the set yet,
        add it and return True; otherwise count and log the duplicate.'''
        fp = self._gen_fp(request)
        if fp not in self.fp_set:
            self.fp_set.add(fp)
            return True
        self.total_repeat_nums += 1  # one more duplicate request
        logger.info("Duplicate request found: <{} {}>".format(request.method, request.url))
        return False

    def _gen_fp(self, request):
        # Return the request's fingerprint as a hex string.
        url = canonicalize_url(request.url)
        method = request.method.upper()
        data = request.data if request.data else {}
        # Sort the data dict by key so that equivalent payloads hash the same.
        # key=lambda x: x[0] sorts the (k, v) pairs by k, e.g. the result is
        # [('a', 1), ('b', 2)].
        data = sorted(data.items(), key=lambda x: x[0])
        sha1 = hashlib.sha1()
        sha1.update(self._to_bytes(url))
        sha1.update(self._to_bytes(method))
        sha1.update(self._to_bytes(str(data)))
        fp = sha1.hexdigest()
        return fp

    def _to_bytes(self, string):
        """str/bytes handling is exactly reversed between py2 and py3!"""
        if six.PY2:  # running under Python 2
            if isinstance(string, str):
                return string
            else:
                return string.encode()
        elif six.PY3:  # running under Python 3
            if isinstance(string, str):
                return string.encode()
            else:
                return string
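# A self-contained sketch of the fingerprint-based deduplication technique the
# Scheduler above implements, with a simplified fingerprint (no URL
# canonicalization; the original relies on w3lib's canonicalize_url and six).
# `Request` here is a hypothetical stand-in for the project's request class.
import hashlib
from collections import namedtuple
from queue import Queue

Request = namedtuple('Request', ['url', 'method', 'data'])


def fingerprint(request):
    # Hash the normalized pieces of the request so that logically identical
    # requests map to the same hex digest.
    sha1 = hashlib.sha1()
    sha1.update(request.url.encode())
    sha1.update(request.method.upper().encode())
    sha1.update(str(sorted((request.data or {}).items())).encode())
    return sha1.hexdigest()


if __name__ == '__main__':
    q, seen = Queue(), set()
    requests = [
        Request('http://example.com/?a=1', 'get', {}),
        Request('http://example.com/?a=1', 'GET', {}),   # duplicate of the first
        Request('http://example.com/?a=2', 'GET', {}),
    ]
    for req in requests:
        fp = fingerprint(req)
        if fp in seen:
            print('duplicate request skipped:', req.url)
        else:
            seen.add(fp)
            q.put(req)
    print('queued:', q.qsize())  # -> 2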
class MockBatchSystemAndProvisioner(AbstractScalableBatchSystem, AbstractProvisioner): """ Mimics a job batcher, provisioner and scalable batch system """ def __init__(self, config, secondsPerJob): super(MockBatchSystemAndProvisioner, self).__init__(config=config) # To mimic parallel preemptable and non-preemptable queues # for jobs we create two parallel instances of the following class self.config = config self.secondsPerJob = secondsPerJob self.provisioner = self self.batchSystem = self self.nodeTypes = config.nodeTypes self.nodeShapes = self.nodeTypes self.nodeShapes.sort() self.jobQueue = Queue() self.updatedJobsQueue = Queue() self.jobBatchSystemIDToIssuedJob = {} self.totalJobs = 0 # Count of total jobs processed self.totalWorkerTime = 0.0 # Total time spent in worker threads self.nodesToWorker = {} # Map from Node to instances of the Worker class self.workers = {nodeShape:[] for nodeShape in self.nodeShapes} # Instances of the Worker class self.maxWorkers = {nodeShape:0 for nodeShape in self.nodeShapes} # Maximum number of workers self.running = False self.leaderThread = Thread(target=self._leaderFn) def start(self): self.running = True self.leaderThread.start() def shutDown(self): self.running = False self.leaderThread.join() # Stub out all AbstractBatchSystem methods since they are never called for name, value in iteritems(AbstractBatchSystem.__dict__): if getattr(value, '__isabstractmethod__', False): exec('def %s(): pass' % name) # Without this, the class would end up with .name and .value attributes del name, value # AbstractScalableBatchSystem methods def nodeInUse(self, nodeIP): return False def ignoreNode(self, nodeAddress): pass def unignoreNode(self, nodeAddress): pass @contextmanager def nodeFiltering(self, filter): nodes = self.getProvisionedWorkers(preemptable=True, nodeType=None) + self.getProvisionedWorkers(preemptable=False, nodeType=None) yield nodes # AbstractProvisioner methods def getProvisionedWorkers(self, nodeType=None, preemptable=None): """ Returns a list of Node objects, each representing a worker node in the cluster :param preemptable: If True only return preemptable nodes else return non-preemptable nodes :return: list of Node """ nodesToWorker = self.nodesToWorker if nodeType: return [node for node in nodesToWorker if node.nodeType == nodeType] else: return list(nodesToWorker.keys()) def terminateNodes(self, nodes): self._removeNodes(nodes) def remainingBillingInterval(self, node): pass def addJob(self, jobShape, preemptable=False): """ Add a job to the job queue """ self.totalJobs += 1 jobID = uuid.uuid4() self.jobBatchSystemIDToIssuedJob[jobID] = Job(memory=jobShape.memory, cores=jobShape.cores, disk=jobShape.disk, preemptable=preemptable) self.jobQueue.put(jobID) # JobBatcher functionality def getNumberOfJobsIssued(self, preemptable=None): if preemptable is not None: jobList = [job for job in list(self.jobQueue.queue) if self.jobBatchSystemIDToIssuedJob[job].preemptable == preemptable] return len(jobList) else: return self.jobQueue.qsize() def getJobs(self): return self.jobBatchSystemIDToIssuedJob.values() # AbstractScalableBatchSystem functionality def getNodes(self, preemptable=False, timeout=None): nodes = dict() for node in self.nodesToWorker: if node.preemptable == preemptable: worker = self.nodesToWorker[node] nodes[node.privateIP] = NodeInfo(coresTotal=0, coresUsed=0, requestedCores=1, memoryTotal=0, memoryUsed=0, requestedMemory=1, workers=1 if worker.busyEvent.is_set() else 0) return nodes # AbstractProvisioner functionality def 
addNodes(self, nodeType, numNodes, preemptable): self._addNodes(numNodes=numNodes, nodeType=nodeType, preemptable=preemptable) return self.getNumberOfNodes(nodeType=nodeType, preemptable=preemptable) def getNodeShape(self, nodeType, preemptable=False): #Assume node shapes and node types are the same thing for testing return nodeType def getWorkersInCluster(self, nodeShape): return self.workers[nodeShape] def _leaderFn(self): while self.running: updatedJobID = None try: updatedJobID = self.updatedJobsQueue.get(timeout=1.0) except Empty: continue if updatedJobID: del self.jobBatchSystemIDToIssuedJob[updatedJobID] time.sleep(0.1) def _addNodes(self, numNodes, nodeType, preemptable=False): nodeShape = self.getNodeShape(nodeType=nodeType, preemptable=preemptable) class Worker(object): def __init__(self, jobQueue, updatedJobsQueue, secondsPerJob): self.busyEvent = Event() self.stopEvent = Event() def workerFn(): while True: if self.stopEvent.is_set(): return try: jobID = jobQueue.get(timeout=1.0) except Empty: continue updatedJobsQueue.put(jobID) self.busyEvent.set() time.sleep(secondsPerJob) self.busyEvent.clear() self.startTime = time.time() self.worker = Thread(target=workerFn) self.worker.start() def stop(self): self.stopEvent.set() self.worker.join() return time.time() - self.startTime for i in range(numNodes): node = Node('127.0.0.1', uuid.uuid4(), 'testNode', time.time(), nodeType=nodeType, preemptable=preemptable) self.nodesToWorker[node] = Worker(self.jobQueue, self.updatedJobsQueue, self.secondsPerJob) self.workers[nodeShape].append(self.nodesToWorker[node]) self.maxWorkers[nodeShape] = max(self.maxWorkers[nodeShape], len(self.workers[nodeShape])) def _removeNodes(self, nodes): logger.info("removing nodes. %s workers and %s to terminate", len(self.nodesToWorker), len(nodes)) for node in nodes: logger.info("removed node") try: nodeShape = self.getNodeShape(node.nodeType, node.preemptable) worker = self.nodesToWorker.pop(node) self.workers[nodeShape].pop() self.totalWorkerTime += worker.stop() except KeyError: # Node isn't our responsibility pass def getNumberOfNodes(self, nodeType=None, preemptable=None): if nodeType: nodeShape = self.getNodeShape(nodeType=nodeType, preemptable=preemptable) return len(self.workers[nodeShape]) else: return len(self.nodesToWorker)
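# A reduced, self-contained sketch of the worker/leader hand-off the mock batch
# system above depends on: worker threads pull job IDs from a shared job queue,
# simulate the work, and report completions on an updated-jobs queue that a
# single leader thread drains. All names and timings here are illustrative only.
import time
import uuid
from queue import Queue, Empty
from threading import Event, Thread

completed = []


def mock_worker(job_queue, updated_queue, stop, seconds_per_job=0.01):
    while not stop.is_set():
        try:
            job_id = job_queue.get(timeout=0.1)
        except Empty:
            continue
        time.sleep(seconds_per_job)      # pretend to run the job
        updated_queue.put(job_id)        # report completion to the leader
        job_queue.task_done()


def mock_leader(updated_queue, stop):
    while not stop.is_set() or not updated_queue.empty():
        try:
            completed.append(updated_queue.get(timeout=0.1))
        except Empty:
            continue


if __name__ == '__main__':
    job_queue, updated_queue, stop = Queue(), Queue(), Event()
    for _ in range(10):
        job_queue.put(uuid.uuid4())
    threads = [Thread(target=mock_worker, args=(job_queue, updated_queue, stop))
               for _ in range(3)]
    threads.append(Thread(target=mock_leader, args=(updated_queue, stop)))
    for t in threads:
        t.start()
    job_queue.join()                     # every job has been picked up and reported
    stop.set()
    for t in threads:
        t.join()
    print('jobs completed:', len(completed))  # -> 10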
def __init__(self, function, *args, **kwargs):
    self._function = function
    self._func_args = args
    self._func_kwargs = kwargs
    self.__exceptions = Queue()
    threading.Thread.__init__(self, name=self._function.__name__)
def check_unseen(imap, loop=False): from time import time if sys.version_info >= (3, ): from .oauth2 import RefreshToken from .oauth2 import GenerateOAuth2String else: from oauth2 import RefreshToken from oauth2 import GenerateOAuth2String task = Queue() timeout_or_pwd = {} rx_pwd = {} # trigger all config for name, config in imap.config.items(): if config.get('token'): # refresh token def refresh_token(config): config['token'] = eval(config['token']) config['token'] = RefreshToken( config['clientid'], config['secret'], config['token']['refresh_token']) return time() + float(config['token']['expires_in']) - 1 timeout_or_pwd[name] = refresh_token(config) else: timeout_or_pwd[name] = config.get('password', '') task.put(name) # process while True: # check if got password try: config_name, pwd = imap.pwd_queue[0].get(block=False) rx_pwd[config_name] = pwd except Empty: # check if got config try: config_name = task.get(block=False) except Empty: # do delay and check if stop if imap.stop.wait(CHECK_QUEUE_INTERVAL): break else: continue # access imap unseen = 0 invalid_pwd = False config = imap.config[config_name] mailbox = None if config.get('token'): # oauth if time() > timeout_or_pwd[config_name]: timeout_or_pwd[config_name] = refresh_token(config) auth_string = GenerateOAuth2String( config['username'], config['token']['access_token'], False) mailbox = imaplib.IMAP4_SSL(config['host']) if DEBUG > 1: mailbox.debug = 4 mailbox.authenticate('XOAUTH2', lambda x: auth_string) else: # non-oauth if timeout_or_pwd[config_name]: mailbox = imaplib.IMAP4_SSL(config['host']) if DEBUG > 1: mailbox.debug = 4 mailbox.login(config['username'], timeout_or_pwd[config_name]) else: logger.debug("%s, Waiting password." % config_name) try: if rx_pwd.get(config_name, ''): pwd = rx_pwd.pop(config_name) mailbox = imaplib.IMAP4_SSL(config['host']) if DEBUG > 1: mailbox.debug = 4 mailbox.login(config['username'], pwd) timeout_or_pwd[config_name] = pwd imap.pwd_queue[1].put('OK for %s' % config_name) else: invalid_pwd = True except (imaplib.IMAP4.abort, imaplib.IMAP4.error): imap.pwd_queue[1].put('%s Error: Wrong password!' % config_name) invalid_pwd = True except Exception as e: imap.pwd_queue[1].put('%s Error: %s!' % (config_name, str(e))) invalid_pwd = True if not invalid_pwd: # check status if config.get('search'): mailbox.select(config['mailbox']) typ, data = mailbox.search(None, config['search']) if typ == 'OK': unseen = len(data[0].split()) logger.info("%s: %d messages match '%s'" % (config_name, unseen, config['search'])) else: typ, data = mailbox.status(config['mailbox'], '(Messages UnSeen)') if typ == 'OK': total, unseen = re.search( 'Messages\s+(\d+)\s+UnSeen\s+(\d+)', decode(data[0]), re.I).groups() unseen = int(unseen) logger.info("%s: %s messages and %s unseen" % (config_name, total, unseen)) # control usblamp if unseen: delay = float(config['delay']) color = eval(config['color']) if delay: imap.usblamp.start_fading(delay, color) else: imap.usblamp.set_color(color) else: imap.usblamp.off() delay = float(config['interval']) * (1 if DEBUG else 60) else: delay = CHECK_QUEUE_INTERVAL if mailbox: mailbox.logout() if not loop: break # do delay and check if stop if imap.stop.wait(delay): break else: # schedule next check task.put(config_name) logger.debug("*** check_unseen thread exited.")
class serversocket: """ A server socket to receive and process string messages from client sockets to a central queue """ def __init__(self, name=None, verbose=False): self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) self.sock.bind(('localhost', 0)) self.sock.listen(10) # queue a max of n connect requests self.verbose = verbose self.name = name self.queue = Queue() if self.verbose: print("Server bound to: " + str(self.sock.getsockname())) def start_accepting(self): """ Start the listener thread """ thread = threading.Thread(target=self._accept, args=()) thread.daemon = True # stops from blocking shutdown if self.name is not None: thread.name = thread.name + "-" + self.name thread.start() def _accept(self): """ Listen for connections and pass handling to a new thread """ while True: (client, address) = self.sock.accept() thread = threading.Thread(target=self._handle_conn, args=(client, address)) thread.daemon = True thread.start() def _handle_conn(self, client, address): """ Receive messages and pass to queue. Messages are prefixed with a 4-byte integer to specify the message length and 1-byte character to indicate the type of serialization applied to the message. Supported serialization formats: 'n' : no serialization 'u' : Unicode string in UTF-8 'd' : dill pickle 'j' : json """ if self.verbose: print("Thread: %s connected to: %s" % (threading.current_thread(), address)) try: while True: msg = self.receive_msg(client, 5) msglen, serialization = struct.unpack('>Lc', msg) if self.verbose: print("Received message, length %d, serialization %r" % (msglen, serialization)) msg = self.receive_msg(client, msglen) if serialization != b'n': try: if serialization == b'd': # dill serialization msg = dill.loads(msg) elif serialization == b'j': # json serialization msg = json.loads(msg.decode('utf-8')) elif serialization == b'u': # utf-8 serialization msg = msg.decode('utf-8') else: print("Unrecognized serialization type: %r" % serialization) continue except (UnicodeDecodeError, ValueError) as e: print("Error de-serializing message: %s \n %s" % (msg, traceback.format_exc(e))) continue self.queue.put(msg) except RuntimeError: if self.verbose: print("Client socket: " + str(address) + " closed") def receive_msg(self, client, msglen): msg = b'' while len(msg) < msglen: chunk = client.recv(msglen - len(msg)) if not chunk: raise RuntimeError("socket connection broken") msg = msg + chunk return msg def close(self): self.sock.close()
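# A hedged usage sketch for the serversocket class above (it assumes the class
# and its module-level imports are available). It starts the listener, connects
# a plain TCP client, and sends one JSON message using the 4-byte length plus
# 1-byte serialization prefix the server expects.
import json
import socket
import struct

if __name__ == '__main__':
    server = serversocket(name='demo', verbose=True)
    server.start_accepting()
    host, port = server.sock.getsockname()

    payload = json.dumps({'msg': 'hello'}).encode('utf-8')
    header = struct.pack('>Lc', len(payload), b'j')      # length + 'j' for JSON
    client = socket.create_connection((host, port))
    client.sendall(header + payload)

    print(server.queue.get(timeout=5))                   # -> {'msg': 'hello'}
    client.close()
    server.close()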
def execute_dynamic_multithreaded_task(iterable, thread_checker_func, poll_period, worker_function, output_queue_handler): """ Execute a function for every item in iterable with a dynamic number of threads as defined by the return of thread_checker_func :type iterable: any iterable :type thread_checker_func: function with zero parameters and returns int of # of threads should be running :type poll_period: int :type worker_function: function with at least 1 parameter :type output_queue_handler: function with at least 1 parameter :param iterable: Iterable to pass into worker_function :param thread_checker_func: function that accepts no args and will return int for # of threads we should run :param poll_period: how often (in sec) we will run thread_checker_func :param worker_function: function that will be run multi-threaded and once per item in file_list :param output_queue_handler: consume things that worker_function returns. this will run single threaded, once per execution :rtype : None - output_queue_handler should handle all output functionality """ LOGGER.info("starting dynamic multithreaded execution") # Type checking on all inputs assert isinstance(iterable, collections.Iterable) assert callable(thread_checker_func) assert isinstance(poll_period, six.integer_types) assert callable(worker_function) assert callable(output_queue_handler) LOGGER.info("all assertions passed") # Validate function inputs are good (check to ensure they accept at least one variable # I haven't been able to get this to work reliably between callable classes / functions and class methods, giving up. # if get_num_input_vars(worker_function) != 1: # raise RuntimeError("worker_function must accept one and only one inputs") # # if get_num_input_vars(output_queue_handler) != 1: # raise RuntimeError("output_queue_handler must accept one and only one inputs") # # if get_num_input_vars(thread_checker_func) != 0: # raise RuntimeError("thread_checker_func must accept no inputs") # # LOGGER.info("callables appear to have ok inputs") # prep the thread-wide variables inq = Queue() # queue full of filenames outq = Queue() # queue we will write from deathq = Queue( ) # queue to tell the next thread that's done with execution to die kill_boolean = threading.Event() LOGGER.info("loading up inq") # Load up inq inq.queue.extend(iterable) thread_list = [] # spin up our finisher thread LOGGER.info("starting up finisher thread") fin_thread = threading.Thread(target=finisher, kwargs={ "outq": outq, "output_queue_handler": output_queue_handler, "kill_boolean": kill_boolean }) fin_thread.start() # do all the executions, scaling up/down as needed LOGGER.info("entering infinite loop (until job is done)") # initializing this so we don't die on the first run target_threads = 0 while True: last_run = datetime.datetime.now() if kill_boolean.is_set(): # everything should spin down and die LOGGER.debug("kill_boolean is true, we are going to stop now!") return if not inq.empty(): # get new target for our threads target_threads = thread_checker_func(target_threads) # this could feasibly be done better, right now we are blocking until all deathq items are taken # we could do math and manage the deathq or spin up more threads based on that, which could make our deathq more accurate and less up / down # concern here is that this "control" algorithm get out of whack and vacillate up and down too much # Especially since we effect BDB Load # prob don't need this but doing it just in case thread_list = [t for t in thread_list if t.is_alive()] # spin up 
threads if need be while len(thread_list) < target_threads: LOGGER.debug("spinning up a new worker thread") base_kwargs = { "inq": inq, "outq": outq, "deathq": deathq, "worker_function": worker_function, "kill_boolean": kill_boolean } t = threading.Thread(target=worker, kwargs=base_kwargs) t.start() thread_list.append(t) # kill any extra threads thread_overage = len(thread_list) - target_threads for i in range(thread_overage): # kill em LOGGER.debug("sending death signal to deathq") deathq.put(Sentinel("DIE")) # wait up to 10 min for deathq to be empty, then start forcibly killing threads # TODO: need to implement forcibly killing while not deathq.empty(): time.sleep(1) # deathq is empty, which means we should have killed off however many threads we needed to # keeping this out of the if statement above in case we get exceptions in our child threads, we can spin up new workers thread_list = [t for t in thread_list if t.is_alive()] LOGGER.debug("Currently have %s threads running", len(thread_list)) # only check for load every [poll_period] seconds while (datetime.datetime.now() - last_run).total_seconds() < poll_period: # Need to check if we're actually done if inq.empty(): # inq is empty, we need to see if we have any threads thread_list = [t for t in thread_list if t.is_alive()] if not thread_list: LOGGER.info( "All worker threads are done, killing finisher thread") outq.put(Sentinel("DIE")) # wait for finisher thread to die while fin_thread.is_alive(): LOGGER.info( "finisher thread is still running, sleeping") time.sleep(1) LOGGER.info("All threads have spun down, returning!") return else: LOGGER.info( "inq is empty, but looks like we still have %s threads running, we will wait until all threads complete", len(thread_list)) time.sleep(1)
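# A self-contained sketch of the death-queue idea used above: to scale the pool
# down, the controller drops sentinel objects on a dedicated queue, and the next
# idle worker that sees one exits instead of taking more work. `Sentinel` is a
# stand-in for the original helper of the same name, which is not shown here.
import threading
import time
from queue import Queue, Empty


class Sentinel(object):
    def __init__(self, label):
        self.label = label


def scalable_worker(inq, outq, deathq):
    while True:
        try:
            deathq.get_nowait()          # a death token means: stop this thread
            return
        except Empty:
            pass
        try:
            item = inq.get(timeout=0.1)
        except Empty:
            continue
        outq.put(item * 2)


if __name__ == '__main__':
    inq, outq, deathq = Queue(), Queue(), Queue()
    for i in range(20):
        inq.put(i)
    threads = [threading.Thread(target=scalable_worker, args=(inq, outq, deathq))
               for _ in range(4)]
    for t in threads:
        t.start()
    time.sleep(0.5)
    for _ in range(2):                   # scale the pool from 4 threads down to 2
        deathq.put(Sentinel('DIE'))
    time.sleep(0.5)
    alive = [t for t in threads if t.is_alive()]
    print('threads still running:', len(alive))   # -> 2
    for _ in alive:                      # shut the remaining workers down as well
        deathq.put(Sentinel('DIE'))
    for t in threads:
        t.join()
    print('results produced:', outq.qsize())      # -> 20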
def main():
    volumes = 900
    num_worker_threads = 25
    task = Queue()
    poems = Queue()
    for i in range(num_worker_threads):
        t = Thread(target=worker, args=(task, poems))
        t.daemon = True
        t.start()
    write_thread = Thread(target=write_poems, args=('./data/poems.txt', poems))
    write_thread.start()
    for js in range(1, volumes + 1):
        task.put(js)
    task.join()
    poems.join()
    poems.put(None)
    write_thread.join()
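# main() above references `worker` and `write_poems`, which are not shown. Below
# is one plausible, hedged shape for them that matches the protocol main()
# assumes: task_done() on both queues so the join() calls return, and a trailing
# None sentinel that stops the writer. `fetch_volume` is a hypothetical stand-in
# for the real download/parse step.
def fetch_volume(volume):
    # Hypothetical placeholder for fetching one volume of poems.
    return 'poem from volume %d' % volume


def worker(task, poems):
    while True:
        volume = task.get()
        try:
            poems.put(fetch_volume(volume))
        finally:
            task.task_done()              # lets task.join() in main() return


def write_poems(path, poems):
    with open(path, 'w', encoding='utf-8') as fh:
        while True:
            poem = poems.get()
            if poem is None:              # sentinel pushed by main() after poems.join()
                poems.task_done()
                break
            fh.write(poem + '\n')
            poems.task_done()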
class AbstractGridEngineBatchSystem(BatchSystemLocalSupport): """ A partial implementation of BatchSystemSupport for batch systems run on a standard HPC cluster. By default worker cleanup and hot deployment are not implemented. """ class Worker(with_metaclass(ABCMeta, Thread)): def __init__(self, newJobsQueue, updatedJobsQueue, killQueue, killedJobsQueue, boss): """ Abstract worker interface class. All instances are created with five initial arguments (below). Note the Queue instances passed are empty. :param newJobsQueue: a Queue of new (unsubmitted) jobs :param updatedJobsQueue: a Queue of jobs that have been updated :param killQueue: a Queue of active jobs that need to be killed :param killedJobsQueue: Queue of killed jobs for this worker :param boss: the AbstractGridEngineBatchSystem instance that controls this AbstractGridEngineWorker """ Thread.__init__(self) self.newJobsQueue = newJobsQueue self.updatedJobsQueue = updatedJobsQueue self.killQueue = killQueue self.killedJobsQueue = killedJobsQueue self.waitingJobs = list() self.runningJobs = set() self.runningJobsLock = Lock() self.boss = boss self.allocatedCpus = dict() self.batchJobIDs = dict() self._checkOnJobsCache = None self._checkOnJobsTimestamp = None def getBatchSystemID(self, jobID): """ Get batch system-specific job ID Note: for the moment this is the only consistent way to cleanly get the batch system job ID :param: string jobID: toil job ID """ if jobID not in self.batchJobIDs: raise RuntimeError("Unknown jobID, could not be converted") (job, task) = self.batchJobIDs[jobID] if task is None: return str(job) else: return str(job) + "." + str(task) def forgetJob(self, jobID): """ Remove jobID passed :param: string jobID: toil job ID """ with self.runningJobsLock: self.runningJobs.remove(jobID) del self.allocatedCpus[jobID] del self.batchJobIDs[jobID] def createJobs(self, newJob): """ Create a new job with the Toil job ID. 
Implementation-specific; called by AbstractGridEngineWorker.run() :param string newJob: Toil job ID """ activity = False # Load new job id if present: if newJob is not None: self.waitingJobs.append(newJob) # Launch jobs as necessary: while (len(self.waitingJobs) > 0 and sum(self.allocatedCpus.values()) < int(self.boss.maxCores)): activity = True jobID, cpu, memory, command = self.waitingJobs.pop(0) # prepare job submission command subLine = self.prepareSubmission(cpu, memory, jobID, command) logger.debug("Running %r", subLine) # submit job and get batch system ID batchJobID = self.submitJob(subLine) logger.debug("Submitted job %s", str(batchJobID)) # Store dict for mapping Toil job ID to batch job ID # TODO: Note that this currently stores a tuple of (batch system # ID, Task), but the second value is None by default and doesn't # seem to be used self.batchJobIDs[jobID] = (batchJobID, None) # Add to queue of running jobs with self.runningJobsLock: self.runningJobs.add(jobID) # Add to allocated resources self.allocatedCpus[jobID] = cpu return activity def killJobs(self): """ Kill any running jobs within worker """ killList = list() while True: try: jobId = self.killQueue.get(block=False) except Empty: break else: killList.append(jobId) if not killList: return False # Do the dirty job for jobID in list(killList): if jobID in self.runningJobs: logger.debug('Killing job: %s', jobID) # this call should be implementation-specific, all other # code is redundant w/ other implementations self.killJob(jobID) else: if jobID in self.waitingJobs: self.waitingJobs.remove(jobID) self.killedJobsQueue.put(jobID) killList.remove(jobID) # Wait to confirm the kill while killList: for jobID in list(killList): batchJobID = self.getBatchSystemID(jobID) if self.getJobExitCode(batchJobID) is not None: logger.debug('Adding jobID %s to killedJobsQueue', jobID) self.killedJobsQueue.put(jobID) killList.remove(jobID) self.forgetJob(jobID) if len(killList) > 0: logger.warn("Some jobs weren't killed, trying again in %is.", self.boss.sleepSeconds()) return True def checkOnJobs(self): """Check and update status of all running jobs. Respects statePollingWait and will return cached results if not within time period to talk with the scheduler. """ if (self._checkOnJobsTimestamp and (datetime.now() - self._checkOnJobsTimestamp).total_seconds() < self.boss.config.statePollingWait): return self._checkOnJobsCache activity = False for jobID in list(self.runningJobs): batchJobID = self.getBatchSystemID(jobID) status = self.getJobExitCode(batchJobID) if status is not None: activity = True self.updatedJobsQueue.put((jobID, status)) self.forgetJob(jobID) self._checkOnJobsCache = activity self._checkOnJobsTimestamp = datetime.now() return activity def run(self): """ Run any new jobs """ while True: activity = False newJob = None if not self.newJobsQueue.empty(): activity = True newJob = self.newJobsQueue.get() if newJob is None: logger.debug('Received queue sentinel.') break activity |= self.killJobs() activity |= self.createJobs(newJob) activity |= self.checkOnJobs() if not activity: logger.debug('No activity, sleeping for %is', self.boss.sleepSeconds()) @abstractmethod def prepareSubmission(self, cpu, memory, jobID, command): """ Preparation in putting together a command-line string for submitting to batch system (via submitJob().) 
:param: string cpu :param: string memory :param: string jobID : Toil job ID :param: string subLine: the command line string to be called :rtype: string """ raise NotImplementedError() @abstractmethod def submitJob(self, subLine): """ Wrapper routine for submitting the actual command-line call, then processing the output to get the batch system job ID :param: string subLine: the literal command line string to be called :rtype: string: batch system job ID, which will be stored internally """ raise NotImplementedError() @abstractmethod def getRunningJobIDs(self): """ Get a list of running job IDs. Implementation-specific; called by boss AbstractGridEngineBatchSystem implementation via AbstractGridEngineBatchSystem.getRunningBatchJobIDs() :rtype: list """ raise NotImplementedError() @abstractmethod def killJob(self, jobID): """ Kill specific job with the Toil job ID. Implementation-specific; called by AbstractGridEngineWorker.killJobs() :param string jobID: Toil job ID """ raise NotImplementedError() @abstractmethod def getJobExitCode(self, batchJobID): """ Returns job exit code. Implementation-specific; called by AbstractGridEngineWorker.checkOnJobs() :param string batchjobID: batch system job ID """ raise NotImplementedError() def __init__(self, config, maxCores, maxMemory, maxDisk): super(AbstractGridEngineBatchSystem, self).__init__(config, maxCores, maxMemory, maxDisk) self.currentJobs = set() # NOTE: this may be batch system dependent, maybe move into the worker? # Doing so would effectively make each subclass of AbstractGridEngineBatchSystem # much smaller self.maxCPU, self.maxMEM = self.obtainSystemConstants() self.newJobsQueue = Queue() self.updatedJobsQueue = Queue() self.killQueue = Queue() self.killedJobsQueue = Queue() # get the associated worker class here self.worker = self.Worker(self.newJobsQueue, self.updatedJobsQueue, self.killQueue, self.killedJobsQueue, self) self.worker.start() self._getRunningBatchJobIDsTimestamp = None self._getRunningBatchJobIDsCache = {} @classmethod def supportsWorkerCleanup(cls): return False @classmethod def supportsHotDeployment(cls): return False def issueBatchJob(self, jobNode): # Avoid submitting internal jobs to the batch queue, handle locally localID = self.handleLocalJob(jobNode) if localID: return localID else: self.checkResourceRequest(jobNode.memory, jobNode.cores, jobNode.disk) jobID = self.getNextJobID() self.currentJobs.add(jobID) self.newJobsQueue.put((jobID, jobNode.cores, jobNode.memory, jobNode.command)) logger.debug("Issued the job command: %s with job id: %s ", jobNode.command, str(jobID)) return jobID def killBatchJobs(self, jobIDs): """ Kills the given jobs, represented as Job ids, then checks they are dead by checking they are not in the list of issued jobs. """ self.killLocalJobs(jobIDs) jobIDs = set(jobIDs) logger.debug('Jobs to be killed: %r', jobIDs) for jobID in jobIDs: self.killQueue.put(jobID) while jobIDs: killedJobId = self.killedJobsQueue.get() if killedJobId is None: break jobIDs.remove(killedJobId) if killedJobId in self.currentJobs: self.currentJobs.remove(killedJobId) if jobIDs: logger.debug('Some kills (%s) still pending, sleeping %is', len(jobIDs), self.sleepSeconds()) def getIssuedBatchJobIDs(self): """ Gets the list of issued jobs """ return list(self.getIssuedLocalJobIDs()) + list(self.currentJobs) def getRunningBatchJobIDs(self): """Retrieve running job IDs from local and batch scheduler. Respects statePollingWait and will return cached results if not within time period to talk with the scheduler. 
""" if (self._getRunningBatchJobIDsTimestamp and (datetime.now() - self._getRunningBatchJobIDsTimestamp).total_seconds() < self.config.statePollingWait): batchIds = self._getRunningBatchJobIDsCache else: batchIds = self.worker.getRunningJobIDs() self._getRunningBatchJobIDsCache = batchIds self._getRunningBatchJobIDsTimestamp = datetime.now() batchIds.update(self.getRunningLocalJobIDs()) return batchIds def getUpdatedBatchJob(self, maxWait): local_tuple = self.getUpdatedLocalJob(0) if local_tuple: return local_tuple else: try: item = self.updatedJobsQueue.get(timeout=maxWait) except Empty: return None logger.debug('UpdatedJobsQueue Item: %s', item) jobID, retcode = item self.currentJobs.remove(jobID) return jobID, retcode, None def shutdown(self): """ Signals worker to shutdown (via sentinel) then cleanly joins the thread """ self.shutdownLocal() newJobsQueue = self.newJobsQueue self.newJobsQueue = None newJobsQueue.put(None) self.worker.join() def setEnv(self, name, value=None): if value and ',' in value: raise ValueError(type(self).__name__ + " does not support commata in environment variable values") return super(AbstractGridEngineBatchSystem,self).setEnv(name, value) @classmethod def getWaitDuration(self): return 5 def sleepSeconds(self, sleeptime=1): """ Helper function to drop on all state-querying functions to avoid over-querying. """ time.sleep(sleeptime) return sleeptime @abstractclassmethod def obtainSystemConstants(cls): """ Returns the max. memory and max. CPU for the system """ raise NotImplementedError()
class SafeQueue(object): """ Many writers Single Reader multiprocessing safe Queue """ __thread_pool = SingletonThreadPool() def __init__(self, *args, **kwargs): self._reader_thread = None self._reader_thread_started = False self._q = SimpleQueue(*args, **kwargs) # Fix the simple queue write so it uses a single OS write, making it atomic message passing # noinspection PyBroadException try: # noinspection PyUnresolvedReferences,PyProtectedMember self._q._writer._send_bytes = partial(SafeQueue._pipe_override_send_bytes, self._q._writer) except Exception: pass self._internal_q = None self._q_size = 0 def empty(self): return self._q.empty() and (not self._internal_q or self._internal_q.empty()) def is_pending(self): # check if we have pending requests to be pushed (it does not mean they were pulled) # only call from main put process return self._q_size > 0 def close(self, event, timeout=100.0): # wait until all pending requests pushed tic = time() while self.is_pending(): if event: event.set() if not self.__thread_pool.is_active(): break sleep(0.1) if timeout and (time()-tic) > timeout: break def get(self, *args, **kwargs): return self._get_internal_queue(*args, **kwargs) def batch_get(self, max_items=1000, timeout=0.2, throttle_sleep=0.1): buffer = [] timeout_count = int(timeout/throttle_sleep) empty_count = timeout_count while len(buffer) < max_items: while not self.empty() and len(buffer) < max_items: try: buffer.append(self._get_internal_queue(block=False)) empty_count = 0 except Empty: break empty_count += 1 if empty_count > timeout_count or len(buffer) >= max_items: break sleep(throttle_sleep) return buffer def put(self, obj): # GIL will make sure it is atomic self._q_size += 1 # make sure the block put is done in the thread pool i.e. in the background obj = pickle.dumps(obj) self.__thread_pool.get().apply_async(self._q_put, args=(obj, )) def _q_put(self, obj): self._q.put(obj) # GIL will make sure it is atomic self._q_size -= 1 def _init_reader_thread(self): if not self._internal_q: self._internal_q = TrQueue() if not self._reader_thread or not self._reader_thread.is_alive(): # read before we start the thread self._reader_thread = Thread(target=self._reader_daemon) self._reader_thread.daemon = True self._reader_thread.start() # if we have waiting results # wait until thread is up and pushed some results while not self._reader_thread_started: sleep(0.2) # just in case make sure we pulled some stuff if we had any # todo: wait until a queue is not empty, but for some reason that might fail sleep(1.0) def _get_internal_queue(self, *args, **kwargs): self._init_reader_thread() obj = self._internal_q.get(*args, **kwargs) # deserialize return pickle.loads(obj) def _reader_daemon(self): self._reader_thread_started = True # pull from process queue and push into thread queue while True: # noinspection PyBroadException try: obj = self._q.get() if obj is None: break except Exception: break self._internal_q.put(obj) @staticmethod def _pipe_override_send_bytes(self, buf): n = len(buf) # For wire compatibility with 3.2 and lower header = struct.pack("!i", n) # Issue #20540: concatenate before sending, to avoid delays due # to Nagle's algorithm on a TCP socket. # Also note we want to avoid sending a 0-length buffer separately, # to avoid "broken pipe" errors if the other end closed the pipe. self._send(header + buf)
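# A reduced sketch of what SafeQueue's reader daemon does: a background thread
# drains a multiprocessing queue (written to by other processes) into a regular
# queue.Queue so consumers get the usual blocking get()/timeout semantics. The
# pickling, atomic-write patch and thread-pool writer of the original are omitted.
import multiprocessing
import threading
from queue import Queue


def mp_producer(mp_queue):
    for i in range(5):
        mp_queue.put(i)
    mp_queue.put(None)                    # sentinel: no more data


def reader_daemon(mp_queue, thread_queue):
    while True:
        obj = mp_queue.get()              # blocking read from the process queue
        thread_queue.put(obj)
        if obj is None:
            break


if __name__ == '__main__':
    mp_queue = multiprocessing.SimpleQueue()
    thread_queue = Queue()
    reader = threading.Thread(target=reader_daemon,
                              args=(mp_queue, thread_queue), daemon=True)
    reader.start()
    proc = multiprocessing.Process(target=mp_producer, args=(mp_queue,))
    proc.start()
    while True:
        item = thread_queue.get()
        if item is None:
            break
        print('received', item)
    proc.join()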
class SocketServer(object): """ZMQ-based socket server for sending and receiving messages from the host PC. Because of the weird way in which PyEPL handles events, we can't run this as its own thread, but instead have to poll for events in the general PyEPL machinery. In the future, we should clean up PyEPL entirely so that it does not block other threads (amongst other reasons). :param zmq.Context ctx: """ def __init__(self, ctx=None): self.ctx = ctx or zmq.Context() self._handlers = [] self.sock = self.ctx.socket(zmq.PAIR) self._bound = False self.poller = zmq.Poller() self.poller.register(self.sock, zmq.POLLIN) # Outgoing message queue self._out_queue = Queue() # time of last sent heartbeat message self._last_heartbeat = 0. # Logging of sent and received messages. self.logger = create_logger("network") def join(self): """Block until all outgoing messages have been processed.""" self.logger.warning("Joining doesn't work yet; doing nothing...") # self._out_queue.join() def bind(self, address="tcp://*:8889"): """Bind the socket to start listening for connections. :param str address: ZMQ address string """ self.sock.bind(address) self._bound = True def register_handler(self, func): """Register a message handler. :param callable func: Handler function which takes the message as its only argument. """ self.logger.debug("Adding handler: %s", func.__name__) self._handlers.append(func) def enqueue_message(self, msg): """Submit a new outgoing message to the queue.""" self._out_queue.put_nowait(msg) def send(self, msg): """Immediately transmit a message to the host PC. It is advisable to not call this method directly in most cases, but rather enqueue a message to be sent via :meth:`enqueue_message`. :param RAMMessage msg: Message to send. """ out = msg.jsonize() try: self.logger.debug("Sending message: %s", out) try: self.sock.send(out, zmq.NOBLOCK) except: pass except Exception: self.logger.error("Sending failed!", exc_info=True) def send_heartbeat(self): """Convenience method to send a heartbeat message to the host PC.""" if time.time() - self._last_heartbeat >= 1.0: self.send(HeartbeatMessage()) self._last_heartbeat = time.time() def log_message(self, message, incoming=True): """Log a message to the log file.""" if not incoming: message = message.to_dict() message["in_or_out"] = "in" if incoming else "out" self.logger.info("%s", json.dumps(message)) def handle_incoming(self): events = self.poller.poll(1) if self.sock in dict(events): try: msg = self.sock.recv_json() self.log_message(msg, incoming=True) except: self.logger.error("Unable to decode JSON.", exc_info=True) return for handler in self._handlers: try: handler(msg) except: self.logger.error("Error handling message", exc_info=True) continue def handle_outgoing(self): try: while not self._out_queue.empty(): msg = self._out_queue.get_nowait() self.send(msg) self._out_queue.task_done( ) # so we can join the queue elsewhere self.log_message(msg, incoming=False) except: self.logger.error("Error in outgoing message processing", exc_info=True) def update(self): """Call periodically to check for incoming messages and/or send messages in the outgoing queue. """ self.handle_incoming() self.handle_outgoing()
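# A hedged sketch of the other side of the PAIR socket SocketServer binds above:
# a host-PC style client that connects to the (default) address, pushes one JSON
# message for handle_incoming() to pick up, and waits briefly for anything the
# task enqueued. The address and message shape are assumptions, not part of the
# original protocol definition.
import zmq


def run_host_client(address="tcp://localhost:8889"):
    ctx = zmq.Context()
    sock = ctx.socket(zmq.PAIR)
    sock.connect(address)
    sock.send_json({"type": "SYNC", "data": {}})   # consumed by handle_incoming()
    if sock.poll(timeout=1000):                    # wait up to 1 s for a reply
        print("received:", sock.recv())
    sock.close()
    ctx.term()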
class StenoEngine(object): HOOKS = ''' stroked translated machine_state_changed output_changed config_changed dictionaries_loaded send_string send_backspaces send_key_combination add_translation focus configure lookup quit '''.split() def __init__(self, config, keyboard_emulation): self._config = config self._is_running = False self._queue = Queue() self._lock = threading.RLock() self._machine = None self._machine_state = None self._machine_params = MachineParams(None, None, None) self._formatter = Formatter() self._formatter.set_output(self) self._formatter.add_listener(self._on_translated) self._translator = Translator() self._translator.add_listener(log.translation) self._translator.add_listener(self._formatter.format) self._dictionaries = self._translator.get_dictionary() self._dictionaries_manager = DictionaryLoadingManager() self._running_state = self._translator.get_state() self._keyboard_emulation = keyboard_emulation self._hooks = { hook: [] for hook in self.HOOKS } self._running_extensions = {} def __enter__(self): self._lock.__enter__() return self def __exit__(self, exc_type, exc_value, traceback): self._lock.__exit__(exc_type, exc_value, traceback) def _in_engine_thread(self): raise NotImplementedError() def _same_thread_hook(self, func, *args, **kwargs): if self._in_engine_thread(): func(*args, **kwargs) else: self._queue.put((func, args, kwargs)) def run(self): while True: func, args, kwargs = self._queue.get() try: with self._lock: if func(*args, **kwargs): break except Exception: log.error('engine %s failed', func.__name__[1:], exc_info=True) def _stop(self): self._stop_extensions(self._running_extensions.keys()) if self._machine is not None: self._machine.stop_capture() self._machine = None def _start(self): self._set_output(self._config.get_auto_start()) self._update(full=True) def _set_dictionaries(self, dictionaries): def dictionaries_changed(l1, l2): if len(l1) != len(l2): return True for d1, d2 in zip(l1, l2): if d1 is not d2: return True return False if not dictionaries_changed(dictionaries, self._dictionaries.dicts): # No change. return self._dictionaries = StenoDictionaryCollection(dictionaries) self._translator.set_dictionary(self._dictionaries) self._trigger_hook('dictionaries_loaded', self._dictionaries) def _update(self, config_update=None, full=False, reset_machine=False): original_config = self._config.as_dict() # Update configuration. if config_update is not None: self._config.update(**config_update) config = self._config.as_dict() else: config = original_config # Create configuration update. if full: config_update = config else: config_update = { option: value for option, value in config.items() if value != original_config[option] } if 'machine_type' in config_update: for opt in ( 'machine_specific_options', 'system_keymap', ): config_update[opt] = config[opt] # Update logging. log.set_stroke_filename(config['log_file_name']) log.enable_stroke_logging(config['enable_stroke_logging']) log.enable_translation_logging(config['enable_translation_logging']) # Update output. self._formatter.set_space_placement(config['space_placement']) self._formatter.start_attached = config['start_attached'] self._formatter.start_capitalized = config['start_capitalized'] self._translator.set_min_undo_length(config['undo_levels']) # Update system. system_name = config['system_name'] if system.NAME != system_name: log.info('loading system: %s', system_name) system.setup(system_name) # Update machine. 
update_keymap = False start_machine = False machine_params = MachineParams(config['machine_type'], config['machine_specific_options'], config['system_keymap']) # Do not reset if only the keymap changed. if self._machine_params is None or \ self._machine_params.type != machine_params.type or \ self._machine_params.options != machine_params.options: reset_machine = True if reset_machine: if self._machine is not None: self._machine.stop_capture() self._machine = None machine_type = config['machine_type'] machine_options = config['machine_specific_options'] try: machine_class = registry.get_plugin('machine', machine_type).obj except Exception as e: raise InvalidConfigurationError(str(e)) log.info('setting machine: %s', machine_type) self._machine = machine_class(machine_options) self._machine.set_suppression(self._is_running) self._machine.add_state_callback(self._machine_state_callback) self._machine.add_stroke_callback(self._machine_stroke_callback) self._machine_params = machine_params update_keymap = True start_machine = True elif self._machine is not None: update_keymap = 'system_keymap' in config_update if update_keymap: machine_keymap = config['system_keymap'] if machine_keymap is not None: self._machine.set_keymap(machine_keymap) if start_machine: self._machine.start_capture() # Update running extensions. enabled_extensions = config['enabled_extensions'] running_extensions = set(self._running_extensions) self._stop_extensions(running_extensions - enabled_extensions) self._start_extensions(enabled_extensions - running_extensions) # Trigger `config_changed` hook. if config_update: self._trigger_hook('config_changed', config_update) # Update dictionaries. config_dictionaries = OrderedDict( (d.path, d) for d in config['dictionaries'] ) copy_default_dictionaries(config_dictionaries.keys()) # Start by unloading outdated dictionaries. self._dictionaries_manager.unload_outdated() self._set_dictionaries([ d for d in self._dictionaries.dicts if d.path in config_dictionaries and \ d.path in self._dictionaries_manager ]) # And then (re)load all dictionaries. dictionaries = [] for result in self._dictionaries_manager.load(config_dictionaries.keys()): if isinstance(result, DictionaryLoaderException): d = ErroredDictionary(result.path, result.exception) # Only show an error if it's new. 
if d != self._dictionaries.get(result.path): log.error('loading dictionary `%s` failed: %s', shorten_path(result.path), str(result.exception)) else: d = result d.enabled = config_dictionaries[d.path].enabled dictionaries.append(d) self._set_dictionaries(dictionaries) def _start_extensions(self, extension_list): for extension_name in extension_list: log.info('starting `%s` extension', extension_name) try: extension = registry.get_plugin('extension', extension_name).obj(self) extension.start() except Exception: log.error('initializing extension `%s` failed', extension_name, exc_info=True) else: self._running_extensions[extension_name] = extension def _stop_extensions(self, extension_list): for extension_name in list(extension_list): log.info('stopping `%s` extension', extension_name) extension = self._running_extensions.pop(extension_name) extension.stop() del extension def _quit(self): self._stop() return True def _toggle_output(self): self._set_output(not self._is_running) def _set_output(self, enabled): if enabled == self._is_running: return self._is_running = enabled if enabled: self._translator.set_state(self._running_state) else: self._translator.clear_state() if self._machine is not None: self._machine.set_suppression(enabled) self._trigger_hook('output_changed', enabled) def _machine_state_callback(self, machine_state): self._same_thread_hook(self._on_machine_state_changed, machine_state) def _machine_stroke_callback(self, steno_keys): self._same_thread_hook(self._on_stroked, steno_keys) @with_lock def _on_machine_state_changed(self, machine_state): assert machine_state is not None self._machine_state = machine_state machine_type = self._config.get_machine_type() self._trigger_hook('machine_state_changed', machine_type, machine_state) def _consume_engine_command(self, command): # The first commands can be used whether plover has output enabled or not. if command == 'RESUME': self._set_output(True) return True elif command == 'TOGGLE': self._toggle_output() return True elif command == 'QUIT': self._trigger_hook('quit') return True if not self._is_running: return False # These commands can only be run when plover has output enabled. 
if command == 'SUSPEND': self._set_output(False) elif command == 'CONFIGURE': self._trigger_hook('configure') elif command == 'FOCUS': self._trigger_hook('focus') elif command == 'ADD_TRANSLATION': self._trigger_hook('add_translation') elif command == 'LOOKUP': self._trigger_hook('lookup') else: command_args = command.split(':', 1) command_fn = registry.get_plugin('command', command_args[0]).obj command_fn(self, command_args[1] if len(command_args) == 2 else '') return False def _on_stroked(self, steno_keys): stroke = Stroke(steno_keys) log.stroke(stroke) self._translator.translate(stroke) self._trigger_hook('stroked', stroke) def _on_translated(self, old, new): if not self._is_running: return self._trigger_hook('translated', old, new) def send_backspaces(self, b): if not self._is_running: return self._keyboard_emulation.send_backspaces(b) self._trigger_hook('send_backspaces', b) def send_string(self, s): if not self._is_running: return self._keyboard_emulation.send_string(s) self._trigger_hook('send_string', s) def send_key_combination(self, c): if not self._is_running: return self._keyboard_emulation.send_key_combination(c) self._trigger_hook('send_key_combination', c) def send_engine_command(self, command): suppress = not self._is_running suppress &= self._consume_engine_command(command) if suppress: self._machine.suppress_last_stroke(self._keyboard_emulation.send_backspaces) def toggle_output(self): self._same_thread_hook(self._toggle_output) def set_output(self, enabled): self._same_thread_hook(self._set_output, enabled) @property @with_lock def machine_state(self): return self._machine_state @property @with_lock def output(self): return self._is_running @output.setter def output(self, enabled): self._same_thread_hook(self._set_output, enabled) @property @with_lock def config(self): return self._config.as_dict() @config.setter def config(self, update): self._same_thread_hook(self._update, config_update=update) def reset_machine(self): self._same_thread_hook(self._update, reset_machine=True) def load_config(self): try: with open(self._config.target_file, 'rb') as f: self._config.load(f) except Exception: log.error('loading configuration failed, reseting to default', exc_info=True) self._config.clear() return False return True def start(self): self._same_thread_hook(self._start) def quit(self): self._same_thread_hook(self._quit) @with_lock def machine_specific_options(self, machine_type): return self._config.get_machine_specific_options(machine_type) @with_lock def system_keymap(self, machine_type, system_name): return self._config.get_system_keymap(machine_type, system_name) @with_lock def lookup(self, translation): return self._dictionaries.lookup(translation) @with_lock def raw_lookup(self, translation): return self._dictionaries.raw_lookup(translation) @with_lock def reverse_lookup(self, translation): matches = self._dictionaries.reverse_lookup(translation) return [] if matches is None else matches @with_lock def casereverse_lookup(self, translation): matches = self._dictionaries.casereverse_lookup(translation) return set() if matches is None else matches @with_lock def add_dictionary_filter(self, dictionary_filter): self._dictionaries.add_filter(dictionary_filter) @with_lock def remove_dictionary_filter(self, dictionary_filter): self._dictionaries.remove_filter(dictionary_filter) @with_lock def get_suggestions(self, translation): return Suggestions(self._dictionaries).find(translation) @property @with_lock def translator_state(self): return self._translator.get_state() 
@translator_state.setter @with_lock def translator_state(self, state): self._translator.set_state(state) @with_lock def clear_translator_state(self, undo=False): if undo: state = self._translator.get_state() self._formatter.format(state.translations, (), None) self._translator.clear_state() @property @with_lock def starting_stroke_state(self): return StartingStrokeState(self._formatter.start_attached, self._formatter.start_capitalized) @starting_stroke_state.setter @with_lock def starting_stroke_state(self, state): self._formatter.start_attached = state.attach self._formatter.start_capitalized = state.capitalize @with_lock def add_translation(self, strokes, translation, dictionary_path=None): if dictionary_path is None: dictionary_path = self._dictionaries.first_writable().path self._dictionaries.set(strokes, translation, path=dictionary_path) self._dictionaries.save(path_list=(dictionary_path,)) @property @with_lock def dictionaries(self): return self._dictionaries # Hooks. def _trigger_hook(self, hook, *args, **kwargs): for callback in self._hooks[hook]: try: callback(*args, **kwargs) except Exception: log.error('hook %r callback %r failed', hook, callback, exc_info=True) @with_lock def hook_connect(self, hook, callback): self._hooks[hook].append(callback) @with_lock def hook_disconnect(self, hook, callback): self._hooks[hook].remove(callback)
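# A self-contained sketch of the dispatch pattern StenoEngine uses: calls made
# from other threads are queued as (func, args, kwargs) tuples and executed on
# the engine's own thread, so engine state is only ever touched from one place.
# `MiniEngine` is illustrative and much smaller than the real engine.
import threading
from queue import Queue


class MiniEngine(object):
    def __init__(self):
        self._queue = Queue()
        self._thread = threading.Thread(target=self._run)
        self.counter = 0

    def start(self):
        self._thread.start()

    def _in_engine_thread(self):
        return threading.current_thread() is self._thread

    def _same_thread_hook(self, func, *args, **kwargs):
        if self._in_engine_thread():
            func(*args, **kwargs)
        else:
            self._queue.put((func, args, kwargs))

    def _run(self):
        while True:
            func, args, kwargs = self._queue.get()
            if func(*args, **kwargs):     # a handler returning True stops the loop
                break

    def _bump(self):
        self.counter += 1

    def _quit(self):
        return True

    def bump(self):
        self._same_thread_hook(self._bump)

    def quit(self):
        self._same_thread_hook(self._quit)


if __name__ == '__main__':
    engine = MiniEngine()
    engine.start()
    for _ in range(3):
        engine.bump()                     # queued, runs on the engine thread
    engine.quit()
    engine._thread.join()
    print('counter =', engine.counter)    # -> 3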
def __init__(self, url, subscription_id, callback, paused):
    self.url = url
    self.subscription_id = subscription_id
    self.callback = callback
    self.paused = paused
    self.event_q = Queue()
def __init__(self, executor):
    self.executor = executor
    self.TASK_CONFIG_INTERFACE = executor.TASK_CONFIG_INTERFACE
    self.queue = Queue()
class GraphiteReporter(threading.Thread): """A graphite reporter thread.""" def __init__(self, host, port, maxQueueSize=10000): """Connect to a Graphite server on host:port.""" threading.Thread.__init__(self) self.host, self.port = host, port self.sock = None self.queue = Queue() self.maxQueueSize = maxQueueSize self.daemon = True def run(self): """Run the thread.""" while True: try: try: name, value, valueType, stamp = self.queue.get() except TypeError: break self.log(name, value, valueType, stamp) finally: self.queue.task_done() def connect(self): """Connects to the Graphite server if not already connected.""" if self.sock is not None: return backoff = 0.01 while True: try: sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.settimeout(5) sock.connect((self.host, self.port)) self.sock = sock return except socket.error: time.sleep(random.uniform(0, 2.0 * backoff)) backoff = min(backoff * 2.0, 5.0) def disconnect(self): """Disconnect from the Graphite server if connected.""" if self.sock is not None: try: self.sock.close() except socket.error: pass finally: self.sock = None def _sendMsg(self, msg): """Send a line to graphite. Retry with exponential backoff.""" if not self.sock: self.connect() if not isinstance(msg, binary_type): msg = msg.encode("UTF-8") backoff = 0.001 while True: try: self.sock.sendall(msg) break except socket.error: log.warning('Graphite connection error', exc_info=True) self.disconnect() time.sleep(random.uniform(0, 2.0 * backoff)) backoff = min(backoff * 2.0, 5.0) self.connect() def _sanitizeName(self, name): """Sanitize a metric name.""" return name.replace(' ', '-') def log(self, name, value, valueType=None, stamp=None): """Log a named numeric value. The value type may be 'value', 'count', or None.""" if type(value) == float: form = "%s%s %2.2f %d\n" else: form = "%s%s %s %d\n" if valueType is not None and len( valueType) > 0 and valueType[0] != '.': valueType = '.' + valueType if not stamp: stamp = time.time() self._sendMsg( form % (self._sanitizeName(name), valueType or '', value, stamp)) def enqueue(self, name, value, valueType=None, stamp=None): """Enqueue a call to log.""" # If queue is too large, refuse to log. if self.maxQueueSize and self.queue.qsize() > self.maxQueueSize: return # Stick arguments into the queue self.queue.put((name, value, valueType, stamp)) def flush(self): """Block until all stats have been sent to Graphite.""" self.queue.join() def shutdown(self): """Shut down the background thread.""" self.queue.put(None) self.flush()
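# A hedged usage sketch for GraphiteReporter above. It assumes a Carbon
# plaintext listener is reachable on localhost:2003 (the conventional port);
# without one, connect() will simply keep retrying with backoff.
if __name__ == '__main__':
    reporter = GraphiteReporter('localhost', 2003)
    reporter.start()                              # background thread drains the queue
    reporter.enqueue('app.requests', 42, valueType='count')
    reporter.enqueue('app.latency ms', 12.5)      # spaces are sanitized to dashes
    reporter.flush()                              # block until both points are sent
    reporter.shutdown()                           # the None sentinel stops the thread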
def read_images_in_memory(job_model, dataset, node, trainer): """ Reads all images into memory and applies augmentation if enabled """ concurrent = psutil.cpu_count() dataset_config = dataset['config'] controller = {'running': True} q = Queue(concurrent) result = {'X_train': [], 'Y_train': [], 'X_test': [], 'Y_test': []} images = [] max = 0 path = job_model.get_dataset_downloads_dir(dataset) if 'path' in dataset['config']: path = dataset['config']['path'] classes_count = 0 category_map = {} classes = [] trainer.set_status('LOAD IMAGES INTO MEMORY') try: for i in range(concurrent): t = ImageReadWorker(q, job_model, node, path, images, controller) t.daemon = True t.start() for validation_or_training in ['validation', 'training']: if os.path.isdir(path + '/' + validation_or_training): for category_name in os.listdir(path + '/' + validation_or_training): if os.path.isdir(path + '/' + validation_or_training + '/' + category_name): if category_name not in category_map: category_map[category_name] = classes_count if 'classes' in dataset_config and 'category_' in category_name: category_idx = int( category_name.replace('category_', '')) category_map[category_name] = category_idx target_category = dataset_config['classes'][ category_idx] classes.append(target_category['title'] or 'Class %s' % (category_idx, )) else: classes.append(category_name) classes_count += 1 for id in os.listdir(path + '/' + validation_or_training + '/' + category_name): file_path = os.path.join(path, validation_or_training, category_name, id) q.put([ file_path, validation_or_training == 'validation', category_name ]) max += 1 q.join() controller['running'] = False train_images = [] test_images = [] for v in images: image, validation, category_dir = v if validation is True: test_images.append([image, category_map[category_dir]]) else: train_images.append([image, category_map[category_dir]]) train_datagen = None augmentation = bool(get_option(dataset_config, 'augmentation', False)) if augmentation: train_datagen = get_image_data_augmentor_from_dataset(dataset) train = InMemoryDataGenerator(train_datagen, train_images, classes_count, job_model.job['config']['batchSize']) test = InMemoryDataGenerator(None, test_images, classes_count, job_model.job['config']['batchSize']) nb_sample = len(train_images) trainer.set_info('Dataset size', { 'training': nb_sample, 'validation': len(test_images) }) trainer.set_generator_training_nb(nb_sample) trainer.set_generator_validation_nb(len(test_images)) trainer.logger.info(( "Found %d classes, %d images (%d in training [%saugmented], %d in validation). Read all images into memory from %s" % (classes_count, max, len(train_images), 'not ' if augmentation is False else '', len(test_images), path))) if classes_count == 0: trainer.logger.warning( "Could not find any classes. Does the directory contains images?" ) sys.exit(1) trainer.output_size = classes_count trainer.set_info('classes', classes) trainer.classes = classes result['X_train'] = train result['Y_train'] = train result['X_test'] = test result['Y_test'] = test return result except KeyboardInterrupt: controller['running'] = False sys.exit(1)
class ServiceManager(object):
    """
    Manages the scheduling of services.
    """
    def __init__(self, jobStore, toilState):
        logger.debug("Initializing service manager")
        self.jobStore = jobStore

        self.toilState = toilState

        self.jobGraphsWithServicesBeingStarted = set()

        self._terminate = Event()  # This is used to terminate the thread associated
        # with the service manager

        self._jobGraphsWithServicesToStart = Queue()  # This is the input queue of
        # jobGraphs that have services that need to be started

        self._jobGraphsWithServicesThatHaveStarted = Queue()  # This is the output queue
        # of jobGraphs that have services that are already started

        self._serviceJobGraphsToStart = Queue()  # This is the queue of services for the
        # batch system to start

        self.jobsIssuedToServiceManager = 0  # The number of jobs the service manager
        # is scheduling

        # Start a thread that starts the services of jobGraphs in the
        # jobsWithServicesToStart input queue and puts the jobGraphs whose services
        # are running on the jobGraphsWithServicesThatHaveStarted output queue
        self._serviceStarter = Thread(target=self._startServices,
                                      args=(self._jobGraphsWithServicesToStart,
                                            self._jobGraphsWithServicesThatHaveStarted,
                                            self._serviceJobGraphsToStart,
                                            self._terminate,
                                            self.jobStore))

    def start(self):
        """
        Start the service scheduling thread.
        """
        self._serviceStarter.start()

    def scheduleServices(self, jobGraph):
        """
        Schedule the services of a job asynchronously.
        When the job's services are running the jobGraph for the job will
        be returned by toil.leader.ServiceManager.getJobGraphsWhoseServicesAreRunning.

        :param toil.jobGraph.JobGraph jobGraph: wrapper of job with services to schedule.
        """
        # Add jobGraph to set being processed by the service manager
        self.jobGraphsWithServicesBeingStarted.add(jobGraph)

        # Add number of jobs managed by ServiceManager
        self.jobsIssuedToServiceManager += sum(map(len, jobGraph.services)) + 1  # The plus one accounts for the root job

        # Asynchronously schedule the services
        self._jobGraphsWithServicesToStart.put(jobGraph)

    def getJobGraphWhoseServicesAreRunning(self, maxWait):
        """
        :param float maxWait: Time in seconds to wait to get a jobGraph before returning
        :return: a jobGraph added to scheduleServices whose services are running, or None
                 if no such job is available.
        :rtype: JobGraph
        """
        try:
            jobGraph = self._jobGraphsWithServicesThatHaveStarted.get(timeout=maxWait)
            self.jobGraphsWithServicesBeingStarted.remove(jobGraph)
            assert self.jobsIssuedToServiceManager >= 0
            self.jobsIssuedToServiceManager -= 1
            return jobGraph
        except Empty:
            return None

    def getServiceJobsToStart(self, maxWait):
        """
        :param float maxWait: Time in seconds to wait to get a job before returning.
        :return: a tuple of (serviceJobStoreID, memory, cores, disk, ..) representing
                 a service job to start.
        :rtype: toil.job.ServiceJobNode
        """
        try:
            serviceJob = self._serviceJobGraphsToStart.get(timeout=maxWait)
            assert self.jobsIssuedToServiceManager >= 0
            self.jobsIssuedToServiceManager -= 1
            return serviceJob
        except Empty:
            return None

    def killServices(self, services, error=False):
        """
        :param dict services: Maps service jobStoreIDs to the communication flags for the service
        """
        for serviceJobStoreID in services:
            serviceJob = services[serviceJobStoreID]
            if error:
                self.jobStore.deleteFile(serviceJob.errorJobStoreID)
            self.jobStore.deleteFile(serviceJob.terminateJobStoreID)

    def isActive(self, serviceJobNode):
        """
        Returns true if the service job has not been told to terminate.

        :rtype: boolean
        """
        return self.jobStore.fileExists(serviceJobNode.terminateJobStoreID)

    def isRunning(self, serviceJobNode):
        """
        Returns true if the service job has started and is active.

        :rtype: boolean
        """
        return (not self.jobStore.fileExists(serviceJobNode.startJobStoreID)) and self.isActive(serviceJobNode)

    def check(self):
        """
        Check on the service manager thread.

        :raise RuntimeError: If the underlying thread has quit.
        """
        if not self._serviceStarter.is_alive():
            raise RuntimeError("Service manager has quit")

    def shutdown(self):
        """
        Cleanly terminate worker threads starting and killing services. Will block
        until all services are started and blocked.
        """
        logger.debug('Waiting for service manager thread to finish ...')
        startTime = time.time()
        self._terminate.set()  # Signal the service starter thread to stop
        self._serviceStarter.join()

        # Kill any services still running to avoid deadlock
        for services in list(self.toilState.servicesIssued.values()):
            self.killServices(services, error=True)

        logger.debug('... finished shutting down the service manager. Took %s seconds',
                     time.time() - startTime)

    @staticmethod
    def _startServices(jobGraphsWithServicesToStart,
                       jobGraphsWithServicesThatHaveStarted,
                       serviceJobsToStart,
                       terminate, jobStore):
        """
        Thread used to schedule services.
        """
        servicesThatAreStarting = set()
        servicesRemainingToStartForJob = {}
        serviceToJobGraph = {}
        while True:
            with throttle(1.0):
                if terminate.is_set():
                    logger.debug('Received signal to quit starting services.')
                    break
                try:
                    jobGraph = jobGraphsWithServicesToStart.get_nowait()
                    if len(jobGraph.services) > 1:
                        # Have to fall back to the old blocking behavior to
                        # ensure entire service "groups" are issued as a whole.
                        blockUntilServiceGroupIsStarted(jobGraph,
                                                        jobGraphsWithServicesThatHaveStarted,
                                                        serviceJobsToStart, terminate, jobStore)
                        continue

                    # Found a new job that needs to schedule its services.
                    for serviceJob in jobGraph.services[0]:
                        serviceToJobGraph[serviceJob] = jobGraph
                    servicesRemainingToStartForJob[jobGraph] = len(jobGraph.services[0])

                    # Issue the service jobs all at once.
                    for serviceJob in jobGraph.services[0]:
                        logger.debug("Service manager is starting service job: %s, start ID: %s",
                                     serviceJob, serviceJob.startJobStoreID)
                        serviceJobsToStart.put(serviceJob)

                    # We should now start to monitor these services to see if
                    # they've started yet.
                    servicesThatAreStarting.update(jobGraph.services[0])
                except Empty:
                    # No new jobs that need services scheduled.
                    pass

                for serviceJob in list(servicesThatAreStarting):
                    if not jobStore.fileExists(serviceJob.startJobStoreID):
                        # Service has started!
                        servicesThatAreStarting.remove(serviceJob)
                        parentJob = serviceToJobGraph[serviceJob]
                        servicesRemainingToStartForJob[parentJob] -= 1
                        assert servicesRemainingToStartForJob[parentJob] >= 0
                        del serviceToJobGraph[serviceJob]

                # Find if any jobGraphs have had *all* their services started.
                jobGraphsToRemove = set()
                for jobGraph, remainingServices in servicesRemainingToStartForJob.items():
                    if remainingServices == 0:
                        jobGraphsWithServicesThatHaveStarted.put(jobGraph)
                        jobGraphsToRemove.add(jobGraph)
                for jobGraph in jobGraphsToRemove:
                    del servicesRemainingToStartForJob[jobGraph]
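A hedged sketch of how a leader-style loop might drive this manager. The jobStore, toilState, and jobGraph objects are placeholders created elsewhere, submit_to_batch_system is a hypothetical helper, and the real Toil leader does considerably more bookkeeping than this.

# Illustrative driver loop only; not the actual Toil leader.
serviceManager = ServiceManager(jobStore, toilState)
serviceManager.start()
try:
    serviceManager.scheduleServices(jobGraph)   # asynchronously start this job's services

    while True:
        # Hand any ready service jobs to the batch system (stubbed here).
        serviceJob = serviceManager.getServiceJobsToStart(maxWait=1.0)
        if serviceJob is not None:
            submit_to_batch_system(serviceJob)  # hypothetical helper

        # Once all of a job's services are up, the jobGraph becomes available.
        readyJobGraph = serviceManager.getJobGraphWhoseServicesAreRunning(maxWait=1.0)
        if readyJobGraph is not None:
            break                               # services are running; run the job itself

        serviceManager.check()                  # raise if the starter thread died
finally:
    serviceManager.shutdown()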