Example #1
def handler(event, context):
    logger.info("Start!")

    executor = ThreadPoolExecutor(max_workers=1000)
    main_loop = asyncio.new_event_loop()
    main_loop.set_default_executor(executor)
    asyncio.set_event_loop(main_loop)

    poll = Poll(main_loop)
    cal = poll.cal
    update_and_delete = UpdateAndDelete(main_loop, executor)

    table = event['table']
    queue_url = event['queueUrl']
    message_count = event['messageCount']

    poll.messages(sqs, queue_url, message_count)

    logger.info("Receive API count: {}".format(poll.fetch_count))
    logger.info("Fetched messages: {}".format(poll.message_count))

    update_and_delete.execute(sqs_client, db, queue_url, table, cal.stats)

    logger.info("Update API count: {}".format(update_and_delete.update_count))
    logger.info("Delete API count: {}".format(update_and_delete.delete_count))
    logger.info("Delete Message count: {}".format(
        update_and_delete.deleted_message_count))

    main_loop.close()
    executor.shutdown()

    return "Lambda job finished successfully."
Example #2
class TaskPool(BasePool):
    """Thread Task Pool."""

    body_can_be_buffer = True
    signal_safe = False

    def __init__(self, *args, **kwargs):
        super(TaskPool, self).__init__(*args, **kwargs)

        # from 3.5, it is calculated from number of CPUs
        if (3, 0) <= sys.version_info < (3, 5) and self.limit is None:
            self.limit = 5

        self.executor = ThreadPoolExecutor(max_workers=self.limit)

    def on_stop(self):
        self.executor.shutdown()
        super(TaskPool, self).on_stop()

    def on_apply(self, target, args=None, kwargs=None, callback=None,
                 accept_callback=None, **_):
        f = self.executor.submit(apply_target, target, args, kwargs,
                                 callback, accept_callback)
        return ApplyResult(f)

    def _get_info(self):
        return {
            'max-concurrency': self.limit,
            'threads': len(self.executor._threads)
            # TODO use a public api to retrieve the current number of threads
            # in the executor when available. (Currently not available).
        }
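
The comment in Example #2 about the default worker count can be checked directly: with max_workers=None, Python 3.5-3.7 uses os.cpu_count() * 5 threads and 3.8+ uses min(32, os.cpu_count() + 4). A minimal sketch (it peeks at the private _max_workers attribute purely for illustration):

import os
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor()  # max_workers=None -> version-dependent default
# 3.5-3.7: os.cpu_count() * 5; 3.8+: min(32, os.cpu_count() + 4)
print(os.cpu_count(), executor._max_workers)
executor.shutdown()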
Example #3
class ModulePool:

    def __init__(self, workers=1):
        self._jobs = {}
        self._pool = ThreadPoolExecutor(workers)

    def _launch(self, func, hook, dependencies):
        if dependencies:
            for dependency in dependencies:
                while True:
                    if dependency in self._jobs.keys():
                        self._jobs[dependency].result()
                        break
                    else:
                        sleep(0.1)
        func(hook)

    def schedule(self, func, hook):
        innerfunc, args, module, hookname, dependencies = hook
        job = self._pool.submit(self._launch, func, hook, dependencies)
        self._jobs[module] = job

    def shutdown(self):
        for job in as_completed([self._jobs[j] for j in self._jobs]):
            job.result()
        self._pool.shutdown()
Example #4
def main():
    """Main method executed when run"""

    if len(argv) != 2:
        print("Usage: flymirror.py [rules_file]")
        return

    if not file_exists(argv[1]):
        print("Error: rules file", argv[1], "does not exist.")
        return

    config = read_config(argv[1])
    URLS.put(config.start)

    # Start the loops in another thread
    # include 2 extra threads for the loopers
    pool = ThreadPoolExecutor(int(config.workers) + 2)
    perfprint("[START]")
    pool.submit(download_loop, pool)
    pool.submit(handle_response_loop, pool, config)

    # Join on both the queues at once (Yeah, this is hacky -- may break in later versions)
    while URLS.unfinished_tasks or RESPONSES.unfinished_tasks:
        sleep(0.3)

    # Shut everything down (may take 1 second)
    DONE.put(True)
    pool.shutdown()
    perfprint("[END]")
Example #5
    def start(self):

        left_lng = 103.9213455517
        top_lat = 30.7828453209
        right_lng = 104.2178123382
        bottom_lat = 30.4781772402

        offset = 0.002

        if os.path.isfile(self.db_name):
            os.remove(self.db_name)

        try:
            with sqlite3.connect(self.db_name) as c:
                c.execute('''CREATE TABLE mobike
                    (Time DATETIME, bikeIds VARCHAR(12), bikeType TINYINT,distId INTEGER,distNum TINYINT, type TINYINT, x DOUBLE, y DOUBLE)''')
        except Exception as ex:
            pass

        executor = ThreadPoolExecutor(max_workers=250)
        print("Start")
        self.total = 0
        lat_range = np.arange(top_lat, bottom_lat, -offset)
        for lat in lat_range:
            lng_range = np.arange(left_lng, right_lng, offset)
            for lon in lng_range:
                self.total += 1
                executor.submit(self.get_nearby_bikes, (lat, lon))

        executor.shutdown()
        self.group_data()
Example #6
def handler(event, context):
    logger.info("Start!")

    executor = ThreadPoolExecutor(max_workers=100)

    cal = Sum()

    queue_url = event['queueUrl']
    message_count = event['messageCount']

    queue = sqs.Queue(queue_url)
    num_of_calls = message_count // batch_count

    queues = []
    for i in range(num_of_calls):
        queues.append(queue)

    message_count = 0

    responses = executor.map(one_request, queues)
    for response in responses:
        message_count += len(response)
        for msg in response:
            cal.add(msg)

    logger.info("Receive API count: {}".format(num_of_calls))
    logger.info("Fetched messages: {}".format(message_count))

    executor.shutdown()
Example #7
class Session(object):
    def __init__(self, hostnames=None, port=8080, max_workers=6, catalog="hive", schema="default", user='******', result_mode='dict'):
        self.catalog = catalog
        self.schema = schema
        self.hostnames = hostnames
        self.port = port
        self.user = user
        self.result_mode = result_mode

        self.executor = ThreadPoolExecutor(max_workers=max_workers)

        self.default_timeout = None
        self.wait_step = 0.2
        self.wait_max_step = 10

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def close(self):
        self.executor.shutdown()

    def query_async(self, q, args=None):
        q = Query(self, q, args)
        return self.executor.submit(q.execute_wait)

    def query(self, q, args=None):
        q = Query(self, q, args)
        q.execute()
        return q
Example #8
class ScheduledIOPool(Thread):
    """Schedule events to an IO worker pool.
    """

    def __init__(self, workers, delay):
        super(ScheduledIOPool, self).__init__()
        self.scheduler = sched.scheduler(time.time, time.sleep)
        self.thread_pool = ThreadPoolExecutor(max_workers=workers)
        self.delay = delay
        self._shutdown = False  # renamed so the flag does not shadow the shutdown() method
        self.daemon = True

    def shutdown(self):
        self._shutdown = True

    def run(self):
        while not self._shutdown:
            try:
                self.scheduler.run()
                time.sleep(.1)    # TODO: no wait/notify when queue is empty
            except:     # TODO log
                print "Unexpected error scheduling IO:"
                traceback.print_exc()
                time.sleep(.1)
        self.thread_pool.shutdown()

    def cancel(self, event):
        return self.scheduler.cancel(event)

    def schedule(self, *args):
        return self.scheduler.enter(self.delay, 1,
                                    self.thread_pool.submit, args)
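
A usage sketch for the class above, assuming a hypothetical blocking function write_record(path, data): schedule() packs its positional arguments into the tuple handed to thread_pool.submit, so the first argument must be the callable and the rest are its arguments.

pool = ScheduledIOPool(workers=4, delay=2.0)
pool.start()  # Thread.start() -> run() keeps draining the sched queue

# After ~2 seconds this runs write_record('/tmp/out.log', b'payload') on the worker pool.
event = pool.schedule(write_record, '/tmp/out.log', b'payload')
# pool.cancel(event) would drop it while it is still queued.

pool.shutdown()  # sets the flag; run() exits and shuts the executor down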
Example #9
class ThreadPool:

    def __init__(self, workers=1):
        self._jobs = []
        self._pool = ThreadPoolExecutor(workers)

    def _finish(self, job):
        try:
            self._jobs.remove(job)
        except ValueError:
            pass
        try:
            e = job.exception()
            if e:
                raise e
        except Exception as e:
            error(str(e), exc_info=True)

    def schedule(self, func):
        job = self._pool.submit(func)
        job.add_done_callback(self._finish)
        self._jobs.append(job)

    def shutdown(self):
        for job in as_completed(self._jobs):
            job.result()
        self._pool.shutdown()
Example #10
class ActiveVoice:
    def __init__(self):
        # See https://msdn.microsoft.com/en-us/library/ms723602%28v=vs.85%29.aspx for doc
        self._voice = win32com.client.Dispatch("SAPI.SpVoice")
        self._executor = ThreadPoolExecutor(max_workers=1)

    def shutdown(self):
        self._executor.shutdown(False)

    def speak(self, text, volume=DEFAULT_VOLUME, rate=DEFAULT_RATE):
        volume = max(MIN_VOLUME, min(MAX_VOLUME, volume))
        rate = max(MIN_RATE, min(MAX_RATE, rate))

        def do_speak():
            self._voice.Volume = volume
            self._voice.Rate = rate
            self._voice.Speak(text)

        self._executor.submit(do_speak)

    def wait(self, time_in_sec):
        self._executor.submit(time.sleep, time_in_sec)

    def list_avatars(self): # pointless, only one on Windows 10!
        avatars = []
        for index, token in enumerate(self._voice.GetVoices("", "")):
            avatars.append( '%d: %s' % (index+1, token.GetDescription()) )
        return avatars
Example #11
def main(cmd, args):

    logging.basicConfig(
        level=logging.INFO,
        format=("%(relativeCreated)04d %(process)05d %(threadName)-10s "
                "%(levelname)-5s %(msg)s"))

    pool = Pool(max_workers=1)

    arg1 = tmp_fname
    arg2 = 'arg2'

    stage_file_name = cmd

    if (glob_stage == 1):

        f = pool.submit(check_output, ["ec-perl", "-w", stage_file_name, args, arg2], shell=True)
        print "stage 1 started. Initiate call to" + stage_file_name
        print "Stage is " + str(glob_stage)
    else:

        f = pool.submit(check_output, ["ec-perl", stage_file_name, " ", args, arg2], shell=True)
        print "stage 2 started. Initiate call to" + stage_file_name + " " + args + " " + arg2 + "'"
        print "Stage is " + str(glob_stage)

    f.add_done_callback(callback)
    pool.shutdown(wait=False)
Example #12
class BaseDataLayer(Layer):

    def setup(self, bottom, top):
        param = eval(self.param_str_)
        self.batch_size_ = param['batch_size']
        self.data_setup(bottom, top)
        top[0].reshape(*self.data_.shape)
        self.executor_ = ThreadPoolExecutor(max_workers=1)
        self.thread_ = self.executor_.submit(self.internal_thread_entry)

    def reshape(self, bottom, top):
        pass

    def forward(self, bottom, top):
        self.thread_.result()
        top[0].reshape(*self.data_.shape)
        top[0].data[...] = self.data_
        self.thread_ = self.executor_.submit(self.internal_thread_entry)

    def data_setup(self, bottom, top):
        raise NotImplementedError()

    def internal_thread_entry(self):
        raise NotImplementedError()

    def __del__(self):
        self.thread_.result()
        self.executor_.shutdown()
        super(self.__class__, self).__del__()
Example #13
def upload_file(upload_file_name,
                temp_file_name='encoded.csv',
                split_file_format="{orig_file}_{id}.{orig_ext}",
                parent_folder_id='0B46HJMu9Db4xTUxhQ0x4WHpfVmM'):
    file_name = os.path.basename(upload_file_name)

    # Encode file.
    base64.encode(open(upload_file_name), open(temp_file_name, 'w+'))

    # Split file.
    num_split_files, file_names = splitfile(temp_file_name, SPLIT_SIZE, split_file_format)

    # Start upload threads.
    start = time.time()
    file_id = uuid.uuid1()
    thread_pool = ThreadPoolExecutor(max_workers=MAX_DOWNLOADS)

    for i in range(num_split_files):
        current_file_name = file_names[i]
        up_t = upload_worker.UploadWorker(index=i + 1,
                                          file_id=file_id,
                                          filename=file_name,
                                          parent_folder_id=parent_folder_id,
                                          total_file_num=num_split_files,
                                          upload_file_name=current_file_name)
        future = thread_pool.submit(up_t.run)

    # Wait for completion.
    thread_pool.shutdown()

    end = time.time()
    m, s = divmod(end - start, 60)
    print "Overall time taken: ", m, "m ", s, "s"
    return file_id
Example #14
class HttpThreadpool(object):

    def __init__(self, max_workers=10, queue_size=200):
        self.executor = ThreadPoolExecutor(max_workers, queue_size)

    @retry(max_tries=3)
    def _download(self, url):
        req = urllib2.Request(url)
        req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36')
        rsp = urllib2.urlopen(req, timeout=30)
        return rsp.read()

    def download_and_process(self, url, body_process):
        return self.executor.submit(self._download_and_process, url, body_process)

    def _download_and_process(self, url, body_process):
        body_func, body_args, body_kw = body_process
        body = self._download(url)
        try:
            body_func(body, *body_args, **body_kw)
        except Exception as e:
            print url, traceback.format_exc()

    def shutdown(self):
        self.executor.shutdown()
Example #15
class LoaferRunner:
    def __init__(self, loop=None, max_workers=None, on_stop_callback=None):
        self._on_stop_callback = on_stop_callback
        self.loop = loop or asyncio.get_event_loop()

        # XXX: See https://github.com/python/asyncio/issues/258
        # The minimum value depends on the number of cores in the machine
        # See https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
        self._executor = ThreadPoolExecutor(max_workers)
        self.loop.set_default_executor(self._executor)

    def start(self, future=None, run_forever=True):
        start = 'starting Loafer, pid={}, run_forever={}'
        logger.info(start.format(os.getpid(), run_forever))

        self.loop.add_signal_handler(signal.SIGINT, self.stop)
        self.loop.add_signal_handler(signal.SIGTERM, self.stop)

        try:
            if run_forever:
                self.loop.run_forever()
            else:
                self.loop.run_until_complete(future)
                self.stop()
        except CancelledError:
            self.loop.close()

    def stop(self, *args, **kwargs):
        logger.info('stopping Loafer ...')
        if callable(self._on_stop_callback):
            self._on_stop_callback()

        self._executor.shutdown(wait=True)
        if self.loop.is_running():
            self.loop.stop()
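
The point of set_default_executor in the runner above is that loop.run_in_executor(None, ...) then dispatches blocking calls onto that pool. A minimal standalone sketch (fetch_blocking and the URL are illustrative):

import asyncio
import time
from concurrent.futures import ThreadPoolExecutor

def fetch_blocking(url):  # stand-in for any blocking call
    time.sleep(0.1)
    return 'body of ' + url

async def main():
    loop = asyncio.get_running_loop()
    loop.set_default_executor(ThreadPoolExecutor(max_workers=4))
    # executor=None means "use the default executor installed above"
    body = await loop.run_in_executor(None, fetch_blocking, 'http://example.com')
    print(body)

asyncio.run(main())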
Example #16
def test(no_workers, pipeline, input, state, run_function=run_pipeline):
    executor = ThreadPoolExecutor(max_workers=no_workers)
    try:
        result = run_function(executor, pipeline, input, state)
    finally:
        executor.shutdown(True)
    return result
Example #17
class ExecutorEventEmitter(BaseEventEmitter):
    """An event emitter class which runs handlers in a ``concurrent.futures``
    executor. If using python 2, this will fall back to trying to use the
    ``futures`` backported library (caveats there apply).

    By default, this class creates a default ``ThreadPoolExecutor``, but
    a custom executor may also be passed in explicitly to, for instance,
    use a ``ProcessPoolExecutor`` instead.

    This class runs all emitted events on the configured executor. Errors
    captured by the resulting Future are automatically emitted on the
    ``error`` event. This is unlike the BaseEventEmitter, which have no error
    handling.

    The underlying executor may be shut down by calling the ``shutdown``
    method. Alternately you can treat the event emitter as a context manager::

        with ExecutorEventEmitter() as ee:
            # Underlying executor open

            @ee.on('data')
            def handler(data):
                print(data)

            ee.emit('event')

        # Underlying executor closed

    Since the function call is scheduled on an executor, emit is always
    non-blocking.

    No effort is made to ensure thread safety, beyond using an executor.
    """
    def __init__(self, executor=None):
        super(ExecutorEventEmitter, self).__init__()
        if executor:
            self._executor = executor
        else:
            self._executor = ThreadPoolExecutor()

    def _emit_run(self, f, args, kwargs):
        future = self._executor.submit(f, *args, **kwargs)

        @future.add_done_callback
        def _callback(f):
            exc = f.exception()
            if exc:
                self.emit('error', exc)

    def shutdown(self, wait=True):
        """Call ``shutdown`` on the internal executor."""

        self._executor.shutdown(wait=wait)

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.shutdown()
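
The docstring above notes that exceptions raised by handlers are re-emitted on the 'error' event; a small sketch of that behaviour, assuming the class is importable as ExecutorEventEmitter (the handler names and the short sleep are only there to make the demo deterministic):

import time
from concurrent.futures import ThreadPoolExecutor

with ExecutorEventEmitter(executor=ThreadPoolExecutor(max_workers=2)) as ee:

    @ee.on('error')
    def on_error(exc):
        print('handler raised:', exc)

    @ee.on('data')
    def on_data(payload):
        raise ValueError('bad payload: {!r}'.format(payload))

    ee.emit('data', 42)  # on_data runs on the pool; its ValueError is emitted on 'error'
    time.sleep(0.2)      # give the pool time to run on_data and forward the error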
Example #18
def poem_from_template(template, db, sound_cache=None):
    executor = ThreadPoolExecutor(4)
    letter_sound_map = map_letters_to_sounds(db, template, sound_cache)
    process_tmpl_line = threaded(partial(extract_ruleset, db, letter_sound_map),
                                 partial(ruleset_to_line, db))
    poem_lines = executor.map(process_tmpl_line, template)
    executor.shutdown()
    return list(poem_lines)
Example #19
    def fetch(self, tiles):
        """
        Execute all tile requests.

        :param tiles: List of tile requests.
        """
        pool = ThreadPoolExecutor(max_workers=32)
        pool.map(self.fetch_tile, tiles, timeout=5)
        pool.shutdown()
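
In the snippet above the iterator returned by pool.map is never consumed, so the timeout=5 and any exceptions raised inside fetch_tile go unnoticed: Executor.map submits work eagerly but only re-raises when its results are iterated. A variant that surfaces them, under the same assumed fetch_tile interface:

    def fetch(self, tiles):
        """Execute all tile requests and propagate errors/timeouts."""
        pool = ThreadPoolExecutor(max_workers=32)
        try:
            # Iterating the map() result re-raises worker exceptions and
            # enforces the 5-second deadline passed as timeout.
            for _ in pool.map(self.fetch_tile, tiles, timeout=5):
                pass
        finally:
            pool.shutdown()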
Example #20
class LoaferRunner:

    def __init__(self, loop=None, max_workers=None, on_stop_callback=None):
        self._on_stop_callback = on_stop_callback
        self.loop = loop or asyncio.get_event_loop()

        # XXX: See https://github.com/python/asyncio/issues/258
        # The minimum value depends on the number of cores in the machine
        # See https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
        self._executor = ThreadPoolExecutor(max_workers)
        self.loop.set_default_executor(self._executor)

    def start(self, future=None, run_forever=None, debug=False):
        if debug:
            self.loop.set_debug(enabled=debug)

        if future:
            logger.warning(
                'runner `future` argument is deprecated and will be removed in the next major version'
            )
        if run_forever:
            logger.warning(
                'runner `run_forever` argument is deprecated and will be removed in the next major version'
            )

        self.loop.add_signal_handler(signal.SIGINT, self.prepare_stop)
        self.loop.add_signal_handler(signal.SIGTERM, self.prepare_stop)

        try:
            self.loop.run_forever()
        finally:
            self.stop()
            self.loop.close()
            logger.debug('loop.is_running={}'.format(self.loop.is_running()))
            logger.debug('loop.is_closed={}'.format(self.loop.is_closed()))

    def prepare_stop(self, *args):
        if self.loop.is_running():
            # signals loop.run_forever to exit in the next iteration
            self.loop.stop()

    def stop(self, *args, **kwargs):
        logger.info('stopping Loafer ...')
        if callable(self._on_stop_callback):
            self._on_stop_callback()

        logger.info('cancel schedulled operations ...')
        for task in asyncio.Task.all_tasks(self.loop):
            task.cancel()
            if task.cancelled() or task.done():
                continue

            with suppress(CancelledError):
                self.loop.run_until_complete(task)

        self._executor.shutdown(wait=True)
Example #21
    def search(self, query):
        n_days = (self.until - self.since).days
        tp = ThreadPoolExecutor(max_workers=self.n_threads)
        for i in range(0, n_days):
            since_query = self.since + datetime.timedelta(days=i)
            until_query = self.since + datetime.timedelta(days=(i + 1))
            day_query = "%s since:%s until:%s" % (query, since_query.strftime("%Y-%m-%d"),
                                                  until_query.strftime("%Y-%m-%d"))
            tp.submit(self.perform_search, day_query)
        tp.shutdown(wait=True)
Example #22
class ThreadedResolverTest(AsyncTestCase, _ResolverTestMixin):
    def setUp(self):
        super(ThreadedResolverTest, self).setUp()
        from concurrent.futures import ThreadPoolExecutor
        self.executor = ThreadPoolExecutor(2)
        self.resolver = Resolver(self.io_loop, self.executor)

    def tearDown(self):
        self.executor.shutdown()
        super(ThreadedResolverTest, self).tearDown()
Example #23
    def launch(self, *args, **kwargs):
        """
        Launch a new pool with one thread for that one process.
        Then we call the callback.
        """
        pool = Pool(max_workers=1)
        f = pool.submit(Executor, *args, **kwargs)
        f.add_done_callback(self.callback)
        pool.shutdown(wait=True)
        return f
Example #24
class Foo(object):
    def __init__(self):
        self._thread_pool = ThreadPoolExecutor(8)

    @run_on_executor(executor="_thread_pool")
    def foo(self, i):
        logging.debug("{0} {1}".format(current_thread(), i))

    def shutdown(self):
        self._thread_pool.shutdown()
Example #25
class Executor():
    def __init__(self, io_loop=None):
        self.io_loop = io_loop or tornado.ioloop.IOLoop.instance()
        self.executor = ThreadPoolExecutor(2)

    @tornado.concurrent.run_on_executor
    def wait_and_return_a_value(self):
        return 20

    def destroy(self):
        self.executor.shutdown(1)
Example #26
def main():
    executor=ThreadPoolExecutor(max_workers=3)
    futures=[]
    for i in range(6):
        print("Threads: {}".format(len(executor._threads)))
        futures.append(executor.submit(spam,i))
    print("main thread exit")
    for future in concurrent.futures.as_completed(futures):
            print(future.result())

    executor.shutdown()
Example #27
def stop():
    logging.info("Stopping tasks on server...")
    executor = ThreadPoolExecutor(MAX_SUBMISSION_THREADS)
    global _unfinished_run_ids
    stopTasks = set()
    for runId in _unfinished_run_ids:
        stopTasks.add(executor.submit(_stop_run, runId))
    for task in stopTasks:
        task.result()
    executor.shutdown(wait=True)
    logging.info("Stopped all tasks.")
Example #28
def console_main():
    """ This function handles all the console action. """
    setproctitle('image-scraper')
    scraper = ImageScraper()
    scraper.get_arguments()
    print("\nImageScraper\n============\nRequesting page....\n")
    try:
        scraper.get_html()
    except PageLoadError as err:
        if err.status_code is None:
            print("ImageScraper is unable to acces the internet.")
        else:
            print("Page failed to load. Status code: {0}".format(err.status_code))
        sys.exit()

    scraper.get_img_list()

    if len(scraper.images) == 0:
        sys.exit("Sorry, no images found.")
    if scraper.no_to_download is None:
        scraper.no_to_download = len(scraper.images)

    print("Found {0} images: ".format(len(scraper.images)))

    try:
        scraper.process_download_path()
    except DirectoryAccessError:
        print("Sorry, the directory can't be accessed.")
        sys.exit()
    except DirectoryCreateError:
        print("Sorry, the directory can't be created.")
        sys.exit()

    if scraper.dump_urls:
        for img_url in scraper.images:
            print(img_url)

    status_flags = {'count': 0, 'percent': 0.0, 'failed': 0, 'under_min_or_over_max_filesize': 0}
    widgets = ['Progress: ', Percentage(), ' ', Bar(marker=RotatingMarker()),
               ' ', ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=100).start()
    pool = ThreadPoolExecutor(max_workers=scraper.nthreads)
    status_lock = threading.Lock()
    for img_url in scraper.images:
        if status_flags['count'] == scraper.no_to_download:
            break
        pool.submit(download_worker_fn, scraper, img_url, pbar, status_flags, status_lock)
        status_flags['count'] += 1
    pool.shutdown(wait=True)
    pbar.finish()
    print("\nDone!\nDownloaded {0} images\nFailed: {1}\n".format(
        status_flags['count']-status_flags['failed']-status_flags['under_min_or_over_max_filesize'],
        status_flags['failed']))
    return
Example #29
def poem_from_template(template, db: Database, corpus_id, sound_cache=None):
    engine = get_engine(db)
    conn = engine.connect()
    executor = ThreadPoolExecutor(4)
    letter_sound_map = map_letters_to_sounds(conn, corpus_id, template, sound_cache)
    process_tmpl_line = threaded(partial(extract_ruleset, conn, corpus_id, letter_sound_map),
                                 partial(ruleset_to_line, conn, corpus_id))
    poem_lines = executor.map(process_tmpl_line, template)
    executor.shutdown()

    return list(poem_lines)
Example #30
def test_propagation_time(nb_worlds, nb_changes):

    executor = ThreadPoolExecutor(max_workers=nb_worlds)
    pool = Pool(nb_worlds)

    pipes = []
    res = []
    for i in range(nb_worlds-1):
        print("Setting up passthrough between world %d and world %d" % (i, i+1))
        #f = executor.submit(passthrough, "world%d" % i, "world%d" % (i+1))
        conn1, conn2 = Pipe()
        pipes.append(conn1)
        res.append(pool.apply_async(passthrough, ["world%d" % i, "world%d" % (i+1), conn2]))

    time.sleep(0.5)

    ctx = underworlds.Context("test_client")
    entry_world = ctx.worlds["world0"]
    exit_world = ctx.worlds["world%d" % (nb_worlds-1)]

    future = executor.submit(wait_for_changes, exit_world, nb_changes)

    print("\n\n\nPropagating %d change(s) from world %s..." % (nb_changes, entry_world))
    profileonce("start test with %d worlds" % nb_worlds)


    for i in range(nb_changes):
        n = Node()
        n.name = "node_%d" % i
        entry_world.scene.append_and_propagate(n)
        time.sleep(0.01)

    seen, duration = future.result()

    profileonce("end test")


    if seen is None:
        logger.error("The changes have not been seen!")
        duration = 0
    else:
        print("It took %s to be notified of the %d change(s) in world %s" % (ms(duration), nb_changes, exit_world))

    executor.shutdown(wait=True)

    for p in pipes:
        p.send(True)
    pool.close()
    pool.join()
    ctx.close()

    return duration
Example #31
class AWSBenchmark(Benchmark):
    """AWSBenchmark
    an extension of Benchmark class, to run benchmarks on AWS
    """
    @classmethod
    def fetch_results(cls, instances_file, filter=None, force_update=False):
        bench = cls(None, None)
        bench._load_instances(normalize_path(instances_file))
        inst = next(inst for inst in bench.instances.values())
        bench.sid = inst.session
        bucket_name = re.match(r's3://([\w\-.]+)/.*', inst.s3_dir).group(1)
        bench.s3 = boto3.resource('s3', region_name=bench.region)
        bench.bucket = bench._create_s3_bucket(bucket_name, auto_create=False)
        filter = (
            lambda items: [k for k, v in items]) if filter is None else filter
        for iid in filter(bench.instances.items()):
            if force_update:
                bench.instances[iid].success = False
            bench._download_results(iid)

    def __init__(self,
                 framework_name,
                 benchmark_name,
                 constraint_name,
                 region=None):
        """

        :param framework_name:
        :param benchmark_name:
        :param constraint_name:
        :param region:
        """
        super().__init__(framework_name, benchmark_name, constraint_name)
        self.suid = datetime_iso(
            micros=True, no_sep=True
        )  # short sid for AWS entities whose name length is limited
        self.region = region if region \
            else rconfig().aws.region if rconfig().aws['region'] \
            else boto3.session.Session().region_name
        self.ami = rconfig().aws.ec2.regions[self.region].ami
        self.cloudwatch = None
        self.ec2 = None
        self.iam = None
        self.s3 = None
        self.bucket = None
        self.uploaded_resources = None
        self.instance_profile = None
        self.instances = {}
        self.jobs = []
        self.exec = None
        self.monitoring = None
        self._validate2()

    def _validate(self):
        if rconfig().aws.ec2.terminate_instances not in [
                'always', 'success', 'never', True, False
        ]:
            raise ValueError(
                "`terminate_instances` setting should be one among ['always', 'success', 'never']"
            )

        if self.parallel_jobs == 0 or self.parallel_jobs > rconfig(
        ).max_parallel_jobs:
            log.warning("Forcing parallelization to its upper limit: %s.",
                        rconfig().max_parallel_jobs)
            self.parallel_jobs = rconfig().max_parallel_jobs

    def _validate2(self):
        if self.ami is None:
            raise ValueError("Region {} not supported by AMI yet.".format(
                self.region))

    def setup(self, mode):
        if mode == Benchmark.SetupMode.skip:
            log.warning(
                "AWS setup mode set to unsupported {mode}, ignoring.".format(
                    mode=mode))
        # S3 setup to exchange files between local and ec2 instances
        self.s3 = boto3.resource('s3', region_name=self.region)
        self.bucket = self._create_s3_bucket()
        self.uploaded_resources = self._upload_resources()

        # IAM setup to secure exchanges between s3 and ec2 instances
        self.iam = boto3.resource('iam', region_name=self.region)
        if mode == Benchmark.SetupMode.force:
            log.warning("Cleaning up previously created IAM entities if any.")
            self._delete_iam_entities()
        self.instance_profile = self._create_instance_profile()

        # EC2 setup to prepare creation of ec2 instances
        self.ec2 = boto3.resource('ec2', region_name=self.region)
        self.cloudwatch = boto3.resource('cloudwatch', region_name=self.region)

    def cleanup(self):
        self._stop_all_instances()
        self._monitoring_stop()
        self._exec_stop()
        if rconfig().aws.s3.delete_resources is True:
            self._delete_resources()
        if rconfig().aws.iam.temporary is True:
            self._delete_iam_entities()
        if rconfig().aws.s3.temporary is True:
            self._delete_s3_bucket()

    def run(self, task_name=None, fold=None):
        task_defs = self._get_task_defs(task_name)  # validates tasks
        self._exec_start()
        self._monitoring_start()
        if self.parallel_jobs > 1:
            if rconfig().aws.minimize_instances:
                # use one instance per task: all folds executed on same instance
                try:
                    jobs = flatten([
                        self._make_aws_job([task_def.name], fold)
                        for task_def in task_defs
                    ])
                    results = self._run_jobs(jobs)
                    return self._process_results(results, task_name=task_name)
                finally:
                    self.cleanup()
            else:
                # use one instance per fold per task
                return super().run(task_name, fold)
        else:
            # use one instance for all
            try:
                task_names = None if task_name is None else [
                    task_def.name for task_def in task_defs
                ]
                job = self._make_aws_job(task_names, fold)
                results = self._run_jobs([job])
                return self._process_results(results, task_name=task_name)
            finally:
                self.cleanup()

    def _make_job(self, task_def, fold=int):
        return self._make_aws_job([task_def.name], [fold])

    def _exec_start(self):
        if self.exec is not None:
            return
        self.exec = ThreadPoolExecutor(max_workers=1,
                                       thread_name_prefix="aws_exec_")

    def _exec_stop(self):
        if self.exec is None:
            return
        try:
            self.exec.shutdown(wait=True)
        except:
            pass
        finally:
            self.exec = None

    def _exec_send(self, fn, *args, **kwargs):
        if self.exec is not None:
            self.exec.submit(fn, *args, **kwargs)
        else:
            log.warning(
                "Sending exec function while executor is not started: executing the function in the calling thread."
            )
            try:
                fn(*args, **kwargs)
            except:
                pass

    def _make_aws_job(self, task_names=None, folds=None):
        task_names = [] if task_names is None else task_names
        folds = [] if folds is None else [str(f) for f in folds]
        task_def = self._get_task_def(task_names[0]) if len(task_names) >= 1 \
            else self._get_task_def('__defaults__', include_disabled=True, fail_on_missing=False)
        instance_def = ns(
            type=task_def.ec2_instance_type,
            volume_type=task_def.ec2_volume_type,
        ) if task_def else ns(
            type='.'.join([
                rconfig().aws.ec2.instance_type.series,
                rconfig().aws.ec2.instance_type.map.default
            ]),
            volume_type=rconfig().aws.ec2.volume_type,
        )
        if task_def and task_def.min_vol_size_mb > 0:
            instance_def.volume_size = math.ceil(
                (task_def.min_vol_size_mb +
                 rconfig().benchmarks.os_vol_size_mb) / 1024.)
        else:
            instance_def.volume_size = None

        timeout_secs = task_def.max_runtime_seconds if task_def \
            else sum([task.max_runtime_seconds for task in self.benchmark_def])
        timeout_secs += rconfig().aws.overhead_time_seconds

        job = Job('_'.join([
            'aws', self.benchmark_name, self.constraint_name,
            '.'.join(task_names) if len(task_names) > 0 else 'all',
            '.'.join(folds), self.framework_name
        ]))
        job.instance_id = None

        def _run(job_self):
            resources_root = "/custom" if rconfig(
            ).aws.use_docker else "/s3bucket/user"
            job_self.instance_id = self._start_instance(
                instance_def,
                script_params=
                "{framework} {benchmark} {constraint} {task_param} {folds_param} -Xseed={seed}"
                .format(
                    framework=self.framework_name,
                    benchmark=self.benchmark_name
                    if self.benchmark_path.startswith(rconfig().root_dir) else
                    "{}/{}.yaml".format(resources_root, self.benchmark_name),
                    constraint=self.constraint_name,
                    task_param=''
                    if len(task_names) == 0 else ' '.join(['-t'] + task_names),
                    folds_param='' if len(folds) == 0 else ' '.join(['-f'] +
                                                                    folds),
                    seed=rget().seed(int(folds[0]))
                    if len(folds) == 1 else rconfig().seed,
                ),
                # instance_key='_'.join([job.name, datetime_iso(micros=True, time_sep='.')]),
                instance_key=job.name,
                timeout_secs=timeout_secs)
            try:
                return self._wait_for_results(job_self)
            except Exception as e:
                fold = int(folds[0]) if len(folds) > 0 else -1
                results = TaskResult(task_def=task_def, fold=fold)
                return results.compute_scores(self.framework_name, [],
                                              result=ErrorResult(e))

        def _on_done(job_self):
            terminate = self._download_results(job_self.instance_id)
            if not terminate and rconfig(
            ).aws.ec2.terminate_instances == 'success':
                log.warning(
                    "[WARNING]: EC2 Instance %s won't be terminated as we couldn't download the results: "
                    "please terminate it manually or restart it (after clearing its UserData) if you want to inspect the instance.",
                    job_self.instance_id)
            self._stop_instance(job_self.instance_id, terminate=terminate)

        job._run = _run.__get__(job)
        job._on_done = _on_done.__get__(job)
        return job

    def _wait_for_results(self, job):
        instance = self.instances[job.instance_id].instance
        last_console_line = -1

        def log_console():
            nonlocal last_console_line
            try:
                output = instance.console_output(Latest=True)
                if 'Output' in output:
                    output = output[
                        'Output']  # note that console_output only returns the last 64kB of console
                    new_log, last_line = tail(output,
                                              from_line=last_console_line,
                                              include_line=False)
                    if last_line is not None:
                        last_console_line = last_line['line']
                    if new_log:
                        log.info(new_log)
            except Exception as e:
                log.exception(e)

        interrupt = threading.Event()
        while not interrupt.is_set():
            if job.instance_id in self.instances:
                inst_desc = self.instances[job.instance_id]
                if inst_desc['abort']:
                    self._update_instance(job.instance_id, status='aborted')
                    raise Exception("Aborting instance {} for job {}.".format(
                        job.instance_id, job.name))
            try:
                state = instance.state['Name']
                log.info("[%s] checking job %s on instance %s: %s.",
                         datetime_iso(), job.name, job.instance_id, state)
                log_console()
                self._update_instance(job.instance_id, status=state)

                if instance.state['Code'] > 16:  # ended instance
                    log.info("EC2 instance %s is %s: %s", job.instance_id,
                             state, instance.state_reason['Message'])
                    interrupt.set()
            except Exception as e:
                log.exception(e)
            finally:
                interrupt.wait(rconfig().aws.query_frequency_seconds)

    def _get_cpu_activity(self, iid, delta_minutes=60, period_minutes=5):
        now = dt.datetime.utcnow()
        resp = self.cloudwatch.meta.client.get_metric_statistics(
            Namespace='AWS/EC2',
            MetricName='CPUUtilization',
            Dimensions=[dict(Name='InstanceId', Value=iid)],
            StartTime=now - dt.timedelta(minutes=delta_minutes),
            EndTime=now,
            Period=60 * period_minutes,
            Statistics=['Average'],
            Unit='Percent')
        return [
            activity['Average']
            for activity in sorted(resp['Datapoints'],
                                   key=op.itemgetter('Timestamp'),
                                   reverse=True)
        ]

    def _is_hanging(self, iid):
        cpu_config = rconfig().aws.ec2.monitoring.cpu
        activity = self._get_cpu_activity(
            iid,
            delta_minutes=cpu_config.delta_minutes,
            period_minutes=cpu_config.period_minutes)
        threshold = cpu_config.threshold
        min_activity_len = int(cpu_config.delta_minutes /
                               cpu_config.period_minutes)
        return len(activity) >= min_activity_len and all(
            [a < threshold for a in activity])

    def _monitoring_start(self):
        if self.monitoring is not None:
            return

        interrupt = threading.Event()

        def cpu_monitor():
            cpu_config = rconfig().aws.ec2.monitoring.cpu
            if cpu_config.query_frequency_seconds <= 0:
                return
            while not interrupt.is_set():
                try:
                    hanging_instances = list(
                        filter(self._is_hanging, self.instances.keys()))
                    for inst in hanging_instances:
                        if inst in self.instances:
                            inst_desc = self.instances[inst]
                            log.warning(
                                "WARN: Instance %s (%s) has no CPU activity in the last %s minutes.",
                                inst, inst_desc.key, cpu_config.delta_minutes)
                            if cpu_config.abort_inactive_instances:
                                inst_desc.abort = True
                except Exception as e:
                    log.exception(e)
                finally:
                    interrupt.wait(cpu_config.query_frequency_seconds)

        self.monitoring = ns(executor=ThreadPoolExecutor(
            max_workers=1, thread_name_prefix="aws_monitoring_"),
                             interrupt=interrupt)
        self.monitoring.executor.submit(cpu_monitor)

    def _monitoring_stop(self):
        if self.monitoring is None:
            return
        try:
            self.monitoring.interrupt.set()
            self.monitoring.executor.shutdown(wait=False)
        except:
            pass
        finally:
            self.monitoring = None

    def _start_instance(self,
                        instance_def,
                        script_params="",
                        instance_key=None,
                        timeout_secs=-1):
        log.info("Starting new EC2 instance with params: %s.", script_params)
        inst_key = instance_key.lower() if instance_key \
            else "{}_p{}_i{}".format(self.sid,
                                     re.sub(r"[\s-]", '', script_params),
                                     datetime_iso(micros=True, time_sep='.')).lower()
        # TODO: don't know if it would be considerably faster to reuse previously stopped instances sometimes
        #   instead of always creating a new one:
        #   would still need to set a new UserData though before restarting the instance.
        try:
            ebs = dict(VolumeType=instance_def.volume_type)
            if instance_def.volume_size:
                ebs['VolumeSize'] = instance_def.volume_size

            instance = self.ec2.create_instances(
                BlockDeviceMappings=[
                    dict(DeviceName=rconfig().aws.ec2.root_device_name,
                         Ebs=ebs)
                ],
                IamInstanceProfile=dict(Name=self.instance_profile.name),
                ImageId=self.ami,
                InstanceType=instance_def.type,
                MinCount=1,
                MaxCount=1,
                SubnetId=rconfig().aws.ec2.subnet_id,
                UserData=self._ec2_startup_script(
                    inst_key,
                    script_params=script_params,
                    timeout_secs=timeout_secs))[0]
            log.info("Started EC2 instance %s.", instance.id)
            self.instances[instance.id] = ns(instance=instance,
                                             key=inst_key,
                                             status='started',
                                             success='',
                                             start_time=datetime_iso(),
                                             stop_time='')
        except Exception as e:
            fake_iid = "no_instance_{}".format(len(self.instances) + 1)
            self.instances[fake_iid] = ns(instance=None,
                                          key=inst_key,
                                          status='failed',
                                          success=False,
                                          start_time=datetime_iso(),
                                          stop_time=datetime_iso())
            raise e
        finally:
            self._exec_send(self._save_instances)
        return instance.id

    def _stop_instance(self, instance_id, terminate=None):
        instance = self.instances[instance_id].instance
        self.instances[instance_id].instance = None
        if instance is None:
            return

        terminate_config = rconfig().aws.ec2.terminate_instances
        if terminate_config in ['always', True]:
            terminate = True
        elif terminate_config in ['never', False]:
            terminate = False
        else:
            terminate = False if terminate is None else terminate

        try:
            log.info("%s EC2 instances %s.",
                     "Terminating" if terminate else "Stopping", instance_id)
            if terminate:
                response = instance.terminate()
            else:
                response = instance.stop()
            log.info("%s EC2 instances %s with response %s.",
                     "Terminated" if terminate else "Stopped", instance_id,
                     response)
        except Exception as e:
            log.error(
                "ERROR: EC2 instance %s could not be %s!\n"
                "Even if the instance should stop by itself after a certain timeout, "
                "you may want to stop/terminate it manually:\n%s", instance_id,
                "terminated" if terminate else "stopped", str(e))
        finally:
            try:
                state = response['TerminatingInstances'][0]['CurrentState'][
                    'Name']
                log.info("Instance %s state: %s.", instance_id, state)
                self._update_instance(instance_id,
                                      status=state,
                                      stop_time=datetime_iso())
            except:
                pass

    def _update_instance(self, instance_id, **kwargs):
        do_save = False
        if len(kwargs):
            do_save = True
        inst = self.instances[instance_id]
        for k, v in kwargs.items():
            if k in inst and inst[k] != v:
                inst[k] = v
                do_save = True
        if do_save:
            self._exec_send(lambda: self._save_instances())

    def _stop_all_instances(self):
        for iid in self.instances.keys():
            self._stop_instance(iid)

    def _save_instances(self):
        write_csv(
            [(iid, self.instances[iid].status, self.instances[iid].success,
              self.instances[iid].start_time, self.instances[iid].stop_time,
              self.sid, self.instances[iid].key,
              self._s3_key(self.sid, instance_key_or_id=iid, absolute=True))
             for iid in self.instances.keys()],
            columns=[
                'ec2', 'status', 'success', 'start_time', 'stop_time',
                'session', 'instance_key', 's3 dir'
            ],
            path=os.path.join(self.output_dirs.session, 'instances.csv'))

    def _load_instances(self, instances_file):
        df = read_csv(instances_file)
        self.instances = {
            row['ec2']: ns(
                status=row['status'],
                success=row['success'],
                session=row['session'],
                key=row['instance_key'],
                s3_dir=row['s3 dir'],
            )
            for idx, row in df.iterrows()
        }

    def _s3_key(self,
                main_dir,
                *subdirs,
                instance_key_or_id=None,
                absolute=False,
                encode=False):
        root_key = str_def(rconfig().aws.s3.root_key)
        if instance_key_or_id is None:
            ikey = ''
        elif instance_key_or_id in self.instances.keys():
            ikey = self.instances[instance_key_or_id].key
        else:
            ikey = instance_key_or_id
        tokens = [main_dir, ikey, *subdirs]
        if encode:
            tokens = map(uenc, tokens)
        rel_key = url_join(root_key, *tokens)
        return url_join('s3://', self.bucket.name,
                        rel_key) if absolute else rel_key

    def _s3_session(self, *subdirs, **kwargs):
        return self._s3_key(self.sid, *subdirs, **kwargs)

    def _s3_user(self, *subdirs, **kwargs):
        return self._s3_key(self.sid, 'user', *subdirs, **kwargs)

    def _s3_input(self, *subdirs, **kwargs):
        return self._s3_key(self.sid, 'input', *subdirs, **kwargs)

    def _s3_output(self, instance_key_or_id, *subdirs, **kwargs):
        return self._s3_key(self.sid,
                            'output',
                            *subdirs,
                            instance_key_or_id=instance_key_or_id,
                            **kwargs)

    def _create_s3_bucket(self, bucket_name=None, auto_create=True):
        # cf. s3 restrictions: https://docs.aws.amazon.com/AmazonS3/latest/dev/BucketRestrictions.html
        if bucket_name is None:
            bucket_name = rconfig().aws.s3.bucket
            if rconfig().aws.s3.temporary:
                bucket_name += ('-' + self.suid)
        try:
            self.s3.meta.client.head_bucket(Bucket=bucket_name)
            bucket = self.s3.Bucket(bucket_name)
        except botocore.exceptions.ClientError as e:
            error_code = int(e.response['Error']['Code'])
            if error_code == 404 and auto_create:
                log.info("%s bucket doesn't exist, creating it in region %s.",
                         bucket_name, self.region)
                bucket = self.s3.create_bucket(
                    Bucket=bucket_name,
                    CreateBucketConfiguration=dict(
                        LocationConstraint=self.region))
                log.info("S3 bucket %s was successfully created.", bucket_name)
            else:
                if error_code == 403:
                    log.error(
                        "You don't have access rights to S3 bucket %s.\n"
                        "Please ensure that you specified a unique `aws.s3.bucket` in your config file"
                        " or verify that your AWS account is correctly configured"
                        " (cf. docs/README.md for more details).", bucket_name)
                elif error_code == 404:
                    log.error(
                        "S3 bucket %s does not exist and auto-creation is disabled.",
                        bucket_name)
                raise e
        return bucket

    def _delete_s3_bucket(self):
        if self.bucket:
            # we can only delete 1000 objects at a time using this API,
            # but this is intended only for temporary buckets, so no need for pagination
            to_delete = [dict(Key=o.key) for o in self.bucket.objects.all()]
            if len(to_delete) > 0:
                log.info("Deleting objects from S3 bucket %s: %s",
                         self.bucket.name, to_delete)
                self.bucket.delete_objects(
                    Delete=dict(Objects=to_delete, Quiet=True))
            log.info("Deleting s3 bucket %s.", self.bucket.name)
            self.bucket.delete()
            log.info("S3 bucket %s was successfully deleted.",
                     self.bucket.name)

    def _upload_resources(self):
        def dest_path(res_path):
            in_app_dir = res_path.startswith(rconfig().root_dir)
            if in_app_dir:
                return None
            in_input_dir = res_path.startswith(rconfig().input_dir)
            in_user_dir = res_path.startswith(rconfig().user_dir)
            name = (os.path.relpath(res_path, start=rconfig().input_dir)
                    if in_input_dir else
                    os.path.relpath(res_path, start=rconfig().user_dir)
                    if in_user_dir else os.path.basename(res_path))
            return self._s3_input(name) if in_input_dir else self._s3_user(
                name)

        upload_paths = [self.benchmark_path] + rconfig().aws.resource_files
        upload_files = list_all_files(upload_paths,
                                      exclude=rconfig().aws.resource_ignore)
        log.debug("Uploading files to S3: %s", upload_files)
        uploaded_resources = []
        for res in upload_files:
            upload_path = dest_path(res)
            if upload_path is None:
                log.debug("Skipping upload of `%s` to s3 bucket.", res)
                continue
            log.info("Uploading `%s` to `%s` on s3 bucket %s.", res,
                     upload_path, self.bucket.name)
            self.bucket.upload_file(res, upload_path)
            uploaded_resources.append(upload_path)
        return uploaded_resources

    def _delete_resources(self):
        if self.uploaded_resources is None:
            return
        log.info("Deleting uploaded resources `%s` from s3 bucket %s.",
                 self.uploaded_resources, self.bucket.name)
        self.bucket.delete_objects(Delete=dict(
            Objects=[dict(Key=res) for res in self.uploaded_resources]))

    def _download_results(self, instance_id):
        """
        :param instance_id:
        :return: True iff the main result/scoring file has been successfully downloaded. Other failures are only logged.
        """
        def download_file(obj, dest, dest_display_path=None):
            dest_display_path = dest if dest_display_path is None else dest_display_path
            try:
                log.info("Downloading `%s` from s3 bucket %s to `%s`.",
                         obj.key, self.bucket.name, dest_display_path)
                if isinstance(dest, str):
                    touch(dest)
                    obj.download_file(dest)
                else:
                    obj.download_fileobj(dest)
            except Exception as e:
                log.error("Failed downloading `%s` from s3 bucket %s: %s",
                          obj.key, self.bucket.name, str(e))
                log.exception(e)

        success = self.instances[instance_id].success is True
        try:
            instance_output_key = self._s3_output(instance_id, encode=True)
            objs = [
                o.Object()
                for o in self.bucket.objects.filter(Prefix=instance_output_key)
            ]
            session_key = self._s3_session(encode=True)
            # result_key = self._s3_output(instance_id, Scoreboard.results_file, encode=True)
            for obj in objs:
                rel_path = url_relpath(obj.key, start=session_key)
                dest_path = os.path.join(self.output_dirs.session, rel_path)
                download_file(obj, dest_path)
                # if obj.key == result_key:
                if not success and os.path.basename(
                        obj.key) == Scoreboard.results_file:
                    if rconfig().results.save:
                        self._exec_send(
                            lambda path: self._append(Scoreboard.load_df(path)
                                                      ), dest_path)
                    success = True
        except Exception as e:
            log.error(
                "Failed downloading benchmark results from s3 bucket %s: %s",
                self.bucket.name, str(e))
            log.exception(e)

        log.info("Instance `%s` success=%s", instance_id, success)
        self._update_instance(instance_id, success=success)
        return success

    def _create_instance_profile(self):
        """
        see https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/iam-roles-for-amazon-ec2.html
        for steps defined here.
        for restrictions, cf. https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_iam-limits.html
        :return:
        """
        s3c = rconfig().aws.s3
        iamc = rconfig().aws.iam
        bucket_prefix = (s3c.bucket +
                         '-') if (s3c.temporary
                                  and not iamc.temporary) else self.bucket.name
        role_name = iamc.role_name
        profile_name = iamc.instance_profile_name
        if iamc.temporary:
            role_name += ('-' + self.suid)
            profile_name += ('-' + self.suid)

        irole = None
        try:
            self.iam.meta.client.get_role(RoleName=role_name)
            irole = self.iam.Role(role_name)
        except botocore.exceptions.ClientError as e:
            log.info("Role %s doesn't exist, creating it: [%s].", role_name,
                     str(e))

        if not irole:
            ec2_role_trust_policy_json = json.dumps({  # trust role
                'Version':
                '2012-10-17',  # version of the policy language, cf. https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_policies_elements_version.html
                'Statement': [{
                    'Effect': 'Allow',
                    'Principal': {
                        'Service': 'ec2.amazonaws.com'
                    },
                    'Action': 'sts:AssumeRole'
                }]
            })
            irole = self.iam.create_role(
                RoleName=role_name,
                AssumeRolePolicyDocument=ec2_role_trust_policy_json,
                MaxSessionDuration=iamc.max_role_session_duration_secs)
            log.info("Role %s successfully created.", role_name)

        if iamc.s3_policy_name not in [p.name for p in irole.policies.all()]:
            resource_prefix = "arn:aws:s3:::{bucket}*/{root_key}".format(
                bucket=bucket_prefix, root_key=str_def(s3c.root_key)
            )  # ARN format for s3, cf. https://docs.aws.amazon.com/AmazonS3/latest/dev/s3-arn-format.html
            s3_policy_json = json.dumps({
                'Version':
                '2012-10-17',
                'Statement': [
                    {
                        'Effect': 'Allow',
                        'Action': 's3:List*',
                        'Resource': 'arn:aws:s3:::{}*'.format(bucket_prefix)
                    },
                    {
                        'Effect': 'Allow',
                        'Action':
                        's3:GetObject',  # S3 actions, cf. https://docs.aws.amazon.com/AmazonS3/latest/dev/using-with-s3-actions.html
                        'Resource': resource_prefix + '*'
                    },
                    {
                        'Effect': 'Allow',
                        'Action': 's3:PutObject',
                        'Resource': resource_prefix +
                        '*'  # technically, we could grant write access for each instance only to its own 'directory', but this is not necessary
                    }
                ]
            })
            self.iam.meta.client.put_role_policy(
                RoleName=irole.name,
                PolicyName=iamc.s3_policy_name,
                PolicyDocument=s3_policy_json)

        iprofile = None
        try:
            self.iam.meta.client.get_instance_profile(
                InstanceProfileName=profile_name)
            iprofile = self.iam.InstanceProfile(profile_name)
        except botocore.exceptions.ClientError as e:
            log.info("Instance profile %s doesn't exist, creating it: [%s].",
                     profile_name, str(e))
        if not iprofile:
            iprofile = self.iam.create_instance_profile(
                InstanceProfileName=profile_name)
            log.info("Instance profile %s successfully created.", profile_name)
            waiting_time = iamc.credentials_propagation_waiting_time_secs
            steps = math.ceil(waiting_time / 10)
            for i in range(steps):
                log.info(
                    "Waiting for new credentials propagation, time left = %ss.",
                    round(waiting_time * (1 - i / steps)))
                time.sleep(waiting_time / steps)

        if irole.name not in [r.name for r in iprofile.roles]:
            iprofile.add_role(RoleName=irole.name)

        return iprofile

    def _delete_iam_entities(self):
        iamc = rconfig().aws.iam
        iprofile = self.instance_profile

        if iprofile is None:
            profile_name = iamc.instance_profile_name
            if iamc.temporary:
                profile_name += ('-' + self.suid)
            try:
                self.iam.meta.client.get_instance_profile(
                    InstanceProfileName=profile_name)
                iprofile = self.iam.InstanceProfile(profile_name)
            except botocore.exceptions.ClientError as e:
                log.info(
                    "Instance profile %s doesn't exist, nothing to delete: [%s]",
                    profile_name, str(e))

        if iprofile is not None:
            for role in iprofile.roles:
                log.info("Removing role %s from instance profile %s.",
                         role.name, iprofile.name)
                iprofile.remove_role(RoleName=role.name)
                self._delete_iam_entities_from_role(role.name)
            log.info("Deleting instance profile %s.", iprofile.name)
            iprofile.delete()
            log.info("Instance profile %s was successfully deleted.",
                     iprofile.name)
        else:
            role_name = iamc.role_name
            if iamc.temporary:
                role_name += ('-' + self.suid)
            self._delete_iam_entities_from_role(role_name)

    def _delete_iam_entities_from_role(self, role_name):
        iamc = rconfig().aws.iam
        try:
            self.iam.meta.client.get_role(RoleName=role_name)
            irole = self.iam.Role(role_name)
            for policy in irole.policies.all():
                log.info("Deleting role policy %s from role %s.", policy.name,
                         policy.role_name)
                policy.delete()
                log.info("Policy %s was successfully deleted.", policy.name)
            for profile in irole.instance_profiles.all():
                log.info("Removing instance profile %s from role %s.",
                         profile.name, irole.name)
                profile.remove_role(RoleName=irole.name)
                log.info("Deleting instance profile %s.", profile.name)
                profile.delete()
                log.info("Instance profile %s was successfully deleted.",
                         profile.name)
            log.info("Deleting role %s.", irole.name)
            irole.delete()
            log.info("Role %s was successfully deleted.", irole.name)
        except botocore.exceptions.ClientError as e:
            log.info("Role %s doesn't exist, skipping its deletion: [%s]",
                     iamc.role_name, str(e))

    def _ec2_startup_script(self,
                            instance_key,
                            script_params="",
                            timeout_secs=-1):
        """
        Generates the UserData in cloud-config format for the EC2 instance:
        this script is automatically executed by the instance at the end of its boot process.

        This cloud-config version is currently preferred as the runcmd commands are always executed sequentially,
        regardless of whether a previous one raised an error. In particular, the power_state directive is always executed.

        Notes about cloud-config syntax:
            - runcmd: all commands are executed sequentially. If one raises an error, the next one is still executed afterwards.
            - power_state:
                * delay (in minutes) passed to the shutdown command,
                * timeout (in sec) waiting for cloud-init to complete before triggering shutdown.

        :param instance_key: the unique local identifier for the instance.
            This is different from EC2 instance id as we don't know it yet.
            Mainly used to put output files to dedicated key on s3.
        :param script_params: the custom params passed to the benchmark script, usually only task, fold params
        :return: the UserData for the new ec2 instance
        """
        script_extra_params = "--session="
        cloud_config = """
#cloud-config

package_update: true
package_upgrade: false
packages:
  - python3
  - python3-pip
  - docker.io

runcmd:
  - apt-get -y remove unattended-upgrades
  - systemctl stop apt-daily.timer
  - systemctl disable apt-daily.timer
  - systemctl disable apt-daily.service
  - systemctl daemon-reload
  - mkdir -p /s3bucket/input
  - mkdir -p /s3bucket/output
  - mkdir -p /s3bucket/user
  - pip3 install -U awscli
  - aws s3 cp '{s3_input}' /s3bucket/input --recursive
  - aws s3 cp '{s3_user}' /s3bucket/user --recursive
  - docker run {docker_options} -v /s3bucket/input:/input -v /s3bucket/output:/output -v /s3bucket/user:/custom --rm {image} {params} -i /input -o /output -u /custom -s skip -Xrun_mode=aws.docker {extra_params}
  - aws s3 cp /s3bucket/output '{s3_output}' --recursive
  - rm -f /var/lib/cloud/instances/*/sem/config_scripts_user

final_message: "AutoML benchmark (docker) {ikey} completed after $UPTIME s"

power_state:
  delay: "+1"
  mode: poweroff
  message: "I'm losing power"
  timeout: {timeout}
  condition: True
""" if rconfig().aws.use_docker else """
#cloud-config

package_update: true
package_upgrade: false
packages:
  - curl
  - wget
  - unzip
  - git
  - python3
  - python3-pip
  - python3-venv

runcmd:
  - apt-get -y remove unattended-upgrades
  - systemctl stop apt-daily.timer
  - systemctl disable apt-daily.timer
  - systemctl disable apt-daily.service
  - systemctl daemon-reload
  - pip3 install -U awscli
  - python3 -m venv /venvs/bench
  - alias PIP='/venvs/bench/bin/pip3'
  - alias PY='/venvs/bench/bin/python3 -W ignore'
  - alias PIP_REQ='xargs -L 1 /venvs/bench/bin/pip3 install --no-cache-dir'
  - mkdir -p /s3bucket/input
  - mkdir -p /s3bucket/output
  - mkdir -p /s3bucket/user
  - mkdir /repo
  - cd /repo
  - git clone --depth 1 --single-branch --branch {branch} {repo} .
  - PIP install -U pip=={pip_version}
  - PIP_REQ < requirements.txt
#  - until aws s3 ls '{s3_base_url}'; do echo "waiting for credentials"; sleep 10; done
  - aws s3 cp '{s3_input}' /s3bucket/input --recursive
  - aws s3 cp '{s3_user}' /s3bucket/user --recursive
  - PY {script} {params} -i /s3bucket/input -o /s3bucket/output -u /s3bucket/user -s only --session=
  - PY {script} {params} -i /s3bucket/input -o /s3bucket/output -u /s3bucket/user -Xrun_mode=aws -Xproject_repository={repo}#{branch} {extra_params}
  - aws s3 cp /s3bucket/output '{s3_output}' --recursive
  - rm -f /var/lib/cloud/instances/*/sem/config_scripts_user

final_message: "AutoML benchmark {ikey} completed after $UPTIME s"

power_state:
  delay: "+1"
  mode: poweroff
  message: "I'm losing power"
  timeout: {timeout}
  condition: True
"""
        return cloud_config.format(
            repo=rget().project_info.repo,
            branch=rget().project_info.branch,
            image=rconfig().docker.image
            or DockerBenchmark.docker_image_name(self.framework_def),
            pip_version=rconfig().versions.pip,
            s3_base_url=self._s3_session(absolute=True, encode=True),
            s3_user=self._s3_user(absolute=True, encode=True),
            s3_input=self._s3_input(absolute=True, encode=True),
            s3_output=self._s3_output(instance_key, absolute=True,
                                      encode=True),
            script=rconfig().script,
            ikey=instance_key,
            params=script_params,
            extra_params=script_extra_params,
            docker_options=rconfig().docker.run_extra_options,
            timeout=timeout_secs
            if timeout_secs > 0 else rconfig().aws.max_timeout_seconds,
        )

    def _ec2_startup_script_bash(self,
                                 instance_key,
                                 script_params="",
                                 timeout_secs=-1):
        """
        Backup UserData version if the cloud-config version doesn't work as expected.

        Generates the UserData in bash format for the EC2 instance:
        this script is automatically executed by the instance at the end of its boot process.
        TODO: current version doesn't handle errors at all, that's why the cloud-config version is currently preferred.
        :param instance_key: the unique local identifier for the instance.
            This is different from EC2 instance id as we don't know it yet.
            Mainly used to put output files to dedicated key on s3.
        :param script_params: the custom params passed to the benchmark script, usually only task, fold params
        :return: the UserData for the new ec2 instance
        """
        script_extra_params = "--session="
        return """#!/bin/bash
apt-get update
#apt-get -y upgrade
apt-get -y install curl wget unzip git
apt-get -y install python3 python3-pip python3-venv
#apt-get -y install docker.io

pip3 install -U awscli
python3 -m venv /venvs/bench
alias PIP='/venvs/bench/bin/pip3'
alias PY='/venvs/bench/bin/python3 -W ignore'

mkdir -p /s3bucket/input
mkdir -p /s3bucket/output
mkdir -p /s3bucket/user
mkdir ~/repo
cd ~/repo
git clone --depth 1 --single-branch --branch {branch} {repo} .

PIP install -U pip=={pip_version}
xargs -L 1 PIP install --no-cache-dir < requirements.txt
PIP install -U awscli

aws s3 cp '{s3_input}' /s3bucket/input --recursive
aws s3 cp '{s3_user}' /s3bucket/user --recursive
PY {script} {params} -i /s3bucket/input -o /s3bucket/output -u /s3bucket/user -s only --session=
PY {script} {params} -i /s3bucket/input -o /s3bucket/output -u /s3bucket/user -Xrun_mode=aws -Xproject_repository={repo}#{branch} {extra_params}
aws s3 cp /s3bucket/output '{s3_output}' --recursive
rm -f /var/lib/cloud/instances/*/sem/config_scripts_user
shutdown -P +1 "I'm losing power"
""".format(
            repo=rget().project_info.repo,
            branch=rget().project_info.branch,
            pip_version=rconfig().versions.pip,
            s3_base_url=self._s3_session(absolute=True, encode=True),
            s3_user=self._s3_user(absolute=True, encode=True),
            s3_input=self._s3_input(absolute=True, encode=True),
            s3_output=self._s3_output(instance_key, absolute=True,
                                      encode=True),
            script=rconfig().script,
            ikey=instance_key,
            params=script_params,
            extra_params=script_extra_params,
        )
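For context, the UserData produced by the two methods above, together with the instance profile from _create_instance_profile, is the kind of value that gets passed when launching the EC2 instance. Below is a minimal sketch using plain boto3 (not necessarily how this project launches its instances; the AMI id, instance type and profile name are illustrative assumptions):

import boto3

ec2 = boto3.resource('ec2')

# `user_data` stands in for the string returned by _ec2_startup_script(...);
# all concrete values below are placeholders for illustration only.
user_data = "#cloud-config\npackage_update: true\n"

instances = ec2.create_instances(
    ImageId='ami-0123456789abcdef0',
    InstanceType='m5.2xlarge',
    MinCount=1,
    MaxCount=1,
    IamInstanceProfile={'Name': 'automlbenchmark-profile'},
    UserData=user_data,
)
print(instances[0].id)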
Пример #32
0
def do_work(named_proc, resolver, rndc_cmd, kill_method, n_workers, n_queries):
    """Creates a number of A queries to run in parallel
       in order simulate a slightly more realistic test scenario.

       The main idea of this function is to create and send a bunch
       of A queries to a target named instance and during this process
       a request for shutting down named will be issued.

       In the process of shutting down named, a couple control connections
       are created (by launching rndc) to ensure that the crash was fixed.

       if kill_method=="rndc" named will be asked to shutdown by
       means of rndc stop.
       if kill_method=="sigterm" named will be killed by SIGTERM on
       POSIX systems or by TerminateProcess() on Windows systems.

       :param named_proc: named process instance
       :type named_proc: subprocess.Popen

       :param resolver: target resolver
       :type resolver: dns.resolver.Resolver

       :param rndc_cmd: rndc command with default arguments
       :type rndc_cmd: list of strings, e.g. ["rndc", "-p", "23750"]

       :param kill_method: "rndc" or "sigterm"
       :type kill_method: str

       :param n_workers: Number of worker threads to create
       :type n_workers: int

       :param n_queries: Total number of queries to send
       :type n_queries: int
    """

    # pylint: disable-msg=too-many-arguments
    # pylint: disable-msg=too-many-locals

    # helper function, args must be a list or tuple with arguments to rndc.
    def launch_rndc(args):
        return subprocess.call(rndc_cmd + args, timeout=10)

    # We're going to execute queries in parallel by means of a thread pool.
    # dnspython functions block, so we need to circumvent that.
    executor = ThreadPoolExecutor(n_workers + 1)

    # Helper dict, where keys=Future objects and values are tags used
    # to process results later.
    futures = {}

    # 50% of work will be A queries.
    # 1 work will be rndc stop.
    # Remaining work will be rndc status (so we test parallel control
    #  connections that were crashing named).
    shutdown = True
    for i in range(n_queries):
        if i < (n_queries // 2):
            # Half work will be standard A queries.
            # Among those we split 50% queries relname='www',
            # 50% queries relname=random characters
            if random.randrange(2) == 1:
                tag = "good"
                relname = "www"
            else:
                tag = "bad"
                length = random.randint(4, 10)
                relname = "".join(letters[random.randrange(len(letters))]
                                  for i in range(length))

            qname = relname + ".test"
            futures[executor.submit(resolver.query, qname, 'A')] = tag
        elif shutdown:  # We attempt to stop named in the middle
            shutdown = False
            if kill_method == "rndc":
                futures[executor.submit(launch_rndc, ['stop'])] = 'stop'
            else:
                futures[executor.submit(named_proc.terminate)] = 'kill'

        else:
            # We attempt to send a couple of rndc commands while named is
            # being shut down
            futures[executor.submit(launch_rndc, ['status'])] = 'status'

    ret_code = -1
    for future in as_completed(futures):
        try:
            result = future.result()
            # If tag is "stop", result is an instance of
            # subprocess.CompletedProcess, then we check returncode
            # attribute to know if rncd stop command finished successfully.
            #
            # if tag is "kill" then the main function will check if
            # named process exited gracefully after SIGTERM signal.
            if futures[future] == "stop":
                ret_code = result

        except (dns.resolver.NXDOMAIN, dns.exception.Timeout):
            pass

    if kill_method == "rndc":
        assert ret_code == 0

    executor.shutdown()
Пример #33
0
class KinesisProducer:
    """Basic Kinesis Producer.

    Parameters
    ----------
    stream_name : string
        Name of the stream to send the records.
    batch_size : int
        Numbers of records to batch before flushing the queue.
    batch_time : int
        Maximum of seconds to wait before flushing the queue.
    max_retries: int
        Maximum number of times to retry the put operation.
    threads: int
        Number of threads used for the I/O pool.
    kinesis_client: boto3.client
        Kinesis client.

    Attributes
    ----------
    records : array
        Queue of formatted records.
    pool: concurrent.futures.ThreadPoolExecutor
        Pool of threads handling client I/O.
    """
    def __init__(self,
                 stream_name,
                 batch_size=500,
                 batch_time=5,
                 max_retries=5,
                 threads=10,
                 kinesis_client=boto3.client('kinesis')):
        self.stream_name = stream_name
        self.queue = Queue()
        self.batch_size = batch_size
        self.batch_time = batch_time
        self.max_retries = max_retries
        self.kinesis_client = kinesis_client
        self.pool = ThreadPoolExecutor(threads)
        self.last_flush = time.time()
        self.monitor_running = threading.Event()
        self.monitor_running.set()
        self.pool.submit(self.monitor)

    def monitor(self):
        """Flushes the queue periodically."""
        while self.monitor_running.is_set():
            if time.time() - self.last_flush > self.batch_time:
                if not self.queue.empty():
                    logger.info(
                        "Flushing the queue (time without flush exceeded)")
                    self.flush_queue()
            # Sleep between checks so the monitor thread doesn't busy-wait.
            time.sleep(self.batch_time)

    def put_record(self, data, partition_key=None):
        """Add data to the record queue in the proper format.

        Parameters
        ----------
        data : str
            Data to send.
        partition_key: str
            Hash that determines which shard a given data record belongs to.

        """
        # Byte encode the data
        data = encode_data(data)

        # Create a random partition key if not provided
        if not partition_key:
            partition_key = uuid.uuid4().hex

        # Build the record
        record = {'Data': data, 'PartitionKey': partition_key}

        # Flush the queue if it reaches the batch size
        if self.queue.qsize() >= self.batch_size:
            logger.info("Flushing the queue (batch size reached)")
            self.pool.submit(self.flush_queue)

        # Append the record
        logger.info('Putting record "{}"'.format(record['Data'][:100]))
        self.queue.put(record)

    def close(self):
        """Flushes the queue and waits for the executor to finish."""
        self.flush_queue()
        self.monitor_running.clear()
        self.pool.shutdown()

    def flush_queue(self):
        """Grab all the current records in the queue and send them."""
        records = []

        while not self.queue.empty() and len(records) < self.batch_size:
            records.append(self.queue.get())

        if records:
            self.send_records(records)
            self.last_flush = time.time()

    def send_records(self, records, attempt=0):
        """Send records to the Kinesis stream.

        Failed records are sent again with an exponential backoff delay.

        Parameters
        ----------
        records : array
            Array of formatted records to send.
        attempt: int
            Number of times the records have been sent without success.
        """

        # If we already tried more times than we wanted, save to a file
        if attempt > self.max_retries:
            logger.warning('Writing {} records to file'.format(len(records)))
            with open('failed_records.dlq', 'ab') as f:
                for r in records:
                    f.write(r.get('Data'))
            return

        # Sleep before retrying
        if attempt:
            time.sleep(2**attempt * .1)

        response = self.kinesis_client.put_records(StreamName=self.stream_name,
                                                   Records=records)
        failed_record_count = response['FailedRecordCount']

        # Grab failed records
        if failed_record_count:
            logger.warning('Retrying failed records')
            failed_records = []
            for i, record in enumerate(response['Records']):
                if record.get('ErrorCode'):
                    failed_records.append(records[i])

            # Recursive call
            attempt += 1
            self.send_records(failed_records, attempt=attempt)
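A minimal usage sketch of the producer above. The stream name is a placeholder, and note that the class as excerpted also relies on an encode_data helper and a module-level logger that are not shown here.

producer = KinesisProducer(stream_name="example-stream", batch_size=100, batch_time=2)
for i in range(1000):
    producer.put_record('{"event_id": %d}' % i)  # a partition key is generated automatically
producer.close()  # flush the remaining records and stop the monitor thread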
Пример #34
0
#     name = name*2
#     time.sleep(2)
#     print("当前线程是%s,结果是%s" % (threading.current_thread(), name))

# print('thread %s is running...' % threading.current_thread())
# for i in range(1,5):
#     t = threading.Thread(target=work,args=(i,))
#     t.start()
#
# print('ok')
# t.join()
#
# print("=====================")
#
#
# t1 = threading.Thread(target=work,args=(11,))
# t2 = threading.Thread(target=work,args=(22,))
# t3 = threading.Thread(target=work,args=(22,))
# t1.start()
# t2.start()
# t3.start()
# t1.join()
# t2.join()
# t3.join()

print("++++++++++++++++++++++++")
th = ThreadPoolExecutor(max_workers=3)
for s in range(1, 10):
    th.submit(work, s)
th.shutdown()
print('````````````````````````````')
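The work function used above is cut off in this excerpt; the following self-contained sketch shows the same submit/shutdown pattern, with a body reconstructed from the commented-out lines (an assumption, not the original code).

import threading
import time
from concurrent.futures import ThreadPoolExecutor

def work(name):
    name = name * 2
    time.sleep(2)
    print("Current thread is %s, result is %s" % (threading.current_thread(), name))

th = ThreadPoolExecutor(max_workers=3)
for s in range(1, 10):
    th.submit(work, s)
th.shutdown()  # blocks until all submitted calls have finished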
Пример #35
0
from get_mmf_url import mmf_url_list
from get_fund_info import get_info
import csv
from concurrent.futures import ThreadPoolExecutor

if __name__ == '__main__':
    executor = ThreadPoolExecutor(max_workers=250)
    with open('data.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerow([
            'id', 'size', 'annualized', 'prop_dt', 'prop_large', 'prop_retail',
            'prop_insider', 'share'
        ])
        for num, url in enumerate(mmf_url_list[2800:]):
            print(num, url)
            row = executor.submit(get_info, url)
            if row.result() is not None:
                writer.writerow(row.result())
        executor.shutdown()
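Note that calling row.result() immediately after submit() blocks on each URL, so the pool above effectively processes URLs one at a time. The following sketch shows a variant that actually overlaps the get_info calls; it assumes the same get_info/mmf_url_list interface as the example.

from concurrent.futures import ThreadPoolExecutor, as_completed
import csv

with ThreadPoolExecutor(max_workers=50) as executor, open('data.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['id', 'size', 'annualized', 'prop_dt', 'prop_large',
                     'prop_retail', 'prop_insider', 'share'])
    # Submit everything first, then write rows as they complete.
    futures = [executor.submit(get_info, url) for url in mmf_url_list[2800:]]
    for future in as_completed(futures):
        row = future.result()
        if row is not None:
            writer.writerow(row)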
Пример #36
0
    class SseResultDownloader:
        def __init__(self, web_interface, result_poll_interval):
            logging.debug("Server-Send Events are used to get state of runs.")

            self._web_interface = web_interface
            self._run_finished_url = web_interface._web_interface_url + "runs/finished"
            self._result_poll_interval = result_poll_interval
            self._sse_client = None
            self._shutdown = False
            self._new_runs = False
            self._state_receive_executor = ThreadPoolExecutor(max_workers=1)

        def _log_future_exception_and_fallback(self, result):
            e = result.exception()
            if e is not None:
                if (self._shutdown and isinstance(e, AttributeError) and str(e)
                        == "'NoneType' object has no attribute 'read'"):
                    # This is harmless, it occurs because SSEClient reads on closed connection.
                    logging.debug('Error during result processing:',
                                  exc_info=True)
                else:
                    logging.warning('Error during result processing:',
                                    exc_info=True)

                if not self._shutdown:
                    self._fall_back()

        def _should_reconnect(self, error):
            if self._new_runs:
                return False
            elif type(error) == HTTPError and error.response is not None \
                    and 400 <= error.response.status < 500:
                logging.debug("Exception in SSE connection: %s", error)
                return False
            else:
                return True

        def _start_sse_connection(self):

            while (self._new_runs):
                run_ids = set(self._web_interface._unfinished_runs.keys())
                self._new_runs = False

                # nothing to do
                if len(run_ids) == 0:
                    return

                params = []
                for run_id in run_ids:
                    params.append(("run", run_id))

                headers = {"Accept-Encoding": "UTF-8"}

                logging.debug("Creating Server-Send Event connection.")
                try:
                    self._sse_client = ShouldReconnectSeeClient(
                        self._run_finished_url,
                        self._should_reconnect,
                        session=self._web_interface._connection,
                        headers=headers,
                        data=params)

                except Exception as e:
                    logging.warning("Creating SSE connection failed: %s", e)
                    self._fall_back()
                    return

                for message in self._sse_client:
                    data = message.data
                    tokens = data.split(" ")
                    if len(tokens) == 2:
                        run_id = tokens[0]
                        state = tokens[1]

                        if state == "FINISHED":
                            if run_id in run_ids:
                                logging.debug('Run %s finished.', run_id)
                                self._web_interface._download_result_async(
                                    run_id)

                        elif state == "UNKNOWN":
                            logging.debug(
                                'Run %s is not known by the webclient, trying to get the result.',
                                run_id)
                            self._web_interface._download_async(run_id)

                        elif state == "ERROR":
                            self._web_interface._run_failed(run_id)

                        else:
                            logging.warning(
                                'Received unknown run state %s for run %s.',
                                state, run_id)

                        run_ids.discard(run_id)
                        if self._shutdown or self._new_runs or len(
                                run_ids) == 0:
                            break

                    else:
                        logging.warning("Received invalid message %s", data)

            self._sse_client = None

            # fall back to polling if Server-Send Event based approach failed
            if len(run_ids) != 0 and not self._shutdown:
                self._fall_back()

        def _fall_back(self):
            logging.info("Fall back to polling.")
            self._web_interface._result_downloader = PollingResultDownloader(
                self._web_interface, self._result_poll_interval)
            self._web_interface._result_downloader.start()
            self.shutdown(wait=False)

        def start(self):
            self._new_runs = True
            if self._sse_client:
                self._sse_client.resp.close()
            else:
                future = self._state_receive_executor.submit(
                    self._start_sse_connection)
                future.add_done_callback(
                    self._log_future_exception_and_fallback)

        def shutdown(self, wait=True):
            self._shutdown = True
            if self._sse_client:
                self._sse_client.resp.close()
            self._state_receive_executor.shutdown(wait=wait)
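The start() / _log_future_exception_and_fallback pair above relies on Future.add_done_callback to surface exceptions raised in the background SSE thread. A minimal standalone sketch of that pattern:

import logging
from concurrent.futures import ThreadPoolExecutor

def log_exception(future):
    # Called once the submitted function has finished; the future is already done,
    # so exception() returns immediately instead of blocking.
    if future.exception() is not None:
        logging.warning("Background task failed:", exc_info=future.exception())

executor = ThreadPoolExecutor(max_workers=1)
future = executor.submit(lambda: 1 / 0)  # deliberately fails with ZeroDivisionError
future.add_done_callback(log_exception)
executor.shutdown(wait=True)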
Пример #37
0
    dt_start = datetime.datetime.now()
    print(dt_start)
    print("start \n")

    remaining_time = mklist_list[0] * mklist_list[1]
    print("making a device list,  please wait about", remaining_time, "second")
    executor = ThreadPoolExecutor(max_workers=2)  # create threads for parallel processing
    addr_list_info = []  # list that stores the thread info for the device-scan side

    addr_list = executor.submit(Search_btAddr, mklist_list[0], mklist_list[1])
    addr_list_info.append(addr_list)  # add the device-scan future to the list

    executor.submit(remaining_timer, addr_list.running(), remaining_time)

    executor.shutdown()  # shut down the threads

    print(addr_list.result(), "\n")

    try:
        data_list = [
            addr_list.result(),
        ]  # store the results produced by the thread
        pprint.pprint(data_list)
        print()

        ipaddr = IPget()
        #print(ipaddr,"\n")
        dt_now = datetime.datetime.now()
        name = str(dt_now.year) + "_" + str(dt_now.month) + str(
            dt_now.day) + "_" + str(dt_now.hour) + str(
Пример #38
0
def streaming_session(enabled_sensors=None,
                      run_signal=None,
                      reads_queue=None,
                      path=None):
    """
    This method reads data from a group of Shimmer devices.

    :param path: location where to save .csv files with data
    :param enabled_sensors: list of enabled sensors of the stream
    :param run_signal: signal to handle thread end
    :param reads_queue: queue to communicate with PlotsScreen
    :return:
    """

    # Thread pool for CSV writing
    pool = ThreadPoolExecutor(10)  # 5 types of Shimmer * 2

    master_timestamp = -1  # timestamp RTC of the first sensor's first packet (for synchronization purposes)
    shifts = {}
    headers = {}

    # Start Streaming to all sensors
    for sensor in enabled_sensors:
        # Retrieve the Header
        headers[sensor.shimmer_type] = sensor.data_packet_header()
        # in this context we will edit the packet, so let's adjust the fieldnames of the header
        headers[sensor.shimmer_type] = headers[
            sensor.shimmer_type][3:len(headers[sensor.shimmer_type])]
        headers[sensor.shimmer_type].insert(0, "Synced RTC Timestamp")
        # Creating CSV files and printing header and sampling_rate
        filename = path + "/" + sensor.shimmer_type + '.csv'
        with open(filename, 'w') as csv_file:
            writer = csv.DictWriter(csv_file,
                                    fieldnames=headers[sensor.shimmer_type])
            writer.writeheader()
            writer.writerow(
                {headers[sensor.shimmer_type][0]: sensor.sampling_rate})

        # start the streaming
        sensor.start_bt_streaming()

    for sensor in enabled_sensors:
        # Let's compute SHIFTS (time differences) between the master sensor and the others (for synchronization purposes)
        data = sensor.read_data_packet_bt(calibrated=True)
        # If the master timestamp isn't set yet
        if master_timestamp == -1:
            master_timestamp = data[1]
        if sensor.shimmer_type not in shifts.keys():
            shifts[sensor.shimmer_type] = data[2] - master_timestamp

    # Let's loop while run_signal is set. It will be unset when the user closes the plot window
    while run_signal.is_set():
        read = {}
        for sensor in enabled_sensors:
            # Read a group of available packets
            n_of_packets, packets = sensor.read_data_packet_extended(
                calibrated=True)
            if n_of_packets > 0:
                # For the plot we are interested only in the last packet of the group
                packet = packets[-1].copy()
                # Let's synchronize the timestamp
                synced_timestamp = packet[2] - shifts[sensor.shimmer_type]
                # Engineering the packet
                packet.pop(0)  # removing local timestamp
                packet.pop(0)  # removing timestamp rtc of the first packet
                packet.pop(
                    0
                )  # removing timestamp rtc of the current packet (not synched)
                packet.insert(0, synced_timestamp)
                # Put the packet into 'read' that's a dict that will go into the queue
                read[sensor.shimmer_type] = packet

                # Send all packets to CSV writer for .csv writing
                pool.submit(csv_writer.csv_writer, sensor.shimmer_type,
                            headers[sensor.shimmer_type],
                            shifts[sensor.shimmer_type], packets, path)
            else:
                read[sensor.shimmer_type] = None

        # Check if all reads are empty
        all_empty = True
        for key, value in read.items():
            if value:
                all_empty = False

        if not all_empty:
            # These lines make sure that the queue holds only one element with
            # the latest reads
            with reads_queue.mutex:
                reads_queue.queue.clear()
            reads_queue.put(read)

    # Stop streaming
    for sensor in enabled_sensors:
        sensor.stop_bt_streaming()
    pool.shutdown(wait=True)
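A worked example of the shift-based synchronization used above (all numbers are made up):

# RTC timestamp of the master sensor's first packet
master_timestamp = 1000.00
# another sensor's first packet carries RTC timestamp 1000.25,
# i.e. it runs 0.25 s ahead of the master
shift = 1000.25 - master_timestamp            # 0.25
# a later packet from that sensor, RTC timestamp 1012.75,
# re-expressed on the master's timeline:
synced_timestamp = 1012.75 - shift            # 1012.50
print(synced_timestamp)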
Пример #39
0
    sub_task_obj.result = task_result
    sub_task_obj.save()


if __name__ == '__main__':
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    sys.path.append(base_dir)
    os.environ.setdefault("DJANGO_SETTINGS_MODULE", "fortress.settings")
    import django
    django.setup()
    from web import models
    if len(sys.argv) == 1:
        exit('task id not provided.')
    else:
        task_id = sys.argv[1]  #task_obj_id
        task_obj = models.Task.objects.get(id=task_id)
        print('task runner', type(task_obj), task_obj)

    pool = ThreadPoolExecutor(10)
    if task_obj.task_type == 'cmd':
        for sub_task_obj in task_obj.tasklogdetail_set.all():
            # pool = ThreadPoolExecutor(10)
            pool.submit(ssh_cmd, sub_task_obj)
    else:
        task_data = json.loads(task_obj.content)
        print('-->task_data', type(task_data), task_data)
        for sub_task_obj in task_obj.tasklogdetail_set.all():
            pool.submit(file_transfer, sub_task_obj, task_data)

    pool.shutdown(wait=True)
Пример #40
0
        # urlretrieve(imagelink, imagepathname)


print('Start time: %s' % datetime.now())
for item in urls:
    # if item== urls[0]:
    #     continue
    page = 1
    url = 'https://unsplash.com/napi/topics/%s/photos?page=%d' % (item, page)
    print("目录: " + item)
    if page == 1:
        itemdir = os.path.join('.', imagedownpath)
        checkdestfile.checkdestdir(item, itemdir)
        jsondata, end = getjsondata(url, isfirst=True)
        if jsondata is not None and end is not None:
            if item == urls[0]:
                end = int(('%d' % (end))[2:])
                downimage(jsondata)
            page = page + 1
            # break
            while int(page) <= int(end) and int(page) <= 30:
                url = 'https://unsplash.com/napi/topics/%s/photos?page=%d' % (
                    item, page)
                # print(url)
                jsondata = getjsondata(url)
                if jsondata is not None:
                    downimage(jsondata)
                page = page + 1
threadPool.shutdown(wait=True)
print('End time: %s' % datetime.now())
Пример #41
0
class Kernel(object):
    def __init__(self,
                 environment,
                 bundles,
                 configuration_file="config/config.yml",
                 parameters_file="config/parameters.yml"):
        self.logger = None
        self.configuration_file = configuration_file
        self.parameters_file = parameters_file
        self.bundles = bundles
        self.environment = environment
        self.log_handlers = []
        self.is_shutdown = False
        max_workers = cpu_count() * 5
        self.thread_pool = ThreadPoolExecutor(max_workers=max_workers)
        self.running_services = []
        self.mediator = Mediator()

        try:
            self.configuration = self.load_configuration(environment)

            # Subscribe bundle events
            for bundle in self.bundles:
                if hasattr(bundle, 'event_listeners'):
                    for event_type, listener in bundle.event_listeners:
                        self.mediator.add_listener(event=event_type,
                                                   listener=listener)

                if hasattr(bundle, 'event_subscribers'):
                    for subscriber in bundle.event_subscribers:
                        self.mediator.add_subscriber(subscriber=subscriber)

            # Injection provided by the base system
            injection_bindings = {
                Kernel: self,
                Configuration: self.configuration,
                EventManager: self.mediator
            }
            self.mediator.dispatch(ConfigurationReadyEvent(self.configuration))
            # Injection from other bundles
            for bundle in self.bundles:
                if hasattr(bundle, 'injection_bindings'):
                    injection_bindings.update(bundle.injection_bindings)

            # Set this kernel and configuration available for injection
            def my_config(binder):
                for key, value in injection_bindings.items():
                    binder.bind(key, value)

            inject.configure(my_config)
            self.mediator.dispatch(InjectorReadyEvent())

            for bundle in self.bundles:
                if hasattr(bundle, 'log_handlers'):
                    self.log_handlers += bundle.log_handlers
        except Exception as e:
            logging.exception(e)
            raise e

        self.configure_logger(environment=environment)
        self.register_signals()
        logging.info("Kernel Ready")
        self.mediator.dispatch(KernelReadyEvent())

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.shutdown()

    def register_signals(self):
        signal.signal(signal.SIGINT, self._signal_handler)
        signal.signal(signal.SIGTERM, self._signal_handler)

    def _signal_handler(self, signal, frame):
        self.shutdown()

    def run_service(self, service_function, *args):
        self.running_services.append(
            self.thread_pool.submit(service_function, *args))

    def configure_logger(self, environment):
        log_level = logging.INFO
        if environment == Environments.DEVELOPMENT:
            # Console output
            ch = logging.StreamHandler()
            ch.setLevel(logging.DEBUG)
            formatter = logging.Formatter(
                '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            ch.setFormatter(formatter)
            self.log_handlers.append(ch)
            log_level = logging.INFO

        logging.basicConfig(level=log_level, handlers=self.log_handlers)
        logging.info("Logger ready")

    def load_configuration(self, environment):
        config_mappings = [
            bundle.config_mapping for bundle in self.bundles
            if hasattr(bundle, "config_mapping")
        ]
        config = None
        if len(config_mappings) > 0:
            c = YmlLoader()
            config = c.load_config(self.configuration_file,
                                   self.parameters_file)
            try:
                config = c.build_config(config, config_mappings)
            except (NoValueException, NodeIsNotConfiguredException,
                    IgnoredFieldException) as ex:
                print("Configuration error: " + str(ex))
                exit()
        return config

    def shutdown(self):
        if not self.is_shutdown:
            self.is_shutdown = True
            logging.info("Kernel shutting down")
            self.mediator.dispatch(KernelShutdownEvent())
            self.thread_pool.shutdown()
            logging.info("Kernel shutdown")

    def wait(self):
        """Wait for all services to finish"""
        for i in self.running_services:
            i.result()
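A standalone sketch of the run_service() / wait() pattern used by the Kernel above: long-running service callables are submitted to a shared pool and their futures are kept so that wait() can block until every service returns. Service names and durations below are illustrative.

import time
from concurrent.futures import ThreadPoolExecutor

pool = ThreadPoolExecutor(max_workers=4)
running_services = []

def service(name, seconds):
    time.sleep(seconds)
    return "%s finished" % name

for name in ("metrics", "http", "scheduler"):
    running_services.append(pool.submit(service, name, 1))

for future in running_services:  # the equivalent of Kernel.wait()
    print(future.result())
pool.shutdown()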
Пример #42
0
class Crawler:
    def __init__(self, starting_url, ending_url):
        self.ending_url = ending_url
        self.queue = deque()
        self.queue.append(starting_url)
        self.queue.append('level')
        self.session = requests.Session()
        self.pool = ThreadPoolExecutor(max_workers=10)

    def crawl(self, depth):
        seen = set()
        level = 0
        t1 = time()
        links_crawled = 0

        while True:
            url = self.queue.popleft()
            if level > depth:
                return False

            print(url, level)
            if url == self.ending_url:
                return True

            if url == 'level':
                level += 1
                self.queue.append('level')
                self.pool.shutdown(wait=True)
                self.pool = ThreadPoolExecutor(max_workers=10)

                continue

            # Check if already visited
            if url in seen:
                continue
            seen.add(url)  # mark the url as visited so it isn't crawled again

            self.pool.submit(self.get_wiki_links, url)
            # self.get_wiki_links(url=url)

            links_crawled += 1

            if links_crawled % 100 == 0:
                print(f'for the {links_crawled} required {time() - t1}')

        return

    def get_wiki_links(self, url: str):
        """
        Given a wiki url, put all the wiki sub-links in the queue
        :param url:
        :return:
        """
        prefix = '/wiki'
        page = self.session.get('https://en.wikipedia.org' + url)
        soup = BeautifulSoup(page.text, 'lxml', parse_only=SoupStrainer('a'))

        for link in soup.find_all(['a'], href=True):
            wiki_link = link.get('href')
            if ':' in wiki_link or '.png' in wiki_link:
                continue
            if wiki_link.startswith(prefix):
                self.queue.append(wiki_link)

        return
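A hypothetical invocation of the crawler above; the start/end pages and depth are placeholders.

crawler = Crawler(starting_url='/wiki/Python_(programming_language)',
                  ending_url='/wiki/Guido_van_Rossum')
found = crawler.crawl(depth=2)  # True if the ending url was reached within 2 levels
print(found)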
Пример #43
0
class Saver:
    def __init__(self,
                 fallback_filename=None,
                 exclude_matches=None,
                 variable_transforms=None,
                 save_dtype=None,
                 target_model_init_from_base_model=False):
        self.variable_transforms = variable_transforms or []
        self.exclude_matches = exclude_matches
        self.variables = None
        self.save_dtype = save_dtype
        if fallback_filename is not None:
            self.set_fallback(fallback_filename)
        self.target_model_init_from_base_model = target_model_init_from_base_model

    def set_fallback(self, fallback_filename):
        self.tpe = ThreadPoolExecutor()
        if not os.path.exists(fallback_filename):
            raise FileNotFoundError(
                "Error loading base model - file not found.")
        self.fallback_filename = fallback_filename
        self.fallback_future = self.tpe.submit(joblib.load, fallback_filename)
        self.fallback_ = None

    @property
    def fallback(self):
        if self.fallback_ is None:
            self.fallback_ = self.fallback_future.result()
            self.fallback_future = None
            self.tpe.shutdown()
            if self.target_model_init_from_base_model:
                if self.variables is None:
                    self.variables = dict()
                for k, v in self.fallback_.items():
                    self.variables['model/target/' + k] = v

        return self.fallback_

    def get_saver_hook(self, estimator, keep_best_model, steps_per_epoch,
                       early_stopping_steps, eval_frequency,
                       cache_weights_to_file):
        return SaverHook(self,
                         estimator=estimator,
                         keep_best_model=keep_best_model,
                         steps_per_epoch=steps_per_epoch,
                         early_stopping_steps=early_stopping_steps,
                         eval_frequency=eval_frequency,
                         cache_weights_to_file=cache_weights_to_file)

    def save(self, finetune_obj, path, mkdir=True):
        if self.variables is None:
            raise FinetuneError("Cowardly refusing to save default model.")
        if self.exclude_matches is not None:
            variables = {
                k: v
                for k, v in self.variables.items()
                if self.exclude_matches not in k
            }
        else:
            variables = self.variables

        names, values = variables.keys(), variables.values()
        if isinstance(path, str):
            folder = os.path.dirname(path)
            os.makedirs(folder, exist_ok=True)
        if self.save_dtype is not None:
            LOGGER.info("Saving with {} precision.".format(
                self.save_dtype.__name__))
            values = [a.astype(self.save_dtype) for a in values]

        var_names_reduced, vals_reduced = self.remove_unchanged(
            names, values, self.fallback)
        var_dict = dict(zip(var_names_reduced, vals_reduced))
        assert len(vals_reduced) == len(var_names_reduced) == len(var_dict)
        joblib.dump((var_dict, finetune_obj), path)

    def load(self, path):
        self.variables, finetune_obj = joblib.load(path)
        finetune_obj.config = get_config(error_on_invalid_keywords=False,
                                         **dict(finetune_obj.config))
        return finetune_obj

    def get_scaffold_init_fn(self):
        def init_fn(scaffold,
                    session,
                    model_portion=None,
                    refresh_base_model=False):
            var_loader = BatchedVarLoad()
            self.var_val = []

            if self.variables is not None:
                variables_sv = self.variables
            else:
                variables_sv = dict()
            all_vars = tf.global_variables()

            zero_out_adapters = False
            if (model_portion != "entire_model"
                ):  # we must be loading in the case of two separate estimators
                all_vars, zero_out_adapters = self.subset_to_load(
                    model_portion, refresh_base_model, all_vars)

            global_step_var = tf.train.get_global_step()

            for var in all_vars:
                if global_step_var is not None and global_step_var.name == var.name:
                    continue
                name = var.name
                saved_var = None
                if name in variables_sv.keys():
                    saved_var = variables_sv[name]
                elif name in self.fallback.keys():
                    saved_var = self.fallback[name]
                elif (self.target_model_init_from_base_model
                      and name.startswith("model/target/") and name.replace(
                          "model/target/", "") in self.fallback.keys()):
                    saved_var = self.fallback[name.replace(
                        "model/target/", "")]

                if zero_out_adapters and "adapter" in name:
                    var_loader.add(var, np.zeros(var.get_shape().as_list()))
                if saved_var is not None:
                    for func in self.variable_transforms:
                        saved_var = func(name, saved_var)
                    var_loader.add(var, saved_var)
            var_loader.run(session)

        return init_fn

    def subset_to_load(self, model_portion, refresh_base_model, all_vars):
        assert model_portion in [
            "featurizer",
            "target",
            "whole_featurizer",
        ], "Must be using separate estimators if loading before graph creation"
        base = [v for v in all_vars if "target" not in v.name]
        zero_out_adapters = False
        if (
                model_portion == "whole_featurizer"
        ):  # load every weight in featurizer - used to initialize and for loading without adapters
            to_load = base
            adapters = [v for v in base if "adapter" in v.name]
            zero_out_adapters = True
        elif (model_portion == "featurizer"
              ):  # update featurizer, loading adapters and scaling weights
            norm_variable_scopes = ["b:0", "g:0", "beta:0", "gamma:0"]
            to_load = (base if refresh_base_model else [
                v for v in base
                if "target" not in v.name and ("adapter" in v.name or any(
                    scope in v.name for scope in norm_variable_scopes))
            ])
        elif model_portion == "target":  # update target model weights
            to_load = [v for v in all_vars if "target" in v.name]
        return to_load, zero_out_adapters

    def remove_unchanged(self, variable_names, variable_values, fallback_vars):
        skips = []
        for var_val, var_name in zip(variable_values, variable_names):
            skip = False
            for fb_var_name, fb_var in fallback_vars.items():
                if fb_var_name == var_name:
                    for func in self.variable_transforms:
                        fb_var = func(var_name, fb_var)
                    if np.allclose(fb_var, var_val):
                        skip = True
                        break
            skips.append(skip)
        return (
            [var for skip, var in zip(skips, variable_names) if not skip],
            [
                var_val for skip, var_val in zip(skips, variable_values)
                if not skip
            ],
        )
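The set_fallback() / fallback pair above implements a lazy prefetch: the base-model file starts loading on a single worker thread, and the first access to fallback blocks until the load completes. A minimal standalone sketch of that pattern (the file name is a placeholder):

from concurrent.futures import ThreadPoolExecutor
import joblib

tpe = ThreadPoolExecutor(max_workers=1)
future = tpe.submit(joblib.load, "base_model.jl")  # loading starts in the background

# ... other setup work can happen here while the file loads ...

fallback = future.result()  # the first use blocks only if the load hasn't finished yet
tpe.shutdown()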
Пример #44
0
class HttpClient(object):
    """A http based client for submitting Spark-based jobs to a Livy backend.

    Parameters
    ----------
    url_str : string
        Livy server url to create a new session or the url of an existing
        session
    load_defaults : boolean, optional
        This parameter decides if the default config needs to be loaded
        Default is True
    conf_dict : dict, optional
        The key-value pairs in the conf_dict will be loaded to the config
        Default is None

    Examples
    --------
    Imports needed to create an instance of HttpClient
    >>> from livy.client import HttpClient

    1) Creates a client that is loaded with default config
       as 'load_defaults' is True by default
    >>> client = HttpClient("http://example:8998/")

    2) Creates a client that does not load default config, but loads
       config that are passed in 'config_dict'
    >>> conf_dict = {'spark.app.name': 'Test App'}
    >>> client = HttpClient("http://example:8998/", load_defaults=False,
    >>>    conf_dict=conf_dict)

    """

    _CONFIG_SECTION = 'env'
    _LIVY_CLIENT_CONF_DIR = "LIVY_CLIENT_CONF_DIR"

    def __init__(self, url, load_defaults=True, conf_dict=None):
        uri = urlparse(url)
        self._config = ConfigParser()
        self._load_config(load_defaults, conf_dict)
        self._job_type = 'pyspark'
        match = re.match(r'(.*)/sessions/([0-9]+)', uri.path)
        if match:
            base = ParseResult(scheme=uri.scheme,
                               netloc=uri.netloc,
                               path=match.group(1),
                               params=uri.params,
                               query=uri.query,
                               fragment=uri.fragment)
            self._set_uri(base)
            self._conn = _LivyConnection(base, self._config)
            self._session_id = int(match.group(2))
            self._reconnect_to_existing_session()
        else:
            self._set_uri(uri)
            session_conf_dict = dict(self._config.items(self._CONFIG_SECTION))
            self._conn = _LivyConnection(uri, self._config)
            self._session_id = self._create_new_session(
                session_conf_dict).json()['id']
        self._executor = ThreadPoolExecutor(max_workers=1)
        self._stopped = False
        self.lock = threading.Lock()

    def submit(self, job):
        """
        Submits a job for execution to the spark cluster.

        Parameters
        ----------
        job : function
            The function must accept a single parameter, which is an instance
            of JobContext.

        Returns
        -------
        job_handle : an instance of the class JobHandle
            A handle that can be used to monitor the job

        Examples
        -------
        >>> def simple_spark_job(context):
        >>>     elements = [10, 20, 30, 40, 50]
        >>>     return context.sc.parallelize(elements, 2).count()

        >>> client.submit(simple_spark_job)

        """
        return self._send_job('submit-job', job)

    def run(self, job):
        """
        Asks the remote context to run a job immediately.

        Normally, the remote context will queue jobs and execute them based on
        how many worker threads have been configured. This method will run
        the submitted job in the same thread processing the RPC message,
        so that queueing does not apply.

        It's recommended that this method only be used to run code that
        finishes quickly. This avoids interfering with the normal operation
        of the context.

        Parameters
        ----------
        job : function
            The function must accept a single parameter, which is an instance
            of JobContext. Spark jobs can be created with the help of
            JobContext, which exposes the Spark libraries.

        Returns
        -------
        future : concurrent.futures.Future
            A future to monitor the status of the job

        Examples
        -------
        >>> def simple_job(context):
        >>>     return "hello"

        >>> client.run(simple_job)
        """
        return self._send_job("run-job", job)

    def add_file(self, file_uri):
        """
        Adds a file to the running remote context.

        Note that the URL should be reachable by the Spark driver process. If
        running the driver in cluster mode, it may reside on a different
        host, meaning "file:" URLs have to exist on that node (and not on
        the client machine).

        Parameters
        ----------
        file_uri : string
            String representation of the uri that points to the location
            of the file

        Returns
        -------
        future : concurrent.futures.Future
            A future to monitor the status of the job

        Examples
        -------
        >>> client.add_file("file:/test_add.txt")

        >>> # Example job using the file added using add_file function
        >>> def add_file_job(context):
        >>>    from pyspark import SparkFiles
        >>>    def func(iterator):
        >>>        with open(SparkFiles.get("test_add.txt")) as testFile:
        >>>            fileVal = int(testFile.readline())
        >>>            return [x * fileVal for x in iterator]
        >>>    return (context.sc.parallelize([1, 2, 3, 4])
        >>>            .mapPartitions(func).collect())

        >>> client.submit(add_file_job)
        """
        return self._add_file_or_pyfile_job("add-file", file_uri)

    def add_jar(self, file_uri):
        """
        Adds a jar file to the running remote context.

        Note that the URL should be reachable by the Spark driver process. If
        running the driver  in cluster mode, it may reside on a different host,
        meaning "file:" URLs have to exist on that node (and not on the
        client machine).

        Parameters
        ----------
        file_uri : string
            String representation of the uri that points to the location
            of the file

        Returns
        -------
        future : concurrent.futures.Future
            A future to monitor the status of the job

        Examples
        -------
        >>> client.add_jar("file:/test_package.jar")

        """
        return self._add_file_or_pyfile_job("add-jar", file_uri)

    def add_pyfile(self, file_uri):
        """
        Adds a .py or .zip to the running remote context.

        Note that the URL should be reachable by the Spark driver process. If
        running the driver  in cluster mode, it may reside on a different host,
        meaning "file:" URLs have to exist on that node (and not on the
        client machine).

        Parameters
        ----------
        file_uri : string
            String representation of the uri that points to the location
            of the file

        Returns
        -------
        future : concurrent.futures.Future
            A future to monitor the status of the job

        Examples
        --------
        >>> client.add_pyfile("file:/test_package.egg")

        >>> # Example job using the file added using add_pyfile function
        >>> def add_pyfile_job(context):
        >>>    # Importing module from test_package.egg
        >>>    from test.pyfile_test import TestClass
        >>>    test_class = TestClass()
        >>>    return test_class.say_hello()

        >>> client.submit(add_pyfile_job)
        """
        return self._add_file_or_pyfile_job("add-pyfile", file_uri)

    def upload_file(self, file_path):
        """
        Upload a file to be passed to the Spark application.

        Parameters
        ----------
        file_path : string
            File path of the local file to be uploaded.

        Returns
        -------
        future : concurrent.futures.Future
            A future to monitor the status of the job

        Examples
        --------
        >>> client.upload_file("/test_upload.txt")

        >>> # Example job using the file uploaded using upload_file function
        >>> def upload_file_job(context):
        >>>    from pyspark import SparkFiles
        >>>    def func(iterator):
        >>>        with open(SparkFiles.get("test_upload.txt")) as testFile:
        >>>            fileVal = int(testFile.readline())
        >>>        return [x * fileVal for x in iterator]
        >>>    return (context.sc.parallelize([1, 2, 3, 4])
        >>>            .mapPartitions(func).collect())

        >>> client.submit(upload_file_job)
        """
        return self._upload_file_or_pyfile("upload-file",
                                           open(file_path, 'rb'))

    def upload_pyfile(self, file_path):
        """
        Upload a .py or .zip dependency to be passed to the Spark application.

        Parameters
        ----------
        file_path : string
            File path of the local file to be uploaded.

        Returns
        -------
        future : concurrent.futures.Future
            A future to monitor the status of the job

        Examples
        --------
        >>> client.upload_pyfile("/test_package.egg")

        >>> # Example job using the file uploaded using upload_pyfile function
        >>> def upload_pyfile_job(context):
        >>>    # Importing module from test_package.egg
        >>>    from test.pyfile_test import TestClass
        >>>    test_class = TestClass()
        >>>    return test_class.say_hello()

        >>> client.submit(upload_pyfile_job)
        """
        return self._upload_file_or_pyfile("upload-pyfile",
                                           open(file_path, 'rb'))

    def stop(self, shutdown_context):
        """
        Stops the remote context.
        This method returns immediately and does not wait for pending jobs
        to complete.

        Parameters
        ----------
        shutdown_context : Boolean
            Whether to shut down the underlying Spark context. If False, the
            context keeps running and it is still possible to send commands
            to it, provided the backend being used supports that.
        """
        with self.lock:
            if not self._stopped:
                self._executor.shutdown(wait=False)
                try:
                    if shutdown_context:
                        session_uri = "/" + str(self._session_id)
                        headers = {'X-Requested-By': 'livy'}
                        self._conn.send_request("DELETE",
                                                session_uri,
                                                headers=headers)
                except Exception:
                    raise Exception(traceback.format_exc())
                self._stopped = True

    def _set_uri(self, uri):
        if uri is not None and uri.scheme in ('http', 'https'):
            self._config.set(self._CONFIG_SECTION, 'livy.uri', uri.geturl())
        else:
            url_exception = uri.geturl() if uri is not None else None
            raise ValueError('Cannot create client - URI not supported: '
                             '{}'.format(url_exception))

    def _set_conf(self, key, value):
        if value is not None:
            self._config.set(self._CONFIG_SECTION, key, value)
        else:
            self._delete_conf(key)

    def _delete_conf(self, key):
        self._config.remove_option(self._CONFIG_SECTION, key)

    def _set_multiple_conf(self, conf_dict):
        for key, value in conf_dict.items():
            self._set_conf(key, value)

    def _load_config(self, load_defaults, conf_dict):
        self._config.add_section(self._CONFIG_SECTION)
        if load_defaults:
            self._load_default_config()
        if conf_dict is not None and len(conf_dict) > 0:
            self._set_multiple_conf(conf_dict)

    def _load_default_config(self):
        config_dir = os.environ.get(self._LIVY_CLIENT_CONF_DIR)
        if config_dir is not None:
            config_files = os.listdir(config_dir)
            default_conf_files = ['spark-defaults.conf', 'livy-client.conf']
            for default_conf_file in default_conf_files:
                if default_conf_file in config_files:
                    self._load_config_from_file(config_dir, default_conf_file)

    def _load_config_from_file(self, config_dir, config_file):
        path = os.path.join(config_dir, config_file)
        data = "[" + self._CONFIG_SECTION + "]\n" + \
            open(path, encoding='utf-8').read()
        self._config.readfp(StringIO(data))

    def _create_new_session(self, session_conf_dict):
        data = {'kind': 'pyspark', 'conf': session_conf_dict}
        response = self._conn.send_request('POST',
                                           "/",
                                           headers=self._conn._JSON_HEADERS,
                                           data=data)
        return response

    def _reconnect_to_existing_session(self):
        reconnect_uri = "/" + str(self._session_id) + "/connect"
        self._conn.send_request('POST',
                                reconnect_uri,
                                headers=self._conn._JSON_HEADERS)

    def _send_job(self, command, job):
        pickled_job = cloudpickle.dumps(job)
        base64_pickled_job = base64.b64encode(pickled_job).decode('utf-8')
        base64_pickled_job_data = \
            {'job': base64_pickled_job, 'jobType': self._job_type}
        handle = JobHandle(self._conn, self._session_id, self._executor)
        handle._start(command, base64_pickled_job_data)
        return handle

    def _add_file_or_pyfile_job(self, command, file_uri):
        data = {'uri': file_uri}
        suffix_url = "/" + str(self._session_id) + "/" + command
        return self._executor.submit(self._add_or_upload_resource,
                                     suffix_url,
                                     data=data,
                                     headers=self._conn._JSON_HEADERS)

    def _upload_file_or_pyfile(self, command, open_file):
        files = {'file': open_file}
        suffix_url = "/" + str(self._session_id) + "/" + command
        return self._executor.submit(self._add_or_upload_resource,
                                     suffix_url,
                                     files=files)

    def _add_or_upload_resource(self,
                                suffix_url,
                                files=None,
                                data=None,
                                headers=None):
        return self._conn.send_request('POST',
                                       suffix_url,
                                       files=files,
                                       data=data,
                                       headers=headers).content
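
The _add_file_or_pyfile_job and _upload_file_or_pyfile helpers above share one pattern: wrap a blocking HTTP call in executor.submit and hand the resulting Future back to the caller. Below is a minimal, self-contained sketch of that pattern; the send_request stub and the session id are invented stand-ins for the real Livy connection, not part of the client above.

import time
from concurrent.futures import ThreadPoolExecutor


def send_request(method, suffix_url, data=None):
    # Stand-in for the client's _conn.send_request(); simulates a slow POST.
    time.sleep(0.1)
    return {'method': method, 'url': suffix_url, 'data': data}


executor = ThreadPoolExecutor(max_workers=4)


def add_file_job(session_id, file_uri):
    # Same shape as _add_file_or_pyfile_job: build the suffix URL and submit
    # the blocking request to the executor, returning a Future immediately.
    suffix_url = "/" + str(session_id) + "/add-file"
    return executor.submit(send_request, 'POST', suffix_url, {'uri': file_uri})


future = add_file_job(42, "file:/test_add.txt")
print(future.result())        # blocks until the worker thread finishes
executor.shutdown(wait=True)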
Пример #45
0
class WebInterface:
    """
    The WebInterface is an executor-like class for submitting runs to the VerifierCloud.
    """
    def __init__(self,
                 web_interface_url,
                 user_pwd,
                 svn_branch='trunk',
                 svn_revision='HEAD',
                 thread_count=1,
                 result_poll_interval=2,
                 user_agent=None,
                 version=None):
        """
        Creates a new WebInterface object.
        The given svn revision is resolved (e.g. 'HEAD' -> 17495).
        @param web_interface_url: the base URL of the VerifierCloud's web interface
        @param user_pwd: user name and password in the format '<user_name>:<password>' or None if no authentication is required
        @param svn_branch: the svn branch name or 'trunk', defaults to 'trunk'
        @param svn_revision: the svn revision number or 'HEAD', defaults to 'HEAD'
        @param thread_count: the number of threads for fetching results in parallel
        @param result_poll_interval: the number of seconds to wait between polling results
        """
        if not (1 <= thread_count <= MAX_SUBMISSION_THREADS):
            sys.exit(
                "Invalid number {} of client threads, needs to be between 1 and {}."
                .format(thread_count, MAX_SUBMISSION_THREADS))
        if not 1 <= result_poll_interval:
            sys.exit("Poll interval {} is too small, needs to be at least 1s.".
                     format(result_poll_interval))
        if not web_interface_url[-1] == '/':
            web_interface_url += '/'

        default_headers = {'Connection': 'Keep-Alive'}
        if user_agent:
            default_headers['User-Agent'] = \
                '{}/{} (Python/{} {}/{})'.format(user_agent, version, platform.python_version(), platform.system(), platform.release())

        urllib.parse.urlparse(web_interface_url)  # sanity check
        self._web_interface_url = web_interface_url
        logging.info('Using VerifierCloud at %s', web_interface_url)

        self._connection = requests.Session()
        self._connection.headers.update(default_headers)
        self._connection.verify = '/etc/ssl/certs'
        if user_pwd:
            self._connection.auth = (user_pwd.split(":")[0],
                                     user_pwd.split(":")[1])
            self._base64_user_pwd = base64.b64encode(
                user_pwd.encode("utf-8")).decode("utf-8")
        else:
            self._base64_user_pwd = None

        self._unfinished_runs = {}
        self._unfinished_runs_lock = threading.Lock()
        self._downloading_result_futures = {}
        self._download_attempts = {}
        self.thread_count = thread_count
        self._executor = ThreadPoolExecutor(thread_count)
        self._thread_local = threading.local()
        self._hash_code_cache = {}
        self._group_id = str(random.randint(0, 1000000))
        self._read_hash_code_cache()
        self._resolved_tool_revision(svn_branch, svn_revision)
        self._tool_name = self._request_tool_name()

        try:
            self._result_downloader = SseResultDownloader(
                self, result_poll_interval)
        except Exception:
            self._result_downloader = PollingResultDownloader(
                self, result_poll_interval)

    def _read_hash_code_cache(self):
        if not os.path.isfile(HASH_CODE_CACHE_PATH):
            return

        with open(HASH_CODE_CACHE_PATH, mode='r') as hashCodeCacheFile:
            for line in hashCodeCacheFile:
                tokens = line.strip().split('\t')
                if len(tokens) == 3:
                    self._hash_code_cache[(tokens[0], tokens[1])] = tokens[2]

    def _write_hash_code_cache(self):
        directory = os.path.dirname(HASH_CODE_CACHE_PATH)
        try:
            os.makedirs(directory, exist_ok=True)
            with tempfile.NamedTemporaryFile(dir=directory,
                                             delete=False) as tmpFile:
                for (path, mTime), hashValue in self._hash_code_cache.items():
                    line = (path + '\t' + mTime + '\t' + hashValue +
                            '\n').encode()
                    tmpFile.write(line)

                os.renames(tmpFile.name, HASH_CODE_CACHE_PATH)
        except OSError as e:
            logging.warning("Could not write hash-code cache file to %s: %s",
                            HASH_CODE_CACHE_PATH, e.strerror)

    def _resolved_tool_revision(self, svn_branch, svn_revision):

        path = "tool/version?svnBranch=" + svn_branch + "&revision=" + svn_revision

        (resolved_svn_revision, _) = self._request("GET", path)
        self._svn_branch = svn_branch
        self._svn_revision = resolved_svn_revision.decode("UTF-8")

    def _request_tool_name(self):
        path = "tool/name"
        (tool_name, _) = self._request("GET", path)
        return tool_name.decode("UTF-8")

    def tool_revision(self):
        return self._svn_branch + ':' + self._svn_revision

    def tool_name(self):
        return self._tool_name

    def _get_sha1_hash(self, path):
        path = os.path.abspath(path)
        mTime = str(os.path.getmtime(path))
        if ((path, mTime) in self._hash_code_cache):
            return self._hash_code_cache[(path, mTime)]

        else:
            with open(path, 'rb') as file:
                hashValue = hashlib.sha1(file.read()).hexdigest()
                self._hash_code_cache[(path, mTime)] = hashValue
                return hashValue

    def _create_and_add_run_future(self, run_id):
        result = RunResultFuture(self, run_id)
        with self._unfinished_runs_lock:
            self._unfinished_runs[run_id] = result
        return result

    def submit_witness_validation(self,
                                  witness_path,
                                  program_path,
                                  configuration=None,
                                  user_pwd=None):
        """
        Submits a single witness validation run to the VerifierCloud.
        @note: flush() should be called after the submission of the last run.
        @param witness_path: path to the file containing the witness
        @param program_path: path to the file containing the program
        @param configuration: name of configuration (optional)
        @param user_pwd: overrides the user name and password given in the constructor (optional)
        """

        # collect parameters
        params = {}

        with open(witness_path, 'rb') as witness_file:
            params['errorWitnessText'] = witness_file.read()

        with open(program_path, 'rb') as program_file:
            params['programText'] = program_file.read()

        if configuration:
            params['configuration'] = configuration

        # prepare request
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Content-Encoding": "deflate",
            "Accept": "text/plain"
        }

        paramsCompressed = zlib.compress(
            urllib.parse.urlencode(params, doseq=True).encode('utf-8'))
        path = "runs/witness_validation/"

        (run_id, _) = self._request("POST",
                                    path,
                                    paramsCompressed,
                                    headers,
                                    user_pwd=user_pwd)

        run_id = run_id.decode("UTF-8")
        logging.debug('Submitted witness validation run with id %s', run_id)

        return self._create_and_add_run_future(run_id)

    def submit(self, run, limits, cpu_model, result_files_pattern=None, meta_information=None, \
               priority='IDLE', user_pwd=None, svn_branch=None, svn_revision=None,
               result_files_patterns=[], required_files=[]):
        """
        Submits a single run to the VerifierCloud.
        @note: flush() should be called after the submission of the last run.
        @param run: The input for the run:  command line options (run.options),
                                            source files (run.sourcefiles),
                                            property file (run.propertyfile),
                                            identifier for error messages (run.identifier)
        @param limits: dict of limitations for the run (memlimit, timelimit, corelimit, softtimelimit)
        @param cpu_model: substring of CPU model to use or 'None' for no restriction
        @param result_files_pattern: the result is filtered with the given glob pattern, '**' is no restriction and None or the empty string do not match any file.
        @param meta_information: meta information about the submitted run as JSON string
        @param priority: the priority of the submitted run, defaults to 'IDLE'
        @param user_pwd: overrides the user name and password given in the constructor (optional)
        @param svn_branch: overrides the svn branch given in the constructor (optional)
        @param svn_revision: overrides the svn revision given in the constructor (optional)
        @param result_files_patterns: list of result_files_pattern (optional)
        @param required_files: list of additional files required to execute the run (optional)
        @raise WebClientError: if the HTTP request could not be created
        @raise HTTPError: if the HTTP request was not successful
        """
        if result_files_pattern:
            if result_files_patterns:
                raise ValueError(
                    "Cannot specify result_files_pattern and result_files_patterns "
                    "at the same time.")
            result_files_patterns = [result_files_pattern]

        return self._submit(run, limits, cpu_model, required_files,
                            result_files_patterns, meta_information, priority,
                            user_pwd, svn_branch, svn_revision)

    def _submit(self,
                run,
                limits,
                cpu_model,
                required_files,
                result_files_patterns,
                meta_information,
                priority,
                user_pwd,
                svn_branch,
                svn_revision,
                counter=0):

        params = []
        opened_files = [
        ]  # open file handles are passed to the request library

        for programPath in run.sourcefiles:
            norm_path = self._normalize_path_for_cloud(programPath)
            params.append(('programTextHash',
                           (norm_path, self._get_sha1_hash(programPath))))

        for required_file in required_files:
            norm_path = self._normalize_path_for_cloud(required_file)
            params.append(
                ('requiredFileHash', (norm_path,
                                      self._get_sha1_hash(required_file))))

        params.append(('svnBranch', svn_branch or self._svn_branch))
        params.append(('revision', svn_revision or self._svn_revision))

        if run.propertyfile:
            file = self._add_file_to_params(params, 'propertyText',
                                            run.propertyfile)
            opened_files.append(file)

        if MEMLIMIT in limits:
            params.append(('memoryLimitation', str(limits[MEMLIMIT])))
        if TIMELIMIT in limits:
            params.append(('timeLimitation', str(limits[TIMELIMIT])))
        if SOFTTIMELIMIT in limits:
            params.append(('softTimeLimitation', str(limits[SOFTTIMELIMIT])))
        if CORELIMIT in limits:
            params.append(('coreLimitation', str(limits[CORELIMIT])))
        if cpu_model:
            params.append(('cpuModel', cpu_model))

        if result_files_patterns:
            for pattern in result_files_patterns:
                params.append(('resultFilesPattern', pattern))
        else:
            params.append(('resultFilesPattern', ''))

        if priority:
            params.append(('priority', priority))

        (invalidOption, files) = self._handle_options(run, params, limits)
        opened_files.extend(files)
        if invalidOption:
            raise WebClientError('Command {0} contains option "{1}" that is not usable '
                                 'with the webclient.'.format(run.options, invalidOption))

        params.append(('groupId', str(self._group_id)))
        if meta_information:
            params.append(('metaInformation', meta_information))

        # prepare request
        headers = {"Accept": "text/plain"}
        path = "runs/"
        (run_id, statusCode) = self._request("POST", path, files=params, headers=headers, \
                                             expectedStatusCodes=[200, 412], user_pwd=user_pwd)

        for opened_file in opened_files:
            opened_file.close()

        # program files or required files given as hash value are not known by the cloud system
        if statusCode == 412 and counter < 1:
            headers = {
                "Content-Type": "application/octet-stream",
                "Content-Encoding": "deflate"
            }
            filePath = "files/"

            # upload all used program files
            for programPath in run.sourcefiles:
                with open(programPath, 'rb') as programFile:
                    compressedProgramText = zlib.compress(
                        programFile.read(), 9)
                    self._request('POST', filePath, data=compressedProgramText, headers=headers,\
                                   expectedStatusCodes=[200, 204], user_pwd=user_pwd)

            # upload all required files
            for required_file_path in required_files:
                with open(required_file_path, 'rb') as required_file:
                    compressed_required_file = zlib.compress(
                        required_file.read(), 9)
                    self._request('POST', filePath, data=compressed_required_file, headers=headers,\
                                   expectedStatusCodes=[200, 204], user_pwd=user_pwd)

            # retry submission of run
            return self._submit(run, limits, cpu_model, required_files,
                                result_files_patterns, meta_information,
                                priority, user_pwd, svn_branch, svn_revision,
                                counter + 1)

        else:
            run_id = run_id.decode("UTF-8")
            logging.debug('Submitted run with id %s', run_id)
            return self._create_and_add_run_future(run_id)

    def _handle_options(self, run, params, rlimits):
        opened_files = []
        config = None

        # TODO use code from the CPAchecker module, which adds -stats and sets -timelimit,
        # instead of doing it here manually, too
        if self._tool_name == "CPAchecker":
            params.append(("option", "statistics.print=true"))

            if 'softtimelimit' in rlimits:
                params.append(
                    ("option",
                     "limits.time.cpu=" + str(rlimits['softtimelimit']) + "s"))

        if run.options:
            i = iter(run.options)
            while True:
                try:
                    option = next(i)
                    if len(option) == 0:
                        continue

                    if option == "-heap":
                        params.append(('heap', next(i)))
                    elif option == "-stack":
                        params.append(('stack', next(i)))

                    elif option == "-noout":
                        params.append(("option", "output.disable=true"))
                    elif option == "-outputpath":
                        params.append(("option", "output.path=" + next(i)))
                    elif option == "-logfile":
                        params.append(("option", "log.file=" + next(i)))
                    elif option == "-nolog":
                        params.append(("option", "log.level=OFF"))
                        params.append(("option", "log.consoleLevel=OFF"))
                    elif option == "-stats":
                        # ignore, is always set by this script
                        pass
                    elif option == "-disable-java-assertions":
                        params.append(('disableJavaAssertions', 'true'))
                    elif option == "-java":
                        params.append(("option", "language=JAVA"))
                    elif option == "-32":
                        params.append(
                            ("option", "analysis.machineModel=Linux32"))
                    elif option == "-64":
                        params.append(
                            ("option", "analysis.machineModel=Linux64"))
                    elif option == "-entryfunction":
                        params.append(
                            ("option", "analysis.entryFunction=" + next(i)))
                    elif option == "-timelimit":
                        params.append(("option", "limits.time.cpu=" + next(i)))
                    elif option == "-skipRecursion":
                        params.append(
                            ("option", "cpa.callstack.skipRecursion=true"))
                        params.append(("option", "analysis.summaryEdges=true"))
                    elif option == "-cbmc":
                        params.append(
                            ("option", "analysis.checkCounterexamples=true"))
                        params.append(
                            ("option", "counterexample.checker=CBMC"))
                    elif option == "-preprocess":
                        params.append(
                            ("option", "parser.usePreprocessor=true"))
                    elif option == "-generateReport":
                        params.append(('generateReport', 'true'))

                    elif option == "-spec":
                        spec_path = next(i)
                        file = self._add_file_to_params(
                            params, "specificationText", spec_path)
                        opened_files.append(file)

                    elif option == "-config":
                        configPath = next(i)
                        tokens = configPath.split('/')
                        if (tokens[0] == "config" and len(tokens) == 2):
                            config = tokens[1].split('.')[0]
                            params.append(('configuration', config))
                        else:
                            params.append(
                                ("option", "configuration.file=" + configPath))

                    elif option == "-setprop":
                        params.append(("option", next(i)))

                    elif option[0] == '-':
                        if config:
                            raise WebClientError(
                                "More than one configuration: '{}' and '{}'".
                                format(config, option[1:]))
                        else:
                            params.append(('configuration', option[1:]))
                            config = option[1:]
                    else:
                        return (option, opened_files)

                except StopIteration:
                    break

        return (None, opened_files)

    def _add_file_to_params(self, params, name, path):
        norm_path = self._normalize_path_for_cloud(path)
        file = open(path, 'rb')
        params.append((name, (norm_path, file)))
        return file

    def _normalize_path_for_cloud(self, path):
        norm_path = os.path.normpath(path)
        if '..' in norm_path or os.path.isabs(norm_path):
            norm_path = os.path.basename(norm_path)
        return norm_path

    def flush_runs(self):
        """
        Starts the execution of all previous submitted runs in the VerifierCloud.
        The web interface groups runs and submits them to the VerifierCloud only from time to time.
        This method forces the web interface to do this immediately and starts downloading of results.
        @return: the ids of the RunCollections created since the last flush request
        """
        headers = {
            "Content-Type": "application/x-www-form-urlencoded",
            "Connection": "Keep-Alive"
        }

        params = {"groupId": self._group_id}
        path = "runs/flush"
        (response, _) = self._request("POST",
                                      path,
                                      data=params,
                                      headers=headers,
                                      expectedStatusCodes=[200, 204])
        run_collections = response.decode('utf-8').split("\n")
        if len(run_collections) == 0:
            logging.warning(
                'No runs were submitted to the VerifierCloud before, or a rate limit was hit.'
            )
        else:
            logging.info('Submitted %d run collections: %s',
                         len(run_collections), ",".join(run_collections))
            self._result_downloader.start()

        return run_collections

    def _is_finished(self, run_id):
        headers = {"Accept": "text/plain"}
        path = "runs/" + run_id + "/state"

        try:
            (state, _) = self._request("GET", path, headers=headers)

            state = state.decode('utf-8')
            if state == "FINISHED":
                logging.debug('Run %s finished.', run_id)

            if state == "UNKNOWN":
                logging.debug(
                    'Run %s is not known by the webclient, trying to get the result.',
                    run_id)

            return state

        except requests.HTTPError as e:
            logging.warning('Could not get run state %s: %s', run_id,
                            e.response)
            return False

    def _download_result(self, run_id):
        # download result as zip file
        headers = {"Accept": "application/zip"}
        path = "runs/" + run_id + "/result"
        (zip_content, _) = self._request("GET", path, headers=headers)
        return zip_content

    def _download_result_async(self, run_id):
        def callback(downloaded_result):
            run_id = self._downloading_result_futures.pop(downloaded_result)
            exception = downloaded_result.exception()

            if not exception:
                with self._unfinished_runs_lock:
                    result_future = self._unfinished_runs.pop(run_id, None)
                if result_future:
                    result_future.set_result(downloaded_result.result())

            else:
                logging.info('Could not get result of run %s: %s', run_id,
                             downloaded_result.exception())

                # client error
                if type(exception) is HTTPError and exception.response and  \
                    400 <= exception.response.status_code and exception.response.status_code <= 499:

                    attempts = self._download_attempts.pop(run_id, 1)
                    if attempts < 10:
                        self._download_attempts[run_id] = attempts + 1
                        self._download_result_async(run_id)
                    else:
                        self._run_failed(run_id)

                else:
                    # retry it
                    self._download_result_async(run_id)

        if run_id not in self._downloading_result_futures.values():
            # result is not downloaded
            future = self._executor.submit(self._download_result, run_id)
            self._downloading_result_futures[future] = run_id
            future.add_done_callback(callback)

    def _run_failed(self, run_id):
        run_result_future = self._unfinished_runs.pop(run_id, None)
        if run_result_future:
            logging.warning('Execution of run %s failed.', run_id)
            run_result_future.set_exception(
                WebClientError("Execution failed."))

    def shutdown(self):
        """
        Cancels all unfinished runs and stops all internal threads.
        """
        self._result_downloader.shutdown()

        if len(self._unfinished_runs) > 0:
            logging.info("Stopping tasks on server...")
            stop_executor = ThreadPoolExecutor(max_workers=5 *
                                               self.thread_count)
            stop_tasks = set()
            with self._unfinished_runs_lock:
                for runId in self._unfinished_runs.keys():
                    stop_tasks.add(stop_executor.submit(self._stop_run, runId))
                    self._unfinished_runs[runId].set_exception(
                        WebClientError("WebInterface was stopped."))
                self._unfinished_runs.clear()

            for task in stop_tasks:
                task.result()
            stop_executor.shutdown(wait=True)
            logging.info("Stopped all tasks.")

        self._write_hash_code_cache()
        self._executor.shutdown(wait=True)
        self._connection.close()

    def _stop_run(self, run_id):
        with self._unfinished_runs_lock:
            self._unfinished_runs.pop(run_id, None)

        path = "runs/" + run_id
        try:
            self._request("DELETE", path, expectedStatusCodes=[200, 204, 404])
        except HTTPError as e:
            logging.info("Stopping of run %s failed: %s", run_id, e.reason)

    def _request(self,
                 method,
                 path,
                 data=None,
                 headers=None,
                 files=None,
                 expectedStatusCodes=[200],
                 user_pwd=None):
        url = self._web_interface_url + path
        if user_pwd:
            auth = (user_pwd.split(":")[0], user_pwd.split(":")[1])
        else:
            auth = None

        counter = 0
        while (counter < 5):
            counter += 1
            # send request
            try:
                response = self._connection.request(method,
                                                    url,
                                                    data=data,
                                                    files=files,
                                                    headers=headers,
                                                    auth=auth)

            except Exception as e:
                if (counter < 5):
                    logging.debug("Exception during %s request to %s: %s",
                                  method, path, e)
                    sleep(1)
                    continue
                else:
                    raise

            if response.status_code in expectedStatusCodes:
                return (response.content, response.status_code)

            else:
                message = ""
                if response.status_code == 401:
                    message = 'Error 401: Permission denied. Please check the URL given to --cloudMaster and specify credentials if necessary.'

                elif response.status_code == 404:
                    message = 'Error 404: Not found. Please check the URL given to --cloudMaster.'

                elif response.status_code == 503:
                    message = 'Error 503: Service Unavailable.'
                    if counter < 5:
                        logging.debug(message)
                        sleep(60)
                        continue

                else:
                    message += response.content.decode('UTF-8')

                logging.warning(message)
                raise requests.HTTPError(path, message, response=response)
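
_download_result_async above combines three ThreadPoolExecutor idioms: submitting work, keeping a future-to-run-id map, and using add_done_callback to either deliver the result or resubmit on failure. The stripped-down sketch below reproduces only the bounded-retry part; fake_download and the attempt limit of 3 are invented stand-ins for the real VerifierCloud request.

import random
import threading
from concurrent.futures import ThreadPoolExecutor

executor = ThreadPoolExecutor(max_workers=2)
download_attempts = {}
finished = threading.Event()


def fake_download(run_id):
    # Stand-in for the real HTTP download; fails randomly to trigger retries.
    if random.random() < 0.5:
        raise IOError("download of run %s failed" % run_id)
    return "zip content of %s" % run_id


def download_async(run_id):
    future = executor.submit(fake_download, run_id)

    def callback(done):
        if done.exception() is None:
            print("run %s done: %s" % (run_id, done.result()))
            finished.set()
            return
        attempts = download_attempts.get(run_id, 1)
        if attempts < 3:                      # bounded retries, as in the original
            download_attempts[run_id] = attempts + 1
            download_async(run_id)            # resubmit the download
        else:
            print("run %s failed permanently" % run_id)
            finished.set()

    future.add_done_callback(callback)


download_async("r1")
finished.wait()
executor.shutdown(wait=True)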
Пример #46
0
import requests
from concurrent.futures import ThreadPoolExecutor


def download_page_data(page):
    print('Running download task')
    print(page)

    fullurl = 'http://blog.jobbole.com/all-posts/page/%s/' % str(page)
    # send the request
    req_headers = {
        'User-Agent':
        'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36',
    }
    response = requests.get(fullurl, headers=req_headers)
    if response.status_code == 200:
        print('Data added successfully')
        # 2. return the fetched page source data
        return response.text, response.status_code


def download_done(future):
    print(future.result())


if __name__ == '__main__':
    # instantiate a thread pool
    # max_workers: the number of threads to create in the pool
    thread_pool = ThreadPoolExecutor(max_workers=10)

    for page in range(1, 101):
        handler = thread_pool.submit(download_page_data, page)
        handler.add_done_callback(download_done)

    thread_pool.shutdown()  # -> actually calls the join() method
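
The callback style above works, but the same crawl can also be written with concurrent.futures.as_completed, which yields futures as their workers finish and re-raises worker exceptions in the main thread. A small runnable variant follows; fetch_page is a stub so the sketch does not depend on the real site.

from concurrent.futures import ThreadPoolExecutor, as_completed


def fetch_page(page):
    # Stand-in for download_page_data(); returns the page number and a status.
    return page, 200


with ThreadPoolExecutor(max_workers=10) as pool:
    futures = [pool.submit(fetch_page, page) for page in range(1, 11)]
    for future in as_completed(futures):
        page, status = future.result()    # re-raises any worker exception
        print('page %d -> %d' % (page, status))
# leaving the "with" block implicitly calls pool.shutdown(wait=True)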
Пример #47
0
                    print('modifying company code: {0} from {1} to {2}'.format(
                        comp[0], exist_comp, comp[1]))
                    cur.execute('INSERT INTO fund_company VALUES(?, ?)', (
                        comp[0],
                        comp[1],
                    ))
    except Exception as err:
        print('get_fund_companies error when INSERT INTO')
        print(err)
        raise
    print('init companies finished...')
    pass


def get_fund_real_time(name):
    pass


def init():
    init_database()
    get_fund_companies()
    get_all_funds()


print("hello")
init()
for future in futures:
    future.result()
thread_pool.shutdown(True)
sql.close()
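
The snippet above is cut off, so the module-level names futures, thread_pool and sql that it waits on are created somewhere that is not shown. Purely as an assumption about what that missing part might look like (and leaving the sql/database handle aside), the usual shape of such a setup is:

from concurrent.futures import ThreadPoolExecutor


def get_fund_real_time(code):
    # Placeholder, mirroring the empty function in the snippet above.
    return code


fund_codes = ['000001', '110022', '161725']   # hypothetical fund codes

thread_pool = ThreadPoolExecutor(max_workers=8)
futures = [thread_pool.submit(get_fund_real_time, code) for code in fund_codes]

for future in futures:
    future.result()             # surface any exception raised in a worker
thread_pool.shutdown(True)      # positional wait=True, as at the end of the example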
Пример #48
0
class MessageHandler:
    def __init__(self, client):
        self.app = App()
        self.client = client
        self.state = None
        self.executors = ThreadPoolExecutor(1)

    # end def

    def destroy(self):
        try:
            self.app.destroy()
            self.executors.shutdown(False)
        except:
            logger.exception('While destroying MessageHandler')
        finally:
            self.client.handlers.pop(self.user.id)
            shutil.rmtree(self.app.output_path, ignore_errors=True)
        # end try

    # end def

    async def send(self, *contents):
        for text in contents:
            if text:
                # await self.client.send_typing(self.user)
                async with self.user.typing():
                    await self.user.send(text)
            # end if
        # end for

    # end def

    async def process(self, message):
        self.message = message
        self.user = message.author
        if not self.state:
            await self.send('-' * 80 + '\n' + ('Hello %s\n' % self.user.name) +
                            '*Lets make reading lightnovels great again!*\n' +
                            '-' * 80 + '\n')
            self.state = self.get_novel_url
        # end if
        await self.state()

    # end def

    async def get_novel_url(self):
        await self.send(
            'I recognize these two categories:\n'
            '- Profile page url of a lightnovel.\n'
            '- A query to search your lightnovel.',
            'What are you looking for?')
        self.state = self.handle_novel_url

    # end def

    async def handle_novel_url(self):
        try:
            self.app.user_input = self.message.content.strip()
            self.app.init_search()
        except:
            await self.send(
                'Sorry! I only know these sources:\n' +
                '\n'.join(['- %s' % x for x in crawler_list.keys()]),
                'Enter something again.')
        # end try
        if len(self.app.user_input) < 4:
            await self.send('Your query is too short')
            return
        # end if

        if self.app.crawler:
            await self.send('Got your page link')
            await self.get_novel_info()
        else:
            await self.send(
                'Searching %d sources for "%s"\n' %
                (len(self.app.crawler_links), self.app.user_input),
                'Please do not type anything before I reply!')
            await self.display_novel_selection()
        # end if

    # end def

    async def display_novel_selection(self):
        async with self.user.typing():
            self.app.search_novel()

            if len(self.app.search_results) == 0:
                await self.send('No novels found for "%s"' %
                                self.app.user_input)
                return
            # end if
            if len(self.app.search_results) == 1:
                self.selected_novel = self.app.search_results[0]
                await self.display_sources_selection()
                return
            # end if

            await self.send(
                ('Found %d novels:\n' % len(self.app.search_results)) +
                '\n'.join([
                    '%d. **%s** `%d sources`' %
                    (i + 1, item['title'], len(item['novels']))
                    for i, item in enumerate(self.app.search_results)
                ]) + '\n' + 'Enter name or index of your novel.\n' +
                'Send `!cancel` to stop this session.')
            self.state = self.handle_novel_selection

    # end def

    async def handle_novel_selection(self):
        text = self.message.content.strip()
        if text.startswith('!cancel'):
            await self.get_novel_url()
            return
        # end if

        async with self.user.typing():
            match_count = 0
            selected = None
            for i, res in enumerate(self.app.search_results):
                if str(i + 1) == text:
                    selected = res
                    match_count += 1
                elif text.isdigit() or len(text) < 3:
                    pass
                elif res['title'].lower().find(text) != -1:
                    selected = res
                    match_count += 1
                # end if
            # end for

            if match_count != 1:
                await self.send(
                    'Sorry! You should select *one* novel from the list (%d selected).'
                    % match_count)
                await self.display_novel_selection()
                return
            # end if

            self.selected_novel = selected
            await self.display_sources_selection()

    # end def

    async def display_sources_selection(self):
        async with self.user.typing():
            await self.send(
                ('**%s** is found in %d sources:\n' %
                 (self.selected_novel['title'],
                  len(self.selected_novel['novels']))) + '\n'.join([
                      '%d. <%s> %s' % (i + 1, item['url'],
                                       item['info'] if 'info' in item else '')
                      for i, item in enumerate(self.selected_novel['novels'])
                  ]) + '\n' + 'Enter index or name of your source.\n' +
                'Send `!cancel` to stop this session.')
        self.state = self.handle_sources_to_search

    # end def

    async def handle_sources_to_search(self):
        if len(self.selected_novel['novels']) == 1:
            novel = self.selected_novel['novels'][0]
            await self.handle_search_result(novel)
            return
        # end if

        text = self.message.content.strip()
        if text.startswith('!cancel'):
            await self.get_novel_url()
            return
        # end if

        match_count = 0
        selected = None
        for i, res in enumerate(self.selected_novel['novels']):
            if str(i + 1) == text:
                selected = res
                match_count += 1
            elif text.isdigit() or len(text) < 3:
                pass
            elif res['url'].lower().find(text) != -1:
                selected = res
                match_count += 1
            # end if
        # end for

        if match_count != 1:
            await self.send(
                'Sorry! You should select *one* source from the list (%d selected).'
                % match_count)
            await self.display_sources_selection()
            return
        # end if

        await self.handle_search_result(selected)

    # end def

    async def handle_search_result(self, novel):
        await self.send('Selected: %s' % novel['url'])
        self.app.init_crawler(novel['url'])
        await self.get_novel_info()

    # end def

    async def get_novel_info(self):
        if not self.app.crawler:
            await self.send('Could not find any crawler to get your novel')
            self.state = self.get_novel_info
            return
        # end if

        # TODO: Handle login here

        await self.send('Getting information about your novel...')
        async with self.user.typing():
            self.app.get_novel_info()

            # Setup output path
            good_name = os.path.basename(self.app.output_path)
            output_path = os.path.abspath(
                os.path.join('.discord_bot_output', str(self.user.id),
                             good_name))
            if os.path.exists(output_path):
                shutil.rmtree(output_path, ignore_errors=True)
            # end if
            os.makedirs(output_path, exist_ok=True)
            self.app.output_path = output_path

            # Get chapter range
            await self.send(
                'It has %d volumes and %d chapters.' %
                (len(self.app.crawler.volumes), len(self.app.crawler.chapters))
            )

        await self.display_range_selection()

    # end def

    async def display_range_selection(self):
        await self.send('\n'.join([
            'Now you can send the following commands to modify what to download:',
            '- To download everything send `!all` or pass `!cancel` to stop.',
            '- Send `!last` followed by a number to download the last few chapters. '
            'If it is not followed by a number, the last 50 chapters will be downloaded.',
            '- Similarly you can send `!first` followed by a number to get first few chapters.',
            '- Send `!volume` followed by volume numbers to download.',
            '- To download a range of chapters, send `!chapter` followed by ' +
            'two chapter numbers or urls separated by *space*. ' +
            ('Chapter number must be between 1 and %d, ' %
             len(self.app.crawler.chapters)) +
            ('and chapter urls should be from <%s>.' %
             (self.app.crawler.home_url))
        ]))
        self.state = self.handle_range_selection

    # end def

    async def handle_range_selection(self):
        text = self.message.content.strip()
        if text.startswith('!cancel'):
            await self.get_novel_url()
            return
        # end if
        if text.startswith('!all'):
            self.app.chapters = self.app.crawler.chapters[:]
        elif text.startswith('!first'):
            text = text[len('!first'):].strip()
            n = int(text) if text.isdigit() else 50
            n = 50 if n < 0 else n
            self.app.chapters = self.app.crawler.chapters[:n]
        elif text.startswith('!last'):
            text = text[len('!last'):].strip()
            n = int(text) if text.isdigit() else 50
            n = 50 if n < 0 else n
            self.app.chapters = self.app.crawler.chapters[-n:]
        elif text.startswith('!volume'):
            text = text[len('!volume'):].strip()
            selected = re.findall(r'\d+', text)
            await self.send('Selected volumes: ' + ', '.join(selected), )
            selected = [int(x) for x in selected]
            self.app.chapters = [
                chap for chap in self.app.crawler.chapters
                if selected.count(chap['volume']) > 0
            ]
        elif text.startswith('!chapter'):
            text = text[len('!chapter'):].strip()
            pair = text.split(' ')
            if len(pair) == 2:

                def resolve_chapter(name):
                    cid = 0
                    if name.isdigit():
                        cid = int(name)
                    else:
                        cid = self.app.crawler.get_chapter_index_of(name)
                    # end if
                    return cid - 1

                # end def
                first = resolve_chapter(pair[0])
                second = resolve_chapter(pair[1])
                if first > second:
                    second, first = first, second
                # end if
                if first >= 0 and second < len(self.app.crawler.chapters):
                    self.app.chapters = self.app.crawler.chapters[first:second]
                # end if
            # end if
            if len(self.app.chapters) == 0:
                await self.send('Chapter range is not valid. Please try again')
                return
            # end if
        else:
            await self.send(
                'Sorry! I did not recognize your input. Please try again')
            return
        # end if

        if len(self.app.chapters) == 0:
            await self.send(
                'You have not selected any chapters. Please select at least one'
            )
            return
        # end if

        await self.send('Got your range selection')

        await self.display_output_selection()

    # end def

    async def display_output_selection(self):
        await self.send('\n'.join([
            'Now you can send the following commands to choose the book format you want to download:',
            '- Supported output formats: docx, mobi, pdf, rtf, txt, azw3, fb2, lit, lrf, oeb, pdb, rb, snb, tcr, epub, text, web',
            '- To generate every format send `!all`, or send `!cancel` to stop.',
            '- Or pass a supported output format after `!`, as in the examples below:',
            '- Send `!mobi` to download in mobi format.',
            '- Send `!pdf` to download in pdf format.',
            '- Send `!{supported output format}`',
        ]))
        self.state = self.handle_output_selection

    # end def

    async def handle_output_selection(self):
        text = self.message.content.strip()
        if text.startswith('!cancel'):
            await self.get_novel_url()
            return
        # end if
        output_format = text[1:]
        if text.startswith('!all'):
            self.app.output_formats = None
        else:
            self.app.output_formats = {}
            if output_format in available_formats:
                for x in available_formats:
                    if x == output_format:
                        self.app.output_formats[x] = True
                    else:
                        self.app.output_formats[x] = False
                    # end if
                # end for
                await self.send('I will generate e-book in %s format' %
                                output_format)
            else:
                await self.send(
                    'Sorry! I did not recognize your input. Please try again')
                return
            # end if
        # end if

        await self.send(
            'Received your request. Starting download...\n' +
            'Send anything to view status.\nSend `!cancel` to stop it.')

        self.status = ['', '']
        self.state = self.report_download_progress
        try:
            self.executors.submit(self.start_download)
        except Exception:
            logger.exception('Download failure: %s', self.user.id)
        # end try

    # end def

    def start_download(self):
        self.app.pack_by_volume = False

        self.status = ['**%s**' % self.app.crawler.novel_title]
        self.status.append('Downloading %d chapters...' %
                           len(self.app.chapters))
        self.app.start_download()

        self.status.append('Binding books...')
        self.app.bind_books()
        self.status[-1] = 'Book binding completed.'

        self.status.append('Compressing output folder...')
        self.app.compress_output()
        self.status[-1] = 'Compressed output folder.'

        self.status.append('Uploading files...')
        for archive in self.app.archived_outputs:
            asyncio.run_coroutine_threadsafe(self.upload_file(archive),
                                             self.client.loop).result()
        # end for

        self.destroy()

    # end def

    async def upload_file(self, archive):
        file_size = os.stat(archive).st_size
        if file_size > 7.99 * 1024 * 1024:
            await self.send('File %s exceeds 8MB. Uploading To Google Drive.' %
                            os.path.basename(archive))
            description = 'Generated By : Discord Bot Ebook Smelter'
            link_id = upload(archive, description)
            if link_id:
                await self.send('https://drive.google.com/open?id=%s' % link_id)
            else:
                await self.send('Failed to upload to google drive')
            # end if
        else:
            k = 0
            while (file_size > 1024 and k < 3):
                k += 1
                file_size /= 1024.0
            # end while
            await self.send('Uploading %s [%d%s] ...' %
                            (os.path.basename(archive), int(file_size * 100) /
                             100.0, ['B', 'KB', 'MB', 'GB'][k]))
            async with self.user.typing():
                # await message.channel.send('Hello', file=discord.File('cool.png', 'testing.png'))
                await self.user.send('Here you go ! ',
                                     file=discord.File(
                                         open(archive, 'rb'),
                                         os.path.basename(archive)))
        # end if

    # end def

    async def report_download_progress(self):
        text = self.message.content.strip()

        if text == '!cancel':
            await self.send('Closing the session')
            self.destroy()
            await self.send(
                'Session is now closed. Type *anything* to create a new one.')
            return
        # end if

        async with self.user.typing():
            if self.app.progress < len(self.app.chapters):
                self.status[1] = '%d out of %d chapters have been downloaded.' % (
                    self.app.progress, len(self.app.chapters))
            else:
                self.status[1] = 'Download complete.'
            # end if

            await self.send('\n'.join(self.status).strip() + '\n\n' +
                            'Send `!cancel` to stop')
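
start_download() above runs inside the single-thread executor and uses asyncio.run_coroutine_threadsafe() to call coroutines such as send() and upload_file() back on the Discord client's event loop. The self-contained sketch below shows only that thread-to-loop handoff; it uses run_in_executor so the example can await the worker, and all names in it are illustrative rather than taken from the bot.

import asyncio
from concurrent.futures import ThreadPoolExecutor


async def report(msg):
    # Coroutine that must run on the event loop, like send() or upload_file().
    print('status:', msg)


def blocking_download(loop):
    # Runs in a ThreadPoolExecutor worker thread; schedules coroutines on the
    # loop and blocks on their results, the way start_download() does.
    for step in ('downloading', 'binding', 'uploading'):
        asyncio.run_coroutine_threadsafe(report(step), loop).result()
    return 'done'


async def main():
    loop = asyncio.get_running_loop()
    executor = ThreadPoolExecutor(max_workers=1)
    result = await loop.run_in_executor(executor, blocking_download, loop)
    print(result)
    executor.shutdown(wait=False)


asyncio.run(main())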
Пример #49
0
class RemoteShell():
    def __init__(self, max_async_threads=2):
        """
        :type max_async_threads: int
        :param max_async_threads: maximum number of async command executors
        """

        self.ssh = paramiko.SSHClient()
        self.ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy())
        self.executor = ThreadPoolExecutor(max_workers=max_async_threads)
        self.futures = []

    def connect(self, auth, address, port):
        """
        Creates an ssh session to a remote host

        :type auth: :py:class:`margaritashotgun.auth.AuthMethods`
        :param auth: Authentication object
        :type address: str
        :param address: remote server address
        :type port: int
        :param port: remote server port
        """
        self.username = auth.username
        self.address = address
        self.port = port
        try:
            logger.debug(("{0}: paramiko client connecting to "
                          "{0}:{1} with {2}".format(address, port,
                                                    auth.method)))
            if auth.method == AuthMethods.key:
                self.connect_with_key(auth.username, auth.key, address, port)
            elif auth.method == AuthMethods.password:
                self.connect_with_password(auth.username, auth.password,
                                           address, port)
            else:
                raise AuthenticationMethodMissingError()
            logger.debug(("{0}: paramiko client connected to "
                          "{0}:{1}".format(address, port)))
        except (AuthenticationException, SSHException, SocketError) as ex:
            raise SSHConnectionError("{0}:{1}".format(address, port), ex)

    def connect_with_password(self, username, password, address, port):
        """
        Create an ssh session to a remote host with a username and password

        :type username: str
        :param username: username used for ssh authentication
        :type password: str
        :param password: password used for ssh authentication
        :type address: str
        :param address: remote server address
        :type port: int
        :param port: remote server port
        """
        self.ssh.connect(username=username,
                         password=password,
                         hostname=address,
                         port=port)

    def connect_with_key(self, username, key, address, port):
        """
        Create an ssh session to a remote host with a username and rsa key

        :type username: str
        :param username: username used for ssh authentication
        :type key: :py:class:`paramiko.key.RSAKey`
        :param key: paramiko rsa key used for ssh authentication
        :type address: str
        :param address: remote server address
        :type port: int
        :param port: remote server port
        """
        self.ssh.connect(hostname=address,
                         port=port,
                         username=username,
                         pkey=key)

    def execute(self, command):
        """
        Executes command on remote hosts

        :type command: str
        :param command: command to be run on remote host
        """
        logger.debug('{0}: executing "{1}"'.format(self.address, command))
        stdin, stdout, stderr = self.ssh.exec_command(command)
        return dict(zip(['stdin', 'stdout', 'stderr'],
                        [stdin, stdout, stderr]))

    def execute_async(self, command, callback=None):
        """
        Executes command on remote hosts without blocking

        :type command: str
        :param command: command to be run on remote host
        :type callback: function
        :param callback: function to call when execution completes
        """
        logger.debug(('{0}: execute async "{1}"'
                      'with callback {2}'.format(self.address, command,
                                                 callback)))
        future = self.executor.submit(self.execute, command)
        if callback is not None:
            future.add_done_callback(callback)
        return future

    def decode(self, stream, encoding='utf-8'):
        """
        Convert paramiko stream into a string

        :type stream: :py:class:`paramiko.channel.ChannelFile`
        :param stream: paramiko output stream to convert
        :type encoding: str
        :param encoding: stream encoding
        """
        data = stream.read().decode(encoding).strip("\n")
        if data != "":
            logger.debug(('{0}: decoded "{1}" with encoding '
                          '{2}'.format(self.address, data, encoding)))
        return data

    def upload_file(self, local_path, remote_path):
        """
        Upload a file from the local filesystem to the remote host

        :type local_path: str
        :param local_path: path of local file to upload
        :type remote_path: str
        :param remote_path: destination path of upload on remote host
        """
        logger.debug("{0}: uploading {1} to {0}:{2}".format(
            self.address, local_path, remote_path))
        try:
            sftp = self.ssh.open_sftp()
            sftp.put(local_path, remote_path)
            sftp.close()
        except SSHException as ex:
            logger.warn(("{0}: LiME module upload failed with exception: "
                         "{1}".format(self.address, ex)))

    def cleanup(self):
        """
        Release resources used during shell execution
        """
        for future in self.futures:
            future.cancel()
        # shutdown() takes a boolean wait flag, not a timeout
        self.executor.shutdown(wait=True)
        self.ssh.close()
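
# Usage sketch (not part of the original snippet): it assumes the RemoteShell
# class above plus a margaritashotgun Auth object built elsewhere; the host
# address below is a placeholder. It shows execute_async() paired with a
# done-callback that decodes stdout once the command finishes.
def _remote_shell_usage_sketch(auth, address='203.0.113.10', port=22):
    shell = RemoteShell(max_async_threads=2)
    shell.connect(auth, address, port)

    def on_done(future):
        # future.result() is the dict returned by RemoteShell.execute()
        streams = future.result()
        print(shell.decode(streams['stdout']))

    shell.execute_async('uname -a', callback=on_done)
    shell.cleanup()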
Пример #50
0
    def pagedownload(baseurl, start):
        def gethtml(url):
            headers = {'User-Agent': 'Mozilla/5.0'}
            page1 = urllib.request.Request(url, headers=headers)
            page = urllib.request.urlopen(page1)
            basehtml = page.read().decode("UTF-8")
            return basehtml

        basehtml = gethtml(baseurl)

        def basereg(html):
            reg = r'(data-id=")([0-9]+?)(")'
            all = re.compile(reg)
            alllist = re.findall(all, html)
            return alllist

        baseimgurls = basereg(basehtml)

        def download(baseurl, start, num):
            url = "https://danbooru.donmai.us/posts/" + str(baseurl)
            # print('Current image URL: ' + url)
            os.makedirs('./image/', exist_ok=True)

            def gethtml(url):
                headers = {'User-Agent': 'Mozilla/5.0'}
                page1 = urllib.request.Request(url, headers=headers)
                page = urllib.request.urlopen(page1)
                basehtml = page.read().decode("UTF-8")
                return basehtml

            html = gethtml(url)

            def reg(html):
                reg = r'(id="image")(.+?)(src=")(.+?)(")'
                all = re.compile(reg)
                alllist = re.findall(all, html)
                return alllist

            imgurls = reg(html)

            def urllib_download():
                for imgurl in imgurls:
                    from urllib.request import urlretrieve
                    IMAGE_URL = imgurl[3]
                    IMAGE_URL = IMAGE_URL.replace("/sample", "", 1)
                    IMAGE_URL = IMAGE_URL.replace("sample-", "", 1)
                    IMAGE_URL = IMAGE_URL.replace("jpg", "jpg?download=1", 1)

                    def cbk(a, b, c):
                        '''urlretrieve-style progress callback:
                        a: number of data blocks downloaded so far
                        b: size of each data block
                        c: total size of the remote file
                        '''
                        per = 100.0 * a * b / c
                        if per > 100:
                            per = 100
                        print('%.2f%%' % per)

                    try:
                        opener = urllib.request.build_opener()
                        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                        urllib.request.install_opener(opener)
                        urlretrieve(IMAGE_URL,
                                    './image/danimg' + str(baseurl) + '.jpg')
                        print('Page ' + str(start - 1) + ', image ' +
                              str(num) + ': downloaded successfully')
                    except Exception:
                        # the post may expose a .png instead of a .jpg
                        IMAGE_URL = IMAGE_URL.replace("jpg", "png?download=1",
                                                      1)
                        opener = urllib.request.build_opener()
                        opener.addheaders = [('User-agent', 'Mozilla/5.0')]
                        urllib.request.install_opener(opener)
                        urlretrieve(IMAGE_URL,
                                    './image/danimg' + str(baseurl) + '.png')
                        print('Page ' + str(start - 1) + ', image ' +
                              str(num) + ': downloaded successfully')

            urllib_download()

        length = len(baseimgurls)
        if length == 0:
            print('Page ' + str(start - 1) + ': no data')
        else:
            print('Page ' + str(start - 1) + ': ' + str(length) +
                  ' images in total')
            num = 1
            pool = ThreadPoolExecutor(max_workers=11)
            for baseimgurl in baseimgurls:
                pool.submit(download, baseimgurl[1], start, num)
                num += 1
            pool.shutdown()
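
# Standalone sketch (not part of the original example): cbk() above follows
# urllib.request.urlretrieve's reporthook signature
# (block_count, block_size, total_size) but is never wired in; this is how
# such a hook is actually attached to a download. URL and path are
# placeholders.
def download_with_progress(url='https://example.com/file.jpg',
                           dest='./image/example.jpg'):
    from urllib.request import urlretrieve

    def report(block_count, block_size, total_size):
        # total_size can be -1 when the server sends no Content-Length
        if total_size > 0:
            percent = min(100.0, 100.0 * block_count * block_size / total_size)
            print('%.2f%%' % percent)

    urlretrieve(url, dest, reporthook=report)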
Пример #51
0
            articleInfo['coverImage'] = 'No image available'
        p_as = article.xpath('.//div[@class="post-meta"]/p[1]//a')
        if len(p_as) > 2:
            # tag type
            articleInfo['tag'] = p_as[1].xpath('./text()')[0]
            # comment count
            articleInfo['commentNum'] = p_as[2].xpath('./text()')[0]
        else:
            # tag type
            articleInfo['tag'] = p_as[1].xpath('./text()')[0]
            # comment count
            articleInfo['commentNum'] = '0'
        # summary
        articleInfo['content'] = article.xpath(
            './/span[@class="excerpt"]/p/text()')[0]
        # publish time
        articleInfo['publishTime'] = ''.join(
            article.xpath('.//div[@class="post-meta"]/p[1]/text()')).replace(
                '\n', '').replace(' ', '').replace('\r', '').replace('·', '')
        print(articleInfo)


if __name__ == '__main__':
    pool = ThreadPoolExecutor(max_workers=10)
    for i in range(1, 201):
        handler = pool.submit(down_load_data, i)
        # register the completion callback
        handler.add_done_callback(download_done)
    # shutdown() blocks until all submitted tasks finish (like a join())
    pool.shutdown()
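
# Sketch (not part of the original example): download_done() is not shown in
# the snippet above; a typical done-callback looks like the hypothetical one
# below. add_done_callback() hands it the finished Future, so it can surface
# the result or exception produced by down_load_data().
def example_download_done(future):
    try:
        result = future.result()  # re-raises any exception from the worker
        print('page finished:', result)
    except Exception as exc:
        print('page failed:', exc)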
Пример #52
0
    def scrape(self):
        """function that scrapes job posting from monster and pickles it"""
        log_info(f'jobfunnel monster to pickle running @ {self.date_string}')

        # get the search url
        search = self.get_search_url()

        # get the html data, initialize bs4 with lxml
        request_html = self.s.get(search, headers=self.headers)

        # create the soup base
        soup_base = BeautifulSoup(request_html.text, self.bs4_parser)

        # scrape total number of results, and calculate the # pages needed
        num_res = soup_base.find('h2', 'figure').text.strip()
        num_res = int(re.findall(r'(\d+)', num_res)[0])
        log_info(f'Found {num_res} monster results for query='
                 f'{self.query}')

        pages = int(ceil(num_res / self.max_results_per_page))
        # scrape soups for all the pages containing jobs it found
        page_url = f'{search}&start={pages}'
        log_info(f'getting monster pages 1 to {pages} : {page_url}')

        jobs = BeautifulSoup(
            self.s.get(page_url, headers=self.headers).text, self.bs4_parser). \
            find_all('div', attrs={'class': 'flex-row'})

        job_soup_list = []
        job_soup_list.extend(jobs)

        # id regex quantifiers
        id_regex = re.compile(r'/((?:[0-9a-f]{8}-[0-9a-f]{4}-[1-5][0-9a-f'
                              r']{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12})|\d+)')

        # make a dict of job postings from the listing briefs
        for s in job_soup_list:
            # init dict to store scraped data
            job = dict([(k, '') for k in MASTERLIST_HEADER])

            # scrape the post data
            job['status'] = 'new'
            try:
                # jobs should at minimum have a title, company and location
                job['title'] = s.find('h2', attrs={
                    'class': 'title'}).text.strip()
                job['company'] = s.find(
                    'div', attrs={'class': 'company'}).text.strip()
                job['location'] = s.find('div', attrs={
                    'class': 'location'}).text.strip()
            except AttributeError:
                continue

            # no blurb is available in monster job soups
            job['blurb'] = ''
            # tags are not supported in monster
            job['tags'] = ''
            try:
                job['date'] = s.find('time').text.strip()
            except AttributeError:
                job['date'] = ''
            # captures uuid or int ids, by extracting from url instead
            try:
                job['link'] = str(s.find('a', attrs={
                    'data-bypass': '******'}).get('href'))
                job['id'] = id_regex.findall(job['link'])[0]
            except AttributeError:
                job['id'] = ''
                job['link'] = ''

            job['query'] = self.query
            job['provider'] = self.provider

            # key by id
            self.scrape_data[str(job['id'])] = job

        # apply job pre-filter before scraping blurbs
        super().pre_filter(self.scrape_data, self.provider)

        # stores references to jobs in list to be used in blurb retrieval
        scrape_list = [i for i in self.scrape_data.values()]

        # converts job date formats into a standard date format
        post_date_from_relative_post_age(scrape_list)

        threads = ThreadPoolExecutor(max_workers=8)
        # checks if delay is set or not, then extracts blurbs from job links
        if self.delay_config is not None:
            # calls super class to run delay specific threading logic
            super().delay_threader(scrape_list, self.get_blurb_with_delay,
                                   self.parse_blurb, threads)
        else:
            # start time recording
            start = time()

            # maps jobs to threads and cleans them up when done
            threads.map(self.search_joblink_for_blurb, scrape_list)
            threads.shutdown()

            # end and print recorded time
            end = time()
            print(f'{self.provider} scrape job took {(end - start):.3f}s')
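
# Sketch (not part of the original scraper): the non-delay branch above relies
# on ThreadPoolExecutor.map() plus shutdown() to fan out blurb fetches and
# block until they finish. This is the self-contained shape of that pattern;
# fetch() is a stand-in for search_joblink_for_blurb().
from concurrent.futures import ThreadPoolExecutor
from time import sleep, time


def map_and_wait(items):
    def fetch(item):
        sleep(0.1)  # placeholder for an HTTP request
        return item

    start = time()
    threads = ThreadPoolExecutor(max_workers=8)
    results = list(threads.map(fetch, items))  # map() submits every item eagerly
    threads.shutdown()                         # waits for remaining work
    print(f'batch took {(time() - start):.3f}s for {len(results)} items')
    return results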
Пример #53
0
def apply_activities(experiment: Experiment,
                     configuration: Configuration,
                     secrets: Secrets,
                     pool: ThreadPoolExecutor,
                     journal: Journal,
                     dry: bool = False) -> List[Run]:
    with controls(level="method",
                  experiment=experiment,
                  context=experiment,
                  configuration=configuration,
                  secrets=secrets) as control:
        result = []
        runs = []
        method = experiment.get("method", [])
        wait_for_background_activities = True

        try:
            for run in run_activities(experiment, configuration, secrets, pool,
                                      dry):
                runs.append(run)
                if journal["status"] in ["aborted", "failed", "interrupted"]:
                    break
        except SystemExit as x:
            # when we got a signal for an ungraceful exit, we can decide
            # not to wait for background activities. Their statuses will
            # remain failed.
            wait_for_background_activities = x.code != 30  # see exit.py
            raise
        finally:
            background_activity_timeout = None

            if wait_for_background_activities and pool:
                logger.debug("Waiting for background activities to complete")
                pool.shutdown(wait=True)
            elif pool:
                harshly_terminate_pending_background_activities(pool)
                logger.debug(
                    "Do not wait for the background activities to finish "
                    "as per signal")
                background_activity_timeout = 0.2
                pool.shutdown(wait=False)

            for index, run in enumerate(runs):
                if not run:
                    continue

                if isinstance(run, dict):
                    result.append(run)
                else:
                    try:
                        # background activities
                        result.append(
                            run.result(timeout=background_activity_timeout))
                    except TimeoutError:
                        # we want an entry for the background activity in our
                        # results anyway, we won't have anything meaningful
                        # to say about it
                        result.append({
                            "activity": method[index],
                            "status": "failed",
                            "output": None,
                            "duration": None,
                            "start": None,
                            "end": None,
                            "exception": None
                        })

            # now let's ensure the journal has all activities in their correct
            # order (background ones included)
            journal["run"] = result

            control.with_state(result)

    return result
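
# Sketch (not part of chaostoolkit itself): the finally block above drains a
# mix of plain dict results and background Futures, using
# Future.result(timeout=...) and substituting a placeholder entry when a
# background activity cannot be collected in time. The standalone shape of
# that pattern:
from concurrent.futures import Future, TimeoutError


def collect_runs(runs, timeout=None):
    collected = []
    for run in runs:
        if isinstance(run, Future):
            try:
                collected.append(run.result(timeout=timeout))
            except TimeoutError:
                collected.append({"status": "failed", "output": None})
        else:
            collected.append(run)  # synchronous activities are plain dicts
    return collected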
Пример #54
0
 # t2.start()
 # t3.start()
 # t4.start()
 for i in range(0, 1):
     # give_packet()
     # time.sleep(3)
     packet_id = str(
         sql.read_mysql(
             sql=
             'select order_no from activity.rp_give_red_packet order by created_at desc'
         )[0])
     print(packet_id)
     time.sleep(1)
     p = ThreadPoolExecutor(5)
     p.submit(draw, './data_1.txt')
     p.shutdown(wait=True)
     time.sleep(3)
     amount = sql.read_mysql(
         sql=
         'select draw_amount from activity.rp_give_red_packet where order_no = {}'
         .format(packet_id))
     draw_sum = sql.read_mysql(
         sql=
         'select sum(amount) from activity.rp_draw_red_packet where order_no = {}'
         .format(packet_id))
     sum = sql.read_mysql(
         sql=
         'select count(*) from activity.rp_draw_red_packet where order_no = {}'
         .format(packet_id))
     sum_rp = sql.read_mysql(
         sql=
Пример #55
0
def run(links):
    executor = ThreadPoolExecutor(max_workers=thread_count)
    futures = [executor.submit(work, link) for link in links
               ]  # Not bothering about results of futures for time being.
    executor.shutdown(wait=True)
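
# Sketch (not part of the original example): the comment above notes that the
# futures' results are ignored. If they were needed, as_completed() is the
# usual way to collect them as each worker finishes; work() and thread_count
# from the snippet are replaced by stand-ins here.
from concurrent.futures import ThreadPoolExecutor, as_completed


def run_and_collect(links, worker=None, max_workers=4):
    worker = worker or (lambda link: link)  # stand-in for work()
    results = []
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(worker, link): link for link in links}
        for future in as_completed(futures):
            results.append((futures[future], future.result()))
    return results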
Пример #56
0
class TaskSupervisor:

    timeout_message = '{supervisor_id} task {task_id}: ' + \
            '{target} started in {time_spent:.3f} seconds. ' + \
            'Increase pool size or decrease number of workers'

    def __init__(self, supervisor_id=None):

        self.poll_delay = default_poll_delay

        self.timeout_warning = default_timeout_warning
        self.timeout_warning_func = None
        self.timeout_critical = default_timeout_critical
        self.timeout_critical_func = None
        self.id = supervisor_id if supervisor_id else str(uuid.uuid4())

        self._active_threads = set()
        self._active_mps = set()
        self._active = False
        self._main_loop_active = False
        self._started = threading.Event()
        self._lock = threading.Lock()
        self._max_threads = {}
        self._max_mps = {}
        self._schedulers = {}
        self._tasks = {}
        self._Qt = {}
        self._Qmp = {}
        self.default_aloop = None
        self.default_async_job_scheduler = None
        self.mp_pool = None
        self.daemon = False
        self._processors_stopped = {}
        self.aloops = {}
        self.async_job_schedulers = {}

        self.set_thread_pool(pool_size=thread_pool_default_size,
                             reserve_normal=default_reserve_normal,
                             reserve_high=default_reserve_high,
                             max_size=None)

    def set_thread_pool(self, **kwargs):
        for p in ['pool_size', 'reserve_normal', 'reserve_high']:
            if p in kwargs:
                setattr(self, 'thread_' + p, int(kwargs[p]))
        self._max_threads[TASK_LOW] = self.thread_pool_size
        self._max_threads[
            TASK_NORMAL] = self.thread_pool_size + self.thread_reserve_normal
        thc = self.thread_pool_size + \
                self.thread_reserve_normal + self.thread_reserve_high
        self._max_threads[TASK_HIGH] = thc
        self._prespawn_threads = kwargs.get('min_size', 0)
        max_size = kwargs.get('max_size')
        if not max_size:
            max_size = thc if self.thread_pool_size else \
                    thread_pool_default_size
        if self._prespawn_threads == 'max':
            self._prespawn_threads = max_size
        elif max_size < self._prespawn_threads:
            raise ValueError(
                'min pool size ({}) can not be larger than max ({})'.format(
                    self._prespawn_threads, max_size))
        self.thread_pool = ThreadPoolExecutor(
            max_workers=max_size,
            thread_name_prefix='supervisor_{}_pool'.format(self.id))
        if self._max_threads[TASK_HIGH] > max_size:
            logger.warning(
                ('supervisor {} executor thread pool max size ({}) is ' +
                 'lower than reservations ({})').format(
                     self.id, max_size, self._max_threads[TASK_HIGH]))

    def set_mp_pool(self, **kwargs):
        for p in ['pool_size', 'reserve_normal', 'reserve_high']:
            setattr(self, 'mp_' + p, int(kwargs.get(p, 0)))
        self._max_mps[TASK_LOW] = self.mp_pool_size
        self._max_mps[TASK_NORMAL] = self.mp_pool_size + self.mp_reserve_normal
        self._max_mps[TASK_HIGH] = self.mp_pool_size + \
                self.mp_reserve_normal + self.mp_reserve_high
        if not self.mp_pool:
            self.create_mp_pool(processes=self._max_mps[TASK_HIGH])

    def timeout_warning_func(self, task):
        pass

    def timeout_critical_func(self, task):
        pass

    def _higher_queues_busy(self, tt, task_priority):
        if tt == TT_THREAD:
            q = self._Qt
        elif tt == TT_MP:
            q = self._Qmp
        if task_priority == TASK_NORMAL:
            return not q[TASK_HIGH].empty()
        elif task_priority == TASK_LOW:
            return not q[TASK_HIGH].empty() or not q[TASK_NORMAL].empty()
        else:
            return False

    def spawn_thread(self, target, args=(), kwargs={}):
        return self.thread_pool.submit(target, *args, **kwargs)

    def put_task(self,
                 target,
                 args=(),
                 kwargs={},
                 callback=None,
                 priority=TASK_NORMAL,
                 delay=None,
                 tt=TT_THREAD,
                 task_id=None,
                 worker=None,
                 _send_task_id=True):
        if not self._started.is_set() or not self._active or target is None:
            return
        ti = Task(tt,
                  task_id,
                  priority=priority,
                  target=target,
                  args=args,
                  kwargs=kwargs,
                  callback=callback,
                  delay=delay,
                  worker=worker,
                  _send_task_id=_send_task_id)
        ti.time_queued = time.time()
        ti._tqueued = time.perf_counter()
        with self._lock:
            self._tasks[ti.id] = ti
        if priority == TASK_CRITICAL:
            self.mark_task_started(ti)
            asyncio.run_coroutine_threadsafe(self._start_task(ti),
                                             loop=self.event_loop)
        else:
            if tt == TT_THREAD:
                q = self._Qt[priority]
            else:
                q = self._Qmp[priority]
            asyncio.run_coroutine_threadsafe(q.put(ti), loop=self.event_loop)
        return ti

    async def _task_processor(self, queue, priority, tt):
        logger.debug('supervisor {} task processor {}/{} started'.format(
            self.id, tt, priority))
        while True:
            task = await queue.get()
            if task is None: break
            if tt == TT_THREAD:
                pool_size = self.thread_pool_size
            elif tt == TT_MP:
                pool_size = self.mp_pool_size
            if pool_size:
                self._lock.acquire()
                try:
                    if tt == TT_THREAD:
                        mx = self._max_threads[priority]
                    elif tt == TT_MP:
                        mx = self._max_mps[priority]
                    while (self._get_active_count(tt) >= mx
                           or self._higher_queues_busy(tt, priority)):
                        self._lock.release()
                        await asyncio.sleep(self.poll_delay)
                        self._lock.acquire()
                finally:
                    self._lock.release()
            self.mark_task_started(task)
            self.event_loop.create_task(self._start_task(task))
        logger.debug('supervisor {} task processor {}/{} finished'.format(
            self.id, tt, _priorities[priority]))
        self._processors_stopped[(tt, priority)].set()

    def get_task(self, task_id):
        with self._lock:
            return self._tasks.get(task_id)

    def create_mp_pool(self, *args, **kwargs):
        if args or kwargs:
            self.mp_pool = multiprocessing.Pool(*args, **kwargs)
        else:
            self.mp_pool = multiprocessing.Pool(
                processes=multiprocessing.cpu_count())

    def register_scheduler(self, scheduler):
        if not self._started.is_set():
            return False
        asyncio.run_coroutine_threadsafe(self._Q.put(
            (RQ_SCHEDULER, scheduler, time.time())),
                                         loop=self.event_loop)
        return True

    def create_async_job(self, scheduler=None, **kwargs):
        if scheduler is None:
            scheduler = self.default_async_job_scheduler
        elif isinstance(scheduler, str):
            scheduler = self.async_job_schedulers[scheduler]
        return scheduler.create_threadsafe(**kwargs)

    def cancel_async_job(self, scheduler=None, job=None):
        if job:
            if scheduler is None:
                scheduler = self.default_async_job_scheduler
            elif isinstance(scheduler, str):
                scheduler = self.async_job_schedulers[scheduler]
            scheduler.cancel(job)
        else:
            logger.warning(('supervisor {} async job cancellation ' +
                            'requested but job not specified').format(self.id))

    def register_sync_scheduler(self, scheduler):
        with self._lock:
            self._schedulers[scheduler] = None
        return True

    def unregister_sync_scheduler(self, scheduler):
        with self._lock:
            try:
                del self._schedulers[scheduler]
                return True
            except:
                return False

    def unregister_scheduler(self, scheduler):
        with self._lock:
            if scheduler not in self._schedulers:
                return False
            else:
                self._schedulers[scheduler][1].cancel()
                del self._schedulers[scheduler]
                return True

    def _get_active_count(self, tt):
        if tt == TT_THREAD:
            return len(self._active_threads)
        elif tt == TT_MP:
            return len(self._active_mps)

    def create_aloop(self, name, daemon=False, start=True, default=False):
        if name == '__supervisor__':
            raise RuntimeError('Name "__supervisor__" is reserved')
        with self._lock:
            if name in self.aloops:
                logger.error('supervisor {} loop {} already exists'.format(
                    self.id, name))
                return False
        l = ALoop(name, supervisor=self)
        l.daemon = daemon
        l.poll_delay = self.poll_delay
        with self._lock:
            self.aloops[name] = l
            if start:
                l.start()
            if default:
                self.set_default_aloop(l)
        return l

    def create_async_job_scheduler(self,
                                   name,
                                   aloop=None,
                                   start=True,
                                   default=False):
        """
        Create async job scheduler (aiosched.scheduler)

        ALoop must always be specified or default ALoop defined
        """
        if name == '__supervisor__':
            raise RuntimeError('Name "__supervisor__" is reserved')
        with self._lock:
            if name in self.async_job_schedulers:
                logger.error(
                    'supervisor {} async job_scheduler {} already exists'.
                    format(self.id, name))
                return False
        l = AsyncJobScheduler(name)
        if aloop is None:
            aloop = self.default_aloop
        elif not isinstance(aloop, ALoop):
            aloop = self.get_aloop(aloop)
        loop = aloop.get_loop()
        with self._lock:
            self.async_job_schedulers[name] = l
            if default:
                self.set_default_async_job_scheduler(l)
        if start:
            l.set_loop(loop)
            l._aloop = aloop
            aloop.background_task(l.scheduler_loop())
        else:
            l.set_loop(loop)
        return l

    def set_default_aloop(self, aloop):
        self.default_aloop = aloop

    def set_default_async_job_scheduler(self, scheduler):
        self.default_async_job_scheduler = scheduler

    def get_aloop(self, name=None, default=True):
        with self._lock:
            if name is not None:
                return self.aloops.get(name)
            elif default:
                return self.default_aloop

    def start_aloop(self, name):
        with self._lock:
            if name not in self.aloops:
                logger.error('supervisor {} loop {} not found'.format(
                    self.id, name))
                return False
            else:
                self.aloops[name].start()
                return True

    def stop_aloop(self, name, wait=True, cancel_tasks=False, _lock=True):
        if _lock:
            self._lock.acquire()
        try:
            if name not in self.aloops:
                logger.error('supervisor {} loop {} not found'.format(
                    self.id, name))
                return False
            else:
                self.aloops[name].stop(wait=wait, cancel_tasks=cancel_tasks)
                return True
        finally:
            if _lock:
                self._lock.release()

    def get_info(self,
                 tt=None,
                 aloops=True,
                 schedulers=True,
                 async_job_schedulers=True):
        class SupervisorInfo:
            pass

        result = SupervisorInfo()
        with self._lock:
            result.id = self.id
            result.active = self._active
            result.started = self._started.is_set()
            for p in ['pool_size', 'reserve_normal', 'reserve_high']:
                if tt == TT_THREAD or tt is None or tt is False:
                    setattr(result, 'thread_' + p,
                            getattr(self, 'thread_' + p))
                if self.mp_pool and (tt == TT_MP or tt is None or tt is False):
                    setattr(result, 'mp_' + p, getattr(self, 'mp_' + p))
            if tt == TT_THREAD or tt is None or tt is False:
                if tt is not False:
                    result.thread_tasks = list(self._active_threads)
                result.thread_tasks_count = len(self._active_threads)
            if tt == TT_MP or tt is None or tt is False:
                if tt is not False:
                    result.mp_tasks = list(self._active_mps)
                result.mp_tasks_count = len(self._active_mps)
            if aloops:
                result.aloops = self.aloops.copy()
            if schedulers:
                result.schedulers = self._schedulers.copy()
            if async_job_schedulers:
                result.async_job_schedulers = self.async_job_schedulers.copy()
            if tt != False:
                result.tasks = {}
                for n, v in self._tasks.items():
                    if tt is None or v.tt == tt:
                        result.tasks[n] = v
        return result

    def get_aloops(self):
        with self._lock:
            return self.aloops.copy()

    def get_schedulers(self):
        with self._lock:
            return self._schedulers.copy()

    def get_tasks(self, tt=None):
        result = {}
        with self._lock:
            for n, v in self._tasks.items():
                if tt is None or v.tt == tt:
                    result[n] = v
        return result

    def mark_task_started(self, task):
        with self._lock:
            if task.tt == TT_THREAD:
                self._active_threads.add(task.id)
                if debug:
                    logger.debug(
                        ('supervisor {} new task {}: {}, {}' +
                         ' thread pool size: {} / {}').format(
                             self.id, task.id, task.target,
                             _priorities[task.priority],
                             len(self._active_threads), self.thread_pool_size))
            elif task.tt == TT_MP:
                self._active_mps.add(task.id)
                if debug:
                    logger.debug(
                        ('supervisor {} new task {}: {}, {}' +
                         ' mp pool size: {} / {}').format(
                             self.id, task.id,
                             task.target, _priorities[task.priority],
                             len(self._active_mps), self.mp_pool_size))

    async def _start_task(self, task):
        with self._lock:
            task.time_started = time.time()
            task._tstarted = time.perf_counter()
            if not task.delay:
                task.mark_started()
        if task.delay:
            task.status = TASK_STATUS_DELAYED
            await asyncio.sleep(task.delay)
            task.mark_started()
        if task.tt == TT_THREAD:
            self.thread_pool.submit(task.target, *task.args, **task.kwargs)
        elif task.tt == TT_MP:
            self.mp_pool.apply_async(task.target, task.args, task.kwargs,
                                     task.callback)
        time_spent = task._tstarted - task._tqueued
        if time_spent > self.timeout_critical:
            logger.critical(
                self.timeout_message.format(supervisor_id=self.id,
                                            task_id=task.id,
                                            target=task.target,
                                            time_spent=time_spent))
            if self.timeout_critical_func:
                self.timeout_critical_func(task)
        elif time_spent > self.timeout_warning:
            logger.warning(
                self.timeout_message.format(supervisor_id=self.id,
                                            task_id=task.id,
                                            target=task.target,
                                            time_spent=time_spent))
            if self.timeout_warning_func:
                self.timeout_warning_func(task)

    def mark_task_completed(self, task=None, task_id=None):
        with self._lock:
            if task is None:
                try:
                    task = self._tasks[task_id]
                except:
                    raise LookupError('supervisor {} task {} not found'.format(
                        self.id, task_id))
            task_id = task.id
            tt = task.tt
            if tt == TT_THREAD:
                if task_id in self._active_threads:
                    self._active_threads.remove(task_id)
                    if debug:
                        logger.debug(('supervisor {} removed task {}:' +
                                      ' {}, thread pool size: {} / {}').format(
                                          self.id, task_id, task,
                                          len(self._active_threads),
                                          self.thread_pool_size))
                    task.mark_completed()
                    del self._tasks[task_id]
            elif tt == TT_MP:
                if task_id in self._active_mps:
                    self._active_mps.remove(task_id)
                    if debug:
                        logger.debug(('supervisor {} removed task {}:' +
                                      ' {} mp pool size: {} / {}').format(
                                          self.id, task_id, task,
                                          len(self._active_mps),
                                          self.mp_pool_size))
                task.mark_completed()
                del self._tasks[task_id]
        return True

    def start(self, daemon=None):
        def _prespawn():
            pass

        self._active = True
        self._main_loop_active = True
        t = threading.Thread(
            name='supervisor_{}_event_loop'.format(self.id),
            target=self._start_event_loop,
            daemon=daemon if daemon is not None else self.daemon)
        t.start()
        for i in range(self._prespawn_threads):
            self.thread_pool.submit(_prespawn)
        self._started.wait()

    def block(self):
        while self._started.is_set():
            time.sleep(0.1)

    async def _launch_scheduler_loop(self, scheduler):
        try:
            t = scheduler.worker_loop.create_task(scheduler.loop())
            with self._lock:
                self._schedulers[scheduler] = (scheduler, t)
            if hasattr(scheduler, 'extra_loops'):
                for l in scheduler.extra_loops:
                    scheduler.worker_loop.create_task(getattr(scheduler, l)())
            await t
        except CancelledError:
            pass
        except Exception as e:
            logger.error(e)

    async def _main_loop(self):
        self._Q = asyncio.queues.Queue()
        for p in (TASK_LOW, TASK_NORMAL, TASK_HIGH):
            self._Qt[p] = asyncio.queues.Queue()
            self._processors_stopped[(TT_THREAD, p)] = asyncio.Event()
            self.event_loop.create_task(
                self._task_processor(self._Qt[p], p, TT_THREAD))
        if self.mp_pool:
            for p in (TASK_LOW, TASK_NORMAL, TASK_HIGH):
                self._Qmp[p] = asyncio.queues.Queue()
                self._processors_stopped[(TT_MP, p)] = asyncio.Event()
                self.event_loop.create_task(
                    self._task_processor(self._Qmp[p], p, TT_MP))
        self._started.set()
        logger.info('supervisor {} event loop started'.format(self.id))
        while self._main_loop_active:
            data = await self._Q.get()
            try:
                if data is None: break
                r, res, t_put = data
                if r == RQ_SCHEDULER:
                    if debug:
                        logger.debug('supervisor {} new scheduler {}'.format(
                            self.id, res))
                    asyncio.run_coroutine_threadsafe(
                        self._launch_scheduler_loop(res), loop=res.worker_loop)
            finally:
                self._Q.task_done()
        for i, t in self._processors_stopped.items():
            await t.wait()
        logger.info('supervisor {} event loop finished'.format(self.id))

    def _start_event_loop(self):
        if self._active:
            self.event_loop = asyncio.new_event_loop()
            asyncio.set_event_loop(self.event_loop)
            mp = ', mp pool: {} + {} RN + {} RH'.format(
                self.mp_pool_size, self.mp_reserve_normal,
                self.mp_reserve_high) if hasattr(self, 'mp_pool_size') else ''
            logger.info(
                ('supervisor {} started, thread pool: ' +
                 '{} + {} RN + {} RH{}').format(self.id, self.thread_pool_size,
                                                self.thread_reserve_normal,
                                                self.thread_reserve_high, mp))
            try:
                self.event_loop.run_until_complete(self._main_loop())
            except CancelledError:
                logger.warning('supervisor {} loop had active tasks'.format(
                    self.id))

    def _cancel_all_tasks(self):
        with self._lock:
            for task in asyncio.Task.all_tasks(loop=self.event_loop):
                task.cancel()

    def _stop_schedulers(self, wait=True):
        with self._lock:
            schedulers = self._schedulers.copy()
        for s in schedulers:
            s.stop(wait=wait)

    def _stop_async_job_schedulers(self, wait=True):
        with self._lock:
            schedulers = self.async_job_schedulers.copy().items()
        for i, s in schedulers:
            try:
                s.stop(wait=wait)
            except:
                pass

    def stop(self, wait=True, stop_schedulers=True, cancel_tasks=False):
        self._active = False
        if isinstance(wait, bool):
            to_wait = None
        else:
            to_wait = time.perf_counter() + wait
        if stop_schedulers:
            self._stop_async_job_schedulers(wait)
            self._stop_schedulers(True if wait else False)
            if debug:
                logger.debug('supervisor {} schedulers stopped'.format(
                    self.id))
        with self._lock:
            for i, l in self.aloops.items():
                self.stop_aloop(i,
                                wait=wait,
                                cancel_tasks=cancel_tasks,
                                _lock=False)
            if debug:
                logger.debug('supervisor {} async loops stopped'.format(
                    self.id))
        if (to_wait or wait is True) and not cancel_tasks:
            while True:
                with self._lock:
                    if not self._tasks:
                        break
                time.sleep(self.poll_delay)
                if to_wait and time.perf_counter() > to_wait: break
        if debug:
            logger.debug('supervisor {} no task in queues'.format(self.id))
        if to_wait or wait is True:
            if debug:
                logger.debug(
                    'supervisor {} waiting for tasks to finish'.format(
                        self.id))
            while True:
                if not self._active_threads:
                    break
                if to_wait and time.perf_counter() > to_wait:
                    logger.warning(
                        ('supervisor {} wait timeout, ' +
                         'skipping, hope threads will finish').format(self.id))
                    break
                time.sleep(self.poll_delay)
        if cancel_tasks:
            self._cancel_all_tasks()
            if debug:
                logger.debug('supervisor {} remaining tasks canceled'.format(
                    self.id))
        if to_wait or wait is True:
            while True:
                with self._lock:
                    if (not self._active_threads and not self._active_mps) or (
                            to_wait and time.perf_counter() > to_wait):
                        break
                time.sleep(self.poll_delay)
        if debug:
            logger.debug('supervisor {} no active threads/mps'.format(self.id))
        if debug:
            logger.debug('supervisor {} stopping event loop'.format(self.id))
        asyncio.run_coroutine_threadsafe(self._Q.put(None),
                                         loop=self.event_loop)
        for p in (TASK_LOW, TASK_NORMAL, TASK_HIGH):
            asyncio.run_coroutine_threadsafe(self._Qt[p].put(None),
                                             loop=self.event_loop)
        if self.mp_pool:
            for p in (TASK_LOW, TASK_NORMAL, TASK_HIGH):
                asyncio.run_coroutine_threadsafe(self._Qmp[p].put(None),
                                                 loop=self.event_loop)
        self._main_loop_active = False
        if wait is True or to_wait:
            while True:
                if to_wait and time.perf_counter() > to_wait:
                    logger.warning(
                        'supervisor {} wait timeout, canceling all tasks'.
                        format(self.id))
                    self._cancel_all_tasks()
                    break
                else:
                    can_break = True
                    for t in asyncio.Task.all_tasks(self.event_loop):
                        if not t.cancelled() and not t.done():
                            can_break = False
                            break
                    if can_break: break
                time.sleep(self.poll_delay)
        with self._lock:
            for i, v in self._tasks.items():
                v.status = TASK_STATUS_CANCELED
        self._started.clear()
        self.thread_pool.shutdown()
        logger.info('supervisor {} stopped'.format(self.id))
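
# Sketch (not part of the supervisor code): set_thread_pool() above derives a
# per-priority concurrency ceiling from the base pool size plus two reserves;
# TASK_CRITICAL tasks bypass the queues entirely. With hypothetical values
# pool_size=10, reserve_normal=2, reserve_high=3 the limits come out as below,
# and the TASK_HIGH total also becomes the executor's max_workers unless
# max_size is passed explicitly.
def thread_limits(pool_size=10, reserve_normal=2, reserve_high=3):
    return {
        'TASK_LOW': pool_size,                                   # 10
        'TASK_NORMAL': pool_size + reserve_normal,               # 12
        'TASK_HIGH': pool_size + reserve_normal + reserve_high,  # 15
    }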
Пример #57
0
class RiotApiScheduler(object):
    MAX_QSIZE = 25
    REQ_PER_MIN = 50
    _DEFAULT_DUPE_BACKOFF = 1

    def __init__(self):
        self._timer = threading.Timer(0, self._run)
        self._lock = threading.Lock()
        self._executor = ThreadPoolExecutor(max_workers=5)
        self._queue = Queue(maxsize=RiotApiScheduler.MAX_QSIZE)

        self._is_running = False
        self._backoff = None  # guarded by lock
        self._next_sleep = 60.0 / RiotApiScheduler.REQ_PER_MIN  # initial sleep period

    def _process_request(self, req):
        try:
            try:
                req.execute()
            except RiotRateLimitException as e:
                print "!! [API Rate Limit] Type: %r, Retry after: %r" % (
                    e.limit_type, e.retry_after)
                with self._lock:
                    if self._backoff is None:
                        self._backoff = e.retry_after
                    else:
                        self._backoff = RiotApiScheduler._DEFAULT_DUPE_BACKOFF
                req.mark_invalid()
            except RiotApiException as e:
                print "!! [API Exception] Code: %r for url: %r" % (
                    e.status_code, e.url)
                req.mark_invalid()
            except Exception as e:
                print "!! [RANDOM EXCEPTION] %r" % e
        except Exception as e:
            print "!! [RANDOM EXCEPTION] %r" % e

    def _run(self):
        # TODO optimize to take better advantage of api limit
        # Pull requests off per rate limit and schedule them with thread pool executor
        try:
            req = self._queue.get(False)
            self._executor.submit(self._process_request, req)
        except Empty:
            pass

        if self._is_running:
            delay = self._next_sleep
            with self._lock:
                if self._backoff:
                    delay = self._backoff
                    self._backoff = None

            self._timer = threading.Timer(delay, self._run)
            self._timer.start()
        return

    def add_request(self, req, timeout=1):
        # should block if full, raises Full exception if times out
        while self._is_running:
            try:
                self._queue.put(req, True, timeout)
                return
            except Full:
                continue

    def start(self):
        self._is_running = True
        self._timer.start()

    def stop(self):
        self._is_running = False
        self._executor.shutdown(wait=True)
        self._timer.cancel()
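
# Sketch (not part of the original scheduler): the queue only needs objects
# that expose execute() and mark_invalid(); a hypothetical request wrapper and
# the start/add_request/stop life cycle would look like this.
import time


class ExampleRequest(object):
    def __init__(self, func):
        self.func = func
        self.valid = True

    def execute(self):
        self.func()

    def mark_invalid(self):
        self.valid = False


def schedule_example(func):
    scheduler = RiotApiScheduler()
    scheduler.start()
    scheduler.add_request(ExampleRequest(func))
    time.sleep(3)  # let the timer-driven loop drain the queue before stopping
    scheduler.stop()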
Пример #58
0
class Worker(Server):
    """ Worker Node

    Workers perform two functions:

    1.  **Serve data** from a local dictionary
    2.  **Perform computation** on that data and on data from peers

    Additionally workers keep a Center informed of their data and use that
    Center to gather data from other workers when necessary to perform a
    computation.

    You can start a worker with the ``dworker`` command line application::

        $ dworker scheduler-ip:port

    **State**

    * **data:** ``{key: object}``:
        Dictionary mapping keys to actual values
    * **active:** ``{key}``:
        Set of keys currently under computation
    * **ncores:** ``int``:
        Number of cores used by this worker process
    * **executor:** ``concurrent.futures.ThreadPoolExecutor``:
        Executor used to perform computation
    * **local_dir:** ``path``:
        Path on local machine to store temporary files
    * **center:** ``rpc``:
        Location of center or scheduler.  See ``.ip/.port`` attributes.
    * **name:** ``string``:
        Alias
    * **services:** ``{str: Server}``:
        Auxiliary web servers running on this worker
    * **service_ports:** ``{str: port}``:
        Ports on which the auxiliary web servers listen

    Examples
    --------

    Create centers and workers in Python:

    >>> from distributed import Center, Worker
    >>> c = Center('192.168.0.100', 8787)  # doctest: +SKIP
    >>> w = Worker(c.ip, c.port)  # doctest: +SKIP
    >>> yield w._start(port=8788)  # doctest: +SKIP

    Or use the command line::

       $ dcenter
       Start center at 127.0.0.1:8787

       $ dworker 127.0.0.1:8787
       Start worker at:            127.0.0.1:8788
       Registered with center at:  127.0.0.1:8787

    See Also
    --------
    distributed.center.Center:
    """
    def __init__(self,
                 center_ip,
                 center_port,
                 ip=None,
                 ncores=None,
                 loop=None,
                 local_dir=None,
                 services=None,
                 service_ports=None,
                 name=None,
                 **kwargs):
        self.ip = ip or get_ip()
        self._port = 0
        self.ncores = ncores or _ncores
        self.data = dict()
        self.loop = loop or IOLoop.current()
        self.status = None
        self.local_dir = local_dir or tempfile.mkdtemp(prefix='worker-')
        self.executor = ThreadPoolExecutor(self.ncores)
        # https://github.com/tornadoweb/tornado/issues/1595#issuecomment-198551572
        self.thread_tokens = Queue()
        for i in range(self.ncores):
            self.thread_tokens.put_nowait(i)
        self.center = rpc(ip=center_ip, port=center_port)
        self.active = set()
        self.name = name

        if not os.path.exists(self.local_dir):
            os.mkdir(self.local_dir)

        if self.local_dir not in sys.path:
            sys.path.insert(0, self.local_dir)

        self.services = {}
        self.service_ports = service_ports or {}
        for k, v in (services or {}).items():
            if isinstance(k, tuple):
                k, port = k
            else:
                port = 0

            self.services[k] = v(self)
            self.services[k].listen(port)
            self.service_ports[k] = self.services[k].port

        handlers = {
            'compute': self.compute,
            'gather': self.gather,
            'compute-stream': self.compute_stream,
            'run': self.run,
            'get_data': self.get_data,
            'update_data': self.update_data,
            'delete_data': self.delete_data,
            'terminate': self.terminate,
            'ping': pingpong,
            'health': self.health,
            'upload_file': self.upload_file
        }

        super(Worker, self).__init__(handlers, **kwargs)

    @gen.coroutine
    def _start(self, port=0):
        self.listen(port)
        self.name = self.name or self.address
        for k, v in self.services.items():
            v.listen(0)
            self.service_ports[k] = v.port

        logger.info('      Start worker at: %20s:%d', self.ip, self.port)
        for k, v in self.service_ports.items():
            logger.info('  %16s at: %20s:%d' % (k, self.ip, v))
        logger.info('Waiting to connect to: %20s:%d', self.center.ip,
                    self.center.port)
        while True:
            try:
                resp = yield self.center.register(ncores=self.ncores,
                                                  address=(self.ip, self.port),
                                                  keys=list(self.data),
                                                  services=self.service_ports,
                                                  name=self.name)
                break
            except (OSError, StreamClosedError):
                logger.debug("Unable to register with scheduler.  Waiting")
                yield gen.sleep(0.5)
        if resp != 'OK':
            raise ValueError(resp)
        logger.info('        Registered to: %20s:%d', self.center.ip,
                    self.center.port)
        self.status = 'running'

    def start(self, port=0):
        self.loop.add_callback(self._start, port)

    def identity(self, stream):
        return {
            'type': type(self).__name__,
            'id': self.id,
            'center': (self.center.ip, self.center.port)
        }

    @gen.coroutine
    def _close(self, report=True, timeout=10):
        if report:
            yield gen.with_timeout(timedelta(seconds=timeout),
                                   self.center.unregister(address=(self.ip,
                                                                   self.port)),
                                   io_loop=self.loop)
        self.center.close_streams()
        self.stop()
        self.executor.shutdown()
        if os.path.exists(self.local_dir):
            shutil.rmtree(self.local_dir)

        for k, v in self.services.items():
            v.stop()
        self.status = 'closed'
        self.stop()

    @gen.coroutine
    def terminate(self, stream, report=True):
        yield self._close(report=report)
        raise Return('OK')

    @property
    def address(self):
        return '%s:%d' % (self.ip, self.port)

    @property
    def address_tuple(self):
        return (self.ip, self.port)

    @gen.coroutine
    def gather(self, stream=None, who_has=None):
        who_has = {
            k: [coerce_to_address(addr) for addr in v]
            for k, v in who_has.items() if k not in self.data
        }
        try:
            result = yield gather_from_workers(who_has)
        except KeyError as e:
            logger.warn("Could not find data", e)
            raise Return({'status': 'missing-data', 'keys': e.args})
        else:
            self.data.update(result)
            raise Return({'status': 'OK'})

    @gen.coroutine
    def _ready_task(self,
                    function=None,
                    key=None,
                    args=(),
                    kwargs={},
                    task=None,
                    who_has=None):
        diagnostics = {}
        if who_has:
            local_data = {k: self.data[k] for k in who_has if k in self.data}
            who_has = {
                k: set(map(coerce_to_address, v))
                for k, v in who_has.items() if k not in self.data
            }
            try:
                logger.info("gather %d keys from peers: %s", len(who_has),
                            str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                data = merge(local_data, other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({
                    'status': 'missing-data',
                    'keys': e.args,
                    'key': key
                })
        else:
            data = {}
            transfer_time = 0
        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task, )

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({
            'status': 'OK',
            'function': function,
            'args': args2,
            'kwargs': kwargs2,
            'diagnostics': diagnostics,
            'key': key
        })

    @gen.coroutine
    def executor_submit(self, key, function, *args, **kwargs):
        """ Safely run function in thread pool executor

        We've run into issues running concurrent.future futures within
        tornado.  Apparently it's advantageous to use timeouts and periodic
        callbacks to ensure things run smoothly.  This can get tricky, so we
        pull it off into an separate method.
        """
        token = yield self.thread_tokens.get()
        job_counter[0] += 1
        i = job_counter[0]
        # logger.info("%s:%d Starts job %d, %s", self.ip, self.port, i, key)
        future = self.executor.submit(function, *args, **kwargs)
        pc = PeriodicCallback(
            lambda: logger.debug("future state: %s - %s", key, future._state),
            1000)
        pc.start()
        try:
            if sys.version_info < (3, 2):
                yield future
            else:
                while not future.done() and future._state != 'FINISHED':
                    try:
                        yield gen.with_timeout(timedelta(seconds=1),
                                               future,
                                               io_loop=self.loop)
                        break
                    except gen.TimeoutError:
                        logger.info("work queue size: %d",
                                    self.executor._work_queue.qsize())
                        logger.info("future state: %s", future._state)
                        logger.info("Pending job %d: %s", i, future)
        finally:
            pc.stop()
            self.thread_tokens.put(token)

        result = future.result()

        logger.info("Finish job %d, %s", i, key)
        raise gen.Return(result)

    @gen.coroutine
    def compute_stream(self, stream):
        with log_errors():
            logger.debug("Open compute stream")
            bstream = BatchedSend(interval=10, loop=self.loop)
            bstream.start(stream)

        @gen.coroutine
        def process(msg):
            try:
                result = yield self.compute(report=False, **msg)
                bstream.send(result)
            except Exception as e:
                logger.exception(e)
                bstream.send(assoc(error_message(e), 'key', msg.get('key')))

        with log_errors():
            while True:
                try:
                    msgs = yield read(stream)
                except StreamClosedError:
                    break
                if not isinstance(msgs, list):
                    msgs = [msgs]

                for msg in msgs:
                    op = msg.pop('op', None)
                    if op == 'close':
                        break
                    elif op == 'compute-task':
                        self.loop.add_callback(process, msg)
                    else:
                        logger.warning("Unknown operation %s, %s", op, msg)

            yield bstream.close()
            logger.info("Close compute stream")

    @gen.coroutine
    def compute(self,
                stream=None,
                function=None,
                key=None,
                args=(),
                kwargs={},
                task=None,
                who_has=None,
                report=True):
        """ Execute function """
        self.active.add(key)

        # Ready function for computation
        msg = yield self._ready_task(function=function,
                                     key=key,
                                     args=args,
                                     kwargs=kwargs,
                                     task=task,
                                     who_has=who_has)
        if msg['status'] != 'OK':
            try:
                self.active.remove(key)
            except KeyError:
                pass
            raise Return(msg)
        else:
            function = msg['function']
            args = msg['args']
            kwargs = msg['kwargs']

        # Log and compute in separate thread
        result = yield self.executor_submit(key, apply_function, function,
                                            args, kwargs)

        result['key'] = key
        result.update(msg['diagnostics'])

        if result['status'] == 'OK':
            self.data[key] = result.pop('result')
            if report:
                response = yield self.center.add_keys(address=(self.ip,
                                                               self.port),
                                                      keys=[key])
                if response != 'OK':
                    logger.warning('Could not report results to center: %s',
                                   str(response))
        else:
            logger.warning(
                " Compute Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

        logger.debug("Send compute response to scheduler: %s, %s", key, msg)
        try:
            self.active.remove(key)
        except KeyError:
            pass
        raise Return(result)

    @gen.coroutine
    def run(self, stream, function=None, args=(), kwargs={}):
        function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        try:
            result = function(*args, **kwargs)
        except Exception as e:
            logger.warning(
                " Run Failed\n"
                "Function: %s\n"
                "args:     %s\n"
                "kwargs:   %s\n",
                str(funcname(function))[:1000],
                str(args)[:1000],
                str(kwargs)[:1000],
                exc_info=True)

            response = error_message(e)
        else:
            response = {
                'status': 'OK',
                'result': dumps(result),
            }
        raise Return(response)

    @gen.coroutine
    def update_data(self, stream, data=None, report=True):
        data = valmap(loads, data)
        self.data.update(data)
        if report:
            response = yield self.center.add_keys(address=(self.ip, self.port),
                                                  keys=list(data))
            assert response == 'OK'
        info = {
            'nbytes': {k: sizeof(v)
                       for k, v in data.items()},
            'status': 'OK'
        }
        raise Return(info)

    @gen.coroutine
    def delete_data(self, stream, keys=None, report=True):
        for key in keys:
            if key in self.data:
                del self.data[key]
        logger.info("Deleted %d keys", len(keys))
        if report:
            logger.debug("Reporting loss of keys to center")
            yield self.center.remove_keys(address=self.address,
                                          keys=list(keys))
        raise Return('OK')

    def get_data(self, stream, keys=None):
        return {k: dumps(self.data[k]) for k in keys if k in self.data}

    def upload_file(self, stream, filename=None, data=None, load=True):
        out_filename = os.path.join(self.local_dir, filename)
        if isinstance(data, unicode):
            data = data.encode()
        with open(out_filename, 'wb') as f:
            f.write(data)
            f.flush()

        if load:
            try:
                name, ext = os.path.splitext(filename)
                if ext in ('.py', '.pyc'):
                    logger.info("Reload module %s from .py file", name)
                    name = name.split('-')[0]
                    reload(import_module(name))
                if ext == '.egg':
                    sys.path.append(out_filename)
                    pkgs = pkg_resources.find_distributions(out_filename)
                    for pkg in pkgs:
                        logger.info("Load module %s from egg",
                                    pkg.project_name)
                        reload(import_module(pkg.project_name))
                    if not pkgs:
                        logger.warning("Found no packages in egg file")
            except Exception as e:
                logger.exception(e)
                return {'status': 'error', 'exception': dumps(e)}
        return {'status': 'OK', 'nbytes': len(data)}

    def health(self, stream=None):
        """ Information about worker """
        d = {
            'active': len(self.active),
            'stored': len(self.data),
            'time': time()
        }
        try:
            import psutil
            mem = psutil.virtual_memory()
            d.update({
                'cpu': psutil.cpu_percent(),
                'memory': mem.total,
                'memory-percent': mem.percent
            })
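            # The deltas below need a previous sample; on the first call the
            # _last_net_io / _last_disk_io attributes do not exist yet, which
            # the AttributeError handlers absorb.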
            try:
                net_io = psutil.net_io_counters()
                d['network-send'] = net_io.bytes_sent - self._last_net_io.bytes_sent
                d['network-recv'] = net_io.bytes_recv - self._last_net_io.bytes_recv
            except AttributeError:
                pass
            self._last_net_io = net_io

            try:
                disk_io = psutil.disk_io_counters()
                d['disk-read'] = disk_io.read_bytes - self._last_disk_io.read_bytes
                d['disk-write'] = disk_io.write_bytes - self._last_disk_io.write_bytes
            except (AttributeError, RuntimeError):
                disk_io = None
            self._last_disk_io = disk_io
        except ImportError:
            pass
        return d
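
The executor_submit coroutine above boils down to a reusable pattern: run a blocking job on a ThreadPoolExecutor and wait for it from a Tornado coroutine without blocking the IOLoop, waking up every second to log progress. Below is a minimal standalone sketch of that pattern (assuming Tornado 5+), using IOLoop.run_in_executor in place of the worker's token and queue machinery; the names blocking_task and submit_with_progress are illustrative only and not part of the worker.

# Minimal sketch of the executor_submit pattern: wait for a thread-pool job
# from a Tornado coroutine, logging once per second instead of blocking.
# Assumes Tornado 5+; blocking_task is a stand-in for real work.
import time
from concurrent.futures import ThreadPoolExecutor
from datetime import timedelta

from tornado import gen
from tornado.ioloop import IOLoop

executor = ThreadPoolExecutor(max_workers=4)


def blocking_task(seconds):
    time.sleep(seconds)          # stands in for CPU- or IO-bound work
    return seconds * 2


@gen.coroutine
def submit_with_progress(seconds):
    # run_in_executor wraps the concurrent.futures future in a Tornado future
    future = IOLoop.current().run_in_executor(executor, blocking_task, seconds)
    while True:
        try:
            result = yield gen.with_timeout(timedelta(seconds=1), future)
            break
        except gen.TimeoutError:
            # the job is still running in the pool; the IOLoop stays free
            print("job still running...")
    raise gen.Return(result)


if __name__ == '__main__':
    print(IOLoop.current().run_sync(lambda: submit_with_progress(3)))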
Example #59
0
def executor():
    executor = ThreadPoolExecutor(max_workers=1)
    yield executor
    executor.shutdown()
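
Example #59 reads like the body of a pytest fixture whose decorator was not captured; that is an assumption, since the surrounding module is not shown. Under that assumption, a self-contained, runnable version would look like this:

# Hedged reconstruction: assuming the generator above is a pytest fixture
# whose @pytest.fixture decorator was lost, a runnable version follows.
from concurrent.futures import ThreadPoolExecutor

import pytest


@pytest.fixture
def executor():
    executor = ThreadPoolExecutor(max_workers=1)
    yield executor           # the test body runs here
    executor.shutdown()      # teardown after the test finishes


def test_submit(executor):
    assert executor.submit(sum, [1, 2, 3]).result() == 6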
Example #60
0
class HostPolicy(LoggedThread, Terminable):
    """
    At a regular interval, this thread triggers system reconfiguration by
    sampling host and guest data, evaluating the policy and reporting the
    results to all enabled Controller plugins.
    """
    def __init__(self,
                 config: DictConfig,
                 libvirt_iface,
                 host_monitor: Monitor,
                 guest_manager: GuestManager,
                 shared_terminable: Terminable = None):
        LoggedThread.__init__(self, daemon=True)
        Terminable.__init__(self, shared_terminable=shared_terminable)

        self.policy_lock = threading.RLock()
        self.host_monitor = host_monitor
        self.guest_manager = guest_manager
        self.config = config

        self.interval: float = max(self.config.get('policy', 'interval'), 1.)
        self.grace_period: float = min(
            self.config.get('policy', 'grace-period'), self.interval)
        self.inquiry_timeout: float = min(
            self.config.get('policy', 'inquiry-timeout'), self.interval)

        self.resources = [
            r.strip() for r in self.config.get('policy', 'resources')
        ]
        self.policy = ClassImporter(allocators).get_class(
            config.get('policy', 'allocator'))(self.resources)
        self.controllers = []

        self.properties = {
            'libvirt_iface': libvirt_iface,
            'host_monitor': host_monitor,
            'guest_manager': guest_manager,
            'config': config
        }

        self.policy_data_loggers = {}
        self.client_executor = None

        self.get_controllers()

    def get_controllers(self):
        """ Initialize the Controllers called for in the config file. """
        controllers_importer = ClassImporter(controllers)
        for resource_name in self.resources:
            try:
                controller_name = self.config.get(
                    'policy', f'{resource_name}-controller')
                controller_class = controllers_importer.get_class(
                    controller_name)
                self.logger.debug("Loaded %s controller for resource %s",
                                  controller_name, resource_name)
            except Exception as e:
                self.logger.exception(
                    "Unable to import controller for resource '%s': %s",
                    resource_name, e)
                continue
            try:
                self.controllers.append(
                    controller_class(resource_name, self.properties))
            except Exception as e:
                self.logger.exception(
                    "Unable to instantiate controller for resource '%s': %s",
                    resource_name, e)

    def get_policy_data_logger(self, entity: MonitorDataEntity):
        source = entity.prop('source')
        data_logger = self.policy_data_loggers.get(source, None)
        if data_logger is None:
            data_logger = DataLogger('policy', source)
            self.policy_data_loggers[source] = data_logger
        return data_logger

    def notify_guest(self,
                     entity: MonitorDataEntity,
                     grace_period: Union[int, float, None] = None):
        alloc = {}
        for r in self.resources:
            alloc[r] = entity.get_control(r)
        try:
            msg_obj = MessageNotify({
                'alloc': alloc,
                'grace-period': grace_period
            })
            timeout = grace_period
            if grace_period is None:
                timeout = self.inquiry_timeout
            entity.prop('guest-client').send_receive_message(msg_obj,
                                                             timeout=timeout)
        except socket.timeout as e:
            if self.should_run:
                self.logger.error("Could not notify guest: %s", e)
        except Exception as ex:
            if self.should_run:
                self.logger.exception("Failed to notify guest %s: %s",
                                      entity.prop('source'), ex)

    def inquire_guest(self,
                      entity: MonitorDataEntity,
                      grace_period: Union[int, float, None] = None):
        last_alloc = entity.get_var('last_control', {})
        try:
            msg_obj = MessageInquiry({
                'last-alloc': last_alloc,
                'grace-period': grace_period,
                'timeout': self.inquiry_timeout
            })
            return entity.prop('guest-client').send_receive_message(
                msg_obj, timeout=self.inquiry_timeout)
        except socket.timeout as e:
            if self.should_run:
                self.logger.error("Could not inquire guest: %s", e)
        except Exception as ex:
            if self.should_run:
                self.logger.exception("Failed to inquire guest: %s", ex)

    def parallel_for_each_guest(self,
                                func,
                                guest_list: List[MonitorDataEntity],
                                results_var_key=None,
                                **kwargs):
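        # Fan out: submit one job per guest to the shared client_executor,
        # then fan in: collect the results in the original guest order (a
        # standalone sketch of this pattern follows the example).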
        jobs = []
        for g in guest_list:
            jobs.append(self.client_executor.submit(func, g, **kwargs))

        for g, j in zip(guest_list, jobs):
            res = j.result()
            if results_var_key:
                g.set_var(results_var_key, res)

    def do_controls(self):
        """
        Sample host and guest data, process the rule set and feed the results
        into each configured Controller.
        """
        if not self.should_run:
            return
        # collect data
        host: MonitorDataEntity = self.host_monitor.interrogate()
        if host is None:
            return
        guest_list: List[MonitorDataEntity] = list(
            self.guest_manager.interrogate().values())

        # send inquiry to the clients regarding the next allocation
        inquiry_time = time.time()
        self.parallel_for_each_guest(self.inquire_guest,
                                     guest_list,
                                     results_var_key='inquiry',
                                     grace_period=self.grace_period)
        if not self.should_run:
            return
        try:
            with self.policy_lock:
                self.policy.apply_policy(host, guest_list)
        except Exception as e:
            self.logger.exception("Exception while applying policy: %s", e)
            return

        if not self.should_run:
            return

        # send notification to the clients on the next allocation
        notify_start = time.time()
        remaining_grace = max(
            0., self.grace_period - (notify_start - inquiry_time))
        self.parallel_for_each_guest(self.notify_guest,
                                     guest_list,
                                     grace_period=remaining_grace)
        notify_end = time.time()

        if not self.should_run:
            return

        # log and store control and variable of host and guest
        for entity in (host, *guest_list):
            data_logger = self.get_policy_data_logger(entity)
            data_logger.append_data({'notify': entity.controls}, notify_start,
                                    notify_end)

        remaining_grace = max(0.,
                              self.grace_period - (time.time() - inquiry_time))
        self.terminable_sleep(remaining_grace)

        if not self.should_run:
            return

        policy_start = time.time()
        for c in self.controllers:
            c.apply_control(host, guest_list)
        policy_end = time.time()

        # log and store control and variable of host and guest
        for entity in (host, *guest_list):
            data_logger = self.get_policy_data_logger(entity)
            data_logger.append_data(
                {
                    'controls': entity.controls,
                    'variables': entity.variables
                }, policy_start, policy_end)
            entity.store_variables()

        if not self.should_run:
            return
        # send notification to the clients on the applied allocation
        self.parallel_for_each_guest(self.notify_guest,
                                     guest_list,
                                     grace_period=None)

    def logged_run(self) -> None:
        self.client_executor = ThreadPoolExecutor(
            thread_name_prefix=f"{self.logger_name}-dispatcher")
        try:
            while self.should_run:
                start = time.time()
                self.do_controls()
                self.terminable_sleep(self.interval - (time.time() - start))
        finally:
            self.client_executor.shutdown()
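
parallel_for_each_guest above is a plain fan-out/fan-in over the shared ThreadPoolExecutor: submit one job per guest, then read the results back in submission order and stash them on the guest entities. A minimal standalone sketch of that pattern follows; Guest and inquire are hypothetical stand-ins, not the project's MonitorDataEntity or client classes.

# Standalone sketch of the fan-out / fan-in pattern used by
# parallel_for_each_guest; Guest and inquire are illustrative stand-ins.
from concurrent.futures import ThreadPoolExecutor


class Guest:
    def __init__(self, name):
        self.name = name
        self.vars = {}


def inquire(guest, grace_period=None):
    # Stands in for a network round-trip to the guest agent.
    return {'guest': guest.name, 'grace-period': grace_period}


def parallel_for_each_guest(executor, func, guests, results_var_key=None,
                            **kwargs):
    jobs = [executor.submit(func, g, **kwargs) for g in guests]
    for g, job in zip(guests, jobs):
        res = job.result()  # re-raises any exception from the worker thread
        if results_var_key:
            g.vars[results_var_key] = res


if __name__ == '__main__':
    guests = [Guest('vm-%d' % i) for i in range(3)]
    with ThreadPoolExecutor(thread_name_prefix='policy-dispatcher') as pool:
        parallel_for_each_guest(pool, inquire, guests,
                                results_var_key='inquiry', grace_period=2.0)
    print([g.vars['inquiry'] for g in guests])

Because job.result() re-raises worker-thread exceptions, HostPolicy keeps its per-guest callables (inquire_guest, notify_guest) internally guarded with try/except, so one failing guest does not abort the whole sweep.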