Example No. 1
class BackPressure:
    """
    Wrapper for an iterator to provide
    async access with backpressure
    """
    def __init__(self, iterator, n):
        self.iterator = iter(iterator)
        self.back_pressure = BoundedSemaphore(n)

    def __aiter__(self):
        return self

    async def __anext__(self):
        await self.back_pressure.acquire()

        try:
            return next(self.iterator)
        except StopIteration:
            raise StopAsyncIteration

    async def release(self, async_iterator):
        """
        release iterator to pipeline the backpressure
        """
        async for item in async_iterator:
            try:
                self.back_pressure.release()
            except ValueError:
                pass

            yield item
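
A minimal driver for the wrapper above might look like the sketch below. It assumes the BackPressure class (and its BoundedSemaphore import) is in scope; `process` is a hypothetical downstream stage that re-yields items so `release()` can free one permit per completed item.

import asyncio

async def process(source):
    # Hypothetical downstream stage: consume throttled items and re-yield them
    # so BackPressure.release() can free one permit per completed item.
    async for item in source:
        await asyncio.sleep(0.01)  # simulate slow work
        yield item

async def main():
    bp = BackPressure(range(100), 4)  # at most 4 items in flight at once
    async for item in bp.release(process(bp)):
        print(item)

asyncio.run(main())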
Example No. 2
 def __init__(self, name: str, dev: str, mon: Monitor = None, **kwargs):
     self._dev = dev
     self._kwargs = kwargs
     self._mon = mon
     if (name == HCI_TRANSPORT_UART):
         self._transport = UART()
     elif (name == HCI_TRANSPORT_TCP):
         self._transport = UARToTCP()
     else:
         raise RuntimeError('Unknown transport type {}'.format(name))
     self._tx_cmd_q = Queue()
     # Bound to max 1, so we send only one command at a time
     self._tx_cmd_sem = BoundedSemaphore(value=1)
Example No. 3
async def test_full(mock_server, test_client_config):
    mock_server.reset()
    client = await AsyncDispatcher.init(test_client_config)

    # [0 1 2
    #  3 4 5
    #  6 7 8]
    idx = create_dist_index(ORIGS, DESTS)

    async with aiohttp.ClientSession() as session:
        res = await client.distance_rows(
            missing=idx,
            provider='google',
            session=session,
            # TODO: use default sem for async calls also
            sem=BoundedSemaphore(20)
        )

    assert res.isnull().sum().sum() == 0

    # origin-major order
    np.testing.assert_array_almost_equal(
        res.index.values.tolist(),
        FULL_INDEX
    )

    np.testing.assert_array_almost_equal(
        res.meters.values,
        FULL_RESULTS
    )
Example No. 4
async def check_sources(sources: Iterable[Source],
                        concurrency: int = 10) -> List[Optional[Exception]]:
    semaphore = BoundedSemaphore(concurrency)
    async with ClientSession() as session:
        return await asyncio.gather(*(check_source(semaphore, session, s)
                                      for s in sources),
                                    return_exceptions=True)
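
The `check_source` coroutine is not shown in this snippet; a plausible shape, assuming `Source` exposes a `url` attribute, is sketched below. Failures surface as returned exceptions because `gather` is called with `return_exceptions=True`.

import asyncio
from typing import Optional
from aiohttp import ClientSession

async def check_source(semaphore: asyncio.BoundedSemaphore,
                       session: ClientSession,
                       source: "Source") -> Optional[Exception]:
    # Hold a semaphore permit for the duration of the request so at most
    # `concurrency` checks run at once.
    async with semaphore:
        async with session.get(source.url) as response:  # assumes Source.url
            response.raise_for_status()
    return None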
Example No. 5
async def build_apps(
    branch: str,
    *,
    locks: Locks = None,
    no_export: bool = False,
    force_export: bool = False,
    keep_build_dirs: bool = False,
    delete_build_dirs: bool = False,
    concurrency: int = None,
):
    config = await utils.load_yaml(Path.cwd() / "nufb.yml")
    apps = config["apps"].get(branch)
    if apps is None:
        apps = config["apps"].get("master")
    assert apps

    if locks is None:
        locks = Locks()

    semaphore = BoundedSemaphore(concurrency or len(apps))

    async def task(name):
        async with semaphore:
            return await build_app(
                branch,
                name,
                no_export=no_export,
                force_export=force_export,
                keep_build_dirs=keep_build_dirs,
                delete_build_dirs=delete_build_dirs,
                locks=locks,
            )

    await asyncio.gather(*map(task, apps))
Example No. 6
async def test_thin(mock_server, test_client_config):
    mock_server.reset()
    client = await AsyncDispatcher.init(test_client_config)

    # [_ 1 2
    #  _ 4 5
    #  _ 7 8]
    # take only last 2 destinations, 3 x 2 query
    idx = create_dist_index(ORIGS, DESTS[[1, 2], :])
    expected = [
        1, 4, 7,    # destination-major order, dest1
        2, 5, 8     # dest2
    ]

    async with aiohttp.ClientSession() as session:
        res = await client.distance_rows(
            missing=idx,
            provider='google',
            session=session,
            # TODO: use default sem for async calls also
            sem=BoundedSemaphore(20)
        )

    assert res.isnull().sum().sum() == 0

    np.testing.assert_array_almost_equal(
        res.index.values.tolist(),
        FULL_INDEX[expected, :]
    )

    np.testing.assert_array_almost_equal(
        res.meters.values,
        FULL_RESULTS[expected]
    )
Example No. 7
async def test_odd(mock_server, test_client_config):
    mock_server.reset()
    client = await AsyncDispatcher.init(test_client_config)

    idx = create_dist_index(ORIGS, DESTS)

    # [0 _ 2
    #  _ 4 _
    #  6 _ 8]
    # take even indices only
    missing = idx.iloc[range(0, len(idx), 2)]

    async with aiohttp.ClientSession() as session:
        res = await client.distance_rows(
            missing=missing,
            provider='google',
            session=session,
            # TODO: use default sem for async calls also
            sem=BoundedSemaphore(20)
        )

    assert res.isnull().sum().sum() == 0

    np.testing.assert_array_almost_equal(
        res.index.values.tolist(),
        [idx for i, idx in enumerate(FULL_INDEX) if i % 2 == 0]
    )

    np.testing.assert_array_almost_equal(
        res.meters.values,
        [idx for i, idx in enumerate(FULL_RESULTS) if i % 2 == 0]
    )
Example No. 8
async def main():
    async with AsyncClient(timeout=10) as client:
        if (db := read_db()) is None:
            r = await client.get(url, params={**params, "num": 0})
            info = r.json()
            total = info["total_results"]
            sem = BoundedSemaphore(max_parallel)
            db = [
                img for lst in await asyncio.gather(
                    *(getDb(start, client, sem)
                      for start in range(0, total, page_size))) for img in lst
            ]
            write_db(db)

        sem = BoundedSemaphore(max_parallel)
        await asyncio.gather(*(getImage(res, client, sem, i, len(db))
                               for i, res in enumerate(db)))
Example No. 9
 async def __aenter__(self):
     if self.slot not in self.parent.slots:
         self.parent.slots[self.slot] = _SlotMeta(
             BoundedSemaphore(self.parent.concurrency_per_slot), registered_tasks=0
         )
     slot_info = self.parent.slots[self.slot]
     slot_info.registered_tasks += 1
     await slot_info.semaphore.acquire()
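
The matching `__aexit__` is not part of the snippet; one plausible counterpart, assuming `_SlotMeta` keeps the `semaphore` and `registered_tasks` fields used above, would release the per-slot semaphore and drop the registration:

 async def __aexit__(self, exc_type, exc, tb):
     slot_info = self.parent.slots[self.slot]
     slot_info.semaphore.release()      # free the slot for the next task
     slot_info.registered_tasks -= 1    # undo the registration from __aenter__
     return False                       # never suppress exceptions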
Example No. 10
class Message:
    no_retry = False
    __overall_concurrency = 30
    __overall_semaphore = BoundedSemaphore(__overall_concurrency)

    __lock_type = 'r'

    def __init__(self,
                 text: Optional[str] = None,
                 media: Optional[Union[list[Medium], list[Medium],
                                       Medium]] = None,
                 parse_mode: Optional[str] = 'HTML'):
        self.text = text
        self.media = media
        self.parse_mode = parse_mode
        self.retries = 0

    async def send(self,
                   chat_id: Union[str, int],
                   reply_to_msg_id: int = None,
                   silent: bool = None):
        semaphore, rwlock, flood_rwlock = locks.user_msg_locks(chat_id)
        rlock_or_wlock = await rwlock.gen_wlock(
        ) if self.__lock_type == 'w' else await rwlock.gen_rlock()
        flood_rlock_or_wlock = await flood_rwlock.gen_rlock(
        )  # always acquire a read lock first

        async with semaphore:  # acquire user semaphore first to reduce per user concurrency
            while True:
                try:
                    async with rlock_or_wlock:  # acquire a msg rwlock
                        async with flood_rlock_or_wlock:  # acquire a flood rwlock
                            async with self.__overall_semaphore:  # only acquire overall semaphore when sending
                                await self._send(chat_id, reply_to_msg_id,
                                                 silent)
                    return
                except (FloodWaitError, SlowModeWaitError) as e:
                    # telethon has retried for us, but we release locks and retry again here to see if it will be better
                    if self.retries >= 1:
                        logger.error(
                            f'Msg dropped due to too many flood control retries ({chat_id})'
                        )
                        return

                    self.retries += 1
                    flood_rlock_or_wlock = await flood_rwlock.gen_wlock(
                    )  # enforce a wlock here to block other attempts
                    if not flood_rwlock.v_write_count:  # only flood wait once, thus only lock once
                        async with flood_rlock_or_wlock:  # acquire a flood rwlock
                            await asyncio.sleep(e.seconds + 1)  # sleep

    async def _send(self,
                    chat_id: Union[str, int],
                    reply_to_msg_id: int = None,
                    silent: bool = None):
        pass
Example No. 11
    def __init__(self, max_workers=None, stack_limit: Union[bool, int] = 0, stack_file: TextIO = None):
        if max_workers is None:
            max_workers = min(32, (os.cpu_count() or 1) + 4)
        if max_workers <= 0:
            raise ValueError("max_workers must be greater than 0")
        self._lock = Lock()
        self._task_semaphore = BoundedSemaphore(max_workers)
        self._task_map = {}

        self.stack_limit = stack_limit
        self.stack_file = stack_file
Example No. 12
    async def start(self, injection: Injection = None) -> 'AttackContext':
        if self.session:
            raise RuntimeError('already has a session')

        semaphore = BoundedSemaphore(self.concurrency)
        connector = TCPConnector(ssl=False, limit=None)
        async with ClientSession(headers=self.headers,
                                 connector=connector) as sesh:
            yield self._replace(session=sesh,
                                injection=injection,
                                semaphore=semaphore)
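
Because `start()` contains a `yield`, it is an async generator rather than a plain coroutine, despite the `-> 'AttackContext'` annotation. One plausible way to consume it (a sketch only; `run_attack` is not part of the original code) is a single-pass `async for`, which keeps the `ClientSession` open for the body of the loop:

async def run_attack(ctx: 'AttackContext', injection=None) -> None:
    async for attack_ctx in ctx.start(injection):
        # attack_ctx carries the live session, the injection and the
        # BoundedSemaphore; the session is closed once the loop body returns.
        ...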
Example No. 13
 def __init__(self, name, config, host, virtual=False):
     super().__init__(name, config, host)
     self.virtual = virtual
     # Active generator created from get(), referenced to cancel on disconnect.
     self._getter = None
     # Message queue, to move processing from the event stream to the generator.
     self._queue = Queue()
     # Message history, to match up received messages with their sent sources.
     # Mapping from (channel, message ID) to (source message, all IDs).
     self._sent = {}
     # Hook lock, to put a hold on retrieving messages whilst a send is in progress.
     self._lock = BoundedSemaphore()
Example No. 14
    def __init__(self, cpus=None, blocking_io_penalty=None):
        """The LocalBackend executes tasks as processes on the local machine.

        This contains a semaphore that limits tasks by the number of cpus
        that they require. It requires that self.runner be set to get the
        event loop, so it's not instantiated until preflight.

        :param cpus: If this is None, the number of available CPUs will be
            guessed. This cannot be changed after starting the backend.
        :param blocking_io_penalty: Delay (in seconds) when a BlockingIOError
            prevents a new process from spawning.
        :param max_concurrency: Max concurrency limit
        """
        super(LocalBackend, self).__init__()
        self.cpus = cpus \
                    or jetstream.settings['backends']['local']['cpus'].get() \
                    or jetstream.utils.guess_local_cpus()
        self.bip = blocking_io_penalty \
                   or jetstream.settings['backends']['local']['blocking_io_penalty'].get(int)
        self._cpu_sem = BoundedSemaphore(self.cpus)
        log.info(f'LocalBackend initialized with {self.cpus} cpus')
Example No. 15
async def download_and_enqueue(queue: Queue, condition: Condition,
                               semaphore_value: int,
                               limit: Optional[int]) -> None:
    """Download source json and all documentsand enqueue all documents from """
    source_json_url = (
        "https://raw.githubusercontent.com/aptnotes/data/master/APTnotes.json")
    async with aiohttp.ClientSession(
            headers={"Connection": "keep-alive"}) as session:
        semaphore = BoundedSemaphore(semaphore_value)
        source_json = await fetch_source_json(session, source_json_url)
        source_json_with_file_urls = await add_file_urls_source_json(
            session, semaphore, source_json[slice(limit)])
        await fetch_and_enqueue_multiple(session, semaphore,
                                         source_json_with_file_urls, condition,
                                         queue)
Example No. 16
 def __init__(self, name, config, host):
     super().__init__(name, config, host)
     self.db = None
     # Message cache, stores IDs of all synced messages by channel.
     self._cache = SyncCache(self)
     # Hook lock, to put a hold on retrieving messages whilst a send is in progress.
     self._lock = BoundedSemaphore()
     # Add a virtual plug to the host, for external subscribers.
     if self.config["plug"]:
         log.debug("Creating virtual plug: %r", self.config["plug"])
         self.plug = SyncPlug(self.config["plug"], self, host)
         host.add_plug(self.plug)
         for label in self.config["channels"]:
             host.add_channel(label, immp.Channel(self.plug, label))
     else:
         self.plug = None
Example No. 17
    def start(self, workflow, pipeline=None, project=None):
        """Called to start the runner on a workflow."""
        if project:
            try:
                project.lock.acquire()
            except TimeoutError:
                err = 'Failed to acquire project lock, there may be a run pending. If ' \
                      'this problem persists, and a run is not pending, remove the lock ' \
                      'file located at <project>/jetstream/pid.lock'
                raise TimeoutError(err) from None

            self._previous_directory = os.getcwd()
            os.chdir(project.paths.path)

        self._pipeline = pipeline
        self._project = project
        self._workflow = workflow
        self._workflow_len = len(workflow)
        self._workflow_iterator = iter(self.workflow.graph)
        self._run_started = datetime.now()
        self._errs = False
        self._start_event_loop()
        self._start_backend()

        if self.max_concurrency is None:
            self.max_concurrency = utils.guess_max_forks()

        self._conc_sem = BoundedSemaphore(value=self.max_concurrency)

        with sigterm_ignored():
            self.preflight()

            try:
                self._main = self.loop.create_task(self._spawn_new_tasks())
                self.loop.run_until_complete(self._main)
            finally:
                self._cleanup_event_loop()
                log.debug(f'Runner finished shutdown, errs={self._errs}')

                if self._errs:
                    self.backend.cancel()

                self.shutdown()

                if project:
                    project.lock.release()
                    os.chdir(self._previous_directory)
Example No. 18
async def test_jumbled(mock_server, test_client_config):
    mock_server.reset()
    client = await AsyncDispatcher.init(test_client_config)

    jumble = [8, 0, 7, 1, 6, 2, 5, 3]  # omit 4
    idx = create_dist_index(ORIGS, DESTS).take(jumble)


    # should be ordered origin-major
    # grouped by the first occurrence of each origin value
    #
    # [0 1 2
    #  3 4 5
    #  6 7 8]
    # 
    # 8 (row3) is first group
    # 0 (row1) is second group
    # 5 (row2) is last group
    expected = [
        8, 7, 6,
        0, 1, 2,
        5, 3
    ]

    async with aiohttp.ClientSession() as session:
        res = await client.distance_rows(
            missing=idx,
            provider='google',
            session=session,
            # TODO: use default sem for async calls also
            sem=BoundedSemaphore(20)
        )

    assert res.isnull().sum().sum() == 0

    np.testing.assert_array_almost_equal(
        res.index.values.tolist(),
        FULL_INDEX[expected, :]
    )

    np.testing.assert_array_almost_equal(
        res.meters.values,
        FULL_RESULTS[expected]
    )
Example No. 19
async def _check_ports(target: str,
                       port: int,
                       loop: asyncio.AbstractEventLoop,
                       sem: asyncio.BoundedSemaphore,
                       results: list,
                       config: DockerScanModel):

    open_ports = set()

    # for port in ports:

    log.error("   > Trying {}:{}".format(target, port))

    is_ssl = True

    try:
        # If connection SSL?
        try:
            # This SSL context configuration allows connecting to servers with
            # self-signed certs
            sslcontext = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
            sslcontext.options |= ssl.OP_NO_SSLv2
            sslcontext.options |= ssl.OP_NO_SSLv3
            sslcontext.options |= getattr(ssl, "OP_NO_COMPRESSION", 0)
            sslcontext.set_default_verify_paths()

            reader, writer = await _get_connection(target,
                                                   port,
                                                   sslcontext,
                                                   config.timeout,
                                                   loop)

            if not reader:
                return

        except ssl.SSLError:
            reader, writer = await _get_connection(target,
                                                   port,
                                                   None,
                                                   config.timeout,
                                                   loop)

            if not reader:
                return

            is_ssl = False

        # Send HTTP Header
        writer.write(
            "GET /v2/ HTTP/1.1\r\nHost: {}\r\n\r\n".format(target).encode()
        )

        # Get Server response
        reader = reader.read(1000)
        try:
            data = await asyncio.wait_for(reader,
                                          1,
                                          loop=loop)
        except (asyncio.TimeoutError, ConnectionRefusedError):
            # If we reach this point, the server didn't send a response
            return

        if b"registry/2.0" in data or \
                        b"Docker-Distribution-Api-Version" in data:

            content = data.lower()

            if b"200 ok" in content:
                status = "open"
            elif b"401" in content:
                status = "auth required"
            else:
                status = "reachable"

            log.info("     + Discovered port {}:{}".format(
                target,
                port
            ))

            open_ports.add((port, status, is_ssl))

        # close descriptor
        writer.close()

        if open_ports:
            results.append(
                {
                    target: open_ports
                }
            )

    finally:
        sem.release()
Example No. 20
async def main():
    semaphore = BoundedSemaphore(1)

    await semaphore.acquire()
    semaphore.release()
    semaphore.release()
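
The point of this snippet is presumably the defining property of `BoundedSemaphore`: unlike a plain `Semaphore`, it refuses to be released above its initial value, so the second `release()` raises `ValueError`. A self-contained sketch of that behaviour:

import asyncio

async def main():
    semaphore = asyncio.BoundedSemaphore(1)

    await semaphore.acquire()
    semaphore.release()      # back to the initial value of 1
    try:
        semaphore.release()  # one release too many
    except ValueError:
        print('BoundedSemaphore released too many times')

asyncio.run(main())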
Example No. 21
class HCIHost:
    def __init__(self, name: str, dev: str, mon: Monitor = None, **kwargs):
        self._dev = dev
        self._kwargs = kwargs
        self._mon = mon
        if (name == HCI_TRANSPORT_UART):
            self._transport = UART()
        elif (name == HCI_TRANSPORT_TCP):
            self._transport = UARToTCP()
        else:
            raise RuntimeError('Unknown transport type {}'.format(name))
        self._tx_cmd_q = Queue()
        # Bound to max 1, so we send only one command at a time
        self._tx_cmd_sem = BoundedSemaphore(value=1)

    async def open(self):
        try:
            await self._transport.open(self._dev, **self._kwargs)
        except OSError as e:
            #log.error(f'Unable to open serial port {e}')
            raise e from None

        # Start RX task
        self._rx_task = create_task(self._rx_task())
        # Start TX command task
        self._curr_cmd = None
        self._tx_cmd_task = create_task(self._tx_cmd_task())

        await self.init()

    async def close(self):
        await self._rx_task
        await self._tx_cmd_task
        self._transport.close()

    async def init(self):
        # Reset the controller first
        log.debug('init: Reset')
        pkt = HCICmd(cmd.Reset)
        await self.send_cmd(pkt)
        log.debug('init: ReadLocalVersionInformation')
        pkt = HCICmd(cmd.ReadLocalVersionInformation)
        await self.send_cmd(pkt)

    async def _rx_task(self):
        log.debug('rx task started')
        while True:
            try:
                pkt = await self._transport.recv()
            except CancelledError:
                # Here for compatibility with 3.7
                raise
            except Exception as e:
                log.debug(f'_rx_task: {e}')
            else:
                if not pkt:
                    # Broken RX Path
                    log.debug('_rx_task exiting')
                    break
                elif self._mon:
                    self._mon.feed_rx(0, pkt)

            if isinstance(pkt, HCIEvt):
                self._rx_evt(pkt)
            elif isinstance(pkt, HCIACLData):
                self._rx_acl(pkt)
            else:
                log.error(f'Invalid rx type: {type(pkt)}')

    def _rx_evt(self, evt: HCIEvt):
        log.debug(f'evt rx: {evt}')
        # Look for a handler
        try:
            handler = self.evt_handlers[evt.hdr.code]
        except KeyError:
            log.warning(f'Discarding event with code: {evt.hdr.code}')
        else:
            handler(self, evt)

    def _rx_acl(self, acl: HCIACLData):
        log.debug(f'acl rx: {acl}')

    async def _tx_cmd_task(self):
        log.debug('tx cmd task started')
        while True:
            pkt = await self._tx_cmd_q.get()
            log.debug(f'pkt tx: {pkt}')

            # Wait for the current command to complete
            try:
                await wait_for(self._tx_cmd_sem.acquire(), 10)
            except CancelledError:
                # Here for compatibility with 3.7
                raise
            except TimeoutError:
                log.debug('_tx_cmd_task: sem timeout: exiting')
                return

            assert self._curr_cmd is None
            self._curr_cmd = pkt
            log.debug(f'_tx_cmd curr set')

            try:
                await self._transport.send(pkt)
            except OSError as e:
                log.debug('_tx_cmd_task: send error: exiting')
                return
            else:
                if self._mon:
                    self._mon.feed_tx(0, pkt)

    def tx_cmd(self, pkt: HCICmd) -> None:
        self._tx_cmd_q.put_nowait(pkt)

    async def send_cmd(self, pkt: HCICmd) -> HCIEvt:
        pkt.event.clear()
        self.tx_cmd(pkt)
        try:
            await wait_for(pkt.event.wait(), 15)
        except TimeoutError:
            raise

    def _complete_cmd(self):
        log.debug(f'complete: {self._curr_cmd}')
        assert self._curr_cmd
        # Wake up a potential task waiting on completion
        self._curr_cmd.event.set()
        self._curr_cmd = None

        # The command itself is complete, allow the tx cmd task to move on to
        # the next queued command
        self._tx_cmd_sem.release()

    @evt_handler(evt.CommandComplete)
    def _evt_cc(self, evt: HCIEvt) -> None:
        log.debug(f'handling CC: {evt}')

        self._complete_cmd()
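
Neither `evt_handler` nor the `evt_handlers` table consulted in `_rx_evt` is shown here. One plausible wiring (a sketch under the assumption that each event class exposes a numeric `code` attribute; not necessarily how the original project does it) tags handler methods with the event code and collects them in `__init_subclass__`:

def evt_handler(evt_cls):
    def decorator(func):
        func._evt_code = evt_cls.code  # assumes event classes expose .code
        return func
    return decorator

class HandlerRegistryMixin:
    evt_handlers = {}

    def __init_subclass__(cls, **kwargs):
        super().__init_subclass__(**kwargs)
        # Collect every method tagged by @evt_handler into a code -> handler map.
        cls.evt_handlers = {
            func._evt_code: func
            for func in vars(cls).values()
            if callable(func) and hasattr(func, '_evt_code')
        }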
Example No. 22
async def _check_ports(target: str, port: int, loop: asyncio.AbstractEventLoop,
                       sem: asyncio.BoundedSemaphore, results: list,
                       config: DockerScanModel):

    open_ports = set()

    # for port in ports:

    log.error("   > Trying {}:{}".format(target, port))

    is_ssl = True

    try:
        # If connection SSL?
        try:
            # This SSL context configuration allows connecting to servers with
            # self-signed certs
            sslcontext = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
            sslcontext.options |= ssl.OP_NO_SSLv2
            sslcontext.options |= ssl.OP_NO_SSLv3
            sslcontext.options |= getattr(ssl, "OP_NO_COMPRESSION", 0)
            sslcontext.set_default_verify_paths()

            reader, writer = await _get_connection(target, port, sslcontext,
                                                   config.timeout, loop)

            if not reader:
                return

        except ssl.SSLError:
            reader, writer = await _get_connection(target, port, None,
                                                   config.timeout, loop)

            if not reader:
                return

            is_ssl = False

        # Send HTTP Header
        writer.write(
            "GET /v2/ HTTP/1.1\r\nHost: {}\r\n\r\n".format(target).encode())

        # Get Server response
        reader = reader.read(1000)
        try:
            data = await asyncio.wait_for(reader, 1, loop=loop)
        except (asyncio.TimeoutError, ConnectionRefusedError):
            # If we reach this point, the server didn't send a response
            return

        if b"registry/2.0" in data or \
                        b"Docker-Distribution-Api-Version" in data:

            content = data.lower()

            if b"200 ok" in content:
                status = "open"
            elif b"401" in content:
                status = "auth required"
            else:
                status = "reachable"

            log.info("     + Discovered port {}:{}".format(target, port))

            open_ports.add((port, status, is_ssl))

        # close descriptor
        writer.close()

        if open_ports:
            results.append({target: open_ports})

    finally:
        sem.release()
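
Note that `_check_ports` only releases the semaphore (in its `finally` block), which implies the caller acquires a permit before scheduling each probe. A sketch of such a caller, with hypothetical names (`scan_target`, `max_concurrency`) that are not taken from the original code:

import asyncio

async def scan_target(target, ports, loop, config, max_concurrency=50):
    sem = asyncio.BoundedSemaphore(max_concurrency)
    results = []
    tasks = []
    for port in ports:
        await sem.acquire()  # cap the number of in-flight connections
        tasks.append(loop.create_task(
            _check_ports(target, port, loop, sem, results, config)))
    await asyncio.gather(*tasks)
    return results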
Example No. 23
    def first(self, model: Type[T], **kwargs) -> Optional[T]:
        return self.query(model, **kwargs).first()

    def count(self, model: Type[T], **kwargs) -> int:
        return self.query(model, **kwargs).count()

    def get(self, model: Type[T], primary_key) -> Optional[T]:
        return self.session.query(model).get(primary_key)

    @property
    def session(self) -> Session:
        return self._Session()


thread_semaphore = BoundedSemaphore(5)


async def run_in_thread(function, *args, **kwargs):
    async with thread_semaphore:

        def inner():
            try:
                out = function(*args, **kwargs)
                db.session.commit()
            finally:
                db.close()
            return out

        return await async_thread.run_in_thread(inner)
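
A usage sketch for the helper above, assuming a SQLAlchemy model named `User` (hypothetical) and the `db` facade shown earlier: at most five such calls run in worker threads at a time, and each commits and closes its session when done.

async def load_user(user_id):
    # Runs the blocking SQLAlchemy call in a worker thread; the semaphore above
    # keeps at most five of these in flight at once.
    return await run_in_thread(db.get, User, user_id)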
Example No. 24
from __future__ import absolute_import

import re
import sys
import urllib

from datetime import date, timedelta

from .session import Session
from .urls import BASE_URL
from .utils import Logger  # noqa:F401

if sys.version_info >= (3, 7):
    from asyncio import BoundedSemaphore

    semaphore = BoundedSemaphore(value=50)


class ForgeBase(object):
    """
    Superclass for all api model classes in this Forge Python Wrapper.
    """

    session = Session(base_url=BASE_URL)
    TODAY = date.today()
    TODAY_STRING = TODAY.strftime("%Y-%m-%d")
    IN_ONE_YEAR_STRING = (TODAY + timedelta(365)).strftime("%Y-%m-%d")

    # TODO - Reorganise Extension Types (https://forge.autodesk.com/en/docs/data/v2/developers_guide/basics/#extension-types)  # noqa: E501

    BASE_TYPES = [
Example No. 25
 def __init__(self, iterator, n):
     self.iterator = iter(iterator)
     self.back_pressure = BoundedSemaphore(n)
Example No. 26
class LocalBackend(jetstream.backends.BaseBackend):
    def __init__(self, cpus=None, blocking_io_penalty=None):
        """The LocalBackend executes tasks as processes on the local machine.

        This contains a semaphore that limits tasks by the number of cpus
        that they require. It requires that self.runner be set to get the
        event loop, so it's not instantiated until preflight.

        :param cpus: If this is None, the number of available CPUs will be
            guessed. This cannot be changed after starting the backend.
        :param blocking_io_penalty: Delay (in seconds) when a BlockingIOError
            prevents a new process from spawning.
        :param max_concurrency: Max concurrency limit
        """
        super(LocalBackend, self).__init__()
        self.cpus = cpus \
                    or jetstream.settings['backends']['local']['cpus'].get() \
                    or jetstream.utils.guess_local_cpus()
        self.bip = blocking_io_penalty \
                   or jetstream.settings['backends']['local']['blocking_io_penalty'].get(int)
        self._cpu_sem = BoundedSemaphore(self.cpus)
        log.info(f'LocalBackend initialized with {self.cpus} cpus')

    async def spawn(self, task):
        log.debug('Spawn: {}'.format(task))

        if 'cmd' not in task.directives:
            return task.complete()

        cmd = task.directives['cmd']
        cpus = task.directives.get('cpus', 0)
        cpus_reserved = 0
        open_fps = list()

        if cpus > self.cpus:
            raise RuntimeError('Task cpus greater than available cpus')

        try:
            for i in range(task.directives.get('cpus', 0)):
                await self._cpu_sem.acquire()
                cpus_reserved += 1

            stdin, stdout, stderr = self.get_fd_paths(task)

            if stdin:
                stdin_fp = open(stdin, 'r')
                open_fps.append(stdin_fp)
            else:
                stdin_fp = None

            if stdout:
                stdout_fp = open(stdout, 'w')
                open_fps.append(stdout_fp)
            else:
                stdout_fp = None

            if stderr:
                stderr_fp = open(stderr, 'w')
                open_fps.append(stderr_fp)
            else:
                stderr_fp = None

            p = await self.subprocess_sh(cmd,
                                         stdin=stdin_fp,
                                         stdout=stdout_fp,
                                         stderr=stderr_fp)

            task.state.update(
                stdout_path=stdout,
                stderr_path=stderr,
                label=f'Slurm({p.pid})',
            )

            log.info(f'LocalBackend spawned({p.pid}): {task.name}')
            rc = await p.wait()

            if rc != 0:
                log.info(f'Failed: {task.name}')
                return task.fail(p.returncode)
            else:
                log.info(f'Complete: {task.name}')
                return task.complete(p.returncode)
        except CancelledError:
            task.state['err'] = 'Runner cancelled Backend.spawn()'
            return task.fail(-15)
        finally:
            for fp in open_fps:
                fp.close()

            for i in range(cpus_reserved):
                self._cpu_sem.release()

            return task

    async def subprocess_sh(self,
                            args,
                            *,
                            stdin=None,
                            stdout=None,
                            stderr=None,
                            cwd=None,
                            encoding=None,
                            errors=None,
                            env=None,
                            loop=None,
                            executable='/bin/bash'):
        """Asynchronous version of subprocess.run

        This will always use a shell to launch the subprocess, and it prefers
        /bin/bash (can be changed via arguments)"""
        log.debug(f'subprocess_sh:\n{args}')

        while 1:
            try:
                p = await create_subprocess_shell(args,
                                                  stdin=stdin,
                                                  stdout=stdout,
                                                  stderr=stderr,
                                                  cwd=cwd,
                                                  encoding=encoding,
                                                  errors=errors,
                                                  env=env,
                                                  loop=loop,
                                                  executable=executable)
                break
            except BlockingIOError as e:
                log.warning(f'System refusing new processes: {e}')
                await asyncio.sleep(self.bip)

        return p
Example No. 27
class CloudSwiftBackend(BaseBackend):
    """
    Executes tasks on cloud-based worker nodes, handling all data transfer to/from worker nodes and 
    the client node.
    """
    def __init__(self,
                 pw_pool_name=None,
                 pw_api_key=None,
                 cpus=None,
                 blocking_io_penalty=None,
                 cloud_storage_provider='azure',
                 **kwargs):
        """
        If ``pw_pool_name`` and ``pw_api_key`` are both given valid values, this backend will use ParallelWorks to
        manage resources elastically. If they remain ``None``, then it is assumed that the user has manually started
        a pool using the included ``start_pool.py`` script. Note that both approaches require the use of binaries
        available as part of the Swift workflow language.

        :param pw_pool_name: str Name of an active pool in PW
        :param pw_api_key: str Valid API key for a PW account
        :param cpus: int The total number of CPUs available to the worker pool
        :param blocking_io_penalty: int Delay (in seconds) when a BlockingIOError prevents a new process from spawning.
        :param cloud_storage_provider: str Name of the cloud storage provider, which must match one of the keys
            in ``jetstream.cloud.base.CLOUD_STORAGE_PROVIDERS``
        """
        super().__init__()
        self.is_pw_pool = pw_pool_name is not None
        if pw_pool_name is not None and pw_api_key is not None:
            self.pool_info = get_pool_info(pw_pool_name, pw_api_key)
            log.info('PW Pool info: {}'.format(self.pool_info))
        else:
            self.pool_info = {
                'cpus': kwargs['pool_info']['cpus_per_worker'],
                'maxworkers': kwargs['pool_info']['workers'],
                'serviceurl': kwargs['pool_info']['serviceurl'],
            }
        self.total_cpus_in_pool = self.pool_info['cpus'] * self.pool_info[
            'maxworkers']
        self.bip = blocking_io_penalty \
                   or jetstream.settings['backends']['local']['blocking_io_penalty'].get(int)
        self._cpu_sem = BoundedSemaphore(int(self.total_cpus_in_pool))
        self.project_dir = os.getcwd()
        try:
            os.remove('cjs_cmds_debug.sh')
        except:
            pass  # Fail silently

        # Make directory for cjs launch scripts
        self.cloud_scripts_dir = os.path.join(self.project_dir,
                                              'cloud_scripts')
        os.makedirs(self.cloud_scripts_dir, exist_ok=True)

        self.cloud_logs_dir = os.path.join(self.project_dir, 'cloud_logs')
        os.makedirs(self.cloud_logs_dir, exist_ok=True)

        # Instantiate a cloud storage provider
        storage_class = dynamic_import(
            CLOUD_STORAGE_PROVIDERS[cloud_storage_provider])
        self.cloud_storage = storage_class(
            **kwargs.get(storage_class.config_key, dict()))

        # Initialize cloud metrics log
        CloudMetricsLogger.init(at=os.path.join(
            self.project_dir, 'cloud_metrics_{}.yaml'.format(
                datetime.now().strftime('%Y%m%d%H%M%S'))))

        # Get path to Petalink library, if it exists
        self.petalink_so_path = kwargs.get('petalink_path',
                                           '/usr/lib/petalink.so')

        log.info(
            f'CloudSwiftBackend initialized with {self.total_cpus_in_pool} cpus'
        )

    async def spawn(self, task):
        # Ensure the command body exists, otherwise there is nothing to do
        if 'cmd' not in task.directives:
            return task.complete()

        # Ensure there will ever be enough CPUs to run this task, otherwise fail
        task_requested_cpus = task.directives.get('cpus', 0)
        if task_requested_cpus > self.total_cpus_in_pool:
            log.critical(
                'Task requested cpus ({}) greater than total available cpus ({})'
                .format(task_requested_cpus, self.total_cpus_in_pool))
            return task.fail(1)

        # Add in the pre- and post-hooks into the task body
        task.directives['cmd'] = (
            f'if [[ -f "{self.petalink_so_path}" ]]; ' +
            f'then export LD_PRELOAD={self.petalink_so_path}; fi;\n\n' +
            self.cloud_storage.task_cmd_prehook() + '\n' +
            task.directives['cmd'] + '\n' +
            self.cloud_storage.task_cmd_posthook())

        # Determine whether this task should be run locally or on a remote cloud worker
        is_local_task = task.directives.get('cloud_args',
                                            dict()).get('local_task', False)
        log.info('Spawn ({}): {}'.format('Local' if is_local_task else 'Cloud',
                                         task))

        if is_local_task:
            return await self.spawn_local(task)

        # This is a cloud task
        return await self.spawn_cloud(task)

    async def spawn_cloud(self, task):
        cmd = task.directives['cmd']
        cpus_reserved = 0

        start_time = datetime.now()
        bytes_sent_bundle, bytes_received_bundle = list(), list()
        try:
            # Get file descriptor paths and file pointers for this task
            fd_paths = {
                fd_name: fd
                for fd_name, fd in zip(('stdin', 'stdout',
                                        'stderr'), self.get_fd_paths(task))
            }
            fd_filepointers = {
                fd_name: open(fd, fd_mode) if fd else None
                for fd_mode, (fd_name,
                              fd) in zip(('r', 'w', 'w'), fd_paths.items())
            }

            # Upload data inputs into cloud storage
            data_metrics = blob_inputs_to_remote(task.directives['input'],
                                                 self.cloud_storage)

            # Upload reference inputs into cloud storage
            reference_inputs = parse_reference_input(
                task.directives.get('cloud_args',
                                    dict()).get('reference_input', list()))
            ref_metrics = blob_inputs_to_remote(reference_inputs,
                                                self.cloud_storage,
                                                blob_basename=True)

            # If the user provides a non-URL path to a container and explicitly says it should be transferred,
            # then consider it similar to reference data and upload it to cloud storage
            singularity_container_uri = task.directives.get(
                'cloud_args', dict()).get('singularity_container')
            container_input = ([singularity_container_uri] if (
                singularity_container_uri is not None
                and not urllib.parse.urlparse(singularity_container_uri).scheme
                and get_cloud_directive('transfer_container_to_remote',
                                        task.directives)) else list())
            container_metrics = blob_inputs_to_remote(container_input,
                                                      self.cloud_storage)

            # Log metrics for input data
            total_input_metrics = data_metrics + ref_metrics + container_metrics
            for m in total_input_metrics:
                bytes_sent_bundle.append(m)
            bytes_sent_bundle.append({
                'name':
                'total',
                'size':
                sum([max(0, t['size']) for t in total_input_metrics]),
                'time':
                sum([max(0, t['time']) for t in total_input_metrics])
            })

            # Construct the cog-job-submit command for execution
            cjs_cmd = construct_cjs_cmd(
                task_body=cmd,
                service_url='http://beta.parallel.works:{}'.format(
                    self.pool_info['serviceport'])
                if self.is_pw_pool else self.pool_info['serviceurl'],
                cloud_storage=self.cloud_storage,
                cjs_stagein=None,
                cjs_stageout=None,
                cloud_downloads=task.directives['input'] + reference_inputs +
                container_input,
                cloud_uploads=task.directives['output'],
                cloud_scripts_dir=self.cloud_scripts_dir,
                singularity_container_uri=singularity_container_uri,
                task_name=task.name)
            log.debug(cjs_cmd)

            # Async submit as a subprocess
            p = await self.subprocess_sh(cjs_cmd, **fd_filepointers)

            # Once command is executed, update task with some process metadata
            task.state.update(
                stdout_path=fd_paths['stdout'],
                stderr_path=fd_paths['stderr'],
                label=f'CloudSwift({p.pid})',
            )

            log.info(f'CloudSwiftBackend spawned({p.pid}): {task.name}')
            rc = await p.wait()

            if rc != 0:
                log.info(f'Failed: {task.name}')
                return task.fail(p.returncode)
            else:
                # Download completed data files from cloud storage
                output_metrics = blob_outputs_to_local(
                    task.directives['output'], self.cloud_storage)
                log.info(f'Complete: {task.name}')

                # Log metrics for output data
                for m in output_metrics:
                    bytes_received_bundle.append(m)
                bytes_received_bundle.append({
                    'name':
                    'total',
                    'size':
                    sum([max(0, t['size']) for t in output_metrics]),
                    'time':
                    sum([max(0, t['time']) for t in output_metrics])
                })

                return task.complete(p.returncode)
        except CancelledError:
            task.state['err'] = 'Runner cancelled Backend.spawn()'
            return task.fail(-15)
        except Exception as e:
            log.error('Exception: {}'.format(e))
            traceback.print_exc()
            raise
        finally:
            for fp in fd_filepointers.values():
                if fp is not None:
                    fp.close()

            for i in range(cpus_reserved):
                self._cpu_sem.release()

            # Get task runtime and which node it ran on
            elapsed_time = datetime.now() - start_time
            hostname = 'UNAVAILABLE'
            try:
                with open(f'.{task.name}.hostname', 'r') as hostname_log:
                    hostname = hostname_log.read().strip()
                subprocess.call(['mv'] + glob.glob('*.remote.out') +
                                [self.cloud_logs_dir])
                subprocess.call(['mv'] + glob.glob('*.remote.err') +
                                [self.cloud_logs_dir])
                os.remove(f'.{task.name}.hostname')
            except:
                pass  # Fail silently

            CloudMetricsLogger.write_record({
                'task':
                task.name,
                'start_datetime':
                start_time.strftime('%Y-%m-%d %H:%M:%S'),
                'elapsed_time':
                str(elapsed_time),
                'end_datetime':
                datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                'in_files':
                bytes_sent_bundle,
                'out_files':
                bytes_received_bundle,
                'node':
                hostname
            })

            return task

    async def spawn_local(self, task):
        cpus_reserved = 0
        fd_filepointers = dict()

        try:
            for i in range(task.directives.get('cpus', 0)):
                await self._cpu_sem.acquire()
                cpus_reserved += 1

            fd_paths = {
                fd_name: fd
                for fd_name, fd in zip(('stdin', 'stdout',
                                        'stderr'), self.get_fd_paths(task))
            }
            fd_filepointers = {
                fd_name: open(fd, fd_mode) if fd else None
                for fd_mode, (fd_name,
                              fd) in zip(('r', 'w', 'w'), fd_paths.items())
            }

            p = await self.subprocess_sh(task.directives['cmd'],
                                         **fd_filepointers)

            task.state.update(
                stdout_path=fd_paths['stdout'],
                stderr_path=fd_paths['stderr'],
                label=f'Slurm({p.pid})',
            )

            log.info(f'LocalBackend spawned({p.pid}): {task.name}')
            rc = await p.wait()

            if rc != 0:
                log.info(f'Failed: {task.name}')
                return task.fail(p.returncode)
            else:
                log.info(f'Complete: {task.name}')
                return task.complete(p.returncode)
        except CancelledError:
            task.state['err'] = 'Runner cancelled Backend.spawn()'
            return task.fail(-15)
        finally:
            for fp in fd_filepointers.values():
                if fp is not None:
                    fp.close()

            for i in range(cpus_reserved):
                self._cpu_sem.release()

            return task

    async def subprocess_sh(self,
                            args,
                            *,
                            stdin=None,
                            stdout=None,
                            stderr=None,
                            cwd=None,
                            encoding=None,
                            errors=None,
                            env=None,
                            loop=None,
                            executable="/bin/bash"):
        """Asynchronous version of subprocess.run

        This will always use a shell to launch the subprocess, and it prefers
        /bin/bash (can be changed via arguments)"""
        log.debug(f'subprocess_sh:\n{args}')
        while 1:
            try:
                p = await create_subprocess_shell(args,
                                                  stdin=stdin,
                                                  stdout=stdout,
                                                  stderr=stderr,
                                                  cwd=cwd,
                                                  encoding=encoding,
                                                  errors=errors,
                                                  env=env,
                                                  loop=loop,
                                                  executable=executable)
                break
            except BlockingIOError as e:
                log.warning(f'System refusing new processes: {e}')
                await asyncio.sleep(self.bip)

        return p
Example No. 28
    def __init__(self,
                 pw_pool_name=None,
                 pw_api_key=None,
                 cpus=None,
                 blocking_io_penalty=None,
                 cloud_storage_provider='azure',
                 **kwargs):
        """
        If ``pw_pool_name`` and ``pw_api_key`` are both given valid values, this backend will use ParallelWorks to
        manage resources elastically. If they remain ``None``, then it is assumed that the user has manually started
        a pool using the included ``start_pool.py`` script. Note that both approaches require the use of binaries
        available as part of the Swift workflow language.

        :param pw_pool_name: str Name of an active pool in PW
        :param pw_api_key: str Valid API key for a PW account
        :param cpus: int The total number of CPUs available to the worker pool
        :param blocking_io_penalty: int Delay (in seconds) when a BlockingIOError prevents a new process from spawning.
        :param cloud_storage_provider: str Name of the cloud storage provider, which must match one of the keys
            in ``jetstream.cloud.base.CLOUD_STORAGE_PROVIDERS``
        """
        super().__init__()
        self.is_pw_pool = pw_pool_name is not None
        if pw_pool_name is not None and pw_api_key is not None:
            self.pool_info = get_pool_info(pw_pool_name, pw_api_key)
            log.info('PW Pool info: {}'.format(self.pool_info))
        else:
            self.pool_info = {
                'cpus': kwargs['pool_info']['cpus_per_worker'],
                'maxworkers': kwargs['pool_info']['workers'],
                'serviceurl': kwargs['pool_info']['serviceurl'],
            }
        self.total_cpus_in_pool = self.pool_info['cpus'] * self.pool_info[
            'maxworkers']
        self.bip = blocking_io_penalty \
                   or jetstream.settings['backends']['local']['blocking_io_penalty'].get(int)
        self._cpu_sem = BoundedSemaphore(int(self.total_cpus_in_pool))
        self.project_dir = os.getcwd()
        try:
            os.remove('cjs_cmds_debug.sh')
        except:
            pass  # Fail silently

        # Make directory for cjs launch scripts
        self.cloud_scripts_dir = os.path.join(self.project_dir,
                                              'cloud_scripts')
        os.makedirs(self.cloud_scripts_dir, exist_ok=True)

        self.cloud_logs_dir = os.path.join(self.project_dir, 'cloud_logs')
        os.makedirs(self.cloud_logs_dir, exist_ok=True)

        # Instantiate a cloud storage provider
        storage_class = dynamic_import(
            CLOUD_STORAGE_PROVIDERS[cloud_storage_provider])
        self.cloud_storage = storage_class(
            **kwargs.get(storage_class.config_key, dict()))

        # Initialize cloud metrics log
        CloudMetricsLogger.init(at=os.path.join(
            self.project_dir, 'cloud_metrics_{}.yaml'.format(
                datetime.now().strftime('%Y%m%d%H%M%S'))))

        # Get path to Petalink library, if it exists
        self.petalink_so_path = kwargs.get('petalink_path',
                                           '/usr/lib/petalink.so')

        log.info(
            f'CloudSwiftBackend initialized with {self.total_cpus_in_pool} cpus'
        )
Example No. 29
async def fetch_source_json(session: ClientSession, url: str) -> List[dict]:
    semaphore: BoundedSemaphore = BoundedSemaphore(1)
    data: List[dict] = await fetch(semaphore, session, url, return_type="json")
    source_json = rename_source_json_keys(data)
    logging.debug(f"Files available for download: {len(source_json)}")
    return source_json
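
`fetch` itself is not included in the snippet; a plausible shape, assuming it throttles on the semaphore and decodes the body according to `return_type`, might be:

from asyncio import BoundedSemaphore
from aiohttp import ClientSession

async def fetch(semaphore: BoundedSemaphore, session: ClientSession,
                url: str, return_type: str = "json"):
    # Hold one permit while the request is in flight.
    async with semaphore:
        async with session.get(url) as response:
            response.raise_for_status()
            if return_type == "json":
                # content_type=None tolerates text/plain responses such as
                # those served by raw.githubusercontent.com
                return await response.json(content_type=None)
            return await response.read()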
Example No. 30
    def send_reply(self, writer, with_error=False):
        # send reply with error
        self._reply[0] = 10 if with_error else 2
        self._reply[5:5 + 6] = self._get_timestamp()
        checksum = self._calc_checksum(self._reply[:-1])
        self._reply[-1] = np.array([checksum], dtype=np.int8)

        writer.write(bytes(self._reply))


if __name__ == "__main__":

    # NOTE: required so that the SIGINT signal is properly captured on Windows
    def wakeup():
        # Call again
        loop.call_later(0.1, wakeup)

    srv = SoundCardTCPServer("localhost", 9999)

    loop = asyncio.SelectorEventLoop()
    loop.call_later(0.1, wakeup)
    asyncio.set_event_loop(loop)
    sem = BoundedSemaphore(value=1, loop=loop)

    try:
        loop.run_until_complete(srv.start_server(sem))
    except KeyboardInterrupt as k:
        print(f'Event captured: {k}')
        srv.close()
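
A note on portability: the `loop` argument accepted by `BoundedSemaphore` (and the other asyncio synchronization primitives) was deprecated in Python 3.8 and removed in 3.10, so on current interpreters the setup above would simply omit it:

    sem = BoundedSemaphore(value=1)
    loop.run_until_complete(srv.start_server(sem))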
Example No. 31
#!/usr/bin/python3
import sys
import traceback2
import fdb
import datetime
from asyncio import BoundedSemaphore
import tmtapi.settings as settings
from tmtapi.settings import logger

# db = fdb.connect(**settings.FDB)
db_semaphore = BoundedSemaphore(settings.FDB_PARALLEL_OPERS)
term_accounts_semaphore = BoundedSemaphore()
term_accounts = {}
pay_systems_semaphore = BoundedSemaphore()
pay_systems = {}


async def get_driver_id(term_account):
    with (await term_accounts_semaphore):
        global term_accounts
        if term_account not in term_accounts:
            with open(f'{settings.SQL_DIR}/driver_terminal_accounts.sql',
                      'r') as sql:
                SQL = ''.join(sql.readlines())
            with (await db_semaphore), fdb.connect(**settings.FDB) as db:
                with db.cursor() as c:
                    c.execute(SQL)
                    term_accounts = {k: int(v) for k, v in c.fetchall()}
        try:
            driver_id = term_accounts[term_account]
        except: