Пример #1
0
class BackendQueue:
    """Base class pairing an asyncio ``Queue`` with a single writer task.

    ``start`` is idempotent: once a writer has been scheduled, further
    calls return immediately.  Subclasses implement :meth:`writer`.
    """

    def start(self, loop: asyncio.AbstractEventLoop):
        """Create ``self.queue`` and schedule ``self.writer`` on *loop* (once)."""
        if getattr(self, 'started', False):
            # prevent a backend callback from starting more than 1 writer
            # and creating more than 1 queue
            return
        self.queue = Queue()
        loop.create_task(self.writer())
        self.started = True

    async def writer(self):
        """Subclass hook: consume items from ``self.queue``."""
        raise NotImplementedError

    @asynccontextmanager
    async def read_queue(self):
        """Yield a single queue item; mark it done when the block exits."""
        item = await self.queue.get()
        yield item
        self.queue.task_done()

    @asynccontextmanager
    async def read_many_queue(self, count: int):
        """Yield a list of exactly *count* items, marking all done on exit."""
        batch = [await self.queue.get() for _ in range(count)]

        yield batch

        for _ in range(count):
            self.queue.task_done()
 async def consumer_log_response(self, res_queue: queues.Queue):
     """Drain *res_queue*, printing each response's URL, until a ``None``
     sentinel is received.

     Bug fix: the sentinel item is now also acknowledged with
     ``task_done()`` — previously the ``break`` skipped it, leaving the
     queue's unfinished-task count at 1 so ``res_queue.join()`` would
     hang forever.
     """
     while True:
         print(res_queue.qsize())
         item: requests.Response = await res_queue.get()
         if item is None:
             # account for the sentinel too, otherwise join() never returns
             res_queue.task_done()
             break
         print(item.url)
         res_queue.task_done()
Пример #3
0
 async def consumer(self, queue: Queue, session: ClientSession):
     """Worker loop: execute queued ``AiohttpAction`` items until cancelled.

     Every dequeued action bumps ``self.task_count`` and is executed with
     the shared *session*; exceptions are logged (the worker keeps
     running) and the queue item is always marked done.
     """
     while True:
         job: AiohttpAction = await queue.get()
         try:
             self.task_count += 1
             await job.do_action(session, queue)
         except Exception:
             logger.exception("Queue worker %s caught an exception from %r",
                              self.uid, job)
         queue.task_done()
 async def producer_get_site(self, site_queue: queues.Queue,
                             response_queue: queues.Queue):
     """Fetch each site named on *site_queue* and push the response onto
     *response_queue*; a ``None`` site is the shutdown sentinel, which is
     forwarded to *response_queue* so downstream consumers stop too.

     Bug fix: the ``None`` sentinel is now acknowledged with
     ``task_done()`` before breaking — previously ``site_queue.join()``
     could never complete.
     """
     while True:
         site: str = await site_queue.get()
         if site is None:
             # account for the sentinel so site_queue.join() can finish
             site_queue.task_done()
             break
         # NOTE(review): requests.get is a blocking call inside a coroutine
         # and stalls the event loop; an async client (e.g. aiohttp) would
         # be preferable, but switching would add a dependency.
         res = requests.get(site)
         print(res.status_code)
         await response_queue.put(res)
         site_queue.task_done()
     await response_queue.put(None)
Пример #5
0
async def searchWorker(name: str, searchQueue: Queue, queueProgress: tqdm):
    """Queue consumer that runs search queries and persists relevant links.

    Loops forever: pulls a work item off *searchQueue*, fetches search
    results for its query over a fresh aiohttp session (60 s total
    timeout), filters results down to relevant titles, and saves the
    links against the matching book row.  Empty/failed fetches are
    re-queued for another attempt.

    Args:
        name: worker label, used only in console output.
        searchQueue: queue of QueueObj items — assumes each has 'query'
            and 'id' keys; TODO confirm against the producer.
        queueProgress: tqdm bar advanced once per successfully saved item.
    """
    while True:
        # Get a "work item" out of the queue.
        data: QueueObj = await searchQueue.get()
        query = data['query']
        url = queryToUrl(query)
        print(Fore.MAGENTA + f'{name} got url: {url} to work on' + Fore.RESET)
        start = time.time()
        custom_timeout = aiohttp.ClientTimeout(total=60)  # type: ignore
        # custom_timeout.total = 2*60
        async with aiohttp.ClientSession(timeout=custom_timeout) as session:
            # nonlocal results
            # results stays [] on any error, which routes this item into
            # the re-queue branch below.
            results = []
            try:
                results = await getSearchQueryResults(session, url)
            except asyncio.TimeoutError as e:
                logging.exception(
                    Fore.RED +
                    f'Exception raised by worker = {name}; error = {e}' +
                    Fore.RESET)
            except Exception as e:  # pylint: disable=broad-except
                logging.exception(
                    Fore.RED +
                    f'Exception raised by worker = {name}; error = {e}' +
                    Fore.RESET)
        print(
            Fore.YELLOW +
            f'{name}\'s work for url {url} done; time taken {time.time() - start} seconds'
            + Fore.RESET)
        # Success = at least one result whose 'results' mapping has entries
        # for this query.
        if (len(results) > 0 and len(results[0]['results']) > 0
                and len(results[0]['results'][query]) > 0):
            # .encode keeps non-ASCII queries printable on narrow consoles.
            print(f'query: \'{query}\' fetched with data'.encode(
                encoding='utf-8'))
            allResults = fromSearchData(results, query)
            # task_done before the (synchronous) save work keeps queue
            # accounting correct even if saving raises.
            searchQueue.task_done()
            onlyTitles = [item['title'] for item in allResults]
            relevantIndices = getRelevantTitleIndices(onlyTitles)
            relevantSearchData = [
                allResults[index] for index in relevantIndices
            ]
            # presumably booksDf is a module-level pandas DataFrame keyed
            # by 'id' — verify against the enclosing module.
            bookRow = booksDf[booksDf['id'] == data['id']].copy()
            bookRow['searchLinks'] = [relevantSearchData]
            bookRow['query'] = query
            saveLinks(bookRow)
            queueProgress.update(1)
            # searchResults.append({
            #     query: results
            # })
        else:
            print(f'query: \'{query}\' unsuccessful, adding to queue again'.
                  encode(encoding='utf-8'))
            # mark this attempt done, then retry by putting the item back
            searchQueue.task_done()
            searchQueue.put_nowait(data)
Пример #6
0
class BackendQueue:
    """Async queue backend: a writer task drains items posted to ``self.queue``.

    Subclasses implement :meth:`writer`; :meth:`start` wires the queue and
    the writer task onto the given event loop.
    """

    def start(self, loop: asyncio.AbstractEventLoop):
        """Create the queue and schedule the writer task on *loop*.

        Guarded so that a repeated call (e.g. from a backend callback)
        cannot start a second writer task or replace the queue —
        previously every call created a fresh Queue and writer, orphaning
        any items already enqueued.
        """
        if getattr(self, 'started', False):
            return
        self.queue = Queue()
        loop.create_task(self.writer())
        self.started = True

    async def writer(self):
        """Consume ``self.queue``; must be overridden by subclasses."""
        raise NotImplementedError

    @asynccontextmanager
    async def read_queue(self):
        """Yield one queue item; mark it done when the block exits."""
        update = await self.queue.get()
        yield update
        self.queue.task_done()
Пример #7
0
async def queued_worker_wrapper(
    coroutine_function: Callable[[aiohttp.ClientSession, str, str], Awaitable],
    session: aiohttp.ClientSession,
    queue: Queue,
) -> None:
    """Worker loop: pull ``(url, filename)`` pairs off *queue*, await
    *coroutine_function* on each, and mark the queue item done.

    Runs until cancelled; ``queue.get()`` blocks while the queue is empty.

    Bug fix: the progress messages printed a literal "(unknown)"
    placeholder where the filename belongs, and misspelled "Coroutine".
    """
    while True:
        print("Getting item from queue")
        url, filename = await queue.get()
        print(f"Got {url}, {filename} from queue")
        print(f"Running coroutine for {url}, {filename}")
        await coroutine_function(session, url, filename)
        print(f"Coroutine finished for {url}, {filename}")
        print("Letting queue know that the task is done")
        queue.task_done()
Пример #8
0
async def patched_alerta(service, stop_event, bot_alert_queue: Queue):
    """Yield an ``AlertaRunner`` whose ``alert`` is redirected into
    *bot_alert_queue* (heartbeats disabled).

    On teardown the runner's stop event is set and the queue is drained,
    acknowledging every leftover alert with ``task_done``.
    """
    runner = AlertaRunner(msg_service=service,
                          stop_event=stop_event,
                          send_heartbeats=False)

    def capture_alert(item):
        # forward alerts into the bot's queue instead of the real sink
        bot_alert_queue.put_nowait(item)

    runner.alert = capture_alert
    asyncio.create_task(runner.start())
    # give the runner a moment to come up before handing it to the test
    await asyncio.sleep(.5)
    yield runner
    runner.stop_event.set()
    while not bot_alert_queue.empty():
        bot_alert_queue.get_nowait()
        bot_alert_queue.task_done()
Пример #9
0
class BackendQueue:
    """Minimal asyncio-queue backend: one writer task per instance.

    ``start`` is idempotent; ``read_queue`` hands out one item at a time
    inside an async context manager that does the ``task_done`` accounting.
    """

    def start(self, loop: asyncio.AbstractEventLoop):
        """Create the queue and schedule the writer coroutine on *loop*, once."""
        if getattr(self, 'started', False):
            # a backend callback must not spawn a second writer/queue
            return
        self.queue = Queue()
        loop.create_task(self.writer())
        self.started = True

    async def writer(self):
        """Subclass hook: consume ``self.queue``."""
        raise NotImplementedError

    @asynccontextmanager
    async def read_queue(self):
        """Yield the next queued item; mark it done on exit."""
        item = await self.queue.get()
        yield item
        self.queue.task_done()
Пример #10
0
async def work(q: Queue, sess: ClientSession):
    """Worker: take candidate strings from *q* and probe them.

    A candidate that matches as a prefix ("<text>%") is either recorded
    as complete (exact match) or branched into longer candidates.  Every
    item is acknowledged with ``task_done`` so ``Queue.join()`` works.
    """
    while True:
        text = await q.get()

        # Probe "<text>%": does any target string start with this prefix?
        if not await test(sess, escape(text) + '%'):
            q.task_done()
            continue

        print(f'PASS - "{text}"%')

        # Exact match without the trailing % means the string is complete.
        if await test(sess, escape(text)):
            print(f'FOUND - "{text}"')
            found.add(text)
        else:
            # Prefix matches but isn't complete -- explore longer candidates.
            await branch(q, text)

        q.task_done()
Пример #11
0
async def searchWorker(name: str, searchQueue: Queue, queueProgress: tqdm):
    """Queue consumer that scrapes fact sentences from each item's factsLink.

    Loops forever: pulls a work item, extracts and TF-IDF-summarizes the
    page text (PDF links are skipped), then saves the facts against the
    matching book row.  Items yielding too few sentences are re-queued.

    Args:
        name: worker label, used only in console output.
        searchQueue: queue of QueueObj items — assumes each has
            'factsLink' and 'id' keys; TODO confirm against the producer.
        queueProgress: tqdm bar advanced once per processed item.
    """
    while True:
        # Get a "work item" out of the queue.
        data: QueueObj = await searchQueue.get()
        factsLink = data['factsLink']
        # url = queryToUrl(query)
        print(Fore.MAGENTA + f'{name} got url: {factsLink} to work on' +
              Fore.RESET)
        start = time.time()
        custom_timeout = aiohttp.ClientTimeout(total=60)  # type: ignore
        # custom_timeout.total = 2*60
        async with aiohttp.ClientSession(timeout=custom_timeout) as session:
            # nonlocal results
            # defaults keep the branches below well-defined when the link
            # is skipped or extraction raises
            summary_sents = {}
            docs_dict = {0: ''}
            sents_dict = {0: ''}
            isSkipped = True
            try:
                # PDFs are not parsed; only non-PDF paths are fetched
                if (urlparse(factsLink).path.endswith(".pdf") == False):
                    isSkipped = False
                    docs_dict, sents_dict = getDocsAndSentsPerUrl(factsLink, 5)
                    summary_sents = getTfidfSummary(tfidfModel=tfidf,
                                                    dictionary=dictionary,
                                                    docs_dict=docs_dict,
                                                    sents_dict=sents_dict)
            except asyncio.TimeoutError as e:
                logging.exception(
                    Fore.RED +
                    f'Exception raised by worker = {name}; error = {e}' +
                    Fore.RESET)
            except Exception as e:  # pylint: disable=broad-except
                logging.exception(
                    Fore.RED +
                    f'Exception raised by worker = {name}; error = {e}' +
                    Fore.RESET)
        print(
            Fore.YELLOW +
            f'{name}\'s work for url {factsLink} done; time taken {time.time() - start} seconds'
            + Fore.RESET)
        if (isSkipped == True):
            # PDF link: record an empty facts list so the row is still saved
            print(f'factsLink: \'{factsLink}\' skipped because its pdf'.encode(
                encoding='utf-8'))
            searchQueue.task_done()
            # presumably dfDeduped is a module-level pandas DataFrame keyed
            # by 'id' — verify against the enclosing module.
            bookRow = dfDeduped[dfDeduped['id'] == data['id']].copy()
            bookRow['factsLink'] = factsLink
            bookRow['facts'] = [['']]
            saveFacts(bookRow)
            queueProgress.update(1)
        elif (len(sents_dict[0]) > 2):
            # enough extracted text: persist the summary sentences
            print(f'factsLink: \'{factsLink}\' fetched with > 2 sentences'.
                  encode(encoding='utf-8'))
            searchQueue.task_done()
            bookRow = dfDeduped[dfDeduped['id'] == data['id']].copy()
            bookRow['factsLink'] = factsLink
            bookRow['facts'] = [[sent for score, sent in summary_sents[0]]]
            saveFacts(bookRow)
            queueProgress.update(1)
        else:
            # too little text: acknowledge this attempt, then retry later
            print(
                f'factsLink: \'{factsLink}\' unsuccessful, returned < 2 sents, adding to queue again'
                .encode(encoding='utf-8'))
            searchQueue.task_done()
            queueProgress.update(1)
            searchQueue.put_nowait(data)
Пример #12
0
class BackendQueue:
    """Queue backend that can run its writer in-process (asyncio) or in a
    separate process (multiprocessing Pipe + Process).

    In multiprocess mode ``self.queue`` is a ``(recv_conn, send_conn)``
    pipe pair; otherwise it is an asyncio ``Queue``.  ``SHUTDOWN_SENTINEL``
    (defined elsewhere in the module) signals the writer to stop.
    """

    def start(self, loop: asyncio.AbstractEventLoop, multiprocess=False):
        """Create the queue/pipe and start the writer task or process, once."""
        if hasattr(self, 'started') and self.started:
            # prevent a backend callback from starting more than 1 writer and creating more than 1 queue
            return
        self.multiprocess = multiprocess
        if self.multiprocess:
            # queue[0] = receive end, queue[1] = send end
            self.queue = Pipe(duplex=False)
            self.worker = Process(target=BackendQueue.worker, args=(self.writer,), daemon=True)
            self.worker.start()
        else:
            self.queue = Queue()
            self.worker = loop.create_task(self.writer())
        self.started = True

    async def stop(self):
        """Send the shutdown sentinel to the writer and mark not running."""
        if self.multiprocess:
            self.queue[1].send(SHUTDOWN_SENTINEL)
            # wait for the writer process to exit
            self.worker.join()
        else:
            await self.queue.put(SHUTDOWN_SENTINEL)
        self.running = False

    @staticmethod
    def worker(writer):
        """Process entry point: run *writer* on a fresh event loop."""
        try:
            loop = asyncio.new_event_loop()
            loop.run_until_complete(writer())
        except KeyboardInterrupt:
            # normal Ctrl-C shutdown of the daemon process
            pass

    async def writer(self):
        """Subclass hook: consume queued data; must be overridden."""
        raise NotImplementedError

    async def write(self, data):
        """Send *data* to the writer via the pipe or the asyncio queue."""
        if self.multiprocess:
            self.queue[1].send(data)
        else:
            await self.queue.put(data)

    @asynccontextmanager
    async def read_queue(self) -> list:
        """Yield a batch (possibly empty) of pending items as a list.

        Multiprocess mode: blocking ``recv`` of a single message.
        Asyncio mode: one awaited item when the queue is empty, otherwise
        everything that was already queued at entry.  A sentinel yields
        ``[]``.
        """
        if self.multiprocess:
            msg = self.queue[0].recv()
            if msg == SHUTDOWN_SENTINEL:
                self.running = False
                yield []
            else:
                yield [msg]
        else:
            # snapshot the depth so items enqueued during the read don't
            # extend this batch
            current_depth = self.queue.qsize()
            if current_depth == 0:
                update = await self.queue.get()
                # NOTE(review): unlike the other sentinel branches, this one
                # does not set self.running = False — confirm intentional.
                if update == SHUTDOWN_SENTINEL:
                    yield []
                else:
                    yield [update]
                self.queue.task_done()
            else:
                ret = []
                count = 0
                while current_depth > count:
                    update = await self.queue.get()
                    count += 1
                    if update == SHUTDOWN_SENTINEL:
                        self.running = False
                        break
                    ret.append(update)

                yield ret

                # count includes the sentinel (if any), so accounting stays
                # balanced for queue.join()
                for _ in range(count):
                    self.queue.task_done()