Example #1
from queue import PriorityQueue


def printKclosest(arr, n, x, k):
    # Build a max heap of differences (stored negated, since
    # PriorityQueue is a min heap) over the first k elements.
    pq = PriorityQueue()
    for i in range(k):
        pq.put((-abs(arr[i] - x), i))

    # Now process the remaining elements.
    for i in range(k, n):
        diff = abs(arr[i] - x)
        p, pi = pq.get()
        curr = -p

        # If the current element's difference exceeds the root's,
        # put the root back; otherwise replace the root.
        if diff > curr:
            pq.put((-curr, pi))
        else:
            pq.put((-diff, i))

    # Print the contents of the heap: the k closest elements.
    while not pq.empty():
        p, q = pq.get()
        print(arr[q])
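A minimal driver for the function above; the array and the query values here are illustrative:

arr = [12, 5, 787, 1, 23]
# Print the k = 2 values closest to x = 20: 12 and 23.
printKclosest(arr, len(arr), 20, 2)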
Example #2
from collections import defaultdict
from queue import PriorityQueue


def djikstra(grid, start, end):
    distance = defaultdict(lambda: float("inf"))
    distance[start] = 0
    frontier = PriorityQueue()
    frontier.put_nowait((0, start))
    visited = set([])
    retrace = {}

    while not frontier.empty():
        (d, p) = frontier.get_nowait()
        if p in visited:
            continue

        visited.add(p)

        if p == end:
            break

        for n in reachable(grid, p):
            # reachable() is an external helper that yields the walkable
            # neighbours of p; every edge has weight 1.
            if distance[n] > d + 1:
                distance[n] = d + 1
                retrace[n] = p
                frontier.put_nowait((distance[n], n))

    # Backtrack from end to start (assumes end was reached).
    current = end
    forward = {}
    while current != start:
        forward[retrace[current]] = current
        current = retrace[current]

    return ({p: distance[p] for p in visited}, forward)
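The snippet depends on an external reachable() helper. A possible sketch of that helper and a small driver, assuming the grid is a set of walkable (x, y) cells; both the helper and the grid layout are assumptions, not part of the original code:

def reachable(grid, p):
    # Hypothetical helper: yield the 4-neighbours of p that are walkable.
    x, y = p
    for n in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
        if n in grid:
            yield n

grid = {(x, y) for x in range(3) for y in range(3)} - {(1, 1)}
distances, forward = djikstra(grid, (0, 0), (2, 2))
print(distances[(2, 2)])  # 4: the shortest path routes around the blocked cell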
Example #3
async def queue_dumper(self):
    """
    Dumps the status of the queue to the terminal (eventually a file) every second.
    """
    # TODO: Ensure this also prints to a file.
    while True:
        if not self.queue.qsize():
            await asyncio.sleep(1)
        else:
            # Drain the queue into a copy, print it, then restore the items.
            _copy = PriorityQueue()
            while not self.queue.empty():
                await _copy.put(await self.queue.get())
            print(chr(27) + "[2J")  # ANSI escape to clear the terminal (Ctrl+L effect)
            while not _copy.empty():
                element = await _copy.get()
                print(element)
                await self.queue.put(element)
        await asyncio.sleep(1)
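The drain-and-restore loop above is the usual way to inspect an asyncio PriorityQueue without consuming it, since the queue exposes no iteration API. The same idea as a standalone sketch (all names illustrative):

import asyncio
from asyncio import PriorityQueue


async def peek_all(queue):
    # Return the queue's items in priority order without consuming them.
    drained = []
    while not queue.empty():
        drained.append(await queue.get())
    for item in drained:
        await queue.put(item)
    return drained


async def main():
    q = PriorityQueue()
    for item in [(2, "b"), (1, "a"), (3, "c")]:
        await q.put(item)
    print(await peek_all(q))  # [(1, 'a'), (2, 'b'), (3, 'c')]

asyncio.run(main())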
Example #4
from collections import defaultdict
from queue import PriorityQueue


def reverse_djikstra(grid, end):
    distance = defaultdict(lambda: float("inf"))
    distance[end] = 0
    frontier = PriorityQueue()
    frontier.put_nowait((0, end))
    visited = set([])

    while not frontier.empty():
        (d, p) = frontier.get_nowait()

        if p in visited:
            continue
        else:
            visited.add(p)

        for n in reachable(grid, p):
            if distance[n] > d + 1:
                distance[n] = d + 1
                frontier.put_nowait((distance[n], n))
    return {p: distance[p] for p in visited}
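A possible driver, reusing the hypothetical reachable() helper sketched under Example #2; the call computes a distance field from the goal that can then be queried per cell:

grid = {(x, y) for x in range(3) for y in range(3)} - {(1, 1)}
field = reverse_djikstra(grid, (2, 2))
print(field[(0, 0)])  # 4: each visited cell knows its distance to the goal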
Example #5
class TaskQueue(Generic[TTask]):
    """
    TaskQueue keeps priority-order track of pending tasks, with a limit on number pending.

    A producer of tasks will insert pending tasks with await add(), which will not return until
    all tasks have been added to the queue.

    A task consumer calls await get() to retrieve tasks for processing. Tasks will be returned in
    priority order. If no tasks are pending, get()
    will pause until at least one is available. Only one consumer will have a task "checked out"
    from get() at a time.

    After tasks are successfully completed, the consumer will call complete() to remove them from
    the queue. The consumer doesn't need to complete all tasks, but any uncompleted tasks will be
    considered abandoned. Another consumer can pick them up at the next get() call.
    """

    # a function that determines the priority order (lower int is higher priority)
    _order_fn: FunctionProperty[Callable[[TTask], Any]]

    # batches of tasks that have been started but not completed
    _in_progress: Dict[int, Tuple[TTask, ...]]

    # all tasks that have been placed in the queue and have not been started
    _open_queue: 'PriorityQueue[Tuple[Any, TTask]]'

    # all tasks that have been placed in the queue and have not been completed
    _tasks: Set[TTask]

    def __init__(self,
                 maxsize: int = 0,
                 order_fn: Callable[[TTask], Any] = identity,
                 *,
                 loop: AbstractEventLoop = None) -> None:
        self._maxsize = maxsize
        self._full_lock = Lock(loop=loop)
        self._open_queue = PriorityQueue(maxsize, loop=loop)
        self._order_fn = order_fn
        self._id_generator = count()
        self._tasks = set()
        self._in_progress = {}

    async def add(self, tasks: Tuple[TTask, ...]) -> None:
        """
        add() will insert as many tasks as can be inserted until the queue fills up.
        Then it will pause until the queue is no longer full, and continue adding tasks.
        It will finally return when all tasks have been inserted.
        """
        if not isinstance(tasks, tuple):
            raise ValidationError(
                f"must pass a tuple of tasks to add(), but got {tasks!r}")

        already_pending = self._tasks.intersection(tasks)
        if already_pending:
            raise ValidationError(
                f"Duplicate tasks detected: {already_pending!r} are already present in the queue"
            )

        # make sure to insert the highest-priority items first, in case queue fills up
        remaining = tuple(
            sorted((self._order_fn(task), task) for task in tasks))

        while remaining:
            num_tasks = len(self._tasks)

            if self._maxsize <= 0:
                # no cap at all, immediately insert all tasks
                open_slots = len(remaining)
            elif num_tasks < self._maxsize:
                # there is room to add at least one more task
                open_slots = self._maxsize - num_tasks
            else:
                # wait until there is room in the queue
                await self._full_lock.acquire()

                # the current number of tasks has changed, can't reuse num_tasks
                num_tasks = len(self._tasks)
                open_slots = self._maxsize - num_tasks

            queueing, remaining = remaining[:open_slots], remaining[open_slots:]

            for task in queueing:
                # There will always be room in _open_queue until _maxsize is reached
                try:
                    self._open_queue.put_nowait(task)
                except QueueFull as exc:
                    task_idx = queueing.index(task)
                    qsize = self._open_queue.qsize()
                    raise QueueFull(
                        f'TaskQueue unsuccessful in adding task {task[1]!r} because qsize={qsize}, '
                        f'num_tasks={num_tasks}, maxsize={self._maxsize}, open_slots={open_slots}, '
                        f'num queueing={len(queueing)}, len(_tasks)={len(self._tasks)}, task_idx='
                        f'{task_idx}, queuing={queueing}, original msg: {exc}',
                    )

            unranked_queued = tuple(task for _rank, task in queueing)
            self._tasks.update(unranked_queued)

            if self._full_lock.locked() and len(self._tasks) < self._maxsize:
                self._full_lock.release()

    def get_nowait(self,
                   max_results: int = None) -> Tuple[int, Tuple[TTask, ...]]:
        """
        Get pending tasks. If no tasks are pending, raise an exception.

        :param max_results: return up to this many pending tasks. If None, return all pending tasks.
        :return: (batch_id, tasks to attempt)
        :raise ~asyncio.QueueFull: if no tasks are available
        """
        if self._open_queue.empty():
            raise QueueFull("No tasks are available to get")
        else:
            pending_tasks = self._get_nowait(max_results)

            # Generate a pending batch of tasks, so uncompleted tasks can be inferred
            next_id = next(self._id_generator)
            self._in_progress[next_id] = pending_tasks

            return (next_id, pending_tasks)

    async def get(self,
                  max_results: int = None) -> Tuple[int, Tuple[TTask, ...]]:
        """
        Get pending tasks. If no tasks are pending, wait until a task is added.

        :param max_results: return up to this many pending tasks. If None, return all pending tasks.
        :return: (batch_id, tasks to attempt)
        """
        if max_results is not None and max_results < 1:
            raise ValidationError(
                "Must request at least one task to process, not {max_results!r}"
            )

        # if the queue is empty, wait until at least one item is available
        queue = self._open_queue
        if queue.empty():
            _rank, first_task = await queue.get()
        else:
            _rank, first_task = queue.get_nowait()

        # In order to return from get() as soon as possible, never await again.
        # Instead, take only the tasks that are already available.
        if max_results is None:
            remaining_count = None
        else:
            remaining_count = max_results - 1
        remaining_tasks = self._get_nowait(remaining_count)

        # Combine the first and remaining tasks
        all_tasks = (first_task, ) + remaining_tasks

        # Generate a pending batch of tasks, so uncompleted tasks can be inferred
        next_id = next(self._id_generator)
        self._in_progress[next_id] = all_tasks

        return (next_id, all_tasks)

    def _get_nowait(self, max_results: int = None) -> Tuple[TTask, ...]:
        queue = self._open_queue

        # How many results do we want?
        available = queue.qsize()
        if max_results is None:
            num_tasks = available
        else:
            num_tasks = min((available, max_results))

        # Combine the remaining tasks with the first task we already pulled.
        ranked_tasks = tuple(queue.get_nowait() for _ in range(num_tasks))

        # strip out the rank value used internally for sorting in the priority queue
        return tuple(task for _rank, task in ranked_tasks)

    def complete(self, batch_id: int, completed: Tuple[TTask, ...]) -> None:
        if batch_id not in self._in_progress:
            raise ValidationError(
                f"batch id {batch_id} not recognized, with tasks {completed!r}"
            )

        attempted = self._in_progress.pop(batch_id)

        unrecognized_tasks = set(completed).difference(attempted)
        if unrecognized_tasks:
            self._in_progress[batch_id] = attempted
            raise ValidationError(
                f"cannot complete tasks {unrecognized_tasks!r} in this batch, only {attempted!r}"
            )

        incomplete = set(attempted).difference(completed)

        for task in incomplete:
            # These tasks are already counted in the total task count, so there will be room
            self._open_queue.put_nowait((self._order_fn(task), task))

        self._tasks.difference_update(completed)

        if self._full_lock.locked() and len(self._tasks) < self._maxsize:
            self._full_lock.release()

    def __contains__(self, task: TTask) -> bool:
        """Determine if a task has been added and not yet completed"""
        return task in self._tasks
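A minimal sketch of the producer/consumer flow described in the class docstring, assuming TaskQueue and its dependencies are importable from the project that defines them (it is not a stdlib class, and it targets the asyncio API of the Python versions that project supports):

import asyncio


async def main():
    queue = TaskQueue(maxsize=10)        # identity order_fn: ints sort naturally
    await queue.add((3, 1, 2))
    batch_id, tasks = await queue.get()  # tasks come back in priority order
    print(tasks)                         # (1, 2, 3)
    queue.complete(batch_id, tasks[:2])  # task 3 is abandoned back to the queue

asyncio.run(main())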
Example #6
class BrokerHandler(BrokerHandlerSetup):
    """Broker Handler class."""

    __slots__ = "_handlers", "_records", "_retry", "_queue", "_consumers", "_consumer_concurrency"

    def __init__(
        self,
        records: int,
        handlers: dict[str, Optional[Callable]],
        retry: int,
        publisher: BrokerPublisher,
        consumer_concurrency: int = 15,
        **kwargs: Any,
    ):
        super().__init__(**kwargs)
        self._handlers = handlers
        self._records = records
        self._retry = retry

        self._queue = PriorityQueue(maxsize=self._records)
        self._consumers: list[Task] = list()
        self._consumer_concurrency = consumer_concurrency

        self._publisher = publisher

    @classmethod
    def _from_config(cls, config: MinosConfig, **kwargs) -> BrokerHandler:
        kwargs["handlers"] = cls._get_handlers(config, **kwargs)
        kwargs["publisher"] = cls._get_publisher(**kwargs)
        # noinspection PyProtectedMember
        return cls(**config.broker.queue._asdict(), **kwargs)

    @staticmethod
    def _get_handlers(
        config: MinosConfig, handlers: dict[str, Optional[Callable]] = None, **kwargs
    ) -> dict[str, Callable[[BrokerRequest], Awaitable[Optional[BrokerResponse]]]]:
        if handlers is None:
            builder = EnrouteBuilder(*config.services, middleware=config.middleware)
            decorators = builder.get_broker_command_query_event(config=config, **kwargs)
            handlers = {decorator.topic: fn for decorator, fn in decorators.items()}
        return handlers

    # noinspection PyUnusedLocal
    @staticmethod
    @inject
    def _get_publisher(
        publisher: Optional[BrokerPublisher] = None,
        broker_publisher: BrokerPublisher = Provide["broker_publisher"],
        **kwargs,
    ) -> BrokerPublisher:
        if publisher is None:
            publisher = broker_publisher
        if publisher is None or isinstance(publisher, Provide):
            raise NotProvidedException(f"A {BrokerPublisher!r} object must be provided.")
        return publisher

    async def _setup(self) -> None:
        await super()._setup()
        await self._create_consumers()

    async def _destroy(self) -> None:
        await self._destroy_consumers()
        await super()._destroy()

    async def _create_consumers(self):
        while len(self._consumers) < self._consumer_concurrency:
            self._consumers.append(create_task(self._consume()))

    async def _destroy_consumers(self):
        for consumer in self._consumers:
            consumer.cancel()
        await gather(*self._consumers, return_exceptions=True)
        self._consumers = list()

        while not self._queue.empty():
            entry = self._queue.get_nowait()
            await self.submit_query(self._queries["update_not_processed"], (entry.id,))

    async def _consume(self) -> None:
        while True:
            await self._consume_one()

    async def _consume_one(self) -> None:
        entry = await self._queue.get()
        try:
            await self._dispatch_one(entry)
        finally:
            self._queue.task_done()

    @property
    def publisher(self) -> BrokerPublisher:
        """Get the publisher instance.

        :return: A ``BrokerPublisher`` instance.
        """
        return self._publisher

    @property
    def consumers(self) -> list[Task]:
        """Get the consumers.

        :return: A list of ``Task`` instances.
        """
        return self._consumers

    @property
    def handlers(self) -> dict[str, Optional[Callable]]:
        """Handlers getter.

        :return: A dictionary in which the keys are topics and the values are the handlers.
        """
        return self._handlers

    @property
    def topics(self) -> KeysView[str]:
        """Get an iterable containing the topic names.

        :return: An ``Iterable`` of ``str``.
        """
        return self.handlers.keys()

    async def dispatch_forever(self, max_wait: Optional[float] = 60.0) -> NoReturn:
        """Dispatch the items in the consuming queue forever.

        :param max_wait: Maximum seconds to wait for notifications. If ``None``, the wait is unbounded.
        :return: This method does not return anything.
        """
        async with self.cursor() as cursor:
            await self._listen_entries(cursor)
            try:
                while True:
                    await self._wait_for_entries(cursor, max_wait)
                    await self.dispatch(cursor, background_mode=True)
            finally:
                await self._unlisten_entries(cursor)

    async def _listen_entries(self, cursor: Cursor):
        for topic in self.topics:
            # noinspection PyTypeChecker
            await cursor.execute(_LISTEN_QUERY.format(Identifier(topic)))

    async def _unlisten_entries(self, cursor: Cursor) -> None:
        for topic in self.topics:
            # noinspection PyTypeChecker
            await cursor.execute(_UNLISTEN_QUERY.format(Identifier(topic)))

    async def _wait_for_entries(self, cursor: Cursor, max_wait: Optional[float]) -> None:
        if await self._get_count(cursor):
            return

        while True:
            try:
                return await wait_for(consume_queue(cursor.connection.notifies, self._records), max_wait)
            except TimeoutError:
                if await self._get_count(cursor):
                    return

    async def _get_count(self, cursor) -> int:
        if not len(self.topics):
            return 0
        await cursor.execute(_COUNT_NOT_PROCESSED_QUERY, (self._retry, tuple(self.topics)))
        count = (await cursor.fetchone())[0]
        return count

    async def dispatch(self, cursor: Optional[Cursor] = None, background_mode: bool = False) -> None:
        """Dispatch a batch of ``HandlerEntry`` instances from the database's queue.

        :param cursor: The cursor to interact with the database. If ``None`` is provided a new one is acquired.
        :param background_mode: If ``True``, the method returns as soon as the entries are enqueued and
            they are processed in the background. Otherwise, it waits until every entry is processed.
        :return: This method does not return anything.
        """

        is_external_cursor = cursor is not None
        if not is_external_cursor:
            cursor = await self.cursor().__aenter__()

        async with cursor.begin():
            await cursor.execute(
                self._queries["select_not_processed"], (self._retry, tuple(self.topics), self._records)
            )
            result = await cursor.fetchall()

            if len(result):
                entries = self._build_entries(result)

                await cursor.execute(self._queries["mark_processing"], (tuple(e.id for e in entries),))

                for entry in entries:
                    await self._queue.put(entry)

        if not is_external_cursor:
            await cursor.__aexit__(None, None, None)

        if not background_mode:
            await self._queue.join()

    def _build_entries(self, rows: list[tuple]) -> list[BrokerHandlerEntry]:
        kwargs = {"callback_lookup": self.get_action}
        return [BrokerHandlerEntry(*row, **kwargs) for row in rows]

    async def _dispatch_one(self, entry: BrokerHandlerEntry) -> None:
        logger.debug(f"Dispatching '{entry!r}'...")
        try:
            await self.dispatch_one(entry)
        except (CancelledError, Exception) as exc:
            logger.warning(f"Raised an exception while dispatching {entry!r}: {exc!r}")
            entry.exception = exc
            if isinstance(exc, CancelledError):
                raise exc
        finally:
            query_id = "delete_processed" if entry.success else "update_not_processed"
            await self.submit_query(self._queries[query_id], (entry.id,))

    async def dispatch_one(self, entry: BrokerHandlerEntry) -> None:
        """Dispatch one row.

        :param entry: Entry to be dispatched.
        :return: This method does not return anything.
        """
        logger.info(f"Dispatching '{entry!s}'...")

        fn = self.get_callback(entry.callback)
        message = entry.data
        data, status, headers = await fn(message)

        if message.reply_topic is not None:
            await self.publisher.send(
                data,
                topic=message.reply_topic,
                identifier=message.identifier,
                status=status,
                user=message.user,
                headers=headers,
            )

    @staticmethod
    def get_callback(
        fn: Callable[[BrokerRequest], Union[Optional[BrokerRequest], Awaitable[Optional[BrokerRequest]]]]
    ) -> Callable[[BrokerMessage], Awaitable[tuple[Any, BrokerMessageStatus, dict[str, str]]]]:
        """Get the handler function to be used by the Broker Handler.

        :param fn: The action function.
        :return: A wrapper function around the given one that is compatible with the Broker Handler API.
        """

        @wraps(fn)
        async def _wrapper(raw: BrokerMessage) -> tuple[Any, BrokerMessageStatus, dict[str, str]]:
            request = BrokerRequest(raw)
            user_token = REQUEST_USER_CONTEXT_VAR.set(request.user)
            headers_token = REQUEST_HEADERS_CONTEXT_VAR.set(raw.headers)

            try:
                response = fn(request)
                if isawaitable(response):
                    response = await response
                if isinstance(response, Response):
                    response = await response.content()
                return response, BrokerMessageStatus.SUCCESS, REQUEST_HEADERS_CONTEXT_VAR.get()
            except ResponseException as exc:
                logger.warning(f"Raised an application exception: {exc!s}")
                return repr(exc), BrokerMessageStatus.ERROR, REQUEST_HEADERS_CONTEXT_VAR.get()
            except Exception as exc:
                logger.exception(f"Raised a system exception: {exc!r}")
                return repr(exc), BrokerMessageStatus.SYSTEM_ERROR, REQUEST_HEADERS_CONTEXT_VAR.get()
            finally:
                REQUEST_USER_CONTEXT_VAR.reset(user_token)
                REQUEST_HEADERS_CONTEXT_VAR.reset(headers_token)

        return _wrapper

    def get_action(self, topic: str) -> Optional[Callable]:
        """Get handling function to be called.

        Gets the instance of the class and method to call.

        Args:
            topic: Kafka topic. Example: "TicketAdded"

        Raises:
            MinosNetworkException: topic TicketAdded has no controller/action configured, please review the
                configuration file.
        """
        if topic not in self._handlers:
            raise MinosActionNotFoundException(
                f"topic {topic} have no controller/action configured, " f"please review th configuration file"
            )

        handler = self._handlers[topic]

        logger.debug(f"Loaded {handler!r} action!")
        return handler

    @cached_property
    def _queries(self) -> dict[str, str]:
        # noinspection PyTypeChecker
        return {
            "count_not_processed": _COUNT_NOT_PROCESSED_QUERY,
            "select_not_processed": _SELECT_NOT_PROCESSED_QUERY,
            "mark_processing": _MARK_PROCESSING_QUERY,
            "delete_processed": _DELETE_PROCESSED_QUERY,
            "update_not_processed": _UPDATE_NOT_PROCESSED_QUERY,
        }
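The consumer pool built by _create_consumers()/_consume_one() is a generic asyncio pattern: a fixed number of long-lived tasks pull from one queue and acknowledge each entry with task_done() so that join() can detect drain. A stripped-down, framework-free sketch of the same shape (all names illustrative):

import asyncio


async def consume(queue):
    while True:
        entry = await queue.get()
        try:
            print(f"dispatching {entry!r}")
        finally:
            queue.task_done()


async def main():
    queue = asyncio.PriorityQueue()
    consumers = [asyncio.create_task(consume(queue)) for _ in range(3)]
    for entry in [(2, "b"), (1, "a"), (3, "c")]:
        await queue.put(entry)
    await queue.join()      # wait until every entry has been processed
    for task in consumers:  # then tear the pool down
        task.cancel()
    await asyncio.gather(*consumers, return_exceptions=True)

asyncio.run(main())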
Example #7
class MemoryQueue(BaseQueue):
    def __init__(self):
        # The download queue: a priority queue.
        self.waiting = None
        # In-progress requests: key is the request, value is the checkout timestamp.
        self.pending = {}
        # Failure counts: key is the request, value is the number of failures.
        self.failure = {}
        self.module = None
        self.setting = None

    async def init(self, setting):
        """
        初始化
        """
        self.setting = setting
        self.waiting = PriorityQueue()
        self.module = importlib.import_module(setting.SERIALIZATION)

    async def clean_scheduler(self, waiting=True, pending=True, failure=True, data=True):
        """
        清空队列
        """
        pass

    async def get(self, priority):
        """
        从队列中获取一个request
        """
        if not self.waiting.empty():
            result = await self.waiting.get()
            self.pending[result[1]] = get_timestamp()
            return Request.unserialize(result[1], self.module)
        return None

    async def add(self, requests: typing.Union[Request, typing.List[Request]]):
        """
        Add one or more requests to the queue.
        @param requests:
        """
        if isinstance(requests, Request):
            requests = [requests]

        count = 0
        # Check whether the request is pending and, if so, whether the pending timeout has elapsed.
        for request in requests:
            str_request = request.serialize(self.module)
            pended_time = self.pending.get(str_request, 0)
            if time.time() - pended_time < self.setting["PENDING_THRESHOLD"]:
                continue

            count += 1
            self.waiting.put_nowait((-request.priority, str_request))
            if pended_time:
                self.pending.pop(str_request)
        return count

    async def set_result(self, request: Request, response: Response, task_request: Request):
        """
        Record the result of a request.
        @param request:
        @param response:
        @param task_request:
        """
        # On failure, if the retry limit has not been reached, return the request to waiting.
        str_request = request.serialize(self.module)

        # If the request is in the in-progress queue, remove it.
        if str_request in self.pending:
            self.pending.pop(str_request)

        # If the request succeeded.
        if response.ok == 1:
            return True

        if response.ok == -1:
            self.failure[str_request] = response.status
            return False

        if str_request in self.failure:
            self.failure[str_request] += 1
            await self.add(request)
        else:
            self.failure[str_request] = 1
            await self.add(request)

    async def check_status(self, spider_ins, run_forever=False):
        if len(self.pending) == 0 and self.waiting.empty():
            spider_ins.run = False
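add() negates request.priority because a priority queue pops the smallest item first; negating turns that into highest-priority-first. A short illustration of the trick (synchronous queue.PriorityQueue behaves the same way as asyncio's here):

from queue import PriorityQueue

pq = PriorityQueue()
for priority, payload in [(1, "low"), (9, "high")]:
    pq.put((-priority, payload))
print(pq.get())  # (-9, 'high'): the highest original priority comes out first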
Example #8
class AsyncProxyBroker:
    def __init__(self,
                 check_url,
                 allowed_anonymity_levels=None,
                 qps_per_proxy=1,
                 max_consecutive_failures=5,
                 providers=PROVIDERS,
                 timeout=5):
        self._proxies = Queue()
        self._pending_providers = Queue()
        self._providers = providers

        self._verified_proxies = {}
        self._throttled_proxies = PriorityQueue()
        self._errors = {}

        self._check_url = check_url
        self._qps_per_proxy = qps_per_proxy
        self._max_consecutive_failures = max_consecutive_failures
        self._timeout = timeout

        self._ip = None
        self._ip_lock = Lock()

        if not allowed_anonymity_levels:
            self._allowed_anonymity_levels = ['Anonymous', 'Elite']
        else:
            self._allowed_anonymity_levels = allowed_anonymity_levels

    async def _get_real_ip(self):
        while not self._ip:
            async with self._ip_lock:
                if self._ip:
                    return self._ip

                try:
                    async with aiohttp.request(
                            url=random.choice(IP_HOSTS),
                            method='GET',
                            timeout=aiohttp.ClientTimeout(
                                total=self._timeout)) as response:
                        contents = await response.text()
                        ips = get_all_ip(contents)

                        if len(ips) == 1:
                            self._ip = ips.pop()
                            return self._ip
                except (UnicodeDecodeError, asyncio.TimeoutError,
                        aiohttp.ClientOSError, aiohttp.ClientResponseError,
                        aiohttp.ServerDisconnectedError):
                    pass

        return self._ip

    async def _get_anonymity_level(self, proxy_address):
        judge = random.choice(JUDGES)
        ip = await self._get_real_ip()

        try:
            async with aiohttp.request(url=judge,
                                       method='GET',
                                       proxy=proxy_address,
                                       timeout=aiohttp.ClientTimeout(
                                           total=self._timeout)) as response:
                contents = (await response.text()).lower()
                contained_ips = get_all_ip(contents)

                if ip in contained_ips:
                    return 'Transparent'
                elif 'via' in contents or 'proxy' in contents:
                    return 'Anonymous'
                else:
                    return 'Elite'
        except (UnicodeDecodeError, asyncio.TimeoutError,
                aiohttp.ClientOSError, aiohttp.ClientResponseError,
                aiohttp.ServerDisconnectedError):
            return 'None'

    def _populate_providers(self):
        for provider in self._providers:
            self._pending_providers.put_nowait(provider)

    async def _can_connect_to_test_url(self, proxy_address):
        try:
            async with aiohttp.request(url=self._check_url,
                                       method='GET',
                                       proxy=proxy_address,
                                       timeout=aiohttp.ClientTimeout(
                                           total=self._timeout)) as response:
                await response.text()
                return True
        except (UnicodeDecodeError, asyncio.TimeoutError,
                aiohttp.ClientOSError, aiohttp.ClientResponseError,
                aiohttp.ServerDisconnectedError):
            return False

    async def _populate_proxies(self):
        if self._pending_providers.empty():
            self._populate_providers()

        provider = self._pending_providers.get_nowait()
        proxies = await provider.get_proxies()

        for proxy in proxies:
            self._proxies.put_nowait(proxy)

        self._pending_providers.task_done()

    async def _try_verify_one_proxy(self):
        if self._proxies.empty():
            await self._populate_proxies()
            return

        (host, port, types) = self._proxies.get_nowait()
        proxy_address = 'http://%s:%s' % (host, port)

        if await self._get_anonymity_level(proxy_address) in self._allowed_anonymity_levels and \
                await self._can_connect_to_test_url(proxy_address):
            self._verified_proxies[proxy_address] = deque()
            self._errors[proxy_address] = 0

        self._proxies.task_done()

    @staticmethod
    def _flush_history(history):
        executions_removed = 0
        earliest_time = time.monotonic()

        while len(history) > 0:
            earliest_time = history.popleft()
            if time.monotonic() - earliest_time < 1:
                history.appendleft(earliest_time)
                break
            executions_removed += 1

        return executions_removed, earliest_time

    def _flush_throttled_proxies(self):
        while not self._throttled_proxies.empty():
            (_, proxy_url, history) = self._throttled_proxies.get_nowait()
            executions_removed, earliest_time = self._flush_history(history)

            if executions_removed == 0:
                self._throttled_proxies.put_nowait(
                    (earliest_time, proxy_url, history))
                self._throttled_proxies.task_done()
                return

            self._verified_proxies[proxy_url] = history
            self._throttled_proxies.task_done()

    def mark_successful(self, proxy_url):
        if proxy_url not in self._errors:
            return

        self._errors[proxy_url] = max(0, self._errors[proxy_url] - 1)

    def mark_failure(self, proxy_url):
        if proxy_url not in self._errors:
            return

        self._errors[proxy_url] += 1

    async def random_proxy(self):
        while True:
            self._flush_throttled_proxies()

            if not self._verified_proxies:
                await self._try_verify_one_proxy()

            while self._verified_proxies:
                proxy_url = random.choice(list(self._verified_proxies.keys()))

                if self._errors[proxy_url] >= self._max_consecutive_failures:
                    del self._errors[proxy_url]
                    del self._verified_proxies[proxy_url]
                    continue

                history = self._verified_proxies[proxy_url]

                _, earliest_time = self._flush_history(history)
                if len(history) < self._qps_per_proxy:
                    history.append(time.monotonic())
                    return proxy_url

                del self._verified_proxies[proxy_url]
                self._throttled_proxies.put_nowait(
                    (earliest_time, proxy_url, history))
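_flush_history() implements a one-second sliding window over a deque of time.monotonic() timestamps, which is how the broker caps each proxy at qps_per_proxy requests per second. The same limiter in isolation, as a sketch (names are illustrative):

import time
from collections import deque


class SlidingWindowLimiter:
    """Allow at most max_per_second events in any one-second window."""

    def __init__(self, max_per_second):
        self.max_per_second = max_per_second
        self.history = deque()

    def allow(self):
        now = time.monotonic()
        # Drop timestamps that fell out of the one-second window.
        while self.history and now - self.history[0] >= 1:
            self.history.popleft()
        if len(self.history) < self.max_per_second:
            self.history.append(now)
            return True
        return False


limiter = SlidingWindowLimiter(max_per_second=2)
print([limiter.allow() for _ in range(3)])  # [True, True, False]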
Example #9
import asyncio
import json
from asyncio import PriorityQueue
from datetime import datetime
from socket import AF_INET, SOCK_STREAM, SOL_SOCKET, SO_REUSEADDR, socket


class JsonServer:
    """
    Receives a JSON string and, if the action is "apply", adds the contents of
    the "template" field to a priority queue. The module also processes the
    queue every second and saves the template to a file.

    Example JSON string:
    {"action": "apply", "when": "2016-4-19 09:00:02", "template": "AAAAAA"}
    """
    def __init__(self):
        """
        Initialize and run an asyncio event loop forever.
        """
        self.loop = asyncio.get_event_loop()
        self.queue = PriorityQueue(loop=self.loop)
        self.loop.create_task(self.json_server(('', 25000)))
        self.loop.create_task(self.queue_dumper())
        self.loop.run_forever()

    async def json_server(self, address):
        """
        Creates a listening server socket at the given address.
        :param address: tuple (host, port), e.g. ('', 25000)
        """
        sock = socket(AF_INET, SOCK_STREAM)
        sock.setsockopt(SOL_SOCKET, SO_REUSEADDR, 1)
        sock.bind(address)
        sock.listen(5)
        sock.setblocking(False)
        while True:
            client, addr = await self.loop.sock_accept(sock)
            self.loop.create_task(self.json_handler(client))

    async def json_handler(self, client):
        """
        Handles messages from a single connected client.
        :param client: socket client
        """
        with client:
            while True:
                raw_data = await self.loop.sock_recv(client, 10000)
                if not raw_data:
                    break
                try:
                    data = json.loads(raw_data.strip().decode("utf-8"))
                except ValueError:
                    # If the payload is not valid JSON, reject the message
                    # and drop the connection.
                    # TODO: Log it.
                    await self.loop.sock_sendall(client,
                                                 b'Rejected: ' + raw_data)
                    break
                if self.is_valid_data_input(
                        data) and data['action'] == 'apply':
                    ts = datetime.strptime(data['when'], '%Y-%m-%d %H:%M:%S')
                    await self.queue.put((ts, data['template']))
                    await self.loop.sock_sendall(client,
                                                 b'Accepted: ' + raw_data)
                else:
                    await self.loop.sock_sendall(client,
                                                 b'Rejected: ' + raw_data)

    def is_valid_data_input(self, data):
        """
        Validates incoming data.
        :param data: dictionary
        :return: 'True' if valid, else 'False'.
        """
        if not data.get('action'):
            return False
        if not data.get('when'):
            return False
        if not data.get('template'):
            return False
        return True

    async def queue_dumper(self):
        """
        Dumps status of queue to terminal (Eventually a file) every second.
        """
        # TODO : Ensure this also prints to a file.
        while True:
            if not self.queue.qsize():
                await asyncio.sleep(1)
            else:
                # Drain the queue into a copy, print it, then restore the items.
                _copy = PriorityQueue()
                while not self.queue.empty():
                    await _copy.put(await self.queue.get())
                print(chr(27) + "[2J")  # ANSI escape to clear the terminal (Ctrl+L effect)
                while not _copy.empty():
                    element = await _copy.get()
                    print(element)
                    await self.queue.put(element)
            await asyncio.sleep(1)
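A quick way to exercise the server, assuming it is running locally on port 25000; the payload is the example string from the class docstring:

import socket

with socket.create_connection(("localhost", 25000)) as client:
    client.sendall(
        b'{"action": "apply", "when": "2016-4-19 09:00:02", "template": "AAAAAA"}'
    )
    print(client.recv(10000))  # b'Accepted: ...' on success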