class Sessions(object):
    def __init__(self, proxies=None, delay=0):
        self.availables = Queue()
        self.proxies_lists = RandomProxies()
        self.delay = delay
        self.sessions = self.create_sessions(proxies or [None])
        for session in self.sessions:
            self.availables.put(session)

    def add_available(self, session):
        if self.delay:
            # Return the session to the pool only after `delay` seconds.
            threading.Timer(self.delay, self.availables.put, [session]).start()
        else:
            self.availables.put(session)

    def create_sessions(self, proxies):
        return [Session(self, proxy) for proxy in proxies]

    def get_random_session(self):
        return random.choice(self.sessions)

    def get_session(self):
        if not self.delay and self.availables.empty():
            # Without a delay there is no need to wait for a free session.
            return self.get_random_session()
        return self.availables.get()
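# A minimal usage sketch for Sessions (an illustration, not part of the original
# module). The proxy address is a placeholder and Session is the per-proxy wrapper
# created by create_sessions(), defined elsewhere in the package. With delay=1,
# add_available() re-queues a session one second after it is released, which is
# what throttles requests per proxy.
#
#     sessions = Sessions(proxies=['http://127.0.0.1:8080'], delay=1)
#     session = sessions.get_session()   # blocks until a session is free
#     ...perform the request with `session`...
#     sessions.add_available(session)    # back in the pool after `delay` seconds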
class Crawler(ThreadPoolExecutor):
    def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
                 progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
                 not_allow_redirects=False, proxies=None, delay=0, limit=1000):
        if not max_workers and not delay:
            max_workers = (multiprocessing.cpu_count() or 1) * 5
        elif not max_workers and delay:
            # With a delay, one worker per proxy (or a single worker without proxies).
            max_workers = len(proxies or [None])
        super(Crawler, self).__init__(max_workers)
        self.domains = set()
        self.results = Queue()
        self.index_of_processors = []
        self.sessions = Sessions(proxies, delay)
        self.processing = {}
        self.processed = {}
        self.add_lock = Lock()
        self.spinner = random_spinner()
        self.start_dt = datetime.datetime.now()
        self.interesting_extensions = interesting_extensions or []
        self.interesting_files = interesting_files or []
        self.closing = False
        self.std = std or None
        self.progress_enabled = progress_enabled
        self.timeout = timeout
        self.not_follow_subdomains = not_follow_subdomains
        self.depth = depth
        self.sources = Sources(self.add_url, self.add_message, exclude_sources)
        self.not_allow_redirects = not_allow_redirects
        self.limit = limit
        self.current_processed_count = 0

    def add_init_urls(self, *urls):
        """Add urls to the queue."""
        for crawler_url in urls:
            if not isinstance(crawler_url, CrawlerUrl):
                crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout)
            self.add_domain(crawler_url.url.only_domain)
            self.add_url(crawler_url)

    def in_domains(self, domain):
        if self.not_follow_subdomains and domain not in self.domains:
            return False
        initial_domain = domain
        while True:
            if domain in self.domains:
                if initial_domain != domain:
                    # Subdomain of an already-known domain: register it too.
                    self.add_domain(initial_domain)
                return True
            parts = domain.split('.')
            if len(parts) <= 2:
                return False
            domain = '.'.join(parts[1:])

    def add_domain(self, domain):
        if domain in self.domains:
            return
        self.domains.add(domain)
        self.sources.add_domain(domain)

    def add_url(self, crawler_url, force=False):
        """Add a url to the queue."""
        if not isinstance(crawler_url, CrawlerUrl):
            crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout)
        self.add_lock.acquire()
        url = crawler_url.url
        if not url.is_valid() or not url.only_domain or not self.in_domains(url.only_domain):
            self.add_lock.release()
            return
        if url.url in self.processing or url.url in self.processed:
            # Already queued or finished: return the existing future/result.
            self.add_lock.release()
            return self.processing.get(url.url) or self.processed.get(url.url)

        fn = reraise_with_stack(crawler_url.start)
        if self.closing:
            self.add_lock.release()
            return
        if force:
            future = ThreadPoolExecutor(max_workers=1).submit(fn)
        else:
            future = self.submit(fn)
        self.processing[url.url] = future
        self.add_lock.release()
        return future

    def add_message(self, body):
        from dirhunt.processors import Message
        self.results.put(Message(body))

    def echo(self, body):
        if self.std is None:
            return
        # TODO: remove ANSI chars when std is not a tty
        self.std.write(str(body))
        self.std.write('\n')

    def erase(self):
        if self.std is None or not self.std.isatty():
            return
        CURSOR_UP_ONE = '\x1b[1A'
        ERASE_LINE = '\x1b[2K'
        # This can be improved. In the future we may want to define stdout/stderr with a CLI option.
        # fn = sys.stderr.write if sys.stderr.isatty() else sys.stdout.write
        self.std.write(CURSOR_UP_ONE + ERASE_LINE)

    def print_progress(self, finished=False):
        if not self.progress_enabled:
            # Progress output is disabled: print nothing.
            return
        self.echo('{} {} {}'.format(
            next(self.spinner),
            'Finished after' if finished else 'Started',
            (humanize.naturaldelta if finished else humanize.naturaltime)(datetime.datetime.now() - self.start_dt),
        ))

    def print_results(self, exclude=None, include=None):
        exclude = exclude or set()
        self.echo('Starting...')
        while True:
            result = None
            try:
                result = self.results.get(timeout=.5)
            except queue.Empty:
                pass
            self.erase()
            if result and result.maybe_directory() and not (result.crawler_url.flags & exclude) \
                    and (not include or (include & result.crawler_url.flags)):
                self.echo(result)
            self.print_progress()
            if (self.sources.finished() and not self.processing) or \
                    (self.current_processed_count >= self.limit and self.limit):
                # Ended
                if self.current_processed_count >= self.limit and self.limit:
                    # Limit reached: force shutdown
                    self.closing = True
                    self.shutdown()
                self.erase()
                self.print_progress(True)
                return

    def print_urls_info(self):
        if not self.index_of_processors:
            self.echo(r'No interesting files detected ¯\_(ツ)_/¯')
            return
        self.echo('━' * get_terminal_size()[0])
        UrlsInfo(self.index_of_processors, self.sessions, self.std, self._max_workers, self.progress_enabled,
                 self.timeout).start()

    def restart(self):
        try:
            self.add_lock.release()
        except (ThreadError, RuntimeError):
            pass

    def close(self):
        self.closing = True
        self.shutdown(False)
        atexit.unregister(_python_exit)
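# A hedged usage sketch for this Crawler (an illustration; the URL and option
# values are placeholders, and in practice the dirhunt CLI performs these calls):
#
#     import sys
#     crawler = Crawler(max_workers=4, interesting_extensions=['php'],
#                       std=sys.stdout, limit=100)
#     crawler.add_init_urls('http://example.com/')
#     crawler.print_results()    # loops until sources finish and nothing is
#                                # processing, or `limit` urls have been processed
#     crawler.print_urls_info()
#     crawler.close()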
class Crawler(ThreadPoolExecutor):
    urls_info = None

    def __init__(self, max_workers=None, interesting_extensions=None, interesting_files=None, std=None,
                 progress_enabled=True, timeout=10, depth=3, not_follow_subdomains=False, exclude_sources=(),
                 not_allow_redirects=False, proxies=None, delay=0, limit=1000, to_file=None, user_agent=None,
                 cookies=None, headers=None):
        if not max_workers and not delay:
            max_workers = (multiprocessing.cpu_count() or 1) * 5
        elif not max_workers and delay:
            # With a delay, one worker per proxy (or a single worker without proxies).
            max_workers = len(proxies or [None])
        super(Crawler, self).__init__(max_workers)
        self.domains = set()
        self.results = Queue()
        self.index_of_processors = []
        self.proxies = proxies
        self.delay = delay
        self.sessions = Sessions(proxies, delay, user_agent, cookies, headers)
        self.processing = {}
        self.processed = {}
        self.add_lock = Lock()
        self.spinner = random_spinner()
        self.start_dt = datetime.datetime.now()
        self.interesting_extensions = interesting_extensions or []
        self.interesting_files = interesting_files or []
        self.closing = False
        self.std = std or None
        self.progress_enabled = progress_enabled
        self.timeout = timeout
        self.not_follow_subdomains = not_follow_subdomains
        self.depth = depth
        self.exclude_sources = exclude_sources
        self.sources = Sources(self.add_url, self.add_message, exclude_sources)
        self.not_allow_redirects = not_allow_redirects
        self.limit = limit
        self.current_processed_count = 0
        self.to_file = to_file

    def add_init_urls(self, *urls):
        """Add urls to the queue."""
        for crawler_url in urls:
            if not isinstance(crawler_url, CrawlerUrl):
                crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout)
            self.add_domain(crawler_url.url.only_domain)
            self.add_url(crawler_url)

    def in_domains(self, domain):
        if self.not_follow_subdomains and domain not in self.domains:
            return False
        initial_domain = domain
        while True:
            if domain in self.domains:
                if initial_domain != domain:
                    # Subdomain of an already-known domain: register it too.
                    self.add_domain(initial_domain)
                return True
            parts = domain.split('.')
            if len(parts) <= 2:
                return False
            domain = '.'.join(parts[1:])

    def add_domain(self, domain):
        if domain in self.domains:
            return
        self.domains.add(domain)
        self.sources.add_domain(domain)

    def add_url(self, crawler_url, force=False):
        """Add a url to the queue."""
        if not isinstance(crawler_url, CrawlerUrl):
            crawler_url = CrawlerUrl(self, crawler_url, depth=self.depth, timeout=self.timeout)
        self.add_lock.acquire()
        url = crawler_url.url
        if not url.is_valid() or not url.only_domain or not self.in_domains(url.only_domain):
            self.add_lock.release()
            return
        if url.url in self.processing or url.url in self.processed:
            # Already queued or finished: return the existing future/result.
            self.add_lock.release()
            return self.processing.get(url.url) or self.processed.get(url.url)

        fn = reraise_with_stack(crawler_url.start)
        if self.closing:
            self.add_lock.release()
            return
        if force:
            future = ThreadPoolExecutor(max_workers=1).submit(fn)
        else:
            future = self.submit(fn)
        self.processing[url.url] = future
        self.add_lock.release()
        return future

    def add_message(self, body):
        from dirhunt.processors import Message
        self.results.put(Message(body))

    def echo(self, body):
        if self.std is None:
            return
        # TODO: remove ANSI chars when std is not a tty
        self.std.write(str(body))
        self.std.write('\n')

    def erase(self):
        if self.std is None or not self.std.isatty():
            return
        CURSOR_UP_ONE = '\x1b[1A'
        ERASE_LINE = '\x1b[2K'
        # This can be improved. In the future we may want to define stdout/stderr with a CLI option.
        # fn = sys.stderr.write if sys.stderr.isatty() else sys.stdout.write
        self.std.write(CURSOR_UP_ONE + ERASE_LINE)

    def print_progress(self, finished=False):
        if not self.progress_enabled:
            # Progress output is disabled: print nothing.
            return
        self.echo('{} {} {}'.format(
            next(self.spinner),
            'Finished after' if finished else 'Started',
            (humanize.naturaldelta if finished else humanize.naturaltime)(datetime.datetime.now() - self.start_dt),
        ))

    def print_results(self, exclude=None, include=None):
        exclude = exclude or set()
        self.echo('Starting...')
        while True:
            result = None
            try:
                result = self.results.get(timeout=.5)
            except queue.Empty:
                pass
            self.erase()
            if result and result.maybe_directory() and not (result.crawler_url.flags & exclude) \
                    and (not include or (include & result.crawler_url.flags)):
                self.echo(result)
            self.print_progress()
            if (self.sources.finished() and not self.processing) or \
                    (self.current_processed_count >= self.limit and self.limit):
                # Ended
                if self.current_processed_count >= self.limit and self.limit:
                    # Limit reached: force shutdown
                    self.closing = True
                    self.shutdown()
                self.erase()
                self.print_progress(True)
                return

    def print_urls_info(self):
        if not self.index_of_processors:
            self.echo(r'No interesting files detected ¯\_(ツ)_/¯')
            return
        self.echo('━' * get_terminal_size()[0])
        self.urls_info = UrlsInfo(self.index_of_processors, self.sessions, self.std, self._max_workers,
                                  self.progress_enabled, self.timeout, bool(self.to_file))
        self.urls_info.start()

    def restart(self):
        try:
            self.add_lock.release()
        except (ThreadError, RuntimeError):
            pass

    def options(self):
        return {
            'interesting_extensions': self.interesting_extensions,
            'interesting_files': self.interesting_files,
            'timeout': self.timeout,
            'depth': self.depth,
            'not_follow_subdomains': self.not_follow_subdomains,
            'exclude_sources': self.exclude_sources,
            'not_allow_redirects': self.not_allow_redirects,
            'proxies': self.proxies,
            'delay': self.delay,
            'limit': self.limit,
        }

    @property
    def options_file(self):
        # The resume file name is the checksum of the analysis options.
        checksum = sha256(json.dumps(self.options(), sort_keys=True).encode('utf-8')).hexdigest()
        return os.path.join(resume_dir, checksum)

    def get_resume_file(self):
        return self.to_file or self.options_file

    def close(self, create_resume=False):
        self.closing = True
        self.shutdown(False)
        if create_resume:
            self.create_report(self.get_resume_file())
        unregister(_python_exit)

    def create_report(self, to_file):
        """Write a report with the current json() state to a file.
        The file can be read later to continue an analysis."""
        to_file = os.path.abspath(to_file)
        directory = os.path.dirname(to_file)
        if not os.path.exists(directory):
            os.makedirs(directory)
        data = self.json()
        json.dump(data, open(to_file, 'w'), cls=JsonReportEncoder, indent=4, sort_keys=True)

    def resume(self, path):
        resume_data = json.load(open(path))
        file_version = resume_data.get('version')
        if file_version != __version__:
            raise IncompatibleVersionError(
                'Analysis file incompatible with the current version of dirhunt. '
                'Dirhunt version: {}. File version: {}'.format(__version__, file_version)
            )
        for data in resume_data['processed']:
            crawler_url_data = data['crawler_url']
            url = crawler_url_data['url']['address']
            crawler_url = CrawlerUrl(self, url, crawler_url_data['depth'], None, crawler_url_data['exists'],
                                     crawler_url_data['type'])
            crawler_url.flags = set(crawler_url_data['flags'])
            crawler_url.processor_data = data
            self.processed[url] = crawler_url
            self.echo(data['line'])
        for url in resume_data['processing']:
            self.add_url(url)

    def json(self):
        urls_infos = self.urls_info.urls_info if self.urls_info else []
        urls_infos = [urls_info.json() for urls_info in urls_infos]
        return {
            'version': __version__,
            'current_processed_count': self.current_processed_count,
            'domains': self.domains,
            'index_of_processors': self.index_of_processors,
            'processing': list(self.processing.keys()),
            'processed': list(filter(bool, [processed.processor_data
                                            for processed in self.processed.values()])),
            'urls_infos': urls_infos,
        }
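# A hedged sketch of the report/resume flow this version adds (an illustration;
# the file name and URL are placeholders). close(create_resume=True) serializes
# json() to get_resume_file(), either the explicit `to_file` or a path under
# resume_dir derived from the sha256 of options(), and resume() reloads that
# state so a crawler created with the same options can continue the analysis:
#
#     crawler = Crawler(std=sys.stdout, to_file='report.json')
#     crawler.add_init_urls('http://example.com/')
#     crawler.print_results()
#     crawler.close(create_resume=True)     # writes report.json via create_report()
#
#     resumed = Crawler(std=sys.stdout, to_file='report.json')
#     resumed.resume(resumed.get_resume_file())
#     resumed.add_init_urls('http://example.com/')
#     resumed.print_results()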