def setUp(self): """Set up test.""" self.console = console self.refresh_rate = refresh.Hourly self.datadir = DataDirectory(dirname(__file__) + '/datadir') self.index = Index(self.datadir, MagicMock()) self.filesystem = Filesystem(self.index, self.refresh_rate)
def test_next_url_returns_none_if_no_url_was_found(self):
    """Test _next_url() returns None if no url was found."""
    index = Index()
    index.random_uncrawled_url = MagicMock(
        side_effect=EmptySearchResultException())
    self.crawler = Crawler(self.path_to_url_source, index)
    self.assertIsNone(self.crawler._next_url())
def test_crawler_can_read_next_url_from_source(self):
    """Test crawler can read next url from source."""
    self.add_url_source('https://example.com')
    self.crawler = Crawler(self.path_to_url_source, Index())
    self.assertEqual(
        Url.from_string('https://example.com').to_string(),
        self.crawler._next_url().to_string())
def test_crawler_can_read_next_url_from_index(self):
    """Test crawler can read next url from index."""
    index = Index()
    url = Url.from_string('https://example.com/foo')
    index.remove_uncrawled_url = MagicMock()
    index.random_uncrawled_url = MagicMock(return_value=url)
    self.crawler = Crawler(self.path_to_url_source, index)
    self.assertEqual(
        Url.from_string('https://example.com/foo').to_string(),
        self.crawler._next_url().to_string())
    index.remove_uncrawled_url.assert_called_with(url.hash())
def test_crawler_removes_urls_read_from_source(self):
    """Test crawler removes urls read from source."""
    self.add_url_source('https://example.com')
    self.add_url_source('https://example.com/foo')
    self.add_url_source('https://example.com/bar')
    self.crawler = Crawler(self.path_to_url_source, Index())

    # first line should now be https://example.com
    self.assertEqual(
        Url.from_string('https://example.com').to_string(),
        self.crawler._next_url().to_string())

    # first line should now be https://example.com/foo
    self.assertEqual(
        Url.from_string('https://example.com/foo').to_string(),
        self.crawler._next_url().to_string())

    # first line should now be https://example.com/bar
    self.assertEqual(
        Url.from_string('https://example.com/bar').to_string(),
        self.crawler._next_url().to_string())

    self.crawler = Crawler(self.path_to_url_source, Index())
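# A minimal, hypothetical sketch of the contract the tests above exercise:
# _next_url() consumes the first line of the url source file on every call.
# pop_first_url() is NOT part of the Crawler API; it only illustrates the
# read-and-remove behavior, assuming a plain text file with one url per line.
def pop_first_url(path: str):
    """Remove and return the first line of a url file, or None if empty."""
    with open(path) as f:
        lines = f.read().splitlines()
    if not lines:
        return None
    with open(path, 'w') as f:
        f.write('\n'.join(lines[1:]))
    return lines[0]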
def _stats_thread(elasticsearch_host: str):
    """Stats thread.

    Prints system and saas statistics every five minutes.

    Args:
        elasticsearch_host: elasticsearch host
    """
    start = time.time()
    last_print = 1
    while Controller.SHOULD_RUN:
        time.sleep(1)
        mins = int(int(time.time() - start) / 60)
        if mins % 5 != 0 or mins <= last_print:
            continue
        index = Index(host=elasticsearch_host)
        last_print = mins
        t = '[throughput] 5m: {}, 15m: {}, 30m: {}, 1h: {}'.format(
            stats.throughput(index, 5),
            stats.throughput(index, 15),
            stats.throughput(index, 30),
            stats.throughput(index, 60),
        )
        ta = '{} 5m: {}, 15m: {}, 30m: {}, 1h: {}'.format(
            '[throughput 1min avg]',
            round(stats.throughput(index, 5) / 5, 2) if mins > 4 else 'n/a',
            round(stats.throughput(index, 15) / 15, 2) if mins > 14 else 'n/a',
            round(stats.throughput(index, 30) / 30, 2) if mins > 29 else 'n/a',
            round(stats.throughput(index, 60) / 60, 2) if mins > 59 else 'n/a',
        )
        load = '[load avg] 1m: {}, 5m: {}, 15m: {}'.format(
            stats.load_avg(1),
            stats.load_avg(5),
            stats.load_avg(15),
        )
        cpu = f'[current cpu usage] {stats.cpu_usage(10)}%'
        mem = f'[memory usage] {stats.memory_usage(10)}%'
        for msg in [t, ta, load, cpu, mem]:
            console.p(msg)
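# Self-contained sketch of the gating logic in _stats_thread above: stats are
# printed at most once per 5-minute boundary even though the loop wakes every
# second. _should_print() is illustrative only, not part of the module.
def _should_print(mins: int, last_print: int) -> bool:
    """True when a new 5-minute boundary has been passed."""
    return mins % 5 == 0 and mins > last_print


assert _should_print(5, 1) is True      # first boundary after start
assert _should_print(5, 5) is False     # already printed this boundary
assert _should_print(7, 5) is False     # not on a boundary
assert _should_print(10, 5) is True     # next boundary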
def _crawler_thread(
    url_file: str,
    ignore_found_urls: bool,
    stay_at_domain: bool,
    elasticsearch_host: str,
    debug: bool,
    thread_id: str
):
    """Crawler thread.

    Args:
        url_file: path to url file
        ignore_found_urls: if crawler should ignore new urls found on
            pages it crawls
        stay_at_domain: if crawler should ignore urls from a different
            domain than the one they were found at
        elasticsearch_host: elasticsearch host
        debug: display debugging information
        thread_id: id of thread
    """
    try:
        crawler = Crawler(
            url_file=url_file,
            index=Index(host=elasticsearch_host),
            ignore_found_urls=ignore_found_urls,
            stay_at_domain=stay_at_domain,
        )
        while Controller.SHOULD_RUN:
            crawler.tick()
    except UrlFileNotFoundError:
        console.p(f'ERROR: url_file was not found at \'{url_file}\'')
        time.sleep(2)
        Controller.threads[thread_id]['running'] = False
        Controller.stop_all()
    except Exception as e:
        console.p(f'error occurred in crawler thread {thread_id}: {e}')
        if debug:
            raise
    finally:
        Controller.threads[thread_id]['running'] = False
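# Hypothetical launch sketch for the thread above. Controller.start_crawlers
# (used in main() below) is the real entry point; the bookkeeping dict shape
# is inferred from the Controller.threads[thread_id]['running'] writes above,
# and exposing _crawler_thread on Controller is an assumption.
import threading

thread_id = 'crawler-0'
Controller.threads[thread_id] = {'running': True}
threading.Thread(
    target=Controller._crawler_thread,
    args=('/tmp/urls.txt', False, False, 'localhost', False, thread_id),
    daemon=True,
).start()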
def start_filesystem(
    mountpoint: str,
    datadir: DataDirectory,
    refresh_rate: Type[refresh.RefreshRate],
    elasticsearch_host: str
):
    """Start filesystem process.

    The FUSE python library blocks the process that mounts the
    filesystem, so the main process forks and the filesystem is
    mounted from the child process instead.

    Args:
        mountpoint: where to mount filesystem
        datadir: Data directory to store pictures in
        refresh_rate: Which refresh rate filesystem should use for
            fetching photos
        elasticsearch_host: elasticsearch host

    Returns:
        True in the main process, False in the forked child process
        bool
    """
    console.p(f'mounting filesystem at: {real_path(mountpoint)}')
    pid = os.fork()
    if pid != 0:
        Controller.FUSE_PID = pid
        return True
    try:
        Filesystem.mount(
            mountpoint,
            Index(datadir, host=elasticsearch_host),
            refresh_rate
        )
    except RuntimeError as e:
        console.p(f'failed to mount FUSE filesystem: {e}')
    return False
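# Runnable sketch of the fork pattern start_filesystem() relies on: the
# parent returns True immediately, while the child runs the blocking task
# (Filesystem.mount above) and must never return into the caller's code
# path. fork_and_run() and its argument are illustrative names only.
import os
import sys


def fork_and_run(blocking_task) -> bool:
    """Fork; parent returns True, child runs blocking_task and exits."""
    pid = os.fork()
    if pid != 0:
        return True             # parent: the child pid could be stored here
    try:
        blocking_task()         # child: blocks until done or interrupted
    finally:
        sys.exit(0)             # child must not fall back into the caller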
def _photographer_thread(
    refresh_rate: Type[refresh.RefreshRate],
    datadir: DataDirectory,
    viewport_width: int,
    viewport_height: int,
    viewport_max_height: Optional[int],
    elasticsearch_host: str,
    debug: bool,
    thread_id: str
):
    """Photographer thread.

    Args:
        refresh_rate: How often photographs should be refreshed
        datadir: Data directory to store pictures in
        viewport_width: width of camera viewport
        viewport_height: height of camera viewport
        viewport_max_height: max height of camera viewport
        elasticsearch_host: elasticsearch host
        debug: display debugging information
        thread_id: id of thread
    """
    try:
        photographer = p.Photographer(
            Index(host=elasticsearch_host),
            refresh_rate,
            datadir,
            viewport_width,
            viewport_height,
            viewport_max_height
        )
        while Controller.SHOULD_RUN:
            photographer.tick()
    except Exception as e:
        console.p(f'error occurred in photographer thread {thread_id}: {e}')
        if debug:
            raise
    finally:
        Controller.threads[thread_id]['running'] = False
def setUp(self): """Set up test.""" self.index = Index() self.datadir = DataDirectory(dirname(__file__) + '/datadir') self.photographer = Photographer(self.index, refresh.Hourly, self.datadir)
def main():
    """Entry point for saas."""
    try:
        parser = arguments.get_argument_parser()
        args = parser.parse_args(sys.argv[1:])
        console.DEBUG = args.debug
        JavascriptSnippets.load()

        index = Index(host=args.elasticsearch_host)
        if not index.ping():
            console.p('ERROR: failed to connect to elasticsearch')
            sys.exit()

        if not index.verify():
            if not args.setup_elasticsearch and not args.clear_elasticsearch:
                console.p('ERROR: elasticsearch is not configured')
                console.p(' {} {}'.format(
                    'start saas with --setup-elasticsearch',
                    'to configure elasticsearch'))
                sys.exit()

        datadir = DataDirectory(args.data_dir, args.optimize_storage)
        refresh_rate = {
            'day': refresh.Daily,
            'hour': refresh.Hourly,
            'minute': refresh.EveryMinute,
        }[args.refresh_rate]

        if args.setup_elasticsearch:
            index.create_indices()

        if args.clear_elasticsearch:
            index.clear()
            index.create_indices()

        if args.clear_data_dir:
            datadir.clear()

        if not Controller.start_filesystem(
                mountpoint=args.mountpoint,
                datadir=datadir,
                refresh_rate=refresh_rate,
                elasticsearch_host=args.elasticsearch_host):
            sys.exit()

        Controller.start_stats(elasticsearch_host=args.elasticsearch_host)

        Controller.start_crawlers(
            amount=args.crawler_threads,
            url_file=args.url_file,
            ignore_found_urls=args.ignore_found_urls,
            stay_at_domain=args.stay_at_domain,
            elasticsearch_host=args.elasticsearch_host,
            debug=args.debug)

        Controller.start_photographers(
            amount=args.photographer_threads,
            refresh_rate=refresh_rate,
            datadir=datadir,
            viewport_width=args.viewport_width,
            viewport_height=args.viewport_height,
            viewport_max_height=args.viewport_max_height,
            elasticsearch_host=args.elasticsearch_host,
            debug=args.debug)

        while True:
            if args.stop_if_idle == 0:
                time.sleep(10)
                continue
            try:
                crawled = index.timestamp_of_most_recent_document(
                    index.CRAWLED)
                photos = index.timestamp_of_most_recent_document(
                    index.PHOTOS)
                timestamp = max(crawled, photos)
                seconds = int(time.time()) - timestamp
                mins = int(seconds / 60)
                if mins >= args.stop_if_idle:
                    console.p(f'was idle for {mins} minutes', end='')
                    raise StopIfIdleTimeoutExpired
            except EmptySearchResultException:
                pass
            finally:
                time.sleep(2)
    except (KeyboardInterrupt, StopIfIdleTimeoutExpired):
        console.p(' terminating.')
        Controller.stop_all()
        console.p('')
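# Worked example of the stop-if-idle arithmetic in the loop above. With
# --stop-if-idle 15 and the newest crawled/photo document 1000 seconds old,
# the idle time is 16 whole minutes, which exceeds the threshold and would
# raise StopIfIdleTimeoutExpired. minutes_idle() is only an illustration of
# the computation, not part of the module.
import time


def minutes_idle(most_recent_timestamp: int) -> int:
    """Whole minutes elapsed since the most recent indexed document."""
    return int((int(time.time()) - most_recent_timestamp) / 60)


assert minutes_idle(int(time.time()) - 1000) == 16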
def setUp(self): """Set up test.""" self.datadir = DataDirectory(dirname(__file__) + '/datadir') self.index = Index(self.datadir, MagicMock())