    def execute(self):
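        """Collect raw data for this backend section.

        Return a list of per-repository error records, each a dict with
        'backend', 'repo' and 'error' keys.
        """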

        errors = []
        cfg = self.config.get_conf()

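        # Optional Elasticsearch tuning taken from the [general] section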
        if 'scroll_size' in cfg['general']:
            ElasticItems.scroll_size = cfg['general']['scroll_size']

        if 'bulk_size' in cfg['general']:
            ElasticSearch.max_items_bulk = cfg['general']['bulk_size']

        if ('collect' in cfg[self.backend_section]
                and not cfg[self.backend_section]['collect']):
            logger.info('%s collect disabled', self.backend_section)
            return errors

        t2 = time.time()
        logger.info('[%s] collection phase starts', self.backend_section)
        print("Collection for {}: starting...".format(self.backend_section))
        clean = False  # keep the existing raw index (do not recreate it)

        fetch_archive = False
        if ('fetch-archive' in cfg[self.backend_section]
                and cfg[self.backend_section]['fetch-archive']):
            fetch_archive = True

        # repos could change between executions because of changes in projects
        repos = TaskProjects.get_repos_by_backend_section(self.backend_section)

        if not repos:
            logger.warning("No collect repositories for %s",
                           self.backend_section)

        for repo in repos:
            repo, repo_labels = self._extract_repo_labels(
                self.backend_section, repo)
            p2o_args = self._compose_p2o_params(self.backend_section, repo)
            filter_raw = p2o_args.get('filter-raw')

            if filter_raw:
                # If filter-raw exists it means that there is an equivalent URL
                # in the `unknown` section of the projects.json. Thus the URL with
                # filter-raw is ignored in the collection phase, while the URL
                # in `unknown` is considered in this phase.
                logger.warning("Not collecting filter-raw repository: %s",
                               repo)
                continue

            url = p2o_args['url']
            backend_args = self._compose_perceval_params(
                self.backend_section, repo)
            logger.debug(backend_args)
            logger.info('[%s] collection starts for %s', self.backend_section,
                        repo)
            es_col_url = self._get_collection_url()
            ds = self.backend_section
            backend = self.get_backend(self.backend_section)
            project = None  # just used for github in cauldron

            es_aliases = self.select_aliases(cfg, self.backend_section)

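            # feed_backend fetches the raw items and writes them to the raw
            # index; whatever error message it returns is recorded for this
            # repository.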
            try:
                error_msg = feed_backend(es_col_url,
                                         clean,
                                         fetch_archive,
                                         backend,
                                         backend_args,
                                         cfg[ds]['raw_index'],
                                         cfg[ds]['enriched_index'],
                                         project,
                                         es_aliases=es_aliases,
                                         projects_json_repo=repo,
                                         repo_labels=repo_labels)
                error = {'backend': backend, 'repo': repo, 'error': error_msg}
                errors.append(error)
            except Exception:
                logger.error(
                    "Something went wrong collecting data from this %s repo: %s. "
                    "Using the backend_args: %s",
                    ds, url, backend_args)
                traceback.print_exc()
                raise DataCollectionError('Failed to collect data from %s' %
                                          url)
            logger.info('[%s] collection finished for %s',
                        self.backend_section, repo)

        t3 = time.time()
        spent_time = time.strftime("%H:%M:%S", time.gmtime(t3 - t2))
        logger.info('[%s] collection phase finished in %s',
                    self.backend_section, spent_time)
        print("Collection for {}: finished after {} hours".format(
            self.backend_section, spent_time))

        self.retain_data(cfg['general']['retention_time'],
                         cfg['es_collection']['url'],
                         cfg[self.backend_section]['raw_index'])

        return errors
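
    # Usage sketch (hypothetical driver code; the class and constructor names
    # below are assumptions for illustration, not part of this file):
    #
    #   task = TaskRawDataCollection(config, backend_section='git')
    #   errors = task.execute()
    #   for record in errors:
    #       if record['error']:
    #           print(record['repo'], record['error'])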