示例#1
0
class IndexPipelineConfig(Component):
    """Pipeline configuration made of two fixed index API providers.

    NOTE(review): this class is named like the interface it implements; once
    the class statement completes, the module-level name refers to this
    component rather than to the interface -- confirm that is intended.
    """
    implements(IndexPipelineConfig)

    def get_pipeline(self, *args, **kwargs):
        """Return the ordered list of providers forming the pipeline.

        Positional and keyword arguments are accepted but ignored.

        :return: ``[Processor1Provider component, Processor2Provider
                 component]``, both looked up in ``self.env``.
        """
        first_stage = self.env[Processor1Provider]
        second_stage = self.env[Processor2Provider]
        return [first_stage, second_stage]
示例#2
0
        class ForcePipeline(Component):
            """Pipeline configuration returning a fixed three-stage pipeline.

            The providers are looked up in ``env``, a name taken from the
            enclosing scope (not ``self.env``).
            """
            implements(IndexPipelineConfig)

            def get_pipeline(self, *args, **kwargs):
                """Return the providers in pipeline order.

                Positional and keyword arguments are accepted but ignored.

                :return: list of the ``IndexAPIForward``, ``LocalDumbIndex``
                         and ``LocalKV`` components, in that order.
                """
                forward_stage = env[IndexAPIForward]
                dumb_index_stage = env[LocalDumbIndex]
                kv_stage = env[LocalKV]
                return [forward_stage, dumb_index_stage, kv_stage]
示例#3
0
class CheckProcessor(Component):
    """Index API provider that builds a ``Check`` processor.

    The schemas handed to ``Check`` come from the single
    ``CheckProcessorSchemaProvider`` registered on ``schema_provider``.
    """
    implements(IndexAPIProvider)
    schema_provider = ExtensionPoint(CheckProcessorSchemaProvider, unique=True)

    def get_index_api(self, **config):
        """Create a ``Check`` instance configured for ``config['service']``.

        :param config: keyword configuration; must contain ``'service'``.
                       It is forwarded untouched to ``Check``.
        :raises KeyError: if ``'service'`` is missing from ``config``.
        :return: ``Check(default_schema, card_schemas, query_schema,
                 **config)``.
        """
        service_name = config['service']
        # Schemas are fetched in the same order as before: default, query,
        # then per-kind card schemas.
        fallback_schema = self.schema_provider.default_schema(service_name)
        search_schema = self.schema_provider.query_schema(service_name)
        per_kind_schemas = self.schema_provider.card_schemas(service_name)
        return Check(fallback_schema, per_kind_schemas, search_schema,
                     **config)
        class DumbIndexAPIConfiguration(Component):
            """Configuration provider echoing back the identifiers it gets."""
            implements(IndexAPIConfigurationProvider)

            def get_index_api_conf(self, service, docido_user_id,
                                   account_login, config):
                """Build the index API configuration dictionary.

                :param service: stored under the ``'service'`` key.
                :param docido_user_id: stored under ``'docido_user_id'``.
                :param account_login: stored under ``'account_login'``.
                :param config: accepted but not used.
                :return: dict holding the three values above.
                """
                index_api_conf = {}
                index_api_conf['service'] = service
                index_api_conf['docido_user_id'] = docido_user_id
                index_api_conf['account_login'] = account_login
                return index_api_conf
        class MyCrawler(Component):
            """Crawler returning ``tasks_count`` copies of ``_crawl_task``.

            ``_crawl_task``, ``tasks_count``, ``with_epilogue`` and
            ``_epilogue`` are taken from the enclosing scope.
            """
            implements(ICrawler)

            def get_service_name(self):
                """Return the service name, ``'fake-crawler'``."""
                service_name = 'fake-crawler'
                return service_name

            def iter_crawl_tasks(self, index, token, logger, full):
                """Describe the crawl as a dict of tasks.

                All four parameters are accepted but not used.

                :return: ``{'tasks': [...]}`` and, only when
                         ``with_epilogue`` is truthy, an additional
                         ``'epilogue'`` entry set to ``_epilogue``.
                """
                crawl_tasks = list(repeat(_crawl_task, tasks_count))
                if not with_epilogue:
                    return {'tasks': crawl_tasks}
                return {'tasks': crawl_tasks, 'epilogue': _epilogue}
示例#6
0
        class ForceConfig(Component):
            """Configuration provider pointing local storage at ``env.temp_dir``.

            ``env`` is taken from the enclosing scope.
            """
            implements(IndexAPIConfigurationProvider)

            def get_index_api_conf(self, service, docido_user_id,
                                   account_login, config):
                """Return the local-storage configuration.

                All four parameters are accepted but not used.

                :return: dict with a ``'local_storage'`` entry whose
                         ``'documents'`` and ``'kv'`` sections both have
                         their ``'path'`` set to ``env.temp_dir``.
                """
                documents_storage = {'path': env.temp_dir}
                kv_storage = {'path': env.temp_dir}
                local_storage = {
                    'documents': documents_storage,
                    'kv': kv_storage,
                }
                return {'local_storage': local_storage}
        class MyExactCrawler(Component):
            """Crawler returning two batches of ``_increment_task``.

            ``_increment_task`` and ``_epilogue`` are taken from the
            enclosing scope.
            """
            implements(ICrawler)

            def get_service_name(self):
                """Return the service name, ``'fake-crawler'``."""
                service_name = 'fake-crawler'
                return service_name

            def iter_crawl_tasks(self, index, token, config, logger):
                """Describe the crawl as two task batches plus an epilogue.

                All four parameters are accepted but not used.

                :return: dict whose ``'tasks'`` entry is a list of two lists
                         (10 then 13 copies of ``_increment_task``) and whose
                         ``'epilogue'`` entry is ``_epilogue``.
                """
                first_batch = list(repeat(_increment_task, 10))
                second_batch = list(repeat(_increment_task, 13))
                crawl_plan = {'tasks': [first_batch, second_batch]}
                crawl_plan['epilogue'] = _epilogue
                return crawl_plan
        class MyRetryCrawler(Component):
            """Crawler returning two variants of ``_retry_crawl_task``.

            ``_retry_crawl_task`` and ``_retry_epilogue`` are taken from the
            enclosing scope.
            """
            implements(ICrawler)

            def get_service_name(self):
                """Return the service name, ``'fake-crawler'``."""
                service_name = 'fake-crawler'
                return service_name

            def iter_crawl_tasks(self, index, token, config, logger):
                """Describe the crawl as two tasks plus an epilogue.

                All four parameters are accepted but not used.

                :return: dict whose ``'tasks'`` entry lists
                         ``_retry_crawl_task`` as-is, then the same callable
                         bound with ``max_retries=2``; ``'epilogue'`` is
                         ``_retry_epilogue``.
                """
                capped_retry_task = functools.partial(_retry_crawl_task,
                                                      max_retries=2)
                crawl_plan = {'tasks': [_retry_crawl_task, capped_retry_task]}
                crawl_plan['epilogue'] = _retry_epilogue
                return crawl_plan
示例#9
0
class DocidoCheckProcessorSchemaProvider(Component):
    """Build voluptuous schemas for the check processor.

    Every schema results from merging a core-level definition with a
    per-service (crawler) definition. Both are read from the
    ``check_processor`` / ``schemas`` section of the indexing configuration
    exposed by the ``indexing_config`` extension point.
    """
    implements(CheckProcessorSchemaProvider)
    indexing_config = ExtensionPoint(PullCrawlerIndexingConfig, unique=True)

    def _get_config(self, indexing_config):
        """Extract the ``check_processor.schemas`` section.

        :param indexing_config: mapping-like indexing configuration.
        :return: the ``'schemas'`` value, or ``{}`` when either
                 ``'check_processor'`` or ``'schemas'`` is absent.
        """
        check_processor = indexing_config.get('check_processor', {})
        return check_processor.get('schemas', {})

    @lazy
    def _core_config(self):
        """Schemas section of the core indexing configuration."""
        return self._get_config(self.indexing_config.core())

    def _crawler_config(self, service):
        """Schemas section of the indexing configuration of `service`."""
        return self._get_config(self.indexing_config.service(service))

    def _schema_from_dicts(self, core_conf, crawler_conf):
        """Merge two schema descriptions into a ``voluptuous.Schema``.

        Both inputs are deep-copied before being merged, presumably so that
        ``merge_dicts`` cannot alter the configuration objects.

        :param core_conf: core-level schema description.
        :param crawler_conf: crawler-level schema description.
        :return: ``voluptuous.Schema`` built from the ``(schema, options)``
                 pair returned by ``from_dict`` on the merged description.
        """
        schema, options = from_dict(
            merge_dicts(copy.deepcopy(core_conf), copy.deepcopy(crawler_conf)))
        return voluptuous.Schema(schema, **options)

    def _get_schemas(self, service):
        """Build one schema per card kind declared in the core configuration.

        :param service: name of the crawler service.
        :return: dict mapping each kind to its merged schema; empty when the
                 core configuration declares no kind.
        """
        # `or {}` covers a 'kind' entry explicitly set to a falsy value.
        kind_schemas = self._core_config.get('card', {}).get('kind', {}) or {}
        # NOTE(review): the crawler-side description of a kind is looked up
        # at the top level of the crawler schemas, whereas the core side
        # lives under card/kind -- confirm this asymmetry is intended.
        # `.items()` is used instead of `.iteritems()`: the latter only
        # exists on Python 2 and would fail on the plain-dict fallback above
        # under Python 3. No extra deepcopy is needed here because
        # `_schema_from_dicts` already copies both of its arguments.
        return {
            kind: self._schema_from_dicts(
                core_conf, self._crawler_config(service).get(kind, {}))
            for kind, core_conf in kind_schemas.items()
        }

    def card_schemas(self, service):
        """Return the per-kind card schemas of `service` (see `_get_schemas`)."""
        return self._get_schemas(service)

    def default_schema(self, service):
        """Return the schema merged from the ``card.default`` sections.

        :param service: name of the crawler service.
        """
        core_default = self._core_config.get('card', {}).get('default', {})
        crawler_config = self._crawler_config(service).get('card', {}).get(
            'default', {})
        return self._schema_from_dicts(core_default, crawler_config)

    def query_schema(self, service):
        """Return the schema merged from the ``query`` sections.

        :param service: name of the crawler service.
        """
        core_query = self._core_config.get('query', {})
        crawler_query = self._crawler_config(service).get('query', {})
        return self._schema_from_dicts(core_query, crawler_query)
示例#10
0
class YamlPullCrawlersIndexingConfig(Component):
    """Indexing and pipeline configuration read from ``docido_config``."""
    implements(PullCrawlerIndexingConfig, IndexPipelineConfig)
    index_api_providers = ExtensionPoint(IndexAPIProvider)

    def service(self, service):
        """Return the ``indexing`` section of one crawler.

        Missing or falsy intermediate sections (``pull_crawlers``,
        ``crawlers``, the crawler itself) are treated as empty.

        :param service: crawler name used as key under ``crawlers``.
        :return: the crawler's ``'indexing'`` value, ``{}`` if absent.
        """
        pull_crawlers = docido_config.get('pull_crawlers') or {}
        all_crawlers = pull_crawlers.get('crawlers') or {}
        crawler_section = all_crawlers.get(service) or {}
        return crawler_section.get('indexing', {})

    def core(self):
        """Return ``docido_config.pull_crawlers.indexing``."""
        return docido_config.pull_crawlers.indexing

    def get_pipeline(self, service):
        """Resolve the processor pipeline of `service` into providers.

        The crawler's own ``pipeline`` is used when its indexing section
        defines one; otherwise the core indexing ``pipeline`` applies.

        :param service: crawler name.
        :raises KeyError: if a pipeline entry matches no registered
                          provider class name.
        :return: list of providers, in pipeline order.
        """
        service_config = self.service(service)
        pipeline_owner = (
            service_config if 'pipeline' in service_config else self.core()
        )
        provider_names = pipeline_owner.pipeline
        # Providers are addressed by the name of their class.
        providers_by_name = {
            provider.__class__.__name__: provider
            for provider in self.index_api_providers
        }
        return [providers_by_name[name] for name in provider_names]
示例#11
0
class Processor2Provider(Component):
    """Index API provider creating ``Processor2`` instances."""
    implements(IndexAPIProvider)

    def get_index_api(self, parent=None, **config):
        """Return ``Processor2(parent, **config)``.

        :param parent: passed as first positional argument, ``None`` by
                       default.
        :param config: keyword configuration forwarded untouched.
        """
        processor = Processor2(parent, **config)
        return processor
示例#12
0
class FooComponent(Component):
    """Component declaring both ``FooInterface`` and ``Foobar``."""
    # No methods are defined; the class only registers the two interfaces.
    implements(FooInterface, Foobar)
示例#13
0
class LocalKV(Component):
    """Index API provider creating ``LocalKVProcessor`` instances."""
    implements(IndexAPIProvider)

    def get_index_api(self, **config):
        """Return ``LocalKVProcessor(**config)``.

        :param config: keyword configuration forwarded untouched.
        """
        processor = LocalKVProcessor(**config)
        return processor
示例#14
0
        class IndexAPIForward(Component):
            """Index API provider creating ``IndexAPIProcessor`` instances."""
            implements(IndexAPIProvider)

            def get_index_api(self, **config):
                """Return ``IndexAPIProcessor(**config)``.

                :param config: keyword configuration forwarded untouched.
                """
                processor = IndexAPIProcessor(**config)
                return processor
示例#15
0
 class IndexAPIForwardProcessor(Component):
     """Component declaring the ``IndexAPIProcessor`` interface."""
     # No methods are defined; the class only registers the interface.
     implements(IndexAPIProcessor)
示例#16
0
class BarComponent(Component):
    """Component declaring both ``BarInterface`` and ``Foobar``."""
    # No methods are defined; the class only registers the two interfaces.
    implements(BarInterface, Foobar)
示例#17
0
class Elasticsearch(Component):
    """Index API provider creating ``ElasticsearchProcessor`` instances."""
    implements(IndexAPIProvider)

    def get_index_api(self, **config):
        """Return ``ElasticsearchProcessor(**config)``.

        :param config: keyword configuration forwarded untouched.
        """
        processor = ElasticsearchProcessor(**config)
        return processor