def configure_sync_manager(self):
    """Create and register the production and test sync managers.

    Two ``syncer.SyncerManager`` instances are built, both using
    ``self.managers["job_manager"]`` as their job manager:

    * ``self.managers["sync_manager"]`` is configured with the two
      ``MyVariantThrottled...`` syncer classes, each wrapped in a
      ``partial`` that binds ``config.MAX_SYNC_WORKERS`` as the first
      positional argument.
    * ``self.managers["sync_manager_test"]`` is configured with the two
      corresponding ``MyVariant...`` syncer classes, passed as-is.

    Both managers are reported through ``self.logger`` once registered.
    """
    # Production manager: throttled syncers bound to the worker limit.
    prod_syncer = syncer.SyncerManager(
        job_manager=self.managers["job_manager"])
    throttled_klasses = [
        partial(klass, config.MAX_SYNC_WORKERS)
        for klass in (
            MyVariantThrottledESColdHotJsonDiffSelfContainedSyncer,
            MyVariantThrottledESJsonDiffSelfContainedSyncer,
        )
    ]
    prod_syncer.configure(klasses=throttled_klasses)
    self.managers["sync_manager"] = prod_syncer

    # Test manager: same kinds of syncers, without the throttling wrapper.
    test_syncer = syncer.SyncerManager(
        job_manager=self.managers["job_manager"])
    plain_klasses = [
        MyVariantESColdHotJsonDiffSelfContainedSyncer,
        MyVariantESJsonDiffSelfContainedSyncer,
    ]
    test_syncer.configure(klasses=plain_klasses)
    self.managers["sync_manager_test"] = test_syncer

    message = "Using custom syncer, prod(throttled): %s, test: %s" % (
        prod_syncer, test_syncer)
    self.logger.info(message)
def _launch_diff(doc):
    """Poll callback: run a "jsondiff-selfcontained" diff via ``shell.launch``.

    ``doc["_id"]`` is passed as ``new``; ``old`` is left as ``None``.
    """
    diff_job = partial(diff_manager.diff, "jsondiff-selfcontained",
                       old=None, new=doc["_id"])
    return shell.launch(diff_job)


diff_manager.poll("diff", _launch_diff)


def _launch_release_note(doc):
    """Poll callback: build a release note via ``shell.launch``.

    ``doc["_id"]`` is passed as ``new``; ``old`` is left as ``None``.
    """
    release_note_job = partial(diff_manager.release_note,
                               old=None, new=doc["_id"])
    return shell.launch(release_note_job)


diff_manager.poll("release_note", _launch_release_note)

# NOTE: this rebinds the name ``inspector`` to the manager instance, so
# whatever ``inspector`` referred to before is no longer reachable here.
inspector = inspector.InspectorManager(
    upload_manager=upload_manager,
    build_manager=build_manager,
    job_manager=job_manager,
)

# The test sync manager talks to a localhost ES, so no throttling is needed
# and the default configuration is used.
sync_manager = syncer.SyncerManager(job_manager=job_manager)
sync_manager.configure()

# The production sync manager must be throttled: each syncer class is
# wrapped so that config.MAX_SYNC_WORKERS is bound as its first argument.
from biothings.hub.databuild.syncer import (
    ThrottledESJsonDiffSyncer,
    ThrottledESJsonDiffSelfContainedSyncer,
)
sync_manager_prod = syncer.SyncerManager(job_manager=job_manager)
sync_manager_prod.configure(klasses=[
    partial(klass, config.MAX_SYNC_WORKERS)
    for klass in (ThrottledESJsonDiffSyncer,
                  ThrottledESJsonDiffSelfContainedSyncer)
])

index_manager = indexer.IndexerManager(job_manager=job_manager)
index_manager.configure(config.ES_CONFIG)

# API manager: runs API instances from within the hub.
api_manager = APIManager()
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.mapper import HasGeneMapper
from hub.databuild.builder import TaxonomyDataBuilder
from hub.dataindex.indexer import TaxonomyIndexer

# Differ manager, using the shared job manager and its own poll schedule.
differ_manager = differ.DifferManager(job_manager=jmanager,
                                      poll_schedule="* * * * * */10")
differ_manager.configure()
# Callback for the "diff" poll: runs a "jsondiff-selfcontained" diff with
# doc["_id"] passed as `new` and `old` left as None.
differ_manager.poll(
    "diff", lambda doc: differ_manager.diff(
        "jsondiff-selfcontained", old=None, new=doc["_id"]))
# Callback for the "release_note" poll: same arguments, but calls
# release_note() instead of diff().
differ_manager.poll(
    "release_note",
    lambda doc: differ_manager.release_note(old=None, new=doc["_id"]))

# Sync manager with its default configuration.
syncer_manager = syncer.SyncerManager(job_manager=jmanager)
syncer_manager.configure()

# Dumper manager: registers every source listed in hub.dataload.__sources__
# and schedules them all.
dmanager = dumper.DumperManager(job_manager=jmanager)
dmanager.register_sources(hub.dataload.__sources__)
dmanager.schedule_all()

# will check every 10 seconds for sources to upload
umanager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                    job_manager=jmanager)
umanager.register_sources(hub.dataload.__sources__)
# Callback for the "upload" poll: uploads the source named by doc["_id"].
umanager.poll('upload', lambda doc: umanager.upload_src(doc["_id"]))

# Builder factory: TaxonomyDataBuilder with the "has_gene" mapper pre-bound
# as its only mapper.
hasgene = HasGeneMapper(name="has_gene")
pbuilder = partial(TaxonomyDataBuilder, mappers=[hasgene])
bmanager = builder.BuilderManager(job_manager=jmanager,
# Build manager using the MyChem-specific builder class.
build_manager = builder.BuilderManager(
    builder_class=MyChemDataBuilder,
    job_manager=job_manager,
)
build_manager.configure()

# Differ manager with its own poll schedule.
differ_manager = differ.DifferManager(
    job_manager=job_manager,
    poll_schedule="* * * * * */10",
)
differ_manager.configure()


def _diff_polled_build(doc):
    """Poll callback: run a "jsondiff-selfcontained" diff.

    ``doc["_id"]`` is passed as ``new``; ``old`` is left as ``None``.
    """
    return differ_manager.diff(
        "jsondiff-selfcontained", old=None, new=doc["_id"])


differ_manager.poll("diff", _diff_polled_build)


def _release_note_for_polled_build(doc):
    """Poll callback: build a release note.

    ``doc["_id"]`` is passed as ``new``; ``old`` is left as ``None``.
    """
    return differ_manager.release_note(old=None, new=doc["_id"])


differ_manager.poll("release_note", _release_note_for_polled_build)

# The test sync manager talks to a localhost ES, so no throttling is needed
# and the default configuration is used.
syncer_manager_test = syncer.SyncerManager(job_manager=job_manager)
syncer_manager_test.configure()

# The production sync manager must be throttled: each syncer class is
# wrapped so that config.MAX_SYNC_WORKERS is bound as its first argument.
from biothings.hub.databuild.syncer import (
    ThrottledESJsonDiffSyncer,
    ThrottledESJsonDiffSelfContainedSyncer,
)
syncer_manager_prod = syncer.SyncerManager(job_manager=job_manager)
syncer_manager_prod.configure(klasses=[
    partial(klass, config.MAX_SYNC_WORKERS)
    for klass in (ThrottledESJsonDiffSyncer,
                  ThrottledESJsonDiffSelfContainedSyncer)
])

index_manager = indexer.IndexerManager(job_manager=job_manager)
index_manager.configure(config.ES_CONFIG)

# Command registry; insertion order is preserved by the OrderedDict.
COMMANDS = OrderedDict()
# dump commands
COMMANDS["dump"] = dump_manager.dump_src