Exemplo n.º 1
0
 def configure_build_manager(self):
     """Create, configure and register the build manager for this hub.

     Builds a ``builder.BuilderManager`` whose builder class is
     ``MyVariantDataBuilder`` pre-bound (via ``partial``) to a single
     ``TagObserved`` mapper named ``"observed"``, using the job manager
     already stored in ``self.managers``. The configured manager is stored
     under ``self.managers["build_manager"]``.
     """
     tag_observed = TagObserved(name="observed")
     variant_builder = partial(MyVariantDataBuilder, mappers=[tag_observed])
     manager = builder.BuilderManager(builder_class=variant_builder,
                                      job_manager=self.managers["job_manager"])
     manager.configure()
     # Only publish the manager once it has been configured.
     self.managers["build_manager"] = manager
     self.logger.info("Using custom builder %s" % MyVariantDataBuilder)
Exemplo n.º 2
0
 def configure_build_manager(self):
     """Create, configure, poll and register the build manager.

     The manager uses ``builder.DataBuilder`` pre-bound (via ``partial``)
     to one ``MyGenesetMapper`` named ``"count_genes"`` and the job manager
     found in ``self.managers``. After ``configure()`` and ``poll()`` have
     been called, it is stored under ``self.managers["build_manager"]``.
     """
     count_genes_mapper = MyGenesetMapper(name="count_genes")
     geneset_builder = partial(builder.DataBuilder, mappers=[count_genes_mapper])
     manager = builder.BuilderManager(job_manager=self.managers["job_manager"],
                                      builder_class=geneset_builder)
     # Same call order as before: configure first, then start polling.
     manager.configure()
     manager.poll()
     self.managers["build_manager"] = manager
Exemplo n.º 3
0
 def configure_build_manager(self):
     """Create, configure and register the build manager for this hub.

     Two mappers are chained: an ``EntrezRetired2Current`` instance is
     handed to ``Ensembl2Entrez`` through its ``retired2current`` argument,
     and both receive ``mongo.get_src_db`` as their ``db_provider``.
     ``MyGeneDataBuilder`` is pre-bound (via ``partial``) to the
     ``Ensembl2Entrez`` mapper and used as the manager's builder class.
     The configured manager is stored under
     ``self.managers["build_manager"]``.
     """
     retired_mapper = EntrezRetired2Current(
         convert_func=int,
         db_provider=mongo.get_src_db,
     )
     ensembl_mapper = Ensembl2Entrez(
         db_provider=mongo.get_src_db,
         retired2current=retired_mapper,
     )
     gene_builder = partial(MyGeneDataBuilder, mappers=[ensembl_mapper])
     manager = builder.BuilderManager(
         builder_class=gene_builder,
         job_manager=self.managers["job_manager"],
     )
     manager.configure()
     self.managers["build_manager"] = manager
     self.logger.info("Using custom builder %s" % MyGeneDataBuilder)
Exemplo n.º 4
0
 def configure_build_manager(self):
     """Create, configure, poll and register the taxonomy build manager.

     The manager uses ``TaxonomyDataBuilder`` pre-bound (via ``partial``)
     to one ``HasGeneMapper`` named ``"has_gene"``, the job manager found
     in ``self.managers`` and an explicit ``poll_schedule``. The manager is
     stored under ``self.managers["build_manager"]`` once set up.
     """
     hasgene = HasGeneMapper(name="has_gene")
     pbuilder = partial(TaxonomyDataBuilder, mappers=[hasgene])
     build_manager = builder.BuilderManager(
         job_manager=self.managers["job_manager"],
         builder_class=pbuilder,
         poll_schedule="* * * * * */10")
     # configure() only needs to run once.
     build_manager.configure()
     # A poll_schedule is passed to the constructor, so polling must be
     # started explicitly, otherwise the schedule is never used.
     # NOTE(review): poll() is assumed to take no arguments for this
     # BuilderManager version - confirm against the installed biothings SDK.
     build_manager.poll()
     self.managers["build_manager"] = build_manager
     self.logger.info("Using custom builder %s" % TaxonomyDataBuilder)
Exemplo n.º 5
0
 def configure_build_manager(self):
     """Create, configure and register a build manager with two builders.

     Both ``MyVariantDataBuilder`` and ``MyVariantShardedDataBuilder`` are
     pre-bound (via ``partial``) to the same two mapper instances:
     ``TagObserved`` (``"observed"``) and ``TagObservedAndSkipLongId``
     (``"observed_skipidtoolong"``). The two partials are passed as a list
     to ``builder.BuilderManager``, with the plain builder first. The
     configured manager is stored under ``self.managers["build_manager"]``.
     """
     tag_observed = TagObserved(name="observed")
     tag_observed_skip_long = TagObservedAndSkipLongId(
         name="observed_skipidtoolong")
     shared_mappers = (tag_observed, tag_observed_skip_long)
     # Each partial gets its own list object, as before, so the two
     # builders never share a mutable mappers list.
     builder_classes = [
         partial(MyVariantDataBuilder, mappers=list(shared_mappers)),
         partial(MyVariantShardedDataBuilder, mappers=list(shared_mappers)),
     ]
     manager = builder.BuilderManager(builder_class=builder_classes,
                                      job_manager=self.managers["job_manager"])
     manager.configure()
     self.managers["build_manager"] = manager
     self.logger.info("Using custom builder %s" % MyVariantDataBuilder)
Exemplo n.º 6
0
    data_plugin_manager=dp_manager,
    dumper_manager=dmanager,
    uploader_manager=upload_manager,
    keylookup=hub.keylookup.MyGeneKeyLookup,
    job_manager=job_manager)
# register available plugin assistants
assistant_manager.configure()
# load existing plugins
assistant_manager.load()

# Mapper chain for the builder: the EntrezRetired2Current instance is passed
# to Ensembl2Entrez through `retired2current`; both use mongo.get_src_db as
# their db_provider.
retired2current = EntrezRetired2Current(convert_func=int,
                                        db_provider=mongo.get_src_db)
ensembl2entrez = Ensembl2Entrez(db_provider=mongo.get_src_db,
                                retired2current=retired2current)
# Build manager using MyGeneDataBuilder pre-bound to the Ensembl2Entrez mapper.
build_manager = builder.BuilderManager(builder_class=partial(
    MyGeneDataBuilder, mappers=[ensembl2entrez]),
                                       job_manager=job_manager)
build_manager.configure()

# Differ manager with an explicit poll schedule.
# NOTE(review): the schedule string looks like a cron-style "every 10
# seconds" expression - confirm against the manager's scheduler syntax.
diff_manager = differ.DifferManager(job_manager=job_manager,
                                    poll_schedule="* * * * * */10")
diff_manager.configure()
# Poll for "diff": each polled document is turned into a partial call to
# diff_manager.diff("jsondiff-selfcontained", old=None, new=<doc _id>),
# which is handed to shell.launch rather than called directly.
diff_manager.poll(
    "diff", lambda doc: shell.launch(
        partial(diff_manager.diff,
                "jsondiff-selfcontained",
                old=None,
                new=doc["_id"])))
# Poll for "release_note": same pattern, calling diff_manager.release_note
# with old=None and new=<doc _id> through shell.launch.
diff_manager.poll(
    "release_note", lambda doc: shell.launch(
        partial(diff_manager.release_note, old=None, new=doc["_id"])))
Exemplo n.º 7
0
syncer_manager.configure()

# Dumper manager: register the sources listed in hub.dataload.__sources__
# and schedule all of them.
dmanager = dumper.DumperManager(job_manager=jmanager)
dmanager.register_sources(hub.dataload.__sources__)
dmanager.schedule_all()

# will check every 10 seconds for sources to upload
umanager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                    job_manager=jmanager)
umanager.register_sources(hub.dataload.__sources__)
# For each polled "upload" document, upload the source named by its "_id".
umanager.poll('upload', lambda doc: umanager.upload_src(doc["_id"]))

# Build manager using TaxonomyDataBuilder pre-bound to a HasGeneMapper named
# "has_gene", with the same poll schedule string as the uploader.
hasgene = HasGeneMapper(name="has_gene")
pbuilder = partial(TaxonomyDataBuilder, mappers=[hasgene])
bmanager = builder.BuilderManager(job_manager=jmanager,
                                  builder_class=pbuilder,
                                  poll_schedule="* * * * * */10")
bmanager.configure()
# For each polled "build" document, merge the configuration named by "_id".
bmanager.poll("build", lambda conf: bmanager.merge(conf["_id"]))

# Indexer manager configured with one "default" indexer: TaxonomyIndexer
# pre-bound to the Elasticsearch host from config.ES_HOST.
pindexer = partial(TaxonomyIndexer, es_host=config.ES_HOST)
index_manager = indexer.IndexerManager(job_manager=jmanager)
index_manager.configure([{"default": pindexer}])

# NOTE(review): mid-script import; left in place because the code before it
# has side effects and only part of the file is visible here.
from biothings.utils.hub import schedule, pending, done

# Ordered mapping of command names to the callables that implement them.
COMMANDS = OrderedDict()
# dump commands
COMMANDS["dump"] = dmanager.dump_src
# upload commands
COMMANDS["upload"] = umanager.upload_src
Exemplo n.º 8
0
# Hub shell bound to the job manager; used below to launch upload jobs.
shell = HubShell(job_manager)

# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                          job_manager=job_manager)
dump_manager = dumper.DumperManager(job_manager=job_manager)
# The source manager is given the source definitions plus both the dump and
# upload managers.
sources_path = hub.dataload.__sources_dict__
smanager = source.SourceManager(sources_path, dump_manager, upload_manager)

dump_manager.schedule_all()
# For each polled "upload" document, hand shell.launch a partial that calls
# upload_manager.upload_src with the document's "_id".
upload_manager.poll(
    'upload',
    lambda doc: shell.launch(partial(upload_manager.upload_src, doc["_id"])))

# Build manager using MyChemDataBuilder directly (no partial, no mappers).
build_manager = builder.BuilderManager(builder_class=MyChemDataBuilder,
                                       job_manager=job_manager)
build_manager.configure()

# Differ manager with the same poll schedule string as the uploader.
differ_manager = differ.DifferManager(job_manager=job_manager,
                                      poll_schedule="* * * * * */10")
differ_manager.configure()
# NOTE(review): unlike the upload poll above, these two callbacks call the
# differ manager directly instead of going through shell.launch - confirm
# this difference is intended.
differ_manager.poll(
    "diff", lambda doc: differ_manager.diff(
        "jsondiff-selfcontained", old=None, new=doc["_id"]))
differ_manager.poll(
    "release_note",
    lambda doc: differ_manager.release_note(old=None, new=doc["_id"]))

# test will access localhost ES, no need to throttle
syncer_manager_test = syncer.SyncerManager(job_manager=job_manager)
syncer_manager_test.configure()
Exemplo n.º 9
0
# deal with 3rdparty datasources
import biothings.hub.dataplugin.assistant as assistant
from biothings.hub.dataplugin.manager import DataPluginManager
dp_manager = DataPluginManager(job_manager=job_manager)
# The assistant manager is wired to the data plugin, dumper and uploader
# managers, all sharing the same job manager.
assistant_manager = assistant.AssistantManager(data_plugin_manager=dp_manager,
                                               dumper_manager=dmanager,
                                               uploader_manager=upload_manager,
                                               job_manager=job_manager)
# register available plugin assistants
assistant_manager.configure()
# load existing plugins
assistant_manager.load()

# Build manager using MyVariantDataBuilder pre-bound to a TagObserved mapper
# named "observed".
observed = TagObserved(name="observed")
build_manager = builder.BuilderManager(builder_class=partial(
    MyVariantDataBuilder, mappers=[observed]),
                                       job_manager=job_manager)
build_manager.configure()

# Differ manager configured with two differ classes, in this order.
differ_manager = differ.DifferManager(job_manager=job_manager)
differ_manager.configure(
    [differ.ColdHotSelfContainedJsonDiffer, differ.SelfContainedJsonDiffer])

# NOTE(review): this rebinds the name `inspector` from the object that
# provides InspectorManager (presumably a module) to the manager instance,
# so that object is no longer reachable through this name afterwards.
inspector = inspector.InspectorManager(upload_manager=upload_manager,
                                       build_manager=build_manager,
                                       job_manager=job_manager)

from biothings.hub.databuild.syncer import ThrottledESColdHotJsonDiffSelfContainedSyncer, ThrottledESJsonDiffSelfContainedSyncer, \
                                           ESColdHotJsonDiffSelfContainedSyncer, ESJsonDiffSelfContainedSyncer
syncer_manager = syncer.SyncerManager(job_manager=job_manager)
syncer_manager.configure(klasses=[