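# NOTE: this excerpt starts below the original module header; the imports in
# this block are reconstructed from usage. The stdlib ones are certain; the
# biothings-specific paths are assumptions based on the usual module layout,
# and some names (eg. status) may instead be defined elsewhere in the module.
import sys
import copy
import asyncio
import logging
import concurrent.futures
from collections import OrderedDict
from functools import partial
from pprint import pformat

import asyncssh

import biothings
from biothings import config
from biothings.utils.loggers import get_logger, ShellLogger, WSLogHandler, WSShellHandler
from biothings.utils.hub import (HubShell, HubCommands, HubReloader,
                                 CommandDefinition, start_ssh_server,
                                 schedule, pending, status)
from biothings.utils.jsondiff import make as jsondiff
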
class HubServer(object):

    DEFAULT_FEATURES = ["config", "job", "dump", "upload", "dataplugin", "source",
                        "build", "diff", "index", "snapshot", "release", "inspect",
                        "sync", "api", "terminal", "reloader", "dataupload", "ws"]
    DEFAULT_MANAGERS_ARGS = {"upload": {"poll_schedule": "* * * * * */10"}}
    DEFAULT_RELOADER_CONFIG = {
        "folders": None,  # will use default one
        "managers": ["source_manager", "assistant_manager"],
        "reload_func": None,  # will use default one
    }
    DEFAULT_DATAUPLOAD_CONFIG = {"upload_root": getattr(config, "DATA_UPLOAD_FOLDER", None)}
    DEFAULT_WEBSOCKET_CONFIG = {}
    DEFAULT_API_CONFIG = {}

    def __init__(self, source_list, features=None, name="BioThings Hub",
                 managers_custom_args={}, api_config=None, reloader_config=None,
                 dataupload_config=None, websocket_config=None):
        """
        Helper to set up and instantiate the common managers usually used in a
        hub (eg. dumper manager, uploader manager, etc...).

        "source_list" is either:
        - a list of strings corresponding to paths to datasource modules
        - a package containing sub-folders with datasource modules

        Specific managers can be selected by adjusting the "features"
        parameter, where each feature corresponds to one or more managers. The
        parameter defaults to all available features. Managers are
        configured/initialized in the same order as the list, so if a manager
        (eg. job_manager) is required by all others, it must come first in the
        list.

        "managers_custom_args" is an optional dict used to pass specific
        arguments while initializing managers:

            managers_custom_args={"upload": {"poll_schedule": "*/5 * * * *"}}

        will set the poll schedule to check for uploads every 5min (instead of
        the default 10s).

        "reloader_config", "dataupload_config" and "websocket_config" can be
        used to customize the reloader, dataupload and websocket features. If
        None, the default config is used. If explicitly False, the feature is
        deactivated.
        """
        self.name = name
        self.source_list = source_list
        self.logger, self.logfile = get_logger("hub")
        self._passed_features = features
        self._passed_managers_custom_args = managers_custom_args
        self.features = self.clean_features(features or self.DEFAULT_FEATURES)
        self.managers_custom_args = managers_custom_args
        self.reloader_config = reloader_config or self.DEFAULT_RELOADER_CONFIG
        self.dataupload_config = dataupload_config or self.DEFAULT_DATAUPLOAD_CONFIG
        self.websocket_config = websocket_config or self.DEFAULT_WEBSOCKET_CONFIG
        # collect listeners that should be connected (push data through) to websocket
        self.ws_listeners = []
        self.api_config = api_config or self.DEFAULT_API_CONFIG
        # set during configure()
        self.managers = None
        self.api_endpoints = None
        self.shell = None
        self.commands = None
        self.extra_commands = None
        self.routes = []
        # flag "do we need to configure?"
        self.configured = False

    def clean_features(self, features):
        # we can't just use "set()" because we need to preserve order
        ordered = OrderedDict()
        for feat in features:
            if feat not in ordered:
                ordered[feat] = None
        return list(ordered.keys())

    def before_configure(self):
        """
        Hook triggered before configure(), used eg.
        to adjust the features list
        """
        pass

    def configure(self):
        self.before_configure()
        # keep track of what's been configured
        self.remaining_features = copy.deepcopy(self.features)
        self.configure_ioloop()
        self.configure_managers()
        # setup the shell
        self.shell = HubShell(self.managers["job_manager"])
        self.shell.register_managers(self.managers)
        # propagate server instance in shell so it's accessible
        # from the console if needed
        self.shell.server = self
        self.configure_remaining_features()
        self.configure_commands()
        self.configure_extra_commands()
        self.shell.set_commands(self.commands, self.extra_commands)
        # set API. This comes after shell setup, as it adds some default
        # commands we want to expose through the API
        if self.api_config != False:
            self.configure_api_endpoints()
            from biothings.hub.api import generate_api_routes
            self.routes.extend(generate_api_routes(self.shell, self.api_endpoints))
        # done
        self.configured = True

    def configure_ioloop(self):
        import tornado.platform.asyncio
        tornado.platform.asyncio.AsyncIOMainLoop().install()

    def before_start(self):
        pass

    def start(self):
        if not self.configured:
            self.configure()
        self.logger.info("Starting server '%s'" % self.name)
        # can't use asyncio.get_event_loop() if python < 3.5.3 as it would
        # return another instance of the aio loop; take it from job_manager
        # to make sure we share the same one
        loop = self.managers["job_manager"].loop
        if self.routes:
            self.logger.info(self.routes)
            self.logger.info("Starting Hub API server")
            import tornado.web
            # register app into current event loop
            api = tornado.web.Application(self.routes)
            self.extra_commands["api"] = api
            from biothings.hub.api import start_api
            api_server = start_api(api, config.HUB_API_PORT,
                                   settings=getattr(config, "TORNADO_SETTINGS", {}))
        else:
            self.logger.info("No route defined, API server won't start")
        # at this point, everything is ready/set, last call for customizations
        self.before_start()
        self.ssh_server = start_ssh_server(loop, self.name, passwords=config.HUB_PASSWD,
                                           port=config.HUB_SSH_PORT, shell=self.shell)
        try:
            loop.run_until_complete(self.ssh_server)
        except (OSError, asyncssh.Error) as exc:
            sys.exit('Error starting server: ' + str(exc))
        loop.run_forever()

    def mixargs(self, feat, params={}):
        # merge default params with per-feature custom args; custom args win
        args = {}
        for p in params:
            args[p] = self.managers_custom_args.get(feat, {}).pop(p, None) or params[p]
        # mix in remaining custom args
        args.update(self.managers_custom_args.get(feat, {}))
        return args

    def configure_job_manager(self):
        import asyncio
        loop = asyncio.get_event_loop()
        from biothings.utils.manager import JobManager
        args = self.mixargs("job", {"num_workers": config.HUB_MAX_WORKERS,
                                    "max_memory_usage": config.HUB_MAX_MEM_USAGE})
        job_manager = JobManager(loop, **args)
        self.managers["job_manager"] = job_manager

    def configure_dump_manager(self):
        from biothings.hub.dataload.dumper import DumperManager
        args = self.mixargs("dump")
        dmanager = DumperManager(job_manager=self.managers["job_manager"], **args)
        self.managers["dump_manager"] = dmanager

    def configure_upload_manager(self):
        from biothings.hub.dataload.uploader import UploaderManager
        args = self.mixargs("upload", {"poll_schedule": "* * * * * */10"})
        upload_manager = UploaderManager(job_manager=self.managers["job_manager"], **args)
        self.managers["upload_manager"] = upload_manager

    def configure_dataplugin_manager(self):
        from biothings.hub.dataplugin.manager import DataPluginManager
        dp_manager = DataPluginManager(job_manager=self.managers["job_manager"])
        self.managers["dataplugin_manager"] = dp_manager
        from biothings.hub.dataplugin.assistant import AssistantManager
        args = self.mixargs("dataplugin")
        assistant_manager = AssistantManager(
            data_plugin_manager=dp_manager,
            dumper_manager=self.managers["dump_manager"],
            uploader_manager=self.managers["upload_manager"],
            job_manager=self.managers["job_manager"],
            **args)
        self.managers["assistant_manager"] = assistant_manager

    def configure_build_manager(self):
        from biothings.hub.databuild.builder import BuilderManager
        args = self.mixargs("build")
        build_manager = BuilderManager(job_manager=self.managers["job_manager"], **args)
        build_manager.configure()
        self.managers["build_manager"] = build_manager

    def configure_diff_manager(self):
        from biothings.hub.databuild.differ import DifferManager, SelfContainedJsonDiffer
        args = self.mixargs("diff")
        diff_manager = DifferManager(job_manager=self.managers["job_manager"],
                                     poll_schedule="* * * * * */10", **args)
        diff_manager.configure([SelfContainedJsonDiffer, ])
        diff_manager.poll("diff", lambda doc: diff_manager.diff("jsondiff-selfcontained",
                                                                old=None, new=doc["_id"]))
        self.managers["diff_manager"] = diff_manager

    def configure_index_manager(self):
        from biothings.hub.dataindex.indexer import IndexManager
        args = self.mixargs("index")
        index_manager = IndexManager(job_manager=self.managers["job_manager"], **args)
        index_manager.configure(config.INDEX_CONFIG)
        self.managers["index_manager"] = index_manager

    def configure_snapshot_manager(self):
        assert "index" in self.features, "'snapshot' feature requires 'index'"
        from biothings.hub.dataindex.snapshooter import SnapshotManager
        args = self.mixargs("snapshot")
        snapshot_manager = SnapshotManager(
            index_manager=self.managers["index_manager"],
            job_manager=self.managers["job_manager"],
            **args)
        snapshot_manager.configure(config.SNAPSHOT_CONFIG)
        #snapshot_manager.poll("snapshot",lambda doc: snapshot_manager.snapshot(snapshot_env=???,index=doc["_id"]))
        self.managers["snapshot_manager"] = snapshot_manager

    def configure_release_manager(self):
        assert "diff" in self.features, "'release' feature requires 'diff'"
        assert "snapshot" in self.features, "'release' feature requires 'snapshot'"
        from biothings.hub.datarelease.publisher import ReleaseManager
        args = self.mixargs("release")
        release_manager = ReleaseManager(
            diff_manager=self.managers["diff_manager"],
            snapshot_manager=self.managers["snapshot_manager"],
            job_manager=self.managers["job_manager"],
            poll_schedule="* * * * * */10", **args)
        release_manager.configure(config.RELEASE_CONFIG)
        release_manager.poll("release_note",
                             lambda doc: release_manager.create_release_note(old=None, new=doc["_id"]))
        self.managers["release_manager"] = release_manager

    def configure_sync_manager(self):
        from biothings.hub.databuild.syncer import SyncerManager
        args = self.mixargs("sync")
        sync_manager = SyncerManager(job_manager=self.managers["job_manager"], **args)
        sync_manager.configure()
        self.managers["sync_manager"] = sync_manager

    def configure_inspect_manager(self):
        assert "upload" in self.features, "'inspect' feature requires 'upload'"
        assert "build" in self.features, "'inspect' feature requires 'build'"
        from biothings.hub.datainspect.inspector import InspectorManager
        args = self.mixargs("inspect")
        inspect_manager = InspectorManager(
            upload_manager=self.managers["upload_manager"],
            build_manager=self.managers["build_manager"],
            job_manager=self.managers["job_manager"], **args)
        self.managers["inspect_manager"] = inspect_manager

    def configure_api_manager(self):
        assert "index" in self.features, "'api' feature requires 'index'"
        from biothings.hub.api.manager import APIManager
        args = self.mixargs("api")
        api_manager = APIManager(**args)
        self.managers["api_manager"] = api_manager
    def configure_source_manager(self):
        if "dump" in self.features or "upload" in self.features:
            args = self.mixargs("source")
            from biothings.hub.dataload.source import SourceManager
            source_manager = SourceManager(
                source_list=self.source_list,
                dump_manager=self.managers["dump_manager"],
                upload_manager=self.managers["upload_manager"],
                data_plugin_manager=self.managers.get("dataplugin_manager"),
            )
            self.managers["source_manager"] = source_manager
        # init data plugins once source_manager has been set (it inits dumper
        # and uploader managers; if assistant_manager is configured/loaded
        # before, datasources won't appear in dumper/uploader managers as
        # they were not ready yet)
        if "dataplugin" in self.features:
            self.managers["assistant_manager"].configure()
            self.managers["assistant_manager"].load()
        # now that we have the source manager set up, we can schedule and poll
        if "dump" in self.features and not getattr(config, "SKIP_DUMPER_SCHEDULE", False):
            self.managers["dump_manager"].schedule_all()
        if "upload" in self.features and not getattr(config, "SKIP_UPLOADER_POLL", False):
            self.managers["upload_manager"].poll(
                'upload',
                lambda doc: self.shell.launch(
                    partial(self.managers["upload_manager"].upload_src, doc["_id"])))

    def configure_managers(self):
        if self.managers is not None:
            raise Exception("Managers have already been configured")
        self.managers = {}
        self.logger.info("Setting up managers for following features: %s" % self.features)
        assert "job" in self.features, "'job' feature is mandatory"
        if "source" in self.features:
            assert "dump" in self.features and "upload" in self.features, \
                "'source' feature requires both 'dump' and 'upload' features"
        if "dataplugin" in self.features:
            assert "source" in self.features, "'dataplugin' feature requires 'source' feature"
        # specific order, eg. job_manager is used by all managers
        for feat in self.features:
            if hasattr(self, "configure_%s_manager" % feat):
                getattr(self, "configure_%s_manager" % feat)()
                self.remaining_features.remove(feat)
            elif hasattr(self, "configure_%s_feature" % feat):
                # see configure_remaining_features(): this is configured after
                # managers, so it should not produce an error here
                pass
            else:
                raise AttributeError(
                    "Feature '%s' listed but no 'configure_%s_{manager|feature}' method found" % (feat, feat))
        self.logger.info("Active manager(s): %s" % pformat(self.managers))

    def configure_config_feature(self):
        # just a placeholder
        pass

    def configure_ws_feature(self):
        # add websocket endpoint
        import biothings.hub.api.handlers.ws as ws
        import sockjs.tornado
        from biothings.utils.hub_db import ChangeWatcher
        # monitor changes in the database to report activity in the webapp
        self.db_listener = ws.HubDBListener()
        ChangeWatcher.add(self.db_listener)
        ChangeWatcher.publish()
        self.log_listener = ws.LogListener()  # push log statements to the webapp
        # careful: the asyncio logger will trigger log statements while in the
        # handler (ie. infinite loop), so the root logger is not recommended
        root_logger = logging.getLogger()
        root_logger.addHandler(WSLogHandler(self.log_listener))
        self.ws_listeners.extend([self.db_listener, self.log_listener])
        ws_router = sockjs.tornado.SockJSRouter(
            partial(ws.WebSocketConnection, listeners=self.ws_listeners), '/ws')
        self.routes.extend(ws_router.urls)

    def configure_terminal_feature(self):
        assert "ws" in self.features, "'terminal' feature requires 'ws'"
        assert "ws" in self.remaining_features, "'terminal' feature should be configured before 'ws'"
        # shell logger/listener to communicate between webapp and hub ssh console
        import biothings.hub.api.handlers.ws as ws
        shell_listener = ws.LogListener()
        shell_logger = logging.getLogger("shell")
        assert isinstance(shell_logger, ShellLogger), "shell_logger isn't properly set"
        shell_logger.addHandler(WSShellHandler(shell_listener))
        self.ws_listeners.append(shell_listener)
        # webapp terminal to hub shell connection through /shell endpoint
        from biothings.hub.api.handlers.shell import ShellHandler
        shell_endpoint = ("/shell", ShellHandler,
                          {"shell": self.shell, "shellog": shell_logger})
        self.routes.append(shell_endpoint)

    def configure_dataupload_feature(self):
        assert "ws" in self.features, "'dataupload' feature requires 'ws'"
        assert "ws" in self.remaining_features, "'dataupload' feature should be configured before 'ws'"
        # this one is not bound to a specific command
        from biothings.hub.api.handlers.upload import UploadHandler
        # tuple type = interpreted as a route handler
        self.routes.append((r"/dataupload/([\w\.-]+)?", UploadHandler, self.dataupload_config))

    def configure_reloader_feature(self):
        monitored_folders = self.reloader_config["folders"] or \
            ["hub/dataload/sources", getattr(config, "DATA_PLUGIN_FOLDER", None)]
        reload_managers = [self.managers[m] for m in self.reloader_config["managers"]
                           if m in self.managers]
        reload_func = self.reloader_config["reload_func"] or \
            partial(self.shell.restart, force=True)
        reloader = HubReloader(monitored_folders, reload_managers, reload_func=reload_func)
        reloader.monitor()

    def configure_remaining_features(self):
        self.logger.info("Setting up remaining features: %s" % self.remaining_features)
        # specific order, eg. job_manager is used by all managers
        for feat in copy.deepcopy(self.remaining_features):
            if hasattr(self, "configure_%s_feature" % feat):
                getattr(self, "configure_%s_feature" % feat)()
                self.remaining_features.remove(feat)
            else:
                raise AttributeError(
                    "Feature '%s' listed but no 'configure_%s_feature' method found" % (feat, feat))

    def configure_commands(self):
        """
        Configure hub commands according to available managers
        """
        assert self.managers, "No managers configured"
        self.commands = HubCommands()
        self.commands["status"] = CommandDefinition(command=partial(status, self.managers), tracked=False)
        if "config" in self.features:
            self.commands["config"] = CommandDefinition(command=config.show, tracked=False)
            self.commands["setconf"] = config.store_value_to_db
            self.commands["resetconf"] = config.reset
        # getting info
        if self.managers.get("source_manager"):
            self.commands["source_info"] = CommandDefinition(command=self.managers["source_manager"].get_source, tracked=False)
            self.commands["source_reset"] = CommandDefinition(command=self.managers["source_manager"].reset, tracked=True)
        # dump commands
        if self.managers.get("dump_manager"):
            self.commands["dump"] = self.managers["dump_manager"].dump_src
            self.commands["dump_all"] = self.managers["dump_manager"].dump_all
        # upload commands
        if self.managers.get("upload_manager"):
            self.commands["upload"] = self.managers["upload_manager"].upload_src
            self.commands["upload_all"] = self.managers["upload_manager"].upload_all
        # building/merging
        if self.managers.get("build_manager"):
            self.commands["whatsnew"] = CommandDefinition(command=self.managers["build_manager"].whatsnew, tracked=False)
            self.commands["lsmerge"] = self.managers["build_manager"].list_merge
            self.commands["rmmerge"] = self.managers["build_manager"].delete_merge
            self.commands["merge"] = self.managers["build_manager"].merge
            self.commands["archive"] = self.managers["build_manager"].archive_merge
        if hasattr(config, "INDEX_CONFIG"):
            self.commands["index_config"] = config.INDEX_CONFIG
        if hasattr(config, "SNAPSHOT_CONFIG"):
            self.commands["snapshot_config"] = config.SNAPSHOT_CONFIG
        if hasattr(config, "PUBLISH_CONFIG"):
            self.commands["publish_config"] = config.PUBLISH_CONFIG
        # diff
        if self.managers.get("diff_manager"):
            self.commands["diff"] = self.managers["diff_manager"].diff
            self.commands["report"] = self.managers["diff_manager"].diff_report
        # indexing commands
        if self.managers.get("index_manager"):
            self.commands["index"] = self.managers["index_manager"].index
        if self.managers.get("snapshot_manager"):
            self.commands["snapshot"] = self.managers["snapshot_manager"].snapshot
        # data release commands
        if self.managers.get("release_manager"):
            self.commands["create_release_note"] = self.managers["release_manager"].create_release_note
            self.commands["get_release_note"] = CommandDefinition(command=self.managers["release_manager"].get_release_note, tracked=False)
            self.commands["publish"] = self.managers["release_manager"].publish
            self.commands["publish_diff"] = self.managers["release_manager"].publish_diff
            self.commands["publish_snapshot"] = self.managers["release_manager"].publish_snapshot
        if self.managers.get("sync_manager"):
            self.commands["sync"] = CommandDefinition(command=self.managers["sync_manager"].sync)
        # inspector
        if self.managers.get("inspect_manager"):
            self.commands["inspect"] = self.managers["inspect_manager"].inspect
        # data plugins
        if self.managers.get("assistant_manager"):
            self.commands["register_url"] = partial(self.managers["assistant_manager"].register_url)
            self.commands["unregister_url"] = partial(self.managers["assistant_manager"].unregister_url)
            self.commands["export_plugin"] = partial(self.managers["assistant_manager"].export)
        if self.managers.get("dataplugin_manager"):
            self.commands["dump_plugin"] = self.managers["dataplugin_manager"].dump_src
        logging.info("Registered commands: %s" % list(self.commands.keys()))

    def configure_extra_commands(self):
        """
        Same as configure_commands(), but these commands are not exposed
        publicly in the shell (they are shortcuts, commands used by API
        endpoints, supporting commands, etc...)
        """
        assert self.managers, "No managers configured"
        self.extra_commands = {}  # unordered since not exposed, we don't care
        loop = self.managers.get("job_manager") and self.managers["job_manager"].loop \
            or asyncio.get_event_loop()
        self.extra_commands["g"] = CommandDefinition(command=globals(), tracked=False)
        self.extra_commands["sch"] = CommandDefinition(command=partial(schedule, loop), tracked=False)
        # expose constants so there's no need for quotes (eg. top(pending) instead of top("pending"))
        self.extra_commands["pending"] = CommandDefinition(command=pending, tracked=False)
        self.extra_commands["loop"] = CommandDefinition(command=loop, tracked=False)
        if self.managers.get("job_manager"):
            self.extra_commands["pqueue"] = CommandDefinition(command=self.managers["job_manager"].process_queue, tracked=False)
            self.extra_commands["tqueue"] = CommandDefinition(command=self.managers["job_manager"].thread_queue, tracked=False)
            self.extra_commands["jm"] = CommandDefinition(command=self.managers["job_manager"], tracked=False)
            self.extra_commands["top"] = CommandDefinition(command=self.managers["job_manager"].top, tracked=False)
            self.extra_commands["job_info"] = CommandDefinition(command=self.managers["job_manager"].job_info, tracked=False)
        if self.managers.get("source_manager"):
            self.extra_commands["sm"] = CommandDefinition(command=self.managers["source_manager"], tracked=False)
            self.extra_commands["sources"] = CommandDefinition(command=self.managers["source_manager"].get_sources, tracked=False)
            self.extra_commands["source_save_mapping"] = CommandDefinition(command=self.managers["source_manager"].save_mapping)
        if self.managers.get("dump_manager"):
            self.extra_commands["dm"] = CommandDefinition(command=self.managers["dump_manager"], tracked=False)
            self.extra_commands["dump_info"] = CommandDefinition(command=self.managers["dump_manager"].dump_info, tracked=False)
        if self.managers.get("dataplugin_manager"):
            self.extra_commands["dpm"] = CommandDefinition(command=self.managers["dataplugin_manager"], tracked=False)
        if self.managers.get("assistant_manager"):
            self.extra_commands["am"] = CommandDefinition(command=self.managers["assistant_manager"], tracked=False)
        if self.managers.get("upload_manager"):
            self.extra_commands["um"] = CommandDefinition(command=self.managers["upload_manager"], tracked=False)
            self.extra_commands["upload_info"] = CommandDefinition(command=self.managers["upload_manager"].upload_info, tracked=False)
        if self.managers.get("build_manager"):
            self.extra_commands["bm"] = CommandDefinition(command=self.managers["build_manager"], tracked=False)
            self.extra_commands["builds"] = CommandDefinition(command=self.managers["build_manager"].build_info, tracked=False)
            self.extra_commands["build"] = CommandDefinition(command=lambda id: self.managers["build_manager"].build_info(id=id), tracked=False)
            self.extra_commands["build_config_info"] = CommandDefinition(command=self.managers["build_manager"].build_config_info, tracked=False)
            self.extra_commands["build_save_mapping"] = CommandDefinition(command=self.managers["build_manager"].save_mapping)
            self.extra_commands["create_build_conf"] = CommandDefinition(command=self.managers["build_manager"].create_build_configuration)
            self.extra_commands["update_build_conf"] = CommandDefinition(command=self.managers["build_manager"].update_build_configuration)
            self.extra_commands["delete_build_conf"] = CommandDefinition(command=self.managers["build_manager"].delete_build_configuration)
        if self.managers.get("diff_manager"):
            self.extra_commands["dim"] = CommandDefinition(command=self.managers["diff_manager"], tracked=False)
            self.extra_commands["diff_info"] = CommandDefinition(command=self.managers["diff_manager"].diff_info, tracked=False)
            self.extra_commands["jsondiff"] = CommandDefinition(command=jsondiff, tracked=False)
        if self.managers.get("sync_manager"):
            self.extra_commands["sym"] = CommandDefinition(command=self.managers["sync_manager"], tracked=False)
        if self.managers.get("index_manager"):
            self.extra_commands["im"] = CommandDefinition(command=self.managers["index_manager"], tracked=False)
            self.extra_commands["index_info"] = CommandDefinition(command=self.managers["index_manager"].index_info, tracked=False)
            self.extra_commands["validate_mapping"] = CommandDefinition(command=self.managers["index_manager"].validate_mapping)
        if self.managers.get("snapshot_manager"):
            self.extra_commands["ssm"] = CommandDefinition(command=self.managers["snapshot_manager"], tracked=False)
            self.extra_commands["snapshot_info"] = CommandDefinition(command=self.managers["snapshot_manager"].snapshot_info, tracked=False)
        if self.managers.get("release_manager"):
            self.extra_commands["rm"] = CommandDefinition(command=self.managers["release_manager"], tracked=False)
            self.extra_commands["release_info"] = CommandDefinition(command=self.managers["release_manager"].release_info, tracked=False)
            self.extra_commands["reset_synced"] = CommandDefinition(command=self.managers["release_manager"].reset_synced, tracked=True)
        if self.managers.get("inspect_manager"):
            self.extra_commands["ism"] = CommandDefinition(command=self.managers["inspect_manager"], tracked=False)
        if self.managers.get("api_manager"):
            self.extra_commands["api"] = CommandDefinition(command=self.managers["api_manager"], tracked=False)
            self.extra_commands["get_apis"] = CommandDefinition(command=self.managers["api_manager"].get_apis, tracked=False)
            self.extra_commands["delete_api"] = CommandDefinition(command=self.managers["api_manager"].delete_api)
            self.extra_commands["create_api"] = CommandDefinition(command=self.managers["api_manager"].create_api)
            self.extra_commands["start_api"] = CommandDefinition(command=self.managers["api_manager"].start_api)
            self.extra_commands["stop_api"] = self.managers["api_manager"].stop_api
        logging.debug("Registered extra (private) commands: %s" % list(self.extra_commands.keys()))

    def configure_api_endpoints(self):
        cmdnames = list(self.commands.keys())
        if self.extra_commands:
            cmdnames.extend(list(self.extra_commands.keys()))
        from biothings.hub.api import EndpointDefinition
        self.api_endpoints = {}
        self.api_endpoints["config"] = []
        if "config" in cmdnames:
            self.api_endpoints["config"].append(EndpointDefinition(name="config", method="get"))
            self.api_endpoints["config"].append(EndpointDefinition(name="setconf", method="put", force_bodyargs=True))
            self.api_endpoints["config"].append(EndpointDefinition(name="resetconf", method="delete", force_bodyargs=True))
        if not self.api_endpoints["config"]:
            self.api_endpoints.pop("config")
        if "builds" in cmdnames:
            self.api_endpoints["builds"] = EndpointDefinition(name="builds", method="get")
        self.api_endpoints["build"] = []
        if "build" in cmdnames:
            self.api_endpoints["build"].append(EndpointDefinition(method="get", name="build"))
        if "archive" in cmdnames:
            self.api_endpoints["build"].append(EndpointDefinition(method="post", name="archive", suffix="archive"))
        if "rmmerge" in cmdnames:
            self.api_endpoints["build"].append(EndpointDefinition(method="delete", name="rmmerge"))
        if "merge" in cmdnames:
            self.api_endpoints["build"].append(EndpointDefinition(name="merge", method="put", suffix="new"))
        if "build_save_mapping" in cmdnames:
            self.api_endpoints["build"].append(EndpointDefinition(name="build_save_mapping", method="put", suffix="mapping"))
        if not self.api_endpoints["build"]:
            self.api_endpoints.pop("build")
        self.api_endpoints["publish"] = []
        if "publish_diff" in cmdnames:
            self.api_endpoints["publish"].append(EndpointDefinition(name="publish_diff", method="post", suffix="incremental", force_bodyargs=True))
        if "publish_snapshot" in cmdnames:
            self.api_endpoints["publish"].append(EndpointDefinition(name="publish_snapshot", method="post", suffix="full", force_bodyargs=True))
        if not self.api_endpoints["publish"]:
            self.api_endpoints.pop("publish")
        if "diff" in cmdnames:
            self.api_endpoints["diff"] = EndpointDefinition(name="diff", method="put", force_bodyargs=True)
        if "job_info" in cmdnames:
            self.api_endpoints["job_manager"] = EndpointDefinition(name="job_info", method="get")
        if "dump_info" in cmdnames:
            self.api_endpoints["dump_manager"] = EndpointDefinition(name="dump_info", method="get")
        if "upload_info" in cmdnames:
            self.api_endpoints["upload_manager"] = EndpointDefinition(name="upload_info", method="get")
        if "build_config_info" in cmdnames:
            self.api_endpoints["build_manager"] = EndpointDefinition(name="build_config_info", method="get")
        if "index_info" in cmdnames:
            self.api_endpoints["index_manager"] = EndpointDefinition(name="index_info", method="get")
        if "snapshot_info" in cmdnames:
            self.api_endpoints["snapshot_manager"] = EndpointDefinition(name="snapshot_info", method="get")
        if "release_info" in cmdnames:
            self.api_endpoints["release_manager"] = EndpointDefinition(name="release_info", method="get")
        if "reset_synced" in cmdnames:
            self.api_endpoints["release_manager/reset_synced"] = EndpointDefinition(name="reset_synced", method="put")
        if "diff_info" in cmdnames:
            self.api_endpoints["diff_manager"] = EndpointDefinition(name="diff_info", method="get")
        if "commands" in cmdnames:
            self.api_endpoints["commands"] = EndpointDefinition(name="commands", method="get")
        if "command" in cmdnames:
            self.api_endpoints["command"] = EndpointDefinition(name="command", method="get")
        if "sources" in cmdnames:
            self.api_endpoints["sources"] = EndpointDefinition(name="sources", method="get")
        self.api_endpoints["source"] = []
        if "source_info" in cmdnames:
            self.api_endpoints["source"].append(EndpointDefinition(name="source_info", method="get"))
        if "source_reset" in cmdnames:
            self.api_endpoints["source"].append(EndpointDefinition(name="source_reset", method="post", suffix="reset"))
        if "dump" in cmdnames:
            self.api_endpoints["source"].append(EndpointDefinition(name="dump", method="put", suffix="dump"))
        if "upload" in cmdnames:
            self.api_endpoints["source"].append(EndpointDefinition(name="upload", method="put", suffix="upload"))
        if "source_save_mapping" in cmdnames:
            self.api_endpoints["source"].append(EndpointDefinition(name="source_save_mapping", method="put", suffix="mapping"))
        if not self.api_endpoints["source"]:
            self.api_endpoints.pop("source")
        if "inspect" in cmdnames:
            self.api_endpoints["inspect"] = EndpointDefinition(name="inspect", method="put", force_bodyargs=True)
        if "register_url" in cmdnames:
            self.api_endpoints["dataplugin/register_url"] = EndpointDefinition(name="register_url", method="post", force_bodyargs=True)
        if "unregister_url" in cmdnames:
            self.api_endpoints["dataplugin/unregister_url"] = EndpointDefinition(name="unregister_url", method="delete", force_bodyargs=True)
        self.api_endpoints["dataplugin"] = []
        if "dump_plugin" in cmdnames:
            self.api_endpoints["dataplugin"].append(EndpointDefinition(name="dump_plugin", method="put", suffix="dump"))
        if "export_plugin" in cmdnames:
            self.api_endpoints["dataplugin"].append(EndpointDefinition(name="export_plugin", method="put", suffix="export"))
        if not self.api_endpoints["dataplugin"]:
            self.api_endpoints.pop("dataplugin")
        if "jsondiff" in cmdnames:
            self.api_endpoints["jsondiff"] = EndpointDefinition(name="jsondiff", method="post", force_bodyargs=True)
        if "validate_mapping" in cmdnames:
            self.api_endpoints["mapping/validate"] = EndpointDefinition(name="validate_mapping", method="post", force_bodyargs=True)
        self.api_endpoints["buildconf"] = []
        if "create_build_conf" in cmdnames:
            self.api_endpoints["buildconf"].append(EndpointDefinition(name="create_build_conf", method="post", force_bodyargs=True))
            self.api_endpoints["buildconf"].append(EndpointDefinition(name="update_build_conf", method="put", force_bodyargs=True))
        if "delete_build_conf" in cmdnames:
            self.api_endpoints["buildconf"].append(EndpointDefinition(name="delete_build_conf", method="delete", force_bodyargs=True))
        if not self.api_endpoints["buildconf"]:
            self.api_endpoints.pop("buildconf")
        if "index" in cmdnames:
            self.api_endpoints["index"] = EndpointDefinition(name="index", method="put", force_bodyargs=True)
        if "snapshot" in cmdnames:
            self.api_endpoints["snapshot"] = EndpointDefinition(name="snapshot", method="put", force_bodyargs=True)
        if "sync" in cmdnames:
            self.api_endpoints["sync"] = EndpointDefinition(name="sync", method="post", force_bodyargs=True)
        if "whatsnew" in cmdnames:
            self.api_endpoints["whatsnew"] = EndpointDefinition(name="whatsnew", method="get")
        if "status" in cmdnames:
            self.api_endpoints["status"] = EndpointDefinition(name="status", method="get")
        self.api_endpoints["release_note"] = []
        if "create_release_note" in cmdnames:
            self.api_endpoints["release_note"].append(EndpointDefinition(name="create_release_note", method="put", suffix="create", force_bodyargs=True))
        if "get_release_note" in cmdnames:
            self.api_endpoints["release_note"].append(EndpointDefinition(name="get_release_note", method="get", force_bodyargs=True))
        if not self.api_endpoints["release_note"]:
            self.api_endpoints.pop("release_note")
        self.api_endpoints["api"] = []
        if "start_api" in cmdnames:
            self.api_endpoints["api"].append(EndpointDefinition(name="start_api", method="put", suffix="start"))
        if "stop_api" in cmdnames:
            self.api_endpoints["api"].append(EndpointDefinition(name="stop_api", method="put", suffix="stop"))
        if "delete_api" in cmdnames:
            self.api_endpoints["api"].append(EndpointDefinition(name="delete_api", method="delete", force_bodyargs=True))
        if "create_api" in cmdnames:
            self.api_endpoints["api"].append(EndpointDefinition(name="create_api", method="post", force_bodyargs=True))
        if not self.api_endpoints["api"]:
            self.api_endpoints.pop("api")
        if "get_apis" in cmdnames:
            self.api_endpoints["api/list"] = EndpointDefinition(name="get_apis", method="get")
        if "stop" in cmdnames:
            self.api_endpoints["stop"] = EndpointDefinition(name="stop", method="put")
        if "restart" in cmdnames:
            self.api_endpoints["restart"] = EndpointDefinition(name="restart", method="put")
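
# Usage sketch (illustrative, not from the original module): instantiate a hub
# with a subset of features and a custom upload poll schedule, as described in
# the HubServer.__init__ docstring. Paths, feature list and schedule are
# example values.
if __name__ == "__main__":
    server = HubServer(
        ["hub/dataload/sources"],   # source_list: paths to datasource modules
        features=["config", "job", "dump", "upload", "source"],
        managers_custom_args={"upload": {"poll_schedule": "*/5 * * * *"}},
        reloader_config=False,      # explicitly False deactivates that feature
        name="Example Hub",
    )
    server.start()  # calls configure() first if needed, then runs the event loop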
# Manual hub setup (MyGene-style: uses MyGeneDataBuilder). This excerpt starts
# below its original module header.
logging.info("Hub DB backend: %s" % biothings.config.HUB_DB_BACKEND)
logging.info("Hub database: %s" % biothings.config.DATA_HUB_DB_DATABASE)

from biothings.utils.hub import start_server, HubShell
from biothings.utils.manager import JobManager

loop = asyncio.get_event_loop()
process_queue = concurrent.futures.ProcessPoolExecutor(max_workers=config.HUB_MAX_WORKERS)
thread_queue = concurrent.futures.ThreadPoolExecutor()
loop.set_default_executor(process_queue)

job_manager = JobManager(loop,
                         num_workers=config.HUB_MAX_WORKERS,
                         max_memory_usage=config.HUB_MAX_MEM_USAGE)
shell = HubShell(job_manager)

import hub.dataload
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyGeneDataBuilder
from hub.databuild.mapper import EntrezRetired2Current, Ensembl2Entrez
import biothings.utils.mongo as mongo
# Manual hub setup (MyChem-style: uses MyChemDataBuilder). The JobManager call
# below is completed from context; the excerpt started mid-statement.
job_manager = JobManager(loop,
                         num_workers=config.HUB_MAX_WORKERS,
                         max_memory_usage=config.HUB_MAX_MEM_USAGE)

import hub.dataload
from biothings.utils.hub import schedule, pending, done, start_server, HubShell
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
from hub.databuild.builder import MyChemDataBuilder
from hub.dataindex.indexer import DrugIndexer

shell = HubShell(job_manager)

# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                          job_manager=job_manager)
dump_manager = dumper.DumperManager(job_manager=job_manager)
sources_path = hub.dataload.__sources_dict__
smanager = source.SourceManager(sources_path, dump_manager, upload_manager)
dump_manager.schedule_all()
upload_manager.poll(
    'upload',
    lambda doc: shell.launch(partial(upload_manager.upload_src, doc["_id"])))

build_manager = builder.BuilderManager(builder_class=MyChemDataBuilder,
                                       job_manager=job_manager)
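# The builder manager is presumably configured right after instantiation,
# mirroring configure_build_manager() in HubServer above (the original
# excerpt may continue differently):
build_manager.configure()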
# Manual hub setup (MyVariant-style: uses MyVariantDataBuilder).
import biothings.hub.dataload.uploader as uploader
import biothings.hub.dataload.dumper as dumper
import biothings.hub.dataload.source as source
import biothings.hub.databuild.builder as builder
import biothings.hub.databuild.differ as differ
import biothings.hub.databuild.syncer as syncer
import biothings.hub.dataindex.indexer as indexer
import biothings.hub.datainspect.inspector as inspector
from biothings.hub.api.manager import APIManager
from hub.databuild.builder import MyVariantDataBuilder
from hub.databuild.mapper import TagObserved
from hub.dataindex.indexer import VariantIndexer
from biothings.utils.hub import schedule, pending, done, CompositeCommand, \
    start_server, HubShell, CommandDefinition

shell = HubShell(job_manager)

# will check every 10 seconds for sources to upload
upload_manager = uploader.UploaderManager(poll_schedule='* * * * * */10',
                                          job_manager=job_manager)
dmanager = dumper.DumperManager(job_manager=job_manager)
sources_path = hub.dataload.__sources_dict__  # "hub/dataload/sources"
smanager = source.SourceManager(sources_path, dmanager, upload_manager)
#dmanager.schedule_all()
upload_manager.poll(
    'upload',
    lambda doc: shell.launch(partial(upload_manager.upload_src, doc["_id"])))

# deal with 3rd-party datasources
import biothings.hub.dataplugin.assistant as assistant
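
# For comparison (illustrative, not from the original code): the manual wiring
# in the snippets above is what HubServer.configure() automates. A hypothetical
# equivalent for this last setup would be:
#
#   server = HubServer(hub.dataload.__sources_dict__, name="MyVariant Hub")
#   server.start()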