def __init__(self, conf):
    JobManager.__init__(self, "saga", conf)

    self._file_url = self._conf.get("files_url", "file://")
    self._remote_path = self._conf.get("remote_path", self._work_path)
    self._output_path = os.path.join(self._work_path, "output")
    self._remote_output_path = os.path.join(self._remote_path, "output")

    self._pe = self._conf.get("pe")
    self._cpu_count = self._conf.get("cpu_count", 1)
    self._queue = self._conf.get("queue")
    self._project = self._conf.get("project")
    self._working_directory = self._conf.get("working_directory")
    self._state_check_interval = self._conf.get("state_check_interval", 5)

    ctx_conf = self._conf.get("context")
    if ctx_conf is not None and not (Data.is_element(ctx_conf) or Data.is_list(ctx_conf)):
        raise ConfigTypeError("context", ctx_conf)

    self._session = None
    self._job_service = None

    self._queued_count = 0
    self._max_queued = self._conf.get("max_queued", 0)

    self._running = False
    self._run_thread = None
    self._join_thread = None

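# Illustrative configuration for this SAGA-based job manager. A sketch built
# only from the keys read above; hosts, paths and values are hypothetical:
#
#   conf = Data.create({
#       "files_url": "sftp://cluster.example.org",
#       "remote_path": "/scratch/user/wok",
#       "pe": "smp",
#       "cpu_count": 4,
#       "queue": "normal",
#       "max_queued": 100,
#       "state_check_interval": 5,
#       "context": {"type": "ssh", "user_id": "user"}
#   })
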
def __init__(self, rule, base_path=None, platform=None):
    rule = Data.create(rule)

    self.on = rule.get("on", {})
    if isinstance(self.on, basestring):
        self.on = dict(task=self.on)
    if platform is not None:
        self.on["platform"] = platform

    self.dels = rule.get("del", default=Data.list)
    if not Data.is_list(self.dels):
        raise Exception("Expected a list of strings for del operations of rule: {}".format(repr(rule)))
    for k in self.dels:
        if not isinstance(k, basestring):
            raise Exception("Expected a list of strings for del operations of rule: {}".format(repr(rule)))

    self.set = rule.get("set", default=Data.list)
    if not Data.is_list(self.set):  # the original re-checked self.dels here
        raise Exception("Expected a list of tuples [key, value] for set operations of rule: {}".format(repr(rule)))
    for s in self.set:
        if not Data.is_list(s) or len(s) != 2:
            raise Exception("Expected a list of tuples [key, value] for set operations of rule: {}".format(repr(rule)))

    self.merge = rule.get("merge")
    if isinstance(self.merge, basestring):
        if not os.path.isabs(self.merge):
            if base_path is None:
                raise Exception("Configuration rule merge path should be an absolute path: {}".format(self.merge))
            else:
                self.merge = os.path.join(base_path, self.merge)
        if not os.path.isfile(self.merge):
            raise Exception("Configuration rule merge path not found: {}".format(self.merge))
        # the path has already been resolved above; the original joined
        # base_path a second time, which double-prefixed relative base paths
        self.merge = ConfigLoader(self.merge).load()

    if self.merge is not None and not Data.is_element(self.merge):
        raise Exception("Expected a dictionary for merge operation of rule: {}".format(repr(rule)))

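# A sketch of the rule mapping accepted above. The schema follows the keys
# read by the constructor; concrete values are hypothetical:
#
#   rule = {
#       "on": {"task": "variants"},   # or just "variants" (a bare task name)
#       "del": ["wok.platforms"],     # list of configuration keys to delete
#       "set": [["threads", 4]],      # list of [key, value] pairs to set
#       "merge": "extra.conf"         # mapping, or a path resolved against base_path
#   }
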
def single_project(projects_port):
    log = task.logger
    conf = task.conf

    log.info("Single project ...")

    files = conf["files"]
    if isinstance(files, basestring):
        files = [files]
    elif not Data.is_list(files):
        raise InternalError("Unexpected configuration type for 'files', only string and list are allowed")

    files = [str(f) for f in files]  # unicode is not json serializable

    project_id = conf["project.id"]
    assembly = conf.get("assembly", "hg19")
    annotations = conf.get("annotations")

    project = {
        "id": project_id,
        "files": files,
        "assembly": assembly
    }

    if annotations is not None and isinstance(annotations, dict):
        project["annotations"] = annotations

    make_project(log, conf, project)

    projects_port.send(project)

def get_annotations_to_save(self, keys, annotations, names=None, values=None):
    assert (names is None and values is None) or \
           (names is not None and values is not None and len(names) == len(values))

    if names is None:
        names = []
        values = []
    else:
        names = list(names)
        values = list(values)

    ann_keys = keys
    if ann_keys is None:
        ann_keys = []
    elif Data.is_list(ann_keys):
        ann_keys = ann_keys.to_native()
    else:
        log = logger.get_logger(__name__)
        log.warn("Wrong type for 'project.annotations', expecting a list but found:\n{0}".format(repr(ann_keys)))
        ann_keys = []

    for ann_key in ann_keys:
        default = None
        if isinstance(ann_key, basestring):
            key = name = ann_key
        elif isinstance(ann_key, list) and len(ann_key) == 2:
            key = ann_key[0]
            name = ann_key[1]
        else:
            # anything else would leave key/name undefined below (NameError)
            continue

        value = annotations[key] if key in annotations else default

        names += [name]
        values += [value]

    return names, values

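# Illustrative call (hypothetical receiver and data): plain string entries use
# the key itself as the name, [key, name] pairs rename it, and keys missing
# from 'annotations' yield None:
#
#   names, values = results.get_annotations_to_save(
#       ["SOURCE", ["TUMOR_TYPE", "Tumor type"]],
#       {"SOURCE": "TCGA"})
#   # names  -> ["SOURCE", "Tumor type"]
#   # values -> ["TCGA", None]
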
def __init__(self, name, title = "", desc = "", enabled = True, serializer = None, wsize = None, maxpar = None, conf = None, in_ports = None, out_ports = None, resources = None, params = None): _BasePort.__init__(self, name, title, desc, enabled, serializer, wsize) self.maxpar = maxpar self.conf = conf if in_ports is None: self.in_ports = [] self.in_port_map = {} else: self.in_ports = in_ports for p in in_ports: self.in_port_map[p.name] = p if out_ports is None: self.out_ports = [] self.out_port_map = {} else: self.out_ports = out_ports for p in out_ports: self.out_port_map[p.name] = p if resources is None: resources = Data.element() self.resources = resources if params is None: params = [] self.params = params
def scan_projects(projects_port):
    log = task.logger
    conf = task.conf

    config = GlobalConfig(conf)

    log.debug("Configuration:")
    log.debug(config)

    log.info("Creating combination folders ...")
    paths = PathsConfig(config)
    create_combination_folders(conf)

    if "projects" not in conf or not Data.is_list(conf["projects"]):
        raise InternalError("Required 'projects' configuration parameter has not been found or it is not well defined")

    log.info("Initializing projects ...")
    for project in conf["projects"]:
        if project is None:
            continue

        project = validate_project(log, project)
        project.expand_vars(conf)

        log.info("--- [{}] ---------------------------------------------------".format(project["id"]))

        project = init_project(log, config, paths, task.storage, project)

        log.info("  assembly: {}, variant_files: {}".format(project["assembly"], len(project["files"])))

        projects_port.send(project)

def save_def(self, path=None):
    if path is None:
        path = self.project["path"]

    annotations = self.project.get("annotations")
    if annotations is None:
        annotations = {}

    p = {
        "id": self.project["id"],
        "assembly": self.project["assembly"],
        "files": [os.path.relpath(fpath, path) for fpath in self.project["files"]],
        "storage_objects": [o for o in self.project["storage_objects"]],
        "db": os.path.relpath(self.project["db"], path),
        "annotations": annotations
    }

    # "storage_objects" added to the exclusion list so the copy built above
    # is not overwritten
    for key in self.project:
        if key not in ["id", "assembly", "files", "storage_objects", "db", "annotations"]:
            p[key] = self.project[key]

    with open(os.path.join(path, "project.conf"), "w") as f:
        json.dump(p, f, indent=4, sort_keys=True)

    temp_path = self.project["temp_path"]
    if os.path.exists(temp_path):  # for debugging purposes
        with open(os.path.join(temp_path, "project.conf"), "w") as f:
            json.dump(Data.create(self.project).to_native(), f, indent=4, sort_keys=True)

def create_case(self, user, case_name, conf_builder, project_name, flow_name, properties=None, start=True):
    case = Case(
        owner_id=user.id,
        name=case_name,
        project_name=project_name,
        flow_name=flow_name,
        conf=conf_builder.get_conf(),
        properties=Data.element(properties))

    session = db.Session()
    session.add(case)
    session.commit()

    engine_case_name = "{}-{}".format(user.nick, case_name)
    #while self.engine.exists_case(engine_case_name):
    #    engine_case_name = "{}-{}".format(user.nick, uuid4().hex[-6:])

    engine_case = self.engine.create_case(
        engine_case_name, conf_builder, project_name, flow_name, engine_case_name)

    case.created = engine_case.created
    case.engine_name = engine_case_name
    session.commit()

    if start:
        engine_case.start()

    return case

def _create_platforms(self):
    """
    Creates the platforms according to the configuration
    :return: list of Platform
    """
    platform_confs = self._conf.get("platforms")
    if platform_confs is None:
        platform_confs = Data.list()
    elif not Data.is_list(platform_confs):
        self._log.error("Wrong configuration type for 'platforms': {}".format(platform_confs))
        platform_confs = Data.list()

    if len(platform_confs) == 0:
        platform_confs += [Data.element(dict(type="local"))]

    platforms = []
    names = {}
    for pidx, platform_conf in enumerate(platform_confs):
        if isinstance(platform_conf, basestring):
            if not os.path.isabs(platform_conf) and self._conf_base_path is not None:
                platform_conf = os.path.join(self._conf_base_path, platform_conf)
            platform_conf = ConfigLoader(platform_conf).load()

        if not Data.is_element(platform_conf):
            # the key and the offending value go as separate arguments
            # (the original passed both to format(), leaving one unused)
            raise errors.ConfigTypeError("wok.platforms[{}]".format(pidx), platform_conf)

        ptype = platform_conf.get("type", "local")

        name = platform_conf.get("name", ptype)
        if name in names:
            # bump the counter of the base name before renaming; the original
            # incremented the renamed key and raised KeyError
            count = names[name]
            names[name] = count + 1
            name = "{}-{}".format(name, count)
        else:
            names[name] = 2
        platform_conf["name"] = name

        if "work_path" not in platform_conf:
            platform_conf["work_path"] = os.path.join(self._work_path, "platform_{}".format(name))

        self._log.info("Creating '{}' platform ...".format(name))
        self._log.debug("Platform configuration: {}".format(repr(platform_conf)))

        platforms += [create_platform(ptype, platform_conf)]

    return platforms

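# Illustrative 'platforms' configurations accepted above (file names are
# hypothetical):
#
#   {"platforms": [{"type": "local", "name": "local"}]}
#   {"platforms": ["cluster-platform.conf"]}   # a string is loaded with ConfigLoader
#
# With no platform configured at all, a single {"type": "local"} platform is
# created, and duplicate names get numeric suffixes ("local", "local-2", ...).
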
def _load_project_desc(self, path, base_path=None):
    if not os.path.isabs(path):
        if base_path is not None:
            path = os.path.join(base_path, path)
        else:
            path = os.path.abspath(path)

    if not os.path.exists(path):
        raise Exception("Project path not found: {}".format(path))

    if os.path.isdir(path):
        path = os.path.join(path, "project.conf")

    if not os.path.isfile(path):
        raise Exception("Project configuration not found: {}".format(path))

    project = Data.element()
    project.merge(ConfigLoader(path).load())

    base_path = os.path.dirname(path)

    if "path" not in project:
        project["path"] = base_path
    if not os.path.isabs(project["path"]):
        project["path"] = os.path.normpath(os.path.join(base_path, project["path"]))

    if "conf" in project and isinstance(project["conf"], basestring):
        conf_path = os.path.join(base_path, project["conf"])
        project["conf"] = ConfigLoader(conf_path).load()

    if "conf_rules" in project and isinstance(project["conf_rules"], basestring):
        conf_path = os.path.join(base_path, project["conf_rules"])
        project["conf_rules"] = ConfigLoader(conf_path).load()

    if "conf_rules" in project and Data.is_list(project["conf_rules"]):
        for rule in project["conf_rules"]:
            if Data.is_element(rule) and "merge" in rule and isinstance(rule["merge"], basestring):
                rule["merge"] = ConfigLoader(os.path.join(base_path, rule["merge"])).load()

    return project

def initialize(self):
    self._log.info("Initializing projects ...")

    if Data.is_element(self.conf):
        iter_conf = self._iter_dict(self.conf)
    elif Data.is_list(self.conf):
        iter_conf = self._iter_list(self.conf)
    else:
        iter_conf = iter([])

    for name, pdesc in iter_conf:
        if isinstance(pdesc, basestring):
            pdesc = self._load_project_desc(pdesc, self.base_path)
        self._add_project_desc(pdesc, self.base_path)

    for name, project in sorted(self._projects.items(), key=lambda x: x[0]):
        project.initialize()

def to_element(self, e=None):
    if e is None:
        e = Data.element()
    e["ns"] = self.namespace
    e["name"] = self.name
    e["cname"] = self.cname
    return e

@property  # accessed attribute-style below (self.parent.resources), so this must be a property
def resources(self):
    # resources cascade: start from the parent's already-merged resources
    # and merge this node's own on top
    if self.parent is None:
        res = Data.element()
    else:
        res = self.parent.resources
    if self.model.resources is not None:
        res.merge(self.model.resources)
    return res

def _start(self):
    self._log.debug("Creating session ...")

    self._session = saga.Session()

    ctxs_conf = self._conf.get("context")
    if ctxs_conf is not None:
        if Data.is_element(ctxs_conf):
            ctxs_conf = Data.list([ctxs_conf])
        for ctx_conf in ctxs_conf:
            try:
                ctx = saga.Context(ctx_conf["type"])
                for key in ctx_conf:
                    if hasattr(ctx, key):
                        setattr(ctx, key, ctx_conf[key])
                self._session.add_context(ctx)
            except Exception as ex:
                self._log.error("Wrong context configuration: {}".format(repr(ctx_conf)))
                self._log.exception(ex)

    self._log.debug("Creating job service ...")

    url = self._conf.get("service_url", "fork://localhost", dtype=str)
    self._job_service = saga.job.Service(url, session=self._session)

    self._remote_dir = saga.filesystem.Directory(self._file_url, session=self._session)

    # FIXME Use the logging configuration mechanisms of SAGA
    from wok import logger
    logger.init_logger("SGEJobService",
                       conf=Data.element(dict(level=self._conf.get("saga_log.level", "error"))))

    # TODO count the number of previously queued jobs
    # TODO clean output files ?

    self._running = True
    self._run_thread = threading.Thread(target=self._run_handler, name="{}-run".format(self._name))
    self._join_thread = threading.Thread(target=self._join_handler, name="{}-join".format(self._name))
    self._run_thread.start()
    self._join_thread.start()

def _parse_exec(self, xmle):
    execution = Exec()

    if "launcher" in xmle.attrib:
        execution.mode = xmle.attrib["launcher"].lower()
        if execution.mode == "python":
            execution.mode = "native"

    execution.conf = Data.from_xmle(xmle)

    return execution

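# Illustrative XML handled above (the element name and extra attributes are
# assumptions; only the "launcher" attribute is read directly):
#
#   <exec launcher="python" script_path="tasks/variants.py"/>
#
# yields execution.mode == "native", with execution.conf built from the
# element by Data.from_xmle.
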
def init_logger(logger, conf): """ Initializa a logger from configuration. Configuration can be: - An string referring to the log level - A dictionary with the following parameters: - level: log level - handlers: List of log handlers or just a handler. Each handler have the following parameters: - type - ...: each handler type has a set of parameters Supported handlers: - smtp: Send logs by email. Parameters: - host - port (optional) - user - pass - from - to - subject - level - format: can be a simple string or a list of strings that will be joint with '\n' """ if isinstance(logger, basestring): logger = get_logger(logger) if isinstance(conf, basestring): conf = Data.element(dict(level=conf)) else: conf = Data.create(conf) level = conf.get("level") if level is not None: logger.setLevel(get_level(level)) handlers_conf = conf.get("handlers", default=Data.list) if Data.is_element(handlers_conf): handlers_conf = Data.list([handlers_conf]) for handler_conf in handlers_conf: handler = get_handler(logger, handler_conf) logger.addHandler(handler)
def get_handler(logger, conf):
    if not Data.is_element(conf):
        logger.error("Malformed log handler:\n{0}".format(repr(conf)))
        return

    type = conf.get("type")
    if type is None or not isinstance(type, basestring) or type.lower() not in _HANDLERS:
        logger.error("Unknown or unsupported handler type: {0}\n{1}".format(type, repr(conf)))
        return

    handler = _HANDLERS[type.lower()](conf)  # index with the lower-cased key that was validated

    level = conf.get("level")
    if level is not None:
        handler.setLevel(get_level(level))

    format = conf.get("format", _DEFAULT_FORMAT)
    if format is not None:
        if Data.is_list(format):
            format = "".join(format.to_native())
        handler.setFormatter(logging.Formatter(format))

    return handler

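# Usage sketch for the logging configuration described in the init_logger
# docstring (hosts and addresses are hypothetical):
#
#   init_logger("wok", conf={
#       "level": "debug",
#       "handlers": {                       # a single handler element is accepted
#           "type": "smtp",
#           "host": "smtp.example.org",
#           "from": "wok@example.org",
#           "to": "admin@example.org",
#           "subject": "wok errors",
#           "level": "error"
#       }
#   })
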
def to_element(self, e = None): if e is None: e = Data.element() e["name"] = self.name e["state"] = str(self.state) e["conf"] = self.conf #FIXME self.root_node.update_tasks_count_by_state() self.root_node.update_component_count_by_state() self.root_node.to_element(e.element("root")) return e
def to_element(self, e=None): if e is None: e = Data.element() e["name"] = self.name e["state"] = str(self.state) e["conf"] = self.conf #FIXME self.root_node.update_tasks_count_by_state() self.root_node.update_component_count_by_state() self.root_node.to_element(e.element("root")) return e
def __init__(self, parent, index, id=None, namespace="", state=runstates.READY,
             substate=None, partition=None):
    Node.__init__(self, parent, namespace=namespace)

    self.id = id
    self.index = index
    self.state = state
    self.substate = substate
    self.partition = partition or Data.element()

    self.job_id = None
    self.job_result = None

def _initialize_conf(self):
    # project defined config
    self.conf = conf = self.project.get_conf()

    # user defined config
    self.user_conf = self.conf_builder.get_conf()
    if "wok" in self.user_conf:
        self.user_conf.delete("wok.work_path", "wok.projects", "wok.platforms", "wok.logging")
    conf.merge(self.user_conf)

    # runtime config
    conf[rtconf.CASE_NAME] = self.name
    conf[rtconf.FLOW] = Data.element(dict(
        name=self.root_flow.name,
        uri=self.flow_uri))

def _override_component(ovr, src):
    ovr.name = src.name

    if src.title is not None:
        ovr.title = src.title
    if src.desc is not None:
        ovr.desc = src.desc
    if src.enabled is not None:
        ovr.enabled = src.enabled
    if src.serializer is not None:
        ovr.serializer = src.serializer  # was mode_def.serializer, an undefined name here
    if src.wsize is not None:
        ovr.wsize = src.wsize  # was mode_def.wsize
    if src.conf is not None:
        if ovr.conf is None:
            ovr.conf = Data.element()
        ovr.conf.merge(src.conf)  # was mode_def.conf

    ovr.priority = src.priority
    ovr.depends = src.depends
    ovr.flow_ref = src.flow_ref

def __init__(self, name, title="", desc="", enabled=True, serializer=None, wsize=None, maxpar=None, conf=None, in_ports=None, out_ports=None, resources=None, params=None): _BasePort.__init__(self, name, title, desc, enabled, serializer, wsize) self.maxpar = maxpar self.conf = conf if in_ports is None: self.in_ports = [] self.in_port_map = {} else: self.in_ports = in_ports for p in in_ports: self.in_port_map[p.name] = p if out_ports is None: self.out_ports = [] self.out_port_map = {} else: self.out_ports = out_ports for p in out_ports: self.out_port_map[p.name] = p if resources is None: resources = Data.element() self.resources = resources if params is None: params = [] self.params = params
def initialize(conf=None, format=None, datefmt=None, level=None):
    """
    Initialize the logging system.

    If conf is a dictionary, the parameters considered for configuration are:
    - format: logger format
    - datefmt: date format
    - loggers: list of tuples (name, conf) to configure loggers

    If conf is a list, only the loggers are configured.
    If conf is a string, the default logger is configured for that logging level.
    """

    global _initialized

    if conf is None:
        conf = Data.element()
    elif not isinstance(conf, basestring):
        conf = Data.create(conf)

    if Data.is_list(conf):
        loggers_conf = conf
        conf = Data.element()
    elif Data.is_element(conf):
        loggers_conf = conf.get("loggers", default=Data.list)
    elif isinstance(conf, basestring):
        loggers_conf = Data.list([["", conf]])
        conf = Data.element()

    format = format or conf.get("format", _DEFAULT_FORMAT)
    datefmt = datefmt or conf.get("datefmt", _DEFAULT_DATEFMT)

    logging.basicConfig(format=format, datefmt=datefmt)

    for (log_name, log_conf) in loggers_conf:
        init_logger(log_name, conf=log_conf)

    if level is not None:
        init_logger("", conf=level)

    _initialized = True

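# The three accepted conf shapes for initialize(), illustrated (logger names
# and levels are hypothetical):
#
#   initialize("info")                                    # level for the root logger
#   initialize([["", "info"], ["wok.engine", "debug"]])   # just the loggers
#   initialize({"format": "%(asctime)s %(message)s",
#               "loggers": [["wok", "debug"]]})           # full dictionary form
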
def unmarshall(self, raw):
    value = json.loads(raw)
    if self.__enhanced and isinstance(value, (list, dict)):
        value = Data.create(value)
    return value

def get_project_conf(conf, project, key, default=None, dtype=None):
    value = conf.get(key, default=default, dtype=dtype)
    if not Data.is_element(project):
        project = Data.create(project)
    return project.get(key, default=value, dtype=dtype)

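# Precedence sketch: a project-level value wins over the global conf, which
# wins over the default (keys and values are hypothetical):
#
#   conf = Data.create({"assembly": "hg18"})
#   get_project_conf(conf, {}, "assembly", default="hg19")    # -> "hg18"
#   get_project_conf(conf, {"assembly": "hg38"}, "assembly")  # -> "hg38"
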
def project_conf(self, project_name, platform_name=None):
    if project_name not in self._projects:
        return Data.element()
    project = self._projects[project_name]
    return project.get_conf(platform_name=platform_name)

def init_project(logger, config, paths, storage, project):
    project_id = project["id"]

    results_path = paths.results_path()
    project_path = paths.project_path(project_id)
    project_temp_path = paths.project_temp_path(project_path)

    if config.results.purge_on_start:
        logger.info("  Purging previous results ...")
        if os.path.isdir(project_path):
            logger.info("    {} ...".format(os.path.relpath(project_path, results_path)))
            shutil.rmtree(project_path)
        #if os.path.isdir(project_temp_path):
        #    logger.info("    {} ...".format(os.path.relpath(project_temp_path, results_path)))
        #    shutil.rmtree(project_temp_path)
        for obj_name in storage.list_objects(prefix="results/"):
            logger.info("    {} ...".format(obj_name))
            storage.delete_object("results/{}".format(obj_name))

    ensure_path_exists(project_path)
    ensure_path_exists(project_temp_path)

    projdb_path = os.path.join(project_path, "project.db")

    if "annotations" in project:
        annotations = project["annotations"]
        if not Data.is_element(annotations):
            logger.warn("Overriding project annotations field with an empty dictionary")
            project["annotations"] = annotations = Data.element()
    else:
        project["annotations"] = annotations = Data.element()

    # for backward compatibility
    for key in project.keys():
        if key not in ["id", "assembly", "files", "storage_objects", "annotations", "conf",
                       "oncodriveclust", "oncodrivefm"]:
            value = project[key]
            del project[key]
            annotations[key] = value

    project["conf"] = pconf = project.get("conf") or Data.element()
    if not Data.is_element(pconf):
        logger.warn("Overriding project conf field with an empty dictionary")
        project["conf"] = pconf = Data.element()

    # for backward compatibility
    for key in project.keys():
        if key in ["oncodriveclust", "oncodrivefm"]:
            value = project[key]
            del project[key]
            pconf[key] = value

    project["path"] = project_path
    project["temp_path"] = project_temp_path
    project["db"] = projdb_path
    if "assembly" not in project:
        project["assembly"] = DEFAULT_ASSEMBLY

    missing_objects = []
    for obj_name in project["storage_objects"]:
        if not storage.exists_object(obj_name):
            missing_objects += [obj_name]

    if len(missing_objects) > 0:
        raise InternalError("Project {0} references some missing objects:\n{1}".format(
            project_id, "\n".join(missing_objects)))

    # unicode is not json serializable
    project["files"] = [str(f) for f in project["files"]]
    project["storage_objects"] = [str(f) for f in project["storage_objects"]]

    project = project.to_native()

    # save project.conf
    projres = ProjectResults(project)
    projres.save_def()

    return project

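# Backward-compatibility sketch: unknown top-level keys are moved into
# "annotations", and legacy tool sections into "conf" (values hypothetical):
#
#   {"id": "p1", "SOURCE": "TCGA", "oncodrivefm": {"num_cores": 4}}
#   # becomes
#   {"id": "p1", "annotations": {"SOURCE": "TCGA"},
#    "conf": {"oncodrivefm": {"num_cores": 4}}}
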
class NativeCommmandBuilder(CommmandBuilder):

    def _plain_conf(self, value, path=None):
        if path is None:
            path = []

        if not Data.is_element(value):
            yield (".".join(path), value)
        else:
            for key in value.keys():
                for k, v in self._plain_conf(value[key], path + [key]):
                    yield (k, v)

    def prepare(self, case, task, index):
        execution = task.execution
        exec_conf = execution.conf
        if exec_conf is None:
            exec_conf = Data.element()

        if "script_path" not in exec_conf:
            raise MissingValueError("script_path")

        script_path = exec_conf["script_path"]

        lang = exec_conf.get("language", "python")

        case_conf = case.conf.clone().expand_vars()

        # Environment variables
        env = Data.element()
        #for k, v in os.environ.items():
        #    env[k] = v
        env.merge(task.conf.get(rtconf.TASK_ENV))
        env.merge(exec_conf.get("env"))

        # Default module script path
        platform_project_path = task.conf.get(rtconf.PROJECT_PATH, case.project.path)
        flow_path = os.path.abspath(os.path.dirname(task.flow_path))
        flow_rel_path = os.path.relpath(flow_path, case.project.path)
        platform_script_path = os.path.join(platform_project_path, flow_rel_path, script_path)

        env[ENV_PROJECT_PATH] = platform_project_path
        env[ENV_FLOW_PATH] = flow_rel_path
        env[ENV_SCRIPT_PATH] = script_path
        env[ENV_PLATFORM_SCRIPT_PATH] = platform_script_path

        script = []

        sources = task.conf.get(rtconf.TASK_SOURCES, default=Data.list)
        if isinstance(sources, basestring):
            sources = Data.list([sources])

        for source in sources:
            script += ['source "{}"'.format(source)]

        if lang == "python":
            virtualenv = task.conf.get(rtconf.TASK_PYTHON_VIRTUALENV)
            if virtualenv is not None:
                #script += ["set -x"]
                #script += ["echo Activating virtualenv {} ...".format(virtualenv)]
                script += ['source "{}"'.format(os.path.join(virtualenv, "bin", "activate"))]
                #script += ["set +x"]

            #script += ["echo Running workitem ..."]

            cmd = [task.conf.get(rtconf.TASK_PYTHON_BIN, "python")]
            cmd += ["${}".format(ENV_PLATFORM_SCRIPT_PATH)]

            lib_path = task.conf.get(rtconf.TASK_PYTHON_LIBS)
            if lib_path is not None:
                if Data.is_list(lib_path):
                    lib_path = ":".join(lib_path)

                if "PYTHONPATH" in env:
                    env["PYTHONPATH"] = lib_path + ":" + env["PYTHONPATH"]
                else:
                    env["PYTHONPATH"] = lib_path
        else:
            raise LanguageError(lang)

        cmd += ["-D", "case={}".format(case.name),
                "-D", "task={}".format(task.cname),
                "-D", "index={}".format(index)]

        #for key, value in self._storage_conf(workitem.case.engine.storage.basic_conf):
        #    cmd += ["-D", "storage.{}={}".format(key, value)]

        for key, value in self._plain_conf(Data.create(task.platform.data.context_conf(CTX_EXEC))):
            cmd += ["-D", "data.{}={}".format(key, value)]

        for key, value in self._plain_conf(task.platform.storage.context_conf(CTX_EXEC)):
            cmd += ["-D", "storage.{}={}".format(key, value)]

        script += [" ".join(cmd)]

        return "\n".join(script), env.to_native()

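# How _plain_conf flattens nested configuration into dotted keys (hypothetical
# input), which is what feeds the -D data.* / -D storage.* arguments above:
#
#   dict(builder._plain_conf(Data.create({"db": {"host": "localhost", "port": 5432}})))
#   # -> {"db.host": "localhost", "db.port": 5432}
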
def unmarshall(self, raw):
    raw = raw.replace(r"\n", "\n")
    value = pickle.loads(raw)
    if self.__enhanced and isinstance(value, (list, dict)):
        value = Data.create(value)
    return value

def _partition_task(self, task):
    """
    Partition the input data for a task into work items. It is an iterator of WorkItems.
    """

    # Calculate input sizes and the minimum wsize
    psizes = []
    mwsize = sys.maxint
    for port in task.in_ports:
        psize = 0
        for data_ref in port.data.refs:
            port_data = task.platform.data.open_port_data(self.name, data_ref)
            data_ref.size = port_data.size()
            psize += data_ref.size
        port.data.size = psize
        psizes += [psize]
        pwsize = port.wsize
        self._log.debug("[{}] {}: size={}, wsize={}".format(self.name, port.cname, psize, pwsize))
        if pwsize < mwsize:
            mwsize = pwsize

    if len(psizes) == 0:
        # Submit a task for the module without input ports information
        workitem = WorkItemNode(parent=task, index=0, namespace=task.namespace)

        out_ports = []
        for port in task.out_ports:
            port_data = port.data.partition()
            out_ports += [dict(name=port.name, data=port_data.to_native())]

        workitem.partition["ports"] = Data.create({"out": out_ports})

        yield workitem
    else:
        # Check whether all inputs have the same size
        psize = psizes[0]
        for i in xrange(1, len(psizes)):
            if psizes[i] != psize:
                psize = -1
                break

        # Partition the data on input ports
        if psize == -1:
            num_partitions = 1
            self._log.warn("[{}] Unable to partition a task with input ports of different size".format(task.cname))
        else:
            if mwsize == 0:
                num_partitions = 1
                self._log.warn("[{}] Empty port, no partitioning".format(task.cname))
            else:
                num_partitions = int(math.ceil(psize / float(mwsize)))
                maxpar = task.maxpar
                self._log.debug("[{}] {}: maxpar={}".format(self.name, task.cname, maxpar))
                if maxpar > 0 and num_partitions > maxpar:
                    mwsize = int(math.ceil(psize / float(maxpar)))
                    num_partitions = int(math.ceil(psize / float(mwsize)))
                self._log.debug("[{}] {}: num_par={}, psize={}, mwsize={}".format(
                    self.name, task.cname, num_partitions, psize, mwsize))

        start = 0
        for i in xrange(num_partitions):
            workitem = WorkItemNode(parent=task, index=i, namespace=task.namespace)

            end = min(start + mwsize, psize)
            size = end - start

            in_ports = []
            for port in task.in_ports:
                #workitem.in_port_data.append((port.name, port.data.slice(start, size)))
                port_data = port.data.slice(start, size)
                in_ports += [dict(name=port.name, data=port_data.to_native())]

            out_ports = []
            for port in task.out_ports:
                #workitem.out_port_data.append((port.name, port.data.partition()))
                port_data = port.data.partition()
                out_ports += [dict(name=port.name, data=port_data.to_native())]

            workitem.partition["ports"] = Data.create({"in": in_ports, "out": out_ports})

            self._log.debug("[{}] {}[{:04d}]: start={}, end={}, size={}".format(
                self.name, task.cname, i, start, end, size))

            start += mwsize

            yield workitem

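# Partitioning arithmetic, illustrated: a single input port with psize=10 and
# wsize=3 gives num_partitions = ceil(10/3) = 4 work items of sizes 3, 3, 3, 1.
# With task.maxpar=2, mwsize is recomputed as ceil(10/2) = 5, producing only
# 2 work items of size 5 each.
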
def process_result_value(self, value, dialect):
    return Data.create(json.loads(value)) if value is not None else None

def compute(project):
    log = task.logger

    config = GlobalConfig(task.conf)
    paths = PathsConfig(config)

    projects_out_port = task.ports("projects_out")

    project_id = project["id"]

    log.info("--- [{0}] --------------------------------------------".format(project_id))

    ofm = Data.element(project["oncodrivefm"])
    feature = ofm["feature"]
    slice_name = ofm["slice"]
    estimator = ofm.get("estimator")
    num_cores = ofm.get("num_cores", dtype=str)
    num_samplings = ofm.get("num_samplings", dtype=str)
    samples_threshold = ofm.get("samples_threshold", dtype=str)
    filter_enabled = ofm.get("filter_enabled", dtype=bool)
    filter_path = ofm.get("filter_path", dtype=str)

    log.info("feature = {0}".format(feature))
    log.info("slice = {0}".format(slice_name))
    log.info("estimator = {0}".format(estimator))
    log.info("num_cores = {0}".format(num_cores))
    log.info("num_samplings = {0}".format(num_samplings))
    log.info("samples_threshold = {0}".format(samples_threshold))
    log.info("filter_enabled = {0}".format(filter_enabled))
    log.info("filter_path = {0}".format(os.path.basename(filter_path)))

    cmd = [
        "oncodrivefm-compute",
        "-o", project["temp_path"],
        "-n oncodrivefm-{0}".format(feature),
        "-N", num_samplings,
        "--threshold", samples_threshold,
        "-e {0}".format(estimator),
        "-j", num_cores,
        "--slices '{0}'".format(slice_name)]

    if filter_enabled:
        cmd += ["--filter", filter_path]

    if feature == "pathways":
        cmd += ["-m", paths.data_kegg_path("ensg_kegg.tsv")]

    cmd += [ofm["data"]]

    project["oncodrivefm"] = dict(
        feature=feature,
        slice=slice_name,
        results=os.path.join(project["temp_path"],
                             "oncodrivefm-{0}-{1}.tsv".format(feature, slice_name)))

    cmd = " ".join(cmd)

    log.debug(cmd)

    ret_code = subprocess.call(cmd, shell=True)
    if ret_code != 0:
        raise Exception("OncodriveFM error while computing {0}:\n{1}".format(feature, cmd))

    projects_out_port.send(project)

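# Illustrative command line produced above (paths and values hypothetical):
#
#   oncodrivefm-compute -o /tmp/p1 -n oncodrivefm-genes -N 10000 --threshold 2 -e mean -j 4 --slices 'SIFT' data.tsv
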
def __init__(self):
    # Get task key and storage configuration
    cmd_conf = OptionsConfig(required=["case", "task", "index", "data.type", "storage.type"])

    # Register signals
    self._signal_names = {}
    for signame in [x for x in dir(signal) if x.startswith("SIG")]:
        try:
            signum = getattr(signal, signame)
            signal.signal(signum, self.__signal_handler)
            self._signal_names[signum] = signame
        except:
            pass

    # command line configuration
    case_name = cmd_conf["case"]
    task_cname = cmd_conf["task"]
    workitem_index = cmd_conf["index"]

    # initialize the data provider
    provider_conf = cmd_conf["data"]
    self._provider = data_provider_factory.create(provider_conf)
    self._provider.start()

    # initialize storage
    storage_conf = cmd_conf["storage"]
    self.storage = storage_factory.create(storage_conf)
    self.storage = self.storage.get_container(case_name)

    # load the module and task descriptors
    task_desc = self._provider.load_task(case_name, task_cname)
    workitem_desc = self._provider.load_workitem(case_name, task_cname, workitem_index)
    partition = workitem_desc["partition"]

    # setup task configuration
    self.conf = Data.create(task_desc["conf"])
    self.conf["__task_index"] = workitem_index
    self.conf.expand_vars()

    # setup task attributes
    self.case = workitem_desc["case"]
    self.task = workitem_desc["task"]
    self.id = workitem_desc["cname"]
    self.name = workitem_desc["name"]
    self.index = workitem_index

    # initialize decorators
    self._main = None
    self._sources = []
    self._foreach = None
    self._begin = None
    self._end = None

    self._start_time = 0
    self._end_time = self._start_time

    # initialize task logging
    log_conf = self.conf.get("logging")
    logger.initialize(log_conf)
    self.logger = logger.get_logger(self.name)

    self.logger.debug("Task descriptor: {}".format(Data.create(task_desc)))
    self.logger.debug("WorkItem descriptor: {}".format(Data.create(workitem_desc)))

    # Initialize input stream
    self._stream = Stream(self._provider, task_desc["stream"])

    # Initialize ports
    self._ports = {}
    self._in_ports = []
    self._out_ports = []
    self._open_ports = {}
    if "ports" in task_desc and "ports" in partition:
        port_descriptors = Data.create(task_desc["ports"])

        for port_desc in port_descriptors.get("in", default=list):
            port_desc["mode"] = PORT_MODE_IN
            self._ports[port_desc["name"]] = port_desc
            self._in_ports += [port_desc]

        for port_desc in port_descriptors.get("out", default=list):
            port_desc["mode"] = PORT_MODE_OUT
            self._ports[port_desc["name"]] = port_desc
            self._out_ports += [port_desc]

        port_descriptors = Data.create(partition["ports"])

        for port_desc in port_descriptors.get("in", default=list):
            task_port_desc = self._ports[port_desc["name"]]
            task_port_desc["data"] = port_desc["data"]

        for port_desc in port_descriptors.get("out", default=list):
            task_port_desc = self._ports[port_desc["name"]]
            task_port_desc["data"] = port_desc["data"]

    # Get hostname
    try:
        import socket
        self.hostname = socket.gethostname()
    except:
        self.hostname = "unknown"

    # The context field is free to be used by the task user to save variables
    # related with the whole task life cycle. By default it is initialized
    # with a dictionary but it can be overwritten with any value by the user.
    # Wok will never use it.
    self.context = {}

def _parse_conf(self, xmle):
    return Data.from_xmle(xmle)