Exemplo n.º 1
0
    def req_params(cls, inst, _exclude=None, _prefer_cli=None, **kwargs):
        """
        Returns the parameters shared by *inst* and this class, as determined by
        ``luigi.util.common_params``, after dropping excluded names and merging *kwargs*.

        Names matching any pattern in *_exclude*, in the ``exclude_params_req`` and
        ``exclude_params_req_get`` attributes of this class, or in the ``exclude_params_req`` and
        ``exclude_params_req_set`` attributes of *inst* are removed. *kwargs* are added afterwards
        and therefore take precedence. Finally, each name in *_prefer_cli* is removed again when a
        command line parser instance exists and the global command line values contain a key
        consisting of this class' task family, an underscore and that name.
        """
        # start from the intersection of parameters of the instance and this class
        params = luigi.util.common_params(inst, cls)

        # gather all exclusion patterns: explicit ones, those of this class (req, req_get)
        # and those of the instance (req, req_set)
        patterns = set(make_list(_exclude)) if _exclude is not None else set()
        patterns.update(cls.exclude_params_req, cls.exclude_params_req_get)
        patterns.update(inst.exclude_params_req, inst.exclude_params_req_set)

        # drop every parameter matching at least one pattern
        dropped = [name for name in params.keys() if multi_match(name, patterns, any)]
        for name in dropped:
            del params[name]

        # explicitly passed values win over common ones
        params.update(kwargs)

        # nothing else to do when no parameter is preferably taken from the command line
        if not _prefer_cli:
            return params

        # collect names of parameters that were set via cli class arguments, i.e., global
        # command line values whose key starts with "<task_family>_"
        family_prefix = cls.get_task_family() + "_"
        cli_names = []
        if luigi.cmdline_parser.CmdlineParser.get_instance():
            cli_names = [
                key[len(family_prefix):]
                for key in global_cmdline_values().keys()
                if key.startswith(family_prefix)
            ]

        # remove those that should rather be taken from the command line
        for name in make_list(_prefer_cli):
            if name in params and name in cli_names:
                del params[name]

        return params
Exemplo n.º 2
0
Arquivo: base.py Projeto: riga/law
    def _initialize_sandbox(self, force=False):
        """
        Determines the effective sandbox of this task and, unless the task is already sandboxed,
        creates the sandbox instance and the sandbox proxy. Subsequent calls return immediately
        unless *force* is *True*. An exception is raised when no effective sandbox could be
        determined and ``allow_empty_sandbox`` is not set.
        """
        if self._sandbox_initialized and not force:
            return
        self._sandbox_initialized = True

        if _sandbox_switched:
            # already inside a sandbox, so this task simply lives in it (no nesting)
            chosen = _current_sandbox[0]
        elif not isinstance(self.__class__.sandbox, luigi.Parameter):
            # hard-coded sandbox, take it as is
            chosen = self.sandbox
        elif multi_match(self.sandbox, self.valid_sandboxes, mode=any):
            # sandbox given as a parameter and among the valid ones
            chosen = self.sandbox
        else:
            # sandbox given as a parameter but not valid, ask for the fallback
            chosen = self.fallback_sandbox(self.sandbox)
        self._effective_sandbox = chosen

        # an empty sandbox is only acceptable when explicitly allowed
        if self._effective_sandbox in (None, NO_STR):
            if not self.allow_empty_sandbox:
                raise Exception("task {!r} requires the sandbox parameter to be set".format(self))
            self._effective_sandbox = NO_STR

        # the proxy is only needed while the task is not yet running sandboxed
        if self.is_sandboxed():
            return

        self._sandbox_inst = Sandbox.new(self._effective_sandbox, self)
        self._sandbox_proxy = SandboxProxy(task=self)
        logger.debug("created sandbox proxy instance of type '{}'".format(
            self._effective_sandbox))
Exemplo n.º 3
0
Arquivo: base.py Projeto: silky/law
    def req_params(cls, inst, _exclude=None, **kwargs):
        """
        Returns the parameters shared by *inst* and this class, as determined by
        ``luigi.util.common_params``, after dropping excluded names and merging *kwargs*.

        *_exclude* can be a single pattern or a list, tuple, set or frozenset of patterns. It is
        never modified. Names matching any of these patterns, or any pattern in the
        ``exclude_params_req`` and ``exclude_params_req_receive`` attributes of this class, or in
        the ``exclude_params_req`` and ``exclude_params_req_transfer`` attributes of *inst*, are
        removed. *kwargs* are added afterwards and therefore take precedence.
        """
        # common/intersection params
        params = luigi.util.common_params(inst, cls)

        # determine parameters to exclude, always working on a fresh set so that the update
        # calls below cannot leak class/instance patterns into an object owned by the caller
        if _exclude is None:
            _exclude = set()
        elif isinstance(_exclude, (list, tuple, set, frozenset)):
            _exclude = set(_exclude)
        else:
            _exclude = {_exclude}

        # also use this class' req and req_receive sets
        # and the req and req_transfer sets of the instance's class
        _exclude.update(cls.exclude_params_req, cls.exclude_params_req_receive)
        _exclude.update(inst.exclude_params_req,
                        inst.exclude_params_req_transfer)

        # remove excluded parameters
        for name in list(params.keys()):
            if multi_match(name, _exclude, any):
                del params[name]

        # add kwargs
        params.update(kwargs)

        return params
Exemplo n.º 4
0
    def deregister(cls, task_cls=None):
        """
        Removes a task class *task_cls* from the luigi task register. When *None*, *this* class is
        used. Task family strings and patterns are accepted as well. *True* is returned when at
        least one class was successfully removed, and *False* otherwise.
        """
        # the comparison is always based on task families
        if isinstance(task_cls, six.string_types):
            family_pattern = task_cls
        elif task_cls is None:
            family_pattern = cls.get_task_family()
        else:
            family_pattern = task_cls.get_task_family()

        removed_any = False

        # walk through the register and only advance the position when the current entry is
        # kept, since popping an entry shifts all following ones down by one
        pos = 0
        while pos < len(Register._reg):
            candidate = Register._reg[pos]

            if not multi_match(candidate.get_task_family(), family_pattern, mode=any):
                pos += 1
                continue

            Register._reg.pop(pos)
            removed_any = True
            logger.debug("removed task class {} from register".format(candidate))

        return removed_any
Exemplo n.º 5
0
    def __init__(self, *args, **kwargs):
        """
        Initializes the task, determines its effective sandbox and, when the task is not yet
        sandboxed, creates the sandbox instance and the sandbox proxy. An exception is raised
        when a fallback sandbox is needed but cannot be determined.
        """
        super(SandboxTask, self).__init__(*args, **kwargs)

        if _sandbox_switched:
            # already running inside a sandbox, so use the current one
            self.effective_sandbox = _current_sandbox[0]
        elif self.force_sandbox or multi_match(self.sandbox, self.valid_sandboxes, any):
            # either the switch is forced or the requested sandbox is a valid one
            self.effective_sandbox = self.sandbox
        else:
            # the requested sandbox cannot be used, so a fallback is required
            self.effective_sandbox = self.fallback_sandbox(self.sandbox)
            if self.effective_sandbox is None:
                raise Exception("cannot determine fallback sandbox for {} in task {}".format(
                    self.sandbox, self))

        if self.sandboxed:
            # no proxy objects are needed when already sandboxed
            self.sandbox_inst = None
            self.sandbox_proxy = None
        else:
            self.sandbox_inst = Sandbox.new(self.effective_sandbox, self)
            self.sandbox_proxy = SandboxProxy(task=self)
            logger.debug("created sandbox proxy instance of type '{}'".format(
                self.effective_sandbox))
Exemplo n.º 6
0
 def get_task_params(cls):
     """
     Returns the names of all attributes of *cls* that are ``luigi.Parameter`` instances and
     that do not match any pattern in the optional ``exclude_params_index`` attribute of *cls*.
     Underscores in the names are replaced by dashes.
     """
     names = []
     for attr_name in dir(cls):
         # only parameter objects are of interest
         if not isinstance(getattr(cls, attr_name), luigi.Parameter):
             continue

         # skip parameters that are excluded from the index
         patterns = getattr(cls, "exclude_params_index", set())
         if multi_match(attr_name, patterns, any):
             continue

         names.append(attr_name.replace("_", "-"))
     return names
Exemplo n.º 7
0
    def before_call():
        # the decorated method must belong to a sandbox task
        if not isinstance(task, SandboxTask):
            msg = ("require_sandbox can only be used to decorate methods of tasks that "
                "inherit from SandboxTask, got '{!r}'")
            raise TypeError(msg.format(task))

        # the task must be sandboxed at the time of the invocation
        if not task.is_sandboxed():
            msg = "the invocation of method {} requires task {!r} to be sandboxed"
            raise Exception(msg.format(fn.__name__, task))

        # when specific sandboxes are requested, the effective one must match them
        required = opts["sandbox"]
        if required and not multi_match(task.effective_sandbox, make_list(required)):
            msg = ("the invocation of method {} requires the sandbox of task {!r} to "
                "match '{}'")
            raise Exception(msg.format(fn.__name__, task, required))

        return None
Exemplo n.º 8
0
    def _repr_params(self, all_params=False):
        """
        Returns a list of (name, serialized value) pairs of all significant parameters. Unless
        *all_params* is *True*, parameters whose name matches ``exclude_params_repr`` are skipped.
        """
        param_list = self.get_params()
        values = self.get_param_values(param_list, [], self.param_kwargs)
        by_name = dict(param_list)

        pairs = []
        for name, value in values:
            param = by_name[name]

            # insignificant parameters are never shown
            if not param.significant:
                continue

            # excluded parameters are only shown when all parameters are requested
            if not all_params and multi_match(name, self.exclude_params_repr):
                continue

            pairs.append((name, param.serialize(value)))

        return pairs
Exemplo n.º 9
0
    def _repr_params(self, all_params=False):
        """
        Returns an ordered mapping "name -> value" of all significant parameters. Unless
        *all_params* is *True*, parameters matching ``exclude_params_repr`` or listed in
        ``interactive_params`` are left out.
        """
        # patterns of parameters to hide
        hidden = set()
        if not all_params:
            hidden |= self.exclude_params_repr
            hidden |= set(self.interactive_params)

        # keep the order in which the parameters are reported by get_params
        return OrderedDict(
            (name, getattr(self, name))
            for name, param in self.get_params()
            if param.significant and not multi_match(name, hidden)
        )
Exemplo n.º 10
0
    def cli_args(self, exclude=None, replace=None):
        """
        Returns an ordered mapping of command line options ("--name-with-dashes") to the
        stringified, serialized values of the parameters of this task. Parameters whose name
        matches any pattern in *exclude* are skipped. Values found in the *replace* mapping are
        used instead of the current attribute values.
        """
        skip_patterns = set(make_list(exclude)) if exclude is not None else set()
        overrides = {} if replace is None else replace

        args = OrderedDict()
        for name, param in self.get_params():
            if not multi_match(name, skip_patterns, any):
                value = overrides.get(name, getattr(self, name))
                serialized = param.serialize(value)
                option = "--" + name.replace("_", "-")
                args[option] = str(serialized)

        return args
Exemplo n.º 11
0
    def cli_args(self, exclude=None, replace=None):
        """
        Returns a flat list of command line arguments, i.e., alternating options
        ("--name-with-dashes") and stringified, serialized values of the parameters of this task.
        Parameters whose name matches any pattern in *exclude* are skipped. Values found in the
        *replace* mapping are used instead of the current attribute values.
        """
        skip_patterns = set(make_list(exclude)) if exclude is not None else set()
        overrides = {} if replace is None else replace

        args = []
        for name, param in self.get_params():
            if not multi_match(name, skip_patterns, any):
                value = overrides.get(name, getattr(self, name))
                serialized = param.serialize(value)
                option = "--{}".format(name.replace("_", "-"))
                # TODO: why does quote_cmd([val]) fail while str(val) doesn't
                args += [option, str(serialized)]

        return args
Exemplo n.º 12
0
    def _repr_params(self, all_params=False):
        """
        Returns a list of (name, serialized value) pairs of all significant parameters. Unless
        *all_params* is *True*, parameters matching ``exclude_params_repr``, the result of
        ``inst_exclude_params_repr()`` or listed in ``interactive_params`` are left out.
        """
        param_list = self.get_params()

        # patterns of parameters to hide
        hidden = set()
        if not all_params:
            hidden |= self.exclude_params_repr
            hidden |= self.inst_exclude_params_repr()
            hidden |= set(self.interactive_params)

        return [
            (name, param.serialize(getattr(self, name)))
            for name, param in param_list
            if param.significant and not multi_match(name, hidden)
        ]
Exemplo n.º 13
0
Arquivo: db.py Projeto: silky/law
    def dbline(cls, default_namespace=None):
        """
        Returns a line of the form "<module>:<task family>:<space-separated parameters>" for this
        class. The parameters always start with "workers", "local-scheduler" and "help", followed
        by the dash-separated names of all ``luigi.Parameter`` attributes that do not match the
        optional ``exclude_params_db`` attribute. When *default_namespace* is set, the task
        family is truncated by the length of the namespace plus one character.
        """
        # parameters that are always part of the line
        params = ["workers", "local-scheduler", "help"]

        # add all parameter attributes that are not excluded
        for attr_name in dir(cls):
            if not isinstance(getattr(cls, attr_name), luigi.Parameter):
                continue
            patterns = getattr(cls, "exclude_params_db", set())
            if multi_match(attr_name, patterns, any):
                continue
            params.append(attr_name.replace("_", "-"))

        # use a truncated task family when a default namespace is set
        family = cls.task_family
        if default_namespace is not None:
            family = family[(len(default_namespace) + 1):]

        return ":".join([cls.__module__, family, " ".join(params)])
Exemplo n.º 14
0
    def cli_args(self, exclude=None, replace=None):
        """
        Returns a flat list of command line arguments, i.e., alternating options
        ("--name-with-dashes") and quoted, serialized values of the parameters of this task.

        *exclude* can be a single pattern string or any iterable of patterns. Parameters whose
        name matches one of them are skipped, and those listed in ``interactive_params`` are
        always skipped. *exclude* itself is never modified. Values found in the *replace* mapping
        are used instead of the current attribute values.
        """
        # always work on a private copy: the in-place update below must not leak the
        # interactive parameters into a set owned by the caller, and copying also allows
        # lists and tuples to be passed
        if exclude is None:
            exclude = set()
        elif isinstance(exclude, str):
            # a single pattern, do not split it into characters
            exclude = {exclude}
        else:
            exclude = set(exclude)
        if replace is None:
            replace = {}

        # always exclude interactive parameters
        exclude |= set(self.interactive_params)

        args = []
        for name, param in self.get_params():
            if multi_match(name, exclude, any):
                continue
            raw = replace.get(name, getattr(self, name))
            val = param.serialize(raw)
            arg = "--{}".format(name.replace("_", "-"))
            args.extend([arg, quote_cmd([val])])

        return args
Exemplo n.º 15
0
    def cli_args(self, exclude=None, replace=None):
        """
        Returns a flat list of command line arguments for the parameters of this task.
        Parameters whose name matches any pattern in *exclude* are skipped. Values found in the
        *replace* mapping are used instead of the current attribute values. Boolean parameters
        are added as a bare option, and only when their value is true. Integer and float
        parameters are added with their stringified value, all others with their value wrapped
        in double quotes.
        """
        skip_patterns = set() if exclude is None else exclude
        overrides = {} if replace is None else replace

        args = []
        for name, param in self.get_params():
            if multi_match(name, skip_patterns, any):
                continue

            raw = overrides.get(name, getattr(self, name))
            val = param.serialize(raw)
            option = "--{}".format(name.replace("_", "-"))

            # flags carry no value and are only present when set
            if isinstance(param, luigi.BoolParameter):
                if raw:
                    args.append(option)
                continue

            # numbers are passed bare, everything else is quoted
            if isinstance(param, (luigi.IntParameter, luigi.FloatParameter)):
                rendered = str(val)
            else:
                rendered = "\"{}\"".format(val)
            args.extend([option, rendered])

        return args
Exemplo n.º 16
0
def replace_console_handlers(loggers=("luigi", "luigi.*", "luigi-*", "law",
                                      "law.*"),
                             level=None,
                             force_add=False,
                             check_fn=None):
    """
    Removes all tty stream handlers (i.e. those logging to *stdout* or *stderr*) from certain
    *loggers* and adds a ``rich.logging.RichHandler`` with a specified *level*. *loggers* can either
    be logger instances or names. In the latter case, the names are used as patterns to identify
    matching loggers. Unless *force_add* is *True*, no new handler is added when no tty stream
    handler was previously registered.

    *check_fn* can be a function with two arguments, a logger instance and a handler instance, that
    should return *True* if that handler should be removed. When *None*, all handlers inheriting
    from the basic ``logging.StreamHandler`` are removed if their *stream* attribute refers to a
    tty stream. When *level* is *None*, it defaults to the log level of the first removed handler.
    In case no default level can be determined, *INFO* is used.

    The removed handlers are returned in a list of 2-tuples (*logger*, *removed_handlers*).
    """
    from rich import logging as rich_logging

    # prepare the return value
    ret = []

    # default check_fn
    if check_fn is None:
        check_fn = lambda logger, handler: is_tty_handler(handler)

    loggers = make_list(loggers)

    # iterate over a snapshot as check_fn is arbitrary user code that might create new loggers
    for name, logger in list(logging.root.manager.loggerDict.items()):
        # the logger dict also contains placeholders for missing intermediate loggers which have
        # no handlers and cannot receive one, so skip them
        if not isinstance(logger, logging.Logger):
            continue

        # check if the logger is selected
        for selector in loggers:
            if logger == selector:
                break
            elif isinstance(selector, six.string_types) and multi_match(name, selector):
                break
        else:
            # when this point is reached, the logger was not selected
            continue

        removed_handlers = []
        # loop over a copy since removeHandler modifies logger.handlers in place, which would
        # otherwise cause the handler following a removed one to be skipped
        for handler in list(logger.handlers):
            if check_fn(logger, handler):
                # get the level
                if level is None:
                    level = getattr(handler, "level", None)

                # remove it
                logger.removeHandler(handler)
                removed_handlers.append(handler)

        # when at least one handler was found and removed, or force_add is True, add a rich handler
        if removed_handlers or force_add:
            # make sure the level is set
            if level is None:
                level = logging.INFO

            # add the rich handler
            logger.addHandler(rich_logging.RichHandler(level))

        # add the removed handlers to the returned list
        if removed_handlers:
            ret.append((logger, removed_handlers))

    return ret
Exemplo n.º 17
0
 def scheduler_on_host(self):
     """
     Returns the result of matching the scheduler host of the luigi core configuration against
     the addresses "0.0.0.0", "127.0.0.1" and "localhost".
     """
     local_hosts = ["0.0.0.0", "127.0.0.1", "localhost"]
     scheduler_host = luigi.interface.core().scheduler_host
     return multi_match(scheduler_host, local_hosts)
Exemplo n.º 18
0
def replace_console_handlers(loggers=("luigi", "luigi.*", "luigi-*", "law",
                                      "law.*"),
                             level=None,
                             force_add=False,
                             check_fn=None,
                             **kwargs):
    """
    Removes all tty stream handlers (i.e. those logging to *stdout* or *stderr*) from certain
    *loggers* and adds a new ``rich.logging.RichHandler`` instance with a specified *level* and all
    *kwargs* passed as additional options to its constructor. *loggers* can either be logger
    instances or names. In the latter case, the names are used as patterns to identify matching
    loggers. Unless *force_add* is *True*, no new handler is added when no tty stream handler was
    previously registered.

    *check_fn* can be a function with two arguments, a logger instance and a handler instance, that
    should return *True* if that handler should be removed. When *None*, all handlers inheriting
    from the basic ``logging.StreamHandler`` are removed if their *stream* attribute refers to a
    tty stream. When *level* is *None*, it defaults to the log level of the first removed handler.
    In case no default level can be determined, *INFO* is used.

    Whenever a rich handler is added while the law configuration enables colored string
    representations of tasks or targets, a warning is emitted once through the affected logger.

    The removed handlers are returned in a list of 2-tuples (*logger*, *removed_handlers*).
    """
    from rich import logging as rich_logging

    # prepare the return value
    ret = []

    # default check_fn
    if check_fn is None:
        check_fn = lambda logger, handler: is_tty_handler(handler)

    loggers = make_list(loggers)

    # iterate over a snapshot as code invoked within the loop (check_fn, the config lookup)
    # might create new loggers and thereby change the logger dict during iteration
    for name, logger in list(logging.root.manager.loggerDict.items()):
        # the logger dict also contains placeholders for missing intermediate loggers which have
        # no handlers and cannot receive one, so skip them
        if not isinstance(logger, logging.Logger):
            continue

        # check if the logger is selected
        for selector in loggers:
            if logger == selector:
                break
            elif isinstance(selector, six.string_types) and multi_match(name, selector):
                break
        else:
            # when this point is reached, the logger was not selected
            continue

        removed_handlers = []
        # loop over a copy since removeHandler modifies logger.handlers in place, which would
        # otherwise cause the handler following a removed one to be skipped
        for handler in list(logger.handlers):
            if check_fn(logger, handler):
                # get the level
                if level is None:
                    level = getattr(handler, "level", None)

                # remove it
                logger.removeHandler(handler)
                removed_handlers.append(handler)

        # when at least one handler was found and removed, or force_add is True, add a rich handler
        if removed_handlers or force_add:
            # make sure the level is set
            if level is None:
                level = logging.INFO

            # add the rich handler
            logger.addHandler(rich_logging.RichHandler(level, **kwargs))

            # emit warning for colored_* configs
            cfg = Config.instance()
            opts = [(s, o) for s in ["task", "target"]
                    for o in ["colored_str", "colored_repr"]]
            if any(cfg.get_expanded_boolean(*opt) for opt in opts):
                logger.warning_once(
                    "interfering_colors_in_rich_handler",
                    "law is currently configured to colorize string representations of tasks and "
                    "targets which might lead to malformed logs of the RichHandler; to avoid this, "
                    "consider updating your law configuration file ({}) to:\n"
                    "[task]\ncolored_repr: False\ncolored_str: False\n\n"
                    "[target]\ncolored_repr: False\ncolored_str: False".format(
                        cfg.config_file),
                )

        # add the removed handlers to the returned list
        if removed_handlers:
            ret.append((logger, removed_handlers))

    return ret