Пример #1
0
def test_initialize():
    """api.initialize must exit on -h and honor repeated initialization."""
    # '-h' invokes argparse help output, which terminates the process
    with nt.assert_raises(SystemExit):
        api.initialize(['-h'])
    # initialize twice with different tasks_json values; the namespace
    # should reflect whichever call ran most recently
    api.initialize(['--configuration_backend', 'json', '--tasks_json', 'a'])
    first = stolos.get_NS().tasks_json
    api.initialize(['--configuration_backend', 'json', '--tasks_json', 'b'])
    second = stolos.get_NS().tasks_json
    nt.assert_equal(first, 'a')
    nt.assert_equal(second, 'b')
    # the uninitialized sentinel must be removed after initialization
    nt.assert_false(hasattr(stolos, 'Uninitialized'))
Пример #2
0
def test_initialize():
    """Check initialization behavior: help exits, re-init overwrites state."""
    with nt.assert_raises(SystemExit):
        api.initialize(['-h'])
    # drive two initializations and record what the namespace reports
    observed = []
    for value in ('a', 'b'):
        api.initialize(
            ['--configuration_backend', 'json', '--tasks_json', value])
        observed.append(stolos.get_NS().tasks_json)
    nt.assert_equal(observed[0], 'a')
    nt.assert_equal(observed[1], 'b')
    nt.assert_false(hasattr(stolos, 'Uninitialized'))
Пример #3
0
def get_tasks_config():
    """Instantiate and return the configured tasks-config backend.

    The backend class is read from the initialized Stolos namespace.
    """
    return stolos.get_NS().configuration_backend()
Пример #4
0
def get_tasks_config():
    """Return a reader for the Stolos application config.

    Looks up the configuration backend class on the Stolos namespace and
    calls it to produce a config object.
    """
    backend_cls = stolos.get_NS().configuration_backend
    return backend_cls()
Пример #5
0
def get_children(app_name, job_id, include_dependency_group=True):
    """Yield the child jobs that depend on (`app_name`, `job_id`).

    Each yielded value is a tuple produced by `_generate_job_ids`, with the
    dependency group name appended when `include_dependency_group` is True.
    """
    dag = build_dag()
    # flatten successors into one (child, dependency_group_name) pair per edge
    edges = [(child, grp)
             for child, groups in dag.succ[app_name].items()
             for grp in groups]
    for child, group_name in edges:
        depends_on = dag.node[child]['depends_on']
        # 2 types of depends_on definitions:
        # 1) dict with app_name
        # 2) named dependency groups:
        #     2a) dict without app_name that defines a list of dicts (AND)
        #     2b) dict without app_name that defines a single dict (OR)
        if group_name != get_NS().dependency_group_default_name:
            depends_on = depends_on[group_name]

        depends_on = convert_dep_grp_to_parsed_list(app_name, depends_on)

        for rv in flatmap_with_kwargs(
                func=_generate_job_ids, kwarg_name='depends_on',
                list_or_value=depends_on, app_name=app_name,
                job_id=job_id, child=child, group_name=group_name):
            yield rv + (group_name, ) if include_dependency_group else rv
Пример #6
0
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist,
    apply to each individual value of the relevant key in the dep group?

    `dep_grp` (dict-like) one dependency group from a depends_on section
    `ld` (dict) extra logging context merged into every raised error
    `tasks_conf` (mapping) the full tasks configuration keyed by app_name

    Raises DAGMisconfigured when a key has no registered validation
    function, a value fails its validation, or a validation returns a
    modified value.
    """
    for k, v in dep_grp.items():

        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue

        func = get_NS().job_id_validations.get(k)

        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func, "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component",
            extra=dict(key=k, value=v, **ld),
            exception_kls=DAGMisconfigured)

        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents
            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template.  If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(k not in tasks_conf[parent].get(
                    'autofill_values', {}),
                              msg,
                              extra=dict(parent_app_name=parent, key=k, **ld),
                              exception_kls=DAGMisconfigured)
            continue

        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                # BUGFIX: `err.message` was removed in Python 3; interpolate
                # the exception itself (equivalent text on both 2 and 3).
                _log_raise(
                    ("Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                     " The job_id_validation function complained that the"
                     " value was invalid. Error details: %s") % err,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)

            # validations must act as identity checks: returning a modified
            # value would silently change the configured job_id component
            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value.  It should return input unmodified or fail."),
                extra=dict(key='%s.%s' % (k, v),
                           value=vv,
                           job_id_validation=func,
                           **ld),
                exception_kls=DAGMisconfigured)
Пример #7
0
def _validate_job_id_identifiers(app_name,
                                 vals,
                                 validations=None,
                                 **_log_details):
    """Validate and type-cast each job_id component for `app_name`.

    `vals` is a sequence of raw component strings in template order.
    Returns a dict mapping template key -> validated/cast value.
    Raises InvalidJobId if a registered validation function fails.
    """
    if validations is None:
        validations = get_NS().job_id_validations
    _, template = get_job_id_template(app_name)
    ld = dict(app_name=app_name, job_id_template=template)
    ld.update(_log_details)
    result = {}
    for key, raw in zip(template, vals):
        try:
            cast = validations[key](raw)
            assert cast is not None, "validation func returned None"
            assert cast is not False, "validation func returned False"
        except KeyError:
            # no validation registered for this key: keep the raw value
            cast = raw
            log.warn("No job_id validation for key.  You should implement one",
                     extra=dict(job_id_key=key, **ld))
        except Exception as err:
            msg = "An identifier in a job_id failed validation"
            log.exception(msg,
                          extra=dict(job_id_identifier=key,
                                     bad_value=raw,
                                     error_details=err,
                                     **ld))
            raise InvalidJobId("%s err: %s" % (msg, err))
        result[key] = cast
    return result
Пример #8
0
def parse_job_id(app_name, job_id, delimiter=None):
    """Convert given `job_id` into a dict of its named components.

    `app_name` (str) identifies a task
    `job_id` (str) identifies an instance of a task (ie a subtask)
    `delimiter` (str) separator between job_id components; defaults to
        the configured job_id_delimiter

    ie:
        20140506_876_profile -->

        {'date': 20140506, 'client_id': 876, 'collection_name': 'profile'}

    Returned values are cast by the configured job_id validation functions.
    Raises InvalidJobId when the job_id has the wrong number of components.
    """
    delimiter = get_NS().job_id_delimiter if delimiter is None else delimiter
    template, ptemplate = get_job_id_template(app_name)
    expected = len(ptemplate)
    pieces = job_id.split(delimiter, expected - 1)
    if len(pieces) != expected:
        _log_raise(("Job_id isn't properly delimited.  You might have too few"
                    " or too many underscores."),
                   extra=dict(job_id=job_id, app_name=app_name,
                              job_id_template=template),
                   exception_kls=InvalidJobId)
    return _validate_job_id_identifiers(app_name, pieces)
Пример #9
0
def get_job_id_template(app_name, template=None):
    """Return (template_string, [component_name, ...]) for `app_name`.

    A task may override the default template via its "job_id" config key;
    component names are the {placeholders} found in the template.
    """
    if template is None:
        template = get_NS().job_id_default_template
    conf = cb.get_tasks_config()
    chosen = conf[app_name].get('job_id', template)
    return chosen, re.findall(r'{(.*?)}', chosen)
Пример #10
0
def _build_dict_deps(dg, app_name, deps):
    """Build edges between dependent nodes by looking at listed dependencies

    `dg` (nx.MultiDiGraph instance) - the Tasks configuration as a graph
    `app_name` (str) - the name of a scheduled application
    `deps` (obj) - the dependencies for given `app_name`.  Should be a subclass
        of cb.TasksConfigBaseMapping, and is the value of given app_name's
        "depends_on" field
    """
    log_details = dict(app_name=app_name, key='depends_on', deps=dict(deps))
    is_unnamed_group = (isinstance(deps, cb.TasksConfigBaseMapping)
                        and "app_name" in deps)
    if is_unnamed_group:
        # a single, unnamed dependency group
        _add_edges(
            dg, app_name=app_name,
            dep_name=get_NS().dependency_group_default_name,
            dep_grp=deps, log_details=log_details)
        return
    for dep_name, dep_data in deps.items():
        if isinstance(dep_data, cb.TasksConfigBaseMapping):
            # named group defined as a single dict
            _add_edges(dg=dg, app_name=app_name, dep_name=dep_name,
                       dep_grp=dep_data, log_details=log_details)
        elif isinstance(dep_data, cb.TasksConfigBaseSequence):
            # named group defined as a list of dicts
            for grp in dep_data:
                _add_edges(dg=dg, app_name=app_name, dep_name=dep_name,
                           dep_grp=grp, log_details=log_details)
        else:
            _log_raise(
                "Unrecognized dependency.  Expected a list or dict",
                dict(dep_name=dep_name, dep_data=dep_data, **log_details),
                exception_kls=DAGMisconfigured)
Пример #11
0
def get_children(app_name, job_id, include_dependency_group=True):
    """Generate child job tuples that depend on (`app_name`, `job_id`)."""
    dag = build_dag()
    successors = [(child, grp)
                  for child, grps in dag.succ[app_name].items()
                  for grp in grps]
    for child, group_name in successors:
        dep_conf = dag.node[child]['depends_on']
        # depends_on is either a single unnamed group (dict with "app_name")
        # or a mapping of named groups, each a dict (OR) or list of dicts (AND)
        if group_name != get_NS().dependency_group_default_name:
            dep_conf = dep_conf[group_name]

        parsed = convert_dep_grp_to_parsed_list(app_name, dep_conf)

        results = flatmap_with_kwargs(
            func=_generate_job_ids, kwarg_name='depends_on',
            list_or_value=parsed, app_name=app_name, job_id=job_id,
            child=child, group_name=group_name)
        if include_dependency_group:
            for rv in results:
                yield rv + (group_name, )
        else:
            for rv in results:
                yield rv
Пример #12
0
def validate_depends_on(app_name1, metadata, dg, tasks_conf, ld):
    """Validate a task's depends_on section, raising DAGMisconfigured on
    structural problems.  Tasks without a depends_on section are fine."""
    if "depends_on" not in metadata:
        return

    deps = metadata["depends_on"]
    # the depends_on value itself must be a config mapping
    _log_raise_if(
        not isinstance(deps, cb.TasksConfigBaseMapping),
        ("Configuration Error: Task's value at the depends_on key"
         " must subclass cb.TasksConfigBaseMapping"),
        extra=dict(key="depends_on", received_value_type=type(deps), **ld),
        exception_kls=DAGMisconfigured)
    # depends_on  - are we specifying only one unnamed dependency group?
    if "app_name" in deps:
        _validate_dep_grp_metadata(
            dep_grp=deps, ld=ld, tasks_conf=tasks_conf,
            dep_name=get_NS().dependency_group_default_name)
        return
    # depends_on  - are we specifying specific dependency_groups?
    _validate_dependency_groups(tasks_conf, metadata, ld)
    # depends_on  -  are dependent tasks listed properly?
    for parent in dg.pred[app_name1]:
        _log_raise_if(
            parent not in tasks_conf,
            "Task defines an unrecognized parent dependency",
            extra=dict(parent_app_name=parent, **ld),
            exception_kls=DAGMisconfigured)
Пример #13
0
def validate_depends_on(app_name1, metadata, dg, tasks_conf, ld):
    """Sanity-check the optional depends_on section of one task's metadata."""
    if "depends_on" not in metadata:
        return

    dep_section = metadata["depends_on"]
    _log_raise_if(
        not isinstance(dep_section, cb.TasksConfigBaseMapping),
        ("Configuration Error: Task's value at the depends_on key"
         " must subclass cb.TasksConfigBaseMapping"),
        extra=dict(key="depends_on",
                   received_value_type=type(dep_section),
                   **ld),
        exception_kls=DAGMisconfigured)
    if "app_name" in dep_section:
        # a single unnamed dependency group
        _validate_dep_grp_metadata(
            dep_grp=dep_section,
            ld=ld,
            tasks_conf=tasks_conf,
            dep_name=get_NS().dependency_group_default_name)
    else:
        # named dependency groups
        _validate_dependency_groups(tasks_conf, metadata, ld)
        # every declared parent must itself be a configured task
        for parent in dg.pred[app_name1]:
            _log_raise_if(parent not in tasks_conf,
                          "Task defines an unrecognized parent dependency",
                          extra=dict(parent_app_name=parent, **ld),
                          exception_kls=DAGMisconfigured)
Пример #14
0
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Check that a task's optional valid_if_or section is well formed."""
    if 'valid_if_or' not in metadata:
        return

    for key, seq in metadata['valid_if_or'].items():
        if key == '_func':
            # '_func' names an importable predicate, not a job_id component
            continue
        location = "%s.valid_if_or.%s" % (app_name1, key)
        _log_raise_if(
            not isinstance(seq, cb.TasksConfigBaseSequence),
            "Task is misconfigured.  Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(seq), key=location, **ld),
            exception_kls=DAGMisconfigured)
        parsed_template = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            key not in parsed_template,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=key, job_id_template=parsed_template, **ld),
            exception_kls=DAGMisconfigured)
        try:
            validation_func = get_NS().job_id_validations[key]
        except KeyError:
            # no validation registered for this component; skip value checks
            continue
        for candidate in seq:
            try:
                validation_func(candidate)
            except Exception as err:
                _log_raise(
                    ("valid_if_or contains a value that wasn't validated"
                     " by your job_id_validations. err: %s(%s)")
                    % (err.__class__, err),
                    extra=dict(key=location,
                               wrong_value_type=type(candidate), **ld),
                    exception_kls=DAGMisconfigured)
Пример #15
0
def validate_if_or(app_name1, metadata, dg, tasks_conf, ld):
    """Validate each key/values pair under a task's valid_if_or section."""
    # valid_if_or  -  are we specifying what makes a job valid correctly?
    if 'valid_if_or' not in metadata:
        return

    for key, values in metadata['valid_if_or'].items():
        if key == '_func':
            continue
        loc = "%s.valid_if_or.%s" % (app_name1, key)
        _log_raise_if(
            not isinstance(values, cb.TasksConfigBaseSequence),
            "Task is misconfigured.  Wrong value type. Expected a sequence",
            extra=dict(wrong_value_type=type(values), key=loc, **ld),
            exception_kls=DAGMisconfigured)
        job_id_keys = node.get_job_id_template(app_name1)[1]
        _log_raise_if(
            key not in job_id_keys,
            "valid_if_or contains a key that isn't in its job_id template",
            extra=dict(key=key, job_id_template=job_id_keys, **ld),
            exception_kls=DAGMisconfigured)
        try:
            check = get_NS().job_id_validations[key]
        except KeyError:
            continue
        for item in values:
            try:
                check(item)
            except Exception as err:
                _log_raise(
                    ("valid_if_or contains a value that wasn't validated"
                     " by your job_id_validations. err: %s(%s)") %
                    (err.__class__, err),
                    extra=dict(key=loc, wrong_value_type=type(item), **ld),
                    exception_kls=DAGMisconfigured)
Пример #16
0
def get_job_id_template(app_name, template=None):
    """Look up the job_id template for `app_name` and parse its components.

    Returns a (template, parsed_component_names) tuple.
    """
    if template is None:
        template = get_NS().job_id_default_template
    # a task may override the default template via its "job_id" key
    tasks = cb.get_tasks_config()
    template = tasks[app_name].get("job_id", template)
    return template, re.findall(r"{(.*?)}", template)
Пример #17
0
def parse_job_id(app_name, job_id, delimiter=None):
    """Split `job_id` into a dict keyed by the app's job_id template.

    `app_name` (str) identifies a task
    `job_id` (str) identifies an instance of a task (ie a subtask)
    `delimiter` (str) separator between components; falls back to the
        configured job_id_delimiter

    ie:
        20140506_876_profile -->

        {'date': 20140506, 'client_id': 876, 'collection_name': 'profile'}

    Values are cast into the appropriate type by the validation funcs.
    """
    if delimiter is None:
        delimiter = get_NS().job_id_delimiter
    template, ptemplate = get_job_id_template(app_name)
    pieces = job_id.split(delimiter, len(ptemplate) - 1)
    if len(pieces) != len(ptemplate):
        _log_raise(
            ("Job_id isn't properly delimited.  You might have too few"
             " or too many underscores."),
            extra=dict(job_id=job_id, app_name=app_name,
                       job_id_template=template),
            exception_kls=InvalidJobId)
    return _validate_job_id_identifiers(app_name, pieces)
Пример #18
0
def _validate_dep_grp_with_job_id_validations(dep_grp, ld, tasks_conf):
    """Do the user defined job_id validations, if they exist,
    apply to each individual value of the relevant key in the dep group?

    `dep_grp` (dict-like) one dependency group from a depends_on section
    `ld` (dict) extra logging context merged into every raised error
    `tasks_conf` (mapping) the full tasks configuration keyed by app_name

    Raises DAGMisconfigured on missing validations, failed validations, or
    validations that modify their input.
    """
    for k, v in dep_grp.items():

        # don't do validation on depends_on."app_name" field here,
        # and not for the depends_on."job_id" either
        # These fields are the only two fields in depends_on that are
        # not job_id components
        if k in ["app_name", "job_id"]:
            continue

        func = get_NS().job_id_validations.get(k)

        # ensure that job_id validations are fully specified for keys in
        # depends_on sections
        _log_raise_if(
            not func,
            "You introduced a new job_id component in a"
            " <app_name>.depends_on.<key> subsection, and you must inform"
            " Stolos how to parse the component", extra=dict(
                key=k, value=v, **ld), exception_kls=DAGMisconfigured)

        # skip rest of validations if "all" is used
        if v == "all":
            # assert that autofill_values exists on all parents
            msg = (
                " You requested that child depends on \"all\" values for some"
                " part of its parent job_id_template.  If you do this,"
                " the parent must define"
                " <parent_app_name>.autofill_values.<key>")
            for parent in dep_grp['app_name']:
                _log_raise_if(
                    k not in tasks_conf[parent].get('autofill_values', {}),
                    msg, extra=dict(parent_app_name=parent, key=k, **ld),
                    exception_kls=DAGMisconfigured)
            continue

        for vv in v:
            try:
                res = func(vv)
            except Exception as err:
                # BUGFIX: `err.message` was removed in Python 3; interpolate
                # the exception itself instead.
                _log_raise((
                    "Invalid data at <app_name>.depends_on.<key>.[nth_value]."
                    " The job_id_validation function complained that the"
                    " value was invalid. Error details: %s"
                ) % err,
                    extra=dict(key='%s.%s' % (k, v), value=vv, **ld),
                    exception_kls=DAGMisconfigured)

            # a validation must return its input unchanged or raise
            _log_raise_if(
                vv != res,
                ("A job_id_validation func just returned a modified"
                 " value.  It should return input unmodified or fail."),
                extra=dict(
                    key='%s.%s' % (k, v), value=vv, job_id_validation=func,
                    **ld),
                exception_kls=DAGMisconfigured)
Пример #19
0
    def __init__(self, path):
        """Set up per-instance lock state and one-time class-level machinery.

        `path` (str) - the queue/lock path this instance operates on.

        First construction of any subclass installs a signal handler so a
        failing background thread can abort the main thread; first
        construction of each concrete subclass loads its lua SCRIPTS into
        redis and starts a daemon thread that keeps held locks extended.
        """
        assert self.SCRIPTS, 'child class must define SCRIPTS'
        assert self._EXTEND_LOCK_SCRIPT_NAME, (
            'child class must define _EXTEND_LOCK_SCRIPT_NAME')

        # random id distinguishes this client's lock ownership
        self._client_id = str(random.randint(0, sys.maxsize))
        self._path = path

        self._lock_timeout = get_NS().qb_redis_lock_timeout
        self._max_network_delay = get_NS().qb_redis_max_network_delay

        if not BaseStolosRedis._BASE_INITIALIZED:
            BaseStolosRedis._BASE_INITIALIZED = True

            # use signals to trigger main thread to exit if child thread errors
            # be nice and don't replace any signal handlers if already set
            # NOTE(review): the trailing string acts as a sentinel -- if both
            # SIGUSR1 and SIGUSR2 already have handlers, signal.getsignal on
            # the string blows up, intentionally failing loudly.
            # getsignal(...) == 0 matches signal.SIG_DFL (the default handler).
            for sig in [
                    signal.SIGUSR1, signal.SIGUSR2,
                    'fail: no user-level signals available'
            ]:
                if signal.getsignal(sig) == 0:
                    BaseStolosRedis._SIGNAL = sig
                    break
            signal.signal(BaseStolosRedis._SIGNAL, _raise_err)

        if not self._INITIALIZED:
            self._INITIALIZED = True
            self.LOCKS = dict()

            # submit class's lua scripts to redis and store the SHA's
            self._SHAS = dict()
            for k in self.SCRIPTS:
                self._SHAS[k] = raw_client().script_load(
                    self.SCRIPTS[k]['script'])

            # initialize a lock extender thread for each class type that exists
            # we could just group all together, but this seems like a good idea
            # start extending locks in the background
            t = threading.Thread(
                name=("stolos.queue_backend.qbcli_redis.%s Extender" %
                      self.__class__.__name__),
                target=self._extend_lock_in_background)
            t.daemon = True
            t.start()
Пример #20
0
    def __init__(self, path):
        """Initialize per-instance state and run one-time class setup.

        `path` (str) - the queue/lock path this instance operates on.

        On first construction (of any subclass) a signal handler is
        installed so a background thread error can kill the main thread;
        on first construction of each concrete subclass its lua SCRIPTS
        are loaded into redis and a daemon lock-extender thread starts.
        """
        assert self.SCRIPTS, 'child class must define SCRIPTS'
        assert self._EXTEND_LOCK_SCRIPT_NAME, (
            'child class must define _EXTEND_LOCK_SCRIPT_NAME')

        # random id identifies this client's lock ownership
        self._client_id = str(random.randint(0, sys.maxsize))
        self._path = path

        self._lock_timeout = get_NS().qb_redis_lock_timeout
        self._max_network_delay = get_NS().qb_redis_max_network_delay

        if not BaseStolosRedis._BASE_INITIALIZED:
            BaseStolosRedis._BASE_INITIALIZED = True

            # use signals to trigger main thread to exit if child thread errors
            # be nice and don't replace any signal handlers if already set
            # NOTE(review): the final string is a deliberate sentinel -- when
            # neither SIGUSR1 nor SIGUSR2 has the default handler
            # (getsignal(...) == 0, ie signal.SIG_DFL), passing the string to
            # signal.getsignal fails loudly rather than silently.
            for sig in [signal.SIGUSR1, signal.SIGUSR2,
                        'fail: no user-level signals available']:
                if signal.getsignal(sig) == 0:
                    BaseStolosRedis._SIGNAL = sig
                    break
            signal.signal(BaseStolosRedis._SIGNAL, _raise_err)

        if not self._INITIALIZED:
            self._INITIALIZED = True
            self.LOCKS = dict()

            # submit class's lua scripts to redis and store the SHA's
            self._SHAS = dict()
            for k in self.SCRIPTS:
                self._SHAS[k] = raw_client().script_load(
                    self.SCRIPTS[k]['script'])

            # initialize a lock extender thread for each class type that exists
            # we could just group all together, but this seems like a good idea
            # start extending locks in the background
            t = threading.Thread(
                name=("stolos.queue_backend.qbcli_redis.%s Extender"
                      % self.__class__.__name__),
                target=self._extend_lock_in_background)
            t.daemon = True
            t.start()
Пример #21
0
def _get_grps(app_name, filter_deps, ld):
    """
    Return an iterator that yields (dependency_group_name, group_metadata)
    tuples
    """
    conf = cb.get_tasks_config()
    try:
        depends_on = conf[app_name]['depends_on']
    except KeyError:
        return []  # this task has no dependencies
    if "app_name" in depends_on:
        # single unnamed dependency group
        default_name = get_NS().dependency_group_default_name
        _get_parents_validate_group_names([default_name], filter_deps, ld)
        return [(default_name, depends_on)]
    if filter_deps:
        _get_parents_validate_group_names(depends_on, filter_deps, ld)
        return ((name, meta) for name, meta in depends_on.items()
                if name in filter_deps)
    return depends_on.items()
Пример #22
0
def raw_client():
    """Start a connection to ZooKeeper"""
    ns = get_NS()
    hosts, timeout = ns.qb_zookeeper_hosts, ns.qb_zookeeper_timeout
    log.debug("Connecting to ZooKeeper",
              extra=dict(qb_zookeeper_hosts=hosts,
                         qb_zookeeper_timeout=timeout))
    client = KazooClient(hosts, timeout)
    # route kazoo's logger through ours and quiet it down
    client.logger.handlers = log.handlers
    client.logger.setLevel('WARN')
    client.start()
    # tear the connection down at interpreter exit
    atexit.register(client.stop)
    return client
Пример #23
0
def raw_client():
    """Start a connection to ZooKeeper"""
    namespace = get_NS()
    log.debug(
        "Connecting to ZooKeeper",
        extra=dict(qb_zookeeper_hosts=namespace.qb_zookeeper_hosts,
                   qb_zookeeper_timeout=namespace.qb_zookeeper_timeout))
    conn = KazooClient(
        namespace.qb_zookeeper_hosts, namespace.qb_zookeeper_timeout)
    # silence kazoo's internal chatter but keep our handlers attached
    conn.logger.handlers = log.handlers
    conn.logger.setLevel('WARN')
    conn.start()
    # register cleanup so the session closes on process exit
    atexit.register(conn.stop)
    return conn
Пример #24
0
def _get_grps(app_name, filter_deps, ld):
    """
    Return an iterator that yields (dependency_group_name, group_metadata)
    tuples
    """
    task_config = cb.get_tasks_config()
    try:
        depends_on = task_config[app_name]['depends_on']
    except KeyError:
        return []  # this task has no dependencies
    if "app_name" in depends_on:
        # one unnamed group: pair it with the default group name
        grps = [(get_NS().dependency_group_default_name, depends_on)]
        _get_parents_validate_group_names(
            [get_NS().dependency_group_default_name], filter_deps, ld)
    elif filter_deps:
        _get_parents_validate_group_names(depends_on, filter_deps, ld)
        grps = (pair for pair in depends_on.items() if pair[0] in filter_deps)
    else:
        grps = depends_on.items()
    return grps
Пример #25
0
 def __init__(self, data=None):
     """Build a redis-backed config mapping.

     `data` may be None (empty cache), another instance of this class
     (share its connection and cache), or a plain dict used as the cache.
     """
     ns = get_NS()
     self.db = ns.redis_db
     self.redis_key_prefix = ns.redis_key_prefix
     self.cli = redis.StrictRedis(
         host=ns.redis_host, port=ns.redis_port, db=ns.redis_db)
     if data is None:
         self.cache = {}
     elif isinstance(data, self.__class__):
         # share the other instance's client and cache
         self.cli = data.cli
         self.cache = data.cache
     else:
         assert isinstance(data, dict), (
             "Oops! %s did not receive a dict" % self.__class__.__name__)
         self.cache = data
Пример #26
0
 def __init__(self, data=None):
     """Initialize a redis-backed config mapping.

     `data`: None for an empty cache, a sibling instance to share state
     with, or a plain dict to use directly as the cache.
     """
     conf = get_NS()
     self.db = conf.redis_db
     self.redis_key_prefix = conf.redis_key_prefix
     self.cli = redis.StrictRedis(db=conf.redis_db,
                                  port=conf.redis_port,
                                  host=conf.redis_host)
     if isinstance(data, self.__class__):
         # adopt the sibling's connection and cache
         self.cli = data.cli
         self.cache = data.cache
     elif data is None:
         self.cache = {}
     else:
         assert isinstance(data, dict), ("Oops! %s did not receive a dict" %
                                         self.__class__.__name__)
         self.cache = data
Пример #27
0
def passes_filter(app_name, job_id):
    """Determine if this job matches certain criteria that state it is a
    valid job for this app_name.

    A partially out of scope for dag stuff, but important detail:
        Jobs that don't match the criteria should immediately be marked
        as completed

    Returns True when the job is valid (parseable and, if valid_if_or is
    configured, matching at least one criterion), otherwise False.
    """
    # for now, if we can parse it, it's valid
    pjob_id = parse_job_id(app_name, job_id)

    # does this job matches criteria that makes it executable? if so, we can't
    # autocomplete it
    dg = cb.get_tasks_config()
    meta = dg[app_name]
    ld = dict(app_name=app_name, job_id=job_id)
    try:
        dct = dict(meta["valid_if_or"])
    except (KeyError, TypeError):
        return True  # everything is valid

    if "_func" in dct:
        import_path = dct.pop("_func")  # safe because config is immutable
        try:
            func = load_obj_from_path(import_path, ld)
        except Exception as err:
            # BUGFIX: `err.message` was removed in Python 3; interpolate the
            # exception itself instead.
            raise err.__class__("valid_if_or._func misconfigured: %s" % err)

        if func(app_name, **pjob_id):
            return True

    for k, v in dct.items():
        try:
            kk = pjob_id[k]
        except KeyError:
            _log_raise(
                "valid_if_or contains a key that's not in the job_id",
                extra=dict(valid_if_or_key=k, **ld),
                exception_kls=DAGMisconfigured,
            )
        # cast the configured values with the same validation used to parse
        # the job_id, so the membership test compares like types
        vals = [get_NS().job_id_validations[k](x) for x in v]
        if kk in vals:
            return True
    return False
Пример #28
0
 def __init__(self, data=None):
     """Build a JSON-file-backed tasks configuration.

     `data` may be None (load the file named by --tasks_json), another
     instance of this class (share its cache), or a plain dict used as
     the cache directly.
     """
     if data is None:
         try:
             fp = get_NS().tasks_json
         except KeyError:
             log.error(("You must define --tasks_json if you use the %s"
                        " configuration backend") % self.__class__.__name__)
             raise
         try:
             self.cache = simplejson.load(open(fp))
         # BUGFIX: was a bare `except:`, which also trapped SystemExit and
         # KeyboardInterrupt; narrow it to Exception (still re-raised)
         except Exception:
             log.error("Failed to read json file.", extra={'fp': fp})
             raise
     elif isinstance(data, self.__class__):
         self.cache = data.cache
     else:
         assert isinstance(data, dict), ("Oops! %s did not receive a dict" %
                                         self.__class__.__name__)
         self.cache = data
Пример #29
0
def passes_filter(app_name, job_id):
    """Determine if this job matches certain criteria that state it is a
    valid job for this app_name.

    A partially out of scope for dag stuff, but important detail:
        Jobs that don't match the criteria should immediately be marked
        as completed

    Returns True when the job is valid, otherwise False.
    """
    # for now, if we can parse it, it's valid
    pjob_id = parse_job_id(app_name, job_id)

    # does this job matches criteria that makes it executable? if so, we can't
    # autocomplete it
    dg = cb.get_tasks_config()
    meta = dg[app_name]
    ld = dict(app_name=app_name, job_id=job_id)
    try:
        dct = dict(meta['valid_if_or'])
    except (KeyError, TypeError):
        return True  # everything is valid

    if '_func' in dct:
        import_path = dct.pop('_func')  # safe because config is immutable
        try:
            func = load_obj_from_path(import_path, ld)
        except Exception as err:
            # BUGFIX: `err.message` does not exist on Python 3 exceptions;
            # interpolate the exception itself instead.
            raise err.__class__("valid_if_or._func misconfigured: %s" %
                                err)

        if func(app_name, **pjob_id):
            return True

    for k, v in dct.items():
        try:
            kk = pjob_id[k]
        except KeyError:
            _log_raise("valid_if_or contains a key that's not in the job_id",
                       extra=dict(valid_if_or_key=k, **ld),
                       exception_kls=DAGMisconfigured)
        # cast configured values with the same validation used on the job_id
        vals = [get_NS().job_id_validations[k](x) for x in v]
        if kk in vals:
            return True
    return False
Пример #30
0
 def __init__(self, data=None):
     """Initialize a JSON-file-backed tasks configuration.

     `data`: None to load from the --tasks_json file, a sibling instance
     to share a cache with, or a plain dict used as the cache.
     """
     if data is None:
         try:
             fp = get_NS().tasks_json
         except KeyError:
             log.error((
                 "You must define --tasks_json if you use the %s"
                 " configuration backend") % self.__class__.__name__)
             raise
         try:
             self.cache = simplejson.load(open(fp))
         # BUGFIX: narrowed a bare `except:` so SystemExit and
         # KeyboardInterrupt are no longer trapped (error is re-raised)
         except Exception:
             log.error("Failed to read json file.", extra={'fp': fp})
             raise
     elif isinstance(data, self.__class__):
         self.cache = data.cache
     else:
         assert isinstance(data, dict), (
             "Oops! %s did not receive a dict" % self.__class__.__name__)
         self.cache = data
Пример #31
0
def _validate_job_id_identifiers(
        app_name, vals, validations=None, **_log_details):
    """Validate each identifier in a job_id against its configured validator.

    `app_name` (str) - name of a scheduled application
    `vals` (sequence) - identifier values parsed out of a job_id, in the
        same order as the app's job_id template keys
    `validations` (dict) - maps template key -> validator callable.
        Defaults to the job_id_validations in Stolos's namespace config.
    `**_log_details` - extra key/values merged into log messages

    Returns a dict mapping template key -> (possibly cleaned) value.
    Raises InvalidJobId if a validator raises or returns None/False.
    """
    if validations is None:
        validations = get_NS().job_id_validations
    _, template = get_job_id_template(app_name)
    ld = dict(app_name=app_name, job_id_template=template)
    ld.update(_log_details)
    rv = {}
    for key, _val in zip(template, vals):
        # Look up the validator separately from calling it, so a KeyError
        # raised *inside* a validator is reported as a validation failure
        # instead of being mistaken for "no validator configured".
        try:
            validator = validations[key]
        except KeyError:
            rv[key] = _val
            # deprecated log.warn replaced with log.warning
            log.warning(
                "No job_id validation for key."
                "  You should implement one",
                extra=dict(job_id_key=key, **ld))
            continue
        try:
            val = validator(_val)
            assert val is not None, "validation func returned None"
            assert val is not False, "validation func returned False"
        except Exception as err:
            msg = "An identifier in a job_id failed validation"
            log.exception(msg, extra=dict(
                job_id_identifier=key, bad_value=_val,
                error_details=err, **ld))
            raise InvalidJobId("%s err: %s" % (msg, err))
        rv[key] = val
    return rv
Пример #32
0
def _build_dict_deps(dg, app_name, deps):
    """Add dependency edges for one app's "depends_on" configuration.

    `dg` (nx.MultiDiGraph instance) - the Tasks configuration as a graph
    `app_name` (str) - the name of a scheduled application
    `deps` (obj) - the value of this app's "depends_on" field; a subclass
        of cb.TasksConfigBaseMapping

    Two supported forms of "depends_on":
    1) a mapping that itself contains "app_name": a single, unnamed
       dependency group
    2) a mapping of named dependency groups, where each group is either a
       mapping (one group) or a sequence of mappings (several groups)
    """
    log_details = dict(app_name=app_name, key='depends_on', deps=dict(deps))

    # form 1: one anonymous dependency group, registered under the
    # default group name
    if isinstance(deps, cb.TasksConfigBaseMapping) and "app_name" in deps:
        _add_edges(
            dg,
            app_name=app_name,
            dep_name=get_NS().dependency_group_default_name,
            dep_grp=deps,
            log_details=log_details)
        return

    # form 2: named dependency groups
    for grp_name, grp_data in deps.items():
        if isinstance(grp_data, cb.TasksConfigBaseMapping):
            sub_groups = [grp_data]
        elif isinstance(grp_data, cb.TasksConfigBaseSequence):
            sub_groups = grp_data
        else:
            _log_raise("Unrecognized dependency.  Expected a list or dict",
                       dict(dep_name=grp_name,
                            dep_data=grp_data,
                            **log_details),
                       exception_kls=DAGMisconfigured)
            continue
        for sub_grp in sub_groups:
            _add_edges(dg=dg,
                       app_name=app_name,
                       dep_name=grp_name,
                       dep_grp=sub_grp,
                       log_details=log_details)
Пример #33
0
from stolos import testing_tools as tt
from stolos import get_NS


def setup_qb(func_name):
    """Return ((), kwargs) test fixtures for queue-backend tests: four
    app paths and six queue items, all namespaced by `func_name`."""
    kwargs = {}
    for n in (1, 2, 3, 4):
        key = 'app%d' % n
        kwargs[key] = tt.makepath(func_name, key)
    for n, letter in enumerate('abcdef', start=1):
        kwargs['item%d' % n] = "{}-{}".format(func_name, letter)
    return ((), kwargs)


# Test decorator assembled from project setup/teardown hooks.  The last
# tuple injects `qbcli` (the configured queue backend) into each wrapped
# test's kwargs.  NOTE(review): exact hook semantics live in
# stolos.testing_tools.with_setup_factory -- confirm there.
with_setup = tt.with_setup_factory(
    (tt.setup_job_ids, setup_qb, ),
    (tt.teardown_queue_backend, ),
    (tt.post_setup_queue_backend,
     lambda: dict(qbcli=get_NS().queue_backend))
)


# reference the names so linters don't flag them as unused
with_setup, setup_qb
Пример #34
0
def raw_client():
    """Return a StrictRedis client configured from Stolos's namespace
    (qb_redis_host / qb_redis_port / qb_redis_socket_timeout)."""
    ns = get_NS()
    return redis.StrictRedis(
        host=ns.qb_redis_host, port=ns.qb_redis_port,
        socket_timeout=ns.qb_redis_socket_timeout)
Пример #35
0
 def get(self, timeout=None):
     """Get an item from the queue or return None.

     A `timeout` of None falls back to the configured zookeeper timeout.
     """
     effective = (get_NS().qb_zookeeper_timeout
                  if timeout is None else timeout)
     raw = self._q.get(timeout=effective)
     return util.frombytes(raw)
Пример #36
0
def get_qbclient():
    """Return the queue backend from Stolos's runtime namespace."""
    ns = get_NS()
    return ns.queue_backend
Пример #37
0
from stolos import testing_tools as tt
from stolos import get_NS


def setup_qb(func_name):
    """Build the ((), kwargs) fixture tuple for queue-backend tests:
    four app paths and six queue items namespaced by `func_name`."""
    fixtures = {
        'app%d' % n: tt.makepath(func_name, 'app%d' % n)
        for n in (1, 2, 3, 4)}
    fixtures.update({
        'item%d' % n: "{}-{}".format(func_name, letter)
        for n, letter in zip((1, 2, 3, 4, 5, 6), 'abcdef')})
    return ((), fixtures)


# Test decorator assembled from project setup/teardown hooks; the last
# tuple injects `qbcli` (the configured queue backend) into each wrapped
# test's kwargs.  NOTE(review): exact hook behavior is defined by
# stolos.testing_tools.with_setup_factory -- confirm there.
with_setup = tt.with_setup_factory(
    (
        tt.setup_job_ids,
        setup_qb,
    ), (tt.teardown_queue_backend, ),
    (tt.post_setup_queue_backend, lambda: dict(qbcli=get_NS().queue_backend)))

# reference the names so linters don't flag them as unused
with_setup, setup_qb
Пример #38
0
def raw_client():
    """Construct a StrictRedis connection using the qb_redis_* options
    from Stolos's configured namespace."""
    cfg = get_NS()
    return redis.StrictRedis(
        host=cfg.qb_redis_host,
        port=cfg.qb_redis_port,
        socket_timeout=cfg.qb_redis_socket_timeout)
Пример #39
0
def get_qbclient():
    """Fetch and return the configured queue backend."""
    backend = get_NS().queue_backend
    return backend
Пример #40
0
 def get(self, timeout=None):
     """Get an item from the queue or return None.

     When `timeout` is omitted, the configured zookeeper timeout from
     Stolos's namespace is used instead.
     """
     if timeout is None:
         timeout = get_NS().qb_zookeeper_timeout
     item = self._q.get(timeout=timeout)
     return util.frombytes(item)