Example #1
def generate_tasks_manual():
    """ Return a formatted listing of all tasks with their descriptions. """
    from siskin.sources import *
    from siskin.workflows import *

    output = StringIO.StringIO()
    # task_tuples = sorted(Register.get_reg().iteritems())
    task_names = Register.task_names()
    output.write(MAN_HEADER)
    output.write('  {0} tasks found\n\n'.format(len(task_names)))

    for name in task_names:
        klass = Register.get_task_cls(name)
        doc = klass.__doc__ or colors.red("@todo: docs")
        output.write('{0} {1}\n'.format(colors.green(name), doc))

        try:
            deps = flatten(klass().requires())
        except Exception:
            # TODO: tasks that have required arguments will fail here
            deps = []
            formatted = colors.yellow("\tUnavailable since task has required parameters.")
        else:
            formatted = '\t{0}'.format(pprint.pformat(deps).replace('\n', '\n\t'))
        output.write(colors.magenta('\n\tDependencies ({0}):\n\n{1}\n\n'.format(len(deps), formatted)))

    return output.getvalue()
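
Note: these snippets all revolve around the same mechanism. Register is the metaclass of luigi.Task, so merely defining a Task subclass records it in the registry, where it can later be looked up by name. A minimal sketch of that behaviour (HelloTask is a toy class invented for illustration):

import luigi
from luigi.task import Register

class HelloTask(luigi.Task):
    """Toy task defined only to show automatic registration."""
    def run(self):
        print("hello")

# Defining the class is enough; the Register metaclass has already recorded it.
assert 'HelloTask' in Register.task_names()
assert Register.get_task_cls('HelloTask') is HelloTask
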
Example #2
 def requires(self):
     from luigi.task import Register
     if self.task not in Register.get_reg():
         logger.warning("No such task {} in registry; skipping".format(self.task))
         return []
     else:
         cls = Register.get_reg()[self.task]
         return [cls(target=x) for x in self.generic_wrapper_target]
Example #3
 def requires(self):
     from luigi.task import Register
     if self.task not in Register.get_reg():
         logger.warning("No such task {} in registry; skipping".format(
             self.task))
         return []
     else:
         cls = Register.get_reg()[self.task]
         return [cls(target=x) for x in self.generic_wrapper_target]
Example #4
def main():
    print("siskin %s\n\n" % __version__)
    task_names = Register.task_names()
    print('{0} tasks found\n'.format(len(task_names)))

    for name in task_names:
        if name.islower():
            continue
        klass = Register.get_task_cls(name)
        doc = klass.__doc__ or yellow("@TODO: docs")
        print('{0} {1}\n'.format(green(name), doc))
Example #5
    def requires(self):
        # cache because we anticipate lots of tasks
        if hasattr(self, '_cached_requires'):
            return self._cached_requires

        if not self.start and not self.stop:
            raise ParameterException("At least one of start and stop needs to be specified")
        if not self.start and not self.reverse:
            raise ParameterException("Either start needs to be specified or reverse needs to be True")
        # TODO check overridden complete() and exists()

        now = datetime.utcfromtimestamp(time.time() if self.now is None else self.now)
        now = datetime(now.year, now.month, now.day, now.hour)
        datehours = [now + timedelta(hours=h) for h in range(-self.hours_back, self.hours_forward + 1)]
        datehours = [h for h in datehours if (not self.start or h >= self.start) and (not self.stop or h < self.stop)]

        task_cls = Register.get_task_cls(self.of)
        if datehours:
            logger.debug('Actually checking if range [%s, %s] of %s is complete' % (datehours[0], datehours[-1], self.of))
            missing_datehours = sorted(self.missing_datehours(task_cls, datehours))
            logger.debug('Range [%s, %s] lacked %d of expected %d %s instances' % (datehours[0], datehours[-1], len(missing_datehours), len(datehours), self.of))
        else:
            missing_datehours = []

        if self.reverse:
            required_datehours = missing_datehours[-self.task_limit:]
        else:
            required_datehours = missing_datehours[:self.task_limit]
        if required_datehours:
            logger.debug('Requiring %d missing %s instances in range [%s, %s]' % (len(required_datehours), self.of, required_datehours[0], required_datehours[-1]))
        if self.reverse:
            required_datehours.reverse()  # I wish this determined the order tasks were scheduled or executed, but it doesn't. No priorities in Luigi yet

        self._cached_requires = [task_cls(d) for d in required_datehours]
        return self._cached_requires
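
This requires() matches the shape of luigi's hourly range tooling; the hedged sketch below assumes luigi.tools.range.RangeHourly and a made-up HourlyReport task, and shows how such a wrapper is typically driven so that requires() yields only the missing hours:

from datetime import datetime

import luigi
from luigi.tools.range import RangeHourly

class HourlyReport(luigi.Task):
    """Illustrative hourly task; one output file per hour."""
    date_hour = luigi.DateHourParameter()

    def output(self):
        return luigi.LocalTarget('report-%s.txt' % self.date_hour.isoformat())

# Look back six hours from "now" and require at most the three newest missing hours.
backfill = RangeHourly(of=HourlyReport, start=datetime(2024, 1, 1, 0),
                       hours_back=6, task_limit=3, reverse=True)
print(backfill.requires())  # a list of HourlyReport instances
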
Example #6
    def requires(self):
        # cache because we anticipate a fair amount of computation
        if hasattr(self, '_cached_requires'):
            return self._cached_requires

        if not self.start and not self.stop:
            raise ParameterException(
                "At least one of start and stop needs to be specified")
        if not self.start and not self.reverse:
            raise ParameterException(
                "Either start needs to be specified or reverse needs to be True"
            )
        if self.start and self.stop and self.start > self.stop:
            raise ParameterException("Can't have start > stop")
        # TODO check overridden complete() and exists()

        now = datetime.utcfromtimestamp(
            time.time() if self.now is None else self.now)

        moving_start = self.moving_start(now)
        finite_start = moving_start if self.start is None else max(
            self.parameter_to_datetime(self.start), moving_start)
        moving_stop = self.moving_stop(now)
        finite_stop = moving_stop if self.stop is None else min(
            self.parameter_to_datetime(self.stop), moving_stop)

        datetimes = self.finite_datetimes(
            finite_start, finite_stop) if finite_start <= finite_stop else []

        task_cls = Register.get_task_cls(self.of)
        if datetimes:
            logger.debug('Actually checking if range %s of %s is complete' %
                         (self._format_range(datetimes), self.of))
            missing_datetimes = sorted(
                self.missing_datetimes(task_cls, datetimes))
            logger.debug('Range %s lacked %d of expected %d %s instances' %
                         (self._format_range(datetimes),
                          len(missing_datetimes), len(datetimes), self.of))
        else:
            missing_datetimes = []
            logger.debug('Empty range. No %s instances expected' % (self.of, ))

        self._emit_metrics(missing_datetimes, finite_start, finite_stop)

        if self.reverse:
            required_datetimes = missing_datetimes[-self.task_limit:]
        else:
            required_datetimes = missing_datetimes[:self.task_limit]
        if required_datetimes:
            logger.debug('Requiring %d missing %s instances in range %s' %
                         (len(required_datetimes), self.of,
                          self._format_range(required_datetimes)))
        if self.reverse:
            required_datetimes.reverse()  # TODO priorities, so that within the batch tasks are ordered too

        self._cached_requires = [
            task_cls(self.datetime_to_parameter(d)) for d in required_datetimes
        ]
        return self._cached_requires
Example #7
    def parse(self, cmdline_args=None, main_task_cls=None):
        parser = PassThroughOptionParser()

        def add_task_option(p):
            if main_task_cls:
                p.add_option('--task', help='Task to run (one of ' + Register.tasks_str() + ') [default: %default]', default=main_task_cls.task_family)
            else:
                p.add_option('--task', help='Task to run (one of %s)' % Register.tasks_str())

        add_global_parameters(parser, optparse=True)

        add_task_option(parser)
        options, args = parser.parse_args(args=cmdline_args)

        task_cls_name = options.task
        if self.__existing_optparse:
            parser = self.__existing_optparse
        else:
            parser = optparse.OptionParser()
        add_task_option(parser)

        task_cls = Register.get_task_cls(task_cls_name)

        # Register all parameters as a big mess
        add_global_parameters(parser, optparse=True)
        add_task_parameters(parser, task_cls, optparse=True)

        # Parse and run
        options, args = parser.parse_args(args=cmdline_args)

        set_global_parameters(options)
        task_params = get_task_parameters(task_cls, options)

        return [task_cls(**task_params)]
Example #8
 def of_cls(self):
     if isinstance(self.of, six.string_types):
         warnings.warn('When using Range programmatically, don\'t pass "of" param as string!')
         return Register.get_task_cls(self.of)
     return self.of
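
The warning concerns the `of` parameter of luigi's Range tools: handing over the task class itself avoids the string lookup through Register that of_cls otherwise performs. A hedged sketch, assuming luigi.tools.range.RangeDaily and an invented MyDailyTask:

from datetime import date

import luigi
from luigi.tools.range import RangeDaily

class MyDailyTask(luigi.Task):
    """Illustrative daily task."""
    date = luigi.DateParameter()

    def output(self):
        return luigi.LocalTarget('out-%s.txt' % self.date.isoformat())

# Preferred: pass the class, not the string 'MyDailyTask', so of_cls never has to
# resolve a name through the registry (and never emits the warning above).
backfill = RangeDaily(of=MyDailyTask, start=date(2024, 1, 1))
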
Example #9
def add_global_parameters(parser, optparse=False):
    seen_params = set()
    for task_name, is_without_section, param_name, param in Register.get_all_params():
        if param in seen_params:
            continue
        seen_params.add(param)
        param.add_to_cmdline_parser(parser, param_name, task_name, optparse=optparse, glob=True, is_without_section=is_without_section)
Example #10
def _write_task_import_cache(path):
    """
    Write dict to path.
    """
    with open(path, 'w') as output:
        task_import_cache = dict([(name, Register.get_task_cls(name).__module__) for name in Register.task_names() if name[0].isupper()])
        json.dump(task_import_cache, output)
Example #11
def load_task(module, task_name, params_str):
    """
    Imports task dynamically given a module and a task name.
    """
    __import__(module)
    task_cls = Register.get_task_cls(task_name)
    return task_cls.from_str_params(params_str)
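
A hedged sketch of the round trip this enables: a task's parameters are serialized to plain strings with to_str_params(), and the instance is later rebuilt purely from the task name and that dict. EchoTask below is invented for illustration; load_task() would additionally import the defining module first, which is already the case here:

import luigi
from luigi.task import Register

class EchoTask(luigi.Task):
    """Illustrative task; in practice it would live in its own module."""
    name = luigi.Parameter()

original = EchoTask(name='demo')
params_str = original.to_str_params()   # {'name': 'demo'}

# load_task('that_module', 'EchoTask', params_str) would do the same lookup after
# importing 'that_module'; here the module is already loaded, so Register resolves it.
rebuilt = Register.get_task_cls('EchoTask').from_str_params(params_str)
assert rebuilt.name == 'demo'
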
Example #12
    def parse_task(self, cmdline_args=None, main_task_cls=None):
        parser = argparse.ArgumentParser()

        add_global_parameters(parser)

        if main_task_cls:
            add_task_parameters(parser, main_task_cls)

            args = parser.parse_args(args=cmdline_args)
            task_cls = main_task_cls
        else:
            task_names = sorted(Register.get_reg().keys())

            # Parse global arguments and pull out the task name.
            # We used to do this using subparsers+command, but some issues with
            # argparse across different versions of Python (2.7.9) made it hard.
            args, unknown = parser.parse_known_args(args=cmdline_args)
            if len(unknown) == 0:
                raise SystemExit('No task specified')
            task_name = unknown[0]
            if task_name not in task_names:
                error_task_names(task_name, task_names)

            task_cls = Register.get_task_cls(task_name)

            # Add a subparser to parse task-specific arguments
            subparsers = parser.add_subparsers(dest='command')
            subparser = subparsers.add_parser(task_name)

            # Add both task and global params here so that we can support both:
            # test.py --global-param xyz Test --n 42
            # test.py Test --n 42 --global-param xyz
            add_global_parameters(subparser)
            add_task_parameters(subparser, task_cls)

            # Workaround for bug in argparse for Python 2.7.9
            # See https://mail.python.org/pipermail/python-dev/2015-January/137699.html
            subargs = parser.parse_args(args=cmdline_args)
            for key, value in vars(subargs).items():
                if value:  # Either True (for boolean args) or non-None (everything else)
                    setattr(args, key, value)

        # Notice that this is not side effect free because it might set global params
        set_global_parameters(args)
        task_params = get_task_parameters(task_cls, args)

        return [task_cls(**task_params)]
Example #13
 def of_cls(self):
     """
     DONT USE. Will be deleted soon. Use ``self.of``!
     """
     if isinstance(self.of, six.string_types):
         warnings.warn('When using Range programmatically, don\'t pass "of" param as string!')
         return Register.get_task_cls(self.of)
     return self.of
Example #14
def _write_task_import_cache(path):
    """
    Write dictionary of task name module name mappings to given path.
    """
    with open(path, 'w') as output:
        task_import_cache = dict([(name, Register.get_task_cls(name).__module__) for name in Register.task_names()
                                  if name[0].isupper()])
        json.dump(task_import_cache, output)
Example #15
    def requires(self):
        # cache because we anticipate a fair amount of computation
        if hasattr(self, '_cached_requires'):
            return self._cached_requires

        if not self.start and not self.stop:
            raise ParameterException(
                "At least one of start and stop needs to be specified")
        if not self.start and not self.reverse:
            raise ParameterException(
                "Either start needs to be specified or reverse needs to be True"
            )
        # TODO check overridden complete() and exists()

        now = datetime.utcfromtimestamp(
            time.time() if self.now is None else self.now)
        now = datetime(now.year, now.month, now.day, now.hour)
        datehours = [
            now + timedelta(hours=h)
            for h in range(-self.hours_back, self.hours_forward + 1)
        ]
        datehours = [
            h for h in datehours
            if (not self.start or h >= self.start) and (not self.stop or h < self.stop)
        ]

        task_cls = Register.get_task_cls(self.of)
        if datehours:
            logger.debug(
                'Actually checking if range [%s, %s] of %s is complete' %
                (datehours[0], datehours[-1], self.of))
            missing_datehours = sorted(
                self.missing_datehours(task_cls, datehours))
            logger.debug(
                'Range [%s, %s] lacked %d of expected %d %s instances' %
                (datehours[0], datehours[-1], len(missing_datehours),
                 len(datehours), self.of))
        else:
            missing_datehours = []

        self._emit_metrics(missing_datehours, now)

        if self.reverse:
            required_datehours = missing_datehours[-self.task_limit:]
        else:
            required_datehours = missing_datehours[:self.task_limit]
        if required_datehours:
            logger.debug(
                'Requiring %d missing %s instances in range [%s, %s]' %
                (len(required_datehours), self.of, required_datehours[0],
                 required_datehours[-1]))
        if self.reverse:
            required_datehours.reverse()  # I wish this determined the order tasks were scheduled or executed, but it doesn't. No priorities in Luigi yet

        self._cached_requires = [task_cls(d) for d in required_datehours]
        return self._cached_requires
Example #16
 def of_cls(self):
     """
     DONT USE. Will be deleted soon. Use ``self.of``!
     """
     if isinstance(self.of, six.string_types):
         warnings.warn('When using Range programmatically, don\'t pass "of" param as string!')
         return Register.get_task_cls(self.of)
     return self.of
Example #17
    def requires(self):
        # cache because we anticipate a fair amount of computation
        if hasattr(self, '_cached_requires'):
            return self._cached_requires

        if not self.start and not self.stop:
            raise ParameterException("At least one of start and stop needs to be specified")
        if not self.start and not self.reverse:
            raise ParameterException("Either start needs to be specified or reverse needs to be True")
        if self.start and self.stop and self.start > self.stop:
            raise ParameterException("Can't have start > stop")
        # TODO check overridden complete() and exists()

        now = datetime.utcfromtimestamp(time.time() if self.now is None else self.now)

        moving_start = self.moving_start(now)
        finite_start = moving_start if self.start is None else max(self.parameter_to_datetime(self.start), moving_start)
        moving_stop = self.moving_stop(now)
        finite_stop = moving_stop if self.stop is None else min(self.parameter_to_datetime(self.stop), moving_stop)

        datetimes = self.finite_datetimes(finite_start, finite_stop) if finite_start <= finite_stop else []

        task_cls = Register.get_task_cls(self.of)
        if datetimes:
            logger.debug('Actually checking if range %s of %s is complete',
                         self._format_range(datetimes), self.of)
            missing_datetimes = sorted(self.missing_datetimes(task_cls, datetimes))
            logger.debug('Range %s lacked %d of expected %d %s instances',
                         self._format_range(datetimes), len(missing_datetimes), len(datetimes), self.of)
        else:
            missing_datetimes = []
            logger.debug('Empty range. No %s instances expected', self.of)

        self._emit_metrics(missing_datetimes, finite_start, finite_stop)

        if self.reverse:
            required_datetimes = missing_datetimes[-self.task_limit:]
        else:
            required_datetimes = missing_datetimes[:self.task_limit]
        if required_datetimes:
            logger.debug('Requiring %d missing %s instances in range %s',
                         len(required_datetimes), self.of, self._format_range(required_datetimes))
        if self.reverse:
            required_datetimes.reverse()  # TODO priorities, so that within the batch tasks are ordered too

        self._cached_requires = [task_cls(self.datetime_to_parameter(d)) for d in required_datetimes]
        return self._cached_requires
Example #18
def get_task_import_cache():
    """
    Load `taskname: modulename` mappings from dictionary. Return a tuple containing
    the dictionary and the path to the cache file.
    """
    task_import_cache = None
    path = os.path.join(tempfile.gettempdir(), 'siskin_task_import_cache_%s' % __version__)
    if not os.path.exists(path):
        from siskin.sources import *
        from siskin.workflows import *
        with open(path, 'w') as output:
            task_import_cache = dict([(name, Register.get_task_cls(name).__module__) for name in Register.task_names() if name[0].isupper()])
            json.dump(task_import_cache, output)

    if task_import_cache is None:
        with open(path) as handle:
            try:
                task_import_cache = json.load(handle)
            except Exception as err:
                print("failed load task import cache, try removing %s and then try again" % path, file=sys.stderr)
                sys.exit(1)

    return task_import_cache, path
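
A hedged sketch of how the returned mapping can be consumed: knowing which module defines a task, only that module has to be imported before the registry lookup (the task name below is purely illustrative):

from luigi.task import Register

task_import_cache, path = get_task_import_cache()
name = 'SomeTask'  # hypothetical task name
if name in task_import_cache:
    __import__(task_import_cache[name])   # import only the defining module
    cls = Register.get_task_cls(name)     # now resolvable through the registry
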
Example #19
def gen_sphinx_tasks(entry_point, labels, *_args, **kwargs):
    """
    Writes a file per label, suitable for use by sphinx.ext.autodoc,
    using the classes found from entry_point.

    Also generates toctree.rst, which can be included from the index
    page to provide links to each generated file.

    """
    # Declare file header strings
    warning = '''..  WARNING: DO NOT EDIT THIS FILE DIRECTLY
    Generated by sphinx_source/gen_tasks.py on {now}

    '''.format(now=time.strftime('%c'))

    toctree_header = '''{warning}
:orphan:

.. toctree::
   :maxdepth: 1
'''
    incfile_header = '''{warning}
..  _{category_slug}:

Back to :doc:`index`

{label_heading}
'''

    # Load modules into memory
    stevedore.ExtensionManager(entry_point)

    # Used to filter the classes under entry_point
    entry_point_dot = '{entry_point}.'.format(entry_point=entry_point)

    # Generate a list of output file arguments from the given labels and categories
    output = []
    categories = kwargs.get('categories', [])
    for idx, label in enumerate(labels):
        try:
            category = ''
            if idx < len(categories):
                category = categories[idx]

            # Create a category slug for sphinx, and name the file with it
            category_slug = category.replace(' ', '_') or 'all'
            file_name = '{slug}.rst'.format(slug=category_slug)
            file_path = os.path.join(SPHINX_DIR, file_name)
            file_pointer = open(file_path, "w")
            output.append({
                'fp': file_pointer,
                'file_name': file_name,
                'category': category,
                'category_slug': category_slug,
                'label': label,
                'label_heading': "{label}\n{_}".format(label=label, _='=' * len(label)),
                'modules': {},
            })
        except IOError:
            sys.exit('Unable to write to {file_path}'.format(file_path=file_path))

    # Write the header to the table of contents file
    tocfile_name = os.path.join(SPHINX_DIR, 'toctree.rst')
    try:
        tocfile = open(tocfile_name, "w")
        tocfile.write(toctree_header.format(warning=warning))
    except IOError:
        sys.exit('Unable to write to {file_name}'.format(file_name=tocfile_name))

    # For each Task, sorted by class name
    tasks = Register.get_reg()
    for name in sorted(tasks):
        cls = tasks[name]
        module = cls.__module__
        # Show only tasks under entry_point
        if module.startswith(entry_point_dot):
            for out in output:
                # Show only tasks in the output category
                if getattr(cls, 'task_category', '') == out['category']:
                    # Keep the full module path so automodule can import it later
                    if module not in out['modules']:
                        out['modules'][module] = {}
                    out['modules'][module][name] = cls

    for out in output:
        modules = sorted(out['modules'].keys())
        if modules:
            tocfile.write("\n   {incfile}".format(incfile=out['file_name']))
            out['fp'].write(incfile_header.format(warning=warning, **out))

        for module in modules:
            # Strip off entry_point in the heading only; automodule needs the full path
            module_heading = module.replace(entry_point_dot, '')
            out['fp'].write("\n\n{module_heading}\n{_}".format(
                module_heading=module_heading, _='-' * len(module_heading)))
            out['fp'].write("\n\n.. automodule:: {module}".format(module=module))

            names = out['modules'][module]
            for name in sorted(names):
                out['fp'].write("\n\n.. autoclass:: {name}".format(name=name))

        out['fp'].close()

    tocfile.close()
Example #20
 def of_cls(self):
     if isinstance(self.of, six.string_types):
         warnings.warn('When using Range programmatically, don\'t pass "of" param as string!')
         return Register.get_task_cls(self.of)
     return self.of
Example #21
 def test_cmdline(self):
     # Exposes issue where wrapped tasks are registered twice under
     # the same name
     from luigi.task import Register
     self.assertEqual(Register.get_reg().get('SubtaskDelegator', None),
                      SubtaskDelegator)
Example #22
def gen_sphinx_tasks(entry_point, labels, *_args, **kwargs):
    """
    Writes a file per label, suitable for use by sphinx.ext.autodoc,
    using the classes found from entry_point.

    Also generates toctree.rst, which can be included from the index
    page to provide links to each generated file.

    """
    # Declare file header strings
    warning = '''..  WARNING: DO NOT EDIT THIS FILE DIRECTLY
    Generated by sphinx_source/gen_tasks.py on {now}

    '''.format(now=time.strftime('%c'))

    toctree_header = '''{warning}
.. toctree::
   :maxdepth: 1
'''
    incfile_header = '''{warning}
..  _{category_slug}:

Back to :doc:`index`

{label_heading}
'''

    # Load modules into memory
    stevedore.ExtensionManager(entry_point)

    # Used to filter the classes under entry_point
    entry_point_dot = '{entry_point}.'.format(entry_point=entry_point)

    # Generate a list of output file arguments from the given labels and categories
    output = []
    categories = kwargs.get('categories', [])
    for idx, label in enumerate(labels):
        try:
            category = ''
            if idx < len(categories):
                category = categories[idx]

            # Create a category slug for sphinx, and name the file with it
            category_slug = category.replace(' ', '_') or 'all'
            file_name = '{slug}.rst'.format(slug=category_slug)
            file_path = os.path.join(SPHINX_DIR, file_name)
            file_pointer = open(file_path, "w")
            output.append({
                'fp': file_pointer,
                'file_name': file_name,
                'category': category,
                'category_slug': category_slug,
                'label': label,
                'label_heading': "{label}\n{_}".format(label=label, _='=' * len(label)),
                'modules': {},
            })
        except IOError:
            sys.exit(
                'Unable to write to {file_path}'.format(file_path=file_path))

    # Write the header to the table of contents file
    tocfile_name = os.path.join(SPHINX_DIR, 'toctree.rst')
    try:
        tocfile = open(tocfile_name, "w")
        tocfile.write(toctree_header.format(warning=warning))
    except IOError:
        sys.exit(
            'Unable to write to {file_name}'.format(file_name=tocfile_name))

    # For each Task, sorted by class name
    tasks = Register.get_reg()
    for name in sorted(tasks):
        cls = tasks[name]
        module = cls.__module__
        # Show only tasks under entry_point
        if module.startswith(entry_point_dot):
            for out in output:
                # Show only tasks in the output category
                if getattr(cls, 'task_category', '') == out['category']:
                    if module not in out['modules']:
                        out['modules'][module] = {}
                    out['modules'][module][name] = cls

    for out in output:
        modules = sorted(out['modules'].keys())
        if modules:
            tocfile.write("\n   {incfile}".format(incfile=out['file_name']))
            out['fp'].write(incfile_header.format(warning=warning, **out))

        for module in modules:
            # Strip off entry_point to avoid redundancy in documentation
            module_heading = '{module}'.format(
                module=module.replace(entry_point_dot, ''))
            out['fp'].write("\n\n{module_heading}\n{_}".format(
                module_heading=module_heading, _='-' * len(module_heading)))
            out['fp'].write(
                "\n\n.. automodule:: {module}".format(module=module))

            names = out['modules'][module]
            for name in sorted(names):
                out['fp'].write("\n\n.. autoclass:: {name}".format(name=name))

        out['fp'].close()

    tocfile.close()
Example #23
def set_global_parameters(args):
    # Note that this is not side effect free
    for task_name, is_without_section, param_name, param in Register.get_all_params():
        param.set_global_from_args(param_name, task_name, args, is_without_section=is_without_section)
Example #24
 def add_task_option(p):
     if main_task_cls:
         p.add_option('--task', help='Task to run (one of ' + Register.tasks_str() + ') [default: %default]', default=main_task_cls.task_family)
     else:
         p.add_option('--task', help='Task to run (one of %s)' % Register.tasks_str())
Example #25
 def requires(self):
     task_class = Register.get_task_cls(self.of)
     return [task_class(self.config, key.name) for key in self.data.file_keys]
Example #26
 def test_cmdline(self):
     # Exposes issue where wrapped tasks are registered twice under
     # the same name
     from luigi.task import Register
     self.assertEqual(Register.get_reg().get('SubtaskDelegator', None), SubtaskDelegator)
Example #27
 def requires(self):
     task_class = Register.get_task_cls(self.of)
     return [task_class(key.name) for key in dataset.file_keys]