示例#1
0
 def test_pathlib(self):
     """Check that LocalTarget accepts a pathlib.Path and tracks the file's existence."""
     import pathlib
     file_path = pathlib.Path(self.path)
     # Nothing has been written yet, so neither view should see the file.
     self.assertFalse(file_path.exists())
     local_target = LocalTarget(file_path)
     self.assertFalse(local_target.exists())
     # Create the file through pathlib and verify the target notices it.
     with file_path.open('w') as handle:
         handle.write('test me')
     self.assertTrue(local_target.exists())
示例#2
0
 def test_pathlib(self):
     """Check that LocalTarget accepts a pathlib.Path and tracks the file's existence."""
     import pathlib
     file_path = pathlib.Path(self.path)
     # Nothing has been written yet, so neither view should see the file.
     self.assertFalse(file_path.exists())
     local_target = LocalTarget(file_path)
     self.assertFalse(local_target.exists())
     # Create the file through pathlib and verify the target notices it.
     with file_path.open('w') as handle:
         handle.write('test me')
     self.assertTrue(local_target.exists())
class DirectoryTarget(Target):
    '''
    Directory-flavoured counterpart of :class:`LocalTarget <luigi.LocalTarget>`
    whose temporary directories are created and removed automatically.
    '''
    def __init__(self, task):
        # The directory lives at tmp/<classpath(task)>/<task_id>.
        self.path = os.path.join('tmp', classpath(task), task.task_id)
        self._target = LocalTarget(self.path)

    def exists(self):
        '''Delegate the existence check to the wrapped LocalTarget.'''
        return self._target.exists()

    def temporary_path(self):
        '''
        Return a context manager wrapping the LocalTarget's ``temporary_path``.

        Entering creates the temporary directory and yields its path. Leaving
        without an exception hands control to the wrapped manager; leaving
        with an exception deletes the temporary directory and lets the
        exception propagate.
        '''
        wrapped_target = self._target

        class _Manager(object):
            _target = wrapped_target

            def __enter__(self):
                inner = self._target.temporary_path()
                self._manager = inner
                tmp_dir = inner.__enter__()
                self._temp_path = tmp_dir
                os.makedirs(tmp_dir)
                return tmp_dir

            def __exit__(self, exc_type, exc_value, traceback):
                if exc_type is not None:
                    # Failure inside the block: discard the temp directory.
                    shutil.rmtree(self._temp_path)
                else:
                    # Clean exit: let the wrapped manager finish its work.
                    self._manager.__exit__(exc_type, exc_value, traceback)

                # Returning False never suppresses the caller's exception.
                return False

        return _Manager()
示例#4
0
    def test_tmp(self):
        """A temporary LocalTarget appears on close and vanishes when deleted."""
        target = LocalTarget(is_tmp=True)
        self.assertFalse(target.exists())
        self.assertFalse(os.path.exists(target.path))

        # While the writer is still open the file must not be visible.
        writer = target.open('w')
        print('test', file=writer)
        self.assertFalse(target.exists())
        self.assertFalse(os.path.exists(target.path))

        # Closing the writer makes the file visible.
        writer.close()
        self.assertTrue(target.exists())
        self.assertTrue(os.path.exists(target.path))

        reader = target.open('r')
        self.assertEqual(reader.readline(), 'test\n')
        reader.close()

        tmp_file = target.path
        del target  # should remove the underlying file
        self.assertFalse(os.path.exists(tmp_file))
示例#5
0
    def test_tmp(self):
        """A temporary LocalTarget appears on close and vanishes when deleted."""
        target = LocalTarget(is_tmp=True)
        self.assertFalse(target.exists())
        self.assertFalse(os.path.exists(target.path))

        # While the writer is still open the file must not be visible.
        writer = target.open('w')
        print('test', file=writer)
        self.assertFalse(target.exists())
        self.assertFalse(os.path.exists(target.path))

        # Closing the writer makes the file visible.
        writer.close()
        self.assertTrue(target.exists())
        self.assertTrue(os.path.exists(target.path))

        reader = target.open('r')
        self.assertEqual(reader.readline(), 'test\n')
        reader.close()

        tmp_file = target.path
        del target  # should remove the underlying file
        self.assertFalse(os.path.exists(tmp_file))
示例#6
0
class ExternalFileTarget(luigi.target.FileSystemTarget):
    """File target that forwards to a LocalTarget or a RemoteTarget.

    Whether the remote variant is used is decided by ``commons().is_remote``.
    For remote targets with ``file_type == 'apk'`` a copy is fetched into the
    system temp directory and that local copy becomes the target's ``path``.
    """

    def __init__(self,
                 path,
                 file_type='regular',
                 root_dir=None,
                 format=None,
                 **kwargs):
        """Resolve the full path and build the wrapped target.

        :param path: path relative to the chosen root directory.
        :param file_type: ``'apk'`` triggers the local-copy behaviour for
            remote targets; other values are only stored.
        :param root_dir: explicit root; when falsy the remote or local root
            from ``commons()`` is used.
        :param format: passed through to the wrapped target.
        :param kwargs: extra keyword arguments for the wrapped target; for
            remote targets ``port`` is overwritten with ``commons().SSH_PORT``.
        """
        self.is_remote = commons().is_remote

        # Pick the directory the relative path is anchored to.
        if root_dir:
            base_dir = root_dir
        elif self.is_remote:
            base_dir = commons().remote_root
        else:
            base_dir = commons().local_root
        full_path = os.path.join(base_dir, path)

        self.file_type = file_type
        self.format = format

        if not self.is_remote:
            self._target = LocalTarget(full_path, format=format, **kwargs)
            resolved_path = self._target.path
        else:
            ssh_host = commons().SSH_HOST
            kwargs['port'] = commons().SSH_PORT
            self._target = RemoteTarget(full_path,
                                        ssh_host,
                                        format=format,
                                        **kwargs)
            if file_type == 'apk':
                # create temporary local copy
                tmp_dir = tempfile.gettempdir()
                copy_name = 'luigi-{}-{}.apk'.format(
                    os.path.basename(path), random.randint(0, 999999999))
                self.local_path = os.path.join(tmp_dir, copy_name)
                self._target.get(self.local_path)
                resolved_path = self.local_path
            else:
                resolved_path = self._target.path

        # XXX: check if this is right
        super(ExternalFileTarget, self).__init__(resolved_path)

    # NOTE(review): luigi's FileSystemTarget declares ``fs`` as a property,
    # whereas this is a plain method that must be called as ``fs()`` —
    # confirm this is intended.
    def fs(self):
        """Return the filesystem object of the wrapped target."""
        wrapped = self._target
        return wrapped.fs

    def open(self, mode='r'):
        """Open the wrapped target in the given mode."""
        wrapped = self._target
        return wrapped.open(mode)

    def exists(self):
        """Return the wrapped target's existence check result."""
        wrapped = self._target
        return wrapped.exists()

    def remove(self):
        """Remove the wrapped target and return whatever it returns."""
        wrapped = self._target
        return wrapped.remove()

    def cleanup(self):
        """Delete the temporary local copy, if any; failures are ignored."""
        try:
            os.remove(self.local_path)
        except (AttributeError, OSError):
            # No local copy was ever created, or it could not be removed.
            return
示例#7
0
class StoppableTask(luigi.Task):
    """Luigi task base that records dependencies and timings and stops after a failure.

    Subclasses provide their dependencies through :meth:`requires_tasks` and
    their work through :meth:`run_task`; both default to doing nothing.
    All bookkeeping files live below ``build_config().output_directory``:

    * ``TASK_FAILED`` holds the id of the first task that failed; once it
      exists, every later :meth:`run` raises ``StoppingFurtherExecution``.
    * ``dependencies/<task_id>/`` holds ``requires.json`` and ``dynamic.json``,
      each containing one JSON document per line.
    * ``timers/state/...`` holds raw timestamps and durations, and
      ``timers/results/`` holds the values computed in :meth:`on_success`.
    """
    logger = logging.getLogger('luigi-interface')

    def set_class_targets(self):
        """Create the failure-marker target and prepare all bookkeeping paths."""
        self.failed_target = LocalTarget(build_config().output_directory +
                                         "/TASK_FAILED")
        self.create_dependencies_file()
        self.create_timer_files()

    def create_dependencies_file(self):
        """Create this task's dependency directory and remember its file paths.

        Only the directory is created here; the two JSON files are written
        later by :meth:`requires` and :meth:`run`.
        """
        self.dependencies_dir = pathlib.Path(
            build_config().output_directory).joinpath("dependencies")
        self.dependencies_task_dir = self.dependencies_dir.joinpath(
            self.task_id)
        self.dependencies_task_dir.mkdir(parents=True, exist_ok=True)
        self.dependencies_requires_file = self.dependencies_task_dir.joinpath(
            "requires.json")
        self.dependencies_dynamic_file = self.dependencies_task_dir.joinpath(
            "dynamic.json")

    def create_timer_files(self):
        """Set up the timer directories and the per-task timer file paths."""
        self.create_timer_base_directories()
        self.create_creation_timer_file()
        self.create_first_run_timer_file()
        self.create_run_timer_file()

    def create_timer_base_directories(self):
        """Compute the timer root, state and result directory paths (no I/O)."""
        self.timers_dir = pathlib.Path(
            build_config().output_directory).joinpath("timers")
        self.timers_state_dir = self.timers_dir.joinpath("state")
        self.timers_result_dir = self.timers_dir.joinpath("results")

    def create_creation_timer_file(self):
        """Create the ``creation`` state directory and set this task's file path."""
        self.creation_timer_state_dir = self.timers_state_dir.joinpath(
            "creation")
        self.creation_timer_state_dir.mkdir(parents=True, exist_ok=True)
        self.creation_timer_state_file = self.creation_timer_state_dir.joinpath(
            self.task_id)

    def create_first_run_timer_file(self):
        """Create the ``first_run`` state directory and set this task's file path."""
        self.first_run_timer_state_dir = self.timers_state_dir.joinpath(
            "first_run")
        self.first_run_timer_state_dir.mkdir(parents=True, exist_ok=True)
        self.first_run_timer_state_file = self.first_run_timer_state_dir.joinpath(
            self.task_id)

    def create_run_timer_file(self):
        """Create the ``run`` state directory and set this task's file path."""
        self.run_timer_state_dir = self.timers_state_dir.joinpath("run")
        self.run_timer_state_dir.mkdir(parents=True, exist_ok=True)
        self.run_timer_state_file = self.run_timer_state_dir.joinpath(
            self.task_id)

    def __init__(self, *args, **kwargs):
        """Initialise the task, prepare bookkeeping paths and record creation time."""
        super().__init__(*args, **kwargs)
        self.set_class_targets()
        self.write_creation_time()

    def requires(self):
        """Return the dependencies from :meth:`requires_tasks`, recording them once.

        A task, list, tuple or dict is handled as a plain value; anything
        else that is not ``None`` is treated as a generator. ``None`` yields
        an empty list.
        """
        tasks = self.requires_tasks()
        if tasks is not None:
            if isinstance(tasks, (luigi.Task, list, tuple, dict)):
                return self.handle_requires_value(tasks)
            else:
                return self.handle_requires_generator(tasks)
        else:
            return []

    def handle_requires_value(self, tasks):
        """Write ``tasks`` to ``requires.json`` if absent, then return ``tasks``."""
        if not self.dependencies_requires_file.exists():
            with self.dependencies_requires_file.open(
                    "w") as dependencies_file:
                self.write_dependency(
                    dependencies_file=dependencies_file,
                    dependency_type=DependencyType.requires,
                    dependency_state=DependencyState.requested,
                    index=0,
                    value=tasks)
        return tasks

    def handle_requires_generator(self, tasks):
        """Record the generator's dependencies if ``requires.json`` is absent.

        When the file does not exist yet the generator is consumed while
        being recorded and the collected elements are returned as a list.
        Otherwise the generator is returned untouched.
        """
        if not self.dependencies_requires_file.exists():
            with self.dependencies_requires_file.open(
                    "w") as dependencies_file:
                result = list(
                    self.write_dependencies_for_generator(
                        dependencies_file=dependencies_file,
                        task_generator=tasks,
                        dependency_type=DependencyType.requires))
                return result
        else:
            return tasks

    def requires_tasks(self):
        """Hook for subclasses to declare dependencies; returns ``None`` here."""
        pass

    def write_creation_time(self):
        """Store the current timestamp as creation time unless already stored."""
        if not self.creation_timer_state_file.exists():
            with self.creation_timer_state_file.open("w") as f:
                f.write(str(datetime.now().timestamp()))

    def run(self):
        """Run :meth:`run_task`, recording dynamic dependencies and the run time.

        This is a generator: every element produced by :meth:`run_task` is
        appended to ``dynamic.json`` and yielded onwards.

        :raises StoppingFurtherExecution: when the failure marker exists.
        """
        start_time = datetime.now()
        self.write_first_run_start_time(start_time)
        still_running_logger_thread = self.start_still_running_logger()
        try:
            self.fail_if_any_task_failed()
            with self.dependencies_dynamic_file.open("a") as dependencies_file:
                task_generator = self.run_task()
                if task_generator is not None:
                    yield from self.write_dependencies_for_generator(
                        dependencies_file=dependencies_file,
                        task_generator=task_generator,
                        dependency_type=DependencyType.dynamic)
        finally:
            try:
                self.write_run_time(start_time)
            finally:
                # Stop the logger thread even if recording the run time
                # failed; otherwise the thread would never be stopped.
                self.stop_still_running_logger(still_running_logger_thread)

    def write_dependencies_for_generator(self, dependencies_file,
                                         task_generator,
                                         dependency_type: DependencyType):
        """Proxy ``task_generator``, writing every yielded element to the file.

        Values sent into this generator are forwarded to ``task_generator``
        via ``send``. The index passed to :meth:`write_dependency` starts at
        0 and grows by one per yielded element.
        """
        index = 0
        try:
            element = next(task_generator)
            while True:
                self.write_dependency(
                    dependencies_file=dependencies_file,
                    dependency_type=dependency_type,
                    dependency_state=DependencyState.requested,
                    index=index,
                    value=element)
                result = yield element
                element = task_generator.send(result)
                index += 1
        except StopIteration:
            # The wrapped generator is exhausted; end this generator too.
            pass

    def write_dependency(self, dependencies_file,
                         dependency_type: DependencyType,
                         dependency_state: DependencyState, index: int, value):
        """Write one JSON line per StoppableTask found in ``value``."""
        for task in self.flatten_tasks(value):
            dependency = TaskDependency(source=self.get_task_description(),
                                        target=task.get_task_description(),
                                        type=dependency_type,
                                        index=index,
                                        state=dependency_state)
            dependencies_file.write(f"{dependency.to_json()}")
            dependencies_file.write("\n")

    def get_task_description(self) -> TaskDescription:
        """Return a TaskDescription built from the task id and ``str(self)``."""
        return TaskDescription(id=self.task_id, representation=str(self))

    def flatten_tasks(self, generator: Generator) -> List["StoppableTask"]:
        """Flatten ``generator`` with luigi and keep only StoppableTask instances."""
        return [
            task for task in luigi.task.flatten(generator)
            if isinstance(task, StoppableTask)
        ]

    def run_task(self) -> Generator:
        """Hook for subclasses to do the actual work; returns ``None`` here."""
        pass

    def write_first_run_start_time(self, start_time):
        """Store ``start_time`` as first-run timestamp unless already stored."""
        if not self.first_run_timer_state_file.exists():
            with self.first_run_timer_state_file.open("w") as f:
                f.write(str(start_time.timestamp()))

    def start_still_running_logger(self):
        """Start a StillRunningLoggerThread for this task and return it."""
        # TODO use larger delay for this StillRunningLogger
        still_running_logger = StillRunningLogger(self.logger, repr(self),
                                                  "task")
        still_running_logger_thread = StillRunningLoggerThread(
            still_running_logger)
        still_running_logger_thread.start()
        return still_running_logger_thread

    def write_run_time(self, start_time):
        """Append the seconds elapsed since ``start_time`` to the run timer file."""
        timedelta = datetime.now() - start_time
        with self.run_timer_state_file.open("a") as f:
            f.write(str(timedelta.total_seconds()))
            f.write("\n")

    def stop_still_running_logger(self, still_running_logger_thread):
        """Stop the given logger thread and wait for it to finish."""
        still_running_logger_thread.stop()
        still_running_logger_thread.join()

    def fail_if_any_task_failed(self):
        """Raise StoppingFurtherExecution if the failure marker file exists.

        :raises StoppingFurtherExecution: naming the task id read from the
            marker file.
        """
        if self.failed_target.exists():
            with self.failed_target.open("r") as f:
                failed_task = f.read()
            raise StoppingFurtherExecution(
                "Task %s failed. Stopping further execution." % failed_task)

    def on_success(self):
        """Log and persist the timing results, then defer to luigi's handler."""
        now = datetime.now()
        self.log_time_since_first_run(now)
        self.log_time_since_creation(now)
        self.log_time_of_runs()
        super().on_success()

    def log_time_since_creation(self, now):
        """Log and store the seconds between task creation and ``now``."""
        if self.creation_timer_state_file.exists():
            with self.creation_timer_state_file.open("r") as f:
                start_time_str = f.read()
            start_time = datetime.fromtimestamp(float(start_time_str))
            timedelta = now - start_time
            self.logger.info("Task %s: Time since creation %s s",
                             repr(self), timedelta.total_seconds())
            self.timers_result_dir.mkdir(parents=True, exist_ok=True)
            with self.timers_result_dir.joinpath(
                    self.task_id + "_" + "since_creation").open("w") as f:
                f.write(str(timedelta.total_seconds()))

    def log_time_since_first_run(self, now):
        """Log and store the seconds between the first run start and ``now``."""
        if self.first_run_timer_state_file.exists():
            with self.first_run_timer_state_file.open("r") as f:
                start_time_str = f.read()
            start_time = datetime.fromtimestamp(float(start_time_str))
            timedelta = now - start_time
            self.logger.info("Task %s: Time since first_run %s s",
                             repr(self), timedelta.total_seconds())
            self.timers_result_dir.mkdir(parents=True, exist_ok=True)
            with self.timers_result_dir.joinpath(
                    self.task_id + "_" + "since_first_run").open("w") as f:
                f.write(str(timedelta.total_seconds()))

    def log_time_of_runs(self):
        """Log and store the summed duration of all recorded runs."""
        if self.run_timer_state_file.exists():
            with self.run_timer_state_file.open("r") as f:
                total_runtime = self.calculate_total_runtime(f.readlines())
            self.logger.info("Task %s: Total runtime of run method %s s",
                             repr(self), total_runtime)
            # Create the results directory here too, as the other two log
            # methods do, so this method does not depend on call order.
            self.timers_result_dir.mkdir(parents=True, exist_ok=True)
            with self.timers_result_dir.joinpath(self.task_id + "_" +
                                                 "total_run").open("w") as f:
                f.write(str(total_runtime))

    def calculate_total_runtime(self, lines):
        """Return the sum of the durations in ``lines``.

        Each non-blank line must be parseable by ``float``. Blank lines are
        skipped. With no durations at all the result is ``0``.

        :raises ValueError: if a non-blank line is not a valid float.
        """
        return sum(float(line) for line in lines if line.strip())

    def on_failure(self, exception):
        """Write the failure marker for real failures, then defer to luigi.

        ``StoppingFurtherExecution`` is only the consequence of an earlier
        failure, so it never writes the marker. An existing marker is kept.
        """
        if not isinstance(exception, StoppingFurtherExecution):
            if not self.failed_target.exists():
                with self.failed_target.open("w") as f:
                    f.write("%s" % self.task_id)
        super().on_failure(exception)

    def __repr__(self):
        """
        Build a task representation like `MyTask(param1=1.5, param2='5')`

        Only parameters that are significant and publicly visible are shown.
        """
        params = self.get_params()
        param_values = self.get_param_values(params, [], self.param_kwargs)

        # Build up task id
        repr_parts = []
        param_objs = dict(params)
        for param_name, param_value in param_values:
            if param_objs[param_name].significant and \
                    param_objs[param_name].visibility == luigi.parameter.ParameterVisibility.PUBLIC:
                repr_parts.append(
                    '%s=%s' % (param_name,
                               param_objs[param_name].serialize(param_value)))

        task_str = '{}({})'.format(self.get_task_family(),
                                   ', '.join(repr_parts))

        return task_str