Exemplo n.º 1
0
def flatten(files):
    """Expand a path or list of paths into a flat list of files.

    Directories are replaced by their (non-recursive) listing, plain
    files are kept as-is, and anything else is silently dropped.
    `~` is expanded in every entry.
    """
    if not isinstance(files, list):
        files = [files]

    flat = []
    for path in files:
        path = os.path.expanduser(path)
        if fs.isdir(path):
            flat.extend(fs.ls(path))
        elif fs.isfile(path):
            flat.append(path)
    return flat
Exemplo n.º 2
0
    def process_workflow(self, store, stats, wflow):
        """Reconcile the stageout area of *wflow* against the task database.

        Scans the files present under ``wflow.label`` and classifies them:

        * ``delete``  -- outputs of failed tasks, plus (when dependents
          should have cleaned up and have finished) leftover files.
        * ``merged``  -- outputs of intermediate merged tasks.
        * ``missing`` -- tasks recorded as successful whose output file
          is absent.

        Per-category counters are accumulated in ``stats[wflow.label]``.

        Returns the tuple ``(delete, merged, missing)``.
        """
        files = set(fs.ls(wflow.label))
        delete = []
        merged = []
        missing = []

        # True when any dependent workflow is responsible for cleaning
        # up this workflow's output.
        cleaned = any(w.cleanup_input for w in wflow.dependents)

        for task, task_type in store.failed_tasks(wflow.label):
            for _, filename in wflow.get_outputs(task):
                if filename in files:
                    logger.info(
                        "found output from failed task: {0}".format(filename))
                    stats[wflow.label][0] += 1
                    delete.append(filename)

        for task, task_type in store.merged_tasks(wflow.label):
            for _, filename in wflow.get_outputs(task):
                if filename in files:
                    logger.info(
                        "found output from intermediate merged task: {0}".
                        format(filename))
                    stats[wflow.label][1] += 1
                    merged.append(filename)

        if cleaned:
            for w in wflow.dependents:
                if not w.cleanup_input:
                    continue
                if store.unfinished_units(w.label) == 0:
                    for fn in files:
                        logger.warning(
                            'found output from tasks that should have been cleaned up: {}'
                            .format(fn))
                    stats[wflow.label][2] += len(files)
                    delete.extend(files)
                else:
                    # BUG FIX: this `else` was previously attached to the
                    # `for` loop (a for-else), so with no `break` in the
                    # loop the error fired unconditionally — even when
                    # every dependent had finished and the leftover files
                    # were flagged above.  It belongs to the
                    # `unfinished_units(...) == 0` check.
                    logger.error(
                        "can't validate workflow {}, as its dependents have not completed and cleaned it up"
                        .format(wflow.label))
        else:
            for task, task_type in store.successful_tasks(wflow.label):
                for _, filename in wflow.get_outputs(task):
                    if filename not in files:
                        missing.append(task)
                        logger.warning(
                            'output file is missing for {0}'.format(task))

        return delete, merged, missing
Exemplo n.º 3
0
 def validate(self):
     """Validate the dataset configuration and prepare the stageout area.

     Raises:
         AttributeError: the dataset configuration is invalid.
         IOError: the stageout directory already contains files, or it
             could not be created.
     """
     with fs.alternative():
         if not self.dataset.validate():
             msg = "cannot validate configuration for dataset of workflow '{0}'"
             raise AttributeError(msg.format(self.label))

     contents = list(fs.ls(self.label)) if fs.exists(self.label) else []
     if contents:
         msg = "stageout directory for '{0}' is not empty"
         raise IOError(msg.format(self.label))

     # Attempt to create the stageout directory; a failure here means
     # the user lacks access to the stageout location.
     try:
         fs.makedirs(self.label)
     except Exception:
         msg = "failed to create stageout directory for '{0}'"
         raise IOError(msg.format(self.label))
Exemplo n.º 4
0
 def validate(self):
     """Validate the workflow's dataset and prepare its stageout directory.

     Raises
     ------
     AttributeError
         If the dataset configuration cannot be validated.
     IOError
         If the stageout directory already contains files, or if it
         cannot be created.
     """
     # Dataset validation is performed against the alternative
     # filesystem configuration.
     with fs.alternative():
         if not self.dataset.validate():
             msg = "cannot validate configuration for dataset of workflow '{0}'"
             raise AttributeError(msg.format(self.label))
     # Refuse to reuse a non-empty stageout area so output from a
     # previous run is never clobbered.
     if fs.exists(self.label) and len(list(fs.ls(self.label))) > 0:
         msg = "stageout directory for '{0}' is not empty"
         raise IOError(msg.format(self.label))
     else:
         # try to create the stageout directory.  if this fails, the
         # user does not have access...
         try:
             fs.makedirs(self.label)
         except Exception:
             msg = "failed to create stageout directory for '{0}'"
             raise IOError(msg.format(self.label))
Exemplo n.º 5
0
    def process_workflow(self, store, stats, wflow):
        """Reconcile the stageout area of *wflow* against the task database.

        Scans the files present under ``wflow.label`` and classifies them:

        * ``delete``  -- outputs of failed tasks, outputs of intermediate
          merged tasks, and (when dependents should have cleaned up and
          have finished) leftover files.
        * ``missing`` -- tasks recorded as successful whose output file
          is absent.

        Per-category counters are accumulated in ``stats[wflow.label]``.

        Returns the tuple ``(delete, missing)``.
        """
        files = set(fs.ls(wflow.label))
        delete = []
        missing = []

        # True when any dependent workflow is responsible for cleaning
        # up this workflow's output.
        cleaned = any(w.cleanup_input for w in wflow.dependents)

        for task, task_type in store.failed_tasks(wflow.label):
            for _, filename in wflow.get_outputs(task):
                if filename in files:
                    logger.info("found output from failed task: {0}".format(filename))
                    stats[wflow.label][0] += 1
                    delete.append(filename)

        for task, task_type in store.merged_tasks(wflow.label):
            for _, filename in wflow.get_outputs(task):
                if filename in files:
                    logger.info("found output from intermediate merged task: {0}".format(filename))
                    stats[wflow.label][1] += 1
                    delete.append(filename)

        if cleaned:
            for w in wflow.dependents:
                if not w.cleanup_input:
                    continue
                if store.unfinished_units(w.label) == 0:
                    for fn in files:
                        logger.warning('found output from tasks that should have been cleaned up: {}'.format(fn))
                    stats[wflow.label][2] += len(files)
                    delete.extend(files)
                else:
                    # BUG FIX: this `else` was previously attached to the
                    # `for` loop (a for-else), so with no `break` in the
                    # loop the error fired unconditionally — even when
                    # every dependent had finished and the leftover files
                    # were flagged above.  It belongs to the
                    # `unfinished_units(...) == 0` check.
                    logger.error("can't validate workflow {}, as its dependents have not completed and cleaned it up".format(wflow.label))
        else:
            for task, task_type in store.successful_tasks(wflow.label):
                for _, filename in wflow.get_outputs(task):
                    if filename not in files:
                        missing.append(task)
                        logger.warning('output file is missing for {0}'.format(task))

        return delete, missing
Exemplo n.º 6
0
def flatten(files, matches=None):
    """Flatten a list of directories or files to a single list of files.

    Parameters
    ----------
        files : str or list
            A list of paths to expand. Can also be a string containing a path.
        matches : list
            A list of patterns to match files against. Only successfully
            matched files will be returned.

    Returns
    -------
        files : list
            The files found under the input paths, optionally restricted
            to those whose basename matches one of the patterns in
            `matches`.
    """
    if not isinstance(files, list):
        files = [files]

    found = []
    for path in files:
        path = os.path.expanduser(path)
        if fs.isdir(path):
            found += fs.ls(path)
        elif fs.isfile(path):
            found.append(path)

    if not matches:
        return found
    # Match on the basename only, against any of the glob patterns.
    return [fn for fn in found
            if any(fnmatch.fnmatch(os.path.basename(fn), m) for m in matches)]
Exemplo n.º 7
0
def flatten(files, matches=None):
    """Flatten a list of directories or files to a single list of files.

    Parameters
    ----------
        files : str or list
            A list of paths to expand. Can also be a string containing a path.
        matches : list
            A list of patterns to match files against. Only successfully
            matched files will be returned.

    Returns
    -------
        files : list
            A list of files found in the paths passed in the input
            parameter `files`, optionally matching the patterns in
            `matches`.
    """
    # True when the basename of fn matches any of the glob patterns.
    def matchfn(fn):
        base = os.path.basename(fn)
        for m in matches:
            if fnmatch.fnmatch(base, m):
                return True
        return False
    res = []
    if not isinstance(files, list):
        files = [files]
    for entry in files:
        # Expand `~` before probing the filesystem.
        entry = os.path.expanduser(entry)
        if fs.isdir(entry):
            res.extend(fs.ls(entry))
        elif fs.isfile(entry):
            res.append(entry)
    if matches:
        return [fn for fn in res if matchfn(fn)]
    return res