Пример #1
0
def _get_common_parameters(version):
    """Build the AdapterRemoval command builder shared by the nodes.

    The version requirement matching `version` is attached to the builder via
    CHECK_VERSION, and the --trimns and --trimqualities options are set with
    fixed=False. A deprecation warning is printed at most once per process
    (tracked by the module-level _DEPRECATION_WARNING_PRINTED flag) when
    `version_check.version` compares lower than (2, 0).

    Raises CmdError if `version` is neither VERSION_14 nor VERSION_15.
    """
    global _DEPRECATION_WARNING_PRINTED

    if version == VERSION_14:
        version_check = _VERSION_14_CHECK
    elif version == VERSION_15:
        version_check = _VERSION_15_CHECK
    else:
        raise CmdError("Unknown version: %s" % version)

    cmd = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=version_check)

    # Trim Ns at read ends
    cmd.set_option("--trimns", fixed=False)
    # Trim low quality scores
    cmd.set_option("--trimqualities", fixed=False)

    # The version is only queried while the warning is still pending, exactly
    # as the original short-circuiting condition did.
    if not _DEPRECATION_WARNING_PRINTED:
        # Guard only the version lookup; a failure to determine the version
        # means that no warning is printed, and is not treated as an error
        # here. Keeping the try-body minimal ensures that an unrelated
        # VersionRequirementError cannot be swallowed by accident.
        try:
            is_outdated = version_check.version < (2, 0)
        except versions.VersionRequirementError:
            is_outdated = False

        if is_outdated:
            import pypeline.ui as ui
            ui.print_warn("\nWARNING: AdapterRemoval v1.5.x is deprecated;")
            ui.print_warn("         Upgrading to 2.1.x is strongly adviced!\n")
            ui.print_warn(
                "         Download the newest version of AdapterRemoval at ")
            ui.print_warn(
                "         https://github.com/MikkelSchubert/adapterremoval\n")

            _DEPRECATION_WARNING_PRINTED = True

    return cmd
Пример #2
0
    def __init__(self, parameters):
        """Assemble the parallel command set for paired-end input files.

        `parameters` supplies the mate 1 / mate 2 input files, the output
        format and prefix, the (unfinalized) main command, the version and
        the node dependencies. One compression command is created per output
        suffix; which suffixes are used depends on `parameters.version`.

        Raises CmdError if the number of mate 1 and mate 2 files differ.
        """
        self._version = parameters.version
        self._basename = parameters.basename

        num_mate_1 = len(parameters.input_files_1)
        num_mate_2 = len(parameters.input_files_2)
        if num_mate_1 != num_mate_2:
            raise CmdError("Number of mate 1 files differ from mate 2 files: %i != %i"
                           % (num_mate_1, num_mate_2))

        def _zip(suffix):
            # Compression command for the output file with the given suffix
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      suffix)

        zcat_pair_1 = _build_unicat_command(parameters.input_files_1,
                                            "uncompressed_input_1")
        zcat_pair_2 = _build_unicat_command(parameters.input_files_2,
                                            "uncompressed_input_2")
        zip_pair_1 = _zip(".pair1.truncated")
        zip_pair_2 = _zip(".pair2.truncated")
        zip_discarded = _zip(".discarded")
        adapterrm = parameters.command.finalize()

        # Output suffixes that differ between the supported versions
        if parameters.version == VERSION_15:
            version_suffixes = (".collapsed",
                                ".collapsed.truncated",
                                ".singleton.truncated")
        else:
            version_suffixes = (".singleton.aln.truncated",
                                ".singleton.unaln.truncated")

        pipeline = [adapterrm, zip_pair_1, zip_pair_2]
        pipeline.extend(_zip(suffix) for suffix in version_suffixes)
        pipeline.extend((zip_discarded, zcat_pair_1, zcat_pair_2))

        # Opening a pipe blocks, so the order of these commands depends on the
        # order of file-opens in atomiccmd and in the programs themselves.
        pipeline = ParallelCmds(pipeline)

        input_summary = fileutils.describe_files(parameters.input_files_1)
        description = "<PE_AdapterRM: %s -> '%s.*'>" \
            % (input_summary.replace("file", "pair"),
               parameters.output_prefix)

        CommandNode.__init__(self,
                             command=pipeline,
                             description=description,
                             dependencies=parameters.dependencies)
Пример #3
0
    def __init__(self, commands):
        """Create a set of commands from one or more commands.

        `commands` is passed through safe_coerce_to_tuple, and every member
        must be an AtomicCmd or a _CommandSet instance; otherwise CmdError is
        raised. The validated members are handed to _CommandSet.__init__.
        """
        self._ready = False

        members = safe_coerce_to_tuple(commands)
        permitted_types = (AtomicCmd, _CommandSet)
        if not all(isinstance(member, permitted_types) for member in members):
            raise CmdError(
                "ParallelCmds must only contain AtomicCmds or other ParallelCmds!"
            )

        _CommandSet.__init__(self, members)
Пример #4
0
def _are_fastq_checks_required(version):
    """Return True if FASTQ checks are required for the given version.

    Checks are always required for VERSION_14. For VERSION_15 they are
    required if _VERSION_15_CHECK.version is lower than (2, 0), or if that
    version cannot be obtained (VersionRequirementError).

    Raises CmdError for any other value of `version`.
    """
    if version == VERSION_14:
        return True

    if version == VERSION_15:
        try:
            required = _VERSION_15_CHECK.version < (2, 0)
        except versions.VersionRequirementError:
            # Be conservative if the installed version cannot be determined
            required = True

        return required

    raise CmdError("Unknown version: %s" % version)
Пример #5
0
def _build_zip_command(output_format, prefix, name, output=None):
    """Return an AtomicCmd compressing a temporary file to `prefix` + suffix.

    The command reads the temporary file named basename(prefix) + `name` on
    STDIN and writes to `prefix` + (`output` or `name`) + extension on STDOUT,
    using "bzip2 -c" for the "bz2" format and "gzip -c" for the "gz" format.

    Raises CmdError if `output_format` is neither "gz" nor "bz2".
    """
    supported = (
        ("bz2", "bzip2", ".bz2"),
        ("gz", "gzip", ".gz"),
    )

    for known_format, program, extension in supported:
        if output_format == known_format:
            break
    else:
        raise CmdError("Invalid output-format (%s), please select 'gz' or 'bz2'"
                       % (repr(output_format),))

    temp_input = os.path.basename(prefix) + name
    destination = prefix + (output or name) + extension

    return AtomicCmd([program, "-c"],
                     TEMP_IN_STDIN=temp_input,
                     OUT_STDOUT=destination)
Пример #6
0
def _build_zip_command(output_format, prefix, name, output=None):
    """Return a finalized "zip" command compressing a temporary pipe/file.

    The command is created via factory.new("zip"), is given the option
    "--format" with the value `output_format`, reads from the temporary input
    named basename(prefix) + `name`, and writes to
    `prefix` + (`output` or `name`) + "." + `output_format` on STDOUT.

    Raises CmdError if `output_format` is neither "gz" nor "bz2".
    """
    supported_formats = ("gz", "bz2")
    if output_format not in supported_formats:
        template = "Invalid output-format (%r), please select 'gz' or 'bz2'"
        raise CmdError(template % (output_format, ))

    temp_prefix = os.path.basename(prefix)

    builder = factory.new("zip")
    builder.set_option("--format", output_format)
    builder.add_value("%(TEMP_IN_PIPE)s")

    source = temp_prefix + name
    destination = prefix + (output or name) + "." + output_format
    builder.set_kwargs(TEMP_IN_PIPE=source, OUT_STDOUT=destination)

    return builder.finalize()
Пример #7
0
def _get_common_parameters(version):
    """Build the AdapterRemoval command builder shared by the nodes.

    The version requirement matching `version` is attached via CHECK_VERSION,
    and the --trimns and --trimqualities options are set with fixed=False.

    Raises CmdError if `version` is neither VERSION_14 nor VERSION_15.
    """
    is_v14 = (version == VERSION_14)
    if not (is_v14 or version == VERSION_15):
        raise CmdError("Unknown version: %s" % version)

    requirement = _VERSION_14_CHECK if is_v14 else _VERSION_15_CHECK
    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    # --trimns: Trim Ns at read ends
    # --trimqualities: Trim low quality scores
    for flag in ("--trimns", "--trimqualities"):
        builder.set_option(flag, fixed=False)

    return builder
Пример #8
0
    def _validate_commands(self):
        """Check that the commands in this set can safely be run together.

        Raises ValueError if the same command is included more than once, and
        CmdError if any filename occurs more than once among the
        `expected_temp_files` and `optional_temp_files` of the commands.
        """
        if len(self._commands) != len(set(self._commands)):
            raise ValueError("Same command included multiple times in %s" \
                             % (self.__class__.__name__,))

        # Number of times each temporary filename is claimed by a command
        filenames = collections.defaultdict(int)
        for command in self._commands:
            for filename in command.expected_temp_files:
                filenames[filename] += 1
            for filename in command.optional_temp_files:
                filenames[filename] += 1

        clobbered = [
            filename for (filename, count) in filenames.items() if (count > 1)
        ]
        # Test the list itself rather than any(clobbered): any() inspects the
        # truthiness of the filenames, and would therefore miss a clobbered
        # filename that happens to be falsy (e.g. an empty string).
        if clobbered:
            raise CmdError("Commands clobber each others' files: %s" %
                           (", ".join(clobbered), ))
Пример #9
0
def _get_common_parameters(version):
    """Build the AdapterRemoval command builder with default options.

    The version requirement matching `version` is attached via CHECK_VERSION,
    and each default option is set with fixed=False.

    Raises CmdError if `version` is neither VERSION_14 nor VERSION_15.
    """
    is_v14 = (version == VERSION_14)
    if not (is_v14 or version == VERSION_15):
        raise CmdError("Unknown version: %s" % version)

    requirement = _VERSION_14_CHECK if is_v14 else _VERSION_15_CHECK
    builder = AtomicCmdBuilder("AdapterRemoval", CHECK_VERSION=requirement)

    # Positional arguments passed to set_option, in the order they are set
    default_options = (
        # Allow 1/3 mismatches in the aligned region
        ("--mm", 3),
        # Minimum length of trimmed reads
        ("--minlength", 25),
        # Trim Ns at read ends
        ("--trimns",),
        # Trim low quality scores
        ("--trimqualities",),
        # Offset of quality scores
        ("--qualitybase", 33),
    )

    for arguments in default_options:
        builder.set_option(*arguments, fixed=False)

    return builder
Пример #10
0
 def stdout(self):
     """Always raise CmdError; 'stdout' is not implemented by this class."""
     class_name = self.__class__.__name__
     message = "%s does not implement property 'stdout'!" % (class_name,)
     raise CmdError(message)
Пример #11
0
    def __init__(self, commands):
        """Store the given commands and validate them.

        `commands` is passed through safe_coerce_to_tuple before being stored
        in `self._commands`. Raises CmdError if the result is empty; otherwise
        the commands are checked by `self._validate_commands()`.
        """
        coerced = safe_coerce_to_tuple(commands)
        self._commands = coerced

        if coerced:
            self._validate_commands()
        else:
            raise CmdError("Empty list passed to command set")
Пример #12
0
    def __init__(self, parameters):
        """Assemble the parallel command set for paired-end input files.

        `parameters` supplies the quality offset, version, basename, collapse
        flag, mate 1 / mate 2 input files, output format and prefix, the
        (unfinalized) main command and the node dependencies. One compression
        command is created per output suffix; which suffixes are used depends
        on `parameters.version` and, for VERSION_15, `parameters.collapse`.

        Raises CmdError if the number of mate 1 and mate 2 files differ.
        """
        self._quality_offset = parameters.quality_offset
        self._version = parameters.version
        self._basename = parameters.basename
        self._collapse = parameters.collapse
        self._check_fastqs = _are_fastq_checks_required(parameters.version)

        num_mate_1 = len(parameters.input_files_1)
        num_mate_2 = len(parameters.input_files_2)
        if num_mate_1 != num_mate_2:
            raise CmdError(
                "Number of mate 1 files differ from mate 2 files: %i != %i"
                % (num_mate_1, num_mate_2))

        def _zip(suffix):
            # Compression command for the output file with the given suffix
            return _build_zip_command(parameters.output_format,
                                      parameters.output_prefix,
                                      suffix)

        zcat_pair_1 = _build_cat_command(parameters.input_files_1,
                                         "uncompressed_input_1")
        zcat_pair_2 = _build_cat_command(parameters.input_files_2,
                                         "uncompressed_input_2")
        zip_pair_1 = _zip(".pair1.truncated")
        zip_pair_2 = _zip(".pair2.truncated")
        zip_discarded = _zip(".discarded")
        adapterrm = parameters.command.finalize()

        pipeline = [adapterrm, zip_pair_1, zip_pair_2]
        if parameters.version == VERSION_15:
            zip_singleton = _zip(".singleton.truncated")
            # The collapsed outputs are only compressed if collapsing is
            # enabled; they are placed before the singleton command.
            if parameters.collapse:
                pipeline.append(_zip(".collapsed"))
                pipeline.append(_zip(".collapsed.truncated"))
            pipeline.append(zip_singleton)
        else:
            pipeline.append(_zip(".singleton.aln.truncated"))
            pipeline.append(_zip(".singleton.unaln.truncated"))
        pipeline.extend((zip_discarded, zcat_pair_1, zcat_pair_2))
        pipeline = ParallelCmds(pipeline)

        input_summary = fileutils.describe_paired_files(
            parameters.input_files_1, parameters.input_files_2)
        description = "<AdapterRM (PE): %s -> '%s.*'>" \
            % (input_summary, parameters.output_prefix)

        CommandNode.__init__(self,
                             command=pipeline,
                             description=description,
                             dependencies=parameters.dependencies)