def _get_node_description(name, algorithm, input_files_1, input_files_2=(),
                          prefix=None, threads=1):
    """Return a one-line, human-readable description of a node.

    The result has the form "<NAME (INFO): FILES>", where INFO is a
    comma-separated list of the prefix basename (without a trailing
    ".fasta" / ".fa" extension), the algorithm, and the thread count
    (only included when more than one thread is used), and FILES is
    whatever describe_paired_files returns for the input files.

    Parameters:
      name          -- label placed first in the description.
      algorithm     -- algorithm label; omitted from INFO if falsy.
      input_files_1 -- first set of input files.
      input_files_2 -- optional second set of input files; a falsy
                       value is passed on as an empty tuple.
      prefix        -- optional path; only its basename is shown.
      threads       -- number of threads; shown only when above 1.
    """
    threads_str = ("%i threads" % (threads,)) if (threads > 1) else None

    prefix_str = None
    if prefix:
        # Only inspect the extension when a prefix was actually given;
        # with the default (prefix=None) there is nothing to strip, and
        # calling .endswith on None would raise an AttributeError.
        prefix_str = os.path.basename(prefix)
        if prefix_str.endswith((".fasta", ".fa")):
            prefix_str = prefix_str.rsplit(".", 1)[0]

    file_desc = describe_paired_files(input_files_1, input_files_2 or ())
    # filter(None, ...) drops the fields that are missing or empty
    info = ", ".join(filter(None, (prefix_str, algorithm, threads_str)))

    return "<%s (%s): %s>" % (name, info, file_desc)
def _get_node_description(name, algorithm, input_files_1, input_files_2=(),
                          prefix=None, threads=1):
    """Build the "<NAME (INFO): FILES>" description string for a node.

    INFO joins, with ", ", the non-empty values among: the basename of
    `prefix` with any trailing ".fasta" / ".fa" extension removed, the
    `algorithm` label, and "N threads" (only when `threads` > 1).
    FILES is the value returned by describe_paired_files for the two
    sets of input files; a falsy `input_files_2` is passed as ().

    `prefix` is optional; when it is None or empty no prefix is shown.
    """
    info_fields = []

    if prefix:
        # Guarded so that the default prefix=None does not end up calling
        # a string method on None (which raised AttributeError before).
        prefix_str = os.path.basename(prefix)
        if prefix_str.endswith((".fasta", ".fa")):
            prefix_str = prefix_str.rsplit(".", 1)[0]
        info_fields.append(prefix_str)

    info_fields.append(algorithm)

    if threads > 1:
        info_fields.append("%i threads" % (threads,))

    file_desc = describe_paired_files(input_files_1, input_files_2 or ())
    # Empty / None fields (e.g. no algorithm, empty basename) are skipped
    info = ", ".join(filter(None, info_fields))

    return "<%s (%s): %s>" % (name, info, file_desc)
def __init__(self, parameters):
    """Set up the paired-end AdapterRM command node from `parameters`.

    Stores the quality offset, version, basename and collapse settings
    on the instance, then assembles one set of parallel commands: the
    finalized main command, one zip command per output file, and one
    cat command per mate for the input files.

    Raises CmdError if the number of mate 1 and mate 2 files differ.
    """
    self._quality_offset = parameters.quality_offset
    self._version = parameters.version
    self._basename = parameters.basename
    self._collapse = parameters.collapse
    self._check_fastqs = _are_fastq_checks_required(parameters.version)

    mates_1 = parameters.input_files_1
    mates_2 = parameters.input_files_2
    if len(mates_1) != len(mates_2):
        raise CmdError("Number of mate 1 files differ from mate 2 files: "
                       "%i != %i" % (len(mates_1), len(mates_2)))

    def _zip(extension):
        # All zip commands share output format and prefix; only the
        # extension of the file differs.
        return _build_zip_command(parameters.output_format,
                                  parameters.output_prefix,
                                  extension)

    cat_mates_1 = _build_cat_command(mates_1, "uncompressed_input_1")
    cat_mates_2 = _build_cat_command(mates_2, "uncompressed_input_2")
    zip_mates_1 = _zip(".pair1.truncated")
    zip_mates_2 = _zip(".pair2.truncated")
    zip_discarded = _zip(".discarded")

    commands = [parameters.command.finalize(), zip_mates_1, zip_mates_2]

    # The set of singleton / collapsed output files depends on version
    if parameters.version == VERSION_15:
        zip_singleton = _zip(".singleton.truncated")
        if parameters.collapse:
            commands.append(_zip(".collapsed"))
            commands.append(_zip(".collapsed.truncated"))
        commands.append(zip_singleton)
    else:
        commands.append(_zip(".singleton.aln.truncated"))
        commands.append(_zip(".singleton.unaln.truncated"))

    commands.extend((zip_discarded, cat_mates_1, cat_mates_2))

    file_desc = fileutils.describe_paired_files(parameters.input_files_1,
                                                parameters.input_files_2)
    description = "<AdapterRM (PE): %s -> '%s.*'>" \
        % (file_desc, parameters.output_prefix)

    CommandNode.__init__(self,
                         command=ParallelCmds(commands),
                         description=description,
                         dependencies=parameters.dependencies)
def __init__(self, parameters):
    """Create the paired-end AdapterRM node described by `parameters`.

    Instance settings (quality offset, version, basename, collapse and
    whether FASTQ checks are required) are recorded first. The node
    then runs, in parallel, the finalized main command together with
    the zip commands for each output file and the cat commands for the
    two sets of input files.

    Raises CmdError when the mate 1 and mate 2 file counts differ.
    """
    self._quality_offset = parameters.quality_offset
    self._version = parameters.version
    self._basename = parameters.basename
    self._collapse = parameters.collapse
    self._check_fastqs = _are_fastq_checks_required(parameters.version)

    num_mate_1 = len(parameters.input_files_1)
    num_mate_2 = len(parameters.input_files_2)
    if num_mate_1 != num_mate_2:
        raise CmdError(
            "Number of mate 1 files differ from mate 2 files: "
            "%i != %i" % (num_mate_1, num_mate_2))

    # Shorthand for building the zip command of a single output file
    zip_cmd = lambda ext: _build_zip_command(parameters.output_format,
                                             parameters.output_prefix,
                                             ext)

    unzip_1 = _build_cat_command(parameters.input_files_1,
                                 "uncompressed_input_1")
    unzip_2 = _build_cat_command(parameters.input_files_2,
                                 "uncompressed_input_2")
    pair_1 = zip_cmd(".pair1.truncated")
    pair_2 = zip_cmd(".pair2.truncated")
    discarded = zip_cmd(".discarded")
    main_cmd = parameters.command.finalize()

    if parameters.version != VERSION_15:
        version_specific = [zip_cmd(".singleton.aln.truncated"),
                            zip_cmd(".singleton.unaln.truncated")]
    else:
        singleton = zip_cmd(".singleton.truncated")
        version_specific = []
        if parameters.collapse:
            # Collapsed reads are written before the singletons
            version_specific.append(zip_cmd(".collapsed"))
            version_specific.append(zip_cmd(".collapsed.truncated"))
        version_specific.append(singleton)

    commands = ParallelCmds([main_cmd, pair_1, pair_2]
                            + version_specific
                            + [discarded, unzip_1, unzip_2])

    description = "<AdapterRM (PE): %s -> '%s.*'>" % (
        fileutils.describe_paired_files(parameters.input_files_1,
                                        parameters.input_files_2),
        parameters.output_prefix)

    CommandNode.__init__(self,
                         command=commands,
                         description=description,
                         dependencies=parameters.dependencies)
def test_describe_paired_files__different_path_and_files():
    # Mates located in unrelated directories, with unrelated filenames,
    # are expected to be summarized as a bare count of pairs.
    mates_1 = ("foo/1_abc", "bar/2_def")
    mates_2 = ("zed/3_ghi", "not/4_jkl")
    assert_equal(describe_paired_files(mates_1, mates_2),
                 "2 pair(s) of files")
def test_describe_paired_files__same_path__different_file_lens():
    # All files share the 'foo' directory, but the filenames differ in
    # length; the expected summary is a pair count plus the directory.
    mates_1 = ("foo/1_a", "foo/2_de")
    mates_2 = ("foo/3_g", "foo/4_jk")
    assert_equal(describe_paired_files(mates_1, mates_2),
                 "2 pair(s) of files in 'foo'")
def test_describe_paired_files__same_path__similar_files__too_different():
    # Same directory and same filename lengths; the expected summary is
    # nonetheless the pair count plus directory, not a filename pattern.
    mates_1 = ("foo/1a_abc", "foo/1a_def")
    mates_2 = ("foo/2b_ghi", "foo/2b_jkl")
    assert_equal(describe_paired_files(mates_1, mates_2),
                 "2 pair(s) of files in 'foo'")
def test_describe_paired_files__same_path__similar_files__different_prefixes():
    # Same directory, same filename lengths, and mates differing in a
    # single leading character: a glob-like pattern is expected.
    mates_1 = ("foo/1_abc", "foo/1_def")
    mates_2 = ("foo/2_ghi", "foo/2_jkl")
    assert_equal(describe_paired_files(mates_1, mates_2),
                 "'foo/[12]_???'")
def test_describe_paired_files__identical_files():
    # The same single file given as both mates is expected to be
    # described by the repr of its path.
    filename = "/var/foo/bar"
    mates = (filename,)
    observed = describe_paired_files(mates, mates)
    assert_equal(observed, repr(filename))
def test_describe_paired_files__single_file():
    # A single mate 1 file and no mate 2 files is expected to be
    # described by the repr of the path of that file.
    filename = "/var/foo/bar"
    observed = describe_paired_files((filename,), ())
    assert_equal(observed, repr(filename))