Exemplo n.º 1
0
def test_multiplex_deplex_num_reads_tsv(expected_fixture, dir_tmp,
                                        multiplex_name):
    """
    Check that the
    :py:const:`riboviz.demultiplex_fastq.NUM_READS_FILE` output by
    :py:const:`riboviz.tools.demultiplex_fastq` equals the expected
    file. See :py:func:`riboviz.utils.equal_tsv`.

    Skipped by ``pytest`` automatically if ``multiplex_name``
    fixture is not injected.

    :param expected_fixture: Expected data directory
    :type expected_fixture: str or unicode
    :param dir_tmp: Temporary directory
    :type dir_tmp: str or unicode
    :param multiplex_name: Multiplexed FASTQ file name prefix
    :type multiplex_name: str or unicode
    """
    deplex_dir = workflow_files.DEPLEX_DIR_FORMAT.format(multiplex_name)
    # The expected data is laid out under a directory with the same
    # name as the last component of dir_tmp.
    tmp_name = os.path.basename(os.path.normpath(dir_tmp))
    num_reads_file = demultiplex_fastq.NUM_READS_FILE
    expected_file = os.path.join(
        expected_fixture, tmp_name, deplex_dir, num_reads_file)
    actual_file = os.path.join(dir_tmp, deplex_dir, num_reads_file)
    # Non-default comparison options are needed because some columns
    # hold string values.
    utils.equal_tsv(expected_file,
                    actual_file,
                    ignore_row_order=True,
                    na_to_empty_str=True)
Exemplo n.º 2
0
def test_demultiplex_gz(tmp_dir, file_format):
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex` when the
    multiplexed input is a GZIPped FASTQ file.

    ``file_format`` is a pair: the FASTQ GZIP file name format and
    the matching non-GZIP FASTQ file name format.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    :param file_format: File name format
    :type file_format: tuple(str or unicode, str or unicode)
    """
    gz_fmt, fmt = file_format
    tmp_fastq_file = os.path.join(tmp_dir,
                                  gz_fmt.format("test_multiplex"))
    sim_dir = riboviz.test.SIMDATA_DIR
    # GZIP the simulated multiplexed FASTQ file into the temporary
    # directory to create the input.
    with open(os.path.join(sim_dir, "multiplex.fastq"), "rb") as src, \
            gzip.open(tmp_fastq_file, "wb") as dest:
        shutil.copyfileobj(src, dest)
    demultiplex_fastq.demultiplex(
        os.path.join(sim_dir, "multiplex_barcodes.tsv"),
        tmp_fastq_file,
        mismatches=2,
        out_dir=tmp_dir)

    expected_dir = os.path.join(sim_dir, "deplex")
    utils.equal_tsv(
        os.path.join(expected_dir, demultiplex_fastq.NUM_READS_FILE),
        os.path.join(tmp_dir, demultiplex_fastq.NUM_READS_FILE))

    # Actual data has extension matching lower-case version
    # of multiplexed file's extension.
    gz_fmt_lower = gz_fmt.lower()
    for tag in ["Tag0", "Tag1", "Tag2", "Unassigned"]:
        compressed_fq = os.path.join(tmp_dir, gz_fmt_lower.format(tag))
        uncompressed_fq = os.path.join(tmp_dir, fmt.format(tag))
        # Simulated data always has a .fastq extension.
        expected_fq = os.path.join(expected_dir,
                                   fastq.FASTQ_FORMAT.format(tag))
        # Decompress the actual output so it can be compared against
        # the uncompressed expected file.
        with gzip.open(compressed_fq, "rb") as src, \
                open(uncompressed_fq, "wb") as dest:
            shutil.copyfileobj(src, dest)
        fastq.equal_fastq(expected_fq, uncompressed_fq)
    # The definition of the simulated data means that Tag3 has no
    # matches, as Tag0|1|2 will match any barcodes first. Check
    # there is no Tag3-related output file.
    tag3_file = os.path.join(tmp_dir, gz_fmt_lower.format("Tag3"))
    assert not os.path.exists(tag3_file)
Exemplo n.º 3
0
def compare_files(file1, file2, compare_names=True):
    """
    Compare two files for equality. The comparison used is selected
    from the extension of ``file1``. The following functions are used
    to compare each type of file:

    * ``bai``: :py:func:`riboviz.utils.equal_file_sizes`
    * ``bam``: :py:func:`riboviz.sam_bam.equal_bam`
    * ``bedgraph``: :py:func:`riboviz.bedgraph.equal_bedgraph`
    * ``fq``: :py:func:`riboviz.fastq.equal_fastq`
    * ``h5``: :py:func:`riboviz.h5.equal_h5`
    * ``ht2``: :py:func:`riboviz.utils.equal_file_sizes`
    * ``pdf``: :py:func:`riboviz.utils.equal_file_names`
    * ``sam``: :py:func:`riboviz.sam_bam.equal_sam`
    * ``tsv``: :py:func:`riboviz.utils.equal_tsv`

    ``pdf`` files are compared by name only, and this is done
    regardless of the value of ``compare_names``.

    :param file1: File name
    :type file1: str or unicode
    :param file2: File name
    :type file2: str or unicode
    :param compare_names: Compare file names?
    :type compare_names: bool
    :raise AssertionError: If one or other file does not exist, \
    is a directory, is of an unknown file type, or their contents \
    differ
    """
    # Raise explicitly rather than using ``assert`` statements so that
    # these checks are not stripped when Python runs with ``-O``.
    for path in (file1, file2):
        if not os.path.exists(path):
            raise AssertionError("Non-existent file: %s" % path)
    for path in (file1, file2):
        if os.path.isdir(path):
            raise AssertionError("Directory: %s" % path)
    if compare_names:
        utils.equal_file_names(file1, file2)
    ext = utils.get_file_ext(file1)
    if ext.endswith("pdf"):
        utils.equal_file_names(file1, file2)
    elif ext.endswith((hisat2.HT2_EXT, sam_bam.BAI_EXT)):
        utils.equal_file_sizes(file1, file2)
    elif ext.endswith(h5.H5_EXT):
        h5.equal_h5(file1, file2)
    elif ext.endswith(bedgraph.BEDGRAPH_EXT):
        bedgraph.equal_bedgraph(file1, file2)
    elif ext.endswith(sam_bam.BAM_EXT):
        sam_bam.equal_bam(file1, file2)
    elif ext.endswith(sam_bam.SAM_EXT):
        sam_bam.equal_sam(file1, file2)
    elif ext.endswith("tsv"):
        utils.equal_tsv(file1, file2)
    elif ext.endswith(tuple(fastq.FASTQ_ALL_EXTS)):
        fastq.equal_fastq(file1, file2)
    else:
        # An unrecognised extension must fail the comparison instead
        # of silently passing.
        raise AssertionError("Unknown file type: " + ext)
Exemplo n.º 4
0
def test_collate_orf_tpms_and_counts_tsv(expected_fixture, dir_out):
    """
    Check that the TSV file from
    :py:const:`riboviz.workflow_r.COLLATE_TPMS_R` equals the expected
    file. See :py:func:`riboviz.utils.equal_tsv`.

    :param expected_fixture: Expected data directory
    :type expected_fixture: str or unicode
    :param dir_out: Output directory
    :type dir_out: str or unicode
    """
    # The expected data is laid out under a directory with the same
    # name as the last component of dir_out.
    out_name = os.path.basename(os.path.normpath(dir_out))
    tsv_name = workflow_r.TPMS_ALL_CDS_ALL_SAMPLES_TSV
    expected_file = os.path.join(expected_fixture, out_name, tsv_name)
    actual_file = os.path.join(dir_out, tsv_name)
    # Non-default comparison options are needed because some columns
    # hold string values.
    utils.equal_tsv(expected_file,
                    actual_file,
                    ignore_row_order=True,
                    na_to_empty_str=True)
Exemplo n.º 5
0
def test_deplex_num_reads(configuration_module):
    """
    Check that the number of reads summary written during
    demultiplexing equals the expected summary.

    :param configuration_module: temporary configuration and \
    configuration file
    :type configuration_module: tuple(dict, str or unicode)
    """
    settings, _ = configuration_module
    deplex_dir_name = workflow_files.DEPLEX_DIR_FORMAT.format(
        "multiplex_umi_barcode_adaptor")
    num_reads_file = demultiplex_fastq.NUM_READS_FILE
    actual_file = os.path.join(settings[params.TMP_DIR],
                               deplex_dir_name,
                               num_reads_file)
    expected_file = os.path.join(riboviz.test.SIMDATA_DIR,
                                 "deplex",
                                 num_reads_file)
    utils.equal_tsv(expected_file, actual_file)
Exemplo n.º 6
0
def test_demultiplex(tmp_dir, file_format):
    """
    Test :py:func:`riboviz.demultiplex_fastq.demultiplex` on an
    uncompressed multiplexed FASTQ file.

    :param tmp_dir: Temporary directory
    :type tmp_dir: str or unicode
    :param file_format: FASTQ file format
    :type file_format: str or unicode
    """
    tmp_fastq_file = os.path.join(tmp_dir,
                                  file_format.format("test_multiplex"))
    sim_dir = riboviz.test.SIMDATA_DIR
    # Copy the simulated multiplexed FASTQ file into the temporary
    # directory, under the file name format being tested.
    shutil.copyfile(os.path.join(sim_dir, "multiplex.fastq"),
                    tmp_fastq_file)
    demultiplex_fastq.demultiplex(
        os.path.join(sim_dir, "multiplex_barcodes.tsv"),
        tmp_fastq_file,
        mismatches=2,
        out_dir=tmp_dir)

    expected_dir = os.path.join(sim_dir, "deplex")
    utils.equal_tsv(
        os.path.join(expected_dir, demultiplex_fastq.NUM_READS_FILE),
        os.path.join(tmp_dir, demultiplex_fastq.NUM_READS_FILE))

    # Actual data has extension matching lower-case version
    # of multiplexed file's extension.
    lower_format = file_format.lower()
    for tag in ["Tag0", "Tag1", "Tag2", "Unassigned"]:
        actual_fq = os.path.join(tmp_dir, lower_format.format(tag))
        # Simulated data always has a .fastq extension.
        expected_fq = os.path.join(expected_dir,
                                   fastq.FASTQ_FORMAT.format(tag))
        fastq.equal_fastq(expected_fq, actual_fq)
    # The definition of the simulated data means that Tag3 has no
    # matches, as Tag0|1|2 will match any barcodes first. Check
    # there is no Tag3-related output file.
    tag3_file = os.path.join(tmp_dir, lower_format.format("Tag3"))
    assert not os.path.exists(tag3_file)
Exemplo n.º 7
0
def compare_tsv_files(expected_fixture, directory, subdirectory, file_name):
    """
    Compare an actual TSV file with its expected counterpart. See
    :py:func:`riboviz.utils.equal_tsv`.

    The actual file is ``directory/subdirectory/file_name``. The
    expected file is at the same relative location under
    ``expected_fixture``, below a directory named after the last
    path component of ``directory``.

    :param expected_fixture: Expected data directory
    :type expected_fixture: str or unicode
    :param directory: Directory
    :type directory: str or unicode
    :param subdirectory: Subdirectory
    :type subdirectory: str or unicode
    :param file_name: file name
    :type file_name: str or unicode
    """
    base_name = os.path.basename(os.path.normpath(directory))
    expected_file = os.path.join(
        expected_fixture, base_name, subdirectory, file_name)
    actual_file = os.path.join(directory, subdirectory, file_name)
    utils.equal_tsv(expected_file, actual_file)