def test_sheet_germline_inconsistent_pedigree(
    tsv_sheet_germline_inconsistent_pedigree,
    tsv_sheet_germline_trio_plus,
):
    """Check that a sheet whose join field contradicts its rows is rejected.

    The trio-plus sheet is joined by ``familyId`` first as a sanity check;
    the inconsistent sheet must then raise ``InconsistentPedigreeException``.
    """
    # Sanity check: the well-formed sheet can be joined by ``familyId``.
    consistent_sheet = io_tsv.read_germline_tsv_sheet(tsv_sheet_germline_trio_plus)
    shortcuts.GermlineCaseSheet(sheet=consistent_sheet, join_by_field='familyId')

    # Every pedigree member carries its own `familyId` rather than a shared
    # one, so building the shortcut sheet is expected to fail.
    with pytest.raises(InconsistentPedigreeException):
        conflicting_sheet = io_tsv.read_germline_tsv_sheet(
            tsv_sheet_germline_inconsistent_pedigree)
        shortcuts.GermlineCaseSheet(
            sheet=conflicting_sheet, join_by_field='familyId')
Пример #2
0
def yield_ngs_library_names(sheet,
                            min_batch=None,
                            batch_key="batchNo",
                            pedigree_field=None):
    """Yield the DNA NGS library name of each pedigree's index donor.

    A pedigree is reported when its index has a DNA NGS library and, if
    ``min_batch`` is given, the highest batch number found among the
    pedigree's donors is at least ``min_batch``. Donors without a
    ``batch_key`` entry count as batch ``0``.

    :param sheet: Sample sheet.
    :type sheet: biomedsheets.models.Sheet

    :param min_batch: Minimum batch number to be extracted from the sheet.
    Pedigrees whose highest batch number is below this value are skipped.
    :type min_batch: int

    :param batch_key: Batch number key in sheet. Default: 'batchNo'.
    :type batch_key: str

    :param pedigree_field: Field that should be used to define a pedigree.
    If none is provided, no ``join_by_field`` is passed and the shortcut
    sheet is built with its default behaviour.
    """
    # Only forward ``join_by_field`` when a pedigree field was requested.
    sheet_kwargs = {"join_by_field": pedigree_field} if pedigree_field else {}
    germline_sheet = shortcuts.GermlineCaseSheet(sheet, **sheet_kwargs)

    for pedigree in germline_sheet.cohort.pedigrees:
        newest_batch = max(
            member.extra_infos.get(batch_key, 0) for member in pedigree.donors)
        # Drop pedigrees that have no donor in a recent enough batch.
        if min_batch is not None and not min_batch <= newest_batch:
            continue
        # Drop pedigrees whose index has no DNA NGS library.
        if not pedigree.index.dna_ngs_library:
            continue
        yield pedigree.index.dna_ngs_library.name
Пример #3
0
 def __init__(self, args):
     """Store the arguments and load the raw and shortcut sample sheets."""
     #: Command line arguments.
     self.args = args
     #: Raw sample sheets, as returned by ``load_sheets_tsv`` for the arguments.
     self.sheets = load_sheets_tsv(self.args)
     #: One ``GermlineCaseSheet`` shortcut per raw sheet, in the same order.
     self.shortcut_sheets = list(map(shortcuts.GermlineCaseSheet, self.sheets))
Пример #4
0
    def yield_ngs_library_names(self,
                                sheet,
                                min_batch=None,
                                max_batch=None,
                                batch_key="batchNo",
                                family_key="familyId"):
        """Yield index only NGS library names from sheet.

        For every pedigree only the index donor is considered. When
        ``min_batch`` and/or ``max_batch`` is given, the donor's batch is
        resolved via ``self._batch_of`` and the donor is skipped if that batch
        is below ``min_batch`` or above ``max_batch`` (both bounds are
        inclusive). Without any bound, every index donor is yielded.

        This function can be overloaded, for example to only consider the indexes.

        :param sheet: Sample sheet.
        :type sheet: biomedsheets.models.Sheet

        :param min_batch: Minimum batch number to be extracted from the sheet. All samples in batches below this value
        will be skipped.
        :type min_batch: int

        :param max_batch: Maximum batch number to be extracted from the sheet. All samples in batches above this value
        will be skipped.
        :type max_batch: int

        :param batch_key: Batch number key in sheet. Default: 'batchNo'.
        :type batch_key: str

        :param family_key: Family identifier key. Default: 'familyId'.
        :type family_key: str
        """
        family_max_batch = self._build_family_max_batch(
            sheet, batch_key, family_key)

        shortcut_sheet = shortcuts.GermlineCaseSheet(sheet)
        for pedigree in shortcut_sheet.cohort.pedigrees:
            donor = pedigree.index
            # Resolve the batch whenever *either* bound is set; previously it
            # was only computed for ``min_batch``, so passing ``max_batch``
            # alone failed on an unbound ``batch``.
            if min_batch is not None or max_batch is not None:
                batch = self._batch_of(donor, family_max_batch, batch_key,
                                       family_key)
                # Log the value that is actually compared rather than indexing
                # ``donor.extra_infos``, which may not contain ``batch_key``.
                if min_batch is not None and batch < min_batch:
                    logger.debug(
                        "Skipping donor %s because %s = %d < min_batch = %d",
                        donor.name,
                        batch_key,
                        batch,
                        min_batch,
                    )
                    continue
                if max_batch is not None and batch > max_batch:
                    logger.debug(
                        "Skipping donor %s because %s = %d > max_batch = %d",
                        donor.name,
                        batch_key,
                        batch,
                        max_batch,
                    )
                    continue
            logger.debug("Processing NGS library for donor %s", donor.name)
            yield donor.dna_ngs_library.name
Пример #5
0
def yield_ngs_library_names(sheet, min_batch=None, batch_key="batchNo"):
    """Yield the DNA NGS library name of each pedigree's index donor.

    When ``min_batch`` is given, index donors whose ``extra_infos[batch_key]``
    is below ``min_batch`` are skipped. Donors that do not carry ``batch_key``
    at all are never filtered out.

    :param sheet: Sample sheet.
    :type sheet: biomedsheets.models.Sheet

    :param min_batch: Minimum batch number to be extracted from the sheet.
    :type min_batch: int

    :param batch_key: Batch number key in sheet. Default: 'batchNo'.
    :type batch_key: str
    """
    shortcut_sheet = shortcuts.GermlineCaseSheet(sheet)
    for pedigree in shortcut_sheet.cohort.pedigrees:
        donor = pedigree.index
        if min_batch is not None and batch_key in donor.extra_infos:
            batch = donor.extra_infos[batch_key]
            if min_batch > batch:
                # Argument order must follow the placeholders: key name for
                # ``%s``, batch value for ``%d``. They used to be swapped,
                # which fed the key string into ``%d``.
                logger.debug(
                    "Skipping donor %s because %s = %d < min_batch = %d",
                    donor.name,
                    batch_key,
                    batch,
                    min_batch,
                )
                continue
        yield donor.dna_ngs_library.name
Пример #6
0
def test_load_sheet_tsv():
    """Check that ``load_sheet_tsv()`` returns a usable ``models.Sheet``."""
    # Library names accepted for the index donors of the example sheet.
    known_index_libraries = [
        "P001-N1-DNA1-WGS1",
        "P004-N1-DNA1-WGS1",
        "P007-N1-DNA1-WGS1",
    ]

    # The example sheet lives in the ``data`` directory next to this file.
    data_dir = pathlib.Path(__file__).resolve().parent / "data"
    loaded_sheet = load_sheet_tsv(path_tsv=data_dir / "germline_sheet.tsv")
    assert isinstance(loaded_sheet, models.Sheet)

    # Wrap into the shortcut sheet to reach each pedigree's index library.
    for pedigree in shortcuts.GermlineCaseSheet(loaded_sheet).cohort.pedigrees:
        assert pedigree.index.dna_ngs_library.name in known_index_libraries
Пример #7
0
    def yield_ngs_library_names(self,
                                sheet,
                                min_batch=None,
                                batch_key="batchNo",
                                family_key="familyId"):
        """Yield the DNA NGS library name of each pedigree's index donor.

        When ``min_batch`` is given, the donor's batch is resolved via
        ``self._batch_of`` and donors whose batch is below ``min_batch`` are
        skipped.

        :param sheet: Sample sheet.
        :type sheet: biomedsheets.models.Sheet

        :param min_batch: Minimum batch number to be extracted from the sheet.
        :type min_batch: int

        :param batch_key: Batch number key in sheet. Default: 'batchNo'.
        :type batch_key: str

        :param family_key: Family identifier key. Default: 'familyId'.
        :type family_key: str
        """
        family_max_batch = self._build_family_max_batch(
            sheet, batch_key, family_key)

        shortcut_sheet = shortcuts.GermlineCaseSheet(sheet)
        for pedigree in shortcut_sheet.cohort.pedigrees:
            donor = pedigree.index
            if min_batch is not None:
                batch = self._batch_of(donor, family_max_batch, batch_key,
                                       family_key)
                if batch < min_batch:
                    # Report the value that was actually compared; indexing
                    # ``donor.extra_infos`` here could raise ``KeyError`` when
                    # the donor itself does not carry ``batch_key``.
                    logger.debug(
                        "Skipping donor %s because %s = %d < min_batch = %d",
                        donor.name,
                        batch_key,
                        batch,
                        min_batch,
                    )
                    continue
            yield donor.dna_ngs_library.name
def sheet_germline(tsv_sheet_germline):
    """Return the ``GermlineCaseSheet`` shortcut for the germline example."""
    raw_sheet = io_tsv.read_germline_tsv_sheet(tsv_sheet_germline)
    return shortcuts.GermlineCaseSheet(raw_sheet)
def test_sheet_germline_trio_plus_exception(tsv_sheet_germline_trio_plus):
    """Check that joining by an undefined field raises ``UndefinedFieldException``."""
    with pytest.raises(UndefinedFieldException):
        trio_plus_sheet = io_tsv.read_germline_tsv_sheet(
            tsv_sheet_germline_trio_plus)
        shortcuts.GermlineCaseSheet(
            sheet=trio_plus_sheet, join_by_field='undefined_field')
def sheet_germline_only_parent_samples(tsv_sheet_germline_only_parent_samples):
    """Return the ``GermlineCaseSheet`` shortcut for the example where only parents have samples.

    The TSV is read with the ``NAMING_ONLY_SECONDARY_ID`` naming scheme.
    """
    raw_sheet = io_tsv.read_germline_tsv_sheet(
        tsv_sheet_germline_only_parent_samples,
        naming_scheme=naming.NAMING_ONLY_SECONDARY_ID,
    )
    return shortcuts.GermlineCaseSheet(raw_sheet)
def sheet_germline_two_test_samples(tsv_sheet_germline_two_test_samples):
    """Return the ``GermlineCaseSheet`` shortcut for the example with two test samples.

    The TSV is read with the ``NAMING_ONLY_SECONDARY_ID`` naming scheme.
    """
    raw_sheet = io_tsv.read_germline_tsv_sheet(
        tsv_sheet_germline_two_test_samples,
        naming_scheme=naming.NAMING_ONLY_SECONDARY_ID,
    )
    return shortcuts.GermlineCaseSheet(raw_sheet)
def sheet_germline_multiple_trio_plus(tsv_sheet_germline_multiple_trio_plus):
    """Return the ``GermlineCaseSheet`` shortcut for the multiple trio plus example.

    Pedigrees are joined by the ``familyId`` field.
    """
    raw_sheet = io_tsv.read_germline_tsv_sheet(
        tsv_sheet_germline_multiple_trio_plus)
    return shortcuts.GermlineCaseSheet(
        sheet=raw_sheet, join_by_field='familyId')