Exemplo n.º 1
0
    def test_make_read(self):
        """Checks that make_read copies each argument onto the returned read."""
        sequence = 'ACG'
        read_kwargs = {
            'quals': [30, 40, 50],
            'cigar': '3M',
            'mapq': 42,
            'chrom': 'chr10',
            'start': 123,
            'name': 'myname',
        }
        read = test_utils.make_read(sequence, **read_kwargs)

        # Sequence-level fields.
        self.assertEqual(read.aligned_sequence, sequence)
        self.assertEqual(read.aligned_quality, read_kwargs['quals'])

        # The '3M' cigar string should come back as one 3-base match unit.
        match_unit = cigar_pb2.CigarUnit(
            operation_length=3,
            operation=cigar_pb2.CigarUnit.ALIGNMENT_MATCH)
        self.assertEqual(list(read.alignment.cigar), [match_unit])

        # Alignment-level fields.
        self.assertEqual(read.alignment.mapping_quality, read_kwargs['mapq'])
        self.assertEqual(read.alignment.position.reference_name,
                         read_kwargs['chrom'])
        self.assertEqual(read.alignment.position.position,
                         read_kwargs['start'])
        self.assertEqual(read.fragment_name, read_kwargs['name'])
Exemplo n.º 2
0
 def _add_cigar_unit(self, cigar, cigar_op, cigar_len):
     """Adds `cigar_len` bases of `cigar_op` to the end of `cigar` in place.

     Non-positive lengths are ignored. When the last unit already in `cigar`
     has the same operation, its length is increased instead of appending a
     new unit.

     Args:
       cigar: mutable sequence of cigar units; modified in place.
       cigar_op: the operation of the unit to add.
       cigar_len: the number of bases the operation covers.
     """
     if cigar_len <= 0:
         return

     extends_last_unit = bool(cigar) and cigar[-1].operation == cigar_op
     if not extends_last_unit:
         # Different operation (or empty cigar): start a new unit.
         new_unit = cigar_pb2.CigarUnit(operation=cigar_op,
                                        operation_length=cigar_len)
         cigar.extend([new_unit])
         return

     # Same operation as the trailing unit: just lengthen it.
     cigar[-1].operation_length += cigar_len
Exemplo n.º 3
0
def to_cigar_unit(source):
    """Creates a cigar_pb2 CigarUnit from source.

    This function attempts to convert source into a CigarUnit protobuf. If
    source is a string, it must be a single CIGAR string specification like
    '12M'. If source is a tuple or a list, it must have exactly two elements
    (operation_length, opstr). operation_length can be a string or int, and
    must be >= 1. opstr should be a single character CIGAR specification
    (e.g., 'M'). If source is already a CigarUnit, it is just passed through
    unmodified.

    Args:
      source: many types allowed. The object we want to convert to a CigarUnit
        proto.

    Returns:
      CigarUnit proto with operation_length and operation set to values from
        source.

    Raises:
      ValueError: if source cannot be converted or is malformed.
    """
    try:
        if isinstance(source, cigar_pb2.CigarUnit):
            return source
        elif isinstance(source, six.string_types):
            # Everything but the last character is the length, e.g. '12M'.
            length, op = source[:-1], source[-1]
        elif isinstance(source, (tuple, list)):
            length, op = source
        else:
            raise ValueError('Unexpected source', source)

        if isinstance(op, six.string_types):
            op = CHAR_TO_CIGAR_OPS[op]
        length = int(length)
        if length < 1:
            raise ValueError('Length must be >= 1', length)
        return cigar_pb2.CigarUnit(operation=op, operation_length=length)
    except (KeyError, IndexError, TypeError):
        # TypeError covers lengths int() cannot handle at all (e.g. None), so
        # that every malformed input surfaces as the documented ValueError.
        raise ValueError(
            'Failed to convert {} into a CigarUnit'.format(source))
Exemplo n.º 4
0
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import itertools

from absl.testing import absltest
from absl.testing import parameterized

from third_party.nucleus.protos import cigar_pb2
from third_party.nucleus.util import cigar

_CIGAR_TUPLES_AND_CIGAR_UNITS = [
    ((1, 'M'),
     cigar_pb2.CigarUnit(
         operation=cigar_pb2.CigarUnit.ALIGNMENT_MATCH, operation_length=1)),
    ((2, 'I'),
     cigar_pb2.CigarUnit(
         operation=cigar_pb2.CigarUnit.INSERT, operation_length=2)),
    ((3, 'D'),
     cigar_pb2.CigarUnit(
         operation=cigar_pb2.CigarUnit.DELETE, operation_length=3)),
    ((4, 'N'),
     cigar_pb2.CigarUnit(
         operation=cigar_pb2.CigarUnit.SKIP, operation_length=4)),
    ((5, 'S'),
     cigar_pb2.CigarUnit(
         operation=cigar_pb2.CigarUnit.CLIP_SOFT, operation_length=5)),
    ((6, 'H'),
     cigar_pb2.CigarUnit(
         operation=cigar_pb2.CigarUnit.CLIP_HARD, operation_length=6)),
Exemplo n.º 5
0
def trim_cigar(cigar, ref_trim, ref_length):
    """Trim a cigar string to a certain reference length.

    The cigar is walked once. Operations are first consumed until `ref_trim`
    reference bases have been skipped, then copied into the output until
    `ref_length` reference bases have been covered. An operation straddling
    either boundary is split. Operations that do not advance the reference
    and directly follow an exactly covered window are still kept.

    Args:
      cigar: list of `nucleus.protos.CigarUnit`s of the original read
        alignment.
      ref_trim: integer. Number of reference bases to trim off the beginning
        of the read.
      ref_length: integer. Number of reference bases to cover with the read,
        the middle part that is not trimmed from the start or end of the read.

    Returns:
      new_cigar: list of `nucleus.protos.CigarUnit`s of the final read
          alignment, after the left and/or right have been trimmed off. It
          never contains a unit whose length was cut down to zero or below.
      read_trim: The number of bases of the read that are trimmed off.
      new_read_length: The number of bases of the read that remain after
          trimming. This is different from the final number of reference
          bases; for example, an insertion makes the read longer without
          affecting the reference.
    """
    # First consume the ref until the trim is covered.
    trim_remaining = ref_trim
    # Then consume the ref until the ref_length is covered.
    ref_to_cover_remaining = ref_length
    read_trim = 0
    new_cigar = []
    new_read_length = 0
    for cigar_unit in cigar:
        c_operation_length = cigar_unit.operation_length
        # Each operation moves forward in the ref, the read, or both.
        advances_ref = cigar_unit.operation in cigar_utils.REF_ADVANCING_OPS
        advances_read = cigar_unit.operation in cigar_utils.READ_ADVANCING_OPS
        ref_step = c_operation_length if advances_ref else 0

        # First, use up each operation until the trimmed area is covered.
        if trim_remaining > 0:
            if ref_step <= trim_remaining:
                # Fully apply to the trim.
                trim_remaining -= ref_step
                read_trim += c_operation_length if advances_read else 0
                continue
            # Partially apply to finish the trim.
            ref_step -= trim_remaining
            read_trim += trim_remaining if advances_read else 0
            # The rest of the operation applies to the next stage and counts
            # towards covering the given ref window.
            c_operation_length = ref_step
            trim_remaining = 0

        # Once the trim is done, start applying cigar entries to covering the
        # ref window.
        if trim_remaining == 0:
            if ref_step <= ref_to_cover_remaining:
                # Fully apply to the window.
                new_cigar.append(
                    cigar_pb2.CigarUnit(operation=cigar_unit.operation,
                                        operation_length=c_operation_length))
                ref_to_cover_remaining -= ref_step
                new_read_length += c_operation_length if advances_read else 0
            else:
                # Partially apply to finish the window. If the window is
                # already covered there is nothing left to apply, so do not
                # emit a zero-length (or negative-length) unit.
                c_operation_length = ref_to_cover_remaining
                if c_operation_length > 0:
                    new_cigar.append(
                        cigar_pb2.CigarUnit(
                            operation=cigar_unit.operation,
                            operation_length=c_operation_length))
                    new_read_length += (
                        c_operation_length if advances_read else 0)
                ref_to_cover_remaining = 0
                break

    return new_cigar, read_trim, new_read_length