def splitBad(self, line, lines):
    # type: (NewLine, NewLineStorage) -> None
    """Split ``line`` into the parts that are reliably covered by reads.

    Alignments whose ``contradictingRTC()`` is true are ignored. The rest are
    filtered by coverage (minimum ``params.reliable_coverage``) and segments
    shorter than ``params.k`` are discarded.

    Outcomes:
      * no segment survives: a warning is logged and ``line`` is removed
        from ``lines``;
      * a single segment longer than ``len(line) - 10`` survives: the line
        is left untouched;
      * otherwise the line is cut at the right end of the last segment and
        then split, right to left, once per remaining segment via
        ``lines.splitLine``.

    :param line: the line to inspect and possibly split.
    :param lines: the storage that owns ``line``; it performs the removal
        and the splits.
    """
    consistent = AlignmentStorage()
    consistent.addAll(al for al in line.read_alignments if not al.contradictingRTC())
    # Build a real list: filter() is lazy on Python 3, and the code below
    # needs len() and indexing.
    segs = [
        seg
        for seg in consistent.filterByCoverage(mi=params.reliable_coverage, k=params.k)
        if len(seg) >= params.k
    ]  # type: List[Segment]
    if not segs:
        sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
        lines.removeLine(line)
        return
    if len(segs) == 1 and len(segs[0]) > len(line) - 10:
        sys.stdout.info("Whole line", line.id, "is covered by reads")
        return
    sys.stdout.info("Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
    sys.stdout.trace(segs)
    # Work from the rightmost segment towards the left so that positions of
    # the segments still to be processed are not shifted by earlier cuts.
    next_left = segs[-1].left
    line.cutRight(segs[-1].right)
    for seg in reversed(segs[:-1]):
        if next_left < seg.right:
            # Neighbouring segments overlap: split on the overlap itself.
            line, new_line = lines.splitLine(line.segment(next_left, seg.right))
        else:
            # Gap between segments: split at a single point, then trim the
            # left part back to the end of its covered segment.
            line, new_line = lines.splitLine(line.segment(next_left, next_left))
            # NOTE(review): assumed to belong to the gap case only; in the
            # overlap case the left part presumably already ends at
            # seg.right - confirm against splitLine.
            line.cutRight(seg.right)
        next_left = seg.left
    # Trim the reverse strand of the leftmost part to the length of the first
    # segment (presumably dropping the uncovered prefix - confirm against
    # cutRight).
    line.rc.cutRight(len(segs[0]))
def splitBad(self, lines):
    # type: (NewLineStorage) -> None
    """Split every unique line of ``lines`` at unreliably covered regions.

    The median of all coverage records (``calculateCoverage(params.k)`` over
    every line) is computed first. For each unique line, alignments whose
    ``contradictingRTC()`` is true are ignored and the rest are filtered by
    coverage between ``params.reliable_coverage`` and ``median * 7 / 4``;
    the resulting segments are collected in a ``SegmentStorage`` and passed
    through ``mergeSegments``.

    Per line:
      * no segment: a warning is logged and the line is removed;
      * one segment longer than ``len(line) - 10``: nothing to do;
      * otherwise the line is cut at the right end of the last segment and
        split, right to left, once per remaining segment.

    :param lines: the storage whose unique lines are examined; it also
        performs removals and splits.
    """
    coverage_records = [
        rec
        for covered_line in lines
        for rec in covered_line.read_alignments.calculateCoverage(params.k)
    ]
    median = self.medianCoverage(coverage_records)
    sys.stdout.info("Median coverage determined as", median)

    # Snapshot the ids up front: the storage is modified while we iterate.
    unique_ids = [unique_line.id for unique_line in lines.unique()]
    for line_id in unique_ids:
        line = lines[line_id]

        consistent = AlignmentStorage()
        consistent.addAll(al for al in line.read_alignments if not al.contradictingRTC())
        # NOTE(review): both divisions below use "/", so they truncate for
        # ints on Python 2 only - confirm the intended interpreter.
        covered = consistent.filterByCoverage(
            mi=params.reliable_coverage, ma=median * 7 / 4, k=params.k)
        segs = SegmentStorage().addAll(covered)
        segs.mergeSegments(max(params.k - params.bad_end_length * 2, params.k / 2))

        if len(segs) == 0:
            sys.stdout.warn("No part of a unique edge is covered by reads", line.id)
            lines.removeLine(line)
            continue
        if len(segs) == 1 and len(segs[0]) > len(line) - 10:
            sys.stdout.info("Whole line", line.id, "is covered by reads")
            continue

        sys.stdout.info("Line", line.id, "has poorly covered regions. Splitting into", len(segs), "parts")
        sys.stdout.trace(segs)

        # Proceed from the rightmost segment towards the left.
        next_left = segs[-1].left
        line.cutRight(segs[-1].right)
        remaining = list(segs)[-2::-1]
        for seg in remaining:
            if next_left >= seg.right:
                # Gap between segments: split at a single point, then trim
                # the left part back to the end of its covered segment.
                line, _ = lines.splitLine(line.segment(next_left, next_left))
                # NOTE(review): assumed to belong to the gap case only; in
                # the overlap case the left part presumably already ends at
                # seg.right - confirm against splitLine.
                line.cutRight(seg.right)
            else:
                # Neighbouring segments overlap: split on the overlap itself.
                line, _ = lines.splitLine(line.segment(next_left, seg.right))
            next_left = seg.left
        # Trim the reverse strand of the leftmost part to the length of the
        # first segment.
        line.rc.cutRight(len(segs[0]))
class Disjointig(Contig):
    """Contig that keeps the read alignments pointing to it.

    Every instance is paired with a reverse-complement twin reachable through
    ``rc``. The twin's ``read_alignments`` is the ``rc`` view of the primary
    strand's storage.
    """

    def __init__(self, seq, id, rc=None):
        # type: (str, str, Optional[Disjointig]) -> None
        """Create a disjointig and, unless one is supplied, its twin.

        :param seq: sequence of this strand.
        :param id: identifier of this strand.
        :param rc: existing reverse-complement twin. When omitted, a twin is
            built from ``basic.RC(seq)`` and ``basic.Reverse(id)``.
        """
        self.seq = seq
        self.id = id
        if rc is None:
            # The storage must exist before the twin is constructed: the
            # twin's constructor reads self.read_alignments.rc.
            self.read_alignments = AlignmentStorage()  # type: AlignmentStorage
            rc = Disjointig(basic.RC(seq), basic.Reverse(id), self)  # type: Disjointig
            self.rc = rc
        else:
            self.rc = rc
            self.read_alignments = self.rc.read_alignments.rc  # type: AlignmentStorage
        Contig.__init__(self, seq, id, rc)
        # Re-assign after the base constructor so that rc is the Disjointig twin.
        self.rc = rc  # type: Disjointig

    def addAlignments(self, als):
        # type: (Iterable[AlignmentPiece]) -> None
        """Add every alignment in ``als`` to this disjointig's storage."""
        self.read_alignments.addAll(als)

    def addAlignment(self, al):
        # type: (AlignmentPiece) -> None
        """Add a single alignment to this disjointig's storage."""
        self.read_alignments.add(al)

    def getAlignmentsTo(self, seg):
        # type: (Segment) -> Generator[AlignmentPiece]
        """Return the result of the storage's ``getAlignmentsTo(seg)``."""
        return self.read_alignments.getAlignmentsTo(seg)

    def allInter(self, seg, min_inter=1):
        # type: (Segment, int) -> Generator[AlignmentPiece]
        """Return the result of the storage's ``allInter(seg, min_inter)``."""
        return self.read_alignments.allInter(seg, min_inter)

    def save(self, handler):
        # type: (TokenWriter) -> None
        """Write id, sequence and read alignments, in that order."""
        handler.writeTokenLine(self.id)
        handler.writeTokenLine(self.seq)
        self.read_alignments.save(handler)

    def loadDisjointig(self, handler, reads):
        # type: (TokenReader, ReadCollection) -> None
        """Restore the state written by ``save`` from ``handler``.

        Tokens are consumed in the order ``save`` writes them: id, sequence,
        alignments.

        :param handler: token reader positioned at a saved disjointig.
        :param reads: read collection passed through to the alignment loader.
        """
        self.id = handler.readToken()
        self.rc.id = basic.Reverse(self.id)
        seq = handler.readToken()
        # save() stores the sequence, so restore it on both strands; without
        # this the loaded alignments would refer to whatever sequence the
        # object happened to be constructed with.
        self.seq = seq
        self.rc.seq = basic.RC(seq)
        self.read_alignments.load(handler, reads, self)
def testManual(self):
    """Drive an AlignmentStorage through a hand-built scenario.

    Three identical alignments between two 12-letter contigs are stored, then
    two more are added. After each step the printed form of the storage, its
    reverse-complement view, its coverage records, coverage-filtered segments
    and per-segment alignment queries are compared with expected strings.
    """
    def rendered(items):
        # Every check compares the printed form of the materialised items.
        return str(list(items))

    source = Contig("ACGTACGTACGT", "from")
    target = Contig("ACGTACGTACGT", "to")
    first = AlignmentPiece.Identical(source.segment(0, 4), target.segment(0, 4))
    second = AlignmentPiece.Identical(source.segment(0, 4), target.segment(4, 8))
    third = AlignmentPiece.Identical(source.segment(4, 8), target.segment(8, 12))

    storage = AlignmentStorage()
    storage.addAll([first, second, third])

    initial_listing = (
        "[(from[0:4]->to[0:4]:1.000), "
        "(from[0:4]->to[4:12-4]:1.000), "
        "(from[4:12-4]->to[8:12-0]:1.000)]"
    )
    assert rendered(storage) == initial_listing
    assert rendered(storage.rc) == (
        "[(-from[4:12-4]->-to[0:4]:1.000), "
        "(-from[8:12-0]->-to[4:12-4]:1.000), "
        "(-from[8:12-0]->-to[8:12-0]:1.000)]"
    )
    assert rendered(storage.calculateCoverage()) == "[(to[0:12-0], 1)]"
    assert rendered(storage.filterByCoverage(0, 1)) == "[]"
    assert rendered(storage.filterByCoverage(1, 2)) == "[to[0:12-0]]"
    assert rendered(storage.filterByCoverage(2)) == "[]"

    # Adding the last alignment again through addAndMergeRight must leave the
    # listing unchanged.
    storage.addAndMergeRight(third)
    assert rendered(storage) == initial_listing

    fourth = AlignmentPiece.Identical(source.segment(2, 8), target.segment(2, 8))
    fifth = AlignmentPiece.Identical(source.segment(4, 10), target.segment(4, 10))
    storage.addAll([fourth, fifth])

    assert rendered(storage.calculateCoverage()) == (
        "[(to[0:2], 1), (to[2:4], 2), (to[4:12-4], 3), "
        "(to[8:12-2], 2), (to[10:12-0], 1)]"
    )
    assert rendered(storage.filterByCoverage(2, 3)) == "[to[2:4], to[8:12-2]]"
    assert rendered(storage.filterByCoverage(2)) == "[to[2:12-2]]"

    narrow_query = target.segment(2, 3)
    assert rendered(storage.getAlignmentsTo(narrow_query)) == (
        "[(from[0:4]->to[0:4]:1.000), (from[2:12-4]->to[2:12-4]:1.000)]"
    )
    wide_query = target.segment(2, 6)
    assert rendered(storage.getAlignmentsTo(wide_query)) == (
        "[(from[2:12-4]->to[2:12-4]:1.000)]"
    )