def GetPTDPrimerInfo(self, primer_info): #{
    """Assemble the primer-design fields for a tandem-duplication (PTD) event.

    Builds an inferred chimeric sequence from the transcript named by the
    first contig-to-transcript alignment, stores it in ``self.transcripts``
    under a generated chimera id, and returns the fields describing it.

    Args:
        primer_info: event object; this method reads ``EventID()``,
            ``ctg_id``, ``group_id``, ``C2TAlignOverlap()`` and the first
            two entries of ``c2t_aligns``.

    Returns:
        ``map(str, [event_id, chimera.id, chimera.left, chimera.right,
        gene.id, gene.left, gene.right])``.

    Raises:
        PrimerDesignerError: if ``self.transcripts`` already holds a
            different sequence under the generated chimera id.
    """
    event_id = primer_info.EventID()
    first_align = primer_info.c2t_aligns[0]
    second_align = primer_info.c2t_aligns[1]
    # contig coordinates of the junction between the two alignments
    contig = PrimerSeqCls(primer_info.ctg_id)
    contig.left = min(first_align.ctg_end, second_align.ctg_start)
    contig.right = max(first_align.ctg_end, second_align.ctg_start)
    # transcript coordinates of the duplicated region
    gene = PrimerSeqCls(first_align.transcript)
    gene.left = min(first_align.transcript_end, second_align.transcript_start)
    gene.right = max(first_align.transcript_end, second_align.transcript_start)
    dup_length = (gene.right - gene.left) + 1
    # construct inferred chimera sequence
    ctg_overlap = primer_info.C2TAlignOverlap()
    extra = ""
    if (0 > ctg_overlap): #{
        # A negative overlap is treated as contig sequence lying between the
        # two alignments. The original code referenced the undefined names
        # contig_left/contig_right here and raised NameError; the junction
        # coordinates live on the contig object.
        # NOTE(review): ConstructFusionSeq slices the equivalent gap as
        # [contig.left:contig.right-1]; the offsets below are kept exactly
        # as originally written -- confirm which bounds are intended.
        extra = self.contigs[primer_info.ctg_id][contig.left+1:contig.right]
    #} end if
    if ("-" == first_align.strand): #{
        extra = ReverseComplement(extra)
    #} end if
    chimera = PrimerSeqCls("G%i_%s_%s" %
        (primer_info.group_id, primer_info.ctg_id, gene.id))
    # transcript through gene.right, then the extra contig bases, then the
    # transcript again starting at index gene.left-1
    wt_seq = self.transcripts[gene.id]
    chimera.seq = (wt_seq[:gene.right] + extra + wt_seq[gene.left-1:])
    if (chimera.id in self.transcripts and
        self.transcripts[chimera.id] != chimera.seq): #{
        raise PrimerDesignerError("conflicting chimeric PTD sequences: %s" %
            chimera.id)
    #} end if
    self.transcripts[chimera.id] = chimera.seq
    chimera.left = gene.left
    chimera.right = gene.right + len(extra) + dup_length
    DebugMsg(self, "Overlap: %i, Extra: \"%s\", WT_Length: %i, Dup_Length: "
        "%i, CT_Length: %i, Calc_Length: %i" % (ctg_overlap, extra,
        len(self.transcripts[gene.id]), dup_length, len(chimera.seq),
        len(self.transcripts[gene.id])+len(extra)+dup_length))
    return map(str, [event_id, chimera.id, chimera.left, chimera.right,
        gene.id, gene.left, gene.right])
#} end def
def GetFusionPrimerInfo(self, primer_info): #{
    """Assemble the primer-design fields for a fusion event.

    The returned fields are, in order: event id, sequence id, left and
    right junction coordinates, then id/left/right for gene A and gene B.
    Gene A comes from the first alignment when that alignment is on the
    "+" strand, otherwise from the second alignment.

    When the contig is shorter than MIN_CTG_LEN a synthetic chimera is
    built with ConstructFusionSeq; if it is longer than the contig it is
    stored in ``self.transcripts`` and reported in place of the contig.

    Raises:
        PrimerDesignerError: if ``self.transcripts`` already holds a
            different sequence under the synthetic chimera's id.
    """
    event_id = primer_info.EventID()
    target = PrimerSeqCls(primer_info.ctg_id)
    first_aln = primer_info.c2t_aligns[0]
    second_aln = primer_info.c2t_aligns[1]
    junction_coords = (first_aln.ctg_end, second_aln.ctg_start)
    target.left = min(junction_coords)
    target.right = max(junction_coords)
    overlap = primer_info.C2TAlignOverlap()
    # ensure that geneA is the 5' gene, if possible
    if ("+" != first_aln.strand): #{
        geneA = self.GeneInfoFromAlign1(second_aln, overlap)
        geneB = self.GeneInfoFromAlign0(first_aln, overlap)
    else:
        geneA = self.GeneInfoFromAlign0(first_aln, overlap)
        geneB = self.GeneInfoFromAlign1(second_aln, overlap)
    #} end if
    ctg_len = len(self.contigs[target.id])
    if (MIN_CTG_LEN > ctg_len): #{
        DebugMsg(self, "Contig %s too short (%i): attempting to construct "
            "synthetic chimera" % (target.id, ctg_len))
        synthetic = self.ConstructFusionSeq(primer_info, target, overlap)
        already_known = (synthetic.id in self.transcripts)
        if (already_known and
            self.transcripts[synthetic.id] != synthetic.seq): #{
            raise PrimerDesignerError("conflicting chimeric fusion sequences: %s"
                % synthetic.id)
        #} end if
        # only switch to the synthetic sequence when it is actually longer
        if (len(synthetic.seq) > ctg_len): #{
            DebugMsg(self, "Using synthetic chimera sequence")
            self.transcripts[synthetic.id] = synthetic.seq
            target = synthetic
        #} end if
    #} end if
    fields = [
        event_id,
        target.id, target.left, target.right,
        geneA.id, geneA.left, geneA.right,
        geneB.id, geneB.left, geneB.right,
    ]
    return map(str, fields)
#} end def
def ConstructFusionSeq(self, primer_info, contig, ctg_overlap): #{
    """Build a synthetic fusion sequence from the two aligned references.

    The sequence is the result of RefSeqFromAlign0 on the first alignment,
    followed by either contig bases (when ``ctg_overlap`` is negative) or
    nothing, followed by the result of RefSeqFromAlign1 on the second
    alignment; for a non-negative overlap the first ``ctg_overlap`` bases
    of that second piece are dropped.

    Args:
        primer_info: event object supplying ``group_id``, ``ctg_id`` and
            the first two entries of ``c2t_aligns``.
        contig: object whose ``left`` / ``right`` bound the contig slice
            taken when the overlap is negative.
        ctg_overlap: signed overlap between the two alignments.

    Returns:
        A PrimerSeqCls whose ``seq``, ``left`` and ``right`` have been set.
    """
    first_aln = primer_info.c2t_aligns[0]
    second_aln = primer_info.c2t_aligns[1]
    chimera_name = "G%i_%s_%s/%s" % (primer_info.group_id,
        primer_info.ctg_id, first_aln.transcript, second_aln.transcript)
    chimera = PrimerSeqCls(chimera_name)
    head = self.RefSeqFromAlign0(first_aln)
    tail = self.RefSeqFromAlign1(second_aln)
    if (0 > ctg_overlap): #{
        # contig bases lying between the two alignments
        gap = self.contigs[primer_info.ctg_id][contig.left:contig.right-1]
    else:
        gap = ""
        # drop the overlapping prefix of the second piece
        tail = tail[ctg_overlap:]
    #} end if
    chimera.seq = (head + gap + tail)
    head_len = len(head)
    bounds = (head_len, head_len + ctg_overlap)
    chimera.left = min(bounds)
    chimera.right = max(bounds)
    tail_len = len(tail)
    DebugMsg(self, "Left: %s\nExtra: %s\nRight: %s\nOverlap: %i, "
        "WT_Length0: %i, WT_Length1: %i, Extra_len: %i, CT_Length: %i, "
        "Calc_Length: %i" % (head, gap, tail, ctg_overlap, head_len,
        tail_len+max(0,ctg_overlap), len(gap), len(chimera.seq),
        head_len+tail_len+len(gap)))
    return chimera
#} end def
def GetITDPrimerInfo(self, primer_info): #{
    """Assemble the primer-design fields for an internal tandem duplication.

    Locates the alignment block that contains the duplicated contig
    region, projects the duplication onto the transcript, and checks that
    the contig and transcript copies of the duplicated sequence agree
    (a single mismatch is tolerated and logged).

    Args:
        primer_info: event object; this method reads ``EventID()``,
            ``ctg_id``, ``c2g_aligns`` and the first entry of
            ``c2t_aligns``. If that alignment has no ``query_blocks`` /
            ``target_blocks`` they are filled in, in place, with a single
            block spanning the whole alignment.

    Returns:
        ``map(str, [event_id, contig.id, contig.left, contig.right,
        gene.id, gene.left, gene.right])``.

    Raises:
        PrimerDesignerError: if no alignment block contains the
            duplication, or if the contig and transcript duplicated
            sequences differ by more than one position.
    """
    event_id = primer_info.EventID()
    DebugMsg(self, "Getting Primer Info for %s %s" %
        (event_id, primer_info.ctg_id))
    contig = PrimerSeqCls(primer_info.ctg_id)
    contig.left = min(primer_info.c2g_aligns[1][0],
        primer_info.c2g_aligns[2][0])
    contig.right = max(primer_info.c2g_aligns[1][1],
        primer_info.c2g_aligns[2][1])
    # alignment to transcript
    align = primer_info.c2t_aligns[0]
    # ensure that there are some blocks
    if (align.query_blocks is None): #{
        align.query_blocks = [[align.ctg_start, align.ctg_end]]
    #} end if
    if (align.target_blocks is None): #{
        align.target_blocks = [[align.transcript_start, align.transcript_end]]
    #} end if
    cdup_coords = self.GetContigDuplicationCoords(primer_info.c2g_aligns,
        align)
    DebugMsg(self, "CDup: %i-%i" % (cdup_coords[0], cdup_coords[1]))
    gene = PrimerSeqCls(align.transcript)
    # Find the block that fully contains the duplicated contig region.
    # The for/else raises when the loop finishes without a break; the
    # previous post-loop test (num_blocks <= index) could never be true,
    # so a miss silently fell through to the last block (and an empty
    # block list raised NameError on the unbound loop variable).
    for index, query_block in enumerate(align.query_blocks): #{
        target_block = align.target_blocks[index]
        DebugMsg(self, "Block: %i-%i::%i-%i" % (query_block[0],
            query_block[1], target_block[0], target_block[1]))
        if (query_block[0] <= cdup_coords[0] and
            cdup_coords[1] <= query_block[1]): #{
            DebugMsg(self, "Found block %i" % index)
            break
        #} end if
    else:
        raise PrimerDesignerError("could not find block containing duplication!")
    #} end for
    # offsets of the duplication from the start of the containing block;
    # negated for minus-strand alignments
    start_offset = (cdup_coords[0] - query_block[0])
    end_offset = (cdup_coords[1] - query_block[0])
    if ("-" == align.strand): #{
        start_offset = -start_offset
        end_offset = -end_offset
    #} end if
    tdup_start = target_block[0] + start_offset
    tdup_end = target_block[0] + end_offset
    DebugMsg(self, "In Block: %i-%i::%i-%i, offsets:%i,%i, tdup:%i-%i" % (
        query_block[0], query_block[1], target_block[0], target_block[1],
        start_offset, end_offset, tdup_start, tdup_end))
    gene.left = min(tdup_start, tdup_end)
    gene.right = max(tdup_start, tdup_end)
    cdup_seq = self.contigs[contig.id][cdup_coords[0]-1:cdup_coords[1]]
    tdup_seq = self.transcripts[gene.id][gene.left-1:gene.right]
    if ("-" == align.strand): #{
        tdup_seq = ReverseComplement(tdup_seq)
    #} end if
    DebugMsg(self, "Align: %s\nDup-coords: %s-%s;%s-%s\n %s\n %s" % (
        align.ToString(), cdup_coords[0], cdup_coords[1], tdup_start,
        tdup_end, cdup_seq, tdup_seq))
    if (cdup_seq.upper() != tdup_seq.upper()): #{
        # Count mismatching positions; any difference in length is counted
        # too, so a truncated transcript slice is reported as a mismatch
        # rather than raising IndexError part-way through the comparison.
        diffs = abs(len(cdup_seq) - len(tdup_seq))
        diffs += sum(1 for (ctg_base, ref_base)
            in zip(cdup_seq.upper(), tdup_seq.upper())
            if ctg_base != ref_base)
        if (1 < diffs): #{
            raise PrimerDesignerError("Contig duplicated-sequence is not the "
                "same as transcript duplicated-sequence!")
        else:
            LogMsg(self, "single-base mismatch from ref in duplicated seq")
        #} end if
    #} end if
    return map(str, [event_id, contig.id, contig.left, contig.right,
        gene.id, gene.left, gene.right])
#} end def