Пример #1
0
 def Run(self): #{
   """Annotate each candidate group with breakpoint genes and write it out.

   Builds the group coordinates file, starts the overlap code on the genes
   file and the group coordinates file, then walks the groups from
   options.barnacle_path, writing each group's FullDataString() to
   options.out_path. Once the overlap stream raises StopIteration, the
   remaining groups are written without looking for further overlaps.
   The group coordinates file is removed afterwards unless
   options.keep_coords_file is set.
   """
   LogMsg(self, "Adding breakpoint gene annotation...")
   start = time.time()
   group_parser = CandidateGroupParserCls(self.options.barnacle_path)
   self.CreateGroupCoordsFile(group_parser)
   self.overlaps_stream = RunOverlapCode(self.options.genes_path,
     self.options.group_coords_path, STREAM_OUT, dpt=self.options.dpt,
     log_info=self.log_info)
   self.output_file = FileBoxCls(self.options.out_path, "w",
     "cannot create output file")
   try: #{
     overlaps_remain = True
     for group in group_parser: #{
       # get the breakpoint gene annotation for the group
       if (overlaps_remain): #{
         try: #{
           self.GetBreakpointOverlaps(group)
         except StopIteration:
           # the overlap stream is exhausted: stop querying it, but keep
           # writing the remaining groups
           DebugMsg(self, "No overlaps remain")
           overlaps_remain = False
         #} end try
       #} end if
       # write the group to the output file
       self.output_file.Write(group.FullDataString())
     #} end for
   finally:
     # close the output file even when annotation fails part-way through
     self.output_file.Close()
   #} end try
   if (not self.options.keep_coords_file): #{
     os.remove(self.options.group_coords_path)
   #} end if
   LogMsg(self, "Time spent adding breakpoint gene annotation: %s" %
     TimeSpent(start))
Пример #2
0
class BreakpointGeneAnnotatorCls: #{
  def __init__(self, options): #{
    """Set up the annotator from the given options.

    Delegates the common setup to SetupMainClass, then marks the overlap
    stream, the current overlap and the output file as not yet available.
    """
    SetupMainClass(self, options)
    # nothing is open yet: all three are filled in later while running
    self.overlaps_stream = self.curr_overlap = self.output_file = None
  #} end def

  def __del__(self): #{
    """Close the output file (if one was opened) and the log file.

    The log file is closed even if closing the output file raises, so a
    failure in one cleanup step does not skip the other.
    """
    try: #{
      # the attribute may be missing if __init__ failed before setting it
      output_file = getattr(self, "output_file", None)
      if (output_file is not None): #{
        output_file.Close()
      #} end if
    finally:
      CloseLogFile(self)
    #} end try
  #} end def

  def Run(self): #{
    """Annotate each candidate group with breakpoint genes and write it out.

    Builds the group coordinates file, starts the overlap code on the genes
    file and the group coordinates file, then walks the groups from
    options.barnacle_path, writing each group's FullDataString() to
    options.out_path. Once the overlap stream raises StopIteration, the
    remaining groups are written without looking for further overlaps.
    The group coordinates file is removed afterwards unless
    options.keep_coords_file is set.
    """
    LogMsg(self, "Adding breakpoint gene annotation...")
    start = time.time()
    group_parser = CandidateGroupParserCls(self.options.barnacle_path)
    self.CreateGroupCoordsFile(group_parser)
    self.overlaps_stream = RunOverlapCode(self.options.genes_path,
      self.options.group_coords_path, STREAM_OUT, dpt=self.options.dpt,
      log_info=self.log_info)
    self.output_file = FileBoxCls(self.options.out_path, "w",
      "cannot create output file")
    try: #{
      overlaps_remain = True
      for group in group_parser: #{
        # get the breakpoint gene annotation for the group
        if (overlaps_remain): #{
          try: #{
            self.GetBreakpointOverlaps(group)
          except StopIteration:
            # the overlap stream is exhausted: stop querying it, but keep
            # writing the remaining groups
            DebugMsg(self, "No overlaps remain")
            overlaps_remain = False
          #} end try
        #} end if
        # write the group to the output file
        self.output_file.Write(group.FullDataString())
      #} end for
    finally:
      # close the output file even when annotation fails part-way through
      self.output_file.Close()
    #} end try
    if (not self.options.keep_coords_file): #{
      os.remove(self.options.group_coords_path)
    #} end if
    LogMsg(self, "Time spent adding breakpoint gene annotation: %s" %
      TimeSpent(start))
  #} end def

  def CreateGroupCoordsFile(self, group_parser): #{
    """Write the coordinates of every group to options.group_coords_path.

    Does nothing (beyond logging) when options.use_existing_group_coords is
    set. Otherwise iterates over group_parser, writes the coordinates of
    each group, and closes the parser once all groups have been written.
    The coordinates file is closed even if writing a group fails.
    """
    if (self.options.use_existing_group_coords): #{
      LogMsg(self, "Using existing group coordinates file.")
      return
    #} end if
    # check whether to use "chr" in chromosome names in coordinates file
    use_chr = ShouldChromUseChr(1, self.options.genes_path,
      "exon coordinates", self.log_info)
    # open the group coordinates file
    group_coords_file = FileBoxCls(self.options.group_coords_path, "w",
      "cannot create event coordinates file")
    try: #{
      for group in group_parser: #{
        ExtremeDebugMsg(self, "Writing coordinates for group %i" % group.id)
        self.WriteGroupCoords(group, group_coords_file, use_chr)
      #} end for
      group_parser.Close()
    finally:
      # do not leak the file handle when a group cannot be written
      group_coords_file.Close()
    #} end try
  #} end def

  def WriteGroupCoords(self, group, group_coords_file, use_chr): #{
    """Write coordinate lines for every member of the given group.

    Members whose "gap" attribute is true are handled by
    WriteGapGroupCoords; all other members by WriteSplitGroupCoords.
    """
    for member in group.members: #{
      # choose the writer matching the kind of candidate
      if (member.gap): #{
        write_coords = self.WriteGapGroupCoords
      else:
        write_coords = self.WriteSplitGroupCoords
      #} end if
      write_coords(member, group_coords_file, use_chr)
    #} end for
  #} end def

  def WriteGapGroupCoords(self, candidate, group_coords_file, use_chr): #{
    """Write one coordinates line for a gap candidate.

    The line is built by ConstructBEDString from the chromosome, genome
    start and genome end of candidate.align_info_B, and is named with the
    candidate's ID string followed by "A".
    """
    # NOTE(review): the region is labelled "A" although the coordinates come
    # from align_info_B -- confirm this pairing is intended
    alignment = candidate.align_info_B
    bed_fields = (alignment.chrom, use_chr, alignment.genome_start,
      alignment.genome_end, "%sA" % candidate.IDString())
    group_coords_file.WriteLine(ConstructBEDString(*bed_fields))
  #} end def

  def WriteSplitGroupCoords(self, candidate, group_coords_file, use_chr): #{
    """Write two coordinates lines (regions "A" and "B") for a split candidate.

    Region "A" is centred on the genome end of candidate.alignments[0] and
    region "B" on the genome start of candidate.alignments[1]; each region
    extends options.event_buffer to either side of that position.
    """
    for index, region_id in enumerate(("A", "B")): #{
      alignment = candidate.alignments[index]
      span = (alignment.genome_start, alignment.genome_end)
      # index 0 selects span[1] (the end), index 1 selects span[0] (the
      # start) -- presumably the edge of each alignment facing the breakpoint
      anchor = span[1-index]
      bed_line = ConstructBEDString(alignment.chrom, use_chr,
        anchor - self.options.event_buffer,
        anchor + self.options.event_buffer,
        "%s%s" % (candidate.IDString(), region_id))
      group_coords_file.WriteLine(bed_line)
    #} end for
  #} end def

  def GetBreakpointOverlaps(self, group): #{
    """Attach to the group every overlap in the stream that belongs to it.

    Clears the group's existing breakpoint genes, advances the overlap
    stream past overlaps whose group ID is lower than group.id, then feeds
    each overlap carrying this group's ID to AddBreakPointGene for the
    matching member. Overlaps whose candidate ID is not among the group's
    members are skipped. Nothing here catches an exception raised by
    GetNextOverlap (for example when the stream runs out), so it propagates
    to the caller.
    """
    # start from a clean slate for this group
    group.ClearBPGenes()
    # move forward until the current overlap is for this group or a later one
    while (True): #{
      overlap = getattr(self, "curr_overlap", None)
      if (not (None == overlap or overlap.group_id < group.id)): #{
        break
      #} end if
      self.GetNextOverlap()
    #} end while
    # index the members of the group by candidate ID
    members_by_id = dict()
    for member in group.members: #{
      members_by_id[member.candidate_id] = member
    #} end for
    # consume every overlap reported for this group
    while (self.curr_overlap.group_id == group.id): #{
      overlap = self.curr_overlap
      ExtremeDebugMsg(self, "Found overlap for %i%s" %
        (overlap.group_id, overlap.candidate_id))
      if (overlap.candidate_id in members_by_id): #{
        ExtremeDebugMsg(self, "  candidate ID in dictionary!")
        self.AddBreakPointGene(members_by_id[overlap.candidate_id])
      #} end if
      self.GetNextOverlap()
    #} end while
  #} end def

  def GetNextOverlap(self): #{
    if (None == self.overlaps_stream): #{
      raise BreakpointGeneAnnotatorError("breakpoint gene overlap stream "
        "is not open!")
    #} end if
    overlap_line = CleanLine(self.overlaps_stream.next())
    tokenizer = TokenizerCls(overlap_line, delimiter=" ",
      log_info=self.log_info)
    try:
      self.curr_overlap = FeatureOverlapCls(tokenizer, multi_target=True)
    except ValueError,e:
      raise BreakpointGeneAnnotatorError("error parsing overlap line: "
        "%s\n%s" % (overlap_line, e))
    # end try
    ParseFullEventID(self.curr_overlap, self.curr_overlap.query_id)