def parse_pslfile(tdir,pslfile,smoothing_factor):
  """Convert long-read PSL alignments into genepred entries.

  Each PSL line is converted to a genepred line and the resulting entry is
  renamed to its running read number. The file tdir + '/longreads.gpd' is
  opened for writing.

  Args:
    tdir: directory path; '/longreads.gpd' is appended to it.
    pslfile: PSL file name; it is compared against '-' before a reader is
      chosen.
    smoothing_factor: not referenced by any line visible here; presumably
      the gap size handed to smooth_gaps -- TODO confirm.

  NOTE(review): several statements appear to be missing from this view
  (flagged inline). Nothing visible writes to or closes of_gpd, records
  names in seennames, or returns a value.
  """
  # Go through the long reads and make a genepred
  if pslfile != '-':
    fr = FileBasics.GenericFileReader(pslfile)
    # NOTE(review): as written the next line replaces the reader above with
    # stdin unconditionally; an `else:` looks to be missing -- confirm.
    fr = sys.stdin
  seennames = {}
  longreadnumber = 0
  of_gpd = open(tdir+'/longreads.gpd','w')
  while True:
    line = fr.readline()
    if not line: break
    # NOTE(review): this `if` has no body in this view (presumably `continue`).
    if re.match('^#',line): #skip comments
    longreadnumber += 1
    gpd_line = PSLBasics.convert_entry_to_genepred_line(PSLBasics.line_to_entry(line.rstrip()))
    if not gpd_line:
      # NOTE(review): readname is only assigned further down, so for the
      # first record this message would reference an unset name -- confirm.
      sys.stderr.write("Warning: malformed psl for "+readname+"\n")
    # NOTE(review): the arguments of this call are not present in this view;
    # the backslash continuation runs straight into the next statement.
    entry = GenePredBasics.smooth_gaps( \
    readname = entry['name']
    if readname in seennames:
      sys.stderr.write("Warning: repeat name '"+readname+"'\n")
    #set our first name to our bin
    entry['name'] = str(longreadnumber)
    gline = GenePredBasics.entry_to_line(entry)
def get_exons_from_seqs(seqs, d, spcf, min_intron_size=68):
    """Build tab-delimited exon records for every alignment of one read.

    Each alignment is turned into a SAM entry with a placeholder sequence,
    converted SAM -> PSL -> genepred, gap-smoothed, and then written out as
    one line per exon.

    Args:
        seqs: iterable of alignment records; index 1 is the reference name,
            2 the strand ('+' or anything else for reverse), 3 the position
            and 4 the CIGAR string.
        d: template SAM entry dict; it is copied, never modified.
        spcf: converter object exposing ``convert_line(sam_line)`` that
            returns a PSL line.
        min_intron_size: deletions shorter than this count as indels, and
            gaps are smoothed with this size. Defaults to 68, the value that
            used to be hard-coded in both places.

    Returns:
        A string of newline-terminated rows with the columns: chrom,
        exon start, exon end, strand, name, possible matches, indel bases,
        'P' (first alignment) or 'S' (any later one), and query start.
        Empty string when ``seqs`` is empty.
    """
    skips = frozenset(('H', 'D', 'N'))  # ops left out of the placeholder length
    rows = []
    for sind, seq in enumerate(seqs, 1):
        psec = 'P' if sind == 1 else 'S'  # primary or secondary
        d1 = d.copy()
        d1['rname'] = seq[1]
        d1['flag'] = 0 if seq[2] == '+' else 16
        d1['pos'] = seq[3]
        d1['cigar'] = seq[4]
        cigar_array = SamBasics.parse_cigar(seq[4])
        d1['cigar_array'] = cigar_array

        # A leading soft or hard clip gives the offset of the alignment
        # within the query.
        qstart = 0
        if cigar_array and cigar_array[0]['op'] in ('S', 'H'):
            qstart = cigar_array[0]['val']

        total_length = 0
        possible_matches = 0
        indels = 0
        for ce in cigar_array:
            op = ce['op']
            val = ce['val']
            if op not in skips:
                total_length += val
            if op == 'M':
                possible_matches += val
            elif op == 'I' or (op == 'D' and val < min_intron_size):
                indels += val

        # The real read sequence is not needed downstream; a run of N of the
        # right length stands in for it.
        d1['seq'] = 'N' * total_length
        pline = spcf.convert_line(SamBasics.entry_to_line(d1))
        pentry = PSLBasics.line_to_entry(pline)
        gline = PSLBasics.convert_entry_to_genepred_line(pentry)
        gsmooth = GenePredBasics.smooth_gaps(
            GenePredBasics.line_to_entry(gline), min_intron_size)

        # Fix: the third column used to be str([i]) -- the text "[0]",
        # "[1]", ... -- rather than the exon end coordinate.
        for exon_start, exon_end in zip(gsmooth['exonStarts'],
                                        gsmooth['exonEnds']):
            rows.append("\t".join([
                gsmooth['chrom'],
                str(exon_start),
                str(exon_end),
                gsmooth['strand'],
                gsmooth['name'],
                str(possible_matches),
                str(indels),
                psec,
                str(qstart),
            ]) + "\n")
    return ''.join(rows)
def main():
  """Nudge long-read junctions onto nearby reference junctions.

  Reads a reference genepred and indexes its junctions per chromosome, then
  converts each PSL alignment to a gap-smoothed genepred entry, passes it
  through nudge(), and prints the result: a fake PSL line when
  --output_fake_psl is given, otherwise a genepred line.
  """
  parser = argparse.ArgumentParser(description='Use reference junctions when they are close',formatter_class=argparse.ArgumentDefaultsHelpFormatter)
  parser.add_argument('--min_intron_size',type=int,default=68,help="INT min intron size")
  parser.add_argument('--min_local_support',type=int,default=0,help="INT min number of junctions within search_size of a junction in order to count it")
  parser.add_argument('--search_size',type=int,default=10,help="INT search space for reference")
  parser.add_argument('--output_fake_psl',help="FASTAFILE reference genome to make a fake PSL output")
  parser.add_argument('psl',help="PSLFILENAME or '-' for STDIN")
  parser.add_argument('reference_genepred',help="FASTAFILENAME for reference genepred")
  args = parser.parse_args()

  # The genome is only needed to build fake PSL output.
  genome = {}
  if args.output_fake_psl:
    genome = read_fasta_into_hash(args.output_fake_psl)

  # Read in the reference genepred first and index junctions as
  # ref[chrom][left exon end][right exon start + 1] = strand (1-based).
  gpf = GenePredBasics.GenePredFile(args.reference_genepred)
  ref = {}
  for e in (x.entry for x in gpf.entries):
    if len(e['exonStarts']) <= 1: continue  # single-exon: no junctions
    chrom_juns = ref.setdefault(e['chrom'],{})
    for i in range(1,len(e['exonStarts'])):
      # setdefault keeps the strand of the first transcript seen for a junction
      chrom_juns.setdefault(e['exonEnds'][i-1],{}).setdefault(e['exonStarts'][i]+1,e['strand'])

  pf = GenericFileReader(args.psl)
  while True:
    line = pf.readline()
    if not line: break
    if line.startswith('#'): continue
    pe = PSLBasics.line_to_entry(line.rstrip())
    if len(pe['tStarts']) != len(pe['blockSizes']) or len(pe['qStarts']) != len(pe['blockSizes']):
      sys.stderr.write("WARNING invalid psl\n")
      # Fix: skip the malformed entry instead of carrying on to convert it.
      continue
    genepred_line = PSLBasics.convert_entry_to_genepred_line(pe)
    ge = GenePredBasics.smooth_gaps(GenePredBasics.line_to_entry(genepred_line),args.min_intron_size)
    refjuns = ref.get(pe['tName'],{})
    new_ge = nudge(pe,ge,refjuns,args)
    # Fix: the two output formats are alternatives. Previously genepred
    # lines were only printed (in addition to PSL) when fake PSL output was
    # requested, and nothing was printed otherwise.
    if args.output_fake_psl:
      print(GenePredBasics.entry_to_fake_psl_line(new_ge,genome))
    else:
      print(GenePredBasics.entry_to_line(new_ge))
 def set_mapping_counts(self,psl_filename):
   """Count how many alignments each query name has in a PSL file.

   Stores the {qName: count} dict in self.mapping_counts and sets
   self.mapping_counts_set to True.

   Args:
     psl_filename: name of the PSL file, opened with GenericFileReader.

   Lines that fail to parse are reported on stderr and skipped.
   """
   gfr0 = GenericFileReader(psl_filename)
   qcnts = {}
   while True:
     line = gfr0.readline()
     if not line: break
     # Fix: the parse call and its error message had lost the try/except
     # that joins them, which left the method unparseable.
     try:
       psle = PSLBasics.line_to_entry(line.rstrip())
     except Exception:
       sys.stderr.write("Problem parsing line:\n"+line.rstrip()+"\n")
       continue
     qcnts[psle['qName']] = qcnts.get(psle['qName'],0) + 1
   self.mapping_counts = qcnts
   # Raise the flag only once the counts are in place.
   self.mapping_counts_set = True
def main():
  """Print a genepred line for each PSL line read from a file or STDIN.

  With --fill_gaps greater than zero, each entry is passed through
  smooth_gaps with that size before it is printed.
  """
  arg_parser = argparse.ArgumentParser(description="Convert a psl file into a target formated genepred file.")
  arg_parser.add_argument('--fill_gaps',type=int,default=0,help="Close gaps this size or smaller.")
  arg_parser.add_argument('input_name',help="Input PSL file, use - to indicate STDIN.")
  args = arg_parser.parse_args()
  # '-' selects standard input; anything else is opened as a file.
  if args.input_name == '-':
    source = sys.stdin
  else:
    source = open(args.input_name)
  with source as psl_lines:
    for raw_line in psl_lines:
      out_line = PSLBasics.convert_entry_to_genepred_line(PSLBasics.line_to_entry(raw_line))
      if args.fill_gaps > 0:
        # round-trip through an entry so the gaps can be closed
        smoothed = GenePredBasics.smooth_gaps(GenePredBasics.line_to_entry(out_line),args.fill_gaps)
        out_line = GenePredBasics.entry_to_line(smoothed)
      print(out_line)
def main():
  """Group query-ordered PSL alignments by read and process each group.

  Consecutive lines sharing a qName are handled through a
  PSLBasics.MultiplePSLAlignments buffer; a finished buffer is handed to
  process_buffer(). Output goes to the global handle `of`, which is either
  the --output file or a randomly named file inside --tempbuffer.

  NOTE(review): several statements appear to be missing from this view
  (flagged inline), so the function does not parse as shown.
  """
  parser = argparse.ArgumentParser(description="Analyze ORDERED psl alignments of long reads.")
  parser.add_argument('psl_file',help="Alignment file. Must be ordered by query name. use - for stdin")
  parser.add_argument('-o','--output',help="Write to output file, default is STDIN")
  parser.add_argument('--minimum_coverage',type=int,help="Only consider alignments with at least this many bp aligned")
  parser.add_argument('--threads',type=int,default=multiprocessing.cpu_count(),help="INT default cpu_count")
  parser.add_argument('--tempbuffer',help="DIRECTORY store the results in a temporary file until they are ready to output.  suggest using /tmp if you don't know what to use")
  args = parser.parse_args()
  seen_names = set()  # NOTE(review): nothing visible adds names to this set
  last_name = ''
  buffer = PSLBasics.MultiplePSLAlignments()
  # Read from stdin unless a file name other than '-' was given.
  inf = sys.stdin
  if args.psl_file != '-':
    inf = open(args.psl_file)
  # `of` is module-global; presumably so a result callback can write to it
  # -- confirm.
  global of
  tname = None
  if args.tempbuffer:
    if not args.output:
      # NOTE(review): only a message is written here; execution continues
      # as shown (an exit call may be missing from this view).
      sys.stderr.write("ERROR if you want to buffer outputs in a temp file you need to specify a final output file.\n")
    # Random suffix for the temporary file name.
    rnum = random.randint(1,1000000000);
    tname = args.tempbuffer.rstrip('/')+'/weirathe.'+str(rnum)+'.meta'
    of = open(tname,'w')
  if args.output and not args.tempbuffer:
    of = open(args.output,'w')
  # NOTE(review): `of` is not assigned in this function when neither
  # --output nor --tempbuffer is given.
  global lock
  if args.threads > 1:
    pool = multiprocessing.Pool(args.threads)
  for line in inf:
    e = PSLBasics.line_to_entry(line.rstrip())
    if e['qName'] != last_name: # we have a new name
      if e['qName'] in seen_names:
        sys.stderr.write("ERROR psl entries are not ordered by query name.\n")
      if buffer.get_alignment_count() > 0:
        # NOTE(review): the multi-threaded branch calls process_buffer
        # directly here; the pool dispatch and an `else:` appear to be
        # missing from this view.
        if args.threads > 1:
          res = process_buffer(buffer)
      # Start a fresh buffer for the new read name.
      buffer = PSLBasics.MultiplePSLAlignments()
      # NOTE(review): this `if` has no body in this view. --minimum_coverage
      # has no default, so the comparison sees None when it is not given.
      if args.minimum_coverage > 1:
    last_name = e['qName']
  # Flush whatever is left in the buffer after the last line.
  if buffer.get_alignment_count() > 0:
    if args.threads > 1:
      pool.apply_async(process_buffer,[buffer],callback=print_result) # if we still have something left to do
      # NOTE(review): as shown the buffer is also processed synchronously
      # right after being queued; an `else:` looks to be missing.
      res = process_buffer(buffer)
  # NOTE(review): this `if` has no body in this view (presumably pool
  # close/join).
  if args.threads > 1:
  if args.tempbuffer:
    # Reads the temp file back with --output opened as the destination.
    of = open(args.output,'w')
    with open(tname) as inf:
      # NOTE(review): the loop body is not present in this view.
      for line in inf:
def do_combine_operation(best_option,left,right,read,seq,args):
  """Join a left and a right partial PSL alignment into one PSL entry.

  The blocks around the chosen junction are re-aligned with
  needleman_wunsch(), PSL summary statistics are recomputed from the block
  arrays, and the assembled PSL line is parsed back into an entry.

  Args:
    best_option: sequence whose items 0-3 are read as the left target,
      right target, left query and right query coordinates.
    left: PSL entry dict for the left alignment ('qStarts', 'tStarts',
      'blockSizes', 'strand', 'qName' and 'tName' are read).
    right: PSL entry dict for the right alignment ('qStarts', 'tStarts'
      and 'blockSizes' are read).
    read: query sequence; reverse-complemented when left['strand'] is '-'.
    seq: target sequence; its length is reported as tSize.
    args: options object; only args.min_intron_size is read here.

  Returns:
    The result of PSLBasics.line_to_entry() on the assembled PSL line.

  NOTE(review): q_start_array, t_start_array and block_size_array start
  empty and no visible statement appends to them; the appending statements
  appear to be missing from this view (flagged inline).
  """
  #print "choice is "+str(best_option)
  left_target = best_option[0]
  right_target = best_option[1]
  left_query = best_option[2]
  right_query = best_option[3]
  # store for output
  q_start_array = []
  t_start_array = []
  block_size_array = []

  # Walk the left blocks to find the start of the block the left query
  # coordinate falls in; that block's start is where re-alignment begins.
  left_query_start = left['qStarts'][0]
  left_target_start = left['tStarts'][0]
  for i in range(0,len(left['tStarts'])):
    tstart = left['tStarts'][i]
    tend = left['tStarts'][i]+left['blockSizes'][i]
    qstart = left['qStarts'][i]
    qend = left['qStarts'][i]+left['blockSizes'][i]
    if left_query <= qstart+1: break
    left_query_start = qstart
    left_target_start = tstart
    if left_query <= qend: break

  #print "left things"
  #print [left_query_start+1,left_query]
  #print [left_target_start+1,left_target]

  # Same walk on the right side, tracking the END of the block holding the
  # right query coordinate. right_outer_index ends up as the index after
  # the last block examined.
  right_query_end = right['qStarts'][0]+right['blockSizes'][0]
  right_target_end = right['tStarts'][0]+right['blockSizes'][0]
  right_outer_index = 0
  for j in range(0,len(right['tStarts'])):
    tstart = right['tStarts'][j]
    tend = right['tStarts'][j]+right['blockSizes'][j]
    qstart = right['qStarts'][j]
    qend = right['qStarts'][j]+right['blockSizes'][j]
    right_outer_index = j+1
    if right_query <= qstart+1: break
    right_query_end = qend
    right_target_end = tend
    # NOTE(review): strict `<` here, while the left-side loop uses `<=` --
    # confirm the asymmetry is intended.
    if right_query < qend: break
  #print "right things"
  #print [right_query+1,right_query_end]
  #print [right_target+1,right_target_end]
  # Query coordinates are applied to the reverse complement for '-' strand.
  working_read = read.upper()
  if left['strand'] == '-': working_read = rc(read.upper())
  # Query slice spanning both sides; the target is the left piece
  # concatenated with the right piece.
  pread = working_read[left_query_start:right_query_end]
  tseq = seq[left_target_start:left_target].upper()+seq[right_target-1:right_target_end].upper()
  res = needleman_wunsch(pread,tseq)
  #print "short needleman wunsch"
  #print res[0]
  #print res[1]

  # Fun part of making the new portion of the alignment
  # res[0] and res[1] are walked column by column; '-' marks a gap.
  # bynumbers is [query start, target start, query end, target end] of the
  # current ungapped run.
  qindex = left_query_start
  tindex = left_target_start
  in_alignment = 0
  alignment = None
  bynumbers = None
  for i in range(0,len(res[0])):
    if res[0][i] == '-':  #insertion in target (gap in query)
      tindex += 1
      in_alignment = 0
    elif res[1][i] == '-':  #insertion in query (gap in target)
      qindex += 1
      in_alignment = 0
    else: # we are in an alignment
      if in_alignment == 0:
        # output buffered result
        if alignment:
          # NOTE(review): this `if` has no body in this view (presumably
          # the finished run is appended to the output arrays).
          if len(alignment[0]) > 0:
        alignment = ['','']
        bynumbers = [qindex,tindex,qindex,tindex]
      in_alignment = 1
      alignment[0] += res[0][i]
      alignment[1] += res[1][i]
      bynumbers[2] += 1
      bynumbers[3] += 1
      # NOTE(review): qindex and tindex are not advanced in this branch as
      # shown -- confirm against the full file.
    if qindex == right_query: # switch forward
      #print "switch"
      #print str(tindex) + "\t" + str(right_target)
      #print str(qindex) + "\t" + str(right_query)
      # Jump the target index to the right piece; a real jump ends the
      # current run.
      if not tindex == right_target: 
        in_alignment = 0
      tindex = right_target
  if alignment:
    # NOTE(review): this `if` has no body in this view (presumably the
    # last run is flushed here).
    if len(alignment[0]) > 0:
    #print bynumbers

  # NOTE(review): this loop has no body in this view (presumably the
  # remaining right-hand blocks are copied to the output arrays).
  for i in range(right_outer_index,len(right['blockSizes'])):

  #now we can finally construct a psl line
  #we won't keep track of repeats for now
  matches = 0
  misMatches = 0
  repMatches = 0
  nCount = 0
  qNumInsert = 0
  qBaseInsert = 0
  tNumInsert = 0
  tBaseInsert = 0
  strand = left['strand']
  qName = left['qName']
  qSize = len(read)
  # Start of the first block and end of the last block on each sequence.
  qStart = q_start_array[0]
  qEnd = q_start_array[len(q_start_array)-1]+block_size_array[len(block_size_array)-1]
  tName = left['tName']
  tSize = len(seq)
  tStart = t_start_array[0]
  tEnd = t_start_array[len(t_start_array)-1]+block_size_array[len(block_size_array)-1]
  blockCount = len(block_size_array)
  # Each list field is written with a trailing comma.
  blockSizes = ','.join([str(x) for x in block_size_array])+','
  qStarts = ','.join([str(x) for x in q_start_array])+','
  tStarts = ','.join([str(x) for x in t_start_array])+','

  # Recompute match and insert statistics block by block.
  prev_q_end = None
  prev_t_end = None
  for i in range(0,len(block_size_array)):
    qseg = working_read[q_start_array[i]:q_start_array[i]+block_size_array[i]]
    tseg = seq[t_start_array[i]:t_start_array[i]+block_size_array[i]].upper()
    for j in range(0,len(qseg)):
      if qseg[j] == 'N': nCount += 1
      # NOTE(review): the next statement is indented as the body of an
      # `else:` that is not present in this view.
      if qseg[j] == tseg[j]: matches += 1
        misMatches += 1
    if prev_t_end:
      t_dist = t_start_array[i]-prev_t_end
      if t_dist > 0 and t_dist < args.min_intron_size: #we have an insert into the target and its not an intron
        tNumInsert += 1
        tBaseInsert += t_dist
    if prev_q_end:
      q_dist = q_start_array[i]-prev_q_end
      if q_dist > 0:
        qNumInsert += 1
        qBaseInsert += q_dist
    prev_q_end = q_start_array[i]+block_size_array[i]
    prev_t_end = t_start_array[i]+block_size_array[i]

  # now we have everything to make the line
  combo_line = str(matches) + "\t" + str(misMatches) + "\t" + str(repMatches) + "\t" \
             + str(nCount) + "\t" + str(qNumInsert) + "\t" + str(qBaseInsert) + "\t" \
             + str(tNumInsert) + "\t" + str(tBaseInsert) + "\t" \
             + strand + "\t" + qName + "\t" + str(qSize) + "\t" \
             + str(qStart) + "\t" + str(qEnd) + "\t" \
             + tName + "\t" + str(tSize) + "\t" \
             + str(tStart) + "\t" + str(tEnd) + "\t" + str(blockCount) + "\t" \
             + blockSizes + "\t" + qStarts + "\t" + tStarts
  #print combo_line
  #print q_start_array
  #print t_start_array
  #print block_size_array
  #  print str(right['qStarts'][i])+"\t"+str(right['qStarts'][i]+right['blockSizes'][i])
  #  print i
  return PSLBasics.line_to_entry(combo_line)
def main():
  """Splice together partial alignments of the same read.

  Loads the reference genome, checks read names for uniqueness, optionally
  loads reference splices from a genepred, groups PSL alignments by
  read / chromosome / strand, splits each group into loci by genomic
  distance, and runs execute_locus() on every locus.

  NOTE(review): several loop and `if` bodies appear to be missing from this
  view (flagged inline), so the function does not parse as shown.
  """
  parser = argparse.ArgumentParser(description="splice together partial alignments")
  # NOTE(review): group1 is created as a required group but no argument is
  # added to it in this view.
  group1 = parser.add_mutually_exclusive_group(required=True)
  parser.add_argument('--genome',help="FASTA reference genome",required=True)
  parser.add_argument('--genepred',help="Transcriptome genepred")
  parser.add_argument('--max_intron_size',type=int,default=100000,help="INT maximum intron size")
  parser.add_argument('--min_intron_size',type=int,default=68,help="INT minimum intron size")
  parser.add_argument('--max_gap_size',type=int,default=10,help="INT gap size in query to join")
  parser.add_argument('--max_search_expand',type=int,default=10,help="INT max search space to expand search for junction")
  parser.add_argument('--direction_specific',action='store_true',help="The direction of the transcript is known and properly oriented already")
  parser.add_argument('--threads',type=int,default=0,help="INT number of threads to use default cpu_count")
  parser.add_argument('-o','--output',default='-',help="FILENAME output results to here rather than STDOUT which is default")
  parser.add_argument('input_alignment',help="FILENAME input .psl file or '-' for STDIN")
  args = parser.parse_args()

  # Read our reference genome
  sys.stderr.write("Reading reference\n")
  ref = read_fasta_into_hash(args.genome)

  # Make sure our reads are unique
  sys.stderr.write("Checking for unqiuely named reads\n")
  reads = check_for_uniquely_named_reads(args) # does a hard exit and error if there are any names repeated
  sys.stderr.write("Reads are uniquely named\n")
  # Set number of threads to use
  cpu_count = multiprocessing.cpu_count()
  if args.threads > 0:
    cpu_count = args.threads

  #Set reference splices (if any are available)
  reference_splices = {}
  if args.genepred:
    sys.stderr.write("Reading reference splices from genepred\n")
    reference_splices = get_reference_splices(args)

  sys.stderr.write("Reading alignments into loci\n")

  # Get locus division (first stage)
  # Each read (qName) is separated
  # Then each locus will be specific to at chromosome (tName)
  # Then by (strand), but keep in mind this is the is based on the read
  # Each locus should be specific to a direction but we don't necessarily
  # know direction based on the data we have thus far.  
  inf = sys.stdin
  if args.input_alignment != '-': inf = open(args.input_alignment,'r')
  # loci[qName][tName][strand][first tStart] -> list
  loci = {}
  for line in inf:
    line = line.rstrip()
    if re.match('^#',line): continue
    psl = PSLBasics.line_to_entry(line)
    if psl['qName'] not in loci:
      loci[psl['qName']] = {}
    if psl['tName'] not in loci[psl['qName']]:
      loci[psl['qName']][psl['tName']] = {}
    if psl['strand'] not in loci[psl['qName']][psl['tName']]:
      loci[psl['qName']][psl['tName']][psl['strand']] = {}
    if psl['tStarts'][0] not in loci[psl['qName']][psl['tName']][psl['strand']]:
      loci[psl['qName']][psl['tName']][psl['strand']][psl['tStarts'][0]] = []
    # NOTE(review): nothing visible appends psl to the list created above;
    # that statement appears to be missing from this view.

  sys.stderr.write("breaking loci by genomic distance\n")
  for qname in loci:
    for chr in loci[qname]:
      for strand in loci[qname][chr]:
        #print qname + "\t" + chr + "\t" + strand
        starts = loci[qname][chr][strand].keys()
        current_set = []
        locus_sets = []
        # Start far enough left that the first alignment opens a new set.
        last_end = -1*(args.max_intron_size+2)
        for start in sorted(starts):
          for e in loci[qname][chr][strand][start]:
            start = e['tStarts'][0]+1 # base-1 start of start of alignment
            # More than max_intron_size past the previous end: new locus.
            if start > last_end+args.max_intron_size:
              # we have the start of a new set
              # NOTE(review): this `if` has no body in this view
              # (presumably current_set is saved into locus_sets).
              if len(current_set) > 0: 
              current_set = []
            # NOTE(review): nothing visible adds e to current_set.
            last_end = e['tStarts'][len(e['tStarts'])-1]+e['blockSizes'][len(e['tStarts'])-1]
        # NOTE(review): this `if` has no body in this view (presumably the
        # final set is flushed).
        if len(current_set) > 0:
        loci[qname][chr][strand] = locus_sets # replace what was there with these ordered sets

  locus_total = 0
  for qname in loci:
    for chr in loci[qname]:
      for strand in loci[qname][chr]:
        # NOTE(review): the loop body is not present in this view
        # (presumably locus_total is incremented).
        for locus_set in loci[qname][chr][strand]:

  sys.stderr.write("Work on each read in each locus with "+str(cpu_count)+" CPUs\n")
  # NOTE(review): the pool is created, but the visible code calls
  # execute_locus directly rather than through it.
  p = multiprocessing.Pool(processes=cpu_count)
  locus_count = 0
  for qname in loci:
    for chr in loci[qname]:
      for strand in loci[qname][chr]:
        #print qname + "\t" + chr + "\t" + strand
        for locus_set in loci[qname][chr][strand]:
          locus_count += 1
          onum = len(locus_set)
          # send blank reference splices unless we have some
          rsplices = {}
          if chr in reference_splices: rsplices = reference_splices[chr]
          r1 = execute_locus(locus_set,args,rsplices,ref[chr],reads[qname],locus_total,locus_count)
          #nnum = len(new_locus_set)
          #print str(onum) + " to " + str(nnum)
          #for e in new_locus_set:
          #  print PSLBasics.entry_to_line(e)

  ofh = sys.stdout
  if not args.output == '-':
    ofh = open(args.output,'w')

  # NOTE(review): combo_results is not defined anywhere in this view and the
  # loop body is not present.
  for line in combo_results:
  def convert_line(self,psl_line,query_sequence=None,quality_sequence=None):
    """Convert one PSL alignment line into a SAM alignment line.

    Args:
      psl_line: a single PSL-formatted line.
      query_sequence: optional read sequence; when omitted and reads were
        loaded, the sequence is looked up in self.reads by qName.
      quality_sequence: optional quality string; when omitted and qualities
        were loaded, it is looked up in self.qualities by qName.

    Returns:
      The SAM line (no trailing newline), or False when the PSL line cannot
      be parsed, when its block arrays disagree in length, or when it is
      spliced with no determinable strand while
      self.skip_directionless_splice is set.
    """
    # Fix: the try/except tying the parse to its error report had been
    # lost, which left the method unparseable.
    try:
      pe = PSLBasics.line_to_entry(psl_line)
    except Exception:
      sys.stderr.write("Problem parsing line:\n"+psl_line.rstrip()+"\n")
      return False
    # qStarts is indexed per block below, so it has to match as well.
    if len(pe['tStarts']) != len(pe['blockSizes']) or len(pe['qStarts']) != len(pe['blockSizes']):
      sys.stderr.write("Warning invalid psl entry: "+pe['qName']+"\n")
      return False
    blocks = len(pe['blockSizes'])
    starts = pe['qStarts']
    q_coord_start = starts[0]+1 # base-1 converted starting position
    q_coord_end = starts[blocks-1]+pe['blockSizes'][blocks-1] # base-1 position
    t_coord_start = pe['tStarts'][0]+1 # base-1 converted starting position
    if pe['qName'] not in self.reads and self.reads_set is True:
      sys.stderr.write("Warning: qName "+pe['qName']+" was not found in reads\n")

    # 1. Get the new query to output, clipped to the aligned region.
    q_seq_trimmed = '*'
    if self.reads_set or query_sequence:
      q_seq_trimmed = query_sequence
      if not query_sequence: # get it from the archive we loaded if we didn't give it
        q_seq_trimmed = self.reads[pe['qName']]
      if pe['strand'] == '-':
        q_seq_trimmed = SequenceBasics.rc(q_seq_trimmed)
      q_seq_trimmed = q_seq_trimmed[q_coord_start-1:q_coord_end]

    qual_trimmed = '*'
    if self.qualities_set or quality_sequence:
      qual_trimmed = quality_sequence
      if not quality_sequence:
        qual_trimmed = self.qualities[pe['qName']]
      if pe['strand'] == '-':
        qual_trimmed = qual_trimmed[::-1] # qualities follow the reversed read
      qual_trimmed = qual_trimmed[q_coord_start-1:q_coord_end]

    # 2. Get the cigar string to output. Block starts are made relative to
    # the first block; gaps between consecutive blocks become I (query
    # gap), N (target gap above min_intron_size) or D (shorter target gap).
    qstarts = [x-pe['qStarts'][0] for x in pe['qStarts']]
    tstarts = [x-pe['tStarts'][0] for x in pe['tStarts']]
    query_index = 0
    target_index = 0
    junctions = [] # indices of blocks that follow an intron
    cigar_parts = []
    for i in range(0,blocks):
      qdif = qstarts[i] - query_index
      tdif = tstarts[i] - target_index
      if qdif > 0:  # we have to insert
        cigar_parts.append(str(qdif) + 'I')
      if tdif > self.min_intron_size: # we have an intron
        cigar_parts.append(str(tdif) + 'N')
        # Fix: junctions was never filled, so the splice inspection below
        # and the XC count never saw an intron.
        junctions.append(i)
      elif tdif > 0: # we have to delete
        cigar_parts.append(str(tdif) + 'D')
      cigar_parts.append(str(pe['blockSizes'][i]) + 'M')
      query_index = qstarts[i]+pe['blockSizes'][i]
      target_index = tstarts[i]+pe['blockSizes'][i]
    cigar = ''.join(cigar_parts)

    # 3. Inspect junctions if we have a ref_genome: compare the two bases
    # at each end of every intron with the known splice motifs.
    spliceflag_set = False
    spliceflag = None
    if self.ref_genome_set:
      canon = 0
      revcanon = 0
      chrom_seq = self.ref_genome[pe['tName']]
      for i in junctions: #blocks following a junction
        left_num = pe['tStarts'][i-1]+pe['blockSizes'][i-1]
        left_val = chrom_seq[left_num:left_num+2].upper()
        # Fix: the intron's last two bases sit just before block i; this
        # used tStarts[i-1], the block on the other side of the intron.
        right_num = pe['tStarts'][i]-2
        right_val = chrom_seq[right_num:right_num+2].upper()
        junc = left_val + '-' + right_val
        if junc in self.canonical: canon += 1
        if junc in self.revcanonical: revcanon += 1
      if canon > revcanon:
        spliceflag_set = True
        spliceflag = '+'
      elif revcanon > canon:
        spliceflag_set = True
        spliceflag = '-'
    # if we have junctions, and we should be setting direction but
    # we can't figure out the direction skip ambiguous direction
    if len(junctions) > 0 and self.skip_directionless_splice and spliceflag_set == False:
      return False

    # 4. Assemble the SAM line.
    # Fix: FLAG was only written for '-' strand (and then as both 16 and
    # 0), dropping or duplicating a mandatory column.
    flag = '16' if pe['strand'] == '-' else '0'
    fields = [
      pe['qName'],        # 1. QNAME
      flag,               # 2. FLAG
      pe['tName'],        # 3. RNAME
      str(t_coord_start), # 4. POS
      '0',                # 5. MAPQ
      cigar,              # 6. CIGAR
      '*',                # 7. RNEXT
      '0',                # 8. PNEXT
      '0',                # 9. TLEN
      q_seq_trimmed,      # 10. SEQ
      qual_trimmed,       # 11. QUAL
    ]
    if spliceflag_set:
      fields.append('XS:A:'+spliceflag)
    # Fix: NH depends on the mapping counts, not on the reference genome.
    # getattr covers instances where set_mapping_counts() was never called.
    if getattr(self,'mapping_counts_set',False):
      fields.append('NH:i:'+str(self.mapping_counts[pe['qName']]))
    fields.append('XC:i:'+str(len(junctions)))
    fields.append('NM:i:0')
    return "\t".join(fields)
