Exemplo n.º 1
0
def ReadReferenceFromFile(File):
  '''Read the sequences in the reference file; insist there is exactly one.

  File is handed unchanged to ReadSequencesFromFile (with False as the second
  argument). If that call does not report exactly one sequence, a message
  naming File is written to stderr and the program exits with status 1.

  Returns a pair: the items() of the sequence collection returned by
  ReadSequencesFromFile, and the length value that call returned alongside it.
  '''
  AllSequences, ReferenceLength = ReadSequencesFromFile(File,False)
  if len(AllSequences) != 1:
    # Report the file we were actually asked to read: the argument File, not
    # a name from some outer scope that may not exist.
    print('Found', len(AllSequences), 'sequences in', str(File) + \
    '; expected 1.\nQuitting.', file=sys.stderr)
    exit(1)
  return AllSequences.items(), ReferenceLength
Exemplo n.º 2
0
Aligned=False
SeqDict, FirstSeqLength = ReadSequencesFromFile(DataFile, Aligned)

# We are expecting two sequences.
if len(SeqDict) != 2:
  print 'Expected 2 sequences;', DataFile, 'contains', str(len(SeqDict)) +\
  '.\nQuitting.'
  exit(1)

SeqNames = SeqDict.keys()
Seqs     = SeqDict.values()

# If the two sequences are the same length, there is no shuffling to do. Print
# them as they are.
if len(Seqs[0]) == len(Seqs[1]):
  for SeqName, seq in SeqDict.items():
    print '>'+SeqName
    print seq
  exit(0)

# Find which of the two sequences is the shorter one
if len(SeqDict[SeqNames[0]]) < len(SeqDict[SeqNames[1]]):
  ShorterSeqName, ShorterSeq, LongerSeqName, LongerSeq = \
  SeqNames[0], SeqDict[SeqNames[0]], SeqNames[1], SeqDict[SeqNames[1]]
else:
  ShorterSeqName, ShorterSeq, LongerSeqName, LongerSeq = \
  SeqNames[1], SeqDict[SeqNames[1]], SeqNames[0], SeqDict[SeqNames[0]]

LenLongerSeq = len(LongerSeq)
deficit = LenLongerSeq - len(ShorterSeq)
Exemplo n.º 3
0
# Gather every primer start and end as a [label, alignment position] pair and
# order the pairs by position (left to right).
SortedList = sorted(
    [['start_of_' + primer, value]
     for primer, value in StartPrimerPositions.items()] +
    [['end_of_' + primer, value]
     for primer, value in EndPrimerPositions.items()],
    key=lambda item: item[1])

# When alignment coordinates were requested, print the sorted positions on one
# space-separated line and stop.
if args.AlignmentCoords:
    print(' '.join(str(item[1]) for item in SortedList))
    exit(0)

# Translate the alignment-based primer positions into positions relative to
# each individual sequence. Walking the primers from left to right, the
# non-gap bases between the previous primer and the current one are added to
# a running total, so each sequence is only traversed once.
PositionsDict = {}
for SeqName, seq in SeqDict.items():
    PositionsWRTseq = []
    LastPositionWRTalignment = 0
    PositionWRTseq = 0
    for name, PrimerPosition in SortedList:
        segment = seq[LastPositionWRTalignment:PrimerPosition]
        PositionWRTseq += sum(1 for base in segment if base not in GapChars)
        PositionsWRTseq.append(PositionWRTseq)
        LastPositionWRTalignment = PrimerPosition

    # A position of zero lies off the left-hand edge of this sequence; map it
    # onto the sequence's first position instead.
    for index, position in enumerate(PositionsWRTseq):
        if position == 0:
            PositionsWRTseq[index] = 1
Exemplo n.º 4
0
# alignment has a gap there in which case we just skip. b) If it's not a
# GapChar, and is a unique insertion (i.e. at that position in the main
# alignment only that sequence has a base), we want to excise that base from
# SeqToAdd. c) If it's neither a GapChar nor a unique insertion, we use that
# base from SeqToAdd.
# Both output strings start empty and are extended by the loop that follows.
SeqToAdd_WithGaps = ReferenceWithoutInsertions = ''
# Both running position counters start at zero.
PositionInSeqToAdd = PositionInFinalAln = 0
# Comma-separated record; this first line is the column header, and the loop
# that follows appends one line per position.
TranslationRecord = \
    '"position w.r.t. final alignment","position w.r.t. ref","base"'
for MainPosition, BaseInMainRef in enumerate(RefSeqFromMain):
    if BaseInMainRef == GapChar:
        EveryBaseHereIsAGap = False
        if ExciseUniqueInsertionsOfRefInMainAlignment:
            EveryBaseHereIsAGap = True
            for SeqName, seq in MainAlnSeqDict.items():
                if SeqName == RefSeqName:
                    continue
                elif seq[MainPosition] != GapChar:
                    EveryBaseHereIsAGap = False
                    break
        if EveryBaseHereIsAGap:
            continue
        SeqToAdd_WithGaps += GapChar
        ReferenceWithoutInsertions += GapChar
        PositionInFinalAln += 1
        TranslationRecord += '\n' + str(PositionInFinalAln) + ',-,-'
    else:
        SeqBase = SeqToAdd[PositionInSeqToAdd]
        RefInMainHasUniqueInsertion = False
        if ExciseUniqueInsertionsOfRefInMainAlignment: