def _splitDetails(details, label):
    """Return the non-empty ';'-separated entries of *details* with *label* removed.

    A real list is returned (not a ``filter`` object) so callers can take its
    length and index into it on both Python 2 and Python 3.
    """
    stripped = details.replace(label, '').strip()
    return [entry for entry in stripped.split(';') if entry]


def _replaceAncestralGene(ancestor, gene, idNumber, replacement):
    """Write *replacement* over the placeholder for *idNumber* in the ancestor.

    Only the first fragment whose ``originalSequence`` contains *idNumber* is
    touched: the ``gene-idNumber`` text in ``originalSequence`` is replaced and
    the first entry of ``sequence`` containing *idNumber* is overwritten.
    """
    for fragment in ancestor.genomeFragments:
        if idNumber in fragment.originalSequence:
            fragment.originalSequence = fragment.originalSequence.replace(
                gene + '-' + idNumber, replacement)
            for index, entry in enumerate(fragment.sequence):
                if idNumber in entry:
                    fragment.sequence[index] = replacement
                    break
            break


def _reconcileDetails(ancestor, details1, details2, neighborDetails, marker,
                      label):
    """Decide, per recorded mutation, which sibling's gene the ancestor keeps.

    Parameters:
        ancestor: strain whose genomeFragments are updated in place.
        details1, details2: detail strings of the two siblings; entry i of one
            is paired with entry i of the other (an IndexError is raised if
            details2 has fewer entries than details1, as before).
        neighborDetails: detail string from the strain1-vs-neighbor comparison,
            or None when there is no neighbor to consult.
        marker: character whose presence in neighborDetails signals that it
            holds entries worth comparing ('#' or '@' at the call sites).
        label: prefix of the detail strings, e.g. 'Codon Mismatch:'.

    Returns:
        (newDetails1, newDetails2): rebuilt detail strings for the siblings.
    """
    newDetails1 = label
    newDetails2 = label

    entries1 = _splitDetails(details1, label)
    entries2 = _splitDetails(details2, label)

    # The neighbor's entries do not change between iterations, so split once.
    neighborEntries = []
    if neighborDetails is not None and marker in neighborDetails:
        neighborEntries = _splitDetails(neighborDetails, label)

    for index, entry1 in enumerate(entries1):
        gene1, idNumber1, position1 = parseDetails(entry1)
        gene2, idNumber2, position2 = parseDetails(entries2[index])
        processed = False  # Tracks whether the neighbor showed the same event

        for neighborEntry in neighborEntries:
            gene3, idNumber3, position3 = parseDetails(neighborEntry)
            if gene1 == gene3 and position1 == position3:
                # Same event seen against the neighbor, so strain 1 carries
                # the change: keep strain 2's version of the gene.
                processed = True
                _replaceAncestralGene(ancestor, gene1, idNumber1, gene2)

        if processed:
            newDetails1 += gene1 + ' ' + position1 + ';'
        else:
            # No neighbor, or the neighbor did not show the same event: assume
            # strain 2 carries the change and keep strain 1's gene.
            newDetails2 += gene2 + ' ' + position2 + ';'
            _replaceAncestralGene(ancestor, gene1, idNumber1, gene1)

    return newDetails1, newDetails2


def createAncestor(strain1, strain2, neighborStrain):
    """Build the ancestor of two sibling strains, optionally using a neighbor.

    The siblings are aligned with constructEvents, their dot-plot points are
    classified by determineRegions, and the ancestral fragment arrangement is
    chosen with or without the neighbor. Global event counters are updated,
    and codon mismatch / substitution details on both siblings are rewritten
    so each event is attributed to a single sibling.

    NOTE(review): a later definition named createAncestor appears further down
    in this file; if both live in the same module the later one rebinds the
    name - confirm which is intended.

    Parameters:
        strain1, strain2: the sibling strains; their detail attributes are
            modified.
        neighborStrain: neighboring strain, or None if there is none. Only a
            deep copy of it (and of strain1) is used for the neighbor
            comparison so those alignments are not stored on the originals.

    Returns:
        A BacterialStrain named 'Ancestor <n>' built from the chosen fragments.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    # Copies must be taken before constructEvents(strain1, strain2) runs so
    # the neighbor comparison starts from the untouched strain.
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    if globals.printToConsole:
        print(
            'Performing a series of alignments for the following strains: %s, %s'
            % (strain1.name, strain2.name))

    # Only do the backtrace between these two strains; always reset the flags,
    # even if the alignment raises, so later comparisons are not affected.
    globals.enableDeletionReversions = True
    globals.enableSelfAlignmentDetails = True
    try:
        events, duplicatesStrain1, duplicatesStrain2 = constructEvents(
            strain1, strain2)
    finally:
        globals.enableSelfAlignmentDetails = False
        globals.enableDeletionReversions = False

    if globals.printToConsole:
        print('Constructing dot plot for the following strains: %s, %s' %
              (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(
        events, duplicatesStrain1, duplicatesStrain2, strain1, strain2)

    if globals.printToConsole:
        createDotPlot(points, strain1, strain2)

    # Compute the forward conserved, transposed, inverted, and inverted
    # transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    # Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        if globals.printToConsole:
            print(
                'Now performing a series of alignments between the nighboring strains: %s, %s'
                % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = constructEvents(
            strain1Copy, neighborCopy)

        if globals.printToConsole:
            print('Constructing dot plot for the neighboring strains: %s, %s' %
                  (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor,
            strain1Copy, neighborCopy)

        # Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = determineAncestralFragmentArrangementUsingNeighbor(
            FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR,
            neighborLostPoints, strain1, strain2)
    else:
        # The former "no inverted or transposed regions" branch could never
        # run here (the neighbor is always missing on this path), so it was
        # removed.
        if globals.printToConsole:
            print('No neighbor found!')
        ancestralFragments, strain2 = determineAncestralFragmentArrangementWithoutNeighbor(
            FCR, TR, IR, ITR, lostPoints, strain2)

    # Computes the total number of inversions, transpositions, inverted
    # transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    # Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    # Codon mismatches and substitutions are counted for strain2 only;
    # counting both siblings would double the number of events.
    updateGlobalCodonMismatchCounter(strain2)
    updateGlobalSubstitutionCounter(strain2)

    ancestor = BacterialStrain(ancestralName, ancestralFragments)

    if globals.printToConsole:
        print(strain1.name)
        for frag in strain1.genomeFragments:
            print(frag.originalSequence)
        print(strain2.name)
        for frag in strain2.genomeFragments:
            print(frag.originalSequence)

    # Handle the codon mismatches
    if '#' in strain1.codonMismatchDetails:
        neighborDetails = strain1Copy.codonMismatchDetails if neighborCopy else None
        strain1.codonMismatchDetails, strain2.codonMismatchDetails = _reconcileDetails(
            ancestor, strain1.codonMismatchDetails,
            strain2.codonMismatchDetails, neighborDetails, '#',
            'Codon Mismatch:')

    # Handle the substitutions
    if '@' in strain1.substitutionDetails:
        neighborDetails = strain1Copy.substitutionDetails if neighborCopy else None
        strain1.substitutionDetails, strain2.substitutionDetails = _reconcileDetails(
            ancestor, strain1.substitutionDetails,
            strain2.substitutionDetails, neighborDetails, '@',
            'Substitution:')

    # Add the details from the self global alignment; they were stored in
    # separate attributes so they would not interfere with the two handlers
    # above.
    strain1.codonMismatchDetails += strain1.tempCodonDetails
    strain2.codonMismatchDetails += strain2.tempCodonDetails
    strain1.substitutionDetails += strain1.tempSubstitutionDetails
    strain2.substitutionDetails += strain2.tempSubstitutionDetails

    return ancestor
def createAncestor(strain1, strain2, neighborStrain):
    """Build the ancestor of two sibling strains, optionally using a neighbor.

    This variant always prints progress, draws the dot plot and the
    duplication/deletion bar graphs, and appends both siblings' details to
    the output file before returning.

    NOTE(review): an earlier definition named createAncestor appears above in
    this file; if both live in the same module this later one rebinds the
    name - confirm which is intended.
    NOTE(review): outputFileName is neither a parameter nor a local here; it
    must be available at module level - confirm.

    Parameters:
        strain1, strain2: the sibling strains to compare.
        neighborStrain: neighboring strain, or None if there is none. Only a
            deep copy of it (and of strain1) is used for the neighbor
            comparison so those alignments are not stored on the originals.

    Returns:
        A BacterialStrain named 'Ancestor <n>' built from the chosen fragments.
    """
    globals.ancestralCounter += 1
    ancestralName = 'Ancestor ' + str(globals.ancestralCounter)

    # Copies must be taken before constructEvents(strain1, strain2) runs so
    # the neighbor comparison starts from the untouched strain.
    strain1Copy = copy.deepcopy(strain1)
    neighborCopy = copy.deepcopy(neighborStrain)

    print(
        'Performing a series of alignments for the following strains: %s, %s'
        % (strain1.name, strain2.name))
    events, duplicatesStrain1, duplicatesStrain2 = constructEvents(
        strain1, strain2)

    print('Constructing dot plot for the following strains: %s, %s' %
          (strain1.name, strain2.name))
    points, lostPoints = normalizeIndexesForDotPlot(
        events, duplicatesStrain1, duplicatesStrain2, strain1, strain2)
    createDotPlot(points, strain1, strain2)

    createBarGraph(strain1.duplicationCounts,
                   'Distribution of Duplications for %s' % (strain1.name))
    createBarGraph(strain2.duplicationCounts,
                   'Distribution of Duplications for %s' % (strain2.name))
    # Remember! Deletions refer to the other strain!
    createBarGraph(strain1.deletionCounts,
                   'Distribution of Deletions for %s' % (strain1.name))
    createBarGraph(strain2.deletionCounts,
                   'Distribution of Deletions for %s' % (strain2.name))

    # Compute the forward conserved, transposed, inverted, and inverted
    # transposed regions
    FCR, TR, IR, ITR = determineRegions(points)

    # Compare one of the siblings to the neighbor if one exists
    if neighborCopy is not None:
        print(
            'Now performing a series of alignments between the nighboring strains: %s, %s'
            % (strain1Copy.name, neighborCopy.name))
        neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor = constructEvents(
            strain1Copy, neighborCopy)

        print('Constructing dot plot for the neighboring strains: %s, %s' %
              (strain1Copy.name, neighborCopy.name))
        neighborPoints, neighborLostPoints = normalizeIndexesForDotPlot(
            neighborEvents, duplicatesStrain1Copy, duplicatesStrainNeighbor,
            strain1Copy, neighborCopy)

        # Compute the various regions for the neighbor
        NFCR, NTR, NIR, NITR = determineRegions(neighborPoints)
        ancestralFragments, strain1, strain2 = determineAncestralFragmentArrangementUsingNeighbor(
            FCR, TR, IR, ITR, lostPoints, NFCR, NTR, NIR, NITR,
            neighborLostPoints, strain1, strain2)
    else:
        # The former "no inverted or transposed regions" branch could never
        # run here (the neighbor is always missing on this path), so it was
        # removed.
        print('No neighbor found!')
        ancestralFragments, strain2 = determineAncestralFragmentArrangementWithoutNeighbor(
            FCR, TR, IR, ITR, lostPoints, strain2)

    # Computes the total number of inversions, transpositions, inverted
    # transpositions
    globals.inversionCounter += len(IR)
    globals.transposedCounter += len(TR)
    globals.invertedTransposedCounter += len(ITR)

    # Increments the counters for the size distributions for each event type
    updateGlobalDeletionCounter(strain1)
    updateGlobalDeletionCounter(strain2)
    updateGlobalDuplicationCounter(strain1)
    updateGlobalDuplicationCounter(strain2)
    updateGlobalInversionSizeDistributionCounter(strain1)
    updateGlobalInversionSizeDistributionCounter(strain2)
    updateGlobalTranspositionSizeDistributionCounter(strain1)
    updateGlobalTranspositionSizeDistributionCounter(strain2)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain1)
    updateGlobalInvertedTranspositionSizeDistributionCounter(strain2)

    # Append all details to file here
    outputStrainDetailsToFile(outputFileName, strain1)
    outputStrainDetailsToFile(outputFileName, strain2)

    return BacterialStrain(ancestralName, ancestralFragments)