def fastatomodelalignment(libDirectory, motifID, alignmentfile, outputfile):
    """Write an HTML page aligning 3D instances and sequences to a motif model.

    Correspondences are read twice with readcorrespondencesfromfile:

    * from ``alignmentfile``, which supplies the sequence-to-model mapping
      and the per-sequence name/score/edit/cutoff/deficit data;
    * from ``<motifID>_correspondences.txt`` inside ``libDirectory``, which
      supplies the instance-to-group, group-to-model and model-to-column
      mappings (its names and scores are merged into those of the first
      read).

    One row is built per 3D instance (displayed in blue) and per aligned
    sequence (displayed in black), with one cell per model column.  The
    rows are written to ``outputfile`` as an HTML table, followed by the
    contents of ``<motifID>_interactions.txt`` and ``<motifID>_model.txt``
    from ``libDirectory``.

    Parameters:
        libDirectory: directory holding the motif's correspondences,
            interactions and model text files.
        motifID: motif group identifier; used in file names, headings and
            the motif atlas link.
        alignmentfile: path handed to readcorrespondencesfromfile for the
            sequence alignment.
        outputfile: path of the HTML file to write.

    Returns:
        dict mapping each row key to a list of strings, one per model
        column, holding the characters assigned to that column.

    Raises:
        AttributeError: if a correspondence key does not match the expected
            ``...Instance_<n>`` / ``Sequence_<n>`` pattern.
        IOError: if one of the library files cannot be opened.
    """
    # readcorrespondencesfromfile returns 13 mappings, in this order:
    # InstanceToGroup, InstanceToPDB, InstanceToSequence, GroupToModel,
    # ModelToColumn, SequenceToModel, HasName, HasScore, HasInteriorEdit,
    # HasFullEdit, HasCutoffValue, HasCutoffScore, HasAlignmentScoreDeficit.
    # Only the sequence-related ones are taken from the alignment file.
    (_, _, _, _, _, SequenceToModel, HasName, HasScore, HasInteriorEdit,
     HasFullEdit, HasCutoffValue, HasCutoffScore,
     HasAlignmentScoreDeficit) = readcorrespondencesfromfile(alignmentfile)
    print("Read alignment to model from " + alignmentfile)

    # NOTE: paths are joined with a literal backslash (Windows-style).
    FN = libDirectory + "\\" + motifID + "_correspondences.txt"

    # The motif group's own correspondences provide the 3D instance data.
    (InstanceToGroup, _, _, GroupToModel, ModelToColumn, _, ModelHasName,
     ModelHasScore, _, _, _, _, _) = readcorrespondencesfromfile(FN)

    HasName.update(ModelHasName)
    HasScore.update(ModelHasScore)
    print("Read model correspondences from " + FN)

    instance_pattern = re.compile(r"(.+Instance_[0-9]+)")
    sequence_pattern = re.compile(r"(Sequence_[0-9]+)")

    # One row per instance / sequence, one initially blank cell per column.
    num_columns = len(ModelToColumn)
    DisplayColor = {}
    aligdata = {}
    for key in InstanceToGroup:
        row = instance_pattern.search(key).group(1)
        aligdata[row] = [''] * num_columns
        DisplayColor[row] = 'blue'       # 3D instances are shown in blue
    for key in SequenceToModel:
        row = sequence_pattern.search(key).group(1)
        aligdata[row] = [''] * num_columns
        DisplayColor[row] = 'black'      # aligned sequences are shown in black

    # Sorting by key should keep insertions in order within a column.
    for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
        print(key)
        row = instance_pattern.search(key).group(1)
        print(row)
        # Map the position in the group to its (1-based) model column.
        column = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
        # The last character of the key is the base for this position.
        aligdata[row][column - 1] += key[-1]

    for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
        row = sequence_pattern.search(key).group(1)
        column = int(ModelToColumn[SequenceToModel[key]])
        aligdata[row][column - 1] += key[-1]

    # Library files appended verbatim, each under its own heading.
    sections = [
        ('<br><b>Conserved interactions between motif group positions in '
         + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_interactions.txt"),
        ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_model.txt"),
    ]

    # The with-block closes the output file even if a library file is
    # missing or a helper raises part-way through.
    with open(outputfile, "w") as f:
        f.write("<html><title>Alignment to " + motifID + "</title>\n")
        f.write("<h1>Alignment of " + alignmentfile + " to " + motifID
                + "</h1>\n")
        f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' + motifID
                + '" target="_blank">Motif atlas entry for ' + motifID
                + '</a><br>')
        f.write("The correspondence between sequences from 3D structures and the "
                "motif group is shown in blue, JAR3D alignments of sequences to the "
                "motif group are shown in black, and sequences which are too long or "
                "too short to be aligned are indicated by : characters.")
        f.write("<table>")
        f.write(alignmentheaderhtml(ModelToColumn, GroupToModel) + '\n')
        f.write(alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore,
                                  HasInteriorEdit, HasFullEdit,
                                  HasCutoffValue, HasCutoffScore,
                                  HasAlignmentScoreDeficit))
        f.write("</table>")

        for heading, path in sections:
            f.write(heading)
            f.write('<pre>')
            with open(path, "r") as mf:
                for line in mf:
                    f.write(line)
            f.write("</pre>")

        f.write("</html>")

    print("Wrote html file with alignment of 3D instances and sequences for "
          + motifID)
    return aligdata
def fastatomodelalignment(libDirectory, motifID, alignmentfile, outputfile):
    """Write an HTML page aligning 3D instances and sequences to a motif model.

    Correspondences are read twice with readcorrespondencesfromfile:

    * from ``alignmentfile``, which supplies the sequence-to-model mapping
      and the per-sequence name/score/edit/cutoff/deficit data;
    * from ``<motifID>_correspondences.txt`` inside ``libDirectory``, which
      supplies the instance-to-group, group-to-model and model-to-column
      mappings (its names and scores are merged into those of the first
      read).

    One row is built per 3D instance (displayed in blue) and per aligned
    sequence (displayed in black), with one cell per model column.  The
    rows are written to ``outputfile`` as an HTML table, followed by the
    contents of ``<motifID>_interactions.txt`` and ``<motifID>_model.txt``
    from ``libDirectory``.

    Parameters:
        libDirectory: directory holding the motif's correspondences,
            interactions and model text files.
        motifID: motif group identifier; used in file names, headings and
            the motif atlas link.
        alignmentfile: path handed to readcorrespondencesfromfile for the
            sequence alignment.
        outputfile: path of the HTML file to write.

    Returns:
        dict mapping each row key to a list of strings, one per model
        column, holding the characters assigned to that column.

    Raises:
        AttributeError: if a correspondence key does not match the expected
            ``...Instance_<n>`` / ``Sequence_<n>`` pattern.
        IOError: if one of the library files cannot be opened.
    """
    # readcorrespondencesfromfile returns 13 mappings, in this order:
    # InstanceToGroup, InstanceToPDB, InstanceToSequence, GroupToModel,
    # ModelToColumn, SequenceToModel, HasName, HasScore, HasInteriorEdit,
    # HasFullEdit, HasCutoffValue, HasCutoffScore, HasAlignmentScoreDeficit.
    # Only the sequence-related ones are taken from the alignment file.
    (_, _, _, _, _, SequenceToModel, HasName, HasScore, HasInteriorEdit,
     HasFullEdit, HasCutoffValue, HasCutoffScore,
     HasAlignmentScoreDeficit) = readcorrespondencesfromfile(alignmentfile)
    print("Read alignment to model from " + alignmentfile)

    # NOTE: paths are joined with a literal backslash (Windows-style).
    FN = libDirectory + "\\" + motifID + "_correspondences.txt"

    # The motif group's own correspondences provide the 3D instance data.
    (InstanceToGroup, _, _, GroupToModel, ModelToColumn, _, ModelHasName,
     ModelHasScore, _, _, _, _, _) = readcorrespondencesfromfile(FN)

    HasName.update(ModelHasName)
    HasScore.update(ModelHasScore)
    print("Read model correspondences from " + FN)

    instance_pattern = re.compile(r"(.+Instance_[0-9]+)")
    sequence_pattern = re.compile(r"(Sequence_[0-9]+)")

    # One row per instance / sequence, one initially blank cell per column.
    num_columns = len(ModelToColumn)
    DisplayColor = {}
    aligdata = {}
    for key in InstanceToGroup:
        row = instance_pattern.search(key).group(1)
        aligdata[row] = [''] * num_columns
        DisplayColor[row] = 'blue'       # 3D instances are shown in blue
    for key in SequenceToModel:
        row = sequence_pattern.search(key).group(1)
        aligdata[row] = [''] * num_columns
        DisplayColor[row] = 'black'      # aligned sequences are shown in black

    # Sorting by key should keep insertions in order within a column.
    for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
        print(key)
        row = instance_pattern.search(key).group(1)
        print(row)
        # Map the position in the group to its (1-based) model column.
        column = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
        # The last character of the key is the base for this position.
        aligdata[row][column - 1] += key[-1]

    for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
        row = sequence_pattern.search(key).group(1)
        column = int(ModelToColumn[SequenceToModel[key]])
        aligdata[row][column - 1] += key[-1]

    # Library files appended verbatim, each under its own heading.
    sections = [
        ('<br><b>Conserved interactions between motif group positions in '
         + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_interactions.txt"),
        ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_model.txt"),
    ]

    # The with-block closes the output file even if a library file is
    # missing or a helper raises part-way through.
    with open(outputfile, "w") as f:
        f.write("<html><title>Alignment to " + motifID + "</title>\n")
        f.write("<h1>Alignment of " + alignmentfile + " to " + motifID
                + "</h1>\n")
        f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' + motifID
                + '" target="_blank">Motif atlas entry for ' + motifID
                + '</a><br>')
        f.write("The correspondence between sequences from 3D structures and the "
                "motif group is shown in blue, JAR3D alignments of sequences to the "
                "motif group are shown in black, and sequences which are too long or "
                "too short to be aligned are indicated by : characters.")
        f.write("<table>")
        f.write(alignmentheaderhtml(ModelToColumn, GroupToModel) + '\n')
        f.write(alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore,
                                  HasInteriorEdit, HasFullEdit,
                                  HasCutoffValue, HasCutoffScore,
                                  HasAlignmentScoreDeficit))
        f.write("</table>")

        for heading, path in sections:
            f.write(heading)
            f.write('<pre>')
            with open(path, "r") as mf:
                for line in mf:
                    f.write(line)
            f.write("</pre>")

        f.write("</html>")

    print("Wrote html file with alignment of 3D instances and sequences for "
          + motifID)
    return aligdata
def onemodeldiagnostic(motifID, libDirectory, diagDirectory, prevHTML, nextHTML):
    """Write the group-to-model diagnostic HTML page for one motif group.

    Reads ``<motifID>_diagnostics.txt`` from ``diagDirectory`` with
    readcorrespondencesfromfile, compares the model node each 3D nucleotide
    belongs to with the node its sequence was aligned to, and writes
    ``<motifID>_GroupToModelDiagnostic.html`` into ``diagDirectory``.  The
    page holds an alignment table (3D instances in blue, aligned sequences
    in black, sequences with a disagreement in red) followed by the
    contents of the motif's ``_interactions.txt``, ``_model.txt`` and
    ``.fasta`` files from ``libDirectory``.

    Parameters:
        motifID: motif group identifier; used in file names, headings and
            the motif atlas link.
        libDirectory: directory holding the motif's interactions, model and
            FASTA files.
        diagDirectory: directory holding the diagnostics file; the HTML
            page is written here too.
        prevHTML: href used for the "Previous group" link.
        nextHTML: href used for the "Next group" link.

    Returns:
        (aligdata, MisAlign): aligdata maps each row key to a list of
        strings, one per model column; MisAlign is 0 plus 0.5 for every
        nucleotide whose group node differs from its aligned node.

    Raises:
        AttributeError: if a correspondence key does not match the expected
            ``...Instance_<n>`` / ``...Sequence_<n>`` pattern.
        IOError: if one of the library files cannot be opened.
    """
    # NOTE: paths are joined with a literal backslash (Windows-style).
    FN = diagDirectory + "\\" + motifID + "_diagnostics.txt"

    # Correspondences for the given motif group; there are many of them.
    (InstanceToGroup, InstanceToPDB, InstanceToSequence, GroupToModel,
     ModelToColumn, SequenceToModel, HasName, HasScore, HasInteriorEdit,
     HasFullEdit, HasCutoffValue, HasCutoffScore,
     HasAlignmentScoreDeficit) = readcorrespondencesfromfile(FN)
    print("Read diagnostics from " + FN)

    instance_pattern = re.compile(r"(.+Instance_[0-9]+)")
    sequence_pattern = re.compile(r"(.+Sequence_[0-9]+)")

    # Default display colour per row: blue for 3D instances, black for
    # aligned sequences.
    DisplayColor = {}
    for key in InstanceToPDB:
        DisplayColor[instance_pattern.search(key).group(1)] = 'blue'
    for key in SequenceToModel:
        DisplayColor[sequence_pattern.search(key).group(1)] = 'black'

    # Flag sequences whose alignment disagrees with the 3D correspondence.
    MisAlign = 0
    for nt in sorted(InstanceToPDB):
        group_node = GroupToModel[InstanceToGroup[nt]]
        aligned_node = SequenceToModel[InstanceToSequence[nt]]
        if group_node != aligned_node:
            print(nt + ' belongs to ' + group_node
                  + ' but was aligned to ' + aligned_node)
            # Each disagreeing nucleotide contributes half a misalignment.
            MisAlign += 0.5
            seq_row = sequence_pattern.search(InstanceToSequence[nt]).group(1)
            DisplayColor[seq_row] = 'red'

    # One row per instance / sequence, one initially blank cell per column.
    num_columns = len(ModelToColumn)
    aligdata = {}
    for key in InstanceToGroup:
        aligdata[instance_pattern.search(key).group(1)] = [''] * num_columns
    for key in SequenceToModel:
        aligdata[sequence_pattern.search(key).group(1)] = [''] * num_columns

    # Sorting by key should keep insertions in order within a column.
    for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
        row = instance_pattern.search(key).group(1)
        # Map the position in the group to its (1-based) model column.
        column = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
        # The last character of the key is the base for this position.
        aligdata[row][column - 1] += key[-1]

    for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
        row = sequence_pattern.search(key).group(1)
        column = int(ModelToColumn[SequenceToModel[key]])
        aligdata[row][column - 1] += key[-1]

    # Library files appended verbatim, each under its own heading.
    sections = [
        ('<br><b>Conserved interactions between motif group positions in '
         + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_interactions.txt"),
        ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_model.txt"),
        # Heading now closes its bold tag (was ':<b>').
        ('<b>Sequences of instances from ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + ".fasta"),
    ]

    html_path = diagDirectory + "\\" + motifID + "_GroupToModelDiagnostic.html"

    # The with-block closes the output file even if a library file is
    # missing or a helper raises part-way through.
    with open(html_path, "w") as f:
        f.write("<html><title>" + motifID + " alignment</title>\n")
        f.write("<h1>Alignment of " + motifID
                + " sequences from 3D to the JAR3D model</h1>\n")
        f.write('<a href="' + prevHTML + '">Previous group</a> | ')
        f.write('<a href="' + nextHTML + '">Next group</a> | ')
        f.write('<a href="GroupToModelDiagnostic.html">List of all groups</a> | ')
        f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' + motifID
                + '" target="_blank">Motif atlas entry for ' + motifID
                + '</a> ')
        f.write("<br>The correspondence between sequences from 3D structures and the "
                "motif group is shown in blue and the JAR3D alignment of the sequences "
                "to the motif group is shown in black. Occasionally the two disagree, "
                "in which case the JAR3D alignment is shown in red.")
        f.write("<table>")
        f.write(alignmentheaderhtml(ModelToColumn, GroupToModel) + '\n')
        f.write(alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore,
                                  HasInteriorEdit, HasFullEdit,
                                  HasCutoffValue, HasCutoffScore,
                                  HasAlignmentScoreDeficit))
        f.write("</table>")

        for heading, path in sections:
            f.write(heading)
            f.write('<pre>')
            with open(path, "r") as mf:
                for line in mf:
                    f.write(line)
            f.write("</pre>")

        f.write("</html>")

    print("Wrote html file with alignment of 3D instances and sequences for "
          + motifID)
    return aligdata, MisAlign
def onemodeldiagnostic(motifID, libDirectory, diagDirectory, prevHTML, nextHTML):
    """Write the group-to-model diagnostic HTML page for one motif group.

    Reads ``<motifID>_diagnostics.txt`` from ``diagDirectory`` with
    readcorrespondencesfromfile, compares the model node each 3D nucleotide
    belongs to with the node its sequence was aligned to, and writes
    ``<motifID>_GroupToModelDiagnostic.html`` into ``diagDirectory``.  The
    page holds an alignment table (3D instances in blue, aligned sequences
    in black, sequences with a disagreement in red) followed by the
    contents of the motif's ``_interactions.txt``, ``_model.txt`` and
    ``.fasta`` files from ``libDirectory``.

    Parameters:
        motifID: motif group identifier; used in file names, headings and
            the motif atlas link.
        libDirectory: directory holding the motif's interactions, model and
            FASTA files.
        diagDirectory: directory holding the diagnostics file; the HTML
            page is written here too.
        prevHTML: href used for the "Previous group" link.
        nextHTML: href used for the "Next group" link.

    Returns:
        (aligdata, MisAlign): aligdata maps each row key to a list of
        strings, one per model column; MisAlign is 0 plus 0.5 for every
        nucleotide whose group node differs from its aligned node.

    Raises:
        AttributeError: if a correspondence key does not match the expected
            ``...Instance_<n>`` / ``...Sequence_<n>`` pattern.
        IOError: if one of the library files cannot be opened.
    """
    # NOTE: paths are joined with a literal backslash (Windows-style).
    FN = diagDirectory + "\\" + motifID + "_diagnostics.txt"

    # Correspondences for the given motif group; there are many of them.
    (InstanceToGroup, InstanceToPDB, InstanceToSequence, GroupToModel,
     ModelToColumn, SequenceToModel, HasName, HasScore, HasInteriorEdit,
     HasFullEdit, HasCutoffValue, HasCutoffScore,
     HasAlignmentScoreDeficit) = readcorrespondencesfromfile(FN)
    print("Read diagnostics from " + FN)

    instance_pattern = re.compile(r"(.+Instance_[0-9]+)")
    sequence_pattern = re.compile(r"(.+Sequence_[0-9]+)")

    # Default display colour per row: blue for 3D instances, black for
    # aligned sequences.
    DisplayColor = {}
    for key in InstanceToPDB:
        DisplayColor[instance_pattern.search(key).group(1)] = 'blue'
    for key in SequenceToModel:
        DisplayColor[sequence_pattern.search(key).group(1)] = 'black'

    # Flag sequences whose alignment disagrees with the 3D correspondence.
    MisAlign = 0
    for nt in sorted(InstanceToPDB):
        group_node = GroupToModel[InstanceToGroup[nt]]
        aligned_node = SequenceToModel[InstanceToSequence[nt]]
        if group_node != aligned_node:
            print(nt + ' belongs to ' + group_node
                  + ' but was aligned to ' + aligned_node)
            # Each disagreeing nucleotide contributes half a misalignment.
            MisAlign += 0.5
            seq_row = sequence_pattern.search(InstanceToSequence[nt]).group(1)
            DisplayColor[seq_row] = 'red'

    # One row per instance / sequence, one initially blank cell per column.
    num_columns = len(ModelToColumn)
    aligdata = {}
    for key in InstanceToGroup:
        aligdata[instance_pattern.search(key).group(1)] = [''] * num_columns
    for key in SequenceToModel:
        aligdata[sequence_pattern.search(key).group(1)] = [''] * num_columns

    # Sorting by key should keep insertions in order within a column.
    for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
        row = instance_pattern.search(key).group(1)
        # Map the position in the group to its (1-based) model column.
        column = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
        # The last character of the key is the base for this position.
        aligdata[row][column - 1] += key[-1]

    for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
        row = sequence_pattern.search(key).group(1)
        column = int(ModelToColumn[SequenceToModel[key]])
        aligdata[row][column - 1] += key[-1]

    # Library files appended verbatim, each under its own heading.
    sections = [
        ('<br><b>Conserved interactions between motif group positions in '
         + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_interactions.txt"),
        ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_model.txt"),
        # Heading now closes its bold tag (was ':<b>').
        ('<b>Sequences of instances from ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + ".fasta"),
    ]

    html_path = diagDirectory + "\\" + motifID + "_GroupToModelDiagnostic.html"

    # The with-block closes the output file even if a library file is
    # missing or a helper raises part-way through.
    with open(html_path, "w") as f:
        f.write("<html><title>" + motifID + " alignment</title>\n")
        f.write("<h1>Alignment of " + motifID
                + " sequences from 3D to the JAR3D model</h1>\n")
        f.write('<a href="' + prevHTML + '">Previous group</a> | ')
        f.write('<a href="' + nextHTML + '">Next group</a> | ')
        f.write('<a href="GroupToModelDiagnostic.html">List of all groups</a> | ')
        f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' + motifID
                + '" target="_blank">Motif atlas entry for ' + motifID
                + '</a> ')
        f.write("<br>The correspondence between sequences from 3D structures and the "
                "motif group is shown in blue and the JAR3D alignment of the sequences "
                "to the motif group is shown in black. Occasionally the two disagree, "
                "in which case the JAR3D alignment is shown in red.")
        f.write("<table>")
        f.write(alignmentheaderhtml(ModelToColumn, GroupToModel) + '\n')
        f.write(alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore,
                                  HasInteriorEdit, HasFullEdit,
                                  HasCutoffValue, HasCutoffScore,
                                  HasAlignmentScoreDeficit))
        f.write("</table>")

        for heading, path in sections:
            f.write(heading)
            f.write('<pre>')
            with open(path, "r") as mf:
                for line in mf:
                    f.write(line)
            f.write("</pre>")

        f.write("</html>")

    print("Wrote html file with alignment of 3D instances and sequences for "
          + motifID)
    return aligdata, MisAlign