Exemplo n.º 1
0
def fastatomodelalignment(libDirectory,motifID,alignmentfile,outputfile):
  """Write an HTML page showing 3D instances and aligned sequences against a motif model.

  The sequence-to-model alignment is read from alignmentfile and the motif
  group's own correspondences are read from the motifID "_correspondences.txt"
  file in libDirectory.  One row is built per instance / sequence, with one
  cell per entry of ModelToColumn, and the resulting table is written to
  outputfile followed by the contents of the motif's "_interactions.txt" and
  "_model.txt" files.

  Args:
    libDirectory: directory holding the motif's _correspondences.txt,
      _interactions.txt and _model.txt files (joined with a backslash).
    motifID: motif group identifier; used in file names, headings and the
      motif atlas link.
    alignmentfile: correspondences file for the sequences being aligned.
    outputfile: path of the HTML file to create or overwrite.

  Returns:
    dict mapping each row label (the "...Instance_N" or "Sequence_N" part of
    a correspondence key) to a list of strings, one per model column, holding
    the characters placed in that column.

  Raises:
    AttributeError: if a correspondence key does not contain the expected
      Instance_N / Sequence_N label.
    IOError: if the interactions or model file cannot be opened.
  """
  # read correspondences from the fasta file to the model; the first five
  # return values are discarded because the motif group's own maps, read
  # below, are the ones used for instances
  (_, _, _, _, _, SequenceToModel, HasName, HasScore, HasInteriorEdit,
   HasFullEdit, HasCutoffValue, HasCutoffScore,
   HasAlignmentScoreDeficit) = readcorrespondencesfromfile(alignmentfile)

  # print(...) with a single argument behaves exactly like the print statement
  print("Read alignment to model from " + alignmentfile)

  FN = libDirectory + "\\" + motifID + "_correspondences.txt"

  # read correspondences for the given motif group; there are many such correspondences
  (InstanceToGroup, _, _, GroupToModel, ModelToColumn, _, ModelHasName,
   ModelHasScore, _, _, _, _, _) = readcorrespondencesfromfile(FN)

  # merge the names and scores read from the motif group file into those
  # read from the alignment file
  HasName.update(ModelHasName)
  HasScore.update(ModelHasScore)

  print("Read model correspondences from " + FN)

  instance_label = re.compile("(.+Instance_[0-9]+)")
  sequence_label = re.compile("(Sequence_[0-9]+)")
  width = len(ModelToColumn)

  # One row per instance from 3D and per aligned sequence, every cell blank
  DisplayColor = {}
  aligdata = {}
  for key in InstanceToGroup:
    row = instance_label.search(key).group(1)
    aligdata[row] = [''] * width
    DisplayColor[row] = 'blue'                       # default display color
  for key in SequenceToModel:
    row = sequence_label.search(key).group(1)
    aligdata[row] = [''] * width
    DisplayColor[row] = 'black'                      # default display color

  # sorting by key should keep insertions in order
  for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
    row = instance_label.search(key).group(1)
    # map position in group to the correct column in the model and in the alignment
    t = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
    aligdata[row][t-1] += key[-1]                    # last character of the key is the base for this position

  for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
    row = sequence_label.search(key).group(1)
    t = int(ModelToColumn[SequenceToModel[key]])
    aligdata[row][t-1] += key[-1]

  # (heading, file) pairs appended verbatim inside <pre> blocks after the table
  sections = (
    ('<br><b>Conserved interactions between motif group positions in ' + motifID + ':</b>',
     libDirectory + "\\" + motifID + "_interactions.txt"),
    ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
     libDirectory + "\\" + motifID + "_model.txt"),
  )

  # the with-block closes the output file even if a helper or a missing
  # input file raises part-way through
  with open(outputfile,"w") as f:
    f.write("<html><title>Alignment to "+motifID+"</title>\n")
    f.write("<h1>Alignment of " + alignmentfile +" to "+motifID+"</h1>\n")
    f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' + motifID + '" target="_blank">Motif atlas entry for ' + motifID + '</a><br>')
    f.write("The correspondence between sequences from 3D structures and the motif group is shown in blue, JAR3D alignments of sequences to the motif group are shown in black, and sequences which are too long or too short to be aligned are indicated by : characters.")
    f.write("<table>")
    f.write(alignmentheaderhtml(ModelToColumn, GroupToModel)+'\n')
    f.write(alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore, HasInteriorEdit, HasFullEdit, HasCutoffValue, HasCutoffScore, HasAlignmentScoreDeficit))
    f.write("</table>")

    for heading, path in sections:
      f.write(heading)
      f.write('<pre>')
      with open(path,"r") as mf:
        for line in mf:
          f.write(line)
      f.write("</pre>")

    f.write("</html>")

  print("Wrote html file with alignment of 3D instances and sequences for " + motifID)

  return aligdata
Exemplo n.º 2
0
def fastatomodelalignment(libDirectory, motifID, alignmentfile, outputfile):
    """Write an HTML page showing 3D instances and aligned sequences against a motif model.

    The sequence-to-model alignment is read from alignmentfile and the motif
    group's own correspondences are read from the motifID
    "_correspondences.txt" file in libDirectory.  One row is built per
    instance / sequence, with one cell per entry of ModelToColumn, and the
    resulting table is written to outputfile followed by the contents of the
    motif's "_interactions.txt" and "_model.txt" files.

    Args:
        libDirectory: directory holding the motif's _correspondences.txt,
            _interactions.txt and _model.txt files (joined with a backslash).
        motifID: motif group identifier; used in file names, headings and
            the motif atlas link.
        alignmentfile: correspondences file for the sequences being aligned.
        outputfile: path of the HTML file to create or overwrite.

    Returns:
        dict mapping each row label (the "...Instance_N" or "Sequence_N"
        part of a correspondence key) to a list of strings, one per model
        column, holding the characters placed in that column.

    Raises:
        AttributeError: if a correspondence key does not contain the
            expected Instance_N / Sequence_N label.
        IOError: if the interactions or model file cannot be opened.
    """
    # read correspondences from the fasta file to the model; the first five
    # return values are discarded because the motif group's own maps, read
    # below, are the ones used for instances
    (_, _, _, _, _, SequenceToModel, HasName, HasScore, HasInteriorEdit,
     HasFullEdit, HasCutoffValue, HasCutoffScore,
     HasAlignmentScoreDeficit) = readcorrespondencesfromfile(alignmentfile)

    # print(...) with a single argument behaves exactly like the print statement
    print("Read alignment to model from " + alignmentfile)

    FN = libDirectory + "\\" + motifID + "_correspondences.txt"

    # read correspondences for the given motif group; there are many such correspondences
    (InstanceToGroup, _, _, GroupToModel, ModelToColumn, _, ModelHasName,
     ModelHasScore, _, _, _, _, _) = readcorrespondencesfromfile(FN)

    # merge the names and scores read from the motif group file into those
    # read from the alignment file
    HasName.update(ModelHasName)
    HasScore.update(ModelHasScore)

    print("Read model correspondences from " + FN)

    instance_label = re.compile("(.+Instance_[0-9]+)")
    sequence_label = re.compile("(Sequence_[0-9]+)")
    width = len(ModelToColumn)

    # One row per instance from 3D and per aligned sequence, every cell blank
    DisplayColor = {}
    aligdata = {}
    for key in InstanceToGroup:
        row = instance_label.search(key).group(1)
        aligdata[row] = [''] * width
        DisplayColor[row] = 'blue'  # default display color
    for key in SequenceToModel:
        row = sequence_label.search(key).group(1)
        aligdata[row] = [''] * width
        DisplayColor[row] = 'black'  # default display color

    # sorting by key should keep insertions in order
    for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
        row = instance_label.search(key).group(1)
        # map position in group to the correct column in the model and in the alignment
        t = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
        # last character of the key is the base for this position
        aligdata[row][t - 1] += key[-1]

    for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
        row = sequence_label.search(key).group(1)
        t = int(ModelToColumn[SequenceToModel[key]])
        aligdata[row][t - 1] += key[-1]

    # (heading, file) pairs appended verbatim inside <pre> blocks after the table
    sections = (
        ('<br><b>Conserved interactions between motif group positions in ' +
         motifID + ':</b>',
         libDirectory + "\\" + motifID + "_interactions.txt"),
        ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_model.txt"),
    )

    # the with-block closes the output file even if a helper or a missing
    # input file raises part-way through
    with open(outputfile, "w") as f:
        f.write("<html><title>Alignment to " + motifID + "</title>\n")
        f.write("<h1>Alignment of " + alignmentfile + " to " + motifID +
                "</h1>\n")
        f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' +
                motifID + '" target="_blank">Motif atlas entry for ' +
                motifID + '</a><br>')
        f.write(
            "The correspondence between sequences from 3D structures and the motif group is shown in blue, JAR3D alignments of sequences to the motif group are shown in black, and sequences which are too long or too short to be aligned are indicated by : characters."
        )
        f.write("<table>")
        f.write(alignmentheaderhtml(ModelToColumn, GroupToModel) + '\n')
        f.write(
            alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore,
                              HasInteriorEdit, HasFullEdit, HasCutoffValue,
                              HasCutoffScore, HasAlignmentScoreDeficit))
        f.write("</table>")

        for heading, path in sections:
            f.write(heading)
            f.write('<pre>')
            with open(path, "r") as mf:
                for line in mf:
                    f.write(line)
            f.write("</pre>")

        f.write("</html>")

    print("Wrote html file with alignment of 3D instances and sequences for " +
          motifID)

    return aligdata
Exemplo n.º 3
0
def onemodeldiagnostic(motifID,libDirectory,diagDirectory,prevHTML,nextHTML):
  """Write the group-to-model diagnostic HTML page for one motif group.

  Correspondences are read from the motifID "_diagnostics.txt" file in
  diagDirectory.  Every nucleotide whose GroupToModel placement differs from
  the SequenceToModel placement of its sequence position is reported on
  stdout and its sequence row is colored red.  The alignment table, followed
  by the contents of the motif's "_interactions.txt", "_model.txt" and
  ".fasta" files from libDirectory, is written to the motifID
  "_GroupToModelDiagnostic.html" file in diagDirectory.

  Args:
    motifID: motif group identifier; used in file names, headings and the
      motif atlas link.
    libDirectory: directory holding the motif's _interactions.txt,
      _model.txt and .fasta files (joined with a backslash).
    diagDirectory: directory holding the diagnostics input and receiving the
      HTML output (joined with a backslash).
    prevHTML: href used for the "Previous group" link.
    nextHTML: href used for the "Next group" link.

  Returns:
    (aligdata, MisAlign).  aligdata maps each row label (the "...Instance_N"
    or "...Sequence_N" part of a correspondence key) to a list of strings,
    one per model column.  MisAlign starts at 0 and grows by 0.5 for every
    nucleotide whose two placements disagree.

  Raises:
    AttributeError: if a correspondence key does not contain the expected
      Instance_N / Sequence_N label.
    IOError: if the interactions, model or fasta file cannot be opened.
  """
  FN = diagDirectory + "\\" + motifID + "_diagnostics.txt"

  # read correspondences for the given motif group; there are many such correspondences
  (InstanceToGroup, InstanceToPDB, InstanceToSequence, GroupToModel,
   ModelToColumn, SequenceToModel, HasName, HasScore, HasInteriorEdit,
   HasFullEdit, HasCutoffValue, HasCutoffScore,
   HasAlignmentScoreDeficit) = readcorrespondencesfromfile(FN)

  # print(...) with a single argument behaves exactly like the print statement
  print("Read diagnostics from " + FN)

  instance_label = re.compile("(.+Instance_[0-9]+)")
  sequence_label = re.compile("(.+Sequence_[0-9]+)")

  # default display color, indexed by row label; each instance / sequence is displayed in one row
  DisplayColor = {}
  for key in InstanceToPDB:
    DisplayColor[instance_label.search(key).group(1)] = 'blue'
  for key in SequenceToModel:
    DisplayColor[sequence_label.search(key).group(1)] = 'black'

  # flag sequence rows whose alignment disagrees with the 3D correspondence
  MisAlign = 0
  for nt in sorted(InstanceToPDB):
    from_group = GroupToModel[InstanceToGroup[nt]]
    from_alignment = SequenceToModel[InstanceToSequence[nt]]
    if from_group != from_alignment:
      print(nt + ' belongs to ' + from_group + ' but was aligned to ' + from_alignment)
      MisAlign += 0.5
      DisplayColor[sequence_label.search(InstanceToSequence[nt]).group(1)] = 'red'

  # One row per instance from 3D and per aligned sequence, every cell blank
  width = len(ModelToColumn)
  aligdata = {}
  for key in InstanceToGroup:
    aligdata[instance_label.search(key).group(1)] = [''] * width
  for key in SequenceToModel:
    aligdata[sequence_label.search(key).group(1)] = [''] * width

  # sorting by key should keep insertions in order
  for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
    row = instance_label.search(key).group(1)
    # map position in group to the correct column in the model and in the alignment
    t = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
    aligdata[row][t-1] += key[-1]                    # last character of the key is the base for this position

  for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
    row = sequence_label.search(key).group(1)
    t = int(ModelToColumn[SequenceToModel[key]])
    aligdata[row][t-1] += key[-1]

  # (heading, file) pairs appended verbatim inside <pre> blocks after the table
  sections = (
    ('<br><b>Conserved interactions between motif group positions in ' + motifID + ':</b>',
     libDirectory + "\\" + motifID + "_interactions.txt"),
    ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
     libDirectory + "\\" + motifID + "_model.txt"),
    # closing tag is </b>; an opening <b> here left the rest of the page bold
    ('<b>Sequences of instances from ' + motifID + ':</b>',
     libDirectory + "\\" + motifID + ".fasta"),
  )

  # the with-block closes the output file even if a helper or a missing
  # input file raises part-way through
  with open(diagDirectory+"\\"+motifID+"_GroupToModelDiagnostic.html","w") as f:
    f.write("<html><title>"+motifID+" alignment</title>\n")
    f.write("<h1>Alignment of "+motifID+" sequences from 3D to the JAR3D model</h1>\n")
    f.write('<a href="' + prevHTML + '">Previous group</a> | ')
    f.write('<a href="' + nextHTML + '">Next group</a> | ')
    f.write('<a href="GroupToModelDiagnostic.html">List of all groups</a> | ')
    f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' + motifID + '" target="_blank">Motif atlas entry for ' + motifID + '</a>  ')
    f.write("<br>The correspondence between sequences from 3D structures and the motif group is shown in blue and the JAR3D alignment of the sequences to the motif group is shown in black.  Occasionally the two disagree, in which case the JAR3D alignment is shown in red.")
    f.write("<table>")
    f.write(alignmentheaderhtml(ModelToColumn,GroupToModel)+'\n')
    f.write(alignmentrowshtml(DisplayColor,aligdata,HasName,HasScore, HasInteriorEdit, HasFullEdit, HasCutoffValue, HasCutoffScore, HasAlignmentScoreDeficit))
    f.write("</table>")

    for heading, path in sections:
      f.write(heading)
      f.write('<pre>')
      with open(path,"r") as mf:
        for line in mf:
          f.write(line)
      f.write("</pre>")

    f.write("</html>")

  print("Wrote html file with alignment of 3D instances and sequences for " + motifID)

  return aligdata, MisAlign
def onemodeldiagnostic(motifID, libDirectory, diagDirectory, prevHTML,
                       nextHTML):
    """Write the group-to-model diagnostic HTML page for one motif group.

    Correspondences are read from the motifID "_diagnostics.txt" file in
    diagDirectory.  Every nucleotide whose GroupToModel placement differs
    from the SequenceToModel placement of its sequence position is reported
    on stdout and its sequence row is colored red.  The alignment table,
    followed by the contents of the motif's "_interactions.txt",
    "_model.txt" and ".fasta" files from libDirectory, is written to the
    motifID "_GroupToModelDiagnostic.html" file in diagDirectory.

    Args:
        motifID: motif group identifier; used in file names, headings and
            the motif atlas link.
        libDirectory: directory holding the motif's _interactions.txt,
            _model.txt and .fasta files (joined with a backslash).
        diagDirectory: directory holding the diagnostics input and
            receiving the HTML output (joined with a backslash).
        prevHTML: href used for the "Previous group" link.
        nextHTML: href used for the "Next group" link.

    Returns:
        (aligdata, MisAlign).  aligdata maps each row label (the
        "...Instance_N" or "...Sequence_N" part of a correspondence key) to
        a list of strings, one per model column.  MisAlign starts at 0 and
        grows by 0.5 for every nucleotide whose two placements disagree.

    Raises:
        AttributeError: if a correspondence key does not contain the
            expected Instance_N / Sequence_N label.
        IOError: if the interactions, model or fasta file cannot be opened.
    """
    FN = diagDirectory + "\\" + motifID + "_diagnostics.txt"

    # read correspondences for the given motif group; there are many such correspondences
    (InstanceToGroup, InstanceToPDB, InstanceToSequence, GroupToModel,
     ModelToColumn, SequenceToModel, HasName, HasScore, HasInteriorEdit,
     HasFullEdit, HasCutoffValue, HasCutoffScore,
     HasAlignmentScoreDeficit) = readcorrespondencesfromfile(FN)

    # print(...) with a single argument behaves exactly like the print statement
    print("Read diagnostics from " + FN)

    instance_label = re.compile("(.+Instance_[0-9]+)")
    sequence_label = re.compile("(.+Sequence_[0-9]+)")

    # default display color, indexed by row label; each instance / sequence is displayed in one row
    DisplayColor = {}
    for key in InstanceToPDB:
        DisplayColor[instance_label.search(key).group(1)] = 'blue'
    for key in SequenceToModel:
        DisplayColor[sequence_label.search(key).group(1)] = 'black'

    # flag sequence rows whose alignment disagrees with the 3D correspondence
    MisAlign = 0
    for nt in sorted(InstanceToPDB):
        from_group = GroupToModel[InstanceToGroup[nt]]
        from_alignment = SequenceToModel[InstanceToSequence[nt]]
        if from_group != from_alignment:
            print(nt + ' belongs to ' + from_group + ' but was aligned to ' +
                  from_alignment)
            MisAlign += 0.5
            flagged = sequence_label.search(InstanceToSequence[nt]).group(1)
            DisplayColor[flagged] = 'red'

    # One row per instance from 3D and per aligned sequence, every cell blank
    width = len(ModelToColumn)
    aligdata = {}
    for key in InstanceToGroup:
        aligdata[instance_label.search(key).group(1)] = [''] * width
    for key in SequenceToModel:
        aligdata[sequence_label.search(key).group(1)] = [''] * width

    # sorting by key should keep insertions in order
    for key in sorted(InstanceToGroup, key=columnkeyforsortbynumber):
        row = instance_label.search(key).group(1)
        # map position in group to the correct column in the model and in the alignment
        t = int(ModelToColumn[GroupToModel[InstanceToGroup[key]]])
        # last character of the key is the base for this position
        aligdata[row][t - 1] += key[-1]

    for key in sorted(SequenceToModel, key=positionkeyforsortbynumber):
        row = sequence_label.search(key).group(1)
        t = int(ModelToColumn[SequenceToModel[key]])
        aligdata[row][t - 1] += key[-1]

    # (heading, file) pairs appended verbatim inside <pre> blocks after the table
    sections = (
        ('<br><b>Conserved interactions between motif group positions in ' +
         motifID + ':</b>',
         libDirectory + "\\" + motifID + "_interactions.txt"),
        ('<b>JAR3D SCFG/MRF model for ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + "_model.txt"),
        # closing tag is </b>; an opening <b> here left the rest of the page bold
        ('<b>Sequences of instances from ' + motifID + ':</b>',
         libDirectory + "\\" + motifID + ".fasta"),
    )

    # the with-block closes the output file even if a helper or a missing
    # input file raises part-way through
    with open(diagDirectory + "\\" + motifID + "_GroupToModelDiagnostic.html",
              "w") as f:
        f.write("<html><title>" + motifID + " alignment</title>\n")
        f.write("<h1>Alignment of " + motifID +
                " sequences from 3D to the JAR3D model</h1>\n")
        f.write('<a href="' + prevHTML + '">Previous group</a> | ')
        f.write('<a href="' + nextHTML + '">Next group</a> | ')
        f.write(
            '<a href="GroupToModelDiagnostic.html">List of all groups</a> | ')
        f.write('<a href="http://rna.bgsu.edu/rna3dhub/motif/view/' +
                motifID + '" target="_blank">Motif atlas entry for ' +
                motifID + '</a>  ')
        f.write(
            "<br>The correspondence between sequences from 3D structures and the motif group is shown in blue and the JAR3D alignment of the sequences to the motif group is shown in black.  Occasionally the two disagree, in which case the JAR3D alignment is shown in red."
        )
        f.write("<table>")
        f.write(alignmentheaderhtml(ModelToColumn, GroupToModel) + '\n')
        f.write(
            alignmentrowshtml(DisplayColor, aligdata, HasName, HasScore,
                              HasInteriorEdit, HasFullEdit, HasCutoffValue,
                              HasCutoffScore, HasAlignmentScoreDeficit))
        f.write("</table>")

        for heading, path in sections:
            f.write(heading)
            f.write('<pre>')
            with open(path, "r") as mf:
                for line in mf:
                    f.write(line)
            f.write("</pre>")

        f.write("</html>")

    print("Wrote html file with alignment of 3D instances and sequences for " +
          motifID)

    return aligdata, MisAlign