def testPlotsHA3DPlotly(self):
    """
    Produce Plotly 3D plots of BLAST bit scores, Z scores, and light matter
    scores: one plot per query id, followed by a single plot that combines
    the scores of all queries. This is done for every parameter set.
    """
    for parameterSet in testArgs.parameterSets:
        affinity = _AFFINITY[parameterSet]
        dirName = makeOutputDir(DATASET, parameterSet, '3d-plotly')

        # Accumulators for the combined plot of this parameter set.
        allZScores, allBitScores, allLmScores, allLabels = [], [], [], []

        for queryId in sorted(BIT_SCORES):
            zScores, bitScores, lmScores, labels = [], [], [], []
            subjectBitScores = BIT_SCORES[queryId]

            for subjectId in subjectBitScores:
                # A sequence is never plotted against itself.
                if queryId == subjectId:
                    continue
                bitScores.append(subjectBitScores[subjectId])
                zScores.append(Z_SCORES[queryId][subjectId])
                lmScores.append(getScore(affinity, queryId, subjectId))
                labels.append(pythonNameToPdbName(subjectId))
                allLabels.append(
                    '%s vs. %s' % (pythonNameToPdbName(queryId),
                                   pythonNameToPdbName(subjectId)))

            allZScores += zScores
            allBitScores += bitScores
            allLmScores += lmScores

            plot3DPlotly(bitScores, zScores, lmScores, queryId, dirName,
                         interactive=testArgs.interactive, labels=labels)

            # In interactive mode, any answer starting with 'n' or 'N'
            # stops the test.
            if testArgs.interactive:
                if input('Continue? ')[:1].lower() == 'n':
                    return

        # Plot all scores on one plot.
        plot3DPlotly(allBitScores, allZScores, allLmScores, 'all', dirName,
                     interactive=testArgs.interactive, labels=allLabels)

        if testArgs.interactive:
            if input('Continue? ')[:1].lower() == 'n':
                return
def testPlots2HLA3DPlotly(self):
    """
    Produce one Plotly 3D plot per subject, showing the BLAST bit score,
    Z score, and light matter score of every query against that subject.
    This is done for every parameter set.
    """
    for parameterSet in testArgs.parameterSets:
        affinity = _AFFINITY[parameterSet]
        dirName = makeOutputDir(DATASET, parameterSet, '3d-plotly')

        for subject in SUBJECTS:
            subjectId = subject.id
            zScores, bitScores, lmScores, labels = [], [], [], []

            for query in QUERIES:
                queryId = query.id
                # A sequence is never plotted against itself.
                if queryId == subjectId:
                    continue
                bitScores.append(BIT_SCORES[queryId][subjectId])
                zScores.append(Z_SCORES[queryId][subjectId])
                lmScores.append(getScore(affinity, queryId, subjectId))
                labels.append(pythonNameToPdbName(queryId))

            plot3DPlotly(bitScores, zScores, lmScores, subjectId, dirName,
                         interactive=testArgs.interactive, labels=labels)
def testRecognizedLowerCase(self):
    """
    A recognizable lower case Python name must be converted to the
    expected PDB name.
    """
    pdbName = pythonNameToPdbName('pdb_1mla_a')
    self.assertEqual('1MLA:A', pdbName)
def testUnrecognizableDueToPreix(self):
    """
    A name that ends with a valid suffix but is too long (it has an extra
    leading character) must be returned unchanged.
    """
    name = 'apdb_1mla_a'
    converted = pythonNameToPdbName('apdb_1mla_a')
    self.assertEqual(name, converted)
def testUnrecognizable(self):
    """
    A name that cannot be recognized must be returned unchanged.
    """
    name = 'x' * 10
    self.assertEqual(name, pythonNameToPdbName(name))
def plot3D(x, y, z, readId, scoreTypeX, scoreTypeY, scoreTypeZ, dirName,
           interactive=False):
    """
    Draw a 3D matplotlib scatter plot of score triples and save it as a PNG.

    Each point is colored according to which of its three scores exceed
    their good/bad cutoffs, and a translucent plane is drawn at the cutoff
    of every axis on which the data reaches that cutoff.

    @param x: a C{list} of C{float} X axis bit score values.
    @param y: a C{list} of C{float} Y axis Z score values.
    @param z: a C{list} of C{float} Z axis light matter score values.
    @param readId: The C{str} id of the read whose values are being plotted.
    @param scoreTypeX: A C{str} X-axis title indicating the type of score.
    @param scoreTypeY: A C{str} Y-axis title indicating the type of score.
    @param scoreTypeZ: A C{str} Z-axis title indicating the type of score.
    @param dirName: A C{str} name of the output directory in which to store
        the plot image. The image is saved as readId + '.png' in dirName.
    @param interactive: If C{True} use plt.show() to display interactive
        plots that the user will need to manually dismiss.
    @raises AssertionError: If C{x}, C{y}, and C{z} do not all have the same
        length, or if a Y value exceeds the fixed Z score limit.
    """
    assert len(x) == len(y) == len(z)

    figure = plt.figure(figsize=(10, 10))
    axes = figure.add_subplot(111, projection='3d')
    planeAlpha = 0.1
    markerSize = 40

    # The initial view onto the 3D plot: camera degrees, then azimuth.
    axes.view_init(11, -103)

    # Fixed Y axis ceiling so that all plots produced share the same scale.
    # All Z scores observed so far are below it.
    zScoreLimit = 65.0
    if y:
        assert max(y) <= zScoreLimit

    # Good/bad cutoffs, in (bit score, Z score, light matter score) order.
    # A value strictly greater than its cutoff is considered good.
    bitScoreCutoff, zScoreCutoff, lmScoreCutoff = 50.0, 20.0, 0.5
    cutoffs = (bitScoreCutoff, zScoreCutoff, lmScoreCutoff)

    # Maps a 3-character string of bad/good (0/1) flags, in the order
    # bit score, Z score, light matter score, to a point color. Blue isn't
    # bad: it marks sequences that are similar but share no structure.
    cutoffStringToColor = {
        '000': 'green',   # OK - no conflict.
        '001': 'red',     # Really bad - lm disagrees with both others.
        '010': 'black',   # Quite bad - lm disagrees with Z score.
        '011': 'yellow',  # Good - bit score low, structure scores high.
        '100': 'blue',    # Weird - bit score is the only one that's high.
        '101': 'black',   # Quite bad - lm disagrees with Z score.
        '110': 'red',     # Really bad - lm disagrees with both others.
        '111': 'green',   # OK - no conflict.
    }

    if x:
        # One color per (x, y, z) triple.
        colors = [
            cutoffStringToColor[
                ''.join('1' if score > cutoff else '0'
                        for score, cutoff in zip(triple, cutoffs))]
            for triple in zip(x, y, z)
        ]

        axes.scatter(x, y, z, c=colors, s=markerSize)
        axes.set_title(pythonNameToPdbName(readId))

        # Number of mesh points per side used for the three cutoff planes.
        meshSize = 60
        maxX = max(x)
        zScoreAxis = np.linspace(0.0, zScoreLimit, meshSize)
        unitAxis = np.linspace(0.0, 1.0, meshSize)

        # Bit score (x) bad/good plane.
        if maxX >= bitScoreCutoff:
            yy, zz = np.meshgrid(zScoreAxis, unitAxis)
            flat = np.full((meshSize, meshSize), bitScoreCutoff)
            axes.contourf(flat, yy, zz, colors='blue', alpha=planeAlpha,
                          zdir='x')

        # Z score (y) bad/good plane.
        if max(y) >= zScoreCutoff:
            xx, zz = np.meshgrid(np.linspace(0.0, maxX, meshSize), unitAxis)
            flat = np.full((meshSize, meshSize), zScoreCutoff)
            axes.contourf(xx, flat, zz, colors='green', alpha=planeAlpha,
                          zdir='y')

        # LM score (z) bad/good plane.
        if max(z) >= lmScoreCutoff:
            xx, yy = np.meshgrid(np.linspace(0.0, maxX, meshSize),
                                 zScoreAxis)
            flat = np.full((meshSize, meshSize), lmScoreCutoff)
            axes.contourf(xx, yy, flat, colors='purple', alpha=planeAlpha,
                          zdir='z')
    else:
        axes.set_title('No x,y,z data given for read %s' %
                       pythonNameToPdbName(readId))

    axes.set_xlabel(scoreTypeX)
    axes.set_ylabel(scoreTypeY)
    axes.set_zlabel(scoreTypeZ)
    axes.set_xlim(left=0.0)
    axes.set_ylim(0.0, zScoreLimit)
    axes.set_zlim(0.0, 1.0)

    figure.savefig(join(dirName, '%s.png' % readId))
    if interactive:
        plt.show()
    plt.close()
def plot3DPlotly(bitScores, zScores, lmScores, readId, dirName,
                 scoreTypeX='Bit score', scoreTypeY='Z score',
                 scoreTypeZ='Light matter score', interactive=False,
                 labels=''):
    """
    Make a 3D plot of the test results using Plotly and write it to an HTML
    file.

    Each point is colored according to which of its three scores exceed
    their good/bad cutoffs, and a translucent plane is drawn at the cutoff
    value of each axis.

    @param bitScores: a C{list} of C{float} bit scores for the X axis.
    @param zScores: a C{list} of C{float} Z scores for the Y axis.
    @param lmScores: a C{list} of C{float} light matter scores for the Z
        axis.
    @param readId: The C{str} id of the read whose values are being plotted.
    @param dirName: A C{str} name of the output directory in which to store
        the plot. The plot is saved as readId + '.html' in dirName.
    @param scoreTypeX: A C{str} X-axis title indicating the type of score.
    @param scoreTypeY: A C{str} Y-axis title indicating the type of score.
    @param scoreTypeZ: A C{str} Z-axis title indicating the type of score.
    @param interactive: A C{bool} passed to plotly.offline.plot as its
        C{auto_open} argument.
    @param labels: A C{list} of C{str} labels for the points in the plot. If
        a C{str} is passed, that will be used as the label for all points.
    @raises AssertionError: If the length of C{bitScores} is not the same as
        the length of C{zScores} and the length of C{lmScores}, or if a
        Z score exceeds the fixed upper limit.
    """
    assert len(bitScores) == len(zScores) == len(lmScores)

    # Values less than these cutoffs are considered bad, values bigger are
    # good. Planes are drawn to separate each axis into good/bad points.
    bitScoreCutoff = 50.0
    zScoreCutoff = 20.0
    lmScoreCutoff = 0.5

    # There's no limit on how high a bit score could be and because their
    # range can be so great it's not practical to have a fixed upper limit
    # for the X axis (otherwise the plots look weird when no bit scores
    # approach that high upper limit). So we set an artificial max possible
    # bit score based on the values we received and the bit score cut-off
    # value used to display the good/bad bit score plane. The default
    # keeps max() from raising ValueError when no scores are given.
    maxPossibleBitScore = max(max(bitScores, default=0.0),
                              bitScoreCutoff + 5)
    maxPossibleZScore = 65.0
    maxPossibleLmScore = 1.0

    # All the Z scores we've observed so far are less than 65. We set a
    # fixed upper limit above so all graphs produced will have the same Y
    # axis upper limit.
    if zScores:
        assert max(zScores) <= maxPossibleZScore

    minPossibleBitScore = minPossibleZScore = minPossibleLmScore = 0.0

    # cutoffStringToColor converts a binary string of length 3 to a color.
    # The positions in the string represent bad/good (0/1) values for bit
    # score, Z score, light matter score according to the bad/good cutoff
    # values above. Blue isn't bad, it just indicates that two sequences
    # are similar but that they have no common structure (they may have no
    # structure at all).
    cutoffStringToColor = {
        '000': 'green',   # OK - no conflict.
        '001': 'red',     # Really bad - lm disagrees with both others.
        '010': 'black',   # Quite bad - lm disagrees with Z score.
        '011': 'yellow',  # Good - bit score low, structure scores high.
        '100': 'blue',    # Weird - bit score is the only one that's high.
        '101': 'black',   # Quite bad - lm disagrees with Z score.
        '110': 'red',     # Really bad - lm disagrees with both others.
        '111': 'green',   # OK - no conflict.
    }

    # Assign each (bit score, Z score, light matter score) triple a color.
    colors = [
        cutoffStringToColor[
            ('1' if bitScore > bitScoreCutoff else '0') +
            ('1' if zScore > zScoreCutoff else '0') +
            ('1' if lmScore > lmScoreCutoff else '0')]
        for bitScore, zScore, lmScore in zip(bitScores, zScores, lmScores)
    ]

    # Plot the score triples.
    data = [
        go.Scatter3d(
            x=bitScores,
            y=zScores,
            z=lmScores,
            mode='markers',
            name='Scores',
            marker={
                'size': 9,
                'color': colors,
                'opacity': 0.45,
            },
            text=labels,
        )
    ]

    # The alpha value for the good/bad cut-off planes.
    planeAlpha = 0.1

    planeLine = {
        'color': 'black',
        'width': 1,
    }

    # Each plane is given as a closed rectangle: four corners, with the
    # first corner repeated as a fifth point.

    # Bit score (x) good/bad plane.
    data.append(
        go.Scatter3d(
            x=[bitScoreCutoff] * 5,
            y=[minPossibleZScore, maxPossibleZScore, maxPossibleZScore,
               minPossibleZScore, minPossibleZScore],
            z=[maxPossibleLmScore, maxPossibleLmScore, minPossibleLmScore,
               minPossibleLmScore, maxPossibleLmScore],
            mode='lines',
            name=scoreTypeX + ' plane',
            surfaceaxis=0,
            surfacecolor='blue',
            opacity=planeAlpha,
            hoverinfo='none',
            line=planeLine))

    # Z score (y) good/bad plane. The z coordinates span the light matter
    # score range (the Z axis), so the light matter limits are used.
    data.append(
        go.Scatter3d(
            x=[minPossibleBitScore, maxPossibleBitScore,
               maxPossibleBitScore, minPossibleBitScore,
               minPossibleBitScore],
            y=[zScoreCutoff] * 5,
            z=[maxPossibleLmScore, maxPossibleLmScore, minPossibleLmScore,
               minPossibleLmScore, maxPossibleLmScore],
            mode='lines',
            name=scoreTypeY + ' plane',
            surfaceaxis=1,
            surfacecolor='green',
            opacity=planeAlpha,
            hoverinfo='none',
            line=planeLine))

    # LM score (z) good/bad plane.
    data.append(
        go.Scatter3d(
            x=[minPossibleBitScore, maxPossibleBitScore,
               maxPossibleBitScore, minPossibleBitScore,
               minPossibleBitScore],
            y=[maxPossibleZScore, maxPossibleZScore, minPossibleZScore,
               minPossibleZScore, maxPossibleZScore],
            z=[lmScoreCutoff] * 5,
            mode='lines',
            name=scoreTypeZ + ' plane',
            surfaceaxis=2,
            surfacecolor='purple',
            opacity=planeAlpha,
            hoverinfo='none',
            line=planeLine))

    axisFont = {
        'size': 16,
    }

    layout = go.Layout(
        title=pythonNameToPdbName(readId),
        margin={
            'l': 0,
            'r': 0,
            'b': 0,
            't': 50,
            'pad': 5,
        },
        scene={
            'xaxis': {
                'range': [minPossibleBitScore, maxPossibleBitScore],
                'title': scoreTypeX,
                'titlefont': axisFont,
            },
            'yaxis': {
                'range': [minPossibleZScore, maxPossibleZScore],
                'title': scoreTypeY,
                'titlefont': axisFont,
            },
            'zaxis': {
                'range': [minPossibleLmScore, maxPossibleLmScore],
                'title': scoreTypeZ,
                'titlefont': axisFont,
            },
        },
    )

    fig = go.Figure(data=data, layout=layout)
    filename = join(dirName, '%s.html' % readId)
    plotly.offline.plot(fig, show_link=False, filename=filename,
                        auto_open=interactive)
def plot(x, y, readId, scoreTypeX, scoreTypeY, dirName):
    """
    Make a scatterplot of the test results, with a linear regression line,
    and save it as a PNG.

    The points and regression line are only drawn when at least two points
    are given; otherwise the plot just has a title saying there was no (or
    not enough) data.

    @param x: a C{list} of C{float} x coordinates.
    @param y: a C{list} of C{float} y coordinates.
    @param readId: The C{str} id of the read whose values are being plotted.
    @param scoreTypeX: A C{str} X-axis title indicating the type of score.
    @param scoreTypeY: A C{str} Y-axis title indicating the type of score.
    @param dirName: A C{str} name of the output directory in which to store
        the plot image. The image is saved as readId + '.png' in dirName.
    @raises AssertionError: If the length of C{x} is not the same as the
        length of C{y}, or if a Z score axis has a value above 65.0.
    """
    assert len(x) == len(y)

    fig = plt.figure(figsize=(7, 5))
    ax = fig.add_subplot(111)

    if len(x) > 1:
        slope, intercept, rValue, pValue, se = stats.linregress(x, y)

        # Plot the points, then the regression line from x=0 to the
        # largest x value. The line is green when the slope is
        # non-negative and red otherwise.
        maxX = max(x)
        plt.plot(x, y, 'o', markerfacecolor='blue', markeredgecolor='white')
        plt.plot([0, maxX], [intercept, slope * maxX + intercept], '-',
                 color='green' if slope >= 0 else 'red')

        # Labels. linregress returns the correlation coefficient r, so it
        # is squared here to match the 'R^2' label in the title.
        ax.set_title('Read: %s, R^2: %.2f, SE: %.2f, slope: %.2f, p: %.2f' %
                     (pythonNameToPdbName(readId), rValue ** 2, se, slope,
                      pValue))
    else:
        ax.set_title('No (or not enough) x,y data given for read %s' %
                     pythonNameToPdbName(readId))

    ax.set_ylabel(scoreTypeY)
    ax.set_xlabel(scoreTypeX)

    # Light matter scores are always <= 1.0.
    if scoreTypeX == 'Light matter score':
        ax.set_xlim(right=1.0)
    if scoreTypeY == 'Light matter score':
        ax.set_ylim(top=1.0)

    # Z scores are always <= 65.0 (with sanity check).
    if scoreTypeX == 'Z score':
        if x:
            assert max(x) <= 65.0
        ax.set_xlim(right=65.0)
    if scoreTypeY == 'Z score':
        if y:
            assert max(y) <= 65.0
        ax.set_ylim(top=65.0)

    # No scores can be negative. Explicitly set the lower limits on both
    # axes to zero. This stops regression lines from causing an axis to
    # display useless areas with negative ticks and no data points.
    ax.set_xlim(left=0.0)
    ax.set_ylim(bottom=0.0)

    # Axes.
    for side in ('top', 'right', 'bottom', 'left'):
        ax.spines[side].set_linewidth(0.5)

    fig.savefig(join(dirName, '%s.png' % readId))
    plt.close()
default=False, action='store_true', help=('If given, do not write a summary of how many sequence ids were ' 'processed and put into each category')) args = parser.parse_args() pdbEntries = loadEntries(args.pdbEntriesFile) sequenceCount = yearFoundCount = 0 noYear = set() yearToIds = defaultdict(set) for sequence in SSFastaReads(sys.stdin, checkAlphabet=0): sequenceCount += 1 pdbId = pythonNameToPdbName(sequence.id).split(':', maxsplit=1)[0] try: entry = pdbEntries[pdbId] except KeyError: noYear.add(pdbId) else: yearToIds[entry['year']].add(pdbId) yearFoundCount += 1 verbose = not args.quiet noYearCount = len(noYear) if verbose: print('%d sequence%s read, %d had a year, %d had no year.' %