def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Scatter-plot the cell cycle length of every cell under seedOutDir.

    For each cell directory returned by ``AnalysisPaths.get_cells()`` the
    "time" column of the "Main" table is read; the span between its first and
    last entries, divided by 3600, is plotted against the cell's position in
    that list.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    cellCycleLengths = []
    for simDir in ap.get_cells():
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        # The axis is labelled in hours, so "time" is presumably in seconds
        # (TODO confirm against the Main listener).
        cellCycleLengths.append((time[-1] - time[0]) / 60. / 60.)

    # One x position per cell, in the order get_cells() returned them.
    generations = list(range(len(cellCycleLengths)))

    plt.scatter(generations, cellCycleLengths)
    plt.xlabel('Generation')
    plt.ylabel('Time (hr)')
    plt.title('Cell cycle lengths')
    plt.xticks(generations)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Template for a multi-generation plot: load inputs, export an empty figure.

    The pickled simulation and validation data and the per-cell "time" column
    are loaded but not plotted; they are placeholders showing where a concrete
    plot would read its data.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to (created through
            ``filepath.makedirs``).
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled simulation data.
        validationDataFile: Path of the pickled validation data.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('seedOutDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    with open(simDataFile, 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    for sim_dir in ap.get_cells():
        simOutDir = os.path.join(sim_dir, 'simOut')

        # Listeners used
        main_reader = TableReader(os.path.join(simOutDir, 'Main'))

        # Load data
        time = main_reader.readColumn('time')

    plt.figure()

    ### Create Plot ###

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot one doubling-time distribution per generation for a cohort.

    The doubling time of a cell is ``(max(time) - initialTime) / 60`` read from
    its "Main" table. Each generation gets its own subplot: a histogram when
    any generation holds more than one cell, otherwise a single marker. The
    mean is drawn as a dashed line and annotated with mean and variance.

    Args:
        variantDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If variantDir is not an existing directory.
    """
    if not os.path.isdir(variantDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("variantDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells in each seed
    ap = AnalysisPaths(variantDir, cohort_plot=True)

    cellsPerGen = [ap.get_cells(generation=[genIdx]) for genIdx in range(ap.n_generation)]
    max_cells_in_gen = 0
    for gen_cells in cellsPerGen:
        max_cells_in_gen = max(max_cells_in_gen, len(gen_cells))

    fig, axesList = plt.subplots(ap.n_generation, sharex=True)

    # NOTE(review): generations with fewer cells than the largest one keep
    # zeros in their unused rows, and those zeros enter the histogram, mean
    # and variance below.
    doubling_time = np.zeros((max_cells_in_gen, ap.n_generation))
    for genIdx, gen_cells in enumerate(cellsPerGen):
        for cellIdx, simDir in enumerate(gen_cells):
            mainReader = TableReader(os.path.join(simDir, "simOut", "Main"))
            time = mainReader.readColumn("time")
            initialTime = mainReader.readAttribute("initialTime")
            doubling_time[cellIdx, genIdx] = (time.max() - initialTime) / 60.

    # plt.subplots returns a bare Axes (not a sequence) for a single row.
    if ap.n_generation == 1:
        axesList = [axesList]

    # Plot the doubling-time distribution of each generation
    for idx, axes in enumerate(axesList):
        genTimes = doubling_time[:, idx]
        meanTime = genTimes.mean()
        if max_cells_in_gen > 1:
            axes.hist(genTimes, int(np.ceil(np.sqrt(genTimes.size))))
        else:
            axes.plot(genTimes, 1, 'x')
            axes.set_ylim([0, 2])

        axes.axvline(meanTime, color='k', linestyle='dashed', linewidth=2)
        axes.text(meanTime, 1, "Mean: %.3f Var: %.3f" % (meanTime, genTimes.var()))

    axesList[-1].set_xlabel("Doubling time (min)")
    # Floor division keeps the index an int under Python 3 as well.
    axesList[ap.n_generation // 2].set_ylabel("Frequency")

    plt.subplots_adjust(hspace=0.2, wspace=0.5)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot mass fractions over time for the first cell of every generation.

    For each generation the first cell returned by ``AnalysisPaths.get_cells``
    is used. The listed columns of its "Mass" table are divided by their sum
    at every time step, and each fraction is drawn on its own subplot.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # TODO: Declutter Y-axis

    # Get first cell from each generation
    firstCellLineage = [
        ap.get_cells(generation=[gen_idx])[0]
        for gen_idx in range(ap.n_generation)
    ]

    # massNames and cleanNames are parallel lists: column name -> y label.
    massNames = [
        # "dryMass",
        "proteinMass",
        "tRnaMass",
        "rRnaMass",
        'mRnaMass',
        "dnaMass",
    ]
    cleanNames = [
        # "Dry\nmass",
        "Protein\nmass frac.",
        "tRNA\nmass frac.",
        "rRNA\nmass frac.",
        "mRNA\nmass frac.",
        "DNA\nmass frac.",
    ]

    fig, axesList = plt.subplots(len(massNames), sharex=True)

    for simDir in firstCellLineage:
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        mass = TableReader(os.path.join(simOutDir, "Mass"))

        massData = np.zeros((len(massNames), time.size))
        for idx, massType in enumerate(massNames):
            massData[idx, :] = mass.readColumn(massType)

        # Convert to fractions of the summed listed masses at each time step.
        massData = massData / massData.sum(axis=0)

        for axes, label, fraction in zip(axesList, cleanNames, massData):
            # Float divisor so the result does not depend on the division
            # rules in effect for an integer-typed time column.
            axes.plot(time / 60., fraction)
            axes.set_ylabel(label)

    # Only label the two ends of each y axis.
    for axes in axesList:
        axes.set_yticks(list(axes.get_ylim()))

    axesList[-1].set_xlabel('Time (min)')

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot selected "Mass" table columns over time for every cell.

    One subplot per listed mass column; every cell returned by
    ``AnalysisPaths.get_cells()`` adds one line to each subplot.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # Get all cells
    allDir = ap.get_cells()

    # massNames and cleanNames are parallel lists: column name -> y label.
    massNames = [
        "dryMass",
        "proteinMass",
        # "tRnaMass",
        "rRnaMass",
        'mRnaMass',
        "dnaMass",
    ]
    cleanNames = [
        "Dry\nmass",
        "Protein\nmass",
        # "tRNA\nmass",
        "rRNA\nmass",
        "mRNA\nmass",
        "DNA\nmass",
    ]

    fig, axesList = plt.subplots(len(massNames), sharex=True)

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        mass = TableReader(os.path.join(simOutDir, "Mass"))

        for axes, massType, label in zip(axesList, massNames, cleanNames):
            massToPlot = mass.readColumn(massType)
            axes.plot(time / 60. / 60., massToPlot, linewidth=2)
            axes.set_ylabel(label + " (fg)")

    # Only label the two ends of each y axis.
    for axes in axesList:
        axes.set_yticks(list(axes.get_ylim()))

    axesList[0].set_title("Cell mass fractions")
    axesList[-1].set_xlabel("Time (hr)")

    plt.subplots_adjust(hspace=0.2, wspace=0.5)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot final versus initial cell mass for each generation of a cohort.

    For every cell the first and last entries of the "cellMass" column of the
    "Mass" table are taken and divided by 1000. Each generation gets a
    subplot with the points, a first-degree least-squares fit, and the fitted
    equation as text.

    Args:
        variantDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If variantDir is not an existing directory.
    """
    if not os.path.isdir(variantDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("variantDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells in each seed
    ap = AnalysisPaths(variantDir, cohort_plot=True)

    cellsPerGen = [ap.get_cells(generation=[genIdx]) for genIdx in range(ap.n_generation)]
    max_cells_in_gen = 0
    for gen_cells in cellsPerGen:
        max_cells_in_gen = max(max_cells_in_gen, len(gen_cells))

    fig, axesList = plt.subplots(
        ap.n_generation, sharey=True, sharex=True,
        subplot_kw={'aspect': 0.4, 'adjustable': 'box'})

    # NOTE(review): generations with fewer cells than the largest one keep
    # (0, 0) entries in their unused rows, and those points are plotted and
    # enter the fit below.
    initial_masses = np.zeros((max_cells_in_gen, ap.n_generation))
    final_masses = np.zeros((max_cells_in_gen, ap.n_generation))

    for genIdx, gen_cells in enumerate(cellsPerGen):
        for cellIdx, simDir in enumerate(gen_cells):
            mass = TableReader(os.path.join(simDir, "simOut", "Mass"))
            cellMass = mass.readColumn("cellMass")

            # Axis labels say pg; the stored values are presumably fg
            # (TODO confirm against the Mass listener).
            initial_masses[cellIdx, genIdx] = cellMass[0] / 1000.
            final_masses[cellIdx, genIdx] = cellMass[-1] / 1000.

    # plt.subplots returns a bare Axes (not a sequence) for a single row.
    if ap.n_generation == 1:
        axesList = [axesList]

    # Plot initial vs final masses
    for idx, axes in enumerate(axesList):
        m_initial = initial_masses[:, idx]
        m_final = final_masses[:, idx]

        axes.plot(m_initial, m_final, 'o')

        z = np.polyfit(m_initial, m_final, 1)
        p = np.poly1d(z)
        axes.plot(m_initial, p(m_initial), '--')

        # Put the fit equation slightly above the vertical centre.
        text_x = np.mean(axes.get_xlim())
        y_mid = np.mean(axes.get_ylim())
        text_y = y_mid + y_mid * 0.1
        axes.text(text_x, text_y, r"$m_f$=%.3f$\times$$m_i$ + %.3f" % (z[0], z[1]))

    axesList[-1].set_xlabel("Initial mass (pg)")
    # Floor division keeps the index an int under Python 3 as well.
    axesList[ap.n_generation // 2].set_ylabel("Final mass (pg)")

    plt.subplots_adjust(hspace=0.2, wspace=0.5)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot mean-normalized concentration traces of bulk molecules.

    Does nothing beyond printing a notice when the module-level ``DISABLED``
    flag is set. Otherwise, for every cell, each bulk molecule that is
    non-zero at some time step and positive at more than 100 steps after the
    first ``BURN_IN_SECONDS`` entries is converted to a concentration with the
    "countsToMolar" column and drawn divided by its own mean.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    if DISABLED:
        # Single-argument call form prints identically on Python 2 and 3.
        print("Currently disabled because it requires too much memory.")
        return

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # Get all cells
    allDir = ap.get_cells()

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        countsToMolar = TableReader(
            os.path.join(simOutDir, "EnzymeKinetics")).readColumn("countsToMolar")

        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        counts = bulkMolecules.readColumn("counts")
        allNames = bulkMolecules.readAttribute('objectNames')

        # Keep only molecules with a non-zero count at some time step, and
        # filter the names with the same mask so that row i of nonZeroCounts
        # and nonZeroNames[i] describe the same molecule.
        nonZeroMask = np.any(counts.T, axis=1)
        nonZeroCounts = counts.T[nonZeroMask]
        nonZeroNames = [name for name, keep in zip(allNames, nonZeroMask) if keep]

        # Only consumed by the (currently commented-out) legend call below.
        compoundNames = []

        for name, moleculeCounts in zip(nonZeroNames, nonZeroCounts):
            if (moleculeCounts[BURN_IN_SECONDS:] > 0).sum() <= 100:
                continue

            compoundNames.append(name[:20])
            concentrations = moleculeCounts * countsToMolar

            # For a cell whose time starts near zero, overwrite the burn-in
            # window with the mean of the remaining trace.
            if time[0] < 1:
                concentrations[:BURN_IN_SECONDS] = np.mean(
                    concentrations[BURN_IN_SECONDS:])

            plt.plot(time / 60., concentrations / np.mean(concentrations))

        # plt.legend(compoundNames, fontsize=5)

    plt.title("Protein Concentrations")
    plt.xlabel("Time (min)")
    plt.ylabel("Mean-normalized concentration")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Histogram the per-variant proteome correlation against a control.

    ``getPCC`` is mapped over every variant index in a process pool. The first
    result is treated as the control and drawn as a dashed line; the first
    element of every remaining result (NaNs dropped) is histogrammed.

    The plot is skipped when ``metadata`` is given and its "variant" entry
    does not contain ``SHUFFLE_VARIANT_TAG``.

    Args:
        inputDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Not used by this plot.
        validationDataFile: Path of the pickled validation data.
        metadata: Mapping with a "variant" entry, or None; also forwarded to
            ``exportFigure``.

    Raises:
        Exception: If inputDir is not an existing directory.
    """
    if metadata is not None and SHUFFLE_VARIANT_TAG not in metadata["variant"]:
        # Single-argument call form prints identically on Python 2 and 3.
        print("This plot only runs for variants where parameters are shuffled.")
        return

    if not os.path.isdir(inputDir):
        raise Exception("inputDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)
    schmidtData = validation_data.protein.schmidt2015Data
    schmidtCounts = schmidtData["glucoseCounts"]
    monomerIds = schmidtData["monomerId"].tolist()

    ap = AnalysisPaths(inputDir, variant_plot=True)

    # One argument tuple per variant; getPCC receives the tuple unchanged.
    n_variant = ap.n_variant
    args = list(zip(
        range(n_variant),
        [ap] * n_variant,
        [monomerIds] * n_variant,
        [schmidtCounts] * n_variant))

    pool = Pool(processes=parallelization.plotter_cpus())
    try:
        result = pool.map(getPCC, args)
    finally:
        # Release the worker processes even if a worker raised.
        pool.close()
        pool.join()

    # Each result is a (pcc, p-value) pair; only the pcc is plotted.
    controlPcc, _ = result[0]
    pccs = np.array([pcc for pcc, _ in result[1:] if not np.isnan(pcc)])

    fig = plt.figure()
    fig.set_figwidth(5)
    fig.set_figheight(5)
    ax = plt.subplot(1, 1, 1)

    # The bin count must be an integer of at least 1.
    n_bins = max(1, int(np.sqrt(pccs.size)))
    ax.hist(pccs, n_bins)
    ax.axvline(controlPcc, color="k", linestyle="dashed", linewidth=2)

    ax.set_xlabel("Proteome correlation (Pearson r)")
    ax.set_title("Mean: %0.3g Std: %0.3g Control: %0.3g" %
                 (pccs.mean(), pccs.std(), controlPcc))

    # label1 is the primary tick label; the older `label` alias is not
    # available in recent Matplotlib releases.
    for tick in ax.yaxis.get_major_ticks() + ax.xaxis.get_major_ticks():
        tick.label1.set_fontsize(FONT_SIZE)

    whitePadSparklineAxis(ax)

    plt.subplots_adjust(bottom=0.2, wspace=0.3)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    # Close the figure so repeated calls do not accumulate open figures.
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Export one small, axis-free dry-mass trace per cell.

    For each cell returned by ``AnalysisPaths.get_cells()`` the "dryMass"
    column is plotted against time since the cell's ``initialTime`` on a
    frameless 1.25 x 0.8 inch figure. Each figure is exported as
    ``r01_<massType>_gen<index>``, where index is the cell's position in the
    list of cells.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figures are exported to; created if missing.
        plotOutFileName: Not used; output names are generated per cell.
        simDataFile: Not used by this plot.
        validationDataFile: Not used by this plot.
        metadata: Not used; ``exportFigure`` is called without it.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # TODO: Declutter Y-axis

    massNames = [
        "dryMass",
    ]

    # Get all cells; the enumerate index names the output file.
    for cellIdx, simDir in enumerate(ap.get_cells()):
        simOutDir = os.path.join(simDir, "simOut")

        mainReader = TableReader(os.path.join(simOutDir, "Main"))
        initialTime = mainReader.readAttribute("initialTime")
        time = mainReader.readColumn("time") - initialTime

        mass = TableReader(os.path.join(simOutDir, "Mass"))

        for massType in massNames:
            massToPlot = mass.readColumn(massType)

            # Axes fill the whole frameless figure; limits hug the data.
            f = plt.figure(figsize=(1.25, 0.8), frameon=False)
            ax = f.add_axes([0, 0, 1, 1])
            ax.axis("off")

            ax.plot(time, massToPlot, linewidth=2)
            ax.set_ylim([massToPlot.min(), massToPlot.max()])
            ax.set_xlim([time.min(), time.max()])

            exportFigure(
                plt, plotOutDir, "r01_{}_gen{}".format(massType, cellIdx))
            plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot how often each mRNA is present and when it is transcribed.

    mRNAs are ordered by their mean simulated synthesis probability. For each
    one, the fraction of cells in which its bulk count is non-zero at some
    time step sorts it into "always" (1), "never" (0) or "sometimes" (anything
    else). The figure holds a scatter and histogram of that fraction, plus one
    event plot of transcription initiation times per category, split into
    essential and non-essential genes.

    When the module-level ``USE_CACHE`` flag is false the data is computed
    from the simulation output and written to ``transcriptionEvents.pickle``
    in plotOutDir; when it is true that file is read back.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for the figure and the cache file; created if
            missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled simulation data.
        validationDataFile: Path of the pickled validation data.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    cachePath = os.path.join(plotOutDir, "transcriptionEvents.pickle")

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)
    essentialRnas = validation_data.essentialGenes.essentialRnas

    # Get mRNA data
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    rnaIds = sim_data.process.transcription.rnaData["id"]
    isMRna = sim_data.process.transcription.rnaData["isMRna"]
    mRnaIndexes = np.where(isMRna)[0]
    mRnaIds = np.array([rnaIds[x] for x in mRnaIndexes])

    if not USE_CACHE:
        # Get whether or not mRNAs were transcribed
        time = []
        transcribedBool = []
        simulatedSynthProbs = []
        transcriptionEvents = []

        for simDir in allDir:
            simOutDir = os.path.join(simDir, "simOut")

            time += TableReader(os.path.join(
                simOutDir, "Main")).readColumn("time").tolist()

            rnaSynthProb = TableReader(os.path.join(simOutDir, "RnaSynthProb"))
            simulatedSynthProbs.append(np.mean(
                rnaSynthProb.readColumn("rnaSynthProb")[:, mRnaIndexes], axis=0))
            rnaSynthProb.close()

            bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            mRnaIndexes_bulk = np.array([moleculeIds.index(x) for x in mRnaIds])
            moleculeCounts = bulkMolecules.readColumn("counts")[:, mRnaIndexes_bulk]
            bulkMolecules.close()

            # An mRNA counts as present in this cell if its count is non-zero
            # at any time step.
            transcribedBool.append(moleculeCounts.sum(axis=0) != 0)

            rnapDataReader = TableReader(os.path.join(simOutDir, "RnapData"))
            rnaInitEvent = rnapDataReader.readColumn("rnaInitEvent")[:, mRnaIndexes]
            rnapDataReader.close()
            transcriptionEvents.append(rnaInitEvent != 0)

        time = np.array(time)
        transcribedBool = np.array(transcribedBool)
        simulatedSynthProbs = np.array(simulatedSynthProbs)
        # Rows follow the concatenated time vector across all cells.
        transcriptionEvents = np.vstack(transcriptionEvents)

        indexingOrder = np.argsort(np.mean(simulatedSynthProbs, axis=0))
        transcribedBoolOrdered = np.mean(transcribedBool, axis=0)[indexingOrder]
        transcriptionEventsOrdered = transcriptionEvents[:, indexingOrder]
        mRnaIdsOrdered = mRnaIds[indexingOrder]

        alwaysPresentIndexes = np.where(transcribedBoolOrdered == 1.)[0]
        neverPresentIndexes = np.where(transcribedBoolOrdered == 0.)[0]
        sometimesPresentIndexes = np.where(
            (transcribedBoolOrdered != 1.) & (transcribedBoolOrdered != 0.))[0]

        colors = np.repeat("g", len(transcribedBoolOrdered))
        colors[alwaysPresentIndexes] = "b"
        colors[neverPresentIndexes] = "r"

        def splitByEssentiality(indexes):
            """Return (essential, nonEssential) lists of event times in hours."""
            essential = []
            nonEssential = []
            for i in indexes:
                eventMask = transcriptionEventsOrdered[:, i]
                if eventMask.sum() == 0:
                    # Placeholder row; -1 lies left of the x range, which
                    # starts at 0.
                    v = [-1]
                else:
                    v = (time[eventMask] / 3600.).tolist()
                if mRnaIdsOrdered[i] in essentialRnas:
                    essential.append(v)
                else:
                    nonEssential.append(v)
            return essential, nonEssential

        # Assemble data
        alwaysTranscriptionEvents_E, alwaysTranscriptionEvents_N = \
            splitByEssentiality(alwaysPresentIndexes)
        neverTranscriptionEvents_E, neverTranscriptionEvents_N = \
            splitByEssentiality(neverPresentIndexes)
        sometimesTranscriptionEvents_E, sometimesTranscriptionEvents_N = \
            splitByEssentiality(sometimesPresentIndexes)

        with open(cachePath, "wb") as f:
            cPickle.dump(
                {
                    "time": time,
                    "always_E": alwaysTranscriptionEvents_E,
                    "always_N": alwaysTranscriptionEvents_N,
                    "never_E": neverTranscriptionEvents_E,
                    "never_N": neverTranscriptionEvents_N,
                    "sometimes_E": sometimesTranscriptionEvents_E,
                    "sometimes_N": sometimesTranscriptionEvents_N,
                    "transcriptionFrequency": transcribedBoolOrdered,
                    "colors": colors,
                    "id": mRnaIdsOrdered,
                },
                f)
    else:
        # The cache is written in binary mode, so read it the same way.
        with open(cachePath, "rb") as f:
            D = cPickle.load(f)
        time = D["time"]
        alwaysTranscriptionEvents_E = D["always_E"]
        alwaysTranscriptionEvents_N = D["always_N"]
        neverTranscriptionEvents_E = D["never_E"]
        neverTranscriptionEvents_N = D["never_N"]
        sometimesTranscriptionEvents_E = D["sometimes_E"]
        sometimesTranscriptionEvents_N = D["sometimes_N"]
        transcribedBoolOrdered = D["transcriptionFrequency"]
        colors = D["colors"]
        mRnaIdsOrdered = D["id"]

    # Plot
    blue = [0, 0, 1]
    green = [0, 0.5, 0]
    red = [1, 0, 0]
    gray = [0, 0, 0]  # essential genes; the value is black

    nGenes = len(transcribedBoolOrdered)
    nGens = len(allDir)

    fig = plt.figure(figsize=(8, 10))
    scatterAxis = plt.subplot2grid((5, 4), (0, 0), colspan=3, rowspan=2)
    histAxis = plt.subplot2grid((5, 4), (0, 3), colspan=1, rowspan=2, sharey=scatterAxis)
    alwaysAxis = plt.subplot2grid((5, 4), (2, 0), colspan=4, rowspan=1)
    sometimesAxis = plt.subplot2grid((5, 4), (3, 0), colspan=4, rowspan=1, sharex=alwaysAxis)
    neverAxis = plt.subplot2grid((5, 4), (4, 0), colspan=4, rowspan=1, sharex=alwaysAxis)

    scatterAxis.scatter(
        np.arange(nGenes), transcribedBoolOrdered,
        marker='o', facecolors=colors, edgecolors="none", s=5)
    scatterAxis.set_title(
        "Frequency of observing at least 1 transcript per generation\n(Genes ordered by simulated synthesis probability)",
        fontsize=10)
    scatterAxis.set_xlim([-1, nGenes])
    scatterAxis.set_ylim([-0.1, 1.1])
    # Booleans are accepted by every Matplotlib version; the "off" string is
    # not understood by current releases.
    scatterAxis.tick_params(top=False)
    scatterAxis.tick_params(right=False)
    scatterAxis.tick_params(which='both', direction='out', labelsize=8)

    histAxis.hist(
        transcribedBoolOrdered, bins=nGens + 1,
        orientation='horizontal', color="k", alpha=0.5)
    histAxis.set_xscale("log")
    histAxis.spines["right"].set_visible(False)
    histAxis.tick_params(right=False)
    histAxis.tick_params(which='both', direction='out', labelsize=8)

    def geneCountLabel(nInCategory):
        """Format a category size as a count plus percentage of all mRNAs."""
        return "%s genes\n(%0.1f%%)" % (
            nInCategory, 100. * nInCategory / float(nGenes))

    nNever = len(neverTranscriptionEvents_N) + len(neverTranscriptionEvents_E)
    nAlways = len(alwaysTranscriptionEvents_N) + len(alwaysTranscriptionEvents_E)
    nSometimes = len(sometimesTranscriptionEvents_N) + len(sometimesTranscriptionEvents_E)

    histAxis.text(
        histAxis.get_xlim()[1] * 1.5, 0, geneCountLabel(nNever),
        fontsize=10, verticalalignment="top")
    histAxis.text(
        histAxis.get_xlim()[1] * 1.5, 1, geneCountLabel(nAlways),
        fontsize=10, verticalalignment="bottom")
    histAxis.text(
        histAxis.get_xlim()[1] * 1.5, 0.5, geneCountLabel(nSometimes),
        fontsize=10, verticalalignment="center")
    # Green bar beside the histogram spanning the intermediate frequencies.
    histAxis.add_patch(
        patches.Rectangle(
            (histAxis.get_xlim()[1] * 0.7, 1. / (nGens + 1)),
            1e4, 1. - 2. / (nGens + 1),
            facecolor=green, edgecolor="none"))

    def drawEventPanel(axis, nonEssential, essential, color, ylabel,
                       title=None, xlabel=None, xlim=None):
        """Draw one event plot: non-essential rows first, essential rows above."""
        axis.eventplot(
            nonEssential + essential,
            orientation="horizontal", linewidths=2., linelengths=1.,
            colors=[color] * len(nonEssential) + [gray] * len(essential))
        axis.set_ylabel(ylabel, fontsize=10)
        if title is not None:
            axis.set_title(title, fontsize=10)
        if xlabel is not None:
            axis.set_xlabel(xlabel, fontsize=10)
        axis.set_yticks([])
        axis.tick_params(top=False)
        axis.tick_params(which='both', direction='out', labelsize=8)
        if xlim is not None:
            axis.set_xlim(xlim)
        axis.set_ylim([
            -1,
            np.max([N_GENES_TO_PLOT, len(essential) + len(nonEssential)])])
        # Row-count annotations just right of the axis, centred on each group.
        axis.text(
            axis.get_xlim()[1] * 1.02, len(nonEssential) * 0.5,
            "%s\nnon-essential\ngenes" % len(nonEssential),
            fontsize=10, verticalalignment="center")
        axis.text(
            axis.get_xlim()[1] * 1.02, len(nonEssential) + len(essential) * 0.5,
            "%s essential\ngenes" % len(essential),
            fontsize=10, verticalalignment="center")

    drawEventPanel(
        alwaysAxis, alwaysTranscriptionEvents_N, alwaysTranscriptionEvents_E,
        blue, "Always present",
        title="Transcription initiation events",
        xlim=[0, time[-1] / 3600.])
    # The non-essential label here now reports the sub-generational count.
    drawEventPanel(
        sometimesAxis, sometimesTranscriptionEvents_N, sometimesTranscriptionEvents_E,
        green, "Sub-generational")
    drawEventPanel(
        neverAxis, neverTranscriptionEvents_N, neverTranscriptionEvents_E,
        red, "Never present",
        xlabel="Time (hour)")

    plt.subplots_adjust(wspace=0.4, hspace=0.4, right=0.83, bottom=0.05, left=0.07, top=0.95)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot transcription, RNA, monomer, flux and metabolite traces for one enzyme.

    The enzyme, its RNA, its reaction and the metabolite are fixed by the
    identifiers at the top of the function body. Data from every cell
    returned by ``AnalysisPaths.get_cells()`` is concatenated in order and
    drawn on five stacked subplots sharing the time axis.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled simulation data.
        validationDataFile: Not used by this plot.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    enzymeMonomerId = "GLUTCYSLIG-MONOMER[c]"
    enzymeRnaId = "EG10418_RNA[c]"
    reactionId = "GLUTCYSLIG-RXN"
    transcriptionFreq = 1.0
    metaboliteId = "GLUTATHIONE[c]"

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    rnaIds = sim_data.process.transcription.rnaData["id"]
    isMRna = sim_data.process.transcription.rnaData["isMRna"]
    mRnaIndexes = np.where(isMRna)[0]
    mRnaIds = np.array([rnaIds[x] for x in mRnaIndexes])

    # NOTE(review): this is the enzyme RNA's position within the mRNA-only
    # list and is applied directly to the columns of "rnaInitEvent"; confirm
    # that column layout matches (i.e. one column per mRNA, in this order).
    enzymeRnaInitIndex = np.where(mRnaIds == enzymeRnaId)[0][0]

    # Bulk-molecule column indexes are looked up once, from the first cell.
    simOutDir = os.path.join(allDir[0], "simOut")
    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    moleculeIds = bulkMolecules.readAttribute("objectNames")
    enzymeMonomerIndex = moleculeIds.index(enzymeMonomerId)
    enzymeRnaIndex = moleculeIds.index(enzymeRnaId)
    metaboliteIndex = moleculeIds.index(metaboliteId)
    bulkMolecules.close()

    time = []
    enzymeFluxes = []
    enzymeMonomerCounts = []
    enzymeRnaCounts = []
    enzymeRnaInitEvent = []
    metaboliteCounts = []

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        time += TableReader(os.path.join(
            simOutDir, "Main")).readColumn("time").tolist()

        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        moleculeCounts = bulkMolecules.readColumn("counts")
        enzymeMonomerCounts += moleculeCounts[:, enzymeMonomerIndex].tolist()
        enzymeRnaCounts += moleculeCounts[:, enzymeRnaIndex].tolist()
        metaboliteCounts += moleculeCounts[:, metaboliteIndex].tolist()
        bulkMolecules.close()

        # Reaction IDs are read per cell, so the column is looked up per cell.
        fbaResults = TableReader(os.path.join(simOutDir, "FBAResults"))
        reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
        reactionFluxes = np.array(fbaResults.readColumn("reactionFluxes"))
        reactionIndex = np.where(reactionIDs == reactionId)[0][0]
        enzymeFluxes += reactionFluxes[:, reactionIndex].tolist()
        fbaResults.close()

        rnapDataReader = TableReader(os.path.join(simOutDir, "RnapData"))
        enzymeRnaInitEvent += rnapDataReader.readColumn(
            "rnaInitEvent")[:, enzymeRnaInitIndex].tolist()
        rnapDataReader.close()

    time = np.array(time)
    hours = time / 3600.

    # Plot
    fig = plt.figure(figsize=(10, 10))
    rnaInitAxis = plt.subplot(5, 1, 1)
    rnaAxis = plt.subplot(5, 1, 2, sharex=rnaInitAxis)
    monomerAxis = plt.subplot(5, 1, 3, sharex=rnaInitAxis)
    fluxAxis = plt.subplot(5, 1, 4, sharex=rnaInitAxis)
    metAxis = plt.subplot(5, 1, 5, sharex=rnaInitAxis)

    rnaInitAxis.plot(hours, enzymeRnaInitEvent)
    rnaInitAxis.set_title(
        "%s transcription initiation events" % enzymeRnaId, fontsize=10)
    rnaInitAxis.set_ylim([0, rnaInitAxis.get_ylim()[1] * 1.1])
    rnaInitAxis.set_xlim([0, time[-1] / 3600.])

    rnaAxis.plot(hours, enzymeRnaCounts)
    rnaAxis.set_title("%s counts" % enzymeRnaId, fontsize=10)

    monomerAxis.plot(hours, enzymeMonomerCounts)
    monomerAxis.set_title("%s counts" % enzymeMonomerId, fontsize=10)

    fluxAxis.plot(hours, enzymeFluxes)
    fluxAxis.set_title(
        "%s flux (%s / %s / %s)" % (reactionId, COUNTS_UNITS, VOLUME_UNITS, TIME_UNITS),
        fontsize=10)

    metAxis.plot(hours, metaboliteCounts)
    metAxis.set_title("%s counts" % metaboliteId, fontsize=10)
    metAxis.set_xlabel(
        "Time (hour)\n(%s frequency of at least 1 transcription per generation)" % transcriptionFreq,
        fontsize=10)

    plt.subplots_adjust(wspace=0.4, hspace=0.4)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Compare simulated reaction fluxes with the Toya 2010 validation fluxes.

    For every cell and every Toya reaction, the flux time courses of all
    simulated reactions whose ID matches the Toya reaction ID (used as a
    regular expression) are summed, with a sign flip for IDs containing
    "reverse", and averaged over time. Per-reaction means and standard
    deviations across cells are plotted against the Toya values with error
    bars; the Pearson correlation is shown in the title.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory the figure is exported to; created if missing.
        plotOutFileName: File name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled simulation data.
        validationDataFile: Path of the pickled validation data.
        metadata: Forwarded to ``exportFigure``.

    Raises:
        Exception: If seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    # Loaded once, in binary mode (pickles are binary data).
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)

    toya2010fluxes = validation_data.reactionFlux.toya2010fluxes
    toyaReactions = toya2010fluxes["reactionID"]
    toyaFluxes = toya2010fluxes["reactionFlux"]
    toyaStdev = toya2010fluxes["reactionFluxStdev"]
    toyaFluxesDict = dict(zip(toyaReactions, toyaFluxes))
    toyaStdevDict = dict(zip(toyaReactions, toyaStdev))

    fluxUnits = units.mmol / units.g / units.h
    cellDensity = sim_data.constants.cellDensity.asNumber(MASS_UNITS / VOLUME_UNITS)

    # One list of per-cell mean fluxes for every Toya reaction.
    modelFluxes = {}
    for rxn in toyaReactions:
        modelFluxes[rxn] = []

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        massListener = TableReader(os.path.join(simOutDir, "Mass"))
        cellMass = massListener.readColumn("cellMass")
        dryMass = massListener.readColumn("dryMass")
        massListener.close()

        # Per-time-step factor used to rescale the stored fluxes before the
        # COUNTS/MASS/TIME unit is attached.
        coefficient = dryMass / cellMass * cellDensity

        fbaResults = TableReader(os.path.join(simOutDir, "FBAResults"))
        reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
        reactionFluxes = (COUNTS_UNITS / MASS_UNITS / TIME_UNITS) * (
            fbaResults.readColumn("reactionFluxes").T / coefficient).T
        fbaResults.close()

        for toyaReaction in toyaReactions:
            fluxTimeCourse = []

            for rxn in reactionIDs:
                # The Toya reaction ID is applied as a regular expression.
                if not re.search(toyaReaction, rxn):
                    continue

                # Same test as the regex "(reverse)": the parentheses there
                # form a group, so it matches the bare substring.
                reverse = -1 if "reverse" in rxn else 1

                if len(fluxTimeCourse):
                    fluxTimeCourse += reverse * reactionFluxes[:, np.where(reactionIDs == rxn)]
                else:
                    fluxTimeCourse = reverse * reactionFluxes[:, np.where(reactionIDs == rxn)]

            if len(fluxTimeCourse):
                modelFluxes[toyaReaction].append(
                    np.mean(fluxTimeCourse).asNumber(fluxUnits))

    toyaVsReactionAve = []
    # .items() works on both Python 2 and Python 3.
    for rxn, toyaFlux in toyaFluxesDict.items():
        # Skip reactions with no matching model flux; the mean of an empty
        # list is NaN and would turn the whole correlation into NaN.
        if not modelFluxes.get(rxn):
            continue
        toyaVsReactionAve.append((
            np.mean(modelFluxes[rxn]),
            toyaFlux.asNumber(fluxUnits),
            np.std(modelFluxes[rxn]),
            toyaStdevDict[rxn].asNumber(fluxUnits)))

    # Columns: model mean, Toya flux, model std, Toya std.
    toyaVsReactionAve = np.array(toyaVsReactionAve)
    correlationCoefficient = np.corrcoef(
        toyaVsReactionAve[:, 0], toyaVsReactionAve[:, 1])[0, 1]

    plt.figure(figsize=(8, 8))
    plt.title("Central Carbon Metabolism Flux, Pearson R = {:.2}".format(
        correlationCoefficient))
    plt.errorbar(
        toyaVsReactionAve[:, 1], toyaVsReactionAve[:, 0],
        xerr=toyaVsReactionAve[:, 3], yerr=toyaVsReactionAve[:, 2],
        fmt="o", ecolor="k")

    # Identity line across the current y range.
    ylim = plt.ylim()
    plt.plot([ylim[0], ylim[1]], [ylim[0], ylim[1]], color="k")
    plt.xlabel("Toya 2010 Reaction Flux [mmol/g/hr]")
    plt.ylabel("Mean WCM Reaction Flux [mmol/g/hr]")

    # gca() returns the axes drawn on above; a bare plt.axes() call is not
    # guaranteed to reuse them.
    ax = plt.gca()
    ax.set_ylim(plt.xlim())
    whitePadSparklineAxis(ax)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot which metabolites stop increasing ("limited") in each generation.

    Metabolite counts from the ``EnzymeKinetics`` listener are normalized by
    their second recorded row. At time step ``i`` a metabolite is flagged as
    limited when none of the step-to-step differences within the next
    ``WINDOW`` steps is positive. Three panels are drawn: a histogram of the
    number of limited metabolites per generation, the number of generations
    in which each metabolite was limited, and the limited flags over time.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for the figure; created when it is missing.
        plotOutFileName: Base file name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled ``sim_data`` object.
        validationDataFile: Unused by this plot.
        metadata: Forwarded unchanged to ``exportFigure``.

    Raises:
        Exception: If ``seedOutDir`` is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    metaboliteNames = np.array(
        sorted(sim_data.process.metabolism.concDict.keys()))
    nMetabolites = len(metaboliteNames)

    fig, axesList = plt.subplots(3)
    fig.set_size_inches(11, 11)

    # histo[k]: number of generations with exactly k limited metabolites
    histo = np.zeros(4)
    # limitedCounts[m]: number of generations in which metabolite m was limited
    limitedCounts = np.zeros(nMetabolites)

    ax2 = axesList[2]
    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        enzymeKineticsData = TableReader(
            os.path.join(simOutDir, "EnzymeKinetics"))
        metaboliteCounts = enzymeKineticsData.readColumn(
            "metaboliteCountsFinal")
        normalizedCounts = metaboliteCounts / metaboliteCounts[1, :]
        enzymeKineticsData.close()

        # Read time info from the listener (opened once, then closed)
        mainReader = TableReader(os.path.join(simOutDir, "Main"))
        initialTime = mainReader.readAttribute("initialTime")
        time = mainReader.readColumn("time")
        mainReader.close()

        metaboliteLimited = np.zeros((len(time), nMetabolites))

        diff = np.diff(normalizedCounts, axis=0)
        limitedPerStep = []
        for i in range(diff.shape[0] - WINDOW):
            # Metabolites with no positive change anywhere in the window
            currentStepLimited = np.where(
                ~np.any(diff[i:i + WINDOW] > 0, axis=0))[0].astype(int)
            metaboliteLimited[i, currentStepLimited] = 1
            limitedPerStep.append(currentStepLimited)

        # Concatenate once instead of growing an array on every step
        if limitedPerStep:
            limited = np.concatenate(limitedPerStep).astype(int)
        else:
            limited = np.array([], dtype=int)

        nLimited = len(np.unique(limited))
        if nLimited >= len(histo):
            histo = np.append(histo, np.zeros(nLimited - len(histo) + 1))
        histo[nLimited] += 1
        # Fancy-index increment: each limited metabolite gains one per generation
        limitedCounts[limited] += 1

        # Each metabolite's flag is scaled by its column index so that the
        # traces land on separate y levels.
        ax2.plot(
            time / 60,
            metaboliteLimited * np.arange(metaboliteLimited.shape[1]))
        ax2.axvline(initialTime / 60, color="r", linestyle="--")

    ax2.set_xlim([0, max(time) / 60])
    ax2.set_xlabel("Time (min)")
    ax2.set_ylabel("Limited")

    ax0 = axesList[0]
    labels = np.arange(len(histo))
    ax0.bar(labels - 0.5, histo, 1)
    ax0.set_xticks(labels)
    ax0.set_xlabel("Number of limited metabolites")
    ax0.set_ylabel("Number of generations")

    ax1 = axesList[1]
    everLimited = limitedCounts > 0
    barPositions = np.arange(everLimited.sum())
    ax1.bar(barPositions - 0.4, limitedCounts[everLimited])
    ax1.set_xticks(barPositions)
    ax1.set_xticklabels(metaboliteNames[everLimited], fontsize=6)
    ax1.set_xlabel("Metabolite Limited")
    ax1.set_ylabel("Number of generations")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Draw violin plots of the concentrations of ``LIMITED_METABOLITES``.

    For every cell in the cohort, the ``EnzymeKinetics`` listener's
    ``metaboliteCountsFinal`` (first row dropped) is multiplied by
    ``countsToMolar`` and the columns for ``LIMITED_METABOLITES`` are kept.
    All cells are stacked, summary statistics are printed, and one violin
    per metabolite is drawn.

    Args:
        variantDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for the figure (created via ``filepath.makedirs``).
        plotOutFileName: Base file name forwarded to ``exportFigure``.
        simDataFile: Unused by this plot.
        validationDataFile: Unused by this plot.
        metadata: Forwarded unchanged to ``exportFigure``.

    Raises:
        Exception: If ``variantDir`` is not an existing directory.
    """
    if not os.path.isdir(variantDir):
        raise Exception('variantDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(variantDir, cohort_plot=True)

    limited_metabolites = []
    for sim_dir in ap.get_cells():
        sim_out_dir = os.path.join(sim_dir, 'simOut')

        # Listeners used
        kinetics_reader = TableReader(
            os.path.join(sim_out_dir, "EnzymeKinetics"))

        # Load data; a cell whose listener cannot be read is reported and
        # skipped. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            metabolite_indices = {
                m: i for i, m in enumerate(
                    kinetics_reader.readAttribute('metaboliteNames'))
                }
            metabolite_counts = kinetics_reader.readColumn(
                "metaboliteCountsFinal")[1:, :]
            counts_to_molar = kinetics_reader.readColumn(
                'countsToMolar')[1:].reshape(-1, 1)
        except Exception:
            print('Error reading data from {}'.format(sim_out_dir))
            continue

        # Calculate concentrations
        met_idx = np.array(
            [metabolite_indices[m] for m in LIMITED_METABOLITES])
        metabolite_conc = counts_to_molar * metabolite_counts[:, met_idx]
        limited_metabolites.append(metabolite_conc)

    limited_metabolites = np.vstack(limited_metabolites)

    # Values to calculate significance between different cohorts
    print('Metabolites: {}'.format(LIMITED_METABOLITES))
    print('Means: {}'.format(limited_metabolites.mean(axis=0)))
    print('Stds: {}'.format(limited_metabolites.std(axis=0)))
    print('N: {}'.format(limited_metabolites.shape[0]))

    plt.figure(figsize=(4, 4))
    # One violin position per metabolite column ([0, 1] for two metabolites)
    xticks = list(range(len(LIMITED_METABOLITES)))

    # Plot data
    plt.violinplot(limited_metabolites, xticks, showmeans=True)

    # Format axes
    plt.ylim([0, 50])
    whitePadSparklineAxis(plt.gca())
    plt.xticks(xticks, LIMITED_METABOLITES)
    plt.ylabel('Concentration (uM)')

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Scatter added volume against birth volume for each condition variant.

    Only runs when ``metadata["variant"]`` is ``"condition"``. For every
    variant, the first and last ``cellMass`` values of each generation 2 and
    3 cell are converted to volumes with that variant's cell density. The
    figure is exported twice: once labeled and once as a "_clean" version
    without axis labels or tick labels.

    Args:
        inputDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for the figures (created via ``filepath.makedirs``).
        plotOutFileName: Base file name forwarded to ``exportFigure``.
        simDataFile: Unused; each variant's own sim_data pickle is loaded.
        validationDataFile: Unused by this plot.
        metadata: Must contain ``"variant"``; forwarded to ``exportFigure``.

    Raises:
        Exception: If ``inputDir`` is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print("This plot only runs for the 'condition' variant.")
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    gens = [2, 3]

    initial_volumes = []
    added_volumes = []

    for variant in variants:
        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)

        density = sim_data.constants.cellDensity.asNumber(
            units.fg / units.um**3)

        birth_masses = []
        division_masses = []

        # A variant without cells still appends (empty) arrays below, so the
        # list positions stay aligned with the positional labels used when
        # plotting.
        for simDir in ap.get_cells(variant=[variant], generation=gens):
            # Best effort: a cell whose Mass listener cannot be read is
            # skipped. Both values are read before either is stored, so the
            # two lists can never end up with different lengths.
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("cellMass")
                birth_mass = cellMass[0]
                division_mass = cellMass[-1]
            except Exception:
                continue

            birth_masses.append(birth_mass)
            division_masses.append(division_mass)

        initial_masses = np.array(birth_masses, dtype=float)
        final_masses = np.array(division_masses, dtype=float)
        added_masses = final_masses - initial_masses

        initial_volumes.append(initial_masses / density)
        added_volumes.append(added_masses / density)

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    plt.figure(figsize=(4, 4))
    ax = plt.subplot2grid((1, 1), (0, 0))

    options = {
        "edgecolors": color_cycle[0],
        "alpha": 0.2,
        "s": 50,
        "clip_on": False,
        }
    labels = ["minimal", "anaerobic", "minimal + AA"]

    # Variant 2 is drawn first, then 0 and 1 on top of it
    ax.scatter(initial_volumes[2], added_volumes[2],
        marker="x", label=labels[2], **options)
    ax.scatter(initial_volumes[0], added_volumes[0],
        facecolors="none", marker="o", label=labels[0], **options)
    ax.scatter(initial_volumes[1], added_volumes[1],
        facecolors="none", marker="^", label=labels[1], **options)

    ax.set_xlim([0, 4])
    ax.set_ylim([0, 4])
    # Raw strings: "\m" is not a valid escape sequence in a normal literal
    ax.set_xlabel(r"Birth Volume ($\mu m^3$)")
    ax.set_ylabel(r"Added Volume ($\mu m^3$)")
    ax.legend()

    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
        top=False, right=False, labelbottom=True, labelleft=True)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of plot
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_yticklabels([])
    ax.set_xticklabels([])

    exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot menE expression and its downstream effects across generations.

    Six stacked panels are drawn for the seed-0 lineage: transcription
    initiation events, mRNA counts, monomer counts, tetramer counts, reaction
    flux and summed end-product counts. Time spans longer than 0.1 h with
    zero tetramer counts are shaded gray. The concatenated time courses are
    cached in ``figure5D.pickle`` inside ``plotOutDir`` and reused when that
    file already exists. A labeled figure and a "__clean" figure are
    exported; when ``PLOT_DOWNSTREAM`` is truthy a third figure with
    downstream enzyme counts and fluxes is exported as well.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for figures and the cache; created when missing.
        plotOutFileName: Base file name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled ``sim_data`` object.
        validationDataFile: Unused by this plot.
        metadata: Forwarded to ``exportFigure`` (not for the clean figure).

    Raises:
        Exception: If ``seedOutDir`` is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # The cache is rebuilt only when the pickle is not there yet
    cachePath = os.path.join(plotOutDir, "figure5D.pickle")
    BUILD_CACHE = not os.path.exists(cachePath)

    enzymeComplexId = "MENE-CPLX[c]"
    enzymeMonomerId = "O-SUCCINYLBENZOATE-COA-LIG-MONOMER[c]"
    enzymeRnaId = "EG12437_RNA[c]"
    reactionId = "O-SUCCINYLBENZOATE-COA-LIG-RXN"
    metaboliteIds = ["REDUCED-MENAQUINONE[c]", "CPD-12115[c]"]

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    if 0 not in ap._path_data["seed"]:
        print("Skipping -- figure5D only runs for seed 0")
        return

    allDir = ap.get_cells(seed=[0])

    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    cellDensity = sim_data.constants.cellDensity
    rnaIds = sim_data.process.transcription.rnaData["id"]

    # Molecule indexes are looked up once, in the first cell's listener
    simOutDir = os.path.join(allDir[0], "simOut")
    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    moleculeIds = bulkMolecules.readAttribute("objectNames")
    enzymeComplexIndex = moleculeIds.index(enzymeComplexId)
    enzymeMonomerIndex = moleculeIds.index(enzymeMonomerId)
    enzymeRnaIndex = moleculeIds.index(enzymeRnaId)
    metaboliteIndexes = [moleculeIds.index(x) for x in metaboliteIds]
    bulkMolecules.close()

    if BUILD_CACHE:
        time = []
        enzymeFluxes = []
        enzymeComplexCounts = []
        enzymeMonomerCounts = []
        enzymeRnaCounts = []
        enzymeRnaInitEvent = []
        metaboliteCounts = np.array([])

        cellMass = []
        dryMass = []
        timeStepSec = []
        generationTicks = [0.]

        nTranscriptionInitEventsPerGen = []
        nAvgTetramersPerGen = []

        enzymeRnaTranscriptionIndex = np.where(rnaIds == enzymeRnaId)[0][0]

        for gen, simDir in enumerate(allDir):
            simOutDir = os.path.join(simDir, "simOut")

            mainReader = TableReader(os.path.join(simOutDir, "Main"))
            time += mainReader.readColumn("time").tolist()
            generationTicks.append(time[-1])
            timeStepSec += mainReader.readColumn("timeStepSec").tolist()
            mainReader.close()

            massReader = TableReader(os.path.join(simOutDir, "Mass"))
            cellMass += massReader.readColumn("cellMass").tolist()
            dryMass += massReader.readColumn("dryMass").tolist()
            massReader.close()

            bulkMolecules = TableReader(
                os.path.join(simOutDir, "BulkMolecules"))
            moleculeCounts = bulkMolecules.readColumn("counts")
            enzymeComplexCountsInThisGen = moleculeCounts[:, enzymeComplexIndex].tolist()
            enzymeMonomerCounts += moleculeCounts[:, enzymeMonomerIndex].tolist()
            enzymeRnaCounts += moleculeCounts[:, enzymeRnaIndex].tolist()

            enzymeComplexCounts += enzymeComplexCountsInThisGen
            nAvgTetramersPerGen.append(
                np.mean(enzymeComplexCountsInThisGen))

            if gen == 0:
                metaboliteCounts = moleculeCounts[:, metaboliteIndexes]
            else:
                metaboliteCounts = np.vstack(
                    (metaboliteCounts, moleculeCounts[:, metaboliteIndexes]))
            bulkMolecules.close()

            fbaResults = TableReader(os.path.join(simOutDir, "FBAResults"))
            reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
            reactionFluxes = np.array(
                fbaResults.readColumn("reactionFluxes"))
            enzymeFluxes += reactionFluxes[:, np.where(
                reactionIDs == reactionId)[0][0]].tolist()
            fbaResults.close()

            rnapDataReader = TableReader(
                os.path.join(simOutDir, "RnapData"))
            rnaInitEventsInThisGen = rnapDataReader.readColumn(
                "rnaInitEvent")[:, enzymeRnaTranscriptionIndex].tolist()
            rnapDataReader.close()

            enzymeRnaInitEvent += rnaInitEventsInThisGen
            nTranscriptionInitEventsPerGen.append(
                np.sum(rnaInitEventsInThisGen))

        time = np.array(time)
        with open(cachePath, "wb") as f:
            cPickle.dump(
                {
                    "time": time,
                    "enzymeRnaInitEvent": enzymeRnaInitEvent,
                    "enzymeRnaCounts": enzymeRnaCounts,
                    "enzymeMonomerCounts": enzymeMonomerCounts,
                    "enzymeComplexCounts": enzymeComplexCounts,
                    "enzymeFluxes": enzymeFluxes,
                    "metaboliteCounts": metaboliteCounts,
                    "dryMass": dryMass,
                    "cellMass": cellMass,
                    "timeStepSec": timeStepSec,
                    "generationTicks": generationTicks,
                    "nTranscriptionInitEventsPerGen": nTranscriptionInitEventsPerGen,  # storing value to report in paper
                    "nAvgTetramersPerGen": nAvgTetramersPerGen,  # storing value to report in paper
                    },
                f)

    else:
        # The cache is written in binary mode, so it is read in binary mode
        with open(cachePath, "rb") as f:
            D = cPickle.load(f)
        time = D["time"]
        enzymeRnaInitEvent = D["enzymeRnaInitEvent"]
        enzymeRnaCounts = D["enzymeRnaCounts"]
        enzymeMonomerCounts = D["enzymeMonomerCounts"]
        enzymeComplexCounts = D["enzymeComplexCounts"]
        enzymeFluxes = D["enzymeFluxes"]
        metaboliteCounts = D["metaboliteCounts"]
        dryMass = D["dryMass"]
        cellMass = D["cellMass"]
        timeStepSec = D["timeStepSec"]
        generationTicks = D["generationTicks"]

    # Convert the raw flux values to mmol/g/h
    coefficient = (units.fg * np.array(dryMass)) / (
        units.fg * np.array(cellMass)) * cellDensity * (timeStepSec * units.s)
    enzymeFluxes = (((COUNTS_UNITS / VOLUME_UNITS) * enzymeFluxes) /
        coefficient).asNumber(units.mmol / units.g / units.h)

    # Plot
    plt.figure(figsize=(11, 8.5))
    plt.suptitle("O-succinylbenzoate-CoA ligase downstream behaviors",
        fontsize=FONTSIZE)
    rnaInitAxis = plt.subplot(6, 1, 1)
    rnaAxis = plt.subplot(6, 1, 2, sharex=rnaInitAxis)
    monomerAxis = plt.subplot(6, 1, 3, sharex=rnaInitAxis)
    complexAxis = plt.subplot(6, 1, 4, sharex=rnaInitAxis)
    fluxAxis = plt.subplot(6, 1, 5, sharex=rnaInitAxis)
    metAxis = plt.subplot(6, 1, 6)

    rnaInitAxis.plot(time / 3600., enzymeRnaInitEvent, c="b")
    rnaInitAxis.set_ylabel(r"$menE$" + "\n transcription\nevents",
        fontsize=FONTSIZE, rotation=0)
    rnaInitAxis.yaxis.set_label_coords(-.1, 0.25)
    rnaInitAxis.set_xlim([time[0] / 3600., time[-1] / 3600.])
    whitePadSparklineAxis(rnaInitAxis, xAxis=False)
    rnaInitAxis.set_yticks([0, 1])

    rnaAxis.plot(time / 3600., enzymeRnaCounts, c="b")
    rnaAxis.set_ylabel("menE mRNA\ncounts", fontsize=FONTSIZE, rotation=0)
    rnaAxis.yaxis.set_label_coords(-.1, 0.25)
    whitePadSparklineAxis(rnaAxis, xAxis=False)
    rnaAxis.set_yticks([0, max(enzymeRnaCounts)])

    monomerAxis.plot(time / 3600., enzymeMonomerCounts, c="b")
    monomerAxis.set_ylabel("MenE monomer\ncounts",
        fontsize=FONTSIZE, rotation=0)
    monomerAxis.yaxis.set_label_coords(-.1, 0.25)
    whitePadSparklineAxis(monomerAxis, xAxis=False)
    monomerAxis.set_yticks([0, 4, max(enzymeMonomerCounts)])

    complexAxis.plot(time / 3600., enzymeComplexCounts, c="b")
    complexAxis.set_ylabel("MenE tetramer\ncounts",
        fontsize=FONTSIZE, rotation=0)
    complexAxis.yaxis.set_label_coords(-.1, 0.25)
    whitePadSparklineAxis(complexAxis, xAxis=False)
    complexAxis.set_yticks([0, max(enzymeComplexCounts)])

    fluxAxis.plot(time / 3600., enzymeFluxes, c="b")
    fluxAxis.set_ylabel("SUCBZL flux\n(mmol/gDCW/hour)",
        fontsize=FONTSIZE, rotation=0)
    fluxAxis.yaxis.set_label_coords(-.1, 0.25)
    whitePadSparklineAxis(fluxAxis, xAxis=False)
    fluxAxis.set_yticks([min(enzymeFluxes), max(enzymeFluxes)])

    metAxis.plot(time / 3600., np.sum(metaboliteCounts, axis=1), c="b")
    metAxis.set_ylabel("End product\ncounts", fontsize=FONTSIZE, rotation=0)
    metAxis.yaxis.set_label_coords(-.1, 0.25)
    metAxis.set_xlabel("Time (hour)\ntickmarks at each new generation",
        fontsize=FONTSIZE)
    metAxis.set_ylim([metAxis.get_ylim()[0] * 0.2, metAxis.get_ylim()[1]])
    metAxis.set_xlim([time[0] / 3600., time[-1] / 3600.])
    whitePadSparklineAxis(metAxis)
    metAxis.set_yticklabels(
        ["%0.1e" % metAxis.get_ylim()[0], "%0.1e" % metAxis.get_ylim()[1]])
    metAxis.set_xticks(np.array(generationTicks) / 3600.)
    # A plain list is used for the labels: a NumPy array built from " " has
    # a one-character string dtype and would truncate "%0.2f" labels.
    xticklabels = [" "] * len(generationTicks)
    xticklabels[0] = "0"
    xticklabels[-1] = "%0.2f" % (time[-1] / 3600.)
    metAxis.set_xticklabels(xticklabels)

    # Contiguous runs of time steps with zero complex counts:
    # patchStart[k]..patchEnd[k] are the first and last index of run k.
    noComplexIndexes = np.where(np.array(enzymeComplexCounts) == 0)[0]
    patchStart = []
    patchEnd = []
    if len(noComplexIndexes):
        prev = noComplexIndexes[0]
        patchStart.append(prev)
        for i in noComplexIndexes:
            if np.abs(i - prev) > 1:
                patchStart.append(i)
                patchEnd.append(prev)
            prev = i
        patchEnd.append(prev)

    def shadeNoComplexSpans(axis):
        """Shade every zero-complex run longer than 0.1 h on ``axis``."""
        for start, end in zip(patchStart, patchEnd):
            width = time[end] / 3600. - time[start] / 3600.
            if width <= 0.1:
                continue

            height = axis.get_ylim()[1] - axis.get_ylim()[0]
            axis.add_patch(
                patches.Rectangle(
                    (time[start] / 3600., axis.get_ylim()[0]),
                    width, height,
                    alpha=0.25, color="gray", linewidth=0.))

    axesList = [
        rnaInitAxis, rnaAxis, monomerAxis, complexAxis, fluxAxis, metAxis
        ]
    for axis in axesList:
        axis.tick_params(labelsize=LABELSIZE)
        shadeNoComplexSpans(axis)

    plt.subplots_adjust(hspace=0.5, right=0.9, bottom=0.1, left=0.15, top=0.9)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Clean version of the plot: no labels, title or x tick labels
    for a in axesList:
        clearLabels(a)
    plt.suptitle("")
    metAxis.set_xticklabels([])
    metAxis.set_xlabel("")
    exportFigure(plt, plotOutDir, plotOutFileName + "__clean", "")
    plt.close("all")

    if PLOT_DOWNSTREAM:
        fig, axesList = plt.subplots(12, figsize=(11, 8.5))
        plt.subplots_adjust(hspace=0.5, right=0.95, bottom=0.05,
            left=0.15, top=0.95)
        enzymeIds = [
            "MENE-CPLX[c]", "CPLX0-7882[c]", "CPLX0-8128[c]",
            "DMK-MONOMER[i]", "2-OCTAPRENYL-METHOXY-BENZOQ-METH-MONOMER[c]"
            ]
        reactionIds = [
            "O-SUCCINYLBENZOATE-COA-LIG-RXN", "NAPHTHOATE-SYN-RXN",
            "RXN-9311", "DMK-RXN", "ADOMET-DMK-METHYLTRANSFER-RXN"
            ]
        reactantIds = ["CPD-12115[c]"]
        enzymeIndexes = [moleculeIds.index(x) for x in enzymeIds]
        reactantIndexes = [moleculeIds.index(x) for x in reactantIds]

        for simDir in allDir:
            simOutDir = os.path.join(simDir, "simOut")

            mainReader = TableReader(os.path.join(simOutDir, "Main"))
            time_ = mainReader.readColumn("time")
            timeStepSec = mainReader.readColumn("timeStepSec")
            mainReader.close()

            massReader = TableReader(os.path.join(simOutDir, "Mass"))
            cellMass = massReader.readColumn("cellMass")
            dryMass = massReader.readColumn("dryMass")
            massReader.close()

            bulkMolecules = TableReader(
                os.path.join(simOutDir, "BulkMolecules"))
            moleculeCounts = bulkMolecules.readColumn("counts")
            enzymeCounts = moleculeCounts[:, enzymeIndexes]
            metCounts = moleculeCounts[:, metaboliteIndexes[0]]
            reactantCounts = moleculeCounts[:, reactantIndexes]
            bulkMolecules.close()

            fbaResults = TableReader(os.path.join(simOutDir, "FBAResults"))
            reactionIDs = np.array(
                fbaResults.readAttribute("reactionIDs")).tolist()
            reactionIndexes = [reactionIDs.index(x) for x in reactionIds]
            reactionFluxes = np.array(
                fbaResults.readColumn("reactionFluxes"))
            enzymeFluxes = reactionFluxes[:, reactionIndexes]
            fbaResults.close()

            coefficient = (units.fg * np.array(dryMass)) / (
                units.fg * np.array(cellMass)) * cellDensity * (
                units.s * timeStepSec)

            # Each enzyme takes two rows: counts on top, flux right below
            for i, row in enumerate(range(0, 2 * len(enzymeIds), 2)):
                countAxis = axesList[row]
                fluxAxis = axesList[row + 1]
                plotFlux = (
                    ((COUNTS_UNITS / VOLUME_UNITS) * enzymeFluxes[:, i]) /
                    coefficient).asNumber(units.mmol / units.g / units.h)
                countAxis.plot(time_ / 3600., enzymeCounts[:, i], color="b")
                fluxAxis.plot(time_ / 3600., plotFlux, color="b")
            axesList[-2].plot(time_ / 3600., reactantCounts, color="b")
            axesList[-1].plot(time_ / 3600., metCounts, color="b")

        ylabels = [
            "menE", "menB", "menI", "menA", "ubiE", "CPD-12115", "Menaquinone"
            ]
        for i, axis in enumerate(axesList[::2]):
            axis.set_xlim([0, time_[-1] / 3600.])
            axis.set_ylabel("%s" % ylabels[i], rotation=0)
            whitePadSparklineAxis(axis, False)
        for axis in axesList[1::2]:
            axis.set_xlim([0, time_[-1] / 3600.])
            whitePadSparklineAxis(axis)
        axesList[-1].set_ylabel(ylabels[-1], rotation=0)

        for axis in axesList:
            shadeNoComplexSpans(axis)

        plt.subplots_adjust(hspace=0.5, right=0.95, bottom=0.05,
            left=0.11, top=0.95)
        exportFigure(plt, plotOutDir, plotOutFileName + "__downstreamFluxes",
            metadata)
        # Release the downstream figure as well
        plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot 4-amino-4-deoxychorismate synthase expression and its effects.

    Six stacked panels share one time axis: transcription initiation events
    and mRNA counts for ``ENZYME_RNA_IDS``, total monomer counts (free
    monomers plus the summed counts of ``ENZYME_COMPLEX_IDS``), summed
    complex counts, the summed flux of ``ENZYME_REACTION_IDS`` and the
    counts of ``METABOLITE_ID``. Time spans longer than 0.1 h with zero
    complexes are shaded gray. Summary statistics are printed, and a labeled
    figure and a "__clean" figure are exported.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for the figures; created when it is missing.
        plotOutFileName: Base file name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled ``sim_data`` object.
        validationDataFile: Unused by this plot.
        metadata: Forwarded to ``exportFigure`` (not for the clean figure).

    Raises:
        Exception: If ``seedOutDir`` is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    cellDensity = sim_data.constants.cellDensity
    rna_ids = sim_data.process.transcription.rnaData["id"]

    enzyme_rna_transcription_indexes = np.array([
        np.where(rna_ids == enzyme_rna_id)[0][0]
        for enzyme_rna_id in ENZYME_RNA_IDS
        ])

    # Molecule and reaction indexes are looked up in the first cell
    simOutDir = os.path.join(allDir[0], "simOut")
    bulk_molecules_reader = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    fba_results_reader = TableReader(os.path.join(simOutDir, "FBAResults"))

    moleculeIDs = bulk_molecules_reader.readAttribute("objectNames")
    reactionIDs = np.array(fba_results_reader.readAttribute("reactionIDs"))

    enzyme_rna_count_indexes = np.array([
        moleculeIDs.index(enzyme_rna_id)
        for enzyme_rna_id in ENZYME_RNA_IDS
        ])
    enzyme_monomer_indexes = np.array([
        moleculeIDs.index(enzyme_monomer_id)
        for enzyme_monomer_id in ENZYME_MONOMER_IDS
        ])
    enzyme_complex_indexes = np.array([
        moleculeIDs.index(enzyme_complex_id)
        for enzyme_complex_id in ENZYME_COMPLEX_IDS
        ])
    reaction_indexes = np.array([
        np.where(reactionIDs == reaction_id)[0][0]
        for reaction_id in ENZYME_REACTION_IDS
        ])
    metabolite_index = moleculeIDs.index(METABOLITE_ID)

    # Initialize arrays
    time = []
    enzyme_rna_init_events = np.empty((0, len(ENZYME_RNA_IDS)))
    enzyme_rna_counts = np.empty((0, len(ENZYME_RNA_IDS)))
    enzyme_total_monomer_counts = np.empty((0, len(ENZYME_RNA_IDS)))
    enzyme_complex_counts = np.empty((0, len(ENZYME_COMPLEX_IDS)))
    enzyme_fluxes = np.empty((0, len(ENZYME_REACTION_IDS)))
    metabolite_counts = []

    cellMass = []
    dryMass = []
    timeStepSec = []
    generationTicks = []

    proteins_produced_per_gen = np.empty((0, len(ENZYME_RNA_IDS)))
    average_complex_counts_per_gen = []

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        main_reader = TableReader(os.path.join(simOutDir, "Main"))
        mass_reader = TableReader(os.path.join(simOutDir, "Mass"))
        bulk_molecules_reader = TableReader(
            os.path.join(simOutDir, "BulkMolecules"))
        fba_results_reader = TableReader(os.path.join(simOutDir, "FBAResults"))
        rnap_data_reader = TableReader(os.path.join(simOutDir, "RnapData"))

        time.extend(main_reader.readColumn("time").tolist())

        # Ticks: start of the first generation, then the end of every one
        if not generationTicks:
            generationTicks.append(time[0])
        generationTicks.append(time[-1])

        timeStepSec.extend(main_reader.readColumn("timeStepSec").tolist())
        cellMass.extend(mass_reader.readColumn("cellMass").tolist())
        dryMass.extend(mass_reader.readColumn("dryMass").tolist())

        rna_init_events_this_gen = rnap_data_reader.readColumn(
            "rnaInitEvent")[:, enzyme_rna_transcription_indexes]
        enzyme_rna_init_events = np.vstack((
            enzyme_rna_init_events,
            rna_init_events_this_gen))

        molecule_counts = bulk_molecules_reader.readColumn("counts")

        enzyme_rna_counts = np.vstack((
            enzyme_rna_counts,
            molecule_counts[:, enzyme_rna_count_indexes]))

        enzyme_monomer_counts_this_gen = molecule_counts[:, enzyme_monomer_indexes]
        enzyme_complex_counts_this_gen = molecule_counts[:, enzyme_complex_indexes]

        # Total monomers = free monomers + monomers bound in complexes.
        # The complexed monomers are added here, once per time step.
        enzyme_total_monomer_counts_this_gen = (
            enzyme_monomer_counts_this_gen +
            enzyme_complex_counts_this_gen.sum(axis=1)[:, None])

        enzyme_total_monomer_counts = np.vstack((
            enzyme_total_monomer_counts,
            enzyme_total_monomer_counts_this_gen))
        enzyme_complex_counts = np.vstack((
            enzyme_complex_counts,
            enzyme_complex_counts_this_gen))

        proteins_produced_per_gen = np.vstack((
            proteins_produced_per_gen,
            (enzyme_total_monomer_counts_this_gen[-1, :] -
                enzyme_total_monomer_counts_this_gen[0, :])
            ))

        average_complex_counts_per_gen.append(
            enzyme_complex_counts_this_gen.sum(axis=1).mean())

        metabolite_counts.extend(molecule_counts[:, metabolite_index])

        reactionFluxes = np.array(fba_results_reader.readColumn("reactionFluxes"))
        enzyme_fluxes = np.vstack((
            enzyme_fluxes, reactionFluxes[:, reaction_indexes]))

    # Sum reaction fluxes and convert units
    flux_conversion_coeff = (units.fg * np.array(dryMass)) / (units.fg * np.array(cellMass)) * (timeStepSec * units.s) * cellDensity
    enzyme_fluxes = (((COUNTS_UNITS / VOLUME_UNITS) * enzyme_fluxes.sum(axis=1)) / flux_conversion_coeff).asNumber(units.mmol / units.g / units.h)

    # Convert time to hours
    time = np.array(time)
    time_hours = time / 3600.

    # NOTE: enzyme_total_monomer_counts already contains the complexed
    # monomers (added per generation above). Adding the complex counts again
    # here would count them twice and disagree with proteins_produced_per_gen.

    # Plot
    plt.figure(figsize=(14, 8.5))
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    plt.suptitle(
        "4-amino-4-deoxychorismate synthase downstream effects",
        fontsize=FONTSIZE)

    pre_merge_colors = [color_cycle[0], color_cycle[2]]
    post_merge_color = color_cycle[3]

    # Define axes
    rna_init_axis = plt.subplot(6, 1, 1)
    rna_axis = plt.subplot(6, 1, 2, sharex=rna_init_axis)
    monomer_axis = plt.subplot(6, 1, 3, sharex=rna_init_axis)
    complex_axis = plt.subplot(6, 1, 4, sharex=rna_init_axis)
    flux_axis = plt.subplot(6, 1, 5, sharex=rna_init_axis)
    met_axis = plt.subplot(6, 1, 6, sharex=rna_init_axis)

    # Plot transcription initiation events
    rna_init_axis.set_prop_cycle(color=pre_merge_colors)
    rna_init_axis.plot(time_hours, enzyme_rna_init_events)
    rna_init_axis.set_ylabel("Transcription\nevents", fontsize=FONTSIZE, rotation=0)
    rna_init_axis.yaxis.set_label_coords(-.12, 0.25)
    rna_init_axis.set_xlim([time_hours[0], time_hours[-1]])
    rna_init_axis.set_ylim([0, 1])
    whitePadSparklineAxis(rna_init_axis, xAxis=False)

    # Print average transcription frequency of each gene
    for rna_id, prob in zip(ENZYME_RNA_IDS, enzyme_rna_init_events.sum(axis=0)/len(proteins_produced_per_gen)):
        print("%s transcription frequency: %.3f"%(rna_id, prob))

    rna_axis.set_prop_cycle(color=pre_merge_colors)
    rna_axis.plot(time_hours, enzyme_rna_counts)
    rna_axis.set_ylabel("mRNA\ncounts", fontsize=FONTSIZE, rotation=0)
    rna_axis.yaxis.set_label_coords(-.12, 0.25)
    rna_axis.set_ylim([0, np.max(enzyme_rna_counts)])
    whitePadSparklineAxis(rna_axis, xAxis=False)

    monomer_axis.set_prop_cycle(color=pre_merge_colors)
    monomer_axis.plot(time_hours, enzyme_total_monomer_counts)
    monomer_axis.set_ylabel("Protein monomer\ncounts", fontsize=FONTSIZE, rotation=0)
    monomer_axis.yaxis.set_label_coords(-.12, 0.25)
    monomer_axis.set_ylim([0, np.max(enzyme_total_monomer_counts)])
    whitePadSparklineAxis(monomer_axis, xAxis=False)

    # Print average number of protein produced per generation
    for rna_id, count in zip(ENZYME_RNA_IDS, proteins_produced_per_gen.mean(axis=0)):
        print("%s average proteins produced per gen: %.2f" % (rna_id, count))

    total_complex_counts = enzyme_complex_counts.sum(axis=1)
    complex_axis.plot(time_hours, total_complex_counts, color=post_merge_color)
    complex_axis.set_ylabel("Protein complex\ncounts", fontsize=FONTSIZE, rotation=0)
    complex_axis.yaxis.set_label_coords(-.12, 0.25)
    complex_axis.set_ylim([0, np.max(total_complex_counts)])
    whitePadSparklineAxis(complex_axis, xAxis=False)

    # Print mean and std of average complex counts in each gen
    print("Complex counts average: %.2f" % (np.array(average_complex_counts_per_gen).mean(),))
    print("Complex counts std: %.2f" % (np.array(average_complex_counts_per_gen).std(),))

    flux_axis.plot(time_hours, enzyme_fluxes, color=post_merge_color)
    flux_axis.set_yscale("symlog", linthreshy=FLUX_LINEAR_THRESHOLD)
    flux_axis.set_ylabel("PABASYN-RXN\n(reverse)\ntotal flux\n(mmol/gDCW/hour)", fontsize=FONTSIZE, rotation=0)
    flux_axis.yaxis.set_label_coords(-.12, 0.25)
    flux_axis.set_ylim([0, np.max(enzyme_fluxes)])
    whitePadSparklineAxis(flux_axis, xAxis=False)
    flux_axis.get_yaxis().set_tick_params(which='minor', size=0)
    flux_axis.get_xaxis().set_tick_params(which='minor', width=0)
    flux_max = flux_axis.get_ylim()[1]
    flux_axis.set_yticks([0, FLUX_LINEAR_THRESHOLD, flux_max])
    flux_axis.set_yticklabels(["0", "%0.0e"%(FLUX_LINEAR_THRESHOLD, ), "%.2f"%(flux_max, )])

    met_axis.plot(time_hours, metabolite_counts, color=post_merge_color)
    met_axis.set_ylabel("End product\ncounts", fontsize=FONTSIZE, rotation=0)
    met_axis.yaxis.set_label_coords(-.12, 0.25)
    met_axis.set_xlabel("Time (hour)\ntickmarks at each new generation", fontsize=FONTSIZE)
    met_axis.set_ylim([0, np.max(metabolite_counts)])
    met_axis.set_xlim([time_hours[0], time_hours[-1]])
    whitePadSparklineAxis(met_axis)
    met_axis.set_yticklabels([0, "%0.1e" % met_axis.get_ylim()[1]])
    met_axis.set_xticks(np.array(generationTicks) / 3600.)
    # A plain list is used for the labels: a NumPy array built from " " has
    # a one-character string dtype and would truncate "%0.2f" labels.
    xticklabels = [" "] * len(generationTicks)
    xticklabels[0] = "%0.2f" % (time_hours[0])
    xticklabels[-1] = "%0.2f" % (time_hours[-1])
    met_axis.set_xticklabels(xticklabels)

    # Add patches to indicate absence of complexes.
    # patchStart[k]..patchEnd[k] are the first and last index of run k.
    noComplexIndexes = np.where(np.array(total_complex_counts) == 0)[0]
    patchStart = []
    patchEnd = []
    if len(noComplexIndexes):
        prev = noComplexIndexes[0]
        patchStart.append(prev)
        for i in noComplexIndexes:
            if np.abs(i - prev) > 1:
                patchStart.append(i)
                patchEnd.append(prev)
            prev = i
        patchEnd.append(prev)

    axesList = [rna_init_axis, rna_axis, monomer_axis, complex_axis, flux_axis, met_axis]
    for axis in axesList:
        axis.tick_params(labelsize=LABELSIZE)
        for start, end in zip(patchStart, patchEnd):
            width = time_hours[end] - time_hours[start]
            # Runs of 0.1 h or shorter are not highlighted
            if width <= 0.1:
                continue

            height = axis.get_ylim()[1] - axis.get_ylim()[0]
            axis.add_patch(patches.Rectangle(
                (time_hours[start], axis.get_ylim()[0]), width, height,
                alpha=0.25, color="gray", linewidth=0.))

    plt.subplots_adjust(hspace=0.5, right=0.9, bottom=0.1, left=0.15, top=0.9)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of plot
    for a in axesList:
        clearLabels(a)

    plt.suptitle("")
    met_axis.set_xticklabels([])
    met_axis.set_xlabel("")
    exportFigure(plt, plotOutDir, plotOutFileName + "__clean", "")
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot tryptophan synthase capacity against the tryptophan requested.

    For every cell (after dropping the first ``BURN_IN`` time steps) three
    panels are drawn on shared subplots: the maximum capacity
    ``kcat * TRYPSYN counts * time step``, the ``TRP[c]`` entry of
    ``aaRequestSize`` from the ``GrowthLimits`` listener, and the ratio of
    the two with a dashed reference line at 1.

    Args:
        seedOutDir: Existing directory handed to ``AnalysisPaths``.
        plotOutDir: Directory for the figure; created when it is missing.
        plotOutFileName: Base file name forwarded to ``exportFigure``.
        simDataFile: Path of the pickled ``sim_data`` object.
        validationDataFile: Unused by this plot.
        metadata: Forwarded unchanged to ``exportFigure``.

    Raises:
        Exception: If ``seedOutDir`` is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDirs = ap.get_cells()

    # Load data from KB
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    trpIdx = sim_data.moleculeGroups.aaIDs.index("TRP[c]")

    # From PMID 6402362 (kcat of 4.1/s measured at 25 C), scaled by a
    # factor of 2 per 10 degrees up to 37 C.
    trpSynKcat = 2**((37. - 25.) / 10.) * 4.1

    def finishPanel(ax, ylabel, tickFormat, hideXTicks):
        """Apply the shared y-label, y-tick and spine styling to one panel."""
        plt.ylabel(ylabel, fontsize=10)

        # Only the two ends of the current y range are labeled
        ymin, ymax = ax.get_ylim()
        ax.set_yticks([ymin, ymax])
        ax.set_yticklabels([tickFormat % ymin, tickFormat % ymax])
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        if hideXTicks:
            ax.xaxis.set_ticks_position('none')
        ax.tick_params(which='both', direction='out', labelsize=10)
        if hideXTicks:
            ax.set_xticks([])
        else:
            ax.set_xticks(ax.get_xlim())

    plt.figure(figsize=(8.5, 11))

    for simDir in allDirs:
        simOutDir = os.path.join(simDir, "simOut")

        growthLimits = TableReader(os.path.join(simOutDir, "GrowthLimits"))
        trpRequests = growthLimits.readColumn("aaRequestSize")[BURN_IN:, trpIdx]
        growthLimits.close()

        bulkMolecules = TableReader(
            os.path.join(simOutDir, "BulkMolecules"))
        moleculeIds = bulkMolecules.readAttribute("objectNames")
        trpSynIdx = moleculeIds.index("TRYPSYN[c]")
        trpSynCounts = bulkMolecules.readColumn("counts")[BURN_IN:, trpSynIdx]
        bulkMolecules.close()

        # The Main listener is opened once and closed after both reads
        mainReader = TableReader(os.path.join(simOutDir, "Main"))
        time = mainReader.readColumn("time")[BURN_IN:]
        timeStep = mainReader.readColumn("timeStepSec")[BURN_IN:]
        mainReader.close()

        trpSynMaxCapacity = trpSynKcat * trpSynCounts * timeStep

        # NOTE: the three panels scale time differently (/60, raw, /3600);
        # x ticks are hidden on the first two panels.
        ax = plt.subplot(3, 1, 1)
        ax.plot(time / 60., trpSynMaxCapacity, color="b")
        finishPanel(ax, "Tryptophan Synthase Max Capacity", "%0.0f", True)

        ax = plt.subplot(3, 1, 2)
        ax.plot(time, trpRequests, color="b")
        finishPanel(ax, "Trp Requested By Translation", "%0.0f", True)

        ax = plt.subplot(3, 1, 3)
        ax.plot(time / 3600., trpSynMaxCapacity / trpRequests, color="b")
        # Reference line at ratio 1
        ax.plot([0, time[-1] / 3600.], [1., 1.], "k--")
        finishPanel(ax, "(Max capacity) / (Request)", "%0.2f", False)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot ribosome counts, concentrations and activity over several generations.

    For the first cell of every generation, fourteen time series are drawn
    onto a shared 7x2 grid of axes (time on the x axis, in minutes), and the
    finished figure is handed to ``exportFigure``.

    Args:
        seedOutDir: directory passed to ``AnalysisPaths``; must already exist.
        plotOutDir: output directory; created when it does not exist.
        plotOutFileName: forwarded to ``exportFigure``.
        simDataFile: path of the pickled ``sim_data`` object.
        validationDataFile: not used by this plot.
        metadata: forwarded to ``exportFigure``.

    Raises:
        Exception: if ``seedOutDir`` is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    # First cell of every generation subject to analysis
    firstCellLineage = [
        ap.get_cells(generation=[gen_idx])[0]
        for gen_idx in range(ap.n_generation)
    ]

    # Load sim_data; the context manager closes the pickle file again
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)

    # Generation-independent quantities are looked up once, outside the loop
    nAvogadro = sim_data.constants.nAvogadro
    complexIds30S = [sim_data.moleculeIds.s30_fullComplex]
    complexIds50S = [sim_data.moleculeIds.s50_fullComplex]
    mw30S = sim_data.getter.getMass(complexIds30S)
    mw50S = sim_data.getter.getMass(complexIds50S)
    mw70S = mw30S + mw50S
    mM = units.mmol / units.L

    # Create the figure and a 7x2 grid; axes are created column by column,
    # so axes[0:7] is the left column and axes[7:14] the right column.
    fig = plt.figure()
    fig.set_size_inches(15, 12)
    gs = gridspec.GridSpec(7, 2)
    axes = [plt.subplot(gs[row, col]) for col in range(2) for row in range(7)]

    def draw(ax, minutes, values, ylabel, ylim, drop_first, is_last_gen):
        """Plot one series on ``ax`` and apply its axis formatting."""
        if drop_first:
            xs, ys = minutes[1:], values[1:]
        else:
            xs, ys = minutes, values
        ax.plot(xs, ys, linestyle='-')
        if is_last_gen:
            # The x range is fixed from the last generation's time vector
            ax.set_xlim([-5, max(minutes)])
        if ylim is not None:
            ax.set_ylim(ylim)
        ax.set_ylabel(ylabel)

    # Go through first cells in each generation
    for gen, simDir in enumerate(firstCellLineage):
        simOutDir = os.path.join(simDir, "simOut")
        is_last_gen = gen == ap.n_generation - 1
        is_first_gen = gen == 0

        # Time axis (only the time vector of getMassData is needed here)
        time, _ = getMassData(simDir, ["instantaniousGrowthRate"])
        minutes = (units.s * time).asNumber(units.min)
        timeStep = units.s * TableReader(
            os.path.join(simOutDir, "Main")).readColumn("timeStepSec")
        timeStepSec = timeStep.asNumber(units.s)

        # Free 30S / 50S subunit counts; the counts table is read only once
        bulkMoleculesDataFile = TableReader(
            os.path.join(simOutDir, "BulkMolecules"))
        moleculeIds = bulkMoleculesDataFile.readAttribute("objectNames")
        complexIndexes30S = np.array(
            [moleculeIds.index(comp) for comp in complexIds30S], int)
        complexIndexes50S = np.array(
            [moleculeIds.index(comp) for comp in complexIds50S], int)
        bulkCounts = bulkMoleculesDataFile.readColumn("counts")
        counts30S = bulkCounts[:, complexIndexes30S]
        counts50S = bulkCounts[:, complexIndexes50S]
        bulkMoleculesDataFile.close()

        # Active ribosome counts
        uniqueMoleculeCountsDataFile = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        ribosomeIndex = uniqueMoleculeCountsDataFile.readAttribute(
            "uniqueMoleculeIds").index("activeRibosome")
        activeRibosomeCounts = uniqueMoleculeCountsDataFile.readColumn(
            "uniqueMoleculeCounts")[:, ribosomeIndex]
        uniqueMoleculeCountsDataFile.close()

        # Ribosome listener data
        ribosomeDataFile = TableReader(
            os.path.join(simOutDir, "RibosomeData"))
        didInitialize = ribosomeDataFile.readColumn("didInitialize")
        actualElongations = ribosomeDataFile.readColumn("actualElongations")
        didTerminate = ribosomeDataFile.readColumn("didTerminate")
        effectiveElongationRate = ribosomeDataFile.readColumn(
            "effectiveElongationRate")
        ribosomeDataFile.close()

        # Cell mass -> cell volume
        massDataFile = TableReader(os.path.join(simOutDir, "Mass"))
        cellMass = massDataFile.readColumn("cellMass")
        massDataFile.close()
        cellVolume = (1.0 / sim_data.constants.cellDensity) * (units.fg * cellMass)
        volumeL = cellVolume.asNumber(units.L)

        # Counts and molar fraction of active ribosomes. The number of
        # inactive ribosomes is taken as the limiting free subunit count.
        totalRibosomeCounts = activeRibosomeCounts + np.hstack(
            (counts30S, counts50S)).min(axis=1)
        molarFractionActive = activeRibosomeCounts.astype(
            float) / totalRibosomeCounts
        totalRibosomeConcentration = (
            (1 / nAvogadro) * totalRibosomeCounts) / cellVolume
        activeRibosomeConcentration = (
            (1 / nAvogadro) * activeRibosomeCounts) / cellVolume

        # Masses and mass fraction of active ribosomes
        mass30S = ((1 / nAvogadro) * counts30S) * mw30S
        mass50S = ((1 / nAvogadro) * counts50S) * mw50S
        activeRibosomeMass = (
            (1 / nAvogadro) * np.transpose([activeRibosomeCounts])) * mw70S
        totalRibosomeMass = activeRibosomeMass + mass30S + mass50S
        massFractionActive = activeRibosomeMass / totalRibosomeMass

        perTimeVolume = timeStepSec * volumeL

        # (values, y label, y limits, skip first time point of generation 0)
        series = [
            (timeStepSec, "Length of\ntime step (s)", [0, 1], False),
            (volumeL, "Cell volume\n(L)", [0, 3e-15], False),
            (totalRibosomeCounts, "Total ribosome\ncount", [10000, 35000], False),
            (totalRibosomeConcentration.asNumber(mM), "[Total ribosome]\n(mM)", [0.019, 0.023], False),
            (activeRibosomeCounts, "Active ribosome\ncount", [10000, 30000], True),
            (activeRibosomeConcentration.asNumber(mM), "[Active ribosome]\n(mM)", [0.0, 0.023], True),
            (molarFractionActive, "Molar fraction\nactive ribosomes", [0.7, 1], True),
            (massFractionActive, "Mass fraction\nactive ribosomes", [0.7, 1], True),
            (didInitialize, "Activations\nper timestep", [0, 2000], False),
            (didTerminate, "Deactivations\nper timestep", [0, 2000], False),
            (didInitialize / perTimeVolume, "Activations\nper time*volume", [0, 1.2e18], False),
            (didTerminate / perTimeVolume, "Deactivations\nper time*volume", [0, 1.2e18], False),
            (actualElongations, "AA translated", None, False),
            (effectiveElongationRate, "Effective\nelongation rate", [10, 22], False),
        ]

        for ax, (values, ylabel, ylim, skip_initial) in zip(axes, series):
            draw(ax, minutes, values, ylabel, ylim,
                 skip_initial and is_first_gen, is_last_gen)

    fig.subplots_adjust(hspace=0.5, wspace=0.3)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Scatter simulated against validation protein counts for two variants.

    For each of the first two variants, the average monomer counts of
    ``PROTEINS_WITH_HALF_LIFE`` are computed per cell (bulk counts plus the
    subunits of active ribosomes and RNA polymerases, with complexes broken
    down into monomers), then averaged over cells. The log10 counts are
    plotted against the ``schmidt2015Data`` glucose counts, with the Pearson
    correlation of each variant in the legend.

    Args:
        inputDir: directory passed to ``AnalysisPaths``; must already exist
            and contain ``kb/<SERIALIZED_FIT1_FILENAME>``.
        plotOutDir: output directory, created via ``filepath.makedirs``.
        plotOutFileName: forwarded to ``exportFigure``.
        simDataFile: not used; sim_data is loaded from ``inputDir`` instead.
        validationDataFile: path of the pickled validation data.
        metadata: forwarded to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    expected_n_variants = 2
    n_variants = len(variants)

    if n_variants < expected_n_variants:
        print('This plot only runs for {} variants.'.format(expected_n_variants))
        return

    # IDs for appropriate proteins
    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData['id'].tolist()
    ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

    # Stoichiometry matrices
    equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
    complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

    # Protein container views
    protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = protein_container.countsView(ids_complexation)
    view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
    view_equilibrium = protein_container.countsView(ids_equilibrium)
    view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

    # Load model data: one column per variant
    model_counts = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
    model_std = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
    for i, variant in enumerate(variants):
        if i >= expected_n_variants:
            print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
            continue

        variant_counts = []
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

            # Account for bulk molecules (time-averaged counts)
            (bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
            protein_container.countsIs(bulk_counts.mean(axis=0))

            # Account for unique molecules; ids and counts are read once
            # and indexed for both molecule types.
            unique_ids = unique_counts_reader.readAttribute('uniqueMoleculeIds')
            unique_counts = unique_counts_reader.readColumn('uniqueMoleculeCounts')
            n_ribosomes = unique_counts[:, unique_ids.index('activeRibosome')]
            n_rnap = unique_counts[:, unique_ids.index('activeRnaPoly')]
            protein_container.countsInc(
                n_ribosomes.mean(),
                [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
            protein_container.countsInc(n_rnap.mean(), [sim_data.moleculeIds.rnapFull])

            # Account for small-molecule bound complexes
            view_equilibrium.countsDec(equil_stoich.dot(view_equilibrium_complexes.counts()))

            # Account for monomers in complexed form
            view_complexation.countsDec(complex_stoich.dot(view_complexation_complexes.counts()))

            variant_counts.append(protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())

        model_counts[:, i] = np.mean(variant_counts, axis=0)
        model_std[:, i] = np.std(variant_counts, axis=0)

    # Validation data
    schmidt_ids = {m: i for i, m in enumerate(validation_data.protein.schmidt2015Data['monomerId'])}
    schmidt_counts = validation_data.protein.schmidt2015Data['glucoseCounts']
    validation_counts = np.array([schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

    # Process data: asymmetric error bars in log space
    # NOTE(review): the lower bar is NaN wherever model_std >= model_counts - confirm this is acceptable
    model_log_counts = np.log10(model_counts)
    model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
    model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
    validation_log_counts = np.log10(validation_counts)

    r_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
    r_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

    # Scatter plot of model vs validation counts
    max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
    limits = [0, max_counts]
    plt.figure()
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ## Plot data
    for i in range(expected_n_variants):
        plt.errorbar(
            validation_log_counts, model_log_counts[:, i],
            yerr=np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i])),
            fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
    plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

    ## Format axes
    plt.xlabel('Validation Counts\n(log10(counts))')
    plt.ylabel('Average Simulation Counts\n(log10(counts))')
    ax = plt.gca()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    ## Add legend
    legend_text = [
        'Before: r={:.2f}, p={:.3f}'.format(r_before[0], r_before[1]),
        'After: r={:.2f}, p={:.3f}'.format(r_after[0], r_after[1]),
    ]
    plt.legend(legend_text, frameon=False)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Scatter added volume against birth volume for the 'condition' variant.

    For every variant, the first and last ``cellMass`` value of each cell in
    generations 2 and 3 is converted to a volume with that variant's cell
    density. The first three variants are drawn as separate scatter series.

    Args:
        inputDir: directory passed to ``AnalysisPaths``; must already exist.
        plotOutDir: output directory, created via ``filepath.makedirs``.
        plotOutFileName: forwarded to ``exportFigure``.
        simDataFile: not used; sim_data is loaded per variant.
        validationDataFile: not used by this plot.
        metadata: must contain ``"variant"``; the plot only runs when it is
            ``"condition"``. Also forwarded to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print("This plot only runs for the 'condition' variant.")
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    gens = [2, 3]

    # Legend labels by position in `variants`.
    # NOTE(review): assumes the variants come back in the order
    # minimal / anaerobic / +AA - confirm against the condition variant.
    labels = ["minimal", "anaerobic", "+AA"]

    # position in `variants` -> (birth volumes, added volumes). Keyed by
    # position so that a variant without cells cannot shift the labels of
    # the variants that follow it.
    volumes = {}

    for position, variant in enumerate(variants):
        all_cells = ap.get_cells(variant=[variant], generation=gens)
        # len() rather than truthiness: the return type of get_cells is not
        # visible here and may not support a boolean test.
        if len(all_cells) == 0:
            continue

        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)
        density = sim_data.constants.cellDensity.asNumber(units.fg / units.um**3)

        initial_masses = []
        final_masses = []
        for simDir in all_cells:
            # Best effort: a cell whose mass table cannot be read is left
            # out, but interrupts are no longer swallowed.
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("cellMass")
                first_mass, last_mass = cellMass[0], cellMass[-1]
            except Exception as e:
                print("Skipping {}: {}".format(simDir, e))
                continue
            initial_masses.append(first_mass)
            final_masses.append(last_mass)

        initial_masses = np.array(initial_masses, dtype=float)
        final_masses = np.array(final_masses, dtype=float)
        added_masses = final_masses - initial_masses

        volumes[position] = (initial_masses / density, added_masses / density)

    plt.style.use('seaborn-deep')

    plt.figure(figsize=(5, 5))
    for position, label in enumerate(labels):
        if position not in volumes:
            continue
        initial_volume, added_volume = volumes[position]
        plt.scatter(initial_volume, added_volume, s=3, label=label)

    plt.xlim([0, 4])
    plt.ylim([0, 4])
    # Raw strings: "\m" is not a valid escape sequence
    plt.xlabel(r"Birth Volume ($\mu m^3$)")
    plt.ylabel(r"Added Volume ($\mu m^3$)")
    plt.legend()

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Disabled plot: probability that a monomer exists / doubles by burst size.

    The function currently prints ``DISABLED`` and returns immediately;
    everything after the early ``return`` is unreachable and kept so the
    analysis can be switched back on.

    When enabled, the body breaks all complexes (and active ribosomes / RNA
    polymerases) down into protein monomers for every cell, records per
    generation whether each monomer exists and whether its final/initial
    count ratio exceeds ``1 - CLOSE_TO_DOUBLE``, and plots both
    probabilities against the number of transcription initiation events.
    Two figures are exported: all generations, and generations 5 onwards
    (file name suffixed with ``_skip_5_gen``).

    Args:
        seedOutDir: directory passed to ``AnalysisPaths``; must already exist.
        plotOutDir: output directory; created when it does not exist.
        plotOutFileName: forwarded to ``exportFigure``.
        simDataFile: path of the pickled ``sim_data`` object.
        validationDataFile: not used by this plot.
        metadata: forwarded to ``exportFigure``.

    Raises:
        Exception: (disabled body only) if ``seedOutDir`` is not a directory.
    """
    print("DISABLED")
    return

    if not os.path.isdir(seedOutDir):
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all ids required
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    ids_complexation = sim_data.process.complexation.moleculeNames  # Complexes of proteins, and protein monomers
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes  # Only complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames  # Complexes of proteins + small molecules, small molecules, protein monomers
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes  # Only complexes
    ids_translation = sim_data.process.translation.monomerData["id"].tolist()  # Only protein monomers

    # Subunit ids and stoichiometry of the ribosome and of RNA polymerase
    data_50s = sim_data.process.complexation.getMonomers(
        sim_data.moleculeIds.s50_fullComplex)
    data_30s = sim_data.process.complexation.getMonomers(
        sim_data.moleculeIds.s30_fullComplex)
    ribosome_subunit_ids = data_50s["subunitIds"].tolist() + data_30s["subunitIds"].tolist()
    ribosome_subunit_stoich = np.hstack(
        (data_50s["subunitStoich"], data_30s["subunitStoich"]))

    data_rnap = sim_data.process.complexation.getMonomers(
        sim_data.moleculeIds.rnapFull)
    rnap_subunit_ids = data_rnap["subunitIds"].tolist()
    rnap_subunit_stoich = data_rnap["subunitStoich"]

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    first_build = True

    # Pre-allocate variables. Rows = Generations, Cols = Monomers
    n_monomers = sim_data.process.translation.monomerData['id'].size
    n_sims = ap.n_generation

    monomerExistMultigen = np.zeros((n_sims, n_monomers), dtype=bool)
    monomerDoubleMultigen = np.zeros((n_sims, n_monomers), dtype=bool)
    initiationEventsPerMonomerMultigen = np.zeros((n_sims, n_monomers), dtype=int)

    for gen_idx, simDir in enumerate(allDir):
        simOutDir = os.path.join(simDir, "simOut")

        ## READ DATA ##
        # Read in bulk ids and counts
        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))

        if first_build:
            # Index lookups are built from the first cell only and reused
            moleculeIds = bulkMolecules.readAttribute("objectNames")

            complexationIdx = np.array(
                [moleculeIds.index(x) for x in ids_complexation])
            complexation_complexesIdx = np.array(
                [moleculeIds.index(x) for x in ids_complexation_complexes])
            equilibriumIdx = np.array(
                [moleculeIds.index(x) for x in ids_equilibrium])
            equilibrium_complexesIdx = np.array(
                [moleculeIds.index(x) for x in ids_equilibrium_complexes])
            translationIdx = np.array(
                [moleculeIds.index(x) for x in ids_translation])
            ribosomeIdx = np.array(
                [moleculeIds.index(x) for x in ribosome_subunit_ids])
            rnapIdx = np.array(
                [moleculeIds.index(x) for x in rnap_subunit_ids])

            first_build = False

        bulkCounts = bulkMolecules.readColumn("counts")
        bulkMolecules.close()

        # Dissociate protein-protein complexes
        bulkCounts[:, complexationIdx] += np.dot(
            sim_data.process.complexation.stoichMatrixMonomers(),
            bulkCounts[:, complexation_complexesIdx].transpose() * -1).transpose()

        # Dissociate protein-small molecule complexes
        bulkCounts[:, equilibriumIdx] += np.dot(
            sim_data.process.equilibrium.stoichMatrixMonomers(),
            bulkCounts[:, equilibrium_complexesIdx].transpose() * -1).transpose()

        # Load unique molecule data for RNAP and ribosomes
        uniqueMoleculeCounts = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds")
        uniqueCounts = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")
        nActiveRibosome = uniqueCounts[:, uniqueMoleculeIds.index("activeRibosome")]
        nActiveRnaPoly = uniqueCounts[:, uniqueMoleculeIds.index("activeRnaPoly")]
        uniqueMoleculeCounts.close()

        # Add subunits from RNAP and ribosomes (outer product: time x subunit)
        ribosomeSubunitCounts = (
            nActiveRibosome.reshape((nActiveRibosome.size, 1))
            * ribosome_subunit_stoich.reshape((1, ribosome_subunit_stoich.size)))
        rnapSubunitCounts = (
            nActiveRnaPoly.reshape((nActiveRnaPoly.size, 1))
            * rnap_subunit_stoich.reshape((1, rnap_subunit_stoich.size)))

        bulkCounts[:, ribosomeIdx] += ribosomeSubunitCounts
        bulkCounts[:, rnapIdx] += rnapSubunitCounts

        # Get protein monomer counts for calculations now that all complexes are dissociated
        proteinMonomerCounts = bulkCounts[:, translationIdx]

        ## CALCULATIONS ##
        # Calculate if monomer exists over course of cell cycle
        monomerExist = proteinMonomerCounts.sum(axis=0) > 1

        # Calculate if monomer comes close to doubling
        # NOTE(review): the threshold is (1 - CLOSE_TO_DOUBLE), not a multiple of 2 - confirm intent
        ratioFinalToInitialCount = (
            proteinMonomerCounts[-1, :] / proteinMonomerCounts[0, :].astype(float))
        monomerDouble = ratioFinalToInitialCount > (1 - CLOSE_TO_DOUBLE)

        # Load transcription initiation event data
        rnapData = TableReader(os.path.join(simOutDir, "RnapData"))
        initiationEventsPerRna = rnapData.readColumn("rnaInitEvent").sum(axis=0)
        rnapData.close()

        # Map transcription initiation events to monomers
        initiationEventsPerMonomer = initiationEventsPerRna[
            sim_data.relation.rnaIndexToMonomerMapping]

        # Log data
        monomerExistMultigen[gen_idx, :] = monomerExist
        monomerDoubleMultigen[gen_idx, :] = monomerDouble
        initiationEventsPerMonomerMultigen[gen_idx, :] = initiationEventsPerMonomer

    def probabilities_by_burst_size(events, exists, doubles):
        """Return (burst sizes, P(exists), P(doubles)) pooled over all entries."""
        sizes = np.unique(events)
        prob_exist = np.zeros(sizes.size)
        prob_double = np.zeros(sizes.size)
        for idx, burstSize in enumerate(sizes):
            mask = events == burstSize
            mask_sum = float(mask.sum())
            prob_exist[idx] = exists[mask].sum() / mask_sum
            prob_double[idx] = doubles[mask].sum() / mask_sum
        return sizes, prob_exist, prob_double

    def plot_probabilities(sizes, prob_exist, prob_double, log_y, fileName):
        """Draw both probabilities (full range and zoomed to 0-10) and export."""
        fig, axesList = plt.subplots(4, 1)
        for ax, values in zip(axesList, (prob_exist, prob_double, prob_exist, prob_double)):
            draw = ax.semilogy if log_y else ax.plot
            draw(sizes, values)

        # The lower two panels zoom in on small burst sizes
        axesList[2].set_xlim([0., 10.])
        axesList[3].set_xlim([0., 10.])

        axesList[0].set_ylabel("Probability\nexists")
        axesList[1].set_ylabel("Probability\ndoubles")
        axesList[2].set_ylabel("Probability\nexists")
        axesList[3].set_ylabel("Probability\ndoubles")
        axesList[3].set_xlabel("Number of transcription events per generation")

        exportFigure(plt, plotOutDir, fileName, metadata)
        plt.close("all")

    uniqueBurstSizes, probExistByBurstSize, probDoubleByBurstSize = probabilities_by_burst_size(
        initiationEventsPerMonomerMultigen,
        monomerExistMultigen,
        monomerDoubleMultigen)

    # Per-generation probabilities (row is generation, col is burst size).
    # These go into the dedicated 2-D arrays; indexing the pooled 1-D arrays
    # with [gen_idx, idx] would raise an IndexError.
    probExistByBurstSizeGen = np.zeros((ap.n_generation, uniqueBurstSizes.size))
    probDoubleByBurstSizeGen = np.zeros((ap.n_generation, uniqueBurstSizes.size))

    for gen_idx in range(ap.n_generation):
        for idx, burstSize in enumerate(uniqueBurstSizes):
            mask = initiationEventsPerMonomerMultigen[gen_idx, :] == burstSize
            # A burst size absent from this generation gives 0/0 here
            mask_sum = float(mask.sum())
            probExistByBurstSizeGen[gen_idx, idx] = (
                monomerExistMultigen[gen_idx, :][mask].sum() / mask_sum)
            probDoubleByBurstSizeGen[gen_idx, idx] = (
                monomerDoubleMultigen[gen_idx, :][mask].sum() / mask_sum)

    plot_probabilities(
        uniqueBurstSizes, probExistByBurstSize, probDoubleByBurstSize,
        True, plotOutFileName)

    # Ignore first 5 generations
    uniqueBurstSizes, probExistByBurstSize, probDoubleByBurstSize = probabilities_by_burst_size(
        initiationEventsPerMonomerMultigen[5:, :],
        monomerExistMultigen[5:, :],
        monomerDoubleMultigen[5:, :])

    plot_probabilities(
        uniqueBurstSizes, probExistByBurstSize, probDoubleByBurstSize,
        False, plotOutFileName + "_skip_5_gen")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot the mass of each cell component over time for every cell.

    One subplot per entry of ``massNames`` (sharing the x axis, in hours);
    every cell returned by ``AnalysisPaths.get_cells`` contributes one line
    per subplot, coloured by its index and labelled with the short name read
    from the ``metadata/short_name`` file.

    Args:
        inputDir: directory passed to ``AnalysisPaths``; must already exist.
        plotOutDir: output directory; created when it does not exist.
        plotOutFileName: forwarded to ``exportFigure``.
        simDataFile: not used by this plot.
        validationDataFile: not used by this plot.
        metadata: forwarded to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
    """
    massNames = [
        "dryMass",
        "proteinMass",
        #"tRnaMass",
        "rRnaMass",
        'mRnaMass',
        "dnaMass"
    ]

    cleanNames = [
        "Dry\nmass",
        "Protein\nmass",
        #"tRNA\nmass",
        "rRNA\nmass",
        "mRNA\nmass",
        "DNA\nmass"
    ]

    if not os.path.isdir(inputDir):
        raise Exception("inputDir does not currently exist as a directory")

    ap = AnalysisPaths(inputDir, variant_plot = True)
    all_cells = ap.get_cells()

    # Build a mapping from cell index to color, cycling through COLORS_LARGE.
    # islice bounds the infinite cycle and works on both Python 2 and 3
    # (itertools.izip only exists on Python 2).
    idToColor = dict(enumerate(
        itertools.islice(itertools.cycle(COLORS_LARGE), len(all_cells))))

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    fig, axesList = plt.subplots(len(massNames), sharex = True)

    currentMaxTime = 0
    for cellIdx, simDir in enumerate(all_cells):
        # NOTE(review): the 32-character strip presumably removes the
        # seed/generation/cell suffix to reach the variant directory -
        # confirm against the output directory layout.
        with open(os.path.join(simDir[:-32], 'metadata', 'short_name')) as f:
            variant_name = f.readlines()[0]

        simOutDir = os.path.join(simDir, "simOut")

        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        hours = (time / 60.) / 60.

        # Track the longest cell cycle seen so far; it sizes the x axis so
        # that all generations fit.
        cellCycleTime = ((time[-1] - time[0]) / 60. / 60.)
        if cellCycleTime > currentMaxTime:
            currentMaxTime = cellCycleTime

        mass = TableReader(os.path.join(simOutDir, "Mass"))
        for massIdx, massType in enumerate(massNames):
            massToPlot = mass.readColumn(massType)
            axes = axesList[massIdx]
            axes.plot(hours, massToPlot, linewidth = 2, color=idToColor[cellIdx], label=variant_name)
            axes.set_xlim(0, currentMaxTime*ap.n_generation*1.1)
            axes.set_ylabel(cleanNames[massIdx] + " (fg)")
        mass.close()

    # Only tick the lower and upper end of every y axis
    for axes in axesList:
        axes.set_yticks(list(axes.get_ylim()))

    axesList[0].set_title("Cell mass fractions")
    plt.legend(bbox_to_anchor=(.92, 5), loc=2, borderaxespad=0., prop={'size':6})
    axesList[len(massNames) - 1].set_xlabel("Time (hr)")
    plt.subplots_adjust(hspace = 0.2, wspace = 0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot ArgR regulation and carA expression for every cell of one seed.

    Nine stacked subplots are drawn, and every cell returned by
    ``AnalysisPaths.get_cells()`` is plotted onto the same axes:

    1. intracellular ARG concentration
    2. active / inactive / total ArgR counts (log scale)
    3. moving average of ArgR-bound promoter counts
    4. moving average of the regulated genes' synthesis probabilities
    5. total carA protein counts
    6. total carA protein concentration
    7. carA mRNA counts
    8. carA mRNA concentration
    9. carA mass as a fraction of total protein mass

    Args:
        seedOutDir: existing directory handed to ``AnalysisPaths`` with
            ``multi_gen_plot=True``.
        plotOutDir: directory the figure is exported to; created if missing.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: path of the pickled ``sim_data`` object.
        validationDataFile: not used by this plot.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``seedOutDir`` is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("seedOutDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        # makedirs also creates missing parent directories.
        os.makedirs(plotOutDir)

    def bulk_counts(counts, ids, molecule_id):
        # 1-D time series of counts for a single bulk molecule.
        return counts[:, ids.index(molecule_id)]

    def moving_average(columns, width=100):
        # Column-wise boxcar average; output has the same shape as input.
        kernel = np.ones(width) / width
        return np.array([
            np.convolve(columns[:, i], kernel, mode="same")
            for i in range(columns.shape[1])
        ]).T

    def finish_axis(ax, ylabel, tick_format, is_bottom=False):
        # Shared cosmetic treatment: label only the y extremes, hide the
        # top/bottom spines, and show x ticks only on the bottom subplot.
        ax.set_ylabel(ylabel, fontsize=6)
        ymin, ymax = ax.get_ylim()
        ax.set_yticks([ymin, ymax])
        ax.set_yticklabels([tick_format % ymin, tick_format % ymax])
        ax.spines['top'].set_visible(False)
        ax.spines['bottom'].set_visible(False)
        if not is_bottom:
            ax.xaxis.set_ticks_position('none')
        ax.tick_params(which='both', direction='out', labelsize=6)
        ax.set_xticks(ax.get_xlim() if is_bottom else [])

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDirs = ap.get_cells()

    # Load data from KB.  The context manager closes the file handle.
    # NOTE: unpickling is only safe for trusted, locally generated files.
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)

    nAvogadro = sim_data.constants.nAvogadro
    cellDensity = sim_data.constants.cellDensity

    # Transcription factor ids are the suffix after "__" in each recruitment
    # column name; "alpha" columns are excluded.
    recruitmentColNames = sim_data.process.transcription_regulation.recruitmentColNames
    tfs = sorted(set([
        x.split("__")[-1] for x in recruitmentColNames
        if x.split("__")[-1] != "alpha"
    ]))
    argRIndex = [i for i, tf in enumerate(tfs) if tf == "CPLX0-228"][0]

    argRTargets = sim_data.tfToFC["CPLX0-228"].keys()
    tfBoundIds = [target + "__CPLX0-228" for target in argRTargets]
    synthProbIds = [target + "[c]" for target in argRTargets]

    plt.figure(figsize=(8.5, 13))
    nRows = 9
    concentrationUnits = units.umol / units.L

    for simDir in allDirs:
        simOutDir = os.path.join(simDir, "simOut")

        # Load time (absolute simulation time; no initial-time offset is
        # subtracted).
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")

        # Load mass data.
        # Total cell mass is needed to compute concentrations (since we have
        # cell density).  Protein mass is needed to compute the mass fraction
        # of the proteome that is carA.
        massReader = TableReader(os.path.join(simOutDir, "Mass"))
        cellMass = units.fg * massReader.readColumn("cellMass")
        proteinMass = units.fg * massReader.readColumn("proteinMass")
        massReader.close()
        volume = cellMass / cellDensity

        # Load data from bulk molecules.
        bulkMoleculesReader = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        bulkMoleculeIds = bulkMoleculesReader.readAttribute("objectNames")
        bulkMoleculeCounts = bulkMoleculesReader.readColumn("counts")
        bulkMoleculesReader.close()

        # Intracellular arg.
        argCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, "ARG[c]")
        argConcentration = (1. / nAvogadro * argCounts) / volume

        # ArgR in its active, inactive and monomeric forms.
        argRActiveCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, "CPLX0-228[c]")
        argRInactiveCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, "PC00005[c]")
        argRMonomerCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, "PD00194[c]")

        # Promoter-bound status for all regulated genes.
        tfBoundIndex = np.array([bulkMoleculeIds.index(x) for x in tfBoundIds])
        tfBoundCounts = bulkMoleculeCounts[:, tfBoundIndex]

        # carA as monomer, in complex, and as mRNA.
        carAProteinId = ["CARBPSYN-SMALL[c]"]
        carAProteinCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, carAProteinId[0])
        carAComplexCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, "CARBPSYN-CPLX[c]")
        carARnaCounts = bulk_counts(bulkMoleculeCounts, bulkMoleculeIds, "EG10134_RNA[c]")

        # Total counts and concentration of carA in monomeric and complexed
        # form; each complex is counted as two carA copies.
        carAProteinTotalCounts = carAProteinCounts + 2 * carAComplexCounts
        carAProteinTotalConcentration = (1. / nAvogadro * carAProteinTotalCounts) / volume

        # Concentration of carA mRNA.
        carARnaConcentration = (1. / nAvogadro * carARnaCounts) / volume

        # carA mass in the cell and its share of the proteome.
        carAMw = sim_data.getter.getMass(carAProteinId)
        carAMass = 1. / nAvogadro * carAProteinTotalCounts * carAMw
        proteomeMassFraction = carAMass.asNumber(units.fg) / proteinMass.asNumber(units.fg)

        # Synthesis probability for all regulated genes, and bound ArgR.
        rnaSynthProbReader = TableReader(os.path.join(simOutDir, "RnaSynthProb"))
        rnaIds = rnaSynthProbReader.readAttribute("rnaIds")
        synthProbIndex = np.array([rnaIds.index(x) for x in synthProbIds])
        synthProbs = rnaSynthProbReader.readColumn("rnaSynthProb")[:, synthProbIndex]
        argRBound = rnaSynthProbReader.readColumn("nActualBound")[:, argRIndex]
        rnaSynthProbReader.close()

        # Total argR in monomer equivalents: active, inactive and bound forms
        # are each weighted by 6, plus free monomers.
        argRTotalCounts = 6 * (argRActiveCounts + argRInactiveCounts + argRBound) + argRMonomerCounts

        # Moving averages.
        tfBoundCountsMA = moving_average(tfBoundCounts)
        synthProbsMA = moving_average(synthProbs)

        ax = plt.subplot(nRows, 1, 1)
        ax.plot(time, argConcentration.asNumber(concentrationUnits), color="b")
        finish_axis(ax, "Internal ARG Conc. [uM]", "%0.0f")

        ax = plt.subplot(nRows, 1, 2)
        ax.semilogy(time, argRActiveCounts, color="b")
        ax.semilogy(time, argRInactiveCounts, color="r")
        ax.semilogy(time, argRTotalCounts, color="g")
        ax.legend(
            ["Active (hexamer)", "Inactive (hexamer)", "Total (monomeric)"],
            fontsize=6)
        finish_axis(ax, "ArgR Counts", "%0.0f")

        ax = plt.subplot(nRows, 1, 3)
        ax.plot(time, tfBoundCountsMA)
        finish_axis(ax, "ArgR Bound To Promoters\n(Moving Average)", "%0.0f")

        ax = plt.subplot(nRows, 1, 4)
        ax.plot(time, synthProbsMA)
        finish_axis(ax, "Regulated Gene Synthesis Prob.\n(Moving Average)", "%0.2e")

        ax = plt.subplot(nRows, 1, 5)
        ax.plot(time, carAProteinTotalCounts, color="b")
        finish_axis(ax, "CarA Counts", "%0.0f")

        ax = plt.subplot(nRows, 1, 6)
        ax.plot(time, carAProteinTotalConcentration.asNumber(concentrationUnits), color="b")
        finish_axis(ax, "CarA Concentration", "%0.2f")

        ax = plt.subplot(nRows, 1, 7)
        ax.plot(time, carARnaCounts, color="b")
        finish_axis(ax, "CarA mRNA Counts", "%0.0f")

        ax = plt.subplot(nRows, 1, 8)
        ax.plot(time, carARnaConcentration.asNumber(concentrationUnits), color="b")
        finish_axis(ax, "CarA mRNA\nConcentration", "%0.2e")

        # The bottom subplot is the only one plotted against hours and the
        # only one that keeps its x ticks.
        ax = plt.subplot(nRows, 1, 9)
        ax.plot(time / 3600., proteomeMassFraction, color="b")
        finish_axis(ax, "CarA Mass Fraction\nof Proteome", "%0.2e", is_bottom=True)

    plt.subplots_adjust(hspace=0.5)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Compare glucose mass yield between three constraint variants.

    The variants under ``inputDir`` are sorted into three slots according
    to the second value returned by ``get_disabled_constraints``:

    * slot 0: ``None``
    * slot 1: an empty collection
    * slot 2: a collection equal to ``ADDITIONAL_DISABLED_CONSTRAINTS``

    For each slot the per-timestep glucose yield of every cell is pooled,
    pairwise Welch t-test p-values are printed, and the distributions are
    drawn as a violin plot with ``VALIDATION_YIELD`` as a dashed reference
    line.

    Args:
        inputDir: existing directory handed to ``AnalysisPaths`` with
            ``variant_plot=True``; ``sim_data`` is loaded from its ``kb``
            subdirectory.
        plotOutDir: directory the figure is exported to.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: not used by this plot.
        validationDataFile: not used by this plot.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('inputDir does not currently exist as a directory')
    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)

    # -1 marks a slot for which no matching variant has been found yet.
    variants = -np.ones(N_VARIANTS)
    for variant in ap.get_variants():
        _, additional_disabled = get_disabled_constraints(variant)
        if additional_disabled is None:
            variants[0] = variant
        elif len(additional_disabled) == 0:
            variants[1] = variant
        elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
            variants[2] = variant

    if np.any(variants < 0):
        print('Not enough variants to analyze')
        return

    sim_data_path = os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(sim_data_path, 'rb') as f:
        sim_data = cPickle.load(f)

    all_yields = []
    for variant in variants:
        yields = []
        for sim_dir in ap.get_cells(variant=[variant]):
            sim_out_dir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            fba_reader = TableReader(os.path.join(sim_out_dir, 'FBAResults'))
            main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
            mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

            # Load data
            time_step_sec = main_reader.readColumn('timeStepSec')
            external_fluxes = fba_reader.readColumn('externalExchangeFluxes')
            external_molecules = fba_reader.readAttribute('externalMoleculeIDs')
            dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
            growth = GROWTH_UNITS * mass_reader.readColumn('growth') / time_step_sec

            # Calculate growth yield on glucose.  The exchange flux is
            # negated in the division so the reported yield is positive
            # when that flux is negative.
            glc_idx = external_molecules.index(GLUCOSE_ID)
            glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
            glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
            glc_mass_flux = glc_flux * glc_mw * dry_mass
            glc_mass_yield = growth / -glc_mass_flux

            # The first time step is excluded.
            yields.extend(glc_mass_yield[1:].asNumber())
        all_yields.append(yields)

    # Pairwise Welch t-tests (unequal variances) between all slots.
    for i, v1 in enumerate(variants):
        for j, v2 in enumerate(variants[i + 1:], i + 1):
            _, p = stats.ttest_ind(all_yields[i], all_yields[j], equal_var=False)
            print('p={:.2e} for variant {} vs variant {}'.format(p, v1, v2))

    plt.figure(figsize=(4, 4))
    xticks = list(range(N_VARIANTS))

    # Plot data
    plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
    plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

    # Format axes
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.xticks(xticks, VARIANT_LABELS)
    plt.ylabel('Glucose Yield\n(g cell / g glucose)')

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot mass time courses for every cell found under ``variantDir``.

    One subplot is drawn per entry of ``massNames`` (dry, protein, rRNA,
    mRNA and DNA mass); every cell contributes one line per subplot.

    Args:
        variantDir: existing directory handed to ``AnalysisPaths`` with
            ``cohort_plot=True``.
        plotOutDir: directory the figure is exported to; created if missing.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: not used by this plot.
        validationDataFile: not used by this plot.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``variantDir`` is not an existing directory.
    """
    massNames = [
        "dryMass",
        "proteinMass",
        #"tRnaMass",
        "rRnaMass",
        'mRnaMass',
        "dnaMass",
    ]
    cleanNames = [
        "Dry\nmass",
        "Protein\nmass",
        #"tRNA\nmass",
        "rRNA\nmass",
        "mRNA\nmass",
        "DNA\nmass",
    ]

    if not os.path.isdir(variantDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("variantDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        # makedirs also creates missing parent directories.
        os.makedirs(plotOutDir)

    _, axesList = plt.subplots(len(massNames), sharex=True)
    currentMaxTime = 0

    # Get all cells in each seed
    ap = AnalysisPaths(variantDir, cohort_plot=True)
    all_cells = ap.get_cells()

    for simDir in all_cells:
        simOutDir = os.path.join(simDir, "simOut")
        time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
        mass = TableReader(os.path.join(simOutDir, "Mass"))

        time_hours = (time / 60.) / 60.

        # Track the longest cell cycle seen so far so the x axis is wide
        # enough to show all generations.
        cellCycleTime = (time[-1] - time[0]) / 60. / 60.
        if cellCycleTime > currentMaxTime:
            currentMaxTime = cellCycleTime
        x_max = currentMaxTime * int(ap.n_generation) * 1.1

        for axes, massType, cleanName in zip(axesList, massNames, cleanNames):
            axes.plot(time_hours, mass.readColumn(massType), linewidth=2)
            axes.set_xlim(0, x_max)
            axes.set_ylabel(cleanName + " (fg)")

    # Only label the extremes of each y axis.
    for axes in axesList:
        axes.set_yticks(list(axes.get_ylim()))

    axesList[0].set_title("Cell mass fractions")
    axesList[len(massNames) - 1].set_xlabel("Time (hr)")
    plt.subplots_adjust(hspace=0.2, wspace=0.5)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot flux sensitivity z-scores for two dehydrogenase reactions.

    Succinate and isocitrate flux-sensitivity columns are read from the
    ``FBAResults`` listener of every cell in every variant, stacked, turned
    into z-scores with ``calc_z`` and drawn in a 2x2 grid (``plot_lows`` on
    the left, ``plot_threshold`` on the right).

    The plot is skipped, with a printed message, unless
    ``metadata['variant']`` is ``'flux_sensitivity'``, and also when no
    simulation output is found.

    Args:
        inputDir: existing directory handed to ``AnalysisPaths`` with
            ``variant_plot=True``.
        plotOutDir: directory the figure is exported to.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: not used by this plot.
        validationDataFile: not used by this plot.
        metadata: mapping with an optional ``'variant'`` entry; also passed
            through to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
    """
    if metadata.get('variant', '') != 'flux_sensitivity':
        # Single-argument print call behaves the same on Python 2 and 3.
        print('This plot only runs for the flux_sensitivity variant.')
        return

    if not os.path.isdir(inputDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('inputDir does not currently exist as a directory')
    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)

    reactions = None
    succ_fluxes = []
    iso_fluxes = []
    for variant in ap.get_variants():
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            # Listeners used
            fba_reader = TableReader(os.path.join(simOutDir, 'FBAResults'))

            # Load data.  The reaction list read from the last cell is the
            # one used for plotting; the first row of each flux column is
            # skipped.
            reactions = np.array(fba_reader.readAttribute('sensitivity_reactions'))
            succ_fluxes.append(
                fba_reader.readColumn('succinate_flux_sensitivity')[1:, :])
            iso_fluxes.append(
                fba_reader.readColumn('isocitrate_flux_sensitivity')[1:, :])

    if not succ_fluxes:
        # np.vstack raises on an empty list and ``reactions`` would be unset.
        print('No simulation output found to plot.')
        return

    succ_z = calc_z(np.vstack(succ_fluxes))
    iso_z = calc_z(np.vstack(iso_fluxes))
    threshold = -0.1

    # Plot data
    plt.figure()
    gs = gridspec.GridSpec(2, 2)

    ## Succinate dehydrogenase all fluxes
    ax = plt.subplot(gs[0, 0])
    plot_lows(ax, succ_z, threshold, 'succinate dehydrogenase')

    ## Succinate dehydrogenase fluxes over threshold
    ax = plt.subplot(gs[0, 1])
    plot_threshold(ax, succ_z, threshold, reactions)

    ## Isocitrate dehydrogenase all fluxes
    ax = plt.subplot(gs[1, 0])
    plot_lows(ax, iso_z, threshold, 'isocitrate dehydrogenase')

    ## Isocitrate dehydrogenase fluxes over threshold
    ax = plt.subplot(gs[1, 1])
    plot_threshold(ax, iso_z, threshold, reactions)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot RNA/protein mass ratio against growth rate per condition.

    For every variant, cells from generation ``FIRST_GENERATION`` onwards
    contribute one growth rate (``ln 2`` divided by the simulated time in
    hours) and one ratio of mean RNA mass to mean protein mass.  Three
    figures are exported: an errorbar plot of the per-variant means, the
    same plot without axis labels (``_clean`` suffix) and a scatter plot of
    the individual cells (``_scatter`` suffix).  Each figure includes the
    reference line ``y = x / 4.5 + 0.087``.

    The plot is skipped, with a printed message, unless
    ``metadata["variant"]`` is ``"condition"``, when there are too few
    generations, or when fewer than three variants produced data.

    Args:
        inputDir: existing directory handed to ``AnalysisPaths`` with
            ``variant_plot=True``.
        plotOutDir: directory the figures are exported to.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: not used by this plot.
        validationDataFile: not used by this plot.
        metadata: mapping with a ``"variant"`` entry; also passed through
            to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
        KeyError: if ``metadata`` has no ``"variant"`` entry.
    """
    if metadata["variant"] != "condition":
        print('This analysis only runs for the "condition" variant.')
        return

    if not os.path.isdir(inputDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('inputDir does not currently exist as a directory')
    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    n_gens = ap.n_generation
    variants = ap.get_variants()

    if n_gens - 1 < FIRST_GENERATION:
        print('Not enough generations to plot.')
        return

    all_growth_rates = []
    all_rna_to_protein_ratios = []
    for variant in variants:
        all_cells = ap.get_cells(
            variant=[variant], generation=range(FIRST_GENERATION, n_gens))
        if len(all_cells) == 0:
            continue

        doubling_times = []
        variant_ratios = []
        for simDir in all_cells:
            # Best effort: a cell whose output cannot be read is skipped.
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate.
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                rna_mass = mass.readColumn("rnaMass")
                protein_mass = mass.readColumn("proteinMass")
                time = TableReader(os.path.join(simOutDir, "Main")).readColumn("time")
                doubling_time = (time[-1] - time[0]) / 3600.
                ratio = rna_mass.mean() / protein_mass.mean()
            except Exception:
                continue

            # Record both values together so the two series stay aligned.
            doubling_times.append(doubling_time)
            variant_ratios.append(ratio)

        all_growth_rates.append(np.log(2) / np.array(doubling_times))
        all_rna_to_protein_ratios.append(np.array(variant_ratios))

    marker_styles = ['o', '^', 'x']
    labels = ['basal', 'anaerobic', '+AA']

    if len(all_growth_rates) < len(labels):
        # The plots below index the first three variants directly.
        print('Not enough variants with data to plot.')
        return

    def add_reference_and_format(ax, line_end, x_max, y_max):
        # Draw the linear relation proposed in Scott et al. (2010) and apply
        # the shared axis formatting.
        x_linear = np.linspace(0.05, line_end, 100)
        y_linear = x_linear / 4.5 + 0.087
        ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])
        ax.set_xlim([0, x_max])
        ax.set_ylim([0, y_max])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)
        whitePadSparklineAxis(ax)
        ax.tick_params(
            which='both', bottom=True, left=True, top=False, right=False,
            labelbottom=True, labelleft=True)
        # Raw string: "\l" is not a valid escape sequence.
        ax.set_xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
        ax.set_ylabel("RNA/protein mass ratio")

    # Get errorbar plot
    plt.figure(figsize=FIGSIZE)
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ax = plt.subplot2grid((1, 1), (0, 0))
    for growth_rates, ratios, marker, label in zip(
            all_growth_rates, all_rna_to_protein_ratios, marker_styles, labels):
        ax.errorbar(
            growth_rates.mean(), ratios.mean(), yerr=ratios.std(),
            color=color_cycle[0], mec=color_cycle[0], marker=marker,
            markersize=8, mfc='white', linewidth=1, capsize=2, label=label)

    add_reference_and_format(ax, 1.95, 2, 0.7)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of errorbar plot
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)
    plt.close("all")

    # Get scatter version of plot
    plt.figure(figsize=FIGSIZE)
    ax = plt.subplot2grid((1, 1), (0, 0))
    options = {"edgecolors": color_cycle[0], "alpha": 0.25, "s": 20}

    ax.scatter(
        all_growth_rates[0], all_rna_to_protein_ratios[0],
        facecolors="none", marker="o", label=labels[0], **options)
    ax.scatter(
        all_growth_rates[1], all_rna_to_protein_ratios[1],
        facecolors="none", marker="^", label=labels[1], **options)
    ax.scatter(
        all_growth_rates[2], all_rna_to_protein_ratios[2],
        marker="x", label=labels[2], **options)

    add_reference_and_format(ax, 2.45, 2.5, 0.8)
    exportFigure(plt, plotOutDir, plotOutFileName + "_scatter", metadata)
    # Release the scatter figure as well.
    plt.close("all")
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot a histogram of doubling times for one variant.

    The doubling time of a cell is taken to be its simulated time span
    (last minus first entry of the ``Main`` listener's ``time`` column),
    for cells from generation ``FIRST_GENERATION`` onwards.  The median is
    marked with a dashed line and the expected doubling time for
    ``sim_data.condition`` with a solid line.

    Missing or unreadable ``Main`` tables are collected and reported
    together after all cells are processed: raised when
    ``THROW_ON_BAD_SIMULATION_OUTPUT`` is true, printed otherwise.

    Args:
        variantDir: existing directory handed to ``AnalysisPaths`` with
            ``cohort_plot=True``.
        plotOutDir: directory the figure is exported to; created if missing.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: path of the pickled ``sim_data`` object.
        validationDataFile: not used by this plot.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``variantDir`` is not an existing directory, or if
            bad simulation output is found and
            ``THROW_ON_BAD_SIMULATION_OUTPUT`` is true.
    """
    if not os.path.isdir(variantDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception("variantDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        # makedirs also creates missing parent directories.
        os.makedirs(plotOutDir)

    analysis_paths = AnalysisPaths(variantDir, cohort_plot=True)
    n_gens = analysis_paths.n_generation

    if n_gens - 1 < FIRST_GENERATION:
        # Single-argument print call behaves the same on Python 2 and 3.
        print('Not enough generations to plot.')
        return

    sim_dirs = analysis_paths.get_cells(
        generation=range(FIRST_GENERATION, n_gens))

    # The context manager closes the file handle.
    # NOTE: unpickling is only safe for trusted, locally generated files.
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)

    doubling_times_minutes = []
    missing_files = []
    broken_files = []

    for sim_dir in sim_dirs:
        path = os.path.join(sim_dir, "simOut", 'Main')

        if not os.path.exists(path):
            missing_files.append(path)
            continue

        # Assume simulated time == doubling time.  Time is relative to the
        # first simulation, so a difference is needed.  Any failure while
        # reading or indexing marks the file as broken.
        try:
            time = TableReader(path).readColumn('time')
            doubling_time = time[-1] - time[0]
        except Exception:
            broken_files.append(path)
            continue

        doubling_times_minutes.append(doubling_time / 60.)

    if missing_files or broken_files:
        messages = []
        if missing_files:
            messages.append('Missing files:\n{}'.format('\n'.join(missing_files)))
        if broken_files:
            messages.append('Broken files:\n{}'.format('\n'.join(broken_files)))
        message = '\n'.join(messages)

        if THROW_ON_BAD_SIMULATION_OUTPUT:
            # Throw late so we get a full picture of what files are missing
            raise Exception(message)
        print(message)

    plt.figure(figsize=FIGSIZE)
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    bins = np.linspace(
        DOUBLING_TIME_BOUNDS_MINUTES[0],
        DOUBLING_TIME_BOUNDS_MINUTES[1],
        N_BINS + 1  # +1 because we need n+1 bin bounds for n bins
    )

    plt.hist(doubling_times_minutes, bins=bins)
    plt.axvline(
        np.median(doubling_times_minutes), color='k', lw=2, linestyle='--')
    plt.axvline(
        sim_data.conditionToDoublingTime[sim_data.condition].asNumber(units.min),
        color=color_cycle[2], lw=2)

    plt.title('n = {}'.format(len(doubling_times_minutes)))
    plt.xlim(*DOUBLING_TIME_BOUNDS_MINUTES)
    plt.ylim(0, FREQUENCY_MAX)
    plt.xlabel('Doubling time (minutes)')

    # TODO (John): How to enforce standard axes dimensions?
    # TODO (John): plt.tight_layout()?

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Scatter glucose uptake against succinate flux error for each variant.

    For every variant the simulated fluxes of all cells are pooled and
    compared with the Toya 2010 validation fluxes.  Each variant becomes
    one point:

    * x: negated mean exchange flux of ``GLC_ID``
    * y: log2 of mean model flux over Toya flux for ``SUCC_ID``
    * size: ``(8 * r) ** 2`` with ``r`` the Pearson correlation between
      model and Toya fluxes
    * color: red when none of ``HIGHLIGHTED_CONSTRAINTS`` is among the
      constrained reactions read for that variant, blue otherwise

    An inset repeats the points inside the highlighted region and labels
    them with the disabled constraints of their variant.

    Args:
        inputDir: existing directory handed to ``AnalysisPaths`` with
            ``variant_plot=True``; ``sim_data`` is loaded from its ``kb``
            subdirectory.
        plotOutDir: directory the figure is exported to.
        plotOutFileName: base file name passed to ``exportFigure``.
        simDataFile: not used by this plot.
        validationDataFile: path of the pickled validation data.
        metadata: passed through to ``exportFigure``.

    Raises:
        Exception: if ``inputDir`` is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        # Call form of raise: valid on both Python 2 and Python 3.
        raise Exception('inputDir does not currently exist as a directory')
    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    n_variants = len(variants)

    # Load sim_data
    sim_data_path = os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(sim_data_path, 'rb') as f:
        sim_data = cPickle.load(f)
    cell_density = sim_data.constants.cellDensity.asNumber(MASS_UNITS / VOLUME_UNITS)

    # Load validation_data
    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)
    toya = validation_data.reactionFlux.toya2010fluxes
    toyaReactions = toya["reactionID"]
    toyaFluxesDict = dict(zip(toyaReactions, toya["reactionFlux"]))
    toyaStdevDict = dict(zip(toyaReactions, toya["reactionFluxStdev"]))

    glc_uptakes = np.zeros(n_variants)
    log_ratio_succ = np.zeros(n_variants)
    size_pearson = np.zeros(n_variants)
    selected_indicies = np.zeros(n_variants, bool)

    for v, variant in enumerate(variants):
        # initialize kinetic flux comparison
        exchange_fluxes = {entry: [] for entry in EXCHANGES}
        reaction_fluxes = {entry: [] for entry in REACTIONS}
        modelFluxes = {rxn: [] for rxn in toyaReactions}

        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            try:
                # Listeners used
                massListener = TableReader(os.path.join(simOutDir, "Mass"))
                fbaResults = TableReader(os.path.join(simOutDir, "FBAResults"))
                enzymeKineticsReader = TableReader(
                    os.path.join(simOutDir, "EnzymeKinetics"))

                ## Read from mass listener
                cellMass = massListener.readColumn("cellMass")
                # skip if no data (equality, not identity, against the
                # empty-tuple shape)
                if cellMass.shape == ():
                    continue
                dryMass = massListener.readColumn("dryMass")
            except Exception as e:
                print(e)
                continue

            coefficient = (dryMass / cellMass * cell_density).reshape(-1, 1)

            ## Read from FBA listener; the first time step is dropped.
            reactionIDs = {
                r: i for i, r in enumerate(fbaResults.readAttribute("reactionIDs"))
            }
            exMolec = {
                m: i
                for i, m in enumerate(fbaResults.readAttribute("externalMoleculeIDs"))
            }
            reactionFluxes = FLUX_CONVERSION * (
                fbaResults.readColumn("reactionFluxes") / coefficient)[1:, :]
            exFlux = fbaResults.readColumn("externalExchangeFluxes")[1:, :]

            ## Read from EnzymeKinetics listener
            # NOTE(review): only the value from the last readable cell of
            # the variant is used after this loop (and a variant with no
            # readable cells would reuse the previous variant's value) --
            # confirm this is intended.
            constrainedReactions = {
                r: i
                for i, r in enumerate(
                    enzymeKineticsReader.readAttribute("constrainedReactions"))
            }

            ## Append values for relevant reactions.
            # append to exchanges
            for entry in EXCHANGES:
                exchange_fluxes[entry].extend(exFlux[:, exMolec[entry]])

            # append to reaction fluxes
            for entry in REACTIONS:
                reaction_fluxes[entry].extend(reactionFluxes[:, reactionIDs[entry]])

            ## get all Toya reactions, and corresponding simulated fluxes.
            # Direction is stored as an explicit sign next to the column
            # index.  Encoding it as the sign of the index would lose the
            # reaction at index 0 because sign(0) == 0.
            toya_idx = {r: [] for r in toyaReactions}
            for rxn_id, idx in reactionIDs.items():
                parts = rxn_id.split(' (reverse)')
                sign = -1 if len(parts) > 1 else 1
                base_rxn = parts[0].split('__')[0]
                if base_rxn in toya_idx:
                    toya_idx[base_rxn].append((sign, idx))

            for toyaReaction, signed_idx in toya_idx.items():
                flux_time_course = np.sum(
                    [sign * reactionFluxes[:, idx] for sign, idx in signed_idx],
                    axis=0)
                modelFluxes[toyaReaction].append(flux_time_course.mean())

        ## Flux comparison with Toya
        toyaVsReactionAve = []
        rxn_order = []
        for rxn, toyaFlux in toyaFluxesDict.items():
            if rxn in modelFluxes:
                # Keep rxn_order aligned with the rows of toyaVsReactionAve.
                rxn_order.append(rxn)
                toyaVsReactionAve.append((
                    np.mean(modelFluxes[rxn]),
                    toyaFlux.asNumber(OUTPUT_FLUX_UNITS),
                    np.std(modelFluxes[rxn]),
                    toyaStdevDict[rxn].asNumber(OUTPUT_FLUX_UNITS),
                ))

        toyaVsReactionAve = np.array(toyaVsReactionAve)
        rWithAll = pearsonr(toyaVsReactionAve[:, 0], toyaVsReactionAve[:, 1])
        succ_toya_flux = toyaVsReactionAve[rxn_order.index(SUCC_ID), 1]

        # Save data for plotting
        glc_uptakes[v] = -np.mean(exchange_fluxes[GLC_ID])
        log_ratio_succ[v] = np.log2(
            np.mean(reaction_fluxes[SUCC_ID]) / succ_toya_flux)
        size_pearson[v] = (rWithAll[0] * 8)**2
        selected_indicies[v] = np.all(
            [c not in constrainedReactions for c in HIGHLIGHTED_CONSTRAINTS])

    # Plot scatterplot
    fig = plt.figure(figsize=(5, 5))
    gs = gridspec.GridSpec(40, 40)

    ## Plot full data
    plt.scatter(
        glc_uptakes[~selected_indicies], log_ratio_succ[~selected_indicies],
        color='blue', alpha=0.6, s=size_pearson[~selected_indicies])
    plt.scatter(
        glc_uptakes[selected_indicies], log_ratio_succ[selected_indicies],
        color='red', alpha=0.6, s=size_pearson[selected_indicies])
    x_min, x_max = plt.xlim()
    y_max = max(np.abs(plt.ylim()))
    plt.axvspan(0, GLC_MAX, facecolor='g', alpha=0.1)
    plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)
    plt.axhline(y=0, color='k', linestyle='--')

    ## Format axes
    plt.ylabel('log2(model flux / Toya flux)')
    plt.xlabel('glucose uptake (mmol / g DCW / hr)')
    plt.xlim([np.floor(min(x_min, 10)), np.ceil(x_max)])
    plt.ylim([-y_max, y_max])

    ## Plot highlighted region data
    fig.add_subplot(gs[1:28, -20:-1])
    in_region = (glc_uptakes < GLC_MAX) & (np.abs(log_ratio_succ) < SUCC_DISTANCE)
    selected_in = in_region & selected_indicies
    not_selected_in = in_region & ~selected_indicies

    # One list of two-character constraint abbreviations per variant.  A
    # plain list is used because the per-variant lists can differ in length.
    constraint_labels = [
        [c[:2] for c in constraints] if constraints is not None else []
        for _, constraints in map(get_disabled_constraints, variants)
    ]

    plt.scatter(
        glc_uptakes[not_selected_in], log_ratio_succ[not_selected_in],
        color='blue', alpha=0.6, s=size_pearson[not_selected_in])
    plt.scatter(
        glc_uptakes[selected_in], log_ratio_succ[selected_in],
        color='red', alpha=0.6, s=size_pearson[selected_in])
    for x, y, label, keep in zip(
            glc_uptakes, log_ratio_succ, constraint_labels, in_region):
        if keep:
            plt.text(x, y, ', '.join(label), ha='center', va='top', fontsize=6)
    x_min, _ = plt.xlim()
    x_min = np.floor(min(x_min, 10))
    plt.axvspan(x_min, GLC_MAX, facecolor='g', alpha=0.1)
    plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)

    ## Format axes
    plt.xlim([x_min, GLC_MAX])
    plt.ylim([-SUCC_DISTANCE, SUCC_DISTANCE])

    ## Save figure
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')