def test_dtype_float32(self):
    """
    Check that a float32 BulkObjectsContainer handles fractional counts.

    Counts are set, incremented and decremented with non-integer values,
    first through the container itself and then through a view obtained
    from countsView(), and the stored counts are compared after each step.
    """
    float_container = BulkObjectsContainer(OBJECT_NAMES, dtype=np.float32)

    increments = [10, 20.5, 30.5]
    decrements = [1.5, 2, 3.5]

    # Direct operations on the container
    starting_counts = [10, 10.5, 20]
    float_container.countsIs(starting_counts)
    npt.assert_equal(float_container.counts(), starting_counts)

    float_container.countsInc(increments)
    npt.assert_equal(float_container.counts(), [20, 31, 50.5])

    float_container.countsDec(decrements)
    npt.assert_equal(float_container.counts(), [18.5, 29, 47])

    # The same operations applied through a view
    view = float_container.countsView()

    view.countsInc(increments)
    npt.assert_equal(view.counts(), [28.5, 49.5, 77.5])

    view.countsDec(decrements)
    npt.assert_equal(view.counts(), [27, 47.5, 74])
def initialize(self, sim, sim_data):
    """
    Set up the bulk molecules state from the simulation and its data.

    Delegates to the parent class first, then stores the process IDs,
    molecule IDs and per-molecule masses, creates the counts container,
    fills the process priority vector with the default priority and
    records which molecule IDs belong to each division group.
    """
    super(BulkMolecules, self).initialize(sim, sim_data)

    self._processIDs = sim.processes.keys()

    # Load constants
    bulk_data = sim_data.internal_state.bulkMolecules.bulkData
    self._moleculeIDs = bulk_data['id']

    # Mass is stored per mole; dividing by Avogadro's number gives the
    # per-molecule value
    mass_per_mol = bulk_data['mass'].asNumber(units.fg / units.mol)
    avogadro = sim_data.constants.nAvogadro.asNumber(1 / units.mol)
    self._moleculeMass = mass_per_mol / avogadro

    self._submassNameToIndex = sim_data.submassNameToIndex

    # Create the container for molecule counts
    self.container = BulkObjectsContainer(self._moleculeIDs)

    # Set up vector of process priorities
    priorities = np.empty(self._nProcesses, np.int64)
    priorities.fill(REQUEST_PRIORITY_DEFAULT)
    self._processPriorities = priorities

    # Set up ids for division into daughter cells
    groups = sim_data.moleculeGroups
    self.divisionIds = {
        'binomial': groups.bulkMoleculesBinomialDivision,
        'equally': groups.bulkMoleculesEqualDivision,
        'fullChromosome': [sim_data.moleculeIds.fullChromosome],
        'partialChromosome': groups.partialChromosome,
        'setTo1': groups.bulkMoleculesSetTo1Division,
    }
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Scatter-plot simulated protein counts against validation counts for
    the first two variants found under inputDir.

    For every cell of each variant the time-averaged bulk counts are
    loaded, active ribosome and RNA polymerase counts are added to their
    full complexes, and complexes are broken back down into monomers using
    the equilibrium and complexation stoichiometry matrices. The counts of
    PROTEINS_WITH_HALF_LIFE are then averaged over cells and compared, on
    a log10 scale, to the Schmidt 2015 glucose counts from the validation
    data. The Pearson correlation for each variant is shown in the legend.

    Returns early (after printing a message) when fewer than two variants
    are available; variants beyond the second are skipped.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        # Call form of raise is valid on both Python 2 and Python 3
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    expected_n_variants = 2
    n_variants = len(variants)

    if n_variants < expected_n_variants:
        print('This plot only runs for {} variants.'.format(expected_n_variants))
        return

    # IDs for appropriate proteins
    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData['id'].tolist()
    ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

    # Bulk molecules that receive the counts of active unique molecules
    ids_ribosome_subunits = [
        sim_data.moleculeIds.s30_fullComplex,
        sim_data.moleculeIds.s50_fullComplex,
        ]
    ids_rnap = [sim_data.moleculeIds.rnapFull]

    # Stoichiometry matrices
    equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
    complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

    # Protein container views
    protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = protein_container.countsView(ids_complexation)
    view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
    view_equilibrium = protein_container.countsView(ids_equilibrium)
    view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

    # Load model data
    model_counts = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
    model_std = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
    for i, variant in enumerate(variants):
        if i >= expected_n_variants:
            print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
            continue

        variant_counts = []
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

            # Account for bulk molecules
            (bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
            protein_container.countsIs(bulk_counts.mean(axis=0))

            # Account for unique molecules. The ID list and the counts
            # table are each read once and then indexed for both molecules.
            unique_ids = unique_counts_reader.readAttribute('uniqueMoleculeIds')
            unique_counts = unique_counts_reader.readColumn('uniqueMoleculeCounts')
            n_ribosomes = unique_counts[:, unique_ids.index('activeRibosome')]
            n_rnap = unique_counts[:, unique_ids.index('activeRnaPoly')]
            protein_container.countsInc(n_ribosomes.mean(), ids_ribosome_subunits)
            protein_container.countsInc(n_rnap.mean(), ids_rnap)

            # Account for small-molecule bound complexes
            view_equilibrium.countsDec(equil_stoich.dot(view_equilibrium_complexes.counts()))

            # Account for monomers in complexed form
            view_complexation.countsDec(complex_stoich.dot(view_complexation_complexes.counts()))

            variant_counts.append(protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())

        model_counts[:, i] = np.mean(variant_counts, axis=0)
        model_std[:, i] = np.std(variant_counts, axis=0)

    # Validation data
    schmidt_ids = {m: i for i, m in enumerate(validation_data.protein.schmidt2015Data['monomerId'])}
    schmidt_counts = validation_data.protein.schmidt2015Data['glucoseCounts']
    validation_counts = np.array([schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

    # Process data
    # Error bars are asymmetric in log space, so the lower and upper
    # extents are computed separately
    model_log_counts = np.log10(model_counts)
    model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
    model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
    validation_log_counts = np.log10(validation_counts)

    r_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
    r_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

    # Scatter plot of model vs validation counts
    max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
    limits = [0, max_counts]
    plt.figure()
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ## Plot data
    for i in range(expected_n_variants):
        plt.errorbar(validation_log_counts, model_log_counts[:, i],
            yerr=np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i])),
            fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
    plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

    ## Format axes
    plt.xlabel('Validation Counts\n(log10(counts))')
    plt.ylabel('Average Simulation Counts\n(log10(counts))')
    ax = plt.gca()
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    ax.spines['left'].set_position(('outward', 10))
    ax.spines['bottom'].set_position(('outward', 10))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    ## Add legend
    legend_text = [
        'Before: r={:.2f}, p={:.3f}'.format(r_before[0], r_before[1]),
        'After: r={:.2f}, p={:.3f}'.format(r_after[0], r_after[1]),
        ]
    plt.legend(legend_text, frameon=False)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def fitSimData_2(kb, simOutDir):
    """
    Fit the ppGpp pool concentration and tRNA synthetase rates on kb.

    N_SEEDS cells are instantiated by multinomial sampling of RNA and
    protein counts followed by complex formation. The mean and standard
    deviation of the resulting counts are then used to:
      - estimate the number of amino acids in the cell and from it set the
        'PPGPP[c]' entry of the metabolite pool concentrations, and
      - compute per-group tRNA synthetase counts and the rates stored on
        kb (synthetase_counts, synthetase_variance,
        initial_aa_polymerization_rate, minimum_trna_synthetase_rates,
        trna_synthetase_rates).

    kb is modified in place and nothing is returned. simOutDir is accepted
    but not used by this function.
    """
    rna_data = kb.process.transcription.rnaData
    monomer_data = kb.process.translation.monomerData
    complexation = kb.process.complexation
    bulk_ids = kb.state.bulkMolecules.bulkData['id']

    mass_fractions = kb.mass.subMass
    protein_mass = mass_fractions["proteinMass"].asUnit(units.g)
    rna_mass = mass_fractions["rnaMass"].asUnit(units.g)

    # Construct bulk container

    # We want to know something about the distribution of the copy numbers of
    # macromolecules in the cell. While RNA and protein expression can be
    # approximated using well-described statistical distributions, we need
    # absolute copy numbers to form complexes. To get a distribution, we must
    # instantiate many cells, form complexes, and finally compute the
    # statistics we will use in the fitting operations.

    seed_container = BulkObjectsContainer(bulk_ids)
    rna_view = seed_container.countsView(rna_data["id"])
    protein_view = seed_container.countsView(monomer_data["id"])
    complexation_view = seed_container.countsView(complexation.moleculeNames)

    tracked_ids = list(
        set(rna_data["id"])
        | set(monomer_data["id"])
        | set(complexation.moleculeNames)
        )
    tracked_view = seed_container.countsView(tracked_ids)

    # One row of counts per seed
    counts_by_seed = np.empty((N_SEEDS, tracked_view.counts().size), np.int64)

    stoich_matrix = complexation.stoichMatrix().astype(np.int64, order="F")
    prebuilt_matrices = mccBuildMatrices(stoich_matrix)

    rna_distribution = rna_data["expression"]
    rna_total_counts = countsFromMassAndExpression(
        rna_mass.asNumber(units.g),
        rna_data["mw"].asNumber(units.g / units.mol),
        rna_distribution,
        kb.constants.nAvogadro.asNumber(1 / units.mol)
        )

    protein_distribution = calcProteinDistribution(kb)
    protein_total_counts = calcProteinTotalCounts(kb, protein_mass, protein_distribution)

    for seed in xrange(N_SEEDS):
        rng = np.random.RandomState(seed)

        tracked_view.countsIs(0)

        # RNA is sampled before protein from the same random stream
        rna_view.countsIs(rng.multinomial(rna_total_counts, rna_distribution))
        protein_view.countsIs(rng.multinomial(protein_total_counts, protein_distribution))

        counts_before_complexation = complexation_view.counts()
        counts_after_complexation = mccFormComplexesWithPrebuiltMatrices(
            counts_before_complexation,
            seed,
            stoich_matrix,
            *prebuilt_matrices
            )
        complexation_view.countsIs(counts_after_complexation)

        counts_by_seed[seed, :] = tracked_view.counts()

    bulkAverageContainer = BulkObjectsContainer(bulk_ids, np.float64)
    bulkDeviationContainer = BulkObjectsContainer(bulk_ids, np.float64)

    bulkAverageContainer.countsIs(counts_by_seed.mean(0), tracked_ids)
    bulkDeviationContainer.countsIs(counts_by_seed.std(0), tracked_ids)

    # Free up memory
    # TODO: make this more functional; one function for returning average & distribution
    del counts_by_seed
    del seed_container

    # ----- Calculate ppGpp concentration ----- #
    monomer_lengths = monomer_data['length']

    aa_in_monomers = (
        bulkAverageContainer.counts(monomer_data['id'])
        * monomer_lengths.asNumber()
        ).sum()

    aa_in_complexes = 0.
    for complex_id in list(complexation.complexNames):
        monomers = complexation.getMonomers(complex_id)
        subunit_ids = monomers['subunitIds']
        subunit_stoich = monomers['subunitStoich']

        # Subunits without a monomer entry are dropped from the
        # stoichiometry so it stays aligned with the lengths looked up below
        found_idxs = []
        missing_positions = []
        for position, subunit_id in enumerate(subunit_ids):
            try:
                found_idxs.append(np.where(monomer_data['id'] == subunit_id)[0][0])
            except IndexError:
                missing_positions.append(position)

        subunit_stoich = np.delete(subunit_stoich, missing_positions)
        subunit_lengths = monomer_lengths[found_idxs].asNumber()
        aa_in_complexes += (
            bulkAverageContainer.count(complex_id) * subunit_lengths * subunit_stoich
            ).sum()

    aa_in_macromolecules = (aa_in_complexes + aa_in_monomers)
    aa_in_soluble_pool = aa_in_macromolecules * 0.08  # Approximately correct for a one-time calculation.
    # TODO: Calculate soluble pools here too!
    aa_in_cell = aa_in_macromolecules + aa_in_soluble_pool

    ppGpp_per_cell = (aa_in_cell * kb.constants.ppGpp_base_concentration).asUnit(units.count)
    cell_volume = kb.mass.avgCellDryMassInit / kb.constants.cellDensity
    ppGpp_concentration = (ppGpp_per_cell.asUnit(units.mol) / cell_volume).asUnit(units.mol / units.L)

    # Finally set ppGpp concentration to maintain
    metabolism = kb.process.metabolism
    ppGpp_index = metabolism.metabolitePoolIDs.index('PPGPP[c]')
    metabolism.metabolitePoolConcentrations[ppGpp_index] = ppGpp_concentration

    # ----- tRNA synthetase turnover rates ------
    # Fit tRNA synthetase kcat values based on expected rates of translation
    # compute values at initial time point

    ## Compute rate of AA incorporation
    aa_composition = monomer_data["aaCounts"]

    initial_protein_mass = kb.mass.subMass['proteinMass']
    initial_protein_counts = calcProteinCounts(kb, initial_protein_mass)

    # Dilution by growth plus degradation, per protein
    loss_rate = np.log(2) / kb.doubling_time + monomer_data["degRate"]
    initial_translation_rate = (loss_rate * initial_protein_counts).asUnit(1 / units.s)

    initial_polymerization_rate = units.dot(
        units.transpose(aa_composition), initial_translation_rate
        ).asUnit(units.aa / units.s)

    ## Compute expression of tRNA synthetases
    ## Assuming independence in variance
    synthetase_groups = kb.process.translation.AA_SYNTHETASE_GROUPS
    counts_by_group = np.zeros(len(synthetase_groups), dtype=np.float64)
    variance_by_group = np.zeros(len(synthetase_groups), dtype=np.float64)

    # NOTE(review): bulkDeviationContainer was filled with .std(0), so the
    # values summed as "variance" here are standard deviations.
    for group_idx, group_members in enumerate(synthetase_groups.itervalues()):
        group_count = 0.
        group_variance = 0.
        for synthetase_id in group_members:
            member_count = bulkAverageContainer.countsView([synthetase_id]).counts()
            member_variance = bulkDeviationContainer.countsView([synthetase_id]).counts()
            group_count += member_count
            group_variance += member_variance
        counts_by_group[group_idx] = group_count
        variance_by_group[group_idx] = group_variance

    ## Saved for plotting
    kb.synthetase_counts = counts_by_group
    kb.synthetase_variance = variance_by_group
    kb.initial_aa_polymerization_rate = initial_polymerization_rate
    kb.minimum_trna_synthetase_rates = initial_polymerization_rate / counts_by_group

    # TODO: Reimplement this with better fit taking into account the variance in aa
    #       utilization.
    ## Scaling synthetase counts by -2*variance so that rates will be high enough
    ## to accommodate stochastic behavior in the model without translation stalling.
    # scaled_synthetase_counts = synthetase_counts_by_group - (2 * synthetase_variance_by_group)
    scaled_counts = counts_by_group
    assert all(scaled_counts > 0)

    predicted_rates = initial_polymerization_rate / scaled_counts
    kb.trna_synthetase_rates = 2 * predicted_rates
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot simulated protein counts against the Schmidt 2015 glucose counts.

    For every cell found under seedOutDir the time-averaged bulk protein
    counts are loaded, active ribosome and RNA polymerase counts are added
    to their full complexes, and complexes are broken back down into
    monomers using the equilibrium and complexation stoichiometry
    matrices. Counts for the validation monomers are averaged over cells
    and plotted as log10(counts + 1) against the validation data. R^2
    values and average log deviations for all, low-abundance (below
    LOW_COUNT_THRESHOLD) and high-abundance proteins are printed.

    Raises:
        Exception: if seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise is valid on both Python 2 and Python 3
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Context managers close the pickle files once they are loaded
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)

    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData["id"].tolist()
    ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))
    ids_validation = validation_data.protein.schmidt2015Data["monomerId"].tolist()

    # Bulk molecules that receive the counts of active unique molecules
    ids_ribosome_subunits = [
        sim_data.moleculeIds.s30_fullComplex,
        sim_data.moleculeIds.s50_fullComplex,
        ]
    ids_rnap = [sim_data.moleculeIds.rnapFull]

    # Stoichiometry matrices do not depend on the cell, so build them once
    equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
    complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

    bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = bulkContainer.countsView(ids_complexation)
    view_complexation_complexes = bulkContainer.countsView(ids_complexation_complexes)
    view_equilibrium = bulkContainer.countsView(ids_equilibrium)
    view_equilibrium_complexes = bulkContainer.countsView(ids_equilibrium_complexes)
    # Views read the container's current counts, so a single view is
    # reused for every cell
    view_validation_schmidt = bulkContainer.countsView(ids_validation)

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    validation_counts_by_cell = []

    plt.figure(figsize=(4, 4))

    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        moleculeIds = bulkMolecules.readAttribute("objectNames")
        proteinIndexes = np.array(
            [moleculeIds.index(moleculeId) for moleculeId in ids_protein], int)
        proteinCountsBulk = bulkMolecules.readColumn("counts")[:, proteinIndexes]
        bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules. The ID list and the counts table
        # are each read once and then indexed for both molecules.
        uniqueMoleculeCounts = TableReader(os.path.join(simOutDir, "UniqueMoleculeCounts"))
        uniqueIds = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds")
        uniqueCounts = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")
        uniqueMoleculeCounts.close()
        nActiveRibosome = uniqueCounts[:, uniqueIds.index("activeRibosome")]
        nActiveRnaPoly = uniqueCounts[:, uniqueIds.index("activeRnaPoly")]
        bulkContainer.countsInc(nActiveRibosome.mean(), ids_ribosome_subunits)
        bulkContainer.countsInc(nActiveRnaPoly.mean(), ids_rnap)

        # Account for small-molecule bound complexes
        view_equilibrium.countsInc(
            np.dot(equil_stoich, view_equilibrium_complexes.counts() * -1)
            )

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(complex_stoich, view_complexation_complexes.counts() * -1)
            )

        validation_counts_by_cell.append(view_validation_schmidt.counts())

    simulation_counts = (np.array(validation_counts_by_cell)).mean(axis=0)

    # Schmidt Counts
    schmidt_counts = validation_data.protein.schmidt2015Data["glucoseCounts"]

    # Set up mask for proteins with low counts
    low_count_mask = schmidt_counts < LOW_COUNT_THRESHOLD
    n_low_count = low_count_mask.sum()
    n_high_count = schmidt_counts.size - n_low_count

    # Take logs (offset by one so that zero counts stay finite)
    schmidt_counts_log = np.log10(schmidt_counts + 1)
    simulation_counts_log = np.log10(simulation_counts + 1)

    # Compute deviations
    deviation_log = np.log10(np.abs(simulation_counts - schmidt_counts))

    axis = plt.subplot(1,1,1)

    axis.plot(schmidt_counts_log, simulation_counts_log, 'o', color="black",
        markersize=6, alpha=0.1, zorder=1, markeredgewidth=0.0)

    print("R^2 (all proteins) = %.3f (n = %d)" % (
        (pearsonr(simulation_counts_log, schmidt_counts_log)[0])**2,
        schmidt_counts.size
        ))
    print("R^2 (low-abundance proteins) = %.3f (n = %d)" % (
        (pearsonr(simulation_counts_log[low_count_mask], schmidt_counts_log[low_count_mask])[0])**2,
        n_low_count
        ))
    print("R^2 (high-abundance proteins) = %.3f (n = %d)" % (
        (pearsonr(simulation_counts_log[~low_count_mask], schmidt_counts_log[~low_count_mask])[0])**2,
        n_high_count
        ))

    print("Average log deviation (low-abundance proteins) = %.3f" % (
        deviation_log[low_count_mask].mean()))
    print("Average log deviation (high-abundance proteins) = %.3f" % (
        deviation_log[~low_count_mask].mean()))

    maxLine = np.ceil(
        max(schmidt_counts_log.max(), simulation_counts_log.max())
        )
    plt.plot([0, maxLine], [0, maxLine], '-k')

    plt.xlim(xmin=0, xmax=maxLine)
    plt.ylim(ymin=0, ymax=maxLine)

    axis.spines["right"].set_visible(False)
    axis.spines["top"].set_visible(False)
    axis.spines["left"].set_position(("outward", 10))
    axis.spines["bottom"].set_position(("outward", 10))
    axis.tick_params(right="off")
    axis.tick_params(top="off")
    axis.tick_params(which="both", direction="out")

    axis.set_xlim([-0.07, maxLine])
    axis.set_ylim([-0.07, maxLine])

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Histogram how often each protein is present across generations.

    For every cell found under seedOutDir the time-averaged bulk protein
    counts are loaded, active ribosome and RNA polymerase counts are added
    to their full complexes, and two-component, equilibrium and
    complexation complexes are broken back down into monomers using the
    corresponding stoichiometry matrices. A monomer is marked present in a
    cell when its resulting count is non-zero. The fraction of cells in
    which each monomer is present is then histogrammed.

    validationDataFile is accepted but not used by this function.

    Raises:
        Exception: if seedOutDir is not an existing directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise is valid on both Python 2 and Python 3
        raise Exception("seedOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    # Context manager closes the pickle file once it is loaded
    with open(simDataFile, "rb") as f:
        sim_data = cPickle.load(f)

    two_component_system = sim_data.process.two_component_system

    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_twoComponent = two_component_system.moleculeNames.tolist()
    # list() keeps this a concrete sequence where keys() returns a view
    ids_twoComponent_complexes = list(two_component_system.complexToMonomer.keys())
    ids_translation = sim_data.process.translation.monomerData["id"].tolist()
    ids_protein = sorted(
        set(ids_complexation + ids_equilibrium + ids_twoComponent + ids_translation))

    # Bulk molecules that receive the counts of active unique molecules
    ids_ribosome_subunits = [
        sim_data.moleculeIds.s30_fullComplex,
        sim_data.moleculeIds.s50_fullComplex,
        ]
    ids_rnap = [sim_data.moleculeIds.rnapFull]

    # Stoichiometry matrices do not depend on the cell, so build them once
    two_component_stoich = two_component_system.stoichMatrixMonomers()
    equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
    complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

    bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = bulkContainer.countsView(ids_complexation)
    view_complexation_complexes = bulkContainer.countsView(ids_complexation_complexes)
    view_equilibrium = bulkContainer.countsView(ids_equilibrium)
    view_equilibrium_complexes = bulkContainer.countsView(ids_equilibrium_complexes)
    view_twoComponent = bulkContainer.countsView(ids_twoComponent)
    view_twoComponent_complexes = bulkContainer.countsView(ids_twoComponent_complexes)
    view_translation = bulkContainer.countsView(ids_translation)

    proteinPresence = []
    for simDir in allDir:
        simOutDir = os.path.join(simDir, "simOut")

        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        moleculeIds = bulkMolecules.readAttribute("objectNames")
        proteinIndexes = np.array(
            [moleculeIds.index(moleculeId) for moleculeId in ids_protein], int)
        proteinCountsBulk = bulkMolecules.readColumn("counts")[:, proteinIndexes]
        bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules. The ID list and the counts table
        # are each read once and then indexed for both molecules.
        uniqueMoleculeCounts = TableReader(os.path.join(simOutDir, "UniqueMoleculeCounts"))
        uniqueIds = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds")
        uniqueCounts = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")
        uniqueMoleculeCounts.close()
        nActiveRibosome = uniqueCounts[:, uniqueIds.index("activeRibosome")]
        nActiveRnaPoly = uniqueCounts[:, uniqueIds.index("activeRnaPoly")]
        bulkContainer.countsInc(nActiveRibosome.mean(), ids_ribosome_subunits)
        bulkContainer.countsInc(nActiveRnaPoly.mean(), ids_rnap)

        # Account for two-component complexes
        view_twoComponent.countsInc(
            np.dot(two_component_stoich, view_twoComponent_complexes.counts() * -1))

        # Account for small-molecule bound complexes
        view_equilibrium.countsInc(
            np.dot(equil_stoich, view_equilibrium_complexes.counts() * -1))

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(complex_stoich, view_complexation_complexes.counts() * -1))

        # Get boolean protein presence
        proteinCounts = view_translation.counts()
        proteinPresence.append(proteinCounts != 0)

        # Clear counts
        bulkContainer.countsIs(0)

    proteinPresence = np.array(proteinPresence)

    # Plot
    plt.figure(figsize=(12, 12))
    ax = plt.subplot(1, 1, 1)
    nGens = len(allDir)
    # One bin per generation
    ax.hist(np.mean(proteinPresence, axis=0), nGens)
    ax.set_xlabel(
        "Frequency of observing at least 1 protein copy in 1 generation",
        fontsize=14)
    ax.set_ylabel("Number of proteins", fontsize=14)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot average RNA counts against average "functional unit" protein
    counts per cell, one point per complex a monomer takes part in.

    NOTE: the first statement of this function is a bare `return`, so
    everything below it is currently unreachable and the function always
    returns None without producing a figure.

    The disabled body reads colors and mRNA IDs from "figure5B.pickle" in
    plotOutDir (written by figure5B_E_F_G.py), accumulates time-averaged
    RNA and protein counts over all cells in seedOutDir, caches them to
    "<plotOutFileName>.cPickle" in plotOutDir and draws a log-log scatter.

    NOTE(review): the body references monomerToTranslationMonomer,
    complexToMonomer and PLOT_ZEROS_ON_LINE, none of which are defined in
    this function - presumably module-level names; confirm before
    re-enabling.
    """
    # Function is disabled: nothing past this point executes
    return

    HIGHLIGHT_GENES = False
    USE_CACHE = False  # value of this boolean may change (see line 50)

    if not os.path.isdir(seedOutDir):
        raise Exception, "seedOutDir does not currently exist as a directory"

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Check if cache from figure5B_E_F_G.py exists
    if os.path.exists(os.path.join(plotOutDir, "figure5B.pickle")):
        figure5B_data = cPickle.load(
            open(os.path.join(plotOutDir, "figure5B.pickle"), "rb"))
        colors = figure5B_data["colors"]
        mrnaIds = figure5B_data["id"].tolist()
    else:
        print "Requires figure5B.pickle from figure5B_E_F_G.py"
        return

    # Check if cache exists
    if os.path.exists(
            os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName)):
        USE_CACHE = True

    # Get all cells
    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
    allDir = ap.get_cells()

    # Load sim data
    sim_data = cPickle.load(open(simDataFile, "rb"))
    rnaIds = sim_data.process.transcription.rnaData["id"][
        sim_data.relation.
        rnaIndexToMonomerMapping]  # orders rna IDs to match monomer IDs

    # Make views for monomers
    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData[
        "id"].tolist()
    ids_protein = sorted(
        set(ids_complexation + ids_equilibrium + ids_translation))
    bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = bulkContainer.countsView(ids_complexation)
    view_complexation_complexes = bulkContainer.countsView(
        ids_complexation_complexes)
    view_equilibrium = bulkContainer.countsView(ids_equilibrium)
    view_equilibrium_complexes = bulkContainer.countsView(
        ids_equilibrium_complexes)
    view_translation = bulkContainer.countsView(ids_translation)

    # Identify monomers that are subunits for multiple complexes
    monomersInvolvedInManyComplexes = []
    monomersInvolvedInComplexes = []
    for complexId in ids_complexation_complexes:
        subunitIds = sim_data.process.complexation.getMonomers(
            complexId)["subunitIds"]
        for subunitId in subunitIds:
            # A subunit seen before belongs to more than one complex
            if subunitId in monomersInvolvedInComplexes:
                monomersInvolvedInManyComplexes.append(subunitId)
            monomersInvolvedInComplexes.append(subunitId)
    monomersInvolvedInManyComplexes_id = list(
        set(monomersInvolvedInManyComplexes))
    # Maps monomer ID -> {complex ID -> accumulated complex count}
    monomersInvolvedInManyComplexes_dict = {}
    for x in monomersInvolvedInManyComplexes_id:
        monomersInvolvedInManyComplexes_dict[x] = {}

    # Overrides the cache check above, so the cached branch below is
    # never taken
    USE_CACHE = False
    if not USE_CACHE:
        # Get average (over timesteps) counts for all generations (i.e.
        # all cells)
        avgRnaCounts_forAllCells = np.zeros(rnaIds.shape[0], np.float64)
        avgProteinCounts_forAllCells = np.zeros(rnaIds.shape[0], np.float64)
        for i, simDir in enumerate(allDir):
            simOutDir = os.path.join(simDir, "simOut")

            # Account for bulk molecules
            bulkMolecules = TableReader(
                os.path.join(simOutDir, "BulkMolecules"))
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            proteinIndexes = np.array([
                moleculeIds.index(moleculeId) for moleculeId in ids_protein
            ], np.int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
            rnaIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in rnaIds],
                np.int)
            avgRnaCounts = bulkMolecules.readColumn(
                "counts")[:, rnaIndexes].mean(axis=0)
            bulkMolecules.close()
            if i == 0:
                # Skip the first few time steps of the 1st generation
                # (because complexes have not yet formed during these steps)
                bulkContainer.countsIs(
                    np.mean(proteinCountsBulk[5:, :], axis=0))
            else:
                bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

            # Unique molecules
            uniqueMoleculeCounts = TableReader(
                os.path.join(simOutDir, "UniqueMoleculeCounts"))
            ribosomeIndex = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds").index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds").index("activeRnaPoly")
            nActiveRibosome = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")[:, ribosomeIndex]
            nActiveRnaPoly = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")[:, rnaPolyIndex]
            uniqueMoleculeCounts.close()

            # Account for unique molecules
            bulkContainer.countsInc(nActiveRibosome.mean(), [
                sim_data.moleculeIds.s30_fullComplex,
                sim_data.moleculeIds.s50_fullComplex
            ])
            bulkContainer.countsInc(nActiveRnaPoly.mean(),
                [sim_data.moleculeIds.rnapFull])

            # Account for small-molecule bound complexes
            view_equilibrium.countsInc(
                np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                    view_equilibrium_complexes.counts() * -1))

            # Average counts of monomers
            avgMonomerCounts = view_translation.counts()

            # Get counts of "functional units" (i.e. complexed forms)
            # NOTE(review): if counts() returns a NumPy array, [:] yields a
            # view rather than a copy, so the writes below would also
            # change avgMonomerCounts - confirm whether that is intended
            avgProteinCounts = avgMonomerCounts[:]
            avgComplexCounts = view_complexation_complexes.counts()

            for j, complexId in enumerate(ids_complexation_complexes):
                # Map all subunits to the average counts of the complex (ignores counts of monomers)
                # Some subunits are involved in multiple complexes - these cases are tracked separately
                subunitIds = sim_data.process.complexation.getMonomers(
                    complexId)["subunitIds"]
                for subunitId in subunitIds:
                    if subunitId not in ids_translation:
                        if subunitId in monomerToTranslationMonomer:
                            # a couple of monomers have a different ID in ids_translation
                            subunitId = monomerToTranslationMonomer[
                                subunitId]
                        elif "CPLX" in subunitId:
                            # a few transcription factors are complexed with ions
                            subunitId = complexToMonomer[subunitId]
                        elif "RNA" in subunitId:
                            continue

                    if subunitId not in monomersInvolvedInManyComplexes_id:
                        avgProteinCounts[ids_translation.index(
                            subunitId)] = avgComplexCounts[j]
                    else:
                        if complexId not in monomersInvolvedInManyComplexes_dict[
                                subunitId]:
                            monomersInvolvedInManyComplexes_dict[
                                subunitId][complexId] = 0.
                        monomersInvolvedInManyComplexes_dict[subunitId][
                            complexId] += avgComplexCounts[j]

            # Store
            avgRnaCounts_forAllCells += avgRnaCounts
            avgProteinCounts_forAllCells += avgProteinCounts

        # Cache
        D = {
            "rna": avgRnaCounts_forAllCells,
            "protein": avgProteinCounts_forAllCells,
            "monomersInManyComplexes": monomersInvolvedInManyComplexes_dict
        }
        cPickle.dump(
            D,
            open(os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName),
                "wb"))
    else:
        # Using cached data
        D = cPickle.load(
            open(os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName),
                "rb"))
        avgRnaCounts_forAllCells = D["rna"]
        avgProteinCounts_forAllCells = D["protein"]
        monomersInvolvedInManyComplexes_dict = D["monomersInManyComplexes"]

    # Per cell
    avgRnaCounts_perCell = avgRnaCounts_forAllCells / float(len(allDir))
    avgProteinCounts_perCell = avgProteinCounts_forAllCells / float(
        len(allDir))

    # Plot
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))

    # Monomers in several complexes get one point per complex
    for monomer in monomersInvolvedInManyComplexes_id:
        index = ids_translation.index(monomer)
        color_index = mrnaIds.index(rnaIds[index])
        color = colors[color_index]
        for complexId in monomersInvolvedInManyComplexes_dict[monomer]:
            avgComplexCount = monomersInvolvedInManyComplexes_dict[
                monomer][complexId] / float(len(allDir))
            if avgComplexCount == 0:
                # Zero cannot be drawn on a log axis; a fixed small value
                # is plotted instead
                ax.loglog(avgRnaCounts_perCell[index], 2.5e-6, alpha=0.5,
                    marker=".", lw=0., color=color)
            else:
                if avgRnaCounts_perCell[index] == 0:
                    ax.loglog(PLOT_ZEROS_ON_LINE, avgComplexCount, alpha=0.5,
                        marker=".", lw=0., color=color)
                else:
                    ax.loglog(avgRnaCounts_perCell[index], avgComplexCount,
                        alpha=0.5, marker=".", lw=0., color=color)

    # plot monomers that are not involved in complexes or involved in only 1 complex
    monomersInvolvedInManyComplexes_index = [
        ids_translation.index(x) for x in monomersInvolvedInManyComplexes_id
    ]
    A = [
        x for x in xrange(len(ids_translation))
        if x not in monomersInvolvedInManyComplexes_index
    ]
    for i in A:
        color = colors[mrnaIds.index(rnaIds[i])]
        ax.loglog(avgRnaCounts_perCell[i], avgProteinCounts_perCell[i],
            alpha=0.5, marker=".", lw=0., color=color)
    # ax.loglog(avgRnaCounts_perCell[A], avgProteinCounts_perCell[A], alpha = 0.5, marker = ".", lw = 0., color = plot_colors)

    # Plot genes with zero transcripts on an arbitrary line
    noTranscripts_indices = [
        x for x in np.where(avgRnaCounts_perCell == 0)[0]
        if x not in monomersInvolvedInManyComplexes_index
    ]
    for i in noTranscripts_indices:
        color = colors[mrnaIds.index(rnaIds[i])]
        ax.loglog(PLOT_ZEROS_ON_LINE, avgProteinCounts_perCell[i], alpha=0.5,
            marker=".", lw=0., color=color)

    # Highlight
    if HIGHLIGHT_GENES:
        rnaIds = rnaIds.tolist()
        highlights_rnaId = ["EG12437_RNA[c]", "EG12058_RNA[c]"]  # menE, ccmB
        colors = ["g", "r"]
        for i, rna in enumerate(highlights_rnaId):
            if avgRnaCounts_perCell[rnaIds.index(rna)] == 0:
                ax.loglog(PLOT_ZEROS_ON_LINE,
                    avgProteinCounts_perCell[rnaIds.index(rna)],
                    marker='.', lw=0., color=colors[i], ms=15)
            else:
                ax.loglog(avgRnaCounts_perCell[rnaIds.index(rna)],
                    avgProteinCounts_perCell[rnaIds.index(rna)],
                    marker='.', lw=0., color=colors[i], ms=15)

        green_dot = mlines.Line2D([], [], color="green", linewidth=0.,
            marker=".", markersize=15, label="menE")
        red_dot = mlines.Line2D([], [], color="red", linewidth=0.,
            marker=".", markersize=15, label="ccmB")
        plt.legend(handles=[green_dot, red_dot], loc="lower right")

    # ax.hlines(1, ax.get_xlim()[0], ax.get_xlim()[1], linestyle = "--")
    # NOTE(review): the meaning of the constant 9786.77 is not documented
    # here - confirm its source
    ax.hlines(9786.77, ax.get_xlim()[0], ax.get_xlim()[1], linestyle="--")
    ax.set_title(
        "Each (translatable) gene's functional unit is represented as a point\n(ie. x points per gene where x == number of complexes the monomer is involved in)\n(avg across %s generations)"
        % len(allDir))
    ax.set_xlabel("<RNA> per cell")
    ax.set_ylabel("<Functional units (protein)> per cell")
    ax.tick_params(which="both", direction="out")

    plt.subplots_adjust(hspace=0.5, wspace=0.5, left=0.1, bottom=0.1,
        top=0.9, right=0.95)

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def createContainer():
    """Return a fresh BulkObjectsContainer for OBJECT_NAMES with its counts
    set to OBJECT_COUNTS.
    """
    fixture = BulkObjectsContainer(OBJECT_NAMES)
    fixture.countsIs(OBJECT_COUNTS)
    return fixture
def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """Scatter-plot expected vs. simulated relative monomer abundances.

    Monomer counts are reconstructed by averaging the bulk "counts" column
    over axis 0 (presumably time steps -- confirm against TableReader),
    adding the mean number of active ribosomes / RNA polymerases to their
    full complexes, and then redistributing equilibrium and complexation
    complexes back onto their subunits via the stoichiometry matrices.
    The resulting translation-monomer distribution is compared, on a
    log10(x + 1) scale, with the distribution expected from RNA expression,
    translation efficiencies, and dilution + degradation rates.

    Args:
        simOutDir: directory holding the "BulkMolecules" and
            "UniqueMoleculeCounts" tables; must already exist.
        plotOutDir: output directory; created if it does not exist.
        plotOutFileName: base name handed to exportFigure.
        simDataFile: path to the pickled sim_data object.
        validationDataFile: accepted for interface compatibility; not read
            by this plot.
        metadata: passed through to exportFigure.

    Raises:
        Exception: if simOutDir is not an existing directory.
    """
    if not os.path.isdir(simOutDir):
        raise Exception("simOutDir does not currently exist as a directory")

    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)

    # Get the names of proteins from the KB.
    # NOTE: unpickling executes arbitrary code; simDataFile must be trusted.
    with open(simDataFile, "rb") as simDataHandle:
        sim_data = cPickle.load(simDataHandle)

    ids_complexation = sim_data.process.complexation.moleculeNames
    ids_complexation_complexes = sim_data.process.complexation.ids_complexes
    ids_equilibrium = sim_data.process.equilibrium.moleculeNames
    ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData["id"].tolist()
    ids_protein = sorted(
        set(ids_complexation + ids_equilibrium + ids_translation))

    # float64 so that averaged (fractional) counts can be stored
    bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = bulkContainer.countsView(ids_complexation)
    view_complexation_complexes = bulkContainer.countsView(
        ids_complexation_complexes)
    view_equilibrium = bulkContainer.countsView(ids_equilibrium)
    view_equilibrium_complexes = bulkContainer.countsView(
        ids_equilibrium_complexes)
    view_translation = bulkContainer.countsView(ids_translation)

    bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
    try:
        moleculeIds = bulkMolecules.readAttribute("objectNames")
        # Builtin int replaces the removed np.int alias (same dtype).
        proteinIndexes = np.array(
            [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
            int)
        proteinCountsBulk = bulkMolecules.readColumn(
            "counts")[:, proteinIndexes]
    finally:
        # Release the table even if an ID lookup or read fails.
        bulkMolecules.close()

    # Account for monomers
    bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

    # Account for unique molecules
    uniqueMoleculeCounts = TableReader(
        os.path.join(simOutDir, "UniqueMoleculeCounts"))
    try:
        # Read the attribute and the column once each, then index them.
        uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
            "uniqueMoleculeIds")
        ribosomeIndex = uniqueMoleculeIds.index("activeRibosome")
        rnaPolyIndex = uniqueMoleculeIds.index("activeRnaPoly")
        uniqueCounts = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")
        nActiveRibosome = uniqueCounts[:, ribosomeIndex]
        nActiveRnaPoly = uniqueCounts[:, rnaPolyIndex]
    finally:
        uniqueMoleculeCounts.close()

    bulkContainer.countsInc(nActiveRibosome.mean(), [
        sim_data.moleculeIds.s30_fullComplex,
        sim_data.moleculeIds.s50_fullComplex,
    ])
    bulkContainer.countsInc(nActiveRnaPoly.mean(),
                            [sim_data.moleculeIds.rnapFull])

    # Account for small-molecule bound complexes.
    # Order matters: equilibrium is unwound before complexation, so the
    # complexation complex counts read below already include that update.
    view_equilibrium.countsInc(
        np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
               view_equilibrium_complexes.counts() * -1))

    # Account for monomers in complexed form
    view_complexation.countsInc(
        np.dot(sim_data.process.complexation.stoichMatrixMonomers(),
               view_complexation_complexes.counts() * -1))

    avgCounts = view_translation.counts()
    relativeCounts = avgCounts / avgCounts.sum()

    # Expected abundance ~ expression * translation efficiency, divided by
    # the loss rate (ln(2) / doubling time + degradation rate).
    expectedCountsArbitrary = normalize(
        sim_data.process.transcription.rnaExpression[sim_data.condition][
            sim_data.relation.rnaIndexToMonomerMapping]
        * sim_data.process.translation.translationEfficienciesByMonomer
        / (np.log(2) / sim_data.doubling_time.asNumber(units.s)
           + sim_data.process.translation.monomerData["degRate"].asNumber(
               1 / units.s)))
    expectedCountsRelative = (
        expectedCountsArbitrary / expectedCountsArbitrary.sum())

    # log10(x + 1) keeps zero-count entries finite; computed once and
    # shared by the scatter plot and the correlation in the title.
    logExpected = np.log10(expectedCountsRelative + 1)
    logActual = np.log10(relativeCounts + 1)

    plt.figure(figsize=(8.5, 11))

    maxLine = 1.1 * max(np.log10(expectedCountsRelative.max() + 1),
                        np.log10(relativeCounts.max() + 1))
    plt.plot([0, maxLine], [0, maxLine], '--r')  # y = x reference line
    plt.plot(logExpected, logActual, 'o',
             markeredgecolor='k', markerfacecolor='none')

    plt.xlabel("log10(Expected protein distribution (from fitting))")
    plt.ylabel(
        "log10(Actual protein distribution (average over life cycle))")
    plt.title("PCC (of log values): %0.2f"
              % pearsonr(logExpected, logActual)[0])

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def getPCC(args):
    """Correlate simulated monomer counts with validation counts for a variant.

    Takes a single tuple argument (presumably so it can be mapped over a
    list of work items -- confirm against the caller):

        variant: variant index passed to ap.get_cells / ap.get_variant_kb.
        ap: analysis-paths object providing get_cells() and
            get_variant_kb().
        monomerIds: IDs of the monomers to compare; they must be present
            in the protein container built below.
        schmidtCounts: validation counts aligned with monomerIds; must
            support ``+ 1`` element-wise (e.g. a NumPy array).

    Monomer counts are reconstructed from the first cell returned for the
    variant: bulk counts averaged over axis 0, plus mean active ribosomes /
    RNA polymerases, with equilibrium and complexation complexes unwound
    onto their subunits.

    Returns:
        (pcc, pval) from pearsonr on log10(count + 1) values, or
        (nan, nan) if anything fails while loading or computing; the
        exception is printed, not raised (best-effort by design).
    """
    # Tuple parameters in a signature are Python-2-only (PEP 3113);
    # unpacking here keeps the same one-positional-argument interface and
    # still raises at call time on a malformed tuple.
    variant, ap, monomerIds, schmidtCounts = args

    try:
        simDir = ap.get_cells(variant=[variant])[0]

        # NOTE: unpickling executes arbitrary code; the KB file must be
        # trusted.
        with open(ap.get_variant_kb(variant), "rb") as simDataHandle:
            sim_data = cPickle.load(simDataHandle)

        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = (
            sim_data.process.complexation.ids_complexes)
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))

        # float64 so that averaged (fractional) counts can be stored
        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_validation_schmidt = bulkContainer.countsView(monomerIds)

        simOutDir = os.path.join(simDir, "simOut")

        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        try:
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            # Builtin int replaces the removed np.int alias (same dtype).
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
        finally:
            # Errors are swallowed below, so close explicitly to avoid
            # leaking table handles across repeated calls.
            bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules
        uniqueMoleculeCounts = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        try:
            # Read the attribute and the column once each, then index them.
            uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds")
            ribosomeIndex = uniqueMoleculeIds.index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeIds.index("activeRnaPoly")
            uniqueCounts = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")
            nActiveRibosome = uniqueCounts[:, ribosomeIndex]
            nActiveRnaPoly = uniqueCounts[:, rnaPolyIndex]
        finally:
            uniqueMoleculeCounts.close()

        bulkContainer.countsInc(nActiveRibosome.mean(), [
            sim_data.moleculeIds.s30_fullComplex,
            sim_data.moleculeIds.s50_fullComplex,
        ])
        bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                [sim_data.moleculeIds.rnapFull])

        # Account for small-molecule bound complexes.
        # Order matters: equilibrium is unwound before complexation, so the
        # complexation complex counts read below already include that update.
        view_equilibrium.countsInc(
            np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                   view_equilibrium_complexes.counts() * -1))

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(sim_data.process.complexation.stoichMatrixMonomers(),
                   view_complexation_complexes.counts() * -1))

        # log10(x + 1) keeps zero counts finite
        pcc, pval = pearsonr(np.log10(view_validation_schmidt.counts() + 1),
                             np.log10(schmidtCounts + 1))

        return pcc, pval

    except Exception as e:
        # Deliberate best-effort: report the failure and return NaNs so one
        # bad variant does not abort processing of the others.
        print(e)
        return np.nan, np.nan