def test_dtype_float32(self):
		"""Check that a BulkObjectsContainer built with dtype=np.float32
		keeps fractional values through countsIs, countsInc and countsDec,
		both on the container itself and through a counts view.
		"""
		bulk = BulkObjectsContainer(OBJECT_NAMES, dtype=np.float32)
		starting = [10, 10.5, 20]
		bulk.countsIs(starting)
		npt.assert_equal(bulk.counts(), starting)

		# The same deltas are applied first to the container, then to a view
		increments = [10, 20.5, 30.5]
		decrements = [1.5, 2, 3.5]

		# Round trip directly on the container
		bulk.countsInc(increments)
		npt.assert_equal(bulk.counts(), [20, 31, 50.5])
		bulk.countsDec(decrements)
		npt.assert_equal(bulk.counts(), [18.5, 29, 47])

		# Round trip through a view created with no arguments; the expected
		# values continue from the container state left by the steps above
		view = bulk.countsView()
		view.countsInc(increments)
		npt.assert_equal(view.counts(), [28.5, 49.5, 77.5])
		view.countsDec(decrements)
		npt.assert_equal(view.counts(), [27, 47.5, 74])
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Scatter plot of log10 simulated protein counts against log10
		validation counts (Schmidt 2015 'glucoseCounts') for two variants.

		For every cell of the first two variants, time-averaged bulk counts
		are adjusted for active ribosomes/RNA polymerases and for monomers
		held in equilibrium and complexation complexes. The adjusted counts
		of PROTEINS_WITH_HALF_LIFE are then averaged over cells and plotted
		with error bars derived from the standard deviation over cells.

		Args:
			inputDir: directory holding the variant output and the 'kb'
				directory with the serialized sim_data
			plotOutDir: output directory, passed to filepath.makedirs
			plotOutFileName: file name handed to exportFigure
			simDataFile: not used here; sim_data is loaded from inputDir
			validationDataFile: path to the pickled validation data
			metadata: handed to exportFigure unchanged

		Raises:
			Exception: if inputDir is not an existing directory
		"""
		if not os.path.isdir(inputDir):
			raise Exception('inputDir does not currently exist as a directory')

		filepath.makedirs(plotOutDir)

		# NOTE: unpickling executes arbitrary code - only load trusted files
		with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, 'rb') as f:
			validation_data = cPickle.load(f)

		ap = AnalysisPaths(inputDir, variant_plot=True)
		variants = ap.get_variants()
		expected_n_variants = 2
		n_variants = len(variants)

		if n_variants < expected_n_variants:
			print('This plot only runs for {} variants.'.format(expected_n_variants))
			return

		# IDs for appropriate proteins
		ids_complexation = sim_data.process.complexation.moleculeNames
		ids_complexation_complexes = sim_data.process.complexation.ids_complexes
		ids_equilibrium = sim_data.process.equilibrium.moleculeNames
		ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
		ids_translation = sim_data.process.translation.monomerData['id'].tolist()
		ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))

		# Stoichiometry matrices
		equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
		complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

		# Protein container views
		protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
		view_complexation = protein_container.countsView(ids_complexation)
		view_complexation_complexes = protein_container.countsView(ids_complexation_complexes)
		view_equilibrium = protein_container.countsView(ids_equilibrium)
		view_equilibrium_complexes = protein_container.countsView(ids_equilibrium_complexes)

		# Load model data
		model_counts = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
		model_std = np.zeros((len(PROTEINS_WITH_HALF_LIFE), expected_n_variants))
		for i, variant in enumerate(variants):
			if i >= expected_n_variants:
				print('Skipping variant {} - only runs for {} variants.'.format(variant, expected_n_variants))
				continue

			variant_counts = []
			for sim_dir in ap.get_cells(variant=[variant]):
				simOutDir = os.path.join(sim_dir, 'simOut')

				# Listeners used
				unique_counts_reader = TableReader(os.path.join(simOutDir, 'UniqueMoleculeCounts'))

				# Account for bulk molecules
				(bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
				protein_container.countsIs(bulk_counts.mean(axis=0))

				# Account for unique molecules; the attribute and the column
				# are each read once and then indexed for both molecule types
				unique_ids = unique_counts_reader.readAttribute('uniqueMoleculeIds')
				unique_counts = unique_counts_reader.readColumn('uniqueMoleculeCounts')
				n_ribosomes = unique_counts[:, unique_ids.index('activeRibosome')]
				n_rnap = unique_counts[:, unique_ids.index('activeRnaPoly')]
				protein_container.countsInc(n_ribosomes.mean(), [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
				protein_container.countsInc(n_rnap.mean(), [sim_data.moleculeIds.rnapFull])

				# Account for small-molecule bound complexes
				view_equilibrium.countsDec(equil_stoich.dot(view_equilibrium_complexes.counts()))

				# Account for monomers in complexed form
				view_complexation.countsDec(complex_stoich.dot(view_complexation_complexes.counts()))

				variant_counts.append(protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())
			model_counts[:, i] = np.mean(variant_counts, axis=0)
			model_std[:, i] = np.std(variant_counts, axis=0)

		# Validation data
		schmidt_ids = {m: i for i, m in enumerate(validation_data.protein.schmidt2015Data['monomerId'])}
		schmidt_counts = validation_data.protein.schmidt2015Data['glucoseCounts']
		validation_counts = np.array([schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

		# Process data: error bars are the log-space distances from the mean
		# to (mean - std) and (mean + std)
		# NOTE(review): if model_std >= model_counts for any protein the lower
		# bound is log10 of a non-positive number (nan/inf) - confirm this
		# cannot happen for PROTEINS_WITH_HALF_LIFE
		model_log_counts = np.log10(model_counts)
		model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
		model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
		validation_log_counts = np.log10(validation_counts)
		r_before = stats.pearsonr(validation_log_counts, model_log_counts[:, 0])
		r_after = stats.pearsonr(validation_log_counts, model_log_counts[:, 1])

		# Scatter plot of model vs validation counts
		max_counts = np.ceil(max(validation_log_counts.max(), model_log_upper_std.max()))
		limits = [0, max_counts]
		plt.figure()
		colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

		## Plot data
		for i in range(expected_n_variants):
			plt.errorbar(validation_log_counts, model_log_counts[:, i],
				yerr=np.vstack((model_log_lower_std[:, i], model_log_upper_std[:, i])),
				fmt='o', color=colors[i], ecolor='k', capsize=3, alpha=0.5)
		# Identity line, kept out of the legend
		plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

		## Format axes
		plt.xlabel('Validation Counts\n(log10(counts))')
		plt.ylabel('Average Simulation Counts\n(log10(counts))')
		ax = plt.gca()
		ax.spines['right'].set_visible(False)
		ax.spines['top'].set_visible(False)
		ax.spines['left'].set_position(('outward', 10))
		ax.spines['bottom'].set_position(('outward', 10))
		ax.xaxis.set_major_locator(MaxNLocator(integer=True))
		ax.yaxis.set_major_locator(MaxNLocator(integer=True))

		## Add legend
		legend_text = [
			'Before: r={:.2f}, p={:.3f}'.format(r_before[0], r_before[1]),
			'After: r={:.2f}, p={:.3f}'.format(r_after[0], r_after[1]),
			]
		plt.legend(legend_text, frameon=False)

		plt.tight_layout()
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)

		plt.close('all')
示例#3
0
	def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""
		Scatter plot of log10(simulated counts + 1) against
		log10(validation counts + 1) for the Schmidt 2015 monomers, averaged
		over all cells of one seed. Correlation and deviation summaries for
		low- and high-abundance proteins are printed.

		Args:
			seedOutDir: directory holding the generations of one seed
			plotOutDir: output directory (created if it does not exist)
			plotOutFileName: file name handed to exportFigure
			simDataFile: path to the pickled sim_data
			validationDataFile: path to the pickled validation data
			metadata: handed to exportFigure unchanged

		Raises:
			Exception: if seedOutDir is not an existing directory
		"""
		if not os.path.isdir(seedOutDir):
			raise Exception("seedOutDir does not currently exist as a directory")

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		# NOTE: unpickling executes arbitrary code - only load trusted files
		with open(simDataFile, "rb") as f:
			sim_data = cPickle.load(f)
		with open(validationDataFile, "rb") as f:
			validation_data = cPickle.load(f)

		ids_complexation = sim_data.process.complexation.moleculeNames
		ids_complexation_complexes = sim_data.process.complexation.ids_complexes
		ids_equilibrium = sim_data.process.equilibrium.moleculeNames
		ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
		ids_translation = sim_data.process.translation.monomerData["id"].tolist()
		ids_protein = sorted(set(ids_complexation + ids_equilibrium + ids_translation))
		bulkContainer = BulkObjectsContainer(ids_protein, dtype = np.float64)
		view_complexation = bulkContainer.countsView(ids_complexation)
		view_complexation_complexes = bulkContainer.countsView(ids_complexation_complexes)
		view_equilibrium = bulkContainer.countsView(ids_equilibrium)
		view_equilibrium_complexes = bulkContainer.countsView(ids_equilibrium_complexes)
		view_validation_schmidt = bulkContainer.countsView(validation_data.protein.schmidt2015Data["monomerId"].tolist())

		# Stoichiometry matrices do not depend on the cell, so build them once
		equil_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
		complex_stoich = sim_data.process.complexation.stoichMatrixMonomers()

		# Get all cells
		ap = AnalysisPaths(seedOutDir, multi_gen_plot = True)

		allDir = ap.get_cells()

		# One row of adjusted validation-monomer counts per cell
		counts_per_cell = []

		plt.figure(figsize = (4, 4))

		for simDir in allDir:
			simOutDir = os.path.join(simDir, "simOut")

			bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
			moleculeIds = bulkMolecules.readAttribute("objectNames")
			# builtin int replaces the removed np.int alias (same dtype)
			proteinIndexes = np.array([moleculeIds.index(moleculeId) for moleculeId in ids_protein], int)
			proteinCountsBulk = bulkMolecules.readColumn("counts")[:, proteinIndexes]
			bulkMolecules.close()

			# Account for monomers
			bulkContainer.countsIs(proteinCountsBulk.mean(axis = 0))

			# Account for unique molecules; the attribute and the column are
			# each read once and then indexed for both molecule types
			uniqueMoleculeCounts = TableReader(os.path.join(simOutDir, "UniqueMoleculeCounts"))
			uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute("uniqueMoleculeIds")
			uniqueCounts = uniqueMoleculeCounts.readColumn("uniqueMoleculeCounts")
			uniqueMoleculeCounts.close()
			nActiveRibosome = uniqueCounts[:, uniqueMoleculeIds.index("activeRibosome")]
			nActiveRnaPoly = uniqueCounts[:, uniqueMoleculeIds.index("activeRnaPoly")]
			bulkContainer.countsInc(nActiveRibosome.mean(), [sim_data.moleculeIds.s30_fullComplex, sim_data.moleculeIds.s50_fullComplex])
			bulkContainer.countsInc(nActiveRnaPoly.mean(), [sim_data.moleculeIds.rnapFull])

			# Account for small-molecule bound complexes
			view_equilibrium.countsInc(
				np.dot(equil_stoich, view_equilibrium_complexes.counts() * -1)
				)

			# Account for monomers in complexed form
			view_complexation.countsInc(
				np.dot(complex_stoich, view_complexation_complexes.counts() * -1)
				)

			counts_per_cell.append(view_validation_schmidt.counts())

		simulation_counts = np.array(counts_per_cell).mean(axis = 0)

		# Schmidt Counts
		schmidt_counts = validation_data.protein.schmidt2015Data["glucoseCounts"]

		# Set up mask for proteins with low counts
		low_count_mask = schmidt_counts < LOW_COUNT_THRESHOLD
		n_low_count = low_count_mask.sum()
		n_high_count = schmidt_counts.size - n_low_count

		# Take logs (+1 keeps zero counts finite)
		schmidt_counts_log = np.log10(schmidt_counts + 1)
		simulation_counts_log = np.log10(simulation_counts + 1)

		# Compute deviations
		# NOTE(review): an exact match gives log10(0) = -inf and drags the
		# reported mean to -inf - confirm whether that needs guarding
		deviation_log = np.log10(np.abs(simulation_counts - schmidt_counts))

		axis = plt.subplot(1,1,1)

		axis.plot(schmidt_counts_log, simulation_counts_log, 'o', color = "black", markersize = 6, alpha = 0.1, zorder = 1, markeredgewidth = 0.0)
		print("R^2 (all proteins) = %.3f (n = %d)" % (
			(pearsonr(simulation_counts_log, schmidt_counts_log)[0])**2,
			schmidt_counts.size
			))
		print("R^2 (low-abundance proteins) = %.3f (n = %d)" % (
			(pearsonr(simulation_counts_log[low_count_mask],
				schmidt_counts_log[low_count_mask])[0])**2,
			n_low_count
			))
		print("R^2 (high-abundance proteins) = %.3f (n = %d)" % (
			(pearsonr(simulation_counts_log[~low_count_mask],
				schmidt_counts_log[~low_count_mask])[0])**2,
			n_high_count
			))

		print("Average log deviation (low-abundance proteins) = %.3f" % (
			deviation_log[low_count_mask].mean()))
		print("Average log deviation (high-abundance proteins) = %.3f" % (
			deviation_log[~low_count_mask].mean()))

		# Identity line up to the next whole decade above the data
		maxLine = np.ceil(
			max(schmidt_counts_log.max(), simulation_counts_log.max())
			)
		plt.plot([0, maxLine], [0, maxLine], '-k')

		axis.spines["right"].set_visible(False)
		axis.spines["top"].set_visible(False)
		axis.spines["left"].set_position(("outward", 10))
		axis.spines["bottom"].set_position(("outward", 10))
		# Booleans are required here: the string "off" is truthy and would
		# switch the ticks on
		axis.tick_params(right = False)
		axis.tick_params(top = False)
		axis.tick_params(which = "both", direction = "out")

		# Small negative lower bound so points at zero are not clipped
		axis.set_xlim([-0.07, maxLine])
		axis.set_ylim([-0.07, maxLine])

		exportFigure(plt, plotOutDir, plotOutFileName, metadata)
		plt.close("all")
    def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Histogram of the fraction of generations in which each translated
        protein is present.

        For every cell, time-averaged bulk counts are adjusted for active
        ribosomes/RNA polymerases and for monomers held in two-component,
        equilibrium and complexation complexes. A protein counts as present
        in a generation when its adjusted count is non-zero.

        Args:
            seedOutDir: directory holding the generations of one seed
            plotOutDir: output directory (created if it does not exist)
            plotOutFileName: file name handed to exportFigure
            simDataFile: path to the pickled sim_data
            validationDataFile: not used by this plot
            metadata: handed to exportFigure unchanged

        Raises:
            Exception: if seedOutDir is not an existing directory
        """
        if not os.path.isdir(seedOutDir):
            raise Exception("seedOutDir does not currently exist as a directory")

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        # Get all cells
        ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
        allDir = ap.get_cells()

        # NOTE: unpickling executes arbitrary code - only load trusted files
        with open(simDataFile, "rb") as f:
            sim_data = cPickle.load(f)

        complexation = sim_data.process.complexation
        equilibrium = sim_data.process.equilibrium
        two_component_system = sim_data.process.two_component_system

        ids_complexation = complexation.moleculeNames
        ids_complexation_complexes = complexation.ids_complexes
        ids_equilibrium = equilibrium.moleculeNames
        ids_equilibrium_complexes = equilibrium.ids_complexes
        ids_twoComponent = two_component_system.moleculeNames.tolist()
        # list() so a real list is passed on under Python 3 as well
        ids_twoComponent_complexes = list(
            two_component_system.complexToMonomer.keys())
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_twoComponent +
                ids_translation))

        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_twoComponent = bulkContainer.countsView(ids_twoComponent)
        view_twoComponent_complexes = bulkContainer.countsView(
            ids_twoComponent_complexes)
        view_translation = bulkContainer.countsView(ids_translation)

        # Stoichiometry matrices do not depend on the cell, so build them once
        twoComponent_stoich = two_component_system.stoichMatrixMonomers()
        equilibrium_stoich = equilibrium.stoichMatrixMonomers()
        complexation_stoich = complexation.stoichMatrixMonomers()

        proteinPresence = []
        for simDir in allDir:
            simOutDir = os.path.join(simDir, "simOut")
            bulkMolecules = TableReader(
                os.path.join(simOutDir, "BulkMolecules"))
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            # builtin int replaces the removed np.int alias (same dtype)
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
            bulkMolecules.close()

            # Account for monomers
            bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

            # Account for unique molecules; the attribute and the column are
            # each read once and then indexed for both molecule types
            uniqueMoleculeCounts = TableReader(
                os.path.join(simOutDir, "UniqueMoleculeCounts"))
            uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds")
            uniqueCounts = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")
            uniqueMoleculeCounts.close()
            nActiveRibosome = uniqueCounts[
                :, uniqueMoleculeIds.index("activeRibosome")]
            nActiveRnaPoly = uniqueCounts[
                :, uniqueMoleculeIds.index("activeRnaPoly")]
            bulkContainer.countsInc(nActiveRibosome.mean(), [
                sim_data.moleculeIds.s30_fullComplex,
                sim_data.moleculeIds.s50_fullComplex
            ])
            bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                    [sim_data.moleculeIds.rnapFull])

            # Account for two-component complexes
            view_twoComponent.countsInc(
                np.dot(twoComponent_stoich,
                       view_twoComponent_complexes.counts() * -1))

            # Account for small-molecule bound complexes
            view_equilibrium.countsInc(
                np.dot(equilibrium_stoich,
                       view_equilibrium_complexes.counts() * -1))

            # Account for monomers in complexed form
            view_complexation.countsInc(
                np.dot(complexation_stoich,
                       view_complexation_complexes.counts() * -1))

            # Get boolean protein presence
            proteinCounts = view_translation.counts()
            proteinPresence.append(proteinCounts != 0)

            # Clear counts
            bulkContainer.countsIs(0)

        proteinPresence = np.array(proteinPresence)

        # Plot: one histogram bin per generation
        plt.figure(figsize=(12, 12))
        ax = plt.subplot(1, 1, 1)
        nGens = len(allDir)
        ax.hist(np.mean(proteinPresence, axis=0), nGens)
        ax.set_xlabel(
            "Frequency of observing at least 1 protein copy in 1 generation",
            fontsize=14)
        ax.set_ylabel("Number of proteins", fontsize=14)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
    def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """
        Log-log scatter of average RNA counts per cell against average counts
        of protein "functional units" per cell, coloured with the colours
        stored in figure5B.pickle.

        NOTE: this plot is currently disabled. The method returns on its
        first statement, so nothing below that statement is executed.

        Args:
            seedOutDir: directory holding the generations of one seed
            plotOutDir: output directory; also where figure5B.pickle is read
                and the "<plotOutFileName>.cPickle" cache is written
            plotOutFileName: file name handed to exportFigure
            simDataFile: path to the pickled sim_data
            validationDataFile: not used by this plot
            metadata: handed to exportFigure unchanged

        Returns:
            None
        """
        # Plot disabled: remove this return to re-enable the analysis below
        return

        HIGHLIGHT_GENES = False

        if not os.path.isdir(seedOutDir):
            raise Exception("seedOutDir does not currently exist as a directory")

        if not os.path.exists(plotOutDir):
            os.mkdir(plotOutDir)

        # Colours and mRNA ordering come from the cache of figure5B_E_F_G.py
        # NOTE: unpickling executes arbitrary code - only load trusted files
        figure5B_path = os.path.join(plotOutDir, "figure5B.pickle")
        if not os.path.exists(figure5B_path):
            print("Requires figure5B.pickle from figure5B_E_F_G.py")
            return
        with open(figure5B_path, "rb") as f:
            figure5B_data = cPickle.load(f)
        colors = figure5B_data["colors"]
        mrnaIds = figure5B_data["id"].tolist()

        # Cached results are not reused: USE_CACHE is forced to False, so the
        # counts are always recomputed and the cache file is rewritten
        cache_path = os.path.join(plotOutDir, "%s.cPickle" % plotOutFileName)
        USE_CACHE = False

        # Get all cells
        ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)
        allDir = ap.get_cells()

        # Load sim data
        with open(simDataFile, "rb") as f:
            sim_data = cPickle.load(f)
        rnaIds = sim_data.process.transcription.rnaData["id"][
            sim_data.relation.
            rnaIndexToMonomerMapping]  # orders rna IDs to match monomer IDs

        # Make views for monomers
        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = sim_data.process.complexation.ids_complexes
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))
        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_translation = bulkContainer.countsView(ids_translation)

        # Identify monomers that are subunits for multiple complexes
        monomersInvolvedInManyComplexes = []
        seenSubunits = set()
        for complexId in ids_complexation_complexes:
            subunitIds = sim_data.process.complexation.getMonomers(
                complexId)["subunitIds"]
            for subunitId in subunitIds:
                if subunitId in seenSubunits:
                    monomersInvolvedInManyComplexes.append(subunitId)
                seenSubunits.add(subunitId)
        monomersInvolvedInManyComplexes_id = list(
            set(monomersInvolvedInManyComplexes))
        # subunit ID -> {complex ID -> complex counts summed over cells}
        monomersInvolvedInManyComplexes_dict = {
            x: {} for x in monomersInvolvedInManyComplexes_id
        }

        if not USE_CACHE:
            # Get average (over timesteps) counts for all generations (ie. all cells)
            avgRnaCounts_forAllCells = np.zeros(rnaIds.shape[0], np.float64)
            avgProteinCounts_forAllCells = np.zeros(rnaIds.shape[0],
                                                    np.float64)
            equilibrium_stoich = sim_data.process.equilibrium.stoichMatrixMonomers()
            for i, simDir in enumerate(allDir):
                simOutDir = os.path.join(simDir, "simOut")

                # Account for bulk molecules
                bulkMolecules = TableReader(
                    os.path.join(simOutDir, "BulkMolecules"))
                moleculeIds = bulkMolecules.readAttribute("objectNames")
                # builtin int replaces the removed np.int alias (same dtype)
                proteinIndexes = np.array([
                    moleculeIds.index(moleculeId) for moleculeId in ids_protein
                ], int)
                rnaIndexes = np.array(
                    [moleculeIds.index(moleculeId) for moleculeId in rnaIds],
                    int)
                bulkCounts = bulkMolecules.readColumn("counts")
                bulkMolecules.close()
                proteinCountsBulk = bulkCounts[:, proteinIndexes]
                avgRnaCounts = bulkCounts[:, rnaIndexes].mean(axis=0)
                if i == 0:
                    # Skip first few time steps for 1st generation (because complexes have not yet formed during these steps)
                    bulkContainer.countsIs(
                        np.mean(proteinCountsBulk[5:, :], axis=0))
                else:
                    bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

                # Unique molecules
                uniqueMoleculeCounts = TableReader(
                    os.path.join(simOutDir, "UniqueMoleculeCounts"))
                uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                    "uniqueMoleculeIds")
                uniqueCounts = uniqueMoleculeCounts.readColumn(
                    "uniqueMoleculeCounts")
                uniqueMoleculeCounts.close()
                nActiveRibosome = uniqueCounts[
                    :, uniqueMoleculeIds.index("activeRibosome")]
                nActiveRnaPoly = uniqueCounts[
                    :, uniqueMoleculeIds.index("activeRnaPoly")]

                # Account for unique molecules
                bulkContainer.countsInc(nActiveRibosome.mean(), [
                    sim_data.moleculeIds.s30_fullComplex,
                    sim_data.moleculeIds.s50_fullComplex
                ])
                bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                        [sim_data.moleculeIds.rnapFull])

                # Account for small-molecule bound complexes
                view_equilibrium.countsInc(
                    np.dot(equilibrium_stoich,
                           view_equilibrium_complexes.counts() * -1))

                # Average counts of monomers
                avgMonomerCounts = view_translation.counts()

                # Get counts of "functional units" (ie. complexed forms)
                avgProteinCounts = avgMonomerCounts[:]
                avgComplexCounts = view_complexation_complexes.counts()

                for j, complexId in enumerate(ids_complexation_complexes):
                    # Map all subunits to the average counts of the complex (ignores counts of monomers)
                    # Some subunits are involved in multiple complexes - these cases are kept track
                    subunitIds = sim_data.process.complexation.getMonomers(
                        complexId)["subunitIds"]

                    for subunitId in subunitIds:
                        if subunitId not in ids_translation:
                            # NOTE(review): monomerToTranslationMonomer and
                            # complexToMonomer are not defined in this
                            # method - presumably module-level; confirm
                            if subunitId in monomerToTranslationMonomer:
                                # couple monomers have different ID in ids_translation
                                subunitId = monomerToTranslationMonomer[
                                    subunitId]
                            elif "CPLX" in subunitId:
                                # few transcription factors are complexed with ions
                                subunitId = complexToMonomer[subunitId]
                            elif "RNA" in subunitId:
                                continue

                        # The dict has exactly the multi-complex monomer IDs
                        # as keys, so this is the same membership test
                        if subunitId not in monomersInvolvedInManyComplexes_dict:
                            avgProteinCounts[ids_translation.index(
                                subunitId)] = avgComplexCounts[j]
                        else:
                            perComplex = monomersInvolvedInManyComplexes_dict[
                                subunitId]
                            if complexId not in perComplex:
                                perComplex[complexId] = 0.
                            perComplex[complexId] += avgComplexCounts[j]

                # Store
                avgRnaCounts_forAllCells += avgRnaCounts
                avgProteinCounts_forAllCells += avgProteinCounts

            # Cache
            D = {
                "rna": avgRnaCounts_forAllCells,
                "protein": avgProteinCounts_forAllCells,
                "monomersInManyComplexes": monomersInvolvedInManyComplexes_dict
            }
            with open(cache_path, "wb") as f:
                cPickle.dump(D, f)

        else:
            # Using cached data
            with open(cache_path, "rb") as f:
                D = cPickle.load(f)
            avgRnaCounts_forAllCells = D["rna"]
            avgProteinCounts_forAllCells = D["protein"]
            monomersInvolvedInManyComplexes_dict = D["monomersInManyComplexes"]

        # Per cell
        nCells = float(len(allDir))
        avgRnaCounts_perCell = avgRnaCounts_forAllCells / nCells
        avgProteinCounts_perCell = avgProteinCounts_forAllCells / nCells

        # Plot
        fig, ax = plt.subplots(1, 1, figsize=(10, 10))

        # Monomers in several complexes: one point per (monomer, complex)
        for monomer in monomersInvolvedInManyComplexes_id:
            index = ids_translation.index(monomer)
            color_index = mrnaIds.index(rnaIds[index])
            color = colors[color_index]

            for complexId in monomersInvolvedInManyComplexes_dict[monomer]:
                avgComplexCount = monomersInvolvedInManyComplexes_dict[
                    monomer][complexId] / nCells

                if avgComplexCount == 0:
                    # Zero complex counts are drawn at a fixed small y value
                    xValue = avgRnaCounts_perCell[index]
                    yValue = 2.5e-6
                elif avgRnaCounts_perCell[index] == 0:
                    xValue = PLOT_ZEROS_ON_LINE
                    yValue = avgComplexCount
                else:
                    xValue = avgRnaCounts_perCell[index]
                    yValue = avgComplexCount
                ax.loglog(xValue,
                          yValue,
                          alpha=0.5,
                          marker=".",
                          lw=0.,
                          color=color)

        # plot monomers that are not involved in complexes or involved in only 1 complex
        monomersInvolvedInManyComplexes_index = set(
            ids_translation.index(x)
            for x in monomersInvolvedInManyComplexes_id)
        A = [
            x for x in range(len(ids_translation))
            if x not in monomersInvolvedInManyComplexes_index
        ]
        for i in A:
            color = colors[mrnaIds.index(rnaIds[i])]
            ax.loglog(avgRnaCounts_perCell[i],
                      avgProteinCounts_perCell[i],
                      alpha=0.5,
                      marker=".",
                      lw=0.,
                      color=color)

        # Plot genes with zero transcripts on an arbitrary line
        noTranscripts_indices = [
            x for x in np.where(avgRnaCounts_perCell == 0)[0]
            if x not in monomersInvolvedInManyComplexes_index
        ]
        for i in noTranscripts_indices:
            color = colors[mrnaIds.index(rnaIds[i])]
            ax.loglog(PLOT_ZEROS_ON_LINE,
                      avgProteinCounts_perCell[i],
                      alpha=0.5,
                      marker=".",
                      lw=0.,
                      color=color)

        # Highlight
        if HIGHLIGHT_GENES:
            rnaIds = rnaIds.tolist()
            highlights_rnaId = ["EG12437_RNA[c]",
                                "EG12058_RNA[c]"]  # menE, ccmB
            highlight_colors = ["g", "r"]
            for i, rna in enumerate(highlights_rnaId):
                rnaIndex = rnaIds.index(rna)
                if avgRnaCounts_perCell[rnaIndex] == 0:
                    xValue = PLOT_ZEROS_ON_LINE
                else:
                    xValue = avgRnaCounts_perCell[rnaIndex]
                ax.loglog(xValue,
                          avgProteinCounts_perCell[rnaIndex],
                          marker='.',
                          lw=0.,
                          color=highlight_colors[i],
                          ms=15)

            green_dot = mlines.Line2D([], [],
                                      color="green",
                                      linewidth=0.,
                                      marker=".",
                                      markersize=15,
                                      label="menE")
            red_dot = mlines.Line2D([], [],
                                    color="red",
                                    linewidth=0.,
                                    marker=".",
                                    markersize=15,
                                    label="ccmB")
            plt.legend(handles=[green_dot, red_dot], loc="lower right")

        # NOTE(review): 9786.77 is an unexplained constant - confirm its meaning
        ax.hlines(9786.77, ax.get_xlim()[0], ax.get_xlim()[1], linestyle="--")

        ax.set_title(
            "Each (translatable) gene's functional unit is represented as a point\n(ie. x points per gene where x == number of complexes the monomer is involved in)\n(avg across %s generations)"
            % len(allDir))
        ax.set_xlabel("<RNA> per cell")
        ax.set_ylabel("<Functional units (protein)> per cell")
        ax.tick_params(which="both", direction="out")

        plt.subplots_adjust(hspace=0.5,
                            wspace=0.5,
                            left=0.1,
                            bottom=0.1,
                            top=0.9,
                            right=0.95)

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
示例#6
0
    def do_plot(self, simOutDir, plotOutDir, plotOutFileName, simDataFile,
                validationDataFile, metadata):
        """Scatter-plot simulated vs. expected relative monomer counts.

        The simulated side is built from the "BulkMolecules" counts averaged
        over axis 0, plus the mean active ribosome / RNA polymerase counts
        added to their full complexes, with complex counts then folded back
        through the equilibrium and complexation ``stoichMatrixMonomers()``
        matrices.  The expected side is computed from ``sim_data`` RNA
        expression, translation efficiencies, doubling time and monomer
        degradation rates.  Both are normalized to sum to one and plotted as
        ``log10(x + 1)``; the Pearson correlation of the two log series is
        shown in the title.

        Args:
            simOutDir: existing directory containing the "BulkMolecules" and
                "UniqueMoleculeCounts" tables.
            plotOutDir: directory the figure is exported to; created
                (including missing parents) when it does not exist.
            plotOutFileName: forwarded to ``exportFigure``.
            simDataFile: path of the pickled ``sim_data`` object.
            validationDataFile: not used by this plot.
            metadata: forwarded to ``exportFigure``.

        Raises:
            Exception: if ``simOutDir`` is not an existing directory.
        """
        if not os.path.isdir(simOutDir):
            raise Exception(
                "simOutDir does not currently exist as a directory")

        if not os.path.exists(plotOutDir):
            os.makedirs(plotOutDir)

        # Get the names of proteins from the KB
        # NOTE(review): unpickling runs arbitrary code; simDataFile must come
        # from a trusted source.
        with open(simDataFile, "rb") as f:
            sim_data = cPickle.load(f)

        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = sim_data.process.complexation.ids_complexes
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))
        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_translation = bulkContainer.countsView(ids_translation)

        # Close the reader even when a lookup or read fails.
        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        try:
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            # Builtin int replaces the removed ``np.int`` alias (same dtype).
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
        finally:
            bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules (attribute and column are read once
        # and reused for both molecule types)
        uniqueMoleculeCounts = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        try:
            uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds")
            ribosomeIndex = uniqueMoleculeIds.index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeIds.index("activeRnaPoly")
            uniqueCounts = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")
            nActiveRibosome = uniqueCounts[:, ribosomeIndex]
            nActiveRnaPoly = uniqueCounts[:, rnaPolyIndex]
        finally:
            uniqueMoleculeCounts.close()
        bulkContainer.countsInc(nActiveRibosome.mean(), [
            sim_data.moleculeIds.s30_fullComplex,
            sim_data.moleculeIds.s50_fullComplex
        ])
        bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                [sim_data.moleculeIds.rnapFull])

        # Account for small-molecule bound complexes
        view_equilibrium.countsInc(
            np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                   view_equilibrium_complexes.counts() * -1))

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(sim_data.process.complexation.stoichMatrixMonomers(),
                   view_complexation_complexes.counts() * -1))

        avgCounts = view_translation.counts()
        relativeCounts = avgCounts / avgCounts.sum()

        # expression * translation efficiency / (ln(2) / doubling time +
        # degradation rate), per monomer
        expectedCountsArbitrary = normalize(
            sim_data.process.transcription.rnaExpression[sim_data.condition][
                sim_data.relation.rnaIndexToMonomerMapping] *
            sim_data.process.translation.translationEfficienciesByMonomer /
            (np.log(2) / sim_data.doubling_time.asNumber(units.s) +
             sim_data.process.translation.monomerData["degRate"].asNumber(
                 1 / units.s)))
        expectedCountsRelative = (
            expectedCountsArbitrary / expectedCountsArbitrary.sum())

        # The +1 keeps zero counts finite under log10; compute each log
        # series once and reuse it for the scatter and the correlation.
        logExpected = np.log10(expectedCountsRelative + 1)
        logActual = np.log10(relativeCounts + 1)

        plt.figure(figsize=(8.5, 11))

        # Identity line extended 10% past the largest plotted value
        maxLine = 1.1 * max(np.log10(expectedCountsRelative.max() + 1),
                            np.log10(relativeCounts.max() + 1))
        plt.plot([0, maxLine], [0, maxLine], '--r')
        plt.plot(logExpected,
                 logActual,
                 'o',
                 markeredgecolor='k',
                 markerfacecolor='none')

        plt.xlabel("log10(Expected protein distribution (from fitting))")
        plt.ylabel(
            "log10(Actual protein distribution (average over life cycle))")
        plt.title("PCC (of log values): %0.2f" %
                  pearsonr(logExpected, logActual)[0])

        exportFigure(plt, plotOutDir, plotOutFileName, metadata)
        plt.close("all")
# Example #7
# 0
def getPCC(args):
    """Correlate simulated monomer counts of one variant with given counts.

    Takes a single tuple so it can be used with one-argument mapping helpers
    (the original signature unpacked this tuple in the parameter list, which
    Python 3 no longer allows).

    Args:
        args: ``(variant, ap, monomerIds, schmidtCounts)`` where
            ``variant`` is passed to ``ap.get_cells`` / ``ap.get_variant_kb``,
            ``ap`` provides those two methods,
            ``monomerIds`` are the container IDs whose counts are compared,
            ``schmidtCounts`` are the reference counts (must support ``+ 1``
            and ``np.log10`` -- presumably a NumPy array aligned with
            ``monomerIds``; confirm against the caller).

    Returns:
        ``(pcc, pval)`` from ``pearsonr`` on ``log10(count + 1)`` of both
        series, or ``(np.nan, np.nan)`` if anything raised while loading or
        computing (the error is printed).

    Raises:
        TypeError / ValueError: if ``args`` cannot be unpacked into four
            items (this happens before the best-effort ``try``).
    """
    variant, ap, monomerIds, schmidtCounts = args
    try:
        simDir = ap.get_cells(variant=[variant])[0]

        # NOTE(review): unpickling runs arbitrary code; the variant KB file
        # must come from a trusted source.
        with open(ap.get_variant_kb(variant), "rb") as f:
            sim_data = cPickle.load(f)

        ids_complexation = sim_data.process.complexation.moleculeNames
        ids_complexation_complexes = sim_data.process.complexation.ids_complexes
        ids_equilibrium = sim_data.process.equilibrium.moleculeNames
        ids_equilibrium_complexes = sim_data.process.equilibrium.ids_complexes
        ids_translation = sim_data.process.translation.monomerData[
            "id"].tolist()
        ids_protein = sorted(
            set(ids_complexation + ids_equilibrium + ids_translation))

        bulkContainer = BulkObjectsContainer(ids_protein, dtype=np.float64)
        view_complexation = bulkContainer.countsView(ids_complexation)
        view_complexation_complexes = bulkContainer.countsView(
            ids_complexation_complexes)
        view_equilibrium = bulkContainer.countsView(ids_equilibrium)
        view_equilibrium_complexes = bulkContainer.countsView(
            ids_equilibrium_complexes)
        view_validation_schmidt = bulkContainer.countsView(monomerIds)

        simOutDir = os.path.join(simDir, "simOut")

        # Close the reader even when a lookup or read fails.
        bulkMolecules = TableReader(os.path.join(simOutDir, "BulkMolecules"))
        try:
            moleculeIds = bulkMolecules.readAttribute("objectNames")
            # Builtin int replaces the removed ``np.int`` alias (same dtype).
            proteinIndexes = np.array(
                [moleculeIds.index(moleculeId) for moleculeId in ids_protein],
                int)
            proteinCountsBulk = bulkMolecules.readColumn(
                "counts")[:, proteinIndexes]
        finally:
            bulkMolecules.close()

        # Account for monomers
        bulkContainer.countsIs(proteinCountsBulk.mean(axis=0))

        # Account for unique molecules (attribute and column are read once
        # and reused for both molecule types)
        uniqueMoleculeCounts = TableReader(
            os.path.join(simOutDir, "UniqueMoleculeCounts"))
        try:
            uniqueMoleculeIds = uniqueMoleculeCounts.readAttribute(
                "uniqueMoleculeIds")
            ribosomeIndex = uniqueMoleculeIds.index("activeRibosome")
            rnaPolyIndex = uniqueMoleculeIds.index("activeRnaPoly")
            uniqueCounts = uniqueMoleculeCounts.readColumn(
                "uniqueMoleculeCounts")
            nActiveRibosome = uniqueCounts[:, ribosomeIndex]
            nActiveRnaPoly = uniqueCounts[:, rnaPolyIndex]
        finally:
            uniqueMoleculeCounts.close()
        bulkContainer.countsInc(nActiveRibosome.mean(), [
            sim_data.moleculeIds.s30_fullComplex,
            sim_data.moleculeIds.s50_fullComplex
        ])
        bulkContainer.countsInc(nActiveRnaPoly.mean(),
                                [sim_data.moleculeIds.rnapFull])

        # Account for small-molecule bound complexes
        view_equilibrium.countsInc(
            np.dot(sim_data.process.equilibrium.stoichMatrixMonomers(),
                   view_equilibrium_complexes.counts() * -1))

        # Account for monomers in complexed form
        view_complexation.countsInc(
            np.dot(sim_data.process.complexation.stoichMatrixMonomers(),
                   view_complexation_complexes.counts() * -1))

        # The +1 keeps zero counts finite under log10.
        pcc, pval = pearsonr(np.log10(view_validation_schmidt.counts() + 1),
                             np.log10(schmidtCounts + 1))

        return pcc, pval

    except Exception as e:
        # Deliberate best-effort: a failing variant is reported and yields
        # NaNs instead of aborting the caller's whole batch.
        print(e)
        return np.nan, np.nan