def run_task(self, fw_spec):
        """Run every multiple-generation analysis plot, serially or in a pool.

        Configuration is read from ``self`` with mapping-style access:

        * ``plots_to_run`` (optional): plot file names; when missing or empty,
          ``models.ecoli.analysis.multigen.ACTIVE`` is used instead.
        * ``output_filename_prefix`` (optional, default ``''``): prepended to
          each plot's output file name.
        * ``input_seed_directory``, ``output_plots_directory``,
          ``input_sim_data``, ``input_validation_data``, ``metadata``
          (required): forwarded unchanged to each plot.

        The last three characters of each file name are dropped (presumably a
        ``.py`` suffix) to obtain the module name under
        ``models.ecoli.analysis.multigen``.

        If the ``WC_ANALYZE_FAST`` environment variable is set, each plot is
        submitted to a multiprocessing pool through ``run_plot``; otherwise
        ``Plot.main`` is called in-process and any exception is printed with
        its traceback before moving on to the next plot.

        Args:
            fw_spec: Not used by this method.

        Raises:
            Exception: After all plots were attempted, if any of them failed.
        """
        start_time = time.time()
        print("\n%s: Running multiple generation analysis" % time.ctime(
            start_time))

        plot_files = self.get("plots_to_run", [])
        if not plot_files:
            plot_files = models.ecoli.analysis.multigen.ACTIVE

        output_filename_prefix = self.get('output_filename_prefix', '')

        # Read the environment once so every phase agrees on the mode.
        run_parallel = "WC_ANALYZE_FAST" in os.environ
        pool = None
        if run_parallel:
            pool = mp.Pool(processes=parallelization.cpus())

        # (file name, AsyncResult) pairs kept in submission order so failures
        # are reported in the same order the plots were requested.
        pending = []
        failed_files = []

        try:
            for f in plot_files:
                module_name = f[:-3]
                mod = importlib.import_module(
                    "models.ecoli.analysis.multigen." + module_name)
                args = (
                    self["input_seed_directory"],
                    self["output_plots_directory"],
                    output_filename_prefix + module_name,
                    self["input_sim_data"],
                    self["input_validation_data"],
                    self["metadata"],
                )

                if run_parallel:
                    result = pool.apply_async(run_plot,
                                              args=(mod.Plot, args, f))
                    pending.append((f, result))
                else:
                    print("%s: Running %s" % (time.ctime(), f))
                    try:
                        mod.Plot.main(*args)
                    except Exception:
                        # Keep going: one broken plot must not block the rest.
                        traceback.print_exc()
                        failed_files.append(f)

            if run_parallel:
                pool.close()
                pool.join()
                failed_files.extend(
                    f for f, result in pending if not result.successful())
        finally:
            # Harmless after a clean close()/join(); on an unexpected error
            # (e.g. a failed import) it stops workers instead of leaking them.
            if pool is not None:
                pool.terminate()

        elapsed = time.strftime(
            "%H:%M:%S", time.gmtime(time.time() - start_time))

        if failed_files:
            print("Completed multiple generation analysis in %s with an exception in:" % elapsed)
            for name in failed_files:
                print("\t%s" % name)
            raise Exception("Error in multigen analysis")

        print("Completed multiple generation analysis in %s" % elapsed)
Пример #2
0
	def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""Draw a violin plot of the values returned by ``mp_worker`` for each sim.

		Cells are requested for generations ``FIRST_GENERATION`` up to the last
		available generation and for seeds 0-7. Falsy worker results are
		treated as broken simulations and dropped. A horizontal line marks the
		ribosome abundance interpolated (spline fit of ``RIBO_VALIDATION``) at
		the doubling time of ``sim_data.condition``.

		Two figures are exported: ``<plotOutFileName>__clean`` (no y tick
		labels, right spine hidden, no metadata) and ``<plotOutFileName>``
		(title, y label, ticks and metadata).

		Args:
			variantDir: Existing directory handed to ``AnalysisPaths``.
			plotOutDir: Output directory; created if it does not exist.
			plotOutFileName: Base name of the exported figures.
			simDataFile: Path of the pickled ``sim_data`` object.
			validationDataFile: Not used by this plot.
			metadata: Passed through to ``exportFigure`` for the full figure.

		Raises:
			Exception: If ``variantDir`` is not a directory.
		"""
		# NOTE(review): mp_worker is not visible here; it presumably reads these
		# module globals inside the worker processes - confirm. They are
		# assigned before the Pool is created below.
		global ribosome_30s_id
		global ribosome_50s_id

		if not os.path.isdir(variantDir):
			raise Exception('variantDir does not currently exist as a directory')

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		analysis_paths = AnalysisPaths(variantDir, cohort_plot = True)
		n_gens = analysis_paths.n_generation

		# Check for sufficient generations
		if n_gens - 1 < FIRST_GENERATION:
			print('Not enough generations to plot.')
			return

		sim_dirs = analysis_paths.get_cells(
			generation=range(FIRST_GENERATION, n_gens), seed = range(8))

		# Close the pickle file deterministically instead of relying on GC.
		with open(simDataFile, 'rb') as sim_data_file:
			sim_data = cPickle.load(sim_data_file)

		ribosome_30s_id = sim_data.moleculeIds.s30_fullComplex
		ribosome_50s_id = sim_data.moleculeIds.s50_fullComplex

		pool = Pool(parallelization.cpus())
		try:
			output = pool.map(mp_worker, sim_dirs)
		finally:
			# Always release the worker processes, even if a worker raised.
			pool.close()
			pool.join()

		# Filter output from broken files
		ribosome_counts = [x for x in output if x]

		if not ribosome_counts:
			print('Skipping plot due to no viable sims.')
			return

		# Plot
		doubling_time = sim_data.conditionToDoublingTime[sim_data.condition].asNumber(units.min)
		params = interpolate.splrep(
			RIBO_VALIDATION['doubling_time'],
			RIBO_VALIDATION['ribosome_abundance'])
		ribosome_abundance_fit = interpolate.splev(doubling_time, params)

		_, ax = plt.subplots(1, 1, figsize=FIGSIZE)
		ax.violinplot(ribosome_counts)
		ax.axhline(ribosome_abundance_fit, color='tab:orange', lw=1)
		ax.set_ylim(*COUNTS_BOUNDS)
		ax.set_xlim([0.5, 1.5])
		ax.set_xticks([])
		# Remember the tick positions so they can be restored for the full figure
		y_ticks = ax.get_yticks()
		ax.set_yticklabels([])
		ax.spines['right'].set_visible(False)
		exportFigure(plt, plotOutDir, '{}__clean'.format(plotOutFileName), None)

		# Re-decorate the same axes for the annotated version of the figure
		ax.set_title('n = {}'.format(len(ribosome_counts)))
		ax.set_ylabel('Molecule abundance (counts)')
		ax.set_yticks(y_ticks)
		ax.spines['right'].set_visible(True)
		ax.spines['left'].set_visible(True)
		plt.subplots_adjust(left=0.4, bottom=0.2, right=0.6, top=0.8)
		exportFigure(plt, plotOutDir, plotOutFileName, metadata)
		plt.close("all")
	def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
		"""Plot per-variant simulation averages next to the ``bremer_*`` data.

		For every variant, ``mp_worker`` is mapped over that variant's cells
		and the per-cell rows are reduced with ``np.nanmean`` / ``np.nanstd``.
		Four panels are drawn against doubling time: RNA mass per cell,
		ribosome elongation rate, origins per cell at chromosome initiation
		and rrn initiation rate. The figure is exported as
		``<plotOutFileName>__test``.

		Args:
			inputDir: Existing directory handed to ``AnalysisPaths``.
			plotOutDir: Output directory; created if it does not exist.
			plotOutFileName: Base name of the exported figure.
			simDataFile: Not used; each variant's own sim_data is loaded via
				``ap.get_variant_kb``.
			validationDataFile: Not used by this plot.
			metadata: Passed through to ``exportFigure``.

		Raises:
			Exception: If ``inputDir`` is not a directory.
		"""
		# NOTE(review): mp_worker is not visible here; it presumably reads this
		# module global inside the worker processes - confirm. It is assigned
		# before each Pool is created below.
		global is_rRNA

		if not os.path.isdir(inputDir):
			raise Exception("inputDir does not currently exist as a directory")

		if not os.path.exists(plotOutDir):
			os.mkdir(plotOutDir)

		ap = AnalysisPaths(inputDir, variant_plot = True)
		variants = ap.get_variants()

		# Column positions within each row returned by mp_worker
		index_doubling_time = 0
		sim_doubling_time = []

		index_rna_mass = 1
		sim_rna_mass_per_cell = []
		sim_rna_mass_per_cell_std = []

		index_elng_rate = 2
		sim_elng_rate = []
		sim_elng_rate_std = []

		index_n_origin_init = 3
		sim_origins_per_cell_at_initiation = []
		sim_origins_per_cell_at_initiation_std = []

		index_rrn_init_rate = 4
		sim_rrn_init_rate = []
		sim_rrn_init_rate_std = []

		for varIdx in range(ap.n_variant):
			variant = variants[varIdx]
			print("variant {}".format(variant))

			sim_dirs = ap.get_cells(variant=[variant])
			n_sims = len(sim_dirs)
			print("Total cells: {}".format(n_sims))

			if n_sims == 0:
				# An empty result has no columns, so output[:, i] would raise
				# IndexError; leave this variant out of the plot instead.
				print("No cells found for variant {}; skipping.".format(variant))
				continue

			try:
				# Pickles are binary data: open in 'rb' and close the handle.
				with open(ap.get_variant_kb(variant), 'rb') as sim_data_file:
					sim_data = cPickle.load(sim_data_file)

				is_rRNA = sim_data.process.transcription.rnaData["isRRna"]

			except Exception as e:
				print("Couldn't load sim_data object. Exiting. {}".format(e))
				return

			pool = Pool(parallelization.cpus())
			try:
				output = np.array(pool.map(mp_worker, sim_dirs))
			finally:
				# Always release the worker processes, even if a worker raised.
				pool.close()
				pool.join()

			# Filter output from broken files using np.nanmean and np.nanstd
			# (dividing by 60 presumably converts seconds to minutes - confirm)
			sim_doubling_time.append(np.nanmean(output[:, index_doubling_time]) / 60.)

			sim_rna_mass_per_cell.append(np.nanmean(output[:, index_rna_mass]))
			sim_rna_mass_per_cell_std.append(np.nanstd(output[:, index_rna_mass]))

			sim_elng_rate.append(np.nanmean(output[:, index_elng_rate]))
			sim_elng_rate_std.append(np.nanstd(output[:, index_elng_rate]))

			sim_origins_per_cell_at_initiation.append(np.nanmean(output[:, index_n_origin_init]))
			sim_origins_per_cell_at_initiation_std.append(np.nanstd(output[:, index_n_origin_init]))

			sim_rrn_init_rate.append(np.nanmean(output[:, index_rrn_init_rate]))
			sim_rrn_init_rate_std.append(np.nanstd(output[:, index_rrn_init_rate]))

		sim_doubling_time = np.array(sim_doubling_time)

		# Plot
		_, axes_list = plt.subplots(1, 4, figsize=(15, 5))
		ax0, ax1, ax2, ax3 = axes_list
		# Both series are drawn from the longest to the shortest doubling time
		sort_sim = np.argsort(sim_doubling_time)[::-1]
		sort_bremer = np.argsort(bremer_tau)[::-1]

		def plot_panel(ax, sim_mean, sim_std, exp_values, title):
			# Draw simulated mean +/- std and the reference series on one axes.
			ax.errorbar(
				sim_doubling_time[sort_sim],
				np.array(sim_mean)[sort_sim],
				yerr=np.array(sim_std)[sort_sim],
				color='tab:blue', **SIM_PLOT_STYLE)
			ax.errorbar(
				bremer_tau[sort_bremer],
				exp_values[sort_bremer],
				color=HIGHLIGHT_COLOR, **EXP_PLOT_STYLE)
			ax.set_title(title, fontsize=FONT_SIZE)
			ax.set_xlabel('Doubling time (min)', fontsize=FONT_SIZE)

		# RNA mass per cell
		plot_panel(
			ax0, sim_rna_mass_per_cell, sim_rna_mass_per_cell_std,
			bremer_rna_mass_per_cell, 'RNA mass per cell (fg)')
		ax0.set_xlim([0, 135])
		ax0.set_ylim([0, 250])
		ax0.legend(loc=1, fontsize='xx-small', markerscale=0.5, frameon=False)

		# Ribosome elongation rate
		plot_panel(
			ax1, sim_elng_rate, sim_elng_rate_std,
			bremer_elng_rate, 'Ribosome elongation\nrate (aa/s/ribosome)')
		ax1.set_ylim([5, 24])

		# Number of origins at chromosome initiation
		plot_panel(
			ax2, sim_origins_per_cell_at_initiation,
			sim_origins_per_cell_at_initiation_std,
			bremer_origins_per_cell_at_initiation,
			'Average origins at chrom. init.')
		ax2.set_ylim([0.5, 4.5])

		# rRNA initiation rate
		plot_panel(
			ax3, sim_rrn_init_rate, sim_rrn_init_rate_std,
			bremer_rrn_init_rate, 'Rate of rrn initiation (1/min)')
		ax3.set_ylim([0, 2500])

		for ax in axes_list:
			# Shared x range; ticks are placed only at the axis limits
			ax.set_xlim(X_LIM)
			ax.set_xticks(X_LIM)
			ax.set_ylim(ax.get_ylim())
			ax.set_yticks(ax.get_ylim())

			# label1 is the primary tick label; the old `label` alias is
			# deprecated in newer matplotlib releases.
			for tick in ax.yaxis.get_major_ticks():
				tick.label1.set_fontsize(FONT_SIZE)
			for tick in ax.xaxis.get_major_ticks():
				tick.label1.set_fontsize(FONT_SIZE)

		plt.subplots_adjust(bottom=0.25, top=0.75, left=0.05, right=0.95, wspace=0.4)
		exportFigure(plt, plotOutDir, '{}__test'.format(plotOutFileName), metadata)
		plt.close('all')