def __init__(self, path):
    """
    Create the output directory and open the data and offsets files.

    Args:
        path: Directory in which FILE_DATA and FILE_OFFSETS are created.
            It is passed to filepath.makedirs() first.
    """
    filepath.makedirs(path)

    self._data = open(os.path.join(path, FILE_DATA), "w")
    try:
        self._offsets = open(os.path.join(path, FILE_OFFSETS), "w")
    except Exception:
        # Don't leak the already-open data file if the second open fails.
        self._data.close()
        raise

    # Starts out unset; nothing in this constructor determines it.
    self._dtype = None
def do_plot(self, seedOutDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Template for a multi-generation analysis plot.

    Args:
        seedOutDir: Existing seed output directory to analyze.
        plotOutDir: Directory for the exported figure (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Path to the pickled sim_data object.
        validationDataFile: Path to the pickled validation data object.
        metadata: Passed through to exportFigure().

    Raises:
        Exception: If seedOutDir is not a directory.
    """
    if not os.path.isdir(seedOutDir):
        # Call form of raise: valid in both Python 2 and Python 3.
        raise Exception('seedOutDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    with open(simDataFile, 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(seedOutDir, multi_gen_plot=True)

    for sim_dir in ap.get_cells():
        simOutDir = os.path.join(sim_dir, 'simOut')

        # Listeners used
        main_reader = TableReader(os.path.join(simOutDir, 'Main'))

        # Load data
        time = main_reader.readColumn('time')

    plt.figure()

    ### Create Plot ###

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def __init__(self, path):
    """
    Create the on-disk directory layout under `path` and record the version.

    Creates the DIR_METADATA, DIR_ATTRIBUTES and DIR_COLUMNS subdirectories
    and writes VERSION into the FILE_VERSION file inside the metadata
    directory.

    Args:
        path: Root directory for this writer's output.
    """
    dirMetadata = filepath.makedirs(path, DIR_METADATA)

    # Use a context manager so the version file is flushed and closed
    # deterministically instead of whenever the handle is collected.
    with open(os.path.join(dirMetadata, FILE_VERSION), "w") as versionFile:
        versionFile.write(VERSION)

    self._dirAttributes = filepath.makedirs(path, DIR_ATTRIBUTES)
    self._attributeNames = []

    self._dirColumns = filepath.makedirs(path, DIR_COLUMNS)
    # Starts out unset; nothing in this constructor creates columns.
    self._columns = None
def run(self, args):
    """
    Run the fitter pipeline and write its outputs under <sim_path>/kb.

    Builds and runs, in order: raw data initialization, the level-1 sim_data
    fit, a symlink naming the most-fit sim_data, raw validation data
    initialization, and validation data initialization.

    Args:
        args: Parsed command line arguments. Reads sim_path, debug, cached,
            cpus, disable_ribosome_fitting, disable_rnapoly_fitting and
            no_expression_adjustment.
    """
    kb_directory = fp.makedirs(args.sim_path, "kb")
    raw_data_file = os.path.join(kb_directory, constants.SERIALIZED_RAW_DATA)
    sim_data_file = os.path.join(
        kb_directory, constants.SERIALIZED_FIT1_FILENAME)
    cached_sim_data_file = os.path.join(
        fp.ROOT_PATH, 'cached', constants.SERIALIZED_FIT1_FILENAME)
    most_fit_filename = os.path.join(
        kb_directory, constants.SERIALIZED_SIM_DATA_MOST_FIT_FILENAME)
    raw_validation_data_file = os.path.join(
        kb_directory, constants.SERIALIZED_RAW_VALIDATION_DATA)
    validation_data_file = os.path.join(
        kb_directory, constants.SERIALIZED_VALIDATION_DATA)

    if args.debug or args.cached:
        # Single-argument print(...) behaves the same as a Python 2 print
        # statement and matches the call form used by the analysis plots.
        print("{}{}Fitter".format(
            'DEBUG ' if args.debug else '',
            'CACHED ' if args.cached else '',
            ))

    tasks = [
        InitRawDataTask(
            output=raw_data_file,
            ),

        FitSimDataTask(
            fit_level=1,
            input_data=raw_data_file,
            output_data=sim_data_file,
            cached=args.cached,  # bool
            cached_data=cached_sim_data_file,  # cached file to copy
            cpus=args.cpus,
            debug=args.debug,
            disable_ribosome_capacity_fitting=args.disable_ribosome_fitting,
            disable_rnapoly_capacity_fitting=args.disable_rnapoly_fitting,
            # NOTE(review): the option name reads as a negation of the task
            # parameter; confirm the flag's stored value has the intended
            # polarity.
            adjust_rna_and_protein_parameters=args.no_expression_adjustment,
            ),

        SymlinkTask(
            to=constants.SERIALIZED_FIT1_FILENAME,
            link=most_fit_filename,
            overwrite_if_exists=True,
            ),

        InitRawValidationDataTask(
            output=raw_validation_data_file,
            ),

        InitValidationDataTask(
            validation_data_input=raw_validation_data_file,
            knowledge_base_raw=raw_data_file,
            output_data=validation_data_file,
            ),
        ]

    for task in tasks:
        task.run_task({})

    print('\n\t'.join([
        'Wrote', raw_data_file, sim_data_file, most_fit_filename,
        raw_validation_data_file, validation_data_file]))
def parse_args(self):
    """
    Parse the command line, then derive and create the output directory.

    When args.timestamp is set, args.sim_outdir gets its spaces replaced by
    underscores and is prefixed with fp.timestamp() and '__'. args.sim_path
    is set to the directory <ROOT_PATH>/out/<sim_outdir> returned by
    fp.makedirs().

    Returns:
        The parsed args with sim_outdir (possibly) rewritten and sim_path set.
    """
    args = super(RunFitter, self).parse_args()

    if args.timestamp:
        prefix = fp.timestamp() + '__'
        args.sim_outdir = prefix + args.sim_outdir.replace(' ', '_')

    out_path = fp.makedirs(fp.ROOT_PATH, "out", args.sim_outdir)
    args.sim_path = out_path
    return args
def __init__(self, **kwargs):
    """
    Validate the subclass definition, apply keyword options, and initialize
    the simulation from the pickled sim_data.

    Args:
        **kwargs: Overrides for the entries of DEFAULT_SIMULATION_KWARGS.
            Each option is stored on the instance as "_<name>".

    Raises:
        SimulationException: If the subclass lacks a required attribute,
            lists a listener that is already included by default, or if an
            unknown keyword argument is passed.
    """
    # Validate subclassing
    for attrName in self._definedBySubclass:
        if not hasattr(self, attrName):
            raise SimulationException(
                "Simulation subclasses must define the {} attribute.".format(
                    attrName))

    for listenerClass in DEFAULT_LISTENER_CLASSES:
        if listenerClass in self._listenerClasses:
            # Format the whole message. Previously .format() bound only to
            # the second string literal, which has no placeholder, so the
            # listener name was never substituted into the "{}".
            raise SimulationException(
                "The {} listener is included by default"
                " in the Simulation class.".format(listenerClass.name()))

    # Set instance attributes
    for attrName, value in DEFAULT_SIMULATION_KWARGS.viewitems():
        if attrName in kwargs.viewkeys():
            value = kwargs[attrName]

        setattr(self, "_" + attrName, value)

    unknownKeywords = kwargs.viewkeys() - DEFAULT_SIMULATION_KWARGS.viewkeys()

    # Test for non-emptiness directly; any() would look at the truthiness
    # of the individual names instead.
    if unknownKeywords:
        raise SimulationException(
            "Unknown keyword arguments: {}".format(unknownKeywords))

    # Set time variables
    self._simulationStep = 0

    self.randomState = np.random.RandomState(
        seed=np.uint32(self._seed % np.iinfo(np.uint32).max))

    # divide_cell will fail if _outputDir is no good (e.g. defaulted to
    # None) so catch it *before* running the simulation in case _logToDisk
    # doesn't.
    filepath.makedirs(self._outputDir)

    # Load KB
    with open(self._simDataLocation, "rb") as simDataFile:
        sim_data = cPickle.load(simDataFile)

    # Initialize simulation from fit KB
    self._initialize(sim_data)
def run(self, args):
    """
    Run the variant-level analysis plots for the simulation in args.sim_path.

    Plots are written to the 'plotOut' directory directly under sim_path.
    """
    plot_dir = filepath.makedirs(args.sim_path, 'plotOut')

    task_options = dict(
        input_directory=args.sim_path,
        input_validation_data=args.input_validation_data,
        output_plots_directory=plot_dir,
        metadata=args.metadata,
        plots_to_run=args.plot,
        output_filename_prefix=args.output_prefix,
        )

    analysis_task = AnalysisVariantTask(**task_options)
    analysis_task.run_task({})
def test_makedirs(self):
    """
    filepath.makedirs() creates nested directories, tolerates an existing
    path, returns the joined path, and raises OSError when a regular file
    blocks the way.
    """
    nested = 'this/is/a/test'
    target = os.path.join(self.test_dir, nested)
    self.assertFalse(os.path.exists(target))

    # Creating a fresh directory path returns that path.
    created = filepath.makedirs(self.test_dir, nested)
    self.assertEqual(created, target)
    self.assertTrue(os.path.exists(target))

    # The same path, split into different components, already exists.
    recreated = filepath.makedirs(self.test_dir, 'this', 'is', 'a/test')
    self.assertEqual(recreated, target)
    self.assertTrue(os.path.exists(target))

    # A data file sitting where a directory is requested must fail.
    blocker = 'data'
    blocker_path = os.path.join(created, blocker)
    with open(blocker_path, 'w') as f:
        f.write('hi')

    with nose.tools.assert_raises(OSError):
        filepath.makedirs(self.test_dir, nested, blocker)
def run(self, args):
    """
    Run the cohort-level analysis plots for one variant directory.

    Plots are written to 'plotOut' under <sim_path>/<variant_dir_name>.
    """
    variant_directory = os.path.join(args.sim_path, args.variant_dir_name)
    modified_sim_data = os.path.join(
        variant_directory, 'kb', constants.SERIALIZED_SIM_DATA_MODIFIED)

    # TODO(jerry): Load simData_Modified into metadata?
    plot_dir = filepath.makedirs(variant_directory, 'plotOut')

    task_options = dict(
        input_variant_directory=variant_directory,
        input_sim_data=modified_sim_data,
        input_validation_data=args.input_validation_data,
        output_plots_directory=plot_dir,
        metadata=args.metadata,
        plots_to_run=args.plot,
        output_filename_prefix=args.output_prefix,
        )

    analysis_task = AnalysisCohortTask(**task_options)
    analysis_task.run_task({})
def run(self, args):
    """
    Run the multi-generation analysis plots for one seed directory.

    Plots are written to 'plotOut' under
    <sim_path>/<variant_dir_name>/<seed_str>.
    """
    variant_directory = os.path.join(args.sim_path, args.variant_dir_name)
    seed_directory = os.path.join(variant_directory, args.seed_str)
    modified_sim_data = os.path.join(
        variant_directory, 'kb', constants.SERIALIZED_SIM_DATA_MODIFIED)

    plot_dir = filepath.makedirs(seed_directory, "plotOut")

    task_options = dict(
        input_seed_directory=seed_directory,
        input_sim_data=modified_sim_data,
        input_validation_data=args.input_validation_data,
        output_plots_directory=plot_dir,
        metadata=args.metadata,
        plots_to_run=args.plot,
        output_filename_prefix=args.output_prefix,
        )

    analysis_task = AnalysisMultiGenTask(**task_options)
    analysis_task.run_task({})
def run(self, args):
    """
    Run the single-cell analysis plots for one simulated cell.

    The cell is located at
    <sim_path>/<variant_dir_name>/<seed_str>/<gen_str>/<daughter_str>; its
    'simOut' subdirectory is the input and 'plotOut' receives the plots.
    """
    variant_directory = os.path.join(args.sim_path, args.variant_dir_name)
    cell_subpath = os.path.join(
        args.seed_str, args.gen_str, args.daughter_str)

    results_directory = os.path.join(
        variant_directory, cell_subpath, 'simOut')
    modified_sim_data = os.path.join(
        variant_directory, 'kb', constants.SERIALIZED_SIM_DATA_MODIFIED)

    plot_dir = filepath.makedirs(variant_directory, cell_subpath, 'plotOut')

    task_options = dict(
        input_results_directory=results_directory,
        input_sim_data=modified_sim_data,
        input_validation_data=args.input_validation_data,
        output_plots_directory=plot_dir,
        metadata=args.metadata,
        plots_to_run=args.plot,
        output_filename_prefix=args.output_prefix,
        )

    analysis_task = AnalysisSingleTask(**task_options)
    analysis_task.run_task({})
def do_plot(self, variantDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Violin plot of the concentrations of LIMITED_METABOLITES across all
    cells of a cohort; also prints means, standard deviations and sample
    count.

    Args:
        variantDir: Existing variant directory whose cells are analyzed.
        plotOutDir: Directory for the exported figure (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Unused by this plot.
        validationDataFile: Unused by this plot.
        metadata: Passed through to exportFigure().

    Raises:
        Exception: If variantDir is not a directory.
    """
    if not os.path.isdir(variantDir):
        raise Exception('variantDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(variantDir, cohort_plot=True)

    limited_metabolites = []
    for sim_dir in ap.get_cells():
        sim_out_dir = os.path.join(sim_dir, 'simOut')

        # Listeners used
        kinetics_reader = TableReader(
            os.path.join(sim_out_dir, "EnzymeKinetics"))

        # Load data
        try:
            metabolite_indices = {
                m: i for i, m in enumerate(
                    kinetics_reader.readAttribute('metaboliteNames'))
                }
            # The first row of each column is dropped.
            metabolite_counts = kinetics_reader.readColumn(
                "metaboliteCountsFinal")[1:, :]
            counts_to_molar = kinetics_reader.readColumn(
                'countsToMolar')[1:].reshape(-1, 1)
        except Exception:
            # Best effort: skip cells with unreadable data, but let
            # KeyboardInterrupt/SystemExit propagate.
            print('Error reading data from {}'.format(sim_out_dir))
            continue

        # Calculate concentrations
        met_idx = np.array(
            [metabolite_indices[m] for m in LIMITED_METABOLITES])
        metabolite_conc = counts_to_molar * metabolite_counts[:, met_idx]
        limited_metabolites.append(metabolite_conc)

    if not limited_metabolites:
        # np.vstack() raises on an empty list, so bail out cleanly.
        print('No metabolite data could be read; skipping plot.')
        return

    limited_metabolites = np.vstack(limited_metabolites)

    # Values to calculate significance between different cohorts
    print('Metabolites: {}'.format(LIMITED_METABOLITES))
    print('Means: {}'.format(limited_metabolites.mean(axis=0)))
    print('Stds: {}'.format(limited_metabolites.std(axis=0)))
    print('N: {}'.format(limited_metabolites.shape[0]))

    plt.figure(figsize=(4, 4))
    xticks = [0, 1]

    # Plot data
    plt.violinplot(limited_metabolites, xticks, showmeans=True)

    # Format axes
    plt.ylim([0, 50])
    whitePadSparklineAxis(plt.gca())
    plt.xticks(xticks, LIMITED_METABOLITES)
    plt.ylabel('Concentration (uM)')

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Scatter plot of added volume against birth volume for the cells of the
    'condition' variant, exported once with labels and once as a "_clean"
    version without axis labels and tick labels.

    Volumes are computed as cell mass (first and last entry of the
    "cellMass" column) divided by the cell density from each variant's
    sim_data.

    Args:
        inputDir: Existing directory holding the variant outputs.
        plotOutDir: Directory for the exported figures (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Unused; sim_data is loaded per variant instead.
        validationDataFile: Unused by this plot.
        metadata: Must have metadata["variant"] == "condition"; also passed
            through to exportFigure().

    Raises:
        Exception: If inputDir is not a directory.
    """
    if metadata["variant"] != "condition":
        print("This plot only runs for the 'condition' variant.")
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    gens = [2, 3]

    initial_volumes = []
    added_volumes = []

    for variant in variants:
        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)

        cell_density = sim_data.constants.cellDensity
        density = cell_density.asNumber(units.fg / units.um**3)

        initial_masses = np.zeros(0)
        final_masses = np.zeros(0)

        all_cells = ap.get_cells(variant=[variant], generation=gens)

        if len(all_cells) == 0:
            continue

        for simDir in all_cells:
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("cellMass")

                initial_masses = np.hstack((initial_masses, cellMass[0]))
                final_masses = np.hstack((final_masses, cellMass[-1]))
            except Exception:
                # Best effort: skip unreadable cells, but let
                # KeyboardInterrupt/SystemExit propagate.
                continue

        added_masses = final_masses - initial_masses

        initial_volumes.append(initial_masses / density)
        added_volumes.append(added_masses / density)

    labels = ["minimal", "anaerobic", "minimal + AA"]

    if len(initial_volumes) < len(labels):
        # The plot indexes the first three variants with data; avoid an
        # IndexError when some of them produced no cells.
        print('Not enough variants with data to plot.')
        return

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    plt.figure(figsize=(4, 4))
    ax = plt.subplot2grid((1, 1), (0, 0))

    options = {
        "edgecolors": color_cycle[0],
        "alpha": 0.2,
        "s": 50,
        "clip_on": False
        }

    ax.scatter(initial_volumes[2], added_volumes[2],
        marker="x", label=labels[2], **options)
    ax.scatter(initial_volumes[0], added_volumes[0],
        facecolors="none", marker="o", label=labels[0], **options)
    ax.scatter(initial_volumes[1], added_volumes[1],
        facecolors="none", marker="^", label=labels[1], **options)

    ax.set_xlim([0, 4])
    ax.set_ylim([0, 4])
    # Raw strings keep the TeX backslashes without relying on "\m" being
    # an unrecognized (and therefore preserved) escape sequence.
    ax.set_xlabel(r"Birth Volume ($\mu m^3$)")
    ax.set_ylabel(r"Added Volume ($\mu m^3$)")
    ax.legend()

    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
        top=False, right=False, labelbottom=True, labelleft=True)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of plot
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_yticklabels([])
    ax.set_xticklabels([])

    exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Compare summary statistics across variants indexed by a lambda value.

    Exports two figures: a stack of eight bar charts (growth rate, flux and
    concentration comparisons, Toya flux correlation, number of sims) and a
    log-log plot of kinetic vs homeostatic objective values saved with an
    "_obj" suffix. Per-variant values come from analyze_variant(), which is
    run in a process pool.

    Args:
        inputDir: Existing directory holding the variant outputs.
        plotOutDir: Directory for the exported figures (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Unused by this plot.
        validationDataFile: Path to the pickled validation data object.
        metadata: Passed through to exportFigure().

    Raises:
        Exception: If inputDir is not a directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    n_variants = len(variants)

    if n_variants <= 1:
        print('This plot only runs for multiple variants')
        return

    filepath.makedirs(plotOutDir)

    # Load validation data
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)
    toya_reactions = validation_data.reactionFlux.toya2010fluxes['reactionID']
    toya_fluxes = np.array([
        x.asNumber(DCW_FLUX_UNITS)
        for x in validation_data.reactionFlux.toya2010fluxes['reactionFlux']])
    outlier_filter = [rxn not in OUTLIER_REACTIONS for rxn in toya_reactions]

    # Arrays to populate for plots
    lambdas = np.zeros(n_variants)
    n_sims = np.zeros(n_variants)
    growth_rates = np.zeros(n_variants)
    conc_correlation = np.zeros(n_variants)
    n_conc_off_axis = np.zeros(n_variants)
    flux_correlation = np.zeros(n_variants)
    nonzero_flux_correlation = np.zeros(n_variants)
    n_flux_above_0 = np.zeros(n_variants)
    n_flux_off_axis = np.zeros(n_variants)
    correlation_coefficient = np.zeros(n_variants)
    filtered_correlation_coefficient = np.zeros(n_variants)
    homeostatic_objective_value = np.zeros(n_variants)
    kinetic_objective_value = np.zeros(n_variants)
    homeostatic_objective_std = np.zeros(n_variants)
    kinetic_objective_std = np.zeros(n_variants)

    # Pull information from sim data and listeners in parallel
    pool = Pool(processes=parallelization.plotter_cpus())
    variant_args = zip(
        variants,
        [ap] * n_variants,
        [toya_reactions] * n_variants,
        [toya_fluxes] * n_variants,
        [outlier_filter] * n_variants
        )
    try:
        results = pool.map(analyze_variant, variant_args)
    finally:
        # Release the worker processes even if a worker raised.
        pool.close()
        pool.join()

    for i, result in enumerate(results):
        (lambdas[i],
            n_sims[i],
            growth_rates[i],
            conc_correlation[i],
            n_conc_off_axis[i],
            flux_correlation[i],
            n_flux_off_axis[i],
            nonzero_flux_correlation[i],
            n_flux_above_0[i],
            correlation_coefficient[i],
            filtered_correlation_coefficient[i],
            kinetic_objective_value[i],
            kinetic_objective_std[i],
            homeostatic_objective_value[i],
            homeostatic_objective_std[i],
            n_metabolites,
            n_fluxes) = result

    tick_labels = [
        r'$10^{%i}$' % (np.log10(x),) if x != 0 else '0' for x in lambdas]
    # Bars are positioned at log10(lambda); a lambda of 0 is placed one
    # decade below the smallest nonzero lambda.
    lambdas = [
        np.log10(x) if x != 0
        else np.nanmin(np.log10(lambdas[lambdas != 0]))-1
        for x in lambdas]

    plt.figure(figsize = (8.5, 22))
    plt.style.use('seaborn-deep')
    subplots = 8

    # Growth rates
    ax = plt.subplot(subplots, 1, 1)
    plt.bar(lambdas, growth_rates / growth_rates[0], align='center')
    plt.axhline(1, linestyle='--', color='k')
    plt.ylim([0, 2])
    plt.ylabel('Growth rate deviation\nfrom no kinetics')
    whitePadSparklineAxis(ax, xAxis=False)
    plt.yticks([0, 1, 2])

    # Flux target comparisons
    ax = plt.subplot(subplots, 1, 2)
    plt.bar(lambdas, nonzero_flux_correlation, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Kinetic target flux PCC')
    whitePadSparklineAxis(ax, xAxis=False)

    ax = plt.subplot(subplots, 1, 3)
    plt.bar(lambdas, n_flux_above_0 / n_fluxes, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Fraction of fluxes\nabove 0')
    whitePadSparklineAxis(ax, xAxis=False)

    ax = plt.subplot(subplots, 1, 4)
    plt.bar(lambdas, n_flux_off_axis / n_fluxes, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Fraction of fluxes\noff axis (>{:.0f}%)'.format(FRAC_FLUX_OFF_AXIS*100))
    whitePadSparklineAxis(ax, xAxis=False)

    # Metabolite comparisons
    ax = plt.subplot(subplots, 1, 5)
    plt.bar(lambdas, conc_correlation, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Concentration PCC')
    whitePadSparklineAxis(ax, xAxis=False)

    ax = plt.subplot(subplots, 1, 6)
    plt.bar(lambdas, n_conc_off_axis / n_metabolites, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Fraction of concentrations\noff axis (>{:.0f}%)'.format(FRAC_CONC_OFF_AXIS*100))
    whitePadSparklineAxis(ax, xAxis=False)

    # Toya comparison
    ax = plt.subplot(subplots, 1, 7)
    plt.bar(lambdas, filtered_correlation_coefficient, align='center')
    plt.ylim([0, 1])
    plt.ylabel('Central carbon flux PCC')
    whitePadSparklineAxis(ax, xAxis=False)

    # Viable sims
    ax = plt.subplot(subplots, 1, 8)
    plt.bar(lambdas, n_sims, align='center')
    plt.ylabel('Number of sims\nwith data')
    whitePadSparklineAxis(ax)
    plt.xticks(lambdas, tick_labels)

    plt.xlabel('lambda')

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Plot kinetic vs homeostatic objective values
    plt.figure(figsize=(3.5, 3.5))
    ax = plt.gca()
    ax.set_xscale("log", nonposx='clip')
    ax.set_yscale("log", nonposy='clip')
    plt.errorbar(homeostatic_objective_value, kinetic_objective_value,
        xerr=homeostatic_objective_std, yerr=kinetic_objective_std,
        fmt='none', ecolor='k', alpha=0.5, linewidth=0.5)
    plt.plot(homeostatic_objective_value, kinetic_objective_value, "ob",
        markeredgewidth=0.1, alpha=0.9)
    # Label each point with its variant position, slightly below the marker.
    for i in range(len(lambdas)):
        plt.text(homeostatic_objective_value[i],
            0.6*kinetic_objective_value[i], i,
            horizontalalignment='center', verticalalignment='center')
    plt.xlabel('Homeostatic Objective Value')
    plt.ylabel('Kinetics Objective Value')

    whitePadSparklineAxis(ax)

    # Adjust limits to get tick labels to display
    xlim = ax.get_xlim()
    xlim = [10**np.floor(np.log10(xlim[0])), 10**np.ceil(np.log10(xlim[1]))]
    ax.set_xticks(xlim)
    ylim = ax.get_ylim()
    ylim = [10**np.floor(np.log10(ylim[0])), 10**np.ceil(np.log10(ylim[1]))]
    ax.set_yticks(ylim)

    exportFigure(plt, plotOutDir, '{}_obj'.format(plotOutFileName), metadata)

    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Scatter plot, one point per variant, of glucose uptake against the log2
    ratio of the model's SUCC_ID flux to the Toya flux, with marker size
    derived from the Pearson correlation against all Toya fluxes. An inset
    shows the highlighted region labelled with each variant's disabled
    constraints.

    Args:
        inputDir: Existing directory holding the variant outputs; its
            'kb' subdirectory must contain the fit-1 sim_data pickle.
        plotOutDir: Directory for the exported figure (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Unused; sim_data is loaded from inputDir/kb instead.
        validationDataFile: Path to the pickled validation data object.
        metadata: Passed through to exportFigure().

    Raises:
        Exception: If inputDir is not a directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    n_variants = len(variants)

    # Load sim_data
    with open(
            os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME),
            'rb') as f:
        sim_data = cPickle.load(f)
    cell_density = sim_data.constants.cellDensity.asNumber(
        MASS_UNITS / VOLUME_UNITS)

    # Load validation_data
    with open(validationDataFile, "rb") as f:
        validation_data = cPickle.load(f)
    toyaReactions = validation_data.reactionFlux.toya2010fluxes["reactionID"]
    toyaFluxes = validation_data.reactionFlux.toya2010fluxes["reactionFlux"]
    toyaStdev = validation_data.reactionFlux.toya2010fluxes[
        "reactionFluxStdev"]
    toyaFluxesDict = dict(zip(toyaReactions, toyaFluxes))
    toyaStdevDict = dict(zip(toyaReactions, toyaStdev))

    glc_uptakes = np.zeros(n_variants)
    log_ratio_succ = np.zeros(n_variants)
    size_pearson = np.zeros(n_variants)
    selected_indicies = np.zeros(n_variants, bool)
    for v, variant in enumerate(variants):
        # initialize kinetic flux comparison
        exchange_fluxes = {entry: [] for entry in EXCHANGES}
        reaction_fluxes = {entry: [] for entry in REACTIONS}

        modelFluxes = {rxn: [] for rxn in toyaReactions}

        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            try:
                # Listeners used
                massListener = TableReader(os.path.join(simOutDir, "Mass"))
                fbaResults = TableReader(
                    os.path.join(simOutDir, "FBAResults"))
                enzymeKineticsReader = TableReader(
                    os.path.join(simOutDir, "EnzymeKinetics"))

                ## Read from mass listener
                cellMass = massListener.readColumn("cellMass")
                # skip if no data
                # Compare by value; "is ()" tests object identity with a
                # literal, which is not guaranteed to hold for equal tuples.
                if cellMass.shape == ():
                    continue
                dryMass = massListener.readColumn("dryMass")
            except Exception as e:
                print(e)
                continue

            coefficient = (dryMass / cellMass * cell_density).reshape(-1, 1)

            ## Read from FBA listener
            reactionIDs = {
                r: i for i, r in enumerate(
                    fbaResults.readAttribute("reactionIDs"))
                }
            exMolec = {
                m: i for i, m in enumerate(
                    fbaResults.readAttribute("externalMoleculeIDs"))
                }
            # The first row of each column is dropped.
            reactionFluxes = FLUX_CONVERSION * (
                fbaResults.readColumn("reactionFluxes") / coefficient)[1:, :]
            exFlux = fbaResults.readColumn("externalExchangeFluxes")[1:, :]

            ## Read from EnzymeKinetics listener
            constrainedReactions = {
                r: i for i, r in enumerate(
                    enzymeKineticsReader.readAttribute(
                        "constrainedReactions"))
                }

            ## Append values for relevant reactions.
            # append to exchanges
            for entry in EXCHANGES:
                exchange_fluxes[entry].extend(
                    list(exFlux[:, exMolec[entry]]))
            # append to reaction fluxes
            for entry in REACTIONS:
                reaction_fluxes[entry].extend(
                    list(reactionFluxes[:, reactionIDs[entry]]))

            ## get all Toya reactions, and corresponding simulated fluxes.
            # Direction is kept as an explicit +/-1 next to the column
            # index. Folding it into the sign of the index loses the
            # reaction at index 0 (np.sign(0) == 0 and -0 == 0).
            toya_idx = {r: [] for r in toyaReactions}
            for rxn, i in reactionIDs.items():
                parts = rxn.split(' (reverse)')
                sign = -1 if len(parts) > 1 else 1
                base_rxn = parts[0].split('__')[0]
                if base_rxn in toya_idx:
                    toya_idx[base_rxn].append((sign, i))
            for toyaReaction, reaction_idx in toya_idx.items():
                flux_time_course = np.sum([
                    sign * reactionFluxes[:, i]
                    for sign, i in reaction_idx
                    ], axis=0)
                modelFluxes[toyaReaction].append(flux_time_course.mean())

        ## Flux comparison with Toya
        toyaVsReactionAve = []
        rxn_order = []
        for rxn, toyaFlux in toyaFluxesDict.items():
            rxn_order.append(rxn)
            if rxn in modelFluxes:
                toyaVsReactionAve.append(
                    (np.mean(modelFluxes[rxn]),
                    toyaFlux.asNumber(OUTPUT_FLUX_UNITS),
                    np.std(modelFluxes[rxn]),
                    toyaStdevDict[rxn].asNumber(OUTPUT_FLUX_UNITS)))

        toyaVsReactionAve = np.array(toyaVsReactionAve)
        rWithAll = pearsonr(toyaVsReactionAve[:, 0], toyaVsReactionAve[:, 1])
        succ_toya_flux = toyaVsReactionAve[rxn_order.index(SUCC_ID), 1]

        # Save data for plotting
        glc_uptakes[v] = -np.mean(exchange_fluxes[GLC_ID])
        log_ratio_succ[v] = np.log2(
            np.mean(reaction_fluxes[SUCC_ID]) / succ_toya_flux)
        size_pearson[v] = (rWithAll[0] * 8)**2
        # NOTE(review): constrainedReactions comes from the last cell read
        # for this variant; confirm it is identical across a variant's cells.
        selected_indicies[v] = np.all([
            c not in constrainedReactions for c in HIGHLIGHTED_CONSTRAINTS
            ])

    # Plot scatterplot
    fig = plt.figure(figsize=(5, 5))
    gs = gridspec.GridSpec(40, 40)

    ## Plot full data
    plt.scatter(glc_uptakes[~selected_indicies],
        log_ratio_succ[~selected_indicies],
        color='blue', alpha=0.6, s=size_pearson[~selected_indicies])
    plt.scatter(glc_uptakes[selected_indicies],
        log_ratio_succ[selected_indicies],
        color='red', alpha=0.6, s=size_pearson[selected_indicies])
    x_min, x_max = plt.xlim()
    y_max = max(np.abs(plt.ylim()))
    plt.axvspan(0, GLC_MAX, facecolor='g', alpha=0.1)
    plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)
    plt.axhline(y=0, color='k', linestyle='--')

    ## Format axes
    plt.ylabel('log2(model flux / Toya flux)')
    plt.xlabel('glucose uptake (mmol / g DCW / hr)')
    plt.xlim([np.floor(min(x_min, 10)), np.ceil(x_max)])
    plt.ylim([-y_max, y_max])

    ## Plot highlighted region data
    fig.add_subplot(gs[1:28, -20:-1])
    in_region = (glc_uptakes < GLC_MAX) & (
        np.abs(log_ratio_succ) < SUCC_DISTANCE)
    selected_in = in_region & selected_indicies
    not_selected_in = in_region & ~selected_indicies
    constraint_labels = np.array(
        [[c[:2] for c in constraints] if constraints is not None else []
        for _, constraints in map(get_disabled_constraints, variants)])
    plt.scatter(glc_uptakes[not_selected_in],
        log_ratio_succ[not_selected_in],
        color='blue', alpha=0.6, s=size_pearson[not_selected_in])
    plt.scatter(glc_uptakes[selected_in],
        log_ratio_succ[selected_in],
        color='red', alpha=0.6, s=size_pearson[selected_in])
    for x, y, label in zip(glc_uptakes[in_region],
            log_ratio_succ[in_region], constraint_labels[in_region]):
        plt.text(x, y, ', '.join(label), ha='center', va='top', fontsize=6)
    x_min, _ = plt.xlim()
    x_min = np.floor(min(x_min, 10))
    plt.axvspan(x_min, GLC_MAX, facecolor='g', alpha=0.1)
    plt.axhspan(-SUCC_DISTANCE, SUCC_DISTANCE, facecolor='g', alpha=0.1)

    ## Format axes
    plt.xlim([x_min, GLC_MAX])
    plt.ylim([-SUCC_DISTANCE, SUCC_DISTANCE])

    ## Save figure
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Violin plot of the growth yield on glucose for N_VARIANTS selected
    variants, with the p-values of pairwise Welch t-tests printed.

    Variants are selected by what get_disabled_constraints() returns as its
    second value: None fills slot 0, an empty collection fills slot 1, and
    a set equal to ADDITIONAL_DISABLED_CONSTRAINTS fills slot 2.

    Args:
        inputDir: Existing directory holding the variant outputs; its
            'kb' subdirectory must contain the fit-1 sim_data pickle.
        plotOutDir: Directory for the exported figure (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Unused; sim_data is loaded from inputDir/kb instead.
        validationDataFile: Unused by this plot.
        metadata: Passed through to exportFigure().

    Raises:
        Exception: If inputDir is not a directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    all_variants = ap.get_variants()

    # -1 marks a slot for which no matching variant has been found yet.
    variants = -np.ones(N_VARIANTS)
    for variant in all_variants:
        _, additional_disabled = get_disabled_constraints(variant)
        if additional_disabled is None:
            variants[0] = variant
        elif len(additional_disabled) == 0:
            variants[1] = variant
        elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
            variants[2] = variant

    if np.any(variants < 0):
        print('Not enough variants to analyze')
        return

    with open(
            os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME),
            'rb') as f:
        sim_data = cPickle.load(f)

    all_yields = []
    for variant in variants:
        yields = []

        for sim_dir in ap.get_cells(variant=[variant]):
            sim_out_dir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            fba_reader = TableReader(os.path.join(sim_out_dir, 'FBAResults'))
            main_reader = TableReader(os.path.join(sim_out_dir, 'Main'))
            mass_reader = TableReader(os.path.join(sim_out_dir, 'Mass'))

            # Load data
            time_step_sec = main_reader.readColumn('timeStepSec')

            external_fluxes = fba_reader.readColumn('externalExchangeFluxes')
            external_molecules = fba_reader.readAttribute(
                'externalMoleculeIDs')

            dry_mass = MASS_UNITS * mass_reader.readColumn('dryMass')
            growth = GROWTH_UNITS * mass_reader.readColumn(
                'growth') / time_step_sec

            # Calculate growth yield on glucose
            glc_idx = external_molecules.index(GLUCOSE_ID)
            glc_flux = FLUX_UNITS * external_fluxes[:, glc_idx]
            glc_mw = sim_data.getter.getMass([GLUCOSE_ID])[0]
            glc_mass_flux = glc_flux * glc_mw * dry_mass
            glc_mass_yield = growth / -glc_mass_flux

            # The first entry is dropped.
            yields += list(glc_mass_yield[1:].asNumber())

        all_yields += [yields]

    # Pairwise comparison of every variant against each later one.
    for i, v1 in enumerate(variants):
        for j, v2 in enumerate(variants[i + 1:]):
            t, p = stats.ttest_ind(
                all_yields[i], all_yields[i + j + 1], equal_var=False)
            print('p={:.2e} for variant {} vs variant {}'.format(p, v1, v2))

    plt.figure(figsize=(4, 4))
    xticks = range(N_VARIANTS)

    # Plot data
    plt.violinplot(all_yields, xticks, showmeans=False, showextrema=False)
    plt.axhline(VALIDATION_YIELD, linestyle='--', color='#eb7037')

    # Format axes
    ax = plt.gca()
    ax.spines['top'].set_visible(False)
    ax.spines['right'].set_visible(False)
    plt.xticks(xticks, VARIANT_LABELS)
    plt.ylabel('Glucose Yield\n(g cell / g glucose)')

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile,
            validationDataFile, metadata):
    """
    Plot the RNA/protein mass ratio against growth rate for the 'condition'
    variant, together with the linear relation from Scott et al. (2010).

    Exports three figures: an errorbar plot, a "_clean" copy of it without
    axis labels and tick labels, and a "_scatter" plot of individual cells.
    Growth rate is ln(2) divided by the doubling time in hours, where the
    doubling time is the span of each cell's "time" column.

    Args:
        inputDir: Existing directory holding the variant outputs.
        plotOutDir: Directory for the exported figures (created if needed).
        plotOutFileName: Base file name handed to exportFigure().
        simDataFile: Unused by this plot.
        validationDataFile: Unused by this plot.
        metadata: Must have metadata["variant"] == "condition"; also passed
            through to exportFigure().

    Raises:
        Exception: If inputDir is not a directory.
    """
    if metadata["variant"] != "condition":
        print('This analysis only runs for the "condition" variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    n_gens = ap.n_generation
    variants = ap.get_variants()

    if n_gens - 1 < FIRST_GENERATION:
        print('Not enough generations to plot.')
        return

    all_growth_rates = []
    all_rna_to_protein_ratios = []

    for variant in variants:
        doubling_times = np.zeros(0)
        variant_rna_to_protein_ratios = np.zeros(0)

        all_cells = ap.get_cells(
            variant=[variant],
            generation=range(FIRST_GENERATION, n_gens))

        if len(all_cells) == 0:
            continue

        for simDir in all_cells:
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                rna_mass = mass.readColumn("rnaMass")
                protein_mass = mass.readColumn("proteinMass")

                time = TableReader(
                    os.path.join(simOutDir, "Main")).readColumn("time")

                doubling_times = np.hstack(
                    (doubling_times, (time[-1] - time[0]) / 3600.))

                variant_rna_to_protein_ratios = np.hstack(
                    (variant_rna_to_protein_ratios,
                    rna_mass.mean() / protein_mass.mean()))
            except Exception:
                # Best effort: skip unreadable cells, but let
                # KeyboardInterrupt/SystemExit propagate.
                continue

        variant_growth_rates = np.log(2) / doubling_times

        all_growth_rates.append(variant_growth_rates)
        all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

    marker_styles = ['o', '^', 'x']
    labels = ['basal', 'anaerobic', '+AA']

    if len(all_growth_rates) < len(labels):
        # The plots index the first three variants with data; avoid an
        # IndexError when some of them produced no cells.
        print('Not enough variants with data to plot.')
        return

    # Get errorbar plot
    plt.figure(figsize=FIGSIZE)

    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ax = plt.subplot2grid((1, 1), (0, 0))

    for i in range(3):
        ax.errorbar(
            all_growth_rates[i].mean(),
            all_rna_to_protein_ratios[i].mean(),
            yerr=all_rna_to_protein_ratios[i].std(),
            color=color_cycle[0], mec=color_cycle[0],
            marker=marker_styles[i], markersize=8, mfc='white',
            linewidth=1, capsize=2, label=labels[i])

    # Add linear plot proposed in Scott et al. (2010)
    x_linear = np.linspace(0.05, 1.95, 100)
    y_linear = x_linear / 4.5 + 0.087
    ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

    ax.set_xlim([0, 2])
    ax.set_ylim([0, 0.7])
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
        top=False, right=False, labelbottom=True, labelleft=True)

    # Raw strings keep the TeX backslash without relying on "\l" being an
    # unrecognized (and therefore preserved) escape sequence.
    ax.set_xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
    ax.set_ylabel("RNA/protein mass ratio")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Get clean version of errorbar plot
    ax.set_xlabel("")
    ax.set_ylabel("")
    ax.set_yticklabels([])
    ax.set_xticklabels([])

    exportFigure(plt, plotOutDir, plotOutFileName + "_clean", metadata)

    plt.close("all")

    # Get scatter version of plot
    plt.figure(figsize=FIGSIZE)
    ax = plt.subplot2grid((1, 1), (0, 0))

    options = {"edgecolors": color_cycle[0], "alpha": 0.25, "s": 20}

    ax.scatter(all_growth_rates[0], all_rna_to_protein_ratios[0],
        facecolors="none", marker="o", label=labels[0], **options)
    ax.scatter(all_growth_rates[1], all_rna_to_protein_ratios[1],
        facecolors="none", marker="^", label=labels[1], **options)
    ax.scatter(all_growth_rates[2], all_rna_to_protein_ratios[2],
        marker="x", label=labels[2], **options)

    x_linear = np.linspace(0.05, 2.45, 100)
    y_linear = x_linear / 4.5 + 0.087
    ax.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

    ax.set_xlim([0, 2.5])
    ax.set_ylim([0, 0.8])
    ax.get_yaxis().get_major_formatter().set_useOffset(False)
    ax.get_xaxis().get_major_formatter().set_useOffset(False)

    whitePadSparklineAxis(ax)

    ax.tick_params(which='both', bottom=True, left=True,
        top=False, right=False, labelbottom=True, labelleft=True)

    ax.set_xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
    ax.set_ylabel("RNA/protein mass ratio")

    exportFigure(plt, plotOutDir, plotOutFileName + "_scatter", metadata)

    # Release the scatter figure too, like the errorbar figure above.
    plt.close("all")
def run(self, args):
    """
    Write the run metadata, then create variant sim_data and run every
    requested simulation in sequence.

    For each variant index in args.variant[1]..args.variant[2] (inclusive),
    a VariantSimDataTask is run, followed by one simulation per seed and
    generation. Generation 0 uses SimulationTask; later generations use
    SimulationDaughterTask, inheriting state from the parent cell's simOut
    directory.

    Args:
        args: Parsed command line arguments. Reads sim_path, variant, seed,
            init_sims, generations and the simulation/metadata options
            selected below.
    """
    kb_directory = os.path.join(args.sim_path, 'kb')
    fit_sim_data_path = os.path.join(kb_directory, 'simData_Fit_1.cPickle')
    fp.verify_file_exists(fit_sim_data_path, 'Run runFitter?')

    timestamp, description = parse_timestamp_description(args.sim_path)

    variant_type = args.variant[0]
    first_variant = int(args.variant[1])
    last_variant = int(args.variant[2])
    variants_to_run = xrange(first_variant, last_variant + 1)

    arg_dict = vars(args)
    cli_sim_args = data.select_keys(
        arg_dict,
        ('length_sec', 'timestep_safety_frac', 'timestep_max',
        'timestep_update_freq', 'mass_distribution', 'growth_rate_noise',
        'd_period_division', 'translation_supply'))

    # Write the metadata file.
    cli_metadata_args = data.select_keys(
        vars(args),
        ('total_gens', 'mass_distribution', 'growth_rate_noise',
        'd_period_division', 'translation_supply',
        'variable_elongation_translation',
        'variable_elongation_transcription'))
    # Fall back to '--' when a git command yields nothing.
    git_hash = fp.run_cmdline("git rev-parse HEAD") or '--'
    git_branch = fp.run_cmdline("git symbolic-ref --short HEAD") or '--'
    metadata = dict(
        cli_metadata_args,
        git_hash=git_hash,
        git_branch=git_branch,
        description=description,
        time=timestamp,
        analysis_type=None,
        variant=variant_type,
        total_variants=str(len(variants_to_run)))

    metadata_dir = fp.makedirs(args.sim_path, 'metadata')
    metadata_path = os.path.join(
        metadata_dir, constants.SERIALIZED_METADATA_FILE)
    with open(metadata_path, "wb") as metadata_file:
        cPickle.dump(metadata, metadata_file, cPickle.HIGHEST_PROTOCOL)

    # The most-fit sim_data path is the same for every variant.
    most_fit_filename = os.path.join(
        kb_directory, constants.SERIALIZED_SIM_DATA_MOST_FIT_FILENAME)

    # args.sim_path is called INDIV_OUT_DIRECTORY in fw_queue.
    for variant_index in variants_to_run:
        variant_directory = os.path.join(
            args.sim_path, variant_type + "_%06d" % variant_index)
        variant_sim_data_directory = os.path.join(variant_directory, "kb")
        variant_sim_data_modified_file = os.path.join(
            variant_sim_data_directory,
            constants.SERIALIZED_SIM_DATA_MODIFIED)

        fp.makedirs(variant_sim_data_directory)
        variant_metadata_directory = fp.makedirs(
            variant_directory, "metadata")

        variant_task = VariantSimDataTask(
            variant_function=variant_type,
            variant_index=variant_index,
            input_sim_data=most_fit_filename,
            output_sim_data=variant_sim_data_modified_file,
            variant_metadata_directory=variant_metadata_directory,
            )
        variant_task.run_task({})

        # init sim seeds
        for seed in xrange(args.seed, args.seed + args.init_sims):
            seed_directory = fp.makedirs(variant_directory, "%06d" % seed)

            # generation number
            for generation in xrange(args.generations):
                gen_directory = fp.makedirs(
                    seed_directory, "generation_%06d" % generation)

                # The daughter number among all of this generation's cells,
                # which is 0 for single-daughters but would span
                # range(2**generation) if each parent had 2 daughters.
                daughter = 0
                cell_directory = fp.makedirs(
                    gen_directory, "%06d" % daughter)
                cell_sim_out_directory = fp.makedirs(
                    cell_directory, "simOut")

                options = dict(
                    cli_sim_args,
                    input_sim_data=variant_sim_data_modified_file,
                    output_directory=cell_sim_out_directory,
                    )

                if generation == 0:
                    sim_task = SimulationTask(seed=seed, **options)
                else:
                    parent_cell_sim_out_directory = os.path.join(
                        seed_directory,
                        "generation_%06d" % (generation - 1),
                        "%06d" % (daughter // 2),
                        "simOut")
                    daughter_state_path = os.path.join(
                        parent_cell_sim_out_directory,
                        "Daughter%d" % (daughter % 2 + 1))
                    daughter_seed = (seed + 1) * (
                        (2**generation - 1) + daughter)
                    sim_task = SimulationDaughterTask(
                        inherited_state_path=daughter_state_path,
                        seed=daughter_seed,
                        **options)
                sim_task.run_task({})
def _populateDerivativeAndJacobian(self):
    '''
    Creates callable functions for computing the derivative and the Jacobian.

    The generated ODE modules are rewritten only when one of them is
    missing, or when a pickled fixture (stoichiometric matrix, forward
    rates, reverse rates) is missing or differs from the current value.
    In that case the fixtures are re-pickled after the modules have been
    imported. In both cases self.derivatives, self.derivativesJacobian,
    self.derivativesFitter and self.derivativesFitterJacobian are set from
    the imported modules.
    '''
    rootDir = os.path.dirname(os.path.dirname(wholecell.__file__))
    fixturesDir = filepath.makedirs(
        rootDir, "fixtures", "twoComponentSystem")
    processDir = os.path.join(
        rootDir, "reconstruction", "ecoli", "dataclasses", "process")
    odeFile = os.path.join(processDir, "two_component_system_odes.py")
    odeFitterFile = os.path.join(
        processDir, "two_component_system_odes_fitter.py")

    # Fixture file name -> callable returning the current value. Callables
    # keep the values lazy: they are only computed when actually compared
    # or saved.
    fixtures = (
        ("S.cPickle", self.stoichMatrix),
        ("ratesFwd.cPickle", lambda: self.ratesFwd),
        ("ratesRev.cPickle", lambda: self.ratesRev),
    )

    def fixtureIsCurrent(fileName, currentValue):
        # True when the pickled fixture exists and equals the current value.
        # NOTE(review): cPickle.load is only safe because these fixtures
        # are written by this method itself.
        fixturePath = os.path.join(fixturesDir, fileName)
        if not os.path.exists(fixturePath):
            return False
        with open(fixturePath, "rb") as f:
            cached = cPickle.load(f)
        return bool(np.all(cached == currentValue()))

    needToCreate = (
        not os.path.exists(odeFile)
        or not os.path.exists(odeFitterFile)
        or not all(
            fixtureIsCurrent(fileName, currentValue)
            for fileName, currentValue in fixtures)
    )

    if needToCreate:
        self._makeDerivative()
        self._makeDerivativeFitter()

        writeOdeFile(
            odeFile,
            self.derivativesSymbolic,
            self.derivativesJacobianSymbolic)
        writeOdeFile(
            odeFitterFile,
            self.derivativesFitterSymbolic,
            self.derivativesFitterJacobianSymbolic)

    # Imported here rather than at module level because the files may have
    # been written just above.
    import reconstruction.ecoli.dataclasses.process.two_component_system_odes
    import reconstruction.ecoli.dataclasses.process.two_component_system_odes_fitter
    odes = reconstruction.ecoli.dataclasses.process.two_component_system_odes
    odesFitter = (
        reconstruction.ecoli.dataclasses.process.two_component_system_odes_fitter)

    self.derivatives = odes.derivatives
    self.derivativesJacobian = odes.derivativesJacobian
    self.derivativesFitter = odesFitter.derivatives
    self.derivativesFitterJacobian = odesFitter.derivativesJacobian

    if needToCreate:
        # Record the inputs the generated files were built from, so the
        # next call can detect whether they are still up to date.
        for fileName, currentValue in fixtures:
            with open(os.path.join(fixturesDir, fileName), "wb") as f:
                cPickle.dump(
                    currentValue(), f, protocol=cPickle.HIGHEST_PROTOCOL)
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot z-scored succinate and isocitrate dehydrogenase flux sensitivity.

    Only runs when metadata['variant'] is 'flux_sensitivity'; otherwise a
    message is printed and nothing is plotted. The same happens when no
    simulation output is found.

    Args:
        inputDir: directory handed to AnalysisPaths (variant_plot=True)
        plotOutDir: directory the figure is exported to (created if needed)
        plotOutFileName: base file name passed to exportFigure
        simDataFile: unused by this plot
        validationDataFile: unused by this plot
        metadata: dict; 'variant' is read here, the rest is passed on to
            exportFigure

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if metadata.get('variant', '') != 'flux_sensitivity':
        print('This plot only runs for the flux_sensitivity variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    succ_fluxes = []
    iso_fluxes = []
    reactions = None
    for variant in variants:
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            # Listeners used
            fba_reader = TableReader(os.path.join(simOutDir, 'FBAResults'))

            # Load data (the first row of each column is dropped)
            reactions = np.array(
                fba_reader.readAttribute('sensitivity_reactions'))
            succ_fluxes.append(
                fba_reader.readColumn('succinate_flux_sensitivity')[1:, :])
            iso_fluxes.append(
                fba_reader.readColumn('isocitrate_flux_sensitivity')[1:, :])

    if not succ_fluxes:
        # np.vstack() raises on an empty list and `reactions` would never
        # have been read, so there is nothing to plot.
        print('No simulation output found for the flux_sensitivity variant.')
        return

    succ_fluxes = np.vstack(succ_fluxes)
    iso_fluxes = np.vstack(iso_fluxes)

    succ_z = calc_z(succ_fluxes)
    iso_z = calc_z(iso_fluxes)

    threshold = -0.1

    # Plot data
    plt.figure()
    gs = gridspec.GridSpec(2, 2)

    ## Succinate dehydrogenase all fluxes
    ax = plt.subplot(gs[0, 0])
    plot_lows(ax, succ_z, threshold, 'succinate dehydrogenase')

    ## Succinate dehydrogenase fluxes over threshold
    ax = plt.subplot(gs[0, 1])
    plot_threshold(ax, succ_z, threshold, reactions)

    ## Isocitrate dehydrogenase all fluxes
    ax = plt.subplot(gs[1, 0])
    plot_lows(ax, iso_z, threshold, 'isocitrate dehydrogenase')

    ## Isocitrate dehydrogenase fluxes over threshold
    ax = plt.subplot(gs[1, 1])
    plot_threshold(ax, iso_z, threshold, reactions)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Plot and tabulate the effect of each adjusted parameter.

    Only runs when metadata['variant'] is 'param_sensitivity'. Variants are
    analyzed in a multiprocessing pool with analyze_variant, and the
    results are combined into per-parameter averages of growth rate and
    flux correlation for increased and decreased parameter values. Three
    outputs are written to plotOutDir: the summary figure, a
    '<plotOutFileName>_individual' figure for the most extreme parameters,
    and a '<plotOutFileName>.tsv' table of z scores and raw averages.

    The module-level globals ap, sim_data and validation_data are assigned
    here.
    NOTE(review): presumably so that analyze_variant can read them inside
    the worker processes - confirm against analyze_variant.

    Args:
        inputDir: directory handed to AnalysisPaths; also holds
            'kb/<SERIALIZED_FIT1_FILENAME>' from which sim_data is loaded
        plotOutDir: output directory (created if needed)
        plotOutFileName: base name for the figures and the tsv file
        simDataFile: unused (sim_data is loaded from inputDir instead)
        validationDataFile: pickled validation data
        metadata: dict; 'variant' is read here, the rest is passed on to
            exportFigure

    Raises:
        Exception: if inputDir is not an existing directory.
        ValueError: if the number of parameter ids does not match the
            number of parameters reported by number_params.
    """
    global ap
    global sim_data
    global validation_data

    if metadata.get('variant', '') != 'param_sensitivity':
        print('This plot only runs for the param_sensitivity variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = np.array(ap.get_variants())

    # Check to analyze control (variant 0) separately from other variants
    use_control = False
    if CONTROL_VARIANT in variants:
        use_control = True
        variants = variants[variants != CONTROL_VARIANT]
    n_variants = len(variants)

    # Load one instance of sim_data to get number of parameters and ids
    with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    # sim_data information
    total_params = np.sum(number_params(sim_data))
    rna_to_gene = {
        gene['rnaId']: gene['symbol']
        for gene in sim_data.process.replication.geneData}
    monomer_to_gene = {
        gene['monomerId']: gene['symbol']
        for gene in sim_data.process.replication.geneData}
    rna_ids = sim_data.process.transcription.rnaData['id']
    monomer_ids = sim_data.process.translation.monomerData['id']

    # IDs must match order from param_indices() from param_sensitivity.py variant
    # (the last 3 characters of each id are dropped before the gene lookup)
    param_ids = np.array(
        ['{} RNA deg Km'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids]
        + ['{} protein deg rate'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
        + ['{} translation eff'.format(monomer_to_gene[monomer[:-3]]) for monomer in monomer_ids]
        + ['{} synth prob'.format(rna_to_gene[rna[:-3]]) for rna in rna_ids])
    if len(param_ids) != total_params:
        raise ValueError('Number of adjusted parameters and list of ids do not match.')

    pool = Pool(processes=parallelization.plotter_cpus())
    args = zip(
        variants,
        [total_params] * n_variants,
        )

    try:
        results = pool.imap_unordered(analyze_variant, args)
        (increase_params_counts,
            decrease_params_counts,
            increase_params_growth_rate,
            decrease_params_growth_rate,
            increase_params_flux_correlation,
            decrease_params_flux_correlation) = reduce(operator.add, results)
    finally:
        # Shut the pool down even when a worker raised.
        pool.close()
        pool.join()

    # Calculate effects and z score
    labels = [
        'growth rate',
        'flux correlation',
        ]
    increase_params_data = np.vstack((
        increase_params_growth_rate / increase_params_counts,
        increase_params_flux_correlation / increase_params_counts,
        ))
    decrease_params_data = np.vstack((
        decrease_params_growth_rate / decrease_params_counts,
        decrease_params_flux_correlation / decrease_params_counts,
        ))
    n_outputs = len(labels)

    # Difference between effect when parameter increased vs decreased
    data_diff = increase_params_data - decrease_params_data
    mean_diff = np.nanmean(data_diff, axis=1).reshape(-1, 1)
    std_diff = np.nanstd(data_diff, axis=1).reshape(-1, 1)
    z_score_diff = (data_diff - mean_diff) / std_diff

    # Individual increase or decrease effects to check asymmetric effects
    all_data = np.hstack((increase_params_data, decrease_params_data))
    mean = np.nanmean(all_data, axis=1).reshape(-1, 1)
    std = np.nanstd(all_data, axis=1).reshape(-1, 1)
    z_score_increase = (increase_params_data - mean) / std
    z_score_decrease = (decrease_params_data - mean) / std

    # Get control data
    if use_control:
        control_counts, _, control_growth_rate, _, control_flux_correlation, _ = analyze_variant(
            (CONTROL_VARIANT, total_params))
        control_data = [
            control_growth_rate[0] / control_counts[0],
            control_flux_correlation[0] / control_counts[0],
            ]
    else:
        control_data = [None] * n_outputs

    # Multiple hypothesis adjustment for significance of each parameter.
    # Solves Gaussian CDF for how many standard deviations are needed to
    # include 1 - 0.05 / total_params of the data (test each parameter for p<0.05).
    n_stds = special.erfinv(2 * (1 - 0.05 / total_params) - 1) * np.sqrt(2)

    # Plot histograms
    plt.figure(figsize=(16, 4*n_outputs))
    n_cols = 4
    top_limit = 20  # limit of the number of highest/lowest parameters to plot
    for i, (z_diff, z_increase, z_decrease) in enumerate(zip(z_score_diff, z_score_increase, z_score_decrease)):
        sorted_idx = np.argsort(z_diff)
        above_idx = np.where(z_diff[sorted_idx] > n_stds)[0][-top_limit:]
        below_idx = np.where(z_diff[sorted_idx] < -n_stds)[0][:top_limit]

        ## Plot z difference data
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 1)
        plt.yscale('symlog', linthreshold=0.01)
        plt.fill_between(range(total_params), z_diff[sorted_idx])
        plt.axhline(n_stds, color='k', linestyle='--')
        plt.axhline(-n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax, xAxis=False)
        plt.xticks([])
        plt.yticks([-n_stds, 0, n_stds])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        # Symmetric y limits; reused for the single direction plot below
        lim = np.max(np.abs(plt.ylim()))
        plt.ylim([-lim, lim])
        if i == 0:
            plt.title('Difference of Positive and Negative\nParameter Changes')
        if i == n_outputs - 1:
            plt.xlabel('Sorted Parameters')
        plt.ylabel('Z score\nparameter effect on {}\n(log scale)'.format(labels[i]))

        ## Plot single direction z data
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 2)
        plt.yscale('symlog', linthreshold=0.01)
        plt.step(range(total_params), z_increase[sorted_idx], color='g', linewidth=1, alpha=0.5)
        plt.step(range(total_params), z_decrease[sorted_idx], color='r', linewidth=1, alpha=0.5)
        plt.axhline(n_stds, color='k', linestyle='--')
        plt.axhline(-n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax, xAxis=False)
        plt.xticks([])
        plt.yticks([-n_stds, 0, n_stds])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        plt.ylim([-lim, lim])
        if i == 0:
            plt.title('Positive and Negative\nParameter Changes')
        if i == n_outputs - 1:
            plt.xlabel('Sorted Parameters')

        ## Plot highest parameters
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 3)
        plt.yscale('symlog', linthreshold=0.01)
        plt.bar(above_idx, z_diff[sorted_idx[above_idx]])
        plt.axhline(n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax)
        ax.spines["bottom"].set_visible(False)
        ax.tick_params(bottom=False)
        plt.xticks(above_idx, param_ids[sorted_idx[above_idx]], rotation=90, fontsize=6)
        plt.yticks([0, n_stds])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        if i == 0:
            plt.title('Highest Positive Effect Parameters')
        if i == n_outputs - 1:
            plt.xlabel('Parameter IDs')

        ## Plot lowest parameters
        ax = plt.subplot(n_outputs, n_cols, n_cols*i + 4)
        plt.yscale('symlog', linthreshold=0.01)
        plt.bar(below_idx, z_diff[sorted_idx[below_idx]])
        plt.axhline(-n_stds, color='k', linestyle='--')

        ## Format axes
        sparkline.whitePadSparklineAxis(ax)
        ax.spines["bottom"].set_visible(False)
        ax.tick_params(bottom=False)
        plt.xticks(below_idx, param_ids[sorted_idx[below_idx]], rotation=90, fontsize=6)
        plt.yticks([-n_stds, 0])
        ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
        if i == 0:
            plt.title('Highest Negative Effect Parameters')
        if i == n_outputs - 1:
            plt.xlabel('Parameter IDs')

    ## Save figure
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)

    # Plot individual parameters
    individual_indices = [
        np.nanargmax(z_score_diff[0, :]),
        np.nanargmin(z_score_diff[0, :]),
        np.nanargmax(z_score_diff[1, :]),
        np.nanargmin(z_score_diff[1, :]),
        ]
    n_individual = len(individual_indices)
    x_values = [-1, 0, 1]
    plt.figure()

    for i, label in enumerate(labels):
        shared_ax = None
        for j, idx in enumerate(individual_indices):
            ## Shared y axis for each row
            ax = plt.subplot(n_outputs, n_individual, i*n_individual + j + 1, sharey=shared_ax)
            if shared_ax is None:
                shared_ax = ax

            ## Plot data
            plt.plot(
                x_values,
                [decrease_params_data[i, idx], control_data[i], increase_params_data[i, idx]],
                'x')

            ## Format axes
            plt.xticks(x_values, ['Decrease', 'Control', 'Increase'])
            ax.tick_params(labelsize=6)
            ax.spines['right'].set_visible(False)
            ax.spines['top'].set_visible(False)
            if i < n_outputs - 1:
                ax.tick_params(labelbottom=False)
            if j > 0:
                ax.tick_params(labelleft=False)
            if i == 0:
                plt.title(param_ids[idx], fontsize=8)
            if j == 0:
                plt.ylabel(label, fontsize=7)

    ## Save figure
    plt.tight_layout()
    # metadata is an argument of exportFigure, not of str.format; it used to
    # be swallowed by format() so this figure was exported without metadata.
    exportFigure(plt, plotOutDir, '{}_individual'.format(plotOutFileName), metadata)
    plt.close('all')

    # Save z scores to tsv
    with open(os.path.join(plotOutDir, '{}.tsv'.format(plotOutFileName)), 'w') as f:
        writer = csv.writer(f, delimiter='\t')

        writer.writerow(
            ['Parameter']
            + headers(labels, 'Z-score, difference')
            + headers(labels, 'Z-score, increase')
            + headers(labels, 'Z-score, decrease')
            + headers(labels, 'Raw average, difference')
            + headers(labels, 'Raw average, increase')
            + headers(labels, 'Raw average, decrease')
            )
        writer.writerows(np.hstack((
            param_ids.reshape(-1, 1),
            z_score_diff.T,
            z_score_increase.T,
            z_score_decrease.T,
            data_diff.T,
            increase_params_data.T,
            decrease_params_data.T
            )))
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Compare simulated k_cat distributions of two variants to measurements.

    Two variants are picked with get_disabled_constraints: the baseline
    ("old") one, for which no additional constraints are disabled, and the
    "new" one whose additionally disabled constraints equal
    ADDITIONAL_DISABLED_CONSTRAINTS. For every reaction in REACTIONS a
    violin plot of flux / enzyme concentration / saturation is drawn for
    both variants, next to the temperature-adjusted measurements. If
    either variant is missing, a message is printed and nothing is
    plotted.

    Args:
        inputDir: directory handed to AnalysisPaths; also holds
            'kb/<SERIALIZED_FIT1_FILENAME>' from which sim_data is loaded
        plotOutDir: directory the figure is exported to (created if needed)
        plotOutFileName: base file name passed to exportFigure
        simDataFile: unused (sim_data is loaded from inputDir instead)
        validationDataFile: unused by this plot
        metadata: passed on to exportFigure

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    # scan all variants to find variant indexes for comparison
    old_variant = None
    new_variant = None
    for variant in variants:
        _, additional_disabled = get_disabled_constraints(variant)
        if additional_disabled is None:
            old_variant = variant
        elif ADDITIONAL_DISABLED_CONSTRAINTS == set(additional_disabled):
            new_variant = variant

    # if the baseline variant or the new variant are missing, stop plotting
    if (old_variant is None) or (new_variant is None):
        print('Variant simulations missing!')
        return

    compared_variants = [old_variant, new_variant]

    # Load sim_data
    with open(os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME), 'rb') as f:
        sim_data = cPickle.load(f)

    # get reactions from sim_data
    reactionCatalysts = sim_data.process.metabolism.reactionCatalysts

    reaction_to_enzyme = {r: reactionCatalysts[r][0] for r in REACTIONS}
    # Materialized once: the same order is needed for reading the counts
    # and for zipping them with the ids afterwards.
    enzyme_names = list(reaction_to_enzyme.values())
    reactions_with_km = sorted(SIMULATION_KMS)
    km_metabolites = [SIMULATION_KMS[r]['metabolite'] for r in reactions_with_km]
    kms = np.array([SIMULATION_KMS[r]['KM'] for r in reactions_with_km])
    km_constraint_indices = [
        SIMULATION_KMS[r]['constraint_index'] for r in reactions_with_km
        ]

    # initialize dictionaries for fluxes and concentrations
    all_reaction_fluxes = {}
    all_enzyme_concentrations = {}
    all_km_adjustments = {}
    for variant in compared_variants:
        reaction_fluxes = {r: [] for r in REACTIONS}
        enzyme_concentrations = {e: [] for e in enzyme_names}
        km_adjustments = {r: [] for r in reactions_with_km}
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, "simOut")

            # Listeners used
            try:
                kinetics_reader = TableReader(os.path.join(simOutDir, 'EnzymeKinetics'))
                fbaResults = TableReader(os.path.join(simOutDir, "FBAResults"))
            except Exception as e:
                # Skip simulations whose tables cannot be opened
                print(e)
                continue

            # read from kinetics listener
            counts_to_molar = (
                (COUNTS_UNITS / VOLUME_UNITS)
                * kinetics_reader.readColumn('countsToMolar')[START_TIME_STEP:].reshape(-1, 1)
                )
            all_constraints_used = kinetics_reader.readColumn(
                'reactionConstraint')[START_TIME_STEP:]

            # Store fluxes
            reactionIDs = np.array(fbaResults.readAttribute("reactionIDs"))
            reactionFluxes = fbaResults.readColumn("reactionFluxes")[START_TIME_STEP:, :]
            reaction_flux_dict = dict(zip(reactionIDs, reactionFluxes.T))
            for reaction_id in REACTIONS:
                reaction_fluxes[reaction_id].extend(
                    list(reaction_flux_dict[reaction_id]))

            # Store enzyme concentrations
            enzyme_counts, met_counts = read_bulk_molecule_counts(
                simOutDir, (enzyme_names, km_metabolites))
            enzyme_conc = counts_to_molar.asNumber(
                COUNTS_UNITS / VOLUME_UNITS) * enzyme_counts[START_TIME_STEP:, :]
            met_conc = counts_to_molar.asNumber(
                units.umol / units.L) * met_counts[START_TIME_STEP:, :]
            for enzyme_id, conc_time_series in zip(enzyme_names, enzyme_conc.T):
                enzyme_concentrations[enzyme_id].extend(list(conc_time_series))

            # Calculate enzyme saturation for reactions with KM values
            # (saturation is 1 at time steps where the KM constraint was
            # not the one used)
            adjust_km = np.zeros(
                (len(counts_to_molar), len(km_constraint_indices)), bool)
            for i, idx in enumerate(km_constraint_indices):
                constraint_used, _ = np.where(all_constraints_used == idx)
                adjust_km[constraint_used, i] = True
            enzyme_saturation = met_conc / (met_conc + kms)
            enzyme_saturation[~adjust_km] = 1
            for rxn, saturation in zip(reactions_with_km, enzyme_saturation.T):
                km_adjustments[rxn].extend(list(saturation))

        all_reaction_fluxes[variant] = reaction_fluxes
        all_enzyme_concentrations[variant] = enzyme_concentrations
        all_km_adjustments[variant] = km_adjustments

    ### Make figure ###
    cols = 1
    rows = len(REACTIONS)
    plt.figure(figsize=(cols * 3, rows * 5))

    # go through each reaction to show predicted k_cat distribution for the
    # new and old variant, and experimental measurements
    for reaction_idx, reaction_id in enumerate(REACTIONS):
        enzyme_id = reaction_to_enzyme[reaction_id]

        # old measurements (each scaled by 2**((37 - t) / 10) for its
        # measurement temperature t)
        reaction_measurements = OLD_MEASUREMENTS[reaction_id]
        measurements = reaction_measurements['measurements']
        temps = reaction_measurements['temps']
        adjusted_measurements = np.array([
            2**((37. - t) / 10.) * m
            for (m, t) in zip(measurements, temps)
            ])

        # new measurements
        reaction_measurements = NEW_MEASUREMENTS.get(reaction_id, {})
        measurements = reaction_measurements.get('measurements', [])
        temps = reaction_measurements.get('temps', [])
        new_adjusted_measurements = np.array([
            2**((37. - t) / 10.) * m
            for (m, t) in zip(measurements, temps)
            ])

        # get effective kcat for GLUTATHIONE-REDUCT
        if reaction_id == 'GLUTATHIONE-REDUCT-NADPH-RXN':
            # saturated_fraction calculated from Smirnova, et al. (2005). "Effects of cystine and
            # hydrogen peroxide on glutathione status and expression of antioxidant genes in Escherichia coli"
            # Oxidized glutathione (GSSG in table 2) gives ~19 uM concentration (with 0.3 dry fraction and 1.1 g/mL density)
            # With 61 uM Km for this reaction, that gives a saturated fraction of 0.238
            saturated_fraction = 0.238
            new_adjusted_measurements = adjusted_measurements * saturated_fraction

        # Initialize subplots
        ax = plt.subplot(rows, cols, reaction_idx + 1)

        # calculate the reaction's k_cat distribution for each compared variant
        k_cat_distribution = {}
        for variant in compared_variants:
            ## Get data
            rxn_fluxes = np.array(
                all_reaction_fluxes[variant][reaction_id])  # mmol / L / s
            enzyme_concs = np.array(
                all_enzyme_concentrations[variant][enzyme_id])  # mmol / L
            saturation = np.array(all_km_adjustments[variant].get(
                reaction_id, [1] * len(rxn_fluxes)))

            # calculate k_cats (adjusted for saturation in the sim), remove zeros, save to this variant's distribution
            k_cats = rxn_fluxes / enzyme_concs / saturation
            k_cats = k_cats[k_cats > 1e-10]
            k_cat_distribution[variant] = k_cats

        data = [
            k_cat_distribution[old_variant],
            k_cat_distribution[new_variant],
            ]

        # plot
        violin_pos = [1, 3]  # position of violin plots [old, new]
        measure_pos = 2  # position of measurements
        ax.violinplot(
            data, violin_pos, widths=1.0,
            showmeans=False, showextrema=False, showmedians=False)
        ax.scatter(
            np.full_like(adjusted_measurements, measure_pos),
            adjusted_measurements,
            marker='o', color='#eb7037', s=50, alpha=0.7)
        ax.scatter(
            np.full_like(new_adjusted_measurements, measure_pos),
            new_adjusted_measurements,
            marker='o', color='#eb7037', s=50, alpha=0.7)

        # format
        rxn_id_length = 25
        text_reaction_id = ('reaction: %s' % reaction_id[:rxn_id_length])
        labels = [
            '\nModel Predicted\n(Old Constraints)',
            'Measured',
            '\nModel Predicted\n(New Constraints)',
            ]
        ax.set_title(text_reaction_id, fontsize=8)
        ax.set_ylabel('$k_{cat}$ (1/s)', fontsize=8)
        set_ticks(ax, labels)
        ax.set_yscale('log')

    ### Create Plot ###
    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def divide_cell(sim):
    """
    Splits the state of a dividing cell (chromosome, bulk molecules and
    unique molecules) randomly between two daughter cells and saves each
    daughter under the "Daughter1" / "Daughter2" sub-directories of the
    simulation output directory.

    For every daughter the following are written: IsDead.cPickle, the
    BulkMolecules and UniqueMolecules containers, ElngRate.cPickle,
    elng_rate_factor.cPickle and the initial Time.

    Raises an Exception when the partial chromosome counts are uneven.
    """
    # Assign data from simulation required
    random_state = sim.randomState
    bulk = sim.internal_states['BulkMolecules']
    unique = sim.internal_states['UniqueMolecules']

    # TODO (Eran): division should be based on both nutrient and gene perturbation condition
    nutrients = sim.external_states['Environment'].nutrients

    # Create output directories
    out_dir = sim._outputDir
    filepath.makedirs(out_dir, "Daughter1")
    filepath.makedirs(out_dir, "Daughter2")
    d1_dir = os.path.join(out_dir, "Daughter1")
    d2_dir = os.path.join(out_dir, "Daughter2")

    # Check for uneven numbers of partial chromosomes. This should not happen
    # too often if the four partial chromosomes are elongated in a roughly
    # synchronized way.
    # TODO (Gwanggyu): try to handle this case instead of raising an exception
    partial_counts = bulk.container.counts(
        bulk.divisionIds['partialChromosome'])
    fewest_partials = partial_counts.min()
    if (partial_counts - fewest_partials).any():
        raise Exception("You won the lottery! There is an uneven number of partial chromosomes...")

    # Transform any leftover partial chromosomes into a full chromosome. This
    # should have happened in the chromosome_formation process but we could get
    # unlucky and miss this in the final timestep.
    full_chromosome_id = bulk.divisionIds['fullChromosome'][0]
    bulk.container.countInc(fewest_partials, full_chromosome_id)

    # Check if the cell is dead: either it has no full chromosome at the end
    # of its maximal simulation duration, or the simulation already flagged
    # it as dead.
    isDead = bool(
        (bulk.container.count(full_chromosome_id) == 0
            and (sim.time() - sim.initialTime()) > sim.lengthSec())
        or sim._isDead)

    for daughter_dir in (d1_dir, d2_dir):
        with open(os.path.join(daughter_dir, "IsDead.cPickle"), 'wb') as f:
            cPickle.dump(isDead, f)

    if not isDead:
        # Divide the chromosome into two daughter cells
        # The output is used when dividing both bulk molecules and unique
        # molecules
        chromosome_counts = chromosomeDivision(bulk, random_state)

        # Create divided containers
        d1_bulk, d2_bulk = divideBulkMolecules(
            bulk, random_state, chromosome_counts)
        d1_unique, d2_unique, daughter_elng_rates = divideUniqueMolecules(
            unique, random_state, chromosome_counts, nutrients, sim)
    else:
        # Cell is dead - set daughter cell containers to empty values
        d1_bulk = bulk.container.emptyLike()
        d2_bulk = bulk.container.emptyLike()
        d1_unique = unique.container.emptyLike()
        d2_unique = unique.container.emptyLike()
        daughter_elng_rates = {
            "d1_elng_rate": 0.,
            "d2_elng_rate": 0.,
            "d1_elng_rate_factor": 0.,
            "d2_elng_rate_factor": 0.,
            }

    # Save divided containers
    container_outputs = (
        (d1_bulk, d1_dir, "BulkMolecules"),
        (d2_bulk, d2_dir, "BulkMolecules"),
        (d1_unique, d1_dir, "UniqueMolecules"),
        (d2_unique, d2_dir, "UniqueMolecules"),
        )
    for container, daughter_dir, table_name in container_outputs:
        saveContainer(container, os.path.join(daughter_dir, table_name))

    # Save the elongation rate values of each daughter
    rate_outputs = (
        (d1_dir, "ElngRate.cPickle", "d1_elng_rate"),
        (d2_dir, "ElngRate.cPickle", "d2_elng_rate"),
        (d1_dir, "elng_rate_factor.cPickle", "d1_elng_rate_factor"),
        (d2_dir, "elng_rate_factor.cPickle", "d2_elng_rate_factor"),
        )
    for daughter_dir, file_name, key in rate_outputs:
        with open(os.path.join(daughter_dir, file_name), 'wb') as f:
            cPickle.dump(daughter_elng_rates[key], f)

    # Save daughter cell initial time steps
    for daughter_dir in (d1_dir, d2_dir):
        saveTime(
            sim.time(),
            os.path.join(daughter_dir, "Time"),
            sim.timeStepSec())
def run(self, args):
    """Run the fitter and the validation-data tasks for one simulation path.

    Creates ``<sim_path>/kb`` and runs, in order: InitRawDataTask,
    FitSimDataTask (fit level 1), SymlinkTask (most-fit link pointing to
    the fit level 1 file), InitRawValidationDataTask and
    InitValidationDataTask. The written file names are printed at the end.

    Args:
        args: parsed command line arguments. Reads ``sim_path`` and the
            fitter options forwarded to FitSimDataTask below.
    """
    kb_directory = fp.makedirs(args.sim_path, "kb")

    def in_kb(file_name):
        # Path of a file inside the kb directory.
        return os.path.join(kb_directory, file_name)

    raw_data_file = in_kb(constants.SERIALIZED_RAW_DATA)
    sim_data_file = in_kb(constants.SERIALIZED_FIT1_FILENAME)
    cell_specs_file = in_kb(constants.SERIALIZED_CELL_SPECS)
    cached_sim_data_file = os.path.join(
        fp.ROOT_PATH, 'cached', constants.SERIALIZED_FIT1_FILENAME)
    most_fit_filename = in_kb(constants.SERIALIZED_SIM_DATA_MOST_FIT_FILENAME)
    raw_validation_data_file = in_kb(constants.SERIALIZED_RAW_VALIDATION_DATA)
    validation_data_file = in_kb(constants.SERIALIZED_VALIDATION_DATA)

    if args.debug or args.cached:
        debug_tag = 'DEBUG ' if args.debug else ''
        cached_tag = 'CACHED ' if args.cached else ''
        print("{}{}Fitter".format(debug_tag, cached_tag))

    # Options handed to the fitter under the same name as on the command
    # line ("cached" is a bool).
    same_name_options = (
        'cached',
        'cpus',
        'debug',
        'variable_elongation_transcription',
        'variable_elongation_translation',
        'rnapoly_activity_fitting',
        'mrna_half_life_fitting',
        'max_rnap_activity',
        'adjust_rnase_expression',
        'disable_measured_protein_deg',
        'alternate_mass_fraction_protein',
        'alternate_mass_fraction_rna',
        'alternate_mass_fraction_mrna',
        'alternate_r_protein_degradation',
        'alternate_rna_seq',
        'alternate_rna_half_life',
        'alternate_translation_efficiency',
        'alternate_ribosome_activity',
        'disable_rnap_fraction_increase',
        'disable_ribosome_activity_fix',
        'save_cell_specs',
        'write_translation_efficiencies',
        )
    fit_options = {name: getattr(args, name) for name in same_name_options}

    # Options whose task name differs from the command line name, plus
    # the file locations.
    fit_options.update(
        fit_level=1,
        input_data=raw_data_file,
        output_data=sim_data_file,
        cached_data=cached_sim_data_file,  # cached file to copy
        cell_specs_file=cell_specs_file,
        disable_ribosome_capacity_fitting=args.disable_ribosome_fitting,
        disable_rnapoly_capacity_fitting=args.disable_rnapoly_fitting,
        adjust_rna_and_protein_parameters=args.no_expression_adjustment,
        )

    tasks = [
        InitRawDataTask(output=raw_data_file),
        FitSimDataTask(**fit_options),
        SymlinkTask(
            to=constants.SERIALIZED_FIT1_FILENAME,
            link=most_fit_filename,
            overwrite_if_exists=True),
        InitRawValidationDataTask(output=raw_validation_data_file),
        InitValidationDataTask(
            validation_data_input=raw_validation_data_file,
            knowledge_base_raw=raw_data_file,
            output_data=validation_data_file),
        ]
    for task in tasks:
        task.run_task({})

    written = [
        'Wrote',
        raw_data_file,
        sim_data_file,
        most_fit_filename,
        raw_validation_data_file,
        validation_data_file,
        ]
    print('\n\t'.join(written))
def exportFigure(plt, plotOutDir, plotOutFileName, metadata=None, transparent=False):
    """Save the current figure in three formats: default type, SVG and PNG.

    When metadata has an "analysis_type" of 'single', 'multigen', 'cohort'
    or 'variant', a signature line built from it is added at the bottom
    left of the figure first. Any other analysis type (including None) is
    saved without a signature instead of failing on an unbound variable.

    Args:
        plt: the pyplot-like object to draw on and save from
        plotOutDir: output directory; the SVG_DIR and LOW_RES_DIR
            sub-directories are created in it
        plotOutFileName: file name without extension
        metadata: optional dict describing the simulation
        transparent: passed to every savefig() call
    """
    if metadata is not None and "analysis_type" in metadata:
        metadata_signature = _format_metadata_signature(metadata)
        if metadata_signature is not None:
            # Add metadata signature to the bottom of the plot
            plt.figtext(0, 0, metadata_signature, size=8)

    # Make folders for holding alternate types of images
    filepath.makedirs(plotOutDir, LOW_RES_DIR)
    filepath.makedirs(plotOutDir, SVG_DIR)

    # Save image in the default type
    plt.savefig(
        os.path.join(plotOutDir, plotOutFileName + DEFAULT_IMAGE_TYPE),
        transparent=transparent)

    # Save SVG image
    plt.savefig(
        os.path.join(plotOutDir, SVG_DIR, plotOutFileName + '.svg'),
        transparent=transparent)

    # Save PNG image
    plt.savefig(
        os.path.join(plotOutDir, LOW_RES_DIR, plotOutFileName + '.png'),
        dpi=LOW_RES_DPI,
        transparent=transparent)


def _format_metadata_signature(metadata):
    """Return the signature text for metadata, or None for an unknown analysis type."""
    analysis_type = metadata["analysis_type"]

    if analysis_type == 'single':
        # Single gen figure: generation shown as "<gen>/<last gen index>"
        specific = [
            str(metadata["variant_function"]),
            str(metadata["variant_index"]),
            "Seed",
            str(metadata["seed"]),
            "Gen",
            str(metadata["gen"]) + '/' + str(int(metadata["total_gens"]) - 1),
            ]
    elif analysis_type == 'multigen':
        specific = [
            str(metadata["variant_function"]),
            str(metadata["variant_index"]),
            "Seed",
            str(metadata["seed"]),
            str(metadata["total_gens"]),
            "gens",
            ]
    elif analysis_type == 'cohort':
        specific = [
            str(metadata["variant_function"]),
            str(metadata["variant_index"]),
            str(metadata["total_gens"]),
            "gens",
            ]
    elif analysis_type == 'variant':
        specific = [
            str(metadata["total_variants"]),
            "variants",
            str(metadata["total_gens"]),
            "gens",
            ]
    else:
        return None

    # Every signature starts with the first 13 characters of the time and
    # ends with the first 10 characters of the git hash and the description.
    return "_".join(
        [str(metadata["time"])[:13]]
        + specific
        + [
            "Githash",
            str(metadata["git_hash"])[:10],
            "Desc",
            str(metadata["description"]),
            ])
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """Scatter plot of added volume against birth volume per variant.

    Only runs when metadata["variant"] is "condition". For each variant
    the first and last "cellMass" values of every cell in generations 2
    and 3 are converted to volumes with that variant's cell density. The
    first three variants are plotted and labelled "minimal", "anaerobic"
    and "+AA", in that order.

    A variant without readable cells keeps its position with empty data,
    so the labels of the following variants do not shift.

    Args:
        inputDir: directory handed to AnalysisPaths (variant_plot=True)
        plotOutDir: directory the figure is exported to (created if needed)
        plotOutFileName: base file name passed to exportFigure
        simDataFile: unused (each variant's own sim_data is loaded)
        validationDataFile: unused by this plot
        metadata: dict; "variant" is read here, the rest is passed on to
            exportFigure

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print("This plot only runs for the 'condition' variant.")
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()

    gens = [2, 3]
    # Labels in variant order
    labels = ["minimal", "anaerobic", "+AA"]

    initial_volumes = []
    added_volumes = []

    for variant in variants:
        with open(ap.get_variant_kb(variant), 'rb') as f:
            sim_data = cPickle.load(f)

        cell_density = sim_data.constants.cellDensity.asNumber(
            units.fg / units.um**3)

        initial_masses = np.zeros(0)
        final_masses = np.zeros(0)

        for simDir in ap.get_cells(variant=[variant], generation=gens):
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("cellMass")
                first_mass = cellMass[0]
                last_mass = cellMass[-1]
            except Exception as e:
                # Skip cells whose mass table cannot be read, but say so
                print(e)
                continue

            initial_masses = np.hstack((initial_masses, first_mass))
            final_masses = np.hstack((final_masses, last_mass))

        added_masses = final_masses - initial_masses

        initial_volumes.append(initial_masses / cell_density)
        added_volumes.append(added_masses / cell_density)

    plt.style.use('seaborn-deep')

    plt.figure(figsize=(5, 5))

    # zip() stops at the shorter sequence, so fewer than three variants no
    # longer raises an IndexError.
    for label, initial_volume, added_volume in zip(labels, initial_volumes, added_volumes):
        plt.scatter(initial_volume, added_volume, s=3, label=label)

    plt.xlim([0, 4])
    plt.ylim([0, 4])
    plt.xlabel(r"Birth Volume ($\mu m^3$)")
    plt.ylabel(r"Added Volume ($\mu m^3$)")
    plt.legend()

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Compare simulated monomer counts for PROTEINS_WITH_HALF_LIFE against the
    Schmidt 2015 'glucoseCounts' validation data for the first two variants.

    Per cell, time-averaged bulk counts are loaded into a container, active
    ribosomes and RNA polymerases are added back as their full complexes,
    and complexes are broken down to monomers via the equilibrium and
    complexation stoichiometry. The per-variant mean and standard deviation
    across cells are plotted on log10 axes with Pearson r in the legend.

    Args:
        inputDir (str): directory holding 'kb' and the variant outputs.
        plotOutDir (str): directory the figure is written to (created).
        plotOutFileName (str): output file name without extension.
        simDataFile: unused; sim_data is loaded from inputDir/kb instead.
        validationDataFile (str): path to the pickled validation data.
        metadata: forwarded to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    fit1_path = os.path.join(inputDir, 'kb', constants.SERIALIZED_FIT1_FILENAME)
    with open(fit1_path, 'rb') as f:
        sim_data = cPickle.load(f)
    with open(validationDataFile, 'rb') as f:
        validation_data = cPickle.load(f)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    variants = ap.get_variants()
    expected_n_variants = 2

    if len(variants) < expected_n_variants:
        print('This plot only runs for {} variants.'.format(expected_n_variants))
        return

    # IDs for appropriate proteins
    complexation = sim_data.process.complexation
    equilibrium = sim_data.process.equilibrium
    ids_complexation = complexation.moleculeNames
    ids_complexation_complexes = complexation.ids_complexes
    ids_equilibrium = equilibrium.moleculeNames
    ids_equilibrium_complexes = equilibrium.ids_complexes
    ids_translation = sim_data.process.translation.monomerData['id'].tolist()
    ids_protein = sorted(
        set(ids_complexation + ids_equilibrium + ids_translation))

    # Stoichiometry matrices
    equil_stoich = equilibrium.stoichMatrixMonomers()
    complex_stoich = complexation.stoichMatrixMonomers()

    # Protein container views (all views share the container's counts)
    protein_container = BulkObjectsContainer(ids_protein, dtype=np.float64)
    view_complexation = protein_container.countsView(ids_complexation)
    view_complexation_complexes = protein_container.countsView(
        ids_complexation_complexes)
    view_equilibrium = protein_container.countsView(ids_equilibrium)
    view_equilibrium_complexes = protein_container.countsView(
        ids_equilibrium_complexes)

    # Load model data: one row per protein, one column per variant
    result_shape = (len(PROTEINS_WITH_HALF_LIFE), expected_n_variants)
    model_counts = np.zeros(result_shape)
    model_std = np.zeros(result_shape)

    for col, variant in enumerate(variants):
        if col >= expected_n_variants:
            print('Skipping variant {} - only runs for {} variants.'.format(
                variant, expected_n_variants))
            continue

        per_cell_counts = []
        for sim_dir in ap.get_cells(variant=[variant]):
            simOutDir = os.path.join(sim_dir, 'simOut')

            # Listeners used
            unique_reader = TableReader(
                os.path.join(simOutDir, 'UniqueMoleculeCounts'))

            # Account for bulk molecules (time-averaged)
            (bulk_counts,) = read_bulk_molecule_counts(simOutDir, ids_protein)
            protein_container.countsIs(bulk_counts.mean(axis=0))

            # Account for unique molecules
            unique_ids = unique_reader.readAttribute('uniqueMoleculeIds')
            ribosome_index = unique_ids.index('activeRibosome')
            rnap_index = unique_ids.index('activeRnaPoly')
            unique_counts = unique_reader.readColumn('uniqueMoleculeCounts')
            mean_ribosomes = unique_counts[:, ribosome_index].mean()
            mean_rnap = unique_counts[:, rnap_index].mean()
            protein_container.countsInc(
                mean_ribosomes,
                [sim_data.moleculeIds.s30_fullComplex,
                 sim_data.moleculeIds.s50_fullComplex])
            protein_container.countsInc(
                mean_rnap, [sim_data.moleculeIds.rnapFull])

            # Account for small-molecule bound complexes
            view_equilibrium.countsDec(
                equil_stoich.dot(view_equilibrium_complexes.counts()))

            # Account for monomers in complexed form
            view_complexation.countsDec(
                complex_stoich.dot(view_complexation_complexes.counts()))

            per_cell_counts.append(
                protein_container.countsView(PROTEINS_WITH_HALF_LIFE).counts())

        model_counts[:, col] = np.mean(per_cell_counts, axis=0)
        model_std[:, col] = np.std(per_cell_counts, axis=0)

    # Validation data
    schmidt_data = validation_data.protein.schmidt2015Data
    schmidt_ids = {
        monomer: idx for idx, monomer in enumerate(schmidt_data['monomerId'])}
    schmidt_counts = schmidt_data['glucoseCounts']
    validation_counts = np.array(
        [schmidt_counts[schmidt_ids[p]] for p in PROTEINS_WITH_HALF_LIFE])

    # Process data: asymmetric error bars in log space
    model_log_counts = np.log10(model_counts)
    model_log_lower_std = model_log_counts - np.log10(model_counts - model_std)
    model_log_upper_std = np.log10(model_counts + model_std) - model_log_counts
    validation_log_counts = np.log10(validation_counts)
    r_before, p_before = stats.pearsonr(
        validation_log_counts, model_log_counts[:, 0])
    r_after, p_after = stats.pearsonr(
        validation_log_counts, model_log_counts[:, 1])

    # Scatter plot of model vs validation counts
    max_counts = np.ceil(
        max(validation_log_counts.max(), model_log_upper_std.max()))
    limits = [0, max_counts]
    plt.figure()
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    ## Plot data
    for col in range(expected_n_variants):
        error_bars = np.vstack(
            (model_log_lower_std[:, col], model_log_upper_std[:, col]))
        plt.errorbar(
            validation_log_counts, model_log_counts[:, col],
            yerr=error_bars, fmt='o', color=colors[col], ecolor='k',
            capsize=3, alpha=0.5)
    plt.plot(limits, limits, 'k--', linewidth=0.5, label='_nolegend_')

    ## Format axes
    plt.xlabel('Validation Counts\n(log10(counts))')
    plt.ylabel('Average Simulation Counts\n(log10(counts))')
    ax = plt.gca()
    for side in ('right', 'top'):
        ax.spines[side].set_visible(False)
    for side in ('left', 'bottom'):
        ax.spines[side].set_position(('outward', 10))
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.yaxis.set_major_locator(MaxNLocator(integer=True))

    ## Add legend
    legend_text = [
        'Before: r={:.2f}, p={:.3f}'.format(r_before, p_before),
        'After: r={:.2f}, p={:.3f}'.format(r_after, p_after),
        ]
    plt.legend(legend_text, frameon=False)

    plt.tight_layout()
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close('all')
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    """
    Plot mean RNA/protein mass ratio against growth rate for each variant of
    the 'condition' variant set, with the linear relation from Scott et al.
    (2010) overlaid.

    Growth rate is ln(2) divided by each cell's simulated duration in hours
    (last minus first "time" entry); the ratio is mean "rnaMass" over mean
    "proteinMass". Only generations from FIRST_GENERATION onward are used.

    Args:
        inputDir (str): directory handed to AnalysisPaths; must exist.
        plotOutDir (str): directory the figure is written to (created).
        plotOutFileName (str): output file name without extension.
        simDataFile: unused by this plot.
        validationDataFile: unused by this plot.
        metadata (dict): must contain "variant"; forwarded to exportFigure.

    Raises:
        Exception: if inputDir is not an existing directory.
    """
    if metadata["variant"] != "condition":
        print('This analysis only runs for the "condition" variant.')
        return

    if not os.path.isdir(inputDir):
        raise Exception('inputDir does not currently exist as a directory')

    filepath.makedirs(plotOutDir)

    ap = AnalysisPaths(inputDir, variant_plot=True)
    n_gens = ap.n_generation
    variants = ap.get_variants()

    if n_gens - 1 < FIRST_GENERATION:
        print('Not enough generations to plot.')
        return

    all_growth_rates = []
    all_rna_to_protein_ratios = []

    for variant in variants:
        doubling_times = np.zeros(0)
        variant_rna_to_protein_ratios = np.zeros(0)

        all_cells = ap.get_cells(
            variant=[variant],
            generation=range(FIRST_GENERATION, n_gens))

        for simDir in all_cells:
            # Best effort: skip cells whose output cannot be read, but let
            # KeyboardInterrupt / SystemExit propagate.
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                rna_mass = mass.readColumn("rnaMass")
                protein_mass = mass.readColumn("proteinMass")
                time = TableReader(
                    os.path.join(simOutDir, "Main")).readColumn("time")

                doubling_time = (time[-1] - time[0]) / 3600.
                ratio = rna_mass.mean() / protein_mass.mean()
            except Exception:
                continue

            # Append both values together so the two arrays stay aligned
            doubling_times = np.hstack((doubling_times, doubling_time))
            variant_rna_to_protein_ratios = np.hstack(
                (variant_rna_to_protein_ratios, ratio))

        # Variants with no readable cells would only produce NaN means
        if doubling_times.size == 0:
            continue

        all_growth_rates.append(np.log(2) / doubling_times)
        all_rna_to_protein_ratios.append(variant_rna_to_protein_ratios)

    plt.figure(figsize=FIGSIZE)
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # One point per variant that produced data. Iterating the collected
    # results (instead of a fixed range(3)) avoids an IndexError when fewer
    # than three variants have usable cells.
    for growth_rates, ratios in zip(all_growth_rates, all_rna_to_protein_ratios):
        plt.errorbar(
            growth_rates.mean(),
            ratios.mean(),
            yerr=ratios.std(),
            color=color_cycle[0], marker='o', markersize=5, linewidth=1,
            capsize=2)

    # Add linear plot proposed in Scott et al. (2010)
    x_linear = np.linspace(0, 3, 100)
    y_linear = x_linear / 4.5 + 0.087
    plt.plot(x_linear, y_linear, linewidth=2, color=color_cycle[2])

    plt.xlim([0, 3])
    plt.ylim([0, 1.6])
    plt.xlabel(r"Growth rate $\lambda$ (hour$^{-1}$)")
    plt.ylabel("RNA/protein mass ratio")

    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
def _env_flag(name):
    """Read environment variable `name` as an integer flag; unset means "0" (False)."""
    return bool(int(os.environ.get(name, "0")))


# Optional behaviors, each switched on by setting the variable to a nonzero integer
ALTERNATE_RNA_HALF_LIFE = _env_flag("ALTERNATE_RNA_HALF_LIFE")
ALTERNATE_TRANSLATION_EFFICIENCY = _env_flag("ALTERNATE_TRANSLATION_EFFICIENCY")
ALTERNATE_RIBOSOME_ACTIVITY = _env_flag("ALTERNATE_RIBOSOME_ACTIVITY")
DISABLE_RNAP_FRACTION_INCREASE = _env_flag("DISABLE_RNAP_FRACTION_INCREASE")
DISABLE_RIBOSOME_ACTIVITY_FIX = _env_flag("DISABLE_RIBOSOME_ACTIVITY_FIX")
SAVE_CELL_SPECS = _env_flag("SAVE_CELL_SPECS")
# NOTE(review): parsed as an integer flag like the others despite the name
# suggesting a file path - confirm this is intended.
CELL_SPECS_FILE = _env_flag("CELL_SPECS_FILE")
WRITE_TRANSLATION_EFFICIENCIES = _env_flag("WRITE_TRANSLATION_EFFICIENCIES")

# Output compression is switched off whenever aggregate analysis is not run
if not RUN_AGGREGATE_ANALYSIS:
    COMPRESS_OUTPUT = False

### Set path variables and create directories

# Two directory levels above this file
WC_ECOLI_DIRECTORY = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
OUT_DIRECTORY = filepath.makedirs(WC_ECOLI_DIRECTORY, "out")
CACHED_SIM_DATA_DIRECTORY = os.path.join(WC_ECOLI_DIRECTORY, "cached")

# Each submission gets its own "<timestamp>__<description>" output directory
SUBMISSION_TIME = filepath.timestamp()
INDIV_OUT_DIRECTORY = filepath.makedirs(
    OUT_DIRECTORY, "__".join((SUBMISSION_TIME, SIM_DESCRIPTION)))
KB_DIRECTORY = filepath.makedirs(INDIV_OUT_DIRECTORY, "kb")
METADATA_DIRECTORY = filepath.makedirs(INDIV_OUT_DIRECTORY, "metadata")

if VERBOSE_QUEUE:
    print("Building filestructure.")

# One "<VARIANT>_<zero-padded index>" directory per variant, each with its
# own "kb" and "metadata" subdirectories
for i in VARIANTS_TO_RUN:
    VARIANT_DIRECTORY = filepath.makedirs(
        INDIV_OUT_DIRECTORY, "%s_%06d" % (VARIANT, i))
    VARIANT_SIM_DATA_DIRECTORY = filepath.makedirs(VARIANT_DIRECTORY, "kb")
    VARIANT_METADATA_DIRECTORY = filepath.makedirs(VARIANT_DIRECTORY, "metadata")
""" default_output_dir = FILE_LOCATION parser = argparse.ArgumentParser(description='Script to save lists of' ' included genes, metabolites and kinetic constraints in the model') parser.add_argument('-r', '--raw-data', default='', help='Path to raw_data cPickle object to load, recalculates raw_data if not specified') parser.add_argument('-s', '--sim-data', default='', help='Path to sim_data cPickle object to load, recalculates sim_data if not specified') parser.add_argument('-o', '--output', default=default_output_dir, help='Directory path to save tsv files (default: {})'.format(default_output_dir)) return parser.parse_args() if __name__ == '__main__': # Parse command line args args = parse_args() # Load required data raw_data = load_raw_data(args.raw_data) sim_data = load_sim_data(args.sim_data, raw_data) # Analyze data and save tsv files filepath.makedirs(args.output) save_genes(raw_data, sim_data, os.path.join(args.output, GENES_FILE)) save_metabolites(raw_data, sim_data, os.path.join(args.output, METABOLITES_FILE)) save_kinetics(sim_data, os.path.join(args.output, KINETICS_FILE))