def simulatingDels():
    """Print the results of a few knock-out simulations on the textbook model.

    The function loads cobrapy's bundled "textbook" test model and prints:

    * the optimum of the intact model,
    * the optimum with reaction ``PFK`` knocked out,
    * the optimum with gene ``b1723`` and then additionally ``b3916``
      knocked out,
    * single-gene and single-reaction deletion results for the first 20
      genes / reactions.

    Nothing is returned; all output goes to stdout.
    """
    # Imports are kept function-local, as in the original, so that importing
    # the enclosing module does not require cobra.
    import cobra.test
    from cobra.flux_analysis import (single_gene_deletion,
                                     single_reaction_deletion)

    cobra_model = cobra.test.create_test_model("textbook")

    print('complete model: ', cobra_model.optimize())
    # NOTE(review): cobrapy documents that using the model as a context
    # manager reverts the knock-outs on exit - confirm for the installed
    # version.
    with cobra_model:
        cobra_model.reactions.PFK.knock_out()
        print('pfk knocked out: ', cobra_model.optimize())

    print('complete model: ', cobra_model.optimize())
    with cobra_model:
        cobra_model.genes.b1723.knock_out()
        print('pfkA knocked out: ', cobra_model.optimize())
        cobra_model.genes.b3916.knock_out()
        print('pfkB knocked out: ', cobra_model.optimize())

    # Only subsets are scanned; the former full-model scan was computed and
    # then thrown away, so it has been removed.
    print(single_gene_deletion(cobra_model, cobra_model.genes[:20]))  # subset
    print(single_reaction_deletion(cobra_model, cobra_model.reactions[:20]))
    print('Hello world!')
def capitulo_5():
    """Run the deletion examples and write a report to a text file.

    The report is written to ``resultados_capitulo_5.txt`` in the current
    working directory (the file is overwritten).  It contains the optimisation
    results of the "textbook" model before and after knocking out ``PFK`` and
    the genes ``b1723`` / ``b3916``, followed by timings of a double gene
    deletion scan on the "ecoli" model with two processes and with one.

    Relies on ``cobra``, ``time`` and the ``*_deletion`` functions being
    available at module level.  Returns ``None``.
    """
    # ``with`` guarantees the report is flushed and closed even if one of the
    # simulations raises.
    with open("resultados_capitulo_5.txt", "w") as report:

        def write_line(value):
            """Write ``str(value)`` followed by a newline to the report."""
            report.write(str(value) + "\n")

        cobra_model = cobra.test.create_test_model("textbook")
        ecoli_model = cobra.test.create_test_model("ecoli")

        write_line(cobra_model.optimize())
        with cobra_model:
            cobra_model.reactions.PFK.knock_out()
            write_line(cobra_model.optimize())

        write_line(cobra_model.optimize())
        with cobra_model:
            cobra_model.genes.b1723.knock_out()
            write_line(cobra_model.optimize())
            cobra_model.genes.b3916.knock_out()
            write_line(cobra_model.optimize())

        # The results of the following scans are not reported; the calls are
        # kept so the function still exercises the same API surface.
        single_gene_deletion(cobra_model)
        single_gene_deletion(cobra_model, cobra_model.genes[:20])
        single_reaction_deletion(cobra_model, cobra_model.reactions[:20])
        double_gene_deletion(cobra_model, cobra_model.genes[-5:],
                             return_frame=True).round(4)

        # Benchmark the same gene subset with 2 processes and with 1.  The
        # gene count in the message is derived from the subset itself (the
        # old text said 200 while 300 genes were scanned).
        benchmark_genes = ecoli_model.genes[:300]

        start = time()
        double_gene_deletion(ecoli_model, benchmark_genes,
                             number_of_processes=2)
        t1 = time() - start
        write_line("Double gene deletions for %d genes completed in "
                   "%.2f sec with 2 cores" % (len(benchmark_genes), t1))

        start = time()
        double_gene_deletion(ecoli_model, benchmark_genes,
                             number_of_processes=1)
        t2 = time() - start
        write_line("Double gene deletions for %d genes completed in "
                   "%.2f sec with 1 core" % (len(benchmark_genes), t2))

        write_line("Speedup of %.2fx" % (t2 / t1))

        double_reaction_deletion(cobra_model, cobra_model.reactions[2:7],
                                 return_frame=True).round(4)
def eval_ind(individual, initial_pop, model, base_biomass, exp_ess, distance):
    """Score one candidate biomass composition against essentiality data.

    A new ``BIOMASS`` reaction replaces the model's current objective
    reaction.  It consumes every metabolite of ``initial_pop.index`` whose
    position is flagged ``1`` in ``individual`` (coefficient ``-0.1``) plus
    everything in ``base_biomass``.  Single gene deletions are then simulated
    and the predicted growth/no-growth calls are compared with ``exp_ess``.

    :param individual: sequence of 0/1 flags, one per entry of
        ``initial_pop.index``.
    :param initial_pop: object whose ``index`` holds the candidate
        metabolites (used directly as keys for ``add_metabolites``).
    :param model: model to evaluate; it is modified in place (solver,
        objective reaction).
    :param base_biomass: mapping passed as-is to ``add_metabolites``.
    :param exp_ess: frame with ``Genes`` and ``Measured_growth`` columns.
    :param distance: ``'hd'`` (``hamming``) or ``'mcc'``
        (``matthews_corrcoef``).
    :return: tuple ``(score, number of selected metabolites)``.
    :raises ValueError: if ``distance`` is not one of the supported names.
    """
    # Reject an unknown metric before touching the model: previously this was
    # only reported after the expensive scan and then crashed on an unbound
    # ``dist``.
    if distance not in ('hd', 'mcc'):
        raise ValueError(
            "Invalid distance metric %r (expected 'hd' or 'mcc')" % (distance,))

    # Set this as warning
    model.solver = 'gurobi'
    old_biomass = list(linear_reaction_coefficients(model).keys())[0]
    old_biomass.remove_from_model()

    # Make a biomass reaction and optimize for it
    biomass = Reaction('BIOMASS')
    model.add_reaction(biomass)
    for position, metabolite in enumerate(initial_pop.index):
        if individual[position] == 1:
            biomass.add_metabolites({metabolite: -0.1})
    biomass.add_metabolites(base_biomass)
    biomass.objective_coefficient = 1.

    # Generate deletion results --> known bottleneck
    deletion_results = single_gene_deletion(model, model.genes, processes=1)

    # Turn growth rates into boolean calls: 1 = grows, 0 = does not grow.
    # Rows whose growth is NaN satisfy neither mask and are left out.
    growth = deletion_results['growth']
    growing = [(str(next(iter(ids))), 1)
               for ids in deletion_results[growth > 1e-3].index]
    not_growing = [(str(next(iter(ids))), 0)
                   for ids in deletion_results[growth <= 1e-3].index]
    pred_ess = pd.DataFrame(growing + not_growing,
                            columns=['Genes', 'Predicted_growth'])
    compare_df = pd.merge(right=exp_ess, left=pred_ess, on='Genes',
                          how='inner')

    measured = np.array(list(compare_df.Measured_growth))
    predicted = np.array(list(compare_df.Predicted_growth))
    if distance == 'hd':
        dist = hamming(measured, predicted)
    else:
        dist = matthews_corrcoef(measured, predicted)
    return dist, sum(individual)
def ensemble_single_gene_deletion(ensemble, num_models=None,
                                  specific_models=None, specific_genes=None):
    '''
    Performs single gene deletions on models within an ensemble and returns
    the deletion results obtained for each model.

    Parameters
    ----------
    ensemble: medusa.core.Ensemble
        The ensemble with which to perform gene deletions.
    num_models: int, optional
        Number of models for which gene deletions will be performed. That
        many members are randomly sampled from ``ensemble.members``. If None
        (default) or 0, all members are used. Ignored when specific_models
        is given.
    specific_models: list, optional
        Members for which gene deletions will be performed. Each entry is
        passed to ``ensemble.set_state`` and must expose an ``id``
        attribute. If None or empty (default), members are chosen according
        to num_models. Takes precedence over num_models.
    specific_genes: list, optional
        Genes for which deletions should be performed; passed to
        ``single_gene_deletion`` as the gene list. If None or empty
        (default), all genes are selected. We recommend identifying genes
        that are essential in all ensemble members first, then excluding
        those genes from specific_genes. This will generally speed up
        computation.

    Returns
    -------
    dict
        Maps each processed member's ``id`` to the object returned by
        ``single_gene_deletion`` for the base model in that member's state.
    '''
    if not num_models:
        num_models = len(ensemble.members)

    if specific_models:
        model_list = specific_models
    else:
        model_list = sample(ensemble.members, num_models)

    # An empty list would ask for *zero* deletions; the documented default is
    # "all genes", which single_gene_deletion selects when given None.
    gene_list = specific_genes if specific_genes else None

    deletion_results = {}
    with ensemble.base_model:
        for model in model_list:
            print('performing deletions for ' + model.id)
            ensemble.set_state(model)
            deletion_results[model.id] = single_gene_deletion(
                ensemble.base_model, gene_list)

    return deletion_results
def results_genes_flux(model, rxn_name, flux_gene_off_on):
    '''
    Optionally run a single gene deletion scan and/or a loopless flux
    variability analysis on copies of ``model``.

    Input:
      (1) model: model structure (deep-copied before each analysis, so the
          caller's object is not handed to the analysis functions)
      (2) rxn_name: list of reaction id strings,
          e.g. ['rxn00123', 'rxn00456', ...]
      (3) flux_gene_off_on: two-element sequence of switches.
          Element 0 controls the gene deletion scan and element 1 controls
          the flux analysis; a value of 0 turns that analysis off, any other
          value turns it on.

    Output:
      list ``[del_results, fluxes]``. Each entry is the corresponding
      analysis result, or an empty list when that analysis was switched off
      (or, for fluxes, when ``rxn_name`` is empty).
    '''
    try:
        if flux_gene_off_on[0] == 0:
            del_results = []
            print('no gene del results returned')
        else:
            # Essential genes of the model as entered (before any changes)
            print('Single Gene Deletion of Model Entered')
            model_genes = copy.deepcopy(model)
            del_results = single_gene_deletion(model_genes)

        if flux_gene_off_on[1] == 0:
            fluxes = []
            print('No fluxes returned')
        else:
            model_flux = copy.deepcopy(model)
            fluxes = []
            loop_reactions = []
            print('Rxn flux loop entered')
            for i, rxn_id in enumerate(rxn_name):
                loop_reactions.append(model_flux.reactions.get_by_id(rxn_id))
                if i % 10 == 0:
                    print('Flux reaction number', i)
            # One analysis over the complete list gives the range of fluxes
            # through every requested reaction; it is skipped when no
            # reaction was requested.
            if loop_reactions:
                fluxes = flux_variability_analysis(
                    model_flux, reaction_list=loop_reactions, loopless=True)
    except Exception:
        # Message kept for compatibility; the original error is re-raised so
        # the real cause (e.g. an unknown reaction id) stays visible.
        print('You must enter a cobra model structure')
        raise
    return [del_results, fluxes]
def test_single_gene_deletion_fba(self, model):
    """Check FBA single gene deletion growth rates for a few known genes.

    For every gene in the reference table the reported status must be
    ``'optimal'`` and the reported rate must lie within 0.01 of the reference
    value.
    """
    tolerance = 0.01
    reference_growth = {
        "b0008": 0.87,
        "b0114": 0.80,
        "b0116": 0.78,
        "b2276": 0.21,
        "b1779": 0.00,
    }

    rates, statuses = single_gene_deletion(
        model, gene_list=reference_growth.keys(), method="fba")

    for gene, expected_value in reference_growth.items():
        assert statuses[gene] == 'optimal'
        deviation = abs(rates[gene] - expected_value)
        assert deviation < tolerance
def _essential_ids(deletion_frame, growth_cutoff):
    """Collect the ids whose deletion leaves growth below ``growth_cutoff``.

    A NaN growth value (no usable solution for that knock-out) is treated as
    essential as well.  The deleted ids are read from the ``ids`` column when
    the frame has one, otherwise from the frame's index.
    """
    growth = deletion_frame["growth"]
    is_essential = ((growth < growth_cutoff) | growth.isna()).values
    if "ids" in deletion_frame.columns:
        id_groups = deletion_frame["ids"][is_essential]
    else:
        id_groups = deletion_frame.index[is_essential]

    essential = set()
    for group in id_groups:
        if isinstance(group, str):
            essential.add(group)
        else:
            essential.update(group)
    return essential


def list_excluded_reactions(model):
    """
    Define a list of reactions that can be knocked-out.

    A reaction is kept when it is not essential, has at least one associated
    gene, and none of its genes is essential.  (Reactions without genes -
    which presumably covers the exchange reactions - are therefore left out;
    there is no separate exchange filter.)

    The objective is set to 'EX_biomass' so that essentiality means "no
    growth after a single deletion".  The function also sets the lower bound
    of ``EX_glyc`` to -1 and of ``EX_glc`` to 0.  These changes are made on
    the model passed in and are not reverted.

    Input: model, cobrapy model structure
    Output: list of ids of the reactions that can be knocked-out
    """
    growth_cutoff = 10E-06

    model.objective = 'EX_biomass'
    model.reactions.EX_glyc.lower_bound = -1
    model.reactions.EX_glc.lower_bound = 0

    # Essentiality is resolved through the ids reported in the deletion
    # results rather than by row position, so the outcome does not depend on
    # the result order and nothing is counted once per result column.
    reaction_results = single_reaction_deletion(model, model.reactions[0:])
    essential_reaction_ids = _essential_ids(reaction_results, growth_cutoff)

    gene_results = single_gene_deletion(model, model.genes[0:])
    essential_gene_ids = _essential_ids(gene_results, growth_cutoff)

    knockout_candidates = []
    for reaction in model.reactions:
        if reaction.id in essential_reaction_ids:
            continue
        if not reaction.genes:  # no associated gene
            continue
        # Exclude reactions that depend on any essential gene.
        if any(gene.id in essential_gene_ids for gene in reaction.genes):
            continue
        knockout_candidates.append(reaction.id)
    return knockout_candidates
def evaluate(self, model):
    """Use the defined parameters to predict single gene essentiality.

    Inside a model context the configured medium (if any), objective (if any)
    and constraints are applied, then single gene deletions are simulated for
    the genes listed in ``self.data["gene"]`` using one process.

    Returns the deletion result frame extended with a ``gene`` column (first
    element of each index entry) and a boolean ``essential`` column, which is
    true when growth is below ``self.minimal_growth_rate`` or is NaN.
    """
    with model:
        if self.medium is not None:
            self.medium.apply(model)
        if self.objective is not None:
            model.objective = self.objective
        model.add_cons_vars(self.constraints)
        essen = single_gene_deletion(
            model, gene_list=self.data["gene"], processes=1)

    gene_labels = []
    for knocked_out in essen.index:
        gene_labels.append(list(knocked_out)[0])
    essen["gene"] = gene_labels

    below_threshold = essen["growth"] < self.minimal_growth_rate
    no_solution = essen["growth"].isna()
    essen["essential"] = below_threshold | no_solution
    return essen
def gene_deletion(self) -> pd.DataFrame:
    """Create pd.DataFrame with results of gene deletion.

    The model returned by ``self.read_model()`` is scanned with
    ``single_gene_deletion`` over all of its genes; the raw result is printed
    and then reshaped into the columns ``model``, ``objective``, ``gene``,
    ``status`` and ``value``.

    https://cobrapy.readthedocs.io/en/latest/deletions.html

    :return: pandas.DataFrame
    """
    model = self.read_model()
    deletion_frame = single_gene_deletion(model, model.genes)
    print(deletion_frame)

    # One gene id per row: an arbitrary element of each entry in ``ids``.
    gene_ids = []
    for id_group in deletion_frame.ids:
        gene_ids.append(set(id_group).pop())

    columns = {
        "model": self.model_path.name,
        "objective": self.objective_id,
        "gene": gene_ids,
        "status": deletion_frame.status,
        "value": deletion_frame.growth,
    }
    return pd.DataFrame(columns)
def evaluate(self, model):
    """Use the defined parameters to predict single gene essentiality.

    Inside a model context the configured medium (if any), objective (if any)
    and constraints are applied, the reference optimum is taken from
    ``model.slim_optimize()`` and single gene deletions are simulated for the
    genes in ``self.data["gene"]`` using one process.

    Returns the deletion result frame re-indexed by a new ``gene`` column
    (first element of each original index entry) with a boolean ``essential``
    column, which is true when growth is below 10% of the reference optimum
    or is NaN.
    """
    with model:
        if self.medium is not None:
            self.medium.apply(model)
        if self.objective is not None:
            model.objective = self.objective
        model.add_cons_vars(self.constraints)
        max_val = model.slim_optimize()
        essen = single_gene_deletion(
            model, gene_list=self.data["gene"], processes=1)

    gene_labels = []
    for knocked_out in essen.index:
        gene_labels.append(list(knocked_out)[0])
    essen["gene"] = gene_labels
    essen.index = essen["gene"]

    growth_cutoff = max_val * 0.1
    below_cutoff = essen["growth"] < growth_cutoff
    no_solution = essen["growth"].isna()
    essen["essential"] = below_cutoff | no_solution
    return essen
def _eval_metab(metab, model, exp_ess):
    """
    Evaluate the fitness of a single metabolite as the sole biomass component.

    The model's current objective reaction is removed and replaced by a
    ``BIOMASS`` reaction consuming only ``metab`` (coefficient -0.1).  Single
    gene deletions are simulated and the growth / no-growth predictions are
    compared with the measurements in ``exp_ess``.  The model is modified in
    place.

    :param metab: id of the metabolite, looked up in ``model.metabolites``
    :param model: model to modify and simulate
    :param exp_ess: frame with ``Genes`` and ``Measured_growth`` columns
    :return: result of ``matthews_corrcoef(measured, predicted)``
    """
    # Set this as warning
    model.solver = 'gurobi'
    objective_reactions = linear_reaction_coefficients(model)
    previous_biomass = list(objective_reactions.keys())[0]
    previous_biomass.remove_from_model()

    # Build the single-metabolite biomass reaction and make it the objective
    biomass = Reaction('BIOMASS')
    model.add_reaction(biomass)
    target_metabolite = model.metabolites.get_by_id(metab)
    biomass.add_metabolites({target_metabolite: -0.1})
    model.reactions.BIOMASS.objective_coefficient = 1.

    # Generate deletion results --> known bottleneck
    deletion_results = single_gene_deletion(model, model.genes, processes=1)

    # Boolean calls: first every growing knock-out (1), then every
    # non-growing one (0); NaN growth matches neither mask.
    records = []
    for label, mask in ((1, deletion_results['growth'] > 1e-3),
                        (0, deletion_results['growth'] <= 1e-3)):
        for knocked_out in deletion_results[mask].index:
            records.append((str(next(iter(knocked_out))), label))

    pred_ess = pd.DataFrame(records, columns=['Genes', 'Predicted_growth'])
    compare_df = pd.merge(left=pred_ess, right=exp_ess, on='Genes',
                          how='inner')

    # Apply mcc
    measured = np.array(list(compare_df.Measured_growth))
    predicted = np.array(list(compare_df.Predicted_growth))
    return matthews_corrcoef(measured, predicted)
def run_example_6_2():
    """Print single gene deletion results for two genes of the textbook model.

    The genes are taken from ``fba.ENO_gene`` and ``fba.PFK_B_gene``; the
    result of ``single_gene_deletion`` is printed and nothing is returned.
    """
    model = cobra.test.create_test_model("textbook")
    # The model is passed positionally: the first parameter was renamed
    # between cobrapy releases (``cobra_model`` -> ``model``), so naming it
    # ties the call to one specific version.
    single_deletion_results = single_gene_deletion(
        model, gene_list=[fba.ENO_gene, fba.PFK_B_gene])
    print(single_deletion_results)
def ko_tfa(model):
    """Run ``single_gene_deletion`` on ``model`` with default settings.

    Returns whatever ``single_gene_deletion`` returns, unchanged.
    """
    deletion_result = single_gene_deletion(model)
    return deletion_result
import os
import matplotlib.pyplot as plt

# Load the data: the reference model is read from an SBML file and the CSV
# table is indexed by its 'cell_line' column.
print('--------CARGANDO DATOS----------')
sbml_fname = './Datos/models/Recon2.2.1_RPMI_trimed_gene_symbols.xml'
reference_model = read_sbml_model(sbml_fname)
df_ceres = pd.read_csv("./Datos/depmap/M19Q2_ceres_metabol_python.csv", sep = '\t')
df_ceres = df_ceres.set_index('cell_line')

# Identify the essential genes: genes whose individual deletion leaves growth
# below the threshold computed further down (1% of the largest growth value
# found in the deletion results).
print('------SINGLE GENE DELETION-----')
lista_genes = reference_model.genes
resultado_knocked_out = single_gene_deletion(reference_model, lista_genes)
# Relabel every index entry with its first element (the index entries are
# presumably frozensets holding a single gene id - confirm for the installed
# cobrapy version).
rename_dict = {i:list(i)[0] for i in resultado_knocked_out.index}
df_deletion_renamed = resultado_knocked_out.rename(rename_dict, axis=0)
# Threshold used below to split the genes into essential / non-essential.
threshold = 0.01 * df_deletion_renamed.growth.max()
mask = df_deletion_renamed.growth < threshold
essential = df_deletion_renamed.index[mask]
# NOTE(review): rows whose growth is NaN match neither mask and therefore end
# up in neither group - confirm this is intended.
mask2 = df_deletion_renamed.growth >= threshold
non_essential = df_deletion_renamed.index[mask2]
# Keep only the genes that also appear as rows of the transposed table.
df_ceres2 = df_ceres.T
essential_in_ceres = set(essential) & set(df_ceres2.index)
non_essential_in_ceres = set(non_essential) & set(df_ceres2.index)
def run_single_gene_deletion(self, selected_solver):
    """Run a single gene deletion on ``self.model`` and store the result.

    The result object is tagged with ``method = "single_gene_deletion"`` and
    handed to ``self.add_solution``.

    NOTE(review): ``selected_solver`` is accepted but not used here.
    """
    deletion_result = single_gene_deletion(self.model)
    deletion_result.method = "single_gene_deletion"
    self.add_solution(deletion_result)
def _classify_prediction(grows_experimentally, growth, growth_threshold):
    """Return 'TP', 'FP', 'TN' or 'FN', or None when ``growth`` is undecided.

    "Positive" means the model predicts growth (gene not essential).  A
    growth value equal to the threshold, or NaN, gives no call.
    """
    if growth > growth_threshold:
        return "TP" if grows_experimentally else "FP"
    if growth < growth_threshold:
        return "FN" if grows_experimentally else "TN"
    return None


def essen_test(model_tb, dic_return, dataset_name, dataset_excel,
               growth_thresh_mult):
    """Compare predicted gene essentiality with an experimental data set.

    Single gene deletions are simulated on ``model_tb``.  A gene is predicted
    to grow when its knock-out growth exceeds ``growth_thresh_mult`` times
    the model's optimal objective value.  Each row of ``dataset_excel`` is
    then classified as true/false positive/negative, and the counts, the
    fraction of correct predictions and the Matthews correlation coefficient
    are printed.

    :param model_tb: model to simulate.
    :param dic_return: ``"Yes"`` returns ``(fal_neg_dic, fal_pos_dic)``;
        ``"Yes both"`` returns ``(fal_neg_dic, fal_pos_dic, true_neg_dic,
        true_pos_dic)``; anything else returns ``None``.
    :param dataset_name: ``"griffin"`` (columns ``Locus`` and ``p value``;
        p value above 0.1 counts as experimental growth, below 0.1 as
        essential) or ``"loerger"`` (columns ``ORF ID`` and ``Final Call``;
        ``NE``/``GA`` count as growth, ``ES``/``ESD``/``GD`` as essential).
        Rows are ignored for any other name.
    :param dataset_excel: frame iterated with ``iterrows()``.
    :param growth_thresh_mult: multiplier applied to the optimal objective
        value to obtain the growth threshold.
    :return: see ``dic_return``.  Each dictionary maps gene id to
        ``[growth, p value]`` (griffin) or ``[growth]`` (loerger).

    Rows are skipped silently when the gene is not in the model, when the
    experimental value cannot be interpreted, or when the growth value gives
    no call.
    """
    model = model_tb
    growth_rates = single_gene_deletion(model)

    # Map each knocked-out gene (first element of its index entry) to its
    # growth.  The first occurrence wins, matching the former "first
    # matching row" lookup, without relying on integer keys being treated as
    # positions.
    growth_by_gene = {}
    for knocked_out, growth in zip(growth_rates.index,
                                   growth_rates["growth"]):
        growth_by_gene.setdefault(next(iter(knocked_out)), growth)

    orig_growth_thres = growth_thresh_mult * model.optimize().objective_value

    # set grif essen threshold -- iSM810 paper uses 0.1 as "confident
    # essential"
    grif_thres = 0.1

    counts = {"TP": 0, "TN": 0, "FP": 0, "FN": 0}
    genes_by_outcome = {"TP": {}, "TN": {}, "FP": {}, "FN": {}}

    for _, row in dataset_excel.iterrows():
        if dataset_name == "griffin":
            gene = str(row["Locus"])
            try:
                p_value = float(row["p value"])
            except (KeyError, TypeError, ValueError):
                continue  # missing or unparsable p value: skip the row
            if p_value > grif_thres:
                grows_experimentally = True
            elif p_value < grif_thres:
                grows_experimentally = False
            else:
                continue  # exactly at the threshold or NaN: no call
            details = [p_value]
        elif dataset_name == "loerger":
            gene = str(row["ORF ID"])
            # Legend from the original note: ES being near 0, NE being near
            # the mean, GD approximately 1/10 the mean, GA 5 times the mean.
            final_call = row.get("Final Call")
            if final_call in ("NE", "GA"):
                grows_experimentally = True
            elif final_call in ("ES", "ESD", "GD"):
                grows_experimentally = False
            else:
                continue
            details = []
        else:
            continue

        if gene not in growth_by_gene:
            continue  # this gene is not in the model
        growth = growth_by_gene[gene]

        outcome = _classify_prediction(grows_experimentally, growth,
                                       orig_growth_thres)
        if outcome is None:
            continue  # some genes have growth = nan
        counts[outcome] += 1
        genes_by_outcome[outcome][gene] = [growth] + details

    true_pos, true_neg = counts["TP"], counts["TN"]
    fal_pos, fal_neg = counts["FP"], counts["FN"]
    true_pos_dic, true_neg_dic = genes_by_outcome["TP"], genes_by_outcome["TN"]
    fal_pos_dic, fal_neg_dic = genes_by_outcome["FP"], genes_by_outcome["FN"]

    # ---Analyze and Print results ---
    print("TP - TN - FP - FN")
    print(true_pos, true_neg, fal_pos, fal_neg)

    # percent of correct predictions (NaN when no row could be classified)
    total = true_pos + true_neg + fal_pos + fal_neg
    if total:
        perc_correct = (true_pos + true_neg) / float(total)
    else:
        perc_correct = float("nan")
    print("percent correct: ", perc_correct)

    # mcc calculation; reported as 0.0 when the denominator vanishes
    MCC_root = math.sqrt((true_pos + fal_pos) * (true_pos + fal_neg) *
                         (true_neg + fal_pos) * (true_neg + fal_neg))
    if MCC_root:
        MCC = (true_pos * true_neg - fal_pos * fal_neg) / MCC_root
    else:
        MCC = 0.0
    print("Matthew Correlation Coefficient", MCC)

    if dic_return == "Yes":
        return fal_neg_dic, fal_pos_dic
    elif dic_return == "Yes both":
        return fal_neg_dic, fal_pos_dic, true_neg_dic, true_pos_dic
print('------RECONSTRUYENDO EL MODELO-----')
# Rebuild the cell-specific model from the reference model and the reaction
# confidences, then save it as SBML under the folder of the current cell
# line, with both thresholds in the file name.
CORDA_builder = CORDA(reference_model, rxns_conf)
CORDA_builder.build()
csm2 = CORDA_builder.cobra_model()
write_sbml_model(
    csm2, './thresholding/Th_upper_y_lower/' + cell_line +
    '/csm2_%s_%s.sbml' % (th_l, th_u))
print('Numero de reacciones de modelo reconstruido:',
      str(len(csm2.reactions)))

# Identify the essential genes: genes whose individual deletion leaves growth
# below the threshold computed further down (1% of the largest growth value
# found in the deletion results).
print('------SINGLE GENE DELETION-----')
# The keyword is ``processes``; the former spelling ``proprocesses`` did not
# name that parameter, so the single-process request never reached it.
resultado_knocked_out = single_gene_deletion(csm2, processes=1)
# Relabel every index entry with its first element.
rename_dict = {i: list(i)[0] for i in resultado_knocked_out.index}

print('Seleccionando los genes esenciales y no esenciales')
df_deletion_renamed = resultado_knocked_out.rename(rename_dict, axis=0)
threshold = 0.01 * df_deletion_renamed.growth.max()
mask = df_deletion_renamed.growth < threshold
essential = df_deletion_renamed.index[mask]
mask = df_deletion_renamed.growth >= threshold
non_essential = df_deletion_renamed.index[mask]