def _current_row(deleted, original_index):
    """Return the present row index in R of the metabolite that started at ``original_index``.

    Every previously deleted metabolite with a smaller original index has shifted this row up by one.
    """
    return original_index - len(deleted[deleted < original_index])


def _count_lps(R, row):
    """Return (number of positive entries) * (number of negative entries) of ``R[row, :]``."""
    return np.sum(R[row, :] > 0) * np.sum(R[row, :] < 0)


def intersect_directly(R, internal_metabolites, network, verbose=True, tol=1e-12, sort_order='min_adj',
                       intermediate_cone_path='', manual_override=''):
    """
    Eliminate the internal metabolites from R one at a time and return the resulting matrix.

    In every iteration one internal metabolite is selected (see sort_order), its row is removed from R via
    iteration_without_lps (when the row has no positive or no negative entries) or eliminate_metabolite
    (otherwise), and, on the process with rank 0, the intermediate R is written transposed to
    'intermediate_conversion_cone.csv'. Afterwards unsplit_metabolites is applied to the result.

    :param R: matrix whose rows are metabolites
    :param internal_metabolites: original row indices in R of the metabolites that should be eliminated
    :param network: network object; its metabolites are used for ids in log output and in the CSV header, and it
            is passed on to the elimination helpers
    :param verbose: if True, progress information is printed with mp_print
    :param tol: not used in this function body
    :param sort_order: Different options for determining metabolite intersection order. As a default we will
            intersect the metabolite that adds the minimal number of adjacencies in the model. Other options are
            'min_lp', 'max_lp_per_adj', and 'min_connections'. Whenever some metabolite can be removed without
            any LPs, 'min_lp' is used for that iteration regardless of this setting.
    :param intermediate_cone_path: if non-empty, R, internal_metabolites and network are first replaced by the
            result of pick_up_intermediate_cone
    :param manual_override: if non-empty, int(manual_override) is used as position in the sorted list of
            internal metabolites to pick the metabolite for the first iteration only
    :return: R, ids as returned by unsplit_metabolites
    :raises ValueError: if a metabolite has to be chosen by sort_order and sort_order is not a known option
    """
    if intermediate_cone_path:
        R, internal_metabolites, network = pick_up_intermediate_cone(internal_metabolites, network,
                                                                     intermediate_cone_path)

    # rows are metabolites
    deleted = np.array([])
    it = 1
    internal = sorted(internal_metabolites)
    rows_removed_redund = 0

    while internal:
        sorting = sort_order

        # For each internal metabolite, calculate the number of producing reactions times the number of consuming
        n_lps = [_count_lps(R, _current_row(deleted, j)) for j in internal]

        # If there is a metabolite that can be deleted without any lps being done, we will do it immediately
        if np.min(n_lps) == 0:
            sorting = 'min_lp'

        if manual_override and (it == 1):
            i = internal[int(manual_override)]
        elif sorting == 'min_lp':
            i = internal[np.argmin(n_lps)]
        elif sorting == 'min_connections':
            # Alternative way of choosing metabolite, choose the one that is minimally connected
            adj = get_metabolite_adjacency(R)
            connections = [int(np.sum(adj[:, _current_row(deleted, met)])) for met in internal]
            min_connect_inds = np.array(internal)[np.where(connections == np.min(connections))[0]]
            # Pick the one with least LPs to be done, if equally connected
            i = min_connect_inds[np.argmin(
                [_count_lps(R, _current_row(deleted, j)) for j in min_connect_inds])]
        elif sorting == 'min_adj' or sorting == 'max_lp_per_adj':
            # Alternative way of choosing metabolite, choose the one that increases adjacencies the least
            adj_added = []  # This will contain for each metabolite the number connections between metabs removal adds
            adj = get_metabolite_adjacency(R)
            old_n_adjs = np.sum(adj)
            for met in internal:
                new_adj = adj.copy()
                curr_ind = _current_row(deleted, met)
                # Add the adjacencies of the removed metabolite to all metabolites adjacent to it
                new_adj[np.where(adj[:, curr_ind] != 0), :] += new_adj[curr_ind, :]
                np.fill_diagonal(new_adj, 0)
                new_adj = np.minimum(new_adj, 1)
                new_adj = np.delete(np.delete(new_adj, curr_ind, axis=0), curr_ind, axis=1)
                new_n_adjs = np.sum(new_adj)
                adj_added.append(int(new_n_adjs - old_n_adjs))

            if sorting == 'min_adj':
                min_adj_inds = np.array(internal)[np.where(adj_added == np.min(adj_added))[0]]
                # Pick the one with least LPs to be done, if adding equal adjacencies
                i = min_adj_inds[np.argmin(
                    [_count_lps(R, _current_row(deleted, j)) for j in min_adj_inds])]
            elif sorting == 'max_lp_per_adj':
                lp_per_adj = np.array(n_lps) / (np.array(adj_added) - np.min(adj_added) + 1)
                i = internal[np.argmax(lp_per_adj)]
        else:
            # Without this, `i` would be unbound (first iteration) or stale (later iterations)
            raise ValueError("Unknown sort_order %r; expected one of 'min_adj', 'min_lp', 'max_lp_per_adj', "
                             "'min_connections'" % (sort_order,))

        # to_remove is the current row for the metabolite that was once at the ith place
        to_remove = _current_row(deleted, i)

        if verbose:
            mp_print("\n\nIteration %d (internal metabolite = %d: %s) of %d" % (
                it, to_remove, [m.id for m in network.metabolites][to_remove], len(internal_metabolites)))
            mp_print("Possible LP amounts for this step:\n" + ", ".join(np.array(n_lps).astype(str)))
            mp_print("Total: %d" % sum(n_lps))
            if manual_override and (it == 1):
                mp_print("Sorting was manually chosen for this first step.\n")
            elif sorting == 'min_adj':
                mp_print("Possible adjacencies added for this step:\n" + ", ".join(np.array(adj_added).astype(str)))
                mp_print("Minimal adjacency option chosen.\n")
            elif sorting == 'max_lp_per_adj':
                mp_print("Possible lps per adjacency added for this step:\n" + ", ".join(
                    np.round(np.array(lp_per_adj), 2).astype(str)))
                mp_print("Rescaled maximal LPs per added adjacency option chosen.\n")
            elif sorting == 'min_connections':
                mp_print("Possible connectedness of metabolites for this sstep:\n" + ", ".join(
                    np.array(connections).astype(str)))
                mp_print("Minimally connected option chosen.\n")
            elif sorting == 'min_lp':
                mp_print("Minimal LPs chosen.\n")

        it += 1

        if _count_lps(R, to_remove) == 0:
            # No producer/consumer pairs for this metabolite, so no LPs are needed
            R = iteration_without_lps(R, to_remove, network)
        else:
            R, removed = eliminate_metabolite(R, to_remove, network, calculate_adjacency=True)
            rows_removed_redund += removed

        deleted = np.append(deleted, i)
        internal.remove(i)

        # Store the intermediate result from a single process only
        if get_process_rank() == 0:
            try:
                metab_ids = [metab.id for metab in network.metabolites]
                np.savetxt('intermediate_conversion_cone.csv', np.transpose(R), delimiter=',',
                           header=','.join(metab_ids), comments='')
            except OverflowError:
                mp_print('Intermediate result cannot be stored due to too large numbers.')

    # remove artificial rays introduced by splitting metabolites
    R, ids = unsplit_metabolites(R, network)

    if verbose:
        mp_print("\n\tRows removed by redund overall: %d\n" % rows_removed_redund)

    return R, ids
verbose=args.verbose, only_rays=args.only_rays, redund_after_polco=args.redund_after_polco) # if external_cycles: # T_intersected = np.transpose(cone) # external_cycles_array = to_fractions(np.zeros((T_intersected.shape[0], len(external_cycles)))) # for ind, cycle in enumerate(external_cycles): # for cycle_metab in cycle: # metab_ind = [ind for ind, metab in enumerate(ids) if metab == cycle_metab][0] # external_cycles_array[metab_ind, ind] = cycle[cycle_metab] # # T_intersected = np.concatenate((T_intersected, external_cycles_array, -external_cycles_array), axis=1) # cone = np.transpose(T_intersected) cone_transpose, ids = unsplit_metabolites(np.transpose(cone), network) cone = np.transpose(cone_transpose) # internal_ids = [] for metab in network.metabolites: if not metab.is_external: id_ind = [ind for ind, id in enumerate(ids) if id == metab.id] if len(id_ind): internal_ids.append(id_ind[0]) ids = list(np.delete(ids, internal_ids)) cone = np.delete(cone, internal_ids, axis=1) if mpi_wrapper.is_first_process(): try: np.savetxt(args.out_path,
def calc_ECMs(file_path, print_results=False, input_file_path=''):
    """
    Calculates ECMs using ECMtool, with indirect intersection.

    :param file_path: string
            String with path to the SBML-file.
    :param print_results: Boolean
            If True, the ECMs are printed with print_ecms_direct.
    :param input_file_path: string
            Optional path to a CSV file with the columns 'Index', 'Input', 'Output' and 'Hidden'. Rows with a 1 in
            'Input', 'Output' or 'Hidden' mark the metabolite with that 'Index' as input, output or hidden.
            External metabolites that are in none of these groups (and are not called 'objective') are prohibited.
            If empty, the inputs and outputs determined by extract_sbml_stoichiometry are used.
    :return cone: np.array
            This array contains the ECMs as columns and the metabolites as rows
    :return ids: metabolite ids as returned by unsplit_metabolites
    :return full_network: deep copy of the network, taken before splitting of metabolites and compression
    """
    # Step 1: build the network
    network = extract_sbml_stoichiometry(file_path, determine_inputs_outputs=True)

    # The following is just for checking the inputs to this program:
    # extract some information about the external metabolites.
    metab_info_ext = [(ind, metab.id, metab.name, metab.direction)
                      for ind, metab in enumerate(network.metabolites) if metab.is_external]
    metab_info_ext_df = pd.DataFrame(metab_info_ext, columns=['metab_ind', 'metab_id', 'metab_name', 'Direction'])
    # You can choose to save this information, by uncommenting this line
    # metab_info_ext_df.to_csv(path_or_buf='external_info_iJR904.csv', index=False)

    # If an input file is supplied, we set input, output, and hidden metabolites from this.
    # If no input file is supplied, standard detection of ecmtool is used.
    if input_file_path:
        info_metabs_df = pd.read_csv(input_file_path)
        info_metabs_input = info_metabs_df[info_metabs_df.Input == 1]
        info_metabs_output = info_metabs_df[info_metabs_df.Output == 1]
        info_metabs_hidden = info_metabs_df[info_metabs_df.Hidden == 1]

        # Get the indices that correspond to the metabolites that are inputs, outputs, or hidden.
        input_inds = list(info_metabs_input.Index.values)
        output_inds = list(info_metabs_output.Index.values) + [
            ind for ind, metab in enumerate(network.metabolites) if metab.id == 'objective']
        hide_inds = list(info_metabs_hidden.Index.values)

        # Build the lookup sets once instead of concatenating/scanning the lists for every metabolite
        input_set = set(input_inds)
        output_set = set(output_inds)
        assigned = input_set | output_set | set(hide_inds)

        prohibit_inds = [ind for ind, metab in enumerate(network.metabolites)
                         if metab.is_external and ind not in assigned and metab.id != 'objective']
        both_inds = [ind for ind in range(len(network.metabolites))
                     if ind in input_set and ind in output_set]

        # Use input information to set input, output, hidden, and prohibited metabolites
        network.set_inputs(input_inds)
        network.set_outputs(output_inds)
        network.set_both(both_inds)
        network.prohibit(prohibit_inds)
        network.hide(hide_inds)

        # Print comma-separated lists of input information. These lists can be used for running the same computation
        # via command line, for example to use other arguments
        print(','.join(map(str, input_inds)))
        print(','.join(map(str, output_inds)))
        print(','.join(map(str, hide_inds)))
        print(','.join(map(str, prohibit_inds)))

    # Keep a copy of the full network before compression. This can be nice for later.
    full_network = copy.deepcopy(network)

    # Split in and out metabolites, to facilitate ECM computation
    network.split_in_out(only_rays=False)

    # Step 2: compress network
    network.compress(verbose=True)

    # Step 3: enumerate ECMs
    # In this script, indirect intersection is used. Use command line options to use direct intersection
    cone = get_conversion_cone(network.N, network.external_metabolite_indices(),
                               network.reversible_reaction_indices(),
                               network.input_metabolite_indices(), network.output_metabolite_indices(),
                               only_rays=False, verbose=True)

    # unsplit_metabolites gets the transposed cone; its result is already in the orientation that is
    # returned: columns will be the different ECMs, rows are metabolites
    cone_transpose, ids = unsplit_metabolites(np.transpose(cone), network)

    if print_results:
        print_ecms_direct(cone_transpose, ids)

    return cone_transpose, ids, full_network