Example #3
File: main.py  Project: pvilaca/hindsight
def download_or_load_model_me_placeholder(name):
    if name == 'ME':
        return 'placeholder'
    elif name in private_models:
        return load_model(name)
    else:
        return download_model(name, host='http://bigg.ucsd.edu/api/v2/')
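A minimal call-site sketch for the loader above; the model names and the dict handling are assumptions for illustration, and only the 'ME' placeholder convention comes from the function itself.

# Hypothetical usage (model names assumed, not from the project):
loaded = {name: download_or_load_model_me_placeholder(name)
          for name in ['iJO1366', 'ME']}
# 'ME' comes back as the string 'placeholder', so guard before simulating
real_models = {k: v for k, v in loaded.items() if v != 'placeholder'}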
Example #4
File: main.py  Project: pvilaca/hindsight
def setup_for_series(series, loaded_models, use_greedy_knockouts):
    """Get a SimulationSetup for the series."""
    # copy the model
    model_id = series['model']
    if model_id == 'ME':
        # this is necessary because I can't copy ME models right now
        model = load_model(model_id)
    else:
        model = loaded_models[model_id].copy()

    # get the substrates and supplements
    substrate_exchanges, supplement_exchanges = exchange_for_metabolite_name(
        series['substrate'])

    # aerobicity
    aerobic = series['aerobicity'].strip().lower() == 'aerobic'

    # heterologous pathway
    additions = series['additions']
    heterologous_pathway = None if additions.strip() == '' else additions

    # knockouts
    gene_knockouts = series['deletions_b']

    # target_exchange
    try:
        target_exchange = exchange_for_metabolite_name(series['target'])[0][0]
    except NotFoundError:
        return error_series(series, 'Bad target name: %s' % series['target'])

    # other bounds
    other_bounds = {}
    if target_exchange == 'EX_h2_e':
        if 'FHL' in model.reactions:
            other_bounds['FHL'] = (0, 1000)

    # environment is generic for any model
    environment = Environment(substrate_exchanges, supplement_exchanges,
                              aerobic, other_bounds)
    design = Design(heterologous_pathway, gene_knockouts, target_exchange)
    return SimulationSetup(model, environment, design, use_greedy_knockouts)
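A sketch of the kind of record setup_for_series expects, with the keys inferred from the accesses above; the values are invented for illustration, and loaded_models must already map model IDs to cobra models.

import pandas as pd

# Hypothetical input row (values invented; keys inferred from the code above)
series = pd.Series({'model': 'iJO1366',
                    'substrate': 'glucose',
                    'aerobicity': 'Aerobic',
                    'additions': '',
                    'deletions_b': ['b1779'],
                    'target': 'ethanol'})
# setup = setup_for_series(series, loaded_models, use_greedy_knockouts=True)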
Example #5
def main():
    """Load an old Escher map, and generate a validated map.

    """
    try:
        in_file = sys.argv[1]
        model_path = sys.argv[2]
    except IndexError:
        raise Exception(
            "Usage: python convert_map.py old_map.json path/to/model.sbml")

    # get the cobra model
    try:
        model = load_model(model_path)
    except Exception:
        try:
            model = cobra.io.load_json_model(model_path)
        except (IOError, ValueError):
            try:
                model = cobra.io.read_sbml_model(model_path)
            except IOError:
                raise Exception(
                    'Could not find model in theseus or filesystem: %s' %
                    model_path)

    # get the current map
    with open(in_file, 'r') as f:
        out = json.load(f)

    the_map = convert(out, model)

    # don't replace the file
    out_file = in_file.replace('.json', '_converted.json')
    print('Saving validated map to %s' % out_file)
    with open(out_file, 'w') as f:
        json.dump(the_map, f, allow_nan=False)
Example #6
def me_model():
    return load_model('ME')
Example #7
def save_map(filename, out_directory, model_name):

    if filename.endswith('.json.gz'):
        with gzip.open(filename, "r") as f:
            data = json.load(f)
        out_file = join(out_directory,
                        basename(filename).replace('.json.gz', '_map.json'))
    elif filename.endswith('.json'):
        with open(filename, "r") as f:
            data = json.load(f)
        out_file = join(out_directory,
                        basename(filename).replace('.json', '_map.json'))
    else:
        # nothing to parse; bail out instead of continuing with undefined data
        logging.warning('Not loading file %s' % filename)
        return

    # get the cobra model
    model = load_model(model_name)

    # get the compartment dictionary
    df = pd.DataFrame.from_csv("compartment_id_key.csv")
    compartment_id_dictionary = {}
    for row in df.itertuples(index=True):
        compartment_id_dictionary[row[0]] = row[1:3]

    # major categories
    reactions = []
    segments = []  # the loop below fills `segments`, so initialize that name
    text_labels = []
    nodes = []
    for k, v in data.iteritems():
        if k == "MAPREACTION": reactions = v
        elif k == "MAPLINESEGMENT": segments = v
        elif k == "MAPTEXT": text_labels = v
        elif k == "MAPNODE": nodes = v
        else: raise Exception('Unrecognized category: %s' % k)

    # do the nodes
    nodes = parse_nodes(nodes, compartment_id_dictionary)

    # do the segments
    parse_segments(segments, reactions, nodes)

    # do the reactions
    reactions = parse_reactions(reactions, model, nodes)

    # do the text labels
    text_labels = parse_labels(text_labels)

    # compile the data
    out = {}
    out['nodes'] = nodes
    out['reactions'] = reactions
    out['text_labels'] = text_labels

    # translate everything so x > 0 and y > 0
    # out = translate_everything(out)

    # for export, only keep the necessary stuff
    node_keys_to_keep = [
        'node_type', 'x', 'y', 'name', 'bigg_id', 'label_x', 'label_y',
        'node_is_primary', 'connected_segments'
    ]
    segment_keys_to_keep = ['from_node_id', 'to_node_id', 'b1', 'b2']
    reaction_keys_to_keep = [
        'segments', 'name', 'reversibility', 'bigg_id', 'label_x', 'label_y',
        'metabolites', 'gene_reaction_rule'
    ]
    text_label_keys_to_keep = ['x', 'y', 'text']
    for k, node in out['nodes'].iteritems():
        only_keep_keys(node, node_keys_to_keep)
    for k, reaction in out['reactions'].iteritems():
        if 'segments' not in reaction: continue
        for seg_id, segment in reaction['segments'].iteritems():
            only_keep_keys(segment, segment_keys_to_keep)
        only_keep_keys(reaction, reaction_keys_to_keep)
    for k, text_label in out['text_labels'].iteritems():
        only_keep_keys(text_label, text_label_keys_to_keep)

    # get max width and height
    min_max = {'x': [inf, -inf], 'y': [inf, -inf]}
    for node in nodes.itervalues():
        if node['x'] < min_max['x'][0]: min_max['x'][0] = node['x']
        if node['x'] > min_max['x'][1]: min_max['x'][1] = node['x']
        if node['y'] < min_max['y'][0]: min_max['y'][0] = node['y']
        if node['y'] > min_max['y'][1]: min_max['y'][1] = node['y']
    width = min_max['x'][1] - min_max['x'][0]
    height = min_max['y'][1] - min_max['y'][0]
    out['canvas'] = {
        'x': min_max['x'][0] - 0.05 * width,
        'y': min_max['y'][0] - 0.05 * height,
        'width': width + 0.10 * width,
        'height': height + 0.10 * height
    }

    header = {
        "schema":
        "https://zakandrewking.github.io/escher/escher/jsonschema/1-0-0#",
        "homepage": "https://zakandrewking.github.io/escher",
        "map_id": basename(filename).replace('.json', '').replace('.gz', ''),
        "map_name": "",
        "map_description": ""
    }

    the_map = [header, out]

    from escher.convert_map import convert
    the_map = convert(the_map, model)

    with open(out_file, 'w') as f:
        json.dump(the_map, f, allow_nan=False)
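For reference, a hedged sketch of driving save_map over a directory of old maps; the directory names are invented, and the function also expects compartment_id_key.csv in the working directory.

from os import listdir
from os.path import join

# Hypothetical driver (paths invented for illustration)
for fn in listdir('old_maps'):
    if fn.endswith(('.json', '.json.gz')):
        save_map(join('old_maps', fn), 'converted_maps', 'iJO1366')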
Example #8
def secretions_for_knockouts(setup,
                             knockouts=[],
                             max_depth=1000,
                             depth=0,
                             ignore_exchanges=[],
                             return_if_found=None,
                             raise_if_found=None,
                             growth_cutoff=min_biomass,
                             flux_cutoff=1.0):
    """Accepts a SimulationSetup and a set of knockouts.

    Returns a tree of secretions using nested dictionaries.

    Arguments
    ---------

    setup: SimulationSetup.

    knockouts: A list of reaction IDs to knock out.

    max_depth: The maximum depth to search.

    depth: The current depth.

    ignore_exchanges: Exchanges to not knock out.

    raise_if_found: A reaction ID that, if found, will raise FoundReaction exception.

    return_if_found: A reaction ID that, if found, will return True as the first
    return value.

    growth_cutoff: Below this growth rate, the simulation is considered lethal.

    flux_cutoff: The minimum secretion flux required to trigger
    return_if_found or raise_if_found.

    """
    # check depth
    if depth > max_depth:
        print('Max depth')
        return False, 'MAX_DEPTH'
    if depth >= 20 and depth % 10 == 0:
        print(depth)

    # always copy the model
    model = setup.model
    if model.id == 'ME':
        model = load_model(model.id)
    else:
        model = model.copy()

    # copy environment for changes, knock out the reactions by adding them to
    # other_bounds
    environment = Environment(
        setup.environment.substrate_exchanges,
        setup.environment.supplement_exchanges, setup.environment.aerobic,
        dict({ko: (0, 0)
              for ko in knockouts}, **setup.environment.other_bounds))

    # set up model. Have to do this every time because the ME model cannot be
    # copied
    model = apply_design(model, setup.design, setup.use_greedy_knockouts)
    model = apply_environment(model, environment)

    # solve the problem
    sol = me_optimize_growth(model) if model.id == 'ME' else model.optimize()

    if sol.f is None or sol.f <= growth_cutoff:
        return False, None
    else:
        secretion = dict(get_secretion(model, sol.x_dict, sort=False))
        if raise_if_found is not None and raise_if_found in secretion and secretion[
                raise_if_found] > flux_cutoff:
            raise FoundReaction(str(secretion))
        elif return_if_found is not None and return_if_found in secretion and secretion[
                return_if_found] > flux_cutoff:
            can_secrete = True
            children = None
        else:
            children_raw = {
                new_knockout:
                secretions_for_knockouts(setup,
                                         knockouts=knockouts + [new_knockout],
                                         max_depth=max_depth,
                                         depth=depth + 1,
                                         ignore_exchanges=ignore_exchanges,
                                         return_if_found=return_if_found,
                                         raise_if_found=raise_if_found,
                                         growth_cutoff=growth_cutoff,
                                         flux_cutoff=flux_cutoff)
                for new_knockout, flux in secretion.iteritems()
                if new_knockout not in ignore_exchanges and flux > flux_cutoff
            }
            can_secrete = any(v[0] for v in children_raw.itervalues())
            children = {k: v[1] for k, v in children_raw.iteritems()}
        return can_secrete, {
            'knockouts': knockouts,
            'growth_rate': sol.f,
            'secretion': secretion,
            'children': children,
        }
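A sketch of invoking the recursive search above; the SimulationSetup (`setup`) would come from something like setup_for_series earlier, and the exchange IDs here are invented for illustration.

# Hypothetical invocation (exchange IDs invented)
found, tree = secretions_for_knockouts(setup,
                                       ignore_exchanges=['EX_co2_e', 'EX_h2o_e'],
                                       return_if_found='EX_etoh_e',
                                       max_depth=3)
if found:
    print('some knockout path leads to the target secretion')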
Example #9
from theseus import load_model
import math
import cobra.io
import random
import json
import pandas as pd

# model
ijo = load_model('iJO1366')
ijo.genes.get_by_id('b1779').name = 'gapA'
cobra.io.save_json_model(ijo, 'iJO1366.json')

# gene data
gene_data = {gene.id: random.random() * 20 for gene in ijo.genes}
with open('gene_data_iJO1366.json', 'w') as f:
    json.dump(gene_data, f)

(pd.DataFrame.from_records(gene_data.items(), columns=['gene', 'value'])
 .to_csv('gene_data_iJO1366.csv', index=None))

# reaction text data
reaction_text_data = {reaction.id: reaction.build_reaction_string() for reaction in ijo.reactions}

(pd.DataFrame.from_records(reaction_text_data.items(), columns=['reaction', 'value'])
 .to_csv('reaction_text_data_iJO1366.csv', index=None))

# convert RNA-seq data to normalized data that looks like array data
with open('aerobic_anaerobic_E_coli_RNA-seq.json', 'r') as f:
    gene_comparison = json.load(f)
all_vals = gene_comparison[0].values() + gene_comparison[1].values()
# log values
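The example breaks off at the log step; a minimal sketch of what that transform might look like, assuming base-10 logs and that non-positive values are dropped first (the original continuation is not shown).

# Hedged sketch of the truncated step, not the original code
logged_vals = [math.log10(v) for v in all_vals if v > 0]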
Example #10
def main():
    try:
        search_dir = argv[1]
        model_name = argv[2]
    except IndexError:
        raise Exception('Not enough arguments.')

    model = load_model(model_name)
    ids = [id_for_new_id_style(x.id) for x in model.reactions]

    scores = []
    size = len(listdir(search_dir))
    for i, path in enumerate(listdir(search_dir)):

        # progress indicator: rewrite the current line in place
        sys.stdout.write('\r')
        sys.stdout.write("%d / %d" % (i + 1, size))
        sys.stdout.flush()
        sys.stdout.flush()

        if path.endswith('.gz'):
            f = gzip.open(join(search_dir, path), 'r')
        else:
            f = open(join(search_dir, path), 'r')
        # (1) Compare the metabolite count
        m = json.load(f)
        try:
            met_count = len(m['MAPNODE'])
            reaction_count = len(m['MAPREACTION'])
            # diff = abs(len(m['MAPNODE']) - metabolite_count)
        except KeyError:
            continue
        # (2) Compare the reaction ids to the cobra model
        # f.seek(0)
        num_matches = 0
        try:
            reactions = m['MAPREACTION']
        except KeyError:
            continue
        for reaction in reactions:
            try:
                an_id = reaction['REACTIONABBREVATION']
            except KeyError:
                continue
            if id_for_new_id_style(an_id) in ids:
                num_matches = num_matches + 1
        # skip maps where fewer than 90% of the reaction ids match the model
        frac = 0.9
        if len(reactions) == 0 or num_matches < frac * len(reactions):
            continue
        scores.append((join(search_dir, path),
                       float(num_matches) / len(reactions),
                       met_count, reaction_count))
        f.close()
    scores = sorted(scores, key=itemgetter(2), reverse=True)
    scores = sorted(scores, key=itemgetter(1))
    outfile = '%s_maps.tsv' % model_name
    print('')
    print('saving to %s' % outfile)
    (pd.DataFrame(scores,
                  columns=['path', 'score', 'n_metabolites',
                           'n_reactions']).to_csv(outfile, sep='\t'))
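A quick way to inspect the ranking this script writes, assuming pandas is available in the calling session; the model name in the path is invented for illustration.

# Hypothetical follow-up: read the ranking back and show the top rows
scores_df = pd.read_csv('iJO1366_maps.tsv', sep='\t', index_col=0)
print(scores_df.head())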
Example #11
                                                         names=['Paper', 'Model']))

# propagate info
cols_to_propagate = ['year', 'target_exchange', 'Deletions', 'Aerobicity',
                     'Additions', 'Target', 'c_byproduct_order', 'Evolved', 'citation_key',
                     'substrate_exchange', 'PMID', 'authors', 'title', 'strategies', 'In silico prediction',
                     'Native?', 'Parent strain', 'Substrate']
all_sims3[cols_to_propagate] = all_sims3[cols_to_propagate].groupby(level='Paper').fillna(method='backfill')
all_sims3[cols_to_propagate] = all_sims3[cols_to_propagate].groupby(level='Paper').fillna(method='pad')
# add year to index
all_sims3 = all_sims3.set_index('year', append=True)
all_sims3 = all_sims3.sort_index()

### Temporary solution to Single_Exchange_FVA bug

hete_model = add_all_heterologous_pathways(load_model('iJO1366'))

# year being NaN causes trouble for indexing
all_sims = all_sims3.reset_index(level='year')
all_sims.loc[idx[:, 'ME'], 'min'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_secretion, axis=1)
all_sims.loc[idx[:, 'ME'], 'max'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_secretion, axis=1)
all_sims.loc[idx[:, 'ME'], 'yield_min'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_yield, axis=1, t='min')
all_sims.loc[idx[:, 'ME'], 'yield_max'] = all_sims.loc[idx[:, 'ME'], :].apply(get_batch_target_yield, axis=1, t='max')
all_sims = all_sims.set_index('year', append=True)
all_sims = all_sims.sort_index()

## Check for sims where iJO grows and ME dies (and vice versa)

ijo_grows_vs_me = (all_sims
 .sort_index()
 .loc[idx[:, ['iJO1366', 'ME'], :], :]