Пример #1
0
    def output(self, outputfile):
        """Write a tab-separated compound -> HMDB identifier table.

        Every pathway of the 'meta' PGDB is visited; for each compound of
        each pathway the frame data is fetched, and when the frame carries
        an '|HMDB|' entry in its 'dblinks' slot the first element of that
        entry is recorded.

        Parameters
        ----------
        outputfile : str
            Path of the file to (over)write. One line per mapped compound:
            ``<compound>\\t<HMDB value>``.
        """
        # PGDB object associated with 'meta' (MetaCyc).
        meta = pythoncyc.select_organism('meta')

        mapping = dict()
        # Compounds already inspected, whether or not they had an HMDB link,
        # so that each frame is fetched only once.
        inspected = set()
        for pway in meta.all_pathways():
            print("PATHWAY: ", pway)
            for compound in meta.compounds_of_pathway(pway):
                if compound in inspected:
                    continue
                inspected.add(compound)
                myframe = PFrame(compound, meta, getFrameData=True)
                # Read the instance dict directly, as the original did,
                # rather than going through attribute access.
                dblinks = vars(myframe).get('dblinks')
                if dblinks and '|HMDB|' in dblinks:
                    mapping[compound] = dblinks['|HMDB|'][0]

        print("WRITING FILE...")
        # The context manager guarantees the file is flushed and closed.
        with open(outputfile, 'w') as outfile:
            for compound, hmdb_id in mapping.items():
                outfile.write(compound + "\t" + hmdb_id + "\n")
def connect_to_ptools(ptools_org):
    """Select the PGDB `ptools_org` through pythoncyc and return it.

    If the selection raises, an error message is printed and the
    interpreter exits.
    """
    try:
        cyc = pythoncyc.select_organism(ptools_org)
    except Exception:
        # `Exception` (not a bare except) so that Ctrl-C and SystemExit
        # still propagate instead of being reported as a connection error.
        print("Error: Could not connect to ptools")
        exit()
    return cyc
def main():
   (opts, args) = parser.parse_args()
   check_arguments(opts)
   # print available organisms
   if opts.list_organisms:
       print "Listing Availalbe PGDBs:"
       org_list = pythoncyc.all_orgids()
       org_list = map(clean_ptools_output, org_list)
       for organism in org_list:
           print organism
       exit()
   
   # connect to ePGDB
   global cyc
   try:
       cyc = pythoncyc.select_organism(opts.ptools_org)
   except:
       print "Could not connect to Pathway Tools. Run pathway-tools/pathway-tools -lisp -python-local-only"
       exit()
   
   # create compatible pathway file for MicroDex
   if opts.micro_dex:
       create_micro_dex_file(cyc, opts.output_file)
       exit()
   
   if os.path.exists("metabolite_count.pk"):
       fh = open("metabolite_count.pk", 'r')
       metabolite_count = pickle.load(fh)
       fh.close()
       sorted_metabolite_count = sorted(metabolite_count.items(), key=operator.itemgetter(1), reverse=True)
       for metabolite in sorted_metabolite_count[0:500]:
           if metabolite[1] >= 1000:
               excluded_substrates[metabolite[0]] = metabolite[1]
       
   print "Constructing Graph"
   construct_graph(cyc)
   print len(edges), "edges"
   print len(verticies), "verticies"
   print len(substrate_to_verticies), "substrate_to_verticies"
   print id_index, "id_index"
   print "Done"
   
   
   exit()
Пример #4
0
    def input(self, inputfile):
        """Load the parameter file and classify the abundance-table header.

        Parameters
        ----------
        inputfile : str
            Path to a tab-separated ``key<TAB>value`` file. The keys read
            here are ``hostname``, ``csvfile`` and ``mapping``.

        Sets on the instance
        --------------------
        parameters : dict of the key/value pairs from `inputfile`.
        meta : PGDB object returned by pythoncyc for 'meta'.
        reversemapping : second mapping-file column -> "X" + first column,
            for rows whose second column is not "NOTFOUND".
        microbes : derived microbe name -> original header entry.
        metabolites : mapped names of the header entries classified as
            metabolites, excluding "NOTFOUND".
        """
        self.parameters = dict()
        with open(inputfile, 'r') as params:
            for line in params:
                contents = line.strip().split('\t')
                self.parameters[contents[0]] = contents[1]
        config.set_host_name(self.parameters['hostname'])
        self.meta = pythoncyc.select_organism('meta')

        mapping = dict()
        self.reversemapping = dict()
        # Both files are opened in the original order and closed on exit,
        # including when parsing raises.
        with open(self.parameters['csvfile'], 'r') as abundancefile, \
                open(self.parameters['mapping'], 'r') as mappingfile:
            mappingfile.readline()  # first line of the mapping file is discarded
            for line in mappingfile:
                contents = line.strip().split('\t')
                mapping["X" + contents[0]] = contents[1]
                if contents[1] != "NOTFOUND":
                    self.reversemapping[contents[1]] = "X" + contents[0]

            firstline = abundancefile.readline().strip()

        # Drop the first header cell; the rest are classified below.
        entries = firstline.split(',')[1:]
        self.microbes = dict()
        self.metabolites = []
        for entry in entries:
            # Strip the first and last character of the cell
            # (presumably surrounding quotes -- TODO confirm).
            entry2 = entry[1:len(entry) - 1]
            # Microbe
            if entry2[len(entry2) - 3] == '.' or entry2 == "Unassigned":
                # Not classified as lowest level
                if entry2.find('.') != len(entry2) - 3:
                    microbe = entry2[2:len(entry2) - 3]
                else:
                    microbe = entry2[0:len(entry2) - 3]
                self.microbes[microbe] = entry
            # Metabolite
            else:
                metabolite = mapping[entry2]
                if metabolite != "NOTFOUND":
                    self.metabolites.append(metabolite)
Пример #5
0
# needs: pathway-tools -python-local-only-non-strict -lisp

import pythoncyc
import sys
import re
import pdb

# Module-level PGDB handle for 'meta', used by the functions below.
meta = pythoncyc.select_organism('meta')
# Additionally send a raw Lisp select-organism query for META to Pathway Tools.
pythoncyc.sendQueryToPTools("(select-organism :org-id 'META)")


def getReaInfos(pwy):
    # Collect reaction information for the pathway frame id `pwy`.
    # NOTE(review): this snippet appears to be cut off inside the while loop
    # below; the visible part never uses rea_list, keyRea_list or isSuperPwy
    # and falls off the end without an explicit return.

    # Stores the full pathway list on the module-level `meta` object on every call.
    meta.allpwy = meta.all_pathways()
    #pwy = "ANAEROFRUCAT-PWY"
    #pwy = "ALL-CHORISMATE-PWY"
    if meta[pwy] == None:
        print(pwy, "Pathway does not exist")
        # Unknown pathway: return a list holding two empty lists.
        return ([[], []])

    rea_list = []
    # Key reactions of the pathway, or an empty list when the slot is None.
    if meta[pwy].key_reactions != None:
        keyRea_list = meta[pwy].key_reactions
    else:
        keyRea_list = []
    # Work list, seeded with the pathway's reaction list.
    # NOTE(review): check_list is the object returned by the lookup, not a
    # copy, so extend()/pop() below mutate it in place -- confirm that the
    # frame does not cache this list.
    check_list = meta[pwy]["reaction_list"]
    # Add sub-pathways that are not already in the work list.
    if meta[pwy].sub_pathways != None:
        check_list.extend(
            [p for p in meta[pwy].sub_pathways if p not in check_list])
    isSuperPwy = False
    # Take items off the end of the work list until it is empty.
    while len(check_list) > 0:
        ptmp = check_list.pop()
Пример #6
0
from pprint import pprint, pformat

import pythoncyc
from pythoncyc.PToolsFrame import PFrame

# PGDB handles keyed by a short label; the organisms are selected in the
# order they are listed.
pgdb = {
    'tigr4': pythoncyc.select_organism('spne170187'),
    'd39': pythoncyc.select_organism('spne373153'),
    't19f': pythoncyc.select_organism('spne487213'),
}


def show_gpr(db, rxn):
    """Print the enzymatic reactions of `rxn` in `db`, each with its enzyme and genes.

    For every enzymatic reaction one line with its id is printed, followed
    by a line ``<enzyme> -- <accession_1 of each gene, space separated>``.
    Nothing is printed when the reaction has no enzymatic reactions.
    """
    reaction = PFrame(rxn, db)
    if reaction is None or reaction.enzymatic_reaction is None:
        return

    for enz_rxn_id in reaction.enzymatic_reaction:
        enz_rxn = PFrame(enz_rxn_id, db)
        print("   " + enz_rxn_id)
        enzyme = PFrame(enz_rxn.enzyme, db)
        accessions = []
        for gene_id in enzyme.gene:
            accessions.append(PFrame(gene_id, db).accession_1)
        print("      " + enz_rxn.enzyme + ' -- ' + " ".join(accessions))


def show_gprs(rxn):
    """Print each key of `pgdb`, followed by the show_gpr output for `rxn` in that PGDB."""
    for label, organism_db in pgdb.items():
        print(label)
        show_gpr(organism_db, rxn)


def build_python_command(command, *args, **kwargs):
    # NOTE(review): only the first statement of this function is visible here;
    # how `command`, `kwargs` and `args_str` are combined cannot be told from
    # this snippet.
    # Render every positional argument with pprint.pformat.
    args_str = [pformat(arg) for arg in args]
Пример #7
0
def PGDB_select(name):
    """Return the PGDB object that pythoncyc selects for the organism id `name`."""
    return pythoncyc.select_organism(name)
Пример #8
0
def get_pathways_from_biocyc(model,
                             csv_fn,
                             biocyc_subsystem_fn,
                             db="meta",
                             add_annotations_to_model=True):
    """
    ** Warning: Must be run using python 2.7 and PathwayTools running in the background **

    This function uses pythoncyc to extract pathway information from the BioCyc database based
    on the BioCyc annotation of each reaction. The result is a table in which the rows are
    reactions (IDs) and the columns are the BioCyc pathway annotations. Because BioCyc uses very
    small pathways, the parent pathways are used as annotations.
    Some key steps are required to run this function:
    - PathwayTools must be running in the background
    - You need the pythoncyc package (https://github.com/latendre/PythonCyc),
      more info at https://bioinformatics.ai.sri.com/ptools/pythoncyc.html
    - Pythoncyc only works with python 2.7

    # Parameters
    - model: SBML-model (imported with cobrapy)
    - csv_fn: Where to store the created csv-file
    - biocyc_subsystem_fn: This is in general the All_pathways_of_MetaCyC.txt file, but can be
      replaced by similar csv-files. It is read tab-separated with the first column as index and
      must provide the columns "Subsystem 1" and "Subsystem 2".
    - db: Which db in BioCyc to use.
    - add_annotations_to_model: A flag used to turn on/off writing the biocyc annotations to the
      model reactions

    # Returns
    The same `model` object (annotated in place when add_annotations_to_model is true).
    """
    import sys
    assert sys.version_info[0] < 3, ("Can't use PythonCyc with python 3")
    import pythoncyc  # Add this import here, so it is only imported if used

    df_subsystem = pd.read_csv(biocyc_subsystem_fn, sep="\t", index_col=0)
    biocyc_db = pythoncyc.select_organism(db)
    pathway_list = []

    # NOTE(review): the [::2] stride visits only every second reaction. It is
    # kept from the original; confirm that it is intentional.
    for r in model.reactions[::2]:
        print(r.id, end="\t")
        try:
            biocyc = r.annotation["biocyc"]
        except KeyError:
            print()
            continue

        # Fix erroneous annotations
        if biocyc[:5] == "META:":
            biocyc = biocyc[5:]

        r_db = biocyc_db[biocyc]

        try:
            pathways = r_db["in_pathway"]
        except TypeError:
            # Subscripting failed, i.e. the lookup did not yield a frame.
            # Report the database actually queried (was hard-coded "sco").
            print(biocyc, " is not in {0}-db".format(db))
            continue

        if not isinstance(pathways, list):
            print("No pathways given for ", biocyc)
            continue

        sub1_list = []
        sub2_list = []
        for pathway in pathways:
            print(pathway, end=", ")
            pwy = pathway.replace("|", "")
            try:
                sub1 = df_subsystem.loc[pwy, "Subsystem 1"].split(
                    "//")[0].strip()
                sub2 = df_subsystem.loc[pwy, "Subsystem 2"].split(
                    "//")[0].strip()
            except KeyError:
                # Pathway is not listed in the subsystem table.
                continue
            sub1_list.append(sub1)
            sub2_list.append(sub2)

        # Sorted so the CSV and the annotations are reproducible between runs.
        unique_sub1 = sorted(set(sub1_list))
        unique_sub2 = sorted(set(sub2_list))

        # Join the list of pathways. The original joined the characters of
        # the last pathway and raised NameError on an empty list.
        pathway_list.append([
            r.id, ";".join(pathways), ";".join(unique_sub1),
            ";".join(unique_sub2)
        ])

        if sub1_list and add_annotations_to_model:
            r.annotation["biocyc.subsystem1"] = unique_sub1
            r.annotation["biocyc.subsystem2"] = unique_sub2

        print(sub1_list, sub2_list)

    # Passing `columns` to the constructor also works when no rows were
    # collected, where assigning df.columns afterwards would raise.
    df = pd.DataFrame(
        pathway_list,
        columns=["Reaction ID", "Pathway", "Subsystem 1", "Subsystem 2"])
    df.to_csv(csv_fn)
    return model
Пример #9
0
 def __init__(self):
     """Select the 'eco' PGDB through pythoncyc and keep the handle on the instance."""
     eco_db = pythoncyc.select_organism('eco')
     self.pgdb = eco_db
Пример #10
0
"""
Note that this script can only be run if BioCyc's pathway tools is installed 
locally and is being served via the following command:

./pathway-tools -lisp -python
"""
#%%
import numpy as np
import pandas as pd
import tqdm as tqdm
import pythoncyc

# Load the two input tables (paths are relative to the working directory).
annotations = pd.read_csv('../../../data/ecoli_genelist_master.csv')
data = pd.read_csv('../../../data/compiled_absolute_measurements.csv')
# %%
# PGDB handle for the 'ECOLI' organism.
ecocyc = pythoncyc.select_organism('ECOLI')

# Start from all protein complexes, then append the frame id of every frame
# yielded by the 'Protein-RNA-Complexes' lookup.
prot_cplx = list(ecocyc.all_protein_complexes())
for frame in ecocyc['Protein-RNA-Complexes']:
    prot_cplx.append(frame['frameid'])

#%%
# Instantiate the dataframe.
_df = pd.DataFrame([])
# NOTE(review): this snippet appears to be cut off inside the innermost `if`
# below; nothing visible here writes to _df.
for c in tqdm.tqdm(prot_cplx):
    # Two values are unpacked per complex and zipped together below.
    components, counts = ecocyc.monomers_of_protein(c)
    # `&` is applied to two booleans here, so it acts as a non-short-circuit `and`.
    if (components is not None) & (counts is not None):
        for prot, sub in zip(components, counts):
            # Derive a gene name from the frame id only when the frame has
            # neither an abbreviated name nor synonyms.
            if ecocyc[prot].abbrev_name is None:
                if ecocyc[prot].synonyms is None:
                    # Text before the first '-', minus its first character.
                    gene = prot.split('-')[0][1:]
Пример #11
0
 def __init__(self):
     """Select the 'eco' PGDB through pythoncyc and keep the handle on the instance."""
     eco_db = pythoncyc.select_organism('eco')
     self.pgdb = eco_db
    genes = root.findall("./Gene")
    
    for gene in genes:
        geneID = gene.attrib['frameid']
        geneName = gene.findall("common-name")[0].text
        print "Gene: " + geneName + ", GeneID: " + geneID
        getPromoter(geneID)

# this function pulls the promoterIDs associated with the geneID passed into it
def getPromoter(geneID):
    resp  = requests.get("http://ecocyc.org/apixml?fn=transcription-units-of-gene&id=ECOLI:{}".format(geneID))
    root=ET.fromstring(resp.text)
    #print(resp.text)
    promoters=root.findall("./Transcription-Unit/component/Promoter")
    #print promoters
    for promoter in promoters:
        pID = promoter.attrib['frameid']
        #regulon = ecoli.containing_chromosome(pID)
        print "promoter: ",pID 
        #print(regulon)

# Module-level PGDB handle for 'ECOLI'.
ecoli = pc.select_organism("ECOLI")
# Run the compound -> pathway lookup for the compound id "SER".
# NOTE(review): pathwayFromCompound is defined outside this snippet.
pathwayFromCompound("SER")
#genesFromPathway("HISTSYN-PWY")


# In[ ]:



Пример #13
0
def selectOrganism(code):
	"""Return the PGDB for `code`, or None when checkPathwayTools reports failure."""
	if not checkPathwayTools(verbose = False):
		return None
	return pythoncyc.select_organism(code)
Пример #14
0

def writecsv(data, ofile, delim='\t'):
    """Write rows to a delimited text file using the csv module.

    Parameters
    ----------
    data : iterable of rows, each row an iterable of cell values.
    ofile : path of the file to (over)write.
    delim : one-character field delimiter (tab by default).
    """
    import sys

    # The csv module needs a binary handle on Python 2, but a text handle
    # opened with newline='' on Python 3.
    if sys.version_info[0] < 3:
        handle = open(ofile, 'wb')
    else:
        handle = open(ofile, 'w', newline='')

    # The context manager closes the file even if a row fails to serialise.
    with handle:
        writer = csv.writer(handle, delimiter=delim)
        writer.writerows(data)


import pythoncyc as pc

# PGDB handle for 'ECOLI'.
ecoli = pc.select_organism('ECOLI')

# Cofactor ids whose text contains 'CPD', and their common names.
allcofs = [cf for cf in ecoli.all_cofactors() if 'CPD' in cf]
cfnames = [ecoli[cf].common_name for cf in allcofs]

cofs = ['|PYRIDOXAL_PHOSPHATE|']

# Value of the cofactors_of slot of the pyridoxal phosphate frame.
rxns = ecoli['|PYRIDOXAL_PHOSPHATE|'].cofactors_of
#cmplx=list(set([ecoli[e].enzyme for e in enzs]))

genes = list()

for r in rxns: