def __init__(self, forcedUpdate=False, organismCode="HSA"):
    """Refresh the data files, then create an empty network.

    forcedUpdate is handed to update() as its ``forced`` argument.
    organismCode is stored on the instance; the default "HSA" is the
    organism code for human.
    """
    # The data files are refreshed before any instance state is created.
    self.update(forced=forcedUpdate)
    self.net = MetabolicNetwork()
    self.organismCode = organismCode
class KEGG:
    """Download KEGG LIGAND flat files and build a metabolic network from them.

    The raw files are kept under ``data/`` (relative to the current working
    directory) and are parsed into Enzyme and Compound nodes that are added
    to ``self.net``.
    """

    # Local file name (under data/) -> URL the file is downloaded from.
    rawDataFiles = {
        'compound': 'ftp://ftp.genome.jp/pub/kegg/ligand/compound/compound',
        'enzyme': 'ftp://ftp.genome.jp/pub/kegg/ligand/enzyme/enzyme',
    }

    def __init__(self, forcedUpdate=False, organismCode="HSA"):
        """Initialization. HSA is the organism code for human.

        forcedUpdate -- when true, every raw data file is downloaded again.
        organismCode -- enzymes are kept only if a GENES line contains it.
        """
        self.update(forced=forcedUpdate)
        self.net = MetabolicNetwork()
        self.organismCode = organismCode

    def update(self, forced=False):
        """Update data files. The default directory is data/.

        With ``forced`` every file in rawDataFiles is downloaded; otherwise
        only the files that are not yet present in data/ are downloaded.
        """
        # A fresh checkout has no data/ directory; listing it or downloading
        # into it would otherwise fail.
        if not os.path.isdir('data/'):
            os.makedirs('data/')
        if forced:
            wanted = list(self.rawDataFiles)
        else:
            present = set(os.listdir('data/'))
            wanted = [name for name in self.rawDataFiles
                      if name not in present]
        self.retrieveFiles(wanted)

    def retrieveFiles(self, files):
        """Retrieve the named files from the KEGG database into data/.

        files -- iterable of keys of rawDataFiles.
        Raises KeyError for a name that is not in rawDataFiles.
        """
        # Materialise once: callers may hand in an iterator or a dict view,
        # neither of which supports the emptiness check reliably.
        files = list(files)
        if not files:
            print("There is no file to be updated.")
            return
        # urlretrieve lives in urllib on Python 2, urllib.request on Python 3.
        try:
            from urllib.request import urlretrieve
        except ImportError:
            from urllib import urlretrieve
        print("updating following files: " + ','.join(files))
        for name in files:
            print("downloading %s..." % (name,))
            urlretrieve(self.rawDataFiles[name], 'data/' + name)

    def writeNet(self):
        """Delegate to the network's own writeNet()."""
        self.net.writeNet()

    def constructNetwork(self):
        """Parse enzymes and compounds, then connect the resulting nodes."""
        self.parseEnzyme(organismCode=self.organismCode)
        self.parseCompound()
        self.net.connectNodes()

    @staticmethod
    def _iterEntryFields(text):
        """Yield one list of (context, data) pairs per '///'-separated entry.

        ``context`` is the label found in the first 12 columns of a line;
        lines whose first 12 columns are blank inherit the label of the
        preceding line of the same entry. ``data`` is the stripped rest of
        the line. Blank lines and blank entries (such as the text after the
        final '///') are skipped, and the label never carries over from one
        entry to the next.
        """
        for rawEntry in text.split('///'):
            if not rawEntry.strip():
                continue
            fields = []
            context = ''
            for line in rawEntry.split('\n'):
                if not line.strip():
                    continue
                label = line[:12].strip()
                if label:
                    context = label
                fields.append((context, line[12:].strip()))
            yield fields

    @staticmethod
    def _parsePathway(data):
        """Turn the data part of a PATHWAY line into a pathway record.

        Returns the text after the first ':' split on single spaces, or the
        tuple ('', data) when the line contains no ':'.
        """
        parts = data.split(':')
        if len(parts) > 1:
            # NOTE(review): the separator is a single space as written in
            # the file; the 2-tuple fallback below suggests an (id, name)
            # pair may have been intended -- confirm against real data.
            return parts[1].split(' ')
        return ('', data.strip())

    def parseEnzyme(self, filename="data/enzyme", organismCode="HSA"):
        """Parse the enzyme flat file and add matching enzymes to the network.

        filename     -- path of the enzyme flat file.
        organismCode -- an enzyme is added only if one of its GENES lines
                        contains this string.
        """
        print("parsing the enzyme list...")
        with open(filename) as handle:
            text = handle.read()
        for fields in self._iterEntryFields(text):
            enzyme = Enzyme()
            inOrganism = False
            for context, data in fields:
                if context == "ENTRY":
                    # The second token of the ENTRY data is stored.
                    enzyme.entry = data.split()[1]
                elif context == "NAME":
                    enzyme.names.append(data.strip(';'))
                elif context == "CLASS":
                    enzyme.classes.append(data.strip(';'))
                elif context in ("SUBSTRATE", "PRODUCT", "COFACTOR"):
                    # Only lines carrying a ':'-introduced reference are
                    # recorded; the others are ignored.
                    parts = data.split(':')
                    if len(parts) > 1:
                        enzyme.compounds.append(parts[1].strip('];'))
                elif context == "PATHWAY":
                    enzyme.pathways.append(self._parsePathway(data))
                elif context == "GENES":
                    if organismCode in data:
                        inOrganism = True
            if inOrganism:
                self.net.addNode(enzyme)

    def parseCompound(self, filename="data/compound"):
        """Parse the compound flat file and add every compound to the network.

        filename -- path of the compound flat file.
        """
        print("parsing the compound list...")
        with open(filename) as handle:
            text = handle.read()
        for fields in self._iterEntryFields(text):
            compound = Compound()
            for context, data in fields:
                if context == "ENTRY":
                    # The first token of the ENTRY data is stored.
                    compound.entry = data.split()[0]
                elif context == "NAME":
                    compound.names.append(data.strip(';'))
                elif context == "REACTION":
                    compound.reactions.extend(data.split())
                elif context == "PATHWAY":
                    compound.pathways.append(self._parsePathway(data))
                elif context == "ENZYME":
                    compound.enzymes.extend(data.split())
            self.net.addNode(compound)