def read_cog_categories(filename):
    """
    Read COG functional categories
    (see http://www.ncbi.nlm.nih.gov/COG/grace/fiew.cgi).

    Every line of the file produces one OpenStruct record:

    * A line starting with a single capital letter followed by a tab is a
      subcategory. Its ``id`` is the first tab-separated field, its ``name``
      is the fourth field, ``parents`` is a one-element tuple holding the
      name of the most recent category line (``None`` if there has been
      none yet), and ``namespace`` is "cog subcategory".
    * Any other line is a category. Its ``name`` is the whole line without
      the trailing newline and ``namespace`` is "cog category"; no ``id``
      or ``parents`` attribute is assigned.

    Args:
        filename: path of the categories file to read.

    Returns:
        A list of records in file order.

    Raises:
        IndexError: if a subcategory line has fewer than four
            tab-separated fields.
    """
    entries = []
    current_category = None
    with open(filename, 'r') as handle:
        for raw_line in handle:
            text = raw_line.rstrip("\n")
            entry = OpenStruct()
            if re.match("[A-Z]\t.*", raw_line) is None:
                # Not "<letter><TAB>...": a top-level category heading.
                entry.name = text
                entry.namespace = "cog category"
                # Later subcategory lines hang off this heading.
                current_category = entry.name
            else:
                columns = text.split("\t")
                entry.id = columns[0]
                entry.name = columns[3]
                entry.parents = (current_category,)
                entry.namespace = "cog subcategory"
            entries.append(entry)
    return entries
def read_cogs(filename):
    """
    Read COG functions.

    Only lines that begin with ``[<word chars>]``, then whitespace, a
    ``COG<digits>`` identifier, whitespace and the remaining text are used;
    all other lines are ignored.

    Each matching line yields an OpenStruct with:

    * ``id``        -- the ``COG<digits>`` identifier,
    * ``name``      -- the rest of the line after the identifier,
    * ``parents``   -- the text found inside the square brackets, kept as a
      plain string (presumably functional category letters; verify against
      the input file),
    * ``namespace`` -- the constant 'cog'.

    Args:
        filename: path of the COG listing to read.

    Returns:
        A list of records in file order.
    """
    pattern = re.compile(r'\[(\w+)\]\s+(COG\d+)\s+(.*)')
    records = []
    with open(filename, 'r') as handle:
        for raw_line in handle:
            hit = pattern.match(raw_line)
            if hit is None:
                # Not a COG entry line; skip it.
                continue
            bracket_text, cog_id, description = hit.groups()
            record = OpenStruct()
            record.id = cog_id
            record.name = description
            record.parents = bracket_text
            record.namespace = 'cog'
            records.append(record)
    return records