# PATO parsing, first version: keep full term names and quoted synonym strings.
# parseGOOBO comes from the obo_parser module used below; read_blacklist() and
# stem() are helpers assumed to be defined elsewhere in this module.
from obo_parser import parseGOOBO


def parse_pato(ontology_file):
    """Return lowercased PATO term names and synonyms, minus blacklisted words."""
    terms = []
    for elt in parseGOOBO(ontology_file):
        terms.append(elt["name"])
        if 'synonym' in elt:
            if isinstance(elt['synonym'], list):
                for syn in elt['synonym']:
                    try:
                        # synonym values look like: "some synonym" EXACT [...]
                        terms.append(syn.split('"')[1])
                    except Exception:
                        print 'error parsing ontology synonym'
            else:
                try:
                    terms.append(elt['synonym'].split('"')[1])
                except Exception:
                    print 'error parsing ontology synonym non list'
    blacklist = read_blacklist()
    # also blacklist the stemmed form of every blacklisted word
    blacklist.extend([stem(b) for b in blacklist])
    return [pheno.lower() for pheno in terms
            if pheno.lower() not in blacklist and len(pheno) > 1]
# PATO parsing, refined version: split names and synonyms into individual
# words, drop parenthesised qualifiers, and de-duplicate before filtering.
import re

from obo_parser import parseGOOBO


def parse_pato(ontology_file):
    """Return lowercased single words from PATO names/synonyms, minus blacklisted words."""
    terms = []
    for elt in parseGOOBO(ontology_file):
        # strip parenthesised qualifiers, then keep the individual words
        p = re.sub(r'\([^)]*\)', ' ', elt["name"])
        terms.extend(p.split())
        if 'synonym' in elt:
            if isinstance(elt['synonym'], list):
                for syn in elt['synonym']:
                    try:
                        p = re.sub(r'\([^)]*\)', ' ', syn.split('"')[1])
                        terms.extend(p.split())
                    except Exception:
                        print 'error parsing ontology synonym'
            else:
                try:
                    p = re.sub(r'\([^)]*\)', ' ', elt['synonym'].split('"')[1])
                    terms.extend(p.split())
                except Exception:
                    print 'error parsing ontology synonym non list'
    terms = list(set(terms))
    blacklist = read_blacklist()
    return [pheno.lower() for pheno in terms
            if pheno.lower() not in blacklist
            and len(pheno) > 1
            and re.search(r'[a-zA-Z]', pheno)]
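# The two parse_pato variants above call read_blacklist(), and the first one
# also calls stem(); neither helper is shown in this file. The function below
# is only a minimal sketch of what read_blacklist() could look like, assuming
# a plain-text stopword file with one word per line (the file name is
# hypothetical), not the project's actual implementation.
def read_blacklist(path='phenotype_blacklist.txt'):
    blacklist = []
    with open(path) as f:
        for line in f:
            word = line.strip().lower()
            if word and not word.startswith('#'):  # skip blanks and comments
                blacklist.append(word)
    return blacklist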
# Generic variant: same word-level extraction as parse_pato, but without the
# blacklist filter, so it works for any OBO ontology.
import re

from obo_parser import parseGOOBO


def parse_ontology(ontology_file):
    """Return the unique words appearing in an ontology's term names and synonyms."""
    terms = []
    for elt in parseGOOBO(ontology_file):
        p = re.sub(r'\([^)]*\)', ' ', elt["name"])
        terms.extend(p.split())
        if 'synonym' in elt:
            if isinstance(elt['synonym'], list):
                for syn in elt['synonym']:
                    try:
                        p = re.sub(r'\([^)]*\)', ' ', syn.split('"')[1])
                        terms.extend(p.split())
                    except Exception:
                        print 'error parsing ontology synonym'
            else:
                try:
                    p = re.sub(r'\([^)]*\)', ' ', elt['synonym'].split('"')[1])
                    terms.extend(p.split())
                except Exception:
                    print 'error parsing ontology synonym non list'
    terms = list(set(terms))
    return terms
# Simplest variant: keep full term names and quoted synonym strings, with no
# word splitting and no blacklist filtering.
from obo_parser import parseGOOBO


def parse_pato(ontology_file):
    """Return all PATO term names and synonyms, unfiltered."""
    terms = []
    for elt in parseGOOBO(ontology_file):
        terms.append(elt["name"])
        if 'synonym' in elt:
            if isinstance(elt['synonym'], list):
                for syn in elt['synonym']:
                    try:
                        terms.append(syn.split('"')[1])
                    except Exception:
                        print 'error parsing ontology synonym'
            else:
                try:
                    terms.append(elt['synonym'].split('"')[1])
                except Exception:
                    print 'error parsing ontology synonym non list'
    return terms
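# Example usage (the file name is hypothetical): any PATO release in
# OBO v1.2 format can be passed straight to the parsers above.
if __name__ == "__main__":
    pato_terms = parse_pato('pato.obo')
    print 'parsed %d PATO terms' % len(pato_terms)
    print pato_terms[:10]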
""" Output fields: id, name, synonyms, related terms, alt IDs, parent, MeSh terms """ import argparse from obo_parser import parseGOOBO if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument('infile', help='Input HPO file in OBO v1.2 format.') parser.add_argument('outfile', help='Output TSV file name.') args = parser.parse_args() with open(args.outfile, 'w') as out: for term in parseGOOBO(args.infile): id = term['id'][0] name = term['name'][0] alt_ids = '|'.join(term['alt_id']) if 'alt_id' in term else '' is_a = '|'.join(x.partition(' ')[0] for x in term['is_a']) if 'is_a' in term else '' synonyms = set() related = set() mesh = set() for s in term.get('synonym', []): if ' EXACT [' in s: synonyms.add(s.split(' EXACT [')[0].strip('" ')) else: # RELATED, BROAD, etc. related.add(s.split('" ')[0].strip('"')) for n in term.get('xref', []):
import argparse

from obo_parser import parseGOOBO

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('infile', help='Input HPO file in OBO v1.2 format.')
    parser.add_argument('outfile', help='Output TSV file name.')
    args = parser.parse_args()

    with open(args.outfile, 'w') as out:
        for term in parseGOOBO(args.infile):
            id = term['id'][0]
            name = term['name'][0]
            alt_ids = '|'.join(term['alt_id']) if 'alt_id' in term else ''
            is_a = '|'.join(x.partition(' ')[0] for x in term['is_a']) if 'is_a' in term else ''
            synonyms = set()
            related = set()
            for s in term.get('synonym', []):
                if ' EXACT [' in s:
                    synonyms.add(s.split(' EXACT [')[0].strip('" '))
                else:  # RELATED, BROAD, etc.
                    related.add(s.split('" ')[0].strip('"'))
            for n in term.get('xref', []):
                if ' ' in n:
                    # keep the human-readable label that follows the xref ID
                    synonyms.add(n.partition(' ')[-1].strip('" '))
            synonyms.discard(name)
            related.discard(name)
            synonyms = '|'.join(sorted(synonyms)) if synonyms else ''
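# Note: parseGOOBO is used here as if it yields one dict per [Term] stanza,
# mapping each OBO tag to a list of its values, which is why single-valued
# fields are read as term['id'][0] and term['name'][0] above.
#
# Example invocation (the script name is hypothetical):
#
#   python hpo_obo_to_tsv.py hp.obo hpo_terms.tsv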