Exemplo n.º 1
0
    gobp_dict,
    name='go-biological-process',
    prefix='gobp',
    domain=['process'])
# GO cellular-component data object, backed by gocc_dict.
gocc_data = GOData(
    gocc_dict,
    name='go-cellular-component',
    prefix='gocc',
    domain=['complex', 'location'])
# Register the GO archive under 'go.xml.gz': the tuple pairs go_file with
# the GOParser class and the list of GO data objects.
# NOTE(review): presumably these are the objects the parser fills - confirm.
baseline_data['go.xml.gz'] = (
    go_file,
    parsers.GOParser,
    [gobp_data, gocc_data],
)

# Resolve the MeSH file to use via get_latest_MeSH_filename.
# NOTE(review): 'd' and '.bin' look like filename prefix/suffix filters -
# confirm against the helper's definition.
mesh_file = get_latest_MeSH_filename(
    'ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/',
    'd',
    '.bin')
# Separate, initially empty dictionaries; meshcl_dict is handed to MESHData
# just below.
meshcl_dict = {}
meshd_dict = {}
meshpp_dict = {}
meshc_dict = {}
mesha_dict = {}
# MeSH cellular-structures data object ('location' domain).
meshcl_data = MESHData(
    meshcl_dict,
    name='mesh-cellular-structures',
    prefix='meshcs',
    domain=['location'],
    scheme_type=['ns', 'anno'])
meshd_data = MESHData(
    meshd_dict,
    name='mesh-diseases',
    prefix='meshd',
    domain=['disease'],
    scheme_type=[
Exemplo n.º 2
0
                MNs = set()
    os.chdir(os.pardir)
    return MESH_dict


# owl_data maps an annotation name to a list of (source URL, ID) tuples;
# a name may draw on more than one OWL source (see 'cell-line').
owl_data = {}
owl_data['anatomy'] = [
    ('http://purl.obolibrary.org/obo/uberon.owl', 'UBERON'),
]
owl_data['disease'] = [
    ('http://purl.obolibrary.org/obo/doid.owl', 'DOID'),
]
owl_data['cell'] = [
    ('http://purl.obolibrary.org/obo/cl.owl', 'CL'),
]
owl_data['cell-line'] = [
    ('http://purl.obolibrary.org/obo/clo.owl', 'CLO'),
    ('http://www.ebi.ac.uk/efo/efo.owl', 'EFO'),
]

# MeSH file is updated yearly - get latest
mesh_url = get_latest_MeSH_filename(
    'ftp://nlmpubs.nlm.nih.gov/online/mesh/.asciimesh/', 'd', '.bin')
# Parse the version from the MeSH filename (last path component of the URL)
# by removing the literal 'd' prefix and '.bin' suffix.
# str.lstrip/str.rstrip are deliberately avoided: they strip any run of the
# given *characters* rather than a prefix/suffix, so a version containing
# 'd', 'b', 'i', 'n' or '.' at its edges would be silently truncated.
mesh_ver = mesh_url.split('/')[-1]
if mesh_ver.startswith('d'):
    mesh_ver = mesh_ver[len('d'):]
if mesh_ver.endswith('.bin'):
    mesh_ver = mesh_ver[:-len('.bin')]

# names of the .belanno files generated from MeSH
mesh_anno_names = [
    'cell-structure',
    'mesh-diseases',
    'mesh-anatomy',
]

# MeSH sub-branches to include in mesh-anatomy: A01 through A17, with A13
# left out.
anatomy_branches = tuple(
    'A%02d' % number for number in range(1, 18) if number != 13)

# MeSH branches listed for diseases ('C' and 'F').
disease_branches = ('C', 'F')

# command-line argument parser; the description is shown in the help output
parser = argparse.ArgumentParser(description='Generate BEL annotation files. ')
# Ordered registry of change-log sources. Each entry maps a key to a
# (source URL, parser class) tuple; insertion order is preserved.
# NOTE(review): the keys look like local file names - confirm with the consumer.
changelog_data = OrderedDict()
changelog_data['gene_history.gz'] = (
    'ftp://ftp.ncbi.nih.gov/gene/DATA/gene_history.gz',
    parsers.EntrezGeneHistoryParser,
)
changelog_data['hgnc.tsv'] = (
    'http://www.genenames.org/cgi-bin/hgnc_downloads?title=HGNC+output+data&hgnc_dbtag=on&preset=all&status=Approved&status=Entry+Withdrawn&status_opt=2&level=pri&=on&where=&order_by=gd_app_sym_sort&limit=&format=text&submit=submit&.cgifields=&.cgifields=level&.cgifields=chr&.cgifields=status&.cgifields=hgnc_dbtag',
    parsers.HGNCParser,
)
changelog_data['MRK_List1.rpt'] = (
    'ftp://ftp.informatics.jax.org/pub/reports/MRK_List1.rpt',
    parsers.MGIParser,
)
changelog_data['rgd.txt'] = (
    'ftp://rgd.mcw.edu/pub/data_release/GENES_RAT.txt',
    parsers.RGDParser,
)
changelog_data['rgd_obsolete.txt'] = (
    'ftp://rgd.mcw.edu/pub/data_release/GENES_OBSOLETE_IDS.txt',
    parsers.RGDObsoleteParser,
)
changelog_data['delac_sp.txt'] = (
    'ftp://ftp.uniprot.org/pub/databases/uniprot/knowledgebase/docs/delac_sp.txt',
    parsers.SwissWithdrawnParser,
)
# Both GO entries read the same archive, resolved at run time by
# get_latest_GO_filename (these entries were previously pinned to the
# go_201309-termdb.obo-xml.gz snapshot under latest-full).
go_file = get_latest_GO_filename('http://archive.geneontology.org/latest-full')
changelog_data['gobp.xml.gz'] = (go_file, parsers.GOBPParser)
changelog_data['gocc.xml.gz'] = (go_file, parsers.GOCCParser)
changelog_data['chebi.owl'] = (
    'ftp://ftp.ebi.ac.uk/pub/databases/chebi/ontology/chebi.owl',
    parsers.CHEBIParser,
)
# The MeSH replacement-terms file is resolved at run time, while the
# registry key keeps the fixed name 'replace2013.txt'.
mesh_replace_file = get_latest_MeSH_filename(
    'ftp://nlmpubs.nlm.nih.gov/online/mesh/.newterms/', 'replace', '.txt')
changelog_data['replace2013.txt'] = (
    mesh_replace_file,
    parsers.MESHChangesParser,
)
changelog_data['doid.owl'] = (
    'http://purl.obolibrary.org/obo/doid.owl',
    parsers.DODeprecatedParser,
)
# vim: ts=4 sts=4 sw=4 noexpandtab