Example #1
    def process_gene_desc(self, limit):
        raw = '/'.join((self.rawdir, self.files['gene_desc']['file']))

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        gu = GraphUtils(curie_map.get())

        logger.info("Processing Gene descriptions")
        line_counter = 0
        # geno = Genotype(g)  # TODO unused
        with gzip.open(raw, 'rb') as csvfile:
            filereader = csv.reader(
                io.TextIOWrapper(csvfile, newline=""), delimiter='\t',
                quotechar='\"')
            for row in filereader:
                if re.match(r'\#', ''.join(row)):
                    continue
                line_counter += 1
                if line_counter == 1:
                    continue
                (gene_num, public_name, molecular_name, concise_description,
                 provisional_description, detailed_description,
                 automated_description, gene_class_description) = row

                if self.testMode and gene_num not in self.test_ids['gene']:
                    continue

                gene_id = 'WormBase:'+gene_num

                if concise_description != 'none available':
                    gu.addDefinition(g, gene_id, concise_description)

                # skip any of the other descriptions if they duplicate
                # the concise description, say 'none ...', or are empty
                descs = {
                    'provisional': provisional_description,
                    'automated': automated_description,
                    'detailed': detailed_description,
                    'gene class': gene_class_description
                }
                for d, text in descs.items():
                    if text == concise_description \
                            or re.match(r'none', text) or text == '':
                        continue  # don't use it
                    text = ' '.join((text, '['+d+']'))
                    gu.addDescription(g, gene_id, text)

                if not self.testMode \
                        and limit is not None and line_counter > limit:
                    break

        return
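
A minimal standalone sketch of the description-tagging step above; the row values are hypothetical, and print() stands in for the gu.addDescription() call:

import re

# hypothetical field values from one gene_desc row
concise_description = 'Enables adenylate cyclase activity.'
descs = {
    'provisional': 'none available',
    'automated': 'Enables adenylate cyclase activity.',
    'detailed': 'acy-1 encodes an adenylyl cyclase.',
    'gene class': ''
}

for d, text in descs.items():
    # same filter as above: skip duplicates of the concise
    # description, 'none ...' markers, and empty strings
    if text == concise_description or re.match(r'none', text) or text == '':
        continue
    print(' '.join((text, '['+d+']')))
# -> acy-1 encodes an adenylyl cyclase. [detailed]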
Example #2
    def _process_nlx_157874_1_view(self, raw, limit=None):
        """
        This table contains the Elements of Morphology data that have been
        screen-scraped into DISCO.
        Note that foaf:depiction is the inverse of the foaf:depicts
        relationship.

        Since it is bad form to have two definitions,
        we concatenate the two into one string.

        Triples:
            <eom id> a owl:Class
                rdfs:label Literal(eom label)
                OIO:hasRelatedSynonym Literal(synonym list)
                IAO:definition Literal(objective_def. subjective def)
                foaf:depiction Literal(small_image_url),
                               Literal(large_image_url)
                foaf:page Literal(page_url)
                rdfs:comment Literal(long commented text)


        :param raw:
        :param limit:
        :return:
        """

        gu = GraphUtils(curie_map.get())
        line_counter = 0
        with open(raw, 'r') as f1:
            f1.readline()  # read the header row; skip
            filereader = csv.reader(f1, delimiter='\t', quotechar='\"')
            for line in filereader:
                line_counter += 1
                (morphology_term_id, morphology_term_num,
                 morphology_term_label, morphology_term_url,
                 terminology_category_label, terminology_category_url,
                 subcategory, objective_definition, subjective_definition,
                 comments, synonyms, replaces, small_figure_url,
                 large_figure_url, e_uid, v_uid, v_uuid,
                 v_last_modified) = line

                # note:
                # e_uid, v_uid, v_uuid, v_last_modified,
                # terminology_category_label, terminology_category_url,
                # subcategory, and morphology_term_num
                # are currently unused.

                # Add morphology term to graph as a class
                # with label, type, and description.
                gu.addClassToGraph(self.graph, morphology_term_id,
                                   morphology_term_label)

                # Assemble the description text

                if subjective_definition != '' and not (
                        re.match(r'.+\.$', subjective_definition)):
                    # add a trailing period.
                    subjective_definition = subjective_definition.strip() + '.'
                if objective_definition != '' and not (
                        re.match(r'.+\.$', objective_definition)):
                    # add a trailing period.
                    objective_definition = objective_definition.strip() + '.'

                definition = \
                    '  '.join(
                        (objective_definition, subjective_definition)).strip()

                if definition != '':
                    gu.addDefinition(self.graph, morphology_term_id,
                                     definition)

                # <term id> FOAF:depicted_by literal url
                # <url> type foaf:depiction

                # do we want both images?
                # morphology_term_id has depiction small_figure_url
                if small_figure_url != '':
                    gu.addDepiction(self.graph, morphology_term_id,
                                    small_figure_url)

                # morphology_term_id has depiction large_figure_url
                if large_figure_url != '':
                    gu.addDepiction(self.graph, morphology_term_id,
                                    large_figure_url)

                # morphology_term_id has comment comments
                if comments != '':
                    gu.addComment(self.graph, morphology_term_id,
                                  comments.strip())

                if synonyms != '':
                    for s in synonyms.split(';'):
                        gu.addSynonym(
                            self.graph, morphology_term_id, s.strip(),
                            gu.properties['hasExactSynonym'])

                # morphology_term_id hasRelatedSynonym replaces (; delimited)
                if replaces != '' and replaces != synonyms:
                    for s in replaces.split(';'):
                        gu.addSynonym(
                            self.graph, morphology_term_id, s.strip(),
                            gu.properties['hasRelatedSynonym'])

                # morphology_term_id has page morphology_term_url
                gu.addPage(self.graph, morphology_term_id, morphology_term_url)

                if limit is not None and line_counter > limit:
                    break
        return
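
The definition assembly above reduces to a small helper: give each non-empty part a trailing period, then join the objective and subjective parts with two spaces. This is a sketch for illustration, not part of the source:

import re

def assemble_definition(objective_definition, subjective_definition):
    parts = []
    for part in (objective_definition, subjective_definition):
        # add a trailing period to non-empty parts that lack one
        if part != '' and not re.match(r'.+\.$', part):
            part = part.strip() + '.'
        parts.append(part)
    return '  '.join(parts).strip()

print(assemble_definition('A wide nasal bridge', 'Apparent broadening.'))
# -> A wide nasal bridge.  Apparent broadening.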
Example #3
class GeneReviews(Source):
    """
    Here we process the GeneReviews mappings to OMIM,
    plus inspect the GeneReviews (html) books to pull the clinical descriptions
    in order to populate the definitions of the terms in the ontology.
    We define the GeneReviews items as classes that are either grouping classes
    over OMIM disease ids (gene ids are filtered out),
    or are made as subclasses of DOID:4 (generic disease).

    Note that GeneReviews
    [copyright policy](http://www.ncbi.nlm.nih.gov/books/NBK138602/)
    (as of 2015.11.20) says:

    GeneReviews® chapters are owned by the University of Washington, Seattle,
    © 1993-2015. Permission is hereby granted to reproduce, distribute,
    and translate copies of content materials provided that
    (i) credit for source (www.ncbi.nlm.nih.gov/books/NBK1116/)
        and copyright (University of Washington, Seattle)
        are included with each copy;
    (ii) a link to the original material is provided whenever the material is
        published elsewhere on the Web; and
    (iii) reproducers, distributors, and/or translators comply with this
        copyright notice and the GeneReviews Usage Disclaimer.

    This script doesn't pull the GeneReviews books from the NCBI Bookshelf
    directly; scripting this task is expressly prohibited by
    [NCBI Bookshelf policy](http://www.ncbi.nlm.nih.gov/books/NBK45311/).
    However, assuming you have acquired the books (in html format) via
    permissible means, a parser for those books is provided here to extract
    the clinical descriptions to define the NBK identified classes.

    """

    files = {
        'idmap': {'file': 'NBKid_shortname_OMIM.txt',
                  'url': GRDL + '/NBKid_shortname_OMIM.txt'},
        'titles': {'file': 'GRtitle_shortname_NBKid.txt',
                   'url': GRDL + '/GRtitle_shortname_NBKid.txt'}
        }

    def __init__(self):
        Source.__init__(self, 'genereviews')

        self.load_bindings()

        self.dataset = Dataset(
            'genereviews', 'Gene Reviews', 'http://genereviews.org/',
            None, 'http://www.ncbi.nlm.nih.gov/books/NBK138602/')
        self.dataset.set_citation('GeneReviews:NBK1116')

        self.gu = GraphUtils(curie_map.get())

        self.book_ids = set()
        self.all_books = {}

        if 'test_ids' not in config.get_config() or\
                'disease' not in config.get_config()['test_ids']:
            logger.warning("not configured with disease test ids.")
            self.test_ids = list()
        else:
            # select only those test ids that are OMIM ids.
            self.test_ids = config.get_config()['test_ids']['disease']

        return

    def fetch(self, is_dl_forced=False):
        """
        We fetch GeneReviews id-label map and id-omim mapping files from NCBI.
        :return: None
        """

        self.get_files(is_dl_forced)

        return

    def parse(self, limit=None):
        """
        :return: None
        """

        if self.testOnly:
            self.testMode = True

        self._get_titles(limit)
        self._get_equivids(limit)

        self.create_books()
        self.process_nbk_html(limit)

        self.load_bindings()

        # no test subset for now; test == full graph
        self.testgraph = self.graph

        logger.info("Found %d nodes", len(self.graph))

        return

    def _get_equivids(self, limit):
        """
        The file processed here is of the format:
        #NBK_id GR_shortname    OMIM
        NBK1103 trimethylaminuria       136132
        NBK1103 trimethylaminuria       602079
        NBK1104 cdls    122470
        Where each of the rows represents a mapping between
        a gr id and an omim id. These are a 1:many relationship,
        and some of the omim ids are genes(not diseases).
        Therefore, we need to create a loose coupling here.
        We make the assumption that these NBKs are generally higher-level
        grouping classes; therefore the OMIM ids are treated as subclasses.
        (This assumption is poor for those omims that are actually genes,
        but we have no way of knowing what those are here...
        we will just have to deal with that for now.)
        :param limit:
        :return:

        """
        raw = '/'.join((self.rawdir, self.files['idmap']['file']))
        gu = GraphUtils(curie_map.get())
        line_counter = 0

        # we look some stuff up in OMIM, so initialize here
        omim = OMIM()
        id_map = {}
        allomimids = set()
        with open(raw, 'r', encoding="utf8") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                if line_counter == 1:  # skip header
                    continue
                (nbk_num, shortname, omim_num) = row
                gr_id = 'GeneReviews:'+nbk_num
                omim_id = 'OMIM:'+omim_num
                if self.testMode and omim_id not in self.test_ids:
                    continue

                # sometimes there are malformed omim numbers
                if len(omim_num) > 6:
                    logger.warning(
                        "OMIM number incorrectly formatted " +
                        "in row %d; skipping:\n%s",
                        line_counter, '\t'.join(row))
                    continue

                # build up a hashmap of the mappings; then process later
                if nbk_num not in id_map:
                    id_map[nbk_num] = set()
                id_map[nbk_num].add(omim_num)

                # add the class along with the shortname
                gu.addClassToGraph(self.graph, gr_id, None)
                gu.addSynonym(self.graph, gr_id, shortname)

                allomimids.add(omim_num)

                if not self.testMode and \
                        limit is not None and line_counter > limit:
                    break

            # end looping through file

        # get the omim ids that are not genes
        entries_that_are_phenotypes = \
            omim.process_entries(
                list(allomimids), filter_keep_phenotype_entry_ids,
                None, None, limit)

        logger.info("Filtered out %d/%d entries that are genes or features",
                    len(allomimids)-len(entries_that_are_phenotypes),
                    len(allomimids))

        for nbk_num in self.book_ids:
            gr_id = 'GeneReviews:'+nbk_num
            if nbk_num in id_map:
                omim_ids = id_map.get(nbk_num)
                for omim_num in omim_ids:
                    omim_id = 'OMIM:'+omim_num
                    # add the gene reviews as a superclass to the omim id,
                    # but only if the omim id is not a gene
                    if omim_id in entries_that_are_phenotypes:
                        gu.addClassToGraph(self.graph, omim_id, None)
                        gu.addSubclass(self.graph, gr_id, omim_id)
            # add this as a generic subclass of DOID:4
            gu.addSubclass(self.graph, 'DOID:4', gr_id)

        return
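
Aside: the id_map built above is a 1:many map from NBK numbers to OMIM numbers. Using the sample rows from the docstring, the same structure can be sketched with a defaultdict:

from collections import defaultdict

# the sample rows from the docstring: (NBK_id, GR_shortname, OMIM)
rows = [
    ('NBK1103', 'trimethylaminuria', '136132'),
    ('NBK1103', 'trimethylaminuria', '602079'),
    ('NBK1104', 'cdls', '122470'),
]

id_map = defaultdict(set)
for nbk_num, shortname, omim_num in rows:
    id_map[nbk_num].add(omim_num)

print(sorted(id_map['NBK1103']))  # -> ['136132', '602079']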

    def _get_titles(self, limit):
        """
        The file processed here is of the format:
        #NBK_id GR_shortname    OMIM
        NBK1103 trimethylaminuria       136132
        NBK1103 trimethylaminuria       602079
        NBK1104 cdls    122470
        Where each of the rows represents a mapping between
        a gr id and an omim id. These are a 1:many relationship,
        and some of the omim ids are genes (not diseases).
        Therefore, we need to create a loose coupling here.
        We make the assumption that these NBKs are generally higher-level
        grouping classes; therefore the OMIM ids are treated as subclasses.
        (This assumption is poor for those omims that are actually genes,
        but we have no way of knowing what those are here...
        we will just have to deal with that for now.)
        :param limit:
        :return:
        """
        raw = '/'.join((self.rawdir, self.files['titles']['file']))
        gu = GraphUtils(curie_map.get())
        line_counter = 0
        with open(raw, 'r', encoding='latin-1') as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                if line_counter == 1:  # skip header
                    continue
                (shortname, title, nbk_num) = row
                gr_id = 'GeneReviews:'+nbk_num

                self.book_ids.add(nbk_num)  # a global set of the book nums

                if limit is None or line_counter < limit:
                    gu.addClassToGraph(self.graph, gr_id, title)
                    gu.addSynonym(self.graph, gr_id, shortname)

        return

    def create_books(self):

        # note that although we put in the url to the book,
        # NCBI Bookshelf does not allow robots to download content
        book_item = {'file': 'books/',
                     'url': ''}

        for nbk in self.book_ids:
            b = book_item.copy()
            b['file'] = '/'.join(('books', nbk+'.html'))
            b['url'] = 'http://www.ncbi.nlm.nih.gov/books/'+nbk
            self.all_books[nbk] = b

        return

    def process_nbk_html(self, limit):
        """
        Here we process the gene reviews books to fetch
        the clinical descriptions to include in the ontology.
        We only use books that have been acquired manually,
        as NCBI Bookshelf does not permit automated downloads.
        This parser will only process the books that are found in
        the ```raw/genereviews/books``` directory,
        permitting partial completion.

        :param limit:
        :return:
        """
        c = 0
        books_not_found = set()
        for nbk in self.book_ids:
            c += 1
            nbk_id = 'GeneReviews:'+nbk
            book_item = self.all_books.get(nbk)
            url = '/'.join((self.rawdir, book_item['file']))

            # figure out if the book is there; if so, process, otherwise skip
            book_dir = '/'.join((self.rawdir, 'books'))
            book_files = os.listdir(book_dir)
            if ''.join((nbk, '.html')) not in book_files:
                # logger.warning("No book found locally for %s; skipping", nbk)
                books_not_found.add(nbk)
                continue
            logger.info("Processing %s", nbk)

            with open(url) as page:
                soup = BeautifulSoup(page.read(), 'html.parser')

            # sec0 == clinical description
            clin_summary = \
                soup.find(
                    'div', id=re.compile(".*Summary.sec0"))
            if clin_summary is not None:
                p = clin_summary.find('p')
                ptext = p.text
                ptext = re.sub(r'\s+', ' ', ptext)

                ul = clin_summary.find('ul')
                if ul is not None:
                    item_text = list()
                    for li in ul.find_all('li'):
                        item_text.append(re.sub(r'\s+', ' ', li.text))
                    ptext += ' '.join(item_text)

                # add in the copyright and citation info to description
                ptext = \
                    ' '.join(
                        (ptext,
                         '[GeneReviews:NBK1116, GeneReviews:NBK138602, ' +
                         nbk_id+']'))

                self.gu.addDefinition(self.graph, nbk_id, ptext.strip())

            # get the pubs
            pmid_set = set()
            pub_div = soup.find('div', id=re.compile(r".*Literature_Cited"))
            if pub_div is not None:
                ref_list = pub_div.find_all('div', attrs={'class': "bk_ref"})
                for r in ref_list:
                    for a in r.find_all(
                            'a', attrs={'href': re.compile(r"pubmed")}):
                        if re.match(r'PubMed:', a.text):
                            pmnum = re.sub(r'PubMed:\s*', '', a.text)
                        else:
                            match = re.search(
                                r'\/pubmed\/(\d+)$', a['href'])
                            if match is None:
                                continue
                            pmnum = match.group(1)
                        if pmnum is not None:
                            pmid = 'PMID:'+str(pmnum)
                            self.gu.addTriple(
                                self.graph, pmid,
                                self.gu.object_properties['is_about'],
                                nbk_id)
                            pmid_set.add(pmnum)
                            ref = Reference(
                                pmid, Reference.ref_types['journal_article'])
                            ref.addRefToGraph(self.graph)

            # TODO add author history, copyright, license to dataset

            # TODO get PMID-NBKID equivalence (near foot of page),
            # and make it "is about" link
            # self.gu.addTriple(
            #   self.graph, pmid,
            #   self.gu.object_properties['is_about'], nbk_id)
            # for example: NBK1191 PMID:20301370

            # add the book to the dataset
            self.dataset.setFileAccessUrl(book_item['url'])

            if limit is not None and c > limit:
                break

            # finish looping through books

        num_not_found = len(books_not_found)
        if num_not_found > 0:
            if num_not_found > 100:
                logger.warning(
                    "There were %d books not found.", num_not_found)
            else:
                logger.warning(
                    "The following %d books were not found locally: %s",
                    num_not_found, str(books_not_found))
        logger.info(
            "Finished processing %d books for clinical descriptions",
            c - num_not_found)

        return

    def getTestSuite(self):
        import unittest
        from tests.test_genereviews import GeneReviewsTestCase

        test_suite = \
            unittest.TestLoader().loadTestsFromTestCase(GeneReviewsTestCase)

        return test_suite
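
The clinical-summary scraping in process_nbk_html() can be exercised against an inline HTML fragment. The fragment below is made up, but the div id pattern and the whitespace normalization follow the code above:

import re
from bs4 import BeautifulSoup

html = '''
<div id="NBK0000.Summary.sec0">
  <p>A  clinical   description.</p>
  <ul><li>first  feature</li><li>second feature</li></ul>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')
clin_summary = soup.find('div', id=re.compile(r'.*Summary.sec0'))
if clin_summary is not None:
    ptext = re.sub(r'\s+', ' ', clin_summary.find('p').text)
    ul = clin_summary.find('ul')
    if ul is not None:
        items = [re.sub(r'\s+', ' ', li.text) for li in ul.find_all('li')]
        ptext += ' ' + ' '.join(items)
    print(ptext.strip())
# -> A clinical description. first feature second feature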
Example #4
    def _process_genes(self, limit=None):
        """
        This method processes the KEGG gene IDs.
        The label for the gene is pulled as the first symbol in the list of gene symbols; the rest
        are added as synonyms.  The long-form of the gene name is added as a definition.
        This is hardcoded to just processes human genes.

        Triples created:
        <gene_id> is a SO:gene
        <gene_id> rdfs:label <gene_name>

        :param limit:
        :return:
        """

        logger.info("Processing genes")
        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph
        line_counter = 0
        gu = GraphUtils(curie_map.get())
        geno = Genotype(g)
        raw = '/'.join((self.rawdir, self.files['hsa_genes']['file']))
        with open(raw, 'r', encoding="iso-8859-1") as csvfile:
            filereader = csv.reader(csvfile, delimiter='\t', quotechar='\"')
            for row in filereader:
                line_counter += 1
                (gene_id, gene_name) = row

                gene_id = 'KEGG-'+gene_id.strip()

                # the gene listing has a bunch of labels that are
                # delimited, like:
                # DST, BP240, BPA, BPAG1, CATX-15, CATX15, D6S1101, DMH, DT, EBSB2, HSAN6, MACF2; dystonin; K10382 dystonin
                # the list is semicolon-delimited (symbol, name, gene_class),
                # where the symbol is itself a comma-delimited list

                # here, we split them up; the first abbreviation becomes
                # the symbol, and the rest become synonyms

                gene_stuff = re.split(';', gene_name)
                symbollist = re.split(',', gene_stuff[0])
                first_symbol = symbollist[0].strip()

                if gene_id not in self.label_hash:
                    self.label_hash[gene_id] = first_symbol

                if self.testMode and gene_id not in self.test_ids['genes']:
                    continue

                # Add the gene as a class.
                geno.addGene(gene_id, first_symbol)

                # add the long name as the description
                if len(gene_stuff) > 1:
                    description = gene_stuff[1].strip()
                    gu.addDefinition(g, gene_id, description)

                # add the rest of the symbols as synonyms
                for symbol in symbollist[1:]:
                    gu.addSynonym(g, gene_id, symbol.strip())

                # TODO add the KO here?

                if not self.testMode \
                        and limit is not None and line_counter > limit:
                    break

        logger.info("Done with genes")
        return
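
The label splitting in _process_genes() is easiest to see on the sample label from the comment above; this standalone sketch prints what would become the label, a synonym, and the definition:

import re

# the sample gene_name from the comment in _process_genes()
gene_name = ('DST, BP240, BPA, BPAG1, CATX-15, CATX15, D6S1101, DMH, DT, '
             'EBSB2, HSAN6, MACF2; dystonin; K10382 dystonin')

gene_stuff = re.split(';', gene_name)
symbollist = re.split(',', gene_stuff[0])

first_symbol = symbollist[0].strip()             # the label: 'DST'
synonyms = [s.strip() for s in symbollist[1:]]   # 'BP240', 'BPA', ...
description = gene_stuff[1].strip() if len(gene_stuff) > 1 else None

print(first_symbol, synonyms[0], description)
# -> DST BP240 dystonin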
Example #5
    def _process_all(self, limit):
        """
        This takes the list of omim identifiers from the omim.txt.Z file,
        and iteratively queries the omim api for the json-formatted data.
        This will create OMIM classes, with the label, definition, and some synonyms.
        If an entry is "removed", it is added as a deprecated class.
        If an entry is "moved", it is deprecated and consider annotations are added.

        Additionally, we extract:
        *phenotypicSeries ids as superclasses
        *equivalent ids for Orphanet and UMLS

        If set to testMode, it will write only those items in the test_ids to the testgraph.

        :param limit:
        :return:
        """
        omimids = self._get_omim_ids()  # store the set of omim identifiers

        omimparams = {
            'format': 'json',
            'include': 'all',
        }
        # you will need to add the API key into the conf.json file, like:
        # keys : { 'omim' : '<your api key here>' }
        omimparams.update({'apiKey': config.get_config()['keys']['omim']})

        # http://api.omim.org/api/entry?mimNumber=100100&include=all

        if self.testMode:
            g = self.testgraph
        else:
            g = self.graph

        gu = GraphUtils(curie_map.get())

        it = 0  # for counting

        # note that you can only do request batches of 20
        # see info about "Limits" at http://omim.org/help/api
        groupsize = 20
        if not self.testMode and limit is not None:
            # in case the limit is larger than the number of records,
            # max it out
            max_records = min(limit, len(omimids))
        else:
            max_records = len(omimids)
        # max_records = 10  # for testing

        # TODO write the json to local files, assuming that
        # downloads within 24 hrs are the same
        # now, loop through the omim numbers
        # and pull the records as json docs
        while it < max_records:
            end = min(max_records, it + groupsize)
            # iterate through the omim ids list,
            # and fetch from the OMIM api in batches of 20

            if self.testMode:
                intersect = list(
                    set([str(i) for i in self.test_ids]) &
                    set(omimids[it:end]))
                if len(intersect) > 0:  # some of the test ids are in the omimids
                    logger.info("found test ids: %s", intersect)
                    omimparams.update({'mimNumber': ','.join(intersect)})
                else:
                    it += groupsize
                    continue
            else:
                omimparams.update({'mimNumber': ','.join(omimids[it:end])})

            p = urllib.parse.urlencode(omimparams)
            url = '/'.join((self.OMIM_API, 'entry')) + '?%s' % p
            logger.info('fetching: %s', url)

            # ### if you want to test a specific entry number, uncomment the following code block
            # if ('101600' in omimids[it:end]):  #104000
            #     print("FOUND IT in",omimids[it:end])
            # else:
            #    #testing very specific record
            #     it+=groupsize
            #     continue
            # ### end code block for testing

            # print ('fetching:',(',').join(omimids[it:end]))
            # print('url:',url)
            d = urllib.request.urlopen(url)
            resp = d.read().decode()
            request_time = datetime.now()
            it += groupsize

            myjson = json.loads(resp)
            entries = myjson['omim']['entryList']

            geno = Genotype(g)

            # add genome and taxon
            tax_num = '9606'
            tax_id = 'NCBITaxon:9606'
            tax_label = 'Human'

            geno.addGenome(tax_id, str(tax_num))   # tax label can get added elsewhere
            gu.addClassToGraph(g, tax_id, None)   # label added elsewhere

            for e in entries:

                # get the numbers, labels, and descriptions
                omimnum = e['entry']['mimNumber']
                titles = e['entry']['titles']
                label = titles['preferredTitle']

                other_labels = []
                if 'alternativeTitles' in titles:
                    other_labels += self._get_alt_labels(
                        titles['alternativeTitles'])
                if 'includedTitles' in titles:
                    other_labels += self._get_alt_labels(
                        titles['includedTitles'])

                # add synonyms of alternate labels
                # preferredTitle": "PFEIFFER SYNDROME",
                # "alternativeTitles": "ACROCEPHALOSYNDACTYLY, TYPE V; ACS5;;\nACS V;;\nNOACK SYNDROME",
                # "includedTitles": "CRANIOFACIAL-SKELETAL-DERMATOLOGIC DYSPLASIA, INCLUDED"

                # remove the abbreviation (comes after the ;) from the
                # preferredTitle, and add it as a synonym
                abbrev = None
                if len(re.split(';', label)) > 1:
                    abbrev = re.split(';', label)[1].strip()
                newlabel = self._cleanup_label(label)

                description = self._get_description(e['entry'])
                omimid = 'OMIM:'+str(omimnum)

                if e['entry']['status'] == 'removed':
                    gu.addDeprecatedClass(g, omimid)
                else:
                    omimtype = self._get_omimtype(e['entry'])
                    # this uses our cleaned-up label
                    gu.addClassToGraph(g, omimid, newlabel, omimtype)

                    # add the original OMIM label as a synonym
                    gu.addSynonym(g, omimid, label)

                    # add the alternate labels and includes as synonyms
                    for alt_label in other_labels:
                        gu.addSynonym(g, omimid, alt_label)

                    # for OMIM, we're adding the description as a definition
                    gu.addDefinition(g, omimid, description)
                    if abbrev is not None:
                        gu.addSynonym(g, omimid, abbrev)

                    # if this is a genetic locus (but not sequenced),
                    # then add the chromosomal location info
                    if omimtype == Genotype.genoparts['biological_region']:
                        if 'geneMapExists' in e['entry'] \
                                and e['entry']['geneMapExists']:
                            genemap = e['entry']['geneMap']
                            if 'cytoLocation' in genemap:
                                cytoloc = genemap['cytoLocation']
                                # parse the cytoloc; add this omim thing
                                # as a subsequence of the cytofeature
                                # 18p11.3-p11.2
                                # for now, just take the first one
                                # FIXME add the other end of the range,
                                # but not sure how to do that
                                # not sure if saying subsequence of
                                # feature is the right relationship
                                cytoloc = cytoloc.split('-')[0]
                                f = Feature(omimid, None, None)
                                if 'chromosome' in genemap:
                                    chrom = makeChromID(
                                        str(genemap['chromosome']),
                                        tax_num, 'CHR')
                                    geno.addChromosomeClass(
                                        str(genemap['chromosome']),
                                        tax_id, tax_label)
                                    loc = makeChromID(
                                        cytoloc, tax_num, 'CHR')
                                    # this is the chr band
                                    gu.addClassToGraph(g, loc, cytoloc)
                                    f.addSubsequenceOfFeature(g, loc)
                                    f.addFeatureToGraph(g)

                    # check if moved; if so, deprecate it and add
                    # replaced/consider annotations to the other entry(ies).
                    # some entries have been moved to multiple other
                    # entries and use the joining word "and":
                    #   612479 is movedto: "603075 and 603029"
                    # others use a comma-delimited list, like:
                    #   610402 is movedto: "609122,300870"
                    if e['entry']['status'] == 'moved':
                        moved_to = str(e['entry']['movedTo'])
                        if re.search(r'and', moved_to):
                            # split the movedTo entry on 'and'
                            newids = re.split(r'and', moved_to)
                        elif ',' in moved_to:
                            # split on the comma
                            newids = moved_to.split(',')
                        else:
                            # make a list of one
                            newids = [moved_to]
                        # clean up whitespace and add the OMIM prefix
                        # to the numeric portion
                        fixedids = []
                        for i in newids:
                            fixedids.append('OMIM:'+i.strip())

                        gu.addDeprecatedClass(g, omimid, fixedids)

                    self._get_phenotypicseries_parents(e['entry'], g)
                    self._get_mappedids(e['entry'], g)

                    self._get_pubs(e['entry'], g)

                    self._get_process_allelic_variants(e['entry'], g)

                # end iterating over batch of entries

            # can't have more than 4 requests per sec,
            # so wait out the rest of the time slot, if necessary
            dt = datetime.now() - request_time
            rem = 0.25 - dt.total_seconds()
            if rem > 0:
                logger.info("waiting %f sec", rem)
                time.sleep(rem)

            gu.loadAllProperties(g)

        return
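
The batching-and-throttling pattern in _process_all() generalizes to a small helper. fetch_batch below is a hypothetical stand-in for the urllib request; the 20-id batches and the 4-requests-per-second ceiling are the limits noted in the comments above:

import time
from datetime import datetime

def fetch_in_batches(ids, fetch_batch, groupsize=20, min_interval=0.25):
    """Call fetch_batch on successive slices of ids, issuing at most
    one request per min_interval seconds."""
    results = []
    for start in range(0, len(ids), groupsize):
        request_time = datetime.now()
        results.append(fetch_batch(ids[start:start + groupsize]))
        # sleep off whatever remains of this request's time slot
        elapsed = (datetime.now() - request_time).total_seconds()
        if elapsed < min_interval:
            time.sleep(min_interval - elapsed)
    return results

# usage with a dummy fetcher that just counts each batch:
print(fetch_in_batches(list(range(45)), len))
# -> [20, 20, 5]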