Python Gene.obsolete примеры использования

Язык программирования: Python
Пространство имен/Пакет: genes.models
Класс/Тип: Gene
Метод/Функция: obsolete
Примеров на hotexamples.com: 1
Python Gene.obsolete - 1 пример найден. Это лучшие примеры Python кода для genes.models.Gene.obsolete, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.
Основные методы
Показать Скрыть
Gene(8)
save(5)
__unicode__(1)
get_absolute_url(1)
obsolete(1)
Пример #1
Показать файл
Файл: genes_load_geneinfo.py Проект: rzelayafavila/django-genes
    def handle(self, *args, **options):
        """Load or refresh the genes of one organism from a gene_info file.

        The tab-delimited file named by the ``geneinfo`` option is read line
        by line.  Every line whose first column equals the taxonomy id (or
        ``gi_tax_id`` when that option is set) either creates a ``Gene`` or
        updates the stored one if a tracked field changed.  Cross references
        from column index 5 are saved as ``CrossRef`` rows when their
        database name is known.  Genes of the organism that are in the
        database but absent from the file are flagged obsolete.

        Options read: ``taxonomy_id``, ``geneinfo``, ``symbol_col``,
        ``systematic_col``, ``alias_col``, ``systematic_xrdb`` and
        ``gi_tax_id``.

        If the taxonomy id or the file name is missing, an error is logged
        and nothing is loaded.  If fewer than ten lines matched the
        organism, an error is logged and the process exits with status 1
        *before* any gene is flagged obsolete.
        """
        # Load the organism.
        tax_id = options.get('taxonomy_id')
        org = Organism.objects.get(taxonomy_id=tax_id)

        # geneinfo file information.
        geneinfo_filename = options.get('geneinfo')
        symb_col = int(options.get('symbol_col'))
        syst_col = int(options.get('systematic_col'))
        alias_col = int(options.get('alias_col'))
        systematic_xrdb = options.get('systematic_xrdb')

        # Without both pieces of information there is nothing to load.
        # Checking the file *name* (rather than a handle that may never have
        # been opened) keeps this path from failing with a NameError.
        if not (tax_id and geneinfo_filename):
            logger.error('Couldn\'t load geneinfo %s for org %s.',
                         geneinfo_filename,
                         tax_id,
                         exc_info=sys.exc_info(),
                         extra={'options': options})
            return

        # yeast has a taxonomy_id that changed, in this case when we look at
        # the id from NCBI we have to use the new one.
        gi_tax_id = options.get('gi_tax_id') or tax_id

        # Get all genes for this organism from the database.
        entrez_in_db = set(
            Gene.objects.filter(organism=org).values_list('entrezid',
                                                          flat=True))

        # Get all cross reference pairs that refer to a gene from this
        # organism.
        xr_in_db = set()
        for x in CrossRef.objects.filter(
                gene__entrezid__in=entrez_in_db).prefetch_related(
                    'crossrefdb', 'gene'):
            xr_in_db.add((x.crossrefdb.name, x.xrid, x.gene.entrezid))

        # Store all the genes seen thus far so we can flag obsolete entries.
        entrez_seen = set()
        # Cache of cross reference databases, which saves hits to DB.
        xrdb_cache = {}
        # Count the lines that matched the organism so that we don't mass-
        # flag genes as obsolete for no reason.
        org_matches = 0
        entrez_found = 0  # Found from before.
        entrez_updated = 0  # Found from before and updated.
        entrez_created = 0  # Didn't exist, added.

        # The context manager closes the file on every exit path.
        with open(geneinfo_filename) as geneinfo_fh:
            for line in geneinfo_fh:
                toks = line.strip().split('\t')
                if toks[symb_col] == "NEWENTRY":
                    logger.info("NEWENTRY line skipped")
                    continue

                if not (toks[0] == gi_tax_id):  # From wrong organism, skip.
                    continue

                org_matches += 1  # Count lines that came from this organism.
                # Grab requested fields from tab delimited file.
                entrezid = int(toks[1])
                standard_name = toks[symb_col]
                systematic_name = toks[syst_col]
                aliases = toks[alias_col]
                crossrefs = toks[5]
                chromosome = toks[6]
                description = toks[8]
                status = toks[9]

                # This column only gets filled in for certain organisms.
                if (not systematic_name) or (systematic_name == '-'):
                    systematic_name = standard_name
                # Gene is actually mitochondrial, change symbol to avoid
                # duplicates (analogous to what GeneCards does).
                if chromosome == "MT":
                    if not systematic_name.startswith('MT'):
                        logger.debug(
                            "Renaming %s to %s, mitochondrial version",
                            systematic_name, "MT-" + systematic_name)
                        systematic_name = "MT-" + systematic_name

                alias_str = ""
                alias_num = 0
                if aliases and (aliases != '-'):
                    alias_list = [unicode(x) for x in aliases.split('|')]
                    alias_num = len(alias_list)
                    alias_str = ' '.join(alias_list)

                # Handle cross references.
                # NOTE(review): an entry holding more than one ':' (such as
                # 'HGNC:HGNC:5') produces a longer tuple of which only the
                # first two fields are used below -- confirm this is wanted.
                xref_tuples = set()
                if crossrefs and (crossrefs != '-'):
                    if (systematic_xrdb):
                        xref_tuples.add((unicode(systematic_xrdb),
                                         unicode(systematic_name)))

                    xrefs = [unicode(x) for x in crossrefs.split('|')]
                    for x in xrefs:
                        xref_tuples.add(tuple(x.split(':')))

                xref_num = len(xref_tuples)
                # Arbitrary weight for search results.
                # The principle of weighting is that we think people are more
                # likely to want a gene that occurs in more databases or has
                # more aliases b/c it is better-known.  This helps break
                # ordering ties where gene names are identical.
                weight = 2 * xref_num + alias_num

                # We also assume that people are much more likely to want
                # protein coding genes.  In the long term we could measure
                # actual selections and estimate weight per gene.
                if status == 'protein-coding':
                    weight = weight * 2

                gene_object = None
                entrez_seen.add(entrezid)
                if entrezid in entrez_in_db:  # This existed already.
                    logger.debug("Entrez %s existed already.", entrezid)
                    entrez_found += 1
                    gene_object = Gene.objects.get(entrezid=entrezid,
                                                   organism=org)
                    changed = False
                    # The following lines update characteristics that may have
                    # changed.
                    if gene_object.systematic_name != systematic_name:
                        gene_object.systematic_name = systematic_name
                        changed = True
                    if gene_object.standard_name != standard_name:
                        gene_object.standard_name = standard_name
                        changed = True
                    if gene_object.description != description:
                        gene_object.description = description
                        changed = True
                    if gene_object.aliases != alias_str:
                        gene_object.aliases = alias_str
                        changed = True
                    if gene_object.weight != weight:
                        gene_object.weight = weight
                        changed = True
                    # If the gene was marked obsolete but occurs in the
                    # gene_info file, then it's not obsolete.
                    if gene_object.obsolete:
                        gene_object.obsolete = False
                        changed = True
                    if changed:
                        entrez_updated += 1
                        # To save time, only call save() if something has been
                        # changed.
                        gene_object.save()

                else:  # New entrezid observed.
                    logger.debug(
                        "Entrez %s did not exist and will be created.",
                        entrezid)
                    # Store the aliases right away; otherwise a new gene
                    # would only receive them on the next run of the command.
                    gene_object = Gene(entrezid=entrezid,
                                       organism=org,
                                       systematic_name=systematic_name,
                                       standard_name=standard_name,
                                       description=description,
                                       aliases=alias_str,
                                       obsolete=False,
                                       weight=weight)
                    gene_object.save()
                    entrez_created += 1

                # Add crossreferences.
                for xref_tuple in xref_tuples:
                    try:
                        xrdb = xrdb_cache[xref_tuple[0]]
                    except KeyError:
                        try:
                            xrdb = CrossRefDB.objects.get(name=xref_tuple[0])
                        except CrossRefDB.DoesNotExist:
                            xrdb = None
                        xrdb_cache[xref_tuple[0]] = xrdb
                    if xrdb is None:  # Don't understand crossrefdb, skip.
                        logger.warning(
                            "We encountered an xrdb (%s) not in our"
                            " database for pair %s.", xref_tuple[0],
                            xref_tuple)
                        continue
                    logger.debug('Found crossreference pair %s.', xref_tuple)
                    # If the record doesn't exist in database, create it and
                    # remember it so a later repeat is not inserted twice.
                    xr_key = (xref_tuple[0], xref_tuple[1], entrezid)
                    if xr_key not in xr_in_db:
                        xr_obj = CrossRef(crossrefdb=xrdb,
                                          xrid=xref_tuple[1],
                                          gene=gene_object)
                        xr_obj.save()
                        xr_in_db.add(xr_key)

        logger.info(
            "%s entrez identifiers existed in the database and "
            "were found in the new gene_info file", entrez_found)
        logger.info(
            "%s entrez identifiers existed in the database and "
            "were changed in the new gene_info file", entrez_updated)
        logger.info(
            "%s entrez identifiers did not exist and were created "
            "in the new gene_info file", entrez_created)

        # Stop before touching the "obsolete" flags: with (almost) no
        # matching lines every stored gene would look obsolete.
        if org_matches < 10:
            logger.error('Less than ten matches were encountered for '
                         'this organism.  Check the organism ID.')
            sys.exit(1)

        # Update "obsolete" attribute for entrez records that are in the
        # database but not in input file.  Each gene is saved individually,
        # as before, rather than through a bulk update.
        for stale_entrezid in entrez_in_db - entrez_seen:
            gene_object = Gene.objects.get(entrezid=stale_entrezid,
                                           organism=org)
            if not gene_object.obsolete:
                gene_object.obsolete = True
                gene_object.save()