Пример #1
0
    def get_gene(self, gene_name):
        """
        Resolve ``gene_name`` to a ``Gene``, consulting ``self.gene_cache``
        first.

        Resolution order:

        1. A falsy name yields ``None`` right away.
        2. A truthy cached primary key yields a ``Gene`` built from that key
           alone.
        3. Otherwise genes with a synonym label matching the name
           (case-insensitively) are looked up:

           * exactly one match is cached and returned;
           * no match results in a new gene being saved with
             ``chr=self.get_chromosome(self.chrom)``, then cached and
             returned;
           * several matches are ambiguous, so ``None`` is returned and
             nothing is cached.
        """
        if not gene_name:
            return

        cached_pk = self.gene_cache.get(gene_name)
        if cached_pk:
            return Gene(pk=cached_pk)

        matches = list(
            Gene.objects.filter(synonyms__label__iexact=gene_name).distinct()
        )
        match_count = len(matches)

        # More than one candidate: refuse to guess, otherwise the synonym
        # war will continue.
        if match_count > 1:
            return

        if match_count == 1:
            # The name is a synonym of exactly one gene, so it may be
            # associated with it.
            gene = matches[0]
        else:
            # Nothing matched at all; only now is a new record created.
            gene = Gene(chr=self.get_chromosome(self.chrom), symbol=gene_name)
            gene.save()

        self.gene_cache[gene_name] = gene.pk
        return gene
Пример #2
0
    def setUp(self):
        """
        Build the fixtures shared by the tests.

        Saves one chromosome (value/label ``'1'``), one gene per ASCII
        lowercase letter on that chromosome (symbol = the letter), and a
        gene set named ``'test'`` which is handed the genes for the letters
        of ``'someday'`` through ``geneset.bulk``.

        The genes (keyed by letter) and the gene set are exposed as
        ``self.genes`` and ``self.geneset``.
        """
        import string

        chr1 = Chromosome(value='1', label='1')
        chr1.save()

        # ``string.lowercase`` only exists on Python 2 and its contents
        # depend on the locale; ``string.ascii_lowercase`` is available on
        # both Python 2 and 3 and is always exactly 'a'..'z'.
        genes = {}
        for char in string.ascii_lowercase:
            g = Gene(chr=chr1, symbol=char)
            g.save()
            genes[char] = g

        geneset = GeneSet(name='test')
        geneset.save()
        geneset.bulk([genes[c] for c in 'someday'])

        self.genes = genes
        self.geneset = geneset
Пример #3
0
    def setUp(self):
        """
        Prepare the chromosome, genes and gene set used by the tests.

        One chromosome (value/label ``'1'``) is saved, followed by one gene
        per ASCII lowercase letter (the letter is the gene symbol). A gene
        set named ``'test'`` is saved and the genes for the letters of
        ``'someday'`` are passed to ``geneset.bulk``.

        Results are stored on ``self.genes`` (dict keyed by letter) and
        ``self.geneset``.
        """
        import string

        chr1 = Chromosome(value='1', label='1')
        chr1.save()

        genes = {}
        # Use ``ascii_lowercase`` rather than ``lowercase``: the latter was
        # removed in Python 3 and is locale-dependent on Python 2, whereas
        # ``ascii_lowercase`` is always 'a'..'z' on both.
        for char in string.ascii_lowercase:
            g = Gene(chr=chr1, symbol=char)
            g.save()
            genes[char] = g

        geneset = GeneSet(name='test')
        geneset.save()
        geneset.bulk([genes[c] for c in 'someday'])

        self.genes = genes
        self.geneset = geneset
Пример #4
0
    def _get_or_create_gene(self, record):
        target = {}

        # Link HGNC id
        target['hgnc_id'] = int(record['hgnc_id'])

        # Parse and map chromosome
        if record['chromosome'] == 'mitochondria':
            target['chr_id'] = self.chromosomes['MT']
        elif ' and ' in record['chromosome']:
            target['chr_id'] = self.chromosomes['XY']
        else:
            match = self.chrom_re.match(record['chromosome'])
            if not match:
                log.warning('unable to match gene chromosome from HGNC',
                            extra={
                                'hgnc_id': record['hgnc_id'],
                                'raw_chr': record['chromosome'],
                            })
                return None, False
            target['chr_id'] = self.chromosomes[match.groups()[0]]

        target['symbol'] = record['approved_symbol'].encode('utf8')
        target['name'] = record['approved_name'].encode('utf8')

        # If the gene already exists by hgnc_id, fetch it. Next check
        # by symbol (in case of a new approved gene) and set the
        # hgnc_id. Fallback to creating a new gene
        try:
            return Gene.objects.get(hgnc_id=target['hgnc_id']), False
        except Gene.DoesNotExist:
            try:
                gene = Gene.objects.get(symbol=target['symbol'])
                gene.hgnc_id = target['hgnc_id']
            except Gene.DoesNotExist:
                gene = Gene(**target)
            gene.save()
            return gene, True
Пример #5
0
    def get_gene(self, gene_name):
        """Return the ``Gene`` for ``gene_name`` via cache, synonym lookup or creation.

        A falsy ``gene_name`` returns ``None``. A truthy primary key found in
        ``self.gene_cache`` returns ``Gene(pk=...)``. Otherwise genes whose
        synonym label matches case-insensitively are fetched: none means a
        new gene is saved and cached, one means that gene is cached and
        returned, and more than one returns ``None``.
        """
        if not gene_name:
            return

        known_pk = self.gene_cache.get(gene_name)
        if known_pk:
            return Gene(pk=known_pk)

        by_synonym = Gene.objects.filter(synonyms__label__iexact=gene_name)
        candidates = list(by_synonym.distinct())

        # A new record is created only when nothing matched, otherwise the
        # synonym war will continue.
        if not candidates:
            created = Gene(chr=self.get_chromosome(self.chrom), symbol=gene_name)
            created.save()
            self.gene_cache[gene_name] = created.pk
            return created

        # The name may only be associated when it is a synonym of a single
        # gene; with several candidates fall through and return None.
        if len(candidates) == 1:
            only = candidates[0]
            self.gene_cache[gene_name] = only.pk
            return only
Пример #6
0
    def _get_or_create_gene(self, record):
        target = {}

        # Link HGNC id
        target["hgnc_id"] = int(record["hgnc_id"])

        # Parse and map chromosome
        if record["chromosome"] == "mitochondria":
            target["chr_id"] = self.chromosomes["MT"]
        elif " and " in record["chromosome"]:
            target["chr_id"] = self.chromosomes["XY"]
        else:
            match = self.chrom_re.match(record["chromosome"])
            if not match:
                log.warning(
                    "unable to match gene chromosome from HGNC",
                    extra={"hgnc_id": record["hgnc_id"], "raw_chr": record["chromosome"]},
                )
                return None, False
            target["chr_id"] = self.chromosomes[match.groups()[0]]

        target["symbol"] = record["approved_symbol"].encode("utf8")
        target["name"] = record["approved_name"].encode("utf8")

        # If the gene already exists by hgnc_id, fetch it. Next check
        # by symbol (in case of a new approved gene) and set the
        # hgnc_id. Fallback to creating a new gene
        try:
            return Gene.objects.get(hgnc_id=target["hgnc_id"]), False
        except Gene.DoesNotExist:
            try:
                gene = Gene.objects.get(symbol=target["symbol"])
                gene.hgnc_id = target["hgnc_id"]
            except Gene.DoesNotExist:
                gene = Gene(**target)
            gene.save()
            return gene, True
Пример #7
0
    def _get_or_create_gene(self, record):
        target = {}

        # Link HGNC id
        target['hgnc_id'] = int(record['hgnc_id'])

        # Parse and map chromosome
        if record['chromosome'] == 'mitochondria':
            target['chr_id'] = self.chromosomes['MT']
        elif ' and ' in record['chromosome']:
            target['chr_id'] = self.chromosomes['XY']
        else:
            match = self.chrom_re.match(record['chromosome'])
            if not match:
                log.warning('unable to match gene chromosome from HGNC', extra={
                    'hgnc_id': record['hgnc_id'],
                    'raw_chr': record['chromosome'],
                })
                return None, False
            target['chr_id'] = self.chromosomes[match.groups()[0]]

        target['symbol'] = record['approved_symbol'].encode('utf8')
        target['name'] = record['approved_name'].encode('utf8')

        # If the gene already exists by hgnc_id, fetch it. Next check
        # by symbol (in case of a new approved gene) and set the
        # hgnc_id. Fallback to creating a new gene
        try:
            return Gene.objects.get(hgnc_id=target['hgnc_id']), False
        except Gene.DoesNotExist:
            try:
                gene = Gene.objects.get(symbol=target['symbol'])
                gene.hgnc_id = target['hgnc_id']
            except Gene.DoesNotExist:
                gene = Gene(**target)
            gene.save()
            return gene, True