def score_producer(data, scorer, loader, r_server, lookup_data,
                   datasources_to_datatypes, dry_run):
    """Score one target-disease association and hand it to ``loader``.

    ``data`` is unpacked as ``(target, disease, evidence, is_direct)``.
    Nothing happens when ``evidence`` is falsy.  When the computed score is
    falsy the association is skipped and a warning is logged.  Otherwise
    the score object is enriched with gene, HPA expression and EFO data
    fetched through ``lookup_data`` (passing ``r_server`` to each lookup)
    and, unless ``dry_run`` is true, stored with ``loader.put`` under the
    element id ``"<target>-<disease>"``.

    A ``KeyError`` raised while loading the gene data is logged at debug
    level and re-raised.  A ``KeyError`` raised while loading HPA data or
    while attaching it to the score is ignored.  A ``KeyError`` raised
    while loading the EFO data is logged and the (empty) EFO object is
    still attached.  Any other exception propagates unchanged.

    Always returns ``None``.
    """
    target, disease, evidence, is_direct = data

    logger = logging.getLogger(__name__)

    if not evidence:
        return

    score = scorer.score(target, disease, evidence, is_direct,
                         datasources_to_datatypes)
    # skip associations only with data with score 0
    if not score:
        logger.warning('Skipped association with score 0: %s-%s',
                       target, disease)
        return

    gene_data = Gene()
    try:
        gene_data.load_json(
            lookup_data.available_genes.get_gene(target, r_server))
    except KeyError:
        logger.debug('Cannot find gene code "%s" in lookup table', target)
        # bare ``raise`` re-raises the active exception with its
        # original traceback intact
        raise
    score.set_target_data(gene_data)

    # create a hpa expression empty jsonserializable class
    # to fill from Redis cache lookup_data
    hpa_data = HPAExpression()
    try:
        hpa_data.update(
            lookup_data.available_hpa.get_hpa(target, r_server))
    except KeyError:
        # best effort: keep the empty HPA object when the lookup fails
        pass
    try:
        score.set_hpa_data(hpa_data)
    except KeyError:
        # best effort: a KeyError while attaching HPA data is ignored
        pass

    disease_data = EFO()
    try:
        disease_data.load_json(
            lookup_data.available_efos.get_efo(disease, r_server))
    except KeyError as e:
        # the failure is logged but not re-raised; the empty EFO object
        # is attached to the score below
        logger.debug('Cannot find EFO code "%s" in lookup table', disease)
        logger.exception(e)

    score.set_disease_data(disease_data)

    element_id = '%s-%s' % (target, disease)
    if not dry_run:
        loader.put(Const.ELASTICSEARCH_DATA_ASSOCIATION_INDEX_NAME,
                   Const.ELASTICSEARCH_DATA_ASSOCIATION_DOC_NAME,
                   element_id, score)
示例#2
0
def score_producer(data,
                   scorer, lookup_data, datasources_to_datatypes, dry_run):
    """Score one target-disease association and return it as JSON.

    ``data`` is unpacked as ``(target, disease, evidence, is_direct)``.
    The score object is enriched with gene, HPA expression and EFO data
    fetched through ``lookup_data``.

    ``dry_run`` is accepted but not used by this function.

    Returns:
        ``(element_id, score.to_json())`` where ``element_id`` is
        ``"<target>-<disease>"``, or ``None`` when ``evidence`` is falsy
        or the computed score is falsy.
    """
    target, disease, evidence, is_direct = data

    if not evidence:
        return None

    score = scorer.score(target, disease, evidence, is_direct,
                         datasources_to_datatypes)
    # skip associations only with data with score 0
    if not score:
        return None

    gene_data = Gene()
    gene_data_index = lookup_data.available_genes.get_gene(target)
    # identity check: cannot be fooled by an overridden __eq__/__ne__
    if gene_data_index is not None:
        gene_data.load_json(gene_data_index)
    score.set_target_data(gene_data)

    # create a hpa expression empty jsonserializable class
    hpa_data = HPAExpression()
    try:
        hpa_index = lookup_data.available_hpa.get_hpa(target)
        if hpa_index is not None:
            hpa_data.update(hpa_index)
    except KeyError:
        # best effort: keep the empty HPA object when the lookup fails
        pass
    try:
        score.set_hpa_data(hpa_data)
    except KeyError:
        # best effort: a KeyError while attaching HPA data is ignored
        pass

    # NOTE(review): unlike the gene and HPA lookups above, the EFO lookup
    # result is passed to load_json without a None check -- confirm that
    # get_efo cannot return None here.
    disease_data = EFO()
    disease_data.load_json(
        lookup_data.available_efos.get_efo(disease))
    score.set_disease_data(disease_data)

    element_id = '%s-%s' % (target, disease)

    # convert the score into a JSON-compatible object
    # otherwise Python serialization consumes too much memory
    return (element_id, score.to_json())
示例#3
0
 def _get_gene_obj(self, geneid):
     """Build a ``Gene`` for ``geneid`` and load it from ``self.available_genes``."""
     gene_obj = Gene(geneid)
     # fetch the stored record for this id, then populate the object from it
     gene_json = self.available_genes.get_gene(geneid)
     gene_obj.load_json(gene_json)
     return gene_obj