示例#1
0
    def test_load_top_associations_by_top_threshold(self):
        """Test retrieving top associations by threshold.

        The threshold is passed once as 5 and once as 1e-5; the test expects
        both calls to return exactly the same records (presumably 5 is read
        as a -log10 score and 1e-5 as the matching p-value -- confirm against
        hdf5.get_top_associations).
        """
        top_associations, thresholds = hdf5.get_top_associations(
            self.hdf5_file, 5, maf=0, top_or_threshold='threshold')
        # np.recarray is the public name of np.core.records.recarray; the
        # np.core path is deprecated in recent NumPy releases.
        assert isinstance(top_associations, np.recarray)
        assert len(top_associations) == 14
        for assoc in top_associations:
            assert assoc['score'] >= 5.0

        top_associations_by_e, thresholds = hdf5.get_top_associations(
            self.hdf5_file, 1e-5, maf=0, top_or_threshold='threshold')
        # Check the second result itself; the original re-checked the first.
        assert isinstance(top_associations_by_e, np.recarray)
        assert len(top_associations) == len(top_associations_by_e)
        for i, assoc in enumerate(top_associations_by_e):
            assert assoc.tolist() == top_associations[i].tolist()
示例#2
0
def index_study(study_id, perm_threshold=None):
    """Index the top associations of a study in elasticsearch.

    Looks up the study, reads the associations from its HDF5 result file
    using the 'threshold' filter with val=1e-4 and maf=0, and passes them to
    ``elastic.index_associations``.

    Args:
        study_id: Primary key of the ``Study`` to index.
        perm_threshold: Optional value; when truthy it is stored in the
            thresholds under the key 'permutation_threshold'.

    Returns:
        A tuple ``((indexed_assoc, failed_assoc), study_id)``.
    """
    study = Study.objects.get(pk=study_id)
    hdf5_file = os.path.join(settings.HDF5_FILE_PATH, 'gwas_results',
                             '%s.hdf5' % study.pk)
    top_associations, thresholds = hdf5.get_top_associations(
        hdf5_file, val=1e-4, top_or_threshold='threshold', maf=0)
    logger.info('Retrieved top associations from GWAS %s', study_id)
    if perm_threshold:
        thresholds['permutation_threshold'] = perm_threshold
    indexed_assoc, failed_assoc = elastic.index_associations(
        study, top_associations, thresholds)
    if failed_assoc > 0:
        # The original message had a single placeholder for three values,
        # which raised TypeError exactly when failures needed reporting.
        logger.error(
            '%s of %s associations failed to index for "%s" in elasticsearch',
            failed_assoc, indexed_assoc + failed_assoc, study_id)
    elif indexed_assoc == 0:
        # logger.warn is a deprecated alias of logger.warning.
        logger.warning(
            'No associations found that match the threshold. Skipping "%s" in elasticsearch',
            study_id)
    else:
        logger.info(
            'Successfully indexed all %s assocations for "%s" in elasticsearch.',
            indexed_assoc, study_id)
    return (indexed_assoc, failed_assoc), study_id
示例#3
0
文件: rest.py 项目: mtog/AraGWAS
    def assocations_from_hdf5(self, request, pk):
        """Return associations read from the study's HDF5 file.

        Query parameters:
            filter_type: 'threshold' (default) or 'top'; any other value
                raises ValueError.
            filter: the value handed to get_top_associations as ``val``
                (default 1); converted to int when filter_type is 'top',
                otherwise kept as float.

        The response body maps 'chr1'..'chr5' to the 'scores', 'positions'
        and 'mafs' slices for that chromosome, plus a 'thresholds' entry.
        """
        params = request.query_params
        filter_type = params.get('filter_type', 'threshold')
        if filter_type != 'threshold' and filter_type != 'top':
            raise ValueError('filter_type must be either "threshold" or "top"')
        raw_filter = float(params.get('filter', 1))
        filter_value = int(raw_filter) if filter_type == 'top' else raw_filter

        hdf5_path = os.path.join(settings.HDF5_FILE_PATH, '%s.hdf5' % pk)
        associations, thresholds = get_top_associations(
            hdf5_path, maf=0, val=filter_value, top_or_threshold=filter_type)

        # NOTE(review): searchsorted presupposes the 'chr' column is sorted;
        # confirm that get_top_associations guarantees this ordering.
        chromosomes = associations['chr']
        output = {}
        start = 0
        for chrom in range(1, 6):
            # The start of the next chromosome marks the end of this one.
            end = chromosomes.searchsorted(str(chrom + 1))
            window = slice(start, end)
            output['chr%s' % chrom] = {
                'scores': associations['score'][window],
                'positions': associations['position'][window],
                'mafs': associations['maf'][window],
            }
            start = end

        # Cast threshold values to plain int/float in place.
        for name, raw in thresholds.items():
            cast = int if name == 'total_associations' else float
            thresholds[name] = cast(raw)
        output['thresholds'] = thresholds
        return Response(output, status=status.HTTP_200_OK)
示例#4
0
文件: tests.py 项目: grimmlab/AraGWAS
    def test_load_top_associations_by_top_hits_and_maf(self):
        """Test that top associations can be retrieved by number of hits.

        Requests 15 hits without an explicit maf argument and checks the
        reported thresholds, the total row count (15 * 5), that no row has
        maf below 0.05, and the first row of each consecutive block of 15.
        """
        hits_per_block = 15
        expected_first_rows = [
            ('1', 6369772, 5.559458119903501, 0.1386861313868613, 19,
             0.360335870170728, 0.0761941875889666),
            ('2', 18351161, 5.221548337450959, 0.08029197080291971, 11,
             0.328720498341187, 0.0747141063333232),
            ('3', 18057816, 4.795206143400829, 0.2116788321167883, 29,
             -0.336795159960789, 0.0737295910747224),
            ('4', 429928, 6.555416448260276, 0.4233576642335766, 58,
             0.368255762771892, 0.0711756042811744),
            ('5', 18577788, 6.219812361173065, 0.15328467153284672, 21,
             -0.327934944673749, 0.0833854459419328),
        ]
        expected_thresholds = (
            ('bonferroni_threshold01', 7.294197188903931),
            ('bonferroni_threshold05', 6.5952271845679125),
            ('bh_threshold', 6.6150447667600778),
            ('total_associations', 196878),
        )

        associations, thresholds = hdf5.get_top_associations(
            self.hdf5_file, hits_per_block, top_or_threshold='top')
        for key, expected in expected_thresholds:
            assert thresholds[key] == expected
        assert len(associations) == hits_per_block * 5
        assert not np.any(associations['maf'] < 0.05)

        self._check_return_array(associations)
        for block_no, expected_row in enumerate(expected_first_rows):
            first_row = associations[block_no * hits_per_block]
            assert first_row.tolist() == expected_row
示例#5
0
文件: tests.py 项目: zhaijj/AraGWAS
 def test_regroup_top_assocations(self):
     """Test the first and last record returned by regroup_associations.

     The original comparisons were bare expressions without ``assert`` and
     therefore verified nothing. The expected floats are rounded to eight
     decimals, so float fields are compared with a tolerance.
     """
     import math

     def assert_record_matches(record, expected):
         # Compare the leading fields; exact for str/int, approximate for
         # float. NOTE(review): assumes the record starts with
         # (chr, position, score, maf, mac) -- confirm the field order.
         actual = record.tolist()
         assert len(actual) >= len(expected)
         for got, want in zip(actual, expected):
             if isinstance(want, float):
                 assert math.isclose(got, want, rel_tol=1e-6)
             else:
                 assert got == want

     top_associations, thresholds = hdf5.get_top_associations(
         self.hdf5_file, 5, maf=0, top_or_threshold='threshold')
     top_associations = hdf5.regroup_associations(top_associations)
     assert_record_matches(
         top_associations[0], ('4', 429928, 6.55541645, 0.42335766, 58))
     assert_record_matches(
         top_associations[-1], ('5', 18606578, 5.07844918, 0.47445255, 65))
示例#6
0
def index_study(study_id, perm_threshold=None):
    """Index the top associations of a study in elasticsearch.

    Reads the associations from the study's HDF5 file using the 'threshold'
    filter with val=1e-4 and maf=0, then hands them to
    ``elastic.index_associations``.

    Args:
        study_id: Primary key of the ``Study`` to index.
        perm_threshold: Optional value; when truthy it is stored in the
            thresholds under the key 'permutation_threshold'.

    Returns:
        Whatever ``elastic.index_associations`` returns.
    """
    # The docstring must be the first statement; it used to follow this
    # lookup, which left index_study.__doc__ empty.
    study = Study.objects.get(pk=study_id)
    hdf5_file = os.path.join(settings.HDF5_FILE_PATH, '%s.hdf5' % study.pk)
    top_associations, thresholds = hdf5.get_top_associations(
        hdf5_file, val=1e-4, top_or_threshold='threshold', maf=0)
    if perm_threshold:
        thresholds['permutation_threshold'] = perm_threshold
    return elastic.index_associations(study, top_associations, thresholds)
示例#7
0
文件: tests.py 项目: zhaijj/AraGWAS
    def test_load_top_associations_by_top_hits_and_maf(self):
        """Test that top associations can be retrieved by number of hits.

        Requests 15 hits without an explicit maf argument and checks the
        reported thresholds, the total row count (15 * 5), that no row has
        maf below 0.05, and the first row of each consecutive block of 15.
        """
        # The docstring above used to follow this assignment, so it was a
        # stray string expression rather than the method's documentation.
        top_hit_num = 15
        top_hits = [
            ('1', 6369772, 5.559458119903501, 0.1386861313868613, 19),
            ('2', 18351161, 5.221548337450959, 0.08029197080291971, 11),
            ('3', 18057816, 4.795206143400829, 0.2116788321167883, 29),
            ('4', 429928, 6.555416448260276, 0.4233576642335766, 58),
            ('5', 18577788, 6.219812361173065, 0.15328467153284672, 21)
        ]

        top_associations, thresholds = hdf5.get_top_associations(
            self.hdf5_file, top_hit_num, top_or_threshold='top')
        assert thresholds['bonferroni_threshold01'] == 7.294197188903931
        assert thresholds['bonferroni_threshold05'] == 6.5952271845679125
        assert thresholds['bh_threshold'] == 6.6150447667600778
        assert thresholds['total_associations'] == 196878
        assert len(top_associations) == top_hit_num * 5
        assert np.count_nonzero(top_associations['maf'] < 0.05) == 0
        self._check_return_array(top_associations)
        for i, expected in enumerate(top_hits):
            assert top_associations[i * top_hit_num].tolist() == expected
示例#8
0
文件: tests.py 项目: grimmlab/AraGWAS
 def test_load_top_associations_by_top_threshold_and_maf(self):
     """Test threshold retrieval combined with a MAF cut-off.

     With the value 1e-5 and maf=0.1 the fixture is expected to yield 13
     records, none of which has a maf below 0.1.
     """
     result = hdf5.get_top_associations(
         self.hdf5_file, 1e-5, maf=0.1, top_or_threshold='threshold')
     associations, _thresholds = result
     assert len(associations) == 13
     assert not np.any(associations['maf'] < 0.1)