Example #1
    def show_large_buckets(self, sizes, bucketkeys, use_spatial_code=False):
        from brainsearch import vizu
        indices = np.argsort(sizes)[::-1]
        #indices = range(len(sizes))

        #all_distances = []
        means = []
        stds = []

        nb_samples = 1000
        rng = np.random.RandomState(42)

        for idx in indices:
            print "{:,} neighbors".format(sizes[idx])
            patches = self.engine.storage.retrieve([bucketkeys[idx]], attribute=self.metadata['patch'])[0]
            labels = self.engine.storage.retrieve([bucketkeys[idx]], attribute=self.metadata['label'])[0]
            # Patch energies (L2 norms); only used by the commented-out histogram below.
            energies = np.sqrt(np.sum(patches**2, axis=tuple(range(1, patches.ndim))))

            # Sample nb_samples random pairs of patches (renamed to avoid shadowing the outer `indices`).
            pair_indices = rng.randint(0, len(patches), 2*nb_samples)
            distances = np.sqrt(np.sum((patches[pair_indices[1::2]] - patches[pair_indices[::2]])**2, axis=tuple(range(1, patches.ndim))))

            #all_distances.append(distances)
            means.append(np.mean(distances))
            stds.append(np.std(distances))

            print(means[-1])
            print(stds[-1])

            #import pylab as plt
            #plt.hist(distances, bins=100)
            #plt.show()

            #print "0:{:,}, 1:{:,}".format(*np.bincount(labels.flatten()))
            #import pylab as plt
            #plt.hist(energies, bins=100)
            #plt.show()
            from ipdb import set_trace as dbg
            dbg()
            #vizu.show_images3d(patches, shape=self.metadata['patch'].shape, blocking=True)

        import pylab as plt
        plt.plot(means, label="mean pairwise distance")
        plt.plot(stds, label="std of pairwise distance")
        plt.legend()
        #plt.figure()
        #plt.hist(all_distances, bins=100)
        plt.show()
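
The loop above estimates the spread of intra-bucket distances by sampling random patch pairs instead of computing all O(n^2) pairwise distances. A minimal self-contained sketch of that estimator (the function name and the toy data are illustrative, not part of brainsearch):

import numpy as np

def sample_pairwise_distances(patches, nb_samples=1000, seed=42):
    # Draw 2*nb_samples indices; consecutive entries form random patch pairs.
    rng = np.random.RandomState(seed)
    pairs = rng.randint(0, len(patches), 2 * nb_samples)
    diffs = patches[pairs[1::2]] - patches[pairs[::2]]
    # Euclidean distance over all non-batch dimensions.
    return np.sqrt(np.sum(diffs**2, axis=tuple(range(1, patches.ndim))))

patches = np.random.rand(5000, 5, 5, 5).astype(np.float32)  # toy bucket of 5x5x5 patches
distances = sample_pairwise_distances(patches)
print(distances.mean(), distances.std())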
Example #2
    def execute(self, status):
        try:
            from ipdb import set_trace as dbg
        except ImportError:
            from pdb import set_trace as dbg
        dbg()
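
The try/except import makes this example degrade gracefully to the standard library's pdb when ipdb is not installed. A sketch of the same pattern as a reusable helper (the name is illustrative; note that the debugger then stops one frame inside the helper, so step up with `u`):

def debugger():
    """Drop into ipdb if it is available, otherwise fall back to pdb."""
    try:
        from ipdb import set_trace
    except ImportError:
        from pdb import set_trace
    set_trace()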
Example #3
    def execute(self, status):
        from ipdb import set_trace as dbg

        dbg()
Example #4
def main(brain_manager=None):
    parser = buildArgsParser()
    args = parser.parse_args()

    # Build processing pipeline
    pipeline = BrainPipelineProcessing()
    if args.do_normalization:
        pipeline.add(BrainNormalization(type=0))
    if args.resampling_factor > 1:
        pipeline.add(BrainResampling(args.resampling_factor))

    #controls = defaultdict(lambda: [])
    #parkinsons = defaultdict(lambda: [])
    mean_controls = None
    mean_parkinsons = None
    nb_controls = 0
    nb_parkinsons = 0
    dtype = np.float32

    with Timer("Computing mean of samples"):
        for config_path in args.configs:
            with open(config_path) as config_file:
                config = json.load(config_file)
            brain_data = brain_data_factory(config, pipeline=pipeline)

            for brain in brain_data:
                if mean_controls is None and mean_parkinsons is None:
                    mean_controls = np.zeros_like(brain.image, dtype=dtype)
                    mean_parkinsons = np.zeros_like(brain.image, dtype=dtype)

                if brain.image.shape != mean_controls.shape or brain.image.shape != mean_parkinsons.shape:
                    print "Oups shapes not the same!"
                    from ipdb import set_trace as dbg
                    dbg()

                with Timer("Processing {}".format(brain.name)):
                    if brain.label == 0:
                        nb_controls += 1
                        mean_controls += brain.image
                    elif brain.label == 1:
                        nb_parkinsons += 1
                        mean_parkinsons += brain.image
                    else:
                        print "Unknown brain label: {}".format(brain.label)

        mean_controls /= nb_controls
        mean_parkinsons /= nb_parkinsons

    std_controls = np.zeros_like(mean_controls, dtype=dtype)
    std_parkinsons = np.zeros_like(mean_parkinsons, dtype=dtype)
    with Timer("Computing standard deviation of samples"):
        for config_path in args.configs:
            with open(config_path) as config_file:
                config = json.load(config_file)
            brain_data = brain_data_factory(config, pipeline=pipeline)

            for brain in brain_data:
                with Timer("Processing {}".format(brain.name)):
                    if brain.label == 0:
                        std_controls += (brain.image - mean_controls)**2
                    elif brain.label == 1:
                        std_parkinsons += (brain.image - mean_parkinsons)**2

        std_controls = np.sqrt(std_controls / (nb_controls-1))
        std_parkinsons = np.sqrt(std_parkinsons / (nb_parkinsons-1))

    s1 = std_controls
    n1 = nb_controls
    s2 = std_parkinsons
    n2 = nb_parkinsons

    # Compute the test statistic t
    stderror = np.sqrt((s1**2/n1) + (s2**2/n2))
    # The Null hypothesis : mu1 - mu2 = 0
    tmap = ((mean_parkinsons-mean_controls) - 0) / stderror
    tmap[stderror == 0] = 0  # Empty voxels

    # Compute p-value using the Welch-Satterthwaite degrees of freedom
    DF_numerator = (s1**2/n1 + s2**2/n2)**2
    DF_divisor = ((s1**2/n1)**2/(n1-1)) + ((s2**2/n2)**2/(n2-1))
    DF = DF_numerator / DF_divisor
    DF[DF_divisor == 0] = 0  # Empty voxels

    import scipy.stats as stat
    pmap = 2 * stat.t.cdf(-abs(tmap), DF)  # Two-tailed test, take twice the lower tail.
    pmap[np.isnan(pmap)] = 1  # Empty voxels
    # Reuse the affine of the last processed brain (shapes were checked to match above).
    save_nifti(tmap, brain.infos['affine'], 'tmap.nii.gz')
    save_nifti(pmap, brain.infos['affine'], 'pmap.nii.gz')
    save_nifti(1-pmap, brain.infos['affine'], 'inv_pmap.nii.gz')
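
The voxel-wise statistic above is Welch's unequal-variances t-test with Welch-Satterthwaite degrees of freedom. As a sanity check (not part of the original script), the same formulas can be compared against scipy.stats.ttest_ind on toy 1-D data:

import numpy as np
import scipy.stats as stat

rng = np.random.RandomState(0)
a = rng.normal(0.0, 1.0, size=30)   # stands in for the controls
b = rng.normal(0.5, 1.2, size=25)   # stands in for the parkinsons

# Hand-rolled Welch's t-test, mirroring the voxel-wise code above.
s1, n1 = a.std(ddof=1), len(a)
s2, n2 = b.std(ddof=1), len(b)
stderror = np.sqrt(s1**2/n1 + s2**2/n2)
t = (b.mean() - a.mean()) / stderror
DF = (s1**2/n1 + s2**2/n2)**2 / ((s1**2/n1)**2/(n1-1) + (s2**2/n2)**2/(n2-1))
p = 2 * stat.t.cdf(-abs(t), DF)  # two-tailed

# SciPy's Welch's t-test should agree with both values.
t_ref, p_ref = stat.ttest_ind(b, a, equal_var=False)
print(t, t_ref)
print(p, p_ref)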
Example #5
def create_map(brain_manager, name, brain_data, K=100, threshold=np.inf, min_nonempty=0, spatial_weight=0., use_dist=False):
    brain_db = brain_manager[name.strip("/").split("/")[-1]]
    if brain_db is None:
        raise ValueError("Unexisting brain database: " + name)

    patch_shape = brain_db.metadata['patch'].shape

    brain_db.engine.distance = nearpy.distances.EuclideanDistance(brain_db.metadata['patch'])
    #brain_db.engine.distance = nearpy.distances.CorrelationDistance(brain_db.metadata['patch'])

    # TODO: find out how to compute a good distance threshold.
    #brain_db.engine.filters = [DistanceThresholdFilter(threshold), NearestFilter(K)]
    brain_db.engine.filters = [NearestFilter(K)]

    half_patch_size = np.array(patch_shape) // 2

    print "Found {} brains to map".format(len(brain_data))
    for i, brain in enumerate(brain_data):
        print "Mapping {}...".format(brain.name)
        brain_patches = brain.extract_patches(patch_shape, min_nonempty=min_nonempty)
        vectors = brain_patches.create_vectors(spatial_weight=spatial_weight)

        # Positions of extracted patches refer to the top-left corner of each patch.
        center_positions = brain_patches.positions + half_patch_size

        nids = -1 * np.ones((len(brain_patches), K), dtype=np.int32)
        nlabels = -1 * np.ones((len(brain_patches), K), dtype=np.uint8)
        ndists = np.nan * np.ones((len(brain_patches), K), dtype=np.float32)
        #npositions = -1 * np.ones((len(brain_patches), K, 3), dtype=np.uint16)
        #npatches = -1 * np.ones((len(brain_patches), K, int(np.prod(patch_shape))), dtype=np.float32)

        start_brain = time.time()
        for patch_id, neighbors in brain_db.get_neighbors(vectors, brain_patches.patches, attributes=["id", "label"]):
            nlabels[patch_id, :len(neighbors['label'])] = neighbors['label'].flatten()
            nids[patch_id, :len(neighbors['id'])] = neighbors['id'].flatten()
            ndists[patch_id, :len(neighbors['dist'])] = neighbors['dist'].flatten()
            #npositions[patch_id, :len(neighbors['position']), :] = neighbors['position']
            #npatches[patch_id, :len(neighbors['patch']), :] = neighbors['patch'].reshape((-1, int(np.prod(patch_shape))))

            # if np.all(center_positions[patch_id] == (84, 114, 115)):
            #     print patch_id
            #     from ipdb import set_trace; set_trace()

        print "{4}. Brain #{0} ({3:,} patches) found {1:,} neighbors in {2:.2f} sec.".format(brain.id, np.sum(nlabels != -1), time.time()-start_brain, len(brain_patches), i)
        print "Patches with no neighbors: {:,}".format(np.all(nlabels == -1, axis=1).sum())

        ## Generate map of p-values ##

        # Use a leave-one-out strategy, i.e. do not count neighbor patches coming from the query brain itself.
        control = np.sum(np.logical_and(nlabels == 0, nids != brain.id), axis=1)
        parkinson = np.sum(np.logical_and(nlabels == 1, nids != brain.id), axis=1)

        if use_dist:
            # Weight the proportion by the distance between the query patch and each neighbor patch.
            nsimilarities = np.exp(-ndists)
            # Min-max normalize
            nsimilarities -= np.nanmin(nsimilarities, axis=1, keepdims=True)
            nsimilarities /= np.nanmax(nsimilarities, axis=1, keepdims=True)
            control = np.nansum(nsimilarities * np.logical_and(nlabels == 0, nids != brain.id), axis=1)
            parkinson = np.nansum(nsimilarities * np.logical_and(nlabels == 1, nids != brain.id), axis=1)
            control = np.nan_to_num(control)
            parkinson = np.nan_to_num(parkinson)
            # control = np.sum(np.exp(-ndists) * np.logical_and(nlabels == 0, nids != brain.id), axis=1)
            # parkinson = np.sum(np.exp(-ndists) * np.logical_and(nlabels == 1, nids != brain.id), axis=1)
            # control = np.sum((1-ndists) * np.logical_and(nlabels == 0, nids != brain.id), axis=1)
            # parkinson = np.sum((1-ndists) * np.logical_and(nlabels == 1, nids != brain.id), axis=1)

        P0 = brain_db.label_proportions()[1]  # Hypothesized population proportion
        p = parkinson / (parkinson+control)  # sample proportion
        p[np.isnan(p)] = P0
        n = np.sum(nlabels != -1, axis=1)     # sample size

        z_statistic, pvalue = two_tailed_test_of_population_proportion(P0, p, n)

        #prop = np.zeros_like(brain.image, dtype=np.float32)
        #prop[zip(*center_positions)] = p

        zmap = np.zeros_like(brain.image, dtype=np.float32)
        zmap_smooth = np.zeros_like(brain.image, dtype=np.float32)
        pmap = np.ones_like(brain.image, dtype=np.float32)
        counts = np.zeros_like(brain.image, dtype=np.float32)

        # Patches composite z-scores
        # see https://en.wikipedia.org/wiki/Fisher%27s_method#Relation_to_Stouffer.27s_Z-score_method
        for z in range(patch_shape[2]):
            for y in range(patch_shape[1]):
                for x in range(patch_shape[0]):
                    pos = brain_patches.positions + np.array((x, y, z))
                    zmap_smooth[tuple(zip(*pos))] += z_statistic * np.sqrt(n)
                    counts[tuple(zip(*pos))] += n

        #zmap_smooth[zip(*center_positions)] /= np.sqrt(counts2[zip(*center_positions)])
        zmap_smooth /= np.sqrt(counts)
        #zmap_smooth[zip(*center_positions)] /= np.sqrt(np.prod(patch_shape))
        zmap_smooth[np.isnan(zmap_smooth)] = 0.

        zmap[tuple(zip(*center_positions))] = z_statistic
        zmap[np.isnan(zmap)] = 0.

        import scipy.stats as stat
        pmap = 2 * stat.norm.cdf(-abs(zmap_smooth))  # Two-tailed test, take twice the lower tail.
        pmap[np.isnan(pmap)] = 1.

        #pmap[zip(*center_positions)] = pvalue
        #pmap[np.isnan(pmap)] = 1.
        counts = np.zeros_like(brain.image, dtype=np.float32)
        counts[tuple(zip(*center_positions))] = n

        results_folder = pjoin('.', 'results', brain_db.name, brain_data.name)
        if use_dist:
            results_folder = pjoin('.', 'results', brain_db.name, brain_data.name, "distance_weighting")

        if not os.path.isdir(results_folder):
            os.makedirs(results_folder)

        save_nifti(brain.image, brain.infos['affine'], pjoin(results_folder, "{}.nii.gz".format(brain.name)))
        #save_nifti(prop, brain.infos['affine'], pjoin(results_folder, "{}_prop.nii.gz".format(brain.name)))
        save_nifti(pmap, brain.infos['affine'], pjoin(results_folder, "{}_pmap.nii.gz".format(brain.name)))
        #save_nifti(1-pmap, brain.infos['affine'], pjoin(results_folder, "{}_pmap_inv.nii.gz".format(brain.name)))
        save_nifti(zmap, brain.infos['affine'], pjoin(results_folder, "{}_zmap.nii.gz".format(brain.name)))
        save_nifti(zmap_smooth, brain.infos['affine'], pjoin(results_folder, "{}_zmap_smooth.nii.gz".format(brain.name)))
        save_nifti(counts, brain.infos['affine'], pjoin(results_folder, "{}_count.nii.gz".format(brain.name)))
        #np.savez(pjoin(results_folder, name), dists=ndists, labels=nlabels, ids=nids, positions=npositions, voxels_positions=center_positions)

        from ipdb import set_trace as dbg
        dbg()
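
two_tailed_test_of_population_proportion is called above but never shown. Assuming it is the standard one-sample z-test for a proportion (a guess from the call site and the surrounding comments; brainsearch's actual implementation may differ), a vectorized sketch would be:

import numpy as np
import scipy.stats as stat

def two_tailed_test_of_population_proportion(P0, p, n):
    # z = (p - P0) / sqrt(P0 * (1 - P0) / n), the usual z-test of H0: p == P0.
    # Suppress warnings for voxels with n == 0; they yield z = 0, pvalue = 1.
    with np.errstate(divide='ignore', invalid='ignore'):
        z = (p - P0) / np.sqrt(P0 * (1.0 - P0) / n)
    # Two-tailed p-value: twice the lower tail of the standard normal.
    pvalue = 2 * stat.norm.cdf(-np.abs(z))
    return z, pvalue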