Пример #1
0
def _build_initial_reference(pp, label, freq, settings):
    """
    average a set of particles into one low-pass filtered initial centroid
    @param pp: particles to average; every particle is relabelled to class '0'
    @type pp: L{pytom.basic.structures.ParticleList}
    @param label: class label of the centroid, also used in the file name
    @type label: C{str}
    @param freq: frequency handed to the low-pass filter
    @param settings: uses the keys "binning" and "output_directory"
    @type settings: C{dict}
    @return: particle referring to the written file 'initial_<label>.em'
    @rtype: L{pytom.basic.structures.Particle}
    """
    from pytom.basic.structures import Particle
    from pytom.basic.filter import lowpassFilter

    # The average is looked up under key '0' below, so all particles of the
    # subset are put into class '0' before averaging.
    pp.setClassAllParticles('0')
    res, _freqs, _wedges = calculate_averages(
        pp, settings["binning"], None, outdir=settings["output_directory"])
    avg = lowpassFilter(res['0'], freq, freq / 10.)[0]

    fname = os.path.join(settings['output_directory'],
                         'initial_{}.em'.format(label))
    avg.write(fname)

    p = Particle(fname)
    p.setClass(label)
    return p


def initialize(pl, settings):
    """
    create the initial class centroids in a k-means++ like fashion

    The first centroid is the average of the first len(pl)//K particles
    after sorting by score. Every further centroid is the average of
    len(pl)//K particles drawn at random, weighted by their distance to the
    closest centroid created so far.

    @param pl: particle list (not modified, a copy is used)
    @type pl: L{pytom.basic.structures.ParticleList}
    @param settings: uses the keys "noise", "ncluster", "frequency",
                     "binning", "fmask" and "output_directory"
    @type settings: C{dict}
    @return: (references, frequencies), both keyed by the class label
             '0' ... str(K-1); references holds the centroid particles,
             frequencies the starting frequency of each class
    @rtype: C{tuple} of two C{dict}
    @raise ValueError: if there are fewer particles than clusters
    """
    print("Initializing the class centroids ...")
    pl = pl.copy()
    pl.sortByScore()
    if settings["noise"]:
        # drop the tail of the sorted list (fraction given by "noise")
        pl = pl[:int((1-settings["noise"])*len(pl))]

    K = settings["ncluster"]
    freq = settings["frequency"]
    n = len(pl)
    kn = n // K
    if kn < 1:
        # Without this check an empty subset would be averaged and the
        # failure would only show up later and far less readable.
        raise ValueError(
            "Cannot initialize {} clusters from {} particles".format(K, n))

    references = {}
    frequencies = {}

    # get the first class centroid
    references['0'] = _build_initial_reference(pl[:kn], '0', freq, settings)
    frequencies['0'] = freq

    # Running minimum of the distance of every particle to any centroid.
    # 4 is the start value of the original implementation
    # (presumably the upper bound of the distance measure - TODO confirm).
    distances = np.full(n, 4.0)

    for k in range(1, K):
        # Only the centroid added last can lower the running minimum, so only
        # the distances to that one are computed.
        # NOTE(review): assumes distance() is deterministic for identical
        # arguments - confirm.
        ref = references[str(k - 1)]
        args = [(p, ref, freq, settings["fmask"], settings["binning"])
                for p in pl]
        dist = mpi.parfor(distance, args)
        # fmin ignores NaN entries, like the plain '>' comparison would
        distances = np.fmin(distances, np.asarray(dist, dtype=float))

        print('sum distances: ', distances.sum())

        # np.random.choice rejects negative or NaN probabilities and, without
        # replacement, needs at least kn non-zero entries. Fall back to a
        # uniform draw when the weights cannot be used.
        weights = np.clip(distances, 0, None)
        total = weights.sum()
        if total > 0 and np.count_nonzero(weights) >= kn:
            idx = np.random.choice(n, kn, replace=False, p=weights / total)
        else:
            idx = np.random.choice(n, kn, replace=False)

        pp = ParticleList()
        for i in idx:
            pp.append(pl[int(i)])

        references[str(k)] = _build_initial_reference(pp, str(k), freq,
                                                      settings)
        frequencies[str(k)] = freq

    return references, frequencies
Пример #2
0
def _load_binned_mask(filename, template, binning):
    """
    read a mask with binning and bring it to the size of a template volume
    @param filename: mask file; a false value means "no mask"
    @param template: volume providing the target size (sizeX/sizeY/sizeZ)
    @param binning: binning factor applied along all three axes on reading
    @return: the mask volume, or None if no file name was given
    """
    from pytom_volume import read, vol, pasteCenter

    if not filename:
        return None

    binned = read(filename, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                  binning, binning, binning)
    target = (template.sizeX(), template.sizeY(), template.sizeZ())
    if (binned.sizeX(), binned.sizeY(), binned.sizeZ()) == target:
        return binned

    # size mismatch: paste the mask into the center of an empty volume
    # that has the size of the template
    padded = vol(*target)
    padded.setAll(0)
    pasteCenter(binned, padded)
    return padded


def calculate_difference_map_proxy(r1, band1, r2, band2, mask, focus_mask, binning, iteration, sigma, threshold, outdir='./'):
    """
    compute the two difference maps of a pair of references and store them

    Wrapper around calculate_difference_map that works on file names and
    particles instead of volumes.

    @param r1: first reference (provides getVolume() and getClass())
    @param band1: passed on to calculate_difference_map together with r1
    @param r2: second reference (provides getVolume() and getClass())
    @param band2: passed on to calculate_difference_map together with r2
    @param mask: file name of the alignment mask or a false value
    @param focus_mask: file name of the focus mask or a false value
    @param binning: binning factor used when reading the masks
    @param iteration: iteration number, only used in the output file names
    @param sigma: passed on to calculate_difference_map
    @param threshold: passed on to calculate_difference_map
    @param outdir: directory the difference maps are written to
    @return: (dp1, dp2), particles referring to the files
             'iter<iteration>_dmap_<c1>_<c2>.em' and
             'iter<iteration>_dmap_<c2>_<c1>.em', carrying the class of
             r1 and r2 respectively
    @raise Exception: if both masks are given but differ in size
    """
    from pytom.basic.structures import Particle
    import os

    v1 = r1.getVolume()
    v2 = r2.getVolume()

    # both masks are brought to the size of the first reference volume
    mask = _load_binned_mask(mask, v1, binning)
    focus_mask = _load_binned_mask(focus_mask, v1, binning)

    if focus_mask is not None and mask is not None:
        mask_size = (mask.sizeX(), mask.sizeY(), mask.sizeZ())
        focus_size = (focus_mask.sizeX(), focus_mask.sizeY(),
                      focus_mask.sizeZ())
        if mask_size != focus_size:
            raise Exception('Focussed mask and alignment mask do not have the same dimensions. This cannot be correct.')

    (dmap1, dmap2) = calculate_difference_map(v1, band1, v2, band2, mask, focus_mask, True, sigma, threshold)

    class1 = r1.getClass()
    class2 = r2.getClass()
    prefix = 'iter' + str(iteration) + '_dmap_'

    fname1 = os.path.join(outdir, prefix + str(class1) + '_' + str(class2) + '.em')
    dmap1.write(fname1)
    fname2 = os.path.join(outdir, prefix + str(class2) + '_' + str(class1) + '.em')
    dmap2.write(fname2)

    dp1 = Particle(fname1)
    dp1.setClass(class1)
    dp2 = Particle(fname2)
    dp2.setClass(class2)

    return (dp1, dp2)
Пример #3
0
def classify(pl, settings):
    """
    auto-focused classification

    Sets up the class references (external files, k-means++ like
    initialization or the labels already present in the particle list) and
    then iterates: difference maps between all pairs of references, scoring
    of the particles, assignment of class labels, removal of small classes,
    splitting of classes and re-averaging. Per iteration the class averages,
    their wedge sums and the classified particle list are written to the
    output directory. The loop ends after "niteration" iterations, when
    fewer than two classes are left, or when compare_pl reports no change
    between two consecutive particle lists.

    @param pl: particle list
    @type pl: L{pytom.basic.structures.ParticleList}
    @param settings: settings for autofocus classification; the keys used
                     here are "offset", "binning", "mask", "fmask",
                     "frequency", "output_directory", "external", "resume",
                     "ncluster", "niteration", "sigma", "threshold",
                     "noalign", "noise", "dispersion" and "fixed_frequency"
    @type settings: C{dict}
    @return: None; all results are written to the output directory
    """
    from pytom.basic.structures import Particle
    from pytom.basic.filter import lowpassFilter

    # make the particle list picklable
    pl.pickle()

    # define the starting status
    offset = settings["offset"]
    binning = settings["binning"]
    mask = settings["mask"]
    sfrequency = settings["frequency"]  # starting frequency
    outdir = settings["output_directory"]

    references = {}
    frequencies = {}
    ncluster = 0
    if settings["external"]:  # use external references
        # the position in the list becomes the class label
        for class_label, fname in enumerate(settings["external"]):
            p = Particle(fname)
            p.setClass(str(class_label))
            references[str(class_label)] = p
            frequencies[str(class_label)] = sfrequency
            ncluster += 1
    else:
        if not settings["resume"]:
            if not settings["ncluster"]:
                print("Must specify the number of clusters!")
                return

            # k-means++ way to initialize
            ncluster = settings["ncluster"]
            references, frequencies = initialize(pl, settings)
        else:
            # resume: average the particle list as it is labelled right now
            avgs, tmp, tmp2 = calculate_averages(pl,
                                                 binning,
                                                 mask,
                                                 outdir=outdir)

            for class_label, r in avgs.items():
                fname = os.path.join(
                    outdir, 'initial_class' + str(class_label) + '.em')
                rr = lowpassFilter(r, sfrequency, sfrequency / 10.)[0]
                rr.write(fname)
                p = Particle(fname)
                p.setClass(str(class_label))
                references[str(class_label)] = p
                frequencies[str(class_label)] = sfrequency
                ncluster += 1

    # start the classification
    for i in range(settings["niteration"]):
        if ncluster < 2:
            print('Not enough number of clusters. Exit!')
            break

        print("Starting iteration %d ..." % i)
        # kept to detect convergence at the end of the iteration
        old_pl = pl.copy()

        # compute the difference maps
        print("Calculate difference maps ...")
        args = []
        for first, second in combinations(references, 2):
            args.append((references[first], frequencies[first],
                         references[second], frequencies[second], mask,
                         settings["fmask"], binning, i, settings["sigma"],
                         settings["threshold"], outdir))

        # difference maps, keyed by the pair of class labels
        dmaps = {}
        res = mpi.parfor(calculate_difference_map_proxy, args)
        for r in res:
            dmaps[(r[0].getClass(), r[1].getClass())] = r

        # start the alignments
        print("Start alignments ...")
        scores = calculate_scores(pl, references, frequencies, offset, binning,
                                  mask, settings["noalign"])

        # determine the class labels & track the class changes
        pl = determine_class_labels(pl, references, frequencies, scores, dmaps,
                                    binning, settings["noise"])

        # kick out the small classes
        pls = pl.copy().splitByClass()
        nlabels = {}
        for pp in pls:
            nlabels[pp[0].getClass()] = len(pp)
            print("Number of class " + str(pp[0].getClass()) + ": " +
                  str(len(pp)))

        max_labels = max(nlabels.values())
        to_delete = set()
        if settings["dispersion"]:
            # a class is too small if it has at most 1/dispersion of the
            # members of the largest class
            min_labels = float(max_labels) / settings["dispersion"]
            for key, value in nlabels.items():
                if value <= min_labels:
                    to_delete.add(key)

        for pp in pls:
            # remember the label first: after relabelling, the particles
            # would report '-1' instead of the class that is removed
            label = pp[0].getClass()
            if label in to_delete:
                pp.setClassAllParticles('-1')
                print("Set class " + str(label) + " to noise")

        # split the top n classes
        pl = split_topn_classes(pls, len(to_delete))

        # update the references
        print("Calculate averages ...")
        avgs, freqs, wedgeSum = calculate_averages(pl,
                                                   binning,
                                                   mask,
                                                   outdir=outdir)
        ncluster = 0
        references = {}
        for class_label, r in avgs.items():
            if not settings["fixed_frequency"]:
                freq = freqs[str(class_label)]
            else:
                freq = sfrequency
            frequencies[str(class_label)] = int(freq)
            print('Resolution of class %s: %d' % (str(class_label), freq))

            fname = os.path.join(
                outdir, 'iter' + str(i) + '_class' + str(class_label) + '.em')
            rr = lowpassFilter(r, freq, freq / 10.)[0]
            rr.write(fname)
            p = Particle(fname)
            p.setClass(str(class_label))
            references[str(class_label)] = p
            ncluster += 1

            w = wedgeSum[str(class_label)]
            fname = os.path.join(
                outdir,
                'iter' + str(i) + '_class' + str(class_label) + '_wedge.em')
            w.write(fname)

        # write the result to the disk
        pl.toXMLFile(
            os.path.join(outdir, 'classified_pl_iter' + str(i) + '.xml'))

        # check the stopping criterion
        if compare_pl(old_pl, pl):
            break