def initialize(pl, settings):
    """Pick the initial class centroids with a k-means++-style seeding.

    The kn best-scoring particles form centroid 0; every further centroid
    is averaged from kn particles sampled with probability proportional to
    their distance to the closest already-chosen centroid.

    @param pl: particle list to seed from
    @type pl: L{pytom.basic.structures.ParticleList}
    @param settings: classification settings (uses "noise", "ncluster",
        "frequency", "binning", "fmask", "output_directory")
    @type settings: C{dict}
    @return: (references, frequencies) dicts keyed by class-label string;
        references holds Particle objects pointing at the written averages.
    """
    from pytom.basic.structures import Particle
    from pytom.basic.filter import lowpassFilter

    print("Initializing the class centroids ...")
    pl = pl.copy()
    pl.sortByScore()
    if settings["noise"]:
        # drop the worst-scoring fraction, treated as noise
        pl = pl[:int((1 - settings["noise"]) * len(pl))]

    K = settings["ncluster"]
    freq = settings["frequency"]
    kn = len(pl) // K  # number of particles averaged per centroid
    references = {}
    frequencies = {}

    def _make_centroid(pp, label):
        # Average the subset, lowpass filter the average, write it to disk
        # and register the resulting Particle as centroid of class `label`.
        pp.setClassAllParticles('0')
        res, _, _ = calculate_averages(pp, settings["binning"], None, outdir=settings["output_directory"])
        avg = lowpassFilter(res['0'], freq, freq / 10.)[0]
        fname = os.path.join(settings['output_directory'], 'initial_{}.em'.format(label))
        avg.write(fname)
        p = Particle(fname)
        p.setClass(str(label))
        references[str(label)] = p
        frequencies[str(label)] = freq

    # first centroid: average of the kn best-scoring particles
    _make_centroid(pl[:kn], 0)

    for k in range(1, K):
        # distance of each particle to its nearest existing centroid;
        # 4 serves as the initial upper bound on the distance score
        distances = [4] * len(pl)
        for ref in references.values():
            args = list(zip(pl, [ref] * len(pl), [freq] * len(pl),
                            [settings["fmask"]] * len(pl), [settings["binning"]] * len(pl)))
            dist = mpi.parfor(distance, args)
            for i in range(len(pl)):
                if distances[i] > dist[i]:
                    distances[i] = dist[i]

        distances = np.asarray(distances)
        print('sum distances: ', distances.sum())
        # normalize to a probability vector and sample kn particles,
        # biased towards those far from every existing centroid
        distances = distances / np.sum(distances)
        idx = np.random.choice(len(pl), kn, replace=False, p=distances)
        pp = ParticleList()
        for i in idx:
            pp.append(pl[int(i)])
        _make_centroid(pp, k)

    return references, frequencies
def calculate_difference_map_proxy(r1, band1, r2, band2, mask, focus_mask, binning, iteration, sigma, threshold, outdir='./'):
    """Compute the pairwise difference maps of two class references and
    write them to disk as Particles.

    @param r1: first class reference (provides volume and class label)
    @param band1: frequency band of r1
    @param r2: second class reference
    @param band2: frequency band of r2
    @param mask: filename of the alignment mask, or a falsy value for none
    @param focus_mask: filename of the focus mask, or a falsy value for none
    @param binning: binning factor applied when reading the masks
    @param iteration: iteration index, used in the output filenames
    @param sigma: smoothing parameter passed to calculate_difference_map
    @param threshold: threshold passed to calculate_difference_map
    @param outdir: directory the difference maps are written to
    @return: tuple (dp1, dp2) of Particles pointing at the written maps,
        carrying the class labels of r1 and r2 respectively
    @raises Exception: if the two masks differ in size
    """
    from pytom_volume import read, vol, pasteCenter
    from pytom.basic.structures import Particle, Mask
    import os
    from pytom.basic.transformations import resize

    v1 = r1.getVolume()
    v2 = r2.getVolume()

    def _load_binned_mask(fname):
        # Read the mask at the requested binning; if its size differs from
        # the reference volume, paste it centered into a zero volume of the
        # reference's size so both always match.
        m = read(fname, 0, 0, 0, 0, 0, 0, 0, 0, 0, binning, binning, binning)
        if v1.sizeX() != m.sizeX() or v1.sizeY() != m.sizeY() or v1.sizeZ() != m.sizeZ():
            out = vol(v1.sizeX(), v1.sizeY(), v1.sizeZ())
            out.setAll(0)
            pasteCenter(m, out)
            return out
        return m

    mask = _load_binned_mask(mask) if mask else None
    focus_mask = _load_binned_mask(focus_mask) if focus_mask else None

    if focus_mask is not None and mask is not None:
        if mask.sizeX() != focus_mask.sizeX():
            raise Exception('Focussed mask and alignment mask do not have the same dimensions. This cannot be correct.')

    (dmap1, dmap2) = calculate_difference_map(v1, band1, v2, band2, mask, focus_mask, True, sigma, threshold)

    # persist both maps; filenames encode iteration and the class pair
    fname1 = os.path.join(outdir, 'iter' + str(iteration) + '_dmap_' + str(r1.getClass()) + '_' + str(r2.getClass()) + '.em')
    dmap1.write(fname1)
    fname2 = os.path.join(outdir, 'iter' + str(iteration) + '_dmap_' + str(r2.getClass()) + '_' + str(r1.getClass()) + '.em')
    dmap2.write(fname2)

    dp1 = Particle(fname1)
    dp1.setClass(r1.getClass())
    dp2 = Particle(fname2)
    dp2.setClass(r2.getClass())
    return (dp1, dp2)
def classify(pl, settings):
    """ auto-focused classification
    @param pl: particle list
    @type pl: L{pytom.basic.structures.ParticleList}
    @param settings: settings for autofocus classification
    @type settings: C{dict}

    Iteratively: builds pairwise difference maps between class references,
    aligns/scores all particles, reassigns class labels, relabels too-small
    classes as noise ('-1'), splits the largest classes to keep the cluster
    count, and recomputes the references. Per-iteration averages, wedges and
    the classified particle list are written into the output directory.
    Returns None; stops early when compare_pl detects convergence.
    """
    from pytom.basic.structures import Particle, Shift, Rotation
    from pytom.basic.filter import lowpassFilter

    # make the particle list picklable
    pl.pickle()

    # define the starting status
    offset = settings["offset"]
    binning = settings["binning"]
    mask = settings["mask"]
    sfrequency = settings["frequency"]  # starting frequency
    outdir = settings["output_directory"]

    references = {}   # class label (str) -> reference Particle
    frequencies = {}  # class label (str) -> lowpass frequency used
    ncluster = 0
    if settings["external"]:  # use external references
        for class_label, fname in enumerate(settings["external"]):
            p = Particle(fname)
            p.setClass(str(class_label))
            references[str(class_label)] = p
            frequencies[str(class_label)] = sfrequency
            ncluster += 1
    else:
        if not settings["resume"]:
            if not settings["ncluster"]:
                print("Must specify the number of clusters!")
                return

            # k-means++ way to initialize
            ncluster = settings["ncluster"]
            references, frequencies = initialize(pl, settings)
        else:
            # resume: rebuild the references from the current class labels
            avgs, tmp, tmp2 = calculate_averages(pl, binning, mask, outdir=outdir)

            for class_label, r in avgs.items():
                fname = os.path.join(outdir, 'initial_class' + str(class_label) + '.em')
                rr = lowpassFilter(r, sfrequency, sfrequency / 10.)[0]
                rr.write(fname)
                p = Particle(fname)
                p.setClass(str(class_label))
                references[str(class_label)] = p
                frequencies[str(class_label)] = sfrequency
                ncluster += 1

    # start the classification
    for i in range(settings["niteration"]):
        if ncluster < 2:
            print('Not enough number of clusters. Exit!')
            break

        print("Starting iteration %d ..." % i)
        old_pl = pl.copy()  # kept to test convergence at the end of the loop

        # compute the difference maps
        print("Calculate difference maps ...")
        args = []
        for pair in combinations(list(references.keys()), 2):
            args.append((references[pair[0]], frequencies[pair[0]],
                         references[pair[1]], frequencies[pair[1]],
                         mask, settings["fmask"], binning, i,
                         settings["sigma"], settings["threshold"], outdir))
        dmaps = {}
        res = mpi.parfor(calculate_difference_map_proxy, args)
        for r in res:
            # keyed by the ordered pair of class labels
            dmaps[(r[0].getClass(), r[1].getClass())] = r

        # start the alignments
        print("Start alignments ...")
        scores = calculate_scores(pl, references, frequencies, offset,
                                  binning, mask, settings["noalign"])

        # determine the class labels & track the class changes
        pl = determine_class_labels(pl, references, frequencies, scores,
                                    dmaps, binning, settings["noise"])

        # kick out the small classes
        pls = pl.copy().splitByClass()
        nlabels = {}  # class label -> particle count
        for pp in pls:
            nlabels[pp[0].getClass()] = len(pp)
            print("Number of class " + str(pp[0].getClass()) + ": " + str(len(pp)))

        max_labels = np.max(list(nlabels.values()))
        to_delete = []
        if settings["dispersion"]:
            # classes smaller than max/dispersion are discarded as noise
            min_labels = float(max_labels) / settings["dispersion"]
            for key, value in nlabels.items():
                if value <= min_labels:
                    to_delete.append(key)
        for pp in pls:
            if pp[0].getClass() in to_delete:
                pp.setClassAllParticles('-1')
                print("Set class " + str(pp[0].getClass()) + " to noise")

        # split the top n classes (one split per deleted class, keeping
        # the total cluster count roughly constant)
        pl = split_topn_classes(pls, len(to_delete))

        # update the references
        print("Calculate averages ...")
        avgs, freqs, wedgeSum = calculate_averages(pl, binning, mask, outdir=outdir)
        ncluster = 0
        references = {}
        # NOTE(review): frequencies is NOT reset here, so entries of deleted
        # classes linger; presumably harmless since lookups go via avgs keys
        # — confirm.
        for class_label, r in avgs.items():
            if not settings["fixed_frequency"]:
                freq = freqs[str(class_label)]  # adaptive: per-class resolution
            else:
                freq = sfrequency
            frequencies[str(class_label)] = int(freq)
            print('Resolution of class %s: %d' % (str(class_label), freq))

            fname = os.path.join(outdir, 'iter' + str(i) + '_class' + str(class_label) + '.em')
            rr = lowpassFilter(r, freq, freq / 10.)[0]
            rr.write(fname)
            p = Particle(fname)
            p.setClass(str(class_label))
            references[str(class_label)] = p
            ncluster += 1

            # also persist the accumulated wedge weighting of this class
            w = wedgeSum[str(class_label)]
            fname = os.path.join(outdir, 'iter' + str(i) + '_class' + str(class_label) + '_wedge.em')
            w.write(fname)

        # write the result to the disk
        pl.toXMLFile(os.path.join(outdir, 'classified_pl_iter' + str(i) + '.xml'))

        # check the stopping criterion
        if compare_pl(old_pl, pl):
            break