Пример #1
0
def main():
    """Command-line entry point: build candidate 3-D initial models from class-averages.

    Steps, in order:
      1. parse the command line and read the image stack named by --input;
      2. normalize every image ("normalize.edgemean") and, if --shrink > 1, shrink it;
      3. write the pre-processed images to initial_models/particles_NN.hdf;
      4. submit one InitMdlTask per --tries through EMTaskCustomer and poll until every
         task reports 100;
      5. sort the results and write the model, initial seed, projections and aligned
         particles to initial_models/model_NN_MM*.hdf.

    Exits with status 1 if the input cannot be read or if --maskproc is mask.auto3d, and
    reports "Bad input file" through the parser if the input holds no images.

    Every print() below is given exactly one argument so that the output is the same
    whether the interpreter treats print as a statement or as a function.
    """
    usage = """prog [options]
	This program will take a set of reference-free class-averages (or other projections) and generate a set of possible
	3-D initial models. It does this by heavily downsampling the data, then running a number of very fast, full iterative
	refinements, each seeded with a random starting model. The results are sorted in order of apparent agreement with the
	data, such that at the end, the first numbered model should be the best result. Ideally the top few answers will all
	qualtitatively agree on the overall structure. If they do not, the results should be thoroughly assessed manually to
	insure a sensible result. By default this routine will generate 10 initial models, but this may be fewer or more than
	is strictly necessary depending on a number of factors. If the data is highly structurally heterogeneous, particularly
	if combined with a strongly preferred orientation, a correct solution using this technique may not be possible, but
	for most situations it will work well. For other situations, single particle tomography presents a good alternative
	for generating initial models."""
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_header(name="initialmodelheader", help='Options below this label are specific to e2initialmodel', title="### e2initialmodel options ###", row=1, col=0, rowspan=1, colspan=3)
    parser.add_argument("--input", dest="input", default=None, type=str, help="This file should contain good class-averages to use in constructing the initial model", browser='EMBrowserWidget(withmodal=True,multiselect=False)', guitype='filebox', row=0, col=0, rowspan=1, colspan=3)
    parser.add_argument("--iter", type=int, default=8, help="The total number of refinement iterations to perform, typically 5-10", guitype='intbox', row=2, col=0, rowspan=1, colspan=1)
    parser.add_argument("--tries", type=int, default=10, help="The number of different initial models to generate in search of a good one", guitype='intbox', row=2, col=1, rowspan=1, colspan=1)
    parser.add_argument("--shrink", dest="shrink", type=int, default=0, help="Optionally shrink the input particles by an integer factor prior to reconstruction. Default=0, no shrinking", guitype='shrinkbox', row=2, col=2, rowspan=1, colspan=1)
    parser.add_argument("--sym", dest="sym", help="Specify symmetry - choices are: c<n>, d<n>, h<n>, tet, oct, icos", default="c1", guitype='symbox', row=4, col=0, rowspan=1, colspan=2)
    parser.add_argument("--randorient", action="store_true", help="Instead of seeding with a random volume, seeds by randomizing input orientations", default=False, guitype='boolbox', row=4, col=2, rowspan=1, colspan=1)
    parser.add_argument("--maskproc", default=None, type=str, help="Default=none. If specified, this mask will be performed after the built-in automask, eg - mask.soft to remove the core of a virus", )
#	parser.add_argument("--savemore",action="store_true",help="Will cause intermediate results to be written to flat files",default=False, guitype='boolbox', expert=True, row=5, col=0, rowspan=1, colspan=1)
    parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
    parser.add_argument("--orientgen", type=str, default="eman:delta=9.0:inc_mirror=0:perturb=1", help="The type of orientation generator. Default is eman:delta=9.0:inc_mirror=0:perturb=1. See e2help.py orientgens", guitype='strbox', expert=True, row=4, col=2, rowspan=1, colspan=1)
    parser.add_argument("--parallel", "-P", type=str, help="Run in parallel, specify type:<option>=<value>:<option>=<value>. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel", default="thread:1", guitype='strbox', row=6, col=0, rowspan=1, colspan=2)
    parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID", default=-1)

    # Database Metadata storage
    #parser.add_argument("--dbls",type=str,default=None,help="data base list storage, used by the workflow. You can ignore this argument.")

    (options, args) = parser.parse_args()
    verbose = options.verbose

    try:
        ptcls = EMData.read_images(options.input)
    except Exception:
        print("Error: bad input file")
        sys.exit(1)
    # This check must come before the first ptcls[0] access; otherwise an empty
    # stack dies with an IndexError instead of a usable message.
    if not ptcls:
        parser.error("Bad input file")

    apix = ptcls[0]["apix_x"]
    if options.shrink > 1:
        apix *= options.shrink

    for i, ptcl in enumerate(ptcls):
        ptcl.process_inplace("normalize.edgemean", {})
        if options.shrink > 1:
            ptcls[i] = ptcl.process("math.meanshrink", {"n": options.shrink})
    if ptcls[0]["nx"] > 160:
        print("WARNING: using a large box size may be slow. Suggest trying --shrink=")
    boxsize = ptcls[0].get_xsize()
    if verbose > 0:
        print("%d particles %dx%d" % (len(ptcls), boxsize, boxsize))
    print("Models will be %1.3f A/pix" % apix)

    [og_name, og_args] = parsemodopt(options.orientgen)

    # Optional structure factor curve; any failure to load it simply disables it.
    try:
        sfcurve = XYData()
        sfcurve.read_file("strucfac.txt")
        sfcurve.update()
    except Exception:
        sfcurve = None

    if options.maskproc is not None:
        # The user mask is generated once from an all-ones volume, so the processor
        # must not depend on map density.
        mask2 = EMData(boxsize, boxsize, boxsize)
        mask2.to_one()
        parms = parsemodopt(options.maskproc)
        if parms[0] == "mask.auto3d":
            print("Error, maskproc may not be mask.auto3d, it must be a processor that does not rely on the input map density to function")
            sys.exit(1)
        mask2.process_inplace(parms[0], parms[1])
    else:
        mask2 = None

    # angles to use for refinement
    sym_object = parsesym(options.sym)
    orts = sym_object.gen_orientations(og_name, og_args)

    logid = E2init(sys.argv, options.ppid)
    results = []

    try:
        os.mkdir("initial_models")
    except OSError:
        pass  # normally: the directory is already there from an earlier run

    # Pick the next unused run number from the existing particles_NN files,
    # ignoring names whose NN part is not a number.
    iters = []
    for fname in os.listdir("initial_models"):
        if fname[:10] != "particles_":
            continue
        try:
            iters.append(int(fname[10:12]))
        except ValueError:
            pass
    newiter = max(iters) + 1 if iters else 0
    results_name = "initial_models/model_%02d" % newiter
    particles_name = "initial_models/particles_%02d.hdf" % newiter

    # we write the pre-processed "particles" (usually class-averages) to disk, both as a record and to prevent collisions
    for i, p in enumerate(ptcls):
        p.write_image(particles_name, i)

    # parallelism
    from EMAN2PAR import EMTaskCustomer  # we need to put this here to avoid a circular reference

    etc = EMTaskCustomer(options.parallel)
    pclist = [particles_name]

    etc.precache(pclist)  # make sure the input particles are precached on the compute nodes

    tasks = []
    for t in range(options.tries):
        tasks.append(InitMdlTask(particles_name, len(ptcls), orts, t, sfcurve, options.iter, options.sym, mask2, options.randorient, options.verbose))

    taskids = etc.send_tasks(tasks)

    # This loop runs until all subtasks are complete (via the parallelism system)
    ltime = 0
    while len(taskids) > 0:
        time.sleep(0.1)
        curstat = etc.check_task(taskids)  # a list of the progress on each task
        if options.verbose > 1 and time.time() - ltime > 1:
            print("progress:  %s" % (curstat,))
            ltime = time.time()
        for i, j in enumerate(curstat):
            if j == 100:
                rslt = etc.get_results(taskids[i])  # read the results back from a completed task as a one item dict
                results.append(rslt[1]["result"])
                if options.verbose == 1:
                    print("Task {} ({}) complete".format(i, taskids[i]))

        # filter out completed tasks. We can't do this until after the previous loop completes
        taskids = [taskids[i] for i in range(len(taskids)) if curstat[i] != 100]

    # Write out the final results.
    # Sort on the leading element only, so two results with equal leading values
    # never fall through to comparing the image objects that follow it.
    results.sort(key=lambda r: r[0])
    for i, j in enumerate(results):
        prefix = results_name + "_%02d" % (i + 1)
        out_name = prefix + ".hdf"
        j[1].write_image(out_name, 0)
        j[4].write_image(prefix + "_init.hdf", 0)
        print("%s %s %s %s" % (out_name, j[1]["quality"], j[0], j[1]["apix_x"]))
        for k, l in enumerate(j[3]):
            l[0].write_image(prefix + "_proj.hdf", k)  # set of projection images
        for k, l in enumerate(j[2]):
            # aligned particle and its matching projection are interleaved in one stack
            l.process("normalize").write_image(prefix + "_aptcl.hdf", k * 2)
            j[3][l["match_n"]][0].process("normalize").write_image(prefix + "_aptcl.hdf", k * 2 + 1)

    E2end(logid)
Пример #2
0
class EMParallelSimMX(object):
    '''
    Computes a similarity matrix through the EMAN2 task-distribution system.

    The matrix is cut into rectangular blocks, one EMSimTaskDC is submitted per block, and
    every finished block is written into the output file (args[2]) with region I/O.
    '''

    def __init__(self, options, args, logger=None):
        '''
        @param options the options produced by (options, args) = parser.parse_args()
        @param args the positional arguments produced by the same call: args[0] supplies the
               matrix columns, args[1] the matrix rows and args[2] is the output file
        @param logger an EMAN2 logger, i.e. logger=E2init(sys.argv)
        Assumes you have already called the check function.
        '''
        self.options = options
        self.args = args
        self.logger = logger

        from EMAN2PAR import EMTaskCustomer
        self.etc = EMTaskCustomer(options.parallel,
                                  module="e2simmx.EMSimTaskDC")
        cache_files = [args[0], args[1]]
        if options.colmasks is not None:
            cache_files.append(options.colmasks)
        self.etc.precache(cache_files)

        self.num_cpus = self.etc.cpu_est()
        if self.num_cpus < 32:  # lower limit
            self.num_cpus = 32

        self.__task_options = None  # built lazily by __get_task_options

    def __get_task_options(self, options):
        '''
        Get the options required by each task as a dict. The dict is built on the first call
        and the same object is returned on every later call.
        @param options is always self.options - the initialization argument. Could be changed.
        @return dict with keys align, aligncmp, cmp, ralign, raligncmp, prefilt, shrink
        '''
        if self.__task_options is None:
            d = {}
            d["align"] = parsemodopt(options.align)
            d["aligncmp"] = parsemodopt(options.aligncmp)
            d["cmp"] = parsemodopt(options.cmp)

            if hasattr(options, "ralign") and options.ralign is not None:
                d["ralign"] = parsemodopt(options.ralign)
                # raligncmp must be specified if using ralign
                d["raligncmp"] = parsemodopt(options.raligncmp)
            else:
                d["ralign"] = None
                d["raligncmp"] = None
            d["prefilt"] = options.prefilt

            if hasattr(options, "shrink") and options.shrink is not None:
                d["shrink"] = options.shrink
            else:
                d["shrink"] = None

            self.__task_options = d

        return self.__task_options

    def __init_memory(self, options):
        '''
        @param options is always self.options - the initialization argument. Could be changed.
        Establishes several important attributes they are:
        ----
        self.clen - the number of images in the image defined by args[0], the number of columns in the similarity matrix
        self.rlen - the number of images in the image defined by args[1], the number of rows in the similarity matrix
        ----
        Also, since we adopted region output writing as our preferred approach, this function makes sure the output
        image(s) exists on disk and has the correct dimensions - seeing as this is the way region writing works (the image
        has to exist on disk and have its full dimensions)
        '''
        self.clen = EMUtil.get_image_count(self.args[0])
        self.rlen = EMUtil.get_image_count(self.args[1])

        output = self.args[2]

        # with fillzero the existing matrix is kept so that only its zero cells get computed
        if file_exists(output) and not options.fillzero:
            remove_file(output)

        e = EMData(self.clen, self.rlen)
        e.to_zero()
        e.set_attr(PROJ_FILE_ATTR, self.args[0])
        e.set_attr(PART_FILE_ATTR, self.args[1])
        n = 1
        if self.options.saveali:
            n = 6  # the total number of images written to disk
        if not options.fillzero:
            e.write_image(output, 0)
        for i in range(1, n):
            e.write_image(output, i)

    def __get_blocks(self):
        '''
        Gets the blocks that will be processed in parallel, these are essentially ranges
        @return list of [first_col, end_col, first_row, end_row]; consecutive blocks share
                their boundary value, i.e. each end is the next block's start
        '''

        steve_factor = 3  # increase number of jobs a bit for better distribution
        total_jobs = steve_factor * self.num_cpus

        [col_div,
         row_div] = opt_rectangular_subdivision(self.clen, self.rlen,
                                                total_jobs)

        block_c = old_div(self.clen, col_div)
        block_r = old_div(self.rlen, row_div)

        residual_c = self.clen - block_c * col_div  # residual left over by integer division

        blocks = []

        current_c = 0
        for c in range(0, col_div):
            last_c = current_c + block_c
            if residual_c > 0:
                # hand out the leftover columns one per block until they are used up
                last_c += 1
                residual_c -= 1

            current_r = 0
            residual_r = self.rlen - block_r * row_div  # residual left over by integer division
            for r in range(0, row_div):
                last_r = current_r + block_r
                if residual_r > 0:
                    last_r += 1
                    residual_r -= 1

                blocks.append([current_c, last_c, current_r, last_r])
                current_r = last_r

            current_c = last_c

        return blocks

    def __find_unfilled_ranges(self, block):
        '''
        For each row of a block, find the runs of cells in the existing output matrix that
        are exactly zero, i.e. the portion of the matrix that still has to be filled.
        @param block a [first_col, end_col, first_row, end_row] entry from __get_blocks
        @return list of (row, first_col, last_col) tuples in whole-matrix coordinates
        '''
        rng = []
        for i in range(block[2], block[3]):
            c = EMData()
            c.read_image(self.args[2], 0, False,
                         Region(block[0], i, block[1] - block[0] + 1, 1))
            inr = 0  # nonzero while inside a run of zeros
            st = 0  # start of the current run, relative to block[0]
            for j in range(c["nx"]):
                if c[j] == 0 and not inr:
                    st = j
                    inr = 1
                if c[j] != 0 and inr:
                    rng.append((i, st + block[0], j - 1 + block[0]))
                    inr = 0
            if inr:
                # the run extends to the end of the row
                rng.append((i, st + block[0], j + block[0]))
        return rng

    def __fill_noncomputed(self):
        '''
        If using fillzero, we must fix the -1.0e38 values placed into empty cells: row by
        row, every value below -1.0e37 is replaced by that row's maximum plus 0.0001.
        '''
        # NOTE(review): the third argument presumably requests a header-only read - confirm
        l = EMData(self.args[2], 0, True)
        rlen = l["ny"]
        clen = l["nx"]
        print("Filling noncomputed regions in similarity matrix (%dx%d)" %
              (clen, rlen))
        l = EMData()
        for r in range(rlen):
            l.read_image(self.args[2], 0, False, Region(0, r, clen, 1))
            fill = l["maximum"] + .0001
            l.process_inplace("threshold.belowtominval", {
                "minval": -1.0e37,
                "newval": fill
            })
            l.write_image(self.args[2], 0, EMUtil.ImageType.IMAGE_UNKNOWN,
                          False, Region(0, r, clen, 1))

        print("Filling complete")

    def execute(self):
        '''
        The main function to be called: prepares the output file, submits one task per
        block, stores each block as it completes and, with fillzero, patches the cells that
        were never computed.
        @raise NotImplementedError if options.parallel is not longer than one character
        '''
        if len(self.options.parallel) <= 1:
            raise NotImplementedError(
                "The parallelism option you specified (%s) is not supported" %
                self.options.parallel)

        self.__init_memory(self.options)
        blocks = self.__get_blocks()

        tasks = []
        for bn, block in enumerate(blocks):
            data = {}
            data["references"] = ("cache", self.args[0], block[0], block[1])
            data["particles"] = ("cache", self.args[1], block[2], block[3])
            if self.options.colmasks is not None:
                data["colmasks"] = ("cache", self.options.colmasks, block[0],
                                    block[1])
            if self.options.mask is not None:
                data["mask"] = ("cache", self.options.mask, 0, 1)
            if self.options.fillzero:
                # for each particle check to see which portion of the matrix we need to fill
                if bn % 10 == 0:
                    print("%d/%d     \r" % (bn, len(blocks)), end=' ')
                sys.stdout.flush()
                data["partial"] = self.__find_unfilled_ranges(block)
                if len(data["partial"]) == 0:
                    continue  # nothing to compute in this block, skip it completely

            tasks.append(
                EMSimTaskDC(data=data,
                            options=self.__get_task_options(self.options)))

        # Index of the last block; computed from the list so that an empty block list
        # cannot leave the loop variable unbound.
        last_bn = len(blocks) - 1 if blocks else 0
        print("%d/%d         " % (last_bn, len(blocks)))
        self.tids = self.etc.send_tasks(tasks)
        print(len(self.tids), " tasks submitted")

        while len(self.tids) > 0:
            print(len(self.tids),
                  "simmx tasks left in main loop   \r",
                  end=' ')
            sys.stdout.flush()
            st_vals = self.etc.check_task(self.tids)
            # walk backwards so that pop(i) does not shift the entries still to be visited
            for i in range(len(self.tids) - 1, -1, -1):
                if st_vals[i] == 100:
                    tid = self.tids[i]

                    try:
                        rslts = self.etc.get_results(tid)
                        self.__store_output_data(rslts[1])
                    except Exception:
                        # Exception rather than a bare except: Ctrl-C / SystemExit must
                        # abort the run, not trigger a rerun of the task.
                        traceback.print_exc()
                        print("ERROR storing results for task %d. Rerunning." %
                              tid)
                        self.etc.rerun_task(tid)
                        continue
                    if self.logger is not None:
                        E2progress(
                            self.logger,
                            1.0 - old_div(len(self.tids), float(len(blocks))))
                        if self.options.verbose > 0:
                            print("%d/%d\r" % (len(self.tids), len(blocks)))
                            sys.stdout.flush()

                    self.tids.pop(i)
                print(len(self.tids),
                      "simmx tasks left in main loop   \r",
                      end=' ')
                sys.stdout.flush()

            # only wait when there is still something to wait for
            if len(self.tids) > 0:
                time.sleep(10)
        print("\nAll simmx tasks complete ")

        if self.options.fillzero:
            self.__fill_noncomputed()

    def __store_output_data(self, rslts):
        '''
        Store output data to the output image(s) on disk
        @param rslts a dictionary returned by the EMSimTaskDC; reads the keys "rslt_data",
               "min_ref_idx" and "min_ptcl_idx"
        '''

        result_data = rslts["rslt_data"]
        output = self.args[2]

        insertion_c = rslts["min_ref_idx"]
        insertion_r = rslts["min_ptcl_idx"]
        result_mx = result_data[0]
        r = Region(insertion_c, insertion_r, result_mx.get_xsize(),
                   result_mx.get_ysize())

        # Note this is region io - the init_memory function made sure the images exist and are the right dimensions (on disk)
        for i, mxout in enumerate(result_data):
            mxout.write_image(output, i, EMUtil.ImageType.IMAGE_UNKNOWN, False,
                              r)
Пример #3
0
def main():
    """Command-line entry point: build candidate 3-D initial models from class-averages.

    Steps, in order:
      1. parse the command line and read the image stack named by --input;
      2. normalize every image ("normalize.edgemean") and, if --shrink > 1, shrink it;
      3. write the pre-processed images to initial_models/particles_NN.hdf;
      4. submit one InitMdlTask per --tries through EMTaskCustomer and poll until every
         task reports 100;
      5. sort the results and write the model, initial seed, projections and aligned
         particles to initial_models/model_NN_MM*.hdf.

    Exits with status 1 if the input cannot be read or if --maskproc is mask.auto3d, and
    reports "Bad input file" through the parser if the input holds no images.
    """
    usage = """prog [options]
	This program will take a set of reference-free class-averages (or other projections) and generate a set of possible
	3-D initial models. It does this by heavily downsampling the data, then running a number of very fast, full iterative
	refinements, each seeded with a random starting model. The results are sorted in order of apparent agreement with the
	data, such that at the end, the first numbered model should be the best result. Ideally the top few answers will all
	qualtitatively agree on the overall structure. If they do not, the results should be thoroughly assessed manually to
	insure a sensible result. By default this routine will generate 10 initial models, but this may be fewer or more than
	is strictly necessary depending on a number of factors. If the data is highly structurally heterogeneous, particularly
	if combined with a strongly preferred orientation, a correct solution using this technique may not be possible, but
	for most situations it will work well. For other situations, single particle tomography presents a good alternative
	for generating initial models."""
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_header(
        name="initialmodelheader",
        help='Options below this label are specific to e2initialmodel',
        title="### e2initialmodel options ###",
        row=1,
        col=0,
        rowspan=1,
        colspan=3)
    parser.add_argument(
        "--input",
        dest="input",
        default=None,
        type=str,
        help=
        "This file should contain good class-averages to use in constructing the initial model",
        browser='EMBrowserWidget(withmodal=True,multiselect=False)',
        guitype='filebox',
        row=0,
        col=0,
        rowspan=1,
        colspan=3)
    parser.add_argument(
        "--iter",
        type=int,
        default=8,
        help=
        "The total number of refinement iterations to perform, typically 5-10",
        guitype='intbox',
        row=2,
        col=0,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--tries",
        type=int,
        default=10,
        help=
        "The number of different initial models to generate in search of a good one",
        guitype='intbox',
        row=2,
        col=1,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--shrink",
        dest="shrink",
        type=int,
        default=0,
        help=
        "Optionally shrink the input particles by an integer factor prior to reconstruction. Default=0, no shrinking",
        guitype='shrinkbox',
        row=2,
        col=2,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--sym",
        dest="sym",
        help="Specify symmetry - choices are: c<n>, d<n>, h<n>, tet, oct, icos",
        default="c1",
        guitype='symbox',
        row=4,
        col=0,
        rowspan=1,
        colspan=2)
    parser.add_argument(
        "--automaskexpand",
        default=-1,
        type=int,
        help=
        "Number of voxels of post-threshold expansion in the mask, for use when peripheral features are truncated. (default=shrunk boxsize/20)",
        guitype='intbox',
        row=6,
        col=2,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--randorient",
        action="store_true",
        help=
        "Instead of seeding with a random volume, seeds by randomizing input orientations",
        default=False,
        guitype='boolbox',
        row=4,
        col=2,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--maskproc",
        default=None,
        type=str,
        help=
        "Default=none. If specified, this mask will be performed after the built-in automask, eg - mask.soft to remove the core of a virus",
    )
    #	parser.add_argument("--savemore",action="store_true",help="Will cause intermediate results to be written to flat files",default=False, guitype='boolbox', expert=True, row=5, col=0, rowspan=1, colspan=1)
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higher number means higher level of verboseness")
    parser.add_argument(
        "--orientgen",
        type=str,
        default="eman:delta=9.0:inc_mirror=0:perturb=1",
        help=
        "The type of orientation generator. Default is eman:delta=9.0:inc_mirror=0:perturb=1. See e2help.py orientgens",
        guitype='strbox',
        expert=True,
        row=4,
        col=2,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--parallel",
        "-P",
        type=str,
        help=
        "Run in parallel, specify type:<option>=<value>:<option>=<value>. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel",
        default="thread:1",
        guitype='strbox',
        row=6,
        col=0,
        rowspan=1,
        colspan=2)
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)

    # Database Metadata storage
    #parser.add_argument("--dbls",type=str,default=None,help="data base list storage, used by the workflow. You can ignore this argument.")

    (options, args) = parser.parse_args()
    verbose = options.verbose

    try:
        ptcls = EMData.read_images(options.input)
    except Exception:
        print("Error: bad input file")
        sys.exit(1)
    # This check must come before the first ptcls[0] access; otherwise an empty
    # stack dies with an IndexError instead of a usable message.
    if not ptcls:
        parser.error("Bad input file")

    apix = ptcls[0]["apix_x"]
    if options.shrink > 1:
        apix *= options.shrink

    if options.tries < 10:
        print(
            "Warning: suggest using --tries >=10. The first 8 starting maps are generated deterministically, and it is good to have several random seeds as well to increase the liklihood of a good outcome."
        )

    for i, ptcl in enumerate(ptcls):
        ptcl.process_inplace("normalize.edgemean", {})
        if options.shrink > 1:
            ptcls[i] = ptcl.process("math.meanshrink", {"n": options.shrink})
    if ptcls[0]["nx"] > 160:
        print(
            "WARNING: using a large box size may be slow. Suggest trying --shrink="
        )
    boxsize = ptcls[0].get_xsize()
    if verbose > 0:
        print("%d particles %dx%d" % (len(ptcls), boxsize, boxsize))
    print("Models will be %1.3f A/pix" % apix)

    [og_name, og_args] = parsemodopt(options.orientgen)

    # Optional structure factor curve; any failure to load it simply disables it.
    try:
        sfcurve = XYData()
        sfcurve.read_file("strucfac.txt")
        sfcurve.update()
    except Exception:
        sfcurve = None

    if options.maskproc is not None:
        # The user mask is generated once from an all-ones volume, so the processor
        # must not depend on map density.
        mask2 = EMData(boxsize, boxsize, boxsize)
        mask2.to_one()
        parms = parsemodopt(options.maskproc)
        if parms[0] == "mask.auto3d":
            print(
                "Error, maskproc may not be mask.auto3d, it must be a processor that does not rely on the input map density to function"
            )
            sys.exit(1)
        mask2.process_inplace(parms[0], parms[1])
    else:
        mask2 = None

    # angles to use for refinement
    sym_object = parsesym(options.sym)
    orts = sym_object.gen_orientations(og_name, og_args)

    logid = E2init(sys.argv, options.ppid)
    results = []

    try:
        os.mkdir("initial_models")
    except OSError:
        pass  # normally: the directory is already there from an earlier run

    # Pick the next unused run number from the existing particles_NN files,
    # ignoring names whose NN part is not a number.
    iters = []
    for fname in os.listdir("initial_models"):
        if fname[:10] != "particles_":
            continue
        try:
            iters.append(int(fname[10:12]))
        except ValueError:
            pass
    newiter = max(iters) + 1 if iters else 0
    results_name = "initial_models/model_%02d" % newiter
    particles_name = "initial_models/particles_%02d.hdf" % newiter

    # we write the pre-processed "particles" (usually class-averages) to disk, both as a record and to prevent collisions
    for i, p in enumerate(ptcls):
        p.write_image(particles_name, i)

    # parallelism
    from EMAN2PAR import EMTaskCustomer  # we need to put this here to avoid a circular reference

    etc = EMTaskCustomer(options.parallel, module="e2initialmodel.InitMdlTask")
    pclist = [particles_name]

    # make sure the input particles are precached on the compute nodes
    etc.precache(pclist)

    tasks = []
    for t in range(options.tries):
        tasks.append(
            InitMdlTask(particles_name, len(ptcls), orts, t, sfcurve,
                        options.iter, options.sym, mask2, options.randorient,
                        options.automaskexpand, options.verbose))

    taskids = etc.send_tasks(tasks)

    # This loop runs until all subtasks are complete (via the parallelism system)
    ltime = 0
    while len(taskids) > 0:
        time.sleep(0.1)
        curstat = etc.check_task(taskids)  # a list of the progress on each task
        if options.verbose > 1 and time.time() - ltime > 1:
            print("progress: ", curstat)
            ltime = time.time()
        for i, j in enumerate(curstat):
            if j == 100:
                # read the results back from a completed task as a one item dict
                rslt = etc.get_results(taskids[i])
                results.append(rslt[1]["result"])
                if options.verbose == 1:
                    print("Task {} ({}) complete".format(i, taskids[i]))

        # filter out completed tasks. We can't do this until after the previous loop completes
        taskids = [
            taskids[i] for i in range(len(taskids)) if curstat[i] != 100
        ]

    # Write out the final results.
    # Sort on the leading element only, so two results with equal leading values
    # never fall through to comparing the image objects that follow it.
    results.sort(key=lambda r: r[0])
    for i, j in enumerate(results):
        prefix = results_name + "_%02d" % (i + 1)
        out_name = prefix + ".hdf"
        j[1].write_image(out_name, 0)
        j[4].write_image(prefix + "_init.hdf", 0)
        print(out_name, j[1]["quality"], j[0], j[1]["apix_x"])
        for k, l in enumerate(j[3]):
            l[0].write_image(prefix + "_proj.hdf", k)  # set of projection images
        for k, l in enumerate(j[2]):
            # aligned particle and its matching projection are interleaved in one stack
            l.process("normalize").write_image(prefix + "_aptcl.hdf", k * 2)
            j[3][l["match_n"]][0].process("normalize").write_image(
                prefix + "_aptcl.hdf", k * 2 + 1)

    E2end(logid)
Пример #4
0
def main():
    """Erase gold fiducials from every image of each stack given on the command line.

    For each input file this function:
      * converts ``.ali``/``.mrc`` input into an intermediate HDF stack with
        ``e2proc2d.py --threed2twod``;
      * pre-creates ``<name>_proc.hdf`` holding one placeholder per image,
        because parallel tasks do not run in order;
      * processes each image with ``fiximage`` (serial) or queues one
        ``EraseGold2DTask`` per image (when --parallel is given);
      * for ``.ali``/``.mrc`` input, converts the processed stack back to the
        original extension (``<name>_proc.ali`` / ``<name>_proc.mrc``) and
        deletes the intermediate HDF stack.

    Exits with status 1 if the output file for an input already exists.
    """
    usage = """prog [options] stack1.hdf stack2.mrcs ...

	Program to erase gold fiducials and other high-density features from images, such as frames in DDD movies or images in tiltseries. Requires scipy.
	"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument(
        "--lowpass",
        default=False,
        action="store_true",
        help=
        "Also lowpass filter noise based on local properties. Useful for processing tomographic tilt series."
    )
    parser.add_argument(
        "--keepdust",
        default=False,
        action="store_true",
        help=
        "Do not remove 'dust' from mask (include objects smaller than gold fiducials)."
    )
    parser.add_argument(
        "--goldsize",
        default=30,
        type=float,
        help="Diameter (in pixels) of gold fiducials to erase.")
    parser.add_argument(
        "--oversample",
        default=4,
        type=int,
        help=
        "Oversample noise image to smooth transitions from regions with different noise."
    )
    parser.add_argument("--boxsize",
                        default=128,
                        type=int,
                        help="Box size to use when computing local noise.")
    parser.add_argument("--debug",
                        default=False,
                        action="store_true",
                        help="Save noise and mask/masked image(s).")
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higner number means higher level of verboseness")
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-2)
    parser.add_argument(
        "--parallel",
        type=str,
        default=None,
        help=
        """Default=None (not used). Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel"""
    )

    (options, args) = parser.parse_args()

    etc = None
    if options.parallel:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer(options.parallel)

    for arg in args:

        t0 = time.time()

        originalarg = arg
        # .ali/.mrc inputs are converted to an intermediate HDF stack first
        threed_input = originalarg[-4:] in ('.ali', '.mrc')

        # load header only to get the parameters used below
        hdr = EMData(arg, 0, True)
        apix = hdr['apix_x']
        nx = hdr['nx']
        ny = hdr['ny']

        if threed_input:
            # e2proc2d.py appends to existing files instead of overwriting
            # them, so a leftover scratch file from a failed run would make the
            # stack grow. Remove any such file before converting.
            try:
                os.remove('dummy_stack.hdf')
            except OSError:
                pass

            # turn the .ali or .mrc 3D image into a stack of 2D images
            cmd = 'e2proc2d.py ' + arg + ' dummy_stack.hdf --threed2twod'
            runcmd(options, cmd)

            # The intermediate stack takes the input's name with an .hdf
            # extension; it is deleted once processing succeeds. Only the
            # trailing extension is swapped (str.replace would also rewrite
            # the same text appearing earlier in the path).
            arg = originalarg[:-4] + '.hdf'
            os.rename('dummy_stack.hdf', arg)

        outf = "{}_proc.hdf".format(os.path.splitext(arg)[0])
        if os.path.isfile(outf):
            print(
                "Results are already stored in {}. Please erase or move and try again."
                .format(outf))
            sys.exit(1)

        nfs = EMUtil.get_image_count(arg)

        # Parallelized tasks don't run "in order", so the output stack is
        # pre-created with as many images as the final stack will have.
        # The placeholders are made at the real image size (nx x ny) because
        # EMAN2 does not allow stacks of images with different sizes.
        placeholder = EMData(nx, ny)
        placeholder.to_one()
        placeholder['apix_x'] = apix
        placeholder['apix_y'] = apix
        for j in range(nfs):
            placeholder.write_image(outf, j)

        tasks = []
        results = None
        for i in range(nfs):
            if options.verbose:
                sys.stdout.write("\rstaging images ({}/{})".format(i + 1, nfs))
                sys.stdout.flush()

            if options.parallel:
                tasks.append(EraseGold2DTask(options, arg, i, outf))
            else:
                results = fiximage(options, arg, i, outf)

        if options.parallel and tasks:
            tids = etc.send_tasks(tasks)
            if options.verbose:
                print("\n(erase_gold) %d tasks queued" % (len(tids)))

            results = get_results(etc, tids, options)

        if results and threed_input:
            # Convert the *processed* stack back to the original format, then
            # drop the intermediate HDF copy of the input (removed exactly once).
            finaloutput = os.path.splitext(outf)[0] + originalarg[-4:]
            cmd = 'e2proc2d.py ' + outf + ' ' + finaloutput + ' --twod2threed --outmode int16'
            runcmd(options, cmd)
            os.remove(arg)

        dt = time.time() - t0
        if options.verbose:
            print("\n")
            sys.stdout.write("Erased fiducials from {} ({} minutes)\n".format(
                arg, round(dt / 60., 2)))
Пример #5
0
def main():
    """Erase gold fiducials from every image of each stack given on the command line.

    For each input file this function:
      * converts ``.ali``/``.mrc`` input into an intermediate HDF stack with
        ``e2proc2d.py --threed2twod`` (restricted to the first --subset images
        when --subset is given);
      * pre-creates ``<name>_efd.hdf`` holding one nx x ny placeholder per
        image, because parallel tasks do not run in order;
      * processes each image with ``fiximage`` (serial) or queues one
        ``EraseGold2DTask`` per image (when --parallel is given);
      * for ``.ali``/``.mrc`` input, converts the processed stack back to the
        original extension and deletes the intermediate HDF stack;
      * removes every file in the current directory whose name contains
        ``erasegold_tmp``.

    Exits with status 1 if the --coords file cannot be read or if the output
    file for an input already exists.

    NOTE(review): --subset is only applied during the .ali/.mrc conversion;
    HDF inputs are processed in full. Confirm whether that is intended.
    """
    usage = """prog [options] stack1.hdf stack2.mrcs ...

	Program to erase gold fiducials and other high-density features from images, such as frames in DDD movies or images in tiltseries. Requires scipy.
	"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument("--apix",
                        default=None,
                        type=float,
                        help="Override Apix in image header.")
    parser.add_argument(
        "--lowpass",
        default=1.11,
        type=float,
        help=
        "Multiply lowpass filter frequency by this constant when filtering noise image. Default is 1.11."
    )
    parser.add_argument(
        "--coords",
        default="",
        type=str,
        required=True,
        help=
        "Specify box file with x and y gold coordinates. Must follow standard box file format (x<tab>y<tab>xsize<ysize>) although box sizes are ignored."
    )
    parser.add_argument(
        "--keepdust",
        default=False,
        action="store_true",
        help=
        "Do not remove 'dust' from mask (include objects smaller than gold fiducials)."
    )
    parser.add_argument(
        "--goldsize",
        default=30,
        type=float,
        help="Diameter (in pixels) of gold fiducials to erase.")
    parser.add_argument(
        "--oversample",
        default=4,
        type=int,
        help=
        "Oversample noise image to smooth transitions from regions with different noise."
    )
    parser.add_argument("--boxsize",
                        default=128,
                        type=int,
                        help="Box size to use when computing local noise.")
    parser.add_argument("--debug",
                        default=False,
                        action="store_true",
                        help="Save noise and mask/masked image(s).")
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higner number means higher level of verboseness")
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-2)
    parser.add_argument(
        "--parallel",
        type=str,
        default=None,
        help=
        """Default=None (not used). Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel"""
    )
    parser.add_argument(
        "--subset",
        default=0,
        type=int,
        help=
        "Default=0 (not used). Apply algorithm to only a subset of images in each stack file."
    )
    parser.add_argument(
        "--nsigmas",
        default=3.0,
        type=float,
        help=
        "Default=3.0. Number of standard deviations above the mean to determine pixels to mask out (erase)."
    )

    (options, args) = parser.parse_args()

    etc = None
    if options.parallel:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer(options.parallel)

    if options.coords:
        # Parsed here only to fail early on an unreadable box file; the parsed
        # array itself is not used by this function.
        try:
            np.loadtxt(options.coords)
        except Exception:
            print(
                "Failed to read coordinates. Check input box file path and contents."
            )
            sys.exit(1)

    for arg in args:

        t0 = time.time()

        originalarg = arg
        # .ali/.mrc inputs are converted to an intermediate HDF stack first
        threed_input = originalarg[-4:] in ('.ali', '.mrc')

        # load header only to get the parameters used below
        hdr = EMData(arg, 0, True)
        apix = options.apix if options.apix else hdr['apix_x']
        nx = hdr['nx']
        ny = hdr['ny']

        if threed_input:
            # e2proc2d.py appends to existing files instead of overwriting
            # them, so a leftover scratch file from a failed run would make the
            # stack grow. Remove any such file before converting.
            try:
                os.remove('dummy_stack.hdf')
            except OSError:
                pass

            # turn the .ali or .mrc 3D image into a stack of 2D images
            cmd = 'e2proc2d.py ' + arg + ' dummy_stack.hdf --threed2twod'
            if options.subset:
                cmd += ' --first 0 --last ' + str(options.subset - 1)

            runcmd(options, cmd)

            # The intermediate stack takes the input's name with an .hdf
            # extension; it is deleted once processing succeeds. Only the
            # trailing extension is swapped (str.replace would also rewrite
            # the same text appearing earlier in the path).
            arg = originalarg[:-4] + '.hdf'
            os.rename('dummy_stack.hdf', arg)

        outf = "{}_efd.hdf".format(os.path.splitext(arg)[0])
        if os.path.isfile(outf):
            print(
                "Results are already stored in {}. Please erase or move and try again."
                .format(outf))
            sys.exit(1)

        nfs = EMUtil.get_image_count(arg)

        # Parallelized tasks don't run "in order", so the output stack is
        # pre-created with as many images as the final stack will have
        # (otherwise, writing output images to stack indexes randomly makes
        # the program crash or produces garbage output). The placeholders
        # already have the final nx x ny size, so no resize pass is needed.
        placeholder = EMData(nx, ny)
        placeholder.to_one()
        placeholder['apix_x'] = apix
        placeholder['apix_y'] = apix
        for j in range(nfs):
            placeholder.write_image(outf, j)

        tasks = []
        results = None
        for i in range(nfs):
            if options.verbose:
                sys.stdout.write("\rstaging images ({}/{})".format(i + 1, nfs))
                sys.stdout.flush()

            if options.parallel:
                tasks.append(EraseGold2DTask(options, arg, i, outf))
            else:
                results = fiximage(options, arg, i, outf)

        if options.parallel and tasks:
            tids = etc.send_tasks(tasks)
            if options.verbose:
                print("\n(erase_gold) %d tasks queued" % (len(tids)))

            results = get_results(etc, tids, options)

        if results and threed_input:
            # Convert the processed stack back to the original format, then
            # drop the intermediate HDF copy of the input.
            finaloutput = os.path.splitext(outf)[0] + originalarg[-4:]
            cmd = 'e2proc2d.py ' + outf + ' ' + finaloutput + ' --twod2threed --outmode int16'
            runcmd(options, cmd)
            os.remove(arg)

        # Best-effort removal of scratch files left in the working directory.
        try:
            for tf in [f for f in os.listdir(".") if 'erasegold_tmp' in f]:
                os.remove(tf)
        except OSError:
            print("WARNING: cleanup failed.")

        dt = time.time() - t0
        if options.verbose:
            print("\n")
            sys.stdout.write("Erased fiducials from {} ({} minutes)\n".format(
                arg, round(dt / 60., 2)))
    return
Пример #6
0
def main():
    """Split the final-iteration class-averages of a refinement into two 3-D maps.

    Steps, in order:
      1. Parse options. When --path is omitted, the last (sorted) 9-character
         ``refine_*`` entry of the current directory is used.
      2. Read ``0_refine_parms.json`` plus the even/odd ``cls_result`` files of
         the last iteration and build a particle list for every class.
      3. Queue one ``ClassSplitTask`` per class holding at least 20 particles
         and collect the resulting pairs of averages.
      4. Seed two Fourier reconstructors with the pair from the most populated
         class, then insert every remaining pair in whichever order gives the
         larger summed "reconstruct_absqual_lowres" agreement.
      5. Write both volumes, the two class-average stacks and the two particle
         lists, then run ``e2proc3d.py`` to produce the filtered volumes.

    Exits with status 1 when no refine folder or parameter file is found, or
    when no class yields a usable pair of averages.
    """
    usage = """prog <output> [options]

	The goal of this program is to reduce the heterogeneity of a reconstruction by splitting a single map
	into two maps, each more homogeneous. You must run e2refine_easy to completion before using this program.
	It will take the class-averaging results from the final iteration, and split the particles from each 
	class-average into 2 groups, producing 2 class-averages for each. The program then attempts to construct
	a maximally self-consistent grouping of these pairs of class averages into 2 3-D maps. 
	"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument(
        "--path",
        default=None,
        type=str,
        help=
        "The name of an existing refine_xx folder, where e2refine_easy ran to completion",
        guitype='filebox',
        filecheck=False,
        browser="EMBrowserWidget(withmodal=True,multiselect=False)",
        row=3,
        col=0,
        rowspan=1,
        colspan=3)
    parser.add_argument(
        "--usebasis",
        default=0,
        type=int,
        help=
        "Select which Eigenimage to use for separation. With novarimax, n=0 is highest energy.",
        guitype='intbox',
        row=5,
        col=0,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--nbasis",
        default=-1,
        type=int,
        help=
        "Number of basis vectors to compute. Must be at least usebasis+1. Default 6 or usebasis+1.",
        guitype='intbox',
        row=4,
        col=0,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--novarimax",
        action="store_true",
        default=False,
        help="Disable varimax rotation among computed basis vectors.",
        guitype='boolbox',
        row=7,
        col=0,
        rowspan=1,
        colspan=1)
    parser.add_argument(
        "--mask",
        default=None,
        help="Optional 3D mask to focus the classification",
        guitype='filebox',
        browser='EMSetsTable(withmodal=True,multiselect=False)',
        filecheck=False,
        row=6,
        col=0,
        rowspan=1,
        colspan=3,
        mode="refinement")
    parser.add_argument("--parallel",
                        default="thread:2",
                        help="Standard parallelism option. Default=thread:2",
                        guitype='strbox',
                        row=8,
                        col=0,
                        rowspan=1,
                        colspan=2)
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higner number means higher level of verboseness")
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)

    (options, args) = parser.parse_args()

    # Default to 6 basis vectors, then make sure the selected basis exists.
    # The second check also covers an explicitly given --nbasis that is too
    # small for --usebasis.
    if options.nbasis <= 1:
        options.nbasis = 6
    if options.nbasis < options.usebasis + 1:
        options.nbasis = options.usebasis + 1
        print("--nbasis adjusted to  {}".format(options.nbasis))

    if options.path is None:
        paths = sorted(
            i for i in os.listdir(".") if "refine_" in i and len(i) == 9)
        if not paths:
            print("Error: no refine_XX folder found, please specify --path")
            sys.exit(1)
        options.path = paths[-1]

    pathnum = options.path[-2:]

    # check the specified path for the files we need
    try:
        olddb = js_open_dict(options.path + "/0_refine_parms.json")
        last_map = olddb["last_map"]
        targetres = olddb["targetres"]
        last_iter = int(last_map.split("_")[-1][:2])
        # prefer "inputavg" when it is present and set, else fall back to "input"
        try:
            ptcls = olddb["inputavg"]
            if ptcls is None:
                raise KeyError("inputavg")
        except Exception:
            ptcls = olddb["input"]

        sym = olddb["sym"]
        if options.verbose:
            print("Found iteration {} in {}, using {}".format(
                last_iter, options.path, " & ".join(ptcls)))
    except Exception:
        traceback.print_exc()
        print("Error: Cannot find necessary files in  {}".format(options.path))
        sys.exit(1)

    logger = E2init(sys.argv, options.ppid)

    # classmx is a list with 2 elements (even, odd). Each element is a list of
    # EMData from the corresponding cls_result file
    classmx = [
        EMData.read_images("{}/cls_result_{:02d}_{}.hdf".format(
            options.path, last_iter, half)) for half in ("even", "odd")
    ]
    ncls = max(
        int(classmx[0][0]["maximum"]) + 1,
        int(classmx[1][0]["maximum"]) + 1)

    # Rearrange the info in classmx: one list of
    # [even/odd index, particle row, 2-D Transform] entries per class
    classlists = [[] for _ in range(ncls)]
    for eo in (0, 1):
        for y in range(classmx[eo][0]["ny"]):
            ptcl = [
                eo, y,
                Transform({
                    "type": "2d",
                    "tx": classmx[eo][2][0, y],
                    "ty": classmx[eo][3][0, y],
                    "alpha": classmx[eo][4][0, y],
                    "mirror": int(classmx[eo][5][0, y])
                })
            ]
            classlists[int(classmx[eo][0][0, y])].append(ptcl)

    # Initialize parallelism
    from EMAN2PAR import EMTaskCustomer
    etc = EMTaskCustomer(options.parallel)

    # Euler angles for averages
    projin = "{}/projections_{:02d}_even.hdf".format(options.path, last_iter)
    eulers = [
        EMData(projin, i, True)["xform.projection"] for i in range(ncls)
    ]

    # Prepare mask if specified
    mask = EMData(options.mask) if options.mask is not None else None

    # prepare tasks
    tasks = []
    ns = [classmx[eo][0]["ny"] for eo in (0, 1)]
    for c, cl in enumerate(classlists):
        # we require at least 20 particles in a class to make the attempt
        if len(cl) < 20:
            continue
        maskp = mask.project("standard", eulers[c]) if mask is not None else None
        tasks.append(
            ClassSplitTask(ptcls, ns, cl, c, eulers[c], maskp,
                           options.usebasis, options.nbasis, options.novarimax,
                           options.verbose - 1))

    # execute task list
    taskids = etc.send_tasks(tasks)
    alltaskids = taskids[:]

    # Each entry: [particle count, xform, avg1, avg2, basis, class number];
    # a trailing 0/1 ordering flag is appended during reconstruction below.
    classes = []
    while len(taskids) > 0:
        curstat = etc.check_task(taskids)
        for i, j in enumerate(curstat):
            if j != 100:
                continue
            rslt = etc.get_results(taskids[i])
            rsltd = rslt[1]
            cls = rslt[0].options["classnum"]
            if "failed" in rsltd:
                print("Bad average in  {}".format(cls))
                continue
            nptcl = rsltd["avg1"]["ptcl_repr"] + rsltd["avg2"]["ptcl_repr"]
            # note that the 2 results we get back are in arbitrary order!
            # the next section of code with 3D reconstruction is designed to
            # sort out which average should be paired with which
            classes.append([
                nptcl, rsltd["avg1"]["xform.projection"], rsltd["avg1"],
                rsltd["avg2"], rsltd["basis"], cls
            ])

        # filter out completed tasks only after the loop above has finished
        taskids = [j for i, j in enumerate(taskids) if curstat[i] != 100]

        if options.verbose and 100 in curstat:
            print("%d/%d tasks remain" % (len(taskids), len(alltaskids)))
        if 100 in curstat:
            E2progress(logger, 1.0 - (float(len(taskids)) / len(alltaskids)))

    if options.verbose:
        print("Completed all tasks\nGrouping consistent averages")

    if not classes:
        print(
            "Error: no class produced a usable pair of averages (at least 20 particles per class are required)"
        )
        E2end(logger)
        sys.exit(1)

    # We want to start with the largest number of particles. Sorting on the
    # count alone avoids comparing Transform/EMData objects when counts tie.
    classes.sort(key=lambda entry: entry[0], reverse=True)
    apix = classes[0][2]["apix_x"]

    boxsize = classes[0][2]["ny"]
    pad = good_size(boxsize * 1.5)
    if options.verbose:
        print("Boxsize -> {}, padding to {}".format(boxsize, pad))

    # Origin of the padded clip region; floor division of the negated
    # difference, matching integer division of the original expression.
    pad_origin = -(pad - boxsize) // 2

    def padded(img):
        """Return img clipped/padded to a pad x pad box around its center."""
        return img.get_clip(Region(pad_origin, pad_origin, pad, pad))

    # a pair of reconstructors. we will then simultaneously reconstruct in the
    # pair, and use each to decide on the best target for each particle
    recon = [
        Reconstructors.get("fourier", {
            "size": [pad, pad, pad],
            "sym": sym,
            "mode": "gauss_5"
        }) for _ in (0, 1)
    ]
    for r in recon:
        r.setup()

    # The pair from the first class (the one with the most particles) seeds
    # the reconstructors: avg1 -> recon[0], avg2 -> recon[1]
    first = classes[0]
    for r, avg in ((recon[0], first[2]), (recon[1], first[3])):
        seed = r.preprocess_slice(padded(avg), first[1])
        r.insert_slice(seed, first[1], avg.get_attr_default("ptcl_repr", 1.0))

    first.append(0)

    if options.verbose:
        print("Reconstruction: pass 1")
    for i, c in enumerate(classes[1:]):
        # the projection corresponding to this average
        proj = EMData(projin, c[5])
        # while this does cost us a final interpolation, high resolution isn't
        # the primary aim anyway, and getting the alignment consistent is
        # important. It also gives us a chance to normalize.
        c[2]["xform.align2d"] = Transform()
        ali2 = c[2].align("refine", proj)
        ali2.process_inplace("normalize.toimage", {
            "to": proj,
            "ignore_zero": 1
        })
        c[3]["xform.align2d"] = Transform()
        ali3 = c[3].align("refine", proj)
        ali3.process_inplace("normalize.toimage", {
            "to": proj,
            "ignore_zero": 1
        })

        # note that ali2 and c[2] are the same except for a final alignment
        a3 = recon[0].preprocess_slice(padded(ali2), classes[0][1])
        a3n = c[2].get_attr_default("ptcl_repr", 1.0)

        # similarly ali3 and c[3] are the same.
        # NOTE (from the original author): it is believed not to matter
        # whether recon[0] or recon[1] preprocesses here, but this is unchecked.
        b3 = recon[1].preprocess_slice(padded(ali3), classes[0][1])
        b3n = c[3].get_attr_default("ptcl_repr", 1.0)

        # q<r><x>: agreement of average x (a or b) with reconstruction r (0 or 1)
        recon[0].determine_slice_agreement(a3, c[1], a3n, False)
        q0a = a3["reconstruct_absqual_lowres"]

        recon[1].determine_slice_agreement(a3, c[1], a3n, False)
        q1a = a3["reconstruct_absqual_lowres"]

        recon[0].determine_slice_agreement(b3, c[1], b3n, False)
        q0b = b3["reconstruct_absqual_lowres"]

        recon[1].determine_slice_agreement(b3, c[1], b3n, False)
        q1b = b3["reconstruct_absqual_lowres"]

        if options.verbose > 1:
            print(" ".join(
                str(v)
                for v in (i, q0a, q1a, q0b, q1b, q0a + q1b, q1a + q0b)))

        # we put a 0 at the end of the classes element if we use the
        # a->0, b->1 ordering, 1 if swapped
        if q0a + q1b > q1a + q0b:
            c.append(0)
            recon[0].insert_slice(a3, c[1], a3n)
            recon[1].insert_slice(b3, c[1], b3n)
        else:
            c.append(1)
            recon[1].insert_slice(a3, c[1], a3n)
            recon[0].insert_slice(b3, c[1], b3n)

    # NOTE: no second pass is performed; the assignments made in pass 1 are
    # final. The message is kept so the program output is unchanged.
    if options.verbose:
        print("Reconstruction: pass 2")

    if options.verbose:
        print("All done, writing output")

    msk = "_msk" if mask is not None else ""
    classout = [
        "{}/classes_{:02d}_bas{}{}_split0.hdf".format(options.path, last_iter,
                                                      options.usebasis, msk),
        "{}/classes_{:02d}_bas{}{}_split1.hdf".format(options.path, last_iter,
                                                      options.usebasis, msk)
    ]
    basisout = "{}/classes_{:02d}{}_basis".format(options.path, last_iter, msk)
    threedout = "{}/threed_{:02d}{}_split.hdf".format(options.path, last_iter,
                                                      msk)
    threedout2 = "{}/threed_{:02d}{}_split_filt_bas{}.hdf".format(
        options.path, last_iter, msk, options.usebasis)
    setout = [
        "sets/split_{}{}_bas{}_0.lst".format(pathnum, msk, options.usebasis),
        "sets/split_{}{}_bas{}_1.lst".format(pathnum, msk, options.usebasis)
    ]

    # Finish both reconstructions and cut the padded volumes back to boxsize
    unpad = (pad - boxsize) // 2
    split = [
        r.finish(True).get_clip(
            Region(unpad, unpad, unpad, boxsize, boxsize, boxsize))
        for r in recon
    ]
    for vol in split:
        vol["apix_x"] = apix
        vol["apix_y"] = apix
        vol["apix_z"] = apix
    for vol in split:
        vol.process_inplace("mask.soft", {"outer_radius": -8, "width": 4})
    for n, vol in enumerate(split):
        vol.write_image(threedout, n)

    scratch_lists = ("sets/split0.lst", "sets/split1.lst")

    def remove_scratch_lists():
        """Delete each temporary list file that exists, independently."""
        for fsp in scratch_lists:
            try:
                os.unlink(fsp)
            except OSError:
                pass

    # now we write the class-averages and the new (split) particle files
    lstin = [LSXFile(ptcls[0], True), LSXFile(ptcls[1], True)]
    remove_scratch_lists()
    lstout = [LSXFile(fsp) for fsp in scratch_lists]
    for i, c in enumerate(classes):
        # (average, destination stack, destination list) for each half of the
        # pair; c[-1] is the ordering flag appended during reconstruction
        halves = ((c[2], classout[c[-1]], lstout[0]),
                  (c[3], classout[c[-1] ^ 1], lstout[1]))
        for avg, stackname, lst in halves:
            avg.write_image(stackname, i)  # class-average
            ptcln = avg["class_eoidxs"]  # eofile/ptcl# pairs
            for p in range(0, len(ptcln), 2):
                # assigning to index -1 appends to the list file
                lst[-1] = lstin[ptcln[p]][ptcln[p + 1]]

        if options.verbose > 2:
            c[4][0].write_image(basisout + "1.hdf", i)
            c[4][1].write_image(basisout + "2.hdf", i)
            c[4][2].write_image(basisout + "3.hdf", i)

    launch_childprocess("e2proclst.py sets/split0.lst --mergesort {}".format(
        setout[0]))
    launch_childprocess("e2proclst.py sets/split1.lst --mergesort {}".format(
        setout[1]))

    remove_scratch_lists()

    # The structure factor is applied only when strucfac.txt exists in the
    # current directory; the remaining filter options are the same either way.
    if os.path.exists("strucfac.txt"):
        setsf = "--setsf strucfac.txt "
    else:
        print("Missing structure factor, cannot filter properly")
        setsf = ""
    launch_childprocess(
        "e2proc3d.py {} {} {}--process filter.wiener.byfsc:fscfile={}/fsc_masked_{:02d}.txt:snrmult=2:sscale=1.1:maxfreq={} --process mask.soft:outer_radius=-9:width=4"
        .format(threedout, threedout2, setsf, options.path, last_iter,
                1.0 / targetres))

    E2end(logger)
Пример #7
0
def main():
	progname = os.path.basename(sys.argv[0])
	usage = """prog <output> [options]

	This program is used to preprocess subtomograms before aligning them. The same can be accomplished with 
	e2proc3d, except that this program is parallelized and thus should be substantially faster for large subtomograms.
	"""
			
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)
	

	parser.add_argument("--input", type=str, default='',help="""Default=None. The name of the input volume stack. MUST be HDF since volume stack support is required.""")
	
	parser.add_argument("--output", type=str, default='',help="""Default=None. Specific name of HDF file to write processed particles to.""")
		
	parser.add_argument("--parallel",type=str, default='', help="""default=None. Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel""")
	
	parser.add_argument("--ppid", type=int, help="""Default=-1. Set the PID of the parent process, used for cross platform PPID""",default=-1)
	
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="""Default=0. Verbose level [0-9], higner number means higher level of verboseness""")
		
	parser.add_argument("--subset",type=int,default=0,help="""Default=0 (not used). Refine only this substet of particles from the stack provided through --input""")

	parser.add_argument("--apix",type=float,default=0.0,help="""Default=0.0 (not used). Use this apix value where relevant instead of whatever is in the header of the reference and the particles. Will overwrite particle header as well.""")

	parser.add_argument("--shrink", type=int,default=0,help="""Default=0 (no shrinking). Optionally shrink the input volumes by an integer amount for coarse alignment.""")
		
	parser.add_argument("--threshold",type=str,default='',help="""Default=None. A threshold applied to the subvolumes after normalization. For example, --threshold=threshold.belowtozero:minval=0 makes all negative pixels equal 0, so that they do not contribute to the correlation score.""")
	
	parser.add_argument("--mask",type=str,default='', help="""Default=None. Masking processor applied to particles before alignment. IF using --clip, make sure to express outer mask radii as negative pixels from the edge.""")
	
	parser.add_argument("--maskfile",type=str,default='',help="""Default=None. Mask file (3D IMAGE) applied to particles before alignment. Must be in HDF format. Default is None.""")
	
	parser.add_argument("--normproc",type=str, default='',help="""Default=None (see 'e2help.py processors -v 10' at the command line). Normalization processor applied to particles before alignment. If normalize.mask is used, results of the mask option will be passed in automatically. If you want to turn this option off specify \'None\'""")
	
	parser.add_argument("--preprocess",type=str,default='',help="""Any processor (see 'e2help.py processors -v 10' at the command line) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""")
	
	parser.add_argument("--lowpass",type=str,default='',help="""Default=None. A lowpass filtering processor (see 'e2help.py processors -v 10' at the command line) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""")
	
	parser.add_argument("--highpass",type=str,default='',help="""Default=None. A highpass filtering processor (see 'e2help.py processors -v 10' at the command line) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""")
	
	parser.add_argument("--clip",type=int,default=0,help="""Default=0 (which means it's not used). Boxsize to clip particles. For example, the boxsize of the particles might be 100 pixels, but the particles are only 50 pixels in diameter. Aliasing effects are not always as deleterious for all specimens, and sometimes 2x padding isn't necessary.""")
	
	parser.add_argument("--nopath",action='store_true',default=False,help="""If supplied, this option will save results in the directory where the command is run. A directory to store the results will not be made.""")

	parser.add_argument("--path",type=str,default='sptpreproc',help="""Default=spt. Directory to store results in. The default is a numbered series of directories containing the prefix 'sptpreproc'; for example, sptpreproc_02 will be the directory by default if 'sptpreproc_01' already exists.""")

	
	(options, args) = parser.parse_args()
	
	logger = E2init(sys.argv, options.ppid)
	print "\n(e2spt_preproc)(main) started log"
	
	
	
	from e2spt_classaverage import sptmakepath
	
	if options.path and not options.nopath:
	
		options = sptmakepath(options,'sptpreproc')

	if options.parallel=='None' or options.parallel=='none':
		options.parallel=None
	
	
	
	if not options.input:
		try:
			options.input = sys.argv[1]
		except:
			print "\n(e2spt_preproc)(main) ERROR: invalid input file"
			
	if options.mask or options.maskfile or options.threshold or options.clip or options.threshold or options.normproc or options.preprocess or options.lowpass or options.highpass or int(options.shrink) > 1:
		
		preprocstack =  str(os.path.basename(options.input).replace('.hdf','_preproc.hdf'))
		
		if options.path and not options.nopath:
			preprocstack = options.path + '/' + preprocstack
		
		if options.output:
			if '.hdf' in options.output[-4:]:
				preprocstack = options.output
			else:
				print "\n(e2spt_preproc)(main) ERROR: '.hdf' must be the last four characters of the output filename."
			
		print "\n(e2spt_preproc)(main) output stack will be %s" %( preprocstack)

		n = 0
		try:
			n = EMUtil.get_image_count( options.input )
		except:
			print "\n(e2spt_preproc)(main) ERROR: --input stack seems to be invalid"
			sys.exit()
		
		print "\n(e2spt_preproc)(main) number of particles is %d" %( n) 
		
		
		c = os.getcwd()
		
		findir = os.listdir( c )
		
		if preprocstack not in findir:
		
			dimg = EMData(8,8,8)
			dimg.to_one()

			for i in range(n):
				dimg.write_image( preprocstack, i )
		
		else:
			print "\n(e2spt_preproc)(main) WARNING: a file with the name of the output stack %s is already in the current directory and will be DELETED" %( preprocstack )
			os.remove( preprocstack )
			
			dimg = EMData(8,8,8)
			dimg.to_one()

			for i in range(n):
				dimg.write_image( preprocstack, i )
		

		finalbox = EMData(options.input,0,True)['nx']
		if options.clip:
			finalbox=options.clip

		
		#dimglarge=EMData(finalbox,finalbox,finalbox)
		#dimglarge.to_one()
		#dimglarge.write_image(preprocstack,0)
		#n=EMUtil.get_image_count(options.input)
		#if options.subset:
		#	n=options.subset
		#dimglarge.write_image(preprocstack,n-1)

		if options.verbose:
			print "\n(e2spt_preproc)(main) wrote dummy ptcls to %s" %( preprocstack)
	
		
		print "\n(e2spt_preproc)(main) - INITIALIZING PARALLELISM!\n"
		
		if options.parallel:
			from EMAN2PAR import EMTaskCustomer
			etc=EMTaskCustomer(options.parallel)
			pclist=[options.input]

			etc.precache(pclist)
			print "\n(e2spt_preproc)(main) - precaching --input"

			tasks=[]
			results=[]
		
		
		from e2spt_classaverage import sptOptionsParser
		options = sptOptionsParser( options )
		
		
		for j in range(n):
			#print "processing  particle", j
			
			img = EMData( options.input, j )
			
			if options.parallel:
				#task = Preproc3DTask( ["cache",options.input,j], options, j, preprocstack )
				task = Preproc3DTask( img, options, j, preprocstack )
				tasks.append(task)
		
			else:
				img = EMData( options.input, j )
				pimg = preprocfunc( img, options, j, preprocstack)
								
		
		
		if options.parallel and tasks:
			tids = etc.send_tasks(tasks)
			if options.verbose: 
				print "\n(e2spt_preproc)(main) preprocessing %d tasks queued" % (len(tids)) 

	
			results = get_results( etc, tids, options )
		#print "\n(e2spt_preproc)(main) preprocessing results are", results	
		
		
		#print "\n(e2spt_preproc)(main) input changing to preprocstack"
		#options.input = preprocstack

		#cache needs to be reloaded with the new options.input		
		
	else:
		print "\n(e2spt_preproc)(main) Nothing to do. No preprocessing parameters specified."
		
	E2end(logger)
	
	return
Пример #8
0
def main():
	
	usage="""e2classifytree.py <projection> <particle> [options]
	
	Classify particles using a binary tree. Can be used as an alternative for e2simmx2stage.py + e2classify.py.
	"""
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)
	parser.add_argument("--threads", type=int,help="", default=12)
	parser.add_argument("--nodes", type=str,help="", default="nodes.hdf")
	#parser.add_argument("--clsmx", type=str,help="", default="clsmx.hdf")
	parser.add_argument("--output", type=str,help="", default="clsmx.hdf")
	parser.add_argument("--align",type=str,help="The name of an 'aligner' to use prior to comparing the images", default=None)
	parser.add_argument("--aligncmp",type=str,help="Name of the aligner along with its construction arguments",default="dot")
	parser.add_argument("--ralign",type=str,help="The name and parameters of the second stage aligner which refines the results of the first alignment", default=None)
	parser.add_argument("--raligncmp",type=str,help="The name and parameters of the comparitor used by the second stage aligner. Default is dot.",default="dot")
	parser.add_argument("--cmp",type=str,help="The name of a 'cmp' to be used in comparing the aligned images", default="dot:normalize=1")
	parser.add_argument("--cmpdiff", action="store_true", default=False ,help="Compare using the difference of the two children")
	parser.add_argument("--incomplete", type=int,help="The degree of incomplete allowed in the tree on each level", default=0)
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)
	parser.add_argument("--parallel", default=None, help="parallelism argument")
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")

	(options, args) = parser.parse_args()
	E2n=E2init(sys.argv,options.ppid)
	
	options.align=parsemodopt(options.align)
	options.aligncmp=parsemodopt(options.aligncmp)
	options.ralign=parsemodopt(options.ralign)
	options.raligncmp=parsemodopt(options.raligncmp)
	options.cmp=parsemodopt(options.cmp)
	
	projs=args[0]
	#projsimmx=args[1]
	ptcl=args[1]
	npj=EMUtil.get_image_count(projs)
	npt=EMUtil.get_image_count(ptcl)
	if options.parallel==None:
		par="thread:{:d}".format(options.threads)
	else:
		par=options.parallel
		
	### Build tree
	### always overwrite the tree here now
	#if not os.path.isfile(options.nodes):
	print "Building binary tree..."
	buildtree(projs,par,options.nodes,options.incomplete,options.verbose)
	#else:
		#print "Using existing tree..."
	
	## Generate children pairs for comparison
	print "Generating children pairs for comparison..."
	if options.cmpdiff:
		nodepath= os.path.dirname(options.nodes)
		masktmp='/'.join([nodepath,"tmp_msk.hdf"])
		if os.path.isfile(masktmp): os.remove(masktmp)
		cmptmp='/'.join([nodepath,"tmp_cmp.hdf"])
		if os.path.isfile(cmptmp):
			os.remove(cmptmp)
		makechildpair(options.nodes, cmptmp, masktmp)
	else:
		masktmp=None
		cmptmp=None
	
	E2progress(E2n,0.5)
	#exit()
	print "Starting classification..."
	### Classify particles
	
		
	clsmx=[EMData(1,npt) for i in range(7)]
	nnod=EMUtil.get_image_count(options.nodes)
	if options.parallel :
		from EMAN2PAR import EMTaskCustomer
		etc=EMTaskCustomer(options.parallel)
		tasks=[]
		step=50
		tt=[range(i,i+step) for i in range(0,npt-step,step)]
		tt.append(range(tt[-1][-1]+1,npt))
		
		for it in tt:
			tasks.append(TreeClassifyTask(ptcl, it, options.nodes, options.align, options.aligncmp, options.cmp, options.ralign, options.raligncmp, cmptmp, masktmp))
		
		taskids=etc.send_tasks(tasks)
		ptclpernode=[0 for i in range(nnod)]
		nfinished=0
		while len(taskids)>0 :
			haveprogress=False
			time.sleep(3)
			curstat=etc.check_task(taskids)
			for i,j in enumerate(curstat):
				if j==100 :
					haveprogress=True
					rslt=etc.get_results(taskids[i])
					rslt= rslt[1]
					for r in rslt:
						nfinished+=1
						if options.verbose>0: print "Particle:",r["id"],"\tnodes:",r["choice"]
						for c in r["choice"]:
							ptclpernode[c]+=1
						clsmx[0].set_value_at(0,r["id"],r["cls"])
						for nt in range(1,7):
							clsmx[nt].set_value_at(0,r["id"],r["simmx"][nt])
			
			taskids=[j for i,j in enumerate(taskids) if curstat[i]!=100]
			if haveprogress: print "{:d}/{:d} finished".format(nfinished,npt)
			E2progress(E2n, 0.5 + float(nfinished)/npt)
			
		for i in range(nnod):
			ndtmp=EMData(options.nodes,i,True)
			ndtmp["tree_nptls"]=ptclpernode[i]
			ndtmp.write_image(options.nodes,i)
	
	else:
		
		### To record the number of particles in each branch of the tree
		for i in range(nnod):
			ndtmp=EMData(options.nodes,i,True)
			ndtmp["tree_nptls"]=0
			ndtmp.write_image(options.nodes,i)
		t={}
		clsmx=[EMData(1,npt) for i in range(7)]
		for i in range(options.threads):
			ai=[x for x in range(npt) if x%options.threads==i]
			t[i]=threading.Thread(target=classify,args=(ptcl,ai,options.nodes,clsmx,options.align,options.aligncmp,options.cmp,options.ralign,options.raligncmp,cmptmp,masktmp))
			t[i].start()
		for i in range(options.threads):
			t[i].join()
		
	if os.path.isfile(options.output):
		os.remove(options.output)
	for  i in clsmx:
		i.write_image(options.output,-1)
	
	if options.cmpdiff:	
		os.remove(cmptmp)
		os.remove(masktmp)
	print "Finished~"
	E2progress(E2n,1.0)
	E2end(E2n)
Пример #9
0
class EMParallelSimMX:
	def __init__(self,options,args,logger=None):
		'''
		@param options the options produced by (options, args) = parser.parse_args()
		@param args the options produced by (options, args) = parser.parse_args()
		@param logger and EMAN2 logger, i.e. logger=E2init(sys.argv)
		assumes you have already called the check function.
		'''
		self.options = options
		self.args = args
		self.logger = logger


		from EMAN2PAR import EMTaskCustomer
		self.etc=EMTaskCustomer(options.parallel)
		if options.colmasks!=None : self.etc.precache([args[0],args[1],options.colmasks])
		else : self.etc.precache([args[0],args[1]])
		self.num_cpus = self.etc.cpu_est()
		if self.num_cpus < 32: # lower limit
			self.num_cpus = 32

		self.__task_options = None

	def __get_task_options(self,options):
		'''
		Get the options required by each task as a dict
		@param options is always self.options - the initialization argument. Could be changed.
		'''
		if self.__task_options == None:
			d = {}
			d["align"] = parsemodopt(options.align)
			d["aligncmp"] = parsemodopt(options.aligncmp)
			d["cmp"] = parsemodopt(options.cmp)

			if hasattr(options,"ralign") and options.ralign != None:
				d["ralign"] = parsemodopt(options.ralign)
				d["raligncmp"] = parsemodopt(options.raligncmp)  # raligncmp must be specified if using ralign
			else:
				d["ralign"] = None
				d["raligncmp"] = None
			d["prefilt"]=options.prefilt

			if hasattr(options,"shrink") and options.shrink != None: d["shrink"] = options.shrink
			else: d["shrink"] = None


			self.__task_options = d

		return self.__task_options

	def __init_memory(self,options):
		'''
		@param options is always self.options - the initialization argument. Could be changed.
		Establishes several important attributes they are:
		----
		self.clen - the number of images in the image defined by args[0], the number of columns in the similarity matrix
		self.rlen - the number of images in the image defined by args[1], the number of rows in the similarity matrix
		----
		Also, since we adopted region output writing as our preferred approach, this function makes sure the output
		image(s) exists on disk and has the correct dimensions - seeing as this is the way region writing works (the image
		has to exist on disk and have its full dimensions)
		'''
		self.clen=EMUtil.get_image_count(self.args[0])
		self.rlen=EMUtil.get_image_count(self.args[1])

		output = self.args[2]

		if file_exists(output) and not options.fillzero:
			if options.force: remove_file(output)
			else: raise RuntimeError("The output file exists. Please remove it or specify the force option")

		e = EMData(self.clen,self.rlen)
		e.to_zero()
		e.set_attr(PROJ_FILE_ATTR,self.args[0])
		e.set_attr(PART_FILE_ATTR,self.args[1])
		n = 1
		if self.options.saveali: n = 6 # the total number of images written to disk
		if not options.fillzero : e.write_image(output,0)
		for i in range(1,n):
			e.write_image(output,i)

	def __get_blocks(self):
		'''
		Gets the blocks that will be processed in parallel, these are essentially ranges
		'''

		steve_factor = 3 # increase number of jobs a bit for better distribution
		total_jobs = steve_factor*self.num_cpus

		[col_div,row_div] = opt_rectangular_subdivision(self.clen,self.rlen,total_jobs)


		block_c = self.clen/col_div
		block_r = self.rlen/row_div

		residual_c = self.clen-block_c*col_div # residual left over by integer division

		blocks = []

		current_c = 0
		for c in xrange(0,col_div):
			last_c = current_c + block_c
			if residual_c > 0:
				last_c += 1
				residual_c -= 1

			current_r = 0
			residual_r = self.rlen-block_r*row_div # residual left over by integer division
			for r in xrange(0,row_div) :
				last_r = current_r + block_r
				if residual_r > 0:
					last_r += 1
					residual_r -= 1


				blocks.append([current_c,last_c,current_r,last_r])
				current_r = last_r

			current_c = last_c

#		print col_div,row_div,col_div*row_div
#		print self.clen,self.rlen,residual_c,residual_r
		return blocks

	def execute(self):
		'''
		The main function to be called
		'''
		if len(self.options.parallel) > 1 :
			self.__init_memory(self.options)
			blocks = self.__get_blocks()
#			print blocks

#			self.check_blocks(blocks) # testing function can be removed at some point

			tasks=[]
			for bn,block in enumerate(blocks):

				data = {}
				data["references"] = ("cache",self.args[0],block[0],block[1])
				data["particles"] = ("cache",self.args[1],block[2],block[3])
				if self.options.colmasks!=None : data["colmasks"] = ("cache",self.options.colmasks,block[0],block[1])
				if self.options.mask!=None : data["mask"] = ("cache",self.options.mask,0,1)
				if self.options.fillzero :
					# for each particle check to see which portion of the matrix we need to fill
					if (bn%10==0) : print "%d/%d     \r"%(bn,len(blocks)),
					sys.stdout.flush()
					rng=[]
					for i in range(block[2],block[3]):
						c=EMData()
						c.read_image(self.args[2],0,False,Region(block[0],i,block[1]-block[0]+1,1))
						inr=0
						st=0
						for j in range(c["nx"]):
							if c[j]==0 and not inr:
								st=j
								inr=1
							if c[j]!=0 and inr:
								rng.append((i,st+block[0],j-1+block[0]))
								inr=0
						if inr :
							rng.append((i,st+block[0],j+block[0]))
					data["partial"]=rng
#					print "%d) %s\t"%(bn,str(block)),rng

				if self.options.fillzero and len(data["partial"])==0 : continue		# nothing to compute in this block, skip it completely
				else :
					task = EMSimTaskDC(data=data,options=self.__get_task_options(self.options))
					#print "Est %d CPUs"%etc.cpu_est()
					tasks.append(task)

			# This just verifies that all particles have at least one class
			#a=set()
			#for i in tasks:
				#for k in i.data["partial"] : a.add(k[0])

			#b=set(range(self.rlen))
			#b-=a
			#print b

			print "%d/%d         "%(bn,len(blocks))
			self.tids=self.etc.send_tasks(tasks)
			print len(self.tids)," tasks submitted"
#
			while 1:
				if len(self.tids) == 0: break
				print len(self.tids),"simmx tasks left in main loop   \r",
				sys.stdout.flush()
				st_vals = self.etc.check_task(self.tids)
				for i in xrange(len(self.tids)-1,-1,-1):
					st = st_vals[i]
					if st==100:
						tid = self.tids[i]

						try:
							rslts = self.etc.get_results(tid)
#							display(rslts[1]["rslt_data"][0])
							self.__store_output_data(rslts[1])
						except:
							traceback.print_exc()
							print "ERROR storing results for task %d. Rerunning."%tid
							self.etc.rerun_task(tid)
							continue
						if self.logger != None:
							E2progress(self.logger,1.0-len(self.tids)/float(len(blocks)))
							if self.options.verbose>0:
								print "%d/%d\r"%(len(self.tids),len(blocks))
								sys.stdout.flush()

						self.tids.pop(i)
					print len(self.tids),"simmx tasks left in main loop   \r",
					sys.stdout.flush()


				time.sleep(10)
			print "\nAll simmx tasks complete "

			# if using fillzero, we must fix the -1.0e38 values placed into empty cells
			if self.options.fillzero :
				l=EMData(self.args[2],0,True)
				rlen=l["ny"]
				clen=l["nx"]
#				launch_childprocess("e2proc2d.py %s %s"%(self.args[2],self.args[2]+"_x"))
				print "Filling noncomputed regions in similarity matrix (%dx%d)"%(clen,rlen)
				l=EMData()
				for r in range(rlen):
					l.read_image(self.args[2],0,False,Region(0,r,clen,1))
					fill=l["maximum"]+.0001
					l.process_inplace("threshold.belowtominval",{"minval":-1.0e37,"newval":fill})
					l.write_image(self.args[2],0,EMUtil.ImageType.IMAGE_UNKNOWN,False,Region(0,r,clen,1))

				print "Filling complete"



		else: raise NotImplementedError("The parallelism option you specified (%s) is not supported" %self.options.parallel )

	def __store_output_data(self,rslts):
		'''
		Store output data to internal images (matrices)
		@param a dictionary return by the EMSimTaskDC
		'''

		result_data = rslts["rslt_data"]
		output = self.args[2]

		insertion_c = rslts["min_ref_idx"]
		insertion_r = rslts["min_ptcl_idx"]
		result_mx = result_data[0]
		r = Region(insertion_c,insertion_r,result_mx.get_xsize(),result_mx.get_ysize())

		# Note this is region io - the init_memory function made sure the images exist and are the right dimensions (on disk)
		for i,mxout in enumerate(result_data):
			mxout.write_image(output,i,EMUtil.ImageType.IMAGE_UNKNOWN,False,r)
Пример #10
0
def main():
	progname = os.path.basename(sys.argv[0])
	usage = """prog [options] stack1.hdf stack2.mrcs ...

	Program to erase gold fiducials and other high-density features from images, such as frames in DDD movies or images in tiltseries. Requires scipy.
	"""

	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	parser.add_argument("--average", default=False, action="store_true", help="Erase gold from average of input stack(s).")
	parser.add_argument("--lowpass", default=False, action="store_true", help="Also lowpass filter noise based on local properties. Useful for processing tomographic tilt series.")
	parser.add_argument("--keepdust", default=False, action="store_true", help="Do not remove 'dust' from mask (include objects smaller than gold fiducials).")
	parser.add_argument("--goldsize", default=30, type=float, help="Diameter (in pixels) of gold fiducials to erase.")
	parser.add_argument("--downsample", default=1.0, type=float, help="Downsample the input stack(s). Default is 1, i.e. no downsampling.")
	parser.add_argument("--oversample", default=4, type=int, help="Oversample noise image to smooth transitions from regions with different noise.")
	parser.add_argument("--boxsize", default=128, type=int, help="Box size to use when computing local noise.")
	parser.add_argument("--debug", default=False, action="store_true", help="Save noise and mask/masked image(s).")
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-2)
	parser.add_argument("--parallel",type=str, default=None, help="""Default=None (not used). Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel""")
	parser.add_argument("--subset", default=0, type=int, help="Default=0 (not used). Apply algorithm to only a subset of images in each stack file.")
	parser.add_argument("--nsigmas", default=3.0,type=float, help="Default=3.0. Number of standard deviations above the mean to determine pixels to mask out (erase).")



	(options, args) = parser.parse_args()

	nfiles = len(args)

	logger = E2init(sys.argv, options.ppid)
	print "\n(e2tomopreproc)(main) started log"	

	if options.parallel == 'None' or options.parallel == 'none':
		options.parallel == None

	if options.parallel:
		from EMAN2PAR import EMTaskCustomer
		etc=EMTaskCustomer(options.parallel)

	for arg in args:
		newarg=''
		originalarg = arg

		hdr = EMData(arg,0,True) #load header only to get parameters used below
		apix = hdr['apix_x']
		nx=hdr['nx']
		ny=hdr['ny']

		if '.ali' == arg[-4:] or '.mrc' == arg[-4:]:
			
			#Unfortunately, e2proc2d.py appends to existing files instead of overwriting them. If you run this program two consecutive times and the first one failed for whatever reason, 
			#you'll find your stack growing.
			#To prevent this, we create a 'dummy' file, but first remove any dummy files from previous failed runs. (If the program runs successfully to the end, the dummy file gets renamed).
			try: os.remove('dummy_stack.hdf')
			except: pass

			#turn .ali or .mrc 3D images into a stack of 2D images that can be processed by this program. 
			cmd = 'e2proc2d.py ' + arg + ' dummy_stack.hdf --threed2twod'
			if options.subset:
					cmd += ' --first 0 --last ' + str(options.subset-1)
			runcmd(options,cmd)

			#make the new stack of 2D images (dumy_stack.hdf) the new input (the name of the input file but with .hdf format); this intermediate file will be deleted in the end.
			newarg = arg.replace(arg[-4:],'.hdf')
			os.rename('dummy_stack.hdf',newarg)
			arg = newarg

		if options.verbose: print("processing {} ({} images)".format(arg, EMUtil.get_image_count(arg)))
		
		#Averaging can be outsorced to e2proc2d via the command line, and the average can be read in as the new input
		if options.average:
			
			newarg = arg.replace('.hdf','_avg.hdf')
			
			cmdavg = 'e2proc2d.py ' + arg + ' ' + newarg + ' --average'

			if ds > 1.0:
				cmdavg += ' --process math.fft.resample:n=' + str(ds)

			cmdavg += ' --process normalize'

			runcmd(options,cmdavg)

			arg = newarg

		#The code to operate on frame averages seems to be the same as that to operate on single images; no need for redundancy.
		'''
			avgr = Averagers.get("mean")
			for i in range(EMUtil.get_image_count(fn)):
				f = EMData(fn,i) * -1
				if ds > 1.0: f.process_inplace("math.fft.resample",{"n":ds})
				avgr.add_image(f)
			img = avgr.finish()
			img.process_inplace("normalize")

			sharp_msk, soft_msk = generate_masks(options,img)
			mskd_sharp = sharp_msk*img
			sub_sharp = img-mskd_sharp
			noise = local_noise(options,sub_sharp)

			if options.debug: noise.write_image("{}_noise.hdf".format(arg))

			mskd_soft = soft_msk*img
			sub_soft = img-mskd_soft
			result = sub_soft + noise * soft_msk
			result *= -1

			print("Writing result to {}".format(outf))

			result.write_image(outf,0)
			avg.write_image("{}_compare.hdf".format(arg),0)
			result.write_image("{}_compare.hdf".format(arg),1)
		'''
		#else:
		#ctr = 0

		outf = "{}_proc.hdf".format( os.path.splitext(arg)[0] )

		nfs = EMUtil.get_image_count(arg)

		tasks=[]
		results=[]
		results=None

		#parallelized tasks don't run "in order"; therefore, a dummy stack needs to be pre-created with as many images as the final stack will have 
		#(otherwise, writing output images to stack indexes randomly makes the program crash or produces garbage output)
		dummy=EMData(8,8)
		dummy.to_one()
		dummy['apix_x']=apix
		dummy['apix_y']=apix
		for j in range(nfs):
			dummy.write_image(outf,j)

		#EMAN2 does not allow stacks of images with different size; this, and possibly some bug, prevent images written from the parallelization task from
		#having the corret size if the pre-created dummy doesn't have the correct size to begin with. No point in writing big images for the dummy from the start.
		#re-writing the index=0 image will change the size of all images in the stack to the correct size
		dummy_correct_size = EMData(nx,ny)
		dummy_correct_size.to_one()
		dummy_correct_size['apix_x']=apix
		dummy_correct_size['apix_y']=apix
		dummy.write_image(outf,0)

		print "outf",outf

		if options.parallel:
			cmdunstacking = 'e2proc2d.py ' + arg + ' erasegold_tmp.hdf --unstacking'
			runcmd(options,cmdunstacking)

		if options.subset:
			nfs=options.subset

		for i in range(nfs):
			
				#if i > options.subset -1:
				#	break

			if options.verbose: print "processing image {}/{}".format(i,nfs)
			
			if options.parallel:
				print "parallelism started"
				thisimg = 'erasegold_tmp-' + str(i+1).zfill(len(str(nfs))) + '.hdf'			#c: when e2proc2d.py unstacks images, it starts from 1, not from 0
				thisoutf = 'erasegold_tmp-' + str(i+1).zfill(len(str(nfs))) + '_proc.hdf'
				task = EraseGold2DTask( options, thisimg, 0, thisoutf,nfs)
				tasks.append(task)
			else:
				results=fiximage( options, arg, i, outf,nfs)

		if options.parallel:	
			if tasks:
				tids = etc.send_tasks(tasks)
				if options.verbose: 
					print "\n(erase_gold)(main) preprocessing %d tasks queued" % (len(tids)) 

				results = get_results( etc, tids, options )

		if results:
			#pass

			if options.parallel:
				#outfstem = outf.replace('.hdf','')
				cmdbuildstack = 'e2buildstacks.py erasegold_tmp-*_proc.hdf --stackname ' + outf
				runcmd(options,cmdbuildstack)

				if options.debug:
					outfmasked = outf.replace('.hdf','_masked.hdf')
					cmdbuildstack = 'e2buildstacks.py erasegold_tmp-*_masked.hdf --stackname ' + outfmasked
					runcmd(options,cmdbuildstack)

					outfnoise= outf.replace('.hdf','_noise.hdf')
					cmdbuildstack = 'e2buildstacks.py erasegold_tmp-*_noise.hdf --stackname ' + outfnoise
					runcmd(options,cmdbuildstack)



			if '.ali' == originalarg[-4:] or '.mrc' == originalarg[-4:]:
				#intermediate = arg.replace('.hdf','.mrcs')
				finaloutput = outf.replace('.hdf',originalarg[-4:])
				cmd = 'e2proc2d.py ' + outf + ' ' + finaloutput + ' --twod2threed --outmode int16'
				
				#print "\ncomand to generate finaloutput",cmd
				runcmd(options,cmd)
				os.remove(arg)

			if newarg: 
				try:
					os.remove(newarg)
				except:
					try:
						#print "would have removed",newarg.replace('.hdf','_proc.hdf')
						os.remove(newarg.replace('.hdf','_proc.hdf'))
					except:
						pass
		try:
			filelist = [ tmpf for tmpf in os.listdir(".") if 'erasegold_tmp' in tmpf ]
			for tf in filelist:
			    os.remove(tf)
		except:
			print "WARNING: cleanup failed."

	
	E2end(logger)

	return
Пример #11
0
def main():

	usage = """e2tomopreproc.py <imgs> <options> . 
	This program takes a tiltseries ('.st' or '.ali' file from IMOD) and applies preprocessing operations to them, such as lowpass, highpass, masking, etc.
	The options should be supplied in "--option=value" format, replacing "option" for a valid option name, and "value" for an acceptable value for that option. 
	"""
			
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)	
	
	parser.add_argument("--path",type=str,default='',help="""Directory to store results in. 
		The default is a numbered series of directories containing the prefix 'tomopreproc';
		for example, tomopreproc_02 will be the directory by default if 'tomopreproc_01' 
		already exists.""")
	
	parser.add_pos_argument(name="stack_files",default="",help="Stacks or images to process.")
	
	parser.add_argument("--input",type=str,default='',help=""""tiltseries to process. redundant with --tiltseries, or with providing images as arguments (separated by a space: e2tomopreproc.py stack1.hdf stack2.hdf), but --input takes precedence.""")
	
	parser.add_argument("--tiltseries",type=str,default='',help=""""tiltseries to process. redundant with --input""")

	parser.add_argument("--tltfile",type=str,default='',help="""".tlt file containing the tilt angles for --tiltseries""")
	
	parser.add_argument("--outmode", type=str, default='', help="""All EMAN2 programs write images with 4-byte floating point values when possible by default. This allows specifying an alternate format when supported: float, int8, int16, int32, uint8, uint16, uint32. Values are rescaled to fill MIN-MAX range.""")
	
	parser.add_argument("--dontcleanup", action='store_true', default=False, help="""If specified, intermediate files will be kept.""")
	
	parser.add_argument("--clip",type=str,default='',help="""Default=None. This resizes the 2-D images in the tilt series. If one number is provided, then x and y dimensions will be made the same. To specify both dimensions, supply two numbers, --clip=x,y. Clipping will be about the center of the image.""")
			
	#parser.add_argument("--apix",type=float,default=0.0,help="""True apix of images to be written on final stack.""")
	
	parser.add_argument("--shrink", type=float,default=0.0,help="""Default=0.0 (no shrinking). Can use decimal numbers, larger than 1.0. Optionally shrink the images by this factor. Uses processor math.fft.resample.""")
		
	parser.add_argument("--threshold",type=str,default='',help="""Default=None. A threshold processor applied to each image.""")
	
	parser.add_argument("--mask",type=str,default='', help="""Default=None. Masking processor applied to each image.""")
	
	parser.add_argument("--maskbyangle",action='store_true',default=False,help="""Default=False. Requires --tltfile. This will mask out from tilted images the info that isn't present at the 0 tilt angle. It uses the tomo.tiltedgemask processor (type 'e2help.py processors' at the commandline to read a description of the processor and its parameters). Provide --maskbyanglefalloff and --maskbyanglesigma to modify the default parameters.""")
	
	parser.add_argument("--maskbyanglefalloff", type=int, default=4,help="""Default=4. Number of pixels over which --maskbyangle will fall off to zero.""")
	
	parser.add_argument("--maskbyanglesigma", type=float, default=2.0,help="""Default=2.0. Number of sigmas for the width of the gaussian fall off in --maskbyangle and --maskbyanglefalloff""")
	
	parser.add_argument("--normproc",type=str, default='',help="""Default=None (see 'e2help.py processors -v 10' at the command line). Normalization processor applied to each image.""")
	
	parser.add_argument("--normalizeimod",action='store_true',default=False,help="""Default=False. This will apply 'newstack -float 2' to the input stack. requires IMOD.""")
	
	parser.add_argument("--preprocess",type=str,default='',help="""Any processor (see 'e2help.py processors -v 10' at the command line) to be applied to each image.""")
	
	parser.add_argument("--lowpassfrac",type=float,default=0.0,help="""Default=0.0 (not used). Fraction of Nyquist to lowpass at. The processor used is filter.lowpass.tanh""")
	
	parser.add_argument("--highpasspix",type=int,default=0,help="""Default=0 (not used). Number of Fourier pixels to apply highpass filter at. The processor used is filter.highpass.gauss.""")
	
	parser.add_argument("--parallel",type=str, default="thread:1", help="""default=thread:1. Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel""")
	
	parser.add_argument("--prenadminite",type=int, default=0, help="""Default=0. Requires IMOD to be installed. Used to apply prenad filtering to a tiltseries. This is the --minite parameter in IMOD's preNAD program (minimum number of iterations).""")
	
	parser.add_argument("--prenadmaxite",type=int, default=0, help="""Default=0. Requires IMOD to be installed. Used to apply prenad filtering to a tiltseries. This is the --maxite parameter in IMOD's preNAD program (maximum number of iterations).""")
	
	parser.add_argument("--prenadsigma",type=int, default=0, help="""Default=0. Requires IMOD to be installed. Used to apply prenad filtering to a tiltseries. This is the --sigma parameter in IMOD's preNAD program (initial sigma for 'smoothing structure tensor').""")
	
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n",type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness.")
	
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)

	(options, args) = parser.parse_args()	
	

	logger = E2init(sys.argv, options.ppid)
	print "\n(e2tomopreproc)(main) started log"	
	
	from e2spt_classaverage import sptmakepath
	
	options = sptmakepath(options,'tomopreproc')
	
	#print "args are",args

	infiles = []
	if not options.input:
		#try:
		#	infiles.append( sys.argv[1] )
		#except:
		if options.tiltseries:
			infiles.append( options.tiltseries )
		else:
			if args:
				print "copying args to infiles"
				infiles = list(args)
				print "infiles are", infiles
			else:
				print "\n(e2tomopreproc)(main) ERROR: must provide input files as arguments or via the --input or --tiltseries parameters."


	if infiles:
		print "\n(e2tomopreproc)(main) identified --input", options.input
		#print " .ali in options.input[:-4]", '.ali' in options.input[-4:]
		#print "options.input[-4] is", options.input[-4:]
		
		for infile in infiles:
			if '.ali' in infile[-4:] or '.st' in infile[-3:] or '.mrc' in infile[-4:] or '.mrcs' in infile[-5:] or '.hdf' in infile[-4:]:
				pass
			else:
				print "\n(e2tomopreproc)(main) ERROR: invalid image extension %s for image %s. Extension must be .st, .ali, .hdf, .mrc or .mrcs" %(options.input.split('.')[-1], infile)
				sys.exit(1)
	else:
		print "\n(e2tomopreproc)(main) ERROR: no images found/provided"
		sys.exit(1)
		
	originalextension = infiles[0].split('.')[-1]
	
	angles = {}
	if options.maskbyangle or (options.prenadminite and options.prenadmaxite and options.prenadsigma):
	
		if not options.tltfile:
			print "\n(e2tomopreproc)(main) ERROR: --maskbyangle and --prenad parameters require --tltfile"
			sys.exit(1)
		
		else:
			f = open( options.tltfile, 'r' )
			lines = f.readlines()
			print "\nnumber of lines read from --tltfile", len(lines)
			f.close()
			#print "lines in tlt file are", lines
			k=0
			for line in lines:
				line = line.replace('\t','').replace('\n','')
	
				if line:
					angle = float(line)
					angles.update( { k:angle } )
					if options.verbose:
						print "appending angle", angle
					k+=1
			if len(angles) < 2:
				print "\nERROR: something went terribly wrong with parsing the --tltlfile. This program does not work on single images"
				sys.exit()

		if len(angles) < 2:
			print "\nERROR: (second angle check) something went terribly wrong with parsing the --tltlfile. This program does not work on single images"
			sys.exit()
				
	
	
	
	
	print "\n(e2spt_preproc)(main) - INITIALIZING PARALLELISM!\n"

	from EMAN2PAR import EMTaskCustomer
	etc=EMTaskCustomer(options.parallel)
	pclist=[options.input]

	etc.precache(pclist)
	print "\n(e2spt_preproc)(main) - precaching --input"

	tasks=[]
	results=[]
	
	mrcstacks = []
	print "there are these many infiles to loop over", len(infiles)



	if options.lowpassfrac:
		hdr = EMData( infiles[0], 0, True )
		apix = hdr['apix_x']
		print "\n(e2spt_preproc)(main) apix is",apix
		nyquist = 2.0 * apix
		print "\n(e2spt_preproc)(main) therefore nyquist resolution is", nyquist
		print
		lowpassres = nyquist/options.lowpassfrac
		
		options.lowpassfrac = 1.0/(lowpassres)
		if float(options.shrink) > 1.0:
			options.lowpassfrac /= float(options.shrink)
			
			print "there's shrinking", options.shrink
			lowpassres = nyquist/options.lowpassfrac

		print "\n(e2spt_preproc)(main) and final lowpass frequency is", options.lowpassfrac

		print "corresponding to lowpassres of",lowpassres

	for infile in infiles:
	
		mrcstack = options.path + '/' + infile
		print "infile is", infile
		print "infile[-5:] is ", infile[-5:]
		if '.hdf' in infile[-5:]:
			print "replacing .hdf extension"
			mrcstack = options.path + '/' + infile.replace('.hdf','.mrc')
	
		if '.mrcs' in infile[-5:]:
			print "replacing .mrcs extension"
			mrcstack = options.path + '/' + infile.replace('.mrcs','.mrc')
	
		if '.st' in infile[-5:]:
			print "replacing .st extension"
			mrcstack = options.path + '/' + infile.replace('.st','.mrc')	

		if '.ali' in infile[-5:]:
			print "replacing .ali extension"
			mrcstack = options.path + '/' + infile.replace('.ali','.mrc')
			
		if '.tif' in infile[-5:]:
			print "replacing .ali extension"
			mrcstack = options.path + '/' + infile.replace('.tif','.mrc')
	
		#go = 0
		#if go:
		print "mrcstack is",mrcstack
		
		#outname = outname.replace('.mrc','.mrcs')
	
		mrcstacks.append( mrcstack )
		
		go = 0
		if options.maskbyangle:
			outname = mrcstack.replace('.mrc','_UNSTACKED.mrc')
			print "therefore, outname is", outname
	
			cmd = 'e2proc2d.py ' + infile + ' ' + outname + ' --unstacking --threed2twod'

			#from shutil import copyfile
			#copyfile(options.input, outname)
			#print "copied input to", outname

			if options.outmode:
				cmd += ' --outmode=' + options.outmode

			if options.verbose:
				cmd += ' --verbose=' + str(options.verbose)
				print "\ncommand to unstack original input tiltseries is", cmd	

			print "\n(e2tomopreproc)(main) unstacking command is", cmd

			p = subprocess.Popen( cmd , shell=True,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			#p = subprocess.Popen( cmd , shell=True, stdout=subprocess.PIPE)

			text = p.communicate()	
			#p.stdout.close()

			p.wait()
		
			if p.returncode == 0:
				go = 1
		else:
			go = 1
	
		
		if go:

			imgs = []
			if options.maskbyangle:
				c = os.getcwd() + '/' + options.path 
				findir = os.listdir( os.getcwd() + '/' + options.path )

				print "\n(e2tomopreproc)(main) directory to look for images is", c	
				for f in findir:
					#if '.mrcs' in f:
					if "_UNSTACKED" in f:
						imgs.append( options.path + '/' +f )

				kk=0
				imgs.sort()
				print "\n(e2spt_preproc)(main) found these many images", len( imgs )		

				for img in imgs:
					#task=None

					#if options.maskbyangle:
					outimage = img.replace('.mrc','_preproc.mrc')
					task = TomoPreproc2DTask( img, options, angles[kk], outimage )
					tasks.append(task)
					kk+=1
			else:
				outimage = options.path + '/' + infile.replace('.mrc','_preproc.mrcs')
				task = TomoPreproc2DTask( infile, options, 0, outimage )
				tasks.append(task)
				
					
			#else:
			#	newmrcs = mrcstack.replace('.mrc','.mrcs')
			#	print "copying file %s to %s" %(infile,newmrcs)
			#	copyfile( infile, newmrcs  )
			#	imgs.append( newmrcs )
			
			

			

				
				#print "and the final lowpass frequency will be", options.lowpassfrac

			

			
	tids = etc.send_tasks(tasks)
	if options.verbose: 
		print "\n(e2spt_preproc)(main) preprocessing %d tasks queued" % (len(tids)) 

	results = get_results( etc, tids, options )

	print "\n(e2tomopreproc)(main) these many images have been processsed",len(results)

	
	imgspreproc = []
	findir = os.listdir( os.getcwd() + '/' + options.path )
	
	#for mrcstack in mrcstacks:


	for f in findir:
		if "_preproc.mrc" in f:
			print "found preprocessed image", f
			imgspreproc.append( options.path + '/' + f )
		else:
			print "this file is NOT a preprocessed image", f

	imgspreproc.sort()

	print "\n(e2tomopreproc)(main) these many preprocessed images loaded", len(imgspreproc)
	
	finalfiles=[]
	
	if options.maskbyangle:
		
		outfile = mrcstack.replace('.mrc','.mrcs')
		print "for RESTACKING"
		print "\n\n\noutfile is", outfile

		for f in imgspreproc:
			print "appending image %s to outfile %s" %(f,outfile)			
			cmd = 'e2proc2d.py ' + f + ' ' + outfile
			if options.outmode:
				cmd += ' --outmode=' + options.outmode

			if options.verbose:
				cmd += ' --verbose ' + str(options.verbose)

			print "\ncmd is with .mrcs outputformat is", cmd
			print "becauase outfile is",outfile	
			p = subprocess.Popen( cmd , shell=True,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
			text = p.communicate()	
			p.stdout.close()		
	
		finaloutput = outfile.replace('.mrcs', '.' + originalextension)
		os.rename( outfile, finaloutput )
		
		finalfiles.append( finaloutput )
	else:
		finalfiles = list( imgspreproc )
	
	
	for finalf in finalfiles:
		if not options.tltfile:
			break
	
		if options.normalizeimod:
			try:
				cmd = 'newstack ' + finalf + ' ' + finalf + ' --float 2'
				print "normalizeimod cmd is", cmd
				p = subprocess.Popen( cmd , shell=True,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
				text = p.communicate()	
				p.wait()
			except:
				print "\nERROR: --normalizeimod skipped. Doesn't seem like IMOD is installed on this machine"		

		if not options.dontcleanup and options.maskbyangle:
			purge( options.path, '_preproc.mrc')
			purge( options.path, '_UNSTACKED')	
			purge( options.path, '~')
		
		if options.tltfile:
			if options.prenadminite or options.prenadmaxite or options.prenadsigma:

				if options.prenadminite and options.prenadmaxite and options.prenadsigma:
					cmd = 'preNAD -input ' + finalf + ' -output ' + finalf.replace('.'+originalextension, '_prenad.' + originalextension) + ' -minite ' + str(options.prenadminite) + ' -maxite ' + str(options.prenadmaxite) + ' -sigma ' + str(options.prenadsigma) + ' -angles ' + options.tltfile 
					if options.verbose:
						print "\n(e2tomopreproc)(main) prenad cmd to run is", cmd
					try:
						p = subprocess.Popen( cmd , shell=True,stdout=subprocess.PIPE, stderr=subprocess.PIPE)
						text = p.communicate()	
						p.wait()
					except:
						print "\nERROR: check that a version of IMOD containing the preNAD program is correctly installed on this machine"

				else:
					if options.prenadminite:
						if not options.prenadmaxite:
							print "\nERROR: --prenadmaxite required with --prenadminite"
						if not options.prenadsigma:
							print "\nERROR: --prenadsigma required with --prenadminite"

					if options.prenadmaxite:
						if not options.prenadminite:
							print "\nERROR: --prenadminite required with --prenadmaxite"
						if not options.prenadsigma:
							print "\nERROR: --prenadsigma required with --prenadmaxite"

					if options.prenadsigma:
						if not options.prenadminite:
							print "\nERROR: --prenadminite required with --prenadsigma"
						if not options.prenadmaxite:
							print "\nERROR: --prenadmaxite required with --prenadsigma"
					
		
	E2end(logger)	
	return()
Пример #12
0
def main():
	"""Program to validate a reconstruction by the Richard Henderson tilt validation method. A volume to validate, a small stack (~100 imgs) of untilted and ~10-15 degree
	tilted particles must be presented. The untilted and tilted particle stack must have a one-to-one relationship. In the contour plot, the Tiltaxis is along positive 'Y'
	The tiltaxis angle can be determined from e2RCTboxer.py using PairPicker mode. For example, if the tiltaxis is 45 degrees and the tilt angle is -15 degrees, there should
	be a peak in the -X, -Y quadrant at 225 degrees at a magnitude of 15.
	For more details see:
	Optimal Determination of Particle Orientation, Absolute Hand, and Contrast Loss in Single-particle Electron Cryomicroscopy. Rosenthal, P.B., and Henderson, R. JMB, 333 (2003) pg 721-745

	Flow of this function:
	  * --gui only displays existing plots and exits.
	  * --eulerfile computes the tilt distances from a text file and exits.
	  * otherwise the volume is projected with e2project3d.py, both particle
	    stacks are compared against the projections with e2simmx.py, the tilts
	    are computed from those results and, with --docontourplot, one
	    CompareToTiltTask per particle is run to build contour.hdf and
	    distplot.hdf in the run directory.
	"""
	usage = """prog [options]
	Tiltvalidation using Richard Henderson's technique. To use a stack of untilted and tiltimages whose set relationship is one-to-one is required along with a
	volume to validate. This can be generated using e2RCTboxer.py. After running this program two bits of data are products. A contour plot similar to Figure 5 in the Henderson paper(see below), and a list of
	titlangles and tiltaxes between particle paris, which can be used to makes plot similar to Figure 6 in Hendersons paper. The contour plot is stored as contour.hdf and the tiltpairs data is
	stored as bdb:perparticletilts.
	For more information see:
	Optimal determination of particle orientation, absolute hand, and contrast loss in 
	single-particle electron cryomicroscopy.
	Rosenthal PB, Henderson R.
	J Mol Biol. 2003 Oct 31;333(4):721-45 
	"""
	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	# options associated with e2tiltvalidate.py
	parser.add_header(name="tvheader", help='Options below this label are specific to e2tiltvalidate', title="### e2tiltvalidate options ###", row=3, col=0, rowspan=1, colspan=2, mode="analysis,gui")

	# "analysis" mode options
	parser.add_argument("--untiltdata", type=str,help="Stack of untilted images",default=None, guitype='filebox', browser='EMSetsTable(withmodal=True,multiselect=False)', row=0, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--tiltdata", type=str,help="Stack of tilted images",default=None, guitype='filebox', browser='EMSetsTable(withmodal=True,multiselect=False)', row=1, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--volume", type=str,help="3D volume to validate",default=None, guitype='filebox', browser='EMModelsTable(withmodal=True,multiselect=False)', row=2, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--maxtiltangle", type=float, help="Maximum tiltangle permitted when finding tilt distances", default=180.0, guitype='floatbox', row=4, col=0, rowspan=1, colspan=1, mode="analysis")
	parser.add_argument("--quaternion",action="store_true",help="Use Quaterions for tilt distance computation",default=False, guitype='boolbox', row=4, col=1, rowspan=1, colspan=1, mode='analysis')
	parser.add_argument("--sym",  type=str,help="The recon symmetry", default="c1", guitype='symbox', row=5, col=0, rowspan=1, colspan=1, mode="analysis")
	parser.add_argument("--docontourplot",action="store_true",help="Compute a contour plot",default=False, guitype='boolbox',row=6,col=0, rowspan=1, colspan=1, expert=True, mode="analysis")
	parser.add_argument("--tiltrange", type=int,help="The angular tiltrange to search",default=15, guitype='intbox', row=6, col=1, rowspan=1, colspan=1, expert=True, mode="analysis")
	parser.add_argument("--align", type=str,help="The name of a aligner to be used in comparing the aligned images",default="translational", guitype='comboparambox', choicelist='re_filter_list(dump_aligners_list(),\'refine|3d\', 1)', expert=True, row=7, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--cmp", type=str,help="The name of a 'cmp' to be used in comparing the aligned images",default="ccc", guitype='comboparambox', choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)', expert=True, row=8, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_header(name="projheader", help='Options below this label are specific to e2project', title="### e2project options ###", row=10, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--delta", type=float,help="The angular step size for alingment", default=5.0, guitype='floatbox', row=11, col=0, rowspan=1, colspan=1, mode="analysis")
	# options associated with e2simmx.py
	parser.add_header(name="simmxheader", help='Options below this label are specific to e2simmx', title="### e2simmx options ###", row=12, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--shrink", dest="shrink", type = int, default=0, help="Optionally shrink the input particles by an integer amount prior to computing similarity scores. For speed purposes. Defulat = 0, no shrinking", guitype='shrinkbox', row=13, col=0, rowspan=1, colspan=1, mode="analysis")
	parser.add_argument("--simcmp",type=str,help="The name of a 'cmp' to be used in comparing the aligned images (default=ccc)", default="ccc", guitype='comboparambox', choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)', row=14, col=0, rowspan=1, colspan=2, mode="analysis")
	# options associated with e2projector3d.py
	parser.add_argument("--simalign",type=str,help="The name of an 'aligner' to use prior to comparing the images (default=rotate_translate)", default="rotate_translate", guitype='comboparambox', choicelist='re_filter_list(dump_aligners_list(),\'refine|3d\', 1)', row=15, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--simaligncmp",type=str,help="Name of the aligner along with its construction arguments (default=ccc)",default="ccc", guitype='comboparambox', choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)', row=16, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--simralign",type=str,help="The name and parameters of the second stage aligner which refines the results of the first alignment", default=None, guitype='comboparambox', choicelist='re_filter_list(dump_aligners_list(),\'refine\', 0)', row=17, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--simraligncmp",type=str,help="The name and parameters of the comparitor used by the second stage aligner. (default=dot).",default="dot", guitype='comboparambox', choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)', row=18, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--parallel",type=str,help="Parallelism string",default=None, guitype='strbox', row=9, col=0, rowspan=1, colspan=2, mode="analysis")
	parser.add_argument("--verbose", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness", guitype='intbox', row=19, col=0, rowspan=1, colspan=1, mode="analysis")
	# "gui" mode options
	parser.add_argument("--path", type=str,help="The folder the results are placed", default="", guitype='dirbox', dirbasename='TiltValidate', row=0, col=0,rowspan=1, colspan=2, mode="gui")
	parser.add_argument("--radcut", type = float, default=-1, help="For use in the GUI, truncate the polar plot after R. -1 = no truncation", guitype='floatbox', row=4, col=0, rowspan=1, colspan=1, mode="gui")
	parser.add_argument("--gui",action="store_true",help="Start the GUI for viewing the tiltvalidate plots",default=False, guitype='boolbox', row=4, col=1, rowspan=1, colspan=1, mode="gui[True]")
	parser.add_argument("--planethres", type=float, help="Maximum out of plane threshold for the tiltaxis. 0 = perfectly in plane, 1 = normal to plane", default=360.0, guitype='floatbox', row=5, col=0, rowspan=1, mode="gui")
	parser.add_argument("--datalabelscolor", type=str, help="Set the color of the data labels. Any vaild matplotlib color is ok", default='#00ff00', guitype='strbox', row=6, col=0, rowspan=1, colspan=1, mode="gui")
	parser.add_argument("--datalabels", action="store_true",help="Add data labels to the plot", default=False, guitype='boolbox', row=6, col=1, rowspan=1, mode="gui")
	parser.add_argument("--colorzaxis", action="store_true",help="Color scatter dots by Z axis", default=False, guitype='boolbox', row=7, col=0, rowspan=1, mode="gui")
	#other options
	parser.add_argument("--eulerfile",type=str,help="Euler angles file, to create tiltdistance from pre-aligned particles. Format is: imgnum, name, az, alt, phi",default=None)
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)
	(options, args) = parser.parse_args()

	# Run the GUI if in GUI mode
	if options.gui:
		display_validation_plots(options.path, options.radcut, options.planethres, plotdatalabels=options.datalabels, color=options.datalabelscolor, plotzaxiscolor=options.colorzaxis)
		# sys.exit rather than the site-provided exit(), which is not always available
		sys.exit(0)

	# Each input is only mandatory when no Euler file replaces the data
	if not (options.volume or options.eulerfile):
		print "Error a volume to validate must be presented"
		sys.exit(1)

	if not (options.tiltdata or options.eulerfile):
		print "Error a stack of tilted images must be presented"
		sys.exit(1)

	if not (options.untiltdata or options.eulerfile):
		print "Error a stack of untiled images must be presented"
		sys.exit(1)

	logid=E2init(sys.argv,options.ppid)

	options.cmp=parsemodopt(options.cmp)
	options.align=parsemodopt(options.align)

	# Make a new dir for each run: the first TiltValidate_NN that does not exist yet
	if not options.path :
		i = 1
		while os.path.exists("TiltValidate_%02d" % i):
			i += 1
		options.path = "TiltValidate_%02d" % i
		os.mkdir(options.path)

	#Make tilt distance generator
	tiltgenerator = ComputeTilts(options)

	# Compute tilt distances from file if desired.
	if options.eulerfile:
		# Format is:
		# untilt_imgnum name az alt phi
		# tilt_imgnum name az alt phi
		with open(options.eulerfile,"r") as eulerfile:
			# Blank lines are dropped before pairing: they would otherwise raise
			# IndexError below and shift the untilted/tilted alternation.
			eulers = [line for line in eulerfile if line.strip()]
		untilteulerlist = []
		tilteulerlist = []
		for i, euler in enumerate(eulers):
			fields = euler.split()
			# NOTE(review): the documented column order is "az alt phi", yet
			# column 2 is stored as 'alt' and column 3 as 'az'. Kept as found;
			# confirm against the files this option is actually fed.
			orientation = {'alt':float(fields[2]),'az':float(fields[3]),'phi':float(fields[4])}
			# even lines describe the untilted particle, odd lines its tilted partner
			if i % 2:
				tilteulerlist.append(orientation)
			else:
				untilteulerlist.append(orientation)
		tiltgenerator.findtilts_fromeulers(untilteulerlist, tilteulerlist)
		# this is a regular completion, so close the log opened by E2init
		E2end(logid)
		sys.exit(0)

	# Initialize parallelism, falling back to a single thread
	from EMAN2PAR import EMTaskCustomer
	if options.parallel :
		etc=EMTaskCustomer(options.parallel)
	else:
		etc=EMTaskCustomer("thread:1")

	# Otherwise compute tilt distances from data
	#Read in the images
	tiltimgs = EMData.read_images(options.tiltdata)
	untiltimgs = EMData.read_images(options.untiltdata)
	if len(tiltimgs) != len(untiltimgs):
		print "The untilted image stack is not the same length as the tilted stack!!!"
		sys.exit(1)

	# Record the projection parameters and the run directory in the command cache.
	# The projections are always regenerated; nothing is reused from a previous run.
	cdb = js_open_dict('info/cmdcache.json')
	projparmas = "%s%f%s"%(options.volume,options.delta, options.sym)
	# Do projections
	e2projectcmd = "e2project3d.py %s --orientgen=eman:delta=%f:inc_mirror=1:perturb=0 --outfile=%s/projections_00.hdf --projector=standard --sym=%s" % (options.volume,options.delta,options.path, options.sym) # Seems to work better when I check all possibilites
	if options.parallel: e2projectcmd += " --parallel=%s" %options.parallel
	run(e2projectcmd)
	cdb['projparmas'] = projparmas
	cdb['previouspath'] = options.path
	cdb.close()

	# Make simmx: the same command for the untilted and then the tilted stack
	for stack, simmxname in ((options.untiltdata, "simmx.hdf"), (options.tiltdata, "simmx_tilt.hdf")):
		e2simmxcmd = "e2simmx.py %s/projections_00.hdf %s %s/%s -f --saveali --cmp=%s --align=%s --aligncmp=%s --verbose=%d" % (options.path,stack,options.path,simmxname,options.simcmp,options.simalign,options.simaligncmp,options.verbose)
		if options.simralign: e2simmxcmd += " --ralign=%s --raligncmp=%s" %(options.simralign,options.simraligncmp)
		if options.parallel: e2simmxcmd += " --parallel=%s" %options.parallel
		if options.shrink: e2simmxcmd += " --shrink=%d" %options.shrink
		run(e2simmxcmd)

	# Read in the data
	simmx= EMData.read_images("%s/simmx.hdf"%options.path)
	simmx_tilt= EMData.read_images("%s/simmx_tilt.hdf"%options.path)
	projections = EMData.read_images("%s/projections_00.hdf"%options.path)
	volume = EMData()
	volume.read_image(options.volume) # I don't know why I cant EMData.read_image.......

	# Generate tilts from data
	tiltgenerator.findtilts_fromdata(simmx, simmx_tilt, projections, volume, untiltimgs, tiltimgs)

	if options.docontourplot:
		# Make contour plot to validate each particle
		tasks=[]
		distplot = EMData(options.tiltrange*2+1,options.tiltrange*2+1)
		distplot.to_zero()
		scores = simmx[0]
		for imgnum in range(scores.get_ysize()):
			# pick the reference with the lowest score for this particle
			bestscore = float('inf')
			bestrefnum = 0
			for refnum in range(scores.get_xsize()):
				score = scores.get_value_at(refnum, imgnum)
				if score < bestscore:
					bestscore = score
					bestrefnum = refnum
			# Get the euler angle for this particle and call compare to tilt
			euler_xform = projections[bestrefnum].get_attr('xform.projection')
			tasks.append(CompareToTiltTask(volume, tiltimgs[imgnum], imgnum, euler_xform, simmx[3].get_value_at(bestrefnum, imgnum), distplot, options.tiltrange, 1, options))

		# Farm out the work and hang till finished!
		tids=etc.send_tasks(tasks)
		while 1:
			time.sleep(5)
			proglist=etc.check_task(tids)
			tids=[j for i,j in enumerate(tids) if proglist[i]!=100]		# remove any completed tasks from the list we ask about
			if len(tids)==0: break

		# Make scoremx avg
		scoremxs = EMData.read_images("%s/scorematrix.hdf"%options.path)
		avgmxavger = Averagers.get('mean')
		for mx in scoremxs:
			avgmxavger.add_image(mx)
		avgmx = avgmxavger.finish()
		avgmx.write_image("%s/contour.hdf"%options.path)
		distplot.write_image("%s/distplot.hdf"%options.path)

	E2end(logid)
Пример #13
0
def main():
	"""Command-line entry point: build class-averages from a particle stack.

	Parses the command line, validates it with check(), then creates one
	ClassAvTask per class (or a single task covering the whole input stack
	when no usable --classmx is available). Tasks are either sent to an
	EMTaskCustomer (--parallel) or executed one after another in-process.
	Each finished average is written to --output and, when --resultmx is
	given, the per-particle alignment results are copied into the in-memory
	classification matrix, which is written out at the end.

	Exits with status 1 if the argument check fails, if --resultmx is given
	without a readable --classmx, or if a serially executed task returns
	None; exits with status 0 after a successful --check.
	"""
	usage = """prog <output> [options]

	This program produces iterative class-averages, one of the secrets to EMAN's rapid convergence.
	Normal usage is to provide a stack of particle images and a classification matrix file defining
	class membership. Members of each class are then iteratively aligned to each other and averaged
	together with (optional) CTF correction.  It is also possible to use this program on all of the
	images in a single stack.

	"""

	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	parser.add_argument("--input", type=str, help="The name of the input particle stack", default=None)
	parser.add_argument("--output", type=str, help="The name of the output class-average stack", default=None)
	parser.add_argument("--oneclass", type=int, help="Create only a single class-average. Specify the number.",default=None)
	parser.add_argument("--classmx", type=str, help="The name of the classification matrix specifying how particles in 'input' should be grouped. If omitted, all particles will be averaged.", default=None)
	parser.add_argument("--ref", type=str, help="Reference image(s). Used as an initial alignment reference and for final orientation adjustment if present. Also used to assign euler angles to the generated classes. This is typically the projections that were used for classification.", default=None)
	parser.add_argument("--storebad", action="store_true", help="Even if a class-average fails, write to the output. Forces 1->1 numbering in output",default=False)
	parser.add_argument("--decayedge", action="store_true", help="Applies an edge decay to zero on the output class-averages. A very good idea if you plan on 3-D reconstruction.",default=False)
	parser.add_argument("--resultmx",type=str,help="Specify an output image to store the result matrix. This contains 5 images where row is particle number. Rows in the first image contain the class numbers and in the second image consist of 1s or 0s indicating whether or not the particle was included in the class. The corresponding rows in the third, fourth and fifth images are the refined x, y and angle (respectively) used in the final alignment, these are updated and accurate, even if the particle was excluded from the class.", default=None)
	parser.add_argument("--iter", type=int, help="The number of iterations to perform. Default is 1.", default=1)
	parser.add_argument("--prefilt",action="store_true",help="Filter each reference (c) to match the power spectrum of each particle (r) before alignment and comparison",default=False)
	parser.add_argument("--align",type=str,help="This is the aligner used to align particles to the previous class average. Default is None.", default=None)
	parser.add_argument("--aligncmp",type=str,help="The comparitor used for the --align aligner. Default is ccc.",default="ccc")
	parser.add_argument("--ralign",type=str,help="This is the second stage aligner used to refine the first alignment. This is usually the \'refine\' aligner.", default=None)
	parser.add_argument("--raligncmp",type=str,help="The comparitor used by the second stage aligner.",default="ccc")
	parser.add_argument("--averager",type=str,help="The type of averager used to produce the class average.",default="mean")
	parser.add_argument("--setsfref",action="store_true",help="This will impose the 1-D structure factor of the reference on the class-average (recommended when a reference is available)",default=False)
	parser.add_argument("--cmp",type=str,help="The comparitor used to generate quality scores for the purpose of particle exclusion in classes, strongly linked to the keep argument.", default="ccc")
	parser.add_argument("--keep",type=float,help="The fraction of particles to keep in each class.",default=1.0)
	parser.add_argument("--keepsig", action="store_true", help="Causes the keep argument to be interpreted in standard deviations.",default=False)
	parser.add_argument("--automask",action="store_true",help="Applies a 2-D automask before centering. Can help with negative stain data, and other cases where centering is poor.")
	parser.add_argument("--center",type=str,default="xform.center",help="If the default centering algorithm (xform.center) doesn't work well, you can specify one of the others here (e2help.py processor center)")
	parser.add_argument("--bootstrap",action="store_true",help="Ignored. Present for historical reasons only.")
	parser.add_argument("--normproc",type=str,help="Normalization processor applied to particles before alignment. Default is normalize.edgemean. If you want to turn this option off specify \'None\'", default="normalize.edgemean")
	parser.add_argument("--usefilt", dest="usefilt", default=None, help="Specify a particle data file that has been low pass or Wiener filtered. Has a one to one correspondence with your particle data. If specified will be used to align particles to the running class average, however the original particle will be used to generate the actual final class average")
	parser.add_argument("--idxcache", default=False, action="store_true", help="Ignored. Present for historical reasons.")
	parser.add_argument("--dbpath", help="Ignored. Present for historical reasons.", default=".")
	parser.add_argument("--resample",action="store_true",help="If set, will perform bootstrap resampling on the particle data for use in making variance maps.",default=False)
	parser.add_argument("--odd", default=False, help="Used by EMAN2 when running eotests. Includes only odd numbered particles in class averages.", action="store_true")
	parser.add_argument("--even", default=False, help="Used by EMAN2 when running eotests. Includes only even numbered particles in class averages.", action="store_true")
	parser.add_argument("--parallel", default=None, help="parallelism argument")
	parser.add_argument("--force", "-f",dest="force",default=False, action="store_true",help="Force overwrite the output file if it exists.")
	parser.add_argument("--saveali",action="store_true",help="Writes aligned particle images to aligned.hdf. Normally resultmx produces more useful informtation. This can be used for debugging.",default=False)
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n",type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
	parser.add_argument("--debug","-d",action="store_true",help="Print debugging infromation while the program is running. Default is off.",default=False)
	parser.add_argument("--nofilecheck",action="store_true",help="Turns file checking off in the check functionality - used by e2refine.py.",default=False)
	parser.add_argument("--check","-c",action="store_true",help="Performs a command line argument check only.",default=False)
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)

	(options, args) = parser.parse_args()

	if options.check: options.verbose = 9 # turn verbose on if the user is only checking...

	# check() sees the raw option strings, so it must run before they are parsed below
	error = check(options,True)

	# Convert each "name:key=value" style option string into its parsed form (empty/None values are left alone)
	for optname in ("align","ralign","aligncmp","raligncmp","averager","cmp","normproc"):
		optval=getattr(options,optname)
		if optval : setattr(options,optname,parsemodopt(optval))

	# The result matrix is indexed by class number, so output numbering must be 1->1
	if options.resultmx is not None : options.storebad=True

	# NOTE: print(...) with one pre-formatted argument behaves the same as the Python 2 print statement
	if options.verbose>0:
		if error:
			print("e2classaverage.py command line arguments test.... FAILED")
		else:
			print("e2classaverage.py command line arguments test.... PASSED")

	# returning a different error code is currently important to e2refine.py - returning 0 tells e2refine.py that it has enough
	# information to execute this script
	if error : sys.exit(1)
	if options.check: sys.exit(0)

	logger=E2init(sys.argv,options.ppid)
	print("Class averaging beginning")

	# The classification matrix is optional. Any failure to read it means "treat the whole input as one class".
	# We keep the entire matrix in memory, since it is updated in place when --resultmx is requested.
	classmx=None
	try:
		classmx=EMData.read_images(options.classmx)
		ncls=int(classmx[0]["maximum"])+1
	except Exception:
		classmx=None
		ncls=1
		if options.resultmx is not None :
			print("resultmx can only be specified in conjunction with a valid classmx input.")
			sys.exit(1)

	nptcl=EMUtil.get_image_count(options.input)

	# apix is not used further in this function; the read is kept for its user-facing warning
	try: apix=EMData(options.input,0,True)["apix_x"]
	except Exception:
		apix=1.0
		print("WARNING: could not get apix from first image. Setting to 1.0. May impact results !")

	# Initialize parallelism
	if options.parallel :
		from EMAN2PAR import EMTaskCustomer
		etc=EMTaskCustomer(options.parallel)
		pclist=[options.input]
		if options.ref: pclist.append(options.ref)
		if options.usefilt: pclist.append(options.usefilt)
		etc.precache(pclist)

	# prepare tasks
	tasks=[]
	if ncls>1:
		if options.oneclass is None : clslst=range(ncls)
		else : clslst=[options.oneclass]

		for cl in clslst:
			ptcls=_select_particles(classmx_ptcls(classmx[0],cl),options)
			tasks.append(_make_classav_task(options,ptcls,cl))
	else:
		# No usable classmx: one class containing every particle. The filtered list is what must be
		# handed to the task, otherwise --resample/--odd/--even have no effect in this mode.
		ptcls=_select_particles(list(range(nptcl)),options)
		tasks.append(_make_classav_task(options,ptcls,0))

	# execute task list
	if options.parallel:				# run in parallel
		taskids=etc.send_tasks(tasks)
		alltaskids=taskids[:]

		while len(taskids)>0 :
			curstat=etc.check_task(taskids)
			for i,j in enumerate(curstat):
				if j==100 :
					# get_results() returns a pair: [0] is the task object, [1] is its result dictionary
					rslt=etc.get_results(taskids[i])
					_store_class_result(options,classmx,rslt[0],rslt[1],rslt[1]["n"])

			# remove any completed tasks from the list we ask about
			taskids=[j for i,j in enumerate(taskids) if curstat[i]!=100]

			if 100 in curstat :
				if options.verbose : print("%d/%d tasks remain"%(len(taskids),len(alltaskids)))
				E2progress(logger,1.0-(float(len(taskids))/len(alltaskids)))

			# only wait when there is still something to poll for
			if taskids : time.sleep(3)

		if options.verbose : print("Completed all tasks")

	# single thread
	else:
		for t in tasks:
			rslt=t.execute()
			if rslt is None : sys.exit(1)
			_store_class_result(options,classmx,t,rslt,t.options["n"])

	if options.resultmx is not None:
		if options.verbose : print("Writing results matrix")
		for i,j in enumerate(classmx) : j.write_image(options.resultmx,i)

	print("Class averaging complete")
	E2end(logger)


def _select_particles(ptcls,options):
	"""Apply the --resample, --odd and --even selections (in that order) to a list of particle numbers."""
	if options.resample : ptcls=[random.choice(ptcls) for i in ptcls]	# bootstrap resampling: draw len(ptcls) members with replacement
	if options.odd : ptcls=[i for i in ptcls if i%2==1]
	if options.even: ptcls=[i for i in ptcls if i%2==0]
	return ptcls


def _make_classav_task(options,ptcls,clsnum):
	"""Build the ClassAvTask for class number clsnum over particle numbers ptcls, using the parsed options."""
	return ClassAvTask(options.input,ptcls,options.usefilt,options.ref,options.iter,options.normproc,options.prefilt,
		options.align,options.aligncmp,options.ralign,options.raligncmp,options.averager,options.cmp,options.keep,options.keepsig,
		options.automask,options.saveali,options.setsfref,options.verbose,clsnum,options.center)


def _write_blank_average(options,outnum):
	"""Write an all-zero placeholder image (ptcl_repr=0) at position outnum of --output for a failed class.

	The image dimensions and apix_x are taken from the first image of --ref. When no
	reference was given, the first input particle is used instead.
	NOTE(review): the fallback assumes particles and class-averages share the same box size - confirm.
	"""
	src=options.ref if options.ref else options.input
	hdr=EMData(src,0,True)		# header-only read, only nx/ny/apix_x are needed
	blk=EMData(hdr["nx"],hdr["ny"],1)
	blk.to_zero()
	blk.set_attr("ptcl_repr", 0)
	blk.set_attr("apix_x",hdr["apix_x"])
	blk.write_image(options.output,outnum)


def _store_class_result(options,classmx,task,result,outnum):
	"""Post-process and write one finished class-average, and update the result matrix if requested.

	options -- parsed command-line options
	classmx -- list of classification matrix images, updated in place (only touched when --resultmx is set)
	task    -- the task that produced the result; task.data["images"][2] is read as the list of particle numbers
	result  -- result dictionary of the task; the keys "average", "info" and "n" are read
	outnum  -- image number to write to in --output when --storebad is in effect
	"""
	avg=result["average"]

	# failed average: optionally keep the 1->1 numbering with a blank placeholder
	if avg is None:
		if options.storebad : _write_blank_average(options,outnum)
		return

	avg["class_ptcl_src"]=options.input
	if options.decayedge:
		nx=avg["nx"]
		# '//' keeps the integer radii that Python 2's '/' produced for an integer nx
		avg.process_inplace("normalize.circlemean",{"radius":nx//2-nx//15})
		avg.process_inplace("mask.gaussian",{"inner_radius":nx//2-nx//15,"outer_radius":nx//20})

	if options.ref is not None : avg["projection_image"]=options.ref
	if options.storebad : avg.write_image(options.output,outnum)
	else: avg.write_image(options.output,-1)		# -1 appends to the output stack

	# Update the resultsmx if requested
	if options.resultmx is None : return

	clsnum=result["n"]
	allinfo=result["info"]				# the info result array list of (qual,xform,used) tuples
	pnums=task.data["images"][2]		# list of image numbers corresponding to information
	ncol=classmx[0]["nx"]

	for n,info in enumerate(allinfo):
		y=pnums[n]		# actual particle number

		# find the column of this particle's row in the classification matrix that holds the current class number
		for x in range(ncol):
			if classmx[0][x,y]==clsnum : break
		else :
			print("Resultmx error: no match found ! (%d %d %d)"%(x,y,clsnum))
			continue

		xform=info[1].get_params("2d")
		classmx[1][x,y]=info[2]					# used
		classmx[2][x,y]=xform["tx"]				# dx
		classmx[3][x,y]=xform["ty"]				# dy
		classmx[4][x,y]=xform["alpha"]			# da
		classmx[5][x,y]=xform["mirror"]			# flip
		# the scale image is optional: skip it if the matrix has no 7th image or the transform has no scale
		try: classmx[6][x,y]=xform["scale"]
		except Exception: pass
Пример #14
0
def main():
    progname = os.path.basename(sys.argv[0])
    usage = """prog [options] 
	This program aligns a paricle to its symmetry axis. There are two algorithmic modes. 
	A coarse search followed by simplex minimization (not yet implimented) OR monte carlo course 
	search followed by simplex minimization. The Goal is to align the paricle to its 
	symmetry axis so symmetry can be applied for avergaing and for alignment speed up 
	(it is only necessary to search over one asymmetric unit!
	"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_header(
        name="symsearch3dheader",
        help="""Options below this label are specific to e2symsearch3d""",
        title="### e2symsearch3d options ###",
        row=3,
        col=0,
        rowspan=1,
        colspan=2,
    )

    parser.add_argument(
        "--input",
        dest="input",
        default="",
        type=str,
        help="""The name of input volume or hdf stack of volumes""",
        guitype="filebox",
        browser="EMBrowserWidget(withmodal=True,multiselect=False)",
        row=0,
        col=0,
        rowspan=1,
        colspan=2,
    )

    # parser.add_argument("--output", dest="output", default="""e2symsearch3d_OUTPUT.hdf""", type=str, help="The name of the output volume", guitype='strbox', filecheck=False, row=1, col=0, rowspan=1, colspan=2)

    parser.add_argument(
        "--ref",
        type=str,
        default="",
        help="""Default=None. If provided and --average is also provided and --keep < 1.0 or --keepsig is specified, 'good particles' will be determined by correlation to --ref.""",
    )

    parser.add_argument(
        "--mirror",
        type=str,
        default="",
        help="""Axis across of which to generate a mirrored copy of --ref. All particles will be compared to it in addition to the unmirrored image in --ref if --keepsig is provided or if --keep < 1.0.""",
    )

    parser.add_argument(
        "--path",
        type=str,
        default="",
        help="""Name of path for output file""",
        guitype="strbox",
        row=2,
        col=0,
        rowspan=1,
        colspan=2,
    )

    parser.add_argument(
        "--plots",
        action="store_true",
        default=False,
        help="""Default=False. Turn this option on to generate a plot of the ccc scores if --average is supplied. Running on a cluster or via ssh remotely might not support plotting.""",
    )

    parser.add_argument(
        "--sym",
        dest="sym",
        default="c1",
        help="""Specify symmetry -choices are: c<n>, d<n>, h<n>, tet, oct, icos. For asymmetric reconstruction ommit this option or specify c1.""",
        guitype="symbox",
        row=4,
        col=0,
        rowspan=1,
        colspan=2,
    )

    parser.add_argument(
        "--shrink",
        dest="shrink",
        type=int,
        default=0,
        help="""Optionally shrink the input particles by an integer amount prior to computing similarity scores. For speed purposes. Default=0, no shrinking""",
        guitype="shrinkbox",
        row=5,
        col=0,
        rowspan=1,
        colspan=1,
    )

    parser.add_argument(
        "--mask",
        type=str,
        help="""Mask processor applied to particles before alignment. Default is mask.sharp:outer_radius=-2. IF using --clipali, make sure to express outer mask radii as negative pixels from the edge.""",
        returnNone=True,
        default="mask.sharp:outer_radius=-2",
        guitype="comboparambox",
        choicelist="re_filter_list(dump_processors_list(),'mask')",
        row=11,
        col=0,
        rowspan=1,
        colspan=3,
    )

    parser.add_argument(
        "--maskfile",
        type=str,
        default="",
        help="""Mask file (3D IMAGE) applied to particles before alignment. Must be in HDF format. Default is None.""",
    )

    parser.add_argument(
        "--normproc",
        type=str,
        default="",
        help="""Normalization processor applied to particles before alignment. Default is to use normalize. If normalize.mask is used, results of the mask option will be passed in automatically. If you want to turn this option off specify \'None\'""",
    )

    parser.add_argument(
        "--nopreprocprefft",
        action="store_true",
        default=False,
        help="""Turns off all preprocessing that happens only once before alignment (--normproc, --mask, --maskfile, --clipali, --threshold; i.e., all preprocessing excepting filters --highpass, --lowpass, --preprocess, and --shrink.""",
    )

    parser.add_argument(
        "--threshold",
        default="",
        type=str,
        help="""A threshold applied to the subvolumes after normalization. For example, --threshold=threshold.belowtozero:minval=0 makes all negative pixels equal 0, so that they do not contribute to the correlation score.""",
        guitype="comboparambox",
        choicelist="re_filter_list(dump_processors_list(),'filter')",
        row=10,
        col=0,
        rowspan=1,
        colspan=3,
    )

    parser.add_argument(
        "--preprocess",
        default="",
        type=str,
        help="""Any processor (as in e2proc3d.py) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""",
        guitype="comboparambox",
        choicelist="re_filter_list(dump_processors_list(),'filter')",
        row=10,
        col=0,
        rowspan=1,
        colspan=3,
    )

    parser.add_argument(
        "--lowpass",
        type=str,
        default="",
        help="""A lowpass filtering processor (from e2proc3d.py; see e2help.py processors) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""",
        guitype="comboparambox",
        choicelist="re_filter_list(dump_processors_list(),'filter')",
        row=17,
        col=0,
        rowspan=1,
        colspan=3,
    )

    parser.add_argument(
        "--highpass",
        type=str,
        default="",
        help="""A highpass filtering processor (from e2proc3d.py, see e2help.py processors) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""",
        guitype="comboparambox",
        choicelist="re_filter_list(dump_processors_list(),'filter')",
        row=18,
        col=0,
        rowspan=1,
        colspan=3,
    )

    parser.add_argument(
        "--clipali",
        type=int,
        default=0,
        help="""Boxsize to clip particles as part of preprocessing to speed up alignment. For example, the boxsize of the particles might be 100 pixels, but the particles are only 50 pixels in diameter. Aliasing effects are not always as deleterious for all specimens, and sometimes 2x padding isn't necessary; still, there are some benefits from 'oversampling' the data during averaging; so you might still want an average of size 2x, but perhaps particles in a box of 1.5x are sufficiently good for alignment. In this case, you would supply --clipali=75""",
    )

    parser.add_argument(
        "--savepreproc",
        action="store_true",
        default=False,
        help="""Default=False. Will save stacks of preprocessed particles (one for coarse alignment and one for fine alignment if preprocessing options are different).""",
    )

    parser.add_argument(
        "--average",
        action="store_true",
        default=False,
        help="""Default=False. If supplied and a stack is provided through --input, the average of the aligned and/or symmetrized stack will also be saved.""",
    )

    parser.add_argument(
        "--averager",
        type=str,
        default="mean.tomo",
        help="""Default=mean.tomo. The type of averager used to produce the class average. Default=mean.tomo.""",
    )

    parser.add_argument(
        "--keep",
        type=float,
        default=1.0,
        help="""Fraction of particles to include if --average is on, after correlating the particles with the average.""",
    )

    parser.add_argument(
        "--keepsig",
        action="store_true",
        default=False,
        help="""Default=False. Causes theoptions.keep argument to be interpreted in standard deviations.""",
        guitype="boolbox",
        row=6,
        col=1,
        rowspan=1,
        colspan=1,
        mode="alignment,breaksym",
    )

    parser.add_argument(
        "--avgiter",
        type=int,
        default=1,
        help="""Default=1. If --keep is different from 1.0 and --average is on, the initial average will include all the particles, but then the percent specified byoptions.keep will be kept (the rest thrown away) and a new average will be computed. If --avgiter > 1, this new average will be compared again against all the particles. The procedure will be repeated for however many iterations --avgiter is given, or the process will stop automatically if in two consecutive rounds exactly the same particles are kept""",
    )

    parser.add_argument(
        "--subset",
        type=int,
        default=0,
        help="""Number of particles in a subset of particles from the --input stack of particles to run the alignments on.""",
    )

    parser.add_argument(
        "--steps",
        dest="steps",
        type=int,
        default=10,
        help="""Number of steps (for the MC). Default=10.""",
        guitype="intbox",
        row=5,
        col=1,
        rowspan=1,
        colspan=1,
    )

    parser.add_argument(
        "--symmetrize",
        default=False,
        action="store_true",
        help="""Symmetrize volume after alignment.""",
        guitype="boolbox",
        row=6,
        col=0,
        rowspan=1,
        colspan=1,
    )

    parser.add_argument(
        "--cmp",
        type=str,
        help="""The name of a 'cmp' to be used in comparing the symmtrized object to unsymmetrized""",
        default="ccc",
        guitype="comboparambox",
        choicelist="re_filter_list(dump_cmps_list(),'tomo', True)",
        row=7,
        col=0,
        rowspan=1,
        colspan=2,
    )

    parser.add_argument(
        "--parallel",
        "-P",
        type=str,
        help="""Run in parallel, specify type:<option>=<value>:<option>:<value>""",
        default=None,
        guitype="strbox",
        row=8,
        col=0,
        rowspan=1,
        colspan=2,
    )

    parser.add_argument(
        "--ppid", type=int, help="""Set the PID of the parent process, used for cross platform PPID.""", default=-1
    )

    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help="""verbose level [0-9], higner number means higher level ofoptions.verboseness.""",
    )

    parser.add_argument(
        "--nopath",
        action="store_true",
        default=False,
        help="""If supplied, this option will save results in the directory where the command is run. A directory to store the results will not be made.""",
    )

    parser.add_argument(
        "--nolog",
        action="store_true",
        default=False,
        help="""If supplied, this option will prevent logging the command run in .eman2log.txt.""",
    )

    parser.add_argument(
        "--saveali", action="store_true", default=False, help="""Save the stack of aligned/symmetrized particles."""
    )

    parser.add_argument(
        "--savesteps",
        action="store_true",
        default=False,
        help="""If --avgiter > 1, save all intermediate averages and intermediate aligned kept stacks.""",
    )

    parser.add_argument(
        "--notmatchimgs",
        action="store_true",
        default=False,
        help="""Default=True. This option prevents applying filter.match.to to one image so that it matches the other's spectral profile during preprocessing for alignment purposes.""",
    )

    parser.add_argument(
        "--preavgproc1",
        type=str,
        default="",
        help="""Default=None. A processor (see 'e2help.py processors -v 10' at the command line) to be applied to the raw particle after alignment but before averaging (for example, a threshold to exclude extreme values, or a highphass filter if you have phaseplate data.)""",
    )

    parser.add_argument(
        "--preavgproc2",
        type=str,
        default="",
        help="""Default=None. A processor (see 'e2help.py processors -v 10' at the command line) to be applied to the raw particle after alignment but before averaging (for example, a threshold to exclude extreme values, or a highphass filter if you have phaseplate data.)""",
    )

    parser.add_argument(
        "--weighbytiltaxis",
        type=str,
        default="",
        help="""Default=None. A,B, where A is an integer number and B a decimal. A represents the location of the tilt axis in the tomogram in pixels (eg.g, for a 4096x4096xZ tomogram, this value should be 2048), and B is the weight of the particles furthest from the tomogram. For example, --weighbytiltaxis=2048,0.5 means that praticles at the tilt axis (with an x coordinate of 2048) will have a weight of 1.0 during averaging, while the distance in the x coordinates of particles not-on the tilt axis will be used to weigh their contribution to the average, with particles at the edge(0+radius or 4096-radius) weighing 0.5, as specified by the value provided for B.""",
    )

    parser.add_argument(
        "--weighbyscore",
        action="store_true",
        default=False,
        help="""Default=False. This option will weigh the contribution of each subtomogram to the average by score/bestscore.""",
    )

    parser.add_argument(
        "--align",
        type=str,
        default="symalignquat",
        help="""Default=symalignquat. WARNING: The aligner cannot be changed for this program currently. Option ignored.""",
    )

    parser.add_argument(
        "--tweak",
        action="store_true",
        default=False,
        help="""WARNING: Not used for anything yet. This will perform a final alignment with no downsampling [without using --shrink or --shrinkfine] if --shrinkfine > 1.""",
    )

    (options, args) = parser.parse_args()

    if not options.input:
        parser.print_help()
        sys.exit(0)

        # If no failures up until now, initialize logger
    log = 0
    if not options.nolog:
        logid = E2init(sys.argv, options.ppid)
        log = 1

        # inimodeldir = os.path.join(".",options.path)
        # if not os.access(inimodeldir, os.R_OK):
        # 	os.mkdir(options.path)

        # Make directory to save results
    from e2spt_classaverage import (
        sptmakepath,
        preprocessingprefft,
        Preprocprefft3DTask,
        get_results_preproc,
        preprocfilter,
        sptOptionsParser,
    )

    options = sptmakepath(options, "symsearch")

    if options.nopath:
        options.path = "."

    rootpath = os.getcwd()

    if rootpath not in options.path:
        options.path = rootpath + "/" + options.path

    if options.parallel:
        from EMAN2PAR import EMTaskCustomer

    options = sptOptionsParser(options)

    avgr = Averagers.get(options.averager[0], options.averager[1])
    resultsdict = {}
    scores = []

    outputstack = options.path + "/all_ptcls_ali.hdf"

    # Determine number of particles in the stack
    n = EMUtil.get_image_count(options.input)
    if options.subset and options.subset < n:
        n = options.subset

    options.raw = options.input

    if not options.nopreprocprefft:

        if options.mask or options.normproc or options.threshold or options.clipali:

            preprocprefftstack = options.path + "/" + os.path.basename(options.input).replace(".hdf", "_preproc.hdf")

            # save "dummy" images for preproc images
            for i in range(n):
                dimg = EMData(8, 8, 8)
                dimg.to_one()
                dimg.write_image(preprocprefftstack, i)

            originalsavepreproc = options.savepreproc

            options.savepreproc = True

            print "\n(e2spt_hac.py) (allvsall) Initializing parallelism for preprocessing"
            if options.parallel:  # Initialize parallelism if being used
                # from EMAN2PAR import EMTaskCustomer
                etc = EMTaskCustomer(options.parallel)
                pclist = [options.input]
                etc.precache(pclist)

            tasks = []
            results = []

            # preprocprefftstack = options.path + '/' + options.input.replace('.hdf','_preproc.hdf')

            for i in range(n):

                img = EMData(options.input, i)

                if options.parallel:
                    task = Preprocprefft3DTask(["cache", options.input, i], options, i, preprocprefftstack)
                    tasks.append(task)

                else:
                    pimg = preprocessingprefft(img, options)
                    pimg.write_image(preprocprefftstack, i)

            print "\nthere are these many tasks to send", len(tasks)
            if options.parallel and tasks:
                tids = etc.send_tasks(tasks)
                print "therefore these many tids", len(tids)

                if options.verbose:
                    print "%d preprocessing tasks queued" % (len(tids))

            results = get_results_preproc(etc, tids, options.verbose)

            print "results are", results

            options.input = preprocprefftstack

            options.savepreproc = originalsavepreproc

    for i in range(n):

        print "\nI'll look for symmetry in particle number", i
        # Load particle and make a copy to modify if preprocessing options are specified
        volume = EMData(options.input, i)
        preprocvol = volume.copy()

        # Preprocess volume if any preprocessing options are specified

        preprocprefftstack = options.path + "/" + os.path.basename(options.input).replace(".hdf", "_preproc.hdf")

        if (
            (options.shrink and options.shrink > 1)
            or options.lowpass
            or options.highpass
            or options.normproc
            or options.preprocess
            or options.threshold
            or options.clipali
        ):
            print "\nHowever, I will first preprocess particle number", i

            print "\nWill call preprocessing on ptcl", i
            preprocvol = preprocfilter(preprocvol, options, i)

            if options.savepreproc:
                preprocvol.write_image(preprocprefftstack, i)
                # preprocessing(s2image,options, ptclindx, savetagp ,'no',round)

            print "\nDone preprocessing on ptcl", i

        if options.parallel:
            etc = EMTaskCustomer(options.parallel)
        else:
            etc = EMTaskCustomer("thread:1")

        symalgorithm = SymALignStrategy(preprocvol, options.sym, options.steps, options.cmp, etc)
        ret = symalgorithm.execute()
        symxform = ret[0]
        score = ret[1]
        scores.append(score)

        resultsdict.update({score: [symxform, i]})

        print "\nWriting output for best alignment found for particle number", i

        if options.shrink and options.shrink > 1:
            trans = symxform.get_trans()
            symxform.set_trans(trans[0] * options.shrink, trans[1] * options.shrink, trans[2] * options.shrink)

        print "\nWrittng to output ptcl", i

        # Rotate volume to the best orientation found, set the orientation in the header, apply symmetry if specified and write out the aligned (and symmetrized) particle to the output stack
        output = volume.process("xform", {"transform": symxform})
        output.set_attr("symxform", symxform)
        print "\nApplying this transform to particle", symxform
        if options.symmetrize:
            output = output.process("xform.applysym", {"sym": options.sym})

        output["spt_score"] = score
        output.write_image(outputstack, -1)

        # Averaging here only makes sense if all particles are going to be kept. Otherwise, different code is needed (below)
        if options.average:
            avgr.add_image(output)

            # Finalize average of all particles if non were set to be excluded. Otherwise, determine the discrimination threshold and then average the particles that pass it.
    if options.average:

        final_avg = avgr.finish()

        final_avg["origin_x"] = 0
        final_avg["origin_y"] = 0  # The origin needs to be reset to ZERO to avoid display issues in Chimera
        final_avg["origin_z"] = 0
        final_avg["xform.align3d"] = Transform()

        if options.keep == 1.0 and not options.keepsig:
            final_avg.write_image(options.path + "/final_avg.hdf", 0)

            if options.avgiter > 1:
                print """WARNING: --avgiter > 1 must be accompanied by --keepsig, or by --keep < 1.0"""

        elif options.keep < 1.0 or options.keepsig:

            if options.ref:
                ref = EMData(options.ref, 0)
                refComp(options, outputstack, ref, resultsdict, "")

                if options.mirror:
                    ref.process_inplace("xform.mirror", {"axis": options.mirror})
                    refComp(options, outputstack, ref, results, "_vs_mirror")
            else:
                ref2compare = final_avg
                refComp(options, outputstack, final_avg, resultsdict, "")

        del final_avg

    if log:
        E2end(logid)

    return
Пример #15
0
def _tomopreproc_build_parser(usage):
    """Return the EMArgumentParser holding every e2tomopreproc option."""
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument("--path",
                        type=str,
                        default='',
                        help="""Directory to store results in. 
		The default is a numbered series of directories containing the prefix 'tomopreproc';
		for example, tomopreproc_02 will be the directory by default if 'tomopreproc_01' 
		already exists.""")

    parser.add_pos_argument(name="stack_files",
                            default="",
                            help="Stacks or images to process.")

    parser.add_argument(
        "--input",
        type=str,
        default='',
        help=
        """tiltseries to process. redundant with --tiltseries, or with providing images as arguments (separated by a space: e2tomopreproc.py stack1.hdf stack2.hdf), but --input takes precedence."""
    )

    parser.add_argument(
        "--tiltseries",
        type=str,
        default='',
        help="""tiltseries to process. redundant with --input""")

    parser.add_argument(
        "--tltfile",
        type=str,
        default='',
        help=""".tlt file containing the tilt angles for --tiltseries""")

    parser.add_argument(
        "--outmode",
        type=str,
        default='',
        help=
        """All EMAN2 programs write images with 4-byte floating point values when possible by default. This allows specifying an alternate format when supported: float, int8, int16, int32, uint8, uint16, uint32. Values are rescaled to fill MIN-MAX range."""
    )

    parser.add_argument(
        "--dontcleanup",
        action='store_true',
        default=False,
        help="""If specified, intermediate files will be kept.""")

    parser.add_argument(
        "--clip",
        type=str,
        default='',
        help=
        """Default=None. This resizes the 2-D images in the tilt series. If one number is provided, then x and y dimensions will be made the same. To specify both dimensions, supply two numbers, --clip=x,y. Clipping will be about the center of the image."""
    )

    parser.add_argument(
        "--shrink",
        type=float,
        default=0.0,
        help=
        """Default=0.0 (no shrinking). Can use decimal numbers, larger than 1.0. Optionally shrink the images by this factor. Uses processor math.fft.resample."""
    )

    parser.add_argument(
        "--threshold",
        type=str,
        default='',
        help="""Default=None. A threshold processor applied to each image.""")

    # A store_true flag must default to a boolean, not to an empty string.
    parser.add_argument(
        "--erasegold",
        action='store_true',
        default=False,
        help="""Default=False. Runs erase_gold.py on the stack.""")

    parser.add_argument(
        "--mask",
        type=str,
        default='',
        help="""Default=None. Masking processor applied to each image.""")

    parser.add_argument(
        "--maskbyangle",
        action='store_true',
        default=False,
        help=
        """Default=False. Requires --tltfile. This will mask out from tilted images the info that isn't present at the 0 tilt angle. It uses the tomo.tiltedgemask processor (type 'e2help.py processors' at the commandline to read a description of the processor and its parameters). Provide --maskbyanglefalloff and --maskbyanglesigma to modify the default parameters."""
    )

    parser.add_argument(
        "--maskbyanglefalloff",
        type=int,
        default=4,
        help=
        """Default=4. Number of pixels over which --maskbyangle will fall off to zero."""
    )

    parser.add_argument(
        "--maskbyanglesigma",
        type=float,
        default=2.0,
        help=
        """Default=2.0. Number of sigmas for the width of the gaussian fall off in --maskbyangle and --maskbyanglefalloff"""
    )

    parser.add_argument(
        "--normproc",
        type=str,
        default='',
        help=
        """Default=None (see 'e2help.py processors -v 10' at the command line). Normalization processor applied to each image."""
    )

    parser.add_argument(
        "--normalizeimod",
        action='store_true',
        default=False,
        help=
        """Default=False. This will apply 'newstack -float 2' to the input stack. Requires IMOD."""
    )

    parser.add_argument(
        "--preprocess",
        type=str,
        default='',
        help=
        """Any processor (see 'e2help.py processors -v 10' at the command line) to be applied to each image."""
    )

    parser.add_argument(
        "--lowpassfrac",
        type=float,
        default=0.0,
        help=
        """Default=0.0 (not used). Fraction of Nyquist to lowpass at. The processor used is filter.lowpass.tanh"""
    )

    parser.add_argument(
        "--highpasspix",
        type=int,
        default=0,
        help=
        """Default=0 (not used). Number of Fourier pixels to apply highpass filter at. The processor used is filter.highpass.gauss."""
    )

    parser.add_argument(
        "--parallel",
        type=str,
        default="thread:1",
        help=
        """default=thread:1. Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel"""
    )

    parser.add_argument(
        "--prenadminite",
        type=int,
        default=0,
        help=
        """Default=0. Requires IMOD to be installed. Used to apply prenad filtering to a tiltseries. This is the --minite parameter in IMOD's preNAD program (minimum number of iterations)."""
    )

    parser.add_argument(
        "--prenadmaxite",
        type=int,
        default=0,
        help=
        """Default=0. Requires IMOD to be installed. Used to apply prenad filtering to a tiltseries. This is the --maxite parameter in IMOD's preNAD program (maximum number of iterations)."""
    )

    parser.add_argument(
        "--prenadsigma",
        type=int,
        default=0,
        help=
        """Default=0. Requires IMOD to be installed. Used to apply prenad filtering to a tiltseries. This is the --sigma parameter in IMOD's preNAD program (initial sigma for 'smoothing structure tensor')."""
    )

    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higner number means higher level of verboseness."
    )

    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)

    return parser


def _tomopreproc_read_angles(tltfile, verbose=0):
    """Parse *tltfile* (one angle per non-blank line) into {line index: angle}."""
    with open(tltfile, 'r') as handle:
        lines = handle.readlines()
    print("\nnumber of lines read from --tltfile %s" % len(lines))

    angles = {}
    for line in lines:
        # Tabs are dropped as before; strip() additionally tolerates lines
        # that contain nothing but spaces or carriage returns.
        line = line.replace('\t', '').strip()
        if not line:
            continue
        angle = float(line)
        angles[len(angles)] = angle
        if verbose:
            print("appending angle %s" % angle)
    return angles


def _tomopreproc_run(cmd):
    """Run *cmd* through the shell and return its exit status.

    Returns None when the shell itself could not be started. With shell=True
    a missing program does not raise; it only yields a non-zero status, so
    callers must test the returned value rather than rely on exceptions.
    """
    # NOTE(review): file names are interpolated into a shell string unquoted,
    # so names containing spaces or shell metacharacters will misbehave.
    try:
        proc = subprocess.Popen(cmd,
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    except OSError:
        return None
    # communicate() drains both pipes and waits for the process to finish.
    proc.communicate()
    return proc.returncode


def main():
    """Command-line entry point of e2tomopreproc.

    Parses the options, resolves the input files (--input, then --tiltseries,
    then positional arguments), optionally reads tilt angles from --tltfile,
    queues one TomoPreproc2DTask per image (per unstacked 2-D image when
    --maskbyangle is given), restacks the results and, when --tltfile is set,
    optionally runs IMOD's newstack and preNAD on the final files.

    Exits with status 1 on invalid or missing input. Returns an empty tuple.
    """

    usage = """e2tomopreproc.py <imgs> <options> . 
	This program takes a tiltseries ('.st' or '.ali' file from IMOD) and applies preprocessing operations to them, such as lowpass, highpass, masking, etc.
	The options should be supplied in "--option=value" format, replacing "option" for a valid option name, and "value" for an acceptable value for that option. 
	"""

    parser = _tomopreproc_build_parser(usage)
    (options, args) = parser.parse_args()

    logger = E2init(sys.argv, options.ppid)
    print("\n(e2tomopreproc)(main) started log")

    from e2spt_classaverage import sptmakepath

    options = sptmakepath(options, 'tomopreproc')

    # --input takes precedence, then --tiltseries, then positional arguments.
    infiles = []
    if options.input:
        infiles.append(options.input)
    elif options.tiltseries:
        infiles.append(options.tiltseries)
    elif args:
        print("copying args to infiles")
        infiles = list(args)
        print("infiles are %s" % (infiles,))
    else:
        print("\n(e2tomopreproc)(main) ERROR: must provide input files as arguments or via the --input or --tiltseries parameters.")

    if not infiles:
        print("\n(e2tomopreproc)(main) ERROR: no images found/provided")
        sys.exit(1)

    print("\n(e2tomopreproc)(main) identified --input %s" % (options.input,))

    for infile in infiles:
        if not infile.endswith(('.ali', '.st', '.mrc', '.mrcs', '.hdf')):
            # Report the extension of the offending file itself; options.input
            # is empty when the files came from --tiltseries or the arguments.
            print("\n(e2tomopreproc)(main) ERROR: invalid image extension %s for image %s. Extension must be .st, .ali, .hdf, .mrc or .mrcs" % (
                infile.split('.')[-1], infile))
            sys.exit(1)

    originalextension = infiles[0].split('.')[-1]

    angles = {}
    if options.maskbyangle or (options.prenadminite and options.prenadmaxite
                               and options.prenadsigma):

        if not options.tltfile:
            print("\n(e2tomopreproc)(main) ERROR: --maskbyangle and --prenad parameters require --tltfile")
            sys.exit(1)

        angles = _tomopreproc_read_angles(options.tltfile, options.verbose)
        if len(angles) < 2:
            print("\nERROR: something went terribly wrong with parsing the --tltfile. This program does not work on single images")
            sys.exit(1)

    print("\n(e2spt_preproc)(main) - INITIALIZING PARALLELISM!\n")

    from EMAN2PAR import EMTaskCustomer
    etc = EMTaskCustomer(options.parallel)

    # Precache the files that will actually be processed; options.input is
    # empty whenever the inputs were given another way.
    etc.precache(list(infiles))
    print("\n(e2spt_preproc)(main) - precaching --input")

    tasks = []

    print("there are these many infiles to loop over %s" % len(infiles))

    if options.lowpassfrac:
        hdr = EMData(infiles[0], 0, True)
        apix = hdr['apix_x']
        print("\n(e2spt_preproc)(main) apix is %s" % (apix,))
        nyquist = 2.0 * apix
        print("\n(e2spt_preproc)(main) therefore nyquist resolution is %s" % (nyquist,))
        print("")
        lowpassres = nyquist / options.lowpassfrac

        # From here on options.lowpassfrac holds a frequency (1/resolution).
        options.lowpassfrac = 1.0 / (lowpassres)
        if float(options.shrink) > 1.0:
            options.lowpassfrac /= float(options.shrink)

            print("there's shrinking %s" % (options.shrink,))
            # Resolution is the reciprocal of the (already converted) frequency.
            lowpassres = 1.0 / options.lowpassfrac

        print("\n(e2spt_preproc)(main) and final lowpass frequency is %s" % (options.lowpassfrac,))

        print("corresponding to lowpassres of %s" % (lowpassres,))

    mrcstack = ''
    for infile in infiles:

        root, ext = os.path.splitext(infile)
        print("infile is %s" % (infile,))
        print("infile[-5:] is  %s" % (infile[-5:],))

        # Swap only the trailing extension; a plain str.replace would also
        # rewrite matching text in the middle of the file name.
        if ext in ('.hdf', '.mrcs', '.st', '.ali', '.tif'):
            print("replacing %s extension" % (ext,))
            mrcstack = options.path + '/' + root + '.mrc'
        else:
            mrcstack = options.path + '/' + infile

        print("mrcstack is %s" % (mrcstack,))

        if options.maskbyangle:
            outname = mrcstack.replace('.mrc', '_UNSTACKED.mrc')
            print("therefore, outname is %s" % (outname,))

            cmd = 'e2proc2d.py ' + infile + ' ' + outname + ' --unstacking --threed2twod'

            if options.outmode:
                cmd += ' --outmode=' + options.outmode

            if options.verbose:
                cmd += ' --verbose=' + str(options.verbose)
                print("\ncommand to unstack original input tiltseries is %s" % (cmd,))

            print("\n(e2tomopreproc)(main) unstacking command is %s" % (cmd,))

            if _tomopreproc_run(cmd) != 0:
                # Nothing to preprocess for this stack; say so instead of
                # skipping it silently.
                print("\n(e2tomopreproc)(main) ERROR: unstacking failed for %s; skipping it" % (infile,))
                continue

            workdir = os.getcwd() + '/' + options.path
            print("\n(e2tomopreproc)(main) directory to look for images is %s" % (workdir,))

            imgs = sorted(options.path + '/' + f
                          for f in os.listdir(workdir) if "_UNSTACKED" in f)
            print("\n(e2spt_preproc)(main) found these many images %s" % len(imgs))

            # Every unstacked image needs its own tilt angle.
            if len(imgs) > len(angles):
                print("\n(e2tomopreproc)(main) ERROR: found %d unstacked images but only %d angles in --tltfile" % (
                    len(imgs), len(angles)))
                sys.exit(1)

            for kk, img in enumerate(imgs):
                outimage = img.replace('.mrc', '_preproc.mrc')
                tasks.append(
                    TomoPreproc2DTask(img, options, angles[kk], outimage))
        else:
            # Only .mrc/.mrcs inputs get the '_preproc.mrcs' suffix; building
            # it from the root avoids the '.mrcss' name str.replace produced
            # for '.mrcs' inputs. Other inputs keep their name, as before.
            if ext in ('.mrc', '.mrcs'):
                outimage = options.path + '/' + root + '_preproc.mrcs'
            else:
                outimage = options.path + '/' + infile
            tasks.append(TomoPreproc2DTask(infile, options, 0, outimage))

    tids = etc.send_tasks(tasks)
    if options.verbose:
        print("\n(e2spt_preproc)(main) preprocessing %d tasks queued" % (
            len(tids)))

    results = get_results(etc, tids, options)

    print("\n(e2tomopreproc)(main) these many images have been processsed %s" % len(
        results))

    imgspreproc = []
    for f in os.listdir(os.getcwd() + '/' + options.path):
        if "_preproc.mrc" in f:
            print("found preprocessed image %s" % (f,))
            imgspreproc.append(options.path + '/' + f)
        else:
            print("this file is NOT a preprocessed image %s" % (f,))

    imgspreproc.sort()

    print("\n(e2tomopreproc)(main) these many preprocessed images loaded %s" % len(
        imgspreproc))

    finalfiles = []

    if options.maskbyangle:

        # NOTE(review): only the stack name of the last input is used here, so
        # all preprocessed images end up in one output file.
        outfile = mrcstack.replace('.mrc', '.mrcs')
        print("for RESTACKING")
        print("\n\n\noutfile is %s" % (outfile,))

        for f in imgspreproc:
            print("appending image %s to outfile %s" % (f, outfile))
            cmd = 'e2proc2d.py ' + f + ' ' + outfile
            if options.outmode:
                cmd += ' --outmode=' + options.outmode

            if options.verbose:
                cmd += ' --verbose ' + str(options.verbose)

            print("\ncmd is with .mrcs outputformat is %s" % (cmd,))
            print("becauase outfile is %s" % (outfile,))
            _tomopreproc_run(cmd)

        finaloutput = outfile.replace('.mrcs', '.' + originalextension)
        os.rename(outfile, finaloutput)

        finalfiles.append(finaloutput)
    else:
        finalfiles = list(imgspreproc)

    # NOTE(review): as in the original flow, nothing below runs without
    # --tltfile, which also disables --normalizeimod; confirm this is intended.
    if options.tltfile:
        prenad = (('--prenadminite', options.prenadminite),
                  ('--prenadmaxite', options.prenadmaxite),
                  ('--prenadsigma', options.prenadsigma))

        for finalf in finalfiles:

            if options.normalizeimod:
                cmd = 'newstack ' + finalf + ' ' + finalf + ' --float 2'
                print("normalizeimod cmd is %s" % (cmd,))
                if _tomopreproc_run(cmd) != 0:
                    print("\nERROR: --normalizeimod skipped. Doesn't seem like IMOD is installed on this machine")

            if not options.dontcleanup and options.maskbyangle:
                purge(options.path, '_preproc.mrc')
                purge(options.path, '_UNSTACKED')
                purge(options.path, '~')

            if not (options.prenadminite or options.prenadmaxite
                    or options.prenadsigma):
                continue

            if options.prenadminite and options.prenadmaxite and options.prenadsigma:
                prenadout = finalf.replace('.' + originalextension,
                                           '_prenad.' + originalextension)
                cmd = ('preNAD -input ' + finalf + ' -output ' + prenadout +
                       ' -minite ' + str(options.prenadminite) +
                       ' -maxite ' + str(options.prenadmaxite) +
                       ' -sigma ' + str(options.prenadsigma) +
                       ' -angles ' + options.tltfile)
                if options.verbose:
                    print("\n(e2tomopreproc)(main) prenad cmd to run is %s" % (cmd,))
                if _tomopreproc_run(cmd) != 0:
                    print("\nERROR: check that a version of IMOD containing the preNAD program is correctly installed on this machine")
            else:
                # For each prenad option that was given, report every
                # companion option that is still missing.
                for given, givenvalue in prenad:
                    if not givenvalue:
                        continue
                    for needed, neededvalue in prenad:
                        if needed != given and not neededvalue:
                            print("\nERROR: %s required with %s" % (needed, given))

    E2end(logger)
    return ()
Пример #16
0
def main():
    """Program to validate a reconstruction by the Richard Henderson tilt validation method. A volume to validate, a small stack (~100 imgs) of untilted and ~10-15 degree
	tilted particles must be presented. The untilted and tilted particle stack must have a one-to-one relationship. In the contour plot, the Tiltaxis is along positive 'Y'
	The tiltaxis angle can be determined from e2RCTboxer.py uisng PairPicker mode. For example, if the tiltaxis is 45 degrees and the tilt angle is -15 degrees, there should
	be a peak in the -X, -Y quadrant at 225 degrees at a magnitude of 15.
	For more details see:
	Optiomal Determination of Particle Orientation, Absolute Hand, and COntrast Loss in Single-particle Electron Cryomicroscopy. Rosenthal, P.B., and Henderson, R. JMB, 333 (2003) pg 721-745
	"""
    progname = os.path.basename(sys.argv[0])
    usage = """prog [options]
	Tiltvalidation using Richard Henderson's technique. To use a stack of untilted and tiltimages whose set relationship is one-to-one is required along with a
	volume to validate. This can be generated using e2RCTboxer.py. After running this program two bits of data are products. A contour plot similar to Figure 5 in the Henderson paper(see below), and a list of
	titlangles and tiltaxes between particle paris, which can be used to makes plot similar to Figure 6 in Hendersons paper. The contour plot is stored as contour.hdf and the tiltpairs data is
	stored as bdb:perparticletilts.
	For more information see:
	Optimal determination of particle orientation, absolute hand, and contrast loss in 
	single-particle electron cryomicroscopy.
	Rosenthal PB, Henderson R.
	J Mol Biol. 2003 Oct 31;333(4):721-45 
	"""
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    # options associated with e2tiltvalidate.py
    parser.add_header(
        name="tvheader",
        help='Options below this label are specific to e2tiltvalidate',
        title="### e2tiltvalidate options ###",
        row=3,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis,gui")

    # "analysys" mode options
    parser.add_argument(
        "--untiltdata",
        type=str,
        help="Stack of untilted images",
        default=None,
        guitype='filebox',
        browser='EMSetsTable(withmodal=True,multiselect=False)',
        row=0,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--tiltdata",
        type=str,
        help="Stack of tilted images",
        default=None,
        guitype='filebox',
        browser='EMSetsTable(withmodal=True,multiselect=False)',
        row=1,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--volume",
        type=str,
        help="3D volume to validate",
        default=None,
        guitype='filebox',
        browser='EMModelsTable(withmodal=True,multiselect=False)',
        row=2,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--maxtiltangle",
        type=float,
        help="Maximum tiltangle permitted when finding tilt distances",
        default=180.0,
        guitype='floatbox',
        row=4,
        col=0,
        rowspan=1,
        colspan=1,
        mode="analysis")
    parser.add_argument("--quaternion",
                        action="store_true",
                        help="Use Quaterions for tilt distance computation",
                        default=False,
                        guitype='boolbox',
                        row=4,
                        col=1,
                        rowspan=1,
                        colspan=1,
                        mode='analysis')
    parser.add_argument("--sym",
                        type=str,
                        help="The recon symmetry",
                        default="c1",
                        guitype='symbox',
                        row=5,
                        col=0,
                        rowspan=1,
                        colspan=1,
                        mode="analysis")
    parser.add_argument("--docontourplot",
                        action="store_true",
                        help="Compute a contour plot",
                        default=False,
                        guitype='boolbox',
                        row=6,
                        col=0,
                        rowspan=1,
                        colspan=1,
                        expert=True,
                        mode="analysis")
    parser.add_argument("--tiltrange",
                        type=int,
                        help="The angular tiltrange to search",
                        default=15,
                        guitype='intbox',
                        row=6,
                        col=1,
                        rowspan=1,
                        colspan=1,
                        expert=True,
                        mode="analysis")
    parser.add_argument(
        "--align",
        type=str,
        help="The name of a aligner to be used in comparing the aligned images",
        default="translational",
        guitype='comboparambox',
        choicelist='re_filter_list(dump_aligners_list(),\'refine|3d\', 1)',
        expert=True,
        row=7,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--cmp",
        type=str,
        help="The name of a 'cmp' to be used in comparing the aligned images",
        default="ccc",
        guitype='comboparambox',
        choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)',
        expert=True,
        row=8,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_header(
        name="projheader",
        help='Options below this label are specific to e2project',
        title="### e2project options ###",
        row=10,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument("--delta",
                        type=float,
                        help="The angular step size for alingment",
                        default=5.0,
                        guitype='floatbox',
                        row=11,
                        col=0,
                        rowspan=1,
                        colspan=1,
                        mode="analysis")
    # options associated with e2simmx.py
    parser.add_header(name="simmxheader",
                      help='Options below this label are specific to e2simmx',
                      title="### e2simmx options ###",
                      row=12,
                      col=0,
                      rowspan=1,
                      colspan=2,
                      mode="analysis")
    parser.add_argument(
        "--shrink",
        dest="shrink",
        type=int,
        default=0,
        help=
        "Optionally shrink the input particles by an integer amount prior to computing similarity scores. For speed purposes. Defulat = 0, no shrinking",
        guitype='shrinkbox',
        row=13,
        col=0,
        rowspan=1,
        colspan=1,
        mode="analysis")
    parser.add_argument(
        "--simcmp",
        type=str,
        help=
        "The name of a 'cmp' to be used in comparing the aligned images (default=ccc)",
        default="ccc",
        guitype='comboparambox',
        choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)',
        row=14,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    # options associated with e2projector3d.py
    parser.add_argument(
        "--simalign",
        type=str,
        help=
        "The name of an 'aligner' to use prior to comparing the images (default=rotate_translate)",
        default="rotate_translate",
        guitype='comboparambox',
        choicelist='re_filter_list(dump_aligners_list(),\'refine|3d\', 1)',
        row=15,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--simaligncmp",
        type=str,
        help=
        "Name of the aligner along with its construction arguments (default=ccc)",
        default="ccc",
        guitype='comboparambox',
        choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)',
        row=16,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--simralign",
        type=str,
        help=
        "The name and parameters of the second stage aligner which refines the results of the first alignment",
        default=None,
        guitype='comboparambox',
        choicelist='re_filter_list(dump_aligners_list(),\'refine\', 0)',
        row=17,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument(
        "--simraligncmp",
        type=str,
        help=
        "The name and parameters of the comparitor used by the second stage aligner. (default=dot).",
        default="dot",
        guitype='comboparambox',
        choicelist='re_filter_list(dump_cmps_list(),\'tomo\', True)',
        row=18,
        col=0,
        rowspan=1,
        colspan=2,
        mode="analysis")
    parser.add_argument("--parallel",
                        type=str,
                        help="Parallelism string",
                        default=None,
                        guitype='strbox',
                        row=9,
                        col=0,
                        rowspan=1,
                        colspan=2,
                        mode="analysis")
    parser.add_argument(
        "--verbose",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higher number means higher level of verboseness",
        guitype='intbox',
        row=19,
        col=0,
        rowspan=1,
        colspan=1,
        mode="analysis")
    # "gui" mode options
    parser.add_argument("--path",
                        type=str,
                        help="The folder the results are placed",
                        default="",
                        guitype='dirbox',
                        dirbasename='TiltValidate',
                        row=0,
                        col=0,
                        rowspan=1,
                        colspan=2,
                        mode="gui")
    parser.add_argument(
        "--radcut",
        type=float,
        default=-1,
        help=
        "For use in the GUI, truncate the polar plot after R. -1 = no truncation",
        guitype='floatbox',
        row=4,
        col=0,
        rowspan=1,
        colspan=1,
        mode="gui")
    parser.add_argument(
        "--gui",
        action="store_true",
        help="Start the GUI for viewing the tiltvalidate plots",
        default=False,
        guitype='boolbox',
        row=4,
        col=1,
        rowspan=1,
        colspan=1,
        mode="gui[True]")
    parser.add_argument(
        "--planethres",
        type=float,
        help=
        "Maximum out of plane threshold for the tiltaxis. 0 = perfectly in plane, 1 = normal to plane",
        default=360.0,
        guitype='floatbox',
        row=5,
        col=0,
        rowspan=1,
        mode="gui")
    parser.add_argument(
        "--datalabelscolor",
        type=str,
        help=
        "Set the color of the data labels. Any vaild matplotlib color is ok",
        default='#00ff00',
        guitype='strbox',
        row=6,
        col=0,
        rowspan=1,
        colspan=1,
        mode="gui")
    parser.add_argument("--datalabels",
                        action="store_true",
                        help="Add data labels to the plot",
                        default=False,
                        guitype='boolbox',
                        row=6,
                        col=1,
                        rowspan=1,
                        mode="gui")
    parser.add_argument("--colorzaxis",
                        action="store_true",
                        help="Color scatter dots by Z axis",
                        default=False,
                        guitype='boolbox',
                        row=7,
                        col=0,
                        rowspan=1,
                        mode="gui")
    #other options
    parser.add_argument(
        "--eulerfile",
        type=str,
        help=
        "Euler angles file, to create tiltdistance from pre-aligned particles. Format is: imgnum, name, az, alt, phi",
        default=None)
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)
    (options, args) = parser.parse_args()

    # Run the GUI if in GUI mode
    #print options
    if options.gui:
        display_validation_plots(options.path,
                                 options.radcut,
                                 options.planethres,
                                 plotdatalabels=options.datalabels,
                                 color=options.datalabelscolor,
                                 plotzaxiscolor=options.colorzaxis)
        exit(0)

    if not (options.volume or options.eulerfile):
        print("Error a volume to validate must be presented")
        exit(1)

    if not (options.tiltdata or options.eulerfile):
        print("Error a stack of tilted images must be presented")
        exit(1)

    if not (options.untiltdata or options.eulerfile):
        print("Error a stack of untiled images must be presented")
        exit(1)

    logid = E2init(sys.argv, options.ppid)

    options.cmp = parsemodopt(options.cmp)
    options.align = parsemodopt(options.align)

    # Make a new dir for each run
    if not options.path:
        #options.path=numbered_path("TiltValidate",True)
        # Create the run directory structure if it does not exist
        i = 1
        found = 1
        while found == 1:
            if i < 10:
                run_dir = '0' + str(i)
            else:
                run_dir = str(i)
            found = os.path.exists("TiltValidate_" + run_dir)
            i = i + 1
        os.mkdir("TiltValidate_" + run_dir)
        options.path = "TiltValidate_" + run_dir

    #Make tilt distance generator
    tiltgenerator = ComputeTilts(options)

    # Compute tilt distances from file if desired.
    if options.eulerfile:
        # Format is:
        # untilt_imgnum name az alt phi
        # tilt_imgnum name az alt phi
        eulerfile = open(options.eulerfile, "r")
        eulers = eulerfile.readlines()
        eulerfile.close()
        untilteulerlist = []
        tilteulerlist = []
        for i, euler in enumerate(eulers):
            fields = euler.split()
            if i % 2:
                tilteulerlist.append({
                    'alt': float(fields[2]),
                    'az': float(fields[3]),
                    'phi': float(fields[4])
                })
            else:
                untilteulerlist.append({
                    'alt': float(fields[2]),
                    'az': float(fields[3]),
                    'phi': float(fields[4])
                })
        tiltgenerator.findtilts_fromeulers(untilteulerlist, tilteulerlist)
        exit(0)

    # Initialize parallelism if being used
    if options.parallel:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer(options.parallel,
                             "e2tiltvalidate.CompareToTiltTask")
    else:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer("thread:1", "e2tiltvalidate.CompareToTiltTask")
        #etc.precache(pclist)

    # Otherwise compute tilt distances from data
    #Read in the images
    tiltimgs = EMData.read_images(options.tiltdata)
    untiltimgs = EMData.read_images(options.untiltdata)
    if len(tiltimgs) != len(untiltimgs):
        print(
            "The untilted image stack is not the same length as the tilted stack!!!"
        )
        exit(1)

    # write projection command to DB. If we rerun this program no need to reproject if it was done using same pars before
    cdb = js_open_dict('info/cmdcache.json')
    projparmas = "%s%f%s" % (options.volume, options.delta, options.sym)
    #	try:
    #		if (cdb.has_key('projparmas') and  cdb['projparmas'] == projparmas): raise IOError("Projection file does not exist")
    #		run("e2proc2d.py bdb:%s#projections_00 bdb:%s#projections_00"%(cdb['previouspath'], options.path))
    #	except:
    # Do projections
    e2projectcmd = "e2project3d.py %s --orientgen=eman:delta=%f:inc_mirror=1:perturb=0 --outfile=%s/projections_00.hdf --projector=standard --sym=%s" % (
        options.volume, options.delta, options.path, options.sym
    )  # Seems to work better when I check all possibilites
    if options.parallel: e2projectcmd += " --parallel=%s" % options.parallel
    run(e2projectcmd)
    cdb['projparmas'] = projparmas
    cdb['previouspath'] = options.path
    cdb.close()

    # Make simmx
    e2simmxcmd = "e2simmx.py %s/projections_00.hdf %s %s/simmx.hdf -f --saveali --cmp=%s --align=%s --aligncmp=%s --verbose=%d" % (
        options.path, options.untiltdata, options.path, options.simcmp,
        options.simalign, options.simaligncmp, options.verbose)
    if options.simralign:
        e2simmxcmd += " --ralign=%s --raligncmp=%s" % (options.simralign,
                                                       options.simraligncmp)
    if options.parallel: e2simmxcmd += " --parallel=%s" % options.parallel
    if options.shrink: e2simmxcmd += " --shrink=%d" % options.shrink
    run(e2simmxcmd)

    e2simmxcmd = "e2simmx.py %s/projections_00.hdf %s %s/simmx_tilt.hdf -f --saveali --cmp=%s --align=%s --aligncmp=%s --verbose=%d" % (
        options.path, options.tiltdata, options.path, options.simcmp,
        options.simalign, options.simaligncmp, options.verbose)
    if options.simralign:
        e2simmxcmd += " --ralign=%s --raligncmp=%s" % (options.simralign,
                                                       options.simraligncmp)
    if options.parallel: e2simmxcmd += " --parallel=%s" % options.parallel
    if options.shrink: e2simmxcmd += " --shrink=%d" % options.shrink
    run(e2simmxcmd)

    # Read in the data
    simmx = EMData.read_images("%s/simmx.hdf" % options.path)
    simmx_tilt = EMData.read_images("%s/simmx_tilt.hdf" % options.path)
    projections = EMData.read_images("%s/projections_00.hdf" % options.path)
    volume = EMData()
    volume.read_image(
        options.volume)  # I don't know why I cant EMData.read_image.......

    # Generate tilts from data
    tiltgenerator.findtilts_fromdata(simmx, simmx_tilt, projections, volume,
                                     untiltimgs, tiltimgs)

    if options.docontourplot:
        # Make contour plot to validate each particle
        tasks = []
        distplot = EMData(options.tiltrange * 2 + 1, options.tiltrange * 2 + 1)
        distplot.to_zero()
        for imgnum in range(simmx[0].get_ysize()):
            bestscore = float('inf')
            bestrefnum = 0
            for refnum in range(simmx[0].get_xsize()):
                if simmx[0].get_value_at(refnum, imgnum) < bestscore:
                    bestscore = simmx[0].get_value_at(refnum, imgnum)
                    bestrefnum = refnum
            # Get the euler angle for this particle and call compare to tilt"bdb:%s#
            euler_xform = projections[bestrefnum].get_attr('xform.projection')
            tasks.append(
                CompareToTiltTask(volume, tiltimgs[imgnum], imgnum,
                                  euler_xform,
                                  simmx[3].get_value_at(bestrefnum, imgnum),
                                  distplot, options.tiltrange, 1, options))

        # Farm out the work and hang till finished!
        tids = etc.send_tasks(tasks)
        while 1:
            time.sleep(5)
            proglist = etc.check_task(tids)
            tids = [j for i, j in enumerate(tids) if proglist[i] != 100
                    ]  # remove any completed tasks from the list we ask about
            if len(tids) == 0: break

        # Make scoremx avg
        scoremxs = EMData.read_images("%s/scorematrix.hdf" % options.path)
        avgmxavger = Averagers.get('mean')
        for mx in scoremxs:
            avgmxavger.add_image(mx)
        avgmx = avgmxavger.finish()
        avgmx.write_image("%s/contour.hdf" % options.path)
        distplot.write_image("%s/distplot.hdf" % options.path)

    E2end(logid)
Пример #17
0
def main():
    """Command-line entry point: reconstruct a volume one 2-D slice at a time.

    Parses the command line, checks that a tilt series and a matching list of
    tilt angles were supplied, builds a single projection operator, sends one
    TVReconTask per Y row of the tilt images through an EMTaskCustomer, then
    stacks the returned 2-D reconstructions into a volume that is written to
    <path>/<output>.

    Exits with status 1 (via sys.exit) when --tiltseries or --tltfile is
    missing, when beta is negative, or when the number of tilt angles differs
    from the number of images in the tilt series.
    """

    parser = EMArgumentParser(usage=get_usage())

    parser.add_argument("--tiltseries",
                        default=None,
                        help="""The input projections. 
		Project should usually have the xform.projection header attribute, which is 
		used for slice insertion""")
    parser.add_argument("--tltfile",
                        type=str,
                        default=None,
                        help="""An IMOD-like .tlt file containing 
		alignment angles. If specified slices will be inserted using these angles in the 
		IMOD convention""")
    parser.add_argument("--output",
                        default="threed.hdf",
                        help="""Output reconstructed 
		tomogram file name.""")
    parser.add_argument("--path",
                        type=str,
                        default='tvrecon_3d',
                        help="""Directory in which 
		results will be stored.""")
    # The help text below states the default actually configured here (10).
    parser.add_argument("--iter",
                        default=10,
                        type=int,
                        help="""Specify the number of 
		iterative reconstructions to complete before returning the final reconstructed volume. 
			The default number is 10.""")
    # The help text below states the default actually configured here (1.0).
    parser.add_argument("--beta",
                        default=1.0,
                        type=float,
                        help="""Specify the total-variation 
		penalization/regularization weight parameter 'beta'. The default is 1.0.""")
    parser.add_argument("--subpix",
                        default=1,
                        type=int,
                        help="""Specify the number of linear 
		subdivisions used to compute the projection of one image pixel onto a detector pixel."""
                        )
    parser.add_argument("--savesinograms",
                        action="store_true",
                        default=False,
                        help="""If provided,
		this option will save the sinogram for each 2-D slice (along Y) in the reconstruction 
		to disk.""")

    parser.add_argument("--inmemory",
                        action='store_true',
                        default=False,
                        help="""If provided,
		this option will keep certain files open in memory instead of writing them and
		reading from disk every time. While this can be faster, it is very memory-intensive."""
                        )

    parser.add_argument("--saveslices",
                        action="store_true",
                        default=False,
                        help="""If provided,
		this option will save each reconstructed 2-D slice (along Y) to disk.""")
    parser.add_argument("--verbose",
                        "-v",
                        dest="verbose",
                        action="store",
                        metavar="n",
                        type=int,
                        default=0,
                        help="""
		verbose level [0-9], higher number means higher level of verboseness.""")
    parser.add_argument("--parallel",
                        type=str,
                        default='thread:1',
                        help="""Default=thread:1. 
		See http://blake.bcm.edu/emanwiki/EMAN2/Parallel""")
    parser.add_argument("--ppid",
                        type=int,
                        help="""Set the PID of the parent process, 
		used for cross platform PPID.""",
                        default=-1)

    (options, args) = parser.parse_args()

    # Check that the minimum data required are available and sane, otherwise exit
    if not options.tiltseries:
        print("\nERROR: You must specify --tiltseries")
        sys.exit(1)
    if not options.tltfile:
        # Name the option exactly as it is registered above (--tltfile).
        print("\nERROR: You must specify --tltfile")
        sys.exit(1)
    if options.beta < 0.0:
        print("\nERROR: Parameter beta must be a positive, real number.")
        sys.exit(1)

    # Parse the tilt angles, one per line. The file is closed as soon as it
    # has been read, and blank lines (e.g. a trailing empty line) are skipped
    # rather than aborting the run with a ValueError from float().
    with open(options.tltfile, "r") as tltfile:
        tiltangles = [float(line) for line in tltfile if line.strip()]

    # Every image in the tilt series needs exactly one angle
    nimgs = EMUtil.get_image_count(options.tiltseries)
    nangles = len(tiltangles)
    if nimgs != nangles:
        print(
            """\nERROR: The number of images in the tiltseries, %d, does not match
			the number of angles in the tlt file, %d""" % (nimgs, nangles))
        sys.exit(1)

    # Read essential info from the header of the first image
    hdr = EMData(options.tiltseries, 0, True)
    apix = hdr['apix_x']
    xsize = hdr['nx']
    ysize = hdr['ny']

    # Once all parameters and data have passed the checks, initialize logging
    logger = E2init(sys.argv, options.ppid)

    # Create new output directory for this run of the program
    options = makepath(options, options.path)

    if options.verbose > 2:
        print("\nGenerating this new directory to save results to:",
              options.path)

    options.path = os.getcwd() + "/" + options.path

    # Generate one projection operator shared by all 2D slice reconstructions
    if options.verbose:
        print("\nBuilding projection operator...")
    projection_operator = build_projection_operator(options, tiltangles, xsize,
                                                    nimgs, None, 0, None)

    # Initialize parallelism
    if options.verbose:
        print("\n\n(e2tvrecon.py) INITIALIZING PARALLELISM\n\n")

    from EMAN2PAR import EMTaskCustomer
    etc = EMTaskCustomer(options.parallel, "e2tvrecon.TVReconTask")

    # One task per Y row; nimgs already equals len(tiltangles) (checked above)
    tasks = [
        TVReconTask(options, xsize, ysize, y, projection_operator, tiltangles,
                    nimgs) for y in range(ysize)
    ]

    tids = etc.send_tasks(tasks)

    results = get_results(etc, tids, options)
    if options.verbose:
        print(
            "\nThese many results %d were computed because there were these many tasks %d"
            % (len(results), len(tasks)))

    # NOTE(review): presumably each result leads with its slice index so that
    # sorting restores Y order -- confirm against get_results/TVReconTask.
    results.sort()

    # The reconstruction is the last element of each result
    twodpath = options.path + "/slices.hdf"
    np_recons = []
    for i, result in enumerate(results):
        recon = result[-1]

        # Store 2D reconstructions in options.path if requested
        if options.saveslices:
            from_numpy(recon).write_image(twodpath, i)

        np_recons.append(recon)

    # Stack the 2-D slices along the third axis and carry over the pixel size
    reconstack = np.dstack(np_recons)
    threed_recon = from_numpy(reconstack)
    threed_recon['apix_x'] = apix
    threed_recon['apix_y'] = apix
    threed_recon['apix_z'] = apix

    threed_recon.rotate(0, -90, -90)
    threed_recon.write_image(options.path + '/' + options.output, 0)

    E2end(logger)
Пример #18
0
def main():
    progname = os.path.basename(sys.argv[0])
    usage = """prog <output> [options]

	This program produces iterative class-averages, one of the secrets to EMAN's rapid convergence.
	Normal usage is to provide a stack of particle images and a classification matrix file defining
	class membership. Members of each class are then iteratively aligned to each other and averaged
	together with (optional) CTF correction.  It is also possible to use this program on all of the
	images in a single stack.

	"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument("--input",
                        type=str,
                        help="The name of the input particle stack",
                        default=None)
    parser.add_argument("--output",
                        type=str,
                        help="The name of the output class-average stack",
                        default=None)
    parser.add_argument(
        "--oneclass",
        type=int,
        help="Create only a single class-average. Specify the number.",
        default=None)
    parser.add_argument(
        "--classmx",
        type=str,
        help=
        "The name of the classification matrix specifying how particles in 'input' should be grouped. If omitted, all particles will be averaged.",
        default=None)
    parser.add_argument(
        "--ref",
        type=str,
        help=
        "Reference image(s). Used as an initial alignment reference and for final orientation adjustment if present. Also used to assign euler angles to the generated classes. This is typically the projections that were used for classification.",
        default=None)
    parser.add_argument(
        "--storebad",
        action="store_true",
        help=
        "Even if a class-average fails, write to the output. Forces 1->1 numbering in output",
        default=False)
    parser.add_argument(
        "--decayedge",
        action="store_true",
        help=
        "Applies an edge decay to zero on the output class-averages. A very good idea if you plan on 3-D reconstruction.",
        default=False)
    parser.add_argument(
        "--resultmx",
        type=str,
        help=
        "Specify an output image to store the result matrix. This contains 5 images where row is particle number. Rows in the first image contain the class numbers and in the second image consist of 1s or 0s indicating whether or not the particle was included in the class. The corresponding rows in the third, fourth and fifth images are the refined x, y and angle (respectively) used in the final alignment, these are updated and accurate, even if the particle was excluded from the class.",
        default=None)
    parser.add_argument(
        "--iter",
        type=int,
        help="The number of iterations to perform. Default is 1.",
        default=1)
    parser.add_argument(
        "--prefilt",
        action="store_true",
        help=
        "Filter each reference (c) to match the power spectrum of each particle (r) before alignment and comparison",
        default=False)
    parser.add_argument(
        "--align",
        type=str,
        help=
        "This is the aligner used to align particles to the previous class average. Default is None.",
        default=None)
    parser.add_argument(
        "--aligncmp",
        type=str,
        help="The comparitor used for the --align aligner. Default is ccc.",
        default="ccc")
    parser.add_argument(
        "--ralign",
        type=str,
        help=
        "This is the second stage aligner used to refine the first alignment. This is usually the \'refine\' aligner.",
        default=None)
    parser.add_argument(
        "--raligncmp",
        type=str,
        help="The comparitor used by the second stage aligner.",
        default="ccc")
    parser.add_argument(
        "--averager",
        type=str,
        help="The type of averager used to produce the class average.",
        default="mean")
    parser.add_argument(
        "--setsfref",
        action="store_true",
        help=
        "This will impose the 1-D structure factor of the reference on the class-average (recommended when a reference is available)",
        default=False)
    parser.add_argument(
        "--cmp",
        type=str,
        help=
        "The comparitor used to generate quality scores for the purpose of particle exclusion in classes, strongly linked to the keep argument.",
        default="ccc")
    parser.add_argument(
        "--keep",
        type=float,
        help="The fraction of particles to keep in each class.",
        default=1.0)
    parser.add_argument(
        "--keepsig",
        action="store_true",
        help=
        "Causes the keep argument to be interpreted in standard deviations.",
        default=False)
    parser.add_argument(
        "--automask",
        action="store_true",
        help=
        "Applies a 2-D automask before centering. Can help with negative stain data, and other cases where centering is poor."
    )
    parser.add_argument(
        "--center",
        type=str,
        default="xform.center",
        help=
        "If the default centering algorithm (xform.center) doesn't work well, you can specify one of the others here (e2help.py processor center)"
    )
    parser.add_argument("--bootstrap",
                        action="store_true",
                        help="Ignored. Present for historical reasons only.")
    parser.add_argument(
        "--normproc",
        type=str,
        help=
        "Normalization processor applied to particles before alignment. Default is normalize.edgemean. If you want to turn this option off specify \'None\'",
        default="normalize.edgemean")
    parser.add_argument(
        "--usefilt",
        dest="usefilt",
        default=None,
        help=
        "Specify a particle data file that has been low pass or Wiener filtered. Has a one to one correspondence with your particle data. If specified will be used to align particles to the running class average, however the original particle will be used to generate the actual final class average"
    )
    parser.add_argument("--idxcache",
                        default=False,
                        action="store_true",
                        help="Ignored. Present for historical reasons.")
    parser.add_argument("--dbpath",
                        help="Ignored. Present for historical reasons.",
                        default=".")
    parser.add_argument(
        "--resample",
        action="store_true",
        help=
        "If set, will perform bootstrap resampling on the particle data for use in making variance maps.",
        default=False)
    parser.add_argument(
        "--odd",
        default=False,
        help=
        "Used by EMAN2 when running eotests. Includes only odd numbered particles in class averages.",
        action="store_true")
    parser.add_argument(
        "--even",
        default=False,
        help=
        "Used by EMAN2 when running eotests. Includes only even numbered particles in class averages.",
        action="store_true")
    parser.add_argument("--parallel",
                        default=None,
                        help="parallelism argument")
    parser.add_argument("--force",
                        "-f",
                        dest="force",
                        default=False,
                        action="store_true",
                        help="Force overwrite the output file if it exists.")
    parser.add_argument(
        "--saveali",
        action="store_true",
        help=
        "Writes aligned particle images to aligned.hdf. Normally resultmx produces more useful informtation. This can be used for debugging.",
        default=False)
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higner number means higher level of verboseness")
    parser.add_argument(
        "--debug",
        "-d",
        action="store_true",
        help=
        "Print debugging infromation while the program is running. Default is off.",
        default=False)
    parser.add_argument(
        "--nofilecheck",
        action="store_true",
        help=
        "Turns file checking off in the check functionality - used by e2refine.py.",
        default=False)
    parser.add_argument("--check",
                        "-c",
                        action="store_true",
                        help="Performs a command line argument check only.",
                        default=False)
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)

    (options, args) = parser.parse_args()

    if (options.check):
        options.verbose = 9  # turn verbose on if the user is only checking...

    error = check(options, True)

    if options.align: options.align = parsemodopt(options.align)
    if options.ralign: options.ralign = parsemodopt(options.ralign)
    if options.aligncmp: options.aligncmp = parsemodopt(options.aligncmp)
    if options.raligncmp: options.raligncmp = parsemodopt(options.raligncmp)
    if options.averager: options.averager = parsemodopt(options.averager)
    if options.cmp: options.cmp = parsemodopt(options.cmp)
    if options.normproc: options.normproc = parsemodopt(options.normproc)

    if options.resultmx != None: options.storebad = True

    if (options.verbose > 0):
        if (error):
            print "e2classaverage.py command line arguments test.... FAILED"
        else:
            print "e2classaverage.py command line arguments test.... PASSED"

    # returning a different error code is currently important to e2refine.py - returning 0 tells e2refine.py that it has enough
    # information to execute this script
    if error: exit(1)
    if options.check: exit(0)

    logger = E2init(sys.argv, options.ppid)
    print "Class averaging beginning"

    try:
        classmx = EMData.read_images(
            options.classmx
        )  # we keep the entire classification matrix in memory, since we need to update it in most cases
        ncls = int(classmx[0]["maximum"]) + 1
    except:
        ncls = 1
        if options.resultmx != None:
            print "resultmx can only be specified in conjunction with a valid classmx input."
            sys.exit(1)

    nptcl = EMUtil.get_image_count(options.input)

    try:
        apix = EMData(options.input, 0, True)["apix_x"]
    except:
        apix = 1.0
        print "WARNING: could not get apix from first image. Setting to 1.0. May impact results !"

    # Initialize parallelism
    if options.parallel:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer(options.parallel)
        pclist = [options.input]
        if options.ref: pclist.append(options.ref)
        if options.usefilt: pclist.append(options.usefilt)
        etc.precache(pclist)

    # prepare tasks
    tasks = []
    if ncls > 1:
        if options.oneclass == None: clslst = range(ncls)
        else: clslst = [options.oneclass]

        for cl in clslst:
            ptcls = classmx_ptcls(classmx[0], cl)
            if options.resample:
                ptcls = [
                    random.choice(ptcls) for i in ptcls
                ]  # this implements bootstrap resampling of the class-average
            if options.odd: ptcls = [i for i in ptcls if i % 2 == 1]
            if options.even: ptcls = [i for i in ptcls if i % 2 == 0]
            tasks.append(
                ClassAvTask(options.input, ptcls, options.usefilt, options.ref,
                            options.iter, options.normproc, options.prefilt,
                            options.align, options.aligncmp, options.ralign,
                            options.raligncmp, options.averager, options.cmp,
                            options.keep, options.keepsig, options.automask,
                            options.saveali, options.setsfref, options.verbose,
                            cl, options.center))

    else:
        ptcls = range(nptcl)
        if options.resample: ptcls = [random.choice(ptcls) for i in ptcls]
        if options.odd: ptcls = [i for i in ptcls if i % 2 == 1]
        if options.even: ptcls = [i for i in ptcls if i % 2 == 0]
        tasks.append(
            ClassAvTask(options.input, range(nptcl), options.usefilt,
                        options.ref, options.iter, options.normproc,
                        options.prefilt, options.align, options.aligncmp,
                        options.ralign, options.raligncmp, options.averager,
                        options.cmp, options.keep, options.keepsig,
                        options.automask, options.saveali, options.setsfref,
                        options.verbose, 0, options.center))

    # execute task list
    if options.parallel:  # run in parallel
        taskids = etc.send_tasks(tasks)
        alltaskids = taskids[:]

        while len(taskids) > 0:
            curstat = etc.check_task(taskids)
            for i, j in enumerate(curstat):
                if j == 100:
                    rslt = etc.get_results(taskids[i])
                    if rslt[1]["average"] != None:
                        rslt[1]["average"]["class_ptcl_src"] = options.input
                        if options.decayedge:
                            nx = rslt[1]["average"]["nx"]
                            rslt[1]["average"].process_inplace(
                                "normalize.circlemean",
                                {"radius": nx / 2 - nx / 15})
                            rslt[1]["average"].process_inplace(
                                "mask.gaussian", {
                                    "inner_radius": nx / 2 - nx / 15,
                                    "outer_radius": nx / 20
                                })
                            #rslt[1]["average"].process_inplace("mask.decayedge2d",{"width":nx/15})

                        if options.ref != None:
                            rslt[1]["average"][
                                "projection_image"] = options.ref
                        if options.storebad:
                            rslt[1]["average"].write_image(
                                options.output, rslt[1]["n"])
                        else:
                            rslt[1]["average"].write_image(options.output, -1)

                        # Update the resultsmx if requested
                        if options.resultmx != None:
                            allinfo = rslt[1][
                                "info"]  # the info result array list of (qual,xform,used) tuples
                            pnums = rslt[0].data["images"][
                                2]  # list of image numbers corresponding to information

                            for n, info in enumerate(allinfo):
                                y = pnums[n]  # actual particle number

                                # find the matching class in the existing classification matrix
                                for x in range(classmx[0]["nx"]):
                                    if classmx[0][x, y] == rslt[1][
                                            "n"]:  # if the class number in the classmx matches the current class-average number
                                        break
                                else:
                                    print "Resultmx error: no match found ! (%d %d %d)" % (
                                        x, y, rslt[1]["n"])
                                    continue
                                xform = info[1].get_params("2d")
                                classmx[1][x, y] = info[2]  # used
                                classmx[2][x, y] = xform["tx"]  # dx
                                classmx[3][x, y] = xform["ty"]  # dy
                                classmx[4][x, y] = xform["alpha"]  # da
                                classmx[5][x, y] = xform["mirror"]  # flip
                                try:
                                    classmx[6][x, y] = xform["scale"]
                                except:
                                    pass
                    # failed average
                    elif options.storebad:
                        blk = EMData(options.ref, 0)
                        apix = blk["apix_x"]
                        blk = EMData(blk["nx"], blk["ny"], 1)
                        blk["apix_x"] = apix
                        blk.to_zero()
                        blk.set_attr("ptcl_repr", 0)
                        blk.set_attr("apix_x", apix)
                        blk.write_image(options.output, rslt[1]["n"])

            taskids = [j for i, j in enumerate(taskids) if curstat[i] != 100]

            if options.verbose and 100 in curstat:
                print "%d/%d tasks remain" % (len(taskids), len(alltaskids))
            if 100 in curstat:
                E2progress(logger,
                           1.0 - (float(len(taskids)) / len(alltaskids)))

            time.sleep(3)

        if options.verbose: print "Completed all tasks"

    # single thread
    else:
        for t in tasks:
            rslt = t.execute()
            if rslt == None: sys.exit(1)

            if rslt["average"] != None:
                rslt["average"]["class_ptcl_src"] = options.input
                if options.decayedge:
                    nx = rslt["average"]["nx"]
                    rslt["average"].process_inplace(
                        "normalize.circlemean", {"radius": nx / 2 - nx / 15})
                    rslt["average"].process_inplace(
                        "mask.gaussian", {
                            "inner_radius": nx / 2 - nx / 15,
                            "outer_radius": nx / 20
                        })
                    #rslt["average"].process_inplace("mask.decayedge2d",{"width":nx/15})
                if options.ref != None:
                    rslt["average"]["projection_image"] = options.ref
                try:
                    if options.storebad:
                        rslt["average"].write_image(options.output,
                                                    t.options["n"])
                    else:
                        rslt["average"].write_image(options.output, -1)
                except:
                    traceback.print_exc()
                    print "Error writing class average {} to {}".format(
                        t.options["n"], options.output)
                    print "Image attr: ", rslt["average"].get_attr_dict()
                    display(rslt["average"])
                    sys.exit(1)

                # Update the resultsmx if requested
                if options.resultmx != None:
                    allinfo = rslt[
                        "info"]  # the info result array list of (qual,xform,used) tuples
                    pnums = t.data["images"][
                        2]  # list of image numbers corresponding to information
                    for n, info in enumerate(allinfo):
                        y = pnums[n]  # actual particle number

                        # find the matching class in the existing classification matrix
                        for x in range(classmx[0]["nx"]):
                            if classmx[0][x, y] == rslt[
                                    "n"]:  # if the class number in the classmx matches the current class-average number
                                break
                        else:
                            print "Resultmx error: no match found ! (%d %d %d)" % (
                                x, y, rslt[1]["n"])
                            continue
                        xform = info[1].get_params("2d")
                        classmx[1][x, y] = info[2]  # used
                        classmx[2][x, y] = xform["tx"]  # dx
                        classmx[3][x, y] = xform["ty"]  # dy
                        classmx[4][x, y] = xform["alpha"]  # da
                        classmx[5][x, y] = xform["mirror"]  # flip
                        try:
                            classmx[6][x, y] = xform["scale"]
                        except:
                            pass

            # Failed average
            elif options.storebad:
                blk = EMData(options.ref, 0)
                apix = blk["apix_x"]
                blk = EMData(blk["nx"], blk["ny"], 1)
                blk["apix_x"] = apix
                blk.to_zero()
                blk.set_attr("ptcl_repr", 0)
                blk.set_attr("apix_x", apix)
                blk.write_image(options.output, t.options["n"])

    if options.resultmx != None:
        if options.verbose: print "Writing results matrix"
        for i, j in enumerate(classmx):
            j.write_image(options.resultmx, i)

    print "Class averaging complete"
    E2end(logger)
Пример #19
0
def main():
	"""Split a finished e2refine_easy reconstruction into two more homogeneous 3-D maps.

	Command-line entry point; takes no arguments and returns nothing. The steps are:

	1. Locate the refine_XX folder (--path, or the last one found in the current directory) and read
	   last_map, targetres, sym and the particle sets from its 0_refine_parms.json.
	2. Regroup the even/odd cls_result entries of the last iteration into one particle list per class.
	3. Run one ClassSplitTask for every class holding at least 20 particles.
	4. Insert each returned pair of averages into a pair of Fourier reconstructors, choosing for every
	   class the pairing (a->0,b->1 or swapped) with the larger summed "reconstruct_absqual".
	5. Write both volumes, the two class-average stacks and the two particle sets, then launch
	   e2proc3d.py to filter the volumes.

	Exits with status 1 if --basisn is out of range, if no refinement folder or parameter file can be
	read, or if no class yields a usable pair of averages.
	"""
	import time		# only needed to pace the task polling loop below

	usage = """prog <output> [options]

	The goal of this program is to reduce the heterogeneity of a reconstruction by splitting a single map
	into two maps, each more homogeneous. You must run e2refine_easy to completion before using this program.
	It will take the class-averaging results from the final iteration, and split the particles from each 
	class-average into 2 groups, producing 2 class-averages for each. The program then attempts to construct
	a maximally self-consistent grouping of these pairs of class averages into 2 3-D maps. 
	"""

	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	parser.add_argument("--path", default=None, type=str,help="The name of an existing refine_xx folder, where e2refine_easy ran to completion",guitype='filebox', filecheck=False,browser="EMBrowserWidget(withmodal=True,multiselect=False)", row=3, col=0, rowspan=1, colspan=3)
	parser.add_argument("--basisn", default=1,type=int,help="Select which Eigenimage to use for separation. 1 = highest energy. max = 5", guitype='intbox', row=4, col=0, rowspan=1, colspan=1)
	parser.add_argument("--parallel", default="thread:2", help="Standard parallelism option. Default=thread:2", guitype='strbox', row=5, col=0, rowspan=1, colspan=2)
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n",type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-1)

	(options, args) = parser.parse_args()

	if options.basisn<1 or options.basisn>5 :
		print("Error: basisn must be in the 1-5 range")
		sys.exit(1)

	if options.path is None:
		# default to the last (alphabetically highest) 9-character refine_XX entry in the current directory
		paths=sorted(i for i in os.listdir(".") if "refine_" in i and len(i)==9)
		if not paths :
			print("Error: no refine_XX folder found in the current directory, please specify --path")
			sys.exit(1)
		options.path=paths[-1]

	# a trailing separator (e.g. from shell tab-completion) would corrupt the 2-character folder number
	options.path=options.path.rstrip("/") or options.path
	pathnum=options.path[-2:]

	# check the specified path for the files we need
	try:
		olddb = js_open_dict(options.path+"/0_refine_parms.json")
		last_map=olddb["last_map"]
		targetres=olddb["targetres"]
		last_iter=int(last_map.split("_")[-1][:2])
		# use "inputavg" when it is present and not None, otherwise fall back to "input"
		try: ptcls=olddb["inputavg"]
		except Exception: ptcls=None
		if ptcls is None : ptcls=olddb["input"]

		sym=olddb["sym"]
		if options.verbose : print("Found iteration {} in {}, using {}".format(last_iter,options.path," & ".join(ptcls)))
	except Exception:
		traceback.print_exc()
		print("Error: Cannot find necessary files in  {}".format(options.path))
		sys.exit(1)

	logger=E2init(sys.argv,options.ppid)

	# classmx is a list with 2 elements (even, odd). Each element is a list of EMData from the corresponding cls_result file
	classmx=[EMData.read_images("{}/cls_result_{:02d}_{}.hdf".format(options.path,last_iter,half)) for half in ("even","odd")]
	ncls=max(int(classmx[0][0]["maximum"])+1,int(classmx[1][0]["maximum"])+1)

	# Rearrange the info in classmx: one list per class of [eo, particle number, 2-D Transform] entries
	classlists=[[] for i in xrange(ncls)]
	for eo in (0,1):
		for y in xrange(classmx[eo][0]["ny"]):
			ptcl=[eo,y,Transform({"type":"2d","tx":classmx[eo][2][0,y],"ty":classmx[eo][3][0,y],"alpha":classmx[eo][4][0,y],"mirror":int(classmx[eo][5][0,y])})]
			classlists[int(classmx[eo][0][0,y])].append(ptcl)

	# Initialize parallelism
	from EMAN2PAR import EMTaskCustomer
	etc=EMTaskCustomer(options.parallel)

	# Euler angles for averages
	projin="{}/projections_{:02d}_even.hdf".format(options.path,last_iter)
	eulers=[EMData(projin,i,True)["xform.projection"] for i in xrange(ncls)]

	# prepare tasks; we require at least 20 particles in a class to make the attempt
	ns=[classmx[eo][0]["ny"] for eo in (0,1)]
	tasks=[]
	for c,cl in enumerate(classlists):
		if len(cl)<20 : continue
		tasks.append(ClassSplitTask(ptcls,ns,cl,c,eulers[c],options.basisn,options.verbose-1))

	if not tasks :
		print("Error: no class contains at least 20 particles, nothing to split")
		sys.exit(1)

	# execute task list
	taskids=etc.send_tasks(tasks)
	alltaskids=taskids[:]

	classes=[]
	while len(taskids)>0 :
		curstat=etc.check_task(taskids)
		for i,j in enumerate(curstat):
			if j!=100 : continue			# a status of 100 marks a finished task
			rslt=etc.get_results(taskids[i])
			rsltd=rslt[1]
			cls=rslt[0].options["classnum"]
			if "failed" in rsltd :
				print("Bad average in  {}".format(cls))
				continue
			nptcl=rsltd["avg1"]["ptcl_repr"]+rsltd["avg2"]["ptcl_repr"]
			# note that the 2 results we get back are in arbitrary order!
			# the next section of code with 3D reconstruction is designed to sort out
			# which average should be paired with which
			classes.append([nptcl,rsltd["avg1"]["xform.projection"],rsltd["avg1"],rsltd["avg2"],rsltd["basis"]])	# list of (ptcl_repr,xform,avg1,avg2,basis)

		taskids=[j for i,j in enumerate(taskids) if curstat[i]!=100]

		if 100 in curstat :
			if options.verbose : print("%d/%d tasks remain"%(len(taskids),len(alltaskids)))
			E2progress(logger,1.0-(float(len(taskids))/len(alltaskids)))

		# pause between polls instead of spinning on check_task while work is still pending
		if taskids : time.sleep(1)

	if options.verbose : print("Completed all tasks\nGrouping consistent averages")

	if not classes :
		print("Error: no class produced a usable pair of averages, nothing to reconstruct")
		sys.exit(1)

	# we want to start with the largest number of particles; sorting on the count alone avoids
	# falling back to comparing Transform/EMData objects when two counts are equal
	classes.sort(key=lambda entry: entry[0],reverse=True)
	apix=classes[0][2]["apix_x"]

	boxsize=classes[0][2]["ny"]
	pad=good_size(boxsize*1.5)
	if options.verbose: print("Boxsize -> {}, padding to {}".format(boxsize,pad))

	padorigin=-(pad-boxsize)/2		# corner of the padded box relative to the unpadded image
	def padclip(img):
		# copy of img placed in a pad x pad box
		return img.get_clip(Region(padorigin,padorigin,pad,pad))

	# a pair of reconstructors. we will then simultaneously reconstruct in the pair, and use each to decide on the best target for each particle
	recon=[Reconstructors.get("fourier",{"size":[pad,pad,pad],"sym":sym,"mode":"gauss_5"}) for i in (0,1)]
	for r in recon: r.setup()

	# The first class (with the most particles) seeds the two reconstructors: avg1 -> 0, avg2 -> 1
	for r,avg in zip(recon,(classes[0][2],classes[0][3])):
		p3=r.preprocess_slice(padclip(avg),classes[0][1])
		r.insert_slice(p3,classes[0][1],avg.get_attr_default("ptcl_repr",1.0))
	classes[0].append(0)

	if options.verbose : print("Reconstruction: pass 1")
	for i,c in enumerate(classes[1:]):
		xf=c[1]		# orientation of this class
		# NOTE(review): the original passed classes[0][1] (the first class's orientation) to
		# preprocess_slice for every class, presumably a copy/paste slip; this class's own
		# orientation is used here, matching the determine_slice_agreement/insert_slice calls below.
		a3=recon[0].preprocess_slice(padclip(c[2]),xf)		# first class-average
		a3n=c[2].get_attr_default("ptcl_repr",1.0)

		b3=recon[1].preprocess_slice(padclip(c[3]),xf)		# I don't believe it matters if we use recon[0] or 1 here, but haven't checked
		b3n=c[3].get_attr_default("ptcl_repr",1.0)

		# each call overwrites the attributes on the slice, so they are read back immediately
		recon[0].determine_slice_agreement(a3,xf,a3n,False)
		q0a=a3["reconstruct_absqual"]		# quality for average a in reconstruction 0
		n0a=a3["reconstruct_norm"]			# normalization for same

		recon[1].determine_slice_agreement(a3,xf,a3n,False)
		q1a=a3["reconstruct_absqual"]		# quality for average a in reconstruction 1
		n1a=a3["reconstruct_norm"]			# normalization for same

		recon[0].determine_slice_agreement(b3,xf,b3n,False)
		q0b=b3["reconstruct_absqual"]		# quality for average b in reconstruction 0
		n0b=b3["reconstruct_norm"]			# normalization for same

		recon[1].determine_slice_agreement(b3,xf,b3n,False)
		q1b=b3["reconstruct_absqual"]		# quality for average b in reconstruction 1
		n1b=b3["reconstruct_norm"]			# normalization for same

		if options.verbose>1 : print(" ".join(str(v) for v in (i,q0a,q1a,q0b,q1b,q0a+q1b,q1a+q0b)))

		if q0a+q1b>q1a+q0b :		# if true, a -> recon0 and b -> recon1
			c.append(0)				# we put a 0 at the end of the classes element if we use a->0,b->1 ordering, 1 if swapped
			a3.mult(n0a)
			recon[0].insert_slice(a3,xf,a3n)
			b3.mult(n1b)
			recon[1].insert_slice(b3,xf,b3n)
		else:
			c.append(1)
			a3.mult(n1a)
			recon[1].insert_slice(a3,xf,a3n)
			b3.mult(n0b)
			recon[0].insert_slice(b3,xf,b3n)

	# NOTE: a second pass re-checking the assignments against the filled reconstructions was
	# disabled (commented out) in the original; only the progress message remains.
	if options.verbose : print("Reconstruction: pass 2")

	if options.verbose : print("All done, writing output")

	classout=["{}/classes_{:02d}_bas{}_split0.hdf".format(options.path,last_iter,options.basisn),"{}/classes_{:02d}_bas{}_split1.hdf".format(options.path,last_iter,options.basisn)]
	basisout="{}/classes_{:02d}_basis".format(options.path,last_iter)
	threedout="{}/threed_{:02d}_split.hdf".format(options.path,last_iter)
	threedout2="{}/threed_{:02d}_split_filt_bas{}.hdf".format(options.path,last_iter,options.basisn)
	setout=["sets/split_{}_0.lst".format(pathnum),"sets/split_{}_1.lst".format(pathnum)]
	tmpsets=["sets/split0.lst","sets/split1.lst"]		# unsorted scratch lists, merged into setout below

	# finish both reconstructions, cut the padded volumes back to the original box, and write them out
	cliporigin=(pad-boxsize)/2
	split=[r.finish(True).get_clip(Region(cliporigin,cliporigin,cliporigin,boxsize,boxsize,boxsize)) for r in recon]
	for n,vol in enumerate(split):
		vol["apix_x"]=apix
		vol["apix_y"]=apix
		vol["apix_z"]=apix
		vol.process_inplace("mask.soft",{"outer_radius":-8,"width":4})
		vol.write_image(threedout,n)

	# now we write the class-averages and the new (split) particle files
	lstin =[LSXFile(ptcls[0],True),LSXFile(ptcls[1],True)]
	for tmp in tmpsets:
		# remove each scratch file independently, so a missing first file cannot leave a stale second one
		try: os.unlink(tmp)
		except OSError: pass
	lstout=[LSXFile(tmp) for tmp in tmpsets]
	for i,c in enumerate(classes):
		# c[-1] is 0 for a->0,b->1 and 1 if swapped. The particles must follow their class-average
		# into the same split, otherwise the particle sets would not match the two maps.
		for avg,dest in ((c[2],c[-1]),(c[3],c[-1]^1)):
			avg.write_image(classout[dest],i)	# class-average
			ptcln=avg["class_eoidxs"]			# eofile/ptcl# pairs
			for p in xrange(0,len(ptcln),2):
				lstout[dest][-1]=lstin[ptcln[p]][ptcln[p+1]]		# weird syntax, but the -1 here appends

		if options.verbose>2:
			for k in (0,1,2):
				c[4][k].write_image("{}{}.hdf".format(basisout,k+1),i)

	launch_childprocess("e2proclst.py sets/split0.lst --mergesort {}".format(setout[0]))
	launch_childprocess("e2proclst.py sets/split1.lst --mergesort {}".format(setout[1]))

	for tmp in tmpsets:
		try: os.unlink(tmp)
		except OSError: pass

	# filter the split maps; the structure factor is only applied when strucfac.txt is available
	setsf=""
	if os.path.exists("strucfac.txt"):
		setsf="--setsf strucfac.txt "
	else:
		print("Missing structure factor, cannot filter properly")
	launch_childprocess("e2proc3d.py {} {} {}--process filter.wiener.byfsc:fscfile={}/fsc_masked_{:02d}.txt:snrmult=2:sscale=1.1:maxfreq={} --process mask.soft:outer_radius=-9:width=4".format(threedout,threedout2,setsf,options.path,last_iter,1.0/targetres))

	E2end(logger)
Пример #20
0
def main():
    """Preprocess every subtomogram of the input stack and write the results to an HDF stack.

    Command-line entry point; takes no arguments and returns None. If at least one
    preprocessing option (mask, maskfile, threshold, clip, normproc, preprocess, lowpass,
    highpass, or shrink > 1) is given, the output stack is first filled with 8x8x8 dummy
    images, one per input particle. Each particle is then handed either to preprocfunc
    (serial) or to a Preproc3DTask (when --parallel is set), both of which receive the
    particle index and the output stack name. Without any such option the program only
    reports that there is nothing to do.

    Exits with status 1 when no input file is given or the input stack cannot be read.
    """
    usage = """prog <output> [options]

	This program is used to preprocess subtomograms before aligning them. The same can be accomplished with 
	e2proc3d, except that this program is parallelized and thus should be substantially faster for large subtomograms.
	"""

    parser = EMArgumentParser(usage=usage, version=EMANVERSION)

    parser.add_argument("--input", type=str, default='', help="""Default=None. The name of the input volume stack. MUST be HDF since volume stack support is required.""")
    parser.add_argument("--output", type=str, default='', help="""Default=None. Specific name of HDF file to write processed particles to.""")
    parser.add_argument("--parallel", type=str, default='', help="""default=None. Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel""")
    parser.add_argument("--ppid", type=int, help="""Default=-1. Set the PID of the parent process, used for cross platform PPID""", default=-1)
    parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="""Default=0. Verbose level [0-9], higner number means higher level of verboseness""")
    parser.add_argument("--subset", type=int, default=0, help="""Default=0 (not used). Refine only this substet of particles from the stack provided through --input""")
    parser.add_argument("--apix", type=float, default=0.0, help="""Default=0.0 (not used). Use this apix value where relevant instead of whatever is in the header of the reference and the particles. Will overwrite particle header as well.""")
    parser.add_argument("--shrink", type=int, default=0, help="""Default=0 (no shrinking). Optionally shrink the input volumes by an integer amount for coarse alignment.""")
    parser.add_argument("--threshold", type=str, default='', help="""Default=None. A threshold applied to the subvolumes after normalization. For example, --threshold=threshold.belowtozero:minval=0 makes all negative pixels equal 0, so that they do not contribute to the correlation score.""")
    parser.add_argument("--mask", type=str, default='', help="""Default=None. Masking processor applied to particles before alignment. IF using --clip, make sure to express outer mask radii as negative pixels from the edge.""")
    parser.add_argument("--maskfile", type=str, default='', help="""Default=None. Mask file (3D IMAGE) applied to particles before alignment. Must be in HDF format. Default is None.""")
    parser.add_argument("--normproc", type=str, default='', help="""Default=None (see 'e2help.py processors -v 10' at the command line). Normalization processor applied to particles before alignment. If normalize.mask is used, results of the mask option will be passed in automatically. If you want to turn this option off specify \'None\'""")
    parser.add_argument("--preprocess", type=str, default='', help="""Any processor (see 'e2help.py processors -v 10' at the command line) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""")
    parser.add_argument("--lowpass", type=str, default='', help="""Default=None. A lowpass filtering processor (see 'e2help.py processors -v 10' at the command line) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""")
    parser.add_argument("--highpass", type=str, default='', help="""Default=None. A highpass filtering processor (see 'e2help.py processors -v 10' at the command line) to be applied to each volume prior to COARSE alignment. Not applied to aligned particles before averaging.""")
    parser.add_argument("--clip", type=int, default=0, help="""Default=0 (which means it's not used). Boxsize to clip particles. For example, the boxsize of the particles might be 100 pixels, but the particles are only 50 pixels in diameter. Aliasing effects are not always as deleterious for all specimens, and sometimes 2x padding isn't necessary.""")
    parser.add_argument("--nopath", action='store_true', default=False, help="""If supplied, this option will save results in the directory where the command is run. A directory to store the results will not be made.""")
    parser.add_argument("--path", type=str, default='sptpreproc', help="""Default=spt. Directory to store results in. The default is a numbered series of directories containing the prefix 'sptpreproc'; for example, sptpreproc_02 will be the directory by default if 'sptpreproc_01' already exists.""")

    (options, args) = parser.parse_args()

    logger = E2init(sys.argv, options.ppid)
    print("\n(e2spt_preproc)(main) started log")

    from e2spt_classaverage import sptmakepath

    if options.path and not options.nopath:
        options = sptmakepath(options, 'sptpreproc')

    # the literal strings 'None'/'none' switch parallelism off
    if options.parallel == 'None' or options.parallel == 'none':
        options.parallel = None

    if not options.input:
        # fall back to the first command-line token when --input was not supplied
        try:
            options.input = sys.argv[1]
        except IndexError:
            print("\n(e2spt_preproc)(main) ERROR: invalid input file")
            sys.exit(1)

    anything_to_do = (options.mask or options.maskfile or options.threshold
                      or options.clip or options.normproc or options.preprocess
                      or options.lowpass or options.highpass
                      or int(options.shrink) > 1)
    if not anything_to_do:
        print("\n(e2spt_preproc)(main) Nothing to do. No preprocessing parameters specified.")
        E2end(logger)
        return

    # default output name: <input basename>_preproc.hdf, inside --path unless --nopath is given
    preprocstack = str(
        os.path.basename(options.input).replace('.hdf', '_preproc.hdf'))
    if options.path and not options.nopath:
        preprocstack = options.path + '/' + preprocstack

    if options.output:
        if options.output.endswith('.hdf'):
            preprocstack = options.output
        else:
            # the default name computed above stays in effect in this case
            print("\n(e2spt_preproc)(main) ERROR: '.hdf' must be the last four characters of the output filename.")

    print("\n(e2spt_preproc)(main) output stack will be %s" % (preprocstack))

    try:
        n = EMUtil.get_image_count(options.input)
    except Exception:
        print("\n(e2spt_preproc)(main) ERROR: --input stack seems to be invalid")
        sys.exit(1)

    print("\n(e2spt_preproc)(main) number of particles is %d" % (n))

    # Test the actual output path: a listing of the current directory can never match a
    # name that carries a directory prefix.
    if os.path.exists(preprocstack):
        print("\n(e2spt_preproc)(main) WARNING: a file with the name of the output stack %s is already in the current directory and will be DELETED" % (preprocstack))
        os.remove(preprocstack)

    # fill the output stack with one small dummy image per particle
    dimg = EMData(8, 8, 8)
    dimg.to_one()
    for i in range(n):
        dimg.write_image(preprocstack, i)

    if options.verbose:
        print("\n(e2spt_preproc)(main) wrote dummy ptcls to %s" % (preprocstack))

    print("\n(e2spt_preproc)(main) - INITIALIZING PARALLELISM!\n")

    tasks = []  # always defined, so the check after the loop cannot hit an unbound name
    if options.parallel:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer(options.parallel)
        etc.precache([options.input])
        print("\n(e2spt_preproc)(main) - precaching --input")

    from e2spt_classaverage import sptOptionsParser
    options = sptOptionsParser(options)

    for j in range(n):
        # read each particle once; it is either queued as a task or processed right away
        img = EMData(options.input, j)
        if options.parallel:
            tasks.append(Preproc3DTask(img, options, j, preprocstack))
        else:
            preprocfunc(img, options, j, preprocstack)

    if options.parallel and tasks:
        tids = etc.send_tasks(tasks)
        if options.verbose:
            print("\n(e2spt_preproc)(main) preprocessing %d tasks queued" % (len(tids)))

        # get_results is given the task ids; its return value is not used here
        get_results(etc, tids, options)

    E2end(logger)

    return
Пример #21
0
def main():
    """Classify particles against a binary tree built from projections.

    Command line: ``e2classifytree.py <projection> <particle> [options]``.

    Steps performed:
      1. Build the binary tree from the projection file; ``options.nodes``
         is always overwritten.
      2. With ``--cmpdiff``, generate temporary child-pair comparison and
         mask files next to the nodes file (removed again at the end).
      3. Classify every particle, either through EMTaskCustomer tasks
         (``--parallel``) or with ``--threads`` local threads.
      4. Write the seven ``EMData(1, npt)`` result images to
         ``options.output``, replacing any existing file.
    """

    usage = """e2classifytree.py <projection> <particle> [options]
	
	Classify particles using a binary tree. Can be used as an alternative for e2simmx2stage.py + e2classify.py.
	"""
    parser = EMArgumentParser(usage=usage, version=EMANVERSION)
    parser.add_argument("--threads", type=int, help="", default=12)
    parser.add_argument("--nodes", type=str, help="", default="nodes.hdf")
    parser.add_argument("--output", type=str, help="", default="clsmx.hdf")
    parser.add_argument(
        "--align",
        type=str,
        help="The name of an 'aligner' to use prior to comparing the images",
        default=None)
    parser.add_argument(
        "--aligncmp",
        type=str,
        help="Name of the aligner along with its construction arguments",
        default="dot")
    parser.add_argument(
        "--ralign",
        type=str,
        help=
        "The name and parameters of the second stage aligner which refines the results of the first alignment",
        default=None)
    parser.add_argument(
        "--raligncmp",
        type=str,
        help=
        "The name and parameters of the comparitor used by the second stage aligner. Default is dot.",
        default="dot")
    parser.add_argument(
        "--cmp",
        type=str,
        help="The name of a 'cmp' to be used in comparing the aligned images",
        default="dot:normalize=1")
    parser.add_argument(
        "--cmpdiff",
        action="store_true",
        default=False,
        help="Compare using the difference of the two children")
    parser.add_argument(
        "--incomplete",
        type=int,
        help="The degree of incomplete allowed in the tree on each level",
        default=0)
    parser.add_argument(
        "--ppid",
        type=int,
        help="Set the PID of the parent process, used for cross platform PPID",
        default=-1)
    parser.add_argument("--parallel",
                        default=None,
                        help="parallelism argument")
    parser.add_argument(
        "--verbose",
        "-v",
        dest="verbose",
        action="store",
        metavar="n",
        type=int,
        default=0,
        help=
        "verbose level [0-9], higher number means higher level of verboseness")

    (options, args) = parser.parse_args()

    # Both positional inputs are indexed below; report the problem instead
    # of failing with a bare IndexError.
    if len(args) < 2:
        print("ERROR: a projection file and a particle file are both required.")
        print(usage)
        sys.exit(1)

    E2n = E2init(sys.argv, options.ppid)

    options.align = parsemodopt(options.align)
    options.aligncmp = parsemodopt(options.aligncmp)
    options.ralign = parsemodopt(options.ralign)
    options.raligncmp = parsemodopt(options.raligncmp)
    options.cmp = parsemodopt(options.cmp)

    projs = args[0]
    ptcl = args[1]
    # npj is not used further down; the call is retained unchanged.
    npj = EMUtil.get_image_count(projs)
    npt = EMUtil.get_image_count(ptcl)

    # Tree building always needs a parallelism string; fall back to local
    # threads when --parallel was not given.
    if options.parallel is None:
        par = "thread:{:d}".format(options.threads)
    else:
        par = options.parallel

    ### Build tree (any existing tree file is always overwritten)
    print("Building binary tree...")
    buildtree(projs, par, options.nodes, options.incomplete, options.verbose)

    ## Generate children pairs for comparison
    print("Generating children pairs for comparison...")
    if options.cmpdiff:
        # os.path.join keeps the temporaries in the current directory when
        # --nodes has no directory part; joining with '/' would have
        # produced an absolute path in the filesystem root.
        nodepath = os.path.dirname(options.nodes)
        masktmp = os.path.join(nodepath, "tmp_msk.hdf")
        cmptmp = os.path.join(nodepath, "tmp_cmp.hdf")
        for stale in (masktmp, cmptmp):
            if os.path.isfile(stale):
                os.remove(stale)
        makechildpair(options.nodes, cmptmp, masktmp)
    else:
        masktmp = None
        cmptmp = None

    E2progress(E2n, 0.5)
    print("Starting classification...")

    ### Classify particles
    clsmx = [EMData(1, npt) for i in range(7)]
    nnod = EMUtil.get_image_count(options.nodes)
    if options.parallel:
        from EMAN2PAR import EMTaskCustomer
        etc = EMTaskCustomer(options.parallel,
                             module="e2classifytree.TreeClassifyTask")

        # Split the particle indices into batches of at most `step`.
        # Slicing by range(0, npt, step) also covers npt <= step, where the
        # previous tt[-1] lookup failed on an empty list.
        step = 50
        tt = [list(range(i, min(i + step, npt))) for i in range(0, npt, step)]

        tasks = [
            TreeClassifyTask(ptcl, it, options.nodes, options.align,
                             options.aligncmp, options.cmp, options.ralign,
                             options.raligncmp, cmptmp, masktmp)
            for it in tt
        ]

        taskids = etc.send_tasks(tasks)
        ptclpernode = [0 for i in range(nnod)]
        nfinished = 0
        while len(taskids) > 0:
            haveprogress = False
            time.sleep(3)
            curstat = etc.check_task(taskids)
            for i, j in enumerate(curstat):
                if j == 100:
                    haveprogress = True
                    rslt = etc.get_results(taskids[i])
                    rslt = rslt[1]
                    for r in rslt:
                        nfinished += 1
                        if options.verbose > 0:
                            print("Particle:", r["id"], "\tnodes:",
                                  r["choice"])
                        for c in r["choice"]:
                            ptclpernode[c] += 1
                        clsmx[0].set_value_at(0, r["id"], r["cls"])
                        for nt in range(1, 7):
                            clsmx[nt].set_value_at(0, r["id"], r["simmx"][nt])

            # Keep only the tasks that have not reported completion yet.
            taskids = [j for i, j in enumerate(taskids) if curstat[i] != 100]
            if haveprogress: print("{:d}/{:d} finished".format(nfinished, npt))
            # Classification is the second half of the job, so map the
            # finished fraction onto [0.5, 1.0] rather than overshooting 1.0.
            E2progress(E2n, 0.5 + 0.5 * float(nfinished) / npt)

        # Record the number of particles that visited each node.
        for i in range(nnod):
            ndtmp = EMData(options.nodes, i, True)
            ndtmp["tree_nptls"] = ptclpernode[i]
            ndtmp.write_image(options.nodes, i)

    else:

        ### To record the number of particles in each branch of the tree
        for i in range(nnod):
            ndtmp = EMData(options.nodes, i, True)
            ndtmp["tree_nptls"] = 0
            ndtmp.write_image(options.nodes, i)

        # Thread i handles the particles whose index is congruent to i
        # modulo the thread count; all threads share clsmx.
        workers = []
        for i in range(options.threads):
            ai = [x for x in range(npt) if x % options.threads == i]
            worker = threading.Thread(target=classify,
                                      args=(ptcl, ai, options.nodes, clsmx,
                                            options.align, options.aligncmp,
                                            options.cmp, options.ralign,
                                            options.raligncmp, cmptmp, masktmp))
            worker.start()
            workers.append(worker)
        for worker in workers:
            worker.join()

    # Start from a clean output file, then append the result images.
    if os.path.isfile(options.output):
        os.remove(options.output)
    for i in clsmx:
        i.write_image(options.output, -1)

    if options.cmpdiff:
        os.remove(cmptmp)
        os.remove(masktmp)
    print("Finished~")
    E2progress(E2n, 1.0)
    E2end(E2n)
Пример #22
0
def main():
	"""Erase gold fiducials and other high-density features from image stacks.

	Each positional argument is processed in turn:
	  1. '.ali' / '.mrc' inputs are first unstacked into a 2D '.hdf' stack
	     with e2proc2d.py (honouring --subset).
	  2. An output stack '<input stem>_proc.hdf' is pre-created with one
	     placeholder per image; the program exits with status 1 if that file
	     already exists.
	  3. Every image is processed by fiximage() directly, or by
	     EraseGold2DTask tasks when --parallel is given. In parallel mode the
	     per-image 'erasegold_tmp-*' files are merged with e2buildstacks.py.
	  4. For '.ali' / '.mrc' inputs the result is converted back with
	     e2proc2d.py --twod2threed and both intermediate '.hdf' stacks are
	     deleted.
	  5. Any leftover 'erasegold_tmp' files in the current directory are
	     removed.
	"""
	usage = """prog [options] stack1.hdf stack2.mrcs ...

	Program to erase gold fiducials and other high-density features from images, such as frames in DDD movies or images in tiltseries. Requires scipy.
	"""

	parser = EMArgumentParser(usage=usage,version=EMANVERSION)

	parser.add_argument("--apix", default=None, type=float, help="Override Apix in image header.")
	parser.add_argument("--lowpass", default=False, action="store_true", help="Also lowpass filter noise based on local properties. Useful for processing tomographic tilt series.")
	parser.add_argument("--keepdust", default=False, action="store_true", help="Do not remove 'dust' from mask (include objects smaller than gold fiducials).")
	parser.add_argument("--goldsize", default=30, type=float, help="Diameter (in pixels) of gold fiducials to erase.")
	parser.add_argument("--oversample", default=4, type=int, help="Oversample noise image to smooth transitions from regions with different noise.")
	parser.add_argument("--boxsize", default=128, type=int, help="Box size to use when computing local noise.")
	parser.add_argument("--debug", default=False, action="store_true", help="Save noise and mask/masked image(s).")
	parser.add_argument("--verbose", "-v", dest="verbose", action="store", metavar="n", type=int, default=0, help="verbose level [0-9], higner number means higher level of verboseness")
	parser.add_argument("--ppid", type=int, help="Set the PID of the parent process, used for cross platform PPID",default=-2)
	parser.add_argument("--parallel",type=str, default=None, help="""Default=None (not used). Parallelism. See http://blake.bcm.edu/emanwiki/EMAN2/Parallel""")

	parser.add_argument("--subset", default=0, type=int, help="Default=0 (not used). Apply algorithm to only a subset of images in each stack file.")
	parser.add_argument("--nsigmas", default=3.0,type=float, help="Default=3.0. Number of standard deviations above the mean to determine pixels to mask out (erase).")

	(options, args) = parser.parse_args()

	if options.parallel:
		from EMAN2PAR import EMTaskCustomer
		etc = EMTaskCustomer(options.parallel)

	for arg in args:

		t0 = time.time()

		originalarg = arg
		# '.ali' and '.mrc' inputs are handled as 3D files that must be
		# unstacked before processing and restacked afterwards.
		is3d = originalarg[-4:] in ('.ali', '.mrc')

		hdr = EMData(arg,0,True) #load header only to get parameters used below
		apix = options.apix if options.apix else hdr['apix_x']
		nx = hdr['nx']
		ny = hdr['ny']

		if is3d:
			# e2proc2d.py appends to existing files instead of overwriting
			# them, so a stale 'dummy_stack.hdf' from a failed earlier run
			# would keep growing. Remove it first; on success the dummy file
			# is renamed below.
			try:
				os.remove('dummy_stack.hdf')
			except OSError:
				pass

			#turn .ali or .mrc 3D images into a stack of 2D images that can be processed by this program.
			cmd = 'e2proc2d.py ' + arg + ' dummy_stack.hdf --threed2twod'
			if options.subset:
				cmd += ' --first 0 --last ' + str(options.subset-1)

			runcmd(options,cmd)

			# The 2D stack takes the input's name with an .hdf extension.
			# Only the trailing extension is swapped; str.replace would also
			# rewrite a matching substring earlier in the path.
			arg = arg[:-4] + '.hdf'
			os.rename('dummy_stack.hdf',arg)

		outf = "{}_proc.hdf".format( os.path.splitext(arg)[0] )
		if os.path.isfile(outf):
			print("Results are already stored in {}. Please erase or move and try again.".format(outf))
			sys.exit(1)

		nfs = EMUtil.get_image_count(arg)

		tasks = []
		results = None

		#parallelized tasks don't run "in order"; therefore, a dummy stack needs to be pre-created with as many images as the final stack will have
		#(otherwise, writing output images to stack indexes randomly makes the program crash or produces garbage output)
		dummy = EMData(8,8)
		dummy.to_one()
		dummy['apix_x'] = apix
		dummy['apix_y'] = apix
		for j in range(nfs):
			dummy.write_image(outf,j)

		#EMAN2 does not allow stacks of images with different size; the small placeholders above keep the pre-created stack cheap.
		#Re-writing the index=0 image with a correctly sized one is what changes the stack to the correct size, so the full-size
		#image (not the 8x8 placeholder) must be written here.
		dummy_correct_size = EMData(nx,ny)
		dummy_correct_size.to_one()
		dummy_correct_size['apix_x'] = apix
		dummy_correct_size['apix_y'] = apix
		dummy_correct_size.write_image(outf,0)

		for i in range(nfs):
			if options.verbose:
				sys.stdout.write("\rstaging images ({}/{})".format(i+1,nfs))
				sys.stdout.flush()

			if options.parallel:
				tasks.append( EraseGold2DTask( options, arg, i, outf) )
			else:
				# Only the value returned for the last image survives the
				# loop; it gates the post-processing below.
				results = fiximage( options, arg, i, outf)

		if options.parallel and tasks:
			tids = etc.send_tasks(tasks)
			if options.verbose:
				print("\n(erase_gold) %d tasks queued" % (len(tids)))

			results = get_results( etc, tids, options )

		if results:
			outstem = outf[:-4]	# outf always ends in '.hdf'

			if options.parallel:
				# Merge the per-image files written by the tasks.
				cmdbuildstack = 'e2buildstacks.py erasegold_tmp-*_proc.hdf --stackname ' + outf
				runcmd(options,cmdbuildstack)

				if options.debug:
					outfmasked = outstem + '_masked.hdf'
					cmdbuildstack = 'e2buildstacks.py erasegold_tmp-*_masked.hdf --stackname ' + outfmasked
					runcmd(options,cmdbuildstack)

					outfnoise = outstem + '_noise.hdf'
					cmdbuildstack = 'e2buildstacks.py erasegold_tmp-*_noise.hdf --stackname ' + outfnoise
					runcmd(options,cmdbuildstack)

			if is3d:
				# Convert the processed 2D stack back to the input's format.
				finaloutput = outstem + originalarg[-4:]
				cmd = 'e2proc2d.py ' + outf + ' ' + finaloutput + ' --twod2threed --outmode int16'
				runcmd(options,cmd)

				# Both .hdf intermediates are deleted once the final output
				# has been generated: the unstacked input and the processed
				# 2D stack.
				os.remove(arg)
				try:
					os.remove(outf)
				except OSError:
					pass

		# Best-effort removal of the per-image temporaries.
		try:
			for tf in os.listdir("."):
				if 'erasegold_tmp' in tf:
					os.remove(tf)
		except OSError:
			print("WARNING: cleanup failed.")

		dt = time.time() - t0
		if options.verbose:
			print("\n")
			sys.stdout.write("Erased fiducials from {} ({} minutes)\n".format(arg,round(dt/60.,2)))
	return