def main():
    """Command-line entry point of the MPI 3-D sorting program (sort3d).

    Expected positional arguments are ``stack outdir [mask]``; all other
    settings come from the command-line options declared below.

    The function
      1. parses the command line and collects the settings in the
         ``Constants`` / ``Tracker`` dictionaries that are handed to the
         helper routines,
      2. creates the master directory, copies the input stack into it and
         exports the projection and CTF parameters,
      3. reconstructs one volume per chunk file and derives the low-pass
         filter used for sorting,
      4. runs "generation 0": several independent EQ-Kmeans runs, a two-way
         comparison of their partitions, and a final K-means on the stable
         groups,
      5. repeats step 4 on the particles that remain unaccounted for as long
         as at least two groups can be formed, and
      6. optionally reconstructs a volume from the leftover particles.

    Every MPI rank executes this function; file output and logging are done
    on the main node (rank 0) only.  Returns ``None``.
    """
    from time import sleep
    from sp_logger import Logger, BaseLogger_Files

    # Drop the MPICH launcher switches (each is followed by one value).
    arglist = []
    i = 0
    while i < len(sys.argv):
        if sys.argv[i] in ('-p4pg', '-p4wd'):
            i += 2
        else:
            arglist.append(sys.argv[i])
            i += 1

    progname = os.path.basename(arglist[0])
    # The leading blank of the second literal keeps "--delta" and "--an"
    # apart in the printed usage line.
    usage = progname + " stack outdir <mask> --focus=3Dmask --radius=outer_radius --delta=angular_step" +\
        " --an=angular_neighborhood --maxit=max_iter --CTF --sym=c1 --function=user_function --independent=indenpendent_runs --number_of_images_per_group=number_of_images_per_group --low_pass_filter=.25 --seed=random_seed"
    parser = OptionParser(usage, version=SPARXVERSION)
    parser.add_option("--focus", type="string", default='', help="bineary 3D mask for focused clustering ")
    parser.add_option("--ir", type="int", default=1, help="inner radius for rotational correlation > 0 (set to 1)")
    parser.add_option("--radius", type="int", default=-1, help="particle radius in pixel for rotational correlation <nx-1 (set to the radius of the particle)")
    parser.add_option("--maxit", type="int", default=25, help="maximum number of iteration")
    parser.add_option("--rs", type="int", default=1, help="step between rings in rotational correlation >0 (set to 1)")
    parser.add_option("--xr", type="string", default='1', help="range for translation search in x direction, search is +/-xr ")
    parser.add_option("--yr", type="string", default='-1', help="range for translation search in y direction, search is +/-yr (default = same as xr)")
    parser.add_option("--ts", type="string", default='0.25', help="step size of the translation search in both directions direction, search is -xr, -xr+ts, 0, xr-ts, xr ")
    parser.add_option("--delta", type="string", default='2', help="angular step of reference projections")
    parser.add_option("--an", type="string", default='-1', help="angular neighborhood for local searches")
    parser.add_option("--center", type="int", default=0, help="0 - if you do not want the volume to be centered, 1 - center the volume using cog (default=0)")
    parser.add_option("--nassign", type="int", default=1, help="number of reassignment iterations performed for each angular step (set to 3) ")
    parser.add_option("--nrefine", type="int", default=0, help="number of alignment iterations performed for each angular step (set to 0)")
    parser.add_option("--CTF", action="store_true", default=False, help="do CTF correction during clustring")
    parser.add_option("--stoprnct", type="float", default=3.0, help="Minimum percentage of assignment change to stop the program")
    parser.add_option("--sym", type="string", default='c1', help="symmetry of the structure ")
    parser.add_option("--function", type="string", default='do_volume_mrk05', help="name of the reference preparation function")
    parser.add_option("--independent", type="int", default=3, help="number of independent run")
    parser.add_option("--number_of_images_per_group", type="int", default=1000, help="number of groups")
    parser.add_option("--low_pass_filter", type="float", default=-1.0, help="absolute frequency of low-pass filter for 3d sorting on the original image size")
    parser.add_option("--nxinit", type="int", default=64, help="initial image size for sorting")
    parser.add_option("--unaccounted", action="store_true", default=False, help="reconstruct the unaccounted images")
    parser.add_option("--seed", type="int", default=-1, help="random seed for create initial random assignment for EQ Kmeans")
    parser.add_option("--smallest_group", type="int", default=500, help="minimum members for identified group")
    parser.add_option("--sausage", action="store_true", default=False, help="way of filter volume")
    parser.add_option("--chunk0", type="string", default='', help="chunk0 for computing margin of error")
    parser.add_option("--chunk1", type="string", default='', help="chunk1 for computing margin of error")
    parser.add_option("--PWadjustment", type="string", default='', help="1-D power spectrum of PDB file used for EM volume power spectrum correction")
    parser.add_option("--protein_shape", type="string", default='g', help="protein shape. It defines protein preferred orientation angles. Currently it has g and f two types ")
    parser.add_option("--upscale", type="float", default=0.5, help=" scaling parameter to adjust the power spectrum of EM volumes")
    parser.add_option("--wn", type="int", default=0, help="optimal window size for data processing")
    parser.add_option("--interpolation", type="string", default="4nn", help="3-d reconstruction interpolation method, two options trl and 4nn")
    (options, args) = parser.parse_args(arglist[1:])

    # Both the stack and the output directory are read unconditionally
    # below, so at least two positional arguments are required.
    if len(args) < 2 or len(args) > 4:
        sxprint("Usage: " + usage)
        sxprint("Please run \'" + progname + " -h\' for detailed options")
        ERROR(
            "Invalid number of parameters used. Please see usage information above."
        )
        return

    mask_file = args[2] if len(args) > 2 else None
    orgstack = args[0]
    masterdir = args[1]
    sp_global_def.BATCH = True

    #---initialize MPI related variables
    nproc = mpi.mpi_comm_size(mpi.MPI_COMM_WORLD)
    myid = mpi.mpi_comm_rank(mpi.MPI_COMM_WORLD)
    main_node = 0

    # import some utilities
    from sp_utilities import get_im, bcast_number_to_all, cmdexecute, write_text_file, read_text_file, wrap_mpi_bcast, get_params_proj, write_text_row
    from sp_applications import recons3d_n_MPI, mref_ali3d_MPI, Kmref_ali3d_MPI
    from sp_statistics import k_means_match_clusters_asg_new, k_means_stab_bbenum
    from sp_applications import mref_ali3d_EQ_Kmeans, ali3d_mref_Kmeans_MPI

    # Create the main log file (main node only).
    # NOTE(review): the prefix is taken from the command line value; if an
    # empty outdir is given, the generated default name chosen further down
    # is not propagated to the logger or to Constants - confirm intent.
    if myid == main_node:
        log_main = Logger(BaseLogger_Files())
        log_main.prefix = masterdir + "/"
    else:
        log_main = None

    #--- fill input parameters into dictionary named after Constants
    Constants = {
        "stack": args[0],
        "masterdir": masterdir,
        "mask3D": mask_file,
        "focus3Dmask": options.focus,
        "indep_runs": options.independent,
        "stoprnct": options.stoprnct,
        "number_of_images_per_group": options.number_of_images_per_group,
        "CTF": options.CTF,
        "maxit": options.maxit,
        "ir": options.ir,
        "radius": options.radius,
        "nassign": options.nassign,
        "rs": options.rs,
        "xr": options.xr,
        "yr": options.yr,
        "ts": options.ts,
        "delta": options.delta,
        "an": options.an,
        "sym": options.sym,
        "center": options.center,
        "nrefine": options.nrefine,
        "user_func": options.function,
        "low_pass_filter": options.low_pass_filter,  # enforced low_pass_filter
        "main_log_prefix": args[1],
        "myid": myid,
        "main_node": main_node,
        "nproc": nproc,
        "log_main": log_main,
        "nxinit": options.nxinit,
        "unaccounted": options.unaccounted,
        "seed": options.seed,
        "smallest_group": options.smallest_group,
        "sausage": options.sausage,
        "chunk0": options.chunk0,
        "chunk1": options.chunk1,
        "PWadjustment": options.PWadjustment,
        "upscale": options.upscale,
        "wn": options.wn,
        "3d-interpolation": options.interpolation,
        "protein_shape": options.protein_shape,
    }

    # Create and initialize Tracker dictionary with input options
    Tracker = {}
    Tracker["constants"] = Constants
    Tracker["maxit"] = Tracker["constants"]["maxit"]
    Tracker["radius"] = Tracker["constants"]["radius"]
    Tracker["upscale"] = Tracker["constants"]["upscale"]
    # Should the data be premultiplied by the CTF. Set to False for local continuous.
    Tracker["applyctf"] = False
    Tracker["nxinit"] = Tracker["constants"]["nxinit"]
    Tracker["icurrentres"] = -1
    Tracker["fuse_freq"] = 50  # Now in A, convert to absolute before using
    Tracker["orgstack"] = orgstack

    # import from utilities
    from sp_utilities import sample_down_1D_curve, get_initial_ID, remove_small_groups, print_upper_triangular_matrix, print_a_line_with_timestamp
    from sp_utilities import print_dict, get_resolution_mrk01, partition_to_groups, partition_independent_runs, get_outliers
    from sp_utilities import merge_groups, save_alist, margin_of_error, get_margin_of_error, do_two_way_comparison, select_two_runs, get_ali3d_params
    from sp_utilities import counting_projections, unload_dict, load_dict, get_stat_proj, create_random_list, get_number_of_groups, recons_mref
    from sp_utilities import apply_low_pass_filter, get_groups_from_partition, get_number_of_groups, get_complementary_elements_total, update_full_dict
    from sp_utilities import count_chunk_members, set_filter_parameters_from_adjusted_fsc, get_two_chunks_from_stack
    from sp_utilities import get_shrink_data_huang

    def reconstruct_volume(image_size, particle_list_file):
        """Load the listed particles at image_size and reconstruct them with recons3d_4nn_ctf_MPI."""
        data, old_shifts = get_shrink_data_huang(
            Tracker, image_size, particle_list_file,
            Tracker["constants"]["partstack"], myid, main_node, nproc,
            preshift=True)
        return recons3d_4nn_ctf_MPI(myid=myid,
                                    prjlist=data,
                                    symmetry=Tracker["constants"]["sym"],
                                    finfo=None)

    def select_pw_adjustment(volume):
        """Store in Tracker the PW-adjustment entry matching the x-size of volume."""
        if Tracker["constants"]["PWadjustment"]:
            Tracker["PWadjustment"] = Tracker["PW_dict"][volume.get_xsize()]
        else:
            # no PW adjustment
            Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"]

    def apply_user_function(volume):
        """Run the user-selected reference preparation function on volume."""
        refdata = [
            volume,
            Tracker,
            Tracker["constants"]["myid"],
            Tracker["constants"]["nproc"],
        ]
        return user_func(refdata)

    # Get the pixel size; if none, set to 1.0, and the original image size
    if myid == main_node:
        line = strftime("%Y-%m-%d_%H:%M:%S", localtime()) + " =>"
        sxprint((line + "Initialization of 3-D sorting"))
        a = get_im(orgstack)
        nnxo = a.get_xsize()
        if Tracker["nxinit"] > nnxo:
            sp_global_def.ERROR(
                "Image size less than minimum permitted %d" % Tracker["nxinit"])
            # A negative size tells all ranks to quit after the broadcast.
            nnxo = -1
        elif Tracker["constants"]["CTF"]:
            ctf = a.get_attr('ctf')
            pixel_size = ctf.apix
            fq = pixel_size / Tracker["fuse_freq"]
        else:
            pixel_size = 1.0
            # No pixel size, fusing computed as 5 Fourier pixels
            fq = 5.0 / nnxo
        del a
    else:
        nnxo = 0
        fq = 0.0
        pixel_size = 1.0
    nnxo = bcast_number_to_all(nnxo, source_node=main_node)
    if nnxo < 0:
        return
    pixel_size = bcast_number_to_all(pixel_size, source_node=main_node)
    fq = bcast_number_to_all(fq, source_node=main_node)

    # A user-supplied window size overrides the size read from the stack.
    if Tracker["constants"]["wn"] == 0:
        Tracker["constants"]["nnxo"] = nnxo
    else:
        Tracker["constants"]["nnxo"] = Tracker["constants"]["wn"]
    Tracker["constants"]["pixel_size"] = pixel_size
    Tracker["fuse_freq"] = fq
    del fq, nnxo, pixel_size

    if Tracker["constants"]["radius"] < 1:
        Tracker["constants"]["radius"] = Tracker["constants"]["nnxo"] // 2 - 2
    elif (2 * Tracker["constants"]["radius"] + 2) > Tracker["constants"]["nnxo"]:
        sp_global_def.ERROR("Particle radius set too large!", myid=myid)

    # Master directory: created on the main node, its name is then sent to
    # all other ranks character by character.
    if myid == main_node:
        if masterdir == "":
            timestring = strftime("_%d_%b_%Y_%H_%M_%S", localtime())
            masterdir = "master_sort3d" + timestring
        li = len(masterdir)
        cmd = "{} {}".format("mkdir -p", masterdir)
        os.system(cmd)
    else:
        li = 0
    li = mpi.mpi_bcast(li, 1, mpi.MPI_INT, main_node, mpi.MPI_COMM_WORLD)[0]
    if li > 0:
        masterdir = mpi.mpi_bcast(masterdir, li, mpi.MPI_CHAR, main_node,
                                  mpi.MPI_COMM_WORLD)
        # str.join replaces the Python 2-only string.join(words, sep).
        masterdir = "".join(masterdir)
    if myid == main_node:
        print_dict(Tracker["constants"],
                   "Permanent settings of 3-D sorting program")

    # create a vstack from input stack to the local stack in masterdir
    # stack name set to default
    Tracker["constants"]["stack"] = "bdb:" + masterdir + "/rdata"
    Tracker["constants"]["ali3d"] = os.path.join(masterdir, "ali3d_init.txt")
    Tracker["constants"]["ctf_params"] = os.path.join(masterdir, "ctf_params.txt")
    # also serves for refinement
    Tracker["constants"]["partstack"] = Tracker["constants"]["ali3d"]
    if myid == main_node:
        total_stack = EMUtil.get_image_count(Tracker["orgstack"])
    else:
        total_stack = 0
    total_stack = bcast_number_to_all(total_stack, source_node=main_node)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    # Wait until the directory created by the main node is visible here.
    while not os.path.exists(masterdir):
        sxprint("Node ", myid, " waiting...")
        sleep(5)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    if myid == main_node:
        log_main.add("Sphire sort3d ")
        log_main.add("the sort3d master directory is " + masterdir)

    # Initial data analysis and handle two chunk files
    import sp_user_functions
    user_func = sp_user_functions.factory[Tracker["constants"]["user_func"]]
    if myid == main_node:
        chunk_one = read_text_file(Tracker["constants"]["chunk0"])
        chunk_two = read_text_file(Tracker["constants"]["chunk1"])
    else:
        chunk_one = 0
        chunk_two = 0
    chunk_one = wrap_mpi_bcast(chunk_one, main_node)
    chunk_two = wrap_mpi_bcast(chunk_two, main_node)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    # Read/write bdb: data on main node
    if myid == main_node:
        if orgstack[:4] == "bdb:":
            cmd = "{} {} {}".format(
                "e2bdb.py", orgstack,
                "--makevstack=" + Tracker["constants"]["stack"])
        else:
            cmd = "{} {} {}".format("sp_cpy.py", orgstack,
                                    Tracker["constants"]["stack"])
        junk = cmdexecute(cmd)
        cmd = "{} {} {}".format(
            "sp_header.py --params=xform.projection",
            "--export=" + Tracker["constants"]["ali3d"], orgstack)
        junk = cmdexecute(cmd)
        cmd = "{} {} {}".format(
            "sp_header.py --params=ctf",
            "--export=" + Tracker["constants"]["ctf_params"], orgstack)
        junk = cmdexecute(cmd)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    Tracker["total_stack"] = total_stack
    Tracker["constants"]["total_stack"] = total_stack
    Tracker["shrinkage"] = float(Tracker["nxinit"]) / Tracker["constants"]["nnxo"]
    Tracker["radius"] = Tracker["constants"]["radius"] * Tracker["shrinkage"]
    if Tracker["constants"]["mask3D"]:
        Tracker["mask3D"] = os.path.join(masterdir, "smask.hdf")
    else:
        Tracker["mask3D"] = None
    if Tracker["constants"]["focus3Dmask"]:
        Tracker["focus3D"] = os.path.join(masterdir, "sfocus.hdf")
    else:
        Tracker["focus3D"] = None
    if myid == main_node:
        if Tracker["constants"]["mask3D"]:
            mask_3D = get_shrink_3dmask(Tracker["nxinit"],
                                        Tracker["constants"]["mask3D"])
            mask_3D.write_image(Tracker["mask3D"])
        if Tracker["constants"]["focus3Dmask"]:
            mask_3D = get_shrink_3dmask(Tracker["nxinit"],
                                        Tracker["constants"]["focus3Dmask"])
            st = Util.infomask(mask_3D, None, True)
            if st[0] == 0.0:
                ERROR(
                    "Incorrect focused mask, after binarize all values zero"
                )
            mask_3D.write_image(Tracker["focus3D"])
            del mask_3D
    if Tracker["constants"]["PWadjustment"] != '':
        # Map image size -> power spectrum file for both working sizes.
        PW_dict = {}
        nxinit_pwsp = sample_down_1D_curve(
            Tracker["constants"]["nxinit"], Tracker["constants"]["nnxo"],
            Tracker["constants"]["PWadjustment"])
        Tracker["nxinit_PW"] = os.path.join(masterdir, "spwp.txt")
        if myid == main_node:
            write_text_file(nxinit_pwsp, Tracker["nxinit_PW"])
        PW_dict[Tracker["constants"]["nnxo"]] = Tracker["constants"]["PWadjustment"]
        PW_dict[Tracker["constants"]["nxinit"]] = Tracker["nxinit_PW"]
        Tracker["PW_dict"] = PW_dict
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    #---From two chunks to FSC, and low pass filter
    chunk_dict = {}
    for element in chunk_one:
        chunk_dict[element] = 0
    for element in chunk_two:
        chunk_dict[element] = 1
    chunk_list = [chunk_one, chunk_two]
    Tracker["chunk_dict"] = chunk_dict
    Tracker["P_chunk0"] = len(chunk_one) / float(total_stack)
    Tracker["P_chunk1"] = len(chunk_two) / float(total_stack)

    # create two volumes to estimate resolution
    if myid == main_node:
        for index in range(2):
            write_text_file(
                chunk_list[index],
                os.path.join(masterdir, "chunk%01d.txt" % index))
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    vols = []
    for index in range(2):
        vol = reconstruct_volume(
            Tracker["constants"]["nxinit"],
            os.path.join(masterdir, "chunk%01d.txt" % index))
        if myid == main_node:
            vol.write_image(os.path.join(masterdir, "vol%d.hdf" % index))
        vols.append(vol)
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    if myid == main_node:
        low_pass, falloff, currentres = get_resolution_mrk01(
            vols, Tracker["constants"]["radius"],
            Tracker["constants"]["nxinit"], masterdir, Tracker["mask3D"])
        # Cap the estimate by the user's filter only when one was given;
        # -1.0 is the "not provided" sentinel and must not replace the
        # estimated value that is used in exactly that case below.
        user_low_pass = Tracker["constants"]["low_pass_filter"]
        if user_low_pass != -1.0 and low_pass > user_low_pass:
            low_pass = user_low_pass
    else:
        low_pass = 0.0
        falloff = 0.0
        currentres = 0.0
    # Keep the broadcast results so that every rank works with the values
    # computed on the main node.
    currentres = bcast_number_to_all(currentres, source_node=main_node)
    low_pass = bcast_number_to_all(low_pass, source_node=main_node)
    falloff = bcast_number_to_all(falloff, source_node=main_node)
    Tracker["currentres"] = currentres
    Tracker["falloff"] = falloff
    if Tracker["constants"]["low_pass_filter"] == -1.0:
        # no better than .45
        Tracker["low_pass_filter"] = min(.45, low_pass / Tracker["shrinkage"])
    else:
        Tracker["low_pass_filter"] = min(
            .45,
            Tracker["constants"]["low_pass_filter"] / Tracker["shrinkage"])
    Tracker["lowpass"] = Tracker["low_pass_filter"]
    Tracker["falloff"] = .1
    Tracker["global_fsc"] = os.path.join(masterdir, "fsc.txt")

    from sp_filter import filt_tanl
    if myid == main_node:
        log_main.add("The command-line inputs are as following:")
        log_main.add(
            "**********************************************************")
        for argument in sys.argv:
            log_main.add(argument)
        log_main.add("number of cpus used in this run is %d" %
                     Tracker["constants"]["nproc"])
        log_main.add(
            "**********************************************************")
        ### START 3-D sorting
        log_main.add("----------3-D sorting program------- ")
        log_main.add(
            "current resolution %6.3f for images of original size in terms of absolute frequency"
            % Tracker["currentres"])
        log_main.add("equivalent to %f Angstrom resolution" %
                     (Tracker["constants"]["pixel_size"] /
                      Tracker["currentres"] / Tracker["shrinkage"]))
        log_main.add("the user provided enforced low_pass_filter is %f" %
                     Tracker["constants"]["low_pass_filter"])
        # Store low-pass filtered copies of the two chunk volumes.
        for index in range(2):
            filt_tanl(
                get_im(os.path.join(masterdir, "vol%01d.hdf" % index)),
                Tracker["low_pass_filter"],
                Tracker["falloff"]).write_image(
                    os.path.join(masterdir, "volf%01d.hdf" % index))
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    from sp_utilities import get_input_from_string
    from sp_utilities import even_angles
    delta = get_input_from_string(Tracker["constants"]["delta"])
    delta = delta[0]
    n_angles = even_angles(delta, 0, 180)
    this_ali3d = Tracker["constants"]["ali3d"]
    sampled = get_stat_proj(Tracker, delta, this_ali3d)
    if myid == main_node:
        # Count the sampled directions that contain at least one entry.
        nc = sum(1 for key in sampled if len(sampled[key]) > 0)
        log_main.add("total sampled direction %10d at angle step %6.3f" %
                     (len(n_angles), delta))
        log_main.add(
            "captured sampled directions %10d percentage covered by data %6.3f"
            % (nc, float(nc) / len(n_angles) * 100))

    number_of_images_per_group = Tracker["constants"]["number_of_images_per_group"]
    if myid == main_node:
        log_main.add("user provided number_of_images_per_group %d" %
                     number_of_images_per_group)
    Tracker["number_of_images_per_group"] = number_of_images_per_group
    number_of_groups = get_number_of_groups(total_stack,
                                            number_of_images_per_group)
    Tracker["number_of_groups"] = number_of_groups

    # ------------------------------ generation 0 ------------------------
    generation = 0
    partition_dict = {}
    workdir = os.path.join(masterdir, "generation%03d" % generation)
    Tracker["this_dir"] = workdir
    if myid == main_node:
        log_main.add("---- generation %5d" % generation)
        log_main.add("number of images per group is set as %d" %
                     number_of_images_per_group)
        log_main.add("the initial number of groups is %10d " %
                     number_of_groups)
        cmd = "{} {}".format("mkdir", workdir)
        os.system(cmd)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    list_to_be_processed = list(range(Tracker["constants"]["total_stack"]))
    Tracker["this_data_list"] = list_to_be_processed
    create_random_list(Tracker)

    # Identity mapping of particle IDs for the complete stack.
    full_dict = {}
    for iptl in range(Tracker["constants"]["total_stack"]):
        full_dict[iptl] = iptl
    Tracker["full_ID_dict"] = full_dict

    for indep_run in range(Tracker["constants"]["indep_runs"]):
        Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run]
        ref_vol = recons_mref(Tracker)
        if myid == main_node:
            log_main.add("independent run %10d" % indep_run)
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        Tracker["this_data_list"] = list_to_be_processed
        Tracker["total_stack"] = len(Tracker["this_data_list"])
        # for get_shrink_data
        Tracker["this_particle_text_file"] = os.path.join(
            workdir, "independent_list_%03d.txt" % indep_run)
        if myid == main_node:
            write_text_file(Tracker["this_data_list"],
                            Tracker["this_particle_text_file"])
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        outdir = os.path.join(workdir, "EQ_Kmeans%03d" % indep_run)
        ref_vol = apply_low_pass_filter(ref_vol, Tracker)
        mref_ali3d_EQ_Kmeans(ref_vol, outdir,
                             Tracker["this_particle_text_file"], Tracker)
        partition_dict[indep_run] = Tracker["this_partition"]
    Tracker["partition_dict"] = partition_dict
    Tracker["total_stack"] = len(Tracker["this_data_list"])
    Tracker["this_total_stack"] = Tracker["total_stack"]

    do_two_way_comparison(Tracker)

    # One reference volume per stable group, at the reduced image size.
    ref_vol_list = []
    number_of_ref_class = []
    for igrp in range(len(Tracker["two_way_stable_member"])):
        Tracker["this_data_list"] = Tracker["two_way_stable_member"][igrp]
        Tracker["this_data_list_file"] = os.path.join(
            workdir, "stable_class%d.txt" % igrp)
        if myid == main_node:
            write_text_file(Tracker["this_data_list"],
                            Tracker["this_data_list_file"])
        volref = reconstruct_volume(Tracker["nxinit"],
                                    Tracker["this_data_list_file"])
        ref_vol_list.append(volref)
        number_of_ref_class.append(len(Tracker["this_data_list"]))
        if myid == main_node:
            log_main.add("group %d members %d " %
                         (igrp, len(Tracker["this_data_list"])))
    Tracker["number_of_ref_class"] = number_of_ref_class
    select_pw_adjustment(ref_vol_list[0])
    if myid == main_node:
        for iref, stable_volume in enumerate(ref_vol_list):
            volref = apply_user_function(stable_volume)
            volref.write_image(os.path.join(workdir, "volf_stable.hdf"), iref)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    Tracker["this_data_list"] = Tracker["this_accounted_list"]
    outdir = os.path.join(workdir, "Kmref")
    empty_group, res_groups, final_list = ali3d_mref_Kmeans_MPI(
        ref_vol_list, outdir, Tracker["this_accounted_text"], Tracker)
    Tracker["this_unaccounted_list"] = get_complementary_elements(
        list_to_be_processed, final_list)
    if myid == main_node:
        log_main.add("the number of particles not processed is %d" %
                     len(Tracker["this_unaccounted_list"]))
        write_text_file(Tracker["this_unaccounted_list"],
                        Tracker["this_unaccounted_text"])
    update_full_dict(Tracker["this_unaccounted_list"], Tracker)

    # Full-size reconstruction of every class found by the final K-means.
    number_of_groups = len(res_groups)
    vol_list = []
    number_of_ref_class = []
    for igrp in range(number_of_groups):
        class_file = os.path.join(outdir, "Class%d.txt" % igrp)
        vol_list.append(
            reconstruct_volume(Tracker["constants"]["nnxo"], class_file))
        if myid == main_node:
            npergroup = len(read_text_file(class_file))
        else:
            npergroup = 0
        npergroup = bcast_number_to_all(npergroup, main_node)
        number_of_ref_class.append(npergroup)
    Tracker["number_of_ref_class"] = number_of_ref_class
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    select_pw_adjustment(vol_list[0])
    if myid == main_node:
        for ivol, class_volume in enumerate(vol_list):
            volref = apply_user_function(class_volume)
            volref.write_image(
                os.path.join(workdir, "volf_of_Classes.hdf"), ivol)
        log_main.add("number of unaccounted particles %10d" %
                     len(Tracker["this_unaccounted_list"]))
        log_main.add("number of accounted particles %10d" %
                     len(Tracker["this_accounted_list"]))

    # reset parameters for the next round calculation
    Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
    Tracker["total_stack"] = len(Tracker["this_unaccounted_list"])
    Tracker["this_total_stack"] = Tracker["total_stack"]
    number_of_groups = get_number_of_groups(
        len(Tracker["this_unaccounted_list"]), number_of_images_per_group)
    Tracker["number_of_groups"] = number_of_groups

    # ------------------------- further generations ----------------------
    # Keep sorting the unaccounted particles while they can still be split
    # into at least two groups.
    while number_of_groups >= 2:
        generation += 1
        partition_dict = {}
        workdir = os.path.join(masterdir, "generation%03d" % generation)
        Tracker["this_dir"] = workdir
        if myid == main_node:
            log_main.add("*********************************************")
            log_main.add("----- generation %5d " % generation)
            log_main.add("number of images per group is set as %10d " %
                         number_of_images_per_group)
            log_main.add("the number of groups is %10d " % number_of_groups)
            log_main.add(" number of particles for clustering is %10d" %
                         Tracker["total_stack"])
            cmd = "{} {}".format("mkdir", workdir)
            os.system(cmd)
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        create_random_list(Tracker)
        for indep_run in range(Tracker["constants"]["indep_runs"]):
            Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run]
            ref_vol = recons_mref(Tracker)
            if myid == main_node:
                log_main.add("independent run %10d" % indep_run)
            outdir = os.path.join(workdir, "EQ_Kmeans%03d" % indep_run)
            Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
            mref_ali3d_EQ_Kmeans(ref_vol, outdir,
                                 Tracker["this_unaccounted_text"], Tracker)
            partition_dict[indep_run] = Tracker["this_partition"]
            # Restore the bookkeeping entries after every run.
            Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
            Tracker["total_stack"] = len(Tracker["this_unaccounted_list"])
        Tracker["partition_dict"] = partition_dict
        Tracker["this_total_stack"] = Tracker["total_stack"]
        total_list_of_this_run = Tracker["this_unaccounted_list"]

        do_two_way_comparison(Tracker)

        ref_vol_list = []
        number_of_ref_class = []
        for igrp in range(len(Tracker["two_way_stable_member"])):
            Tracker["this_data_list"] = Tracker["two_way_stable_member"][igrp]
            Tracker["this_data_list_file"] = os.path.join(
                workdir, "stable_class%d.txt" % igrp)
            if myid == main_node:
                write_text_file(Tracker["this_data_list"],
                                Tracker["this_data_list_file"])
            mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
            volref = reconstruct_volume(Tracker["constants"]["nxinit"],
                                        Tracker["this_data_list_file"])
            if myid == main_node:
                # Each stable group is stored at its own index of the stack.
                volref.write_image(os.path.join(workdir, "vol_stable.hdf"),
                                   igrp)
            ref_vol_list.append(volref)
            number_of_ref_class.append(len(Tracker["this_data_list"]))
            mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        Tracker["number_of_ref_class"] = number_of_ref_class
        Tracker["this_data_list"] = Tracker["this_accounted_list"]
        outdir = os.path.join(workdir, "Kmref")
        empty_group, res_groups, final_list = ali3d_mref_Kmeans_MPI(
            ref_vol_list, outdir, Tracker["this_accounted_text"], Tracker)

        # calculate the 3-D structure of original image size for each group
        number_of_groups = len(res_groups)
        Tracker["this_unaccounted_list"] = get_complementary_elements(
            total_list_of_this_run, final_list)
        if myid == main_node:
            log_main.add("the number of particles not processed is %d" %
                         len(Tracker["this_unaccounted_list"]))
            write_text_file(Tracker["this_unaccounted_list"],
                            Tracker["this_unaccounted_text"])
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        update_full_dict(Tracker["this_unaccounted_list"], Tracker)
        vol_list = []
        for igrp in range(number_of_groups):
            vol_list.append(
                reconstruct_volume(
                    Tracker["constants"]["nnxo"],
                    os.path.join(outdir, "Class%d.txt" % igrp)))
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        # The user function below works on the full-size class volumes, so
        # the PW adjustment has to match their size; the reduced-size
        # reference is only a fallback when no class volume exists.
        select_pw_adjustment(vol_list[0] if vol_list else ref_vol_list[0])
        if myid == main_node:
            for ivol, class_volume in enumerate(vol_list):
                volref = apply_user_function(class_volume)
                volref.write_image(
                    os.path.join(workdir, "volf_of_Classes.hdf"), ivol)
            log_main.add("number of unaccounted particles %10d" %
                         len(Tracker["this_unaccounted_list"]))
            log_main.add("number of accounted particles %10d" %
                         len(Tracker["this_accounted_list"]))
        del vol_list
        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
        number_of_groups = get_number_of_groups(
            len(Tracker["this_unaccounted_list"]),
            number_of_images_per_group)
        Tracker["number_of_groups"] = number_of_groups
        Tracker["this_data_list"] = Tracker["this_unaccounted_list"]
        Tracker["total_stack"] = len(Tracker["this_unaccounted_list"])

    # Optional full-size reconstruction of whatever is still unaccounted.
    if Tracker["constants"]["unaccounted"]:
        volref = reconstruct_volume(Tracker["constants"]["nnxo"],
                                    Tracker["this_unaccounted_text"])
        select_pw_adjustment(volref)
        if myid == main_node:
            volref = apply_user_function(volref)
            volref.write_image(
                os.path.join(workdir, "volf_unaccounted.hdf"))

    # Finish program
    if myid == main_node:
        log_main.add("sxsort3d finishes")
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    return
def main(): from time import sleep from sp_logger import Logger, BaseLogger_Files arglist = [] i = 0 while( i < len(sys.argv) ): if sys.argv[i]=='-p4pg': i = i+2 elif sys.argv[i]=='-p4wd': i = i+2 else: arglist.append( sys.argv[i] ) i = i+1 progname = os.path.basename(arglist[0]) usage = progname + " stack outdir <mask> --focus=3Dmask --radius=outer_radius --delta=angular_step" +\ "--an=angular_neighborhood --maxit=max_iter --CTF --sym=c1 --function=user_function --independent=indenpendent_runs --number_of_images_per_group=number_of_images_per_group --low_pass_filter=.25 --seed=random_seed" parser = OptionParser(usage,version=SPARXVERSION) parser.add_option("--focus", type="string", default=None, help="3D mask for focused clustering ") parser.add_option("--ir", type= "int", default= 1, help="inner radius for rotational correlation > 0 (set to 1)") parser.add_option("--radius", type= "int", default=-1, help="outer radius for rotational correlation <nx-1 (set to the radius of the particle)") parser.add_option("--maxit", type= "int", default=50, help="maximum number of iteration") parser.add_option("--rs", type= "int", default=1, help="step between rings in rotational correlation >0 (set to 1)" ) parser.add_option("--xr", type="string", default='1', help="range for translation search in x direction, search is +/-xr ") parser.add_option("--yr", type="string", default='-1', help="range for translation search in y direction, search is +/-yr (default = same as xr)") parser.add_option("--ts", type="string", default='0.25', help="step size of the translation search in both directions direction, search is -xr, -xr+ts, 0, xr-ts, xr ") parser.add_option("--delta", type="string", default='2', help="angular step of reference projections") parser.add_option("--an", type="string", default='-1', help="angular neighborhood for local searches") parser.add_option("--center", type="int", default=0, help="0 - if you do not want the volume to be centered, 1 - center the volume using cog 
(default=0)") parser.add_option("--nassign", type="int", default=1, help="number of reassignment iterations performed for each angular step (set to 3) ") parser.add_option("--nrefine", type="int", default=0, help="number of alignment iterations performed for each angular step (set to 1) ") parser.add_option("--CTF", action="store_true", default=False, help="Consider CTF correction during the alignment ") parser.add_option("--stoprnct", type="float", default=3.0, help="Minimum percentage of assignment change to stop the program") parser.add_option("--sym", type="string", default='c1', help="symmetry of the structure ") parser.add_option("--function", type="string", default='do_volume_mrk05', help="name of the reference preparation function") parser.add_option("--independent", type="int", default= 3, help="number of independent run") parser.add_option("--number_of_images_per_group", type='int', default=1000, help="number of images per groups") parser.add_option("--low_pass_filter", type="float", default=-1.0, help="absolute frequency of low-pass filter for 3d sorting on the original image size" ) parser.add_option("--nxinit", type="int", default=64, help="initial image size for sorting" ) parser.add_option("--unaccounted", action="store_true", default=False, help="reconstruct the unaccounted images") parser.add_option("--seed", type="int", default=-1, help="random seed for create initial random assignment for EQ Kmeans") parser.add_option("--smallest_group", type="int", default=500, help="minimum members for identified group" ) parser.add_option("--previous_run1", type="string", default='', help="two previous runs" ) parser.add_option("--previous_run2", type="string", default='', help="two previous runs" ) parser.add_option("--group_size_for_unaccounted", type="int", default=500, help="size for unaccounted particles" ) parser.add_option("--chunkdir", type="string", default='', help="chunkdir for computing margin of error") parser.add_option("--sausage", 
action="store_true", default=False, help="way of filter volume") parser.add_option("--PWadjustment", type="string", default='', help="1-D power spectrum of PDB file used for EM volume power spectrum correction") parser.add_option("--protein_shape", type="string", default='g', help="protein shape. It defines protein preferred orientation angles. Currently it has g and f two types ") parser.add_option("--upscale", type="float", default=0.5, help=" scaling parameter to adjust the power spectrum of EM volumes") parser.add_option("--wn", type="int", default=0, help="optimal window size for data processing") parser.add_option("--interpolation", type="string", default="4nn", help="3-d reconstruction interpolation method, two options, trl and 4nn") (options, args) = parser.parse_args(arglist[1:]) if len(args) < 1 or len(args) > 4: sxprint("Usage: " + usage) sxprint("Please run \'" + progname + " -h\' for detailed options") ERROR( "Invalid number of parameters used. Please see usage information above." 
) return else: if len(args)>2: mask_file = args[2] else: mask_file = None orgstack =args[0] masterdir =args[1] sp_global_def.BATCH = True #---initialize MPI related variables nproc = mpi.mpi_comm_size( mpi.MPI_COMM_WORLD ) myid = mpi.mpi_comm_rank( mpi.MPI_COMM_WORLD ) mpi_comm = mpi.MPI_COMM_WORLD main_node = 0 # Create the main log file from sp_logger import Logger,BaseLogger_Files if myid ==main_node: log_main=Logger(BaseLogger_Files()) log_main.prefix=masterdir+"/" else: log_main = None #--- fill input parameters into dictionary named after Constants Constants ={} Constants["stack"] =args[0] Constants["masterdir"] =masterdir Constants["mask3D"] =mask_file Constants["focus3Dmask"] =options.focus Constants["indep_runs"] =options.independent Constants["stoprnct"] =options.stoprnct Constants["number_of_images_per_group"] =options.number_of_images_per_group Constants["CTF"] =options.CTF Constants["maxit"] =options.maxit Constants["ir"] =options.ir Constants["radius"] =options.radius Constants["nassign"] =options.nassign Constants["rs"] =options.rs Constants["xr"] =options.xr Constants["yr"] =options.yr Constants["ts"] =options.ts Constants["delta"] =options.delta Constants["an"] =options.an Constants["sym"] =options.sym Constants["center"] =options.center Constants["nrefine"] =options.nrefine Constants["user_func"] =options.function Constants["low_pass_filter"] =options.low_pass_filter # enforced low_pass_filter Constants["main_log_prefix"] =args[1] #Constants["importali3d"] =options.importali3d Constants["myid"] =myid Constants["main_node"] =main_node Constants["nproc"] =nproc Constants["log_main"] =log_main Constants["nxinit"] =options.nxinit Constants["unaccounted"] =options.unaccounted Constants["seed"] =options.seed Constants["smallest_group"] =options.smallest_group Constants["previous_runs"] =options.previous_run1+" "+options.previous_run2 Constants["sausage"] =options.sausage Constants["chunkdir"] =options.chunkdir Constants["PWadjustment"] 
=options.PWadjustment Constants["upscale"] =options.upscale Constants["wn"] =options.wn Constants["3d-interpolation"] =options.interpolation Constants["protein_shape"] =options.protein_shape #Constants["frequency_stop_search"] =options.frequency_stop_search #Constants["scale_of_number"] =options.scale_of_number # ------------------------------------------------------------- # # Create and initialize Tracker dictionary with input options Tracker = {} Tracker["constants"] = Constants Tracker["maxit"] = Tracker["constants"]["maxit"] Tracker["radius"] = Tracker["constants"]["radius"] #Tracker["xr"] = "" #Tracker["yr"] = "-1" # Do not change! #Tracker["ts"] = 1 #Tracker["an"] = "-1" #Tracker["delta"] = "2.0" #Tracker["zoom"] = True #Tracker["nsoft"] = 0 #Tracker["local"] = False Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"] Tracker["upscale"] = Tracker["constants"]["upscale"] Tracker["applyctf"] = False # Should the data be premultiplied by the CTF. Set to False for local continuous. 
#Tracker["refvol"] = None Tracker["nxinit"] = Tracker["constants"]["nxinit"] #Tracker["nxstep"] = 32 Tracker["icurrentres"] = -1 #Tracker["ireachedres"] = -1 Tracker["lowpass"] = Tracker["constants"]["low_pass_filter"] Tracker["falloff"] = 0.1 #Tracker["inires"] = options.inires # Now in A, convert to absolute before using Tracker["fuse_freq"] = 50 # Now in A, convert to absolute before using #Tracker["delpreviousmax"]= False #Tracker["anger"] = -1.0 #Tracker["shifter"] = -1.0 #Tracker["saturatecrit"] = 0.95 #Tracker["pixercutoff"] = 2.0 #Tracker["directory"] = "" #Tracker["previousoutputdir"] = "" #Tracker["eliminated-outliers"] = False #Tracker["mainiteration"] = 0 #Tracker["movedback"] = False #Tracker["state"] = Tracker["constants"]["states"][0] #Tracker["global_resolution"] = 0.0 Tracker["orgstack"] = orgstack #-------------------------------------------------------------------- # import from utilities from sp_utilities import sample_down_1D_curve,get_initial_ID,remove_small_groups,print_upper_triangular_matrix,print_a_line_with_timestamp from sp_utilities import convertasi,prepare_ptp,print_dict,get_resolution_mrk01,partition_to_groups,partition_independent_runs,get_outliers from sp_utilities import merge_groups, save_alist, margin_of_error, get_margin_of_error, do_two_way_comparison, select_two_runs, get_ali3d_params from sp_utilities import counting_projections, unload_dict, load_dict, get_stat_proj, create_random_list, get_number_of_groups, recons_mref from sp_utilities import apply_low_pass_filter, get_groups_from_partition, get_number_of_groups, get_complementary_elements_total, update_full_dict from sp_utilities import count_chunk_members, set_filter_parameters_from_adjusted_fsc, get_two_chunks_from_stack ####------------------------------------------------------------------ # another part from sp_utilities import get_class_members, remove_small_groups, get_number_of_groups, get_stable_members_from_two_runs from sp_utilities import 
two_way_comparison_single, get_leftover_from_stable, get_initial_ID, Kmeans_exhaustive_run from sp_utilities import print_a_line_with_timestamp, split_a_group # # Get the pixel size; if none, set to 1.0, and the original image size from sp_utilities import get_shrink_data_huang from time import sleep import sp_user_functions user_func = sp_user_functions.factory[Tracker["constants"]["user_func"]] if(myid == main_node): line = '' sxprint((line+"Initialization of 3-D sorting")) a = get_im(Tracker["orgstack"]) nnxo = a.get_xsize() if( Tracker["nxinit"] > nnxo ): ERROR( "Image size less than minimum permitted $d"%Tracker["nxinit"] ) nnxo = -1 # we break here, so not sure what this is supposed to accomplish return else: if Tracker["constants"]["CTF"]: i = a.get_attr('ctf') pixel_size = i.apix fq = pixel_size/Tracker["fuse_freq"] else: pixel_size = 1.0 # No pixel size, fusing computed as 5 Fourier pixels fq = 5.0/nnxo del a else: nnxo = 0 fq = 0.0 pixel_size = 1.0 nnxo = bcast_number_to_all(nnxo, source_node = main_node) if( nnxo < 0 ): return pixel_size = bcast_number_to_all(pixel_size, source_node = main_node) fq = bcast_number_to_all(fq, source_node = main_node) if Tracker["constants"]["wn"]==0: Tracker["constants"]["nnxo"] = nnxo else: Tracker["constants"]["nnxo"] = Tracker["constants"]["wn"] nnxo= Tracker["constants"]["wn"] Tracker["constants"]["pixel_size"] = pixel_size Tracker["fuse_freq"] = fq del fq, nnxo, pixel_size if(Tracker["constants"]["radius"] < 1): Tracker["constants"]["radius"] = Tracker["constants"]["nnxo"]//2-2 elif((2*Tracker["constants"]["radius"] +2) > Tracker["constants"]["nnxo"]): ERROR( "Particle radius set too large!", myid=myid ) return ####----------------------------------------------------------------------------------------- # create the master directory if myid == main_node: if masterdir =="": timestring = strftime("_%d_%b_%Y_%H_%M_%S", localtime()) masterdir ="master_sort3d"+timestring li =len(masterdir) else: li = 0 cmd="{} 
{}".format("mkdir -p", masterdir) os.system(cmd) sp_global_def.write_command(masterdir) else: li=0 li = mpi.mpi_bcast( li, 1, mpi.MPI_INT, main_node, mpi.MPI_COMM_WORLD )[0] if li>0: masterdir = mpi.mpi_bcast( masterdir, li,MPI_CHAR, main_node, mpi.MPI_COMM_WORLD ) masterdir = string.join(masterdir,"") ####--- masterdir done! if myid == main_node: print_dict(Tracker["constants"],"Permanent settings of 3-D sorting program") from time import sleep while not os.path.exists(masterdir): # Be sure each proc is able to access the created dir sxprint("Node ",myid," waiting...") sleep(5) mpi.mpi_barrier(mpi.MPI_COMM_WORLD) ######### create a vstack from input stack to the local stack in masterdir # stack name set to default Tracker["constants"]["stack"] = "bdb:"+masterdir+"/rdata" Tracker["constants"]["ali3d"] = os.path.join(masterdir, "ali3d_init.txt") Tracker["constants"]["partstack"] = Tracker["constants"]["ali3d"] Tracker["constants"]["ctf_params"] = os.path.join(masterdir, "ctf_params.txt") ###### if myid == main_node: if(Tracker["orgstack"][:4] == "bdb:"): cmd = "{} {} {}".format("e2bdb.py", Tracker["orgstack"],"--makevstack="+Tracker["constants"]["stack"]) else: cmd = "{} {} {}".format("sp_cpy.py", orgstack, Tracker["constants"]["stack"]) cmdexecute(cmd) cmd = "{} {} {} {} ".format("sp_header.py", Tracker["constants"]["stack"],"--params=xform.projection","--export="+Tracker["constants"]["ali3d"]) cmdexecute(cmd) cmd = "{} {} {} {} ".format("sp_header.py", Tracker["constants"]["stack"],"--params=ctf","--export="+Tracker["constants"]["ctf_params"]) cmdexecute(cmd) #keepchecking = False total_stack = EMUtil.get_image_count(Tracker["orgstack"]) else: total_stack =0 total_stack = bcast_number_to_all(total_stack, source_node = main_node) """ if myid==main_node: from EMAN2db import db_open_dict OB = db_open_dict(orgstack) DB = db_open_dict(Tracker["constants"]["stack"]) for i in xrange(total_stack): DB[i] = OB[i] OB.close() DB.close() mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) 
if myid==main_node: params= [] for i in xrange(total_stack): e=get_im(orgstack,i) phi,theta,psi,s2x,s2y = get_params_proj(e) params.append([phi,theta,psi,s2x,s2y]) write_text_row(params,Tracker["constants"]["ali3d"]) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) """ #Tracker["total_stack"] = total_stack Tracker["constants"]["total_stack"] = total_stack Tracker["shrinkage"] = float(Tracker["nxinit"])/Tracker["constants"]["nnxo"] #####------------------------------------------------------------------------------ if Tracker["constants"]["mask3D"]: Tracker["mask3D"] = os.path.join(masterdir,"smask.hdf") else:Tracker["mask3D"] = None if Tracker["constants"]["focus3Dmask"]: Tracker["focus3D"]=os.path.join(masterdir,"sfocus.hdf") else: Tracker["focus3D"] = None if myid ==main_node: if Tracker["constants"]["mask3D"]: get_shrink_3dmask(Tracker["nxinit"],Tracker["constants"]["mask3D"]).write_image(Tracker["mask3D"]) if Tracker["constants"]["focus3Dmask"]: mask_3D = get_shrink_3dmask(Tracker["nxinit"],Tracker["constants"]["focus3Dmask"]) st = Util.infomask(mask_3D, None, True) if( st[0] == 0.0 ): ERROR( "sxrsort3d","incorrect focused mask, after binarize all values zero" ) mask_3D.write_image(Tracker["focus3D"]) del mask_3D if Tracker["constants"]["PWadjustment"]: PW_dict={} nxinit_pwsp=sample_down_1D_curve(Tracker["constants"]["nxinit"],Tracker["constants"]["nnxo"],Tracker["constants"]["PWadjustment"]) Tracker["nxinit_PW"] = os.path.join(masterdir,"spwp.txt") if myid ==main_node: write_text_file(nxinit_pwsp,Tracker["nxinit_PW"]) PW_dict[Tracker["constants"]["nnxo"]] =Tracker["constants"]["PWadjustment"] PW_dict[Tracker["constants"]["nxinit"]] =Tracker["nxinit_PW"] Tracker["PW_dict"] = PW_dict ###---------------------------------------------------------------------------------- ####--------------------------- Extract the previous results ##################################################### from random import shuffle if myid ==main_node: log_main.add(" Sphire rsort3d ") 
log_main.add("extract stable groups from two previous runs") stable_member_list = get_stable_members_from_two_runs(Tracker["constants"]["previous_runs"], Tracker["constants"]["total_stack"], log_main) Tracker["this_unaccounted_list"], new_stable_P1 = get_leftover_from_stable(stable_member_list, Tracker["constants"]["total_stack"], Tracker["constants"]["smallest_group"]) Tracker["this_unaccounted_list"].sort() Tracker["total_stack"] = len(Tracker["this_unaccounted_list"]) log_main.add("new stable is %d"%len(new_stable_P1)) else: Tracker["total_stack"] = 0 Tracker["this_unaccounted_list"] = 0 stable_member_list =0 stable_member_list = wrap_mpi_bcast(stable_member_list, main_node) Tracker["total_stack"] = bcast_number_to_all(Tracker["total_stack"], source_node = main_node) left_one_from_old_two_runs = wrap_mpi_bcast(Tracker["this_unaccounted_list"], main_node) if myid ==main_node: write_text_file(left_one_from_old_two_runs, os.path.join(masterdir,"unaccounted_from_two_previous_runs.txt")) sxprint(" Extracting results of two previous runs is done!") #################################### Estimate resolution----------------------############# #### make chunkdir dictionary for computing margin of error chunk_list = [] if Tracker["constants"]["chunkdir"] !="": ##inhere previous random assignment of odd and even if myid == main_node: chunk_one = read_text_file(os.path.join(Tracker["constants"]["chunkdir"],"chunk0.txt")) chunk_two = read_text_file(os.path.join(Tracker["constants"]["chunkdir"],"chunk1.txt")) else: chunk_one = 0 chunk_two = 0 chunk_one = wrap_mpi_bcast(chunk_one, main_node) chunk_two = wrap_mpi_bcast(chunk_two, main_node) else: ## if traces are lost, then creating new random assignment of odd, even particles chunks = list(range(Tracker["constants"]["total_stack"])) shuffle(chunks) chunk_one =chunks[0:Tracker["constants"]["total_stack"]//2] chunk_two =chunks[Tracker["constants"]["total_stack"]//2:Tracker["constants"]["total_stack"]] chunk_one = 
wrap_mpi_bcast(chunk_one, main_node) chunk_two = wrap_mpi_bcast(chunk_two, main_node) ###### Fill chunk ID into headers when calling get_shrink_data_huang if myid ==main_node: sxprint(" random odd and even assignment done !") mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) #------------------------------------------------------------------------------ Tracker["chunk_dict"] = {} for element in chunk_one: Tracker["chunk_dict"][element] = 0 for element in chunk_two: Tracker["chunk_dict"][element] = 1 Tracker["P_chunk0"] = len(chunk_one)/float(Tracker["constants"]["total_stack"]) Tracker["P_chunk1"] = len(chunk_two)/float(Tracker["constants"]["total_stack"]) ### create two volumes to estimate resolution if myid == main_node: write_text_file(chunk_one, os.path.join(masterdir,"chunk0.txt")) write_text_file(chunk_two, os.path.join(masterdir,"chunk1.txt")) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) vols = [] for index in range(2): data1,old_shifts1 = get_shrink_data_huang(Tracker,Tracker["constants"]["nxinit"], os.path.join(masterdir,"chunk%d.txt"%index), Tracker["constants"]["partstack"], myid, main_node, nproc, preshift = True) vol1 = recons3d_4nn_ctf_MPI(myid=myid, prjlist=data1, symmetry=Tracker["constants"]["sym"], finfo=None) if myid ==main_node: vol1_file_name = os.path.join(masterdir, "vol%d.hdf"%index) vol1.write_image(vol1_file_name) vols.append(vol1) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) if myid ==main_node: low_pass, falloff, currentres = get_resolution_mrk01(vols, Tracker["constants"]["radius"]*Tracker["shrinkage"], Tracker["constants"]["nxinit"], masterdir,Tracker["mask3D"]) if low_pass > Tracker["constants"]["low_pass_filter"]: low_pass = Tracker["constants"]["low_pass_filter"] else: low_pass = 0.0 falloff = 0.0 currentres = 0.0 currentres = bcast_number_to_all(currentres,source_node = main_node) low_pass = bcast_number_to_all(low_pass,source_node = main_node) falloff = bcast_number_to_all(falloff,source_node = main_node) Tracker["currentres"] = currentres 
#################################################################### Tracker["falloff"] = falloff if Tracker["constants"]["low_pass_filter"] == -1.0: Tracker["low_pass_filter"] = low_pass*Tracker["shrinkage"] else: Tracker["low_pass_filter"] = Tracker["constants"]["low_pass_filter"]/Tracker["shrinkage"] Tracker["lowpass"] = Tracker["low_pass_filter"] Tracker["falloff"] = 0.1 Tracker["global_fsc"] = os.path.join(masterdir,"fsc.txt") ################################################################## if myid ==main_node: log_main.add("The command-line inputs are :") log_main.add("**********************************************************") for a in sys.argv: log_main.add(a) log_main.add("**********************************************************") from sp_filter import filt_tanl ##################### START 3-D sorting ########################## if myid ==main_node: log_main.add("----------3-D sorting program------- ") log_main.add("current resolution %6.3f for images of original size in terms of absolute frequency"%Tracker["currentres"]) log_main.add("equivalent to %f Angstrom resolution"%(round((Tracker["constants"]["pixel_size"]/Tracker["currentres"]/Tracker["shrinkage"]),4))) filt_tanl(get_im(os.path.join(masterdir, "vol0.hdf")), Tracker["low_pass_filter"], 0.1).write_image(os.path.join(masterdir, "volf0.hdf")) filt_tanl(get_im(os.path.join(masterdir, "vol1.hdf")), Tracker["low_pass_filter"], 0.1).write_image(os.path.join(masterdir, "volf1.hdf")) sxprint(" random odd and even assignment done !") mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ## ---------------------------------------------------------------------------------------------######## ## Stop program and output results when the leftover from two sort3d runs is not sufficient for a new run ######## ## --------------------------------------------------- --------------------------------------- ###### Tracker["number_of_groups"] = get_number_of_groups(len(left_one_from_old_two_runs), 
Tracker["constants"]["number_of_images_per_group"]) if Tracker["number_of_groups"] <=1 : # programs finishes if myid == main_node: log_main.add("the unaccounted ones are no sufficient for a simple two-group run, output results!") log_main.add("this implies your two sort3d runs already achieved high reproducibale ratio. ") log_main.add("Or your number_of_images_per_group is too large ") log_main.add("the final reproducibility is %f"%((Tracker["constants"]["total_stack"]-len(Tracker["this_unaccounted_list"]))/float(Tracker["constants"]["total_stack"]))) for i in range(len(stable_member_list)): write_text_file(stable_member_list[i], os.path.join(masterdir,"P2_final_class%d.txt"%i)) mask3d = get_im(Tracker["constants"]["mask3D"]) else: mask3d = model_blank(Tracker["constants"]["nnxo"],Tracker["constants"]["nnxo"],Tracker["constants"]["nnxo"]) bcast_EMData_to_all(mask3d, myid, main_node) for igrp in range(len(stable_member_list)): #name_of_class_file = os.path.join(masterdir, "P2_final_class%d.txt"%igrp) data, old_shifts = get_shrink_data_huang(Tracker,Tracker["constants"]["nnxo"], os.path.join(masterdir, "P2_final_class%d.txt"%igrp), Tracker["constants"]["partstack"], myid, main_node, nproc,preshift = True) if Tracker["constants"]["CTF"]: volref, fscc = rec3D_two_chunks_MPI(data, 1.0, Tracker["constants"]["sym"], mask3d,os.path.join(masterdir,"resolution_%02d.txt"%igrp), myid, main_node, index =-1, npad=2) else: sxprint("Missing CTF flag!") return mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) #nx_of_image=volref.get_xsize() if Tracker["constants"]["PWadjustment"] : Tracker["PWadjustment"] = Tracker["PW_dict"][Tracker["constants"]["nnxo"]] else: Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"] if myid ==main_node: try: lowpass = search_lowpass(fscc) falloff = 0.1 except: lowpass = 0.4 falloff = 0.1 log_main.add(" lowpass and falloff from fsc are %f %f"%(lowpass, falloff)) lowpass = round(lowpass,4) falloff = round(min(0.1,falloff),4) Tracker["lowpass"] = lowpass 
Tracker["falloff"] = falloff refdata = [None]*4 refdata[0] = volref refdata[1] = Tracker refdata[2] = Tracker["constants"]["myid"] refdata[3] = Tracker["constants"]["nproc"] volref = user_func(refdata) cutoff = Tracker["constants"]["pixel_size"]/lowpass log_main.add("%d vol low pass filer %f %f cut to %f Angstrom"%(igrp,Tracker["lowpass"],Tracker["falloff"],cutoff)) volref.write_image(os.path.join(masterdir,"volf_final%d.hdf"%igrp)) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) return else: # Continue clustering on unaccounted ones that produced by two_way comparison of two previous runs ######################################################################################################################### #if Tracker["constants"]["number_of_images_per_group"] ==-1: # Estimate number of images per group from delta, and scale up # or down by scale_of_number # number_of_images_per_group = int(Tracker["constants"]["scale_of_number"]*len(n_angles)) # #########################################################################################################################P2 if myid ==main_node: sxprint(" Now continue clustering on accounted ones because they can make at least two groups!") P2_partitions = [] number_of_P2_runs = 2 # Notice P2 start from two P1 runs ### input list_to_be_processed import copy mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) for iter_P2_run in range(number_of_P2_runs): # two runs such that one can obtain reproducibility list_to_be_processed = left_one_from_old_two_runs[:]#Tracker["this_unaccounted_list"][:] Tracker["this_unaccounted_list"] = left_one_from_old_two_runs[:] if myid == main_node : new_stable1 = new_stable_P1[:] total_stack = len(list_to_be_processed) # This is the input from two P1 runs #number_of_images_per_group = Tracker["constants"]["number_of_images_per_group"] P2_run_dir = os.path.join(masterdir, "P2_run%d"%iter_P2_run) Tracker["number_of_groups"] = get_number_of_groups(total_stack, Tracker["constants"]["number_of_images_per_group"]) if 
myid == main_node: cmd="{} {}".format("mkdir", P2_run_dir) os.system(cmd) log_main.add("----------------P2 independent run %d--------------"%iter_P2_run) log_main.add("user provided number_of_images_per_group %d"%Tracker["constants"]["number_of_images_per_group"]) sxprint("----------------P2 independent run %d--------------"%iter_P2_run) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) # #Tracker["number_of_groups"] = get_number_of_groups(total_stack,Tracker["constants"]["number_of_images_per_group"]) generation = 0 if myid == main_node: log_main.add("number of groups is %d"%Tracker["number_of_groups"]) log_main.add("total stack %d"%total_stack) while( Tracker["number_of_groups"]>=2 ): partition_dict = {} full_dict = {} workdir = os.path.join(P2_run_dir,"generation%03d"%generation) Tracker["this_dir"] = workdir if myid ==main_node: cmd="{} {}".format("mkdir", workdir) os.system(cmd) log_main.add("---- generation %5d"%generation) log_main.add("number of images per group is set as %d"%Tracker["constants"]["number_of_images_per_group"]) log_main.add("the initial number of groups is %d "%Tracker["number_of_groups"]) log_main.add(" the number to be processed in this generation is %d"%len(list_to_be_processed)) sxprint("---- generation %5d"%generation) #core=read_text_row(Tracker["constants"]["ali3d"],-1) #write_text_row(core, os.path.join(workdir,"node%d.txt"%myid)) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) Tracker["this_data_list"] = list_to_be_processed # leftover of P1 runs Tracker["total_stack"] = len(list_to_be_processed) create_random_list(Tracker) ###------ For super computer ############## update_full_dict(list_to_be_processed, Tracker) ###---- ##### ----------------Independent runs for EQ-Kmeans ------------------------------------ for indep_run in range(Tracker["constants"]["indep_runs"]): Tracker["this_particle_list"] = Tracker["this_indep_list"][indep_run] ref_vol = recons_mref(Tracker) if myid ==main_node: log_main.add("independent run %10d"%indep_run) mpi.mpi_barrier( 
mpi.MPI_COMM_WORLD ) #this_particle_text_file = # for get_shrink_data if myid ==main_node: write_text_file(list_to_be_processed, os.path.join(workdir, "independent_list_%03d.txt"%indep_run)) mref_ali3d_EQ_Kmeans(ref_vol, os.path.join(workdir, "EQ_Kmeans%03d"%indep_run), os.path.join(workdir, "independent_list_%03d.txt"%indep_run), Tracker) partition_dict[indep_run] = Tracker["this_partition"] del ref_vol Tracker["partition_dict"] = partition_dict Tracker["this_total_stack"] = Tracker["total_stack"] do_two_way_comparison(Tracker) ############################## if myid ==main_node: log_main.add("Now calculate stable volumes") if myid ==main_node: for igrp in range(len(Tracker["two_way_stable_member"])): Tracker["this_data_list"] = Tracker["two_way_stable_member"][igrp] write_text_file(Tracker["this_data_list"], os.path.join(workdir,"stable_class%d.txt"%igrp)) Tracker["this_data_list_file"] = -1 mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ### number_of_ref_class = [] ref_vol_list = [] for igrp in range(len(Tracker["two_way_stable_member"])): data, old_shifts = get_shrink_data_huang(Tracker,Tracker["nxinit"], os.path.join(workdir, "stable_class%d.txt"%igrp), Tracker["constants"]["partstack"], myid, main_node, nproc, preshift = True) volref = recons3d_4nn_ctf_MPI(myid=myid,prjlist=data,symmetry=Tracker["constants"]["sym"],finfo = None) ref_vol_list.append(volref) number_of_ref_class.append(len(Tracker["this_data_list"])) if myid ==main_node: log_main.add("group %d members %d "%(igrp,len(Tracker["this_data_list"]))) #ref_vol_list=apply_low_pass_filter(ref_vol_list,Tracker) for iref in range(len(ref_vol_list)): ref_vol_list[iref].write_image(os.path.join(workdir,"vol_stable.hdf"),iref) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ################################ Tracker["number_of_ref_class"] = number_of_ref_class Tracker["this_data_list"] = Tracker["this_accounted_list"] outdir = os.path.join(workdir, "Kmref") empty_groups,res_classes,final_list = ali3d_mref_Kmeans_MPI(ref_vol_list, 
outdir, os.path.join(workdir,"Accounted.txt"), Tracker) Tracker["this_unaccounted_list"] = get_complementary_elements(list_to_be_processed,final_list) if myid == main_node: log_main.add("the number of particles not processed is %d"%len(Tracker["this_unaccounted_list"])) update_full_dict(Tracker["this_unaccounted_list"], Tracker) if myid == main_node: write_text_file(Tracker["this_unaccounted_list"], Tracker["this_unaccounted_text"]) Tracker["number_of_groups"] = len(res_classes) ### Update data mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) if myid == main_node: number_of_ref_class=[] log_main.add(" Compute volumes of original size") for igrp in range(Tracker["number_of_groups"]): if os.path.exists( os.path.join( outdir,"Class%d.txt"%igrp ) ): new_stable1.append( read_text_file( os.path.join( outdir, "Class%d.txt"%igrp ) ) ) log_main.add(" read Class file %d"%igrp) number_of_ref_class.append(len(new_stable1)) else: number_of_ref_class = 0 number_of_ref_class = wrap_mpi_bcast(number_of_ref_class,main_node) ################################ mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) if myid ==main_node: vol_list = [] for igrp in range(Tracker["number_of_groups"]): if myid ==main_node: log_main.add("start vol %d"%igrp) data,old_shifts = get_shrink_data_huang(Tracker,Tracker["constants"]["nnxo"], os.path.join(outdir,"Class%d.txt"%igrp), Tracker["constants"]["partstack"],myid, main_node, nproc, preshift = True) volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist = data, symmetry = Tracker["constants"]["sym"],finfo= None) if myid == main_node: vol_list.append(volref) log_main.add(" vol %d is done"%igrp) Tracker["number_of_ref_class"] = number_of_ref_class mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) generation +=1 ################################# if myid ==main_node: for ivol in range(len(vol_list)): vol_list[ivol].write_image(os.path.join(workdir, "vol_of_Classes.hdf"),ivol) filt_tanl(vol_list[ivol],Tracker["constants"]["low_pass_filter"],.1).write_image(os.path.join(workdir, 
"volf_of_Classes.hdf"),ivol) log_main.add("number of unaccounted particles %10d"%len(Tracker["this_unaccounted_list"])) log_main.add("number of accounted particles %10d"%len(Tracker["this_accounted_list"])) del vol_list Tracker["this_data_list"] = Tracker["this_unaccounted_list"] Tracker["total_stack"] = len(Tracker["this_unaccounted_list"]) Tracker["this_total_stack"] = Tracker["total_stack"] #update_full_dict(complementary) #number_of_groups = int(float(len(Tracker["this_unaccounted_list"]))/number_of_images_per_group) del list_to_be_processed list_to_be_processed = copy.deepcopy(Tracker["this_unaccounted_list"]) Tracker["number_of_groups"] = get_number_of_groups(len(list_to_be_processed),Tracker["constants"]["number_of_images_per_group"]) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ############################################################################################################################# ### Reconstruct the unaccounted is only done once if (Tracker["constants"]["unaccounted"] and (len(Tracker["this_unaccounted_list"]) != 0)): data,old_shifts = get_shrink_data_huang(Tracker,Tracker["constants"]["nnxo"],Tracker["this_unaccounted_text"],Tracker["constants"]["partstack"],myid,main_node,nproc,preshift = True) volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist = data, symmetry=Tracker["constants"]["sym"],finfo=None) volref = filt_tanl(volref, Tracker["constants"]["low_pass_filter"],.1) if myid ==main_node: volref.write_image(os.path.join(workdir, "volf_unaccounted.hdf")) ######## Exhaustive Kmeans ############################################# if myid ==main_node: if len(Tracker["this_unaccounted_list"])>=Tracker["constants"]["smallest_group"]: new_stable1.append(Tracker["this_unaccounted_list"]) unaccounted = get_complementary_elements_total(Tracker["constants"]["total_stack"], final_list) Tracker["number_of_groups"] = len(new_stable1) log_main.add("----------------Exhaustive Kmeans------------------") log_main.add("number_of_groups is 
%d"%Tracker["number_of_groups"]) else: Tracker["number_of_groups"] = 0 ### prepare references for final K-means if myid == main_node: final_list =[] for alist in new_stable1: for element in alist:final_list.append(int(element)) unaccounted = get_complementary_elements_total(Tracker["constants"]["total_stack"],final_list) if len(unaccounted) > Tracker["constants"]["smallest_group"]: # treat unaccounted ones also as a group if it is not too small. new_stable1.append(unaccounted) Tracker["number_of_groups"] = len(new_stable1) for any in unaccounted:final_list.append(any) log_main.add("total number %d"%len(final_list)) else: final_list = 0 Tracker["number_of_groups"] = bcast_number_to_all(Tracker["number_of_groups"],source_node = main_node) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) final_list = wrap_mpi_bcast(final_list, main_node) workdir = os.path.join(P2_run_dir,"Exhaustive_Kmeans") # new workdir if myid==main_node: os.mkdir(workdir) write_text_file(final_list, os.path.join(workdir,"final_list.txt")) else: new_stable1 = 0 mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ## Create reference volumes if myid == main_node: number_of_ref_class = [] for igrp in range(Tracker["number_of_groups"]): class_file = os.path.join(workdir,"final_class%d.txt"%igrp) write_text_file(new_stable1[igrp],class_file) log_main.add(" group %d number of particles %d"%(igrp,len(new_stable1[igrp]))) number_of_ref_class.append(len(new_stable1[igrp])) else: number_of_ref_class= 0 number_of_ref_class = wrap_mpi_bcast(number_of_ref_class,main_node) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ref_vol_list = [] for igrp in range(Tracker["number_of_groups"]): if myid ==main_node : sxprint(" prepare reference %d"%igrp) #Tracker["this_data_list_file"] = os.path.join(workdir,"final_class%d.txt"%igrp) data,old_shifts = get_shrink_data_huang(Tracker, Tracker["nxinit"],os.path.join(workdir,"final_class%d.txt"%igrp), Tracker["constants"]["partstack"], myid,main_node,nproc,preshift = True) volref = 
recons3d_4nn_ctf_MPI(myid=myid, prjlist = data, symmetry=Tracker["constants"]["sym"], finfo = None) #volref = filt_tanl(volref, Tracker["low_pass_filter"],.1) #if myid == main_node: # volref.write_image(os.path.join(masterdir,"volf_stable.hdf"),iref) #volref = resample(volref,Tracker["shrinkage"]) bcast_EMData_to_all(volref, myid, main_node) ref_vol_list.append(volref) mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) ### -------variables used in Kmeans_exhaustive_run----- Tracker["number_of_ref_class"] = number_of_ref_class Tracker["this_data_list"] = final_list Tracker["total_stack"] = len(final_list) Tracker["this_dir"] = workdir Tracker["this_data_list_file"] = os.path.join(workdir,"final_list.txt") KE_group = Kmeans_exhaustive_run(ref_vol_list,Tracker) # P2_partitions.append(KE_group[:][:]) if myid ==main_node: log_main.add(" the number of groups after exhaustive Kmeans is %d"%len(KE_group)) for ike in range(len(KE_group)):log_main.add(" group %d number of objects %d"%(ike,len(KE_group[ike]))) del new_stable1 mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) if myid == main_node: log_main.add("P2 runs are done, now start two-way comparision to exclude those that are not reproduced ") reproduced_groups = two_way_comparison_single(P2_partitions[0],P2_partitions[1],Tracker)# Here partition IDs are original indexes. 
###### ----------------Reconstruct reproduced groups------------------------####### ###### if myid == main_node: for index_of_reproduced_groups in range(len(reproduced_groups)): name_of_class_file = os.path.join(masterdir, "P2_final_class%d.txt"%index_of_reproduced_groups) write_text_file(reproduced_groups[index_of_reproduced_groups],name_of_class_file) log_main.add("-------start to reconstruct reproduced volumes individully to orignal size-----------") mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) if Tracker["constants"]["mask3D"]: mask_3d = get_shrink_3dmask(Tracker["constants"]["nnxo"],Tracker["constants"]["mask3D"]) else: mask_3d = None for igrp in range(len(reproduced_groups)): data,old_shifts = get_shrink_data_huang(Tracker,Tracker["constants"]["nnxo"],os.path.join(masterdir, "P2_final_class%d.txt"%igrp),Tracker["constants"]["partstack"],myid,main_node,nproc,preshift = True) #volref = recons3d_4nn_ctf_MPI(myid=myid, prjlist = data, symmetry=Tracker["constants"]["sym"], finfo=None) if Tracker["constants"]["CTF"]: volref, fscc = rec3D_two_chunks_MPI(data,1.0,Tracker["constants"]["sym"],mask_3d, \ os.path.join(masterdir,"resolution_%02d.txt"%igrp),myid,main_node,index =-1,npad =2,finfo=None) else: sxprint("Missing CTF flag!") return mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) fscc = read_text_file(os.path.join(masterdir, "resolution_%02d.txt"%igrp),-1) nx_of_image = volref.get_xsize() if Tracker["constants"]["PWadjustment"]: Tracker["PWadjustment"] = Tracker["PW_dict"][nx_of_image] else: Tracker["PWadjustment"] = Tracker["constants"]["PWadjustment"] try: lowpass = search_lowpass(fscc) falloff = 0.1 except: lowpass= 0.4 falloff= 0.1 sxprint(lowpass) lowpass=round(lowpass,4) falloff=round(min(.1,falloff),4) Tracker["lowpass"]= lowpass Tracker["falloff"]= falloff if myid == main_node: refdata =[None]*4 refdata[0] = volref refdata[1] = Tracker refdata[2] = Tracker["constants"]["myid"] refdata[3] = Tracker["constants"]["nproc"] volref = user_func(refdata) cutoff = 
Tracker["constants"]["pixel_size"]/lowpass log_main.add("%d vol low pass filer %f %f cut to %f Angstrom"%(igrp,Tracker["lowpass"],Tracker["falloff"],cutoff)) volref.write_image(os.path.join(masterdir,"volf_final%d.hdf"%igrp)) if myid==main_node: log_main.add(" sxsort3d_P2 finishes. ") # Finish program mpi.mpi_barrier( mpi.MPI_COMM_WORLD ) return
def main():
    """Post-process ISAC 2D class averages under MPI.

    Parses the command line, sets up the global ``Blockdata`` (MPI layout)
    and ``Tracker`` (run constants) dictionaries, creates the output
    directory on the main node, combines the initial and ISAC 2D alignment
    parameters of every class member, and then recomputes each class
    average (optionally with local alignment), applies the requested
    power-spectrum adjustment and low-pass filter, and gathers the results
    on the main node, which writes ``class_averages.hdf``, the per-average
    parameter files and ``FH_list.txt``.  Finally the main node runs
    ``sp_chains.py`` to produce ``ordered_class_averages.hdf``.

    The function takes no arguments (it reads ``sys.argv``) and returns
    ``None``.  Every MPI rank must call it because it contains collective
    operations (broadcasts and barriers).
    """
    global Tracker, Blockdata
    progname = os.path.basename(sys.argv[0])
    usage = progname + " --output_dir=output_dir --isac_dir=output_dir_of_isac "
    parser = optparse.OptionParser(usage, version=sp_global_def.SPARXVERSION)
    # Four choices for --pw_adjustment:
    #   1> analytical_model (default);
    #   2> no_adjustment;
    #   3> bfactor;
    #   4> any other value is taken as the name of a text file that holds a
    #      1D rotationally averaged power spectrum.
    parser.add_option(
        "--pw_adjustment",
        type="string",
        default="analytical_model",
        help=
        "adjust power spectrum of 2-D averages to an analytic model. Other opions: no_adjustment; bfactor; a text file of 1D rotationally averaged PW",
    )
    # options in common
    parser.add_option(
        "--isac_dir",
        type="string",
        default="",
        help="ISAC run output directory, input directory for this command",
    )
    parser.add_option(
        "--output_dir",
        type="string",
        default="",
        help="output directory where computed averages are saved",
    )
    parser.add_option(
        "--pixel_size",
        type="float",
        default=-1.0,
        help=
        "pixel_size of raw images. one can put 1.0 in case of negative stain data",
    )
    parser.add_option(
        "--fl",
        type="float",
        default=-1.0,
        help=
        "low pass filter, = -1.0, not applied; =0.0, using FH1 (initial resolution), = 1.0 using FH2 (resolution after local alignment), or user provided value in absolute freqency [0.0:0.5]",
    )
    parser.add_option("--stack",
                      type="string",
                      default="",
                      help="data stack used in ISAC")
    parser.add_option("--radius", type="int", default=-1, help="radius")
    parser.add_option("--xr",
                      type="float",
                      default=-1.0,
                      help="local alignment search range")
    parser.add_option(
        "--fh",
        type="float",
        default=-1.0,
        help="local alignment high frequencies limit",
    )
    parser.add_option("--navg",
                      type="int",
                      default=1000000,
                      help="number of aveages")
    parser.add_option(
        "--local_alignment",
        action="store_true",
        default=False,
        help="do local alignment",
    )
    parser.add_option(
        "--noctf",
        action="store_true",
        default=False,
        help=
        "no ctf correction, useful for negative stained data. always ctf for cryo data",
    )
    parser.add_option(
        "--B_start",
        type="float",
        default=45.0,
        help=
        "start frequency (Angstrom) of power spectrum for B_factor estimation",
    )
    parser.add_option(
        "--Bfactor",
        type="float",
        default=-1.0,
        help=
        "User defined bactors (e.g. 25.0[A^2]). By default, the program automatically estimates B-factor. ",
    )

    (options, args) = parser.parse_args(sys.argv[1:])

    # Exactly one of the four adjustment modes is active.
    adjust_to_analytic_model = options.pw_adjustment == "analytical_model"
    no_adjustment = options.pw_adjustment == "no_adjustment"
    B_enhance = options.pw_adjustment == "bfactor"
    adjust_to_given_pw2 = not (adjust_to_analytic_model or no_adjustment
                               or B_enhance)

    # ---- MPI layout ------------------------------------------------------
    nproc = mpi.mpi_comm_size(mpi.MPI_COMM_WORLD)
    myid = mpi.mpi_comm_rank(mpi.MPI_COMM_WORLD)

    Blockdata = {}
    Blockdata["nproc"] = nproc
    Blockdata["myid"] = myid
    Blockdata["main_node"] = 0
    Blockdata["shared_comm"] = mpi.mpi_comm_split_type(
        mpi.MPI_COMM_WORLD, mpi.MPI_COMM_TYPE_SHARED, 0, mpi.MPI_INFO_NULL)
    Blockdata["myid_on_node"] = mpi.mpi_comm_rank(Blockdata["shared_comm"])
    Blockdata["no_of_processes_per_group"] = mpi.mpi_comm_size(
        Blockdata["shared_comm"])
    masters_from_groups_vs_everything_else_comm = mpi.mpi_comm_split(
        mpi.MPI_COMM_WORLD,
        Blockdata["main_node"] == Blockdata["myid_on_node"],
        Blockdata["myid_on_node"],
    )
    (
        Blockdata["color"],
        Blockdata["no_of_groups"],
        balanced_processor_load_on_nodes,
    ) = sp_utilities.get_colors_and_subsets(
        Blockdata["main_node"],
        mpi.MPI_COMM_WORLD,
        Blockdata["myid"],
        Blockdata["shared_comm"],
        Blockdata["myid_on_node"],
        masters_from_groups_vs_everything_else_comm,
    )
    # For 3D stuff take the three last nodes.
    Blockdata["node_volume"] = [
        Blockdata["no_of_groups"] - 3,
        Blockdata["no_of_groups"] - 2,
        Blockdata["no_of_groups"] - 1,
    ]
    # Global rank of the first process on each of the selected nodes, so
    # that every rank can identify them.
    Blockdata["nodes"] = [
        node * Blockdata["no_of_processes_per_group"]
        for node in Blockdata["node_volume"]
    ]
    # End of Blockdata

    sp_global_def.BATCH = True
    sp_global_def.MPI = True

    if adjust_to_given_pw2:
        # Only the main node touches the file system; the verdict is
        # broadcast so that every rank reports the error.
        checking_flag = 0
        if Blockdata["myid"] == Blockdata["main_node"]:
            if not os.path.exists(options.pw_adjustment):
                checking_flag = 1
        checking_flag = sp_utilities.bcast_number_to_all(
            checking_flag, Blockdata["main_node"], mpi.MPI_COMM_WORLD)
        if checking_flag == 1:
            sp_global_def.ERROR("User provided power spectrum does not exist",
                                myid=Blockdata["myid"])

    # ---- Tracker: run constants taken from the command line --------------
    Tracker = {}
    Constants = {}
    Constants["isac_dir"] = options.isac_dir
    Constants["masterdir"] = options.output_dir
    Constants["pixel_size"] = options.pixel_size
    Constants["orgstack"] = options.stack
    Constants["radius"] = options.radius
    Constants["xrange"] = options.xr
    Constants["FH"] = options.fh
    Constants["low_pass_filter"] = options.fl
    Constants["navg"] = options.navg
    Constants["B_start"] = options.B_start
    Constants["Bfactor"] = options.Bfactor
    if adjust_to_given_pw2:
        Constants["modelpw"] = options.pw_adjustment
    Tracker["constants"] = Constants

    # ---- Create master directory and associated subdirectories -----------
    line = time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()) + " =>"
    if Tracker["constants"]["masterdir"] == Tracker["constants"]["isac_dir"]:
        masterdir = os.path.join(Tracker["constants"]["isac_dir"], "sharpen")
    else:
        masterdir = Tracker["constants"]["masterdir"]

    if Blockdata["myid"] == Blockdata["main_node"]:
        sp_global_def.sxprint(line, "Postprocessing ISAC 2D averages starts")
        if not masterdir:
            masterdir = "sharpen_" + Tracker["constants"]["isac_dir"]
            os.makedirs(masterdir)
        elif os.path.exists(masterdir):
            sp_global_def.sxprint("%s already exists" % masterdir)
        else:
            os.makedirs(masterdir)
        sp_global_def.write_command(masterdir)
        for subdir_name in ("ali2d_local_params_avg", "params_avg"):
            subdir_path = os.path.join(masterdir, subdir_name)
            if not os.path.exists(subdir_path):
                os.mkdir(subdir_path)
        li = len(masterdir)
    else:
        li = 0
    # Broadcast the final directory name (length first, then characters).
    li = mpi.mpi_bcast(li, 1, mpi.MPI_INT, Blockdata["main_node"],
                       mpi.MPI_COMM_WORLD)[0]
    masterdir = mpi.mpi_bcast(masterdir, li, mpi.MPI_CHAR,
                              Blockdata["main_node"], mpi.MPI_COMM_WORLD)
    masterdir = b"".join(masterdir).decode('latin1')
    Tracker["constants"]["masterdir"] = masterdir
    log_main = sp_logger.Logger(sp_logger.BaseLogger_Files())
    log_main.prefix = Tracker["constants"]["masterdir"] + "/"

    # Wait until the directory created by the main node is visible here.
    while not os.path.exists(Tracker["constants"]["masterdir"]):
        sp_global_def.sxprint(
            "Node ",
            Blockdata["myid"],
            " waiting...",
            Tracker["constants"]["masterdir"],
        )
        time.sleep(1)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    # ---- Initial 2D alignment parameters, keyed by row index --------------
    if Blockdata["myid"] == Blockdata["main_node"]:
        sp_global_def.sxprint(Tracker["constants"]["isac_dir"])
        Tracker["directory"] = os.path.join(Tracker["constants"]["isac_dir"],
                                            "2dalignment")
        core = sp_utilities.read_text_row(
            os.path.join(Tracker["directory"], "initial2Dparams.txt"))
        init_dict = dict(enumerate(core))
        del core
    else:
        init_dict = 0
    init_dict = sp_utilities.wrap_mpi_bcast(init_dict,
                                            Blockdata["main_node"],
                                            communicator=mpi.MPI_COMM_WORLD)

    do_ctf = not options.noctf

    if Blockdata["myid"] == Blockdata["main_node"]:
        if do_ctf:
            sp_global_def.sxprint("CTF correction is on")
        else:
            sp_global_def.sxprint("CTF correction is off")
        if options.local_alignment:
            sp_global_def.sxprint("local refinement is on")
        else:
            sp_global_def.sxprint("local refinement is off")
        if B_enhance:
            sp_global_def.sxprint("Bfactor is to be applied on averages")
        elif adjust_to_given_pw2:
            sp_global_def.sxprint(
                "PW of averages is adjusted to a given 1D PW curve")
        elif adjust_to_analytic_model:
            sp_global_def.sxprint(
                "PW of averages is adjusted to analytical model")
        else:
            sp_global_def.sxprint("PW of averages is not adjusted")

        image = sp_utilities.get_im(Tracker["constants"]["orgstack"], 0)
        Tracker["constants"]["nnxo"] = image.get_xsize()
        if Tracker["constants"]["pixel_size"] == -1.0:
            sp_global_def.sxprint(
                "Pixel size value is not provided by user. extracting it from ctf header entry of the original stack."
            )
            try:
                ctf_params = image.get_attr("ctf")
                Tracker["constants"]["pixel_size"] = ctf_params.apix
            except Exception:
                # Narrowed from a bare except so that interpreter exits and
                # keyboard interrupts are no longer swallowed here.
                sp_global_def.ERROR(
                    "Pixel size could not be extracted from the original stack.",
                    myid=Blockdata["myid"],
                )

        # Shrink ratio and particle radius recorded by the ISAC run.
        isac_shrink_path = os.path.join(Tracker["constants"]["isac_dir"],
                                        "README_shrink_ratio.txt")
        if not os.path.exists(isac_shrink_path):
            sp_global_def.ERROR(
                "%s does not exist in the specified ISAC run output directory"
                % (isac_shrink_path),
                myid=Blockdata["myid"],
            )
        # The context manager closes the file even if parsing fails below.
        with open(isac_shrink_path, "r") as isac_shrink_file:
            isac_shrink_lines = isac_shrink_file.readlines()
        # 6th line: shrink ratio (= [target particle radius]/[particle radius]) used in the ISAC run
        isac_shrink_ratio = float(isac_shrink_lines[5])
        # 7th line: particle radius at original pixel size used in the ISAC run
        isac_radius = float(isac_shrink_lines[6])
        print("Extracted parameter values")
        print("ISAC shrink ratio : {0}".format(isac_shrink_ratio))
        print("ISAC particle radius : {0}".format(isac_radius))
        Tracker["ini_shrink"] = isac_shrink_ratio
    else:
        Tracker["ini_shrink"] = 0.0
    # After this broadcast every rank holds the main node's Tracker.
    Tracker = sp_utilities.wrap_mpi_bcast(Tracker,
                                          Blockdata["main_node"],
                                          communicator=mpi.MPI_COMM_WORLD)

    x_range = max(
        Tracker["constants"]["xrange"],
        int(old_div(1.0, Tracker["ini_shrink"]) + 0.99999),
    )
    a_range = y_range = x_range

    if Blockdata["myid"] == Blockdata["main_node"]:
        parameters = sp_utilities.read_text_row(
            os.path.join(Tracker["constants"]["isac_dir"],
                         "all_parameters.txt"))
    else:
        parameters = 0
    parameters = sp_utilities.wrap_mpi_bcast(parameters,
                                             Blockdata["main_node"],
                                             communicator=mpi.MPI_COMM_WORLD)

    # ---- Per-average member lists and combined alignment parameters ------
    params_dict = {}
    list_dict = {}
    navg = min(
        Tracker["constants"]["navg"],
        EMAN2_cppwrap.EMUtil.get_image_count(
            os.path.join(Tracker["constants"]["isac_dir"],
                         "class_averages.hdf")),
    )
    global_dict = {}  # particle id -> [average index, position in average]
    ptl_list = []
    memlist = []
    if Blockdata["myid"] == Blockdata["main_node"]:
        sp_global_def.sxprint("Number of averages computed in this run is %d" %
                              navg)
        for iavg in range(navg):
            params_of_this_average = []
            image = sp_utilities.get_im(
                os.path.join(Tracker["constants"]["isac_dir"],
                             "class_averages.hdf"),
                iavg,
            )
            members = sorted(image.get_attr("members"))
            memlist.append(members)
            for im, abs_id in enumerate(members):
                global_dict[abs_id] = [iavg, im]
                # ISAC shifts are divided by the shrink ratio before being
                # combined with the initial parameters.
                P = sp_utilities.combine_params2(
                    init_dict[abs_id][0],
                    init_dict[abs_id][1],
                    init_dict[abs_id][2],
                    init_dict[abs_id][3],
                    parameters[abs_id][0],
                    old_div(parameters[abs_id][1], Tracker["ini_shrink"]),
                    old_div(parameters[abs_id][2], Tracker["ini_shrink"]),
                    parameters[abs_id][3],
                )
                if parameters[abs_id][3] == -1:
                    sp_global_def.sxprint(
                        "WARNING: Image #{0} is an unaccounted particle with invalid 2D alignment parameters and should not be the member of any classes. Please check the consitency of input dataset."
                        .format(abs_id)
                    )  # How to check what is wrong about mirror = -1 (Toshio 2018/01/11)
                params_of_this_average.append([P[0], P[1], P[2], P[3], 1.0])
                ptl_list.append(abs_id)
            params_dict[iavg] = params_of_this_average
            list_dict[iavg] = members
            sp_utilities.write_text_row(
                params_of_this_average,
                os.path.join(
                    Tracker["constants"]["masterdir"],
                    "params_avg",
                    "params_avg_%03d.txt" % iavg,
                ),
            )
        ptl_list.sort()
        init_params = [
            [ptl_id] +
            params_dict[global_dict[ptl_id][0]][global_dict[ptl_id][1]]
            for ptl_id in ptl_list
        ]
        sp_utilities.write_text_row(
            init_params,
            os.path.join(Tracker["constants"]["masterdir"],
                         "init_isac_params.txt"),
        )
    else:
        params_dict = 0
        list_dict = 0
        memlist = 0
    params_dict = sp_utilities.wrap_mpi_bcast(params_dict,
                                              Blockdata["main_node"],
                                              communicator=mpi.MPI_COMM_WORLD)
    list_dict = sp_utilities.wrap_mpi_bcast(list_dict,
                                            Blockdata["main_node"],
                                            communicator=mpi.MPI_COMM_WORLD)
    memlist = sp_utilities.wrap_mpi_bcast(memlist,
                                          Blockdata["main_node"],
                                          communicator=mpi.MPI_COMM_WORLD)

    # ---- Now computing! ---------------------------------------------------
    del init_dict
    tag_sharpen_avg = 1000
    # Always apply a low-pass filter to B-enhanced images to suppress noise
    # in high frequencies, even when the user did not ask for one.
    enforced_to_H1 = (B_enhance
                      and Tracker["constants"]["low_pass_filter"] == -1.0)

    # Distribute workload among MPI processes.
    image_start, image_end = sp_applications.MPI_start_end(
        navg, Blockdata["nproc"], Blockdata["myid"])

    # cpu_dict maps every average index to the rank that computes it.
    if Blockdata["myid"] == Blockdata["main_node"]:
        cpu_dict = {}
        for iproc in range(Blockdata["nproc"]):
            local_image_start, local_image_end = sp_applications.MPI_start_end(
                navg, Blockdata["nproc"], iproc)
            for im in range(local_image_start, local_image_end):
                cpu_dict[im] = iproc
    else:
        cpu_dict = 0
    cpu_dict = sp_utilities.wrap_mpi_bcast(cpu_dict,
                                           Blockdata["main_node"],
                                           communicator=mpi.MPI_COMM_WORLD)

    slist = [None] * navg
    plist_dict = {}

    if Blockdata["myid"] == Blockdata["main_node"]:
        if B_enhance:
            sp_global_def.sxprint(
                "Avg ID B-factor FH1(Res before ali) FH2(Res after ali)")
        else:
            sp_global_def.sxprint(
                "Avg ID FH1(Res before ali) FH2(Res after ali)")

    FH_list = [[0, 0.0, 0.0] for im in range(navg)]
    model_pw = None  # user-supplied 1D power spectrum, read on first use
    for iavg in range(image_start, image_end):
        mlist = EMAN2_cppwrap.EMData.read_images(
            Tracker["constants"]["orgstack"], list_dict[iavg])

        for im in range(len(mlist)):
            sp_utilities.set_params2D(mlist[im],
                                      params_dict[iavg][im],
                                      xform="xform.align2d")

        if options.local_alignment:
            new_avg, plist, FH2 = sp_applications.refinement_2d_local(
                mlist,
                Tracker["constants"]["radius"],
                a_range,
                x_range,
                y_range,
                CTF=do_ctf,
                SNR=1.0e10,
            )
            plist_dict[iavg] = plist
            FH1 = -1.0
        else:
            new_avg, frc, plist = compute_average(
                mlist, Tracker["constants"]["radius"], do_ctf)
            FH1 = get_optimistic_res(frc)
            FH2 = -1.0

        FH_list[iavg] = [iavg, FH1, FH2]

        if B_enhance:
            new_avg, gb = apply_enhancement(
                new_avg,
                Tracker["constants"]["B_start"],
                Tracker["constants"]["pixel_size"],
                Tracker["constants"]["Bfactor"],
            )
            sp_global_def.sxprint(" %6d %6.3f %4.3f %4.3f" %
                                  (iavg, gb, FH1, FH2))
        elif adjust_to_given_pw2:
            if model_pw is None:
                # Read once per rank instead of once per average; the
                # curve is always taken from the first column.
                model_pw = sp_utilities.read_text_file(
                    Tracker["constants"]["modelpw"], -1)[0]
            new_avg = adjust_pw_to_model(new_avg,
                                         Tracker["constants"]["pixel_size"],
                                         model_pw)
            sp_global_def.sxprint(" %6d %4.3f %4.3f " % (iavg, FH1, FH2))
        elif adjust_to_analytic_model:
            new_avg = adjust_pw_to_model(new_avg,
                                         Tracker["constants"]["pixel_size"],
                                         None)
            sp_global_def.sxprint(" %6d %4.3f %4.3f " % (iavg, FH1, FH2))
        elif no_adjustment:
            pass

        # NOTE(review): with --local_alignment FH1 is -1.0 (set above), so
        # both "--fl=0.0" and the enforced filter below pass -1.0 as the
        # cut-off frequency -- confirm whether FH2 was intended there.
        requested_fl = Tracker["constants"]["low_pass_filter"]
        if requested_fl != -1.0:
            if requested_fl == 0.0:
                low_pass_filter = FH1
            elif requested_fl == 1.0:
                low_pass_filter = FH2
                if not options.local_alignment:
                    # FH2 is only available after local alignment.
                    low_pass_filter = FH1
            else:
                low_pass_filter = requested_fl
                if low_pass_filter >= 0.45:
                    low_pass_filter = 0.45
            new_avg = sp_filter.filt_tanl(new_avg, low_pass_filter, 0.02)
        elif enforced_to_H1:
            # No low-pass filter requested, but one is enforced.
            new_avg = sp_filter.filt_tanl(new_avg, FH1, 0.02)

        if B_enhance:
            new_avg = sp_fundamentals.fft(new_avg)

        new_avg.set_attr("members", list_dict[iavg])
        new_avg.set_attr("n_objects", len(list_dict[iavg]))
        slist[iavg] = new_avg
        sp_global_def.sxprint(
            time.strftime("%Y-%m-%d_%H:%M:%S", time.localtime()) + " =>",
            "Refined average %7d" % iavg,
        )

    # ---- Send everything to the main node, which writes it ---------------
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    main_node = Blockdata["main_node"]
    for im in range(navg):
        owns_average = cpu_dict[im] == Blockdata["myid"]
        is_main = Blockdata["myid"] == main_node

        # The average itself.
        if owns_average and not is_main:
            sp_utilities.send_EMData(slist[im], main_node, tag_sharpen_avg)
        elif owns_average and is_main:
            slist[im].set_attr("members", memlist[im])
            slist[im].set_attr("n_objects", len(memlist[im]))
            slist[im].write_image(
                os.path.join(Tracker["constants"]["masterdir"],
                             "class_averages.hdf"),
                im,
            )
        elif is_main:
            new_avg_other_cpu = sp_utilities.recv_EMData(
                cpu_dict[im], tag_sharpen_avg)
            new_avg_other_cpu.set_attr("members", memlist[im])
            new_avg_other_cpu.set_attr("n_objects", len(memlist[im]))
            new_avg_other_cpu.write_image(
                os.path.join(Tracker["constants"]["masterdir"],
                             "class_averages.hdf"),
                im,
            )

        # Alignment parameters (local alignment only) and resolution list.
        if options.local_alignment:
            if owns_average:
                sp_utilities.write_text_row(
                    plist_dict[im],
                    os.path.join(
                        Tracker["constants"]["masterdir"],
                        "ali2d_local_params_avg",
                        "ali2d_local_params_avg_%03d.txt" % im,
                    ),
                )

            if owns_average and cpu_dict[im] != main_node:
                sp_utilities.wrap_mpi_send(plist_dict[im], main_node,
                                           mpi.MPI_COMM_WORLD)
                sp_utilities.wrap_mpi_send(FH_list, main_node,
                                           mpi.MPI_COMM_WORLD)
            elif cpu_dict[im] != main_node and is_main:
                dummy = sp_utilities.wrap_mpi_recv(cpu_dict[im],
                                                   mpi.MPI_COMM_WORLD)
                plist_dict[im] = dummy
                dummy = sp_utilities.wrap_mpi_recv(cpu_dict[im],
                                                   mpi.MPI_COMM_WORLD)
                FH_list[im] = dummy[im]
        else:
            if owns_average and cpu_dict[im] != main_node:
                sp_utilities.wrap_mpi_send(FH_list, main_node,
                                           mpi.MPI_COMM_WORLD)
            elif cpu_dict[im] != main_node and is_main:
                dummy = sp_utilities.wrap_mpi_recv(cpu_dict[im],
                                                   mpi.MPI_COMM_WORLD)
                FH_list[im] = dummy[im]

        mpi.mpi_barrier(mpi.MPI_COMM_WORLD)
    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    if options.local_alignment:
        if Blockdata["myid"] == Blockdata["main_node"]:
            ali3d_local_params = [
                [ptl_id] +
                plist_dict[global_dict[ptl_id][0]][global_dict[ptl_id][1]]
                for ptl_id in ptl_list
            ]
            sp_utilities.write_text_row(
                ali3d_local_params,
                os.path.join(Tracker["constants"]["masterdir"],
                             "ali2d_local_params.txt"),
            )
            sp_utilities.write_text_row(
                FH_list,
                os.path.join(Tracker["constants"]["masterdir"],
                             "FH_list.txt"))
    else:
        if Blockdata["myid"] == Blockdata["main_node"]:
            sp_utilities.write_text_row(
                FH_list,
                os.path.join(Tracker["constants"]["masterdir"],
                             "FH_list.txt"))

    mpi.mpi_barrier(mpi.MPI_COMM_WORLD)

    # ---- Order the averages with sp_chains.py (main node only) ------------
    target_xr = 3
    target_yr = 3
    if Blockdata["myid"] == Blockdata["main_node"]:
        junk_path = os.path.join(Tracker["constants"]["masterdir"],
                                 "junk.hdf")
        cmd = " ".join([
            "sp_chains.py",
            os.path.join(Tracker["constants"]["masterdir"],
                         "class_averages.hdf"),
            junk_path,
            os.path.join(Tracker["constants"]["masterdir"],
                         "ordered_class_averages.hdf"),
            "--circular",
            "--radius=%d" % Tracker["constants"]["radius"],
            "--xr=%d" % (target_xr + 1),
            "--yr=%d" % (target_yr + 1),
            "--align",
            ">/dev/null",
        ])
        sp_utilities.cmdexecute(cmd)
        # Remove the intermediate stack written by sp_chains.py.
        cmd = "{} {}".format("rm -rf", junk_path)
        sp_utilities.cmdexecute(cmd)

    return
def main():
    """Convert a PDB file into an electron density map.

    Reads ``input.pdb`` and ``output.hdf`` (or ``.spi``) from the command
    line, collects the ATOM (and optionally HETATM) records, centers the
    coordinates according to ``--center``, optionally applies the 3x4
    transformation given by ``--tr0``, and deposits the electron count of
    every atom into a volume by trilinear interpolation.  The volume is
    written to the output file; the output format is chosen from the file
    extension.

    Records that cannot be parsed, and atoms whose element is missing from
    ``atomdefs``, are reported and skipped.  Takes no arguments (reads
    ``sys.argv``) and returns ``None``.
    """
    usage = """%prog [options] input.pdb output.hdf Converts a pdb file into an electron density map. 0,0,0 in PDB space will map to the center of the volume."""
    parser = optparse.OptionParser(usage=usage,
                                   version=EMAN2_meta.EMANVERSION)

    parser.add_option("--apix", "-A", type="float", help="Angstrom/voxel", default=1.0)
    parser.add_option("--box", "-B", type="string", help="Box size in pixels, <xyz> or <x,y,z>")
    parser.add_option("--het", action="store_true", help="Include HET atoms in the map", default=False)
    parser.add_option("--chains", type="string", help="String list of chain identifiers to include, e.g. 'ABEFG'; default: include all chains", default='')
    parser.add_option("--center", type="string", default="a", help="center: c - coordinates; a (default) - center of gravity; <x,y,z> - vector (in Angstrom) to subtract from all coordinates; n - none")
    parser.add_option("--O", action="store_true", default=False, help="use O system of coordinates")
    parser.add_option("--quiet", action="store_true", default=False, help="Verbose is the default")
    parser.add_option("--tr0", type="string", default="none", help="Filename of initial 3x4 transformation matrix")
    parser.add_option("--set_apix_value", action="store_true", help="Set apix value in header of the ouput map", default=False)

    (options, args) = parser.parse_args()
    if len(args) < 2:
        sp_global_def.ERROR("Input and output files required")
        return

    if sp_global_def.CACHE_DISABLE:
        sp_utilities.disable_bdb_cache()

    chains = options.chains
    if chains == '':
        chains = None

    try:
        infile = open(args[0], "r")
    except (IOError, OSError):
        sp_global_def.ERROR("Cannot open input file")
        return

    center_by_mass = options.center == "a"
    aavg = [0, 0, 0]  # weighted coordinate sums, to calculate atomic center
    asig = [0, 0, 0]  # weighted squared sums, to compute radius of gyration
    natm = 0
    atoms = []  # we store a list of atoms to process to avoid multiple passes
    nelec = 0
    mass = 0

    # The context manager closes the input file on every exit path.
    with infile:
        # read in initial-transformation file:
        if options.tr0 != "none":
            cols = sp_utilities.read_text_file(options.tr0, -1)
            txlist = []
            for i in range(3):
                txlist.append(cols[0][i])
                txlist.append(cols[1][i])
                txlist.append(cols[2][i])
                txlist.append(cols[3][i])
            tr0 = EMAN2_cppwrap.Transform(txlist)

        # parse the pdb file and pull out relevant atoms
        for line in infile:
            if not (line[:4] == 'ATOM' or
                    (line[:6] == 'HETATM' and options.het)):
                continue
            # Slice instead of index so that a truncated record reaches the
            # parse-error report below rather than raising IndexError.
            if chains and (line[21:22] not in chains):
                continue

            try:
                a = line[12:14].strip()
                aseq = int(line[6:11].strip())
                if options.O:
                    x = float(line[38:46])
                    y = float(line[30:38])
                    z = -float(line[46:54])
                else:
                    x = float(line[30:38])
                    y = float(line[38:46])
                    z = float(line[46:54])
            except ValueError:
                sp_global_def.sxprint("PDB Parse error:\n%s\n'%s','%s','%s' '%s','%s','%s'\n"%(
                    line, line[12:14], line[6:11], line[22:26], line[30:38], line[38:46], line[46:54]))
                # Skip the record: continuing would reuse the previous
                # atom's values (or undefined names on the first record).
                continue

            try:
                atom_electrons = atomdefs[a.upper()][0]
                atom_mass = atomdefs[a.upper()][1]
            except (KeyError, IndexError):
                sp_global_def.sxprint(("Unknown atom %s ignored at %d" % (a, aseq)))
                # Really ignore it, as the message says; falling through
                # would fail on the mass lookup when centering by mass.
                continue

            nelec += atom_electrons
            mass += atom_mass
            atoms.append([a, x, y, z])
            natm += 1

            # Mass-weighted sums for center of gravity, plain sums otherwise.
            weight = atom_mass if center_by_mass else 1.0
            aavg[0] += x * weight
            aavg[1] += y * weight
            aavg[2] += z * weight
            asig[0] += x**2 * weight
            asig[1] += y**2 * weight
            asig[2] += z**2 * weight

    if not atoms:
        # Nothing to map; the statistics below would divide by zero.
        sp_global_def.ERROR("No atoms found in input file")
        return

    norm = mass if center_by_mass else natm
    gyr_sq = ((asig[0] + asig[1] + asig[2]) / norm - (aavg[0] / norm)**2 -
              (aavg[1] / norm)**2 - (aavg[2] / norm)**2)
    # Rounding can push the variance slightly below zero (e.g. one atom).
    rad_gyr = math.sqrt(max(gyr_sq, 0.0))

    if not options.quiet:
        sp_global_def.sxprint("%d atoms; total charge = %d e-; mol mass = %.2f kDa; radius of gyration = %.2f A"%(natm, nelec, mass/1000.0, rad_gyr))

    # center PDB according to option:
    if options.center == "a":
        if not options.quiet:
            sp_global_def.sxprint("center of gravity at %1.1f,%1.1f,%1.1f (center of volume at 0,0,0)"%(aavg[0]/mass, aavg[1]/mass, aavg[2]/mass))
        for atom in atoms:
            atom[1] -= aavg[0]/mass
            atom[2] -= aavg[1]/mass
            atom[3] -= aavg[2]/mass
    if options.center == "c":
        if not options.quiet:
            sp_global_def.sxprint("atomic center at %1.1f,%1.1f,%1.1f (center of volume at 0,0,0)"%(aavg[0]/natm, aavg[1]/natm, aavg[2]/natm))
        for atom in atoms:
            atom[1] -= aavg[0]/natm
            atom[2] -= aavg[1]/natm
            atom[3] -= aavg[2]/natm
    spl = options.center.split(',')
    if len(spl) == 3:  # substract the given vector from all coordinates
        if not options.quiet:
            sp_global_def.sxprint("vector to substract: %1.1f,%1.1f,%1.1f (center of volume at 0,0,0)"%(float(spl[0]), float(spl[1]), float(spl[2])))
        for atom in atoms:
            atom[1] -= float(spl[0])
            atom[2] -= float(spl[1])
            atom[3] -= float(spl[2])

    # apply given initial transformation (this used to be done before centering,
    # thereby loosing the translation. This is the right place to apply tr0):
    if options.tr0 != "none":
        if not options.quiet:
            sp_global_def.sxprint("Applying initial transformation to PDB coordinates... ")
        for atom in atoms:
            atom_coords = EMAN2_cppwrap.Vec3f(atom[1], atom[2], atom[3])
            new_atom_coords = tr0*atom_coords
            atom[1] = new_atom_coords[0]
            atom[2] = new_atom_coords[1]
            atom[3] = new_atom_coords[2]
        if not options.quiet:
            sp_global_def.sxprint("done.\n")

    # bounding box:
    amin = [min(atom[k + 1] for atom in atoms) for k in range(3)]
    amax = [max(atom[k + 1] for atom in atoms) for k in range(3)]

    if not options.quiet:
        sp_global_def.sxprint("Range of coordinates [A]: x: %7.2f - %7.2f"%(amin[0], amax[0]))
        sp_global_def.sxprint(" y: %7.2f - %7.2f"%(amin[1], amax[1]))
        sp_global_def.sxprint(" z: %7.2f - %7.2f"%(amin[2], amax[2]))

    # find the output box size, either user specified or from bounding box
    box = [0, 0, 0]
    try:
        spl = options.box.split(',')
        if len(spl) == 1:
            box[0] = box[1] = box[2] = int(spl[0])
        else:
            box[0] = int(spl[0])
            box[1] = int(spl[1])
            box[2] = int(spl[2])
    except (AttributeError, ValueError, IndexError):
        # --box missing (None) or malformed: derive the box from the
        # bounding box of the coordinates instead.
        for i in range(3):
            box[i] = int(2*max(math.fabs(amax[i]), math.fabs(amin[i]))/options.apix)
            # Increase the box size by 1/4.
            box[i] += box[i]//4

    if not options.quiet:
        sp_global_def.sxprint("Bounding box [pixels]: x: %5d "%box[0])
        sp_global_def.sxprint(" y: %5d "%box[1])
        sp_global_def.sxprint(" z: %5d "%box[2])

    # Oversampling factor; fixed to 1 here.
    fcbig = 1
    if not options.quiet:
        sp_global_def.sxprint("Box size: %d x %d x %d"%(box[0], box[1], box[2]), ", oversampling ", fcbig)

    # Calculate working dimensions
    pixelbig = options.apix/fcbig
    bigbox = [box[i]*fcbig for i in range(3)]

    # initialize the final output volume
    outmap = EMAN2_cppwrap.EMData(bigbox[0], bigbox[1], bigbox[2], True)
    nc = [bigbox[i]//2 for i in range(3)]

    # fill in the atoms
    for i, atom in enumerate(atoms):
        if not options.quiet and i % 1000 == 0:
            sp_global_def.sxprint('\r %d'%i, end=' ')
            sys.stdout.flush()
        try:
            elec = atomdefs[atom[0].upper()][0]
            # Fractional voxel position of the atom; the charge is split
            # over the 8 surrounding voxels with trilinear weights.
            px = atom[1]/pixelbig + nc[0]
            py = atom[2]/pixelbig + nc[1]
            pz = atom[3]/pixelbig + nc[2]
            dx = px - int(px)
            dy = py - int(py)
            dz = pz - int(pz)
            for k in range(2):
                uz = ((1-k) + (2*k-1)*dz)*elec
                for l in range(2):
                    uy = ((1-l) + (2*l-1)*dy)*uz
                    for m in range(2):
                        outmap[int(px)+m, int(py)+l, int(pz)+k] += ((1-m) + (2*m-1)*dx)*uy
        except Exception:
            # Best effort: report and move on to the next atom.
            sp_global_def.sxprint("Skipping %d '%s'"%(i, atom[0]))

    if not options.quiet:
        sp_global_def.sxprint('\r %d\nConversion complete.'%len(atoms))

    # The output format is selected by the file extension.
    filextension = optparse.os.path.splitext(args[1])[1]
    if filextension == ".hdf":
        if options.set_apix_value:
            outmap.set_attr("apix_x", options.apix)
            outmap.set_attr("apix_y", options.apix)
            outmap.set_attr("apix_z", options.apix)
            outmap.set_attr("pixel_size", options.apix)
        else:
            sp_global_def.sxprint("Pixel_size is not set in the header!")
        outmap.write_image(args[1], 0, EMAN2_cppwrap.EMUtil.ImageType.IMAGE_HDF)
    elif filextension == ".spi":
        outmap.write_image(args[1], 0, EMAN2_cppwrap.EMUtil.ImageType.IMAGE_SINGLE_SPIDER)
    else:
        sp_global_def.ERROR("Unknown image type")
        return