def print_results(self):
    """ Prints diagnostics from the final integration run. """

    # Consensus value = the single most common entry in each list
    cons_s = Counter(self.s).most_common(1)[0][0]
    cons_h = Counter(self.h).most_common(1)[0][0]
    cons_a = Counter(self.a).most_common(1)[0][0]

    final_table = []
    final_table.append("\n\n{:-^80}\n".format('ANALYSIS OF RESULTS'))
    final_table.append("Total images: {}".format(len(self.final_objects)))
    final_table.append("Avg. signal height: {:<8.3f} std. dev: {:<6.2f}"\
                       " max: {:<3} min: {:<3} consensus: {:<3}"\
                       "".format(np.mean(self.s), np.std(self.s),
                                 max(self.s), min(self.s), cons_s))
    final_table.append("Avg. spot height: {:<8.3f} std. dev: {:<6.2f}"\
                       " max: {:<3} min: {:<3} consensus: {:<3}"\
                       "".format(np.mean(self.h), np.std(self.h),
                                 max(self.h), min(self.h), cons_h))
    final_table.append("Avg. spot areas: {:<8.3f} std. dev: {:<6.2f}"\
                       " max: {:<3} min: {:<3} consensus: {:<3}"\
                       "".format(np.mean(self.a), np.std(self.a),
                                 max(self.a), min(self.a), cons_a))
    # Resolution is in d-spacing terms, so the numerically largest value is
    # the lowest resolution and the smallest is the highest
    final_table.append("Avg. resolution: {:<8.3f} std. dev: {:<6.2f}"\
                       " lowest: {:<6.3f} highest: {:<6.3f}"\
                       "".format(np.mean(self.res), np.std(self.res),
                                 max(self.res), min(self.res)))
    final_table.append("Avg. number of spots: {:<8.3f} std. dev: {:<6.2f}"\
                       "".format(np.mean(self.spots), np.std(self.spots)))
    final_table.append("Avg. mosaicity: {:<8.3f} std. dev: {:<6.2f}"\
                       "".format(np.mean(self.mos), np.std(self.mos)))

    for item in final_table:
      misc.main_log(self.logfile, item, True)
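  # A brief note on the consensus idiom used above: collections.Counter
  # tallies the entries of a list, and most_common(1) returns a one-element
  # list of (value, count) pairs, so the trailing [0][0] extracts the single
  # most frequent value. For example:
  #
  #   >>> from collections import Counter
  #   >>> Counter([3, 3, 5]).most_common(1)
  #   [(3, 2)]
  #   >>> Counter([3, 3, 5]).most_common(1)[0][0]
  #   3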
def print_summary(self, int_base):
    """ Prints summary, appends to the general log file and echoes some of it
        to stdout. Also writes out the output list files.
    """

    summary = []
    misc.main_log(self.logfile, "\n\n{:-^80}\n".format('SUMMARY'), True)

    summary.append('raw images read in: {}'\
                   ''.format(len(self.all_objects)))

    no_diff_objects = [i for i in self.all_objects
                       if i.fail == 'failed triage']
    summary.append('raw images with no diffraction: {}'\
                   ''.format(len(no_diff_objects)))

    diff_objects = [i for i in self.all_objects
                    if i.fail != 'failed triage']
    summary.append('raw images with diffraction: {}'\
                   ''.format(len(diff_objects)))

    not_int_objects = [i for i in self.all_objects
                       if i.fail == 'failed grid search']
    summary.append('raw images not integrated: {}'\
                   ''.format(len(not_int_objects)))

    prefilter_fail_objects = [i for i in self.all_objects
                              if i.fail == 'failed prefilter']
    summary.append('images failed prefilter: {}'\
                   ''.format(len(prefilter_fail_objects)))

    # Sort final integrated images by mosaicity, lowest to highest
    final_images = sorted(self.final_objects, key=lambda i: i.final['mos'])
    summary.append('final integrated pickles: {}'\
                   ''.format(len(final_images)))

    for item in summary:
      misc.main_log(self.logfile, "{}".format(item), True)

    misc.main_log(self.logfile, '\n\nIOTA version {0}'.format(self.ver))
    misc.main_log(self.logfile, "{}\n".format(self.now))

    # Write list files:
    if int_base is not None:
      input_list_file = os.path.join(int_base, 'input_images.lst')
      blank_images_file = os.path.join(int_base, 'blank_images.lst')
      prefilter_fail_file = os.path.join(int_base, 'failed_prefilter.lst')
      not_integrated_file = os.path.join(int_base, 'not_integrated.lst')
      integrated_file = os.path.join(int_base, 'integrated.lst')
      int_images_file = os.path.join(int_base, 'int_image_pickles.lst')

      if self.prime_data_path is None:
        self.prime_data_path = integrated_file

      if len(no_diff_objects) > 0:
        with open(blank_images_file, 'w') as bif:
          for obj in no_diff_objects:
            bif.write('{}\n'.format(obj.conv_img))

      if len(diff_objects) > 0:
        with open(input_list_file, 'w') as ilf:
          for obj in diff_objects:
            ilf.write('{}\n'.format(obj.conv_img))

      if len(not_int_objects) > 0:
        with open(not_integrated_file, 'w') as nif:
          for obj in not_int_objects:
            nif.write('{}\n'.format(obj.conv_img))

      if len(prefilter_fail_objects) > 0:
        with open(prefilter_fail_file, 'w') as pff:
          for obj in prefilter_fail_objects:
            pff.write('{}\n'.format(obj.conv_img))

      if len(self.final_objects) > 0:
        with open(integrated_file, 'w') as intf:
          for obj in final_images:
            intf.write('{}\n'.format(obj.final['final']))
        with open(int_images_file, 'w') as ipf:
          for obj in final_images:
            ipf.write('{}\n'.format(obj.final['img']))
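  # The .lst files written above are plain newline-separated lists of file
  # paths, so a downstream consumer (e.g. whatever reads the integrated
  # pickle list handed off via self.prime_data_path) could, as a minimal
  # sketch, do no more than:
  #
  #   with open(integrated_file, 'r') as lst:
  #     paths = [line.strip() for line in lst if line.strip()]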
def unit_cell_analysis(self, cluster_threshold, output_dir, write_files=True):
    """ Calls the unit cell analysis module, which uses hierarchical
        clustering (Zeldin, et al., Acta D, 2015) to split integration
        results according to detected morphological groupings (if any).
        Most useful with preliminary integration without a target unit
        cell specified.
    """

    # Will not run clustering if only one integration result found
    if len(self.final_objects) == 1:
      unit_cell = (self.final_objects[0].final['a'],
                   self.final_objects[0].final['b'],
                   self.final_objects[0].final['c'],
                   self.final_objects[0].final['alpha'],
                   self.final_objects[0].final['beta'],
                   self.final_objects[0].final['gamma'])
      point_group = self.final_objects[0].final['sg']
      misc.main_log(self.logfile,
                    "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
      uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, "\
                "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                            unit_cell[0], unit_cell[1],
                                            unit_cell[2], unit_cell[3],
                                            unit_cell[4], unit_cell[5])
      misc.main_log(self.logfile, uc_line, True)
      self.cons_pg = point_group
      self.cons_uc = unit_cell
    else:
      uc_table = []
      uc_summary = []
      counter = 1

      # Run hierarchical clustering analysis
      ucs = Cluster.from_files(self.pickles, use_b=True)
      clusters, _ = ucs.ab_cluster(cluster_threshold,
                                   log=False,
                                   write_file_lists=False,
                                   schnell=False,
                                   doplot=False)
      uc_table.append("\n\n{:-^80}\n"\
                      "".format(' UNIT CELL ANALYSIS '))

      # Extract clustering info and add to summary output list
      for cluster in clusters:
        # Determine the most common point group in this cluster
        # (sorted_pg_comp is in descending order of count, so the argmax
        # picks its first entry)
        sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                key=lambda x: -1 * x[1])
        pg_nums = [pg[1] for pg in sorted_pg_comp]
        cons_pg = sorted_pg_comp[np.argmax(pg_nums)]

        output_file = os.path.join(output_dir,
                                   "uc_cluster_{}.lst".format(counter))

        # Write out lists of the output pickles that comprise clusters with
        # more than one member
        if len(cluster.members) > 1:
          counter += 1

          # Sort clustered images by mosaicity, lowest to highest
          cluster_filenames = [j.path for j in cluster.members]
          clustered_objects = [i for i in self.final_objects if
                               i.final['final'] in cluster_filenames]
          sorted_cluster = sorted(clustered_objects,
                                  key=lambda i: i.final['mos'])
          # Write to file
          if write_files:
            for obj in sorted_cluster:
              with open(output_file, 'a') as scf:
                scf.write('{}\n'.format(obj.final['final']))
            mark_output = os.path.basename(output_file)
          else:
            mark_output = '*'
            output_file = None
        else:
          mark_output = ''
          output_file = None

        # Format and record output
        uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                  "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), "\
                  "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) "\
                  "{}".format('({})'.format(len(cluster.members)), cons_pg[0],
                              cluster.medians[0], cluster.stdevs[0],
                              cluster.medians[1], cluster.stdevs[1],
                              cluster.medians[2], cluster.stdevs[2],
                              cluster.medians[3], cluster.stdevs[3],
                              cluster.medians[4], cluster.stdevs[4],
                              cluster.medians[5], cluster.stdevs[5],
                              mark_output)
        uc_table.append(uc_line)
        uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                   output_file, uc_line]
        uc_summary.append(uc_info)

      uc_table.append('\nMost common unit cell:\n')

      # Select the most prevalent unit cell (most members in cluster)
      uc_freqs = [i[0] for i in uc_summary]
      uc_pick = uc_summary[np.argmax(uc_freqs)]
      uc_table.append(uc_pick[4])
      self.cons_pg = uc_pick[1]
      self.cons_uc = uc_pick[2]

      if uc_pick[3] is not None:
        self.prime_data_path = uc_pick[3]

      for item in uc_table:
        misc.main_log(self.logfile, item, True)
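  # A minimal standalone sketch of the clustering step above, assuming the
  # same xfel.clustering API used in unit_cell_analysis and given a list of
  # integration pickle paths in a hypothetical `pickle_paths` (5000 here is
  # an arbitrary example threshold):
  #
  #   from xfel.clustering.cluster import Cluster
  #   ucs = Cluster.from_files(pickle_paths, use_b=True)
  #   clusters, _ = ucs.ab_cluster(5000, log=False, write_file_lists=False,
  #                                schnell=False, doplot=False)
  #   for c in clusters:
  #     print len(c.members), c.medians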
def process(self, single_image=False):
    """ Image processing; selects method, runs requisite modules """

    # For CCTBX indexing / integration
    if self.params.advanced.integrate_with == 'cctbx':
      terminate = False
      prev_status = self.status
      prev_fail = 'first cycle'
      prev_final = self.final
      prev_epv = 9999

      while not terminate:
        # Run grid search if haven't already
        if self.fail is None and self.status != 'grid search':
          self.integrate_cctbx('grid search', single_image=single_image)

        # Run selection if haven't already
        if self.fail is None and self.status != 'selection':
          self.select_cctbx()

        # If smart grid search is active, run multiple rounds until
        # convergence
        if self.params.cctbx.grid_search.type == 'smart':
          if self.fail is None and self.final['epv'] < prev_epv:
            # This cycle improved the EPV score; save it as the best-so-far
            # result and re-center the grid on its spot height / area
            prev_epv = self.final['epv']
            prev_final = self.final
            prev_status = self.status
            prev_fail = self.fail
            self.hmed = self.final['sph']
            self.amed = self.final['spa']
            self.grid, self.final = self.generate_grid()
            self.final['final'] = self.fin_file
            if len(self.grid) == 0:
              self.final = prev_final
              self.status = prev_status
              self.fail = prev_fail
              terminate = True
              continue
            if self.verbose:
              log_entry = '\nNew starting point: H = {}, A = {}\n'\
                          ''.format(self.hmed, self.amed)
              self.log_info.append(log_entry)
          else:
            # No improvement; restore the best-so-far result and stop
            if prev_fail != 'first cycle':
              self.final = prev_final
              self.status = prev_status
              self.fail = prev_fail
              if self.verbose:
                log_entry = '\nFinal set of parameters: H = {}, A = {}'\
                            ''.format(self.final['sph'], self.final['spa'])
                self.log_info.append(log_entry)
            terminate = True

        # If brute force grid search is selected, run one round only
        else:
          terminate = True

      # Run final integration if haven't already
      if self.fail is None and self.status != 'final':
        self.integrate_cctbx('integrate', single_image=single_image)

      if self.verbose:
        log_entry = "\n".join(self.log_info)
        misc.main_log(self.main_log, log_entry)
        misc.main_log(self.main_log, '\n{:-^100}\n'.format(''))

    # For DIALS integration (DOES NOT YET WORK)
    elif self.params.advanced.integrate_with == 'dials':
      # Create DIALS integrator object
      from prime.iota.iota_dials import Integrator
      integrator = Integrator(self.conv_img,
                              self.obj_base,
                              self.gain,
                              self.params)
      # Run DIALS test
      integrator.find_spots()
      integrator.index()

    return self
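# The smart grid search in process() above is effectively a descent on the
# 'epv' score: each cycle re-centers the search grid on the best spot height
# (sph) and spot area (spa) found so far, and the loop stops once a cycle
# fails to improve prev_epv or the regenerated grid comes back empty. In
# sketch form:
#
#   while epv improves:
#     grid search + selection
#     re-center grid on best (H, A)
#   restore best-so-far result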
def run(self):
    self.args, self.phil_args = parse_command_args(
      self.iver, self.help_message).parse_known_args()

    # Check for type of input
    if self.args.path is None:          # No input; print help and exit
      parse_command_args(self.iver, self.help_message).print_help()
      if self.args.default:             # Write out default params first
        help_out, txt_out = inp.print_params()
        print '\n{:-^70}\n'.format('IOTA Parameters')
        print help_out
        inp.write_defaults(os.path.abspath(os.path.curdir), txt_out)
      misc.iota_exit(self.iver)
    else:                               # If input exists, check type
      carg = os.path.abspath(self.args.path)
      if os.path.exists(carg):
        # If user provided a parameter file
        if os.path.isfile(carg) and os.path.basename(carg).endswith('.param'):
          msg = ''
          self.params, self.txt_out = inp.process_input(self.args,
                                                        self.phil_args,
                                                        carg, 'file')
        # If user provided a list of input files
        elif os.path.isfile(carg) and os.path.basename(carg).endswith('.lst'):
          msg = "\nIOTA will run in AUTO mode using {}:\n".format(carg)
          self.params, self.txt_out = inp.process_input(self.args,
                                                        self.phil_args,
                                                        carg, 'auto',
                                                        self.now)
        # If user provided a single filepath
        elif os.path.isfile(carg) and not os.path.basename(carg).endswith('.lst'):
          msg = "\nIOTA will run in SINGLE-FILE mode using {}:\n".format(carg)
          self.params, self.txt_out = inp.process_input(self.args,
                                                        self.phil_args,
                                                        carg, 'auto',
                                                        self.now)
        # If user provided a data folder
        elif os.path.isdir(carg):
          msg = "\nIOTA will run in AUTO mode using {}:\n".format(carg)
          self.params, self.txt_out = inp.process_input(self.args,
                                                        self.phil_args,
                                                        carg, 'auto',
                                                        self.now)
      # If user provided gibberish
      else:
        print self.logo
        print "ERROR: Invalid input! Need parameter filename or data folder."
        misc.iota_exit(self.iver)

      # Identify indexing / integration program
      if self.params.advanced.integrate_with == 'cctbx':
        prg = " with CCTBX.XFEL\n"
      elif self.params.advanced.integrate_with == 'dials':
        prg = " with DIALS\n"
      self.logo += prg
      print self.logo
      print '\n{}\n'.format(self.now)
      if msg != '':
        print msg

    # Check for -l option; output list of input files and exit
    if self.args.list:
      list_file = os.path.abspath("{}/input.lst".format(os.curdir))
      print '\nIOTA will run in LIST INPUT ONLY mode'
      print 'Input list in {} \n\n'.format(list_file)
      # The input list was not yet generated at this point in the original;
      # generating it here (via the same method used below) is an assumption
      # made to keep this block runnable
      input_list = self.make_input_list()
      with open(list_file, "w") as lf:
        for i, input_file in enumerate(input_list, 1):
          print "{}: {}".format(i, input_file)
          lf.write('{}\n'.format(input_file))
      print '\nExiting...\n\n'
      misc.iota_exit(self.iver)

    if self.args.analyze is not None:
      self.analyze_prior_results('{:03d}'.format(int(self.args.analyze)))
      misc.iota_exit(self.iver)

    if self.params.mp_method == 'mpi':
      rank, size = misc.get_mpi_rank_and_size()
      self.master_process = rank == 0
    else:
      self.master_process = True

    # Call function to read input folder structure (or input file) and
    # generate list of image file paths
    if self.params.cctbx.selection.select_only.flag_on:
      self.gs_img_objects = self.make_int_object_list()
      self.input_list = [i.conv_img for i in self.gs_img_objects]
    else:
      self.input_list = self.make_input_list()

    # If fewer images than requested processors are supplied, set the number
    # of processors to the number of images
    if self.params.n_processors > len(self.input_list):
      self.params.n_processors = len(self.input_list)

    # Generate base folder paths
    self.conv_base = misc.set_base_dir('converted_pickles')
    self.int_base = misc.set_base_dir('integration')
    self.obj_base = os.path.join(self.int_base, 'image_objects')
    self.fin_base = os.path.join(self.int_base, 'final')
    self.tmp_base = os.path.join(self.int_base, 'tmp')
    if self.params.analysis.viz != 'None' or\
       self.params.analysis.heatmap != 'None' or\
       self.params.analysis.charts:
      self.viz_base = os.path.join(self.int_base, 'visualization')
    else:
      self.viz_base = None

    # Generate base folders (note: os.makedirs raises OSError if a path
    # already exists, so misc.set_base_dir presumably returns fresh,
    # run-specific paths)
    os.makedirs(self.int_base)
    os.makedirs(self.obj_base)
    os.makedirs(self.fin_base)
    os.makedirs(self.tmp_base)

    # Determine input base (os.path.commonprefix matches character-by-
    # character rather than path component-wise, so dirname() trims any
    # partial folder name it leaves behind)
    self.input_base = os.path.abspath(
      os.path.dirname(os.path.commonprefix(self.input_list)))

    # Initialize main log
    self.logfile = os.path.abspath(os.path.join(self.int_base, 'iota.log'))

    # Log starting info
    misc.main_log(self.logfile, '{:=^100} \n'.format(' IOTA MAIN LOG '))
    misc.main_log(self.logfile, '{:-^100} \n'.format(' SETTINGS FOR THIS RUN '))
    misc.main_log(self.logfile, self.txt_out)
    misc.main_log(self.logfile, '{:-^100} \n\n'
                                ''.format(' TARGET FILE ({}) CONTENTS '
                                          ''.format(self.params.target)))
    with open(self.params.target, 'r') as phil_file:
      phil_file_contents = phil_file.read()
    misc.main_log(self.logfile, phil_file_contents)
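    # Aside on the banner formatting used throughout: '{:=^100}' centers the
    # argument in a field 100 characters wide, padding with '=' (any odd
    # leftover padding goes on the right). For example:
    #
    #   >>> '{:=^12}'.format(' LOG ')
    #   '=== LOG ===='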
else:
  # Import (and check / convert / triage) images
  cmd.Command.start("Importing {} images".format(len(init.input_list)))
  img_list = [[i, len(init.input_list) + 1, j] for i, j in
              enumerate(init.input_list, 1)]
  img_objects = parallel_map(iterable=img_list,
                             func=conversion_wrapper,
                             processes=init.params.n_processors)

  # Keep only images that passed triage (only the count is used below)
  acc_img_objects = [i for i in img_objects if i.fail is None]
  cmd.Command.end("Accepted {} of {} images -- DONE "\
                  "".format(len(acc_img_objects), len(img_objects)))

  # Exit if none of the images have diffraction
  if len(acc_img_objects) == 0:
    misc.main_log(init.logfile, 'No images have diffraction!', True)
    misc.iota_exit(iota_version)
  else:
    misc.main_log(init.logfile, "{} out of {} images have diffraction"\
                                "".format(len(acc_img_objects),
                                          len(img_objects)))

  # Check for -c option and exit if true
  if init.params.image_conversion.convert_only:
    misc.iota_exit(iota_version)

cmd.Command.start("Processing {} images".format(len(img_objects)))
img_list = [[i, len(img_objects) + 1, j] for i, j in
            enumerate(img_objects, 1)]
img_objects = parallel_map(iterable=img_list,
                           func=processing_wrapper,
                           processes=init.params.n_processors)
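# A note on the parallel_map pattern above: each img_list entry is a
# three-element list [sequence_number, total + 1, payload], so the wrapper
# functions (conversion_wrapper / processing_wrapper, defined elsewhere in
# this script) presumably unpack it accordingly. A hypothetical wrapper
# shape, for illustration only:
#
#   def processing_wrapper(entry):
#     img_no, n_img, img_object = entry
#     return img_object.process()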