def run(_args):
    """Cluster unit cells from integration pickles and report the results.

    :param _args: parsed command-line namespace; expected attributes are
        .dirs (pickle locations), .paths, .n, .dials, .t (threshold), .log,
        .nofiles, .schnell and .noplot
    :raises IOError: if no pickle location was provided
    """
    # BUGFIX: the original tested `if _args < 2`, comparing the argparse
    # namespace itself to an integer -- a TypeError on Python 3 and never
    # the intended "no input given" check.  Test the pickle locations.
    if not _args.dirs:
        raise IOError("Must provide location(s) of pickles")

    if _args.paths:
        ucs = Cluster.from_files(raw_input=_args.dirs, n_images=_args.n,
                                 dials=_args.dials)
    else:
        ucs = Cluster.from_directories(_args.dirs, n_images=_args.n,
                                       dials=_args.dials)

    # NOTE(review): _args.noplot and _args.nofiles appear to be
    # store_false-style flags (True by default, False when the user passes
    # --noplot / --nofiles) -- confirm against the argparse setup; both
    # branches below rely on that convention.
    if not _args.noplot:
        clusters, _ = ucs.ab_cluster(_args.t, log=_args.log,
                                     write_file_lists=_args.nofiles,
                                     schnell=_args.schnell,
                                     doplot=_args.noplot)
        print(unit_cell_info(clusters))
    else:
        plt.figure("Andrews-Bernstein distance dendogram", figsize=(12, 8))
        ax = plt.gca()
        clusters, cluster_axes = ucs.ab_cluster(_args.t, log=_args.log, ax=ax,
                                                write_file_lists=_args.nofiles,
                                                schnell=_args.schnell,
                                                doplot=_args.noplot)
        print(unit_cell_info(clusters))
        plt.tight_layout()
        plt.show()
def unit_cell_analysis(self):
    """Run unit cell analysis on the integrated results.

    Uses hierarchical clustering (Zeldin, et al, Acta D, 2015) to split
    integration results according to detected morphological groupings (if
    any); when clustering is turned off, reports the median ("average")
    unit cell instead.  Most useful with preliminary integration without a
    target unit cell specified.

    Side effects: writes to the run log via util.main_log, populates
    self.info.clusters, self.info.best_pg / self.info.best_uc, may set
    self.prime_data_path and write uc_cluster_*.lst files.

    :return: list of cluster-info dicts when self.gui_mode is True
    """
    # Will not run clustering if only one integration result found or if
    # turned off
    if not self.info.categories['integrated']:
        util.main_log(self.info.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        util.main_log(self.info.logfile,
                      '\n UNIT CELL CANNOT BE DETERMINED!', True)
    elif len(self.info.categories['integrated']) == 1:
        # Single result: report it directly (no clustering possible).
        # BUGFIX: cluster_iterable rows hold (a, b, c, alpha, beta, gamma,
        # sg); the original sliced [:5], which dropped gamma and made the
        # six-value format below raise IndexError -- fixed to [:6].
        unit_cell = self.info.cluster_iterable[0][:6]
        point_group = self.info.cluster_iterable[0][6]
        util.main_log(self.info.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        util.main_log(self.info.logfile, uc_line, True)
        self.info.best_pg = str(point_group)
        self.info.best_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []
        if self.params.analysis.clustering.flag_on:
            # run hierarchical clustering analysis
            from xfel.clustering.cluster import Cluster
            counter = 0
            self.info.clusters = []
            threshold = self.params.analysis.clustering.threshold
            cluster_limit = self.params.analysis.clustering.limit
            final_pickles = self.info.categories['integrated'][0]
            pickles = []
            if self.params.analysis.clustering.n_images > 0:
                # Randomly subsample n_images distinct pickles.
                # BUGFIX: n_images is an integer, so the original
                # range(len(...n_images)) raised TypeError.
                import random
                for i in range(self.params.analysis.clustering.n_images):
                    random_number = random.randrange(0, len(final_pickles))
                    if final_pickles[random_number] in pickles:
                        # resample until an unused pickle is found
                        while final_pickles[random_number] in pickles:
                            random_number = random.randrange(
                                0, len(final_pickles))
                    pickles.append(final_pickles[random_number])
            else:
                pickles = final_pickles

            # Cluster from files (slow, but will keep for now)
            ucs = Cluster.from_files(pickle_list=pickles)

            # Do clustering
            clusters, _ = ucs.ab_cluster(threshold=threshold,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=False,
                                         doplot=False)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL ANALYSIS '))

            # Default reporting cutoff: 10% of images, capped at 10
            # (floor division so cluster_limit stays an int on Python 3)
            if cluster_limit is None:
                if len(pickles) // 10 >= 10:
                    cluster_limit = 10
                else:
                    cluster_limit = len(pickles) // 10

            # extract clustering info and add to summary output list
            for cluster in clusters:
                sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                        key=lambda x: -1 * x[1])
                if len(cluster.members) > cluster_limit:
                    counter += 1
                    # Write to file
                    cluster_filenames = [j.path for j in cluster.members]
                    if self.params.analysis.clustering.write_files:
                        output_file = os.path.join(
                            self.info.int_base,
                            "uc_cluster_{}.lst".format(counter))
                        for fn in cluster_filenames:
                            with open(output_file, 'a') as scf:
                                scf.write('{}\n'.format(fn))
                        mark_output = os.path.basename(output_file)
                    else:
                        mark_output = '*'
                        output_file = None
                else:
                    # minor cluster: listed in the table but not marked
                    mark_output = ''
                    output_file = None

                # Reduce the cluster's median cell to the best-fitting
                # lattice symmetry; populate clustering info for GUI display
                uc_init = uctbx.unit_cell(cluster.medians)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = metric_subgroups(input_symmetry=symmetry,
                                          max_delta=3)
                top_group = groups.result_groups[0]
                best_sg = str(groups.lattice_group_info()).split('(')[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {'number': len(cluster.members),
                                'pg': best_sg,
                                'uc': uc_no_stdev,
                                'filename': mark_output}
                self.info.clusters.append(cluster_info)

                # format and record output
                # TODO: How to propagate stdevs after conversion from Niggli?
                uc_table.append("{:<6}: {} {}".format(len(cluster.members),
                                                      uc_no_stdev,
                                                      mark_output))
                lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                     for i in sorted_pg_comp])
                uc_info = [len(cluster.members), best_sg, best_uc,
                           output_file, uc_no_stdev, lattices]
                uc_summary.append(uc_info)
        else:
            # generate average unit cell (clustering turned off)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL AVERAGING (no clustering) '))
            uc_a, uc_b, uc_c, uc_alpha, \
            uc_beta, uc_gamma, uc_sg = list(zip(*self.info.cluster_iterable))
            all_pgs = Counter(uc_sg).most_common()
            unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                         np.median(uc_alpha), np.median(uc_beta),
                         np.median(uc_gamma))

            # Reduce the median cell to the best-fitting lattice symmetry;
            # populate clustering info for GUI display
            uc_init = uctbx.unit_cell(unit_cell)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol='P1')
            groups = metric_subgroups(input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_sg = str(groups.lattice_group_info()).split('(')[0]
            best_uc = top_group['best_subsym'].unit_cell().parameters()
            uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "".format(best_uc[0], best_uc[1], best_uc[2],
                                    best_uc[3], best_uc[4], best_uc[5])
            cluster_info = {'number': len(self.info.cluster_iterable),
                            'pg': best_sg,
                            'uc': uc_no_stdev,
                            'filename': None}
            self.info.clusters.append(cluster_info)

            uc_table.append(uc_no_stdev)
            lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                 for i in all_pgs])
            uc_info = [len(self.info.cluster_iterable), best_sg, best_uc,
                       None, uc_no_stdev, lattices]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')

        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                        ''.format(uc_pick[5]))
        self.info.best_pg = str(uc_pick[1])
        self.info.best_uc = uc_pick[2]

        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            util.main_log(self.info.logfile, item, False)
        self.info.update(uc_table=uc_table)

    if self.gui_mode:
        return self.info.clusters
def unit_cell_analysis(self, write_files=True):
    """Run unit cell analysis on the final integrated objects.

    Uses hierarchical clustering (Zeldin, et al, Acta D, 2015) to split
    integration results according to detected morphological groupings (if
    any); when clustering is turned off, reports the median ("average")
    unit cell.  Most useful with preliminary integration without a target
    unit cell specified.

    :param write_files: if True, write each major cluster's image paths
        to a uc_cluster_*.lst file in self.output_dir
    :return: (cons_pg, cons_uc, clusters) when self.gui_mode is True

    Side effects: logs via misc.main_log, sets self.cons_pg /
    self.cons_uc, appends to self.clusters, may set self.prime_data_path.
    """
    # Will not run clustering if only one integration result found or if
    # turned off
    if self.final_objects is None:
        self.cons_uc = None
        self.cons_pg = None
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        misc.main_log(self.logfile,
                      '\n UNIT CELL CANNOT BE DETERMINED!', True)
    elif len(self.final_objects) == 1:
        unit_cell = (self.final_objects[0].final['a'],
                     self.final_objects[0].final['b'],
                     self.final_objects[0].final['c'],
                     self.final_objects[0].final['alpha'],
                     self.final_objects[0].final['beta'],
                     self.final_objects[0].final['gamma'])
        point_group = self.final_objects[0].final['sg']
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        misc.main_log(self.logfile, uc_line, True)
        self.cons_pg = point_group
        self.cons_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []
        if self.params.analysis.run_clustering:
            # run hierarchical clustering analysis
            from xfel.clustering.cluster import Cluster
            counter = 0
            ucs = Cluster.from_files(pickle_list=self.pickles, use_b=True)
            clusters, _ = ucs.ab_cluster(
                self.params.analysis.cluster_threshold,
                log=False, write_file_lists=False,
                schnell=False, doplot=False)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL ANALYSIS '))

            # Reporting cutoff: 10% of images, capped at 10 (floor
            # division keeps the limit an int on Python 3 as well)
            if len(self.pickles) // 10 >= 10:
                cluster_limit = 10
            else:
                cluster_limit = len(self.pickles) // 10

            # extract clustering info and add to summary output list
            for cluster in clusters:
                sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                        key=lambda x: -1 * x[1])
                pg_nums = [pg[1] for pg in sorted_pg_comp]
                # consensus point group: most frequent in this cluster
                cons_pg = sorted_pg_comp[np.argmax(pg_nums)]
                if len(cluster.members) > cluster_limit:
                    counter += 1
                    # Sort clustered images by mosaicity, lowest to highest
                    cluster_filenames = [j.path for j in cluster.members]
                    clustered_objects = [i for i in self.final_objects if
                                         i.final['final'] in cluster_filenames]
                    sorted_cluster = sorted(clustered_objects,
                                            key=lambda i: i.final['mos'])
                    # Write to file
                    if write_files:
                        output_file = os.path.join(
                            self.output_dir,
                            "uc_cluster_{}.lst".format(counter))
                        for obj in sorted_cluster:
                            with open(output_file, 'a') as scf:
                                scf.write('{}\n'.format(obj.final['final']))
                        mark_output = os.path.basename(output_file)
                    else:
                        mark_output = '*'
                        output_file = None

                    # Populate clustering info for GUI display
                    uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "{:<6.2f} {:<6.2f} {:<6.2f} " \
                                  "".format(cluster.medians[0],
                                            cluster.medians[1],
                                            cluster.medians[2],
                                            cluster.medians[3],
                                            cluster.medians[4],
                                            cluster.medians[5])
                    cluster_info = {'number': len(cluster.members),
                                    'pg': cons_pg[0],
                                    'uc': uc_no_stdev,
                                    'filename': mark_output}
                    self.clusters.append(cluster_info)
                else:
                    mark_output = ''
                    output_file = None

                # format and record output
                uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                          "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) " \
                          "{}".format('({})'.format(len(cluster.members)),
                                      cons_pg[0],
                                      cluster.medians[0], cluster.stdevs[0],
                                      cluster.medians[1], cluster.stdevs[1],
                                      cluster.medians[2], cluster.stdevs[2],
                                      cluster.medians[3], cluster.stdevs[3],
                                      cluster.medians[4], cluster.stdevs[4],
                                      cluster.medians[5], cluster.stdevs[5],
                                      mark_output)
                uc_table.append(uc_line)
                lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                     for i in sorted_pg_comp])
                uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                           output_file, uc_line, lattices]
                uc_summary.append(uc_info)
        else:
            # generate average unit cell (clustering turned off)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL AVERAGING (no clustering) '))
            uc_a = [i.final['a'] for i in self.final_objects]
            uc_b = [i.final['b'] for i in self.final_objects]
            uc_c = [i.final['c'] for i in self.final_objects]
            uc_alpha = [i.final['alpha'] for i in self.final_objects]
            uc_beta = [i.final['beta'] for i in self.final_objects]
            uc_gamma = [i.final['gamma'] for i in self.final_objects]
            uc_sg = [i.final['sg'] for i in self.final_objects]
            cons_pg = Counter(uc_sg).most_common(1)[0][0]
            all_pgs = Counter(uc_sg).most_common()
            uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) " \
                      "{}".format('({})'.format(len(self.final_objects)),
                                  cons_pg,
                                  np.median(uc_a), np.std(uc_a),
                                  np.median(uc_b), np.std(uc_b),
                                  np.median(uc_c), np.std(uc_c),
                                  np.median(uc_alpha), np.std(uc_alpha),
                                  np.median(uc_beta), np.std(uc_beta),
                                  np.median(uc_gamma), np.std(uc_gamma), '')
            unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                         np.median(uc_alpha), np.median(uc_beta),
                         np.median(uc_gamma))
            uc_table.append(uc_line)
            lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                 for i in all_pgs])
            uc_info = [len(self.final_objects), cons_pg, unit_cell, None,
                       uc_line, lattices]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')

        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                        ''.format(uc_pick[5]))
        self.cons_pg = uc_pick[1]
        self.cons_uc = uc_pick[2]

        # identity comparison idiom (was `!= None`)
        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            misc.main_log(self.logfile, item, (not self.gui_mode))

    if self.gui_mode:
        return self.cons_pg, self.cons_uc, self.clusters
def unit_cell_analysis(self, cluster_threshold, output_dir, write_files=True):
    """Run unit cell analysis on the final integrated objects.

    Uses hierarchical clustering (Zeldin, et al, Acta D, 2015) to split
    integration results according to detected morphological groupings (if
    any).  Most useful with preliminary integration without a target unit
    cell specified.

    :param cluster_threshold: Andrews-Bernstein distance threshold passed
        to Cluster.ab_cluster
    :param output_dir: directory for uc_cluster_*.lst output files
    :param write_files: if True, write cluster member paths to .lst files

    Side effects: logs via misc.main_log, sets self.cons_pg /
    self.cons_uc, may set self.prime_data_path.
    """
    # Will not run clustering if only one integration result found
    if len(self.final_objects) == 1:
        unit_cell = (self.final_objects[0].final['a'],
                     self.final_objects[0].final['b'],
                     self.final_objects[0].final['c'],
                     self.final_objects[0].final['alpha'],
                     self.final_objects[0].final['beta'],
                     self.final_objects[0].final['gamma'])
        point_group = self.final_objects[0].final['sg']
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        misc.main_log(self.logfile, uc_line, True)
        self.cons_pg = point_group
        self.cons_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []
        counter = 1

        # run hierarchical clustering analysis
        ucs = Cluster.from_files(self.pickles, use_b=True)
        clusters, _ = ucs.ab_cluster(cluster_threshold, log=False,
                                     write_file_lists=False, schnell=False,
                                     doplot=False)
        uc_table.append("\n\n{:-^80}\n"
                        "".format(' UNIT CELL ANALYSIS '))

        # extract clustering info and add to summary output list
        for cluster in clusters:
            sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                    key=lambda x: -1 * x[1])
            pg_nums = [pg[1] for pg in sorted_pg_comp]
            # consensus point group: most frequent in this cluster
            cons_pg = sorted_pg_comp[np.argmax(pg_nums)]
            output_file = os.path.join(output_dir,
                                       "uc_cluster_{}.lst".format(counter))

            # write out lists of output pickles that comprise clusters
            # with > 1 members
            if len(cluster.members) > 1:
                counter += 1
                # Sort clustered images by mosaicity, lowest to highest
                cluster_filenames = [j.path for j in cluster.members]
                clustered_objects = [i for i in self.final_objects if
                                     i.final['final'] in cluster_filenames]
                sorted_cluster = sorted(clustered_objects,
                                        key=lambda i: i.final['mos'])
                # Write to file
                if write_files:
                    for obj in sorted_cluster:
                        with open(output_file, 'a') as scf:
                            scf.write('{}\n'.format(obj.final['final']))
                    mark_output = os.path.basename(output_file)
                else:
                    mark_output = '*'
                    output_file = None
            else:
                mark_output = ''
                output_file = None

            # format and record output
            uc_line = "{:<6} {:^4}: {:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}), " \
                      "{:<6.2f} ({:>5.2f}), {:<6.2f} ({:>5.2f}) " \
                      "{}".format('({})'.format(len(cluster.members)),
                                  cons_pg[0],
                                  cluster.medians[0], cluster.stdevs[0],
                                  cluster.medians[1], cluster.stdevs[1],
                                  cluster.medians[2], cluster.stdevs[2],
                                  cluster.medians[3], cluster.stdevs[3],
                                  cluster.medians[4], cluster.stdevs[4],
                                  cluster.medians[5], cluster.stdevs[5],
                                  mark_output)
            uc_table.append(uc_line)
            uc_info = [len(cluster.members), cons_pg[0], cluster.medians,
                       output_file, uc_line]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')

        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        self.cons_pg = uc_pick[1]
        self.cons_uc = uc_pick[2]

        # identity comparison idiom (was `!= None`)
        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            misc.main_log(self.logfile, item, True)
def unit_cell_analysis(self):
    """Run unit cell analysis on the final integrated objects.

    Uses hierarchical clustering (Zeldin, et al, Acta D, 2015) to split
    integration results according to detected morphological groupings (if
    any); when clustering is turned off, reports the median ("average")
    unit cell.  Most useful with preliminary integration without a target
    unit cell specified.

    :return: list of cluster-info dicts when self.gui_mode is True

    Side effects: logs via misc.main_log, sets self.cons_pg /
    self.cons_uc, appends to self.clusters, mirrors results onto
    self.analysis_result, may set self.prime_data_path.
    """
    # Will not run clustering if only one integration result found or if
    # turned off
    if self.final_objects is None:
        self.cons_uc = None
        self.cons_pg = None
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        misc.main_log(self.logfile,
                      '\n UNIT CELL CANNOT BE DETERMINED!', True)
    elif len(self.final_objects) == 1:
        unit_cell = (self.final_objects[0].final['a'],
                     self.final_objects[0].final['b'],
                     self.final_objects[0].final['c'],
                     self.final_objects[0].final['alpha'],
                     self.final_objects[0].final['beta'],
                     self.final_objects[0].final['gamma'])
        point_group = self.final_objects[0].final['sg']
        misc.main_log(self.logfile,
                      "\n\n{:-^80}\n".format(' UNIT CELL ANALYSIS '), True)
        uc_line = "{:<6} {:^4}: {:<6.2f}, {:<6.2f}, {:<6.2f}, {:<6.2f}, " \
                  "{:<6.2f}, {:<6.2f}".format('(1)', point_group,
                                              unit_cell[0], unit_cell[1],
                                              unit_cell[2], unit_cell[3],
                                              unit_cell[4], unit_cell[5])
        misc.main_log(self.logfile, uc_line, True)
        self.cons_pg = point_group
        self.cons_uc = unit_cell
    else:
        uc_table = []
        uc_summary = []
        if self.params.analysis.run_clustering:
            # run hierarchical clustering analysis
            from xfel.clustering.cluster import Cluster
            counter = 0
            threshold = self.params.analysis.cluster_threshold
            cluster_limit = self.params.analysis.cluster_limit
            if self.params.analysis.cluster_n_images > 0:
                n_images = self.params.analysis.cluster_n_images
            else:
                n_images = len(self.final_objects)

            # Randomly subsample n_images distinct objects.
            # NOTE(review): obj_list is not consumed below (clustering uses
            # self.pickles); kept to preserve behavior, incl. random state.
            obj_list = []
            if n_images < len(self.final_objects):
                import random
                for i in range(n_images):
                    random_number = random.randrange(0, len(self.final_objects))
                    if self.final_objects[random_number] in obj_list:
                        # resample until an unused object is found
                        while self.final_objects[random_number] in obj_list:
                            random_number = random.randrange(
                                0, len(self.final_objects))
                        obj_list.append(self.final_objects[random_number])
                    else:
                        obj_list.append(self.final_objects[random_number])
            if not obj_list:
                obj_list = self.final_objects

            # Cluster from files (slow, but will keep for now; clustering
            # from an iterable would lose the filenames)
            ucs = Cluster.from_files(pickle_list=self.pickles)

            # Do clustering
            clusters, _ = ucs.ab_cluster(threshold=threshold,
                                         log=False,
                                         write_file_lists=False,
                                         schnell=False,
                                         doplot=False)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL ANALYSIS '))

            # Default reporting cutoff: 10% of images, capped at 10
            # (floor division so cluster_limit stays an int on Python 3)
            if cluster_limit is None:
                if len(self.pickles) // 10 >= 10:
                    cluster_limit = 10
                else:
                    cluster_limit = len(self.pickles) // 10

            # extract clustering info and add to summary output list
            for cluster in clusters:
                sorted_pg_comp = sorted(cluster.pg_composition.items(),
                                        key=lambda x: -1 * x[1])
                if len(cluster.members) > cluster_limit:
                    counter += 1
                    # Sort clustered images by mosaicity, lowest to highest
                    cluster_filenames = [j.path for j in cluster.members]
                    clustered_objects = [i for i in self.final_objects if
                                         i.final['final'] in cluster_filenames]
                    sorted_cluster = sorted(clustered_objects,
                                            key=lambda i: i.final['mos'])
                    # Write to file
                    if self.params.analysis.cluster_write_files:
                        output_file = os.path.join(
                            self.output_dir,
                            "uc_cluster_{}.lst".format(counter))
                        for obj in sorted_cluster:
                            with open(output_file, 'a') as scf:
                                scf.write('{}\n'.format(obj.final['final']))
                        mark_output = os.path.basename(output_file)
                    else:
                        mark_output = '*'
                        output_file = None
                else:
                    mark_output = ''
                    output_file = None

                # Reduce the cluster's median cell to the best-fitting
                # lattice symmetry; populate clustering info for GUI display
                uc_init = uctbx.unit_cell(cluster.medians)
                symmetry = crystal.symmetry(unit_cell=uc_init,
                                            space_group_symbol='P1')
                groups = sgtbx.lattice_symmetry.\
                    metric_subgroups(input_symmetry=symmetry, max_delta=3)
                top_group = groups.result_groups[0]
                best_uc = top_group['best_subsym'].unit_cell().parameters()
                best_sg = top_group['best_subsym'].space_group_info()
                uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "{:<6.2f} {:<6.2f} {:<6.2f} " \
                              "".format(best_uc[0], best_uc[1], best_uc[2],
                                        best_uc[3], best_uc[4], best_uc[5])
                cluster_info = {'number': len(cluster.members),
                                'pg': best_sg,
                                'uc': uc_no_stdev,
                                'filename': mark_output}
                self.clusters.append(cluster_info)

                # format and record output
                # TODO: How to propagate stdevs after conversion from Niggli?
                uc_table.append("{:<6}: {} {}".format(len(cluster.members),
                                                      uc_no_stdev,
                                                      mark_output))
                lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                     for i in sorted_pg_comp])
                uc_info = [len(cluster.members), best_sg, best_uc,
                           output_file, uc_no_stdev, lattices]
                uc_summary.append(uc_info)
        else:
            # generate average unit cell (clustering turned off)
            uc_table.append("\n\n{:-^80}\n"
                            "".format(' UNIT CELL AVERAGING (no clustering) '))
            uc_a = [i.final['a'] for i in self.final_objects]
            uc_b = [i.final['b'] for i in self.final_objects]
            uc_c = [i.final['c'] for i in self.final_objects]
            uc_alpha = [i.final['alpha'] for i in self.final_objects]
            uc_beta = [i.final['beta'] for i in self.final_objects]
            uc_gamma = [i.final['gamma'] for i in self.final_objects]
            uc_sg = [i.final['sg'] for i in self.final_objects]
            all_pgs = Counter(uc_sg).most_common()
            unit_cell = (np.median(uc_a), np.median(uc_b), np.median(uc_c),
                         np.median(uc_alpha), np.median(uc_beta),
                         np.median(uc_gamma))

            # Reduce the median cell to the best-fitting lattice symmetry;
            # populate clustering info for GUI display
            uc_init = uctbx.unit_cell(unit_cell)
            symmetry = crystal.symmetry(unit_cell=uc_init,
                                        space_group_symbol='P1')
            groups = sgtbx.lattice_symmetry. \
                metric_subgroups(input_symmetry=symmetry, max_delta=3)
            top_group = groups.result_groups[0]
            best_uc = top_group['best_subsym'].unit_cell().parameters()
            best_sg = top_group['best_subsym'].space_group_info()
            uc_no_stdev = "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "{:<6.2f} {:<6.2f} {:<6.2f} " \
                          "".format(best_uc[0], best_uc[1], best_uc[2],
                                    best_uc[3], best_uc[4], best_uc[5])
            cluster_info = {'number': len(self.final_objects),
                            'pg': best_sg,
                            'uc': uc_no_stdev,
                            'filename': None}
            self.clusters.append(cluster_info)

            uc_table.append(uc_no_stdev)
            lattices = ', '.join(['{} ({})'.format(i[0], i[1])
                                 for i in all_pgs])
            uc_info = [len(self.final_objects), best_sg, best_uc, None,
                       uc_no_stdev, lattices]
            uc_summary.append(uc_info)

        uc_table.append('\nMost common unit cell:\n')

        # select the most prevalent unit cell (most members in cluster)
        uc_freqs = [i[0] for i in uc_summary]
        uc_pick = uc_summary[np.argmax(uc_freqs)]
        uc_table.append(uc_pick[4])
        uc_table.append('\nBravais Lattices in Biggest Cluster: {}'
                        ''.format(uc_pick[5]))
        self.cons_pg = uc_pick[1]
        self.cons_uc = uc_pick[2]

        # identity comparison idiom (was `!= None`)
        if uc_pick[3] is not None:
            self.prime_data_path = uc_pick[3]

        for item in uc_table:
            misc.main_log(self.logfile, item, (not self.gui_mode))

    # mirror results onto the analysis_result object (plain attribute
    # assignment is equivalent to the original explicit __setattr__ calls)
    self.analysis_result.clusters = self.clusters
    self.analysis_result.cons_pg = self.cons_pg
    self.analysis_result.cons_uc = self.cons_uc

    if self.gui_mode:
        return self.clusters