def copy_flag_to_mtz(flag_array, flag_name, flag_value, mtz_in, mtz_out, log_out=sys.stdout):
    """Extend a test-flag array over the reflections of an MTZ file and write it out.

    The arrays of ``mtz_in`` are read, ``flag_array`` is re-based onto the
    symmetry of the first array, extended with ``r_free_utils.extend_flags``
    to the complete set of the joined arrays, and appended to the content of
    ``mtz_in`` as a new column ``flag_name``; the result is written to
    ``mtz_out``.

    Parameters:
      flag_array  -- miller array holding the flags to copy.
      flag_name   -- column label for the new flag column.
      flag_value  -- value of ``flag_array`` that marks the test set.
      mtz_in      -- path of the MTZ file to read.
      mtz_out     -- path of the MTZ file to write.
      log_out     -- writable stream receiving all progress messages.

    Returns None.  When ``flag_name`` is already a column label of
    ``mtz_in`` an error line is logged and nothing is written.
    """
    def _log(text=""):
        # A plain write() yields the same text as the Python 2
        # "print >> stream" form but also works when print is a function.
        log_out.write(text + "\n")

    # Open mtz
    miller_arrays = iotbx.mtz.object(mtz_in).as_miller_arrays()
    _log("Opening %s" % (mtz_in,))

    existing_labels = [arr.info().label_string() for arr in miller_arrays]
    if flag_name in existing_labels:
        _log("Error: The column %s already exists in the mtz file: %s" % (
            flag_name, mtz_in))
        return

    # The first array of the file supplies unit cell and space group.
    first_array = miller_arrays[0]
    _log(" Using information from %s" % (first_array.info().label_string(),))
    input_symm = crystal.symmetry(
        unit_cell=first_array.unit_cell(),
        space_group_info=first_array.space_group().info(),
        assert_is_compatible_unit_cell=False,
        force_compatible_unit_cell=False)

    d_max, d_min = get_best_resolution(miller_arrays, input_symm)
    _log(" d_max, d_min= %s %s" % (d_max, d_min))
    _log(" Symm: %s %s" % (input_symm.space_group_info(),
                           input_symm.unit_cell()))
    _log()

    # Extend flag
    complete_set = make_joined_set(miller_arrays).complete_set()
    extended = r_free_utils.extend_flags(
        r_free_flags=flag_array.customized_copy(crystal_symmetry=input_symm),
        test_flag_value=flag_value,
        array_label=flag_name,
        complete_set=complete_set,
        accumulation_callback=None,
        preserve_input_values=True,
        d_max=d_max,
        d_min=d_min,
        log=log_out)
    # Keep only the reflections that are part of the complete set.
    r_free_flags = extended.common_set(complete_set)

    _log()
    # The summary is computed on a boolean array (True == test reflection).
    r_free_flags.customized_copy(
        data=r_free_flags.data() == flag_value).show_r_free_flags_info(
            out=log_out)

    # Write mtz file: re-read the input and append the flags as a new
    # crystal/dataset/column.
    mtz_dataset = iotbx.mtz.object(mtz_in).add_crystal(
        "crystal", "project", r_free_flags.unit_cell()).add_dataset(
            name="dataset", wavelength=0)
    mtz_object = mtz_dataset.add_miller_array(
        miller_array=r_free_flags,
        column_root_label=flag_name).mtz_object()
    mtz_object.write(file_name=mtz_out)
def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            all_miller_arrays=None,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
    """Convert the reflection arrays of a CIF file into an MTZ object.

    Parameters:
      file_name -- CIF path; read only when ``all_miller_arrays`` is None,
        and quoted in the "not unique under symmetry" error.
      crystal_symmetry -- starting symmetry; it is joined (``force=True``)
        with the symmetry of every array before anything is written.
      wavelength_id, crystal_id -- when not None, arrays whose
        ``wavelength_id=`` / ``crystal_id=`` label is > 0 and different are
        skipped.
      show_details_if_error -- print array summaries before raising the
        "not unique under symmetry" error.
      output_r_free_label -- forwarded to ``get_label``; arrays whose output
        label starts with it are handled as R-free flags.
      merge_non_unique_under_symmetry -- merge symmetry equivalents instead
        of raising.
      map_to_asu -- map every array to the asymmetric unit (forced on when
        ``extend_flags`` is set).
      remove_systematic_absences -- drop systematic absences from each array.
      all_miller_arrays -- pre-built mapping of data block name to a mapping
        of miller arrays; built from ``file_name`` when None.
      incompatible_flags_to_work_set -- when merging R-free flags, replace
        incompatible equivalents by 0 instead of raising.
      ignore_bad_sigmas -- drop all-zero sigmas with a warning instead of
        raising.
      extend_flags -- extend R-free flags to all reflections found in the
        file.
      return_as_miller_arrays -- return the processed arrays as a list
        instead of adding them to an MTZ object.
      log -- stream for warnings and progress messages.

    Returns the list of processed arrays when ``return_as_miller_arrays`` is
    set (an entry is None when a flag array could not be extended because it
    holds no values), otherwise the populated MTZ object.

    Raises Sorry for: no arrays in the file, incompatible or undefined
    crystal symmetry, all-zero sigmas, non-unique indices without merging,
    incompatible flags on merging, or an undeterminable free-R flag value.
    """
    import iotbx.cif
    from cctbx import miller
    if all_miller_arrays is None:
        base_array_info = miller.array_info(
            crystal_symmetry_from_file=crystal_symmetry)
        all_miller_arrays = iotbx.cif.reader(
            file_path=file_name).build_miller_arrays(
                base_array_info=base_array_info)
    if (len(all_miller_arrays) == 0):
        raise Sorry(
            "No data arrays were found in this CIF file. Please make " +
            "sure that the file contains reflection data, rather than the refined " +
            "model.")
    column_labels = set()
    if (extend_flags):
        map_to_asu = True

    # TODO: is all_miller_arrays a dict ? If not change back
    # Accumulate one crystal symmetry from all arrays.
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            other_symmetry = crystal_symmetry
            try:
                crystal_symmetry = other_symmetry.join_symmetry(
                    other_symmetry=ma.crystal_symmetry(), force=True)
            except AssertionError as e:
                str_e = str(e)
                from six.moves import cStringIO as StringIO
                s = StringIO()
                if "Space group is incompatible with unit cell parameters." in str_e:
                    # Append both symmetries so the user can see the clash.
                    other_symmetry.show_summary(f=s)
                    ma.crystal_symmetry().show_summary(f=s)
                    str_e += "\n%s" % (s.getvalue())
                    raise Sorry(str_e)
                else:
                    raise
    if (crystal_symmetry.unit_cell() is None or
            crystal_symmetry.space_group_info() is None):
        raise Sorry(
            "Crystal symmetry is not defined. Please use the --symmetry option."
        )

    mtz_object = iotbx.mtz.object() \
        .set_title(title="phenix.cif_as_mtz") \
        .set_space_group_info(space_group_info=crystal_symmetry.space_group_info())
    unit_cell = crystal_symmetry.unit_cell()
    mtz_crystals = {}
    mtz_object.set_hkl_base(unit_cell=unit_cell)
    from iotbx.reflection_file_utils import cif_status_flags_as_int_r_free_flags
    # generate list of all reflections (for checking R-free flags)
    from iotbx.reflection_file_utils import make_joined_set
    all_arrays = []
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            all_arrays.append(ma)
    complete_set = make_joined_set(all_arrays)
    if return_as_miller_arrays:
        miller_array_list = []
    current_i = -1
    uc = None
    # i_block is the data block index; it names the MTZ crystals and must
    # stay untouched for every array of the block.
    for i_block, (data_name, miller_arrays) in enumerate(
            six.iteritems(all_miller_arrays)):
        for ma in miller_arrays.values():
            if ma._space_group_info is None:
                ma._space_group_info = crystal_symmetry.space_group_info()
            labels = ma.info().labels
            label = get_label(miller_array=ma,
                              output_r_free_label=output_r_free_label)
            if label is None:
                print("Can't determine output label for %s - skipping." %
                      ma.info().label_string(), file=log)
                continue
            elif label.startswith(output_r_free_label):
                ma, _ = cif_status_flags_as_int_r_free_flags(
                    ma, test_flag_value="f")
                if isinstance(ma.data(), flex.double):
                    # Flags stored as floating point: convert to integers,
                    # insisting that no information is lost.
                    data_int = ma.data().iround()
                    assert data_int.as_double().all_eq(ma.data())
                    ma = ma.customized_copy(data=data_int).set_info(ma.info())
            elif ((ma.is_xray_amplitude_array() or ma.is_xray_intensity_array())
                  and isinstance(ma.data(), flex.int)):
                ma = ma.customized_copy(
                    data=ma.data().as_double()).set_info(ma.info())

            crys_id = 0
            for lab in labels:
                if 'crystal_id' in lab:
                    crys_id = int(lab.split('=')[-1])
                    break
            if crys_id > 0 and crystal_id is None:
                label += "%i" % crys_id
            if crystal_id is not None and crys_id > 0 and crys_id != crystal_id:
                continue

            if ma.unit_cell() is not None:
                # use symmetry file on the command line if it's None
                unit_cell = ma.unit_cell()

            if crys_id not in mtz_crystals or \
                    (i_block > current_i and unit_cell is not None
                     and uc is not None
                     and unit_cell.parameters() != uc.parameters()):
                # Ensure new mtz crystals are created if miller_array objects
                # have different unit cells. Can happen if there are more
                # datasets in the same cif file, like MAD datasets
                uc = unit_cell
                current_i = i_block
                # Use unique project and crystal names so that MtzGet() in
                # cmtzlib.c picks up individual unit cells
                mtz_crystals[crys_id] = (mtz_object.add_crystal(
                    name="crystal_%i" % i_block,
                    project_name="project_%i" % i_block,
                    unit_cell=uc), {})
            crystal, datasets = mtz_crystals[crys_id]

            w_id = 0
            for lab in labels:
                if 'wavelength_id' in lab:
                    w_id = int(lab.split('=')[-1])
                    break
            if wavelength_id is not None and w_id > 0 and w_id != wavelength_id:
                continue
            if w_id > 1 and wavelength_id is None:
                if (label in column_labels):
                    label += "%i" % w_id
            if w_id not in datasets:
                wavelength = ma.info().wavelength
                if (wavelength is None):
                    wavelength = 0
                datasets[w_id] = crystal.add_dataset(name="dataset",
                                                     wavelength=wavelength)
            dataset = datasets[w_id]

            # if all sigmas for an array are set to zero either raise an
            # error, or set sigmas to None
            if ma.sigmas() is not None and (ma.sigmas() == 0).count(False) == 0:
                if ignore_bad_sigmas:
                    print("Warning: bad sigmas, setting sigmas to None.",
                          file=log)
                    ma.set_sigmas(None)
                else:
                    raise Sorry("""Bad sigmas: all sigmas are equal to zero. Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file."""
                                )

            if not ma.is_unique_set_under_symmetry():
                if merge_non_unique_under_symmetry:
                    print("Warning: merging non-unique data", file=log)
                    if (label.startswith(output_r_free_label)
                            and incompatible_flags_to_work_set):
                        merging = ma.merge_equivalents(
                            incompatible_flags_replacement=0)
                        if merging.n_incompatible_flags > 0:
                            print("Warning: %i reflections were placed in the working set "
                                  "because of incompatible flags between equivalents." % (
                                      merging.n_incompatible_flags), file=log)
                    else:
                        try:
                            merging = ma.merge_equivalents()
                        except Sorry as e:
                            if ("merge_equivalents_exact: incompatible" in str(e)):
                                raise Sorry(
                                    str(e) + " for %s" % ma.info().labels[-1] + "\n" +
                                    "Add --incompatible_flags_to_work_set to command line "
                                    "arguments to place incompatible flags to working set."
                                )
                            raise
                    ma = merging.array().customized_copy(
                        crystal_symmetry=ma).set_info(ma.info())
                elif return_as_miller_arrays:
                    # allow non-unique set
                    pass
                else:
                    n_all = ma.indices().size()
                    sel_unique = ma.unique_under_symmetry_selection()
                    n_uus = sel_unique.size()
                    msg = (
                        "Miller indices not unique under symmetry: " + file_name +
                        " (%d redundant indices out of %d). " % (n_all - n_uus, n_all) +
                        "Add --merge to command arguments to force merging data.")
                    if (show_details_if_error):
                        print(msg)
                        ma.show_comprehensive_summary(prefix=" ")
                        ma.map_to_asu().sort().show_array(prefix=" ")
                    raise Sorry(msg)

            if (map_to_asu):
                ma = ma.map_to_asu().set_info(ma.info())
            if (remove_systematic_absences):
                ma = ma.remove_systematic_absences()

            if (label.startswith(output_r_free_label)
                    and complete_set is not None):
                n_missing = len(complete_set.lone_set(other=ma).indices())
                if (n_missing > 0):
                    if (extend_flags):
                        from cctbx import r_free_utils
                        # determine flag values
                        fvals = list(set(ma.data()))
                        print("fvals", fvals, file=log)
                        fval = None
                        if (len(fvals) == 1):
                            fval = fvals[0]
                        elif (len(fvals) == 2):
                            # The less frequent value is taken as the test
                            # flag. Both fractions share one denominator, so
                            # comparing the counts is equivalent and avoids
                            # integer division on Python 2.
                            n_first = (ma.data() == fvals[0]).count(True)
                            n_second = (ma.data() == fvals[1]).count(True)
                            if (n_first < n_second):
                                fval = fvals[0]
                            else:
                                fval = fvals[1]
                        elif (len(fvals) == 0):
                            fval = None
                        else:
                            fval = 0
                            if (not fval in fvals):
                                raise Sorry(
                                    "Cannot determine free-R flag value.")
                        if (fval is not None):
                            ma = r_free_utils.extend_flags(
                                r_free_flags=ma,
                                test_flag_value=fval,
                                array_label=label,
                                complete_set=complete_set,
                                preserve_input_values=True,
                                allow_uniform_flags=True,
                                log=log)
                        else:
                            ma = None
                    else:
                        libtbx.warn((
                            "%d reflections do not have R-free flags in the " +
                            "array '%s' - this may " +
                            "cause problems if you try to use the MTZ file for refinement " +
                            "or map calculation. We recommend that you extend the flags " +
                            "to cover all reflections (--extend_flags on the command line)."
                        ) % (n_missing, label))

            # Get rid of fake (0,0,0) reflection in some CIFs
            if (ma is not None):
                ma = ma.select_indices(indices=flex.miller_index(
                    ((0, 0, 0), )), negate=True).set_info(ma.info())

            if return_as_miller_arrays:
                miller_array_list.append(ma)
                continue  # don't make a dataset

            if ma is None:
                # Nothing to add to the MTZ object for this array.
                continue

            dec = None
            if ("FWT" in label):
                dec = iotbx.mtz.ccp4_label_decorator()
            column_types = None
            if ("PHI" in label or "PHWT" in label) and (ma.is_real_array()):
                column_types = "P"
            elif (label.startswith("DANO") and ma.is_real_array()):
                if (ma.sigmas() is not None):
                    column_types = "DQ"
                else:
                    column_types = "D"

            # Make the column label unique with a "-N" suffix. The counter
            # has its own name so the data block index is not overwritten.
            label_base = label
            i_suffix = 1
            while label in column_labels:
                label = label_base + "-%i" % (i_suffix)
                i_suffix += 1
            column_labels.add(label)
            if ("FWT-1" in label):
                dec = None
            dataset.add_miller_array(ma,
                                     column_root_label=label,
                                     label_decorator=dec,
                                     column_types=column_types)

    if return_as_miller_arrays:
        return miller_array_list
    else:
        return mtz_object
def run(mtz, mtz_out, mtz_ref, flag_name=None, flag_value=None):
    """Copy the R-free flag column of ``mtz_ref`` into ``mtz`` and write ``mtz_out``.

    Parameters:
      mtz        -- path of the MTZ file that receives the flags.
      mtz_out    -- path of the MTZ file to write.
      mtz_ref    -- path of the reference MTZ file holding the flags.
      flag_name  -- label of the flag column in ``mtz_ref``; when None the
                    single array accepted by ``is_rfree_array`` is used.
      flag_value -- value marking the test set; when None it is taken from
                    ``get_r_free_flags_scores``.

    Progress is printed to standard output.  The function calls ``quit()``
    when no (or more than one) flag column is found, when the requested
    column does not exist, or when ``flag_name`` already exists in ``mtz``.
    """
    # Every print() below takes exactly one pre-formatted string so that the
    # output is the same whether print is a statement or a function.
    ref_arrays = iotbx.mtz.object(mtz_ref).as_miller_arrays()
    print("Opening reference: %s" % (mtz_ref,))

    # Get flag array
    flag_array = None
    if flag_name is None:
        flags = [arr for arr in ref_arrays if is_rfree_array(arr, arr.info())]
        if len(flags) == 0:
            print(" No R free flags like column found.")
            quit()
        elif len(flags) > 1:
            print(" More than one column which looks like R free flag:")
            for candidate in flags:
                print("  %s" % (candidate.info().label_string(),))
            quit()
        else:
            flag_name = flags[0].info().label_string()
            flag_array = flags[0]
            print(" Guessing R free flag: %s" % (flag_name,))
    else:
        flags = [arr for arr in ref_arrays
                 if flag_name == arr.info().label_string()]
        if len(flags) == 0:
            print(" Specified flag name not found: %s" % (flag_name,))
            quit()
        else:
            print(" Use specified flag: %s" % (flag_name,))
            flag_array = flags[0]

    # Get flag number
    if flag_value is None:
        flag_scores = get_r_free_flags_scores(miller_arrays=[flag_array],
                                              test_flag_value=flag_value)
        flag_value = flag_scores.test_flag_values[0]
        print(" Guessing flag number: %s" % (flag_value,))
    else:
        print(" Specified flag number: %s" % (flag_value,))

    # The resolution limits are printed as returned (wrapped in a 1-tuple so
    # that "%" does not unpack them).
    ref_resolution = get_best_resolution([flag_array],
                                         flag_array.crystal_symmetry())
    print(" d_max, d_min= %s" % (ref_resolution,))
    print(" Symm: %s %s" % (flag_array.space_group().info(),
                            flag_array.unit_cell()))
    print("")

    # Open mtz
    miller_arrays = iotbx.mtz.object(mtz).as_miller_arrays()
    print("Opening %s" % (mtz,))

    if flag_name in [arr.info().label_string() for arr in miller_arrays]:
        print("Error: The column %s already exists in the mtz file: %s" % (
            flag_name, mtz))
        quit()

    # The first array of the file supplies unit cell and space group.
    first_array = miller_arrays[0]
    print(" Using information from %s" % (first_array.info().label_string(),))
    input_symm = crystal.symmetry(
        unit_cell=first_array.unit_cell(),
        space_group_info=first_array.space_group().info(),
        assert_is_compatible_unit_cell=False,
        force_compatible_unit_cell=False)

    d_max, d_min = get_best_resolution(miller_arrays, input_symm)
    print(" d_max, d_min= %s %s" % (d_max, d_min))
    print(" Symm: %s %s" % (input_symm.space_group_info(),
                            input_symm.unit_cell()))
    print("")

    # Extend flag
    complete_set = make_joined_set(miller_arrays).complete_set()
    r_free_flags = r_free_utils.extend_flags(
        r_free_flags=flag_array.customized_copy(crystal_symmetry=input_symm),
        test_flag_value=flag_value,
        array_label=flag_name,
        complete_set=complete_set,
        accumulation_callback=None,
        preserve_input_values=True,
        d_max=d_max,
        d_min=d_min,
        log=sys.stdout).common_set(complete_set)

    print("")
    # NOTE(review): the original also printed the value returned by
    # show_r_free_flags_info(); that is kept here - confirm it is wanted.
    flags_info = r_free_flags.customized_copy(
        data=r_free_flags.data() == flag_value).show_r_free_flags_info()
    print("%s" % (flags_info,))

    # Write mtz file: re-read the input and append the flags as a new
    # crystal/dataset/column.
    mtz_dataset = iotbx.mtz.object(mtz).add_crystal(
        "crystal", "project", r_free_flags.unit_cell()).add_dataset(
            name="dataset", wavelength=0)
    mtz_object = mtz_dataset.add_miller_array(
        miller_array=r_free_flags,
        column_root_label=flag_name).mtz_object()
    mtz_object.write(file_name=mtz_out)

    print("")
    print("Writing: %s" % (mtz_out,))
    print("")
ma.map_to_asu().sort().show_array(prefix=" ") raise Sorry(msg) if (map_to_asu): ma = ma.map_to_asu().set_info(ma.info()) if (remove_systematic_absences): ma = ma.remove_systematic_absences() if (label.startswith(output_r_free_label) and complete_set is not None): n_missing = len(complete_set.lone_set(other=ma).indices()) if (n_missing > 0): if (extend_flags): from cctbx import r_free_utils ma = r_free_utils.extend_flags( r_free_flags=ma, test_flag_value=0, array_label=label, complete_set=complete_set, preserve_input_values=True, allow_uniform_flags=True, log=sys.stdout) else: libtbx.warn(( "%d reflections do not have R-free flags in the " + "array '%s' - this may " + "cause problems if you try to use the MTZ file for refinement " + "or map calculation. We recommend that you extend the flags " + "to cover all reflections (--extend-flags on the command line)." ) % (n_missing, label)) # Get rid of fake (0,0,0) reflection in some CIFs ma = ma.select_indices(indices=flex.miller_index(((0, 0, 0), )),
ma.show_comprehensive_summary(prefix=" ") ma.map_to_asu().sort().show_array(prefix=" ") raise Sorry(msg) if(map_to_asu): ma = ma.map_to_asu().set_info(ma.info()) if(remove_systematic_absences): ma = ma.remove_systematic_absences() if (label.startswith(output_r_free_label) and complete_set is not None) : n_missing = len(complete_set.lone_set(other=ma).indices()) if (n_missing > 0) : if (extend_flags) : from cctbx import r_free_utils ma = r_free_utils.extend_flags( r_free_flags=ma, test_flag_value=0, array_label=label, complete_set=complete_set, preserve_input_values=True, allow_uniform_flags=True, log=sys.stdout) else : libtbx.warn(("%d reflections do not have R-free flags in the "+ "array '%s' - this may "+ "cause problems if you try to use the MTZ file for refinement "+ "or map calculation. We recommend that you extend the flags "+ "to cover all reflections (--extend-flags on the command line).") % (n_missing, label)) # Get rid of fake (0,0,0) reflection in some CIFs ma = ma.select_indices(indices=flex.miller_index(((0,0,0),)), negate=True).set_info(ma.info()) if return_as_miller_arrays: