def to_mtz(miller_array, column_root_label, column_types=None):
    """Store ``miller_array`` in a freshly built MTZ object and return its dataset.

    A new ``mtz.object()`` is given a title, two history lines, and the space
    group and unit cell of ``miller_array``. One crystal and one dataset are
    created on it and the array is added to that dataset.

    Args:
        miller_array: array to store; the code calls its
            ``space_group_info()`` and ``unit_cell()`` methods.
        column_root_label: forwarded to ``dataset.add_miller_array``.
        column_types: forwarded to ``dataset.add_miller_array``.

    Returns:
        The dataset the array was added to (not the MTZ object itself).
    """
    mtz_object = mtz.object()
    mtz_object.set_title("mtz writer test")
    mtz_object.add_history(line="start")
    mtz_object.set_space_group_info(miller_array.space_group_info())
    mtz_object.set_hkl_base(miller_array.unit_cell())
    crystal = mtz_object.add_crystal(
        name="test_crystal",
        project_name="test_project",
        unit_cell=miller_array.unit_cell())
    dataset = crystal.add_dataset(name="test_dataset", wavelength=1)
    # Keep the call outside the assert: assert statements are removed under
    # ``python -O`` and the array would then never be added to the dataset.
    added_to = dataset.add_miller_array(
        miller_array=miller_array,
        column_root_label=column_root_label,
        column_types=column_types)
    assert added_to is dataset
    mtz_object.add_history(line="done")
    return dataset
def to_mtz(miller_array, column_root_label, column_types=None):
    """Store ``miller_array`` in a freshly built MTZ object and return its dataset.

    A new ``mtz.object()`` is given a title, two history lines, and the space
    group and unit cell of ``miller_array``. One crystal and one dataset are
    created on it and the array is added to that dataset.

    Args:
        miller_array: array to store; the code calls its
            ``space_group_info()`` and ``unit_cell()`` methods.
        column_root_label: forwarded to ``dataset.add_miller_array``.
        column_types: forwarded to ``dataset.add_miller_array``.

    Returns:
        The dataset the array was added to (not the MTZ object itself).
    """
    mtz_object = mtz.object()
    mtz_object.set_title("mtz writer test")
    mtz_object.add_history(line="start")
    mtz_object.set_space_group_info(miller_array.space_group_info())
    mtz_object.set_hkl_base(miller_array.unit_cell())
    crystal = mtz_object.add_crystal(
        name="test_crystal",
        project_name="test_project",
        unit_cell=miller_array.unit_cell(),
    )
    dataset = crystal.add_dataset(name="test_dataset", wavelength=1)
    # Keep the call outside the assert: assert statements are removed under
    # ``python -O`` and the array would then never be added to the dataset.
    added_to = dataset.add_miller_array(
        miller_array=miller_array,
        column_root_label=column_root_label,
        column_types=column_types,
    )
    assert added_to is dataset
    mtz_object.add_history(line="done")
    return dataset
def compare_masks(struc, opts): tolerance = opts.tolerance resolution = opts.resolution solvent_radius = opts.solvent_radius shrink_radius = opts.shrink_radius verbose = opts.verbose cout.truncate(0) time_p1 = 0.0 time_asu = 0.0 time_orig = 0.0 params = masks.mask_master_params.extract() params.ignore_hydrogens = False params.ignore_zero_occupancy_atoms = False params.solvent_radius = solvent_radius params.shrink_truncation_radius = shrink_radius fc = struc.structure_factors(d_min = resolution).f_calc() while fc.data().size() <= 3 : resolution /= 1.2 assert resolution > 1.0E-3 fc = struc.structure_factors( d_min = resolution).f_calc() print >>cout, "Resolution= ", resolution, " solvent radius= ", \ solvent_radius, " shrink radius= ", shrink_radius, " Tolerance= ", \ tolerance, " Number of reflection= ", fc.data().size() struc.show_summary(cout) group = struc.space_group() print >>cout, "Cell volume= ", struc.unit_cell().volume(), \ " Group order= ", group.order_z(), " p= ", group.order_p() print >>cout, "Hall symbol: ", group.type().hall_symbol() #check_group(group) tb = time.time() asu_mask = masks.atom_mask( unit_cell = struc.unit_cell(), group = struc.space_group(), resolution = fc.d_min(), grid_step_factor = params.grid_step_factor, solvent_radius = params.solvent_radius, shrink_truncation_radius = params.shrink_truncation_radius ) te = time.time() time_asu += (te-tb) grid = asu_mask.grid_size() print >>cout, "asu mask grid = ", grid zero_test(asu_mask, fc, tolerance = tolerance) radii = get_radii(struc) assert len(radii) == len(struc.sites_frac()) tb = time.time() asu_mask.compute( struc.sites_frac(), radii ) te = time.time() time_asu += (te-tb) print >>cout, " n asu atoms= ", asu_mask.n_asu_atoms(), \ " has-enclosed= ", asu_mask.debug_has_enclosed_box tb = time.time() fm_asu = asu_mask.structure_factors( fc.indices() ) fm_asu = fc.set().array( data = fm_asu ) te = time.time() time_asu_sf = te-tb time_asu += (te-tb) # save files if not opts.save_files is None: 
tmp_file = open(opts.save_files + ".pdb", "w") print >>tmp_file, struc.as_pdb_file() tmp_file.close() asu_mask.xplor_write_map(opts.save_files + "_mask.map") asu_mask.xplor_write_map(opts.save_files + "_inverted_mask.map", 1, True) # also save structure factors import iotbx.mtz mtzo = iotbx.mtz.object() mtzo.set_title("mask test") mtzo.add_history(line="start") mtzo.set_space_group_info(fm_asu.space_group_info()) mtzo.set_hkl_base(fm_asu.unit_cell()) crystal = mtzo.add_crystal( name="mask_test_crystal", project_name="mask_test_project", unit_cell=fm_asu.unit_cell()) dataset = crystal.add_dataset( name="mask_test_dataset", wavelength=1) assert dataset.add_miller_array( miller_array=fm_asu, column_root_label="F", #column_types=column_types ) is dataset mtzo.add_history(line="done") mtzo.write(opts.save_files + "_sf.mtz") # # ========= old mask ============= # tb = time.time() struc_p1 = struc.expand_to_p1() te = time.time() time_p1_exp = (te-tb) time_p1 += (te-tb) fc_p1 = fc.deep_copy() fc_p1 = fc_p1.customized_copy(crystal_symmetry = struc_p1.crystal_symmetry()) tb = time.time() blk_p1 = masks.bulk_solvent( xray_structure = struc_p1, gridding_n_real = grid, ignore_zero_occupancy_atoms = params.ignore_zero_occupancy_atoms, ignore_hydrogen_atoms = params.ignore_hydrogens, solvent_radius = params.solvent_radius, shrink_truncation_radius = params.shrink_truncation_radius) te = time.time() time_p1_msk = (te-tb) time_p1 += (te-tb) tb = time.time() fm_p1 = blk_p1.structure_factors( miller_set = fc_p1 ) te = time.time() time_p1_sf = (te-tb) time_p1 += (te-tb) blk_p1.show_summary(cout) ### original mask tb = time.time() blk_o = masks.bulk_solvent( xray_structure = struc, gridding_n_real = grid, ignore_zero_occupancy_atoms = params.ignore_zero_occupancy_atoms, ignore_hydrogen_atoms = params.ignore_hydrogens, solvent_radius = params.solvent_radius, shrink_truncation_radius = params.shrink_truncation_radius) te = time.time() time_orig_msk = (te-tb) time_orig += (te-tb) tb = 
time.time() fm_o = blk_o.structure_factors( miller_set = fc ) te = time.time() time_orig_sf = (te-tb) time_orig += (te-tb) print >>cout, "Number of reflections ::: Fm asu = ", fm_asu.data().size(), \ "Fm P1 = ", fm_p1.data().size() print >>cout, "Time ( ms ) P1= ", time_p1*1000.0, " orig= ", \ time_orig*1000.0, " asu= ", time_asu*1000.0 print >>cout, "Times ( ms ) mask_asu= ", asu_mask.debug_mask_asu_time, \ " atoms_to_asu= ", asu_mask.debug_atoms_to_asu_time, \ " accessible= ", asu_mask.debug_accessible_time, \ " contact= ", asu_mask.debug_contact_time, \ " Fc= ", time_asu_sf*1000.0, \ " fft= ", asu_mask.debug_fft_time print >>cout, "Times ( ms ) orig: mask= ", time_orig_msk*1000.0, " Fc=", \ time_orig_sf*1000.0 print >>cout, "Times ( ms ) p1 : expand= ", time_p1_exp*1000.0, " mask= ", \ time_p1_msk*1000.0, " Fc=", time_p1_sf*1000.0 assert fm_asu.data().size() == fm_o.data().size() t_v1 = asu_mask.contact_surface_fraction t_v2 = blk_p1.contact_surface_fraction t_v3 = max( abs(t_v1), abs(t_v2) ) if t_v3 > 1.0E-6: t_v4 = abs( t_v1 - t_v2 ) / t_v3 else: t_v4 = 0.0 if( t_v4>1.0E-6 ): if not opts.failed_file is None: tmp_file = open(opts.failed_file, "w") print >>tmp_file, struc.as_pdb_file() tmp_file.close() raise "Not equal solvent volume" assert approx_equal( asu_mask.contact_surface_fraction, blk_p1.contact_surface_fraction) assert approx_equal( asu_mask.accessible_surface_fraction, blk_p1.accessible_surface_fraction) assert is_below_limit( value=asu_mask.accessible_surface_fraction, limit=asu_mask.contact_surface_fraction) n_compared = compare_fc(fm_asu, fm_p1, tolerance = tolerance) assert n_compared == fm_asu.data().size(), \ "N compared refls: "+str(n_compared) + " != " + str(fm_asu.data().size()) assert n_compared >0 if verbose: print cout.getvalue() # test that second calculation will produce the same results asu_mask.compute( struc.sites_frac(), radii ) fm_asu2 = asu_mask.structure_factors( fc.indices() ) fm_asu2 = fc.set().array( data = fm_asu2 ) n_compared 
= compare_fc(fm_asu, fm_asu2, tolerance = tolerance) assert n_compared == fm_asu.data().size(), \ "N compared refls: "+str(n_compared) + " != " + str(fm_asu.data().size()) cout.truncate(0)
def extract(file_name,
            crystal_symmetry,
            wavelength_id,
            crystal_id,
            show_details_if_error,
            output_r_free_label,
            merge_non_unique_under_symmetry,
            map_to_asu,
            remove_systematic_absences,
            all_miller_arrays=None,
            incompatible_flags_to_work_set=False,
            ignore_bad_sigmas=False,
            extend_flags=False,
            return_as_miller_arrays=False,
            log=sys.stdout):
    """Convert the reflection arrays of a CIF file into an MTZ object.

    Args:
        file_name: CIF file path; read with ``iotbx.cif.reader`` when
            ``all_miller_arrays`` is None, and quoted in error messages.
        crystal_symmetry: starting symmetry; it is joined (``force=True``)
            with the symmetry of every array found.
        wavelength_id: when not None, arrays carrying a different positive
            ``wavelength_id`` label are skipped.
        crystal_id: when not None, arrays carrying a different positive
            ``crystal_id`` label are skipped.
        show_details_if_error: print array details before raising on
            non-unique indices.
        output_r_free_label: labels starting with this string are handled as
            R-free flag arrays.
        merge_non_unique_under_symmetry: merge symmetry-equivalent
            reflections instead of raising.
        map_to_asu: map each array to the asymmetric unit (forced to True
            when ``extend_flags`` is set).
        remove_systematic_absences: drop systematic absences from each array.
        all_miller_arrays: pre-built mapping of data block name to a mapping
            of arrays; bypasses reading ``file_name``.
        incompatible_flags_to_work_set: when merging R-free flags, replace
            incompatible flags by 0 instead of raising.
        ignore_bad_sigmas: drop all-zero sigmas instead of raising.
        extend_flags: extend R-free flags to the joined set of reflections.
        return_as_miller_arrays: return the processed arrays as a list
            instead of building MTZ datasets.
        log: file-like object receiving warnings.

    Returns:
        A list of arrays when ``return_as_miller_arrays`` is true, otherwise
        the populated ``iotbx.mtz.object``.

    Raises:
        Sorry: no data arrays, undefined or incompatible crystal symmetry,
            all-zero sigmas, non-unique indices without merging, or an
            undeterminable free-R flag value.
    """
    import iotbx.cif
    from cctbx import miller
    if all_miller_arrays is None:
        base_array_info = miller.array_info(
            crystal_symmetry_from_file=crystal_symmetry)
        all_miller_arrays = iotbx.cif.reader(
            file_path=file_name).build_miller_arrays(
                base_array_info=base_array_info)
    if (len(all_miller_arrays) == 0):
        raise Sorry(
            "No data arrays were found in this CIF file. Please make " +
            "sure that the file contains reflection data, rather than the refined " +
            "model.")
    column_labels = set()
    if (extend_flags):
        map_to_asu = True
    # TODO: is all_mille_arrays a dict ? If not change back
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            other_symmetry = crystal_symmetry
            try:
                crystal_symmetry = other_symmetry.join_symmetry(
                    other_symmetry=ma.crystal_symmetry(), force=True)
            except AssertionError as e:
                str_e = str(e)
                from six.moves import cStringIO as StringIO
                s = StringIO()
                if "Space group is incompatible with unit cell parameters." in str_e:
                    # Append both symmetries so the user can see the mismatch.
                    other_symmetry.show_summary(f=s)
                    ma.crystal_symmetry().show_summary(f=s)
                    str_e += "\n%s" % (s.getvalue())
                    raise Sorry(str_e)
                else:
                    raise
    if (crystal_symmetry.unit_cell() is None
            or crystal_symmetry.space_group_info() is None):
        raise Sorry(
            "Crystal symmetry is not defined. Please use the --symmetry option."
        )
    mtz_object = iotbx.mtz.object() \
        .set_title(title="phenix.cif_as_mtz") \
        .set_space_group_info(space_group_info=crystal_symmetry.space_group_info())
    unit_cell = crystal_symmetry.unit_cell()
    mtz_crystals = {}
    mtz_object.set_hkl_base(unit_cell=unit_cell)
    from iotbx.reflection_file_utils import cif_status_flags_as_int_r_free_flags
    # generate list of all reflections (for checking R-free flags)
    from iotbx.reflection_file_utils import make_joined_set
    all_arrays = []
    for (data_name, miller_arrays) in six.iteritems(all_miller_arrays):
        for ma in miller_arrays.values():
            all_arrays.append(ma)
    complete_set = make_joined_set(all_arrays)
    if return_as_miller_arrays:
        miller_array_list = []
    current_i = -1
    uc = None
    for i, (data_name, miller_arrays) in enumerate(six.iteritems(all_miller_arrays)):
        for ma in miller_arrays.values():
            if ma._space_group_info is None:
                ma._space_group_info = crystal_symmetry.space_group_info()
            labels = ma.info().labels
            label = get_label(miller_array=ma, output_r_free_label=output_r_free_label)
            if label is None:
                print("Can't determine output label for %s - skipping." % \
                    ma.info().label_string(), file=log)
                continue
            elif label.startswith(output_r_free_label):
                ma, _ = cif_status_flags_as_int_r_free_flags(
                    ma, test_flag_value="f")
                if isinstance(ma.data(), flex.double):
                    # Flags stored as floating point must be whole numbers.
                    data_int = ma.data().iround()
                    assert data_int.as_double().all_eq(ma.data())
                    ma = ma.customized_copy(data=data_int).set_info(ma.info())
            elif ((ma.is_xray_amplitude_array() or ma.is_xray_intensity_array())
                  and isinstance(ma.data(), flex.int)):
                ma = ma.customized_copy(data=ma.data().as_double()).set_info(
                    ma.info())
            crys_id = 0
            for l in labels:
                if 'crystal_id' in l:
                    crys_id = int(l.split('=')[-1])
                    break
            if crys_id > 0 and crystal_id is None:
                label += "%i" % crys_id
            if crystal_id is not None and crys_id > 0 and crys_id != crystal_id:
                continue
            if ma.unit_cell() is not None:
                # use symmetry file on the command line if it's None
                unit_cell = ma.unit_cell()
            if crys_id not in mtz_crystals or \
                (i > current_i and unit_cell is not None and uc is not None
                 and unit_cell.parameters() != uc.parameters()):
                # Ensure new mtz crystals are created if miller_array objects have different unit cells
                # Can happen if there are more datasets in the same cif file, like MAD datasets
                uc = unit_cell
                current_i = i
                # Use unique project and crystal names so that MtzGet() in cmtzlib.c picks up individual unit cells
                mtz_crystals[crys_id] = (mtz_object.add_crystal(
                    name="crystal_%i" % i,
                    project_name="project_%i" % i,
                    unit_cell=uc), {})
            crystal, datasets = mtz_crystals[crys_id]
            w_id = 0
            for l in labels:
                if 'wavelength_id' in l:
                    w_id = int(l.split('=')[-1])
                    break
            if wavelength_id is not None and w_id > 0 and w_id != wavelength_id:
                continue
            if w_id > 1 and wavelength_id is None:
                if (label in column_labels):
                    label += "%i" % w_id
            if w_id not in datasets:
                wavelength = ma.info().wavelength
                if (wavelength is None):
                    wavelength = 0
                datasets[w_id] = crystal.add_dataset(name="dataset",
                                                     wavelength=wavelength)
            dataset = datasets[w_id]
            # if all sigmas for an array are set to zero either raise an error, or set sigmas to None
            if ma.sigmas() is not None and (ma.sigmas() == 0).count(False) == 0:
                if ignore_bad_sigmas:
                    print("Warning: bad sigmas, setting sigmas to None.", file=log)
                    ma.set_sigmas(None)
                else:
                    raise Sorry("""Bad sigmas: all sigmas are equal to zero. Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file.""")
            if not ma.is_unique_set_under_symmetry():
                if merge_non_unique_under_symmetry:
                    print("Warning: merging non-unique data", file=log)
                    if (label.startswith(output_r_free_label)
                            and incompatible_flags_to_work_set):
                        merging = ma.merge_equivalents(
                            incompatible_flags_replacement=0)
                        if merging.n_incompatible_flags > 0:
                            print("Warning: %i reflections were placed in the working set " \
                                "because of incompatible flags between equivalents." % (
                                    merging.n_incompatible_flags), file=log)
                    else:
                        try:
                            merging = ma.merge_equivalents()
                        except Sorry as e:
                            if ("merge_equivalents_exact: incompatible" in str(e)):
                                raise Sorry(
                                    str(e) + " for %s" % ma.info().labels[-1] + "\n" +
                                    "Add --incompatible_flags_to_work_set to command line "
                                    "arguments to place incompatible flags to working set."
                                )
                            raise
                    ma = merging.array().customized_copy(
                        crystal_symmetry=ma).set_info(ma.info())
                elif return_as_miller_arrays:
                    # allow non-unique set
                    pass
                else:
                    n_all = ma.indices().size()
                    n_uus = ma.unique_under_symmetry_selection().size()
                    # Separators keep the file name, the counts and the hint
                    # from running together in the message.
                    msg = (
                        "Miller indices not unique under symmetry: " + file_name +
                        " (%d redundant indices out of %d)\n" % (n_all - n_uus, n_all) +
                        "Add --merge to command arguments to force merging data.")
                    if (show_details_if_error):
                        print(msg)
                        ma.show_comprehensive_summary(prefix=" ")
                        ma.map_to_asu().sort().show_array(prefix=" ")
                    raise Sorry(msg)
            if (map_to_asu):
                ma = ma.map_to_asu().set_info(ma.info())
            if (remove_systematic_absences):
                ma = ma.remove_systematic_absences()
            if (label.startswith(output_r_free_label) and complete_set is not None):
                n_missing = len(complete_set.lone_set(other=ma).indices())
                if (n_missing > 0):
                    if (extend_flags):
                        from cctbx import r_free_utils
                        # determine flag values
                        fvals = list(set(ma.data()))
                        print("fvals", fvals, file=log)
                        fval = None
                        if (len(fvals) == 1):
                            fval = fvals[0]
                        elif (len(fvals) == 2):
                            # The less frequent value is taken as the test
                            # flag. Counts are compared directly (both ratios
                            # share one denominator), so the result does not
                            # depend on true vs. floor division.
                            n_first = (ma.data() == fvals[0]).count(True)
                            n_second = (ma.data() == fvals[1]).count(True)
                            if (n_first < n_second):
                                fval = fvals[0]
                            else:
                                fval = fvals[1]
                        elif (len(fvals) == 0):
                            fval = None
                        else:
                            fval = 0
                            if (not fval in fvals):
                                raise Sorry(
                                    "Cannot determine free-R flag value.")
                        if (fval is not None):
                            ma = r_free_utils.extend_flags(
                                r_free_flags=ma,
                                test_flag_value=fval,
                                array_label=label,
                                complete_set=complete_set,
                                preserve_input_values=True,
                                allow_uniform_flags=True,
                                log=log)
                        else:
                            ma = None
                    else:
                        libtbx.warn((
                            "%d reflections do not have R-free flags in the " +
                            "array '%s' - this may " +
                            "cause problems if you try to use the MTZ file for refinement " +
                            "or map calculation. We recommend that you extend the flags " +
                            "to cover all reflections (--extend_flags on the command line)."
                        ) % (n_missing, label))
            # Get rid of fake (0,0,0) reflection in some CIFs
            if (ma is not None):
                ma = ma.select_indices(indices=flex.miller_index(
                    ((0, 0, 0), )), negate=True).set_info(ma.info())
            if return_as_miller_arrays:
                miller_array_list.append(ma)
                continue  # don't make a dataset
            dec = None
            if ("FWT" in label):
                dec = iotbx.mtz.ccp4_label_decorator()
            column_types = None
            if ("PHI" in label or "PHWT" in label) and (ma.is_real_array()):
                column_types = "P"
            elif (label.startswith("DANO") and ma.is_real_array()):
                if (ma.sigmas() is not None):
                    column_types = "DQ"
                else:
                    column_types = "D"
            # De-duplicate the column label with a "-N" suffix. The counter
            # has its own name so it does not overwrite the data block index
            # ``i`` that later arrays of this block still use.
            label_base = label
            suffix = 1
            while label in column_labels:
                label = label_base + "-%i" % (suffix)
                suffix += 1
            if (ma is not None):
                column_labels.add(label)
                if ("FWT-1" in label):
                    dec = None
                dataset.add_miller_array(ma,
                                         column_root_label=label,
                                         label_decorator=dec,
                                         column_types=column_types)
    if return_as_miller_arrays:
        return miller_array_list
    else:
        return mtz_object
w_id = 0 for l in labels: if 'wavelength_id' in l: w_id = int(l.split('=')[-1]) break if wavelength_id is not None and w_id > 0 and w_id != wavelength_id: continue if w_id > 1 and wavelength_id is None: if (label in column_labels): label += "%i" % w_id #print "label is", label if w_id not in datasets: wavelength = ma.info().wavelength if (wavelength is None): wavelength = 0 datasets[w_id] = crystal.add_dataset(name="dataset", wavelength=wavelength) dataset = datasets[w_id] # if all sigmas for an array are set to zero either raise an error, or set sigmas to None if ma.sigmas() is not None and (ma.sigmas() == 0).count(False) == 0: if ignore_bad_sigmas: print >> log, "Warning: bad sigmas, setting sigmas to None." ma.set_sigmas(None) else: raise Sorry("""Bad sigmas: all sigmas are equal to zero. Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file.""" ) if not ma.is_unique_set_under_symmetry(): if merge_non_unique_under_symmetry: print >> log, "Warning: merging non-unique data" if (label.startswith(output_r_free_label)
for l in labels: if 'wavelength_id' in l: w_id = int(l.split('=')[-1]) break if wavelength_id is not None and w_id > 0 and w_id != wavelength_id: continue if w_id > 1 and wavelength_id is None: if (label in column_labels) : label += "%i" %w_id #print "label is", label if w_id not in datasets: wavelength = ma.info().wavelength if (wavelength is None) : wavelength = 0 datasets[w_id] = crystal.add_dataset( name="dataset", wavelength=wavelength) dataset = datasets[w_id] # if all sigmas for an array are set to zero either raise an error, or set sigmas to None if ma.sigmas() is not None and (ma.sigmas() == 0).count(False) == 0: if ignore_bad_sigmas: print >> log, "Warning: bad sigmas, setting sigmas to None." ma.set_sigmas(None) else: raise Sorry( """Bad sigmas: all sigmas are equal to zero. Add --ignore_bad_sigmas to command arguments to leave out sigmas from mtz file.""") if not ma.is_unique_set_under_symmetry(): if merge_non_unique_under_symmetry: print >> log, "Warning: merging non-unique data" if (label.startswith(output_r_free_label)