def cctbx_miller_fft_map_as_xplor_map(
    self,
    file_name,
    title_lines=None,
    gridding_first=None,
    gridding_last=None,
    average=None,
    standard_deviation=None,
):
    """Write the real-space map of this FFT map object to an X-PLOR map file.

    Parameters:
      file_name: path handed to ``writer``.
      title_lines: list of title strings; when omitted a fresh
        ``["cctbx.miller.fft_map"]`` list is used.
      gridding_first: first grid point to write; defaults to ``(0, 0, 0)``.
      gridding_last: last grid point to write; defaults to ``self.n_real()``.
      average: map mean written to the file; computed from the map data
        when omitted.
      standard_deviation: map sigma written to the file; computed from the
        map data when omitted.
    """
    # A list literal as default would be shared between all calls; build a
    # new one per call so the callee can never leak state through it.
    if title_lines is None:
        title_lines = ["cctbx.miller.fft_map"]
    if gridding_first is None:
        gridding_first = (0, 0, 0)
    if gridding_last is None:
        gridding_last = self.n_real()
    gridding_ = gridding(
        n=self.n_real(), first=gridding_first, last=gridding_last)
    data = self.real_map()
    # Statistics are only computed when at least one value must be derived.
    if average is None or standard_deviation is None:
        statistics = maptbx.statistics(data)
        if average is None:
            average = statistics.mean()
        if standard_deviation is None:
            standard_deviation = statistics.sigma()
    writer(
        file_name=file_name,
        title_lines=title_lines,
        unit_cell=self.unit_cell(),
        gridding=gridding_,
        data=data,
        is_p1_cell=True,
        average=average,
        standard_deviation=standard_deviation)
def exercise(args, use_mrcfile=None):
    """Run the built-in map tests, then check each map file named in args.

    For every file the header values are printed and the statistics
    computed from the map data are asserted to agree with the header.
    ``use_mrcfile`` selects ``iotbx.mrcfile`` over ``iotbx.ccp4_map`` as
    the reader and is forwarded to the built-in tests.
    """
    exercise_with_tst_input_map(use_mrcfile=use_mrcfile)
    exercise_with_tst_input_map_2(use_mrcfile=use_mrcfile)

    # (label, reader attribute) pairs printed in this order for every file.
    header_fields = (
        ("header_min: ", "header_min"),
        ("header_max: ", "header_max"),
        ("header_mean:", "header_mean"),
        ("header_rms: ", "header_rms"),
        ("unit cell grid:", "unit_cell_grid"),
        ("unit cell parameters:", "unit_cell_parameters"),
        ("space group number: ", "space_group_number"),
    )

    for file_name in args:
        print("\n", file_name, "use_mrcfile=", use_mrcfile)
        reader_module = iotbx.mrcfile if use_mrcfile else iotbx.ccp4_map
        m = reader_module.map_reader(file_name=file_name)
        for label, attribute in header_fields:
            print(label, getattr(m, attribute))
        print("map origin:", m.map_data().origin())
        print("map grid: ", m.map_data().all())

        map_stats = maptbx.statistics(m.data)
        assert approx_equal(map_stats.min(), m.header_min)
        assert approx_equal(map_stats.max(), m.header_max)
        assert approx_equal(map_stats.mean(), m.header_mean)
        # The rms is only compared when the header stores a non-zero value.
        if m.header_rms != 0:
            assert approx_equal(map_stats.sigma(), m.header_rms)
        print()
def calculate_exp_i_two_phi_peaks(xray_structure, d_min, min_peak_distance,
                                  max_reduced_peaks):
    """Return clustered peaks of the map built from unit-amplitude
    coefficients whose phases are twice the calculated phases.

    The map is copied onto its focus grid, divided by the absolute value of
    its maximum (unless that maximum is zero), searched for peaks with
    symmetry tags, and the peak list is passed to
    ``peak_cluster_reduction``.
    """
    f_calc = xray_structure.structure_factors(
        anomalous_flag=False, d_min=d_min).f_calc()
    doubled_phases = flex.arg(f_calc.data()) * 2
    coefficients = miller.array(
        miller_set=f_calc, data=flex.polar(1, doubled_phases))
    fft_map = coefficients.fft_map(
        d_min=d_min, symmetry_flags=maptbx.use_space_group_symmetry)

    padded_map = fft_map.real_map()
    real_map = maptbx.copy(padded_map, flex.grid(padded_map.focus()))

    map_maximum = maptbx.statistics(real_map).max()
    if map_maximum != 0:
        real_map /= abs(map_maximum)

    tags = maptbx.grid_tags(real_map.focus())
    tags.build(fft_map.space_group_info().type(), fft_map.symmetry_flags())
    tags.verify(real_map)

    # Ten times more raw peaks are collected than will be kept afterwards.
    raw_peaks = maptbx.peak_list(
        data=real_map,
        tags=tags.tag_array(),
        max_peaks=10 * max_reduced_peaks,
        interpolate=True)
    return peak_cluster_reduction(
        crystal_symmetry=xray_structure,
        peak_list=raw_peaks,
        min_peak_distance=min_peak_distance,
        max_reduced_peaks=max_reduced_peaks)
def recycle():
    """Write random maps to "tmp.map" and assert they read back unchanged.

    Each case is an (n, first, last) gridding triple. Title lines,
    gridding, unit cell, statistics and data of the re-read map are
    compared with what was written.
    """
    cell_parameters = (10, 20, 30, 80, 90, 100)
    gridding_cases = (
        ((5, 3, 4), (0, 0, 0), (3, 5, 6)),
        ((4, 3, 5), (-1, -3, 4), (6, 4, 5)),
        ((3, 4, 5), (-2, 3, 0), (-2, 3, 0)),
        ((3, 4, 5), (-2, 3, 0), (-2, 3, 3)),
        ((3, 4, 5), (-2, 3, 0), (-2, 8, 0)),
        ((3, 4, 5), (-2, 3, 0), (-2, 9, 0)),
        ((3, 4, 5), (-2, 3, 0), (3, 3, 0)),
        ((3, 4, 5), (-2, 3, 0), (4, 3, 0)),
    )
    for n, first, last in gridding_cases:
        grid_spec = iotbx.xplor.map.gridding(n=n, first=first, last=last)
        flex_grid = grid_spec.as_flex_grid()
        # Random values in [-10000, 10000), reshaped onto the gridding.
        data = 20000 * flex.random_double(size=flex_grid.size_1d()) - 10000
        data.resize(flex_grid)
        stats = maptbx.statistics(data)

        iotbx.xplor.map.writer(
            file_name="tmp.map",
            title_lines=["regression test"],
            unit_cell=uctbx.unit_cell(cell_parameters),
            gridding=grid_spec,
            data=data,
            average=stats.mean(),
            standard_deviation=stats.sigma())

        read = iotbx.xplor.map.reader(file_name="tmp.map")
        assert read.title_lines == ["regression test"]
        assert read.gridding.n == n
        assert read.gridding.first == first
        assert read.gridding.last == last
        assert read.unit_cell.is_similar_to(uctbx.unit_cell(cell_parameters))
        assert eps_eq(read.average, stats.mean(), eps=1.e-4)
        assert eps_eq(read.standard_deviation, stats.sigma(), eps=1.e-4)
        assert read.data.origin() == first
        assert read.data.last(False) == last
        assert read.data.focus() == data.focus()
        assert eps_eq(read.data, data, eps=1.e-4)
def run_fast_nv1995(f_obs, f_calc_fixed, f_calc_p1, symmetry_flags,
                    gridding, grid_tags, verbose):
    """Run the fast_nv1995 translation search and return its peak list.

    Parameters:
      f_obs: observed data (indices, data, space group, anomalous flag).
      f_calc_fixed: optional fixed-part structure factors; ``None`` means
        an empty ``flex.complex_double`` is passed as ``f_part``.
      f_calc_p1: P1 structure factors of the model being translated.
      symmetry_flags: flags passed to ``grid_tags.build``.
      gridding: grid dimensions; the target map must have exactly these.
      grid_tags: tag object that is (re)built and verified here.
      verbose: when true, print map statistics, gridding and peaks.

    Returns the ``maptbx.peak_list`` of the target map. The function
    asserts that the highest grid peak equals the map maximum.
    """
    if f_calc_fixed is None:
        f_part = flex.complex_double()
    else:
        f_part = f_calc_fixed.data()
    assert f_obs.anomalous_flag() == f_calc_p1.anomalous_flag()
    fast_nv1995 = translation_search.fast_nv1995(
        gridding=gridding,
        space_group=f_obs.space_group(),
        anomalous_flag=f_obs.anomalous_flag(),
        miller_indices_f_obs=f_obs.indices(),
        f_obs=f_obs.data(),
        f_part=f_part,
        miller_indices_p1_f_calc=f_calc_p1.indices(),
        p1_f_calc=f_calc_p1.data())
    assert fast_nv1995.target_map().all() == gridding
    map_stats = maptbx.statistics(fast_nv1995.target_map())
    if verbose:
        map_stats.show_summary()
    grid_tags.build(f_obs.space_group_info().type(), symmetry_flags)
    assert grid_tags.n_grid_misses() == 0
    assert grid_tags.verify(fast_nv1995.target_map())
    peak_list = maptbx.peak_list(
        data=fast_nv1995.target_map(),
        tags=grid_tags.tag_array(),
        peak_search_level=1,
        max_peaks=10,
        interpolate=True)
    if verbose:
        # Single-argument print() produces the same text on Python 2 and 3.
        print("gridding: %s" % (gridding,))
        for i, site in enumerate(peak_list.sites()):
            print("(%.4f,%.4f,%.4f)" % site
                  + " " + "%.6g" % peak_list.heights()[i])
    assert approx_equal(map_stats.max(), flex.max(peak_list.grid_heights()))
    return peak_list
def cctbx_miller_fft_map_as_xplor_map(
    self,
    file_name,
    title_lines=None,
    gridding_first=None,
    gridding_last=None,
    average=None,
    standard_deviation=None,
):
    """Write the real-space map of this FFT map object to an X-PLOR map file.

    Parameters:
      file_name: path handed to ``writer``.
      title_lines: list of title strings; when omitted a fresh
        ``["cctbx.miller.fft_map"]`` list is used.
      gridding_first: first grid point to write; defaults to ``(0, 0, 0)``.
      gridding_last: last grid point to write; defaults to ``self.n_real()``.
      average: map mean written to the file; computed from the map data
        when omitted.
      standard_deviation: map sigma written to the file; computed from the
        map data when omitted.
    """
    # Avoid a shared mutable default: create the default list per call.
    if title_lines is None:
        title_lines = ["cctbx.miller.fft_map"]
    if gridding_first is None:
        gridding_first = (0, 0, 0)
    if gridding_last is None:
        gridding_last = self.n_real()
    gridding_ = gridding(
        n=self.n_real(), first=gridding_first, last=gridding_last)
    data = self.real_map()
    # Only compute statistics when a value actually has to be derived.
    if average is None or standard_deviation is None:
        statistics = maptbx.statistics(data)
        if average is None:
            average = statistics.mean()
        if standard_deviation is None:
            standard_deviation = statistics.sigma()
    writer(
        file_name=file_name,
        title_lines=title_lines,
        unit_cell=self.unit_cell(),
        gridding=gridding_,
        data=data,
        is_p1_cell=True,
        average=average,
        standard_deviation=standard_deviation,
    )
def read_xplor(file_name):
    """Read the reference X-PLOR map and assert its known contents.

    Title lines, gridding, unit cell, header statistics, selected data
    values, computed statistics and the ``show_summary`` output are all
    compared with hard-coded expectations. Returns the reader object.
    """
    xplor_map = iotbx.xplor.map.reader(file_name=file_name)

    assert xplor_map.title_lines == [
        ' REMARKS FILENAME="cns.map"',
        ' REMARKS DATE:15-May-2004 02:15:56 created by user: rwgk',
        ' REMARKS VERSION:1.1',
    ]

    grid = xplor_map.gridding
    assert grid.n == (24, 24, 40)
    assert grid.first == (1, -4, -6)
    assert grid.last == (10, 0, -3)

    reference_cell = uctbx.unit_cell(
        (7.41407939496, 7.41407939496, 12.6039349714, 90, 90, 120))
    assert xplor_map.unit_cell.is_similar_to(reference_cell)
    assert approx_equal(xplor_map.average, -0.5274E-10)
    assert approx_equal(xplor_map.standard_deviation, 0.1792E+00)

    values = xplor_map.data
    assert values.origin() == (1, -4, -6)
    assert values.last(False) == (10, 0, -3)
    assert values.focus() == (11, 1, -2)

    first_ten = [
        -2.63210E-01, -4.36970E-01, -5.71930E-01, -6.09230E-01,
        -2.07220E-01, -4.15100E-01, -6.11970E-01, -7.13380E-01,
        -2.05500E-01, -3.60990E-01,
    ]
    middle_ten = [
        -4.08540E-01, -4.77320E-01, -5.16210E-01, -4.84100E-01,
        -2.93930E-01, -3.58500E-01, -4.40170E-01, -4.92110E-01,
        -2.19660E-01, -2.40570E-01,
    ]
    last_ten = [
        -2.13550E-01, -4.87250E-01, -4.51260E-02, -2.13540E-01,
        -4.57070E-01, -6.38040E-01, -3.51570E-01, -5.98030E-01,
        -7.60270E-01, -7.62940E-01,
    ]
    assert approx_equal(xplor_map.data[:10], first_ten)
    assert approx_equal(xplor_map.data[40:50], middle_ten)
    assert approx_equal(xplor_map.data[-10:], last_ten)

    stats = maptbx.statistics(xplor_map.data)
    assert approx_equal(stats.min(), -0.78098)
    assert approx_equal(stats.max(), 0.27233)
    assert approx_equal(stats.mean(), -0.363687)
    assert approx_equal(stats.sigma(), 0.20679)

    # NOTE(review): the line breaks (and possibly runs of spaces) of the
    # expected text were reconstructed from a flattened copy -- confirm
    # against the actual show_summary() output.
    summary = StringIO()
    xplor_map.show_summary(out=summary, prefix="$")
    assert not show_diff(summary.getvalue(), """\
$Title lines: 3
$ REMARKS FILENAME="cns.map"
$ REMARKS DATE:15-May-2004 02:15:56 created by user: rwgk
$ REMARKS VERSION:1.1
$Gridding:
$ n: (24, 24, 40)
$ first: (1, -4, -6)
$ last: (10, 0, -3)
$Total number of data points: 200
$ min: -0.78098
$ max: 0.27233
$ mean: -0.363687
$ sigma: 0.20679
""")
    return xplor_map
def read_xplor(file_name):
    """Load the reference X-PLOR map and check it against known values.

    Asserts title lines, gridding, unit cell, header and computed
    statistics, three slices of the data and the text produced by
    ``show_summary``. The reader object is returned for further use.
    """
    reader = iotbx.xplor.map.reader(file_name=file_name)

    expected_titles = [
        ' REMARKS FILENAME="cns.map"',
        ' REMARKS DATE:15-May-2004 02:15:56 created by user: rwgk',
        ' REMARKS VERSION:1.1']
    assert reader.title_lines == expected_titles

    assert reader.gridding.n == (24, 24, 40)
    assert reader.gridding.first == (1, -4, -6)
    assert reader.gridding.last == (10, 0, -3)
    assert reader.unit_cell.is_similar_to(uctbx.unit_cell(
        (7.41407939496, 7.41407939496, 12.6039349714, 90, 90, 120)))
    assert approx_equal(reader.average, -0.5274E-10)
    assert approx_equal(reader.standard_deviation, 0.1792E+00)
    assert reader.data.origin() == (1, -4, -6)
    assert reader.data.last(False) == (10, 0, -3)
    assert reader.data.focus() == (11, 1, -2)

    # Spot checks at the start, in the middle and at the end of the data.
    assert approx_equal(reader.data[:10], [
        -2.63210E-01, -4.36970E-01, -5.71930E-01, -6.09230E-01, -2.07220E-01,
        -4.15100E-01, -6.11970E-01, -7.13380E-01, -2.05500E-01, -3.60990E-01])
    assert approx_equal(reader.data[40:50], [
        -4.08540E-01, -4.77320E-01, -5.16210E-01, -4.84100E-01, -2.93930E-01,
        -3.58500E-01, -4.40170E-01, -4.92110E-01, -2.19660E-01, -2.40570E-01])
    assert approx_equal(reader.data[-10:], [
        -2.13550E-01, -4.87250E-01, -4.51260E-02, -2.13540E-01, -4.57070E-01,
        -6.38040E-01, -3.51570E-01, -5.98030E-01, -7.60270E-01, -7.62940E-01])

    data_stats = maptbx.statistics(reader.data)
    assert approx_equal(data_stats.min(), -0.78098)
    assert approx_equal(data_stats.max(), 0.27233)
    assert approx_equal(data_stats.mean(), -0.363687)
    assert approx_equal(data_stats.sigma(), 0.20679)

    # NOTE(review): line breaks (and possibly spacing) in the expected text
    # below were reconstructed from a flattened copy -- verify against the
    # real show_summary() output before relying on it.
    buffer = StringIO()
    reader.show_summary(out=buffer, prefix="$")
    assert not show_diff(buffer.getvalue(), """\
$Title lines: 3
$ REMARKS FILENAME="cns.map"
$ REMARKS DATE:15-May-2004 02:15:56 created by user: rwgk
$ REMARKS VERSION:1.1
$Gridding:
$ n: (24, 24, 40)
$ first: (1, -4, -6)
$ last: (10, 0, -3)
$Total number of data points: 200
$ min: -0.78098
$ max: 0.27233
$ mean: -0.363687
$ sigma: 0.20679
""")
    return reader
def show_summary(self, out=None, prefix=""):
    """Write title lines, gridding and data statistics of this map.

    Parameters:
      out: object with a ``write`` method; defaults to ``sys.stdout``.
      prefix: string put in front of every output line.
    """
    if out is None:
        out = sys.stdout

    def emit(*items):
        # Same text as the Python 2 statement ``print >> out, a, b``:
        # str() of each item, joined by single spaces, plus a newline.
        # Works unchanged on Python 2 and Python 3.
        out.write(" ".join([str(item) for item in items]) + "\n")

    emit(prefix + "Title lines:", len(self.title_lines))
    for line in self.title_lines:
        emit(prefix + " " + line.rstrip())
    g = self.gridding
    emit(prefix + "Gridding:")
    emit(prefix + " n: ", g.n)
    emit(prefix + " first:", g.first)
    emit(prefix + " last: ", g.last)
    emit(prefix + "Total number of data points:", self.data.size())
    stats = maptbx.statistics(self.data)
    emit(prefix + " min: %.6g" % stats.min())
    emit(prefix + " max: %.6g" % stats.max())
    emit(prefix + " mean: %.6g" % stats.mean())
    emit(prefix + " sigma: %.6g" % stats.sigma())
def show_summary(self, out=None, prefix=""):
    """Write a short, prefixed report about this map to ``out``.

    The report lists the title lines, the gridding (n, first, last), the
    number of data points and min/max/mean/sigma of the data.

    Parameters:
      out: object with a ``write`` method; ``sys.stdout`` when omitted.
      prefix: text prepended to every line of the report.
    """
    if out is None:
        out = sys.stdout

    def write_line(*fields):
        # Reproduces ``print >> out, f1, f2`` (Python 2 only syntax) in a
        # form that also runs on Python 3: fields are str()-converted,
        # separated by one space and terminated by a newline.
        out.write(" ".join([str(field) for field in fields]) + "\n")

    write_line(prefix + "Title lines:", len(self.title_lines))
    for line in self.title_lines:
        write_line(prefix + " " + line.rstrip())
    g = self.gridding
    write_line(prefix + "Gridding:")
    write_line(prefix + " n: ", g.n)
    write_line(prefix + " first:", g.first)
    write_line(prefix + " last: ", g.last)
    write_line(prefix + "Total number of data points:", self.data.size())
    stats = maptbx.statistics(self.data)
    write_line(prefix + " min: %.6g" % stats.min())
    write_line(prefix + " max: %.6g" % stats.max())
    write_line(prefix + " mean: %.6g" % stats.mean())
    write_line(prefix + " sigma: %.6g" % stats.sigma())
def __init__(self, fft_map=None, unit_cell=None, raw_map=None,
             periodic=False,
             positive_iso_level=None,
             iso_level_positive_range_fraction=None,
             negative_iso_level=None,
             iso_level_negative_range_fraction=None,
             wires=True, **kwds):
    """Set up the viewer from an FFT map or from a unit cell plus raw map.

    Parameters:
      fft_map: source of unit cell and density; takes precedence over
        ``unit_cell`` / ``raw_map``.
      unit_cell, raw_map: used together when ``fft_map`` is not given.
      periodic, wires: stored on the instance.
      positive_iso_level: explicit positive contour level.
      iso_level_positive_range_fraction: fraction of the maximum density
        used when ``positive_iso_level`` is not given.
      negative_iso_level: explicit negative contour level (optional).
      iso_level_negative_range_fraction: fraction of the minimum density
        used when ``negative_iso_level`` is not given (optional).
      **kwds: forwarded to the base class constructor.

    An explicit level always wins over the corresponding fraction. When
    neither negative option is given, ``self.negative_iso_level`` is None.
    """
    if fft_map is not None:
        unit_cell = fft_map.unit_cell()
    super(map_viewer, self).__init__(
        unit_cell=unit_cell, light_position=(-1, 1, 1, 0), **kwds)
    assert (fft_map is not None
            or (unit_cell is not None and raw_map is not None))
    assert (positive_iso_level is not None
            or iso_level_positive_range_fraction is not None)
    # The former assertion
    #   bool(negative_iso_level is not None)
    #     ^ bool(iso_level_negative_range_fraction is None)
    # only accepted "both given" or "both omitted", which made the
    # fraction-only branch below unreachable. It is dropped so that the
    # negative options behave like the positive ones; combinations that
    # used to pass still give the same result.
    if fft_map is not None:
        self.rho = fft_map.real_map()
    else:
        self.rho = raw_map
    density_stats = maptbx.statistics(self.rho)
    self.min_density = density_stats.min()
    self.max_density = density_stats.max()
    if positive_iso_level is not None:
        self.positive_iso_level = positive_iso_level
    else:
        self.positive_iso_level = (
            iso_level_positive_range_fraction * self.max_density)
    if (negative_iso_level is None
            and iso_level_negative_range_fraction is not None):
        negative_iso_level = (
            iso_level_negative_range_fraction * self.min_density)
    self.negative_iso_level = negative_iso_level
    self.periodic = periodic
    self.wires = wires
    self.compute_triangulation()
def exercise(args):
    """Run the built-in map test, then check each CCP4 map named in args.

    For every file the header values are printed and the statistics
    computed from the map data are asserted to agree with the header.
    """
    def show(label, value):
        # Equivalent to the Python 2 statement ``print label, value``;
        # a single-argument print() gives the same text on Python 2 and 3.
        print("%s %s" % (label, value))

    exercise_with_tst_input_map()
    for file_name in args:
        print(file_name)
        m = iotbx.ccp4_map.map_reader(file_name=file_name)
        show("header_min: ", m.header_min)
        show("header_max: ", m.header_max)
        show("header_mean:", m.header_mean)
        show("header_rms: ", m.header_rms)
        show("unit cell grid:", m.unit_cell_grid)
        show("unit cell parameters:", m.unit_cell_parameters)
        show("space group number: ", m.space_group_number)
        show("map origin:", m.data.origin())
        show("map grid: ", m.data.all())
        map_stats = maptbx.statistics(m.data)
        assert approx_equal(map_stats.min(), m.header_min)
        assert approx_equal(map_stats.max(), m.header_max)
        assert approx_equal(map_stats.mean(), m.header_mean)
        # The rms is only compared when the header stores a non-zero value.
        if m.header_rms != 0:
            assert approx_equal(map_stats.sigma(), m.header_rms)
        print("")
def recycle():
    """Round-trip random maps through "tmp.map" for several griddings.

    For each (n, first, last) case a random map is written with
    ``iotbx.xplor.map.writer`` and read back with the matching reader;
    every stored property is asserted to survive the round trip.
    """
    unit_cell_parameters = (10, 20, 30, 80, 90, 100)
    title = ["regression test"]
    tolerance = 1.e-4
    cases = [
        [(5, 3, 4), (0, 0, 0), (3, 5, 6)],
        [(4, 3, 5), (-1, -3, 4), (6, 4, 5)],
        [(3, 4, 5), (-2, 3, 0), (-2, 3, 0)],
        [(3, 4, 5), (-2, 3, 0), (-2, 3, 3)],
        [(3, 4, 5), (-2, 3, 0), (-2, 8, 0)],
        [(3, 4, 5), (-2, 3, 0), (-2, 9, 0)],
        [(3, 4, 5), (-2, 3, 0), (3, 3, 0)],
        [(3, 4, 5), (-2, 3, 0), (4, 3, 0)],
    ]
    for n, first, last in cases:
        map_gridding = iotbx.xplor.map.gridding(n=n, first=first, last=last)
        flex_grid = map_gridding.as_flex_grid()
        # Uniform random values in [-10000, 10000).
        data = 20000 * flex.random_double(size=flex_grid.size_1d()) - 10000
        data.resize(flex_grid)
        stats = maptbx.statistics(data)
        iotbx.xplor.map.writer(
            file_name="tmp.map",
            title_lines=["regression test"],
            unit_cell=uctbx.unit_cell(unit_cell_parameters),
            gridding=map_gridding,
            data=data,
            average=stats.mean(),
            standard_deviation=stats.sigma())

        read = iotbx.xplor.map.reader(file_name="tmp.map")
        assert read.title_lines == title
        assert read.gridding.n == n
        assert read.gridding.first == first
        assert read.gridding.last == last
        assert read.unit_cell.is_similar_to(
            uctbx.unit_cell(unit_cell_parameters))
        assert eps_eq(read.average, stats.mean(), eps=tolerance)
        assert eps_eq(read.standard_deviation, stats.sigma(), eps=tolerance)
        assert read.data.origin() == first
        assert read.data.last(False) == last
        assert read.data.focus() == data.focus()
        assert eps_eq(read.data, data, eps=tolerance)
def run_fast_terms(structure_fixed, structure_p1,
                   f_obs, f_calc_fixed, f_calc_p1,
                   symmetry_flags, gridding, grid_tags,
                   n_sample_grid_points=10,
                   test_origin=False,
                   verbose=0):
    """Compare fast_terms summation maps with direct structure factor sums.

    For ``squared_flag`` False and True a map is computed with
    ``translation_search.fast_terms``. At ``n_sample_grid_points`` grid
    points the map value (scaled by ``n_ltr()**p`` with p = 2 or 4) is
    asserted to agree, to six significant digits, with the weighted sum
    of calculated intensities (or squared intensities) of the fixed
    structure plus the P1 structure shifted to that grid point.

    Parameters:
      structure_fixed: structure that stays in place.
      structure_p1: structure that is shifted to the sampled grid sites.
      f_obs: observed data; supplies indices and the space group.
      f_calc_fixed: optional fixed-part structure factors (``None`` gives
        an empty ``flex.complex_double`` as ``f_part``).
      f_calc_p1: P1 structure factors of the shifted structure.
      symmetry_flags: flags passed to ``grid_tags.build``.
      gridding: grid dimensions of the summation map.
      grid_tags: tag object that is (re)built and verified here.
      n_sample_grid_points: number of grid points sampled per map.
      test_origin: when true, the first sample is the grid origin.
      verbose: when true, print the map statistics.
    """
    if f_calc_fixed is None:
        f_part = flex.complex_double()
    else:
        f_part = f_calc_fixed.data()
    # One random weight per reflection. range() instead of xrange() keeps
    # the function importable on Python 3.
    m = flex.double()
    for _ in range(f_obs.indices().size()):
        m.append(random.random())
    assert f_obs.anomalous_flag() == f_calc_p1.anomalous_flag()
    fast_terms = translation_search.fast_terms(
        gridding=gridding,
        anomalous_flag=f_obs.anomalous_flag(),
        miller_indices_p1_f_calc=f_calc_p1.indices(),
        p1_f_calc=f_calc_p1.data())
    for squared_flag in (False, True):
        # Named target_map (not "map") so the builtin is not shadowed.
        target_map = fast_terms.summation(
            space_group=f_obs.space_group(),
            miller_indices_f_obs=f_obs.indices(),
            m=m,
            f_part=f_part,
            squared_flag=squared_flag).fft().accu_real_copy()
        assert target_map.all() == gridding
        map_stats = maptbx.statistics(target_map)
        if verbose:
            map_stats.show_summary()
        grid_tags.build(f_obs.space_group_info().type(), symmetry_flags)
        assert grid_tags.n_grid_misses() == 0
        assert grid_tags.verify(target_map)
        for i_sample in range(n_sample_grid_points):
            # Draw grid points until the combined structure has no atom
            # on a special position; give up after 1000 attempts.
            run_away_counter = 0
            while True:
                run_away_counter += 1
                assert run_away_counter < 1000
                if i_sample == 0 and test_origin:
                    grid_point = [0, 0, 0]
                else:
                    grid_point = [random.randrange(g) for g in gridding]
                grid_site = [
                    float(x) / g for x, g in zip(grid_point, gridding)]
                structure_shifted = structure_fixed.deep_copy_scatterers()
                assert (
                    structure_shifted.special_position_indices().size() == 0)
                structure_shifted.add_scatterers(
                    scatterers=structure_p1.apply_shift(
                        grid_site).scatterers())
                if structure_shifted.special_position_indices().size() == 0:
                    break
                # The origin sample must never need a retry.
                if test_origin:
                    assert i_sample != 0
            i_grid = flex.norm(f_obs.structure_factors_from_scatterers(
                xray_structure=structure_shifted,
                algorithm="direct").f_calc().data())
            p = 4 if squared_flag else 2
            map_value = (
                target_map[grid_point] * f_obs.space_group().n_ltr() ** p)
            if not squared_flag:
                sum_m_i_grid = flex.sum(m * i_grid)
            else:
                sum_m_i_grid = flex.sum(m * flex.pow2(i_grid))
            assert "%.6g" % sum_m_i_grid == "%.6g" % map_value, (
                sum_m_i_grid, map_value)
def __init__(self,
             map_1,
             xray_structure,
             fft_map,
             atom_radius,
             hydrogen_atom_radius,
             model_i,
             number_previous_scatters,
             ignore_hd=False,
             residue_detail=True,
             selection=None,
             pdb_hierarchy=None):
    """Sample map_1 around atoms or residues and store the interpolated
    density values as occupancies of ``xray_structure``.

    With ``residue_detail`` false, every selected atom gets the map value
    interpolated at its site. With ``residue_detail`` true (requires
    ``pdb_hierarchy``), every atom of a residue gets the mean interpolated
    value of that residue, optionally leaving H/D atoms out of the mean
    (``ignore_hd``). Grid selections are kept in ``self.gifes`` and
    per-item results in ``self._result``; the residue list is attached to
    the structure as ``residue_selections``.
    """
    self.xray_structure = xray_structure
    self.selection = selection
    self.pdb_hierarchy = pdb_hierarchy
    self.result = []
    self.map_1_size = map_1.size()
    self.map_1_stat = maptbx.statistics(map_1)
    self.atoms_with_labels = None
    self.residue_detail = residue_detail
    self.model_i = model_i
    if pdb_hierarchy is not None:
        self.atoms_with_labels = list(pdb_hierarchy.atoms_with_labels())
    scatterers = self.xray_structure.scatterers()
    sigma_occ = flex.double()
    if self.selection is None:
        self.selection = flex.bool(scatterers.size(), True)
    real_map_unpadded = fft_map.real_map_unpadded()
    sites_cart = self.xray_structure.sites_cart()

    if not self.residue_detail:
        # Per-atom mode.
        self.gifes = [None] * scatterers.size()
        self._result = [None] * scatterers.size()

        # Selected H/D atoms get their own radius.
        atom_radii = flex.double(scatterers.size(), atom_radius)
        for i_seq, sc in enumerate(scatterers):
            if (self.selection[i_seq]
                    and sc.element_symbol().strip().lower() in ["h", "d"]):
                atom_radii[i_seq] = hydrogen_atom_radius

        for i_seq, site_cart in enumerate(sites_cart):
            if not self.selection[i_seq]:
                continue
            sel = maptbx.grid_indices_around_sites(
                unit_cell=self.xray_structure.unit_cell(),
                fft_n_real=real_map_unpadded.focus(),
                fft_m_real=real_map_unpadded.all(),
                sites_cart=flex.vec3_double([site_cart]),
                site_radii=flex.double([atom_radii[i_seq]]))
            self.gifes[i_seq] = sel
            m1 = map_1.select(sel)
            ed1 = map_1.eight_point_interpolation(scatterers[i_seq].site)
            sigma_occ.append(ed1)
            if self.atoms_with_labels is not None:
                atom = self.atoms_with_labels[i_seq]
            else:
                atom = None
            self._result[i_seq] = group_args(
                atom=atom, m1=m1, ed1=ed1, xyz=site_cart)
        self.xray_structure.set_occupancies(sigma_occ)

        # For testing other residue averaging options
        residues = self.extract_residues(
            model_i=model_i,
            number_previous_scatters=number_previous_scatters)
        self.xray_structure.residue_selections = residues
    else:
        # Per-residue mode.
        assert self.pdb_hierarchy is not None
        residues = self.extract_residues(
            model_i=model_i,
            number_previous_scatters=number_previous_scatters)
        self.gifes = [None] * len(residues)
        self._result = [None] * len(residues)
        for i_res, residue in enumerate(residues):
            residue_sites_cart = sites_cart.select(residue.selection)
            sel = maptbx.grid_indices_around_sites(
                unit_cell=self.xray_structure.unit_cell(),
                fft_n_real=real_map_unpadded.focus(),
                fft_m_real=real_map_unpadded.all(),
                sites_cart=residue_sites_cart,
                site_radii=flex.double(residue.selection.size(),
                                       atom_radius))
            self.gifes[i_res] = sel
            m1 = map_1.select(sel)

            # Interpolated density at every atom of the residue.
            ed1 = flex.double()
            for i_seq_r in residue.selection:
                ed1.append(map_1.eight_point_interpolation(
                    scatterers[i_seq_r].site))
            self._result[i_res] = group_args(
                residue=residue,
                m1=m1,
                ed1=flex.mean(ed1),
                xyz=residue_sites_cart.mean(),
                n_atoms=residue_sites_cart.size())

            # Values that enter the residue mean (H/D optionally left out).
            residue_scatterers = scatterers.select(residue.selection)
            residue_ed1 = flex.double()
            for n, scatter in enumerate(residue_scatterers):
                if (not ignore_hd
                        or scatter.element_symbol() not in ['H', 'D']):
                    residue_ed1.append(ed1[n])
            # Every atom of the residue receives the residue mean.
            for _ in range(ed1.size()):
                sigma_occ.append(flex.mean(residue_ed1))
        self.xray_structure.set_occupancies(sigma_occ)
        self.xray_structure.residue_selections = residues
    del map_1
def run(self, args, command_name, out=sys.stdout):
  """Score the models of an ensemble PDB file against a map.

  Two modes are selected by the extracted ``ensemble_probability``
  parameters:

  * ``assign_sigma_from_map`` with ``ensemble_sigma_map_input`` set: map
    coefficients are taken from the given MTZ file, ``get_map_sigma`` is
    run per model and the ensemble is written to
    ``<pdb>_vs_<mtz>_pensemble.pdb``.
  * otherwise: Fcalc is computed per model, averaged over the ensemble,
    each model is scored against the average map (ratio of the values
    returned by ``get_map_sigma``), ``<pdb>_pensemble.pdb`` is written
    and, optionally, the models are ranked by summed negative
    log-likelihood and written to ``nll_ordered_<pdb>_pensemble.pdb``.

  Side effects visible here: ``self.params`` and ``self.log`` are set,
  ``sys.stderr`` is rebound to ``self.log``, and a ``*_pensemble.log``
  file is opened for writing. The ``out`` argument and the locals
  ``command_line`` and ``cif_file`` are not used in this body.

  Raises Sorry if not exactly one PDB file is given, if reflection data
  are required but missing, if the map input is not MTZ, or if neither
  a resolution limit nor reflection data are available.

  NOTE(review): the nesting below was reconstructed from a flattened
  copy of the source -- confirm block boundaries against the original.
  """
  command_line = (iotbx_option_parser(
    usage="%s [options]" % command_name,
    description='Example: %s data.mtz data.mtz ref_model.pdb'%command_name)
    .option(None, "--show_defaults",
      action="store_true",
      help="Show list of parameters.")
    ).process(args=args)
  cif_file = None
  processed_args = utils.process_command_line_args(
    args = args,
    log = sys.stdout,
    master_params = master_phil)
  params = processed_args.params
  # Fall back to the master parameters when none were supplied.
  if(params is None): params = master_phil
  self.params = params.extract().ensemble_probability
  pdb_file_names = processed_args.pdb_file_names
  if len(pdb_file_names) != 1 :
    raise Sorry("Only one PDB structure may be used")
  pdb_file = file_reader.any_file(pdb_file_names[0])
  # Log to stdout and to a buffer that is swapped for the log file below.
  self.log = multi_out()
  self.log.register(label="stdout", file_object=sys.stdout)
  self.log.register(
    label="log_buffer",
    file_object=StringIO(),
    atexit_send_to=None)
  sys.stderr = self.log
  # Log file name: basename of the PDB file without ".pdb".
  # NOTE(review): log_file is not closed in this function.
  log_file = open(pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.log', "w")
  self.log.replace_stringio(
    old_label="log_buffer",
    new_label="log",
    new_file_object=log_file)
  utils.print_header(command_name, out = self.log)
  params.show(out = self.log)
  #
  f_obs = None
  r_free_flags = None
  reflection_files = processed_args.reflection_files
  if self.params.fobs_vs_fcalc_post_nll:
    if len(reflection_files) == 0:
      raise Sorry("Fobs from input MTZ required for fobs_vs_fcalc_post_nll")
  if len(reflection_files) > 0:
    # Extract Fobs and R-free flags from the reflection file(s).
    crystal_symmetry = processed_args.crystal_symmetry
    print >> self.log, 'Reflection file : ', processed_args.reflection_file_names[0]
    utils.print_header("Model and data statistics", out = self.log)
    rfs = reflection_file_server(
      crystal_symmetry = crystal_symmetry,
      reflection_files = processed_args.reflection_files,
      log = self.log)
    parameters = utils.data_and_flags_master_params().extract()
    determine_data_and_flags_result = utils.determine_data_and_flags(
      reflection_file_server = rfs,
      parameters = parameters,
      data_parameter_scope = "refinement.input.xray_data",
      flags_parameter_scope = "refinement.input.xray_data.r_free_flags",
      data_description = "X-ray data",
      keep_going = True,
      log = self.log)
    f_obs = determine_data_and_flags_result.f_obs
    number_of_reflections = f_obs.indices().size()
    r_free_flags = determine_data_and_flags_result.r_free_flags
    test_flag_value = determine_data_and_flags_result.test_flag_value
    # Without free flags, use an all-False flag array matching Fobs.
    if(r_free_flags is None):
      r_free_flags=f_obs.array(data=flex.bool(f_obs.data().size(), False))
  # process PDB
  pdb_file.assert_file_type("pdb")
  #
  pdb_in = hierarchy.input(file_name=pdb_file.file_name)
  ens_pdb_hierarchy = pdb_in.construct_hierarchy()
  ens_pdb_hierarchy.atoms().reset_i_seq()
  # One xray structure per model of the ensemble.
  ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
  number_structures = len(ens_pdb_xrs_s)
  print >> self.log, 'Number of structure in ensemble : ', number_structures
  # Calculate sigmas from input map only
  if self.params.assign_sigma_from_map and self.params.ensemble_sigma_map_input is not None:
    # process MTZ
    input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
    if input_file.file_type == "hkl" :
      if input_file.file_object.file_type() != "ccp4_mtz" :
        raise Sorry("Only MTZ format accepted for map input")
      else:
        mtz_file = input_file
    else:
      raise Sorry("Only MTZ format accepted for map input")
    miller_arrays = mtz_file.file_server.miller_arrays
    # The first array in the MTZ file is used as the map coefficients.
    map_coeffs_1 = miller_arrays[0]
    #
    xrs_list = []
    for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      # get sigma levels from ensemble fc for each structure
      xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                          ens_pdb_xrs = ens_pdb_xrs,
                          map_coeffs_1 = map_coeffs_1,
                          residue_detail = self.params.residue_detail,
                          ignore_hd = self.params.ignore_hd,
                          log = self.log)
      xrs_list.append(xrs)
    # write ensemble pdb file, occupancies as sigma level
    filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_vs_' + self.params.ensemble_sigma_map_input.replace('.mtz','') + '_pensemble.pdb'
    write_ensemble_pdb(filename = filename,
                       xrs_list = xrs_list,
                       ens_pdb_hierarchy = ens_pdb_hierarchy
                       )
  # Do full analysis vs Fobs
  else:
    model_map_coeffs = []
    fmodel = None
    # Get <fcalc>
    for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      ens_pdb_xrs.set_occupancies(1.0)
      if model == 0:
        # The fmodel object is created once, for the first model, and
        # updated in place for all later models.
        # If mtz not supplied get fobs from xray structure...
        # Use input Fobs for scoring against nll
        if self.params.fobs_vs_fcalc_post_nll:
          dummy_fobs = f_obs
        else:
          if f_obs == None:
            if self.params.fcalc_high_resolution == None:
              raise Sorry("Please supply high resolution limit or input mtz file.")
            dummy_dmin = self.params.fcalc_high_resolution
            dummy_dmax = self.params.fcalc_low_resolution
          else:
            print >> self.log, 'Supplied mtz used to determine high and low resolution cuttoffs'
            dummy_dmax, dummy_dmin = f_obs.d_max_min()
          #
          # |Fcalc| of the first model stands in for Fobs.
          dummy_fobs = abs(ens_pdb_xrs.structure_factors(d_min = dummy_dmin).f_calc())
          dummy_fobs.set_observation_type_xray_amplitude()
          # If mtz supplied, free flags are over written to prevent array size error
          r_free_flags = dummy_fobs.array(data=flex.bool(dummy_fobs.data().size(),False))
        #
        fmodel = utils.fmodel_simple(
          scattering_table = "wk1995",
          xray_structures = [ens_pdb_xrs],
          f_obs = dummy_fobs,
          target_name = 'ls',
          bulk_solvent_and_scaling = False,
          r_free_flags = r_free_flags
          )
        # Zero-valued array with the Fcalc indices; used later as a
        # template to wrap averaged data.
        f_calc_ave = fmodel.f_calc().array(data = fmodel.f_calc().data()*0).deep_copy()
        # XXX Important to ensure scale is identical for each model and <model>
        fmodel.set_scale_switch = 1.0
        f_calc_ave_total = fmodel.f_calc().data().deep_copy()
      else:
        fmodel.update_xray_structure(xray_structure = ens_pdb_xrs,
                                     update_f_calc = True,
                                     update_f_mask = False)
        # Running sum of Fcalc over all models.
        f_calc_ave_total += fmodel.f_calc().data().deep_copy()
      print >> self.log, 'Model :', model+1
      print >> self.log, "\nStructure vs real Fobs (no bulk solvent or scaling)"
      print >> self.log, 'Rwork : %5.4f '%fmodel.r_work()
      print >> self.log, 'Rfree : %5.4f '%fmodel.r_free()
      print >> self.log, 'K1 : %5.4f '%fmodel.scale_k1()
      fcalc_edm = fmodel.electron_density_map()
      fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type = 'Fc')
      fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
      if self.params.output_model_and_model_ave_mtz:
        fcalc_mtz_dataset.mtz_object().write(file_name = str(model+1)+"_Fc.mtz")
      model_map_coeffs.append(fcalc_map_coeffs.deep_copy())
    # Replace Fcalc in fmodel by the ensemble average.
    fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total / number_structures))
    print >> self.log, "\nEnsemble vs real Fobs (no bulk solvent or scaling)"
    print >> self.log, 'Rwork : %5.4f '%fmodel.r_work()
    print >> self.log, 'Rfree : %5.4f '%fmodel.r_free()
    print >> self.log, 'K1 : %5.4f '%fmodel.scale_k1()
    # Get <Fcalc> map
    fcalc_ave_edm = fmodel.electron_density_map()
    fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(map_type = 'Fc').deep_copy()
    fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(column_root_label ='Fc')
    if self.params.output_model_and_model_ave_mtz:
      fcalc_ave_mtz_dataset.mtz_object().write(file_name = "aveFc.mtz")
    # From here on fcalc_ave_map_coeffs holds the FFT map, not coefficients.
    fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
    fcalc_ave_map_coeffs.apply_volume_scaling()
    fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
    fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)
    print >> self.log, "<Fcalc> Map Stats :"
    fcalc_ave_map_stats.show_summary(f = self.log)
    offset = fcalc_ave_map_stats.min()
    model_neg_ll = []
    number_previous_scatters = 0
    # Run through structure list again and get probability
    xrs_list = []
    for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      if self.params.verbose:
        print >> self.log, '\n\nModel : ', model+1
      # Get model atom sigmas vs Fcalc
      fcalc_map = model_map_coeffs[model].fft_map()
      fcalc_map.apply_volume_scaling()
      fcalc_map_data = fcalc_map.real_map_unpadded()
      fcalc_map_stats = maptbx.statistics(fcalc_map_data)
      # NOTE(review): show_summary is assumed to belong to the verbose
      # block together with its heading -- confirm.
      if self.params.verbose:
        print >> self.log, "Fcalc map stats :"
        fcalc_map_stats.show_summary(f = self.log)
      xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                          ens_pdb_xrs = ens_pdb_xrs,
                          fft_map_1 = fcalc_map,
                          model_i = model,
                          residue_detail = self.params.residue_detail,
                          ignore_hd = self.params.ignore_hd,
                          number_previous_scatters = number_previous_scatters,
                          log = self.log)
      fcalc_sigmas = xrs.scatterers().extract_occupancies()
      del fcalc_map
      # Get model atom sigmas vs <Fcalc>
      xrs = get_map_sigma(ens_pdb_hierarchy = ens_pdb_hierarchy,
                          ens_pdb_xrs = ens_pdb_xrs,
                          fft_map_1 = fcalc_ave_map_coeffs,
                          model_i = model,
                          residue_detail = self.params.residue_detail,
                          ignore_hd = self.params.ignore_hd,
                          number_previous_scatters = number_previous_scatters,
                          log = self.log)
      ### For testing other residue averaging options
      #print xrs.residue_selections
      fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
      # Probability of model given <model>
      prob = fcalc_ave_sigmas / fcalc_sigmas
      # XXX debug option
      if False:
        for n,p in enumerate(prob):
          print >> self.log, ' {0:5d} {1:5.3f}'.format(n,p)
      # Set probability between 0 and 1
      # XXX Make Histogram / more stats
      prob_lss_zero = flex.bool(prob <= 0)
      prob_grt_one = flex.bool(prob > 1)
      # Values <= 0 become 0.001 (not 0), values > 1 become 1.0.
      prob.set_selected(prob_lss_zero, 0.001)
      prob.set_selected(prob_grt_one, 1.0)
      xrs.set_occupancies(prob)
      xrs_list.append(xrs)
      sum_neg_ll = sum(-flex.log(prob))
      # (score, model index) pairs; sorted by score further below.
      model_neg_ll.append((sum_neg_ll, model))
      if self.params.verbose:
        print >> self.log, 'Model probability stats :'
        print >> self.log, prob.min_max_mean().show()
        print >> self.log, ' Count < 0.0 : ', prob_lss_zero.count(True)
        print >> self.log, ' Count > 1.0 : ', prob_grt_one.count(True)
      # For averaging by residue
      number_previous_scatters += ens_pdb_xrs.sites_cart().size()
    # write ensemble pdb file, occupancies as sigma level
    write_ensemble_pdb(filename = pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                       xrs_list = xrs_list,
                       ens_pdb_hierarchy = ens_pdb_hierarchy
                       )
    # XXX Test ordering models by nll
    # XXX Test removing nth percentile atoms
    if self.params.sort_ensemble_by_nll_score or self.params.fobs_vs_fcalc_post_nll:
      for percentile in [1.0,0.975,0.95,0.9,0.8,0.6,0.2]:
        model_neg_ll = sorted(model_neg_ll)
        f_calc_ave_total_reordered = None
        print_list = []
        # Models are added one by one in order of increasing score; the
        # running average is re-scaled after each addition.
        for i_neg_ll in model_neg_ll:
          xrs = xrs_list[i_neg_ll[1]]
          nll_occ = xrs.scatterers().extract_occupancies()
          # Set q=0 nth percentile atoms
          sorted_nll_occ = sorted(nll_occ, reverse=True)
          number_atoms = len(sorted_nll_occ)
          percentile_prob_cutoff = sorted_nll_occ[int(number_atoms * percentile)-1]
          cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
          # Occupancy 1.0 everywhere except 0.0 for atoms below the cutoff.
          cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(cutoff_selections, 0.0)
          #XXX Debug
          if False:
            print '\nDebug'
            for x in xrange(len(cutoff_selections)):
              print cutoff_selections[x], nll_occ[x], cutoff_nll_occ[x]
            print percentile
            print percentile_prob_cutoff
            print cutoff_selections.count(True)
            print cutoff_selections.size()
            print cutoff_nll_occ.count(0.0)
            print 'Count q = 1 : ', cutoff_nll_occ.count(1.0)
            print 'Count scatterers size : ', cutoff_nll_occ.size()
          xrs.set_occupancies(cutoff_nll_occ)
          fmodel.update_xray_structure(xray_structure = xrs,
                                       update_f_calc = True,
                                       update_f_mask = True)
          if f_calc_ave_total_reordered == None:
            f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
            f_mask_ave_total_reordered = fmodel.f_masks()[0].data().deep_copy()
            cntr = 1
          else:
            f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
            f_mask_ave_total_reordered += fmodel.f_masks()[0].data().deep_copy()
            cntr+=1
          fmodel.update(f_calc = f_calc_ave.array(f_calc_ave_total_reordered / cntr).deep_copy(),
                        f_mask = f_calc_ave.array(f_mask_ave_total_reordered / cntr).deep_copy()
                        )
          # Update solvent and scale
          # XXX Will need to apply_back_trace on latest version
          fmodel.set_scale_switch = 0
          fmodel.update_all_scales()
          # Reset occ for output
          xrs.set_occupancies(nll_occ)
          # k1 updated vs Fobs
          if self.params.fobs_vs_fcalc_post_nll:
            print_list.append([cntr, i_neg_ll[0], i_neg_ll[1], fmodel.r_work(), fmodel.r_free()])
        # Order models by nll and print summary
        print >> self.log, '\nModels ranked by nll <Fcalc> R-factors recalculated'
        print >> self.log, 'Percentile cutoff : {0:5.3f}'.format(percentile)
        xrs_list_sorted_nll = []
        print >> self.log, ' | NLL <Rw> <Rf> Ens Model'
        for info in print_list:
          print >> self.log, ' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
            info[0],
            info[1],
            info[3],
            info[4],
            info[2]+1,
            )
          xrs_list_sorted_nll.append(xrs_list[info[2]])
      # Output nll ordered ensemble
      # NOTE(review): assumed to run once after the percentile loop, using
      # the list built for the last percentile -- confirm placement.
      write_ensemble_pdb(filename = 'nll_ordered_' + pdb_file_names[0].split('/')[-1].replace('.pdb','') + '_pensemble.pdb',
                         xrs_list = xrs_list_sorted_nll,
                         ens_pdb_hierarchy = ens_pdb_hierarchy
                         )
def __init__(self,
             map_1,
             xray_structure,
             fft_map,
             atom_radius,
             hydrogen_atom_radius,
             model_i,
             number_previous_scatters,
             ignore_hd = False,
             residue_detail = True,
             selection = None,
             pdb_hierarchy = None):
  """Sample map_1 at the scatterer sites and store the values as occupancies.

  Two modes, chosen by residue_detail:

  * residue_detail false: every selected scatterer gets the map value
    interpolated at its own site (map_1.eight_point_interpolation).
  * residue_detail true: scatterers are grouped with self.extract_residues();
    every atom of a residue gets the mean of the interpolated values of
    that residue (optionally leaving 'H'/'D' atoms out of the mean).

  In both modes the values are written with
  xray_structure.set_occupancies(), the residue groups are attached as
  xray_structure.residue_selections, and the per-atom / per-residue
  records are kept in self._result, with the grid indices around each
  atom / residue in self.gifes.

  map_1 -- map that is sampled (select / eight_point_interpolation)
  xray_structure -- structure whose occupancies are overwritten
  fft_map -- only real_map_unpadded() is used, for the grid dimensions
  atom_radius -- radius passed to maptbx.grid_indices_around_sites
  hydrogen_atom_radius -- radius used instead for h/d elements
    (atom mode only; residue mode uses atom_radius for all atoms)
  model_i, number_previous_scatters -- forwarded to extract_residues()
  ignore_hd -- residue mode: exclude 'H'/'D' from the residue mean
  selection -- flex.bool over scatterers; defaults to all True
  pdb_hierarchy -- required (asserted) when residue_detail is true
  """
  self.xray_structure = xray_structure
  self.selection = selection
  self.pdb_hierarchy = pdb_hierarchy
  self.result = []
  self.map_1_size = map_1.size()
  self.map_1_stat = maptbx.statistics(map_1)
  self.atoms_with_labels = None
  self.residue_detail = residue_detail
  self.model_i = model_i
  if(pdb_hierarchy is not None):
    self.atoms_with_labels = list(pdb_hierarchy.atoms_with_labels())
  scatterers = self.xray_structure.scatterers()
  sigma_occ = flex.double()
  if(self.selection is None):
    self.selection = flex.bool(scatterers.size(), True)
  real_map_unpadded = fft_map.real_map_unpadded()
  sites_cart = self.xray_structure.sites_cart()

  if not self.residue_detail:
    self.gifes = [None,]*scatterers.size()
    self._result = [None,]*scatterers.size()
    # Per-atom radii: hydrogen/deuterium get their own radius.
    atom_radii = flex.double(scatterers.size(), atom_radius)
    for i_seq, sc in enumerate(scatterers):
      if(self.selection[i_seq]):
        if(sc.element_symbol().strip().lower() in ["h","d"]):
          atom_radii[i_seq] = hydrogen_atom_radius
    # NOTE(review): sigma_occ only receives a value for selected atoms, so
    # it is shorter than the scatterer array when the selection is partial;
    # confirm set_occupancies() below is only used with a full selection.
    for i_seq, site_cart in enumerate(sites_cart):
      if(self.selection[i_seq]):
        sel = maptbx.grid_indices_around_sites(
          unit_cell  = self.xray_structure.unit_cell(),
          fft_n_real = real_map_unpadded.focus(),
          fft_m_real = real_map_unpadded.all(),
          sites_cart = flex.vec3_double([site_cart]),
          site_radii = flex.double([atom_radii[i_seq]]))
        self.gifes[i_seq] = sel
        m1 = map_1.select(sel)
        ed1 = map_1.eight_point_interpolation(scatterers[i_seq].site)
        sigma_occ.append(ed1)
        a = None
        if(self.atoms_with_labels is not None):
          a = self.atoms_with_labels[i_seq]
        self._result[i_seq] = group_args(atom = a,
                                         m1 = m1,
                                         ed1 = ed1,
                                         xyz = site_cart)
    self.xray_structure.set_occupancies(sigma_occ)

    ### For testing other residue averaging options
    residues = self.extract_residues(
      model_i = model_i,
      number_previous_scatters = number_previous_scatters)
    self.xray_structure.residue_selections = residues

  # Residue detail
  if self.residue_detail:
    assert self.pdb_hierarchy is not None
    residues = self.extract_residues(
      model_i = model_i,
      number_previous_scatters = number_previous_scatters)
    self.gifes = [None,]*len(residues)
    self._result = [None,]*len(residues)
    for i_seq, residue in enumerate(residues):
      residue_sites_cart = sites_cart.select(residue.selection)
      sel = maptbx.grid_indices_around_sites(
        unit_cell  = self.xray_structure.unit_cell(),
        fft_n_real = real_map_unpadded.focus(),
        fft_m_real = real_map_unpadded.all(),
        sites_cart = residue_sites_cart,
        site_radii = flex.double(residue.selection.size(), atom_radius))
      self.gifes[i_seq] = sel
      m1 = map_1.select(sel)
      # Interpolated map value at every atom of the residue.
      ed1 = flex.double()
      for i_seq_r in residue.selection:
        ed1.append(map_1.eight_point_interpolation(scatterers[i_seq_r].site))
      self._result[i_seq] = \
        group_args(residue = residue,
                   m1 = m1,
                   ed1 = flex.mean(ed1),
                   xyz = residue_sites_cart.mean(),
                   n_atoms = residue_sites_cart.size())
      # Values that contribute to the residue average.
      residue_scatterers = scatterers.select(residue.selection)
      residue_ed1 = flex.double()
      for n, scatter in enumerate(residue_scatterers):
        if not ignore_hd or scatter.element_symbol() not in ['H', 'D']:
          residue_ed1.append(ed1[n])
      # Every atom of the residue receives the same average, so compute it
      # once instead of once per atom.
      residue_mean = flex.mean(residue_ed1)
      for _ in range(ed1.size()):
        sigma_occ.append(residue_mean)
    self.xray_structure.set_occupancies(sigma_occ)
    self.xray_structure.residue_selections = residues

  del map_1
def run(self, args, command_name, out=sys.stdout):
  """Command-line driver: score the models of a multi-model PDB ensemble.

  Exactly one PDB file must be given (Sorry is raised otherwise). Output
  is logged to stdout and to '<pdb base name>_pensemble.log'; sys.stderr
  is redirected to the same log object.

  Two modes:

  * params.assign_sigma_from_map with params.ensemble_sigma_map_input
    set: the first Miller array of that MTZ file is passed to
    get_map_sigma() for every model and the results are written with
    write_ensemble_pdb().
  * otherwise: Fcalc is computed per model and averaged over the
    ensemble; for every model get_map_sigma() is evaluated against the
    model's own Fc map and against the <Fcalc> map, the ratio (limited to
    (0, 1]) is stored as occupancy, and the sum of -log(ratio) is kept as
    the model's negative log-likelihood (nll). Optionally the models are
    re-ranked by nll and R-factors are recalculated for several
    percentile cutoffs.

  args -- command-line arguments
  command_name -- program name used in usage text and headers
  out -- not used by this method
  """
  command_line = (iotbx_option_parser(
    usage="%s [options]" % command_name,
    description='Example: %s data.mtz data.mtz ref_model.pdb' % command_name
    ).option(None, "--show_defaults",
      action="store_true",
      help="Show list of parameters.")
    ).process(args=args)

  cif_file = None
  processed_args = utils.process_command_line_args(
    args=args,
    log=sys.stdout,
    master_params=master_phil)
  params = processed_args.params
  if (params is None):
    params = master_phil
  self.params = params.extract().ensemble_probability
  pdb_file_names = processed_args.pdb_file_names
  if len(pdb_file_names) != 1:
    raise Sorry("Only one PDB structure may be used")
  pdb_file = file_reader.any_file(pdb_file_names[0])
  # Base name shared by every output file written below.
  pdb_root = pdb_file_names[0].split('/')[-1].replace('.pdb', '')
  self.log = multi_out()
  self.log.register(label="stdout", file_object=sys.stdout)
  self.log.register(
    label="log_buffer",
    file_object=StringIO(),
    atexit_send_to=None)
  sys.stderr = self.log
  # The log file stays open on purpose: self.log (and sys.stderr) keep
  # writing to it after this point.
  log_file = open(pdb_root + '_pensemble.log', "w")
  self.log.replace_stringio(
    old_label="log_buffer",
    new_label="log",
    new_file_object=log_file)
  utils.print_header(command_name, out=self.log)
  params.show(out=self.log)
  #
  f_obs = None
  r_free_flags = None
  reflection_files = processed_args.reflection_files

  if self.params.fobs_vs_fcalc_post_nll:
    if len(reflection_files) == 0:
      raise Sorry(
        "Fobs from input MTZ required for fobs_vs_fcalc_post_nll")

  if len(reflection_files) > 0:
    crystal_symmetry = processed_args.crystal_symmetry
    print('Reflection file : ', processed_args.reflection_file_names[0],
          file=self.log)
    utils.print_header("Model and data statistics", out=self.log)
    rfs = reflection_file_server(
      crystal_symmetry=crystal_symmetry,
      reflection_files=processed_args.reflection_files,
      log=self.log)

    parameters = extract_xtal_data.data_and_flags_master_params().extract()
    determine_data_and_flags_result = extract_xtal_data.run(
      reflection_file_server=rfs,
      parameters=parameters,
      data_parameter_scope="refinement.input.xray_data",
      flags_parameter_scope="refinement.input.xray_data.r_free_flags",
      data_description="X-ray data",
      keep_going=True,
      log=self.log)
    f_obs = determine_data_and_flags_result.f_obs
    number_of_reflections = f_obs.indices().size()
    r_free_flags = determine_data_and_flags_result.r_free_flags
    test_flag_value = determine_data_and_flags_result.test_flag_value
    if (r_free_flags is None):
      r_free_flags = f_obs.array(
        data=flex.bool(f_obs.data().size(), False))

  # process PDB
  pdb_file.assert_file_type("pdb")
  #
  pdb_in = hierarchy.input(file_name=pdb_file.file_name)
  ens_pdb_hierarchy = pdb_in.construct_hierarchy()
  ens_pdb_hierarchy.atoms().reset_i_seq()
  ens_pdb_xrs_s = pdb_in.input.xray_structures_simple()
  number_structures = len(ens_pdb_xrs_s)
  print('Number of structure in ensemble : ', number_structures,
        file=self.log)

  # Calculate sigmas from input map only
  if (self.params.assign_sigma_from_map
      and self.params.ensemble_sigma_map_input is not None):
    # process MTZ
    input_file = file_reader.any_file(self.params.ensemble_sigma_map_input)
    if input_file.file_type == "hkl":
      if input_file.file_object.file_type() != "ccp4_mtz":
        raise Sorry("Only MTZ format accepted for map input")
      else:
        mtz_file = input_file
    else:
      raise Sorry("Only MTZ format accepted for map input")
    miller_arrays = mtz_file.file_server.miller_arrays
    map_coeffs_1 = miller_arrays[0]
    #
    xrs_list = []
    for n, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      # get sigma levels from ensemble fc for each structure
      xrs = get_map_sigma(
        ens_pdb_hierarchy=ens_pdb_hierarchy,
        ens_pdb_xrs=ens_pdb_xrs,
        map_coeffs_1=map_coeffs_1,
        residue_detail=self.params.residue_detail,
        ignore_hd=self.params.ignore_hd,
        log=self.log)
      xrs_list.append(xrs)
    # write ensemble pdb file, occupancies as sigma level
    filename = (pdb_root + '_vs_'
                + self.params.ensemble_sigma_map_input.replace('.mtz', '')
                + '_pensemble.pdb')
    write_ensemble_pdb(
      filename=filename,
      xrs_list=xrs_list,
      ens_pdb_hierarchy=ens_pdb_hierarchy)

  # Do full analysis vs Fobs
  else:
    model_map_coeffs = []
    fmodel = None
    # Get <fcalc>
    for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      ens_pdb_xrs.set_occupancies(1.0)
      if model == 0:
        # If mtz not supplied get fobs from xray structure...
        # Use input Fobs for scoring against nll
        if self.params.fobs_vs_fcalc_post_nll:
          dummy_fobs = f_obs
        else:
          if f_obs is None:
            if self.params.fcalc_high_resolution is None:
              raise Sorry(
                "Please supply high resolution limit or input mtz file.")
            dummy_dmin = self.params.fcalc_high_resolution
            dummy_dmax = self.params.fcalc_low_resolution
          else:
            print(
              'Supplied mtz used to determine high and low resolution cuttoffs',
              file=self.log)
            dummy_dmax, dummy_dmin = f_obs.d_max_min()
          # Only the high-resolution limit is used for the dummy Fobs.
          dummy_fobs = abs(
            ens_pdb_xrs.structure_factors(d_min=dummy_dmin).f_calc())
          dummy_fobs.set_observation_type_xray_amplitude()
          # If mtz supplied, free flags are over written to prevent array size error
          r_free_flags = dummy_fobs.array(
            data=flex.bool(dummy_fobs.data().size(), False))
        #
        fmodel = utils.fmodel_simple(
          scattering_table="wk1995",
          xray_structures=[ens_pdb_xrs],
          f_obs=dummy_fobs,
          target_name='ls',
          bulk_solvent_and_scaling=False,
          r_free_flags=r_free_flags)
        f_calc_ave = fmodel.f_calc().array(
          data=fmodel.f_calc().data() * 0).deep_copy()
        # XXX Important to ensure scale is identical for each model and <model>
        fmodel.set_scale_switch = 1.0
        f_calc_ave_total = fmodel.f_calc().data().deep_copy()
      else:
        fmodel.update_xray_structure(
          xray_structure=ens_pdb_xrs,
          update_f_calc=True,
          update_f_mask=False)
        f_calc_ave_total += fmodel.f_calc().data().deep_copy()
      print('Model :', model + 1, file=self.log)
      print("\nStructure vs real Fobs (no bulk solvent or scaling)",
            file=self.log)
      print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
      print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
      print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)
      fcalc_edm = fmodel.electron_density_map()
      fcalc_map_coeffs = fcalc_edm.map_coefficients(map_type='Fc')
      fcalc_mtz_dataset = fcalc_map_coeffs.as_mtz_dataset(
        column_root_label='Fc')
      if self.params.output_model_and_model_ave_mtz:
        fcalc_mtz_dataset.mtz_object().write(
          file_name=str(model + 1) + "_Fc.mtz")
      model_map_coeffs.append(fcalc_map_coeffs.deep_copy())

    # Replace Fcalc of the last model by the ensemble average.
    fmodel.update(
      f_calc=f_calc_ave.array(f_calc_ave_total / number_structures))
    print("\nEnsemble vs real Fobs (no bulk solvent or scaling)",
          file=self.log)
    print('Rwork : %5.4f ' % fmodel.r_work(), file=self.log)
    print('Rfree : %5.4f ' % fmodel.r_free(), file=self.log)
    print('K1 : %5.4f ' % fmodel.scale_k1(), file=self.log)

    # Get <Fcalc> map
    fcalc_ave_edm = fmodel.electron_density_map()
    fcalc_ave_map_coeffs = fcalc_ave_edm.map_coefficients(
      map_type='Fc').deep_copy()
    fcalc_ave_mtz_dataset = fcalc_ave_map_coeffs.as_mtz_dataset(
      column_root_label='Fc')
    if self.params.output_model_and_model_ave_mtz:
      fcalc_ave_mtz_dataset.mtz_object().write(file_name="aveFc.mtz")
    # From here on the name holds the FFT map, not the coefficients.
    fcalc_ave_map_coeffs = fcalc_ave_map_coeffs.fft_map()
    fcalc_ave_map_coeffs.apply_volume_scaling()
    fcalc_ave_map_data = fcalc_ave_map_coeffs.real_map_unpadded()
    fcalc_ave_map_stats = maptbx.statistics(fcalc_ave_map_data)
    print("<Fcalc> Map Stats :", file=self.log)
    fcalc_ave_map_stats.show_summary(f=self.log)
    offset = fcalc_ave_map_stats.min()
    model_neg_ll = []

    number_previous_scatters = 0

    # Run through structure list again and get probability
    xrs_list = []
    for model, ens_pdb_xrs in enumerate(ens_pdb_xrs_s):
      if self.params.verbose:
        print('\n\nModel : ', model + 1, file=self.log)
      # Get model atom sigmas vs Fcalc
      fcalc_map = model_map_coeffs[model].fft_map()
      fcalc_map.apply_volume_scaling()
      fcalc_map_data = fcalc_map.real_map_unpadded()
      fcalc_map_stats = maptbx.statistics(fcalc_map_data)
      if self.params.verbose:
        print("Fcalc map stats :", file=self.log)
        fcalc_map_stats.show_summary(f=self.log)
      xrs = get_map_sigma(
        ens_pdb_hierarchy=ens_pdb_hierarchy,
        ens_pdb_xrs=ens_pdb_xrs,
        fft_map_1=fcalc_map,
        model_i=model,
        residue_detail=self.params.residue_detail,
        ignore_hd=self.params.ignore_hd,
        number_previous_scatters=number_previous_scatters,
        log=self.log)
      fcalc_sigmas = xrs.scatterers().extract_occupancies()
      del fcalc_map
      # Get model atom sigmas vs <Fcalc>
      xrs = get_map_sigma(
        ens_pdb_hierarchy=ens_pdb_hierarchy,
        ens_pdb_xrs=ens_pdb_xrs,
        fft_map_1=fcalc_ave_map_coeffs,
        model_i=model,
        residue_detail=self.params.residue_detail,
        ignore_hd=self.params.ignore_hd,
        number_previous_scatters=number_previous_scatters,
        log=self.log)
      ### For testing other residue averaging options
      #print xrs.residue_selections
      fcalc_ave_sigmas = xrs.scatterers().extract_occupancies()
      # Probability of model given <model>
      prob = fcalc_ave_sigmas / fcalc_sigmas
      # XXX debug option
      if False:
        for n, p in enumerate(prob):
          print(' {0:5d} {1:5.3f}'.format(n, p), file=self.log)
      # Set probabilty between 0 and 1
      # XXX Make Histogram / more stats
      prob_lss_zero = flex.bool(prob <= 0)
      prob_grt_one = flex.bool(prob > 1)
      prob.set_selected(prob_lss_zero, 0.001)
      prob.set_selected(prob_grt_one, 1.0)
      xrs.set_occupancies(prob)
      xrs_list.append(xrs)
      sum_neg_ll = sum(-flex.log(prob))
      model_neg_ll.append((sum_neg_ll, model))
      if self.params.verbose:
        print('Model probability stats :', file=self.log)
        # NOTE(review): show() presumably prints on its own and returns
        # None, in which case this logs "None" -- TODO confirm.
        print(prob.min_max_mean().show(), file=self.log)
        print(' Count < 0.0 : ', prob_lss_zero.count(True), file=self.log)
        print(' Count > 1.0 : ', prob_grt_one.count(True), file=self.log)

      # For averaging by residue
      number_previous_scatters += ens_pdb_xrs.sites_cart().size()

    # write ensemble pdb file, occupancies as sigma level
    write_ensemble_pdb(
      filename=pdb_root + '_pensemble.pdb',
      xrs_list=xrs_list,
      ens_pdb_hierarchy=ens_pdb_hierarchy)

    # XXX Test ordering models by nll
    # XXX Test removing nth percentile atoms
    if (self.params.sort_ensemble_by_nll_score
        or self.params.fobs_vs_fcalc_post_nll):
      for percentile in [1.0, 0.975, 0.95, 0.9, 0.8, 0.6, 0.2]:
        model_neg_ll = sorted(model_neg_ll)
        f_calc_ave_total_reordered = None
        print_list = []
        for i_neg_ll in model_neg_ll:
          xrs = xrs_list[i_neg_ll[1]]
          nll_occ = xrs.scatterers().extract_occupancies()

          # Set q=0 nth percentile atoms
          sorted_nll_occ = sorted(nll_occ, reverse=True)
          number_atoms = len(sorted_nll_occ)
          # NOTE(review): when int(number_atoms * percentile) is 0 the
          # index is -1, i.e. the smallest value is used as cutoff.
          percentile_prob_cutoff = sorted_nll_occ[
            int(number_atoms * percentile) - 1]
          cutoff_selections = flex.bool(nll_occ < percentile_prob_cutoff)
          cutoff_nll_occ = flex.double(nll_occ.size(), 1.0).set_selected(
            cutoff_selections, 0.0)
          #XXX Debug
          # NOTE(review): all unlogged prints below are treated as part of
          # this disabled debug block -- confirm against the original layout.
          if False:
            print('\nDebug')
            for x in range(len(cutoff_selections)):
              print(cutoff_selections[x], nll_occ[x], cutoff_nll_occ[x])
            print(percentile)
            print(percentile_prob_cutoff)
            print(cutoff_selections.count(True))
            print(cutoff_selections.size())
            print(cutoff_nll_occ.count(0.0))
            print('Count q = 1 : ', cutoff_nll_occ.count(1.0))
            print('Count scatterers size : ', cutoff_nll_occ.size())

          xrs.set_occupancies(cutoff_nll_occ)
          fmodel.update_xray_structure(
            xray_structure=xrs,
            update_f_calc=True,
            update_f_mask=True)

          # Running sums of Fcalc / Fmask over the models ranked so far.
          if f_calc_ave_total_reordered is None:
            f_calc_ave_total_reordered = fmodel.f_calc().data().deep_copy()
            f_mask_ave_total_reordered = \
              fmodel.f_masks()[0].data().deep_copy()
            cntr = 1
          else:
            f_calc_ave_total_reordered += fmodel.f_calc().data().deep_copy()
            f_mask_ave_total_reordered += \
              fmodel.f_masks()[0].data().deep_copy()
            cntr += 1
          fmodel.update(
            f_calc=f_calc_ave.array(
              f_calc_ave_total_reordered / cntr).deep_copy(),
            f_mask=f_calc_ave.array(
              f_mask_ave_total_reordered / cntr).deep_copy())

          # Update solvent and scale
          # XXX Will need to apply_back_trace on latest version
          fmodel.set_scale_switch = 0
          fmodel.update_all_scales()

          # Reset occ for outout
          xrs.set_occupancies(nll_occ)
          # k1 updated vs Fobs
          if self.params.fobs_vs_fcalc_post_nll:
            print_list.append([
              cntr, i_neg_ll[0], i_neg_ll[1],
              fmodel.r_work(), fmodel.r_free()])

        # Order models by nll and print summary
        print('\nModels ranked by nll <Fcalc> R-factors recalculated',
              file=self.log)
        print('Percentile cutoff : {0:5.3f}'.format(percentile),
              file=self.log)
        xrs_list_sorted_nll = []
        print(' | NLL <Rw> <Rf> Ens Model', file=self.log)
        for info in print_list:
          print(' {0:4d} | {1:8.1f} {2:8.4f} {3:8.4f} {4:12d}'.format(
            info[0],
            info[1],
            info[3],
            info[4],
            info[2] + 1,
            ), file=self.log)
          xrs_list_sorted_nll.append(xrs_list[info[2]])

      # Output nll ordered ensemble
      write_ensemble_pdb(
        filename='nll_ordered_' + pdb_root + '_pensemble.pdb',
        xrs_list=xrs_list_sorted_nll,
        ens_pdb_hierarchy=ens_pdb_hierarchy)
def exercise_under_sampled(space_group_info,
                           anomalous_flag,
                           conjugate_flag,
                           under_sampling,
                           d_min=2.,
                           resolution_factor=0.5,
                           max_prime=5,
                           verbose=0):
  """Check structure-factor maps computed on an under-sampled grid.

  Structure factors of a random structure are placed on a full grid and on
  a grid with under_sampling times fewer points per axis, both are
  back-transformed, and every under_sampling-th point of the full map is
  asserted to agree with the under-sampled map. On the way, the maps built
  from the symmetry-unique and the P1-expanded reflections are asserted to
  be equal.
  """

  def make_fft(grid_n_real):
    # FFT object plus the dimensions of its complex array.
    if (not anomalous_flag):
      fft = fftpack.real_to_complex_3d(grid_n_real)
      return fft, fft.n_complex()
    fft = fftpack.complex_to_complex_3d(grid_n_real)
    return fft, fft.n()

  def sf_to_map(miller_array, grid_n_real, grid_n_complex):
    # Place the structure factors of miller_array on the given grid.
    return maptbx.structure_factors.to_map(
      space_group=miller_array.space_group(),
      anomalous_flag=anomalous_flag,
      miller_indices=miller_array.indices(),
      structure_factors=miller_array.data(),
      n_real=grid_n_real,
      map_grid=flex.grid(grid_n_complex),
      conjugate_flag=conjugate_flag)

  def back_transform(fft, sf_map):
    # Backward FFT followed by the padding check matching the FFT type.
    transformed = fft.backward(sf_map.complex_map())
    if (not anomalous_flag):
      assert transformed.all() == fft.m_real()
    else:
      assert not transformed.is_padded()
    return transformed

  def show_statistics(transformed):
    if (not anomalous_flag):
      maptbx.statistics(transformed).show_summary()
      maptbx.statistics(transformed).show_summary()
    else:
      maptbx.statistics(flex.real(transformed)).show_summary()
      maptbx.statistics(flex.imag(transformed)).show_summary()

  random_xrs = random_structure.xray_structure(
    space_group_info,
    elements=("N", "C", "C", "O"),
    random_f_prime_d_min=1,
    random_f_double_prime=anomalous_flag,
    use_u_aniso=True,
    random_u_iso=True,
    random_occupancy=True)
  structure_factors = random_xrs.structure_factors(
    anomalous_flag=anomalous_flag,
    d_min=d_min,
    algorithm="direct")
  f_calc = structure_factors.f_calc()
  gridding = maptbx.crystal_gridding(
    unit_cell=f_calc.unit_cell(),
    d_min=d_min,
    resolution_factor=resolution_factor,
    max_prime=max_prime,
    mandatory_factors=(under_sampling,) * 3)
  n_real = gridding.n_real()

  # Full grid.
  full_fft, n_complex = make_fft(n_real)
  full_map = sf_to_map(f_calc, n_real, n_complex)
  f_calc_p1 = f_calc.expand_to_p1()
  full_map_p1 = sf_to_map(f_calc_p1, n_real, n_complex)
  full_difference = flex.abs(
    full_map_p1.complex_map() - full_map.complex_map())
  assert flex.max(full_difference) < 1.e-10
  real_map = back_transform(full_fft, full_map)
  if verbose:
    show_statistics(real_map)

  # Under-sampled grid.
  n_real_under_sampled = [n // under_sampling for n in n_real]
  small_fft, n_complex_under_sampled = make_fft(n_real_under_sampled)
  under_sampled_map = sf_to_map(
    f_calc, n_real_under_sampled, n_complex_under_sampled)
  under_sampled_map_p1 = sf_to_map(
    f_calc_p1, n_real_under_sampled, n_complex_under_sampled)
  small_difference = flex.abs(
    under_sampled_map_p1.complex_map() - under_sampled_map.complex_map())
  assert flex.max(small_difference) < 1.e-10
  if (not anomalous_flag):
    under_sampled_map_before_fft = \
      under_sampled_map.complex_map().deep_copy()
  under_sampled_real_map = back_transform(small_fft, under_sampled_map)
  if verbose:
    show_statistics(under_sampled_real_map)
  if verbose:
    print(real_map.all(), n_complex)
    print(under_sampled_real_map.all(), n_complex_under_sampled)

  # Compare the real parts on the common grid points.
  if (not anomalous_flag):
    x_source, y_source = real_map, under_sampled_real_map
  else:
    x_source = flex.real(real_map)
    y_source = flex.real(under_sampled_real_map)
  x = flex.double()
  extent = x_source.focus()
  for i in range(0, extent[0], under_sampling):
    for j in range(0, extent[1], under_sampling):
      for k in range(0, extent[2], under_sampling):
        x.append(x_source[(i, j, k)])
  y = maptbx.copy(y_source, flex.grid(y_source.focus())).as_1d()
  if verbose:
    print("x:", tuple(x))
    print("y:", tuple(y))
  largest_deviation = flex.max(flex.abs(x - y))
  tolerance = (flex.max(flex.abs(x)) + flex.max(flex.abs(y))) / 2 * 1.e-6
  assert largest_deviation < tolerance
  if (under_sampling == 1):
    x = maptbx.copy(x_source, flex.grid(x_source.focus())).as_1d()
    c = flex.linear_correlation(x, y)
    assert c.coefficient() >= 0.9999
def exercise_under_sampled(space_group_info, anomalous_flag, conjugate_flag,
                           under_sampling,
                           d_min=2., resolution_factor=0.5, max_prime=5,
                           verbose=0):
  """Compare a structure-factor map with its under-sampled counterpart.

  The structure factors of a random structure are mapped onto a full grid
  and onto a grid with n//under_sampling points per axis. After the
  backward FFT, the full map sampled at every under_sampling-th point must
  agree with the under-sampled map. The maps obtained from the
  P1-expanded reflections must equal the ones obtained with symmetry.

  The print calls use a single pre-formatted string so that the output is
  the same with and without the Python 2 print statement.
  """
  structure_factors = random_structure.xray_structure(
    space_group_info,
    elements=("N", "C", "C", "O"),
    random_f_prime_d_min=1,
    random_f_double_prime=anomalous_flag,
    use_u_aniso=True,
    random_u_iso=True,
    random_occupancy=True
    ).structure_factors(
      anomalous_flag=anomalous_flag, d_min=d_min, algorithm="direct")
  f_calc = structure_factors.f_calc()
  n_real = maptbx.crystal_gridding(
    unit_cell=f_calc.unit_cell(),
    d_min=d_min,
    resolution_factor=resolution_factor,
    max_prime=max_prime,
    mandatory_factors=(under_sampling,)*3).n_real()
  if (not anomalous_flag):
    rfft = fftpack.real_to_complex_3d(n_real)
    n_complex = rfft.n_complex()
  else:
    cfft = fftpack.complex_to_complex_3d(n_real)
    n_complex = cfft.n()
  # Named sf_map rather than map to keep the builtin usable.
  sf_map = maptbx.structure_factors.to_map(
    space_group=f_calc.space_group(),
    anomalous_flag=anomalous_flag,
    miller_indices=f_calc.indices(),
    structure_factors=f_calc.data(),
    n_real=n_real,
    map_grid=flex.grid(n_complex),
    conjugate_flag=conjugate_flag)
  f_calc_p1 = f_calc.expand_to_p1()
  sf_map_p1 = maptbx.structure_factors.to_map(
    space_group=f_calc_p1.space_group(),
    anomalous_flag=anomalous_flag,
    miller_indices=f_calc_p1.indices(),
    structure_factors=f_calc_p1.data(),
    n_real=n_real,
    map_grid=flex.grid(n_complex),
    conjugate_flag=conjugate_flag)
  assert flex.max(flex.abs(sf_map_p1.complex_map()
                           - sf_map.complex_map())) < 1.e-10
  if (not anomalous_flag):
    real_map = rfft.backward(sf_map.complex_map())
    assert real_map.all() == rfft.m_real()
  else:
    real_map = cfft.backward(sf_map.complex_map())
    assert not real_map.is_padded()
  if (0 or verbose):
    if (not anomalous_flag):
      maptbx.statistics(real_map).show_summary()
      maptbx.statistics(real_map).show_summary()
    else:
      maptbx.statistics(flex.real(real_map)).show_summary()
      maptbx.statistics(flex.imag(real_map)).show_summary()
  n_real_under_sampled = [n//under_sampling for n in n_real]
  # The FFT objects are rebuilt for the smaller grid.
  if (not anomalous_flag):
    rfft = fftpack.real_to_complex_3d(n_real_under_sampled)
    n_complex_under_sampled = rfft.n_complex()
  else:
    cfft = fftpack.complex_to_complex_3d(n_real_under_sampled)
    n_complex_under_sampled = cfft.n()
  under_sampled_map = maptbx.structure_factors.to_map(
    space_group=f_calc.space_group(),
    anomalous_flag=anomalous_flag,
    miller_indices=f_calc.indices(),
    structure_factors=f_calc.data(),
    n_real=n_real_under_sampled,
    map_grid=flex.grid(n_complex_under_sampled),
    conjugate_flag=conjugate_flag)
  under_sampled_map_p1 = maptbx.structure_factors.to_map(
    space_group=f_calc_p1.space_group(),
    anomalous_flag=anomalous_flag,
    miller_indices=f_calc_p1.indices(),
    structure_factors=f_calc_p1.data(),
    n_real=n_real_under_sampled,
    map_grid=flex.grid(n_complex_under_sampled),
    conjugate_flag=conjugate_flag)
  assert flex.max(flex.abs(under_sampled_map_p1.complex_map()
                           - under_sampled_map.complex_map())) < 1.e-10
  if (not anomalous_flag):
    under_sampled_real_map = rfft.backward(under_sampled_map.complex_map())
    assert under_sampled_real_map.all() == rfft.m_real()
  else:
    under_sampled_real_map = cfft.backward(under_sampled_map.complex_map())
    assert not under_sampled_real_map.is_padded()
  if (0 or verbose):
    if (not anomalous_flag):
      maptbx.statistics(under_sampled_real_map).show_summary()
      maptbx.statistics(under_sampled_real_map).show_summary()
    else:
      maptbx.statistics(flex.real(under_sampled_real_map)).show_summary()
      maptbx.statistics(flex.imag(under_sampled_real_map)).show_summary()
  if (0 or verbose):
    print("%s %s" % (real_map.all(), n_complex))
    print("%s %s" % (under_sampled_real_map.all(), n_complex_under_sampled))
  if (not anomalous_flag):
    x_source = real_map
    y_source = under_sampled_real_map
  else:
    x_source = flex.real(real_map)
    y_source = flex.real(under_sampled_real_map)
  # Pick every under_sampling-th grid point of the full map.
  x = flex.double()
  n = x_source.focus()
  for i in range(0, n[0], under_sampling):
    for j in range(0, n[1], under_sampling):
      for k in range(0, n[2], under_sampling):
        x.append(x_source[(i,j,k)])
  y = maptbx.copy(y_source, flex.grid(y_source.focus())).as_1d()
  if (0 or verbose):
    print("x: %s" % (tuple(x),))
    print("y: %s" % (tuple(y),))
  assert flex.max(flex.abs(x-y)) \
      < (flex.max(flex.abs(x))+flex.max(flex.abs(y)))/2*1.e-6
  if (under_sampling == 1):
    x = maptbx.copy(x_source, flex.grid(x_source.focus())).as_1d()
    c = flex.linear_correlation(x, y)
    assert c.coefficient() >= 0.9999
def statistics(self):
  """Return maptbx.statistics computed from self.map_data()."""
  from cctbx import maptbx
  map_values = self.map_data()
  return maptbx.statistics(map_values)
def exercise_statistics():
  """Exercise maptbx.statistics and maptbx.more_statistics.

  For every type from flex_types(): an all-zero array must give zero
  statistics, a random 3x5 array must agree with the flex reductions, and
  a padded array holding the same values inside its focus (with outliers
  in the padding and a shifted origin) must give the same statistics.
  more_statistics is then compared with scitbx.math.basic_statistics.
  """
  import scitbx.math
  for flex_type in flex_types():
    a = flex_type(flex.grid((3,5)))
    s = maptbx.statistics(a)
    assert s.min() == 0
    assert s.max() == 0
    assert s.mean() == 0
    assert s.mean_sq() == 0
    assert s.sigma() == 0
    a = flex_type([random.random() for i in range(3*5)])
    a.resize(flex.grid((3,5)))
    s = maptbx.statistics(a)
    assert approx_equal(flex.min(a), s.min())
    assert approx_equal(flex.max(a), s.max())
    assert approx_equal(flex.mean(a), s.mean())
    assert approx_equal(flex.mean_sq(a), s.mean_sq())
    assert approx_equal(flex.mean_sq(a)-flex.mean(a)**2, s.sigma()**2)
    # Same values inside the focus; -1 and 2 are written into the padding.
    b = flex_type(flex.grid((4,6)).set_focus((3,5)))
    for i in range(3):
      for j in range(5):
        b[(i,j)] = a[(i,j)]
    b[(3,5)] = -1
    b[(2,5)] = 2
    b.resize(flex.grid((-2,3), (2,9)).set_focus((1,8)))
    t = maptbx.statistics(b)
    # The flex reductions see the padding, the map statistics must not.
    assert not_approx_equal(flex.min(b), t.min())
    assert not_approx_equal(flex.max(b), t.max())
    assert not_approx_equal(flex.mean(b), t.mean())
    assert not_approx_equal(flex.mean_sq(b), t.mean_sq())
    assert not_approx_equal(flex.mean_sq(b)-flex.mean(b)**2, t.sigma()**2)
    assert approx_equal(s.min(), t.min())
    assert approx_equal(s.max(), t.max())
    assert approx_equal(s.mean(), t.mean())
    assert approx_equal(s.mean_sq(), t.mean_sq())
    assert approx_equal(s.sigma(), t.sigma())
  a = flex.double(flex.grid(5,3))
  s = maptbx.more_statistics(a)
  assert s.min() == 0
  assert s.max() == 0
  assert s.mean() == 0
  assert s.mean_sq() == 0
  assert s.sigma() == 0
  assert s.skewness() == 0
  assert s.kurtosis() == 0
  a = flex.random_double(5*3)
  reference = scitbx.math.basic_statistics(a)
  a.resize(flex.grid(5,3))
  s = maptbx.more_statistics(a)
  assert approx_equal(s.min(), reference.min)
  assert approx_equal(s.max(), reference.max)
  assert approx_equal(s.mean(), reference.mean)
  assert approx_equal(s.sigma(), reference.biased_standard_deviation)
  assert approx_equal(s.skewness(), reference.skew)
  assert approx_equal(s.kurtosis(), reference.kurtosis)
  # Padded copy of a, again with outliers in the padding.
  b = flex.double(flex.grid((6,4)).set_focus((5,3)))
  for i in range(5):
    for j in range(3):
      b[(i,j)] = a[(i,j)]
  b[(5,3)] = -1
  b[(5,2)] = 2
  b.resize(flex.grid((3,-2), (9,2)).set_focus((8,1)))
  t = maptbx.statistics(b)
  # Check the padded result t; the asserts here used to re-test s, which
  # left t unverified.
  assert approx_equal(t.min(), reference.min)
  assert approx_equal(t.max(), reference.max)
  assert approx_equal(t.mean(), reference.mean)
  assert approx_equal(t.sigma(), reference.biased_standard_deviation)
  # NOTE(review): m is not used in the visible part of this function.
  m = flex.double(flex.grid((6,4,8)).set_focus((5,3,7)))
def statistics(self):
  """Return maptbx.statistics computed from self.data."""
  from cctbx import maptbx
  map_stats = maptbx.statistics(self.data)
  return map_stats
def sigma(self):
  """Return sigma() of maptbx.statistics over self.real_map()."""
  map_stats = maptbx.statistics(self.real_map())
  return map_stats.sigma()