def absolute_structure_analysis(xs, fo2, fc, scale, nu=None, log=None,
                                outlier_cutoff_factor=None):
    """Run the Hooft and Flack absolute-structure analyses and print them.

    The report is written in this order:

    1. ``absolute_structure.hooft_analysis`` under the heading
       "Gaussian analysis:", followed by its Bijvoet-difference
       probability plot.
    2. ``absolute_structure.students_t_hooft_analysis`` under the heading
       "Student's t analysis:", followed by its probability plot.
    3. ``absolute_structure.flack_analysis`` -- only when ``xs`` is given.

    Args:
        xs: Forwarded to ``flack_analysis``; ``None`` skips that step.
        fo2: Forwarded to every analysis; must provide
            ``as_xray_observations()`` when ``xs`` is not ``None``.
        fc: Forwarded to both Hooft analyses.
        scale: Forwarded as ``scale_factor`` to both Hooft analyses.
        nu: Parameter handed to ``distributions.students_t_distribution``.
            When ``None`` it is obtained from
            ``maximise_students_t_correlation_coefficient`` applied to the
            ``y`` values of the first probability plot, searched between
            1 and 200.
        log: Writable file-like object receiving the report; defaults to
            ``sys.stdout``.
        outlier_cutoff_factor: Forwarded unchanged to both Hooft analyses.

    Returns:
        None. All results are written to ``log``.
    """
    out = sys.stdout if log is None else log
    cutoff = outlier_cutoff_factor

    # Python 2 print-to-file statements are used throughout.
    gaussian = absolute_structure.hooft_analysis(
        fo2, fc, scale_factor=scale, outlier_cutoff_factor=cutoff)
    print >> out, "Gaussian analysis:"
    gaussian.show(out=out)
    normal_plot = absolute_structure.bijvoet_differences_probability_plot(
        gaussian)
    print >> out, "Probability plot:"
    normal_plot.show(out=out)
    print >> out

    deviations = normal_plot.y
    if nu is None:
        nu = absolute_structure.maximise_students_t_correlation_coefficient(
            deviations, min_nu=1, max_nu=200)

    quantiles = distributions.students_t_distribution(nu).quantiles(
        deviations.size())
    # The first five and last five entries of both arrays are left out of
    # the straight-line fit whose slope is handed to the t analysis.
    slope = flex.linear_regression(quantiles[5:-5], deviations[5:-5]).slope()

    students_t = absolute_structure.students_t_hooft_analysis(
        fo2, fc, nu,
        scale_factor=scale,
        probability_plot_slope=slope,
        outlier_cutoff_factor=cutoff)
    students_t_plot = absolute_structure.bijvoet_differences_probability_plot(
        students_t, use_students_t_distribution=True, students_t_nu=nu)
    print >> out, "Student's t analysis:"
    print >> out, "nu: %.2f" % nu
    students_t.show(out=out)
    print >> out, "Probability plot:"
    students_t_plot.show(out=out)
    print >> out

    if xs is None:
        return
    absolute_structure.flack_analysis(
        xs, fo2.as_xray_observations()).show(out=out)
def absolute_structure_analysis(xs, fo2, fc, scale, nu=None, log=None,
                                outlier_cutoff_factor=None):
    """Run the Hooft and Flack absolute-structure analyses and print them.

    The report is written in this order:

    1. ``absolute_structure.hooft_analysis`` under the heading
       "Gaussian analysis:", followed by its Bijvoet-difference
       probability plot.
    2. ``absolute_structure.students_t_hooft_analysis`` under the heading
       "Student's t analysis:", followed by its probability plot.
    3. ``absolute_structure.flack_analysis`` -- only when ``xs`` is given.

    Args:
        xs: Forwarded to ``flack_analysis``; ``None`` skips that step.
        fo2: Forwarded to every analysis; must provide
            ``as_xray_observations()`` when ``xs`` is not ``None``.
        fc: Forwarded to both Hooft analyses.
        scale: Forwarded as ``scale_factor`` to both Hooft analyses.
        nu: Parameter handed to ``distributions.students_t_distribution``.
            When ``None`` it is obtained from
            ``maximise_students_t_correlation_coefficient`` applied to the
            ``y`` values of the first probability plot, searched between
            1 and 200.
        log: Writable file-like object receiving the report; defaults to
            ``sys.stdout``.
        outlier_cutoff_factor: Forwarded unchanged to both Hooft analyses.

    Returns:
        None. All results are written to ``log``.
    """
    out = sys.stdout if log is None else log
    cutoff = outlier_cutoff_factor

    # Python 2 print-to-file statements are used throughout.
    gaussian = absolute_structure.hooft_analysis(
        fo2, fc, scale_factor=scale, outlier_cutoff_factor=cutoff)
    print >> out, "Gaussian analysis:"
    gaussian.show(out=out)
    normal_plot = absolute_structure.bijvoet_differences_probability_plot(
        gaussian)
    print >> out, "Probability plot:"
    normal_plot.show(out=out)
    print >> out

    deviations = normal_plot.y
    if nu is None:
        nu = absolute_structure.maximise_students_t_correlation_coefficient(
            deviations, min_nu=1, max_nu=200)

    quantiles = distributions.students_t_distribution(nu).quantiles(
        deviations.size())
    # The first five and last five entries of both arrays are left out of
    # the straight-line fit whose slope is handed to the t analysis.
    slope = flex.linear_regression(quantiles[5:-5], deviations[5:-5]).slope()

    students_t = absolute_structure.students_t_hooft_analysis(
        fo2, fc, nu,
        scale_factor=scale,
        probability_plot_slope=slope,
        outlier_cutoff_factor=cutoff)
    students_t_plot = absolute_structure.bijvoet_differences_probability_plot(
        students_t, use_students_t_distribution=True, students_t_nu=nu)
    print >> out, "Student's t analysis:"
    print >> out, "nu: %.2f" % nu
    students_t.show(out=out)
    print >> out, "Probability plot:"
    students_t_plot.show(out=out)
    print >> out

    if xs is None:
        return
    absolute_structure.flack_analysis(
        xs, fo2.as_xray_observations()).show(out=out)
def exercise(self, debug=False):
    """Check Hooft analysis results for three absolute-structure scenarios.

    Runs ``absolute_structure.hooft_analysis`` (and, when
    ``self.use_students_t_errors`` is true, also
    ``absolute_structure.students_t_hooft_analysis``) for:

    * ``self.fo2`` against ``self.fc``      -- ``hooft_y`` expected near 0;
    * ``self.fo2`` against ``self.fc_i``    -- ``hooft_y`` expected near 1;
    * ``self.fo2_twin`` against ``self.fc`` -- ``hooft_y`` expected near 0.5.

    Args:
        debug: When true, additionally dump probability-plot data to
            ``delta_F_npp.csv``, ``npp.csv`` and (with Student's t errors)
            ``tpp.csv`` in the current working directory.

    Raises:
        AssertionError: If any expected value is not reproduced.
    """
    if debug:
        distribution = distributions.normal_distribution()
        observed_deviations = (
            self.fo2.data()
            - self.scale_factor * self.fc.as_intensity_array().data())
        observed_deviations = observed_deviations.select(
            flex.sort_permutation(observed_deviations))
        expected_deviations = distribution.quantiles(
            observed_deviations.size())
        # The writer object is discarded, so it is assumed to have written
        # everything by the time its constructor returns; the file is closed
        # right after instead of being leaked.
        with open('delta_F_npp.csv', 'wb') as csv_file:
            csv_utils.writer(
                csv_file, (expected_deviations, observed_deviations))

    # first with the correct absolute structure
    gaussian = absolute_structure.hooft_analysis(self.fo2, self.fc)
    analyses = [gaussian]
    NPP = absolute_structure.bijvoet_differences_probability_plot(gaussian)
    if self.use_students_t_errors:
        nu_calc = absolute_structure.maximise_students_t_correlation_coefficient(
            NPP.y, min_nu=1, max_nu=200)
        t_analysis = absolute_structure.students_t_hooft_analysis(
            self.fo2, self.fc, nu_calc,
            probability_plot_slope=NPP.fit.slope())
        # NOTE(review): this appends `gaussian` a second time, so
        # `t_analysis` is never checked by the loop below -- confirm
        # whether `t_analysis` was intended here.
        analyses.append(gaussian)
        tPP = absolute_structure.bijvoet_differences_probability_plot(
            t_analysis, use_students_t_distribution=True,
            students_t_nu=nu_calc)
        if tPP.distribution.degrees_of_freedom() < 100:
            assert (tPP.correlation.coefficient()
                    > NPP.correlation.coefficient())
    else:
        # Without Student's t errors the normal probability plot must have
        # a correlation coefficient within 0.005 of 1.
        assert approx_equal(NPP.correlation.coefficient(), 1, 0.005)
    for analysis in analyses:
        assert approx_equal(analysis.hooft_y, 0, 1e-2)
        assert approx_equal(analysis.p2_true, 1)
        assert approx_equal(analysis.p2_false, 0)
        assert approx_equal(analysis.p3_true, 1)
        assert approx_equal(analysis.p3_false, 0)
        assert approx_equal(analysis.p3_racemic_twin, 0)
    if debug:
        with open('npp.csv', 'wb') as csv_file:
            csv_utils.writer(csv_file, (NPP.x, NPP.y))
        if self.use_students_t_errors:
            with open('tpp.csv', 'wb') as csv_file:
                csv_utils.writer(csv_file, (tPP.x, tPP.y))
    assert approx_equal(NPP.fit.y_intercept(), 0)

    # and now with the wrong absolute structure
    gaussian = absolute_structure.hooft_analysis(self.fo2, self.fc_i)
    analyses = [gaussian]
    NPP = absolute_structure.bijvoet_differences_probability_plot(gaussian)
    if self.use_students_t_errors:
        nu_calc = absolute_structure.maximise_students_t_correlation_coefficient(
            NPP.y, min_nu=1, max_nu=200)
        t_analysis = absolute_structure.students_t_hooft_analysis(
            self.fo2, self.fc_i, nu_calc,
            probability_plot_slope=NPP.fit.slope())
        # NOTE(review): same duplicate append of `gaussian` as in the first
        # scenario; `t_analysis` is not part of the checks below.
        analyses.append(gaussian)
        # NOTE(review): unlike the first scenario, `students_t_nu` is not
        # passed here -- confirm this is intended.
        tPP = absolute_structure.bijvoet_differences_probability_plot(
            t_analysis, use_students_t_distribution=True)
        if tPP.distribution.degrees_of_freedom() < 100:
            assert (tPP.correlation.coefficient()
                    > NPP.correlation.coefficient())
    else:
        assert approx_equal(NPP.correlation.coefficient(), 1, 0.002)
        assert approx_equal(NPP.fit.y_intercept(), 0)
    for analysis in analyses:
        assert approx_equal(analysis.hooft_y, 1, 1e-2)
        assert approx_equal(analysis.p2_true, 0)
        assert approx_equal(analysis.p2_false, 1)
        assert approx_equal(analysis.p3_true, 0)
        assert approx_equal(analysis.p3_false, 1)
        assert approx_equal(analysis.p3_racemic_twin, 0)

    # test for the case of a racemic twin
    gaussian = absolute_structure.hooft_analysis(self.fo2_twin, self.fc)
    analyses = [gaussian]
    NPP = absolute_structure.bijvoet_differences_probability_plot(gaussian)
    if self.use_students_t_errors:
        nu_calc = absolute_structure.maximise_students_t_correlation_coefficient(
            NPP.y, min_nu=1, max_nu=200)
        # Only the probability plot of the t analysis is checked in this
        # scenario; nothing is appended to `analyses`.
        t_analysis = absolute_structure.students_t_hooft_analysis(
            self.fo2_twin, self.fc, nu_calc,
            probability_plot_slope=NPP.fit.slope())
        tPP = absolute_structure.bijvoet_differences_probability_plot(
            t_analysis, use_students_t_distribution=True)
        if tPP.distribution.degrees_of_freedom() < 100:
            assert (tPP.correlation.coefficient()
                    > NPP.correlation.coefficient())
    else:
        assert approx_equal(NPP.correlation.coefficient(), 1, 0.002)
        assert approx_equal(NPP.fit.y_intercept(), 0)
    for analysis in analyses:
        assert approx_equal(analysis.hooft_y, 0.5, 1e-2)
        assert approx_equal(analysis.p3_true, 0)
        assert approx_equal(analysis.p3_false, 0)
        assert approx_equal(analysis.p3_racemic_twin, 1)