def _integrate_finish(self):
    '''Finish off the integration by running correct.'''

    # first run the postrefinement etc with spacegroup P1
    # and the current unit cell - this will be used to
    # obtain a benchmark rmsd in pixels / phi and also
    # cell deviations (this is working towards spotting bad
    # indexing solutions) - only do this if we have no
    # reindex matrix... and no postrefined cell...

    p1_deviations = None

    # fix for bug # 3264 -
    # if we have not run integration with refined parameters, make it so...
    # erm? shouldn't this therefore return if this is the principle, or
    # set the flag after we have tested the lattice?

    if not self._xds_data_files.has_key('GXPARM.XDS') and \
       PhilIndex.params.xds.integrate.reintegrate:
        Debug.write(
            'Resetting integrater, to ensure refined orientation is used')
        self.set_integrater_done(False)

    if not self.get_integrater_reindex_matrix() and not self._intgr_cell \
       and not Flags.get_no_lattice_test() and \
       not self.get_integrater_sweep().get_user_lattice():
        correct = self.Correct()

        correct.set_data_range(self._intgr_wedge[0],
                               self._intgr_wedge[1])

        if self.get_polarization() > 0.0:
            correct.set_polarization(self.get_polarization())

        # FIXME should this be using the correctly transformed
        # cell or are the results ok without it?!

        correct.set_spacegroup_number(1)
        correct.set_cell(self._intgr_refiner_cell)

        correct.run()

        # record the log file -

        pname, xname, dname = self.get_integrater_project_info()
        sweep = self.get_integrater_sweep_name()
        FileHandler.record_log_file('%s %s %s %s CORRECT' % \
                                    (pname, xname, dname, sweep),
                                    os.path.join(
            self.get_working_directory(), 'CORRECT.LP'))

        FileHandler.record_more_data_file(
            '%s %s %s %s CORRECT' % (pname, xname, dname, sweep),
            os.path.join(self.get_working_directory(), 'XDS_ASCII.HKL'))

        cell = correct.get_result('cell')
        cell_esd = correct.get_result('cell_esd')

        Debug.write('Postrefinement in P1 results:')
        Debug.write('%7.3f %7.3f %7.3f %7.3f %7.3f %7.3f' % \
                    tuple(cell))
        Debug.write('%7.3f %7.3f %7.3f %7.3f %7.3f %7.3f' % \
                    tuple(cell_esd))
        Debug.write('Deviations: %.2f pixels %.2f degrees' % \
                    (correct.get_result('rmsd_pixel'),
                     correct.get_result('rmsd_phi')))

        p1_deviations = (correct.get_result('rmsd_pixel'),
                         correct.get_result('rmsd_phi'))

    # next run the postrefinement etc with the given
    # cell / lattice - this will be the assumed result...

    correct = self.Correct()

    correct.set_data_range(self._intgr_wedge[0],
                           self._intgr_wedge[1])

    if self.get_polarization() > 0.0:
        correct.set_polarization(self.get_polarization())

    # BUG # 2695 probably comes from here - need to check...
    # if the pointless interface comes back with a different
    # crystal setting then the unit cell stored in self._intgr_cell
    # needs to be set to None...

    if self.get_integrater_spacegroup_number():
        correct.set_spacegroup_number(
            self.get_integrater_spacegroup_number())
        if not self._intgr_cell:
            raise RuntimeError, 'no unit cell to recycle'
        correct.set_cell(self._intgr_cell)

    # BUG # 3113 - new version of XDS will try and figure the
    # best spacegroup out from the intensities (and get it wrong!)
    # unless we set the spacegroup and cell explicitly

    if not self.get_integrater_spacegroup_number():
        cell = self._intgr_refiner_cell
        lattice = self._intgr_refiner.get_refiner_lattice()
        spacegroup_number = lattice_to_spacegroup_number(lattice)

        # this should not prevent the postrefinement from
        # working correctly, else what is above would not
        # work correctly (the postrefinement test)

        correct.set_spacegroup_number(spacegroup_number)
        correct.set_cell(cell)

        Debug.write('Setting spacegroup to: %d' % spacegroup_number)
        Debug.write(
            'Setting cell to: %.2f %.2f %.2f %.2f %.2f %.2f' % tuple(cell))

    if self.get_integrater_reindex_matrix():

        # bug! if the lattice is not primitive the values in this
        # reindex matrix need to be multiplied by a constant which
        # depends on the Bravais lattice centering.

        lattice = self._intgr_refiner.get_refiner_lattice()

        import scitbx.matrix
        matrix = self.get_integrater_reindex_matrix()
        matrix = scitbx.matrix.sqr(matrix).transpose().elems
        matrix = r_to_rt(matrix)

        if lattice[1] == 'P':
            mult = 1
        elif lattice[1] == 'C' or lattice[1] == 'I':
            mult = 2
        elif lattice[1] == 'R':
            mult = 3
        elif lattice[1] == 'F':
            mult = 4
        else:
            raise RuntimeError, 'unknown multiplier for lattice %s' % \
                  lattice

        Debug.write('REIDX multiplier for lattice %s: %d' % \
                    (lattice, mult))

        mult_matrix = [mult * m for m in matrix]

        Debug.write('REIDX set to %d %d %d %d %d %d %d %d %d %d %d %d' % \
                    tuple(mult_matrix))

        correct.set_reindex_matrix(mult_matrix)

    correct.run()

    # erm. just to be sure
    if self.get_integrater_reindex_matrix() and \
       correct.get_reindex_used():
        raise RuntimeError, 'Reindex panic!'

    # get the reindex operation used, which may be useful if none was
    # set but XDS decided to apply one, e.g. #419.

    if not self.get_integrater_reindex_matrix() and \
       correct.get_reindex_used():

        # convert this reindex operation to h, k, l form: n.b. this
        # will involve dividing through by the lattice centring multiplier

        matrix = rt_to_r(correct.get_reindex_used())
        import scitbx.matrix
        matrix = scitbx.matrix.sqr(matrix).transpose().elems

        lattice = self._intgr_refiner.get_refiner_lattice()

        if lattice[1] == 'P':
            mult = 1.0
        elif lattice[1] == 'C' or lattice[1] == 'I':
            mult = 2.0
        elif lattice[1] == 'R':
            mult = 3.0
        elif lattice[1] == 'F':
            mult = 4.0
        else:
            raise RuntimeError, 'unknown multiplier for lattice %s' % \
                  lattice

        matrix = [m / mult for m in matrix]

        reindex_op = mat_to_symop(matrix)

        # assign this to self: will this reset?! make for a leaky
        # abstraction and just assign this...

        # self.set_integrater_reindex_operator(reindex)

        self._intgr_reindex_operator = reindex_op

    # record the log file -

    pname, xname, dname = self.get_integrater_project_info()
    sweep = self.get_integrater_sweep_name()
    FileHandler.record_log_file('%s %s %s %s CORRECT' % \
                                (pname, xname, dname, sweep),
                                os.path.join(self.get_working_directory(),
                                             'CORRECT.LP'))

    # should get some interesting stuff from the XDS correct file
    # here, for instance the resolution range to use in integration
    # (which should be fed back if not fast) and so on...

    self._intgr_corrected_hklout = os.path.join(
        self.get_working_directory(), 'XDS_ASCII.HKL')

    # also record the batch range - needed for the analysis of the
    # radiation damage in chef...

    self._intgr_batches_out = (self._intgr_wedge[0],
                               self._intgr_wedge[1])

    # FIXME perhaps I should also feedback the GXPARM file here??
    for file in ['GXPARM.XDS']:
        self._xds_data_files[file] = correct.get_output_data_file(file)

    # record the postrefined cell parameters
    self._intgr_cell = correct.get_result('cell')
    self._intgr_n_ref = correct.get_result('n_ref')

    Debug.write('Postrefinement in "correct" spacegroup results:')
    Debug.write('%7.3f %7.3f %7.3f %7.3f %7.3f %7.3f' % \
                tuple(correct.get_result('cell')))
    Debug.write('%7.3f %7.3f %7.3f %7.3f %7.3f %7.3f' % \
                tuple(correct.get_result('cell_esd')))
    Debug.write('Deviations: %.2f pixels %.2f degrees' % \
                (correct.get_result('rmsd_pixel'),
                 correct.get_result('rmsd_phi')))

    Debug.write('Error correction parameters: A=%.3f B=%.3f' % \
                correct.get_result('sdcorrection'))

    # compute misorientation of axes

    xparm_file = os.path.join(self.get_working_directory(), 'GXPARM.XDS')

    from iotbx.xds import xparm
    handle = xparm.reader()
    handle.read_file(xparm_file)

    rotn = handle.rotation_axis
    beam = handle.beam_vector

    dot = sum([rotn[j] * beam[j] for j in range(3)])
    r = math.sqrt(sum([rotn[j] * rotn[j] for j in range(3)]))
    b = math.sqrt(sum([beam[j] * beam[j] for j in range(3)]))

    rtod = 180.0 / math.pi

    angle = rtod * math.fabs(0.5 * math.pi - math.acos(dot / (r * b)))

    Debug.write('Axis misalignment %.2f degrees' % angle)

    correct_deviations = (correct.get_result('rmsd_pixel'),
                          correct.get_result('rmsd_phi'))

    if p1_deviations:
        # compare and reject if both > 50% higher - though adding a little
        # flexibility - 0.5 pixel / osc width slack.

        pixel = p1_deviations[0]
        phi = math.sqrt(0.05 * 0.05 + \
                        p1_deviations[1] * p1_deviations[1])

        threshold = Flags.get_rejection_threshold()

        Debug.write('RMSD ratio: %.2f' % (correct_deviations[0] / pixel))
        Debug.write('RMSPhi ratio: %.2f' % (correct_deviations[1] / phi))

        if correct_deviations[0] / pixel > threshold and \
           correct_deviations[1] / phi > threshold:

            Chatter.write(
                'Eliminating this indexing solution as postrefinement')
            Chatter.write(
                'deviations rather high relative to triclinic')
            raise BadLatticeError, \
                  'high relative deviations in postrefinement'

    if not Flags.get_quick() and Flags.get_remove():
        # check for alien reflections and perhaps recycle - removing them
        if len(correct.get_remove()) > 0:

            correct_remove = correct.get_remove()
            current_remove = []
            final_remove = []

            # first ensure that there are no duplicate entries...
            if os.path.exists(os.path.join(
                self.get_working_directory(), 'REMOVE.HKL')):
                for line in open(os.path.join(
                    self.get_working_directory(),
                    'REMOVE.HKL'), 'r').readlines():
                    h, k, l = map(int, line.split()[:3])
                    z = float(line.split()[3])

                    if not (h, k, l, z) in current_remove:
                        current_remove.append((h, k, l, z))

                for c in correct_remove:
                    if c in current_remove:
                        continue
                    final_remove.append(c)

                Debug.write(
                    '%d alien reflections are already removed' % \
                    (len(correct_remove) - len(final_remove)))

            else:
                # we want to remove all of the new dodgy reflections
                final_remove = correct_remove

            remove_hkl = open(os.path.join(
                self.get_working_directory(), 'REMOVE.HKL'), 'w')

            z_min = Flags.get_z_min()
            rejected = 0

            # write in the old reflections
            for remove in current_remove:
                z = remove[3]
                if z >= z_min:
                    remove_hkl.write('%d %d %d %f\n' % remove)
                else:
                    rejected += 1
            Debug.write('Wrote %d old reflections to REMOVE.HKL' % \
                        (len(current_remove) - rejected))
            Debug.write('Rejected %d as z < %f' % \
                        (rejected, z_min))

            # and the new reflections
            rejected = 0
            used = 0
            for remove in final_remove:
                z = remove[3]
                if z >= z_min:
                    used += 1
                    remove_hkl.write('%d %d %d %f\n' % remove)
                else:
                    rejected += 1
            Debug.write('Wrote %d new reflections to REMOVE.HKL' % \
                        (len(final_remove) - rejected))
            Debug.write('Rejected %d as z < %f' % \
                        (rejected, z_min))

            remove_hkl.close()

            # we want to rerun the finishing step so...
            # unless we have added no new reflections... or unless we
            # have not confirmed the point group (see SCI-398)

            if used and self.get_integrater_reindex_matrix():
                self.set_integrater_finish_done(False)

    else:
        Debug.write(
            'Going quickly so not removing %d outlier reflections...' % \
            len(correct.get_remove()))

    # Convert INTEGRATE.HKL to MTZ format and reapply any reindexing
    # operations / spacegroup changes to allow use with CCP4 / Aimless
    # for scaling

    integrate_hkl = os.path.join(
        self.get_working_directory(), 'INTEGRATE.HKL')

    hklout = os.path.splitext(integrate_hkl)[0] + ".mtz"
    self._factory.set_working_directory(self.get_working_directory())
    pointless = self._factory.Pointless()
    pointless.set_xdsin(integrate_hkl)
    pointless.set_hklout(hklout)
    pointless.xds_to_mtz()

    integrate_mtz = hklout

    if self.get_integrater_reindex_operator() or \
       self.get_integrater_spacegroup_number():

        Debug.write('Reindexing things to MTZ')

        reindex = Reindex()
        reindex.set_working_directory(self.get_working_directory())
        auto_logfiler(reindex)

        if self.get_integrater_reindex_operator():
            reindex.set_operator(self.get_integrater_reindex_operator())

        if self.get_integrater_spacegroup_number():
            reindex.set_spacegroup(self.get_integrater_spacegroup_number())

        hklout = '%s_reindex.mtz' % os.path.splitext(integrate_mtz)[0]

        reindex.set_hklin(integrate_mtz)
        reindex.set_hklout(hklout)
        reindex.reindex()

        integrate_mtz = hklout

    return integrate_mtz
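
# The Bravais-centring multiplier logic above appears twice in
# _integrate_finish: once to scale a reindex matrix up before handing it to
# CORRECT as a REIDX record, and once to divide it back out when recovering
# an operator that XDS applied itself. A minimal sketch of that mapping as a
# shared helper; the name reidx_multiplier is hypothetical, not part of the
# xia2 API.

def reidx_multiplier(lattice):
    '''Return the REIDX element multiplier for a Bravais lattice symbol
    such as "mC": the centring letter (second character) sets the factor.'''

    multipliers = {'P': 1, 'C': 2, 'I': 2, 'R': 3, 'F': 4}
    centring = lattice[1]
    if centring not in multipliers:
        raise RuntimeError, 'unknown multiplier for lattice %s' % lattice
    return multipliers[centring]

# usage, e.g.: reidx_multiplier('mC') => 2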
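
# The axis-misalignment check in _integrate_finish measures how far the
# rotation axis is from perpendicular to the beam: 90 degrees minus the
# angle between the two vectors. A self-contained sketch of the same
# arithmetic; the helper name axis_misalignment is hypothetical.

import math

def axis_misalignment(rotn, beam):
    '''Return |90 - angle(rotn, beam)| in degrees for two 3-vectors.'''

    dot = sum([rotn[j] * beam[j] for j in range(3)])
    r = math.sqrt(sum([rotn[j] * rotn[j] for j in range(3)]))
    b = math.sqrt(sum([beam[j] * beam[j] for j in range(3)]))
    return math.degrees(math.fabs(0.5 * math.pi - math.acos(dot / (r * b))))

# e.g. axis_misalignment((1, 0, 0), (0, 0, 1)) => 0.0 for a perfectly
# perpendicular axis and beam.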
def _scale(self):
    '''Actually scale all of the data together.'''

    from xia2.Handlers.Environment import debug_memory_usage
    debug_memory_usage()

    Journal.block(
        'scaling', self.get_scaler_xcrystal().get_name(), 'XSCALE',
        {'scaling model':'default (all)'})

    epochs = self._sweep_information.keys()
    epochs.sort()

    xscale = self.XScale()

    xscale.set_spacegroup_number(self._xds_spacegroup)
    xscale.set_cell(self._scalr_cell)

    Debug.write('Set CELL: %.2f %.2f %.2f %.2f %.2f %.2f' % \
                tuple(self._scalr_cell))
    Debug.write('Set SPACEGROUP_NUMBER: %d' % \
                self._xds_spacegroup)

    Debug.write('Gathering measurements for scaling')

    for epoch in epochs:

        # get the prepared reflections
        reflections = self._sweep_information[epoch][
            'prepared_reflections']

        # and get the wavelength that this belongs to
        dname = self._sweep_information[epoch]['dname']
        sname = self._sweep_information[epoch]['sname']

        # and the resolution range for the reflections
        intgr = self._sweep_information[epoch]['integrater']
        Debug.write('Epoch: %d' % epoch)
        Debug.write('HKL: %s (%s/%s)' % (reflections, dname, sname))

        resolution_low = intgr.get_integrater_low_resolution()
        resolution_high = self._scalr_resolution_limits.get(
            (dname, sname), 0.0)

        resolution = (resolution_high, resolution_low)

        xscale.add_reflection_file(reflections, dname, resolution)

    # set the global properties of the sample
    xscale.set_crystal(self._scalr_xname)
    xscale.set_anomalous(self._scalr_anomalous)

    if Flags.get_zero_dose():
        Debug.write('Switching on zero-dose extrapolation')
        xscale.set_zero_dose()

    debug_memory_usage()

    xscale.run()

    scale_factor = xscale.get_scale_factor()

    Debug.write('XSCALE scale factor found to be: %e' % scale_factor)

    # record the log file
    pname = self._scalr_pname
    xname = self._scalr_xname

    FileHandler.record_log_file('%s %s XSCALE' % \
                                (pname, xname),
                                os.path.join(self.get_working_directory(),
                                             'XSCALE.LP'))

    # check for outlier reflections and if a number are found
    # then iterate (that is, rerun XSCALE, rejecting these outliers)
    if not Flags.get_quick() and Flags.get_remove():
        if len(xscale.get_remove()) > 0:

            xscale_remove = xscale.get_remove()
            current_remove = []
            final_remove = []

            # first ensure that there are no duplicate entries...
            if os.path.exists(os.path.join(
                self.get_working_directory(), 'REMOVE.HKL')):
                for line in open(os.path.join(
                    self.get_working_directory(),
                    'REMOVE.HKL'), 'r').readlines():
                    h, k, l = map(int, line.split()[:3])
                    z = float(line.split()[3])

                    if not (h, k, l, z) in current_remove:
                        current_remove.append((h, k, l, z))

                for c in xscale_remove:
                    if c in current_remove:
                        continue
                    final_remove.append(c)

                Debug.write(
                    '%d alien reflections are already removed' % \
                    (len(xscale_remove) - len(final_remove)))

            else:
                # we want to remove all of the new dodgy reflections
                final_remove = xscale_remove

            remove_hkl = open(os.path.join(
                self.get_working_directory(), 'REMOVE.HKL'), 'w')

            z_min = Flags.get_z_min()
            rejected = 0

            # write in the old reflections
            for remove in current_remove:
                z = remove[3]
                if z >= z_min:
                    remove_hkl.write('%d %d %d %f\n' % remove)
                else:
                    rejected += 1
            Debug.write('Wrote %d old reflections to REMOVE.HKL' % \
                        (len(current_remove) - rejected))
            Debug.write('Rejected %d as z < %f' % \
                        (rejected, z_min))

            # and the new reflections
            rejected = 0
            used = 0
            for remove in final_remove:
                z = remove[3]
                if z >= z_min:
                    used += 1
                    remove_hkl.write('%d %d %d %f\n' % remove)
                else:
                    rejected += 1
            Debug.write('Wrote %d new reflections to REMOVE.HKL' % \
                        (len(final_remove) - rejected))
            Debug.write('Rejected %d as z < %f' % \
                        (rejected, z_min))

            remove_hkl.close()

            # we want to rerun the finishing step so...
            # unless we have added no new reflections
            if used:
                self.set_scaler_done(False)

    if not self.get_scaler_done():
        Chatter.write('Excluding outlier reflections Z > %.2f' %
                      Flags.get_z_min())

        if Flags.get_egg():
            for record in ttt():
                Chatter.write(record)
        return

    debug_memory_usage()

    # now get the reflection files out and merge them with aimless

    output_files = xscale.get_output_reflection_files()
    wavelength_names = output_files.keys()

    # these are per wavelength - also allow for user defined resolution
    # limits a la bug # 3183. No longer...
    for epoch in self._sweep_information.keys():
        input = self._sweep_information[epoch]
        intgr = input['integrater']
        rkey = input['dname'], input['sname']

        if intgr.get_integrater_user_resolution():
            dmin = intgr.get_integrater_high_resolution()

            if not self._user_resolution_limits.has_key(rkey):
                self._scalr_resolution_limits[rkey] = dmin
                self._user_resolution_limits[rkey] = dmin
            elif dmin < self._user_resolution_limits[rkey]:
                self._scalr_resolution_limits[rkey] = dmin
                self._user_resolution_limits[rkey] = dmin

    self._scalr_scaled_refl_files = { }
    self._scalr_statistics = { }

    max_batches = 0
    mtz_dict = { }

    project_info = { }
    for epoch in self._sweep_information.keys():
        pname = self._scalr_pname
        xname = self._scalr_xname
        dname = self._sweep_information[epoch]['dname']
        reflections = os.path.split(
            self._sweep_information[epoch]['prepared_reflections'])[-1]
        project_info[reflections] = (pname, xname, dname)

    for epoch in self._sweep_information.keys():
        self._sweep_information[epoch]['scaled_reflections'] = None

    debug_memory_usage()

    for wavelength in wavelength_names:
        hklin = output_files[wavelength]

        xsh = XDSScalerHelper()
        xsh.set_working_directory(self.get_working_directory())

        ref = xsh.split_and_convert_xscale_output(
            hklin, 'SCALED_', project_info, 1.0 / scale_factor)

        for hklout in ref.keys():
            for epoch in self._sweep_information.keys():
                if os.path.split(self._sweep_information[epoch][
                    'prepared_reflections'])[-1] == \
                    os.path.split(hklout)[-1]:
                    if self._sweep_information[epoch][
                        'scaled_reflections'] != None:
                        raise RuntimeError, 'duplicate entries'
                    self._sweep_information[epoch][
                        'scaled_reflections'] = ref[hklout]

        del(xsh)

    debug_memory_usage()

    for epoch in self._sweep_information.keys():
        hklin = self._sweep_information[epoch]['scaled_reflections']
        dname = self._sweep_information[epoch]['dname']
        sname = self._sweep_information[epoch]['sname']

        hkl_copy = os.path.join(self.get_working_directory(),
                                'R_%s' % os.path.split(hklin)[-1])

        if not os.path.exists(hkl_copy):
            shutil.copyfile(hklin, hkl_copy)

        # let's properly listen to the user's resolution limit needs...
        if self._user_resolution_limits.get((dname, sname), False):
            resolution = self._user_resolution_limits[(dname, sname)]
        else:
            resolution = self._estimate_resolution_limit(hklin)

        Chatter.write('Resolution for sweep %s/%s: %.2f' % \
                      (dname, sname, resolution))

        if not (dname, sname) in self._scalr_resolution_limits:
            self._scalr_resolution_limits[(dname, sname)] = resolution
            self.set_scaler_done(False)
        else:
            if resolution < self._scalr_resolution_limits[(dname, sname)]:
                self._scalr_resolution_limits[(dname, sname)] = resolution
                self.set_scaler_done(False)

    debug_memory_usage()

    if not self.get_scaler_done():
        Debug.write('Returning as scaling not finished...')
        return

    self._sort_together_data_xds()

    highest_resolution = min(self._scalr_resolution_limits.values())

    self._scalr_highest_resolution = highest_resolution

    Debug.write('Scaler highest resolution set to %5.2f' % \
                highest_resolution)

    if not self.get_scaler_done():
        Debug.write('Returning as scaling not finished...')
        return

    sdadd_full = 0.0
    sdb_full = 0.0

    # ---------- FINAL MERGING ----------

    sc = self._factory.Aimless()

    FileHandler.record_log_file('%s %s aimless' % (self._scalr_pname,
                                                   self._scalr_xname),
                                sc.get_log_file())

    sc.set_resolution(highest_resolution)
    sc.set_hklin(self._prepared_reflections)
    sc.set_new_scales_file('%s_final.scales' % self._scalr_xname)

    if sdadd_full != 0.0 or sdb_full != 0.0:
        sc.add_sd_correction('both', 1.0, sdadd_full, sdb_full)

    for epoch in epochs:
        input = self._sweep_information[epoch]
        start, end = (min(input['batches']), max(input['batches']))

        rkey = input['dname'], input['sname']
        run_resolution_limit = self._scalr_resolution_limits[rkey]

        sc.add_run(start, end, exclude = False,
                   resolution = run_resolution_limit,
                   name = input['sname'])

    sc.set_hklout(os.path.join(self.get_working_directory(),
                               '%s_%s_scaled.mtz' % \
                               (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.multi_merge()

    FileHandler.record_xml_file('%s %s aimless xml' % (self._scalr_pname,
                                                       self._scalr_xname),
                                sc.get_xmlout())

    data = sc.get_summary()

    loggraph = sc.parse_ccp4_loggraph()

    standard_deviation_info = { }

    for key in loggraph.keys():
        if 'standard deviation v. Intensity' in key:
            dataset = key.split(',')[-1].strip()
            standard_deviation_info[dataset] = transpose_loggraph(
                loggraph[key])

    resolution_info = { }

    for key in loggraph.keys():
        if 'Analysis against resolution' in key:
            dataset = key.split(',')[-1].strip()
            resolution_info[dataset] = transpose_loggraph(
                loggraph[key])

    # and also radiation damage stuff...

    batch_info = { }

    for key in loggraph.keys():
        if 'Analysis against Batch' in key:
            dataset = key.split(',')[-1].strip()
            batch_info[dataset] = transpose_loggraph(
                loggraph[key])

    # finally put all of the results "somewhere useful"

    self._scalr_statistics = data

    self._scalr_scaled_refl_files = copy.deepcopy(
        sc.get_scaled_reflection_files())

    self._scalr_scaled_reflection_files = { }

    # also output the unmerged scalepack format files...
    sc = self._factory.Aimless()
    sc.set_resolution(highest_resolution)
    sc.set_hklin(self._prepared_reflections)
    sc.set_scalepack()

    for epoch in epochs:
        input = self._sweep_information[epoch]
        start, end = (min(input['batches']), max(input['batches']))

        rkey = input['dname'], input['sname']
        run_resolution_limit = self._scalr_resolution_limits[rkey]

        sc.add_run(start, end, exclude = False,
                   resolution = run_resolution_limit,
                   name = input['sname'])

    sc.set_hklout(os.path.join(self.get_working_directory(),
                               '%s_%s_scaled.mtz' % \
                               (self._scalr_pname, self._scalr_xname)))

    if self.get_scaler_anomalous():
        sc.set_anomalous()

    sc.multi_merge()

    self._scalr_scaled_reflection_files['sca_unmerged'] = { }
    self._scalr_scaled_reflection_files['mtz_unmerged'] = { }

    for dataset in sc.get_scaled_reflection_files().keys():
        hklout = sc.get_scaled_reflection_files()[dataset]

        # then mark the scalepack files for copying...

        scalepack = os.path.join(os.path.split(hklout)[0],
                                 os.path.split(hklout)[1].replace(
            '_scaled', '_scaled_unmerged').replace('.mtz', '.sca'))
        self._scalr_scaled_reflection_files['sca_unmerged'][
            dataset] = scalepack
        FileHandler.record_data_file(scalepack)

        mtz_unmerged = os.path.splitext(scalepack)[0] + '.mtz'
        self._scalr_scaled_reflection_files['mtz_unmerged'][
            dataset] = mtz_unmerged
        FileHandler.record_data_file(mtz_unmerged)

    if PhilIndex.params.xia2.settings.merging_statistics.source == 'cctbx':
        for key in self._scalr_scaled_refl_files:
            stats = self._compute_scaler_statistics(
                self._scalr_scaled_reflection_files['mtz_unmerged'][key])
            self._scalr_statistics[
                (self._scalr_pname, self._scalr_xname, key)] = stats

    # convert reflection files to .sca format - use mtz2various for this

    self._scalr_scaled_reflection_files['sca'] = { }
    self._scalr_scaled_reflection_files['hkl'] = { }

    for key in self._scalr_scaled_refl_files:
        f = self._scalr_scaled_refl_files[key]
        scaout = '%s.sca' % f[:-4]

        m2v = self._factory.Mtz2various()
        m2v.set_hklin(f)
        m2v.set_hklout(scaout)
        m2v.convert()

        self._scalr_scaled_reflection_files['sca'][key] = scaout
        FileHandler.record_data_file(scaout)

        if Flags.get_small_molecule():
            hklout = '%s.hkl' % f[:-4]

            m2v = self._factory.Mtz2various()
            m2v.set_hklin(f)
            m2v.set_hklout(hklout)
            m2v.convert_shelx()

            self._scalr_scaled_reflection_files['hkl'][key] = hklout
            FileHandler.record_data_file(hklout)

    return
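
# The REMOVE.HKL bookkeeping is duplicated almost verbatim between
# _integrate_finish and _scale above. A minimal self-contained sketch of that
# logic as a shared helper, assuming the same (h, k, l, z) tuple convention
# used by both callers; the name update_remove_hkl is hypothetical and not
# part of the xia2 API.

import os

def update_remove_hkl(working_directory, new_remove, z_min):
    '''Merge new outlier reflections into REMOVE.HKL, dropping any with
    z < z_min, and return the number of new reflections actually written.'''

    remove_path = os.path.join(working_directory, 'REMOVE.HKL')

    # read back what is already recorded, without duplicates
    current_remove = []
    if os.path.exists(remove_path):
        for line in open(remove_path, 'r').readlines():
            h, k, l = map(int, line.split()[:3])
            z = float(line.split()[3])
            if not (h, k, l, z) in current_remove:
                current_remove.append((h, k, l, z))

    # only reflections not already recorded count as new
    final_remove = [c for c in new_remove if c not in current_remove]

    remove_hkl = open(remove_path, 'w')
    used = 0
    for remove in current_remove:
        if remove[3] >= z_min:
            remove_hkl.write('%d %d %d %f\n' % remove)
    for remove in final_remove:
        if remove[3] >= z_min:
            used += 1
            remove_hkl.write('%d %d %d %f\n' % remove)
    remove_hkl.close()

    return used

# a caller would then recycle only if anything new was written, e.g.:
#   if update_remove_hkl(self.get_working_directory(),
#                        xscale.get_remove(), Flags.get_z_min()):
#       self.set_scaler_done(False)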