def Freerflag(DriverType=None):
    """A factory for FreerflagWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class FreerflagWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Freerflag, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            Citations.cite("ccp4")

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "freerflag"))

            self._free_fraction = 0.05

        def set_free_fraction(self, free_fraction):
            self._free_fraction = free_fraction

        def add_free_flag(self):
            self.check_hklin()
            self.check_hklout()
            self.start()
            self.input("freerfrac %.3f" % self._free_fraction)
            self.close_wait()
            self.check_for_errors()
            self.check_ccp4_errors()

        def complete_free_flag(self):
            self.check_hklin()
            self.check_hklout()
            free_column = FindFreeFlag(self.get_hklin())
            self.start()
            self.input("freerfrac %.3f" % self._free_fraction)
            self.input("complete FREE=%s" % free_column)
            self.close_wait()
            self.check_for_errors()
            self.check_ccp4_errors()

    return FreerflagWrapper()

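# Usage sketch (illustrative, not from the source): assumes a CCP4
# environment with CBIN set, the usual xia2 helpers (DriverFactory,
# DecoratorFactory, Citations, FindFreeFlag, os) in scope, and that the
# "ccp4" decorator supplies set_hklin/set_hklout. File names here are
# hypothetical.
#
#   ff = Freerflag()
#   ff.set_hklin("merged.mtz")
#   ff.set_hklout("merged_free.mtz")
#   ff.set_free_fraction(0.05)
#   ff.add_free_flag()        # assign a fresh 5% free set
#
#   # or, to extend an existing free set to new reflections:
#   ff.complete_free_flag()
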
def MosflmRefineCell(DriverType=None, indxr_print=True):
    """Factory for MosflmRefineCell wrapper classes, with the specified
    Driver type."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class MosflmRefineCellWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Mosflm cell refinement"""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ["CCP4"], "bin", "ipmosflm"))

            # local parameters used in autoindexing
            self._mosflm_autoindex_sol = 0
            self._mosflm_autoindex_thresh = None
            self._mosflm_spot_file = None

            self._images = []

            self._reverse_phi = False
            self._template = None
            self._directory = None
            self._beam_centre = None
            self._wavelength = None
            self._distance = None
            self._unit_cell = None
            self._space_group_number = None
            self._input_mat_file = None
            self._output_mat_file = None
            self._gain = None
            self._mosaic = None
            self._resolution = None
            self._fix_mosaic = False
            self._sdfac = None
            self._add_autoindex = False
            self._lim_x = None
            self._lim_y = None
            self._ignore_cell_refinement_failure = False
            self._parameters = {}

            self._refined_beam_centre = None
            self._refined_distance = None
            self._refined_distance2 = None
            self._refined_distortion_tilt = None
            self._refined_distortion_twist = None
            self._refined_mosaic = None
            self._refined_unit_cell = None
            self._raster = None
            self._cell_refinement_ok = False
            self._separation = None

        def set_images(self, images):
            self._images = list(images)

        def set_reverse_phi(self, reverse_phi):
            self._reverse_phi = reverse_phi

        def set_directory(self, directory):
            self._directory = directory

        def set_template(self, template):
            self._template = template

        def set_beam_centre(self, beam_centre):
            self._beam_centre = tuple(beam_centre)

        def set_wavelength(self, wavelength):
            self._wavelength = wavelength

        def set_distance(self, distance):
            self._distance = abs(distance)

        def set_unit_cell(self, unit_cell):
            self._unit_cell = unit_cell

        def set_space_group_number(self, space_group_number):
            self._space_group_number = space_group_number

        def set_input_mat_file(self, mat_file):
            self._input_mat_file = mat_file

        def set_output_mat_file(self, mat_file):
            self._output_mat_file = mat_file

        def set_gain(self, gain):
            self._gain = gain

        def set_mosaic(self, mosaic):
            self._mosaic = mosaic

        def set_resolution(self, resolution):
            self._resolution = resolution

        def set_fix_mosaic(self, fix_mosaic):
            self._fix_mosaic = fix_mosaic

        def set_sdfac(self, sdfac):
            self._sdfac = sdfac

        def set_limits(self, lim_x, lim_y):
            self._lim_x = lim_x
            self._lim_y = lim_y

        def set_add_autoindex(self, add_autoindex):
            self._add_autoindex = add_autoindex

        def set_ignore_cell_refinement_failure(self,
                                               ignore_cell_refinement_failure):
            self._ignore_cell_refinement_failure = \
                ignore_cell_refinement_failure

        def update_parameters(self, parameters):
            self._parameters.update(parameters)

        def run(self):
            """Run mosflm cell refinement"""

            assert len(self._images) > 0

            self.start()

            if self._gain is not None:
                self.input("gain %5.2f" % self._gain)

            if self._reverse_phi:
                self.input("detector reversephi")

            assert self._template is not None and self._directory is not None
            assert (self._input_mat_file is not None
                    and self._output_mat_file is not None)
            assert self._mosaic is not None

            self.input('template "%s"' % self._template)
            self.input('directory "%s"' % self._directory)
            self.input("matrix %s" % self._input_mat_file)
            self.input("newmat %s" % self._output_mat_file)

            if self._beam_centre is not None:
                self.input("beam %f %f" % self._beam_centre)
            if self._wavelength is not None:
                self.input("wavelength %f" % self._wavelength)
            if self._distance is not None:
                self.input("distance %f" % self._distance)
            if self._space_group_number is not None:
                self.input("symmetry %d" % self._space_group_number)

            self.input("mosaic %f" % self._mosaic)

            if self._resolution is not None:
                self.input("resolution %f" % self._resolution)

            if self._fix_mosaic:
                self.input("postref fix mosaic")

            if self._sdfac is not None:
                self.input("postref sdfac %f" % self._sdfac)

            # belt + braces mode - only to be used when considering failover,
            # will run an additional step of autoindexing prior to cell
            # refinement, to be used only after proving that not doing it
            # will result in cell refinement failure - will use the first
            # wedge... N.B. this is only useful if the indexer is Labelit
            # not Mosflm...

            if self._add_autoindex:
                cri = self._images[0]
                for j in range(cri[0], 1 + cri[1]):
                    self.input("autoindex dps refine image %d" % j)
                self.input("go")

            if self._parameters:
                self.input("!parameters from autoindex run")
                for p, v in self._parameters.items():
                    self.input("%s %s" % (p, str(v)))

            if self._lim_x is not None and self._lim_y is not None:
                self.input("limits xscan %f yscan %f" %
                           (self._lim_x, self._lim_y))

            self.input("separation close")
            self.input("refinement residual 15.0")
            self.input("refinement include partials")

            self._reorder_cell_refinement_images()

            self.input("postref multi segments %d repeat 10" %
                       len(self._images))
            self.input("postref maxresidual 5.0")

            genfile = os.path.join(os.environ["CCP4_SCR"],
                                   "%d_mosflm.gen" % self.get_xpid())

            self.input("genfile %s" % genfile)

            for cri in self._images:
                self.input("process %d %d" % cri)
                self.input("go")

            # that should be everything
            self.close_wait()

            # get the log file
            output = self.get_all_output()

            # then look to see if the cell refinement worked ok - if it
            # didn't then this may indicate that the lattice was wrongly
            # selected.

            self._cell_refinement_ok = False

            for o in output:
                if "Cell refinement is complete" in o:
                    self._cell_refinement_ok = True

            if not self._cell_refinement_ok:
                if not self._ignore_cell_refinement_failure:
                    return [0.0], [0.0]

            rms_values = None
            new_cycle_number = 0
            new_rms_values = {}
            new_image_counter = None
            new_ignore_update = False

            parse_image = 0
            background_residual = {}

            for i in range(len(output)):
                o = output[i]

                if "Processing will be aborted" in o:
                    raise BadLatticeError("cell refinement failed")

                if "An unrecoverable error has occurred in MOSFLM" in o:
                    raise BadLatticeError("cell refinement failed")

                if "Processing Image" in o:
                    new_image_counter = int(o.split()[2])

                if "As this is near to the start" in o:
                    new_ignore_update = True

                if "Post-refinement will use partials" in o:
                    if new_ignore_update:
                        new_ignore_update = False
                    else:
                        new_cycle_number += 1
                        new_rms_values[new_cycle_number] = {}

                if "Final rms residual" in o:
                    rv = float(o.replace("mm", " ").split()[3])
                    new_rms_values[new_cycle_number][new_image_counter] = rv

                if "Rms positional error (mm) as a function of" in o:
                    images = []
                    cycles = []
                    rms_values = {}

                    j = i + 1

                    while output[j].split():
                        if "Image" in output[j]:
                            for image in map(
                                    int,
                                    output[j].replace("Image", "").split()):
                                images.append(image)
                        else:
                            cycle = int(
                                output[j].replace("Cycle", "").split()[0])

                            if cycle not in cycles:
                                cycles.append(cycle)
                                rms_values[cycle] = []

                            record = [
                                output[j][k:k + 6]
                                for k in range(11, len(output[j]), 6)
                            ]

                            data = []
                            for r in record:
                                if r.strip():
                                    data.append(r.strip())
                            record = data

                            try:
                                values = list(map(float, record))
                                for v in values:
                                    rms_values[cycle].append(v)
                            except ValueError:
                                Debug.write("Error parsing %s as floats" %
                                            output[j][12:])

                        j += 1

                    for cycle in new_rms_values.keys():
                        images = sorted(new_rms_values[cycle].keys())
                        rms_values[cycle] = []
                        for ii in images:
                            rms_values[cycle].append(new_rms_values[cycle][ii])

                # look for "error" type problems

                if ("is greater than the maximum allowed" in o
                        and "FINAL weighted residual" in o):
                    Debug.write("Large weighted residual... ignoring")

                if "INACCURATE CELL PARAMETERS" in o:

                    # get the inaccurate cell parameters in question
                    parameters = output[i + 3].lower().split()

                    # and the standard deviations - so we can decide
                    # if it really has failed

                    sd_record = (output[i + 5].replace("A", " ").replace(
                        ",", " ").split())
                    sds = list(
                        map(float, [sd_record[j] for j in range(1, 12, 2)]))

                    Debug.write("Standard deviations:")
                    Debug.write("A %4.2f B %4.2f C %4.2f" % (tuple(sds[:3])))
                    Debug.write("Alpha %4.2f Beta %4.2f Gamma %4.2f" %
                                (tuple(sds[3:6])))

                    Debug.write(
                        "In cell refinement, the following cell parameters")
                    Debug.write("have refined poorly:")
                    for p in parameters:
                        Debug.write("... %s" % p)

                    Debug.write("However, will continue to integration.")

                if "One or more cell parameters has changed by more" in o:
                    # this is a more severe example of the above problem...
                    Debug.write("Cell refinement is unstable...")
                    raise BadLatticeError("Cell refinement failed")

                # other possible problems in the cell refinement - a
                # negative mosaic spread, for instance

                if "Refined mosaic spread (excluding safety factor)" in o:
                    mosaic = float(o.split()[-1])
                    if mosaic < 0.00:
                        Debug.write("Negative mosaic spread (%5.2f)" % mosaic)
                        raise NegativeMosaicError("refinement failed")

            parse_cycle = 1
            parse_image = 0

            background_residual = {}

            for i, o in enumerate(output):
                if "Processing Image" in o:
                    parse_image = int(o.split()[2])

                if "Repeating the entire run" in o:
                    parse_cycle += 1

                if "Background residual" in o:
                    res = float(o.replace("residual=", "").split()[8])

                    if parse_cycle not in background_residual:
                        background_residual[parse_cycle] = {}
                    background_residual[parse_cycle][parse_image] = res

                if "Cell refinement is complete" in o:
                    j = i + 2
                    refined_cell = list(map(float, output[j].split()[2:]))
                    error = list(map(float, output[j + 1].split()[1:]))

                    names = ["A", "B", "C", "Alpha", "Beta", "Gamma"]

                    Debug.write("Errors in cell parameters (relative %)")

                    for j in range(6):
                        Debug.write("%s\t%7.3f %5.3f %.3f" % (
                            names[j],
                            refined_cell[j],
                            error[j],
                            100.0 * error[j] / refined_cell[j],
                        ))

                if "Refined cell" in o:
                    refined_cell = tuple(map(float, o.split()[-6:]))
                    self._refined_unit_cell = refined_cell

                # FIXME with these are they really on one line?

                if "Detector distance as a" in o:
                    j = i + 1
                    while output[j].strip() != "":
                        j += 1
                    distances = list(map(float, output[j - 1].split()[2:]))
                    distance = 0.0
                    for d in distances:
                        distance += d
                    distance /= len(distances)
                    # XXX FIXME not sure why there are two separate distances
                    # extracted from the log file, and which one is the
                    # "correct" one
                    self._refined_distance2 = distance

                if "YSCALE as a function" in o:
                    j = i + 1
                    while output[j].strip() != "":
                        j += 1
                    yscales = list(map(float, output[j - 1].split()[2:]))
                    yscale = 0.0
                    for y in yscales:
                        yscale += y
                    yscale /= len(yscales)
                    self._refined_distortion_yscale = yscale

                if "Final optimised raster parameters:" in o:
                    self._raster = o.split(":")[1].strip()

                if "Separation parameters updated to" in o:
                    tokens = o.replace("mm", " ").split()
                    self._separation = (tokens[4], tokens[8])

                if "XCEN YCEN XTOFRA" in o:
                    numbers = output[i + 1].split()
                    self._refined_beam_centre = (numbers[0], numbers[1])
                    self._refined_distance = float(
                        numbers[3])  # XXX duplicate of above?
                    self._refined_distortion_tilt = numbers[5]
                    self._refined_distortion_twist = numbers[6]

                if "Refined mosaic spread" in o:
                    self._refined_mosaic = float(o.split()[-1])

            self._rms_values = rms_values
            self._background_residual = background_residual

            return rms_values, background_residual

        def _reorder_cell_refinement_images(self):
            if not self._images:
                raise RuntimeError("no cell refinement images to reorder")

            hashmap = {}
            for m in self._images:
                hashmap[m[0]] = m[1]

            keys = sorted(hashmap.keys())
            cell_ref_images = [(k, hashmap[k]) for k in keys]
            self._images = cell_ref_images

        def get_rms_values(self):
            return self._rms_values

        def get_background_residual(self):
            return self._background_residual

        def get_refined_unit_cell(self):
            return self._refined_unit_cell

        def get_refined_beam_centre(self):
            return self._refined_beam_centre

        def get_refined_distance(self):
            return self._refined_distance

        def get_refined_distance2(self):
            return self._refined_distance2

        def get_raster(self):
            return self._raster

        def get_separation(self):
            return self._separation

        def get_refined_distortion_yscale(self):
            return self._refined_distortion_yscale

        def get_refined_distortion_tilt(self):
            return self._refined_distortion_tilt

        def get_refined_distortion_twist(self):
            return self._refined_distortion_twist

        def get_refined_mosaic(self):
            return self._refined_mosaic

        def cell_refinement_ok(self):
            return self._cell_refinement_ok

    return MosflmRefineCellWrapper()

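# Usage sketch (illustrative): cell refinement over three wedges. The
# template, directory, matrix files, mosaic estimate and image ranges are
# hypothetical and would normally come from a preceding autoindexing step.
#
#   rc = MosflmRefineCell()
#   rc.set_template("insulin_1_###.img")
#   rc.set_directory("/data/insulin")
#   rc.set_input_mat_file("xiaindex.mat")
#   rc.set_output_mat_file("xiarefine.mat")
#   rc.set_mosaic(0.45)
#   rc.set_images([(1, 4), (45, 48), (90, 93)])
#   rms_values, background_residual = rc.run()
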
def Cad(DriverType=None):
    '''A factory for CadWrapper classes.'''

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4')

    class CadWrapper(CCP4DriverInstance.__class__):
        '''A wrapper for Cad, using the CCP4-ified Driver.'''

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get('CBIN', ''), 'cad'))

            self._hklin_files = []
            self._new_cell_parameters = None
            self._new_column_suffix = None
            self._pname = None
            self._xname = None
            self._dname = None

            # stuff to specifically copy in the freer column...
            self._freein = None
            self._freein_column = 'FreeR_flag'

        def add_hklin(self, hklin):
            '''Add a reflection file to the list to be sorted together.'''
            self._hklin_files.append(hklin)

        def set_freein(self, freein):
            # check that this file actually exists and that it has a
            # sensible FreeR column...
            if not os.path.exists(freein):
                raise RuntimeError('reflection file does not exist: %s' %
                                   freein)

            cname = FindFreeFlag(freein)
            Debug.write('FreeR_flag column identified as %s' % cname)

            self._freein = freein
            self._freein_column = cname

        def set_project_info(self, pname, xname, dname):
            self._pname = pname
            self._xname = xname
            self._dname = dname

        def set_new_suffix(self, suffix):
            '''Set a column suffix for this dataset.'''
            self._new_column_suffix = suffix

        def set_new_cell(self, cell):
            '''Set a new unit cell for this dataset.'''
            self._new_cell_parameters = cell

        def merge(self):
            '''Merge multiple reflection files into one file.'''

            if not self._hklin_files:
                raise RuntimeError('no hklin files defined')

            self.check_hklout()

            # for each reflection file, need to gather the column names
            # and so on, to put in the cad input here - also check to see
            # if the column names clash... check also that the spacegroups
            # match up...

            spacegroup = None
            column_names = []
            column_names_by_file = {}

            for hklin in self._hklin_files:
                md = Mtzdump()
                md.set_working_directory(self.get_working_directory())
                md.set_hklin(hklin)
                md.dump()
                columns = md.get_columns()
                spag = md.get_spacegroup()

                if spacegroup is None:
                    spacegroup = spag

                if spag != spacegroup:
                    raise RuntimeError('spacegroups do not match')

                column_names_by_file[hklin] = []

                for c in columns:
                    name = c[0]
                    if name in ['H', 'K', 'L']:
                        continue
                    if name in column_names:
                        raise RuntimeError('duplicate column names')
                    column_names.append(name)
                    column_names_by_file[hklin].append(name)

            # if we get to here then this is a good set up...

            # create the command line

            hklin_counter = 0
            for hklin in self._hklin_files:
                hklin_counter += 1
                self.add_command_line('hklin%d' % hklin_counter)
                self.add_command_line(hklin)

            self.start()

            hklin_counter = 0
            for hklin in self._hklin_files:
                column_counter = 0
                hklin_counter += 1
                labin_command = 'labin file_number %d' % hklin_counter

                for column in column_names_by_file[hklin]:
                    column_counter += 1
                    labin_command += ' E%d=%s' % (column_counter, column)

                self.input(labin_command)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError as e:
                # something went wrong; remove the output file
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            return self.get_ccp4_status()

        def update(self):
            '''Update the information for one reflection file.'''

            if not self._hklin_files:
                raise RuntimeError('no hklin files defined')

            if len(self._hklin_files) > 1:
                raise RuntimeError('can have only one hklin to update')

            hklin = self._hklin_files[0]

            self.check_hklout()

            column_names_by_file = {}
            dataset_names_by_file = {}

            md = Mtzdump()
            md.set_hklin(hklin)
            md.dump()
            columns = md.get_columns()

            column_names_by_file[hklin] = []
            dataset_names_by_file[hklin] = md.get_datasets()

            # get a dataset ID - see FIXME 03/NOV/06 below...
            dataset_ids = [md.get_dataset_info(d)['id']
                           for d in md.get_datasets()]

            for c in columns:
                name = c[0]
                if name in ['H', 'K', 'L']:
                    continue
                column_names_by_file[hklin].append(name)

            self.add_command_line('hklin1')
            self.add_command_line(hklin)
            self.start()

            dataset_id = dataset_ids[0]

            if self._pname and self._xname and self._dname:
                self.input('drename file_number 1 %d %s %s' %
                           (dataset_id, self._xname, self._dname))
                self.input('dpname file_number 1 %d %s' %
                           (dataset_id, self._pname))

            column_counter = 0
            labin_command = 'labin file_number 1'
            for column in column_names_by_file[hklin]:
                column_counter += 1
                labin_command += ' E%d=%s' % (column_counter, column)

            self.input(labin_command)

            # FIXME perhaps - ASSERT that we want only the information from
            # the first dataset here...

            pname, xname, dname = dataset_names_by_file[hklin][0].split('/')
            dataset_id = dataset_ids[0]

            # FIXME 03/NOV/06 this needs to id the dataset by its number
            # not by pname/xname/dname, as the latter gets confused if the
            # xname is a number...

            if self._new_cell_parameters:
                a, b, c, alpha, beta, gamma = self._new_cell_parameters
                self.input('dcell file_number 1 %d %f %f %f %f %f %f' %
                           (dataset_id, a, b, c, alpha, beta, gamma))

            if self._new_column_suffix:
                suffix = self._new_column_suffix
                column_counter = 0
                labout_command = 'labout file_number 1'
                for column in column_names_by_file[hklin]:
                    column_counter += 1
                    labout_command += ' E%d=%s_%s' % \
                                      (column_counter, column, suffix)

                self.input(labout_command)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError as e:
                # something went wrong; remove the output file
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            return self.get_ccp4_status()

        def copyfree(self):
            '''Copy the free column from freein into hklin -> hklout.'''

            if not self._hklin_files:
                raise RuntimeError('no hklin files defined')

            if len(self._hklin_files) > 1:
                raise RuntimeError('can have only one hklin to update')

            hklin = self._hklin_files[0]

            # get the resolution limit to give as a limit for the FreeR
            # column

            md = Mtzdump()
            md.set_working_directory(self.get_working_directory())
            md.set_hklin(hklin)
            md.dump()
            resolution_range = md.get_resolution_range()

            self.check_hklout()

            if self._freein is None:
                raise RuntimeError('freein not defined')

            if self._freein_column is None:
                raise RuntimeError('freein column not defined')

            self.add_command_line('hklin1')
            self.add_command_line(self._freein)
            self.add_command_line('hklin2')
            self.add_command_line(hklin)
            self.start()

            self.input('labin file_number 1 E1=%s' % self._freein_column)
            self.input('resolution file_number 1 %f %f' % resolution_range)
            self.input('labin file_number 2 all')

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError as e:
                # something went wrong; remove the output file
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            return self.get_ccp4_status()

    return CadWrapper()

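# Usage sketch (illustrative): copying a FreeR column from a reference
# file into a freshly merged data set - the file names are hypothetical.
#
#   cad = Cad()
#   cad.add_hklin("new_data.mtz")
#   cad.set_freein("reference_free.mtz")
#   cad.set_hklout("new_data_free.mtz")
#   cad.copyfree()
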
def Reindex(DriverType=None):
    """A new factory for ReindexWrapper classes, which will actually use
    pointless."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class ReindexWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Reindex, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "pointless"))

            # reindex specific things
            self._spacegroup = None

            # this should be of the form e.g. k, l, h
            self._operator = None

            # results
            self._cell = None

        def set_spacegroup(self, spacegroup):
            """Set the spacegroup to reindex the reflections to."""
            self._spacegroup = spacegroup

        def set_operator(self, operator):
            """Set the reindexing operator for mapping from in to out."""
            # pointless doesn't like reindex operators with '*'
            if operator is not None:
                operator = operator.replace("*", "")
            self._operator = operator

        def get_cell(self):
            return self._cell

        def check_reindex_errors(self):
            """Check the standard output for standard reindex errors."""
            pass

        def reindex_old(self):
            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "reindex"))
            self.check_hklin()
            self.check_hklout()

            if not self._spacegroup and not self._operator:
                raise RuntimeError("reindex requires spacegroup or operator")

            self.start()

            # look up the space group number to cope with complex symbols
            # that old fashioned CCP4 reindex does not understand...
            from cctbx.sgtbx import space_group, space_group_symbols

            sg_t = space_group(
                space_group_symbols(str(self._spacegroup))).type()

            if self._operator:
                self.input("reindex %s" % str(self._operator))
            if self._spacegroup:
                self.input("symmetry %d" % sg_t.number())
            self.close_wait()

            # check for errors
            try:
                self.check_for_errors()
            except RuntimeError as e:
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            output = self.get_all_output()

            for j, o in enumerate(output):
                if "Cell Dimensions : (obsolete" in o:
                    self._cell = list(map(float, output[j + 2].split()))

            return "OK"

        def cctbx_reindex(self):
            from xia2.Modules.MtzUtils import reindex

            reindex(self._hklin, self._hklout, self._operator,
                    space_group=self._spacegroup)
            return "OK"

        def reindex(self):
            """Actually perform the reindexing."""

            if PhilIndex.params.ccp4.reindex.program == "reindex":
                return self.reindex_old()
            elif PhilIndex.params.ccp4.reindex.program == "cctbx":
                return self.cctbx_reindex()

            self.check_hklin()
            self.check_hklout()

            if not self._spacegroup and not self._operator:
                raise RuntimeError("reindex requires spacegroup or operator")

            if self._operator:
                self._operator = \
                    self._operator.replace("[", "").replace("]", "")

            Debug.write("Reindex... %s %s" %
                        (self._spacegroup, self._operator))

            self.start()

            if self._spacegroup:
                if isinstance(self._spacegroup, int):
                    spacegroup = Syminfo.spacegroup_number_to_name(
                        self._spacegroup)
                elif self._spacegroup[0] in "0123456789":
                    spacegroup = Syminfo.spacegroup_number_to_name(
                        int(self._spacegroup))
                else:
                    spacegroup = self._spacegroup

                self.input("spacegroup '%s'" % spacegroup)

            if self._operator:
                # likewise
                self.input("reindex '%s'" % self._operator)
            else:
                self.input("reindex 'h,k,l'")

            self.close_wait()

            # check for errors
            try:
                self.check_for_errors()
            except RuntimeError as e:
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            output = self.get_all_output()

            for j, o in enumerate(output):
                if "Cell Dimensions : (obsolete" in o:
                    self._cell = list(map(float, output[j + 2].split()))
                elif "ReindexOp: syntax error in operator" in o:
                    raise RuntimeError(o)

            return "OK"

    return ReindexWrapper()

def Mtz2various(DriverType=None):
    '''A factory for Mtz2variousWrapper classes.'''

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4')

    class Mtz2variousWrapper(CCP4DriverInstance.__class__):
        '''A wrapper for Mtz2various, using the CCP4-ified Driver.'''

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get('CBIN', ''), 'mtz2various'))

            # this will allow extraction of specific intensities
            # from a multi-set reflection file
            self._dataset_suffix = ''

        def set_suffix(self, suffix):
            if suffix:
                self._dataset_suffix = '_%s' % suffix
            else:
                self._dataset_suffix = suffix

        def convert(self):
            '''Convert the input reflection file to .sca format.'''

            self.check_hklin()
            self.check_hklout()

            self.start()

            labin = 'I(+)=I(+)%s SIGI(+)=SIGI(+)%s ' % \
                    (self._dataset_suffix, self._dataset_suffix)
            labin += 'I(-)=I(-)%s SIGI(-)=SIGI(-)%s' % \
                     (self._dataset_suffix, self._dataset_suffix)

            self.input('output scal')
            self.input('labin %s' % labin)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError:
                # something went wrong; remove the output file and re-raise
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise

        def convert_shelx(self, unmerged=False):
            '''Convert the input reflection file to SHELX hklf4 format.'''

            self.check_hklin()
            self.check_hklout()

            self.start()

            if self._dataset_suffix or unmerged:
                labin = 'I=I%s SIGI=SIGI%s' % \
                        (self._dataset_suffix, self._dataset_suffix)
            else:
                labin = 'I=IMEAN SIGI=SIGIMEAN'

            self.input('output shelx')
            self.input('labin %s' % labin)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError:
                # something went wrong; remove the output file and re-raise
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise

    return Mtz2variousWrapper()

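# Usage sketch (illustrative): exporting the intensities of the dataset
# with column suffix "_PEAK" to Scalepack format - names are hypothetical.
#
#   m2v = Mtz2various()
#   m2v.set_hklin("scaled.mtz")
#   m2v.set_hklout("scaled.sca")
#   m2v.set_suffix("PEAK")
#   m2v.convert()
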
def MosflmIndex(DriverType=None, indxr_print=True):
    '''Factory for MosflmIndex wrapper classes, with the specified
    Driver type.'''

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4')

    class MosflmIndexWrapper(CCP4DriverInstance.__class__):
        '''A wrapper for Mosflm indexing - which will provide functionality
        for deciding the beam centre and indexing the diffraction
        pattern.'''

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ['CCP4'], 'bin', 'ipmosflm'))

            # local parameters used in autoindexing
            self._mosflm_autoindex_sol = 0
            self._mosflm_autoindex_thresh = None
            self._mosflm_spot_file = None

            self._images = []

            self._reverse_phi = False
            self._template = None
            self._directory = None
            self._beam_centre = None
            self._wavelength = None
            self._distance = None
            self._unit_cell = None
            self._space_group_number = None
            self._solution_number = 0
            self._threshold = 20.0
            self._solutions = {}

        def set_images(self, images):
            self._images = list(images)

        def set_reverse_phi(self, reverse_phi):
            self._reverse_phi = reverse_phi

        def set_directory(self, directory):
            self._directory = directory

        def set_template(self, template):
            self._template = template

        def set_beam_centre(self, beam_centre):
            self._beam_centre = beam_centre

        def set_wavelength(self, wavelength):
            self._wavelength = wavelength

        def set_distance(self, distance):
            self._distance = distance

        def set_unit_cell(self, unit_cell):
            self._unit_cell = unit_cell

        def set_space_group_number(self, space_group_number):
            self._space_group_number = space_group_number

        def set_threshold(self, threshold):
            self._threshold = threshold

        def set_solution_number(self, solution_number):
            self._solution_number = solution_number

        def run(self):
            '''Run mosflm indexing'''

            assert len(self._images) > 0

            self._images.sort()

            self.start()

            if self._reverse_phi:
                self.input('detector reversephi')

            assert self._template is not None and self._directory is not None

            self.input('template "%s"' % self._template)
            self.input('directory "%s"' % self._directory)
            self.input('newmat xiaindex.mat')

            if self._beam_centre is not None:
                self.input('beam %f %f' % tuple(self._beam_centre))
            if self._wavelength is not None:
                self.input('wavelength %f' % self._wavelength)
            if self._distance is not None:
                self.input('distance %f' % abs(self._distance))
            if self._unit_cell is not None:
                self.input('cell %f %f %f %f %f %f' % self._unit_cell)
            if self._space_group_number is not None:
                self.input('symmetry %d' % self._space_group_number)

            for i in self._images:
                if self._solution_number > 0:
                    self.input(
                        'autoindex dps refine image %d thresh %d solu %d' %
                        (i, self._threshold, self._solution_number))
                else:
                    self.input(
                        'autoindex dps refine image %d thresh %d' %
                        (i, self._threshold))

            for i in self._images:
                self.input('mosaic estimate %d' % i)
                self.input('go')

            self.close_wait()

            #sweep = self.get_indexer_sweep_name()
            #FileHandler.record_log_file(
            #    '%s INDEX' % (sweep), self.get_log_file())

            # check for errors
            self.check_for_errors()

            # ok now we're done, let's look through for some useful stuff
            output = self.get_all_output()

            self._solutions = _parse_mosflm_index_output(output)

            self._refined_cell = None
            self._refined_beam_centre = None
            self._lattice = None
            self._mosaic_spreads = []
            self._refined_detector_distance = None

            for o in output:
                if 'Final cell (after refinement)' in o:
                    self._refined_cell = tuple(map(float, o.split()[-6:]))
                if 'Beam coordinates of' in o:
                    self._refined_beam_centre = tuple(
                        map(float, o.split()[-2:]))

                # FIXED this may not be there if this is a repeat indexing!
                if 'Symmetry:' in o:
                    self._lattice = o.split(':')[1].split()[0]

                # so we have to resort to this instead...
                if 'Refining solution #' in o:
                    from cctbx.sgtbx.bravais_types import bravais_lattice
                    self._indexed_space_group_number = int(
                        o.split(')')[0].split()[-1])
                    self._lattice = str(
                        bravais_lattice(
                            number=self._indexed_space_group_number))

                if 'The mosaicity has been estimated' in o:
                    ms = float(o.split('>')[1].split()[0])
                    self._mosaic_spreads.append(ms)

                if 'The mosaicity estimation has not worked for some' in o:
                    # this is a problem... in particular with the
                    # mosflm built on linux in CCP4 6.0.1...
                    # FIXME this should be a specific kind of
                    # exception e.g. an IndexError
                    raise IndexingError('mosaicity estimation failed')

                # mosflm doesn't refine this in autoindexing...
                if 'Crystal to detector distance of' in o:
                    d = float(o.split()[5].replace('mm', ''))
                    if self._distance is None or self._distance >= 0:
                        self._refined_detector_distance = d
                    else:
                        self._refined_detector_distance = -d

                # but it does complain if it is different to the header
                # value - so just use the input value in this case...
                if 'Input crystal to detector distance' in o \
                        and 'does NOT agree with' in o:
                    self._refined_detector_distance = self._distance

                if 'parameters have been set to' in o:
                    self._raster = list(map(int, o.split()[-5:]))

                if '(currently SEPARATION' in o:
                    self._separation = list(
                        map(float, o.replace(')', '').split()[-2:]))

                # get the resolution estimate out...
                if '99% have resolution' in o:
                    self._resolution_estimate = float(o.split()[-2])

        def get_solutions(self):
            return self._solutions

        def get_refined_unit_cell(self):
            return self._refined_cell

        def get_refined_beam_centre(self):
            return self._refined_beam_centre

        def get_lattice(self):
            return self._lattice

        def get_indexed_space_group_number(self):
            return self._indexed_space_group_number

        def get_mosaic_spreads(self):
            return self._mosaic_spreads

        def get_refined_distance(self):
            return self._refined_detector_distance

        def get_raster(self):
            return self._raster

        def get_separation(self):
            return self._separation

        def get_resolution_estimate(self):
            return self._resolution_estimate

    return MosflmIndexWrapper()

def Sortmtz(DriverType=None):
    """A factory for SortmtzWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class SortmtzWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Sortmtz, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "sortmtz"))

            self._sort_order = "H K L M/ISYM BATCH"
            self._hklin_files = []

        def add_hklin(self, hklin):
            """Add a reflection file to the list to be sorted together."""
            self._hklin_files.append(hklin)

        def check_sortmtz_errors(self):
            """Check the output for "standard" errors."""

            lwbat_warning = ""

            for l in self.get_all_output():
                if "From ccp4_lwbat: warning:" in l:
                    lwbat_warning = l.split("warning:")[1].strip()
                if "error in ccp4_lwbat" in l:
                    raise RuntimeError(lwbat_warning)
                if "Sorting failed" in l:
                    raise RuntimeError("sorting failed")
                if "Inconsistent operator orders in input file" in l:
                    raise RuntimeError("different sort orders")

        def sort(self, vrset=None):
            """Actually sort the reflections."""

            if len(self._hklin_files) == 1:
                self.set_hklin(self._hklin_files[0])
                self._hklin_files = []

            if not self._hklin_files:
                self.check_hklin()

            self.check_hklout()

            if self._hklin_files:
                self.set_task("Sorting reflections %s => %s" % (
                    " ".join(self._hklin_files),
                    os.path.split(self.get_hklout())[-1],
                ))
            else:
                self.set_task("Sorting reflections %s => %s" % (
                    os.path.split(self.get_hklin())[-1],
                    os.path.split(self.get_hklout())[-1],
                ))

            self.start()

            # allow for the fact that large negative reflections may
            # result from XDS output...
            if vrset:
                self.input("VRSET_MAGIC %f" % vrset)

            self.input(self._sort_order)

            if self._hklin_files:
                for m in self._hklin_files:
                    self.input('"%s"' % m)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
                status = self.get_ccp4_status()
                if "Error" in status:
                    raise RuntimeError("[SORTMTZ] %s" % status)
                self.check_sortmtz_errors()
            except RuntimeError as e:
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            return self.get_ccp4_status()

    return SortmtzWrapper()

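# Usage sketch (illustrative): sorting two unmerged MTZ files into a
# single sorted file - the file names are hypothetical.
#
#   sm = Sortmtz()
#   sm.add_hklin("sweep1.mtz")
#   sm.add_hklin("sweep2.mtz")
#   sm.set_hklout("sorted.mtz")
#   sm.sort()
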
def Scaleit(DriverType=None):
    """A factory for ScaleitWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class ScaleitWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Scaleit, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "scaleit"))

            self._columns = []
            self._statistics = {}
            self._anomalous = False

        def set_anomalous(self, anomalous):
            self._anomalous = anomalous

        def find_columns(self):
            """Identify columns to use with scaleit."""

            # run mtzdump to get a list of columns out and also check that
            # this is a valid merged mtz file....
            self.check_hklin()

            md = Mtzdump()
            md.set_hklin(self.get_hklin())
            md.dump()

            # get information to check that this is merged

            # next get the column information - check that F columns are
            # present

            column_info = md.get_columns()

            columns = []
            j = 0
            groups = 0

            # assert that the columns for F, SIGF, DANO, SIGDANO for a
            # particular group will appear in that order if anomalous,
            # F, SIGF if not anomalous

            while j < len(column_info):
                c = column_info[j]
                name = c[0]
                typ = c[1]

                if typ == "F" and name.split("_")[0] == "F" \
                        and self._anomalous:
                    groups += 1
                    for i in range(4):
                        columns.append(column_info[i + j][0])
                    j += 4
                elif typ == "F" and name.split("_")[0] == "F" \
                        and not self._anomalous:
                    groups += 1
                    for i in range(2):
                        columns.append(column_info[i + j][0])
                    j += 2
                else:
                    j += 1

            # ok that should be all of the groups identified

            self._columns = columns

            return columns

        def check_scaleit_errors(self):
            for record in self.get_all_output():
                if "SCALEIT: ** No reflections **" in record:
                    raise RuntimeError("no reflections")

        def scaleit(self):
            """Run scaleit and get some interesting facts out."""

            self.check_hklin()

            # need to have a HKLOUT even if we do not want the
            # reflections...
            self.check_hklout()

            if not self._columns:
                self.find_columns()

            self.start()
            self.input("nowt")
            self.input("converge ncyc 4")
            self.input("converge abs 0.001")
            self.input("converge tolr -7")
            self.input("refine anisotropic wilson")
            self.input("auto")

            labin = "labin FP=%s SIGFP=%s" % (self._columns[0],
                                              self._columns[1])

            if self._anomalous:
                groups = len(self._columns) // 4
            else:
                groups = len(self._columns) // 2

            for j in range(groups):
                if self._anomalous:
                    labin += " FPH%d=%s" % (j + 1, self._columns[4 * j])
                    labin += " SIGFPH%d=%s" % (j + 1,
                                               self._columns[4 * j + 1])
                    labin += " DPH%d=%s" % (j + 1, self._columns[4 * j + 2])
                    labin += " SIGDPH%d=%s" % (j + 1,
                                               self._columns[4 * j + 3])
                else:
                    labin += " FPH%d=%s" % (j + 1, self._columns[2 * j])
                    labin += " SIGFPH%d=%s" % (j + 1,
                                               self._columns[2 * j + 1])

            self.input(labin)

            self.close_wait()

            # check for errors
            try:
                self.check_for_errors()
                self.check_ccp4_errors()
                self.check_scaleit_errors()
            except RuntimeError as e:
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            output = self.get_all_output()

            # generate mapping from derivative number to data set
            self._statistics["mapping"] = {}
            for j in range(groups):
                if self._anomalous:
                    self._statistics["mapping"][j + 1] = \
                        self._columns[4 * j].replace("F_", "")
                else:
                    self._statistics["mapping"][j + 1] = \
                        self._columns[2 * j].replace("F_", "")

            # now get some interesting information out...

            j = 0
            r_values = []

            while j < len(output):
                line = output[j]

                if "APPLICATION OF SCALES AND ANALYSIS OF DIFFERENCES" \
                        in line:
                    current_derivative = -1

                    while "SUMMARY_END" not in line:
                        lst = line.split()

                        if "Derivative" in lst:
                            if "b_factor" not in self._statistics:
                                self._statistics["b_factor"] = {}
                            self._statistics["b_factor"][int(lst[1])] = {
                                "scale": float(lst[2]),
                                "b": float(lst[3]),
                                "dname":
                                    self._statistics["mapping"][int(lst[1])],
                            }
                            current_derivative = int(lst[1])

                        if "The equivalent isotropic" in line:
                            self._statistics["b_factor"][
                                current_derivative]["b"] = float(lst[-1])

                        j += 1
                        line = output[j]

                if "acceptable differences are less than" in line \
                        and groups == 1:
                    max_difference = float(line.split()[-1])
                    if max_difference > 0.01:
                        self._statistics["max_difference"] = max_difference

                if "THE TOTALS" in line:
                    r_values.append(float(line.split()[6]))

                j += 1

            # transform back the r values to the statistics
            for j in range(len(r_values)):
                d = j + 1
                self._statistics["b_factor"][d]["r"] = r_values[j]

        def get_statistics(self):
            """Get the statistics from the Scaleit run."""
            return self._statistics

    return ScaleitWrapper()

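# Usage sketch (illustrative): scaling derivative F columns against the
# native and pulling out B factors / R values. This assumes the merged
# MTZ uses the F_dname column naming that find_columns() expects; the
# file names are hypothetical.
#
#   si = Scaleit()
#   si.set_hklin("native_and_derivatives.mtz")
#   si.set_hklout("scaleit.mtz")
#   si.set_anomalous(True)
#   si.scaleit()
#   stats = si.get_statistics()
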
def MosflmIntegrate(DriverType=None, indxr_print=True):
    '''Factory for MosflmIntegrate wrapper classes, with the specified
    Driver type.'''

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4')

    class MosflmIntegrateWrapper(CCP4DriverInstance.__class__):
        '''A wrapper for Mosflm integration.'''

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ['CCP4'], 'bin', 'ipmosflm'))

            # local parameters used in autoindexing
            self._mosflm_autoindex_sol = 0
            self._mosflm_autoindex_thresh = None
            self._mosflm_spot_file = None

            self._mosflm_hklout = None

            self._images = []

            self._reverse_phi = False
            self._template = None
            self._directory = None
            self._beam_centre = None
            self._wavelength = None
            self._distance = None
            self._unit_cell = None
            self._space_group_number = None
            self._refine_profiles = True
            self._threshold = 20.0
            self._pname = None
            self._xname = None
            self._dname = None
            self._exclude_ice = False
            self._exclude_regions = None
            self._instructions = []
            self._input_mat_file = None
            self._output_mat_file = None
            self._mosaic = None
            self._gain = None
            self._d_min = None
            self._d_max = None
            self._mask = None
            self._lim_x = None
            self._lim_y = None
            self._fix_mosaic = False
            self._pre_refinement = False
            self._parameters = {}

            self._bgsig_too_large = False
            self._getprof_error = False
            self._batches_out = None
            self._mosaic_spreads = None
            self._spot_status = None
            self._residuals = None
            self._postref_result = {}
            self._nref = None
            self._detector_gain_error = False
            self._suggested_gain = None

        def set_image_range(self, image_range):
            self._image_range = image_range

        def set_reverse_phi(self, reverse_phi):
            self._reverse_phi = reverse_phi

        def set_directory(self, directory):
            self._directory = directory

        def set_template(self, template):
            self._template = template

        def set_beam_centre(self, beam_centre):
            self._beam_centre = beam_centre

        def set_wavelength(self, wavelength):
            self._wavelength = wavelength

        def set_distance(self, distance):
            self._distance = abs(distance)

        def set_unit_cell(self, unit_cell):
            self._unit_cell = unit_cell

        def set_space_group_number(self, space_group_number):
            self._space_group_number = space_group_number

        def set_threshold(self, threshold):
            self._threshold = threshold

        def set_refine_profiles(self, refine_profiles):
            self._refine_profiles = refine_profiles

        def set_exclude_ice(self, exclude_ice):
            self._exclude_ice = exclude_ice

        def set_exclude_regions(self, exclude_regions):
            self._exclude_regions = exclude_regions

        def add_instruction(self, instruction):
            self._instructions.append(instruction)

        def set_pname_xname_dname(self, pname, xname, dname):
            self._pname = pname
            self._xname = xname
            self._dname = dname

        def set_input_mat_file(self, mat_file):
            self._input_mat_file = mat_file

        def set_output_mat_file(self, mat_file):
            self._output_mat_file = mat_file

        def set_mosaic(self, mosaic):
            self._mosaic = mosaic

        def set_gain(self, gain):
            self._gain = gain

        def set_d_min(self, d_min):
            self._d_min = d_min

        def set_d_max(self, d_max):
            self._d_max = d_max

        def set_mask(self, mask):
            self._mask = mask

        def set_limits(self, lim_x, lim_y):
            self._lim_x = lim_x
            self._lim_y = lim_y

        def set_fix_mosaic(self, fix_mosaic):
            self._fix_mosaic = fix_mosaic

        def set_pre_refinement(self, pre_refinement):
            self._pre_refinement = pre_refinement

        def update_parameters(self, parameters):
            self._parameters.update(parameters)

        def get_per_image_statistics(self):
            return self._per_image_statistics

        def run(self):
            '''Run mosflm integration'''

            assert self._space_group_number is not None

            summary_file = 'summary_%s.log' % self._space_group_number

            self.add_command_line('SUMMARY')
            self.add_command_line(summary_file)

            self.start()

            if not self._refine_profiles:
                self.input('profile nooptimise')

            if [self._pname, self._xname, self._dname].count(None) == 0:
                self.input('harvest on')
                self.input('pname %s' % self._pname)
                self.input('xname %s' % self._xname)
                self.input('dname %s' % self._dname)

            if self._reverse_phi:
                self.input('detector reversephi')

            assert self._template is not None and self._directory is not None
            self.input('template "%s"' % self._template)
            self.input('directory "%s"' % self._directory)

            if self._exclude_ice:
                for record in open(
                        os.path.abspath(
                            os.path.join(os.path.dirname(__file__),
                                         '..', '..', 'Data',
                                         'ice-rings.dat'))).readlines():
                    resol = tuple(map(float, record.split()[:2]))
                    self.input('resolution exclude %.2f %.2f' % resol)

            if self._exclude_regions is not None:
                for upper, lower in self._exclude_regions:
                    self.input('resolution exclude %.2f %.2f' %
                               (upper, lower))

            for instruction in self._instructions:
                self.input(instruction)

            self.input('matrix %s' % self._input_mat_file)

            assert self._beam_centre is not None
            assert self._distance is not None
            assert self._mosaic is not None

            self.input('beam %f %f' % tuple(self._beam_centre))
            self.input('distance %f' % self._distance)
            self.input('mosaic %f' % self._mosaic)
            if self._unit_cell is not None:
                self.input('cell %f %f %f %f %f %f' % self._unit_cell)

            self.input('refinement include partials')

            if self._wavelength is not None:
                self.input('wavelength %f' % self._wavelength)

            if self._parameters:
                for p, v in self._parameters.items():
                    self.input('%s %s' % (p, str(v)))

            self.input('symmetry %d' % self._space_group_number)

            if self._gain is not None:
                self.input('gain %5.2f' % self._gain)

            # check for resolution limits
            if self._d_min is not None:
                if self._d_max is not None:
                    self.input('resolution %f %f' %
                               (self._d_min, self._d_max))
                else:
                    self.input('resolution %f' % self._d_min)

            if self._mask is not None:
                record = 'limits quad'
                for m in self._mask:
                    record += ' %.1f %.1f' % m
                self.input(record)

            # set up the integration
            self.input('postref fix all')
            self.input('postref maxresidual 5.0')

            if self._lim_x is not None and self._lim_y is not None:
                self.input('limits xscan %f yscan %f' %
                           (self._lim_x, self._lim_y))

            if self._fix_mosaic:
                self.input('postref fix mosaic')

            #self.input('separation close')

            ## XXX FIXME this is a horrible hack - I at least need to
            ## sand box this ...
            #if self.get_header_item('detector') == 'raxis':
            #    self.input('adcoffset 0')

            genfile = os.path.join(os.environ['CCP4_SCR'],
                                   '%d_mosflm.gen' % self.get_xpid())

            self.input('genfile %s' % genfile)

            # add an extra chunk of orientation refinement
            # XXX FIXME
            if self._pre_refinement:
                a, b = self._image_range
                if b - a > 3:
                    b = a + 3

                self.input('postref multi segments 1')
                self.input('process %d %d' % (a, b))
                self.input('go')

                self.input('postref nosegment')

                if self._fix_mosaic:
                    self.input('postref fix mosaic')

            self.input('separation close')
            self.input('process %d %d' % (self._image_range[0],
                                          self._image_range[1]))

            self.input('go')

            # that should be everything
            self.close_wait()

            # get the log file
            output = self.get_all_output()

            integrated_images_first = 1.0e6
            integrated_images_last = -1.0e6

            # look for major errors
            for i in range(len(output)):
                o = output[i]
                if 'LWBAT: error in ccp4_lwbat' in o:
                    raise RuntimeError('serious mosflm error - inspect %s' %
                                       self.get_log_file())

            mosaics = []

            for i in range(len(output)):
                o = output[i]

                if 'Integrating Image' in o:
                    batch = int(o.split()[2])
                    if batch < integrated_images_first:
                        integrated_images_first = batch
                    if batch > integrated_images_last:
                        integrated_images_last = batch

                if 'Smoothed value for refined mosaic' in o:
                    mosaics.append(float(o.split()[-1]))

                if 'ERROR IN DETECTOR GAIN' in o:
                    self._detector_gain_error = True

                    # look for the correct gain
                    for j in range(i, i + 10):
                        if output[j].split()[:2] == ['set', 'to']:
                            gain = float(output[j].split()[-1][:-1])

                            # check that this is not the input
                            # value... Bug # 3374

                            if self._gain:
                                if math.fabs(gain - self._gain) > 0.02:
                                    self._suggested_gain = gain
                            else:
                                self._suggested_gain = gain

                # FIXME if mosaic spread refines to a negative value
                # once the lattice has passed the triclinic postrefinement
                # test then fix this by setting "POSTREF FIX MOSAIC" and
                # restarting.
                if 'Smoothed value for refined mosaic spread' in o:
                    mosaic = float(o.split()[-1])
                    if mosaic < 0.0:
                        raise IntegrationError('negative mosaic spread')

                if 'WRITTEN OUTPUT MTZ FILE' in o:
                    self._mosflm_hklout = os.path.join(
                        self.get_working_directory(),
                        output[i + 1].split()[-1])

                if 'Number of Reflections' in o:
                    self._nref = int(o.split()[-1])

                # if a BGSIG error happened try not refining the
                # profile and running again...
                if 'BGSIG too large' in o:
                    self._bgsig_too_large = True

                if 'An unrecoverable error has occurred in GETPROF' in o:
                    self._getprof_error = True

                if 'MOSFLM HAS TERMINATED EARLY' in o:
                    raise RuntimeError(
                        'integration failed: reason unknown (log %s)' %
                        self.get_log_file())

            if not self._mosflm_hklout:
                raise RuntimeError('processing abandoned')

            self._batches_out = (integrated_images_first,
                                 integrated_images_last)

            self._mosaic_spreads = mosaics

            self._per_image_statistics = \
                _parse_mosflm_integration_output(output)

            # inspect the output for e.g. very high weighted residuals

            images = sorted(self._per_image_statistics.keys())

            # FIXME bug 2175 this should probably look at the distribution
            # of values rather than the peak, since this is probably a better
            # diagnostic of a poor lattice.
            residuals = []
            for i in images:
                if 'weighted_residual' in self._per_image_statistics[i]:
                    residuals.append(
                        self._per_image_statistics[i]['weighted_residual'])

            self._residuals = residuals

            try:
                self._postref_result = _parse_summary_file(
                    os.path.join(self.get_working_directory(), summary_file))
            except AssertionError:
                self._postref_result = {}

            return self._mosflm_hklout

        def get_hklout(self):
            return self._mosflm_hklout

        def get_nref(self):
            return self._nref

        def get_bgsig_too_large(self):
            return self._bgsig_too_large

        def get_getprof_error(self):
            return self._getprof_error

        def get_batches_out(self):
            return self._batches_out

        def get_mosaic_spreads(self):
            return self._mosaic_spreads

        def get_spot_status(self):
            return self._spot_status

        def get_residuals(self):
            return self._residuals

        def get_postref_result(self):
            return self._postref_result

        def get_detector_gain_error(self):
            return self._detector_gain_error

        def get_suggested_gain(self):
            return self._suggested_gain

    return MosflmIntegrateWrapper()

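# Usage sketch (illustrative): integrating a 90-image sweep. The matrix
# file, mosaic estimate, beam centre and distance would normally come from
# the preceding indexing / cell refinement steps; all values here are
# hypothetical.
#
#   mi = MosflmIntegrate()
#   mi.set_template("insulin_1_###.img")
#   mi.set_directory("/data/insulin")
#   mi.set_input_mat_file("xiarefine.mat")
#   mi.set_beam_centre((94.3, 94.5))
#   mi.set_distance(160.0)
#   mi.set_mosaic(0.45)
#   mi.set_space_group_number(19)
#   mi.set_image_range((1, 90))
#   hklout = mi.run()
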
def Pointless(DriverType=None):
    """A factory for PointlessWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class PointlessWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Pointless, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "pointless"))

            self._input_laue_group = None

            self._pointgroup = None
            self._spacegroup = None
            self._reindex_matrix = None
            self._reindex_operator = None
            self._spacegroup_reindex_matrix = None
            self._spacegroup_reindex_operator = None
            self._confidence = 0.0
            self._hklref = None
            self._xdsin = None
            self._probably_twinned = False
            self._allow_out_of_sequence_files = False

            # pname, xname, dname stuff for when we are copying reflections
            self._pname = None
            self._xname = None
            self._dname = None

            # space to store all possible solutions, to allow discussion of
            # the correct lattice with the indexer... this should be a
            # list containing e.g. 'tP'
            self._possible_lattices = []

            self._lattice_to_laue = {}

            # all "likely" spacegroups...
            self._likely_spacegroups = []

            # and unit cell information
            self._cell_info = {}
            self._cell = None

            # and scale factors to use in conversion
            self._scale_factor = 1.0

        def set_scale_factor(self, scale_factor):
            self._scale_factor = scale_factor

        def set_hklref(self, hklref):
            self._hklref = hklref

        def set_allow_out_of_sequence_files(self, allow=True):
            self._allow_out_of_sequence_files = allow

        def get_hklref(self):
            return self._hklref

        def set_project_info(self, pname, xname, dname):
            self._pname = pname
            self._xname = xname
            self._dname = dname

        def check_hklref(self):
            if self._hklref is None:
                raise RuntimeError("hklref not defined")
            if not os.path.exists(self._hklref):
                raise RuntimeError("hklref %s does not exist" % self._hklref)

        def set_xdsin(self, xdsin):
            self._xdsin = xdsin

        def get_xdsin(self):
            return self._xdsin

        def check_xdsin(self):
            if self._xdsin is None:
                raise RuntimeError("xdsin not defined")
            if not os.path.exists(self._xdsin):
                raise RuntimeError("xdsin %s does not exist" % self._xdsin)

        def set_correct_lattice(self, lattice):
            """In a rerunning situation, set the correct lattice, which will
            assert a correct lauegroup based on the previous run of the
            program..."""

            if self._lattice_to_laue == {}:
                raise RuntimeError("no lattice to lauegroup mapping")

            if lattice not in self._lattice_to_laue:
                raise RuntimeError("lattice %s not possible" % lattice)

            self._input_laue_group = self._lattice_to_laue[lattice]

        def sum_mtz(self, summedlist):
            """Sum partials in an MTZ file from Mosflm to a text file."""

            self.add_command_line("-c")
            self.check_hklin()
            self.start()
            self.input("output summedlist %s" % summedlist)
            self.close_wait()

            # get out the unit cell - we will need this...
            output = self.get_all_output()

            cell = None

            for j in range(len(output)):
                line = output[j]
                if "Space group from HKLIN file" in line:
                    cell = tuple(map(float, output[j + 1].split()[1:]))

            return cell

        def limit_batches(self, first, last):
            """Replacement for rebatch, removing batches."""
            self.check_hklin()
            self.check_hklout()

            self.add_command_line("-c")
            self.start()

            if first > 1:
                self.input("exclude batch %d to %d" % (0, first - 1))
            self.input("exclude batch %d to %d" % (last + 1, 9999999))

            self.close_wait()

        def xds_to_mtz(self):
            """Use pointless to convert XDS file to MTZ."""

            if not self._xdsin:
                raise RuntimeError("XDSIN not set")

            self.check_hklout()

            # -c for copy - just convert the file to MTZ multirecord
            self.add_command_line("-c")
            self.start()

            if self._pname and self._xname and self._dname:
                self.input(
                    "name project %s crystal %s dataset %s"
                    % (self._pname, self._xname, self._dname)
                )

            self.input("xdsin %s" % self._xdsin)

            if self._scale_factor:
                Debug.write("Scaling intensities by factor %e" %
                            self._scale_factor)
                self.input("multiply %e" % self._scale_factor)

            self.close_wait()

            # FIXME need to check the status and so on here

            if self._xdsin:
                from xia2.Wrappers.XDS import XDS

                XDS.add_xds_version_to_mtz_history(self.get_hklout())

        def decide_pointgroup(self, ignore_errors=False, batches=None):
            """Decide on the correct pointgroup for hklin."""

            if not self._xdsin:
                self.check_hklin()
                self.set_task(
                    "Computing the correct pointgroup for %s" % self.get_hklin()
                )
            else:
                Debug.write("Pointless using XDS input file %s" % self._xdsin)
                self.set_task(
                    "Computing the correct pointgroup for %s" % self.get_xdsin()
                )

            # FIXME this should probably be a standard CCP4 keyword

            if self._xdsin:
                self.add_command_line("xdsin")
                self.add_command_line(self._xdsin)

            self.add_command_line("xmlout")
            self.add_command_line("%d_pointless.xml" % self.get_xpid())

            if self._hklref:
                self.add_command_line("hklref")
                self.add_command_line(self._hklref)

            self.start()

            if self._allow_out_of_sequence_files:
                self.input("allow outofsequencefiles")

            # https://github.com/xia2/xia2/issues/125 pass in run limits for
            # this HKLIN file - prevents automated RUN determination from
            # causing errors
            if batches:
                self.input("run 1 batch %d to %d" % tuple(batches))

            self.input("systematicabsences off")
            self.input("setting symmetry-based")

            if self._hklref:
                dev = PhilIndex.params.xia2.settings.developmental
                if dev.pointless_tolerance > 0.0:
                    self.input("tolerance %f" % dev.pointless_tolerance)

            # may expect more %age variation for small molecule data
            if PhilIndex.params.xia2.settings.small_molecule:
                if self._hklref:
                    self.input("tolerance 5.0")
            if PhilIndex.params.xia2.settings.symmetry.chirality is not None:
                self.input(
                    "chirality %s"
                    % PhilIndex.params.xia2.settings.symmetry.chirality
                )

            if self._input_laue_group:
                self.input("lauegroup %s" % self._input_laue_group)

            self.close_wait()

            # check for errors
            self.check_for_errors()

            # check for fatal errors
            output = self.get_all_output()

            fatal_error = False

            for j, record in enumerate(output):
                if "FATAL ERROR message:" in record:
                    if ignore_errors:
                        fatal_error = True
                    else:
                        raise RuntimeError(
                            "Pointless error: %s" % output[j + 1].strip()
                        )
                if (
                    "Resolution range of Reference data and observed data do not"
                    in record
                    and ignore_errors
                ):
                    fatal_error = True
                if "All reflection pairs rejected" in record and ignore_errors:
                    fatal_error = True
                if (
                    "Reference data and observed data do not overlap" in record
                    and ignore_errors
                ):
                    fatal_error = True

            hklin_spacegroup = ""

            # split loop - first seek hklin symmetry then later look for
            # everything else

            for o in self.get_all_output():
                if "Spacegroup from HKLIN file" in o:
                    hklin_spacegroup = spacegroup_name_xHM_to_old(
                        o.replace("Spacegroup from HKLIN file :", "").strip()
                    )
                if "Space group from HKLREF file" in o:
                    hklref_spacegroup = spacegroup_name_xHM_to_old(
                        o.replace("Space group from HKLREF file :", "").strip()
                    )

            # https://github.com/xia2/xia2/issues/115
            if fatal_error:
                assert hklref_spacegroup
                self._pointgroup = hklref_spacegroup
                self._confidence = 1.0
                self._totalprob = 1.0
                self._reindex_matrix = [
                    1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0
                ]
                self._reindex_operator = "h,k,l"
                return "ok"

            for o in self.get_all_output():
                if "No alternative indexing possible" in o:
                    # then the XML file will be broken - no worries...
                    self._pointgroup = hklin_spacegroup
                    self._confidence = 1.0
                    self._totalprob = 1.0
                    self._reindex_matrix = [
                        1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0
                    ]
                    self._reindex_operator = "h,k,l"
                    return "ok"

                if "**** Incompatible symmetries ****" in o:
                    raise RuntimeError(
                        "reindexing against a reference with different symmetry"
                    )

                if "***** Stopping because cell discrepancy between files" in o:
                    raise RuntimeError(
                        "incompatible unit cells between data sets")

                if "L-test suggests that the data may be twinned" in o:
                    self._probably_twinned = True

            # parse the XML file for the information I need...
            xml_file = os.path.join(
                self.get_working_directory(),
                "%d_pointless.xml" % self.get_xpid()
            )
            mend_pointless_xml(xml_file)

            # catch the case sometimes on ppc mac where pointless adds
            # an extra .xml on the end...
            if not os.path.exists(xml_file) and \
                    os.path.exists("%s.xml" % xml_file):
                xml_file = "%s.xml" % xml_file

            if not self._hklref:
                dom = xml.dom.minidom.parse(xml_file)

                try:
                    best = dom.getElementsByTagName("BestSolution")[0]
                except IndexError:
                    raise RuntimeError("error getting solution from pointless")

                self._pointgroup = (
                    best.getElementsByTagName("GroupName")[0].childNodes[0].data
                )
                self._confidence = float(
                    best.getElementsByTagName("Confidence")[0].childNodes[0].data
                )
                self._totalprob = float(
                    best.getElementsByTagName("TotalProb")[0].childNodes[0].data
                )
                self._reindex_matrix = list(map(
                    float,
                    best.getElementsByTagName("ReindexMatrix")[0]
                    .childNodes[0]
                    .data.split(),
                ))
                self._reindex_operator = clean_reindex_operator(
                    best.getElementsByTagName("ReindexOperator")[0]
                    .childNodes[0]
                    .data.strip()
                )
            else:
                # if we have provided a HKLREF input then the xml output
                # is changed...

                # FIXME in here, need to check if there is the legend
                # "No possible alternative indexing" in the standard
                # output, as this will mean that the index scores are
                # not there... c/f oppf1314, with latest pointless build
                # 1.2.14.
                dom = xml.dom.minidom.parse(xml_file)

                try:
                    best = dom.getElementsByTagName("IndexScores")[0]
                except IndexError:
                    Debug.write("Reindex not found in xml output")

                    # check for this legend then
                    found = False
                    for record in self.get_all_output():
                        if "No possible alternative indexing" in record:
                            found = True

                    if not found:
                        raise RuntimeError("error finding solution")

                    best = None

                hklref_pointgroup = ""

                # FIXME need to get this from the reflection file HKLREF
                reflection_file_elements = \
                    dom.getElementsByTagName("ReflectionFile")

                for rf in reflection_file_elements:
                    stream = rf.getAttribute("stream")
                    if stream == "HKLREF":
                        hklref_pointgroup = (
                            rf.getElementsByTagName("SpacegroupName")[0]
                            .childNodes[0]
                            .data.strip()
                        )
                        # Chatter.write('HKLREF pointgroup is %s' %
                        #               hklref_pointgroup)

                if hklref_pointgroup == "":
                    raise RuntimeError("error finding HKLREF pointgroup")

                self._pointgroup = hklref_pointgroup

                self._confidence = 1.0
                self._totalprob = 1.0

                if best:
                    index = best.getElementsByTagName("Index")[0]

                    self._reindex_matrix = list(map(
                        float,
                        index.getElementsByTagName("ReindexMatrix")[0]
                        .childNodes[0]
                        .data.split(),
                    ))
                    self._reindex_operator = clean_reindex_operator(
                        index.getElementsByTagName("ReindexOperator")[0]
                        .childNodes[0]
                        .data.strip()
                    )
                else:
                    # no alternative indexing is possible so just
                    # assume the default...
                    self._reindex_matrix = [
                        1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0
                    ]
                    self._reindex_operator = "h,k,l"

            if not self._input_laue_group and not self._hklref:
                scorelist = dom.getElementsByTagName("LaueGroupScoreList")[0]
                scores = scorelist.getElementsByTagName("LaueGroupScore")

                for s in scores:
                    lauegroup = (
                        s.getElementsByTagName("LaueGroupName")[0]
                        .childNodes[0].data
                    )
                    netzc = float(
                        s.getElementsByTagName("NetZCC")[0].childNodes[0].data
                    )

                    # record this as a possible lattice if its Z score is
                    # positive
                    lattice = lauegroup_to_lattice(lauegroup)
                    if lattice not in self._possible_lattices:
                        if netzc > 0.0:
                            self._possible_lattices.append(lattice)

                        # do we not always want to have access to the
                        # solutions, even if they are unlikely - this will
                        # only be invoked if they are known to
                        # be right...
                        self._lattice_to_laue[lattice] = lauegroup

            return "ok"

        def decide_spacegroup(self):
            """Given data indexed in the correct pointgroup, have a
            guess at the spacegroup."""

            if not self._xdsin:
                self.check_hklin()
                self.set_task(
                    "Computing the correct spacegroup for %s" % self.get_hklin()
                )
            else:
                Debug.write("Pointless using XDS input file %s" % self._xdsin)
                self.set_task(
                    "Computing the correct spacegroup for %s" % self.get_xdsin()
                )

            # FIXME this should probably be a standard CCP4 keyword

            if self._xdsin:
                self.add_command_line("xdsin")
                self.add_command_line(self._xdsin)

            self.add_command_line("xmlout")
            self.add_command_line("%d_pointless.xml" % self.get_xpid())

            self.add_command_line("hklout")
            self.add_command_line("pointless.mtz")

            self.start()

            self.input("lauegroup hklin")
            self.input("setting symmetry-based")

            if PhilIndex.params.xia2.settings.symmetry.chirality is not None:
                self.input(
                    "chirality %s"
                    % PhilIndex.params.xia2.settings.symmetry.chirality
                )

            self.close_wait()

            # check for errors
            self.check_for_errors()

            xml_file = os.path.join(
                self.get_working_directory(),
                "%d_pointless.xml" % self.get_xpid()
            )
            mend_pointless_xml(xml_file)

            if not os.path.exists(xml_file) and \
                    os.path.exists("%s.xml" % xml_file):
                xml_file = "%s.xml" % xml_file

            dom = xml.dom.minidom.parse(xml_file)

            sg_list = dom.getElementsByTagName("SpacegroupList")[0]
            sg_node = sg_list.getElementsByTagName("Spacegroup")[0]
            best_prob = float(
                sg_node.getElementsByTagName("TotalProb")[0]
                .childNodes[0].data.strip()
            )

            # FIXME 21/NOV/06 in here record a list of valid spacegroups
            # (that is, those which are as likely as the most likely)
            # for later use...
            self._spacegroup = (
                sg_node.getElementsByTagName("SpacegroupName")[0]
                .childNodes[0]
                .data.strip()
            )
            self._spacegroup_reindex_operator = (
                sg_node.getElementsByTagName("ReindexOperator")[0]
                .childNodes[0]
                .data.strip()
            )
            self._spacegroup_reindex_matrix = tuple(
                map(
                    float,
                    sg_node.getElementsByTagName("ReindexMatrix")[0]
                    .childNodes[0]
                    .data.split(),
                )
            )

            # get a list of "equally likely" spacegroups
            for node in sg_list.getElementsByTagName("Spacegroup"):
                prob = float(
                    node.getElementsByTagName("TotalProb")[0]
                    .childNodes[0].data.strip()
                )
                name = (
                    node.getElementsByTagName("SpacegroupName")[0]
                    .childNodes[0]
                    .data.strip()
                )

                if math.fabs(prob - best_prob) < 0.01:
                    # this is jolly likely!
self._likely_spacegroups.append(name) # now parse the output looking for the unit cell information - # this should look familiar from mtzdump output = self.get_all_output() length = len(output) a = 0.0 b = 0.0 c = 0.0 alpha = 0.0 beta = 0.0 gamma = 0.0 self._cell_info["datasets"] = [] self._cell_info["dataset_info"] = {} for i in range(length): line = output[i][:-1] if "Dataset ID, " in line: block = 0 while output[block * 5 + i + 2].strip(): dataset_number = int(output[5 * block + i + 2].split()[0]) project = output[5 * block + i + 2][10:].strip() crystal = output[5 * block + i + 3][10:].strip() dataset = output[5 * block + i + 4][10:].strip() cell = map(float, output[5 * block + i + 5].strip().split()) wavelength = float(output[5 * block + i + 6].strip()) dataset_id = "%s/%s/%s" % (project, crystal, dataset) self._cell_info["datasets"].append(dataset_id) self._cell_info["dataset_info"][dataset_id] = {} self._cell_info["dataset_info"][dataset_id][ "wavelength" ] = wavelength self._cell_info["dataset_info"][dataset_id]["cell"] = cell self._cell_info["dataset_info"][dataset_id][ "id" ] = dataset_number block += 1 for dataset in self._cell_info["datasets"]: cell = self._cell_info["dataset_info"][dataset]["cell"] a += cell[0] b += cell[1] c += cell[2] alpha += cell[3] beta += cell[4] gamma += cell[5] n = len(self._cell_info["datasets"]) self._cell = (a / n, b / n, c / n, alpha / n, beta / n, gamma / n) if self._xdsin: from xia2.Wrappers.XDS import XDS XDS.add_xds_version_to_mtz_history(self.get_hklout()) return "ok" def get_reindex_matrix(self): return self._reindex_matrix def get_reindex_operator(self): return self._reindex_operator def get_pointgroup(self): return self._pointgroup def get_spacegroup(self): return self._spacegroup def get_cell(self): return self._cell def get_probably_twinned(self): return self._probably_twinned def get_spacegroup_reindex_operator(self): return self._spacegroup_reindex_operator def get_spacegroup_reindex_matrix(self): return self._spacegroup_reindex_matrix def get_likely_spacegroups(self): return self._likely_spacegroups def get_confidence(self): return self._confidence def get_possible_lattices(self): return self._possible_lattices return PointlessWrapper()
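# A minimal usage sketch for the Pointless wrapper above: decide the
# pointgroup for a scaled-but-unmerged reflection file and report the
# reindex operation needed to get there. The enclosing factory is assumed
# to be named Pointless(), matching the Wrapper it returns and the naming
# pattern of the other factories in this module; the file name is a
# placeholder, and set_hklin() comes from the underlying Driver.
def _example_decide_pointgroup(hklin="unmerged.mtz"):
    pointless = Pointless()
    pointless.set_hklin(hklin)
    if pointless.decide_pointgroup() == "ok":
        return (
            pointless.get_pointgroup(),
            pointless.get_reindex_operator(),
            pointless.get_possible_lattices(),
        )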
def Chef(DriverType=None): """A factory for wrappers for the chef.""" DriverInstance = DriverFactory.Driver(DriverType) CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4") class ChefWrapper(CCP4DriverInstance.__class__): """Provide access to the functionality in chef.""" def __init__(self): CCP4DriverInstance.__class__.__init__(self) self.set_executable("pychef") self._hklin_list = [] self._anomalous = False self._b_width = 0.0 self._b_max = 0.0 self._b_labin = None self._resolution = 0.0 self._p_crd = True self._completeness = {} # this will be parsed from the Chef program output (which # reconstructs this) if available self._dose_profile = {} self._title = None def add_hklin(self, hklin): self._hklin_list.append(hklin) def set_anomalous(self, anomalous): self._anomalous = anomalous def set_resolution(self, resolution): self._resolution = resolution def set_max(self, max): self._b_max = max def get_completeness(self, wavelength): return self._completeness[wavelength] def run(self): """Actually run chef...""" if not self._hklin_list: raise RuntimeError("HKLIN not defined") for j, hklin in enumerate(self._hklin_list): self.add_command_line("HKLIN%d" % (j + 1)) self.add_command_line(hklin) self.start() if self._anomalous: self.input("anomalous on") if self._b_width > 0.0: self.input("range width %f" % self._b_width) if self._b_max > 0.0: self.input("range max %f" % self._b_max) if self._resolution > 0.0: self.input("resolution %.2f" % self._resolution) if self._b_labin: self.input("labin BASE=%s" % self._b_labin) if self._title: self.input("title %s" % self._title) self.close_wait() # FIXME should check the status here... # read out the completeness curves... output = self.get_all_output() all_doses = [] for j, record in enumerate(output): if "Completeness vs. BASELINE" in record: dataset = record.split()[-1] completeness = [] k = j + 2 record = output[k] while "Expected" not in record and "$TABLE" not in record: completeness.append((float(record.split()[0]), float(record.split()[-1]))) dose = float(record.split()[0]) if dose not in all_doses: all_doses.append(dose) k += 1 record = output[k] self._completeness[dataset] = completeness # now jimmy these.. for dataset in self._completeness: completeness = self._completeness[dataset] cmax = completeness[-1][1] cnew = [] # hash this up ctable = {} for c in completeness: ctable[c[0]] = c[1] for dose in all_doses: if dose in ctable: cnew.append((dose, ctable[dose])) else: cnew.append((dose, cmax)) self._completeness[dataset] = cnew # at some point need to figure out how to analyse these # results... self.parse() def digest_rd(self, values): """Digest the results of an Rd calculation, working on the assumptions that (i) the corresponding dose values are meaningless and (ii) we are trying to decide if there is a significant gradient there. N.B. does however assume that the dose increments are UNIFORM.""" # FIXME in here, replace the crude comparison with sigma with a # distribution comparison - values about the mean, values about # a straight line fit: are they drawn from the same distribution. # If definately not then the hypothesis of some radiation damage # is suggested? 
sx = 0.0 sy = 0.0 n = 0 for j, v in enumerate(values): if not v: continue sx += j sy += v n += 1 mx = sx / n my = sy / n sxx = 0.0 sxy = 0.0 for j, v in enumerate(values): if not v: continue sxx += (j - mx) * (j - mx) sxy += (j - mx) * (v - my) if not sxx: return 0.0 m = sxy / sxx c = my - m * mx # now calculate residual about this line ss = 0.0 for j, v in enumerate(values): if not v: continue _v = m * j + c ss += (v - _v) * (v - _v) sd = math.sqrt(ss / (n - 2)) # then compute the standard deviation of the population var = 0.0 for j, v in enumerate(values): if not v: continue var += (v - my) * (v - my) return (var / (sd * sd)) / n def parse(self): """Parse the output of the chef run.""" results = self.parse_ccp4_loggraph() rd_keys = [] comp_keys = [] scp_data = None comp_data = {} rd_data = {} datasets_damaged = [] for key in results: if "Completeness vs. " in key: comp_keys.append(key) comp_data[key.split()[-1]] = transpose_loggraph( results[key]) elif "R vs. " in key: rd_keys.append(key) wavelength = key.split()[-1] rd_data[wavelength] = transpose_loggraph(results[key]) values = [float(x) for x in rd_data[wavelength]["2_Rd"]] digest = self.digest_rd(values) # logger.info('Rd score (%s): %.2f' , \ # (wavelength, digest)) if digest > 3: datasets_damaged.append((wavelength, digest)) elif "Normalised radiation" in key: scp_data = transpose_loggraph(results[key]) elif "Dose vs. BATCH" in key: self._dose_profile = transpose_loggraph(results[key]) # right, so first work through these to define the limits from # where the first set is 50% complete to 90% complete, which # will establish the benchmark, then calculate a kinda # Z-score for the subsequent Scp values lowest_50 = None lowest_90 = None i_col = "2_I" dose_col = "1_DOSE" for dataset in comp_data: if "5_dI" in comp_data[dataset]: i_col = "4_I" if "1_BATCH" in comp_data[dataset]: dose_col = "1_BATCH" completeness = comp_data[dataset][i_col] local_50 = None local_90 = None max_comp = max(map(float, completeness)) for j, dose in enumerate(comp_data[dataset][dose_col]): comp = float(completeness[j]) if comp > (0.5 * max_comp) and not local_50: local_50 = float(dose) if comp > (0.9 * max_comp) and not local_90: local_90 = float(dose) # check if we have dose profile etc available stop_doses = [] groups = [] if self._dose_profile: wedges = sorted(self.digest_dose_profile()) # given these and the completeness curves, need to make a # choice as to when to stop... will be necessary here # to have an indication of the logical wavelength to # which the measurements belong # wedges is a list of: # FIRST_DOSE FIRST_BATCH SIZE EXPOSURE DATASET # digest this as follows: if sweeps switch between # A and B, or A, B and C then these are tied wedges: aim # for uniform total rotation / number of images. if # there is the same data set in subsequent chunks, # these are tied inverse beams # ok, logic. expect at most four wavelengths interleaved, # most likely two or three. also assume that the sizes of # the wedges should be the same. only want to "block" these. # if len(wedges) == 1: can consider any point at which # to cut off the data. N.B. don't forget EDNA strategies... # ok, easiest thing is encode a set of rules. 
stop_doses, groups = digest_wedges(wedges) for j, g in enumerate(groups): logger.info("Group %d: %s", (j + 1, g)) if not lowest_50: lowest_50 = local_50 if local_50 < lowest_50: lowest_50 = local_50 if not lowest_90: lowest_90 = local_90 if local_90 < lowest_90: lowest_90 = local_90 # now build up the reference population scp_reference = [] scp_key = None for k in scp_data: if "Scp(d)" in k: scp_key = k for j, d in enumerate(scp_data[dose_col]): dose = float(d) if dose >= lowest_50 and dose <= lowest_90: scp_reference.append(float(scp_data[scp_key][j])) m, s = mean_sd(scp_reference) dose = scp_data[dose_col][0] scp_max = 0.0 if s == 0.0: logger.info("Insufficient measurements for analysis") return for j, d in enumerate(scp_data[dose_col]): dose = float(d) scp = float(scp_data[scp_key][j]) z = (scp - m) / s if dose < lowest_90: scp_max = max(scp, scp_max) continue if z > 3 and scp > scp_max: break scp_max = max(scp, scp_max) if not datasets_damaged: logger.info("No significant radiation damage detected") return if not groups: stop_dose = dose elif groups == ["Single wedge"]: stop_dose = dose else: for stop_dose in stop_doses: if stop_dose > dose: break logger.info("Significant radiation damage detected:") for wavelength, digest in datasets_damaged: logger.info("Rd analysis (%s): %.2f", wavelength, digest) if stop_dose == float(scp_data[dose_col][-1]): logger.info("Conclusion: use all data") else: logger.info( "Conclusion: cut off after %s ~ %.1f", dose_col.replace("1_", ""), stop_dose, ) def digest_dose_profile(self): """Digest the dose profile to list a range of points where we could consider stopping the data collection.""" # N.B. in the first pass this may not make proper acknowledgement # of the wedged structure of the data collection! dose_batch = {} batch_dose = {} batch_dataset = {} for j, b in enumerate(self._dose_profile["1_BATCH"]): b = int(b) d = float(self._dose_profile["2_DOSE"][j]) ds = self._dose_profile["3_DATASET"][j] dose_batch[d] = b batch_dose[b] = d batch_dataset[b] = ds doses = sorted(dose_batch) first_batch = dose_batch[doses[0]] start_batches = [first_batch] current = first_batch wedge_sizes = {first_batch: 1} wedge_datasets = {first_batch: batch_dataset[first_batch]} for d in doses[1:]: b = dose_batch[d] if b < first_batch: current = b start_batches.append(current) wedge_sizes[current] = 0 wedge_datasets[current] = batch_dataset[current] if b > first_batch + 1: current = b start_batches.append(current) wedge_sizes[current] = 0 wedge_datasets[current] = batch_dataset[current] first_batch = b wedge_sizes[current] += 1 result = [] start_batches.sort() for batch in start_batches: if (batch + 1) not in batch_dose: continue exposure = batch_dose[batch + 1] - batch_dose[batch] result.append(( batch_dose[batch], batch, wedge_sizes[batch], exposure, wedge_datasets[batch], )) return result return ChefWrapper()
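# The Rd score computed by digest_rd() above is (population variance) /
# (residual variance about the least-squares line), normalised by the
# number of points: a flat noisy series scores well below 1, while a
# series with a real gradient scores high (> 3 is treated as damage in
# parse() above). A self-contained illustration with made-up values -
# the numbers carry no crystallographic meaning:
def _example_rd_score():
    chef = Chef()
    flat = [1.0, 1.02, 0.99, 1.01, 1.0, 0.98]
    sloped = [1.0, 1.12, 1.19, 1.33, 1.41, 1.48]
    # the sloped series is far above the damage threshold, the flat one below
    assert chef.digest_rd(sloped) > 3 > chef.digest_rd(flat)
    return chef.digest_rd(sloped), chef.digest_rd(flat)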
def Reindex(DriverType=None): '''A new factory for ReindexWrapper classes, which will actually use pointless.''' DriverInstance = DriverFactory.Driver(DriverType) CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4') class ReindexWrapper(CCP4DriverInstance.__class__): '''A wrapper for Reindex, using the CCP4-ified Driver.''' def __init__(self): # generic things CCP4DriverInstance.__class__.__init__(self) self.set_executable( os.path.join(os.environ.get('CBIN', ''), 'pointless')) # reindex specific things self._spacegroup = None # this should be of the form e.g. k, l, h self._operator = None # results self._cell = None def set_spacegroup(self, spacegroup): '''Set the spacegroup to reindex the reflections to.''' self._spacegroup = spacegroup def set_operator(self, operator): '''Set the reindexing operator for mapping from in to out.''' # pointless doesn't like reindex operators with '*' if operator is not None: operator = operator.replace('*', '') self._operator = operator def get_cell(self): return self._cell def check_reindex_errors(self): '''Check the standard output for standard reindex errors.''' pass def reindex_old(self): self.set_executable( os.path.join(os.environ.get('CBIN', ''), 'reindex')) self.check_hklin() self.check_hklout() if not self._spacegroup and not self._operator: raise RuntimeError('reindex requires spacegroup or operator') self.start() # look up the space group number to cope with complex symbols # that old fashioned CCP4 reindex does not understand... from cctbx.sgtbx import space_group, space_group_symbols sg_t = space_group(space_group_symbols(str( self._spacegroup))).type() if self._operator: self.input('reindex %s' % str(self._operator)) if self._spacegroup: self.input('symmetry %d' % sg_t.number()) self.close_wait() # check for errors try: self.check_for_errors() except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e output = self.get_all_output() for j, o in enumerate(output): if 'Cell Dimensions : (obsolete' in o: self._cell = map(float, output[j + 2].split()) return 'OK' def cctbx_reindex(self): from xia2.Modules.MtzUtils import reindex reindex(self._hklin, self._hklout, self._operator, space_group=self._spacegroup) return 'OK' def reindex(self): '''Actually perform the reindexing.''' if PhilIndex.params.ccp4.reindex.program == 'reindex': return self.reindex_old() elif PhilIndex.params.ccp4.reindex.program == 'cctbx': return self.cctbx_reindex() self.check_hklin() self.check_hklout() if not self._spacegroup and not self._operator: raise RuntimeError('reindex requires spacegroup or operator') if self._operator: self._operator = self._operator.replace('[', '').replace(']', '') Debug.write('Reindex... %s %s' % (self._spacegroup, self._operator)) if False and self._spacegroup and PhilIndex.params.xia2.settings.small_molecule == True: ## FIXME: This still needed? 
if not self._operator or self._operator.replace(' ', '') == 'h,k,l': return self.cctbx_reindex() self.start() if self._spacegroup: if isinstance(self._spacegroup, type(0)): spacegroup = Syminfo.spacegroup_number_to_name( self._spacegroup) elif self._spacegroup[0] in '0123456789': spacegroup = Syminfo.spacegroup_number_to_name( int(self._spacegroup)) else: spacegroup = self._spacegroup self.input('spacegroup \'%s\'' % spacegroup) if self._operator: # likewise self.input('reindex \'%s\'' % self._operator) else: self.input('reindex \'h,k,l\'') self.close_wait() # check for errors try: self.check_for_errors() except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e output = self.get_all_output() for j, o in enumerate(output): if 'Cell Dimensions : (obsolete' in o: self._cell = map(float, output[j + 2].split()) elif 'ReindexOp: syntax error in operator' in o: raise RuntimeError(o) return 'OK' return ReindexWrapper()
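# A short sketch of driving the Reindex wrapper above. File names and the
# spacegroup/operator values are illustrative placeholders; reindex()
# dispatches to pointless, old-style reindex or cctbx according to the
# PhilIndex setting.
def _example_reindex(hklin="input.mtz", hklout="reindexed.mtz"):
    reindexer = Reindex()
    reindexer.set_hklin(hklin)
    reindexer.set_hklout(hklout)
    reindexer.set_spacegroup("P212121")
    reindexer.set_operator("k,l,h")
    reindexer.reindex()
    # unit cell as parsed from the program output, if reported
    return reindexer.get_cell()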
def Cad(DriverType=None): """A factory for CadWrapper classes.""" DriverInstance = DriverFactory.Driver(DriverType) CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4") class CadWrapper(CCP4DriverInstance.__class__): """A wrapper for Cad, using the CCP4-ified Driver.""" def __init__(self): # generic things CCP4DriverInstance.__class__.__init__(self) self.set_executable(os.path.join(os.environ.get("CBIN", ""), "cad")) self._hklin_files = [] self._new_cell_parameters = None self._new_column_suffix = None self._pname = None self._xname = None self._dname = None # stuff to specifically copy in the freer column... self._freein = None self._freein_column = "FreeR_flag" def add_hklin(self, hklin): """Add a reflection file to the list to be sorted together.""" self._hklin_files.append(hklin) def set_freein(self, freein): # I guess I should check in here that this file actually # exists... - also that it has a sensible FreeR column... if not os.path.exists(freein): raise RuntimeError("reflection file does not exist: %s" % freein) cname = FindFreeFlag(freein) logger.debug("FreeR_flag column identified as %s", cname) self._freein = freein self._freein_column = cname def set_project_info(self, pname, xname, dname): self._pname = pname self._xname = xname self._dname = dname def set_new_suffix(self, suffix): """Set a column suffix for this dataset.""" self._new_column_suffix = suffix def merge(self): """Merge multiple reflection files into one file.""" if not self._hklin_files: raise RuntimeError("no hklin files defined") self.check_hklout() hklin_counter = 0 # for each reflection file, need to gather the column names # and so on, to put in the cad input here - also check to see # if the column names clash... check also that the spacegroups # match up... spacegroup = None column_names = [] column_names_by_file = {} for hklin in self._hklin_files: md = Mtzdump() md.set_working_directory(self.get_working_directory()) md.set_hklin(hklin) md.dump() columns = md.get_columns() spag = md.get_spacegroup() if spacegroup is None: spacegroup = spag if spag != spacegroup: raise RuntimeError("spacegroups do not match") column_names_by_file[hklin] = [] for c in columns: name = c[0] if name in ["H", "K", "L"]: continue if name in column_names: raise RuntimeError("duplicate column names") column_names.append(name) column_names_by_file[hklin].append(name) # if we get to here then this is a good set up... 
# create the command line hklin_counter = 0 for hklin in self._hklin_files: hklin_counter += 1 self.add_command_line("hklin%d" % hklin_counter) self.add_command_line(hklin) self.start() hklin_counter = 0 for hklin in self._hklin_files: column_counter = 0 hklin_counter += 1 labin_command = "labin file_number %d" % hklin_counter for column in column_names_by_file[hklin]: column_counter += 1 labin_command += " E%d=%s" % (column_counter, column) self.input(labin_command) self.close_wait() try: self.check_for_errors() self.check_ccp4_errors() except RuntimeError as e: # something went wrong; remove the output file try: os.remove(self.get_hklout()) except Exception: pass raise e return self.get_ccp4_status() def update(self): """Update the information for one reflection file.""" if not self._hklin_files: raise RuntimeError("no hklin files defined") if len(self._hklin_files) > 1: raise RuntimeError("can have only one hklin to update") hklin = self._hklin_files[0] self.check_hklout() column_names_by_file = {} dataset_names_by_file = {} md = Mtzdump() md.set_hklin(hklin) md.dump() columns = md.get_columns() column_names_by_file[hklin] = [] dataset_names_by_file[hklin] = md.get_datasets() # get a dataset ID - see FIXME 03/NOV/06 below... dataset_ids = [md.get_dataset_info(d)["id"] for d in md.get_datasets()] for c in columns: name = c[0] if name in ["H", "K", "L"]: continue column_names_by_file[hklin].append(name) self.add_command_line("hklin1") self.add_command_line(hklin) self.start() dataset_id = dataset_ids[0] if self._pname and self._xname and self._dname: self.input( "drename file_number 1 %d %s %s" % (dataset_id, self._xname, self._dname) ) self.input("dpname file_number 1 %d %s" % (dataset_id, self._pname)) column_counter = 0 labin_command = "labin file_number 1" for column in column_names_by_file[hklin]: column_counter += 1 labin_command += " E%d=%s" % (column_counter, column) self.input(labin_command) # FIXME perhaps - ASSERT that we want only the information from # the first dataset here... 
dataset_id = dataset_ids[0] if self._new_cell_parameters: a, b, c, alpha, beta, gamma = self._new_cell_parameters self.input( "dcell file_number 1 %d %f %f %f %f %f %f" % (dataset_id, a, b, c, alpha, beta, gamma) ) if self._new_column_suffix: suffix = self._new_column_suffix column_counter = 0 labout_command = "labout file_number 1" for column in column_names_by_file[hklin]: column_counter += 1 labout_command += " E%d=%s_%s" % (column_counter, column, suffix) self.input(labout_command) self.close_wait() try: self.check_for_errors() self.check_ccp4_errors() except RuntimeError as e: # something went wrong; remove the output file try: os.remove(self.get_hklout()) except Exception: pass raise e return self.get_ccp4_status() def copyfree(self): """Copy the free column from freein into hklin -> hklout.""" if not self._hklin_files: raise RuntimeError("no hklin files defined") if len(self._hklin_files) > 1: raise RuntimeError("can have only one hklin to update") hklin = self._hklin_files[0] # get the resolution limit to give as a limit for the FreeR # column md = Mtzdump() md.set_working_directory(self.get_working_directory()) md.set_hklin(hklin) md.dump() resolution_range = md.get_resolution_range() self.check_hklout() if self._freein is None: raise RuntimeError("freein not defined") if self._freein_column is None: raise RuntimeError("freein column not defined") self.add_command_line("hklin1") self.add_command_line(self._freein) self.add_command_line("hklin2") self.add_command_line(hklin) self.start() self.input("labin file_number 1 E1=%s" % self._freein_column) self.input("resolution file_number 1 %f %f" % resolution_range) self.input("labin file_number 2 all") self.close_wait() try: self.check_for_errors() self.check_ccp4_errors() except RuntimeError as e: # something went wrong; remove the output file try: os.remove(self.get_hklout()) except Exception: pass raise e return self.get_ccp4_status() return CadWrapper()
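# Sketch: copying free-R flags from a reference file into a new data set
# with the Cad wrapper above. Arguments are placeholder paths;
# set_freein() locates the free column via FindFreeFlag, and copyfree()
# restricts the flags to the resolution range of hklin.
def _example_copy_free_flags(freein, hklin, hklout):
    cad = Cad()
    cad.add_hklin(hklin)
    cad.set_hklout(hklout)
    cad.set_freein(freein)
    return cad.copyfree()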
def Matthews_coef(DriverType=None):
    """A factory for Matthews_coefWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class Matthews_coefWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Matthews_coef, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "matthews_coef"))

            self._nmol = 1
            self._nres = 0
            self._cell = None
            self._spacegroup = None

            # results
            self._solvent = 0.0

        # setters follow

        def set_nmol(self, nmol):
            self._nmol = nmol

        def set_nres(self, nres):
            self._nres = nres

        def set_cell(self, cell):
            self._cell = cell

        def set_spacegroup(self, spacegroup):
            self._spacegroup = spacegroup

        def compute_solvent(self):
            self.start()
            self.input("cell %f %f %f %f %f %f" % tuple(self._cell))
            # cannot cope with spaces in the spacegroup!
            self.input("symmetry %s" % self._spacegroup.replace(" ", ""))
            self.input("nres %d" % self._nres)
            self.input("nmol %d" % self._nmol)
            self.close_wait()

            self.check_for_errors()
            self.check_ccp4_errors()

            # get the useful information out from here...
            for line in self.get_all_output():
                if "Assuming protein density" in line:
                    self._solvent = 0.01 * float(line.split()[-1])

        def get_solvent(self):
            return self._solvent

    return Matthews_coefWrapper()
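# Sketch: estimating the solvent fraction with the wrapper above. The
# cell, spacegroup, residue count and molecule count are made-up example
# values, not real data.
def _example_solvent_fraction():
    mc = Matthews_coef()
    mc.set_cell((78.0, 78.0, 37.0, 90.0, 90.0, 90.0))
    mc.set_spacegroup("P43212")
    mc.set_nres(120)
    mc.set_nmol(1)
    mc.compute_solvent()
    return mc.get_solvent()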
def Truncate(DriverType=None): """A factory for TruncateWrapper classes.""" DriverInstance = DriverFactory.Driver(DriverType) CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4") if PhilIndex.params.ccp4.truncate.program == "ctruncate": return Ctruncate(DriverType) elif PhilIndex.params.ccp4.truncate.program == "cctbx": from xia2.Wrappers.XIA.FrenchWilson import FrenchWilson return FrenchWilson(DriverType) class TruncateWrapper(CCP4DriverInstance.__class__): """A wrapper for Truncate, using the CCP4-ified Driver.""" def __init__(self): # generic things CCP4DriverInstance.__class__.__init__(self) self.set_executable( os.path.join(os.environ.get("CBIN", ""), "truncate")) self._anomalous = False self._nres = 0 # should we do wilson scaling? self._wilson = True self._b_factor = 0.0 self._moments = None self._wilson_fit_grad = 0.0 self._wilson_fit_grad_sd = 0.0 self._wilson_fit_m = 0.0 self._wilson_fit_m_sd = 0.0 self._wilson_fit_range = None # numbers of reflections in and out, and number of absences # counted self._nref_in = 0 self._nref_out = 0 self._nabsent = 0 self._xmlout = None def set_anomalous(self, anomalous): self._anomalous = anomalous def set_wilson(self, wilson): """Set the use of Wilson scaling - if you set this to False Wilson scaling will be switched off...""" self._wilson = wilson def get_xmlout(self): return self._xmlout def truncate(self): """Actually perform the truncation procedure.""" self.check_hklin() self.check_hklout() self.start() if self._anomalous: self.input("anomalous yes") else: self.input("anomalous no") if self._nres: self.input("nres %d" % self._nres) if not self._wilson: self.input("scale 1") self.close_wait() try: self.check_for_errors() self.check_ccp4_errors() except RuntimeError: try: os.remove(self.get_hklout()) except Exception: pass raise RuntimeError("truncate failure") # parse the output for interesting things, including the # numbers of reflections in and out (isn't that a standard CCP4 # report?) and the number of absent reflections. self._nref_in, self._nref_out = self.read_nref_hklin_hklout( self.get_all_output()) # FIXME guess I should be reading this properly... self._nabsent = self._nref_in - self._nref_out for line in self.get_all_output(): if "Least squares straight line gives" in line: list = line.replace("=", " ").split() if not "***" in list[6]: self._b_factor = float(list[6]) else: Debug.write("no B factor available") if "LSQ Line Gradient" in line: self._wilson_fit_grad = float(line.split()[-1]) resol_width = max(self._wilson_fit_range) - min( self._wilson_fit_range) if self._wilson_fit_grad > 0 and resol_width > 1.0 and False: raise RuntimeError( "wilson plot gradient positive: %.2f" % self._wilson_fit_grad) elif self._wilson_fit_grad > 0: Debug.write( "Positive gradient but not much wilson plot") if "Uncertainty in Gradient" in line: self._wilson_fit_grad_sd = float(line.split()[-1]) if "X Intercept" in line: self._wilson_fit_m = float(line.split()[-1]) if "Uncertainty in Intercept" in line: self._wilson_fit_m_sd = float(line.split()[-1]) if "Resolution range" in line: self._wilson_fit_range = map(float, line.split()[-2:]) results = self.parse_ccp4_loggraph() moments = transpose_loggraph( results["Acentric Moments of E for k = 1,3,4,6,8"]) # keys we want in this are "Resln_Range" "1/resol^2" and # MomentZ2. The last of these should be around two, but is # likely to be a little different to this. 
self._moments = moments def get_b_factor(self): return self._b_factor def get_wilson_fit(self): return ( self._wilson_fit_grad, self._wilson_fit_grad_sd, self._wilson_fit_m, self._wilson_fit_m_sd, ) def get_wilson_fit_range(self): return self._wilson_fit_range def get_moments(self): return self._moments def get_nref_in(self): return self._nref_in def get_nref_out(self): return self._nref_out def get_nabsent(self): return self._nabsent def read_nref_hklin_hklout(self, records): """Look to see how many reflections came in through HKLIN, and how many went out again in HKLOUT.""" nref_in = 0 nref_out = 0 current_logical = None for record in records: if "Logical Name" in record: current_logical = record.split()[2] assert current_logical in ["HKLIN", "HKLOUT", "SYMINFO"] if "Number of Reflections" in record: if current_logical == "HKLIN": nref_in = int(record.split()[-1]) elif current_logical == "HKLOUT": nref_out = int(record.split()[-1]) return nref_in, nref_out return TruncateWrapper()
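# Sketch: running the intensity-to-amplitude step via the factory above.
# Note that Truncate() may hand back a Ctruncate or FrenchWilson wrapper
# instead, depending on the PhilIndex setting; the methods shown here are
# those of the truncate path. File names are placeholders.
def _example_truncate(hklin, hklout):
    truncate = Truncate()
    truncate.set_hklin(hklin)
    truncate.set_hklout(hklout)
    truncate.set_anomalous(True)
    truncate.truncate()
    # Wilson B factor and the number of systematic absences removed
    return truncate.get_b_factor(), truncate.get_nabsent()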
def Sortmtz(DriverType=None):
    '''A factory for SortmtzWrapper classes.'''

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4')

    class SortmtzWrapper(CCP4DriverInstance.__class__):
        '''A wrapper for Sortmtz, using the CCP4-ified Driver.'''

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get('CBIN', ''), 'sortmtz'))

            self._sort_order = 'H K L M/ISYM BATCH'
            self._hklin_files = []

        def add_hklin(self, hklin):
            '''Add a reflection file to the list to be sorted together.'''
            self._hklin_files.append(hklin)

        def check_sortmtz_errors(self):
            '''Check the output for "standard" errors.'''

            lwbat_warning = ''

            for line in self.get_all_output():
                if 'From ccp4_lwbat: warning:' in line:
                    lwbat_warning = line.split('warning:')[1].strip()

                if 'error in ccp4_lwbat' in line:
                    raise RuntimeError(lwbat_warning)

                if 'Sorting failed' in line:
                    raise RuntimeError('sorting failed')

                if 'Inconsistent operator orders in input file' in line:
                    raise RuntimeError('different sort orders')

        def sort(self, vrset=None):
            '''Actually sort the reflections.'''

            if len(self._hklin_files) == 1:
                self.set_hklin(self._hklin_files[0])
                self._hklin_files = []

            if not self._hklin_files:
                self.check_hklin()

            self.check_hklout()

            if self._hklin_files:
                self.set_task('Sorting reflections %s => %s' %
                              (' '.join(self._hklin_files),
                               os.path.split(self.get_hklout())[-1]))
            else:
                self.set_task('Sorting reflections %s => %s' %
                              (os.path.split(self.get_hklin())[-1],
                               os.path.split(self.get_hklout())[-1]))

            self.start()

            # allow for the fact that large negative reflections may
            # result from XDS output...
            if vrset:
                self.input('VRSET_MAGIC %f' % vrset)

            self.input(self._sort_order)

            if self._hklin_files:
                for m in self._hklin_files:
                    self.input('"%s"' % m)

            self.close_wait()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
                # capture the status before testing it, so the error
                # message can report it
                status = self.get_ccp4_status()
                if 'Error' in status:
                    raise RuntimeError('[SORTMTZ] %s' % status)
                self.check_sortmtz_errors()
            except RuntimeError as e:
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise e

            return self.get_ccp4_status()

    return SortmtzWrapper()
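# Sketch: sorting several MTZ files into one, as done before scaling.
# Input paths are placeholders; with a single input file, sort() falls
# back to a plain HKLIN sort.
def _example_sort(hklins, hklout="sorted.mtz"):
    sortmtz = Sortmtz()
    for hklin in hklins:
        sortmtz.add_hklin(hklin)
    sortmtz.set_hklout(hklout)
    return sortmtz.sort()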
def Aimless(DriverType=None, absorption_correction=None, decay_correction=None):
    """A factory for AimlessWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class AimlessWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Aimless, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(os.path.join(os.environ.get("CBIN", ""), "aimless"))

            if not os.path.exists(self.get_executable()):
                raise RuntimeError("aimless binary not found")

            self.start()
            self.close_wait()

            version = None

            for record in self.get_all_output():
                if "##" in record and "AIMLESS" in record:
                    version = record.split()[5]

            if not version:
                raise RuntimeError("version not found")

            Debug.write("Using version: %s" % version)

            # clear all the header junk
            self.reset()

            # input and output files
            self._scalepack = False
            self._chef_unmerged = False
            self._unmerged_reflections = None
            self._xmlout = None

            # scaling parameters
            self._resolution = None

            # scales file for recycling
            self._scales_file = None

            # this defaults to SCALES - and is useful for when we
            # want to refine the SD parameters because we can
            # recycle the scale factors through the above interface
            self._new_scales_file = None

            # this flag indicates that the input reflections are already
            # scaled and just need merging e.g. from XDS/XSCALE.
            self._onlymerge = False

            # by default, switch this on
            if decay_correction is None:
                self._bfactor = True
            else:
                self._bfactor = decay_correction

            # this will often be wanted
            self._anomalous = False

            self._mode = "rotation"

            # these are only relevant for 'rotation' mode scaling
            self._spacing = 5
            self._cycles = 100
            self._brotation = None
            self._bfactor_tie = None
            self._surface_tie = None
            self._surface_link = True

            # secondary-beam / absorption correction settings: scale()
            # below reads these, so give them defaults here in case
            # set_secondary() is never called. (These defaults are an
            # assumption; callers are expected to call set_secondary()
            # before scale().)
            self._secondary = "absorption"
            self._secondary_lmax = None

            self._intensities = "combine"

            self._project_crystal_dataset = {}
            self._runs = []

            # for adding data on merge - one dname
            self._pname = None
            self._xname = None
            self._dname = None

        # getter and setter methods

        def set_project_info(self, pname, xname, dname):
            """Only use this for the merge() method."""
            self._pname = pname
            self._xname = xname
            self._dname = dname

        def add_run(
            self,
            start,
            end,
            pname=None,
            xname=None,
            dname=None,
            exclude=False,
            resolution=0.0,
            name=None,
        ):
            """Add another run to the run table, optionally not including
            it in the scaling - for solution to bug 2229."""
            self._runs.append(
                (start, end, pname, xname, dname, exclude, resolution, name)
            )

        def set_scalepack(self, scalepack=True):
            self._scalepack = scalepack

        def set_chef_unmerged(self, chef_unmerged=True):
            """Output the measurements in the form suitable for input to
            chef, that is with SDCORRECTION 1 0 0 and in unmerged MTZ
            format."""
            self._chef_unmerged = chef_unmerged

        def set_resolution(self, resolution):
            """Set the resolution limit for the scaling - default is to
            include all reflections."""
            self._resolution = resolution

        def get_xmlout(self):
            return self._xmlout

        def set_scales_file(self, scales_file):
            """Set the file containing all of the scales required for
            this run. Used when fiddling the error parameters or
            obtaining stats to different resolutions. See also
            set_new_scales_file(). This will switch on ONLYMERGE RESTORE."""

            # bodge: take this file and make a temporary local copy which will
            # have the Nparameters token spaced from the number which follows
            # it....
tmp_scales_file = os.path.join( self.get_working_directory(), "%s.tmp" % os.path.split(scales_file)[-1] ) open(tmp_scales_file, "w").write( open(os.path.join(self.get_working_directory(), scales_file)) .read() .replace("Nparameters", "Nparameters ") ) self._scales_file = tmp_scales_file def set_new_scales_file(self, new_scales_file): """Set the file to which the scales will be written. This will allow reusing through the above interface.""" self._new_scales_file = new_scales_file def get_new_scales_file(self): """Get the file to which the scales have been written.""" if self._new_scales_file: if not os.path.isfile( os.path.join(self.get_working_directory(), self._new_scales_file) ): Chatter.write( "Aimless did not scale the data, see log file for more details:\n %s" % self.get_log_file() ) raise RuntimeError("data not scaled") return os.path.join(self.get_working_directory(), self._new_scales_file) def set_onlymerge(self, onlymerge=True): """Switch on merging only - this will presume that the input reflections are scaled already.""" self._onlymerge = onlymerge def set_bfactor(self, bfactor=True, brotation=None): """Switch on/off bfactor refinement, optionally with the spacing for the bfactor refinement (in degrees.)""" self._bfactor = bfactor if brotation: self._brotation = brotation def set_surface_tie(self, surface_tie): self._surface_tie = surface_tie def set_surface_link(self, surface_link): self._surface_link = surface_link def set_anomalous(self, anomalous=True): """Switch on/off separating of anomalous pairs.""" self._anomalous = anomalous def set_secondary(self, mode, lmax): assert mode in ("secondary", "absorption") self._secondary = mode self._secondary_lmax = lmax def set_mode(self, mode): if not mode in ["rotation", "batch"]: raise RuntimeError('unknown scaling mode "%s"' % mode) self._mode = mode def set_spacing(self, spacing): self._spacing = spacing def set_cycles(self, cycles): """Set the maximum number of cycles allowed for the scaling - this assumes the default convergence parameters.""" self._cycles = cycles def set_intensities(self, intensities): intensities = intensities.lower() assert intensities in ("summation", "profile", "combine") self._intensities = intensities def identify_negative_scale_run(self): """Given the presence of a negative scale factor, try to identify it - this is going to be called after a negative scales error has been raised.""" bad_run = 0 runs_to_batches = {} run = 0 for record in self.get_all_output(): if "Run number" and "consists of batches" in record: run = int(record.split()[2]) runs_to_batches[run] = [] continue if run and not record.strip(): run = 0 continue if run: runs_to_batches[run].extend(map(int, record.split())) if "shifted scale factor" in record and "negative" in record: tokens = record.split() scale = tokens[tokens.index("factor") + 1] bad_run = int(scale.split(".")[0][1:]) return ( bad_run, (min(runs_to_batches[bad_run]), max(runs_to_batches[bad_run])), ) def identify_no_observations_run(self): """Identify the run which was causing problems with "no observations" reported.""" bad_run = 0 runs_to_batches = {} run = 0 for record in self.get_all_output(): if "Run number" and "consists of batches" in record: run = int(record.split()[2]) runs_to_batches[run] = [] continue if run and not record.strip(): run = 0 continue if run: runs_to_batches[run].extend(map(int, record.split())) if "No observations for parameter" in record: bad_run = int(record.split()[-1]) return ( bad_run, (min(runs_to_batches[bad_run]), 
max(runs_to_batches[bad_run])), ) def check_aimless_error_negative_scale_run(self): """Check for a bad run giving a negative scale in Aimless - this is particularly for the multi-crystal analysis.""" for record in self.get_all_output(): if " **** Negative scale factor" in record: raise RuntimeError("bad batch %d" % int(record.split()[-3])) def check_aimless_errors(self): """Check for Aimless specific errors. Raise RuntimeError if error is found.""" # FIXME in here I need to add a test for convergence output = self.get_all_output() for n, line in enumerate(output): if "File must be sorted" in line: raise RuntimeError("hklin not sorted") if "Negative scales" in line: run, batches = self.identify_negative_scale_run() raise RuntimeError( "negative scales run %d: %d to %d" % (run, batches[0], batches[1]) ) if "Scaling has failed to converge" in line: raise RuntimeError("scaling not converged") if "*** No observations ***" in line: run, batches = self.identify_no_observations_run() raise RuntimeError( "no observations run %d: %d to %d" % (run, batches[0], batches[1]) ) if "FATAL ERROR message:" in line: raise RuntimeError(output[n + 1].strip()) def sum(self): """Sum a set of reflections in a sorted mtz file - this will just sum partials to make whole reflections, initially for resolution analysis.""" self.check_hklin() self.check_hklout() self.start() self.input("run 1 all") self.input("scales constant") self.input("output unmerged") self.input("sdcorrection noadjust 1.0 0.0 0.0") self.close_wait() # check for errors if True: # try: self.check_for_errors() self.check_ccp4_errors() self.check_aimless_error_negative_scale_run() self.check_aimless_errors() else: # except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e return self.get_ccp4_status() def const(self): """Const scaling; for cleaner input to pointless""" self.check_hklin() self.check_hklout() self.start() self.input("scales constant") self.input("output unmerged") self.input("sdcorrection norefine 1 0 0") self.close_wait() # check for errors self.check_for_errors() self.check_ccp4_errors() self.check_aimless_errors() return "OK" def merge(self): """Actually merge the already scaled reflections.""" self.check_hklin() self.check_hklout() if not self._onlymerge: raise RuntimeError("for scaling use scale()") if not self._scalepack: self.set_task( "Merging scaled reflections from %s => %s" % ( os.path.split(self.get_hklin())[-1], os.path.split(self.get_hklout())[-1], ) ) else: self.set_task( "Merging reflections from %s => scalepack %s" % ( os.path.split(self.get_hklin())[-1], os.path.split(self.get_hklout())[-1], ) ) self._xmlout = os.path.join( self.get_working_directory(), "%d_aimless.xml" % self.get_xpid() ) self.start() self.input("xmlout %d_aimless.xml" % self.get_xpid()) if not PhilIndex.params.xia2.settings.small_molecule: self.input("bins 20") self.input("run 1 all") self.input("scales constant") self.input("initial unity") self.input("sdcorrection both noadjust 1.0 0.0 0.0") if self._anomalous: self.input("anomalous on") else: self.input("anomalous off") if self._scalepack: self.input("output polish unmerged") self.input("output unmerged") self.close_wait() # check for errors try: self.check_for_errors() self.check_ccp4_errors() self.check_aimless_errors() status = self.get_ccp4_status() if "Error" in status: raise RuntimeError("[AIMLESS] %s" % status) except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e return self.get_ccp4_status() def scale(self): 
"""Actually perform the scaling.""" self.check_hklin() self.check_hklout() if self._chef_unmerged and self._scalepack: raise RuntimeError("CHEF and scalepack incompatible") if self._onlymerge: raise RuntimeError("use merge() method") if not self._scalepack: self.set_task( "Scaling reflections from %s => %s" % ( os.path.split(self.get_hklin())[-1], os.path.split(self.get_hklout())[-1], ) ) else: self.set_task( "Scaling reflections from %s => scalepack %s" % ( os.path.split(self.get_hklin())[-1], os.path.split(self.get_hklout())[-1], ) ) self._xmlout = os.path.join( self.get_working_directory(), "%d_aimless.xml" % self.get_xpid() ) self.start() nproc = PhilIndex.params.xia2.settings.multiprocessing.nproc if isinstance(nproc, int) and nproc > 1: self.set_working_environment("OMP_NUM_THREADS", "%d" % nproc) self.input("refine parallel") self.input("xmlout %d_aimless.xml" % self.get_xpid()) if not PhilIndex.params.xia2.settings.small_molecule: self.input("bins 20") self.input("intensities %s" % self._intensities) if self._new_scales_file: self.input("dump %s" % self._new_scales_file) run_number = 0 for run in self._runs: run_number += 1 if not run[5]: self.input("run %d batch %d to %d" % (run_number, run[0], run[1])) if run[6] != 0.0 and not run[5]: self.input("resolution run %d high %g" % (run_number, run[6])) run_number = 0 for run in self._runs: run_number += 1 if run[7]: Debug.write("Run %d corresponds to sweep %s" % (run_number, run[7])) if run[5]: continue self.input("sdcorrection same") # FIXME this is a bit of a hack - should be better determined # than this... if PhilIndex.params.xia2.settings.small_molecule: # self.input('sdcorrection tie sdfac 0.707 0.3 tie sdadd 0.01 0.05') # self.input('reject all 30') self.input("sdcorrection fixsdb") if self._secondary_lmax and self._surface_tie: self.input("tie surface %.4f" % self._surface_tie) if not self._surface_link: self.input("unlink all") # assemble the scales command if self._mode == "rotation": scale_command = "scales rotation spacing %g" % self._spacing if self._secondary_lmax is not None: scale_command += " %s %d" % ( self._secondary, int(self._secondary_lmax), ) else: scale_command += " %s" % self._secondary if self._bfactor: scale_command += " bfactor on" if self._brotation: scale_command += " brotation %g" % self._brotation else: scale_command += " bfactor off" self.input(scale_command) else: scale_command = "scales batch" if self._bfactor: scale_command += " bfactor on" if self._brotation: scale_command += " brotation %g" % self._brotation else: scale_command += " brotation %g" % self._spacing else: scale_command += " bfactor off" self.input(scale_command) # Debug.write('Scaling command: "%s"' % scale_command) # next any 'generic' parameters if self._resolution: self.input("resolution %g" % self._resolution) self.input("cycles %d" % self._cycles) if self._anomalous: self.input("anomalous on") else: self.input("anomalous off") if self._scalepack: self.input("output polish unmerged") elif self._chef_unmerged: self.input("output unmerged together") else: self.input("output unmerged") # run using previously determined scales if self._scales_file: self.input("onlymerge") self.input("restore %s" % self._scales_file) self.close_wait() # check for errors if True: # try: try: self.check_for_errors() self.check_ccp4_errors() self.check_aimless_error_negative_scale_run() self.check_aimless_errors() except Exception: Chatter.write( "Aimless failed, see log file for more details:\n %s" % self.get_log_file() ) raise Debug.write("Aimless 
status: OK") else: # except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e # here get a list of all output files... output = self.get_all_output() hklout_files = [] hklout_dict = {} for i in range(len(output)): record = output[i] # this is a potential source of problems - if the # wavelength name has a _ in it then we are here stuffed! if "Writing merged data for dataset" in record: if len(record.split()) == 9: hklout = output[i + 1].strip() else: hklout = record.split()[9] dname = record.split()[6].split("/")[-1] hklout_dict[dname] = hklout hklout_files.append(hklout) elif "Writing unmerged data for all datasets" in record: if len(record.split()) == 9: hklout = output[i + 1].strip() else: hklout = record.split()[9] self._unmerged_reflections = hklout self._scalr_scaled_reflection_files = hklout_dict return "OK" def multi_merge(self): """Merge data from multiple runs - this is very similar to the scaling subroutine...""" self.check_hklin() self.check_hklout() if not self._scalepack: self.set_task( "Scaling reflections from %s => %s" % ( os.path.split(self.get_hklin())[-1], os.path.split(self.get_hklout())[-1], ) ) else: self.set_task( "Scaling reflections from %s => scalepack %s" % ( os.path.split(self.get_hklin())[-1], os.path.split(self.get_hklout())[-1], ) ) self.start() self._xmlout = os.path.join( self.get_working_directory(), "%d_aimless.xml" % self.get_xpid() ) self.input("xmlout %d_aimless.xml" % self.get_xpid()) if not PhilIndex.params.xia2.settings.small_molecule: self.input("bins 20") if self._new_scales_file: self.input("dump %s" % self._new_scales_file) if self._resolution: self.input("resolution %g" % self._resolution) run_number = 0 for run in self._runs: run_number += 1 if not run[5]: self.input("run %d batch %d to %d" % (run_number, run[0], run[1])) if run[6] != 0.0 and not run[5]: self.input("resolution run %d high %g" % (run_number, run[6])) # put in the pname, xname, dname stuff run_number = 0 for run in self._runs: run_number += 1 if run[7]: Debug.write("Run %d corresponds to sweep %s" % (run_number, run[7])) if run[5]: continue # we are only merging here so the scales command is # dead simple... self.input("scales constant") if self._anomalous: self.input("anomalous on") else: self.input("anomalous off") # FIXME this is probably not ready to be used yet... if self._scalepack: self.input("output polish unmerged") self.input("output unmerged") if self._scales_file: self.input("onlymerge") self.input("restore %s" % self._scales_file) self.close_wait() # check for errors try: self.check_for_errors() self.check_ccp4_errors() self.check_aimless_errors() Debug.write("Aimless status: ok") except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e # here get a list of all output files... output = self.get_all_output() # want to put these into a dictionary at some stage, keyed # by the data set id. how this is implemented will depend # on the number of datasets... # FIXME file names on windows separate out path from # drive with ":"... fixed! split on "Filename:" # get a list of dataset names... datasets = [] for run in self._runs: # cope with case where two runs make one dataset... if not run[4] in datasets: if run[5]: pass else: datasets.append(run[4]) hklout_files = [] hklout_dict = {} for i in range(len(output)): record = output[i] # this is a potential source of problems - if the # wavelength name has a _ in it then we are here stuffed! 
if "Writing merged data for dataset" in record: if len(record.split()) == 9: hklout = output[i + 1].strip() else: hklout = record.split()[9] dname = record.split()[6].split("/")[-1] hklout_dict[dname] = hklout hklout_files.append(hklout) elif "Writing unmerged data for all datasets" in record: if len(record.split()) == 9: hklout = output[i + 1].strip() else: hklout = record.split()[9] self._unmerged_reflections = hklout self._scalr_scaled_reflection_files = hklout_dict return "OK" def get_scaled_reflection_files(self): """Get the names of the actual scaled reflection files - note that this is not the same as HKLOUT because Aimless splits them up...""" return self._scalr_scaled_reflection_files def get_unmerged_reflection_file(self): return self._unmerged_reflections def get_summary(self): """Get a summary of the data.""" xml_file = self.get_xmlout() assert os.path.isfile(xml_file) from xia2.Wrappers.CCP4.AimlessHelpers import parse_aimless_xml return parse_aimless_xml(xml_file) return AimlessWrapper()
def Mtz2various(DriverType=None):
    """A factory for Mtz2variousWrapper classes."""

    DriverInstance = DriverFactory.Driver(DriverType)
    CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, "ccp4")

    class Mtz2variousWrapper(CCP4DriverInstance.__class__):
        """A wrapper for Mtz2various, using the CCP4-ified Driver."""

        def __init__(self):
            # generic things
            CCP4DriverInstance.__class__.__init__(self)

            self.set_executable(
                os.path.join(os.environ.get("CBIN", ""), "mtz2various"))

            # this will allow extraction of specific intensities
            # from a multi-set reflection file
            self._dataset_suffix = ""

        def set_suffix(self, suffix):
            if suffix:
                self._dataset_suffix = "_%s" % suffix
            else:
                self._dataset_suffix = suffix

        def convert(self):
            """Convert the input reflection file to .sca format."""

            self.check_hklin()
            self.check_hklout()

            self.start()

            labin = "I(+)=I(+){suffix} SIGI(+)=SIGI(+){suffix} ".format(
                suffix=self._dataset_suffix,
            )
            labin += "I(-)=I(-){suffix} SIGI(-)=SIGI(-){suffix}".format(
                suffix=self._dataset_suffix,
            )

            self.input("output scal")
            self.input("labin " + labin)

            self.close_wait()

            self.get_all_output()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError:
                # something went wrong; remove the output file and re-raise
                # so the caller sees the failure, as the other wrappers do
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise

        def convert_shelx(self, unmerged=False):
            """Convert the input reflection file to SHELX hklf4 format."""

            self.check_hklin()
            self.check_hklout()

            self.start()

            if self._dataset_suffix or unmerged:
                labin = "I=I{suffix} SIGI=SIGI{suffix}".format(
                    suffix=self._dataset_suffix,
                )
            else:
                labin = "I=IMEAN SIGI=SIGIMEAN"

            self.input("output shelx")
            self.input("labin " + labin)

            self.close_wait()

            self.get_all_output()

            try:
                self.check_for_errors()
                self.check_ccp4_errors()
            except RuntimeError:
                # something went wrong; remove the output file and re-raise
                try:
                    os.remove(self.get_hklout())
                except Exception:
                    pass
                raise

    return Mtz2variousWrapper()
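# Sketch: exporting anomalous intensities to scalepack format with the
# wrapper above. dname is the dataset suffix used in the column labels of
# a multi-dataset MTZ file; all arguments are placeholders.
def _example_export_sca(hklin, hklout, dname=None):
    m2v = Mtz2various()
    m2v.set_hklin(hklin)
    m2v.set_hklout(hklout)
    if dname:
        m2v.set_suffix(dname)
    m2v.convert()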
def Scaleit(DriverType=None): '''A factory for ScaleitWrapper classes.''' DriverInstance = DriverFactory.Driver(DriverType) CCP4DriverInstance = DecoratorFactory.Decorate(DriverInstance, 'ccp4') class ScaleitWrapper(CCP4DriverInstance.__class__): '''A wrapper for Scaleit, using the CCP4-ified Driver.''' def __init__(self): # generic things CCP4DriverInstance.__class__.__init__(self) self.set_executable( os.path.join(os.environ.get('CBIN', ''), 'scaleit')) self._columns = [] self._statistics = {} self._anomalous = False def set_anomalous(self, anomalous): self._anomalous = anomalous def find_columns(self): '''Identify columns to use with scaleit.''' # run mtzdump to get a list of columns out and also check that # this is a valid merged mtz file.... self.check_hklin() md = Mtzdump() md.set_hklin(self.get_hklin()) md.dump() # get information to check that this is merged # next get the column information - check that F columns are # present column_info = md.get_columns() columns = [] j = 0 groups = 0 # assert that the columns for F, SIGF, DANO, SIGDANO for a # particular group will appear in that order if anomalous, # F, SIGF if not anomalous while j < len(column_info): c = column_info[j] name = c[0] type = c[1] if type == 'F' and name.split('_')[0] == 'F' and \ self._anomalous: groups += 1 for i in range(4): columns.append(column_info[i + j][0]) j += 4 elif type == 'F' and name.split('_')[0] == 'F' and \ not self._anomalous: groups += 1 for i in range(2): columns.append(column_info[i + j][0]) j += 2 else: j += 1 # ok that should be all of the groups identified self._columns = columns return columns def check_scaleit_errors(self): for record in self.get_all_output(): if 'SCALEIT: ** No reflections **' in record: raise RuntimeError('no reflections') def scaleit(self): '''Run scaleit and get some interesting facts out.''' self.check_hklin() # need to have a HKLOUT even if we do not want the # reflections... self.check_hklout() if not self._columns: self.find_columns() self.start() self.input('nowt') self.input('converge ncyc 4') self.input('converge abs 0.001') self.input('converge tolr -7') self.input('refine anisotropic wilson') self.input('auto') labin = 'labin FP=%s SIGFP=%s' % \ (self._columns[0], self._columns[1]) if self._anomalous: groups = len(self._columns) // 4 else: groups = len(self._columns) // 2 for j in range(groups): if self._anomalous: labin += ' FPH%d=%s' % (j + 1, self._columns[4 * j]) labin += ' SIGFPH%d=%s' % (j + 1, self._columns[4 * j + 1]) labin += ' DPH%d=%s' % (j + 1, self._columns[4 * j + 2]) labin += ' SIGDPH%d=%s' % (j + 1, self._columns[4 * j + 3]) else: labin += ' FPH%d=%s' % (j + 1, self._columns[2 * j]) labin += ' SIGFPH%d=%s' % (j + 1, self._columns[2 * j + 1]) self.input(labin) self.close_wait() # check for errors try: self.check_for_errors() self.check_ccp4_errors() self.check_scaleit_errors() except RuntimeError as e: try: os.remove(self.get_hklout()) except Exception: pass raise e output = self.get_all_output() # generate mapping from derivative number to data set self._statistics['mapping'] = {} for j in range(groups): if self._anomalous: self._statistics['mapping'][j + 1] = self._columns[4 * j].replace( 'F_', '') else: self._statistics['mapping'][j + 1] = self._columns[2 * j].replace( 'F_', '') # now get some interesting information out... 
            j = 0
            r_values = []

            while j < len(output):
                line = output[j]

                if 'APPLICATION OF SCALES AND ANALYSIS OF DIFFERENCES' in line:
                    current_derivative = -1

                    while 'SUMMARY_END' not in line:
                        tokens = line.split()

                        if 'Derivative' in tokens:
                            if 'b_factor' not in self._statistics:
                                self._statistics['b_factor'] = {}
                            self._statistics['b_factor'][int(tokens[1])] = {
                                'scale': float(tokens[2]),
                                'b': float(tokens[3]),
                                'dname': self._statistics['mapping'][int(tokens[1])]
                            }
                            current_derivative = int(tokens[1])

                        if 'The equivalent isotropic' in line:
                            self._statistics['b_factor'][current_derivative][
                                'b'] = float(tokens[-1])

                        j += 1
                        line = output[j]

                if 'acceptable differences are less than' in line and \
                        groups == 1:
                    max_difference = float(line.split()[-1])
                    if max_difference > 0.01:
                        self._statistics['max_difference'] = max_difference

                if 'THE TOTALS' in line:
                    r_values.append(float(line.split()[6]))

                j += 1

            # transform back the r values to the statistics
            for j in range(len(r_values)):
                d = j + 1
                self._statistics['b_factor'][d]['r'] = r_values[j]

        def get_statistics(self):
            '''Get the statistics from the Scaleit run.'''
            return self._statistics

    return ScaleitWrapper()
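# Sketch: comparing derivative data sets against the native with the
# Scaleit wrapper above; file names are placeholders. The returned
# statistics map derivative numbers to scale, B factor and R values,
# plus the derivative-to-dataset mapping.
def _example_scaleit(hklin, hklout):
    scaleit = Scaleit()
    scaleit.set_hklin(hklin)
    scaleit.set_hklout(hklout)
    scaleit.set_anomalous(True)
    scaleit.scaleit()
    return scaleit.get_statistics()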