def get_bounding_box(event_map: gemmi.FloatGrid,
                     coord: Coord,
                     radius: float = 8.0,
                     margin: float = 5.0) -> gemmi.FractionalBox:
    """Return a fractional-coordinate box around ``coord``.

    Two orthogonal-space corners, ``coord - radius`` and ``coord + radius``
    on every axis, are fractionalized with ``event_map.grid.unit_cell`` and
    used to extend an initially empty ``gemmi.FractionalBox``.

    Args:
        event_map: Object exposing ``.grid.unit_cell``.
            NOTE(review): the annotation says ``gemmi.FloatGrid`` but the
            code reads ``event_map.grid`` -- presumably a ``gemmi.Ccp4Map``
            is what callers pass; confirm against the call sites.
        coord: Object with ``x``, ``y`` and ``z`` attributes convertible to
            ``float``.
        radius: Half edge length of the cube, in the same units as ``coord``.
        margin: Accepted for interface compatibility; not used by this
            function.

    Returns:
        The ``gemmi.FractionalBox`` extended by the two fractionalized
        corners.
    """
    unit_cell = event_map.grid.unit_cell
    x, y, z = float(coord.x), float(coord.y), float(coord.z)

    lower_corner = gemmi.Position(x - radius, y - radius, z - radius)
    upper_corner = gemmi.Position(x + radius, y + radius, z + radius)

    # Only the two extreme corners are fractionalized, exactly as before.
    box = gemmi.FractionalBox()
    box.extend(unit_cell.fractionalize(lower_corner))
    box.extend(unit_cell.fractionalize(upper_corner))
    return box
def apply_inverse(self, position: gemmi.Position) -> gemmi.Position:
    """Map a single position through the inverse of ``self.transform``.

    The position is first shifted by ``-self.com_moving``, then passed
    through ``self.transform.inverse()``, and finally shifted by
    ``+self.com_reference``.

    Args:
        position: Indexable 3-component position.

    Returns:
        A new ``gemmi.Position`` holding the transformed coordinates.
    """
    axes = range(3)
    centred = gemmi.Position(
        *[position[axis] - self.com_moving[axis] for axis in axes])
    rotated = self.transform.inverse().apply(centred)
    return gemmi.Position(
        *[rotated[axis] + self.com_reference[axis] for axis in axes])
def apply_moving_to_reference(
        self,
        positions: typing.Dict[typing.Tuple[int], gemmi.Position],
) -> typing.Dict[typing.Tuple[int], gemmi.Position]:
    """Transform every position with ``self.transform``.

    Each position is shifted by ``-self.com_moving``, passed through
    ``self.transform.apply`` and shifted by ``+self.com_reference``.

    Args:
        positions: Mapping from an index key to a 3-component position.

    Returns:
        A new dict with the same keys and the transformed positions.
    """

    def _to_reference(point):
        # Express the point relative to the moving centre before applying
        # the transform, then re-anchor it on the reference centre.
        centred = gemmi.Position(point[0] - self.com_moving[0],
                                 point[1] - self.com_moving[1],
                                 point[2] - self.com_moving[2])
        moved = self.transform.apply(centred)
        return gemmi.Position(moved[0] + self.com_reference[0],
                              moved[1] + self.com_reference[1],
                              moved[2] + self.com_reference[2])

    return {index: _to_reference(point) for index, point in positions.items()}
def test_read_5i55_again(self):
    """Read 5i55.cif, check entities, and round-trip it through mmCIF output.

    The structure is written back with ``make_mmcif_document`` and every
    category present in both input and output is compared value by value
    (numerically within 2e-4 where both values parse as floats).
    """
    block = gemmi.cif.read(full_path('5i55.cif'))[0]
    st = gemmi.make_structure_from_block(block)
    self.assertEqual(st.info['_entry.id'], '5I55')

    center = st[0].calculate_center_of_mass()
    # PyMOL>print cmd.centerofmass()
    pymol_ctr = [15.468438991742687, 4.8312495347721045, 20.607400844016833]
    self.assertLess(center.dist(gemmi.Position(*pymol_ctr)), 1e-7)

    # The unpacking also asserts one chain with exactly four subchains.
    chain, = st[0]
    sub_a, sub_b, _sub_c, _sub_d = chain.subchains()

    ent_a = st.get_entity_of(sub_a)
    self.assertEqual(ent_a.name, '1')
    self.assertEqual(ent_a.entity_type, gemmi.EntityType.Polymer)
    self.assertEqual(ent_a.polymer_type, gemmi.PolymerType.PeptideL)

    ent_b = st.get_entity_of(sub_b)
    self.assertEqual(ent_b.entity_type, gemmi.EntityType.NonPolymer)
    self.assertEqual(ent_b.polymer_type, gemmi.PolymerType.Unknown)

    ent_d = st.get_entity('4')
    self.assertEqual(ent_d.subchains, ['D'])
    self.assertEqual(ent_d.entity_type, gemmi.EntityType.Water)
    self.assertEqual(ent_d.polymer_type, gemmi.PolymerType.Unknown)

    output_block = st.make_mmcif_document().sole_block()
    cnames = block.get_mmcif_category_names()
    # Only categories that actually carry data in the output are compared.
    cnames_out = [name for name in output_block.get_mmcif_category_names()
                  if len(output_block.find_mmcif_category(name)) > 0]
    common_categories = sorted(name for name in cnames_out if name in cnames)
    cc = ['_atom_site.', '_atom_type.', '_cell.', '_chem_comp.',
          '_diffrn.', '_diffrn_detector.', '_diffrn_radiation.',
          '_diffrn_source.', '_entity.', '_entity_poly.',
          '_entity_poly_seq.', '_entry.', '_exptl.', '_exptl_crystal.',
          '_pdbx_database_status.', '_pdbx_struct_assembly.',
          '_pdbx_struct_assembly_gen.', '_pdbx_struct_oper_list.',
          '_refine.', '_reflns.', '_software.', '_struct.',
          '_struct_asym.', '_struct_conf.', '_struct_conf_type.',
          '_struct_conn.', '_struct_conn_type.', '_struct_keywords.',
          '_struct_ref.', '_struct_ref_seq.', '_symmetry.']
    self.assertEqual(common_categories, cc)

    for name in common_categories:
        cat_in = block.get_mmcif_category(name)
        cat_out = output_block.get_mmcif_category(name)
        for tag, values_out in cat_out.items():
            if tag == 'ccp4_link_id':
                continue
            values_in = cat_in[tag]
            self.assertEqual(len(values_in), len(values_out))
            for value_in, value_out in zip(values_in, values_out):
                try:
                    if (value_in == value_out
                            or abs(float(value_in) - float(value_out)) < 2e-4):
                        continue
                except ValueError:
                    # Non-numeric values that differ fall through to the
                    # whitelist check below.
                    pass
                # Only these two tags are allowed to differ on round-trip.
                self.assertIn(name + tag,
                              ['_struct_conf.id', '_chem_comp.type'])

    # Every non-empty output category must exist in the input file.
    for name_out in cnames_out:
        self.assertIn(name_out, cnames)
def get_sample_points(
        location,
        rotation,
        shape,
        scale,
):
    """Build the positions of a rotated, scaled sampling lattice.

    For every integer lattice index ``(x, y, z)`` with
    ``0 <= x < shape[0]``, ``0 <= y < shape[1]``, ``0 <= z < shape[2]`` the
    position ``location + rotation @ (scale * [x, y, z])`` is computed.

    The previous implementation iterated ``zip(range, range, range)``, which
    only produced the diagonal indices ``(i, i, i)``; the full Cartesian
    product of indices is generated here so every cell of a ``shape``-sized
    sample array gets a position.

    Args:
        location: Offset added to every rotated lattice vector; must support
            ``location + ndarray`` and indexing of the result by 0, 1, 2.
        rotation: Matrix passed as the first operand of ``np.matmul``.
        shape: Sequence whose first three entries are the lattice extents.
        scale: Multiplier applied to the integer lattice vector.

    Returns:
        Dict mapping ``(x, y, z)`` index tuples to ``gemmi.Position``.
    """
    points_to_sample = {}
    for x, y, z in np.ndindex(shape[0], shape[1], shape[2]):
        reference_vector = scale * np.array([x, y, z])
        rotated_vector = np.matmul(rotation, reference_vector)
        position = location + rotated_vector
        points_to_sample[(x, y, z)] = gemmi.Position(
            position[0],
            position[1],
            position[2],
        )
    return points_to_sample
def apply_reference_to_moving(
        self,
        positions: Dict[Tuple[int], gemmi.Position],
) -> Dict[Tuple[int], gemmi.Position]:
    """Transform every position with the inverse of ``self.transform``.

    Each position is shifted by ``-self.com_reference``, passed through
    ``self.transform.inverse()`` and shifted by ``+self.com_moving``.

    Args:
        positions: Mapping from an index key to a 3-component position.

    Returns:
        A new dict with the same keys and the transformed positions.
    """
    # The inverse is computed once and reused for every position.
    to_moving = self.transform.inverse()

    moved_positions = {}
    for key in positions:
        point = positions[key]
        centred = gemmi.Position(
            *[point[axis] - self.com_reference[axis] for axis in range(3)])
        rotated = to_moving.apply(centred)
        moved_positions[key] = gemmi.Position(
            *[rotated[axis] + self.com_moving[axis] for axis in range(3)])
    return moved_positions
def sample(
        xmap_grid,
        points_to_sample,
        shape,
):
    """Interpolate ``xmap_grid`` at a set of positions into a dense array.

    Args:
        xmap_grid: Object providing ``interpolate_value(position)``.
        points_to_sample: Mapping from array index to a position accepted by
            the ``gemmi.Position`` constructor.
        shape: Shape of the returned array.

    Returns:
        A ``numpy`` array of ``shape``, zero everywhere except at the given
        indices, which hold the interpolated values.
    """
    values = np.zeros(shape)
    for grid_index in points_to_sample:
        point = gemmi.Position(points_to_sample[grid_index])
        values[grid_index] = xmap_grid.interpolate_value(point)
    return values
def test_reading(self):
    """Read the bundled 5i55_tiny.ccp4 map and exercise the grid API.

    Covers raw header access, ``setup()``, interpolation at orthogonal and
    fractional coordinates, ``symmetrize_max`` / ``symmetrize_min`` and,
    when ``numpy`` is truthy, the numpy view of the grid.
    """
    path = os.path.join(os.path.dirname(__file__), '5i55_tiny.ccp4')
    m = gemmi.read_ccp4_map(path)
    # Grid dimensions as read, before setup() is called.
    self.assertEqual(m.grid.nu, 8)
    self.assertEqual(m.grid.nv, 6)
    self.assertEqual(m.grid.nw, 10)
    self.assertEqual(m.header_i32(28), 0)
    m.set_header_i32(28, 20140)  # set NVERSION
    self.assertEqual(m.header_i32(28), 20140)
    # Header word 21 is expected to equal the largest value in the grid.
    dmax = m.header_float(21)
    self.assertEqual(dmax, max(p.value for p in m.grid))
    self.assertNotEqual(m.grid.axis_order, gemmi.AxisOrder.XYZ)
    m.setup()
    # After setup() the grid is expected in XYZ order at 60 x 24 x 60.
    self.assertEqual(m.grid.axis_order, gemmi.AxisOrder.XYZ)
    self.assertEqual(m.grid.nu, 60)
    self.assertEqual(m.grid.nv, 24)
    self.assertEqual(m.grid.nw, 60)
    self.assertEqual(m.grid.point_count, 60 * 24 * 60)
    self.assertEqual(m.header_float(14), 90.0)  # 14 - alpha angle
    self.assertEqual(m.grid.unit_cell.alpha, 90.0)
    self.assertEqual(m.grid.spacegroup.ccp4, 4)  # P21
    # The same point given as orthogonal and as fractional coordinates must
    # interpolate to the same value.
    pos = gemmi.Position(19.4, 3., 21.)
    frac = m.grid.unit_cell.fractionalize(pos)
    pos_value = 2.1543798446655273
    self.assertAlmostEqual(m.grid.interpolate_value(pos), pos_value)
    self.assertAlmostEqual(m.grid.interpolate_value(frac), pos_value)
    # this spacegroup has symop -x, y+1/2, -z
    m.grid.set_value(60 - 3, 24 // 2 + 4, 60 - 5, 100)  # image of (3, 4, 5)
    self.assertEqual(m.grid.get_value(60 - 3, 24 // 2 + 4, 60 - 5), 100)
    self.assertTrue(math.isnan(m.grid.get_value(3, 4, 5)))
    # symmetrize_max is expected to copy 100 onto the NaN symmetry mate.
    m.grid.symmetrize_max()
    self.assertEqual(m.grid.get_value(3, 4, 5), 100)
    # Reset one mate to NaN; symmetrize_min is expected to restore 100.
    m.grid.set_value(3, 4, 5, float('nan'))
    self.assertTrue(math.isnan(m.grid.get_value(3, 4, 5)))
    m.grid.symmetrize_min()
    self.assertEqual(m.grid.get_value(3, 4, 5), 100)
    # Same check in the other direction: NaN on the image point.
    m.grid.set_value(60 - 3, 24 // 2 + 4, 60 - 5, float('nan'))
    m.grid.symmetrize_max()
    self.assertEqual(m.grid.get_value(60 - 3, 24 // 2 + 4, 60 - 5), 100)
    # NOTE(review): `numpy` is presumably a falsy placeholder when the
    # package is not installed -- confirm at the module's import site.
    if numpy:
        arr = numpy.array(m.grid, copy=False)
        self.assertEqual(arr.shape, (60, 24, 60))
        self.assertEqual(arr[3][4][5], 100)
        # A grid rebuilt from the array must match exactly, NaNs included.
        grid2 = gemmi.FloatGrid(arr)
        self.assertTrue(
            numpy.allclose(m.grid, grid2, atol=0.0, rtol=0, equal_nan=True))
def to_gemmi(self):
    """Convert ``self.partitioning`` into nested dicts of ``gemmi.Position``.

    Returns:
        ``{res_id: {grid_coord: gemmi.Position}}`` built from the first three
        components of every stored position.
    """
    return {
        res_id: {
            grid_coord: gemmi.Position(xyz[0], xyz[1], xyz[2])
            for grid_coord, xyz in residue_dict.items()
        }
        for res_id, residue_dict in self.partitioning.items()
    }
def sample_receptor_layers(
        event_map,
        receptor_model,
        event_centroid,
        rotation,
        translation,
        shape,
):
    """Sample one occupancy layer per element around an event centroid.

    A clone of ``event_map`` is made for every entry of the element key.
    Each atom found near the centroid marks a radius-1 sphere with value 1
    in the layer selected by its element name, and every layer is then
    sampled with ``sample_map``.

    Args:
        event_map: Grid cloned once per element via ``clone_grid``.
        receptor_model: Model passed to ``to_cra`` to resolve atom marks.
        event_centroid: Indexable 3-component centre of the search.
        rotation, translation, shape: Forwarded unchanged to ``sample_map``.

    Returns:
        List of ``sample_map`` results, one per layer.
    """
    element_key = get_element_key()
    layers = [clone_grid(event_map) for _ in element_key]

    centre = gemmi.Position(
        event_centroid[0],
        event_centroid[1],
        event_centroid[2],
    )
    # NOTE(review): `gemmi.subcells.find_atoms` is called on the module, not
    # on a search object built from `receptor_model` -- confirm that this
    # attribute exists in the gemmi version in use.
    nearby_marks = gemmi.subcells.find_atoms(centre, '\0', radius=10)

    for mark in nearby_marks:
        atom = mark.to_cra(receptor_model).atom
        atom_position = atom.pos
        layer_index = element_key[atom.element.name]
        layers[layer_index].set_points_around(
            atom_position,
            radius=1,
            value=1,
        )

    samples = []
    for layer in layers:
        samples.append(
            sample_map(layer, event_centroid, rotation, translation, shape))
    return samples
def get_cut_out_event_map(event_map: gemmi.FloatGrid,
                          coord: Coord,
                          radius: float = 10.0) -> gemmi.FloatGrid:
    """Return a copy of ``event_map`` that is zero outside a sphere.

    A P 1 ``Int8Grid`` mask with the event map's unit cell is set to 1
    within ``radius`` of ``coord``. Map values at the masked points are
    copied into a fresh ``FloatGrid`` that carries the event map's
    spacegroup and unit cell, and that grid is symmetrized with
    ``symmetrize_max`` before being returned.

    Args:
        event_map: Source grid; also supplies unit cell and spacegroup.
        coord: Object with ``x``, ``y`` and ``z`` attributes.
        radius: Radius of the sphere to keep.

    Returns:
        The new, masked ``gemmi.FloatGrid``.
    """
    centre = gemmi.Position(coord.x, coord.y, coord.z)
    density = np.array(event_map, copy=True)

    # The mask is deliberately built in P 1 rather than the map's spacegroup.
    mask_grid = gemmi.Int8Grid(*density.shape)
    mask_grid.spacegroup = gemmi.find_spacegroup_by_name("P 1")

    # Diagnostic output, taken before the unit cell is assigned.
    print(f"Spacegroup: {mask_grid.spacegroup.xhm()}")
    print(f"grid: {mask_grid}")
    mask_snapshot = np.array(mask_grid)
    print(f"Mask grid array: {mask_snapshot.shape}")
    print(f"Mask grid array: {mask_snapshot.size}")
    print(f"Mask grid array: {np.sum(np.isfinite(mask_snapshot))}")

    mask_grid.set_unit_cell(event_map.unit_cell)
    mask_grid.set_points_around(centre, radius=radius, value=1)
    mask_grid.symmetrize_max()
    selected = np.nonzero(np.array(mask_grid, copy=False, dtype=np.int8))

    cut_out = gemmi.FloatGrid(*density.shape)
    cut_out.spacegroup = event_map.spacegroup
    cut_out.set_unit_cell(event_map.unit_cell)

    # Writing through the no-copy view fills the gemmi grid itself.
    cut_out_view = np.array(cut_out, copy=False)
    cut_out_view[selected] = density[selected]
    cut_out.symmetrize_max()
    return cut_out
def get_masked_pdb(pdb: gemmi.Structure,
                   coord: Coord,
                   radius: float = 8.0) -> gemmi.Structure:
    """Strip atoms lying within ``radius`` of ``coord`` from ``pdb``.

    A parallel structure is built with the same models, chains and residues
    (name, seqid, subchain, label_seq and het_flag are copied), but only
    atoms whose distance to ``coord`` is strictly greater than ``radius``
    are kept. The models of ``pdb`` are then replaced by the filtered ones.

    Args:
        pdb: Structure to filter. It is modified in place.
        coord: Object with ``x``, ``y`` and ``z`` attributes.
        radius: Atoms at this distance or closer to ``coord`` are dropped.

    Returns:
        The same ``pdb`` object, now holding the filtered models.
    """
    event_centroid = gemmi.Position(
        coord.x,
        coord.y,
        coord.z,
    )

    new_structure = gemmi.Structure()
    for model_i, model in enumerate(pdb):
        new_structure.add_model(gemmi.Model(model.name), pos=-1)

        for chain_i, chain in enumerate(model):
            new_structure[model_i].add_chain(gemmi.Chain(chain.name), pos=-1)

            for residue_i, residue in enumerate(chain):
                new_residue = gemmi.Residue()
                new_residue.name = residue.name
                new_residue.seqid = residue.seqid
                new_residue.subchain = residue.subchain
                new_residue.label_seq = residue.label_seq
                new_residue.het_flag = residue.het_flag
                new_structure[model_i][chain_i].add_residue(new_residue,
                                                            pos=-1)

                # Atoms are added through the structure's own residue so
                # they land in the stored copy rather than in `new_residue`.
                target_residue = new_structure[model_i][chain_i][residue_i]
                for atom in residue:
                    if atom.pos.dist(event_centroid) > radius:
                        target_residue.add_atom(atom, pos=-1)

    # Swap the models. The count is taken up front: the previous code
    # iterated `pdb` while appending to and deleting from it.
    model_count = len(pdb)
    for model_i in range(model_count):
        pdb.add_model(new_structure[model_i], pos=-1)
        del pdb[0]

    return pdb
def score_structure(structure, xmap):
    """Count map points above 1.25 that lie near the structure's bonds.

    A P 1 mask grid with the shape and unit cell of ``xmap`` is set to 1.0
    within 0.75 of the midpoint of every pair of non-hydrogen atoms in the
    same residue that are closer than 2.0 (an atom paired with itself is
    included). The score is the sum of mask values over the points where
    ``xmap`` exceeds 1.25.

    Args:
        structure: Iterable of models -> chains -> residues -> atoms.
        xmap: Grid exposing ``nu``, ``nv``, ``nw`` and ``unit_cell``, and
            convertible with ``np.array``.

    Returns:
        The score as a Python ``float``.
    """
    mask = gemmi.FloatGrid(xmap.nu, xmap.nv, xmap.nw)
    mask.set_unit_cell(xmap.unit_cell)
    mask.spacegroup = gemmi.find_spacegroup_by_name("P 1")

    for model in structure:
        for chain in model:
            for residue in chain:
                # Hydrogens never take part on either side of a pair.
                heavy_positions = [
                    atom.pos for atom in residue if atom.element.name != "H"
                ]
                for first in heavy_positions:
                    for second in heavy_positions:
                        if first.dist(second) < 2.0:
                            midpoint = gemmi.Position(
                                (first.x + second.x) / 2,
                                (first.y + second.y) / 2,
                                (first.z + second.z) / 2,
                            )
                            mask.set_points_around(midpoint, 0.75, 1.0)

    mask_values = np.array(mask)
    above_cutoff = np.array(xmap) > 1.25
    return float(np.sum(above_cutoff * mask_values))
def prepare_training_data(
        mtz_directory: str,
        mtz_file: str,
        xyz_limits: List[int],
        output_directory: str,
        db_file: str,
        delete_temp: bool = True,
):
    """Convert phased MTZ files into fixed-extent CCP4 maps with labels.

    ``mtz_directory`` is walked as ``<structure>/<homologue>/``. For every
    homologue directory that contains ``mtz_file`` the FWT/PHWT columns are
    transformed into a map, written to a temporary ``temp_*.ccp4`` file in
    ``output_directory``, reloaded, cropped to a fixed extent and written as
    ``<structure>_<homologue>_<mtz name>.ccp4``. The refinement label for
    the pair is looked up in the SQLite database and a row
    ``[final map path, 1 if label == "1a" else 0]`` is appended to
    ``conv_map_list.csv``.

    Args:
        mtz_directory: Existing directory holding the structure folders.
        mtz_file: File name of the MTZ to convert in each homologue folder.
        xyz_limits: List or tuple of three ints. NOTE(review): it is only
            validated; the crop extent is currently fixed at grid point
            (50, 50, 50), as in the previous implementation.
        output_directory: Existing directory for maps and the CSV.
        db_file: Path to the SQLite database with the homologue tables.
        delete_temp: Accepted for interface compatibility. NOTE(review):
            temporary maps are not removed by this function.

    Returns:
        ``True`` when the walk completes.

    Raises:
        AssertionError: A directory or an expected MTZ file is missing, or
            ``xyz_limits`` is malformed.
        Exception: Whatever the map conversion or cropping raises; it is
            logged and re-raised. Failures while reading an MTZ file skip
            that homologue, and failures while writing the final map,
            querying the label or appending the CSV row are logged only.
    """
    logging.info("Preparing training data")

    # Check all directories exist. Explicit raises keep the AssertionError
    # type callers saw before, but still work under `python -O`.
    try:
        mtz_dir = Path(mtz_directory)
        if not mtz_dir.exists():
            raise AssertionError(f"{mtz_directory} does not exist")
    except Exception:
        logging.error(f"Could not find mtz directory at {mtz_directory}")
        raise

    try:
        output_dir = Path(output_directory)
        if not output_dir.exists():
            raise AssertionError(f"{output_directory} does not exist")
    except Exception:
        logging.error(f"Could not find output directory at {output_directory}")
        raise

    # Check xyz limits are of correct format
    limits_ok = (
        type(xyz_limits) in (list, tuple)
        and len(xyz_limits) == 3
        and all(type(value) is int for value in xyz_limits)
    )
    if not limits_ok:
        logging.error(
            "xyz_limits muste be provided as a list or tupls of three integer values"
        )
        raise AssertionError(
            "xyz_limits must be a list or tuple of three integers")

    # Initialise connection to database
    try:
        conn = sqlite3.connect(db_file)
        cur = conn.cursor()
    except Exception:
        logging.error(f"Could not connect to database at {db_file}")
        raise

    # Values are bound as parameters instead of being formatted into the SQL.
    label_query = '''
        SELECT refinement_success_lable, homologue_name_id
        FROM homologue_stats
        INNER JOIN homologue_name
        ON homologue_name.id = homologue_stats.homologue_name_id
        INNER JOIN pdb_id
        ON pdb_id.id = homologue_name.pdb_id_id
        WHERE homologue_name = ?
        AND pdb_id.pdb_id = ?
    '''

    # Grid point used as the upper corner of the cropped map.
    extent_point = (50, 50, 50)

    # Remove only a trailing ".mtz"; str.strip(".mtz") would also eat any
    # leading or trailing '.', 'm', 't' and 'z' characters of the name.
    mtz_state = str(mtz_file)
    if mtz_state.endswith(".mtz"):
        mtz_state = mtz_state[:-len(".mtz")]

    csv_path = os.path.join(output_dir, "conv_map_list.csv")

    try:
        if not os.path.exists(csv_path):
            with open(csv_path, "w", newline="") as out_csv:
                csv.writer(out_csv).writerow(["filename", "ai_lable"])

        # Get lists of child directories
        mtz_structs = sorted(struct.stem for struct in mtz_dir.iterdir())
        logging.debug(
            f"Following structures found to transform: {mtz_structs}")

        # this below works but runs serial
        for struct_number, struct in enumerate(mtz_structs, start=1):
            struct_dir = Path(os.path.join(mtz_dir, struct))
            homo_lst = [homo.stem for homo in struct_dir.iterdir()]

            for homo_number, homo in enumerate(homo_lst, start=1):
                homo_dir = os.path.join(struct_dir, homo)
                logging.info(
                    f"Converting results for structure {struct}, {struct_number}/{len(mtz_structs)}"
                )
                if mtz_file not in os.listdir(homo_dir):
                    continue

                logging.info(
                    f"Collecting info for {homo}, {homo_number}/{len(homo_lst)}"
                )
                homo_mtz = Path(os.path.join(homo_dir, mtz_file))
                try:
                    if not homo_mtz.exists():
                        raise AssertionError(f"{homo_mtz} does not exist")
                except Exception:
                    logging.error(
                        f"Could not find homologue phased MTZ file {homo_mtz}")
                    raise

                try:
                    data = gemmi.read_mtz_file(str(homo_mtz))
                except Exception:
                    # Best effort: skip this homologue. Falling through
                    # would reuse the previous homologue's data (or hit an
                    # undefined name on the first one).
                    logging.error(f"Could not read {homo_mtz}")
                    continue

                temp_out_file = os.path.join(
                    output_dir, "temp_" + struct + "_" + homo + ".ccp4")
                try:
                    data_to_map = gemmi.Ccp4Map()
                    print("Grid of MTZ file", data_to_map.grid)
                    data_to_map.grid = data.transform_f_phi_to_map(
                        'FWT', 'PHWT', sample_rate=4)
                    print("Grid after converting MTZ to MAP",
                          data_to_map.grid)
                    data_to_map.update_ccp4_header(2, True)
                    data_to_map.write_ccp4_map(temp_out_file)
                except Exception:
                    logging.error(f"Could not create map from {homo_mtz}")
                    raise

                try:
                    # The map is round-tripped through the temporary file
                    # before it is cropped.
                    map_to_map = gemmi.read_ccp4_map(temp_out_file)
                    map_to_map.setup()
                    print("Grid after loading temp file", map_to_map.grid)

                    box = gemmi.FractionalBox()
                    box.minimum = gemmi.Fractional(0, 0, 0)
                    box.maximum = map_to_map.grid.point_to_fractional(
                        map_to_map.grid.get_point(*extent_point))
                    box.add_margin(1e-5)
                    map_to_map.set_extent(box)
                    print("Grid after setting XYZ limits for MAP",
                          map_to_map.grid)
                except Exception:
                    # Log the file name: `map_to_map` may not exist if the
                    # read itself failed.
                    logging.error(f"Could not expand map {temp_out_file}")
                    raise

                final_name = struct + "_" + homo + "_" + mtz_state + ".ccp4"
                final = os.path.join(output_dir, final_name)
                try:
                    map_to_map.write_ccp4_map(final)
                    cur.execute(label_query, (homo, struct))
                    lable = (cur.fetchone())[0]
                    print(lable)
                    # Only "MR works and building works" ("1a") counts as a
                    # positive example.
                    new_lable = 1 if lable == "1a" else 0
                    print(new_lable)
                    with open(csv_path, "a", newline="") as out_csv:
                        csv.writer(out_csv).writerow([final, new_lable])
                except Exception:
                    # Deliberately not re-raised: remaining homologues are
                    # still processed.
                    logging.error(f"Could not write final map {final}")
    finally:
        conn.close()

    return True
def make_gemmi_position_format_from_coords(coords):
    """Build a ``gemmi.Position`` from an object with x, y and z attributes.

    Each attribute is converted with ``float`` before being passed on.
    """
    x, y, z = (float(getattr(coords, axis)) for axis in ("x", "y", "z"))
    return gemmi.Position(x, y, z)
def resample(
        reference_xmap: gemmi.FloatGrid,
        moving_xmap: gemmi.FloatGrid,
        reference_structure: gemmi.Structure,
        moving_structure: gemmi.Structure,
        monomerized=False,
):
    """Resample ``moving_xmap`` onto the grid of ``reference_xmap``.

    An alignment between the two structures is obtained from
    ``get_alignment``. Grid points of the reference frame that lie within
    3.0 of a polymer atom of ``reference_structure`` (and inside the index
    range spanned by those atoms) are mapped into the moving frame by:
    subtracting ``com_reference``, applying the alignment rotation, and
    adding ``com_moving``. ``moving_xmap`` is interpolated there and the
    value is stored at the original grid point.

    Args:
        reference_xmap: Grid defining the output shape, cell and spacegroup.
        moving_xmap: Grid that is interpolated.
        reference_structure: Structure whose polymer atoms define the mask.
        moving_structure: Structure aligned against the reference.
        monomerized: Forwarded to ``get_alignment``.

    Returns:
        A new ``gemmi.FloatGrid`` with the resampled values, symmetrized
        with ``symmetrize_max``.
    """
    # Get transform: from ref to align
    transform = get_alignment(moving_structure, reference_structure,
                              monomerized=monomerized)
    print(f"Transform: {transform}; {transform.transform.vec} {transform.transform.mat}")

    grid_shape = (reference_xmap.nu, reference_xmap.nv, reference_xmap.nw)

    interpolated_grid = gemmi.FloatGrid(*grid_shape)
    interpolated_grid.set_unit_cell(reference_xmap.unit_cell)
    interpolated_grid.spacegroup = reference_xmap.spacegroup

    # P 1 mask of the region around the reference polymer.
    mask = gemmi.FloatGrid(*grid_shape)
    mask.set_unit_cell(reference_xmap.unit_cell)
    mask.spacegroup = gemmi.find_spacegroup_by_name("P 1")

    # One pass over the polymer atoms fills the mask and collects the
    # fractional coordinates used for the index bounding box.
    fractional_coords = []
    for model in reference_structure:
        for chain in model:
            for residue in chain.get_polymer():
                for atom in residue:
                    mask.set_points_around(atom.pos, 3.0, 1.0)
                    fractional = reference_xmap.unit_cell.fractionalize(
                        atom.pos)
                    fractional_coords.append(
                        [fractional.x, fractional.y, fractional.z])

    mask_array = np.array(mask)
    mask_indicies = np.column_stack(np.nonzero(mask_array))
    print(f"Mask indicies shape: {mask_indicies.shape}")

    fractional_coords_array = np.array(fractional_coords)
    grid_dims = np.array(grid_shape)
    min_index = np.floor(np.min(fractional_coords_array, axis=0) * grid_dims)
    max_index = np.floor(np.max(fractional_coords_array, axis=0) * grid_dims)

    # The upper index on each axis is exclusive, as before.
    points = itertools.product(
        range(int(min_index[0]), int(max_index[0])),
        range(int(min_index[1]), int(max_index[1])),
        range(int(min_index[2]), int(max_index[2])),
    )

    # Rotation only: the translation of the alignment is zeroed in place on
    # `transform.transform` (this is the same object, not a copy).
    transform_rotate_reference_to_moving = transform.transform
    transform_rotate_reference_to_moving.vec.fromlist([0.0, 0.0, 0.0])

    # Pure translation: reference frame -> centred on the reference COM.
    transform_reference_to_centered = gemmi.Transform()
    transform_reference_to_centered.vec.fromlist(
        (-transform.com_reference).tolist())
    transform_reference_to_centered.mat.fromlist(np.eye(3).tolist())

    # Pure translation: centred frame -> moving frame.
    transform_centered_to_moving = gemmi.Transform()
    transform_centered_to_moving.vec.fromlist(transform.com_moving.tolist())
    transform_centered_to_moving.mat.fromlist(np.eye(3).tolist())

    for point in points:
        # Skip grid points that are not near the reference polymer.
        if mask.get_value(*point) < 1.0:
            continue

        position = interpolated_grid.get_position(*point)

        # Translate to the origin frame, rotate, translate to moving frame.
        position_origin_reference = gemmi.Position(
            transform_reference_to_centered.apply(position))
        position_origin_moving = gemmi.Position(
            transform_rotate_reference_to_moving.apply(
                position_origin_reference))
        position_moving = gemmi.Position(
            transform_centered_to_moving.apply(position_origin_moving))

        interpolated_map_value = moving_xmap.interpolate_value(
            position_moving)
        interpolated_grid.set_value(point[0], point[1], point[2],
                                    interpolated_map_value)

    interpolated_grid.symmetrize_max()
    return interpolated_grid