def unpickle_miller_arrays(file_name):
  """Load Miller array(s) from a pickle file.

  Returns a list of miller.array objects.  A pickled single array is
  wrapped in a one-element list; a pickled collection is returned as a
  list only if every element is a miller.array, otherwise None.
  """
  loaded = easy_pickle.load(file_name)
  # Python 3 pickle fix
  # =========================================================================
  # Pickles written under Python 2 need their strings/keys converted.
  if sys.version_info.major == 3:
    loaded = easy_pickle.fix_py2_pickle(loaded)
  # =========================================================================
  if isinstance(loaded, miller.array):
    return [loaded]
  arrays = list(loaded)
  # Reject the whole result if anything in the collection is not a
  # miller.array; callers treat None as "not a usable pickle".
  for candidate in arrays:
    if not isinstance(candidate, miller.array):
      return None
  return arrays
def load_db(file_name=None):
  """Load the POLYGON multi-value statistics database from a pickle.

  When file_name is None, the bundled database
  (chem_data/polygon_data/all_mvd.pickle) is located via the repository
  search path.  The file must exist.
  """
  if file_name is None:
    file_name = libtbx.env.find_in_repositories(
      relative_path="chem_data/polygon_data/all_mvd.pickle",
      test=os.path.isfile)
  assert os.path.isfile(file_name)
  db = easy_pickle.load(file_name)
  # Python 3 pickle fix
  # =========================================================================
  # Convert Python 2-era pickle contents (byte keys/strings) when running
  # under Python 3.
  if sys.version_info.major == 3:
    db = easy_pickle.fix_py2_pickle(db)
  # =========================================================================
  return db
def __init__(self, models, log):
  """Build per-residue Ramachandran info and secondary-structure counts.

  For each model: extracts (or isolates the first sub-model of) the
  hierarchy, runs conservative CA-based secondary-structure search,
  records helix/sheet selections, then walks protein three-residue
  windows collecting phi/psi angles and classifying each central
  residue by secondary-structure type (H/S/L).  Populates
  self.res_info and self.residue_counts; "W" is the whole-model total.
  """
  # Reference database shipped with chem_data; path is None if not found —
  # NOTE(review): no explicit check here, easy_pickle.load would fail below.
  db_path = libtbx.env.find_in_repositories(
      relative_path="chem_data/rama_z/top8000_rama_z_dict.pkl",
      test=os.path.isfile)
  self.log = log
  # this takes ~0.15 seconds, so I don't see a need to cache it somehow.
  self.db = easy_pickle.load(db_path)
  # Python 3 pickle fix
  # =========================================================================
  if sys.version_info.major == 3:
    self.db = easy_pickle.fix_py2_pickle(self.db)
  # =========================================================================
  # Per-SS-type (mean, std) calibration constants; presumably derived from
  # the Top8000 reference set — TODO confirm provenance.
  self.calibration_values = {
      'H': (-0.045355950779513175, 0.1951165524439217),
      'S': (-0.0425581278436754, 0.20068584887814633),
      'L': (-0.018457764754231075, 0.15788374669456848),
      'W': (-0.016806654295023003, 0.12044960331869274)}
  # Residue tallies per SS type: H=helix, S=sheet, L=loop ("W" added at end).
  self.residue_counts = {"H": 0, "S": 0, "L": 0}
  # Computed z-scores, filled in elsewhere; None until calculated.
  self.z_score = {"H": None, "S": None, "L": None, 'W': None}
  self.means = {"H": {}, "S": {}, "L": {}}
  self.stds = {"H": {}, "S": {}, "L": {}}
  # Grid granularity for the phi/psi histogram (degrees) and half-counts.
  self.phi_step = 4
  self.psi_step = 4
  self.n_phi_half = 45
  self.n_psi_half = 45
  # this is needed to disable e.g. selection functionality when
  # multiple models are present
  self.n_models = len(models)
  self.res_info = []
  for model in models:
    if model.get_hierarchy().models_size() > 1:
      # Multi-MODEL file: analyze only the first sub-model, detached into
      # a fresh root so selections are built against it alone.
      hierarchy = iotbx.pdb.hierarchy.root()
      m = model.get_hierarchy().models()[0].detached_copy()
      hierarchy.append_model(m)
      asc = hierarchy.atom_selection_cache()
    else:
      hierarchy = model.get_hierarchy()
      asc = model.get_atom_selection_cache()
    # Conservative CA-only secondary-structure search (no restraints needed).
    sec_str_master_phil = iotbx.phil.parse(sec_str_master_phil_str)
    ss_params = sec_str_master_phil.fetch().extract()
    ss_params.secondary_structure.protein.search_method = "from_ca"
    ss_params.secondary_structure.from_ca_conservative = True
    ssm = ss_manager(hierarchy,
        atom_selection_cache=asc,
        geometry_restraints_manager=None,
        sec_str_from_pdb_file=None,
        # params=None,
        params=ss_params.secondary_structure,
        was_initialized=False,
        mon_lib_srv=None,
        verbose=-1,
        log=null_out(),
        # log=sys.stdout,
        )
    # Drop annotations too short to be meaningful (< 4 residues).
    filtered_ann = ssm.actual_sec_str.deep_copy()
    filtered_ann.remove_short_annotations(
        helix_min_len=4, sheet_min_len=4, keep_one_stranded_sheets=True)
    # NOTE(review): these selections are overwritten on each loop pass, so
    # after the loop they reflect the last model only — confirm intended.
    self.helix_sel = asc.selection(filtered_ann.overall_helices_selection())
    self.sheet_sel = asc.selection(filtered_ann.overall_sheets_selection())
    used_atoms = set()
    for three in generate_protein_threes(hierarchy=hierarchy, geometry=None):
      # Central residue of the three-residue window is the one scored.
      main_residue = three[1]
      phi_psi_atoms = three.get_phi_psi_atoms()
      if phi_psi_atoms is None:
        continue
      phi_atoms, psi_atoms = phi_psi_atoms
      # De-duplicate windows by the i_seqs of the dihedral-defining atoms.
      key = [x.i_seq for x in phi_atoms]+[psi_atoms[-1].i_seq]
      key = "%s" % key
      if key not in used_atoms:
        phi, psi = three.get_phi_psi_angles()
        rkey = three.get_ramalyze_key()
        resname = main_residue.resname
        ss_type = self._figure_out_ss(three)
        # Leading "" slot: purpose unclear from this view — presumably a
        # model/chain label filled elsewhere; verify against consumers.
        self.res_info.append(["", rkey, resname, ss_type, phi, psi])
        self.residue_counts[ss_type] += 1
        used_atoms.add(key)
  # "W" (whole) = sum over all three SS classes, across all models.
  self.residue_counts["W"] = self.residue_counts["H"] + \
      self.residue_counts["S"] + self.residue_counts["L"]