def read_pdb(path):
    """Collect summary statistics from the records of the PDB file at path.

    The file is parsed with pdbmodule.read(path) and every record is
    dispatched on its "RECORD" value.

    Returns a dict.  "path" is always present; every other key is only
    present when a matching record was seen:

      "atoms"        number of ATOM/HETATM records whose resName is a key
                     of res_dict
      "prot_chains"  list of the distinct, non-empty chainID values of
                     those records, in order of first appearance
      "hetatoms"     number of HETATM records whose resName is not in
                     res_dict
      "anisou"       number of ANISOU records (any residue)
      "aniso_dict"   accumulator created by mk_aniso_dict() and updated by
                     set_aniso_dict() for ANISOU records in res_dict
      "res"          float captured by re_res from a REMARK text
      "program"      string captured by re_program from a REMARK text
      "date"         modDate of the REVDAT record whose modType is 0
      "tech"         technique of the EXPDTA record
      "id"           idCode of the HEADER record
      "Badv_prot"    mean tempFactor over the "atoms" records that carry one
      "Uadv_prot"    mean of (u[0][0] + u[1][1] + u[2][2]) / 30000.0 over
                     ANISOU records in res_dict
      "Aadv_prot"    mean of the non-None values returned by
                     set_aniso_dict()

    Raises KeyError if a record lacks "RECORD", if an EXPDTA record lacks
    "technique", or if an ANISOU record in res_dict lacks a diagonal Uij.
    """
    stats = {"path": path}

    # Running (sum, count) pairs for the three averages computed at the end.
    b_sum, b_count = 0.0, 0
    u_sum, u_count = 0.0, 0
    a_sum, a_count = 0.0, 0

    for rec in pdbmodule.read(path):
        rec_type = rec["RECORD"]

        # NOTE(review): the "ATOM " literal is reproduced exactly as found in
        # this file; confirm it matches the RECORD value pdbmodule emits
        # (including the number of trailing spaces).
        if rec_type == "ATOM " or rec_type == "HETATM":
            if rec.get("resName", "") in res_dict:
                stats["atoms"] = stats.get("atoms", 0) + 1

                # Only records that actually carry a tempFactor contribute
                # to the B average.
                try:
                    b_sum += rec["tempFactor"]
                except KeyError:
                    pass
                else:
                    b_count += 1

                cid = rec.get("chainID")
                if cid:
                    pchains = stats.setdefault("prot_chains", [])
                    if cid not in pchains:
                        pchains.append(cid)

            elif rec_type == "HETATM":
                stats["hetatoms"] = stats.get("hetatoms", 0) + 1

        elif rec_type == "ANISOU":
            stats["anisou"] = stats.get("anisou", 0) + 1

            if rec.get("resName", "") in res_dict:
                trace = rec["u[0][0]"] + rec["u[1][1]"] + rec["u[2][2]"]
                u_sum += float(trace) / 30000.0
                u_count += 1

                # Create the accumulator lazily.  Testing for the key (rather
                # than wrapping the call in try/except KeyError) keeps a
                # KeyError raised inside set_aniso_dict() from silently
                # discarding the values accumulated so far.
                if "aniso_dict" not in stats:
                    stats["aniso_dict"] = mk_aniso_dict()
                aniso = set_aniso_dict(rec, stats["aniso_dict"])

                if aniso is not None:
                    a_count += 1
                    a_sum += aniso

        elif rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            res_match = re_res.match(text)
            if res_match is not None:
                try:
                    stats["res"] = float(res_match.group(1))
                except ValueError:
                    # Non-numeric resolution text: leave "res" unset.
                    pass
                # A resolution line is never also tested as a program line.
                continue

            program_match = re_program.match(text)
            if program_match is not None:
                stats["program"] = program_match.group(1)

        elif rec_type == "REVDAT":
            if rec.get("modType") == 0:
                stats["date"] = rec.get("modDate", "dd-mmm-yy")

        elif rec_type == "EXPDTA":
            stats["tech"] = rec["technique"]

        elif rec_type == "HEADER":
            stats["id"] = rec.get("idCode", "XXXX")

    # Finish the averages; a key is omitted when nothing contributed to it.
    if b_count:
        stats["Badv_prot"] = b_sum / b_count
    if u_count:
        stats["Uadv_prot"] = u_sum / u_count
    if a_count:
        stats["Aadv_prot"] = a_sum / a_count

    return stats
def read_pdb(path):
    """Extract TLS descriptors, overall B terms and backbone atoms from a PDB.

    The file is parsed with pdbmodule.read(path) and every record is
    dispatched on its "RECORD" value.

    Returns a 3-tuple (tls_text, overall, atom_obj):

      tls_text  newline-joined "#RANGE ..." and "#ORIGIN ..." lines, one per
                REMARK text matched by re_tls_range / re_tls_origin, in
                file order.
      overall   dict with integer keys 0..5 holding, as floats, the values
                captured by re_b11, re_b22, re_b33, re_b12, re_b13 and
                re_b23 respectively.  A term whose captured value is the
                literal "NULL" (or that never appears) has no key.
      atom_obj  list with one dict per ATOM/HETATM record that belongs to a
                standard residue (Library.library_is_standard_residue) and
                whose stripped atom name is in Constants.BACKBONE_ATOMS.
                Each dict holds serial, name, resName, chainID, resSeq,
                x, y, z, occupancy, tempFactor, the six "u[i][j]" terms,
                "has_anisou" (0 or 1), plus "element" and "RECORD", which
                are always left as empty strings.

    The TLS T/L/S tensor values are not parsed here.
    NOTE: These can be provided via an external TLSIN/TLSOUT file.

    Raises KeyError if a kept ATOM/HETATM/ANISOU record lacks an expected
    field, and ValueError if a captured number cannot be converted.
    """
    # Uij keys, in the same order as the integer keys of `overall`.
    uij_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                "u[0][1]", "u[0][2]", "u[1][2]")

    # Patterns for the "OVERALL ANISOTROPIC B VALUE" terms; the position of
    # a pattern in this tuple is the key its value gets in `overall`.
    overall_patterns = (re_b11, re_b22, re_b33, re_b12, re_b13, re_b23)
    last_overall_index = len(overall_patterns) - 1

    # Disabled switch carried over from the original implementation: when
    # True, overall[i] is added to each ANISOU term before it is stored.
    ADD_B_OVERALL = False

    def is_backbone_atom(record):
        """Return True for a backbone atom of a standard residue."""
        return (Library.library_is_standard_residue(record["resName"]) and
                record["name"].strip() in Constants.BACKBONE_ATOMS)

    listx = []
    overall = {}
    atom_obj = []

    # 0: waiting for the overall-B header line
    # 1: reading the individual Bij lines
    # 2: overall-B section finished
    state = 0

    for rec in pdbmodule.read(path):
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            ## Capture the "OVERALL ANISOTROPIC B VALUE" fields
            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for index, pattern in enumerate(overall_patterns):
                    m = pattern.match(text)
                    if m is None:
                        continue
                    # Each term is checked against its OWN captured value.
                    # (Previously B33/B12/B13/B23 all tested the B22 capture.)
                    # NOTE(review): groups(1)[0] is kept from the original;
                    # group(1) was probably intended -- confirm against the
                    # regex definitions.
                    value = m.groups(1)[0]
                    if value != "NULL":
                        overall[index] = float(value)
                    if index == last_overall_index:
                        # The last term ends the section.
                        state = 2

            ## Capture the TLS "RANGE"s fields
            # NOTE(review): the format strings below are reproduced exactly
            # as found in this file; confirm the spacing is as intended.
            m = re_tls_range.match(text)
            if m is not None:
                chain_id1, frag_id1, chain_id2, frag_id2 = m.groups()
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)
                listx.append("#RANGE '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), "ALL"))

            ## Capture the TLS "ORIGIN"s fields
            m = re_tls_origin.match(text)
            if m is not None:
                remaining = m.group(1)
                # The numbers cannot be trusted to stay in fixed columns, so
                # each coordinate is located by its decimal point: up to four
                # characters before the "." and four after it.  Coordinates
                # that are not found stay 0.0.
                origin = [0.0, 0.0, 0.0]
                for i in range(3):
                    dot = remaining.find(".")
                    if dot == -1:
                        break
                    origin[i] = float(remaining[max(0, dot - 4):dot + 5])
                    remaining = remaining[dot + 5:]
                listx.append("#ORIGIN %8.4f %8.4f %8.4f" % tuple(origin))

        ## Now, capture the ATOM and/or HETATM fields
        # NOTE(review): the "ATOM " literal is reproduced exactly as found in
        # this file; confirm it matches the RECORD value pdbmodule emits.
        elif rec_type == "ATOM " or rec_type == "HETATM":
            if is_backbone_atom(rec):
                # Initialize the Uij values to isotropic (tempFactor * B2UE4
                # on the diagonal, zero off-diagonal) in case this atom has
                # no ANISOU record, e.g. if it is next to but not in a TLS
                # group.
                u_iso = rec["tempFactor"] * B2UE4
                atom_obj.append({
                    "RECORD": "",
                    "element": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "u[0][0]": u_iso,
                    "u[1][1]": u_iso,
                    "u[2][2]": u_iso,
                    "u[0][1]": 0.0,
                    "u[0][2]": 0.0,
                    "u[1][2]": 0.0,
                    "has_anisou": 0,
                })

        ## Capture the ANISOU fields, if they are present
        elif rec_type == "ANISOU":
            # Skip (with a warning) any record missing a diagonal term.
            try:
                rec["u[0][0]"] + rec["u[1][1]"] + rec["u[2][2]"]
            except KeyError:
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"],
                    rec["resName"], rec["resSeq"]))
                continue

            ## We only keep the backbone atom's Uij values.
            if is_backbone_atom(rec):
                # The values are written onto the most recently stored atom,
                # i.e. this relies on the ANISOU record following the
                # ATOM/HETATM record it belongs to.
                atom = atom_obj[-1]
                for index, key in enumerate(uij_keys):
                    value = rec[key]
                    if ADD_B_OVERALL:
                        value = value + overall[index]
                    atom[key] = value
                atom["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj
def read_pdb(path):
    """Read TLS ranges/origins, overall B terms and backbone atoms from a PDB.

    Records come from pdbmodule.read(path); each one is handled according
    to its "RECORD" value.

    Returns a 3-tuple (tls_text, overall, atom_obj):

      tls_text  "#RANGE ..." and "#ORIGIN ..." lines joined with newlines;
                one line for every REMARK text matched by re_tls_range or
                re_tls_origin, in the order encountered.
      overall   dict keyed 0..5 with the float values captured by re_b11,
                re_b22, re_b33, re_b12, re_b13 and re_b23 (in that order).
                A term is absent when its captured value is the literal
                "NULL" or when its line was never seen.
      atom_obj  list of dicts, one for each ATOM/HETATM record accepted by
                Library.library_is_standard_residue whose stripped name is
                in Constants.BACKBONE_ATOMS.  Keys: serial, name, resName,
                chainID, resSeq, x, y, z, occupancy, tempFactor, the six
                "u[i][j]" terms, "has_anisou" (0 or 1), and "element" /
                "RECORD", which always stay empty strings.

    The TLS T/L/S tensor REMARK lines are not parsed by this function.

    Raises KeyError when a kept ATOM/HETATM/ANISOU record is missing an
    expected field, and ValueError when a captured number is not a float.
    """
    # Storage order of the Uij terms; it matches the key order of `overall`.
    uij_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                "u[0][1]", "u[0][2]", "u[1][2]")

    # One pattern per overall-B term; its tuple index is its `overall` key.
    overall_patterns = (re_b11, re_b22, re_b33, re_b12, re_b13, re_b23)
    final_term = len(overall_patterns) - 1

    # Disabled switch retained from the original code: when True, overall[i]
    # is added to every ANISOU term before it is stored.
    ADD_B_OVERALL = False

    def keep_atom(record):
        """Return True for a backbone atom of a standard residue."""
        return (Library.library_is_standard_residue(record["resName"]) and
                record["name"].strip() in Constants.BACKBONE_ATOMS)

    listx = []
    overall = {}
    atom_obj = []

    # Overall-B parser state: 0 = header not seen yet, 1 = reading the Bij
    # lines, 2 = section complete.
    state = 0

    for rec in pdbmodule.read(path):
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for term, pattern in enumerate(overall_patterns):
                    m = pattern.match(text)
                    if m is None:
                        continue
                    # Test this term's own capture for "NULL"; the earlier
                    # code tested the B22 capture for four of the six terms.
                    # NOTE(review): groups(1)[0] is kept as found; group(1)
                    # was probably intended -- confirm against the regexes.
                    captured = m.groups(1)[0]
                    if captured != "NULL":
                        overall[term] = float(captured)
                    if term == final_term:
                        state = 2

            # TLS group residue range.
            # NOTE(review): the format strings below are reproduced exactly
            # as found in this file; confirm the spacing is as intended.
            m = re_tls_range.match(text)
            if m is not None:
                chain_id1, frag_id1, chain_id2, frag_id2 = m.groups()
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)
                listx.append("#RANGE '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), "ALL"))

            # TLS group origin.
            m = re_tls_origin.match(text)
            if m is not None:
                rest = m.group(1)
                # The numbers do not reliably stay in fixed columns, so each
                # coordinate is found through its decimal point: at most four
                # characters before the "." and four after.  Missing
                # coordinates keep the 0.0 default.
                ox = [0.0, 0.0, 0.0]
                for axis in range(3):
                    dot = rest.find(".")
                    if dot == -1:
                        break
                    ox[axis] = float(rest[max(0, dot - 4):dot + 5])
                    rest = rest[dot + 5:]
                listx.append("#ORIGIN %8.4f %8.4f %8.4f" % tuple(ox))

        # NOTE(review): the "ATOM " literal is reproduced exactly as found in
        # this file; confirm it matches the RECORD value pdbmodule emits.
        elif rec_type == "ATOM " or rec_type == "HETATM":
            if keep_atom(rec):
                # Start from isotropic Uij values (tempFactor * B2UE4 on the
                # diagonal, zero elsewhere) in case this atom has no ANISOU
                # record, e.g. if it is next to but not in a TLS group.
                u_iso = rec["tempFactor"] * B2UE4
                atom_obj.append({
                    "RECORD": "",
                    "element": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "u[0][0]": u_iso,
                    "u[1][1]": u_iso,
                    "u[2][2]": u_iso,
                    "u[0][1]": 0.0,
                    "u[0][2]": 0.0,
                    "u[1][2]": 0.0,
                    "has_anisou": 0,
                })

        elif rec_type == "ANISOU":
            # A record without all three diagonal terms is reported and
            # skipped.
            try:
                rec["u[0][0]"] + rec["u[1][1]"] + rec["u[2][2]"]
            except KeyError:
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"],
                    rec["resName"], rec["resSeq"]))
                continue

            if keep_atom(rec):
                # Overwrite the isotropic defaults of the most recently
                # stored atom; this relies on the ANISOU record following
                # the ATOM/HETATM record it belongs to.
                latest = atom_obj[-1]
                for term, key in enumerate(uij_keys):
                    value = rec[key]
                    if ADD_B_OVERALL:
                        value = value + overall[term]
                    latest[key] = value
                latest["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj