Пример #1
0
def read_pdb(path):
    """Read the PDB file at *path* and collect summary statistics.

    The file is parsed with ``pdbmodule.read`` and every record is
    inspected once.  Entries are only added to the result when the
    corresponding data is present in the file, so callers must treat
    every key other than ``"path"`` as optional.

    Returns a dict which may contain:

    ``"path"``
        The *path* argument.
    ``"atoms"``
        Number of ATOM/HETATM records whose ``resName`` is in ``res_dict``.
    ``"hetatoms"``
        Number of HETATM records whose ``resName`` is not in ``res_dict``.
    ``"prot_chains"``
        Distinct chain IDs of the counted atoms, in order of appearance.
    ``"anisou"``
        Number of ANISOU records.
    ``"aniso_dict"``
        Accumulator created by ``mk_aniso_dict`` and filled by
        ``set_aniso_dict``.
    ``"res"``, ``"program"``
        Values captured from REMARK text by ``re_res`` / ``re_program``.
    ``"date"``, ``"tech"``, ``"id"``
        Taken from the REVDAT (modType 0), EXPDTA and HEADER records.
    ``"Badv_prot"``, ``"Uadv_prot"``, ``"Aadv_prot"``
        Averages over the records whose ``resName`` is in ``res_dict``;
        each is omitted when nothing contributed to it.
    """
    records = pdbmodule.read(path)

    stats = {"path": path}

    ## running sums and counts for the averages
    b_sum = 0.0
    b_count = 0
    u_sum = 0.0
    u_count = 0
    aniso_sum = 0.0
    aniso_count = 0

    for rec in records:
        rec_type = rec["RECORD"]

        if rec_type in ("ATOM  ", "HETATM"):

            if rec.get("resName", "") in res_dict:
                stats["atoms"] = stats.get("atoms", 0) + 1

                ## atoms without a temperature factor do not count
                ## towards the B average
                try:
                    b_sum += rec["tempFactor"]
                except KeyError:
                    pass
                else:
                    b_count += 1

                cid = rec.get("chainID")
                if cid:
                    chains = stats.setdefault("prot_chains", [])
                    if cid not in chains:
                        chains.append(cid)

            elif rec_type == "HETATM":
                stats["hetatoms"] = stats.get("hetatoms", 0) + 1

        elif rec_type == "ANISOU":
            stats["anisou"] = stats.get("anisou", 0) + 1

            if rec.get("resName", "") in res_dict:
                ## a record with a missing diagonal U value is still
                ## counted above, but cannot contribute to the averages
                try:
                    u_trace = rec["u[0][0]"] + rec["u[1][1]"] + rec["u[2][2]"]
                except KeyError:
                    continue

                ## NOTE(review): presumably trace / 3 with the 10^4 scaling
                ## of PDB ANISOU values removed -- confirm
                u_sum += float(u_trace) / 30000.0
                u_count += 1

                ## create the accumulator on first use; the lookup is kept
                ## out of any try block so that a KeyError raised inside
                ## set_aniso_dict is not mistaken for a missing accumulator
                if "aniso_dict" not in stats:
                    stats["aniso_dict"] = mk_aniso_dict()
                aniso = set_aniso_dict(rec, stats["aniso_dict"])

                if aniso is not None:
                    aniso_count += 1
                    aniso_sum += aniso

        elif rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            m = re_res.match(text)
            if m is not None:
                try:
                    stats["res"] = float(m.group(1))
                except ValueError:
                    pass
                continue

            m = re_program.match(text)
            if m is not None:
                stats["program"] = m.group(1)

        elif rec_type == "REVDAT":
            if rec.get("modType") == 0:
                stats["date"] = rec.get("modDate", "dd-mmm-yy")

        elif rec_type == "EXPDTA":
            stats["tech"] = rec["technique"]

        elif rec_type == "HEADER":
            stats["id"] = rec.get("idCode", "XXXX")

    ## finish calculating the averages; a key is left out when there
    ## were no contributing records
    if b_count:
        stats["Badv_prot"] = b_sum / b_count

    if u_count:
        stats["Uadv_prot"] = u_sum / u_count

    if aniso_count:
        stats["Aadv_prot"] = aniso_sum / aniso_count

    return stats
Пример #2
0
def read_pdb(path):
    """Extract TLS descriptions, overall B values and backbone atoms.

    The file at *path* is parsed with ``pdbmodule.read`` and scanned once:

    * REMARK records supply the "OVERALL ANISOTROPIC B VALUE" fields and
      the TLS group ranges and origins.  The TLS T/L/S tensor values are
      not extracted here.
    * ATOM/HETATM records of standard residues whose atom name is in
      ``Constants.BACKBONE_ATOMS`` are collected as dicts.
    * ANISOU records of those same atoms overwrite the U values of the
      most recently collected atom.

    Returns a 3-tuple ``(tls_text, overall, atom_obj)``:

    ``tls_text``
        The ``#RANGE`` and ``#ORIGIN`` lines joined with newlines.
    ``overall``
        Dict mapping 0..5 to the floats matched by ``re_b11``, ``re_b22``,
        ``re_b33``, ``re_b12``, ``re_b13`` and ``re_b23``; a value given
        as ``"NULL"`` is left out.
    ``atom_obj``
        List of per-atom dicts, in file order.
    """
    records = pdbmodule.read(path)

    ## U tensor keys, in the same order as the entries of `overall`
    u_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
              "u[0][1]", "u[0][2]", "u[1][2]")
    overall_patterns = (re_b11, re_b22, re_b33, re_b12, re_b13, re_b23)

    ## when True, the overall B values are added to each ANISOU value
    ADD_B_OVERALL = False  # (default: False)

    atom_obj = []
    listx = []
    overall = {}

    ## 0: header of the overall B section not seen yet
    ## 1: inside the section, 2: section finished
    state = 0

    for rec in records:
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            ## Capture the "OVERALL ANISOTROPIC B VALUE" fields
            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for idx, pattern in enumerate(overall_patterns):
                    m = pattern.match(text)
                    if m is None:
                        continue
                    ## first capture group; the argument to groups() is
                    ## only the default for groups that did not match
                    value = m.groups(1)[0]
                    ## each component is tested against its own value
                    if value != "NULL":
                        overall[idx] = float(value)
                    ## the last component closes the section
                    if idx == len(overall_patterns) - 1:
                        state = 2

            ## Capture the TLS "RANGE"s fields
            m = re_tls_range.match(text)
            if m is not None:
                (chain_id1, frag_id1, chain_id2, frag_id2) = m.groups()
                sel = "ALL"
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)

                listx.append("#RANGE  '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), sel))

            ## Capture the TLS "ORIGIN"s fields
            m = re_tls_origin.match(text)
            if m is not None:
                strx = m.group(1)
                ## The numbers cannot be trusted to stay in fixed columns,
                ## so each one is located by its decimal point: up to four
                ## characters before it and four after it are taken.
                ox = [0.0, 0.0, 0.0]
                for i in range(3):
                    j = strx.find(".")
                    if j == -1:
                        break
                    ox[i] = float(strx[max(0, j - 4):j + 5])
                    strx = strx[j + 5:]

                listx.append("#ORIGIN   %8.4f %8.4f %8.4f" % (
                    ox[0], ox[1], ox[2]))

        ## Now, capture the ATOM and/or HETATM fields
        elif rec_type in ("ATOM  ", "HETATM"):
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## "RECORD" and "element" are not copied from the record
                atom = {
                    "RECORD": "",
                    "element": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                }

                ## Initialize the U values to an isotropic tensor in case
                ## this atom has no ANISOU record, e.g. if it is next to
                ## but not in a TLS group.
                ## NOTE(review): B2UE4 presumably converts B to U in the
                ## units used by ANISOU records -- confirm
                Uiso = rec["tempFactor"] * B2UE4
                atom["u[0][0]"] = Uiso
                atom["u[1][1]"] = Uiso
                atom["u[2][2]"] = Uiso
                atom["u[0][1]"] = 0.0
                atom["u[0][2]"] = 0.0
                atom["u[1][2]"] = 0.0
                atom["has_anisou"] = 0
                atom_obj.append(atom)

        ## Capture the ANISOU fields, if they are present
        elif rec_type == "ANISOU":
            if any(key not in rec for key in u_keys[:3]):
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"], rec["resName"], rec["resSeq"]))
                continue

            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## We only keep the backbone atom's Uij values.
                ## NOTE(review): assumes this ANISOU record directly follows
                ## the ATOM/HETATM record it belongs to -- confirm
                atom = atom_obj[-1]
                for idx, key in enumerate(u_keys):
                    value = rec[key]
                    if ADD_B_OVERALL:
                        value = value + overall[idx]
                    atom[key] = value
                atom["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj
Пример #3
0
def read_pdb(path):
    """Collect TLS ranges/origins, overall B values and backbone atoms.

    Parses the file at *path* with ``pdbmodule.read`` and walks the
    records once:

    * REMARK text is matched against ``re_overall_b`` and the six
      ``re_bNN`` patterns to fill ``overall``, and against
      ``re_tls_range`` / ``re_tls_origin`` to build ``#RANGE`` and
      ``#ORIGIN`` lines.  The TLS T/L/S tensors are not extracted.
    * ATOM/HETATM records are kept when the residue is a standard
      residue and the atom name is in ``Constants.BACKBONE_ATOMS``.
    * ANISOU records passing the same test replace the U values of the
      last atom that was kept.

    Returns a 3-tuple ``(tls_text, overall, atom_obj)``:

    ``tls_text``
        Newline-joined ``#RANGE`` / ``#ORIGIN`` lines.
    ``overall``
        Dict with keys 0..5 (in the order ``re_b11``, ``re_b22``,
        ``re_b33``, ``re_b12``, ``re_b13``, ``re_b23``) holding floats;
        components reported as ``"NULL"`` are absent.
    ``atom_obj``
        List of dicts, one per kept atom, in file order.
    """
    records = pdbmodule.read(path)

    ## tensor keys, ordered like the indices of `overall`
    tensor_keys = ("u[0][0]", "u[1][1]", "u[2][2]",
                   "u[0][1]", "u[0][2]", "u[1][2]")
    diagonal_keys = tensor_keys[:3]
    b_patterns = (re_b11, re_b22, re_b33, re_b12, re_b13, re_b23)
    last_b_index = len(b_patterns) - 1

    ## switch for adding the overall B values onto the ANISOU values
    ADD_B_OVERALL = False

    atom_obj = []
    listx = []
    overall = {}

    ## progress through the overall B section of the REMARKs:
    ## 0 = not reached, 1 = reading components, 2 = done
    state = 0

    for rec in records:
        rec_type = rec["RECORD"]

        if rec_type == "REMARK":
            try:
                text = rec["text"]
            except KeyError:
                continue

            if state == 0:
                if re_overall_b.match(text) is not None:
                    state = 1
            elif state == 1:
                for index, pattern in enumerate(b_patterns):
                    m = pattern.match(text)
                    if m is None:
                        continue
                    ## groups(1)[0] is the first capture group; the 1 is
                    ## just the default for non-participating groups
                    captured = m.groups(1)[0]
                    ## the "NULL" test uses this component's own value
                    if captured != "NULL":
                        overall[index] = float(captured)
                    if index == last_b_index:
                        state = 2

            m = re_tls_range.match(text)
            if m is not None:
                (chain_id1, frag_id1, chain_id2, frag_id2) = m.groups()
                sel = "ALL"
                frag_id1 = convert_frag_id_save(frag_id1)
                frag_id2 = convert_frag_id_save(frag_id2)

                listx.append("#RANGE  '%s%s' '%s%s' %s" % (
                    chain_id1, frag_id1.rjust(5),
                    chain_id2, frag_id2.rjust(5), sel))

            m = re_tls_origin.match(text)
            if m is not None:
                remaining = m.group(1)
                ## The numbers do not reliably stay in fixed columns, so
                ## every coordinate is cut out around its decimal point
                ## (at most four characters before, four after).
                ox = [0.0, 0.0, 0.0]
                for i in range(3):
                    dot = remaining.find(".")
                    if dot == -1:
                        break
                    ox[i] = float(remaining[max(0, dot - 4):dot + 5])
                    remaining = remaining[dot + 5:]

                listx.append("#ORIGIN   %8.4f %8.4f %8.4f" % (
                    ox[0], ox[1], ox[2]))

        elif rec_type in ("ATOM  ", "HETATM"):
            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## "RECORD" and "element" keep their empty defaults
                atom = {
                    "RECORD": "",
                    "element": "",
                    "serial": rec["serial"],
                    "name": rec["name"],
                    "resName": rec["resName"],
                    "chainID": rec["chainID"],
                    "resSeq": rec["resSeq"],
                    "x": rec["x"],
                    "y": rec["y"],
                    "z": rec["z"],
                    "occupancy": rec["occupancy"],
                    "tempFactor": rec["tempFactor"],
                    "has_anisou": 0,
                }

                ## Start from an isotropic tensor in case this atom has no
                ## ANISOU record, e.g. if it is next to but not in a TLS
                ## group.
                ## NOTE(review): B2UE4 presumably scales B to the units of
                ## the ANISOU U values -- confirm
                Uiso = rec["tempFactor"] * B2UE4
                for key in diagonal_keys:
                    atom[key] = Uiso
                for key in tensor_keys[3:]:
                    atom[key] = 0.0
                atom_obj.append(atom)

        elif rec_type == "ANISOU":
            if any(key not in rec for key in diagonal_keys):
                print("# STRANGE Uij VALUE(S) FOR ATOM=%s; RESIDUE=%s:%s:%s" % (
                    rec["serial"], rec["chainID"], rec["resName"], rec["resSeq"]))
                continue

            if Library.library_is_standard_residue(rec["resName"]) and \
               rec["name"].strip() in Constants.BACKBONE_ATOMS:
                ## NOTE(review): relies on the ANISOU record coming right
                ## after its own ATOM/HETATM record -- confirm
                atom = atom_obj[-1]
                for index, key in enumerate(tensor_keys):
                    value = rec[key]
                    if ADD_B_OVERALL:
                        value = value + overall[index]
                    atom[key] = value
                atom["has_anisou"] = 1

    return "\n".join(listx), overall, atom_obj