Пример #1
0
def get_chain_seq(flist):
    '''Collect the residue numbers found for every chain in _atom_site.

    flist: lines of a cif file, as accepted by cif.cifparse.

    Returns a dict keyed by auth_asym_id. Each value is the list of integer
    auth_seq_id values of that chain in file order; consecutive rows that
    belong to the same residue of the same chain contribute one entry and
    rows whose sequence id is not a number are skipped. An empty dict is
    returned when the chain column is missing.
    '''

    chain = {}

    items, values = cif.cifparse(flist, '_atom_site.')
    asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
    seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")

    # Without chain ids there is nothing to key the result on (and pairing
    # seq with an empty asym column would fail).
    if not asym:
        return chain

    for ch in asym:
        chain.setdefault(ch, [])

    # Remember the chain together with the residue number: two chains may
    # meet at the same number and the second one must still be recorded.
    last = None
    for ch, ns in zip(asym, seq):
        if (ch, ns) == last or not util.is_number(ns):
            continue
        chain[ch].append(int(ns))
        last = (ch, ns)

    return chain
Пример #2
0
def cell(flist):
    '''Read the six unit-cell parameters from the _cell. category.

    Returns [a, b, c, alpha, beta, gamma]. Entries are floats when the
    matching item holds a number; an entry that is absent or not numeric
    is reported and left at 0.
    '''

    tags = ('_cell.length_a', '_cell.length_b', '_cell.length_c',
            '_cell.angle_alpha', '_cell.angle_beta', '_cell.angle_gamma')

    params = [0, 0, 0, 0, 0, 0]
    items, values = cif.cifparse(flist, '_cell.')
    if len(items) == 0:
        return params

    columns = [cif.parse_values(items, values, tag) for tag in tags]

    if not all(columns):
        print('Warning: cells not extracted. Check ciftokens')

    for pos, col in enumerate(columns):
        if len(col) > 0 and util.is_number(col[0]):
            params[pos] = float(col[0].strip())
        else:
            print('Error: cell has wrong (%s) values' % col)

    return params
Пример #3
0
def space_group_name(flist):
    '''Return the value of _symmetry.space_group_name_H-M as a string.

    Single and double quotes are removed and surrounding blanks stripped.
    An empty string is returned (after a warning) when the item is absent.
    '''

    items, values = cif.cifparse(flist, '_symmetry.')
    names = cif.parse_values(items, values, '_symmetry.space_group_name_H-M')

    if not names:
        print('Warning: space group not extracted. Check ciftokens')
        return ''

    unquoted = names[0]
    for quote in ("'", '"'):
        unquoted = unquoted.replace(quote, '')
    return unquoted.strip()
Пример #4
0
def is_bigcif(file):
    '''Tell whether a cif file uses chain IDs longer than one character.

    file: path of the cif file to inspect.

    Returns 2 when at least one _atom_site.auth_asym_id value has more than
    one character, otherwise 0 (also when the column cannot be found).
    '''
    # Read through a context manager so the handle is closed right away.
    with open(file, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_atom_site.')  # a loop
    asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")

    for x in asym or ():
        if len(x) > 1:
            print('Note: Cif file (%s) has chainID >1 charactor.' % file)
            return 2

    return 0
Пример #5
0
def remove_bad_water(pdbfile, sffile):
    '''Write a copy of pdbfile without the waters listed as bad by dcc.

    pdbfile: coordinate file in PDB format.
    sffile: structure factor file passed to dcc.

    Runs $DCCPY/bin/dcc to produce <pdbfile>_dcc.cif, collects the HOH
    entries of its _pdbx_rscc_mapman_prob. table and copies pdbfile to
    <pdbfile>_rmwat, dropping every ATOM/HETATM/ANISOU water line whose
    chain and residue number match one of those entries. Returns None.
    '''

    npdb = pdbfile + '_rmwat'
    dccfile = pdbfile + '_dcc.cif'

    # NOTE(review): the command is a shell string; file names containing
    # blanks or shell metacharacters are not quoted.
    arg = '%s/bin/dcc  -pdb %s -sf %s -no_xtriage -o %s  ' % (
        os.environ['DCCPY'], pdbfile, sffile, dccfile)
    os.system(arg)

    print('Removing bad waters from %s' % dccfile)
    with open(dccfile, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_pdbx_rscc_mapman_prob.')
    ch = cif.parse_values(items, values,
                          '_pdbx_rscc_mapman_prob.auth_asym_id')
    comp = cif.parse_values(items, values,
                            '_pdbx_rscc_mapman_prob.auth_comp_id')
    seq = cif.parse_values(items, values,
                           '_pdbx_rscc_mapman_prob.auth_seq_id')

    # One set lookup per coordinate line instead of rescanning the table.
    bad_waters = set(
        (c, s) for c, s, name in zip(ch, seq, comp) if 'HOH' in name)

    with open(pdbfile, 'r') as fp, open(npdb, 'w') as fw:
        for x in fp:
            is_water = (x.startswith(('ATOM', 'HETATM', 'ANISOU'))
                        and x[17:20] == 'HOH')
            if is_water:
                key = (x[20:22].strip(), x[22:26].strip())
                if key in bad_waters:
                    print('removing water (%s)' % x[:28])
                    continue
            fw.write(x)

    print('The new pdb file = %s' % npdb)
Пример #6
0
def ncs_matrix(flist):
    '''Read the NCS operators of the _struct_ncs_oper. category.

    flist: lines of a cif file, as accepted by cif.cifparse.

    Returns a list with one entry per operator. Each entry is three rows
    of four floats: matrix[r][1], matrix[r][2], matrix[r][3] and vector[r]
    for r = 1, 2, 3. An empty list is returned when the category or any of
    the id, code, matrix or vector items is missing. An operator with a
    non-numeric value is reported and left out.
    '''
    items, values = cif.cifparse(flist, "_struct_ncs_oper.")
    if not items:
        return []

    def column(name):
        return cif.parse_values(items, values, '_struct_ncs_oper.' + name)

    ids = column('id')
    code = column('code')

    # rows[r] holds the four value columns that make up matrix row r + 1.
    rows = []
    for r in (1, 2, 3):
        row = [column('matrix[%d][%d]' % (r, c)) for c in (1, 2, 3)]
        row.append(column('vector[%d]' % r))
        rows.append(row)

    if not (ids and code and all(col for row in rows for col in row)):
        return []

    mtrix = []
    for i in range(len(rows[0][0])):
        matrix = [[float(col[i]) for col in row if util.is_number(col[i])]
                  for row in rows]
        # Every row needs all four numbers; a partially numeric row would
        # otherwise yield a ragged matrix.
        if any(len(row) != 4 for row in matrix):
            print('Error: NCS matrix has wrong values')
            continue
        mtrix.append(matrix)

    return mtrix
Пример #7
0
def get_rsr_database(res, rfact, file):
    '''Look up the RsR statistics that apply to a given resolution.

    res: resolution; a row of the _rsr_shell. table is used when
        d_res_high <= res <= d_res_low.
    rfact: accepted for interface compatibility; not used by the lookup.
    file: path of the RsR data base (cif format).

    Returns a dict keyed by '<residue>_<secondary_structure>'. Each value
    is [mean_all, deviation_all, number_all, mean_filter,
    deviation_filter, number_filter]; for a repeated key the first matching
    row is kept. An empty dict is returned when the file check fails.
    '''

    if not util.check_file(30, file):
        print('Warning: no RsR data base is found. ')
        # Nothing to read: do not go on to open a file reported as unusable.
        return {}

    with open(file, 'r') as fp:
        flist = fp.readlines()
    items, values = cif.cifparse(flist, '_rsr_shell.')  # a loop

    def column(name):
        return cif.parse_values(items, values, '_rsr_shell.' + name)

    resname = column('residue')
    resh = column('d_res_high')
    resl = column('d_res_low')
    sst = column('secondary_structure')
    mean_all = column('mean_all')
    dev_all = column('deviation_all')
    num_all = column('number_all')
    mean_fil = column('mean_filter')
    dev_fil = column('deviation_filter')
    num_fil = column('number_filter')

    resn = {}
    for i, name in enumerate(resname):
        if not (float(resh[i]) <= res <= float(resl[i])):
            continue
        key = '%s_%s' % (name, sst[i])
        if key in resn:
            continue
        resn[key] = [float(mean_all[i]), float(dev_all[i]), int(num_all[i]),
                     float(mean_fil[i]), float(dev_fil[i]), int(num_fil[i])]

    return resn
Пример #8
0
def parse_dcc(flist, table):
    '''Turn one per-residue table of a DCC output into a list of rows.

    flist: lines of the DCC cif file.
    table: '_pdbx_rscc_mapman.' or '_pdbx_map.'; any other value gives [].

    For '_pdbx_rscc_mapman.' each row is
        [auth_seq_id, auth_asym_id, auth_comp_id, label_alt_id, correlation,
         real_space_R, Biso_mean, occupancy_mean, pdb_id, real_space_Zscore,
         label_ins_code]
    and reading stops at the first row whose model_id is greater than 1.

    For '_pdbx_map.' each row is
        [auth_comp_id, auth_asym_id, auth_seq_id, Biso_mean_overall,
         density_correlation_overall, real_space_R_overall, ZOBS_overall,
         ZDIFF_overall, ZDplus_overall, ZDminus_overall]
    followed, when the side-chain ZDIFF item is present, by the main-chain
    and side-chain correlation, RsR, ZOBS and ZDIFF values.

    Returns [] when the table is absent.
    '''

    dcc = []
    if table not in ('_pdbx_rscc_mapman.', '_pdbx_map.'):
        return dcc

    items, values = cif.cifparse(flist, table)
    if not items:
        return dcc

    def column(name):
        return cif.parse_values(items, values, table + name)

    if table == '_pdbx_rscc_mapman.':
        nseq = column('auth_seq_id')
        chid = column('auth_asym_id')
        comp = column('auth_comp_id')
        alt = column('label_alt_id')
        ins = column('label_ins_code')
        cc = column('correlation')
        rsr = column('real_space_R')
        zrsr = column('real_space_Zscore')
        biso = column('Biso_mean')
        occ = column('occupancy_mean')
        modid = column('model_id')
        pdbid = column('pdb_id')

        for i in range(len(chid)):
            if modid and int(modid[i]) > 1:
                break
            dcc.append([nseq[i], chid[i], comp[i], alt[i], cc[i],
                        rsr[i], biso[i], occ[i], pdbid[i], zrsr[i], ins[i]])
        return dcc

    nseq = column('auth_seq_id')
    chid = column('auth_asym_id')
    comp = column('auth_comp_id')

    biso = column('Biso_mean_overall')
    cc = column('density_correlation_overall')
    rsr = column('real_space_R_overall')
    zobs = column('ZOBS_overall')
    zdiff = column('ZDIFF_overall')
    zdplus = column('ZDplus_overall')
    zdminus = column('ZDminus_overall')

    cc_m = column('density_correlation_main_chain')
    rsr_m = column('real_space_R_main_chain')
    zobs_m = column('ZOBS_main_chain')
    zdiff_m = column('ZDIFF_main_chain')

    cc_s = column('density_correlation_side_chain')
    rsr_s = column('real_space_R_side_chain')
    zobs_s = column('ZOBS_side_chain')
    zdiff_s = column('ZDIFF_side_chain')

    # Without the overall ZDIFF column no row can be assembled; bail out
    # instead of hitting an unbound row variable in the loop.
    if not zdiff:
        return dcc

    for i in range(len(chid)):
        row = [comp[i], chid[i], nseq[i], biso[i], cc[i], rsr[i], zobs[i],
               zdiff[i], zdplus[i], zdminus[i]]
        if zdiff_s:
            row += [cc_m[i], rsr_m[i], zobs_m[i], zdiff_m[i],
                    cc_s[i], rsr_s[i], zobs_s[i], zdiff_s[i]]
        dcc.append(row)

    return dcc
Пример #9
0
def parse_cif(flist, table):
    '''Extract selected columns of one cif category as a dict of lists.

    flist: lines of a cif file.
    table: category name without the leading '_' and trailing '.'; handled
        values are pdbx_nonpoly_scheme, struct_conn, pdbx_molecule and
        pdbx_poly_seq_scheme.

    Returns a dict mapping short names to the parsed columns, or an empty
    dict when the category is not handled or one of its required columns
    is empty.
    '''

    ciftable = '_%s.' % table
    items, values = cif.cifparse(flist, ciftable)

    def column(name):
        return cif.parse_values(items, values, ciftable + name)

    found, required = {}, ()

    if ciftable in ('_pdbx_nonpoly_scheme.', '_pdbx_poly_seq_scheme.'):
        found = {
            'monid': column('pdb_mon_id'),
            'nseq': column('pdb_seq_num'),
            'asym': column('asym_id'),
            'chid': column('pdb_strand_id'),
            'ins': column('pdb_ins_code'),
        }
        # Only the non-polymer scheme insists on the sequence numbers.
        if ciftable == '_pdbx_nonpoly_scheme.':
            required = ('monid', 'nseq', 'asym')
        else:
            required = ('asym', 'monid')

    elif ciftable == '_struct_conn.':
        found = {'id': column('id')}
        required = ['id']
        for n in ('1', '2'):
            found['asym' + n] = column('ptnr%s_auth_asym_id' % n)
            found['comp' + n] = column('ptnr%s_auth_comp_id' % n)
            found['nseq' + n] = column('ptnr%s_auth_seq_id' % n)
            found['alt' + n] = column('pdbx_ptnr%s_label_alt_id' % n)
            found['ins' + n] = column('pdbx_ptnr%s_PDB_ins_code' % n)
            required += ['asym' + n, 'comp' + n, 'nseq' + n]

    elif ciftable == '_pdbx_molecule.':
        found = {
            'ins_id': column('instance_id'),
            'prd_id': column('prd_id'),
            'asm_id': column('asym_id'),
        }
        required = ('ins_id', 'prd_id', 'asm_id')

    if found and all(found[key] for key in required):
        return found
    return {}
Пример #10
0
def get_dcc(dccfile):
    '''Read the _pdbx_rscc_mapman. table of a dcc file as a list of rows.

    dccfile: path of the cif file written by dcc.

    Returns a list with one entry per table row:
        [auth_seq_id, auth_asym_id, auth_comp_id, label_alt_id, correlation,
         real_space_R, Biso_mean, occupancy_mean, pdb_id]
    An empty list is returned when the file check fails or the table is
    absent.
    '''

    dcc = []
    if util.check_file(100, dccfile) == 0:
        return dcc

    with open(dccfile, 'r') as fp:
        flist = fp.readlines()

    items, values = cif.cifparse(flist, '_pdbx_rscc_mapman.')
    if not items:
        return dcc

    # Column order here is the field order of every returned row.
    names = ('auth_seq_id', 'auth_asym_id', 'auth_comp_id', 'label_alt_id',
             'correlation', 'real_space_R', 'Biso_mean', 'occupancy_mean',
             'pdb_id')
    columns = [
        cif.parse_values(items, values, '_pdbx_rscc_mapman.' + name)
        for name in names
    ]

    chid = columns[1]
    for i in range(len(chid)):
        dcc.append([col[i] for col in columns])
    return dcc
Пример #11
0
def find_xyzlim_compound(compid, coord):
    '''Find the fractional xyz limits around a compound and write its atoms.

    compid: one or more atom group ids joined by ':'; each id must have the
        six '_'-separated fields
        model_compound_chainID_resnumber_alter_insertion.
    coord: the coordinate file (cif or PDB format, decided by util.is_cif).

    The selected atoms (plus the cell record) are written to a file named
    after the first four fields of the first id, with extension '.cif' for
    cif input and '.pdb' otherwise.

    Returns (xyzlim, xyzcomp): xyzlim is the string
    'xmin xmax  ymin ymax  zmin zmax' in fractional coordinates, padded by
    2.0 in each Cartesian direction, and xyzcomp is the name of the file
    written. ('', '') is returned when an id is malformed, no atom matches,
    or a PDB file has no CRYST1 record. The process exits when a cif file
    lacks required _atom_site items.
    '''

    groups = [x.split('_') for x in compid.split(':')]
    for i, t in enumerate(groups):
        # Check the field count before any field is indexed.
        if len(t) != 6:
            print(
                'Error: in group-id (%d). it should be (model_compound_chainID_resnumber_alter_insertion).'
                % (i + 1))
            return '', ''

    comp = '_'.join(groups[0][:4])

    idd = util.is_cif(coord)
    xyzcomp = comp + '.pdb'
    if idd == 1: xyzcomp = comp + '.cif'

    cell = None
    xx, yy, zz = [], [], []

    # The context manager closes the output on every exit path.
    with open(xyzcomp, 'w') as fw:
        if idd == 1:  #input cif format
            fw.write('data_xyzcomp\n#\n')

            with open(coord, 'r') as fp:
                flist = fp.readlines()
            items, values = cif.cifparse(flist, '_cell.')
            fw.write('\n#\n')
            for m, p in enumerate(items):
                fw.write("%s  %s\n" % (p, values[m]))

            cell = cif.get_cell(flist)

            items, values = cif.cifparse(flist, '_atom_site.')
            resname = cif.parse_values(items, values,
                                       "_atom_site.auth_comp_id")
            asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
            seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
            alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
            ins = cif.parse_values(items, values,
                                   "_atom_site.pdbx_PDB_ins_code")
            x = cif.parse_values(items, values, "_atom_site.Cartn_x")
            y = cif.parse_values(items, values, "_atom_site.Cartn_y")
            z = cif.parse_values(items, values, "_atom_site.Cartn_z")
            model = cif.parse_values(items, values,
                                     "_atom_site.pdbx_PDB_model_num")

            if (not (alt and resname and ins and asym and seq and x and y
                     and z)):
                print(
                    'Error: not enough infor. extraced from (%s). Check ciftokens'
                    % coord)
                sys.exit()

            fw.write('\n#\nloop_\n')
            for p in items:
                fw.write("%s\n" % p)
            row = cif.get_rows(items, values)

            for i in range(len(x)):
                alter, inst, mod = '.', '.', '1'
                if model and util.is_number(model[i]): mod = model[i]
                if alt[i] != '?': alter = alt[i]
                if ins[i] != '?': inst = ins[i]

                id1 = '_'.join(
                    [mod, resname[i], asym[i], seq[i], alter, inst])

                # NOTE(review): this is a substring test against the whole
                # compid string, not an exact match against one id.
                if id1 in compid:
                    xx.append(float(x[i]))
                    yy.append(float(y[i]))
                    zz.append(float(z[i]))

                    for m in row[i]:
                        fw.write("%s " % m)
                    fw.write('\n')

        else:  #pdb format
            with open(coord, 'r') as fp:
                for x1 in fp:

                    if ('CRYST1' in x1[:6]):
                        fw.write(x1)
                        cell = [float(p) for p in x1[8:54].split()]

                    elif ('ATOM' in x1[:4] or 'HETATM' in x1[:6]):
                        alt = x1[16:17]
                        if alt.isspace(): alt = '.'
                        ins = x1[26:27]
                        if ins.isspace(): ins = '.'
                        resname = x1[17:20].strip()
                        chid = x1[20:22].strip()
                        resnum = x1[22:26].strip()
                        # No model field here: the id is matched as a
                        # substring of compid.
                        resid = '_'.join([resname, chid, resnum, alt, ins])

                        if resid in compid:
                            fw.write(x1)  #only write the selected section
                            xx.append(float(x1[30:38]))
                            yy.append(float(x1[38:46]))
                            zz.append(float(x1[46:54]))

    if not xx or not yy or not zz:
        print('Error: %s can not be found in the coordinate. try a new id. ' %
              (compid))
        return '', ''

    if cell is None:
        # Only possible for PDB input that carries no CRYST1 record.
        print('Error: no cell (CRYST1) found in (%s).' % coord)
        return '', ''

    frac, orth = util.frac_orth_matrix(cell)  #get matrix
    border = 2.0  #extend a little to cover more density
    xx_min, xx_max = min(xx) - border, max(xx) + border
    yy_min, yy_max = min(yy) - border, max(yy) + border
    zz_min, zz_max = min(zz) - border, max(zz) + border

    xf_min = util.matrix_prod(frac, [xx_min, yy_min, zz_min])
    xf_max = util.matrix_prod(frac, [xx_max, yy_max, zz_max])

    xyzlim = '%.3f %.3f  %.3f %.3f  %.3f %.3f' % (
        xf_min[0], xf_max[0], xf_min[1], xf_max[1], xf_min[2], xf_max[2])

    return xyzlim, xyzcomp
Пример #12
0
def cut_map_around_ligand_peptide(dccfile, dic, mapfile_in, xyzfile_in):
    '''Drive the generation of density maps around ligands and peptides.

    dccfile: the density file written by dcc; parsed with get_dcc.
    dic: a dict of run settings. The keys read here are 'pdbfile',
        'ligmapcif', 'xyzfile_orig', 'sdsc_map' and 'dir'. When
        dic['ligmapcif'] is set, the keys 'cif', 'cell_items', 'lig_cell',
        'sym_items', 'lig_sym', 'items', 'comp1', 'asym', 'seq', 'alt',
        'ins', 'mod' and 'row' are filled in from dic['xyzfile_orig'].
    mapfile_in: the input map file; it is moved (renamed) to the working
        map name.
    xyzfile_in: the input coordinate file; converted with cif.cif2pdb when
        util.is_cif reports it as cif.

    A file LIG_PEPTIDE.cif is written in the current directory. When
    dic['sdsc_map'] is set the working files are removed again at the end.
    Returns None.
    '''

    print('Cutting the density maps for ligands/peptide')

    # Work on a PDB-format copy named after dic['pdbfile'].
    tmpxyz = xyzfile_in
    if util.is_cif(xyzfile_in): tmpxyz = cif.cif2pdb(xyzfile_in)
    pdbfile = os.path.basename(dic['pdbfile']) + '_new'
    if pdbfile != tmpxyz: shutil.copy(tmpxyz, pdbfile)

    # The map is renamed; with 'ligmapcif' the name is based on the original
    # coordinate file instead of the basename of dic['pdbfile'].
    mapfile = os.path.basename(dic['pdbfile']) + '_2fofc.map'
    if dic['ligmapcif']: mapfile = dic['xyzfile_orig'] + '_2fofc.map'
    shutil.move(mapfile_in, mapfile)

    if dic['ligmapcif']:  # pre-parse the cif file once and cache it in dic
        dic['cif'] = 1

        ciffile = dic['xyzfile_orig']

        flist = open(ciffile, 'r').readlines()
        cell_items, values = cif.cifparse(flist, '_cell.')
        cell = cif.get_rows(cell_items, values)
        dic['cell_items'], dic['lig_cell'] = cell_items, cell

        sym_items, values = cif.cifparse(flist, '_symmetry.')
        sym = cif.get_rows(sym_items, values)
        dic['sym_items'], dic['lig_sym'] = sym_items, sym

        items, values = cif.cifparse(flist, '_atom_site.')
        comp = cif.parse_values(items, values, "_atom_site.auth_comp_id")
        asym = cif.parse_values(items, values, "_atom_site.auth_asym_id")
        seq = cif.parse_values(items, values, "_atom_site.auth_seq_id")
        alt = cif.parse_values(items, values, "_atom_site.label_alt_id")
        ins = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
        mod = cif.parse_values(items, values, "_atom_site.pdbx_PDB_model_num")
        row = cif.get_rows(items, values)

        dic['items'], dic['comp1'], dic['asym'], dic[
            'seq'] = items, comp, asym, seq
        dic['alt'], dic['ins'], dic['mod'], dic['row'] = alt, ins, mod, row

    # Summary cif file handed to the map_around_* helpers.
    fw_itool = open('LIG_PEPTIDE.cif',
                    'w')  # a cif file that contains the table and filenames
    fw_itool.write('data_lig_peptide\n')
    fw_itool.write(
        '\n# A "!" will be given if the residue is bad with real_space_R.\n')
    fw_itool.write('\n# Criteria: (CC<0.7 and R>0.4) or CC<0.5 or R>0.5\n')

    # Base URL passed on to the map_around_* helpers.
    url = 'http://sf-tool.wwpdb.org/users_data/dir_%s/' % dic['dir']
    #url=os.environ['THIS_SERVICE_URL__FIX_ME'] + '/users_data/dir_%s/' %dic['dir']

    ch_pep, chr_pep, ch_lig, chr_lig, ch_wat, chr_wat = tls.chain_res_range(
        pdbfile)

    ligpdb = non_poly_pdb(ch_pep, ch_lig, pdbfile)  # get non-poly xyz file
    dcc = get_dcc(dccfile)  # get a list for dcc of each residue

    # NOTE(review): only a warning is issued here; the steps below still run
    # with an empty dcc list.
    if not dcc:
        util.perror(
            'Warning: Failed to parse EDS values! No ligand/peptide maps will be generated. '
        )

    # The peptide maps are produced at most once: the loop stops at the first
    # chain whose entry has fewer than 15 elements.
    for k, v in ch_pep.items():
        if len(v) < 15:  # length of peptide
            if not dic['sdsc_map']:
                map_around_peptide(fw_itool, dic, mapfile, ligpdb, dcc, ch_pep,
                                   url)
            break

    if ch_lig:
        map_around_ligand(fw_itool, dic, mapfile, ligpdb, dcc, ch_lig, url)

    get_html_table_baddcc_general(mapfile, dcc)  # for polymer/lig/peptide

    fw_itool.close()

    # With 'sdsc_map' the map, the inputs and the summary files are deleted.
    if dic['sdsc_map']:
        arg = 'rm -f %s %s  %s  LIG_PEPTIDE.cif ' % (mapfile, mapfile_in,
                                                     xyzfile_in)
        arg = arg + ' %s_rcc_sum.cif.mtz  %s_2fofc.map_all.html ' % (
            dic['pdbfile'], dic['pdbfile'])
        os.system(arg)

#    util.delete_file(pdbfile)
    return
Пример #13
0
def get_list(file):
    '''Summarise the _pdbx_density. and _pdbx_density_corr. tables of a file.

    file: path of a dcc output file in cif format.

    Returns a flat list of strings in this order: pdbid, resolution,
    1000*(R_work reported - R_work calculated), reported R_work and R_free,
    calculated R_work and R_free, Fo-Fc correlation, real space R, density
    correlation, fom, mean Biso, Wilson B, Matthews coefficient, solvent
    content, K_solvent, B_solvent, TLS/NCS group and MTRIX counts, partial
    B correction flag, first letter of the B value type, pseudo
    translation, the two twin items, a Y/N twin-operator flag, L2 mean,
    L-test Z score, anisotropy, the two I/sigI items, program name and space
    group. Blanks are replaced by '_', decimal numbers are formatted with
    two decimals and missing values become '?'. The calculated values are
    taken from the first _pdbx_density_corr. row whose details contain
    'Best' (the first row when there is none).

    An empty list is returned when the file check fails.
    '''

    vlist = []

    if not util.check_file(200, file):
        print('Error: file (%s) do not exist' % file)
        return vlist

    with open(file, 'r') as fp:
        flist = fp.readlines()

    pdbid = 'XXXX'
    for x in flist:
        if 'data_' in x:
            pdbid = x.split('_')[1].strip()
            break

    items, values = cif.cifparse(flist, '_pdbx_density.')

    def density(name):
        return cif.parse_values(items, values, '_pdbx_density.' + name)

    sym = density('space_group_name_H-M')
    res = density('ls_d_res_high')
    rw = density('R_value_R_work')
    rf = density('R_value_R_free')
    biso = density('Biso_mean')
    bwil = density('B_wilson')
    l2 = density('Padilla-Yeates_L2_mean')
    z = density('Z_score_L_test')
    fom = density('fom')
    isig = density('I_over_sigI_resh')
    isigd = density('I_over_sigI_diff')
    pst = density('translational_pseudo_symmetry')
    bsol = density('B_solvent')
    ksol = density('K_solvent')
    tlst = density('partial_B_value_correction_success')
    ntls = density('tls_group_number')
    nncs = density('ncs_group_number')
    nmtx = density('mtrix_number')
    matt = density('Matthew_coeff')
    solv = density('solvent_content')
    rtwin = density('reflns_twin')
    xtwin = density('twin_by_xtriage')
    anis = density('anisotropy')

    # Only the first letter of the B value type is reported; default to
    # '?' so the name is always bound.
    btype = '?'
    tmp = density('iso_B_value_type')
    if tmp and tmp[0]:
        btype = tmp[0][0]

    ctwin_t = density('twin_operator')
    ctwin = 'Y' if (ctwin_t and '2:' in ctwin_t[0]) else 'N'

    # looped
    corr_items, corr_values = cif.cifparse(flist, '_pdbx_density_corr.')

    def corr(name):
        return cif.parse_values(corr_items, corr_values,
                                '_pdbx_density_corr.' + name)

    prog = corr('program')
    rwork = corr('ls_R_factor_R_work')
    rfree = corr('ls_R_factor_R_free')
    fcc = corr('correlation_coeff_Fo_to_Fc')
    rsr = corr('real_space_R')
    dcc = corr('correlation')
    detail = corr('details')

    nr, nc = 0, 0
    for i, x in enumerate(detail):
        if 'Best' in x:
            nc = i
            break

    def pick(column, idx):
        # A missing table or column yields '?' instead of an IndexError.
        return column[idx] if idx < len(column) else '?'

    rprog = pick(prog, nr).replace(' ', '')
    crw, crf = pick(rwork, nc), pick(rfree, nc)
    fcc, rsr, dcc = pick(fcc, nc), pick(rsr, nc), pick(dcc, nc)

    rw_crw = '?'
    if rw and util.is_number(rw[0]) and util.is_number(crw):
        rw_crw = '%d' % int(1000 * (float(rw[0]) - float(crw)))

    fields = [pdbid, res, rw_crw, rw, rf, crw, crf, fcc, rsr, dcc, fom, biso,
              bwil, matt, solv, ksol, bsol, ntls, nncs, nmtx,
              tlst, btype, pst, rtwin, xtwin, ctwin, l2, z, anis, isig, isigd,
              rprog, sym]

    all_new = []
    for x in fields:
        if not x:
            t = '?'
        elif isinstance(x, (list, tuple)):
            t = x[0]
        else:
            t = x

        y = t.replace(' ', '_')
        if util.is_number(y) and '.' in y:
            y = '%.2f' % float(y)

        all_new.append(y)

    return all_new
Пример #14
0
def check_ncs_cif(file):
    '''Check the NCS related tables of a cif file for consistency.

    file: path of the cif file.

    The tables struct_ncs_ens, refine_ls_restr_ncs, struct_ncs_dom and
    struct_ncs_dom_lim are read. Problems are reported through util.perror:
    tables that are missing altogether and ensemble ids that do not show
    up in a dependent table. The domain limits and the restraints are then
    handed to check_lim and check_res together with the chain/residue
    dict from get_chain_seq. Returns None.
    '''

    with open(file, 'r') as fp:
        flist = fp.readlines()

    ncs = ncs_from_head(flist)

    items, values = cif.cifparse(flist, '_struct_ncs_ens.')
    ens_id = cif.parse_values(items, values, '_struct_ncs_ens.id')

    items, values = cif.cifparse(flist, '_refine_ls_restr_ncs.')
    res_ord = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_ordinal')
    res_ref = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_refine_id')
    res_ens = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_ens_id')
    res_dom = cif.parse_values(items, values, '_refine_ls_restr_ncs.dom_id')
    res_typ = cif.parse_values(items, values, '_refine_ls_restr_ncs.pdbx_type')
    res_asy = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_auth_asym_id')
    res_num = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.pdbx_number')
    res_rms = cif.parse_values(items, values,
                               '_refine_ls_restr_ncs.rms_dev_position')

    # Fall back to the pdbx_rms item when any rms value is unknown.
    if '?' in res_rms:
        res_rms = cif.parse_values(items, values,
                                   '_refine_ls_restr_ncs.pdbx_rms')

    items, values = cif.cifparse(flist, '_struct_ncs_dom.')
    dom_ens = cif.parse_values(items, values, '_struct_ncs_dom.pdbx_ens_id')
    dom_id = cif.parse_values(items, values, '_struct_ncs_dom.id')

    items, values = cif.cifparse(flist, '_struct_ncs_dom_lim.')
    lim_ens = cif.parse_values(items, values,
                               '_struct_ncs_dom_lim.pdbx_ens_id')
    lim_dom = cif.parse_values(items, values, '_struct_ncs_dom_lim.dom_id')
    lim_com = cif.parse_values(items, values,
                               '_struct_ncs_dom_lim.pdbx_component_id')
    lim_basy = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.beg_auth_asym_id')
    lim_bseq = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.beg_auth_seq_id')
    lim_easy = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.end_auth_asym_id')
    lim_eseq = cif.parse_values(items, values,
                                '_struct_ncs_dom_lim.end_auth_seq_id')

    if len(ncs):
        if not (res_ens or dom_ens or lim_ens or ens_id):
            util.perror(
                'Warning: NCS records exist, but no cif tables for it.')
            return
    else:
        if not len(ens_id): return

    # NOTE(review): each warning below fires only when ALL listed items are
    # empty, although the message also mentions "missing key items" - confirm
    # whether a single missing item should be reported too.
    if not (res_ens or res_dom or res_ord or res_ref or res_asy or res_typ):
        util.perror(
            'Warning: No cif table (refine_ls_restr_ncs) or missing key items for NCS.'
        )
    if not (dom_ens or dom_id):
        util.perror(
            'Warning: No cif table (struct_ncs_dom) or missing key items for NCS.'
        )
    if not (lim_ens or lim_dom or lim_com):
        util.perror(
            'Warning: No cif table (struct_ncs_dom_lim) or missing key items for NCS.'
        )
    if not (ens_id):
        util.perror('Warning: No cif table (struct_ncs_ens) for NCS.')

    chain = get_chain_seq(flist)  #chain is dic for int

    def group_by_ensemble(keys, vals):
        # ensemble id -> list of the domain ids recorded for it
        grouped = {}
        for key, val in zip(keys, vals):
            grouped.setdefault(key, []).append(val)
        return grouped

    dom = group_by_ensemble(dom_ens, dom_id)
    lim = group_by_ensemble(lim_ens, lim_dom)
    res = group_by_ensemble(res_ens, res_dom)

    def report_missing(wanted, present, table_name):
        # every ensemble id must also appear in the dependent table
        for n in wanted:
            if n not in present:
                util.perror('Error: NCS ID (%s) not in table (%s).' %
                            (n, table_name))

    if dom: report_missing(ens_id, dom, 'struct_ncs_dom')
    if lim: report_missing(ens_id, lim, 'struct_ncs_dom_lim')
    if res: report_missing(ens_id, res, 'refine_ls_restr_ncs')

    if (lim_basy and lim_easy and lim_bseq and lim_eseq):
        check_lim(lim_ens, chain, lim_basy, lim_easy, lim_bseq, lim_eseq)

    check_res(res_ens, chain, res_ref, res_asy, res_typ, res_num, res_rms)
Пример #15
0
def val_from_dcc(outf):
    '''Summarise the density statistics stored in the cif-style file outf.

    The lines of outf are parsed for the '_pdbx_density.' and
    '_pdbx_density_corr.' categories.

    Returns a tuple (rep, val) of two fixed-width strings:
      rep: program, ls_d_res_high, ls_R_factor_R_work and
           ls_R_factor_R_free taken from the first '_pdbx_density_corr.'
           row, followed by _pdbx_density.Biso_mean.
      val: the same four items taken from the first row whose 'details'
           value contains 'Best' (the first row when there is none),
           followed by B_wilson, Padilla-Yeates_L2_mean, Z_score_L_test,
           fom, correlation_coeff_Fo_to_Fc, real_space_R and correlation.

    ('', '') is returned when util.check_file(500, outf) fails or when any
    of the items needed to build the two strings is missing from the file.
    The column layout of both strings must not be changed.
    '''

    if not util.check_file(500, outf): return '', ''

    # Read through a context manager so the file handle is always closed.
    with open(outf, "r") as fh:
        flist = fh.readlines()

    items, values = cif.cifparse(flist, '_pdbx_density.')
    biso = cif.parse_values(items, values, '_pdbx_density.Biso_mean')
    bwil = cif.parse_values(items, values, '_pdbx_density.B_wilson')
    l2 = cif.parse_values(items, values,
                          '_pdbx_density.Padilla-Yeates_L2_mean')
    z = cif.parse_values(items, values, '_pdbx_density.Z_score_L_test')
    fom = cif.parse_values(items, values, '_pdbx_density.fom')

    items, values = cif.cifparse(flist, '_pdbx_density_corr.')
    prog = cif.parse_values(items, values, '_pdbx_density_corr.program')
    resh = cif.parse_values(items, values, '_pdbx_density_corr.ls_d_res_high')
    rwork = cif.parse_values(items, values,
                             '_pdbx_density_corr.ls_R_factor_R_work')
    rfree = cif.parse_values(items, values,
                             '_pdbx_density_corr.ls_R_factor_R_free')
    fcc = cif.parse_values(items, values,
                           '_pdbx_density_corr.correlation_coeff_Fo_to_Fc')
    rsr = cif.parse_values(items, values, '_pdbx_density_corr.real_space_R')
    dcc = cif.parse_values(items, values, '_pdbx_density_corr.correlation')
    detail = cif.parse_values(items, values, '_pdbx_density_corr.details')

    # nr: row used for 'rep' (always the first row).
    # nb: row used for 'val' (first row flagged 'Best', else the first row).
    nr, nb = 0, 0
    for i, x in enumerate(detail):
        if 'Best' in x:
            nb = i
            break

    try:
        rep = '%8s  %4s  %6s  %6s  %6s ' % (prog[nr], resh[nr], rwork[nr],
                                            rfree[nr], biso[0])
        val = '%8s  %4s  %6s  %6s  %6s  %6s  %6s  %6s  %6s  %6s  %6s ' % (
            prog[nb], resh[nb], rwork[nb], rfree[nb], bwil[0], l2[0], z[0],
            fom[0], fcc[nb], rsr[nb], dcc[nb])
    except IndexError:
        # A category or item is absent (or shorter than expected): report it
        # and use the same "nothing available" value as the file check above.
        util.perror('Warning: density statistics are incomplete in (%s).' %
                    outf)
        return '', ''

    return rep, val
Пример #16
0
def atom_site(flist):
    '''Parse the '_atom_site.' loop and return it as a dictionary of lists.

    flist: the lines of a cif file, as accepted by cif.cifparse.

    Returns a dictionary whose values are parallel lists with one entry per
    atom row:
      'chain' (auth_asym_id), 'nres' (auth_seq_id as int),
      'comp' (label_comp_id), 'atom' (label_atom_id),
      'symbol' (type_symbol, or atom_type_symbol as fallback),
      'alt' (label_alt_id), 'ins' (pdbx_PDB_ins_code, or ndb_ins_code),
      'x', 'y', 'z' (Cartn_x/y/z as float),
      'biso' (B_iso_or_equiv as float), 'occ' (occupancy as float),
      'natm' (_atom_site.id as parsed; empty list when the item is absent).
    'symbol', 'alt' and 'ins' are filled with '.' when the item is absent.

    An empty dictionary is returned (after printing an error) when one of
    the atom/comp/asym/seq/coordinate/occupancy/B-factor items is missing.
    '''

    dic = {}
    chain, nres, comp, atom, symbol, alt, ins = [], [], [], [], [], [], []
    x, y, z, biso, occ = [], [], [], [], []
    natm = []

    items, values = cif.cifparse(flist, '_atom_site.')  # a loop

    natm1 = cif.parse_values(items, values, "_atom_site.id")
    symbol1 = cif.parse_values(items, values, "_atom_site.type_symbol")
    if not symbol1:
        symbol1 = cif.parse_values(items, values,
                                   "_atom_site.atom_type_symbol")
    atom1 = cif.parse_values(items, values, "_atom_site.label_atom_id")
    asym1 = cif.parse_values(items, values, "_atom_site.auth_asym_id")
    comp1 = cif.parse_values(items, values, "_atom_site.label_comp_id")
    nres1 = cif.parse_values(items, values, "_atom_site.auth_seq_id")
    x1 = cif.parse_values(items, values, "_atom_site.Cartn_x")
    y1 = cif.parse_values(items, values, "_atom_site.Cartn_y")
    z1 = cif.parse_values(items, values, "_atom_site.Cartn_z")
    biso1 = cif.parse_values(items, values, "_atom_site.B_iso_or_equiv")
    occ1 = cif.parse_values(items, values, "_atom_site.occupancy")
    ins1 = cif.parse_values(items, values, "_atom_site.pdbx_PDB_ins_code")
    if not ins1:
        ins1 = cif.parse_values(items, values, "_atom_site.ndb_ins_code")
    alt1 = cif.parse_values(items, values, "_atom_site.label_alt_id")

    if (not (atom1 and comp1 and asym1 and nres1 and x1 and y1 and z1 and occ1
             and biso1)):
        print('Error: there is problem to parse atom_site.')
        return dic

    n = len(x1)

    for i in range(n):
        chain.append(asym1[i])
        nres.append(int(nres1[i]))
        comp.append(comp1[i])
        atom.append(atom1[i])
        # Copy the parsed atom ids into a separate output list; appending to
        # the parsed list itself would duplicate every id.
        if natm1: natm.append(natm1[i])
        # The element symbol, alt-id and insertion code are optional items:
        # use the '.' placeholder when the column is absent.
        symbol.append(symbol1[i] if symbol1 else '.')
        alt.append(alt1[i] if alt1 else '.')
        ins.append(ins1[i] if ins1 else '.')

        x.append(float(x1[i]))
        y.append(float(y1[i]))
        z.append(float(z1[i]))
        biso.append(float(biso1[i]))
        occ.append(float(occ1[i]))

    dic = {
        'chain': chain,
        'nres': nres,
        'comp': comp,
        'atom': atom,
        'symbol': symbol,
        'alt': alt,
        'ins': ins,
        'x': x,
        'y': y,
        'z': z,
        'biso': biso,
        'occ': occ,
        'natm': natm
    }

    return dic
Пример #17
0
def scale(flist):
    '''Get the SCALE matrices from the '_atom_sites.' category.

    flist: the lines of a cif file, as accepted by cif.cifparse.

    Returns a list with one entry per parsed record; each entry is a 3x4
    matrix [row1, row2, row3] where row k is
    [fract_transf_matrix[k][1], fract_transf_matrix[k][2],
     fract_transf_matrix[k][3], fract_transf_vector[k]] as floats.

    An empty list is returned when the category or any of the twelve items
    is missing. A record holding a non-numeric value is reported and skipped.
    '''
    items, values = cif.cifparse(flist, "_atom_sites.")
    if not items: return []

    # Twelve columns in row-major order: for each matrix row k the three
    # matrix elements followed by the translation component.
    columns = []
    for k in (1, 2, 3):
        for j in (1, 2, 3):
            columns.append(
                cif.parse_values(
                    items, values,
                    '_atom_sites.fract_transf_matrix[%d][%d]' % (k, j)))
        columns.append(
            cif.parse_values(items, values,
                             '_atom_sites.fract_transf_vector[%d]' % k))

    if not all(columns):
        return []

    matrices = []
    for record in zip(*columns):
        # Every one of the 12 values must be numeric, otherwise the matrix
        # would silently end up with short rows.
        if not all(util.is_number(v) for v in record):
            print('Error: Scale matrix has wrong values')
            continue
        nums = [float(v) for v in record]
        matrices.append([nums[0:4], nums[4:8], nums[8:12]])

    return matrices