def get_auth_asym_id_list(iciffile):
    """Count atoms and residues per author chain id in an mmCIF file.

    Only the ``atom_site`` category of ``iciffile`` is read (through
    ``mmCIF.read_mmCIF_file``).  For every distinct ``auth_asym_id`` the
    following counters are accumulated:

    * ``NATOM``   -- rows whose ``group_PDB`` is ``ATOM``
    * ``NHETATM`` -- rows whose ``group_PDB`` is ``HETATM``, except those
      whose ``label_comp_id`` is ``HOH``
    * ``NRES``    -- ``CA`` atoms belonging to one of the 20 standard
      amino acids
    * ``NBASE``   -- ``P`` atoms belonging to a standard RNA/DNA nucleotide

    One summary line per chain is printed to stdout, in sorted chain order.

    Args:
        iciffile: path of the mmCIF file handed to the reader.

    Returns:
        A tuple ``(auth_asym_id_list, property)``: the sorted list of chain
        ids and a dict mapping each chain id to its counter dict.  Both are
        empty when the reader produced no ``atom_site`` category.
    """
    # Sets give O(1) membership tests inside the per-atom loop.
    amino = frozenset((
        'ALA', 'CYS', 'ASP', 'GLU', 'PHE', 'GLY', 'HIS', 'ILE', 'LYS', 'LEU',
        'MET', 'ASN', 'PRO', 'GLN', 'ARG', 'SER', 'THR', 'VAL', 'TRP', 'TYR',
    ))
    nuc = frozenset(('A', 'T', 'G', 'C', 'U', 'DA', 'DT', 'DG', 'DC', 'DU'))

    dat = {}
    mmCIF.read_mmCIF_file(iciffile, dat, focus_category=['atom_site'])
    if 'atom_site' not in dat:
        return ([], {})

    S = dat['atom_site']
    chain_property = {}
    # The columns of the atom_site loop are walked in lockstep, one tuple
    # per atom row.
    atom_rows = zip(S['auth_asym_id'], S['group_PDB'],
                    S['label_comp_id'], S['label_atom_id'])
    for chain, group, comp_id, atom_id in atom_rows:
        counts = chain_property.get(chain)
        if counts is None:
            counts = {'NATOM': 0, 'NHETATM': 0, 'NRES': 0, 'NBASE': 0}
            chain_property[chain] = counts

        if group == 'ATOM':
            counts['NATOM'] += 1
        if group == 'HETATM' and comp_id != 'HOH':
            counts['NHETATM'] += 1
        # One CA per amino-acid residue, one P per nucleotide.
        if atom_id == 'CA' and comp_id in amino:
            counts['NRES'] += 1
        if atom_id == 'P' and comp_id in nuc:
            counts['NBASE'] += 1

    # Plain sorted() gives the same order as the former cmp-based sort and
    # also works on Python 3, where the cmp argument no longer exists.
    auth_asym_id_list = sorted(chain_property)
    for chain in auth_asym_id_list:
        counts = chain_property[chain]
        # Single-argument print(...) behaves identically on Python 2 and 3.
        print("%s NATOM %d NHETATM %d NRES %d NBASE %d" % (
            chain, counts['NATOM'], counts['NHETATM'], counts['NRES'],
            counts['NBASE']))
    return (auth_asym_id_list, chain_property)
def _assembly_gen_rows(G, assembly_id, position):
    """Return the row indices of ``G`` that describe ``assembly_id``.

    ``G`` is the ``pdbx_struct_assembly_gen`` category.  Its rows are keyed
    by ``assembly_id`` and one assembly may own several of them, so rows are
    matched by id.  When the ``assembly_id`` column is absent or has no
    matching row, fall back to ``[position]`` (the row with the same index
    as the assembly), which is what the code did before.
    """
    if 'assembly_id' in G:
        rows = [j for j, gen_id in enumerate(G['assembly_id'])
                if gen_id == assembly_id]
        if rows:
            return rows
    return [position]


def get_assembly_id_list(iciffile):
    """List the assemblies of an mmCIF file that differ from the asymmetric unit.

    Reads the ``atom_site``, ``pdbx_struct_assembly`` and
    ``pdbx_struct_assembly_gen`` categories of ``iciffile`` through
    ``mmCIF.read_mmCIF_file`` and writes one diagnostic line per assembly to
    stdout (selected ``pdbx_struct_assembly`` items followed by the
    ``asym_id_list`` / ``oper_expression`` of its generator rows).

    Args:
        iciffile: path of the mmCIF file handed to the reader.

    Returns:
        * ``[]`` when the file yields no ``atom_site`` category;
        * ``['-']`` when either assembly category is missing;
        * otherwise ``['-']`` followed by the id of every assembly that is
          not recognised as identical to the asymmetric unit.
    """
    dat = {}
    mmCIF.read_mmCIF_file(iciffile, dat, focus_category=[
        'atom_site', 'pdbx_struct_assembly', 'pdbx_struct_assembly_gen'
    ])
    if 'atom_site' not in dat:
        return []
    if ('pdbx_struct_assembly' not in dat
            or 'pdbx_struct_assembly_gen' not in dat):
        return ['-']

    # Distinct label_asym_id values present in the asymmetric unit, sorted
    # so they can be compared with each assembly's sorted asym_id_list.
    asym_id_list_asymmetric_unit = sorted(set(dat['atom_site']['label_asym_id']))

    A = dat['pdbx_struct_assembly']
    G = dat['pdbx_struct_assembly_gen']
    items = ('id', 'details', 'method_details', 'oligomeric_details',
             'oligomeric_count')

    assembly_id_list = ['-']
    for i, assembly_id in enumerate(A['id']):
        for item in items:
            if item in A:
                sys.stdout.write(" %s" % (A[item][i]))
            else:
                sys.stdout.write("-")

        # Generator rows are looked up by assembly id: the two categories
        # are separate loops and are not guaranteed to be row-aligned.
        gen_rows = _assembly_gen_rows(G, assembly_id, i)
        for j in gen_rows:
            sys.stdout.write(" %s" % (G['asym_id_list'][j]))
            sys.stdout.write(" %s" % (G['oper_expression'][j]))

        ## Check whether the assembly is equal to the asymmetric unit:
        ## (0) it is generated by exactly one pdbx_struct_assembly_gen row,
        ## (1) its asym_id_list is identical to that of the asymmetric unit,
        ## (2) its oper_expression expands to a single transformation.
        is_asymmetric_unit = False
        if len(gen_rows) == 1:
            j = gen_rows[0]
            oper_expression_list = mmCIF.get_oper_expression_list(
                G['oper_expression'][j])
            asym_id_list = sorted(G['asym_id_list'][j].split(','))
            is_asymmetric_unit = (
                asym_id_list == asym_id_list_asymmetric_unit
                and len(oper_expression_list) == 1)

        if is_asymmetric_unit:
            sys.stdout.write("#assembly_id '%s' is the asymmetric_unit." %
                             (assembly_id))
        else:
            assembly_id_list.append(assembly_id)
        sys.stdout.write("\n")
    return assembly_id_list
#1ai1.cif.gz #1ai2.cif.gz field = file.split('.') pdb_id = field[0] pdb_id_list.append(pdb_id) print "#len(pdb_id_list):%d" % (len(pdb_id_list)) ### (3) do commands ### MISS_PDB_ID = {} for pdb_id in (pdb_id_list): iciffile = OPT['icifdir'] + '/' + pdb_id[1:3] + '/' + pdb_id + '.cif.gz' dat = {} mmCIF.read_mmCIF_file(iciffile, dat, focus_category=['pdbx_struct_assembly']) if ('pdbx_struct_assembly' in dat): assembly_id_list = dat['pdbx_struct_assembly']['id'] else: assembly_id_list = [] if (len(assembly_id_list) > 1): ogmmdir = OPT['odir'] + '/' + pdb_id[1:3] ogmmfile = ogmmdir + '/' + pdb_id + '-' + '1' + '.gmm' if (os.path.isfile(ogmmfile) == 0): assembly_id_list = get_assembly_id_list(iciffile) print "#%s %s" % (pdb_id, assembly_id_list) for assembly_id in (assembly_id_list):
# Scan every file found in the immediate sub-directories of OPT['icifdir']
# and collect the id (file name up to the first '.') of each entry whose
# pdbx_struct_assembly_gen.oper_expression contains ")(" after its first
# character.  Each hit is reported on stdout with a "COMBI_OPER" line.
pdbid_list = []
if os.path.isdir(OPT['icifdir']):
    for subdir in os.listdir(OPT['icifdir']):
        subdir_path = OPT['icifdir'] + '/' + subdir
        # Plain files directly under icifdir are ignored.
        if not os.path.isdir(subdir_path):
            continue
        for filename in os.listdir(subdir_path):
            filefull = OPT['icifdir'] + '/' + subdir + '/' + filename
            if not os.path.isfile(filefull):
                continue
            dat = {}
            mmCIF.read_mmCIF_file(
                filefull, dat, focus_category=['pdbx_struct_assembly_gen'])
            oper_expressions = ()
            if 'pdbx_struct_assembly_gen' in dat:
                gen = dat['pdbx_struct_assembly_gen']
                if 'oper_expression' in gen:
                    oper_expressions = gen['oper_expression']
            # any() stops at the first matching expression.
            if any(x.find(')(') > 0 for x in oper_expressions):
                # Single-argument print(...) works on Python 2 and 3 alike.
                print("COMBI_OPER '%s'" % (filefull))
                pdbid_list.append(filename.split('.')[0])
# Output list file; it stays open because it is used past this point.
of = open(OPT['olist'], 'w')