def perform_screen(work_name, user_target, center_x, center_y, center_z,
                   size_x, size_y, size_z, mol_db, pdb_path, res_path):
    """Screen a built-in ligand library inside a user-defined search box.

    The job is marked 'computing', the receptor is converted to PDBQT with
    ``prepare_receptor4.py``, the selected ligands are docked with ``vina``,
    the best (lowest) affinity of every docked ligand is stored as a
    ``Screen`` row and written to ``<res_path>/res/screen_out.csv``, the
    user is notified through ``email_status`` and the job is finally marked
    'completed'.

    :param work_name: job name; passed to ``screen_status`` and stored on
        every ``Screen`` row.
    :param user_target: ';'-separated target identifiers. When longer than
        one character, only ligands for which ``pred`` reports at least one
        of these identifiers (``'chembl_id'`` key) are docked; otherwise
        every ``*pdbqt`` file of the library is docked.
    :param center_x: box centre, forwarded to vina's ``--center_x``.
    :param center_y: box centre, forwarded to vina's ``--center_y``.
    :param center_z: box centre, forwarded to vina's ``--center_z``.
    :param size_x: box size, forwarded to vina's ``--size_x``.
    :param size_y: box size, forwarded to vina's ``--size_y``.
    :param size_z: box size, forwarded to vina's ``--size_z``.
    :param mol_db: name of the ligand library directory below ``drugdb``.
    :param pdb_path: path of the receptor PDB file.
    :param res_path: job directory; results are written to ``<res_path>/res``.
    :return: None
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import subprocess

    screen_status(work_name, status='computing')

    # The job record stores the receptor path relative to the media root.
    # NOTE(review): the media root is hard-coded here; confirm it matches
    # the deployment before relying on it.
    media_relative_pdb = pdb_path.replace(
        '/home/wz/pywork/try27/drug/media', '')
    job = VirtualScreen.objects.filter(pdb_file=media_relative_pdb).values()[0]
    email = UserProfile.objects.filter(id=job['user_id']).values()[0]['email']

    screen_out = 'screen_out.csv'
    res = os.path.join(res_path, 'res')
    if not os.path.exists(res):
        os.mkdir(res)
    ligand_db = os.path.join(drugdb, mol_db)

    # Argument lists (no shell) so that paths and numbers coming from the
    # job request cannot be interpreted as shell syntax.
    subprocess.call([
        'python', '%s/extra_apps/vina/prepare_receptor4.py' % BASE_DIR,
        '-r', pdb_path, '-A', 'checkhydrogens'])
    # The converted receptor is looked up under this name in the current
    # working directory and, if present, moved into the job directory.
    pdbqt = pdb_path.split('/')[-1].split('.')[0] + '.pdbqt'
    if os.path.exists(pdbqt):
        shutil.move(pdbqt, os.path.join(res_path, pdbqt))
    receptor = '%s/%s' % (res_path, pdbqt)
    converter = '%s/extra_apps/vina/pdbqt_to_pdb.py' % BASE_DIR

    def dock(ligand_path):
        """Dock one ligand into ``res`` and convert the poses to PDB."""
        if not os.path.exists(ligand_path):
            return
        stem = os.path.splitext(os.path.basename(ligand_path))[0]
        docked = os.path.join(res, stem + '_out.pdbqt')
        # --out writes the poses straight into the result folder instead of
        # next to the ligand, so nothing is created in the ligand library
        # and no path has to be re-derived for a later move.
        subprocess.call([
            'vina',
            '--receptor', receptor,
            '--ligand', ligand_path,
            '--center_x', str(center_x),
            '--center_y', str(center_y),
            '--center_z', str(center_z),
            '--size_x', str(size_x),
            '--size_y', str(size_y),
            '--size_z', str(size_z),
            '--out', docked])
        # Best effort, as before: a failed docking run is simply skipped.
        if os.path.exists(docked):
            subprocess.call(['python', converter, '-f', docked, '-v'])

    if len(user_target) > 1:
        wanted = set(user_target.split(';'))
        target_list = os.listdir(os.path.join(TARGET_FOLDER_BASE, 'maccs'))
        smile_file = os.path.join(ligand_db, 'smiles.csv')
        smile_data = pd.read_csv(
            smile_file, header=None, encoding='utf-8').values.tolist()

        # multiprocessing refuses to start children from a daemonic
        # process, so the flag is cleared while the pool is created and
        # set again afterwards (presumably this runs in a daemonic worker).
        curr_proc = mp.current_process()
        curr_proc.daemon = False
        pool = mp.Pool(processes=mp.cpu_count())
        curr_proc.daemon = True

        # Column 0 is the ligand identifier, column 1 its SMILES string.
        pending = [
            (row[0], pool.apply_async(pred, args=(row[1], target_list)))
            for row in smile_data]
        pool.close()
        pool.join()
        # Collect every prediction before docking starts, so a failed
        # prediction aborts the job before any docking is done.
        predictions = [(name, job_.get()) for name, job_ in pending]

        for ligand_id, hits in predictions:
            if not hits:
                continue
            if any(hit['chembl_id'] in wanted for hit in hits):
                ligand = ligand_id.split('.')[0] + '.pdbqt'
                dock(os.path.join(ligand_db, ligand))
    else:
        for ligand_ in os.listdir(ligand_db):
            if ligand_.endswith('pdbqt'):
                dock(os.path.join(ligand_db, ligand_))

    re_reg = re.compile('REMARK VINA RESULT:(.*?)\n')
    screen_res = []
    insert_lst = []
    for out in os.listdir(res):
        # Only docking outputs carry scores; the stored path below is
        # derived by turning the '.pdbqt' suffix into '.pdb'.
        if not out.endswith('.pdbqt'):
            continue
        with open(os.path.join(res, out), 'r') as f:
            data = f.read()
        # The first number of every result line is taken as the affinity.
        scores = [float(model.split()[0]) for model in re_reg.findall(data)]
        if not scores:
            continue
        best = min(scores)
        screen_res.append([out.split('_out')[0], best])
        insert_lst.append(
            Screen(work_name=work_name, screen_cat='screen',
                   affinity=best,
                   # assumes `res` contains 'media/'; the stored path is
                   # the part after it.
                   path=os.path.join(res.split('media/')[1], out[:-2])))

    if screen_res:
        Screen.objects.bulk_create(insert_lst)
        # Build the frame from the list itself so affinities stay floats
        # and are sorted numerically (going through np.array would turn
        # them into strings and sort them lexicographically).
        df = pd.DataFrame(screen_res, columns=['id', 'Affinity (kcal/mol)'])
        df = df.sort_values("Affinity (kcal/mol)", ascending=False)
        # Written in place rather than via the working directory, which
        # concurrent jobs would share.
        df.to_csv(os.path.join(res, screen_out), index=False)
        email_status(email, res_path + '/res/screen_out.csv')
    else:
        email_status(email, '')
    screen_status(work_name=work_name, status='completed')
def perform_screen2_user(work_name, user_target, user_db_name, pdb_path,
                         resi_path, res_path):
    """Screen a user-supplied ligand file in a box derived from residues.

    The search box is computed from the atom coordinates found in
    ``resi_path``: its centre is the mean coordinate (rounded to three
    decimals) and its size is the coordinate extent (max - min) along each
    axis. The user ligand file is expanded into ``<res_path>/userdb`` by
    ``gen_user_db_qt_smiles``, the receptor is converted with
    ``prepare_receptor4.py``, ligands are docked with ``vina`` and the best
    (lowest) affinity per ligand is stored as a ``Screen`` row and written
    to ``<res_path>/res/screen_out.csv``.

    :param work_name: job name stored on every ``Screen`` row.
    :param user_target: ';'-separated target identifiers. When longer than
        one character, only ligands for which ``pred`` reports at least one
        of these identifiers (``'chembl_id'`` key) are docked; otherwise
        every ``*pdbqt`` file in the user database is docked.
    :param user_db_name: file name of the uploaded ligand file inside
        ``res_path``.
    :param pdb_path: receptor PDB path; a leading '/' is prepended before it
        is handed to ``prepare_receptor4.py``.
    :param resi_path: text file whose lines carry x/y/z coordinates in
        character columns 31-38, 39-46 and 47-54.
    :param res_path: job directory; results are written to ``<res_path>/res``.
    :return: None
    :raises ValueError: if no coordinates can be read from ``resi_path``.
    """
    # Function-scope imports keep this block self-contained.
    import shutil
    import subprocess

    # NOTE: the original code had its status call commented out here; this
    # function does not update the job status.

    x, y, z = [], [], []
    with open(resi_path, 'r') as f:
        for raw in f:
            if len(raw) <= 1:
                continue
            line = raw.rstrip()
            try:
                coords = (float(line[30:38]), float(line[38:46]),
                          float(line[46:54]))
            except ValueError:
                # Lines without parseable coordinates (e.g. short trailer
                # records) are skipped instead of aborting the job.
                continue
            x.append(coords[0])
            y.append(coords[1])
            z.append(coords[2])
    if not x:
        raise ValueError(
            'no atom coordinates could be read from %s' % resi_path)

    center_x = float('%.3f' % (sum(x) / len(x)))
    center_y = float('%.3f' % (sum(y) / len(y)))
    center_z = float('%.3f' % (sum(z) / len(z)))
    size_x = max(x) - min(x)
    size_y = max(y) - min(y)
    size_z = max(z) - min(z)

    screen_out = 'screen_out.csv'
    user_db = os.path.join(res_path, 'userdb')
    res = os.path.join(res_path, 'res')
    if not os.path.exists(user_db):
        os.mkdir(user_db)
    if not os.path.exists(res):
        os.mkdir(res)
    input_file = os.path.join(res_path, user_db_name)
    gen_user_db_qt_smiles(input_file, user_db)

    # Argument lists (no shell) so that paths and numbers coming from the
    # job request cannot be interpreted as shell syntax.
    subprocess.call([
        'python', '%s/extra_apps/vina/prepare_receptor4.py' % BASE_DIR,
        '-r', '/%s' % pdb_path, '-A', 'checkhydrogens'])
    # The converted receptor is looked up under this name in the current
    # working directory and, if present, moved into the job directory.
    pdbqt = pdb_path.split('/')[-1].split('.')[0] + '.pdbqt'
    if os.path.exists(pdbqt):
        shutil.move(pdbqt, os.path.join(res_path, pdbqt))
    receptor = '%s/%s' % (res_path, pdbqt)
    converter = '%s/extra_apps/vina/pdbqt_to_pdb.py' % BASE_DIR

    def dock(ligand_path):
        """Dock one ligand into ``res`` and convert the poses to PDB."""
        if not os.path.exists(ligand_path):
            return
        stem = os.path.splitext(os.path.basename(ligand_path))[0]
        docked = os.path.join(res, stem + '_out.pdbqt')
        # --out writes the poses straight into the result folder, so no
        # output path has to be re-derived from the ligand path afterwards.
        subprocess.call([
            'vina',
            '--receptor', receptor,
            '--ligand', ligand_path,
            '--center_x', str(center_x),
            '--center_y', str(center_y),
            '--center_z', str(center_z),
            '--size_x', str(size_x),
            '--size_y', str(size_y),
            '--size_z', str(size_z),
            '--out', docked])
        # Best effort, as before: a failed docking run is simply skipped.
        if os.path.exists(docked):
            subprocess.call(['python', converter, '-f', docked, '-v'])

    if len(user_target) > 1:
        wanted = set(user_target.split(';'))
        target_list = os.listdir(os.path.join(TARGET_FOLDER_BASE, 'maccs'))
        smile_file = os.path.join(user_db, 'smiles.csv')
        smile_data = pd.read_csv(
            smile_file, header=None, encoding='utf-8').values.tolist()

        # multiprocessing refuses to start children from a daemonic
        # process, so the flag is cleared while the pool is created and
        # set again afterwards (presumably this runs in a daemonic worker).
        curr_proc = mp.current_process()
        curr_proc.daemon = False
        pool = mp.Pool(processes=mp.cpu_count())
        curr_proc.daemon = True

        # Column 0 is the ligand identifier, column 1 its SMILES string.
        pending = [
            (row[0], pool.apply_async(pred, args=(row[1], target_list)))
            for row in smile_data]
        pool.close()
        pool.join()
        # Collect every prediction before docking starts, so a failed
        # prediction aborts the job before any docking is done.
        predictions = [(name, job_.get()) for name, job_ in pending]

        for ligand_id, hits in predictions:
            if not hits:
                continue
            if any(hit['chembl_id'] in wanted for hit in hits):
                ligand = ligand_id.split('.')[0] + '.pdbqt'
                dock(os.path.join(user_db, ligand))
    else:
        for ligand_ in os.listdir(user_db):
            if ligand_.endswith('pdbqt'):
                dock(os.path.join(user_db, ligand_))

    re_reg = re.compile('REMARK VINA RESULT:(.*?)\n')
    screen_res = []
    insert_lst = []
    for out in os.listdir(res):
        # Only docking outputs carry scores; the stored path below is
        # derived by turning the '.pdbqt' suffix into '.pdb'.
        if not out.endswith('.pdbqt'):
            continue
        with open(os.path.join(res, out), 'r') as f:
            data = f.read()
        # The first number of every result line is taken as the affinity.
        scores = [float(model.split()[0]) for model in re_reg.findall(data)]
        if not scores:
            continue
        best = min(scores)
        screen_res.append([out.split('_out')[0], best])
        insert_lst.append(
            Screen(work_name=work_name, screen_cat='screen2',
                   affinity=best,
                   # assumes `res` contains 'media/'; the stored path is
                   # the part after it.
                   path=os.path.join(res.split('media/')[1], out[:-2])))

    if screen_res:
        Screen.objects.bulk_create(insert_lst)
        # Build the frame from the list itself so affinities stay floats
        # and are sorted numerically (going through np.array would turn
        # them into strings and sort them lexicographically).
        df = pd.DataFrame(screen_res, columns=['id', 'Affinity (kcal/mol)'])
        df = df.sort_values("Affinity (kcal/mol)", ascending=False)
        # Written in place rather than via the working directory, which
        # concurrent jobs would share.
        df.to_csv(os.path.join(res, screen_out), index=False)
def perform_screen_user(work_name, center_x, center_y, center_z, size_x,
                        size_y, size_z, user_db_name, pdb_path, res_path):
    """Screen a user-supplied ligand file inside a user-defined search box.

    The job is marked 'computing', the user ligand file is expanded into
    ``<res_path>/userdb`` by ``gen_user_db_qt_smiles``, the receptor is
    converted with ``prepare_receptor4.py`` and every ligand for which
    ``pred`` returns a non-empty result is docked. The best (lowest)
    affinity per ligand is stored as a ``Screen`` row and written to
    ``<res_path>/res/screen_out.csv``; the job is then marked 'completed'.

    :param work_name: job name; passed to ``screen_status`` and stored on
        every ``Screen`` row.
    :param center_x: box centre, forwarded to vina's ``--center_x``.
    :param center_y: box centre, forwarded to vina's ``--center_y``.
    :param center_z: box centre, forwarded to vina's ``--center_z``.
    :param size_x: box size, forwarded to vina's ``--size_x``.
    :param size_y: box size, forwarded to vina's ``--size_y``.
    :param size_z: box size, forwarded to vina's ``--size_z``.
    :param user_db_name: file name of the uploaded ligand file inside
        ``res_path``.
    :param pdb_path: receptor PDB path; a leading '/' is prepended before it
        is handed to ``prepare_receptor4.py``.
    :param res_path: job directory; results are written to ``<res_path>/res``.
    :return: None
    """
    # Function-scope imports keep this block self-contained.
    import shlex
    import shutil
    import subprocess

    screen_status(work_name, status='computing')

    screen_out = 'screen_out.csv'
    user_db = os.path.join(res_path, 'userdb')
    res = os.path.join(res_path, 'res')
    if not os.path.exists(user_db):
        os.mkdir(user_db)
    if not os.path.exists(res):
        os.mkdir(res)
    input_file = os.path.join(res_path, user_db_name)
    gen_user_db_qt_smiles(input_file, user_db)

    # Argument lists (no shell) so that paths and numbers coming from the
    # job request cannot be interpreted as shell syntax.
    subprocess.call([
        'python', '%s/extra_apps/vina/prepare_receptor4.py' % BASE_DIR,
        '-r', '/%s' % pdb_path, '-A', 'checkhydrogens'])
    # The converted receptor is looked up under this name in the current
    # working directory and, if present, moved into the job directory.
    pdbqt = pdb_path.split('/')[-1].split('.')[0] + '.pdbqt'
    if os.path.exists(pdbqt):
        shutil.move(pdbqt, os.path.join(res_path, pdbqt))
    receptor = '%s/%s' % (res_path, pdbqt)
    converter = '%s/extra_apps/vina/pdbqt_to_pdb.py' % BASE_DIR

    # `vina_path` is a name defined outside this function. It is tokenised
    # the way a shell would, so a value carrying extra options keeps
    # working. NOTE(review): the sibling screening functions call plain
    # 'vina' - confirm which one is intended.
    vina_cmd = shlex.split(str(vina_path))

    def dock(ligand_path):
        """Dock one ligand into ``res`` and convert the poses to PDB."""
        stem = os.path.splitext(os.path.basename(ligand_path))[0]
        docked = os.path.join(res, stem + '_out.pdbqt')
        # --out writes the poses straight into the result folder, so no
        # output path has to be re-derived from the ligand path afterwards.
        subprocess.call(vina_cmd + [
            '--receptor', receptor,
            '--ligand', ligand_path,
            '--center_x', str(center_x),
            '--center_y', str(center_y),
            '--center_z', str(center_z),
            '--size_x', str(size_x),
            '--size_y', str(size_y),
            '--size_z', str(size_z),
            '--out', docked])
        # Best effort, as before: a failed docking run is simply skipped.
        if os.path.exists(docked):
            subprocess.call(['python', converter, '-f', docked, '-v'])

    target_list = os.listdir(os.path.join(TARGET_FOLDER_BASE, 'maccs'))
    smile_file = os.path.join(user_db, 'smiles.csv')
    smile_data = pd.read_csv(
        smile_file, header=None, encoding='utf-8').values.tolist()

    # multiprocessing refuses to start children from a daemonic process, so
    # the flag is cleared while the pool is created and set again
    # afterwards (presumably this runs in a daemonic worker).
    curr_proc = mp.current_process()
    curr_proc.daemon = False
    pool = mp.Pool(processes=mp.cpu_count())
    curr_proc.daemon = True

    # Column 0 is the ligand identifier, column 1 its SMILES string.
    pending = [
        (row[0], pool.apply_async(pred, args=(row[1], target_list)))
        for row in smile_data]
    pool.close()
    pool.join()
    # Collect every prediction before docking starts, so a failed
    # prediction aborts the job before any docking is done.
    predictions = [(name, job_.get()) for name, job_ in pending]

    for ligand_id, hits in predictions:
        if not hits:
            continue
        ligand_path = os.path.join(
            user_db, ligand_id.split('.')[0] + '.pdbqt')
        if os.path.exists(ligand_path):
            dock(ligand_path)

    re_reg = re.compile('REMARK VINA RESULT:(.*?)\n')
    screen_res = []
    insert_lst = []
    for out in os.listdir(res):
        # Only docking outputs carry scores; the stored path below is
        # derived by turning the '.pdbqt' suffix into '.pdb'.
        if not out.endswith('.pdbqt'):
            continue
        with open(os.path.join(res, out), 'r') as f:
            data = f.read()
        # The first number of every result line is taken as the affinity.
        scores = [float(model.split()[0]) for model in re_reg.findall(data)]
        if not scores:
            continue
        best = min(scores)
        screen_res.append([out.split('_out')[0], best])
        insert_lst.append(
            Screen(work_name=work_name, screen_cat='screen',
                   affinity=best,
                   # assumes `res` contains 'media/'; the stored path is
                   # the part after it.
                   path=os.path.join(res.split('media/')[1], out[:-2])))

    # Without this guard an empty result set makes the two-column DataFrame
    # construction raise, and the job would never be marked 'completed'.
    if screen_res:
        Screen.objects.bulk_create(insert_lst)
        # Build the frame from the list itself so affinities stay floats
        # and are sorted numerically (going through np.array would turn
        # them into strings and sort them lexicographically).
        df = pd.DataFrame(screen_res, columns=['id', 'Affinity (kcal/mol)'])
        df = df.sort_values("Affinity (kcal/mol)", ascending=False)
        # Written in place rather than via the working directory, which
        # concurrent jobs would share.
        df.to_csv(os.path.join(res, screen_out), index=False)
    screen_status(work_name, status='completed')