Пример #1
0
def perform_screen(work_name, user_target, center_x, center_y, center_z,
                   size_x, size_y, size_z, mol_db, pdb_path, res_path):
    """
    Dock a built-in ligand library against a receptor with AutoDock Vina,
    using a search box whose centre and size are supplied by the caller.

    The receptor is converted to PDBQT, every selected ligand is docked,
    the lowest affinity found for each ligand is stored as a ``Screen`` row
    and written to ``<res_path>/res/screen_out.csv``, and ``email_status``
    is called with the job owner's address.

    :param work_name: job name used for status updates and ``Screen`` rows
    :param user_target: ';'-separated target ids; when longer than one
        character, only ligands whose predicted targets (``chembl_id``)
        intersect this list are docked, otherwise the whole library is
    :param center_x: search-box centre, x
    :param center_y: search-box centre, y
    :param center_z: search-box centre, z
    :param size_x: search-box size, x
    :param size_y: search-box size, y
    :param size_z: search-box size, z
    :param mol_db: ligand library directory name, relative to ``drugdb``
    :param pdb_path: path of the receptor PDB file
    :param res_path: working directory of this job
    :return: None
    """
    import subprocess

    def _run(argv):
        # No shell: paths are passed verbatim, so spaces or metacharacters
        # in them cannot be re-interpreted. Like os.system, a failing or
        # missing tool must not abort the whole screen.
        try:
            return subprocess.call(argv)
        except OSError:
            return -1

    screen_status(work_name, status='computing')
    email = UserProfile.objects.filter(id=VirtualScreen.objects.filter(
        pdb_file=pdb_path.replace('/home/wz/pywork/try27/drug/media', '')
    ).values()[0]['user_id']).values()[0]['email']
    screen_out = 'screen_out.csv'
    res = os.path.join(res_path, 'res')

    if not os.path.exists(res):
        os.mkdir(res)

    ligand_db = os.path.join(drugdb, mol_db)

    # The receptor PDBQT is looked for in the current working directory
    # and, if present, moved next to the job files.
    _run(['python', '%s/extra_apps/vina/prepare_receptor4.py' % BASE_DIR,
          '-r', pdb_path, '-A', 'checkhydrogens'])
    pdbqt = pdb_path.split('/')[-1].split('.')[0] + '.pdbqt'
    if os.path.exists(pdbqt):
        _run(['mv', pdbqt, res_path])

    if len(user_target) > 1:
        wanted = set(user_target.split(';'))
        target_list = os.listdir(os.path.join(TARGET_FOLDER_BASE, 'maccs'))
        smile_file = os.path.join(ligand_db, 'smiles.csv')
        smile_data = pd.read_csv(
            smile_file, header=None, encoding='utf-8').values.tolist()

        # A daemonic process may not spawn children, so the flag is cleared
        # only while the pool is created and then put back as it was.
        curr_proc = mp.current_process()
        was_daemon = curr_proc.daemon
        curr_proc.daemon = False
        pool = mp.Pool(processes=mp.cpu_count())
        curr_proc.daemon = was_daemon

        pending = [(row[0], pool.apply_async(pred, args=(row[1], target_list)))
                   for row in smile_data]
        pool.close()
        pool.join()

        ligand_paths = []
        for name, job in pending:
            predicted = job.get()
            if predicted and any(hit['chembl_id'] in wanted
                                 for hit in predicted):
                ligand_paths.append(
                    os.path.join(ligand_db, name.split('.')[0] + '.pdbqt'))
    else:
        ligand_paths = [os.path.join(ligand_db, v)
                        for v in os.listdir(ligand_db)
                        if v.endswith('pdbqt')]

    box_args = []
    for flag, value in (('--center_x', center_x), ('--center_y', center_y),
                        ('--center_z', center_z), ('--size_x', size_x),
                        ('--size_y', size_y), ('--size_z', size_z)):
        box_args += [flag, '%s' % value]

    for ligand_path in ligand_paths:
        if not os.path.exists(ligand_path):
            continue
        # Write the poses straight into this job's result directory instead
        # of relying on Vina's default "<ligand>_out.pdbqt" beside the
        # ligand: the library is shared between jobs, and cutting the full
        # path at its first '.' breaks when a directory name contains a dot.
        stem = os.path.splitext(os.path.basename(ligand_path))[0]
        docked = os.path.join(res, stem + '_out.pdbqt')
        _run(['vina', '--receptor', '%s/%s' % (res_path, pdbqt),
              '--ligand', ligand_path] + box_args + ['--out', docked])
        _run(['python', '%s/extra_apps/vina/pdbqt_to_pdb.py' % BASE_DIR,
              '-f', docked, '-v'])

    vina_result = re.compile('REMARK VINA RESULT:(.*?)\n')
    screen_res = []
    insert_lst = []
    for out in os.listdir(res):
        # Only Vina outputs are parsed; the stored path below is derived by
        # trimming "qt" from the name, which is meaningless for other files.
        if not out.endswith('.pdbqt'):
            continue
        with open(os.path.join(res, out), 'r') as f:
            scores = [float(model.split()[0])
                      for model in vina_result.findall(f.read())]
        if not scores:
            continue
        best = min(scores)
        screen_res.append([out.split('_out')[0], best])
        insert_lst.append(
            Screen(work_name=work_name,
                   screen_cat='screen',
                   affinity=best,
                   # 'x_out.pdbqt' -> 'x_out.pdb', the converted structure
                   path=os.path.join(res.split('media/')[1], out[:-2])))

    if screen_res:
        Screen.objects.bulk_create(insert_lst)
        # Build the frame from the list itself so affinities stay floats.
        # Going through np.array() turned them into strings, and the
        # descending string sort then misplaced values such as -10.2.
        # NOTE(review): numeric ascending order keeps the strongest binder
        # first, which is what the string sort produced for single-digit
        # negative affinities.
        df = pd.DataFrame(screen_res, columns=['id', 'Affinity (kcal/mol)'])
        df = df.sort_values("Affinity (kcal/mol)", ascending=True)
        # Written directly to its final location; a temporary file in the
        # shared working directory could be clobbered by a concurrent job.
        df.to_csv(os.path.join(res, screen_out), index=False)
        email_status(email, res_path + '/res/screen_out.csv')
    else:
        email_status(email, '')
    screen_status(work_name=work_name, status='completed')
Пример #2
0
def perform_screen2_user(work_name, user_target, user_db_name, pdb_path,
                         resi_path, res_path):
    """
    Dock a user-supplied ligand database against a receptor with AutoDock
    Vina, deriving the search box from a user-supplied residue file.

    The box centre is the mean of the atom coordinates read from
    ``resi_path`` (fixed PDB columns 31-54) and the box size is their
    extent along each axis. The lowest affinity found for each ligand is
    stored as a ``Screen`` row and written to
    ``<res_path>/res/screen_out.csv``.

    :param work_name: job name stored on the ``Screen`` rows
    :param user_target: ';'-separated target ids; when longer than one
        character, only ligands whose predicted targets (``chembl_id``)
        intersect this list are docked, otherwise every ligand is
    :param user_db_name: file name of the uploaded ligand database inside
        ``res_path``
    :param pdb_path: path of the receptor PDB file
    :param resi_path: path of the file holding the binding-site atoms
    :param res_path: working directory of this job
    :return: None
    :raises ValueError: if no atom coordinates can be read from
        ``resi_path``
    """
    import subprocess

    def _run(argv):
        # No shell: paths are passed verbatim, so spaces or metacharacters
        # in them cannot be re-interpreted. Like os.system, a failing or
        # missing tool must not abort the whole screen.
        try:
            return subprocess.call(argv)
        except OSError:
            return -1

    # NOTE(review): unlike the sibling screening functions this one never
    # updates the job status; the original call was left commented out:
    # screen2_status(work_name, status='computing')

    x, y, z = [], [], []
    with open(resi_path, 'r') as f:
        for line in f:
            line = line.rstrip()
            # Blank lines and records without coordinates (TER, END, ...)
            # are skipped instead of aborting the job.
            try:
                coords = (float(line[30:38]), float(line[38:46]),
                          float(line[46:54]))
            except ValueError:
                continue
            x.append(coords[0])
            y.append(coords[1])
            z.append(coords[2])
    if not x:
        raise ValueError('no atom coordinates found in %s' % resi_path)

    center_x = float('%.3f' % (sum(x) / len(x)))
    center_y = float('%.3f' % (sum(y) / len(y)))
    center_z = float('%.3f' % (sum(z) / len(z)))
    size_x = max(x) - min(x)
    size_y = max(y) - min(y)
    size_z = max(z) - min(z)

    screen_out = 'screen_out.csv'
    user_db = os.path.join(res_path, 'userdb')
    res = os.path.join(res_path, 'res')

    if not os.path.exists(user_db):
        os.mkdir(user_db)

    if not os.path.exists(res):
        os.mkdir(res)
    input_file = os.path.join(res_path, user_db_name)
    gen_user_db_qt_smiles(input_file, user_db)

    # NOTE(review): the receptor path is prefixed with '/' here, presumably
    # because callers pass it without the leading slash - confirm.
    _run(['python', '%s/extra_apps/vina/prepare_receptor4.py' % BASE_DIR,
          '-r', '/%s' % pdb_path, '-A', 'checkhydrogens'])
    pdbqt = pdb_path.split('/')[-1].split('.')[0] + '.pdbqt'
    if os.path.exists(pdbqt):
        _run(['mv', pdbqt, res_path])

    if len(user_target) > 1:
        wanted = set(user_target.split(';'))
        target_list = os.listdir(os.path.join(TARGET_FOLDER_BASE, 'maccs'))
        smile_file = os.path.join(user_db, 'smiles.csv')
        smile_data = pd.read_csv(
            smile_file, header=None, encoding='utf-8').values.tolist()

        # A daemonic process may not spawn children, so the flag is cleared
        # only while the pool is created and then put back as it was.
        curr_proc = mp.current_process()
        was_daemon = curr_proc.daemon
        curr_proc.daemon = False
        pool = mp.Pool(processes=mp.cpu_count())
        curr_proc.daemon = was_daemon

        pending = [(row[0], pool.apply_async(pred, args=(row[1], target_list)))
                   for row in smile_data]
        pool.close()
        pool.join()

        ligand_paths = []
        for name, job in pending:
            predicted = job.get()
            if predicted and any(hit['chembl_id'] in wanted
                                 for hit in predicted):
                ligand_paths.append(
                    os.path.join(user_db, name.split('.')[0] + '.pdbqt'))
    else:
        ligand_paths = [os.path.join(user_db, v)
                        for v in os.listdir(user_db)
                        if v.endswith('pdbqt')]

    box_args = []
    for flag, value in (('--center_x', center_x), ('--center_y', center_y),
                        ('--center_z', center_z), ('--size_x', size_x),
                        ('--size_y', size_y), ('--size_z', size_z)):
        box_args += [flag, '%s' % value]

    for ligand_path in ligand_paths:
        if not os.path.exists(ligand_path):
            continue
        # Write the poses straight into the result directory instead of
        # relying on Vina's default "<ligand>_out.pdbqt" beside the ligand:
        # cutting the full path at its first '.' breaks when a directory
        # name contains a dot.
        stem = os.path.splitext(os.path.basename(ligand_path))[0]
        docked = os.path.join(res, stem + '_out.pdbqt')
        _run(['vina', '--receptor', '%s/%s' % (res_path, pdbqt),
              '--ligand', ligand_path] + box_args + ['--out', docked])
        _run(['python', '%s/extra_apps/vina/pdbqt_to_pdb.py' % BASE_DIR,
              '-f', docked, '-v'])

    vina_result = re.compile('REMARK VINA RESULT:(.*?)\n')
    screen_res = []
    insert_lst = []
    for out in os.listdir(res):
        # Only Vina outputs are parsed; the stored path below is derived by
        # trimming "qt" from the name, which is meaningless for other files.
        if not out.endswith('.pdbqt'):
            continue
        with open(os.path.join(res, out), 'r') as f:
            scores = [float(model.split()[0])
                      for model in vina_result.findall(f.read())]
        if not scores:
            continue
        best = min(scores)
        screen_res.append([out.split('_out')[0], best])
        insert_lst.append(
            Screen(work_name=work_name,
                   screen_cat='screen2',
                   affinity=best,
                   # 'x_out.pdbqt' -> 'x_out.pdb', the converted structure
                   path=os.path.join(res.split('media/')[1], out[:-2])))

    if screen_res:
        Screen.objects.bulk_create(insert_lst)
        # Build the frame from the list itself so affinities stay floats.
        # Going through np.array() turned them into strings, and the
        # descending string sort then misplaced values such as -10.2.
        # NOTE(review): numeric ascending order keeps the strongest binder
        # first, which is what the string sort produced for single-digit
        # negative affinities.
        df = pd.DataFrame(screen_res, columns=['id', 'Affinity (kcal/mol)'])
        df = df.sort_values("Affinity (kcal/mol)", ascending=True)
        # Written directly to its final location; a temporary file in the
        # shared working directory could be clobbered by a concurrent job.
        df.to_csv(os.path.join(res, screen_out), index=False)
Пример #3
0
def perform_screen_user(work_name, center_x, center_y, center_z, size_x,
                        size_y, size_z, user_db_name, pdb_path, res_path):
    """
    Dock a user-supplied ligand database against a receptor with AutoDock
    Vina, using a search box whose centre and size are supplied by the
    caller.

    Only ligands for which ``pred`` returns at least one target are docked.
    The lowest affinity found for each ligand is stored as a ``Screen`` row
    and written to ``<res_path>/res/screen_out.csv``.

    :param work_name: job name used for status updates and ``Screen`` rows
    :param center_x: search-box centre, x
    :param center_y: search-box centre, y
    :param center_z: search-box centre, z
    :param size_x: search-box size, x
    :param size_y: search-box size, y
    :param size_z: search-box size, z
    :param user_db_name: file name of the uploaded ligand database inside
        ``res_path``
    :param pdb_path: path of the receptor PDB file
    :param res_path: working directory of this job
    :return: None
    """
    import subprocess

    def _run(argv):
        # No shell: paths are passed verbatim, so spaces or metacharacters
        # in them cannot be re-interpreted. Like os.system, a failing or
        # missing tool must not abort the whole screen.
        try:
            return subprocess.call(argv)
        except OSError:
            return -1

    screen_status(work_name, status='computing')

    screen_out = 'screen_out.csv'
    user_db = os.path.join(res_path, 'userdb')
    res = os.path.join(res_path, 'res')

    if not os.path.exists(user_db):
        os.mkdir(user_db)

    if not os.path.exists(res):
        os.mkdir(res)

    input_file = os.path.join(res_path, user_db_name)
    gen_user_db_qt_smiles(input_file, user_db)

    # NOTE(review): the receptor path is prefixed with '/' here, presumably
    # because callers pass it without the leading slash - confirm.
    _run(['python', '%s/extra_apps/vina/prepare_receptor4.py' % BASE_DIR,
          '-r', '/%s' % pdb_path, '-A', 'checkhydrogens'])
    pdbqt = pdb_path.split('/')[-1].split('.')[0] + '.pdbqt'
    # Guarded like the sibling screening functions: nothing to move when
    # receptor preparation failed.
    if os.path.exists(pdbqt):
        _run(['mv', pdbqt, res_path])

    target_list = os.listdir(os.path.join(TARGET_FOLDER_BASE, 'maccs'))
    smile_file = os.path.join(user_db, 'smiles.csv')
    smile_data = pd.read_csv(
        smile_file, header=None, encoding='utf-8').values.tolist()

    # A daemonic process may not spawn children, so the flag is cleared
    # only while the pool is created and then put back as it was.
    curr_proc = mp.current_process()
    was_daemon = curr_proc.daemon
    curr_proc.daemon = False
    pool = mp.Pool(processes=mp.cpu_count())
    curr_proc.daemon = was_daemon

    pending = [(row[0], pool.apply_async(pred, args=(row[1], target_list)))
               for row in smile_data]
    pool.close()
    pool.join()

    box_args = []
    for flag, value in (('--center_x', center_x), ('--center_y', center_y),
                        ('--center_z', center_z), ('--size_x', size_x),
                        ('--size_y', size_y), ('--size_z', size_z)):
        box_args += [flag, '%s' % value]

    for name, job in pending:
        if not job.get():
            continue
        ligand_path = os.path.join(user_db, name.split('.')[0] + '.pdbqt')
        if not os.path.exists(ligand_path):
            continue
        # Write the poses straight into the result directory instead of
        # relying on Vina's default "<ligand>_out.pdbqt" beside the ligand:
        # cutting the full path at its first '.' breaks when a directory
        # name contains a dot.
        stem = os.path.splitext(os.path.basename(ligand_path))[0]
        docked = os.path.join(res, stem + '_out.pdbqt')
        # assumes vina_path is a bare executable path with no embedded
        # arguments - TODO confirm
        _run([vina_path, '--receptor', '%s/%s' % (res_path, pdbqt),
              '--ligand', ligand_path] + box_args + ['--out', docked])
        _run(['python', '%s/extra_apps/vina/pdbqt_to_pdb.py' % BASE_DIR,
              '-f', docked, '-v'])

    vina_result = re.compile('REMARK VINA RESULT:(.*?)\n')
    screen_res = []
    insert_lst = []
    for out in os.listdir(res):
        # Only Vina outputs are parsed; the stored path below is derived by
        # trimming "qt" from the name, which is meaningless for other files.
        if not out.endswith('.pdbqt'):
            continue
        with open(os.path.join(res, out), 'r') as f:
            scores = [float(model.split()[0])
                      for model in vina_result.findall(f.read())]
        if not scores:
            continue
        best = min(scores)
        screen_res.append([out.split('_out')[0], best])
        insert_lst.append(
            Screen(work_name=work_name,
                   screen_cat='screen',
                   affinity=best,
                   # 'x_out.pdbqt' -> 'x_out.pdb', the converted structure
                   path=os.path.join(res.split('media/')[1], out[:-2])))

    # Guarded like the sibling screening functions: with no results the
    # two-column DataFrame construction used to raise, leaving the job
    # stuck in 'computing'.
    if screen_res:
        Screen.objects.bulk_create(insert_lst)
        # Build the frame from the list itself so affinities stay floats.
        # Going through np.array() turned them into strings, and the
        # descending string sort then misplaced values such as -10.2.
        # NOTE(review): numeric ascending order keeps the strongest binder
        # first, which is what the string sort produced for single-digit
        # negative affinities.
        df = pd.DataFrame(screen_res, columns=['id', 'Affinity (kcal/mol)'])
        df = df.sort_values("Affinity (kcal/mol)", ascending=True)
        # Written directly to its final location; a temporary file in the
        # shared working directory could be clobbered by a concurrent job.
        df.to_csv(os.path.join(res, screen_out), index=False)

    screen_status(work_name, status='completed')