Exemplo n.º 1
0
def main():
    """Count the models of every PELE simulation and report sparse ones.

    For each entry yielded by ``SimIt(PELE_sim_paths)`` the report files are
    collected, ``parallel_models_counter`` is mapped over them with a pool of
    ``proc_number`` workers, and the per-report results are summed. All the
    totals are printed, followed by the subset that falls below
    ``warning_threshold``.
    """
    # Command-line configuration
    PELE_sim_paths, output_path, proc_number, warning_threshold = parse_args()

    row_template = ' - {:<100}: {:10d} models'
    models_counter = {}

    simulations = SimIt(PELE_sim_paths)
    for PELE_sim_path in simulations:
        report_finder = SimIt(PELE_sim_path)
        report_finder.build_repo_it(output_path, 'report')
        reports = list(report_finder.repo_it)

        # One task per report file
        with Pool(proc_number) as pool:
            counts = pool.map(parallel_models_counter, reports)

        models_counter[PELE_sim_path] = sum(counts)

    print('Results:')
    for sim, total in models_counter.items():
        print(row_template.format(str(sim), total))

    print('Warnings:')
    flagged = [(sim, total) for sim, total in models_counter.items()
               if total < warning_threshold]
    for sim, total in flagged:
        print(row_template.format(str(sim), total))
Exemplo n.º 2
0
def main():
    """Summarize the H bonds of one or more H bond files in a bar plot.

    Every file matched by the ``hb_paths`` glob pattern(s) is parsed and
    reduced to a counter according to ``mode``:

    - ``count``: ``count(hbond_atoms)``
    - ``relative_frequency``: ``count_norm(hbond_atoms)``
    - ``frequent_interactions``: ``count_norm`` followed by
      ``discard_non_frequent(counter, lim)``
    - ``mean_energies``: ``count_energy`` fed with a metric extracted from
      the PELE reports found next to the H bond file

    The per-file counters are merged with ``combine_results`` and plotted
    with ``generate_barplot``.

    Raises:
        ValueError: if ``mode`` is not one of the modes above or if no file
            matches ``hb_paths``.
    """
    hb_paths, mode, lim, epochs_to_ignore, trajectories_to_ignore, \
        models_to_ignore, relative_output_path, proc_number, \
        PELE_output_path, PELE_report_name = parse_args()

    # Fail early with a clear message; previously an unknown mode only
    # surfaced as an UnboundLocalError on `counter` after parsing a file
    valid_modes = ("count", "relative_frequency", "frequent_interactions",
                   "mean_energies")
    if (mode not in valid_modes):
        raise ValueError('Unknown mode \'{}\', expected one of {}'.format(
            mode, ', '.join(valid_modes)))

    # Accept either a single glob pattern or a list of them
    patterns = hb_paths if isinstance(hb_paths, list) else [hb_paths]
    hb_paths_list = []
    for pattern in patterns:
        hb_paths_list += glob.glob(pattern)

    # Without any input file there is nothing to plot and `output_path`
    # would never be defined
    if (len(hb_paths_list) == 0):
        raise ValueError('No H bond file matched the supplied path(s)')

    general_results = {}
    for hb_path in hb_paths_list:
        df = create_df(hb_path)
        # Calculate hbond_atoms, which is a dict with PELE_ids as key and
        # corresponding lists of H bonds as values
        hbond_atoms = get_hbond_atoms_from_df(df, hb_path, epochs_to_ignore,
                                              trajectories_to_ignore,
                                              models_to_ignore)

        # NOTE: recomputed for every file; only the value derived from the
        # last file reaches generate_barplot
        if (relative_output_path is not None):
            output_path = Path(hb_path).parent.joinpath(relative_output_path)
        else:
            output_path = relative_output_path

        if (mode == "count"):
            counter = count(hbond_atoms)

        elif (mode == "relative_frequency"):
            counter = count_norm(hbond_atoms)

        elif (mode == "frequent_interactions"):
            counter = count_norm(hbond_atoms)
            counter = discard_non_frequent(counter, lim)

        else:  # mode == "mean_energies"
            sim_it = SimIt(Path(hb_path).parent)
            sim_it.build_repo_it(PELE_output_path, 'report')
            reports = list(sim_it.repo_it)

            PELE_ids = extract_PELE_ids(reports)
            # Column 4 of the reports; presumably the interaction energy
            # given the variable names below -- TODO confirm
            metrics = extract_metrics(reports, (4, ), proc_number)

            ies = [list(map(float, np.concatenate(ies_chunk)))
                   for ies_chunk in metrics]

            ie_by_PELE_id = get_metric_by_PELE_id(PELE_ids, ies)

            counter = count_energy(hbond_atoms, ie_by_PELE_id)

        general_results[hb_path] = counter

    combined_results = combine_results(general_results, mode)

    generate_barplot(combined_results, mode, lim, output_path)
def main():
    """Write summary statistics of subpocket intersections per simulation.

    For every simulation yielded by ``SimIt(PELE_sim_paths)`` the csv file
    ``csv_file`` is read, the columns whose name contains ``_intersection``
    are selected, and the mean, extremes and several percentiles of each
    column are written to ``output_name`` inside the simulation folder.
    Simulations lacking the csv file or any matching column are skipped.
    """
    # Parse args
    PELE_sim_paths, csv_file, output_name = parse_args()

    all_sim_it = SimIt(PELE_sim_paths)

    print(' - Simulations that will be analyzed:')
    for sim_path in all_sim_it:
        print('   - {}'.format(sim_path.name))

    # Marker that identifies intersection columns. It is removed with
    # str.replace: str.strip would treat it as a set of characters and eat
    # any leading/trailing letter of the subpocket name found in that set
    marker = '_intersection'

    for PELE_sim_path in all_sim_it:
        print('')
        print(' - Analyzing {}'.format(PELE_sim_path))

        csv_path = PELE_sim_path.joinpath(csv_file)

        if (not csv_path.is_file()):
            print(' - Skipping simulation because intersections csv file ' +
                  'was missing')
            continue

        data = pd.read_csv(str(csv_path))
        # Drop the unnamed columns that pandas creates for saved indexes
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]

        columns = [col for col in data.columns if marker in col]
        print('   - Subpockets found:')
        for col in columns:
            print('     - {}'.format(col.replace(marker, '')))

        if (len(columns) == 0):
            print(' - Skipping simulation because no subpocket was found')
            continue

        with open(str(PELE_sim_path.joinpath(output_name)), 'w') as f:
            f.write('   - Subpocket results:\n')
            for col in columns:
                intersects = data.loc[:, col].to_numpy()
                stats = (
                    ('Mean', np.mean(intersects)),
                    ('Min', np.min(intersects)),
                    ('5th percentile', np.percentile(intersects, 5)),
                    ('1st quartile', np.percentile(intersects, 25)),
                    ('Median', np.median(intersects)),
                    ('3rd quartile', np.percentile(intersects, 75)),
                    ('95th percentile', np.percentile(intersects, 95)),
                    ('Max', np.max(intersects)),
                )
                f.write('   - {}:\n'.format(col.replace(marker, '')))
                for label, value in stats:
                    f.write('     - {}: {: 7.2f}\n'.format(label, value))
def main():
    """Plot H bond frequencies against pIC50 with a linear fit per H bond.

    The filtered H bonds csv of every simulation is read, the columns that
    match the requested ``hbonds`` are kept together with ``donors`` and
    ``acceptors``, and the result is merged (on ``path``) with the IC50
    table read from ``ic50_csv``. One subplot per H bond column is drawn
    with the data points, their labels, a linear regression line and its
    r2 score. The figure is saved as ``Hbond_correlations.png``.

    NOTE: the H bond columns that are plotted are those resolved for the
    last simulation that was read, as in the original implementation.

    Raises:
        ValueError: if no H bond was requested, if no simulation supplied a
            filtered H bonds csv file, if no column matched the requested
            H bonds, or if no simulation has an entry in the IC50 table.
    """
    # Parse args
    PELE_sim_paths, filtered_hbonds_path, ic50_csv, hbonds, normalize = \
        parse_args()

    if (len(hbonds) == 0):
        raise ValueError('No H bonds to track were defined')

    all_sim_it = SimIt(PELE_sim_paths)

    print(' - Simulations that will be analyzed:')
    for sim_path in all_sim_it:
        print('   - {}'.format(sim_path.name))

    spec_hbonds = []
    sim_frames = []
    for PELE_sim_path in all_sim_it:
        print('')
        print(' - Reading data from {}'.format(PELE_sim_path))

        hbonds_csv = PELE_sim_path.joinpath(filtered_hbonds_path)
        if (not hbonds_csv.is_file()):
            print(' - Skipping simulation because filtered H bonds csv file ' +
                  'was missing')
            continue

        sim_data = pd.read_csv(hbonds_csv, sep=';')

        # First not-yet-selected column that contains each requested H bond
        spec_hbonds = []
        for hbond in hbonds:
            for col in sim_data.columns:
                if (hbond in col and col not in spec_hbonds):
                    spec_hbonds.append(col)
                    break

        print('   - Retrieving H bonds: {}'.format(spec_hbonds))

        # Explicit copy so that adding the 'path' column never targets a
        # view of the original frame
        sim_data = sim_data.loc[:, spec_hbonds + ['donors', 'acceptors']].copy()
        sim_data['path'] = PELE_sim_path.name
        sim_frames.append(sim_data)

    # Previously these situations surfaced as NameError/KeyError or as an
    # obscure matplotlib error further below
    if (len(sim_frames) == 0):
        raise ValueError('No filtered H bonds csv file was found in the ' +
                         'simulation paths that were supplied')
    if (len(spec_hbonds) == 0):
        raise ValueError('None of the requested H bonds was found in the ' +
                         'filtered H bonds csv file')

    # Single concatenation instead of growing the frame inside the loop
    data = pd.concat(sim_frames)

    print(' - Retrieving IC50 values')
    ic50 = pd.read_csv(ic50_csv)
    data = data.merge(ic50, left_on='path', right_on='path')
    if (data.empty):
        raise ValueError('None of the simulations has an entry in the ' +
                         'IC50 csv file')
    # Presumably IC50 is given in micromolar, hence the 1e6 factor
    # -- TODO confirm
    data['pIC50'] = -np.log10(data.loc[:, 'IC50'] / 1000000)

    if (normalize):
        print(' - Normalizing H bonds')
        data['donors+acceptors'] = data['donors'] + data['acceptors']
        for hbond in spec_hbonds:
            data[hbond] = data[hbond] / data['donors+acceptors']

    # Two plots per row, rounding up
    n_rows = (len(spec_hbonds) + 1) // 2
    # squeeze=False keeps axs two-dimensional even when there is a single
    # row; otherwise axs[row][col] fails for one or two H bonds
    fig, axs = plt.subplots(n_rows, 2, figsize=(15, 5 * n_rows),
                            squeeze=False)
    fig.suptitle('H bond frequency vs -pIC50')

    X_all = data.loc[:, spec_hbonds].values
    y_all = data['pIC50'].values
    point_labels = data['path'].values

    for i, hbond in enumerate(spec_hbonds):
        ax = axs[i // 2][i % 2]
        ax.set_title(hbond)
        if (normalize):
            ax.set_ylabel('Normalized frequency')
        else:
            ax.set_ylabel('Frequency')
        ax.set_xlabel('-pIC50')

        x_array = X_all[:, i]
        ax.plot(y_all, x_array, ls='', c='r', marker='x')

        ax.set_axisbelow(True)
        ax.grid(True, color='white')
        ax.set_facecolor('lightgray')

        # pIC50 is the independent variable of the fit
        lin_reg = LinearRegression()
        lin_reg.fit(y_all.reshape(-1, 1), x_array.reshape(-1, 1))
        y_pred = lin_reg.predict(y_all.reshape(-1, 1)).ravel()

        # Computed once instead of on every iteration of the loop below
        lowest_pred = y_pred.min()
        highest_pred = y_pred.max()
        for x, xp, y, l in zip(x_array, y_pred, y_all, point_labels):
            # Abscissas of the two ends of the regression line
            if (xp == lowest_pred):
                min_y = y
            if (xp == highest_pred):
                max_y = y

            ax.annotate(l.split('_')[-1], (y, x),
                        textcoords="offset points",
                        xytext=(0, 10),
                        ha='center')

        ax.plot((min_y, max_y), (lowest_pred, highest_pred), 'k--',
                linewidth=1)
        ax.autoscale(tight=False)

        # Invisible handle, so the legend only shows the score text
        handles = [
            mpl_patches.Rectangle((0, 0),
                                  1,
                                  1,
                                  fc="white",
                                  ec="white",
                                  lw=0,
                                  alpha=0)
        ]

        score = "r2 = {:.3f}".format(metrics.r2_score(x_array, y_pred))
        labels = [score]

        ax.legend(handles,
                  labels,
                  loc='best',
                  fontsize='small',
                  fancybox=True,
                  framealpha=0.7,
                  handlelength=0,
                  handletextpad=0)

    # Empty unpaired axis
    if (len(spec_hbonds) % 2 == 1):
        fig.delaxes(axs[-1][1])

    plt.tight_layout(rect=(0, 0, 1, 0.97))
    plt.savefig('Hbond_correlations.png')
    plt.close()
def main():
    """Plot subpocket non-polar occupancies against pIC50 with linear fits.

    The csv file ``csv_file_name`` of every simulation is scanned for
    columns containing ``_nonpolar_intersection``. For each simulation and
    each of those columns the requested ``percentile`` is computed, the
    results are merged (on ``path``) with the IC50 table read from
    ``ic50_csv`` and one subplot per subpocket is drawn with the data
    points, their labels, a linear regression line and its r2 score. The
    figure is saved as ``subpocket_nonpolar_correlations.png``.

    Raises:
        ValueError: if no non-polar intersection column is found or if no
            simulation has an entry in the IC50 table.
    """
    # Parse args
    PELE_sim_paths, csv_file_name, ic50_csv, percentile = parse_args()

    all_sim_it = SimIt(PELE_sim_paths)

    print(' - Simulations that will be analyzed:')
    for sim_path in all_sim_it:
        print('   - {}'.format(sim_path.name))

    # Marker that identifies the columns of interest. It is removed with
    # str.replace: str.strip would treat it as a set of characters and eat
    # any leading/trailing letter of the subpocket name found in that set
    marker = '_nonpolar_intersection'

    columns = []
    for PELE_sim_path in all_sim_it:
        if (not PELE_sim_path.joinpath(csv_file_name).is_file()):
            print(' - Skipping simulation because subpockets csv file ' +
                  'was missing')
            continue

        data = pd.read_csv(PELE_sim_path.joinpath(csv_file_name))
        data = data.loc[:, ~data.columns.str.contains('^Unnamed')]

        # Ordered union of the matching columns of all simulations
        for col in data.columns:
            if (marker in col and col not in columns):
                columns.append(col)

    print('   - Subpockets found:')
    for col in columns:
        print('     - {}'.format(col.replace(marker, '')))

    if (len(columns) == 0):
        raise ValueError('Subpocket nonpolar intersections were missing in ' +
                         'the simulation paths that were supplied')

    # NOTE: an extra figure used to be created here. It was never saved nor
    # closed and indexing its axes failed when a single subpocket was
    # found, so it has been dropped.

    rows = []
    for PELE_sim_path in all_sim_it:
        print('')
        print(' - Reading data from {}'.format(PELE_sim_path))

        if (not PELE_sim_path.joinpath(csv_file_name).is_file()):
            print(' - Skipping simulation because intersection csv file ' +
                  'is missing')
            continue

        print('   - Retrieving subpocket intersections')
        data = pd.read_csv(PELE_sim_path.joinpath(csv_file_name))

        rows.append([PELE_sim_path.name] +
                    [np.percentile(data[col].values, percentile)
                     for col in columns])

    # Built in one go instead of concatenating a frame per simulation
    subpocket_results = pd.DataFrame(rows, columns=['path'] + columns)

    print(' - Retrieving IC50 values')
    ic50 = pd.read_csv(ic50_csv)
    subpocket_results = subpocket_results.merge(ic50,
                                                left_on='path',
                                                right_on='path')
    if (subpocket_results.empty):
        raise ValueError('None of the simulations has an entry in the ' +
                         'IC50 csv file')
    # Presumably IC50 is given in micromolar, hence the 1e6 factor
    # -- TODO confirm
    subpocket_results['pIC50'] = -np.log10(
        subpocket_results.loc[:, 'IC50'] / 1000000)

    # Two plots per row, rounding up
    n_rows = (len(columns) + 1) // 2
    # squeeze=False keeps axs two-dimensional even when there is a single
    # row; otherwise axs[row][col] fails for one or two subpockets
    fig, axs = plt.subplots(n_rows, 2, figsize=(15, 5 * n_rows),
                            squeeze=False)
    fig.suptitle('Subpocket non-polar occupancy vs -pIC50')

    X_all = subpocket_results.loc[:, columns].values
    y_all = subpocket_results['pIC50'].values
    point_labels = subpocket_results['path'].values

    for i, col in enumerate(columns):
        subpocket_name = col.replace(marker, '')

        ax = axs[i // 2][i % 2]
        ax.set_title(subpocket_name)
        ax.set_ylabel('{}-percentile of {} occupancies'.format(
            percentile, subpocket_name))
        ax.set_xlabel('-pIC50')

        x_array = X_all[:, i]
        ax.plot(y_all, x_array, ls='', c='r', marker='x')

        ax.set_axisbelow(True)
        ax.grid(True, color='white')
        ax.set_facecolor('lightgray')

        # pIC50 is the independent variable of the fit
        lin_reg = LinearRegression()
        lin_reg.fit(y_all.reshape(-1, 1), x_array.reshape(-1, 1))
        y_pred = lin_reg.predict(y_all.reshape(-1, 1)).ravel()

        # Computed once instead of on every iteration of the loop below
        lowest_pred = y_pred.min()
        highest_pred = y_pred.max()
        for x, xp, y, path in zip(x_array, y_pred, y_all, point_labels):
            # Abscissas of the two ends of the regression line
            if (xp == lowest_pred):
                min_y = y
            if (xp == highest_pred):
                max_y = y

            ax.annotate(path, (y, x),
                        textcoords="offset points",
                        xytext=(0, 10),
                        ha='center')

        ax.plot((min_y, max_y), (lowest_pred, highest_pred), 'k--',
                linewidth=1)
        ax.autoscale(tight=False)

        # Invisible handle, so the legend only shows the score text
        handles = [
            mpl_patches.Rectangle((0, 0),
                                  1,
                                  1,
                                  fc="white",
                                  ec="white",
                                  lw=0,
                                  alpha=0)
        ]

        score = "r2 = {:.3f}".format(skmetrics.r2_score(x_array, y_pred))
        labels = [score]

        ax.legend(handles,
                  labels,
                  loc='best',
                  fontsize='small',
                  fancybox=True,
                  framealpha=0.7,
                  handlelength=0,
                  handletextpad=0)

    # Empty unpaired axis
    if (len(columns) % 2 == 1):
        fig.delaxes(axs[-1][1])

    plt.tight_layout(rect=(0, 0, 1, 0.97))
    plt.savefig('subpocket_nonpolar_correlations.png')
    plt.close()
def main():
    """Compute the persistance of the tracked H bonds in every simulation.

    For each simulation yielded by ``SimIt(PELE_sim_paths)`` the H bonds
    file and the ligand rotamer library are located, the H bond data is
    extracted and ``hbond_persistance`` is evaluated for the H bonds to
    track. The results are printed and two files are written inside the
    simulation folder: ``output_path``, with the persistances of each
    tracked H bond sorted in descending order, and a companion
    ``<output_path without suffix>_summary.out`` with one header line and
    one line of values.

    Simulations lacking the H bonds file, the rotamer library or any H bond
    data are skipped.
    """
    # Parse args
    PELE_sim_paths, hbonds_relative_path, hbonds, output_path, lig_resname = \
        parse_args()

    output_path = Path(output_path)
    # Only the final suffix is dropped. The former str.replace call removed
    # every occurrence of the suffix text found anywhere in the path
    summary_path = str(output_path.with_suffix('')) + '_summary.out'

    hbonds_to_track = get_hbond_linkers(hbonds)

    print(' - Persistance will be calculated on H bonds :')
    print_hbonds(hbonds_to_track)

    all_sim_it = SimIt(PELE_sim_paths)

    for PELE_sim_path in all_sim_it:
        print('')
        print(' - Filtering H bonds from {}'.format(PELE_sim_path))
        hbonds_path = PELE_sim_path.joinpath(hbonds_relative_path)
        lig_rotamers_path = PELE_sim_path.joinpath('DataLocal/' +
                                                   'LigandRotamerLibs/' +
                                                   '{}'.format(lig_resname) +
                                                   '.rot.assign')

        if (not hbonds_path.is_file()):
            print(' - Skipping simulation because hbonds file was ' +
                  'missing')
            continue

        if (not lig_rotamers_path.is_file()):
            print(' - Skipping simulation because ligand rotamer library was' +
                  ' missing')
            continue

        hbond_data, n_donors, n_acceptors = extract_hbond_linkers(hbonds_path)

        print(' - Detected {} sets of H bonds'.format(len(hbond_data)))

        # The check must look at what was just extracted from this
        # simulation; it used to test the command-line `hbonds` argument,
        # which is the same for every simulation
        if (len(hbond_data) == 0):
            print(' - Skipping simulation because no H bonds were found')
            continue

        persistance_by_hbond = hbond_persistance(hbond_data, hbonds_to_track)

        n_rotamers = get_ligand_rotatable_bonds(lig_rotamers_path)

        # Maximum and mean persistance of every tracked H bond; H bonds
        # without any record count as a single persistance of 0
        max_by_linker = []
        mean_by_linker = []
        for hb_linker in hbonds_to_track:
            persistances = persistance_by_hbond.get(hb_linker, [0, ])
            max_by_linker.append(np.max(persistances))
            mean_by_linker.append(np.mean(persistances))

        print(' - Results:')
        print('   - Ligand rotamers:           {:10d}'.format(n_rotamers))
        print('   - Ligand donors:             {:10d}'.format(n_donors))
        print('   - Ligand acceptors:          {:10d}'.format(n_acceptors))
        print('   - Total models:              {:10d}'.format(len(hbond_data)))
        if (len(hbonds_to_track) > 0):
            print('   - Maximum persistance by H bond:')
        for hb_linker, max_p in zip(hbonds_to_track, max_by_linker):
            print('     - {}:{}:{:20s} {:10d}'.format(
                hb_linker.chain, hb_linker.residue,
                ','.join(list(hb_linker.atoms)), max_p))

        if (len(hbonds_to_track) > 0):
            print('   - Mean persistance by H bond:')
        for hb_linker, mean_p in zip(hbonds_to_track, mean_by_linker):
            print('     - {}:{}:{:20s} {:10.1f}'.format(
                hb_linker.chain, hb_linker.residue,
                ','.join(list(hb_linker.atoms)), mean_p))

        # One line per tracked H bond: its label followed by all of its
        # persistances, longest first
        with open(str(PELE_sim_path.joinpath(output_path)), 'w') as f:
            for hb_linker in hbonds_to_track:
                f.write('{}:{}:{};'.format(hb_linker.chain, hb_linker.residue,
                                           ','.join(hb_linker.atoms)))
                f.write(';'.join(
                    map(
                        str,
                        sorted(persistance_by_hbond.get(hb_linker, []),
                               reverse=True))))
                f.write('\n')

        # Header line plus a single line of values
        with open(str(PELE_sim_path.joinpath(summary_path)), 'w') as f:
            f.write('rotamers;donors;acceptors;models')
            for hb_linker in hbonds_to_track:
                linker_label = '{}:{}:{}'.format(hb_linker.chain,
                                                 hb_linker.residue,
                                                 ','.join(hb_linker.atoms))
                f.write(';maxp_{}'.format(linker_label))
                f.write(';meanp_{}'.format(linker_label))
            f.write('\n')

            f.write('{};{};{};'.format(n_rotamers, n_donors, n_acceptors))
            f.write('{}'.format(len(hbond_data)))
            for max_p, mean_p in zip(max_by_linker, mean_by_linker):
                f.write(';{:d}'.format(max_p))
                f.write(';{:.1f}'.format(mean_p))
Exemplo n.º 7
0
def _lowest_energy_model(reports, energies_by_report):
    """Find the model with the lowest energy among all the reports.

    Returns a tuple ``(energy, model_id)`` where ``model_id`` is
    ``(report folder, trajectory number, model index)``. The trajectory
    number is made of the digits found in the report file name. When no
    energy is available, ``(inf, None)`` is returned.
    """
    best_energy = float('inf')
    best_id = None
    for repo, energies in zip(reports, energies_by_report):
        for i, energy in enumerate(energies):
            energy = float(energy)
            if (energy < best_energy):
                best_energy = energy
                best_id = (repo.parent,
                           int(''.join(filter(str.isdigit, repo.name))), i)

    return best_energy, best_id


def _save_model_as_pdb(model_id, topology_path, pdb_path):
    """Extract the model identified by model_id and save it as a PDB file."""
    trajectory_folder, trajectory_number, model_index = model_id
    t = md.load(str(trajectory_folder.joinpath(
        'trajectory_{}.xtc'.format(trajectory_number))),
                top=str(topology_path))
    t[model_index].save_pdb(str(pdb_path))


def main():
    """Extract the best total and interaction energies of each simulation.

    For every simulation yielded by ``SimIt(PELE_sim_paths)`` the metrics
    returned by ``parallel_metrics_getter`` are gathered from all the
    reports with a pool of ``proc_number`` workers. The lowest total energy
    and the lowest interaction energy are written, together with the ligand
    properties, to ``results.out`` inside the output folder, and the
    corresponding models are saved there as PDB files. Simulations lacking
    the topology file are skipped.
    """
    # Parse args
    PELE_sim_paths, PELE_output_path, proc_number, output_relative_path, \
        ie_col, topology_relative_path, lig_resname = parse_args()

    all_sim_it = SimIt(PELE_sim_paths)

    p_function = partial(parallel_metrics_getter, ie_col)

    for PELE_sim_path in all_sim_it:
        sim_it = SimIt(PELE_sim_path)
        sim_it.build_repo_it(PELE_output_path, 'report')
        print(' - Analyzing {}'.format(PELE_sim_path))

        topology_path = PELE_sim_path.joinpath(topology_relative_path)
        if (not topology_path.is_file()):
            print(' - Skipping simulation because topology file with ' +
                  'connectivity was missing')
            continue

        reports = list(sim_it.repo_it)
        with Pool(proc_number) as pool:
            results = pool.map(p_function, reports)

        # Each result holds the total and the interaction energies of a
        # report. The search starts from +inf rather than 0 so that the
        # best model is also found when every energy is non-negative
        min_te, min_te_PDB_id = _lowest_energy_model(
            reports, [tes for tes, _ in results])
        min_ie, min_ie_PDB_id = _lowest_energy_model(
            reports, [ies for _, ies in results])

        # Keep reporting 0 when no model is available, as before
        if (min_te_PDB_id is None):
            min_te = 0
        if (min_ie_PDB_id is None):
            min_ie = 0

        ligand_heavy_atoms, ligand_mass = extract_ligand_properties(
            topology_path, lig_resname)

        output_path = PELE_sim_path.joinpath(output_relative_path)

        # Also creates the missing parent folders of a nested output path
        output_path.mkdir(parents=True, exist_ok=True)

        with open(str(output_path.joinpath('results.out')), 'w') as f:
            f.write('lig_heavy_atoms,lig_mass,')
            f.write('best_total_energy,best_interaction_energy\n')
            f.write('{},{:.3f},{},{}\n'.format(ligand_heavy_atoms, ligand_mass,
                                               min_te, min_ie))

        if (min_te_PDB_id is not None):
            _save_model_as_pdb(min_te_PDB_id, topology_path,
                               output_path.joinpath('best_total_energy.pdb'))

        if (min_ie_PDB_id is not None):
            _save_model_as_pdb(
                min_ie_PDB_id, topology_path,
                output_path.joinpath('best_interaction_energy.pdb'))