def process_results(this_folders):
    results_matrix = {}
    #fig = plt.figure(figsize=(18, 18))
    # Plot by temperature
    for folder in this_folders:
        subfolder_results = []
        for subfolder in subfolders:
            results_file = os.path.join(folder, subfolder, "results", "results.json")
            if os.path.exists(results_file):
                results = load_dic_in_json(results_file)
                print results_file, results["selected"][results["best_clustering"]]["clustering"]["number_of_clusters"], \
                    results["selected"][results["best_clustering"]]["evaluation"]["Noise level"]
                subfolder_results.append(results["selected"][results["best_clustering"]]["clustering"]["number_of_clusters"])
            else:
                subfolder_results.append(0.)
        results_matrix[folder] = subfolder_results
        plt.plot(range(len(subfolders)), subfolder_results, linewidth=2, label = folder.split("/")[2])
    plt.legend(loc=2,prop={'size':6})
    plt.show()

    ## Now plot by trajectory length
    for i in range(len(subfolders)):
        plot_by_size = []
        for folder in this_folders:
            plot_by_size.append(results_matrix[folder][i])
        plt.plot(range(len(this_folders)), plot_by_size, linewidth=2,label = subfolders[i])
    plt.legend(prop={'size':6})
    plt.show()
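# Hedged usage sketch -- `subfolders`, `load_dic_in_json` and `plt` (matplotlib.pyplot)
# are assumed to be module-level names, and the folder/subfolder values below are
# illustrative only, not taken from the original runs:
#
#   import matplotlib.pyplot as plt
#   subfolders = ["1000", "3000", "6000", "9000"]   # trajectory lengths
#   process_results(["trajectories/campari/300K", "trajectories/campari/330K"])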
def process_campari_vs_profasi(campari, profasi):
    data = []
    for i in range(len(campari)):
        A_folder = campari[i]
        B_folder = profasi[i]
        results_file = os.path.join("comparisons", "campari_vs_profasi", "%svs%s" % (A_folder, B_folder), "results", "conf_space_comp.json")
        if os.path.exists(results_file):
            data.append(load_dic_in_json(results_file)["overlap"])
        else:
            data.append(0.)
            print results_file, "not found"

    plt.plot(range(len(data)), data, linewidth=2)
    plt.show()
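# Hedged usage sketch -- `campari` and `profasi` are assumed to be equal-length lists of
# run folder names, compared pairwise by index (the names below are illustrative):
#
#   process_campari_vs_profasi(["c_300K", "c_330K"], ["p_300K", "p_330K"])
#
# which looks up comparisons/campari_vs_profasi/<A>vs<B>/results/conf_space_comp.json.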
def process_matrix(folders, image_path, sim_type):
    data = []
    for i in range(len(folders) - 1):
        A_folder = folders[i]
        for j in range(i + 1, len(folders)):
            B_folder = folders[j]
            results_file = os.path.join("comparisons",sim_type, "%svs%s"%(A_folder, B_folder), "results", "conf_space_comp.json")
            print results_file
            if os.path.exists(results_file):
                data.append(load_dic_in_json(results_file)["overlap"])
            else:
                data.append(0.)
    print data
    matrixToImage(CondensedMatrix(data), image_path, diagonal_value=1.)
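# The nested i < j loops above visit the pairs in the row-major upper-triangular order
# expected by CondensedMatrix; missing comparisons default to an overlap of 0 and the
# diagonal is fixed to 1 (full self-overlap) when the matrix is rendered as an image.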
def process_matrix_stats(this_folders):
    mean = []
    stddev = []
    for folder in this_folders:
        subfolder = "9000"
        matrix_stats_file = os.path.join(folder, subfolder, "matrix", "statistics.json")
        if os.path.exists(matrix_stats_file):
            stats = load_dic_in_json(matrix_stats_file)
            mean.append(stats["Mean"])
            stddev.append(stats["Std. Dev."])
        else:
            mean.append(0)
            stddev.append(0)
    plt.errorbar(range(len(mean)), mean, yerr=stddev, linewidth=2)
    plt.show()
"""
Created on 27/03/2014

@author: victor
"""
import copy
import os

from tools import load_dic_in_json, create_dir, save_dic_in_json

script_template = load_dic_in_json("template.json")

campari_trajs = [
                 "trajectories/campari/N_000_.pdb",
                 "trajectories/campari/N_001_.pdb",
                 "trajectories/campari/N_002_.pdb",
                 "trajectories/campari/N_003_.pdb",
                 "trajectories/campari/N_004_.pdb",
                 "trajectories/campari/N_005_.pdb",
                 "trajectories/campari/N_006_.pdb",
                 "trajectories/campari/N_007_.pdb",
                 "trajectories/campari/N_008_.pdb",
                 "trajectories/campari/N_009_.pdb",
                 "trajectories/campari/N_010_.pdb",
                 "trajectories/campari/N_011_.pdb",
                 "trajectories/campari/N_012_.pdb",
                 "trajectories/campari/N_013_.pdb",
                 "trajectories/campari/N_014_.pdb",
                 "trajectories/campari/N_015_.pdb"
                 ]
        selection = filterRecords("'L1  Binding Ene' < -226 and 'L1  Binding Ene' > -424 and 'L1(24.954.352.7)' < 14.1 and 'L1(24.954.352.7)' > 5.9", records)
        genSingleTrajFast(FILTERED_PDB_FILE, records, selection)
        genMetricsFile(METRICS_FILE, ["L1(24.954.352.7)","L1  Binding Ene"], selection)
        metrics = genMetrics(["L1(24.954.352.7)","L1  Binding Ene"], selection).T

    metrics = numpy.loadtxt(METRICS_FILE).T
    
    #--------------------------------  
    # Prepare the clustering for this guy
    #--------------------------------
    ## Load template and modify its contents for this case
    CLUSTERING_PATH = os.path.join(RESULTS_PATH,"%s_%s_clustering"%(options.drug, options.protein))
    MAX_CLUSTERS = 10
    SCRIPT_PATH = os.path.join(RESULTS_PATH,"clustering.json")
    OUT_FILE = os.path.join(RESULTS_PATH, "clustering.out")
    script = load_dic_in_json(options.template)
    script["global"]["workspace"]["base"] = CLUSTERING_PATH
    script["data"]["files"].append(FILTERED_PDB_FILE)
    script["clustering"]["evaluation"]["maximum_clusters"] = MAX_CLUSTERS
    save_dic_in_json(script, SCRIPT_PATH)
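    # Minimal sketch of the keys the template manipulated above is assumed to provide
    # (the real pyProCT script normally carries many more options):
    #
    #   {
    #       "global":     {"workspace": {"base": "..."}},
    #       "data":       {"files": []},
    #       "clustering": {"evaluation": {"maximum_clusters": 0}}
    #   }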
    os.system("python -m pyproct.main %s > %s"%(SCRIPT_PATH, OUT_FILE))
    best_clustering = Clustering.from_dic(get_best_clustering(CLUSTERING_PATH)["clustering"])
     
    #--------------------------------
    # Now calculate the values
    #--------------------------------
    results = {}
    for cluster in best_clustering.clusters:
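        # Row order follows the metric list passed to genMetrics/genMetricsFile above:
        # metrics[0] holds the "L1(24.954.352.7)" distances, metrics[1] the "L1  Binding Ene" values.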
        energies = metrics[1][cluster.all_elements]
        distances = metrics[0][cluster.all_elements]
        results[cluster.id] = {}
# RMSD_script["data"]["files"].append(os.path.join("RDCvsRMSD", "campari.pdb"))
#
# RCD_script = copy.deepcopy(template_script)
# RCD_script["global"]["workspace"]["base"] = os.path.join("RDCvsRMSD", "campari", "RDC", "clustering")
# RCD_script["data"]["matrix"]["method"] = "load"
# RCD_script["data"]["matrix"]["parameters"]["path"] = os.path.join("RDCvsRMSD", "campari", "RDC", "matrix")
# RCD_script["data"]["files"].append(os.path.join("RDCvsRMSD", "campari.pdb"))
#
# tools.save_dic_in_json(RCD_script, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json"))
# tools.save_dic_in_json(RMSD_script, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json"))
#
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RDC", "script.json")))
# os.system("python %s %s "%(PYPROCT, os.path.join("RDCvsRMSD", "campari", "RMSD", "script.json")))


results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RDC_refined", "clustering","results","results.json"))
RDC_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "RMSD_refined", "clustering","results","results.json"))
RMSD_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_clustering = Clustering.from_dic(results["selected"][results["best_clustering"]]["clustering"]).gen_class_list(number_of_elements = 5926)

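# Besides the best-scored Dihedral clustering, two fixed clustering ids ("clustering_0098"
# and "clustering_0056") are loaded from the same results file below as lower-quality
# ("bad" and "medium" score) references.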
results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_bad_score = Clustering.from_dic(results["selected"]["clustering_0098"]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
Dihedral_medium_score = Clustering.from_dic(results["selected"]["clustering_0056"]["clustering"]).gen_class_list(number_of_elements = 5926)

results = tools.load_dic_in_json(os.path.join("RDCvsRMSD", "campari", "Dihedral", "clustering","results","results.json"))
    records = []
    processFile(traj_pdb, records, True)
    all_metrics = genMetrics(plots["totale_spawning"], records)
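    # pdist returns the condensed (upper-triangular) distance vector, which is the
    # layout CondensedMatrix expects below.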
    matrix_data = scipy.spatial.distance.pdist(normalize_metrics(all_metrics), 'euclidean')
    m_handler = MatrixHandler()
    m_handler.distance_matrix = CondensedMatrix(matrix_data)
    matrix_file = os.path.join(base_dir, TENERGY_SPAWN_MATRIX)
    m_handler.saveMatrix(matrix_file)

    #######################################################################################################################
    # Cluster by metrics
    #######################################################################################################################
    print "* Spawning - totalE clustering"
    be_rmsd_clustering_script_path = os.path.join(base_dir, 'scripts', CLUSTERING_SPAWN_TOTE_SCRIPT)
    working_directory = os.path.join(base_dir, TOTALE_SPAWN_WORKSPACE)
    params = load_dic_in_json(be_rmsd_clustering_script_path)
    params['global']['workspace']['base'] = working_directory
    params['data']['files'] = [os.path.join(os.getcwd(), traj_pdb)]
    params['data']['matrix']['parameters']['path'] = matrix_file
    save_dic_in_json(params, be_rmsd_clustering_script_path)
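    # Note that the patched parameters are written back over the original script file
    # before it is handed to use_pyproct.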
    use_pyproct(working_directory, be_rmsd_clustering_script_path)

    #######################################################################################################################
    # Get 5 representatives. 2 strategies.
    #######################################################################################################################

    #####################################################################
    #####################################################################
    # Work only with best clustering/ find best cluster
    #####################################################################
    #####################################################################