Пример #1
0
    def run(cls, rank, mpi_comm, parameters, workspace_handler, timer,
            generated_files):
        """Obtain the distance matrix on rank 0 and share it with every rank.

        Every rank calls ``cls.load_data``. Rank 0 then calculates the matrix
        with ``cls.calc_matrix`` and its raw contents are broadcast, so that
        the other ranks can wrap them in their own ``CondensedMatrix``.
        Afterwards rank 0 saves the matrix statistics (and the matrix itself
        when ``parameters["matrix"]`` has a "filename" entry), while rank 1
        plots the matrix when an "image" entry is present.

        Returns the tuple ``(data_handler, matrix_handler)`` of the calling
        rank.
        """
        DataDriver.timer = timer

        # Each rank loads the data by itself and then waits for the others.
        # TODO: Atomic objects do not seem to be picklable, so the
        # data_handler cannot be shared directly between processes.
        data_handler = cls.load_data(parameters)
        mpi_comm.Barrier()

        matrix_params = parameters["matrix"]
        is_root = rank == 0

        # Only the root rank computes the matrix; the rest start with an
        # empty handler that is filled in after the broadcast.
        if is_root:
            handler = cls.calc_matrix(data_handler, matrix_params)
            raw_matrix = handler.distance_matrix.get_data()
        else:
            raw_matrix = None
            handler = MatrixHandler(None, matrix_params)

        # Wait for the calculation to finish before sharing its contents.
        mpi_comm.Barrier()
        raw_matrix = mpi_comm.bcast(raw_matrix, root=0)
        if not is_root:
            handler.distance_matrix = CondensedMatrix(raw_matrix)

        # The original author notes that at least 2 processes are expected
        # (plotting is delegated to rank 1).
        if is_root:
            stats_path = handler.save_statistics(workspace_handler["matrix"])
            stats_entry = {
                "description": "Matrix statistics",
                "path": os.path.abspath(stats_path),
                "type": "text",
            }
            generated_files.append(stats_entry)

            # Store the matrix only when a file name was requested.
            if "filename" in matrix_params:
                cls.save_matrix(handler, workspace_handler, matrix_params)
        elif rank == 1 and "image" in matrix_params:
            cls.plot_matrix(handler, workspace_handler, matrix_params,
                            generated_files)

        return data_handler, handler
Пример #2
0
 def testSilhouetteSpecialCase(self):
     """Print the silhouette evaluation of ``data.clustering_01``.

     The matrix is obtained from a ``MatrixHandler`` configured with the
     "load" method and the example matrix path. Nothing is asserted; the
     result of the evaluation is only printed.
     """
     loader_description = {
         "method": "load",
         "parameters": {
             "path": "data/example_clustering_1_matrix"
         }
     }
     clustering = Clustering.from_dic(data.clustering_01)
     matrix_handler = MatrixHandler(loader_description)
     calculator = SilhouetteCoefficientCalculator()
     matrix = matrix_handler.create_matrix(None)
     print(calculator.evaluate(clustering, matrix))
Пример #3
0
    def run(cls, rank, mpi_comm, parameters, workspace_handler, timer, generated_files):
        """Run the data/matrix stage of the protocol across the MPI ranks.

        Steps, in order:
          1. every rank calls ``cls.load_data`` and meets at a barrier;
          2. rank 0 calculates the matrix, the other ranks create an empty
             ``MatrixHandler``; all ranks meet at a second barrier;
          3. the matrix contents are broadcast from rank 0 and wrapped in a
             ``CondensedMatrix`` by the other ranks;
          4. rank 0 saves the statistics (plus the matrix if a "filename" is
             given) and rank 1 plots the matrix if an "image" is given.

        Returns ``(data_handler, matrix_handler)`` for the calling rank.
        """
        DataDriver.timer = timer

        # All ranks load the data on their own.
        # TODO: Atomic does not look picklable, which prevents sharing the
        # data_handler directly.
        data_handler = cls.load_data(parameters)
        mpi_comm.Barrier()

        # Matrix calculation happens on rank 0 only.
        condensed_data = None
        if rank == 0:
            local_handler = cls.calc_matrix(data_handler, parameters["matrix"])
            condensed_data = local_handler.distance_matrix.get_data()
        else:
            local_handler = MatrixHandler(None, parameters["matrix"])

        # Synchronise, then hand the contents over to everybody.
        mpi_comm.Barrier()
        condensed_data = mpi_comm.bcast(condensed_data, root=0)

        # Note from the original author: at least 2 processes are expected.
        if rank != 0:
            local_handler.distance_matrix = CondensedMatrix(condensed_data)
        else:
            # Statistics are always saved by rank 0 ...
            stats_path = local_handler.save_statistics(workspace_handler["matrix"])
            generated_files.append({"description": "Matrix statistics",
                                    "path": os.path.abspath(stats_path),
                                    "type": "text"})

            # ... the matrix contents only when a file name is configured.
            if "filename" in parameters["matrix"]:
                cls.save_matrix(local_handler, workspace_handler, parameters["matrix"])

        # Plotting is the job of rank 1.
        if rank == 1 and "image" in parameters["matrix"]:
            cls.plot_matrix(local_handler, workspace_handler, parameters["matrix"],
                            generated_files)

        return data_handler, local_handler
Пример #4
0
class MatrixCalculator(object):
    """Delegate the distance matrix calculation to a concrete calculator class.

    ``calculate`` asks ``cls.get_calculator`` for the calculator class that
    matches ``matrix_params`` and wraps the resulting matrix in a
    ``MatrixHandler``.

    NOTE(review): ``get_calculator`` is not defined in this excerpt; it has to
    be provided elsewhere (e.g. by a subclass or another part of this class).
    """

    CALCULATION_METHOD = "None"  # Hack that allows us to work with matrix combination.

    # in skip_list, matrixCalculator has been removed.
    # TODO: understand why this is necessary

    def __init__(self):
        pass

    @classmethod
    def calculate(cls, data_handler, matrix_params):
        """Calculate the distance matrix described by ``matrix_params``.

        Args:
            data_handler: Object handed over untouched to the selected
                calculator class.
            matrix_params: Mapping used to select the calculator; its
                "parameters" entry is passed to the calculator.

        Returns:
            A ``MatrixHandler`` built from the calculated matrix and
            ``matrix_params``.

        Raises:
            SystemExit: With status 1 if the calculator raises any exception.
                The error message and the traceback are printed first.
        """
        # Local import so that this block does not depend on the file's
        # import section.
        import sys

        calculator_class = cls.get_calculator(matrix_params)

        try:
            distance_matrix = calculator_class.calculate(
                data_handler, matrix_params["parameters"])
        except Exception as e:
            print("[ERROR][Driver::postprocess] Impossible to perform matrix calculation for method: %s" % (
                calculator_class.CALCULATION_METHOD))
            print("Message: %s" % str(e))
            traceback.print_exc()
            # Exit with a non-zero status: a bare exit() would report success
            # to the caller even though the calculation failed.
            sys.exit(1)

        return MatrixHandler(distance_matrix, matrix_params)
Пример #5
0
from pyproct.driver.parameters import ProtocolParameters
from pyproct.driver.observer.observer import Observer
from pyproct.driver.driver import Driver
from pyproct.tools.commonTools import convert_to_utf8
from pyproct.clustering.clustering import Clustering
from pyproct.data.matrix.matrixHandler import MatrixHandler

# Script entry point: stores one matrix file per dataset and then prepares the
# pyProCT parameters for the selected dataset(s).
# NOTE(review): create_directory, vtools, data and os are not imported in the
# visible part of this file -- confirm they are brought into scope elsewhere.
if __name__ == '__main__':
    # Working directories; "./matrices" and "./tmp" are referenced below.
    create_directory("./clustering_images")
    create_directory("./matrices")
    create_directory("./tmp")
    # condensed_matrices is indexed by dataset name in the loop below.
    condensed_matrices, all_observations = vtools.create_matrices(data)

    # Saving matrices: one file per dataset under ./matrices/<dataset_name>.
    for dataset_name in data.all_datasets:
        handler = MatrixHandler(condensed_matrices[dataset_name],
                                {"method": "load"})
        handler.save_matrix("./matrices/%s" % dataset_name)

    # Run pyProCT for each of them
    # The whole JSON template is read into a single string.
    # NOTE(review): the file object returned by open() is never closed.
    base_script = "".join(open("base_script.json", "r").readlines())
    # Only 'concentric_circles' is processed; the trailing comment keeps the
    # alternative dataset selections that were used before.
    for dataset_name in ['concentric_circles'
                         ]:  #data.all_datasets: #["spaeth_06"]:#
        print dataset_name
        # Change placeholders: the template takes two values, the absolute
        # path of ./tmp/<dataset_name> and the relative matrix path.
        script_str = base_script % (os.path.abspath(
            "./tmp/%s" % dataset_name), "./matrices/%s" % dataset_name)
        parameters = ProtocolParameters.get_params_from_json(script_str)
        # Override evaluation settings with the per-dataset values.
        parameters["clustering"]["evaluation"]["maximum_noise"] = data.noise[
            dataset_name]
        # NOTE(review): the statement below is cut off in this excerpt.
        parameters["clustering"]["evaluation"][
Пример #6
0
from pyproct.driver.parameters import ProtocolParameters
from pyproct.driver.observer.observer import Observer
from pyproct.driver.driver import Driver
from pyproct.tools.commonTools import convert_to_utf8
from pyproct.clustering.clustering import Clustering
from pyproct.data.matrix.matrixHandler import MatrixHandler

# Script entry point: stores one matrix file per dataset and then prepares the
# pyProCT parameters for the selected dataset(s).
# NOTE(review): create_directory, vtools, data and os are not imported in the
# visible part of this file -- confirm they are brought into scope elsewhere.
if __name__ == '__main__':
    # Working directories; "./matrices" and "./tmp" are referenced below.
    create_directory("./clustering_images")
    create_directory("./matrices")
    create_directory("./tmp")
    condensed_matrices, all_observations = vtools.create_matrices(data)

    # Saving matrices: one file per dataset under ./matrices/<dataset_name>.
    for dataset_name in data.all_datasets:
        handler = MatrixHandler(condensed_matrices[dataset_name], {"method": "load"})
        handler.save_matrix("./matrices/%s" % dataset_name)

    # Run pyProCT for each of them.
    # Read the JSON template inside a context manager so that the file handle
    # is closed as soon as its contents are in memory.
    with open("base_script.json", "r") as script_file:
        base_script = script_file.read()

    for dataset_name in ['concentric_circles']:  # data.all_datasets: #["spaeth_06"]:#
        print(dataset_name)
        # Change placeholders: the template takes two values, the absolute
        # path of ./tmp/<dataset_name> and the relative matrix path.
        script_str = base_script % (os.path.abspath("./tmp/%s" % dataset_name),
                                    "./matrices/%s" % dataset_name)
        parameters = ProtocolParameters.get_params_from_json(script_str)
        # Override the evaluation settings with the per-dataset values.
        evaluation = parameters["clustering"]["evaluation"]
        evaluation["maximum_noise"] = data.noise[dataset_name]
        evaluation["minimum_cluster_size"] = data.minsize[dataset_name]
        evaluation["minimum_clusters"] = data.num_cluster_ranges[dataset_name][0]
        evaluation["maximum_clusters"] = data.num_cluster_ranges[dataset_name][1]
        # Single-argument print keeps the same output on Python 2 and 3.
        print("%s %s" % (evaluation["minimum_clusters"],
                         evaluation["maximum_clusters"]))