def run(cls, rank, mpi_comm, parameters, workspace_handler, timer, generated_files):
    """MPI entry point: every rank loads the data, rank 0 computes the
    distance matrix and broadcasts its raw contents, then rank 0 writes
    statistics/matrix files while rank 1 renders the plot.

    Returns the (data_handler, matrix_handler) pair on every rank.
    """
    DataDriver.timer = timer

    # Each rank loads the trajectories for itself: the data handler cannot
    # be shared directly (original TODO: Atomic is not picklable).
    data_handler = cls.load_data(parameters)
    mpi_comm.Barrier()

    # The (expensive) matrix calculation happens only on rank 0; the other
    # ranks prepare an empty handler to receive the broadcast data.
    if rank == 0:
        matrix_handler = cls.calc_matrix(data_handler, parameters["matrix"])
        raw_matrix_data = matrix_handler.distance_matrix.get_data()
    else:
        raw_matrix_data = None
        matrix_handler = MatrixHandler(None, parameters["matrix"])
    mpi_comm.Barrier()

    # Broadcast the raw contents and rebuild the matrix on the other ranks.
    raw_matrix_data = mpi_comm.bcast(raw_matrix_data, root=0)
    if rank != 0:
        matrix_handler.distance_matrix = CondensedMatrix(raw_matrix_data)

    # Post-processing is split between ranks 0 and 1; the original note says
    # at least 2 processes are expected (with fewer, the plotting below
    # simply never runs on any rank).
    if rank == 0:
        # Save statistics and register the produced file.
        stats_path = matrix_handler.save_statistics(workspace_handler["matrix"])
        generated_files.append({
            "description": "Matrix statistics",
            "path": os.path.abspath(stats_path),
            "type": "text"
        })
        # Save matrix contents when a target filename was given.
        if "filename" in parameters["matrix"]:
            cls.save_matrix(matrix_handler, workspace_handler, parameters["matrix"])

    if rank == 1:
        # Plot the matrix when an image output was requested.
        if "image" in parameters["matrix"]:
            cls.plot_matrix(matrix_handler, workspace_handler, parameters["matrix"], generated_files)

    return data_handler, matrix_handler
def testSilhouetteSpecialCase(self): clustering = Clustering.from_dic(data.clustering_01) mh = MatrixHandler({ "method": "load", "parameters":{ "path": "data/example_clustering_1_matrix" } } ) s = SilhouetteCoefficientCalculator() matrix = mh.create_matrix(None) print s.evaluate(clustering, matrix)
def run(cls, rank, mpi_comm, parameters, workspace_handler, timer, generated_files):
    """Runs the matrix-building stage under MPI.

    All ranks load the data; rank 0 calculates the distance matrix and
    broadcasts its contents so the remaining ranks can reconstruct it.
    Rank 0 then saves statistics (and optionally the matrix file) while
    rank 1 handles the optional plot. Returns (data_handler, matrix_handler).
    """
    DataDriver.timer = timer

    # Trajectories are loaded on every rank independently; the handler
    # itself cannot be broadcast (original TODO: Atomic is not picklable).
    data_handler = cls.load_data(parameters)
    mpi_comm.Barrier()

    if rank == 0:
        # Master rank: do the actual calculation and extract the raw data.
        matrix_handler = cls.calc_matrix(data_handler, parameters["matrix"])
        matrix_contents = matrix_handler.distance_matrix.get_data()
    else:
        # Worker ranks: placeholder handler, contents arrive via bcast.
        matrix_contents = None
        matrix_handler = MatrixHandler(None, parameters["matrix"])
    mpi_comm.Barrier()

    # Distribute the computed contents and rebuild the condensed matrix
    # everywhere except on the rank that already owns it.
    matrix_contents = mpi_comm.bcast(matrix_contents, root=0)
    if rank != 0:
        matrix_handler.distance_matrix = CondensedMatrix(matrix_contents)

    # Output work is partitioned by rank; the original comment states that
    # at least 2 processes are expected here.
    if rank == 0:
        statistics_file_path = matrix_handler.save_statistics(workspace_handler["matrix"])
        generated_files.append({
            "description": "Matrix statistics",
            "path": os.path.abspath(statistics_file_path),
            "type": "text"
        })
        if "filename" in parameters["matrix"]:
            cls.save_matrix(matrix_handler, workspace_handler, parameters["matrix"])

    if rank == 1 and "image" in parameters["matrix"]:
        cls.plot_matrix(matrix_handler, workspace_handler, parameters["matrix"], generated_files)

    return data_handler, matrix_handler
class MatrixCalculator(object): CALCULATION_METHOD = "None" # Hack that allow us to work with matrix combination. # in skip_list, matrixCalculator has been removed. # TODO: understand why this is necessary def __init__(self): pass @classmethod def calculate(cls, data_handler, matrix_params): calculator_class = cls.get_calculator(matrix_params) try: distance_matrix = calculator_class.calculate( data_handler, matrix_params["parameters"]) except Exception, e: print "[ERROR][Driver::postprocess] Impossible to perform matrix calculation for method: %s" % ( calculator_class.CALCULATION_METHOD) print "Message: %s" % str(e) traceback.print_exc() exit() return MatrixHandler(distance_matrix, matrix_params)
from pyproct.driver.parameters import ProtocolParameters from pyproct.driver.observer.observer import Observer from pyproct.driver.driver import Driver from pyproct.tools.commonTools import convert_to_utf8 from pyproct.clustering.clustering import Clustering from pyproct.data.matrix.matrixHandler import MatrixHandler if __name__ == '__main__': create_directory("./clustering_images") create_directory("./matrices") create_directory("./tmp") condensed_matrices, all_observations = vtools.create_matrices(data) # Saving matrices for dataset_name in data.all_datasets: handler = MatrixHandler(condensed_matrices[dataset_name], {"method": "load"}) handler.save_matrix("./matrices/%s" % dataset_name) # Run pyProCT for each of them base_script = "".join(open("base_script.json", "r").readlines()) for dataset_name in ['concentric_circles' ]: #data.all_datasets: #["spaeth_06"]:# print dataset_name # Change placeholders script_str = base_script % (os.path.abspath( "./tmp/%s" % dataset_name), "./matrices/%s" % dataset_name) parameters = ProtocolParameters.get_params_from_json(script_str) # And change another hypothesis stuff parameters["clustering"]["evaluation"]["maximum_noise"] = data.noise[ dataset_name] parameters["clustering"]["evaluation"][
from pyproct.driver.parameters import ProtocolParameters from pyproct.driver.observer.observer import Observer from pyproct.driver.driver import Driver from pyproct.tools.commonTools import convert_to_utf8 from pyproct.clustering.clustering import Clustering from pyproct.data.matrix.matrixHandler import MatrixHandler if __name__ == '__main__': create_directory("./clustering_images") create_directory("./matrices") create_directory("./tmp") condensed_matrices, all_observations = vtools.create_matrices(data) # Saving matrices for dataset_name in data.all_datasets: handler = MatrixHandler(condensed_matrices[dataset_name], {"method":"load"}) handler.save_matrix("./matrices/%s"%dataset_name) # Run pyProCT for each of them base_script = "".join(open("base_script.json","r").readlines()) for dataset_name in ['concentric_circles']: #data.all_datasets: #["spaeth_06"]:# print dataset_name # Change placeholders script_str = base_script%(os.path.abspath("./tmp/%s"%dataset_name),"./matrices/%s"%dataset_name) parameters = ProtocolParameters.get_params_from_json(script_str) # And change another hypothesis stuff parameters["clustering"]["evaluation"]["maximum_noise"] = data.noise[dataset_name] parameters["clustering"]["evaluation"]["minimum_cluster_size"] = data.minsize[dataset_name] parameters["clustering"]["evaluation"]["minimum_clusters"] = data.num_cluster_ranges[dataset_name][0] parameters["clustering"]["evaluation"]["maximum_clusters"] = data.num_cluster_ranges[dataset_name][1] print parameters["clustering"]["evaluation"]["minimum_clusters"], parameters["clustering"]["evaluation"]["maximum_clusters"]