def train(config_file="pipeline_config.yaml"):
    """Step 5: build track candidates by labelling nodes of the scored graphs.

    Reads the pipeline config, loads every scored graph written by the GNN
    step (train/val/test splits), then labels each graph's nodes with a
    score cut and writes the result to the track-building output directory.
    """
    logging.info(
        headline(" Step 5: Building track candidates from the scored graph "))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)
    common_configs = all_configs["common_configs"]
    gnn_configs = all_configs["gnn_configs"]
    track_building_configs = all_configs["track_building_configs"]

    logging.info(headline("a) Loading scored graphs"))
    # Graphs are loaded onto the CPU regardless of where they were scored.
    all_graphs = []
    for subdir in ["train", "val", "test"]:
        subdir_path = os.path.join(gnn_configs["output_dir"], subdir)
        for graph_name in os.listdir(subdir_path):
            all_graphs.append(
                torch.load(os.path.join(subdir_path, graph_name),
                           map_location="cpu"))

    logging.info(headline("b) Labelling graph nodes"))
    score_cut = track_building_configs["score_cut"]
    save_dir = track_building_configs["output_dir"]
    # Optionally wipe the output directory before writing fresh labels.
    if common_configs["clear_directories"]:
        delete_directory(track_building_configs["output_dir"])

    # RUN IN SERIAL FOR NOW -->
    for graph in tqdm(all_graphs):
        label_graph(graph, score_cut=score_cut, save_dir=save_dir)
def train(config_file="pipeline_config.yaml"):
    """Step 1: train the metric-learning embedding model and checkpoint it.

    Returns:
        (trainer, model): the fitted Lightning Trainer and the trained model.
    """
    logging.info(headline("Step 1: Running metric learning training"))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)
    common_configs = all_configs["common_configs"]
    metric_learning_configs = all_configs["metric_learning_configs"]

    logging.info(headline("a) Initialising model"))
    model = LayerlessEmbedding(metric_learning_configs)

    logging.info(headline("b) Running training"))
    save_directory = os.path.join(common_configs["artifact_directory"],
                                  "metric_learning")
    logger = CSVLogger(save_directory, name=common_configs["experiment_name"])
    # Select the GPU accelerator only when CUDA is actually available.
    use_gpu = torch.cuda.is_available()
    trainer = Trainer(accelerator='gpu' if use_gpu else None,
                      gpus=common_configs["gpus"],
                      max_epochs=metric_learning_configs["max_epochs"],
                      logger=logger)
    trainer.fit(model)

    logging.info(headline("c) Saving model"))
    os.makedirs(save_directory, exist_ok=True)
    checkpoint_path = os.path.join(
        save_directory, common_configs["experiment_name"] + ".ckpt")
    trainer.save_checkpoint(checkpoint_path)

    return trainer, model
def train(config_file="pipeline_config.yaml"):
    """Step 3: train the interaction GNN edge-classifier and checkpoint it.

    Returns:
        (trainer, model): the fitted Lightning Trainer and the trained model.
    """
    logging.info(headline(" Step 3: Running GNN training "))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)
    common_configs = all_configs["common_configs"]
    gnn_configs = all_configs["gnn_configs"]

    logging.info(headline("a) Initialising model"))
    model = InteractionGNN(gnn_configs)

    logging.info(headline("b) Running training"))
    save_directory = os.path.join(common_configs["artifact_directory"], "gnn")
    logger = CSVLogger(save_directory, name=common_configs["experiment_name"])
    # NOTE(review): unlike the metric-learning step, no `accelerator=` kwarg
    # is passed here — confirm whether that asymmetry is intentional.
    trainer = Trainer(gpus=common_configs["gpus"],
                      max_epochs=gnn_configs["max_epochs"],
                      logger=logger)
    trainer.fit(model)

    logging.info(headline("c) Saving model"))
    os.makedirs(save_directory, exist_ok=True)
    checkpoint_path = os.path.join(
        save_directory, common_configs["experiment_name"] + ".ckpt")
    trainer.save_checkpoint(checkpoint_path)

    return trainer, model
def train(config_file="pipeline_config.yaml"):
    """Step 2: construct graphs by running inference with the trained embedding.

    Loads the metric-learning checkpoint saved in step 1, then builds
    neighbourhood graphs via radius/kNN search in the embedded space.

    Returns:
        The EmbeddingInferenceBuilder used to build the graphs.
    """
    logging.info(
        headline("Step 2: Constructing graphs from metric learning model"))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)
    common_configs = all_configs["common_configs"]
    metric_learning_configs = all_configs["metric_learning_configs"]

    logging.info(headline("a) Loading trained model"))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint_path = os.path.join(common_configs["artifact_directory"],
                                   "metric_learning",
                                   common_configs["experiment_name"] + ".ckpt")
    model = LayerlessEmbedding.load_from_checkpoint(checkpoint_path).to(device)

    logging.info(headline("b) Running inferencing"))
    # Optionally wipe the output directory before writing fresh graphs.
    if common_configs["clear_directories"]:
        delete_directory(metric_learning_configs["output_dir"])

    graph_builder = EmbeddingInferenceBuilder(
        model,
        metric_learning_configs["train_split"],
        overwrite=True,
        knn_max=1000,
        radius=metric_learning_configs["r_test"])
    graph_builder.build()

    return graph_builder
def train(config_file="pipeline_config.yaml"):
    """Step 4: score graph edges by running inference with the trained GNN.

    Loads the GNN checkpoint saved in step 3 and writes scored graphs to the
    GNN output directory.
    """
    logging.info(headline("Step 4: Scoring graph edges using GNN "))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)
    common_configs = all_configs["common_configs"]
    gnn_configs = all_configs["gnn_configs"]

    logging.info(headline("a) Loading trained model"))
    # Optionally wipe the output directory before writing scored graphs.
    if common_configs["clear_directories"]:
        delete_directory(gnn_configs["output_dir"])

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    checkpoint_path = os.path.join(common_configs["artifact_directory"], "gnn",
                                   common_configs["experiment_name"] + ".ckpt")
    model = InteractionGNN.load_from_checkpoint(checkpoint_path).to(device)
    model.setup_data()

    logging.info(headline("b) Running inferencing"))
    graph_scorer = GNNInferenceBuilder(model)
    graph_scorer.infer()
def evaluate(config_file="pipeline_config.yaml"):
    """Step 6: evaluate track-reconstruction performance from labelled graphs.

    Loads every labelled graph produced by the track-building step, matches
    reconstructed tracks to truth particles, logs efficiency / fake rate /
    duplication rate, and plots efficiency versus pT.

    Returns:
        (evaluated_events, reconstructed_particles, particles,
         matched_tracks, tracks) as pandas DataFrames.
    """
    logging.info(
        headline("Step 6: Evaluating the track reconstruction performance"))

    with open(config_file) as file:
        all_configs = yaml.load(file, Loader=yaml.FullLoader)
    common_configs = all_configs["common_configs"]
    track_building_configs = all_configs["track_building_configs"]
    evaluation_configs = all_configs["evaluation_configs"]

    logging.info(headline("a) Loading labelled graphs"))
    input_dir = track_building_configs["output_dir"]
    output_dir = evaluation_configs["output_dir"]
    os.makedirs(output_dir, exist_ok=True)

    all_graph_files = os.listdir(input_dir)
    all_graph_files = [
        os.path.join(input_dir, graph) for graph in all_graph_files
    ]

    evaluated_events = []
    for graph_file in tqdm(all_graph_files):
        evaluated_events.append(
            evaluate_labelled_graph(
                graph_file,
                matching_fraction=evaluation_configs["matching_fraction"],
                matching_style=evaluation_configs["matching_style"],
                min_track_length=evaluation_configs["min_track_length"],
                min_particle_length=evaluation_configs["min_particle_length"]))
    evaluated_events = pd.concat(evaluated_events)

    # FIX: take an explicit copy of the filtered frame; the original later
    # assigned into this boolean-mask slice, which raises pandas'
    # SettingWithCopyWarning and can silently fail under copy-on-write.
    particles = evaluated_events[evaluated_events["is_reconstructable"]].copy()
    reconstructed_particles = particles[particles["is_reconstructed"]
                                        & particles["is_matchable"]]
    tracks = evaluated_events[evaluated_events["is_matchable"]]
    matched_tracks = tracks[tracks["is_matched"]]

    # Count unique (event, particle) / (event, track) pairs; rows can repeat
    # when a particle is reconstructed more than once.
    n_particles = len(
        particles.drop_duplicates(subset=['event_id', 'particle_id']))
    n_reconstructed_particles = len(
        reconstructed_particles.drop_duplicates(
            subset=['event_id', 'particle_id']))
    n_tracks = len(tracks.drop_duplicates(subset=['event_id', 'track_id']))
    n_matched_tracks = len(
        matched_tracks.drop_duplicates(subset=['event_id', 'track_id']))
    # Every extra row of an already-reconstructed particle is a duplicate.
    n_dup_reconstructed_particles = len(
        reconstructed_particles) - n_reconstructed_particles

    logging.info(headline("b) Calculating the performance metrics"))
    logging.info(
        f"Number of reconstructed particles: {n_reconstructed_particles}")
    # FIX: this f-string was broken across a physical line in the original
    # (a literal newline inside the quotes), which is a syntax error;
    # reconstructed as a single literal.
    logging.info(f"Number of particles: {n_particles}")
    logging.info(f"Number of matched tracks: {n_matched_tracks}")
    logging.info(f"Number of tracks: {n_tracks}")
    logging.info(
        f"Number of duplicate reconstructed particles: {n_dup_reconstructed_particles}"
    )

    eff = n_reconstructed_particles / n_particles
    fake_rate = 1 - (n_matched_tracks / n_tracks)
    dup_rate = n_dup_reconstructed_particles / n_reconstructed_particles
    logging.info(f"Efficiency: {eff:.3f}")
    logging.info(f"Fake rate: {fake_rate:.3f}")
    logging.info(f"Duplication rate: {dup_rate:.3f}")

    logging.info(headline("c) Plotting results"))

    # Mark a particle as reconstructed if ANY of its rows is, then keep one
    # row per particle so the plot is not biased by duplicates.
    # NOTE(review): this groups by particle_id only (not event_id), unlike the
    # dedup above — particle ids reused across events would be merged; verify.
    grouped_reco_particles = particles.groupby(
        'particle_id')["is_reconstructed"].any()
    particles["is_reconstructed"] = particles["particle_id"].isin(
        grouped_reco_particles[grouped_reco_particles].index.values)
    particles = particles.drop_duplicates(subset=['particle_id'])

    # Plot the results across pT and eta
    plot_pt_eff(particles)

    # TODO: Plot the results

    return evaluated_events, reconstructed_particles, particles, matched_tracks, tracks