def run(data_filename):
    """Train a GSOM on the anomaly dataset in *data_filename* and evaluate it.

    Parses the input CSV into train/test splits, trains the growing SOM with a
    fixed hyper-parameter set, and returns the test labels together with the
    controller's predictions.

    :param data_filename: path to the input CSV consumed by
        ``Parser.InputParser.parse_input_zoo_data``.
    :return: tuple ``(y_test, y_pred)``.
    """
    SF = 0.83
    forget_threshold = 60  # To include forgetting, threshold should be < learning iterations.
    temporal_contexts = 1  # If stationary data - keep this at 1
    learning_itr = 100
    smoothing_irt = 50
    plot_for_itr = 4  # Unused parameter - just for visualization. Keep this as it is.

    # File Config
    dataset = 'anomaly'
    experiment_id = 'Exp-new-gsom-' + datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H-%M-%S')
    output_save_location = join('output/', experiment_id)

    # Init GSOM Parameters
    gsom_params = Params.GSOMParameters(SF, learning_itr, smoothing_irt,
                                        distance=Params.DistanceFunction.EUCLIDEAN,
                                        temporal_context_count=temporal_contexts,
                                        forget_itr_count=forget_threshold)
    generalise_params = Params.GeneraliseParameters(gsom_params)

    # Process the input files
    input_vector_database, labels, classes, X_test, y_test = Parser.InputParser.parse_input_zoo_data(data_filename)
    # generate_output_config is kept for its side effects (it prepares the
    # output directories); only the images location is consumed here.
    output_loc, output_loc_images = generate_output_config(dataset, SF, forget_threshold,
                                                           temporal_contexts, output_save_location)

    # Setup the age threshold based on the input vector length
    generalise_params.setup_age_threshold(input_vector_database[0].shape[0])

    # Process the clustering algorithm
    controller = Core.Controller(generalise_params)
    result_dict, y_pred = controller.run(input_vector_database, X_test, plot_for_itr,
                                         classes, output_loc_images)
    return y_test, y_pred
def GSOM_model(SF, forget_threshold, temporal_contexts, learning_itr, smoothing_irt,
               plot_for_itr, data_filename, output_save_location, name):
    """Train a GSOM on *data_filename*, persist the node map, and render it.

    :param SF: spread factor for the GSOM.
    :param forget_threshold: iteration count used for node forgetting.
    :param temporal_contexts: number of temporal contexts (1 for stationary data).
    :param learning_itr: number of learning iterations.
    :param smoothing_irt: number of smoothing iterations.
    :param plot_for_itr: per-iteration plotting parameter passed to the controller.
    :param data_filename: path to the training data file.
    :param output_save_location: directory prefix for saved artefacts.
    :param name: identifier used in the saved nodemap / image file names.
    """
    # Init GSOM Parameters
    gsom_params = Params.GSOMParameters(SF, learning_itr, smoothing_irt,
                                        distance=Params.DistanceFunction.EUCLIDEAN,
                                        temporal_context_count=temporal_contexts,
                                        forget_itr_count=forget_threshold)
    generalise_params = Params.GeneraliseParameters(gsom_params)

    # Process the input files
    input_vector_database, labels, classes = Parser.InputParser.parse_input_train_data(data_filename, None)

    # Setup the age threshold based on the input vector length
    generalise_params.setup_age_threshold(input_vector_database[0].shape[0])

    # Process the clustering algorithm
    controller = Core.Controller(generalise_params)
    controller_start = time.time()
    result_dict = controller.run(input_vector_database, plot_for_itr, classes)
    print('Algorithms completed in', round(time.time() - controller_start, 2), '(s)')
    gsom_nodemap = result_dict[0]['gsom']

    # Saving gsom node map (joblib.dump's return value is not needed).
    joblib.dump(gsom_nodemap, output_save_location + 'gsom_nodemap_{}.joblib'.format(name))

    # Display
    # Fix: the plot title previously hard-coded "SF=0.7" regardless of the
    # actual SF argument; it now reports the real spread factor.
    display = Display_Utils.Display(result_dict[0]['gsom'], None)
    display.setup_labels_for_gsom_nodemap(classes, 2,
                                          'Latent Space of cnn_5100_input_file_to_gsom : SF={}'.format(SF),
                                          output_save_location + 'latent_space_{}_hitvalues'.format(name))
    print('Completed.')
def generate_latent_map(visualize=False):
    """Grid-search GSOM latent spaces for every configured dataset sheet.

    For each sheet named in the configuration, trains one GSOM per
    (spread factor, forget threshold) combination, saves the resulting node
    map, renders the labelled latent space, and plots the profile images.
    An interactive profile view is produced as well when *visualize* is True.

    :param visualize: when True, also emit the interactive profile plots.
    """
    # File config
    dataset_location = config.Configuration.dataset_location
    experiment_id = 'Exp-' + datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d-%H-%M-%S')
    output_save_location = join('output/', experiment_id)
    ID_column_name = config.Configuration.ID_column_name
    is_normalized = config.Configuration.normalize_dataset

    # Grid search config
    sheet_names = config.Configuration.excel_sheet_names

    # GSOM config: transience percentages are translated into forget thresholds
    # expressed as iteration counts.
    SF_values = config.Configuration.SF_values
    learning_itr = config.Configuration.learning_itr
    smoothing_irt = config.Configuration.smoothing_irt
    forget_threshold_values = [int(learning_itr * (1 - tp))
                               for tp in config.Configuration.transience_percentages]
    temporal_contexts = 1
    plot_for_itr = 4

    for dataset in sheet_names:
        # Parse each sheet once; the parsed vectors are reused for every
        # (SF, forgetting) combination in the grid below.
        input_vector_database, labels, selected_vars = Parser.InputParser.parse_data(
            dataset_location, dataset, ID_column_name, is_normalized)
        print('Latent Spaces for {}, using: \n{}'.format(dataset, list(selected_vars)))

        for SF, forget_threshold in itertools.product(SF_values, forget_threshold_values):
            print('\nProcessing with SF={} with Forgetting={}'.format(SF, forget_threshold))
            forget_threshold_label = Utils.Utilities.forget_thresh_label(
                learning_itr, forget_threshold)
            output_loc, output_loc_images = generate_output_config(
                dataset, SF, forget_threshold, temporal_contexts, output_save_location)

            # Build the GSOM parameter set for this grid point.
            gsom_params = Params.GSOMParameters(
                SF, learning_itr, smoothing_irt,
                distance=Params.DistanceFunction.EUCLIDEAN,
                temporal_context_count=temporal_contexts,
                forget_itr_count=forget_threshold)
            generalise_params = Params.GeneraliseParameters(gsom_params)
            # Age threshold scales with the input vector length.
            generalise_params.setup_age_threshold(input_vector_database[0].shape[0])

            # Run the clustering and persist the trained node map.
            controller = Core.Controller(generalise_params)
            result_dict = controller.run(input_vector_database, plot_for_itr, labels,
                                         output_loc_images)
            Utils.Utilities.save_object(
                result_dict,
                join(output_loc, 'gsom_nodemap_SF-{}_F-{}'.format(SF, forget_threshold_label)))

            gsom_nodemap = result_dict[0]['gsom']
            print('Latent Space generated with {} neurons.'.format(len(gsom_nodemap)))

            # Visualizations
            display = Display_Utils.Display(gsom_nodemap, None)
            display.setup_labels_for_gsom_nodemap(
                gsom_nodemap, labels,
                'Latent Space of {} : SF={} with Forget={}'.format(
                    dataset, SF, forget_threshold_label),
                join(output_loc, '{}_latent_space_SF-{}_F-{}_hit_count'.format(
                    dataset, SF, forget_threshold_label)),
                selected_vars=selected_vars)

            # Plot profile images
            profiler.Profiler.plot_profile_images(
                dataset_location, dataset, ID_column_name, output_loc,
                SF, forget_threshold_label, is_normalized)

            # Plot interactive profile visualization
            if visualize:
                int_display.InteractiveDisplay.plot_interactive(
                    dataset, output_loc, SF, forget_threshold_label)
if __name__ == '__main__':
    # Script entry point: train a GSOM on the zoo dataset and render the
    # labelled latent space.
    # NOTE(review): SF, learning_itr, smoothing_irt, temporal_contexts,
    # forget_threshold, plot_for_itr, data_filename and dataset are not
    # defined in this block — they are presumed module-level settings; verify
    # they exist at module scope before running this script directly.

    # Init GSOM Parameters
    gsom_params = Params.GSOMParameters(SF, learning_itr, smoothing_irt,
                                        distance=Params.DistanceFunction.EUCLIDEAN,
                                        temporal_context_count=temporal_contexts,
                                        forget_itr_count=forget_threshold)
    generalise_params = Params.GeneraliseParameters(gsom_params)

    # Process the input files
    input_vector_database, labels, classes = Parser.InputParser.parse_input_zoo_data(data_filename, None)
    output_loc, output_loc_images = generate_output_config(dataset, SF, forget_threshold)

    # Setup the age threshold based on the input vector length
    generalise_params.setup_age_threshold(input_vector_database[0].shape[0])

    # Process the clustering algorithm
    controller = Core.Controller(generalise_params)
    controller_start = time.time()
    result_dict = controller.run(input_vector_database, plot_for_itr, classes, output_loc_images)
    print('Algorithms completed in', round(time.time() - controller_start, 2), '(s)')

    saved_name = Utils.Utilities.save_object(result_dict, join(output_loc, 'gsom_nodemap_SF-{}'.format(SF)))
    gsom_nodemap = result_dict[0]['gsom']

    # Display: one map coloured by class hit values, one by labels.
    display = Display_Utils.Display(result_dict[0]['gsom'], None)
    display.setup_labels_for_gsom_nodemap(classes, 2,
                                          'Latent Space of {} : SF={}'.format(dataset, SF),
                                          join(output_loc, 'latent_space_' + str(SF) + '_hitvalues'))
    display.setup_labels_for_gsom_nodemap(labels, 2,
                                          'Latent Space of {} : SF={}'.format(dataset, SF),
                                          join(output_loc, 'latent_space_' + str(SF) + '_labels'))
    print('Completed.')