import os
from datetime import datetime

import joblib
import numpy as np
import pandas as pd
from sklearn.cluster import AgglomerativeClustering, KMeans, SpectralClustering
from sklearn.metrics import (adjusted_mutual_info_score, adjusted_rand_score,
                             classification_report, homogeneity_score)
from tabulate import tabulate

# Project helpers (get_argument, get_model, read_ds_lvm, resample,
# resample_with_feature_extractor, get_files, get_sliding_window_matrix,
# prepare_data, transform_data, apply_transform, predict_anomaly,
# create_triplet_time_series, get_time_series_dataset) come from the repo's
# own modules and are not imported here.


def main():
    params = get_argument()

    # Model input
    train_file = params['train']
    test_file = params['test']

    # Feature params
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']

    # Feature extraction
    resample_rate = params.get('resample_rate', 6400)
    custom_resample = params.get('custom_resample', False)

    # Model params
    model_type = params['model_type']
    params_file = params['model_params']

    # Read train file
    print("Read Train File: ", os.path.basename(train_file))
    ds_train = pd.read_csv(train_file)

    # Select features
    if features_list:
        ds_train = ds_train[features_list]

    # Resample
    train_len = len(ds_train)
    if custom_resample:
        ds_train = resample_with_feature_extractor(ds_train, resample_rate)
    elif resample_rate > 1:
        ds_train = resample(ds_train, resample_rate)
    print('Train Original File Length: ', train_len)
    print('New File Length {} ({:.02f}%)'.format(
        len(ds_train), 100 * len(ds_train) / train_len))

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Training
    print("Training...")
    model.fit(x_train)

    # Read test file
    print("Read Test File: ", os.path.basename(test_file))
    ds_test = pd.read_csv(test_file)

    # Select features
    if features_list:
        ds_test = ds_test[features_list]

    # Resample
    test_len = len(ds_test)
    if custom_resample:
        ds_test = resample_with_feature_extractor(ds_test, resample_rate)
    elif resample_rate > 1:
        ds_test = resample(ds_test, resample_rate)
    print('Test Original File Length: ', test_len)
    print('New File Length {} ({:.02f}%)'.format(
        len(ds_test), 100 * len(ds_test) / test_len))

    # Testing
    print('Testing...')
    y_pred = predict_anomaly(ds_test, model, kernel, with_skip=False)

    # Encode results in triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    print("Results:")
    results = pd.DataFrame(results)
    print(tabulate(results, headers='keys', tablefmt='psql'))
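# ---------------------------------------------------------------------------
# The scripts in this section rely on shared helpers whose definitions are
# not shown. Below is a minimal sketch of `get_sliding_window_matrix`,
# inferred from how it is called (2-D array in, windows of `kernel` rows
# taken every `stride` rows out, consistent with the window count
# (len(data) - kernel) // stride + 1 computed later in this section). It is
# a hypothetical reconstruction, not the repo's actual implementation.
# ---------------------------------------------------------------------------
import numpy as np


def get_sliding_window_matrix(data, kernel, stride):
    """Stack sliding windows of `kernel` rows, one window every `stride` rows.

    Output shape: (num_windows, kernel, num_features).
    """
    num_rows, num_features = data.shape
    num_windows = (num_rows - kernel) // stride + 1
    windows = np.empty((num_windows, kernel, num_features), dtype=data.dtype)
    for i in range(num_windows):
        start = i * stride
        windows[i] = data[start:start + kernel]
    return windows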
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']

    resample_rate = 6400
    stride = 1
    epochs = 500
    transform_type = 'minmax'
    save_result = True
    output_dir = './results'

    cluster_models = {
        'agglomerative': AgglomerativeClustering,
        'kmeans': KMeans,
        'spectral': SpectralClustering
    }

    for train_id in [1, 2]:
        skip_list = []
        train_list = [train_id]

        ds_train_list = []
        y_train_list = []
        ds_test_list = []
        y_test_list = []

        # Read train and test files
        print('Read all datasets')
        for state_id, folder in enumerate(all_state_folder):
            print('\nRead state: ', os.path.basename(folder))
            files = get_files(folder, ext='lvm')

            selected_train_id = [
                x for x in range(len(files)) if x in train_list
            ]
            if not selected_train_id:
                selected_train_id = [1]

            for i, filename in enumerate(files):
                if i in skip_list:
                    print('Skip: {}'.format(filename))
                    continue

                ds = read_ds_lvm(filename, get_header=False)
                ds = ds[features_list]
                ds = resample(ds, resample_rate)

                if i in selected_train_id:
                    print('Train state {} file: {}'.format(state_id, filename))
                    ds_train_list.append(ds)
                    y_train_list.append(state_id)
                else:
                    print('Test state {} file: {}'.format(state_id, filename))
                    ds_test_list.append(ds)
                    y_test_list.append(state_id)

        # Apply transform
        transformer = None
        if transform_type:
            print('Apply transform: ', transform_type)
            x_train_list, transformer = transform_data(ds_train_list,
                                                       transform_type)
            x_test_list = [
                apply_transform(ds, transformer) for ds in ds_test_list
            ]
        else:
            print('No transform selected')
            x_train_list = ds_train_list
            x_test_list = ds_test_list

        for kernel in [40, 80, 120, 200, 240, 360]:
            # Create train and test matrix sets
            x_train, y_train = prepare_data(x_train_list,
                                            labels=y_train_list,
                                            kernel=kernel,
                                            stride=stride)
            x_test, y_test = prepare_data(x_test_list,
                                          labels=y_test_list,
                                          kernel=kernel,
                                          stride=stride)

            print('Train size: ', x_train.shape)
            print('Train label size: ', y_train.shape)
            print('Test size: ', x_test.shape)
            print('Test label size: ', y_test.shape)

            # Shuffle the training set
            order = np.random.permutation(len(x_train))
            x_new = x_train[order]
            y_new = y_train[order]

            # Baseline: cluster the raw windows
            record = {}
            for cluster_name, cluster_model in cluster_models.items():
                print('\n', cluster_name)

                # TODO: remove the n_clusters param
                cls = cluster_model(n_clusters=4)

                enc_pred = x_test.reshape(len(x_test), -1)
                print(enc_pred.shape)
                y_pred = cls.fit_predict(enc_pred)

                ami = adjusted_mutual_info_score(y_test, y_pred)
                r_score = adjusted_rand_score(y_test, y_pred)
                hom_score = homogeneity_score(y_test, y_pred)

                record[cluster_name] = {
                    'adjusted_mutual_info_score': ami,
                    'adjusted_rand_score': r_score,
                    'homogeneity_score': hom_score
                }
                print(record[cluster_name])

            ds_res = pd.DataFrame(record)

            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(
                    output_dir,
                    'results_{}_cluster_{}_{}.csv'.format(
                        train_id, 'raw', kernel))
                ds_res.to_csv(filename, index=True)

            # Cluster the learned encodings of each autoencoder
            for model_type in ['cnn', 'deep', 'lstm', 'bilstm']:
                # Model initialization
                print("Model initialization: {}".format(model_type))
                model = get_model(model_type)

                # Training
                print("Training...")
                model.fit(x=x_new, epochs=epochs, verbose=2)

                enc_pred = model.encoder.predict(x_test)
                enc_pred = enc_pred.reshape((len(x_test), -1))

                record = {}
                for cluster_name, cluster_model in cluster_models.items():
                    print('\n', cluster_name)
                    print(enc_pred.shape)

                    # TODO: remove the n_clusters param
                    cls = cluster_model(n_clusters=4)
                    y_pred = cls.fit_predict(enc_pred)

                    ami = adjusted_mutual_info_score(y_test, y_pred)
                    r_score = adjusted_rand_score(y_test, y_pred)
                    hom_score = homogeneity_score(y_test, y_pred)

                    record[cluster_name] = {
                        'adjusted_mutual_info_score': ami,
                        'adjusted_rand_score': r_score,
                        'homogeneity_score': hom_score
                    }
                    print(record[cluster_name])

                ds_res = pd.DataFrame(record)

                if save_result:
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir, exist_ok=True)
                    filename = os.path.join(
                        output_dir,
                        'results_{}_cluster_{}_{}.csv'.format(
                            train_id, model_type, kernel))
                    ds_res.to_csv(filename, index=True)
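# ---------------------------------------------------------------------------
# Sketch of `prepare_data`, inferred from the calls above: it takes a list of
# per-file datasets plus one label per file and returns a stacked window
# matrix with one label per window. A hypothetical reconstruction, not the
# repo's actual code; it reuses the `get_sliding_window_matrix` sketch above.
# ---------------------------------------------------------------------------
import numpy as np


def prepare_data(ds_list, labels, kernel, stride):
    """Window every dataset and repeat its label once per window."""
    x_parts, y_parts = [], []
    for ds, label in zip(ds_list, labels):
        values = ds.values if hasattr(ds, 'values') else np.asarray(ds)
        x = get_sliding_window_matrix(values, kernel, stride)
        x_parts.append(x)
        y_parts.append(np.full(len(x), label))
    return np.vstack(x_parts), np.concatenate(y_parts)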
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']
    model_type = params['model_type']
    resample_rate = params.get('resample_rate', 6400)
    with_decision_score = params.get('with_decision_score', False)
    custom_resample = params.get('custom_resample', False)

    # resample_rate = 12800  # 12800 samples = 1 second
    # num_sample = 1000000
    with_skip = False
    params_file = './params/params_{}.json'.format(model_type)
    save_result = True
    overwrite = True
    output_dir = './results'

    result_array = []

    # Get a list of lists of files: one list per state
    curr_files = []
    # Get the flat list of test files
    test_files = []
    for folder in all_state_folder:
        files = get_files(folder, ext='lvm')
        curr_files.append(files)
        test_files += files

    max_size = min([len(files) for files in curr_files[:3]])

    # Build the train packs: each element is a list of files
    # (one per state) used for a single training run
    train_files = []
    for i in range(max_size):
        train_pack = [files[i] for files in curr_files[:3]]
        for j in range(1, len(train_pack)):
            train_files.append(train_pack[:j + 1])

    for train_pack in train_files:
        if len(train_pack) < 3:
            continue

        print('\n' + '\\\\//' * 20)

        selected_files = []
        train_states = []
        x_states = []

        print('\n Train Pack')
        for train_file in train_pack:
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            print("State: ", train_state)
            print("Read File: ", os.path.basename(train_file))
            ds_train = read_ds_lvm(train_file, get_header=False)

            # Check train
            if ds_train is None or ds_train.empty:
                print('Unable to read train file')
                continue

            # Select features
            ds_train = ds_train[features_list]

            # Resample
            train_len = len(ds_train)
            if custom_resample:
                ds_train = resample_with_feature_extractor(
                    ds_train, resample_rate)
            else:
                ds_train = resample(ds_train, resample_rate)
            # ds_train = ds_train[:num_sample]
            print('Original File Length: ', train_len)
            print('New File Length {} ({:.02f}%)'.format(
                len(ds_train), 100 * len(ds_train) / train_len))

            # Create training set
            print("Create set")
            x_train = get_sliding_window_matrix(ds_train.values, kernel,
                                                stride)
            print('Shape ', x_train.shape)

            selected_files.append(train_file)
            train_states.append(train_state)
            x_states.append(x_train)

        x_states = np.vstack(x_states)
        print('\n Train Size: ', x_states.shape)
        print('Train state: ', train_states)

        # Model initialization
        print("Model initialization: {}".format(model_type))
        model = get_model(model_type, params_file=params_file)

        # Training
        print("Training...")
        model.fit(x_states)

        for test_file in test_files:
            test_state = os.path.split(os.path.dirname(test_file))[-1]

            if test_file in selected_files:
                continue
            # if test_state in train_states:
            #     continue

            print("\n State Test: ", test_state)
            print("Read Test File: ", os.path.basename(test_file))
            ds_test = read_ds_lvm(test_file, get_header=False)

            # Check test
            if ds_test is None or ds_test.empty:
                print('Unable to read test file')
                continue

            # Select features
            ds_test = ds_test[features_list]

            # Resample
            test_len = len(ds_test)
            if custom_resample:
                ds_test = resample_with_feature_extractor(
                    ds_test, resample_rate)
            else:
                ds_test = resample(ds_test, resample_rate)
            # ds_test = ds_test[:num_sample]
            print('Test Original File Length: ', test_len)
            print('New File Length {} ({:.02f}%)'.format(
                len(ds_test), 100 * len(ds_test) / test_len))

            test_stride = kernel if with_skip else 1

            # Create test set
            print("Create testing set")
            x_test = get_sliding_window_matrix(ds_test.values, kernel,
                                               test_stride)
            print('Test shape ', x_test.shape)

            # Testing
            print('Testing...')
            if with_decision_score:
                y_pred = model.decision_score(x_test)
            else:
                y_pred = model.predict(x_test)

            num_error = np.sum(y_pred > 0)
            mean_error = np.mean(y_pred)
            mean_only_error = np.mean(y_pred[y_pred > 0]) if num_error else 0

            if not num_error:
                print("Results: no anomalies found")
            else:
                print("Results: {} anomalies "
                      "(mean {:.05f} over {} windows)".format(
                          num_error, mean_error, len(x_test)))

            result_record = {
                'MODEL': model_type,
                'KERNEL': kernel,
                'STRIDE': stride,
                'TRAIN_STATE': train_states,
                'TRAIN': [
                    os.path.basename(train_file)
                    for train_file in selected_files
                ],
                'TEST_STATE': test_state,
                'TEST': os.path.basename(test_file),
                'NUM_SINGLE_ANOMALY': num_error,
                'PCT_ANOMALY': mean_error,
                'NUM_SAMPLE_ANOMALY': mean_only_error,
                'NUM_SAMPLE': len(x_test),
                'LABEL': test_state not in train_states
            }
            result_array.append(result_record)

            if save_result:
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir, exist_ok=True)
                filename = os.path.join(
                    output_dir, 'results_multi_' + model_type + '.csv')

                result_ds = pd.DataFrame(result_array)
                if os.path.isfile(filename) and not overwrite:
                    prev_result_ds = pd.read_csv(filename)
                    result_ds = pd.concat([prev_result_ds, result_ds],
                                          axis=0, ignore_index=True)
                result_ds.to_csv(filename, index=False)
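# ---------------------------------------------------------------------------
# Sketch of `resample`, inferred from its use above: with the raw signal at
# 12800 samples per second (see the comment in the script above),
# resample_rate=6400 shrinks each file to a small fraction of its length, so
# the helper is assumed to aggregate consecutive blocks of `resample_rate`
# rows by their mean. This is a guess; the repo's actual implementation may
# decimate or aggregate differently.
# ---------------------------------------------------------------------------
import numpy as np
import pandas as pd


def resample(ds, resample_rate):
    """Downsample by averaging consecutive blocks of `resample_rate` rows."""
    if resample_rate <= 1:
        return ds
    return ds.groupby(np.arange(len(ds)) // resample_rate).mean()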
def main():
    # Configuration: not present in the original snippet; assumed to mirror
    # the other entry points in this repo (same argument keys and defaults).
    params = get_argument()
    train_file = params['train']
    test_file = params['test']
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']
    model_type = params['model_type']
    params_file = params['model_params']
    resample_rate = params.get('resample_rate', 6400)
    custom_resample = params.get('custom_resample', False)
    with_decision_score = params.get('with_decision_score', False)
    with_skip = False

    train_state = os.path.split(os.path.dirname(train_file))[-1]
    print("\n State Train: ", train_state)
    print("Read Train File: ", os.path.basename(train_file))
    ds_train = read_ds_lvm(train_file, get_header=False)

    # Check train
    if ds_train is None or ds_train.empty:
        print('Unable to read train file')
        return

    # Select features
    ds_train = ds_train[features_list]

    # Resample
    train_len = len(ds_train)
    if custom_resample:
        ds_train = resample_with_feature_extractor(ds_train, resample_rate)
    else:
        ds_train = resample(ds_train, resample_rate)
    # ds_train = ds_train[:num_sample]
    print('Train Original File Length: ', train_len)
    print('New File Length {} ({:.02f}%)'.format(
        len(ds_train), 100 * len(ds_train) / train_len))

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)
    print('Train shape ', x_train.shape)

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Training
    print("Training...")
    model.fit(x_train)

    test_state = os.path.split(os.path.dirname(test_file))[-1]
    print("\n State Test: ", test_state)
    print("Read Test File: ", os.path.basename(test_file))
    ds_test = read_ds_lvm(test_file, get_header=False)

    # Check test
    if ds_test is None or ds_test.empty:
        print('Unable to read test file')
        return

    # Select features
    ds_test = ds_test[features_list]

    # Resample
    test_len = len(ds_test)
    if custom_resample:
        ds_test = resample_with_feature_extractor(ds_test, resample_rate)
    else:
        ds_test = resample(ds_test, resample_rate)
    # ds_test = ds_test[:num_sample]
    print('Test Original File Length: ', test_len)
    print('New File Length {} ({:.02f}%)'.format(
        len(ds_test), 100 * len(ds_test) / test_len))

    # Testing
    # y_pred = predict_anomaly(ds_test, model, kernel, with_skip=with_skip)
    test_stride = kernel if with_skip else 1

    # Create test set
    print("Create testing set")
    x_test = get_sliding_window_matrix(ds_test.values, kernel, test_stride)
    print('Test shape ', x_test.shape)

    # Testing
    print('Testing...')
    if with_decision_score:
        y_pred = model.decision_score(x_test)
    else:
        y_pred = model.predict(x_test)

    num_error = np.sum(y_pred > 0)
    mean_error = np.mean(y_pred)
    mean_only_error = np.mean(y_pred[y_pred > 0]) if num_error else 0

    if not num_error:
        print("Results: no anomalies found")
    else:
        print("Results: {} anomalies "
              "(mean {:.05f}, anomaly mean {:.05f}, {} windows)".format(
                  num_error, mean_error, mean_only_error, len(x_test)))

    # Encode results in triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    results = pd.DataFrame(results)
    if results.empty:
        print("Results: no anomalies found")
    else:
        # print(tabulate(results, headers='keys', tablefmt='psql'))
        test_stride = kernel if with_skip else 1

        # Number of test samples of kernel length
        test_sample = int((len(ds_test) - kernel) / test_stride) + 1

        # Number of single anomaly points
        tot = results['support'].sum()
        pct_tot = 100 * tot / (test_sample * test_stride)
        print("Results: {} (record {:.02f})".format(tot, pct_tot))

        if with_skip:
            # Number of anomaly samples
            tot_sample = int(tot / test_stride)
            print("Anomaly Sample: {} (test sample {:.02f})".format(
                tot_sample, test_sample))
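# ---------------------------------------------------------------------------
# Sketch of `create_triplet_time_series`, inferred from its use above: it
# turns the per-window predictions into runs of consecutive anomalies, and
# with `with_support=True` each record carries the run length (the scripts
# later sum the 'support' column). A hypothetical reconstruction.
# ---------------------------------------------------------------------------
import numpy as np


def create_triplet_time_series(y_pred, with_support=False):
    """Encode a binary sequence as (start, end[, support]) anomaly runs."""
    flags = np.asarray(y_pred) > 0
    triplets, start = [], None
    for i, flag in enumerate(flags):
        if flag and start is None:
            start = i
        elif not flag and start is not None:
            record = {'start': start, 'end': i - 1}
            if with_support:
                record['support'] = i - start
            triplets.append(record)
            start = None
    if start is not None:
        record = {'start': start, 'end': len(flags) - 1}
        if with_support:
            record['support'] = len(flags) - start
        triplets.append(record)
    return triplets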
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']
    features_list = params['features_list']

    resample_rate = 6400
    stride = 1
    epochs = 300
    transform_type = 'minmax'
    save_result = True
    output_dir = './results'

    for train_id in [1, 2, 0]:
        skip_list = []
        train_list = [train_id]

        ds_train_list = []
        y_train_list = []
        ds_test_list = []
        y_test_list = []

        # Read train and test files
        print('Read all datasets')
        for state_id, folder in enumerate(all_state_folder):
            print('\nRead state: ', os.path.basename(folder))
            files = get_files(folder, ext='lvm')

            selected_train_id = [
                x for x in range(len(files)) if x in train_list
            ]
            if not selected_train_id:
                selected_train_id = [1]

            for i, filename in enumerate(files):
                if i in skip_list:
                    print('Skip: {}'.format(filename))
                    continue

                ds = read_ds_lvm(filename, get_header=False)
                ds = ds[features_list]
                ds = resample(ds, resample_rate)

                if i in selected_train_id:
                    print('Train state {} file: {}'.format(state_id, filename))
                    ds_train_list.append(ds)
                    y_train_list.append(state_id)
                else:
                    print('Test state {} file: {}'.format(state_id, filename))
                    ds_test_list.append(ds)
                    y_test_list.append(state_id)

        # Apply transform
        transformer = None
        if transform_type:
            print('Apply transform: ', transform_type)
            x_train_list, transformer = transform_data(ds_train_list,
                                                       transform_type)
            x_test_list = [
                apply_transform(ds, transformer) for ds in ds_test_list
            ]
        else:
            print('No transform selected')
            x_train_list = ds_train_list
            x_test_list = ds_test_list

        for kernel in [40, 80, 120, 200, 240, 360]:
            # Create train and test matrix sets
            x_train, y_train = prepare_data(x_train_list,
                                            labels=y_train_list,
                                            kernel=kernel,
                                            stride=stride)
            x_test, y_test = prepare_data(x_test_list,
                                          labels=y_test_list,
                                          kernel=kernel,
                                          stride=stride)

            print('Train size: ', x_train.shape)
            print('Train label size: ', y_train.shape)
            print('Test size: ', x_test.shape)
            print('Test label size: ', y_test.shape)

            # Shuffle the training set
            order = np.random.permutation(len(x_train))
            x_new = x_train[order]
            y_new = y_train[order]

            for model_type in [
                    'classifier', 'linear', 'cnn', 'deep', 'lstm', 'bilstm'
            ]:
                # Model initialization
                print("Model initialization: {}".format(model_type))
                model = get_model(model_type)

                # Training
                print("Training...")
                model.fit(x=x_new, y=y_new, epochs=epochs, batch_size=32,
                          verbose=2)

                y_pred = model.predict(x_test, classifier=True)
                print(classification_report(y_test, y_pred))

                ds_res = pd.DataFrame(
                    classification_report(y_test, y_pred, output_dict=True))

                if save_result:
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir, exist_ok=True)
                    filename = os.path.join(
                        output_dir,
                        'results_{}_accuracy_{}_{}.csv'.format(
                            train_id, model_type, kernel))
                    ds_res.to_csv(filename, index=True)
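# ---------------------------------------------------------------------------
# Sketch of `transform_data` / `apply_transform`, inferred from the calls
# above: fit one scaler on the concatenated training files, then apply it to
# every dataset. The 'minmax' branch matches the scripts; the 'standard'
# fallback is an assumption.
# ---------------------------------------------------------------------------
import pandas as pd
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def apply_transform(ds, transformer):
    """Apply a fitted scaler, keeping the DataFrame columns and index."""
    return pd.DataFrame(transformer.transform(ds),
                        columns=ds.columns, index=ds.index)


def transform_data(ds_list, transform_type):
    """Fit a scaler on all train data; return scaled copies plus the scaler."""
    scaler = MinMaxScaler() if transform_type == 'minmax' else StandardScaler()
    scaler.fit(pd.concat(ds_list, axis=0))
    return [apply_transform(ds, scaler) for ds in ds_list], scaler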
def main():
    output_dir = './results'

    selected_files = [
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 2/testaccelerometri.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 3/testaccelerometri_1.lvm",
        "/export/static/pub/softlab/dataset_sbdio/Anomaly Detection/TEST 4/testaccelerometri.lvm",
    ]

    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]

    stride = 1
    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]
    kernel_list = [
        180 if model_type in ['cnn', 'lstm', 'deep'] else 10
        for model_type in model_list
    ]
    resample_rate = 6400

    # Initialize result array to record performance results
    result_array = []

    # Model cycle
    for model_type, kernel in zip(model_list, kernel_list):
        print('\n\n')
        print('\nModel: {}\n'.format(model_type))
        params_file = './params/params_{}.json'.format(model_type)

        # Train cycle
        for i in range(len(selected_files)):
            x_train = []

            # Get train data
            for pos, train_file in enumerate(selected_files[:i + 1]):
                if i > 0 and pos == 0:
                    continue

                ds_train = read_ds_lvm(train_file, get_header=False)
                if ds_train is None or ds_train.empty:
                    raise ValueError('Unable to read train file')

                ds_train = ds_train[features_list]
                ds_train = resample(ds_train, resample_rate)

                x = get_sliding_window_matrix(ds_train.values, kernel, stride)
                if pos == 0:
                    x = x[:len(x) // 2]
                x_train.append(x)

            # Train set
            x_train = np.vstack(x_train)
            print('\nTrain size: {}\n'.format(len(x_train)))

            # Model init
            model = get_model(model_type, params_file=params_file)

            # Model training
            train_start = datetime.now()
            model.fit(x_train)
            train_end = datetime.now()

            # Test cycle
            for j in range(len(selected_files)):
                x_test = []

                # Get test data
                for pos, test_file in enumerate(selected_files[:j + 1]):
                    if j > 0 and pos == 0:
                        continue

                    ds_test = read_ds_lvm(test_file, get_header=False)
                    if ds_test is None or ds_test.empty:
                        raise ValueError('Unable to read test file')

                    ds_test = ds_test[features_list]
                    ds_test = resample(ds_test, resample_rate)

                    x = get_sliding_window_matrix(ds_test.values, kernel,
                                                  stride)
                    if pos == 0:
                        x = x[:1]
                    x_test.append(x)

                # Test set
                x_test = np.vstack(x_test)
                print('\nTest size: {}\n'.format(len(x_test)))

                # Model predict
                test_start = datetime.now()
                model.predict(x_test)
                test_end = datetime.now()

                result_record = {
                    'model': model_type,
                    'train_size': len(x_train),
                    'train_time': train_end - train_start,
                    'test_size': len(x_test),
                    'test_time': test_end - test_start,
                }
                result_array.append(result_record)

    # Save results
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir, exist_ok=True)
    filename = os.path.join(output_dir, 'performance.csv')
    result_ds = pd.DataFrame(result_array)
    result_ds.to_csv(filename, index=False)
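# ---------------------------------------------------------------------------
# Sketch of `get_files`, inferred from the calls above (folder in, list of
# '.lvm' paths out). A minimal glob-based guess; sorting is assumed so that
# the positional train/test indexing in the scripts is deterministic.
# ---------------------------------------------------------------------------
import glob
import os


def get_files(folder, ext=None):
    """Sorted list of the files in `folder`, optionally filtered by extension."""
    pattern = '*.{}'.format(ext) if ext else '*'
    return sorted(glob.glob(os.path.join(folder, pattern)))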
def main():
    # Configuration: not shown in the original snippet; assumed to come from
    # the argument parser as in the other entry points. The `sep`,
    # `datetime_col`, `save_model` and `save` keys are assumptions here.
    params = get_argument()
    train_file = params['train']
    test_file = params['test']
    features_list = params['features_list']
    kernel = params['kernel']
    stride = params['stride']
    model_type = params['model_type']
    params_file = params['model_params']
    sep = params.get('sep', ',')
    datetime_col = params.get('datetime_col', None)
    save_model = params.get('save_model', False)
    save = params.get('save', True)
    output_dir = './results'

    print('Read input data')

    # Get train dataset
    print('train: {}'.format(train_file))
    ds_train = get_time_series_dataset(filename=train_file, sep=sep,
                                       col=datetime_col)

    # Check train
    if ds_train is None:
        raise ValueError('Unable to read train file')

    # Get test dataset
    print('test: {}'.format(test_file))
    ds_test = get_time_series_dataset(filename=test_file, sep=sep,
                                      col=datetime_col)

    # Check test
    if ds_test is None:
        raise ValueError('Unable to read test file')

    print('from {} to {}'.format(ds_test.index.min(), ds_test.index.max()))

    # Get features
    print('Select features')
    features = features_list
    if not features:
        features = ds_train.columns.to_list()
    elif set(features).difference(set(ds_train.columns)):
        raise ValueError('Some selected features are missing from the train file')

    assert np.all(ds_train.columns == ds_test.columns), \
        'Train and test files have different features'

    # Select features
    ds_train = ds_train[features]
    ds_test = ds_test[features]

    # if visualize:
    #     fig, ax = plt.subplots(2, 1, figsize=(20, 10))
    #     ds_train.plot(ax=ax[0])
    #     ax[0].set_title('Training Data')
    #     ds_test.plot(ax=ax[1])
    #     ax[1].set_title('Test Data')
    #     plt.show()

    # Model initialization
    print("Model initialization: {}".format(model_type))
    model = get_model(model_type, params_file=params_file)

    # Create training set
    print("Create training set")
    x_train = get_sliding_window_matrix(ds_train.values, kernel, stride)

    # Training
    print("Training...")
    model.fit(x_train)

    # Option 1: save the trained model
    if save_model:
        # Create output directory
        filename = os.path.join(output_dir, 'model_{}.pkl'.format(model_type))
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)

        # Save the trained model
        joblib.dump(model, filename)

        # Load the trained model
        model = joblib.load(filename)

    # Testing
    print('Testing...')
    y_pred = predict_anomaly(ds_test, model, kernel, with_skip=False)

    # Encode results in triplet format
    results = create_triplet_time_series(y_pred, with_support=True)

    # Show results
    print("Results:")
    results = pd.DataFrame(results)
    print(tabulate(results, headers='keys', tablefmt='psql'))

    # Save results
    if save:
        filename = os.path.basename(test_file)
        filename = os.path.join(output_dir,
                                'results_' + model_type + '_' + filename)
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir, exist_ok=True)
        results.to_csv(filename, sep=sep, index=False)
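# ---------------------------------------------------------------------------
# Sketch of `predict_anomaly`: one of the scripts above inlines the same
# logic (window the series with stride = kernel when skipping, else 1, then
# score with the trained model), so the helper is assumed to wrap exactly
# that. It reuses the `get_sliding_window_matrix` sketch from earlier.
# ---------------------------------------------------------------------------
def predict_anomaly(ds, model, kernel, with_skip=False):
    """Window the test series and score each window with the trained model."""
    test_stride = kernel if with_skip else 1
    x_test = get_sliding_window_matrix(ds.values, kernel, test_stride)
    return model.predict(x_test)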
def main():
    params = get_argument()
    all_state_folder = params['all_state_folder']

    size = 3
    features_list = [
        "Acceleration_X1", "Acceleration_Y1", "Acceleration_Z1",
        "Acceleration_X2", "Acceleration_Y2", "Acceleration_Z2",
        "Acceleration_X3", "Acceleration_Y3", "Acceleration_Z3"
    ]
    stride = 1
    model_list = [
        'cnn',
        'lstm',
        'deep',
        'isolation_forest',
        'setup_clustering',
        'pca',
        'lof',
        'svm',
    ]
    kernel_list = [
        180 if model_type in ['cnn', 'lstm', 'deep'] else 10
        for model_type in model_list
    ]
    resample_rate = 6400
    save_result = True
    output_dir = './results'

    # Initialize result array to record the outcome
    # of each train and test step
    result_array = []

    # Get files from the selected folders to use for training and testing
    curr_files = []
    for folder in all_state_folder:
        curr_files += get_files(folder, ext='lvm')
    test_files = curr_files

    for model_type, kernel in zip(model_list, kernel_list):
        print('\n' + '\\\\//' * 20)
        print('\n Model: {}\n'.format(model_type))
        params_file = './params/params_{}.json'.format(model_type)

        for pos, train_file in enumerate(curr_files):
            skip_step = False
            train_state = os.path.split(os.path.dirname(train_file))[-1]
            x_train = []

            print("\n State Train: ", train_state)
            for i in range(size):
                if pos + i >= len(curr_files):
                    print('Not enough files')
                    skip_step = True
                    break

                tmp_file = curr_files[pos + i]
                tmp_state = os.path.split(os.path.dirname(tmp_file))[-1]

                if tmp_state != train_state:
                    print('Different state, skipping current train')
                    skip_step = True
                    break

                print("Read {} Train File: {}".format(
                    i, os.path.basename(tmp_file)))
                ds_tmp = read_ds_lvm(tmp_file, get_header=False)

                # Check train
                if ds_tmp is None or ds_tmp.empty:
                    print('Unable to read train file')
                    skip_step = True
                    break

                # Select features
                ds_tmp = ds_tmp[features_list]

                # Resample
                ds_tmp = resample(ds_tmp, resample_rate)

                # Create training set
                x_tmp = get_sliding_window_matrix(ds_tmp.values, kernel,
                                                  stride)
                x_train.append(x_tmp)

            if skip_step:
                print('Skip current train')
                continue

            # Train set
            x_train = np.vstack(x_train)
            train_len = len(x_train)
            print('\nTrain size: {}\n'.format(x_train.shape))

            # Model initialization
            print("Model initialization: {}".format(model_type))
            model = get_model(model_type, params_file=params_file)

            # Training
            print("Training...")
            model.fit(x_train)

            for test_file in test_files:
                test_state = os.path.split(os.path.dirname(test_file))[-1]

                if train_state == test_state and test_file == train_file:
                    continue

                print("\n State Test: ", test_state)
                print("Read Test File: ", os.path.basename(test_file))
                ds_test = read_ds_lvm(test_file, get_header=False)

                # Check test
                if ds_test is None or ds_test.empty:
                    print('Unable to read test file')
                    continue

                # Select features
                ds_test = ds_test[features_list]

                # Resample
                test_len = len(ds_test)
                ds_test = resample(ds_test, resample_rate)
                # ds_test = ds_test[:num_sample]
                print('Test Original File Length: ', test_len)
                print('New File Length {} ({:.02f}%)'.format(
                    len(ds_test), 100 * len(ds_test) / test_len))

                test_stride = 1

                # Create test set
                print("Create testing set")
                x_test = get_sliding_window_matrix(ds_test.values, kernel,
                                                   test_stride)
                print('Test shape ', x_test.shape)

                # Testing
                print('Testing...')
                y_pred = model.predict(x_test)

                num_error = np.sum(y_pred > 0)
                mean_error = np.mean(y_pred)
                mean_only_error = (np.mean(y_pred[y_pred > 0])
                                   if num_error else 0)

                if not num_error:
                    print("Results: no anomalies found")
                else:
                    print("Results: {} anomalies "
                          "(mean {:.05f} over {} windows)".format(
                              num_error, mean_error, len(x_test)))

                result_record = {
                    'MODEL': model_type,
                    'KERNEL': kernel,
                    'STRIDE': stride,
                    'TRAIN_STATE': train_state,
                    'TRAIN': os.path.basename(train_file),
                    'TRAIN_SIZE': train_len,
                    'TEST_STATE': test_state,
                    'TEST': os.path.basename(test_file),
                    'TEST_LEN': test_len,
                    'NUM_SINGLE_ANOMALY': num_error,
                    'PCT_ANOMALY': mean_error,
                    'NUM_SAMPLE_ANOMALY': mean_only_error,
                    'NUM_SAMPLE': len(x_test),
                    'LABEL': train_state != test_state
                }
                result_array.append(result_record)

                if save_result:
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir, exist_ok=True)
                    filename = os.path.join(
                        output_dir,
                        'results_single_{}_{}.csv'.format(size, model_type))
                    result_ds = pd.DataFrame(result_array)
                    result_ds.to_csv(filename, index=False)
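# ---------------------------------------------------------------------------
# Sketch of `read_ds_lvm`. The .lvm files above are LabVIEW text exports,
# which are typically tab-separated with a header block closed by an
# '***End_of_Header***' line; everything here (encoding, separator, error
# handling, the get_header return) is an assumption about that format, not
# the repo's actual reader.
# ---------------------------------------------------------------------------
from io import StringIO

import pandas as pd


def read_ds_lvm(filename, get_header=False):
    """Read a LabVIEW .lvm file into a DataFrame; return None on failure."""
    try:
        with open(filename, encoding='latin-1') as f:
            lines = f.readlines()
        # Data starts after the last end-of-header marker, if any
        markers = [i for i, line in enumerate(lines)
                   if line.startswith('***End_of_Header***')]
        skip = markers[-1] + 1 if markers else 0
        ds = pd.read_csv(StringIO(''.join(lines[skip:])), sep='\t')
        if get_header:
            return ds, lines[:skip]
        return ds
    except (OSError, pd.errors.ParserError):
        return None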