def lcp_recognition_binary_model_3():
    """Build the dual-branch ('Dual layer') 1D-CNN for binary LCP recognition.

    Two parallel convolutional branches (part a and part b) read the same
    (6000, 1) input, are each reduced by global average pooling, then
    concatenated and classified by a small dense head with a sigmoid unit.

    :return: uncompiled Keras Model instance (caller is expected to compile)
    """
    visible_in = Input(shape=(6000, 1))

    # Part a: two conv stages, each 2 conv layers deep, pooled in between
    conv_a_1 = Conv1D(32, kernel_size=5, activation='relu', name='conv_a_1')(visible_in)
    conv_a_2 = Conv1D(32, kernel_size=5, activation='relu', name='conv_a_2')(conv_a_1)
    maxpool_a_1 = MaxPooling1D(pool_size=3, strides=2, name='maxp_a_1')(conv_a_2)
    drop_a_1 = Dropout(0.3, name='drop_a_1')(maxpool_a_1)
    conv_a_3 = Conv1D(64, kernel_size=5, activation='relu', name='conv_a_3')(drop_a_1)
    conv_a_4 = Conv1D(128, kernel_size=5, activation='relu', name='conv_a_4', use_bias=False)(conv_a_3)
    maxpool_a_2 = MaxPooling1D(pool_size=3, strides=2, name='maxp_a_2')(conv_a_4)
    gap_a_1 = GlobalAveragePooling1D(name='gap_a_1')(maxpool_a_2)

    # Part b: shallower branch -- one pooled stage then a single wide conv
    conv_b_1 = Conv1D(32, kernel_size=5, activation='relu', name='conv_b_1')(visible_in)
    conv_b_2 = Conv1D(32, kernel_size=5, activation='relu', name='conv_b_2')(conv_b_1)
    maxpool_b_1 = MaxPooling1D(pool_size=3, strides=2, name='maxp_b_1')(conv_b_2)
    drop_b_1 = Dropout(0.3, name='drop_b_1')(maxpool_b_1)
    conv_b_3 = Conv1D(128, kernel_size=5, activation='relu', name='conv_b_3')(drop_b_1)
    gap_b_1 = GlobalAveragePooling1D(name='gap_b_1')(conv_b_3)

    # Layer 2: merge both branches and classify
    merge_1 = concatenate([gap_a_1, gap_b_1])
    dense_1 = Dense(50, activation='relu', name='dense_1')(merge_1)
    drop_1 = Dropout(0.2, name='drop_1')(dense_1)
    visible_out = Dense(1, activation='sigmoid', name='dense_2')(drop_1)

    model = Model(inputs=visible_in, outputs=visible_out)

    # model.summary() prints to stdout itself and returns None;
    # the original print(model.summary()) emitted a spurious trailing 'None'.
    model.summary()

    save_model_plot = direct_to_dir(where='result') + 'lcp_recognition_binary_model_3.png'
    plot_model(model, to_file=save_model_plot)

    return model
'''
By Using Parallel Run, the total time taken to process all 59 tdms files reduces from 27hrs to 12 hrs
This script is to combine all 3 csv of max vector generated in to one. All 3 csv must contains all same classes.
This script will gather samples of same class and append into a continuous rows of vector.
'''
import numpy as np
import pandas as pd

from src.utils.helpers import direct_to_dir

# change the filename here only
folder_dir = direct_to_dir(where='result') + 'cwt_xcor_maxpoints_vector_dataset_bounded_xcor_4'
filename_1 = folder_dir + '_p1.csv'
filename_2 = folder_dir + '_p2.csv'
filename_3 = folder_dir + '_p3.csv'
filename_combined = folder_dir + '.csv'

file_1_df = pd.read_csv(filename_1)
file_2_df = pd.read_csv(filename_2)
file_3_df = pd.read_csv(filename_3)

# store column name for re-saving; discard first col name (the index column).
# NOTE: Index.get_values() was deprecated in pandas 0.25 and removed in 1.0;
# Index.tolist() returns the same list and works on both old and new pandas.
column_name = file_1_df.columns.tolist()[1:]

# find unique labels in data 'label' column
all_label_1 = np.unique(file_1_df.values[:, -1])
all_label_2 = np.unique(file_2_df.values[:, -1])
all_label_3 = np.unique(file_3_df.values[:, -1])

# make sure all csv contains same labels
# CWT config: mother wavelet, scale vector, and sampling rate of the AE signals
cwt_wavelet = 'gaus1'
scale = np.linspace(2, 30, 100)
fs = 1e6  # sampling frequency in Hz

# segmentation
no_of_segment = 2  # 10 is showing a consistent pattern

# DATA POINT ----------------------------------------------------------------------------------------------------------
# read leak data
# on_pc selects the data source: full dataset from drive F (True) vs. a small
# local test folder on the laptop (False).
on_pc = True

if on_pc:
    data = AcousticEmissionDataSet_13_7_2018(drive='F')
    n_channel_leak = data.test_data(sensor_dist='near', pressure=1, leak=True)
else:
    data_dir = direct_to_dir(where='yh_laptop_test_data') + '1bar_leak/'
    n_channel_leak = read_all_tdms_from_folder(data_dir)
    # NOTE(review): presumably reorders axes to (file, channel, time) before
    # taking the first file -- confirm against read_all_tdms_from_folder's output.
    n_channel_leak = np.swapaxes(n_channel_leak, 1, 2)
    n_channel_leak = n_channel_leak[0]

# processing
print(n_channel_leak.shape)

# break into a list of segmented points (split along the time axis, axis=1)
n_channel_leak = np.split(n_channel_leak, axis=1, indices_or_sections=no_of_segment)
print('Total Segment: ', len(n_channel_leak))
print('Each Segment Dim: ', n_channel_leak[0].shape)

# CWT + XCOR + VISUALIZE SCRIPT ---------------------------------------------------------------------------------------
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt
from matplotlib import cm
from mpl_toolkits.mplot3d import Axes3D
# self lib
from src.utils.helpers import direct_to_dir, shuffle_in_unison, scatter_plot, scatter_plot_3d_vispy
from src.experiment_dataset.dataset_experiment_2018_7_13 import AcousticEmissionDataSet_13_7_2018

# data preprocessing --------------------------------------------------------------------------------------------------
# on_pc gates this offline branch; with on_pc = True the whole block below is skipped.
on_pc = True

if on_pc is False:
    # keep only feature columns 0..49 of the max-vector dataset
    f_range_to_keep = (0, 50)

    filename = direct_to_dir(where='result') + 'test.csv'
    # NOTE(review): pd and StandardScaler are not imported in this chunk --
    # presumably imported earlier in the file; verify before running.
    data_df = pd.read_csv(filename)
    data_df_col_name = data_df.columns[1:-1]

    # convert df values to arrays
    data_mat = data_df.values

    # drop the first column (index), segment the 2d mat into dataset and label
    dataset = data_mat[:, 1:-1]
    label = data_mat[:, -1]
    dataset = dataset[:, f_range_to_keep[0]:f_range_to_keep[1]]

    # std normalize the data
    dataset_shape = dataset.shape
    scaler = StandardScaler()
def generate_leak_1bar_in_cwt_xcor_maxpoints_vector_2(self, saved_filename=None, file_to_process=None, denoise=False):
    '''
    Version 2: instead of CWT for all scales in one shot, we do CWT scale by scale.

    Reads all tdms files from a folder (or the given list), splits each into
    no_of_segment parts, performs CWT followed by XCOR for every sensor pair
    in self.sensor_pair_near, and collects the per-scale xcor argmax offsets
    (relative to the xcor midpoint) into a labelled dataset which is written
    to a CSV file in the result directory.

    :param saved_filename: filename label for the dataset CSV generated
    :param file_to_process: a list of strings, full dir and filename of the tdms
                            to be processed. If None, all tdms in the 1 bar leak
                            folder (self.path_leak_1bar_2to12) are used.
    :param denoise: True it will denoise the signal before CWT and xcor
    :return: None -- despite the original description of a dataset/label pair,
             this method has no return statement; its output is the saved CSV.
    '''
    # CONFIG -------------------------------------------------------------------------------------------------------
    # DWT (used only when denoise=True)
    dwt_wavelet = 'db2'
    dwt_smooth_level = 2

    # CWT
    m_wavelet = 'gaus1'
    scale = np.linspace(2, 10, 100)
    fs = 1e6  # sampling frequency in Hz

    # segmentation per tdms (sample size by each tdms)
    no_of_segment = 2

    # file dir
    if file_to_process is None:
        # list full path of all tdms file in the specified folder
        folder_path = self.path_leak_1bar_2to12
        all_file_path = [(folder_path + f) for f in listdir(folder_path) if f.endswith('.tdms')]
    else:
        all_file_path = file_to_process

    # DATA READING -------------------------------------------------------------------------------------------------
    # creating dict to store each class data; 11 classes (0..10) keyed 'class_[i]'
    all_class = {}
    for i in range(0, 11, 1):
        all_class['class_[{}]'.format(i)] = []

    # for all tdms file in folder (Warning: It takes 24min for 1 tdms file)
    for tdms_file in all_file_path:
        # read raw from drive; swap to put channels on axis 0
        n_channel_data_near_leak = read_single_tdms(tdms_file)
        n_channel_data_near_leak = np.swapaxes(n_channel_data_near_leak, 0, 1)

        if denoise:
            # DWT-smooth every channel independently before feature extraction
            temp = []
            for signal in n_channel_data_near_leak:
                denoised_signal = dwt_smoothing(x=signal, wavelet=dwt_wavelet, level=dwt_smooth_level)
                temp.append(denoised_signal)
            n_channel_data_near_leak = np.array(temp)

        # split on time axis into no_of_segment
        n_channel_leak = np.split(n_channel_data_near_leak, axis=1, indices_or_sections=no_of_segment)

        # dist_diff indexes the class label; incremented once per sensor pair,
        # so sensor pairs in self.sensor_pair_near are assumed ordered by
        # distance difference 0..10 m -- TODO confirm against its definition.
        dist_diff = 0
        # for all sensor combination
        for sensor_pair in self.sensor_pair_near:
            segment_no = 0
            pb = ProgressBarForLoop(title='CWT+Xcor using {}'.format(sensor_pair), end=len(n_channel_leak))
            # for all segmented signals
            for segment in n_channel_leak:
                max_xcor_vector = []
                # for all scales (one CWT per scale to bound memory use)
                for s in scale:
                    pos1_leak_cwt, _ = pywt.cwt(segment[sensor_pair[0]], scales=s, wavelet=m_wavelet)
                    pos2_leak_cwt, _ = pywt.cwt(segment[sensor_pair[1]], scales=s, wavelet=m_wavelet)

                    # xcor for every pair of cwt
                    xcor, _ = one_dim_xcor_1d_input(input_mat=[pos1_leak_cwt, pos2_leak_cwt], pair_list=[(0, 1)])
                    xcor = xcor[0]

                    # midpoint in xcor (zero-lag position)
                    mid = xcor.shape[0] // 2 + 1

                    # 24000 = fs*24ms(max deviation in ToA); search window around zero lag
                    upper_xcor_bound = mid + 24000
                    lower_xcor_bound = mid - 24000

                    # find max point index inside the bounded window, expressed
                    # as a lag offset relative to the midpoint
                    max_along_x = np.argmax(xcor[lower_xcor_bound:upper_xcor_bound])
                    max_xcor_vector.append(max_along_x + lower_xcor_bound - mid)

                    # free up memory for unwanted variable
                    pos1_leak_cwt, pos2_leak_cwt, xcor = None, None, None
                    gc.collect()

                # store all feature vector for same class
                all_class['class_[{}]'.format(dist_diff)].append(max_xcor_vector)

                # progress
                pb.update(now=segment_no)
                segment_no += 1

            pb.destroy()
            dist_diff += 1

        # just to display the dict full dim (first sample of every class)
        temp = []
        for _, value in all_class.items():
            temp.append(value[0])
        temp = np.array(temp)
        print('all_class dim: ', temp.shape)

        # free up memory for unwanted variable
        pos1_leak_cwt, pos2_leak_cwt, n_channel_data_near_leak = None, None, None
        gc.collect()

    # transfer all data from dict to array
    dataset = []
    label = []
    # for all class
    for i in range(0, 11, 1):
        # for all samples in a class
        for sample in all_class['class_[{}]'.format(i)]:  # a list of list(max vec)
            dataset.append(sample)
            label.append(i)

    # convert to array
    dataset = np.array(dataset)
    label = np.array(label)
    print('Dataset Dim: ', dataset.shape)
    print('Label Dim: ', label.shape)

    # save to csv: features + trailing label column
    label = label.reshape((-1, 1))
    all_in_one = np.concatenate([dataset, label], axis=1)
    # column label: one 'Scale_..._Freq_...Hz' header per CWT scale
    freq = pywt.scale2frequency(wavelet=m_wavelet, scale=scale) * fs
    column_label = ['Scale_{:.4f}_Freq_{:.4f}Hz'.format(i, j) for i, j in zip(scale, freq)] + ['label']
    df = pd.DataFrame(all_in_one, columns=column_label)
    filename = direct_to_dir(where='result') + 'cwt_xcor_maxpoints_vector_dataset_{}.csv'.format(saved_filename)
    df.to_csv(filename)
# dwt_dec_level = 5 scale = np.linspace(2, 30, 100) fs = 1e6 # segmentation no_of_segment = 10 # 10 is showing a consistent pattern # DATA READING --------------------------------------------------------------------------------------------------------- on_pc = False # test data (1 tdms file only) if on_pc: data = AcousticEmissionDataSet_13_7_2018(drive='F') n_channel_leak = data.test_data(sensor_dist='near', pressure=1, leak=True) else: data_dir = direct_to_dir(where='yh_laptop_test_data') + '1bar_leak/' n_channel_leak = read_all_tdms_from_folder(data_dir) n_channel_leak = np.swapaxes(n_channel_leak, 1, 2) n_channel_leak = n_channel_leak[0] # break into a list of segmented points n_channel_leak = np.split(n_channel_leak, axis=1, indices_or_sections=no_of_segment) print('Total Segment: ', len(n_channel_leak)) print('Each Segment Dim: ', n_channel_leak[0].shape) # signal selection input_signal_1 = n_channel_leak[3][1, :] input_signal_2 = n_channel_leak[3][7, :] # DWT DENOISING --------------------------------------------------------------------------------------------------------
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import AxesGrid
import numpy as np

from src.utils.helpers import direct_to_dir

# Crop the saved xcor-CWT figures for every distance difference (0-10 m) and
# montage all eleven crops, plus the 0 m time-series crop, on one 4x3 grid.
img_bank = []
for dist in range(11):
    filename = direct_to_dir(where='result') + 'xcor_cwt_DistDiff[{}m]_sample[22]'.format(dist) + '.png'
    img_bank.append(mpimg.imread(filename)[350:624, 53:928, :])

filename = direct_to_dir(where='result') + 'xcor_cwt_DistDiff[0m]_sample[22]' + '.png'
time_img = mpimg.imread(filename)[52:323, 40:905, :]

fig = plt.figure(figsize=(15, 7))
fig.subplots_adjust(left=0.02, bottom=0, right=1, top=0.98, wspace=0.1, hspace=0.2)

# cells 1..11: one cropped xcor map per distance difference
for cell_no, cropped in enumerate(img_bank):
    ax = fig.add_subplot(4, 3, cell_no + 1)
    ax.imshow(cropped)
    ax.set_title('Dist_diff[{}m]'.format(cell_no), fontsize=7)

# cell 12: the time-series crop
ax = fig.add_subplot(4, 3, 12)
ax.imshow(time_img)
ax.set_title('Time Series of 0m', fontsize=7)

# grid_0 = AxesGrid(fig, 141,
#                   nrows_ncols=(5, 2),