import os

import numpy as np

# tfrecord_filename(), write(), valid_split(), the datasets module, FLAGS,
# CWRU, and cwru_load() are assumed to be defined/imported elsewhere in
# this repo.


def save_dataset(dataset_name, output_dir, seed=0):
    """ Save single dataset """
    train_filename = os.path.join(output_dir,
        tfrecord_filename(dataset_name, "train"))
    valid_filename = os.path.join(output_dir,
        tfrecord_filename(dataset_name, "valid"))
    test_filename = os.path.join(output_dir,
        tfrecord_filename(dataset_name, "test"))

    # Skip if they already exist
    if os.path.exists(train_filename) \
            and os.path.exists(valid_filename) \
            and os.path.exists(test_filename):
        if FLAGS.debug:
            print("Skipping:", train_filename, valid_filename,
                test_filename, "already exist")
        return

    if FLAGS.debug:
        print("Saving dataset", dataset_name)

    dataset, dataset_class = datasets.load(dataset_name)

    # Skip normalization if already normalized/bounded, e.g. UCI HAR datasets
    already_normalized = dataset_class.already_normalized

    # Split the original training set into training/validation sets
    valid_data, valid_labels, train_data, train_labels = \
        valid_split(dataset.train_data, dataset.train_labels, seed=seed)

    # Calculate normalization only on the training data
    if FLAGS.normalize != "none" and not already_normalized:
        normalization = datasets.calc_normalization(train_data, FLAGS.normalize)

        # Apply the normalization to the training, validation, and testing data
        train_data = datasets.apply_normalization(train_data, normalization)
        valid_data = datasets.apply_normalization(valid_data, normalization)
        test_data = datasets.apply_normalization(dataset.test_data,
            normalization)
    else:
        test_data = dataset.test_data

    # Save to TFRecord files
    write(train_filename, train_data, train_labels)
    write(valid_filename, valid_data, valid_labels)
    write(test_filename, test_data, dataset.test_labels)
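# For reference, valid_split() is not shown in this file. A minimal sketch of
# the expected behavior (seeded shuffle, then hold out part of the training
# set for validation) is given below; the holdout size and exact logic of the
# repo's real implementation may differ.
def valid_split_sketch(data, labels, seed=0, validation_size=1000):
    # Deterministically shuffle the example indices
    rng = np.random.RandomState(seed)
    indices = rng.permutation(len(data))
    valid_indices = indices[:validation_size]
    train_indices = indices[validation_size:]
    # Return the validation set first, then the training set, matching how
    # valid_split() is unpacked above
    return data[valid_indices], labels[valid_indices], \
        data[train_indices], labels[train_indices]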
def save_cwru_dataset(dataset_name, output_dir, isSubset, seed=0):
    """ Save single CWRU dataset (pre-split variant of save_dataset above) """
    train_filename = os.path.join(output_dir,
        tfrecord_filename(dataset_name, "train"))
    valid_filename = os.path.join(output_dir,
        tfrecord_filename(dataset_name, "valid"))
    test_filename = os.path.join(output_dir,
        tfrecord_filename(dataset_name, "test"))

    # Note: unlike save_dataset(), existing TFRecord files are regenerated
    # rather than skipped
    print("Saving dataset", dataset_name)

    # cwru_load() already returns train/valid/test splits, so no valid_split()
    # call is needed and the seed argument is unused here
    train_data, train_labels, valid_data, valid_labels, test_data, test_labels = \
        cwru_load(dataset_name, isSubset)

    # Calculate normalization only on the training data
    normalization = datasets.calc_normalization(train_data, "minmax")

    # Apply the normalization to the training, validation, and testing data
    train_data = datasets.apply_normalization(train_data, normalization)
    valid_data = datasets.apply_normalization(valid_data, normalization)
    test_data = datasets.apply_normalization(test_data, normalization)

    # Save to TFRecord files
    write(train_filename, train_data, train_labels)
    write(valid_filename, valid_data, valid_labels)
    write(test_filename, test_data, test_labels)
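# write() is also defined elsewhere in this repo. Below is a minimal sketch of
# a TFRecord writer with that signature, assuming TensorFlow and illustrative
# feature names "x" and "y"; the repo's real serialization format may differ.
import tensorflow as tf

def write_sketch(filename, data, labels):
    with tf.io.TFRecordWriter(filename) as writer:
        for x, y in zip(data, labels):
            # One tf.train.Example per (example, label) pair
            example = tf.train.Example(features=tf.train.Features(feature={
                "x": tf.train.Feature(float_list=tf.train.FloatList(
                    value=np.asarray(x, dtype=np.float32).ravel())),
                "y": tf.train.Feature(float_list=tf.train.FloatList(
                    value=[float(y)])),
            }))
            writer.write(example.SerializeToString())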
def subdataset_split(original_dataset=None, feature="Radius"):
    """ Split each CWRU transfer dataset into per-feature subdatasets """
    output_dir = os.path.join("datasets", "tfrecords")
    feature_values = {
        "Hz": ["12", "48"],
        "End": ["Drive", "Fan"],
        "Radius": ["0.007", "0.014", "0.021"],
    }
    transfer_datasets = ["12DriveEndFault", "12FanEndFault", "48DriveEndFault"]

    # Class label layout in the full CWRU datasets:
    #   0.007 in: labels 0-4   (5 classes)
    #   0.014 in: labels 5-7   (3 classes)
    #   0.021 in: labels 8-12  (5 classes)
    #   0.028 in: labels 13-14 (2 classes)
    #   Normal:   label 15     (1 class)

    # Only the fault-radius split is implemented below
    if feature != "Radius":
        raise NotImplementedError("only feature='Radius' is supported")

    for dataset_name in transfer_datasets:
        # Load the CWRU data for the 1797 RPM condition, length-384 windows
        _cwru = CWRU(dataset_name, '1797', 384)
        X_train = np.array(_cwru.X_train, dtype=np.float32)
        X_test = np.array(_cwru.X_test, dtype=np.float32)
        y_train = np.array(_cwru.y_train)
        y_test = np.array(_cwru.y_test)

        for attr in feature_values[feature]:
            subdataset_name = "cwru_" + dataset_name + "_" + attr
            train_filename = os.path.join(output_dir,
                tfrecord_filename(subdataset_name, "train"))
            valid_filename = os.path.join(output_dir,
                tfrecord_filename(subdataset_name, "valid"))
            test_filename = os.path.join(output_dir,
                tfrecord_filename(subdataset_name, "test"))

            # Select only the classes for this fault radius; the test masks
            # are built from y_test and applied to X_test/y_test
            radius = float(attr)
            if radius == 0.007:
                train_mask = y_train < 5
                test_mask = y_test < 5
            elif radius == 0.014:
                train_mask = (y_train >= 5) & (y_train <= 7)
                test_mask = (y_test >= 5) & (y_test <= 7)
            elif radius == 0.021:
                train_mask = (y_train >= 8) & (y_train <= 12)
                test_mask = (y_test >= 8) & (y_test <= 12)

            subdataset_X_train = X_train[train_mask]
            subdataset_y_train = y_train[train_mask]
            subdataset_X_test = X_test[test_mask]
            subdataset_y_test = y_test[test_mask]

            # Split the training subset into training/validation sets
            subdataset_X_valid, subdataset_y_valid, \
                subdataset_X_train, subdataset_y_train = \
                valid_split(subdataset_X_train, subdataset_y_train)

            # Calculate normalization only on the training data
            normalization = datasets.calc_normalization(
                subdataset_X_train, "minmax")

            train_shape = subdataset_X_train.shape
            valid_shape = subdataset_X_valid.shape
            test_shape = subdataset_X_test.shape
            print(subdataset_name)
            print(train_shape)
            print(valid_shape)
            print(test_shape)

            # Apply the normalization to the training, validation, and testing data
            subdataset_X_train = datasets.apply_normalization(
                subdataset_X_train, normalization)
            subdataset_X_valid = datasets.apply_normalization(
                subdataset_X_valid, normalization)
            subdataset_X_test = datasets.apply_normalization(
                subdataset_X_test, normalization)

            # Add a channel dimension: (examples, time steps, 1)
            subdataset_X_train = np.reshape(subdataset_X_train,
                (train_shape[0], train_shape[1], 1))
            subdataset_X_valid = np.reshape(subdataset_X_valid,
                (valid_shape[0], valid_shape[1], 1))
            subdataset_X_test = np.reshape(subdataset_X_test,
                (test_shape[0], test_shape[1], 1))

            subdataset_y_train = np.squeeze(
                np.array(subdataset_y_train, dtype=np.float32))
            subdataset_y_valid = np.squeeze(
                np.array(subdataset_y_valid, dtype=np.float32))
            subdataset_y_test = np.squeeze(
                np.array(subdataset_y_test, dtype=np.float32))

            # Save to TFRecord files
            write(train_filename, subdataset_X_train, subdataset_y_train)
            write(valid_filename, subdataset_X_valid, subdataset_y_valid)
            write(test_filename, subdataset_X_test, subdataset_y_test)
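# A minimal driver, assuming the functions above; the dataset name and the
# isSubset flag passed to save_cwru_dataset() are illustrative, and the
# output directory matches the one hard-coded in subdataset_split().
if __name__ == "__main__":
    output_dir = os.path.join("datasets", "tfrecords")
    os.makedirs(output_dir, exist_ok=True)

    # Write the per-fault-radius CWRU subdatasets
    subdataset_split(feature="Radius")

    # Write one full CWRU transfer dataset (isSubset=False: keep all classes)
    save_cwru_dataset("12DriveEndFault", output_dir, isSubset=False)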