def create_adversarial_sampleset_2remote_mac_disrupt(resource_file_dir_path, file_path, n=10000, k_min=2, k_max=5):
    """Build adversarial samples by swapping the readings of two far-apart BLE beacons (ble14 and ble26)."""
    if os.path.exists(file_path):  # remove any previous output file
        os.remove(file_path)
    mac_dict = globalConfig.get_mac_dict()
    dirs = os.listdir(resource_file_dir_path)
    j = 0  # number of adversarial samples generated so far
    for point_tag in dirs:
        data_path = os.path.join(resource_file_dir_path, point_tag)
        files = os.listdir(data_path)
        for file in files:
            data_file = os.path.join(data_path, file)
            data = pd.read_csv(data_file, header=None)
            tokens = []
            token = [0] * (seq_len + 1)
            begin_time = data.iloc[0][0]
            first_time = begin_time
            i = 0  # position inside the current token sequence
            for row in data.itertuples():
                if j < n:
                    if row[TIME_IDX] - begin_time > timeInterval:
                        token[seq_len] = '0'
                        tokens.append(token)
                        j += 1
                        begin_time = row[TIME_IDX]
                        token = [0] * (seq_len + 1)
                        i = 0
                    if row[TIME_IDX] - first_time > time_each_file * 1000:
                        break
                    idx = mac_dict.get(row[MAC_IDX], -1)
                    if idx == 13 or idx == 25:  # swap the two far-apart beacons, ble14 and ble26
                        idx = 25 + 13 - idx
                    if idx > -1 and i < seq_len:
                        value = getNumber(idx, row[RSSI_IDX])
                        if value > -1:
                            token[i] = value
                            i += 1
                if j >= n:
                    break
            pd.DataFrame(tokens).to_csv(file_path, index=False, encoding='utf-8', mode="a+", header=None)
            print(file + " finish")
            if j >= n:
                break  # enough samples: stop reading files for this point
        if j >= n:
            break  # and stop iterating over points as well
    df = pd.read_csv(file_path, header=None)
    df.to_csv(file_path, columns=[i for i in range(seq_len + 1)])
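# Usage sketch (not part of the original script; the paths below are assumptions):
# resource_file_dir_path is expected to hold one sub-directory per reference point,
# each containing raw beacon-scan CSVs whose columns are addressed via TIME_IDX,
# MAC_IDX and RSSI_IDX.
#
# create_adversarial_sampleset_2remote_mac_disrupt(
#     "data/raw_points",                             # hypothetical input directory
#     "data/adversarial_2remote_mac_disrupt.csv",    # hypothetical output token file
#     n=10000)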
def create_adversarial_sampleset_alternating_noise(resource_file_dir_path, result_token_file, noise):
    """Build adversarial samples by alternately adding and subtracting `noise` from the RSSI readings."""
    if os.path.exists(result_token_file):  # remove any previous output file
        os.remove(result_token_file)
    data_tag = '0'
    noise_flag = 1
    mac_dict = globalConfig.get_mac_dict()
    dirs = os.listdir(resource_file_dir_path)
    for point_tag in dirs:
        data_path = os.path.join(resource_file_dir_path, point_tag)
        files = os.listdir(data_path)
        for file in files:
            data_file = os.path.join(data_path, file)
            data = pd.read_csv(data_file, header=None)
            tokens = []
            token = [0] * (seq_len + 1)
            begin_time = data.iloc[0][0]
            first_time = begin_time
            i = 0  # position inside the current token sequence
            for row in data.itertuples():
                if row[TIME_IDX] - begin_time > timeInterval:
                    token[seq_len] = data_tag
                    tokens.append(token)
                    begin_time = row[TIME_IDX]
                    token = [0] * (seq_len + 1)
                    i = 0
                if row[TIME_IDX] - first_time > time_each_file * 1000:
                    break
                idx = mac_dict.get(row[MAC_IDX], -1)
                if idx > -1 and i < seq_len:
                    if noise_flag:
                        value = getNumber(idx, row[RSSI_IDX] + noise)  # add the noise offset
                        noise_flag = 0
                    else:
                        value = getNumber(idx, row[RSSI_IDX] - noise)  # subtract the noise offset
                        noise_flag = 1
                    if value > -1:
                        token[i] = value
                        i += 1
            pd.DataFrame(tokens).to_csv(result_token_file, index=False, encoding='utf-8', mode="a+", header=None)
            print(file + " finish")
    df = pd.read_csv(result_token_file, header=None)
    df.to_csv(result_token_file, columns=[i for i in range(seq_len + 1)])
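# Usage sketch (not part of the original script; the paths and noise level are
# assumptions): generates a token file in which consecutive RSSI readings are
# perturbed by +noise / -noise alternately.
#
# create_adversarial_sampleset_alternating_noise(
#     "data/raw_points",                             # hypothetical input directory
#     "data/adversarial_alternating_noise.csv",      # hypothetical output token file
#     noise=5)                                       # hypothetical noise offset in dBm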
import os

import pandas as pd
import tensorflow as tf
from keras_bert.backend import keras
from keras_bert.bert import get_model
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

import globalConfig
from NonMasking import NonMasking

training = True
seq_len = globalConfig.seq_len
token_dict = globalConfig.get_mac_dict()

pretrain_model_path = "model/bert_1.h5"
save_model_path = "model/discriminator_2_random_macdisrupt.h5"
train_data_file = "data/train_dataset.csv"
test_data_file = "data/test_dataset.csv"

df_train = pd.read_csv(train_data_file, index_col=0)
df_test = pd.read_csv(test_data_file, index_col=0)
x_train, y_train = df_train.values[:, :seq_len], df_train.values[:, seq_len:]
x_test, y_test = df_test.values[:, :seq_len], df_test.values[:, seq_len:]

config = tf.ConfigProto()
config.gpu_options.allow_growth = True
with tf.Session(config=config) as session:
    input_layer, transformed = get_model(
        token_num=globalConfig.token_num,
        head_num=globalConfig.head_num,
        transformer_num=globalConfig.transformer_num,