import pandas as pd

from tool import file_io


def unpack_raw_data(root_path, points=None, name=None, collection_type='line_base'):
    # Raw line fields: "mac", "ADV", "RSSI", "NAME", "CHANNEL", "TYPE",
    # "TX_POWER", "A_GAIN", "A_TYPE", "BOARD"
    space = []
    # All measurement folders: 00, 05, 10, ...
    dir_list = file_io.get_directory_list(root_path)
    dir_list = file_io.extract_only_directory(dir_list)
    print(dir_list)
    # Visit each folder in turn (e.g. 00)
    for dir_idx, directory in enumerate(dir_list):
        line_base_pack = []
        # Folder holding the measurements for one line
        dir_path = "{}/{}/".format(root_path, directory)
        # Collect the txt files inside it
        file_list = file_io.get_all_file_path(dir_path, file_extension='txt')
        print(file_list)
        # Open and process each file
        for file_idx, file in enumerate(file_list):
            channel_info = file_io.get_pure_filename(file).split('_')[1]
            lines = file_io.read_txt_file(file)
            for line in lines:
                # 0: mac, 1: ADV, 2: RSSI, 3: Name
                split_data = split_raw_data_line(line)
                if split_data != '':
                    for device_pack_idx, device_pack in enumerate(name):
                        for device_idx, device_name in enumerate(device_pack):
                            if device_name in split_data[0]:
                                if len(points[device_pack_idx][dir_idx]) == 0:
                                    continue
                                split_data.append(channel_info)
                                device_point_x = points[device_pack_idx][dir_idx][0]
                                device_point_y = points[device_pack_idx][dir_idx][1]
                                split_data.append(device_point_x)
                                split_data.append(device_point_y)
                                space.append(split_data)
                                line_base_pack.append(split_data)
                                print(split_data)
        pack_pd = pd.DataFrame(
            line_base_pack,
            columns=["mac", "ADV", "RSSI", "NAME", "CHANNEL", "X", "Y"])
        save_path = "{}/{}.csv".format(root_path, directory)
        pack_pd.to_csv(save_path, mode='w', index=False)
    pack_all_pd = pd.DataFrame(
        space, columns=["mac", "ADV", "RSSI", "NAME", "CHANNEL", "X", "Y"])
    save_path = "{}.csv".format(root_path)
    pack_all_pd.to_csv(save_path, mode='w', index=False)
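# split_raw_data_line is called above but not shown in this section. A minimal
# sketch of what it might look like, assuming each raw line is comma-separated
# as "mac,ADV,RSSI,NAME" (the delimiter and field order are assumptions, not
# confirmed by the source; the caller expects a list, or '' for a bad line):
def split_raw_data_line(line):
    parts = line.strip().split(',')
    if len(parts) < 4:
        return ''  # malformed lines are reported as an empty string
    return parts[:4]  # [mac, ADV, RSSI, NAME]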
import numpy as np
import pandas as pd
from torch.utils.data import DataLoader

from tool import file_io


def load_path_loss_with_detail_dataset(input_dir, model_type='CRNN', num_workers=8,
                                       batch_size=128, shuffle=True, input_size=15,
                                       various_input=False):
    # Take the directory the files were saved in and collect the csv files
    file_list = file_io.get_all_file_path(input_dir, file_extension='csv')
    # Pull every row out of the csvs and turn them into numpy arrays
    addition_dataset = []
    for idx, file in enumerate(file_list):
        addition_dataset.append(pd.read_csv(file).to_numpy())
    # Group the rows of each file by the distance (meter) label in column 0
    div_meter_pack = []
    rnn_dataset = []
    for n_idx, pack in enumerate(addition_dataset):
        label = pack[:, 0].tolist()
        label = list(set(label))
        temp_pack = pd.DataFrame(pack)
        for key in label:
            div_meter_pack.append(temp_pack[temp_pack[0] == key].to_numpy())
    # Oversample packs with fewer than 30 rows, then slice every pack into
    # fixed-size sliding windows for the RNN
    for n_idx, pack in enumerate(div_meter_pack):
        if len(pack) < 30:
            temp = pack.tolist()
            temp = temp * (int(30 / len(pack)) + 2)
            pack = np.array(temp)
        for i in range(len(pack) - input_size):
            rnn_dataset.append(pack[i:i + input_size])
        # Variable-length windows (various_input) are currently disabled:
        # if various_input is True:
        #     for i in range(len(pack) - input_size):
        #         rnn_dataset.append(pack[i:i + np.random.randint(input_size - 7) + 7])
    setup_dataset = np.array(rnn_dataset)
    train_data, valid_data, test_data = data_split(setup_dataset, shuffle=shuffle)
    pathloss_train_dataset = PathLossWithDetailDataset(input_data=train_data,
                                                       model_type=model_type)
    pathloss_valid_dataset = PathLossWithDetailDataset(input_data=valid_data,
                                                       model_type=model_type)
    pathloss_test_dataset = PathLossWithDetailDataset(input_data=test_data,
                                                      model_type=model_type)
    pathloss_train_dataloader = DataLoader(pathloss_train_dataset, batch_size=batch_size,
                                           shuffle=shuffle, num_workers=num_workers)
    pathloss_valid_dataloader = DataLoader(pathloss_valid_dataset, batch_size=batch_size,
                                           shuffle=shuffle, num_workers=num_workers)
    pathloss_test_dataloader = DataLoader(pathloss_test_dataset, batch_size=batch_size,
                                          shuffle=shuffle, num_workers=num_workers)
    return pathloss_train_dataloader, pathloss_valid_dataloader, pathloss_test_dataloader
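# Usage sketch for the loader above; the directory path and hyper-parameter
# values are illustrative, not taken from the source:
train_loader, valid_loader, test_loader = load_path_loss_with_detail_dataset(
    input_dir='../dataset/v9/train', model_type='CRNN',
    batch_size=128, input_size=15)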
import joblib
import pandas as pd
from sklearn.preprocessing import RobustScaler

from tool import file_io


def get_scaler_checkpoint(input_dir, scaler_type='robust'):
    file_list = file_io.get_all_file_path(input_dir, file_extension='csv')
    dataset = pd.DataFrame()
    for idx, file in enumerate(file_list):
        temp = pd.read_csv(file, header=None)
        dataset = pd.concat([dataset, temp], ignore_index=True)
    # Column 0 holds the distance (meter) label; fit the scaler on the
    # remaining feature columns only
    x_data = dataset.drop([0], axis='columns')
    if scaler_type == 'robust':
        scaler = RobustScaler().fit(x_data)
        filename = '{}_scaler.pkl'.format(scaler_type)
        joblib.dump(scaler, filename)
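# The saved checkpoint can be restored with joblib and applied to new feature
# rows; a minimal sketch (the function name is ours, and it assumes the same
# label-free column layout used when fitting):
def apply_scaler_checkpoint(x_data, scaler_type='robust'):
    scaler = joblib.load('{}_scaler.pkl'.format(scaler_type))
    return scaler.transform(x_data)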
import pandas as pd

from tool import file_io
from tool import path_loss  # assumed to live in tool alongside file_io


def get_addition_dataset(config):
    file_list = file_io.get_all_file_path(input_dir=config['input_dir'],
                                          file_extension='csv')
    file_list.sort()
    print(file_list)
    target_dataset = []
    addition_dataset = []
    # original files have     -> ['meter', 'mac', 'type', 'rssi']
    # versioned-up files have -> ['meter', 'mac', 'type', 'rssi', 'channel']
    for file in file_list:
        temp = pd.read_csv(file)
        if config['device_id'] != '':
            temp = temp[temp['mac'] == config['device_id']]
        temp = temp.drop(['mac', 'type'], axis=1)  # drop columns we do not need
        target_dataset.append(temp)
    # after the drop -> ['meter', 'rssi', 'channel']
    for item in target_dataset:
        temp = []
        for idx, line in item.iterrows():
            data = line.tolist()
            data.append(config.get('tx_power'))
            data.append(config.get('rx_height'))
            data.append(config.get('tx_antenna_gain'))
            data.append(config.get('rx_antenna_gain'))
            data.append(config.get('covered'))
            if config['use_fspl']:
                data.append(path_loss.get_distance_with_rssi_fspl(data[1]))
            if config['inference']:
                del data[0]  # inference data carries no meter label
            temp.append(data)
        addition_dataset.append(temp)
    # final columns -> ['meter', 'rssi', 'channel', 'tx_power', 'rx_height',
    #                   'tx_antenna_gain', 'rx_antenna_gain', 'covered', 'fspl']
    for idx, item in enumerate(addition_dataset):
        temp = pd.DataFrame(item)
        file_io.create_directory(config['save_dir'])
        temp.to_csv('{}/dataset_{}_mac_{}.csv'.format(config['save_dir'], idx,
                                                      config['device_id']),
                    header=False, index=False)
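# A sketch of the config dict get_addition_dataset expects; only the keys come
# from the code above, every value here is illustrative:
example_config = {
    'input_dir': '../dataset/v9/raw',
    'save_dir': '../dataset/v9/addition',
    'device_id': '',        # empty string keeps every mac address
    'tx_power': 4,
    'rx_height': 1.0,
    'tx_antenna_gain': 0,
    'rx_antenna_gain': 0,
    'covered': 0,
    'use_fspl': True,
    'inference': False,
}
# get_addition_dataset(example_config)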
import numpy as np
import pandas as pd

from tool import file_io

input_dir = '../dataset/v9/loader_test'
input_size = 20

# Take the directory the files were saved in and collect the csv files
file_list = file_io.get_all_file_path(input_dir, file_extension='csv')
# Pull every row out of the csvs and turn them into numpy arrays
addition_dataset = []
for idx, file in enumerate(file_list):
    addition_dataset.append(pd.read_csv(file).to_numpy())
# Group the rows of each file by the distance (meter) label in column 0
div_meter_pack = []
rnn_dataset = []
for n_idx, pack in enumerate(addition_dataset):
    label = pack[:, 0].tolist()
    label = list(set(label))
    temp_pack = pd.DataFrame(pack)
    for key in label:
        div_meter_pack.append(temp_pack[temp_pack[0] == key].to_numpy())
# Oversample packs with fewer than 30 rows, as the loader does
for n_idx, pack in enumerate(div_meter_pack):
    print(len(pack))
    if len(pack) < 30:
        temp = pack.tolist()
        temp = temp * (int(30 / len(pack)) + 6)
        pack = np.array(temp)
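    # The original snippet is truncated here; slicing each pack into sliding
    # windows, exactly as load_path_loss_with_detail_dataset does above,
    # completes the test (added as a sketch):
    for i in range(len(pack) - input_size):
        rnn_dataset.append(pack[i:i + input_size])

rnn_dataset = np.array(rnn_dataset)
# Expected (n_windows, input_size, n_features) when all csvs share one layout
print(rnn_dataset.shape)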