import json
import os
from pathlib import Path

import torch

# get_vocab, get_id_label, load_dataset, DataIter and test are project helpers;
# OutputDirectory and InputDirectory are the pipeline SDK's port annotations.
def fasttext_evaluation(model_testing_result: OutputDirectory(),
                        trained_model_dir: InputDirectory() = None,
                        test_data_dir: InputDirectory() = None):
    print('=====================================================')
    print(f'trained_model_dir: {Path(trained_model_dir).resolve()}')
    print(f'test_data_dir: {Path(test_data_dir).resolve()}')
    # rebuild the vocabulary and label maps produced during preprocessing
    path_word_to_index = os.path.join(test_data_dir, 'word_to_index.json')
    word_to_index = get_vocab(path_word_to_index)
    path_label = os.path.join(test_data_dir, 'label.txt')
    map_id_label, map_label_id = get_id_label(path_label)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)
    # restore the data-loading parameters saved by the training step
    path = os.path.join(trained_model_dir, 'shared_params.json')
    with open(path, 'r', encoding='utf-8') as f:
        shared_params = json.load(f)
    path = os.path.join(test_data_dir, 'data.txt')
    test_samples = load_dataset(file_path=path,
                                max_len=shared_params['max_len'],
                                ngram_size=shared_params['ngram_size'],
                                word_to_index=word_to_index,
                                map_label_id=map_label_id)
    test_iter = DataIter(samples=test_samples, shuffle=False, device=device)
    # load the best checkpoint saved during training
    path = os.path.join(trained_model_dir, 'BestModel')
    model = torch.load(f=path, map_location=device)
    # evaluate and write the accuracy to the output directory
    path = os.path.join(model_testing_result, 'result.json')
    acc_ = test(model, test_iter)
    with open(path, 'w', encoding='utf-8') as f:
        json.dump({"acc": acc_}, f)
    print('\n============================================')
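# The test helper used above is not shown in this snippet. A minimal sketch
# of an equivalent accuracy loop, assuming DataIter yields (inputs, labels)
# batches of tensors (that batch format is an assumption):
def test_sketch(model, data_iter):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in data_iter:
            preds = model(inputs).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    return correct / max(total, 1)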
# utils, conf and pre_process are project modules; shuffle matches the
# signature of sklearn.utils.shuffle.
def load_data():
    x = utils.load_dataset()
    x = x.transpose(0, 2, 1)  # swap the last two axes: (N, A, B) -> (N, B, A)
    y = utils.load_label()
    x, y = shuffle(x, y, random_state=conf.seed)
    # wavelet denoising of the raw signals
    x = pre_process.rm_noise_2d(signals=x, wave_name='bior2.6', level=8)
    print('x.shape, y.shape:', x.shape, y.shape)
    return x, y
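# pre_process.rm_noise_2d is project code. A minimal sketch of wavelet
# denoising with PyWavelets under the same wavelet and level settings,
# applied per 1-D signal; the soft-threshold rule is an assumption, not
# necessarily the project's exact scheme:
import numpy as np
import pywt

def denoise_1d(signal, wave_name='bior2.6', level=8):
    coeffs = pywt.wavedec(signal, wave_name, level=level)
    # shrink the detail coefficients, keep the approximation as-is
    coeffs[1:] = [pywt.threshold(c, value=np.std(c), mode='soft')
                  for c in coeffs[1:]]
    rec = pywt.waverec(coeffs, wave_name)
    return rec[:signal.shape[-1]]  # waverec may pad by one sample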
def extract_files(self, files=consts.EXTRACT_FILES, **kwargs):
    """Loads a set of files, extracts the features and saves them to file.

    Args:
        files (str / str[]): TFRecord file(s) extracted by rosbag_to_tfrecord

    For **kwargs see extract_dataset.

    Returns:
        success (bool)
    """
    dataset, total = utils.load_dataset(files)
    return self.extract_dataset(dataset, total, **kwargs)
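# utils.load_dataset is project code. A minimal sketch of what it might do
# with TensorFlow, assuming plain TFRecord files; the separate counting pass
# is an assumption made so callers can report progress:
import tensorflow as tf

def load_dataset_sketch(files):
    # TFRecordDataset accepts a single filename or a list of filenames
    dataset = tf.data.TFRecordDataset(files)
    total = sum(1 for _ in tf.data.TFRecordDataset(files))
    return dataset, total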
import json
import os
from uuid import uuid4

import pandas as pd
import torch

# model, device, shared_params, word_to_index, map_label_id, map_id_label and
# scored_data_output_dir are globals prepared once per worker (see the init
# sketch below); load_dataset, DataIter and predict_parallel are project helpers.
def run(files):
    if len(files) == 0:
        return []
    with torch.no_grad():
        test_samples = load_dataset(file_path=files,
                                    max_len=shared_params['max_len'],
                                    ngram_size=shared_params['ngram_size'],
                                    word_to_index=word_to_index,
                                    map_label_id=map_label_id)
        test_iter = DataIter(samples=test_samples, batch_size=1,
                             shuffle=False, device=device)
        results = predict_parallel(model, test_iter, map_id_label)
        # persist one parquet file per scored mini-batch
        dict_ = {'Filename': files, 'Class': results}
        df = pd.DataFrame(data=dict_)
        output_file = os.path.join(scored_data_output_dir,
                                   f"{uuid4().hex}.parquet")
        df.to_parquet(output_file, index=False)
        return results
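# run(files) follows the init()/run(mini_batch) contract of AzureML's
# ParallelRunStep. A minimal sketch of the companion init(); the directory
# locations (environment variables) are assumptions, only the init/run
# names are fixed by the contract:
def init():
    global model, device, shared_params, word_to_index, map_id_label, \
        map_label_id, scored_data_output_dir
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    trained_model_dir = os.environ['TRAINED_MODEL_DIR']      # assumed location
    scored_data_output_dir = os.environ['SCORED_DATA_DIR']   # assumed location
    word_to_index = get_vocab(os.path.join(trained_model_dir, 'word_to_index.json'))
    map_id_label, map_label_id = get_id_label(os.path.join(trained_model_dir, 'label.txt'))
    with open(os.path.join(trained_model_dir, 'shared_params.json'), 'r', encoding='utf-8') as f:
        shared_params = json.load(f)
    model = torch.load(os.path.join(trained_model_dir, 'BestModel'), map_location=device)
    model.eval()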
# -*- coding: utf-8 -*-
"""
@author: danna.li
@date: 2019/4/2
@file: train_tradition.py
@description: python -W ignore train_tradition.py
"""
from common.conf import current_config as conf
from tradition.extract_feature import get_all_feature
import common.utils as utils
import tradition.tradition_model as tradition_model
import os

signals = utils.load_dataset()
labels = utils.load_label()
all_feature = get_all_feature(signals)
x_train, y_train, x_test, y_test = utils.split_data(all_feature, labels,
                                                    conf.train_ratio)
print(x_train.shape, y_train.shape, x_test.shape, y_test.shape)

if __name__ == '__main__':
    utils.re_create_path(conf.output_dir)
    save_model = False
    print('fitting knn model..')
    result_file = os.path.join(conf.output_dir, 'knn01.txt')
    tradition_model.do_knn(x_train, y_train, x_test, y_test, save_model,
                           result_file)
    print('fitting svm model..')
    result_file = os.path.join(conf.output_dir, 'svm01.txt')
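# tradition_model.do_knn is project code. A minimal sketch of an equivalent
# scikit-learn baseline; n_neighbors and the result-file format are
# assumptions:
from sklearn.neighbors import KNeighborsClassifier

def do_knn_sketch(x_train, y_train, x_test, y_test, save_model, result_file):
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(x_train, y_train)
    acc = clf.score(x_test, y_test)  # mean accuracy on the held-out split
    with open(result_file, 'w', encoding='utf-8') as f:
        f.write('test accuracy: %.4f\n' % acc)
    return acc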
import json
import os
import shutil
import time

import torch

# get_vocab, get_id_label, load_dataset, DataIter, FastText and train are
# project helpers; OutputDirectory and InputDirectory are the pipeline SDK's
# port annotations.
def fasttext_train(trained_model_dir: OutputDirectory(type='ModelDirectory'),
                   training_data_dir: InputDirectory() = None,
                   validation_data_dir: InputDirectory() = None,
                   epochs=1,
                   batch_size=64,
                   max_len=32,
                   embed_dim=300,
                   hidden_size=256,
                   ngram_size=200000,
                   dropout=0.5,
                   learning_rate=0.001):
    print('============================================')
    print('training_data_dir:', training_data_dir)
    print('validation_data_dir:', validation_data_dir)
    # rebuild the vocabulary and label maps produced during preprocessing
    path_word_to_index = os.path.join(training_data_dir, 'word_to_index.json')
    word_to_index = get_vocab(path_word_to_index)
    path_label = os.path.join(training_data_dir, 'label.txt')
    map_id_label, map_label_id = get_id_label(path_label)
    class_num = len(map_id_label)
    vocab_size = len(word_to_index)
    stop_patience = 5
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('device:', device)
    # load training dataset
    path = os.path.join(training_data_dir, 'data.txt')
    train_samples = load_dataset(file_path=path, word_to_index=word_to_index,
                                 map_label_id=map_label_id, max_len=max_len,
                                 ngram_size=ngram_size)
    train_iter = DataIter(samples=train_samples, batch_size=batch_size,
                          shuffle=True, device=device)
    # load validation dataset
    path = os.path.join(validation_data_dir, 'data.txt')
    dev_samples = load_dataset(file_path=path, word_to_index=word_to_index,
                               map_label_id=map_label_id, max_len=max_len,
                               ngram_size=ngram_size)
    dev_iter = DataIter(samples=dev_samples, batch_size=batch_size,
                        shuffle=True, device=device)
    model = FastText(vocab_size=vocab_size, class_num=class_num,
                     dropout=dropout, embed_dim=embed_dim,
                     hidden_size=hidden_size, ngram_size=ngram_size)
    # prints the bound parameters method; its repr shows the model structure
    print(model.parameters)
    # copy word_to_index.json and label.txt next to the model for later scoring
    shutil.copy(src=path_word_to_index, dst=trained_model_dir)
    shutil.copy(src=path_label, dst=trained_model_dir)
    # persist the parameters evaluation/scoring must reuse when loading data
    shared_params = {'max_len': max_len, 'ngram_size': ngram_size}
    path = os.path.join(trained_model_dir, 'shared_params.json')
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(shared_params, f)
    start = time.time()
    train(model, trained_model_dir, train_iter=train_iter, dev_iter=dev_iter,
          epochs=epochs, learning_rate=learning_rate,
          stop_patience=stop_patience, device=device)
    end = time.time()
    print('\nduration of training process: %.2f sec' % (end - start))
    print('============================================')
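# FastText above is the project's own PyTorch module, not Facebook's fastText
# library. A minimal sketch of a comparable architecture under the same
# constructor signature; the layer layout and the (words, ngrams) forward
# signature are assumptions:
import torch.nn as nn

class FastTextSketch(nn.Module):
    def __init__(self, vocab_size, class_num, dropout=0.5, embed_dim=300,
                 hidden_size=256, ngram_size=200000):
        super().__init__()
        self.embed_word = nn.Embedding(vocab_size, embed_dim)
        self.embed_ngram = nn.Embedding(ngram_size, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(embed_dim * 2, hidden_size)
        self.fc2 = nn.Linear(hidden_size, class_num)

    def forward(self, words, ngrams):
        # average word and n-gram embeddings over the sequence, concatenate,
        # then classify through one hidden layer
        w = self.embed_word(words).mean(dim=1)
        g = self.embed_ngram(ngrams).mean(dim=1)
        out = self.dropout(torch.cat([w, g], dim=1))
        return self.fc2(torch.relu(self.fc1(out)))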