## 왜 입력 데이터를 반전시키는 것만으로 학습의 진행이 빨라지고 정확도가 향상되는 것일까? * 직관적으로는 기울기 전파가 원활해지기 때문이라고 생각됨 """ import numpy as np import sys sys.path.append('../../') from myutils.seq2seq import Seq2seq from myutils.optimizer import Adam from myutils.trainer import Trainer from seq_dataset import load_data, get_vocab #%% # read additon dataset (x_train, t_train), (x_test, t_test) = load_data('addition.txt') ## seq2seq 성능 개선 : 입력 데이터 반전(Reverse) x_train, x_test = x_train[:, ::-1], x_test[:, ::-1] #***************** char_to_id, id_to_char = get_vocab() print(x_train.shape, t_train.shape, x_test.shape, t_test.shape) # (45000,7) (45000,5) (5000,7) (5000,5) print('vocab_size:', len(id_to_char)) # 13 : 0~9, +, _, ' ' #%% # Setting hyperparameters vocab_size = len(char_to_id) wordvec_size = 16 hidden_size = 128 batch_size = 128 max_epoch = 25
@author: shkim
"""

# Section title, kept verbatim as a bare string literal: "Attention visualization".
""" # Attension 시각화(Visualization) """

#%%
import sys
# Add the parent directory to the module search path before the project
# imports below (presumably where `myutils` lives -- confirm repo layout).
sys.path.append('..')
import numpy as np
from seq_dataset import load_data, get_vocab
import matplotlib.pyplot as plt
from myutils.attention_seq2seq import AttentionSeq2seq

#%%
# Load the train/test (inputs, targets) pairs from 'date.txt' together with
# the two vocabulary lookup tables.
(x_train, t_train), (x_test, t_test) = load_data('date.txt')
char_to_id, id_to_char = get_vocab()

# Reverse the input sentences: flip each input row along axis 1
# (the targets are left exactly as loaded).
x_train, x_test = x_train[:, ::-1], x_test[:, ::-1]

vocab_size = len(char_to_id)  # 59 according to the original author's note
wordvec_size = 16
hidden_size = 256

# Build the model and restore its parameters from a file on disk.
# NOTE(review): the '.pkl' suffix suggests a pickle file; load_params is
# defined elsewhere, so confirm how it deserializes and only load checkpoints
# from a trusted source.
model = AttentionSeq2seq(vocab_size, wordvec_size, hidden_size)
model.load_params('AttentionSeq2seq-ep10.pkl')

#%%
# Initial index value; presumably selects a sample in the code that follows
# this excerpt (not visible here).
_idx = 0
"""
## Toy example for sequence (time-series) data conversion
    * Addition problems --> question & answer sentences
    * dataset : addition.txt --> seq_dataset.py
"""

#%%
# ## A look at the addition toy dataset used for sequence conversion
#     * Addition training data : addition.txt --> 50,000 addition samples
#       (each a question and its answer)
from seq_dataset import load_data, get_vocab


def _decode(ids):
    """Return the string obtained by mapping each id in *ids* through id_to_char."""
    return ''.join(id_to_char[i] for i in ids)


#%%
(x_train, t_train), (x_test, t_test) = load_data('addition.txt', seed=2020)
char_to_id, id_to_char = get_vocab()

# Shapes of each split; the trailing comments are the outputs recorded by the
# original author.
print(
    'x_train.shape:', x_train.shape,
    't_train.shape:', t_train.shape,
)  # (45000,7),(45000,5)
print(
    'x_test.shape:', x_test.shape,
    't_test.shape:', t_test.shape,
)  # (5000,7),(5000,5)

# First training sample as raw ids, one array per output line.
# Recorded output: [ 0 7 2 11 11 12 5] then [ 6 7 9 10 5]
print(x_train[0], t_train[0], sep='\n')

# Disabled probes left by the author, with their recorded results:
# print('5(%c)' % id_to_char[5])  # 5( )
# print('6(%c)' % id_to_char[6])  # 6(_)

# The same sample decoded back into characters.
print('x_train[0]-->', _decode(x_train[0]))  # 19+884
print('t_train[0]-->', _decode(t_train[0]))  # _903