def __init__(self):
    """Load the lyrics dataset, build word/label encoders, split off a
    validation set, and construct the RNN classifier with its loss and
    optimizer.

    Relies on module-level config: STRATIFY_DATA, VAL_RATIO, LEARNING_RATE.
    """
    # Number of samples to load; make this arbitrarily large to use the
    # whole dataset.
    datasize = 250
    print("Loading data...")
    if STRATIFY_DATA:
        self.data, self.labels = dataset.get_stratified_data(datasize, shuffle=True)
    else:
        self.data, self.labels = dataset.get_data(datasize)

    print("Building encoder...")
    # Words appearing fewer than `threshold` times are dropped from the vocab.
    self.data_encoder = OHencoder.map_to_int_ids(self.data, threshold=4)
    # Labels are wrapped in a list because map_to_int_ids expects a list of
    # token sequences — presumably; verify against OHencoder's signature.
    self.label_encoder = OHencoder.map_to_int_ids([self.labels])

    # Shift every word id up by one so index 0 of the one-hot vector is
    # reserved for unknown words.
    for word in self.data_encoder:
        self.data_encoder[word] += 1

    # Split data into train and validation sets (validation is the tail).
    split_idx = int(len(self.data) * (1 - VAL_RATIO))
    self.val_data = self.data[split_idx:]
    self.val_labels = self.labels[split_idx:]
    self.data = self.data[:split_idx]
    self.labels = self.labels[:split_idx]

    # Inverse mappings: vector index -> word / genre.
    # e.g. ["Sing", "me", "a", "song"]
    self.data_decoder = {idx: word for word, idx in self.data_encoder.items()}
    # e.g. ["Rock", "Pop", "Hip Hop"]
    self.label_decoder = {idx: label for label, idx in self.label_encoder.items()}
    self.num_classes = len(self.label_encoder)

    # +1 on the vocab size accounts for the reserved unknown-word slot 0.
    self.model = rnn(len(self.data_encoder) + 1, [128], [128], self.num_classes)
    self.best_acc = 0
    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=LEARNING_RATE)
if __name__ == '__main__':
    dataset_name = "web10k-norm"
    # dataset_name = "2003_td_dataset"
    data_infos = svmDataset(dataset_name)
    home = "D:\\Colecoes"
    fold = "1"

    # All six LETOR files live under the same fold directory.
    fold_dir = f"{home}/BD/{dataset_name}/Fold{fold}"
    data_infos.train_data_path = f"{fold_dir}/Norm.train.txt"
    data_infos.test_data_path = f"{fold_dir}/Norm.test.txt"
    data_infos.vali_data_path = f"{fold_dir}/Norm.vali.txt"
    data_infos.baseline_train_data_path = f"{fold_dir}/baseline.Norm.train.txt"
    data_infos.baseline_test_data_path = f"{fold_dir}/baseline.Norm.test.txt"
    data_infos.baseline_vali_data_path = f"{fold_dir}/baseline.Norm.vali.txt"

    # NOTE(review): the *_train variables are loaded from the "test" split —
    # confirm this is intentional and not a leftover from debugging.
    X_train, y_train = get_data(data_infos, "test")
    y_baseline_train = get_baseline_data(data_infos, "test")
    N_queries_train = len(X_train)
    print(N_queries_train)

    X_vali, y_vali = get_data(data_infos, "vali")
    y_baseline_vali = get_baseline_data(data_infos, "vali")

    # NOTE(review): requires_grad=True on the label/baseline tensors is
    # unusual — labels normally don't need gradients; verify downstream use.
    X_train = torch.tensor(X_train, requires_grad=True)
    y_train = torch.tensor(y_train, requires_grad=True)
    # y_train = torch.tanh(y_train)
    y_baseline_train = torch.tensor(y_baseline_train, requires_grad=True)
    X_vali = torch.tensor(X_vali, requires_grad=True)
    y_vali = torch.tensor(y_vali, requires_grad=True)
    y_baseline_vali = torch.tensor(y_baseline_vali, requires_grad=True)
def build_model(self):
    """Build the FusionNet-style reading-comprehension model (CoVe variant).

    Constructs embeddings (word/POS/NER, optional char), the CoVe MT-LSTM
    contextualizer, the stacked low/high/understanding LSTMs, the fully-aware
    attention layers, the self-attention fusion stage, and the final
    summarization + pointer output layers. All sizes come from ``self.opts``.
    """
    opts = self.opts
    print('load embedding...')
    # Pretrained word embeddings loaded from the preprocessed JSON dump.
    word_emb = np.array(get_data(opts['data_path'] + 'word_emb.json'), dtype=np.float32)
    word_size = word_emb.shape[0]
    word_dim = word_emb.shape[1]
    self.word_embeddings = nn.Embedding(word_emb.shape[0], word_dim, padding_idx=0)
    self.word_embeddings.weight.data = torch.from_numpy(word_emb)
    self.pos_embeddings = nn.Embedding(opts['pos_size'], opts['pos_dim'], padding_idx=0)
    self.ner_embeddings = nn.Embedding(opts['ner_size'], opts['ner_dim'], padding_idx=0)
    self.fix_embeddings = opts['fix_embeddings']
    if self.fix_embeddings:
        # Freeze the whole embedding matrix.
        for p in self.word_embeddings.parameters():
            p.requires_grad = False
    else:
        # Only the indices listed in tune_word_idx.pkl are fine-tuned; the
        # rest are kept fixed and restored from this buffer during training.
        with open(opts['data_path'] + 'tune_word_idx.pkl', 'rb') as f:
            tune_idx = pkl.load(f)
        self.fixed_idx = list(
            set([i for i in range(word_size)]) - set(tune_idx))
        fixed_embedding = torch.from_numpy(word_emb)[self.fixed_idx]
        self.register_buffer('fixed_embedding', fixed_embedding)
        # NOTE(review): this assignment is redundant — register_buffer above
        # already exposes self.fixed_embedding; setattr re-stores the same
        # tensor into the buffer dict.
        self.fixed_embedding = fixed_embedding

    # CoVe (MT-LSTM) output dimensionality is fixed at 600 by the pretrained model.
    cove_dim = 600
    pos_dim = opts['pos_dim']
    ner_dim = opts['ner_dim']
    hidden_size = opts['hidden_size']
    dropout = opts['dropout']
    attention_size = opts['attention_size']
    self.use_char = opts['use_char']
    char_dim = opts['char_dim']
    char_hidden_size = opts['char_hidden_size']
    self.use_cuda = opts['use_cuda']
    if self.use_char:
        # Character-level embeddings fed through a 1-layer BiLSTM.
        self.char_embeddings = nn.Embedding(opts['char_size'], char_dim, padding_idx=0)
        self.char_rnn = nn.LSTM(input_size=char_dim, hidden_size=char_hidden_size, batch_first=True, bidirectional=True, num_layers=1, dropout=0)

    opt = {
        'vocab_size': word_emb.shape[0],
        'embedding_dim': word_dim,
        'MTLSTM_path': 'utils/MT-LSTM.pth'
    }
    self.cove_rnn = MTLSTM(opt, embedding=torch.from_numpy(word_emb))

    # Four hand-crafted match features per passage token.
    feat_size = 4
    # Passage input: word + word-attention + CoVe + POS + NER + features.
    low_p_word_size = word_dim + word_dim + cove_dim + opts[
        'pos_dim'] + opts['ner_dim'] + feat_size
    # Question input: word + CoVe + POS + NER (no word-attention/features).
    low_q_word_size = word_dim + cove_dim + opts['pos_dim'] + opts[
        'ner_dim']
    if self.use_char:
        low_p_word_size += 2 * char_hidden_size
        low_q_word_size += 2 * char_hidden_size

    self.word_attention_layer = WordAttention(input_size=word_dim, hidden_size=attention_size, dropout=dropout, use_cuda=self.use_cuda)

    # Low- and high-level reading LSTMs for passage and question.
    self.low_passage_rnn = StackedLSTM(input_size=low_p_word_size, hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)
    self.low_ques_rnn = StackedLSTM(input_size=low_q_word_size, hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)
    high_p_word_size = 2 * hidden_size
    high_q_word_size = 2 * hidden_size
    self.high_passage_rnn = StackedLSTM(input_size=high_p_word_size, hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)
    self.high_ques_rnn = StackedLSTM(input_size=high_q_word_size, hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)

    # "Understanding" layer over the concatenated low+high question states.
    und_q_word_size = 2 * (2 * hidden_size)
    self.und_ques_rnn = StackedLSTM(input_size=und_q_word_size, hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)

    # Fully-aware attention over history-of-word: word + CoVe + low + high.
    attention_inp_size = word_dim + cove_dim + 2 * (2 * hidden_size)
    self.low_attention_layer = FullAttention(input_size=attention_inp_size, hidden_size=attention_size, dropout=dropout, use_cuda=self.use_cuda)
    self.high_attention_layer = FullAttention(
        input_size=attention_inp_size, hidden_size=attention_size, dropout=dropout, use_cuda=self.use_cuda)
    self.und_attention_layer = FullAttention(input_size=attention_inp_size, hidden_size=attention_size, dropout=dropout, use_cuda=self.use_cuda)

    # Fuse passage low/high states with the three attended vectors.
    fuse_inp_size = 5 * (2 * hidden_size)
    self.fuse_rnn = StackedLSTM(input_size=fuse_inp_size, hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)

    # Self-attention over the full history of word (+1 feature column).
    self_attention_inp_size = word_dim + cove_dim + pos_dim + ner_dim + 6 * (
        2 * hidden_size) + 1
    self.self_attention_layer = FullAttention(
        input_size=self_attention_inp_size, hidden_size=attention_size, dropout=dropout, use_cuda=self.use_cuda)
    self.self_rnn = StackedLSTM(input_size=2 * (2 * hidden_size), hidden_size=hidden_size, num_layers=1, dropout=dropout, use_cuda=self.use_cuda)

    # Question summarization and the start/end pointer network.
    self.summ_layer = Summ(input_size=2 * hidden_size, dropout=dropout, use_cuda=self.use_cuda)
    self.pointer_layer = PointerNet(input_size=2 * hidden_size, dropout=dropout, use_cuda=self.use_cuda)
def build_model(self):
    """Build the reading-comprehension model (ELMo / StackedBRNN variant).

    Constructs word/POS/NER embeddings (optionally frozen or partially
    fixed), an optional ELMo contextualizer, the low/high/understanding
    BRNN stack, fully-aware attention layers, self-attention fusion,
    summarization layers, the pointer network(s), and the answerability
    classifier. All hyperparameters come from ``self.opts``.
    """
    opts = self.opts
    print('load embedding...')
    # Pretrained word embeddings from the preprocessed JSON dump.
    word_emb = np.array(get_data(opts['data_path'] + 'word_emb.json'), dtype=np.float32)
    word_size = word_emb.shape[0]
    word_dim = word_emb.shape[1]
    self.word_embeddings = nn.Embedding(word_emb.shape[0], word_dim, padding_idx=0)
    self.word_embeddings.weight.data = torch.from_numpy(word_emb)
    self.pos_embeddings = nn.Embedding(opts['pos_size'], opts['pos_dim'], padding_idx=0)
    self.ner_embeddings = nn.Embedding(opts['ner_size'], opts['ner_dim'], padding_idx=0)
    self.fix_embeddings = opts['fix_embeddings']
    if self.fix_embeddings:
        # Freeze the whole embedding matrix.
        for p in self.word_embeddings.parameters():
            p.requires_grad = False
    else:
        # Only indices in tune_word_idx.pkl are fine-tuned; the rest are
        # kept fixed and restorable from this buffer during training.
        with open(opts['data_path'] + 'tune_word_idx.pkl', 'rb') as f:
            tune_idx = pkl.load(f)
        self.fixed_idx = list(
            set([i for i in range(word_size)]) - set(tune_idx))
        fixed_embedding = torch.from_numpy(word_emb)[self.fixed_idx]
        self.register_buffer('fixed_embedding', fixed_embedding)
        # Redundant with register_buffer above, kept for parity with the
        # original code path.
        self.fixed_embedding = fixed_embedding

    pos_dim = opts['pos_dim']
    ner_dim = opts['ner_dim']
    hidden_size = opts['hidden_size']
    dropout = opts['dropout']
    attention_size = opts['attention_size']
    self.use_cuda = opts['use_cuda']
    self.use_elmo = opts['use_elmo']
    if self.use_elmo:
        # ELMo's output dimensionality is fixed at 1024 by the pretrained model.
        elmo_dim = 1024
        options_file = "./SQuAD/elmo_options.json"
        weight_file = "./SQuAD/elmo_weights.hdf5"
        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)

    # Four hand-crafted match features per passage token.
    feat_size = 4
    # Passage input: word + word-attention + POS + NER + features.
    low_p_word_size = word_dim + word_dim + opts['pos_dim'] + opts[
        'ner_dim'] + feat_size
    # Question input: word + POS + NER.
    low_q_word_size = word_dim + opts['pos_dim'] + opts['ner_dim']
    if self.use_elmo:
        low_p_word_size += elmo_dim
        low_q_word_size += elmo_dim

    self.word_attention_layer = WordAttention(input_size=word_dim,
                                              hidden_size=attention_size,
                                              dropout=dropout,
                                              use_cuda=self.use_cuda)

    # Low / high / understanding BRNN stack (shared "cat" encoder).
    self.low_cat_rnn = StackedBRNN(input_size=low_p_word_size,
                                   hidden_size=hidden_size,
                                   num_layers=1,
                                   dropout=dropout,
                                   use_cuda=self.use_cuda)
    high_p_word_size = 2 * hidden_size
    self.high_cat_rnn = StackedBRNN(input_size=high_p_word_size,
                                    hidden_size=hidden_size,
                                    num_layers=1,
                                    dropout=dropout,
                                    use_cuda=self.use_cuda)
    und_q_word_size = 2 * (2 * hidden_size)
    self.und_cat_rnn = StackedBRNN(input_size=und_q_word_size,
                                   hidden_size=hidden_size,
                                   num_layers=1,
                                   dropout=dropout,
                                   use_cuda=self.use_cuda)

    # Fully-aware attention over history-of-word: word + low + high states.
    attention_inp_size = word_dim + 2 * (2 * hidden_size)
    if self.use_elmo:
        attention_inp_size += elmo_dim
    self.low_attention_layer = FullAttention(input_size=attention_inp_size,
                                             hidden_size=attention_size,
                                             dropout=dropout,
                                             use_cuda=self.use_cuda)
    self.high_attention_layer = FullAttention(input_size=attention_inp_size,
                                              hidden_size=attention_size,
                                              dropout=dropout,
                                              use_cuda=self.use_cuda)
    self.und_attention_layer = FullAttention(input_size=attention_inp_size,
                                             hidden_size=attention_size,
                                             dropout=dropout,
                                             use_cuda=self.use_cuda)

    # Fuse passage low/high states with the three attended vectors.
    fuse_inp_size = 5 * (2 * hidden_size)
    self.fuse_rnn = StackedBRNN(input_size=fuse_inp_size,
                                hidden_size=hidden_size,
                                num_layers=1,
                                dropout=dropout,
                                use_cuda=self.use_cuda)

    # Self-attention over the full history of word (+1 feature column).
    self_attention_inp_size = word_dim + pos_dim + ner_dim + 6 * (
        2 * hidden_size) + 1
    if self.use_elmo:
        self_attention_inp_size += elmo_dim
    self.self_attention_layer = FullAttention(
        input_size=self_attention_inp_size,
        hidden_size=attention_size,
        dropout=dropout,
        use_cuda=self.use_cuda)
    self.self_rnn = StackedBRNN(input_size=2 * (2 * hidden_size),
                                hidden_size=hidden_size,
                                num_layers=1,
                                dropout=dropout,
                                use_cuda=self.use_cuda)
    if self.opts['multi_point']:
        # Separate self-attention BRNN for the secondary pointer head.
        self.self_rnn_p = StackedBRNN(input_size=2 * (2 * hidden_size),
                                      hidden_size=hidden_size,
                                      num_layers=1,
                                      dropout=dropout,
                                      use_cuda=self.use_cuda)

    # Question summarization vectors feeding the pointer networks.
    self.summ_layer = Summ(input_size=2 * hidden_size,
                           dropout=dropout,
                           use_cuda=self.use_cuda)
    if self.opts['multi_point']:
        self.summ_layer_p = Summ(input_size=2 * hidden_size,
                                 dropout=dropout,
                                 use_cuda=self.use_cuda)
    self.summ_cf = Summ(input_size=2 * hidden_size,
                        dropout=dropout,
                        use_cuda=self.use_cuda)
    self.summ_layer2 = Summ(input_size=2 * hidden_size,
                            dropout=dropout,
                            use_cuda=self.use_cuda)
    self.summ_cf2 = Summ(input_size=2 * hidden_size,
                         dropout=dropout,
                         use_cuda=self.use_cuda)

    self.pointer_layer = PointerNet(input_size=2 * hidden_size,
                                    opt=self.opts,
                                    use_cuda=self.use_cuda)
    if self.opts['multi_point']:
        self.pointer_layer_p = PointerNet(input_size=2 * hidden_size,
                                          opt=self.opts,
                                          use_cuda=self.use_cuda)

    # FIX: the original guarded this on opts['check_answer'] but both
    # branches were byte-identical, so the conditional was dead code.
    # Binary answerability classifier over the 6*hidden summary vector.
    self.has_ans = nn.Sequential(nn.Dropout(p=dropout),
                                 nn.Linear(6 * hidden_size, 2))
def build_model(self):
    """Build the reading-comprehension model (ELMo / StackedBRNN variant,
    with paths and ELMo files taken from the options dict).

    Constructs word/POS/NER embeddings (optionally frozen or partially
    fixed), an optional ELMo contextualizer, the low/high/understanding
    BRNN stack, fully-aware attention layers, self-attention fusion,
    summarization layers, the pointer network(s), and the answerability
    classifier. All hyperparameters come from ``self.opts``.
    """
    opts = self.opts
    print("load embedding...")
    # Pretrained word embeddings from the preprocessing output directory.
    word_emb = np.array(
        get_data(os.path.join(opts["prepro_dir"], "word_emb.json")),
        dtype=np.float32,
    )
    word_size = word_emb.shape[0]
    word_dim = word_emb.shape[1]
    self.word_embeddings = nn.Embedding(word_emb.shape[0], word_dim, padding_idx=0)
    self.word_embeddings.weight.data = torch.from_numpy(word_emb)
    self.pos_embeddings = nn.Embedding(opts["pos_size"], opts["pos_dim"], padding_idx=0)
    self.ner_embeddings = nn.Embedding(opts["ner_size"], opts["ner_dim"], padding_idx=0)
    self.fix_embeddings = opts["fix_embeddings"]
    if self.fix_embeddings:
        # Freeze the whole embedding matrix.
        for p in self.word_embeddings.parameters():
            p.requires_grad = False
    else:
        # Only indices in tune_word_idx.pkl are fine-tuned; the rest are
        # kept fixed and restorable from this buffer during training.
        with open(os.path.join(opts["prepro_dir"], "tune_word_idx.pkl"), "rb") as f:
            tune_idx = pkl.load(f)
        self.fixed_idx = list(
            set([i for i in range(word_size)]) - set(tune_idx))
        fixed_embedding = torch.from_numpy(word_emb)[self.fixed_idx]
        self.register_buffer("fixed_embedding", fixed_embedding)
        # Redundant with register_buffer above, kept for parity with the
        # original code path.
        self.fixed_embedding = fixed_embedding

    pos_dim = opts["pos_dim"]
    ner_dim = opts["ner_dim"]
    hidden_size = opts["hidden_size"]
    dropout = opts["dropout"]
    attention_size = opts["attention_size"]
    self.use_cuda = opts["use_cuda"]
    self.use_elmo = opts["use_elmo"]
    if self.use_elmo:
        # ELMo's output dimensionality is fixed at 1024 by the pretrained model.
        elmo_dim = 1024
        options_file = opts["elmo_options_file"]
        weight_file = opts["elmo_weights_file"]
        self.elmo = Elmo(options_file, weight_file, 1, dropout=0)

    # Four hand-crafted match features per passage token.
    feat_size = 4
    # Passage input: word + word-attention + POS + NER + features.
    low_p_word_size = (word_dim + word_dim + opts["pos_dim"] +
                       opts["ner_dim"] + feat_size)
    # Question input: word + POS + NER.
    low_q_word_size = word_dim + opts["pos_dim"] + opts["ner_dim"]
    if self.use_elmo:
        low_p_word_size += elmo_dim
        low_q_word_size += elmo_dim

    self.word_attention_layer = WordAttention(
        input_size=word_dim,
        hidden_size=attention_size,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )

    # Low / high / understanding BRNN stack (shared "cat" encoder).
    self.low_cat_rnn = StackedBRNN(
        input_size=low_p_word_size,
        hidden_size=hidden_size,
        num_layers=1,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )
    high_p_word_size = 2 * hidden_size
    self.high_cat_rnn = StackedBRNN(
        input_size=high_p_word_size,
        hidden_size=hidden_size,
        num_layers=1,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )
    und_q_word_size = 2 * (2 * hidden_size)
    self.und_cat_rnn = StackedBRNN(
        input_size=und_q_word_size,
        hidden_size=hidden_size,
        num_layers=1,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )

    # Fully-aware attention over history-of-word: word + low + high states.
    attention_inp_size = word_dim + 2 * (2 * hidden_size)
    if self.use_elmo:
        attention_inp_size += elmo_dim
    self.low_attention_layer = FullAttention(
        input_size=attention_inp_size,
        hidden_size=attention_size,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )
    self.high_attention_layer = FullAttention(
        input_size=attention_inp_size,
        hidden_size=attention_size,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )
    self.und_attention_layer = FullAttention(
        input_size=attention_inp_size,
        hidden_size=attention_size,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )

    # Fuse passage low/high states with the three attended vectors.
    fuse_inp_size = 5 * (2 * hidden_size)
    self.fuse_rnn = StackedBRNN(
        input_size=fuse_inp_size,
        hidden_size=hidden_size,
        num_layers=1,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )

    # Self-attention over the full history of word (+1 feature column).
    self_attention_inp_size = (word_dim + pos_dim + ner_dim +
                               6 * (2 * hidden_size) + 1)
    if self.use_elmo:
        self_attention_inp_size += elmo_dim
    self.self_attention_layer = FullAttention(
        input_size=self_attention_inp_size,
        hidden_size=attention_size,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )
    self.self_rnn = StackedBRNN(
        input_size=2 * (2 * hidden_size),
        hidden_size=hidden_size,
        num_layers=1,
        dropout=dropout,
        use_cuda=self.use_cuda,
    )
    if self.opts["multi_point"]:
        # Separate self-attention BRNN for the secondary pointer head.
        self.self_rnn_p = StackedBRNN(
            input_size=2 * (2 * hidden_size),
            hidden_size=hidden_size,
            num_layers=1,
            dropout=dropout,
            use_cuda=self.use_cuda,
        )

    # Question summarization vectors feeding the pointer networks.
    self.summ_layer = Summ(input_size=2 * hidden_size,
                           dropout=dropout,
                           use_cuda=self.use_cuda)
    if self.opts["multi_point"]:
        self.summ_layer_p = Summ(input_size=2 * hidden_size,
                                 dropout=dropout,
                                 use_cuda=self.use_cuda)
    self.summ_cf = Summ(input_size=2 * hidden_size,
                        dropout=dropout,
                        use_cuda=self.use_cuda)
    self.summ_layer2 = Summ(input_size=2 * hidden_size,
                            dropout=dropout,
                            use_cuda=self.use_cuda)
    self.summ_cf2 = Summ(input_size=2 * hidden_size,
                         dropout=dropout,
                         use_cuda=self.use_cuda)

    self.pointer_layer = PointerNet(input_size=2 * hidden_size,
                                    opt=self.opts,
                                    use_cuda=self.use_cuda)
    if self.opts["multi_point"]:
        self.pointer_layer_p = PointerNet(input_size=2 * hidden_size,
                                          opt=self.opts,
                                          use_cuda=self.use_cuda)

    # FIX: the original guarded this on opts["check_answer"] but both
    # branches were byte-identical, so the conditional was dead code.
    # Binary answerability classifier over the 6*hidden summary vector.
    self.has_ans = nn.Sequential(nn.Dropout(p=dropout),
                                 nn.Linear(6 * hidden_size, 2))