import time

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

# Project helpers (load_to_cuda, to_var, Alphabet, get_word, get_loss, get_loader,
# get_loader_evaluate, writer) and constants (BATCH_SIZE, NUM_EPOCHS, LEARNING_RATE,
# COUNT_FRAMES, FRAME_DIR_TRAIN, FRAME_DIR_TEST) are defined elsewhere in the repo.


def forward(self, Y, h, c, outEncoder, teacher_force):
    # Y: target character indices; after the embedding below each character
    # becomes a 256-dim vector.
    # Note: teacher_force is the probability of feeding the decoder its own
    # predictions instead of the ground-truth characters.
    if np.random.rand() > teacher_force:
        # teacher forcing: feed the ground-truth characters
        seq_len = Y.shape[0] - 1  # -1 because <sos> is not counted in the criterion
        output_decoder = load_to_cuda(Variable(torch.zeros(seq_len, h.shape[1], 48)))
        Y = self.embedding(Y)
        for i in range(seq_len):
            h[0], c[0] = self.lstm1(Y[i], (h[0].clone(), c[0].clone()))
            h[1], c[1] = self.lstm2(h[0].clone(), (h[1].clone(), c[1].clone()))
            h[2], c[2] = self.lstm3(h[1].clone(), (h[2].clone(), c[2].clone()))
            h2 = h[2].clone()
            context = self.attention(h2, outEncoder, BATCH_SIZE)
            context = torch.bmm(context, outEncoder.view(outEncoder.shape[1], outEncoder.shape[0], -1))
            output_decoder[i] = self.MLP(torch.cat((h2, torch.squeeze(context, 1)), 1))
    else:
        # free running: feed the decoder its own (greedy) predictions
        seq_len = Y.shape[0] - 1
        output_decoder = load_to_cuda(Variable(torch.zeros(seq_len, h.shape[1], 48)))
        alphabet = Alphabet()
        Y_cur = self.embedding(
            load_to_cuda(Variable(torch.LongTensor([alphabet.ch2index('<sos>')])))
        ).view(1, self.hidden_size)
        for i in range(seq_len):
            Y_cur = Y_cur.expand(BATCH_SIZE, self.hidden_size)
            h[0], c[0] = self.lstm1(Y_cur, (h[0].clone(), c[0].clone()))
            h[1], c[1] = self.lstm2(h[0].clone(), (h[1].clone(), c[1].clone()))
            h[2], c[2] = self.lstm3(h[1].clone(), (h[2].clone(), c[2].clone()))
            h2 = h[2].clone()
            context = self.attention(h2, outEncoder, BATCH_SIZE)
            context = torch.bmm(context, outEncoder.view(outEncoder.shape[1], outEncoder.shape[0], -1))
            output_decoder[i] = self.MLP(torch.cat((h2, torch.squeeze(context, 1)), 1))
            # greedy argmax over the first batch element only; the same
            # character is fed back to the whole batch
            argmax = torch.max(output_decoder[i][0], dim=0)
            Y_cur = self.embedding(
                Variable(load_to_cuda(torch.LongTensor([argmax[1][0].data[0]])))
            ).view(1, self.hidden_size)
    return output_decoder
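# forward() and evaluate() rely on an Alphabet helper with a ch2index() method
# and the special tokens '<sos>', '<eos>' and 'null' (48 classes in total,
# matching the embedding and output sizes). The class itself is not shown in
# this section; the sketch below is a hypothetical minimal version, and its
# character list is only a placeholder.
class Alphabet:
    def __init__(self):
        # placeholder ordering; the real code must define exactly 48 entries
        self.chars = ['null', '<sos>', '<eos>'] + [chr(c) for c in range(ord('a'), ord('z') + 1)]
        self.ch2idx = {ch: i for i, ch in enumerate(self.chars)}

    def ch2index(self, ch):
        return self.ch2idx[ch]

    def index2ch(self, idx):
        return self.chars[idx]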
def evaluate(frames, targets, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion):
    frames = frames.float()
    frames, targets = to_var(frames), to_var(targets)
    encoder_output, encoder_hidden = encoder(frames)
    encoder_output = torch.squeeze(encoder_output, 1)
    decoder_output, word, ok = decoder.evaluate(encoder_hidden[0], encoder_hidden[1], encoder_output)
    if not ok:
        return 0.0
    decoder_output = torch.squeeze(decoder_output, 1)
    decoder_output = Variable(decoder_output)
    targets = targets[:, 1:]  # drop <sos>
    res_exp = get_word(targets[0][:len(targets[0]) - 1].data)  # expected word, without <eos>
    print("exp:", res_exp)
    print("res:", word)
    with open('log2/result.txt', 'a') as f:
        f.write("res:" + word + '\t' + "exp:" + res_exp + '\n')
    # the test loss is not computed here; a sentinel is returned instead
    # loss = get_loss(load_to_cuda(decoder_output), load_to_cuda(targets), criterion)
    # return loss.data[0]
    return -1.0
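# evaluate() compares the expected and decoded words via a get_word helper that
# is defined elsewhere in the repo. Presumably it maps a 1-D tensor of character
# indices back to a string; a minimal sketch, assuming the hypothetical
# Alphabet.index2ch() above:
def get_word(indices):
    alphabet = Alphabet()
    return ''.join(alphabet.index2ch(int(idx)) for idx in indices)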
def completeNull(v, v_size, out_size):
    # v_size: current length of the vector, out_size: required length
    newv = Variable(load_to_cuda(torch.LongTensor(out_size).zero_()))
    for i in range(v_size):
        newv[i] = v[i].clone()
    # positions beyond v_size keep the zero padding (an earlier variant filled
    # them with alphabet.ch2index('null') instead)
    return newv
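# Hypothetical usage of completeNull(): pad a 3-element target up to length 6.
# v = Variable(load_to_cuda(torch.LongTensor([5, 12, 7])))
# padded = completeNull(v, 3, 6)   # -> [5, 12, 7, 0, 0, 0]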
def train(frames, targets, encoder, decoder, encoder_optimizer, decoder_optimizer, criterion, teacher_force):
    frames = frames.float()
    frames, targets = to_var(frames), to_var(targets)
    decoder_optimizer.zero_grad()
    encoder_optimizer.zero_grad()
    encoder_output, encoder_hidden = encoder(frames)
    # reshape the hidden states and encoder output so the decoder can index
    # them per layer / per batch
    encoder_hidden[0] = encoder_hidden[0].view(encoder_hidden[0].shape[1], encoder_hidden[0].shape[0], -1)
    encoder_hidden[1] = encoder_hidden[1].view(encoder_hidden[1].shape[1], encoder_hidden[1].shape[0], -1)
    encoder_output = encoder_output.view(encoder_output.shape[1], encoder_output.shape[0], -1)
    decoder_output = decoder(targets.view(targets.shape[1], targets.shape[0]),
                             encoder_hidden[0], encoder_hidden[1], encoder_output, teacher_force)
    decoder_output = load_to_cuda(torch.squeeze(decoder_output, 1))
    targets = targets[:, 1:]  # drop <sos>
    # torch.t() only works on 2-D tensors, which implies the batch dim was
    # squeezed out above, i.e. an effective batch size of 1 here
    loss = get_loss(load_to_cuda(torch.t(decoder_output)), load_to_cuda(targets), criterion)
    loss.backward()
    encoder_optimizer.step()
    decoder_optimizer.step()
    return loss.data[0]
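# get_loss is defined elsewhere in the repo. Given that criterion is a
# CrossEntropyLoss(size_average=False) and the decoder emits one 48-way score
# vector per time step, it presumably sums the per-step cross-entropy over the
# sequence. A hedged sketch under that assumption (shapes follow the
# torch.t()-transposed, batch-of-one call above):
def get_loss(decoder_output, targets, criterion):
    # decoder_output: (48, seq_len) scores; targets: (1, seq_len) indices
    loss = 0
    for t in range(targets.shape[1]):
        loss = loss + criterion(decoder_output[:, t].unsqueeze(0), targets[:, t])
    return loss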
def forward(self, x):
    first_dim = x.shape[0]
    second_dim = x.shape[1]
    # fold the first two dims together so the convolutions see a 4-D batch of
    # 5-channel 120x120 frame stacks
    x = x.view(x.shape[0] * x.shape[1], 5, 120, 120)
    # block 1
    x = F.relu(self.conv1(x))
    x = F.max_pool2d(x, kernel_size=(3, 3), stride=2, padding=1)
    x = self.batchNorm1(x)
    # block 2
    x = F.relu(self.conv2(x))
    x = F.max_pool2d(x, kernel_size=(3, 3), stride=2, padding=1)
    x = self.batchNorm2(x)
    # block 3
    x = F.relu(self.conv3(x))
    x = self.dropout1(x)
    # block 4
    x = F.relu(self.conv4(x))
    x = self.dropout2(x)
    # block 5
    x = F.relu(self.conv5(x))
    x = F.max_pool2d(x, kernel_size=(3, 3), stride=2, padding=1)
    # block 6
    x = x.view(x.shape[0], 32768)
    x = self.fc6(x)  # should the last layer have an activation function?
    # reshape back to (first_dim, second_dim, 512); the LSTM expects
    # (seq_len, batch, features)
    x = x.view(first_dim, second_dim, 512)
    # note: nn.LSTM expects the initial state as (num_layers, batch, hidden);
    # x.shape[0] is used as the batch dim here
    h = load_to_cuda(Variable(torch.zeros(3, x.shape[0], 256)))
    c = load_to_cuda(Variable(torch.zeros(3, x.shape[0], 256)))
    self.lstm1.flatten_parameters()
    output, hidden = self.lstm1(x, (h, c))
    hidden = list(hidden)
    hidden[0] = hidden[0].view(hidden[0].shape[1], hidden[0].shape[0], -1)
    hidden[1] = hidden[1].view(hidden[1].shape[1], hidden[1].shape[0], -1)
    return output, hidden
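# Hypothetical smoke test of the encoder (the class name EncoderCNN and the
# conv layer shapes are assumptions; they are not shown in this section).
# As written, forward() builds the LSTM's initial state from x.shape[0], while
# nn.LSTM without batch_first treats dim 0 as seq_len and dim 1 as batch, so
# the shapes only line up when the first two input dims agree.
# encoder = load_to_cuda(EncoderCNN())
# dummy = load_to_cuda(Variable(torch.randn(2, 2, 5, 120, 120)))
# output, hidden = encoder(dummy)
# print(output.shape, hidden[0].shape)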
def __init__(self):
    super(DecoderRNN, self).__init__()
    # LSTM stack
    self.hidden_size = 256
    self.embedding = nn.Embedding(48, self.hidden_size)
    self.lstm1 = nn.LSTMCell(self.hidden_size, self.hidden_size)
    self.lstm2 = nn.LSTMCell(self.hidden_size, self.hidden_size)
    self.lstm3 = nn.LSTMCell(self.hidden_size, self.hidden_size)
    # attention
    self.att_fc1 = nn.Linear(self.hidden_size, self.hidden_size)
    self.att_fc2 = nn.Linear(self.hidden_size, self.hidden_size)
    self.att_fc3 = nn.Linear(self.hidden_size, self.hidden_size)
    # note: raw Variables (unlike nn.Parameter) are not registered in
    # decoder.parameters(), so the optimizer never updates these four
    self.att_vector = load_to_cuda(Variable(torch.randn(1, self.hidden_size), requires_grad=True))
    self.att_W = load_to_cuda(Variable(torch.randn(self.hidden_size, self.hidden_size), requires_grad=True))
    self.att_V = load_to_cuda(Variable(torch.randn(self.hidden_size, self.hidden_size), requires_grad=True))
    self.att_b = load_to_cuda(Variable(torch.randn(self.hidden_size, 1), requires_grad=True))
    # MLP head
    self.MLP_hidden_size = 256
    self.MLP_fc1 = nn.Linear(2 * self.MLP_hidden_size, self.MLP_hidden_size)
    self.MLP_fc2 = nn.Linear(self.MLP_hidden_size, self.MLP_hidden_size)
    self.MLP_fc3 = nn.Linear(self.MLP_hidden_size, 48)
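# forward() and evaluate() call self.attention(...) and self.MLP(...), which
# are not shown in this section. Given the parameters above (att_W, att_V,
# att_b, att_vector and the three MLP_fc* layers), attention is presumably an
# additive (Bahdanau-style) score over the encoder outputs, and MLP a small
# classifier head. A hedged reconstruction: it assumes outEncoder arrives as
# (seq_len, batch, hidden) and returns weights shaped (batch, 1, seq_len) for
# the torch.bmm calls above. (The unused att_fc1..3 layers hint at a
# learned-layer variant of the same score.)
def attention(self, h, outEncoder, batch_size):
    seq_len = outEncoder.shape[0]
    scores = load_to_cuda(Variable(torch.zeros(batch_size, seq_len)))
    for j in range(seq_len):
        # e_j = tanh(W h + V s_j + b), scored against the learned vector
        e = torch.tanh(h.mm(self.att_W) + outEncoder[j].mm(self.att_V) + self.att_b.t())
        scores[:, j] = e.mm(self.att_vector.t()).squeeze(1)
    return F.softmax(scores, dim=1).unsqueeze(1)

def MLP(self, x):
    x = F.relu(self.MLP_fc1(x))
    x = F.relu(self.MLP_fc2(x))
    return self.MLP_fc3(x)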
def evaluate(self, h, c, outEncoder, max_len=-1):
    # the returned sequence must not include <sos>
    h = h.view(h.shape[1], h.shape[0], -1).clone()
    c = c.view(c.shape[1], c.shape[0], -1).clone()
    if max_len == -1:
        seq_len = 50  # maximum length
    else:
        seq_len = max_len
    result = load_to_cuda(torch.FloatTensor(seq_len, 1, 48).zero_())
    if len(outEncoder.shape) != 3:
        print("encoderOut has the wrong number of dimensions")
        return result, result[0], False
    alphabet = Alphabet()
    listArgmax = []  # characters the decoder has emitted
    Y_cur = self.embedding(
        load_to_cuda(Variable(torch.LongTensor([alphabet.ch2index('<sos>')])))
    ).view(1, self.hidden_size)
    for i in range(seq_len - 1):
        h[0], c[0] = self.lstm1(Y_cur, (h[0].clone(), c[0].clone()))
        h[1], c[1] = self.lstm2(h[0], (h[1].clone(), c[1].clone()))
        h[2], c[2] = self.lstm3(h[1].clone(), (h[2].clone(), c[2].clone()))
        context = self.attention(h[2].clone(), outEncoder.view(outEncoder.shape[1], outEncoder.shape[0], -1), 1)
        context = torch.bmm(context, outEncoder)
        char = self.MLP(torch.cat((h[2].clone(), context.view(1, self.hidden_size)), 1))
        result[i] = char.data
        argmax = torch.max(result[i][0], dim=0)
        listArgmax.append(argmax[1][0])
        # stop decoding greedily once <eos> is produced
        if argmax[1][0] == alphabet.ch2index('<eos>'):
            seq_len = i + 1
            break
        Y_cur = self.embedding(
            Variable(load_to_cuda(torch.LongTensor([argmax[1][0]])))
        ).view(1, self.hidden_size)
    word = get_word(torch.LongTensor(listArgmax))
    return result[:seq_len], word, True
def train_iters(encoder, decoder, num_epochs=NUM_EPOCHS, print_every=10, plot_every=10, learning_rate=LEARNING_RATE):
    print('ITERATIONS: {}, BATCH SIZE: {}, LEARNING RATE: {}'
          .format(num_epochs, COUNT_FRAMES, learning_rate))
    print('====================================================================')
    start = time.time()
    plot_losses = []
    total_train_loss = 0
    plot_total_train_loss = 0
    encoder_optimizer = optim.Adam(encoder.parameters(), lr=learning_rate)
    decoder_optimizer = optim.Adam(decoder.parameters(), lr=learning_rate)
    criterion = load_to_cuda(nn.CrossEntropyLoss(size_average=False))
    # load the data: both loaders are torch.utils.data.DataLoader objects
    train_data_loader = get_loader(FRAME_DIR_TRAIN)
    evaluate_data_loader = get_loader_evaluate(FRAME_DIR_TEST)
    total_test_loss = 0.0
    start_time = time.time()
    for epoch in range(1, num_epochs + 1):
        count_words = 0
        for i, (frames, targets) in enumerate(train_data_loader):
            # schedule for the self-feeding probability; note that the
            # epoch <= 200 branch is unreachable after epoch <= 300
            if epoch <= 300:
                teacher_force = 0.0
            elif epoch <= 200:
                teacher_force = 0.15
            else:
                teacher_force = 0.3
            if count_words % 200 == 0:
                print("finished words:", count_words * BATCH_SIZE)
            loss = train(frames, targets, encoder, decoder,
                         encoder_optimizer, decoder_optimizer, criterion, teacher_force)
            total_train_loss += loss
            plot_total_train_loss += loss
            count_words += 1
        writer.add_scalar('trainLoss', total_train_loss, epoch)
        with open('log2/totalTrain.txt', 'a') as f:
            f.write(str(total_train_loss) + '\n')
        with open('log2/totalTest.txt', 'a') as f:
            f.write(str(total_test_loss) + '\n')
        print('iteration: {}, loss: {}'.format(epoch, total_train_loss))
        total_train_loss = 0
        print("--- %s seconds ---" % (time.time() - start_time))
        print("count_words:", count_words * BATCH_SIZE)
        # TODO: for testLoss in tensorboard, add in the right place:
        # writer.add_scalar('testLoss', loss, epoch)
        for i, (frames, targets, is_valid) in enumerate(evaluate_data_loader):
            if not is_valid[0]:
                continue
            test_loss = evaluate(frames, targets, encoder, decoder,
                                 encoder_optimizer, decoder_optimizer, criterion)
            total_test_loss += test_loss
            if i > 100:
                break
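# The section does not show how training is started. A minimal hypothetical
# entry point (EncoderCNN is an assumed class name; DecoderRNN matches the
# constructor above; the constants come from the project's config):
# if __name__ == '__main__':
#     encoder = load_to_cuda(EncoderCNN())
#     decoder = load_to_cuda(DecoderRNN())
#     train_iters(encoder, decoder, num_epochs=NUM_EPOCHS, learning_rate=LEARNING_RATE)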