def Test28():
    """Run the transformer from ``T.TrainBuildTransformer`` over every batch.

    For each batch the source/target masks are derived from the batch
    lengths, the model is called once, and the 1-based step number, the
    output size and ``Output[0][2]`` are printed.
    """
    # The builder returns five values; the two dictionaries are not needed here.
    model, batches, _src_dict, _tgt_dict, max_len = T.TrainBuildTransformer()

    for step_no, batch in enumerate(batches, start=1):
        src_sent = batch["SrcSent"]
        src_len = batch["SrcLength"]
        tgt_sent = batch["TgtSent"]
        tgt_len = batch["TgtLength"]

        src_mask = T.BatchLengthToBoolTensorMask(src_len, max_len)
        tgt_mask = T.BatchLengthToBoolTensorMask(tgt_len, max_len)

        output = model(src_sent, tgt_sent, src_mask, tgt_mask)

        print("Step")
        print(step_no)
        print(output.size())
        print(output[0][2])
def Do(self):
    """Decode every batch in ``self.BatchDatas`` and write finished batches.

    For each batch a fresh ``TranslateOutput`` is created and the model is
    run for at most ``self.MaxLength`` steps. At every step the model is
    called with the source sentences and the target indices produced so
    far, ``self.PickWord`` selects the next indices, and they are added to
    the batch output. As soon as ``AllFinish()`` is true, the batch is
    written to ``self.ResultPath`` via ``ToFile`` and decoding of that
    batch stops.

    Returns:
        None. Results are emitted through ``ToFile`` and ``print``.
    """
    for BatchInd, Batch in enumerate(self.BatchDatas):
        SrcSent = Batch["SrcSent"]
        SrcLength = Batch["SrcLength"]
        BatchSampleNum = SrcSent.size()[0]

        CurrentBatchOuput = TranslateOutput(
            self.TgtDict, self.MaxLength).Init(BatchSampleNum)
        TgtIndexSent = CurrentBatchOuput.GetCurrentIndexTensor()
        SrcMask = Tr.BatchLengthToBoolTensorMask(SrcLength, self.MaxLength)

        # The source tensors do not change between decoding steps, so move
        # them to the GPU once per batch instead of once per step.
        if self.UseGPU:
            SrcSent = SrcSent.cuda()
            SrcMask = SrcMask.cuda()

        for Step in range(self.MaxLength):
            TgtMask = self.GetTgtMask(Step, BatchSampleNum)
            if self.UseGPU:
                TgtMask = TgtMask.cuda()

            # NOTE(review): TgtIndexSent is passed exactly as returned by
            # GetCurrentIndexTensor(); it is not moved to the GPU here --
            # confirm that TranslateOutput / the model handles the device.
            ProOutput = self.Model(SrcSent, TgtIndexSent, SrcMask, TgtMask)
            ProOutput = ProOutput.cpu()

            # Only the selected indices are used; the first returned value
            # is discarded.
            _, Idx = self.PickWord(ProOutput, Step)
            CurrentBatchOuput.Add(Idx)

            # NOTE(review): the batch is written only when AllFinish() is
            # true. If that never happens within MaxLength steps nothing is
            # written for this batch -- confirm AllFinish() covers that case.
            if CurrentBatchOuput.AllFinish():
                print("Appending Ouput of Batch "+str(BatchInd))
                CurrentBatchOuput.ToFile(self.ResultPath)
                break

            TgtIndexSent = CurrentBatchOuput.GetCurrentIndexTensor()
def Test29():
    """Iterate the batches from ``T.TestBuildTransformer`` and print step numbers.

    The source mask is still computed for every batch, but the model
    forward pass is currently disabled, so only "Step" and the 1-based
    batch number are printed.
    """
    # Five values come back from the builder; only the batches and the
    # maximum length are used below.
    _model, batches, _src_dict, _tgt_dict, max_len = T.TestBuildTransformer()

    for step_no, batch in enumerate(batches, start=1):
        src_sent = batch["SrcSent"]
        src_len = batch["SrcLength"]
        # The mask is built but not consumed while the forward pass is disabled.
        src_mask = T.BatchLengthToBoolTensorMask(src_len, max_len)
        print("Step")
        print(step_no)
def Test26():
    """Build a small ``T.TransformerNMTModel`` and run it on each training batch.

    Data is loaded from "src.sents"/"src.vocab" and "tgt.sents"/"tgt.vocab"
    through ``DL.LoadData``, wrapped in a training dataset and loader, and
    fed batch by batch to the model. For every batch the source and target
    sizes, the 1-based step number, the output size and ``Output[0][2]``
    are printed.
    """
    # Hyper-parameters for this test run.
    max_len = 30
    batch_size = 2
    embed_size = 4
    head_num = 2
    encoder_layers = 2
    decoder_layers = 2

    src_sents, src_lens, src_dict = DL.LoadData("src.sents", "src.vocab", max_len)
    tgt_sents, tgt_lens, tgt_dict = DL.LoadData("tgt.sents", "tgt.vocab", max_len)

    dataset = DL.TrainCorpusDataset(src_sents, src_lens, tgt_sents, tgt_lens)
    batches = DL.TrainDataLoaderCreator(dataset, batch_size)

    src_vocab_size = src_dict.VocabularySize()
    tgt_vocab_size = tgt_dict.VocabularySize()
    model = T.TransformerNMTModel(
        head_num,
        embed_size,
        src_vocab_size,
        tgt_vocab_size,
        max_len,
        encoder_layers,
        decoder_layers,
    )

    for step_no, batch in enumerate(batches, start=1):
        print("BegingBatch")

        src_batch = batch["SrcSent"]
        print(src_batch.size())
        src_batch_lens = batch["SrcLength"]

        tgt_batch = batch["TgtSent"]
        print(tgt_batch.size())
        tgt_batch_lens = batch["TgtLength"]

        src_mask = T.BatchLengthToBoolTensorMask(src_batch_lens, max_len)
        tgt_mask = T.BatchLengthToBoolTensorMask(tgt_batch_lens, max_len)

        output = model(src_batch, tgt_batch, src_mask, tgt_mask)

        print("Step")
        print(step_no)
        print(output.size())
        print(output[0][2])
def Test24():
    """Print the result of ``T.BatchLengthToBoolTensorMask`` for a fixed input.

    The call uses the lengths ``[2, 3, 4]`` and a maximum length of ``6``.
    """
    lengths = [2, 3, 4]
    max_len = 6
    mask = T.BatchLengthToBoolTensorMask(lengths, max_len)
    print(mask)