def TrainBuildTransformer():
    """Build the Transformer NMT model for training from the on-disk config.

    Reads hyperparameters from ``Model/Config.json``, loads the source and
    target vocabularies, and constructs the model.

    Returns:
        Tuple of (Trans, BatchSize, SrcDict, TgtDict, MaxLength,
        EmbeddingSize):
            Trans          -- the constructed TransformerNMTModel.
            BatchSize      -- batch size from the config (the caller builds
                              its own batches; see NOTE below).
            SrcDict        -- source-side vocabulary.
            TgtDict        -- target-side vocabulary.
            MaxLength      -- maximum sentence length from the config.
            EmbeddingSize  -- embedding dimension from the config.
    """
    with open("Model/Config.json") as Fd:
        ConfigDict = json.load(Fd)
    MaxLength = ConfigDict["MaxLength"]
    BatchSize = ConfigDict["BatchSize"]
    EmbeddingSize = ConfigDict["EmbeddingSize"]
    HeadNum = ConfigDict["HeadNum"]
    EnLayer = ConfigDict["EnLayer"]
    DeLayer = ConfigDict["DeLayer"]
    Dropout = ConfigDict["Dropout"]

    print("Loading Tgt vocab")
    TgtDict = DLoad.LoadVocabulary("Data/tgt.vocab")
    print("Tgt vocab Loading finished")
    print("Loading Src vocab")
    SrcDict = DLoad.LoadVocabulary("Data/src.vocab")
    # Fixed typo in the status message ("Loadinf" -> "Loading").
    print("Src vocab Loading finished")

    SrcVocabularySize = SrcDict.VocabularySize()
    TgtVocabularySize = TgtDict.VocabularySize()

    print("Building Model")
    Trans = TransformerNMTModel(HeadNum, EmbeddingSize, SrcVocabularySize,
                                TgtVocabularySize, MaxLength, EnLayer,
                                DeLayer, Dropout)
    print("Model building finished")

    # NOTE: dataset/dataloader construction (DLoad.LoadData +
    # TrainCorpusDataset + TrainDataLoaderCreator) was deliberately disabled
    # here; BatchSize is returned in place of BatchDatas so the caller is
    # responsible for batching. Keep the tuple shape unchanged.
    return Trans, BatchSize, SrcDict, TgtDict, MaxLength, EmbeddingSize
def Test16():
    """Smoke test: load, pad, and index-encode the source sentences.

    Prints the vocabulary size followed by every indexed sentence.
    """
    RawSents = DL.LoadSentences("src.sents")
    Padded, SentLengths = DL.PaddingSentences(RawSents, 30)
    Vocab = DL.LoadVocabulary("src.vocab")
    Indexed = DL.ChangePaddedSentencesToInd(Padded, Vocab)
    print(Vocab.VocabularySize())
    for Indices in Indexed:
        print(Indices)
def Test31():
    """Smoke test: exercise TT.TranslateOutput step by step.

    Feeds successive index rows into the output tracker, checking the
    finish flag along the way, then dumps the tensors/word form and writes
    the prediction file.
    """
    Vocab = DL.LoadVocabulary("Model/tgt.vocab")
    Tracker = TT.TranslateOutput(Vocab, 5).Init(4)
    print(Tracker.IndexSent)

    # First step uses a distinct row; subsequent steps repeat the same row.
    Tracker.Add([1, 2, 3, 4])
    print(Tracker.IndexSent)
    Tracker.Add([2, 3, 4, 5])
    print(Tracker.IndexSent)
    Tracker.Add([2, 3, 4, 5])
    print(Tracker.AllFinish())
    print(Tracker.IndexSent)
    Tracker.Add([2, 3, 4, 5])
    print(Tracker.AllFinish())
    Tracker.Add([2, 3, 4, 5])
    Tracker.Add([2, 3, 4, 5])
    print(Tracker.AllFinish())

    # Inspect the accumulated state and persist the predictions.
    print(Tracker.GetCurrentIndexTensor())
    print(Tracker.IndexSent)
    print(Tracker.GetWordSent())
    print(Tracker.ToFile("Output/predict"))
def TestBuildTransformer():
    """Build the Transformer NMT model plus batched test data for inference.

    Reads hyperparameters from ``Model/Config.json``, loads the test-source
    sentences and both vocabularies, and wraps the test set in a data loader.

    Returns:
        Tuple of (Trans, BatchDatas, SrcDict, TgtDict, MaxLength).
    """
    with open("Model/Config.json") as Fd:
        Config = json.load(Fd)
    MaxLength = Config["MaxLength"]
    BatchSize = Config["BatchSize"]
    EmbeddingSize = Config["EmbeddingSize"]
    HeadNum = Config["HeadNum"]
    EnLayer = Config["EnLayer"]
    DeLayer = Config["DeLayer"]

    # Source side: sentences + lengths + vocabulary in one call.
    SrcIndSentences, SrcLength, SrcDict = DLoad.LoadData(
        "Data/test.sents", "Data/src.vocab", MaxLength)
    # Target side: only the vocabulary is needed at test time.
    TgtDict = DLoad.LoadVocabulary("Data/tgt.vocab")

    Dataset = DLoad.TestCorpusDataset(SrcIndSentences, SrcLength)
    BatchDatas = DLoad.TestDataLoaderCreator(Dataset, BatchSize)

    print("Building Model")
    # NOTE(review): unlike the training builder, no Dropout argument is
    # passed here — presumably the model's default applies at inference;
    # confirm against TransformerNMTModel's signature.
    Trans = TransformerNMTModel(
        HeadNum, EmbeddingSize, SrcDict.VocabularySize(),
        TgtDict.VocabularySize(), MaxLength, EnLayer, DeLayer)
    print("Model building finished")

    return Trans, BatchDatas, SrcDict, TgtDict, MaxLength
def Test13():
    """Smoke test: print the first 80 words of the source vocabulary."""
    Vocab = DL.LoadVocabulary("src.vocab")
    for Index in range(80):
        print(Vocab.GetWord(Index))