import pickle

import tensorflow as tf

import NN


# Generation entry point: restores the vocabulary and hyperparameters saved
# by the training script, rebuilds the model with batch size 1, and samples
# new text from it.
def main():
    char2idx = {}
    idx2char = []
    vocabSize = 0
    NN.EMBEDDINGDIM = 0
    NN.NNUNITS = 0
    firstSequenceToUse = ""
    NN.NHIDDENLAYERS = 0
    NN.HIDDENLAYERS = []
    NN.TEMPERATURE = 0

    NN.ReadArgsForGenerating()

    # Restore the vocabulary and hyperparameters pickled after training, in
    # the same order the training script wrote them.
    with open('./NNTraining/' + NN.TRAINFILE, 'rb') as f:
        char2idx = pickle.load(f)
        idx2char = pickle.load(f)
        vocabSize = pickle.load(f)
        NN.EMBEDDINGDIM = pickle.load(f)
        NN.NNUNITS = pickle.load(f)
        firstSequenceToUse = pickle.load(f)
        NN.NHIDDENLAYERS = pickle.load(f)
        NN.HIDDENLAYERS = pickle.load(f)
        NN.TEMPERATURE = pickle.load(f)

    # Rebuild the model with batch size 1 so it accepts a single seed
    # sequence, then load the trained weights from the checkpoint.
    model = NN.BuildModel(vocabSize, NN.EMBEDDINGDIM, NN.NNUNITS, batchSize=1)
    # model.load_weights(tf.train.latest_checkpoint(checkpointDir))
    model.load_weights('./NNTraining/cp.ckpt')
    model.build(tf.TensorShape([1, None]))
    model.summary()

    genText = NN.GenerateText(model, firstSequenceToUse, NN.WIDTH,
                              char2idx, idx2char)
    NN.SaveFile(NN.OUTPUT, genText)


if __name__ == '__main__':
    main()
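# For context, a minimal sketch of what NN.BuildModel might look like,
# assuming it follows the standard TensorFlow character-RNN layout
# (Embedding -> GRU -> Dense producing logits over the vocabulary). The body
# below is an assumption, not the project's confirmed code; in particular it
# shows a single GRU, while NN.HIDDENLAYERS suggests the real model stacks a
# configurable number of recurrent layers.
import tensorflow as tf


def BuildModel(vocabSize, embeddingDim, nnUnits, batchSize):
    return tf.keras.Sequential([
        # Fixing the batch dimension lets the same architecture be rebuilt
        # with batchSize=1 for generation, as main() does above.
        tf.keras.layers.Embedding(vocabSize, embeddingDim,
                                  batch_input_shape=[batchSize, None]),
        tf.keras.layers.GRU(nnUnits,
                            return_sequences=True,
                            stateful=True,
                            recurrent_initializer='glorot_uniform'),
        # Raw logits; temperature-based sampling happens in GenerateText.
        tf.keras.layers.Dense(vocabSize),
    ])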
import os
import pickle
import time

import NN


# Training entry point: builds character datasets from the input files,
# trains the model, and pickles the vocabulary and hyperparameters so the
# generation script can rebuild the same network.
def main():
    NN.ReadArgsForTrainning()
    print(NN.SEQLENGTH)

    if NN.DEPURATION:  # verbose/debug mode
        print("FILES: " + str(NN.NFILES))
        for f in NN.FILE:
            print("    FILE: " + str(f))
        print()
        print("SEQUENCE LENGTH: " + str(NN.SEQLENGTH))
        print()
        print("BUFFER SIZE: " + str(NN.BUFFERSIZE))
        print()
        print("EMBEDDING DIM: " + str(NN.EMBEDDINGDIM))
        print()
        print("NN UNITS: " + str(NN.NNUNITS))
        print()
        print("EPOCHS: " + str(NN.EPOCHS))
        print()
        print("LAYERS: " + str(NN.NHIDDENLAYERS))
        for l in NN.HIDDENLAYERS:
            print("    LAYER: " + str(l))
        print()
        print("TEMPERATURE: " + str(NN.TEMPERATURE))
        print()

    print("Generating neural network:")
    print()

    # Build the joint vocabulary over all input files and keep each file's
    # text for vectorization.
    vocab = []
    textstr = []
    for f in NN.FILE:
        text = NN.ReadFile(f)
        v, tstr = NN.GetVocabulary(text)
        vocab += v
        textstr.append(tstr)
    vocab = set(vocab)

    char2idx, idx2char = NN.VectorizeText(vocab)

    # Encode every text as an array of character indices.
    textint = []
    for t in textstr:
        textint.append(NN.np.array([char2idx[s] for s in t]))

    # One dataset per input file; the first sequence of the first file is
    # kept to seed text generation later.
    firstSeq = False
    firstSequenceToUse = ""
    datasets = []
    examplesPerEpoch = 0
    for t in textint:
        if NN.DEPURATION:
            print(t)
            print()
        firstSequence, charDataset = NN.CreateTrainingSamples(t, idx2char)
        if not firstSeq:
            firstSequenceToUse = firstSequence
            firstSeq = True
        sequencesCreated = NN.CreateSequences(NN.SEQLENGTH, charDataset)
        dataset = sequencesCreated.map(NN.SplitInputTarget)
        examplesPerEpoch = NN.GetExamplesPerEpoch(t, NN.SEQLENGTH)
        # Keep the same shuffle order across epochs; pass the flag by
        # keyword, since shuffle's second positional argument is the seed.
        dataset = dataset.shuffle(
            NN.BUFFERSIZE, reshuffle_each_iteration=False).batch(
            examplesPerEpoch, drop_remainder=True)
        datasets.append(dataset)

    # Length of the vocabulary in chars
    vocabSize = len(vocab)

    model = NN.BuildModel(vocabSize=vocabSize,
                          embeddingDim=NN.EMBEDDINGDIM,
                          nnUnits=NN.NNUNITS,
                          batchSize=examplesPerEpoch)
    model.summary()
    model.compile(optimizer='adam', loss=NN.Loss)

    # Make sure the output directory exists before the checkpoint callback
    # writes into it.
    os.makedirs('./NNTraining/', exist_ok=True)

    # Path where the checkpoint weights will be saved
    checkpointDir = './NNTraining/cp.ckpt'
    # checkpointPrefix = os.path.join(checkpointDir, "ckpt_{epoch}")
    checkpointCallback = NN.tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpointDir, save_weights_only=True)

    for d in datasets:
        model.fit(d, epochs=NN.EPOCHS, callbacks=[checkpointCallback])

    # Pickle the vocabulary and hyperparameters in the order the generation
    # script reads them back.
    trainFileName = time.strftime("%Y%m%d_%H%M%S")
    with open('./NNTraining/' + trainFileName + '_Training_NN.pkl', "wb") as f:
        pickle.dump(char2idx, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(idx2char, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(vocabSize, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(NN.EMBEDDINGDIM, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(NN.NNUNITS, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(firstSequenceToUse, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(NN.NHIDDENLAYERS, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(NN.HIDDENLAYERS, f, pickle.HIGHEST_PROTOCOL)
        pickle.dump(NN.TEMPERATURE, f, pickle.HIGHEST_PROTOCOL)


if __name__ == '__main__':
    main()
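# For reference, plausible sketches of two helpers used above,
# NN.SplitInputTarget and NN.Loss, assuming they follow the usual TensorFlow
# text-generation pattern (shift-by-one input/target pairs and sparse
# categorical cross-entropy on logits). Treat both bodies as assumptions
# about the NN module, not its confirmed contents.
import tensorflow as tf


def SplitInputTarget(chunk):
    # Each training example predicts the next character: the input is the
    # chunk minus its last character, the target is the chunk shifted by one.
    inputText = chunk[:-1]
    targetText = chunk[1:]
    return inputText, targetText


def Loss(labels, logits):
    # The model's Dense output layer returns raw logits, hence from_logits=True.
    return tf.keras.losses.sparse_categorical_crossentropy(
        labels, logits, from_logits=True)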