def train(self, load_file='', num_epochs=10, chunks=1000, learning_rate=0.001,
          n_units=256, dropout=0.5):
    """Train the RNN model on the prepared training data.

    Args:
        load_file: optional path handed to ``load_train_df`` when no
            in-memory training labels are available.
        num_epochs: number of training epochs.
        chunks: number of chunks, forwarded to ``rnn.train_model``.
        learning_rate: optimizer learning rate.
        n_units: number of RNN units (semantics defined by ``rnn.train_model``).
        dropout: dropout rate (semantics defined by ``rnn.train_model``).

    Returns:
        The trained model, which is also stored on ``self.model``.
    """
    # Build the vocabulary lazily on first use.
    if self.vocab == []:
        self.create_vocab()
    # Prefer in-memory training data when present; otherwise load from file.
    if self.training_labels == []:
        train_df = self.load_train_df(load_file)
    else:
        train_df = pd.DataFrame({'sub_seqs': self.training_sequences,
                                 'sub_label': self.training_labels,
                                 'seq_length': self.training_seq_lengths})
    # FIX: the locals were previously named `train`/`test`, shadowing this
    # method's own name; renamed for clarity. 0.99 keeps 99% as training data
    # (presumably — confirm against split_train_test's contract).
    train_set, test_set = self.split_train_test(train_df, 0.99)
    print("Training Model with learning_rate = " + str(learning_rate)
          + " n_units = " + str(n_units) + " dropout = " + str(dropout))
    self.model = rnn.train_model(train_set, test_set, self.vocab_size,
                                 self.sequence_chunk_size,
                                 num_epochs=num_epochs, chunks=chunks,
                                 learning_rate=learning_rate,
                                 n_units=n_units, dropout=dropout)
    return self.model
"""Download the Nottingham dataset, then build and train the model."""
import tensorflow
import urllib.request
import zipfile

# FIX: `nottingham_util` is a project-local module, not part of tensorflow;
# `from tensorflow import nottingham_util` raised ImportError.
import nottingham_util
import rnn

url = "http://www-etud.iro.umontreal.ca/~boulanni/Nottingham.zip"

# FIX: `urllib.urlretrieve` is Python-2-only; Python 3 moved it to
# `urllib.request.urlretrieve`.
urllib.request.urlretrieve(url, "dataset.zip")

# FIX: don't shadow the builtin `zip`; the context manager also guarantees
# the archive handle is closed after extraction.
with zipfile.ZipFile(r'dataset.zip') as archive:
    archive.extractall('data')

# Build the model, then train it.
nottingham_util.create_model()
rnn.train_model()
"""Build and train the model from an already-downloaded dataset archive."""
import urllib
import zipfile

import nottingham_util
import rnn

# FIX: don't shadow the builtin `zip`; the context manager also guarantees
# the archive handle is closed after extraction.
with zipfile.ZipFile(r'dataset.zip') as archive:
    archive.extractall('data')

# build the model
nottingham_util.create_model()

# train the model
rnn.train_model()
# Train a model on the Democratic tweet corpus.
import rnn

# Dataset selector -- swap this value to train against a different corpus.
training_name = "dem_tweets"

# Fixed directory layout; leave these alone.
data_folder = "data"
models_folder = "models"
new_tweets_folder = "tweets"

# Derived paths: the training text and the checkpoint destination.
data_file = f"{data_folder}/{training_name}.txt"
ckpt_file = f"{models_folder}/{training_name}/model.ckpt"

rnn.train_model(iterations=20000, data_file=data_file, ckpt_file=ckpt_file)