def model_structure():
    """Build the character-level generator and the data to train it on.

    If ``TEXTFILE`` does not exist yet, the text of every ``*.epub`` found in
    ``BUILDDIR`` is extracted with BeautifulSoup and written to ``TEXTFILE``.
    The text file is then cut into semi-redundant sequences and a three-layer
    LSTM network is wrapped in a ``tflearn.SequenceGenerator``.  No training
    is performed here.

    Returns:
        tuple: ``(model, x, y, charidx)`` -- the generator followed by the
        three values returned by ``textfile_to_semi_redundant_sequences``.
    """
    if not TEXTFILE.exists():
        # Collect the fragments and join once; repeated ``text +=`` re-copies
        # the growing string for every line of every book.
        fragments = [
            BeautifulSoup(line).get_text()
            for path in BUILDDIR.glob('*.epub')
            for line in convert_epub_to_lines(open_book(path))
        ]
        TEXTFILE.write_text(''.join(fragments))

    x, y, charidx = textfile_to_semi_redundant_sequences(
        TEXTFILE, seq_maxlen=SEQ_MAXLEN)

    g = tflearn.input_data([None, SEQ_MAXLEN, len(charidx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(charidx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)
    model = tflearn.SequenceGenerator(g, dictionary=charidx,
                                      seq_maxlen=SEQ_MAXLEN,
                                      clip_gradients=5.0,
                                      checkpoint_path=MODEL)
    return model, x, y, charidx
def initialize_model(self):
    """Rebuild the generator network and load its weights from ``model.tfl``.

    The character dictionary is read from ``char_idx.pickle`` when that file
    exists; otherwise ``None`` is passed on and the dictionary comes from
    ``textfile_to_semi_redundant_sequences``.  The loaded model is stored on
    ``self.__text_model``.
    """
    char_idx_file = 'char_idx.pickle'
    maxlen = 25

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        # Close the handle deterministically instead of leaking it.
        with open(char_idx_file, 'rb') as fh:
            char_idx = pickle.load(fh)

    # NOTE(review): ``path`` is not defined in this method -- presumably a
    # module-level name; confirm against the rest of the file.
    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        path, seq_maxlen=maxlen, redun_step=3,
        pre_defined_char_idx=char_idx)

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.01)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_tweets')
    # Load the model
    m.load("model.tfl")
    self.__text_model = m
def buildModel(hp, idxs, layers):
    """Assemble a stacked-LSTM sequence generator, logging each step.

    Args:
        hp: mapping read for the keys ``'seqLength'``, ``'m'`` and ``'eta'``.
        idxs: dictionary handed to the generator; its length sets the width
            of the input and softmax layers.
        layers: total number of LSTM layers; all but the last return
            sequences.

    Returns:
        The ``tflearn.SequenceGenerator`` wrapping the network.
    """
    print("building tensorflow model...")
    seq_len = hp['seqLength']
    n_symbols = len(idxs)

    net = tflearn.input_data(shape=[None, seq_len, n_symbols])
    print("added input layer")

    # Every LSTM layer except the last one passes its full sequence on.
    for _ in range(layers - 1):
        net = tflearn.lstm(net, hp['m'], return_seq=True)
        print("added lstm layer")
        net = tflearn.dropout(net, 0.3)
        print("added dropout")

    net = tflearn.lstm(net, hp['m'])
    print("added final lstm layer")
    net = tflearn.dropout(net, 0.3)

    net = tflearn.fully_connected(net, n_symbols, activation='softmax')
    print("added fully connected softmax")

    net = tflearn.regression(
        net,
        optimizer='adam',
        loss='categorical_crossentropy',
        learning_rate=hp['eta'],
    )
    print("added ADAM optimized cross entropy loss")

    generator = tflearn.SequenceGenerator(
        net,
        dictionary=idxs,
        seq_maxlen=seq_len,
        clip_gradients=5.0,
    )
    print("created sequence generator")
    print("model built!")
    return generator
def build_model(char_idx):
    """Build a three-layer LSTM sequence generator for ``char_idx``.

    Args:
        char_idx: dictionary handed to the generator; its length sets the
            width of the input and softmax layers.

    Returns:
        A ``tflearn.SequenceGenerator`` that checkpoints to
        ``CHECKPOINT_PATH``.
    """
    logging.info('building model')
    model = tflearn.input_data([None, MAXLEN, len(char_idx)])
    n_lstm_neurons = 512
    dropout = 0.4

    # ``range`` instead of ``xrange``: the latter does not exist on Python 3
    # and the loops are tiny, so there is no cost on Python 2 either.
    for _ in range(2):
        model = tflearn.lstm(model, n_lstm_neurons, return_seq=True)
        model = tflearn.dropout(model, dropout)
    for _ in range(1):
        model = tflearn.lstm(model, n_lstm_neurons)
        model = tflearn.dropout(model, dropout)

    model = tflearn.fully_connected(model, len(char_idx),
                                    activation='softmax')
    model = tflearn.regression(model, optimizer='adam',
                               loss='categorical_crossentropy',
                               learning_rate=0.001)
    return tflearn.SequenceGenerator(model, dictionary=char_idx,
                                     seq_maxlen=MAXLEN,
                                     clip_gradients=5.0,
                                     checkpoint_path=CHECKPOINT_PATH)
def run():
    """Train the city-name generator for 40 epochs, sampling after each one.

    Reads ``maxlen``, ``char_idx``, ``path``, ``X`` and ``Y`` from the
    enclosing scope.
    """
    vocab_size = len(char_idx)

    # imagine cnn, the third dim is like the 'chnl'
    net = tflearn.input_data(shape=[None, maxlen, vocab_size])
    net = tflearn.dropout(tflearn.lstm(net, 512, return_seq=True), 0.5)
    net = tflearn.dropout(tflearn.lstm(net, 512), 0.5)
    net = tflearn.fully_connected(net, vocab_size, activation='softmax')
    net = tflearn.regression(
        net,
        optimizer='adam',
        loss='categorical_crossentropy',
        learning_rate=0.001,
    )

    generator = tflearn.SequenceGenerator(
        net,
        dictionary=char_idx,
        seq_maxlen=maxlen,
        clip_gradients=5.0,
        checkpoint_path='models/model_us_cities',
    )

    # Banner / temperature pairs printed after every epoch, in this order.
    samples = (
        ("-- Test with temperature of 1.2 --", 1.2),
        ("-- Test with temperature of 1.0 --", 1.0),
        ("-- Test with temperature of 0.5 --", 0.5),
    )

    for _ in range(40):
        seed = random_sequence_from_textfile(path, maxlen)
        generator.fit(X, Y, validation_set=0.1, batch_size=128,
                      n_epoch=1, run_id='us_cities')
        print("-- TESTING...")
        for banner, temperature in samples:
            print(banner)
            print(generator.generate(30, temperature=temperature,
                                     seq_seed=seed))
def test_sequencegenerator(self):
    """Fit a small character-level generator, then exercise save and load.

    The generated text itself is not asserted; only the presence of the
    saved index file is checked.
    """
    corpus = "123456789101234567891012345678910123456789101234567891012345678910"
    window = 5

    with tf.Graph().as_default():
        sequences, targets, vocab = \
            tflearn.data_utils.string_to_semi_redundant_sequences(
                corpus, seq_maxlen=window, redun_step=3)

        net = tflearn.input_data(shape=[None, window, len(vocab)])
        net = tflearn.dropout(tflearn.lstm(net, 32), 0.5)
        net = tflearn.fully_connected(net, len(vocab),
                                      activation='softmax')
        net = tflearn.regression(net, optimizer='adam',
                                 loss='categorical_crossentropy',
                                 learning_rate=0.1)

        generator = tflearn.SequenceGenerator(net, dictionary=vocab,
                                              seq_maxlen=window,
                                              clip_gradients=5.0)
        generator.fit(sequences, targets, validation_set=0.1,
                      n_epoch=100, snapshot_epoch=False)
        res = generator.generate(10, temperature=.5, seq_seed="12345")
        # The exact-match check against "123456789101234" is disabled.

        # Saving must produce the index file on disk.
        generator.save("test_seqgen.tflearn")
        self.assertTrue(os.path.exists("test_seqgen.tflearn.index"))

        # Loading must leave the generator able to produce output.
        generator.load("test_seqgen.tflearn")
        res = generator.generate(10, temperature=.5, seq_seed="12345")
def train(self):
    """Prepare the training sequences and build the generator model.

    A character dictionary found at ``self.charIDXFile`` is reused so the
    character-to-index mapping stays the same between runs; the dictionary
    actually used is written back to that file.  The network is stored on
    ``self.g`` and the generator on ``self.model``.  No ``fit`` call is made
    in this block.
    """
    char_idx = None
    if os.path.isfile(self.charIDXFile):
        # load previous character file
        with open(self.charIDXFile, 'rb') as fh:
            char_idx = pickle.load(fh)

    # Forward the loaded dictionary; without ``pre_defined_char_idx`` the
    # value read above is silently discarded and rebuilt from scratch.
    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        self.path, seq_maxlen=self.maxLength, redun_step=3,
        pre_defined_char_idx=char_idx)

    with open(self.charIDXFile, 'wb') as fh:
        pickle.dump(char_idx, fh)

    self.g = tflearn.input_data([None, self.maxLength, len(char_idx)])
    self.g = tflearn.lstm(self.g, 512, return_seq=True)
    self.g = tflearn.dropout(self.g, 0.5)
    self.g = tflearn.lstm(self.g, 512, return_seq=True)
    self.g = tflearn.dropout(self.g, 0.5)
    self.g = tflearn.lstm(self.g, 512)
    self.g = tflearn.dropout(self.g, 0.5)
    self.g = tflearn.fully_connected(self.g, len(char_idx),
                                     activation='softmax')
    self.g = tflearn.regression(self.g, optimizer='adam',
                                loss='categorical_crossentropy',
                                learning_rate=0.001)

    self.model = tflearn.SequenceGenerator(self.g, dictionary=char_idx,
                                           seq_maxlen=self.maxLength,
                                           max_checkpoints=0,
                                           checkpoint_path='model_trump')
def CharacterLSTM_Train(data, model, dictionary, history=25, layers=3,
                        epochs=10, hidden_nodes=512, dropout=False):
    """Train a character-level LSTM on a text file and save the result.

    Args:
        data: path of the training text file.
        model: path the trained model is saved to.
        dictionary: path the character-index dictionary is pickled to.
        history: sequence length used for the training windows.
        layers: forwarded to ``buildModel``.
        epochs: number of training epochs.
        hidden_nodes: forwarded to ``buildModel``.
        dropout: forwarded to ``buildModel``.
    """
    maxlen = history

    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        data, seq_maxlen=maxlen, redun_step=3)

    # Persist the dictionary so a later run can rebuild the same network.
    with open(dictionary, 'wb') as fh:
        pickle.dump(char_idx, fh)

    tf.reset_default_graph()
    print("layers " + str(layers) + " hidden " + str(hidden_nodes))

    g = buildModel(layers, hidden_nodes, maxlen, char_idx, dropout)
    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0)

    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=epochs, run_id='run_gen')
    print("Saving...")
    m.save(model)
def shakespeare():
    """Train a character-level LSTM on the Shakespeare corpus and sample it.

    The corpus is downloaded to ``shakespeare_input.txt`` if it is missing.
    The character dictionary is reused from ``char_idx.pickle`` when present
    and written back after the sequences are built.  Training runs for 50
    single-epoch rounds; after each round two 600-step samples are printed.
    """
    path = "shakespeare_input.txt"
    char_idx_file = 'char_idx.pickle'

    if not os.path.isfile(path):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt",
            path)

    maxlen = 25

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        # Context managers close the pickle files promptly.
        with open(char_idx_file, 'rb') as fh:
            char_idx = pickle.load(fh)

    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        path, seq_maxlen=maxlen, redun_step=3,
        pre_defined_char_idx=char_idx)

    with open(char_idx_file, 'wb') as fh:
        pickle.dump(char_idx, fh)

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_shakespeare')

    for _ in range(50):
        seed = random_sequence_from_textfile(path, maxlen)
        m.fit(X, Y, validation_set=0.1, batch_size=128,
              n_epoch=1, run_id='shakespeare')
        print("-- TESTING...")
        print("-- Test with temperature of 1.0 --")
        print(m.generate(600, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(600, temperature=0.5, seq_seed=seed))
def test_sequencegenerator_words(self):
    """Fit a word-level generator on "hello world" and check save/load.

    After training, and again after reloading the saved model, the last two
    generated words must read "hello world".
    """
    with tf.Graph().as_default():
        text = ["hello", "world"] * 100
        word_idx = {"hello": 0, "world": 1}
        maxlen = 2

        vec = [x for x in map(word_idx.get, text) if x is not None]

        sequences = []
        next_words = []
        for i in range(0, len(vec) - maxlen, 3):
            sequences.append(vec[i:i + maxlen])
            next_words.append(vec[i + maxlen])

        # Builtin ``bool`` replaces ``np.bool``, which newer NumPy releases
        # no longer provide; the resulting dtype is the same.
        X = np.zeros((len(sequences), maxlen, len(word_idx)), dtype=bool)
        Y = np.zeros((len(sequences), len(word_idx)), dtype=bool)
        for i, seq in enumerate(sequences):
            for t, idx in enumerate(seq):
                X[i, t, idx] = True
            Y[i, next_words[i]] = True

        g = tflearn.input_data(shape=[None, maxlen, len(word_idx)])
        g = tflearn.lstm(g, 32)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.fully_connected(g, len(word_idx), activation='softmax')
        g = tflearn.regression(g, optimizer='adam',
                               loss='categorical_crossentropy',
                               learning_rate=0.1)

        m = tflearn.SequenceGenerator(g, dictionary=word_idx,
                                      seq_maxlen=maxlen,
                                      clip_gradients=5.0)
        m.fit(X, Y, validation_set=0.1, n_epoch=100, snapshot_epoch=False)

        res = m.generate(4, temperature=.5, seq_seed=["hello", "world"])
        res_str = " ".join(res[-2:])
        self.assertEqual(
            res_str, "hello world",
            "SequenceGenerator (word level) test failed! Generated sequence: "
            + res_str + " expected 'hello world'")

        # Testing save method
        m.save("test_seqgen_word.tflearn")
        self.assertTrue(os.path.exists("test_seqgen_word.tflearn.index"))

        # Testing load method
        m.load("test_seqgen_word.tflearn")
        res = m.generate(4, temperature=.5, seq_seed=["hello", "world"])
        res_str = " ".join(res[-2:])
        self.assertEqual(
            res_str, "hello world",
            "Reloaded SequenceGenerator (word level) test failed! Generated sequence: "
            + res_str + " expected 'hello world'")
def __init__(self, char_idx, seq_max_len=25, checkpoint_path=None, **kwargs):
    """Build the network and wrap it in a sequence generator.

    Args:
        char_idx: dictionary handed to the generator; its length is passed
            to ``_build_model``.
        seq_max_len: sequence length used for both the network and the
            generator.
        checkpoint_path: forwarded to ``tflearn.SequenceGenerator``.
        **kwargs: kept on ``self.init_params`` and forwarded to
            ``_build_model``; an optional ``default_seed`` entry is stored
            on ``self.default_seed`` (``None`` when absent).
    """
    self.init_params = kwargs
    self.char_idx = char_idx

    g = self._build_model(seq_max_len, len(char_idx), **kwargs)

    # Pass the sequence length on explicitly so the generator always agrees
    # with the network built above instead of relying on its own default.
    self.model = tflearn.SequenceGenerator(
        g,
        dictionary=char_idx,
        seq_maxlen=seq_max_len,
        clip_gradients=5.0,
        checkpoint_path=checkpoint_path,
    )
    self.default_seed = kwargs.get('default_seed')
def shakespeare():
    """Train a character-level model on the Shakespeare corpus and sample it.

    The corpus is downloaded to ``shakespeare_input.txt`` if it is missing.
    The character dictionary is reused from ``char_idx.pickle`` when present
    and written back after the sequences are built.
    """
    path = "shakespeare_input.txt"
    char_idx_file = 'char_idx.pickle'

    if not os.path.isfile(path):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt",
            path)

    maxlen = 25

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        # Context managers close the pickle files promptly.
        with open(char_idx_file, 'rb') as fh:
            char_idx = pickle.load(fh)

    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        path, seq_maxlen=maxlen, redun_step=3,
        pre_defined_char_idx=char_idx)

    with open(char_idx_file, 'wb') as fh:
        pickle.dump(char_idx, fh)

    # NOTE(review): this section mixes a Keras-style functional model
    # (Input / LSTM / Dense / Model.compile) with tflearn.SequenceGenerator.
    # The compiled ``model`` is never used, and ``g`` handed to the generator
    # is the last Dropout output rather than a regression layer.  Left as
    # written -- confirm which API is intended before relying on this.
    inputs = Input([None, maxlen, len(char_idx)])
    g = LSTM(512, return_seq=True)(inputs)
    g = Dropout(0.5)(g)
    g = LSTM(512, return_seq=True)(g)
    g = Dropout(0.5)(g)
    g = LSTM(512)(g)
    g = Dropout(0.5)(g)
    predictions = Dense(len(char_idx), activation='softmax')(g)

    adam = optimizers.adam(lr=0.001)
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer=adam, loss='categorical_crossentropy',
                  metrics=['accuracy'])

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_shakespeare')

    for _ in range(50):
        seed = random_sequence_from_textfile(path, maxlen)
        m.fit(X, Y, validation_set=0.1, batch_size=128,
              n_epoch=1, run_id='shakespeare')
        print("-- TESTING...")
        print("-- Test with temperature of 1.0 --")
        print(m.generate(600, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(600, temperature=0.5, seq_seed=seed))
def generator_xss():
    """Train a small character-level LSTM on the XSS corpus and sample it.

    Reads the module-level ``char_idx_file``, ``xss_data_file`` and
    ``maxlen``; rebinds the module-level ``char_idx``.  After two epochs of
    training, 32-step samples are printed at temperatures 0.1, 0.5 and 1.0,
    all starting from the fixed seed ``'"/><script>'``.
    """
    global char_idx
    global xss_data_file
    global maxlen

    if os.path.isfile(char_idx_file):
        print('Loading previous xxs_char_idx')
        with open(char_idx_file, 'rb') as fh:
            char_idx = pickle.load(fh)

    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        xss_data_file, seq_maxlen=maxlen, redun_step=3,
        pre_defined_char_idx=char_idx)

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 32, return_seq=True)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.lstm(g, 32, return_seq=True)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.lstm(g, 32)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='chkpoint/model_scanner_poc')

    # Function-call form: the bare ``print "..."`` statement is a syntax
    # error on Python 3, and this form prints the same text on Python 2.
    print("random_sequence_from_textfile")
    seed = '"/><script>'
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=2, run_id='scanner-poc')

    print("-- TESTING...")
    print("-- Test with temperature of 0.1 --")
    print(m.generate(32, temperature=0.1, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(32, temperature=0.5, seq_seed=seed))
    print("-- Test with temperature of 1.0 --")
    print(m.generate(32, temperature=1.0, seq_seed=seed))
def main():
    """Train a city-name generator on ``US_Cities.txt`` and print samples.

    The whole file is read into one string, cut into semi-redundant
    sequences and used to train a two-layer LSTM for 40 single-epoch rounds.
    After each round three 30-step samples are printed.
    """
    path = '../data/cityName/US_Cities.txt'
    maxlen = 20

    # Close the file as soon as it has been read.
    with open(path, 'r') as fh:
        string_utf8 = fh.read()

    # string_utf8 is the input text: newline-separated names, where the
    # newline itself counts as a character.
    # seq_maxlen is the length of each generated sequence (20 here).
    # redun_step is the stride: a new window starts every 3 characters.
    x, y, char_idx = string_to_semi_redundant_sequences(
        string_utf8, seq_maxlen=maxlen, redun_step=3)

    g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    # Set the loss and the optimizer.
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    # Instantiate the RNN-based sequence generator with its dictionary.
    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_us_cities')

    # Generate city names from a random seed after each training round.
    for _ in range(40):
        # Pick a random seed sequence from the corpus.
        seed = random_sequence_from_string(string_utf8, maxlen)
        # Feed the data for one epoch of training.
        m.fit(x, y, validation_set=0.1, batch_size=128,
              n_epoch=1, run_id='us_cities')
        print("-- TESTING...")
        print("-- Test with temperature of 1.2 --")
        # Original author's note: temperature controls novelty -- lower
        # values stay closer to the training names, higher values are more
        # novel, and 0 reproduces the sample data.
        # generate(seq_length, temperature=0.5, seq_seed=None, display=False)
        print(m.generate(30, temperature=1.2, seq_seed=seed))
        print("-- Test with temperature of 1.0 --")
        print(m.generate(30, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(30, temperature=0.5, seq_seed=seed))
def CharacterLSTM_Run(seed, dictionary, model, output, steps=600, layers=3,
                      hidden_nodes=512, history=25, temperature=0.5,
                      dropout=False):
    """Load a trained character-level LSTM and generate text from a seed.

    Args:
        seed: seed text; only its first ``history`` characters are used.
        dictionary: path of the pickled character-index dictionary.
        model: path of the saved model to load.
        output: not used in this function.
        steps: number of generation steps.
        layers: forwarded to ``buildModel``.
        hidden_nodes: forwarded to ``buildModel``.
        history: sequence length the network is built with.
        temperature: sampling temperature passed to ``generate``.
        dropout: forwarded to ``buildModel``.

    Returns:
        The value returned by ``SequenceGenerator.generate``.
    """
    maxlen = history

    # When the dictionary file is missing, ``char_idx`` stays None and is
    # passed on unchanged, as before.
    char_idx = None
    if os.path.isfile(dictionary):
        print('Loading previous char_idx')
        with open(dictionary, 'rb') as fh:
            char_idx = pickle.load(fh)

    tf.reset_default_graph()
    g = buildModel(layers, hidden_nodes, maxlen, char_idx, dropout)
    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0)
    m.load(model)

    print('seed=' + seed)
    print('len=' + str(len(seed)))
    result = m.generate(steps, temperature=temperature,
                        seq_seed=seed[:history])
    print(result)
    return result
def __init__(self, char_idx, seq_max_len=25, checkpoint_path=None, default_seed=None):
    """Build a three-layer LSTM network and wrap it in a sequence generator.

    Args:
        char_idx: dictionary handed to the generator; its length sets the
            width of the input and softmax layers.
        seq_max_len: sequence length used for both the input layer and the
            generator.
        checkpoint_path: forwarded to ``tflearn.SequenceGenerator``.
        default_seed: seed stored on ``self.default_seed``; a falsy value
            falls back to ``"life in the hood"``.
    """
    g = tflearn.input_data([None, seq_max_len, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    # Pass the sequence length on explicitly so the generator always agrees
    # with the input layer instead of relying on its own default.
    self.model = tflearn.SequenceGenerator(
        g,
        dictionary=char_idx,
        seq_maxlen=seq_max_len,
        clip_gradients=5.0,
        checkpoint_path=checkpoint_path,
    )
    self.default_seed = default_seed or "life in the hood"
def build_model(maxlen, char_idx, checkpoint_path):
    """Return a three-layer LSTM sequence generator.

    Args:
        maxlen: sequence length of the input layer and the generator.
        char_idx: dictionary handed to the generator; its length sets the
            width of the input and softmax layers.
        checkpoint_path: forwarded to ``tflearn.SequenceGenerator``.
    """
    vocab_size = len(char_idx)
    net = tflearn.input_data([None, maxlen, vocab_size])

    # Two sequence-returning LSTM layers followed by a final one, each
    # followed by dropout.
    for keep_sequence in (True, True, False):
        net = tflearn.lstm(net, 512, return_seq=keep_sequence)
        net = tflearn.dropout(net, 0.5)

    net = tflearn.fully_connected(net, vocab_size, activation='softmax')
    net = tflearn.regression(
        net,
        optimizer='adam',
        loss='categorical_crossentropy',
        learning_rate=0.001,
    )
    return tflearn.SequenceGenerator(
        net,
        dictionary=char_idx,
        seq_maxlen=maxlen,
        clip_gradients=5.0,
        checkpoint_path=checkpoint_path,
    )
def train_piano_melody(right, sequence_length=8, temperature=1.0, epochs=5):
    """Train an LSTM on the right-hand sequences and generate 100 steps.

    Args:
        right: iterable of note sequences; they are concatenated in order.
        sequence_length: window length of the network and of the seed.
        temperature: sampling temperature passed to ``generate``.
        epochs: number of training epochs.

    Returns:
        The value returned by ``SequenceGenerator.generate``.
    """
    tf.reset_default_graph()

    # Flatten all sequences into one melody line.
    notes = [note for sequence in right for note in sequence]

    inputs, targets, vocab = create_train_sequence(notes)
    seed = random_sample_note_sequence(notes, sequence_length)

    print('Training melody...')
    net = tflearn.input_data([None, sequence_length, len(vocab)])
    for keep_sequence in (True, True, False):
        net = tflearn.lstm(net, 256, return_seq=keep_sequence)
        net = tflearn.dropout(net, 0.5)
    net = tflearn.fully_connected(net, len(vocab), activation='softmax')
    net = tflearn.regression(
        net,
        optimizer='adam',
        loss='categorical_crossentropy',
        learning_rate=0.001,
    )

    generator = tflearn.SequenceGenerator(
        net,
        dictionary=vocab,
        seq_maxlen=sequence_length,
        clip_gradients=5.0,
    )
    generator.fit(inputs, targets, validation_set=0.1, batch_size=32,
                  n_epoch=epochs, run_id='melody')
    return generator.generate(100, temperature=temperature, seq_seed=seed)
def train_piano_accompany(left, sequence_length=8, temperature=1.0, epochs=5):
    """Train an LSTM on the left-hand sequences and generate 100 steps.

    Args:
        left: iterable of note sequences; they are concatenated in order.
        sequence_length: window length of the network and of the seed.
        temperature: sampling temperature passed to ``generate``.
        epochs: number of training epochs.

    Returns:
        The value returned by ``SequenceGenerator.generate``.
    """
    tf.reset_default_graph()

    # Flatten all sequences into one accompaniment line.
    notes = []
    for sequence in left:
        notes += list(sequence)

    inputs, targets, vocab = create_train_sequence(notes)
    seed = random_sample_note_sequence(notes, sequence_length)

    print('Training accompany...')
    vocab_size = len(vocab)
    net = tflearn.input_data([None, sequence_length, vocab_size])
    net = tflearn.dropout(tflearn.lstm(net, 256, return_seq=True), 0.5)
    net = tflearn.dropout(tflearn.lstm(net, 256, return_seq=True), 0.5)
    net = tflearn.dropout(tflearn.lstm(net, 256), 0.5)
    net = tflearn.fully_connected(net, vocab_size, activation='softmax')
    net = tflearn.regression(net, optimizer='adam',
                             loss='categorical_crossentropy',
                             learning_rate=0.001)

    generator = tflearn.SequenceGenerator(net, dictionary=vocab,
                                          seq_maxlen=sequence_length,
                                          clip_gradients=5.0)
    generator.fit(inputs, targets, validation_set=0.1, batch_size=32,
                  n_epoch=epochs, run_id='accompany')
    return generator.generate(100, temperature=temperature, seq_seed=seed)
def replicator(char_idx):
    """Build the three-layer LSTM generator used to replicate tweets.

    Reads ``maxlen``, ``internal_size`` and ``dropout`` from the enclosing
    scope.

    Args:
        char_idx: dictionary handed to the generator; its length sets the
            width of the input and softmax layers.

    Returns:
        A ``tflearn.SequenceGenerator`` logging to ``logs`` and
        checkpointing to ``tweeterReplicator``.
    """
    n_chars = len(char_idx)

    layer = tflearn.input_data([None, maxlen, n_chars])
    layer = tflearn.dropout(
        tflearn.lstm(layer, internal_size, return_seq=True), dropout)
    layer = tflearn.dropout(
        tflearn.lstm(layer, internal_size, return_seq=True), dropout)
    layer = tflearn.dropout(tflearn.lstm(layer, internal_size), dropout)
    layer = tflearn.fully_connected(layer, n_chars, activation='softmax')
    layer = tflearn.regression(
        layer,
        optimizer='adam',
        loss='categorical_crossentropy',
        learning_rate=0.05,
    )

    return tflearn.SequenceGenerator(
        layer,
        dictionary=char_idx,
        seq_maxlen=maxlen,
        clip_gradients=5.0,
        tensorboard_verbose=3,
        tensorboard_dir='logs',
        checkpoint_path='tweeterReplicator',
    )
def createNetwork(max_len, char_dict, save_load_point):
    """Create a three-layer LSTM sequence generator.

    Args:
        max_len: sequence length of the input layer and the generator.
        char_dict: dictionary handed to the generator; its length sets the
            width of the input and softmax layers.
        save_load_point: checkpoint path; at most five checkpoints are kept.

    Returns:
        The ``tflearn.SequenceGenerator`` wrapping the network.
    """
    def lstm_block(tensor, return_seq):
        # One LSTM layer followed by its dropout layer.
        return tflearn.dropout(
            tflearn.lstm(tensor, 512, return_seq=return_seq), 0.5)

    width = len(char_dict)
    net = tflearn.input_data([None, max_len, width])
    net = lstm_block(net, True)
    net = lstm_block(net, True)
    net = lstm_block(net, False)
    net = tflearn.fully_connected(net, width, activation='softmax')
    net = tflearn.regression(net, optimizer='adam',
                             loss='categorical_crossentropy',
                             learning_rate=0.001)

    return tflearn.SequenceGenerator(net, dictionary=char_dict,
                                     seq_maxlen=max_len,
                                     clip_gradients=5.0,
                                     max_checkpoints=5,
                                     checkpoint_path=save_load_point)
def typeSubtypeNameGeneratorModel(maxLength, charIndex, checkpoint_path='./generator_checkpoints/'):
    '''
    Recurrent network model for generating card types, subtypes, and names

    Inputs:
        maxLength: the maximum length for a generated sequence
        charIndex: map from chars to the index they represent in a onehot encoding
        checkpoint_path: path to save model after every epoch ('./generator_checkpoints/')

    Returns:
        The tflearn.SequenceGenerator wrapping the three-layer LSTM network.
    '''
    network = input_data(shape=[None, maxLength, len(charIndex)])
    network = lstm(network, 512, return_seq=True)
    network = dropout(network, 0.5)
    network = lstm(network, 512, return_seq=True)
    network = dropout(network, 0.5)
    network = lstm(network, 512)
    network = dropout(network, 0.5)
    network = fully_connected(network, len(charIndex), activation='softmax')
    network = regression(network, optimizer='adam',
                         loss='categorical_crossentropy',
                         learning_rate=0.001)

    # Honour the caller's checkpoint_path; it used to be ignored in favour
    # of a hard-coded copy of the default value.
    model = tflearn.SequenceGenerator(network,
                                      tensorboard_verbose=0,
                                      dictionary=charIndex,
                                      seq_maxlen=maxLength,
                                      clip_gradients=5.0,
                                      checkpoint_path=checkpoint_path)
    return model
def gen_model(self):
    """Build the XSS sequence generator from ``xss-2000.txt``.

    The training sequences are stored on ``self.X`` and ``self.Y``.

    Returns:
        The ``tflearn.SequenceGenerator``, or ``None`` if building it raised
        an exception (the traceback is printed).
    """
    maxlen = 25
    xss_data_file = "xss-2000.txt"

    try:
        self.X, self.Y, char_idx = textfile_to_semi_redundant_sequences(
            xss_data_file, seq_maxlen=maxlen, redun_step=3,
            pre_defined_char_idx=None)

        g = tflearn.input_data([None, maxlen, len(char_idx)])
        g = tflearn.lstm(g, 32, return_seq=True)
        g = tflearn.dropout(g, 0.1)
        g = tflearn.lstm(g, 32, return_seq=True)
        g = tflearn.dropout(g, 0.1)
        g = tflearn.lstm(g, 32)
        g = tflearn.dropout(g, 0.1)
        g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
        g = tflearn.regression(g, optimizer='adam',
                               loss='categorical_crossentropy',
                               learning_rate=0.001)

        return tflearn.SequenceGenerator(
            g,
            dictionary=char_idx,
            seq_maxlen=maxlen,
            clip_gradients=5.0,
            checkpoint_path='model_scanner_poc')
    except Exception:
        # Best effort is kept: report and return None.  ``Exception`` (not a
        # bare except) and no ``return`` inside ``finally`` lets
        # KeyboardInterrupt and SystemExit propagate.
        traceback.print_exc()
        return None
activation='softmax') #Creating the output layer using softmax lstm = tflearn.regression( lstm, optimizer='adam', loss= 'categorical_crossentropy', #Creating the regression for the nodes created learning_rate=0.001) '''-------------------------------------------------------------------------------------------------''' '''---------------------------------------------------------------------------------------------------- Creating The Learning Process Of The Neural Network ----------------------------------------------------------------------------------------------------''' city_gen = tflearn.SequenceGenerator( lstm, dictionary= char_dic, #Generating the new city names by using a sequence generator function of tensor flow seq_maxlen=maxlength, clip_gradients=5.0, checkpoint_path='model_us_cities') '''-------------------------------------------------------------------------------------------------''' '''---------------------------------------------------------------------------------------------------- Training The Neural Network For City Generation ----------------------------------------------------------------------------------------------------''' for i in range(40): #A loop which runs the training for 40 times seed = random_sequence_from_textfile( path, maxlength ) #A random city name is used from the file and assigned to seed variable city_gen.fit( X, Y,
# Three stacked LSTM layers, each followed by dropout, then a softmax
# layer over the character dictionary.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.dropout(tflearn.lstm(g, 512, return_seq=True), 0.5)
g = tflearn.dropout(tflearn.lstm(g, 512, return_seq=True), 0.5)
g = tflearn.dropout(tflearn.lstm(g, 512), 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(
    g,
    optimizer='adam',
    loss='categorical_crossentropy',
    learning_rate=0.001,
)

m = tflearn.SequenceGenerator(
    g,
    dictionary=char_idx,
    seq_maxlen=maxlen,
    clip_gradients=5.0,
    checkpoint_path='model_shakespeare',
)

# Fifty single-epoch rounds, sampling from a fresh random seed each time.
for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(
        X, Y,
        validation_set=0.1,
        batch_size=128,
        n_epoch=1,
        run_id='shakespeare',
    )
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
# Script: define command-line flags, load data through the project's
# ``encoding`` module, and fit a single-layer LSTM sequence generator.
import tflearn as tfl
import tensorflow as tf
import encoding
import os

flags = tf.flags
logging = tf.logging

# Default locations, relative to the current working directory.
FILE_PATH= os.getcwd()+'/input/'
SAVE_PATH= os.getcwd()
# Width of the last input dimension and of the softmax output layer.
INPUT_SIZE = 13

flags.DEFINE_string("model", "small", "A type of model. Possible options are: small, medium, large.")
flags.DEFINE_string("data_path", FILE_PATH, "Where the training/test data is stored.")
flags.DEFINE_string("save_path", SAVE_PATH, "Model output directory.")
flags.DEFINE_bool("use_fp16", False, "Train using 16-bit floats instead of 32bit floats")
FLAGS = flags.FLAGS

# Only ``train_data`` is used below; the other three values are unpacked
# but not referenced in this script.
raw_data = encoding.input_data(FLAGS.data_path)
train_data, valid_data, test_data, vocabulary = raw_data
input_data, targets = encoding.input_producer(train_data, None, None, name=None)

# Network: input of shape [None, 10, INPUT_SIZE] -> LSTM(512) -> dropout
# -> softmax over INPUT_SIZE outputs.
g = tfl.input_data(shape=[None, 10, INPUT_SIZE])
g = tfl.lstm(g, 512)
g = tfl.dropout(g, 0.5)
g = tfl.fully_connected(g, INPUT_SIZE, activation='softmax')
g = tfl.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)

# NOTE(review): no ``dictionary`` or ``seq_maxlen`` is passed to the
# generator here -- confirm the defaults are what is intended.
m = tfl.SequenceGenerator(g)
m.fit(input_data, targets)
print("Setting up network")
# Three stacked LSTM layers, each followed by dropout, then a softmax
# layer over the character dictionary.
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.dropout(tflearn.lstm(g, 512, return_seq=True), 0.5)
g = tflearn.dropout(tflearn.lstm(g, 512, return_seq=True), 0.5)
g = tflearn.dropout(tflearn.lstm(g, 512), 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(
    g,
    optimizer='adam',
    loss='categorical_crossentropy',
    learning_rate=0.001,
)
print("Network Complete. Training...")

m = tflearn.SequenceGenerator(
    g,
    dictionary=char_idx,
    seq_maxlen=maxlen,
    clip_gradients=5.0,
    checkpoint_path='./checkpoints/sayton',
)

# Fifty single-epoch rounds, sampling from a fresh random seed each time.
for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(
        X, Y,
        validation_set=0.1,
        batch_size=128,
        n_epoch=1,
        run_id='shakespeare',
    )
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(600, temperature=0.5, seq_seed=seed))
def main():
    """Build, train or load, and interactively sample a character-level LSTM.

    Configuration comes from ``FLAGS``.  If ``FLAGS.model_file`` exists the
    model is loaded from it; otherwise it is trained and saved.  One sample
    is generated, then the user is prompted for temperatures until a value
    that is not greater than 0 is entered or end-of-file is reached.
    """
    path = FLAGS.dataset

    # We avoid using fixed padding and simply calculate the max length of
    # our input set.
    if FLAGS.max_sequence_lenght < 1:
        maxlen = find_maxlenght(path)
    else:
        maxlen = FLAGS.max_sequence_lenght
    print("MaxLen = ", maxlen)

    X, Y, char_idx = textfile_to_semi_redundant_sequences(
        path, seq_maxlen=maxlen, redun_step=3)

    # Network structure, using commonly used values for node dimensions and
    # dropout.
    g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])

    # Hidden LSTM layers created from the flags.
    for _ in range(FLAGS.hidden_layer_size):
        g = tflearn.lstm(g, FLAGS.lstm_node_size, return_seq=True)
        g = tflearn.dropout(g, 0.5)

    # Last LSTM layer, then a fully connected softmax output.
    g = tflearn.lstm(g, FLAGS.lstm_node_size)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(
        g, dictionary=char_idx, seq_maxlen=maxlen, clip_gradients=5.0,
        checkpoint_path='model_' + os.path.basename(path))

    if os.path.exists(FLAGS.model_file):
        # Load our pre-trained model from file.
        print("Loading model from file ", FLAGS.model_file)
        load_model(m)
    else:
        print("Training model...")
        m.fit(X, Y, validation_set=0.1, batch_size=FLAGS.batch_size,
              n_epoch=FLAGS.epochs, run_id=os.path.basename(path))
        print("Saving trained model to file ", FLAGS.model_file)
        save_model(m)

    # Generate a test result.
    generate(m, maxlen)

    # ``raw_input`` only exists on Python 2; fall back to ``input``.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # readline only adds line editing to the prompt; it is missing on some
    # platforms, so its absence must not abort the session.
    try:
        import readline  # noqa: F401
    except ImportError:
        pass

    # Interactive session.
    try:
        temp = 1.0
        while temp > 0.0:
            try:
                temp = float(read_line('Insert temperature for generation: '))
            except ValueError:
                # Re-prompt instead of crashing on a typo.
                print("Please enter a number.")
                continue
            FLAGS.temperature = temp
            generate(m, maxlen)
    except EOFError:
        print("Bye!")
    return
# Network: two LSTM layers, each followed by dropout, then a softmax layer
# over the character dictionary.
model = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
model = tflearn.dropout(tflearn.lstm(model, 512, return_seq=True), 0.5)
model = tflearn.dropout(tflearn.lstm(model, 512), 0.5)
model = tflearn.fully_connected(model, len(char_idx), activation="softmax")
model = tflearn.regression(
    model,
    optimizer='adam',
    loss='categorical_crossentropy',
    learning_rate=0.001,
)

# Wrap the network in the city-name sequence generator.
model = tflearn.SequenceGenerator(
    model,
    dictionary=char_idx,
    seq_maxlen=maxlen,
    clip_gradients=5.0,
    checkpoint_path="model_us_cities",
)

# Forty single-epoch rounds; after each one, sample at three temperatures
# from the same random seed.
for i in range(40):
    seed = random_sequence_from_textfile(data_path, maxlen)
    model.fit(X, Y, validation_set=0.2, batch_size=128, n_epoch=1)
    print("Testing 0.5:",
          model.generate(30, temperature=0.5, seq_seed=seed))
    print("Testing 1.0:",
          model.generate(30, temperature=1.0, seq_seed=seed))
    print("Testing 1.2:",
          model.generate(30, temperature=1.2, seq_seed=seed))
# # 3:王
# # 4:\n
# print(char_idx)  # dictionary with 9 entries

# Build the network: two named LSTM layers, each followed by dropout.
g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
g = tflearn.dropout(
    tflearn.layers.recurrent.lstm(g, 512, return_seq=True, name='g1'),
    0.5, name='d1')
g = tflearn.dropout(
    tflearn.layers.recurrent.lstm(g, 512, name='g2'),
    0.5, name='d2')

# Fully connected softmax layer, then the loss and the optimizer.
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(
    g,
    optimizer='adam',
    loss='categorical_crossentropy',
    learning_rate=0.001,
)

# Sequence-generation model; clip_gradients sets the gradient clipping value.
m = tflearn.SequenceGenerator(
    g,
    dictionary=char_idx,
    seq_maxlen=maxlen,
    clip_gradients=5.0,
    checkpoint_path='./model/lstm_gen/model',
    tensorboard_dir='./logs',
)

# Train for 40 single-epoch rounds, generating sequences after each one.
for i in range(40):
    # Pick a random seed sequence from the corpus.
    seed = list(
        tflearn.data_utils.random_sequence_from_string(string_utf8, maxlen))
    # Feed the data for one epoch of training.
    m.fit(X, Y, validation_set=0.1, batch_size=1024, n_epoch=1,
          run_id='us_cities')
    # Generate from the model.  Original author's note: temperature is the
    # degree of novelty, and 0 reproduces the sample data.
    print(''.join(m.generate(seq_length=30, temperature=1.5, seq_seed=seed)))
    print(''.join(m.generate(seq_length=30, temperature=1., seq_seed=seed)))
    print(''.join(m.generate(seq_length=30, temperature=.5, seq_seed=seed)))