Example #1
def model_structure():
    """Train the model."""

    if not TEXTFILE.exists():
        text = ''
        for path in BUILDDIR.glob('*.epub'):
            book = open_book(path)
            lines = convert_epub_to_lines(book)
            for line in lines:
                soup = BeautifulSoup(line, 'html.parser')
                text += soup.get_text()
        TEXTFILE.write_text(text)

    x, y, charidx = textfile_to_semi_redundant_sequences(TEXTFILE,
                                                         seq_maxlen=SEQ_MAXLEN)

    g = tflearn.input_data([None, SEQ_MAXLEN, len(charidx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(charidx), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)
    model = tflearn.SequenceGenerator(g,
                                      dictionary=charidx,
                                      seq_maxlen=SEQ_MAXLEN,
                                      clip_gradients=5.0,
                                      checkpoint_path=MODEL)

    return model, x, y, charidx
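
A hedged usage sketch for the builder above; the round count and generation length are assumptions, mirroring the fit/generate loops in the other examples on this page:

from tflearn.data_utils import random_sequence_from_textfile

model, x, y, charidx = model_structure()
for _ in range(10):  # number of training rounds is an assumption
    model.fit(x, y, validation_set=0.1, batch_size=128, n_epoch=1)
    # seed generation with a random SEQ_MAXLEN-character slice of the corpus
    seed = random_sequence_from_textfile(str(TEXTFILE), SEQ_MAXLEN)
    print(model.generate(600, temperature=0.5, seq_seed=seed))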
Example #2
    def initialize_model(self):
        char_idx_file = 'char_idx.pickle'
        maxlen = 25

        char_idx = None
        if os.path.isfile(char_idx_file):
            print('Loading previous char_idx')
            char_idx = pickle.load(open(char_idx_file, 'rb'))

        X, Y, char_idx = textfile_to_semi_redundant_sequences(
            path, seq_maxlen=maxlen, redun_step=3, pre_defined_char_idx=char_idx)

        g = tflearn.input_data([None, maxlen, len(char_idx)])
        g = tflearn.lstm(g, 512, return_seq=True)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.lstm(g, 512, return_seq=True)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.lstm(g, 512)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
        g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                               learning_rate=0.01)

        m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                      seq_maxlen=maxlen,
                                      clip_gradients=5.0,
                                      checkpoint_path='model_tweets')
        # Load the model
        m.load("model.tfl")
        self.__text_model = m
Example #3
def buildModel(hp, idxs, layers):
    print("building tensorflow model...")
    net = tflearn.input_data(shape=[None, hp['seqLength'], len(idxs)])
    print("added input layer")
    for layer in range(layers - 1):
        net = tflearn.lstm(net, hp['m'], return_seq=True)
        print("added lstm layer")
        net = tflearn.dropout(net, 0.3)
        print("added dropout")
    net = tflearn.lstm(net, hp['m'])
    print("added final lstm layer")
    net = tflearn.dropout(net, 0.3)
    net = tflearn.fully_connected(net, len(idxs), activation='softmax')
    print("added fully connected softmax")
    net = tflearn.regression(net,
                             optimizer='adam',
                             loss='categorical_crossentropy',
                             learning_rate=hp['eta'])
    print("added ADAM optimized cross entropy loss")
    model = tflearn.SequenceGenerator(net,
                                      dictionary=idxs,
                                      seq_maxlen=hp['seqLength'],
                                      clip_gradients=5.0)
    print("created sequence generator")
    print("model built!")
    return model
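
For reference, a minimal hypothetical call; the hp keys ('seqLength', 'm' for LSTM width, 'eta' for learning rate) are the ones the function reads above, while the values and corpus file are assumptions:

from tflearn.data_utils import textfile_to_semi_redundant_sequences

hp = {'seqLength': 25, 'm': 512, 'eta': 0.001}
_, _, idxs = textfile_to_semi_redundant_sequences('corpus.txt',  # hypothetical file
                                                  seq_maxlen=hp['seqLength'],
                                                  redun_step=3)
model = buildModel(hp, idxs, layers=3)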
Example #4
def build_model(char_idx):
    logging.info('building model')
    model = tflearn.input_data([None, MAXLEN, len(char_idx)])

    n_lstm_neurons = 512
    dropout = 0.4
    for _ in range(2):
        model = tflearn.lstm(model, n_lstm_neurons, return_seq=True)
        model = tflearn.dropout(model, dropout)

    for _ in range(1):
        model = tflearn.lstm(model, n_lstm_neurons)
        model = tflearn.dropout(model, dropout)

    model = tflearn.fully_connected(model, len(char_idx), activation='softmax')
    model = tflearn.regression(model,
                               optimizer='adam',
                               loss='categorical_crossentropy',
                               learning_rate=0.001)

    return tflearn.SequenceGenerator(model,
                                     dictionary=char_idx,
                                     seq_maxlen=MAXLEN,
                                     clip_gradients=5.0,
                                     checkpoint_path=CHECKPOINT_PATH)
Example #5
def run():
    # imagine cnn, the third dim is like the 'chnl'
    g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='models/model_us_cities')

    for i in range(40):
        seed = random_sequence_from_textfile(path, maxlen)
        m.fit(X, Y, validation_set=0.1, batch_size=128,
              n_epoch=1, run_id='us_cities')
        print("-- TESTING...")
        print("-- Test with temperature of 1.2 --")
        print(m.generate(30, temperature=1.2, seq_seed=seed))
        print("-- Test with temperature of 1.0 --")
        print(m.generate(30, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(30, temperature=0.5, seq_seed=seed))
Example #6
    def test_sequencegenerator(self):

        with tf.Graph().as_default():
            text = "123456789101234567891012345678910123456789101234567891012345678910"
            maxlen = 5

            X, Y, char_idx = \
                tflearn.data_utils.string_to_semi_redundant_sequences(text, seq_maxlen=maxlen, redun_step=3)

            g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
            g = tflearn.lstm(g, 32)
            g = tflearn.dropout(g, 0.5)
            g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
            g = tflearn.regression(g,
                                   optimizer='adam',
                                   loss='categorical_crossentropy',
                                   learning_rate=0.1)

            m = tflearn.SequenceGenerator(g,
                                          dictionary=char_idx,
                                          seq_maxlen=maxlen,
                                          clip_gradients=5.0)
            m.fit(X, Y, validation_set=0.1, n_epoch=100, snapshot_epoch=False)
            res = m.generate(10, temperature=.5, seq_seed="12345")
            #self.assertEqual(res, "123456789101234", "SequenceGenerator test failed! Generated sequence: " + res + " expected '123456789101234'")

            # Testing save method
            m.save("test_seqgen.tflearn")
            self.assertTrue(os.path.exists("test_seqgen.tflearn.index"))

            # Testing load method
            m.load("test_seqgen.tflearn")
            res = m.generate(10, temperature=.5, seq_seed="12345")
Example #7
    def train(self):

        char_idx = None

        if os.path.isfile(self.charIDXFile):
            # load previous character file
            char_idx = pickle.load(open(self.charIDXFile, 'rb'))

        X, Y, char_idx = textfile_to_semi_redundant_sequences(
            self.path, seq_maxlen=self.maxLength, redun_step=3,
            pre_defined_char_idx=char_idx)

        pickle.dump(char_idx, open(self.charIDXFile, 'wb'))

        self.g = tflearn.input_data([None, self.maxLength,
                                     len(char_idx)])
        self.g = tflearn.lstm(self.g, 512, return_seq=True)
        self.g = tflearn.dropout(self.g, 0.5)
        self.g = tflearn.lstm(self.g, 512, return_seq=True)
        self.g = tflearn.dropout(self.g, 0.5)
        self.g = tflearn.lstm(self.g, 512)
        self.g = tflearn.dropout(self.g, 0.5)
        self.g = tflearn.fully_connected(self.g,
                                         len(char_idx),
                                         activation='softmax')
        self.g = tflearn.regression(self.g,
                                    optimizer='adam',
                                    loss='categorical_crossentropy',
                                    learning_rate=0.001)
        self.model = tflearn.SequenceGenerator(self.g,
                                               dictionary=char_idx,
                                               seq_maxlen=self.maxLength,
                                               max_checkpoints=0,
                                               checkpoint_path='model_trump')
Example #8
def CharacterLSTM_Train(data,
                        model,
                        dictionary,
                        history=25,
                        layers=3,
                        epochs=10,
                        hidden_nodes=512,
                        dropout=False):
    char_idx_file = dictionary
    maxlen = history

    char_idx = None
    '''
	if os.path.isfile(char_idx_file):
		print('Loading previous char_idx')
		char_idx = pickle.load(open(char_idx_file, 'rb'))
	print("---------------")
	print(char_idx)
	print(len(char_idx))
	'''

    X, Y, char_idx = textfile_to_semi_redundant_sequences(data,
                                                          seq_maxlen=maxlen,
                                                          redun_step=3)

    pickle.dump(char_idx, open(dictionary, 'wb'))

    tf.reset_default_graph()
    print("layers " + str(layers) + " hidden " + str(hidden_nodes))
    '''
	g = tflearn.input_data([None, maxlen, len(char_idx)])
	for n in range(layers-1):
		g = tflearn.lstm(g, hidden_nodes, return_seq=True)
		if dropout:
			g = tflearn.dropout(g, 0.5)
	g = tflearn.lstm(g, hidden_nodes)
	if dropout:
		g = tflearn.dropout(g, 0.5)
	g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
	g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)
	'''
    g = buildModel(layers, hidden_nodes, maxlen, char_idx, dropout)
    m = tflearn.SequenceGenerator(
        g, dictionary=char_idx, seq_maxlen=maxlen,
        clip_gradients=5.0)  #, checkpoint_path='model_history_gen')

    #if model is not None:
    #	m.load(model)

    #for i in range(epochs):
    #seed = random_sequence_from_textfile(data, maxlen)
    m.fit(X,
          Y,
          validation_set=0.1,
          batch_size=128,
          n_epoch=epochs,
          run_id='run_gen')
    print("Saving...")
    m.save(model)
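
The buildModel helper called above (also used in Example #15 below) is not shown; judging from the commented-out network and the call site, it plausibly looks like this sketch (a reconstruction, not the original):

import tflearn

def buildModel(layers, hidden_nodes, maxlen, char_idx, dropout):
    # stack of LSTM layers with optional dropout, as in the commented block above
    g = tflearn.input_data([None, maxlen, len(char_idx)])
    for n in range(layers - 1):
        g = tflearn.lstm(g, hidden_nodes, return_seq=True)
        if dropout:
            g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, hidden_nodes)
    if dropout:
        g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    return tflearn.regression(g, optimizer='adam',
                              loss='categorical_crossentropy',
                              learning_rate=0.001)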
Example #9
def shakespeare():

    path = "shakespeare_input.txt"
    #path = "shakespeare_input-100.txt"
    char_idx_file = 'char_idx.pickle'

    if not os.path.isfile(path):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt",
            path)

    maxlen = 25

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        char_idx = pickle.load(open(char_idx_file, 'rb'))

    X, Y, char_idx = \
        textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3,
                                             pre_defined_char_idx=char_idx)

    pickle.dump(char_idx, open(char_idx_file, 'wb'))

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g,
                                  dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_shakespeare')

    for i in range(50):
        seed = random_sequence_from_textfile(path, maxlen)
        m.fit(X,
              Y,
              validation_set=0.1,
              batch_size=128,
              n_epoch=1,
              run_id='shakespeare')
        print("-- TESTING...")
        print("-- Test with temperature of 1.0 --")
        print(m.generate(600, temperature=1.0, seq_seed=seed))
        #print(m.generate(10, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(600, temperature=0.5, seq_seed=seed))
Example #10
    def test_sequencegenerator_words(self):

        with tf.Graph().as_default():
            text = ["hello", "world"] * 100
            word_idx = {"hello": 0, "world": 1}
            maxlen = 2

            vec = [x for x in map(word_idx.get, text) if x is not None]

            sequences = []
            next_words = []
            for i in range(0, len(vec) - maxlen, 3):
                sequences.append(vec[i:i + maxlen])
                next_words.append(vec[i + maxlen])

            X = np.zeros((len(sequences), maxlen, len(word_idx)),
                         dtype=bool)
            Y = np.zeros((len(sequences), len(word_idx)), dtype=bool)
            for i, seq in enumerate(sequences):
                for t, idx in enumerate(seq):
                    X[i, t, idx] = True
                    Y[i, next_words[i]] = True

            g = tflearn.input_data(shape=[None, maxlen, len(word_idx)])
            g = tflearn.lstm(g, 32)
            g = tflearn.dropout(g, 0.5)
            g = tflearn.fully_connected(g, len(word_idx), activation='softmax')
            g = tflearn.regression(g,
                                   optimizer='adam',
                                   loss='categorical_crossentropy',
                                   learning_rate=0.1)

            m = tflearn.SequenceGenerator(g,
                                          dictionary=word_idx,
                                          seq_maxlen=maxlen,
                                          clip_gradients=5.0)
            m.fit(X, Y, validation_set=0.1, n_epoch=100, snapshot_epoch=False)
            res = m.generate(4, temperature=.5, seq_seed=["hello", "world"])
            res_str = " ".join(res[-2:])
            self.assertEqual(
                res_str, "hello world",
                "SequenceGenerator (word level) test failed! Generated sequence: "
                + res_str + " expected 'hello world'")

            # Testing save method
            m.save("test_seqgen_word.tflearn")
            self.assertTrue(os.path.exists("test_seqgen_word.tflearn.index"))

            # Testing load method
            m.load("test_seqgen_word.tflearn")
            res = m.generate(4, temperature=.5, seq_seed=["hello", "world"])
            res_str = " ".join(res[-2:])
            self.assertEqual(
                res_str, "hello world",
                "Reloaded SequenceGenerator (word level) test failed! Generated sequence: "
                + res_str + " expected 'hello world'")
Example #11
    def __init__(self, char_idx, seq_max_len=25, checkpoint_path=None, **kwargs):
        self.init_params = kwargs
        self.char_idx = char_idx

        g = self._build_model(seq_max_len, len(char_idx), **kwargs)
        self.model = tflearn.SequenceGenerator(
            g, dictionary=char_idx,
            seq_maxlen=seq_max_len,
            clip_gradients=5.0,
            checkpoint_path=checkpoint_path
        )
        self.default_seed = kwargs.get('default_seed')
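
The _build_model helper is not shown; assuming it follows the three-LSTM stack used throughout these examples, a sketch (widths and dropout are assumptions):

    def _build_model(self, seq_max_len, vocab_size, **kwargs):
        # hypothetical reconstruction; layer sizes follow the sibling examples
        g = tflearn.input_data([None, seq_max_len, vocab_size])
        g = tflearn.lstm(g, 512, return_seq=True)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.lstm(g, 512)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.fully_connected(g, vocab_size, activation='softmax')
        return tflearn.regression(g, optimizer='adam',
                                  loss='categorical_crossentropy',
                                  learning_rate=0.001)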
Example #12
File: rnn.py Project: Renwoxin/webDL
def shakespeare():

    path = "shakespeare_input.txt"
    #path = "shakespeare_input-100.txt"
    char_idx_file = 'char_idx.pickle'

    if not os.path.isfile(path):
        urllib.request.urlretrieve(
            "https://raw.githubusercontent.com/tflearn/tflearn.github.io/master/resources/shakespeare_input.txt", path)

    maxlen = 25

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        char_idx = pickle.load(open(char_idx_file, 'rb'))

    X, Y, char_idx = \
        textfile_to_semi_redundant_sequences(path, seq_maxlen=maxlen, redun_step=3,
                                             pre_defined_char_idx=char_idx)

    pickle.dump(char_idx, open(char_idx_file, 'wb'))

    # Build the graph with tflearn layers so tflearn.SequenceGenerator below
    # can train on it and sample from it.
    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g, optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_shakespeare')

    for i in range(50):
        seed = random_sequence_from_textfile(path, maxlen)
        m.fit(X, Y, validation_set=0.1, batch_size=128,
              n_epoch=1, run_id='shakespeare')
        print("-- TESTING...")
        print("-- Test with temperature of 1.0 --")
        print(m.generate(600, temperature=1.0, seq_seed=seed))
        #print(m.generate(10, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(600, temperature=0.5, seq_seed=seed))
Example #13
def generator_xss():
    global char_idx
    global xss_data_file
    global maxlen

    if os.path.isfile(char_idx_file):
        print('Loading previous xss_char_idx')
        char_idx = pickle.load(open(char_idx_file, 'rb'))

    X, Y, char_idx = \
        textfile_to_semi_redundant_sequences(xss_data_file, seq_maxlen=maxlen, redun_step=3,
                                             pre_defined_char_idx=char_idx)

    #pickle.dump(char_idx, open(char_idx_file, 'wb'))

    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 32, return_seq=True)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.lstm(g, 32, return_seq=True)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.lstm(g, 32)
    g = tflearn.dropout(g, 0.1)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g,
                                  dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='chkpoint/model_scanner_poc')

    print "random_sequence_from_textfile"
    #seed = random_sequence_from_textfile(xss_data_file, maxlen)
    seed = '"/><script>'
    m.fit(X,
          Y,
          validation_set=0.1,
          batch_size=128,
          n_epoch=2,
          run_id='scanner-poc')
    print("-- TESTING...")

    print("-- Test with temperature of 0.1 --")
    print(m.generate(32, temperature=0.1, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(32, temperature=0.5, seq_seed=seed))
    print("-- Test with temperature of 1.0 --")
    print(m.generate(32, temperature=1.0, seq_seed=seed))
Example #14
def main():
    path = '../data/cityName/US_Cities.txt'
    maxlen = 20
    string_utf8 = open(path, 'r').read()
    x, y, char_idx = string_to_semi_redundant_sequences(string_utf8,
                                                        seq_maxlen=maxlen,
                                                        redun_step=3)

    # string_utf8 is the input string, e.g. "皇太极\n祖大寿\n倪哑巴\n胡桂南\n胡老三崔秋山\n黄真\n崔希敏\n黄二毛子\n曹化淳\n黄须人"; note that \n counts as a character too.
    # seq_maxlen is the length of each generated sequence, 20 here.
    # redun_step is the stride, i.e. take a sample every few characters; 3 here.
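    # For example, with maxlen = 20 and redun_step = 3, the call above returns
    # x of shape (num_sequences, 20, len(char_idx)) holding one-hot characters,
    # y of shape (num_sequences, len(char_idx)) holding the one-hot next character,
    # and char_idx mapping each distinct character to its one-hot index.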
    g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)  # set the loss and optimizer
    # Instantiate the RNN-based sequence generator with the matching dictionary
    m = tflearn.SequenceGenerator(g,
                                  dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_us_cities')
    # Use random seeds to generate city names with the RNN model
    for i in range(40):
        # Build a random seed for the generated sequence
        seed = random_sequence_from_string(string_utf8, maxlen)
        # Feed in the data for training
        m.fit(x,
              y,
              validation_set=0.1,
              batch_size=128,
              n_epoch=1,
              run_id='us_cities')
        print("-- TESTING...")
        print("-- Test with temperature of 1.2 --")
        # Call the model to generate data
        # temperature controls novelty: lower values keep generated names closer to the sample city names, higher values are more novel
        # 0 would just reproduce the sample data
        # generate(seq_length, temperature=0.5, seq_seed=None, display=False)
        print(m.generate(30, temperature=1.2, seq_seed=seed))
        print("-- Test with temperature of 1.0 --")
        print(m.generate(30, temperature=1.0, seq_seed=seed))
        print("-- Test with temperature of 0.5 --")
        print(m.generate(30, temperature=0.5, seq_seed=seed))
Example #15
def CharacterLSTM_Run(seed,
                      dictionary,
                      model,
                      output,
                      steps=600,
                      layers=3,
                      hidden_nodes=512,
                      history=25,
                      temperature=0.5,
                      dropout=False):
    char_idx_file = dictionary
    maxlen = history

    char_idx = None
    if os.path.isfile(char_idx_file):
        print('Loading previous char_idx')
        char_idx = pickle.load(open(char_idx_file, 'rb'))

    tf.reset_default_graph()
    g = buildModel(layers, hidden_nodes, maxlen, char_idx, dropout)
    '''
	g = tflearn.input_data([None, maxlen, len(char_idx)])
	for n in range(layers-1):
		g = tflearn.lstm(g, hidden_nodes, return_seq=True)
		if dropout:
			g = tflearn.dropout(g, 0.5)
	g = tflearn.lstm(g, hidden_nodes)
	if dropout:
		g = tflearn.dropout(g, 0.5)
	g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
	g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)
	'''
    m = tflearn.SequenceGenerator(
        g, dictionary=char_idx, seq_maxlen=maxlen,
        clip_gradients=5.0)  #, checkpoint_path='model_history_gen')

    m.load(model)

    #seed = random_sequence_from_textfile(data, maxlen)

    print('seed=' + seed)
    print('len=' + str(len(seed)))
    result = m.generate(steps,
                        temperature=temperature,
                        seq_seed=seed[:history])
    print(result)
    return result
Example #16
    def __init__(self, char_idx, seq_max_len=25, checkpoint_path=None, default_seed=None):
        g = tflearn.input_data([None, seq_max_len, len(char_idx)])
        g = tflearn.lstm(g, 512, return_seq=True)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.lstm(g, 512, return_seq=True)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.lstm(g, 512)
        g = tflearn.dropout(g, 0.5)
        g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
        g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)

        self.model = tflearn.SequenceGenerator(
            g, dictionary=char_idx,
            seq_maxlen=seq_max_len,
            clip_gradients=5.0,
            checkpoint_path=checkpoint_path
        )

        self.default_seed = default_seed if default_seed else "life in the hood"
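
A hypothetical use of the class above (its name is not shown, so the constructor call below is illustrative only; char_idx and the sampling values are assumptions):

gen = TextGenerator(char_idx, seq_max_len=25)  # hypothetical class name
print(gen.model.generate(600, temperature=0.5, seq_seed=gen.default_seed))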
Example #17
def build_model(maxlen, char_idx, checkpoint_path):
    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    return tflearn.SequenceGenerator(g,
                                     dictionary=char_idx,
                                     seq_maxlen=maxlen,
                                     clip_gradients=5.0,
                                     checkpoint_path=checkpoint_path)
Example #18
def train_piano_melody(right, sequence_length=8, temperature=1.0, epochs=5):
    tf.reset_default_graph()
    piano_melody = []
    for sequence in right:
        piano_melody.extend(sequence)

    train_melody_X, train_melody_Y, melody_dict = create_train_sequence(
        piano_melody)

    melody_seed = random_sample_note_sequence(piano_melody, sequence_length)
    #melody_seed = [45, 46, 48, 49]

    print('Training melody...')
    melody_trainer = tflearn.input_data(
        [None, sequence_length, len(melody_dict)])
    melody_trainer = tflearn.lstm(melody_trainer, 256, return_seq=True)
    melody_trainer = tflearn.dropout(melody_trainer, 0.5)
    melody_trainer = tflearn.lstm(melody_trainer, 256, return_seq=True)
    melody_trainer = tflearn.dropout(melody_trainer, 0.5)
    melody_trainer = tflearn.lstm(melody_trainer, 256)
    melody_trainer = tflearn.dropout(melody_trainer, 0.5)
    melody_trainer = tflearn.fully_connected(melody_trainer,
                                             len(melody_dict),
                                             activation='softmax')
    melody_trainer = tflearn.regression(melody_trainer,
                                        optimizer='adam',
                                        loss='categorical_crossentropy',
                                        learning_rate=0.001)

    melody_generator = tflearn.SequenceGenerator(melody_trainer,
                                                 dictionary=melody_dict,
                                                 seq_maxlen=sequence_length,
                                                 clip_gradients=5.0)
    melody_generator.fit(train_melody_X,
                         train_melody_Y,
                         validation_set=0.1,
                         batch_size=32,
                         n_epoch=epochs,
                         run_id='melody')

    return melody_generator.generate(100,
                                     temperature=temperature,
                                     seq_seed=melody_seed)
Example #19
def train_piano_accompany(left, sequence_length=8, temperature=1.0, epochs=5):
    tf.reset_default_graph()
    piano_accompany = []
    for sequence in left:
        piano_accompany.extend(sequence)

    train_accompany_X, train_accompany_Y, accompany_dict = create_train_sequence(
        piano_accompany)
    accompany_seed = random_sample_note_sequence(piano_accompany,
                                                 sequence_length)

    print('Training accompany...')
    accompany_trainer = tflearn.input_data(
        [None, sequence_length, len(accompany_dict)])
    accompany_trainer = tflearn.lstm(accompany_trainer, 256, return_seq=True)
    accompany_trainer = tflearn.dropout(accompany_trainer, 0.5)
    accompany_trainer = tflearn.lstm(accompany_trainer, 256, return_seq=True)
    accompany_trainer = tflearn.dropout(accompany_trainer, 0.5)
    accompany_trainer = tflearn.lstm(accompany_trainer, 256)
    accompany_trainer = tflearn.dropout(accompany_trainer, 0.5)
    accompany_trainer = tflearn.fully_connected(accompany_trainer,
                                                len(accompany_dict),
                                                activation='softmax')
    accompany_trainer = tflearn.regression(accompany_trainer,
                                           optimizer='adam',
                                           loss='categorical_crossentropy',
                                           learning_rate=0.001)

    accompany_generator = tflearn.SequenceGenerator(accompany_trainer,
                                                    dictionary=accompany_dict,
                                                    seq_maxlen=sequence_length,
                                                    clip_gradients=5.0)
    accompany_generator.fit(train_accompany_X,
                            train_accompany_Y,
                            validation_set=0.1,
                            batch_size=32,
                            n_epoch=epochs,
                            run_id='accompany')
    return accompany_generator.generate(100,
                                        temperature=temperature,
                                        seq_seed=accompany_seed)
Example #20
def replicator(char_idx):
    g = tflearn.input_data([None, maxlen, len(char_idx)])
    g = tflearn.lstm(g, internal_size, return_seq=True)
    g = tflearn.dropout(g, dropout)
    g = tflearn.lstm(g, internal_size, return_seq=True)
    g = tflearn.dropout(g, dropout)
    g = tflearn.lstm(g, internal_size)
    g = tflearn.dropout(g, dropout)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.05)
    rep = tflearn.SequenceGenerator(g,
                                    dictionary=char_idx,
                                    seq_maxlen=maxlen,
                                    clip_gradients=5.0,
                                    tensorboard_verbose=3,
                                    tensorboard_dir='logs',
                                    checkpoint_path='tweeterReplicator')
    return rep
Example #21
def createNetwork(max_len, char_dict, save_load_point):
    g = tflearn.input_data([None, max_len, len(char_dict)])
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512, return_seq=True)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.lstm(g, 512)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_dict), activation='softmax')
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    m = tflearn.SequenceGenerator(g,
                                  dictionary=char_dict,
                                  seq_maxlen=max_len,
                                  clip_gradients=5.0,
                                  max_checkpoints=5,
                                  checkpoint_path=save_load_point)

    return m
Example #22
def typeSubtypeNameGeneratorModel(maxLength, charIndex, checkpoint_path='./generator_checkpoints/'):
  '''
  Recurrent network model for generating card types, subtypes, and names
  Inputs:
    maxLength: the maximum length for a generated sequence
    charIndex: map from chars to the index they represent in a onehot encoding
    checkpoint_path: path to save model after every epoch ('./generator_checkpoints/')
  '''
  network = input_data(shape=[None, maxLength, len(charIndex)])
  network = lstm(network, 512, return_seq=True)
  network = dropout(network, 0.5)
  network = lstm(network, 512, return_seq=True)
  network = dropout(network, 0.5)
  network = lstm(network, 512)
  network = dropout(network, 0.5)
  network = fully_connected(network, len(charIndex), activation='softmax')
  network = regression(network, optimizer='adam', loss='categorical_crossentropy',
                        learning_rate=0.001)

  model = tflearn.SequenceGenerator(network, tensorboard_verbose=0, dictionary=charIndex,
                                      seq_maxlen=maxLength, clip_gradients=5.0,
                                      checkpoint_path=checkpoint_path)
  return model
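
A hedged usage sketch; the corpus file and maxLength value are assumptions, with the data prepared the same way as in the other examples on this page:

from tflearn.data_utils import textfile_to_semi_redundant_sequences

X, Y, charIndex = textfile_to_semi_redundant_sequences('cards.txt',  # hypothetical corpus
                                                       seq_maxlen=30, redun_step=3)
model = typeSubtypeNameGeneratorModel(30, charIndex)
model.fit(X, Y, validation_set=0.1, batch_size=128, n_epoch=1)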
Example #23
    def gen_model(self):
        char_idx_file = 'char_idx_xss.pkl'
        maxlen = 25
        char_idx = None
        xss_data_file = "xss-2000.txt"
        model = None
        try:
            self.X, self.Y, char_idx = \
                textfile_to_semi_redundant_sequences(xss_data_file, seq_maxlen=maxlen, redun_step=3,
                                                     pre_defined_char_idx=char_idx)

            #pickle.dump(char_idx, open(char_idx_file, 'wb'))

            g = tflearn.input_data([None, maxlen, len(char_idx)])
            g = tflearn.lstm(g, 32, return_seq=True)
            g = tflearn.dropout(g, 0.1)
            g = tflearn.lstm(g, 32, return_seq=True)
            g = tflearn.dropout(g, 0.1)
            g = tflearn.lstm(g, 32)
            g = tflearn.dropout(g, 0.1)
            g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
            g = tflearn.regression(g,
                                   optimizer='adam',
                                   loss='categorical_crossentropy',
                                   learning_rate=0.001)

            model = tflearn.SequenceGenerator(
                g,
                dictionary=char_idx,
                seq_maxlen=maxlen,
                clip_gradients=5.0,
                checkpoint_path='model_scanner_poc')
        except Exception:
            traceback.print_exc()
        finally:
            return model
Example #24
lstm = tflearn.fully_connected(
    lstm,
    len(char_dic),  # assumed: one output unit per character, as in the sibling examples
    activation='softmax')  #Creating the output layer using softmax
lstm = tflearn.regression(
    lstm,
    optimizer='adam',
    loss='categorical_crossentropy',  #Creating the regression for the nodes created
    learning_rate=0.001)
'''-------------------------------------------------------------------------------------------------'''
'''----------------------------------------------------------------------------------------------------
	Creating The Learning Process Of The Neural Network
----------------------------------------------------------------------------------------------------'''

city_gen = tflearn.SequenceGenerator(
    lstm,
    dictionary=char_dic,  #Generating the new city names by using a sequence generator function of tensor flow
    seq_maxlen=maxlength,
    clip_gradients=5.0,
    checkpoint_path='model_us_cities')
'''-------------------------------------------------------------------------------------------------'''
'''----------------------------------------------------------------------------------------------------
	Training The Neural Network For City Generation
----------------------------------------------------------------------------------------------------'''

for i in range(40):  #A loop which runs the training for 40 times
    seed = random_sequence_from_textfile(path, maxlength)  #A random city name is used from the file and assigned to seed variable
    city_gen.fit(
        X,
        Y,
        validation_set=0.1,  # assumed values, following the sibling US-cities examples
        batch_size=128,
        n_epoch=1,
        run_id='us_cities')
Example #25
g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g,
                       optimizer='adam',
                       loss='categorical_crossentropy',
                       learning_rate=0.001)

m = tflearn.SequenceGenerator(g,
                              dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='model_shakespeare')

for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(X,
          Y,
          validation_set=0.1,
          batch_size=128,
          n_epoch=1,
          run_id='shakespeare')
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
Example #26
import tflearn as tfl
import tensorflow as tf
import encoding
import os

flags = tf.flags
logging = tf.logging
FILE_PATH = os.getcwd() + '/input/'
SAVE_PATH = os.getcwd()
INPUT_SIZE = 13
flags.DEFINE_string("model", "small", "A type of model. Possible options are: small, medium, large.")
flags.DEFINE_string("data_path", FILE_PATH, "Where the training/test data is stored.")
flags.DEFINE_string("save_path", SAVE_PATH, "Model output directory.")
flags.DEFINE_bool("use_fp16", False, "Train using 16-bit floats instead of 32-bit floats")
FLAGS = flags.FLAGS

raw_data = encoding.input_data(FLAGS.data_path)
train_data, valid_data, test_data, vocabulary = raw_data

input_data, targets = encoding.input_producer(train_data, None, None, name=None)


g = tfl.input_data(shape=[None, 10, INPUT_SIZE])
g = tfl.lstm(g, 512)
g = tfl.dropout(g, 0.5)
g = tfl.fully_connected(g, INPUT_SIZE, activation='softmax')
g = tfl.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)

m = tfl.SequenceGenerator(g)
m.fit(input_data, targets)
Example #27
print("Setting up network")

g = tflearn.input_data([None, maxlen, len(char_idx)])
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512, return_seq=True)
g = tflearn.dropout(g, 0.5)
g = tflearn.lstm(g, 512)
g = tflearn.dropout(g, 0.5)
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy',
                       learning_rate=0.001)

print("Network Complete. Training...")

m = tflearn.SequenceGenerator(g, dictionary=char_idx,
                              seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='./checkpoints/sayton')

for i in range(50):
    seed = random_sequence_from_textfile(path, maxlen)
    m.fit(X, Y, validation_set=0.1, batch_size=128,
          n_epoch=1, run_id='shakespeare')
    print("-- TESTING...")
    print("-- Test with temperature of 1.0 --")
    print(m.generate(600, temperature=1.0, seq_seed=seed))
    print("-- Test with temperature of 0.5 --")
    print(m.generate(600, temperature=0.5, seq_seed=seed))
Example #28
def main():

    path = FLAGS.dataset

    # We avoid using fixed padding and simply calculate the max length of our input set.
    if FLAGS.max_sequence_lenght < 1:
        maxlen = find_maxlenght(path)
    else:
        maxlen = FLAGS.max_sequence_lenght

    print("MaxLen = ", maxlen)
    X, Y, char_idx = textfile_to_semi_redundant_sequences(path,
                                                          seq_maxlen=maxlen,
                                                          redun_step=3)

    # Here we define our network structure, using common used values for node dimensions and dropout

    # Input Layer
    g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])

    # Create our hidden LSTM Layers from parameters
    for i in range(FLAGS.hidden_layer_size):
        g = tflearn.lstm(g, FLAGS.lstm_node_size, return_seq=True)
        g = tflearn.dropout(g, 0.5)

    # Finally our last lstm layer and a fully_connected with softmax activation for the output
    g = tflearn.lstm(g, FLAGS.lstm_node_size)
    g = tflearn.dropout(g, 0.5)
    g = tflearn.fully_connected(g, len(char_idx), activation='softmax')

    # Let's not forget our regression!
    g = tflearn.regression(g,
                           optimizer='adam',
                           loss='categorical_crossentropy',
                           learning_rate=0.001)

    # wrap it up in a sequence generator
    m = tflearn.SequenceGenerator(g,
                                  dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path='model_' +
                                  os.path.basename(path))
    train = True
    if os.path.exists(FLAGS.model_file):
        # Load our pre-train model from file
        print("Loading model from file ", FLAGS.model_file)
        load_model(m)
        train = False

    # Let's train it
    if train:
        print("Training model...")
        m.fit(X,
              Y,
              validation_set=0.1,
              batch_size=FLAGS.batch_size,
              n_epoch=FLAGS.epochs,
              run_id=os.path.basename(path))

        # save our results
        print("Saving trained model to file ", FLAGS.model_file)
        save_model(m)

    # Generate a test result
    generate(m, maxlen)

    # Interactive Session:
    try:
        import readline
        temp = 1.0
        while temp > 0.0:
            temp = float(input('Insert temperature for generation: '))
            FLAGS.temperature = temp
            generate(m, maxlen)
    except EOFError:
        print("Bye!")
        return
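
The generate, find_maxlenght, load_model and save_model helpers are not shown; a plausible sketch of generate, assuming it seeds from the dataset and samples at FLAGS.temperature:

def generate(m, maxlen):
    # hypothetical reconstruction of the helper used in main() above
    seed = random_sequence_from_textfile(FLAGS.dataset, maxlen)
    print(m.generate(600, temperature=FLAGS.temperature, seq_seed=seed))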
Example #29
# Create LSTM model
model = tflearn.input_data(shape=[None, maxlen, len(char_idx)])

model = tflearn.lstm(model, 512, return_seq=True)

model = tflearn.dropout(model, 0.5)

model = tflearn.lstm(model, 512)

model = tflearn.dropout(model, 0.5)

model = tflearn.fully_connected(model, len(char_idx), activation="softmax")

model = tflearn.regression(model, optimizer='adam', loss='categorical_crossentropy',
                           learning_rate=0.001)

# Generate city names
model = tflearn.SequenceGenerator(model,
                                  dictionary=char_idx,
                                  seq_maxlen=maxlen,
                                  clip_gradients=5.0,
                                  checkpoint_path="model_us_cities")

# training
for i in range(40):
    seed = random_sequence_from_textfile(data_path, maxlen)
    model.fit(X, Y, validation_set=0.2, batch_size=128, n_epoch=1)

    print("Testing 0.5:", model.generate(30, temperature=0.5,seq_seed=seed))
    print("Testing 1.0:", model.generate(30, temperature=1.0,seq_seed=seed))
    print("Testing 1.2:", model.generate(30, temperature=1.2,seq_seed=seed))
Example #30
# # 3: 王
# # 4: \n
# print(char_idx)  # a dict with 9 entries

# Build the network
g = tflearn.input_data(shape=[None, maxlen, len(char_idx)])
g = tflearn.layers.recurrent.lstm(g, 512, return_seq=True, name='g1')
g = tflearn.dropout(g, 0.5, name='d1')
g = tflearn.layers.recurrent.lstm(g, 512, name='g2')
g = tflearn.dropout(g, 0.5, name='d2')
# Fully connected layer
g = tflearn.fully_connected(g, len(char_idx), activation='softmax')
g = tflearn.regression(g, optimizer='adam', loss='categorical_crossentropy', learning_rate=0.001)  # set the loss and optimizer

# Deep neural network model for sequence generation
# clip_gradients caps the gradients
m = tflearn.SequenceGenerator(g, dictionary=char_idx, seq_maxlen=maxlen,
                              clip_gradients=5.0,
                              checkpoint_path='./model/lstm_gen/model',
                              tensorboard_dir='./logs')

# Loop round by round, generating sequences
for i in range(40):
    # Build a random seed for the generated sequence
    seed = list(tflearn.data_utils.random_sequence_from_string(string_utf8, maxlen))
    # Feed in the data for training
    m.fit(X, Y, validation_set=0.1, batch_size=1024, n_epoch=1, run_id='us_cities')
    # Call the model to generate data
    # temperature controls novelty
    # 0 would just reproduce the sample data
    print(''.join(m.generate(seq_length=30, temperature=1.5, seq_seed=seed)))
    print(''.join(m.generate(seq_length=30, temperature=1., seq_seed=seed)))
    print(''.join(m.generate(seq_length=30, temperature=.5, seq_seed=seed)))