def main(_):
    """Interactive sampling REPL.

    Reads a seed string from stdin, samples FLAGS.max_length tokens from the
    trained CharRNN, prints the decoded text, and repeats until stdin is
    exhausted (empty readline).
    """
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        # A directory was given: resolve it to its most recent checkpoint.
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    sys.stdout.write("> ")
    sys.stdout.flush()
    start_string = sys.stdin.readline()
    while start_string:
        start = converter.text_to_arr(start_string)
        arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
        print(converter.arr_to_text(arr))
        sys.stdout.write("> ")
        sys.stdout.flush()
        # BUG FIX: the original read the next line into a dead variable
        # (`sentence`), so the loop condition never changed and the first
        # seed was resampled forever.
        start_string = sys.stdin.readline()
def main(_):
    """Generate browser-fuzzing input files.

    For each hard-coded JavaScript seed, samples FLAGS.num_to_generate
    sequences from the trained CharRNN and writes each one (UTF-8) to a
    uuid-named file under the generated-data directory for FLAGS.file_type.
    """
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        # A directory was given: resolve it to its most recent checkpoint.
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)
    model = CharRNN(converter.vocab_size, None, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)

    # JS-flavoured prompts that steer generation toward valid syntax.
    seeds = [
        'var a = fun', 'function a(', 'this.', 'document.', 'window.',
        'var a = document.g', 'var a;', 'jQuery'
    ]
    for seed in seeds:
        start = converter.text_to_arr(seed)
        for i in range(0, FLAGS.num_to_generate):
            print('Generating: ' + seed + ' -> ' + str(i))
            file_name = str(uuid.uuid1())
            file_path = '../../BrowserFuzzingData/generated/' + FLAGS.file_type + '/' + file_name + '.' + FLAGS.file_type
            arr = model.sample(FLAGS.max_length_of_generated, start,
                               converter.vocab_size, converter.word_to_int)
            # BUG FIX: use a context manager so the handle is closed even
            # if decoding/encoding raises (original open/close leaked then).
            with open(file_path, "wb") as f:
                f.write(converter.arr_to_text(arr).encode('utf-8'))
def composePotery():
    """Sample raw text from the trained model (no seed) and return the
    poetry extracted from it by ``selectPoetry``."""
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(
        converter.vocab_size,
        sampling=True,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        use_embedding=FLAGS.use_embedding,
        embedding_size=FLAGS.embedding_size,
    )
    model.load(FLAGS.checkpoint_path)
    # Empty seed: sampling starts from scratch.
    sampled = model.sample(FLAGS.max_length, [], converter.vocab_size)
    raw_text = converter.arr_to_text(sampled)
    return selectPoetry(raw_text)
def main(_):
    """Load a trained CharRNN checkpoint, sample one sequence seeded with
    FLAGS.start_string, and print the decoded text."""
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(
        converter.vocab_size,
        sampling=True,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        use_embedding=FLAGS.use_embedding,
        embedding_size=FLAGS.embedding_size,
    )
    model.load(FLAGS.checkpoint_path)
    seed_ids = converter.text_to_arr(FLAGS.start_string)
    sampled = model.sample(FLAGS.max_length, seed_ids, converter.vocab_size)
    print(converter.arr_to_text(sampled))
def main(_):
    """Sample FLAGS.length tokens from a trained CharRNN, save the decoded
    text to FLAGS.save_path (UTF-8), and echo it to stdout."""
    tc = TextConverter("", -1, byte_file=FLAGS.vocab_path)
    output_size = tc.vocab_size
    if os.path.isdir(FLAGS.checkpoint_path):
        # Directory given: pick its newest checkpoint.
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(
        output_size=output_size,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        sampling=True,
    )
    model.load(FLAGS.checkpoint_path)
    seed = tc.text_to_arr(FLAGS.start_string)
    generate_text = tc.arr_to_text(model.sample(FLAGS.length, seed, output_size))
    with open(FLAGS.save_path, 'w', encoding='utf-8') as out:
        out.write(generate_text)
    print(generate_text)
def main(_):
    """Sample a token sequence seeded with FLAGS.start_string and print the
    decoded text, using a Tokenizer vocabulary."""
    tokenizer = Tokenizer(vocab_path=FLAGS.tokenizer_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(
        tokenizer.vocab_size,
        sampling=True,
        n_neurons=FLAGS.n_neurons,
        n_layers=FLAGS.n_layers,
        embedding=FLAGS.embedding,
        embedding_size=FLAGS.embedding_size,
    )
    model.load(FLAGS.checkpoint_path)
    seed = tokenizer.texts_to_sequences(FLAGS.start_string)
    generated = model.sample(FLAGS.max_length, seed, tokenizer.vocab_size)
    print(tokenizer.sequences_to_texts(generated))
def main(_):
    """Sample FLAGS.max_length tokens seeded with FLAGS.start_string and
    print the decoded text."""
    # BUG FIX: Python 3 `str` has no .decode(), so the original crashed with
    # AttributeError. Only decode when the flag is raw bytes (under Python 2,
    # str IS bytes, so the decode still runs there — backward compatible).
    if isinstance(FLAGS.start_string, bytes):
        FLAGS.start_string = FLAGS.start_string.decode('utf-8')
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        # A directory was given: resolve it to its most recent checkpoint.
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)
    start = converter.text_to_arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
    print(converter.arr_to_text(arr))
def main(_):
    """Build the text converter, load the newest checkpoint, sample a
    sequence seeded with FLAGS.start_string, and print the decoded text."""
    # BUG FIX: Python 3 `str` has no .decode(); only decode raw bytes
    # (under Python 2, str IS bytes, so behavior there is unchanged).
    if isinstance(FLAGS.start_string, bytes):
        FLAGS.start_string = FLAGS.start_string.decode('utf-8')
    converter = TextConverter(filename=FLAGS.converter_path)  # build the text converter
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)  # resolve to the newest checkpoint file
    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)  # load the trained weights
    start = converter.text_to_arr(FLAGS.start_string)  # input text -> id array
    arr = model.sample(FLAGS.max_length, start, converter.vocab_size)  # generated id sequence
    print(converter.arr_to_text(arr))
def generate():
    """Sample once from the trained CharRNN (seeded with FLAGS.start_string)
    and return the decoded text."""
    # Graph-mode model: eager execution must be off before building it.
    tf.compat.v1.disable_eager_execution()
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(
        converter.vocab_size,
        sampling=True,
        lstm_size=FLAGS.lstm_size,
        num_layers=FLAGS.num_layers,
        use_embedding=FLAGS.use_embedding,
        embedding_size=FLAGS.embedding_size,
    )
    model.load(FLAGS.checkpoint_path)
    seed = converter.text_to_arr(FLAGS.start_string)
    sampled = model.sample(FLAGS.max_length, seed, converter.vocab_size)
    return converter.arr_to_text(sampled)
def main(_):
    """Sample FLAGS.max_length tokens seeded with FLAGS.start_string and
    print the decoded result."""
    # (Removed the original's dead no-op `FLAGS.start_string = FLAGS.start_string`.)
    convert = TextConvert(fname=FLAGS.convert_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        # A directory was given: resolve it to its most recent checkpoint.
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(
            FLAGS.checkpoint_path)
    model = CharRNN(convert.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)
    start = convert.text2arr(FLAGS.start_string)
    arr = model.sample(FLAGS.max_length, start, convert.vocab_size)
    res = convert.arr2text(arr)
    print('get result: \n', res)
def main(_):
    """Sample FLAGS.max_length tokens seeded with FLAGS.start_string and
    print the decoded text."""
    # (Removed the original's dead self-assignment of FLAGS.start_string and
    # a commented-out character-by-character printing loop.)
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        # A directory was given: resolve it to its most recent checkpoint.
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(converter.vocab_size, sampling=True,
                    state_size=FLAGS.state_size, n_layers=FLAGS.n_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)
    start = converter.text_to_data(FLAGS.start_string)
    data = model.sample(FLAGS.max_length, start, converter.vocab_size)
    print(converter.data_to_text(data))
def main(_):
    """Generate one file's worth of FLAGS.file_type content from the trained
    model and post-process it per type (js: print; html: post-process and
    save; css: reserved, no processing yet)."""
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)
    start = converter.text_to_arr(FLAGS.start_string)  # JS/Html/CSS seed
    for i in range(0, 1):
        print('Generating: ' + str(i))
        file_path = '../../BrowserFuzzingData/generated/' + FLAGS.file_type + '/' + str(
            i) + '.' + FLAGS.file_type
        arr = model.sample(FLAGS.max_length, start, converter.vocab_size)
        content = converter.arr_to_text(arr)
        # The model emits escape sequences literally; turn them back into
        # real whitespace characters.
        content = content.replace("\\t", "\t")
        content = content.replace("\\r", "\r")
        content = content.replace("\\n", "\n")
        if FLAGS.file_type == 'js':
            print(content)
        elif FLAGS.file_type == 'html':
            content = post_process(content)
            # BUG FIX: the original wrote to `f` while the open() call was
            # commented out, raising NameError. Open the target file here,
            # with a context manager so it is always closed.
            with open(file_path, "w") as f:
                f.write(content)
        else:
            # TODO: reserved for CSS; no processing for now.
            pass
class Dianpin(Singleton):
    """Singleton wrapper around a CharRNN text generator: build/load the
    model once with ``model_built``, then sample via ``final_predict``."""

    def __init__(self):
        self.text = ''          # last generated text (not written here)
        self.tfmodel = None     # CharRNN instance, created by model_built()
        self.converter = None   # TextConverter, created by model_built()

    def model_built(self):
        """Create the converter and CharRNN model and load the newest
        checkpoint from FLAGS.checkpoint_path."""
        # BUG FIX: Python 3 `str` has no .decode(); only decode raw bytes
        # (under Python 2, str IS bytes, so behavior there is unchanged).
        if isinstance(FLAGS.start_string, bytes):
            FLAGS.start_string = FLAGS.start_string.decode('utf-8')
        self.converter = TextConverter(filename=FLAGS.converter_path)
        if os.path.isdir(FLAGS.checkpoint_path):
            # A directory was given: resolve it to the most recent checkpoint.
            FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
        self.tfmodel = CharRNN(self.converter.vocab_size, sampling=True,
                               lstm_size=FLAGS.lstm_size,
                               num_layers=FLAGS.num_layers,
                               use_embedding=FLAGS.use_embedding,
                               embedding_size=FLAGS.embedding_size)
        self.tfmodel.load(FLAGS.checkpoint_path)

    def final_predict(self):
        """Sample FLAGS.max_length tokens seeded with FLAGS.start_string and
        return the decoded text. Requires model_built() to have run first."""
        start = self.converter.text_to_arr(FLAGS.start_string)
        arr = self.tfmodel.sample(FLAGS.max_length, start,
                                  self.converter.vocab_size)
        return self.converter.arr_to_text(arr)
if line: lines.append(line) else: break if lines[0] == 'quit': print('Goodbye...') break # exit program print('Thinking...') input_seq = '\n'.join(lines) results = model.autocomplete(sess, input_seq) print('Autocomplete:') for i, r in enumerate(results): print('{0}. {1}'.format(i+1, r[len(input_seq):][:-1])) # skip context and trailing newline char else: print('---') print('Hit enter to sample more characters (or type \'quit\' to quit):') line = input() if line == 'quit': print('Goodbye...') break # exit program print('Sampling...') if input_seq is None: input_seq = np.random.choice(vocab) # pick random character sample = model.sample(sess, input_seq=input_seq, num_to_sample=500, temperature=0.5) input_seq += sample print(sample)
def poem_genetate(poem_start=u'君'):
    # NOTE(review): Python 2 source (bare `print` statements) — must not be
    # run under Python 3. Generates a poem seeded with `poem_start`, keeps up
    # to two well-formed 12-character lines, and writes them to app/poem.txt.
    #FLAGS.start_string = FLAGS.start_string
    #FLAGS.start_string = FLAGS.start_string.decode('utf-8')
    converter = TextConverter(filename=FLAGS.converter_path)
    if os.path.isdir(FLAGS.checkpoint_path):
        # A directory was given: resolve it to its most recent checkpoint.
        FLAGS.checkpoint_path =tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    print FLAGS.checkpoint_path
    """
    model = CharRNN(converter.vocab_size, sampling=True,
                    lstm_size=FLAGS.lstm_size, num_layers=FLAGS.num_layers,
                    use_embedding=FLAGS.use_embedding,
                    embedding_size=FLAGS.embedding_size)
    """
    # NOTE(review): lstm_size / num_layers / use_embedding / max_length below
    # are module-level globals, not FLAGS attributes — confirm they exist.
    model = CharRNN(converter.vocab_size, sampling=True, lstm_size=lstm_size,
                    num_layers=num_layers, use_embedding=use_embedding,embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)
    #start = converter.text_to_arr(start_string)
    start1 = converter.text_to_arr(poem_start)
    arr = model.sample(max_length, start1, converter.vocab_size)
    #pl = model.poemline(max_length, start, converter.vocab_size)
    #sp=model.sample_hide_poetry( start, converter.vocab_size)
    poem=converter.arr_to_text(arr)
    #print (converter.arr_to_text(sp))
    print('---------')
    print(poem)
    print('---------')
    #print(converter.arr_to_text(pl))
    print('---------')
    # (Translated) 0: ',' 1: '。' 2: '\n'; each line has 12 characters and
    # must contain no more than one of each delimiter. Keep lines that are
    # exactly 12 chars with ',' at index 5, '。' at index 11, and exactly two
    # delimiters total; stop after collecting two such lines.
    lines=poem.split('\n')
    r_poem=[]
    for i in range(len(lines)):
        if len(lines[i])==12:
            count=0
            print lines[i][5]
            # Debug probes: check whether the comma matches as native str
            # or as a unicode literal.
            if lines[i][5]==',':
                print "true"
            if lines[i][5]==u',':
                print "u true"
            if lines[i][5]==u',' and lines[i][11]==u'。':
                for j in range(len(lines[i])):
                    if lines[i][j]==u',' or lines[i][j]==u'。':
                        count+=1
                if count==2:
                    r_poem.append(lines[i])
            if len(r_poem)==2:
                break
    """
    lines=poem.split('\n')
    r_poem=[]
    for i in range(len(lines)):
        if len(lines[i])==12:
            count=0
            if lines[i][5]==0 and lines[i][11]==1:
                for j in range(len(lines[i])):
                    if lines[i][j]==0 or lines[i][j]==1:
                        count+=1
                if count==2:
                    r_poem.append(lines[i])
            if len(r_poem)==2:
                break
    """
    # Persist the selected lines as UTF-8.
    with codecs.open("app/poem.txt","w",'utf-8') as f:
        words="".join(r_poem)
        print (lines)
        print (r_poem)
        print (words)
        #words=words.decode('utf-8')
        f.write(words)
# Periodic training-loop bookkeeping. `i` (iteration counter), `model`,
# `sess`, `batch_x`/`batch_y`/`batch_seqlen`, `run_name`, and `chars` all
# come from the enclosing scope — this fragment runs inside the training loop.
if i % 100 == 0:
    # Write loss and accuracy to log
    batch_loss, batch_acc = model.calc_loss_acc(
        sess, batch_x, batch_y, batch_seqlen)
    # Append-mode so history accumulates across the whole run.
    with open(os.path.join('.', run_name, 'loss-history.txt'), mode='a') as f:
        f.write('{0},{1}\n'.format(i, batch_loss))
    with open(os.path.join('.', run_name, 'acc-history.txt'), mode='a') as f:
        f.write('{0},{1}\n'.format(i, batch_acc))
# Print status
if i % 1000 == 0:
    # batch_loss/batch_acc were just refreshed above, since 1000 is a
    # multiple of 100.
    print(datetime.datetime.now(), '| iter', i, 'batch_loss:', batch_loss,
          'batch_acc:', batch_acc)
    # Show a short sample (repr keeps escapes visible) from a random seed char.
    print(
        repr(
            model.sample(sess,
                         input_seq=np.random.choice(chars),
                         num_to_sample=200)))
# Save checkpoint
if i % 10000 == 0:
    save_path = model.save_checkpoint(
        sess,
        os.path.join('.', run_name, 'checkpoint-{0}.ckpt'.format(i)))
    print('Checkpoint saved:', save_path)
    print('--')