def getValue():
    """Handle the web form: run GPT-2 on the submitted text (or on a blank
    prompt of random length), optionally tweet the result, and render the page.

    NOTE(review): the model and BPE vocab are reloaded from disk on every
    request; hoisting them to module level would make responses far faster.
    """
    clear_session()
    if request.form['submit_btn'] == "Submit":
        text = request.form['user_str']
        length = request.form['user_len']
    else:
        # "Generate" — and, defensively, any unrecognised button value, which
        # previously left `text`/`length` unbound and raised NameError below.
        text = " "
        length = random.randint(1, 40)

    print('Load model from checkpoint...')
    model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    print('Load BPE from files...')
    bpe = get_bpe_from_files(encoder_path, vocab_path)

    print('Generate text...')
    output = generate(model, bpe, [str(text)], length=int(length), top_k=2)

    # Trim everything after the last newline.  When there is no newline,
    # keep the full text (the old slice `[0:-1]` dropped the final char).
    ind = output[0].rfind("\n")
    if ind != -1:
        output[0] = output[0][:ind]

    # Best-effort tweet.  The 'tweet' field is only present when the user asked
    # to post, so use .get() instead of swallowing KeyError with a bare except;
    # a failure in Tweet() itself is logged rather than silently ignored.
    if request.form.get('tweet') == "post":
        try:
            Tweet(str(output[0]))
        except Exception as exc:
            print("Tweet failed:", exc)

    return render_template('index.html', t=output)
def process_task(task, uuid):
    """Run a single generation task and return the generated string.

    `task` is a mapping with 'text', 'length' and 'top_k' entries; `uuid`
    only identifies the task in the log output.  Timing is logged around
    the call to `generate`.
    """
    print("Processing %s" % (uuid))
    started = timeit.default_timer()
    result = generate(
        model,
        bpe,
        [task['text']],
        length=task['length'],
        top_k=task['top_k'],
    )
    finished = timeit.default_timer()
    print(result)
    print('Processing time %s' % (finished - started))
    return result[0]
def test_train_and_gen(self):
    """Train a tiny GPT on two sentences (caching weights next to this file)
    and check that generation reproduces the first training sentence."""
    # Vocabulary: one token per code point below 2**9, plus two merged pairs.
    token_dict = {chr(code): code for code in range(2 ** 9)}
    token_dict['Po'] = len(token_dict)
    token_dict['er'] = len(token_dict)
    model = get_model(
        n_vocab=len(token_dict),
        n_ctx=100,
        n_embd=30,
        n_head=5,
        n_layer=2,
    )
    bpe = BytePairEncoding(
        token_dict=token_dict,
        bpe_rank={('P', 'o'): 0, ('e', 'r'): 1},
    )
    texts = [
        'Power, give me more power!',
        'From the day forth, my arm changed.',
    ]
    space_encode = bpe.encode(' ')
    encoded = [bpe.encode(text) for text in texts]
    longest = max(len(seq) for seq in encoded)
    # Right-pad each sequence with the space token; targets are the padded
    # inputs shifted left by one position.
    inputs = [seq + space_encode * (longest - len(seq)) for seq in encoded]
    outputs = [seq[1:] + space_encode for seq in inputs]
    current_path = os.path.dirname(os.path.abspath(__file__))
    model_path = os.path.join(current_path, 'gen_test.h5')
    if os.path.exists(model_path):
        # Cached weights exist: a single refresher epoch is enough.
        model.load_weights(model_path)
        model.fit(
            x=np.array(inputs * 1000),
            y=np.expand_dims(np.array(outputs * 1000), axis=-1),
            epochs=1,
        )
    else:
        model.fit(
            x=np.array(inputs * 1000),
            y=np.expand_dims(np.array(outputs * 1000), axis=-1),
            epochs=10,
        )
        model.save_weights(model_path)
    prompts = [
        'Power, give me more',
        'Power',
        'give me more ',
        'the day forth ',
        'From',
    ]
    results = generate(model, bpe, prompts, length=30)
    expected = 'Power, give me more power!'
    self.assertEqual(results[0][:len(expected)], expected)
def make_verse(incipit, syllables_length, should_rhyme_with=False):
    """Extend `incipit` one generated word at a time until the new text has
    exactly `syllables_length` syllables, optionally forcing the verse to end
    on a rhyme for `should_rhyme_with`.

    Returns the generated verse (str), or False when no rhyming line of the
    required syllable count could be assembled.

    Fixes over the previous version:
    - `word is not should_rhyme_with` compared string identity, not equality;
      now uses `!=`.
    - the rhyme-dictionary lookup tested the key '2' but indexed the key 2,
      which either crashed or never matched; now both spellings are tried.
    """
    incipit = incipit[:1000]
    incipit_length = len(incipit)
    top_k = config_top_k
    errors = 0
    added_words = 0
    full_output = incipit

    # We add one word at a time until we reach the required length.
    for _ in range(651):
        full_output = keras_gpt_2.generate(
            text_model, bpe, [incipit], length=1, top_k=top_k)
        full_output = full_output[0]
        print('output', full_output)
        new_output = full_output[len(incipit):]
        print('NEW output', new_output)
        if (all(x.isalpha() or x.isspace() for x in new_output)
                and all(x not in new_output for x in config_forbidden)):
            # Clean alphabetic continuation: accept it.
            incipit = full_output
            added_words += 1
            errors = 0
        else:
            errors += 1
            # NOTE(review): when added_words == 0 both branches fire and
            # 'and ' is appended twice — preserved as-is, confirm intent.
            if added_words == 0 and errors > 10:
                incipit = incipit + 'and '
            if errors > 10:
                incipit = incipit + 'and '
        current_length = len(incipit) - incipit_length
        print('length', current_length)
        syllables_count = syllapy.count(full_output[incipit_length:])
        print('syllables', syllables_count)
        print('>>>>>>>>>>>>>>>>>>>>>>>>>>', syllables_count,
              ' in : ' + full_output[incipit_length:])
        # Exact syllable count reached: stop generating.
        if syllables_count == syllables_length:
            print('Syllables length reached')
            break
        # Too long: drop the last two words and keep generating from there.
        if syllables_count > syllables_length:
            print('TOO MANY SYLLABLES')
            spaces = [
                pos for pos, char in enumerate(full_output) if char == ' '
            ]
            # removes 2 last words
            incipit = full_output[:spaces[-2]]

    result = full_output[incipit_length:]
    # Collapse runs of double spaces in the result (three passes).
    for _ in range(3):
        result = result.replace('  ', ' ')
    result = result.strip()

    if should_rhyme_with:
        rhymes = rhymer.get_perfect_rhymes(should_rhyme_with)
        rhyme = should_rhyme_with
        print('all rhymes ', rhymes)
        # Prefer two-syllable rhymes; the rhymer may key by str or int,
        # so try both spellings, then fall back to any non-empty bucket.
        all_rhymes = rhymes.get('2') or rhymes.get(2) or []
        if not all_rhymes:
            for r in rhymes:
                if rhymes[r]:
                    all_rhymes = rhymes[r]
                    break
        print('rhymes ', all_rhymes)
        random.shuffle(all_rhymes)
        for word in all_rhymes:
            print('>>> ', word)
            # `!=` here: the old identity test (`is not`) passed for equal
            # but distinct string objects.
            if (word != should_rhyme_with and len(word) > 2
                    and all(x.isalpha() or x.isspace() for x in word)):
                rhyme = word
                break
        print('choosen ', rhyme)

        # Shorten the line until appending the rhyme fits the budget.
        while True:
            to_test = result + ' ' + rhyme
            syllables_count = syllapy.count(to_test)
            print('checking ', to_test)
            print('syllables ', syllables_count)
            if syllables_count <= syllables_length:
                break
            spaces = [
                pos for pos, char in enumerate(result) if char == ' '
            ]
            # removes 2 last words
            result = result[:spaces[-1]]

        # Let the masked language model bridge the line and the rhyme word,
        # dropping one trailing word per attempt until a fill has exactly
        # the target syllable count (or we run out of words).
        while True:
            spaces = [pos for pos, char in enumerate(result) if char == ' ']
            if len(spaces) > 2:
                result = result[:spaces[-1]]
            else:
                return False
            solutions = nlp(result + ' ' + nlp.tokenizer.mask_token + ' ' + rhyme)
            print('solution', solutions)
            acceptable_solution = False
            for solution in solutions:
                solution = solution['sequence']
                solution = solution.replace('[CLS]', '')
                solution = solution.replace('[SEP]', '')
                solution = solution.strip()
                syllables_count = syllapy.count(solution)
                print(solution, syllables_count)
                if syllables_count == syllables_length:
                    acceptable_solution = solution
                    break
            if acceptable_solution:
                result = acceptable_solution
                break

    # Strip any characters that cannot survive a UTF-8 round trip.
    result = result.encode('utf-8', errors='ignore').decode('utf-8')
    return result
import os
import sys

from keras_gpt_2 import load_trained_model_from_checkpoint, get_bpe_from_files, generate

# Demo script: load the pretrained 117M GPT-2 checkpoint and print one sample.
# (A previous `checkpoint_path = os.path.join('./training_checkpoints',
# f"ckpt_{epoch_number}")` assignment was dead code — immediately overwritten
# by the model.ckpt path below — and has been removed.)
already_trained = True

model_folder = 'models/117M'
config_path = os.path.join(model_folder, 'hparams.json')
checkpoint_path = os.path.join(model_folder, 'model.ckpt')
encoder_path = os.path.join(model_folder, 'encoder.json')
vocab_path = os.path.join(model_folder, 'vocab.bpe')

print('Load model from checkpoint...')
model = load_trained_model_from_checkpoint(config_path, checkpoint_path,
                                           already_trained=already_trained)
print('Load BPE from files...')
bpe = get_bpe_from_files(encoder_path, vocab_path)
print('Generate text...')
output = generate(model, bpe, ['From the day forth, my arm'], length=20, top_k=40)
# If you are using the 117M model and top_k equals to 1, then the result would be:
# "From the day forth, my arm was broken, and I was in a state of pain. I was in a state of pain,"
print(output[0])
type=int, default=1.0, help='randomness of result') parser.add_argument("-d", "--dir", type=str, default='', help='model direction') args = parser.parse_args() os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu model_folder = args.dir encoder_path = os.path.join(model_folder, 'encoder.json') vocab_path = os.path.join(model_folder, 'vocab.bpe') print('Load model from checkpoint...') model = load_gpt2_from_ckpt(model_folder) print('Load BPE from files...') bpe = get_bpe_from_files(encoder_path, vocab_path) while True: print('Input a piece of sentence: ') text = input() print('Generate text...') output = generate(model, bpe, text, length=args.length, top_k=args.topk) # If you are using the 117M model and top_k equals to 1, then the result would be: # "From the day forth, my arm was broken, and I was in a state of pain. I was in a state of pain," print(output[0])
def generate_op(text):
    """Generate a 20-token continuation of `text` using top-k (k=40) sampling.

    Returns the raw list produced by `generate` (one string per prompt).
    """
    print('Generate text...')
    prompts = [str(text)]
    return generate(model, bpe, prompts, length=20, top_k=40)