def getValue():
    """Flask view: generate GPT-2 text from the submitted form and render it.

    Reads ``submit_btn`` from the form: "Submit" uses the user-supplied
    prompt and length, "Generate" uses a blank prompt with a random length.
    Optionally tweets the result when the ``tweet`` field equals "post".

    Returns:
        The rendered ``index.html`` template with ``t`` bound to the
        generated output list.
    """
    clear_session()

    submit_btn = request.form['submit_btn']
    if submit_btn == "Submit":
        text = request.form['user_str']
        length = request.form['user_len']
    elif submit_btn == "Generate":
        text = " "
        length = random.randint(1, 40)
    else:
        # Fix: any other button value previously left `text`/`length`
        # unbound and raised UnboundLocalError below.
        text = " "
        length = 20

    # NOTE(review): the model and BPE vocab are reloaded on every request,
    # which is very slow — consider caching them at module level.
    print('Load model from checkpoint...')
    model = load_trained_model_from_checkpoint(config_path, checkpoint_path)
    print('Load BPE from files...')
    bpe = get_bpe_from_files(encoder_path, vocab_path)

    print('Generate text...')
    output = generate(model, bpe, [str(text)], length=int(length), top_k=2)

    # Trim everything after the last newline.  Fix: when there is no
    # newline, rfind returns -1 and the old slice silently dropped the
    # final character; keep the full text instead.
    ind = output[0].rfind("\n")
    if ind != -1:
        output[0] = output[0][:ind]

    # Best-effort tweet: .get() avoids a KeyError when the field is absent;
    # Tweet() failures are logged instead of silently swallowed by a bare
    # except.
    try:
        if request.form.get('tweet') == "post":
            Tweet(str(output[0]))
    except Exception as exc:
        print("Tweet failed: %s" % exc)

    return render_template('index.html', t=output)
# 示例#2 (Example #2)
# 0
def process_task(task, uuid):
    """Generate text for a single task dict and report the elapsed time.

    Args:
        task: mapping with 'text', 'length' and 'top_k' entries.
        uuid: identifier used only for logging.

    Returns:
        The first (only) generated string.
    """
    print("Processing %s" % (uuid))
    started = timeit.default_timer()
    result = generate(
        model, bpe, [task['text']],
        length=task['length'], top_k=task['top_k'])
    elapsed = timeit.default_timer() - started
    print(result)
    print('Processing time %s' % (elapsed))
    return result[0]
# 示例#3 (Example #3)
# 0
 def test_train_and_gen(self):
     """Train (or reload) a tiny GPT-2 and check it completes a seen prompt."""
     # Byte-level vocabulary plus the two merged BPE tokens used below.
     token_dict = {chr(i): i for i in range(2 ** 9)}
     for merged in ('Po', 'er'):
         token_dict[merged] = len(token_dict)
     model = get_model(
         n_vocab=len(token_dict),
         n_ctx=100,
         n_embd=30,
         n_head=5,
         n_layer=2,
     )
     bpe = BytePairEncoding(token_dict=token_dict, bpe_rank={('P', 'o'): 0, ('e', 'r'): 1})
     corpus = [
         'Power, give me more power!',
         'From the day forth, my arm changed.',
     ]
     pad = bpe.encode(' ')
     encoded = [bpe.encode(line) for line in corpus]
     width = max(len(seq) for seq in encoded)
     # Right-pad every sequence with spaces to a common width; targets are
     # the inputs shifted left by one token.
     xs = [seq + pad * (width - len(seq)) for seq in encoded]
     ys = [seq[1:] + pad for seq in xs]
     weights_path = os.path.join(
         os.path.dirname(os.path.abspath(__file__)), 'gen_test.h5')
     if os.path.exists(weights_path):
         # Cached weights exist: one quick refresher epoch is enough.
         model.load_weights(weights_path)
         model.fit(
             x=np.array(xs * 1000),
             y=np.expand_dims(np.array(ys * 1000), axis=-1),
             epochs=1,
         )
     else:
         # First run: train longer and cache the weights for next time.
         model.fit(
             x=np.array(xs * 1000),
             y=np.expand_dims(np.array(ys * 1000), axis=-1),
             epochs=10,
         )
         model.save_weights(weights_path)
     prompts = [
         'Power, give me more',
         'Power',
         'give me more ',
         'the day forth ',
         'From',
     ]
     results = generate(model, bpe, prompts, length=30)
     self.assertEqual(results[0][:len('Power, give me more power!')], 'Power, give me more power!')
# 示例#4 (Example #4)
# 0
def make_verse(incipit, syllables_length, should_rhyme_with=False):
    """Generate one verse of exactly `syllables_length` syllables with GPT-2.

    Words are appended to `incipit` one at a time until the syllable budget
    is met.  When `should_rhyme_with` is a word (not False), the end of the
    verse is rewritten so the last word rhymes with it, using a masked
    language model (`nlp`) to fill the gap.

    Args:
        incipit: seed text (truncated to 1000 characters).
        syllables_length: target syllable count for the verse.
        should_rhyme_with: word the verse must rhyme with, or False.

    Returns:
        The generated verse string, or False when no rhyming line of the
        requested length could be built.
    """
    # Cap the prompt so it fits the model context.
    incipit = incipit[:1000]

    incipit_length = len(incipit)
    top_k = config_top_k
    errors = 0
    added_words = 0

    # We add one word at a time until we reach the minimum/maximum length
    # (bounded iterations so generation cannot loop forever).
    for i in range(651):
        full_output = keras_gpt_2.generate(text_model,
                                           bpe, [incipit],
                                           length=1,
                                           top_k=top_k)
        full_output = full_output[0]
        print('output', full_output)

        newOutput = full_output[len(incipit):]
        print('NEW output', newOutput)

        # Accept the new token only if it is purely alphabetic/whitespace
        # and contains no forbidden substring.
        if (all(x.isalpha() or x.isspace() for x in newOutput)
                and all(x not in newOutput for x in config_forbidden)):
            incipit = full_output
            added_words += 1
            errors = 0
        else:
            errors += 1
            # NOTE(review): when added_words == 0 and errors > 10, BOTH
            # branches fire and 'and ' is appended twice — kept as-is since
            # the original intent is unclear; confirm whether the first
            # branch is redundant.
            if added_words == 0 and errors > 10:
                incipit = incipit + 'and '
            if errors > 10:
                incipit = incipit + 'and '

        current_length = len(incipit) - incipit_length
        print('length', current_length)

        syllables_count = syllapy.count(full_output[incipit_length:])
        print('syllables', syllables_count)

        print('>>>>>>>>>>>>>>>>>>>>>>>>>>', syllables_count,
              ' in : ' + full_output[incipit_length:])

        # Exactly on budget: stop the text generation.
        if syllables_count == syllables_length:
            print('Syllables length reached')
            break

        # Over budget: drop the last two words and keep generating.
        if syllables_count > syllables_length:
            print('TOO MANY SYLLABLES')
            spaces = [
                pos for pos, char in enumerate(full_output) if char == ' '
            ]
            # removes 2 last words (guard added: with fewer than two
            # spaces the original raised IndexError)
            if len(spaces) >= 2:
                incipit = full_output[:spaces[-2]]

    result = full_output[incipit_length:]

    # we clean double spaces in the result
    for i in range(3):
        result = result.replace('  ', ' ')

    result = result.strip()

    if should_rhyme_with:
        rhymes = rhymer.get_perfect_rhymes(should_rhyme_with)
        rhyme = should_rhyme_with

        print('all rhymes ', rhymes)

        all_rhymes = []

        # Fix: the two-syllable bucket key is the STRING '2'; the original
        # tested `'2' in rhymes` but then indexed `rhymes[2]` (int key),
        # raising KeyError exactly when the membership test succeeded.
        if '2' in rhymes and rhymes['2']:
            all_rhymes = rhymes['2']
        else:
            for r in rhymes:
                if rhymes[r]:
                    all_rhymes = rhymes[r]
                    break

        print('rhymes ', all_rhymes)

        random.shuffle(all_rhymes)

        for word in all_rhymes:
            print('>>> ', word)
            # Fix: compare string values with != (the original used
            # `is not`, an identity test that is unreliable for strings).
            if (word != should_rhyme_with and len(word) > 2
                    and all(x.isalpha() or x.isspace() for x in word)):
                rhyme = word
                break

        print('choosen ', rhyme)

        # shorten input to right number of syllables

        while True:

            toTest = result + ' ' + rhyme
            syllables_count = syllapy.count(toTest)
            print('checking ', toTest)
            print('syllables ', syllables_count)

            if (syllables_count <= syllables_length):
                break
            else:
                spaces = [
                    pos for pos, char in enumerate(result) if char == ' '
                ]
                # Guard added: with no spaces left to trim the original
                # raised IndexError.
                if not spaces:
                    break
                # removes the last word
                result = result[:spaces[-1]]

        while True:
            spaces = [pos for pos, char in enumerate(result) if char == ' ']

            if len(spaces) > 2:
                result = result[:spaces[-1]]
            else:
                # Too few words remain to build a rhyming line.
                return False

            # Ask the masked language model to propose a filler word
            # between the trimmed verse and the chosen rhyme.
            solutions = nlp(result + ' ' + nlp.tokenizer.mask_token + ' ' +
                            rhyme)
            print('solution', solutions)

            acceptable_solution = False

            for solution in solutions:
                solution = solution['sequence']
                solution = solution.replace('[CLS]', '')
                solution = solution.replace('[SEP]', '')
                solution = solution.strip()

                syllables_count = syllapy.count(solution)
                print(solution, syllables_count)

                if (syllables_count == syllables_length):
                    acceptable_solution = solution
                    break

            if acceptable_solution:
                result = acceptable_solution
                break

    # Drop any characters that cannot round-trip through UTF-8.
    result = result.encode('utf-8', errors='ignore').decode('utf-8')
    return result
import os
import sys
from keras_gpt_2 import load_trained_model_from_checkpoint, get_bpe_from_files, generate

# Demo script: load a pretrained 117M GPT-2 checkpoint and generate text.
epoch_number = 2
already_trained = True
checkpoint_dir = './training_checkpoints'
# NOTE(review): this checkpoint_path is immediately overwritten below by the
# models/117M path, so the training-checkpoint path is dead code — confirm
# which checkpoint was actually intended.
checkpoint_path = os.path.join(checkpoint_dir, f"ckpt_{epoch_number}")

model_folder = 'models/117M'
config_path = os.path.join(model_folder, 'hparams.json')
checkpoint_path = os.path.join(model_folder, 'model.ckpt')
encoder_path = os.path.join(model_folder, 'encoder.json')
vocab_path = os.path.join(model_folder, 'vocab.bpe')

print('Load model from checkpoint...')
model = load_trained_model_from_checkpoint(config_path,
                                           checkpoint_path,
                                           already_trained=already_trained)
print('Load BPE from files...')
bpe = get_bpe_from_files(encoder_path, vocab_path)
print('Generate text...')
# Generate a 20-token continuation of the prompt, sampling from the top 40
# candidates at each step.
output = generate(model,
                  bpe, ['From the day forth, my arm'],
                  length=20,
                  top_k=40)

# If you are using the 117M model and top_k equals to 1, then the result would be:
# "From the day forth, my arm was broken, and I was in a state of pain. I was in a state of pain,"
print(output[0])
                    type=int,
                    default=1.0,
                    help='randomness of result')
# Script tail: finish CLI argument parsing, pin the GPU, and load the model
# plus BPE vocabulary.  `parser` is defined above this chunk.
parser.add_argument("-d",
                    "--dir",
                    type=str,
                    default='',
                    help='model direction')
args = parser.parse_args()

# Restrict TensorFlow to the GPU(s) named on the command line.
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

model_folder = args.dir
encoder_path = os.path.join(model_folder, 'encoder.json')
vocab_path = os.path.join(model_folder, 'vocab.bpe')

print('Load model from checkpoint...')
model = load_gpt2_from_ckpt(model_folder)
print('Load BPE from files...')
bpe = get_bpe_from_files(encoder_path, vocab_path)
# Interactive REPL: read a prompt, generate a continuation, print it.
while True:
    print('Input a piece of sentence: ')
    text = input()
    print('Generate text...')
    # Fix: generate() expects a LIST of prompts (as in every other call in
    # this file); passing the bare string would make each character be
    # treated as a separate prompt.
    output = generate(model, bpe, [text], length=args.length, top_k=args.topk)

    # If you are using the 117M model and top_k equals to 1, then the result would be:
    # "From the day forth, my arm was broken, and I was in a state of pain. I was in a state of pain,"
    print(output[0])
def generate_op(text):
    """Generate a 20-token GPT-2 continuation of `text`.

    Args:
        text: prompt; coerced to str before encoding.

    Returns:
        The list of generated strings produced by `generate`.
    """
    print('Generate text...')
    result = generate(model, bpe, [str(text)], length=20, top_k=40)
    return result