import uuid

import gpt_2_simple as gpt2


def train_data(inputFile, outputDir):
    sess = gpt2.start_tf_sess()

    # Fine-tuning on the input file is currently disabled; re-enable as needed.
    # gpt2.finetune(sess,
    #     "resource/" + inputFile + ".txt",
    #     model_name=model_name,
    #     overwrite=True,
    #     steps=2)  # steps is the maximum number of training steps

    # load the model once, then generate 10 samples to uniquely named files
    gpt2.load_gpt2(sess)
    for x in range(10):
        gpt2.generate_to_file(
            sess,
            destination_path="newoutputs/" + outputDir + str(uuid.uuid4()) + ".txt")

    # tear down the session so the function can be called again without
    # hitting TensorFlow variable-scope reuse errors
    gpt2.reset_session(sess, threads=-1, server=None)
Example #2
def train(input_file):

    if os.path.exists('models/temp'):
        shutil.rmtree('models/temp')
    
    # download the base 124M model if it isn't already present
    if not os.path.exists('models/124M'):
        download()

    sess = gpt2.start_tf_sess()
   
    model_name = '124M'
    model_dir = 'models/'
    training_dir = 'src/training_data/'
    file_name = input_file.split('.')[0]

    gpt2.finetune(sess,
        training_dir+input_file,
        model_name=model_name,
        checkpoint_dir=model_dir+'temp/',
        run_name='',
        steps=1)
    
    gpt2.reset_session(sess)
    
    if os.path.exists('models/latest'):
        shutil.rmtree('models/latest')
    shutil.copytree('models/temp','models/latest')
    # shutil.rmtree('models/temp')
Example #3
 def fine_tune(self, steps=100):
     self.download_tuning()
     self.log_function(f"* Fine tuning towards {self.tuning_description}...")
     brain = gpt2.start_tf_sess()
     gpt2.finetune(
         brain, self.tuning_path,
         model_name=self.model_name, steps=steps, run_name=self.run_name, overwrite=True
     )
     gpt2.reset_session(brain)
Example #4
 def generate(self, prefix="I once", length=100):
     self.log_function("* Generating text...")
     self.log_function(f"  * {self.model_description} language model tuned towards {self.tuning_description}.")
     brain = gpt2.start_tf_sess()
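     # prefer the fine-tuned checkpoint for this run if one exists on disk;
     # otherwise fall back to the base pretrained model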
     run_name = self.run_name if os.path.isdir(os.path.join("checkpoint", self.run_name)) else None
     model_name = None if os.path.isdir(os.path.join("checkpoint", self.run_name)) else self.model_name
     gpt2.load_gpt2(brain, run_name=run_name, model_name=model_name)
     text = gpt2.generate(
         brain,
         prefix=prefix, length=length, run_name=run_name, return_as_list=True, model_name=self.model_name,
     )[0]
     gpt2.reset_session(brain)
     return text
Example #5
async def homepage(request):
    global generate_count
    global sess

    if request.method == 'GET':
        params = request.query_params
    elif request.method == 'POST':
        params = await request.json()
    elif request.method == 'HEAD':
        return UJSONResponse({'text': ''},
                             headers=response_header)
    
    gpt2.load_gpt2(sess, run_name=params.get('run_name', ''))

    text = gpt2.generate(sess,
                         run_name=params.get('run_name', ''),
                         length=int(params.get('length', 1023)),
                         temperature=float(params.get('temperature', 0.7)),
                         top_k=int(params.get('top_k', 0)),
                         top_p=float(params.get('top_p', 0)),
                         prefix=params.get('prefix', '')[:500],
                         truncate=params.get('truncate', None),
                         include_prefix=str(params.get('include_prefix', True)).lower() == 'true',
                         return_as_list=True)[0]

    sess = gpt2.reset_session(sess)

    gc.collect()
    return UJSONResponse({'text': text},
                         headers=response_header)
Example #6
    def generate_new(self, origin, vals):
        #return ('stuff ' + str(random.randrange(10000))).encode('utf-8')
        dir = model_dir(origin)
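        # reload only when the requested origin maps to a different model;
        # reset_session frees the previous graph before the new load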
        if dir != self.last_model:
            self.sess = gpt2.reset_session(self.sess, threads=7)
            gpt2.load_gpt2(self.sess, checkpoint_dir=dir)
            self.last_model = dir
        if self.last_model == BB_DIR:
            prompt = '<|startoftext|> '+ vals['title'] + ' - Breitbart <START> '
        else:
            prompt = vals['title'] + ' <START> '

        text = gpt2.generate(self.sess, checkpoint_dir=self.last_model, return_as_list=True, prefix=prompt,
            include_prefix=False, truncate='<|endoftext|>')[0]
        if self.last_model == BB_DIR:
            text = text.replace('[', '</p> <p class="zn-body__paragraph">')
        text = str.encode(text)
        return text
Example #7
    def __init__(self, client):
        """
        Read the config from the file at the path CONFIG_PATH; if the config file is not
        in the expected format or contains a config that isn't recognised, the config is
        loaded from the dictionary DEFAULT_CONFIG.

        Initialise a TensorFlow session for gpt2, then load the GPT-2 model as determined
        by the config.

        NOTE: If the config is modified after DEFAULT_CONFIG has been loaded, it will be
        overwritten.
        """
        self.client = client
        self.config = {}
        self.load_config(False)
        self.default_prompts = read_default_prompts()

        self.sess = gpt2.start_tf_sess()
        try:
            gpt2.load_gpt2(self.sess, model_name=self.config['model_name'])
        except ValueError:
            self.sess = gpt2.reset_session(self.sess)
            gpt2.load_gpt2(self.sess, model_name=self.config['model_name'])
Example #8
def sample():
    version = request.args.get('version')
    length = int(request.args.get('length'))
    prompt = request.args.get('prompt')
    nsamples = int(request.args.get('nsamples'))


    global curr_version
    global sess
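    # swap in a different checkpoint only when the requested version changes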
    if version != curr_version and version != '':
      sess = gpt2.reset_session(sess)
      gpt2.load_gpt2(sess, checkpoint_dir="/opt/app/checkpoint", run_name=version)
      curr_version = version

    out = gpt2.generate(sess,
                        nsamples=nsamples,
                        prefix=prompt,
                        top_k=0,
                        return_as_list=True,
                        checkpoint_dir="/opt/app/checkpoint",
                        run_name=curr_version,
                        length=length)

    result = ""
    for i in range(nsamples):
      result += "======== SAMPLE " + str(i+1) + " ========\n"
      result += out[i] + "\n"

    return jsonify({"sample" : result})
Example #9
    def buildResponse(self):
        global sess
        global responses
        global sessCritical
        global workQueue
        responses += 1

        workQueue += 1
        rawresponse = gpt2.generate(sess, prefix=self.grabText(), include_prefix=False, length=75, return_as_list=True)
        workQueue -= 1

        # once the model has been used more than trigger_collect times and is idle,
        # reset the session to free up memory
        if responses > trigger_collect and workQueue <= 0:
            sessCritical = True
            print("=========FREEING MEMORY=========")
            sess = gpt2.reset_session(sess)
            gpt2.load_gpt2(sess)
            responses = 0
            sessCritical = False
        print(rawresponse)
        chats = rawresponse[0].split('\n')
        return self.getUnique(chats)
Example #10
def attempt_generate(sess=None):

    with open('config.json') as f:
        config = json.load(f)

    if not sess:
        sess = gpt2.start_tf_sess()
    else:
        sess = gpt2.reset_session(sess)

    gpt2.load_gpt2(sess, run_name=config['run_name'])

    file_core = 'motivational_quotes'
    time_string = datetime.now().strftime('%d-%m-%Y_%H-%M-%S')
    model_type = config['model']
    output_file = f'{file_core}_{model_type}.txt'

    gpt2.generate(sess,
                  top_k=40,
                  run_name=config['run_name'],
                  nsamples=config['samples_to_generate'],
                  destination_path=config['model_output_path'] + output_file)

    return sess
Example #11
def reset_model(model, step_count, checkpoint_directory='checkpoint'):
    global tf_session
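    # every 10 steps, rebuild the TF session and reload the model to release graph memory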
    if step_count % 10 == 0:
        tf_session = gpt2.reset_session(sess=tf_session)
        load_model(model, checkpoint_directory)
Example #12
def generate_trending_tweet():
    topics = ["Biden", "Trump"]
    topic = choice(topics)
    # this is just for testing repeat topics; remove before deployment
    print("generating topical tweets on subject: " + topic)

    # update the text file with current tweets
    file_name = '../data/'+topic+'.txt'
    topical_tweets = get_topic_tweets(topic, 5000)
    t_tweet_string = " || ".join(topical_tweets)
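    # ' || ' acts as the tweet delimiter so the generated text can be split back apart later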

    with open(file_name, 'w') as f:
        f.write(t_tweet_string)

    # train model
    print("training new model on scraped text for topic : "+topic)
    sess = gpt2.start_tf_sess()

    if not os.path.exists('checkpoint/'+topic):
        # train fresh model
        print("training fresh model- none found...")
        gpt2.finetune(sess,
                      dataset=file_name,
                      model_name=model_name,
                      steps=200,
                      restore_from='fresh',
                      run_name=topic,
                      print_every=1)
    else:
        # update existing model
        print("updating existing model with short run on new tweets...")
        gpt2.finetune(sess,
                      dataset=file_name,
                      model_name=model_name,
                      run_name=topic,
                      steps=100,
                      restore_from='latest',
                      print_every=1)
    # generate tweet
    print("beginning to generate tweets...")
    gpt2.generate_to_file(sess,
                          length=400,
                          destination_path='../data/generated_tweets.txt',
                          nsamples=10,
                          run_name=topic,
                          prefix=topic)
    print('done generating tweets... ')
    # reset the session to prevent errors on loop
    gpt2.reset_session(sess=sess)
    # return 1 tweet
    with open('../data/generated_tweets.txt', 'r') as f:
        texts = f.read().split('====================')
    tweets = []
    for text in texts:
        # by just taking the first tweet, we're sure we have the seed text
        tweeters = text.split(' || ')
        for tweet in tweeters:
            if topic in tweet:  # ensure it contains the topic string
                tweet = tweet.split(" ")
                # remove links
                tweet = " ".join(
                    word for word in tweet if not has_prefix(word))
                # keep it only if it's more than the bare topic and fits in a tweet
                if len(topic) + 4 < len(tweet) <= 280:
                    tweets.append(tweet)
            else:
                continue
    #print("Potential tweets:\n"+ " \n\n ".join(tweets))
    tweet = choice(tweets)
    if len(tweet) > 280:
        tweet = tweet[:280]
    return tweet
Example #13
def blankLines():
    lines = []

    for i in range(0, height):
        lines.append('')

    return lines

sess = None

for ii in range(0, generate_count):
    print(ii)
    
    if not sess:
        sess = gpt2.start_tf_sess()
    else:
        sess = gpt2.reset_session(sess)
    
    gpt2.load_gpt2(sess, run_name=run_name)

    lines = blankLines()
    prefix = ''
    hasColor = False

    while True:
        text = gpt2.generate(sess, run_name=run_name, prefix=prefix, temperature=temperature, return_as_list=True)[0]

        print('\n\noutput:')
        print(text)

        newLines = text.split('\n')
Example #14
def function_generate(hparams, saved_dir):
    sess = gpt2.start_tf_sess()
    sess = gpt2.reset_session(sess)

    # determine the generate model to use
    if hparams.use_nisl_model == 1:
        info_print('The NISL model is selected.')
        generate_model_dir = '/root/src/generate_model/models'
        generate_model_name = 'nisl_model'
    else:
        # model check
        info_print('The new model is selected.')
        generate_model_dir = hparams.finetuned_model_dir
        generate_model_name = hparams.finetuned_model_name
        if not os.path.exists(
                os.path.join(
                    os.path.join(generate_model_dir, generate_model_name),
                    'checkpoint')):
            raise FileNotFoundError(
                "The specified model doesn't exist, please finetune first or set 'use_nisl_model=1'"
            )
    time.sleep(1)

    info_print(
        "Generating JS test program (approx 15 minutes with gpus when nsamples=512 - including model load time)...\n"
    )
    gpt2.load_gpt2(sess,
                   model_dir=generate_model_dir,
                   model_name=generate_model_name,
                   multi_gpu=hparams.multi_gpu)

    assert hparams.batch_size != 0, "'batch_size' cannot be 0!"
    batches = int(math.ceil(hparams.nsamples / hparams.batch_size))
    remainder = hparams.nsamples % hparams.batch_size
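    # e.g. nsamples=10, batch_size=4 -> 3 batches, with only 2 samples kept from the last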

    all_functions = []

    for idx in range(batches):
        try:
            texts = gpt2.generate(sess,
                                  model_dir=generate_model_dir,
                                  model_name=generate_model_name,
                                  nsamples=hparams.batch_size,
                                  batch_size=hparams.batch_size,
                                  prefix=hparams.generate_prefix,
                                  top_p=hparams.top_p,
                                  top_k=hparams.top_k,
                                  temperature=hparams.temperature,
                                  include_prefix=True,
                                  return_as_list=True)

            # on the last batch, keep only the remainder so exactly nsamples are returned
            if idx == batches - 1:
                if remainder != 0:
                    texts = texts[:remainder]

            for text in texts:
                functions = text.split(hparams.generate_prefix)[1:]

                # drop the last fragment, which may be truncated mid-function and syntactically invalid
                if len(functions) >= 2:
                    functions = functions[:-1]

                all_functions += functions

                # print
                for function in functions:
                    print(function)
                    print('=' * 50)

            info_print(f'Generating {idx+1}/{batches}.')

        except Exception:
            # skip a failed batch instead of aborting the whole run
            continue

    # formatting
    all_functions = [i.strip() + '\n' for i in all_functions]

    # save all generated functions by gpt2 to a new file
    if not os.path.exists(saved_dir):
        os.makedirs(saved_dir)
    for idx, function in enumerate(all_functions, start=1):
        with open(os.path.join(saved_dir, f'{idx}.js'), 'w',
                  encoding='utf-8') as f:
            f.write(function)

    return all_functions
Example #15
 def reset_generator(self):
     self.app.logger.info('resetting %s session', self.genre)
     self.sess = gpt2.reset_session(self.sess, threads=1)
     gpt2.load_gpt2(self.sess, checkpoint_dir=self.checkpoint_dir)
Example #16
question_sentiment = "like"  # default sentiment is to ask if you like something

model_name = '124M'
run_name = "run_10"
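# Three models are kept loaded side by side; separate graphs and variable scopes
# ("m2", the default scope, and "m3") stop their TensorFlow variables from colliding.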

sess2 = gpt2.start_tf_sess(threads=8)
global graph2
graph2 = tf.compat.v1.get_default_graph()

with graph2.as_default():
    #tf.compat.v1.get_variable_scope().reuse_variables()
    #with tf.compat.v1.variable_scope("m2"):
    gpt2.load_gpt2(sess2, run_name="run_10_2", scope="m2")

sess = gpt2.start_tf_sess(threads=8)
sess = gpt2.reset_session(sess, threads=8)
global graph
graph = tf.compat.v1.get_default_graph()

with graph.as_default():

    gpt2.load_gpt2(sess, run_name=run_name)

sess3 = gpt2.start_tf_sess(threads=8)
sess3 = gpt2.reset_session(sess3, threads=8)
global graph3
graph3 = tf.compat.v1.get_default_graph()

with graph3.as_default():

    gpt2.load_gpt2(sess3, run_name="run_10_3", scope="m3")
Example #17
def generate_trending_tweet():
    # pick a topic
    trending = get_trending()
    topic = choice(trending)
    print("generating tweets on topic: " + topic)
    # fetch tweets on topic
    file_name = "../data/" + topic + ".txt"
    topical_tweets = get_topic_tweet(topic, 1000)
    tweet_string = " || ".join(topical_tweets)
    with open(file_name, "w") as f:
        f.write(tweet_string)
    # train a model on new tweets
    sess = gpt2.start_tf_sess()
    if not os.path.exists("checkpoint/" + topic):
        gpt2.finetune(
            sess,
            dataset=file_name,
            model_name=model_name,
            steps=2,
            restore_from="fresh",
            run_name=topic,
            print_every=1,
        )
    else:
        gpt2.finetune(
            sess,
            dataset=file_name,
            model_name=model_name,
            steps=1,
            restore_from="latest",
            run_name=topic,
            print_every=1,
        )
    # generate text with the new model
    gpt2.generate_to_file(
        sess,
        length=400,
        destination_path="../data/generated_tweets.txt",
        nsamples=5,
        run_name=topic,
        prefix=topic,
    )
    gpt2.reset_session(sess)
    # filter and return 1 valid tweet from the generated text
    with open("../data/generated_tweets.txt", "r") as f:
        texts = f.read().split("====================")
    tweets = []
    for text in texts:
        tweeters = text.split(" || ")
        for tweet in tweeters:
            if topic in tweet:
                tweet = tweet.split(" ")
                tweet = " ".join(word for word in tweet
                                 if not filter_links(word))
                if len(tweet) > len(topic) + 4:
                    tweets.append(tweet)
            else:
                continue
    tweet = choice(tweets)
    if len(tweet) > 280:
        tweet = tweet[:280]
    return tweet
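
All of the examples above share one lifecycle around gpt2.reset_session: start a session, load a model, generate, then reset before loading anything else. A minimal sketch of that pattern (the run names are hypothetical):

import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
for run in ('run1', 'run2'):  # hypothetical fine-tuned runs under checkpoint/
    gpt2.load_gpt2(sess, run_name=run)
    text = gpt2.generate(sess, run_name=run, return_as_list=True)[0]
    print(text[:200])
    # reset_session returns a fresh session; reusing the old one for another
    # load would re-create variables in the same graph and raise scope errors
    sess = gpt2.reset_session(sess)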