Example #1
def gpt2_finetune(hparams):
    info_print("Model finetuning, please wait. (Press Ctrl+C to exit early)")
    sess = gpt2.start_tf_sess()

    # input check
    if not os.path.exists(
            os.path.join(hparams.gpt2_model_dir, hparams.gpt2_model_name)):
        raise FileNotFoundError(
            "The specified gpt2 pretrained model doesn't exist, please restore the default params."
        )

    # clear checkpoint dir
    model_path = os.path.join(hparams.finetuned_model_dir,
                              hparams.finetuned_model_name)
    if os.path.exists(model_path):
        shutil.rmtree(model_path)

    gpt2.finetune(sess=sess,
                  dataset=hparams.data_path,
                  model_dir=hparams.gpt2_model_dir,
                  model_name=hparams.gpt2_model_name,
                  checkpoint_dir=hparams.finetuned_model_dir,
                  run_name=hparams.finetuned_model_name,
                  multi_gpu=hparams.multi_gpu,
                  steps=hparams.steps)
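For context, a minimal sketch of the hyper-parameter object that gpt2_finetune expects. The field names are taken directly from the attribute accesses in the example above; the concrete values are illustrative assumptions only.

from argparse import Namespace

# Field names mirror gpt2_finetune's hparams accesses; values are placeholders.
hparams = Namespace(
    gpt2_model_dir="models",              # directory holding the pretrained GPT-2
    gpt2_model_name="124M",               # pretrained checkpoint to start from
    finetuned_model_dir="checkpoint",     # where fine-tuned runs are written
    finetuned_model_name="run1",          # run name for this fine-tune
    data_path="data/corpus.txt",          # training text file
    multi_gpu=False,
    steps=1000,
)

gpt2_finetune(hparams)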
Example #2
    def finetune(self, corpus, return_text=True):
        """ Returns generated text sample

        Parameters
        ----------
        arg: corpus (object)
            - desc: Custom dataset text file

        arg: return_text (bool)
            - default: True
            - desc: Toggles whether to return custom-generated text in an array after fine-tuning

        Returns:
            Generated string in an array
        """
        sess = gpt2.start_tf_sess()
        gpt2.finetune(sess,
                corpus,
                model_name=self.model_name,
                steps=1000)     # steps is max number of training steps

        if return_text:
            text = gpt2.generate(sess, return_as_list=True)
            return text
        else:
            gpt2.generate(sess)	
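A possible follow-up to the method above (not part of the original class): once fine-tuning has written its checkpoint, it can be reloaded in a fresh session instead of training again. This assumes gpt-2-simple's defaults, i.e. run name 'run1' under the ./checkpoint directory.

import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, run_name='run1')               # loads ./checkpoint/run1
text = gpt2.generate(sess, return_as_list=True)[0]  # sample from the fine-tuned model
print(text)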
Example #3
  def fit(self,
          input_path,
          reset = True,
          overwrite = False,
          num_steps = 1000,
          batch_size = 1,
          print_every = 10,
          sample_every = 200,
          save_every = 300,
          restore_from = 'fresh',
          run_name = 'reddit_comment_generator'):
    if reset:
      tf.reset_default_graph()
      self.tf_sess = gpt2.start_tf_sess()

    if overwrite and restore_from != 'latest':
      restore_from = 'latest'

    # Finetuning the model on new data
    gpt2.finetune(self.tf_sess,
                  dataset = input_path,
                  batch_size = batch_size,
                  model_name = self.model_type,
                  steps = num_steps,
                  restore_from = restore_from,
                  run_name = run_name,
                  print_every = print_every,
                  sample_every = sample_every,
                  save_every = save_every)
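To make the restore_from logic above concrete, here is a standalone sketch of the same two-phase pattern with gpt-2-simple directly: an initial run from the pretrained weights, then a resumed run from the latest saved checkpoint. The dataset path and step counts are assumptions; the run name mirrors the default in fit above.

import os
import gpt_2_simple as gpt2

if not os.path.isdir(os.path.join('models', '124M')):
    gpt2.download_gpt2(model_name='124M')

# Phase 1: fine-tune from the pretrained GPT-2 weights.
sess = gpt2.start_tf_sess()
gpt2.finetune(sess,
              dataset='reddit_comments.txt',   # assumed training file
              model_name='124M',
              steps=500,
              restore_from='fresh',
              run_name='reddit_comment_generator')

# Phase 2: clear the graph, then resume from the checkpoint saved in phase 1.
sess = gpt2.reset_session(sess)
gpt2.finetune(sess,
              dataset='reddit_comments.txt',
              model_name='124M',
              steps=500,
              restore_from='latest',
              run_name='reddit_comment_generator')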
Example #4
def finetune(
    model_name: str,
    text_path: str,
    num_steps: int,
    sample_length: int,
    save_every: Optional[int],
) -> None:

    # Download the model if it is not present
    if not os.path.isdir(os.path.join("models", model_name)):
        print(f"Downloading {model_name} model...")
        gpt2.download_gpt2(model_name=model_name)

    sess = gpt2.start_tf_sess()

    if save_every is None:
        save_every = int(num_steps / 4)

    gpt2.finetune(
        sess,
        text_path,
        model_name=model_name,
        steps=num_steps,
        sample_length=sample_length,
        save_every=save_every,
    )  # steps is max number of training steps

    gpt2.generate(sess)
Example #5
def main():
    args = parse_args()
    if not args.file:
        logger.error("No file entered. Use -f flag.")
        exit()
    filename = Path(args.file).stem

    logger.debug("Download model")
    gpt2.download_gpt2()

    logger.debug("Starting GPT-2 session")
    sess = gpt2.start_tf_sess()
    logger.debug("Finetuning model")
    gpt2.finetune(sess, args.file, steps=args.iteration)

    Path("Exports").mkdir(parent=False, exist_ok=True)

    logger.debug("Generating text")
    while True:
        generated_text = gpt2.generate(sess,
                                       return_as_list=True,
                                       temperature=args.temperature)[0]
        with open(f"Exports/{filename}_{args.temperature}_gpt2simple.txt",
                  "a") as f:
            test_hour = datetime.datetime.now().strftime("%Y/%m/%d %H:%M")
            f.write(f"{test_hour}\n")
            for i in generated_text:
                f.write(f"{i}\n")
    logger.info("Runtime : %.2f seconds" % (time.time() - temps_debut))
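parse_args is not shown in this example. Below is a minimal sketch of a parser that satisfies the attribute accesses in main() (args.file, args.iteration, args.temperature); apart from the -f flag mentioned in the error message, the option names and defaults are assumptions.

import argparse

def parse_args():
    # Hypothetical reconstruction; only the attribute names are taken from main().
    parser = argparse.ArgumentParser(description="Fine-tune GPT-2 on a text file and export samples.")
    parser.add_argument("-f", "--file", help="Path to the training text file")
    parser.add_argument("--iteration", type=int, default=1000,
                        help="Number of fine-tuning steps")
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Sampling temperature used during generation")
    return parser.parse_args()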
Example #6
def train(input_file):

    if os.path.exists('models/temp'):
        shutil.rmtree('models/temp')
    
    if not os.path.exists('models/124M'):
        download()

    sess = gpt2.start_tf_sess()
   
    model_name = '124M'
    model_dir = 'models/'
    training_dir = 'src/training_data/'
    file_name = input_file.split('.')[0]

    gpt2.finetune(sess,
        training_dir+input_file,
        model_name=model_name,
        checkpoint_dir=model_dir+'temp/',
        run_name='',
        steps=1)
    
    gpt2.reset_session(sess)
    
    if os.path.exists('models/latest'):
        shutil.rmtree('models/latest')
    shutil.copytree('models/temp','models/latest')
    # shutil.rmtree('models/temp')
Example #7
    def prepare_fine_tuning(self, file_name: str):
        """
        prepare_fine_tuning : Personnalise et regle le modèle pour l'entrainer sur notre dataset.
        
        Args:
            file_name (str): Nom du fichier d'entrée.
        """
        if not os.path.isdir(os.path.join("models", self.model_name)):
            print(f"Downloading {self.model_name} model...")
            gpt2.download_gpt2(
                model_name=self.model_name
            )  # model is saved into current directory under /models/124M/

        sess = gpt2.start_tf_sess()

        gpt2.finetune(
            sess,
            dataset=file_name,
            model_name=self.model_name,
            steps=1000,
            restore_from="fresh",
            run_name=self.run_name,
            print_every=10,
            sample_every=200,
            save_every=500,
        )
Example #8
    def fine_tune(self, steps=100):
        self.download_tuning()
        self.log_function(f"* Fine tuning towards {self.tuning_description}...")
        brain = gpt2.start_tf_sess()
        gpt2.finetune(
            brain, self.tuning_path,
            model_name=self.model_name, steps=steps, run_name=self.run_name, overwrite=True
        )
        gpt2.reset_session(brain)
Example #9
def fine_tune_gpt2():
    MODEL_NAME = '355M'
    TRAINING_DATA_PATH = '../data/liar/gpt2_training_data.txt'
    session = gpt2.start_tf_sess()
    gpt2.finetune(session,
                  TRAINING_DATA_PATH,
                  model_name=MODEL_NAME,
                  steps=1000,
                  run_name='simple2')
Example #10
    def finetune(self, url, steps):
        self.get_data(url)
        sess = gpt2.start_tf_sess()
        gpt2.finetune(
            sess,
            'data.txt',
            model_name=self.base_model,
            run_name=self.name,
            steps=steps,
        )
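get_data is not shown here. A minimal sketch of what it could look like, assuming it simply downloads the text at the given URL into the data.txt file that the finetune method then trains on (the use of requests and the exact behaviour are assumptions).

import requests

def get_data(self, url):
    # Hypothetical helper: fetch the corpus and write it to the file used above.
    response = requests.get(url)
    response.raise_for_status()
    with open('data.txt', 'w', encoding='utf-8') as f:
        f.write(response.text)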
Example #11
    def trainGenerator(self, num_steps=50):
        # train the generator on a file and use our preset parameters
        session = gpt2.start_tf_sess()
        gpt2.finetune(session,
                      dataset=self.source,
                      model_name='124M',
                      steps=num_steps,
                      restore_from='fresh',
                      run_name='run1',
                      sample_every=200,
                      save_every=500,
                      print_every=10)
Example #12
def main():
    model_name = "117M"

    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess, ['bestiary.json', 'dndbeyond_scrape/dndbeyond.json'],
                  model_name=model_name,
                  run_name='dnd7',
                  steps=5000,
                  dataset_probs=[0.7,
                                 0.3])  # steps is max number of training steps

    gpt2.generate(sess)
Example #13
def run(path_params: str):
    # Input
    params = load_yaml(path_params)
    params_data = params['data']
    params_ml = params['ml']
    params_gen = params['generation']
    logging.debug(f"Params: {params}")

    # Init
    timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S')
    model_dir = params_ml['save_path']
    model_name = params_ml['model_size']  # GPT-2 model size, e.g. '117M' or '124M'
    run_name = f"02_gp2simple_{params_ml['model_size']}_{timestamp}"

    gpt2.download_gpt2(model_name=model_name, model_dir=model_dir)

    # Fine-tune
    sess = gpt2.start_tf_sess()

    gpt2.finetune(sess,
                  model_dir=model_dir,
                  model_name=model_name,
                  checkpoint_dir=model_dir,
                  run_name=run_name,
                  dataset=params_data['file_path'],
                  steps=params_ml['steps'],
                  learning_rate=params_ml['learning_rate'],
                  restore_from=params_ml['restore_from'],
                  print_every=params_ml['print_every'],
                  sample_every=params_ml['sample_every'],
                  save_every=params_ml['save_every'])

    # Generate
    text_generated = gpt2.generate(sess,
                                   run_name=run_name,
                                   model_dir=model_dir,
                                   model_name=model_name,
                                   prefix=params_gen['prefix'],
                                   temperature=params_gen['temperature'],
                                   return_as_list=True)

    # Output persist
    model_params_path = join(model_dir, 'gpt2_simple_params.yaml')
    with open(model_params_path, 'w') as f:
        yaml.dump(params, f, default_flow_style=False)
    logging.debug(f"Model params saved at {model_params_path}")

    makedirs(join(model_dir, run_name, 'text_generated'), exist_ok=True)
    text_generated_path = join(model_dir, run_name, 'text_generated', f'{timestamp}.txt')
    with open(text_generated_path, 'w') as f:
        f.write('\n'.join(text_generated))

    logging.debug(f"Text generated saved at {text_generated_path} - {len(text_generated)} total lines")
Example #14
def train_model(model_name="124M",
                data_path='C:\\Users\\pogop\\OneDrive\\Desktop\\NKJ.txt',
                steps=600,
                run_name='run1'):
    gpt2.download_gpt2(
        model_name=model_name
    )  # model is saved into current directory under /models/124M/
    sess = gpt2.start_tf_sess()  # start a TF session for fine-tuning
    gpt2.finetune(sess,
                  data_path,
                  model_name=model_name,
                  steps=steps,
                  run_name=run_name)  # steps is max number of training steps
    return
Example #15
def main(steps=200):
    model_name = "774M"
    if not os.path.isdir(os.path.join('models', model_name)):
        print(f"Downloading {model_name} model...")
        gpt2.download_gpt2(
            model_name=model_name
        )  # model is saved into current directory under /models/774M/

    file_name = "./datasets/gpt2_dataset.txt"

    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess, file_name, model_name=model_name,
                  steps=steps)  # steps is max number of training steps
Example #16
    def finetune(self, corpus, steps=1000, return_text=True):
        sess = gpt2.start_tf_sess()
        gpt2.finetune(sess,
                      corpus,
                      model_name=self.model_name,
                      steps=steps,
                      multi_gpu=True)  # steps is max number of training steps

        if return_text:
            text = gpt2.generate(sess, return_as_list=True)
            return text
        else:
            gpt2.generate(sess)
Example #17
def main():

    ##models:
    #model_name = "124M"
    #model_name = "355M"
    #model_name = "774M"
    #model_name = "1558M"

    model_name = "355M"
    file_name = "champ.txt"

    if not os.path.isdir(os.path.join("models", model_name)):
        print(f"Downloading {model_name} model...")
        gpt2.download_gpt2(
            model_name=model_name
        )  # model is saved into current directory under ./models/355M/

    if not os.path.isfile(file_name):
        print("please provide a filename..")
        exit()

    #GPU config
    config = tf.compat.v1.ConfigProto()
    config.gpu_options.allow_growth = True
    config.gpu_options.per_process_gpu_memory_fraction = 0.77
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    sess = tf.compat.v1.Session(config=config)

    #sess = gpt2.start_tf_sess() #old for CPU

    print('\n+++ Train model (y)? +++')
    train = input()
    if train == "" or train == "y" or train == 'yes':
        print('---> training model...\n')
        gpt2.finetune(
            sess, file_name, model_name=model_name,
            steps=100)  # steps is max number of training steps - default: 1000
    else:
        print('---> not training model...\n')
    # gpt2.generate(sess) #generate session in file

    ## generate text to file
    gen_file = 'gpt2_gentext_{:%Y%m%d_%H%M%S}.txt'.format(
        datetime.datetime.now(datetime.timezone.utc))
    gpt2.generate_to_file(sess,
                          destination_path=gen_file,
                          length=10000,
                          temperature=0.7,
                          nsamples=1,
                          batch_size=1)
Example #18
def train_GPT():
    model_name = "124M"
    if not os.path.isdir(os.path.join("models", model_name)):
        print(f"Downloading {model_name} model...")
        gpt2.download_gpt2(
            model_name=model_name
        )  # model is saved into current directory under /models/124M/

    file_name = "corpus.txt"
    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess, file_name, model_name=model_name,
                  steps=1)  # steps is max number of training steps

    return sess
Example #19
def fine_tune(inFile):
    model_name = "117M"
    gpt2.download_gpt2(
        model_name=model_name
    )  # model is saved into current directory under /models/117M/

    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess,
                  inFile,
                  model_name=model_name,
                  steps=1000,
                  save_every=100)  # steps is max number of training steps

    gpt2.generate(sess)
Example #20
def finetune():
    model_name = "124M"
    gpt2.download_gpt2(
        model_name=model_name
    )  # model is saved into current directory under /models/124M/

    session = gpt2.start_tf_sess()
    gpt2.finetune(session,
                  'clnn.txt',
                  model_name=model_name,
                  steps=arguments.training_iterations
                  )  # steps is max number of training steps

    return session
Example #21
    def generate_models(self, model_name: str, data_path: str) -> None:
        """ Generate new models given a model name and data source path. Data source path being a pre-existing gpt2 learning model """
        print(os.path.join("models", model_name))
        if not os.path.isdir(os.path.join("models", model_name)):
            print(f"Downloading {model_name} model...")
            gpt2.download_gpt2(model_name=model_name)

        gpt2.finetune(self.sess,
                      data_path,
                      model_name=model_name,
                      batch_size=1,
                      sample_every=100,
                      sample_length=100,
                      save_every=100)
        gpt2.generate(self.sess)
Example #22
def fine_tune(args, model_name='124M'):
    print(
        f'Run fine-tuning for run {args.run_name} using GPT2 model {model_name}...'
    )
    if not os.path.isdir(os.path.join("models", model_name)):
        log.info(f"Downloading {model_name} model...")
        gpt2.download_gpt2(model_name=model_name)
    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess,
                  args.data_path,
                  model_name=model_name,
                  run_name=args.run_name,
                  steps=-1,
                  sample_every=10,
                  save_every=10)
Example #23
def gpt_2():
    model_name = "117M"
    gpt2.download_gpt2(
        model_name=model_name
    )  # model is saved into current directory under /models/117M/

    sess = gpt2.start_tf_sess()
    gpt2.finetune(sess,
                  'titles.txt',
                  model_name=model_name,
                  steps=1000,
                  save_every=200,
                  sample_every=25)  # steps is max number of training steps

    gpt2.generate(sess)
Example #24
    def generate_models(self, name, data):
        model_name = name
        if not os.path.isdir(os.path.join("models", model_name)):
            print(f"Downloading {model_name} model...")
            gpt2.download_gpt2(model_name=model_name)

        data_path = data
        gpt2.finetune(
            self.sess,
            data_path,
            model_name=model_name,
            batch_size=1,
            sample_every=100,
            sample_length=100,
        )
        gpt2.generate(self.sess)
Example #25
def generator(data_1, data_2, data_3):

    game_name = data_1
    epoch = data_2
    model = data_3
    db = Database.readBlobData(game_name)
    file_name = "/content/app/data/data.txt"

    g = pyfiglet.figlet_format("Generating world...", font="slant")
    print(Fore.BLACK + Style.DIM)

    print(Fore.GREEN)
    print(Style.BRIGHT + g)
    print(Fore.BLACK + Style.DIM)
    sess = gpt2.start_tf_sess()

    sample = gpt2.finetune(sess,
                           dataset=file_name,
                           model_name=data_3,
                           steps=epoch,
                           restore_from='fresh',
                           run_name="run1",
                           print_every=1,
                           sample_every=epoch,
                           save_every=epoch)
    return sample
Example #26
def train_model(channel: str):
    file_name = 'data/%s.txt' % channel

    if not os.path.isdir(os.path.join('models', model)):
        gpt2.download_gpt2(model_name=model)

    sess = gpt2.start_tf_sess()

    gpt2.finetune(sess,
                  dataset=file_name,
                  model_name=model,
                  steps=steps,
                  restore_from='latest',
                  run_name='run1',
                  print_every=100,
                  sample_every=2000,
                  save_every=500)
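train_model references module-level model and steps names that are not part of the snippet. A minimal sketch of the assumed module-level setup; the values are illustrative only.

import gpt_2_simple as gpt2

# Assumed globals referenced inside train_model.
model = '124M'   # GPT-2 model size passed to download_gpt2 and finetune
steps = 1000     # number of fine-tuning steps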
Example #27
def train_gpt2_model(fileName):
  model_name = "124M"
  if not os.path.isdir(os.path.join("models", model_name)):
    print(f"Downloading {model_name} model...")
    gpt2.download_gpt2(model_name=model_name)   # model is saved into current directory under /models/124M/
    

  sess = gpt2.start_tf_sess()
  gpt2.finetune(sess,
              dataset=fileName,
              model_name=model_name,
              steps=1000,
              restore_from='fresh',
              run_name='run'+fileName,
              print_every=50,
              sample_every=200,
              save_every=500)
Example #28
def train(model):
    learning_rate = 0.0001
    optimizer = 'adam'
    batch_size = 1
    model_name = model 
    sess = gpt2.start_tf_sess()

    gpt2.finetune(sess,
                  'data/lyrics.txt',
                  model_name=model_name,
                  sample_every=50,
                  save_every=50,
                  print_every=10,
                  learning_rate=learning_rate,
                  batch_size=batch_size,
                  restore_from='latest',
                  steps=500)   # max number of training steps
    return sess
Example #29
def generate_text_gpt2(input_text_file_path, save_parent_path, save_folder,
                       model_name):
    """
    generate text by gpt 2

    """
    sess = gpt2.start_tf_sess()
    # fine-tune briefly on the input text, then generate samples below
    gpt2.finetune(sess, input_text_file_path, model_name=model_name,
                  steps=15)  # steps is max number of training steps

    gpt2.load_gpt2(sess)

    for i in range(200):
        single_text = gpt2.generate(sess, return_as_list=True)[0]
        fw = open(save_parent_path + save_folder + f"{i}.txt", "w+")
        fw.write(single_text)
        fw.close()
Example #30
def train(
    tf_session: TfSession,
    file_path_input: str = DEFAULT_FILE_PATH_INPUT,
    training_steps: int = DEFAULT_TRAINING_STEPS,
) -> TfSession:
    if not os.path.isdir(os.path.join('models', MODEL_NAME)):
        print(f'Downloading { MODEL_NAME } model...')
        gpt2.download_gpt2(model_name=MODEL_NAME)

    gpt2.finetune(
        tf_session,
        file_path_input,
        model_name=MODEL_NAME,
        steps=training_steps,
        overwrite=True,
    )

    return tf_session
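A short usage sketch for the function above: the caller owns the TensorFlow session, train fine-tunes inside it and hands it back, so generation can follow immediately. The file path and step count are assumptions; TfSession, MODEL_NAME and the DEFAULT_* constants come from the original module.

import gpt_2_simple as gpt2

sess = gpt2.start_tf_sess()
sess = train(sess, file_path_input='data/corpus.txt', training_steps=500)
gpt2.generate(sess)  # sample from the freshly fine-tuned checkpoint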