              model_name='355M',
              steps=1000,
              restore_from='fresh',
              run_name='run1',
              print_every=10,
              sample_every=200,
              save_every=500
              )

"""After the model is trained, you can copy the checkpoint folder to Drive. Do everything
with Drive; it's messy otherwise.
"""

gpt2.copy_checkpoint_to_gdrive(run_name='run1')

"""## Generate Text From The Trained Model

After you've trained the model, `generate` generates a single text from the loaded model.
"""

gpt2.generate(sess, run_name='run1')

"""You can generate multiple texts at a time by specifying `nsamples`. Unique to GPT-2, you can
pass a `batch_size` to generate multiple samples in parallel, giving a massive speedup
(in Colaboratory, set a maximum of 20 for `batch_size`).

* **`length`**: Number of tokens to generate (default 1023, the maximum)
* **`temperature`**: The higher the temperature, the crazier the text (default 0.7; recommended to keep between 0.7 and 1.0)
* **`top_k`**: Limits the generated guesses to the top *k* guesses (default 0, which disables the behavior; if the generated output is super crazy, you may want to set `top_k=40`)
"""
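"""For example, the options above can be combined in a single call. This is only a sketch:
the parameter values below are illustrative and not tuned for this model.
"""

gpt2.generate(sess,
              run_name='run1',
              length=250,          # tokens per sample
              temperature=0.8,     # within the recommended 0.7-1.0 range
              top_k=40,            # restrict sampling to the 40 most likely tokens
              nsamples=5,          # generate five samples...
              batch_size=5         # ...in one parallel batch (keep <= 20 in Colaboratory)
              )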
!pip install gpt_2_simple -t . --no-deps --upgrade
!pip install toposort

import gpt_2_simple as gpt2
from google.colab import drive

# Replace the installed gpt_2.py with the copy kept in Drive.
!cp /content/drive/MyDrive/Echidna/gpt_2_simple/gpt_2.py /content/gpt_2_simple/gpt_2.py

model_name = "355M"
gpt2.download_gpt2(model_name=model_name)
gpt2.mount_gdrive()

root_dir = '/content/drive/MyDrive/Echidna'

sess = gpt2.start_tf_sess()
gpt2.finetune(sess,
              root_dir + '/data/data.json',
              steps=4000,
              model_name=model_name,
              print_every=10,
              sample_every=200,
              save_every=500,
              run_name='echidna',
              restore_from='fresh'
              )

gpt2.copy_checkpoint_to_gdrive(run_name='echidna')
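"""To reuse the `echidna` checkpoint in a fresh VM later, the same pattern as the runs below
applies: copy the checkpoint back from Drive, load it, and generate. A minimal sketch,
assuming the copy above succeeded:
"""

gpt2.copy_checkpoint_from_gdrive(run_name='echidna')

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, run_name='echidna')
gpt2.generate(sess, run_name='echidna')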
file_name = "EEG-concentrating.csv"  # add your file here

sess = gpt2.start_tf_sess()
gpt2.finetune(sess,
              dataset=file_name,
              steps=1000,                  # update steps as necessary
              restore_from='latest',       # to continue from a previous run
              run_name='eeg-concentrating-1',
              print_every=1,
              sample_every=100,
              save_every=500,
              overwrite=True)

"""If using Colab:
gpt2.copy_checkpoint_to_gdrive(run_name='eeg-concentrating-1')
"""

# Generation loop:
gpt2.copy_checkpoint_from_gdrive(run_name='eeg-concentrating-1')

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, run_name='eeg-concentrating-1')

# Examples of generation:
print("Generating without previous data as input")
for x in range(1, 10):
    gpt2.generate(sess, run_name='eeg-concentrating-1', length=100)
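"""The loop above generates without any seed. To condition generation on previous data
instead, `prefix` can be used to feed the model earlier rows. A minimal sketch, using the
first line of the training file as an illustrative seed (any recent readings would work
the same way):
"""

print("Generating with previous data as input")
with open(file_name) as f:
    seed = f.readline().strip()  # first CSV row, used here only as an example seed

gpt2.generate(sess,
              run_name='eeg-concentrating-1',
              prefix=seed,
              length=100)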
sess = gpt2.start_tf_sess()

file_name = "rightcontent.txt"

gpt2.finetune(sess,
              dataset=file_name,
              model_name='124M',
              steps=8000,
              restore_from='fresh',
              run_name='run_right_7000',
              print_every=10,
              sample_every=200,
              save_every=500,
              )

gpt2.copy_checkpoint_to_gdrive(run_name='run_right_7000')

"""You're done! Feel free to go to the **Generate Text From The Trained Model** section to
generate text based on your retrained model.

## Load a Trained Model Checkpoint

Running the next cell will copy the `.rar` checkpoint file from your Google Drive into the
Colaboratory VM.
"""

gpt2.copy_checkpoint_from_gdrive(run_name='run_right_7000')

sess = gpt2.start_tf_sess()
gpt2.load_gpt2(sess, run_name='run_right_7000')

"""## Generate Text From The Trained Model
"""
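"""As in the notebooks above, `generate` produces a single text from the loaded checkpoint.
A minimal sketch for this run:
"""

gpt2.generate(sess, run_name='run_right_7000')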
# Or any other text file to be trained
sess = gpt2.start_tf_sess()
gpt2.finetune(sess,
              dataset=fantasy,
              model_name='124M',
              steps=100,
              restore_from='fresh',
              run_name='fantasy3',
              print_every=2,
              sample_every=5,
              save_every=10
              )
# Stop to see if it's trained enough

gpt2.copy_checkpoint_to_gdrive(run_name='fantasy3')

input_text = "I was riding a cycle when"  # this is where we enter the text to get suggestions

# Other hyperparameters
length = 20
top_p = 0.5
temperature = 0.9
top_k = 0  # top_k must be an integer; 0 disables top-k filtering (top_p is used instead)

# To be used while running
gpt2.generate(sess,
              run_name='fantasy3',
              prefix=input_text,
              length=length,
              temperature=temperature,
              top_p=top_p,
              top_k=top_k)
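"""To work with the suggestions programmatically instead of printing them, `gpt_2_simple`
supports `return_as_list=True`, which makes `generate` return the samples as a list of
strings. A sketch only; the post-processing below is illustrative.
"""

suggestions = gpt2.generate(sess,
                            run_name='fantasy3',
                            prefix=input_text,
                            length=length,
                            temperature=temperature,
                            top_p=top_p,
                            top_k=top_k,
                            nsamples=3,
                            return_as_list=True)

# Each returned string normally begins with the prompt; keep just the continuation.
for s in suggestions:
    suggestion = s[len(input_text):] if s.startswith(input_text) else s
    print(suggestion.strip())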