Example No. 1
import pandas as pd

from simpletransformers.t5 import T5Model


def test_t5():
    train_data = [
        ["convert", "one", "1"],
        ["convert", "two", "2"],
    ]

    train_df = pd.DataFrame(train_data,
                            columns=["prefix", "input_text", "target_text"])

    eval_data = [
        ["convert", "three", "3"],
        ["convert", "four", "4"],
    ]

    eval_df = pd.DataFrame(eval_data,
                           columns=["prefix", "input_text", "target_text"])

    model_args = {
        "reprocess_input_data": True,
        "overwrite_output_dir": True,
        "max_seq_length": 10,
        "train_batch_size": 2,
        "num_train_epochs": 2,
        "save_model_every_epoch": False,
        "max_length": 20,
        "num_beams": 1,
    }

    # Create T5 Model
    model = T5Model("t5", "t5-base", args=model_args, use_cuda=False)

    # Train T5 Model on new task
    model.train_model(train_df)

    # Evaluate T5 Model on new task
    model.eval_model(eval_df)

    # Predict with trained T5 model
    model.predict(["convert: four", "convert: five"])

    # Load test
    model = T5Model("t5", "outputs", args=model_args, use_cuda=False)

    # Evaluate T5 Model on new task
    model.eval_model(eval_df)

    # Predict with trained T5 model
    model.predict(["convert: four", "convert: five"])
Example No. 2
def create_model(model_class, model_type, model_name, num_labels, weight, args,
                 use_cuda, cuda_device, **kwargs):
    if model_class == "ClassificationModel":
        return ClassificationModel(model_type, model_name, num_labels, weight,
                                   args, use_cuda, cuda_device, **kwargs)
    elif model_class == "MultiLabelClassificationModel":
        return MultiLabelClassificationModel(model_type, model_name,
                                             num_labels, weight, args,
                                             use_cuda, cuda_device, **kwargs)
    elif model_class == "QuestionAnsweringModel":
        return QuestionAnsweringModel(model_type, model_name, args, use_cuda,
                                      cuda_device, **kwargs)
    elif model_class == "NERModel":
        return NERModel(model_type,
                        model_name,
                        args=args,
                        use_cuda=use_cuda,
                        cuda_device=cuda_device,
                        **kwargs)
    elif model_class == "T5Model":
        args = T5Args()
        args.use_multiprocessed_decoding = False
        return T5Model(model_type,
                       model_name,
                       args=args,
                       use_cuda=use_cuda,
                       cuda_device=cuda_device,
                       **kwargs)
    else:
        raise ValueError(
            "{} is either invalid or not yet implemented.".format(model_class))
Example No. 3
 def __init__(self,
              model_path: str = config.DEFAULT_MODEL_PATH,
              model_architecture: str = config.MODEL_ARCHITECTURE,
              use_cuda: bool = config.GPU):
     '''
     Constructs all the necessary attributes for the MT5_Translator object.
     Parameters
     ----------
         model_path : str
             path to the mt5_translator model
         model_architecture : str
             model architecture (mt5, t5 ...)
         use_cuda : bool
             whether to use CUDA or not (if available)
     '''
     logging.info("Loading model...")
     self.model_path = model_path
     self.use_cuda = use_cuda
     self.device = torch.cuda.is_available() and self.use_cuda
     self.model_args = T5Args()
     self.model_args.max_length = 512
     self.model_args.length_penalty = 1
     self.model_args.num_beams = 10
     self.model = T5Model("mt5",
                          self.model_path,
                          args=self.model_args,
                          use_cuda=self.device)
     logging.info(f"Use CUDA: {self.device}")
     logging.info(f"Num GPUs Available: {torch.cuda.device_count()}")
     logging.info(f"Model loaded")
Example No. 4
import logging

from simpletransformers.t5 import T5Model


def paraphrase(text, cuda=False):
    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.ERROR)

    model = T5Model(model_type="t5", model_name="outputs", use_cuda=cuda)
    output = []
    # predicts paraphrases for every paragraph
    for paragraph in text:
        # add prefix
        predict_to = ["paraphrase: " + paragraph]

        preds = model.predict(predict_to)

        print("---------------------------------------------------------")

        print("Predictions >>>")
        result = []
        # store each prediction with its Damerau-Levenshtein distance from the original paragraph
        for pred in preds[0]:
            result.append([pred, damerau.distance(paragraph, pred)])
            # result.append([pred, jarowinkler.distance(paragraph, pred)])
            print(pred)
            print(damerau.distance(paragraph, pred))
        # picks the most diversified prediction
        print("---------------------------------------------------------")
        best_pred = max(result, key=lambda x: x[1])[0]
        output.append(best_pred)
        print(best_pred)
    # outputs the total damerau distance and the paraphrased text
    print(*output, sep="\n")
    print("Diversified by: ", damerau.distance("".join(text), "".join(output)))
    return output
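
# The snippet above relies on a `damerau` object (and optionally `jarowinkler`) that is
# not defined here. A plausible setup, assuming the strsimpy string-similarity package:
from strsimpy.damerau import Damerau

damerau = Damerau()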
Example No. 5
def get_model():
    model_args = {
        "reprocess_input_data": True,
        "overwrite_output_dir": True,
        "max_seq_length": 200,
        "train_batch_size": 16,
        "num_train_epochs": 10,
        "evaluate_during_training": True,
        "evaluate_during_training_steps": 500,
    }

    # Create T5 Model
    model = T5Model(model_name="t5-small",
                    model_type='t5',
                    args=model_args,
                    use_cuda=True)

    return model
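
# Hypothetical usage; train_df and eval_df are assumed to be DataFrames with
# "prefix", "input_text" and "target_text" columns:
model = get_model()
model.train_model(train_df, eval_data=eval_df)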
Example No. 6
def recommend(abstract: str):
    from simpletransformers.t5 import T5Model
    model_args = {
        "reprocess_input_data": True,
        "overwrite_output_dir": True,
        "max_seq_length": 256,
        "eval_batch_size": 128,
        "num_train_epochs": 1,
        "save_eval_checkpoints": False,
        "use_multiprocessing": False,
        "num_beams": None,
        "do_sample": True,
        "max_length": 50,
        "top_k": 50,
        "top_p": 0.95,
        "num_return_sequences": 3,
    }
    model = T5Model("t5","./checkpoint_15000_1", args=model_args,use_cuda=False)
    abss =["summarize: "+abstract]
    predicted_title = model.predict(abss)
    return predicted_title
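
# Hypothetical call; "./checkpoint_15000_1" above is assumed to be a locally saved
# fine-tuned checkpoint:
print(recommend("We study transfer learning for NLP with a unified text-to-text model."))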
Example No. 7
import pandas as pd

from simpletransformers.t5 import T5Model, T5Args

model_args = T5Args()
model_args.max_length = 50
model_args.length_penalty = 2.0
model_args.repetition_penalty = 2.0
model_args.num_beams = 5
model_args.early_stopping = True
#model_args.do_sample = True
#model_args.top_p = 0.3
model_args.num_return_sequences = 5


#model = T5Model("mt5", "persiannlp/mt5-base-parsinlu-opus-translation_fa_en", args=model_args)
model_name = "outputs_t5_small_full_2020/"
task = "xWant"
model = T5Model("t5", model_name, args=model_args)

print("predicting")
#print(model.predict(["xReact: Ali buys a book",
#                     "xReact: Ali fell on his knees"]))


df = pd.read_csv("data/eval.tsv", sep="\t").astype(str)
# Prepare the data for testing
#df = df.groupby('input_text')['target_text'].apply(list)
def my_eval(df, prefix):
    df = df.groupby(['prefix','input_text'], as_index=False).agg({'target_text':lambda x: list(x)})
    truth_values = df.loc[df["prefix"] == prefix]["target_text"].tolist()
    input_values = df.loc[df["prefix"] == prefix]["input_text"].tolist()
    input_values = [prefix + ": " + str(input_text) for input_text in input_values]
Example No. 8
from simpletransformers.t5 import T5Model

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 128,
    "eval_batch_size": 16,
    "num_train_epochs": 1,
    "save_eval_checkpoints": False,
    "use_multiprocessing": False,
    # "silent": True,
    "num_beams": None,
    "do_sample": True,
    "max_length": 50,
    "top_k": 50,
    "top_p": 0.95,
    "num_return_sequences": 3,
}

model = T5Model("outputs/best_model", args=model_args)

query = "ask_question: " + """
ANTIQUE CAST METAL 3 GLOBE CANDLABRA JADITE LAMP.

Stunning antique lamp with three candle style globes. Cast metal base with jadite green glass insert. Has been rewired with new braided cord. In excellent condition with only one chip (as pictured) on the edge of the glass insert. E9 69 on underside of metal base. Missing finial. New low wattage globes.
"""

preds = model.predict([query])

print(preds)
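
# With num_return_sequences=3 and a single query, preds[0] is expected to hold the three
# sampled questions; printing them individually is often more readable:
for question in preds[0]:
    print(question)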
Example No. 9
from simpletransformers.t5 import T5Model
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoints", "-ckpts", help="checkpoints for t5 base",type = str, default = './best_model')
parser.add_argument("--abstract", "-abs", help="abstract to generate title",type = str)
args = parser.parse_args()

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 256,
    "eval_batch_size": 128,
    "num_train_epochs": 1,
    "save_eval_checkpoints": False,
    "use_multiprocessing": False,
    "num_beams": None,
    "do_sample": True,
    "max_length": 50,
    "top_k": 50,
    "top_p": 0.95,
    "num_return_sequences": 3,
}

with open(args.abstract) as f:
    data = f.read()

model = T5Model("t5",args.checkpoints, args=model_args,use_cuda=False)
abss =["summarize: "+data]
predicted_title = model.predict(abss)
print(predicted_title)
Example No. 10
    train_df = pd.read_csv("data/train_df_3.tsv", sep="\t").astype(str)
    eval_df = pd.read_csv("data/eval_df_3.tsv", sep="\t").astype(str)

    model_args = {
        "reprocess_input_data": True,
        "overwrite_output_dir": True,
        "max_seq_length": 128,
        "train_batch_size": 1,
        "num_train_epochs": 1,
        "save_eval_checkpoints": True,
        "save_steps": -1,
        "use_multiprocessing": False,
        # "silent": True,
        "evaluate_during_training": True,
        "evaluate_during_training_steps": 1500,
        "evaluate_during_training_verbose": True,
        "fp16": False,
        "wandb_project": "Question Generation with T5",
    }

    model = T5Model("mt5", "google/mt5-small", use_cuda=False, args=model_args)

    if hasattr(torch.cuda, 'empty_cache'):
        torch.cuda.empty_cache()

    try:
        model.train_model(train_df, eval_data=eval_df)
    except Exception:
        # free cached GPU memory (e.g. after an out-of-memory error) before re-raising
        if hasattr(torch.cuda, 'empty_cache'):
            torch.cuda.empty_cache()
        raise
Example No. 11
    "overwrite_output_dir": True,
    "max_seq_length": 256,
    "eval_batch_size": 128,
    "num_train_epochs": 1,
    "save_eval_checkpoints": False,
    "use_multiprocessing": False,
    "num_beams": None,
    "do_sample": True,
    "max_length": 50,
    "top_k": 50,
    "top_p": 0.95,
    "num_return_sequences": 3,
}

model = T5Model("t5",
                "/content/drive/My Drive/outputs/best_model",
                args=model_args)

abstr = [
    "summarize: " +
    """Transfer learning, where a model is first pre-trained on a data-rich task before being finetuned  on a downstream task,
has emerged as a powerful technique in natural language processing (NLP).  
The effectiveness of transfer learning has given rise to a diversity of approaches, methodology, and practice. 
In this paper, we explore the landscape of transfer learning techniques for NLP by introducing a unified framework 
that converts all text-based language problems into a text-to-text format. 
Our systematic study compares pre-training objectives, architectures, unlabeled data sets, 
transfer approaches, and other factors on dozens of language understanding tasks. 
By combining the insights from our exploration with scale and our new Colossal Clean Crawled Corpus, we achieve state-of-the-art 
results on many benchmarks covering summarization, question answering, text classification, and more. 
To facilitate future work on transfer learning for NLP, we release our data set, pre-trained models, and code."""
]
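
# The snippet ends here; presumably the abstract would then be summarized roughly as
# follows (illustrative continuation, not from the original):
preds = model.predict(abstr)
print(preds)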
Example No. 12
import pandas as pd

from simpletransformers.t5 import T5Model

model_args = {
    "overwrite_output_dir": True,
    "max_seq_length": 196,
    "eval_batch_size": 32,
    "use_multiprocessing": False,
    "num_beams": None,
    "do_sample": True,
    "max_length": 50,
    "top_k": 50,
    "top_p": 0.95,
    "num_return_sequences": 3,
}

# Load the trained model
model = T5Model("outputs", args=model_args)

# Load the evaluation data
df = pd.read_csv("data/train.tsv", sep="\t").astype(str)

# Prepare the data for testing
to_predict = [
    prefix + ": " + str(input_text) for prefix, input_text in zip(
        df["prefix"].tolist(), df["input_text"].tolist())
]
truth = df["target_text"].tolist()
tasks = df["prefix"].tolist()

# Get the model predictions
preds = model.predict(to_predict)
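
# Illustrative follow-up (not in the original snippet): take the first sampled output per
# input and report exact-match accuracy against the reference target_text.
best = [p[0] if isinstance(p, list) else p for p in preds]
accuracy = sum(b == t for b, t in zip(best, truth)) / len(truth)
print(f"Exact match over {len(truth)} examples: {accuracy:.3f}")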
Example No. 13
import pandas as pd

from simpletransformers.t5 import T5Model

train_df = pd.read_csv("data/train_df.tsv", sep="\t").astype(str)
eval_df = pd.read_csv("data/eval_df.tsv", sep="\t").astype(str)

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 16,
    "train_batch_size": 8,
    "num_train_epochs": 1,
    "save_eval_checkpoints": True,
    "save_steps": -1,
    "use_multiprocessing": False,
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 15000,
    "evaluate_during_training_verbose": True,
    "fp16": False,
    "wandb_project": "Question Generation with T5",
}

model = T5Model("t5-small", args=model_args)

model.train_model(train_df, eval_data=eval_df)
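
# After training, the best checkpoint (saved because evaluate_during_training is enabled)
# can typically be reloaded for inference; the path below assumes the default output dir:
trained_model = T5Model("outputs/best_model", args=model_args)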
Example No. 14
import logging

import pandas as pd
from simpletransformers.t5 import T5Model, T5Args

source = "english"
target = "spanish"

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

model_args = T5Args()
model_args.max_length = 512
model_args.min_length = 0
model_args.length_penalty = 1
model_args.num_beams = 10

model = T5Model("mt5", "outputs", args=model_args)

eval_df = pd.read_csv("data/eng-spa/eval.tsv", sep="\t").astype(str)

target_truth = [
    eval_df.loc[eval_df["prefix"] == f"translate {source} to {target}"]
    ["target_text"].tolist()
]
to_target = eval_df.loc[
    eval_df["prefix"] ==
    f"translate {source} to {target}"]["input_text"].tolist()

# print(to_target[:2])
sentences = [
    "Me gusta tocar muchos instrumentos. Adoro la música",
    "I like to play many instruments. I love music"
Example No. 15
import pandas as pd
from simpletransformers.t5 import T5Model

train_df = pd.read_csv("data/train.tsv", sep="\t").astype(str)
eval_df = pd.read_csv("data/eval.tsv", sep="\t").astype(str)

model_args = {
    "max_seq_length": 196,
    "train_batch_size": 16,
    "eval_batch_size": 64,
    "num_train_epochs": 1,
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 15000,
    "evaluate_during_training_verbose": True,
    "use_multiprocessing": False,
    "fp16": False,
    "save_steps": -1,
    "save_eval_checkpoints": False,
    "save_model_every_epoch": False,
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "wandb_project": "T5 mixed tasks - Binary, Multi-Label, Regression",
}

model = T5Model("t5-base", args=model_args)

model.train_model(train_df, eval_data=eval_df)
Example No. 16
import logging
import sacrebleu
import pandas as pd
from simpletransformers.t5 import T5Model, T5Args

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

model_args = T5Args()
model_args.max_length = 512
model_args.length_penalty = 1
model_args.num_beams = 10

model = T5Model("mt5", "outputs_base", args=model_args)

eval_df = pd.read_csv("data/eval.tsv", sep="\t").astype(str)

sinhala_truth = [
    eval_df.loc[eval_df["prefix"] == "translate english to sinhala"]
    ["target_text"].tolist()
]
to_sinhala = eval_df.loc[eval_df["prefix"] == "translate english to sinhala"][
    "input_text"].tolist()

english_truth = [
    eval_df.loc[eval_df["prefix"] == "translate sinhala to english"]
    ["target_text"].tolist()
]
to_english = eval_df.loc[eval_df["prefix"] == "translate sinhala to english"][
    "input_text"].tolist()
Example No. 17
import pandas as pd

from simpletransformers.t5 import T5Model

train_df = pd.read_csv("data/train_df.tsv", sep="\t").astype(str)
eval_df = pd.read_csv("data/eval_df.tsv", sep="\t").astype(str)

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 128,
    "train_batch_size": 8,
    "num_train_epochs": 1,
    "save_eval_checkpoints": True,
    "save_steps": -1,
    "use_multiprocessing": False,
    # "silent": True,
    "evaluate_during_training": True,
    "evaluate_during_training_steps": 15000,
    "evaluate_during_training_verbose": True,
    "fp16": False,
    "wandb_project": "Question Generation with T5",
}

model = T5Model("t5-large", args=model_args)

model.train_model(train_df, eval_data=eval_df)
Example No. 18
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 128,
    "eval_batch_size": 128,
    "num_train_epochs": 1,
    "save_eval_checkpoints": False,
    "use_multiprocessing": False,
    "num_beams": None,
    "do_sample": True,
    "max_length": 50,
    "top_k": 50,
    "top_p": 0.95,
    "num_return_sequences": 3,
}

model = T5Model("test_outputs_large/best_model", args=model_args)

df = pd.read_csv("data/eval_df.tsv", sep="\t").astype(str)
preds = model.predict([
    "ask_question: " + description
    for description in df["input_text"].tolist()
])

questions = df["target_text"].tolist()

with open("test_outputs_large/generated_questions.txt", "w") as f:
    for i, desc in enumerate(df["input_text"].tolist()):
        pprint(desc)
        pprint(preds[i])
        print()
Example No. 19
def main():

    logging.basicConfig(level=logging.INFO)
    transformers_logger = logging.getLogger("transformers")
    transformers_logger.setLevel(logging.WARNING)

    parser = argparse.ArgumentParser()

    # Required parameters
    parser.add_argument(
        "--data_dir",
        default=None,
        type=str,
        required=True,
        help=
        "The input data dir. Should contain the source and target files for the task.",
    )
    parser.add_argument(
        "--model_type",
        default=None,
        type=str,
        required=True,
        help="Model type, choose from [seq2seq, T5]",
    )
    parser.add_argument(
        "--model_name_or_path",
        default=None,
        type=str,
        required=True,
        help=
        "Path to pretrained model or model identifier from huggingface.co/models",
    )

    # Other parameters
    parser.add_argument("--do_train",
                        action="store_true",
                        help="Whether to run training.")
    parser.add_argument("--do_eval",
                        action="store_true",
                        help="Whether to run eval on the valid set.")
    parser.add_argument("--do_predict",
                        action="store_true",
                        help="Whether to run prediction on the test set.")
    parser.add_argument("--init_model_weights",
                        action="store_true",
                        help="Whether to initialize the model weights")
    parser.add_argument("--overwrite_output_dir",
                        action="store_true",
                        help="Whether to overwrite on the existing output dir")
    parser.add_argument("--use_multiprocessed_decoding",
                        action="store_true",
                        help="Whether to use multiprocess when decoding")
    parser.add_argument(
        "--save_model_every_epoch",
        action="store_true",
        help="Whether to save model every epoch during training")
    parser.add_argument(
        "--predict_during_training",
        action="store_true",
        help="Whether to predict after each checkpoint-saving during training")
    parser.add_argument(
        "--evaluate_during_training",
        action="store_true",
        help="Whether to evaluate after each checkpoint-saving during training"
    )
    parser.add_argument(
        "--output_dir",
        default='output_dir/',
        type=str,
        help=
        "The output directory where the model predictions and checkpoints will be written.",
    )
    parser.add_argument(
        "--save_step",
        default=0,
        type=int,
        help="Save checkpoint every X updates steps.",
    )
    parser.add_argument(
        "--train_batch_size",
        default=16,
        type=int,
        help="Size of each train batch",
    )
    parser.add_argument(
        "--eval_batch_size",
        default=16,
        type=int,
        help="Size of each eval/predict batch",
    )
    parser.add_argument(
        "--gradient_accumulation_steps",
        default=1,
        type=int,
        help="gradient accumulation steps",
    )
    parser.add_argument(
        "--learning_rate",
        default=4e-5,
        type=float,
        help="learning rate",
    )
    parser.add_argument(
        "--num_train_epochs",
        default=100,
        type=int,
        help="Number of train epochs",
    )
    parser.add_argument(
        "--max_seq_length",
        default=None,
        type=int,
        help="Max input seq length",
    )
    parser.add_argument(
        "--max_length",
        default=None,
        type=int,
        help="Max output seq length",
    )
    parser.add_argument(
        "--prediction_dir",
        default=None,
        type=str,
        help=
        "The output directory where the predictions results will be written.",
    )
    parser.add_argument(
        "--prediction_suffix",
        default=None,
        type=str,
        help=" The supplementary suffix of prediction results name.",
    )
    parser.add_argument(
        "--mask_ratio",
        default=0.0,
        type=float,
        help="the proportion of masked words in the source",
    )
    parser.add_argument(
        "--mask_length",
        default="span-poisson",
        type=str,
        choices=['subword', 'word', 'span-poisson'],
        help="when masking words, the length of mask segments",
    )
    parser.add_argument(
        '--replace_length',
        default=-1,
        type=int,
        help=
        'when masking N tokens, replace with 0, 1, or N tokens (use -1 for N)')
    parser.add_argument(
        '--poisson_lambda',
        default=3.0,
        type=float,
        help='lambda of the Poisson distribution used to sample mask span lengths '
        'when mask_length is "span-poisson"')
    parser.add_argument(
        '--dataloader_num_workers',
        default=0,
        type=int,
        help='number of worker processes used by the dataloader; '
        'note that if it is larger than the number of available CPUs, the program may hang')
    parser.add_argument(
        '--evaluation_metric',
        default='qa',
        type=str,
        help="use 'passage' when pre-training on passages, otherwise use 'qa'")

    args = parser.parse_args()

    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train and not args.overwrite_output_dir):
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    if args.do_train:
        train_df = read_data_source_target(args.data_dir + "train.source",
                                           args.data_dir + "train.target")
    else:
        train_df = None

    if args.do_eval or args.evaluate_during_training:
        eval_df = read_data_source_target(args.data_dir + "valid.source",
                                          args.data_dir + "valid.target")
    else:
        eval_df = None

    if args.do_predict or args.predict_during_training:
        test_df = read_data_source_target(args.data_dir + "test.source",
                                          args.data_dir + "test.target")
    else:
        test_df = None

    model_args = {
        "reprocess_input_data": True,
        "overwrite_output_dir": args.overwrite_output_dir,
        "init_model_weights": args.init_model_weights,
        "max_seq_length": args.max_seq_length,
        "train_batch_size": args.train_batch_size,
        "eval_batch_size": args.eval_batch_size,
        "gradient_accumulation_steps": args.gradient_accumulation_steps,
        "learning_rate": args.learning_rate,
        "num_train_epochs": args.num_train_epochs,
        "save_eval_checkpoints": False,
        "save_model_every_epoch": args.save_model_every_epoch,
        "save_steps": args.save_step,
        "evaluate_during_training": args.evaluate_during_training,
        "evaluate_generated_text": True,
        "evaluate_during_training_verbose": True,
        "predict_during_training": args.predict_during_training,
        "use_multiprocessing": False,
        "output_dir": args.output_dir,
        "max_length": args.max_length,
        "manual_seed": 4,
        "mask_ratio": args.mask_ratio,
        "mask_length": args.mask_length,
        "replace_length": args.replace_length,
        "poisson_lambda": args.poisson_lambda,
        "fp16": False,
        "truncation": True,
        "dataloader_num_workers": args.dataloader_num_workers,
        "use_multiprocessed_decoding": args.use_multiprocessed_decoding,
        "evaluation_metric": args.evaluation_metric
    }

    # Initialize model
    if args.model_type == 'seq2seq':
        model = Seq2SeqModel(
            encoder_decoder_type="bart",
            encoder_decoder_name=args.model_name_or_path,
            args=model_args,
        )
    elif args.model_type == 't5':
        model = T5Model(
            model_name=args.model_name_or_path,
            args=model_args,
        )
    else:
        raise ValueError("The {} model is not supported now".format(
            args.model_type))

    # Train the model
    if args.do_train:
        model.train_model(train_data=train_df,
                          eval_data=eval_df,
                          test_data=test_df,
                          output_dir=args.output_dir)

    # Evaluate the model
    if args.do_eval:
        results = model.eval_model(eval_data=eval_df)
        print(results)

    # Use the model for prediction
    if args.do_predict:
        print(
            model.predict(pred_data=test_df,
                          output_dir=args.prediction_dir,
                          suffix=args.prediction_suffix))
Example No. 20
train_df = pd.read_csv("data/eng-spa/train.tsv", sep="\t").astype(str)
eval_df = pd.read_csv("data/eng-spa/eval.tsv", sep="\t").astype(str)

train_df["prefix"] = ""
eval_df["prefix"] = ""

model_args = T5Args()
model_args.max_seq_length = 80
model_args.train_batch_size = 10
model_args.eval_batch_size = 10
model_args.num_train_epochs = 1
model_args.evaluate_during_training = True
model_args.evaluate_during_training_steps = 30000
model_args.use_multiprocessing = False
model_args.fp16 = False
model_args.save_steps = -1
model_args.save_eval_checkpoints = False
model_args.no_cache = True
model_args.reprocess_input_data = True
model_args.overwrite_output_dir = True
model_args.preprocess_inputs = False
model_args.num_return_sequences = 1
model_args.wandb_project = "MT5 English-Spanish Translation"

model = T5Model("mt5", "google/mt5-base", args=model_args)

# Train the model
model.train_model(train_df, eval_data=eval_df)

# Optional: Evaluate the model. We'll test it properly anyway.
results = model.eval_model(eval_df, verbose=True)
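
# Illustrative inference call; the prefix columns were set to "" above, so raw sentences
# are passed to predict() without a task prefix:
print(model.predict(["I like to play many instruments. I love music."]))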
Example No. 21
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

train_df['prefix'] = "summarize"
eval_df['prefix'] = "summarize"

model_args = {
    "reprocess_input_data": True,
    "overwrite_output_dir": True,
    "max_seq_length": 512,
    "train_batch_size": 16,
    "num_train_epochs": 4,
}

# Create T5 Model
model = T5Model("t5", "t5-small", args=model_args, use_cuda=True)

# Train T5 Model on new task
model.train_model(train_df)

# Evaluate T5 Model on new task
results = model.eval_model(eval_df)

# Predict with trained T5 model
#print(model.predict(["convert: four"]))


results

# ## And we're done!