Example #1
import glob

from docopt import docopt

# NOTE: this Document import path matches the old haystack releases that also ship
# haystack.database.sql; in other versions it may be `from haystack import Document`.
from haystack.database.base import Document
from haystack.reader.farm import FARMReader


def main():
    # docopt parses the CLI arguments from the module docstring (see the usage sketch below)
    args = docopt(__doc__)
    data_dir = args["--data_dir"]

    if args["train"]:
        reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=False)
        reader.train(data_dir=data_dir,
                     train_filename=args["--train_file_name"],
                     dev_filename=args["--dev_file_name"],
                     use_gpu=False,
                     n_epochs=1,
                     save_dir=args["--save_dir"],
                     dev_split=0.05)

    if args["test"]:
        reader = FARMReader(model_name_or_path=args["--save_dir"], use_gpu=False)
        print(reader.eval_on_file(data_dir, args["--eval_file_name"], "cpu"))

    if args["cli"]:
        reader = FARMReader(model_name_or_path=args["--save_dir"], use_gpu=False)
        query_doc_list = []
        # Load every .txt file in the data directory as one Document (with a unique id)
        doc_id = 0
        for text_file in glob.glob(data_dir + "/*.txt"):
            with open(text_file, "r") as f:
                context = f.read()
            # context = context.split(".")
            context = [context]
            for para in context:
                query_doc_list.append(Document(id=str(doc_id), text=para))
                doc_id += 1
        # Simple interactive QA loop over the loaded documents
        while True:
            question = input("CTRL C to exit >")
            prediction = reader.predict(question, query_doc_list)
            print("answer:>> ", prediction["answers"][0]["answer"])
            print("-----")
            print("context:>> ", prediction["answers"][0]["context"])
            print("-------------")
def tutorial2_finetune_a_model_on_your_data():
    # ## Create Training Data
    #
    # There are two ways to generate training data
    #
    # 1. **Annotation**: You can use the annotation tool (https://github.com/deepset-ai/haystack#labeling-tool) to label
    #                    your data, i.e. highlighting answers to your questions in a document. The tool supports structuring
    #                    your workflow with organizations, projects, and users. The labels can be exported in SQuAD format,
    #                    which is compatible with training in Haystack.
    #
    # 2. **Feedback**:   For production systems, you can collect training data from direct user feedback via Haystack's
    #                    REST API interface. This includes a customizable user feedback API for providing feedback on the
    #                    answer returned by the API. The API provides a feedback export endpoint to obtain the feedback data
    #                    for fine-tuning your model further.
    #
    #
    # ## Fine-tune your model
    #
    # Once you have collected training data, you can fine-tune your base model.
    # We initialize a reader as a base model and fine-tune it on our own custom dataset,
    # which should be in SQuAD-like format (see the sketch below).
    # We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer
    # Learning effects.
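    #
    # For orientation, a SQuAD-like training file is a JSON document shaped roughly
    # like the sketch below (standard SQuAD v2 field names; the values are purely
    # illustrative and not part of the original tutorial):
    #
    # {
    #   "data": [
    #     {
    #       "title": "Example document",
    #       "paragraphs": [
    #         {
    #           "context": "Berlin is the capital of Germany.",
    #           "qas": [
    #             {
    #               "id": "q1",
    #               "question": "What is the capital of Germany?",
    #               "answers": [{"text": "Berlin", "answer_start": 0}],
    #               "is_impossible": false
    #             }
    #           ]
    #         }
    #       ]
    #     }
    #   ]
    # }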

    # **Recommendation**: Run training on a GPU. To do so, change the `use_gpu` arguments below to `True`.

    reader = FARMReader(
        model_name_or_path="distilbert-base-uncased-distilled-squad",
        use_gpu=True)
    train_data = "data/squad20"
    # train_data = "PATH/TO_YOUR/TRAIN_DATA"
    reader.train(data_dir=train_data,
                 train_filename="dev-v2.0.json",
                 use_gpu=True,
                 n_epochs=1,
                 save_dir="my_model")

    # Saving the model happens automatically at the end of training into the `save_dir` you specified
    # However, you could also save a reader manually again via:
    reader.save(directory="my_model")

    # If you want to load it at a later point, just do:
    new_reader = FARMReader(model_name_or_path="my_model")
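    # Optional sanity check with the reloaded reader. This is a sketch, not part of
    # the original tutorial; the Document import path and the example text/question
    # are assumptions:
    #
    #   from haystack.database.base import Document
    #   docs = [Document(id="0", text="Berlin is the capital of Germany.")]
    #   print(new_reader.predict("What is the capital of Germany?", docs))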
from haystack.database.sql import SQLDocumentStore
from haystack.indexing.cleaning import clean_wiki_text
from haystack.indexing.io import write_documents_to_db, fetch_archive_from_http
from haystack.reader.farm import FARMReader
from haystack.retriever.tfidf import TfidfRetriever
from haystack.utils import print_answers

#### TRAINING #############
# Let's take a reader as a base model
reader = FARMReader(
    model_name_or_path="distilbert-base-uncased-distilled-squad",
    use_gpu=False)

# and fine-tune it on your own custom dataset (should be in SQuAD-like format)
train_data = "PATH/TO_YOUR/TRAIN_DATA"
reader.train(data_dir=train_data,
             train_filename="train.json",
             use_gpu=False,
             n_epochs=1)
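
# Optionally evaluate the fine-tuned reader on a held-out SQuAD-style file, as in
# Example #1 (a sketch; "dev.json" is an assumed file name and the last argument
# selects the device):
#
#   print(reader.eval_on_file(train_data, "dev.json", "cpu"))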

#### Use it (same as in Tutorial 1) #############

## Indexing & cleaning documents

# Let's get the data (Game of Thrones articles from Wikipedia)
doc_dir = "data/article_txt_got"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt.zip"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)

# Init Document store & write docs to it
document_store = SQLDocumentStore(url="sqlite:///qa.db")
write_documents_to_db(document_store=document_store,
                      document_dir=doc_dir,
                      clean_func=clean_wiki_text,
                      only_empty_db=True)
Example #5
from haystack import Finder
from haystack.database.sql import SQLDocumentStore
from haystack.indexing.cleaning import clean_wiki_text
from haystack.indexing.io import write_documents_to_db, fetch_archive_from_http
from haystack.reader.farm import FARMReader
from haystack.retriever.tfidf import TfidfRetriever
from haystack.utils import print_answers

#### TRAINING #############
# Let's take a reader as a base model
reader = FARMReader(model_name_or_path="distilbert-base-uncased-distilled-squad", use_gpu=False)

# and fine-tune it on your own custom dataset (should be in SQuAD-like format)
train_data = "PATH/TO_YOUR/TRAIN_DATA"
reader.train(data_dir=train_data, train_filename="train.json", n_epochs=1)


#### Use it (same as in Tutorial 1) #############

## Indexing & cleaning documents

# Let's get the data (Game of Thrones articles from Wikipedia)
doc_dir = "data/article_txt_got"
s3_url = "https://s3.eu-central-1.amazonaws.com/deepset.ai-farm-qa/datasets/documents/wiki_gameofthrones_txt.zip"
fetch_archive_from_http(url=s3_url, output_dir=doc_dir)


# Init Document store & write docs to it
document_store = SQLDocumentStore(url="sqlite:///qa.db")
write_documents_to_db(document_store=document_store, document_dir=doc_dir, clean_func=clean_wiki_text, only_empty_db=True)
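
# To actually query the fine-tuned reader (as in Tutorial 1), combine it with a
# retriever in a Finder. A minimal sketch; the question and top_k values are
# illustrative and not part of this example:
retriever = TfidfRetriever(document_store=document_store)
finder = Finder(reader=reader, retriever=retriever)
prediction = finder.get_answers(question="Who is the father of Arya Stark?", top_k_retriever=10, top_k_reader=5)
print_answers(prediction, details="minimal")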
Example #6
# 1. **Annotation**: You can use the annotation tool (https://github.com/deepset-ai/haystack#labeling-tool) to label
#                    your data, i.e. highlighting answers to your questions in a document. The tool supports structuring
#                    your workflow with organizations, projects, and users. The labels can be exported in SQuAD format,
#                    which is compatible with training in Haystack.
#
# 2. **Feedback**:   For production systems, you can collect training data from direct user feedback via Haystack's
#                    REST API interface. This includes a customizable user feedback API for providing feedback on the
#                    answer returned by the API. The API provides a feedback export endpoint to obtain the feedback data
#                    for fine-tuning your model further.
#
#
# ## Fine-tune your model
#
# Once you have collected training data, you can fine-tune your base models.
# We initialize a reader as a base model and fine-tune it on our own custom dataset (should be in SQuAD-like format).
# We recommend using a base model that was trained on SQuAD or a similar QA dataset before to benefit from Transfer
# Learning effects.
reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2",
                    use_gpu=False)
# train_data = "data"
# train_data = "PATH/TO_YOUR/TRAIN_DATA"
reader.train(data_dir="/home/bulelani/Desktop/odin/odin/src_new/data/training",
             train_filename="answers.json",
             use_gpu=False,
             n_epochs=1,
             save_dir="/home/bulelani/Desktop/odin/saved_models")

# Saving the model happens automatically at the end of training into the `save_dir` you specified
# However, you could also save a reader manually again via:
reader.save(directory="/home/bulelani/Desktop/odin/saved_models")

# If you want to load it at a later point, just do:
# new_reader = FARMReader(model_name_or_path="/home/bulelani/Desktop/odin/saved_models")
Example #7
from haystack.reader.farm import FARMReader

reader = FARMReader(
    model_name_or_path="distilbert-base-uncased-distilled-squad",
    use_gpu=False)
train_data = "/home/bulelani/Desktop/odin/odin/src_new/data/training"
# train_data = "PATH/TO_YOUR/TRAIN_DATA"
reader.train(data_dir=train_data,
             train_filename="demo.json",
             use_gpu=False,
             n_epochs=100,
             save_dir="/home/bulelani/Desktop/odin/my_model")

# Saving the model happens automatically at the end of training into the `save_dir` you specified
# However, you could also save a reader manually again via:
reader.save(directory="/home/bulelani/Desktop/odin/my_model")
Example #8
from haystack.reader.farm import FARMReader

# Input directory containing the labels file (answers.json)
train_data = "/usr/src/app/data/squad20"
# Output directory for the fine-tuned model
train_model = "/usr/src/app/data/train_model"

reader = FARMReader(
    model_name_or_path="distilbert-base-uncased-distilled-squad",
    use_gpu=False)

reader.train(data_dir=train_data,
             train_filename="answers.json",
             n_epochs=20,
             dev_split=0,
             save_dir=train_model)

print('Training successfully completed')
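
# To use the fine-tuned model later, load it from the output directory in the same
# way as the other examples (a minimal sketch, not part of the original snippet):
new_reader = FARMReader(model_name_or_path=train_model, use_gpu=False)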