Example #1
import logging
import os

from forte import Pipeline
from forte.data.readers import DirPackReader

# WikiAddTitle, WikiEntityCompletion, WikiArticleWriter, SpacyProcessor,
# SubwordTokenizer and ProgressPrinter, along with the `pack_dir` and
# `loaded_resource` values used below, are imported or defined elsewhere
# in the source script; the snippet shows only this function.


def complete_and_tokens():
    # Define input/output paths.
    pack_input = os.path.join(pack_dir, "nif_raw_struct_links")
    pack_output = os.path.join(pack_dir, "nif_raw_struct_links_token")
    # Index files record which documents have been processed; they keep
    # the input and output directory structures similar.
    pack_input_index = os.path.join(pack_input, "article.idx")
    pack_output_index = os.path.join(pack_output, "article.idx")

    logging.basicConfig(
        format="%(asctime)s - %(message)s",
        level=logging.INFO,
        filename=os.path.join(pack_dir, "complete_tokenize.log"),
    )

    # Read the serialized packs, add article titles, run sentence and
    # token annotation with spaCy, add BERT subword tokens, then write
    # the packs back out with an updated index.
    pipeline = Pipeline(loaded_resource)
    pipeline.set_reader(
        DirPackReader(),
        config={"suffix": ".json.gz", "zip_pack": True},
    )
    # pipeline.add(WikiEntityCompletion())
    pipeline.add(WikiAddTitle())
    pipeline.add(
        SpacyProcessor(),
        config={"processors": ["sentence", "tokenize"]},
    )
    pipeline.add(
        SubwordTokenizer(),
        config={
            "tokenizer_configs": {
                "pretrained_model_name": "bert-base-uncased"
            },
            "token_source": "ft.onto.base_ontology.Token",
        },
    )
    pipeline.add(
        WikiArticleWriter(),
        config={
            "output_dir": pack_output,
            "zip_pack": True,
            "drop_record": True,
            "input_index_file": pack_input_index,
            "output_index_file": pack_output_index,
            "use_input_index": True,
            "serialize_method": "jsonpickle",
        },
    )
    pipeline.add(ProgressPrinter())
    pipeline.run(pack_input)
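
The snippet does not show how `pack_dir` and `loaded_resource` are created; a hypothetical driver, assuming the pack directory comes from the command line and an empty Forte `Resources` object is enough for this step, might look like this:

# Hypothetical driver for complete_and_tokens(); `pack_dir` and
# `loaded_resource` are placeholders for values the real script builds.
import sys

from forte.common.resources import Resources

pack_dir = sys.argv[1]  # directory containing the serialized DataPacks
loaded_resource = Resources()  # the real script loads shared resources here

complete_and_tokens()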
Example #2
from forte.huggingface import ZeroShotClassifier
from forte.stanza import StandfordNLPProcessor

from forte import Pipeline
from forte.data.readers import TerminalReader
from forte.processors.stave import StaveProcessor

# Read text typed at the terminal, annotate it with the Stanza-based
# StandfordNLPProcessor, then score each sentence against the candidate
# labels with a zero-shot classifier.
nlp = Pipeline()
nlp.set_reader(TerminalReader())
nlp.add(StandfordNLPProcessor())
nlp.add(
    ZeroShotClassifier(),
    config={
        "candidate_labels": [
            "travel",
            "cooking",
            "dancing",
            "exploration",
        ],
    },
)
# StaveProcessor serves the annotated packs in the Stave visualization UI.
nlp.add(StaveProcessor())
nlp.initialize()
nlp.run()
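
To run the same pipeline non-interactively, TerminalReader can be swapped for Forte's StringReader and text fed through process(); a minimal sketch follows (the sample sentence is made up, and StaveProcessor is dropped to keep the run headless):

from forte import Pipeline
from forte.data.readers import StringReader
from forte.huggingface import ZeroShotClassifier
from forte.stanza import StandfordNLPProcessor

nlp = Pipeline()
nlp.set_reader(StringReader())
nlp.add(StandfordNLPProcessor())
nlp.add(
    ZeroShotClassifier(),
    config={"candidate_labels": ["travel", "cooking", "dancing", "exploration"]},
)
nlp.initialize()

# process() returns the DataPack built for the given input string.
pack = nlp.process("I went backpacking across Patagonia last spring.")
print(pack.text)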