Пример #1
0
def main():
    """Run a small HTML -> spaCy -> Stave pipeline and print its results.

    A hard-coded HTML snippet is fed through a ``Pipeline[DataPack]`` made of
    an ``HTMLReader``, a ``SpacyProcessor`` and a ``StaveProcessor``. For every
    data pack produced, the dependency links and the UMLS entries attached to
    each ``MedicalEntityMention`` are printed to standard output.
    """
    # The HTML document to analyze.
    html_source = (
        "<body><p>"
        "she does not have SVS syndrome from an axillary vein thrombosis."
        "</p></body>"
    )

    # Tasks the SpacyProcessor is asked to perform, selected through its
    # config: sentence splitting, tokenization, POS tagging, NER, dependency
    # parsing and UMLS entity linking.
    spacy_config = {
        "processors": ["sentence", "tokenize", "pos", "ner", "dep", "umls_link"]
    }

    # A pipeline consists of components (a reader plus processors). Data
    # flows through it as data packs, and each component reads from or adds
    # information to those packs.
    nlp_pipeline = Pipeline[DataPack]()

    # Reader: HTMLReader cleans the HTML tags and stores the cleaned text in
    # the pack.
    nlp_pipeline.set_reader(HTMLReader())

    # Processor: spaCy-based annotation, driven by the config above.
    nlp_pipeline.add(SpacyProcessor(), config=spacy_config)

    # Processor: StaveProcessor visualizes the annotated data pack.
    nlp_pipeline.add(StaveProcessor())

    # Initialize the components, then process the input.
    nlp_pipeline.initialize()
    for pack in nlp_pipeline.process_dataset(html_source):

        # Report every dependency relation found in the pack.
        for link in pack.get(Dependency):
            parent = link.get_parent()
            child = link.get_child()
            relation = (
                f"'{child.text}' has dependency '{link.dep_label}'"
                f" of parent '{parent.text}'"
            )
            print(relation)

        # Report the UMLS links attached to every medical entity mention.
        for mention in pack.get(MedicalEntityMention):
            for linked_concept in mention.umls_entities:
                print(linked_concept)
Пример #2
0
    def setUp(self):
        """Initialize the fixture attributes.

        Stores the port and project name, resolves the sample-dataset and
        ontology-spec directories relative to this file, creates a
        ``StaveProcessor`` and builds a ``Pipeline[DataPack]`` that uses an
        ``OntonotesReader`` as its reader.
        """
        here = os.path.dirname(__file__)

        self._port: int = 8880
        self._project_name: str = "serialization_pipeline_test"

        # Both directories are located relative to this file and stored as
        # absolute paths.
        dataset_rel = os.path.join(
            here, "../../../", "data_samples/ontonotes/00/"
        )
        self._dataset_dir: str = os.path.abspath(dataset_rel)

        specs_rel = os.path.join(here, "../data/ontology/test_specs/")
        self._test_specs_dir: str = os.path.abspath(specs_rel)

        self._stave_processor = StaveProcessor()

        # The pipeline is configured with the ontology file found in the
        # test-specs directory.
        ontology_path = os.path.join(
            self._test_specs_dir, "test_stave_ontology.json"
        )
        self.pl = Pipeline[DataPack](ontology_file=ontology_path)
        self.pl.set_reader(OntonotesReader())
Пример #3
0
from forte.huggingface import ZeroShotClassifier
from forte.stanza import StandfordNLPProcessor

from forte import Pipeline
from forte.data.readers import TerminalReader
from forte.processors.stave import StaveProcessor

# Candidate labels handed to ZeroShotClassifier through its config.
_candidate_labels = ["travel", "cooking", "dancing", "exploration"]

# Assemble the pipeline: reader first, then the processors in the order
# they are added.
nlp = Pipeline()
nlp.set_reader(TerminalReader())
nlp.add(StandfordNLPProcessor())
nlp.add(ZeroShotClassifier(), config={"candidate_labels": _candidate_labels})
nlp.add(StaveProcessor())

# Initialize every component, then start the pipeline.
nlp.initialize()
nlp.run()
Пример #4
0
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example code for pytorch blog post, a "one-liner" version.
"""
from forte.data.data_pack import DataPack
from forte.data.readers import HTMLReader
from forte.pipeline import Pipeline
from forte.processors.stave import StaveProcessor
from fortex.spacy import SpacyProcessor

# Tasks requested from SpacyProcessor through its config.
_SPACY_CONFIG = {
    "processors": ["sentence", "tokenize", "pos", "ner", "dep", "umls_link"]
}

# HTML document passed to the pipeline's run() call.
_INPUT_HTML = (
    "<body><p>"
    "she does not have SVS syndrome from an axillary vein thrombosis."
    "</p></body>"
)

# Build and run the whole pipeline as one chained expression:
# reader -> spaCy processing -> Stave visualization -> run on the input.
(
    Pipeline[DataPack]()
    .set_reader(HTMLReader())
    .add(SpacyProcessor(), config=_SPACY_CONFIG)
    .add(StaveProcessor())
    .run(_INPUT_HTML)
)