def main():
    """Run an end-to-end Forte pipeline over a small HTML snippet.

    The pipeline cleans the HTML, annotates the text with SpaCy
    (sentences, tokens, POS, NER, dependencies, UMLS entity linking),
    and pushes the resulting data pack to Stave for visualization.
    Dependency arcs and UMLS links found in each pack are printed.
    """
    # The raw input: a single sentence wrapped in HTML tags.
    html_input = (
        "<body><p>"
        "she does not have SVS syndrome from an axillary vein thrombosis."
        "</p></body>"
    )

    # Tasks the SpacyProcessor should perform on each data pack.
    spacy_config = {
        "processors": ["sentence", "tokenize", "pos", "ner", "dep", "umls_link"]
    }

    # Assemble the pipeline. A pipeline consists of a reader and a set of
    # processors; data flows through it as data packs, with each component
    # reading from or adding annotations to the packs.
    pl = Pipeline[DataPack]()
    # HTMLReader takes HTML strings, strips the tags, and stores the
    # cleaned text in the pack.
    pl.set_reader(HTMLReader())
    # SpacyProcessor performs the user-configured NLP tasks listed above.
    pl.add(SpacyProcessor(), config=spacy_config)
    # StaveProcessor provides easy visualization of annotated packs.
    pl.add(StaveProcessor())
    pl.initialize()

    for pack in pl.process_dataset(html_input):
        # Report every dependency arc produced by the parser.
        for dep in pack.get(Dependency):
            head = dep.get_parent()
            dependent = dep.get_child()
            print(
                f"'{dependent.text}' has dependency '{dep.dep_label}'"
                f" of parent '{head.text}'"
            )
        # Print the UMLS concepts linked to each medical entity mention.
        for mention in pack.get(MedicalEntityMention):
            for umls_entity in mention.umls_entities:
                print(umls_entity)
def setUp(self):
    """Prepare the fixture: resolve data paths and build the pipeline.

    Sets up a Stave processor and an Ontonotes-reading pipeline whose
    ontology is loaded from the test-specs directory.
    """
    # Port the Stave server is expected to use.
    self._port: int = 8880
    # Resolve data directories relative to this test file's location.
    here: str = os.path.dirname(__file__)
    self._project_name: str = "serialization_pipeline_test"
    self._dataset_dir: str = os.path.abspath(
        os.path.join(here, "../../../", "data_samples/ontonotes/00/")
    )
    self._test_specs_dir: str = os.path.abspath(
        os.path.join(here, "../data/ontology/test_specs/")
    )
    self._stave_processor = StaveProcessor()
    # The pipeline validates packs against the test ontology spec.
    ontology_path = os.path.join(
        self._test_specs_dir, "test_stave_ontology.json"
    )
    self.pl = Pipeline[DataPack](ontology_file=ontology_path)
    self.pl.set_reader(OntonotesReader())
from forte.huggingface import ZeroShotClassifier
from forte.stanza import StandfordNLPProcessor
from forte import Pipeline
from forte.data.readers import TerminalReader
from forte.processors.stave import StaveProcessor

# Labels the zero-shot classifier scores each utterance against.
classifier_config = {
    "candidate_labels": [
        "travel",
        "cooking",
        "dancing",
        "exploration",
    ],
}

# Read text typed at the terminal, annotate it with Stanza, classify it
# zero-shot against the labels above, and visualize the result in Stave.
nlp = Pipeline()
nlp.set_reader(TerminalReader())
nlp.add(StandfordNLPProcessor())
nlp.add(ZeroShotClassifier(), config=classifier_config)
nlp.add(StaveProcessor())
nlp.initialize()
nlp.run()
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Example code for pytorch blog post, a "one-liner" version.
"""
from forte.data.data_pack import DataPack
from forte.data.readers import HTMLReader
from forte.pipeline import Pipeline
from forte.processors.stave import StaveProcessor
from fortex.spacy import SpacyProcessor

# Build and run the entire pipeline as one chained expression: the reader
# strips the HTML tags, SpaCy annotates the cleaned text (sentences,
# tokens, POS, NER, dependencies, UMLS linking), and Stave visualizes
# the resulting data pack.
Pipeline[DataPack]().set_reader(HTMLReader()).add(
    SpacyProcessor(),
    config={
        "processors": [
            "sentence",
            "tokenize",
            "pos",
            "ner",
            "dep",
            "umls_link",
        ]
    },
).add(StaveProcessor()).run(
    "<body><p>"
    "she does not have SVS syndrome from an axillary vein thrombosis."
    "</p></body>"
)