def test_readme_03(self):
    """Test: the code snippet found in README.rst.

    Configures the storage in code, registers two sources and two phrase
    entries, runs discover -> prepare -> train, and then asserts that the
    first entity found in each probe sentence carries the expected label.
    """
    excelcy = ExcelCy()
    excelcy.storage.base_path = self.test_data_path
    excelcy.storage.config = Config(
        nlp_base='en_core_web_sm', train_iteration=2, train_drop=0.2
    )

    # Sources: one inline sentence and one 'textract' entry given as a path.
    sources = (
        {'kind': 'text', 'value': 'Robertus Johansyah is the maintainer ExcelCy'},
        {'kind': 'textract', 'value': 'source/source_01.txt'},
    )
    for source in sources:
        excelcy.storage.source.add(**source)

    # (value, entity) pairs passed to storage.prepare.add with kind='phrase'.
    phrases = (
        ('Uber', 'ORG'),
        ('Robertus Johansyah', 'PERSON'),
    )
    for phrase, entity in phrases:
        excelcy.storage.prepare.add(kind='phrase', value=phrase, entity=entity)

    excelcy.discover()
    excelcy.prepare()
    excelcy.train()

    # Only the first entity of each parsed sentence is checked.
    expectations = (
        ('Uber blew through $1 million a week', 'ORG'),
        ('Robertus Johansyah is maintainer ExcelCy', 'PERSON'),
    )
    for sentence, expected_label in expectations:
        assert excelcy.nlp(sentence).ents[0].label_ == expected_label
def train_excelcy(save=False):
    """Run ExcelCy on the training workbook and print entities from book 1.

    Args:
        save: When truthy, call ``excelcy.save_nlp`` with
            ``constants.MODEL_DIR`` after ``execute`` has run.

    Prints two sets: the texts of entities labelled ``SHIP`` (with every
    ``'The '`` / ``'the '`` occurrence removed from the text) and the texts
    of entities labelled ``PERSON``. Nothing is returned.
    """
    excelcy = ExcelCy()
    add_stopwords(excelcy.nlp)

    # NOTE(review): the return value of execute() is discarded. If execute()
    # hands back a new instance rather than updating this one, the pipeline
    # used below is not the trained one -- confirm against the ExcelCy API.
    workbook = str(constants.MODEL_DATA_DIR / 'train_model.xlsx')
    excelcy.execute(workbook)

    if save:
        excelcy.save_nlp(str(constants.MODEL_DIR))

    pipeline = excelcy.nlp
    doc = pipeline(load_book_by_nr(1).content())
    entities = doc.ents

    # The pattern is unanchored, so it strips 'The '/'the ' wherever it occurs
    # in the entity text, not only at the start.
    ships = {
        re.sub('[tT]he ', '', entity.text)
        for entity in entities
        if entity.label_ == 'SHIP'
    }
    persons = {entity.text for entity in entities if entity.label_ == 'PERSON'}

    print(ships)
    print(persons)
"""Scratch script: print the entities found in one sample sentence.

The sentence is parsed twice: first with a freshly constructed ``ExcelCy()``
and then with the instance returned by ``ExcelCy.execute`` for
``tests/data/test_data_03.xlsx``. Each run prints a list of
``(label, text)`` pairs.
"""
from excelcy import ExcelCy
from excelcy.storage import Config

# Earlier in-code training experiment, kept disabled for reference:
# test_string = 'Android Pay expands to Canada'
# excelcy = ExcelCy()
# excelcy.storage.config = Config(nlp_base='en_core_web_sm', train_iteration=50, train_drop=0.2)
# doc = excelcy.nlp(test_string)
# # showing no ORG
# print([(ent.label_, ent.text) for ent in doc.ents])
# excelcy.storage.source.add(kind='text', value=test_string)
# excelcy.discover()
# excelcy.storage.prepare.add(kind='phrase', value='Android Pay', entity='PRODUCT')
# excelcy.prepare()
# excelcy.train()
# doc = excelcy.nlp(test_string)
# print([(ent.label_, ent.text) for ent in doc.ents])

# Recorded test failures (these look like pytest summary lines; the last one
# is cut off in the original note):
# FAILED tests/test_excelcy.py::ExcelCyTestCase::test_execute - AssertionError: assert ('$1', 'MONEY') in {('$1 million', 'MONEY'), ('Uber', 'ORG')}
# FAILED tests/test_pipe.py::PipeTestCase::test_execute - AssertionError: assert ('$1', 'MONEY') in {('$1 million', 'MONEY'), ('Uber', 'ORG')}
# FAILED tests/test_readme.py::ReadmeTestCase::test_readme_04 - AssertionError: assert ('China' == 'Himalayas'

SAMPLE_TEXT = 'Android Pay expands to Canada'


def _entity_pairs(parsed):
    """Return a list of ``(label_, text)`` tuples for each item in ``parsed.ents``."""
    return [(span.label_, span.text) for span in parsed.ents]


# First pass: a freshly constructed ExcelCy instance.
excelcy = ExcelCy()
doc = excelcy.nlp(SAMPLE_TEXT)
print(_entity_pairs(doc))

# Second pass: the instance returned by ExcelCy.execute for the test workbook.
excelcy = ExcelCy.execute(file_path='tests/data/test_data_03.xlsx')
doc = excelcy.nlp(SAMPLE_TEXT)
print(_entity_pairs(doc))