def save(self, *args, **kwargs):
    """Save the project, attaching an empty ConceptDB first if none is set.

    When ``self.concept_db`` is ``None``, a new ``CDB`` is dumped to
    ``empty_cdb.dat`` (a relative path, so it lands in the current working
    directory), its contents are stored on a new ``ConceptDB`` record named
    ``<self.name>_empty_cdb`` with ``use_for_training`` set to ``True``, and
    that record is assigned to ``self.concept_db``.

    Args:
        *args: Positional arguments forwarded unchanged to the parent ``save``.
        **kwargs: Keyword arguments forwarded unchanged to the parent ``save``.
    """
    if self.concept_db is None:
        # Write an empty CDB to disk so it can be attached as a file below.
        cdb = CDB()
        cdb.save_dict('empty_cdb.dat')

        cdb_obj = ConceptDB()
        cdb_obj.name = f'{self.name}_empty_cdb'
        # Scope the handle so it is closed as soon as the file field has
        # stored the contents, instead of leaking it.
        with open('empty_cdb.dat', 'rb') as f:
            cdb_obj.cdb_file.save(f'{self.name}_empty_cdb.dat', File(f))
        cdb_obj.use_for_training = True
        cdb_obj.save()

        self.concept_db = cdb_obj
    super(ProjectAnnotateEntities, self).save(*args, **kwargs)
# Read the downloaded vocabulary model into the existing `vocab` object
vocab.load_dict(os.path.join(medcat_path, 'med_ann_norm_dict.dat'))

# Create a CDB object; if you already have a saved CDB it could be loaded with:
# cdb.load_dict(os.path.join(medcat_path, 'simple_cdb.csv'))
cdb = CDB()

# A special-purpose CDB can instead be built from one or more .csv files
preparator = PrepareCDB(vocab=vocab)
csv_paths = [os.path.join(medcat_path, 'simple_cdb.csv')]  # further paths may be appended here
# NOTE: this reassignment replaces the list above, so only attention_cdb.csv is used
csv_paths = [os.path.join(medcat_path, 'attention_cdb.csv')]
# NOTE: this rebinds `cdb`, replacing the CDB() instance created earlier
cdb = preparator.prepare_csvs(csv_paths)

# Persist the freshly built CDB so it can be reused later
cdb.save_dict(os.path.join(medcat_path, 'simple_cdb.cdb'))

# Annotating a document
doc = "My simple document with kidney failure"
cat = CAT(cdb=cdb, vocab=vocab)
cat.train = False
doc_spacy = cat(doc)
# The entities are available in doc_spacy._.ents

# Alternatively, fetch the annotations as json
doc_json = cat.get_json(doc)

# Inspect the results visually
from spacy import displacy
# Note that this will not show all entities, but only the longest ones
displacy.serve(doc_spacy, style='ent')