def markup(text_path, ann_path, html_path):
    """Render the annotated text in `text_path` as HTML and write it to `html_path`.

    The text and its annotations are loaded with `get_entities_from_brat`. Each
    annotation is rendered as its text in bold followed by its type in square
    brackets; the un-annotated text before, between and after the annotations is
    passed through `abridge` and interleaved with them.

    Args:
        text_path: Path of the text file.
        ann_path: Path of the annotation file for `text_path`.
        html_path: Path of the HTML file to write.

    Returns:
        None. Nothing is written when there are no annotations.
    """
    # Local import keeps this block self-contained; the file's import header is not touched.
    import html

    text, ann = get_entities_from_brat(text_path, ann_path)
    print(len(ann), text_path, html_path)
    if not ann:
        return

    # Un-annotated text: before the first annotation, between consecutive ones, after the last.
    # Assumes `ann` is ordered by offset and non-overlapping -- TODO confirm against
    # get_entities_from_brat.
    gaps = [text[:ann[0]['start']]]
    gaps.extend(text[a['end']:b['start']] for a, b in zip(ann[:-1], ann[1:]))
    gaps.append(text[ann[-1]['end']:])
    # NOTE(review): gap text is emitted exactly as abridge() returns it. If abridge()
    # returns plain text it should be HTML-escaped as well; confirm it does not emit
    # markup of its own before adding that.
    gaps = [abridge(g) for g in gaps]

    # Escape annotation text and type so characters such as '<' or '&' cannot break
    # (or inject into) the generated markup.
    words = ['<b>%s</b> [%s] ' % (html.escape(str(a['text'])), html.escape(str(a['type'])))
             for a in ann]

    # There is one more gap than there are annotations: pair them up, then add the tail gap.
    gw = [g + w for g, w in zip(gaps, words)]
    gw.append(gaps[-1])

    body = '<body>%s</body>' % ''.join(gw)
    marked = '<html>%s</html>' % body
    write_file(html_path, marked)
def predict(self, text):
    """Label `text` with the loaded model and return the entities read back from brat.

    NOTE(review): the body opens with an unconditional ``assert False``, so with
    assertions enabled every call raises AssertionError before doing any work. The
    steps below only run when assertions are stripped (``python -O``).

    Steps, in order:
      1. Bump ``self.prediction_count``; on the first call point
         ``parameters['dataset_text_folder']`` at ``../data/temp`` and create the
         stats graph folder.
      2. Remove every path matching ``deploy*`` in the dataset text folder, then write
         `text` to ``deploy/temp_NNNNN.txt`` there (UTF-8).
      3. Refresh the deploy file paths / brat folders and update the dataset.
      4. Run ``train.prediction_step`` and convert its output with
         ``conll_to_brat.output_brat``.
      5. Read the text and entities back with ``brat_to_conll.get_entities_from_brat``.

    Args:
        text: The text to label.

    Returns:
        The entities returned by ``brat_to_conll.get_entities_from_brat``.

    Raises:
        AssertionError: Always when assertions are enabled; otherwise when the text
            read back from the brat output differs from `text`.
    """
    assert False

    self.prediction_count += 1
    if self.prediction_count == 1:
        # First prediction only: switch to the temporary data folder and set up output.
        self.parameters['dataset_text_folder'] = os.path.join('..', 'data', 'temp')
        self.stats_graph_folder, _ = self._create_stats_graph_folder(self.parameters)

    split = 'deploy'
    text_root = self.parameters['dataset_text_folder']

    # Clear out whatever earlier deployments left behind (folders and plain files alike).
    for stale_path in glob.glob(os.path.join(text_root, '{0}*'.format(split))):
        remove = shutil.rmtree if os.path.isdir(stale_path) else os.remove
        remove(stale_path)

    # Write the text to a fresh, numbered file in the deploy folder.
    deploy_folder = os.path.join(text_root, split)
    utils.create_folder_if_not_exists(deploy_folder)
    serial = str(self.prediction_count).zfill(5)
    deploy_text_path = os.path.join(deploy_folder, 'temp_{0}.txt'.format(serial))
    with codecs.open(deploy_text_path, 'w', 'UTF-8') as handle:
        handle.write(text)

    # Register the new deploy files and refresh the dataset for that split.
    new_filepaths, new_brat_folders = self._get_valid_dataset_filepaths(
        self.parameters, dataset_types=[split])
    self.dataset_filepaths.update(new_filepaths)
    self.dataset_brat_folders.update(new_brat_folders)
    self.dataset.update_dataset(self.dataset_filepaths, [split])

    # Predict labels; only the third element of the result (the output path) is used.
    _, _, deploy_output = train.prediction_step(
        self.sess, self.dataset, split, self.model, self.transition_params_trained,
        self.stats_graph_folder, self.prediction_count, self.parameters,
        self.dataset_filepaths)
    output_filepaths = {split: deploy_output}
    conll_to_brat.output_brat(output_filepaths, self.dataset_brat_folders,
                              self.stats_graph_folder, overwrite=True)

    # Read the result back from the brat output written above.
    brat_output_folder = os.path.join(self.stats_graph_folder, 'brat', 'deploy')
    text_filepath = os.path.join(brat_output_folder, os.path.basename(deploy_text_path))
    annotation_name = '{0}.ann'.format(
        utils.get_basename_without_extension(deploy_text_path))
    annotation_filepath = os.path.join(brat_output_folder, annotation_name)
    text2, entities = brat_to_conll.get_entities_from_brat(
        text_filepath, annotation_filepath, verbose=True)
    assert text == text2
    return entities