def start_loop(stream_name=None, network='s3fd', redis_host='localhost', redis_port=6379):
    """Starts the processing loop."""
    killer = GracefulKiller()

    LOG.info('Initializing Redis cache...')
    cache = redis.StrictRedis(host=redis_host, port=redis_port, db=0)

    LOG.info('Initializing annotator...')
    annotator = Annotator(network=network)

    LOG.info('Starting processing loop...')
    while True:
        # Get the next available frame from the cache
        image_bytes = cache.get('{}_raw'.format(stream_name))
        if image_bytes is None:
            continue

        # Annotate with bounding boxes
        # TODO: Get size from feed configs
        image = Image.frombytes('RGB', (1920, 1080), image_bytes)
        image = annotator.annotate(image)

        # Save the frame to the cache as raw bytes
        cache.set('{}_annotated'.format(stream_name), image.tobytes())

        # Capture kill signals and terminate the loop
        if killer.kill_now:
            LOG.info('Shutting down gracefully...')
            break
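# The GracefulKiller used above is not defined in this snippet. A minimal
# sketch, assuming it simply flips a flag when SIGINT or SIGTERM arrives so
# the loop can exit cleanly; the real implementation may differ.
import signal

class GracefulKiller:
    """Sets kill_now to True when the process receives SIGINT or SIGTERM."""

    def __init__(self):
        self.kill_now = False
        signal.signal(signal.SIGINT, self._exit_gracefully)
        signal.signal(signal.SIGTERM, self._exit_gracefully)

    def _exit_gracefully(self, signum, frame):
        self.kill_now = True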
def executeAnnotator(logger, startTime, datetime, confpath, pattern, paths):
    print("execute Annotator")
    momentum = datetime.now()
    now = momentum - startTime
    for path in list(paths):
        mags, results = readKataMagazines(logger)
        results, magazines, minl, maxl = readFile(logger, pattern, now, path, mags)
        annotator = Annotator(None, confpath)
        units = annotator.doAnnotationWithConfig(results, mags, magazines)
        momentum = logQueryData(logger, momentum, units, None)
        print("execute Annotator")
        writeResultsToRDF(units)

    now = datetime.now() - momentum
    end = datetime.now() - startTime
    print("Finished queries in " + str(now))
    print("REACHED THE END in " + str(end))
    logger.info("Application execution ended, and it lasted for " + str(end))
def __init__(self, switch):
    self.switch = switch.lower()
    Annotator.__init__(self, "sift")
    self.input_file = "%s/input/%s.csv" % (self.root_dir, switch)
    self.output_dir = "%s/output" % (self.root_dir)
    self.indexOf = {
        "coordinates": 0,
        "codons": 1,
        "transcript id": 2,
        "protein id": 3,
        "substitutions": 4,
        "region": 5,
        "dbsnp id": 6,
        "snp type": 7,
        "prediction": 8,
        "score": 9,
        "median info": 10,
        "# seqs at position": 11,
        "gene id": 12,
        "gene name": 13,
        "gene desc": 14,
        "omim disease": 15,
        "average allele freqs": 16,
        "ceu allele freqs": 17,
        "user comment": 18,
    }
    self.cols = ["region", "snp type", "prediction", "score", "omim disease"]
    self.db_cols = ["sift_region", "sift_type", "sift_prediction", "sift_score", "sift_omim"]
def annotate_data(self, nr_docs=-1):
    self.logger.println("data annotator called")
    start_time = timeit.default_timer()
    annotator = Annotator()
    annotator.prepare_dataset(nr_docs)
    elapsed_seconds = timeit.default_timer() - start_time
    self.logger.print_time_taken("data annotation operation took", elapsed_seconds)
def generate_annotator(self, event):
    annotation = Annotator(self.channel_fnames, self.probability_table, self.output_dir)
    new_table = annotation.annotate([self.x0, self.y0], [self.x1, self.y1])
    # Add the new table to the ground-truth table
    self.groundtruth_table.loc[new_table.index, self.column_names] = \
        new_table.loc[:, self.column_names]
def run_annotator():
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('--source-files', dest='source_files', type=str, nargs='+', help='')
    args = parser.parse_args()
    anno = Annotator()
    anno.annotate(args.source_files)
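# Hypothetical entry point and invocation for the parser above; the script
# name and file paths are illustrative, not taken from the original project.
#
#   python run_annotator.py --source-files src/main.py src/utils.py
if __name__ == '__main__':
    run_annotator()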
class TestAnnotator(unittest.TestCase):

    def setUp(self):
        self.annotator = Annotator(TestFiles.make_io_helper())

    def test_can_create(self):
        assert self.annotator

    def test_creates_prefixes_without_information(self):
        self.assertEqual(self.annotator.save_as_turtle(as_string=True),
                         TestFiles.PREFIXES)

    def test_includes_added_organism_into_turtle(self):
        self.annotator.add_bug(*TestFiles.BOUNDING_BOX)
        self.assertEqual(self.annotator.save_as_turtle(as_string=True),
                         TestFiles.make_rdf_file())
def __init__(self, dock_area, params_tree, config):
    self.dock_area = dock_area
    self.db = ParameterDB()
    self.curves = {}
    self.qtree_widget_items = {}
    self.plot_widget_items = []
    # self.params = {}  # [Parameter, Plot, Curve, QTreeWidgetItem]
    self.params_tree = params_tree
    self.last_dock_added = None
    self.xlink = None
    self.annotators = {
        a['param']: Annotator(a['param'], a)
        for a in config.get('annotators', [])
    }
    self.auto_pens = [
        pg.mkPen('#f00'),
        pg.mkPen('#0f0'),
        pg.mkPen('#00f'),
        pg.mkPen('#AA0'),
        pg.mkPen('#0AA'),
        pg.mkPen('#A0A'),
    ]
    self.initialize_plots(config)
def default_annotator(mix_path, src_track_paths):
    from nodes import Unmixer, GenericProvider, SegmentFinder, XFadeFinder, Fingerprinter
    from annotator import Annotator
    return Annotator([
        GenericProvider(src_track_paths=src_track_paths, mix_path=mix_path),
        Fingerprinter(lazy=False),
        Unmixer(),
        XFadeFinder(),
        SegmentFinder(),
    ])
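# Hypothetical usage of the pipeline factory above; the mix and track paths
# are illustrative.
annotator = default_annotator('mixes/set1.mp3',
                              ['tracks/intro.mp3', 'tracks/outro.mp3'])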
def test_sanity_check(self):
    """A sanity check test just to be sure that nothing is broken
    when making some change to the code."""
    with open('tests/report.txt') as f:
        text = f.read()
    annot = Annotator.RadlexAnnotator()
    annotations = annot.annotate(text)
    self.assertEqual(77, len(annotations))
def handle_resume_post(self):
    # Get the uploaded file
    file = request.files['file']
    if file and self.__allowed_file(file.filename):
        # Save the file to the upload folder
        file.save(os.path.join(self.__app.config['UPLOAD_FOLDER'], file.filename))
        # Annotate the resume with the trained CRF model
        annotator = Annotator()
        annotated_resume = annotator.annotate_using_trained_model(
            self.__app.config['UPLOAD_FOLDER'] + self.__seperator + file.filename)
        tagged_resume = self.__crfsuite.tag_doc(annotated_resume)
        template = render_template('%s.xml' % self.__path_output_xml, entities=tagged_resume)
        response = make_response(template)
        response.headers['Content-Type'] = 'application/xml'
        return response
    else:
        return "Invalid file type, use PDF, DOC or DOCX", 406
class Caption(Resource):
    """REST end point for the captioning service.

    Accepts images (via HTTP POST requests) and returns corresponding
    captions.
    """

    def __init__(self):
        self.annotator = Annotator()

    def post(self):
        """Create a caption for an image.

        The POST request should include either an image or a URL pointing
        to an image.

        Parameters
        ----------
        image
            Image data sent with the request.
        url
            URL of the image to be processed.
        """
        parser = reqparse.RequestParser()
        parser.add_argument("image", type=datastructures.FileStorage, location='files')
        parser.add_argument("url")
        args = parser.parse_args()
        if args.image:
            image = Image.open(args.image)
        else:
            ssl._create_default_https_context = ssl._create_unverified_context
            with urllib.request.urlopen(args.url) as url:
                f = io.BytesIO(url.read())
                image = Image.open(f)
        annotation = self.annotator.annotate(image)
        data = {
            "annotation": annotation,
        }
        return data, 201
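# A minimal sketch of how the Caption resource might be wired into an
# application, assuming Flask-RESTful; the route path and port are
# illustrative, not taken from the original service.
from flask import Flask
from flask_restful import Api

flask_app = Flask(__name__)
api = Api(flask_app)
api.add_resource(Caption, '/caption')

if __name__ == '__main__':
    flask_app.run(port=5000)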
from fastapi import FastAPI
from annotator import Annotator
from starlette.responses import JSONResponse
from starlette.middleware.cors import CORSMiddleware
from pydantic import BaseModel

annotator = Annotator()

app = FastAPI()

origins = ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_methods=['*'],
    allow_headers=['*'],
)


class Text(BaseModel):
    text: str


@app.post('/annotate_text')
def annotate_text(text: Text):
    return JSONResponse(annotator.annotate(text.text))


@app.get('/', status_code=200)
async def healthcheck():
    return 'Annotator is ready!'
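# A hedged client-side example for the service above, assuming it is served
# with uvicorn on the default port 8000; the host, port, and sample text are
# illustrative.
#
#   uvicorn main:app --port 8000
import requests

resp = requests.post('http://localhost:8000/annotate_text',
                     json={'text': 'Annotate this sentence.'})
print(resp.status_code, resp.json())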
def setUp(self):
    self.annotator = Annotator(TestFiles.make_io_helper())
def build_app(config_file):
    global annotatr
    annotatr = Annotator.from_config(config_file)
    return app
# Make Qt logs go to the logger
QtCore.qInstallMessageHandler(qt_message_handler)

logger = logging.getLogger(__name__)
logger.info('Annotator %s starting on %s (%s)', '0.0.0.0', platform.system(), sys.platform)

if platform.system() == 'Windows':
    logger.info('Applying Windows-specific setup')

    # Enable automatic scaling for high-DPI screens
    os.environ['QT_AUTO_SCREEN_SCALE_FACTOR'] = '1'

    # Set the App ID for Windows 7 to properly display the icon in the
    # taskbar.
    import ctypes
    myappid = '??????'  # arbitrary string
    try:
        ctypes.windll.shell32.SetCurrentProcessExplicitAppUserModelID(myappid)
    except Exception:
        logger.error('Could not set the app model ID. If the platform is older than Windows 7, this is normal.')
elif platform.system() == 'Darwin':
    pass
    # logger.info('Applying Mac OS-specific setup')

app = QApplication(sys.argv)
window = Annotator()
window.show()
return_code = app.exec_()
del window  # prevent Mac errors
sys.exit(return_code)
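# The qt_message_handler installed above is not shown in this snippet. A
# minimal sketch, assuming it forwards Qt messages to the standard logging
# module (PyQt5 enum names); the real handler may map message types
# differently.
import logging
from PyQt5 import QtCore

_QT_LEVELS = {
    QtCore.QtDebugMsg: logging.DEBUG,
    QtCore.QtInfoMsg: logging.INFO,
    QtCore.QtWarningMsg: logging.WARNING,
    QtCore.QtCriticalMsg: logging.ERROR,
    QtCore.QtFatalMsg: logging.CRITICAL,
}

def qt_message_handler(msg_type, context, message):
    # Forward the Qt message to a dedicated logger at the mapped level
    logging.getLogger('qt').log(_QT_LEVELS.get(msg_type, logging.INFO), message)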
ap.add_argument("-v", "--version", dest="version", default=False, action="store_true", help="displays the current version of the application") return ap.parse_args() if __name__ == "__main__": arguments = parse_args() if arguments.test: doctest.testmod() # unit testing sys.exit() # displays version of the program if arguments.version: sys.exit("{} {}".format(APP_TITLE, VERSION_NUMBER)) # creates the annotation engine annotator = Annotator() if arguments.fullscreen: annotator.toggle_fullscreen() # toggles fullscreen annotator.parent.protocol("WM_DELETE_WINDOW", annotator.exit_prompt) # quit event handler annotator.mainloop() # runs the main tkinter loop
# Hyper-parameters. Modify these to change the behaviour of the model:
# l determines the importance of the foreground and background probability terms;
# num_bins determines how many bins to use in the histograms.
l = 1
num_bins = 2

# Get the image
if len(sys.argv) == 2:
    imgURL = sys.argv[1]
else:
    imgURL = "http://www.python.org/static/community_logos/python-logo.png"
img = getImage(imgURL)

# Annotate it with foreground and background points
annotator = Annotator(img)

# Extract foreground and background points
fg = annotator.fg
bg = annotator.bg
if (len(fg) == 0) or (len(bg) == 0):
    print("No foreground or background points selected, exiting")
    sys.exit()

# Time the initialisation
start = time.perf_counter()
print("Converting to grayscale and computing statistics")

# Compute intensities (grayscale) and collect statistics
gray = rgb2gray(img)
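# A hedged sketch of the statistics step the comments above describe:
# normalised intensity histograms over the annotated points, using num_bins
# bins. It assumes fg and bg are lists of (row, col) pixel coordinates and
# that rgb2gray returns intensities in [0, 1]; the real code may differ.
import numpy as np

fg_vals = [gray[r, c] for r, c in fg]
bg_vals = [gray[r, c] for r, c in bg]
fg_hist, bin_edges = np.histogram(fg_vals, bins=num_bins, range=(0.0, 1.0), density=True)
bg_hist, _ = np.histogram(bg_vals, bins=num_bins, range=(0.0, 1.0), density=True)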
        # Save the clip
        print('\rClip %d complete' % clip_counter, end=' ')
        clip.release()
        clip_time = 0
        clip_counter += 1
        clip = cv2.VideoWriter(
            os.path.join(clips_folder, 'clip_%04d.mp4' % clip_counter),
            fourcc, fps, (fdim[1], fdim[0]))

    if video_time < n_frames - 1:
        video_time += 1
    else:
        cap.release()
        break

# Run the annotator
annotator = Annotator([
    {'name': 'result_table', 'color': (0, 1, 0)},
    {'name': 'olympics_logo', 'color': (0, 0, 1)},
    {'name': 'stretching', 'color': (0, 1, 1)},
], clips_folder, N_show_approx=100, annotation_file='demo_labels.json')
annotator.main()
if not os.path.exists(clips_folder):
    os.mkdir(clips_folder)

# Initialise the annotator
annotator = Annotator(
    # [
    #     {'name': 'clarity 100', 'color': (0, 255, 0)},
    #     {'name': 'clarity 80', 'color': (0, 0, 255)},
    #     {'name': 'clarity 60', 'color': (0, 255, 255)},
    #     {'name': 'clarity 40', 'color': (255, 100, 0)},
    #     {'name': 'clarity 20', 'color': (0, 100, 255)}],
    [
        {'name': '1', 'color': (0, 255, 0)},
        {'name': '2', 'color': (0, 0, 255)},
        {'name': '3', 'color': (0, 255, 255)},
        {'name': '4', 'color': (255, 100, 0)},
        {'name': '5', 'color': (0, 100, 255)},
        {'name': '6', 'color': (0, 100, 50)},
        {'name': '7', 'color': (0, 150, 100)},
        {'name': '8', 'color': (50, 100, 255)},
        {'name': '9', 'color': (100, 50, 50)},
        {'name': '10', 'color': (50, 100, 150)},
        {'name': '11', 'color': (100, 100, 200)},
    ],
    clips_folder, sort_files_list=True, N_show_approx=20, screen_ratio=16 / 9,
    image_resize=1, loop_duration=None, annotation_file='ourdata_section.json')

# Split the video into clips
print('Generating clips from the video...')
annotator.video_to_clips(ourdata_filename, clips_folder, clip_length=150,
                         overlap=0, resize=1)
    'name': 'sluty',
    'color': (0, 0, 1)
}, {
    'name': 'sexy',
    'color': (0, 1, 1)
}, {
    'name': 'normal',
    'color': (0, 1, 0)
}]

# Initialise MuViLab
name = 'H.mp4'
fn = 'E:\\download\\593310496_saturdays85\\test\\%s' % name
clips_folder = './%s' % name.rsplit('.', 1)[0]

# Split the main video into clips
annotator = Annotator(labels, clips_folder,
                      annotation_file='%s.json' % name, N_show_approx=10)

import os
if not os.path.exists(clips_folder):
    os.mkdir(clips_folder)
annotator.video_to_clips(fn, clips_folder, clip_length=1200, overlap=0, resize=0.5)

# Run the GUI
annotator.main()
def get_ies_scores(self):
    extractor = Extractor()
    ies_filenames = extractor.populate_file_names(self.__ies_accuracy_test)
    ies_filenames = extractor.filter_by_valid_exts(ies_filenames)
    filenames, resume_content = extractor.read_resume_content_tika_api(
        ies_filenames, self.__ies_accuracy_test)
    filenames, resume_content = extractor.remove_empty_resumes(filenames, resume_content)
    resume_labels = extractor.read_resume_labels(self.__ies_accuracy_test, filenames)

    true_edu_insts = [extractor.get_edu_institutions(xml_tree) for xml_tree in resume_labels]
    true_edu_majors = [extractor.get_edu_majors(xml_tree) for xml_tree in resume_labels]
    true_emp_names = [extractor.get_company_names(xml_tree) for xml_tree in resume_labels]
    true_emp_jtitles = [extractor.get_job_titles(xml_tree) for xml_tree in resume_labels]

    cs = CrfSuite()
    cs.load_tagger()
    annotator = Annotator()
    annotated_resumes = [
        annotator.annotate_using_trained_model(
            self.__ies_accuracy_test + self.__seperator + filename[0] + filename[1])
        for filename in filenames
    ]
    predicted_entity_list = [cs.tag_doc(resume) for resume in annotated_resumes]

    ies_edu_insts = [extractor.get_edu_institutions_from_list(entity_list)
                     for entity_list in predicted_entity_list]
    ies_edu_majors = [extractor.get_edu_major_from_list(entity_list)
                      for entity_list in predicted_entity_list]
    ies_emp_names = [extractor.get_company_names_from_list(entity_list)
                     for entity_list in predicted_entity_list]
    ies_emp_jtitles = [extractor.get_company_position_from_list(entity_list)
                       for entity_list in predicted_entity_list]

    tokeniser = Tokeniser()
    true_edu_insts = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(true_edu_insts))
    true_edu_majors = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(true_edu_majors))
    true_emp_names = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(true_emp_names))
    true_emp_jtitles = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(true_emp_jtitles))
    ies_edu_insts = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(ies_edu_insts))
    ies_edu_majors = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(ies_edu_majors))
    ies_emp_names = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(ies_emp_names))
    ies_emp_jtitles = tokeniser.docs_tolower(tokeniser.tokenise_doclines_to_words(ies_emp_jtitles))

    edu_insts_match_score = self.score_matches(ies_edu_insts, true_edu_insts)
    edu_majors_match_score = self.score_matches(ies_edu_majors, true_edu_majors)
    emp_names_match_score = self.score_matches(ies_emp_names, true_emp_names)
    emp_jtitles_match_score = self.score_matches(ies_emp_jtitles, true_emp_jtitles)

    print(edu_insts_match_score)
    print(edu_majors_match_score)
    print(emp_names_match_score)
    print(emp_jtitles_match_score)
# Clips folder for this video
clips_folder = os.path.join(clips_folder, each)
annotation_path = os.path.join("annotation", each)
annotation_path_for_vid = os.path.exists(os.path.join(annotation_path, "labels.json"))
if not os.path.exists(annotation_path):
    os.makedirs(annotation_path)

# Initialise the annotator
annotator = Annotator([
    {'name': 'goal', 'color': (0, 255, 0)},
    {'name': 'others', 'color': (0, 0, 255)},
    {'name': 'startgame', 'color': (0, 255, 255)},
    {'name': 'endgame', 'color': (255, 255, 255)},
    {'name': 'replay_goal', 'color': (0, 0, 0)},
    {'name': 'resume', 'color': (64, 244, 226)},
    {'name': 'SOT', 'color': (66, 86, 244)},
    {'name': 'play', 'color': (244, 155, 65)},
    {'name': 'replay', 'color': (193, 52, 156)},
], clips_folder, sort_files_list=True, N_show_approx=20, screen_ratio=16 / 9,
    image_resize=1, loop_duration=None,
    annotation_file=os.path.join(annotation_path, 'labels.json'))

if not os.path.exists(clips_folder):
    # Split the video into clips
    os.makedirs(clips_folder)
    print('Generating clips from the video...')
    annotator.video_to_clips(os.path.join(videos_folder, each), clips_folder,
                             clip_length=60, overlap=0, resize=0.5)

if annotation_path_for_vid:
    if loadflag is None:
        resp = input("the annotations will be overwritten. continue? (y/n)")
# Create the clips folder
clips_folder = 'test_overlap_clips'
if os.path.exists(clips_folder):
    shutil.rmtree(clips_folder)
os.makedirs(clips_folder)

# Test the annotator
from annotator import Annotator

# Initialise the annotator
annotator = Annotator([
    {'name': 'test_label_1', 'color': (0, 1, 0)},
    {'name': 'test_label_2', 'color': (0, 0, 1)},
    {'name': 'test_label_3', 'color': (0, 1, 1)},
], clips_folder, loop_duration=2,
    annotation_file='overlap_annotation.json',
    status_file='overlap_status.json')

# Create the overlapping clips
annotator.video_to_clips('dummy_digits.mp4', clips_folder,
                         resize=0.5, overlap=0.5, clip_length=6)

# Run!
annotator.main()
def main():
    images_path = get_image_path_from_user()
    annotator = Annotator(categories=["dog", "cat"], images_path=images_path)
    annotator.begin_annotation()
yt = YouTube('https://www.youtube.com/watch?v=VZvoufQy8qc')
stream = yt.streams.filter(res='144p', mime_type='video/mp4').first()
print('Downloading youtube file. This may take a while.\n' +
      'Let\'s be honest, this _will_ take a while...')
stream.download(demo_folder, filename='youtube')

# Initialise the annotator
annotator = Annotator([
    {'name': 'result_table', 'color': (0, 1, 0)},
    {'name': 'olympics_logo', 'color': (0, 0, 1)},
    {'name': 'stretching', 'color': (0, 1, 1)},
], clips_folder, sort_files_list=True, N_show_approx=100, screen_ratio=16 / 9,
    image_resize=1, loop_duration=None, annotation_file='demo_labels.json')

# Split the video into clips
print('Generating clips from the video...')
annotator.video_to_clips(youtube_filename, clips_folder, clip_length=90,
                         overlap=0,
def bccwj_pipeline(input_path, output_path):
    a = Annotator()
    file_names = glob.glob(os.path.join(input_path, '**/*.xml'), recursive=True)
    docs = load_text(file_names, encoding='utf-8')
    save_file(docs, a, output_path)
def main(argv):
    # Logging setup
    logger = logging.getLogger('myapp')
    hdlr = logging.FileHandler('/tmp/myapp.log')
    formatter = logging.Formatter('%(asctime)s %(levelname)s %(message)s')
    hdlr.setFormatter(formatter)
    logger.addHandler(hdlr)
    logger.setLevel(logging.DEBUG)

    startTime = datetime.now()
    logger.info("Application execution started at " + str(startTime))

    # Retrieve options for this job
    configfile, filepattern, inputfilepath, csvlogging, target_format, \
        source_file, source_format, special = extractConfigurations(argv)
    momentum = datetime.now()
    now = momentum - startTime
    units = None
    counter = 1
    annotator = Annotator(None, configfile)
    full_list_magazines = None
    minm = 0
    maxm = 0
    print(filepattern)
    print(inputfilepath)
    print(configfile)

    for path in inputfilepath:
        try:
            mags = results = None
            if special == "kata":
                mags, results = readKataMagazines(logger)
            magazines = None
            results, magazines, minl, maxl = readFile(logger, filepattern, now, path, mags)
            minm, maxm = checkLength(minl, minm, maxm, logger)
            minm, maxm = checkLength(maxl, minm, maxm, logger)
            u = annotator.doAnnotationWithConfig(results, mags, magazines, csvlogging, units)

            # Merge the magazines found in this file into the full list
            if full_list_magazines is not None:
                for m in magazines:
                    if m not in full_list_magazines:
                        full_list_magazines.append(m)
            else:
                full_list_magazines = magazines

            # Merge the annotation units, avoiding duplicates
            if units is not None:
                for node in u:
                    if node not in units:
                        units.append(node)
            else:
                units = u

            logger.info("Processed " + str(counter) + "/" + str(len(inputfilepath)))
            counter = counter + 1
        except Exception as e:
            print("Error happened during execution: " + str(path))
            print("Error happened during execution: " + str(e))
            logger.warning("Unexpected error while processing data %s: %s", path, e)
            error = traceback.format_exc()
            print(error.upper())

    if full_list_magazines is not None:
        writeTextOutput(full_list_magazines)
    else:
        logger.error("magazine list is empty!")

    rank = annotator.doRanking()
    tfidf, limit = logCandicates(logger, full_list_magazines, rank)
    if units is not None:
        print("Check ranking " + str(limit))
        rank, rank_range = annotator.doRanking()
        print(len(rank))
        if len(rank) > 0:
            limits = (minm, maxm)
            logger.info("Execute candidate ranking for " + str(rank) + " " + str(limit))
            apply_weights(units, tfidf, rank, logger, limits, rank_range, limit)
            # Use when using ranges:
            # apply_weights(units, tfidf, rank, logger, limits, rank_range)
        print("convert to rdf")
        writeResultsToRDF(units, annotator, counter, target_format, source_file, source_format)

    writeXmlOutput(full_list_magazines)
    writeCSVOutputOfResults(full_list_magazines)
    annotator.writeToCSV(full_list_magazines)
    annotator.logConseptsByIndex(full_list_magazines)
    annotator.print_filtered_terms(full_list_magazines)
    annotator.print_included_terms(full_list_magazines)
    annotator.print_stats(full_list_magazines)

    now = datetime.now() - momentum
    end = datetime.now() - startTime
    print("Finished queries in " + str(now))
    print("REACHED THE END in " + str(end))
    logger.info("Application execution ended, and it lasted for " + str(end))
def mainichi_pipeline(input_path, output_path):
    a = Annotator()
    file_names = glob.glob(os.path.join(input_path, '*.sgml'))
    docs = load_text(file_names, encoding='shift_jis')
    save_file(docs, a, output_path)
def __init__(self):
    self.annotator = Annotator()
def __init__(self):
    Annotator.__init__(self, "snpeff")
    self.indexOf = indexOf
def open_file(self, filename, revert=False):
    """This method reads a file's contents and inserts the text read into
    the text buffer. Several things are done while the file is read: tags
    are applied according to the type of place, and the previous choices
    are also read and, if any are found, applied to the tags found, so
    that progress can be halted and resumed in later sessions.

    An Annotator object is created (or retrieved, in the case of a file
    that is simply being revisited in this session), which will hold the
    annotations of the user.
    """
    # Store the name of the file currently opened
    self.current_filename = filename

    # Create or retrieve the Annotator and TextBuffer objects assigned to
    # this file. If we are asked to revert, then do not run this block
    if filename in self.results and not revert:
        # We have already seen this file, so an Annotator and a TextBuffer
        # object were already constructed for it
        self.current_result = self.results[filename]
        self.current_buffer = self.current_result.buffer
        # When reusing these objects, we don't need to open the file again,
        # but simply to switch the text buffer associated to the text view.
        # To do this, we run the post_open_file() method, which also takes
        # care of the rest of the window. We don't need the pre_open_file()
        # method because the progress bar will never be needed.
        self.post_open_file()
        # No further processing needed
        return

    # Do not change the modified flag for this file
    self.backend = True

    # Create an empty TextBuffer to hold the text
    self.current_buffer = self.new_buffer()

    # If we are not reverting, then try to open the .ann file
    if not revert:
        # Even if we are seeing this file for the first time, choices may
        # already have been made for it and saved to disk. In that case,
        # retrieve the Annotator object from the file on disk.
        ann_filename = ANNOTATOR_TEMPLATE % clean_filename(filename)
        try:
            fh = self.get_ann_file(ann_filename)
        except IOError:
            # The file does not exist or is unreadable, so we will not use it
            pass
        else:
            # The file was successfully opened. Give the file descriptor to
            # the method that creates a new instance of the Annotator object
            # with the information read from the file.
            self.current_result = Annotator.from_saved(fh, self.current_buffer,
                                                       self.current_filename)
            self.results[filename] = self.current_result
            # As above, no further processing of the file is needed; just
            # user interface stuff
            self.post_open_file()
            # Further changes are user-made, so they must be processed
            self.backend = False
            return

    # Start the Annotator object as an empty instance
    self.results[filename] = self.current_result = \
        Annotator(self.current_buffer, self.current_filename)

    # Prepare for the opening process
    self.pre_open_file()

    # Get the contents of the file as nodes
    f = self.get_input_file(filename)
    nodes = xml_utils.extract_nodes(f)

    # Record the number of places
    n_places = sum(1 for i in nodes if i[1] != "text")
    place_index = 0

    for node in nodes:
        # node comes from the xml_utils.extract_nodes() function, which
        # returns several tuples. Each tuple describes a string of data in
        # the file: text, explicit places (with GeoNetID, ...) or implicit
        # places
        text, type = node[0], node[1]
        # Types are either "text", "explicit" or "implicit", with everything
        # except "text" signaling a place tag
        is_place = type != "text"

        if is_place:
            # Store the original name found in the file
            original_text = text
            # The position of the current cursor is the place this piece of
            # text will be inserted at. We create a mark (whose position
            # remains fixed relative to its surroundings) because the rest
            # of the text may change. The mark is created with left gravity
            # because more text will be added by the method, but it must
            # change to right gravity later on, so that newly added text
            # does not get inserted in the name of the place.
            start_iter = self.get_cursor_iter()
            start_mark = self.current_buffer.create_mark(None, start_iter, True)

        # We want to slightly change the visible text for implicit places
        if type == "implicit":
            text = "(" + text + ARROW + ")"

        # Insert the text at the current position of the cursor
        self.current_buffer.insert_at_cursor(text)

        # When the node is a place, there are other things that must be done
        if is_place:
            # Put a mark at the end of the text, to signal the end of the
            # place name. This mark should have left gravity and remain so,
            # because text added after it must not modify the position of
            # the mark relative to the place.
            end_iter = self.get_cursor_iter()
            end_mark = self.current_buffer.create_mark(None, end_iter, True)

            # As explained above, we need to recreate the start_mark with
            # right gravity
            start_iter = self.current_buffer.get_iter_at_mark(start_mark)
            start_mark = self.current_buffer.create_mark(None, start_iter, False)

            # Now we need to retrieve more information about the place from
            # the database
            if type == "explicit":
                # node[2] contains triples with domain (physical or
                # administrative), GeoNet ID and type of location,
                # respectively
                possibilities = [(int(i[1]), i[2]) for i in node[2]]
            elif type == "implicit":
                # When dealing with implicit places, we only have the name.
                # Retrieve the possible GeoNet IDs from the database
                possibilities = self.find_by_name(node[0])

            # We also want the municipality name of the place to be present
            # in the possibilities list
            possibilities = [(i, j, self.find_municipality(i))
                             for i, j in possibilities]

            # We now add all the information to the self.current_result object
            self.current_result.add(original_text, start_mark, end_mark,
                                    type, possibilities)

            # Increase the index of the places and update the progress bar
            # showing how much of the file has been gathered and processed
            place_index += 1
            self.update_progress_bar(place_index, n_places)

    # Format the text to give cues about each place's status
    self.current_result.format_buffer()

    # After opening the file, several operations must be performed
    self.post_open_file()

    # Further changes are user-made, so they must be processed
    self.backend = False
def test(nlp, src, gen, bert=False, print_annotations=False, print_latex=False, verbose=False):
    if print_annotations:
        print("source:", src[:50])
        print("summary:", gen[:50])
    src = nlp(src)
    gen = nlp(gen)
    if verbose:
        print("clusters:", src._.coref_clusters, gen._.coref_clusters)
    ce = CompoundEquivalency()
    spe = SpeakerPronounEquivalency()
    spe.register(src)
    spe.register(gen)
    kg = KnowledgeGraph(nlp, use_bert=bert, equivalencies=[ce, spe], verbose=verbose)
    if print_annotations:
        annotator = Annotator(src, gen, latex=print_latex)
    kg.add_document(src)

    contained = 0
    contained_bert = 0
    missing = 0
    missing_verb = 0
    missing_actors = 0
    missing_acteds = 0
    contradiction = 0
    contradiction_bert = 0
    invalid_simplification = 0
    total = 0

    for token in gen:
        if token.pos_ == "VERB":
            total += 1
            relation = kg.get_relation(token)
            r = kg.query_relation(relation)
            if r[0] == KnowledgeGraph.entailment:
                if print_annotations:
                    print(util.format("contained", "blue", latex=print_latex),
                          "|", relation, "|", r[1])
                contained += 1
            if r[0] == KnowledgeGraph.entailment_bert:
                if print_annotations:
                    print(util.format("contained (BERT)", "blue", latex=print_latex),
                          "|", relation, "|", r[1])
                contained_bert += 1
            if r[0] == KnowledgeGraph.contradiction_bert:
                if print_annotations:
                    print(util.format("contradiction (BERT)", "red", latex=print_latex),
                          "|", relation, "|", r[1])
                contradiction_bert += 1
            elif r[0] == KnowledgeGraph.missing_dependencies:
                missing += 1
                if print_annotations:
                    print(util.format("generic missing dependency", "yellow", latex=print_latex),
                          "|", relation, "|", r[1])
            elif r[0] == KnowledgeGraph.missing_actors:
                missing_actors += 1
                if print_annotations:
                    print(util.format("missing actors", "magenta", latex=print_latex),
                          "|", relation, "|", r[1])
            elif r[0] == KnowledgeGraph.missing_acteds:
                missing_acteds += 1
                if print_annotations:
                    print(util.format("missing acteds", "magenta", latex=print_latex),
                          "|", relation, "|", r[1])
            elif r[0] == KnowledgeGraph.missing_verb:
                missing_verb += 1
                if print_annotations:
                    print(util.format("missing verb", "magenta", latex=print_latex),
                          "|", relation, "|", r[1])
            elif r[0] == KnowledgeGraph.invalid_simplification:
                invalid_simplification += 1
                if print_annotations:
                    print(util.format("invalid simplification", "magenta", latex=print_latex),
                          "|", relation, "|", r[1])
            elif r[0] == KnowledgeGraph.contradiction:
                contradiction += 1
                if print_annotations:
                    print(util.format("contradiction", "red", latex=print_latex),
                          "|", relation, "|", r[1])
            if print_annotations:
                annotator.annotate(relation, r)

    if print_annotations:
        annotated_document, annotated_summary = annotator.annotated()
        print("Document:", " ".join(annotated_document))
        print("Summary:", " ".join(annotated_summary))

    if total == 0:
        return 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
    return 100.0 * contained / total, \
        100.0 * contained_bert / total, \
        100.0 * missing / total, \
        100.0 * missing_verb / total, \
        100.0 * missing_actors / total, \
        100.0 * missing_acteds / total, \
        100.0 * contradiction / total, \
        100.0 * contradiction_bert / total, \
        100.0 * invalid_simplification / total