def elan_to_df(filename):
    """Inspect the tiers of an ELAN (.eaf) file.

    Opens *filename* with pympi and, for every tier other than 'default'
    whose name starts with 'Parent', prints that tier's annotation data.
    (Despite the name, no DataFrame is built yet — the function is used
    for its printed output.)

    Parameters
    ----------
    filename : str
        Path to the .eaf file.

    Returns
    -------
    int
        Always 0.
    """
    try:
        eaf = pympi.Eaf(filename, 'utf-8')
    except Exception:
        # Previously a bare `except:` printed the filename and fell
        # through, so the loop below crashed with NameError on `tiers`.
        # Report the unparseable file and bail out instead.
        print(filename)
        return 0
    tiers = list(eaf.get_tier_names())
    for name in tiers:
        # Annotation data is fetched for every tier, matching the
        # original control flow; only Parent* tiers are printed.
        data = eaf.get_annotation_data_for_tier(name)
        if name != 'default' and name.startswith('Parent'):
            print(data)
    return 0
def extract_videos_from_annotations_colab(video_name, eaf_file_name, gloss_list):
    """Cut clips for annotated glosses out of *video_name* (Colab layout).

    Reads the ELAN file *eaf_file_name* and, for every annotation whose
    value matches an entry of *gloss_list*, writes the corresponding video
    segment to ``openpose/<gloss>/<NNNNN>.mp4``.  Prints a notice for each
    tier that contains no matching annotation.
    """
    def make_gloss_dir(gloss_name):
        # Create the per-gloss output folder on first use.
        target = "openpose/" + gloss_name + "/"
        if not os.path.exists(target):
            os.makedirs(target)

    eaf = pympi.Eaf(file_path=eaf_file_name)
    for tier in eaf.get_tier_names():
        matched = 0
        for ann in eaf.get_annotation_data_for_tier(tier):
            for gloss in gloss_list:
                if ann[2] != gloss:
                    continue
                begin_ms, finish_ms = ann[0], ann[1]
                print(begin_ms / 1000, finish_ms / 1000)
                make_gloss_dir(gloss)
                ffmpeg_extract_subclip(
                    video_name,
                    begin_ms / 1000,
                    finish_ms / 1000,
                    targetname="openpose/" + str(gloss) + "/"
                    + "%#05d.mp4" % (matched + 1))
                # Comment next line if you don't want to extract the frames for each video
                # video_to_frames("Data/"+str(gloss)+"/Videos/"+"%#05d.mp4" % (count+1), "Data/"+str(gloss)+"/"+"%#05d" % (count+1) )
                matched += 1
        if matched == 0:
            print("No annotation found with this name")
def extract_one(self):
    """Ask for an output directory, then extract the currently selected
    annotation from the chosen video and save it there as <label>.mp4."""
    opts = QFileDialog.Options()
    opts |= QFileDialog.DontUseNativeDialog
    out_dir = QFileDialog.getExistingDirectory(
        parent=self,
        caption='Select directory to save extracted video',
    )
    if not out_dir:
        return
    print(out_dir)
    eaf = pympi.Eaf(file_path=self.labl.text())
    tier_annotations = eaf.get_annotation_data_for_tier(
        self.all_label.text())
    source_video = self.video_label.text()
    wanted = self.ann_label.text()
    for ann in tier_annotations:
        if ann[2] == wanted:
            begin_ms, finish_ms = ann[0], ann[1]
            ffmpeg_extract_subclip(
                source_video,
                begin_ms / 1000,
                finish_ms / 1000,
                targetname=out_dir + '/' + str(wanted) + ".mp4")
def to_print(self, text):
    """Store *text* in the (hidden) tier label and repopulate the
    annotation combo box with every annotation value of that tier."""
    label = self.all_label
    label.setText(str(text))
    label.adjustSize()
    label.move(200, 150)
    label.hide()
    # Reload the annotation combo box from the selected tier.
    eaf = pympi.Eaf(file_path=self.labl.text())
    tier_data = eaf.get_annotation_data_for_tier(str(text))
    self.combo_annotations.clear()
    for entry in tier_data:
        self.combo_annotations.addItem(str(entry[2]))
def extract_videos_from_annotations(root, gloss_list):
    """Extract video clips for annotated glosses from .eaf files under *root*.

    Walks *root* looking for ``*.eaf`` files; each must sit next to a video
    with the same basename and an ``.mp4`` extension.  For every annotation
    whose value is in *gloss_list*, the matching segment is written to
    ``Data/<gloss>/Videos/<NNNNN>.mp4`` and its frames are extracted.

    ex. ("./Raw_videos/original_video", ["NS", "1H", "2H"])
    Make sure the eaf file has the same name as the video.

    Parameters
    ----------
    root : str
        Directory to search for .eaf files.  Falls back to the current
        working directory when empty/None.  (Previously the argument was
        ignored: it was unconditionally overwritten with the cwd.)
    gloss_list : list of str
        Annotation values to extract.
    """
    def check_folders(gl_name):
        # Create Data/<gloss>/Videos/ on first use.
        directory1 = "./Data/" + gl_name + "/Videos/"
        if not os.path.exists(directory1):
            os.makedirs(directory1)

    # Honour the caller-supplied path; keep the old cwd behaviour as the
    # fallback so existing callers that passed nothing meaningful still work.
    if not root:
        root = os.getcwd()
    pattern = "*.eaf"
    for dirpath, dirs, files in os.walk(root, topdown=False):
        for name in files:
            if not fnmatch(name, pattern):
                continue
            # The video must carry the same basename as the .eaf file.
            video_name = re.sub(r'\.eaf$', '', name) + ".mp4"
            print(video_name)
            video_name = dirpath + "/" + video_name
            eaf_file = pympi.Eaf(file_path=dirpath + "/" + name)
            for tier_name in eaf_file.get_tier_names():
                annotations = eaf_file.get_annotation_data_for_tier(tier_name)
                count = 0
                for annotation in annotations:
                    for gloss in gloss_list:
                        if annotation[2] == gloss:
                            start = annotation[0]
                            end = annotation[1]
                            print(start / 1000, end / 1000)
                            check_folders(gloss)
                            ffmpeg_extract_subclip(
                                video_name, start / 1000, end / 1000,
                                targetname="Data/" + str(gloss) + "/Videos/"
                                + "%#05d.mp4" % (count + 1))
                            # Comment next line if you don't want to extract
                            # the frames for each video
                            video_to_frames(
                                "Data/" + str(gloss) + "/Videos/"
                                + "%#05d.mp4" % (count + 1),
                                "Data/" + str(gloss) + "/" + "%#05d" % (count + 1))
                            count = count + 1
                if count == 0:
                    print("No annotation found with this name")
def process(file, aclew_data, t=5, start=30, n=5):
    """Sample *n* random non-overlapping *t*-minute clips from a recording
    and annotate them in the ELAN file *file*.

    For each sampled clip, annotations are added on the 'code', 'code_num',
    'on_off' and 'context' tiers, a 'notes' tier is created, and a row is
    appended to the global *selected* DataFrame.  The modified file is
    written to *output_dir* under its original basename.

    Parameters
    ----------
    file : str
        Path to the source .eaf file.
    aclew_data : object
        Record exposing ``length_of_recording`` (numeric string, minutes),
        ``aclew_id`` and ``corpus`` attributes.
    t : int
        Clip length in minutes.
    start : int
        First eligible minute of the recording.
    n : int
        Number of clips to sample.
    """
    global selected
    length = int(float(aclew_data.length_of_recording))
    eaf = pympi.Eaf(file)
    existing_nums = [
        int(x) for _, _, x in eaf.get_annotation_data_for_tier("code_num")
    ]
    existing_nums.sort()
    # Continue numbering after the highest existing clip number; start from
    # 1 when the tier is empty (previously raised IndexError).
    last_n = existing_nums[-1] if existing_nums else 0
    new_n = last_n + 1
    existing = [(x / 60000, y / 60000)
                for x, y, _ in eaf.get_annotation_data_for_tier('context')]
    # random.shuffle requires a mutable sequence: shuffling a bare range
    # object raises TypeError on Python 3, so materialise it first.
    minute_range = list(range(start, length - t))
    shuffle(minute_range)
    i = 0
    for x in minute_range:
        if i == n:
            break
        if not overlaps(x, existing, t):
            # Reserve a t-minute window (was hard-coded to 5 minutes,
            # ignoring the t parameter).
            existing.append((x, x + t))
            i += 1
    rand_ints = list(range(new_n, new_n + n))
    shuffle(rand_ints)
    for x in zip(existing[-n:], rand_ints):
        # Trim 2 minutes from the front and 1 from the back of the window,
        # converting minutes to milliseconds.
        ts = ((x[0][0] + 2) * 60000), ((x[0][1] - 1) * 60000)
        eaf.add_annotation("code", ts[0], ts[1])
        eaf.add_annotation("code_num", ts[0], ts[1], value=str(x[1]))
        eaf.add_annotation("on_off", ts[0], ts[1],
                           value="{}_{}".format(ts[0], ts[1]))
        eaf.add_annotation("context", ts[0] - 120000, ts[1] + 60000)
        eaf.add_tier("notes")
        # NOTE(review): DataFrame.append was removed in pandas 2.0; this
        # code relies on an older pandas version.
        selected = selected.append(
            {
                'aclew_id': aclew_data.aclew_id,
                'corpus': aclew_data.corpus,
                'clip_num': x[1],
                'onset': ts[0],
                'offset': ts[1]
            },
            ignore_index=True)
    eaf.to_file(os.path.join(output_dir, os.path.basename(file)))
def on_click(self):
    """Open a file picker, remember the chosen .eaf path in the label,
    and fill the tier combo box with the file's tier names."""
    opts = QFileDialog.Options()
    opts |= QFileDialog.DontUseNativeDialog
    chosen, _ = QFileDialog.getOpenFileName(
        None,
        "Choose a file",
        "",
        "All Files (*);;Python Files (*.py)",
        options=opts)
    if not chosen:
        return
    self.combo.clear()
    self.labl.setText(str(chosen))
    self.labl.adjustSize()
    self.labl.move(190, 16)
    self.file_eaf = pympi.Eaf(file_path=chosen)
    self.tier_names = self.file_eaf.get_tier_names()
    for tier_name in self.tier_names:
        self.combo.addItem(str(tier_name))
def all_annotations(self):
    """Ask for an output directory and export every annotation of the
    selected tier as its own clip named <value>_<index>.mp4."""
    opts = QFileDialog.Options()
    opts |= QFileDialog.DontUseNativeDialog
    out_dir = QFileDialog.getExistingDirectory(
        parent=self,
        caption='Select directory to save extracted videos',
    )
    if not out_dir:
        return
    print(out_dir)
    eaf = pympi.Eaf(file_path=self.labl.text())
    tier_annotations = eaf.get_annotation_data_for_tier(
        self.all_label.text())
    source_video = self.video_label.text()
    for index, ann in enumerate(tier_annotations):
        begin_ms, finish_ms = ann[0], ann[1]
        print(begin_ms / 1000, finish_ms / 1000)
        ffmpeg_extract_subclip(
            source_video,
            begin_ms / 1000,
            finish_ms / 1000,
            targetname=out_dir + '/' + str(ann[2]) + '_'
            + str(index) + ".mp4")
    print("FINISHED")
out.write(frame) # Write out frame to video cv2.imshow('video', frame) if (cv2.waitKey(1) & 0xFF) == ord('q'): # Hit `q` to exit break # Release everything if job is finished out.release() cv2.destroyAllWindows() print("The output video is {}".format(output)) # Create annotations based on predictions array x = pympi.Eaf(author="Manolis") x.add_tier(tier_id="tier1") p = np.asarray(predictions_array) s = np.asarray(p) size = s.size # print(size) # print(size) st = 0 end = 0 arr = [] j = 0 i = 0 # master_boolean = True # print("strarting values: " + str(st)) while i < size - 1:
import pympi import re elan_file = pympi.Eaf("kpv_izva20150705-02-b.eaf") tiers = elan_file.get_tier_ids_for_linguistic_type('orthT') utterances = [] for tier in tiers: annotations = elan_file.get_annotation_data_for_tier(tier) sent = [] for a in annotations: if not re.findall(r"[\.!?…]$", a[2]): sent.append(a) else: sent.append(a) utterances.append(sent) sent = [] data = [] for u in utterances:
def __init__(self, filename, config):
    """Initialise a writer for *filename* with an empty ELAN document.

    The ``overwrite-output`` key of *config* (default False) controls
    whether an existing output file may be replaced.
    """
    self.__filename = filename
    self.__overwrite = bool(config.get('overwrite-output', False))
    self.__document = pympi.Eaf()
def check_folders():
    """Create the ./Extracted_videos output directory if it is missing."""
    directory1 = "./Extracted_videos"
    if not os.path.exists(directory1):
        os.makedirs(directory1)


# Walk the current directory tree, extracting every annotation whose value
# equals args.g from each .eaf file found.
root = './'
pattern = "*.eaf"
for dirpath, dirs, files in os.walk(root, topdown=False):
    for name in files:
        if fnmatch(name, pattern):
            # The matching video defaults to <eaf-basename>.mp4 unless one
            # was given explicitly on the command line.
            if args.v == "default":
                video_name = re.sub(r'\.eaf$', '', name) + ".mp4"
            else:
                video_name = args.v
            # Join with the directory the walk found the file in: a bare
            # filename only resolves for .eaf files directly in the cwd.
            eaf_file = pympi.Eaf(file_path=os.path.join(dirpath, name))
            tier_names = eaf_file.get_tier_names()
            gloss = args.g
            check_folders()
            for tier_name in tier_names:
                annotations = eaf_file.get_annotation_data_for_tier(tier_name)
                count = 0
                for annotation in annotations:
                    if annotation[2] == gloss:
                        start = annotation[0]
                        end = annotation[1]
                        print(start / 1000, end / 1000)
                        ffmpeg_extract_subclip(
                            video_name, start / 1000, end / 1000,
                            targetname="Extracted_videos/"
                            + "%#05d.mp4" % (count + 1))
                        count = count + 1
                if count == 0:
                    print("No annotation found with this name")
timestamps = timestamps[0][1:-1].split(",") if len(c) > 0: complete_transcript.insert(i, c) complete_speaker_data.insert(i, timestamps) i += 1 # /////////////////////////////////////////////////////////////////////////////////////////////////////// # Loads ELAN file as an ELAN object # Use MasterTemplateFinal if starting new annotation, # otherwise load pre-processed file with hand-entered annotations # /////////////////////////////////////////////////////////////////////////////////////////////////////// #elan_file = "MasterTemplateFinal.etf" elan_file = "Trial_" + str(trial_num) + "_" + str(trial_date) + '/Trial_'+ str(trial_num) +".eaf" elan_obj = pympi.Eaf(elan_file) pympi.Elan.parse_eaf(elan_file, elan_obj) # /////////////////////////////////////////////////////////////////////////////////////////////////////// # Make sure the lines are clear and fix the timestamps to the correct time # Convert the IBM transcript to the error-checked version # /////////////////////////////////////////////////////////////////////////////////////////////////////// clear_dictated_lines() correct_app_time(complete_speaker_data) transcript = correct_IBM_to_error_checked(transcript, complete_transcript) # /////////////////////////////////////////////////////////////////////////////////////////////////////// # Fill in tiers, starting with Dictated/Typed errors and Corrections # /////////////////////////////////////////////////////////////////////////////////////////////////////// for i in range(1, len(app_conversation)):