def decode_frames(d_set, src_dir, dest_dir, num_threads, num_annots, max_ratio, include_absent):
    # Get list of annotations
    # Download & extract the annotation list
    annotations, clips, vids = youtube_bb.parse_annotations(d_set, src_dir)

    # Filter out annotations with no matching video
    print(d_set +
          ': Filtering out last, missing, and/or absent frames (if requested)...')
    present_annots = []
    for annot in annotations:
        yt_id    = annot[0]
        class_id = annot[2]
        obj_id   = annot[4]
        annot_clip_path = src_dir + '/' + d_set + '/' + class_id + '/'
        annot_clip_name = yt_id + '+' + class_id + '+' + obj_id + '.mp4'
        clip_name       = yt_id + '+' + class_id + '+' + obj_id
        # If video exists
        if os.path.exists(annot_clip_path + annot_clip_name):
            # If we are including all frames, or if the labeled object is present
            if include_absent or (annot[5] == 'present'):
                # If this is not the first or last frame
                annot_clip = next((x for x in clips if x.name == clip_name), None)
                if ((int(annot_clip.stop)  != int(annot[1])) and
                        (int(annot_clip.start) != int(annot[1]))):
                    present_annots.append(annot)

    # Gather subset of random annotations
    print(d_set + ': Gathering annotations/frames to decode...')
    random.shuffle(present_annots)
    if num_annots == 0:
        # Convert all present annotations
        annot_to_convert = present_annots
    else:
        assert len(present_annots) >= num_annots, \
            "Number of frames requested exceeds number of present frames"
        annot_to_convert = present_annots[:num_annots]

    # Run frame decoding in parallel, extract frames from each video
    # for annot in annot_to_convert:
    #     decode_frame(clips, annot, d_set, src_dir, dest_dir)
    with futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
        fs = [executor.submit(decode_frame, clips, annot, max_ratio,
                              d_set, src_dir, dest_dir)
              for annot in annot_to_convert]
        for i, f in enumerate(futures.as_completed(fs)):
            # Check for an exception in the workers.
            try:
                f.result()
            except Exception as exc:
                print('decode failed', exc)
            else:
                # Write progress to stderr so that it can be seen
                sys.stderr.write(
                    "Decoded frame: {} / {} \r".format(i, len(annot_to_convert)))

    print(d_set + ': Finished decoding frames!')
    return annot_to_convert
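
# A minimal driver for decode_frames might look like the sketch below. It
# assumes the module-level imports used above (os, sys, random,
# concurrent.futures as futures, youtube_bb) are present, and that
# 'yt_bb_detection_validation' is one of the names in youtube_bb.d_sets;
# the max_ratio value is a placeholder, not a recommended setting.
if __name__ == '__main__':
    decoded = decode_frames('yt_bb_detection_validation',  # assumed d_set name
                            'videos',                      # where the clips were downloaded
                            'frames',                      # where decoded frames go
                            num_threads=4,
                            num_annots=0,                  # 0 => decode every present annotation
                            max_ratio=1.0,                 # placeholder value
                            include_absent=False)
    sys.stderr.write("\nDecoded {} annotations\n".format(len(decoded)))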
def parse_and_sched(dl_dir='videos', num_threads=4):
    """Download the entire youtube-bb data set into `dl_dir`."""
    # Make the download directory if it doesn't already exist
    check_call(['mkdir', '-p', dl_dir])

    # For each of the four datasets
    for d_set in youtube_bb.d_sets:
        annotations, clips, vids = youtube_bb.parse_annotations(d_set, dl_dir)
        youtube_bb.sched_downloads(d_set, dl_dir, num_threads, vids)
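
# Invoked as a script, this version only needs a target directory and a thread
# count. A sketch, assuming check_call is imported from subprocess and the
# youtube_bb helper module is importable; the argument handling here is
# illustrative, not the project's actual CLI.
if __name__ == '__main__':
    import sys
    target_dir = sys.argv[1] if len(sys.argv) > 1 else 'videos'
    parse_and_sched(dl_dir=target_dir, num_threads=4)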
def parse_and_sched(dl_dir='videos', num_threads=4, dl_cls_by_filter=-1,
                    offset_min=-1, offset_max=-1, FREE_SPACE_LIMIT=-1):
    """Download the entire youtube-bb data set into `dl_dir`."""
    # Make the download directory if it doesn't already exist
    check_call(['mkdir', '-p', dl_dir])

    # Toggles the alternate CSV reader path, which keeps a global record index
    # (rec_ind_glb) running across the four annotation CSVs
    read_csv_with_alternate_reader = True

    # For each of the four datasets
    rec_ind_glb = 0
    rec_ind = -1
    for d_set in youtube_bb.d_sets:
        offset_min_tmp = offset_min
        offset_max_tmp = offset_max
        if read_csv_with_alternate_reader:
            offset_min_tmp -= rec_ind_glb
            offset_max_tmp -= rec_ind_glb
        annotations, clips, vids, rig = youtube_bb.parse_annotations(
            d_set, dl_dir, rec_ind, offset_min_tmp, offset_max_tmp, dl_cls_by_filter,
            read_csv_with_alternate_reader=read_csv_with_alternate_reader)
        if read_csv_with_alternate_reader:
            rec_ind_glb += rig
            rec_ind = -1
            offset_min_tmp = 0
            offset_max_tmp = offset_max - offset_min
        rec_ind = youtube_bb.sched_downloads(d_set, dl_dir, num_threads, vids,
                                             rec_ind, offset_min_tmp, offset_max_tmp,
                                             FREE_SPACE_LIMIT)

    print("Finished processing all datasets")
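
# Because the offsets index records globally across all four annotation CSVs,
# this variant can download just a slice of the data set. The call below is a
# sketch only: the semantics of dl_cls_by_filter (a single class id to keep)
# and FREE_SPACE_LIMIT (a floor on free disk space before downloads stop) are
# assumptions based on the parameter names, not confirmed by the code above.
if __name__ == '__main__':
    parse_and_sched(dl_dir='videos',
                    num_threads=8,
                    dl_cls_by_filter=5,             # assumed: keep only class id 5
                    offset_min=0,                   # first global record to download
                    offset_max=10000,               # last global record to download
                    FREE_SPACE_LIMIT=50 * 1024**3)  # assumed: bytes of free space to keep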
def parse_and_sched(dl_dir='videos', num_threads=4):
    """Download the entire youtube-bb data set into `dl_dir`."""
    # Make the download directory if it doesn't already exist (Windows shell)
    check_call(['if', 'not', 'exist', dl_dir, 'mkdir', dl_dir], shell=True)

    # For each of the four datasets
    for d_set in youtube_bb.d_sets:
        annotations, clips, vids = youtube_bb.parse_annotations(d_set, dl_dir)
        annot = annotations
        # Keep only the videos whose yt_id appears in vid_names
        vids = [vid for vid in vids if vid.yt_id in vid_names]
        sys.stderr.write("vids: " + str(len(vids)) + "\n")
        youtube_bb.sched_downloads(annot, d_set, dl_dir, num_threads, vids)
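
# This variant filters the parsed video list against vid_names, which is not
# defined inside the function and is presumably provided at module scope. A
# hypothetical setup is sketched below; the YouTube ids are placeholders, not
# real entries from the data set.
vid_names = {
    'AAAAAAAAAAA',   # placeholder 11-character YouTube id
    'BBBBBBBBBBB',   # placeholder 11-character YouTube id
}

if __name__ == '__main__':
    parse_and_sched(dl_dir='videos', num_threads=4)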
def parse_and_sched(dl_dir='videos', num_threads=4):
    """Download the entire youtube-bb data set into `dl_dir`."""
    # Make the download directory if it doesn't already exist
    # dl_dir = os.path.join(os.getcwd(), dl_dir)
    check_call(['if', 'not', 'exist', dl_dir, 'mkdir', dl_dir], shell=True)

    # For each of the four datasets
    for d_set in youtube_bb.d_sets:
        annotations, clips, vids = youtube_bb.parse_annotations(d_set, dl_dir)
        # Debug: print a sample of the parsed records and stop after the first
        # dataset, so the sched_downloads call below is never reached
        print(annotations[1:10])
        print(clips[1:10])
        print(vids[1:10])
        print((len(annotations), len(clips), len(vids)))
        break
        youtube_bb.sched_downloads(d_set, dl_dir, num_threads, vids)