def _parse_pairs(self, txtfile):
    """Parse an LFW pairs file into a list of (ImageCategory, ImageCategory) tuples."""

    def _im(name, idx):
        # LFW images live at <lfwdir>/<name>/<name>_%04d.jpg
        return ImageCategory(category=name,
                             filename=os.path.join(self.lfwdir, name,
                                                   '%s_%04d.jpg' % (name, int(idx))))

    pairs = []
    for row in readcsv(os.path.join(self.lfwdir, txtfile), separator='\t'):
        if len(row) == 3:
            # Matched pair: one identity, two image indexes
            pairs.append((_im(row[0], row[1]), _im(row[0], row[2])))
        elif len(row) == 4:
            # Mismatched pair: two identities, one image index each
            pairs.append((_im(row[0], row[1]), _im(row[2], row[3])))
        # Rows of any other length (e.g. fold-count header) are silently skipped
    return pairs
def _parse_cls(self, imageset='train'):
    """ImageNet Classification, imageset = {train, val}.

    Generator yielding one ImageCategory per image listed in the CLS-LOC
    imageset file.  If an annotation XML exists, the category is taken from
    the first annotated object; otherwise the category is derived from the
    image subpath (training images are stored in per-wordnetid directories).
    """
    import xmltodict
    if imageset == 'train':
        imagesetfile = 'train_cls.txt'
    elif imageset == 'val':
        imagesetfile = 'val.txt'
    else:
        raise ValueError('unsupported imageset')
    csv = readcsv(os.path.join(self.datadir, 'ImageSets', 'CLS-LOC', imagesetfile),
                  separator=' ')
    for (subpath, k) in csv:
        xmlfile = '%s.xml' % os.path.join(self.datadir, 'Annotations', 'CLS-LOC',
                                          imageset, subpath)
        imfile = '%s.JPEG' % os.path.join(self.datadir, 'Data', 'CLS-LOC',
                                          imageset, subpath)
        if os.path.exists(xmlfile):
            # FIX: use a context manager so the XML file handle is closed
            # (the original leaked one open file per annotation)
            with open(xmlfile, 'r') as f:
                d = xmltodict.parse(f.read())
            obj = d['annotation']['object']
            objlist = obj if islist(obj) else [obj]
            yield ImageCategory(filename=imfile, category=objlist[0]['name'])
        else:
            yield ImageCategory(filename=imfile, category=filepath(subpath))
def _parse_loc(self, imageset='train'):
    """ImageNet localization, imageset = {train, val}.

    Generator yielding one ImageDetection per annotated object bounding box
    in each image listed in the CLS-LOC imageset file.
    """
    import xmltodict
    if imageset == 'train':
        imagesetfile = 'train_loc.txt'
    elif imageset == 'val':
        imagesetfile = 'val.txt'
    else:
        raise ValueError('unsupported imageset')
    csv = readcsv(os.path.join(self.datadir, 'ImageSets', 'CLS-LOC', imagesetfile),
                  separator=' ')
    for (path, k) in csv:
        xmlfile = '%s.xml' % os.path.join(self.datadir, 'Annotations', 'CLS-LOC',
                                          imageset, path)
        # FIX: use a context manager so the XML file handle is closed
        # (the original leaked one open file per annotation)
        with open(xmlfile, 'r') as f:
            d = xmltodict.parse(f.read())
        imfile = '%s.JPEG' % os.path.join(self.datadir, 'Data', 'CLS-LOC',
                                          imageset, path)
        obj = d['annotation']['object']
        objlist = obj if islist(obj) else [obj]
        for obj in objlist:
            yield ImageDetection(filename=imfile,
                                 category=obj['name'],
                                 xmin=int(obj['bndbox']['xmin']),
                                 ymin=int(obj['bndbox']['ymin']),
                                 xmax=int(obj['bndbox']['xmax']),
                                 ymax=int(obj['bndbox']['ymax']))
def _dataset(self, csvfile):
    """Parse the Charades annotation CSV into a list of Scene objects with Activity annotations."""
    d_index_to_category = self.categories()
    vidlist = []
    for row in readcsv(csvfile)[1:]:  # skip header row
        (videoid, sceneloc, actions) = (row[0], row[2], row[-2])
        v = Scene(filename=os.path.join(self.datadir, '%s.mp4' % videoid),
                  category=sceneloc)
        # avg_frame_rate is a 'num/den' ratio string from ffprobe
        ratio = v.probe()['streams'][0]['avg_frame_rate'].split('/')
        fps = float(ratio[0]) / float(ratio[1])
        v.framerate(fps)  # FIXME: better handling of time based clips to avoid ffprobe
        if len(actions) > 0:
            # actions is a ';' separated list of 'category startsec endsec' triples
            for a in actions.split(';'):
                (category, startsec, endsec) = a.split(' ')
                try:
                    v.add(Activity(category=d_index_to_category[category],
                                   startframe=float(startsec) * fps,
                                   endframe=float(endsec) * fps,
                                   attributes={'csvfile': row}))
                except KeyboardInterrupt:
                    raise
                except Exception as e:
                    print('[vipy.dataset.charades]: SKIPPING invalid activity row="%s" with error "%s"' % (str(row), str(e)))
        vidlist.append(v)
    return vidlist
def vggface2_to_vggface1(self):
    """Return a dict mapping VGGFace2 subject IDs to overlapping VGGFace1 subject IDs."""
    overlapfile = os.path.join(self.datadir, 'class_overlap_vgg1_2.txt')
    assert os.path.exists(overlapfile), 'Download class_overlap_vgg1_2.txt to "%s"' % self.datadir
    rows = readcsv(overlapfile, separator=' ', ignoreheader=True)
    return {row[0]: row[1] for row in rows}
def parse(self):
    """Return a list of ImageDetections for all URLs in facescrub.

    Parses both the actors and actresses text files; the only difference
    between the two passes is the source file and the GENDER attribute, so
    the duplicated loop from the original is factored into one helper.
    """
    imdir = remkdir(os.path.join(self._datadir, 'images'))

    def _parse_txtfile(txtfile, gender):
        # One ImageDetection per row; row format:
        #   subjectname \t imageid \t faceid \t url \t "xmin,ymin,xmax,ymax" \t sha256
        dets = []
        csvrows = readcsv(os.path.join(self._datadir, txtfile), separator='\t')
        for (subjectname, imageid, faceid, url, bbox, sha256) in csvrows[1:]:
            categoryname = subjectname.replace(' ', '_')
            (xmin, ymin, xmax, ymax) = bbox.split(',')
            dets.append(ImageDetection(url=url,
                                       filename=os.path.join(imdir, '%s_%s.jpg' % (categoryname, imageid)),
                                       category=categoryname,
                                       xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax,
                                       attributes={'GENDER': gender}))
        return dets

    return (_parse_txtfile('facescrub_actors.txt', 'male') +
            _parse_txtfile('facescrub_actresses.txt', 'female'))
def fold(self, foldnum=1):
    """Return the foldnum as a list of vipy.image.Scene objects, each containing all vipy.object.Detection faces in the current image.

    FDDB ellipse rows are: (major_axis_radius, minor_axis_radius, angle,
    center_x, center_y, score); the ellipse is converted to an axis-aligned
    box of size (2*minor, 2*major) centered at (center_x, center_y),
    ignoring the rotation angle.
    """
    # fold_file = os.path.join(self.folds_dir, 'FDDB-fold-%02d.txt' % foldnum)
    k = 0
    rows = readcsv(os.path.join(self.folds_dir,
                                'FDDB-fold-%02d-ellipseList.txt' % foldnum),
                   separator=' ')
    imscenes = []
    # File layout: image filename, then face count, then one ellipse row per face
    while k < len(rows):
        filename = rows[k][0]
        num_faces = int(rows[k + 1][0])
        bbox = [rows[j] for j in range(k + 2, k + 2 + num_faces)]
        k = k + 2 + len(bbox)
        # This ignores the rotation.
        # FIX: centroids are now converted with float() like width/height
        # (the original passed raw strings for xcentroid/ycentroid)
        ims = Scene(filename=os.path.join(self.rootdir, '%s.jpg' % filename),
                    objects=[Detection('face',
                                       xcentroid=float(bb[3]),
                                       ycentroid=float(bb[4]),
                                       width=2 * float(bb[1]),
                                       height=2 * float(bb[0]))
                             for bb in bbox])
        imscenes.append(ims)
    return imscenes
def dataset(self):
    """Return a generator to iterate over dataset"""
    # Column labels stored with each detection in its attributes dict
    schema = ['id', 'url', 'left', 'top', 'right', 'bottom', 'pose',
              'detection_score', 'curation']
    for txtfile in txtlist(os.path.join(self.datadir, 'files')):
        subject = filebase(txtfile)
        for row in readcsv(txtfile, separator=' '):
            # NOTE(review): url/bbox fields are read at offsets 2-6, which is
            # shifted by one relative to the schema labels above — verify
            # against the on-disk file format
            yield ImageDetection(url=row[2],
                                 category=subject,
                                 xmin=float(row[3]), ymin=float(row[4]),
                                 xmax=float(row[5]), ymax=float(row[6]),
                                 attributes=dict(zip(schema, row)))
def tinyset(self, size=1000):
    """Return up to `size` randomly selected image objects in the dataset.

    The image list is randomly permuted, so repeated calls return different
    subsets (the original docstring said "first" but the code permutes).
    """
    outlist = []
    imglist = np.random.permutation(
        [f[0] for f in readcsv(self._imagelist())])
    for (k, f) in enumerate(imglist):
        print('[megaface.dataset][%d/%d]: importing "%s"' % (k, size, f))
        A = self._attributes(os.path.join(self.datadir, f))
        outlist.append(ImageDetection(filename=os.path.join(self.datadir, f),
                                      category=filebase(f)).boundingbox(
                                          xmin=A['bounding_box']['x'],
                                          ymin=A['bounding_box']['y'],
                                          width=A['bounding_box']['width'],
                                          height=A['bounding_box']['height']))
        # FIX: was "if k > size: break", which returned size+2 elements
        if len(outlist) >= size:
            break
    return outlist
def take(self, n):
    """Randomly select n frames from dataset"""
    # Column labels stored with each detection in its attributes dict
    schema = ['id', 'url', 'left', 'top', 'right', 'bottom', 'pose',
              'detection_score', 'curation']
    takelist = []
    for csvfile in np.random.choice(txtlist(os.path.join(self.datadir, 'data')), n):
        rows = readcsv(csvfile, separator=' ')
        row = rows[np.random.randint(1, len(rows))]  # not including header
        takelist.append(ImageDetection(url=row[2],
                                       category=filebase(csvfile),
                                       xmin=float(row[3]), ymin=float(row[4]),
                                       xmax=float(row[5]), ymax=float(row[6]),
                                       attributes=dict(zip(schema, row))))
    return takelist
def export(tsvfile, tsvnames, outdir, csvfile):
    """Build a (savePath, subjectname) index CSV from the MS-Celeb TSV file.

    Maps each row's MID to a human-readable name via tsvnames and writes the
    resulting (path, name) pairs to csvfile.

    NOTE(review): row[-1] holds base64-encoded image data, but this function
    never writes images to disk — it only emits the CSV index.  The original
    decoded every row's image data and discarded the result; that dead
    decode has been removed.
    """
    csvlist = []
    d_mid_to_name = {x[0]: x[1] for x in readcsv(tsvnames, separator='\t')}
    with open(tsvfile, 'r') as tsvF:
        reader = csv.reader(tsvF, delimiter='\t')
        i = 0
        for row in reader:
            (MID, imgSearchRank, faceID) = (row[0], row[1], row[4])
            saveDir = os.path.join(outdir, MID)
            savePath = os.path.join(saveDir,
                                    "{}-{}.jpg".format(imgSearchRank, faceID))
            i += 1
            csvlist.append((savePath, d_mid_to_name[MID]))
            if i % 100 == 0:
                print("[msceleb.csv][%d]: Extracting CSV (%s,%s,%s)" %
                      (i, savePath, MID, d_mid_to_name[MID]))
    print(writecsv(csvlist, csvfile))
def tinyset(self, size=1000):
    """Return up to `size` randomly selected image objects in the trainset.

    Generates the MF2 CSV index first if it does not exist.  The image list
    is randomly permuted, so repeated calls return different subsets.
    """
    outlist = []
    csvpath = os.path.join(self.datadir, 'Megaface_Challenge_1M_disjoint_LOOSE.csv')
    if not os.path.exists(csvpath):
        print('[MF2.tinyset]: generating csv file for MF2')
        self._trainset()
    imglist = np.random.permutation([f[0] for f in readcsv(csvpath)])
    for (k, f) in enumerate(imglist):
        print('[MF2.tinyset][%d/%d]: importing "%s"' % (k, size, f))
        outlist.append(ImageDetection(filename=os.path.join(self.datadir, f),
                                      category=filebase(f)))
        # FIX: was "if k > size: break", which returned size+2 elements
        if len(outlist) >= size:
            break
    return outlist
def __init__(self, videodir, repodir, contrib=False, stride=1, verbose=True, n_videos=None, d_category_to_shortlabel=None):
    """Parse MEVA annotations (http://mevadata.org) for KNown Facility 1 dataset into vipy.video.Scene() objects.

    Kwiver packet format: https://gitlab.kitware.com/meva/meva-data-repo/blob/master/documents/KPF-specification-v4.pdf

    Inputs:
      -videodir=str:  path to directory containing 'drop-01'
      -repodir=str:  path to directory containing clone of https://gitlab.kitware.com/meva/meva-data-repo
      -stride=int:  the temporal stride in frames for importing bounding boxes; vipy will do linear interpolation and boundary handling
      -n_videos=int:  only return an integer number of videos, useful for debugging or for previewing dataset
      -contrib=bool:  include the noisy contrib annotations from DIVA performers
      -d_category_to_shortlabel:  a dictionary mapping category names to a short displayed label on the video.  The standard for
       visualization is that tracked objects are displayed with their category label (e.g. 'Person', 'Vehicle'), and activities are
       labeled according to the set of objects that performing the activity.  When an activity occurs, the set of objects are
       labeled with the same color as 'Noun Verbing' (e.g. 'Person Entering', 'Person Reading', 'Vehicle Starting') where 'Verbing'
       is provided by the shortlabel.  This is optional, and will use the default mapping if None
      -verbose=bool:  parsing verbosity
    """
    self.videodir = videodir
    self.repodir = repodir

    # Sanity check the two on-disk layouts before doing any parsing
    assert os.path.exists(
        os.path.join(self.videodir, 'drop-01')
    ), "Invalid input - videodir must contain the drop-01, drop-02 and drop-03 subdirectories. \nSee http://mevadata.org/#getting-data"
    assert os.path.exists(
        os.path.join(self.repodir, 'annotation')
    ), "Invalid input - repodir must contain the clone of https://gitlab.kitware.com/meva/meva-data-repo"

    # Default mapping from current MEVA activity category names to short display labels
    self._d_category_to_shortlabel = {
        'person_abandons_package': 'Abandoning',
        'person_closes_facility_door': 'Closing',
        'person_closes_trunk': 'Closing trunk',
        'person_closes_vehicle_door': 'Closing door',
        'person_embraces_person': 'Hugging',
        'person_enters_scene_through_structure': 'Entering',
        'person_enters_vehicle': 'Entering',
        'person_exits_scene_through_structure': 'Exiting',
        'person_exits_vehicle': 'Exiting',
        'hand_interacts_with_person': 'Using hand',
        'person_carries_heavy_object': 'Carrying',
        'person_interacts_with_laptop': 'Using laptop',
        'person_loads_vehicle': 'Loading',
        'person_transfers_object': 'Transferring',
        'person_opens_facility_door': 'Opening door',
        'person_opens_trunk': 'Opening trunk',
        'person_opens_vehicle_door': 'Opening door',
        'person_talks_to_person': 'Talking',
        'person_picks_up_object': 'Picking up',
        'person_purchases': 'Purchasing',
        'person_reads_document': 'Reading',
        'person_rides_bicycle': 'Riding',
        'person_puts_down_object': 'Putting down',
        'person_sits_down': 'Sitting',
        'person_stands_up': 'Standing',
        'person_talks_on_phone': 'Talking',
        'person_texts_on_phone': 'Texting',
        'person_steals_object': 'Stealing',
        'person_unloads_vehicle': 'Unloading',
        'vehicle_drops_off_person': 'Dropping off',
        'vehicle_picks_up_person': 'Picking up',
        'vehicle_reverses': 'Reversing',
        'vehicle_starts': 'Starting',
        'vehicle_stops': 'Stopping',
        'vehicle_turns_left': 'Turning left',
        'vehicle_turns_right': 'Turning right',
        'vehicle_makes_u_turn': 'Turning around'
    }
    # Display labels are lowercased for visualization
    self._d_category_to_shortlabel = {
        k: v.lower()
        for (k, v) in self._d_category_to_shortlabel.items()
    }

    # Mapping from legacy (pre-rename) DIVA category names to short labels
    self._d_oldcategory_to_shortlabel = {
        'Closing_Trunk': 'Closing',
        'Open_Trunk': 'Opening',
        'Riding': 'Riding',
        'Talking': 'Talking',
        'person_talks_to_person': 'Talking',
        'Transport_HeavyCarry': 'Carrying',
        'Unloading': 'Unloading',
        'abandon_package': 'Abandoning',
        'hand_interaction': 'Using Hand',
        'object_transfer': 'Transferring',
        'person_closes_facility_door': 'Closing',
        'person_closes_vehicle_door': 'Closing',
        'person_enters_through_structure': 'Entering',
        'person_enters_vehicle': 'Entering',
        'person_exits_through_structure': 'Exiting',
        'person_exits_vehicle': 'Exiting',
        'person_laptop_interaction': 'Interacting',
        'person_loads_vehicle': 'Loading',
        'person_opens_facility_door': 'Opening',
        'person_opens_vehicle_door': 'Opening',
        'person_person_embrace': 'Hugging',
        'person_picks_up_object': 'Picking up',
        'person_purchasing': 'Purchasing',
        'person_reading_document': 'Reading',
        'person_sets_down_object': 'Setting down',
        'person_sitting_down': 'Sitting',
        'person_standing_up': 'Standing',
        'person_stands_up': 'Standing',
        'specialized_talking_phone': 'Talking',
        'specialized_texting_phone': 'Texting',
        'theft': 'Theft',
        'vehicle_drops_off_person': 'Dropping off',
        'vehicle_picks_up_person': 'Picking up',
        'vehicle_reversing': 'Reversing',
        'vehicle_starting': 'Starting',
        'vehicle_stopping': 'Stopping',
        'vehicle_turning_left': 'Turning left',
        'vehicle_turning_right': 'Turning right',
        'vehicle_u_turn': 'Turning around'
    }
    # Old -> new category name mapping supplied by the meva-data-repo (skip header row)
    self._d_oldcategory_to_newcategory = {
        k: v
        for (k, v) in readcsv(
            os.path.join(self.repodir, 'documents',
                         'activity-name-mapping.csv'))[1:]
    }

    # Caller-supplied shortlabel mapping overrides the default
    d_category_to_shortlabel = d_category_to_shortlabel if d_category_to_shortlabel is not None else self._d_category_to_shortlabel

    d_videoname_to_path = {filebase(f): f for f in self._get_videos()}
    # One (types, geom, activities) YAML triple per video
    yamlfiles = zip(self._get_types_yaml(), self._get_geom_yaml(),
                    self._get_activities_yaml())
    # Optionally drop the noisy 'contrib' annotations from DIVA performers
    yamlfiles = [
        y for y in yamlfiles if contrib is True or 'contrib' not in y[0]
    ]
    # Optionally truncate for debugging/preview
    yamlfiles = list(
        yamlfiles)[0:n_videos] if n_videos is not None else list(yamlfiles)
    if verbose:
        print('[vipy.dataset.meva.KF1]: Loading %d YAML files' %
              len(yamlfiles))
        if len(yamlfiles) > 100 and vipy.globals.num_workers() == 1:
            print(
                '[vipy.dataset.meva.KF1]: This takes a while since parsing YAML files in python is painfully slow, consider calling "vipy.globals.num_workers(8)" before loading the dataset for parallel parsing'
            )
    # Parse each YAML triple into a Scene, in parallel if workers are configured
    if vipy.globals.num_workers() > 1:
        from vipy.batch import Batch
        self._vidlist = Batch(list(yamlfiles)).map(
            lambda tga: self._parse_video(d_videoname_to_path,
                                          d_category_to_shortlabel,
                                          tga[0], tga[1], tga[2],
                                          stride=stride,
                                          verbose=verbose))
    else:
        self._vidlist = [
            self._parse_video(d_videoname_to_path,
                              d_category_to_shortlabel,
                              t, g, a,
                              stride=stride,
                              verbose=verbose)
            for (t, g, a) in yamlfiles
        ]
    # Drop videos that failed to parse
    self._vidlist = [v for v in self._vidlist if v is not None]
def wordnetid_to_name(self):
    """Return a dict mapping wordnet subject ID to subject name (quotes stripped) from identity_meta.csv."""
    rows = readcsv(os.path.join(self.datadir, 'identity_meta.csv'),
                   ignoreheader=True)
    return {str(row[0]): str(row[1]).replace('"', '') for row in rows}
def _dataset(self, csvfile):
    """Parse an AVA annotation CSV into a list of vipy.video.Scene objects with Tracks and Activities."""
    # AVA csv format: video_id, middle_frame_timestamp, scaled_person_box (xmin, ymin, xmax, ymax), action_id, person_id
    # video_id: YouTube identifier
    # middle_frame_timestamp: in seconds from the start of the YouTube.
    # person_box: top-left (x1, y1) and bottom-right (x2,y2) normalized with respect to frame size, where (0.0, 0.0) corresponds to the top left, and (1.0, 1.0) corresponds to bottom right.
    # action_id: identifier of an action class, see ava_action_list_v2.2.pbtxt
    # person_id: a unique integer allowing this box to be linked to other boxes depicting the same person in adjacent frames of this video.
    assert self._isdownloaded(
    ), "Dataset not downloaded. download() first or manually download '%s' into '%s'" % (
        URL, self.datadir)
    csv = readcsv(csvfile)  # NOTE: shadows the stdlib 'csv' module name locally
    # Group annotation rows by video_id so each video is processed once
    d_videoid_to_rows = groupbyasdict(csv, lambda x: x[0])
    vidlist = []
    d_category_to_index = self.categories()
    d_index_to_category = {v: k for (k, v) in d_category_to_index.items()}
    for (k_video, (video_id,
                   rowlist)) in enumerate(d_videoid_to_rows.items()):
        url = 'https://www.youtube.com/watch?v=%s' % video_id
        print(
            '[vipy.dataset.ava][%d/%d]: Parsing "%s" with %d activities' %
            (k_video, len(d_videoid_to_rows), url, len(rowlist)))
        # Clip bounds from the min/max annotated timestamps; NOTE(review): these
        # use a nominal 30 fps while framerate below is 29.97 — confirm intended
        startframe = 30 * min([float(x[1]) for x in rowlist])
        endframe = 30 * (max([float(x[1]) for x in rowlist]) + 1.5)
        framerate = 30000 / 1001.0  # FIXME: is this correct in general, or do we need to grab this from ffprobe?
        v = vipy.video.Scene(url=url,
                             filename=os.path.join(self.datadir, video_id),
                             startframe=startframe,
                             endframe=endframe,
                             framerate=framerate)

        # Download or skip: (width, height) are needed to denormalize the boxes
        if not v.isdownloaded():
            print(
                '[vipy.dataset.ava][%d/%d]: Downloading "%s" to get (width, height) required for AVA bounding boxes'
                % (k_video, len(d_videoid_to_rows), url))
            v.download(ignoreErrors=True)
            if not v.isdownloaded():
                print(
                    '[vipy.dataset.ava][%d/%d]: Download failed - SKIPPING'
                    % (k_video, len(d_videoid_to_rows)))
                continue
        (height, width) = v.shape()

        # Tracks are "actor_id" across the video: one Track per person_id,
        # with keyframes at each annotated timestamp (offset by startframe)
        tracks = groupbyasdict(rowlist, lambda x: x[7])
        d_tracknum_to_track = {}
        for (tracknum, tracklist) in tracks.items():
            (keyframes, boxes) = zip(*[((float(x[1]) * framerate) - startframe,
                                        BoundingBox(xmin=width * float(x[2]),
                                                    ymin=height * float(x[3]),
                                                    xmax=width * float(x[4]),
                                                    ymax=height * float(x[5])))
                                       for x in tracklist])
            t = Track(keyframes=keyframes, boxes=boxes, category=tracknum)
            d_tracknum_to_track[tracknum] = t
            v.add(t)

        # Every row is a separate three second long activity centered at startsec involving one actor
        for (video_id, startsec, xmin, ymin, xmax, ymax, activity_id,
             actor_id) in rowlist:
            t = d_tracknum_to_track[actor_id]
            act_startframe = (float(startsec) * framerate) - startframe
            try:
                # +/- 1.5 seconds around the annotated center frame, clamped at 0
                a = Activity(
                    startframe=max(
                        0,
                        int(np.round(
                            (act_startframe - 1.5 * framerate)))),
                    endframe=int(
                        np.round((act_startframe + 1.5 * framerate))),
                    category=d_index_to_category[int(activity_id)],
                    tracks={t.id(): t})
                v.add(a)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                # Best-effort import: skip rows with unknown categories or malformed fields
                print(
                    '[vipy.dataset.ava]: actor_id=%s, activity_id=%s, video_id=%s - SKIPPING with error "%s"'
                    % (actor_id, activity_id, video_id, str(e)))
        vidlist.append(v)
    return vidlist
def subjectid(self):
    """Return a dict mapping subject ID to subject name, parsed from names.txt."""
    rows = readcsv(os.path.join(self.datadir, 'names.txt'), separator=' ')
    return {subjectid: name for (subjectid, name) in rows}
def subjects(self):
    """Return the tuple of subject names listed in names.txt (second column)."""
    rows = readcsv(os.path.join(self.datadir, 'names.txt'), separator=' ')
    (identifiers, names) = zip(*rows)
    return names
def _parse(self):
    """Return one ImageDetection per image in each subject directory, with a fixed
    centered box inset 13 pixels from each edge of the 250x250 image."""
    id2name = {k: v for (k, v) in readcsv(os.path.join(self.datadir, 'names.txt'),
                                          separator=' ')}
    outlist = []
    for subdir in dirlist(self.datadir):
        # Directory basename is the subject ID; map it to the subject name
        category = id2name[str(filebase(subdir))]
        for imfile in imlist(subdir):
            outlist.append(ImageDetection(filename=imfile,
                                          category=category,
                                          xmin=13, ymin=13,
                                          xmax=250 - 13, ymax=250 - 13))
    return outlist