def imagetuplelist(list_of_tuples_of_image_files, outdir, title='Image Visualization', imagewidth=64):
    """Imageset but put tuples on same row.

    Copies every image in each tuple into outdir and generates an index.html
    summary page showing each tuple of images on its own row.

    Args:
        list_of_tuples_of_image_files: list of tuples of image file paths
        outdir: output directory (created if necessary)
        title: HTML page title
        imagewidth: display width in pixels for each image

    Returns:
        The path to the generated index.html
    """
    k_divid = 0

    # Create summary page to show precomputed images.
    # FIX: remkdir(outdir) was called twice; call once and reuse the result.
    outdir = remkdir(outdir)
    filename = os.path.join(outdir, 'index.html')

    # FIX: use a context manager so the file is closed even if a copy fails mid-way
    with open(filename, 'w') as f:
        f.write('<!DOCTYPE html>\n')
        f.write('<html>\n')
        f.write('<body>\n')
        f.write('<div id="container" style="width:2400px">\n')
        f.write('<div id="header">\n')
        f.write('<h1 style="margin-bottom:0;">Title: %s</h1><br>\n' % title)
        localtime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time()))
        f.write('Summary HTML generated on %s<br>\n' % localtime)
        f.write('Number of Tuples: %d<br>\n' % len(list_of_tuples_of_image_files))
        f.write('</div>\n')
        f.write('<br>\n')
        f.write('<hr>\n')
        f.write('<div id="%04d" style="float:left;">\n' % k_divid)
        k_divid = k_divid + 1

        # Generate images and html
        for (k, imsrclist) in enumerate(list_of_tuples_of_image_files):
            f.write('<p>\n</p>\n')
            for imsrc in imsrclist:
                # Copy the image next to index.html so the relative <img src> resolves
                shutil.copyfile(imsrc, os.path.join(outdir, filetail(imsrc)))
                imdst = filetail(imsrc)
                f.write('<b>Filename: %s</b><br>\n' % imdst)
            f.write('<p>\n</p>\n')
            f.write('<br>\n')
            for imsrc in imsrclist:
                imdst = filetail(imsrc)
                f.write('<img src="%s" alt="image" width=%d/>' % (imdst, imagewidth))
            f.write('\n<p>\n</p>\n')
            f.write('<hr>\n')
            f.write('<p>\n</p>\n')
        f.write('</div>\n')
        f.write('</body>\n')
        f.write('</html>\n')
    return filename
def __init__(self, datadir):
    """Initialize from a directory that contains the unpacked YouTubeFaces dataset."""
    self.datadir = remkdir(datadir)
    self.ytfdir = datadir
    # The unpacked tarball must contain frame_images_DB; otherwise instruct the user
    framedir = os.path.join(self.datadir, 'frame_images_DB')
    if not os.path.isdir(framedir):
        raise ValueError(
            'Download YouTubeFaces dataset with "wget %s -O %s; cd %s; tar zxvf YouTubeFaces.tar.gz", and initialize with YouTubeFace(datadir="%s/YouTubeFaces")'
            % (URL, os.path.join(self.datadir, 'YouTubeFaces.tar.gz'), self.datadir, self.datadir))
def facebookprofile(fbid, outdir='./imgs', cleanup=True, hierarchical=False, redownload=False):
    """Download the public Facebook profile picture for a numeric profile id.

    Args:
        fbid: numeric facebook profile id
        outdir: output directory for the downloaded jpg
        cleanup: if True, delete files that look like the default placeholder image
        hierarchical: if True, bucket images into subdirectories of 10000 images each
        redownload: if True, re-fetch even if the output file already exists

    Returns:
        None.  Side effect is outdir/<fbid>.jpg (possibly removed again by cleanup).
    """
    if hierarchical:
        # 10000 images per directory: outdir/1000/10000001.jpg
        subdir = remkdir(os.path.join(outdir, str(int(float(fbid) / 1E4))))
        outfile = os.path.join(subdir, '%d.jpg' % int(fbid))
    else:
        outfile = os.path.join(outdir, '%d.jpg' % int(fbid))
    url = "http://graph.facebook.com/picture?id=" + str(fbid) + "&width=800"
    if not os.path.exists(outfile) or redownload:
        try:
            print('[facebookprofile.download]: Downloading "%s" to "%s"' % (url, outfile))
            # Randomize the user agent to reduce the chance of being throttled
            user_agent = np.random.choice(common_user_agents)
            headers = {'User-Agent': user_agent}
            req = urllib.request.Request(url, None, headers)
            # FIX: the original read the Content-Length via message.getheader(),
            # which is the python-2 API and raises AttributeError on python 3
            # (urllib.request is python 3); the size was only used for a
            # commented-out progress print, so drop it entirely.
            # FIX: close the response and output file via context managers.
            CHUNK = 256 * 10240
            with urllib.request.urlopen(req) as imgfile:
                with open(outfile, 'wb') as fp:
                    while True:
                        chunk = imgfile.read(CHUNK)
                        if not chunk:
                            break
                        fp.write(chunk)
            # Heuristic cleanup: tiny files (or two known exact sizes) are the
            # default "no profile photo" placeholder, not a real picture
            s = os.path.getsize(outfile)
            if cleanup and (s < 11000 or s == 10626 or s == 10491):
                print('[facebookprofile.download]: deleting invalid file "%s"' % outfile)
                os.remove(outfile)
        except (urllib.request.HTTPError, urllib.request.URLError):
            # Both handlers printed the identical message; merged into one clause.
            # KeyboardInterrupt and everything else propagate naturally (the
            # original's 'except KeyboardInterrupt: raise' and bare
            # 'except: raise' were no-ops).
            print('[fb_image.download]: Skipping "%s"' % (url))
def extract(tsvfile, outdir):
    """https://github.com/cmusatyalab/openface/blob/master/data/ms-celeb-1m/extract.py"""
    # Each TSV row holds metadata columns plus a base64-encoded jpg in the last field
    with open(tsvfile, 'r') as tsvF:
        n = 0
        for row in csv.reader(tsvF, delimiter='\t'):
            MID, imgSearchRank, faceID = row[0], row[1], row[4]
            data = base64.b64decode(row[-1])
            saveDir = os.path.join(outdir, MID)
            remkdir(saveDir)
            savePath = os.path.join(saveDir, "{}-{}.jpg".format(imgSearchRank, faceID))
            with open(savePath, 'wb') as f:
                f.write(data)
            n += 1
            if n % 1000 == 0:
                print("Extracted {} images.".format(n))
def verbs():
    """Return a list of verbs from verbnet that can be used to define a set of activities"""
    try_import('nltk')
    import nltk

    # Cache nltk corpora under VIPY_CACHE if set, otherwise the system temp dir
    nltkdir = (remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk'))
               if 'VIPY_CACHE' in os.environ else tempfile.gettempdir())
    os.environ['NLTK_DATA'] = nltkdir

    # FIX: download into nltkdir (the directory NLTK_DATA points at) instead of
    # unconditionally into tempfile.gettempdir(); otherwise nltk cannot locate
    # the corpus when VIPY_CACHE is set
    print('[vipy.annotation.verbs]: Downloading verbnet to "%s"' % nltkdir)
    nltk.download('verbnet', nltkdir)
    from nltk.corpus import verbnet
    return verbnet.lemmas()
def parse(self):
    """ Return a list of ImageDetections for all URLs in facescrub """
    imdir = remkdir(os.path.join(self._datadir, 'images'))

    def _parse_csv(csvfile, gender):
        # One face per row, tab separated (name, imageid, faceid, url, bbox, sha256);
        # the first row is a header and bbox is 'xmin,ymin,xmax,ymax'
        imset = []
        for (subjectname, imageid, faceid, url, bbox, sha256) in readcsv(csvfile, separator='\t')[1:]:
            categoryname = subjectname.replace(' ', '_')
            (xmin, ymin, xmax, ymax) = bbox.split(',')
            imset.append(
                ImageDetection(url=url,
                               filename=os.path.join(imdir, '%s_%s.jpg' % (categoryname, imageid)),
                               category=categoryname,
                               xmin=xmin, ymin=ymin, xmax=xmax, ymax=ymax,
                               attributes={'GENDER': gender}))
        return imset

    # FIX: the actors and actresses loops were copy-pasted duplicates differing
    # only in the csv filename and the GENDER attribute; factored into one helper
    return (_parse_csv(os.path.join(self._datadir, 'facescrub_actors.txt'), 'male')
            + _parse_csv(os.path.join(self._datadir, 'facescrub_actresses.txt'), 'female'))
def basic_level_categories():
    """Return a list of nouns from wordnet that can be used as an initial list of basic level object categories"""
    try_import('nltk')
    import nltk

    # Cache nltk corpora under VIPY_CACHE if set, otherwise the system temp dir
    nltkdir = (remkdir(os.path.join(os.environ['VIPY_CACHE'], 'nltk'))
               if 'VIPY_CACHE' in os.environ else tempfile.gettempdir())
    os.environ['NLTK_DATA'] = nltkdir

    # FIX: download into nltkdir (the directory NLTK_DATA points at) instead of
    # unconditionally into tempfile.gettempdir(); otherwise nltk cannot locate
    # the corpus when VIPY_CACHE is set
    print('[vipy.annotation.basic_level_categories]: Downloading wordnet to "%s"' % nltkdir)
    nltk.download('wordnet', nltkdir)
    from nltk.corpus import wordnet

    # Restrict to lexical categories likely to name concrete basic-level objects
    allowed_lexnames = [
        'noun.animal', 'noun.artifact', 'noun.body', 'noun.food',
        'noun.object', 'noun.plant'
    ]
    nouns = [str(synset.lemmas()[0].name()).lower()
             for synset in wordnet.all_synsets('n')
             if synset.lexname() in allowed_lexnames]
    nouns.sort()
    return nouns
def fetchjson(self):
    """Download the JSON at self._jsonurl to a cached local file if not already downloaded.

    Returns:
        self, with self._jsonfile set to the local cache path (or None if the
        S3 download failed, so a later call can retry)
    """
    if self._jsonfile is None:
        # Cache under VIPY_CACHE if set, otherwise the system temp dir
        self._jsonfile = os.path.join(
            remkdir(os.environ["VIPY_CACHE"] if "VIPY_CACHE" in os.environ else tempdir()),
            filetail(self._jsonurl),
        )
        if not os.path.exists(self._jsonfile):
            print('[pycollector.video]: Fetching "%s"' % self._jsonurl)
            try:
                vipy.downloader.s3(
                    self._jsonurl, self._jsonfile
                )  # TODO - this is a problem to assume vipy user also has access to S3. We should decouple this dependency of using vipy
            except KeyboardInterrupt:
                raise
            except Exception as e:
                print(
                    '[pycollector.video]: S3 download error "%s" - SKIPPING'
                    % str(e))
                # FIX: the original assigned to a local variable 'jsonfile',
                # which had no effect; reset the instance attribute instead
                self._jsonfile = None
    return self
def __init__(self, datadir):
    """Caltech256, provide a datadir='/path/to/store/caltech256' """
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)
def __init__(self, outdir):
    """download URLS above to outdir, then run export()"""
    self.outdir = remkdir(outdir)
    # Guard clause: nothing to do if the raw files are already on disk
    if self._downloaded():
        return
    print('[vipy.dataset.mnist]: downloading MNIST to "%s"' % self.outdir)
    self._wget()
def __init__(self, datadir):
    """AFLW dataset; provide the datadir where the dataset was downloaded manually."""
    self.datadir = remkdir(datadir)
    # FIX: os.path.join() with a single argument is a no-op; test the path directly.
    # NOTE(review): remkdir() just created self.datadir, so this check can never
    # fail as written -- it likely should test a dataset subdirectory instead;
    # confirm against the expected AFLW layout before changing behavior.
    if not os.path.isdir(self.datadir):
        raise ValueError('Download AFLW dataset manually to "%s" ' % self.datadir)
def __init__(self, datadir):
    """AVA, provide a datadir='/path/to/store/ava' """
    self.datadir = remkdir(datadir)
    # Guard clause: fetch the dataset only when it is not already present
    if self._isdownloaded():
        return
    self.download()
def __init__(self, datadir):
    """Human motion dataset, provide a datadir='/path/to/store/hmdb' """
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)
def __init__(self, datadir='/proj/janus3/megaface'):
    """MegaFace dataset rooted at datadir (default is a cluster-local path)."""
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)
def __init__(self, datadir):
    """Provide datadir=/path/to/ILSVRC2012"""
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)
def __init__(self, datadir):
    """ETHZShapes, provide a datadir='/path/to/store/ethzshapes' """
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)
def __init__(self, datadir):
    """Initialize the dataset rooted at datadir (created if it does not exist)."""
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)
def __init__(self, datadir='/proj/janus3/vgg-face/curated/vgg_face_dataset'):
    """VGG-Face rooted at datadir (default is a cluster-local curated path)."""
    # Subject list is computed lazily on first use
    self._subjects = None
    self.datadir = remkdir(datadir)
def __init__(self, datadir):
    """Activitynet, provide a datadir='/path/to/store/activitynet' """
    self.datadir = remkdir(datadir)
    self._url = URL
    # Fetch the dataset on first construction if not already on disk
    if not self._isdownloaded():
        self.download()
def __init__(self, datadir):
    """Kinetics, provide a datadir='/path/to/store/kinetics' """
    self._name = 'kinetics400'
    self._url = 'https://storage.googleapis.com/deepmind-media/Datasets/kinetics400.tar.gz'
    self.datadir = remkdir(datadir)
def __init__(self, datadir):
    """Datadir contains the unpacked contents of LFW from $URL -> /path/to/lfw"""
    # Images live two levels down (datadir/lfw/lfw); remkdir creates each level
    # if missing and returns the resulting path
    self.lfwdir = remkdir(os.path.join(remkdir(datadir), 'lfw', 'lfw'))
def __init__(self, datadir):
    """KTH ACtions dataset, provide a datadir='/path/to/store/kthactions' """
    # remkdir creates the directory if missing and returns its path
    self.datadir = remkdir(datadir)