def export(outdir=None, clean=False):
    """Download the dataset archives, split every .avi into PNG frames and write a CSV index.

    Args:
        outdir: Passed to ``Cache`` as ``cacheroot``.
        clean: When true, ``cache.clean()`` is called and the export is redone.
            When false and the CSV index already exists, that index is
            returned immediately without downloading or converting anything.

    Returns:
        Absolute path of the CSV index.  Each row holds the frame filename
        pattern ``<category>/<video>/im_%08d.png`` and the category name.

    Raises:
        IOError: If ffmpeg cannot be started or exits with a non-zero status.
    """
    import subprocess  # local import: only this function launches ffmpeg

    # Unpack dataset
    cache = Cache(cacheroot=outdir, subdir=VISET)
    outfile = cache.abspath('%s.csv' % VISET)
    if clean:
        cache.clean()
    elif os.path.isfile(outfile):
        print('[bobo.viset.kthactions]: exporting "%s"' % outfile)
        return outfile

    print('[bobo.viset.kthactions][WARNING]: downloads will not show percent progress since content length is unknown')
    for (url, label, sha1) in zip(URLS, LABELS, SHA1):
        cache.unpack(cache.get(url, sha1), cache.abspath(label), cleanup=False)

    # Video list
    root = cache.root()
    # 'wb' is the mode the Python 2 csv module expects for its output file.
    with open(outfile, 'wb') as csvfile, open(os.devnull, 'wb') as devnull:
        f = csv.writer(csvfile, delimiter=' ', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        for category in os.listdir(root):
            categorydir = os.path.join(root, category)
            if not os.path.isdir(categorydir):
                continue
            for filename in os.listdir(categorydir):
                (avibase, ext) = os.path.splitext(filename)
                if ext != '.avi':
                    continue
                imdir = cache.abspath(os.path.join(category, avibase))
                print('[bobo.viset.kthactions]: exporting "%s" to "%s"' % (filename, imdir))
                f.writerow([os.path.join(category, avibase, 'im_%08d.png'), category])
                remkdir(imdir)
                # An argument list (no shell) keeps paths containing spaces or
                # quotes intact and avoids the bash-only '&>' redirection,
                # which a POSIX /bin/sh treats as "run in background" and
                # therefore always reports success.
                cmd = ['ffmpeg', '-i', os.path.join(categorydir, filename),
                       os.path.join(imdir, 'im_%08d.png')]
                try:
                    status = subprocess.call(cmd, stdout=devnull, stderr=devnull)
                except OSError:
                    # ffmpeg missing or not executable: report it like any
                    # other ffmpeg failure so callers keep catching IOError.
                    status = -1
                if status != 0:
                    raise IOError('Error running ffmpeg')
    return outfile
def youtube(tag, n_pages=1, outdir=None):
    """Scrape YouTube search result pages for watch URLs matching a tag.

    Args:
        tag: Search string; spaces are replaced by '+' to build the query.
        n_pages: Number of result pages to request (pages 1..n_pages).
        outdir: If not None, the collected URLs are handed to ``download``
            together with a filename pattern
            ``<outdir>/youtube_<tofilename(tag)>_%04d.mp4``.

    Returns:
        List of unique ``https://www.youtube.com/watch?...`` URLs, in the
        order they were first encountered.
    """
    url = 'https://www.youtube.com/results?search_query=%s&page=%d'
    query = tag.replace(' ', '+')
    vidlist = []
    seen = set()
    for page in range(1, n_pages + 1):
        # A randomly chosen User-Agent header is sent with every request.
        headers = {'User-Agent': random.choice(common_user_agents)}
        search_request = urllib2.Request(url % (query, page), None, headers)
        search_results = urllib2.urlopen(search_request)
        try:
            search_data = search_results.read()
        finally:
            # Release the connection even if reading fails.
            search_results.close()
        # Element 0 of the split is the page text before the first link and
        # never a video, so skip it instead of guessing from its content.
        for chunk in search_data.split('href="/watch?')[1:]:
            link = 'https://www.youtube.com/watch?%s' % chunk.split('"')[0]
            if link not in seen:  # unique, keeping first-seen order
                seen.add(link)
                vidlist.append(link)
    if outdir is not None:
        download(vidlist, os.path.join(remkdir(outdir), 'youtube_' + tofilename(tag) + '_%04d.mp4'))
    return vidlist
def ustream(tag, n_pages=1, outdir=None):
    """Scrape the Ustream search page for recorded-video URLs matching a tag.

    Args:
        tag: Search string; spaces are replaced by '+' to build the query.
        n_pages: Unused.  The search URL has no page parameter and only a
            single request is made; the parameter is kept so existing callers
            keep working.
        outdir: If not None, the collected URLs are handed to ``download``
            together with a filename pattern
            ``<outdir>/ustream_<tofilename(tag)>_%04d.mp4``.

    Returns:
        List of unique ``http://www.ustream.tv/recorded/...`` URLs, in the
        order they were first encountered.
    """
    url = 'http://www.ustream.tv/search?q=%s'
    # A randomly chosen User-Agent header is sent with the request.
    headers = {'User-Agent': random.choice(common_user_agents)}
    search_request = urllib2.Request(url % (tag.replace(' ', '+')), None, headers)
    search_results = urllib2.urlopen(search_request)
    try:
        search_data = search_results.read()
    finally:
        # Release the connection even if reading fails.
        search_results.close()

    vidlist = []
    seen = set()
    # Element 0 of the split is the page text before the first link and never
    # a video, so skip it instead of guessing from its content.
    for chunk in search_data.split('href="/recorded/')[1:]:
        link = 'http://www.ustream.tv/recorded/%s' % chunk.split('"')[0]
        if link not in seen:  # unique, keeping first-seen order
            seen.add(link)
            vidlist.append(link)
    print(vidlist)
    if outdir is not None:
        download(vidlist, os.path.join(remkdir(outdir), 'ustream_' + tofilename(tag) + '_%04d.mp4'))
    return vidlist
def frames(viddir):
    """Export every video returned by ``videolist(viddir)`` to PNG frames.

    For a video ``<path>/<name>.<ext>`` the frames are written as
    ``<path>/<name>/%08d.png``; the target directory is prepared with
    ``remkdir`` first.  A failing conversion is reported with a warning and
    the remaining videos are still processed.

    Args:
        viddir: Directory passed to ``videolist``.
    """
    import subprocess  # local import: only this function launches ffmpeg

    with open(os.devnull, 'wb') as devnull:
        for v in videolist(viddir):
            outdir = os.path.splitext(v)[0]
            framedir = remkdir(outdir)
            print('[bobo.viset.videodir]: exporting frames from "%s" to "%s"' % (v, outdir))
            # An argument list (no shell) keeps paths containing spaces or
            # quotes intact and avoids the bash-only '&>' redirection.
            cmd = ['ffmpeg', '-i', v, os.path.join(framedir, '%08d.png')]
            try:
                status = subprocess.call(cmd, stdout=devnull, stderr=devnull)
            except OSError:
                # ffmpeg missing or not executable
                status = -1
            if status != 0:
                print('[bobo.viset.videodir][WARNING]: ffmpeg failed for "%s"' % v)
def setroot(self, path):
    """Store ``path`` as the cache root and hand it to ``remkdir``.

    Args:
        path: New cache root directory.
    """
    self._cacheroot = path
    # NOTE(review): remkdir presumably creates the directory when it is
    # missing; confirm against its definition.
    remkdir(self._cacheroot)
def frames(viddir):
    """Export every video returned by ``videolist(viddir)`` to JPEG frames.

    Each video is read with an input rate of 25 fps (``-r 25``), scaled to a
    height of 240 pixels with the width following the aspect ratio
    (``scale=-1:240``) and written with ``-qscale:v 2``.  For a video
    ``<path>/<name>.<ext>`` the frames are written as
    ``<path>/<name>/%08d.jpg``; the target directory is prepared with
    ``remkdir`` first.  A failing conversion is reported with a warning and
    the remaining videos are still processed.

    Args:
        viddir: Directory passed to ``videolist``.
    """
    import subprocess  # local import: only this function launches ffmpeg

    with open(os.devnull, 'wb') as devnull:
        for v in videolist(viddir):
            outdir = os.path.splitext(v)[0]
            framedir = remkdir(outdir)
            print('[bobo.viset.videodir]: exporting frames from "%s" to "%s"' % (v, outdir))
            # An argument list (no shell) keeps paths containing spaces or
            # quotes intact and avoids the bash-only '&>' redirection.
            cmd = ['ffmpeg', '-r', '25', '-i', v,
                   '-qscale:v', '2', '-vf', 'scale=-1:240',
                   os.path.join(framedir, '%08d.jpg')]
            try:
                status = subprocess.call(cmd, stdout=devnull, stderr=devnull)
            except OSError:
                # ffmpeg missing or not executable
                status = -1
            if status != 0:
                print('[bobo.viset.videodir][WARNING]: ffmpeg failed for "%s"' % v)