def _videos_by_tags(tags):
    """Build one status dict per event from a single batched Vid.ly query.

    `tags` maps a Vid.ly tag to its event. Every returned dict has the
    event's 'id', plus 'duration' and 'duration_human' when the event
    has a duration. If the event has a VidlySubmission for that tag,
    the most recent one also contributes 'tag', 'status', 'finished'
    and, while it is neither finished nor errored,
    'estimated_time_left':

        {'id': 123, 'tag': 'abc123', 'status': 'Processing',
         'finished': False, 'estimated_time_left': ...}

    Without a VidlySubmission and without a duration the dict is just:

        {'id': 124}

    Side effects: a submission reported as 'Finished' or 'Error' gets
    its `finished` / `errored` timestamp saved if it was not set yet,
    and a finished event without an `archive_time` gets one.

    The advantage of this function is that only 1 query to Vid.ly is
    needed for a long list of tags.
    """
    vidly_results = vidly.query(tags.keys())
    contexts = []
    for tag, event in tags.items():
        context = {'id': event.id}
        if event.duration:
            context['duration'] = event.duration
            context['duration_human'] = show_duration(event.duration)

        submissions = (
            VidlySubmission.objects
            .filter(event=event, tag=tag)
            .order_by('-submission_time')
        )
        # The slice yields at most one row: the latest submission.
        for submission in submissions[:1]:
            status = vidly_results.get(tag, {}).get('Status')
            finished = status == 'Finished'
            context['tag'] = tag
            context['status'] = status
            context['finished'] = finished

            if finished:
                if not submission.finished:
                    submission.finished = timezone.now()
                    submission.save()
                if not event.archive_time:
                    event.archive_time = timezone.now()
                    event.save()
            elif status == 'Error':
                if not submission.errored:
                    submission.errored = timezone.now()
                    submission.save()
            else:
                context['estimated_time_left'] = (
                    submission.get_estimated_time_left()
                )
        contexts.append(context)
    return contexts
def _videos_by_tags(tags):
    """Return a list with one dict per (tag, event) pair in `tags`.

    Vid.ly is queried once for all the tags. Each dict always carries
    the event 'id'. 'duration' and 'duration_human' are added when the
    event has a duration. When a VidlySubmission exists for the event
    and tag, the newest one adds 'tag', 'status' and 'finished', and
    'estimated_time_left' unless the status is 'Finished' or 'Error':

        {'id': 123, 'tag': 'abc123', 'status': 'Processing',
         'finished': False, 'estimated_time_left': ...}

    With no VidlySubmission and no duration the dict is only:

        {'id': 124}

    While building the list, missing `finished` / `errored` timestamps
    are saved on the submission, and a missing `archive_time` is saved
    on an event whose video is finished.
    """
    statuses_by_tag = vidly.query(tags.keys())
    rows = []
    for tag, event in tags.items():
        row = {
            'id': event.id,
        }
        duration = event.duration
        if duration:
            row['duration'] = duration
            row['duration_human'] = show_duration(duration)

        latest = VidlySubmission.objects.filter(event=event, tag=tag)
        latest = latest.order_by('-submission_time')[:1]
        # At most one iteration, for the most recent submission.
        for submission in latest:
            vidly_status = statuses_by_tag.get(tag, {}).get('Status')
            row['tag'] = tag
            row['status'] = vidly_status
            row['finished'] = vidly_status == 'Finished'

            if row['finished']:
                if not submission.finished:
                    submission.finished = timezone.now()
                    submission.save()
                if not event.archive_time:
                    event.archive_time = timezone.now()
                    event.save()
                continue

            if vidly_status == 'Error':
                if not submission.errored:
                    submission.errored = timezone.now()
                    submission.save()
                continue

            row['estimated_time_left'] = (
                submission.get_estimated_time_left()
            )
        rows.append(row)
    return rows
def fetch_duration(
    event, save=False, save_locally=False, verbose=False,
    use_https=True, video_url=None
):
    """Return the video's duration in whole seconds, or None.

    The duration is parsed out of what `ffmpeg -i <video>` writes to
    stderr. None is returned when that output has no match for REGEX.

    Arguments:
        event: the event whose video is inspected.
        save: if true, write the duration to the event's database row.
        save_locally: if true, let get_video_url() download the video
            and run ffmpeg on the local copy, which is removed
            afterwards. Can not be combined with `video_url`.
        verbose: if true, print progress information.
        use_https: passed on to get_video_url().
        video_url: inspect this URL instead of asking get_video_url().

    Raises:
        AssertionError: if `video_url` is combined with `save_locally`,
            if the URL has no scheme, or if a HEAD request reports a
            0 byte or text/html response.
        ValueError: if REGEX matches more than once in the output.
    """
    # 'filepath' is only not None if get_video_url() downloaded the video
    filepath = None
    if video_url:
        if save_locally:
            raise AssertionError(
                'save_locally can not be combined with video_url'
            )
    else:
        video_url, filepath = get_video_url(
            event, use_https, save_locally, verbose=verbose
        )

    # Some videos might return a 200 OK on a HEAD but are corrupted
    # and contain nothing
    if not save_locally:
        if '://' not in video_url:
            raise AssertionError('%s is not a URL' % video_url)
        head = requests.head(video_url)
        if head.headers.get('Content-Length') == '0':
            # corrupt file!
            raise AssertionError(
                '%s has a 0 byte Content-Length' % video_url
            )
        if head.headers.get('Content-Type', '').startswith('text/html'):
            # Not a URL to an actual file!
            raise AssertionError(
                '%s is a text/html document' % video_url
            )

    try:
        ffmpeg_location = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        if verbose and not which(ffmpeg_location):
            print("%s is not an executable path" % (ffmpeg_location,))
        command = [
            ffmpeg_location,
            '-i', video_url,
        ]
        if verbose:  # pragma: no cover
            print(' '.join(command))

        t0 = time.time()
        out, err = wrap_subprocess(command)
        t1 = time.time()

        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (t1 - t0,)
            )

        matches = REGEX.findall(err)
        if matches:
            # Exactly one match is expected; more than one raises
            # ValueError here.
            found, = matches
            hours = int(found[0])
            minutes = int(found[1]) + hours * 60
            seconds = int(found[2]) + minutes * 60
            if save:
                # Because it's not safe to keep the event object open too
                # long, as it might have been edited in another thread,
                # just do an update here.
                Event.objects.filter(id=event.id).update(duration=seconds)
            if verbose:  # pragma: no cover
                print(show_duration(seconds, include_seconds=True))
            return seconds
        elif verbose:  # pragma: no cover
            print("No Duration output. Error:")
            print(err)
    finally:
        # get_video_url() returns no filepath for uploaded files and
        # YouTube videos even when save_locally is set, so only clean up
        # when there really is a downloaded file.
        if save_locally and filepath and os.path.isfile(filepath):
            shutil.rmtree(os.path.dirname(filepath))
def get_video_url(event, use_https, save_locally, verbose=False):
    """Return a `(video_url, filepath)` tuple for the event's video.

    Uploaded files and YouTube videos return their URL immediately with
    `filepath` set to None, regardless of `save_locally`. For Vid.ly and
    Ogg Video templates the URL is followed through up to a handful of
    301/302 redirects and must finally answer a HEAD request with 200.

    Arguments:
        event: the event to find a video URL for.
        use_https: if false, 'https://' in the final URL is replaced
            with 'http://'.
        save_locally: if true, the video is downloaded into a new
            temporary directory and both items of the returned tuple
            are the path of the downloaded file. The caller is
            responsible for removing that directory.
        verbose: if true, print the Content-Length and download time.

    Raises:
        AssertionError: if the event's template is not recognised, if
            the template environment lacks the needed value, or if the
            final HEAD request does not return 200.
    """
    if event.upload:
        return event.upload.url, None

    template_name = event.template.name if event.template else ''
    if 'Vid.ly' in template_name:
        tag = event.template_environment.get('tag')
        if not tag:
            raise AssertionError("No Vid.ly tag value")
        token_protected = event.privacy != Event.PRIVACY_PUBLIC
        hd = False
        submissions = VidlySubmission.objects.filter(event=event, tag=tag)
        # The most recent submission, if any, decides quality and
        # token protection.
        for submission in submissions.order_by('-submission_time')[:1]:
            hd = submission.hd
            token_protected = submission.token_protection

        video_url = '%s/%s?content=video&format=%s' % (
            settings.VIDLY_BASE_URL,
            tag,
            'hd_mp4' if hd else 'mp4',
        )
        if token_protected:
            video_url += '&token=%s' % vidly.tokenize(tag, 60)
    elif 'Ogg Video' in template_name:
        video_url = event.template_environment.get('url')
        if not video_url:
            raise AssertionError("No Ogg Video url value")
    elif 'YouTube' in template_name:
        youtube_id = event.template_environment.get('id')
        if not youtube_id:
            raise AssertionError("No YouTube ID value")
        video_url = 'https://www.youtube.com/watch?v={}'.format(youtube_id)
        return video_url, None
    else:
        raise AssertionError("Not valid template")

    response = requests.head(video_url)
    redirects = 0
    while response.status_code in (302, 301):
        # A Location header may be relative to the URL just requested.
        video_url = urlparse.urljoin(video_url, response.headers['Location'])
        response = requests.head(video_url)
        redirects += 1
        if redirects > 5:
            # just too many times
            break

    # 'response' already belongs to the final 'video_url', so there is
    # no need to request it once more.
    if response.status_code != 200:
        raise AssertionError(response.status_code)
    if verbose:  # pragma: no cover
        if response.headers.get('Content-Length'):
            print(
                "Content-Length: %s" % (
                    filesizeformat(int(response.headers['Content-Length'])),
                )
            )

    if not use_https:
        video_url = video_url.replace('https://', 'http://')

    if not save_locally:
        return video_url, None

    # store it in a temporary location
    dir_ = tempfile.mkdtemp('videoinfo')
    try:
        if 'Vid.ly' in template_name:
            filename = '%s.mp4' % tag
        else:
            filename = os.path.basename(urlparse.urlparse(video_url).path)
        filepath = os.path.join(dir_, filename)
        t0 = time.time()
        _download_file(video_url, filepath)
        t1 = time.time()
    except Exception:
        # The caller never learns about 'dir_' when the download fails,
        # so nobody else would remove it.
        shutil.rmtree(dir_, ignore_errors=True)
        raise

    if verbose:  # pragma: no cover
        seconds = int(t1 - t0)
        print(
            "Took %s to download"
            % (show_duration(seconds, include_seconds=True),)
        )
    return filepath, filepath
def fetch_screencapture(
    event, save=False, save_locally=False, verbose=False, use_https=True,
    import_=True, import_if_possible=False, video_url=None,
    set_first_available=False, import_immediately=False,
    timestamps=None, callback=None,
):
    """Extract JPEG screencaptures from the event's video with ffmpeg.

    Without `timestamps`, frames are taken at evenly spaced offsets,
    `event.duration / settings.SCREENCAPTURES_NO_PICTURES` seconds
    apart. With `timestamps`, one frame is taken per given offset.

    Arguments:
        event: the event; it must have a duration.
        save: not used by this function.
        save_locally: if true, let get_video_url() download the video
            and run ffmpeg on the local copy, which is removed
            afterwards. Can not be combined with `video_url`.
        verbose: if true, print progress information.
        use_https: passed on to get_video_url().
        import_: if true, write the pictures to a fresh temporary
            directory, import them with _import_files() and delete the
            directory. If false, write them to a predictable directory
            under the system temp directory and leave them there.
        import_if_possible: like `import_=False`, but still try to
            import; the directory is only kept if the import fails.
        video_url: use this URL instead of asking get_video_url().
        set_first_available: passed on to _import_files().
        import_immediately: import after every extracted frame instead
            of only once at the end.
        timestamps: numeric offsets in seconds to capture, or None.
        callback: passed to _callback_files() after each frame when
            `timestamps` is used.

    Returns the count accumulated from the import helpers when
    importing, otherwise the number of files left in the directory.

    Raises AssertionError if the event has no duration or if
    `video_url` is combined with `save_locally`.
    """
    if not event.duration:
        raise AssertionError("no duration")
    # When you set `import_` to False, it creates the JPEGs and leaves
    # them there in a predictable location (so they can be swept up
    # by import_screencaptures later).
    # However, if you want to continue doing that plus at least
    # try to import the created pictures, then set
    # `import_if_possible=True`.
    # Then, if the import fails, the pictures are still there to be
    # picked up by the import_screencaptures() later.
    if import_if_possible:
        import_ = False

    # 'filepath' is only not None if get_video_url() downloaded the video
    filepath = None
    if video_url:
        if save_locally:
            raise AssertionError(
                'save_locally can not be combined with video_url'
            )
    else:
        video_url, filepath = get_video_url(
            event, use_https, save_locally, verbose=verbose,
        )

    if import_:
        save_dir = tempfile.mkdtemp('screencaptures-%s' % event.id)
    else:
        # Instead of importing we're going to put them in a directory
        # that does NOT get deleted when it has created the screencaps.
        save_dir = os.path.join(
            tempfile.gettempdir(),
            settings.SCREENCAPTURES_TEMP_DIRECTORY_NAME
        )
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        directory_name = '%s_%s' % (event.id, event.slug)
        save_dir = os.path.join(save_dir, directory_name)
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)

    def format_time(seconds):
        # Truncate to whole seconds first so a float offset can never
        # be rounded up across a minute or hour boundary.
        minutes, secs = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return '%02d:%02d:%02d' % (hours, minutes, secs)

    # First, assume we will delete the temporary save_dir.
    # This is toggled if an exception happens in importing
    # the pictures.
    delete_save_dir = True

    try:
        if verbose:  # pragma: no cover
            print(
                "Video duration: %s"
                % (show_duration(event.duration, include_seconds=True),)
            )

        ffmpeg_location = getattr(settings, 'FFMPEG_LOCATION', 'ffmpeg')
        if verbose and not which(ffmpeg_location):
            print("%s is not an executable path" % (ffmpeg_location,))
        incr = float(event.duration) / settings.SCREENCAPTURES_NO_PICTURES
        created = 0
        output_template = os.path.join(save_dir, 'screencap-%02d.jpg')
        all_err = []

        def extract_frame(seconds, save_name):
            command = [
                ffmpeg_location,
                '-ss', format_time(seconds),
                '-i', video_url,
                '-vframes', '1',
                save_name,
            ]
            if verbose:  # pragma: no cover
                print(' '.join(command))
            _, err = wrap_subprocess(command)
            all_err.append(err)

        t0 = time.time()
        if timestamps is not None:
            for timestamp in timestamps:
                extract_frame(timestamp, output_template % timestamp)
                if callback:
                    # NOTE(review): this assigns rather than adds, so
                    # only the latest result is kept - confirm intended.
                    created = _callback_files(
                        callback,
                        _get_files(save_dir),
                        delete_opened_files=True,
                    )
                if import_immediately:
                    created += _import_files(
                        event,
                        _get_files(save_dir),
                        delete_opened_files=True,
                        timestamp=timestamp,
                    )
        else:
            seconds = 0
            number = 0
            while seconds < event.duration:
                number += 1
                extract_frame(seconds, output_template % number)
                seconds += incr
                if import_immediately:
                    created += _import_files(
                        event,
                        _get_files(save_dir),
                        set_first_available=set_first_available,
                        delete_opened_files=True,
                    )
                    # If 'set_first_available' *was* true, it should have
                    # at that point set the picture for that first one.
                    if created:
                        set_first_available = False
        t1 = time.time()

        files = _get_files(save_dir)
        if verbose:  # pragma: no cover
            print(
                "Took %.2f seconds to extract %d pictures"
                % (t1 - t0, len(files))
            )

        if import_ or import_if_possible:
            if verbose and not files and not import_immediately:
                # pragma: no cover
                print("No output. Error:")
                print('\n'.join(all_err))
            try:
                created += _import_files(
                    event,
                    files,
                    set_first_available=set_first_available
                )
            except Exception:
                # Keep the pictures so they can be imported later.
                delete_save_dir = False
                raise
            if verbose:  # pragma: no cover
                print("Created %s pictures" % (created,))
                # end of this section, so add some margin
                print("\n")
            return created
        else:
            if verbose:  # pragma: no cover
                print("Created Temporary Directory %s" % (save_dir,))
                print('\t' + '\n\t'.join(os.listdir(save_dir)))
            return len(files)
    finally:
        # get_video_url() returns no filepath for uploaded files and
        # YouTube videos even when save_locally is set, so only clean up
        # when there really is a downloaded file.
        if save_locally and filepath and os.path.isfile(filepath):
            shutil.rmtree(os.path.dirname(filepath))
        if (
            delete_save_dir and
            (import_ or import_if_possible) and
            os.path.isdir(save_dir)
        ):
            if verbose:  # pragma: no cover
                print("Deleting temporary directory")
                print(save_dir)
            shutil.rmtree(save_dir)
def test_show_duration_long_format(self):
    """show_duration() spells out hours, minutes and seconds in words.

    Each case is (positional args, keyword args, expected string).
    """
    cases = [
        ((60 * 60,), {}, '1 hour'),
        ((60,), {}, '1 minute'),
        ((2 * 60 * 60 + 10 * 60,), {}, '2 hours 10 minutes'),
        ((1 * 60 * 60 + 1 * 60,), {}, '1 hour 1 minute'),
        ((1 * 60 * 60 + 1 * 60 + 1,), {}, '1 hour 1 minute'),
        ((2 * 60 * 60 + 2 * 60,), {}, '2 hours 2 minutes'),
        (
            (1 * 60 * 60 + 1 * 60 + 1,),
            {'include_seconds': True},
            '1 hour 1 minute 1 second',
        ),
        (
            (1 * 60 * 60 + 1 * 60 + 2,),
            {'include_seconds': True},
            '1 hour 1 minute 2 seconds',
        ),
        ((49,), {}, '49 seconds'),
        ((66.61,), {}, '1 minute'),
        ((66.61,), {'include_seconds': True}, '1 minute 6 seconds'),
        ((0,), {'include_seconds': True}, '0 seconds'),
        ((0,), {}, '0 seconds'),
    ]
    for args, kwargs, expected in cases:
        eq_(show_duration(*args, **kwargs), expected)
def test_show_duration_long_format(self):
    """Check the long-format strings that show_duration() produces.

    Covers singular and plural units, durations under a minute, float
    input and zero, both with and without `include_seconds`.
    """
    hour = 60 * 60
    minute = 60
    # (duration in seconds, call with include_seconds=True?, expected)
    expectations = (
        (hour, False, '1 hour'),
        (minute, False, '1 minute'),
        (2 * hour + 10 * minute, False, '2 hours 10 minutes'),
        (1 * hour + 1 * minute, False, '1 hour 1 minute'),
        (1 * hour + 1 * minute + 1, False, '1 hour 1 minute'),
        (2 * hour + 2 * minute, False, '2 hours 2 minutes'),
        (1 * hour + 1 * minute + 1, True, '1 hour 1 minute 1 second'),
        (1 * hour + 1 * minute + 2, True, '1 hour 1 minute 2 seconds'),
        (49, False, '49 seconds'),
        (66.61, False, '1 minute'),
        (66.61, True, '1 minute 6 seconds'),
        (0, True, '0 seconds'),
        (0, False, '0 seconds'),
    )
    for duration, with_seconds, expected in expectations:
        if with_seconds:
            actual = show_duration(duration, include_seconds=True)
        else:
            actual = show_duration(duration)
        eq_(actual, expected)
def fetch_duration(
    event, save=False, save_locally=False, verbose=False,
    use_https=True, video_url=None
):
    """Return the video's duration in whole seconds, or None.

    The duration is parsed out of what `ffmpeg -i <video>` writes to
    stderr. None is returned when that output has no match for REGEX.

    Arguments:
        event: the event whose video is inspected.
        save: if true, write the duration to the event's database row.
        save_locally: if true, let get_video_url() download the video
            and run ffmpeg on the local copy, which is removed
            afterwards. Can not be combined with `video_url`.
        verbose: if true, print progress information.
        use_https: passed on to get_video_url().
        video_url: inspect this URL instead of asking get_video_url().

    Raises:
        AssertionError: if `video_url` is combined with `save_locally`,
            if the URL has no scheme, or if a HEAD request reports a
            0 byte or text/html response.
        ValueError: if REGEX matches more than once in the output.
    """
    # 'filepath' is only not None if get_video_url() downloaded the video
    filepath = None
    if video_url:
        if save_locally:
            raise AssertionError(
                'save_locally can not be combined with video_url'
            )
    else:
        video_url, filepath = get_video_url(
            event, use_https, save_locally, verbose=verbose
        )

    # Some videos might return a 200 OK on a HEAD but are corrupted
    # and contain nothing
    if not save_locally:
        if '://' not in video_url:
            raise AssertionError('%s is not a URL' % video_url)
        head = requests.head(video_url)
        if head.headers.get('Content-Length') == '0':
            # corrupt file!
            raise AssertionError(
                '%s has a 0 byte Content-Length' % video_url
            )
        if head.headers.get('Content-Type', '').startswith('text/html'):
            # Not a URL to an actual file!
            raise AssertionError(
                '%s is a text/html document' % video_url
            )

    try:
        ffmpeg_location = getattr(
            settings,
            'FFMPEG_LOCATION',
            'ffmpeg'
        )
        if verbose and not which(ffmpeg_location):
            print("%s is not an executable path" % (ffmpeg_location,))
        command = [
            ffmpeg_location,
            '-i', video_url,
        ]
        if verbose:  # pragma: no cover
            print(' '.join(command))

        t0 = time.time()
        out, err = wrap_subprocess(command)
        t1 = time.time()

        if verbose:  # pragma: no cover
            print(
                "Took %s seconds to extract duration information"
                % (t1 - t0,)
            )

        matches = REGEX.findall(err)
        if matches:
            # Exactly one match is expected; more than one raises
            # ValueError here.
            found, = matches
            hours = int(found[0])
            minutes = int(found[1]) + hours * 60
            seconds = int(found[2]) + minutes * 60
            if save:
                # Because it's not safe to keep the event object open too
                # long, as it might have been edited in another thread,
                # just do an update here.
                Event.objects.filter(id=event.id).update(duration=seconds)
            if verbose:  # pragma: no cover
                print(show_duration(seconds, include_seconds=True))
            return seconds
        elif verbose:  # pragma: no cover
            print("No Duration output. Error:")
            print(err)
    finally:
        # get_video_url() returns no filepath for uploaded files and
        # YouTube videos even when save_locally is set, so only clean up
        # when there really is a downloaded file.
        if save_locally and filepath and os.path.isfile(filepath):
            shutil.rmtree(os.path.dirname(filepath))
def get_video_url(event, use_https, save_locally, verbose=False):
    """Return a `(video_url, filepath)` tuple for the event's video.

    Uploaded files and YouTube videos return their URL immediately with
    `filepath` set to None, regardless of `save_locally`. For Vid.ly and
    Ogg Video templates the URL is followed through up to a handful of
    301/302 redirects and must finally answer a HEAD request with 200.

    Arguments:
        event: the event to find a video URL for.
        use_https: if false, 'https://' in the final URL is replaced
            with 'http://'.
        save_locally: if true, the video is downloaded into a new
            temporary directory and both items of the returned tuple
            are the path of the downloaded file. The caller is
            responsible for removing that directory.
        verbose: if true, print the Content-Length and download time.

    Raises:
        AssertionError: if the event's template is not recognised, if
            the template environment lacks the needed value, or if the
            final HEAD request does not return 200.
    """
    if event.upload:
        return event.upload.url, None

    template_name = event.template.name if event.template else ''
    if 'Vid.ly' in template_name:
        tag = event.template_environment.get('tag')
        if not tag:
            raise AssertionError("No Vid.ly tag value")
        token_protected = event.privacy != Event.PRIVACY_PUBLIC
        hd = False
        qs = (
            VidlySubmission.objects
            .filter(event=event)
            .filter(tag=tag)
        )
        # The most recent submission, if any, decides quality and
        # token protection.
        for submission in qs.order_by('-submission_time')[:1]:
            hd = submission.hd
            token_protected = submission.token_protection

        video_url = '%s/%s?content=video&format=%s' % (
            settings.VIDLY_BASE_URL,
            tag,
            'hd_mp4' if hd else 'mp4',
        )
        if token_protected:
            video_url += '&token=%s' % vidly.tokenize(tag, 60)
    elif 'Ogg Video' in template_name:
        video_url = event.template_environment.get('url')
        if not video_url:
            raise AssertionError("No Ogg Video url value")
    elif 'YouTube' in template_name:
        youtube_id = event.template_environment.get('id')
        if not youtube_id:
            raise AssertionError("No YouTube ID value")
        video_url = 'https://www.youtube.com/watch?v={}'.format(
            youtube_id
        )
        return video_url, None
    else:
        raise AssertionError("Not valid template")

    response = requests.head(video_url)
    _count = 0
    while response.status_code in (302, 301):
        # A Location header may be relative to the URL just requested.
        video_url = urlparse.urljoin(
            video_url,
            response.headers['Location']
        )
        response = requests.head(video_url)
        _count += 1
        if _count > 5:
            # just too many times
            break

    # 'response' already belongs to the final 'video_url', so there is
    # no need to request it once more.
    if response.status_code != 200:
        raise AssertionError(response.status_code)
    if verbose:  # pragma: no cover
        if response.headers.get('Content-Length'):
            print(
                "Content-Length: %s" % (
                    filesizeformat(int(response.headers['Content-Length'])),
                )
            )

    if not use_https:
        video_url = video_url.replace('https://', 'http://')

    if not save_locally:
        return video_url, None

    # store it in a temporary location
    dir_ = tempfile.mkdtemp('videoinfo')
    try:
        if 'Vid.ly' in template_name:
            filepath = os.path.join(dir_, '%s.mp4' % tag)
        else:
            filepath = os.path.join(
                dir_,
                os.path.basename(urlparse.urlparse(video_url).path)
            )
        t0 = time.time()
        _download_file(video_url, filepath)
        t1 = time.time()
    except Exception:
        # The caller never learns about 'dir_' when the download fails,
        # so nobody else would remove it.
        shutil.rmtree(dir_, ignore_errors=True)
        raise

    if verbose:  # pragma: no cover
        seconds = int(t1 - t0)
        print(
            "Took %s to download"
            % (show_duration(seconds, include_seconds=True),)
        )
    return filepath, filepath
def fetch_screencapture(
    event, save=False, save_locally=False, verbose=False, use_https=True,
    import_=True, import_if_possible=False, video_url=None,
    set_first_available=False, import_immediately=False,
    timestamps=None, callback=None,
):
    """Extract JPEG screencaptures from the event's video with ffmpeg.

    Without `timestamps`, frames are taken at evenly spaced offsets,
    `event.duration / settings.SCREENCAPTURES_NO_PICTURES` seconds
    apart. With `timestamps`, one frame is taken per given offset.

    Arguments:
        event: the event; it must have a duration.
        save: not used by this function.
        save_locally: if true, let get_video_url() download the video
            and run ffmpeg on the local copy, which is removed
            afterwards. Can not be combined with `video_url`.
        verbose: if true, print progress information.
        use_https: passed on to get_video_url().
        import_: if true, write the pictures to a fresh temporary
            directory, import them with _import_files() and delete the
            directory. If false, write them to a predictable directory
            under the system temp directory and leave them there.
        import_if_possible: like `import_=False`, but still try to
            import; the directory is only kept if the import fails.
        video_url: use this URL instead of asking get_video_url().
        set_first_available: passed on to _import_files().
        import_immediately: import after every extracted frame instead
            of only once at the end.
        timestamps: numeric offsets in seconds to capture, or None.
        callback: passed to _callback_files() after each frame when
            `timestamps` is used.

    Returns the count accumulated from the import helpers when
    importing, otherwise the number of files left in the directory.

    Raises AssertionError if the event has no duration or if
    `video_url` is combined with `save_locally`.
    """
    if not event.duration:
        raise AssertionError("no duration")
    # When you set `import_` to False, it creates the JPEGs and leaves
    # them there in a predictable location (so they can be swept up
    # by import_screencaptures later).
    # However, if you want to continue doing that plus at least
    # try to import the created pictures, then set
    # `import_if_possible=True`.
    # Then, if the import fails, the pictures are still there to be
    # picked up by the import_screencaptures() later.
    if import_if_possible:
        import_ = False

    # 'filepath' is only not None if get_video_url() downloaded the video
    filepath = None
    if video_url:
        if save_locally:
            raise AssertionError(
                'save_locally can not be combined with video_url'
            )
    else:
        video_url, filepath = get_video_url(
            event,
            use_https,
            save_locally,
            verbose=verbose,
        )

    if import_:
        save_dir = tempfile.mkdtemp('screencaptures-%s' % event.id)
    else:
        # Instead of importing we're going to put them in a directory
        # that does NOT get deleted when it has created the screencaps.
        save_dir = os.path.join(
            tempfile.gettempdir(),
            settings.SCREENCAPTURES_TEMP_DIRECTORY_NAME
        )
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)
        directory_name = '%s_%s' % (event.id, event.slug)
        save_dir = os.path.join(save_dir, directory_name)
        if not os.path.isdir(save_dir):
            os.mkdir(save_dir)

    def format_time(seconds):
        # Truncate to whole seconds first so a float offset can never
        # be rounded up across a minute or hour boundary.
        minutes, secs = divmod(int(seconds), 60)
        hours, minutes = divmod(minutes, 60)
        return '%02d:%02d:%02d' % (hours, minutes, secs)

    # First, assume we will delete the temporary save_dir.
    # This is toggled if an exception happens in importing
    # the pictures.
    delete_save_dir = True

    try:
        if verbose:  # pragma: no cover
            print(
                "Video duration: %s"
                % (show_duration(event.duration, include_seconds=True),)
            )

        ffmpeg_location = getattr(
            settings,
            'FFMPEG_LOCATION',
            'ffmpeg'
        )
        if verbose and not which(ffmpeg_location):
            print("%s is not an executable path" % (ffmpeg_location,))
        incr = float(event.duration) / settings.SCREENCAPTURES_NO_PICTURES
        created = 0
        output_template = os.path.join(save_dir, 'screencap-%02d.jpg')
        all_err = []

        def extract_frame(seconds, save_name):
            command = [
                ffmpeg_location,
                '-ss', format_time(seconds),
                '-i', video_url,
                '-vframes', '1',
                save_name,
            ]
            if verbose:  # pragma: no cover
                print(' '.join(command))
            _, err = wrap_subprocess(command)
            all_err.append(err)

        t0 = time.time()
        if timestamps is not None:
            for timestamp in timestamps:
                extract_frame(timestamp, output_template % timestamp)
                if callback:
                    # NOTE(review): this assigns rather than adds, so
                    # only the latest result is kept - confirm intended.
                    created = _callback_files(
                        callback,
                        _get_files(save_dir),
                        delete_opened_files=True,
                    )
                if import_immediately:
                    created += _import_files(
                        event,
                        _get_files(save_dir),
                        delete_opened_files=True,
                        timestamp=timestamp,
                    )
        else:
            seconds = 0
            number = 0
            while seconds < event.duration:
                number += 1
                extract_frame(seconds, output_template % number)
                seconds += incr
                if import_immediately:
                    created += _import_files(
                        event,
                        _get_files(save_dir),
                        set_first_available=set_first_available,
                        delete_opened_files=True,
                    )
                    # If 'set_first_available' *was* true, it should have
                    # at that point set the picture for that first one.
                    if created:
                        set_first_available = False
        t1 = time.time()

        files = _get_files(save_dir)
        if verbose:  # pragma: no cover
            print(
                "Took %.2f seconds to extract %d pictures" % (
                    t1 - t0,
                    len(files)
                )
            )

        if import_ or import_if_possible:
            if verbose and not files and not import_immediately:
                # pragma: no cover
                print("No output. Error:")
                print('\n'.join(all_err))
            try:
                created += _import_files(
                    event,
                    files,
                    set_first_available=set_first_available
                )
            except Exception:
                # Keep the pictures so they can be imported later.
                delete_save_dir = False
                raise
            if verbose:  # pragma: no cover
                print("Created %s pictures" % (created,))
                # end of this section, so add some margin
                print("\n")
            return created
        else:
            if verbose:  # pragma: no cover
                print("Created Temporary Directory %s" % (save_dir,))
                print('\t' + '\n\t'.join(os.listdir(save_dir)))
            return len(files)
    finally:
        # get_video_url() returns no filepath for uploaded files and
        # YouTube videos even when save_locally is set, so only clean up
        # when there really is a downloaded file.
        if save_locally and filepath and os.path.isfile(filepath):
            shutil.rmtree(os.path.dirname(filepath))
        if (
            delete_save_dir and
            (import_ or import_if_possible) and
            os.path.isdir(save_dir)
        ):
            if verbose:  # pragma: no cover
                print("Deleting temporary directory")
                print(save_dir)
            shutil.rmtree(save_dir)