def setUpClass(cls):
    """Build the shared course, subtitle fixture, and asset locations once per class."""
    super(TestSaveSubsToStore, cls).setUpClass()
    cls.course = CourseFactory.create(
        org=cls.org, number=cls.number, display_name=cls.display_name)

    # Valid sjson-style subtitle fixture: parallel start/end/text arrays.
    cls.subs = {
        'start': [100, 200, 240, 390, 1000],
        'end': [200, 240, 380, 1000, 1500],
        'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
    }

    cls.subs_id = str(uuid4())
    cls.content_location = StaticContent.compute_location(
        cls.course.id, 'subs_{0}.srt.sjson'.format(cls.subs_id))

    # incorrect subs
    cls.unjsonable_subs = {1}  # set can't be serialized
    cls.unjsonable_subs_id = str(uuid4())
    cls.content_location_unjsonable = StaticContent.compute_location(
        cls.course.id, 'subs_{0}.srt.sjson'.format(cls.unjsonable_subs_id))
def setUp(self):
    """Build a fresh course, subtitle fixture, and asset locations for each test."""
    super(TestSaveSubsToStore, self).setUp()
    self.course = CourseFactory.create(
        org=self.org, number=self.number, display_name=self.display_name)

    # Valid sjson-style subtitle fixture: parallel start/end/text arrays.
    self.subs = {
        'start': [100, 200, 240, 390, 1000],
        'end': [200, 240, 380, 1000, 1500],
        'text': ['subs #1', 'subs #2', 'subs #3', 'subs #4', 'subs #5'],
    }

    self.subs_id = str(uuid4())
    self.content_location = StaticContent.compute_location(
        self.course.id, 'subs_{0}.srt.sjson'.format(self.subs_id))
    self.addCleanup(self.clear_subs_content)

    # incorrect subs
    self.unjsonable_subs = set([1])  # set can't be serialized
    self.unjsonable_subs_id = str(uuid4())
    self.content_location_unjsonable = StaticContent.compute_location(
        self.course.id, 'subs_{0}.srt.sjson'.format(self.unjsonable_subs_id))

    # Start each test from a clean contentstore state.
    self.clear_subs_content()
def asset_index(request, org, course, name, start=None, maxresults=None):
    """
    Display an editable asset library

    org, course, name: Attributes of the Location for the item to edit
    :param start: which index of the result list to start w/, used for paging results
    :param maxresults: maximum results
    """
    # Raises if the requesting user lacks access to this course location.
    location = get_location_and_verify_access(request, org, course, name)

    upload_asset_callback_url = reverse('upload_asset', kwargs={
        'org': org,
        'course': course,
        'coursename': name
    })

    course_module = modulestore().get_item(location)

    course_reference = StaticContent.compute_location(org, course, name)

    # Paging is applied only when maxresults is given; start defaults to 0.
    if maxresults is not None:
        maxresults = int(maxresults)
        start = int(start) if start else 0
        assets = contentstore().get_all_content_for_course(
            course_reference, start=start, maxresults=maxresults,
            sort=[('uploadDate', DESCENDING)]
        )
    else:
        assets = contentstore().get_all_content_for_course(
            course_reference, sort=[('uploadDate', DESCENDING)]
        )

    asset_json = []
    for asset in assets:
        asset_id = asset['_id']
        asset_location = StaticContent.compute_location(asset_id['org'], asset_id['course'], asset_id['name'])
        # note, due to the schema change we may not have a 'thumbnail_location' in the result set
        _thumbnail_location = asset.get('thumbnail_location', None)
        thumbnail_location = Location(_thumbnail_location) if _thumbnail_location is not None else None
        asset_locked = asset.get('locked', False)
        asset_json.append(_get_asset_json(asset['displayname'], asset['uploadDate'],
                                          asset_location, thumbnail_location, asset_locked))

    return render_to_response('asset_index.html', {
        'context_course': course_module,
        'asset_list': json.dumps(asset_json),
        'upload_asset_callback_url': upload_asset_callback_url,
        'update_asset_callback_url': reverse('update_asset', kwargs={
            'org': org,
            'course': course,
            'name': name
        })
    })
def asset_index(request, org, course, name):
    """
    Display an editable asset library

    org, course, name: Attributes of the Location for the item to edit
    """
    # Raises if the requesting user lacks access to this course location.
    location = get_location_and_verify_access(request, org, course, name)

    upload_asset_callback_url = reverse('upload_asset', kwargs={
        'org': org,
        'course': course,
        'coursename': name
    })

    course_module = modulestore().get_item(location)

    course_reference = StaticContent.compute_location(org, course, name)
    assets = contentstore().get_all_content_for_course(course_reference)

    # sort in reverse upload date order
    assets = sorted(assets, key=lambda asset: asset['uploadDate'], reverse=True)

    # JSON clients (Accept: application/json) get the raw asset list.
    if request.META.get('HTTP_ACCEPT', "").startswith("application/json"):
        return JsonResponse(assets_to_json_dict(assets))

    asset_display = []
    for asset in assets:
        asset_id = asset['_id']
        display_info = {}
        display_info['displayname'] = asset['displayname']
        display_info['uploadDate'] = get_default_time_display(asset['uploadDate'])

        asset_location = StaticContent.compute_location(asset_id['org'], asset_id['course'], asset_id['name'])
        display_info['url'] = StaticContent.get_url_path_from_location(asset_location)
        display_info['portable_url'] = StaticContent.get_static_path_from_location(asset_location)

        # note, due to the schema change we may not have a 'thumbnail_location' in the result set
        _thumbnail_location = asset.get('thumbnail_location', None)
        thumbnail_location = Location(_thumbnail_location) if _thumbnail_location is not None else None
        display_info['thumb_url'] = StaticContent.get_url_path_from_location(thumbnail_location) if thumbnail_location is not None else None

        asset_display.append(display_info)

    return render_to_response('asset_index.html', {
        'context_course': course_module,
        'assets': asset_display,
        'upload_asset_callback_url': upload_asset_callback_url,
        'remove_asset_callback_url': reverse('remove_asset', kwargs={
            'org': org,
            'course': course,
            'name': name
        })
    })
def course_image_url(course, image_name=None):
    """Return the url for ``image_name`` or the default course image asset.

    Passing ``image_name`` lets templates override the default course image
    when that asset is available (see course_about.html); if the named asset
    cannot be found, the course's configured image is used instead.
    """
    candidate = image_name or course.course_image
    try:
        loc = StaticContent.compute_location(course.location.course_key, candidate)
        contentstore().find(loc)
    except NotFoundError:
        # Requested override is missing from the contentstore -- fall back.
        loc = StaticContent.compute_location(course.location.course_key, course.course_image)

    return loc.to_deprecated_string()
def test_fail_downloading_subs(self):
    """Importing bad youtube ids should raise and leave no stored assets."""
    # Disabled 11/14/13
    # This test is flakey because it performs an HTTP request on an external service
    # Re-enable when `requests.get` is patched using `mock.patch`
    raise SkipTest

    # NOTE: everything below is unreachable until the SkipTest above is removed.
    bad_youtube_subs = {
        0.5: 'BAD_YOUTUBE_ID1',
        1.0: 'BAD_YOUTUBE_ID2',
        2.0: 'BAD_YOUTUBE_ID3'
    }
    self.clear_subs_content(bad_youtube_subs)

    with self.assertRaises(transcripts_utils.GetTranscriptsFromYouTubeException):
        transcripts_utils.download_youtube_subs(bad_youtube_subs, self.course)

    # Check assets status after importing subtitles.
    for subs_id in bad_youtube_subs.values():
        filename = 'subs_{0}.srt.sjson'.format(subs_id)
        content_location = StaticContent.compute_location(
            self.org, self.number, filename
        )
        # No asset should exist for a failed download.
        with self.assertRaises(NotFoundError):
            contentstore().find(content_location)

    self.clear_subs_content(bad_youtube_subs)
def test_success_downloading_subs(self):
    """Importing valid youtube ids should store one sjson asset per video speed."""
    # Disabled 11/14/13
    # This test is flakey because it performs an HTTP request on an external service
    # Re-enable when `requests.get` is patched using `mock.patch`
    raise SkipTest

    # NOTE: everything below is unreachable until the SkipTest above is removed.
    good_youtube_subs = {
        0.5: 'JMD_ifUUfsU',
        1.0: 'hI10vDNYz4M',
        2.0: 'AKqURZnYqpk'
    }
    self.clear_subs_content(good_youtube_subs)

    # Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
    transcripts_utils.download_youtube_subs(good_youtube_subs, self.course)

    # Check assets status after importing subtitles.
    for subs_id in good_youtube_subs.values():
        filename = 'subs_{0}.srt.sjson'.format(subs_id)
        content_location = StaticContent.compute_location(
            self.org, self.number, filename
        )
        self.assertTrue(contentstore().find(content_location))

    self.clear_subs_content(good_youtube_subs)
def test_images_upload(self):
    """
    Uploading a card image via the API should store it in the contentstore
    under a canonical name and update the course run's course_image field.
    """
    # http://www.django-rest-framework.org/api-guide/parsers/#fileuploadparser
    course_run = CourseFactory()
    expected_filename = 'course_image.png'
    content_key = StaticContent.compute_location(course_run.id, expected_filename)

    assert course_run.course_image != expected_filename

    # Sanity check: a brand-new course run must not already have this asset.
    try:
        contentstore().find(content_key)
        self.fail('No image should be associated with a new course run.')
    except NotFoundError:
        pass

    url = reverse('api:v1:course_run-images', kwargs={'pk': str(course_run.id)})
    # PNG. Single black pixel
    content = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01\x08\x02\x00\x00\x00\x90wS' \
              b'\xde\x00\x00\x00\x0cIDATx\x9cc```\x00\x00\x00\x04\x00\x01\xf6\x178U\x00\x00\x00\x00IEND\xaeB`\x82'

    # We are intentionally passing the incorrect JPEG extension here
    upload = SimpleUploadedFile('card_image.jpg', content, content_type='image/png')
    response = self.client.post(url, {'card_image': upload}, format='multipart')
    assert response.status_code == 200

    course_run = modulestore().get_course(course_run.id)
    assert course_run.course_image == expected_filename

    expected = {'card_image': RequestFactory().get('').build_absolute_uri(course_image_url(course_run))}
    assert response.data == expected

    # There should now be an image stored
    contentstore().find(content_key)
def course_dashboard(request, org, course, name):
    """
    Display the enrollment dashboard for a course.

    org, course, name: Attributes of the Location for the item to edit
    """
    # Enrollments for this course, ordered by user; course_id is the legacy
    # 'org/course/name' string form.
    courseEnrollments = CourseEnrollment.objects.order_by('user').filter(course_id=org+'/'+course+'/'+name)

    # Raises if the requesting user lacks access to this course location.
    location = get_location_and_verify_access(request, org, course, name)

    # NOTE(review): computed but never passed to the template below --
    # presumably copied from asset_index; confirm before removing.
    upload_asset_callback_url = reverse('upload_asset', kwargs={
        'org': org,
        'course': course,
        'coursename': name
    })

    course_module = modulestore().get_item(location)
    course_reference = StaticContent.compute_location(org, course, name)

    return render_to_response('dashboard_index.html', {
        'context_course': course_module,
        'course_reference': course_reference,
        'courseEnrollments': courseEnrollments
    })
def _upload_file(videoId, lang, location):
    """
    Read a transcript fixture from TEST_ROOT/uploads and save it (plus a
    generated thumbnail) to the contentstore for the given module location.

    `videoId`: id used to build the subs filename.
    `lang`: language code; 'en' uses the bare subs name, others get a
        '<lang>_' prefix.
    `location`: module location providing org/course for the asset key.
    """
    if lang == 'en':
        filename = 'subs_{0}.srt.sjson'.format(videoId)
    else:
        filename = '{0}_subs_{1}.srt.sjson'.format(lang, videoId)

    path = os.path.join(TEST_ROOT, 'uploads/', filename)
    # BUG FIX: the fixture file handle was opened and never closed; use a
    # context manager so it is released promptly.
    with open(os.path.abspath(path)) as fixture_file:
        filedata = fixture_file.read()

    mime_type = "application/json"

    content_location = StaticContent.compute_location(
        location.org, location.course, filename
    )

    content = StaticContent(content_location, filename, mime_type, filedata)

    (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(
        content,
        tempfile_path=None
    )
    # Invalidate any cached thumbnail even if generation failed.
    del_cached_content(thumbnail_location)
    if thumbnail_content is not None:
        content.thumbnail_location = thumbnail_location

    contentstore().save(content)
    del_cached_content(content.location)
def test_success_video_module_source_subs_uploading(self):
    """
    Uploading an srt for a video with html5 sources should succeed, set the
    module's `sub` field, and store the converted sjson asset.
    """
    self.item.data = textwrap.dedent(
        """
        <video youtube="">
            <source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4"/>
            <source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.webm"/>
            <source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.ogv"/>
        </video>
        """
    )
    modulestore().update_item(self.item, self.user.id)

    link = reverse("upload_transcripts")
    # The upload is keyed off the srt file's base name (no extension).
    filename = os.path.splitext(os.path.basename(self.good_srt_file.name))[0]
    resp = self.client.post(
        link,
        {
            "locator": self.video_usage_key,
            "transcript-file": self.good_srt_file,
            "video_list": json.dumps([{"type": "html5", "video": filename, "mode": "mp4"}]),
        },
    )
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json.loads(resp.content).get("status"), "Success")

    item = modulestore().get_item(self.video_usage_key)
    self.assertEqual(item.sub, filename)

    # The converted sjson transcript should now exist in the contentstore.
    content_location = StaticContent.compute_location(self.course.id, "subs_{0}.srt.sjson".format(filename))
    self.assertTrue(contentstore().find(content_location))
def _upload_asset(request, course_key):
    '''
    This method allows for POST uploading of files into the course asset
    library, which will be supported by GridFS in MongoDB.

    Returns a JsonResponse describing the saved asset, or
    HttpResponseBadRequest when the course does not exist.
    '''
    # Does the course actually exist?!? Get anything from it to prove its
    # existence
    try:
        modulestore().get_course(course_key)
    except ItemNotFoundError:
        # no return it as a Bad Request response
        logging.error("Could not find course: %s", course_key)
        return HttpResponseBadRequest()

    # compute a 'filename' which is similar to the location formatting, we're
    # using the 'filename' nomenclature since we're using a FileSystem paradigm
    # here. We're just imposing the Location string formatting expectations to
    # keep things a bit more consistent
    upload_file = request.FILES['file']
    filename = upload_file.name
    mime_type = upload_file.content_type

    content_loc = StaticContent.compute_location(course_key, filename)

    chunked = upload_file.multiple_chunks()
    sc_partial = partial(StaticContent, content_loc, filename, mime_type)
    if chunked:
        # Large uploads stream from a temp file on disk; pass its path along
        # so thumbnail generation can reuse it.
        content = sc_partial(upload_file.chunks())
        tempfile_path = upload_file.temporary_file_path()
    else:
        content = sc_partial(upload_file.read())
        tempfile_path = None

    # first let's see if a thumbnail can be created
    (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(
        content,
        tempfile_path=tempfile_path
    )

    # delete cached thumbnail even if one couldn't be created this time (else
    # the old thumbnail will continue to show)
    del_cached_content(thumbnail_location)

    # now store thumbnail location only if we could create it
    if thumbnail_content is not None:
        content.thumbnail_location = thumbnail_location

    # then commit the content
    contentstore().save(content)
    del_cached_content(content.location)

    # readback the saved content - we need the database timestamp
    readback = contentstore().find(content.location)

    locked = getattr(content, 'locked', False)
    response_payload = {
        'asset': _get_asset_json(content.name, readback.last_modified_at, content.location,
                                 content.thumbnail_location, locked),
        'msg': _('Upload completed')
    }

    return JsonResponse(response_payload)
def test_success_downloading_subs(self):
    """
    With `requests.get` mocked to return a valid YouTube timedtext XML
    payload, downloading subs should store the converted sjson asset.
    """
    response = textwrap.dedent("""<?xml version="1.0" encoding="utf-8" ?>
            <transcript>
                <text start="0" dur="0.27"></text>
                <text start="0.27" dur="2.45">Test text 1.</text>
                <text start="2.72">Test text 2.</text>
                <text start="5.43" dur="1.73">Test text 3.</text>
            </transcript>
    """)
    good_youtube_sub = 'good_id_2'
    self.clear_sub_content(good_youtube_sub)

    with patch('xmodule.video_module.transcripts_utils.requests.get') as mock_get:
        mock_get.return_value = Mock(status_code=200, text=response, content=response)
        # Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
        transcripts_utils.download_youtube_subs(good_youtube_sub, self.course, settings)

    # Verify the expected timedtext request was issued for this video id.
    mock_get.assert_any_call('http://video.google.com/timedtext', params={'lang': 'en', 'v': 'good_id_2'})

    # Check asset status after import of transcript.
    filename = 'subs_{0}.srt.sjson'.format(good_youtube_sub)
    content_location = StaticContent.compute_location(self.course.id, filename)
    self.assertTrue(contentstore().find(content_location))

    self.clear_sub_content(good_youtube_sub)
def test_success_generating_subs(self):
    """
    Generating subs from srt source data should store one sjson asset per
    youtube speed id, including when the extension is uppercase.
    """
    youtube_subs = {
        0.5: 'JMD_ifUUfsU',
        1.0: 'hI10vDNYz4M',
        2.0: 'AKqURZnYqpk'
    }
    srt_filedata = textwrap.dedent("""
        1
        00:00:10,500 --> 00:00:13,000
        Elephant's Dream

        2
        00:00:15,000 --> 00:00:18,000
        At the left we can see...
    """)
    self.clear_subs_content(youtube_subs)

    # Check transcripts_utils.TranscriptsGenerationException not thrown.
    # Also checks that uppercase file extensions are supported.
    transcripts_utils.generate_subs_from_source(youtube_subs, 'SRT', srt_filedata, self.course)

    # Check assets status after importing subtitles.
    for subs_id in youtube_subs.values():
        filename = 'subs_{0}.srt.sjson'.format(subs_id)
        content_location = StaticContent.compute_location(
            self.course.id, filename
        )
        self.assertTrue(contentstore().find(content_location))

    self.clear_subs_content(youtube_subs)
def verify_content_links(module, base_dir, static_content_store, link, remap_dict=None):
    """
    If `link` is a '/static/...' reference, import the referenced file from
    `base_dir` into the content store and return its new asset url.

    On success the new url is returned (and recorded in `remap_dict` when one
    is supplied); for non-static links or failed imports, None is returned.
    """
    if link.startswith('/static/'):
        # yes, then parse out the name
        path = link[len('/static/'):]

        static_pathname = base_dir / path

        if os.path.exists(static_pathname):
            try:
                content_loc = StaticContent.compute_location(module.location.org, module.location.course, path)

                filename = os.path.basename(path)
                mime_type = mimetypes.guess_type(filename)[0]

                with open(static_pathname, 'rb') as f:
                    data = f.read()

                content = StaticContent(content_loc, filename, mime_type, data, import_path=path)

                # first let's save a thumbnail so we can get back a thumbnail location
                (thumbnail_content, thumbnail_location) = static_content_store.generate_thumbnail(content)

                if thumbnail_content is not None:
                    content.thumbnail_location = thumbnail_location

                # then commit the content
                static_content_store.save(content)

                new_link = StaticContent.get_url_path_from_location(content_loc)

                if remap_dict is not None:
                    remap_dict[link] = new_link

                return new_link
            # BUG FIX: the legacy `except Exception, e` comma syntax is
            # Python-2-only; `as` works on 2.6+ and 3.x. Broad catch is
            # deliberate: a bad asset must not abort the whole import.
            except Exception as e:
                logging.exception('Skipping failed content load from {0}. Exception: {1}'.format(path, e))
def test_success_video_module_source_subs_uploading(self):
    """
    Uploading an srt for a video with html5 sources should succeed, set the
    module's `sub` field, and store the converted sjson asset.
    """
    self.item.data = textwrap.dedent("""
        <video youtube="">
            <source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.mp4"/>
            <source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.webm"/>
            <source src="http://www.quirksmode.org/html5/videos/big_buck_bunny.ogv"/>
        </video>
    """)
    modulestore().update_item(self.item, self.user.id)

    link = reverse('upload_transcripts')
    # The upload is keyed off the srt file's base name (no extension).
    filename = os.path.splitext(os.path.basename(self.good_srt_file.name))[0]
    resp = self.client.post(link, {
        'locator': self.video_usage_key,
        'transcript-file': self.good_srt_file,
        'video_list': json.dumps([{
            'type': 'html5',
            'video': filename,
            'mode': 'mp4',
        }])
    })
    self.assertEqual(resp.status_code, 200)
    self.assertEqual(json.loads(resp.content).get('status'), 'Success')

    item = modulestore().get_item(self.video_usage_key)
    self.assertEqual(item.sub, filename)

    # The converted sjson transcript should now exist in the contentstore.
    content_location = StaticContent.compute_location(
        self.course.id, 'subs_{0}.srt.sjson'.format(filename))
    self.assertTrue(contentstore().find(content_location))
def test_compute_location(self):
    """
    Regression test: double underscores must survive compute_location.

    We had a bug that __ got converted into a single _. Make sure that
    substitution of INVALID_CHARS (like space) still happens.
    """
    course_key = SlashSeparatedCourseKey('mitX', '400', 'ignore')
    actual = StaticContent.compute_location(course_key, 'subs__1eo_jXvZnE .srt.sjson')
    expected = AssetLocation(u'mitX', u'400', u'ignore', u'asset', u'subs__1eo_jXvZnE_.srt.sjson', None)
    self.assertEqual(expected, actual)
def upload_image(course_key, upload_file):
    """
    Save `upload_file` (plus a generated thumbnail) to the contentstore for
    `course_key` and return the stored filename.
    """
    filename = upload_file.name
    mime_type = upload_file.content_type

    content_loc = StaticContent.compute_location(course_key, filename)

    chunked = upload_file.multiple_chunks()
    sc_partial = partial(StaticContent, content_loc, filename, mime_type)
    if chunked:
        # Large uploads stream from a temp file on disk; pass its path along
        # so thumbnail generation can reuse it.
        content = sc_partial(upload_file.chunks())
        tempfile_path = upload_file.temporary_file_path()
    else:
        content = sc_partial(upload_file.read())
        tempfile_path = None

    # first let's see if a thumbnail can be created
    (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(
        content,
        tempfile_path=tempfile_path
    )

    # delete cached thumbnail even if one couldn't be created this time (else
    # the old thumbnail will continue to show)
    del_cached_content(thumbnail_location)

    # now store thumbnail location only if we could create it
    if thumbnail_content is not None:
        content.thumbnail_location = thumbnail_location

    # then commit the content
    contentstore().save(content)
    del_cached_content(content.location)

    return filename
def test_happy_path(self, modulestore_type, create_after_overview):
    """
    What happens when everything works like we expect it to.

    If `create_after_overview` is True, we will temporarily disable
    thumbnail creation so that the initial CourseOverview is created
    without an image_set, and the CourseOverviewImageSet is created
    afterwards.

    If `create_after_overview` is False, we'll create the
    CourseOverviewImageSet at the same time as the CourseOverview.
    """
    # Create a real (oversized) image...
    image = Image.new("RGB", (800, 400), "blue")
    image_buff = StringIO()
    image.save(image_buff, format="JPEG")
    image_buff.seek(0)
    image_name = "big_course_image.jpeg"

    with self.store.default_store(modulestore_type):
        course = CourseFactory.create(
            default_store=modulestore_type, course_image=image_name
        )

        # Save a real image here...
        course_image_asset_key = StaticContent.compute_location(course.id, course.course_image)
        course_image_content = StaticContent(course_image_asset_key, image_name, 'image/jpeg', image_buff)
        contentstore().save(course_image_content)

        # If create_after_overview is True, disable thumbnail generation so
        # that the CourseOverview object is created and saved without an
        # image_set at first (it will be lazily created later).
        if create_after_overview:
            self.set_config(enabled=False)

        # Now generate the CourseOverview...
        course_overview = CourseOverview.get_from_id(course.id)

        # If create_after_overview is True, no image_set exists yet. Verify
        # that, then switch config back over to True and it should lazily
        # create the image_set on the next get_from_id() call.
        if create_after_overview:
            self.assertFalse(hasattr(course_overview, 'image_set'))
            self.set_config(enabled=True)
            course_overview = CourseOverview.get_from_id(course.id)

        self.assertTrue(hasattr(course_overview, 'image_set'))
        image_urls = course_overview.image_urls
        config = CourseOverviewImageConfig.current()

        # Make sure the thumbnail names come out as expected...
        self.assertTrue(image_urls['raw'].endswith('big_course_image.jpeg'))
        self.assertTrue(image_urls['small'].endswith('big_course_image-jpeg-{}x{}.jpg'.format(*config.small)))
        self.assertTrue(image_urls['large'].endswith('big_course_image-jpeg-{}x{}.jpg'.format(*config.large)))

        # Now make sure our thumbnails are of the sizes we expect...
        for image_url, expected_size in [(image_urls['small'], config.small), (image_urls['large'], config.large)]:
            image_key = StaticContent.get_location_from_path(image_url)
            image_content = AssetManager.find(image_key)
            image = Image.open(StringIO(image_content.data))
            self.assertEqual(image.size, expected_size)
def test_different_resolutions(self, src_dimensions):
    """
    Test various resolutions of images to make thumbnails of.

    Note that our test sizes are small=(200, 100) and large=(400, 200).

    1. Images should won't be blown up if it's too small, so a (100, 50)
       resolution image will remain (100, 50).
    2. However, images *will* be converted using our format and quality
       settings (JPEG, 75% -- the PIL default). This is because images with
       relatively small dimensions not compressed properly.
    3. Image thumbnail naming will maintain the naming convention of the
       target resolution, even if the image was not actually scaled to that
       size (i.e. it was already smaller). This is mostly because it's
       simpler to be consistent, but it also lets us more easily tell which
       configuration a thumbnail was created under.
    """
    # Create a source image...
    image = Image.new("RGB", src_dimensions, "blue")
    image_buff = StringIO()
    image.save(image_buff, format="PNG")
    image_buff.seek(0)
    image_name = "src_course_image.png"

    course = CourseFactory.create(course_image=image_name)

    # Save the image to the contentstore...
    course_image_asset_key = StaticContent.compute_location(course.id, course.course_image)
    course_image_content = StaticContent(course_image_asset_key, image_name, 'image/png', image_buff)
    contentstore().save(course_image_content)

    # Now generate the CourseOverview...
    config = CourseOverviewImageConfig.current()
    course_overview = CourseOverview.get_from_id(course.id)
    image_urls = course_overview.image_urls

    for image_url, target in [(image_urls['small'], config.small), (image_urls['large'], config.large)]:
        image_key = StaticContent.get_location_from_path(image_url)
        image_content = AssetManager.find(image_key)
        image = Image.open(StringIO(image_content.data))

        # Naming convention for thumbnail
        self.assertTrue(image_url.endswith('src_course_image-png-{}x{}.jpg'.format(*target)))

        # Actual thumbnail data
        src_x, src_y = src_dimensions
        target_x, target_y = target
        image_x, image_y = image.size

        # I'm basically going to assume the image library knows how to do
        # the right thing in terms of handling aspect ratio. We're just
        # going to make sure that small images aren't blown up, and that
        # we never exceed our target sizes
        self.assertLessEqual(image_x, target_x)
        self.assertLessEqual(image_y, target_y)

        if src_x < target_x and src_y < target_y:
            self.assertEqual(src_x, image_x)
            self.assertEqual(src_y, image_y)
def asset_index(request, org, course, name):
    """
    Display an editable asset library

    org, course, name: Attributes of the Location for the item to edit
    """
    # Raises if the requesting user lacks access to this course location.
    location = get_location_and_verify_access(request, org, course, name)

    upload_asset_callback_url = reverse("upload_asset", kwargs={"org": org, "course": course, "coursename": name})

    course_module = modulestore().get_item(location)

    course_reference = StaticContent.compute_location(org, course, name)
    assets = contentstore().get_all_content_for_course(course_reference)

    # sort in reverse upload date order
    assets = sorted(assets, key=lambda asset: asset["uploadDate"], reverse=True)

    asset_display = []
    for asset in assets:
        asset_id = asset["_id"]
        display_info = {}
        display_info["displayname"] = asset["displayname"]
        display_info["uploadDate"] = get_default_time_display(asset["uploadDate"].timetuple())

        asset_location = StaticContent.compute_location(asset_id["org"], asset_id["course"], asset_id["name"])
        display_info["url"] = StaticContent.get_url_path_from_location(asset_location)

        # note, due to the schema change we may not have a 'thumbnail_location' in the result set
        _thumbnail_location = asset.get("thumbnail_location", None)
        thumbnail_location = Location(_thumbnail_location) if _thumbnail_location is not None else None
        display_info["thumb_url"] = (
            StaticContent.get_url_path_from_location(thumbnail_location) if thumbnail_location is not None else None
        )

        asset_display.append(display_info)

    return render_to_response(
        "asset_index.html",
        {
            "active_tab": "assets",
            "context_course": course_module,
            "assets": asset_display,
            "upload_asset_callback_url": upload_asset_callback_url,
        },
    )
def course_image_url(course):
    """Return the serialized asset url for the course image, or '' for an invalid key."""
    try:
        image_loc = StaticContent.compute_location(course.location.course_key, course.course_image)
    except InvalidKeyError:
        return ''
    return StaticContent.serialize_asset_key_with_slash(image_loc)
def asset_location(location, filename):
    """
    Return asset location. `location` is module location.

    An empty `filename` would trip an `InvalidKeyError` deeper down, so it is
    reported as a `TranscriptException` up front instead.
    """
    if filename:
        return StaticContent.compute_location(location.course_key, filename)
    raise TranscriptException("Transcript not uploaded yet")
def upload_to_local(self):
    """Serialize self.subs as JSON and save it to the local contentstore."""
    # Note: cribbed from common/lib/xmodule/xmodule/video_module/transcripts_utils.py save_subs_to_store()
    asset_key = StaticContent.compute_location(self.course_key, self.filename)
    payload = json.dumps(self.subs, indent=2)
    contentstore().save(StaticContent(asset_key, self.filename, 'application/json', payload))
    del_cached_content(asset_key)
def _upload_file(subs_file, location, filename):
    """Persist an uploaded transcript file to the contentstore and bust its cache entry."""
    asset_key = StaticContent.compute_location(
        location.course_key, filename
    )
    asset = StaticContent(asset_key, filename, subs_file.content_type, subs_file.read())
    contentstore().save(asset)
    del_cached_content(asset.location)
def download_transcripts(request):
    """
    Passes to user requested transcripts file.

    Raises Http404 if unsuccessful.
    """
    locator = request.GET.get('locator')
    subs_id = request.GET.get('subs_id')
    if not locator:
        log.debug('GET data without "locator" property.')
        raise Http404

    try:
        item = _get_item(request, request.GET)
    except (InvalidKeyError, ItemNotFoundError):
        log.debug("Can't find item by locator.")
        raise Http404

    if item.category != 'video':
        log.debug('transcripts are supported only for video" modules.')
        raise Http404

    try:
        if not subs_id:
            raise NotFoundError
        filename = subs_id
        # BUG FIX: the format string had no placeholder, so every lookup
        # hit the literal asset name 'subs_(unknown).srt.sjson'. The subs id
        # must be interpolated into the asset filename.
        content_location = StaticContent.compute_location(
            item.location.course_key,
            'subs_{filename}.srt.sjson'.format(filename=filename),
        )
        sjson_transcript = contentstore().find(content_location).data
    except NotFoundError:
        # Try searching in VAL for the transcript as a last resort
        transcript = None
        if is_val_transcript_feature_enabled_for_course(item.location.course_key):
            transcript = get_video_transcript_content(
                language_code=u'en',
                edx_video_id=item.edx_video_id,
                youtube_id_1_0=item.youtube_id_1_0,
                html5_sources=item.html5_sources,
            )

        if not transcript:
            raise Http404

        filename = os.path.splitext(os.path.basename(transcript['file_name']))[0].encode('utf8')
        sjson_transcript = transcript['content']

    # convert sjson content into srt format.
    transcript_content = Transcript.convert(sjson_transcript, input_format='sjson', output_format='srt')
    if not transcript_content:
        raise Http404

    # Construct an HTTP response
    response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8')
    # BUG FIX: interpolate the real filename into Content-Disposition instead
    # of serving every download as the literal '(unknown).srt'.
    response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename)
    return response
def upload_file(filename, location):
    """
    Save the fixture file `filename` from TEST_ROOT/uploads into the
    contentstore under `location`'s course, and invalidate its cache entry.
    """
    path = os.path.join(TEST_ROOT, "uploads/", filename)
    # BUG FIX: the fixture file handle was opened and never closed; use a
    # context manager so it is released promptly.
    with open(os.path.abspath(path)) as fixture_file:
        filedata = fixture_file.read()
    mime_type = "application/json"

    content_location = StaticContent.compute_location(location.course_key, filename)

    content = StaticContent(content_location, filename, mime_type, filedata)
    contentstore().save(content)
    del_cached_content(content.location)
def asset_location(location, filename):
    """Build the static asset location for `filename` within `location`'s org/course."""
    return StaticContent.compute_location(location.org, location.course, filename)
def course_image_url(course):
    """Try to look up the image url for the course.  If it's not found,
    log an error and return the dead link"""
    if course.static_asset_path or modulestore().get_modulestore_type(course.location.course_id) == XML_MODULESTORE_TYPE:
        # XML-backed / static-asset courses serve a fixed image path from
        # their static asset directory.
        return '/static/' + (course.static_asset_path or getattr(course, 'data_dir', '')) + "/images/course_image.jpg"
    else:
        # Mongo-backed courses resolve the configured course_image asset.
        loc = StaticContent.compute_location(course.location.org, course.location.course, course.course_image)
        _path = StaticContent.get_url_path_from_location(loc)
        return _path
def clear_subs_content(self):
    """Delete any stored transcript assets for this course's youtube ids; missing ones are ignored."""
    for youtube_id in self.get_youtube_ids().values():
        sjson_name = 'subs_{0}.srt.sjson'.format(youtube_id)
        asset_key = StaticContent.compute_location(self.course.id, sjson_name)
        try:
            existing = contentstore().find(asset_key)
            contentstore().delete(existing.get_id())
        except NotFoundError:
            pass
def _upload_file(file, location):
    """
    Save an uploaded transcript `file` (plus a generated thumbnail) to the
    contentstore for the module at `location`.

    NOTE(review): the parameter name `file` shadows the builtin; renaming it
    could break keyword callers, so it is left as-is.
    """
    filename = 'subs_{}.srt.sjson'.format(_get_subs_id(file.name))
    mime_type = file.content_type

    content_location = StaticContent.compute_location(
        location.org, location.course, filename
    )

    sc_partial = partial(StaticContent, content_location, filename, mime_type)
    content = sc_partial(file.read())

    (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(
        content,
        tempfile_path=None
    )
    # Invalidate any cached thumbnail even if one couldn't be generated.
    del_cached_content(thumbnail_location)
    if thumbnail_content is not None:
        content.thumbnail_location = thumbnail_location

    contentstore().save(content)
    del_cached_content(content.location)
def course_image_url(course, image_key='course_image'):
    """Try to look up the image url for the course.  If it's not found,
    log an error and return the dead link.
    image_key can be one of the three: 'course_image', 'hero_image', 'thumbnail_image' """
    if course.static_asset_path:
        # If we are a static course with the image_key attribute
        # set different than the default, return that path so that
        # courses can use custom course image paths, otherwise just
        # return the default static path.
        url = '/static/' + (course.static_asset_path or getattr(course, 'data_dir', ''))
        if hasattr(course, image_key) and getattr(course, image_key) != course.fields[image_key].default:
            url += '/' + getattr(course, image_key)
        else:
            url += '/images/' + image_key + '.jpg'
    elif not getattr(course, image_key):
        # if image_key is empty, use the default image url from settings
        url = settings.STATIC_URL + settings.DEFAULT_COURSE_ABOUT_IMAGE_URL
    else:
        # Normal case: resolve the configured image through the contentstore.
        loc = StaticContent.compute_location(course.id, getattr(course, image_key))
        url = StaticContent.serialize_asset_key_with_slash(loc)

    return url
def test_success_downloading_chinese_transcripts(self):
    """Download a real Chinese (utf-8) transcript from YouTube and verify the stored asset."""
    # Disabled 11/14/13
    # This test is flakey because it performs an HTTP request on an external service
    # Re-enable when `requests.get` is patched using `mock.patch`
    raise SkipTest

    # NOTE: everything below is unreachable until the SkipTest above is removed.
    good_youtube_sub = 'j_jEn79vS3g'  # Chinese, utf-8
    self.clear_sub_content(good_youtube_sub)

    # Check transcripts_utils.GetTranscriptsFromYouTubeException not thrown
    transcripts_utils.download_youtube_subs(good_youtube_sub, self.course, settings)

    # Check assets status after importing subtitles.
    # BUG FIX: this block looped over an undefined `good_youtube_subs` dict
    # (NameError); there is only the single `good_youtube_sub` id here.
    filename = 'subs_{0}.srt.sjson'.format(good_youtube_sub)
    content_location = StaticContent.compute_location(
        self.course.id, filename)
    self.assertTrue(contentstore().find(content_location))

    self.clear_sub_content(good_youtube_sub)
def student_view(self, context=None):
    """
    The student view of the MarkdownXBlock.

    Renders either the contentstore asset named by ``self.filename`` or the
    inline ``self.content`` as Markdown inside the main template.
    """
    # BUG FIX: `content` was previously unbound when `self.filename` was set
    # but the asset lookup failed (the except clause just passed), causing an
    # UnboundLocalError at the `if content:` check below.
    content = None
    if self.filename:
        # These can only be imported when the XBlock is running on the LMS
        # or CMS. Do it at runtime so that the workbench is usable for
        # regular XML content.
        from xmodule.contentstore.content import StaticContent
        from xmodule.contentstore.django import contentstore
        from xmodule.exceptions import NotFoundError
        from opaque_keys import InvalidKeyError
        try:
            course_id = self.xmodule_runtime.course_id
            loc = StaticContent.compute_location(course_id, self.filename)
            asset = contentstore().find(loc)
            content = asset.data
        except (NotFoundError, InvalidKeyError):
            # Asset is missing or the key is malformed: render empty content.
            pass
    else:
        content = self.content

    html_content = ""
    if content:
        html_content = markdown2.markdown(content, extras=self.extras)

    # Render the HTML template
    context = {'content': html_content}
    html = loader.render_template('templates/main.html', context)
    frag = Fragment(html)
    if "fenced-code-blocks" in self.extras:
        frag.add_css_url(
            self.runtime.local_resource_url(self, 'public/css/pygments.css'))
    return frag
def test_subs_uploading_with_byte_order_mark(self):
    """
    Test uploading subs containing BOM(Byte Order Mark), e.g. U+FEFF
    """
    # NOTE(review): every bare "ufeff" below looks like a mangled u'\ufeff'
    # escape (the literal BOM character) — confirm against VCS history before
    # relying on these assertions.
    filedata = textwrap.dedent("""
        1
        00:00:10,500 --> 00:00:13,000
        Test ufeff characters

        2
        00:00:15,000 --> 00:00:18,000
        At the left we can see...
    """).encode('utf-8-sig')

    # Verify that ufeff character is in filedata.
    self.assertIn("ufeff", filedata)

    self.ufeff_srt_file.write(filedata)
    self.ufeff_srt_file.seek(0)

    link = reverse('upload_transcripts')
    # The html5 "video" id is derived from the srt file's base name.
    filename = os.path.splitext(os.path.basename(self.ufeff_srt_file.name))[0]
    resp = self.client.post(link, {
        'locator': self.video_usage_key,
        'transcript-file': self.ufeff_srt_file,
        'video_list': json.dumps([{
            'type': 'html5',
            'video': filename,
            'mode': 'mp4',
        }])
    })
    self.assertEqual(resp.status_code, 200)

    # The upload must have produced an sjson asset whose text survived the BOM.
    content_location = StaticContent.compute_location(
        self.course.id, 'subs_{0}.srt.sjson'.format(filename))
    self.assertTrue(contentstore().find(content_location))
    subs_text = json.loads(contentstore().find(content_location).data).get('text')
    self.assertIn("Test ufeff characters", subs_text)
def mobi_course_info(request, course, action=None): course_logo = course_image_url(course) imgurl = course_logo if action in ["homefalls", "all", "hot", "latest", "my", "search"]: try: course_mini_info = course.id.split('/') asset_location = StaticContent.compute_location(course_mini_info[0], course_mini_info[1], 'mobi-logo-img.jpg') imgurl = StaticContent.get_url_path_from_location(asset_location) except: print "=========================fail load mobi image===============================" print "We will load this info to log" return { "id": course.id.replace('/', '.'), "name": course.display_name_with_default, "logo": request.get_host() + course_image_url(course), "org": course.display_org_with_default, "course_number": course.display_number_with_default, "start_date": course.start.strftime("%Y-%m-%d"), "about": get_course_about_section(course, 'short_description'), "category": course.category, "imgurl": request.get_host() + imgurl }
def test_fail_downloading_subs(self, mock_get):
    """
    A failed YouTube download must raise and leave no sjson assets behind.
    """
    mock_get.return_value = Mock(status_code=404, text='Error 404')

    bad_youtube_subs = {
        0.5: 'BAD_YOUTUBE_ID1',
        1.0: 'BAD_YOUTUBE_ID2',
        2.0: 'BAD_YOUTUBE_ID3'
    }
    self.clear_subs_content(bad_youtube_subs)

    with self.assertRaises(transcripts_utils.GetTranscriptsFromYouTubeException):
        transcripts_utils.download_youtube_subs(bad_youtube_subs, self.course, settings)

    # Check assets status after importing subtitles: every id must be absent.
    for bad_id in bad_youtube_subs.values():
        asset_location = StaticContent.compute_location(
            self.course.id, 'subs_{0}.srt.sjson'.format(bad_id)
        )
        with self.assertRaises(NotFoundError):
            contentstore().find(asset_location)

    self.clear_subs_content(bad_youtube_subs)
def course_image_url(course):
    """Try to look up the image url for the course. If it's not found,
    log an error and return the dead link"""
    is_static = course.static_asset_path or modulestore().get_modulestore_type(
        course.location.course_id) == XML_MODULESTORE_TYPE
    if is_static:
        # Static/XML course: serve from its static dir; use the custom image
        # when it differs from the field default, else the legacy path.
        base = '/static/' + (course.static_asset_path or getattr(course, 'data_dir', ''))
        has_custom_image = hasattr(
            course, 'course_image'
        ) and course.course_image != course.fields['course_image'].default
        if has_custom_image:
            return base + '/' + course.course_image
        return base + '/images/course_image.jpg'

    loc = StaticContent.compute_location(
        course.location.org, course.location.course, course.course_image)
    return StaticContent.get_url_path_from_location(loc)
def test_downloading_subs_using_transcript_name(self, mock_get):
    """
    Download transcript using transcript name in url
    """
    youtube_id = 'good_id_2'
    self.clear_sub_content(youtube_id)

    transcripts_utils.download_youtube_subs(youtube_id, self.course, settings)

    # The timedtext endpoint must have been queried with the custom name.
    mock_get.assert_any_call(
        'http://video.google.com/timedtext',
        params={'lang': 'en', 'v': 'good_id_2', 'name': 'Custom'}
    )

    # Check asset status after import of transcript.
    sjson_name = 'subs_{0}.srt.sjson'.format(youtube_id)
    sjson_location = StaticContent.compute_location(self.course.id, sjson_name)
    self.assertTrue(contentstore().find(sjson_location))

    self.clear_sub_content(youtube_id)
def get_pgreport_csv(course_id):
    """Get progress of students."""
    # Locate the pre-generated, gzipped progress CSV asset for this course.
    course_key = get_coursekey(course_id)
    location = StaticContent.compute_location(course_key, "progress_students.csv.gz")
    store = contentstore()

    try:
        gzipfile = StringIO.StringIO()
        # Stream the asset into an in-memory buffer so it can be re-read
        # through the gzip module.
        content = store.find(location, throw_on_not_found=True, as_stream=True)
        for gzipdata in content.stream_data():
            gzipfile.write(gzipdata)
        gzipfile.seek(0)
        gzipcsv = gzip.GzipFile(fileobj=gzipfile, mode='rb')
        # Echo each decompressed CSV row to stdout (trailing comma suppresses
        # the extra newline -- rows already end with one).
        for csvrow in gzipcsv.readlines():
            print csvrow,
        gzipcsv.close()
    except NotFoundError as e:
        # The report has not been generated yet; log and return nothing.
        log.warn(" * Csv does not exists: {}".format(e))
    finally:
        gzipfile.close()
def create_image(cls, prefix, dimensions, color, name, locked=False):
    """
    Creates an image.

    Args:
        prefix: the prefix to use e.g. split vs mongo
        dimensions: tuple of (width, height)
        color: the background color of the image
        name: the name of the image; can be a format string
        locked: whether or not the asset should be locked

    Returns:
        StaticContent: the StaticContent object for the created image
    """
    # Render a solid-color PNG into an in-memory buffer.
    img = Image.new('RGB', dimensions, color)
    buf = StringIO()
    img.save(buf, format='png')
    buf.seek(0)

    # Store it under a prefix-specific name in the prefix's course.
    asset_name = name.format(prefix)
    asset_key = StaticContent.compute_location(cls.courses[prefix].id, asset_name)
    asset = StaticContent(asset_key, asset_name, 'image/png', buf.getvalue(), locked=locked)
    contentstore().save(asset)
    return asset
def get_transcript(self, subs_id):
    '''
    Returns transcript in *.srt format.

    Args:
        `subs_id`: str, subtitles id

    Raises:
        - NotFoundError if cannot find transcript file in storage.
        - ValueError if transcript file is empty or incorrect JSON.
        - KeyError if transcript file has incorrect format.
    '''
    sjson_name = 'subs_{0}.srt.sjson'.format(subs_id)
    sjson_location = StaticContent.compute_location(
        self.location.org, self.location.course, sjson_name)
    sjson_asset = contentstore().find(sjson_location)

    # Convert the stored sjson data back into SubRip text.
    srt_text = _generate_srt_from_sjson(json.loads(sjson_asset.data), speed=1.0)
    if not srt_text:
        log.debug('generate_srt_from_sjson produces no subtitles')
        raise ValueError
    return srt_text
def course_image_url(course):
    """Try to look up the image url for the course. If it's not found,
    log an error and return the dead link"""
    if course.static_asset_path or modulestore().get_modulestore_type(
            course.id) == ModuleStoreEnum.Type.xml:
        # Static/XML course: serve straight from the course's static dir.
        path = '/static/' + (course.static_asset_path or getattr(course, 'data_dir', ''))
        if hasattr(
            course, 'course_image'
        ) and course.course_image != course.fields['course_image'].default:
            return path + '/' + course.course_image
        return path + '/images/course_image.jpg'

    if not course.course_image:
        # if course_image is empty, use the default image url from settings
        return settings.STATIC_URL + settings.DEFAULT_COURSE_ABOUT_IMAGE_URL

    return StaticContent.serialize_asset_key_with_slash(
        StaticContent.compute_location(course.id, course.course_image)
    )
def _parse_video_xml(cls, xml, id_generator=None):
    """
    Parse video fields out of xml_data. The fields are set if they are
    present in the XML.

    Arguments:
        id_generator is used to generate course-specific urls and identifiers
    """
    field_data = {}

    # Convert between key types for certain attributes --
    # necessary for backwards compatibility.
    conversions = {
        # example: 'start_time': cls._example_convert_start_time
    }

    # Convert between key names for certain attributes --
    # necessary for backwards compatibility.
    compat_keys = {
        'from': 'start_time',
        'to': 'end_time'
    }

    # Child elements: <source>, <track>, <handout>, <transcript>.
    sources = xml.findall('source')
    if sources:
        field_data['html5_sources'] = [ele.get('src') for ele in sources]

    track = xml.find('track')
    if track is not None:
        field_data['track'] = track.get('src')

    handout = xml.find('handout')
    if handout is not None:
        field_data['handout'] = handout.get('src')

    transcripts = xml.findall('transcript')
    if transcripts:
        field_data['transcripts'] = {tr.get('language'): tr.get('src') for tr in transcripts}

    # Top-level XML attributes: translate legacy names, strip internal ones,
    # and keep unknown attributes in 'xml_attributes' for round-tripping.
    for attr, value in xml.items():
        if attr in compat_keys:
            attr = compat_keys[attr]
        if attr in cls.metadata_to_strip + ('url_name', 'name'):
            continue
        if attr == 'youtube':
            speeds = cls._parse_youtube(value)
            for speed, youtube_id in speeds.items():
                # should have made these youtube_id_1_00 for
                # cleanliness, but hindsight doesn't need glasses
                normalized_speed = speed[:-1] if speed.endswith('0') else speed
                # If the user has specified html5 sources, make sure we don't use the default video
                if youtube_id != '' or 'html5_sources' in field_data:
                    field_data['youtube_id_{0}'.format(normalized_speed.replace('.', '_'))] = youtube_id
        elif attr in conversions:
            field_data[attr] = conversions[attr](value)
        elif attr not in cls.fields:
            field_data.setdefault('xml_attributes', {})[attr] = value
        else:
            # We export values with json.dumps (well, except for Strings, but
            # for about a month we did it for Strings also).
            field_data[attr] = deserialize_field(cls.fields[attr], value)

    course_id = getattr(id_generator, 'target_course_id', None)
    # Update the handout location with current course_id
    if 'handout' in field_data.keys() and course_id:
        handout_location = StaticContent.get_location_from_path(field_data['handout'])
        if isinstance(handout_location, AssetLocator):
            handout_new_location = StaticContent.compute_location(course_id, handout_location.path)
            field_data['handout'] = StaticContent.serialize_asset_key_with_slash(handout_new_location)

    # For backwards compatibility: Add `source` if XML doesn't have `download_video`
    # attribute.
    if 'download_video' not in field_data and sources:
        field_data['source'] = field_data['html5_sources'][0]

    # For backwards compatibility: if XML doesn't have `download_track` attribute,
    # it means that it is an old format. So, if `track` has some value,
    # `download_track` needs to have value `True`.
    if 'download_track' not in field_data and track is not None:
        field_data['download_track'] = True

    # Import any bundled VAL video metadata keyed by edx_video_id.
    video_asset_elem = xml.find('video_asset')
    if (
            edxval_api and
            video_asset_elem is not None and
            'edx_video_id' in field_data
    ):
        # Allow ValCannotCreateError to escape
        edxval_api.import_from_xml(
            video_asset_elem,
            field_data['edx_video_id'],
            course_id=course_id
        )

    # load license if it exists
    field_data = LicenseMixin.parse_license_from_xml(field_data, xml)

    return field_data
def test_compute_location(self):
    """
    Regression test: a double underscore must not collapse into a single
    one, while substitution of INVALID_CHARS (like space) still happens.
    """
    asset_location = StaticContent.compute_location(
        'mitX', '400', 'subs__1eo_jXvZnE .srt.sjson')
    expected = Location(
        u'c4x', u'mitX', u'400', u'asset', u'subs__1eo_jXvZnE_.srt.sjson', None)
    self.assertEqual(expected, asset_location)
def upload_asset(request, org, course, coursename):
    '''
    This method allows for POST uploading of files into the course asset
    library, which will be supported by GridFS in MongoDB.

    Returns a JsonResponse describing the stored asset, or
    HttpResponseBadRequest when the course does not exist or no file was sent.
    '''
    # construct a location from the passed in path
    location = get_location_and_verify_access(request, org, course, coursename)

    # Does the course actually exist?!? Get anything from it to prove its
    # existence
    try:
        modulestore().get_item(location)
    # Narrowed from a bare `except:` so SystemExit/KeyboardInterrupt escape.
    except Exception:
        # no return it as a Bad Request response
        # BUG FIX: 'str' + Location raised TypeError while logging; use lazy
        # %-style formatting instead of concatenation.
        logging.error('Could not find course: %s', location)
        return HttpResponseBadRequest()

    if 'file' not in request.FILES:
        return HttpResponseBadRequest()

    # compute a 'filename' which is similar to the location formatting, we're
    # using the 'filename' nomenclature since we're using a FileSystem paradigm
    # here. We're just imposing the Location string formatting expectations to
    # keep things a bit more consistent
    upload_file = request.FILES['file']
    filename = upload_file.name
    mime_type = upload_file.content_type

    content_loc = StaticContent.compute_location(org, course, filename)

    chunked = upload_file.multiple_chunks()
    sc_partial = partial(StaticContent, content_loc, filename, mime_type)
    if chunked:
        content = sc_partial(upload_file.chunks())
        tempfile_path = upload_file.temporary_file_path()
    else:
        content = sc_partial(upload_file.read())
        tempfile_path = None

    # first let's see if a thumbnail can be created
    (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(
        content, tempfile_path=tempfile_path)

    # delete cached thumbnail even if one couldn't be created this time (else
    # the old thumbnail will continue to show)
    del_cached_content(thumbnail_location)

    # now store thumbnail location only if we could create it
    if thumbnail_content is not None:
        content.thumbnail_location = thumbnail_location

    # then commit the content
    contentstore().save(content)
    del_cached_content(content.location)

    # readback the saved content - we need the database timestamp
    readback = contentstore().find(content.location)

    locked = getattr(content, 'locked', False)
    response_payload = {
        'asset': _get_asset_json(content.name, readback.last_modified_at,
                                 content.location, content.thumbnail_location, locked),
        'msg': _('Upload completed')
    }

    return JsonResponse(response_payload)
def check_transcripts(request):
    """
    Check state of transcripts availability.

    request.GET['data'] has key `videos`, which can contain any of the following::

        [
            {u'type': u'youtube', u'video': u'OEoXaMPEzfM', u'mode': u'youtube'},
            {u'type': u'html5', u'video': u'video1', u'mode': u'mp4'}
            {u'type': u'html5', u'video': u'video2', u'mode': u'webm'}
        ]
        `type` is youtube or html5
        `video` is html5 or youtube video_id
        `mode` is youtube, ,p4 or webm

    Returns transcripts_presence dict::

        html5_local: list of html5 ids, if subtitles exist locally for them;
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority, reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones;
        current_item_subs: string, value of item.sub field;
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module;
        command: string, action to front-end what to do and what to show to user.
    """
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Error',
    }

    try:
        __, videos, item = _validate_transcripts_data(request)
    except TranscriptsRequestValidationException as e:
        return error_response(transcripts_presence, e.message)

    transcripts_presence['status'] = 'Success'

    # Look up the transcript currently referenced by item.sub (if any).
    filename = 'subs_{0}.srt.sjson'.format(item.sub)
    content_location = StaticContent.compute_location(
        item.location.org, item.location.course, filename
    )
    try:
        local_transcripts = contentstore().find(content_location).data
        transcripts_presence['current_item_subs'] = item.sub
    except NotFoundError:
        pass

    # Check for youtube transcripts presence
    youtube_id = videos.get('youtube', None)
    if youtube_id:
        transcripts_presence['is_youtube_mode'] = True

        # youtube local
        filename = 'subs_{0}.srt.sjson'.format(youtube_id)
        content_location = StaticContent.compute_location(
            item.location.org, item.location.course, filename
        )
        try:
            local_transcripts = contentstore().find(content_location).data
            transcripts_presence['youtube_local'] = True
        except NotFoundError:
            log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id)

        # youtube server
        youtube_api = copy.deepcopy(settings.YOUTUBE_API)
        youtube_api['params']['v'] = youtube_id
        youtube_response = requests.get(youtube_api['url'], params=youtube_api['params'])
        if youtube_response.status_code == 200 and youtube_response.text:
            transcripts_presence['youtube_server'] = True

        #check youtube local and server transcripts for equality
        if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
            try:
                youtube_server_subs = get_transcripts_from_youtube(
                    youtube_id,
                    settings,
                    item.runtime.service(item, "i18n")
                )
                if json.loads(local_transcripts) == youtube_server_subs:  # check transcripts for equality
                    transcripts_presence['youtube_diff'] = False
            except GetTranscriptsFromYouTubeException:
                pass

    # Check for html5 local transcripts presence
    html5_subs = []
    for html5_id in videos['html5']:
        filename = 'subs_{0}.srt.sjson'.format(html5_id)
        content_location = StaticContent.compute_location(
            item.location.org, item.location.course, filename
        )
        try:
            html5_subs.append(contentstore().find(content_location).data)
            transcripts_presence['html5_local'].append(html5_id)
        except NotFoundError:
            log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
        # Only two html5 sources are ever compared (mp4 + webm pair).
        if len(html5_subs) == 2:  # check html5 transcripts for equality
            transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])

    # Reduce the presence flags to a front-end command + chosen subs id.
    command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
    transcripts_presence.update({
        'command': command,
        'subs': subs_to_use,
    })
    return JsonResponse(transcripts_presence)
def test_different_resolutions(self, src_dimensions):
    """
    Test various resolutions of images to make thumbnails of.

    Note that our test sizes are small=(200, 100) and large=(400, 200).

    1. Images should won't be blown up if it's too small, so a (100, 50)
       resolution image will remain (100, 50).
    2. However, images *will* be converted using our format and quality
       settings (JPEG, 75% -- the PIL default). This is because images with
       relatively small dimensions not compressed properly.
    3. Image thumbnail naming will maintain the naming convention of the
       target resolution, even if the image was not actually scaled to that
       size (i.e. it was already smaller). This is mostly because it's
       simpler to be consistent, but it also lets us more easily tell which
       configuration a thumbnail was created under.
    """
    # Create a source image...
    image = Image.new("RGB", src_dimensions, "blue")
    image_buff = StringIO()
    image.save(image_buff, format="PNG")
    image_buff.seek(0)
    image_name = "src_course_image.png"

    course = CourseFactory.create(course_image=image_name)

    # Save the image to the contentstore...
    course_image_asset_key = StaticContent.compute_location(
        course.id, course.course_image)
    course_image_content = StaticContent(course_image_asset_key, image_name, 'image/png', image_buff)
    contentstore().save(course_image_content)

    # Now generate the CourseOverview...
    config = CourseOverviewImageConfig.current()
    course_overview = CourseOverview.get_from_id(course.id)
    image_urls = course_overview.image_urls

    # Check both generated thumbnails against their configured target sizes.
    for image_url, target in [(image_urls['small'], config.small), (image_urls['large'], config.large)]:
        image_key = StaticContent.get_location_from_path(image_url)
        image_content = AssetManager.find(image_key)
        image = Image.open(StringIO(image_content.data))

        # Naming convention for thumbnail
        self.assertTrue(
            image_url.endswith(
                'src_course_image-png-{}x{}.jpg'.format(*target)))

        # Actual thumbnail data
        src_x, src_y = src_dimensions
        target_x, target_y = target
        image_x, image_y = image.size

        # I'm basically going to assume the image library knows how to do
        # the right thing in terms of handling aspect ratio. We're just
        # going to make sure that small images aren't blown up, and that
        # we never exceed our target sizes
        self.assertLessEqual(image_x, target_x)
        self.assertLessEqual(image_y, target_y)

        if src_x < target_x and src_y < target_y:
            self.assertEqual(src_x, image_x)
            self.assertEqual(src_y, image_y)
def download_transcripts(request):
    """
    Passes to user requested transcripts file.

    Raises Http404 if unsuccessful.
    """
    locator = request.GET.get('locator')
    subs_id = request.GET.get('subs_id')
    if not locator:
        log.debug('GET data without "locator" property.')
        raise Http404

    try:
        item = _get_item(request, request.GET)
    except (InvalidKeyError, ItemNotFoundError):
        log.debug("Can't find item by locator.")
        raise Http404

    if item.category != 'video':
        # BUG FIX: log message had an unbalanced quote ('...for video"...').
        log.debug('transcripts are supported only for "video" modules.')
        raise Http404

    try:
        if not subs_id:
            raise NotFoundError
        filename = subs_id
        # BUG FIX: the format string had its placeholder mangled into the
        # literal text "(unknown)", so every lookup used the same bogus asset
        # name; restore the {filename} placeholder.
        content_location = StaticContent.compute_location(
            item.location.course_key,
            'subs_{filename}.srt.sjson'.format(filename=filename),
        )
        sjson_transcript = contentstore().find(content_location).data
    except NotFoundError:
        # Try searching in VAL for the transcript as a last resort
        transcript = None
        if is_val_transcript_feature_enabled_for_course(item.location.course_key):
            transcript = get_video_transcript_content(
                language_code=u'en',
                edx_video_id=item.edx_video_id,
                youtube_id_1_0=item.youtube_id_1_0,
                html5_sources=item.html5_sources,
            )

        if not transcript:
            raise Http404
        filename = os.path.splitext(os.path.basename(
            transcript['file_name']))[0].encode('utf8')
        sjson_transcript = transcript['content']

    # convert sjson content into srt format.
    transcript_content = Transcript.convert(sjson_transcript, input_format='sjson', output_format='srt')
    if not transcript_content:
        raise Http404

    # Construct an HTTP response
    response = HttpResponse(transcript_content, content_type='application/x-subrip; charset=utf-8')
    # BUG FIX: same mangled placeholder — the downloaded file was always
    # named "(unknown).srt"; restore {filename} interpolation.
    response['Content-Disposition'] = 'attachment; filename="{filename}.srt"'.format(filename=filename)
    return response
def test_happy_path(self, modulestore_type, create_after_overview):
    """
    What happens when everything works like we expect it to.

    If `create_after_overview` is True, we will temporarily disable
    thumbnail creation so that the initial CourseOverview is created
    without an image_set, and the CourseOverviewImageSet is created
    afterwards.

    If `create_after_overview` is False, we'll create the
    CourseOverviewImageSet at the same time as the CourseOverview.
    """
    # Create a real (oversized) image...
    image = Image.new("RGB", (800, 400), "blue")
    image_buff = StringIO()
    image.save(image_buff, format="JPEG")
    image_buff.seek(0)
    image_name = "big_course_image.jpeg"

    with self.store.default_store(modulestore_type):
        course = CourseFactory.create(default_store=modulestore_type,
                                      course_image=image_name)

        # Save a real image here...
        course_image_asset_key = StaticContent.compute_location(
            course.id, course.course_image)
        course_image_content = StaticContent(course_image_asset_key, image_name, 'image/jpeg', image_buff)
        contentstore().save(course_image_content)

        # If create_after_overview is True, disable thumbnail generation so
        # that the CourseOverview object is created and saved without an
        # image_set at first (it will be lazily created later).
        if create_after_overview:
            self.set_config(enabled=False)

        # Now generate the CourseOverview...
        course_overview = CourseOverview.get_from_id(course.id)

        # If create_after_overview is True, no image_set exists yet. Verify
        # that, then switch config back over to True and it should lazily
        # create the image_set on the next get_from_id() call.
        if create_after_overview:
            self.assertFalse(hasattr(course_overview, 'image_set'))
            self.set_config(enabled=True)
            course_overview = CourseOverview.get_from_id(course.id)

        self.assertTrue(hasattr(course_overview, 'image_set'))
        image_urls = course_overview.image_urls
        config = CourseOverviewImageConfig.current()

        # Make sure the thumbnail names come out as expected...
        self.assertTrue(
            image_urls['raw'].endswith('big_course_image.jpeg'))
        self.assertTrue(image_urls['small'].endswith(
            'big_course_image-jpeg-{}x{}.jpg'.format(*config.small)))
        self.assertTrue(image_urls['large'].endswith(
            'big_course_image-jpeg-{}x{}.jpg'.format(*config.large)))

        # Now make sure our thumbnails are of the sizes we expect...
        for image_url, expected_size in [
                (image_urls['small'], config.small),
                (image_urls['large'], config.large)
        ]:
            image_key = StaticContent.get_location_from_path(image_url)
            image_content = AssetManager.find(image_key)
            image = Image.open(StringIO(image_content.data))
            self.assertEqual(image.size, expected_size)
def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
    """
    Export all modules from `modulestore` and content from `contentstore` as xml to `root_dir`.

    `modulestore`: A `ModuleStore` object that is the source of the modules to export
    `contentstore`: A `ContentStore` object that is the source of the content to export, can be None
    `course_key`: The `CourseKey` of the `CourseModuleDescriptor` to export
    `root_dir`: The directory to write the exported xml to
    `course_dir`: The name of the directory inside `root_dir` to write the course content to
    """
    course = modulestore.get_course(course_key)

    fsm = OSFS(root_dir)
    export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)

    root = lxml.etree.Element('unknown')

    # export only the published content
    with modulestore.branch_setting(ModuleStoreEnum.Branch.published_only, course_key):
        course.add_xml_to_node(root)

        with export_fs.open('course.xml', 'w') as course_xml:
            lxml.etree.ElementTree(root).write(course_xml)

    # export the static assets
    policies_dir = export_fs.makeopendir('policies')
    if contentstore:
        contentstore.export_all_for_course(
            course_key,
            root_dir + '/' + course_dir + '/static/',
            root_dir + '/' + course_dir + '/policies/assets.json',
        )

        # If we are using the default course image, export it to the
        # legacy location to support backwards compatibility.
        if course.course_image == course.fields['course_image'].default:
            try:
                course_image = contentstore.find(
                    StaticContent.compute_location(course.id, course.course_image),
                )
            except NotFoundError:
                pass
            else:
                output_dir = root_dir + '/' + course_dir + '/static/images/'
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir)
                with OSFS(output_dir).open('course_image.jpg', 'wb') as course_image_file:
                    course_image_file.write(course_image.data)

    # export the static tabs
    export_extra_content(export_fs, modulestore, course_key, 'static_tab', 'tabs', '.html')

    # export the custom tags
    export_extra_content(export_fs, modulestore, course_key, 'custom_tag_template', 'custom_tags')

    # export the course updates
    export_extra_content(export_fs, modulestore, course_key, 'course_info', 'info', '.html')

    # export the 'about' data (e.g. overview, etc.)
    export_extra_content(export_fs, modulestore, course_key, 'about', 'about', '.html')

    # export the grading policy
    course_run_policy_dir = policies_dir.makeopendir(course.location.name)
    with course_run_policy_dir.open('grading_policy.json', 'w') as grading_policy:
        grading_policy.write(dumps(course.grading_policy, cls=EdxJSONEncoder))

    # export all of the course metadata in policy.json
    with course_run_policy_dir.open('policy.json', 'w') as course_policy:
        policy = {'course/' + course.location.name: own_metadata(course)}
        course_policy.write(dumps(policy, cls=EdxJSONEncoder))

    # NOTE: this code assumes that verticals are the top most draftable container
    # should we change the application, then this assumption will no longer be valid
    # NOTE: we need to explicitly implement the logic for setting the vertical's parent
    # and index here since the XML modulestore cannot load draft modules
    draft_verticals = modulestore.get_items(
        course_key,
        category='vertical',
        revision=ModuleStoreEnum.RevisionOption.draft_only)

    if len(draft_verticals) > 0:
        draft_course_dir = export_fs.makeopendir(DRAFT_DIR)
        for draft_vertical in draft_verticals:
            parent_loc = modulestore.get_parent_location(
                draft_vertical.location,
                revision=ModuleStoreEnum.RevisionOption.draft_preferred)
            # Don't try to export orphaned items.
            if parent_loc is not None:
                logging.debug('parent_loc = {0}'.format(parent_loc))
                if parent_loc.category in DIRECT_ONLY_CATEGORIES:
                    # Record the draft vertical's position under its published
                    # parent so it can be re-attached correctly on import.
                    draft_vertical.xml_attributes['parent_sequential_url'] = parent_loc.to_deprecated_string()
                    sequential = modulestore.get_item(parent_loc)
                    index = sequential.children.index(draft_vertical.location)
                    draft_vertical.xml_attributes['index_in_children_list'] = str(index)
                draft_vertical.runtime.export_fs = draft_course_dir
                node = lxml.etree.Element('unknown')
                draft_vertical.add_xml_to_node(node)
def export_to_xml(modulestore, contentstore, course_key, root_dir, course_dir):
    """
    Export all modules from `modulestore` and content from `contentstore` as xml to `root_dir`.

    `modulestore`: A `ModuleStore` object that is the source of the modules to export
    `contentstore`: A `ContentStore` object that is the source of the content to export, can be None
    `course_key`: The `CourseKey` of the `CourseModuleDescriptor` to export
    `root_dir`: The directory to write the exported xml to
    `course_dir`: The name of the directory inside `root_dir` to write the course content to
    """
    with modulestore.bulk_operations(course_key):

        course = modulestore.get_course(course_key, depth=None)  # None means infinite
        fsm = OSFS(root_dir)
        export_fs = course.runtime.export_fs = fsm.makeopendir(course_dir)

        # The root XML node the course serializes itself into; its tag is
        # rewritten by the course's own serialization logic.
        root = lxml.etree.Element('unknown')

        # export only the published content
        with modulestore.branch_setting(ModuleStoreEnum.Branch.published_only, course_key):
            # change all of the references inside the course to use the xml expected key type w/o version & branch
            xml_centric_course_key = CourseLocator(course_key.org, course_key.course, course_key.run, deprecated=True)
            adapt_references(course, xml_centric_course_key, export_fs)

            course.add_xml_to_node(root)

        with export_fs.open('course.xml', 'w') as course_xml:
            lxml.etree.ElementTree(root).write(course_xml)

        # export the static assets
        policies_dir = export_fs.makeopendir('policies')
        if contentstore:
            contentstore.export_all_for_course(
                course_key,
                root_dir + '/' + course_dir + '/static/',
                root_dir + '/' + course_dir + '/policies/assets.json',
            )

            # If we are using the default course image, export it to the
            # legacy location to support backwards compatibility.
            if course.course_image == course.fields['course_image'].default:
                try:
                    course_image = contentstore.find(
                        StaticContent.compute_location(course.id, course.course_image),
                    )
                except NotFoundError:
                    # Default image was never uploaded; nothing to export.
                    pass
                else:
                    output_dir = root_dir + '/' + course_dir + '/static/images/'
                    if not os.path.isdir(output_dir):
                        os.makedirs(output_dir)
                    with OSFS(output_dir).open('course_image.jpg', 'wb') as course_image_file:
                        course_image_file.write(course_image.data)

        # export the static tabs
        export_extra_content(export_fs, modulestore, course_key, xml_centric_course_key, 'static_tab', 'tabs', '.html')

        # export the custom tags
        export_extra_content(export_fs, modulestore, course_key, xml_centric_course_key, 'custom_tag_template', 'custom_tags')

        # export the course updates
        export_extra_content(export_fs, modulestore, course_key, xml_centric_course_key, 'course_info', 'info', '.html')

        # export the 'about' data (e.g. overview, etc.)
        export_extra_content(export_fs, modulestore, course_key, xml_centric_course_key, 'about', 'about', '.html')

        # export the grading policy
        course_run_policy_dir = policies_dir.makeopendir(course.location.name)
        with course_run_policy_dir.open('grading_policy.json', 'w') as grading_policy:
            grading_policy.write(dumps(course.grading_policy, cls=EdxJSONEncoder, sort_keys=True, indent=4))

        # export all of the course metadata in policy.json
        with course_run_policy_dir.open('policy.json', 'w') as course_policy:
            policy = {'course/' + course.location.name: own_metadata(course)}
            course_policy.write(dumps(policy, cls=EdxJSONEncoder, sort_keys=True, indent=4))

        #### DRAFTS ####
        # xml backed courses don't support drafts!
        if course.runtime.modulestore.get_modulestore_type() != ModuleStoreEnum.Type.xml:
            # NOTE: we need to explicitly implement the logic for setting the vertical's parent
            # and index here since the XML modulestore cannot load draft modules
            with modulestore.branch_setting(ModuleStoreEnum.Branch.draft_preferred, course_key):
                # Fetch every draft-only module that is not a direct-only
                # category (those can never be drafts).
                draft_modules = modulestore.get_items(
                    course_key,
                    qualifiers={'category': {'$nin': DIRECT_ONLY_CATEGORIES}},
                    revision=ModuleStoreEnum.RevisionOption.draft_only
                )

                if draft_modules:
                    draft_course_dir = export_fs.makeopendir(DRAFT_DIR)

                    # accumulate tuples of draft_modules and their parents in
                    # this list:
                    draft_node_list = []

                    for draft_module in draft_modules:
                        parent_loc = modulestore.get_parent_location(
                            draft_module.location,
                            revision=ModuleStoreEnum.RevisionOption.draft_preferred
                        )

                        # if module has no parent, set its parent_url to `None`
                        parent_url = None
                        if parent_loc is not None:
                            parent_url = parent_loc.to_deprecated_string()

                        draft_node = draft_node_constructor(
                            draft_module,
                            location=draft_module.location,
                            url=draft_module.location.to_deprecated_string(),
                            parent_location=parent_loc,
                            parent_url=parent_url,
                        )

                        draft_node_list.append(draft_node)

                    for draft_node in get_draft_subtree_roots(draft_node_list):
                        # only export the roots of the draft subtrees
                        # since export_from_xml (called by `add_xml_to_node`)
                        # exports a whole tree

                        # ensure module has "xml_attributes" attr
                        if not hasattr(draft_node.module, 'xml_attributes'):
                            draft_node.module.xml_attributes = {}

                        # Don't try to export orphaned items
                        # and their descendents
                        if draft_node.parent_location is None:
                            continue

                        logging.debug('parent_loc = {0}'.format(draft_node.parent_location))

                        # Record parent url and position so import can
                        # re-attach the draft at the right place.
                        draft_node.module.xml_attributes['parent_url'] = draft_node.parent_url
                        parent = modulestore.get_item(draft_node.parent_location)
                        index = parent.children.index(draft_node.module.location)
                        draft_node.module.xml_attributes['index_in_children_list'] = str(index)

                        draft_node.module.runtime.export_fs = draft_course_dir
                        adapt_references(draft_node.module, xml_centric_course_key, draft_course_dir)
                        node = lxml.etree.Element('unknown')
                        draft_node.module.add_xml_to_node(node)
def _upload_asset(request, course_key):
    '''
    This method allows for POST uploading of files into the course asset
    library, which will be supported by GridFS in MongoDB.

    Returns a JsonResponse describing the stored asset on success, an HTTP
    400 if the course doesn't exist, or an HTTP 413 if the file is too big.
    '''
    # Does the course actually exist?!? Get anything from it to prove its
    # existence
    try:
        modulestore().get_course(course_key)
    except ItemNotFoundError:
        # no return it as a Bad Request response
        logging.error("Could not find course: %s", course_key)
        return HttpResponseBadRequest()

    # compute a 'filename' which is similar to the location formatting, we're
    # using the 'filename' nomenclature since we're using a FileSystem paradigm
    # here. We're just imposing the Location string formatting expectations to
    # keep things a bit more consistent
    upload_file = request.FILES['file']
    filename = upload_file.name
    mime_type = upload_file.content_type
    size = get_file_size(upload_file)

    # If file is greater than a specified size, reject the upload
    # request and send a message to the user. Note that since
    # the front-end may batch large file uploads in smaller chunks,
    # we validate the file-size on the front-end in addition to
    # validating on the backend. (see cms/static/js/views/assets.js)
    max_file_size_in_bytes = settings.MAX_ASSET_UPLOAD_FILE_SIZE_IN_MB * 1000 ** 2
    if size > max_file_size_in_bytes:
        # Fix: the message previously read 'File (unknown) exceeds ...' even
        # though `filename` was passed to .format(); restore the {filename}
        # placeholder so the user sees which file was rejected.
        return JsonResponse({
            'error': _('File {filename} exceeds maximum size of '
                       '{size_mb} MB. Please follow the instructions here '
                       'to upload a file elsewhere and link to it instead: '
                       '{faq_url}').format(
                           filename=filename,
                           size_mb=settings.MAX_ASSET_UPLOAD_FILE_SIZE_IN_MB,
                           faq_url=settings.MAX_ASSET_UPLOAD_FILE_SIZE_URL,
                       )
        }, status=413)

    content_loc = StaticContent.compute_location(course_key, filename)

    chunked = upload_file.multiple_chunks()
    sc_partial = partial(StaticContent, content_loc, filename, mime_type)
    if chunked:
        content = sc_partial(upload_file.chunks())
        tempfile_path = upload_file.temporary_file_path()
    else:
        content = sc_partial(upload_file.read())
        tempfile_path = None

    # first let's see if a thumbnail can be created
    (thumbnail_content, thumbnail_location) = contentstore().generate_thumbnail(
        content,
        tempfile_path=tempfile_path,
    )

    # delete cached thumbnail even if one couldn't be created this time (else
    # the old thumbnail will continue to show)
    del_cached_content(thumbnail_location)
    # now store thumbnail location only if we could create it
    if thumbnail_content is not None:
        content.thumbnail_location = thumbnail_location

    # then commit the content
    contentstore().save(content)
    del_cached_content(content.location)

    # readback the saved content - we need the database timestamp
    readback = contentstore().find(content.location)
    locked = getattr(content, 'locked', False)
    response_payload = {
        'asset': _get_asset_json(content.name, content.content_type,
                                 readback.last_modified_at, content.location,
                                 content.thumbnail_location, locked),
        'msg': _('Upload completed')
    }

    return JsonResponse(response_payload)
def asset_location(location, filename): """ Return asset location. `location` is module location. """ return StaticContent.compute_location(location.course_key, filename)
def process_extra(self, root, courselike, root_courselike_dir, xml_centric_courselike_key, export_fs):
    """
    Export all the auxiliary course content that lives outside the module
    tree: asset metadata, static assets, tabs/updates/about pages, and the
    grading/metadata policies, then any draft modules.
    """
    # Export the modulestore's asset metadata.
    asset_dir = root_courselike_dir + '/' + AssetMetadata.EXPORTED_ASSET_DIR + '/'
    if not os.path.isdir(asset_dir):
        os.makedirs(asset_dir)

    asset_root = lxml.etree.Element(AssetMetadata.ALL_ASSETS_XML_TAG)
    course_assets = self.modulestore.get_all_asset_metadata(self.courselike_key, None)
    for asset_md in course_assets:
        # All asset types are exported using the "asset" tag - but their asset type is specified in each asset key.
        asset = lxml.etree.SubElement(asset_root, AssetMetadata.ASSET_XML_TAG)
        asset_md.to_xml(asset)

    with OSFS(asset_dir).open(AssetMetadata.EXPORTED_ASSET_FILENAME, 'w') as asset_xml_file:
        lxml.etree.ElementTree(asset_root).write(asset_xml_file)

    # export the static assets
    policies_dir = export_fs.makeopendir('policies')
    if self.contentstore:
        self.contentstore.export_all_for_course(
            self.courselike_key,
            root_courselike_dir + '/static/',
            root_courselike_dir + '/policies/assets.json',
        )

        # If we are using the default course image, export it to the
        # legacy location to support backwards compatibility.
        if courselike.course_image == courselike.fields['course_image'].default:
            try:
                course_image = self.contentstore.find(
                    StaticContent.compute_location(
                        courselike.id, courselike.course_image
                    ),
                )
            except NotFoundError:
                # No stored copy of the default image; skip the legacy export.
                pass
            else:
                output_dir = root_courselike_dir + '/static/images/'
                if not os.path.isdir(output_dir):
                    os.makedirs(output_dir)
                with OSFS(output_dir).open('course_image.jpg', 'wb') as course_image_file:
                    course_image_file.write(course_image.data)

    # export the static tabs
    export_extra_content(export_fs, self.modulestore, self.courselike_key, xml_centric_courselike_key, 'static_tab', 'tabs', '.html')

    # export the custom tags
    export_extra_content(export_fs, self.modulestore, self.courselike_key, xml_centric_courselike_key, 'custom_tag_template', 'custom_tags')

    # export the course updates
    export_extra_content(export_fs, self.modulestore, self.courselike_key, xml_centric_courselike_key, 'course_info', 'info', '.html')

    # export the 'about' data (e.g. overview, etc.)
    export_extra_content(export_fs, self.modulestore, self.courselike_key, xml_centric_courselike_key, 'about', 'about', '.html')

    course_policy_dir_name = courselike.location.run
    if courselike.url_name != courselike.location.run and courselike.url_name == 'course':
        # Use url_name for split mongo because course_run is not used when loading policies.
        course_policy_dir_name = courselike.url_name
    course_run_policy_dir = policies_dir.makeopendir(course_policy_dir_name)

    # export the grading policy
    with course_run_policy_dir.open('grading_policy.json', 'w') as grading_policy:
        grading_policy.write(dumps(courselike.grading_policy, cls=EdxJSONEncoder, sort_keys=True, indent=4))

    # export all of the course metadata in policy.json
    with course_run_policy_dir.open('policy.json', 'w') as course_policy:
        policy = {'course/' + courselike.location.name: own_metadata(courselike)}
        course_policy.write(dumps(policy, cls=EdxJSONEncoder, sort_keys=True, indent=4))

    # xml backed courses don't support drafts!
    if courselike.runtime.modulestore.get_modulestore_type() != ModuleStoreEnum.Type.xml:
        _export_drafts(self.modulestore, self.courselike_key, export_fs, xml_centric_courselike_key)
def course_image_url(course): """Returns the image url for the course.""" loc = StaticContent.compute_location(course.location.course_key, course.course_image) path = StaticContent.serialize_asset_key_with_slash(loc) return path
def sub_id_to_location(cls, sub_id): """ A helper to compute a static file location from a subtitle id. """ return StaticContent.compute_location( cls.course.id, u'subs_{0}.srt.sjson'.format(sub_id))
def import_static_content(
        modules, course_loc, course_data_path, static_content_store,
        target_location_namespace, subpath='static', verbose=False):
    """
    Walk `course_data_path`/`subpath` and save every file found there into
    `static_content_store`, applying any display-name/locked/content-type
    policy from policies/assets.json.

    Returns a dict mapping each imported file's course-relative path to its
    content location name, for later substitution in module data.

    NOTE(review): `modules` and `course_loc` are not referenced in this body
    — presumably kept for interface compatibility; confirm against callers.
    """
    remap_dict = {}

    # now import all static assets
    static_dir = course_data_path / subpath
    try:
        with open(course_data_path / 'policies/assets.json') as f:
            policy = json.load(f)
    except (IOError, ValueError) as err:
        # xml backed courses won't have this file, only exported courses;
        # so, its absence is not really an exception.
        policy = {}

    # NOTE(review): this unconditionally force-enables verbose logging,
    # shadowing the `verbose` parameter — looks intentional but confirm.
    verbose = True

    mimetypes_list = mimetypes.types_map.values()

    for dirname, _, filenames in os.walk(static_dir):
        for filename in filenames:

            content_path = os.path.join(dirname, filename)

            # Skip editor/backup files such as "foo.txt~".
            if filename.endswith('~'):
                if verbose:
                    log.debug('skipping static content %s...', content_path)
                continue

            if verbose:
                log.debug('importing static content %s...', content_path)

            try:
                with open(content_path, 'rb') as f:
                    data = f.read()
            except IOError:
                if filename.startswith('._'):
                    # OS X "companion files". See
                    # http://www.diigo.com/annotated/0c936fda5da4aa1159c189cea227e174
                    continue
                # Not a 'hidden file', then re-raise exception
                raise

            # strip away leading path from the name
            fullname_with_subpath = content_path.replace(static_dir, '')
            if fullname_with_subpath.startswith('/'):
                fullname_with_subpath = fullname_with_subpath[1:]
            content_loc = StaticContent.compute_location(
                target_location_namespace.org,
                target_location_namespace.course,
                fullname_with_subpath
            )

            # Per-asset policy overrides (display name, lock state, mimetype).
            policy_ele = policy.get(content_loc.name, {})
            displayname = policy_ele.get('displayname', filename)
            locked = policy_ele.get('locked', False)
            mime_type = policy_ele.get('contentType')

            # Check extracted contentType in list of all valid mimetypes
            if not mime_type or mime_type not in mimetypes_list:
                mime_type = mimetypes.guess_type(filename)[0]  # Assign guessed mimetype
            content = StaticContent(
                content_loc, displayname, mime_type, data,
                import_path=fullname_with_subpath, locked=locked
            )

            # first let's save a thumbnail so we can get back a thumbnail location
            thumbnail_content, thumbnail_location = static_content_store.generate_thumbnail(content)

            if thumbnail_content is not None:
                content.thumbnail_location = thumbnail_location

            # then commit the content
            try:
                static_content_store.save(content)
            except Exception as err:
                # Best-effort import: log the failure and keep going with the
                # remaining assets.
                log.exception('Error importing {0}, error={1}'.format(
                    fullname_with_subpath, err
                ))

            # store the remapping information which will be needed
            # to subsitute in the module data
            remap_dict[fullname_with_subpath] = content_loc.name

    return remap_dict
def course_image_url(course): """Returns the image url for the course.""" loc = StaticContent.compute_location(course.location.course_key, course.course_image) path = loc.to_deprecated_string() return path
def get_transcripts_presence(videos, item):
    """
    fills in the transcripts_presence dictionary after for a given component
    with its list of videos.

    Returns transcripts_presence dict:
        html5_local: list of html5 ids, if subtitles exist locally for them;
        is_youtube_mode: bool, if we have youtube_id, and as youtube mode is of higher priority, reflect this with flag;
        youtube_local: bool, if youtube transcripts exist locally;
        youtube_server: bool, if youtube transcripts exist on server;
        youtube_diff: bool, if youtube transcripts exist on youtube server, and are different from local youtube ones;
        current_item_subs: string, value of item.sub field;
        status: string, 'Error' or 'Success';
        subs: string, new value of item.sub field, that should be set in module;
        command: string, action to front-end what to do and what to show to user.
    """
    # Defaults: pessimistic about youtube diff (True), optimistic status.
    transcripts_presence = {
        'html5_local': [],
        'html5_equal': False,
        'is_youtube_mode': False,
        'youtube_local': False,
        'youtube_server': False,
        'youtube_diff': True,
        'current_item_subs': None,
        'status': 'Success',
    }

    # Check whether subtitles for the item's current `sub` id exist locally.
    filename = 'subs_{0}.srt.sjson'.format(item.sub)
    content_location = StaticContent.compute_location(item.location.course_key, filename)
    try:
        local_transcripts = contentstore().find(content_location).data
        transcripts_presence['current_item_subs'] = item.sub
    except NotFoundError:
        pass

    # Check for youtube transcripts presence
    youtube_id = videos.get('youtube', None)
    if youtube_id:
        transcripts_presence['is_youtube_mode'] = True

        # youtube local
        filename = 'subs_{0}.srt.sjson'.format(youtube_id)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            # NOTE: re-binds `local_transcripts` — the later diff against the
            # youtube server subs uses this youtube-local copy.
            local_transcripts = contentstore().find(content_location).data
            transcripts_presence['youtube_local'] = True
        except NotFoundError:
            log.debug("Can't find transcripts in storage for youtube id: %s", youtube_id)

        # youtube server
        youtube_text_api = copy.deepcopy(settings.YOUTUBE['TEXT_API'])
        youtube_text_api['params']['v'] = youtube_id
        youtube_transcript_name = youtube_video_transcript_name(youtube_text_api)
        if youtube_transcript_name:
            youtube_text_api['params']['name'] = youtube_transcript_name
        youtube_response = requests.get('http://' + youtube_text_api['url'], params=youtube_text_api['params'])

        if youtube_response.status_code == 200 and youtube_response.text:
            transcripts_presence['youtube_server'] = True
        #check youtube local and server transcripts for equality
        if transcripts_presence['youtube_server'] and transcripts_presence['youtube_local']:
            try:
                youtube_server_subs = get_transcripts_from_youtube(
                    youtube_id,
                    settings,
                    item.runtime.service(item, "i18n")
                )
                if json.loads(local_transcripts) == youtube_server_subs:  # check transcripts for equality
                    transcripts_presence['youtube_diff'] = False
            except GetTranscriptsFromYouTubeException:
                # Couldn't fetch server subs; leave youtube_diff as True.
                pass

    # Check for html5 local transcripts presence
    html5_subs = []
    for html5_id in videos['html5']:
        filename = 'subs_{0}.srt.sjson'.format(html5_id)
        content_location = StaticContent.compute_location(item.location.course_key, filename)
        try:
            html5_subs.append(contentstore().find(content_location).data)
            transcripts_presence['html5_local'].append(html5_id)
        except NotFoundError:
            log.debug("Can't find transcripts in storage for non-youtube video_id: %s", html5_id)
        if len(html5_subs) == 2:  # check html5 transcripts for equality
            transcripts_presence['html5_equal'] = json.loads(html5_subs[0]) == json.loads(html5_subs[1])

    # Decide what the front-end should do based on what we found.
    command, subs_to_use = _transcripts_logic(transcripts_presence, videos)
    transcripts_presence.update({
        'command': command,
        'subs': subs_to_use,
    })
    return transcripts_presence