def import_handler(request, tag=None, package_id=None, branch=None, version_guid=None, block=None):
    """
    The restful handler for importing a course.

    GET
        html: return html page for import page
        json: not supported

    POST or PUT
        json: import a course via the .tar.gz file specified in request.FILES
    """
    location = BlockUsageLocator(package_id=package_id, branch=branch, version_guid=version_guid, block_id=block)
    if not has_course_access(request.user, location):
        raise PermissionDenied()

    old_location = loc_mapper().translate_locator_to_location(location)

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            data_root = path(settings.GITHUB_REPO_ROOT)
            course_subdir = "{0}-{1}-{2}".format(old_location.org, old_location.course, old_location.name)
            course_dir = data_root / course_subdir

            filename = request.FILES['course-data'].name
            if not filename.endswith('.tar.gz'):
                return JsonResponse(
                    {
                        'ErrMsg': _('We only support uploading a .tar.gz file.'),
                        'Stage': 1
                    },
                    status=415
                )
            temp_filepath = course_dir / filename

            if not course_dir.isdir():
                os.mkdir(course_dir)

            logging.debug('importing course to {0}'.format(temp_filepath))

            # Get upload chunks byte ranges
            try:
                matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                content_range = matches.groupdict()
            except KeyError:    # Single chunk
                # no Content-Range header, so make one that will work
                content_range = {'start': 0, 'stop': 1, 'end': 2}

            # stream out the uploaded files in chunks to disk
            if int(content_range['start']) == 0:
                mode = "wb+"
            else:
                mode = "ab+"
                size = os.path.getsize(temp_filepath)
                # Check to make sure we haven't missed a chunk
                # This shouldn't happen, even if different instances are handling
                # the same session, but it's always better to catch errors earlier.
                if size < int(content_range['start']):
                    log.warning(
                        "Reported range %s does not match size downloaded so far %s",
                        content_range['start'],
                        size
                    )
                    return JsonResponse(
                        {
                            'ErrMsg': _('File upload corrupted. Please try again'),
                            'Stage': 1
                        },
                        status=409
                    )
                # The last request sometimes comes twice. This happens because
                # nginx sends a 499 error code when the response takes too long.
                elif size > int(content_range['stop']) and size == int(content_range['end']):
                    return JsonResponse({'ImportStatus': 1})

            with open(temp_filepath, mode) as temp_file:
                for chunk in request.FILES['course-data'].chunks():
                    temp_file.write(chunk)

            size = os.path.getsize(temp_filepath)

            if int(content_range['stop']) != int(content_range['end']) - 1:
                # More chunks coming
                return JsonResponse({
                    "files": [{
                        "name": filename,
                        "size": size,
                        "deleteUrl": "",
                        "deleteType": "",
                        "url": location.url_reverse('import'),
                        "thumbnailUrl": ""
                    }]
                })
            else:
                # This was the last chunk.

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                key = location.package_id + filename
                session_status[key] = 1
                request.session.modified = True

                # Do everything from now on in a try-finally block to make sure
                # everything is properly cleaned up.
                try:
                    tar_file = tarfile.open(temp_filepath)
                    try:
                        safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                    except SuspiciousOperation as exc:
                        return JsonResponse(
                            {
                                'ErrMsg': 'Unsafe tar file. Aborting import.',
                                'SuspiciousFileOperationMsg': exc.args[0],
                                'Stage': 1
                            },
                            status=400
                        )
                    finally:
                        tar_file.close()

                    session_status[key] = 2
                    request.session.modified = True

                    # find the 'course.xml' file
                    def get_all_files(directory):
                        """
                        For each file in the directory, yield a 2-tuple of (file-name,
                        directory-path)
                        """
                        for dirpath, _dirnames, filenames in os.walk(directory):
                            for filename in filenames:
                                yield (filename, dirpath)

                    def get_dir_for_fname(directory, filename):
                        """
                        Returns the dirpath for the first file found in the directory
                        with the given name.  If there is no file in the directory with
                        the specified name, return None.
                        """
                        for fname, dirpath in get_all_files(directory):
                            if fname == filename:
                                return dirpath
                        return None

                    fname = "course.xml"
                    dirpath = get_dir_for_fname(course_dir, fname)
                    if not dirpath:
                        return JsonResponse(
                            {
                                'ErrMsg': _('Could not find the course.xml file in the package.'),
                                'Stage': 2
                            },
                            status=415
                        )

                    logging.debug('found course.xml at {0}'.format(dirpath))

                    if dirpath != course_dir:
                        for fname in os.listdir(dirpath):
                            shutil.move(dirpath / fname, course_dir)

                    _module_store, course_items = import_from_xml(
                        modulestore('direct'),
                        settings.GITHUB_REPO_ROOT,
                        [course_subdir],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_location_namespace=old_location,
                        draft_store=modulestore()
                    )

                    new_location = course_items[0].location
                    logging.debug('new course at {0}'.format(new_location))

                    session_status[key] = 3
                    request.session.modified = True

                    auth.add_users(request.user, CourseInstructorRole(new_location), request.user)
                    auth.add_users(request.user, CourseStaffRole(new_location), request.user)
                    logging.debug('created all course groups at {0}'.format(new_location))

                # Send errors to client with stage at which error occurred.
                except Exception as exception:   # pylint: disable=W0703
                    return JsonResponse(
                        {
                            'ErrMsg': str(exception),
                            'Stage': session_status[key]
                        },
                        status=400
                    )

                finally:
                    shutil.rmtree(course_dir)

                return JsonResponse({'Status': 'OK'})

    elif request.method == 'GET':  # assume html
        course_module = modulestore().get_item(old_location)
        return render_to_response('import.html', {
            'context_course': course_module,
            'successful_import_redirect_url': location.url_reverse("course"),
            'import_status_url': location.url_reverse("import_status", "fillerName"),
        })
    else:
        return HttpResponseNotFound()
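# The handlers in this module parse the browser's Content-Range upload header
# (e.g. "bytes 0-999/10000") with a module-level CONTENT_RE regex that is referenced
# but not defined in this excerpt. A minimal sketch of what such a pattern could look
# like, assuming it only needs to expose the 'start', 'stop' and 'end' groups used
# above (the exact pattern in the real module may differ):
import re

CONTENT_RE = re.compile(r"(?P<start>\d+)-(?P<stop>\d+)/(?P<end>\d+)")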
def import_handler(request, course_key_string):
    """
    The restful handler for importing a course.

    GET
        html: return html page for import page
        json: not supported

    POST or PUT
        json: import a course via the .tar.gz file specified in request.FILES
    """
    course_key = CourseKey.from_string(course_key_string)
    if not has_course_access(request.user, course_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            data_root = path(settings.GITHUB_REPO_ROOT)
            course_subdir = "{0}-{1}-{2}".format(course_key.org, course_key.course, course_key.run)
            course_dir = data_root / course_subdir

            filename = request.FILES['course-data'].name
            if not filename.endswith('.tar.gz'):
                return JsonResponse(
                    {
                        'ErrMsg': _('We only support uploading a .tar.gz file.'),
                        'Stage': 1
                    },
                    status=415
                )
            temp_filepath = course_dir / filename

            if not course_dir.isdir():
                os.mkdir(course_dir)

            logging.debug('importing course to {0}'.format(temp_filepath))

            # Get upload chunks byte ranges
            try:
                matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                content_range = matches.groupdict()
            except KeyError:    # Single chunk
                # no Content-Range header, so make one that will work
                content_range = {'start': 0, 'stop': 1, 'end': 2}

            # stream out the uploaded files in chunks to disk
            if int(content_range['start']) == 0:
                mode = "wb+"
            else:
                mode = "ab+"
                size = os.path.getsize(temp_filepath)
                # Check to make sure we haven't missed a chunk
                # This shouldn't happen, even if different instances are handling
                # the same session, but it's always better to catch errors earlier.
                if size < int(content_range['start']):
                    log.warning(
                        "Reported range %s does not match size downloaded so far %s",
                        content_range['start'],
                        size
                    )
                    return JsonResponse(
                        {
                            'ErrMsg': _('File upload corrupted. Please try again'),
                            'Stage': 1
                        },
                        status=409
                    )
                # The last request sometimes comes twice. This happens because
                # nginx sends a 499 error code when the response takes too long.
                elif size > int(content_range['stop']) and size == int(content_range['end']):
                    return JsonResponse({'ImportStatus': 1})

            with open(temp_filepath, mode) as temp_file:
                for chunk in request.FILES['course-data'].chunks():
                    temp_file.write(chunk)

            size = os.path.getsize(temp_filepath)

            if int(content_range['stop']) != int(content_range['end']) - 1:
                # More chunks coming
                return JsonResponse({
                    "files": [{
                        "name": filename,
                        "size": size,
                        "deleteUrl": "",
                        "deleteType": "",
                        "url": reverse_course_url('import_handler', course_key),
                        "thumbnailUrl": ""
                    }]
                })
            else:
                # This was the last chunk.

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                key = unicode(course_key) + filename
                session_status[key] = 1
                request.session.modified = True

                # Do everything from now on in a try-finally block to make sure
                # everything is properly cleaned up.
                try:
                    tar_file = tarfile.open(temp_filepath)
                    try:
                        safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                    except SuspiciousOperation as exc:
                        return JsonResponse(
                            {
                                'ErrMsg': 'Unsafe tar file. Aborting import.',
                                'SuspiciousFileOperationMsg': exc.args[0],
                                'Stage': 1
                            },
                            status=400
                        )
                    finally:
                        tar_file.close()

                    session_status[key] = 2
                    request.session.modified = True

                    # find the 'course.xml' file
                    def get_all_files(directory):
                        """
                        For each file in the directory, yield a 2-tuple of (file-name,
                        directory-path)
                        """
                        for dirpath, _dirnames, filenames in os.walk(directory):
                            for filename in filenames:
                                yield (filename, dirpath)

                    def get_dir_for_fname(directory, filename):
                        """
                        Returns the dirpath for the first file found in the directory
                        with the given name.  If there is no file in the directory with
                        the specified name, return None.
                        """
                        for fname, dirpath in get_all_files(directory):
                            if fname == filename:
                                return dirpath
                        return None

                    fname = "course.xml"
                    dirpath = get_dir_for_fname(course_dir, fname)
                    if not dirpath:
                        return JsonResponse(
                            {
                                'ErrMsg': _('Could not find the course.xml file in the package.'),
                                'Stage': 2
                            },
                            status=415
                        )

                    logging.debug('found course.xml at {0}'.format(dirpath))

                    if dirpath != course_dir:
                        for fname in os.listdir(dirpath):
                            shutil.move(dirpath / fname, course_dir)

                    _module_store, course_items = import_from_xml(
                        modulestore('direct'),
                        settings.GITHUB_REPO_ROOT,
                        [course_subdir],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_course_id=course_key,
                        draft_store=modulestore()
                    )

                    new_location = course_items[0].location
                    logging.debug('new course at {0}'.format(new_location))

                    session_status[key] = 3
                    request.session.modified = True

                # Send errors to client with stage at which error occurred.
                except Exception as exception:   # pylint: disable=W0703
                    log.exception("error importing course")
                    return JsonResponse(
                        {
                            'ErrMsg': str(exception),
                            'Stage': session_status[key]
                        },
                        status=400
                    )

                finally:
                    shutil.rmtree(course_dir)

                return JsonResponse({'Status': 'OK'})

    elif request.method == 'GET':  # assume html
        course_module = modulestore().get_course(course_key)
        return render_to_response('import.html', {
            'context_course': course_module,
            'successful_import_redirect_url': reverse_course_url('course_handler', course_key),
            'import_status_url': reverse_course_url(
                "import_status_handler", course_key, kwargs={'filename': "fillerName"}
            ),
        })
    else:
        return HttpResponseNotFound()
def _import_handler(request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.
    """
    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                subdir = base64.urlsafe_b64encode(repr(courselike_key))
                course_dir = data_root / subdir

                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415
                    )

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to {0}'.format(temp_filepath))

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:    # Single chunk
                    # no Content-Range header, so make one that will work
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'],
                            size
                        )
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409
                        )
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', courselike_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=broad-except
                _save_request_status(request, courselike_string, -1)
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)

                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400
                )

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400
                    )
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted", courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the {0} file in the package.').format(root_name),
                            'Stage': -2
                        },
                        status=415
                    )

                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified", courselike_key)
                _save_request_status(request, courselike_string, 3)

                courselike_items = import_func(
                    modulestore(), request.user.id,
                    settings.GITHUB_REPO_ROOT, [dirpath],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_id=courselike_key
                )

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful", courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=broad-except
                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400
                )

            finally:
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(request, courselike_string, -abs(session_status[courselike_string]))

            return JsonResponse({'Status': 'OK'})

    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url(
            "import_status_handler", courselike_key, kwargs={'filename': "fillerName"}
        )
        return render_to_response('import.html', {
            context_name: courselike_module,
            'successful_import_redirect_url': successful_url,
            'import_status_url': status_url,
            'library': isinstance(courselike_key, LibraryLocator)
        })
    else:
        return HttpResponseNotFound()
def import_handler(request, course_key_string):
    """
    The restful handler for importing a course.

    GET
        html: return html page for import page
        json: not supported

    POST or PUT
        json: import a course via the .tar.gz file specified in request.FILES
    """
    course_key = CourseKey.from_string(course_key_string)
    if not has_course_access(request.user, course_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                course_subdir = "{0}-{1}-{2}".format(course_key.org, course_key.course, course_key.run)
                course_dir = data_root / course_subdir

                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                key = unicode(course_key) + filename
                _save_request_status(request, key, 0)

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, key, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415
                    )

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to {0}'.format(temp_filepath))

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:    # Single chunk
                    # no Content-Range header, so make one that will work
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, key, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'],
                            size
                        )
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409
                        )
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', course_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=W0703
                _save_request_status(request, key, -1)
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import {0}: Temp data cleared".format(course_key))

                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400
                )

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import {0}: Upload complete".format(course_key))
                _save_request_status(request, key, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, key, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400
                    )
                finally:
                    tar_file.close()

                log.info("Course import {0}: Uploaded file extracted".format(course_key))
                _save_request_status(request, key, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                fname = "course.xml"
                dirpath = get_dir_for_fname(course_dir, fname)
                if not dirpath:
                    _save_request_status(request, key, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the course.xml file in the package.'),
                            'Stage': -2
                        },
                        status=415
                    )

                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found course.xml at {0}'.format(dirpath))

                log.info("Course import {0}: Extracted file verified".format(course_key))
                _save_request_status(request, key, 3)

                course_items = import_from_xml(
                    modulestore(), request.user.id,
                    settings.GITHUB_REPO_ROOT, [dirpath],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_course_id=course_key,
                )

                new_location = course_items[0].location
                logging.debug('new course at {0}'.format(new_location))

                log.info("Course import {0}: Course import successful".format(course_key))
                _save_request_status(request, key, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=W0703
                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[key]
                    },
                    status=400
                )

            finally:
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import {0}: Temp data cleared".format(course_key))
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[key] != 4:
                    _save_request_status(request, key, -abs(session_status[key]))

            return JsonResponse({'Status': 'OK'})

    elif request.method == 'GET':  # assume html
        course_module = modulestore().get_course(course_key)
        return render_to_response('import.html', {
            'context_course': course_module,
            'successful_import_redirect_url': reverse_course_url('course_handler', course_key),
            'import_status_url': reverse_course_url(
                "import_status_handler", course_key, kwargs={'filename': "fillerName"}
            ),
        })
    else:
        return HttpResponseNotFound()
def import_course(request, org, course, name):
    """
    This method will handle a POST request to upload and import a .tar.gz file
    into a specified course
    """
    location = get_location_and_verify_access(request, org, course, name)

    @contextmanager
    def wfile(filename, dirname):
        """
        A with-context that creates `filename` on entry and removes it on exit.
        `filename` is truncated on creation. Additionally removes dirname on exit.
        """
        open(filename, "w").close()
        try:
            yield filename
        finally:
            os.remove(filename)
            shutil.rmtree(dirname)

    if request.method == 'POST':
        data_root = path(settings.GITHUB_REPO_ROOT)
        course_subdir = "{0}-{1}-{2}".format(org, course, name)
        course_dir = data_root / course_subdir

        filename = request.FILES['course-data'].name
        if not filename.endswith('.tar.gz'):
            return JsonResponse(
                {'ErrMsg': 'We only support uploading a .tar.gz file.'},
                status=415
            )
        temp_filepath = course_dir / filename

        if not course_dir.isdir():
            os.mkdir(course_dir)

        logging.debug('importing course to {0}'.format(temp_filepath))

        # Get upload chunks byte ranges
        try:
            matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
            content_range = matches.groupdict()
        except KeyError:    # Single chunk
            # no Content-Range header, so make one that will work
            content_range = {'start': 0, 'stop': 1, 'end': 2}

        # stream out the uploaded files in chunks to disk
        if int(content_range['start']) == 0:
            mode = "wb+"
        else:
            mode = "ab+"
            size = os.path.getsize(temp_filepath)
            # Check to make sure we haven't missed a chunk
            # This shouldn't happen, even if different instances are handling
            # the same session, but it's always better to catch errors earlier.
            if size < int(content_range['start']):
                log.warning(
                    "Reported range %s does not match size downloaded so far %s",
                    content_range['start'],
                    size
                )
                return JsonResponse(
                    {'ErrMsg': 'File upload corrupted. Please try again'},
                    status=409
                )
            # The last request sometimes comes twice. This happens because
            # nginx sends a 499 error code when the response takes too long.
            elif size > int(content_range['stop']) and size == int(content_range['end']):
                return JsonResponse({'ImportStatus': 1})

        with open(temp_filepath, mode) as temp_file:
            for chunk in request.FILES['course-data'].chunks():
                temp_file.write(chunk)

        size = os.path.getsize(temp_filepath)

        if int(content_range['stop']) != int(content_range['end']) - 1:
            # More chunks coming
            return JsonResponse({
                "files": [{
                    "name": filename,
                    "size": size,
                    "deleteUrl": "",
                    "deleteType": "",
                    "url": reverse('import_course', kwargs={
                        'org': location.org,
                        'course': location.course,
                        'name': location.name
                    }),
                    "thumbnailUrl": ""
                }]
            })
        else:
            # This was the last chunk.

            # 'Lock' with status info.
            status_file = data_root / (course + filename + ".lock")

            # Do everything from now on in a with-context, to be sure we've
            # properly cleaned up.
            with wfile(status_file, course_dir):
                with open(status_file, 'w+') as sf:
                    sf.write("Extracting")

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0]
                        },
                        status=400
                    )

                with open(status_file, 'w+') as sf:
                    sf.write("Verifying")

                # find the 'course.xml' file
                dirpath = None

                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                fname = "course.xml"
                dirpath = get_dir_for_fname(course_dir, fname)

                if not dirpath:
                    return JsonResponse(
                        {'ErrMsg': 'Could not find the course.xml file in the package.'},
                        status=415
                    )

                logging.debug('found course.xml at {0}'.format(dirpath))

                if dirpath != course_dir:
                    for fname in os.listdir(dirpath):
                        shutil.move(dirpath / fname, course_dir)

                _module_store, course_items = import_from_xml(
                    modulestore('direct'),
                    settings.GITHUB_REPO_ROOT,
                    [course_subdir],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_location_namespace=location,
                    draft_store=modulestore()
                )

                logging.debug('new course at {0}'.format(course_items[0].location))

                with open(status_file, 'w') as sf:
                    sf.write("Updating course")

                create_all_course_groups(request.user, course_items[0].location)
                logging.debug('created all course groups at {0}'.format(course_items[0].location))

            return JsonResponse({'Status': 'OK'})
    else:
        course_module = modulestore().get_item(location)
        return render_to_response('import.html', {
            'context_course': course_module,
            'successful_import_redirect_url': reverse('course_index', kwargs={
                'org': location.org,
                'course': location.course,
                'name': location.name,
            })
        })
def extract_source(source_archive, target):
    """
    Extract the archive into the given target directory.
    """
    with tarfile.open(source_archive) as tar_file:
        safetar_extractall(tar_file, target)
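# safetar_extractall, used by every snippet in this section, is expected to refuse
# archives whose members would land outside the target directory, raising Django's
# SuspiciousOperation as the handlers above assume. A simplified sketch of that kind
# of check, assuming only the standard library and Django; this is illustrative and
# not the actual edx-platform implementation:
import os
import tarfile

from django.core.exceptions import SuspiciousOperation


def _checked_extractall(tar_file, target):
    """
    Extract `tar_file` into `target`, rejecting members that resolve outside it.
    """
    target = os.path.realpath(target)
    for member in tar_file.getmembers():
        destination = os.path.realpath(os.path.join(target, member.name))
        if destination != target and not destination.startswith(target + os.sep):
            raise SuspiciousOperation("Unsafe tar member: {0}".format(member.name))
    tar_file.extractall(target)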
def test_library_import(self):
    """
    Try importing a known good library archive, and verify that the contents of the
    library have completely replaced the old contents.
    """
    # Create some blocks to overwrite
    library = LibraryFactory.create(modulestore=self.store)
    lib_key = library.location.library_key
    test_block = ItemFactory.create(
        category="vertical",
        parent_location=library.location,
        user_id=self.user.id,
        publish_item=False,
    )
    test_block2 = ItemFactory.create(
        category="vertical",
        parent_location=library.location,
        user_id=self.user.id,
        publish_item=False
    )
    # Create a library and blocks that should remain unmolested.
    unchanged_lib = LibraryFactory.create()
    unchanged_key = unchanged_lib.location.library_key
    test_block3 = ItemFactory.create(
        category="vertical",
        parent_location=unchanged_lib.location,
        user_id=self.user.id,
        publish_item=False
    )
    test_block4 = ItemFactory.create(
        category="vertical",
        parent_location=unchanged_lib.location,
        user_id=self.user.id,
        publish_item=False
    )

    # Refresh library.
    library = self.store.get_library(lib_key)
    children = [self.store.get_item(child).url_name for child in library.children]
    self.assertEqual(len(children), 2)
    self.assertIn(test_block.url_name, children)
    self.assertIn(test_block2.url_name, children)

    unchanged_lib = self.store.get_library(unchanged_key)
    children = [self.store.get_item(child).url_name for child in unchanged_lib.children]
    self.assertEqual(len(children), 2)
    self.assertIn(test_block3.url_name, children)
    self.assertIn(test_block4.url_name, children)

    extract_dir = path(tempfile.mkdtemp())
    try:
        tar = tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz')
        safetar_extractall(tar, extract_dir)
        library_items = import_library_from_xml(
            self.store, self.user.id, settings.GITHUB_REPO_ROOT,
            [extract_dir / 'library'],
            load_error_modules=False,
            static_content_store=contentstore(),
            target_id=lib_key
        )
    finally:
        shutil.rmtree(extract_dir)

    self.assertEqual(lib_key, library_items[0].location.library_key)
    library = self.store.get_library(lib_key)
    children = [self.store.get_item(child).url_name for child in library.children]
    self.assertEqual(len(children), 3)
    self.assertNotIn(test_block.url_name, children)
    self.assertNotIn(test_block2.url_name, children)

    unchanged_lib = self.store.get_library(unchanged_key)
    children = [self.store.get_item(child).url_name for child in unchanged_lib.children]
    self.assertEqual(len(children), 2)
    self.assertIn(test_block3.url_name, children)
    self.assertIn(test_block4.url_name, children)