    def test_library_import_branch_settings(self, branch_setting):
        """
        Try importing a known good library archive under either branch setting.
        The branch setting should have no effect on library import.
        """
        with self.store.branch_setting(branch_setting):
            library = LibraryFactory.create(modulestore=self.store)
            lib_key = library.location.library_key
            extract_dir = path(tempfile.mkdtemp(dir=settings.DATA_DIR))
            # the extract_dir needs to be passed as a relative dir to
            # import_library_from_xml
            extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)

            try:
                with tarfile.open(
                        path(TEST_DATA_DIR) / 'imports' /
                        'library.HhJfPD.tar.gz') as tar:
                    safetar_extractall(tar, extract_dir)
                import_library_from_xml(self.store,
                                        self.user.id,
                                        settings.GITHUB_REPO_ROOT,
                                        [extract_dir_relative / 'library'],
                                        load_error_modules=False,
                                        static_content_store=contentstore(),
                                        target_id=lib_key)
            finally:
                shutil.rmtree(extract_dir)
    def test_library_import_branch_settings_again(self, branch_setting):
        # Construct the contentstore for storing the import
        with MongoContentstoreBuilder().build() as source_content:
            # Construct the modulestore for storing the import (using the previously created contentstore)
            with SPLIT_MODULESTORE_SETUP.build(
                    contentstore=source_content) as source_store:
                # Use the test branch setting.
                with source_store.branch_setting(branch_setting):
                    source_library_key = LibraryLocator(org='TestOrg',
                                                        library='TestProbs')

                    extract_dir = path(tempfile.mkdtemp(dir=settings.DATA_DIR))
                    # the extract_dir needs to be passed as a relative dir to
                    # import_library_from_xml
                    extract_dir_relative = path.relpath(
                        extract_dir, settings.DATA_DIR)

                    try:
                        with tarfile.open(
                                path(TEST_DATA_DIR) / 'imports' /
                                'library.HhJfPD.tar.gz') as tar:
                            safetar_extractall(tar, extract_dir)
                        import_library_from_xml(
                            source_store,
                            self.user.id,
                            settings.GITHUB_REPO_ROOT,
                            [extract_dir_relative / 'library'],
                            static_content_store=source_content,
                            target_id=source_library_key,
                            load_error_modules=False,
                            raise_on_failure=True,
                            create_if_not_present=True,
                        )
                    finally:
                        shutil.rmtree(extract_dir)
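
Both tests above receive branch_setting as an argument, which implies the test class is parametrized. Below is a minimal, self-contained sketch of ddt-style parametrization; the decorators are standard ddt API, but the class name and branch values are placeholders, not taken from the snippets above.

import unittest

import ddt


@ddt.ddt
class BranchSettingTests(unittest.TestCase):
    """Illustrative only: shows how ddt feeds each value into the test method."""

    @ddt.data('draft-preferred', 'published-only')  # placeholder branch names
    def test_receives_branch_setting(self, branch_setting):
        # ddt generates one test case per value passed to @ddt.data.
        self.assertIn(branch_setting, ('draft-preferred', 'published-only'))


if __name__ == '__main__':
    unittest.main()
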
Example #5
def import_olx(self, user_id, course_key_string, archive_path, archive_name,
               language):
    """
    Import a course or library from a provided OLX .tar.gz archive.
    """
    set_code_owner_attribute_from_module(__name__)
    courselike_key = CourseKey.from_string(course_key_string)
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with translation_language(language):
            self.status.fail(_(u'Unknown User ID: {0}').format(user_id))
        return
    if not has_course_author_access(user, courselike_key):
        with translation_language(language):
            self.status.fail(_(u'Permission denied'))
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(
        repr(courselike_key).encode('utf-8')).decode('utf-8')
    course_dir = data_root / subdir
    try:
        self.status.set_state(u'Unpacking')

        if not archive_name.endswith(u'.tar.gz'):
            with translation_language(language):
                self.status.fail(
                    _(u'We only support uploading a .tar.gz file.'))
                return

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():
            os.mkdir(course_dir)

        LOGGER.debug(u'importing course to {0}'.format(temp_filepath))

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        if not course_import_export_storage.exists(archive_path):
            LOGGER.info(u'Course import %s: Uploaded file %s not found',
                        courselike_key, archive_path)
            with translation_language(language):
                self.status.fail(_(u'Tar file not found'))
            return
        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:

                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)

                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)
        LOGGER.info(u'Course import %s: Download from storage complete',
                    courselike_key)
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam, remove it and its corresponding
        # milestone from the current course state before import.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                fake_request = RequestFactory().get(u'/')
                fake_request.user = user
                from .views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(
                    fake_request, courselike_key)
                LOGGER.info(
                    u'entrance exam milestone content reference for course %s has been removed',
                    courselike_module.id)
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        LOGGER.exception(u'Error importing course %s',
                         courselike_key,
                         exc_info=True)
        self.status.fail(text_type(exception))
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + u'/'))
        except SuspiciousOperation as exc:
            LOGGER.info(u'Course import %s: Unsafe tar file - %s',
                        courselike_key, exc.args[0])
            with translation_language(language):
                self.status.fail(_(u'Unsafe tar file. Aborting import.'))
            return
        finally:
            tar_file.close()

        LOGGER.info(u'Course import %s: Uploaded file extracted',
                    courselike_key)
        self.status.set_state(u'Verifying')
        self.status.increment_completed_steps()

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            with translation_language(language):
                self.status.fail(
                    _(u'Could not find the {0} file in the package.').format(
                        root_name))
                return

        dirpath = os.path.relpath(dirpath, data_root)
        LOGGER.debug(u'found %s at %s', root_name, dirpath)

        LOGGER.info(u'Course import %s: Extracted file verified',
                    courselike_key)
        self.status.set_state(u'Updating')
        self.status.increment_completed_steps()

        courselike_items = import_func(modulestore(),
                                       user.id,
                                       settings.GITHUB_REPO_ROOT, [dirpath],
                                       load_error_modules=False,
                                       static_content_store=contentstore(),
                                       target_id=courselike_key)

        new_location = courselike_items[0].location
        LOGGER.debug(u'new course at %s', new_location)

        LOGGER.info(u'Course import %s: Course import successful',
                    courselike_key)
    except Exception as exception:  # pylint: disable=broad-except
        LOGGER.exception(u'error importing course', exc_info=True)
        self.status.fail(text_type(exception))
    finally:
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        if self.status.state == u'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={u'category': u'chapter'},
                    settings={u'is_entrance_exam': True})[0]

                metadata = {
                    u'entrance_exam_id': text_type(entrance_exam_chapter.location)
                }
                CourseMetadata.update_from_dict(metadata, course, user)
                from .views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(u'Course %s Entrance exam imported', course.id)
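
The download loop in import_olx above, for chunk in iter(read_chunk, b''), uses the two-argument form of iter(): it keeps calling read_chunk() until the sentinel b'' (end of file) is returned. A small standalone sketch of the same chunked-copy pattern follows; the chunk size and file paths are illustrative, with FILE_READ_CHUNK playing the equivalent role above.

import os
import tempfile

CHUNK_SIZE = 8192  # illustrative; FILE_READ_CHUNK above serves the same purpose


def copy_in_chunks(src_path, dst_path, chunk_size=CHUNK_SIZE):
    """Copy src_path to dst_path without reading the whole file into memory."""
    with open(src_path, 'rb') as source, open(dst_path, 'wb') as destination:
        # iter(callable, sentinel) calls the callable repeatedly until it
        # returns the sentinel (b'' at end of file).
        for chunk in iter(lambda: source.read(chunk_size), b''):
            destination.write(chunk)


# Tiny usage example with a temporary file, cleaned up afterwards.
src = tempfile.NamedTemporaryFile(delete=False)
src.write(b'x' * 100000)
src.close()
dst_path = src.name + '.copy'
copy_in_chunks(src.name, dst_path)
assert os.path.getsize(dst_path) == 100000
os.remove(src.name)
os.remove(dst_path)
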
    def test_library_import(self):
        """
        Try importing a known good library archive, and verify that the
        contents of the library have completely replaced the old contents.
        """
        # Create some blocks to overwrite
        library = LibraryFactory.create(modulestore=self.store)
        lib_key = library.location.library_key
        test_block = ItemFactory.create(
            category="vertical",
            parent_location=library.location,
            user_id=self.user.id,
            publish_item=False,
        )
        test_block2 = ItemFactory.create(category="vertical",
                                         parent_location=library.location,
                                         user_id=self.user.id,
                                         publish_item=False)
        # Create a library and blocks that should remain unmolested.
        unchanged_lib = LibraryFactory.create()
        unchanged_key = unchanged_lib.location.library_key
        test_block3 = ItemFactory.create(
            category="vertical",
            parent_location=unchanged_lib.location,
            user_id=self.user.id,
            publish_item=False)
        test_block4 = ItemFactory.create(
            category="vertical",
            parent_location=unchanged_lib.location,
            user_id=self.user.id,
            publish_item=False)
        # Refresh library.
        library = self.store.get_library(lib_key)
        children = [
            self.store.get_item(child).url_name for child in library.children
        ]
        self.assertEqual(len(children), 2)
        self.assertIn(test_block.url_name, children)
        self.assertIn(test_block2.url_name, children)

        unchanged_lib = self.store.get_library(unchanged_key)
        children = [
            self.store.get_item(child).url_name
            for child in unchanged_lib.children
        ]
        self.assertEqual(len(children), 2)
        self.assertIn(test_block3.url_name, children)
        self.assertIn(test_block4.url_name, children)

        extract_dir = path(tempfile.mkdtemp(dir=settings.DATA_DIR))
        # the extract_dir needs to be passed as a relative dir to
        # import_library_from_xml
        extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)

        try:
            with tarfile.open(
                    path(TEST_DATA_DIR) / 'imports' /
                    'library.HhJfPD.tar.gz') as tar:
                safetar_extractall(tar, extract_dir)
            library_items = import_library_from_xml(
                self.store,
                self.user.id,
                settings.GITHUB_REPO_ROOT, [extract_dir_relative / 'library'],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=lib_key)
        finally:
            shutil.rmtree(extract_dir)

        self.assertEqual(lib_key, library_items[0].location.library_key)
        library = self.store.get_library(lib_key)
        children = [
            self.store.get_item(child).url_name for child in library.children
        ]
        self.assertEqual(len(children), 3)
        self.assertNotIn(test_block.url_name, children)
        self.assertNotIn(test_block2.url_name, children)

        unchanged_lib = self.store.get_library(unchanged_key)
        children = [
            self.store.get_item(child).url_name
            for child in unchanged_lib.children
        ]
        self.assertEqual(len(children), 2)
        self.assertIn(test_block3.url_name, children)
        self.assertIn(test_block4.url_name, children)
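
The test above imports a pre-built fixture archive (library.HhJfPD.tar.gz) whose top-level folder is library/, which is why [extract_dir_relative / 'library'] is passed to import_library_from_xml. A rough sketch of how such a fixture could be produced from a library OLX directory follows; the paths are placeholders, not the actual test data.

import tarfile


def make_library_archive(olx_dir, archive_path):
    """Pack an OLX directory (the one containing library.xml) into a .tar.gz."""
    with tarfile.open(archive_path, 'w:gz') as tar:
        # arcname='library' makes the archive unpack into a top-level
        # 'library/' folder, matching what the import call above expects.
        tar.add(olx_dir, arcname='library')


# Illustrative usage (placeholder paths):
# make_library_archive('/tmp/my_library_olx', '/tmp/library.tar.gz')
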
Example #7
def _import_handler(request, courselike_key, root_name, successful_url, context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.
    """
    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT', 'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                subdir = base64.urlsafe_b64encode(repr(courselike_key))
                course_dir = data_root / subdir
                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault("import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                # If the course has an entrance exam, remove it and its
                # corresponding milestone from the current course state before import.
                if root_name == COURSE_ROOT:
                    if courselike_module.entrance_exam_enabled:
                        remove_entrance_exam_milestone_reference(request, courselike_key)
                        log.info(
                            "entrance exam milestone content reference for course %s has been removed",
                            courselike_module.id
                        )

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415
                    )

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to {0}'.format(temp_filepath))

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:    # Single chunk
                    # no Content-Range header, so make one that will work
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'],
                            size
                        )
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409
                        )
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', courselike_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                _save_request_status(request, courselike_string, -1)
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)

                log.exception(
                    "error importing course"
                )
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400
                )

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file, (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400
                    )
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted", courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the {0} file in the package.').format(root_name),
                            'Stage': -2
                        },
                        status=415
                    )

                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified", courselike_key)
                _save_request_status(request, courselike_string, 3)

                with dog_stats_api.timer(
                    'courselike_import.time',
                    tags=[u"courselike:{}".format(courselike_key)]
                ):
                    courselike_items = import_func(
                        modulestore(), request.user.id,
                        settings.GITHUB_REPO_ROOT, [dirpath],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_id=courselike_key
                    )

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful", courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:   # pylint: disable=broad-except
                log.exception(
                    "error importing course"
                )
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400
                )

            finally:
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared", courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(request, courselike_string, -abs(session_status[courselike_string]))

                # status == 4 represents that course has been imported successfully.
                if session_status[courselike_string] == 4 and root_name == COURSE_ROOT:
                    # Reload the course so we have the latest state
                    course = modulestore().get_course(courselike_key)
                    if course.entrance_exam_enabled:
                        entrance_exam_chapter = modulestore().get_items(
                            course.id,
                            qualifiers={'category': 'chapter'},
                            settings={'is_entrance_exam': True}
                        )[0]

                        metadata = {'entrance_exam_id': unicode(entrance_exam_chapter.location)}
                        CourseMetadata.update_from_dict(metadata, course, request.user)
                        add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                        log.info("Course %s Entrance exam imported", course.id)

            return JsonResponse({'Status': 'OK'})
    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url(
            "import_status_handler", courselike_key, kwargs={'filename': "fillerName"}
        )
        return render_to_response('import.html', {
            context_name: courselike_module,
            'successful_import_redirect_url': successful_url,
            'import_status_url': status_url,
            'library': isinstance(courselike_key, LibraryLocator)
        })
    else:
        return HttpResponseNotFound()
Example #8
def extract_source(source_archive, target):
    """
    Extract the archive into the given target directory.
    """
    with tarfile.open(source_archive) as tar_file:
        safetar_extractall(tar_file, target)
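
safetar_extractall is edx-platform's guarded wrapper around TarFile.extractall; the snippets on this page only call it. Below is a rough, illustrative re-implementation of the kind of path-traversal check such a wrapper performs, not the actual edx helper.

import os


def safe_extractall_sketch(tar_file, target_dir):
    """Extract tar_file into target_dir, refusing members that escape it."""
    abs_target_dir = os.path.abspath(target_dir)
    for member in tar_file.getmembers():
        member_path = os.path.abspath(os.path.join(abs_target_dir, member.name))
        # Reject entries such as '../../etc/passwd' that would resolve to a
        # location outside the extraction directory.
        if os.path.commonpath([abs_target_dir, member_path]) != abs_target_dir:
            raise ValueError('Blocked path traversal in tar member: %s' % member.name)
    tar_file.extractall(abs_target_dir)


# Illustrative usage:
# with tarfile.open('archive.tar.gz') as tar:
#     safe_extractall_sketch(tar, '/tmp/extract_here')
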
    def handle(self, *args, **options):
        """
        Given a content library archive path, import the corresponding course to mongo.
        """

        archive_path = options['archive_path']
        username = options['owner_username']

        data_root = Path(settings.GITHUB_REPO_ROOT)
        subdir = base64.urlsafe_b64encode(os.path.basename(archive_path))
        course_dir = data_root / subdir

        # Extract library archive
        tar_file = tarfile.open(archive_path)
        try:
            safetar_extractall(tar_file, course_dir.encode('utf-8'))
        except SuspiciousOperation as exc:
            raise CommandError(
                u'\n=== Course import {0}: Unsafe tar file - {1}\n'.format(
                    archive_path, exc.args[0]))
        finally:
            tar_file.close()

        # Paths to the library.xml file
        abs_xml_path = os.path.join(course_dir, 'library')
        rel_xml_path = os.path.relpath(abs_xml_path, data_root)

        # Gather library metadata from XML file
        xml_root = etree.parse(abs_xml_path / 'library.xml').getroot()
        if xml_root.tag != 'library':
            raise CommandError(
                u'Failed to import {0}: Not a library archive'.format(
                    archive_path))

        metadata = xml_root.attrib
        org = metadata['org']
        library = metadata['library']
        display_name = metadata['display_name']

        # Fetch user and library key
        user = User.objects.get(username=username)
        courselike_key, created = _get_or_create_library(
            org, library, display_name, user)

        # Check if data would be overwritten
        ans = ''
        while not created and ans not in ['y', 'yes', 'n', 'no']:
            inp = raw_input(
                u'Library "{0}" already exists, overwrite it? [y/n] '.format(
                    courselike_key))
            ans = inp.lower()
        if ans.startswith('n'):
            print(u'Aborting import of "{0}"'.format(courselike_key))
            return

        # At last, import the library
        try:
            import_library_from_xml(modulestore(),
                                    user.id,
                                    settings.GITHUB_REPO_ROOT, [rel_xml_path],
                                    load_error_modules=False,
                                    static_content_store=contentstore(),
                                    target_id=courselike_key)
        except Exception:
            print(u'\n=== Failed to import library-v1:{0}+{1}'.format(
                org, library))
            raise

        print(u'Library "{0}" imported to "{1}"'.format(
            archive_path, courselike_key))
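
The management command above reads org, library, and display_name from the root <library> element of library.xml. A tiny sketch of that parse step with lxml follows; the XML content below is a made-up example, not the test fixture.

from io import BytesIO

from lxml import etree

# Made-up stand-in for the root element of a library.xml file.
xml_bytes = b'<library org="TestOrg" library="TestProbs" display_name="Test Problems"/>'

xml_root = etree.parse(BytesIO(xml_bytes)).getroot()
assert xml_root.tag == 'library'

metadata = xml_root.attrib
print(metadata['org'], metadata['library'], metadata['display_name'])
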
Example #10
def _import_handler(request, courselike_key, root_name, successful_url,
                    context_name, courselike_module, import_func):
    """
    Parameterized function containing the meat of import_handler.
    """
    if not has_course_author_access(request.user, courselike_key):
        raise PermissionDenied()

    if 'application/json' in request.META.get('HTTP_ACCEPT',
                                              'application/json'):
        if request.method == 'GET':
            raise NotImplementedError('coming soon')
        else:
            # Do everything in a try-except block to make sure everything is properly cleaned up.
            try:
                data_root = path(settings.GITHUB_REPO_ROOT)
                subdir = base64.urlsafe_b64encode(repr(courselike_key))
                course_dir = data_root / subdir
                filename = request.FILES['course-data'].name

                # Use sessions to keep info about import progress
                session_status = request.session.setdefault(
                    "import_status", {})
                courselike_string = unicode(courselike_key) + filename
                _save_request_status(request, courselike_string, 0)

                # If the course has an entrance exam, remove it and its
                # corresponding milestone from the current course state before import.
                if root_name == COURSE_ROOT:
                    if courselike_module.entrance_exam_enabled:
                        remove_entrance_exam_milestone_reference(
                            request, courselike_key)
                        log.info(
                            "entrance exam milestone content reference for course %s has been removed",
                            courselike_module.id)

                if not filename.endswith('.tar.gz'):
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': _('We only support uploading a .tar.gz file.'),
                            'Stage': -1
                        },
                        status=415)

                temp_filepath = course_dir / filename
                if not course_dir.isdir():
                    os.mkdir(course_dir)

                logging.debug('importing course to {0}'.format(temp_filepath))

                # Get upload chunks byte ranges
                try:
                    matches = CONTENT_RE.search(
                        request.META["HTTP_CONTENT_RANGE"])
                    content_range = matches.groupdict()
                except KeyError:  # Single chunk
                    # no Content-Range header, so make one that will work
                    content_range = {'start': 0, 'stop': 1, 'end': 2}

                # stream out the uploaded files in chunks to disk
                if int(content_range['start']) == 0:
                    mode = "wb+"
                else:
                    mode = "ab+"
                    size = os.path.getsize(temp_filepath)
                    # Check to make sure we haven't missed a chunk
                    # This shouldn't happen, even if different instances are handling
                    # the same session, but it's always better to catch errors earlier.
                    if size < int(content_range['start']):
                        _save_request_status(request, courselike_string, -1)
                        log.warning(
                            "Reported range %s does not match size downloaded so far %s",
                            content_range['start'], size)
                        return JsonResponse(
                            {
                                'ErrMsg': _('File upload corrupted. Please try again'),
                                'Stage': -1
                            },
                            status=409)
                    # The last request sometimes comes twice. This happens because
                    # nginx sends a 499 error code when the response takes too long.
                    elif size > int(content_range['stop']) and size == int(
                            content_range['end']):
                        return JsonResponse({'ImportStatus': 1})

                with open(temp_filepath, mode) as temp_file:
                    for chunk in request.FILES['course-data'].chunks():
                        temp_file.write(chunk)

                size = os.path.getsize(temp_filepath)

                if int(content_range['stop']) != int(content_range['end']) - 1:
                    # More chunks coming
                    return JsonResponse({
                        "files": [{
                            "name": filename,
                            "size": size,
                            "deleteUrl": "",
                            "deleteType": "",
                            "url": reverse_course_url('import_handler', courselike_key),
                            "thumbnailUrl": ""
                        }]
                    })
            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                _save_request_status(request, courselike_string, -1)
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared",
                             courselike_key)

                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -1
                    },
                    status=400)

            # try-finally block for proper clean up after receiving last chunk.
            try:
                # This was the last chunk.
                log.info("Course import %s: Upload complete", courselike_key)
                _save_request_status(request, courselike_string, 1)

                tar_file = tarfile.open(temp_filepath)
                try:
                    safetar_extractall(tar_file,
                                       (course_dir + '/').encode('utf-8'))
                except SuspiciousOperation as exc:
                    _save_request_status(request, courselike_string, -1)
                    return JsonResponse(
                        {
                            'ErrMsg': 'Unsafe tar file. Aborting import.',
                            'SuspiciousFileOperationMsg': exc.args[0],
                            'Stage': -1
                        },
                        status=400)
                finally:
                    tar_file.close()

                log.info("Course import %s: Uploaded file extracted",
                         courselike_key)
                _save_request_status(request, courselike_string, 2)

                # find the 'course.xml' file
                def get_all_files(directory):
                    """
                    For each file in the directory, yield a 2-tuple of (file-name,
                    directory-path)
                    """
                    for dirpath, _dirnames, filenames in os.walk(directory):
                        for filename in filenames:
                            yield (filename, dirpath)

                def get_dir_for_fname(directory, filename):
                    """
                    Returns the dirpath for the first file found in the directory
                    with the given name.  If there is no file in the directory with
                    the specified name, return None.
                    """
                    for fname, dirpath in get_all_files(directory):
                        if fname == filename:
                            return dirpath
                    return None

                dirpath = get_dir_for_fname(course_dir, root_name)
                if not dirpath:
                    _save_request_status(request, courselike_string, -2)
                    return JsonResponse(
                        {
                            'ErrMsg': _('Could not find the {0} file in the package.').format(root_name),
                            'Stage': -2
                        },
                        status=415)

                dirpath = os.path.relpath(dirpath, data_root)
                logging.debug('found %s at %s', root_name, dirpath)

                log.info("Course import %s: Extracted file verified",
                         courselike_key)
                _save_request_status(request, courselike_string, 3)

                with dog_stats_api.timer(
                        'courselike_import.time',
                        tags=[u"courselike:{}".format(courselike_key)]):
                    courselike_items = import_func(
                        modulestore(),
                        request.user.id,
                        settings.GITHUB_REPO_ROOT, [dirpath],
                        load_error_modules=False,
                        static_content_store=contentstore(),
                        target_id=courselike_key)

                new_location = courselike_items[0].location
                logging.debug('new course at %s', new_location)

                log.info("Course import %s: Course import successful",
                         courselike_key)
                _save_request_status(request, courselike_string, 4)

            # Send errors to client with stage at which error occurred.
            except Exception as exception:  # pylint: disable=broad-except
                log.exception("error importing course")
                return JsonResponse(
                    {
                        'ErrMsg': str(exception),
                        'Stage': -session_status[courselike_string]
                    },
                    status=400)

            finally:
                if course_dir.isdir():
                    shutil.rmtree(course_dir)
                    log.info("Course import %s: Temp data cleared",
                             courselike_key)
                # set failed stage number with negative sign in case of unsuccessful import
                if session_status[courselike_string] != 4:
                    _save_request_status(
                        request, courselike_string,
                        -abs(session_status[courselike_string]))

                # status == 4 represents that course has been imported successfully.
                if session_status[courselike_string] == 4 and root_name == COURSE_ROOT:
                    # Reload the course so we have the latest state
                    course = modulestore().get_course(courselike_key)
                    if course.entrance_exam_enabled:
                        entrance_exam_chapter = modulestore().get_items(
                            course.id,
                            qualifiers={'category': 'chapter'},
                            settings={'is_entrance_exam': True})[0]

                        metadata = {
                            'entrance_exam_id': unicode(entrance_exam_chapter.location)
                        }
                        CourseMetadata.update_from_dict(
                            metadata, course, request.user)
                        add_entrance_exam_milestone(course.id,
                                                    entrance_exam_chapter)
                        log.info("Course %s Entrance exam imported", course.id)

            return JsonResponse({'Status': 'OK'})
    elif request.method == 'GET':  # assume html
        status_url = reverse_course_url("import_status_handler",
                                        courselike_key,
                                        kwargs={'filename': "fillerName"})
        return render_to_response(
            'import.html', {
                context_name: courselike_module,
                'successful_import_redirect_url': successful_url,
                'import_status_url': status_url,
                'library': isinstance(courselike_key, LibraryLocator)
            })
    else:
        return HttpResponseNotFound()
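
The chunked-upload branch above parses the HTTP Content-Range header with CONTENT_RE (defined elsewhere in the module) and then reads the start, stop, and end groups. The regex below is an illustrative stand-in with the same named groups, shown only to make the shape of that data explicit; it is an assumption, not the actual CONTENT_RE.

import re

# Illustrative stand-in for CONTENT_RE; matches headers such as "bytes 0-1023/2048".
CONTENT_RANGE_RE = re.compile(r'(?P<start>\d+)-(?P<stop>\d+)/(?P<end>\d+)')

matches = CONTENT_RANGE_RE.search('bytes 0-1023/2048')
content_range = matches.groupdict()
# groupdict() returns strings, which is why the handler casts them with int().
assert content_range == {'start': '0', 'stop': '1023', 'end': '2048'}
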
Example #11
def import_olx(self, user_id, course_key_string, archive_path, archive_name,
               language):
    """
    Import a course or library from a provided OLX .tar.gz archive.
    """
    current_step = 'Unpacking'
    courselike_key = CourseKey.from_string(course_key_string)
    set_code_owner_attribute_from_module(__name__)
    set_custom_attributes_for_course_key(courselike_key)
    log_prefix = f'Course import {courselike_key}'
    self.status.set_state(current_step)

    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(
        repr(courselike_key).encode('utf-8')).decode('utf-8')
    course_dir = data_root / subdir

    def validate_user():
        """Validate if the user exists otherwise log error. """
        try:
            return User.objects.get(pk=user_id)
        except User.DoesNotExist as exc:
            with translation_language(language):
                self.status.fail(UserErrors.USER_PERMISSION_DENIED)
            LOGGER.error(f'{log_prefix}: Unknown User: {user_id}')
            monitor_import_failure(courselike_key, current_step, exception=exc)
            return

    def user_has_access(user):
        """Return True if user has studio write access to the given course."""
        has_access = has_course_author_access(user, courselike_key)
        if not has_access:
            message = f'User permission denied: {user.username}'
            with translation_language(language):
                self.status.fail(UserErrors.COURSE_PERMISSION_DENIED)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
        return has_access

    def file_is_supported():
        """Check if it is a supported file."""
        file_is_valid = archive_name.endswith('.tar.gz')

        if not file_is_valid:
            message = f'Unsupported file {archive_name}'
            with translation_language(language):
                self.status.fail(UserErrors.INVALID_FILE_TYPE)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
        return file_is_valid

    def file_exists_in_storage():
        """Verify archive path exists in storage."""
        archive_path_exists = course_import_export_storage.exists(archive_path)

        if not archive_path_exists:
            message = f'Uploaded file {archive_path} not found'
            with translation_language(language):
                self.status.fail(UserErrors.FILE_NOT_FOUND)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
        return archive_path_exists

    def verify_root_name_exists(course_dir, root_name):
        """Verify root xml file exists."""
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            message = UserErrors.FILE_MISSING.format(root_name)
            with translation_language(language):
                self.status.fail(message)
            LOGGER.error(f'{log_prefix}: {message}')
            monitor_import_failure(courselike_key,
                                   current_step,
                                   message=message)
            return
        return dirpath

    user = validate_user()
    if not user:
        return

    if not user_has_access(user):
        return

    if not file_is_supported():
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    try:
        LOGGER.info(f'{log_prefix}: unpacking step started')

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():
            os.mkdir(course_dir)

        LOGGER.info(f'{log_prefix}: importing course to {temp_filepath}')

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        if not file_exists_in_storage():
            return

        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:

                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)

                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)

        LOGGER.info(f'{log_prefix}: Download from storage complete')
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam, remove it and its corresponding
        # milestone from the current course state before import.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                fake_request = RequestFactory().get('/')
                fake_request.user = user
                from .views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(
                    fake_request, courselike_key)
                LOGGER.info(
                    f'{log_prefix}: entrance exam milestone content reference has been removed'
                )
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(f'{log_prefix}: Temp data cleared')

        self.status.fail(UserErrors.UNKNOWN_ERROR_IN_UNPACKING)
        LOGGER.exception(f'{log_prefix}: Unknown error while unpacking',
                         exc_info=True)
        monitor_import_failure(courselike_key,
                               current_step,
                               exception=exception)
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + '/'))
        except SuspiciousOperation as exc:
            with translation_language(language):
                self.status.fail(UserErrors.UNSAFE_TAR_FILE)
            LOGGER.error(f'{log_prefix}: Unsafe tar file')
            monitor_import_failure(courselike_key, current_step, exception=exc)
            return
        finally:
            tar_file.close()

        current_step = 'Verifying'
        self.status.set_state(current_step)
        self.status.increment_completed_steps()
        LOGGER.info(
            f'{log_prefix}: Uploaded file extracted. Verification step started'
        )

        dirpath = verify_root_name_exists(course_dir, root_name)
        if not dirpath:
            return

        if not validate_course_olx(courselike_key, dirpath, self.status):
            return

        dirpath = os.path.relpath(dirpath, data_root)

        current_step = 'Updating'
        self.status.set_state(current_step)
        self.status.increment_completed_steps()
        LOGGER.info(
            f'{log_prefix}: Extracted file verified. Updating course started')

        courselike_items = import_func(
            modulestore(),
            user.id,
            settings.GITHUB_REPO_ROOT,
            [dirpath],
            load_error_modules=False,
            static_content_store=contentstore(),
            target_id=courselike_key,
            verbose=True,
        )

        new_location = courselike_items[0].location
        LOGGER.debug('new course at %s', new_location)

        LOGGER.info(f'{log_prefix}: Course import successful')
        set_custom_attribute('course_import_completed', True)
    except (CourseImportException, InvalidProctoringProvider,
            DuplicateCourseError) as known_exe:
        handle_course_import_exception(courselike_key, known_exe, self.status)
    except Exception as exception:  # pylint: disable=broad-except
        handle_course_import_exception(courselike_key,
                                       exception,
                                       self.status,
                                       known=False)
    finally:
        if course_dir.isdir():
            shutil.rmtree(course_dir)
            LOGGER.info(f'{log_prefix}: Temp data cleared')

        if self.status.state == 'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={'category': 'chapter'},
                    settings={'is_entrance_exam': True})[0]

                metadata = {
                    'entrance_exam_id': str(entrance_exam_chapter.location)
                }
                CourseMetadata.update_from_dict(metadata, course, user)
                from .views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(
                    f'Course import {course.id}: Entrance exam imported')
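
Several of these examples derive the scratch directory name by URL-safe base64-encoding the repr of the course key (subdir = base64.urlsafe_b64encode(...)). A short standalone sketch of that encoding step follows; the key string below is a placeholder.

import base64

course_key_repr = "course-v1:TestOrg+TestCourse+2024_T1"  # placeholder key repr
subdir = base64.urlsafe_b64encode(course_key_repr.encode('utf-8')).decode('utf-8')
# urlsafe_b64encode emits only A-Z, a-z, 0-9, '-', '_' and '=' padding, so the
# result is safe to use directly as a directory name under the data root.
print(subdir)
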
Example #12
def import_olx(self, user_id, course_key_string, archive_path, archive_name, language):
    """
    Import a course or library from a provided OLX .tar.gz archive.
    """
    courselike_key = CourseKey.from_string(course_key_string)
    try:
        user = User.objects.get(pk=user_id)
    except User.DoesNotExist:
        with respect_language(language):
            self.status.fail(_(u'Unknown User ID: {0}').format(user_id))
        return
    if not has_course_author_access(user, courselike_key):
        with respect_language(language):
            self.status.fail(_(u'Permission denied'))
        return

    is_library = isinstance(courselike_key, LibraryLocator)
    is_course = not is_library
    if is_library:
        root_name = LIBRARY_ROOT
        courselike_module = modulestore().get_library(courselike_key)
        import_func = import_library_from_xml
    else:
        root_name = COURSE_ROOT
        courselike_module = modulestore().get_course(courselike_key)
        import_func = import_course_from_xml

    # Locate the uploaded OLX archive (and download it from S3 if necessary)
    # Do everything in a try-except block to make sure everything is properly cleaned up.
    data_root = path(settings.GITHUB_REPO_ROOT)
    subdir = base64.urlsafe_b64encode(repr(courselike_key))
    course_dir = data_root / subdir
    try:
        self.status.set_state(u'Unpacking')

        if not archive_name.endswith(u'.tar.gz'):
            with respect_language(language):
                self.status.fail(_(u'We only support uploading a .tar.gz file.'))
                return

        temp_filepath = course_dir / get_valid_filename(archive_name)
        if not course_dir.isdir():  # pylint: disable=no-value-for-parameter
            os.mkdir(course_dir)

        LOGGER.debug(u'importing course to {0}'.format(temp_filepath))

        # Copy the OLX archive from where it was uploaded to (S3, Swift, file system, etc.)
        if not course_import_export_storage.exists(archive_path):
            LOGGER.info(u'Course import %s: Uploaded file %s not found', courselike_key, archive_path)
            with respect_language(language):
                self.status.fail(_(u'Tar file not found'))
            return
        with course_import_export_storage.open(archive_path, 'rb') as source:
            with open(temp_filepath, 'wb') as destination:
                def read_chunk():
                    """
                    Read and return a sequence of bytes from the source file.
                    """
                    return source.read(FILE_READ_CHUNK)
                for chunk in iter(read_chunk, b''):
                    destination.write(chunk)
        LOGGER.info(u'Course import %s: Download from storage complete', courselike_key)
        # Delete from source location
        course_import_export_storage.delete(archive_path)

        # If the course has an entrance exam, remove it and its corresponding milestone
        # before import so they can be recreated to match the imported course state.
        if is_course:
            if courselike_module.entrance_exam_enabled:
                fake_request = RequestFactory().get(u'/')
                fake_request.user = user
                from contentstore.views.entrance_exam import remove_entrance_exam_milestone_reference
                # TODO: Is this really ok?  Seems dangerous for a live course
                remove_entrance_exam_milestone_reference(fake_request, courselike_key)
                LOGGER.info(
                    u'entrance exam milestone content reference for course %s has been removed',
                    courselike_module.id
                )
    # Send errors to client with stage at which error occurred.
    except Exception as exception:  # pylint: disable=broad-except
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        LOGGER.exception(u'Error importing course %s', courselike_key, exc_info=True)
        self.status.fail(text_type(exception))
        return

    # try-finally block for proper clean up after receiving file.
    try:
        tar_file = tarfile.open(temp_filepath)
        try:
            safetar_extractall(tar_file, (course_dir + u'/').encode(u'utf-8'))
        except SuspiciousOperation as exc:
            LOGGER.info(u'Course import %s: Unsafe tar file - %s', courselike_key, exc.args[0])
            with respect_language(language):
                self.status.fail(_(u'Unsafe tar file. Aborting import.'))
            return
        finally:
            tar_file.close()

        LOGGER.info(u'Course import %s: Uploaded file extracted', courselike_key)
        self.status.set_state(u'Verifying')
        self.status.increment_completed_steps()

        # find the 'course.xml' file
        def get_all_files(directory):
            """
            For each file in the directory, yield a 2-tuple of (file-name,
            directory-path)
            """
            for directory_path, _dirnames, filenames in os.walk(directory):
                for filename in filenames:
                    yield (filename, directory_path)

        def get_dir_for_filename(directory, filename):
            """
            Returns the directory path for the first file found in the directory
            with the given name.  If there is no file in the directory with
            the specified name, return None.
            """
            for name, directory_path in get_all_files(directory):
                if name == filename:
                    return directory_path
            return None

        dirpath = get_dir_for_filename(course_dir, root_name)
        if not dirpath:
            with respect_language(language):
                self.status.fail(_(u'Could not find the {0} file in the package.').format(root_name))
                return

        dirpath = os.path.relpath(dirpath, data_root)
        LOGGER.debug(u'found %s at %s', root_name, dirpath)

        LOGGER.info(u'Course import %s: Extracted file verified', courselike_key)
        self.status.set_state(u'Updating')
        self.status.increment_completed_steps()

        with dog_stats_api.timer(
            u'courselike_import.time',
            tags=[u"courselike:{}".format(courselike_key)]
        ):
            courselike_items = import_func(
                modulestore(), user.id,
                settings.GITHUB_REPO_ROOT, [dirpath],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key
            )

        new_location = courselike_items[0].location
        LOGGER.debug(u'new course at %s', new_location)

        LOGGER.info(u'Course import %s: Course import successful', courselike_key)
    except Exception as exception:   # pylint: disable=broad-except
        LOGGER.exception(u'error importing course', exc_info=True)
        self.status.fail(text_type(exception))
    finally:
        if course_dir.isdir():  # pylint: disable=no-value-for-parameter
            shutil.rmtree(course_dir)
            LOGGER.info(u'Course import %s: Temp data cleared', courselike_key)

        if self.status.state == u'Updating' and is_course:
            # Reload the course so we have the latest state
            course = modulestore().get_course(courselike_key)
            if course.entrance_exam_enabled:
                entrance_exam_chapter = modulestore().get_items(
                    course.id,
                    qualifiers={u'category': u'chapter'},
                    settings={u'is_entrance_exam': True}
                )[0]

                metadata = {u'entrance_exam_id': text_type(entrance_exam_chapter.location)}
                CourseMetadata.update_from_dict(metadata, course, user)
                from contentstore.views.entrance_exam import add_entrance_exam_milestone
                add_entrance_exam_milestone(course.id, entrance_exam_chapter)
                LOGGER.info(u'Course %s Entrance exam imported', course.id)
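The download step in the task above streams the uploaded archive out of course_import_export_storage in fixed-size chunks via the two-argument form of iter(), keeping memory use flat for large tarballs. A self-contained sketch of the same idiom; the file paths and the 8 KB chunk size are illustrative (the task itself uses FILE_READ_CHUNK):

CHUNK_SIZE = 8 * 1024  # illustrative; the task uses FILE_READ_CHUNK


def copy_in_chunks(src_path, dst_path, chunk_size=CHUNK_SIZE):
    """Stream src_path to dst_path without loading the whole file into memory."""
    with open(src_path, 'rb') as source, open(dst_path, 'wb') as destination:
        # iter(callable, sentinel) keeps calling source.read(chunk_size)
        # until it returns b'' (end of file), yielding one chunk per pass.
        for chunk in iter(lambda: source.read(chunk_size), b''):
            destination.write(chunk)

The standard library's shutil.copyfileobj(source, destination, chunk_size) achieves the same effect in a single call.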
    def test_library_import(self):
        """
        Try importing a known good library archive, and verify that the
        contents of the library have completely replaced the old contents.
        """
        # Create some blocks to overwrite
        library = LibraryFactory.create(modulestore=self.store)
        lib_key = library.location.library_key
        test_block = ItemFactory.create(
            category="vertical",
            parent_location=library.location,
            user_id=self.user.id,
            publish_item=False,
        )
        test_block2 = ItemFactory.create(
            category="vertical",
            parent_location=library.location,
            user_id=self.user.id,
            publish_item=False
        )
        # Create a library and blocks that should remain unmolested.
        unchanged_lib = LibraryFactory.create()
        unchanged_key = unchanged_lib.location.library_key
        test_block3 = ItemFactory.create(
            category="vertical",
            parent_location=unchanged_lib.location,
            user_id=self.user.id,
            publish_item=False
        )
        test_block4 = ItemFactory.create(
            category="vertical",
            parent_location=unchanged_lib.location,
            user_id=self.user.id,
            publish_item=False
        )
        # Refresh library.
        library = self.store.get_library(lib_key)
        children = [self.store.get_item(child).url_name for child in library.children]
        self.assertEqual(len(children), 2)
        self.assertIn(test_block.url_name, children)
        self.assertIn(test_block2.url_name, children)

        unchanged_lib = self.store.get_library(unchanged_key)
        children = [self.store.get_item(child).url_name for child in unchanged_lib.children]
        self.assertEqual(len(children), 2)
        self.assertIn(test_block3.url_name, children)
        self.assertIn(test_block4.url_name, children)

        extract_dir = path(tempfile.mkdtemp(dir=settings.DATA_DIR))
        # the extract_dir needs to be passed as a relative dir to
        # import_library_from_xml
        extract_dir_relative = path.relpath(extract_dir, settings.DATA_DIR)

        try:
            with tarfile.open(path(TEST_DATA_DIR) / 'imports' / 'library.HhJfPD.tar.gz') as tar:
                safetar_extractall(tar, extract_dir)
            library_items = import_library_from_xml(
                self.store,
                self.user.id,
                settings.GITHUB_REPO_ROOT,
                [extract_dir_relative / 'library'],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=lib_key
            )
        finally:
            shutil.rmtree(extract_dir)

        self.assertEqual(lib_key, library_items[0].location.library_key)
        library = self.store.get_library(lib_key)
        children = [self.store.get_item(child).url_name for child in library.children]
        self.assertEqual(len(children), 3)
        self.assertNotIn(test_block.url_name, children)
        self.assertNotIn(test_block2.url_name, children)

        unchanged_lib = self.store.get_library(unchanged_key)
        children = [self.store.get_item(child).url_name for child in unchanged_lib.children]
        self.assertEqual(len(children), 2)
        self.assertIn(test_block3.url_name, children)
        self.assertIn(test_block4.url_name, children)
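As the comments in the test above note, import_library_from_xml expects each course directory relative to the data root it is given, so the temporary extraction directory is created under settings.DATA_DIR and then re-expressed relative to it. A minimal sketch of that conversion; the helper name is illustrative:

import os
import tempfile


def make_relative_extract_dir(data_root):
    """Create a temp dir under data_root and return (absolute, relative) paths."""
    extract_dir = tempfile.mkdtemp(dir=data_root)  # e.g. <data_root>/tmpab12cd
    return extract_dir, os.path.relpath(extract_dir, data_root)  # e.g. 'tmpab12cd'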
Example #14
    def post(self, request, course_key_string):
        """
        The restful handler for importing a course.

        GET
            json: return json import status
        POST or PUT
            json: import a course via the .tar.gz file specified in request.FILES
        """
        courselike_key = CourseKey.from_string(course_key_string)
        library = isinstance(courselike_key, LibraryLocator)

        if library:
            root_name = LIBRARY_ROOT
            import_func = import_library_from_xml
        else:
            root_name = COURSE_ROOT
            import_func = import_course_from_xml

        filename = request.FILES['course-data'].name
        courselike_string = unicode(courselike_key) + filename
        data_root = path(settings.GITHUB_REPO_ROOT)
        subdir = base64.urlsafe_b64encode(repr(courselike_key))
        course_dir = data_root / subdir

        status_key = "import_export.import.status:{}|{}".format(
            request.user.username, courselike_string)

        # Do everything in a try-except block to make sure everything is
        # properly cleaned up.
        try:
            # Cache the import progress
            self._save_request_status(request, courselike_string, 0)
            if not filename.endswith('.tar.gz'):
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message':
                        _('We only support uploading a .tar.gz file.'),
                        'stage':
                        -1
                    },
                    status=415)

            temp_filepath = course_dir / filename

            # Only handle exceptions caused by the directory already existing,
            # to avoid a potential race condition caused by the "check and go"
            # method.
            try:
                os.makedirs(course_dir)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise

            logging.debug('importing course to %s', temp_filepath)

            # Get upload chunks byte ranges
            try:
                matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                content_range = matches.groupdict()
            except KeyError:  # Single chunk
                # no Content-Range header, so make one that will work
                content_range = {'start': 0, 'stop': 1, 'end': 2}

            # stream out the uploaded files in chunks to disk
            if int(content_range['start']) == 0:
                mode = "wb+"
            else:
                mode = "ab+"
                size = os.path.getsize(temp_filepath)
                # Check to make sure we haven't missed a chunk
                # This shouldn't happen, even if different instances are
                # handling the same session, but it's always better to catch
                # errors earlier.
                if size < int(content_range['start']):
                    self._save_request_status(request, courselike_string, -1)
                    log.warning(
                        "Reported range %s does not match size downloaded so "
                        "far %s", content_range['start'], size)
                    return JsonResponse(
                        {
                            'error_message':
                            _('File upload corrupted. Please try again'),
                            'stage':
                            -1
                        },
                        status=409)
                # The last request sometimes comes twice. This happens because
                # nginx sends a 499 error code when the response takes too long.
                elif size > int(content_range['stop']) \
                        and size == int(content_range['end']):
                    return JsonResponse({'ImportStatus': 1})

            with open(temp_filepath, mode) as temp_file:
                for chunk in request.FILES['course-data'].chunks():
                    temp_file.write(chunk)

            size = os.path.getsize(temp_filepath)

            if int(content_range['stop']) != int(content_range['end']) - 1:
                # More chunks coming
                return JsonResponse({
                    "files": [{
                        "name": filename,
                        "size": size,
                        "delete_url": "",
                        "delete_type": "",
                        "thumbnail_url": ""
                    }]
                })
        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            self._save_request_status(request, courselike_string, -1)
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info("Course import %s: Temp data cleared", courselike_key)

            log.exception("error importing course")
            return JsonResponse({
                'error_message': str(exception),
                'stage': -1
            },
                                status=400)

        # try-finally block for proper clean up after receiving last chunk.
        try:
            # This was the last chunk.
            log.info("Course import %s: Upload complete", courselike_key)
            self._save_request_status(request, courselike_string, 1)

            tar_file = tarfile.open(temp_filepath)
            try:
                safetar_extractall(tar_file,
                                   (course_dir + '/').encode('utf-8'))
            except SuspiciousOperation as exc:
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message': 'Unsafe tar file. Aborting import.',
                        'suspicious_operation_message': exc.args[0],
                        'stage': -1
                    },
                    status=400)
            finally:
                tar_file.close()

            log.info("Course import %s: Uploaded file extracted",
                     courselike_key)
            self._save_request_status(request, courselike_string, 2)

            # find the 'course.xml' file
            def get_all_files(directory):
                """
                For each file in the directory, yield a 2-tuple of (file-name,
                directory-path)
                """
                for dirpath, _dirnames, filenames in os.walk(directory):
                    for filename in filenames:
                        yield (filename, dirpath)

            def get_dir_for_fname(directory, filename):
                """
                Returns the dirpath for the first file found in the directory
                with the given name.  If there is no file in the directory with
                the specified name, return None.
                """
                for fname, dirpath in get_all_files(directory):
                    if fname == filename:
                        return dirpath
                return None

            dirpath = get_dir_for_fname(course_dir, root_name)
            if not dirpath:
                self._save_request_status(request, courselike_string, -2)
                return JsonResponse(
                    {
                        'error_message':
                        _('Could not find the {root_xml_file} file in the package.'
                          ).format(root_xml_file=root_name),
                        'stage':
                        -2
                    },
                    status=415)

            dirpath = os.path.relpath(dirpath, data_root)
            logging.debug('found %s at %s', root_name, dirpath)

            log.info("Course import %s: Extracted file verified",
                     courselike_key)
            self._save_request_status(request, courselike_string, 3)

            with dog_stats_api.timer(
                    'courselike_import.time',
                    tags=[u"courselike:{}".format(courselike_key)]):
                courselike_items = import_func(
                    modulestore(),
                    request.user.id,
                    settings.GITHUB_REPO_ROOT,
                    [dirpath],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_id=courselike_key,
                )

            new_location = courselike_items[0].location
            logging.debug('new course at %s', new_location)

            log.info("Course import %s: Course import successful",
                     courselike_key)
            self._save_request_status(request, courselike_string, 4)

        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            log.exception("error importing course")
            return JsonResponse(
                {
                    'error_message': str(exception),
                    'stage': -cache.get(status_key)
                },
                status=400)

        finally:
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info(
                    "Course import %s: Temp data cleared",
                    courselike_key  # pylint: disable=no-value-for-parameter
                )
            # set failed stage number with negative sign in case of an
            # unsuccessful import
            if cache.get(status_key) != 4:
                self._save_request_status(request, courselike_string,
                                          -abs(cache.get(status_key)))

        return JsonResponse({'status': 'OK'})
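The handler above resumes chunked uploads by inspecting the HTTP Content-Range header through CONTENT_RE, which is defined elsewhere in the module. The regex below is only an assumed equivalent, showing how the start, stop, and end groups the view relies on could be captured:

import re

# Assumed shape of CONTENT_RE: matches header values such as "bytes 0-1023/2048".
CONTENT_RANGE = re.compile(r'(?P<start>\d+)-(?P<stop>\d+)/(?P<end>\d+)')


def parse_content_range(header_value):
    """Return {'start': ..., 'stop': ..., 'end': ...} or None if unparsable."""
    match = CONTENT_RANGE.search(header_value)
    return match.groupdict() if match else None


# parse_content_range('bytes 0-1023/2048') -> {'start': '0', 'stop': '1023', 'end': '2048'}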
Example #15
    def post(self, request, course_key_string):
        """
        The restful handler for importing a course.

        GET
            json: return json import status
        POST or PUT
            json: import a course via the .tar.gz file specified in request.FILES
        """
        courselike_key = CourseKey.from_string(course_key_string)
        library = isinstance(courselike_key, LibraryLocator)

        if library:
            root_name = LIBRARY_ROOT
            import_func = import_library_from_xml
        else:
            root_name = COURSE_ROOT
            import_func = import_course_from_xml

        filename = request.FILES['course-data'].name
        courselike_string = unicode(courselike_key) + filename
        data_root = path(settings.GITHUB_REPO_ROOT)
        subdir = base64.urlsafe_b64encode(repr(courselike_key))
        course_dir = data_root / subdir

        status_key = "import_export.import.status:{}|{}".format(
            request.user.username,
            courselike_string
        )

        # Do everything in a try-except block to make sure everything is
        # properly cleaned up.
        try:
            # Cache the import progress
            self._save_request_status(request, courselike_string, 0)
            if not filename.endswith('.tar.gz'):
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message': _(
                            'We only support uploading a .tar.gz file.'
                        ),
                        'stage': -1
                    },
                    status=415
                )

            temp_filepath = course_dir / filename

            # Only handle exceptions caused by the directory already existing,
            # to avoid a potential race condition caused by the "check and go"
            # method.
            try:
                os.makedirs(course_dir)
            except OSError as exc:
                if exc.errno != errno.EEXIST:
                    raise

            logging.debug('importing course to %s', temp_filepath)

            # Get upload chunks byte ranges
            try:
                matches = CONTENT_RE.search(request.META["HTTP_CONTENT_RANGE"])
                content_range = matches.groupdict()
            except KeyError:    # Single chunk
                # no Content-Range header, so make one that will work
                content_range = {'start': 0, 'stop': 1, 'end': 2}

            # stream out the uploaded files in chunks to disk
            if int(content_range['start']) == 0:
                mode = "wb+"
            else:
                mode = "ab+"
                size = os.path.getsize(temp_filepath)
                # Check to make sure we haven't missed a chunk
                # This shouldn't happen, even if different instances are
                # handling the same session, but it's always better to catch
                # errors earlier.
                if size < int(content_range['start']):
                    self._save_request_status(request, courselike_string, -1)
                    log.warning(
                        "Reported range %s does not match size downloaded so "
                        "far %s",
                        content_range['start'],
                        size
                    )
                    return JsonResponse(
                        {
                            'error_message': _(
                                'File upload corrupted. Please try again'
                            ),
                            'stage': -1
                        },
                        status=409
                    )
                # The last request sometimes comes twice. This happens because
                # nginx sends a 499 error code when the response takes too long.
                elif size > int(content_range['stop']) \
                        and size == int(content_range['end']):
                    return JsonResponse({'ImportStatus': 1})

            with open(temp_filepath, mode) as temp_file:
                for chunk in request.FILES['course-data'].chunks():
                    temp_file.write(chunk)

            size = os.path.getsize(temp_filepath)

            if int(content_range['stop']) != int(content_range['end']) - 1:
                # More chunks coming
                return JsonResponse({
                    "files": [{
                        "name": filename,
                        "size": size,
                        "delete_url": "",
                        "delete_type": "",
                        "thumbnail_url": ""
                    }]
                })
        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            self._save_request_status(request, courselike_string, -1)
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info(
                    "Course import %s: Temp data cleared", courselike_key
                )

            log.exception("error importing course")
            return JsonResponse(
                {
                    'error_message': str(exception),
                    'stage': -1
                },
                status=400
            )

        # try-finally block for proper clean up after receiving last chunk.
        try:
            # This was the last chunk.
            log.info("Course import %s: Upload complete", courselike_key)
            self._save_request_status(request, courselike_string, 1)

            tar_file = tarfile.open(temp_filepath)
            try:
                safetar_extractall(
                    tar_file,
                    (course_dir + '/').encode('utf-8'))
            except SuspiciousOperation as exc:
                self._save_request_status(request, courselike_string, -1)
                return JsonResponse(
                    {
                        'error_message': 'Unsafe tar file. Aborting import.',
                        'suspicious_operation_message': exc.args[0],
                        'stage': -1
                    },
                    status=400
                )
            finally:
                tar_file.close()

            log.info(
                "Course import %s: Uploaded file extracted", courselike_key
            )
            self._save_request_status(request, courselike_string, 2)

            # find the 'course.xml' file
            def get_all_files(directory):
                """
                For each file in the directory, yield a 2-tuple of (file-name,
                directory-path)
                """
                for dirpath, _dirnames, filenames in os.walk(directory):
                    for filename in filenames:
                        yield (filename, dirpath)

            def get_dir_for_fname(directory, filename):
                """
                Returns the dirpath for the first file found in the directory
                with the given name.  If there is no file in the directory with
                the specified name, return None.
                """
                for fname, dirpath in get_all_files(directory):
                    if fname == filename:
                        return dirpath
                return None

            dirpath = get_dir_for_fname(course_dir, root_name)
            if not dirpath:
                self._save_request_status(request, courselike_string, -2)
                return JsonResponse(
                    {
                        'error_message': _(
                            'Could not find the {root_xml_file} file in the package.'
                        ).format(root_xml_file=root_name),
                        'stage': -2
                    },
                    status=415
                )

            dirpath = os.path.relpath(dirpath, data_root)
            logging.debug('found %s at %s', root_name, dirpath)

            log.info(
                "Course import %s: Extracted file verified",
                courselike_key
            )
            self._save_request_status(request, courselike_string, 3)

            with dog_stats_api.timer(
                'courselike_import.time',
                tags=[u"courselike:{}".format(courselike_key)]
            ):
                courselike_items = import_func(
                    modulestore(),
                    request.user.id,
                    settings.GITHUB_REPO_ROOT,
                    [dirpath],
                    load_error_modules=False,
                    static_content_store=contentstore(),
                    target_id=courselike_key,
                )

            new_location = courselike_items[0].location
            logging.debug('new course at %s', new_location)

            log.info(
                "Course import %s: Course import successful", courselike_key
            )
            self._save_request_status(request, courselike_string, 4)

        # Send errors to client with stage at which error occurred.
        except Exception as exception:  # pylint: disable=broad-except
            log.exception(
                "error importing course"
            )
            return JsonResponse(
                {
                    'error_message': str(exception),
                    'stage': -cache.get(status_key)
                },
                status=400
            )

        finally:
            if course_dir.isdir():  # pylint: disable=no-value-for-parameter
                shutil.rmtree(course_dir)
                log.info(
                    "Course import %s: Temp data cleared", courselike_key  # pylint: disable=no-value-for-parameter
                )
            # set failed stage number with negative sign in case of an
            # unsuccessful import
            if cache.get(status_key) != 4:
                self._save_request_status(
                    request,
                    courselike_string,
                    -abs(cache.get(status_key))
                )

        return JsonResponse({'status': 'OK'})
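Both view handlers track progress as a small integer stage cached per user and archive (0 when the upload starts, up to 4 on success) and record failure by storing the last stage reached with a negative sign, which is what the -abs(...) call in the finally block does. A hedged sketch of that convention; the helper name is illustrative, though the cache key format mirrors the views above:

from django.core.cache import cache


def mark_import_failed(username, courselike_string):
    """Negate the cached stage so clients can see where the import broke."""
    status_key = "import_export.import.status:{}|{}".format(username, courselike_string)
    stage = cache.get(status_key) or 0
    cache.set(status_key, -abs(stage))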
    def handle(self, *args, **options):
        """
        Given a content library archive path, import the corresponding course to mongo.
        """

        archive_path = options['archive_path']
        username = options['owner_username']

        data_root = Path(settings.GITHUB_REPO_ROOT)
        subdir = base64.urlsafe_b64encode(os.path.basename(archive_path))
        course_dir = data_root / subdir

        # Extract library archive
        tar_file = tarfile.open(archive_path)
        try:
            safetar_extractall(tar_file, course_dir.encode('utf-8'))
        except SuspiciousOperation as exc:
            raise CommandError(u'\n=== Course import {0}: Unsafe tar file - {1}\n'.format(archive_path, exc.args[0]))
        finally:
            tar_file.close()

        # Paths to the library.xml file
        abs_xml_path = os.path.join(course_dir, 'library')
        rel_xml_path = os.path.relpath(abs_xml_path, data_root)

        # Gather library metadata from XML file
        xml_root = etree.parse(abs_xml_path / 'library.xml').getroot()
        if xml_root.tag != 'library':
            raise CommandError(u'Failed to import {0}: Not a library archive'.format(archive_path))

        metadata = xml_root.attrib
        org = metadata['org']
        library = metadata['library']
        display_name = metadata['display_name']

        # Fetch user and library key
        user = User.objects.get(username=username)
        courselike_key, created = _get_or_create_library(org, library, display_name, user)

        # Check if data would be overwritten
        ans = ''
        while not created and ans not in ['y', 'yes', 'n', 'no']:
            inp = raw_input(u'Library "{0}" already exists, overwrite it? [y/n] '.format(courselike_key))
            ans = inp.lower()
        if ans.startswith('n'):
            print(u'Aborting import of "{0}"'.format(courselike_key))
            return

        # At last, import the library
        try:
            import_library_from_xml(
                modulestore(), user.id,
                settings.GITHUB_REPO_ROOT, [rel_xml_path],
                load_error_modules=False,
                static_content_store=contentstore(),
                target_id=courselike_key
            )
        except Exception:
            print(u'\n=== Failed to import library-v1:{0}+{1}'.format(org, library))
            raise

        print(u'Library "{0}" imported to "{1}"'.format(archive_path, courselike_key))