def test_add_descriptors(self):
    """
    Check the XML produced by exporting and re-importing a keep_urls
    bundle whose chapter carries a url_name_orig attribute.
    """
    # The chapter in this fixture already has url_name_orig set.
    source_xml = input_data.URL_NAME_ORIG_IN_CHAPTER1
    original = XBundle(keep_urls=True)
    original.load(file_from_string(source_xml))

    # Loading and stringifying leaves the xml as it was; the export step
    # below is what alters it.
    self.assertEqual(clean_xml(source_xml), clean_xml(str(original)))

    starting_dir = os.getcwd()
    scratch_dir = mkdtemp()
    try:
        # Export and import are both called without a path, so run them
        # from inside the scratch directory.
        os.chdir(scratch_dir)
        original.export_to_directory()

        reloaded = XBundle(keep_urls=True)
        reloaded.import_from_directory()

        wanted = expected_data.URL_NAME_ORIG
        self.assertEqual(clean_xml(wanted), clean_xml(str(reloaded)))
    finally:
        # Always restore the working directory before deleting the temp dir.
        os.chdir(starting_dir)
        rmtree(scratch_dir)
def test_import_export(self):  # pylint: disable=no-self-use
    """
    Import the mitx.01 fixture, export it to a temp directory and diff
    the result against the stored reference export.
    """
    course_bundle = XBundle()
    course_bundle.import_from_directory(
        os.path.join("input_testdata", "mitx.01")
    )

    scratch_dir = mkdtemp()
    try:
        course_bundle.export_to_directory(scratch_dir)

        reference_dir = os.path.join("input_testdata", "mitx.01.exported")
        reference_copy = os.path.join(scratch_dir, 'mitx.01.exported')
        exported_dir = os.path.join(scratch_dir, "mitx.01")

        # Strip whitespace from the xml files so the comparison passes on
        # platforms whose xml serializers differ slightly (see: travis).
        # The reference tree is copied into the scratch directory so one
        # rmtree cleans everything up.
        copytree(reference_dir, reference_copy)
        _normalize_xml(scratch_dir)

        # check_call raises if diff exits non-zero, i.e. the trees differ.
        check_call(["diff", "-r", reference_copy, exported_dir])
    finally:
        rmtree(scratch_dir)
def test_export_and_keep_urls(self):
    """
    Check url_name handling after an export/import round trip with both
    keep_urls and force_studio_format enabled.
    """
    # The chapter in this fixture already has url_name_orig set.
    source_xml = input_data.URL_NAME_ORIG_IN_CHAPTER2
    original = XBundle(keep_urls=True, force_studio_format=True)
    original.load(file_from_string(source_xml))

    # Loading and stringifying leaves the xml as it was; the export step
    # below is what alters it.
    self.assertEqual(clean_xml(source_xml), clean_xml(str(original)))

    starting_dir = os.getcwd()
    scratch_dir = mkdtemp()
    try:
        # Export and import are both called without a path, so run them
        # from inside the scratch directory.
        os.chdir(scratch_dir)
        original.export_to_directory()

        reloaded = XBundle(keep_urls=True, force_studio_format=True)
        reloaded.import_from_directory()

        wanted = expected_data.KEEP_URLS_FORCE_STUDIO_FORMAT
        self.assertEqual(clean_xml(wanted), clean_xml(str(reloaded)))
    finally:
        # Always restore the working directory before deleting the temp dir.
        os.chdir(starting_dir)
        rmtree(scratch_dir)
def test_export_import(self):
    """
    Build a bundle in memory, export it, import the export and verify
    that both bundles serialize to the same XML.
    """
    original = XBundle()
    original.set_course(etree.XML(input_data.COURSE))
    original.add_policies(etree.XML(input_data.POLICIES))
    original.add_about_file("overview.html", "hello overview")

    # Capture the serialized form before the export is performed.
    before_export = str(original)

    scratch_dir = mkdtemp()
    try:
        original.export_to_directory(scratch_dir)

        # Round trip: load what was just written.
        reloaded = XBundle()
        reloaded.import_from_directory(os.path.join(scratch_dir, 'mitx.01'))
        after_import = str(reloaded)

        self.assertEqual(clean_xml(before_export), clean_xml(after_import))
    finally:
        rmtree(scratch_dir)
def test_import_export(self):  # pylint: disable=no-self-use
    """
    Export the imported mitx.01 fixture and compare it, via a recursive
    diff, with the known-good export kept in input_testdata.
    """
    fixture_path = os.path.join("input_testdata", "mitx.01")
    imported = XBundle()
    imported.import_from_directory(fixture_path)

    work_dir = mkdtemp()
    try:
        imported.export_to_directory(work_dir)

        known_dir = os.path.join("input_testdata", "mitx.01.exported")
        known_temp_dir = os.path.join(work_dir, 'mitx.01.exported')
        new_dir = os.path.join(work_dir, "mitx.01")

        # The xml files are normalized (spaces removed) so that the test
        # passes across platforms with slightly different xml serializers
        # (see: travis). Working on a copy keeps the stored reference
        # intact and makes cleanup a single rmtree.
        copytree(known_dir, known_temp_dir)
        _normalize_xml(work_dir)

        diff_command = ["diff", "-r", known_temp_dir, new_dir]
        check_call(diff_command)
    finally:
        rmtree(work_dir)
def test_import_url_name(self):
    """
    Import mitx.01 with keep_urls and keep_studio_urls and compare the
    serialized bundle with the KEEP_URLS expectation (url_name is
    expected to come through as url_name_orig).
    """
    fixture_path = os.path.join('input_testdata', 'mitx.01')
    imported = XBundle(keep_urls=True, keep_studio_urls=True)
    imported.import_from_directory(fixture_path)

    actual_xml = str(imported)
    wanted_xml = expected_data.KEEP_URLS
    self.assertEqual(clean_xml(wanted_xml), clean_xml(actual_xml))
def test_fix_old_course_section(self):
    """
    Import the "sections" fixture and compare the result with the
    MISSING_SECTION expectation.
    """
    fixture_path = os.path.join("input_testdata", "sections")
    imported = XBundle()
    imported.import_from_directory(fixture_path)

    # The expected XML no longer contains the section element.
    wanted_xml = expected_data.MISSING_SECTION
    self.assertEqual(clean_xml(wanted_xml), clean_xml(str(imported)))
def test_fix_old_course_section(self):
    """
    Verify the XML obtained from importing the "sections" test course.
    """
    sections_bundle = XBundle()
    sections_bundle.import_from_directory(
        os.path.join("input_testdata", "sections")
    )

    # After import the section element should be gone.
    self.assertEqual(
        clean_xml(expected_data.MISSING_SECTION),
        clean_xml(str(sections_bundle)),
    )
def import_course_from_path(path, repo_id, user_id):
    """
    Load an OLX directory into an XBundle and hand it to import_course.

    Args:
        path (unicode): path to extracted OLX tree
        repo_id: forwarded unchanged to import_course
        user_id (int): pk of Django user doing the import
    Returns:
        Whatever import_course returns for the loaded bundle.
    """
    olx_bundle = XBundle()
    olx_bundle.import_from_directory(path)
    imported = import_course(olx_bundle, repo_id, user_id)
    return imported
def test_import_skip_hidden(self):
    """
    Import mitx.01 with skip_hidden=True and compare against the
    SKIP_HIDDEN expectation.
    """
    imported = XBundle(skip_hidden=True)
    imported.import_from_directory(
        os.path.join('input_testdata', 'mitx.01')
    )

    wanted_xml = expected_data.SKIP_HIDDEN
    self.assertEqual(clean_xml(str(imported)), clean_xml(wanted_xml))
def test_preserve_url_name(self):
    """
    With preserve_url_name set, the import should yield url_name rather
    than url_name_orig; compare against PRESERVE_URL_NAME.
    """
    options = {
        "keep_urls": True,
        "keep_studio_urls": True,
        "preserve_url_name": True,
    }
    imported = XBundle(**options)
    imported.import_from_directory('input_testdata/mitx.01')

    actual_xml = str(imported)
    wanted_xml = expected_data.PRESERVE_URL_NAME
    self.assertEqual(clean_xml(wanted_xml), clean_xml(actual_xml))
def test_preserve_url_name(self):
    """
    Check that importing with preserve_url_name produces url_name (not
    url_name_orig) in the serialized bundle.
    """
    url_bundle = XBundle(
        keep_urls=True,
        keep_studio_urls=True,
        preserve_url_name=True,
    )
    url_bundle.import_from_directory('input_testdata/mitx.01')

    serialized = str(url_bundle)
    reference = expected_data.PRESERVE_URL_NAME
    self.assertEqual(clean_xml(reference), clean_xml(serialized))
def documents_from_olx(olx_path):  # pylint: disable=too-many-locals
    """
    Collect documents from an OLX directory.

    One document is produced per <vertical> found in the course, followed
    by one document per file under olx_path whose lower-cased extension is
    in VALID_TEXT_FILE_TYPES.

    Args:
        olx_path (str): The path to the directory with the OLX data

    Returns:
        list of tuple: A list of (content, metadata dict) pairs
    """
    documents = []

    bundle = XBundle()
    bundle.import_from_directory(olx_path)

    # Verticals first, keyed by their 1-based position in the course.
    verticals = bundle.course.findall(".//vertical")
    for index, vertical in enumerate(verticals):
        vertical_text = get_text_from_element(vertical)
        vertical_meta = {
            "key": f"vertical_{index + 1}",
            "content_type": CONTENT_TYPE_VERTICAL,
            "title": vertical.attrib.get("display_name") or "",
            "mime_type": "application/xml",
        }
        documents.append((vertical_text, vertical_meta))

    # Then every eligible file in the tree; the counter is only advanced
    # for files that are actually collected.
    counter = _infinite_counter()
    for root, _, files in os.walk(olx_path):
        for filename in files:
            suffix = os.path.splitext(filename)[1].lower()
            if suffix not in VALID_TEXT_FILE_TYPES:
                continue

            file_path = os.path.join(root, filename)
            with open(file_path, "rb") as handle:
                payload = handle.read()

            # mime_type is None when the extension is not in types_map.
            guessed_type = mimetypes.types_map.get(suffix)
            file_meta = {
                "key": f"document_{next(counter)}_{filename}",
                "content_type": CONTENT_TYPE_FILE,
                "mime_type": guessed_type,
            }
            documents.append((payload, file_meta))

    return documents
def import_course_from_path(path, repo_id, user_id):
    """
    Load an OLX directory into an XBundle and import it as a course.

    Args:
        path (unicode): Path to extracted OLX tree
        repo_id (int): Primary key of repository course belongs to
        user_id (int): Primary key of Django user doing the import
    Returns:
        course (learningresources.Course)
    """
    bundle_options = {
        "keep_urls": True,
        "keep_studio_urls": True,
        "preserve_url_name": True,
    }
    olx_bundle = XBundle(**bundle_options)
    olx_bundle.import_from_directory(path)

    # The 'static' subdirectory of the OLX tree is passed along with the
    # bundle.
    return import_course(olx_bundle, repo_id, user_id, join(path, 'static'))
def test_unicode_in_html(self):
    """
    An overview file containing non-ASCII characters must serialize to
    the ESCAPED_UNICODE expectation, whether given as text or as bytes.
    """
    fixture_path = os.path.join("input_testdata", "mitx.01")
    overview_text = "\u2e18 interrobang \u203d"

    # First pass uses the text itself, second pass its utf-8 encoding.
    # Each pass re-imports the fixture to start from a clean slate.
    for overview in (overview_text, overview_text.encode('utf-8')):
        bundle = XBundle()
        bundle.import_from_directory(fixture_path)
        bundle.add_about_file("overview.html", overview)

        expected = expected_data.ESCAPED_UNICODE
        self.assertEqual(clean_xml(str(bundle)), clean_xml(expected))
def test_unicode_in_html(self):
    """
    Make sure unicode in the overview file is handled, for both text and
    utf-8 encoded bytes input.
    """
    def bundle_with_overview(overview):
        """Import mitx.01 afresh and attach the given overview content."""
        fresh = XBundle()
        fresh.import_from_directory(
            os.path.join("input_testdata", "mitx.01")
        )
        fresh.add_about_file("overview.html", overview)
        return fresh

    with_text = bundle_with_overview("\u2e18 interrobang \u203d")
    wanted_xml = expected_data.ESCAPED_UNICODE
    self.assertEqual(clean_xml(str(with_text)), clean_xml(wanted_xml))

    # Same content again from a newly imported bundle, this time as bytes.
    with_bytes = bundle_with_overview(
        "\u2e18 interrobang \u203d".encode('utf-8')
    )
    self.assertEqual(clean_xml(str(with_bytes)), clean_xml(wanted_xml))
def import_course_from_path(path, repo_id, user_id):
    """
    Load an OLX directory into an XBundle and import it as a course
    inside a single transaction.atomic() block.

    Args:
        path (unicode): Path to extracted OLX tree
        repo_id (int): Primary key of repository course belongs to
        user_id (int): Primary key of Django user doing the import
    Returns:
        course (learningresources.Course)
    """
    olx_bundle = XBundle(
        keep_urls=True,
        keep_studio_urls=True,
        preserve_url_name=True,
    )
    olx_bundle.import_from_directory(path)

    static_path = join(path, 'static')
    # Only the import_course call runs inside the atomic block; reading
    # the OLX tree happens before it.
    with transaction.atomic():
        return import_course(olx_bundle, repo_id, user_id, static_path)
def test_import_large(self):
    """
    Import the content-devops-0001 course (slightly larger than mitx.01),
    compare it with the stored XML, then check an xml_only export.
    """
    imported = XBundle()
    imported.import_from_directory(
        os.path.join('input_testdata', 'content-devops-0001')
    )

    reference_path = os.path.join(
        'input_testdata', 'content-devops-0001.out.xml'
    )
    with open(reference_path) as reference_file:
        reference_xml = reference_file.read()
    self.assertEqual(clean_xml(reference_xml), clean_xml(str(imported)))

    scratch_dir = mkdtemp()
    try:
        imported.export_to_directory(scratch_dir, xml_only=True, newfmt=True)

        exported_root = os.path.join(scratch_dir, "0.001")
        exported_names = (
            name
            for _, _, names in os.walk(exported_root)
            for name in names
        )
        for name in exported_names:
            # xml_only=True was requested, so only .xml files may appear.
            self.assertTrue(name.endswith(".xml"))
    finally:
        rmtree(scratch_dir)
def test_import_large(self):
    """
    Exercise import on a course a little bigger than mitx.01 and confirm
    that exporting with xml_only=True writes nothing but .xml files.
    """
    course_path = os.path.join('input_testdata', 'content-devops-0001')
    large_bundle = XBundle()
    large_bundle.import_from_directory(course_path)

    expected_path = os.path.join(
        'input_testdata',
        'content-devops-0001.out.xml',
    )
    with open(expected_path) as expected_file:
        expected_xml = clean_xml(expected_file.read())
        self.assertEqual(expected_xml, clean_xml(str(large_bundle)))

    export_dir = mkdtemp()
    try:
        large_bundle.export_to_directory(
            export_dir, xml_only=True, newfmt=True
        )
        walk_root = os.path.join(export_dir, "0.001")
        for _dirpath, _dirnames, filenames in os.walk(walk_root):
            for exported_name in filenames:
                # Because of xml_only=True no other file type is expected.
                self.assertTrue(exported_name.endswith(".xml"))
    finally:
        rmtree(export_dir)