def test_save(self):
    """
    Exercise ``XBundle.save`` in its three calling forms: with no
    arguments (expected to produce ``xbundle.xml`` in the current
    directory), with ``filename=``, and with ``file_handle=``.
    """
    source_xml = "<xbundle><metadata /><course /></xbundle>"

    xb = XBundle()
    xb.load(file_from_string(source_xml))
    self.assertEqual(clean_xml(str(xb)), clean_xml(source_xml))

    original_cwd = os.getcwd()
    scratch = mkdtemp()

    def check_written(name):
        # Read a file back from the scratch directory and compare it,
        # after normalisation, with the XML the bundle was loaded from.
        with open(os.path.join(scratch, name)) as handle:
            self.assertEqual(
                clean_xml(handle.read()), clean_xml(source_xml))

    try:
        # save() without arguments writes relative to the working
        # directory, so run from inside the scratch directory.
        os.chdir(scratch)

        xb.save()
        check_written("xbundle.xml")

        xb.save(filename="other.xml")
        check_written("other.xml")

        third = os.path.join(scratch, "third.xml")
        with open(third, "w") as handle:
            xb.save(file_handle=handle)
        check_written("third.xml")
    finally:
        # Restore the working directory before deleting the scratch tree.
        os.chdir(original_cwd)
        rmtree(scratch)
def export(self):
    """
    Build an xbundle from the parsed course and write it out.

    Steps:

    1. Parse ``contents/Syllabus/index.htm`` under ``self.dir`` and create
       a ``course`` element carrying ``dirname`` (basename of the current
       working directory), ``semester`` and every item of ``self.meta``.
    2. Reset the per-export bookkeeping attributes (``processed_files``,
       ``files_to_copy``, ``processed_pdf_files``, ``element_counts``) and
       call ``do_chapters`` to populate the course element.
    3. Assemble an ``XBundle`` (course, policies, about files).
    4. Write the result, choosing the form from the output name
       (``self.output_fn``, or ``<cid>_xbundle.xml`` when that is unset):

       * ``*.xml``               -- ``xb.save(outfn)``, static files copied
                                    via ``copy_static_files(".")``.
       * ``*.tar.gz`` / ``*.tgz`` -- gzipped tarball containing a single
                                    top-level ``course`` directory.
       * anything else           -- exported into that directory, which is
                                    created when it does not exist.

    Finally the element counts are printed as JSON.

    Raises whatever the tarball / directory creation raises; the temporary
    staging directory used for the tarball is always removed.
    """
    # Local import: only the tarball branch needs it, and the top-of-file
    # import block is not visible from here.
    import tarfile

    meta = self.meta
    sys.stderr.write("metadata = %s\n" % meta)

    fn = self.dir / 'contents/Syllabus/index.htm'
    sxml = self.parse_broken_html(fn=fn)

    edxxml = etree.Element('course')
    edxxml.set('dirname', os.path.basename(os.getcwd()))
    edxxml.set('semester', self.DefaultSemester)
    for k, v in meta.items():
        edxxml.set(k, v)

    # track which content files have been ingested, to avoid duplication
    self.processed_files = [fn]
    # dict of files (key=OCW source, val=edX static dest) to copy to "/static"
    self.files_to_copy = {}
    self.processed_pdf_files = []
    self.element_counts = defaultdict(int)

    self.do_chapters(sxml, edxxml)

    # Read after do_chapters(), as the original did.
    policies = self.policies

    # grab course image via index.htm
    self.get_course_image()

    # make xbundle
    xb = XBundle(force_studio_format=True)
    xb.DefaultOrg = self.DefaultOrg
    xb.set_course(edxxml)
    xb.add_policies(policies)
    self.add_about_files(xb)

    # Count how many elements of each kind ended up in the course tree.
    elist = ["chapter", "sequential", "vertical", "problem", "html", "video"]
    xbundle_counts = {
        tag: len(xb.course.findall(".//%s" % tag)) for tag in elist
    }

    self.element_counts['n_static_files'] = len(self.files_to_copy)
    self.element_counts['n_ocw_files_processed'] = len(self.processed_files)

    # save it
    outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
    if outfn.endswith(".xml"):
        xb.save(outfn)
        self.copy_static_files(".")
    elif outfn.endswith((".tar.gz", ".tgz")):
        tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
        try:
            cdir = path(tempd) / "course"
            os.mkdir(cdir)
            self.copy_static_files(cdir)
            xb.export_to_directory(cdir, dir_include_course_id=False)

            # abspath() resolves a relative name against the current
            # directory and leaves an absolute name untouched.
            tarball = os.path.abspath(outfn)
            print("Writing tarball %s" % tarball)
            # tarfile instead of a shell command: no quoting problems with
            # unusual paths, and a failure raises instead of being ignored.
            with tarfile.open(tarball, "w:gz") as tar:
                tar.add(cdir, arcname="course")
        finally:
            # Never leave the staging directory behind, even on error.
            shutil.rmtree(tempd)
    else:
        if not os.path.exists(outfn):
            print("Making directory for output: %s" % outfn)
            # makedirs so that a nested output path also works.
            os.makedirs(outfn)
        self.copy_static_files(outfn)
        xb.export_to_directory(outfn, dir_include_course_id=False)

    print("OCW element counts: %s" % json.dumps(self.element_counts, indent=4))
    print("edX XML element counts: %s" % json.dumps(xbundle_counts, indent=4))
    print("Done, wrote to %s" % outfn)
def export(self):
    """
    Build an xbundle from the parsed course and write it out.

    Steps:

    1. Parse ``contents/Syllabus/index.htm`` under ``self.dir`` and create
       a ``course`` element carrying ``dirname`` (basename of the current
       working directory), ``semester`` and every item of ``self.meta``.
    2. Reset the per-export bookkeeping attributes (``processed_files``,
       ``files_to_copy``, ``processed_pdf_files``, ``element_counts``) and
       call ``do_chapters`` to populate the course element.
    3. Assemble an ``XBundle`` (course, policies, about files).
    4. Write the result, choosing the form from the output name
       (``self.output_fn``, or ``<cid>_xbundle.xml`` when that is unset):

       * ``*.xml``               -- ``xb.save(outfn)``, static files copied
                                    via ``copy_static_files(".")``.
       * ``*.tar.gz`` / ``*.tgz`` -- gzipped tarball containing a single
                                    top-level ``course`` directory.
       * anything else           -- exported into that directory, which is
                                    created when it does not exist.

    Finally the element counts are printed as JSON.

    Raises whatever the tarball / directory creation raises; the temporary
    staging directory used for the tarball is always removed.
    """
    # Local import: only the tarball branch needs it, and the top-of-file
    # import block is not visible from here.
    import tarfile

    meta = self.meta
    sys.stderr.write("metadata = %s\n" % meta)

    fn = self.dir / 'contents/Syllabus/index.htm'
    sxml = self.parse_broken_html(fn=fn)

    edxxml = etree.Element('course')
    edxxml.set('dirname', os.path.basename(os.getcwd()))
    edxxml.set('semester', self.DefaultSemester)
    for k, v in meta.items():
        edxxml.set(k, v)

    # track which content files have been ingested, to avoid duplication
    self.processed_files = [fn]
    # dict of files (key=OCW source, val=edX static dest) to copy to "/static"
    self.files_to_copy = {}
    self.processed_pdf_files = []
    self.element_counts = defaultdict(int)

    self.do_chapters(sxml, edxxml)

    # Read after do_chapters(), as the original did.
    policies = self.policies

    # grab course image via index.htm
    self.get_course_image()

    # make xbundle
    xb = XBundle(force_studio_format=True)
    xb.DefaultOrg = self.DefaultOrg
    xb.set_course(edxxml)
    xb.add_policies(policies)
    self.add_about_files(xb)

    # Count how many elements of each kind ended up in the course tree.
    elist = ["chapter", "sequential", "vertical", "problem", "html", "video"]
    xbundle_counts = {
        tag: len(xb.course.findall(".//%s" % tag)) for tag in elist
    }

    self.element_counts['n_static_files'] = len(self.files_to_copy)
    self.element_counts['n_ocw_files_processed'] = len(self.processed_files)

    # save it
    outfn = self.output_fn or ('%s_xbundle.xml' % self.cid)
    if outfn.endswith(".xml"):
        xb.save(outfn)
        self.copy_static_files(".")
    elif outfn.endswith((".tar.gz", ".tgz")):
        tempd = tempfile.mkdtemp(prefix="tmp_ocw2xbundle")
        try:
            cdir = path(tempd) / "course"
            os.mkdir(cdir)
            self.copy_static_files(cdir)
            xb.export_to_directory(cdir, dir_include_course_id=False)

            # abspath() resolves a relative name against the current
            # directory and leaves an absolute name untouched.
            tarball = os.path.abspath(outfn)
            print("Writing tarball %s" % tarball)
            # tarfile instead of a shell command: no quoting problems with
            # unusual paths, and a failure raises instead of being ignored.
            with tarfile.open(tarball, "w:gz") as tar:
                tar.add(cdir, arcname="course")
        finally:
            # Never leave the staging directory behind, even on error.
            shutil.rmtree(tempd)
    else:
        if not os.path.exists(outfn):
            print("Making directory for output: %s" % outfn)
            # makedirs so that a nested output path also works.
            os.makedirs(outfn)
        self.copy_static_files(outfn)
        xb.export_to_directory(outfn, dir_include_course_id=False)

    print("OCW element counts: %s" % json.dumps(self.element_counts, indent=4))
    print("edX XML element counts: %s" % json.dumps(xbundle_counts, indent=4))
    print("Done, wrote to %s" % outfn)