def extract_courses(filename, courses):
    """Extract names and codes of all courses from an XML file containing infolists.

    Extraction is best-effort: if reading or processing the file fails, the
    error is printed and everything collected up to that point is kept.

    Params:
        filename: path to the XML file
        courses: dict to write into (mutated in place); each key is the result
            of utils.parse_code applied to the text of a <kod> element, each
            value is the text of the matching <nazov> element

    Returns:
        the same ``courses`` dict that was passed in
    """
    try:
        root = ET.parse(filename).getroot()
        ilisty = root.find('informacneListy')
        for il in ilisty.findall('informacnyList'):
            kod_el = il.find('kod')
            # infolists without a <kod> element are skipped
            if kod_el is None:
                continue
            nazov = il.find('nazov').text
            courses[utils.parse_code(kod_el.text)] = nazov
    except Exception as exc:
        # Deliberately broad so the function stays best-effort, but unlike a
        # bare ``except`` combined with ``return`` inside ``finally`` this no
        # longer swallows KeyboardInterrupt / SystemExit.
        print("Error:  %s" % (exc,))
    return courses
def test_parse_code(self):
    """Check utils.parse_code against a table of known input/output pairs."""
    # raw course identifier -> value utils.parse_code is expected to return
    expected_by_input = {
        'FMFI.KJP/1-MXX-151/00': '1-MXX-151_00',
        u'FMFI.KAMŠ/2-PMS-119/10': u'2-PMS-119_10',
        'FMFI.KI/1-INF-160/00': '1-INF-160_00',
    }
    for raw in expected_by_input:
        self.assertEqual(utils.parse_code(raw), expected_by_input[raw])
def process_file(filename, output_path=None, lang='sk', verbose=True):
    """Render every state-exam infolist of an XML file into its own HTML file.

    Only infolists that contain an ``_ON_`` element are processed. Each one is
    rendered with the template ``template_statne-skusky_table_<lang>.html``
    loaded from the ``templates`` directory next to this script, and written
    to ``<kod>.html``.

    Params:
        filename: path to the XML file
        output_path: directory for the generated files; created (including
            missing parent directories) when something is written and it does
            not exist yet. When None, files are written using a relative path
            consisting of the file name only.
        lang: language; selects the template and is passed to it as 'lang'
        verbose: when true, print how many infolists were found

    Returns:
        None
    """
    xmldoc = ET.parse(filename)
    root = xmldoc.getroot()
    organizacnaJednotka = root.find('organizacnaJednotka').text
    infolists = root.find('informacneListy').findall('informacnyList')
    if verbose:
        print(" Nasiel som %d informacnych listov." % len(infolists))

    # elements that are parsed out of the XML
    # key => XPath (the key is used later in the template)
    elements = {
        'kod': 'kod',
        'nazov': 'nazov',
        'kredit': 'kredit',
        'sposobUkoncenia': 'sposobUkoncenia',
        'studijnyProgram': 'studijneProgramy/studijnyProgram/popis',
        'datumSchvalenia': 'datumSchvalenia',
        'obsahovaNapln': '_ON_/texty',
        'vahaHodnotenia': '_VH_/texty',
        'garanti': 'garanti/garant/plneMeno',
    }

    data = []

    # process the infolists of the individual courses
    for il in infolists:
        # skip courses that are not state exams
        if il.find('_ON_') is None:
            continue

        d = {'lang': lang, 'organizacnaJednotka': organizacnaJednotka}
        for key, path in elements.items():
            node = il.find(path)
            if node is None:
                d[key] = ''
            elif path.startswith('_'):
                d[key] = utils.get_text(node)
            elif key == 'studijnyProgram':
                # a course can list several study programmes; keep all of them
                d[key] = [el.text for el in il.findall(path)]
            else:
                d[key] = node.text

        # adjust the course code
        d['kod'] = utils.parse_code(d['kod'])

        data.append(d)

    # load the HTML template
    script_abs_path = os.path.dirname(os.path.abspath(__file__))
    tpl_path = os.path.join(script_abs_path, 'templates')
    env = Environment(loader=FileSystemLoader(tpl_path))
    tpl_name = 'template_statne-skusky_table_%s.html' % lang
    html_tpl = env.get_template(tpl_name)

    # Create the output directory once, and only when there is something to
    # write. makedirs (unlike mkdir) also handles a nested output_path.
    if data and output_path is not None and not os.path.exists(output_path):
        os.makedirs(output_path)

    # write the output files
    for course in data:
        html = html_tpl.render(course)
        # separate name so the ``filename`` parameter is not shadowed
        out_name = '%s.html' % course['kod']
        if output_path is not None:
            path = os.path.join(output_path, out_name)
        else:
            path = out_name
        with open(path, 'w') as f:
            # the rendered text is encoded as UTF-8 before it is written
            f.write(html.encode('utf8'))
def extract_infolists(filename, lang='sk', mode='regular', webpages=None, verbose=True):
    """Extract all infolists with all of their courses from a study program XML file.

    Params:
        filename: path to the XML file
        lang: language; stored under the 'lang' key of every returned dict
        mode: 'regular' skips infolists that contain an ``_ON_`` element,
            'statnice' keeps only infolists that contain one; any other
            value keeps every infolist
        webpages: optional mapping from course code (as returned by
            utils.parse_code) to the course home page; when a code is
            present, the page is stored under 'webStranka'
        verbose: when true, print how many infolists were found

    Returns:
        list of dicts, one per processed infolist; elements missing from the
        XML are represented by an empty string
    """
    # None instead of a mutable ``{}`` default, which would be shared
    # between calls.
    if webpages is None:
        webpages = {}

    xmldoc = ET.parse(filename)
    root = xmldoc.getroot()
    organizacnaJednotka = root.find('organizacnaJednotka').text
    vysokaSkola = root.find('vysokaSkola').text
    fakulta = root.find('fakulta').text
    infolists = root.find('informacneListy').findall('informacnyList')
    if verbose:
        print(" Nasiel som %d informacnych listov." % len(infolists))

    # elements that are parsed out of the XML
    # key => XPath (the key is used later in the template)
    # NOTE(review): 'rozsahSemestranly' and 'doplujuceUdaje' look like typos,
    # but they are kept verbatim because they must match the XML and the
    # templates - confirm against the data before renaming.
    elements = {
        'kod': 'kod',
        'nazov': 'nazov',
        'kredit': 'kredit',
        'sposobVyucby': 'sposobVyucby',
        'rozsahTyzdenny': 'rozsahTyzdenny',
        'rozsahSemestranly': 'rozsahSemestranly',
        'rokRocnikStudPlan': 'rokRocnikStudPlan',
        'kodSemesterStudPlan': 'kodSemesterStudPlan',
        'sposobUkoncenia': 'sposobUkoncenia',
        'studijnyProgram': 'studijneProgramy/studijnyProgram/popis',
        'podmienujucePredmety': 'podmienujucePredmety',
        'vylucujucePredmety': 'vylucujucePredmety',
        'doplujuceUdaje': 'doplujuceUdaje',
        'zabezpecuju': 'zabezpecuju',
        'strucnaOsnova': '_SO_/texty',
        'ciel': '_C_/texty',
        'zaverecneHodnotenie': '_Z_/texty/p',
        'literatura': '_L_/texty',
        'priebezneHodnotenie': '_P_/texty/p',
        'obsahovaPrerekvizita': '_O_/texty',
        'sylabus': '_S_/texty',
        'datumSchvalenia': 'datumSchvalenia',
        'vahaHodnotenia': '_VH_/texty/p',
        'garanti': 'garanti/garant/plneMeno',
        'jazyk': '_PJ_/texty/p',
        'obsahovaNapln': '_ON_/texty',
        'podmienkyAbsolvovania': '_PA_/texty',
        'vysledkyVzdelavania': '_VV_/texty',
    }

    data = []

    # process the infolists of the individual courses
    for il in infolists:
        # in 'regular' mode skip state exams (infolists with an _ON_ element);
        # in 'statnice' mode skip everything else
        has_on = il.find('_ON_') is not None
        if mode == 'regular' and has_on:
            continue
        if mode == 'statnice' and not has_on:
            continue

        d = {
            'lang': lang,
            'organizacnaJednotka': organizacnaJednotka,
            'vysokaSkola': vysokaSkola,
            'fakulta': fakulta,
        }
        for key, path in elements.items():
            node = il.find(path)
            if node is None:
                d[key] = ''
            elif key != 'vahaHodnotenia' and path.startswith('_'):
                d[key] = utils.get_text(node)
            elif key in ('studijnyProgram', 'jazyk'):
                values = [el.text for el in il.findall(path)]
                # NOTE(review): the path of 'jazyk' ('_PJ_/texty/p') starts
                # with '_', so 'jazyk' is always handled by the branch above
                # and never gets here. Kept as-is to preserve the current
                # output - confirm the intended behaviour.
                if key == 'jazyk':
                    values = list(set(values))
                d[key] = values
            else:
                d[key] = node.text

        # adjust the course code
        d['kod'] = utils.parse_code(d['kod'])

        # home page of the course
        if d['kod'] in webpages:
            d['webStranka'] = webpages[d['kod']]

        data.append(d)

    return data
# coding=utf8
import utils

# Ad-hoc script: print what utils.parse_code and utils.replace_codes return
# for a handful of sample inputs.
sample_codes = (
    u"FMFI.KAMŠ+KAI/1-EFM-380/00",
    u"PriF.KBCh/N-bCBI-303/10",
    u"PriF.KBCh/N-bCBI-303/3ecf/10",
    u"alebo",
)
for sample in sample_codes:
    print(utils.parse_code(sample))

# two codes joined by "alebo", with add_links=True
print(utils.replace_codes(
    u"FMFI.KAMŠ/1-MAT-282/00 alebo FMFI.KAMŠ/2-INF-175/15",
    add_links=True))

# a long list of "code - course name" items joined by " a "
print(utils.replace_codes(
    u"FMFI.KI+KAI/2-INF-262/15 - Bezpečnosť IT infraštruktúry a "
    u"FMFI.KI/2-INF-178/15 - Kryptológia (1) a "
    u"FMFI.KI/2-INF-223/15 - Riadenie IT bezpečnosti a "
    u"FMFI.KI/2-INF-183/15 - Počítačové siete (2) a "
    u"FMFI.KI/2-INF-176/15 - UNIX pre administrátorov a "
    u"FMFI.KI/2-INF-224/15 - Teória informácie a teória kódovania (1) a "
    u"FMFI.KI/2-INF-225/15 - Teória informácie a teória kódovania (2)"))