def get_all_subject(insertDB, year='2018', semester='spring'):
    """
    Fetch all subjects, their courses and the course details, then dump the
    detail responses to 'allCourses.json'.

    The work is done in three stages, each with its own XmlGetter:
      1. fetch the address produced by addrBuilder(year, semester).build()
         and extract the subject addresses from the result;
      2. fetch every subject address and extract the course addresses;
      3. fetch every course address with '?mode=detail' appended and hand
         the result to JsonDumper.

    Progress messages and the total elapsed time are printed to stdout.

    :param insertDB: if True, then all of the data will be inserted into
        mongoDB (the flag is passed unchanged to JsonDumper.dump)
    :param year: first argument given to addrBuilder; defaults to '2018',
        the value that used to be hard-coded
    :param semester: second argument given to addrBuilder; defaults to
        'spring', the value that used to be hard-coded
    :return: None
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed
    # by system clock adjustments that happen while the crawl is running.
    start = time.perf_counter()

    # Stage 1: the subject index for the requested term.
    builder = addrBuilder(year, semester)
    subjectGetter = XmlGetter()
    subjectGetter.add_get_content_coroutine(builder.build())
    print('Please wait, getting all subjects...')
    subjects_content = subjectGetter.run_getter()
    addr_list = InfoExtractor(subjects_content).find_all_sub()

    # Stage 2: one request per subject address.
    courseGetter = XmlGetter()
    for subject_addr in addr_list:
        courseGetter.add_get_content_coroutine(subject_addr)
    print('Please wait, getting all courses...')
    courses_content = courseGetter.run_getter()
    courses_list = InfoExtractor(courses_content).find_all_courses()

    # Stage 3: one request per course, asking for the detailed view.
    detail_courses = [course + '?mode=detail' for course in courses_list]
    detailGetter = XmlGetter()
    for detail_addr in detail_courses:
        detailGetter.add_get_content_coroutine(detail_addr)
    print('Please wait, getting all sections...')
    details_content = detailGetter.run_getter()

    all_detail = JsonDumper(details_content)
    all_detail.dump('allCourses.json', insertDB)
    print('Time used: ', time.perf_counter() - start)
def __init__(self):
    """
    Set up the extraction queue and the InfoExtractor.

    The queue starts out as an empty dict. If the file named by
    config.path_extract_list exists, the queue is replaced by the object
    unpickled from that file. The extractor is built from the
    "seminar.xml" file under config.path_extract_onto and that directory
    itself.
    """
    # Presumably maps id -> item{title, url, filename, decision}
    # (taken from the original inline note; confirm against the code that
    # fills the queue).
    self.__ext_queue = {}
    if os.path.isfile(config.path_extract_list):
        # Pickle data is binary: open in 'rb' so it loads on Python 3 and
        # is not altered by newline translation, and use a context manager
        # so the file handle is always closed.
        # NOTE(review): unpickling can execute arbitrary code; this is only
        # safe while the file at config.path_extract_list is trusted.
        with open(config.path_extract_list, 'rb') as queue_file:
            self.__ext_queue = pickle.load(queue_file)
    self.__ie = InfoExtractor(config.path_extract_onto + "/seminar.xml",
                              config.path_extract_onto)