示例#1
0
def get_all_subject(insertDB, year='2018', semester='spring'):
    """Crawl subjects, then courses, then course details, and dump the result.

    The crawl runs in three stages, each with its own ``XmlGetter``:

    1. fetch the address produced by ``addrBuilder(year, semester).build()``
       and extract the subject addresses from it;
    2. fetch every subject address and extract the course addresses;
    3. fetch every course address with ``?mode=detail`` appended and hand the
       result to ``JsonDumper``, which dumps it to ``allCourses.json``.

    Progress messages and the total elapsed time are printed to stdout.

    :param insertDB: forwarded to ``JsonDumper.dump``; if True, then all of
        the data will be inserted into mongoDB
    :param year: first argument passed to ``addrBuilder`` (defaults to the
        previously hard-coded ``'2018'``)
    :param semester: second argument passed to ``addrBuilder`` (defaults to
        the previously hard-coded ``'spring'``); presumably the term name --
        confirm against ``addrBuilder``
    """
    start = time.time()

    # Stage 1: the index page listing every subject.
    builder = addrBuilder(year, semester)
    subject_getter = XmlGetter()
    subject_getter.add_get_content_coroutine(builder.build())
    print('Please wait, getting all subjects...')
    subjects_raw = subject_getter.run_getter()
    subject_addrs = InfoExtractor(subjects_raw).find_all_sub()

    # Stage 2: one request per subject to discover its courses.
    course_getter = XmlGetter()
    for addr in subject_addrs:
        course_getter.add_get_content_coroutine(addr)
    print('Please wait, getting all courses...')
    courses_raw = course_getter.run_getter()
    courses_list = InfoExtractor(courses_raw).find_all_courses()

    # Stage 3: one request per course, in detail mode.
    detail_getter = XmlGetter()
    for course_addr in courses_list:
        detail_getter.add_get_content_coroutine(course_addr + '?mode=detail')
    print('Please wait, getting all sections...')
    details_raw = detail_getter.run_getter()

    JsonDumper(details_raw).dump('allCourses.json', insertDB)

    print('Time used: ', time.time() - start)
示例#2
0
    def __init__(self):
        """Load the saved extract queue (if any) and build the extractor.

        ``self.__ext_queue`` starts as an empty dict and is replaced by the
        unpickled contents of ``config.path_extract_list`` when that file
        exists. ``self.__ie`` is an ``InfoExtractor`` built from the
        ``seminar.xml`` path under ``config.path_extract_onto`` and that
        directory itself.
        """
        # NOTE(review): the original comment read
        # "id : item{title, url, filename, decision }" -- presumably the
        # schema of the queue entries; confirm against the code that fills it.
        self.__ext_queue = {}
        if os.path.isfile(config.path_extract_list):
            # Pickle data is binary: open in 'rb' so this works on Python 3
            # (and on Windows), and use a context manager so the handle is
            # closed deterministically.
            # NOTE: pickle can execute arbitrary code while loading; this
            # file must only ever be written by this application.
            with open(config.path_extract_list, 'rb') as queue_file:
                self.__ext_queue = pickle.load(queue_file)

        self.__ie = InfoExtractor(config.path_extract_onto + "/seminar.xml",
                                  config.path_extract_onto)