Exemplo n.º 1
0
def _crawl_transfer(year, url):
    """Record one ``Sheet`` per link found on a transfer-exam listing page.

    The page at ``url`` is fetched with ``get_page``; every anchor inside
    the first ``div`` with class ``clearfix`` (except the first anchor) is
    turned into a ``Sheet`` row tagged with the ``TRANSFER_EXAMS`` exam type.
    All rows are committed in a single transaction at the end.

    Args:
        year: Value stored as the sheet's year (passed through unchanged).
        url: Address of the listing page; also the base for resolving
            relative links.
    """
    page = get_page(url)
    anchors = page.find('div', 'clearfix').find_all('a')

    # The first anchor is deliberately skipped.
    # NOTE(review): presumably a navigation/header link -- confirm.
    for anchor in anchors[1:]:
        subject_name = anchor.text
        # A missing href resolves to the page URL itself.
        target = urljoin(url, anchor.get('href', ''))

        subject = get_or_create(Subject, subject_name)
        exam_type = get_or_create(Examtype, TRANSFER_EXAMS)
        # Third positional argument is None here.
        # NOTE(review): looks like the department slot -- confirm against Sheet.
        db.session.add(Sheet(target, year, None, subject, exam_type))

    db.session.commit()
Exemplo n.º 2
0
def _crawl_detail(department_name, url):
    """Record one ``Sheet`` per link in a department's exam listing table.

    The department is looked up (or created) first, then the page at ``url``
    is fetched. Every row of the ``table`` with class ``listview`` except the
    first is read: the text of the row's first ``td`` cell is taken as the
    year, and each anchor in the row becomes a ``Sheet`` tagged with the
    ``AFTER_GRADUATE_EXAMS`` exam type. All rows are committed together at
    the end.

    Args:
        department_name: Name used to get or create the ``Department``.
        url: Address of the listing page; also the base for resolving
            relative links.
    """
    department = get_or_create(Department, department_name)

    page = get_page(url)
    rows = page.find('table', 'listview').find_all('tr')

    # The first row is skipped.
    # NOTE(review): presumably the table header -- confirm.
    for row in rows[1:]:
        year_text = row.find_all('td')[0].text
        for anchor in row.find_all('a'):
            subject_name = anchor.text
            # A missing href resolves to the page URL itself.
            target = urljoin(url, anchor.get('href', ''))

            subject = get_or_create(Subject, subject_name)
            exam_type = get_or_create(Examtype, AFTER_GRADUATE_EXAMS)
            # The year cell is converted per link, so rows without links
            # never trigger the int() conversion.
            db.session.add(
                Sheet(target, int(year_text), department, subject, exam_type)
            )

    db.session.commit()