示例#1
0
def get_personal_details_table(url):
    """
    Fetch ``url`` and return the rows of its personal-details table.

    The table is the first ``<table>`` found with ``cellspacing="2"`` and
    ``border="0"``. Its first ``<tr>`` is left out (presumably a header
    row -- confirm against the page).

    :param url: address handed to ``get_page``
    :return: every ``<tr>`` of that table except the first
    """
    page = get_page(url)
    table_attrs = {'cellspacing': '2', 'border': '0'}
    details_table = page.find('table', attrs=table_attrs)
    all_rows = details_table.find_all('tr')
    return all_rows[1:]
示例#2
0
def login_action(account):
    """
    Log in through the login form of the library information system.

    The form's ``action`` is resolved against the login page URL, so a
    relative or missing ``action`` still produces a usable target. An
    absolute ``action`` is used as given.

    :param account: object whose ``to_dict()`` result is posted as form data
    :return: page source response, as returned by ``post_page``
    :raises AttributeError: if ``soup.find('form')`` returns ``None``
    """
    login_page_url = urljoin(nthu_library_url.info_system, '?func=file&file_name=login1')
    soup = get_page(login_page_url)
    # A form without an action submits to the page it lives on; urljoin
    # returns the base URL unchanged when the reference is empty.
    action = soup.find('form').attrs.get('action') or ''
    login_url = urljoin(login_page_url, action)
    return post_page(login_url, data=account.to_dict())
示例#3
0
def login_action(account):
    """
    Submit the account's credentials to the library login form.

    :param account: object whose ``to_dict()`` result is sent as form data
    :return: page source response, i.e. whatever ``post_page`` returns
    """
    login_page = urljoin(nthu_library_url.info_system, '?func=file&file_name=login1')
    form = get_page(login_page).find('form')
    # The form's own "action" attribute is the address to post to.
    target = form.attrs.get('action')
    return post_page(target, data=account.to_dict())
示例#4
0
def crawl_available_space():
    """
    Read the available-space status table into a dictionary.

    Every ``<tr>`` after the first one inside the ``status`` section
    contributes one entry.

    :return: dict mapping the text of each row's first ``<td>`` to the
             text of its second ``<td>``
    """
    page = get_page(nthu_library_url.available_space)
    rows = page.find('section', 'status').find_all('tr')
    cells_per_row = (row.find_all('td') for row in rows[1:])
    return {cells[0].text: cells[1].text for cells in cells_per_row}
示例#5
0
def _crawl_transfer(url):
    """
    Map the link texts of a transfer-exam page to absolute URLs.

    The first ``<a>`` inside the ``clearfix`` div is skipped. A link without
    an ``href`` resolves to ``url`` itself.

    :param url: page to fetch; also the base for resolving each ``href``
    :return: dict of ``{link text: resolved URL}``
    """
    page = get_page(url)
    anchors = page.find('div', 'clearfix').find_all('a')
    return {
        anchor.text: urljoin(url, anchor.get('href', ''))
        for anchor in anchors[1:]
    }
示例#6
0
def crawl_available_space():
    """
    Collect label/value pairs from the available-space status section.

    :return: dict keyed by the text of each row's first cell, holding the
             text of the row's second cell; the first ``<tr>`` is skipped
    """
    status_section = get_page(nthu_library_url.available_space).find('section', 'status')
    table_rows = status_section.find_all('tr')

    space = {}
    for row in table_rows[1:]:
        cells = row.find_all('td')
        label, count = cells[0].text, cells[1].text
        space[label] = count
    return space
示例#7
0
def _crawl_transfer(url):
    """
    Gather the links listed on a transfer-exam page.

    :param url: page to fetch and base address for relative ``href`` values
    :return: dict whose keys are link texts and whose values are the links
             resolved against ``url``; the first link in the ``clearfix``
             div is not included
    """
    container = get_page(url).find('div', 'clearfix')
    pairs = (
        (a_tag.text, urljoin(url, a_tag.get('href', '')))
        for a_tag in container.find_all('a')[1:]
    )
    return dict(pairs)
示例#8
0
def _crawl_transfer(year, url):
    """
    Store one ``Sheet`` for every link on a transfer-exam page.

    :param year: passed through as the second argument of each ``Sheet``
    :param url: page to fetch; also the base for resolving each ``href``
    :return: None -- sheets are added to ``db.session`` one by one and
             committed once after the loop
    """
    page = get_page(url)
    anchors = page.find('div', 'clearfix').find_all('a')

    # The first anchor of the block is skipped.
    for anchor in anchors[1:]:
        subject_name = anchor.text
        sheet_url = urljoin(url, anchor.get('href', ''))

        subject = get_or_create(Subject, subject_name)
        exam_type = get_or_create(Examtype, TRANSFER_EXAMS)
        # The third Sheet argument is None here.
        db.session.add(Sheet(sheet_url, year, None, subject, exam_type))

    db.session.commit()
示例#9
0
def _crawl_detail(url):
    """
    Build a per-year index of the links in a department's ``listview`` table.

    Each ``<tr>`` after the first is one entry: the text of its first
    ``<td>`` is the outer key, and every ``<a>`` in the row becomes an
    inner ``{link text: resolved URL}`` item.

    :param url: page to fetch; also the base for resolving each ``href``
    :return: dict of ``{first-cell text: {link text: resolved URL}}``
    """
    page = get_page(url)
    rows = page.find('table', 'listview').find_all('tr')
    return {
        row.find_all('td')[0].text: {
            anchor.text: urljoin(url, anchor.get('href', ''))
            for anchor in row.find_all('a')
        }
        for row in rows[1:]
    }
示例#10
0
def crawl_past_year_questions():
    """
    Walk the past-year-questions index and crawl every page it links to.

    Links inside the first ``clearfix`` div (all inner blocks but the first)
    go to ``_crawl_detail``; links inside the ``list02 clearfix`` list go to
    ``_crawl_transfer``. Each target is built by appending the ``href`` to
    ``nthu_library_url.past_year_questions``.

    :return: None -- the crawlers' return values are not used
    """
    index = get_page(nthu_library_url.past_year_questions_url)

    department_blocks = index.find_all('div', 'clearfix')[0].find_all('div', '')
    for block in department_blocks[1:]:
        for anchor in block.find_all('a'):
            href = anchor.get('href', '')
            _crawl_detail(nthu_library_url.past_year_questions + href)

    transfer_anchors = index.find('ul', 'list02 clearfix').find_all('a')
    for anchor in transfer_anchors:
        href = anchor.get('href', '')
        _crawl_transfer(nthu_library_url.past_year_questions + href)
示例#11
0
def _crawl_detail(url):
    """
    Collect the links of a department page, grouped by table row.

    :param url: page to fetch and base address for relative ``href`` values
    :return: dict keyed by the text of each row's first ``<td>``; each value
             is a dict of ``{link text: resolved URL}`` for that row. The
             first ``<tr>`` of the ``listview`` table is skipped.
    """
    listing = get_page(url).find('table', 'listview')

    by_year = {}
    for row in listing.find_all('tr')[1:]:
        year_label = row.find_all('td')[0].text
        by_year[year_label] = dict(
            (a_tag.text, urljoin(url, a_tag.get('href', '')))
            for a_tag in row.find_all('a')
        )
    return by_year
示例#12
0
def _crawl_detail(department_name, url):
    """
    Store one ``Sheet`` for every link in a department's ``listview`` table.

    :param department_name: looked up (or created) as a ``Department``
                            before the page is fetched
    :param url: page to fetch; also the base for resolving each ``href``
    :return: None -- sheets are added to ``db.session`` and committed once
             after all rows have been processed
    """
    department = get_or_create(Department, department_name)

    page = get_page(url)
    rows = page.find('table', 'listview').find_all('tr')

    # The first row of the table is skipped.
    for row in rows[1:]:
        year_text = row.find_all('td')[0].text
        for anchor in row.find_all('a'):
            sheet_url = urljoin(url, anchor.get('href', ''))

            subject = get_or_create(Subject, anchor.text)
            exam_type = get_or_create(Examtype, AFTER_GRADUATE_EXAMS)
            # The first cell's text is converted to int for the Sheet.
            db.session.add(
                Sheet(sheet_url, int(year_text), department, subject, exam_type)
            )

    db.session.commit()
示例#13
0
def crawl_personal_page(session_url):
    """
    Scrape the ``BOR-INFO`` page of a logged-in session.

    The first three tables with ``cellspacing="2"`` are read in order:
    circulation status, contact information, management information.

    :param session_url: base URL the ``?func=BOR-INFO`` query is joined to
    :return: tuple ``(resource_links, result)``; ``resource_links`` maps each
             status label to the first single-quoted string in its link's
             ``href``, and ``result`` is
             ``{'user': <contact dict incl. 'manage'>, 'status': <status dict>}``
    """
    page = get_page(urljoin(session_url, '?func=BOR-INFO'))
    tables = page.find_all('table', attrs={'cellspacing': '2'})

    # Library circulation status, plus the link behind each status value.
    status = {}
    resource_links = {}
    for row in tables[0].find_all('tr'):
        cells = get_cols(row)
        label = cells[0].text.strip()
        anchor = cells[1].find('a')
        shown = anchor.text.strip()
        quoted = re.findall("'(.*?)'", anchor.get('href'))
        target = quoted[0]
        status[label] = shown
        resource_links[label] = target

    # Contact information. Rows with an empty label are filed under '地址'
    # and appended to what is already stored there.
    person = {}
    for row in tables[1].find_all('tr'):
        cells = get_cols(row)
        label = cells[0].text.strip() or '地址'
        text = cells[1].text.strip()
        if label in person:
            person[label] = person[label] + text
        else:
            person[label] = text

    # Management information.
    manage = {}
    for row in tables[2].find_all('tr'):
        cells = get_cols(row)
        label = cells[0].text.strip()
        text = cells[1].text.strip()
        if label == '讀者權限資料':
            # Keep only the quoted value that follows "borstatus=".
            text = re.findall("borstatus='(.*)'", text)[0]
        manage[label] = text

    person['manage'] = manage
    return resource_links, {'user': person, 'status': status}
示例#14
0
def crawl_personal_page(session_url):
    """
    Scrape the ``BOR-INFO`` page of a logged-in session.

    The first three tables with ``cellspacing="2"`` are read in order:
    circulation status, contact information, management information.
    Rows that lack a second column, a link, or a regex match are handled
    without aborting the crawl.

    :param session_url: base URL the ``?func=BOR-INFO`` query is joined to
    :return: tuple ``(resource_links, result)``; ``resource_links`` maps a
             status label to the first single-quoted string found in its
             link's ``href`` (labels without such a link are absent), and
             ``result`` is
             ``{'user': <contact dict incl. 'manage'>, 'status': <status dict>}``
    :raises IndexError: if fewer than three matching tables are on the page
    """
    soup = get_page(urljoin(session_url, '?func=BOR-INFO'))
    tables = soup.find_all('table', attrs={'cellspacing': '2'})

    # Links behind the circulation-status values
    resource_links = dict()

    # Library circulation status
    status = {}
    for row in tables[0].find_all('tr'):
        cols = get_cols(row)
        if len(cols) < 2:
            # Not a label/value row (e.g. a spacer); nothing to record.
            continue
        key = cols[0].text.strip()
        a_tag = cols[1].find('a')
        if a_tag is None:
            # No link in this row: keep the visible value, record no link.
            status[key] = cols[1].text.strip()
            continue
        status[key] = a_tag.text.strip()
        quoted = re.findall(r"'(.*?)'", a_tag.get('href') or '')
        if quoted:
            resource_links[key] = quoted[0]

    # Contact information
    person = {}
    for row in tables[1].find_all('tr'):
        cols = get_cols(row)
        if len(cols) < 2:
            continue
        # Rows with an empty label are filed under '地址' and appended to
        # the value already stored there.
        key = cols[0].text.strip() or '地址'
        val = cols[1].text.strip()
        person[key] = person[key] + val if key in person else val

    # Management information
    manage = {}
    for row in tables[2].find_all('tr'):
        cols = get_cols(row)
        if len(cols) < 2:
            continue
        key = cols[0].text.strip()
        val = cols[1].text.strip()
        if key == '讀者權限資料':
            # Keep only the quoted value after "borstatus="; if the pattern
            # is absent, fall back to the raw cell text.
            matches = re.findall(r"borstatus='(.*)'", val)
            if matches:
                val = matches[0]
        manage[key] = val

    result = dict()
    result['user'] = person
    result['status'] = status
    result['user']['manage'] = manage
    return resource_links, result
示例#15
0
def crawl_past_year_questions():
    """
    Crawl every past-year-questions page and return the combined index.

    Links inside the first ``clearfix`` div (all inner blocks but the first)
    are crawled with ``_crawl_detail``; links inside the ``list02 clearfix``
    list are crawled with ``_crawl_transfer``. Each target is built by
    appending the ``href`` to ``nthu_library_url.past_year_questions``.

    :return: dict with two keys, '研究所考古題' and '轉學考考古題', each mapping
             a link text to the result of crawling that link
    """
    index = get_page(nthu_library_url.past_year_questions_url)

    department_blocks = index.find_all('div', 'clearfix')[0].find_all('div', '')
    after_graduate_exams = {
        anchor.text: _crawl_detail(
            nthu_library_url.past_year_questions + anchor.get('href', '')
        )
        for block in department_blocks[1:]
        for anchor in block.find_all('a')
    }

    transfer_anchors = index.find('ul', 'list02 clearfix').find_all('a')
    transfer_exams = {
        anchor.text: _crawl_transfer(
            nthu_library_url.past_year_questions + anchor.get('href', '')
        )
        for anchor in transfer_anchors
    }

    return {'研究所考古題': after_graduate_exams, '轉學考考古題': transfer_exams}
示例#16
0
def crawl_past_year_questions():
    """
    Build the full index of past-year exam questions.

    :return: dict of the form
             ``{'研究所考古題': {link text: _crawl_detail result},
             '轉學考考古題': {link text: _crawl_transfer result}}``
    """
    def absolute(anchor):
        # Targets are formed by plain concatenation onto the section root.
        return nthu_library_url.past_year_questions + anchor.get('href', '')

    index_page = get_page(nthu_library_url.past_year_questions_url)

    # Department pages: every inner block of the first "clearfix" div
    # except the first one.
    first_clearfix = index_page.find_all('div', 'clearfix')[0]
    after_graduate_exams = {}
    for section in first_clearfix.find_all('div', '')[1:]:
        for a_tag in section.find_all('a'):
            label = a_tag.text
            after_graduate_exams[label] = _crawl_detail(absolute(a_tag))

    # Transfer-exam pages: links of the "list02 clearfix" list.
    transfer_exams = {}
    for a_tag in index_page.find('ul', 'list02 clearfix').find_all('a'):
        label = a_tag.text
        transfer_exams[label] = _crawl_transfer(absolute(a_tag))

    return {'研究所考古題': after_graduate_exams, '轉學考考古題': transfer_exams}
示例#17
0
def get_personal_details_table(url):
    """
    Return the rows of the personal-details table found at ``url``.

    :param url: address handed to ``get_page``
    :return: all ``<tr>`` elements, minus the first, of the first table
             having ``cellspacing="2"`` and ``border="0"``
    """
    wanted = dict(cellspacing='2', border='0')
    table = get_page(url).find('table', attrs=wanted)
    # The first row is skipped.
    return table.find_all('tr')[1:]
示例#18
0
def crawl_available_space():
    """
    Collect the text of every cell in the available-space status section.

    :return: list holding the ``.text`` of each ``<td>`` found inside the
             ``status`` section, in the order ``find_all`` yields them
    """
    soup = get_page(nthu_library_url.available_space)
    cells = soup.find('section', 'status').find_all('td')
    # Return the texts so the caller can use them.
    return [cell.text for cell in cells]
示例#19
0
def _crawl_transfer(url):
    """
    Resolve the links listed on a transfer-exam page.

    The first ``<a>`` inside the ``clearfix`` div is skipped. A link without
    an ``href`` resolves to ``url`` itself.

    :param url: page to fetch; also the base for resolving each ``href``
    :return: list of resolved URLs, in page order
    """
    soup = get_page(url)
    links = soup.find('div', 'clearfix').find_all('a')
    # Return the resolved targets so the caller can use them.
    return [urljoin(url, link.get('href', '')) for link in links[1:]]
示例#20
0
def _crawl_detail(url):
    """
    Resolve every link inside a department page's ``listview`` table.

    A link without an ``href`` resolves to ``url`` itself.

    :param url: page to fetch; also the base for resolving each ``href``
    :return: list of resolved URLs, in page order
    """
    soup = get_page(url)
    links = soup.find('table', 'listview').find_all('a')
    # Return the resolved targets so the caller can use them.
    return [urljoin(url, link.get('href', '')) for link in links]