Пример #1
0
def crawl_top_circulations(type, query):
    """Scrape top-circulation tables and store every row as a ``Circulation``.

    For each page content produced by ``get_pages(query)`` the first
    ``<table>`` with CSS class ``listview`` is located. Its ``summary``
    attribute is split into a year and a tag, and every row after the first
    is added to ``db.session`` as a ``Circulation``. A single commit is
    issued after all pages have been processed.

    Args:
        type: Value stored in each record's ``type`` field. The name shadows
            the builtin but is kept so keyword callers keep working.
        query: Passed through unchanged to ``get_pages``.

    Raises:
        IndexError: If a table's ``summary`` does not match ``<digits>年<tag>``.
        ValueError: If a row has neither four nor three child elements.
    """
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        year_tag = table.get('summary')
        # Raw string: '\d' and '\S' are invalid escapes in a normal literal.
        year, tag = re.findall(r'(\d+)年(\S+)', year_tag)[0]
        # The first <tr> is skipped (presumably the header row).
        for row in table.find_all('tr')[1:]:
            cells = row.findChildren()
            try:
                rk, title, ref, cnt = cells
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = cells
                ref = None

            circulation = Circulation(
                type=type,
                bookname=title.text.strip(' /'),
                url=ref.get('href') if ref else None,
                rank=rk.text,
                tag=tag,
                year=year,
                count=cnt.text
            )
            db.session.add(circulation)
    db.session.commit()
Пример #2
0
def get_circulation_links():
    """Collect every anchor found under ``id='cwrp'`` on the circulation pages.

    Both ``nthu_library_url.top_circulations`` and
    ``nthu_library_url.top_circulations_bc2007`` are fetched via ``get_pages``.

    Returns:
        A list of ``(info, absolute_url)`` tuples, where ``info`` is a dict
        with keys ``'text'`` and ``'href'`` taken from the anchor, and
        ``absolute_url`` is the href joined onto
        ``nthu_library_url.top_circulations``.
    """
    index_pages = get_pages([
        nthu_library_url.top_circulations,
        nthu_library_url.top_circulations_bc2007,
    ])

    links = []
    for page in index_pages:
        container = build_soup(page).find(id='cwrp')
        for anchor in container.find_all('a'):
            href = anchor.get('href')
            info = {'text': anchor.text, 'href': href}
            # Links are always resolved against the main top-circulations URL,
            # including anchors found on the bc2007 page.
            absolute = urljoin(nthu_library_url.top_circulations, href)
            links.append((info, absolute))
    return links
Пример #3
0
def get_circulation_links():
    """Return link info for each ``<a>`` inside the ``cwrp`` element.

    The two index pages (``top_circulations`` and ``top_circulations_bc2007``
    from ``nthu_library_url``) are fetched with ``get_pages``. Each anchor
    yields a tuple of a ``{'text', 'href'}`` dict and the href resolved
    against ``nthu_library_url.top_circulations``.
    """
    sources = [
        nthu_library_url.top_circulations,
        nthu_library_url.top_circulations_bc2007,
    ]

    collected = []
    for resp in get_pages(sources):
        anchors = build_soup(resp).find(id='cwrp').find_all('a')
        collected.extend(
            (
                {'text': a.text, 'href': a.get('href')},
                urljoin(nthu_library_url.top_circulations, a.get('href')),
            )
            for a in anchors
        )
    return collected
Пример #4
0
def crawl_top_circulations(query):
    """Scrape top-circulation tables and group their rows by table summary.

    For each page content produced by ``get_pages(query)`` the first
    ``<table>`` with CSS class ``listview`` is parsed. Every row after the
    first becomes one dict.

    Args:
        query: Passed through unchanged to ``get_pages``.

    Returns:
        A dict mapping each table's ``summary`` attribute to a list of dicts
        with keys ``'rank'``, ``'book_name'``, ``'link'`` and
        ``'circulations'``. ``'link'`` is ``None`` for rows without a link
        element.

    Raises:
        ValueError: If a row has neither four nor three child elements.
    """
    results = dict()
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        books = list()
        # The first <tr> is skipped (presumably the header row).
        for row in table.find_all('tr')[1:]:
            cells = row.findChildren()
            try:
                rk, title, ref, cnt = cells
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = cells
                # Without this reset, ``ref`` is either unbound (first row)
                # or still points at the previous row's link.
                ref = None
            books.append({
                'rank': rk.text,
                'book_name': title.text.strip(' /'),
                'link': ref.get('href') if ref else None,
                'circulations': cnt.text
            })
        results[table.get('summary')] = books
    return results
Пример #5
0
def crawl_top_circulations(query):
    """Scrape top-circulation tables into per-table lists of book dicts.

    Each page content from ``get_pages(query)`` is parsed, its first
    ``<table>`` with CSS class ``listview`` is selected, and every row after
    the first is converted to a dict.

    Args:
        query: Passed through unchanged to ``get_pages``.

    Returns:
        A dict keyed by each table's ``summary`` attribute. Each value is a
        list of dicts with keys ``'rank'``, ``'bookname'``, ``'link'`` and
        ``'times'``. ``'link'`` is ``None`` when the row has no link element.

    Raises:
        ValueError: If a row has neither four nor three child elements.
    """
    results = dict()
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        books = list()
        # The first <tr> is skipped (presumably the header row).
        for row in table.find_all('tr')[1:]:
            cells = row.findChildren()
            try:
                rk, title, ref, cnt = cells
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = cells
                # Reset explicitly so a link from the previous row is never
                # reused and a link-less first row does not raise.
                ref = None
            books.append({
                'rank': rk.text,
                'bookname': title.text.strip(' /'),
                'link': ref.get('href') if ref else None,
                'times': cnt.text
            })
        results[table.get('summary')] = books
    return results
Пример #6
0
def crawl_top_circulations(rank_type, query):
    """Scrape top-circulation tables into dicts tagged with year and type.

    For each page content produced by ``get_pages(query)`` the first
    ``<table>`` with CSS class ``listview`` is parsed. Its ``summary``
    attribute is split into a year and a tag, and every row after the first
    becomes one dict.

    Args:
        rank_type: Stored as-is under the ``'type'`` key of every row dict.
        query: Passed through unchanged to ``get_pages``.

    Returns:
        A dict mapping each table's ``summary`` attribute to a list of dicts
        with keys ``'type'``, ``'book_name'``, ``'url'``, ``'rank'``,
        ``'tag'``, ``'year'`` and ``'circulations'``. ``'url'`` is ``None``
        for rows without a link element.

    Raises:
        IndexError: If a table's ``summary`` does not match ``<digits>年<tag>``.
        ValueError: If a row has neither four nor three child elements.
    """
    results = dict()
    for content in get_pages(query):
        table = build_soup(content).find('table', 'listview')
        books = list()
        year_tag = table.get('summary')
        # Raw string: '\d' and '\S' are invalid escapes in a normal literal.
        year, tag = re.findall(r'(\d+)年(\S+)', year_tag)[0]
        # The first <tr> is skipped (presumably the header row).
        for row in table.find_all('tr')[1:]:
            cells = row.findChildren()
            try:
                rk, title, ref, cnt = cells
            except ValueError:
                # for year 2003, there's no <a> tag
                rk, title, cnt = cells
                # Without this reset, ``ref`` is either unbound (first row)
                # or still points at the previous row's link.
                ref = None
            books.append({
                'type': rank_type,
                'book_name': title.text.strip(' /'),
                'url': ref.get('href') if ref else None,
                'rank': rk.text,
                'tag': tag,
                'year': year,
                'circulations': cnt.text
            })
        results[table.get('summary')] = books
    return results