Example #1
def user_profile(uid):
    uid = uid.strip()
    if not uid:
        return json.dumps({
            'msg': 'uid is required'
        }), 400

    s = spider.start_session()
    user = spider.profile(s, uid)

    if 'uid' not in user:
        return json.dumps({
            'msg': 'user uid {} does not exist'.format(uid)
        }), 404

    if int(user['fans_count']) > count_limit or int(user['follow_count']) > count_limit:
        return json.dumps({
            'msg': 'user fans ({}) or follows ({}) count limit exceeded'.format(user['fans_count'], user['follow_count'])
        }), 403

    res = spider.crawl(s, uid)
    with_addr = request.args.get('with_addr', '')

    if with_addr and int(with_addr) == 1:
        res = spider.with_addr(s, res)

    return json.dumps({
        'user': user,
        'circle': res
    }), 200
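
For context, a view like this is normally wired into a Flask app; a minimal sketch of that wiring, assuming Flask, the project's spider module, and a count_limit defined elsewhere (the route path and the limit value below are assumptions):

import json

from flask import Flask, request

import spider  # the project's spider module, interface assumed from the snippet above

app = Flask(__name__)
count_limit = 10000  # hypothetical threshold; the real value is configured elsewhere

# Register the handler from Example #1 under an assumed URL rule.
app.add_url_rule('/user/<uid>', view_func=user_profile)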
Example #2
def main():
    # Read the command-line arguments (skip the script name)
    args = sys.argv[1:]

    # Decide how to crawl: an optional second argument caps the number of files
    if len(args) in (1, 2):
        url = args[0]
        maxFiles = int(args[1]) if len(args) == 2 else float('inf')
        print("Started crawling " + url + " domain")
        spider = mySpider()
        spider.crawl(url, maxFiles)
        print("Finished crawling " + url)
    else:
        print_usage()
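
The print_usage helper called above is not shown; a plausible sketch (the wording and script name are assumptions), together with the matching invocations:

import sys

def print_usage():
    # Hypothetical helper; the original implementation is not part of the snippet.
    print("Usage: python crawler.py <url> [max_files]")
    sys.exit(1)

# Assumed invocations:
#   python crawler.py https://example.com        crawl with no file limit
#   python crawler.py https://example.com 100    stop after 100 files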
Example #3
def crawl_menu():
    print('Enter number of start links')
    n = int(input())
    li = []
    for i in range(n):
        print(f'Enter link number {i + 1}')
        li.append(input())
    print('Enter number of docs')
    count = int(input())
    print('Enter output path')
    path = input()
    print(
        'Are you sure you want to crawl? It might take a few minutes '
        '(enter yes to crawl, anything else to stop)'
    )
    should_crawl = input().lower() == 'yes'
    if not should_crawl:
        return

    t0 = time()
    crawl(li, crawl_limit=count, output_path=path)
    t1 = time()
    total_time = t1 - t0
    print(f'Finished crawling! It took {total_time:.2f} seconds')
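
The same crawl can also be started non-interactively by calling crawl with the values the menu would collect; a small sketch (URLs, limit, and output path are placeholders):

from time import time

start_links = ['https://example.com', 'https://example.org']  # placeholder links
t0 = time()
crawl(start_links, crawl_limit=100, output_path='out/docs')
print(f'Finished crawling! It took {time() - t0:.2f} seconds')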
Example #4
def generate_graph(start_link_list, n_link_to_follow=2000, verbose=True):

    graph = None
    pid = os.getpid()
    i = 0
    num_links = len(start_link_list)

    if verbose:
        print(str(pid) + " --- must crawl " + str(num_links) + " links.")
    for link in start_link_list:
        link = link.rstrip()
        graph = crawl(link, n_link_to_follow, graph, pid)
        i += 1
        if verbose:
            print(str(pid) + " --- Crawled " + str(i) + " links out of " + str(num_links))

    write_graph(graph, 'graph_generated_by_' + str(pid))
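
The pid in the log lines and in the output filename suggests generate_graph is meant to run in several processes at once; a minimal sketch of that, assuming the start links are simply split into one chunk per worker:

import multiprocessing

def crawl_in_parallel(all_start_links, n_workers=4):
    # Hypothetical driver: each worker gets a slice of the start links and
    # writes its own graph file, keyed by its pid inside generate_graph.
    chunks = [all_start_links[i::n_workers] for i in range(n_workers)]
    with multiprocessing.Pool(n_workers) as pool:
        pool.map(generate_graph, chunks)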
Example #5
def crawler_task():
    print('Starting crawl task')
    for region in REGION_TABLES:
        crawl(region, settings.PROCESS_COUNT)
Example #6
def crawl_proxy(index):
    crawl(index)
    return jsonify({
        "status": 0,
        "msg": "ok"
    })
Example #7
from spider import search, crawl
from utilities import cli
import sys


if __name__ == '__main__':
    key_word = input('Enter comic name: ')
    comics = search(key_word)
    if not comics:
        print('No item found')
        sys.exit()

    name = cli.select(comics.keys())
    comic, link = name, comics[name]
    ready = cli.confirm(f'Download {comic} now?')
    if ready:
        crawl(comic, link)

    print('DONE!')