Python Crawl примеры использования

Язык программирования: Python

Пространство имен/Пакет: models

Класс/Тип: Crawl

Примеров на hotexamples.com: 3

Python Crawl — найдено 3 примера. Это лучшие примеры Python-кода для models.Crawl, взятые из проектов с открытым исходным кодом. Вы можете оценивать каждый пример, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

save(2)

Crawl(1)

action(1)

site(1)

status(1)

Пример #1

Показать файл

Файл: views.py Проект: dip-kush/CrawlerUI

def _read_upload_stripped(upload):
    """Return the lines of an uploaded file, each stripped, joined together.

    A missing upload (``None``) yields an empty string instead of raising
    ``AttributeError`` on ``None.readlines()``.
    """
    if upload is None:
        return ""
    return "".join(line.strip() for line in upload.readlines())


def crawlingController(request):
    """Store the submitted crawl settings, run the crawl and render the result.

    For a POST request the view:

    1. collects the crawl settings from ``request.POST`` / ``request.FILES``;
    2. persists them as a ``Crawl`` row;
    3. replaces the two uploaded scripts in the settings dict with their
       flattened text and passes the dict to ``initializeParams``;
    4. if that returns a truthy object, derives node/edge data from its
       ``graph`` attribute, runs ``pathSourcetoSink`` and ``getWfs`` for the
       saved crawl, and renders ``run.html``; otherwise renders
       ``error.html``.

    Any other HTTP method renders ``error.html`` (previously the view fell
    through and returned ``None``, which Django rejects).

    Args:
        request: the incoming Django request.

    Returns:
        The response produced by ``render``.
    """
    if request.method != 'POST':
        return render(request, "error.html")

    print(request.FILES)
    crawling_spec = {
        "login_script": request.FILES.get('login-script', None),
        "login_url": request.POST.get('login-url', ""),
        "form_values_script": request.FILES.get('form-values-script', None),
        "base_address": request.POST.get('base-address', ""),
        "start_url": request.POST.get('start-url', ""),
        "black_list_urls": request.POST.get('black-list-urls', ""),
        "scope_urls": request.POST.get('scope-urls', ""),
        "wait_time": request.POST.get('wait-time', ""),
        "depth": request.POST.get('depth', "100"),
    }

    # Flatten both uploads to text; a missing upload becomes "".
    login_data = _read_upload_stripped(crawling_spec["login_script"])
    form_data = _read_upload_stripped(crawling_spec["form_values_script"])

    bs = BeautifulSoup(form_data)
    print(bs)

    # The dict keys are exactly the Crawl field names used by this view, and
    # at this point the two script entries still hold the uploaded files.
    obj = Crawl(**crawling_spec)
    obj.save()

    # From here on the spec carries the script text, not the file objects.
    crawling_spec["login_script"] = login_data
    crawling_spec["form_values_script"] = form_data
    fsm = initializeParams(crawling_spec)
    if not fsm:
        return render(request, "error.html")

    returnJsonGraph(fsm.graph)
    number_of_nodes = fsm.graph.number_of_nodes()
    number_of_edges = len(fsm.graph.edges())
    nodes = getNodes(fsm.graph)
    edges = getEdges(fsm.graph)

    # Re-read the row saved above by primary key. ``latest("id")`` could
    # return a row written by a concurrent request.
    crawl = Crawl.objects.get(pk=obj.pk)
    pathSourcetoSink(fsm, crawl)
    print(crawl.id)
    workflows = getWfs(crawl.id)
    print("workflows")
    print(workflows)

    context = {
        'num_nodes': number_of_nodes,
        'num_edges': number_of_edges,
        'nodes': nodes,
        'edges': edges,
        'workflows': workflows,
    }
    return render(request, 'run.html', context)

Пример #2

Показать файл

Файл: ebay.py Проект: dlluncor/simple-crawl

    def crawl_brands():
        """
        Fetch all officially recorded brands from eBay with their models.

        Each brand entry is rewritten in place as
        ``{'count': <original value>, 'models': <models response>}`` and the
        result is stored on ``site.brands`` only when every brand succeeded.
        The outcome is recorded in a ``Crawl`` row whose ``status`` is
        ``'SUCCESS'`` or the first 1024 characters of the traceback.
        Both ``site`` and the ``Crawl`` row are saved in either case.
        """
        logging.info("Crawl cell phone brands from eBay")

        site = EbaySpider.get_site()

        crawl = Crawl()
        crawl.site = EbaySpider.name
        crawl.action = "crawl brands"
        crawl.status = 'SUCCESS'

        try:
            brands_url = EbaySpider.URLS['data']['brands']
            brands = EbaySpider.extract_data_from_ajax_request(brands_url)
            for name in brands:
                # Wrap the raw count before requesting the models.
                brands[name] = {'count': brands[name]}
                model_base = EbaySpider.URLS['data']['model']
                url = model_base + "&" + urllib.urlencode({'Brand': name})
                print(url)
                brands[name]['models'] = EbaySpider.extract_data_from_ajax_request(url)
            site.brands = brands
        except Exception:
            failure = traceback.format_exc()
            logging.info(">>>> Fail to crawl brands from ebay:%s" % failure)
            crawl.status = failure[:1024]

        site.save()
        crawl.save()

Пример #3

Показать файл

def save_crawl(url):
    """Replace any stored crawl for *url* with a fresh one and return its id.

    Calls ``remove_crawl(url)`` first, then adds and commits a new ``Crawl``
    stamped with the current local time.

    Args:
        url: value stored in the new row's ``url`` field.

    Returns:
        The ``id`` of the newly committed ``Crawl``.

    Raises:
        Whatever ``remove_crawl``, ``Session`` or the commit raises; the
        session is closed in every case.
    """
    remove_crawl(url)
    session = Session()
    try:
        crawl = Crawl(url=url, ts=datetime.now())
        session.add(crawl)
        session.commit()
        # Read the id while the session is still open.
        return crawl.id
    finally:
        # Close even when add/commit fails, so the session is not leaked.
        session.close()