Пример #1
0
def auto_generate(sampleurl,data,common_url):
    """Train a scrapely Scraper on a sample page, then scrape a target page.

    Trains on ``sampleurl`` with the field mapping ``data``, scrapes
    ``common_url``, and returns the first result with each field's first
    extracted value flattened to a newline-free, stripped string.
    """
    scraper = Scraper()
    scraper.train(sampleurl, data)
    first_result = scraper.scrape(common_url)[0]
    return {
        field: values[0].replace('\n', '').strip()
        for field, values in first_result.items()
    }
Пример #2
0
    def _fallback(self, template, html, source):
        """Fallback extraction path using a lazily created scrapely Scraper.

        Trains the parser from a previously stored object's attributes, then
        scrapes the given page and yields replicated template objects
        populated with the extracted values.  This is a generator; the
        trailing ``return []`` merely ends iteration.
        """
        if not self.scrapely_parser:
            self.scrapely_parser = Scraper()

        html = self.scrapely_parser.HtmlPage(body=html)
        # NOTE(review): `uri` is not defined anywhere in this scope, and
        # `objct` is only assigned later in the loop below — presumably
        # `uri` should come from `source.url`; confirm against the caller.
        db_objct = self.db.read(uri, objct)
        # BUG FIX: the original guarded with `if not db_objct:` and then
        # immediately called db_objct.attrs_to_dict() inside that branch,
        # which would raise AttributeError whenever no object was found.
        # Train/scrape only when a stored object actually exists.
        if db_objct:
            data = db_objct.attrs_to_dict()

            self.scrapely_parser.train_from_htmlpage(html, data)
            attr_dicts = self.scrapely_parser.scrape_page(html)

            for attr_dict in attr_dicts:
                objct = template._replicate(name=template.name, url=source.url)
                # Add the parsed values.
                objct.attrs_from_dict(attr_dict)
                yield objct
        # Generator return value is discarded by iteration; kept for parity.
        return []
Пример #3
0
    def test_extraction(self):
        """Train on one sample page, scrape a second, and verify extraction
        still works after a tofile/fromfile round trip."""
        samples_encoding = 'latin1'
        (train_html, train_data), (target_html, expected) = list(iter_samples(
            'scraper_loadstore', html_encoding=samples_encoding))
        scraper = Scraper()
        train_page = HtmlPage(body=train_html, encoding=samples_encoding)
        scraper.train_from_htmlpage(train_page, train_data)

        target_page = HtmlPage(body=target_html, encoding=samples_encoding)
        self._assert_extracted(scraper.scrape_page(target_page), expected)

        # The scraper must survive serialization/deserialization unchanged.
        buf = StringIO()
        scraper.tofile(buf)
        buf.seek(0)
        reloaded = Scraper.fromfile(buf)
        self._assert_extracted(reloaded.scrape_page(target_page), expected)
Пример #4
0
# Google Search Package: https://breakingcode.wordpress.com/2010/06/29/google-search-python/
# Scrapely Package: https://github.com/scrapy/scrapely
# https://www.analyticsvidhya.com/blog/2015/10/beginner-guide-web-scraping-beautiful-soup-python/
# https://stackoverflow.com/questions/3898574/google-search-using-python-script

#imports
import urllib2
from bs4 import BeautifulSoup
from googlesearch.googlesearch import GoogleSearch
import csv
from scrapely import Scraper
from bs4 import UnicodeDammit
from collections import Counter
import re
import time
s = Scraper()


query = raw_input("Search Query: ")
try:
    n = int(raw_input("# of Websites to Scrape: "))
except ValueError:
    print "Enter Valid # of Websites"
    sys.exit()
'''
UNIXtime = int(time.time())
filename = query.replace(" ","_").lower()+"_"+str(n)+"_"+str(UNIXtime)
print filename
'''
# initialize dictionary to store search results
# rows: Name, Author, Description, Url
Пример #5
0
 def __init__(self, threshold=0.75, k=5):
     """Store configuration and create a fresh scrapely ``Scraper``.

     The semantics of ``threshold`` and ``k`` are not visible in this
     block — presumably a match/score cutoff and a neighbour or result
     count; confirm against the methods that read them.
     """
     self.threshold = threshold  # score cutoff (assumed) — default 0.75
     self.k = k                  # count parameter (assumed) — default 5
     self.scraper = Scraper()    # scrapely Scraper used for extraction
Пример #6
0
"""
    @author: 挖掘机小王子
    @contact: [email protected]
    @software: PyCharm
    @file: test.py
    @time: 2019/12/6 11:53
    @desc:
"""
from scrapely import Scraper
import requests


# Demo: train a scrapely scraper on a known page, then re-scrape that page.
scraper = Scraper()

url = 'https://www.ituring.com.cn/article'
# Training sample: map field names to the exact strings present on the page.
data = {'name': 'duxingzhe', 'title': '这两天的面试经验总结'}
# response = requests.get(url).text
scraper.train(url, data)
# Re-scrape the same URL with the learned template; decode the page as UTF-8.
result = scraper.scrape(url, encoding='utf-8')
print(result)
Пример #7
0
# RDF namespaces for the wikibus/Omnibus Revue dataset.
BUS = Namespace("http://purl.org/wikibus/omnibusrevue/")  # vocabulary terms (predicates, Bus class)
OR = Namespace("http://purl.org/wikibus/omnibusrevue/bus/")  # individual bus resources
FOAF = Namespace("http://xmlns.com/foaf/0.1/")  # FOAF vocabulary (used for foaf:page)

def CreateGraph(busId, busData):
    """Build an RDF graph for one bus and return it serialized as Turtle.

    busId: identifier used as the resource name under the OR namespace.
    busData: scrapely scrape result — a list whose first element maps
        field names to lists of extracted strings.
    """
    graph = Graph()
    busRes = OR[busId]
    graph.add((busRes, RDF.type, BUS["Bus"]))
    graph.add((busRes, FOAF["page"], Literal(busUrlFormatWithName.format(busData[0]['model'][0].encode('utf-8'), busId))))
    for key in busData[0]:
        obj = busData[0][key][0].encode('utf-8')
        # BUG FIX: the original used `<>`, which was removed in Python 3
        # (and long deprecated in Python 2); `!=` is the portable spelling.
        # "k.A" ("keine Angabe") marks a missing value — skip those fields.
        if obj != "k.A":
            graph.add((busRes, BUS[key], Literal(obj)))
    return graph.serialize(format='turtle')

busScraper = Scraper()
busScraper.train(busUrlFormat % '1120301', exampleData)

offset = 0
while True:
    html = scraperwiki.scrape(catalogUrlFormat % offset)
    root = lxml.html.fromstring(html)
    busIds = root.cssselect('input[type=checkbox]')
    if len(busIds) > 0:
        for busCheckbox in busIds:
            busUrl = busUrlFormat % busCheckbox.attrib['value']
            busGraph = CreateGraph(busCheckbox.attrib['value'], busScraper.scrape(busUrl))
            dataStored = {'url': busUrl, 'graph': busGraph}
            scraperwiki.sqlite.save(unique_keys=['url'], data=dataStored)
        offset += 20
    else:
Пример #8
0
def scrapely_test():
    """Demo: train scrapely on one country page, then scrape another."""
    scraper = Scraper()
    training_url = 'http://example.python-scraping.com/view/Afghanistan-1'
    sample_fields = {'name': 'Afghanistan', 'population': '29,121,286'}
    scraper.train(training_url, sample_fields)
    # A structurally similar page: the learned template should transfer.
    target_url = 'http://example.python-scraping.com/view/United-Kingdom-239'
    print(scraper.scrape(target_url))
import urllib, scraperwiki
from scrapely import Scraper

s = Scraper()                             # note how we're *not* using Scraper() - this uses our custom version
url1 = 'http://www.thefest.com/store/beatles-ornaments/the-beatles-applique-stocking-p-3901'
data = {'name': 'THE BEATLES APPLIQUE STOCKING', 'category': 'Beatles Ornaments', 'description': 'BRAND NEW- If you are good, maybe Santa will put something special in this poly/cotton applique stocking - He will have to work overtime to fill this! Measures 19" diagonally from upper left facing to the tip of the toe. This is the first Christmas Beatles Stocking ever offered!', 'price': '$20.00', 'catalog number': '7287'}
s.train(url1,data)
url2 = 'http://www.thefest.com/store/beatles-ornaments/yellow-submarines-light-set-p-3876'
print s.scrape(url2)import urllib, scraperwiki
from scrapely import Scraper

s = Scraper()                             # note how we're *not* using Scraper() - this uses our custom version
url1 = 'http://www.thefest.com/store/beatles-ornaments/the-beatles-applique-stocking-p-3901'
data = {'name': 'THE BEATLES APPLIQUE STOCKING', 'category': 'Beatles Ornaments', 'description': 'BRAND NEW- If you are good, maybe Santa will put something special in this poly/cotton applique stocking - He will have to work overtime to fill this! Measures 19" diagonally from upper left facing to the tip of the toe. This is the first Christmas Beatles Stocking ever offered!', 'price': '$20.00', 'catalog number': '7287'}
s.train(url1,data)
url2 = 'http://www.thefest.com/store/beatles-ornaments/yellow-submarines-light-set-p-3876'
print s.scrape(url2)