Python Configuration示例

编程语言: Python

命名空间/包名称: scalpel

类/类型: Configuration

hotexamples.com的示例: 5

Python Configuration - 已找到5个示例。以下是从开源项目中提取的、最受好评的 scalpel.Configuration 真实 Python 示例。您可以为示例评分，以帮助我们提高示例质量。

常用方法

显示隐藏

Configuration(5)

常用方法

Configuration (5)

示例#1

显示文件

文件： any_io.py 项目： lewoudar/scalpel

async def main() -> None:
    """Run a static spider against quotes.toscrape.com and print what it stored.

    A ``backup.mp`` path located next to this file is handed to the
    configuration as ``backup_filename``. Once the spider has finished, its
    statistics are printed, followed by every item yielded by ``read_mp``
    for that same path.
    """
    backup = Path(__file__).parent / 'backup.mp'
    settings = Configuration(
        backup_filename=f'{backup}',
        item_processors=[date_processor],
    )
    quote_spider = StaticSpider(
        urls=['http://quotes.toscrape.com'],
        parse=parse,
        config=settings,
    )
    await quote_spider.run()

    stats = quote_spider.statistics()
    print(stats)

    # From here on the results are yours to use; this example just prints them.
    # The decoder comes from the spider's own configuration.
    decoder = quote_spider.config.msgpack_decoder
    async for saved_item in read_mp(backup, decoder=decoder):
        print(saved_item)

示例#2

显示文件

async def main() -> None:
    """Run a Selenium spider against quotes.toscrape.com and print what it stored.

    The configuration receives ``selenium_driver_log_file=None``, a
    ``backup.mp`` path located next to this file, and ``date_processor`` as
    the only item processor. After the run, the spider statistics are printed
    and every record yielded by ``read_mp`` for the backup path is printed.
    """
    backup = Path(__file__).parent / 'backup.mp'
    settings = Configuration(
        selenium_driver_log_file=None,
        backup_filename=f'{backup}',
        item_processors=[date_processor],
    )
    browser_spider = SeleniumSpider(
        urls=['http://quotes.toscrape.com'], parse=parse, config=settings
    )
    await browser_spider.run()

    stats = browser_spider.statistics()
    print(stats)

    # From here on the results are yours to use; this example just prints them.
    async for record in read_mp(filename=backup, decoder=datetime_decoder):
        print(record)

示例#3

显示文件

文件： httpbin.py 项目： lewoudar/scalpel

async def main() -> None:
    """Run a Selenium spider against httpbin.org and pretty-print the stored records.

    After the run, the spider statistics are printed. Each record yielded by
    ``read_mp`` for the backup path is then printed as a title banner, a
    description, and one method/path/description group per entry of its
    ``'operations'`` list.
    """
    backup = Path(__file__).parent / 'backup.mp'
    settings = Configuration(
        selenium_driver_log_file=None,
        backup_filename=f'{backup}',
        item_processors=[date_processor],
    )
    sel_spider = SeleniumSpider(
        urls=['http://httpbin.org/'],
        parse=parse,
        config=settings,
    )
    await sel_spider.run()

    stats = sel_spider.statistics()
    print(stats)

    # From here on the results are yours to use; this example formats them.
    async for record in read_mp(filename=backup, decoder=datetime_decoder):
        title = record['title']
        print('****', title, '****')
        print(record['description'])
        print('== operations ==')
        for op in record['operations']:
            print('\tmethod:', op['method'])
            print('\tpath:', op['path'])
            # A blank line separates consecutive operations.
            print('\tdescription:', op['description'], end='\n\n')

示例#4

显示文件

    next_link = None
    try:
        element = response.driver.find_element_by_xpath(
            '//nav/ul/li[@class="next"]/a')
        next_link = element.get_attribute('href')
    except NoSuchElementException:
        pass

    if next_link is not None:
        response.follow(next_link)


def date_processor(item: dict) -> dict:
    """Stamp ``item`` with the value of ``datetime.now()`` under the ``'date'`` key.

    The dictionary is modified in place and that same object is returned.
    """
    timestamp = datetime.now()
    item['date'] = timestamp
    return item


if __name__ == '__main__':
    # Script entry point: configure a Selenium spider for quotes.toscrape.com,
    # run it, print its statistics, then print every record read back from
    # the backup path.
    backup = Path(__file__).parent / 'backup.mp'
    config = Configuration(
        selenium_driver_log_file=None,
        backup_filename=f'{backup}',
        item_processors=[date_processor],
    )
    sel_spider = SeleniumSpider(
        urls=['http://quotes.toscrape.com'], parse=parse, config=config
    )
    sel_spider.run()
    print(sel_spider.statistics())

    # From here on the results are yours to use; this example just prints them.
    for quote_data in read_mp(filename=backup, decoder=datetime_decoder):
        print(quote_data)

示例#5

显示文件

文件： green.py 项目： lewoudar/scalpel

            'message': quote.xpath('./span[@class="text"]/text()').get(),
            'author': quote.xpath('./span/small/text()').get(),
            'tags': quote.xpath('./div/a/text()').getall(),
        }
        spider.save_item(data)

    next_link = response.xpath('//nav/ul/li[@class="next"]/a').xpath(
        '@href').get()
    if next_link is not None:
        response.follow(next_link)


def date_processor(item: dict) -> dict:
    """Record the current ``datetime.now()`` value on ``item`` and hand it back.

    The value is stored under the ``'date'`` key, replacing any previous one.
    The input dictionary itself is mutated and returned; no copy is made.
    """
    current_time = datetime.now()
    item['date'] = current_time
    return item


if __name__ == '__main__':
    # Script entry point: configure a static spider for quotes.toscrape.com,
    # run it, print its statistics, then print every record read back from
    # the backup path using the decoder exposed by the spider's configuration.
    backup = Path(__file__).parent / 'backup.mp'
    config = Configuration(
        backup_filename=f'{backup}',
        item_processors=[date_processor],
    )
    spider = StaticSpider(
        urls=['http://quotes.toscrape.com'], parse=parse, config=config
    )
    spider.run()
    print(spider.statistics())

    # From here on the results are yours to use; this example just prints them.
    for quote_data in read_mp(filename=backup, decoder=spider.config.msgpack_decoder):
        print(quote_data)