Example #1
class JrjSpider(scrapy.Spider):
    name = 'jrj'
    allowed_domains = ['stock.jrj.com.cn']
    start_urls = ['http://*****:*****']

    def parse(self, response):
        temp0 = './/div[@class="titmain"]//h1//text()'
        temp = './/div[@class="texttit_m1"]//p//text()'
        #        print(response)
        item = CrawlerItem()
        item['link'] = response.url
        # Pull the title and body paragraphs with the site-specific XPaths above.
        ans0 = response.xpath(temp0).getall()
        ans1 = response.xpath(temp).getall()
        item['title'] = ans0
        item['content'] = ans1
        # Attach any pre-crawl metadata supplied with this URL.
        preInfo = None
        if self.preInfoUrlDict is not None:
            preInfo = self.preInfoUrlDict[item['link']]
        elif len(self.preInfoList) == 1:
            preInfo = self.preInfoList[0]
        ansFinal = {
            'type': 'crawlerResult',
            'content': {
                'link': item['link'],
                'title': ans0,
                'content': ans1,
                'preInfo': preInfo
            }
        }
        # Serialize the result and push it onto the agent queue for downstream consumers.
        ansJson = json.dumps(ansFinal)
        self.cacheAgent.push(ansJson)
        #        self.cache.close()
        #        self.cacheAgent.close()
        yield item
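Example #1 shows only the parse callback; the attributes it uses (cacheAgent, preInfoUrlDict, preInfoList) are set up in an initializer like the one in Example #2. A minimal, hypothetical way to run such a spider in-process could look as follows; the keyword arguments are an assumption borrowed from Example #2's constructor, and the cache paths are placeholders:

# Hypothetical launcher sketch; assumes JrjSpider takes the same keyword
# arguments as GeneralSpider in Example #2.
from scrapy.crawler import CrawlerProcess

process = CrawlerProcess(settings={'LOG_LEVEL': 'INFO'})
process.crawl(JrjSpider,
              cacheCrawlerPath='/tmp/cache_crawler',  # placeholder paths
              cacheKey='0',
              cacheAgentPath='/tmp/cache_agent')
process.start()  # blocks until the crawl finishes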
Example #2
class GeneralSpider(scrapy.Spider):
    name = 'general'
    allowed_domains = []
    start_urls = ['http://finance.jrj.com.cn/2020/04/24012529362098.shtml']

    def __init__(self,
                 cacheCrawlerPath='',
                 cacheKey='',
                 cacheAgentPath='',
                 *args,
                 **kwargs):
        super().__init__(*args, **kwargs)
        self.cacheKey = cacheKey
        self.cacheCrawlerPath = cacheCrawlerPath
        print('aa', cacheCrawlerPath, 'bb', cacheAgentPath, cacheKey)
        self.cache = Cache(cacheCrawlerPath)     # job queue this spider reads from
        self.cacheAgent = Cache(cacheAgentPath)  # result queue this spider writes to
        self.oContentExtract = CContentExtract('boilerpipe')
        #        jsonStr = self.cache[int(cacheKey)]
        _, jsonStr = self.cache.pull()
        print('cc', jsonStr)
        # No queued job: fall back to a default start URL and empty job metadata.
        if jsonStr is None:
            oUrlList = [
                'http://finance.jrj.com.cn/2020/04/24012529362098.shtml'
            ]
            self.start_urls = oUrlList
            # Without these defaults parse() would fail on the attributes below.
            self.preInfoList = []
            self.preInfoUrlDict = None
        else:
            oUrlList = json.loads(jsonStr)
            self.start_urls = oUrlList['urlList']
            self.logInfo = oUrlList['logInfo']
            self.preInfoList: list = oUrlList['preInfo']
            self.preInfoUrlDict = None
            # Map each start URL to its preInfo entry when the two lists line up.
            if len(self.preInfoList) == len(self.start_urls):
                self.preInfoUrlDict = dict()
                for idx, url in enumerate(self.start_urls):
                    self.preInfoUrlDict[url] = self.preInfoList[idx]

            logInfo = {'type': 'logInfo', 'content': {'data': self.logInfo}}
            logInfoStr = json.dumps(logInfo)
            self.cacheAgent.push(logInfoStr)

    def parse(self, response):
        temp0 = './/div[@class="titmain"]//h1//text()'
        temp = './/div[@class="texttit_m1"]//p//text()'
        #        print(response)
        item = CrawlerItem()
        item['link'] = response.url
        html = response.text
        #        print(html)
        #        ans0 = response.xpath(temp0).getall()
        #        ans1 = response.xpath(temp).getall()
        # Extract title and body with the boilerpipe-based extractor instead of
        # the site-specific XPaths above.
        ans0, ans1 = self.oContentExtract.boilerpipe(html)
        item['title'] = ans0
        item['content'] = ans1
        print(ans0, ans1)
        preInfo = None
        if self.preInfoUrlDict is not None:
            preInfo = self.preInfoUrlDict[item['link']]
        elif len(self.preInfoList) == 1:
            preInfo = self.preInfoList[0]
        ansFinal = {
            'type': 'crawlerResult',
            'content': {
                'data': {
                    'link': item['link'],
                    'title': ans0,
                    'content': ans1
                },
                'preInfo': preInfo
            }
        }
        ansJson = json.dumps(ansFinal)
        self.cacheAgent.push(ansJson)
        #        self.cache.close()
        #        self.cacheAgent.close()
        yield item
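Example #2's spider pulls its job from the crawler cache and pushes results to the agent cache. Assuming Cache here is diskcache.Cache (whose push/pull calls match the usage above), the producer side of that handshake could be sketched as follows; the paths and payload values are placeholders, while the urlList/logInfo/preInfo keys come from the spider's __init__:

import json
from diskcache import Cache  # assumption: the Cache used above is diskcache.Cache

crawlerCache = Cache('/tmp/cache_crawler')  # placeholder path
job = {
    'urlList': ['http://finance.jrj.com.cn/2020/04/24012529362098.shtml'],
    'logInfo': {'jobId': 0},   # placeholder log payload
    'preInfo': [None],         # one entry per URL
}
key = crawlerCache.push(json.dumps(job))  # the spider retrieves it via cache.pull()
print('queued job under key', key)

The spider is then launched with scrapy crawl general -a cacheCrawlerPath=... -a cacheKey=... -a cacheAgentPath=..., which is exactly what _newProcess in Example #3 does.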
Example #3
class CCrawlerManager:
    def __init__(self, name, workDirectory: str, oLog: CLog, cachePath: str,
                 cacheAgentPath: str):
        self.workDirectory = workDirectory
        self.jobsList = None
        self.oLog = oLog
        self.outputFolder = workDirectory
        self.name = name + '_crawler'
        self.jobCnt = 0
        self._cachePathCrawler = cachePath
        self._cachePathAgent = cacheAgentPath
        # Job queue shared with the spiders launched by _newProcess().
        self.cache = Cache(cachePath)

    def _newProcess(self, crawlerName, oUrlCacheKey: str):
        # Only used by the commented-out '-o' invocations below.
        outFilePath = 'file:///' + self.outputFolder + self.name + '.json'
        #        print(outFilePath,urlsFilePath)
        #        process = subprocess.Popen(['scrapy','crawl',crawlerName,'-o',outFilePath,'-a',
        #                                    'cacheCrawlerPath='+ self._cachePathCrawler,'-a',
        #                                    'cacheKey='+oUrlCacheKey,'-a',
        #                                    'cacheAgentPath=' + self._cachePathAgent],
        #                                   shell=True,
        #                                   cwd=self.workDirectory)
        #        print('scrapy','crawl',crawlerName,'-o',outFilePath,'-a',
        #                                    'cacheCrawlerPath='+ self._cachePathCrawler,'-a',
        #                                    'cacheKey='+oUrlCacheKey,'-a',
        #                                    'cacheAgentPath=' + self._cachePathAgent)
        # NOTE: an argument list combined with shell=True is only interpreted as
        # intended on Windows; on POSIX either drop shell=True or pass a single
        # command string.
        process = subprocess.Popen([
            'scrapy', 'crawl', crawlerName, '-a',
            'cacheCrawlerPath=' + self._cachePathCrawler, '-a',
            'cacheKey=' + oUrlCacheKey, '-a',
            'cacheAgentPath=' + self._cachePathAgent
        ],
                                   shell=True,
                                   cwd=self.workDirectory)
        #        print('scrapy','crawl',crawlerName,'-a',
        #                                    'cacheCrawlerPath='+ self._cachePathCrawler,'-a',
        #                                    'cacheKey='+oUrlCacheKey,'-a',
        #                                    'cacheAgentPath=' + self._cachePathAgent)

        return process

    def engineStart(self, jobsList: list):
        for oUrlList in jobsList:
            oUrlList.index = self.jobCnt
            tempKey = self._prepareJob(oUrlList.exportJson())
            self.oLog.safeRecordTime(str(oUrlList.index) + "start")
            temp = self._newProcess('general', tempKey)
            #            temp.wait()
            self.oLog.safeRecordTime(str(oUrlList.index) + "end")
            # Note: returns after launching the first job, so only one scrapy
            # process is started per call.
            return temp

    def _prepareJob(self, content: str):
        #        key = str(self.jobCnt)
        #        if(self.cache.get(key)==False):
        #            raise ValueError("this key exists in the cache")
        #            return None
        #        else:
        #            self.cache[key] = content
        #            self.jobCnt+=1
        #            return key
        self.jobCnt += 1
        #        print(self.cache.directory)
        # Push the serialized job onto the crawler cache; the spider receives the
        # returned key via its cacheKey argument.
        key = self.cache.push(content)
        return str(key)

    def closeCache(self):
        self.cache.close()
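A rough driver for CCrawlerManager, assuming only what engineStart itself requires of a job object (a writable index attribute and an exportJson() method); SimpleJob and all paths below are placeholders:

import json

class SimpleJob:
    # Hypothetical stand-in for the project's real oUrlList objects.
    def __init__(self, urlList, preInfo):
        self.index = 0
        self._payload = {'urlList': urlList, 'logInfo': {}, 'preInfo': preInfo}

    def exportJson(self):
        return json.dumps(self._payload)

manager = CCrawlerManager('demo',
                          workDirectory='/tmp/crawler/',   # placeholder
                          oLog=oLog,                       # an existing CLog instance
                          cachePath='/tmp/cache_crawler',
                          cacheAgentPath='/tmp/cache_agent')
job = SimpleJob(['http://finance.jrj.com.cn/2020/04/24012529362098.shtml'], [None])
proc = manager.engineStart([job])
proc.wait()        # engineStart returns the Popen handle of the launched crawl
manager.closeCache()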