Exemplo n.º 1
0
 def __init__(self, crawler):
     """Configure page storage from the crawler settings.

     Reads the ``PAGE_STORAGE_*`` settings, announces the target
     collection on the job metadata, creates a gzip batch writer for it
     and connects ``self.spider_closed`` to the ``spider_closed`` signal.

     :param crawler: object exposing ``settings`` and ``signals``.
     :raises NotConfigured: if ``sh_scrapy.hsref`` cannot be imported.
     """
     # FIXME move sh_scrapy.hsref to python-hubstorage and drop it
     try:
         from sh_scrapy.hsref import hsref
     except ImportError:
         raise NotConfigured
     self.hsref = hsref

     settings = crawler.settings

     # 'vcs' is selected only by the VERSIONED_CACHE mode; anything else
     # falls back to 'cs'.
     versioned = settings.get('PAGE_STORAGE_MODE') == 'VERSIONED_CACHE'
     mode = 'vcs' if versioned else 'cs'

     self.trim_html = bool(settings.getbool('PAGE_STORAGE_TRIM_HTML'))
     self.enabled, self.on_error_enabled = _get_enabled_status(settings)

     # Each counter key is paired with the setting holding its limit.
     limit_settings = (
         ('all', 'PAGE_STORAGE_LIMIT'),
         ('error', 'PAGE_STORAGE_ON_ERROR_LIMIT'),
     )
     self.limits = {
         kind: settings.getint(setting_name)
         for kind, setting_name in limit_settings
     }
     self.counters = {kind: 0 for kind, _ in limit_settings}
     self.cookies_seen = set()

     endpoint = urlpathjoin(
         hsref.project.collections.url, mode, _COLLECTION_NAME)
     logger.info("HubStorage: writing pages to %s", endpoint)

     # Post the collection path (relative to the collections URL) to the
     # job metadata before opening the writer.
     collection_path = urlpathjoin(mode, _COLLECTION_NAME)
     hsref.job.metadata.apipost('collection', jl=collection_path)

     uploader = hsref.client.batchuploader
     self._writer = uploader.create_writer(
         endpoint, content_encoding='gzip', size=20)

     crawler.signals.connect(
         self.spider_closed, signal=signals.spider_closed)
Exemplo n.º 2
0
def unset_testbotgroup(hsproject):
    """Remove the test botgroup from the project settings and from JobQ.

    Deletes the ``botgroups`` setting, expires the project settings, then
    sends a DELETE for ``TEST_BOTGROUP`` under ``TEST_ENDPOINT`` using the
    project's auth.
    """
    project_settings = hsproject.settings
    project_settings.apidelete('botgroups')
    project_settings.expire()

    # JobQ holds botgroups separately, so they need their own delete request.
    botgroup_url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP)
    requests.delete(botgroup_url, auth=hsproject.auth)
Exemplo n.º 3
0
def set_testbotgroup(hsproject):
    """Register the test botgroup in the project settings and in JobQ.

    Posts ``TEST_BOTGROUP`` as the project's ``botgroups`` setting, posts a
    ``'null'`` body to the botgroup's ``max_running`` URL under
    ``TEST_ENDPOINT`` using the project's auth, then expires the project
    settings.
    """
    project_settings = hsproject.settings
    project_settings.apipost(jl={'botgroups': [TEST_BOTGROUP]})

    # JobQ has its own botgroups table, which needs a separate request
    # to be populated.
    max_running_url = urlpathjoin(
        TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP, 'max_running')
    requests.post(max_running_url, auth=hsproject.auth, data='null')

    project_settings.expire()
Exemplo n.º 4
0
def unset_testbotgroup(hsproject):
    """Remove the test botgroup from the project settings and from JobQ.

    Deletes the ``botgroups`` setting, expires the project settings, then
    sends a DELETE for ``TEST_BOTGROUP`` under ``TEST_ENDPOINT`` using the
    project's auth.
    """
    project_settings = hsproject.settings
    project_settings.apidelete('botgroups')
    project_settings.expire()

    # JobQ holds botgroups separately, so they need their own delete request.
    botgroup_url = urlpathjoin(TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP)
    requests.delete(botgroup_url, auth=hsproject.auth)
Exemplo n.º 5
0
def set_testbotgroup(hsproject):
    """Register the test botgroup in the project settings and in JobQ.

    Posts ``TEST_BOTGROUP`` as the project's ``botgroups`` setting, posts a
    ``'null'`` body to the botgroup's ``max_running`` URL under
    ``TEST_ENDPOINT`` using the project's auth, then expires the project
    settings.
    """
    project_settings = hsproject.settings
    project_settings.apipost(jl={'botgroups': [TEST_BOTGROUP]})

    # JobQ has its own botgroups table, which needs a separate request
    # to be populated.
    max_running_url = urlpathjoin(
        TEST_ENDPOINT, 'botgroups', TEST_BOTGROUP, 'max_running')
    requests.post(max_running_url, auth=hsproject.auth, data='null')

    project_settings.expire()