def reindex(src, dest):
    """Copy every document from index ``src`` into ``dest``, then drop ``src``.

    Deliberately destructive: once the bulk copy finishes, the source index
    is deleted so only ``dest`` remains.
    """
    from elasticsearch import helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    manager = DatabaseManager()
    manager.setup()
    helpers.reindex(manager.es, src, dest)
    manager.es.indices.delete(src)
def alias(alias, index):
    ''' Creates an Elasticsearch index alias '''
    from scrapi.processing.elasticsearch import DatabaseManager

    manager = DatabaseManager()
    manager.setup()
    client = manager.es
    # Remove the alias from every index it currently points at (404 is fine
    # when the alias does not exist yet), then attach it to the target index.
    client.indices.delete_alias(index=alias, name='_all', ignore=404)
    client.indices.put_alias(alias, index)
def provider_map(delete=False):
    ''' Adds favicons and metadata for harvesters to Elasticsearch '''
    # delete: when True, wipe the 'share_providers' index before repopulating.
    from six.moves.urllib import parse as urllib_parse
    from scrapi import registry
    from scrapi.processing.elasticsearch import DatabaseManager
    dm = DatabaseManager()
    dm.setup()
    es = dm.es
    if delete:
        # ignore=[404]: a missing index is not an error on a fresh cluster.
        es.indices.delete(index='share_providers', ignore=[404])
    for harvester_name, harvester in registry.items():
        # Read the harvester's favicon from disk and embed it as a data URL.
        # NOTE(review): base64.encodestring is deprecated (removed in Python 3.9);
        # `base64` itself must be imported at the top of this file — confirm.
        with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
            favicon = urllib_parse.quote(base64.encodestring(f.read()))
        # Index one document per harvester, keyed by its short name.
        # refresh=True makes the document immediately visible to the count below.
        es.index(
            'share_providers',
            harvester.short_name,
            body={
                'favicon': 'data:image/png;base64,' + favicon,
                'short_name': harvester.short_name,
                'long_name': harvester.long_name,
                'url': harvester.url
            },
            id=harvester.short_name,
            refresh=True
        )
    # Report how many provider documents the index now holds.
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
def gen_harvester(shortname=None, longname=None, url=None, favicon_dataurl=None, **kwargs):
    """Register a push-API provider in Elasticsearch and build its harvester class.

    :param shortname: unique short identifier for the provider (required)
    :param longname: human-readable provider name (required)
    :param url: provider's home URL (required)
    :param favicon_dataurl: favicon already encoded as a data URL (required)
    :return: a dynamically created subclass of ``PushApiHarvester``
    :raises AssertionError: if any required argument is missing/falsy
    """
    # NOTE(review): assert is stripped under `python -O`; kept as-is so the
    # AssertionError contract seen by existing callers is unchanged.
    assert shortname and longname and url and favicon_dataurl
    logger.info('Generating harvester {}'.format(shortname))
    from scrapi.processing.elasticsearch import DatabaseManager
    dm = DatabaseManager()
    dm.setup()
    es = dm.es
    # refresh=True makes the new provider document immediately searchable.
    es.index(
        'share_providers',
        shortname,
        body={
            'favicon': favicon_dataurl,
            'short_name': shortname,
            'long_name': longname,
            'url': url
        },
        id=shortname,
        refresh=True
    )
    # str.capitalize() already lowercases the remaining characters, so the
    # original `shortname.lower().capitalize()` had a redundant .lower().
    return type(
        '{}Harvester'.format(shortname.capitalize()),
        (PushApiHarvester, ),
        dict(short_name=shortname, long_name=longname, url=url)
    )
def provider_map(delete=False):
    ''' Adds favicons and metadata for harvesters to Elasticsearch '''
    # delete: when True, wipe the 'share_providers' index before repopulating.
    from six.moves.urllib import parse as urllib_parse
    from scrapi import registry
    from scrapi.base.helpers import null_on_error
    from scrapi.processing.elasticsearch import DatabaseManager
    dm = DatabaseManager()
    dm.setup()
    es = dm.es
    if delete:
        # ignore=[404]: a missing index is not an error on a fresh cluster.
        es.indices.delete(index='share_providers', ignore=[404])
    # Generate push-API harvesters first so the registry iterated below is
    # complete. Imported here, presumably to avoid a circular import — confirm.
    from scrapi.harvesters.push_api import gen_harvesters
    gen_harvesters()
    for harvester_name, harvester in registry.items():
        # Skip providers already present in the index. NOTE(review):
        # null_on_error appears to turn es.get's "not found" exception into a
        # falsy value (log=False suppressing logging) — verify its semantics.
        if not null_on_error(es.get, log=False)(index='share_providers', doc_type=harvester_name, id=harvester_name):
            # Read the harvester's favicon from disk and embed it as a data URL.
            # NOTE(review): base64.encodestring is deprecated (removed in
            # Python 3.9); `base64` must be imported at file top — confirm.
            with open("img/favicons/{}_favicon.ico".format(harvester.short_name), "rb") as f:
                favicon = urllib_parse.quote(base64.encodestring(f.read()))
            # refresh=True makes the document immediately visible to the count below.
            es.index(
                'share_providers',
                harvester.short_name,
                body={
                    'favicon': 'data:image/png;base64,' + favicon,
                    'short_name': harvester.short_name,
                    'long_name': harvester.long_name,
                    'url': harvester.url
                },
                id=harvester.short_name,
                refresh=True
            )
    # Report how many provider documents the index now holds.
    print(es.count('share_providers', body={'query': {'match_all': {}}})['count'])
def gen_harvester(shortname=None, longname=None, url=None, favicon_dataurl=None, **kwargs):
    """Register a push-API provider in Elasticsearch and build its harvester class.

    :param shortname: unique short identifier for the provider (required)
    :param longname: human-readable provider name (required)
    :param url: provider's home URL (required)
    :param favicon_dataurl: favicon already encoded as a data URL (required)
    :return: a dynamically created subclass of ``PushApiHarvester``
    :raises AssertionError: if any required argument is missing/falsy
    """
    # NOTE(review): assert is stripped under `python -O`; kept as-is so the
    # AssertionError contract seen by existing callers is unchanged.
    assert shortname and longname and url and favicon_dataurl
    logger.info('Generating harvester {}'.format(shortname))
    from scrapi.processing.elasticsearch import DatabaseManager
    dm = DatabaseManager()
    dm.setup()
    es = dm.es
    # refresh=True makes the new provider document immediately searchable.
    es.index('share_providers', shortname, body={
        'favicon': favicon_dataurl,
        'short_name': shortname,
        'long_name': longname,
        'url': url
    }, id=shortname, refresh=True)
    # str.capitalize() already lowercases the remaining characters, so the
    # original `shortname.lower().capitalize()` had a redundant .lower().
    return type('{}Harvester'.format(shortname.capitalize()),
                (PushApiHarvester, ),
                dict(short_name=shortname, long_name=longname, url=url))
def reindex(src, dest):
    ''' Reindexes documents from index {src} to index {dest}'''
    from elasticsearch import helpers
    from scrapi.processing.elasticsearch import DatabaseManager

    database = DatabaseManager()
    database.setup()
    client = database.es
    # Bulk-copy all documents, then remove the now-redundant source index.
    helpers.reindex(client, src, dest)
    client.indices.delete(src)