def test():
    # Creating a client with a given index name
    client = Client('myIndex')

    # Creating the index definition and schema
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with snippets
    # res = client.search("search engine", snippet_sizes={'body': 50})

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
class TAS_Import(): def __init__(self, index_name, host="172.16.20.7", port=6382, db=0): self.client = Client(index_name, host, port) self.host = host self.port = port self.index_name = index_name self.redis = Redis() def add_indexing(self, schema): self.client.create_index(schema, False, False, []) return ["Done"] def add_data(self, data): for i, rr in enumerate(data): index = i + 1 print rr name, age, location = rr['name'], rr['age'], rr['location'] self.client.add_document(index, NAME=name, AGE=age, LOCATION=location) return ["Done"] def drop_index(self): try: self.client.drop_index() except: pass
def create_website_items_index():
    "Creates Index Definition."

    # CREATE index
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    try:
        client.drop_index()  # drop if already exists
    except ResponseError:
        # will most likely raise a ResponseError if index does not exist
        # ignore and create index
        pass
    except Exception:
        raise_redisearch_error()

    idx_def = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Index fields mentioned in e-commerce settings
    idx_fields = frappe.db.get_single_value("E Commerce Settings", "search_index_fields")
    idx_fields = idx_fields.split(",") if idx_fields else []

    if "web_item_name" in idx_fields:
        idx_fields.remove("web_item_name")

    idx_fields = list(map(to_search_field, idx_fields))

    client.create_index(
        [TextField("web_item_name", sortable=True)] + idx_fields,
        definition=idx_def,
    )

    reindex_all_web_items()
    define_autocomplete_dictionary()
def create_product_search_index_async():
    print('Creating Search Index')
    client = Client('productIndex')
    client.create_index([
        TextField('title', weight=5.0),
        TextField('description'),
        TextField('tags'),
        TextField('category')
    ])
    products = Product.objects.filter(active=True)
    cache.set('Search_index_total', len(products), timeout=None)
    index = 0
    for product in products:
        title = product.name
        description = product.description
        category = ','.join([cat.name for cat in product.category.all()])
        tag = product.tags
        tag_maps = ProductTagMap.objects.filter(product=product)
        for tag_map in tag_maps:
            tag = tag + tag_map.tag.tag + ' '
        category_varients = []
        for pv in ProductVarientList.objects.filter(product=product):
            for cv in pv.key.all():
                category_varients.append(cv.value)
        tag += ' '.join(list(set(category_varients)))
        client.add_document(str(product.id),
                            title=title,
                            description=description,
                            tags=tag,
                            category=category)
        cache.set('Search_index_index', index, timeout=None)
        index += 1
    return True
class SearchDemo:

    def __init__(self, args):
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)

    def create(self):
        try:
            self.client.drop_index()
        except Exception:
            pass
        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
class RediSearchClient(object):

    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        line_pool = LineDocPool(line_doc_path)

        try:
            self.client.drop_index()
        except Exception:
            pass

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'], url=d['url'], body=d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        # print(res.total)  # "1"
        return res
def create_website_items_index():
    "Creates Index Definition."

    # CREATE index
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    # DROP if already exists
    try:
        client.drop_index()
    except Exception:
        pass

    idx_def = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Based on e-commerce settings
    idx_fields = frappe.db.get_single_value('E Commerce Settings', 'search_index_fields')
    idx_fields = idx_fields.split(',') if idx_fields else []

    if 'web_item_name' in idx_fields:
        idx_fields.remove('web_item_name')

    idx_fields = list(map(to_search_field, idx_fields))

    client.create_index(
        [TextField("web_item_name", sortable=True)] + idx_fields,
        definition=idx_def,
    )

    reindex_all_web_items()
    define_autocomplete_dictionary()
def cache_to_redis(data: dict):
    if REDIS_HOSTNAME == '':
        print('REDIS_HOSTNAME environment variable is not set')
        return
    client = Client('games', host=REDIS_HOSTNAME, port=REDIS_PORT)
    indexCreated = False
    maxAltNames = len(max(data.values(), key=lambda d: len(d['alt_names']))['alt_names'])
    while not indexCreated:
        try:
            client.create_index([
                TextField('name', weight=10),
                *[TextField('alt_name_%d' % i, weight=10) for i in range(maxAltNames)],
                TextField('summary', weight=1),
                TextField('cover', weight=0),
                TextField('thumb', weight=0)
            ])
            indexCreated = True
        except Exception as e:
            print('Failed to create index, retrying: %s' % e)
            time.sleep(3)
    for k, v in data.items():
        client.add_document(k,
                            name=v['name'],
                            **{'alt_name_%d' % i: n for i, n in enumerate(v['alt_names'])},
                            cover=v['cover'],
                            thumb=v['thumb'],
                            summary=v['summary'])
    print('done')
def build_ipa_index():
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print('Getting file `amministrazioni.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url, sep='\t', na_values=['da_indicare', '*****@*****.**'], dtype=str)
    ipa_index_ou = ipa_index_ou.loc[lambda ipa_index_ou: ipa_index_ou['cod_ou'] == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`', flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'], language='italian', replace=True, **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        rs_client.add_document(row['cod_amm'], partial=True, **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(round(finish_time - start_time, 2)), flush=True)
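A hedged companion sketch, not part of the original loader, showing one way the finished IPAIndex might be queried once built; it assumes the documents fed by get_ipa_amm_item expose name and city fields, and the query term is only an example.

from redisearch import Query

def search_ipa(rs_client, term, max_results=10):
    # Analyze the query in Italian, matching the language used at indexing time
    q = Query(term).language('italian').paging(0, max_results)
    res = rs_client.search(q)
    return [(doc.name, doc.city) for doc in res.docs]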
def create_index(cls):
    error_message = "Unable to create Index. Try Again"
    redis_enabled = os.getenv("REDIS_SEARCH", False)
    if redis_enabled:
        client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
        try:
            client.create_index(document)
            cls.build_index(client)
            print("Watcher Index created successfully")
        except ResponseError as err:
            print(err)
    else:
        print(error_message)
def insert():
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index([TextField('title'), TagField('genres', separator='|')])
    for i in range(0, len(movie_df)):
        suggestion_client.add_document(movie_df['tmdbId'][i],
                                       title=movie_df['title'][i],
                                       genres=movie_df['genres'][i])

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')
    for i in range(0, len(movie_df)):
        completion_client.add_suggestions(Suggestion(movie_df['title'][i]))
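A minimal read-side sketch to go with insert() above, assuming the same 'ac' autocompleter key; the prefix is arbitrary, and fuzzy/num are standard AutoCompleter options.

from redisearch import AutoCompleter

def complete(prefix, count=5):
    ac = AutoCompleter('ac')
    # fuzzy matching tolerates small typos in the prefix
    return [s.string for s in ac.get_suggestions(prefix, fuzzy=True, num=count)]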
def clientpush(self):
    client = Client('Checkout')

    client.create_index([
        NumericField('Key'),
        TextField('UsageClass'),
        TextField('CheckoutType'),
        TextField('MaterialType'),
        NumericField('CheckoutYear'),
        NumericField('CheckoutMonth'),
        NumericField('Checkouts'),
        TextField('Title'),
        TextField('Creator'),
        TextField('Subjects'),
        TextField('Publisher'),
        TextField('PublicationYear')
    ])

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    results = cursor.fetchall()
    i = 0
    for result in results:
        client.add_document('doc%s' % i,
                            Key=result[0],
                            UsageClass=result[1],
                            CheckoutType=result[2],
                            MaterialType=result[3],
                            CheckoutYear=result[4],
                            CheckoutMonth=result[5],
                            Checkouts=result[6],
                            Title=result[7],
                            Creator=result[8],
                            Subjects=result[9],
                            Publisher=result[10],
                            PublicationYear=result[11])
        i += 1
        print(i)

    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))

    res1 = client.search("use")
    print(res1)

    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
class RandomWikipediaImport(object):

    def __init__(self):
        self.rs = Client('wikipedia')
        self.rs.create_index((TextField('title', weight=5.0), TextField('body')))
        print(f'>>> Created index')

    def insert_random_loop(self):
        i = 1
        while True:
            ra = wikipedia.random()
            article = wikipedia.page(ra)
            self.rs.add_document(f'doc{i}', title=article.title, body=article.content)
            print(f'>>> Inserted {article.title}')
            i += 1
class TAS_Import():

    def __init__(self, index_name, host=ip, port=port, db=db):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        # self.redis = Redis()

    def add_indexing_schema(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, company, doc_id, project):
        for i, rr in enumerate(rdata):
            index = doc_id + company + "CMDIC" + str(i + 1) + project
            l1, l2, l3 = rr
            l1 = config_obj.StringEscape(l1)
            self.client.add_document(index, DATA=l1, PAGE=l2, BBOX=l3)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception as e:
            # print('Error', e)
            pass

    def start(self, data, doc_id, company, project):
        status = 1
        index_name = project + "_DOCUMENT_" + str(doc_id)
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('PAGE'),
            TextField('BBOX')
        ]
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, company, doc_id, project)
        status = 5
        return [status]
class EventProcessor():

    def __init__(self):
        self.r = redis.from_url(config.EVENT_BROKER_URL)
        self.client = Client('CCTV_DATA')
        try:
            self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
        except Exception as error:
            print("Error while creating index", error)
        # self.client.create_index([TextField('title', weight=5.0), TextField('body')])

    def get_objects_in_image(self, image):
        # TODO: call RedisAI module
        objects = [
            "key", "passport", "wallet", "car", "bag", "watch", "book",
            "satchel", "laptop", "camera", "mobile_phone"
        ]
        tags = []
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        return tags

    def process(self, msg):
        print("Going to process message and store it", msg)
        # print(float(msg["LON"]), float(msg["LAT"]), msg["CCTV_ID"])
        # print(type(float(msg["LON"])), type(float(msg["LAT"])), msg["CCTV_ID"])
        try:
            self.r.geoadd("CCTV_LOCATION", float(msg["LON"]), float(msg["LAT"]), msg["CCTV_ID"])
            msg["TAGS"] = self.get_objects_in_image(msg.get("IMAGE", ""))
            # print("Going to store this in search", msg)
            doc_unique_key = msg["CCTV_ID"] + "_" + msg["TS"]
            self.client.add_document(doc_unique_key, CCTV_ID=doc_unique_key, TAGS=",".join(msg["TAGS"]))
        except Exception as error:
            print("Error while adding cctv data", error)
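A small illustrative helper, not part of the original class, showing how the TAGS tag field indexed above could be queried with the standard @TAGS:{value} RediSearch syntax; the object name passed in is just an example.

from redisearch import Query

def find_cctv_by_object(client, obj):
    # Tag fields are queried with the `@field:{value}` syntax
    q = Query('@TAGS:{%s}' % obj).paging(0, 100)
    res = client.search(q)
    return [doc.CCTV_ID for doc in res.docs]

# usage, assuming an EventProcessor instance `ep`:
# find_cctv_by_object(ep.client, 'wallet')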
def index():
    client = Client('sh')
    # client.drop_index()
    client.create_index([TextField('txt', weight=1.0)])

    chapters = {}
    with open('will_play_text.csv') as fp:
        r = csv.reader(fp, delimiter=';')
        for line in r:
            # ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."]
            play, chapter, character, text = line[1], line[2], line[4], line[5]

            d = chapters.setdefault('{}:{}'.format(play, chapter), {})
            d['play'] = play
            d['text'] = d.get('text', '') + ' ' + text

    for chapter, doc in chapters.items():
        print(chapter, doc)
        client.add_document(chapter, nosave=True, txt=doc['text'])
def get(self, request):
    # data = request.data
    mes = {}
    search_key = request.GET.get('key')
    print(search_key)
    all_classes = Course.objects.all()
    print("Start creating the index")
    # Create a client with the given index name
    client = Client('CII' + str(datetime.now()), host=settings.SIP, port='6666')
    # Create the index definition and schema
    client.create_index((TextField('title'), TextField('body')))
    print('Index created')
    print('Start adding data')
    for i in all_classes:
        print(str(i.id) + str(i.title))
        # Index the document
        client.add_document('result' + str(datetime.now()),
                            title=i.title + '@' + str(i.id),
                            info=i.info,
                            language='chinese')
        print(333333333)
    print('Finished adding data')
    print(client.info())
    # Search
    client = Client('CII' + str(datetime.now()), host=settings.SIP, port='6666')
    res = client.search(search_key)
    print('Query finished')
    id_list = []
    print(res.docs)
    for i in res.docs:
        # print(i.title)
        # Take the title, split it on '@', use the course ID to query, then serialize for display
        id = i.title.split('@')[1]
        id_list.append(id)
    course = Course.objects.filter(id__in=id_list).all()
    c = CourseSerializersModel(course, many=True)
    mes['course'] = c.data
    mes['code'] = 200
    mes['message'] = 'Search complete'
    return Response(mes)
class SearchDemo:

    def __init__(self, args):
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)

    def create(self):
        try:
            self.client.drop_index()
        except Exception:
            pass
        self.client.create_index([
            NumericField('ORDERNUMBER'),
            NumericField('QUANTITYORDERED', sortable=True),
            NumericField('PRICEEACH', sortable=True),
            NumericField('ORDERLINENUMBER'),
            NumericField('SALES', sortable=True),
            TextField('ORDERDATE'),
            TextField('STATUS', sortable=True),
            NumericField('QTR_ID', sortable=True),
            NumericField('MONTH_ID', sortable=True),
            NumericField('YEAR_ID', sortable=True),
            TextField('PRODUCTLINE', sortable=True),
            NumericField('MSRP', sortable=True),
            TextField('PRODUCTCODE', sortable=True),
            TextField('CUSTOMERNAME', sortable=True),
            TextField('PHONE'),
            TextField('ADDRESSLINE1'),
            TextField('ADDRESSLINE2'),
            TextField('CITY', sortable=True),
            TextField('STATE', sortable=True),
            TextField('POSTALCODE', sortable=True),
            TextField('COUNTRY', sortable=True),
            TextField('TERRITORY', sortable=True),
            TextField('CONTACTLASTNAME'),
            TextField('CONTACTFIRSTNAME'),
            TextField('DEALSIZE', sortable=True)
        ])
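An illustrative query against the schema above, not part of the original demo: sortable numeric fields such as SALES allow server-side sorting, and '*' is the standard match-all query.

from redisearch import Query

def top_sales(client, n=10):
    # sort by the sortable SALES field, highest first
    q = Query('*').sort_by('SALES', asc=False).paging(0, n)
    return client.search(q).docs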
class Hub(object): dconn = None # document store connection sconn = None # search index connection qconn = None # queue connection gh = None autocomplete = None repo = None _ts = None _hubkey = 'hub:catalog' _ixname = 'ix' _acname = 'ac' def __init__(self, ghlogin_or_token=None, docs_url=None, search_url=None, queue_url=None, repo=None): timestamp = datetime.utcnow() logger.info('Initializing temporary hub {}'.format(timestamp)) if ghlogin_or_token: self.gh = Github(ghlogin_or_token) elif 'GITHUB_TOKEN' in os.environ: self.gh = Github(os.environ['GITHUB_TOKEN']) else: logger.info('Env var ' 'GITHUB_TOKEN' ' not found') if docs_url: pass elif 'DOCS_REDIS_URL' in os.environ: docs_url = os.environ['DOCS_REDIS_URL'] else: logger.critical('No Redis for document storage... bye bye.') raise RuntimeError('No Redis for document storage... bye bye.') self.dconn = ReJSONClient().from_url(docs_url) if search_url: pass elif 'SEARCH_REDIS_URL' in os.environ: search_url = os.environ['SEARCH_REDIS_URL'] else: search_url = docs_url conn = Redis(connection_pool=ConnectionPool().from_url(search_url)) self.sconn = RediSearchClient(self._ixname, conn=conn) self.autocomplete = AutoCompleter(self._acname, conn=conn) if queue_url: pass elif 'QUEUE_REDIS_URL' in os.environ: queue_url = os.environ['QUEUE_REDIS_URL'] else: queue_url = docs_url self.qconn = StrictRedis.from_url(queue_url) if repo: pass elif 'REDISMODULES_REPO' in os.environ: repo = os.environ['REDISMODULES_REPO'] else: logger.critical('No REDISMODULES_REPO... bye bye.') raise RuntimeError('No REDISMODULES_REPO... bye bye.') self.repo = repo # Check if hub exists if self.dconn.exists(self._hubkey): self._ts = datetime.fromtimestamp( float(self.dconn.jsonget(self._hubkey, Path('.created')))) logger.info('Latching to hub {}'.format(self._ts)) else: self._ts = timestamp logger.info('Creating hub {}'.format(self._ts)) self.createHub() self.addModulesRepo(self.repo) def get_repo_url(self): return 'https://github.com/{}'.format(self.repo) def createHub(self): logger.info('Creating the hub in the database {}'.format(self._ts)) # Store the master modules catalog as an object self.dconn.jsonset( self._hubkey, Path.rootPath(), { 'created': str(_toepoch(self._ts)), 'modules': {}, 'submissions': [], 'submit_enabled': False }) # Create a RediSearch index for the modules # TODO: catch errors self.sconn.create_index( (TextField('name', sortable=True), TextField('description'), NumericField('stargazers_count', sortable=True), NumericField('forks_count', sortable=True), NumericField('last_modified', sortable=True)), stopwords=stopwords) def deleteHub(self): # TODO pass def addModule(self, mod): logger.info('Adding module to hub {}'.format(mod['name'])) # Store the module object as a document m = RedisModule(self.dconn, self.sconn, self.autocomplete, mod['name']) m.save(mod) # Add a reference to it in the master catalog self.dconn.jsonset( self._hubkey, Path('.modules["{}"]'.format(m.get_id())), { 'id': m.get_id(), 'key': m.get_key(), 'created': str(_toepoch(self._ts)), }) # Schedule a job to refresh repository statistics, starting from now and every hour s = Scheduler(connection=self.qconn) job = s.schedule( scheduled_time=datetime(1970, 1, 1), func=callRedisModuleUpateStats, args=[m.get_id()], interval=60 * 60, # every hour repeat=None, # indefinitely ttl=0, result_ttl=0) return m """ Adds modules to the hub from a local directory TODO: deprecate asap """ def addModulesPath(self, path): logger.info('Loading modules from local path {}'.format(path)) # Iterate module 
JSON files for filename in os.listdir(path): if filename.endswith(".json"): with open('{}/{}'.format(path, filename)) as fp: mod = json.load(fp) m = self.addModule(mod['name'], mod) """ Adds a modules to the hub from a github repository """ def addModulesRepo(self, name, path='/modules/'): # TODO: check for success q = Queue(connection=self.qconn) q.enqueue(callLoadModulesFromRepo, name, path) def loadModulesFromRepo(self, name, path): logger.info('Loading modules from Github {} {}'.format(name, path)) # TODO: error handling, sometimes not all contents are imported? repo = self.gh.get_repo(name) files = repo.get_dir_contents(path) for f in files: mod = json.loads(f.decoded_content) m = self.addModule(mod) """ Submits a module to the hub """ def submitModule(self, repo_id, **kwargs): logger.info('Module submitted to hub {}'.format(repo_id)) repo_id = repo_id.lower() ts = datetime.utcnow() res = {'id': repo_id, 'status': 'failed'} if not self.dconn.jsonget(self._hubkey, Path('submit_enabled')): res['message'] = 'Module submission is currently disabled' return res # Check if the module is already listed m = RedisModule(self.dconn, self.sconn, self.autocomplete, repo_id) if m.exists: # TODO: return in search results res['message'] = 'Module already listed in the hub' return res # Check if there's an active submission, or if the failure was too recent submission = Submission(self.dconn, repo_id) if submission.exists: status = submission.status if status != 'failed': res['status'] = 'active' res['message'] = 'Active submission found for module' return res else: # TODO: handle failed submissions res['message'] = 'Module already submitted to the hub and had failed, please reset manually for now' return res # Store the new submission submission.save(**kwargs) # Record the submission in the catalog # TODO: find a good use for that, e.g. 
5 last submissions self.dconn.jsonarrappend(self._hubkey, Path('.submissions'), { 'id': submission.get_id(), 'created': submission.created, }) # Add a job to process the submission q = Queue(connection=self.qconn) job = q.enqueue(callProcessSubmission, submission.get_id()) if job is None: res['message'] = 'Submission job could not be created' # TODO: design retry path logger.error( 'Could not create submission processing job for {}'.format( submission.get_id())) else: res['status'] = 'queued' submission.status = res['status'] submission.job = job.id return res def viewSubmissionStatus(self, repo_id): submission = Submission(self.dconn, repo_id) if submission.exists: res = { 'id': submission.get_id(), 'status': submission.status, 'message': submission.message, } if 'finished' == res['status']: res['pull_number'] = submission.pull_number res['pull_url'] = submission.pull_url return res def processSubmission(self, repo_id): logger.info('Processing submision for {}'.format(repo_id)) submission = Submission(self.dconn, repo_id) if submission.exists: return submission.process(self.gh, self.repo) def viewModules(self, query=None, sort=None): if not query: # Use a purely negative query to get all modules query = '-etaoinshrdlu' q = Query(query).no_content().paging(0, 1000) if sort: if sort == 'relevance': pass elif sort == 'update': q.sort_by('last_modified') elif sort == 'stars': q.sort_by('stargazers_count', asc=False) elif sort == 'forks': q.sort_by('forks_count', asc=False) elif sort == 'name': q.sort_by('name') results = self.sconn.search(q) mods = [] fetch_duration = 0 # TODO: this should be pipelined for doc in results.docs: m = RedisModule(self.dconn, self.sconn, self.autocomplete, doc.id) res, duration = _durationms(m.to_dict) mods.append(res) fetch_duration += duration return { 'results': results.total, 'search_duration': '{:.3f}'.format(results.duration), 'fetch_duration': '{:.3f}'.format(fetch_duration), 'total_duration': '{:.3f}'.format(fetch_duration + results.duration), 'modules': mods, } def viewSearchSuggestions(self, prefix): suggestions = self.autocomplete.get_suggestions(prefix) return [s.string for s in suggestions]
#--------------------------------------------
# Import the whole directory into RediSearch:
# create the index and add the documents.
# Change `dirt` to your documents' path.
#--------------------------------------------
import os
from redisearch import Client, Query, TextField

dirt = "/path/to/the/documents/"  # Change it to your own path

client = Client("BoxGroup", port=6379)  # 6379 as default
client.create_index([TextField('title'), TextField('body')])

filelist = os.listdir(dirt)
filelist = sorted(filelist)
try:
    filelist.remove(".git")
except ValueError:
    print("The .git directory does not exist, skipping")

filecounter = 0
for filename in filelist:
    openfilename = dirt + filename
    with open(openfilename, "r+") as f:
        data = f.read()
        try:
            client.add_document(filecounter, title=filename, body=data, language="chinese")
        except:
import pandas as pd
import json
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

sleep(15)

nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)
client = Client('week1', conn=rc)
client.create_index([TextField('name'), TextField('surname'), TextField('job')])

dat = pd.read_csv("test.csv")
for idx, row in tqdm(dat.iterrows()):
    client.add_document(f"{row['index']}",
                        replace=True,
                        partial=True,
                        name=f"{row['name']}",
                        surname=f"{row['surname']}",
                        job=f"{row['job']}")
import hashlib
import gpxpy
import gpxpy.gpx
from redisearch import Client, Query, TextField, GeoField, NumericField

client = Client(
    'attractions',
    host='127.0.0.1',
    password='',
    port=6379
)

client.create_index([
    TextField('title', weight=5.0),
    TextField('description'),
    NumericField('verified', sortable=True),
    GeoField('geo'),
])

gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r', encoding='utf-8')
gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    t = "%s,%s,%s" % (waypoint.name, waypoint.longitude, waypoint.latitude)
    client.add_document(
class UserCache:

    def __init__(self):
        self.client = Client("api_user_index", app.config["REDIS_HOST"], app.config["REDIS_PORT"])

    def create_user_index(self, users):
        """
        Creates a new user index if not exists
        :param users:
        :return:
        """
        definition = IndexDefinition(prefix=['doc:', 'user:'])

        try:
            self.client.create_index(
                (TextField("first_name"), TextField("last_name"), TextField("email"),
                 NumericField("age"), NumericField("is_employee"),
                 NumericField("user_id", sortable=True)),
                definition=definition)
        except redis.exceptions.ResponseError:
            return False

        indexer = self.client.batch_indexer(chunk_size=len(users))
        for user in users:
            fields = {
                "first_name": user.first_name.translate(str.maketrans({"-": r"\-"})),
                "last_name": user.last_name.translate(str.maketrans({"-": r"\-"})),
                "email": user.email.translate(str.maketrans({"-": r"\-"})),
                "age": user.age,
                "user_id": user.id,
                "is_employee": int(user.is_employee),
            }
            indexer.add_document(f"doc:{user.id}", **fields)

        indexer.commit()
        return True

    def cache_single_user(self, user):
        """
        Caches a single user
        :param user:
        :return:
        """
        self.client.redis.hset(
            f"doc:{user.id}",
            mapping={
                "first_name": user.first_name.translate(str.maketrans({"-": r"\-"})),
                "last_name": user.last_name.translate(str.maketrans({"-": r"\-"})),
                "email": user.email.translate(str.maketrans({"-": r"\-"})),
                "age": user.age,
                "user_id": user.id,
                "is_employee": int(user.is_employee),
            })
        return True

    def search(self, filters, page, per_page):
        """
        Searches through redis
        :return:
        """
        q = Query(self.build_query(filters)).paging(
            (page - 1) * per_page, per_page).sort_by("user_id")
        return self.client.search(q)

    def build_query(self, filters):
        query = []
        age = "+@age:[minAge maxAge]"

        for filter_name, value in filters.items():
            # Ugly non-solid way
            if value is not None:
                if filter_name == "firstName" and len(value) > 1:
                    query.append(f"+@first_name:{value}*")
                if filter_name == "lastName" and len(value) > 1:
                    query.append(f"+@last_name:{value}*")
                if filter_name == "email" and len(value) > 1:
                    query.append(f"+@email:{value}*")
                if filter_name == "minAge":
                    age = age.replace("minAge", str(value))
                if filter_name == "maxAge":
                    age = age.replace("maxAge", str(value))
                if filter_name == "isEmployee":
                    query.append(f"+@is_employee:{int(value)}")

        age = age.replace("minAge", "0")
        age = age.replace("maxAge", "100")
        query.append(age)

        return " ".join(query)
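A hedged usage sketch for UserCache; the filter keys mirror build_query() above, while the concrete values, page numbers, and the exact ordering of the generated query terms are only illustrative.

cache = UserCache()
filters = {"firstName": "jo", "minAge": 25, "maxAge": 40, "isEmployee": True}
# build_query(filters) produces roughly:
#   "+@first_name:jo* +@is_employee:1 +@age:[25 40]"
result = cache.search(filters, page=1, per_page=20)
matching_ids = [doc.user_id for doc in result.docs]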
# Creating a client with a given index name
client = Client("cveIndex")

try:
    client.info()
except Exception as e:
    if e.args[0] != "Unknown Index name":
        print("You must be running a redis server with the redisearch module installed")
        exit()

# IndexDefinition is available for RediSearch 2.0+
definition = IndexDefinition(prefix=['cve:'])

# Creating the index definition and schema
try:
    client.create_index((TextField("id"), TextField("description"), TextField("configurations")), definition=definition)
except Exception:
    # Index already exists. Delete and recreate
    client.drop_index()
    print("Index already exists. Dropping. Delete keys and try again.")
    exit()

def process_CVE_file(file):
    with open(file, 'r', encoding="utf8") as f:
        json = ujson.decode(f.read())
        cve_items = json['CVE_Items']
        for cve_item in cve_items:
            cve_id = cve_item['cve']['CVE_data_meta']['ID']
            cve_desc = cve_item['cve']['description']['description_data'][0]['value']
            cve_configurations = str(cve_item['configurations']['nodes'])
            # Sanitizing special characters to prevent them from being tokenized away
def open_redis():
    if not os.path.isdir('./nvd_data_feeds/'):
        os.mkdir('./nvd_data_feeds/')

    print('Creating the docker container with redislabs/redisearch\n')
    Popen([
        'docker', 'run', '--rm', '--name', 'amadeus', '-p', '6379:6379',
        'redislabs/redisearch:latest'
    ])
    sleep(6)

    urls = [
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2021.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2020.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2019.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2018.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2017.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2016.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2015.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2014.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2013.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2012.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2011.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2010.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2009.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2008.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2007.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2006.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2005.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2004.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2003.json.zip',
        'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-2002.json.zip',
        'https://nvd.nist.gov/feeds/json/cpematch/1.0/nvdcpematch-1.0.json.zip'
    ]

    print('\nDownloading and unzipping json feeds')
    if not os.path.isdir('./downloads/'):
        os.mkdir('./downloads/')
    tam = len(urls)
    dl = 0
    for url in urls:
        name = url.split('/')[-1]
        response = get(url)
        open('./downloads/' + name, 'wb').write(response.content)
        with ZipFile('./downloads/' + name, 'r') as zip_ref:
            zip_ref.extractall('./nvd_data_feeds/')
        dl += 1
        prog = dl / tam
        done = int(50 * prog)
        stdout.write('\r[%s%s%s]%s' % ('Progress > ', '=' * (done - 1) + '>',
                                       ' ' * (50 - done), str(round(prog * 100)) + '%'))
    rmtree('./downloads/')
    print('\n')

    print('Start processing CVE feeds')

    # Create a normal redis connection
    conn = redis.Redis('localhost')

    # Creating a client with a given index name
    client = Client('cveIndex')

    # IndexDefinition is available for RediSearch 2.0+
    definition = IndexDefinition(prefix=['cve:'])

    # Creating the index definition and schema
    try:
        client.create_index((TextField('id'), TextField('description'), TextField('configurations')), definition=definition)
    except Exception:
        # Index already exists. Delete and recreate
        client.drop_index()
        print('Index already exists\nDropping\nDelete keys and try again')
        exit()

    def process_CVE_file(file):
        with open(file, 'r', encoding='utf8') as f:
            json = ujson.decode(f.read())
            cve_items = json['CVE_Items']
            for cve_item in cve_items:
                cve_id = cve_item['cve']['CVE_data_meta']['ID']
                cve_desc = cve_item['cve']['description']['description_data'][0]['value']
                cve_configurations = str(cve_item['configurations']['nodes'])
                # Sanitizing special characters to prevent them from being tokenized away
                cve_desc_sanitized = cve_desc.replace(':', 'cc11').replace('.', 'pp22').replace('*', 'ss33')
                cve_configurations_sanitized = cve_configurations.replace(':', 'cc11').replace('.', 'pp22').replace('*', 'ss33')
                # Indexing a document for RediSearch 2.0+
                client.redis.hset('cve:' + cve_id,
                                  mapping={
                                      'id': cve_id,
                                      'description': cve_desc_sanitized,
                                      'configurations': cve_configurations_sanitized
                                  })
        print('Processed ' + file)

    with ThreadPoolExecutor(max_workers=20) as pool:
        futures = []
        for i in range(2002, 2021):
            future = pool.submit(process_CVE_file, './nvd_data_feeds/nvdcve-1.1-{0}.json'.format(i))
            futures.append(future)
        json_list = [x.result() for x in as_completed(futures)]

    print('Done processing CVE feeds\nProcessing NVD CPE match feed')

    with open('./nvd_data_feeds/nvdcpematch-1.0.json', 'r', encoding='utf8') as f:
        json = ujson.decode(f.read())
        matches = json['matches']
        for match in matches:
            rootUri = match['cpe23Uri']
            keyName = rootUri
            if 'versionStartIncluding' in match:
                keyName += ';;versionStartIncluding=' + match['versionStartIncluding']
            if 'versionStartExcluding' in match:
                keyName += ';;versionStartExcluding=' + match['versionStartExcluding']
            if 'versionEndIncluding' in match:
                keyName += ';;versionEndIncluding=' + match['versionEndIncluding']
            if 'versionEndExcluding' in match:
                keyName += ';;versionEndExcluding=' + match['versionEndExcluding']
            if len(match['cpe_name']) > 0:
                # if CPE list is empty no need to include it in cache
                valueString = ';;'.join(x['cpe23Uri'] for x in match['cpe_name'])
                conn.set(keyName, valueString)

    print('\nAMADEUS is already launched!')
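Because ':' '.' and '*' are rewritten to cc11/pp22/ss33 before indexing, anything read back from the index (and any query term built from a CPE string) needs the inverse mapping. A minimal helper sketch, assuming the same substitutions as above:

def desanitize(value):
    # inverse of the cc11/pp22/ss33 substitutions applied before indexing
    return value.replace('cc11', ':').replace('pp22', '.').replace('ss33', '*')

# e.g. desanitize(doc.configurations) restores the original CPE strings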
import re
import sys
from datetime import datetime
import json
import logging
from random import randint
from time import sleep

from redis import ResponseError
from redisearch import Client, TextField

# stagger reading and indexing for parallel
sleep(randint(1, 10))

logging.basicConfig(filename='parse.log', level=logging.INFO)

client = Client('medline')
try:
    client.create_index([TextField('abstract')])
except ResponseError:
    pass

with open(sys.argv[1], 'r') as f:
    data = f.read()

recs = data.split("<PubmedArticle>")
recs = recs[1:]

indexer = client.batch_indexer(chunk_size=500)

count = 0
for r in recs:
from redisearch import Client, TextField

# Creating a client with a given index name
client = Client('myIndex')

# Creating the index definition and schema
client.create_index((TextField('title', weight=5.0), TextField('body')))

# Indexing a document
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch implements a search engine on top of redis')
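A short follow-up sketch querying the index just created, mirroring the search calls used elsewhere in these examples; the query text is arbitrary.

from redisearch import Query

res = client.search('search engine')            # simple full-text search
print(res.total, res.docs[0].title)

q = Query('search engine').verbatim().paging(0, 5)
print(client.search(q).total)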
import time
from redisearch import Client, TextField, NumericField, Query
from redis.exceptions import ResponseError

file = open('test_set_tweets.txt', 'r')

client = Client('Tweets')
client.redis.flushdb()
client.create_index([TextField('tweet'), TextField('timestamp')])

start = time.time()
for x, line in enumerate(file.readlines()):
    content = line.strip().split('\t')
    try:
        if len(content) == 4:  # has a date
            client.add_document('-'.join(content[:2]), tweet=content[-2], timestamp=content[-1])
        else:
            client.add_document('-'.join(content[:2]), tweet=content[-1], timestamp='')
    except ResponseError:
        pass
    if x % 1000 == 0:
        print(x, 'lines indexed...')
end = time.time()
print("Indexing time elapsed", end - start)

total = 0
for i in range(30):
    start = time.time()
class BaseSearchHandler(BaseSearchHandlerSupport): def __init__(self): super().__init__() self._entity = None # This will only be here as an example self.is_replacement = False self.current_replacement = None self.is_set_entity = False # Subs are all of the subfields we would need to search through self.subs: Dict[str, BaseSearchHandler] = {} self.current_doc_id = None self.current_doc_id_list = set() self.current_client = None self.print_sub = False self.use_sub_query = False self._super_ids = [] self._sub_ids = [] self.finished_alter = False self.search_sub = False self._processor: Optional[Processor] = None def __setitem__(self, key: str, value: Any): if key not in self.requirements.keys() and (not self.is_replacement): return self.is_set_entity = False self.current_client = None if isinstance(value, dict): if len(value) == 0: return self.handle_input_dict_key(key, value) else: _instance_type = type(value) # check that the value is the right type if is_generic(_instance_type): _str_type = to_str(_instance_type) self.query_builder.insert_by_type_str(_str_type, key, value) self.insert_builder.insert_by_type_str(_str_type, key, value) @property def replacement(self): if self.current_replacement is None: self.current_replacement = BaseSearchHandler() self.current_replacement.is_replacement = True self.current_replacement.insert_builder.is_replacement = True # self.current_replacement. return self.current_replacement @property def entity(self): if self._entity is None: raise AttributeError("You haven't set entity yet") return self._entity @entity.setter def entity(self, _entity: str): self._entity = _entity @property def processor(self): if self._processor is None: raise AttributeError("The processor hasn't been set yet.") return self._processor @processor.setter def processor(self, _processor: Processor): self._processor = _processor self.set_sub_processors() @property def requirements(self): return self._requirements_str @requirements.setter def requirements(self, _requirements: dict): """If we set it here we'd go through each dict item and create string version of each key""" # Document id will allow us to figure out which documents are involved with subkeys _requirements["entity"] = str _requirements["super_id"] = str self.process_requirements(_requirements) if not self.is_sub_key: self.create_sub_handlers() @property def dreq(self): return self._dreq @dreq.setter def dreq(self, _req): self._dreq = _req self.reset() self.requirements = _req self.replacement.requirements = _req @property def allrequirements(self): return self._dreq @allrequirements.setter def allrequirements(self, _req): self._dreq = _req self.reset() self.requirements = _req self.replacement.requirements = _req @property def doc_id(self): """ We get the current doc_id if it exists""" return self.current_doc_id @doc_id.setter def doc_id(self, _doc_id: str): self.current_doc_id = _doc_id @property def verbatim(self): return self.query_builder.build_exact() @property def client(self): """client Get the client for the user. If it doesn't exist yet, create a new one with the given stop words. For subkeys it adds fields if we've created them recently. .. code-block:: python >>> self.client.add_document(_id, payload, **records) Returns ------- [type] A redis connected client. Gets connection from Jamboree processor. """ # self.processor if self.current_client is None: # We would insert a connection here. Use the connection from the search processor to operate. 
with logger.catch(ResponseError): self.current_client = Client(self.index, conn=self.processor.rconn) if len(self.indexable) > 0: self.current_client.create_index( self.indexable, stopwords=[ "but", "there", "these", "they", "this", "to" ], ) if self.is_sub_key: if not self.finished_alter: for i in self.indexable: with suppress(ResponseError): self.current_client.alter_schema_add([i]) self.finished_alter = True return self.current_client @property def general(self): return self.query_builder.general @general.setter def general(self, term: str): """ Push a general term into the query. It can only be done once. Don't put it to a filter key.""" if not isinstance(term, str): logger.error("Term isn't a string") return self.query_builder.general = term """ This is when things get weird """ def create_sub_handlers(self): """ Creates subhandlers for the given index""" for name, subkey in self.subfields.items(): subhandler = BaseSearchHandler() subhandler.is_sub_key = True subhandler.index = subkey subhandler.insert_builder.is_sub = True self.replacement.subs[name] = copy(subhandler) self.subs[name] = subhandler def set_sub_processors(self): """ If there are any sub queries, set processors to them """ if len(self.subfields) > 0: self.use_sub_query = True for name in self.subfields.keys(): self.subs[name].processor = self.processor with suppress(Exception): self.replacement.subs[name].processor = self.processor def set_entity(self): if self.is_set_entity is False: self["entity"] = { "type": "TEXT", "is_filter": True, "values": { "is_exact": True, "term": self.entity }, } self.is_set_entity = True def verbatim_docs(self): built = self.query_builder.build_exact() q = Query(built).no_stopwords().paging(0, 1000000) results = self.client.search(q) result_docs = results.docs return result_docs def general_docs(self): built = self.query_builder.build() q = Query(built).paging(0, 1000000) results = self.client.search(q) result_docs = results.docs return result_docs def verbatim_sub_ids(self): super_id_set = set() sub_id_set = set() for key, sub in self.subs.items(): sub.print_sub = True built = sub.query_builder.build() # logger.warning(built) built = built.strip(" ") is_falsy = not built if is_falsy: continue # logger.error(built) verb_items = sub.general_docs() current_super_ids = [] current_subs = [] for verb in verb_items: try: _verb_id = verb.id _super_id = verb.super_id full_dict = verb.__dict__ self.keystore.add(_super_id, key, full_dict) current_subs.append(_verb_id) current_super_ids.append(_super_id) except Exception as e: logger.error(str(e)) if len(current_super_ids) > 0: if len(super_id_set) == 0: super_id_set.update(current_super_ids) else: super_id_set = super_id_set.intersection(current_super_ids) sub_id_set.update(current_subs) return list(super_id_set), list(sub_id_set) def verbatim_doc_ids(self): q = Query(self.verbatim).no_content().paging(0, 1000000) results = self.client.search(q) ids = [res.id for res in results.docs] return ids def handle_input_dict_key(self, name: str, item: dict): """ Figures out where to put the input dictionary for the query """ if self.is_sub(name): # If this is a subkey we'll run the same operation again # Check to see if the subkey is empty and has information that is reducible to "type" self.use_sub_query = True self.search_sub = True reqs = self.loaded_dict_to_requirements(item) # logger.debug(reqs) self.subs[name].requirements = reqs for k, v in item.items(): self.subs[name][k] = v else: # If it's not queryable don't try adding anything if not 
is_queryable_dict(item): return self.insert_builder.from_dict(name, item) self.query_builder.from_dict(name, item) def normal_find(self, limit_ids=None): built = self.query_builder.build() q = Query(built).paging(0, 1000000) if limit_ids is not None and len(limit_ids) > 0: q.limit_ids(*limit_ids) results = self.client.search(q) result_docs = results.docs return result_docs def normal_find_ids(self, limit_ids=None): _query = self.query_builder.build() q = Query(_query).no_content().paging(0, 1000000) if limit_ids is not None and len(limit_ids) > 0: q.limit_ids(*limit_ids) results = self.client.search(q) result_docs = results.docs return [res.id for res in result_docs] def sub_find(self): sup_ids, sub_ids = self.verbatim_sub_ids() if len(sub_ids) == 0: return [] results = self.normal_find(limit_ids=sup_ids) results_dicts = [] for result in results: _id, idict = split_doc(result) idict.pop("payload", None) subitems = self.keystore.get(_id) idict.update(subitems) results_dicts.append(idict) return results_dicts def normal_insert(self, allow_duplicates=False): if allow_duplicates == False: verbatim_docs = self.verbatim_docs() if len(verbatim_docs) > 0 and allow_duplicates == False: # Not adding docs because we're not allowing duplicates return verbatim_docs[0].id, False insert_variables = self.insert_builder.build() _doc_id = self.insert_builder.doc_id index_name = self.client.index_name fields = [i.redis_args()[0] for i in self.indexable] with logger.catch(message=f"{index_name} - {fields}", reraise=True): self.client.add_document(_doc_id, payload=_doc_id, **insert_variables) return _doc_id, True def sub_insert(self, allow_duplicates=False): _super_id, _did_insert = self.normal_insert( allow_duplicates=allow_duplicates) # logger.info(f'Did insert: {_did_insert}') if _did_insert: for key, sub in self.subs.items(): if len(sub.insert_builder._insert_dict) > 0: sub.insert_builder.super_id = _super_id sub.normal_insert(allow_duplicates=True) return _super_id def find_sub_dictionaries(self, super_id): """ Finds a subdictionary by superid inside of the database. """ # Should use the find within function for every subkey mega_dict = ADict() for key, sub in self.subs.items(): key_dict = ADict() try: res = sub.client.search(f'"{super_id}"') if res.total == 0: continue dd = [dictify(doc, False) for doc in res.docs] key_dict[key] = dd[0] except ResponseError: pass mega_dict.update(key_dict) return mega_dict def find(self): """Given the items we've set, find all matching items""" self.set_entity() self.keystore.reset() if self.use_sub_query and self.search_sub: return self.sub_find() normal = self.normal_find() if len(self.subs) == 0: if len(normal) > 0: return [doc_convert(x) for x in normal] return normal ndicts = [] for i in normal: _i = dictify(i) mega = self.find_sub_dictionaries(_i.id) if len(mega) > 0: _i.update(mega.to_dict()) ndicts.append(_i) return ndicts def pick(self, _id: str): """ Given an id find the element with the top level id. We aren't searching lower level_ids. 
After we pull all of the """ self.set_entity() self.keystore.reset() doc = self.client.load_document(_id) dd = doc.__dict__ doc = ADict(**dd) _id = doc.pop("id", None) doc.pop("payload", None) doc_z = len(doc) > 0 if len(self.subs) == 0: if not doc_z: return None doc.update({"id": _id}) return doc if doc_z: sub_dicts = self.find_sub_dictionaries(_id) # if len(sub_dicts) > 0: doc.update(sub_dicts) doc.update({"id": _id}) return doc return None def update(self): """ # UPDATE Given the items or ID we've set, partial update every matching document. If we have the document_ids already, replace those items """ self.set_entity() self.keystore.reset() replacement_variables = self.replacement.insert_builder.build() if self.use_sub_query == False: doc_ids = self.verbatim_doc_ids() batcher = self.client.batch_indexer(chunk_size=len(doc_ids)) for doc_id in doc_ids: batcher.add_document(doc_id, replace=True, partial=True, **replacement_variables) batcher.commit() else: sup_ids, sub_ids = self.verbatim_sub_ids() norm_ids = self.normal_find_ids(limit_ids=sup_ids) batcher = self.client.batch_indexer(chunk_size=len(norm_ids)) for doc_id in norm_ids: batcher.add_document(doc_id, replace=True, partial=True, **replacement_variables) batcher.commit() for sub in self.subs.values(): subreplacement = sub.insert_builder.build() if len(subreplacement) > 0: subbatcher = sub.client.batch_indexer( chunk_size=len(sub_ids)) for _id in sub_ids: self.client.add_document(_id, replace=True, partial=True, **subreplacement) subbatcher.commit() def update_id(self, _id): self.set_entity() self.keystore.reset() doc = self.client.load_document(_id) doc_dict, is_exist = single_doc_check_convert(doc) if not is_exist: return replacement_variables = self.replacement.insert_builder.build() self.client.add_document(_id, replace=True, partial=True, **replacement_variables) doc = self.client.load_document(_id) # if len(self.subs) > 0: # subreplacement = sub.insert_builder.build() # def insert_many(self, list_of_items): # self.client.ba # pass def insert(self, allow_duplicates=False): """ # INSERT Given all of the items we've set, add documents """ self.set_entity() self.keystore.reset() previous_id = None if self.use_sub_query: previous_id = self.sub_insert(allow_duplicates=allow_duplicates) else: previous_id, _ = self.normal_insert( allow_duplicates=allow_duplicates) return previous_id def remove(self): """Remove all documents that match a query. Given a query, remove every document that matches the results of that query. :: >>> search['name'] = 'sample_name' >>> search['category'] = 'sample_query' >>> search.remove() """ self.set_entity() self.keystore.reset() if self.use_sub_query and self.search_sub: removable = set() sup_ids, sub_ids = self.verbatim_sub_ids() norm_ids = self.normal_find_ids(limit_ids=sup_ids) removable = removable.intersection(sup_ids) removable = removable.intersection(norm_ids) [self.client.delete_document(_id) for _id in removable] for sub in self.subs.values(): for _id in sub_ids: sub.client.delete_document(_id) else: norm_ids = self.normal_find_ids() [self.client.delete_document(_id) for _id in norm_ids] def reset(self): """Reset all local variables""" self.reset_builders() self.is_set_entity = True self.is_replacement = False self.current_replacement = None self.current_client = None self.use_sub_query = False
def load_data(redis_server, redis_port, redis_password):
    load_client = Client(
        'fortune500-v1',
        host=redis_server,
        password=redis_password,
        port=redis_port
    )
    load_ac = AutoCompleter(
        'ac',
        conn=load_client.redis
    )

    definition = IndexDefinition(
        prefix=['fortune500:'],
        language='English',
        score_field='title',
        score=0.5
    )
    load_client.create_index(
        (
            TextField("title", weight=5.0),
            TextField('website'),
            TextField('company'),
            NumericField('employees', sortable=True),
            TextField('industry', sortable=True),
            TextField('sector', sortable=True),
            TextField('hqcity', sortable=True),
            TextField('hqstate', sortable=True),
            TextField('ceo'),
            TextField('ceoTitle'),
            NumericField('rank', sortable=True),
            NumericField('assets', sortable=True),
            NumericField('revenues', sortable=True),
            NumericField('profits', sortable=True),
            NumericField('equity', sortable=True),
            TagField('tags'),
            TextField('ticker')
        ),
        definition=definition)

    with open('./fortune500.csv', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            if line_count > 0:
                load_ac.add_suggestions(Suggestion(row[1].replace('"', ''), 1.0))
                load_client.redis.hset(
                    "fortune500:%s" % (row[1].replace(" ", '')),
                    mapping={
                        'title': row[1],
                        'company': row[1],
                        'rank': row[0],
                        'website': row[2],
                        'employees': row[3],
                        'sector': row[4],
                        'tags': ",".join(row[4].replace('&', '').replace(',', '').replace(' ', ' ').split()).lower(),
                        'industry': row[5],
                        'hqcity': row[8],
                        'hqstate': row[9],
                        'ceo': row[12],
                        'ceoTitle': row[13],
                        'ticker': row[15],
                        'revenues': row[17],
                        'profits': row[19],
                        'assets': row[21],
                        'equity': row[22]
                    })
            line_count += 1

    # Finally Create the alias
    load_client.aliasadd("fortune500")
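A hedged read-side sketch to go with load_data(): it assumes the 'fortune500' alias and the 'ac' suggestion key created above, and the search term is only an example.

from redisearch import AutoCompleter, Client, Query

def search_fortune500(redis_server, redis_port, redis_password, term='energy'):
    # the alias lets readers target 'fortune500' instead of the versioned index name
    client = Client('fortune500', host=redis_server, password=redis_password, port=redis_port)
    res = client.search(Query(term).sort_by('rank').paging(0, 10))
    ac = AutoCompleter('ac', conn=client.redis)
    suggestions = [s.string for s in ac.get_suggestions(term, fuzzy=True)]
    return res.docs, suggestions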