def test():
    """Smoke-test RediSearch: rebuild an index, add one doc, run queries.

    FIX: the original used Python-2 ``print res.total`` statements, which
    are a SyntaxError on Python 3; converted to ``print()`` calls (also
    valid on Python 2 with a single argument).
    """
    # Creating a client with a given index name
    client = Client('myIndex')

    # Recreate the index definition and schema from scratch
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with snippets
    # res = client.search("search engine", snippet_sizes={'body': 50})

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
def start(self, data, index_name):
    """Rebuild the index *index_name* and load *data* into it.

    Returns a one-element list holding the last stage reached (1-5),
    so callers can tell how far the pipeline progressed.
    """
    stage = 1
    # Drop any previous index, then reconnect under the requested name.
    self.drop_index()
    self.client = Client(index_name, self.host, self.port)
    stage = 2

    # Field layout for every indexed document.
    schema = [
        NumericField('INDEX'),
        TextField('DATA'),
        TextField('SECTION_TYPE'),
        TextField('DOCID'),
        TextField('PAGE'),
        TextField('GRIDID'),
        TextField('ROWCOL'),
        TextField('BBOX'),
        TextField('PAGE_GRID_SE'),
        TextField('Rowspan'),
        TextField('Colspan'),
    ]
    stage = 3

    self.add_indexing_schema(schema)
    stage = 4

    self.add_data(data, index_name)
    stage = 5
    return [stage]
def create_product_search_index_async():
    """Build the 'productIndex' RediSearch index from all active products.

    Progress is exposed via the cache keys 'Search_index_total' and
    'Search_index_index' so other processes can poll indexing progress.

    FIX: the original used a Python-2 ``print`` statement (SyntaxError on
    Python 3); converted to a call form valid on both interpreters.
    """
    print('Creating Search Index')
    client = Client('productIndex')
    client.create_index([
        TextField('title', weight=5.0),
        TextField('description'),
        TextField('tags'),
        TextField('category')
    ])
    products = Product.objects.filter(active=True)
    cache.set('Search_index_total', len(products), timeout=None)
    index = 0
    for product in products:
        title = product.name
        description = product.description
        category = ','.join([cat.name for cat in product.category.all()])
        # Start from the product's own tag string, then append mapped tags.
        tag = product.tags
        tag_maps = ProductTagMap.objects.filter(product=product)
        for tag_map in tag_maps:
            tag = tag + tag_map.tag.tag + ' '
        # Append every distinct variant value as extra searchable text.
        category_varients = []
        for pv in ProductVarientList.objects.filter(product=product):
            for cv in pv.key.all():
                category_varients.append(cv.value)
        tag += ' '.join(list(set(category_varients)))
        client.add_document(str(product.id), title=title,
                            description=description, tags=tag,
                            category=category)
        cache.set('Search_index_index', index, timeout=None)
        index += 1
    return True
def cache_to_redis(data: dict):
    """Index *data* (id -> game dict) into the 'games' RediSearch index.

    Each value must provide 'name', 'alt_names' (list), 'summary',
    'cover' and 'thumb'.  Index creation is retried until it succeeds.

    FIXES:
    - 'cover' and 'thumb' were previously placed OUTSIDE the schema list,
      so they were passed as stray positional arguments to create_index()
      instead of being indexed fields.
    - the retry message contained a dangling '%s' with no value; it now
      reports the actual exception.
    """
    if REDIS_HOSTNAME == '':
        print('REDIS_HOSTNAME environment variable is not set')
        return
    client = Client('games', host=REDIS_HOSTNAME, port=REDIS_PORT)

    # The widest alt_names list determines how many alt_name_N fields we need.
    max_alt_names = len(
        max(data.values(), key=lambda d: len(d['alt_names']))['alt_names'])

    index_created = False
    while not index_created:
        try:
            client.create_index([
                TextField('name', weight=10),
                *[TextField('alt_name_%d' % i, weight=10)
                  for i in range(max_alt_names)],
                TextField('summary', weight=1),
                TextField('cover', weight=0),
                TextField('thumb', weight=0),
            ])
            index_created = True
        except Exception as e:
            print('Failed to create index, retrying: %s' % e)
            time.sleep(3)

    for k, v in data.items():
        client.add_document(
            k,
            name=v['name'],
            **{'alt_name_%d' % i: n for i, n in enumerate(v['alt_names'])},
            cover=v['cover'],
            thumb=v['thumb'],
            summary=v['summary'])
    print('done')
def test_search(self):
    """Index one document and verify it is found via full-text search."""
    pool = RedisPool(urls=("localhost", 6379))
    search = pool.search("def_index")
    self.assertIsNotNone(search)

    # IndexDefinition is available for RediSearch 2.0+
    definition = IndexDefinition(prefix=['doc:', 'article:'])

    # Schema: heavily weighted title plus a plain body field.
    title_field = TextField("title", weight=5.0)
    body_field = TextField("body")
    search.create_index((title_field, body_field), definition=definition)

    search.hset('doc:1', mapping={
        'title': 'RediSearch',
        'body': 'Redisearch impements a search engine on top of redis'
    })

    # A simple query must report exactly our single document.
    res = search.search("search engine")
    self.assertEqual(res.total, 1)
    self.assertEqual(res.docs[0].title, "RediSearch")
def to_search_field(self):
    """Translate this field description into a RediSearch schema field.

    'int' maps to a NumericField; every other type (including 'str' and
    'map') maps to a TextField with the same keyword set.

    FIX: the original duplicated the TextField construction verbatim in
    the ``'str' or 'map'`` branch and the ``else`` branch; the two
    identical branches are collapsed into one.
    """
    if self.type == 'int':
        return NumericField(
            name=self.name,
            sortable=self.sortable,
            no_index=self.no_index,
        )
    # 'str', 'map' and any other type share the same TextField mapping.
    return TextField(
        name=self.name,
        weight=self.weight,
        sortable=self.sortable,
        no_stem=self.no_stem,
        no_index=self.no_index,
        phonetic_matcher=self.phonetic_matcher,
    )
def build_ipa_index():
    """Build the 'IPAIndex' RediSearch index from indicepa.gov.it open data.

    Downloads the 'amministrazioni' and 'ou' datasets, indexes one document
    per public administration, then patches in the digital-transition-office
    contact fields via partial updates.

    FIX: the bare ``except:`` around drop_index() also swallowed
    KeyboardInterrupt/SystemExit; narrowed to ``except Exception``.
    """
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url, sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    # Keep only the "digital transition office" organisational units.
    ipa_index_ou = ipa_index_ou.loc[
        lambda df: df['cod_ou'] == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped (or never existed)

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`', flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'], language='italian',
                               replace=True, **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        # partial=True patches rtd_* fields into the existing documents.
        rs_client.add_document(row['cod_amm'], partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)), flush=True)
def __init__(self, client):
    """Hold the RediSearch client plus the CSV source and schema for areas."""
    self.client = client
    # CSV file the area documents are loaded from.
    self.file_path = "./index-data/area.csv"
    # Schema: two numeric identifiers plus free-text title/body fields.
    self.fields = (
        NumericField("index"),
        NumericField("areaId"),
        TextField("areaTitle"),
        TextField("areaBody"),
    )
def to_field(k, v):
    """Map a type tag *v* to a schema field named *k*.

    BOOL and NUMERIC become sortable fields, TEXT/TAG their plain
    counterparts; anything unrecognised is treated as a geo field.
    """
    builders = {
        "BOOL": lambda name: TextField(name, sortable=True),
        "NUMERIC": lambda name: NumericField(name, sortable=True),
        "TEXT": lambda name: TextField(name),
        "TAG": lambda name: TagField(name),
    }
    return builders.get(v, GeoField)(k)
def import_brewery_geo(r, rsclient):
    """Build the brewery search index and add one document per brewery.

    Reads brewery ids and coordinates from the geo CSV, pulls the remaining
    attributes from the brewery hashes already stored in Redis, and adds a
    document per brewery (replace+partial, so reruns update in place).
    Rows with no matching hash are reported and skipped.
    """
    # create the brewery redisearch index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('address'),
        TextField('city'),
        TextField('state'),
        TextField('country'),
        NumericField('id', sortable=True),
        GeoField('location')
    ]
    rsclient.create_index([*ftidxfields])
    with open(brewerygeofile) as geofile:
        geo = csv.reader(geofile)
        for row in geo:
            if geo.line_num == 1:
                # skip the header line
                continue
            # use the brewery id to generate the brewery key added earlier
            brewery_key = "{}:{}".format(brewery, row[1])
            # get all the data from the brewery hash
            binfo = r.hgetall(brewery_key)
            if not (any(binfo)):
                print ("\tERROR: Missing info for {}, skipping geo import".format(brewery_key))
                continue
            # add the brewery document to the index
            # NOTE(review): location is built as "row[3],row[2]" — presumably
            # the CSV stores (lat, lon) and this reorders to "lon,lat" for
            # the geo field; confirm against the CSV layout.
            ftaddfields = {
                'name': binfo[b'name'].decode(),
                'address': binfo[b'address1'].decode(),
                'city': binfo[b'city'].decode(),
                'state': binfo[b'state'].decode(),
                'country': binfo[b'country'].decode(),
                'id': row[1],
                'location': "{},{}".format(row[3], row[2])
            }
            try:
                rsclient.add_document(
                    "brewery:{}".format(row[1]),
                    score=1.0,
                    replace=True,
                    partial=True,
                    **ftaddfields
                )
            except Exception as e:
                print ("\tERROR: Failed to add document for {}: {}".format(brewery_key, e))
                continue
def create_website_items_index():
    "Creates Index Definition."
    # CREATE index
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache())

    # DROP if already exists
    try:
        client.drop_index()
    except Exception:
        pass

    idx_def = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)])

    # Pull the configurable field list from e-commerce settings.
    raw_fields = frappe.db.get_single_value('E Commerce Settings',
                                            'search_index_fields')
    field_names = raw_fields.split(',') if raw_fields else []

    # web_item_name is always indexed explicitly below; avoid duplicates.
    if 'web_item_name' in field_names:
        field_names.remove('web_item_name')

    schema = [TextField("web_item_name", sortable=True)]
    schema.extend(to_search_field(name) for name in field_names)

    client.create_index(schema, definition=idx_def)

    reindex_all_web_items()
    define_autocomplete_dictionary()
def create_website_items_index(): "Creates Index Definition." # CREATE index client = Client(make_key(WEBSITE_ITEM_INDEX), conn=frappe.cache()) try: client.drop_index() # drop if already exists except ResponseError: # will most likely raise a ResponseError if index does not exist # ignore and create index pass except Exception: raise_redisearch_error() idx_def = IndexDefinition([make_key(WEBSITE_ITEM_KEY_PREFIX)]) # Index fields mentioned in e-commerce settings idx_fields = frappe.db.get_single_value("E Commerce Settings", "search_index_fields") idx_fields = idx_fields.split(",") if idx_fields else [] if "web_item_name" in idx_fields: idx_fields.remove("web_item_name") idx_fields = list(map(to_search_field, idx_fields)) client.create_index( [TextField("web_item_name", sortable=True)] + idx_fields, definition=idx_def, ) reindex_all_web_items() define_autocomplete_dictionary()
def __init__(self):
    """Connect to the event broker and ensure the CCTV_DATA index exists.

    FIX: corrected the typo "creatign" in the logged error message.
    """
    self.r = redis.from_url(config.EVENT_BROKER_URL)
    self.client = Client('CCTV_DATA')
    try:
        self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
    except Exception as error:
        # create_index raises (among other reasons) when the index already
        # exists; report and continue rather than crash.
        print("Error while creating index", error)
def __init__(self, hostname: str) -> None:
    """Connect to the 'movies' index, recreating it from scratch.

    FIX: the bare ``except:`` also swallowed KeyboardInterrupt/SystemExit;
    narrowed to ``except Exception``.
    """
    self._idx_name = "movies"
    super().__init__(hostname, self._idx_name)

    # Drop a pre-existing index; errors (e.g. no such index) are ignored.
    try:
        info_exists = self._client.info()
        if info_exists:
            self._client.drop_index()
    except Exception:
        pass

    definition = IndexDefinition(prefix=[f'{self._idx_name}:'])
    self._client.create_index((
        TextField('Title'),
        TextField('Plot'),
        TextField('imdbID'),
    ), definition=definition)
def start(self, data, doc_id, company, project):
    """Recreate the per-document index and load *data* into it.

    The index is named "<project>_DOCUMENT_<doc_id>".  Returns a
    one-element list holding the last stage reached (1-5).
    """
    stage = 1
    index_name = "{}_DOCUMENT_{}".format(project, doc_id)
    # Drop any previous index, then reconnect under the new name.
    self.drop_index()
    self.client = Client(index_name, self.host, self.port)
    stage = 2

    # Field layout for every indexed document.
    schema = [
        NumericField('INDEX'),
        TextField('DATA'),
        TextField('PAGE'),
        TextField('BBOX'),
    ]
    stage = 3

    self.add_indexing_schema(schema)
    stage = 4

    self.add_data(data, company, doc_id, project)
    stage = 5
    return [stage]
def createHub(self):
    """Seed the hub object in the JSON store and create the modules index."""
    logger.info('Creating the hub in the database {}'.format(self._ts))

    # Store the master modules catalog as an object
    hub_doc = {
        'created': str(_toepoch(self._ts)),
        'modules': {},
        'submissions': [],
        'submit_enabled': False
    }
    self.dconn.jsonset(self._hubkey, Path.rootPath(), hub_doc)

    # Create a RediSearch index for the modules
    # TODO: catch errors
    schema = (
        TextField('name', sortable=True),
        TextField('description'),
        NumericField('stargazers_count', sortable=True),
        NumericField('forks_count', sortable=True),
        NumericField('last_modified', sortable=True),
    )
    self.sconn.create_index(schema, stopwords=stopwords)
def create_index_definition(self, drop_existing=False):
    """
    Create an index definition. Do nothing if it already exists.
    """
    if drop_existing:
        self.client.drop_index()

    definition = IndexDefinition(prefix=[self.keys.pre("resource:")])
    schema = [
        TextField('body', weight=1),
        TextField('repo_name', weight=1.5),
        TextField('language', weight=1),
        TextField('lists'),
    ]
    try:
        self.client.create_index(schema, definition=definition)
    except ResponseError:
        # RediSearch signals an already-defined index via ResponseError.
        print("Index already exists.")
def build_index(self, line_doc_path, n_docs):
    """Index up to *n_docs* documents from the line-doc file.

    FIXES:
    - Python-2 ``print "..."`` statement converted to a call form that is
      valid on both interpreters.
    - bare ``except:`` around drop_index() narrowed to ``except Exception``.
    """
    line_pool = LineDocPool(line_doc_path)

    # Drop any stale index; ignore the error raised when none exists.
    try:
        self.client.drop_index()
    except Exception:
        pass

    self.client.create_index(
        [TextField('title'), TextField('url'), TextField('body')])

    for i, d in enumerate(line_pool.doc_iterator()):
        self.client.add_document(i, nosave=True, title=d['doctitle'],
                                 url=d['url'], body=d['body'])
        if i + 1 == n_docs:
            break
        if i % 1000 == 0:
            print("{}/{} building index".format(i, n_docs))
def get(self, request):
    """Rebuild a course search index, query it with ?key=..., and return
    the matching courses serialized.

    NOTE(review): the index name embeds ``datetime.now()``, so the client
    created for searching below points at a *different* index name than
    the one the documents were just added to — confirm this is intended.
    """
    # data=request.data
    mes = {}
    search_key = request.GET.get('key')
    print(search_key)
    all_classes = Course.objects.all()
    print("开始创建索引——————————————————————————")
    # Create a client with the given index name
    client = Client('CII' + str(datetime.now()), host=settings.SIP, port='6666')
    # Create the index definition and schema
    client.create_index((TextField('title'), TextField('body')))
    print('索引创建完毕————————————————————————————————')
    print('开始添加数据————————————————————————————————')
    for i in all_classes:
        print(str(i.id) + str(i.title))
        # Index the document; the course id is embedded in the title after '@'
        # so it can be recovered from search hits below.
        client.add_document('result' + str(datetime.now()),
                            title=i.title + '@' + str(i.id),
                            info=i.info,
                            language='chinese')
        print(333333333)
    print('数据添加完毕————————————————————————————————')
    print(client.info())
    # Search
    client = Client('CII' + str(datetime.now()), host=settings.SIP, port='6666')
    res = client.search(search_key)
    print('查询结束————————————————————————————————————————————————')
    id_list = []
    print(res.docs)
    for i in res.docs:
        # print(i.title)
        # Take the title, split on '@' to recover the course ID,
        # then query and serialize the courses for display.
        id = i.title.split('@')[1]
        id_list.append(id)
    course = Course.objects.filter(id__in=id_list).all()
    c = CourseSerializersModel(course, many=True)
    mes['course'] = c.data
    mes['code'] = 200
    mes['message'] = '搜索完毕'
    return Response(mes)
def create_user_index(self, users):
    """
    Creates a new user index if not exists.

    :param users: iterable of user objects to index
    :return: True when the documents were indexed, False if the index
             already existed (creation raised ResponseError)

    FIX: the ``str.maketrans`` escape table was rebuilt three times per
    user; it is loop-invariant, so it is now built once up front.
    """
    definition = IndexDefinition(prefix=['doc:', 'user:'])
    try:
        self.client.create_index(
            (TextField("first_name"),
             TextField("last_name"),
             TextField("email"),
             NumericField("age"),
             NumericField("is_employee"),
             NumericField("user_id", sortable=True)),
            definition=definition)
    except redis.exceptions.ResponseError:
        # Index already exists — nothing to do.
        return False

    # Escape '-' for the RediSearch query syntax (built once, reused).
    escape_dash = str.maketrans({"-": r"\-"})

    indexer = self.client.batch_indexer(chunk_size=len(users))
    for user in users:
        fields = {
            "first_name": user.first_name.translate(escape_dash),
            "last_name": user.last_name.translate(escape_dash),
            "email": user.email.translate(escape_dash),
            "age": user.age,
            "user_id": user.id,
            "is_employee": int(user.is_employee),
        }
        indexer.add_document(f"doc:{user.id}", **fields)
    indexer.commit()
    return True
def clientpush(self):
    """Copy every row of the 'customers' table into the 'Checkout' index,
    then run a few sample searches and print the results.

    NOTE(review): columns are mapped to fields purely by position
    (result[0]..result[11]); confirm the table's column order matches.
    """
    client = Client('Checkout')
    client.create_index([
        NumericField('Key'),
        TextField('UsageClass'),
        TextField('CheckoutType'),
        TextField('MaterialType'),
        NumericField('CheckoutYear'),
        NumericField('CheckoutMonth'),
        NumericField('Checkouts'),
        TextField('Title'),
        TextField('Creator'),
        TextField('Subjects'),
        TextField('Publisher'),
        TextField('PublicationYear')
    ])

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    results = cursor.fetchall()
    i = 0
    # One document per row, keyed doc0, doc1, ...
    for result in results:
        client.add_document('doc%s' % i,
                            Key=result[0],
                            UsageClass=result[1],
                            CheckoutType=result[2],
                            MaterialType=result[3],
                            CheckoutYear=result[4],
                            CheckoutMonth=result[5],
                            Checkouts=result[6],
                            Title=result[7],
                            Creator=result[8],
                            Subjects=result[9],
                            Publisher=result[10],
                            PublicationYear=result[11])
        i += 1
        print(i)

    # Sample plain-text search.
    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))
    res1 = client.search("use")
    print(res1)
    # Sample structured query: verbatim match, ids only, first 5 hits.
    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
def insert():
    """Load movie rows into the search index and the autocomplete dictionary."""
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index(
        [TextField('title'), TagField('genres', separator='|')])
    for idx in range(len(movie_df)):
        suggestion_client.add_document(movie_df['tmdbId'][idx],
                                       title=movie_df['title'][idx],
                                       genres=movie_df['genres'][idx])

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')
    for idx in range(len(movie_df)):
        completion_client.add_suggestions(Suggestion(movie_df['title'][idx]))
def create_index(self):
    """Create the stock index: a text name field plus the configured
    numeric fields; an already-existing index is silently accepted."""
    client = self.get_indexed_client()
    definition = IndexDefinition(prefix=['stock:'])

    # FT.CREATE idx:stock ON HASH PREFIX 1 stock: SCHEMA SC_NAME TEXT ...
    schema = [TextField("SC_NAME")]
    schema += [NumericField(name) for name in self.numeric_indexes]

    try:
        client.create_index(schema, definition=definition)
    except redis.exceptions.ResponseError as e:
        # FT.DROPINDEX idx:stock DD
        if str(e) != "Index already exists":
            raise e
def create(self):
    """(Re)create the article index, dropping any existing definition first.

    FIX: the bare ``except:`` also swallowed KeyboardInterrupt/SystemExit;
    narrowed to ``except Exception``.
    """
    try:
        self.client.drop_index()
    except Exception:
        pass  # index did not exist yet
    self.client.create_index([
        NumericField('WORDCOUNT', sortable=True),
        TextField('BYLINE', no_stem=True, sortable=True),
        TextField('DOCUMENTTYPE', sortable=True),
        TextField('HEADLINE', sortable=True),
        TagField('KEYWORDS', separator=';'),
        NumericField('MULTIMEDIA', sortable=True),
        TextField('NEWDESK', sortable=True),
        NumericField('PRINTPAGE', sortable=True),
        NumericField('PUBDATE', sortable=True),
        TextField('SECTIONNAME', sortable=True),
        TextField('SNIPPET', sortable=True),
        TextField('TYPEOFMATERIAL', sortable=True),
        TextField('WEBURL')
    ])
import hashlib
import gpxpy
import gpxpy.gpx
from redisearch import Client, Query, TextField, GeoField, NumericField

# Client bound to the 'attractions' index on a local Redis.
client = Client(
    'attractions',
    host='127.0.0.1',
    password='',
    port=6379
)

# Schema: weighted title, free-text description, a sortable numeric
# verified flag, and a geo field for location queries.
client.create_index([
    TextField('title', weight=5.0),
    TextField('description'),
    NumericField('verified', sortable=True),
    GeoField('geo'),
])

gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r',
                encoding='utf-8')
gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    # v = 1 marks waypoints whose comment mentions "Verified".
    # NOTE(review): assumes waypoint.comment is never None — `in` on None
    # would raise TypeError; confirm against the GPX data.
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    # "name,longitude,latitude" string built from the waypoint.
    t = "%s,%s,%s" % (waypoint.name, waypoint.longitude, waypoint.latitude)
from redisearch import Client, TextField

# Client bound to the index named 'myIndex'.
client = Client('myIndex')

# Schema: a heavily weighted title plus a plain body field.
schema = (TextField('title', weight=5.0), TextField('body'))
client.create_index(schema)

# Index a single document.
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch impements a search engine on top of redis')
import re from datetime import datetime import json import logging from random import randint from time import sleep #stagger reading and indexing for parallel sleep(randint(1, 10)) logging.basicConfig(filename='parse.log',level=logging.INFO) client = Client('medline') try: client.create_index([TextField('abstract')]) except ResponseError: pass with open(sys.argv[1], 'r') as f: data=f.read() recs = data.split("<PubmedArticle>"); recs = recs[1:] indexer = client.batch_indexer(chunk_size=500) count = 0 for r in recs:
import pandas as pd
import json
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

# Give the Redis cluster time to come up before connecting.
sleep(15)

# One known startup node; the cluster client discovers the rest.
nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)

# 'week1' index over name/surname/job text fields.
client = Client('week1', conn=rc)
client.create_index([TextField('name'), TextField('surname'), TextField('job')])

dat = pd.read_csv("test.csv")

# Upsert one document per CSV row, keyed by the row's 'index' column
# (replace+partial so reruns update documents in place).
for idx, row in tqdm(dat.iterrows()):
    client.add_document(f"{row['index']}",
                        replace=True,
                        partial=True,
                        name=f"{row['name']}",
                        surname=f"{row['surname']}",
                        job=f"{row['job']}")
#-------------------------------------------- # Import the whole dirctory to redisearch # Create the index and the documents # Change the the dirt to your document's path #-------------------------------------------- import os from redisearch import Client, Query, TextField dirt = "/path/to/the/documents/" # Change it to your own path client = Client("BoxGroup", port=6379) # 6379 as default client.create_index([TextField('title'), TextField('body')]) filelist = os.listdir(dirt) filelist = sorted(filelist) try: filelist.remove(".git") except: print("git目录不存在,已跳过") filecounter = 0 for filename in filelist: openfilename = dirt + filename with open(openfilename, "r+") as f: data = f.read() try: client.add_document(filecounter, title=filename, body=data, language="chinese") except:
def to_search_field(field):
    """Map a field name to its schema type: 'tags' becomes a comma-separated
    TagField, every other field a plain TextField."""
    if field != "tags":
        return TextField(field)
    return TagField("tags", separator=",")