def create_product_search_index_async():
    print('Creating Search Index')
    client = Client('productIndex')
    client.create_index([
        TextField('title', weight=5.0),
        TextField('description'),
        TextField('tags'),
        TextField('category')
    ])
    products = Product.objects.filter(active=True)
    cache.set('Search_index_total', len(products), timeout=None)
    index = 0
    for product in products:
        title = product.name
        description = product.description
        category = ','.join([cat.name for cat in product.category.all()])
        # Collect tags from the product itself and from its tag mappings
        tag = product.tags
        tag_maps = ProductTagMap.objects.filter(product=product)
        for tag_map in tag_maps:
            tag = tag + tag_map.tag.tag + ' '
        # Append the distinct variant values as additional tag text
        category_varients = []
        for pv in ProductVarientList.objects.filter(product=product):
            for cv in pv.key.all():
                category_varients.append(cv.value)
        tag += ' '.join(list(set(category_varients)))
        client.add_document(str(product.id),
                            title=title,
                            description=description,
                            tags=tag,
                            category=category)
        # Track indexing progress in the cache
        cache.set('Search_index_index', index, timeout=None)
        index += 1
    return True
class RediSearchClient(object):
    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        line_pool = LineDocPool(line_doc_path)

        # Rebuild the index from scratch
        try:
            self.client.drop_index()
        except Exception:
            pass

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'], url=d['url'], body=d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        # print(res.total)  # "1"
        return res
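# A minimal usage sketch for RediSearchClient above; the index name, line-doc path,
# and query string are hypothetical, and LineDocPool is assumed to be importable
# from the surrounding project.
if __name__ == '__main__':
    rs = RediSearchClient('wiki_lines')
    rs.build_index('/path/to/linedoc.txt', n_docs=10000)
    res = rs.search('hello world')
    print(res.total)
    for doc in res.docs:
        print(doc.id, doc.title)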
def cache_to_redis(data: dict):
    if REDIS_HOSTNAME == '':
        print('REDIS_HOSTNAME environment variable is not set')
        return

    client = Client('games', host=REDIS_HOSTNAME, port=REDIS_PORT)
    indexCreated = False
    # The widest alt_names list determines how many alt_name_<i> fields the schema needs
    maxAltNames = len(max(data.values(), key=lambda d: len(d['alt_names']))['alt_names'])

    while not indexCreated:
        try:
            client.create_index([
                TextField('name', weight=10),
                *[TextField('alt_name_%d' % i, weight=10) for i in range(maxAltNames)],
                TextField('summary', weight=1),
                TextField('cover', weight=0),
                TextField('thumb', weight=0),
            ])
            indexCreated = True
        except Exception as e:
            print('Failed to create index, retrying: %s' % e)
            time.sleep(3)

    for k, v in data.items():
        client.add_document(k,
                            name=v['name'],
                            **{'alt_name_%d' % i: n for i, n in enumerate(v['alt_names'])},
                            cover=v['cover'],
                            thumb=v['thumb'],
                            summary=v['summary'])
    print('done')
def test():
    # Creating a client with a given index name
    client = Client('myIndex')

    # Creating the index definition and schema (drop any previous index first)
    try:
        client.drop_index()
    except Exception:
        pass
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with snippets
    # res = client.search("search engine", snippet_sizes={'body': 50})

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
class TAS_Import():
    def __init__(self, index_name, host="172.16.20.7", port=6382, db=0):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        self.index_name = index_name
        self.redis = Redis()

    def add_indexing(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, data):
        for i, rr in enumerate(data):
            index = i + 1
            print(rr)
            name, age, location = rr['name'], rr['age'], rr['location']
            self.client.add_document(index, NAME=name, AGE=age, LOCATION=location)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception:
            pass
def build_ipa_index():
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    ipa_index_ou = ipa_index_ou.loc[lambda ipa_index_ou: ipa_index_ou['cod_ou'] == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`', flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        rs_client.add_document(row['cod_amm'],
                               partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)),
        flush=True)
def insert():
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index([TextField('title'), TagField('genres', separator='|')])

    for i in range(0, len(movie_df)):
        suggestion_client.add_document(movie_df['tmdbId'][i],
                                       title=movie_df['title'][i],
                                       genres=movie_df['genres'][i])

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')
    for i in range(0, len(movie_df)):
        completion_client.add_suggestions(Suggestion(movie_df['title'][i]))
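# A hedged follow-up to insert() above: reading back auto-completion suggestions.
# get_suggestions() is part of redisearch-py's AutoCompleter API; the prefix is a
# made-up example value.
def suggest(prefix):
    completion_client = AutoCompleter('ac')
    # Return up to 5 fuzzy-matched movie titles that start with the prefix
    return [s.string for s in completion_client.get_suggestions(prefix, fuzzy=True, num=5)]

# Example: suggest('toy') returns a list of matching title strings.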
def clientpush(self):
    client = Client('Checkout')
    client.create_index([
        NumericField('Key'),
        TextField('UsageClass'),
        TextField('CheckoutType'),
        TextField('MaterialType'),
        NumericField('CheckoutYear'),
        NumericField('CheckoutMonth'),
        NumericField('Checkouts'),
        TextField('Title'),
        TextField('Creator'),
        TextField('Subjects'),
        TextField('Publisher'),
        TextField('PublicationYear')
    ])

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    results = cursor.fetchall()

    # Add every fetched row as a RediSearch document
    i = 0
    for result in results:
        client.add_document('doc%s' % i,
                            Key=result[0],
                            UsageClass=result[1],
                            CheckoutType=result[2],
                            MaterialType=result[3],
                            CheckoutYear=result[4],
                            CheckoutMonth=result[5],
                            Checkouts=result[6],
                            Title=result[7],
                            Creator=result[8],
                            Subjects=result[9],
                            Publisher=result[10],
                            PublicationYear=result[11])
        i += 1
    print(i)

    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))

    res1 = client.search("use")
    print(res1)

    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
class RandomWikipediaImport(object):
    def __init__(self):
        self.rs = Client('wikipedia')
        self.rs.create_index((TextField('title', weight=5.0), TextField('body')))
        print('>>> Created index')

    def insert_random_loop(self):
        i = 1
        while True:
            # Pick a random article title and fetch the full page
            ra = wikipedia.random()
            article = wikipedia.page(ra)
            self.rs.add_document(f'doc{i}', title=article.title, body=article.content)
            print(f'>>> Inserted {article.title}')
            i += 1
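# A minimal usage sketch: build the 'wikipedia' index and start the endless import
# loop (assumes the `wikipedia` package is installed and RediSearch runs locally).
if __name__ == '__main__':
    importer = RandomWikipediaImport()
    importer.insert_random_loop()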
class TAS_Import():
    def __init__(self, index_name, host=ip, port=port, db=db):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        #self.redis = Redis()

    def add_indexing_schema(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, company, doc_id, project):
        for i, rr in enumerate(rdata):
            index = doc_id + company + "CMDIC" + str(i + 1) + project
            l1, l2, l3 = rr
            l1 = config_obj.StringEscape(l1)
            self.client.add_document(index, DATA=l1, PAGE=l2, BBOX=l3)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception as e:
            # print('Error', e)
            pass

    def start(self, data, doc_id, company, project):
        status = 1
        index_name = project + "_DOCUMENT_" + str(doc_id)
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('PAGE'),
            TextField('BBOX')
        ]
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, company, doc_id, project)
        status = 5
        return [status]
class EventProcessor():
    def __init__(self):
        self.r = redis.from_url(config.EVENT_BROKER_URL)
        self.client = Client('CCTV_DATA')
        try:
            self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
        except Exception as error:
            print("Error while creating index", error)
        # self.client.create_index([TextField('title', weight=5.0), TextField('body')])

    def get_objects_in_image(self, image):
        # TODO: call RedisAI module
        objects = [
            "key", "passport", "wallet", "car", "bag", "watch", "book",
            "satchel", "laptop", "camera", "mobile_phone"
        ]
        # Pick four random object labels as stand-in detections
        tags = []
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        return tags

    def process(self, msg):
        print("Going to process message and store it", msg)
        # print(float(msg["LON"]), float(msg["LAT"]), msg["CCTV_ID"])
        # print(type(float(msg["LON"])), type(float(msg["LAT"])), msg["CCTV_ID"])
        try:
            self.r.geoadd("CCTV_LOCATION", float(msg["LON"]), float(msg["LAT"]), msg["CCTV_ID"])
            msg["TAGS"] = self.get_objects_in_image(msg.get("IMAGE", ""))
            # print("Going to store this in search", msg)
            doc_unique_key = msg["CCTV_ID"] + "_" + msg["TS"]
            self.client.add_document(doc_unique_key,
                                     CCTV_ID=doc_unique_key,
                                     TAGS=",".join(msg["TAGS"]))
        except Exception as error:
            print("Error while adding cctv data", error)
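# A minimal usage sketch for EventProcessor; the message below is hypothetical but
# carries the keys process() reads (CCTV_ID, TS, LON, LAT, IMAGE).
if __name__ == '__main__':
    processor = EventProcessor()
    processor.process({
        "CCTV_ID": "cam-42",
        "TS": "1589000000",
        "LON": "77.5946",
        "LAT": "12.9716",
        "IMAGE": "",
    })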
def index():
    client = Client('sh')
    # client.drop_index()
    client.create_index([TextField('txt', weight=1.0)])
    chapters = {}

    with open('will_play_text.csv') as fp:
        r = csv.reader(fp, delimiter=';')
        for line in r:
            # ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."]
            play, chapter, character, text = line[1], line[2], line[4], line[5]
            # Concatenate all lines of a play chapter into a single document
            d = chapters.setdefault('{}:{}'.format(play, chapter), {})
            d['play'] = play
            d['text'] = d.get('text', '') + ' ' + text

    for chapter, doc in chapters.items():
        print(chapter, doc)
        client.add_document(chapter, nosave=True, txt=doc['text'])
class CSVImporter:
    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.hasHeader = args.header
        self.ignore = args.ignore
        self.docid = args.docid
        self.client = Client(self.index, self.host, self.port)
        # Field definitions come from the existing index
        self.fields = self.client.info()['fields']

    def loafFile(self):
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')

    def addRow(self, row, num):
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        # Use the row number for the document id unless a docid column was given
        doc = 'doc' + str(num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, **args)
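# A hypothetical argparse driver for the CSVImporter above; the flag names simply
# mirror the attributes read in __init__ and are not part of any published CLI.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Load a CSV file into an existing RediSearch index')
    parser.add_argument('--host', default='localhost')
    parser.add_argument('--port', type=int, default=6379)
    parser.add_argument('--index', required=True)
    parser.add_argument('--file', required=True)
    parser.add_argument('--delimiter', default=',')
    parser.add_argument('--rows', type=int, default=0)
    parser.add_argument('--header', action='store_true')
    parser.add_argument('--ignore', type=int, nargs='*', default=None)
    parser.add_argument('--docid', type=int, default=0)

    importer = CSVImporter(parser.parse_args())
    importer.loafFile()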
def save_item(self, watcher):
    client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
    client.add_document(watcher.id,
                        clientIp=watcher.client_ip,
                        service=watcher.service,
                        errorMessage=watcher.error_message,
                        stackTrace=watcher.stack_trace,
                        numberRange=watcher.number_range,
                        dateTime=watcher.date_added.strftime("%Y-%m-%d"))

    payload = {
        "clientIp": watcher.client_ip,
        "service": watcher.service,
        "errorMessage": watcher.error_message,
        "stackTrace": watcher.stack_trace,
        "dateTime": watcher.date_added
    }

    if os.getenv('ENABLE_SLACK') == 'true':
        self.send_to_slack(payload)
def get(self, request):
    # data = request.data
    mes = {}
    search_key = request.GET.get('key')
    print(search_key)
    all_classes = Course.objects.all()

    print("Start building the index")
    # Create a client with a given index name; keep the name so the same index is searched later
    index_name = 'CII' + str(datetime.now())
    client = Client(index_name, host=settings.SIP, port='6666')
    # Create the index definition and schema
    client.create_index((TextField('title'), TextField('body')))
    print('Index created')

    print('Start adding data')
    for i in all_classes:
        print(str(i.id) + str(i.title))
        # Index the course; the course id is appended to the title after '@'
        client.add_document('result' + str(datetime.now()),
                            title=i.title + '@' + str(i.id),
                            info=i.info,
                            language='chinese')
        print(333333333)
    print('Data added')
    print(client.info())

    # Search the index that was just built
    client = Client(index_name, host=settings.SIP, port='6666')
    res = client.search(search_key)
    print('Query finished')

    id_list = []
    print(res.docs)
    for i in res.docs:
        # print(i.title)
        # Split the title on '@' to recover the course id, then query and serialize the courses
        id = i.title.split('@')[1]
        id_list.append(id)
    course = Course.objects.filter(id__in=id_list).all()
    c = CourseSerializersModel(course, many=True)
    mes['course'] = c.data
    mes['code'] = 200
    mes['message'] = 'Search complete'
    return Response(mes)
import pandas as pd
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

sleep(15)

i = 0
nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)
client = Client('week1', conn=rc)

#client.create_index([TextField('day'), TextField('filename'), TextField('protocol'), TextField('task_monitor_id'), TextField('task_id'), TextField('job_id'), TextField('site_name')])
client.create_index([TextField('protocol'), TextField('site_name')])

dat = pd.read_csv("results_2018-05-01.csv.gz")
for idx, row in tqdm(dat.iterrows()):
    #client.add_document(f"{row['index']}", day=f"{row['day']}", filename = f"{row['filename']}", protocol = f"{row['protocol']}", task_monitor_id = f"{row['task_monitor_id']}", task_id = f"{row['task_id']}", job_id = f"{row['job_id']}", site_name = f"{row['site_name']}")
    client.add_document(f"{row['day']:0.0f}_{row['index']}",
                        replace=True, partial=True,
                        protocol=f"{row['protocol']}",
                        site_name=f"{row['site_name']}")
    i += 1
    if i == 1000:
        break

#print(client.search("@protocol:local"))
#payload=f"{row['index']}",
#replace=True, partial=True,
#f"{row['day']:0.0f}_{row['index']}"
import json
from redisearch import Client, TextField, TagField

with open('wocka.json', 'r') as f:
    jokes = json.load(f)

hostname = 'redis-17235.laurent.cs.redislabs.com'
port = 17235

client = Client('jokes', hostname, port)
client.create_index((TextField('title'), TextField('body'), TextField('category'), TagField('label')))

for joke in jokes:
    client.add_document(joke['id'],
                        title=joke['title'],
                        body=joke['body'],
                        category=joke['category'],
                        label=joke['category'])

print("number of jokes in the json file: " + str(len(jokes)))

info = client.info()
print(info)
from redisearch import Client, TextField

# Creating a client with a given index name
client = Client('myIndex')

# Creating the index definition and schema
client.create_index((TextField('title', weight=5.0), TextField('body')))

# Indexing a document
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch implements a search engine on top of redis')
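# A minimal follow-up sketch: querying the document indexed above, using the same
# search API shown in the other examples (the query string is an example value).
from redisearch import Query

res = client.search(Query('search engine').paging(0, 5))
print(res.total)  # number of matching documents
for doc in res.docs:
    print(doc.id, doc.title)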
class CSVImporter:
    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.search = False
        self.index = args.index
        if self.index is not None:
            self.search = True
            self.search_client = Client(self.index, self.host, self.port)
            self.info = self.search_client.info()['fields']
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.ignore = args.ignore
        self.docid = args.docid
        self.client = redis.Redis(args.host, args.port)
        self.fields = []

    def addRow(self, row, num):
        values = dict()
        row_id = row[0]
        geo_id = 'zip-'
        geo_pos = ''
        lat = 0
        lon = 0
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.fields[idx - 1] == 'regionidzip':
                geo_id += val
            if self.fields[idx - 1] == 'latitude':
                lat = float(val) / 1000000
            if self.fields[idx - 1] == 'longitude':
                lon = float(val) / 1000000
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            if self.search == True and self.info[fieldnum][2] == 'NUMERIC' and val == '':
                val = '0'
            values[self.fields[idx - 1]] = val
            fieldnum += 1

        # Store the position both as a geo set member and as a searchable field
        values['geopos'] = str(lon) + ',' + str(lat)
        geo_vals = [lon, lat, row_id]
        self.client.geoadd(geo_id, *geo_vals)

        if self.search == True:
            doc = 'doc-' + str(num)
            if self.docid > 0:
                doc = row[self.docid - 1]
            self.search_client.add_document(doc, replace=True, **values)
        else:
            self.client.hmset(row_id, values)

    def loafFile(self):
        reader = csv.reader(self.file, delimiter=self.delimiter)
        self.fields = next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
class CSVImporter:
    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.hasHeader = args.header
        self.ignore = args.ignore
        self.docid = args.docid
        self.nosave = args.nosave
        self.date = args.date
        self.format = args.format
        self.client = Client(self.index, self.host, self.port)
        self.fields = self.client.info()['fields']

    def dateToMillis(self, val):
        try:
            d = datetime.strptime(val, self.format)
        except ValueError:
            print("Invalid data format: " + val)
            return 0
        return str(int(d.strftime('%s')) * 1000)

    def adjustTagValue(self, val, sep):
        # Replace commas outside quotes with the tag separator and strip brackets
        i = 0
        insinglequotes = False
        indoublequotes = False
        newTag = False
        newVal = ''
        while i < len(val):
            if val[i] == '\'' and not indoublequotes:
                insinglequotes = not insinglequotes
            elif val[i] == '"' and not insinglequotes:
                indoublequotes = not indoublequotes
            else:
                if val[i] == ',' and not insinglequotes and not indoublequotes:
                    newVal += sep
                else:
                    newVal += val[i]
            i += 1
        newVal = re.sub('[\t ]*' + sep + '[\t ]*', sep, newVal)
        return re.sub('[\[\]]', '', newVal)

    def addRow(self, row, num):
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            if self.date is not None and idx in self.date:
                val = self.dateToMillis(val)
            if self.fields[fieldnum][2] == 'TAG':
                val = self.adjustTagValue(val, self.fields[fieldnum][4])
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        doc = 'doc-' + str(num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, nosave=self.nosave, **args)

    def loafFile(self):
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
from redisearch import Client, TextField
import json

UNIQ = "db.txt"

# Creating a client with a given index name
client = Client('myIndex')

docs = []
with open(UNIQ) as corpus:
    # Indexing one document per line, skipping url_ids that were already added
    for row in corpus:
        corp = json.loads(row)
        if corp["url_id"] in docs:
            continue
        doc_id = corp["url_id"]
        print(f"adding document id {doc_id}")
        try:
            client.add_document(corp["url_id"], title=corp["title"], body=corp["url"])
        except Exception:
            continue
        docs.append(corp["url_id"])
# Change `dirt` to your documents' path
#--------------------------------------------
import os
from redisearch import Client, Query, TextField

dirt = "/path/to/the/documents/"  # Change it to your own path
client = Client("BoxGroup", port=6379)  # 6379 as default
client.create_index([TextField('title'), TextField('body')])

filelist = os.listdir(dirt)
filelist = sorted(filelist)
try:
    filelist.remove(".git")
except:
    print("The .git directory does not exist, skipping")

filecounter = 0
for filename in filelist:
    openfilename = dirt + filename
    with open(openfilename, "r+") as f:
        data = f.read()
        try:
            client.add_document(filecounter, title=filename, body=data, language="chinese")
        except:
            print("Document already exists.")
    filecounter += 1
import pandas as pd
import json
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

sleep(15)

nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)
client = Client('week1', conn=rc)
client.create_index([TextField('name'), TextField('surname'), TextField('job')])

dat = pd.read_csv("test.csv")
for idx, row in tqdm(dat.iterrows()):
    client.add_document(f"{row['index']}",
                        replace=True, partial=True,
                        name=f"{row['name']}",
                        surname=f"{row['surname']}",
                        job=f"{row['job']}")
import hashlib

import gpxpy
from redisearch import Client, TextField, NumericField, GeoField

client = Client(
    'attractions',
    host='127.0.0.1',
    password='',
    port=6379
)
client.create_index([
    TextField('title', weight=5.0),
    TextField('description'),
    NumericField('verified', sortable=True),
    GeoField('geo'),
])

gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r', encoding='utf-8')
gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    # Waypoints whose comment mentions "Verified" are flagged in the index
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    # Use an MD5 of name + coordinates as a stable document id
    t = "%s,%s,%s" % (waypoint.name, waypoint.longitude, waypoint.latitude)
    client.add_document(
        hashlib.md5(t.encode('utf-8')).hexdigest(),
        description=waypoint.name,
        geo="%s,%s" % (waypoint.longitude, waypoint.latitude),
        verified=v,
    )
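# A hedged follow-up: a radius query over the attractions indexed above. GeoFilter
# comes from redisearch-py; the coordinates and radius are made-up example values.
from redisearch import GeoFilter, Query

q = Query('*').add_filter(GeoFilter('geo', -98.0, 39.0, 100, unit='km'))
res = client.search(q)
for doc in res.docs:
    print(doc.id, doc.description, doc.geo)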
import time

from redisearch import Client, TextField, NumericField, Query
from redis.exceptions import ResponseError

file = open('test_set_tweets.txt', 'r')

client = Client('Tweets')
client.redis.flushdb()
client.create_index([TextField('tweet'), TextField('timestamp')])

start = time.time()
for x, line in enumerate(file.readlines()):
    content = line.strip().split('\t')
    try:
        if len(content) == 4:  # the line carries a timestamp
            client.add_document('-'.join(content[:2]), tweet=content[-2], timestamp=content[-1])
        else:
            client.add_document('-'.join(content[:2]), tweet=content[-1], timestamp='')
    except ResponseError:
        pass
    if x % 1000 == 0:
        print(x, 'lines indexed...')
end = time.time()
print("Indexing time elapsed", end - start)

total = 0
for i in range(30):
    start = time.time()
class TAS_Import():
    def __init__(self, index_name, host="172.16.20.7", port=6382, db=0):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        self.config_obj = redis_config.TAS_AutoCompleter(host, port, db, "Default")
        #self.redis = Redis()

    def add_indexing_schema(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, index_name):
        for i, rr in enumerate(rdata):
            # print(rr, type(rr[2]))
            l1, l2, l3, l4, l5, l6, l7, l8, l9 = rr
            index = index_name + str(i + 1) + l3 + l4 + l5 + l6
            # print('index_name', index_name, index, l3, l4, l5, l6)
            l1 = self.config_obj.StringEscape(l1)
            l2 = l2.strip()
            self.client.add_document(index,
                                     DATA=l1,
                                     SECTION_TYPE=l2,
                                     DOCID=l3,
                                     PAGE=l4,
                                     GRIDID=l5,
                                     ROWCOL=l6,
                                     BBOX=l7,
                                     PAGE_GRID_SE="%s_%s_%s" % (l4, l5, l2),
                                     Rowspan=l8,
                                     Colspan=l9)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception as e:
            print('Error', e)
            pass

    def start(self, data, index_name):
        status = 1
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('SECTION_TYPE'),
            TextField('DOCID'),
            TextField('PAGE'),
            TextField('GRIDID'),
            TextField("ROWCOL"),
            TextField('BBOX'),
            TextField("PAGE_GRID_SE"),
            TextField('Rowspan'),
            TextField('Colspan')
        ]
        #rsObj.set_schema([NumericField('INDEX'), TextField('DOCID'), TextField('CATEGORY'), TextField('TAXONAME'), TextField('VALUE'), TextField('XML_REF'), TextField('REF_KEY')])
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, index_name)
        status = 5
        return [status]