def test():
    """Smoke-test basic RediSearch operations against the 'myIndex' index:
    index (re)creation, indexing one document, then simple and complex
    searches.

    Fix: the original used Python 2 ``print`` statements.
    """
    # Creating a client with a given index name
    client = Client('myIndex')

    # Recreate the index definition and schema from scratch
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with complex parameters: exact phrase, ids only, first 5 hits
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
def clientpush(self):
    """Build the 'Checkout' search index, load every row of the customers
    table into it, then run a few sanity searches."""
    client = Client('Checkout')
    schema = [
        NumericField('Key'),
        TextField('UsageClass'),
        TextField('CheckoutType'),
        TextField('MaterialType'),
        NumericField('CheckoutYear'),
        NumericField('CheckoutMonth'),
        NumericField('Checkouts'),
        TextField('Title'),
        TextField('Creator'),
        TextField('Subjects'),
        TextField('Publisher'),
        TextField('PublicationYear'),
    ]
    client.create_index(schema)

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    rows = cursor.fetchall()

    # Column order of the SELECT maps positionally onto the schema fields.
    field_names = ('Key', 'UsageClass', 'CheckoutType', 'MaterialType',
                   'CheckoutYear', 'CheckoutMonth', 'Checkouts', 'Title',
                   'Creator', 'Subjects', 'Publisher', 'PublicationYear')
    doc_count = 0
    for row in rows:
        client.add_document('doc%s' % doc_count,
                            **dict(zip(field_names, row)))
        doc_count += 1
    print(doc_count)

    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))

    res1 = client.search("use")
    print(res1)

    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
def general_search(request) -> Response:
    """
    Default full text search on all resources if no sources are specified.
    Faceted search if sources are specified.

    **query**: Query to search.
    **source**: Multiple sources can be specified.
    """
    client = Client(INDEX_NAME, conn=get_redis_connection())

    params = request.GET
    sort_stars = params.get('sort-stars')
    query = format_query(
        params.get('query'),
        params.getlist('source'),
        params.getlist('language'),
        params.getlist('awesome-list'),
    )

    docs = [doc.__dict__ for doc in client.search(Query(query)).docs]
    if sort_stars == "true":
        docs.sort(key=lambda d: int(d['stargazers_count']), reverse=True)

    return Response({"docs": docs})
class RediSearchClient(object):
    """Thin wrapper around a RediSearch index built from a line-document file.

    Fixes: Python 2 ``print`` statement converted to ``print()``; the bare
    ``except:`` narrowed to ``except Exception`` so SystemExit and
    KeyboardInterrupt are not swallowed.
    """

    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        """Create the schema and load up to ``n_docs`` documents from
        ``line_doc_path``, logging progress every 1000 docs."""
        line_pool = LineDocPool(line_doc_path)
        try:
            self.client.drop_index()
        except Exception:
            # The index may not exist yet; dropping is best-effort.
            pass
        self.client.create_index(
            [TextField('title'), TextField('url'), TextField('body')])
        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'],
                                     url=d['url'], body=d['body'])
            if i + 1 == n_docs:
                break
            if i % 1000 == 0:
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        """Run a verbatim search for ``query`` and return the first 5 hits."""
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        return res
def product_search(request):
    """Search the 'productIndex' RediSearch index for products matching the
    POSTed ``search_key`` and return their cached details as JSON."""
    search_key = request.POST.get('search_key', "").strip()
    if not search_key:
        return JsonResponse({'product_detail_list': []})

    # Normalise the many spellings of "tshirt" to one canonical token.
    tee_synonyms = ('tee', 't shirt', 't-shirt', 'tees', 't shirts',
                    't-shirts', 'tshirts')
    if search_key in tee_synonyms:
        search_key = 'tshirt'

    client = Client('productIndex')
    q = Query(search_key)
    q.paging(0, 60)

    try:
        matches = client.search(q).docs
    except Exception:
        # Index is missing: (re)build it and the autocompleter, then retry.
        create_product_search_index()
        create_product_autocompleter()
        matches = client.search(q).docs
    product_id_list = [doc.id for doc in matches]

    # Nothing matched the whole phrase: fall back to per-token searches.
    if not product_id_list:
        for token in search_key.split():
            if token:
                q._query_string = token
                product_id_list.extend(
                    doc.id for doc in client.search(q).docs)

    product_id_list = list(set(product_id_list))
    product_detail_list = product_view.cached_product_detail(product_id_list)
    context = {
        'product_detail_list': product_detail_list,
        'total_number_of_products': len(product_detail_list),
        'no_of_products': len(product_detail_list),
        'subtypes': True,
    }
    return JsonResponse(context)
def magic_fb_search(request):
    """Match the current user's Facebook page likes against the product
    search index and return the liked products annotated with the pages
    that matched them.

    Fix: the original used a Python 2 ``print`` statement in the inner
    except block, a syntax error under Python 3.
    """
    if not request.user.is_authenticated():
        return JsonResponse({'product_detail_list': []})
    user = request.user

    client = Client('productIndex')
    try:
        # If the search index is not there, this probe raises and we create it.
        res = client.search('test')
    except Exception:
        index = create_product_search_index()
        create_product_autocompleter()
    # up to here

    fb_likes = UserFbLikes.objects.filter(user=user)
    likes_product_map = {}
    for fb_like in fb_likes:
        try:
            res = client.search(fb_like.fb_page)
            for data in res.docs:
                likes_product_map.update({
                    str(data.id):
                    likes_product_map.get(str(data.id), []) + [fb_like.fb_page]
                })
        except Exception:
            # Page names can contain query-breaking syntax; log and skip.
            print(fb_like.fb_page)

    product_list = Product.objects.filter(
        id__in=likes_product_map.keys()).order_by('-id')
    product_detail_list = product_view.product_details(product_list)
    for i in range(len(product_detail_list)):
        product_id = str(product_detail_list[i].get('id'))
        likes = likes_product_map.get(product_id)
        product_detail_list[i].update({'fb_likes': list(set(likes))})

    context = {'product_detail_list': product_detail_list}
    return JsonResponse(context)
def searchdb(search_content):
    """Tokenise ``search_content`` with jieba and run a verbatim search
    against the 'BoxGroup' index (first 500 hits).

    Returns a list of matching document titles, or the string
    "No result found" when nothing matches (kept for caller compatibility).

    Fixes: the manual while-counter loop is replaced with a slice-bounded
    comprehension; the original indexed ``res.docs[i]`` up to ``res.total``,
    which raises IndexError whenever the total exceeds the 500-doc page.
    """
    global total
    client = Client("BoxGroup", port=6379)
    search_content = ' '.join(jieba.cut(search_content))
    q = Query(search_content).verbatim().paging(0, 500)
    res = client.search(q)
    total = res.total
    # Slicing caps iteration at the page size even when total > len(docs).
    titlelist = [doc.title for doc in res.docs[:total]]
    if total > 0:
        return titlelist
    return "No result found"
class CacheEngine:
    """Facade over a RediSearch client plus an autocompleter for one index.

    Fixes: ``add_doc`` originally tested ``if dict is None`` — the builtin
    type, which is never None — instead of the ``data`` argument, so a None
    payload was passed straight to ``hset``.  Bare ``except:`` clauses are
    narrowed to ``except Exception``.
    """

    def __init__(self, hostname: str, idx_name: str, port=6379) -> None:
        self._ready = False
        self._setup_client(hostname, idx_name, port)

    def _setup_client(self, hostname: str, idx_name: str, port=6379) -> None:
        """Connect the search client and autocompleter; set readiness flag."""
        try:
            self._client = Client(idx_name, host=hostname, port=port)
            self._auto_compl = AutoCompleter(idx_name, hostname, port=port)
            self._hostname = hostname
            self._port = port
            self._idx = idx_name
            self._ready = True
            LOGGER.info("Cache engine is ready")
        except Exception:
            self._client = None
            LOGGER.error("Cache engine is faulty!")

    def add_doc(self, doc_id: str, data: dict) -> Any:
        """Store ``data`` as a hash at ``doc_id``; False when data is None."""
        # BUG FIX: was ``if dict is None`` (always False).
        if data is None:
            return False
        results = self._client.redis.hset(doc_id, mapping=data)
        return results

    def search(self, text_to_search: str) -> Result:
        """Full-text search returning the raw redisearch Result."""
        results: Result = self._client.search(text_to_search)
        return results

    def get_doc(self, doc_id) -> Document:
        """Load a document by id; None when missing or on client error."""
        try:
            return self._client.load_document(doc_id)
        except Exception:
            return None

    def add_suggestion(self, suggestion) -> bool:
        """Add one autocomplete suggestion; True on success."""
        try:
            self._auto_compl.add_suggestions(Suggestion(suggestion))
        except Exception:
            return False
        return True

    def get_suggestion(self, str_to_suggest: str) -> List:
        """Fetch suggestions; fuzzy matching only for prefixes longer than 3."""
        suggs = self._auto_compl.get_suggestions(
            str_to_suggest, fuzzy=len(str_to_suggest) > 3)
        return suggs
def search(cls, query, offset=0, paginate=10):
    """Search the 'tower' index and return matching log entries as dicts."""
    client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
    res = client.search(Query(query).paging(offset, paginate))
    result = [
        {
            'id': doc.id,
            'client_ip': doc.clientIp,
            'service': doc.service,
            'error_message': doc.errorMessage,
            'stack_trace': doc.stackTrace,
            'numberRange': doc.numberRange,
        }
        for doc in res.docs
    ]
    print(res)
    return result
def product_search(query, limit=10, fuzzy_search=True):
    """Search website items via RediSearch, falling back to the database
    when the Redisearch module is unavailable."""
    search_results = {"from_redisearch": True, "results": []}

    if not is_redisearch_enabled():
        # Redisearch module not enabled
        search_results["from_redisearch"] = False
        search_results["results"] = get_product_data(query, 0, limit)
        return search_results

    if not query:
        return search_results

    red = frappe.cache()
    query = clean_up_query(query)

    # TODO: Check perf/correctness with Suggestions & Query vs only Query
    # TODO: Use Levenshtein Distance in Query (max=3)
    ac = AutoCompleter(make_key(WEBSITE_ITEM_NAME_AUTOCOMPLETE), conn=red)
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=red)

    # Fuzzy on length < 3 can be real slow
    suggestions = ac.get_suggestions(
        query, num=limit, fuzzy=fuzzy_search and len(query) > 3)

    # Extend the query with every suggestion as an alternative phrase.
    terms = [query]
    terms.extend(f"|('{clean_up_query(s.string)}')" for s in suggestions)
    results = client.search(Query("".join(terms)))

    hits = [convert_to_dict(doc) for doc in results.docs]
    hits.sort(key=lambda k: frappe.utils.cint(k["ranking"]), reverse=True)
    search_results["results"] = hits
    return search_results
def get(self, request):
    """Build a course search index from all Course rows, search it with the
    GET ``key`` parameter, and return the matching courses serialized.

    Fix: the original constructed a *second* Client with a fresh
    ``datetime.now()`` before searching, which addressed a different
    (empty) index than the one just populated, so the search could never
    find the indexed documents.  The index name is now computed once.
    """
    mes = {}
    search_key = request.GET.get('key')
    print(search_key)
    all_classes = Course.objects.all()
    print("开始创建索引——————————————————————————")

    # Compute the index name once and reuse it for indexing and searching.
    index_name = 'CII' + str(datetime.now())

    # Create a client bound to the index and define its schema.
    client = Client(index_name, host=settings.SIP, port='6666')
    client.create_index((TextField('title'), TextField('body')))
    print('索引创建完毕————————————————————————————————')

    print('开始添加数据————————————————————————————————')
    for i in all_classes:
        print(str(i.id) + str(i.title))
        # Index the course; the id is embedded in the title after '@' so it
        # can be recovered from search hits below.
        # NOTE(review): 'info' is not part of the schema created above —
        # presumably intended to be 'body'; confirm against requirements.
        client.add_document('result' + str(datetime.now()),
                            title=i.title + '@' + str(i.id),
                            info=i.info,
                            language='chinese')
        print(333333333)
    print('数据添加完毕————————————————————————————————')
    print(client.info())

    # Search using the same index that was just populated.
    res = client.search(search_key)
    print('查询结束————————————————————————————————————————————————')
    id_list = []
    print(res.docs)
    for i in res.docs:
        # The title carries '<title>@<course id>'; split to recover the id.
        id = i.title.split('@')[1]
        id_list.append(id)
    course = Course.objects.filter(id__in=id_list).all()
    c = CourseSerializersModel(course, many=True)
    mes['course'] = c.data
    mes['code'] = 200
    mes['message'] = '搜索完毕'
    return Response(mes)
class search:
    """Wrapper around a RediSearch index for company-name prefix lookups.

    Fix: ``make_redis_connection`` originally read ``self.config``, an
    attribute that is never set (``__init__`` only stores
    ``self.redis_info``), so calling it always raised AttributeError.
    It now uses the connection info already parsed in ``__init__``.
    """

    def __init__(self):
        self.redis_info = config.Config.redis_info
        self.ip, self.port, self.db = self.redis_info["host"], self.redis_info[
            "port"], self.redis_info["db"]
        index_name = self.redis_info["tb_name"]
        self.client = Client(index_name, self.ip, self.port)
        #self.rd_con = self.make_redis_connection()
        # Patterns used by StringEscape: HTML numeric entities, query-syntax
        # punctuation, and whitespace runs — applied in that order.
        self.escape1 = re.compile(r'&#\d+;')
        self.escape2 = re.compile(
            r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
        self.escape3 = re.compile(r'\s+')

    def StringEscape(self, search_str):
        """Strip entities and punctuation and collapse whitespace runs."""
        search_str = re.sub(self.escape1, '', search_str)
        search_str = re.sub(self.escape2, '', search_str)
        search_str = re.sub(self.escape3, ' ', search_str)
        return search_str.strip()

    def make_redis_connection(self):
        """Open a plain StrictRedis connection to the configured server."""
        redis_conn = redis.StrictRedis(host=self.ip, port=str(self.port),
                                       db=str(self.db))
        return redis_conn

    def search_exact_Query(self, string):
        """Prefix-search the look_cmp and cmp_k fields; hits are returned
        sorted by company-name length (shortest first)."""
        string = self.StringEscape(string)
        query = "(@look_cmp:%s*)|(@cmp_k:%s*)" % (string, string)
        res = self.client.search(Query(query).paging(0, 10000))
        arr = [{"k": x.cmp_k, "n": x.cmp_name} for x in res.docs]
        arr.sort(key=lambda x: len(x['n']))
        return [{"message": "done", "data": arr}]
from redisearch import Client, TextField

client = Client('myIndex')

# Interactive query loop: empty input just re-prompts.
while True:
    query = input("query > ")
    if not query:
        continue
    result = client.search(query)
    print(f"found match {len(result.docs)} for keyword {query}")
    for doc in result.docs:
        print(doc.id, doc.title, doc.body)
class TAS_AutoCompleter:
    """RediSearch helper bound to one index, offering both raw FT.SEARCH
    calls over redis-py and redisearch-py Query searches, plus several
    result-grouping conveniences over grid-cell style documents
    (DOCID / PAGE / GRIDID / BBOX / DATA fields)."""

    def __init__(self, host='172.16.20.7', port=6382, db=0,
                 autocomplete_name='Default'):
        # Search client, raw redis connection and autocompleter all target
        # the same server; the index/autocompleter share one name.
        self.client = Client(autocomplete_name, host, port)
        self.ipAdd = host
        self.ipPort = port
        self.db = db
        self.redisConn = redis.StrictRedis(host=self.ipAdd, port=self.ipPort,
                                           db=self.db)
        self.autocomplete = AutoCompleter(autocomplete_name, host, port)
        # Patterns for StringEscape: HTML numeric entities, punctuation that
        # breaks query syntax, and whitespace runs — applied in that order.
        self.escape1 = re.compile(r'&#\d+;')
        self.escape2 = re.compile(
            r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
        self.escape3 = re.compile(r'\s+')

    def search_using_FT(self, search_text, index):
        """Run a raw FT.SEARCH (first 100 hits) with spaces turned into
        wildcards, returning only the document payloads."""
        search_text = search_text.replace(' ', '*')
        query_string = 'FT.SEARCH ' + index + ' ' + search_text + ' LIMIT 0 100'
        res = self.redisConn.execute_command(query_string)
        fs = []
        for i, rr in enumerate(res):
            if i == 0:
                continue  # first reply element is the total hit count
            if i % 2 != 0:
                continue  # odd elements are document ids; keep the payloads
            fs.append(rr)
        return fs

    def search_exact_Query_using_ft(self, index, query):
        """Like search_using_FT but with a caller-built query, 1000 hits."""
        query_string = 'FT.SEARCH ' + index + ' ' + query + ' LIMIT 0 1000'
        res = self.redisConn.execute_command(query_string)
        fs = []
        for i, rr in enumerate(res):
            if i == 0:
                continue
            if i % 2 != 0:
                continue
            fs.append(rr)
        return fs

    def StringEscape(self, search_str):
        """Strip entities/punctuation and collapse whitespace runs."""
        search_str = re.sub(self.escape1, '', search_str)
        search_str = re.sub(self.escape2, '', search_str)
        search_str = re.sub(self.escape3, ' ', search_str)
        return search_str.strip()

    def simple_search(self, text):
        """Full-text search returning one row of cell fields per hit."""
        res = self.client.search(text)
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([
                    rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL,
                    rr.DATA, rr.id, rr.PAGE
                ])
        return fs

    def search_exact_Query(self, query):
        """Run ``query`` through redisearch-py, first 1000 hits."""
        return self.client.search(Query(query).paging(0, 1000))

    def search_query_convert_bk(self, query):
        """Group hits by 'DOCID_PAGE_GRIDID' with per-group counts and the
        full field rows for each hit in the group."""
        res = self.search_exact_Query(query)
        fs = {}
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv]['count'] = fs[vv]['count'] + 1
                    fs[vv]['info'].append([
                        rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                        rr.ROWCOL, rr.DATA, rr.id, rr.PAGE
                    ])
                else:
                    fs[vv] = {
                        'count': 1,
                        'info': [[
                            rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                            rr.ROWCOL, rr.DATA, rr.id, rr.PAGE
                        ]]
                    }
        #fs.append([rr.DOCID,rr.SECTION_TYPE,rr.GRIDID,rr.BBOX,rr.ROWCOL,rr.DATA,rr.id,rr.PAGE])
        return fs

    def search_query_convert(self, query, fs):
        """Accumulate per-'PAGE_GRIDID' hit counts into ``fs`` (mutated and
        also returned)."""
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv] = fs[vv] + 1
                else:
                    fs[vv] = 1
        #fs.append([rr.DOCID,rr.SECTION_TYPE,rr.GRIDID,rr.BBOX,rr.ROWCOL,rr.DATA,rr.id,rr.PAGE])
        return fs

    def search_query_convert_result(self, query):
        """Return [DATA, PAGE, BBOX] triples for every hit."""
        res = self.search_exact_Query(query)
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([rr.DATA, rr.PAGE, rr.BBOX])
        return fs

    def search_query_convert_result_auto(self, query, fs):
        """Group hit BBOXes into ``fs`` keyed by DATA and then by
        'DOCID##PAGE$GRIDID'; hits with empty DATA are skipped."""
        res = self.search_exact_Query(query)
        if res:
            for i, rr in enumerate(res.docs):
                DATA, DOCID, GRIDID, PAGE, BBOX = rr.DATA, rr.DOCID, rr.GRIDID, rr.PAGE, rr.BBOX
                if not DATA:
                    continue
                grid_id = str(DOCID) + "##" + str(PAGE) + "$" + str(GRIDID)
                if DATA not in fs:
                    fs[DATA] = {}
                if grid_id not in fs[DATA]:
                    fs[DATA][grid_id] = []
                fs[DATA][grid_id].append(BBOX)
        return fs

    def search_query_convert_result_page_grouping(self, query):
        """Group [DATA, BBOX] pairs by PAGE."""
        res = self.search_exact_Query(query)
        fs = {}
        if res:
            for i, rr in enumerate(res.docs):
                #print rr
                page = rr.PAGE
                bbox = rr.BBOX
                if page not in fs:
                    fs[page] = []
                fs[page].append([rr.DATA, rr.BBOX])
        return fs

    def search_using_Query(self, search_text, index):
        """Exact-phrase search on the DATA field, first 10000 hits.
        NOTE(review): the ``index`` argument is unused here — the client is
        already bound to an index; confirm whether this is intended."""
        search_text = search_text
        query = '@DATA:"%s"' % search_text  #,search_text+"*")
        #query = '@BBOX:"%s"'%('109')
        res = self.client.search(Query(query).paging(0, 10000))
        fs = []
        if res:
            for i, rr in enumerate(res.docs):
                fs.append([
                    rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX, rr.ROWCOL,
                    rr.DATA, rr.id, rr.PAGE
                ])
        return fs
from redisearch import Client, Query, TextField, GeoField, NumericField, GeoFilter, NumericFilter

client = Client('attractions', host='127.0.0.1', password='', port=6379)

# Verbatim full-text search (no stemming or query expansion).
print("Full text search for a 'ball string':")
for doc in client.search(Query("ball string").verbatim()).docs:
    print("\t", doc.description)

print(
    "Full text search for a 'ball string' search within 300 miles of Kansas City that is verified"
)
# Same search constrained to a 300-mile radius of Kansas City and to
# documents whose 'verified' field equals 1.
near_kc = GeoFilter('geo', -94.5786, 39.0997, 300, unit='mi')
is_verified = NumericFilter('verified', 1, 1)
q = Query("ball string").add_filter(near_kc).add_filter(is_verified).verbatim()
for doc in client.search(q).docs:
    print("\t", doc.description)
from redisearch import Client, TextField, NumericField, Query

# Client bound to the index named 'myIndex' on the local instance.
client = Client('myIndex', port=6380, host='localhost')

# Rebuild the index definition and schema from scratch.
client.drop_index()
client.create_index([TextField('title', weight=5.0), TextField('body')])

# Index a single document.
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch implements a search engine on top of redis')

# Plain text search.
res = client.search("search engine")

# The result carries the total hit count and the list of documents.
print(res.total)  # "1"
print(res.docs[0])

# Search with complex parameters: exact terms, ids only, first 5 hits.
q = Query("search engine").verbatim().no_content().paging(0, 5)
res = client.search(q)
class TAS_Redisearch():
    """Wrapper around one RediSearch index (``table_name``) exposing schema
    management, raw FT.SEARCH helpers and several search conveniences.

    Fixes over the original:
    - Ported Python 2 ``print`` / ``print >> sys.stderr`` statements to the
      Python 3 ``print()`` function.
    - Several except blocks evaluated a no-op tuple (``sys.stderr, "..."``)
      instead of printing; errors are now actually written to stderr via
      the shared ``_log_error`` helper.
    - ``clean_string`` performs one ``str.translate`` pass instead of 28
      chained ``str.replace`` calls (identical result).
    - ``category_taxonomy_dict`` initializes its result before the try so
      an early failure no longer raises NameError at the return.
    """

    # Characters that break RediSearch query syntax, all mapped to spaces.
    _PUNCT_TABLE = str.maketrans({c: ' ' for c in ',.<>{}[]"\':;!@#$%^&*()-+=~'})

    def __init__(self, table_name, host="localhost", port=6381):
        """Connect to RediSearch plus a plain Redis; LIMIT is the default
        result-count cap used by the search helpers."""
        try:
            self.client = Client(table_name, host, port)
            self.host = host
            self.port = port
            self.table_name = table_name
            self.redis = Redis()
            self.LIMIT = 10
        except Exception as e:
            # Report with the local arguments: the attributes may not have
            # been assigned if Client() itself raised.
            print('yyy')
            print("TAS_Redisearch Error inside Constructor Index:\'",
                  table_name, "\' HOST:\'", host, "\' PORT:\'", port, "\'\n",
                  file=sys.stderr)
            print(e, file=sys.stderr)

    def _log_error(self, where, e):
        """Write the uniform error banner plus the exception to stderr."""
        print("TAS_Redisearch Error inside " + where + " Index:\'",
              self.table_name, "\' HOST:\'", self.host, "\' PORT:\'",
              self.port, "\'\n", file=sys.stderr)
        print(e, file=sys.stderr)

    #Will set the no of results to show
    def set_result_limit(self, num):
        """Set the default number of results returned by searches."""
        self.LIMIT = num
        return

    #Defines the schema for Redisearch
    def set_schema(self, schema):
        """Create the index with ``schema``; the trailing empty list ensures
        that default stopwords will not be ignored."""
        try:
            return self.client.create_index(schema, False, False, [])
        except Exception as e:
            self._log_error("set_schema", e)

    #Deletes index(table)
    def drop_index(self):
        """Delete the whole index."""
        try:
            return self.client.drop_index()
        except Exception as e:
            self._log_error("drop_index", e)

    #Deletes a document(row) by document_index
    def delete_document(self, document_index):
        """Delete a single document by its index/id."""
        try:
            return self.client.delete_document(document_index)
        except Exception as e:
            self._log_error("delete_document", e)

    #############################################SEARCHES BELOW#######################################

    #Uses python libraries
    def py_search(self, query, result_limit=-1):
        """Search via the redisearch-py client; -1 means use self.LIMIT."""
        if result_limit == -1:
            result_limit = self.LIMIT
        try:
            return self.client.search(Query(query).paging(0, result_limit))
        except Exception as e:
            self._log_error("py_search", e)

    #Search with default parameters [will return dictionary]
    def generic_search(self, search_text, result_limit=-1):
        """Search via a raw FT.SEARCH command and wrap the reply in Result."""
        if result_limit == -1:
            result_limit = self.LIMIT
        query_string = ("FT.SEARCH " + self.table_name + " " + search_text +
                        " LIMIT 0 " + str(result_limit))
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            self._log_error("generic_search", e)

    def free_exact_search(self, key, result_limit=-1):
        """Return the docs (as dicts) of a match-all search whose field
        values contain ``key`` exactly."""
        org_key = key
        l = []
        try:
            if result_limit == -1:
                result_limit = self.LIMIT
            key = self.clean_string(key)
            returned = self.py_search("*", result_limit)
            for result in returned.docs:
                result_dict = vars(result)
                if org_key in result_dict.values():
                    l.append(result_dict)
        except Exception as e:
            self._log_error("value_search", e)
        return l

    #{fieldname:[value1, value2], fieldname:[value1, value2]}
    def exact_search(self, input_dict, result_limit=-1):
        """Search with {field: [value, ...]} OR-groups and return the doc
        dicts whose field value is one of the requested values."""
        formed_str = ""
        l = []
        for field, value_list in input_dict.items():
            formed_str += "@" + field + ":("
            for key in value_list:
                key = self.clean_string(key)
                formed_str += "(\'" + key + "\') | "
            formed_str = formed_str.rstrip(' |')
            formed_str += ") "
        print("PASSED: ", formed_str)
        returned = self.py_search(formed_str, result_limit)
        print("RETURNED:", returned)
        for result in returned.docs:
            result_dict = vars(result)
            for itr, ktr in input_dict.items():
                if result_dict[itr] in ktr:
                    l.append(result_dict)
        return l

    #Search with the passed query
    def custom_search(self, query_string):
        """Execute a fully caller-specified raw command string."""
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            self._log_error("custom_search", e)

    #Search in 'search_in_field' [if any of the element in 'list_to_union' is found then include it in the result
    def union_search(self, list_to_union, search_in_field):
        """Search ``search_in_field`` for any of the terms in
        ``list_to_union`` (OR semantics)."""
        query_string = "FT.SEARCH " + self.table_name + " "
        union_text = "@" + search_in_field + ":("
        for text in list_to_union:
            union_text += text + "|"
        union_text = union_text.rstrip("|")
        union_text += ")"
        query_string += union_text
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            self._log_error("union_search", e)

    #will return all the dictionary for all the categories if no arguments are passed
    def category_taxonomy_dict(self, category='*'):
        """Map each CATEGORY to its list of TAXONAMEs ('*' = all)."""
        cat_taxo_dict = {}
        try:
            total_docs = self.client.info()['num_docs']
            query_string = ""
            if category == '*':
                query_string = category
            else:
                query_string = "@CATEGORY:" + category
            result = self.py_search(query_string, total_docs)
            for single_result in result.docs:
                try:
                    category = single_result.CATEGORY
                    taxoname = single_result.TAXONAME
                except Exception as ex:
                    pass
                # NOTE(review): on the first sighting of a category the elif
                # skips the append, so that document's taxoname is dropped —
                # behavior preserved as-is; confirm whether intended.
                if not category in cat_taxo_dict:
                    cat_taxo_dict[category] = []
                elif taxoname not in cat_taxo_dict[category]:
                    cat_taxo_dict[category].append(taxoname)
        except Exception as e:
            self._log_error("category_taxonomy_dict", e)
        return cat_taxo_dict

    def total_record(self):
        """Number of documents currently in the index (None on error)."""
        try:
            return int(self.client.info()['num_docs'])
        except Exception as e:
            self._log_error("total_records", e)

    def get_all_records(self):
        """Fetch every document in the index in one FT.SEARCH call."""
        try:
            total = str(self.total_record())
            res = self.redis.execute_command("FT.SEARCH " + self.table_name +
                                             " * LIMIT 0 " + total)
            return Result(res, True)
        except Exception as e:
            self._log_error("total_records", e)

    def clean_string(self, key):
        """Replace query-breaking punctuation in ``key`` with spaces."""
        return key.translate(self._PUNCT_TABLE)
key = 'doc:6'
# One hash per document; field order matches the original hset sequence.
doc_fields = {
    'title': 'Pirate adventures',
    'body': 'do you dare to fight other pirates and live many adventures to get the golden chest',
    'url': 'www.piratelife.com',
    'visits': '600',
}
for field_name, field_value in doc_fields.items():
    r.hset(key, field_name, field_value)

# ------------------------------------------------------------
# Full text Search with rediSearch module.
# ------------------------------------------------------------
client = Client(
    index_name='database_idx',
    host=REDIS_HOST,
    port=REDIS_PORT,
    conn=r,
    password=REDIS_AUTH,
)

# Simple search; the result carries the total count plus the documents.
res = client.search("adventures")
print('\n Number of found docs: {}'.format(res.total))  # "2"
for d in res.docs:
    print('{}. title: {}, body: {}'.format(d.id, d.title, d.body))

print('\n\n End \n')
class Hub(object):
    """Coordinates the Redis modules hub: a ReJSON document store, a
    RediSearch index + autocompleter, an RQ job queue and the GitHub API
    client, all connected from URLs/env vars at construction time."""

    dconn = None  # document store connection
    sconn = None  # search index connection
    qconn = None  # queue connection
    gh = None  # GitHub API client
    autocomplete = None  # search-suggestion engine
    repo = None  # 'owner/name' of the modules repository
    _ts = None  # hub creation timestamp
    _hubkey = 'hub:catalog'  # ReJSON key of the master catalog
    _ixname = 'ix'  # RediSearch index name
    _acname = 'ac'  # autocompleter key name

    def __init__(self, ghlogin_or_token=None, docs_url=None, search_url=None,
                 queue_url=None, repo=None):
        timestamp = datetime.utcnow()
        logger.info('Initializing temporary hub {}'.format(timestamp))

        # GitHub client: explicit token wins, then the environment.
        if ghlogin_or_token:
            self.gh = Github(ghlogin_or_token)
        elif 'GITHUB_TOKEN' in os.environ:
            self.gh = Github(os.environ['GITHUB_TOKEN'])
        else:
            logger.info('Env var ' 'GITHUB_TOKEN' ' not found')

        # Document-storage Redis is mandatory.
        if docs_url:
            pass
        elif 'DOCS_REDIS_URL' in os.environ:
            docs_url = os.environ['DOCS_REDIS_URL']
        else:
            logger.critical('No Redis for document storage... bye bye.')
            raise RuntimeError('No Redis for document storage... bye bye.')
        self.dconn = ReJSONClient().from_url(docs_url)

        # Search Redis falls back to the docs Redis.
        if search_url:
            pass
        elif 'SEARCH_REDIS_URL' in os.environ:
            search_url = os.environ['SEARCH_REDIS_URL']
        else:
            search_url = docs_url
        conn = Redis(connection_pool=ConnectionPool().from_url(search_url))
        self.sconn = RediSearchClient(self._ixname, conn=conn)
        self.autocomplete = AutoCompleter(self._acname, conn=conn)

        # Queue Redis also falls back to the docs Redis.
        if queue_url:
            pass
        elif 'QUEUE_REDIS_URL' in os.environ:
            queue_url = os.environ['QUEUE_REDIS_URL']
        else:
            queue_url = docs_url
        self.qconn = StrictRedis.from_url(queue_url)

        if repo:
            pass
        elif 'REDISMODULES_REPO' in os.environ:
            repo = os.environ['REDISMODULES_REPO']
        else:
            logger.critical('No REDISMODULES_REPO... bye bye.')
            raise RuntimeError('No REDISMODULES_REPO... bye bye.')
        self.repo = repo

        # Check if hub exists
        if self.dconn.exists(self._hubkey):
            # Latch onto the existing hub's creation timestamp.
            self._ts = datetime.fromtimestamp(
                float(self.dconn.jsonget(self._hubkey, Path('.created'))))
            logger.info('Latching to hub {}'.format(self._ts))
        else:
            self._ts = timestamp
            logger.info('Creating hub {}'.format(self._ts))
            self.createHub()
            self.addModulesRepo(self.repo)

    def get_repo_url(self):
        """Return the https URL of the configured modules repository."""
        return 'https://github.com/{}'.format(self.repo)

    def createHub(self):
        """Initialize the master catalog object and the search index."""
        logger.info('Creating the hub in the database {}'.format(self._ts))
        # Store the master modules catalog as an object
        self.dconn.jsonset(
            self._hubkey, Path.rootPath(), {
                'created': str(_toepoch(self._ts)),
                'modules': {},
                'submissions': [],
                'submit_enabled': False
            })
        # Create a RediSearch index for the modules
        # TODO: catch errors
        self.sconn.create_index(
            (TextField('name', sortable=True), TextField('description'),
             NumericField('stargazers_count', sortable=True),
             NumericField('forks_count', sortable=True),
             NumericField('last_modified', sortable=True)),
            stopwords=stopwords)

    def deleteHub(self):
        # TODO
        pass

    def addModule(self, mod):
        """Save a module document, reference it in the catalog and schedule
        an hourly stats-refresh job for it."""
        logger.info('Adding module to hub {}'.format(mod['name']))
        # Store the module object as a document
        m = RedisModule(self.dconn, self.sconn, self.autocomplete, mod['name'])
        m.save(mod)
        # Add a reference to it in the master catalog
        self.dconn.jsonset(
            self._hubkey, Path('.modules["{}"]'.format(m.get_id())), {
                'id': m.get_id(),
                'key': m.get_key(),
                'created': str(_toepoch(self._ts)),
            })
        # Schedule a job to refresh repository statistics, starting from now and every hour
        s = Scheduler(connection=self.qconn)
        job = s.schedule(
            scheduled_time=datetime(1970, 1, 1),
            func=callRedisModuleUpateStats,
            args=[m.get_id()],
            interval=60 * 60,  # every hour
            repeat=None,  # indefinitely
            ttl=0,
            result_ttl=0)
        return m

    """
    Adds modules to the hub from a local directory
    TODO: deprecate asap
    """

    def addModulesPath(self, path):
        logger.info('Loading modules from local path {}'.format(path))
        # Iterate module JSON files
        for filename in os.listdir(path):
            if filename.endswith(".json"):
                with open('{}/{}'.format(path, filename)) as fp:
                    mod = json.load(fp)
                    # NOTE(review): addModule takes a single ``mod`` argument;
                    # this two-argument call would raise TypeError — confirm
                    # before use (method is marked for deprecation above).
                    m = self.addModule(mod['name'], mod)

    """
    Adds a modules to the hub from a github repository
    """

    def addModulesRepo(self, name, path='/modules/'):
        # TODO: check for success
        # Processing happens asynchronously on the queue.
        q = Queue(connection=self.qconn)
        q.enqueue(callLoadModulesFromRepo, name, path)

    def loadModulesFromRepo(self, name, path):
        """Fetch every module JSON under ``path`` in repo ``name`` and add it."""
        logger.info('Loading modules from Github {} {}'.format(name, path))
        # TODO: error handling, sometimes not all contents are imported?
        repo = self.gh.get_repo(name)
        files = repo.get_dir_contents(path)
        for f in files:
            mod = json.loads(f.decoded_content)
            m = self.addModule(mod)

    """
    Submits a module to the hub
    """

    def submitModule(self, repo_id, **kwargs):
        """Validate and enqueue a module submission; returns a status dict
        with 'id', 'status' and usually 'message'."""
        logger.info('Module submitted to hub {}'.format(repo_id))
        repo_id = repo_id.lower()
        ts = datetime.utcnow()
        res = {'id': repo_id, 'status': 'failed'}

        if not self.dconn.jsonget(self._hubkey, Path('submit_enabled')):
            res['message'] = 'Module submission is currently disabled'
            return res

        # Check if the module is already listed
        m = RedisModule(self.dconn, self.sconn, self.autocomplete, repo_id)
        if m.exists:
            # TODO: return in search results
            res['message'] = 'Module already listed in the hub'
            return res

        # Check if there's an active submission, or if the failure was too recent
        submission = Submission(self.dconn, repo_id)
        if submission.exists:
            status = submission.status
            if status != 'failed':
                res['status'] = 'active'
                res['message'] = 'Active submission found for module'
                return res
            else:
                # TODO: handle failed submissions
                res['message'] = 'Module already submitted to the hub and had failed, please reset manually for now'
                return res

        # Store the new submission
        submission.save(**kwargs)

        # Record the submission in the catalog
        # TODO: find a good use for that, e.g. 5 last submissions
        self.dconn.jsonarrappend(self._hubkey, Path('.submissions'), {
            'id': submission.get_id(),
            'created': submission.created,
        })

        # Add a job to process the submission
        q = Queue(connection=self.qconn)
        job = q.enqueue(callProcessSubmission, submission.get_id())
        if job is None:
            res['message'] = 'Submission job could not be created'
            # TODO: design retry path
            logger.error(
                'Could not create submission processing job for {}'.format(
                    submission.get_id()))
        else:
            res['status'] = 'queued'
            submission.status = res['status']
            submission.job = job.id

        return res

    def viewSubmissionStatus(self, repo_id):
        """Return the status dict for a submission, or None when unknown."""
        submission = Submission(self.dconn, repo_id)
        if submission.exists:
            res = {
                'id': submission.get_id(),
                'status': submission.status,
                'message': submission.message,
            }
            if 'finished' == res['status']:
                res['pull_number'] = submission.pull_number
                res['pull_url'] = submission.pull_url
            return res

    def processSubmission(self, repo_id):
        """Run the submission workflow (called from the queue worker)."""
        logger.info('Processing submision for {}'.format(repo_id))
        submission = Submission(self.dconn, repo_id)
        if submission.exists:
            return submission.process(self.gh, self.repo)

    def viewModules(self, query=None, sort=None):
        """Search the modules index (all modules when no query) and return
        the hits with their full documents plus timing info."""
        if not query:
            # Use a purely negative query to get all modules
            query = '-etaoinshrdlu'
        q = Query(query).no_content().paging(0, 1000)
        if sort:
            if sort == 'relevance':
                pass
            elif sort == 'update':
                q.sort_by('last_modified')
            elif sort == 'stars':
                q.sort_by('stargazers_count', asc=False)
            elif sort == 'forks':
                q.sort_by('forks_count', asc=False)
            elif sort == 'name':
                q.sort_by('name')

        results = self.sconn.search(q)
        mods = []
        fetch_duration = 0
        # TODO: this should be pipelined
        for doc in results.docs:
            m = RedisModule(self.dconn, self.sconn, self.autocomplete, doc.id)
            res, duration = _durationms(m.to_dict)
            mods.append(res)
            fetch_duration += duration

        return {
            'results': results.total,
            'search_duration': '{:.3f}'.format(results.duration),
            'fetch_duration': '{:.3f}'.format(fetch_duration),
            'total_duration': '{:.3f}'.format(fetch_duration +
                                              results.duration),
            'modules': mods,
        }

    def viewSearchSuggestions(self, prefix):
        """Return autocomplete suggestion strings for ``prefix``."""
        suggestions = self.autocomplete.get_suggestions(prefix)
        return [s.string for s in suggestions]
class TAS_AutoCompleter:
    """RediSearch helper: full-text queries plus autocomplete over one index.

    Wraps a redisearch ``Client``/``AutoCompleter`` pair and a raw redis
    connection that is used for hand-built ``FT.SEARCH`` commands.
    """

    def __init__(self, host=ip, port=port, db=db, autocomplete_name='Default'):
        self.client = Client(autocomplete_name, host, port)
        self.ipAdd = host
        self.ipPort = port
        self.db = db
        self.redisConn = redis.StrictRedis(host=self.ipAdd,
                                           port=self.ipPort,
                                           db=self.db)
        self.autocomplete = AutoCompleter(autocomplete_name, host, port)
        # strip HTML numeric entities like &#160;
        self.escape1 = re.compile(r'&#\d+;')
        # FIX: the original alternation contained "[|]" -- an accidental
        # character class matching only "|" -- so "[" and "]" were never
        # removed.  One explicit character class now covers all intended
        # punctuation (keeping "|", which the old pattern matched via that
        # accidental class).
        self.escape2 = re.compile(r'[,.<>{}\[\]"\':;!@#$%^&*()\-+=~|]')
        # collapse runs of whitespace
        self.escape3 = re.compile(r'\s+')
        #self.redisConn.execute_command('SET',*['Start','Here Started'])

    def search_using_FT(self, search_text, index):
        """Raw FT.SEARCH (first 100 hits); return the document payloads."""
        search_text = search_text.replace(' ', '*')
        query_string = 'FT.SEARCH ' + index + ' ' + search_text + ' LIMIT 0 100'
        res = self.redisConn.execute_command(query_string)
        # reply layout: [total, id1, doc1, id2, doc2, ...] -> keep the docs
        return list(res[2::2])

    def search_exact_Query_using_ft(self, index, query):
        """Raw FT.SEARCH (first 1000 hits); return the document payloads."""
        query_string = 'FT.SEARCH ' + index + ' ' + query + ' LIMIT 0 1000'
        res = self.redisConn.execute_command(query_string)
        # reply layout: [total, id1, doc1, id2, doc2, ...] -> keep the docs
        return list(res[2::2])

    def StringEscape(self, search_str):
        """Remove entities and punctuation, collapse whitespace, and strip."""
        search_str = re.sub(self.escape1, '', search_str)
        search_str = re.sub(self.escape2, '', search_str)
        search_str = re.sub(self.escape3, ' ', search_str)
        return search_str.strip()

    def simple_search(self, text):
        """Search via the redisearch client; return rows of document fields."""
        res = self.client.search(text)
        fs = []
        if res:
            for rr in res.docs:
                fs.append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                           rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
        return fs

    def search_exact_Query(self, query):
        """Run a redisearch Query with a large page (up to 10000 docs)."""
        return self.client.search(Query(query).paging(0, 10000))

    def search_query_convert_bk(self, query):
        """Group hits by DOCID_PAGE_GRIDID with per-key counts and row info."""
        res = self.search_exact_Query(query)
        fs = {}
        if res:
            for rr in res.docs:
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                row = [rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                       rr.ROWCOL, rr.DATA, rr.id, rr.PAGE]
                if vv in fs:
                    fs[vv]['count'] = fs[vv]['count'] + 1
                    fs[vv]['info'].append(row)
                else:
                    fs[vv] = {'count': 1, 'info': [row]}
        return fs

    def search_query_convert_gridandpage(self, query, fs):
        """Accumulate BBOXes per PAGE_GRIDID key into fs (mutated and returned)."""
        res = self.search_exact_Query(query)
        if res:
            for rr in res.docs:
                vv = rr.PAGE + "_" + rr.GRIDID
                if vv in fs:
                    fs[vv].append(rr.BBOX)
                else:
                    fs[vv] = [rr.BBOX]
        return fs

    def search_query_convert(self, query, fs):
        """Count hits per PAGE_GRIDID key into fs (mutated and returned)."""
        res = self.search_exact_Query(query)
        if res:
            for rr in res.docs:
                vv = rr.PAGE + "_" + rr.GRIDID
                fs[vv] = fs.get(vv, 0) + 1
        return fs

    def get_header(self, query):
        """Concatenate the DATA field of every hit into one string."""
        res = self.search_exact_Query(query)
        text = ''
        if res:
            for rr in res.docs:
                text = text + " " + rr.DATA
        return text

    def get_header_all_FT(self, query, index):
        """Collect {'txt','rc'} rows for every hit of a raw FT.SEARCH."""
        # (a redundant self.search_exact_Query(query) call whose result was
        # immediately overwritten has been removed)
        query_string = 'FT.SEARCH ' + index + ' ' + query + ' LIMIT 0 10000'
        res = self.redisConn.execute_command(query_string)
        print(res)  # FIX: was a Python-2 print statement
        text = []   # FIX: `text` was never initialised (NameError on any hit)
        if res:
            # NOTE(review): execute_command returns a raw reply list, which
            # has no `.docs` attribute -- this loop looks written for a
            # redisearch Result object.  Left as-is pending confirmation.
            for rr in res.docs:
                print(rr)
                text.append({'txt': rr.DATA, 'rc': rr.ROWCOL})
        return text

    def get_header_all(self, query):
        """Collect header cell info (text, rowcol, spans) for every hit."""
        res = self.search_exact_Query(query)
        text = []
        if res:
            for rr in res.docs:
                print(rr)  # FIX: was a Python-2 print statement
                text.append({'txt': rr.DATA, 'rc': rr.ROWCOL,
                             'rowspan': rr.Rowspan, 'colspan': rr.Colspan})
        return text

    def search_query_convert_docs_wise(self, query, fs):
        """Count hits per PAGE_GRIDID key into fs (mutated and returned)."""
        res = self.search_exact_Query(query)
        if res:
            for rr in res.docs:
                vv = rr.PAGE + "_" + rr.GRIDID
                fs[vv] = fs.get(vv, 0) + 1
        return fs

    def search_query_convert_docs_wise_v1test(self, query, gfs, doc_id, cnt):
        """Debug variant of v1: returns the raw search result immediately."""
        res = self.search_exact_Query(query)
        # NOTE(review): this early return makes the aggregation below dead
        # code -- kept verbatim because the method is an explicit test hook.
        return res
        if res:
            fs = {}
            for rr in res.docs:
                vv = rr.PAGE + "_" + rr.GRIDID
                fs[vv] = fs.get(vv, 0) + 1
            if fs:
                gfs.setdefault(doc_id, {})
                for vv, c in fs.items():
                    gfs[doc_id][vv] = gfs[doc_id].get(vv, 0) + (c * cnt)
        return gfs

    def search_query_convert_docs_wise_v1(self, query, gfs, doc_id, cnt):
        """Weighted per-doc hit counts: gfs[doc_id][PAGE_GRIDID] += count*cnt."""
        res = self.search_exact_Query(query)
        if res:
            fs = {}
            for rr in res.docs:
                vv = rr.PAGE + "_" + rr.GRIDID
                fs[vv] = fs.get(vv, 0) + 1
            if fs:
                gfs.setdefault(doc_id, {})
                for vv, c in fs.items():
                    gfs[doc_id][vv] = gfs[doc_id].get(vv, 0) + (c * cnt)
        return gfs

    def search_query_convert_docs_wise_v2_order(self, query, fs, doc_id):
        """Record first-seen order of DOCID_PAGE_GRIDID keys into fs."""
        res = self.search_exact_Query(query)
        if res:
            for rr in res.docs:
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv not in fs:
                    fs[vv] = len(fs.keys())
        return fs

    def search_query_convert_docs_wise_v2(self, query, fs, doc_id):
        """Group [ROWCOL, BBOX, query, SECTION_TYPE] rows by full grid key."""
        res = self.search_exact_Query(query)
        if res:
            for rr in res.docs:
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv not in fs:
                    fs[vv] = []
                fs[vv].append([rr.ROWCOL, rr.BBOX, query, rr.SECTION_TYPE])
        return fs

    def search_query_convert_docs_wise_v2_mquery(self, query, fs, doc_id,
                                                 query_wise_res):
        """Like v2, additionally tracking which keys each query matched."""
        res = self.search_exact_Query(query)
        if res:
            query_wise_res.setdefault(query, [])
            for rr in res.docs:
                vv = rr.DOCID + "_" + rr.PAGE + "_" + rr.GRIDID
                if vv not in fs:
                    fs[vv] = []
                fs[vv].append([rr.ROWCOL, rr.BBOX, query, rr.SECTION_TYPE])
                query_wise_res[query].append(vv)
        return fs

    def search_query_convert_testing(self, query, fs):
        """Verbose variant of search_query_convert (prints every hit)."""
        res = self.search_exact_Query(query)
        if res:
            for rr in res.docs:
                print([query, rr])  # FIX: was a Python-2 print statement
                vv = rr.PAGE + "_" + rr.GRIDID
                fs[vv] = fs.get(vv, 0) + 1
        return fs

    def search_query_convert_result(self, query):
        """Return rows of document fields for every hit."""
        res = self.search_exact_Query(query)
        fs = []
        if res:
            for rr in res.docs:
                fs.append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                           rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
        return fs

    def search_using_Query(self, search_text, index):
        """Exact-phrase search on the DATA field via the redisearch client.

        `index` is accepted for interface compatibility but unused here.
        """
        query = '@DATA:"%s"' % search_text
        res = self.client.search(Query(query).paging(0, 10000))
        fs = []
        if res:
            for rr in res.docs:
                fs.append([rr.DOCID, rr.SECTION_TYPE, rr.GRIDID, rr.BBOX,
                           rr.ROWCOL, rr.DATA, rr.id, rr.PAGE])
        return fs
class UserCache:
    """Caches API users in a RediSearch index and serves paged searches."""

    # Translation table escaping "-" for RediSearch text fields.
    # Hoisted here so it is built once, not per-user-per-field in the loops.
    _DASH_ESCAPE = str.maketrans({"-": r"\-"})

    def __init__(self):
        self.client = Client("api_user_index", app.config["REDIS_HOST"],
                             app.config["REDIS_PORT"])

    def create_user_index(self, users):
        """
        Creates a new user index if not exists and bulk-loads `users` into it.

        :param users: iterable of user objects with first_name, last_name,
                      email, age, id and is_employee attributes
        :return: True when the index was created and populated,
                 False when the index already existed
        """
        definition = IndexDefinition(prefix=['doc:', 'user:'])
        try:
            self.client.create_index(
                (TextField("first_name"), TextField("last_name"),
                 TextField("email"), NumericField("age"),
                 NumericField("is_employee"),
                 NumericField("user_id", sortable=True)),
                definition=definition)
        except redis.exceptions.ResponseError:
            # Index already exists -- nothing to (re)build.
            return False

        indexer = self.client.batch_indexer(chunk_size=len(users))
        for user in users:
            fields = {
                "first_name": user.first_name.translate(self._DASH_ESCAPE),
                "last_name": user.last_name.translate(self._DASH_ESCAPE),
                "email": user.email.translate(self._DASH_ESCAPE),
                "age": user.age,
                "user_id": user.id,
                "is_employee": int(user.is_employee),
            }
            indexer.add_document(f"doc:{user.id}", **fields)
        indexer.commit()
        return True

    def cache_single_user(self, user):
        """
        Caches a single user as a redis hash under doc:<id>.

        :param user: user object (see create_user_index)
        :return: True
        """
        self.client.redis.hset(
            f"doc:{user.id}",
            mapping={
                "first_name": user.first_name.translate(self._DASH_ESCAPE),
                "last_name": user.last_name.translate(self._DASH_ESCAPE),
                "email": user.email.translate(self._DASH_ESCAPE),
                "age": user.age,
                "user_id": user.id,
                "is_employee": int(user.is_employee),
            })
        return True

    def search(self, filters, page, per_page):
        """
        Searches through redis with pagination, sorted by user_id.

        :param filters: dict understood by build_query
        :param page: 1-based page number
        :param per_page: page size
        :return: redisearch result object
        """
        q = Query(self.build_query(filters)).paging(
            (page - 1) * per_page, per_page).sort_by("user_id")
        return self.client.search(q)

    def build_query(self, filters):
        """
        Builds a RediSearch query string from the filter dict.

        Recognised keys: firstName, lastName, email (prefix match, only when
        longer than one char), minAge/maxAge (range, defaults 0..100) and
        isEmployee (exact numeric match). None values are ignored.
        """
        query = []
        # age range placeholder; bounds get substituted below
        age = "+@age:[minAge maxAge]"
        for filter_name, value in filters.items():
            # Ugly non-solid way
            if value is not None:
                if filter_name == "firstName" and len(value) > 1:
                    query.append(f"+@first_name:{value}*")
                if filter_name == "lastName" and len(value) > 1:
                    query.append(f"+@last_name:{value}*")
                if filter_name == "email" and len(value) > 1:
                    query.append(f"+@email:{value}*")
                if filter_name == "minAge":
                    age = age.replace("minAge", str(value))
                if filter_name == "maxAge":
                    age = age.replace("maxAge", str(value))
                if filter_name == "isEmployee":
                    query.append(f"+@is_employee:{int(value)}")
        # fall back to the full 0..100 range for unspecified bounds
        age = age.replace("minAge", "0")
        age = age.replace("maxAge", "100")
        query.append(age)
        return " ".join(query)
class BaseDocument(object):
    # Marker so callers can tell this model is redis-backed
    is_redis: bool = True
    query: str = "*"  # the default search string for this document

    class Definition(BaseDefinition):
        # definition template for this document
        pass

    def __init__(self, db, prefix: str = None):
        """
        # rBaseDocument

        A RediSearch document but without input validation

        ## Param
        db - database wrapper (provides the redis connection, delimiter, helpers)
        prefix - name of the document i.e. PERSONA or None, in this case we
                 take the name of the class

        ## Remarks
        After the index creation (first time) the index definition is no
        longer synced with the database. You must maintain manually the
        changes on Redis or simply delete the index with:
        ```> FT.DROPINDEX idx:movie```
        And let redis recreate it. This is usually fast but can't be an
        option in a production environment.
        """
        self.db = db
        if not prefix:
            prefix = type(self).__name__.upper()
        self.prefix = prefix.upper()
        self.idx = Client(f"idx{self.db.delim}{self.prefix}", conn=db.r)
        # build index list for RediSearch and columns for an html table of the data
        index = []
        self.columns = [
        ]  # list to columns to appear in an auto generated html table
        self.dependant = []  # fields that depends of a foreign key
        self.index = []  # list of index field names
        self.uniques = []  # list of fields that must be uniques
        logger.debug(f"Members of document type {self.prefix}")
        for field in self.Definition():
            logger.debug(f"{field.name}({field.type}): {field.render_kw}")
            if field.render_kw:
                # include field in index
                if field.render_kw.get('indexed', False):
                    self.index.append(
                        field.name)  # append to index field names list
                    # numeric wtforms field types map to NumericField
                    if field.type in ('DecimalField', 'FloatField',
                                      'IntegerField'):
                        index.append(NumericField(field.name, sortable=True))
                    else:
                        index.append(TextField(field.name, sortable=True))
                # include field in html table columns
                if field.render_kw.get('on_table', False):
                    self.columns.append(field.name)
                # the field has unique values
                if field.render_kw.get('unique', False):
                    self.uniques.append(field.name)  # append to uniques
                    # unique fields must also be indexed so uniqueness can
                    # be checked via search
                    if not field.name in self.index:
                        # append to index list
                        self.index.append(field.name)
                        if field.type in ('DecimalField', 'FloatField',
                                          'IntegerField'):
                            index.append(
                                NumericField(field.name, sortable=True))
                        else:
                            index.append(TextField(field.name, sortable=True))
        # build index
        try:
            self.idx.create_index(
                index,
                definition=IndexDefinition(
                    prefix=[f'{self.prefix}{self.db.delim}']))
        except Exception as ex:
            # index already exists -- ignore (see class docstring)
            pass

    def info(self) -> str:
        """ print a human-readable summary of this document type """
        s = f"{self.prefix} information"
        print(f"\n{s}\n" + '=' * len(s))
        print(
            f"Document members: {[(f.name,f.type) for f in self.Definition()]}"
        )
        print(f"Indices: {self.index}")
        print(f"Foreign keys: {self.dependant}")
        l = []
        for a, b in self.db.dependants:
            if b.prefix == self.prefix:
                l.append(a.prefix)
        print(f"Documents that depend of this document: {l}")
        print(f"Unique members: {self.uniques}")
        print(f"Number of documents: {self.search('*').total}")
        print("")

    def k(self, id: str) -> str:
        """ return a complete id: name+delim+id """
        return self.sanitize(id)

    def get(self, id: str) -> DotMap:
        """ return a document or None

        ## Param
        * id - is the full id
        """
        p = self.db.r.hgetall(self.sanitize(id))
        if p:
            return DotMap(self.unescape_doc(self.discover(p)))
        else:
            return None

    def validate_foreigns(self, doc: dict) -> None:
        """ Called before save. Check if the object has the mandatory foreign
        fields and their values exists on the referenced document.
        Also check the uniqueness of unique fields

        ## Param
        * doc - the dict to be saved in the document

        ## Exceptions
        rFKNotExists, rUnique
        """
        for d, f in self.db.dependants:
            if d.prefix == self.prefix:
                # the doc must carry the foreign key field ...
                if doc.get(f.prefix.lower()) is None:
                    raise rFKNotExistsException(
                        f"The member {f.prefix.lower()} of {self.prefix} does not exist in the document.",
                        doc)
                # ... and its value must exist as a key in redis
                if not self.db.r.exists(doc.get(f.prefix.lower())):
                    raise rFKNotExistsException(
                        f"The member {d.prefix}.{f.prefix.lower()}, with value {doc.get(f.prefix.lower())}, does not exist as a foreign key of {f.prefix.upper()}",
                        doc)
        # test uniqueness
        for d in self.uniques:
            q = f"@{d}:\"{doc.get(d)}\""
            if doc.get(d) and self.search(q).total > 0:
                # print(f"testing uniqueness of {d} by searching {q}")
                raise rUniqueException(
                    f"Value {self.db.qunescape(doc.get(d))} already exists in document {self.prefix}, member {d}"
                )

    def escape_doc(self, doc: dict) -> dict:
        """ qescape all str fields """
        esc_doc = {}
        for k, v in doc.items():
            if type(v).__name__ == 'str':
                esc_doc[k] = self.db.qescape(v)
            else:
                esc_doc[k] = v
        return esc_doc

    def before_save(self, doc: dict) -> dict:
        """ Check, sanitize, etc... Raise Exception on error

        ## Param
        * doc - The dict to be saved, before perform the checkin

        ## Exceptions
        rBeforeSaveException e.g. if doc.get('field_name') is None:
        raise rBeforeSaveException(f"field_name can not be None")

        ## Return
        The checked, sanitized doc
        """
        # 1. check types and escape strings
        # check if all members of the doc are string, int or float
        new_doc = {}
        try:
            for k, v in doc.items():
                # print(f"type of {k} is {type(v).__name__}")
                # if it is a DotMap, only include the id or None
                t = type(v).__name__
                if t in ('DotMap', 'dict'):
                    new_doc[k] = v.get('id', None)
                elif t in ('int', 'NoneType'):
                    new_doc[k] = v
                elif t in ('str', ):
                    new_doc[k] = self.db.qescape(v)
                elif t in ('Arrow', 'datetime', 'date', 'time'):
                    new_doc[k] = str(arrow.get(v))  # normalize to iso
                else:
                    new_doc[k] = str(v)
        except Exception as ex:
            raise rTypeException(
                f"Error checkin datatypes, only str, int or float allowed: {ex}"
            )
        # 2. validate fks
        self.validate_foreigns(new_doc)
        return new_doc

    def sanitize(self, id: str) -> str:
        """ Sanitize and id before use it

        ## Param
        * id - the str to sanitize

        ## Exceptions
        rsaveException if the key is invalid (len==0)
        """
        # sanitize the id -> remove non alpha-numeric characters and the
        # delimitator from the id
        id = self.db.delim.join(
            [self.db.key_sanitize(t) for t in id.split(self.db.delim)])
        # remove any delim character after the document name
        if id.startswith(self.prefix + self.db.delim):
            id_part = ''.join([t for t in id.split(self.db.delim)[1:]])
            if len(id_part) == 0:
                raise rSaveException("Len of id cant be zero", {'id': id})
            id = f"{self.prefix}{self.db.delim}{id_part}"
        else:
            # prefix the id with the document name
            id = self.db.k(self.prefix, id)
        return id.upper()

    def after_save(self, doc: dict, id: str) -> None:
        """ Do tasks after save

        ## Param
        * doc - the saved dict
        * id - the id of the saved doc

        ## Exceptions
        rAfterSaveException
        """
        return None

    def s(self, **doc: dict) -> str:
        """ call save with func params as a dict """
        return self.save(doc)

    def save(self, doc: DotMap) -> str:
        """ save the dictionary and return his id """
        try:
            # if there isn't an id field, create and populate it
            if doc.get('id', None) is None:
                # the counters always ends with _KEY
                NOM_COMPTADOR = f"{self.prefix.upper()}_KEY"
                # create the counter if it not exists
                n = self.db.r.get(NOM_COMPTADOR)
                if n is None:
                    self.db.r.set(NOM_COMPTADOR, 1)
                    n = 1
                # rpad with zeros
                doc['id'] = f'{n}'.rjust(8, '0')
                self.db.r.incr(NOM_COMPTADOR)
            # sanitize the id
            doc['id'] = self.sanitize(doc['id'])
            # call before_save, can raise an exception
            doc = self.before_save(doc)
            # if there is no creation field, create and populate it
            if doc.get('created_at', None) is None:
                doc['created_at'] = self.db.now()
            # the updated_at field is always refreshed
            doc['updated_at'] = self.db.now()
            # persist the dictionary as a redis hash
            self.idx.redis.hset(doc['id'], mapping=doc)
            # invoke the after-save hook
            self.after_save(doc, doc['id'])
            return doc['id']
        except Exception as ex:
            logger.error(
                f"Database error while saving doc id {doc.get('id')}: {ex}")
            raise rSaveException(ex, doc)

    def before_delete(self, id: str) -> None:
        """ Check if we can delete this document
        At this stage, we can delete if this document is not the key of a
        foreign key raising an Exception if not

        ## Param
        * id - is the complete id prefix:id

        ## Exception
        rBeforeDeleteException
        """
        id = self.sanitize(id)
        for d in self.db.dependants:
            # dependants is organised as e.g. (PERSONA, PAIS)
            # check whether the dependency applies to this document
            if (self.prefix == d[1].prefix
                ):  # e.g. deleting a PAIS that some PERSONA depends on
                # d[0] depends on self; look for any doc in d[0] holding this key
                cad = f'@{d[1].prefix.lower()}:{id}'
                # the search string applied to d[0] is `cad`
                if d[0].search(cad).total > 0:
                    raise rDeleteFKException(
                        f"Cant delete {id} of {self.prefix} because there are document of {d[0].prefix} that have this key.",
                        {"id": id})

    def after_delete(self, id: str) -> None:
        """ Perform some action after deletion

        ## Param
        * id - the complete id prefix:id
        * doc - the deleted document

        ## rAfterDeleteException
        """
        pass

    def delete(self, id: str) -> None:
        """ Remove a key from the hash. before_delete can throw an Exception

        ## Param
        * id - the complete id prefix:id

        ## Exceptions
        rDeleteException
        """
        id = self.sanitize(id)
        self.before_delete(id)
        try:
            self.db.r.delete(id)
        except Exception as ex:
            raise rDeleteException(ex, {'id': id})
        self.after_delete(id)

    def unescape_doc(self, doc: dict) -> dict:
        """ qunescape all str fields """
        esc_doc = {}
        for k, v in doc.items():
            if type(v).__name__ == 'str':
                esc_doc[k] = self.db.qunescape(v)
            else:
                esc_doc[k] = v
        return esc_doc

    def discover(self, doc: dict) -> DotMap:
        """ discover first level foreign keys and include the result into
        the dict """
        n = {}
        # for each member of the doc
        for k, v in doc.items():
            # if this field is dependant
            if k.upper() in self.dependant:
                # include a get of the foreign key as member_name.data
                n[k] = self.unescape_doc(DotMap(self.db.r.hgetall(v)))
            else:
                if type(v).__name__ == 'str':
                    n[k] = self.db.qunescape(v)
                else:
                    n[k] = v
        return DotMap(n)

    def search(self,
               query: str = "*",
               start: int = 0,
               num: int = 10,
               sort_by: str = 'id',
               direction: bool = True,
               slop: int = 0) -> list:
        """ perform a query with the index

        ## Param
        * query - is the string query
        * start - page form record start
        * num - number of records to include into the result
        * sort_by - field to order by, defaul: *id*
        * direction - asc True desc False
        * slop - number of non matched terms (Levensthein distance),
          default: *0*

        ## Exception
        rSearchException

        ## Return
        A list of records
        """
        try:
            q = Query(query).slop(slop).sort_by(sort_by,
                                                direction).paging(start, num)
            result = self.idx.search(q)
            if len(self.dependant) == 0:
                return result
            # discover first level foreign keys
            docs = result.docs
            if result.total > 0:  # and len(self.dependant)>0:
                docs_with_discover = []  # new list of docs
                # for each document
                for doc in self.db.docs_to_dict(result.docs):
                    # append to the list of new docs
                    docs_with_discover.append(self.discover(doc))
                docs = docs_with_discover
            # return the result as a resisearch result
            return DotMap(total=result.total, docs=docs)
        except Exception as ex:
            raise rSearchException(str(ex), {'query': query})

    def paginate(self,
                 query: str = "*",
                 page: int = 1,
                 num: int = 10,
                 sort_by: str = 'id',
                 direction: bool = True,
                 slop: int = 0) -> Pagination:
        """ run `query` and wrap one page of results in a Pagination object

        ## Exceptions
        rSearchException
        """
        try:
            tic = time.perf_counter()
            start = (page - 1) * num
            # count total of docs to calculate the total of pages
            total = self.idx.search(Query(query).slop(slop).paging(0,
                                                                   0)).total
            # construct the query, paginated start and num
            q = Query(query).slop(slop).sort_by(sort_by,
                                                direction).paging(start, num)
            # perform the query
            items = self.idx.search(q).docs
            elapsed_time = time.perf_counter() - tic
            logger.debug(
                f"Pagination over {self.prefix}({query}) with {num} of {total} results done in {(elapsed_time*1000):0.3f}ms"
            )
            p = Pagination(page=page, per_page=num, total=total, items=items)
            return p
        except Exception as ex:
            raise rSearchException(str(ex), {'query': query})
# Rebuild the tweet index from scratch: wipe the redis db, recreate the index.
client.redis.flushdb()
client.create_index([TextField('tweet'), TextField('timestamp')])

start = time.time()
for x, line in enumerate(file.readlines()):
    content = line.strip().split('\t')
    try:
        if len(content) == 4:
            # 4 columns: two id components, tweet text, timestamp
            client.add_document('-'.join(content[:2]),
                                tweet=content[-2],
                                timestamp=content[-1])
        else:
            # malformed row: last column is the tweet, no timestamp available
            client.add_document('-'.join(content[:2]),
                                tweet=content[-1],
                                timestamp='')
    except ResponseError:
        # best-effort indexing: skip rows redisearch rejects (e.g. dup ids)
        pass
    if x % 1000 == 0:
        print(x, 'lines indexed...')
end = time.time()
print("Indexing time elapsed", end - start)

# Benchmark: average latency of the same search over 30 runs
total = 0
for i in range(30):
    start = time.time()
    res = client.search(Query("@tweet:(ok | fine)"))
    end = time.time()
    total += end - start

print("Query time elapsed", total / 30)