def initialize():
    """Bootstrap the RediSearch app index on first run.

    If "apps:index" already has members the index is assumed to exist and
    nothing is done. Index creation is best-effort: a failure (typically
    "Index already exists") is logged as benign and ignored.
    """
    apps = redis_conn.smembers("apps:index")
    if not apps:
        try:
            redis_search.create_index([
                redisearch.TextField("appid"),
                redisearch.TextField("name"),
                redisearch.TextField("summary"),
                # Lower weight: long descriptions should not dominate ranking.
                redisearch.TextField("description", 0.2),
                redisearch.TextField("keywords"),
            ])
        except Exception:
            # Narrowed from a bare `except:`; creation is best-effort and the
            # common failure here is that the index already exists.
            pass
def _get_custom_field_from_input(custom_index_input: "custom_index") -> redisearch.client.Field:
    """Build a redisearch Field object from a custom-index description dict.

    Supported "type" values are "text", "tag", and "number"; anything else
    raises RuntimeError. Optional per-field settings come from "options".
    """
    kind = custom_index_input["type"]
    name = custom_index_input["field_name"]
    # Normalize a missing/None options dict so lookups below are uniform.
    opts = custom_index_input.get("options") or {}

    if kind == "text":
        weight = 1.0
        requested = opts.get("weight")
        # Accept only a numeric weight in [0, 1]; otherwise keep the default.
        if isinstance(requested, (int, float)) and 0 <= requested <= 1:
            weight = float(requested)
        return redisearch.TextField(
            name,
            weight=weight,
            sortable=bool(opts.get("sortable")),
            no_stem=bool(opts.get("no_stem")),
            no_index=bool(opts.get("no_index")),
        )
    if kind == "tag":
        return redisearch.TagField(
            name,
            separator=opts.get("separator") or ",",
            no_index=bool(opts.get("no_index")),
        )
    if kind == "number":
        return redisearch.NumericField(
            name,
            sortable=bool(opts.get("sortable")),
            no_index=bool(opts.get("no_index")),
        )
    raise RuntimeError(f"Index type {kind} is not supported")
def __init__(
    self,
    index: str = 'test',
    *,
    fields: Iterable[Any] = (redisearch.TextField('text'),),
    host: str = 'localhost',
    port: int = 6379,
    access_mode: str = 'c',
    use_pipeline: bool = False,
    chunk_size: int = 10000,
    connect: bool = True,
    max_connect_attempts: int = 3,
    **conn_info,
):
    """Record connection/index settings; optionally open the connection now."""
    # Connection handles start unset; populated by connect()/_pre_connect().
    self._conn = None
    self._conn_pipe = None

    # Server location and extra driver options.
    self._host = host
    self._port = port
    self._max_connect_attempts = max_connect_attempts
    self._conn_info = conn_info

    # NOTE: RediSearch index can only be created in database index 0.
    self._n = 0

    # Index schema and write behavior.
    self._index = index
    self._fields = fields
    self._access_mode = access_mode
    self._use_pipeline = use_pipeline
    self._chunk_size = chunk_size

    if connect:
        self.connect()
    else:
        self._pre_connect()
def get_redisearch_cli(chat_id):
    """Return a RediSearch client for *chat_id*, creating its index if needed.

    The index name comes from get_index_name(). Creation is idempotent:
    an "Index already exists" error is ignored, anything else re-raised.
    """
    idx = get_index_name(chat_id)
    # TODO supports for redis authentication & cluster
    cli = redisearch.Client(idx, host=REDIS_HOST, port=REDIS_PORT)
    logger.debug('get client with idx %s for chat %s', idx, chat_id)
    try:
        # TODO dedicate API for dropping index (was: cli.drop_index())
        cli.create_index([
            redisearch.TextField('msg', weight=5.),
            redisearch.TextField('msg_id', weight=0.),
            redisearch.TextField('user', weight=0.),
            redisearch.TextField('ts', weight=0.),
        ])
    except redis.exceptions.ResponseError as e:
        # BUGFIX: redis-py exceptions have no `.message` attribute on
        # Python 3, so `e.message` raised AttributeError instead of
        # performing the check. Compare the stringified error instead.
        if str(e) != 'Index already exists':
            raise
    return cli
def startup_event():
    """Startup hook: register the flathub remote and bootstrap the search index."""
    remote_add_cmd = [
        "flatpak",
        "--user",
        "remote-add",
        "--if-not-exists",
        "flathub",
        "https://flathub.org/repo/flathub.flatpakrepo",
    ]
    # Idempotent thanks to --if-not-exists; output is intentionally discarded.
    subprocess.run(remote_add_cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    apps = redis_conn.smembers("apps:index")
    if not apps:
        try:
            redis_search.create_index([
                redisearch.TextField("name"),
                redisearch.TextField("summary"),
                # Lower weight: long descriptions should not dominate ranking.
                redisearch.TextField("description", 0.2),
                redisearch.TextField("keywords"),
            ])
        except Exception:
            # Narrowed from a bare `except:`; index creation is best-effort
            # and most likely fails only because the index already exists.
            pass
def force_create_transaction_index(
        index: str, custom_indexes: Optional[Iterable["custom_index"]] = None) -> None:
    """Create (and overwrite if necessary) index for a transaction type with optional custom_indexes"""
    # Start from a clean slate so the schema below always takes effect.
    delete_index(index)
    client = _get_redisearch_index_client(index)

    # Every transaction index carries these standard fields.
    schema = [
        redisearch.TextField("tag"),
        redisearch.NumericField("timestamp", sortable=True),
        redisearch.NumericField("block_id", sortable=True),
    ]
    # Followed by any caller-supplied custom fields.
    schema.extend(_get_custom_field_from_input(ci) for ci in custom_indexes or ())

    client.create_index(schema)
def savetoredis(req_id, colnames, datavalues, expired_time):
    """Index a result table in RediSearch and schedule its removal.

    Columns whose names contain a statistics marker (score/diff/row/...)
    become sortable numeric fields; all others become sortable text fields.
    One document per data row is added, then an async task drops the index
    after *expired_time* seconds.
    """
    db.hmset("%s:cols" % req_id, {'cols': colnames})
    client = redisearch.Client(req_id)

    # Column names containing any of these substrings hold numeric data.
    numeric_markers = ("score", "diff", "row", "z_score", "p_value")
    indexes = [
        redisearch.NumericField(col, sortable=True)
        if any(marker in col for marker in numeric_markers)
        else redisearch.TextField(col, sortable=True)
        for col in colnames
    ]
    client.create_index(indexes)

    for i, row in enumerate(datavalues):
        fields = {col: row[col] for col in colnames}
        client.add_document("%s_%d" % (req_id, i), **fields)

    # ---- set expiry for columns and documents ----
    # db.expire("%s:cols" % req_id, expired_time)  # disabled for now; revisit
    drop_index.apply_async((req_id,), countdown=expired_time)
def __init__(self):
    """Set up Redis and RediSearch clients and ensure the product index exists."""
    # setup redis clients
    self.r = redis.Redis(host=redis_host, port=redis_port)
    self.rs = redisearch.Client('product_name', host=redis_host, port=redis_port)
    try:
        self.rs.create_index((
            redisearch.NumericField('id'),
            redisearch.TextField('name'),
            redisearch.TextField('description'),
            redisearch.TextField('vendor'),
            redisearch.NumericField('price'),
            redisearch.TextField('currency'),
            redisearch.TextField('category'),
            redisearch.TextField('images'),
        ))
    except Exception as e:
        # BUGFIX: the failure reason was swallowed (placeholder-less
        # f-string); report it so "index already exists" vs a real error
        # can be told apart.
        print(f'error creating index: {e}')
        print(f'index info: {self.rs.info()}')
class RediSearch(BaseMatcher):
    """RediSearch.

    Args:
        alpha (float): Similarity threshold in range (0,1].

        similarity (str, BaseSimilarity): Similarity measure instance or name.

        db (Dict[str, Any]): Options passed directly to 'RediSearchDatabase()'.

        index (str): RediSearch index name for storage.

    Kwargs:
        Options forwarded to 'BaseMatcher()'.
    """

    NAME = 'redisearch-match'

    _FIELDS = (redisearch.TextField('term'),)

    def __init__(
        self,
        *,
        alpha: float = 0.7,
        similarity: str = None,
        # NOTE: Hijack 'db' parameter from 'BaseMatcher'
        db: Dict[str, Any] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        self._alpha = None
        self._similarity = None

        # Go through the property setters for validation/normalization.
        self.alpha = alpha
        self.similarity = similarity

        if db is None:
            db = {}
        self._db = RediSearchDatabase(
            index=db.pop('index', 'facet'),
            fields=type(self)._FIELDS,
            **db,
        )
        # NOTE: Use document count as document IDs
        self._doc_id = len(self._db)

    @property
    def alpha(self):
        return self._alpha

    @alpha.setter
    def alpha(self, alpha: float):
        # BUGFIX: a duplicate '@alpha.setter' that bypassed validation was
        # defined first and silently shadowed; only the validating setter
        # is kept.
        self._alpha = get_alpha(alpha)

    @property
    def similarity(self):
        return self._similarity

    @similarity.setter
    def similarity(self, similarity):
        # NOTE: Clear cache database if similarity measure changes because
        # results may differ.
        if self._cache_db is not None and self._cache_db.ping():
            self._cache_db.clear()
        self._similarity = get_similarity(similarity)

    def insert(self, string: str):
        """Insert string into database."""
        self._db.set(str(self._doc_id), {'term': string})
        self._doc_id += 1

    def search(
        self,
        string: str,
        *,
        alpha: float = None,
        similarity: str = None,
        rank: bool = True,
    ) -> Union[List[Tuple[str, float]], List[str]]:
        """Approximate dictionary matching.

        Args:
            alpha (float): Similarity threshold.

            similarity (str): Instance of similarity measure or
                similarity name.
        """
        alpha = (self._alpha if alpha is None else get_alpha(alpha))
        similarity = (
            self._similarity
            if similarity is None
            else get_similarity(similarity)
        )

        # NOTE: Cached data assumes approximate string matching parameters
        # (similarity measure) are the same with the exception of 'alpha'
        # because results may differ. Therefore, do not use cache database
        # if similarity measure from argument differs from internal
        # similarity measure.
        use_cache = (
            similarity == self._similarity
            and self._cache_db is not None
        )

        if use_cache:
            strings_and_similarities = self._cache_db.get(string)
            if strings_and_similarities is not None:
                return strings_and_similarities

        candidate_strings = [
            document.term
            for document in self._db.get(string).docs
        ]

        # BUGFIX: score with the resolved 'similarity' (which honors the
        # caller's override) instead of always using 'self._similarity',
        # which silently ignored the argument.
        similarities = [
            similarity(string, candidate_string)
            for candidate_string in candidate_strings
        ]

        strings_and_similarities = list(
            filter(lambda ss: ss[1] >= alpha,
                   zip(candidate_strings, similarities)))
        if rank:
            strings_and_similarities.sort(key=lambda ss: ss[1], reverse=True)

        # NOTE: Need a way to limit database and only cache heavy hitters.
        if use_cache:
            self._cache_db.set(string, strings_and_similarities)

        return strings_and_similarities
class RediSearchSimstring(BaseSimstring):
    """RediSearch implementation of Simstring algorithm.

    Args:
        db (Dict[str, Any]): Options passed directly to 'RediSearchDatabase()'.

    Kwargs:
        Options forwarded to 'BaseSimstring()'.
    """

    NAME = 'redisearch-simstring'

    _FIELDS = (
        redisearch.TextField('term', no_stem=True),
        redisearch.TextField('ng', no_stem=False),
        redisearch.NumericField('sz', sortable=True),
    )

    def __init__(
        self,
        *,
        # NOTE: Hijack 'db' parameter from 'BaseMatcher'
        db: Dict[str, Any] = None,
        **kwargs,
    ):
        super().__init__(**kwargs)

        if db is None:
            db = {}
        # 'index' is consumed here; remaining options pass straight through.
        self._db = RediSearchDatabase(
            index=db.pop('index', 'facet'),
            fields=type(self)._FIELDS,
            **db,
        )
        # NOTE: Use document count as document IDs
        self._doc_id = len(self._db)

    def get_strings(self, size: int, feature: str) -> List[str]:
        """Get strings corresponding to feature size and query feature."""
        # Verbatim match of the feature, restricted to the 'ng' field and
        # to documents whose feature count equals 'size' exactly.
        query = redisearch.Query(feature).verbatim()
        query = query.limit_fields('ng')
        query = query.add_filter(redisearch.NumericFilter('sz', size, size))
        query = query.return_fields('term')
        return [doc.term for doc in self._db.get(query).docs]

    def insert(self, string: str):
        """Insert string into database."""
        features = self._ngram.get_features(string)
        size = len(features)
        # NOTE: RediSearch does not supports storing lists in a field,
        # so we create a document for each feature. Downside is the high
        # redundancy of data and extra storage.
        for offset, feature in enumerate(features):
            self._db.set(
                str(self._doc_id + offset),
                {'term': string, 'sz': size, 'ng': feature},
            )
        self._doc_id += size
def main():
    """Interactive demo: index a record collection and run simple searches.

    Accepts "field value" queries for title/artist/year/genre, or "quit"
    to exit. Year accepts either a single value or a "from to" range.
    """
    print("hello!")
    r = redis.Redis(host=redis_host, port=redis_port)
    rs = redisearch.Client('recordIndex', redis_host, redis_port)

    # flush to get a fresh db
    # TODO - remove when dockerized
    r.flushall()

    record_collection = [{
        'title': 'Brothers and Sisters',
        'artist': 'Allman Brothers',
        'year': 1973,
        'genre': ['rock', 'southern rock', 'blues rock']
    }, {
        'title': 'Aja',
        'artist': 'Steely Dan',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Can\'t Buy a Thrill',
        'artist': 'Steely Dan',
        'year': 1972,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Deguello',
        'artist': 'ZZ Top',
        'year': 1979,
        'genre': ['rock']
    }, {
        'title': 'American Beauty',
        'artist': 'Grateful Dead',
        'year': 1970,
        'genre': ['rock', 'psychedelic rock']
    }, {
        'title': 'Second Helping',
        'artist': 'Lynard Skynard',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }, {
        'title': 'The Joker',
        'artist': 'Steve Biller Band',
        'year': 1973,
        'genre': ['rock', 'blues rock']
    }, {
        'title': 'Book of Dreams',
        'artist': 'Steve Biller Band',
        'year': 1977,
        'genre': ['rock']
    }, {
        'title': 'Rumours',
        'artist': 'Fleetwood Mac',
        'year': 1977,
        'genre': ['rock', 'pop']
    }, {
        'title': 'Where We All Belong',
        'artist': 'Marshall Tucker Band',
        'year': 1974,
        'genre': ['rock', 'southern rock']
    }]

    try:
        rs.create_index((redisearch.TextField('title', sortable=True),
                         redisearch.TextField('artist', sortable=True),
                         redisearch.NumericField('year', sortable=True),
                         redisearch.TagField('genre', separator=',')))
    except Exception:
        print(f'Error creating index: {sys.exc_info()}')
        print(f'index info: {rs.info()}')

    load_data(rs, record_collection)

    while True:
        txt = input("enter a search term: ")
        if txt == "quit":
            break
        txt_arr = txt.split(' ', 1)
        print(f'searching {txt_arr}')
        # BUGFIX: a lone field name (e.g. "title") previously raised
        # IndexError when reading the missing search value.
        if len(txt_arr) < 2:
            print("invalid query")
            continue
        field, value = txt_arr
        if field == 'title':
            print(rs.search(f'@title:{value}'))
        elif field == 'artist':
            print(rs.search(f'@artist:{value}'))
        elif field == 'year':
            full_txt_arr = txt.split(' ')
            former = full_txt_arr[1]
            # A single year queries the degenerate range [y y].
            latter = full_txt_arr[2] if len(full_txt_arr) == 3 else former
            print(rs.search(f'@year:[{former} {latter}]'))
        elif field == 'genre':
            # BUGFIX: genre search was an unimplemented 'pass'; query the
            # tag field with exact-tag syntax (@genre:{value}).
            print(rs.search(f'@genre:{{{value}}}'))
        else:
            print("invalid query")