def __init__(self, client):
    self.client = client
    self.file_path = "./index-data/area.csv"
    self.fields = (
        NumericField("index"),
        NumericField("areaId"),
        TextField("areaTitle"),
        TextField("areaBody"),
    )
def create(self):
    try:
        self.client.drop_index()
    except Exception:
        # The index may not exist yet; ignore the error.
        pass
    self.client.create_index([
        NumericField('ORDERNUMBER'),
        NumericField('QUANTITYORDERED', sortable=True),
        NumericField('PRICEEACH', sortable=True),
        NumericField('ORDERLINENUMBER'),
        NumericField('SALES', sortable=True),
        TextField('ORDERDATE'),
        TextField('STATUS', sortable=True),
        NumericField('QTR_ID', sortable=True),
        NumericField('MONTH_ID', sortable=True),
        NumericField('YEAR_ID', sortable=True),
        TextField('PRODUCTLINE', sortable=True),
        NumericField('MSRP', sortable=True),
        TextField('PRODUCTCODE', sortable=True),
        TextField('CUSTOMERNAME', sortable=True),
        TextField('PHONE'),
        TextField('ADDRESSLINE1'),
        TextField('ADDRESSLINE2'),
        TextField('CITY', sortable=True),
        TextField('STATE', sortable=True),
        TextField('POSTALCODE', sortable=True),
        TextField('COUNTRY', sortable=True),
        TextField('TERRITORY', sortable=True),
        TextField('CONTACTLASTNAME'),
        TextField('CONTACTFIRSTNAME'),
        TextField('DEALSIZE', sortable=True)
    ])
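
# A hedged usage sketch, not part of the original source: querying the sales
# index created above. The index name 'sales' and the value ranges are
# assumptions; field names match the schema in create().
from redisearch import Client, NumericFilter, Query

client = Client('sales')
q = Query('@COUNTRY:USA') \
    .add_filter(NumericFilter('SALES', 1000, 5000)) \
    .sort_by('SALES', asc=False) \
    .paging(0, 10)
res = client.search(q)
print(res.total, [doc.id for doc in res.docs])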
def clientpush(self):
    client = Client('Checkout')
    client.create_index([
        NumericField('Key'),
        TextField('UsageClass'),
        TextField('CheckoutType'),
        TextField('MaterialType'),
        NumericField('CheckoutYear'),
        NumericField('CheckoutMonth'),
        NumericField('Checkouts'),
        TextField('Title'),
        TextField('Creator'),
        TextField('Subjects'),
        TextField('Publisher'),
        TextField('PublicationYear')
    ])

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    results = cursor.fetchall()

    i = 0
    for result in results:
        client.add_document(
            'doc%s' % i,
            Key=result[0],
            UsageClass=result[1],
            CheckoutType=result[2],
            MaterialType=result[3],
            CheckoutYear=result[4],
            CheckoutMonth=result[5],
            Checkouts=result[6],
            Title=result[7],
            Creator=result[8],
            Subjects=result[9],
            Publisher=result[10],
            PublicationYear=result[11]
        )
        i += 1
    print(i)

    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))

    res1 = client.search("use")
    print(res1)

    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
def to_search_field(self):
    if self.type == 'int':
        kwargs = {
            "name": self.name,
            "sortable": self.sortable,
            "no_index": self.no_index
        }
        return NumericField(**kwargs)
    # 'str', 'map', and any other type are indexed as text with identical
    # options, so the duplicated branches collapse into one.
    kwargs = {
        "name": self.name,
        "weight": self.weight,
        "sortable": self.sortable,
        "no_stem": self.no_stem,
        "no_index": self.no_index,
        "phonetic_matcher": self.phonetic_matcher
    }
    return TextField(**kwargs)
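
# A minimal sketch (assumption, not from the original) of driving
# to_search_field() from a list of descriptor objects to build a schema.
def build_schema(specs):
    # Each spec exposes to_search_field() as defined above.
    return [spec.to_search_field() for spec in specs]

# Usage (hypothetical): client.create_index(build_schema(field_specs))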
def start(self, data, index_name):
    status = 1
    self.drop_index()
    self.client = Client(index_name, self.host, self.port)
    status = 2
    schema = [
        NumericField('INDEX'),
        TextField('DATA'),
        TextField('SECTION_TYPE'),
        TextField('DOCID'),
        TextField('PAGE'),
        TextField('GRIDID'),
        TextField("ROWCOL"),
        TextField('BBOX'),
        TextField("PAGE_GRID_SE"),
        TextField('Rowspan'),
        TextField('Colspan')
    ]
    # rsObj.set_schema([NumericField('INDEX'), TextField('DOCID'), TextField('CATEGORY'), TextField('TAXONAME'), TextField('VALUE'), TextField('XML_REF'), TextField('REF_KEY')])
    status = 3
    self.add_indexing_schema(schema)
    status = 4
    self.add_data(data, index_name)
    status = 5
    return [status]
def createHub(self):
    logger.info('Creating the hub in the database {}'.format(self._ts))
    # Store the master modules catalog as an object
    self.dconn.jsonset(
        self._hubkey, Path.rootPath(), {
            'created': str(_toepoch(self._ts)),
            'modules': {},
            'submissions': [],
            'submit_enabled': False
        })
    # Create a RediSearch index for the modules
    # TODO: catch errors
    self.sconn.create_index(
        (TextField('name', sortable=True),
         TextField('description'),
         NumericField('stargazers_count', sortable=True),
         NumericField('forks_count', sortable=True),
         NumericField('last_modified', sortable=True)),
        stopwords=stopwords)
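
# Sketch (assumed, not from the original): listing modules by popularity via
# the sortable fields declared above. The index name 'hub' is hypothetical;
# in the real code this query would go through self.sconn.
from redisearch import Client, Query

sconn = Client('hub')
q = Query('*').sort_by('stargazers_count', asc=False).paging(0, 10)
for doc in sconn.search(q).docs:
    print(doc.name, doc.stargazers_count)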
def to_field(k, v):
    if v == "BOOL":
        return TextField(k, sortable=True)
    elif v == "NUMERIC":
        return NumericField(k, sortable=True)
    elif v == "TEXT":
        return TextField(k)
    elif v == "TAG":
        return TagField(k)
    else:
        return GeoField(k)
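
# Illustrative sketch (assumed): building an index from a name -> type map
# with to_field(). The index name 'example-idx' is hypothetical.
from redisearch import Client

spec = {"active": "BOOL", "price": "NUMERIC", "title": "TEXT",
        "labels": "TAG", "position": "GEO"}
client = Client('example-idx')
client.create_index([to_field(k, v) for k, v in spec.items()])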
def create(self):
    try:
        self.client.drop_index()
    except Exception:
        # The index may not exist yet; ignore the error.
        pass
    self.client.create_index([
        NumericField('WORDCOUNT', sortable=True),
        TextField('BYLINE', no_stem=True, sortable=True),
        TextField('DOCUMENTTYPE', sortable=True),
        TextField('HEADLINE', sortable=True),
        TagField('KEYWORDS', separator=';'),
        NumericField('MULTIMEDIA', sortable=True),
        TextField('NEWDESK', sortable=True),
        NumericField('PRINTPAGE', sortable=True),
        NumericField('PUBDATE', sortable=True),
        TextField('SECTIONNAME', sortable=True),
        TextField('SNIPPET', sortable=True),
        TextField('TYPEOFMATERIAL', sortable=True),
        TextField('WEBURL')
    ])
def import_brewery_geo(r, rsclient):
    # create the brewery redisearch index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('address'),
        TextField('city'),
        TextField('state'),
        TextField('country'),
        NumericField('id', sortable=True),
        GeoField('location')
    ]
    rsclient.create_index([*ftidxfields])

    with open(brewerygeofile) as geofile:
        geo = csv.reader(geofile)
        for row in geo:
            if geo.line_num == 1:
                # skip the header line
                continue
            # use the brewery id to generate the brewery key added earlier
            brewery_key = "{}:{}".format(brewery, row[1])
            # get all the data from the brewery hash
            binfo = r.hgetall(brewery_key)
            if not any(binfo):
                print("\tERROR: Missing info for {}, skipping geo import".format(brewery_key))
                continue
            # add the brewery document to the index
            ftaddfields = {
                'name': binfo[b'name'].decode(),
                'address': binfo[b'address1'].decode(),
                'city': binfo[b'city'].decode(),
                'state': binfo[b'state'].decode(),
                'country': binfo[b'country'].decode(),
                'id': row[1],
                'location': "{},{}".format(row[3], row[2])
            }
            try:
                rsclient.add_document(
                    "brewery:{}".format(row[1]),
                    score=1.0,
                    replace=True,
                    partial=True,
                    **ftaddfields
                )
            except Exception as e:
                print("\tERROR: Failed to add document for {}: {}".format(brewery_key, e))
                continue
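
# Sketch (assumed, not from the original): a radius search against the
# GeoField created above. The index name, coordinates, and radius are all
# placeholders.
from redisearch import Client, GeoFilter, Query

rsclient = Client('breweries')
q = Query('ale').add_filter(GeoFilter('location', -122.41, 37.77, 50, unit='km'))
for doc in rsclient.search(q).docs:
    print(doc.name, doc.city)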
def create_user_index(self, users):
    """
    Creates a new user index if not exists
    :param users:
    :return:
    """
    definition = IndexDefinition(prefix=['doc:', 'user:'])
    try:
        self.client.create_index(
            (TextField("first_name"),
             TextField("last_name"),
             TextField("email"),
             NumericField("age"),
             NumericField("is_employee"),
             NumericField("user_id", sortable=True)),
            definition=definition)
    except redis.exceptions.ResponseError:
        return False

    indexer = self.client.batch_indexer(chunk_size=len(users))
    for user in users:
        fields = {
            "first_name": user.first_name.translate(str.maketrans({"-": r"\-"})),
            "last_name": user.last_name.translate(str.maketrans({"-": r"\-"})),
            "email": user.email.translate(str.maketrans({"-": r"\-"})),
            "age": user.age,
            "user_id": user.id,
            "is_employee": int(user.is_employee),
        }
        indexer.add_document(f"doc:{user.id}", **fields)
    indexer.commit()
    return True
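
# Sketch (assumed): querying the user index created above. Because hyphens
# were escaped at indexing time, hyphenated names can be matched verbatim.
# The index name 'users' is hypothetical.
from redisearch import Client, Query

client = Client('users')
res = client.search(Query(r'@last_name:smith\-jones @age:[18 65]'))
print(res.total)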
def start(self, data, doc_id, company, project):
    status = 1
    index_name = project + "_DOCUMENT_" + str(doc_id)
    self.drop_index()
    self.client = Client(index_name, self.host, self.port)
    status = 2
    schema = [
        NumericField('INDEX'),
        TextField('DATA'),
        TextField('PAGE'),
        TextField('BBOX')
    ]
    status = 3
    self.add_indexing_schema(schema)
    status = 4
    self.add_data(data, company, doc_id, project)
    status = 5
    return [status]
def create_index(self):
    client = self.get_indexed_client()
    definition = IndexDefinition(prefix=['stock:'])
    index_fields = [
        TextField("SC_NAME"),
    ]
    for index in self.numeric_indexes:
        index_fields.append(NumericField(index))
    try:
        # FT.CREATE idx:stock ON HASH PREFIX 1 stock: SCHEMA SC_NAME TEXT ...
        client.create_index(index_fields, definition=definition)
    except redis.exceptions.ResponseError as e:
        # FT.DROPINDEX idx:stock DD
        if str(e) != "Index already exists":
            raise e
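
# Sketch (assumed): filtering on one of the numeric fields declared above;
# 'CLOSE' stands in for a real member of self.numeric_indexes.
from redisearch import Client, NumericFilter, Query

client = Client('idx:stock')
q = Query('*').add_filter(NumericFilter('CLOSE', 100, 500))
print([doc.SC_NAME for doc in client.search(q).docs])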
def ftadd_beers(r, rsclient):
    # create beer index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('brewery'),
        NumericField('breweryid', sortable=True),
        TextField('category'),
        NumericField('categoryid'),
        TextField('style'),
        NumericField('styleid'),
        TextField('description'),
        NumericField('abv', sortable=True),
        NumericField('ibu', sortable=True),
        TagField('favorite')
    ]
    rsclient.create_index([*ftidxfields])

    header = []
    dontadd = 0
    with open(beerfile) as csvfile:
        beers = csv.reader(csvfile)
        for row in beers:
            docid = ''
            docscore = 1.0
            ftaddfields = {}
            if beers.line_num == 1:
                header = row
                continue
            for idx, field in enumerate(row):
                if idx == 0:
                    docid = "{}:{}".format(beer, field)
                    continue
                # idx 1 is brewery name
                if idx == 1:
                    if field == "":
                        # something is wrong with the csv, skip this line.
                        print("\tEJECTING: {}".format(row))
                        dontadd = 1
                        break
                    bkey = "{}:{}".format(brewery, field)
                    ftaddfields['brewery'] = r.hget(bkey, 'name')
                    ftaddfields['breweryid'] = field
                # idx 2 is beer name
                elif idx == 2:
                    ftaddfields['name'] = field
                # idx 3 is category ID
                elif idx == 3:
                    catname = 'None'
                    if int(field) != -1:
                        # get the category key and hget the name of the category
                        ckey = "{}:{}".format(category, field)
                        catname = r.hget(ckey, 'cat_name')
                    ftaddfields['category'] = catname
                    ftaddfields['categoryid'] = field
                # idx 4 is style ID
                elif idx == 4:
                    stylename = 'None'
                    if int(field) != -1:
                        skey = "{}:{}".format(style, field)
                        stylename = r.hget(skey, 'style_name')
                    ftaddfields['style'] = stylename
                    ftaddfields['styleid'] = field
                # idx 5 is ABV
                elif idx == 5:
                    ftaddfields['abv'] = field
                    # update the document score based on ABV
                    docscore = get_beer_doc_score(field)
                # idx 6 is IBU
                elif idx == 6:
                    ftaddfields['ibu'] = field
            if dontadd:
                dontadd = 0
                continue
            # add beer document
            rsclient.add_document(docid, score=docscore, **ftaddfields)
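
# Sketch (assumed, not from the original): combining full-text, numeric, and
# the TagField declared above in one query. The index name is hypothetical.
from redisearch import Client, Query

rsclient = Client('beers')
q = Query('@name:stout @abv:[6 12] @favorite:{yes}').sort_by('abv', asc=False)
for doc in rsclient.search(q).docs:
    print(doc.name, doc.abv)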
import gpxpy
import gpxpy.gpx
from redisearch import Client, Query, TextField, GeoField, NumericField

client = Client(
    'attractions',
    host='127.0.0.1',
    password='',
    port=6379
)
client.create_index([
    TextField('title', weight=5.0),
    TextField('description'),
    NumericField('verified', sortable=True),
    GeoField('geo'),
])

gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r', encoding='utf-8')
gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    t = "%s,%s,%s" % (waypoint.name, waypoint.longitude, waypoint.latitude)
    client.add_document(
def load_data(redis_server, redis_port, redis_password):
    load_client = Client(
        'fortune500-v1',
        host=redis_server,
        password=redis_password,
        port=redis_port
    )
    load_ac = AutoCompleter(
        'ac',
        conn=load_client.redis
    )

    definition = IndexDefinition(
        prefix=['fortune500:'],
        language='English',
        score_field='title',
        score=0.5
    )
    load_client.create_index(
        (
            TextField("title", weight=5.0),
            TextField('website'),
            TextField('company'),
            NumericField('employees', sortable=True),
            TextField('industry', sortable=True),
            TextField('sector', sortable=True),
            TextField('hqcity', sortable=True),
            TextField('hqstate', sortable=True),
            TextField('ceo'),
            TextField('ceoTitle'),
            NumericField('rank', sortable=True),
            NumericField('assets', sortable=True),
            NumericField('revenues', sortable=True),
            NumericField('profits', sortable=True),
            NumericField('equity', sortable=True),
            TagField('tags'),
            TextField('ticker')
        ),
        definition=definition)

    with open('./fortune500.csv', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        line_count = 0
        for row in csv_reader:
            if line_count > 0:
                load_ac.add_suggestions(Suggestion(row[1].replace('"', ''), 1.0))
                load_client.redis.hset(
                    "fortune500:%s" % (row[1].replace(" ", '')),
                    mapping={
                        'title': row[1],
                        'company': row[1],
                        'rank': row[0],
                        'website': row[2],
                        'employees': row[3],
                        'sector': row[4],
                        'tags': ",".join(row[4].replace('&', '').replace(',', '').replace('  ', ' ').split()).lower(),
                        'industry': row[5],
                        'hqcity': row[8],
                        'hqstate': row[9],
                        'ceo': row[12],
                        'ceoTitle': row[13],
                        'ticker': row[15],
                        'revenues': row[17],
                        'profits': row[19],
                        'assets': row[21],
                        'equity': row[22]
                    })
            line_count += 1

    # Finally, create the alias
    load_client.aliasadd("fortune500")
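
# Sketch (assumed): consuming the AutoCompleter and the alias set up above
# for type-ahead plus a regular search. Connection details are placeholders.
from redisearch import AutoCompleter, Client

client = Client('fortune500')  # resolves via the alias added above
ac = AutoCompleter('ac', conn=client.redis)
for s in ac.get_suggestions('App', fuzzy=True, num=5):
    print(s.string, s.score)
print(client.search('@sector:Technology').total)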
            'error_message': doc.errorMessage,
            'stack_trace': doc.stackTrace,
            'numberRange': doc.numberRange
        }
        result.append(value_dict)
    print(res)
    return result


document = [
    TextField('clientIp', weight=5.0),
    TextField('service', weight=1.0),
    TextField('errorMessage', weight=10.0),
    TextField('stackTrace'),
    TextField('dateTime', weight=10.0),
    NumericField('numberRange')
]

if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description="Build the Search Index for RediSearch")
    parser.add_argument(
        '-d', '--delete',
        dest='del_index',
        help="Deletes the former index. Must only be used if there is an index already",
        required=False,
        const=True,
        default=False,
        nargs='?')
def __init__(self, db, prefix: str = None):
    """
    # rBaseDocument

    A RediSearch document, but without input validation.

    ## Params

    conn - Redis connection
    prefix - name of the document, i.e. PERSONA, or None, in which case the
             name of the class is used

    ## Remarks

    After the index is created (the first time), its definition is no longer
    synced with the database. You must maintain changes on Redis manually, or
    simply delete the index with:

    ```> FT.DROPINDEX idx:movie```

    and let Redis recreate it. This is usually fast, but may not be an option
    in a production environment.
    """
    self.db = db
    if not prefix:
        prefix = type(self).__name__.upper()
    self.prefix = prefix.upper()
    self.idx = Client(f"idx{self.db.delim}{self.prefix}", conn=db.r)

    # build index list for RediSearch and columns for an html table of the data
    index = []
    self.columns = []    # columns to appear in an auto-generated html table
    self.dependant = []  # fields that depend on a foreign key
    self.index = []      # list of indexed field names
    self.uniques = []    # list of fields whose values must be unique

    logger.debug(f"Members of document type {self.prefix}")
    for field in self.Definition():
        logger.debug(f"{field.name}({field.type}): {field.render_kw}")
        if field.render_kw:
            # include field in index
            if field.render_kw.get('indexed', False):
                self.index.append(field.name)  # append to indexed field names
                if field.type in ('DecimalField', 'FloatField', 'IntegerField'):
                    index.append(NumericField(field.name, sortable=True))
                else:
                    index.append(TextField(field.name, sortable=True))
            # include field in html table columns
            if field.render_kw.get('on_table', False):
                self.columns.append(field.name)
            # the field has unique values
            if field.render_kw.get('unique', False):
                self.uniques.append(field.name)  # append to uniques
                if field.name not in self.index:  # append to index list
                    self.index.append(field.name)
                    if field.type in ('DecimalField', 'FloatField', 'IntegerField'):
                        index.append(NumericField(field.name, sortable=True))
                    else:
                        index.append(TextField(field.name, sortable=True))

    # build index
    try:
        self.idx.create_index(
            index,
            definition=IndexDefinition(prefix=[f'{self.prefix}{self.db.delim}']))
    except Exception:
        # The index already exists; nothing to do.
        pass
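
# Sketch (assumed, not from the original): how a document subclass might
# declare its fields. FieldDef is a hypothetical stand-in for whatever
# WTForms-style objects Definition() actually yields; they only need to
# expose .name, .type, and .render_kw as used above.
from collections import namedtuple

FieldDef = namedtuple('FieldDef', 'name type render_kw')

class Persona(rBaseDocument):
    def Definition(self):
        return [
            FieldDef('name', 'StringField', {'indexed': True, 'on_table': True}),
            FieldDef('age', 'IntegerField', {'indexed': True}),
            FieldDef('dni', 'StringField', {'unique': True}),
        ]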
import sys
import json

import TAS_Redisearch
from redisearch import NumericField, TextField


class TAS_Import():
    def __init__(self, index_name, host="localhost", port=6381, db=0):
        pass

    def add_data(self, data, rsObj):
        for i, rr in enumerate(data):
            index = i + 1
            name, age, location = rr
            # NOTE: add_document requires a document id as its first argument;
            # deriving it from the row index here is an assumption.
            rsObj.client.add_document('doc:%s' % index, INDEX=index, NAME=name,
                                      AGE=age, LOCATION=location)


if __name__ == "__main__":
    obj = TAS_Redisearch.TAS_Redisearch("USERS", 'localhost', '6381')
    obj.drop_index()
    res = obj.set_schema([
        NumericField('INDEX'),
        TextField('NAME'),
        TextField('AGE'),
        TextField('LOCATION')
    ])
    f = open("input.txt", "r")
    dd = json.loads(f.read())
    data_red = obj.add_data(dd)