def __init__(self):
    """Connect to the event broker and ensure the CCTV_DATA search index exists."""
    self.r = redis.from_url(config.EVENT_BROKER_URL)
    self.client = Client('CCTV_DATA')
    try:
        # create_index raises if the index already exists; report and continue,
        # since an existing index is not a fatal condition here.
        self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
    except Exception as error:
        # Fixed typo in the original message ("creatign" -> "creating").
        print("Error while creating index", error)
def build_ipa_index():
    """Rebuild the `IPAIndex` RediSearch index from indicepa.gov.it open data.

    Downloads `amministrazioni.txt` (one row per public administration) and
    `ou.txt` (organisational units), drops and recreates the index, feeds it
    with the administration rows, then partially updates each document with
    the RTD (digital-transition office) contact data from `ou.txt`.
    """
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(
        ipa_index_ou_url,
        sep='\t',
        na_values=['da_indicare', '*****@*****.**'],
        dtype=str)
    # Keep only the digital-transition offices (RTD contact points).
    ipa_index_ou = ipa_index_ou.loc[
        lambda df: df['cod_ou'] == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:  # was a bare except; narrowed so ^C is not swallowed
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`', flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(
            row['cod_amm'], language='italian', replace=True,
            **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        # partial=True merges RTD fields into the already-indexed document.
        rs_client.add_document(row['cod_amm'], partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)), flush=True)
def to_field(k, v):
    """Build the RediSearch field for key *k* from the type name *v*.

    "BOOL" and "NUMERIC" become sortable fields; "TEXT" and "TAG" map to
    their plain counterparts; anything else falls back to a geo field.
    """
    builders = {
        "BOOL": lambda name: TextField(name, sortable=True),
        "NUMERIC": lambda name: NumericField(name, sortable=True),
        "TEXT": TextField,
        "TAG": TagField,
    }
    return builders.get(v, GeoField)(k)
def insert():
    """Index every movie for search and seed the title auto-completer.

    Reads the module-level `movie_df` DataFrame: each row becomes a search
    document keyed by tmdbId, and each title becomes an auto-complete
    suggestion in the 'ac' dictionary.
    """
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index([TextField('title'),
                                    TagField('genres', separator='|')])
    # Iterate the columns in lockstep instead of indexing the DataFrame by
    # position on every pass (range(len(...)) + chained indexing is slow).
    for tmdb_id, title, genres in zip(movie_df['tmdbId'], movie_df['title'],
                                      movie_df['genres']):
        suggestion_client.add_document(tmdb_id, title=title, genres=genres)

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')
    for title in movie_df['title']:
        completion_client.add_suggestions(Suggestion(title))
def create(self):
    """(Re)create the article search index, dropping any existing one first."""
    try:
        self.client.drop_index()
    except Exception:  # was a bare except; drop is deliberately best-effort
        pass  # index did not exist yet
    self.client.create_index([
        NumericField('WORDCOUNT', sortable=True),
        TextField('BYLINE', no_stem=True, sortable=True),
        TextField('DOCUMENTTYPE', sortable=True),
        TextField('HEADLINE', sortable=True),
        TagField('KEYWORDS', separator=';'),
        NumericField('MULTIMEDIA', sortable=True),
        TextField('NEWDESK', sortable=True),
        NumericField('PRINTPAGE', sortable=True),
        NumericField('PUBDATE', sortable=True),
        TextField('SECTIONNAME', sortable=True),
        TextField('SNIPPET', sortable=True),
        TextField('TYPEOFMATERIAL', sortable=True),
        TextField('WEBURL')
    ])
def to_search_field(field):
    """Return the RediSearch field definition for *field*.

    "tags" is the only tag-typed field (comma-separated); every other
    field name is indexed as plain text.
    """
    if field != "tags":
        return TextField(field)
    return TagField("tags", separator=",")
def load_data(redis_server, redis_port, redis_password):
    """Build the `fortune500-v1` index and load ./fortune500.csv into Redis.

    Creates the search index over hashes prefixed `fortune500:`, writes one
    hash per company plus an auto-complete suggestion for its name, and
    finally adds the `fortune500` alias so queries can use a stable name.

    :param redis_server: Redis hostname.
    :param redis_port: Redis port.
    :param redis_password: Redis password (may be empty).
    """
    load_client = Client(
        'fortune500-v1',
        host=redis_server,
        password=redis_password,
        port=redis_port
    )
    load_ac = AutoCompleter('ac', conn=load_client.redis)
    definition = IndexDefinition(
        prefix=['fortune500:'],
        language='English',
        score_field='title',
        score=0.5
    )
    load_client.create_index(
        (
            TextField("title", weight=5.0),
            TextField('website'),
            TextField('company'),
            NumericField('employees', sortable=True),
            TextField('industry', sortable=True),
            TextField('sector', sortable=True),
            TextField('hqcity', sortable=True),
            TextField('hqstate', sortable=True),
            TextField('ceo'),
            TextField('ceoTitle'),
            NumericField('rank', sortable=True),
            NumericField('assets', sortable=True),
            NumericField('revenues', sortable=True),
            NumericField('profits', sortable=True),
            NumericField('equity', sortable=True),
            TagField('tags'),
            TextField('ticker')
        ),
        definition=definition)

    with open('./fortune500.csv', encoding='utf-8') as csv_file:
        csv_reader = csv.reader(csv_file, delimiter=',')
        next(csv_reader, None)  # skip the header row (replaces line_count bookkeeping)
        for row in csv_reader:
            company_name = row[1].replace('"', '')
            load_ac.add_suggestions(Suggestion(company_name, 1.0))
            load_client.redis.hset(
                "fortune500:%s" % (row[1].replace(" ", '')),
                mapping={
                    'title': row[1],
                    'company': row[1],
                    'rank': row[0],
                    'website': row[2],
                    'employees': row[3],
                    'sector': row[4],
                    # Strip '&' and ',' from the sector, collapse whitespace,
                    # and lowercase to produce a comma-separated tag list.
                    'tags': ",".join(
                        row[4].replace('&', '').replace(',', '').split()
                    ).lower(),
                    'industry': row[5],
                    'hqcity': row[8],
                    'hqstate': row[9],
                    'ceo': row[12],
                    'ceoTitle': row[13],
                    'ticker': row[15],
                    'revenues': row[17],
                    'profits': row[19],
                    'assets': row[21],
                    'equity': row[22]
                })
    # Finally create the alias
    load_client.aliasadd("fortune500")
def ftadd_beers(r, rsclient):
    """Create the beer search index and feed it from the beers CSV.

    For each CSV row, resolves the brewery/category/style names from the
    corresponding Redis hashes and adds one search document per beer.

    :param r: plain Redis connection, used for hash lookups (hget).
    :param rsclient: RediSearch client, used to create the index and add docs.

    NOTE(review): relies on module-level names not visible here — `beerfile`,
    and the key prefixes `beer`, `brewery`, `category`, `style`, plus
    `get_beer_doc_score` — confirm their definitions elsewhere in the file.
    """
    # create beer index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('brewery'),
        NumericField('breweryid', sortable=True),
        TextField('category'),
        NumericField('categoryid'),
        TextField('style'),
        NumericField('styleid'),
        TextField('description'),
        NumericField('abv', sortable=True),
        NumericField('ibu', sortable=True),
        TagField('favorite')
    ]
    rsclient.create_index([*ftidxfields])
    header = []
    dontadd = 0  # flag: set when a row is malformed, so it is skipped below
    with open(beerfile) as csvfile:
        beers = csv.reader(csvfile)
        for row in beers:
            docid = ''
            docscore = 1.0  # default document score; overridden by ABV below
            ftaddfields = {}
            # First line is the CSV header, not a beer record.
            if beers.line_num == 1:
                header = row
                continue
            for idx, field in enumerate(row):
                # idx 0 is the beer id: it becomes the document key.
                if idx == 0:
                    docid = "{}:{}".format(beer, field)
                    continue
                # idx 1 is brewery name
                if idx == 1:
                    if field == "":
                        # something is wrong with the csv, skip this line.
                        print("\tEJECTING: {}".format(row))
                        dontadd = 1
                        break
                    # Resolve the brewery's display name from its hash.
                    bkey = "{}:{}".format(brewery, field)
                    ftaddfields['brewery'] = r.hget(bkey, 'name')
                    ftaddfields['breweryid'] = field
                # idx 2 is beer name
                elif idx == 2:
                    ftaddfields['name'] = field
                # idx 3 is category ID
                elif idx == 3:
                    catname = 'None'  # -1 means "no category" in the source data
                    if int(field) != -1:
                        # get the category key and hget the name of the category
                        ckey = "{}:{}".format(category, field)
                        catname = r.hget(ckey, 'cat_name')
                    ftaddfields['category'] = catname
                    ftaddfields['categoryid'] = field
                # idx 4 is style ID
                elif idx == 4:
                    stylename = 'None'  # -1 means "no style" in the source data
                    if int(field) != -1:
                        skey = "{}:{}".format(style, field)
                        stylename = r.hget(skey, 'style_name')
                    ftaddfields['style'] = stylename
                    ftaddfields['styleid'] = field
                # idx 5 is ABV
                elif idx == 5:
                    ftaddfields['abv'] = field
                    # update the document score based on ABV
                    docscore = get_beer_doc_score(field)
                # idx 6 is IBU
                elif idx == 6:
                    ftaddfields['ibu'] = field
            # Malformed row: reset the flag and move on without indexing.
            if dontadd:
                dontadd = 0
                continue
            # add beer document
            rsclient.add_document(docid, score=docscore, **ftaddfields)
"""Load wocka.json jokes into a RediSearch index and print index info."""
import json

from redisearch import Client, TagField, TextField

with open('wocka.json', 'r') as f:
    jokes = json.load(f)

hostname = 'redis-17235.laurent.cs.redislabs.com'
port = 17235

client = Client('jokes', hostname, port)
client.create_index((TextField('title'),
                     TextField('body'),
                     TextField('category'),
                     TagField('label')))

for joke in jokes:
    # NOTE(review): 'label' is fed the category value — presumably the source
    # data has no separate label field; confirm against the JSON schema.
    client.add_document(joke['id'],
                        title=joke['title'],
                        body=joke['body'],
                        category=joke['category'],
                        label=joke['category'])

print(f"number of jokes in the json file: {len(jokes)}")

info = client.info()
print(info)