Пример #1
0
 def __init__(self):
     self.r = redis.from_url(config.EVENT_BROKER_URL)
     self.client = Client('CCTV_DATA')
     try:
         self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
     except Exception as error:
         print("Error while creatign index", error)
Пример #2
0
def build_ipa_index():
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    ipa_index_ou = ipa_index_ou.loc[lambda ipa_index_ou: ipa_index_ou['cod_ou']
                                    == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except:
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`',
          flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        rs_client.add_document(row['cod_amm'],
                               partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)),
          flush=True)
Пример #3
0
def to_field(k, v):
    if v == "BOOL":
        return TextField(k, sortable=True)
    elif v == "NUMERIC":
        return NumericField(k, sortable=True)
    elif v == "TEXT":
        return TextField(k)
    elif v == "TAG":
        return TagField(k)
    else:
        return GeoField(k)
def insert():
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index([TextField('title'), TagField('genres', separator = '|')])

    for i in range(0, len(movie_df)):
        suggestion_client.add_document(movie_df['tmdbId'][i], title = movie_df['title'][i], genres = movie_df['genres'][i])

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')

    for i in range(0, len(movie_df)):
        completion_client.add_suggestions(Suggestion(movie_df['title'][i]))
    def create(self):
        try:
            self.client.drop_index()
        except:
            pass

        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
Пример #6
0
def to_search_field(field):
    if field == "tags":
        return TagField("tags", separator=",")

    return TextField(field)
Пример #7
0
def load_data(redis_server, redis_port, redis_password):
   load_client = Client(
      'fortune500-v1',
      host=redis_server,
      password=redis_password,
      port=redis_port
   )
   load_ac = AutoCompleter(
   'ac',
   conn = load_client.redis
   )
   
   definition = IndexDefinition(
           prefix=['fortune500:'],
           language='English',
           score_field='title',
           score=0.5
           )
   load_client.create_index(
           (
               TextField("title", weight=5.0),
               TextField('website'),
               TextField('company'),
               NumericField('employees', sortable=True),
               TextField('industry', sortable=True),
               TextField('sector', sortable=True),
               TextField('hqcity', sortable=True),
               TextField('hqstate', sortable=True),
               TextField('ceo'),
               TextField('ceoTitle'),
               NumericField('rank', sortable=True),
               NumericField('assets', sortable=True),
               NumericField('revenues', sortable=True),
               NumericField('profits', sortable=True),
               NumericField('equity', sortable=True),
               TagField('tags'),
               TextField('ticker')
               ),        
       definition=definition)

   with open('./fortune500.csv', encoding='utf-8') as csv_file:
      csv_reader = csv.reader(csv_file, delimiter=',')
      line_count = 0
      for row in csv_reader:
         if line_count > 0:
            load_ac.add_suggestions(Suggestion(row[1].replace('"', ''),  1.0))
            load_client.redis.hset(
                    "fortune500:%s" %(row[1].replace(" ", '')),
                    mapping = {
                        'title': row[1],
                        'company': row[1],
                        'rank': row[0],
                        'website': row[2],
                        'employees': row[3],
                        'sector': row[4],
                        'tags': ",".join(row[4].replace('&', '').replace(',', '').replace('  ', ' ').split()).lower(),
                        'industry': row[5],
                        'hqcity': row[8],
                        'hqstate': row[9],
                        'ceo': row[12],
                        'ceoTitle': row[13],
                        'ticker': row[15],
                        'revenues': row[17],
                        'profits': row[19],
                        'assets': row[21],
                        'equity': row[22]

               })
         line_count += 1
   # Finally Create the alias
   load_client.aliasadd("fortune500")
Пример #8
0
def ftadd_beers(r, rsclient):

    # create beer index
    ftidxfields = [
        TextField('name', weight=5.0),
        TextField('brewery'),
        NumericField('breweryid', sortable=True),
        TextField('category'),
        NumericField('categoryid'),
        TextField('style'),
        NumericField('styleid'),
        TextField('description'),
        NumericField('abv', sortable=True),
        NumericField('ibu', sortable=True),
        TagField('favorite')
    ]
    rsclient.create_index([*ftidxfields])

    header = []
    dontadd = 0
    with open(beerfile) as csvfile:
        beers = csv.reader(csvfile)
        for row in beers:
            docid = ''
            docscore = 1.0
            ftaddfields = {}

            if beers.line_num == 1:
                header = row
                continue

            for idx, field in enumerate(row):
                if idx == 0:
                    docid = "{}:{}".format(beer, field)
                    continue

                # idx 1 is brewery name
                if idx == 1:

                    if field == "":
                        # something is wrong with the csv, skip this line.
                        print("\tEJECTING: {}".format(row))
                        dontadd = 1
                        break
                    bkey = "{}:{}".format(brewery, field)
                    ftaddfields['brewery'] = r.hget(bkey, 'name')
                    ftaddfields['breweryid'] = field

                # idx 2 is beer name
                elif idx == 2:

                    ftaddfields['name'] = field

                # idx 3 is category ID
                elif idx == 3:

                    catname = 'None'
                    if int(field) != -1:
                        # get the category key and hget the name of the category
                        ckey = "{}:{}".format(category, field)
                        catname = r.hget(ckey, 'cat_name')

                    ftaddfields['category'] = catname
                    ftaddfields['categoryid'] = field

                # idx 4 is style ID
                elif idx == 4:

                    stylename = 'None'

                    if int(field) != -1:
                        skey = "{}:{}".format(style, field)
                        stylename = r.hget(skey, 'style_name')

                    ftaddfields['style'] = stylename
                    ftaddfields['styleid'] = field

                # idx 5 is ABV
                elif idx == 5:

                    ftaddfields['abv'] = field

                    # update the document score based on ABV
                    docscore = get_beer_doc_score(field)

                # idx 6 is IBU
                elif idx == 6:

                    ftaddfields['ibu'] = field

            if dontadd:
                dontadd = 0
                continue

            # add beer document
            rsclient.add_document(docid, score=docscore, **ftaddfields)
import json
from redisearch import Client, TextField, TagField

with open('wocka.json', 'r') as f:
    jokes = json.load(f)

hostname = 'redis-17235.laurent.cs.redislabs.com'
port = 17235

client = Client('jokes', hostname, port)
client.create_index((TextField('title'), TextField('body'),
                     TextField('category'), TagField('label')))

for joke in jokes:
    client.add_document(joke['id'],
                        title=joke['title'],
                        body=joke['body'],
                        category=joke['category'],
                        label=joke['category'])

print("number of jokes in the json file: " + str(len(jokes)))

info = client.info()
print(info)