class SearchDemo:
    """Create a RediSearch index over New York Times article metadata."""

    def __init__(self, args):
        # args must provide .index, .host and .port (e.g. from argparse).
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)

    def create(self):
        """Drop any existing index and (re)create the article schema."""
        try:
            self.client.drop_index()
        except Exception:
            # Dropping is best-effort: the index may not exist yet.
            pass

        self.client.create_index([
            NumericField('WORDCOUNT', sortable=True),
            TextField('BYLINE', no_stem=True, sortable=True),
            TextField('DOCUMENTTYPE', sortable=True),
            TextField('HEADLINE', sortable=True),
            TagField('KEYWORDS', separator=';'),
            NumericField('MULTIMEDIA', sortable=True),
            TextField('NEWDESK', sortable=True),
            NumericField('PRINTPAGE', sortable=True),
            NumericField('PUBDATE', sortable=True),
            TextField('SECTIONNAME', sortable=True),
            TextField('SNIPPET', sortable=True),
            TextField('TYPEOFMATERIAL', sortable=True),
            TextField('WEBURL')
        ])
Exemplo n.º 2
0
def general_search(request) -> Response:
    """Full-text search across all resources, optionally faceted.

    When no sources are specified this is a default full-text search;
    specifying sources turns it into a faceted search.

    **query**: query string to search for.
    **source**: may be supplied multiple times to restrict sources.
    """

    client = Client(INDEX_NAME, conn=get_redis_connection())

    params = request.GET
    raw_query = params.get('query')
    sort_stars = params.get('sort-stars')
    resources = params.getlist('source')
    languages = params.getlist('language')
    awesome_lists = params.getlist('awesome-list')

    formatted = format_query(raw_query, resources, languages, awesome_lists)
    docs = [hit.__dict__ for hit in client.search(Query(formatted)).docs]
    if sort_stars == "true":
        docs.sort(key=lambda d: int(d['stargazers_count']), reverse=True)

    return Response({
        "docs": docs
    })
Exemplo n.º 3
0
 def __init__(self):
     """Connect to the event broker and ensure the CCTV search index exists."""
     self.r = redis.from_url(config.EVENT_BROKER_URL)
     self.client = Client('CCTV_DATA')
     try:
         self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
     except Exception as error:
         # create_index fails when the index already exists; log and continue.
         # (Also fixes the "creatign" typo in the original message.)
         print("Error while creating index", error)
Exemplo n.º 4
0
def index_size(index_name):
    """Return the FT.INFO dictionary for the given RediSearch index."""
    # Host 'redis' is the service name (e.g. docker-compose), default port.
    search_client = Client(index_name, 'redis', 6379)
    return search_client.info()
Exemplo n.º 5
0
def test():
    """Smoke-test RediSearch: build an index, add one doc, run queries.

    Converted from Python 2: the bare ``print`` statements were syntax
    errors under Python 3.
    """
    # Creating a client with a given index name
    client = Client('myIndex')

    # Recreate the index definition and schema from scratch.
    client.drop_index()
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # The result has the total number of results and a list of documents.
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
Exemplo n.º 6
0
def main():
    """Drop existing beer/brewery indexes and re-import all CSV data."""
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--url', help='Redis URL', type=str, default='redis://127.0.0.1:6379')
    args = parser.parse_args()

    # Set up Redis connection
    url = urlparse(args.url)
    r = redis.StrictRedis(host=url.hostname, port=url.port)

    rsbeer = Client(ftbeeridx, conn=r)
    rsbrewery = Client(ftbreweryidx, conn=r)

    for rsclient in [rsbeer, rsbrewery]:
        try:
            cinfo = rsclient.info()
        except Exception:
            # info() raises when the index does not exist; nothing to drop.
            continue

        print("dropping index {}".format(cinfo['index_name']))
        rsclient.drop_index()

    print("Importing categories...")
    import_csv(r, category, catfile)
    print("Importing styles...")
    import_csv(r, style, stylefile)
    print("Importing breweries...")
    import_csv(r, brewery, breweryfile)
    print("Adding brewery geo data to RediSearch...")
    import_brewery_geo(r, rsbrewery)
    print("Adding beer data to RediSearch...")
    ftadd_beers(r, rsbeer)
    print("Done.")
Exemplo n.º 7
0
 def start(self, data, index_name):
     """Recreate the index for *index_name*, register the table schema and
     load *data*; returns [5] on success (the last status reached)."""
     status = 1
     self.drop_index()
     self.client = Client(index_name, self.host, self.port)
     status = 2
     # One numeric row counter plus the textual layout/content fields.
     text_fields = ['DATA', 'SECTION_TYPE', 'DOCID', 'PAGE', 'GRIDID',
                    'ROWCOL', 'BBOX', 'PAGE_GRID_SE', 'Rowspan', 'Colspan']
     schema = [NumericField('INDEX')] + [TextField(name) for name in text_fields]
     status = 3
     self.add_indexing_schema(schema)
     status = 4
     self.add_data(data, index_name)
     status = 5
     return [status]
Exemplo n.º 8
0
 def __init__(self, urls: List[str], max_per_list: int = MAX_RES_PER_LIST):
     """Remember the target URLs and open the RediSearch client."""
     self.urls = urls
     self.client = Client(
         INDEX_NAME,
         host=REDIS_HOST,
         port=REDIS_PORT,
         password=REDIS_PASSWORD,
     )
     self.keys = Keys(KEY_PREFIX)
     self.max = max_per_list
Exemplo n.º 9
0
 def __init__(self, args):
     """Connect to Redis/RediSearch and cache the index's field names."""
     self.index = args.index
     self.client = Client(self.index, host=args.host, port=args.port)
     self.redis = redis.Redis(args.host, args.port)
     # FT.INFO reports each field as a list whose first element is the name;
     # comprehension replaces the original append loop.
     self.fields = [f[0] for f in self.client.info()['fields']]
Exemplo n.º 10
0
def build_ipa_index():
    """Download IPA open data and (re)build the `IPAIndex` RediSearch index.

    Pulls `amministrazioni.txt` (administrations) and `ou.txt`
    (organisational units) from indicepa.gov.it, recreates the index
    and feeds both data sets into it.
    """
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    # Keep only the "digital transition office" organisational units.
    ipa_index_ou = ipa_index_ou.loc[lambda df: df['cod_ou'] ==
                                    'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped (or never existed)

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`',
          flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        # partial=True merges RTD fields into the existing documents.
        rs_client.add_document(row['cod_amm'],
                               partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)),
          flush=True)
Exemplo n.º 11
0
    def __init__(self, host=ip, port=port, db=db, autocomplete_name='Default'):
        """Open search, autocomplete and raw-Redis connections.

        Fixes the original indentation: the first body line used a tab
        against 4-space siblings, which raises TabError in Python 3.
        """
        self.client = Client(autocomplete_name, host, port)
        self.ipAdd = host
        self.ipPort = port
        self.db = db
        self.redisConn = redis.StrictRedis(host=self.ipAdd, port=self.ipPort, db=self.db)
        self.autocomplete = AutoCompleter(autocomplete_name, host, port)
        # Pre-compiled patterns for stripping HTML entities, punctuation
        # and whitespace runs from text before indexing.
        self.escape1 = re.compile(r'&#\d+;')
        self.escape2 = re.compile(r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
        self.escape3 = re.compile(r'\s+')
Exemplo n.º 12
0
 def __init__(self, table_name, host="localhost", port=6381):
     """Connect to a RediSearch table; report failures on stderr.

     Converted from Python 2: ``print >> sys.stderr`` statements were
     syntax errors under Python 3; the stray debug ``print 'yyy'`` was
     removed.
     """
     try:
         self.client = Client(table_name, host, port)
         self.host = host
         self.port = port
         self.table_name = table_name
         self.redis = Redis()
         self.LIMIT = 10
     except Exception as e:
         print("TAS_Redisearch Error inside Constructor Index:'", table_name,
               "' HOST:'", host, "' PORT:'", port, "'\n", file=sys.stderr)
         print(e, file=sys.stderr)
Exemplo n.º 13
0
 def __init__(self, args):
     """Capture CSV-import options and connect to the search index."""
     # Connection settings.
     self.host = args.host
     self.port = args.port
     self.index = args.index
     # Import options.
     self.delimiter = args.delimiter
     self.rows = args.rows
     self.hasHeader = args.header
     self.ignore = args.ignore
     self.docid = args.docid
     self.file = open(args.file, 'r')
     self.client = Client(self.index, self.host, self.port)
     self.fields = self.client.info()['fields']
Exemplo n.º 14
0
 def _setup_client(self, hostname: str, idx_name: str, port=6379) -> None:
     """Create the search client and autocompleter; set the readiness flag.

     On failure the client is cleared and the error (with traceback) is
     logged instead of being silently swallowed by a bare ``except``.
     """
     try:
         self._client = Client(idx_name, host=hostname, port=port)
         self._auto_compl = AutoCompleter(idx_name, hostname, port=port)
         self._hostname = hostname
         self._port = port
         self._idx = idx_name
         self._ready = True
         LOGGER.info("Cache engine is ready")
     except Exception:
         self._client = None
         # logger.exception records the traceback for diagnosis.
         LOGGER.exception("Cache engine is faulty!")
Exemplo n.º 15
0
 def __init__(self):
     """Read Redis connection settings from config and build the client."""
     conf = config.Config.redis_info
     self.redis_info = conf
     self.ip = conf["host"]
     self.port = conf["port"]
     self.db = conf["db"]
     self.client = Client(conf["tb_name"], self.ip, self.port)
     # Patterns used to strip HTML entities, punctuation and whitespace.
     self.escape1 = re.compile(r'&#\d+;')
     self.escape2 = re.compile(
         r',|\.|<|>|{|}|[|]|"|\'|:|;|!|@|#|\$|%|\^|&|\*|\(|\)|-|\+|=|~')
     self.escape3 = re.compile(r'\s+')
Exemplo n.º 16
0
 def create_index(cls):
     """Create and populate the "tower" index when REDIS_SEARCH is enabled.

     NOTE: any non-empty REDIS_SEARCH value (even "false") counts as
     enabled, since os.getenv returns a truthy string.
     """
     if not os.getenv("REDIS_SEARCH", False):
         print("Unable to create Index. Try Again")
         return
     client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
     try:
         client.create_index(document)
         cls.build_index(client)
         print("Watcher Index created successfully")
     except ResponseError as err:
         print(err)
Exemplo n.º 17
0
def before_request():
    """Attach a Redis connection and both RediSearch clients to flask.g."""
    redis_conn = redis.StrictRedis(
        host=app.config['REDIS_HOST'],
        port=app.config['REDIS_PORT'],
    )
    g.redis = redis_conn
    g.rsbeer = Client('beerIdx', conn=redis_conn)
    g.rsbrewery = Client('breweryIdx', conn=redis_conn)
Exemplo n.º 18
0
def refresh_search_keys(request):
    """Rebuild the product search index and autocompleter (staff only).

    Drops 'productIndex' and recreates it, then rebuilds the
    autocompleter. Removed the original's unused doc/suggestion counters
    (dead locals that only issued extra round-trips to Redis).
    """
    if request.user.is_authenticated() and request.user.is_staff:
        client = Client('productIndex')
        if client.drop_index() == 'OK':
            create_product_search_index()
        create_product_autocompleter()
        return JsonResponse({'success': True})
    return JsonResponse({'success': False})
Exemplo n.º 19
0
class RandomWikipediaImport(object):
    """Continuously pull random Wikipedia articles into a 'wikipedia' index."""

    def __init__(self):
        self.rs = Client('wikipedia')
        self.rs.create_index((TextField('title', weight=5.0), TextField('body')))
        print(f'>>> Created index')

    def insert_random_loop(self):
        """Fetch random articles forever, indexing each one as doc<i>."""
        i = 1
        while True:
            random_title = wikipedia.random()
            article = wikipedia.page(random_title)
            self.rs.add_document(f'doc{i}', title=article.title, body=article.content)
            print(f'>>> Inserted {article.title}')
            i += 1
 def __init__(self, args):
     """Open the CSV input file and, when an index is given, a search client."""
     self.host = args.host
     self.port = args.port
     self.index = args.index
     # Search mode is active only when an index name was supplied.
     self.search = self.index is not None
     if self.search:
         self.search_client = Client(self.index, self.host, self.port)
         self.info = self.search_client.info()['fields']
     self.file = open(args.file, 'r')
     self.delimiter = args.delimiter
     self.rows = args.rows
     self.ignore = args.ignore
     self.docid = args.docid
     self.client = redis.Redis(args.host, args.port)
     self.fields = []
Exemplo n.º 21
0
 def get_indexed_client(self):
     """Return a RediSearch client bound to the ``idx:stock`` index."""
     return Client("idx:stock",
                   host=self.hostname,
                   port=self.port,
                   password=self.password)
Exemplo n.º 22
0
def searchdb(search_content):
    """Verbatim-search the BoxGroup index; return matching titles.

    Tokenizes the input with jieba, runs a verbatim query capped at 500
    hits, and returns the list of document titles or "No result found"
    when nothing matches. Updates the module-level ``total`` counter as
    a side effect.

    Fix: iterate the returned docs instead of counting up to
    ``res.total`` — when total exceeded the 500-doc page the original
    loop raised IndexError.
    """
    global total
    client = Client("BoxGroup", port=6379)
    tokenized = ' '.join(jieba.cut(search_content))
    q = Query(tokenized).verbatim().paging(0, 500)
    res = client.search(q)
    total = res.total
    if res.total == 0:
        return "No result found"
    return [doc.title for doc in res.docs]
Exemplo n.º 23
0
class CacheEngine:
    """Thin wrapper around RediSearch: documents, search and suggestions."""

    def __init__(self, hostname: str, idx_name: str, port=6379) -> None:
        self._ready = False
        self._setup_client(hostname, idx_name, port)

    def _setup_client(self, hostname: str, idx_name: str, port=6379) -> None:
        """Create the search client and autocompleter; set readiness flag."""
        try:
            self._client = Client(idx_name, host=hostname, port=port)
            self._auto_compl = AutoCompleter(idx_name, hostname, port=port)
            self._hostname = hostname
            self._port = port
            self._idx = idx_name
            self._ready = True
            LOGGER.info("Cache engine is ready")
        except Exception:
            self._client = None
            LOGGER.error("Cache engine is faulty!")

    def add_doc(self, doc_id: str, data: dict) -> Any:
        """Store *data* as a Redis hash under *doc_id*.

        Returns the HSET result, or False when no data is given.
        """
        # BUG FIX: the original tested ``dict is None`` — the builtin type,
        # which is never None — instead of the ``data`` argument.
        if data is None:
            return False
        return self._client.redis.hset(doc_id, mapping=data)

    def search(self, text_to_search: str) -> Result:
        """Run a full-text search and return the raw Result."""
        results: Result = self._client.search(text_to_search)
        return results

    def get_doc(self, doc_id) -> Document:
        """Load a document by id; return None when missing or on error."""
        try:
            return self._client.load_document(doc_id)
        except Exception:
            return None

    def add_suggestion(self, suggestion) -> bool:
        """Add *suggestion* to the autocompleter; report success."""
        try:
            self._auto_compl.add_suggestions(Suggestion(suggestion))
        except Exception:
            return False
        return True

    def get_suggestion(self, str_to_suggest: str) -> List:
        """Return completions; fuzzy matching only for inputs longer than 3."""
        return self._auto_compl.get_suggestions(str_to_suggest,
                                                fuzzy=len(str_to_suggest) > 3)
Exemplo n.º 24
0
 def search(cls, query, offset=0, paginate=10):
     """Search the "tower" index and return a page of result dicts."""
     client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
     res = client.search(Query(query).paging(offset, paginate))
     result = [
         {
             'id': doc.id,
             'client_ip': doc.clientIp,
             'service': doc.service,
             'error_message': doc.errorMessage,
             'stack_trace': doc.stackTrace,
             'numberRange': doc.numberRange,
         }
         for doc in res.docs
     ]
     print(res)
     return result
Exemplo n.º 25
0
class RediSearchClient(object):
    """Build and query a RediSearch index from a line-document file."""

    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        """Index up to *n_docs* documents read from *line_doc_path*."""
        line_pool = LineDocPool(line_doc_path)

        try:
            self.client.drop_index()
        except Exception:
            # Dropping is best-effort: the index may not exist yet.
            pass

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'],
                                     url=d['url'], body=d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                # Progress report (Python 3 print; original used the
                # Python 2 print statement, a syntax error under py3).
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        """Run a verbatim query and return the first page of results."""
        q = Query(query).paging(0, 5).verbatim()
        return self.client.search(q)
Exemplo n.º 26
0
 def start(self, data, doc_id, company, project):
     """Recreate the per-document index and load *data* into it.

     Returns [5] on success (the last status value reached).
     """
     status = 1
     index_name = project + "_DOCUMENT_" + str(doc_id)
     self.drop_index()
     self.client = Client(index_name, self.host, self.port)
     status = 2
     schema = [NumericField('INDEX')]
     schema += [TextField(name) for name in ('DATA', 'PAGE', 'BBOX')]
     status = 3
     self.add_indexing_schema(schema)
     status = 4
     self.add_data(data, company, doc_id, project)
     status = 5
     return [status]
Exemplo n.º 27
0
    def clientpush(self):
        """Copy the ``customers`` table into the 'Checkout' index and run
        a few demo searches against it."""
        client = Client('Checkout')

        client.create_index([
            NumericField('Key'),
            TextField('UsageClass'),
            TextField('CheckoutType'),
            TextField('MaterialType'),
            NumericField('CheckoutYear'),
            NumericField('CheckoutMonth'),
            NumericField('Checkouts'),
            TextField('Title'),
            TextField('Creator'),
            TextField('Subjects'),
            TextField('Publisher'),
            TextField('PublicationYear')
        ])

        db_connection, _ = self.connect()
        cursor = db_connection.cursor()
        cursor.execute('SELECT * FROM customers')
        results = cursor.fetchall()
        # enumerate() replaces the original hand-maintained counter.
        for i, result in enumerate(results):
            client.add_document('doc%s' % i,
                                Key=result[0],
                                UsageClass=result[1],
                                CheckoutType=result[2],
                                MaterialType=result[3],
                                CheckoutYear=result[4],
                                CheckoutMonth=result[5],
                                Checkouts=result[6],
                                Title=result[7],
                                Creator=result[8],
                                Subjects=result[9],
                                Publisher=result[10],
                                PublicationYear=result[11])
            # Original printed the incremented counter (1-based progress).
            print(i + 1)
        res = client.search('BOOK')

        print("{}   {}".format(res.total, res.docs[0].Title))
        res1 = client.search("use")
        print(res1)
        q = Query('use').verbatim().no_content().paging(0, 5)
        res1 = client.search(q)
        print(res1)
        cursor.close()
        db_connection.close()
Exemplo n.º 28
0
class EventProcessor():
    """Consume CCTV events: geo-index camera locations and tag detections."""

    def __init__(self):
        self.r = redis.from_url(config.EVENT_BROKER_URL)
        self.client = Client('CCTV_DATA')
        try:
            self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
        except Exception as error:
            # create_index fails when the index exists; log and continue.
            # (Fixes the "creatign" typo in the original message.)
            print("Error while creating index", error)

    def get_objects_in_image(self, image):
        """Return four random object tags (duplicates allowed).

        TODO: call the RedisAI module for real object detection.
        """
        objects = [
            "key", "passport", "wallet", "car", "bag", "watch", "book",
            "satchel", "laptop", "camera", "mobile_phone"
        ]
        # Four independent draws, matching the original's repeated appends.
        return [objects[r.randint(0, 10)] for _ in range(4)]

    def process(self, msg):
        """Store one CCTV event: location in a geo set, tags in the index."""
        # (Fixes the "and and" / "ccty" typos in the original messages.)
        print("Going to process message and store it", msg)
        try:
            self.r.geoadd("CCTV_LOCATION", float(msg["LON"]),
                          float(msg["LAT"]), msg["CCTV_ID"])
            msg["TAGS"] = self.get_objects_in_image(msg.get("IMAGE", ""))

            doc_unique_key = msg["CCTV_ID"] + "_" + msg["TS"]

            self.client.add_document(doc_unique_key,
                                     CCTV_ID=doc_unique_key,
                                     TAGS=",".join(msg["TAGS"]))

        except Exception as error:
            print("Error while adding cctv data", error)
Exemplo n.º 29
0
def product_search(query, limit=10, fuzzy_search=True):
    """Full-text product search backed by RediSearch, with a DB fallback."""
    search_results = {"from_redisearch": True, "results": []}

    if not is_redisearch_enabled():
        # Redisearch module not enabled
        search_results["from_redisearch"] = False
        search_results["results"] = get_product_data(query, 0, limit)
        return search_results

    if not query:
        return search_results

    red = frappe.cache()
    query = clean_up_query(query)

    # TODO: Check perf/correctness with Suggestions & Query vs only Query
    # TODO: Use Levenshtein Distance in Query (max=3)
    ac = AutoCompleter(make_key(WEBSITE_ITEM_NAME_AUTOCOMPLETE), conn=red)
    client = Client(make_key(WEBSITE_ITEM_INDEX), conn=red)
    use_fuzzy = fuzzy_search and len(query) > 3  # Fuzzy on length < 3 can be real slow
    suggestions = ac.get_suggestions(query, num=limit, fuzzy=use_fuzzy)

    # OR the raw query together with every suggested completion.
    query_string = query
    for s in suggestions:
        query_string += f"|('{clean_up_query(s.string)}')"

    results = client.search(Query(query_string))
    docs = [convert_to_dict(doc) for doc in results.docs]
    docs.sort(key=lambda k: frappe.utils.cint(k["ranking"]), reverse=True)
    search_results["results"] = docs

    return search_results
Exemplo n.º 30
0
def index():
    """Index Shakespeare play chapters from ``will_play_text.csv``.

    Converted from Python 2: ``iteritems()`` and the bare ``print``
    statement were errors under Python 3.
    """
    client = Client('sh')
    client.create_index(txt=1.0)
    chapters = {}
    with open('will_play_text.csv') as fp:

        r = csv.reader(fp, delimiter=';')
        for line in r:
            # e.g. ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."]

            play, chapter, character, text = line[1], line[2], line[4], line[5]

            # Accumulate all text for a play:chapter pair into one document.
            d = chapters.setdefault('{}:{}'.format(play, chapter), {})
            d['play'] = play
            d['text'] = d.get('text', '') + ' ' + text

    for chapter, doc in chapters.items():
        print(chapter, doc)
        client.add_document(chapter, nosave=True, txt=doc['text'])
Exemplo n.º 31
0
class CSVImporter:
    # Loads rows from a delimited file into a RediSearch index, mapping CSV
    # columns onto the index's fields in order.
    # NOTE(review): ``loafFile`` looks like a typo for ``loadFile`` — kept
    # as-is because external callers may depend on the name.
    def __init__(self, args):
        """Capture import options from *args* and connect to the index."""
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to import; <= 0 imports all
        self.hasHeader = args.header   # skip the first line when true
        self.ignore = args.ignore      # 1-based column positions to skip (or None)
        self.docid = args.docid        # 1-based column holding the doc id; 0 = none
        self.client = Client(self.index, self.host, self.port)
        # FT.INFO field descriptors; element [0] of each entry is the field name.
        self.fields = self.client.info()['fields']

    def loafFile(self):
        """Read the CSV file and index up to ``self.rows`` rows."""
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')

    def addRow(self, row):
        """Map one CSV row onto index fields and add it as a document."""
        args = {}
        idx = 0        # 1-based column cursor within this row
        fieldnum = 0   # next index-field to fill
        for val in row:
            idx += 1
            # Skip ignored columns and the doc-id column itself.
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        # NOTE(review): after the loop ``idx`` equals the column count, so
        # every row without an explicit docid column gets the same id
        # ('doc<ncols>') and replaces the previous document — a running
        # row counter was probably intended. Confirm before changing.
        doc = 'doc' + str(idx)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, **args)