Exemplo n.º 1
0
def index_size(index_name):
    """Return the FT.INFO payload for the given RediSearch index.

    Connects to the host named 'redis' on port 6379, opens the index
    called *index_name*, and returns its full info mapping unchanged.
    """
    search_client = Client(index_name, 'redis', 6379)
    return search_client.info()
Exemplo n.º 2
0
def refresh_search_keys(request):
    """Rebuild the product search index and autocompleter (staff only).

    Drops the existing 'productIndex' RediSearch index and, if the drop
    succeeded, recreates it; then repopulates the 'productAutocompleter'
    suggestions.  Returns a JSON payload indicating whether the refresh
    was attempted (non-staff or anonymous users get success=False).

    Cleanup over the original: the old doc count and the before/after
    autocompleter lengths were computed but never used; those read-only
    calls are removed.
    """
    if request.user.is_authenticated() and request.user.is_staff:
        client = Client('productIndex')
        # Only rebuild the index when the old one was dropped cleanly.
        if client.drop_index() == 'OK':
            create_product_search_index()
        # Repopulate the autocompleter suggestions.
        create_product_autocompleter()
        return JsonResponse({'success': True})
    return JsonResponse({'success': False})
Exemplo n.º 3
0
class CSVImporter:
    """Loads a CSV file into an existing RediSearch index.

    The target index must already exist: the field name used for each
    document attribute is read from the index schema via FT.INFO.
    """

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to load (<= 0 means all)
        self.hasHeader = args.header   # skip the first CSV line when true
        self.ignore = args.ignore      # 1-based column indexes to skip
        self.docid = args.docid        # 1-based column holding the doc id (0 = none)
        self.client = Client(self.index, self.host, self.port)
        # Schema of the existing index; entry [0] of each row is the field name.
        self.fields = self.client.info()['fields']

    def loafFile(self):
        """Read the CSV file and index each row (method name kept for compat)."""
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            # Pass the row counter so every row gets a unique document id.
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')

    def addRow(self, row, num=None):
        """Index one CSV row as a document.

        *num* is the 0-based row number used to build the document id when
        no docid column is configured.  BUG FIX: the original derived the
        id from the column counter, so every row produced the same id and
        (with replace=True) overwrote the previous one.
        """
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        # Fall back to the legacy column-count id only when the caller did
        # not supply a row number (backward compatibility for old callers).
        doc = 'doc' + str(idx if num is None else num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, **args)
Exemplo n.º 4
0
    def get(self, request):
        """Build a throw-away RediSearch index over all courses, search it
        with the 'key' query parameter, and return the matching courses.

        BUG FIX: the original constructed a SECOND client for the search
        with a fresh 'CII' + datetime.now() name, so the search ran against
        a different (empty) index than the one just populated.  The index
        name is now computed once and reused.

        NOTE(review): a new timestamp-named index is still created on every
        request; consider a persistent index instead.
        """
        mes = {}
        search_key = request.GET.get('key')
        print(search_key)
        all_classes = Course.objects.all()
        print("开始创建索引——————————————————————————")
        # Create a client with a given index name (computed once so that
        # indexing and searching hit the SAME index).
        index_name = 'CII' + str(datetime.now())
        client = Client(index_name, host=settings.SIP, port='6666')

        # Create the index definition and schema.
        client.create_index((TextField('title'), TextField('body')))
        print('索引创建完毕————————————————————————————————')
        print('开始添加数据————————————————————————————————')

        for i in all_classes:
            print(str(i.id) + str(i.title))
            # Index one course; the course id is embedded in the title
            # after '@' so it can be recovered from search results.
            client.add_document('result' + str(datetime.now()), title=i.title + '@' + str(i.id), info=i.info,
                                language='chinese')
            print(333333333)
        print('数据添加完毕————————————————————————————————')
        print(client.info())
        # Search the index that was just populated, reusing the client.
        res = client.search(search_key)
        print('查询结束————————————————————————————————————————————————')
        id_list = []
        print(res.docs)
        for i in res.docs:
            # Title is '<title>@<id>'; split to recover the course id.
            id = i.title.split('@')[1]
            id_list.append(id)
        course = Course.objects.filter(id__in=id_list).all()
        c = CourseSerializersModel(course, many=True)
        mes['course'] = c.data
        mes['code'] = 200
        mes['message'] = '搜索完毕'
        return Response(mes)
Exemplo n.º 5
0
    )
    parser_results = parser.parse_args()

    if parser_results.open:
        # Look for an already-running 'amadeus' container before trying
        # to start a new redis server with the RediSearch module.
        s = str(subprocess.check_output('docker ps -a', shell=True))
        if s.__contains__('amadeus'):
            print(
                'Already there are running a redis server with the redisearch module installed'
            )
            exit()
        open_redis()
        exit()

    # Probe the 'cveIndex' index.  An 'Unknown Index name' error is fine
    # (the index simply does not exist yet); any other failure means no
    # RediSearch-enabled server is reachable.
    client = Client('cveIndex')
    try:
        client.info()
    except Exception as e:
        if e.args[0] != 'Unknown Index name':
            print(
                'You must be running a redis server with the redisearch module installed'
            )
            exit()

    if parser_results.close:
        close_redis()
        exit()

    # -a (analysis) requires a target supplied via -t.
    if parser_results.a and parser_results.target is None:
        parser.error(
            '-a requires -t to specify the target to which apply the analysis')
class CSVImporter: 
    """Loads a CSV file into redis as hashes plus per-zip geo sets.

    When an index name is given, rows are also indexed with RediSearch
    via add_document; otherwise each row is stored with HMSET only.
    """

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.search = False
        self.index = args.index
        if self.index is not None:
            # RediSearch indexing is enabled only when an index is named;
            # that index must already exist (its schema is read below).
            self.search = True
            self.search_client  = Client(self.index, self.host, self.port)
            # Schema rows from FT.INFO; entry [2] is the field type.
            self.info = self.search_client.info()['fields']
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to load (<= 0 means all)
        self.ignore = args.ignore      # 1-based column indexes to skip
        self.docid = args.docid        # 1-based column holding the doc id (0 = none)
        self.client = redis.Redis(args.host, args.port)
        # Column names; filled from the CSV header row in loafFile().
        self.fields = [] 
        
    def addRow(self, row, num):
        """Store one CSV row: a geo entry, then a search doc or a plain hash.

        NOTE(review): assumes column 0 holds the row id and that columns
        named 'regionidzip', 'latitude' and 'longitude' exist; lat/lon
        appear to be stored as millionths of a degree — confirm against
        the data source.
        """
        values = dict()
        row_id = row[0]
        geo_id = 'zip-'
        geo_pos = ''
        lat = 0
        lon = 0
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.fields[idx - 1] == 'regionidzip':
                geo_id += val
            if self.fields[idx - 1] == 'latitude':
                lat = float(val) / 1000000
            if self.fields[idx - 1] == 'longitude':
                lon = float(val) / 1000000

            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue

            # Empty values are not valid NUMERIC content; default to '0'.
            # NOTE(review): fieldnum only advances for non-skipped columns,
            # so self.info[fieldnum] matches the schema only when skipped
            # columns are not part of the index — confirm.
            if self.search == True and self.info[fieldnum][2] == 'NUMERIC' and val == '':
                val = '0'
 
            values[self.fields[idx - 1]] = val
            fieldnum += 1       
        values['geopos'] = str(lon) + ',' + str(lat)
        geo_vals = [lon, lat, row_id]
            
        # One geo set per zip code; the member is the row id.
        self.client.geoadd(geo_id, *geo_vals)
        if self.search == True:
            doc = 'doc-' + str(num)
            if self.docid > 0:
                doc = row[self.docid - 1]
            
            self.search_client.add_document(doc, replace=True, **values)
        else:
            self.client.hmset(row_id, values)

    def loafFile(self):
        """Read the CSV: the first line is the header, the rest are rows."""
        reader = csv.reader(self.file, delimiter=self.delimiter)
        self.fields = next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1

        print('Finished loading ' + str(n) + ' rows.')
Exemplo n.º 7
0
class CSVImporter:
    """Loads a CSV file into an existing RediSearch index.

    Supports date columns (converted to epoch milliseconds) and TAG
    fields (comma lists normalised to the tag separator declared in the
    index schema).
    """

    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows          # max rows to load (<= 0 means all)
        self.hasHeader = args.header   # skip the first CSV line when true
        self.ignore = args.ignore      # 1-based column indexes to skip
        self.docid = args.docid        # 1-based column holding the doc id (0 = none)
        self.nosave = args.nosave
        self.date = args.date          # 1-based indexes of date columns
        self.format = args.format      # strptime format for date columns
        self.client = Client(self.index, self.host, self.port)
        # Schema of the existing index: [0] is the field name, [2] the
        # field type, [4] the tag separator for TAG fields.
        self.fields = self.client.info()['fields']

    def dateToMillis(self, val):
        """Convert *val* (parsed with self.format) to epoch milliseconds.

        Returns the millisecond timestamp as a string, or 0 when the
        value does not match the configured format.
        """
        try:
            d = datetime.strptime(val, self.format)
        except ValueError:
            print("Invalid data format: " + val)
            return 0
        # timestamp() is portable; the original strftime('%s') is a
        # glibc-only extension and breaks on e.g. Windows.
        return str(int(d.timestamp()) * 1000)

    def adjustTagValue(self, val, sep):
        """Normalise a comma-separated tag list to use *sep*.

        Commas inside single or double quotes are preserved (the quote
        characters themselves are dropped); whitespace around separators
        and surrounding square brackets are stripped.
        """
        insinglequotes = False
        indoublequotes = False
        newVal = ''
        for ch in val:
            if ch == '\'' and not indoublequotes:
                insinglequotes = not insinglequotes
            elif ch == '"' and not insinglequotes:
                indoublequotes = not indoublequotes
            elif ch == ',' and not insinglequotes and not indoublequotes:
                newVal += sep
            else:
                newVal += ch
        # Collapse whitespace around separators; re.escape(sep) fixes the
        # original, which interpolated sep unescaped into the pattern and
        # misbehaved for regex metacharacters such as '|'.
        newVal = re.sub('[\t ]*' + re.escape(sep) + '[\t ]*', sep, newVal)
        return re.sub(r'[\[\]]', '', newVal)

    def addRow(self, row, num):
        """Index one CSV row; *num* builds the doc id when no docid column."""
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            if self.date is not None and idx in self.date:
                val = self.dateToMillis(val)
            if self.fields[fieldnum][2] == 'TAG':
                val = self.adjustTagValue(val, self.fields[fieldnum][4])
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        doc = 'doc-' + str(num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, nosave=self.nosave, **args)

    def loafFile(self):
        """Read the CSV file and index each row (method name kept for compat)."""
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
Exemplo n.º 8
0
class SearchDemo:
    """Interactive RediSearch demo: executes raw FT.SEARCH / FT.AGGREGATE
    commands and pretty-prints the replies with tabulate.

    NOTE(review): Python 2 code (raw_input in readQuery, 'except E, e'
    syntax in executeQuery); it will not run unmodified on Python 3.
    """

    def __init__(self, args):
        self.index = args.index
        self.client = Client(self.index, host=args.host, port=args.port)
        self.redis = redis.Redis(args.host, args.port)
        self.fields = []
        # Field names come from the index schema: entry [0] of each
        # FT.INFO 'fields' row.
        info = self.client.info()['fields']
        for f in info:
            self.fields.append(f[0])

    def printSearchResults(self, total, docs, headers, duration, withid=False):
        """Render FT.SEARCH results as a grid, optionally with doc ids."""
        print('Number of results: ' + str(total))
        if total == 0:
            return
        data = []
        for d in docs:
            row = []
            if withid == True:
                row.append(d.id)
            for f in headers:
                val = ''
                try:
                    val = getattr(d, f)
                except AttributeError:
                    # A document may lack some fields; show an empty cell.
                    pass
                if val is not None:
                    # Reply values are byte strings; decode for display.
                    val = val.decode('utf-8')
                row.append(val)
            data.append(row)
        if withid == True:
            headers.insert(0, 'Document ID')
        print(tabulate(data, headers, tablefmt='grid', floatfmt=".2f"))
        print('Execution time : ' + str(duration) + ' ms')

    def getHeadersFromRow(self, row):
        """Return the even-indexed entries of a raw reply row (the keys)."""
        headers = []
        i = 0
        for t in row:
            if i % 2 == 0:
                headers.append(t)
            i += 1
        return headers

    def getValuesFromRow(self, row, ignore):
        """Return odd-indexed entries (the values), skipping *ignore* slots."""
        values = []
        i = 0
        for t in row:
            if i % 2 == 1 and i not in ignore:
                if t is not None:
                    t = t.decode('utf-8')
                values.append(t)
            i += 1
        return values

    def getIgnoreList(self, headers):
        """Drop auto-generated alias columns; return (headers, ignore idxs)."""
        ignore = []
        ret = headers[:]
        i = 0
        for h in headers:
            if h.startswith('__generated_alias'):
                # Value positions are offset by one from the header slot.
                ignore.append(i + 1)
                ret.remove(h)
            i += 2
        return ret, ignore

    def printAggregateResult(self, res, duration, ignore_generated=True):
        """Render a raw FT.AGGREGATE reply; res[0] is the result count."""
        num_results = res[0]
        print('Number of results: ' + str(res[0]))
        if num_results == 0:
            return
        rows = res[1:]
        headers = self.getHeadersFromRow(rows[0])
        ignore = []
        if ignore_generated == True:
            headers, ignore = self.getIgnoreList(headers)
        data = []
        for r in rows:
            data.append(self.getValuesFromRow(r, ignore))
        print(tabulate(data, headers, tablefmt='grid', floatfmt=".2f"))
        print('Execution time : ' + str(duration) + ' ms')

    def queryToArgs(self, query):
        """Split a query string into argv-style parts (quote-aware via csv)."""
        query = " ".join(query.split())
        data = StringIO(query)
        reader = csv.reader(data, delimiter=' ')
        return next(reader)

    def readQuery(self):
        """Prompt the user for a query string and strip whitespace."""
        query = raw_input('Enter query:')
        return query.strip()

    def executeQuery(self, query):
        """Run the query verbatim against redis and print the results."""
        res = None
        parts = self.queryToArgs(query)
        try:
            st = datetime.datetime.now()
            res = self.redis.execute_command(*parts)
            duration = (datetime.datetime.now() - st).total_seconds() * 1000
        except redis.exceptions.ResponseError, e:
            print('Error: ' + str(e))
            return

        # A NOCONTENT-style reply carries ids only (res[2] not a list).
        nocontent = res[0] == 0 or not isinstance(res[2], (list, ))
        if parts[0].upper().startswith('FT.SEARCH'):
            results = Result(res,
                             not nocontent,
                             duration=duration,
                             has_payload=False)
            if nocontent == True:
                self.printSearchResults(results.total, results.docs, [],
                                        duration, True)
            else:
                self.printSearchResults(results.total, results.docs,
                                        self.getHeadersFromRow(res[2]),
                                        duration, True)
        else:
            self.printAggregateResult(res, duration)
Exemplo n.º 9
0
        if title:
                title = title[0]
        else:
                title = ""
                
        abstract = re.findall('<Abstract>([\s\S]*?)</Abstract>', r)
        if abstract:
                abstract = re.sub("\n\s*", "", abstract[0])
                abstract = re.sub('<AbstractText Label="(.*?)".*?>', " \\1: ", abstract)
                abstract = re.sub("<\/*Abstract.*?>", "", abstract)
                abstract = re.sub("<Copyright.*?>.*</Copyright.*?>", "", abstract)
                abstract = re.sub("\(PsycINFO Database Record", "", abstract)
        else:
                abstract = ""

        # type is ignored for now, but for future reference...
        type = re.findall("<PublicationType UI=.*?>(.*?)</PublicationType>", r)
        if type:
                type = str(type)
        else:
                type = str([])
                
        indexer.add_document(pmid, replace=True, abstract=abstract, title=title, type=type)

# flush any remaining documents
indexer.commit()

docs = client.info()['num_docs']

# NOTE(review): 'num_docs' must be a str for the concatenation below; if
# the client returns an int this line raises TypeError — confirm.
logging.info(datetime.now().isoformat() + " imported " + str(count) + " records from " + sys.argv[1] + "(" + docs + " total)")
Exemplo n.º 10
0
class TAS_Redisearch():
    """Thin wrapper around one RediSearch index plus a raw redis client.

    Fixes applied to the original:
      * Python 3 print() calls (the class used Python 2 print statements).
      * Stray debug output (print 'yyy') removed from the constructor.
      * Error reports in py_search / category_taxonomy_dict /
        total_record / get_all_records were missing the stderr
        redirection (or the print entirely) and so never reached stderr.
      * category_taxonomy_dict no longer drops the first TAXONAME seen
        for each category (if/elif bug) and skips documents missing
        either field instead of silently reusing the previous values.
    """

    #Constructor
    def __init__(self, table_name, host="localhost", port=6381):
        try:
            self.client = Client(table_name, host, port)
            self.host = host
            self.port = port
            self.table_name = table_name
            self.redis = Redis()
            self.LIMIT = 10  # default number of results to show
        except Exception as e:
            print("TAS_Redisearch Error inside Constructor Index:\'", table_name, "\' HOST:\'", host, "\' PORT:\'", port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #Will set the no of results to show
    def set_result_limit(self, num):
        self.LIMIT = num
        return

    #Defines the schema for Redisearch
    def set_schema(self, schema):
        try:
            # The trailing empty list makes RediSearch keep the default
            # stopwords instead of ignoring them.
            return self.client.create_index(
                schema, False, False, []
            )
        except Exception as e:
            print("TAS_Redisearch Error inside set_schema Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #Deletes index(table)
    def drop_index(self):
        try:
            return self.client.drop_index()
        except Exception as e:
            print("TAS_Redisearch Error inside drop_index Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #Deletes a document(row) by document_index
    def delete_document(self, document_index):
        try:
            return self.client.delete_document(document_index)
        except Exception as e:
            print("TAS_Redisearch Error inside delete_document Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #############################################SEARCHES BELOW#######################################

    #Uses python libraries
    def py_search(self, query, result_limit=-1):
        """Run *query* via the redisearch client; -1 means self.LIMIT."""
        if result_limit == -1:
            result_limit = self.LIMIT
        try:
            return self.client.search(Query(query).paging(0, result_limit))
        except Exception as e:
            print("TAS_Redisearch Error inside py_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            # Fix: the original 'print sys.stderr, e' wrote to stdout.
            print(e, file=sys.stderr)

    #Search with default parameters [will return dictionary]
    def generic_search(self, search_text, result_limit=-1):
        if result_limit == -1:
            result_limit = self.LIMIT
        query_string = "FT.SEARCH " + self.table_name + " " + search_text + " LIMIT 0 " + str(
            result_limit)
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print("TAS_Redisearch Error inside generic_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    def free_exact_search(self, key, result_limit=-1):
        """Scan up to *result_limit* docs and keep those where any field
        value equals the (uncleaned) key exactly."""
        org_key = key
        l = []
        try:
            if result_limit == -1:
                result_limit = self.LIMIT
            key = self.clean_string(key)
            returned = self.py_search("*", result_limit)
            for result in returned.docs:
                result_dict = vars(result)
                if org_key in result_dict.values():
                    l.append(result_dict)
        except Exception as e:
            print("TAS_Redisearch Error inside value_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)
        return l

    #{fieldname:[value1, value2], fieldname:[value1, value2]}
    def exact_search(self, input_dict, result_limit=-1):
        """Search for docs whose fields match any of the given values."""
        formed_str = ""
        l = []
        for field, value_list in input_dict.items():
            formed_str += "@" + field + ":("
            for key in value_list:
                key = self.clean_string(key)
                formed_str += "(\'" + key + "\') | "
            formed_str = formed_str.rstrip(' |')
            formed_str += ") "
        print("PASSED: ", formed_str)
        returned = self.py_search(formed_str, result_limit)
        print("RETURNED:", returned)
        for result in returned.docs:
            result_dict = vars(result)
            for itr, ktr in input_dict.items():
                if result_dict[itr] in ktr:
                    l.append(result_dict)

        return l

    #Search with the passed query
    def custom_search(self, query_string):
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print("TAS_Redisearch Error inside custom_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #Search in 'search_in_field' [if any of the element in 'list_to_union' is found then include it in the result
    def union_search(self, list_to_union, search_in_field):
        query_string = "FT.SEARCH " + self.table_name + " "
        union_text = "@" + search_in_field + ":("
        for text in list_to_union:
            union_text += text + "|"

        union_text = union_text.rstrip("|")
        union_text += ")"
        query_string += union_text
        try:
            res = self.redis.execute_command(query_string)
            return Result(res, True)
        except Exception as e:
            print("TAS_Redisearch Error inside union_search Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    #will return all the dictionary for all the categories if no arguments are passed
    def category_taxonomy_dict(self, category='*'):
        """Map each CATEGORY to the list of its distinct TAXONAMEs."""
        cat_taxo_dict = {}
        try:
            total_docs = self.client.info()['num_docs']
            if category == '*':
                query_string = category
            else:
                query_string = "@CATEGORY:" + category
            result = self.py_search(query_string, total_docs)
            for single_result in result.docs:
                try:
                    category = single_result.CATEGORY
                    taxoname = single_result.TAXONAME
                except Exception:
                    # Skip documents missing either field instead of
                    # silently reusing the previous document's values.
                    continue
                if category not in cat_taxo_dict:
                    cat_taxo_dict[category] = []
                # Fix: the original 'elif' skipped appending the first
                # taxoname seen for each category.
                if taxoname not in cat_taxo_dict[category]:
                    cat_taxo_dict[category].append(taxoname)
        except Exception as e:
            # Fix: the error message previously lacked 'print' entirely.
            print("TAS_Redisearch Error inside category_taxonomy_dict Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)
        return cat_taxo_dict

    def total_record(self):
        """Return the number of documents in the index as an int."""
        try:
            return int(self.client.info()['num_docs'])
        except Exception as e:
            # Fix: the error message previously lacked 'print' entirely.
            print("TAS_Redisearch Error inside total_records Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    def get_all_records(self):
        """Return every document in the index as a Result."""
        try:
            total = str(self.total_record())
            res = self.redis.execute_command("FT.SEARCH " + self.table_name +
                                             " * LIMIT 0 " + total)
            return Result(res, True)
        except Exception as e:
            # Fix: the error message previously lacked 'print' entirely.
            print("TAS_Redisearch Error inside total_records Index:\'", self.table_name, "\' HOST:\'", self.host, "\' PORT:\'", self.port, "\'\n", file=sys.stderr)
            print(e, file=sys.stderr)

    def clean_string(self, key):
        """Replace RediSearch special characters in *key* with spaces."""
        for ch in ',.<>{}[]"\':;!@#$%^&*()-+=~':
            key = key.replace(ch, ' ')
        return key
Exemplo n.º 11
0
import json
from redisearch import Client, TextField, TagField

# Load the joke corpus from disk.
with open('wocka.json', 'r') as joke_file:
    jokes = json.load(joke_file)

hostname = 'redis-17235.laurent.cs.redislabs.com'
port = 17235

# Build the 'jokes' index: three full-text fields plus a tag field.
client = Client('jokes', hostname, port)
schema = (TextField('title'), TextField('body'),
          TextField('category'), TagField('label'))
client.create_index(schema)

# Index every joke; the tag label mirrors the category.
for joke in jokes:
    fields = {
        'title': joke['title'],
        'body': joke['body'],
        'category': joke['category'],
        'label': joke['category'],
    }
    client.add_document(joke['id'], **fields)

print("number of jokes in the json file: " + str(len(jokes)))

info = client.info()
print(info)