def create_product_search_index_async():
    print('Creating Search Index')
    client = Client('productIndex')
    client.create_index([
        TextField('title', weight=5.0),
        TextField('description'),
        TextField('tags'),
        TextField('category')
    ])
    products = Product.objects.filter(active=True)
    cache.set('Search_index_total', len(products), timeout=None)
    index = 0
    for product in products:
        title = product.name
        description = product.description
        category = ','.join([cat.name for cat in product.category.all()])
        # Collect tags from the product itself and from its tag mappings
        tag = product.tags
        tag_maps = ProductTagMap.objects.filter(product=product)
        for tag_map in tag_maps:
            tag = tag + tag_map.tag.tag + ' '
        # Append the distinct variant values as additional tag text
        category_varients = []
        for pv in ProductVarientList.objects.filter(product=product):
            for cv in pv.key.all():
                category_varients.append(cv.value)
        tag += ' '.join(list(set(category_varients)))
        client.add_document(str(product.id),
                            title=title,
                            description=description,
                            tags=tag,
                            category=category)
        # Track indexing progress in the cache
        cache.set('Search_index_index', index, timeout=None)
        index += 1
    return True
class RediSearchClient(object):
    def __init__(self, index_name):
        self.client = Client(index_name)
        self.index_name = index_name

    def build_index(self, line_doc_path, n_docs):
        line_pool = LineDocPool(line_doc_path)

        # Rebuild the index from scratch
        try:
            self.client.drop_index()
        except Exception:
            pass

        self.client.create_index([TextField('title'), TextField('url'), TextField('body')])

        for i, d in enumerate(line_pool.doc_iterator()):
            self.client.add_document(i, nosave=True, title=d['doctitle'], url=d['url'], body=d['body'])

            if i + 1 == n_docs:
                break

            if i % 1000 == 0:
                print("{}/{} building index".format(i, n_docs))

    def search(self, query):
        q = Query(query).paging(0, 5).verbatim()
        res = self.client.search(q)
        # print(res.total)  # "1"
        return res
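# A minimal usage sketch for RediSearchClient above; the index name, line-doc path,
# and query string are hypothetical, and LineDocPool is assumed to be importable
# from the surrounding project.
if __name__ == '__main__':
    rs = RediSearchClient('wiki_lines')
    rs.build_index('/path/to/linedoc.txt', n_docs=10000)
    res = rs.search('hello world')
    print(res.total)
    for doc in res.docs:
        print(doc.id, doc.title)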
def cache_to_redis(data: dict):
    if REDIS_HOSTNAME == '':
        print('REDIS_HOSTNAME environment variable is not set')
        return

    client = Client('games', host=REDIS_HOSTNAME, port=REDIS_PORT)
    indexCreated = False
    # The widest alt_names list determines how many alt_name_<i> fields the schema needs
    maxAltNames = len(max(data.values(), key=lambda d: len(d['alt_names']))['alt_names'])

    while not indexCreated:
        try:
            client.create_index([
                TextField('name', weight=10),
                *[TextField('alt_name_%d' % i, weight=10) for i in range(maxAltNames)],
                TextField('summary', weight=1),
                TextField('cover', weight=0),
                TextField('thumb', weight=0),
            ])
            indexCreated = True
        except Exception as e:
            print('Failed to create index, retrying: %s' % e)
            time.sleep(3)

    for k, v in data.items():
        client.add_document(k,
                            name=v['name'],
                            **{'alt_name_%d' % i: n for i, n in enumerate(v['alt_names'])},
                            cover=v['cover'],
                            thumb=v['thumb'],
                            summary=v['summary'])
    print('done')
def test():
    # Creating a client with a given index name
    client = Client('myIndex')

    # Creating the index definition and schema (drop any previous index first)
    try:
        client.drop_index()
    except Exception:
        pass
    client.create_index([TextField('title', weight=5.0), TextField('body')])

    # Indexing a document
    client.add_document(
        'doc1',
        title='RediSearch',
        body='Redisearch implements a search engine on top of redis')

    # Simple search
    res = client.search("search engine")

    # the result has the total number of results, and a list of documents
    print(res.total)  # "1"
    print(res.docs[0])

    # Searching with snippets
    # res = client.search("search engine", snippet_sizes={'body': 50})

    # Searching with complex parameters:
    q = Query("search engine").verbatim().no_content().paging(0, 5)
    res = client.search(q)
class TAS_Import():
    def __init__(self, index_name, host="172.16.20.7", port=6382, db=0):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        self.index_name = index_name
        self.redis = Redis()

    def add_indexing(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, data):
        for i, rr in enumerate(data):
            index = i + 1
            print(rr)
            name, age, location = rr['name'], rr['age'], rr['location']
            self.client.add_document(index, NAME=name, AGE=age, LOCATION=location)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception:
            pass
def build_ipa_index():
    start_time = time.time()
    rc = redis.Redis(password=os.environ.get('REDIS_PASSWORD', ''))
    rs_client = Client('IPAIndex', conn=rc)

    print(
        'Getting file `amministrazioni.txt` from https://www.indicepa.gov.it',
        flush=True)
    ipa_index_amm_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=amministrazioni.txt'
    ipa_index_amm = pd.read_csv(ipa_index_amm_url, sep='\t', dtype=str)

    print('Getting file `ou.txt` from https://www.indicepa.gov.it', flush=True)
    ipa_index_ou_url = 'https://www.indicepa.gov.it/public-services/opendata-read-service.php?dstype=FS&filename=ou.txt'
    ipa_index_ou = pd.read_csv(ipa_index_ou_url,
                               sep='\t',
                               na_values=['da_indicare', '*****@*****.**'],
                               dtype=str)
    ipa_index_ou = ipa_index_ou.loc[lambda ipa_index_ou: ipa_index_ou['cod_ou'] == 'Ufficio_Transizione_Digitale']

    try:
        rs_client.drop_index()
    except Exception:
        pass  # Index already dropped

    rs_client.create_index([
        TextField('ipa_code', weight=2.0),
        TextField('name', weight=2.0, sortable=True),
        TextField('site'),
        TextField('pec'),
        TextField('city', weight=1.4),
        TextField('county'),
        TextField('region'),
        TagField('type'),
        TextField('rtd_name'),
        TextField('rtd_pec'),
        TextField('rtd_mail'),
    ])
    print('Created index `IPAIndex`', flush=True)

    print('Feeding `IPAIndex` with data from `amministrazioni.txt`', flush=True)
    for index, row in ipa_index_amm.iterrows():
        rs_client.add_document(row['cod_amm'],
                               language='italian',
                               replace=True,
                               **get_ipa_amm_item(row))

    print('Feeding `IPAIndex` with data from `ou.txt`', flush=True)
    for index, row in ipa_index_ou.iterrows():
        rs_client.add_document(row['cod_amm'],
                               partial=True,
                               **get_ipa_rtd_item(row))

    finish_time = time.time()
    print('`IPAIndex` build completed in {0} seconds'.format(
        round(finish_time - start_time, 2)),
        flush=True)
def insert():
    # insertion of search/suggestion data
    suggestion_client = Client('movie')
    suggestion_client.create_index([TextField('title'), TagField('genres', separator='|')])

    for i in range(0, len(movie_df)):
        suggestion_client.add_document(movie_df['tmdbId'][i],
                                       title=movie_df['title'][i],
                                       genres=movie_df['genres'][i])

    # insertion of auto-completion data
    completion_client = AutoCompleter('ac')
    for i in range(0, len(movie_df)):
        completion_client.add_suggestions(Suggestion(movie_df['title'][i]))
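# A hedged follow-up to insert() above: reading back auto-completion suggestions.
# get_suggestions() is part of redisearch-py's AutoCompleter API; the prefix is a
# made-up example value.
def suggest(prefix):
    completion_client = AutoCompleter('ac')
    # Return up to 5 fuzzy-matched movie titles that start with the prefix
    return [s.string for s in completion_client.get_suggestions(prefix, fuzzy=True, num=5)]

# Example: suggest('toy') returns a list of matching title strings.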
def clientpush(self):
    client = Client('Checkout')
    client.create_index([
        NumericField('Key'),
        TextField('UsageClass'),
        TextField('CheckoutType'),
        TextField('MaterialType'),
        NumericField('CheckoutYear'),
        NumericField('CheckoutMonth'),
        NumericField('Checkouts'),
        TextField('Title'),
        TextField('Creator'),
        TextField('Subjects'),
        TextField('Publisher'),
        TextField('PublicationYear')
    ])

    db_connection, _ = self.connect()
    cursor = db_connection.cursor()
    cursor.execute('SELECT * FROM customers')
    results = cursor.fetchall()

    # Add every fetched row as a RediSearch document
    i = 0
    for result in results:
        client.add_document('doc%s' % i,
                            Key=result[0],
                            UsageClass=result[1],
                            CheckoutType=result[2],
                            MaterialType=result[3],
                            CheckoutYear=result[4],
                            CheckoutMonth=result[5],
                            Checkouts=result[6],
                            Title=result[7],
                            Creator=result[8],
                            Subjects=result[9],
                            Publisher=result[10],
                            PublicationYear=result[11])
        i += 1
    print(i)

    res = client.search('BOOK')
    print("{} {}".format(res.total, res.docs[0].Title))

    res1 = client.search("use")
    print(res1)

    q = Query('use').verbatim().no_content().paging(0, 5)
    res1 = client.search(q)
    print(res1)

    cursor.close()
    db_connection.close()
class RandomWikipediaImport(object):
    def __init__(self):
        self.rs = Client('wikipedia')
        self.rs.create_index((TextField('title', weight=5.0), TextField('body')))
        print('>>> Created index')

    def insert_random_loop(self):
        i = 1
        while True:
            # Pick a random article title and fetch the full page
            ra = wikipedia.random()
            article = wikipedia.page(ra)
            self.rs.add_document(f'doc{i}', title=article.title, body=article.content)
            print(f'>>> Inserted {article.title}')
            i += 1
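# A minimal usage sketch: build the 'wikipedia' index and start the endless import
# loop (assumes the `wikipedia` package is installed and RediSearch runs locally).
if __name__ == '__main__':
    importer = RandomWikipediaImport()
    importer.insert_random_loop()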
class TAS_Import():
    def __init__(self, index_name, host=ip, port=port, db=db):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        #self.redis = Redis()

    def add_indexing_schema(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, company, doc_id, project):
        for i, rr in enumerate(rdata):
            index = doc_id + company + "CMDIC" + str(i + 1) + project
            l1, l2, l3 = rr
            l1 = config_obj.StringEscape(l1)
            self.client.add_document(index, DATA=l1, PAGE=l2, BBOX=l3)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception as e:
            # print('Error', e)
            pass

    def start(self, data, doc_id, company, project):
        status = 1
        index_name = project + "_DOCUMENT_" + str(doc_id)
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('PAGE'),
            TextField('BBOX')
        ]
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, company, doc_id, project)
        status = 5
        return [status]
class EventProcessor():
    def __init__(self):
        self.r = redis.from_url(config.EVENT_BROKER_URL)
        self.client = Client('CCTV_DATA')
        try:
            self.client.create_index([TextField('CCTV_ID'), TagField('TAGS')])
        except Exception as error:
            print("Error while creating index", error)
        # self.client.create_index([TextField('title', weight=5.0), TextField('body')])

    def get_objects_in_image(self, image):
        # TODO: call RedisAI module
        objects = [
            "key", "passport", "wallet", "car", "bag", "watch", "book",
            "satchel", "laptop", "camera", "mobile_phone"
        ]
        # Pick four random object labels as stand-in detections
        tags = []
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        tags.append(objects[r.randint(0, 10)])
        return tags

    def process(self, msg):
        print("Going to process message and store it", msg)
        # print(float(msg["LON"]), float(msg["LAT"]), msg["CCTV_ID"])
        # print(type(float(msg["LON"])), type(float(msg["LAT"])), msg["CCTV_ID"])
        try:
            self.r.geoadd("CCTV_LOCATION", float(msg["LON"]), float(msg["LAT"]), msg["CCTV_ID"])
            msg["TAGS"] = self.get_objects_in_image(msg.get("IMAGE", ""))
            # print("Going to store this in search", msg)
            doc_unique_key = msg["CCTV_ID"] + "_" + msg["TS"]
            self.client.add_document(doc_unique_key,
                                     CCTV_ID=doc_unique_key,
                                     TAGS=",".join(msg["TAGS"]))
        except Exception as error:
            print("Error while adding cctv data", error)
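# A minimal usage sketch for EventProcessor; the message below is hypothetical but
# carries the keys process() reads (CCTV_ID, TS, LON, LAT, IMAGE).
if __name__ == '__main__':
    processor = EventProcessor()
    processor.process({
        "CCTV_ID": "cam-42",
        "TS": "1589000000",
        "LON": "77.5946",
        "LAT": "12.9716",
        "IMAGE": "",
    })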
def index():
    client = Client('sh')
    # client.drop_index()
    client.create_index([TextField('txt', weight=1.0)])
    chapters = {}

    with open('will_play_text.csv') as fp:
        r = csv.reader(fp, delimiter=';')
        for line in r:
            # ['62816', 'Merchant of Venice', '9', '3.2.74', 'PORTIA', "I'll begin it,--Ding, dong, bell."]
            play, chapter, character, text = line[1], line[2], line[4], line[5]
            # Concatenate all lines of a play chapter into a single document
            d = chapters.setdefault('{}:{}'.format(play, chapter), {})
            d['play'] = play
            d['text'] = d.get('text', '') + ' ' + text

    for chapter, doc in chapters.items():
        print(chapter, doc)
        client.add_document(chapter, nosave=True, txt=doc['text'])
class CSVImporter:
    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.hasHeader = args.header
        self.ignore = args.ignore
        self.docid = args.docid
        self.client = Client(self.index, self.host, self.port)
        # Field definitions come from the existing index
        self.fields = self.client.info()['fields']

    def loafFile(self):
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')

    def addRow(self, row, num):
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        # Use the row number for the document id unless a docid column was given
        doc = 'doc' + str(num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, **args)
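# A hypothetical argparse driver for the CSVImporter above; the flag names simply
# mirror the attributes read in __init__ and are not part of any published CLI.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Load a CSV file into an existing RediSearch index')
    parser.add_argument('--host', default='localhost')
    parser.add_argument('--port', type=int, default=6379)
    parser.add_argument('--index', required=True)
    parser.add_argument('--file', required=True)
    parser.add_argument('--delimiter', default=',')
    parser.add_argument('--rows', type=int, default=0)
    parser.add_argument('--header', action='store_true')
    parser.add_argument('--ignore', type=int, nargs='*', default=None)
    parser.add_argument('--docid', type=int, default=0)

    importer = CSVImporter(parser.parse_args())
    importer.loafFile()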
def save_item(self, watcher):
    client = Client("tower", port=6379, host=os.getenv('REDIS_HOST'))
    client.add_document(watcher.id,
                        clientIp=watcher.client_ip,
                        service=watcher.service,
                        errorMessage=watcher.error_message,
                        stackTrace=watcher.stack_trace,
                        numberRange=watcher.number_range,
                        dateTime=watcher.date_added.strftime("%Y-%m-%d"))

    payload = {
        "clientIp": watcher.client_ip,
        "service": watcher.service,
        "errorMessage": watcher.error_message,
        "stackTrace": watcher.stack_trace,
        "dateTime": watcher.date_added
    }

    if os.getenv('ENABLE_SLACK') == 'true':
        self.send_to_slack(payload)
def get(self, request):
    # data = request.data
    mes = {}
    search_key = request.GET.get('key')
    print(search_key)
    all_classes = Course.objects.all()

    print("Start building the index")
    # Create a client with a given index name; keep the name so the same index is searched later
    index_name = 'CII' + str(datetime.now())
    client = Client(index_name, host=settings.SIP, port='6666')
    # Create the index definition and schema
    client.create_index((TextField('title'), TextField('body')))
    print('Index created')

    print('Start adding data')
    for i in all_classes:
        print(str(i.id) + str(i.title))
        # Index the course; the course id is appended to the title after '@'
        client.add_document('result' + str(datetime.now()),
                            title=i.title + '@' + str(i.id),
                            info=i.info,
                            language='chinese')
        print(333333333)
    print('Data added')
    print(client.info())

    # Search the index that was just built
    client = Client(index_name, host=settings.SIP, port='6666')
    res = client.search(search_key)
    print('Query finished')

    id_list = []
    print(res.docs)
    for i in res.docs:
        # print(i.title)
        # Split the title on '@' to recover the course id, then query and serialize the courses
        id = i.title.split('@')[1]
        id_list.append(id)
    course = Course.objects.filter(id__in=id_list).all()
    c = CourseSerializersModel(course, many=True)
    mes['course'] = c.data
    mes['code'] = 200
    mes['message'] = 'Search complete'
    return Response(mes)
import pandas as pd
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

sleep(15)

i = 0
nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)
client = Client('week1', conn=rc)

#client.create_index([TextField('day'), TextField('filename'), TextField('protocol'), TextField('task_monitor_id'), TextField('task_id'), TextField('job_id'), TextField('site_name')])
client.create_index([TextField('protocol'), TextField('site_name')])

dat = pd.read_csv("results_2018-05-01.csv.gz")
for idx, row in tqdm(dat.iterrows()):
    #client.add_document(f"{row['index']}", day=f"{row['day']}", filename = f"{row['filename']}", protocol = f"{row['protocol']}", task_monitor_id = f"{row['task_monitor_id']}", task_id = f"{row['task_id']}", job_id = f"{row['job_id']}", site_name = f"{row['site_name']}")
    client.add_document(f"{row['day']:0.0f}_{row['index']}",
                        replace=True, partial=True,
                        protocol=f"{row['protocol']}",
                        site_name=f"{row['site_name']}")
    i += 1
    if i == 1000:
        break

#print(client.search("@protocol:local"))
#payload=f"{row['index']}",
#replace=True, partial=True,
#f"{row['day']:0.0f}_{row['index']}"
import json
from redisearch import Client, TextField, TagField

with open('wocka.json', 'r') as f:
    jokes = json.load(f)

hostname = 'redis-17235.laurent.cs.redislabs.com'
port = 17235

client = Client('jokes', hostname, port)
client.create_index((TextField('title'), TextField('body'), TextField('category'), TagField('label')))

for joke in jokes:
    client.add_document(joke['id'],
                        title=joke['title'],
                        body=joke['body'],
                        category=joke['category'],
                        label=joke['category'])

print("number of jokes in the json file: " + str(len(jokes)))

info = client.info()
print(info)
from redisearch import Client, TextField

# Creating a client with a given index name
client = Client('myIndex')

# Creating the index definition and schema
client.create_index((TextField('title', weight=5.0), TextField('body')))

# Indexing a document
client.add_document(
    'doc1',
    title='RediSearch',
    body='Redisearch implements a search engine on top of redis')
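# A minimal follow-up sketch: querying the document indexed above, using the same
# search API shown in the other examples (the query string is an example value).
from redisearch import Query

res = client.search(Query('search engine').paging(0, 5))
print(res.total)  # number of matching documents
for doc in res.docs:
    print(doc.id, doc.title)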
class CSVImporter:
    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.search = False
        self.index = args.index
        if self.index is not None:
            self.search = True
            self.search_client = Client(self.index, self.host, self.port)
            self.info = self.search_client.info()['fields']
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.ignore = args.ignore
        self.docid = args.docid
        self.client = redis.Redis(args.host, args.port)
        self.fields = []

    def addRow(self, row, num):
        values = dict()
        row_id = row[0]
        geo_id = 'zip-'
        geo_pos = ''
        lat = 0
        lon = 0
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.fields[idx - 1] == 'regionidzip':
                geo_id += val
            if self.fields[idx - 1] == 'latitude':
                lat = float(val) / 1000000
            if self.fields[idx - 1] == 'longitude':
                lon = float(val) / 1000000
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            if self.search == True and self.info[fieldnum][2] == 'NUMERIC' and val == '':
                val = '0'
            values[self.fields[idx - 1]] = val
            fieldnum += 1

        # Store the position both as a geo set member and as a searchable field
        values['geopos'] = str(lon) + ',' + str(lat)
        geo_vals = [lon, lat, row_id]
        self.client.geoadd(geo_id, *geo_vals)

        if self.search == True:
            doc = 'doc-' + str(num)
            if self.docid > 0:
                doc = row[self.docid - 1]
            self.search_client.add_document(doc, replace=True, **values)
        else:
            self.client.hmset(row_id, values)

    def loafFile(self):
        reader = csv.reader(self.file, delimiter=self.delimiter)
        self.fields = next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
class CSVImporter:
    def __init__(self, args):
        self.host = args.host
        self.port = args.port
        self.index = args.index
        self.file = open(args.file, 'r')
        self.delimiter = args.delimiter
        self.rows = args.rows
        self.hasHeader = args.header
        self.ignore = args.ignore
        self.docid = args.docid
        self.nosave = args.nosave
        self.date = args.date
        self.format = args.format
        self.client = Client(self.index, self.host, self.port)
        self.fields = self.client.info()['fields']

    def dateToMillis(self, val):
        try:
            d = datetime.strptime(val, self.format)
        except ValueError:
            print("Invalid data format: " + val)
            return 0
        return str(int(d.strftime('%s')) * 1000)

    def adjustTagValue(self, val, sep):
        # Replace commas outside quotes with the tag separator and strip brackets
        i = 0
        insinglequotes = False
        indoublequotes = False
        newTag = False
        newVal = ''
        while i < len(val):
            if val[i] == '\'' and not indoublequotes:
                insinglequotes = not insinglequotes
            elif val[i] == '"' and not insinglequotes:
                indoublequotes = not indoublequotes
            else:
                if val[i] == ',' and not insinglequotes and not indoublequotes:
                    newVal += sep
                else:
                    newVal += val[i]
            i += 1
        newVal = re.sub('[\t ]*' + sep + '[\t ]*', sep, newVal)
        return re.sub('[\[\]]', '', newVal)

    def addRow(self, row, num):
        args = {}
        idx = 0
        fieldnum = 0
        for val in row:
            idx += 1
            if self.ignore is not None and idx in self.ignore or idx == self.docid:
                continue
            if self.date is not None and idx in self.date:
                val = self.dateToMillis(val)
            if self.fields[fieldnum][2] == 'TAG':
                val = self.adjustTagValue(val, self.fields[fieldnum][4])
            args[self.fields[fieldnum][0]] = val
            fieldnum += 1

        doc = 'doc-' + str(num)
        if self.docid > 0:
            doc = row[self.docid - 1]
        self.client.add_document(doc, replace=True, nosave=self.nosave, **args)

    def loafFile(self):
        reader = csv.reader(self.file, delimiter=self.delimiter)
        if self.hasHeader == True:
            next(reader)
        n = 0
        for row in reader:
            if self.rows > 0 and n == self.rows:
                break
            self.addRow(row, n)
            n += 1
        print('Finished loading ' + str(n) + ' rows.')
from redisearch import Client, TextField
import json

UNIQ = "db.txt"

# Creating a client with a given index name
client = Client('myIndex')

docs = []
with open(UNIQ) as corpus:
    # Indexing one document per line, skipping url_ids that were already added
    for row in corpus:
        corp = json.loads(row)
        if corp["url_id"] in docs:
            continue
        doc_id = corp["url_id"]
        print(f"adding document id {doc_id}")
        try:
            client.add_document(corp["url_id"], title=corp["title"], body=corp["url"])
        except Exception:
            continue
        docs.append(corp["url_id"])
# Change `dirt` to your documents' path
#--------------------------------------------
import os
from redisearch import Client, Query, TextField

dirt = "/path/to/the/documents/"  # Change it to your own path
client = Client("BoxGroup", port=6379)  # 6379 as default
client.create_index([TextField('title'), TextField('body')])

filelist = os.listdir(dirt)
filelist = sorted(filelist)
try:
    filelist.remove(".git")
except:
    print("The .git directory does not exist, skipping")

filecounter = 0
for filename in filelist:
    openfilename = dirt + filename
    with open(openfilename, "r+") as f:
        data = f.read()
        try:
            client.add_document(filecounter, title=filename, body=data, language="chinese")
        except:
            print("Document already exists.")
    filecounter += 1
import pandas as pd
import json
from tqdm import tqdm
from redisearch import Client, TextField, NumericField, Query
from time import sleep
from rediscluster import StrictRedisCluster

sleep(15)

nodes = [{'host': "173.17.0.2", 'port': "7000"}]
rc = StrictRedisCluster(startup_nodes=nodes, decode_responses=True)
client = Client('week1', conn=rc)
client.create_index([TextField('name'), TextField('surname'), TextField('job')])

dat = pd.read_csv("test.csv")
for idx, row in tqdm(dat.iterrows()):
    client.add_document(f"{row['index']}",
                        replace=True, partial=True,
                        name=f"{row['name']}",
                        surname=f"{row['surname']}",
                        job=f"{row['job']}")
import hashlib

import gpxpy
from redisearch import Client, TextField, NumericField, GeoField

client = Client(
    'attractions',
    host='127.0.0.1',
    password='',
    port=6379
)
client.create_index([
    TextField('title', weight=5.0),
    TextField('description'),
    NumericField('verified', sortable=True),
    GeoField('geo'),
])

gpx_file = open('All_States_Offbeat_Tourist_Attractions.gpx', 'r', encoding='utf-8')
gpx = gpxpy.parse(gpx_file)

for waypoint in gpx.waypoints:
    # Waypoints whose comment mentions "Verified" are flagged in the index
    if "Verified" in waypoint.comment:
        v = 1
    else:
        v = 0
    # Use an MD5 of name + coordinates as a stable document id
    t = "%s,%s,%s" % (waypoint.name, waypoint.longitude, waypoint.latitude)
    client.add_document(
        hashlib.md5(t.encode('utf-8')).hexdigest(),
        description=waypoint.name,
        geo="%s,%s" % (waypoint.longitude, waypoint.latitude),
        verified=v,
    )
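# A hedged follow-up: a radius query over the attractions indexed above. GeoFilter
# comes from redisearch-py; the coordinates and radius are made-up example values.
from redisearch import GeoFilter, Query

q = Query('*').add_filter(GeoFilter('geo', -98.0, 39.0, 100, unit='km'))
res = client.search(q)
for doc in res.docs:
    print(doc.id, doc.description, doc.geo)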
import time

from redisearch import Client, TextField, NumericField, Query
from redis.exceptions import ResponseError

file = open('test_set_tweets.txt', 'r')

client = Client('Tweets')
client.redis.flushdb()
client.create_index([TextField('tweet'), TextField('timestamp')])

start = time.time()
for x, line in enumerate(file.readlines()):
    content = line.strip().split('\t')
    try:
        if len(content) == 4:  # the line carries a timestamp
            client.add_document('-'.join(content[:2]), tweet=content[-2], timestamp=content[-1])
        else:
            client.add_document('-'.join(content[:2]), tweet=content[-1], timestamp='')
    except ResponseError:
        pass
    if x % 1000 == 0:
        print(x, 'lines indexed...')
end = time.time()
print("Indexing time elapsed", end - start)

total = 0
for i in range(30):
    start = time.time()
class TAS_Import():
    def __init__(self, index_name, host="172.16.20.7", port=6382, db=0):
        self.client = Client(index_name, host, port)
        self.host = host
        self.port = port
        self.config_obj = redis_config.TAS_AutoCompleter(host, port, db, "Default")
        #self.redis = Redis()

    def add_indexing_schema(self, schema):
        self.client.create_index(schema, False, False, [])
        return ["Done"]

    def add_data(self, rdata, index_name):
        for i, rr in enumerate(rdata):
            # print(rr, type(rr[2]))
            l1, l2, l3, l4, l5, l6, l7, l8, l9 = rr
            index = index_name + str(i + 1) + l3 + l4 + l5 + l6
            # print('index_name', index_name, index, l3, l4, l5, l6)
            l1 = self.config_obj.StringEscape(l1)
            l2 = l2.strip()
            self.client.add_document(index,
                                     DATA=l1,
                                     SECTION_TYPE=l2,
                                     DOCID=l3,
                                     PAGE=l4,
                                     GRIDID=l5,
                                     ROWCOL=l6,
                                     BBOX=l7,
                                     PAGE_GRID_SE="%s_%s_%s" % (l4, l5, l2),
                                     Rowspan=l8,
                                     Colspan=l9)
        return ["Done"]

    def drop_index(self):
        try:
            self.client.drop_index()
        except Exception as e:
            print('Error', e)
            pass

    def start(self, data, index_name):
        status = 1
        self.drop_index()
        self.client = Client(index_name, self.host, self.port)
        status = 2
        schema = [
            NumericField('INDEX'),
            TextField('DATA'),
            TextField('SECTION_TYPE'),
            TextField('DOCID'),
            TextField('PAGE'),
            TextField('GRIDID'),
            TextField("ROWCOL"),
            TextField('BBOX'),
            TextField("PAGE_GRID_SE"),
            TextField('Rowspan'),
            TextField('Colspan')
        ]
        #rsObj.set_schema([NumericField('INDEX'), TextField('DOCID'), TextField('CATEGORY'), TextField('TAXONAME'), TextField('VALUE'), TextField('XML_REF'), TextField('REF_KEY')])
        status = 3
        self.add_indexing_schema(schema)
        status = 4
        self.add_data(data, index_name)
        status = 5
        return [status]