def __init__(self, db_name):
    """Open the local MongoDB database ``db_name`` and a local Solr client.

    MongoDB is addressed at 127.0.0.1:27017 and Solr at
    http://127.0.0.1:8999/solr; the Solr core name comes from the
    module-level ``SOLR_CORE_NAME``.
    """
    mongo_client = MongoClient('127.0.0.1', 27017)
    solr_endpoint = 'http://127.0.0.1:8999/solr'

    self.db_name = db_name
    self.client = mongo_client
    self.db = mongo_client[db_name]
    self.solr_url = solr_endpoint
    self.solr = SOLR(solr_endpoint)
    self.core_name = SOLR_CORE_NAME
def __init__(self, db_name, ip='127.0.0.1', port=27017):
    """Connect to ``db_name`` and its ``<db_name>_test`` twin, plus Solr.

    Args:
        db_name: Name of the MongoDB database; ``db_name + '_test'`` is
            opened alongside it.
        ip: Host used for both MongoDB and the Solr URL.
        port: MongoDB port (Solr always uses port 8999).
    """
    self.db_name = db_name
    # A single client is enough for both databases on the same server;
    # building two would open two independent connection pools.
    client = MongoClient(ip, port)
    self.db = client[db_name]
    self.db_test = client[db_name + '_test']
    self.solr_url = 'http://' + ip + ':8999/solr'
    self.solr_core = SOLR_CORE_NAME
    self.solr = SOLR(self.solr_url)
def __init__(self, db_name, solr_name=SOLR_CORE_NAME,
             mongodb_ip='127.0.0.1', solr_ip='127.0.0.1', port=27017):
    """Open the MongoDB database and the Solr client used for indexing.

    Args:
        db_name: MongoDB database name.
        solr_name: Solr core stored as ``self.solr_core``.
        mongodb_ip: MongoDB host.
        solr_ip: Solr host (port 8999 is fixed).
        port: MongoDB port.
    """
    mongo_client = MongoClient(mongodb_ip, port)
    solr_endpoint = ''.join(('http://', solr_ip, ':8999/solr'))

    self.db_name = db_name
    self.db = mongo_client[db_name]
    self.solr_core = solr_name
    self.solr_url = solr_endpoint
    self.solr = SOLR(solr_endpoint)
def __init__(self, ip, port, db_name, collection_name,
             solr_name=SOLR_CORE_NAME):
    """Bind one MongoDB collection and the Solr client on the same host.

    Args:
        ip: Host used for both MongoDB and the Solr URL.
        port: MongoDB port (Solr always uses port 8999).
        db_name: MongoDB database name; also used for ``self.dirpath``.
        collection_name: Collection inside ``db_name``.
        solr_name: Solr core stored as ``self.solr_core``.
    """
    database = MongoClient(ip, port)[db_name]
    solr_endpoint = ''.join(('http://', ip, ':8999/solr'))

    self.db_name = db_name
    self.collection_name = collection_name
    self.dirpath = 'data/' + db_name
    self.db = database
    self.collection = database[collection_name]
    self.data = []  # starts empty; nothing in this constructor fills it
    self.solr_core = solr_name
    self.solr_url = solr_endpoint
    self.solr = SOLR(solr_endpoint)
class Update_data:
    """Copy the documents of a MongoDB collection into a Solr core."""

    def __init__(self, db_name, solr_name=SOLR_CORE_NAME,
                 mongodb_ip='127.0.0.1', solr_ip='127.0.0.1', port=27017):
        """Open the MongoDB database and build the Solr client.

        Args:
            db_name: MongoDB database name.
            solr_name: Solr core stored as ``self.solr_core``.
            mongodb_ip: MongoDB host.
            solr_ip: Solr host (port 8999 is fixed).
            port: MongoDB port.
        """
        mongo_client = MongoClient(mongodb_ip, port)
        solr_endpoint = ''.join(('http://', solr_ip, ':8999/solr'))

        self.db_name = db_name
        self.db = mongo_client[db_name]
        self.solr_core = solr_name
        self.solr_url = solr_endpoint
        self.solr = SOLR(solr_endpoint)

    def write_data2solr(self, collection_name):
        """Replace the Solr documents of ``collection_name`` with fresh ones.

        Existing Solr documents matching this database (scene) and
        collection (topic) are deleted first. Documents of 'refuse2chat'
        and 'sentiment' are sent once each; every other document is sent
        once per entry of its 'equal_questions' list.
        """
        stale_docs = ''.join(
            ('scene_str:', self.db_name, ' AND topic_str:', collection_name))
        self.solr.delete_solr_by_query(self.solr_core, stale_docs)

        indexed_as_is = collection_name in ('refuse2chat', 'sentiment')
        for record in self.db[collection_name].find():
            doc = record.copy()
            doc['scene'] = self.db_name
            doc['topic'] = collection_name
            doc['_id'] = str(doc['_id'])

            if indexed_as_is:
                self.solr.update_solr(doc, self.solr_core)
                continue

            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'

            # The same dict is re-sent for each question with only the
            # three question fields overwritten.
            for question in doc.pop('equal_questions'):
                for field in ('question', 'question_ik', 'question_cn'):
                    doc[field] = question
                self.solr.update_solr(doc, self.solr_core)
class BaseClass:
    """Hold records for one collection and push them to MongoDB and Solr."""

    def __init__(self, ip, port, db_name, collection_name,
                 solr_name=SOLR_CORE_NAME):
        """Bind one MongoDB collection and the Solr client on the same host.

        Args:
            ip: Host used for both MongoDB and the Solr URL.
            port: MongoDB port (Solr always uses port 8999).
            db_name: MongoDB database name; also used for ``self.dirpath``.
            collection_name: Collection inside ``db_name``.
            solr_name: Solr core stored as ``self.solr_core``.
        """
        database = MongoClient(ip, port)[db_name]
        solr_endpoint = ''.join(('http://', ip, ':8999/solr'))

        self.db_name = db_name
        self.collection_name = collection_name
        self.dirpath = 'data/' + db_name
        self.db = database
        self.collection = database[collection_name]
        self.data = []  # records written by write_data2mongodb
        self.solr_core = solr_name
        self.solr_url = solr_endpoint
        self.solr = SOLR(solr_endpoint)

    def write_data2mongodb(self):
        """Drop the collection, insert ``self.data`` and recreate indexes.

        'refuse2chat' and 'sentiment' are indexed on 'question'; every
        other collection is indexed on 'group' and 'label'.
        """
        self.collection.drop()
        # NOTE(review): Collection.insert was removed in PyMongo 4 --
        # confirm the PyMongo version this project pins.
        self.collection.insert(self.data)

        if self.collection_name in ('refuse2chat', 'sentiment'):
            index_fields = ('question',)
        else:
            index_fields = ('group', 'label')
        for field in index_fields:
            self.collection.create_index(field)

    def write_data2solr(self):
        """Replace this collection's Solr documents with fresh ones.

        Existing Solr documents matching this database (scene) and
        collection (topic) are deleted first. Documents of 'refuse2chat'
        and 'sentiment' are sent once each; every other document is sent
        once per entry of its 'equal_questions' list.
        """
        stale_docs = ''.join(
            ('scene_str:', self.db_name,
             ' AND topic_str:', self.collection_name))
        self.solr.delete_solr_by_query(self.solr_core, stale_docs)

        indexed_as_is = self.collection_name in ('refuse2chat', 'sentiment')
        for record in self.collection.find():
            doc = record.copy()
            doc['scene'] = self.db_name
            doc['topic'] = self.collection_name
            doc['_id'] = str(doc['_id'])

            if indexed_as_is:
                self.solr.update_solr(doc, self.solr_core)
                continue

            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'

            for question in doc.pop('equal_questions'):
                for field in ('question', 'question_ik', 'question_cn'):
                    doc[field] = question
                self.solr.update_solr(doc, self.solr_core)
class Mongodb():
    """Write data to a MongoDB database and its '_test' twin, and to Solr."""

    def __init__(self, db_name, ip='127.0.0.1', port=27017):
        """Connect to ``db_name``, ``<db_name>_test`` and the Solr service.

        Args:
            db_name: Name of the MongoDB database.
            ip: Host used for both MongoDB and the Solr URL.
            port: MongoDB port (Solr always uses port 8999).
        """
        self.db_name = db_name
        # A single client is enough for both databases on the same server;
        # building two would open two independent connection pools.
        client = MongoClient(ip, port)
        self.db = client[db_name]
        self.db_test = client[db_name + '_test']
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = SOLR_CORE_NAME
        self.solr = SOLR(self.solr_url)

    def write(self, collection, data):
        """Replace ``collection`` in both databases with ``data``.

        Returns:
            1 on success, 0 if any step raised (the traceback is printed).
        """
        try:
            for database in (self.db, self.db_test):
                database[collection].drop()
                database[collection].insert(data)
            return 1
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            traceback.print_exc()
            return 0

    def write_data2solr(self, collection):
        """Replace the Solr documents of ``collection`` with fresh ones.

        Existing Solr documents matching this database (scene) and
        collection (topic) are deleted first. Documents of 'instruction'
        are sent once each; every other document is sent once per entry
        of its 'questions' list.
        """
        query = 'scene_str:' + self.db_name + ' AND topic_str:' + collection
        self.solr.delete_solr_by_query(self.solr_core, query)
        for x in self.db[collection].find():
            data_one = x.copy()
            data_one['scene'] = self.db_name
            data_one['topic'] = collection
            data_one['_id'] = str(data_one['_id'])
            if collection in ['instruction']:
                self.solr.update_solr(data_one, self.solr_core)
                continue
            # An empty super_intention is stored as the literal 'null'.
            if data_one.get('super_intention') == '':
                data_one['super_intention'] = 'null'
            # The same dict is re-sent for each question with only the
            # three question fields overwritten.
            for q in data_one.pop('questions'):
                data_one['question'] = q
                data_one['question_ik'] = q
                data_one['question_cn'] = q
                self.solr.update_solr(data_one, self.solr_core)
class Update():
    """Replay logged create/update/delete commands from MongoDB into Solr."""

    def __init__(self, ip, db_name):
        """Open the local MongoDB database and the local Solr client.

        Args:
            ip: Accepted for interface compatibility.
                NOTE(review): this value is not used -- MongoDB and Solr
                are both addressed on 127.0.0.1. Confirm that is intended.
            db_name: MongoDB database name; also used as the Solr 'scene'.
        """
        self.db_name = db_name
        self.db = MongoClient('127.0.0.1', 27017)[db_name]
        self.core_name = SOLR_CORE_NAME
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)

    def load_log(self, server_name):
        """Return the pending log entries for ``server_name``, sorted by time.

        'develop' selects status '0', 'master' selects status '1' and any
        other name selects status '3'.
        """
        # Log document fields: _id, collection, cmd, ids, comment, status, time
        if server_name == 'develop':
            query = {'status': '0'}
        elif server_name == 'master':
            query = {'status': '1'}
        else:
            query = {'status': '3'}
        return list(self.db.log.find(query).sort('time'))

    def check_solr_core(self):
        """Create the Solr core if it does not exist yet."""
        if not self.solr.solr_core_exists(self.core_name):
            self.solr.create_solr_core(self.core_name)

    def update_data(self, collection, cmd, _id):
        """Apply one logged command for document ``_id`` to Solr.

        Args:
            collection: MongoDB collection the document lives in.
            cmd: 'create', 'update' or 'delete'.
            _id: String form of the document's ObjectId.

        Returns:
            0 for an unknown ``cmd``; None otherwise.
        """
        def insert_automata(data, collection):
            # 'automata' documents are sent once per question;
            # 'instruction' documents are sent as they are; anything
            # else is skipped.
            if collection in ['automata']:
                questions = data.pop('questions')
                for q in questions:
                    data['question'] = q
                    self.solr.update_solr(data, self.core_name)
            elif collection in ['instruction']:
                self.solr.update_solr(data, self.core_name)
            return None

        def insert(collection, _id):
            data = self.db[collection].find_one({'_id': ObjectId(_id)})
            if not data:
                return None
            data_one = data.copy()
            data_one['_id'] = str(data_one['_id'])
            data_one['scene'] = self.db_name
            data_one['topic'] = collection
            if self.db_name == 'automata':
                # The collection must be forwarded: insert_automata takes
                # two arguments, and calling it with one raised TypeError.
                return insert_automata(data_one, collection)
            if collection in ['refuse2chat', 'sentiment']:
                self.solr.update_solr(data_one, self.core_name)
                return None
            # An empty super_intention is stored as the literal 'null'.
            if data_one.get('super_intention') == '':
                data_one['super_intention'] = 'null'
            for q in data_one.pop('equal_questions'):
                data_one['question'] = q
                data_one['question_ik'] = q
                data_one['question_cn'] = q
                self.solr.update_solr(data_one, self.core_name)
            return None

        if cmd == 'create':
            insert(collection, _id)
        elif cmd == 'update':
            # An update is a delete followed by a fresh insert.
            self.solr.delete_solr_by_query(self.core_name, '_id_str:' + _id)
            insert(collection, _id)
        elif cmd == 'delete':
            self.solr.delete_solr_by_query(self.core_name, '_id_str:' + _id)
        else:
            return 0

    def update(self, server_name):
        """Apply every pending log entry and advance its status.

        After an entry is applied its status becomes '1' on 'develop' and
        '2' on 'master'. For any other ``server_name`` the first entry is
        applied and 0 is returned without changing its status.

        Returns:
            1 on success (including "nothing to do"), 0 on failure or for
            an unknown ``server_name``.
        """
        try:
            logs = self.load_log(server_name)
            if not logs:
                print('no update!')
                return 1
            for log in logs:
                if log['cmd'] == 'create':
                    self.check_solr_core()
                for _id in log['ids']:
                    self.update_data(log['collection'], log['cmd'], _id)
                if server_name == 'develop':
                    value = {'status': '1'}
                elif server_name == 'master':
                    value = {'status': '2'}
                else:
                    return 0
                self.db.log.update_one({'_id': log['_id']}, {'$set': value})
            return 1
        except Exception:
            traceback.print_exc()
            return 0
class Update:
    """Rebuild the Solr documents of every collection in one database."""

    def __init__(self, ip, db_name):
        """Open the local MongoDB database and the local Solr client.

        ``ip`` is not read by this constructor; both services are
        addressed on 127.0.0.1.
        """
        mongo_client = MongoClient('127.0.0.1', 27017)

        self.db_name = db_name
        self.db = mongo_client[db_name]
        self.core_name = SOLR_CORE_NAME
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)

    def check_solr_core(self):
        """Create the Solr core if it does not exist yet."""
        if self.solr.solr_core_exists(self.core_name):
            return
        self.solr.create_solr_core(self.core_name)

    def update_data(self, collection):
        """Delete and re-create the Solr documents of ``collection``.

        A document with an 'equal_questions' (or, failing that,
        'questions') list is sent once per list entry; any other document
        is sent once.
        """
        def index_document(source):
            if not source:
                return
            doc = source.copy()
            doc['_id'] = str(doc['_id'])
            doc['scene'] = self.db_name
            doc['topic'] = collection

            # An empty super_intention is stored as the literal 'null'.
            if doc.get('super_intention') == '':
                doc['super_intention'] = 'null'

            # 'equal_questions' takes precedence over 'questions'.
            question_field = next(
                (name for name in ('equal_questions', 'questions')
                 if name in doc),
                None)
            if question_field is None:
                self.solr.update_solr(doc, self.core_name)
                return

            for question in doc.pop(question_field):
                for field in ('question', 'question_ik', 'question_cn'):
                    doc[field] = question
                self.solr.update_solr(doc, self.core_name)

        stale_docs = ('scene_str:' + self.db_name +
                      ' AND topic_str:' + collection)
        self.solr.delete_solr_by_query(self.core_name, stale_docs)

        # All documents are read into a list before any is indexed.
        for source in list(self.db[collection].find()):
            index_document(source)

    def update(self):
        """Re-index every collection except 'log'.

        Returns:
            1 on success, 0 if anything raised (the traceback is printed).
        """
        try:
            names = self.db.collection_names()
            if 'log' in names:
                names.remove('log')
            for name in names:
                print('start ' + name)
                self.update_data(name)
        except Exception:
            traceback.print_exc()
            return 0
        return 1
def __init__(self, ip='127.0.0.1', solr_core=SOLR_CORE_NAME):
    """Build the Solr client for ``http://<ip>:8999/solr``.

    Args:
        ip: Solr host.
        solr_core: Solr core stored as ``self.solr_core``.
    """
    solr_endpoint = ''.join(('http://', ip, ':8999/solr'))

    self.solr_core = solr_core
    self.solr_url = solr_endpoint
    self.solr = SOLR(solr_endpoint)
class SearchData():
    """Query a Solr core for answers, questions and sale records."""

    def __init__(self, ip='127.0.0.1', solr_core=SOLR_CORE_NAME):
        """Build the Solr client for ``http://<ip>:8999/solr``.

        Args:
            ip: Solr host.
            solr_core: Solr core queried by every method.
        """
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = solr_core
        self.solr = SOLR(self.solr_url)

    @staticmethod
    def _first_values(doc):
        """Replace every field of ``doc`` with its first element, in place."""
        for key in doc.keys():
            doc[key] = doc[key][0]
        return doc

    @staticmethod
    def _restrict_to_scenes(select, scene_topic):
        """Prefix ``select`` with a scene/topic filter.

        ``scene_topic`` maps a scene name to a list of topic names; an
        empty topic list means "any topic of that scene". When
        ``scene_topic`` is empty, ``select`` is returned unchanged instead
        of being prefixed with an empty ``()`` group.
        """
        if not scene_topic:
            return select
        scene_clauses = []
        for scene in scene_topic:
            clause = 'scene_str:' + scene
            if scene_topic[scene] != []:
                clause = ('(' + clause + ' AND (topic_str:' +
                          ' OR topic_str:'.join(scene_topic[scene]) + '))')
            scene_clauses.append(clause)
        return '(' + ' OR '.join(scene_clauses) + ') AND ' + select

    def search_answer(self, select='*:*', scene_topic=[]):
        """Return one answer record for the best match of ``select``.

        Args:
            select: Solr query string.
            scene_topic: Mapping of scene name to list of topic names used
                to restrict the search.

        Returns:
            A dict with keys 'answer' (picked at random from the
            document's 'answers'), 'emotion', 'media' and 'timeout'. Every
            value is None when the query fails or matches nothing.
        """
        try:
            fields = ['answers', 'emotion_url', 'media', 'timeout']
            select = self._restrict_to_scenes(select, scene_topic)
            docs = list(
                self.solr.query_solr(self.solr_core, select, fields, 1).docs)
            data = docs[0]
            return {
                'answer': random.sample(data['answers'], 1)[0],
                'emotion': data['emotion_url'][0],
                'media': data['media'][0],
                'timeout': data['timeout'][0]
            }
        except Exception:
            traceback.print_exc()
            return {
                'answer': None,
                'emotion': None,
                'media': None,
                'timeout': None
            }

    def search_questions(self, select='*:*', scene_topic=[],
                         fields=['question'], max_num=10):
        """Return up to ``max_num`` documents matching ``select``.

        Args:
            select: Solr query string.
            scene_topic: Mapping of scene name to list of topic names used
                to restrict the search.
            fields: Solr fields to fetch.
            max_num: Maximum number of documents requested.

        Returns:
            A list of dicts in which every field holds its first value, or
            None if the query failed.
        """
        try:
            select = self._restrict_to_scenes(select, scene_topic)
            return [
                self._first_values(x) for x in self.solr.query_solr(
                    self.solr_core, select, fields, max_num).docs
            ]
        except Exception:
            traceback.print_exc()
            return None

    def sale_id2description(self, _id, scene):
        """Return the 'description' field of document ``_id`` in ``scene``.

        Returns None if the query fails or matches nothing.
        """
        try:
            select = 'scene_str:' + scene + ' AND _id_str:' + _id
            fields = ['description']
            docs = list(
                self.solr.query_solr(self.solr_core, select, fields, 1).docs)
            return docs[0]['description']
        except Exception:
            traceback.print_exc()
            return None

    def sale_type2answers(self, scene, t=''):
        """Return up to 20 answer records of ``scene``, optionally by type.

        Args:
            scene: Scene name to search in.
            t: Type to match; when empty, any document with a type matches.

        Returns:
            A list of dicts in which every field holds its first value, or
            None if the query failed.
        """
        try:
            if t:
                select = 'scene_str:' + scene + ' AND type_str:' + t
            else:
                # The ':' separator after the field name is required;
                # without it the scene name was fused into the field name.
                select = 'scene_str:' + scene + ' AND type_str:*'
            fields = [
                '_id', 'answers', 'type', 'emotion_url', 'media', 'timeout'
            ]
            max_num = 20
            return [
                self._first_values(x) for x in self.solr.query_solr(
                    self.solr_core, select, fields, max_num).docs
            ]
        except Exception:
            traceback.print_exc()
            return None
class Data_backup():
    """Dump a MongoDB database to disk and restore it into MongoDB and Solr."""

    def __init__(self, db_name):
        """Open the local MongoDB database ``db_name`` and a local Solr client.

        MongoDB is addressed at 127.0.0.1:27017 and Solr at
        http://127.0.0.1:8999/solr.
        """
        self.db_name = db_name
        self.client = MongoClient('127.0.0.1', 27017)
        self.db = self.client[self.db_name]
        self.solr_url = 'http://127.0.0.1:8999/solr'
        self.solr = SOLR(self.solr_url)
        self.core_name = SOLR_CORE_NAME

    def data_dump(self, datapath, log_id):
        """Dump the database with ``mongodump`` into ``datapath/log_id``.

        An existing ``datapath/log_id`` directory is removed first;
        missing directories are created.

        Returns:
            1 if the ``mongodump`` command exited with status 0, else 0.
        """
        dirpath = os.path.join(datapath, log_id)
        if os.path.exists(dirpath):
            shutil.rmtree(dirpath)
        os.makedirs(dirpath)
        # NOTE(review): db_name and the path are concatenated into a shell
        # command line; they must come from trusted input.
        cmd_dump = 'mongodump -d ' + self.db_name + ' -o ' + dirpath
        try:
            exit_status = os.system(cmd_dump)
        except Exception:
            traceback.print_exc()
            return 0
        # os.system reports a failed command through its return value, not
        # through an exception, so the status has to be checked explicitly.
        return 0 if exit_status else 1

    def mongodb_restore(self, dirpath):
        """Drop both databases and restore them from ``dirpath/<db_name>``.

        The same dump is restored into ``db_name`` and ``db_name + '_test'``.

        Returns:
            1 if both ``mongorestore`` commands exited with status 0, else 0.
        """
        self.client.drop_database(self.db_name)
        self.client.drop_database(self.db_name + '_test')
        dbpath = os.path.join(dirpath, self.db_name)
        for target in (self.db_name, self.db_name + '_test'):
            if os.system('mongorestore -d ' + target + ' ' + dbpath):
                return 0
        return 1

    def solr_restore(self):
        """Rebuild the Solr documents of every collection except 'log'.

        Documents of 'refuse2chat' and 'sentiment' are sent once each;
        every other document is sent once per entry of its
        'equal_questions' list.

        Returns:
            1 on success, 0 if indexing raised (the traceback is printed).
        """
        collections = self.db.collection_names()
        if 'log' in collections:
            collections.remove('log')
        try:
            for collection in collections:
                query = '(scene_str:' + self.db_name + \
                    ' AND topic_str:' + collection + ')'
                self.solr.delete_solr_by_query(self.core_name, query)
                for data in self.db[collection].find():
                    data_one = data.copy()
                    data_one['scene'] = self.db_name
                    data_one['topic'] = collection
                    data_one['_id'] = str(data_one['_id'])
                    if collection in ['refuse2chat', 'sentiment']:
                        self.solr.update_solr(data_one, self.core_name)
                        # Move on to the next document; leaving the loop
                        # here restored only the first document.
                        continue
                    for q in data_one.pop('equal_questions'):
                        data_one['question'] = q
                        self.solr.update_solr(data_one, self.core_name)
            return 1
        except Exception:
            traceback.print_exc()
            return 0

    def data_restore(self, dirpath, _id):
        """Restore MongoDB, then Solr, from the dump ``dirpath/_id``.

        Solr is only rebuilt when the MongoDB restore succeeded.
        """
        dirpath = os.path.join(dirpath, _id)
        return self.mongodb_restore(dirpath) and self.solr_restore()
class SearchSolr():
    """Load instruction or intent records from a Solr core."""

    def __init__(self, ip='127.0.0.1', solr_core=SOLR_CORE_NAME):
        """Build the Solr client for ``http://<ip>:8999/solr``.

        Args:
            ip: Solr host.
            solr_core: Solr core queried by ``load_data``.
        """
        self.solr_url = 'http://' + ip + ':8999/solr'
        self.solr_core = solr_core
        self.solr = SOLR(self.solr_url)

    def load_data(self, select='*:*', fields=[], max_num=10, flag=False):
        """Query Solr and reshape the matching documents.

        Args:
            select: Solr query string.
            fields: Solr fields to fetch.
            max_num: Maximum number of documents requested.
            flag: When True, return one instruction record per document.
                Otherwise return intent records, merging documents that
                share the same intent and entities into one record whose
                'questions' list collects their questions.

        Returns:
            A list of dicts, or None if anything raised (the traceback is
            printed).
        """
        try:
            def to_instruction(doc):
                has_media = 'media' in doc
                return {
                    'store_id': doc['store_id'][0],
                    'category': doc['category'][0],
                    'instruction': doc['instruction'][0],
                    'entities': doc['entities'] if 'entities' in doc else [''],
                    'answers': doc['answer'],
                    'emotion_name': 'null',
                    'emotion_url': 'null',
                    # Records with media get a timeout of '15', others '0'.
                    'media': doc['media'][0] if has_media else 'null',
                    'timeout': '15' if has_media else '0',
                }

            grouped = {}

            def collect_intent(doc):
                entry = {
                    'store_id': doc['store_id'][0],
                    'category': doc['category'][0],
                    'intent': doc['intent'],
                    'questions': doc['question'],
                    'entities': doc['entities'] if 'entities' in doc else '',
                }
                # NOTE(review): the key is built by string concatenation,
                # so 'intent' and 'entities' are presumably plain strings
                # here -- confirm against the Solr schema.
                group_key = entry['intent'] + '|' + entry['entities']
                if group_key in grouped:
                    grouped[group_key]['questions'].append(doc['question'][0])
                else:
                    grouped[group_key] = entry
                return entry

            docs = self.solr.query_solr(
                self.solr_core, select, fields, max_num).docs
            if flag == True:
                data = [to_instruction(doc) for doc in docs]
            else:
                for doc in docs:
                    collect_intent(doc)
                data = list(grouped.values())
            return data
        except Exception:
            # ``Exception`` rather than a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            traceback.print_exc()
            return None