def test_regex_from_native(self):
    """Round-trip native ``re`` patterns through ``Regex.from_native``."""
    # The pattern text is preserved verbatim.
    self.assertEqual('.*', Regex.from_native(re.compile('.*')).pattern)
    # A bytes pattern compiled with no flags converts to flags == 0.
    self.assertEqual(0, Regex.from_native(re.compile(b'')).flags)
    # All explicitly requested flags survive the conversion.
    combined = re.I | re.L | re.M | re.S | re.X
    self.assertEqual(combined, Regex.from_native(re.compile(b'', combined)).flags)
    # str patterns carry re.U.
    self.assertEqual(re.U, Regex.from_native(re.compile('', re.U)).flags)
def test_regex_from_native(self):
    """Verify that ``Regex.from_native`` keeps both pattern and flags intact."""
    wildcard = re.compile('.*')
    self.assertEqual('.*', Regex.from_native(wildcard).pattern)

    plain_bytes = re.compile(b'')
    self.assertEqual(0, Regex.from_native(plain_bytes).flags)

    flagged = re.compile(b'', re.I | re.L | re.M | re.S | re.X)
    self.assertEqual(
        re.I | re.L | re.M | re.S | re.X, Regex.from_native(flagged).flags)

    unicode_regex = re.compile('', re.U)
    self.assertEqual(re.U, Regex.from_native(unicode_regex).flags)
def gen_regex_search_query(search_query):
    """Build a BSON ``Regex`` that matches *search_query* as literal text.

    The user-supplied string is escaped with ``re.escape`` so regex
    metacharacters cannot alter the database query (resolves the previous
    TODO about sanitizing this input before querying the DB with it).

    Returns a ``bson.Regex`` with ``re.UNICODE`` cleared and
    ``re.IGNORECASE`` set.
    """
    pattern = re.compile(re.escape(search_query))
    regex = Regex.from_native(pattern)
    # Clear/set the flags explicitly instead of XOR-toggling them: ``^=``
    # would silently invert the intent for patterns that already carry
    # (or lack) these flags.
    regex.flags &= ~re.UNICODE
    regex.flags |= re.IGNORECASE
    return regex
def get_response_statements(self): """ Return only statements that are in response to another statement. A statement must exist which lists the closest matching statement in the in_response_to field. Otherwise, the logic adapter may find a closest matching statement that does not have a known response. """ # '程序员涤生' 原来的逻辑是根据in_response_to字段来判断是否是问题,如果一个句子出现在了其他句子的in_response_to字段中,那么该句子可以做为问题, # 因此需要先查出in_response_to字段中的text,然后查出在这些text集合中的句子,做为问题,这样的效率非常慢, # 通过在句子中加入Q和A标记,我们可以利用正则来直接匹配出表示问题的句子, # 并且我们只返回text字段,大大提升了查询的效率。 pattern = re.compile('^Q ') regex = Regex.from_native(pattern) # response_query = self.statements.find({'text': 'Q 今天天气怎么样?'}, {'text': 1}) response_query = self.statements.find({'text': {'$regex': regex}}, {'text': 1}) statement_objects = [] statement_vec = [] import datetime as dt starttime2 = dt.datetime.now() for r in response_query: try: # 此处考虑直接使用text对应的向量,从系统启动时就构建好的text-vec索引文件中获取 text_vec_indx = IntentClassifier().text_vec_indx vec = text_vec_indx.get(r['text'],None) if vec is not None: # 注意:下面这两个数组一定要保证长度一样,否则计算相似度的时候根据索引来取原文本会出先位置偏移,导致无法获取正确的答案!! statement_vec.append(vec) statement_objects.append(self.mongo_to_object({'text': r['text']})) except Exception as e: logging.warning("出现异常%s,问题句子为:%s", str(e), r['text']) endtime2 = dt.datetime.now() logging.debug("===========get_response_statements的for循环构造向量耗时: %s秒", (endtime2 - starttime2).seconds) return statement_objects, statement_vec
async def find_files_in_directory(self, path):
    """Return stringified ids of records whose absolute_path starts with *path*."""
    # Anchor at the start and escape the path so it matches literally.
    prefix_pattern = re.compile('^' + re.escape(path))
    query_regex = Regex.from_native(prefix_pattern)
    query_regex.flags ^= re.UNICODE
    matched_ids = []
    cursor = self._db.find({'absolute_path': {'$regex': query_regex}})
    async for record in cursor:
        matched_ids.append(str(record['_id']))
    return matched_ids
def do_query(self, tag_part, min_sentiment, min_relevance):
    """Dump documents having a tag containing *tag_part* with at least the
    given sentiment and relevance, as pretty-printed JSON strings."""
    tag_regex = Regex.from_native(re.compile('.*{0}.*'.format(tag_part)))
    elem_match = {
        'tag': tag_regex,
        'sentiment': {'$gte': min_sentiment},
        'relevance': {'$gte': min_relevance},
    }
    cursor = self.db.find({'tags': {'$elemMatch': elem_match}})
    return [
        bson.json_util.dumps(doc, sort_keys=True, indent=4)
        for doc in cursor
    ]
def getQueryCondition(queryConditions):
    """Translate a list of field descriptors into a MongoDB ``$or`` query.

    Each descriptor is a dict with keys ``type``, ``name`` and ``query``.
    Text-like fields (text/array/datetime) become case-insensitive
    substring regexes; number fields become ``$eq`` comparisons, falling
    back to the existing match-nothing sentinel (-99999) on non-numeric
    input, or ``$gte -99999`` (match everything) when the query is blank.
    """
    conditions = []
    for fieldInfo in queryConditions:
        if fieldInfo["type"] in ('text', 'array', 'datetime'):
            # NOTE(review): fieldInfo['query'] is interpolated into the
            # regex unescaped, so callers can inject regex metacharacters.
            # Confirm whether that is intended before adding re.escape().
            pattern = re.compile(r'.*' + fieldInfo['query'] + r'.*', re.I)
            regex = Regex.from_native(pattern)
            # Toggle re.UNICODE off (str patterns carry it implicitly).
            regex.flags ^= re.UNICODE
            conditions.append({fieldInfo["name"]: regex})
        elif fieldInfo["type"] == 'number':
            if fieldInfo["query"] != "":
                # Narrowed from a bare except: only conversion failures
                # should fall back to the sentinel.
                try:
                    num = float(fieldInfo["query"])
                    conditions.append({fieldInfo["name"]: {"$eq": num}})
                except (TypeError, ValueError):
                    conditions.append({fieldInfo["name"]: {"$eq": -99999}})
            else:
                conditions.append({fieldInfo["name"]: {"$gte": -99999}})
    return {"$or": conditions}
def do_query(self, tag_part, min_sentiment, min_relevance):
    """Query documents whose tag list contains an element matching
    *tag_part* (substring) with sentiment/relevance at or above the given
    minimums; return each hit serialized as indented, key-sorted JSON."""
    substring_regex = Regex.from_native(
        re.compile('.*{0}.*'.format(tag_part)))
    query = {
        'tags': {
            '$elemMatch': {
                'tag': substring_regex,
                'sentiment': {'$gte': min_sentiment},
                'relevance': {'$gte': min_relevance},
            }
        }
    }
    bson_results = []
    for document in self.db.find(query):
        serialized = bson.json_util.dumps(document, sort_keys=True, indent=4)
        bson_results.append(serialized)
    return bson_results
def choose_user_job_recommend(self, conditon):
    """Collect, per language, the jobs whose responsibility matches *conditon*
    (case-insensitive), printing and returning the mapping."""
    collections_by_language = {
        'python': 'zhilian_python_BJ',
        'java': 'zhilian_java_BJ',
    }
    native_pattern = re.compile(conditon)
    query_regex = Regex.from_native(native_pattern)
    # Overwrite (not OR) the flags so only re.UNICODE is set.
    query_regex.flags = re.UNICODE
    user_chose_recommed_jobs = {}
    for language, collection_name in collections_by_language.items():
        # connect() is called per collection, as before.
        cursor = self.connect()[collection_name].find(
            {"responsibility": {"$regex": query_regex, "$options": "i"}})
        user_chose_recommed_jobs[language] = list(cursor)
    print(user_chose_recommed_jobs)
    return user_chose_recommed_jobs
def queryTable(table,
               request,
               additionalColumns=None,
               additionalConditions=None,
               aggregationConditions=False):
    """Run a paginated, sorted MongoDB query built from a Django-style request.

    Reads pageNum/pageSize/fields (and, in advanced mode, queryFields and
    multiConditions) from ``request.POST``, combines them with
    *additionalConditions*, and returns ``(result, total)`` where *result*
    is the current page of documents and *total* the overall match count.

    *additionalColumns* is a Mongo projection (default hides ``_id``);
    *aggregationConditions*, when truthy, is a pipeline prefix to run the
    query through ``aggregate`` instead of ``find``.
    """
    # Avoid mutable default arguments; the defaults are created per call.
    if additionalColumns is None:
        additionalColumns = {"_id": 0}
    if additionalConditions is None:
        additionalConditions = []
    pageNum = int(request.POST["pageNum"])
    pageSize = int(request.POST["pageSize"])
    queryConditions = json.loads(request.POST["fields"])
    conditions = getQueryCondition(queryConditions)
    if 'advance' in request.POST and request.POST['advance'] == '1':
        queryFields = json.loads(request.POST["queryFields"])
        multiConditions = json.loads(request.POST["multiConditions"])
        for field in queryFields:
            if field['type'] == 'datetime':
                # Bounds arrive as "YYYY-MM-DDTHH:MM"; stored values use a
                # space separator, hence the replace().
                datetime_from = multiConditions[field['value'] + "_from"]
                if datetime_from != '':
                    additionalConditions.append({
                        field['value']: {
                            "$gte": datetime_from.replace("T", " ")
                        }
                    })
                datetime_to = multiConditions[field['value'] + "_to"]
                if datetime_to != '':
                    additionalConditions.append({
                        field['value']: {
                            "$lte": datetime_to.replace("T", " ")
                        }
                    })
            elif field['type'] == 'number':
                # Best-effort numeric bounds: skip blank/non-numeric input.
                # (Narrowed from bare except; KeyError kept for missing keys.)
                try:
                    num_from = float(multiConditions[field['value'] + "_from"])
                    additionalConditions.append(
                        {field['value']: {"$gte": num_from}})
                except (KeyError, TypeError, ValueError):
                    pass
                try:
                    num_to = float(multiConditions[field['value'] + "_to"])
                    additionalConditions.append(
                        {field['value']: {"$lte": num_to}})
                except (KeyError, TypeError, ValueError):
                    pass
            else:
                query_t = multiConditions[field['value']]
                if query_t == '':
                    continue
                # Case-insensitive substring match; toggle off the implicit
                # re.UNICODE flag carried by str patterns.
                pattern_t = re.compile(r'.*' + query_t + '.*', re.I)
                regex_t = Regex.from_native(pattern_t)
                regex_t.flags ^= re.UNICODE
                additionalConditions.append({field['value']: regex_t})
    if aggregationConditions:
        aggregationConditionsT = aggregationConditions.copy()
        if additionalConditions:
            additionalConditions.append(conditions)
            conditions = {"$and": additionalConditions}
        aggregationConditionsT.append({'$match': conditions})
        # NOTE(review): reaching into _CommandCursor__data is a private
        # PyMongo detail used to count matches without a second query —
        # confirm it still holds for the installed driver version.
        total = len(
            table.aggregate(aggregationConditionsT)._CommandCursor__data)
        # (translated) Order matters: sort must come before skip/limit!
        aggregationConditionsT.append(
            {"$sort": {
                request.POST["sortProp"]: int(request.POST["order"])
            }})
        aggregationConditionsT.append({'$skip': pageSize * (pageNum - 1)})
        aggregationConditionsT.append({"$limit": pageSize})
        result = list(table.aggregate(aggregationConditionsT))
    else:
        # (translated) With extra conditions, combine everything under $and.
        if additionalConditions:
            additionalConditions.append(conditions)
            query = table.find({"$and": additionalConditions},
                               additionalColumns)
        # (translated) Otherwise query the base conditions directly.
        else:
            query = table.find(conditions, additionalColumns)
        sortCondition = [(request.POST["sortProp"],
                          int(request.POST["order"]))]
        # (translated) collation({"locale": "en"}) makes sorting
        # case-insensitive.
        query.sort(sortCondition).skip(
            pageSize * (pageNum - 1)).limit(pageSize).collation(
                {"locale": "en"})
        result = list(query)
        total = query.count()
    return result, total
def setUp(self):
    """Prepare the fixture: wrap the inherited pattern in a BSON Regex."""
    from bson import Regex  # local import, as in the original
    super(BSONRegexLocaleTest, self).setUp()
    self.regex = Regex.from_native(self.regex)
def setUp(self):
    """Prepare the fixture, converting the native pattern to a BSON Regex."""
    super(BSONRegexWithoutFlagTest, self).setUp()
    from bson import Regex  # local import, as in the original
    self.regex = Regex.from_native(self.regex)
def _convert_regex(self, regex):
    """Compile the pattern string *regex* and return it as a BSON Regex
    whose flags are set to exactly ``re.UNICODE``."""
    bson_regex = Regex.from_native(re.compile(regex))
    # Assignment (not OR) deliberately discards any other flags.
    bson_regex.flags = re.UNICODE
    return bson_regex