def __generate_filter(self, kv):
    """Build the list of filter clauses described by ``kv``.

    Every recognised, non-empty option in ``kv`` contributes one or more
    clauses produced by the ``templates`` helpers; other keys are ignored.

    Args:
        kv: mapping of filter name to the requested value(s). Keys read
            here: ``round``, ``location``, ``domestic``, ``public``,
            ``tag``, ``operator``, ``investor``, ``investorId``,
            ``previous_investor``, ``date``, ``funding_date``, ``source``.

    Returns:
        list: the generated clauses, in the order the options are checked.
    """
    # Number of days covered by each "recent funding" keyword.
    recent_windows = {'latest7': 7, 'latest30': 30, 'latest90': 90}

    pending = []
    if kv.get('round'):
        pending.append(templates.get_round(kv.get('round')))
    if kv.get('location'):
        pending.append(templates.get_terms('location', kv.get('location')))
    # ``domestic`` / ``public`` are compared against None so that an explicit
    # False still produces a filter.
    if kv.get('domestic', None) is not None:
        pending.append(self.__generate_domestic_filter(kv.get('domestic')))
    if kv.get('public', None) is not None:
        pending.append(self.__generate_public_filter(kv.get('public')))
    if kv.get('tag'):
        if kv.get('operator', 'or') == 'and':
            # AND: one term clause per tag, so every tag must match.
            for t in kv.get('tag', []):
                pending.append(
                    templates.get_term('tags', t.lower().strip()))
        else:
            # OR (the default): a single terms clause over all tags.
            # NOTE(review): unlike the AND branch, tags are not stripped
            # here -- confirm whether that difference is intended.
            pending.append(
                templates.get_terms('tags',
                                    [t.lower() for t in kv.get('tag')]))
    if kv.get('investor'):
        for iv in kv.get('investor'):
            pending.append(templates.get_fuzzy('investor', iv))
    if kv.get('investorId'):
        pending.append(
            templates.get_terms('investorId', kv.get('investorId')))
    if kv.get('previous_investor'):
        for iv in kv.get('previous_investor'):
            pending.append(templates.get_fuzzy('previous_investor', iv))
    if kv.get('date'):
        pending.append(templates.get_terms('funding_year', kv.get('date')))
    if kv.get('funding_date'):
        today = datetime.today().date()
        # The window's upper bound is tomorrow, as in the original code.
        window_end = today + timedelta(days=1)
        for fd in kv.get('funding_date'):
            days = recent_windows.get(fd)
            if days is not None:
                window_start = today - timedelta(days=days)
                pending.append(
                    templates.get_range('last_funding_date', window_end,
                                        window_start))
            elif fd.isalnum():
                # isalnum() also accepts letters, so int() can still fail.
                # Skip such values (as non-alphanumeric values already are)
                # instead of aborting the whole query with a ValueError.
                try:
                    year = int(fd)
                except ValueError:
                    continue
                pending.append(templates.get_term('funding_year', year))
    if kv.get('source'):
        pending.append(templates.get_terms('source', kv.get('source')))
    return pending
def generate_query(self):
    """Assemble the bool query for the current search.

    Runs intent classification first, then adds clauses according to
    ``self.intent``:

    * ``'tag'``: ``self.query`` is read as a mapping with optional ``and``,
      ``or`` and ``not`` tag lists, mapped to ``must``, ``should`` and
      ``must_not`` clauses respectively.
    * ``'general'``: ``self.query`` is used as a name and matched through
      several ``should`` clauses, at least one of which must match.

    Clauses generated from ``self.filters`` and ``self.nested`` are then
    appended to ``must``.

    Returns:
        dict: the query; empty when nothing contributed a clause.
    """
    self.__intent_classify()
    query = {}

    def clause(kind):
        # Create the 'bool' section and the requested clause list lazily so
        # an untouched query stays an empty dict.
        return query.setdefault('bool', {}).setdefault(kind, [])

    if self.intent == 'tag':
        for required in self.query.get('and', []):
            clause('must').append(
                templates.get_terms('tags', [required.lower()]))
        for optional in self.query.get('or', []):
            clause('should').extend(
                templates.get_fast_tag_template(optional.lower()))
        if self.query.get('or'):
            query['bool']['minimum_number_should_match'] = 1
        for excluded in self.query.get('not', []):
            clause('must_not').append(
                templates.get_tag_template(excluded.lower()))

    if self.intent == 'general':
        name = self.query
        should = clause('should')
        should.append(templates.get_name_template(name))
        should.append(templates.get_fuzzy('name', name, 5))
        should.append(templates.get_fuzzy('alias', name))
        should.append(templates.get_term('members', name))
        query['bool']['minimum_number_should_match'] = 1

    if self.filters:
        clause('must').extend(self.__generate_filter(self.filters))
    if self.nested:
        clause('must').extend(self.__generate_filter(self.nested))
    return query
def generate_query(self, logger=None):
    """Assemble the bool query for the current search.

    Runs intent classification, then adds clauses according to
    ``self.intent`` (``'tag'`` or ``'general'``) and finally appends the
    clauses generated from ``self.filters`` to ``must``.

    Args:
        logger: optional logger; passed on to intent classification and used
            to report the active filters.

    Returns:
        dict: the query; empty when nothing contributed a clause.
    """
    self.__intent_classify(logger)
    query = {}

    def clause(kind):
        # Create the 'bool' section and the requested clause list lazily so
        # an untouched query stays an empty dict.
        return query.setdefault('bool', {}).setdefault(kind, [])

    # A plain string input always gets a boosted term match on the name.
    # The two isinstance checks are kept separate so ``unicode`` is only
    # looked up when the input is not a ``str``.
    if isinstance(self.input, str) or isinstance(self.input, unicode):
        clause('should').append(templates.get_term('name', self.input, 10))

    if self.intent == 'tag':
        # ``self.query`` is read as a mapping of 'and' / 'or' / 'not' lists.
        for tag in self.query.get('and', []):
            clause('must').append(
                templates.get_terms('tags', [tag.lower()]))
        for tag in self.query.get('or', []):
            clause('should').extend(
                templates.get_fast_tag_template(tag.lower()))
        if self.query.get('or'):
            query['bool']['minimum_number_should_match'] = 1
        for tag in self.query.get('not', []):
            clause('must_not').append(
                templates.get_tag_template(tag.lower()))

    if self.intent == 'general':
        name = self.query
        should = clause('should')
        should.append(templates.get_name_template(name))
        should.append(templates.get_fuzzy('name', name, 5))
        should.append(templates.get_fuzzy('alias', name))
        should.append(templates.get_term('members', name))
        query['bool']['minimum_number_should_match'] = 1

    if self.filters:
        if logger:
            # The message needs a placeholder: logging formats it as
            # ``msg % args``, so without one the filters were never shown.
            logger.info('Filter %s', self.filters)
        clause('must').extend(self.__generate_filter(self.filters))
    return query
def generate_query(self, field, extend=False):
    """Build the query matching ``self.input`` against ``field``.

    * Input that (stripped) is a key of ``self.news_tags`` becomes a term
      query on ``features``.
    * With ``extend`` set, input no longer than ``self.tag_max_len`` is
      matched through a string template on its segmented keywords.
    * Otherwise string input is matched by an exact term (short input) or a
      string template whose last argument is ``'100%'`` up to length 20 and
      ``'95%'`` beyond.

    Every non-empty entry of ``self.filters`` adds a terms clause to ``must``.

    Args:
        field: name of the field to search.
        extend: selects the "extend" behaviour described above and disables
            the ``match_all`` fallback.

    Returns:
        dict: the query; ``{"match_all": {}}`` when ``extend`` is false and
        nothing was generated.
    """
    query = dict()

    def is_keyword(token):
        # Keep segments that are not stopwords and longer than one character.
        return (token not in self.stopwords) and len(token) > 1

    def segmented_keywords():
        return filter(is_keyword, self.seg.cut4search(self.input))

    def require(piece):
        query.setdefault('bool', {}).setdefault('must', []).append(piece)

    if self.input:
        stripped = self.input.strip()
        if stripped in self.news_tags:
            # NOTE(review): filter clauses below are added to this same
            # dict -- confirm the term template tolerates a sibling 'bool'.
            query = templates.get_term('features',
                                       self.news_tags.get(self.input.strip()))
        elif extend:
            if self.len <= self.tag_max_len:
                words = segmented_keywords()
                require(templates.get_string_template(
                    field, ' '.join(words), '100%'))
        elif isinstance(self.input, str) or isinstance(self.input, unicode):
            # Segmentation runs before the length checks, as before.
            words = segmented_keywords()
            if self.len <= self.tag_max_len:
                require(templates.get_term(field, self.input))
            elif self.len <= 20:
                require(templates.get_string_template(
                    field, ' '.join(words), '100%'))
            else:
                require(templates.get_string_template(
                    field, ' '.join(words), '95%'))

    if self.filters:
        for filter_name in self.filters.keys():
            if self.filters.get(filter_name):
                require(templates.get_terms(filter_name,
                                            self.filters.get(filter_name)))

    if extend or query:
        return query
    return {"match_all": {}}
def extend_query(self, es_query):
    """Add intent-specific ``should`` clauses to ``es_query`` in place.

    Args:
        es_query: query dict to extend; its ``bool`` / ``should`` entries are
            created when missing.

    Returns:
        dict or None: ``es_query`` for the intents ``'yellow'``, ``'tag'``,
        ``'general'`` and ``'empty'`` (the last one leaves it untouched);
        ``None`` for any other intent.
    """
    query = self.__intent_classify()
    # Debug output, same text as before but written so that it also parses
    # under Python 3.
    print('intent %s' % (self.intent,))
    print('query %s' % (query,))

    if self.intent == 'yellow':
        es_query.setdefault('bool', {}).setdefault('should', []).append(
            templates.get_term('tags', query, boost=5))
        return es_query
    elif self.intent == 'tag':
        # ``query`` is unpacked into positional arguments here.
        es_query.setdefault('bool', {}).setdefault('should', []).append(
            templates.get_keyword_template(*query))
        es_query["bool"]["minimum_number_should_match"] = 1
        return es_query
    elif self.intent == 'general':
        # Use setdefault like the other branches: indexing
        # es_query["bool"]["should"] directly raised KeyError when the
        # incoming query had no bool/should section yet.
        should = es_query.setdefault('bool', {}).setdefault('should', [])
        should.append(templates.get_fuzzy('name', query, boost=50))
        should.append(templates.get_fuzzy('alias', query))
        should.append(templates.get_term('description', query))
        should.append(templates.get_term('tags', query, boost=5))
        es_query["bool"]["minimum_number_should_match"] = 1
        return es_query
    elif self.intent == 'empty':
        return es_query
    # Any other intent yields None, exactly as the original fall-through did.
    return None
def generate_query(self):
    """Assemble the investor search query.

    After intent classification:

    * ``'tag'`` adds a nested ``investor_tag`` clause for ``self.query`` to
      ``must``.
    * ``'general'`` sets ``should`` to the investor-name completion clauses
      and requires at least one of them to match.

    When ``self.online`` is truthy a term clause on ``online`` is added, and
    clauses generated from ``self.filters`` are appended to ``must``.

    Returns:
        dict: the query; empty when nothing contributed a clause.
    """
    query = {}
    self.__intent_classify()

    def must():
        # Create the 'bool' section and its 'must' list on first use.
        return query.setdefault('bool', {}).setdefault('must', [])

    if self.intent == 'tag':
        tag_clause = templates.get_nested_template(
            'investor_tag', 'investor_tag.tag', self.query)
        must().append(tag_clause)
    elif self.intent == 'general':
        completions = templates.get_investor_name_completion(self.query)
        bool_section = query.setdefault('bool', {})
        bool_section['should'] = completions
        bool_section['minimum_number_should_match'] = 1

    if self.online:
        must().append(templates.get_term('online', True))
    if self.filters:
        must().extend(self.__generate_filter(self.filters))
    return query
def __generate_filter(self, kv):
    """Build the filter clauses for ``kv``, always scoped to ``self.org``.

    The first clause is a term match of ``oid`` against ``self.org``. Each
    non-empty option among ``status``, ``location``, ``assignee``,
    ``sponsor``, ``portfolioStatus``, ``stage`` and ``portfolioStage`` then
    adds one terms clause, in that order.

    Args:
        kv: mapping of filter name to the requested values.

    Returns:
        list: the generated clauses.
    """
    fields = ('status', 'location', 'assignee', 'sponsor', 'portfolioStatus',
              'stage', 'portfolioStage')
    pending = [templates.get_term('oid', self.org)]
    for name in fields:
        values = kv.get(name)
        if not values:
            continue
        if name == 'sponsor':
            # Falsy sponsor entries are dropped before building the clause.
            values = [s for s in values if s]
        pending.append(templates.get_terms(name, values))
    return pending
def __generate_filter(self, kv):
    """Build the list of filter clauses described by ``kv``.

    Every recognised, non-empty option in ``kv`` contributes one or more
    clauses produced by the ``templates`` helpers; other keys are ignored.

    Args:
        kv: mapping of filter name to the requested value(s). Keys read
            here: ``round``, ``date``, ``location``, ``domestic``, ``team``,
            ``threshold``, ``yellow``, ``status``, ``tag``, ``operator``,
            ``category``, ``industry``, ``topic`` and ``funding_date``.

    Returns:
        list: the generated clauses, in the order the options are checked.
    """
    # Number of days covered by each "recent funding" keyword.
    recent_windows = {'latest7': 7, 'latest30': 30, 'latest90': 90}

    pending = []
    if kv.get('round'):
        pending.append(templates.get_terms('round', kv.get('round', [])))
    if kv.get('date'):
        pending.append(
            templates.get_terms('established',
                                self.__extend_date(kv.get('date'))))
    if kv.get('location'):
        pending.append(templates.get_terms('location', kv.get('location')))
    # Compared against None so that an explicit False still filters.
    if kv.get('domestic', None) is not None:
        pending.append(self.__generate_domestic_filter(kv.get('domestic')))
    if kv.get('team'):
        pending.append(templates.get_terms('team', kv.get('team')))
    if kv.get('threshold'):
        # ``collection_ranking_threshold`` is a module-level value; it is only
        # read here, so no ``global`` declaration is needed.
        pending.append(
            templates.get_range('ranking_score', 1,
                                collection_ranking_threshold))
    if kv.get('yellow'):
        pending.append(templates.get_terms('yellows', kv.get('yellow')))
    if kv.get('status'):
        pending.append(templates.get_terms('status', kv.get('status')))
    if kv.get('tag'):
        if kv.get('operator', 'and') == 'or':
            # OR: a single terms clause over all tags.
            pending.append(
                templates.get_terms('tags',
                                    [t.lower() for t in kv.get('tag')]))
        else:
            # AND (the default): one term clause per tag.
            for t in kv.get('tag', []):
                pending.append(
                    templates.get_term('tags', t.lower().strip()))
    if kv.get('category'):
        pending.append(templates.get_terms('category', kv.get('category')))
    if kv.get('industry'):
        pending.append(
            templates.get_nested_term('nested_tag.id', 'industry',
                                      kv.get('industry')))
    if kv.get('topic'):
        pending.append(
            templates.get_nested_term('nested_tag.id', 'topic',
                                      kv.get('topic')))
    if kv.get('funding_date'):
        today = datetime.today().date()
        for fd in kv.get('funding_date'):
            days = recent_windows.get(fd)
            if days is not None:
                start, end = today - timedelta(days=days), today
                pending.append(
                    templates.get_range('last_funding_date', end, start))
            elif fd.isalnum():
                # isalnum() also accepts letters and non-year numbers, for
                # which strptime raises ValueError. Skip those values (as
                # non-alphanumeric values already are) instead of aborting
                # the whole query.
                try:
                    start = datetime.strptime('%s-01-01' % fd, '%Y-%m-%d')
                    end = datetime.strptime('%s-12-31' % fd, '%Y-%m-%d')
                except ValueError:
                    continue
                pending.append(
                    templates.get_range('last_funding_date', end, start))
    return pending