示例#1
0
文件: query.py 项目: yujiye/Codes
    def __generate_filter(self, kv):
        """Translate a filter mapping into a list of query clauses.

        Every recognised key of ``kv`` that holds a usable value contributes
        one or more clauses built through the ``templates`` helpers. Keys that
        are missing or empty are skipped.

        :param kv: mapping of filter name to value(s); only read via ``get``.
        :return: list of clauses, in the order the keys are handled below.
        """
        pending = []
        if kv.get('round'):
            pending.append(templates.get_round(kv.get('round')))
        if kv.get('location'):
            pending.append(templates.get_terms('location', kv.get('location')))
        # ``domestic`` / ``public`` may legitimately be falsy (e.g. False), so
        # they are tested against None instead of by truthiness.
        if kv.get('domestic', None) is not None:
            pending.append(self.__generate_domestic_filter(kv.get('domestic')))
        if kv.get('public', None) is not None:
            pending.append(self.__generate_public_filter(kv.get('public')))
        if kv.get('tag'):
            if kv.get('operator', 'or') == 'and':
                # 'and': one term clause per tag.
                for t in kv.get('tag', []):
                    pending.append(
                        templates.get_term('tags',
                                           t.lower().strip()))
            else:
                # Default ('or'): a single terms clause over all tags.
                # NOTE(review): unlike the 'and' branch, tags are not
                # stripped here - confirm whether that is intentional.
                pending.append(
                    templates.get_terms('tags',
                                        [t.lower() for t in kv.get('tag')]))
        if kv.get('investor'):
            for iv in kv.get('investor'):
                pending.append(templates.get_fuzzy('investor', iv))
        if kv.get('investorId'):
            pending.append(
                templates.get_terms('investorId', kv.get('investorId')))
        if kv.get('previous_investor'):
            for iv in kv.get('previous_investor'):
                pending.append(templates.get_fuzzy('previous_investor', iv))
        if kv.get('date'):
            pending.append(templates.get_terms('funding_year', kv.get('date')))
        if kv.get('funding_date'):
            today = datetime.today().date()
            # Every relative window shares the same ``end``: today + 1 day.
            end = today + timedelta(days=1)
            recent_days = {'latest7': 7, 'latest30': 30, 'latest90': 90}
            for fd in kv.get('funding_date'):
                if fd in recent_days:
                    start = today - timedelta(days=recent_days[fd])
                    pending.append(
                        templates.get_range('last_funding_date', end, start))
                elif fd.isdigit():
                    # Only purely numeric tokens are treated as a year.
                    # ``isalnum`` would also admit alphabetic tokens, on which
                    # ``int`` raises ValueError.
                    pending.append(templates.get_term('funding_year', int(fd)))
        if kv.get('source'):
            pending.append(templates.get_terms('source', kv.get('source')))
        return pending
示例#2
0
文件: query.py 项目: yujiye/Codes
    def generate_query(self):
        """Assemble the query dict for the classified intent and filters.

        Intent classification runs first. A 'tag' intent reads the 'and',
        'or' and 'not' tag lists from ``self.query``; a 'general' intent
        treats ``self.query`` as a name. Clauses produced from
        ``self.filters`` and ``self.nested`` are then added under ``must``.

        :return: the query dict; empty when nothing contributed a clause.
        """
        self.__intent_classify()
        query = {}

        if self.intent == 'tag':
            for required in self.query.get('and', []):
                must = query.setdefault('bool', {}).setdefault('must', [])
                must.append(templates.get_terms('tags', [required.lower()]))
            for optional in self.query.get('or', []):
                should = query.setdefault('bool', {}).setdefault('should', [])
                should.extend(
                    templates.get_fast_tag_template(optional.lower()))
            if self.query.get('or'):
                query['bool']['minimum_number_should_match'] = 1
            for excluded in self.query.get('not', []):
                must_not = query.setdefault('bool', {}).setdefault(
                    'must_not', [])
                must_not.append(
                    templates.get_tag_template(excluded.lower()))

        if self.intent == 'general':
            name = self.query
            bool_clause = query.setdefault('bool', {})
            should = bool_clause.setdefault('should', [])
            should.append(templates.get_name_template(name))
            should.append(templates.get_fuzzy('name', name, 5))
            should.append(templates.get_fuzzy('alias', name))
            should.append(templates.get_term('members', name))
            bool_clause['minimum_number_should_match'] = 1

        if self.filters:
            filter_clauses = self.__generate_filter(self.filters)
            query.setdefault('bool', {}).setdefault('must', []).extend(
                filter_clauses)
        if self.nested:
            nested_clauses = self.__generate_filter(self.nested)
            query.setdefault('bool', {}).setdefault('must', []).extend(
                nested_clauses)
        return query
示例#3
0
文件: query.py 项目: yujiye/Codes
    def generate_query(self, logger=None):
        """Build the query dict for the current input, intent and filters.

        Intent classification runs first. String input always contributes a
        term clause on ``name``. A 'tag' intent reads the 'and', 'or' and
        'not' tag lists from ``self.query``; a 'general' intent treats
        ``self.query`` as a name. Clauses from ``self.filters`` are finally
        added under ``must``.

        :param logger: optional logger; forwarded to the intent classifier and
            used to log the active filters.
        :return: the query dict; empty when nothing contributed a clause.
        """
        self.__intent_classify(logger)
        query = {}
        if isinstance(self.input, str) or isinstance(self.input, unicode):
            query.setdefault('bool', {}).setdefault('should', []).append(
                templates.get_term('name', self.input, 10))
        if self.intent == 'tag':
            for tag in self.query.get('and', []):
                query.setdefault('bool', {}).setdefault('must', []).append(
                    templates.get_terms('tags', [tag.lower()]))
            for tag in self.query.get('or', []):
                query.setdefault('bool', {}).setdefault('should', []).extend(
                    templates.get_fast_tag_template(tag.lower()))
            if self.query.get('or'):
                query['bool']['minimum_number_should_match'] = 1
            for tag in self.query.get('not', []):
                query.setdefault('bool', {}).setdefault('must_not', []).append(
                    templates.get_tag_template(tag.lower()))
        if self.intent == 'general':
            name = self.query
            should = query.setdefault('bool', {}).setdefault('should', [])
            should.append(templates.get_name_template(name))
            should.append(templates.get_fuzzy('name', name, 5))
            should.append(templates.get_fuzzy('alias', name))
            should.append(templates.get_term('members', name))
            query['bool']['minimum_number_should_match'] = 1

        if self.filters:
            if logger:
                # The message needs a placeholder: logging %-formats it with
                # the extra arguments, and 'Filter' alone has nowhere to put
                # them, so the record fails to format.
                logger.info('Filter %s', self.filters)
            query.setdefault('bool', {}).setdefault('must', []).extend(
                self.__generate_filter(self.filters))
        return query
示例#4
0
文件: query.py 项目: yujiye/Codes
    def generate_query(self, field, extend=False):
        """Build a query for ``self.input`` against ``field``, plus filters.

        Input that matches a known news tag becomes a term clause on
        'features'. Otherwise the clause depends on ``extend`` and on
        ``self.len`` relative to ``self.tag_max_len``. Every non-empty entry
        of ``self.filters`` adds a terms clause under ``must``.

        :param field: name of the field to query.
        :param extend: selects the extend-mode branch; in that mode an empty
            query is returned as-is instead of being replaced by match_all.
        :return: the query dict, or ``{"match_all": {}}`` when not extending
            and nothing was produced.
        """

        def is_keyword(token):
            # Keep tokens that are not stopwords and are longer than one char.
            return (token not in self.stopwords) and len(token) > 1

        query = dict()
        if self.input:
            text = self.input.strip()
            if text in self.news_tags:
                query = templates.get_term(
                    'features', self.news_tags.get(self.input.strip()))
            elif extend:
                if self.len <= self.tag_max_len:
                    tokens = filter(is_keyword,
                                    self.seg.cut4search(self.input))
                    clause = templates.get_string_template(
                        field, ' '.join(tokens), '100%')
                    query.setdefault('bool', {}).setdefault(
                        'must', []).append(clause)
            elif isinstance(self.input, str) or isinstance(
                    self.input, unicode):
                # Segmentation happens up front, before the length checks.
                tokens = filter(is_keyword, self.seg.cut4search(self.input))
                if self.len <= self.tag_max_len:
                    clause = templates.get_term(field, self.input)
                else:
                    match_ratio = '100%' if self.len <= 20 else '95%'
                    clause = templates.get_string_template(
                        field, ' '.join(tokens), match_ratio)
                query.setdefault('bool', {}).setdefault(
                    'must', []).append(clause)

        if self.filters:
            for name in self.filters.keys():
                values = self.filters.get(name)
                if values:
                    query.setdefault('bool', {}).setdefault('must', []).append(
                        templates.get_terms(name, values))

        if extend or query:
            return query
        return {"match_all": {}}
示例#5
0
文件: query.py 项目: yujiye/Codes
    def extend_query(self, es_query):

        query = self.__intent_classify()
        print 'intent', self.intent
        print 'query', query

        if self.intent == 'yellow':
            es_query.setdefault('bool', {}).setdefault('should', []).append(
                templates.get_term('tags', query, boost=5))
            return es_query
        elif self.intent == 'tag':
            # es_query.setdefault('bool', {}).setdefault('should', []).append(templates.get_term('name', ''.join(query),
            #                                                                                    boost=50))
            es_query.setdefault('bool', {}).setdefault('should', []).append(
                templates.get_keyword_template(*query))
            # es_query["bool"]["should"].append(templates.get_fuzzy('name', ''.join(query), boost=10))
            es_query["bool"]["minimum_number_should_match"] = 1
            # for tag in query:
            #     es_query.setdefault('bool', {}).setdefault('should', []).append(
            #         templates.get_term('description', tag, 0.5))
            # print es_query
            return es_query
        elif self.intent == 'general':
            es_query["bool"]["should"].append(
                templates.get_fuzzy('name', query, boost=50))
            es_query["bool"]["should"].append(
                templates.get_fuzzy('alias', query))
            es_query["bool"]["should"].append(
                templates.get_term('description', query))
            es_query["bool"]["should"].append(
                templates.get_term('tags', query, boost=5))
            es_query["bool"]["minimum_number_should_match"] = 1
            # print es_query
            return es_query
        elif self.intent == 'empty':
            return es_query
示例#6
0
文件: query.py 项目: yujiye/Codes
    def generate_query(self):
        """Build the query dict from the classified intent and filters.

        A 'tag' intent adds a nested clause on 'investor_tag' under ``must``;
        a 'general' intent sets ``should`` to the investor name completion
        clauses. The online flag and ``self.filters`` add further ``must``
        clauses.

        :return: the query dict; empty when nothing contributed a clause.
        """
        query = {}
        self.__intent_classify()

        if self.intent == 'tag':
            nested_clause = templates.get_nested_template(
                'investor_tag', 'investor_tag.tag', self.query)
            must = query.setdefault('bool', {}).setdefault('must', [])
            must.append(nested_clause)
        elif self.intent == 'general':
            bool_clause = query.setdefault('bool', {})
            bool_clause['should'] = templates.get_investor_name_completion(
                self.query)
            query['bool']['minimum_number_should_match'] = 1

        if self.online:
            must = query.setdefault('bool', {}).setdefault('must', [])
            must.append(templates.get_term('online', True))
        if self.filters:
            must = query.setdefault('bool', {}).setdefault('must', [])
            must.extend(self.__generate_filter(self.filters))
        return query
示例#7
0
文件: query.py 项目: yujiye/Codes
    def __generate_filter(self, kv):
        """Translate a filter mapping into a list of query clauses.

        The list always starts with a term clause on 'oid' for ``self.org``.
        Each supported key of ``kv`` holding a truthy value then adds one
        terms clause, in the fixed order listed below.

        :param kv: mapping of filter name to value(s); only read via ``get``.
        :return: list of clauses.
        """
        pending = [templates.get_term('oid', self.org)]
        supported = ('status', 'location', 'assignee', 'sponsor',
                     'portfolioStatus', 'stage', 'portfolioStage')
        for name in supported:
            values = kv.get(name)
            if not values:
                continue
            if name == 'sponsor':
                # Falsy sponsor entries are dropped; the clause is still
                # emitted even if that leaves the list empty.
                values = [s for s in values if s]
            pending.append(templates.get_terms(name, values))
        return pending
示例#8
0
文件: query.py 项目: yujiye/Codes
    def __generate_filter(self, kv):
        """Translate a filter mapping into a list of query clauses.

        Every recognised key of ``kv`` that holds a usable value contributes
        one or more clauses built through the ``templates`` helpers. Keys that
        are missing or empty are skipped.

        :param kv: mapping of filter name to value(s); only read via ``get``.
        :return: list of clauses, in the order the keys are handled below.
        """
        pending = []
        if kv.get('round'):
            pending.append(templates.get_terms('round', kv.get('round', [])))
        if kv.get('date'):
            pending.append(
                templates.get_terms('established',
                                    self.__extend_date(kv.get('date'))))
        if kv.get('location'):
            pending.append(templates.get_terms('location', kv.get('location')))
        # ``domestic`` may legitimately be falsy, so test against None.
        if kv.get('domestic', None) is not None:
            pending.append(self.__generate_domestic_filter(kv.get('domestic')))
        if kv.get('team'):
            pending.append(templates.get_terms('team', kv.get('team')))
        if kv.get('threshold'):
            # The threshold value itself is not used; a truthy flag enables a
            # range clause built from the module-level
            # ``collection_ranking_threshold``.
            pending.append(
                templates.get_range('ranking_score', 1,
                                    collection_ranking_threshold))
        if kv.get('yellow'):
            pending.append(templates.get_terms('yellows', kv.get('yellow')))
        if kv.get('status'):
            pending.append(templates.get_terms('status', kv.get('status')))
        if kv.get('tag'):
            if kv.get('operator', 'and') == 'or':
                # 'or': a single terms clause over all tags.
                # NOTE(review): unlike the 'and' branch, tags are not
                # stripped here - confirm whether that is intentional.
                pending.append(
                    templates.get_terms('tags',
                                        [t.lower() for t in kv.get('tag')]))
            else:
                # Default ('and'): one term clause per tag.
                for t in kv.get('tag', []):
                    pending.append(
                        templates.get_term('tags',
                                           t.lower().strip()))
        if kv.get('category'):
            pending.append(templates.get_terms('category', kv.get('category')))
        if kv.get('industry'):
            pending.append(
                templates.get_nested_term('nested_tag.id', 'industry',
                                          kv.get('industry')))
        if kv.get('topic'):
            pending.append(
                templates.get_nested_term('nested_tag.id', 'topic',
                                          kv.get('topic')))
        if kv.get('funding_date'):
            today = datetime.today().date()
            recent_days = {'latest7': 7, 'latest30': 30, 'latest90': 90}
            for fd in kv.get('funding_date'):
                if fd in recent_days:
                    start = today - timedelta(days=recent_days[fd])
                    end = today
                    pending.append(
                        templates.get_range('last_funding_date', end, start))
                elif fd.isdigit():
                    # Only purely numeric tokens are treated as a year.
                    # ``isalnum`` would also admit alphabetic tokens, on which
                    # ``strptime`` raises ValueError. Numeric tokens that do
                    # not parse as ``%Y`` still raise.
                    start = datetime.strptime('%s-01-01' % fd, '%Y-%m-%d')
                    end = datetime.strptime('%s-12-31' % fd, '%Y-%m-%d')
                    pending.append(
                        templates.get_range('last_funding_date', end, start))
        return pending