Пример #1
0
def build_query_phrase_as_regex(phrase):
    """Build a Q object that matches ``phrase`` in post content via a regex.

    The phrase is split with RE_SPLIT_WORD_UNICODE. The resulting words are
    joined with PG_RE_NON_WORD_CHARS, each word optionally preceded by the
    Hebrew letters VAV and/or HEI, and the whole pattern is wrapped in
    PG_RE_PHRASE_START / PG_RE_PHRASE_END.

    :param phrase: the search phrase to convert.
    :return: ``Q(content__iregex=<regex>)``, or an empty ``Q()`` when the
        phrase contains no words at all.
    """
    # re.split() never returns an empty list: it yields '' for an empty
    # phrase and for leading/trailing/adjacent separators.  Drop those empty
    # tokens, otherwise the "no words" case is never detected and the regex
    # ends up containing prefix-only "words".
    sub_phrases = [word for word in re.split(RE_SPLIT_WORD_UNICODE, phrase) if word]
    if not sub_phrases:
        # Nothing to search for - ignore this phrase.
        return Q()

    # Allow VAV and/or HEI in front of each word.
    # NOTE: regex syntax is DB dependent - this works on postgres
    # NOTE(review): words are inserted into the regex unescaped; this assumes
    # RE_SPLIT_WORD_UNICODE strips every regex metacharacter - TODO confirm.
    re_words = [u'\u05D5?\u05D4?' + word for word in sub_phrases]
    regex = PG_RE_PHRASE_START + PG_RE_NON_WORD_CHARS.join(re_words) + PG_RE_PHRASE_END
    return Q(content__iregex=regex)
Пример #2
0
def build_query_phrase_as_regex(phrase):
    """Return a Q object searching post content for ``phrase`` with a regex.

    ``phrase`` is split with RE_SPLIT_WORD_UNICODE; every non-empty piece is
    prefixed with an optional VAV and an optional HEI, the pieces are joined
    with PG_RE_NON_WORD_CHARS and the result is enclosed between
    PG_RE_PHRASE_START and PG_RE_PHRASE_END.

    :param phrase: the search phrase to convert.
    :return: ``Q(content__iregex=<regex>)``; an empty ``Q()`` if splitting
        the phrase leaves no words.
    """
    tokens = re.split(RE_SPLIT_WORD_UNICODE, phrase)
    # re.split() always returns at least one element and emits '' around
    # leading/trailing/adjacent separators, so a plain truthiness test on the
    # list can never detect "no words".  Keep only the non-empty tokens.
    sub_phrases = [token for token in tokens if token]
    if not sub_phrases:
        # No words in this phrase - ignore it.
        return Q()

    # NOTE: regex syntax is DB dependent - this works on postgres
    # NOTE(review): tokens go into the pattern unescaped; presumably
    # RE_SPLIT_WORD_UNICODE leaves only word characters - verify.
    re_words = [u'\u05D5?\u05D4?' + word for word in sub_phrases]
    regex = PG_RE_PHRASE_START + PG_RE_NON_WORD_CHARS.join(re_words) + PG_RE_PHRASE_END
    return Q(content__iregex=regex)
Пример #3
0
def parse_to_q_object(get_params, params_dict):
    """Translate parsed search parameters into a single Q object.

    The parts are combined with join_queries() as::

        members AND (tags <operator> keywords) [AND NOT excluded]

    where <operator> is ``and_`` when the request parameter
    'tags_and_search_str_operator' equals 'and_operator' and ``or_``
    otherwise (the parameter defaults to DEFAULT_OPERATOR).

    :param get_params: mapping of request parameters; only
        'tags_and_search_str_operator' is read here.
    :param params_dict: dict providing the keys 'members_ids', 'tags_ids',
        'phrases' and 'excluded'.
    :return: the combined Q object.
    """
    member_query = MEMBER_MODEL.objects.filter(id__in=params_dict['members_ids'])
    fixed_member_ids = [member.id for member in member_query]
    # In elections mode feeds are looked up through the persona's
    # alt_object_id instead of its object_id.
    if IS_ELECTIONS_MODE:
        feeds = Facebook_Feed.objects.filter(persona__alt_object_id__in=fixed_member_ids)
    else:
        feeds = Facebook_Feed.objects.filter(persona__object_id__in=fixed_member_ids)

    # all members asked for (through member search of party search), with OR between them.
    # When no feed matches, the member part stays an empty Q().
    members_OR_parties_Q = Q()
    if feeds:
        members_OR_parties_Q = Q(feed__id__in=[x.id for x in feeds])

    # tags - search for all tags specified by their id
    tags_Q = Q()
    if params_dict['tags_ids']:
        # | Q(tags__synonyms__proper_form_of_tag__id=tag_id)
        tag_bundle_ids = [tag.id for tag in Tag.objects.filter_bundle(id__in=params_dict['tags_ids'])]
        tags_to_queries = [Q(tags__id=tag_id) for tag_id in tag_bundle_ids]
        if settings.DEBUG:
            # Single-argument print(...) gives the same output on Python 2
            # and is also valid Python 3.
            print('tags_to_queries: %d' % len(tags_to_queries))
        for query_for_single_tag in tags_to_queries:
            # tags_Q is empty for the first iteration
            tags_Q = join_queries(query_for_single_tag, tags_Q, or_)

    if settings.DEBUG:
        print('tags_Q: %s' % (tags_Q,))

    # keywords - searched both in content and in tags of posts.
    search_str_Q = Q()
    # If regexes cause security / performance problem - switch this flag
    # to False to use a (not as good) text search instead
    use_regex = True
    for phrase in params_dict['phrases']:
        if use_regex:
            # Split into words (remove whitespace, punctuation etc.).
            # re.split() never returns an empty list and yields '' around
            # leading/trailing/adjacent separators, so drop empty tokens;
            # otherwise the "no words" check below could never trigger and
            # the regex would contain prefix-only "words".
            words = [word for word in re.split(RE_SPLIT_WORD_UNICODE, phrase) if word]
            # If there are no words - skip the content regex for this phrase
            if words:
                # Build regex - all words we've found separated by 'non-word' characters
                # and also allow VAV and/or HEI in front of each word.
                # NOTE: regex syntax is DB dependent - this works on postgres
                re_words = [u'\u05D5?\u05D4?' + word for word in words]
                regex = PG_RE_PHRASE_START + PG_RE_NON_WORD_CHARS.join(re_words) + PG_RE_PHRASE_END
                search_str_Q = join_queries(Q(content__iregex=regex), search_str_Q, or_)
        else:
            # Fallback code to use if we want to disable regex-based search
            search_str_Q = join_queries(Q(content__icontains=phrase), search_str_Q, or_)
        # The raw phrase is always matched against tag names as well.
        search_str_Q = Q(tags__name__contains=phrase) | search_str_Q

    # tags query and keyword query concatenated. Logic is set according to request input
    request_operator = get_params.get('tags_and_search_str_operator', DEFAULT_OPERATOR)

    if settings.DEBUG:
        print('selected_operator: %s' % (request_operator,))
    selected_operator = and_ if request_operator == 'and_operator' else or_

    search_str_with_tags_Q = join_queries(tags_Q, search_str_Q, selected_operator)

    if settings.DEBUG:
        print('search_str_with_tags_Q: %s' % (search_str_with_tags_Q,))
        print('\n')

    query_Q = join_queries(members_OR_parties_Q, search_str_with_tags_Q, and_)

    if params_dict['excluded']:
        # Exclude the listed statuses: NOT (status_id IN excluded).
        excluded_query = Q(status_id__in=params_dict['excluded'])
        excluded_query.negate()
        query_Q = join_queries(query_Q, excluded_query, and_)

    if settings.DEBUG:
        # Debug-only helpers are imported lazily.
        from pprint import pprint
        from qserializer import QSerializer
        qser = QSerializer()
        print('query to be executed:')
        pprint(qser.serialize(query_Q.clone()))
    return query_Q
Пример #4
0
def parse_to_q_object(get_params, params_dict):
    """Build the Q object for a search request from its parsed parameters.

    Three partial queries are built and combined with join_queries():

    * members  - posts whose feed belongs to one of the requested members;
    * tags     - posts carrying any tag of the requested tag bundles;
    * keywords - posts whose content or tag names match any search phrase.

    Tags and keywords are joined with ``and_`` when the request parameter
    'tags_and_search_str_operator' equals 'and_operator', otherwise with
    ``or_`` (the parameter defaults to DEFAULT_OPERATOR).  The result is
    AND-ed with the members query and, when given, with the negated
    'excluded' status ids.

    :param get_params: mapping of request parameters; only
        'tags_and_search_str_operator' is read here.
    :param params_dict: dict providing the keys 'members_ids', 'tags_ids',
        'phrases' and 'excluded'.
    :return: the combined Q object.
    """
    member_query = MEMBER_MODEL.objects.filter(
        id__in=params_dict['members_ids'])
    fixed_member_ids = [member.id for member in member_query]
    # Elections mode matches feeds by persona.alt_object_id, regular mode by
    # persona.object_id.
    if IS_ELECTIONS_MODE:
        feeds = Facebook_Feed.objects.filter(
            persona__alt_object_id__in=fixed_member_ids)
    else:
        feeds = Facebook_Feed.objects.filter(
            persona__object_id__in=fixed_member_ids)

    # all members asked for (through member search of party search), with OR between them.
    # Stays an empty Q() when no feed matches.
    members_OR_parties_Q = Q()
    if feeds:
        members_OR_parties_Q = Q(feed__id__in=[x.id for x in feeds])

    # tags - search for all tags specified by their id
    tags_Q = Q()
    if params_dict['tags_ids']:
        # | Q(tags__synonyms__proper_form_of_tag__id=tag_id)
        tag_bundle_ids = [
            tag.id for tag in Tag.objects.filter_bundle(
                id__in=params_dict['tags_ids'])
        ]
        tags_to_queries = [Q(tags__id=tag_id) for tag_id in tag_bundle_ids]
        if settings.DEBUG:
            # print(...) with one pre-formatted argument behaves the same on
            # Python 2 and also parses on Python 3.
            print('tags_to_queries: %d' % len(tags_to_queries))
        for query_for_single_tag in tags_to_queries:
            # tags_Q is empty for the first iteration
            tags_Q = join_queries(query_for_single_tag, tags_Q, or_)

    if settings.DEBUG:
        print('tags_Q: %s' % (tags_Q,))

    # keywords - searched both in content and in tags of posts.
    search_str_Q = Q()
    # If regexes cause security / performance problem - switch this flag
    # to False to use a (not as good) text search instead
    use_regex = True
    for phrase in params_dict['phrases']:
        if use_regex:
            # Split into words (remove whitespace, punctuation etc.).
            # re.split() always returns at least one element and produces ''
            # for an empty phrase or around leading/trailing/adjacent
            # separators; filter those out so that the emptiness check below
            # is meaningful and no prefix-only "word" enters the regex.
            words = [
                word for word in re.split(RE_SPLIT_WORD_UNICODE, phrase)
                if word
            ]
            # If there are no words - skip the content regex for this phrase
            if words:
                # Build regex - all words we've found separated by 'non-word' characters
                # and also allow VAV and/or HEI in front of each word.
                # NOTE: regex syntax is DB dependent - this works on postgres
                re_words = [u'\u05D5?\u05D4?' + word for word in words]
                regex = PG_RE_PHRASE_START + PG_RE_NON_WORD_CHARS.join(
                    re_words) + PG_RE_PHRASE_END
                search_str_Q = join_queries(Q(content__iregex=regex),
                                            search_str_Q, or_)
        else:
            # Fallback code to use if we want to disable regex-based search
            search_str_Q = join_queries(Q(content__icontains=phrase),
                                        search_str_Q, or_)
        # The raw phrase is always matched against tag names as well.
        search_str_Q = Q(tags__name__contains=phrase) | search_str_Q

    # tags query and keyword query concatenated. Logic is set according to request input
    request_operator = get_params.get('tags_and_search_str_operator',
                                      DEFAULT_OPERATOR)

    if settings.DEBUG:
        print('selected_operator: %s' % (request_operator,))
    selected_operator = and_ if request_operator == 'and_operator' else or_

    search_str_with_tags_Q = join_queries(tags_Q, search_str_Q,
                                          selected_operator)

    if settings.DEBUG:
        print('search_str_with_tags_Q: %s' % (search_str_with_tags_Q,))
        print('\n')

    query_Q = join_queries(members_OR_parties_Q, search_str_with_tags_Q, and_)

    if params_dict['excluded']:
        # Exclude the listed statuses: NOT (status_id IN excluded).
        excluded_query = Q(status_id__in=params_dict['excluded'])
        excluded_query.negate()
        query_Q = join_queries(query_Q, excluded_query, and_)

    if settings.DEBUG:
        # Debug-only helpers are imported lazily.
        from pprint import pprint
        from qserializer import QSerializer
        qser = QSerializer()
        print('query to be executed:')
        pprint(qser.serialize(query_Q.clone()))
    return query_Q