示例#1
0
文件: views.py 项目: srhrshr/zds-site
    def get_queryset_posts(self):
        """Build the scored query used to search forum posts.

        Only posts that are visible and belong to one of ``self.authorized_forums`` are matched;
        ``self.search_query`` is searched in the ``text_html`` field.

        The score of a hit is multiplied by a configured weight when the post:

        + is the first one in a topic (``position`` is 1);
        + is marked as "useful";
        + has a like/dislike ratio above 1 (more likes than dislikes);
        + has a like/dislike ratio below 1 (more dislikes than likes).

        The weights come from ``settings.ZDS_APP['search']['boosts']['post']``.
        """

        boosts = settings.ZDS_APP['search']['boosts']['post']

        is_post = Match(_type='post')
        in_authorized_forum = Terms(forum_pk=self.authorized_forums)
        is_visible = Term(is_visible=True)
        matches_text = MultiMatch(query=self.search_query, fields=['text_html'])
        query = is_post & in_authorized_forum & is_visible & matches_text

        # Each pair is (filter selecting the boosted posts, key of its weight in the settings).
        boosted_filters = (
            (Match(position=1), 'if_first'),
            (Match(is_useful=True), 'if_useful'),
            (Range(like_dislike_ratio={'gt': 1}), 'ld_ratio_above_1'),
            (Range(like_dislike_ratio={'lt': 1}), 'ld_ratio_below_1'),
        )
        functions_score = [
            {'filter': boosted, 'weight': boosts[boost_key]}
            for boosted, boost_key in boosted_filters
        ]

        return FunctionScore(query=query, boost_mode='multiply', functions=functions_score)
示例#2
0
def create_enum_range_min_max_filter(field, query_term):
    """Build a filter on a min/max pair of enum fields from a queried range.

    For example the fields `climbing_rating_min` and `climbing_rating_max` are
    combined into a single search field. Searching for `crat=4c,6b` returns
    the waypoints where the min/max climbing ratings match the given range.

    `query_term` is split on commas and every part is converted with
    `map_enum_to_int` and `field._enum_mapper`; parts converted to `None` are
    dropped. Unless exactly two values remain, `None` is returned.

    The filter excludes documents for which `field.field_min` is above the
    second value, `field.field_max` is below the first value, or both fields
    are missing.
    """
    enum_mapper = field._enum_mapper
    bounds = []
    for term in query_term.split(','):
        number = map_enum_to_int(enum_mapper, term)
        if number is not None:
            bounds.append(number)

    if len(bounds) != 2:
        return None

    lower, upper = bounds
    starts_above = Range(**{field.field_min: {'gt': upper}})
    ends_below = Range(**{field.field_max: {'lt': lower}})
    has_no_range = Bool(must=[
        Missing(field=field.field_min),
        Missing(field=field.field_max),
    ])
    return Bool(must_not=Bool(should=[starts_above, ends_below, has_no_range]))
示例#3
0
 def inner(values):
     """Build a query for records whose opening/closing period overlaps a date range.

     ``values[0]`` must look like ``<start>--<end>``. A record matches when
     its ``closing_date`` lies within the range, its ``opening_date`` lies
     within the range, or it opens before the start and closes after the end.
     """
     start, end = values[0].split("--")

     closes_within = Range(closing_date={"gte": start, "lte": end})
     opens_within = Range(opening_date={"gte": start, "lte": end})

     opens_before = Range(opening_date={"lt": start})
     closes_after = Range(closing_date={"gt": end})
     spans_whole_range = Q("bool", must=[opens_before, closes_after])

     alternatives = [closes_within, opens_within, spans_whole_range]
     return Q("bool", should=alternatives)
示例#4
0
    def get_queryset_posts(self):
        """Build the scored query used to search forum posts.

        Only posts that are visible and belong to one of ``self.authorized_forums`` are matched;
        ``self.search_query`` is searched in the ``text_html`` field.

        The score of a hit is multiplied by a configured weight when the post:

        + is the first one in a topic (``position`` is 1);
        + is marked as "useful";
        + has a like/dislike ratio above 1 (more likes than dislikes);
        + has a like/dislike ratio below 1 (more dislikes than likes).

        The weights come from ``settings.ZDS_APP["search"]["boosts"]["post"]``.
        """

        post_boosts = settings.ZDS_APP["search"]["boosts"]["post"]

        type_clause = Match(_type="post")
        forum_clause = Terms(forum_pk=self.authorized_forums)
        visibility_clause = Term(is_visible=True)
        text_clause = MultiMatch(query=self.search_query, fields=["text_html"])
        query = type_clause & forum_clause & visibility_clause & text_clause

        # Each pair is (filter selecting the boosted posts, key of its weight in the settings).
        weighted_filters = (
            (Match(position=1), "if_first"),
            (Match(is_useful=True), "if_useful"),
            (Range(like_dislike_ratio={"gt": 1}), "ld_ratio_above_1"),
            (Range(like_dislike_ratio={"lt": 1}), "ld_ratio_below_1"),
        )
        functions_score = [
            {"filter": selector, "weight": post_boosts[weight_key]}
            for selector, weight_key in weighted_filters
        ]

        return FunctionScore(query=query, boost_mode="multiply", functions=functions_score)
示例#5
0
 def test_create_filter_date(self):
     """Check `create_filter` for the `idate` parameter of `SearchImage`.

     Terms without a usable date give no filter; otherwise a `Range` on
     `date_time` is expected, a single date serving as both bounds.
     """
     for term in ('', 'invalid date'):
         self.assertEqual(create_filter('idate', term, SearchImage), None)

     # (query term, expected lower bound, expected upper bound)
     cases = (
         ('2016-01-01', '2016-01-01', '2016-01-01'),
         ('2016-01-01,invalid date', '2016-01-01', '2016-01-01'),
         ('2016-01-01,2016-01-01', '2016-01-01', '2016-01-01'),
         ('2016-01-01,2016-01-03', '2016-01-01', '2016-01-03'),
     )
     for term, first_day, last_day in cases:
         self.assertEqual(
             create_filter('idate', term, SearchImage),
             Range(date_time={'gte': first_day, 'lte': last_day}))
示例#6
0
def create_date_range_filter(field, query_term):
    """Build a filter on a start/end date pair of fields from one or two dates.

    This filter type is currently only used for Outing.date_start/date_end.

    Valid query terms are:
        2016-01-01
        2016-01-01,2016-01-01
        2016-01-01,2016-01-03

    Terms for which `parse_date` returns `None` are ignored; when nothing is
    left, `None` is returned.

    A single date (or twice the same date) must lie between
    `field.field_date_start` and `field.field_date_end`. For a real range,
    documents starting after the second date or ending before the first
    date are excluded.
    """
    dates = [
        parsed for parsed in map(parse_date, query_term.split(','))
        if parsed is not None
    ]
    if not dates:
        return None

    first = dates[0]
    if len(dates) == 1 or first == dates[1]:
        # single date
        return Bool(must=[
            Range(**{field.field_date_start: {'lte': first}}),
            Range(**{field.field_date_end: {'gte': first}}),
        ])

    # date range
    last = dates[1]
    starts_after = Range(**{field.field_date_start: {'gt': last}})
    ends_before = Range(**{field.field_date_end: {'lt': first}})
    return Bool(must_not=Bool(should=[starts_after, ends_before]))
示例#7
0
def get_filters(data):
    """Translate search parameters in `data` into a list of filter queries.

    Supported keys, in the order their filters are emitted:
    `price_min`/`price_max` and `area_min`/`area_max` (inclusive bounds),
    `rooms` (comma-separated values), `city` (exact value), `balcony_type`
    (comma-separated values), and the flags `mortgage`/`army_mortgage`.

    Returns an empty list when none of the keys applies.
    """
    filters = []

    # (key in `data`, document field, range operator)
    range_bounds = (
        ('price_min', 'price', 'gte'),
        ('price_max', 'price', 'lte'),
        ('area_min', 'area', 'gte'),
        ('area_max', 'area', 'lte'),
    )
    for key, doc_field, operator_name in range_bounds:
        if key in data:
            filters.append(Range(**{doc_field: {operator_name: data[key]}}))

    if 'rooms' in data:
        filters.append(Terms(rooms=data['rooms'].split(',')))

    if 'city' in data:
        filters.append(Term(city=data['city']))

    if 'balcony_type' in data:
        filters.append(Terms(balcony_type=data['balcony_type'].split(',')))

    # A truthy flag adds a filter requiring the document field to be False.
    for flag in ('mortgage', 'army_mortgage'):
        if data.get(flag, False):
            filters.append(Term(**{flag: False}))

    return filters
示例#8
0
 def test_create_filter_integer_range(self):
     """Check `create_filter` for the min/max integer range parameters `ele` and `height`."""
     # (search parameter, query term) pairs that must not produce a filter
     no_filter_cases = (
         ('not a valid field', '1200,2400'),
         ('ele', ''),
         ('ele', 'invalid term'),
         ('ele', '1200'),
         ('ele', '1200,invalid term'),
         ('ele', 'invalid term,2400'),
     )
     for param, term in no_filter_cases:
         self.assertEqual(create_filter(param, term, SearchRoute), None)

     # (search parameter, search model, expected min field, expected max field)
     range_cases = (
         ('ele', SearchRoute, 'elevation_min', 'elevation_max'),
         ('height', SearchWaypoint, 'height_min', 'height_max'),
     )
     for param, search_model, field_min, field_max in range_cases:
         expected = Bool(must_not=Bool(should=[
             Range(**{field_min: {'gt': 2400}}),
             Range(**{field_max: {'lt': 1200}}),
             Bool(must=[
                 Missing(field=field_min),
                 Missing(field=field_max)
             ])
         ]))
         self.assertEqual(
             create_filter(param, '1200,2400', search_model), expected)
def test_keyed_range_filter():
    """Check that `keyed_range_filter` combines the bounds of the selected keys."""
    bounds_by_key = {"None": {"lt": 1}, "1+": {"gte": 1}}
    build_query = keyed_range_filter("field", bounds_by_key)

    # (selected keys, bounds expected in the resulting Range)
    expectations = (
        (["None"], {"lt": 1}),
        (["1+"], {"gte": 1}),
        (["None", "1+"], {"gte": 1, "lt": 1}),
    )
    for selected, bounds in expectations:
        assert build_query(selected) == Range(field=bounds)
示例#10
0
 def _get_query_for_range(self, sources, lookup, value):
     """OR together one `Range` query per source field.

     Each query compares its source with `value` using the range operator
     `lookup`. Returns `None` when `sources` is empty.
     """
     clauses = [Range(**{source: {lookup: value}}) for source in sources]
     if not clauses:
         # Deliberately not Q(): an empty Q() would be MatchAll().
         return None

     combined = clauses[0]
     for clause in clauses[1:]:
         combined = combined | clause
     return combined
示例#11
0
def test_date_range_filter(app, input_date):
    """Test date range filter date validation and query.

    When `input_date` is accepted, each filter must return a `Range` on
    "field" using its own operator ("gte" / "lte"). Otherwise both filters
    must reject the value with `ValueError`.
    """
    from_filter = date_range_filter("field", "gte")
    to_filter = date_range_filter("field", "lte")

    try:
        assert from_filter([input_date]) == Range(field={"gte": input_date})
        assert to_filter([input_date]) == Range(field={"lte": input_date})
    except (ValueError, AssertionError):
        # One `raises` block per call: inside a single block, anything after
        # the first raising call would never be executed.
        with pytest.raises(ValueError):
            from_filter([input_date])
        with pytest.raises(ValueError):
            to_filter([input_date])
示例#12
0
 def test_create_filter_date_range(self):
     """Check `create_filter` for the `date` parameter of `SearchOuting`.

     Terms without a usable date give no filter, a single day must lie
     between `date_start` and `date_end`, and a range excludes outings that
     start after it or end before it.
     """
     for term in ('', 'invalid date'):
         self.assertEqual(create_filter('date', term, SearchOuting), None)

     single_day_terms = (
         '2016-01-01',
         '2016-01-01,invalid date',
         '2016-01-01,2016-01-01',
     )
     for term in single_day_terms:
         self.assertEqual(
             create_filter('date', term, SearchOuting),
             Bool(must=[
                 Range(date_start={'lte': '2016-01-01'}),
                 Range(date_end={'gte': '2016-01-01'})
             ]))

     outside_range = Bool(should=[
         Range(date_start={'gt': '2016-01-03'}),
         Range(date_end={'lt': '2016-01-01'})
     ])
     self.assertEqual(
         create_filter('date', '2016-01-01,2016-01-03', SearchOuting),
         Bool(must_not=outside_range))
示例#13
0
def test_current_ranged_loans_filter(app):
    """Check the queries built by `overdue_loans_filter` for its two options."""
    with app.app_context():
        build_query = overdue_loans_filter("field")

        active_states = current_app.config["CIRCULATION_STATES_LOAN_ACTIVE"]
        active_loans = Terms(state=active_states)

        # "Overdue": the field is before today, among active loans.
        overdue = build_query(["Overdue"])
        before_today = Range(field={"lt": str(arrow.utcnow().date())})
        assert overdue == before_today & active_loans

        # "Upcoming return": the field is within the next 7 days, among active loans.
        upcoming = build_query(["Upcoming return"])
        within_a_week = Range(field={
            "gte": str(arrow.utcnow().date()),
            "lte": str((arrow.utcnow() + timedelta(days=7)).date()),
        })
        assert upcoming == within_a_week & active_loans
示例#14
0
def create_date_filter(field, query_term):
    """Build a date-range filter on the single field `field._field_date`.

    This filter type is currently only used for Image.date_time

    Valid query terms are:
        2016-01-01
        2016-01-01,2016-01-01
        2016-01-01,2016-01-03

    Terms for which `parse_date` returns `None` are ignored; when nothing is
    left, `None` is returned. A single date is used as both the lower and
    the upper (inclusive) bound.
    """
    dates = [
        parsed for parsed in map(parse_date, query_term.split(','))
        if parsed is not None
    ]
    if not dates:
        return None

    first = dates[0]
    last = dates[1] if len(dates) > 1 else first
    return Range(**{field._field_date: {'gte': first, 'lte': last}})
示例#15
0
 def inner(values):
     """Build an OR of `Range` queries on `field`, one per value.

     Each value has the form ``<start>--<end>``; an empty side leaves that
     bound open. Bounds are inclusive unless the start is prefixed with
     ``>`` or the end with ``<``, which makes that bound strict.
     """
     # (strict prefix, strict operator, inclusive operator) for start and end
     bound_specs = (('>', 'gt', 'gte'), ('<', 'lt', 'lte'))

     clauses = []
     for raw_range in values:
         bounds = {}
         endpoints = raw_range.split('--')
         for endpoint, (prefix, strict_op, inclusive_op) in zip(endpoints,
                                                                bound_specs):
             if not endpoint:
                 continue
             if endpoint[0] == prefix:
                 # Strict bound: drop the prefix character.
                 bounds[strict_op] = endpoint[1:]
             else:
                 bounds[inclusive_op] = endpoint
         clauses.append(Range(**{field: bounds}))
     return Bool(should=clauses)
示例#16
0
def etl(index='cf_rfem_hist_price',
        start_date='2018-12-26',
        end_date='2019-03-25',
        symbol='rfem'):
    """Fetch price-change features for one symbol over a date range.

    Searches `index` for documents whose `date` lies between `start_date`
    and `end_date` (both inclusive) and whose `symbol` equals `symbol`.
    Only the first 100 hits are requested.

    Args:
        index (str): Name of the index to search.
        start_date (str): Lower bound for the `date` field.
        end_date (str): Upper bound for the `date` field.
        symbol (str): Value the `symbol` field must have.

    Returns:
        list: One `[changeOverTime, changePercent, volume]` list per hit,
        read from the hit's `_source`.
    """
    # Called for its side effect only; presumably it sets up the
    # 'high_level_client' connection used below -- TODO confirm.
    ESLowLevelClientByConnection.get_instance()

    search = Search(index=index, using='high_level_client')[0:100]
    # The bounds and the symbol come from the arguments; they used to be
    # hard-coded to the default values, so the arguments had no effect.
    search.query = Q(
        Bool(must=[
            Range(date={
                'gte': start_date,
                'lte': end_date
            }),
            Term(symbol=symbol)
        ]))
    # NOTE: a daily date-histogram aggregation used to be built here but was
    # never attached to the search, so it had no effect and was removed.
    response = search.execute()

    return [[
        hit['_source']['changeOverTime'],
        hit['_source']['changePercent'],
        hit['_source']['volume'],
    ] for hit in response['hits']['hits']]
示例#17
0
def create_range_filter(field, query_term):
    """Creates an ElasticSearch range filter.

    E.g. the call `create_range_filter(elevation_field, '1500,2500') creates
    the following filter:
        {'range': {'elevation': {'gte': 1500, 'lte': 2500}}}

    `query_term` is split on commas and each part is converted with
    `parse_num`. The first value is the inclusive lower bound, the second
    (if any) the inclusive upper bound. A bound that is `None` or NaN is
    left open.

    Returns `None` when no usable bound remains, so that a term made only of
    NaN values does not produce an empty `Range` filter.
    """
    range_values = [parse_num(term) for term in query_term.split(',')]

    # `str.split` always yields at least one item.
    range_from = range_values[0]
    range_to = range_values[1] if len(range_values) > 1 else None

    range_params = {}
    if range_from is not None and not math.isnan(range_from):
        range_params['gte'] = range_from
    if range_to is not None and not math.isnan(range_to):
        range_params['lte'] = range_to

    if not range_params:
        return None

    return Range(**{field._name: range_params})
示例#18
0
    def get_value_filter(self, filter_value):
        """Build a `Range` query on `self._params['field']` from a textual value.

        `filter_value` is either a single integer (``"5"``: both bounds are
        5) or ``"<from>-<to>"``, where an empty side leaves that bound open
        (``"3-"``, ``"-7"``). Because the text is split on the first ``-``, a
        leading ``-`` is read as an open lower bound, not as a minus sign.

        If any part is not a valid integer, both bounds are dropped and the
        returned `Range` has no limits.
        """
        try:
            if '-' in filter_value:
                low_text, high_text = filter_value.split('-', 1)
                low = int(low_text) if low_text else None
                high = int(high_text) if high_text else None
            else:
                # A single value, including 0, is both bounds. Emptiness is
                # only tested on the text parts above, never on the parsed
                # number, so that "0" is not mistaken for a missing bound.
                low = high = int(filter_value)
        except ValueError:
            low = high = None

        limits = {}
        if low is not None:
            limits['gte'] = low
        if high is not None:
            limits['lte'] = high

        return Range(**{self._params['field']: limits})
示例#19
0
def getUSWDSquery(indexbase, query, version, agency, domaintype, sort):
    """Build the search over the ``<indexbase>-uswds2`` index.

    Args:
        indexbase: Prefix of the index name.
        query: Minimum ``data.total_score``; anything that cannot be
            converted to an integer is treated as 0.
        version: 'all versions' (no restriction), 'detected versions'
            (``data.uswdsversion`` matching ``v*``) or an exact version.
        agency: 'All Agencies' (no restriction) or an agency name.
        domaintype: 'All Branches' (no restriction) or a domain type.
        sort: 'Score' sorts by descending ``data.total_score``; any other
            value sorts by ``domain``.

    Returns:
        The configured `Search` object (not executed).
    """
    index = indexbase + '-uswds2'
    try:
        query = int(query)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures fall back to 0; other exceptions
        # (e.g. KeyboardInterrupt) are no longer swallowed.
        query = 0

    s = Search(using=es, index=index)
    if sort == 'Score':
        s = s.sort('-data.total_score')
    else:
        s = s.sort('domain')
    s = s.query(Bool(should=[Range(data__total_score={'gte': query})]))

    # NOTE(review): the values below are wrapped in double quotes without
    # escaping; confirm they cannot contain query_string syntax.
    if version == 'detected versions':
        s = s.query("query_string", query='v*', fields=['data.uswdsversion'])
    elif version != 'all versions':
        versionquery = '"' + version + '"'
        s = s.query("query_string", query=versionquery, fields=['data.uswdsversion'])
    if agency != 'All Agencies':
        agencyquery = '"' + agency + '"'
        s = s.query("query_string", query=agencyquery, fields=['agency'])
    if domaintype != 'All Branches':
        domaintypequery = '"' + domaintype + '"'
        s = s.query("query_string", query=domaintypequery, fields=['domaintype'])

    return s
示例#20
0
def build_range_query(field, operator, value):
    """Create a 'Range' condition for Elasticsearch.

    Args:
        field (str): Field the condition applies to.
        operator (str): Comparison operator to use (<, <=, >, >=).
        value (int): Number the field should be compared with.

    Returns:
        Query: Range condition for Elasticsearch.

    Raises:
        ValueError: If `operator` is not one of the supported operators.

    """
    # (comparison operator, name of the Elasticsearch range operator)
    operator_names = (
        ('<', 'lt'),
        ('<=', 'lte'),
        ('>', 'gt'),
        ('>=', 'gte'),
    )
    for symbol, es_operator in operator_names:
        if operator == symbol:
            return Range(**{field: {es_operator: value}})

    raise ValueError('Invalid operator.')
示例#21
0
def create_enum_range_filter(field, query_term):
    """Build a range filter on the enum field `field._name`.

    E.g. the call `create_enum_range_filter(quality, 'medium,great')
    creates the following filter:
        {'range': {'quality': {'gte': 2, 'lte': 4}}}

    `query_term` is split on commas and every part is converted with
    `map_enum_to_int` and `field._enum_mapper`. The first value is the
    inclusive lower bound, the second (if any) the inclusive upper bound; a
    bound converted to `None` is left open. Returns `None` when neither
    bound is available.
    """
    enum_mapper = field._enum_mapper
    bounds = [
        map_enum_to_int(enum_mapper, term) for term in query_term.split(',')
    ]
    lower = bounds[0] if bounds else None
    upper = bounds[1] if len(bounds) > 1 else None

    range_params = {}
    if lower is not None:
        range_params['gte'] = lower
    if upper is not None:
        range_params['lte'] = upper

    # Empty exactly when both bounds are None.
    if not range_params:
        return None
    return Range(**{field._name: range_params})
示例#22
0
    def inner(values):
        """Build a `Range` query on `field` from a single ``<start>--<end>`` value.

        Exactly one value containing exactly one ``--`` is required, and it
        must not be just ``--``; otherwise a `RESTValidationError` is raised.
        An empty side leaves that bound open. Bounds are inclusive unless the
        start is prefixed with ``>`` or the end with ``<``. When a date math
        expression is configured for a side, it is appended as
        ``<bound>||<date math>``. Extra `kwargs` are passed on to the range.
        """
        is_single_range = len(values) == 1 and values[0].count('--') == 1
        if not is_single_range or values[0] == '--':
            raise RESTValidationError(
                errors=[FieldError(field, 'Invalid range format.')])

        # (strict prefix, strict operator, inclusive operator, date math)
        # for the start and the end of the range.
        bound_specs = (
            ('>', 'gt', 'gte', start_date_math),
            ('<', 'lt', 'lte', end_date_math),
        )

        range_args = {}
        endpoints = values[0].split('--')
        for endpoint, spec in zip(endpoints, bound_specs):
            prefix, strict_op, inclusive_op, date_math = spec
            if endpoint == '':
                continue

            if endpoint[0] == prefix:
                # Strict bound: drop the prefix character.
                op_name = strict_op
                endpoint = endpoint[1:]
            else:
                op_name = inclusive_op

            if date_math:
                endpoint = '{0}||{1}'.format(endpoint, date_math)

            range_args[op_name] = endpoint

        # Parsed bounds take precedence over same-named extra arguments.
        args = kwargs.copy()
        args.update(range_args)

        return Range(**{field: args})
示例#23
0
def test_date_range_filter(app):
    """Test date range filter date validation and query.

    For every input, either both filters return a `Range` on "field" with
    their own operator ("gte" / "lte"), or both reject the input with
    `ValueError`.
    """

    tests = ["", "a string", "2020-02-02"]

    # The filters do not depend on the input, so they are built once.
    from_filter = date_range_filter("field", "gte")
    to_filter = date_range_filter("field", "lte")

    for input_date in tests:
        try:
            assert from_filter([input_date
                                ]) == Range(field={"gte": input_date})
            assert to_filter([input_date]) == Range(field={"lte": input_date})
        except (ValueError, AssertionError):
            # One `raises` block per call: inside a single block, anything
            # after the first raising call would never be executed.
            with pytest.raises(ValueError):
                from_filter([input_date])
            with pytest.raises(ValueError):
                to_filter([input_date])
示例#24
0
def test_range_filter():
    """Check the queries built by `range_filter` with date math on both bounds."""
    build_query = range_filter('test',
                               start_date_math='startmath',
                               end_date_math='endmath')

    # (range value, bounds expected in the resulting Range)
    expectations = (
        ('1821--1940', {
            'gte': '1821||startmath',
            'lte': '1940||endmath',
        }),
        ('>1821--', {'gt': '1821||startmath'}),
        ('1821--<1940', {
            'gte': '1821||startmath',
            'lt': '1940||endmath'
        }),
    )
    for raw_range, bounds in expectations:
        assert build_query([raw_range]) == Range(test=bounds)

    # Values without a separator, or made of the separator only, are rejected.
    for malformed in ('2016', '--'):
        assert pytest.raises(RESTValidationError, build_query, [malformed])
示例#25
0
 def test_create_filter_range(self):
     """Check `create_filter` for the numeric range parameter `walt` of `SearchWaypoint`."""
     # (search parameter, query term) pairs that must not produce a filter
     no_filter_cases = (
         ('not a valid field', '1500,2500'),
         ('walt', ''),
         ('walt', 'not a, number'),
     )
     for param, term in no_filter_cases:
         self.assertEqual(create_filter(param, term, SearchWaypoint), None)

     # (query term, bounds expected on the `elevation` field)
     range_cases = (
         ('1500,2500', {'gte': 1500, 'lte': 2500}),
         ('1500.5,2500.99', {'gte': 1500.5, 'lte': 2500.99}),
         ('1500,', {'gte': 1500}),
         ('1500', {'gte': 1500}),
         (',2500', {'lte': 2500}),
         ('NaN,2500', {'lte': 2500}),
         ('1500,NaN', {'gte': 1500}),
     )
     for term, bounds in range_cases:
         self.assertEqual(
             create_filter('walt', term, SearchWaypoint),
             Range(elevation=bounds))
def main():
    """Delete documents older than a configured number of years from an index.

    Settings are read from ``config-cleaner.yml``, under the ``settings``
    key: ``log-level``, ``thread_count``, ``cluster`` (comma-separated
    hosts), ``index``, ``field`` (the field compared with the age limit)
    and ``years``.

    Matching documents are deleted with `parallel_bulk`; failures are logged
    and counted but do not stop the run.

    Raises:
        SystemExit: With status 1 when the configuration file is not valid
            YAML.
    """
    config_file = "config-cleaner.yml"
    logging.basicConfig(format='%(asctime)s %(levelname)s:%(message)s',
                        datefmt='%Y-%m-%d, %H:%M:%S')
    with open(config_file, 'r') as stream:
        try:
            config = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            logging.error(f"Cannot load file: {config_file} - Error: {exc}")
            # Report the failure to the caller; the former bare `exit()`
            # ended the process with a success status and depended on the
            # `site` module being loaded.
            raise SystemExit(1)

    settings = config['settings']
    logging.getLogger().setLevel(settings['log-level'])
    thread_count = settings['thread_count']
    cluster = settings['cluster']
    index = settings['index']
    field = settings['field']
    years = settings['years']
    logging.info('Loaded settings started')

    logging.getLogger('elasticsearch').setLevel(logging.WARN)
    logging.info(f"connecting to cluster {cluster} index {index}")
    client = Elasticsearch(cluster.split(","))
    s = Search(using=client, index=index)
    total = s.count()
    old_documents = s.filter(
        # adapt to months if needed
        # https://elasticsearch-dsl.readthedocs.io/en/2.2.0/search_dsl.html#queries
        Range(**{field: {
            "lt": f"now-{years}y"
        }}))

    matches = old_documents.count()
    if matches < 1:
        logging.warning(
            f"no documents older than {years} year(s) found ({total} total)")
        return
    items_deleted = 0
    items_failed = 0
    logging.info(
        f"{matches} of {total} documents older than {years} year(s), deleting..."
    )

    # With both raise_* flags off, failures are reported through `success`
    # instead of interrupting the bulk run.
    for success, info in parallel_bulk(client,
                                       delete_actions(old_documents.scan()),
                                       thread_count=thread_count,
                                       raise_on_exception=False,
                                       raise_on_error=False):
        if not success:
            logging.warning(f"failed: {info}")
            items_failed += 1
        else:
            items_deleted += 1
            # Progress message every 10000 deleted documents.
            if items_deleted % 10000 == 0:
                logging.info(f"deleted documents: {items_deleted}")

    logging.info(
        f"deleted: {items_deleted} failed: {items_failed} documents from index: {index}"
    )
示例#27
0
    def search_author_messages(self, author):
        """Print every message written by `author` since one day ago.

        Messages are matched on ``author.id`` and on a ``timestamp`` from
        ``now-1d`` (inclusive) up to ``now`` (exclusive). Each message is
        printed as a dict, preceded by a separator line.
        """
        search = MessageDoc.search()
        search = search.filter('match', **{'author.id': author.id})
        search = search.query(Range(timestamp={'gte': 'now-1d', 'lt': 'now'}))

        separator = '-' * 40
        for message in search.scan():
            print(separator)
            print(message.to_dict())
示例#28
0
    def inner(values):
        """Build a `Range` query on `field` from the selected range keys.

        The bounds of every entry of `range_query` whose key is in `values`
        are merged (later entries override earlier ones), then the extra
        `kwargs` are applied on top.
        """
        bounds = {}
        for range_key, mappings in range_query.items():
            if range_key not in values:
                continue
            bounds.update(mappings)

        bounds.update(kwargs)

        return Range(**{field: bounds})
示例#29
0
    def inner(values):
        """Build the date query for the first of `values`.

        ``"upcoming"`` selects records whose ``opening_date`` is today (UTC)
        or later, ``"all"`` applies no restriction, and anything else is
        handed to `date_range_filter`.
        """
        selected = values[0] if values else values

        if selected == "upcoming":
            today = datetime.utcnow().strftime("%Y-%m-%d")
            return Range(opening_date={"gte": today})
        elif selected == "all":
            return Q()
        else:
            return date_range_filter(values)
示例#30
0
    def inner(values):
        """Build the date query for the first of `values`.

        ``"upcoming"`` selects records whose ``start_datetime`` is the
        current UTC time or later, ``"all"`` applies no restriction, and
        anything else is handed to `date_range_filter`.
        """
        selected = values[0] if values else values

        if selected == "upcoming":
            current_time = datetime.utcnow().isoformat()
            return Range(start_datetime={"gte": current_time})
        elif selected == "all":
            return Q()
        else:
            return date_range_filter(values)