Exemplo n.º 1
0
def term_facet(host='localhost:9200',
               terms=None,
               _type='habakkuk',
               date_filter=None,
               size=10):
    """Run term facets against the ``<_type>-*`` indices and print the counts.

    Args:
        host: ``host:port`` string handed to ``ES``.
        terms: Field names to facet on; defaults to ``['bibleverse']``.
        _type: Document type; also used as the prefix of the index pattern.
        date_filter: Optional ``(start, end)`` pair. When given, only
            documents whose ``created_at_date`` lies in ``[start, end)``
            are counted.
        size: Number of terms requested per facet.

    Returns:
        A list of ``(facet_name, term)`` tuples, in the order the facets
        and their terms appear in the result set.
    """
    if terms is None:
        terms = ['bibleverse']

    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        date_range = ESRange('created_at_date', start, end, include_upper=False)
        q = FilteredQuery(q, RangeFilter(qrange=date_range))

    # size=0: only the facet counts are of interest, not the hits.
    q = q.search(size=0)
    for term in terms:
        q.facet.add_term_facet(term, order='count', size=size)

    # Single-argument print calls behave the same on Python 2 and 3.
    print(json.dumps(json.loads(q.to_search_json()), indent=2))

    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    ret = []
    for facet in resultset.facets:
        facet_data = resultset.facets[facet]
        print("Total %s %s" % (facet, facet_data['total']))
        for row in facet_data['terms']:
            print("\t%s %s" % (row['term'], row['count']))
            ret.append((facet, row['term']))

    return ret
Exemplo n.º 2
0
def facets(host='localhost:9200',
          facet_terms=None,
          _type='habakkuk',
          date_filter=None,
          size=10):
    """Collect term-facet counts from the ``<_type>-*`` indices.

    Args:
        host: ``host:port`` string handed to ``ES``.
        facet_terms: Field names to facet on; defaults to ``['bibleverse']``.
        _type: Document type; also used as the prefix of the index pattern.
        date_filter: Optional ``(start, end)`` pair of objects exposing
            ``isoformat()``. When given, only documents whose
            ``created_at_date`` lies in ``[start, end)`` are counted.
        size: Number of terms requested per facet.

    Returns:
        A dict mapping each facet name to a list of
        ``{"value": term, "count": count}`` dicts.
    """
    if facet_terms is None:
        facet_terms = ['bibleverse']

    conn = ES(host)
    q = MatchAllQuery()
    if date_filter:
        start, end = date_filter
        date_range = ESRange('created_at_date',
                             start.isoformat(),
                             end.isoformat(),
                             include_upper=False)
        q = FilteredQuery(q, RangeFilter(qrange=date_range))

    # size=0: only the facet counts are of interest, not the hits.
    q = q.search(size=0)
    for term in facet_terms:
        q.facet.add_term_facet(term, order='count', size=size)

    es_logger.info(q.serialize())

    resultset = conn.search(query=q, indices=_type + '-*', doc_types=[_type])
    ret = {}
    for facet in resultset.facets:
        ret[facet] = [{"value": row['term'], "count": row['count']}
                      for row in resultset.facets[facet]['terms']]

    logger.debug("facets return|'%s'", json.dumps(ret))
    return ret
Exemplo n.º 3
0
def dump(start,end,backupdir,eshost):
    """Export 'habakkuk' documents in a date range to a gzipped JSON-lines file.

    Documents whose ``created_at_date`` lies in ``[start, end)`` are scanned
    from the ``habakkuk-*`` indices, written one JSON object per line to
    ``/tmp/out.json`` and then compressed to
    ``<backupdir>/habakkuk-<YYYY-MM-DD of start>.json.gz``.
    Progress messages go to stderr.

    Args:
        start: Lower bound (inclusive); also formatted into the backup name.
        end: Upper bound (exclusive).
        backupdir: Directory that receives the ``.json.gz`` file.
        eshost: ``host:port`` string handed to ``ES``.

    Returns:
        None. Returns early, without writing anything, when the search
        reports no documents.
    """
    _type = 'habakkuk'
    tmp_path = '/tmp/out.json'

    conn = ES(eshost)
    q = MatchAllQuery()
    q = FilteredQuery(q, RangeFilter(qrange=ESRange('created_at_date',start,end,include_upper=False)))
    q = q.search()
    resultset = conn.search(query=q,indices=_type+"-*", doc_types=[_type], scan=True)
    if not resultset.total:
        sys.stderr.write("no data for %s - %s\n"%(start,end))
        return

    sys.stderr.write("Will write %d lines to %s\n"%(resultset.total, tmp_path))
    # The temp file is opened only once there is data, and `with` guarantees
    # it is closed even if serialization fails part-way through.
    with open(tmp_path,'w') as out:
        for r in resultset:
            out.write(json.dumps(r)+'\n')

    # gzip
    ext = datetime.strftime(start,'%Y-%m-%d')
    backup = os.path.join(backupdir,"habakkuk-%s.json.gz"%ext)

    # Both the uncompressed input and the gzip output are closed on exit.
    with open(tmp_path,'rb') as f_in, gzip.open(backup,'wb') as f_out:
        f_out.writelines(f_in)
    sys.stderr.write("Created %s\n"%backup)
Exemplo n.º 4
0
 def test_facet_filter_is_serialized_correctly(self):
     # A facet_filter attached to a term facet must show up, non-empty,
     # under the facet's "facet_filter" -> "bool" key after serialization.
     not_reviewed = BoolFilter(must_not=TermQuery(field="reviewed", value=True))
     topic_facet = TermFacet(field="topic", facet_filter=not_reviewed)

     search = MatchAllQuery().search(size=0)
     search.facet.add(topic_facet)

     topic_payload = search.serialize()["facets"]["topic"]
     self.assertTrue(topic_payload["facet_filter"]["bool"])
Exemplo n.º 5
0
    def test_nested_filter(self):
        # Each case pairs the prefixes applied to 'shares.orgid' and
        # 'shares.role' inside one nested filter on 'shares' with the number
        # of documents expected to match.
        cases = [
            ('abc', '11', 3),
            ('abc.de', '111', 1),
            ('abc.de.1', '11', 0),
            ('abc', '111', 2),
        ]

        for orgid_prefix, role_prefix, expected_total in cases:
            q = FilteredQuery(
                MatchAllQuery(),
                NestedFilter(
                    'shares',
                    BoolQuery(must=[
                        PrefixQuery('shares.orgid', orgid_prefix),
                        PrefixQuery('shares.role', role_prefix)
                    ])))
            resultset = self.conn.search(query=q,
                                         indices=self.index_name,
                                         doc_types=[self.document_type])
            self.assertEqual(resultset.total, expected_total)
            # Single-argument print calls behave the same on Python 2 and 3.
            print(', '.join([r['body'] for r in resultset]))

        # Trailing blank line, as a bare print statement would emit.
        print('')
Exemplo n.º 6
0
 def test_nested_agg(self):
     # Nested aggregation over the 'resellers' path on a match-all search.
     search = MatchAllQuery().search()
     search.agg.add(NestedAgg(name='nested', path='resellers'))

     resultset = self.conn.search(
         query=search,
         indices=self.index_name,
         doc_types=[self.document_type],
     )

     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u'doc_count': 4})
Exemplo n.º 7
0
 def test_terms_facet(self):
     # Term facet on 'tag' over a match-all search.
     search = MatchAllQuery().search()
     search.facet.add_term_facet('tag')

     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     expected_terms = [{u'count': 2, u'term': u'foo'},
                       {u'count': 1, u'term': u'bar'}]
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.tag.terms, expected_terms)
Exemplo n.º 8
0
 def test_terms_facet(self):
     # Term facet on 'tag'; this variant reads the response as a plain
     # mapping ('hits' / 'facets' keys).
     search = MatchAllQuery().search()
     search.facet.add_term_facet('tag')

     result = self.conn.search(query=search,
                               indexes=["test-index"],
                               doc_types=["test-type"])

     expected_terms = [{u'count': 2, u'term': u'foo'},
                       {u'count': 1, u'term': u'bar'}]
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['tag']['terms'], expected_terms)
Exemplo n.º 9
0
 def test_facet_filter_is_serialized_correctly(self):
     # The serialized 'topic' facet must carry a non-empty bool facet_filter.
     search = MatchAllQuery().search(size=0)

     exclude_reviewed = BoolFilter(
         must_not=TermQuery(field='reviewed', value=True))
     search.facet.add(TermFacet(field='topic', facet_filter=exclude_reviewed))

     payload = search.serialize()
     facet_filter = payload['facets']['topic']['facet_filter']
     self.assertTrue(facet_filter['bool'])
Exemplo n.º 10
0
 def test_terms_facet_filter(self):
     # Filter the match-all query down to tag == 'foo' before faceting,
     # so only the 'foo' term is expected in the facet.
     only_foo = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
     search = only_foo.search()
     search.facet.add_term_facet('tag')

     resultset = self.conn.search(query=search,
                                  indices=["test-index"],
                                  doc_types=["test-type"])

     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['tag']['terms'],
                      [{u'count': 2, u'term': u'foo'}])
Exemplo n.º 11
0
 def test_nested_agg(self):
     # A nested aggregation on 'resellers' is attached to a match-all search.
     resellers_agg = NestedAgg(name="nested", path="resellers")
     search = MatchAllQuery().search()
     search.agg.add(resellers_agg)

     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.aggs.nested, {u"doc_count": 4})
Exemplo n.º 12
0
    def test_max_agg(self):
        # Max aggregation over the 'position' field of every document.
        search = MatchAllQuery().search()
        search.agg.add(MaxAgg(name='max', field='position'))

        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u'value': 3})
Exemplo n.º 13
0
    def test_missing_agg(self):
        # Missing aggregation on the 'integer' field of every document.
        search = MatchAllQuery().search()
        search.agg.add(MissingAgg(name='missing', field='integer'))

        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u'doc_count': 1})
Exemplo n.º 14
0
    def test_min_agg(self):
        # Min aggregation over the 'position' field of every document.
        min_agg = MinAgg(name="min", field="position")
        search = MatchAllQuery().search()
        search.agg.add(min_agg)

        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.min, {u'value': 1})
Exemplo n.º 15
0
    def test_missing_agg(self):
        # Missing aggregation on 'integer', attached to a match-all search.
        missing_agg = MissingAgg(name="missing", field="integer")
        search = MatchAllQuery().search()
        search.agg.add(missing_agg)

        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.missing, {u"doc_count": 1})
Exemplo n.º 16
0
 def test_iterator(self):
     # Iterating a search requested with size=20 yields 20 hits.
     first_page = Search(MatchAllQuery(), size=20)
     resultset = self.conn.search(first_page,
                                  self.index_name,
                                  self.document_type)
     self.assertEqual(sum(1 for _ in resultset), 20)

     # With size=10: slicing yields 10 hits, index 10 is still reachable,
     # and .total reports the full match count.
     second_page = Search(MatchAllQuery(), size=10)
     resultset = self.conn.search(second_page,
                                  self.index_name,
                                  self.document_type)
     self.assertEqual(sum(1 for _ in resultset[:10]), 10)
     self.assertEqual(resultset[10].uuid, "11111")
     self.assertEqual(resultset.total, 1000)
Exemplo n.º 17
0
    def test_max_agg(self):
        # Max aggregation on 'position', attached to a match-all search.
        position_max = MaxAgg(name="max", field="position")
        search = MatchAllQuery().search()
        search.agg.add(position_max)

        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.max, {u"value": 3})
Exemplo n.º 18
0
    def test_GeoDistanceFilter(self):
        # The centre point is given once as a lat/lon mapping and once as a
        # two-element list; both forms are expected to match one document.
        centre_points = ({"lat": 40, "lon": 70}, [70, 40])

        for centre in centre_points:
            geo_filter = GeoDistanceFilter("pin.location", centre, "200km")
            query = FilteredQuery(MatchAllQuery(), geo_filter)
            resultset = self.conn.search(query=query, indices=["test-mindex"])
            self.assertEqual(resultset.total, 1)
Exemplo n.º 19
0
    def test_GeoBoundingBoxFilter(self):
        # Corners given as lat/lon mappings through keyword arguments.
        box_from_dicts = GeoBoundingBoxFilter(
            "pin.location",
            location_tl={"lat": 40.717, "lon": 70.99},
            location_br={"lat": 40.03, "lon": 72.0})
        query = FilteredQuery(MatchAllQuery(), box_from_dicts)
        resultset = self.conn.search(query=query, indices=["test-mindex"])
        self.assertEqual(resultset.total, 1)

        # Corners given positionally as two-element lists.
        box_from_lists = GeoBoundingBoxFilter("pin.location",
                                              [70.99, 40.717],
                                              [74.1, 40.03])
        query = FilteredQuery(MatchAllQuery(), box_from_lists)
        result2 = self.conn.search(query=query, indices=["test-mindex"])
        self.assertEqual(result2.total, 1)
def get_top_authors():
    """Return the authors behind the top terms of the 'author' facet.

    Runs a match-all search with a term facet on ``author`` against the
    ``post`` type of ``ELASTICSEARCH_INDEX`` and resolves every facet term
    through ``get_author(id=...)``, keeping the facet's order.
    """
    search = MatchAllQuery().search()
    search.facet.add_term_facet('author')

    connection = get_connection()
    author_terms = connection.search(search, ELASTICSEARCH_INDEX, 'post').facets['author']['terms']
    return [get_author(id=entry['term']) for entry in author_terms]
Exemplo n.º 21
0
    def test_reverse_nested_agg(self):
        # A reverse-nested aggregation is placed under a nested aggregation
        # on 'resellers'; both levels of the result are checked.
        nested = NestedAgg(
            name='nested',
            path='resellers',
            sub_aggs=[ReverseNestedAgg(name='reverse', field='id')],
        )
        search = MatchAllQuery().search()
        search.agg.add(nested)

        resultset = self.conn.search(query=search,
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)

        nested_result = resultset.aggs.nested
        self.assertEqual(nested_result['doc_count'], 4)
        self.assertEqual(nested_result.reverse, {u'doc_count': 2})
Exemplo n.º 22
0
    def test_reverse_nested_agg(self):
        # Nested aggregation on 'resellers' carrying a reverse-nested
        # sub-aggregation named 'reverse'.
        reverse_agg = ReverseNestedAgg(name="reverse", field="id")
        resellers_agg = NestedAgg(name="nested", path="resellers", sub_aggs=[reverse_agg])

        search = MatchAllQuery().search()
        search.agg.add(resellers_agg)
        resultset = self.conn.search(
            query=search,
            indices=self.index_name,
            doc_types=[self.document_type],
        )

        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.aggs.nested["doc_count"], 4)
        self.assertEqual(resultset.aggs.nested.reverse, {u"doc_count": 2})
Exemplo n.º 23
0
 def test_date_facet_filter(self):
     # Restrict hits to 2011-04-01 <= date < 2011-05-01, then bucket by month.
     april_2011 = ESRange('date',
                          datetime.date(2011, 4, 1),
                          datetime.date(2011, 5, 1),
                          include_upper=False)
     search = FilteredQuery(MatchAllQuery(),
                            RangeFilter(qrange=april_2011)).search()
     search.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))

     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'],
                      [{u'count': 2, u'time': 1301616000000}])
Exemplo n.º 24
0
 def test_terms_facet_filter(self):
     # Facet on 'tag' after filtering to tag == 'foo'; the response is read
     # as a plain mapping ('hits' / 'facets' keys).
     search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
     search.facet.add_term_facet('tag')

     result = self.conn.search(query=search,
                               indexes=["test-index"],
                               doc_types=["test-type"])

     expected_terms = [{u'count': 2, u'term': u'foo'}]
     self.assertEqual(result['hits']['total'], 2)
     self.assertEqual(result['facets']['tag']['terms'], expected_terms)
Exemplo n.º 25
0
 def test_date_facet_filter(self):
     # Only documents dated in [2011-04-01, 2011-05-01) are kept, so the
     # monthly histogram is expected to hold a single bucket.
     date_window = ESRange('date',
                           datetime.date(2011, 4, 1),
                           datetime.date(2011, 5, 1),
                           include_upper=False)
     filtered = FilteredQuery(MatchAllQuery(), RangeFilter(qrange=date_window))

     search = filtered.search()
     search.facet.facets.append(DateHistogramFacet('date_facet',
                                                   field='date',
                                                   interval='month'))
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     expected_entries = [{u'count': 2, u'time': 1301616000000}]
     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets['date_facet']['entries'], expected_entries)
Exemplo n.º 26
0
 def test_date_facet_filter(self):
     # Range filter on 'date' (upper bound excluded) combined with a
     # monthly date histogram facet.
     lower = datetime.date(2011, 4, 1)
     upper = datetime.date(2011, 5, 1)
     range_filter = RangeFilter(qrange=ESRange("date", lower, upper, include_upper=False))

     search = FilteredQuery(MatchAllQuery(), range_filter).search()
     search.facet.facets.append(DateHistogramFacet("date_facet", field="date", interval="month"))
     resultset = self.conn.search(query=search, indices=self.index_name, doc_types=[self.document_type])

     self.assertEqual(resultset.total, 2)
     self.assertEqual(resultset.facets["date_facet"]["entries"], [{u"count": 2, u"time": 1301616000000}])
Exemplo n.º 27
0
 def test_date_facet(self):
     # Monthly date histogram over every document; the response is read as
     # a plain mapping ('hits' / 'facets' keys).
     q = MatchAllQuery()
     q = q.search()
     q.facet.facets.append(DateHistogramFacet('date_facet',
                                              field='date',
                                              interval='month'))
     result = self.conn.search(query=q, indexes=["test-index"], doc_types=["test-type"])
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'], [{u'count': 2, u'time': 1301616000000},
                                                                  {u'count': 1, u'time': 1304208000000}])
     # The bucket keys are epoch milliseconds at midnight UTC. Decoding them
     # as UTC keeps the check independent of the machine's local timezone
     # (local decoding can land on the previous day west of Greenwich).
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
                      datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
                      datetime.date(2011, 5, 1))
Exemplo n.º 28
0
 def test_date_facet(self):
     # Monthly date histogram over every document.
     search = MatchAllQuery().search()
     search.facet.facets.append(DateHistogramFacet('date_facet',
                                                   field='date',
                                                   interval='month'))
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries,
                      [{u'count': 2, u'time': 1301616000000},
                       {u'count': 1, u'time': 1304208000000}])

     # The bucket keys are epoch milliseconds; decoded as UTC they are the
     # first day of April and May 2011.
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
                      datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
                      datetime.date(2011, 5, 1))
Exemplo n.º 29
0
 def test_date_facet(self):
     # Monthly date histogram over every document.
     month_facet = DateHistogramFacet('date_facet',
                                      field='date',
                                      interval='month')
     search = MatchAllQuery().search()
     search.facet.facets.append(month_facet)
     resultset = self.conn.search(query=search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     expected_entries = [{u'count': 2, u'time': 1301616000000},
                         {u'count': 1, u'time': 1304208000000}]
     self.assertEqual(resultset.total, 3)
     self.assertEqual(resultset.facets.date_facet.entries, expected_entries)

     # The bucket keys are epoch milliseconds at midnight UTC. Decoding them
     # as UTC keeps the check independent of the machine's local timezone.
     self.assertEqual(datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
                      datetime.date(2011, 4, 1))
     self.assertEqual(datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
                      datetime.date(2011, 5, 1))
Exemplo n.º 30
0
    def test_GeoPolygonFilter(self):
        # The same three-vertex polygon is given first as lat/lon mappings
        # and then as two-element lists; each form must match one document.
        polygons = (
            [{"lat": 50, "lon": -30},
             {"lat": 30, "lon": -80},
             {"lat": 80, "lon": -90}],
            [[-30, 50],
             [-80, 30],
             [-90, 80]],
        )

        for vertices in polygons:
            geo_filter = GeoPolygonFilter("pin.location", vertices)
            query = FilteredQuery(MatchAllQuery(), geo_filter)
            resultset = self.conn.search(query=query, indices=["test-mindex"])
            self.assertEqual(resultset.total, 1)
Exemplo n.º 31
0
 def create_query(self, sql):
     """Translate a query-description mapping into a ``FilteredQuery``.

     Recognised keys of ``sql`` (all others are ignored):

     * ``termquery``: iterable of single-entry mappings ``{field: value}``;
       each becomes a ``TermFilter``.
     * ``geodistancefilter``: mapping with ``field``, ``fieldvalue`` and
       ``distance``; becomes a ``GeoDistanceFilter`` using ``'arc'`` and
       ``'km'``.
     * ``optype``: ``'and'`` or ``'or'`` (case-insensitive), selecting how
       the collected filters are combined. Defaults to AND when absent.

     Filters are appended to ``self._filters`` and the field names to
     ``self._query_field``.

     Returns:
         ``FilteredQuery(MatchAllQuery(), self._filtertype)``.
     """
     if "termquery" in sql:
         for term in sql['termquery']:
             # Only the first key of each mapping is used. next(iter(...))
             # works on both Python 2 and 3, unlike indexing dict.keys().
             field = next(iter(term))
             self._query_field.append(field)
             self._filters.append(TermFilter(field, term[field]))

     if "geodistancefilter" in sql:
         geoterm = sql['geodistancefilter']
         geofieldname = geoterm['field']
         _gf = GeoDistanceFilter(geofieldname,
                                 geoterm['fieldvalue'],
                                 geoterm['distance'],
                                 'arc', 'km')
         self._filters.append(_gf)
         self._query_field.append(geofieldname)

     if "optype" in sql:
         optype = sql['optype'].lower()
         # NOTE(review): any other optype leaves self._filtertype untouched,
         # so whatever value it already held is used; confirm this is intended.
         if optype == 'and':
             self._filtertype = ANDFilter(self._filters)
         if optype == 'or':
             self._filtertype = ORFilter(self._filters)
     else:
         self._filtertype = ANDFilter(self._filters)

     return FilteredQuery(MatchAllQuery(), self._filtertype)
Exemplo n.º 32
0
def get_query(s):
    """Parse a search string into a query object.

    ``name:value`` and ``name:"quoted value"`` tokens become field queries:
    a ``WildcardQuery`` when the value ends with ``*``, a ``TermQuery``
    otherwise. Whatever text is left over is treated as free text: a lone
    ``*`` becomes ``MatchAllQuery``, anything else a ``TextQuery`` on
    ``_all`` with operator ``and``.

    Returns:
        The single query when exactly one was produced, otherwise a
        ``BoolQuery`` with every produced query added as a ``must`` clause.
    """
    import re

    clauses = []
    leftovers = []
    cursor = 0
    for match in re.finditer(r'(?P<name>\S+):(?P<value>"[^"]+"|\S+)\s*', s):
        # Keep the text between the previous token and this one.
        leftovers.append(s[cursor:match.start()])
        cursor = match.end()

        name = match.group('name')
        value = match.group('value').strip('"')
        query_cls = WildcardQuery if value.endswith("*") else TermQuery
        clauses.append(query_cls(field=name, value=value))
    leftovers.append(s[cursor:])

    freetext = "".join(leftovers).strip()
    if freetext == '*':
        clauses.append(MatchAllQuery())
    elif freetext:
        clauses.append(TextQuery("_all", freetext, operator='and'))

    if len(clauses) == 1:
        return clauses[0]

    combined = BoolQuery()
    for clause in clauses:
        combined.add_must(clause)
    return combined
Exemplo n.º 33
0
    def test_iterator_offset(self):
        # Ask for a block of 10 hits starting at position 10, sorted by
        # ascending 'position'.
        ordering = {'position': {'order': 'asc'}}
        search = Search(MatchAllQuery(), start=10, size=10, sort=ordering)
        resultset = self.conn.search(search,
                                     self.index_name,
                                     self.document_type,
                                     start=10,
                                     size=10)

        # The result set still reports all 1000 matches.
        self.assertEqual(len(resultset), 1000)

        # The records seen must be 10, 11, ... rather than 0, 1, ...
        for expected_position, record in enumerate(resultset, 10):
            self.assertEqual(record.position, expected_position)
Exemplo n.º 34
0
 def test_sorting_by_geolocation(self):
     # Sort every document with a GeoSortOrder on 'location' from (1, 1).
     geo_order = GeoSortOrder(field='location', lat=1, lon=1)
     search = Search(MatchAllQuery())
     search.sort.add(geo_order)

     resultset = self.conn.search(search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     returned_ids = [hit['_id'] for hit in resultset.hits]
     self.assertEqual(returned_ids, ['1', '2', '3'])
Exemplo n.º 35
0
 def test_sorting_by_script(self):
     # Sort every document by the numeric value of a script over 'foo'.
     script_order = ScriptSortOrder("1.0/doc['foo'].value", type='number')
     search = Search(MatchAllQuery())
     search.sort.add(script_order)

     resultset = self.conn.search(search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])

     returned_ids = [hit['_id'] for hit in resultset.hits]
     self.assertEqual(returned_ids, ['3', '2', '1'])
Exemplo n.º 36
0
 def test_sorting_by_foo(self):
     search = Search(MatchAllQuery())
     search.sort.add(SortOrder('foo', order='desc'))
     resultset = self.conn.search(search,
                                  indices=self.index_name,
                                  doc_types=[self.document_type])
     ids = [doc['_id'] for doc in resultset.hits]
     self.assertEqual(ids, ['3', '2', '1'])
Exemplo n.º 37
0
 def records(self):
     """Return an ``ESRecordIterator`` over a scan search of every document.

     Raises:
         RuntimeError: if ``self.connection`` is not set.
     """
     if not self.connection:
         raise RuntimeError("Stream is not initialized")

     from pyes.query import MatchAllQuery

     scan_options = dict(search_type="scan", timeout="5m", size="200")
     results = self.connection.search(MatchAllQuery(), **scan_options)
     return ESRecordIterator(results, self.expand)
Exemplo n.º 38
0
def dump_docs(fp, conn, index_name, doc_type, scroll='5m', encoding='utf8'):
    """Write every document of one index/type to ``fp`` as JSON lines.

    Args:
        fp: Writable file-like object receiving one JSON document per line.
        conn: Connection object whose ``search`` method runs the scan.
        index_name: Index to read from.
        doc_type: Document type to read.
        scroll: Value passed through as the search's ``scroll`` argument.
        encoding: Value passed through to ``json.dumps`` as ``encoding``.
    """
    hits = conn.search(MatchAllQuery(),
                       indices=[index_name],
                       doc_types=[doc_type],
                       scan=True,
                       scroll=scroll)
    for hit in hits:
        fp.write(json.dumps(hit, encoding=encoding))
        fp.write('\n')
Exemplo n.º 39
0
 def rows(self):
     """Return an ``ESRowIterator`` over a scan search of every document.

     The iterator is given ``self.field_names`` as its field list.

     Raises:
         RuntimeError: if ``self.connection`` is not set.
     """
     if not self.connection:
         raise RuntimeError("Stream is not initialized")

     from pyes.query import MatchAllQuery

     field_names = self.field_names
     scan_options = dict(search_type="scan", timeout="5m", size="200")
     results = self.connection.search(MatchAllQuery(), **scan_options)
     return ESRowIterator(results, field_names)
Exemplo n.º 40
0
    def test_nested_filter(self):
        # Each case pairs a filter with the number of documents it should
        # match. The plain term filter on 'nested1.n_field1' is deliberately
        # listed twice, so the same search is issued twice.
        cases = [
            (TermFilter('_all', 'n_value1_1'), 2),
            (TermFilter('nested1.n_field1', 'n_value1_1'), 0),
            (TermFilter('nested1.n_field1', 'n_value1_1'), 0),
            (NestedFilter(
                'nested1',
                BoolQuery(must=[TermQuery('nested1.n_field1', 'n_value1_1')])),
             2),
            (NestedFilter(
                'nested1',
                BoolQuery(must=[
                    TermQuery('nested1.n_field1', 'n_value1_1'),
                    TermQuery('nested1.n_field2', 'n_value2_1')
                ])),
             1),
        ]

        for es_filter, expected_total in cases:
            query = FilteredQuery(MatchAllQuery(), es_filter)
            resultset = self.conn.search(query=query,
                                         indices=self.index_name,
                                         doc_types=[self.document_type])
            self.assertEqual(resultset.total, expected_total)
Exemplo n.º 41
0
def add_filters(filters):
    """Build a query restricted by every ``(field, term)`` pair given.

    Args:
        filters: Iterable of ``(field, term)`` pairs; each becomes a
            ``TermFilter``.

    Returns:
        ``MatchAllQuery()`` when ``filters`` is empty, otherwise a
        ``FilteredQuery`` wrapping a match-all query with all term filters
        combined by ``ANDFilter``.
    """
    flist = [TermFilter(field, term) for field, term in filters]
    if not flist:
        return MatchAllQuery()

    # The filters narrow a match-all base query, the same base the
    # unfiltered branch returns.
    return FilteredQuery(MatchAllQuery(), ANDFilter(flist))
Exemplo n.º 42
0
 def test_date_facet(self):
     # Monthly date histogram over every document; the response is read as
     # a plain mapping ('hits' / 'facets' keys).
     search = MatchAllQuery().search()
     search.facet.facets.append(
         DateHistogramFacet('date_facet', field='date', interval='month'))
     result = self.conn.search(query=search,
                               indexes=["test-index"],
                               doc_types=["test-type"])

     expected_entries = [
         {u'count': 2, u'time': 1301616000000},
         {u'count': 1, u'time': 1304208000000},
     ]
     self.assertEqual(result['hits']['total'], 3)
     self.assertEqual(result['facets']['date_facet']['entries'],
                      expected_entries)

     # The bucket keys are epoch milliseconds at midnight UTC. Decoding them
     # as UTC keeps the check independent of the machine's local timezone.
     self.assertEqual(
         datetime.datetime.utcfromtimestamp(1301616000000 / 1000.).date(),
         datetime.date(2011, 4, 1))
     self.assertEqual(
         datetime.datetime.utcfromtimestamp(1304208000000 / 1000.).date(),
         datetime.date(2011, 5, 1))
Exemplo n.º 43
0
def dump_topics(backupdir,
                eshost,
                _type,
                indices="topics-all"):
    """Export every document of one type to a gzipped JSON-lines file.

    Documents are scanned from ``indices``, written one JSON object per
    line to ``/tmp/out.json`` (each with its ``_id`` copied in from
    ``_meta.id``) and then compressed to
    ``<backupdir>/topics.<_type>.json.gz``. Progress messages go to stderr.

    Args:
        backupdir: Directory that receives the ``.json.gz`` file.
        eshost: ``host:port`` string handed to ``ES``.
        _type: Document type to export; also part of the backup file name.
        indices: Index (or index pattern) to read from.

    Returns:
        None. Returns early, without writing anything, when the search
        reports no documents.
    """
    tmp_path = '/tmp/out.json'

    conn = ES(eshost)
    q = MatchAllQuery()
    q = q.search()

    resultset = conn.search(query=q,indices=indices, doc_types=[_type], scan=True)
    if not resultset.total:
        sys.stderr.write("no data\n")
        return

    sys.stderr.write("Will write %d lines to %s\n"%(resultset.total, tmp_path))
    # The temp file is opened only once there is data, and `with` guarantees
    # it is closed even if serialization fails part-way through.
    with open(tmp_path,'w') as out:
        for r in resultset:
            r['_id'] = r._meta.id
            out.write(json.dumps(r)+'\n')

    # gzip
    backup = os.path.join(backupdir,"topics.{}.json.gz".format(_type))

    # Both the uncompressed input and the gzip output are closed on exit.
    with open(tmp_path,'rb') as f_in, gzip.open(backup,'wb') as f_out:
        f_out.writelines(f_in)
    sys.stderr.write("Created %s\n"%backup)
Exemplo n.º 44
0
    def test_terms_facet(self):
        def facet_search(field):
            # Match-all search carrying a single term facet on `field`.
            search = MatchAllQuery().search()
            search.facet.add_term_facet(field)
            return search

        resultset = self.conn.search(query=facet_search("tag"),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.facets.tag.terms,
                         [{u"count": 2, u"term": u"foo"}, {u"count": 1, u"term": u"bar"}])

        # Searches built identically compare equal; a different facet field
        # makes them unequal.
        first = facet_search("tag")
        second = facet_search("tag")
        self.assertEqual(first, second)

        other = facet_search("bag")
        self.assertNotEqual(first, other)
Exemplo n.º 45
0
    def test_terms_facet_filter(self):
        def filtered_facet_search(facet_field):
            # Search filtered to tag == 'foo' with one term facet on `facet_field`.
            search = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo')).search()
            search.facet.add_term_facet(facet_field)
            return search

        resultset = self.conn.search(query=filtered_facet_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        expected_terms = [{u'count': 2, u'term': u'foo'}]
        self.assertEqual(resultset.total, 2)
        # The facet is reachable both by key and by attribute access.
        self.assertEqual(resultset.facets['tag']['terms'], expected_terms)
        self.assertEqual(resultset.facets.tag.terms, expected_terms)

        # Searches built identically compare equal; a different facet field
        # makes them unequal.
        first = filtered_facet_search('tag')
        second = filtered_facet_search('tag')
        self.assertEqual(first, second)

        other = filtered_facet_search('bag')
        self.assertNotEqual(second, other)
Exemplo n.º 46
0
 def test_facet_filter_is_serialized_correctly(self):
     # Serializing a search whose 'topic' facet has a bool facet_filter
     # must keep that filter in the output.
     reviewed_term = TermQuery(field='reviewed', value=True)
     topic_facet = TermFacet(field='topic',
                             facet_filter=BoolFilter(must_not=reviewed_term))

     search = MatchAllQuery().search(size=0)
     search.facet.add(topic_facet)

     serialized_facets = search.serialize()['facets']
     self.assertTrue(serialized_facets['topic']['facet_filter']['bool'])
Exemplo n.º 47
0
    def test_terms_facet_filter(self):
        def build_search(facet_field):
            # Search filtered to tag == "foo" with one term facet on `facet_field`.
            filtered = FilteredQuery(MatchAllQuery(), TermFilter("tag", "foo"))
            search = filtered.search()
            search.facet.add_term_facet(facet_field)
            return search

        resultset = self.conn.search(query=build_search("tag"),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        self.assertEqual(resultset.total, 2)
        # The facet is reachable both by key and by attribute access.
        self.assertEqual(resultset.facets["tag"]["terms"], [{u"count": 2, u"term": u"foo"}])
        self.assertEqual(resultset.facets.tag.terms, [{u"count": 2, u"term": u"foo"}])

        # Searches built identically compare equal; a different facet field
        # makes them unequal.
        same_a = build_search("tag")
        same_b = build_search("tag")
        self.assertEqual(same_a, same_b)

        different = build_search("bag")
        self.assertNotEqual(same_b, different)
Exemplo n.º 48
0
import json
from pyes import ES, Search
from pyes.aggs import TermsAgg, SumAgg, FilterAgg, DateHistogramAgg
from pyes.exceptions import IndexMissingException
from pyes.query import MatchAllQuery, BoolQuery, RangeQuery, ESRange, TermQuery
from pyes.filters import TermFilter, TermsFilter

# Base query: match every document.
match_all = MatchAllQuery()

# Inner terms aggregation on the domain id field.
sub_domain_agg = TermsAgg(
    'domain_agg',
    field='json_data.etp_domain_id',
    size=20000,
)

# Outer terms aggregation on the client id field, carrying the domain
# aggregation as its sub-aggregation.
client_agg = TermsAgg(
    'client_agg',
    field='json_data.etp_client_id',
    sub_aggs=[sub_domain_agg],
    size=20000,
)

# size=0: the search requests no hits, only the aggregation.
search_query = Search(query=match_all, size=0)
search_query.agg.add(client_agg)

# Show the request body that would be sent.
print(json.dumps(search_query.serialize(), indent=2))
Exemplo n.º 49
0
    def test_terms_facet(self):
        def make_search(facet_field):
            # Match-all search carrying a single term facet on `facet_field`.
            search = MatchAllQuery().search()
            search.facet.add_term_facet(facet_field)
            return search

        resultset = self.conn.search(query=make_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        expected_terms = [
            {u'count': 2, u'term': u'foo'},
            {u'count': 1, u'term': u'bar'},
        ]
        self.assertEqual(resultset.total, 3)
        self.assertEqual(resultset.facets.tag.terms, expected_terms)

        # Searches built identically compare equal; a different facet field
        # makes them unequal.
        tag_a = make_search('tag')
        tag_b = make_search('tag')
        self.assertEqual(tag_a, tag_b)

        bag = make_search('bag')
        self.assertNotEqual(tag_a, bag)
Exemplo n.º 50
0
    def test_terms_facet_filter(self):
        def make_search(facet_field):
            # Search filtered to tag == 'foo' with one term facet on `facet_field`.
            base = FilteredQuery(MatchAllQuery(), TermFilter('tag', 'foo'))
            search = base.search()
            search.facet.add_term_facet(facet_field)
            return search

        resultset = self.conn.search(query=make_search('tag'),
                                     indices=self.index_name,
                                     doc_types=[self.document_type])
        only_foo = [{u'count': 2, u'term': u'foo'}]
        self.assertEqual(resultset.total, 2)
        # The facet is reachable both by key and by attribute access.
        self.assertEqual(resultset.facets['tag']['terms'], only_foo)
        self.assertEqual(resultset.facets.tag.terms, only_foo)

        # Searches built identically compare equal; a different facet field
        # makes them unequal.
        tag_a = make_search('tag')
        tag_b = make_search('tag')
        self.assertEqual(tag_a, tag_b)

        bag = make_search('bag')
        self.assertNotEqual(tag_b, bag)