示例#1
0
    def get_entries(self):
        '''Get all entries for a team + their filter from ES/MozDef.

        Builds a boolean query from the team's filter section of the config
        (self.config['es'][<team filter name>]), restricts it to a time
        window that ends now, runs it against the configured index and
        returns the '_source' of every hit.

        Each item of the filter section is turned into query clauses:
          * keys starting with '_' are reserved settings (e.g. _time_period,
            the window length in hours) and produce no clause;
          * a list value adds one "should" match per element, except that
            elements prefixed with "!" add a "must_not" match instead;
          * a single value adds a "must" match, or a "must_not" match when
            prefixed with "!".
        In every negated case the "!" marker is stripped before matching.

        Returns:
            A list with the '_source' document of each matching hit.

        Raises:
            Exception: if the search reports any failed shard.
        '''
        teamfilter = self.config['teamsetup'][self.team]['filter']
        mozdef = self.config['mozdef']
        es = ES((mozdef['proto'], mozdef['host'], mozdef['port']))
        filter_config = self.config['es'][teamfilter]

        # Default filter - time period
        try:
            td = filter_config['_time_period']
        except KeyError:
            debug('No _time_period defined, defaulting to 24h')
            td = 24
        # Sample the clock once so both ends of the window derive from the
        # same instant.
        now = datetime.now()
        begin_date_utc = toUTC(now - timedelta(hours=td))
        end_date_utc = toUTC(now)
        debug('Querying entries from {0} to {1}'.format(begin_date_utc, end_date_utc))
        date_filter = pyes.RangeQuery(
            qrange=pyes.ESRange('utctimestamp',
                                from_value=begin_date_utc,
                                to_value=end_date_utc))

        # Load team queries from our json config.
        # Lists are "should" unless an item is negated with "!" then it's must_not
        # Single items are "must"
        query = pyes.query.BoolQuery()
        query.add_must(pyes.QueryStringQuery('asset.autogroup: "{}"'.format(self.team)))
        for item in filter_config:
            # items starting with '_' are internal/reserved, like _time_period
            if item.startswith('_'):
                continue
            val = filter_config[item]
            if isinstance(val, list):
                for v in val:
                    if v.startswith('!'):
                        query.add_must_not(pyes.MatchQuery(item, v[1:]))
                    else:
                        query.add_should(pyes.MatchQuery(item, v))
            elif val.startswith('!'):
                # Strip the "!" marker, exactly like the list case above;
                # matching on the raw "!value" would never exclude anything.
                query.add_must_not(pyes.MatchQuery(item, val[1:]))
            else:
                query.add_must(pyes.MatchQuery(item, val))

        q = pyes.ConstantScoreQuery(query)
        q = pyes.FilteredQuery(q, pyes.BoolFilter(must=[date_filter]))

        results = es.search(query=q, indices=self.config['es']['index'])

        # Fetch every hit in a single raw request instead of paging.
        raw = results._search_raw(0, results.count())
        # This doesn't do much, but pyes has no "close()" or similar functionality.
        es.force_bulk()

        if raw._shards.failed != 0:
            raise Exception("Some shards failed! {0}".format(str(raw._shards)))

        # Nobody cares for the metadata past this point (all the goodies are in '_source')
        return [hit._source for hit in raw.hits.hits]
示例#2
0
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'true',
        'type': u'string',
        "term_vector": "with_positions_offsets"
    },
    u'name': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'true',
        'type': u'string',
        "term_vector": "with_positions_offsets"
    },
    u'age': {
        'store': 'true',
        'type': u'integer'
    },
}
# Create the test index, register the field mapping, then time how long it
# takes to index every record of the dataset through the bulk interface.
try:
    conn.create_index("test-index")
    conn.put_mapping("test-type", {'properties': mapping}, ["test-index"])

    start = datetime.now()
    for k, userdata in dataset.items():
        conn.index(userdata, "test-index", "test-type", k, bulk=True)
    # Push out whatever the bulk interface still holds before stopping the
    # clock, so the measurement covers the complete indexing run.
    conn.force_bulk()
    end = datetime.now()

    # A single-argument print() emits the same text on Python 2 and Python 3.
    print("time: {0}".format(end - start))
finally:
    # Close the dataset even when one of the calls above raises.
    dataset.close()
示例#3
0
# Sample records to index, keyed by document id.
dataset = shelve.open("samples.shelve")

# Field mapping registered for "test-type" documents.
mapping = {
    u'description': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'yes',
        'type': u'string',
        "term_vector": "with_positions_offsets",
    },
    u'name': {
        'boost': 1.0,
        'index': 'analyzed',
        'store': 'yes',
        'type': u'string',
        "term_vector": "with_positions_offsets",
    },
    u'age': {
        'store': 'yes',
        'type': u'integer',
    },
}

# Create the test index, register the mapping, then time how long it takes
# to index every record of the dataset through the bulk interface.
try:
    conn.create_index("test-index")
    conn.put_mapping("test-type", {'properties': mapping}, ["test-index"])

    start = datetime.now()
    for k, userdata in dataset.items():
        conn.index(userdata, "test-index", "test-type", k, bulk=True)
    # Push out whatever the bulk interface still holds before stopping the
    # clock, so the measurement covers the complete indexing run.
    conn.force_bulk()
    end = datetime.now()

    # A single-argument print() emits the same text on Python 2 and Python 3.
    print("time: {0}".format(end - start))
finally:
    # Close the shelf even when one of the calls above raises.
    dataset.close()