def get_entries(self):
    """Get all entries for a team + their filter from ES/MozDef.

    The query is assembled from the team's filter section in
    ``self.config['es']``:

    * every entry must match ``asset.autogroup: "<team>"``;
    * a list value adds one "should" clause per element, except elements
      prefixed with ``!`` which become "must_not" clauses;
    * a single value adds a "must" clause, or a "must_not" clause when it
      is prefixed with ``!``;
    * keys starting with ``_`` are reserved settings (e.g. ``_time_period``)
      and are never turned into clauses.

    Results are restricted to the last ``_time_period`` hours (24 when the
    setting is absent) using the ``utctimestamp`` field.

    Returns:
        list: the ``_source`` of every hit returned by the search.

    Raises:
        KeyError: if a required configuration key is missing.
        Exception: if ElasticSearch reports one or more failed shards.
    """
    teamfilter = self.config['teamsetup'][self.team]['filter']
    mozdef = self.config['mozdef']
    es = ES((mozdef['proto'], mozdef['host'], mozdef['port']))

    # Default filter - time period
    try:
        td = self.config['es'][teamfilter]['_time_period']
    except KeyError:
        debug('No _time_period defined, defaulting to 24h')
        td = 24

    # Sample the clock once so the window is exactly `td` hours wide.
    now = datetime.now()
    begindateUTC = toUTC(now - timedelta(hours=td))
    enddateUTC = toUTC(now)
    # Single-argument form prints the same text under Python 2 and 3.
    print("{} {}".format(begindateUTC, enddateUTC))
    fDate = pyes.RangeQuery(qrange=pyes.ESRange('utctimestamp',
                                                from_value=begindateUTC,
                                                to_value=enddateUTC))

    # Load team queries from our json config.
    # Lists are "should" unless an item is negated with "!" then it's must_not
    # Single items are "must"
    query = pyes.query.BoolQuery()
    query.add_must(pyes.QueryStringQuery('asset.autogroup: "{}"'.format(self.team)))
    filter_config = self.config['es'][teamfilter]
    for item, val in filter_config.items():
        # items starting with '_' are internal/reserved, like _time_period
        if item.startswith('_'):
            continue
        if isinstance(val, list):
            for v in val:
                if v.startswith("!"):
                    query.add_must_not(pyes.MatchQuery(item, v[1:]))
                else:
                    query.add_should(pyes.MatchQuery(item, v))
        elif val.startswith("!"):
            # Strip the "!" marker, exactly as done for list elements;
            # otherwise the clause would match the literal "!value".
            query.add_must_not(pyes.MatchQuery(item, val[1:]))
        else:
            query.add_must(pyes.MatchQuery(item, val))

    q = pyes.ConstantScoreQuery(query)
    q = pyes.FilteredQuery(q, pyes.BoolFilter(must=[fDate]))

    results = es.search(query=q, indices=self.config['es']['index'])
    raw = results._search_raw(0, results.count())
    # This doesn't do much, but pyes has no "close()" or similar functionality.
    es.force_bulk()

    if raw._shards.failed != 0:
        raise Exception("Some shards failed! {0}".format(str(raw._shards)))

    # Nobody cares for the metadata past this point (all the goodies are in '_source')
    return [hit._source for hit in raw.hits.hits]
'boost': 1.0, 'index': 'analyzed', 'store': 'true', 'type': u'string', "term_vector": "with_positions_offsets" }, u'name': { 'boost': 1.0, 'index': 'analyzed', 'store': 'true', 'type': u'string', "term_vector": "with_positions_offsets" }, u'age': { 'store': 'true', 'type': u'integer' }, } conn.create_index("test-index") conn.put_mapping("test-type", {'properties': mapping}, ["test-index"]) start = datetime.now() for k, userdata in dataset.items(): # conn.index(userdata, "test-index", "test-type", k) conn.index(userdata, "test-index", "test-type", k, bulk=True) conn.force_bulk() end = datetime.now() print "time:", end - start dataset.close()
# Benchmark: bulk-index every record of the "samples.shelve" shelf into
# "test-index"/"test-type" through `conn` and print the elapsed time.
dataset = shelve.open("samples.shelve")
try:
    # Field mapping applied to "test-type" before any document is indexed.
    mapping = {
        u'description': {
            'boost': 1.0,
            'index': 'analyzed',
            'store': 'yes',
            'type': u'string',
            "term_vector": "with_positions_offsets",
        },
        u'name': {
            'boost': 1.0,
            'index': 'analyzed',
            'store': 'yes',
            'type': u'string',
            "term_vector": "with_positions_offsets",
        },
        u'age': {
            'store': 'yes',
            'type': u'integer',
        },
    }
    conn.create_index("test-index")
    conn.put_mapping("test-type", {'properties': mapping}, ["test-index"])

    start = datetime.now()
    for k, userdata in dataset.items():
        # bulk=True queues the document instead of sending it immediately;
        # drop the flag to time one-request-per-document indexing instead.
        conn.index(userdata, "test-index", "test-type", k, bulk=True)
    # Send whatever is still queued so it is included in the measurement.
    conn.force_bulk()
    end = datetime.now()
    # Single-argument form prints the same text under Python 2 and 3.
    print("time: {}".format(end - start))
finally:
    # Release the shelf even when index creation or indexing fails.
    dataset.close()