def _get_all_events_with_a_label(self, label, sketch):
    """Collect every event in a sketch that is tagged with a given label.

    Args:
        label (string): name of the label the events must carry.
        sketch (timesketch.models.sketch.Sketch): sketch whose events
            are searched.

    Returns:
        pd.DataFrame: the search result converted by
            export.query_results_to_dataframe.
    """
    sketch_id = sketch.id

    # Both term clauses live inside the same nested query on the
    # timesketch_label path: one on the label name, one on the sketch id.
    label_terms = [
        {"term": {"timesketch_label.name": label}},
        {"term": {"timesketch_label.sketch_id": sketch_id}},
    ]
    nested_clause = {
        "path": "timesketch_label",
        "query": {"bool": {"must": label_terms}},
    }
    serialized_dsl = json.dumps({"query": {"nested": nested_clause}})

    search_result = self.datastore.search(
        sketch_id=sketch_id,
        query_string="",
        query_filter=self.DEFAULT_QUERY_FILTER,
        query_dsl=serialized_dsl,
        indices=self.sketch_indices,
    )
    return export.query_results_to_dataframe(search_result, sketch)
def _get_all_events_with_a_label(self, label, sketch):
    """Look up the events of a sketch that have a specific label applied.

    Args:
        label (string): the label string to match.
        sketch (timesketch.models.sketch.Sketch): the sketch to search in.

    Returns:
        pd.DataFrame: a pandas DataFrame built from the search result by
            export.query_results_to_dataframe.
    """
    # Field/value pairs that must all match within the nested label query.
    required_fields = (
        ('timesketch_label.name', label),
        ('timesketch_label.sketch_id', sketch.id),
    )
    must_clauses = [
        {'term': {field: value}} for field, value in required_fields
    ]

    dsl = {}
    dsl['query'] = {
        'nested': {
            'path': 'timesketch_label',
            'query': {'bool': {'must': must_clauses}},
        },
    }

    search_kwargs = {
        'sketch_id': sketch.id,
        'query_string': '',
        'query_filter': self.DEFAULT_QUERY_FILTER,
        'query_dsl': json.dumps(dsl),
        'indices': self.sketch_indices,
    }
    hits = self.datastore.search(**search_kwargs)
    return export.query_results_to_dataframe(hits, sketch)
def _export_view(self, view, sketch, zip_file):
    """Export a view from a sketch and write it to a ZIP file.

    The view's saved query is run against the datastore with scrolling
    enabled. All returned pages are gathered into one CSV that is stored
    as ``views/<id>_<name>.csv``; the view's own settings are stored next
    to it as JSON in ``views/<id>_<name>.meta``.

    Args:
        view (timesketch.models.sketch.View): a View object.
        sketch (timesketch.models.sketch.Sketch): a sketch object.
        zip_file (ZipFile): a zip file handle that can be used to write
            content to.
    """
    name = "{0:04d}_{1:s}".format(view.id, view.name)

    query_filter = json.loads(view.query_filter)
    if not query_filter:
        # Work on a copy: the filter is modified below, and writing into
        # self.DEFAULT_QUERY_FILTER itself would leak the export limits
        # into every later search that relies on the default.
        query_filter = dict(self.DEFAULT_QUERY_FILTER)

    indices = query_filter.get("indices", self.sketch_indices)
    if not indices or "_all" in indices:
        indices = self.sketch_indices

    # Ignoring the size limits in views to reduce the amount of queries
    # needed to get all the data.
    query_filter["terminate_after"] = 10000
    query_filter["size"] = 10000

    # A DSL string that decodes to an empty document counts as "no DSL".
    query_dsl = view.query_dsl
    if query_dsl and not json.loads(query_dsl):
        query_dsl = None

    result = self.datastore.search(
        sketch_id=sketch.id,
        query_string=view.query_string,
        query_filter=query_filter,
        query_dsl=query_dsl,
        enable_scroll=True,
        indices=indices,
    )

    scroll_id = result.get("_scroll_id", "")
    if scroll_id:
        frames = [export.query_results_to_dataframe(result, sketch)]

        # hits.total is either an object carrying "value" or a plain
        # number, depending on the backend version and request options.
        total_count = result.get("hits", {}).get("total", 0)
        if isinstance(total_count, dict):
            total_count = total_count.get("value", 0)
        if isinstance(total_count, str):
            try:
                total_count = int(total_count, 10)
            except ValueError:
                total_count = 0

        event_count = len(result["hits"]["hits"])
        while event_count < total_count:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.scroll(scroll_id=scroll_id, scroll="1m")
            # Always continue with the most recent scroll id; the
            # backend is allowed to hand out a new one per page.
            scroll_id = result.get("_scroll_id", scroll_id)

            page_size = len(result["hits"]["hits"])
            event_count += page_size

            add_frame = export.query_results_to_dataframe(result, sketch)
            if add_frame.shape[0]:
                frames.append(add_frame)
            else:
                # Lazy %-formatting: the query may be None, which the
                # previous "{2:s}" format spec could not render.
                logger.warning(
                    "Data Frame returned from a search operation was "
                    "empty, count %d out of %d total. Query is: "
                    '"%s"',
                    event_count,
                    total_count,
                    view.query_string or query_dsl,
                )

            if not page_size:
                # A page without hits means nothing more will arrive;
                # without this exit the loop would spin forever whenever
                # the reported total is never reached.
                break

        # Concatenate once instead of re-copying the frame per page.
        if len(frames) > 1:
            data_frame = pd.concat(frames, sort=False)
        else:
            data_frame = frames[0]

        fh = io.StringIO()
        data_frame.to_csv(fh, index=False)
        fh.seek(0)
    else:
        fh = export.query_results_to_filehandle(result, sketch)

    zip_file.writestr("views/{0:s}.csv".format(name), data=fh.read())

    if not view.user:
        username = "******"
    else:
        username = view.user.username

    meta = {
        "name": view.name,
        "view_id": view.id,
        "description": view.description,
        "query_string": view.query_string,
        "query_filter": view.query_filter,
        "query_dsl": view.query_dsl,
        "username": username,
        "sketch_id": view.sketch_id,
    }
    zip_file.writestr("views/{0:s}.meta".format(name), data=json.dumps(meta))
def _export_view(self, view, sketch, zip_file):
    """Export a view from a sketch and write it to a ZIP file.

    Executes the query stored in the view (with scrolling enabled),
    writes all retrieved events as CSV to ``views/<id>_<name>.csv`` and
    the view's settings as JSON to ``views/<id>_<name>.meta``.

    Args:
        view (timesketch.models.sketch.View): a View object.
        sketch (timesketch.models.sketch.Sketch): a sketch object.
        zip_file (ZipFile): a zip file handle that can be used to write
            content to.
    """
    name = '{0:04d}_{1:s}'.format(view.id, view.name)

    query_filter = json.loads(view.query_filter)
    if not query_filter:
        # Copy the default before changing it below; mutating
        # self.DEFAULT_QUERY_FILTER in place would alter the filter used
        # by every other search that falls back to the default.
        query_filter = dict(self.DEFAULT_QUERY_FILTER)

    indices = query_filter.get('indices', self.sketch_indices)
    if not indices or '_all' in indices:
        indices = self.sketch_indices

    # Ignoring the size limits in views to reduce the amount of queries
    # needed to get all the data.
    query_filter['terminate_after'] = 10000
    query_filter['size'] = 10000

    # Treat a DSL string that parses to an empty value as absent.
    query_dsl = view.query_dsl
    if query_dsl and not json.loads(query_dsl):
        query_dsl = None

    result = self.datastore.search(
        sketch_id=sketch.id,
        query_string=view.query_string,
        query_filter=query_filter,
        query_dsl=query_dsl,
        enable_scroll=True,
        indices=indices)

    scroll_id = result.get('_scroll_id', '')
    if scroll_id:
        collected = [export.query_results_to_dataframe(result, sketch)]

        # The total can be reported as {'value': N, ...} or as a bare
        # number; accept both instead of failing on the latter.
        total_count = result.get('hits', {}).get('total', 0)
        if isinstance(total_count, dict):
            total_count = total_count.get('value', 0)
        if isinstance(total_count, str):
            try:
                total_count = int(total_count, 10)
            except ValueError:
                total_count = 0

        event_count = len(result['hits']['hits'])
        while event_count < total_count:
            # pylint: disable=unexpected-keyword-arg
            result = self.datastore.client.scroll(
                scroll_id=scroll_id, scroll='1m')
            # Each response may carry a fresh scroll id; use the latest.
            scroll_id = result.get('_scroll_id', scroll_id)

            hits_in_page = len(result['hits']['hits'])
            event_count += hits_in_page

            add_frame = export.query_results_to_dataframe(result, sketch)
            if add_frame.shape[0]:
                collected.append(add_frame)
            else:
                # Logger-side %-formatting copes with a None query,
                # which the former '{2:s}' format spec rejected.
                logger.warning(
                    'Data Frame returned from a search operation was '
                    'empty, count %d out of %d total. Query is: '
                    '"%s"',
                    event_count, total_count,
                    view.query_string or query_dsl)

            if not hits_in_page:
                # No hits in this page: the count can no longer grow, so
                # stop rather than loop forever short of the total.
                break

        # One concatenation at the end avoids copying the accumulated
        # frame again for every page.
        if len(collected) > 1:
            data_frame = pd.concat(collected, sort=False)
        else:
            data_frame = collected[0]

        fh = io.StringIO()
        data_frame.to_csv(fh, index=False)
        fh.seek(0)
    else:
        fh = export.query_results_to_filehandle(result, sketch)

    zip_file.writestr(
        'views/{0:s}.csv'.format(name), data=fh.read())

    if not view.user:
        username = '******'
    else:
        username = view.user.username

    meta = {
        'name': view.name,
        'view_id': view.id,
        'description': view.description,
        'query_string': view.query_string,
        'query_filter': view.query_filter,
        'query_dsl': view.query_dsl,
        'username': username,
        'sketch_id': view.sketch_id,
    }
    zip_file.writestr(
        'views/{0:s}.meta'.format(name), data=json.dumps(meta))