Exemplo n.º 1
0
    def _get_all_events_with_a_label(self, label, sketch):
        """Search the datastore for events in a sketch tagged with a label.

        Args:
            label (string): the label name to match.
            sketch (timesketch.models.sketch.Sketch): the sketch whose ID
                scopes both the search and the label match.

        Returns:
            pd.DataFrame: the search response converted by
                export.query_results_to_dataframe.
        """
        # Match on label name and sketch ID inside the nested
        # timesketch_label field.
        label_terms = [
            {"term": {"timesketch_label.name": label}},
            {"term": {"timesketch_label.sketch_id": sketch.id}},
        ]
        nested_clause = {
            "path": "timesketch_label",
            "query": {"bool": {"must": label_terms}},
        }
        serialized_dsl = json.dumps({"query": {"nested": nested_clause}})

        # The query string is left empty: the DSL alone drives the search.
        search_response = self.datastore.search(
            sketch_id=sketch.id,
            query_string="",
            query_filter=self.DEFAULT_QUERY_FILTER,
            query_dsl=serialized_dsl,
            indices=self.sketch_indices,
        )
        return export.query_results_to_dataframe(search_response, sketch)
Exemplo n.º 2
0
    def _get_all_events_with_a_label(self, label, sketch):
        """Look up the events of a sketch that carry a given label.

        Args:
            label (string): the label name to search for.
            sketch (timesketch.models.sketch.Sketch): the sketch that the
                search and the label match are restricted to.

        Returns:
            pd.DataFrame: the datastore search response turned into a
                DataFrame by export.query_results_to_dataframe.
        """
        def _term(field, value):
            # Single exact-match clause on a timesketch_label sub-field.
            return {'term': {'timesketch_label.' + field: value}}

        must_clauses = [_term('name', label), _term('sketch_id', sketch.id)]
        dsl = {
            'query': {
                'nested': {
                    'path': 'timesketch_label',
                    'query': {'bool': {'must': must_clauses}},
                },
            },
        }
        dsl_string = json.dumps(dsl)

        # Only the DSL selects events; the free-text query stays empty.
        response = self.datastore.search(
            sketch_id=sketch.id,
            query_string='',
            query_filter=self.DEFAULT_QUERY_FILTER,
            query_dsl=dsl_string,
            indices=self.sketch_indices)
        return export.query_results_to_dataframe(response, sketch)
Exemplo n.º 3
0
    def _export_view(self, view, sketch, zip_file):
        """Export a view from a sketch and write it to a ZIP file.

        Two entries are written to the archive, both named after the view ID
        and view name: "views/<name>.csv" holding the events returned by the
        view's query, and "views/<name>.meta" holding a JSON description of
        the view.

        Args:
            view (timesketch.models.sketch.View): a View object.
            sketch (timesketch.models.sketch.Sketch): a sketch object.
            zip_file (ZipFile): a zip file handle that can be used to write
                content to.
        """
        name = "{0:04d}_{1:s}".format(view.id, view.name)

        # A view without a stored filter falls back to the default one.
        query_filter = json.loads(view.query_filter) if view.query_filter else None
        if not query_filter:
            # Work on a copy: the keys set below must not leak into the
            # shared default filter used by other searches.
            query_filter = dict(self.DEFAULT_QUERY_FILTER)

        indices = query_filter.get("indices", self.sketch_indices)
        if not indices or "_all" in indices:
            indices = self.sketch_indices

        # Ignoring the size limits in views to reduce the amount of queries
        # needed to get all the data.
        query_filter["terminate_after"] = 10000
        query_filter["size"] = 10000

        # A DSL that decodes to an empty value is treated as "no DSL".
        query_dsl = view.query_dsl
        if query_dsl:
            query_dict = json.loads(query_dsl)
            if not query_dict:
                query_dsl = None

        result = self.datastore.search(
            sketch_id=sketch.id,
            query_string=view.query_string,
            query_filter=query_filter,
            query_dsl=query_dsl,
            enable_scroll=True,
            indices=indices,
        )

        scroll_id = result.get("_scroll_id", "")
        if scroll_id:
            data_frame = export.query_results_to_dataframe(result, sketch)

            total_count = result.get("hits", {}).get("total", {}).get("value", 0)

            if isinstance(total_count, str):
                try:
                    total_count = int(total_count, 10)
                except ValueError:
                    total_count = 0

            event_count = len(result["hits"]["hits"])

            while event_count < total_count:
                # pylint: disable=unexpected-keyword-arg
                result = self.datastore.client.scroll(scroll_id=scroll_id, scroll="1m")
                # The scroll ID may change between pages, so always continue
                # from the most recently returned one.
                scroll_id = result.get("_scroll_id") or scroll_id

                page_hits = result["hits"]["hits"]
                event_count += len(page_hits)
                add_frame = export.query_results_to_dataframe(result, sketch)
                if add_frame.shape[0]:
                    data_frame = pd.concat([data_frame, add_frame], sort=False)
                else:
                    # Lazy %-formatting keeps this safe when the view has
                    # neither a query string nor a DSL (both None).
                    logger.warning(
                        "Data Frame returned from a search operation was "
                        "empty, count %d out of %d total. Query is: "
                        '"%s"',
                        event_count,
                        total_count,
                        view.query_string or query_dsl,
                    )

                if not page_hits:
                    # An empty page means the scroll is exhausted; without
                    # this the loop would spin forever whenever the reported
                    # total is larger than what can actually be fetched.
                    break

            fh = io.StringIO()
            data_frame.to_csv(fh, index=False)
            fh.seek(0)
        else:
            fh = export.query_results_to_filehandle(result, sketch)

        zip_file.writestr("views/{0:s}.csv".format(name), data=fh.read())

        # Views without an owner get a placeholder username in the metadata.
        if not view.user:
            username = "******"
        else:
            username = view.user.username
        meta = {
            "name": view.name,
            "view_id": view.id,
            "description": view.description,
            "query_string": view.query_string,
            "query_filter": view.query_filter,
            "query_dsl": view.query_dsl,
            "username": username,
            "sketch_id": view.sketch_id,
        }
        zip_file.writestr("views/{0:s}.meta".format(name), data=json.dumps(meta))
Exemplo n.º 4
0
    def _export_view(self, view, sketch, zip_file):
        """Export a view from a sketch and write it to a ZIP file.

        Two entries are written to the archive, both named after the view ID
        and view name: 'views/<name>.csv' holding the events returned by the
        view's query, and 'views/<name>.meta' holding a JSON description of
        the view.

        Args:
            view (timesketch.models.sketch.View): a View object.
            sketch (timesketch.models.sketch.Sketch): a sketch object.
            zip_file (ZipFile): a zip file handle that can be used to write
                content to.
        """
        name = '{0:04d}_{1:s}'.format(view.id, view.name)

        # A view without a stored filter falls back to the default one.
        query_filter = None
        if view.query_filter:
            query_filter = json.loads(view.query_filter)
        if not query_filter:
            # Work on a copy: the keys set below must not leak into the
            # shared default filter used by other searches.
            query_filter = dict(self.DEFAULT_QUERY_FILTER)

        indices = query_filter.get('indices', self.sketch_indices)
        if not indices or '_all' in indices:
            indices = self.sketch_indices

        # Ignoring the size limits in views to reduce the amount of queries
        # needed to get all the data.
        query_filter['terminate_after'] = 10000
        query_filter['size'] = 10000

        # A DSL that decodes to an empty value is treated as "no DSL".
        query_dsl = view.query_dsl
        if query_dsl:
            query_dict = json.loads(query_dsl)
            if not query_dict:
                query_dsl = None

        result = self.datastore.search(
            sketch_id=sketch.id,
            query_string=view.query_string,
            query_filter=query_filter,
            query_dsl=query_dsl,
            enable_scroll=True,
            indices=indices)

        scroll_id = result.get('_scroll_id', '')
        if scroll_id:
            data_frame = export.query_results_to_dataframe(result, sketch)

            total_count = result.get(
                'hits', {}).get('total', {}).get('value', 0)

            if isinstance(total_count, str):
                try:
                    total_count = int(total_count, 10)
                except ValueError:
                    total_count = 0

            event_count = len(result['hits']['hits'])

            while event_count < total_count:
                # pylint: disable=unexpected-keyword-arg
                result = self.datastore.client.scroll(
                    scroll_id=scroll_id, scroll='1m')
                # The scroll ID may change between pages, so always continue
                # from the most recently returned one.
                scroll_id = result.get('_scroll_id') or scroll_id

                page_hits = result['hits']['hits']
                event_count += len(page_hits)
                add_frame = export.query_results_to_dataframe(result, sketch)
                if add_frame.shape[0]:
                    data_frame = pd.concat([data_frame, add_frame], sort=False)
                else:
                    # Lazy %-formatting keeps this safe when the view has
                    # neither a query string nor a DSL (both None).
                    logger.warning(
                        'Data Frame returned from a search operation was '
                        'empty, count %d out of %d total. Query is: '
                        '"%s"',
                        event_count, total_count,
                        view.query_string or query_dsl)

                if not page_hits:
                    # An empty page means the scroll is exhausted; without
                    # this the loop would spin forever whenever the reported
                    # total is larger than what can actually be fetched.
                    break

            fh = io.StringIO()
            data_frame.to_csv(fh, index=False)
            fh.seek(0)
        else:
            fh = export.query_results_to_filehandle(result, sketch)

        zip_file.writestr(
            'views/{0:s}.csv'.format(name), data=fh.read())

        # Views without an owner get a placeholder username in the metadata.
        if not view.user:
            username = '******'
        else:
            username = view.user.username
        meta = {
            'name': view.name,
            'view_id': view.id,
            'description': view.description,
            'query_string': view.query_string,
            'query_filter': view.query_filter,
            'query_dsl': view.query_dsl,
            'username': username,
            'sketch_id': view.sketch_id,
        }
        zip_file.writestr(
            'views/{0:s}.meta'.format(name), data=json.dumps(meta))