Пример #1
0
    def get_report(self, report_id, offset=0, limit=1000):
        """
        Extracts the contents of a report into a Pandas Data Frame

        :param report_id: Unique ID of the report you wish to extract information from
        :param offset: (optional) To extract all data from the report, use 0 (default)
        :param limit: (optional) Used to control data extract behavior on datasets with a large
        number of rows. The default is 1000. As an example, if the dataset has 50,000 rows,
        get_report() will incrementally extract all 50,000 rows in 1,000 row chunks. Depending
        on system resources, a higher limit (e.g. 10,000) may reduce the total time
        required to extract the entire dataset
        :return: Pandas Data Frame containing the report contents
        """

        # warning for future deprecation / replacement by Report class
        warnings.warn(
            "This method will be deprecated. The Report constructor is preferred.",
            DeprecationWarning)

        response = reports.report_instance(connection=self,
                                           report_id=report_id,
                                           offset=offset,
                                           limit=limit)

        if not response.ok:
            msg = "Error getting report contents."
            self.__response_handler(response=response, msg=msg)
        else:
            json_response = response.json()
            instance_id = json_response['instanceId']

            # Gets the pagination totals from the response object
            pagination = json_response['result']['data']['paging']

            # If there are more rows to fetch, fetch them
            if pagination['current'] != pagination['total']:

                # initialize a list to capture slices from each query, and append the first request's result to the list
                table_data = [parsejson(response=json_response)]

                # Fetch add'l rows from this object instance from the intelligence server
                for _offset in range(limit, pagination['total'], limit):
                    response = reports.report_instance_id(
                        connection=self,
                        report_id=report_id,
                        instance_id=instance_id,
                        offset=_offset,
                        limit=limit)
                    table_data.append(parsejson(response=response.json()))
                return pd.concat(table_data)
            else:
                return parsejson(response=json_response)
Пример #2
0
    def to_dataframe(self, limit=25000, progress_bar=True):
        """Extract contents of a report instance into a Pandas Data Frame. Formerly `microstrategy.Connection.get_report()`.

        Args:
            limit (int, optional): Used to control data extract behavior on datasets with a large number of rows.
                The default is 25000. As an example, if the dataset has 50,000 rows, get_report() will incrementally
                extract all 50,000 rows in 1,000 row chunks. Depending on system resources, a higher limit (e.g. 10,000)
                may reduce the total time required to extract the entire dataset.
            progress_bar(bool, optional): If True (default), show the upload progress bar.

        Returns:
            Pandas Data Frame containing the report contents
        """
        inst_pbar = tqdm(
            desc='Connecting to MicroStrategy I-Server. Please wait...',
            bar_format='{desc}',
            leave=False,
            ncols=310)

        # Request a new instance, set instance id
        res = reports.report_instance(connection=self._connection,
                                      report_id=self._report_id,
                                      body=self._filter.filter_body(),
                                      offset=self.__OFFSET,
                                      limit=limit)
        inst_pbar.close()
        if not res.ok:
            msg = "Error getting report contents."
            self.__response_handler(response=res, msg=msg)
        else:
            _instance = res.json()
            _instance_id = _instance['instanceId']

            # Gets the pagination totals from the response object
            _pagination = _instance['result']['data']['paging']

            # If there are more rows to fetch, fetch them
            if _pagination['current'] != _pagination['total']:

                # initialize a list to capture slices from each query, and append the first request's result to the list
                table_data = [parsejson(response=_instance)]

                # Count the number of iterations
                it_total = int(_pagination['total'] /
                               limit) + (_pagination['total'] % limit != 0)

                # Fetch add'l rows from this object instance from the intelligence server
                with tqdm(total=it_total,
                          disable=(not progress_bar)) as fetch_pbar:
                    if progress_bar:
                        fetch_pbar.update()
                        fetch_pbar.set_description("Downloading")
                        fetch_pbar.set_postfix(rows=limit)
                    for _offset in range(limit, _pagination['total'], limit):
                        if progress_bar:
                            fetch_pbar.update()
                            fetch_pbar.set_description("Downloading")
                            fetch_pbar.set_postfix(
                                rows=min(_offset +
                                         limit, _pagination['total']))
                        response = reports.report_instance_id(
                            connection=self._connection,
                            report_id=self._report_id,
                            instance_id=_instance_id,
                            offset=_offset,
                            limit=limit)
                        table_data.append(parsejson(response=response.json()))

                # concatenate and return the list of result data as a data frame
                self._dataframe = pd.concat(table_data).reset_index(drop=True)
            else:
                # otherwise parse the first result and return it as a dataframe
                self._dataframe = parsejson(response=_instance)

            return self._dataframe