import warnings

import pandas as pd
from tqdm import tqdm

# NOTE: `reports` (the REST API wrapper module) and `parsejson` (the response parser)
# are internal mstrio helpers; their exact import paths depend on the package layout.


def get_report(self, report_id, offset=0, limit=1000):
    """Extracts the contents of a report into a Pandas DataFrame.

    :param report_id: Unique ID of the report you wish to extract information from.
    :param offset: (optional) To extract all data from the report, use 0 (default).
    :param limit: (optional) Used to control data extract behavior on datasets with a
        large number of rows. The default is 1000. As an example, if the dataset has
        50,000 rows, get_report() will incrementally extract all 50,000 rows in
        1,000-row chunks. Depending on system resources, a higher limit (e.g. 10,000)
        may reduce the total time required to extract the entire dataset.
    :return: Pandas DataFrame containing the report contents.
    """
    # warning for future deprecation / replacement by Report class
    warnings.warn(
        "This method will be deprecated. The Report constructor is preferred.",
        DeprecationWarning)

    response = reports.report_instance(connection=self,
                                       report_id=report_id,
                                       offset=offset,
                                       limit=limit)
    if not response.ok:
        msg = "Error getting report contents."
        self.__response_handler(response=response, msg=msg)
    else:
        json_response = response.json()
        instance_id = json_response['instanceId']

        # Get the pagination totals from the response object
        pagination = json_response['result']['data']['paging']

        # If there are more rows to fetch, fetch them
        if pagination['current'] != pagination['total']:

            # Initialize a list to capture slices from each query, and append the
            # first request's result to the list
            table_data = [parsejson(response=json_response)]

            # Fetch add'l rows for this object instance from the Intelligence Server
            for _offset in range(limit, pagination['total'], limit):
                response = reports.report_instance_id(connection=self,
                                                      report_id=report_id,
                                                      instance_id=instance_id,
                                                      offset=_offset,
                                                      limit=limit)
                table_data.append(parsejson(response=response.json()))
            return pd.concat(table_data)
        else:
            return parsejson(response=json_response)
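

# Usage sketch (assumption): get_report() above is a method of mstrio's Connection
# class. The import path and Connection arguments below are illustrative and may
# differ between mstrio versions; substitute your own environment details.
from mstrio import microstrategy

conn = microstrategy.Connection(
    base_url="https://your-env.example.com/MicroStrategyLibrary/api",
    username="mstr_user",
    password="mstr_password",
    project_name="MicroStrategy Tutorial")
conn.connect()

# Extract the whole report into a DataFrame; a larger limit fetches the same rows
# in fewer requests.
report_df = conn.get_report(report_id="<report-id>", limit=10000)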
def to_dataframe(self, limit=25000, progress_bar=True):
    """Extract contents of a report instance into a Pandas DataFrame.

    Formerly `microstrategy.Connection.get_report()`.

    Args:
        limit (int, optional): Used to control data extract behavior on datasets with
            a large number of rows. The default is 25000. As an example, if the
            dataset has 50,000 rows, to_dataframe() will incrementally extract all
            50,000 rows in 25,000-row chunks. Depending on system resources, a higher
            limit may reduce the total time required to extract the entire dataset.
        progress_bar (bool, optional): If True (default), show a progress bar while
            downloading the report contents.

    Returns:
        Pandas DataFrame containing the report contents.
    """
    inst_pbar = tqdm(desc='Connecting to MicroStrategy I-Server. Please wait...',
                     bar_format='{desc}', leave=False, ncols=310)

    # Request a new instance, set instance id
    res = reports.report_instance(connection=self._connection,
                                  report_id=self._report_id,
                                  body=self._filter.filter_body(),
                                  offset=self.__OFFSET,
                                  limit=limit)
    inst_pbar.close()

    if not res.ok:
        msg = "Error getting report contents."
        self.__response_handler(response=res, msg=msg)
    else:
        _instance = res.json()
        _instance_id = _instance['instanceId']

        # Get the pagination totals from the response object
        _pagination = _instance['result']['data']['paging']

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:

            # Initialize a list to capture slices from each query, and append the
            # first request's result to the list
            table_data = [parsejson(response=_instance)]

            # Count the number of iterations (round up total / limit)
            it_total = int(_pagination['total'] / limit) + (_pagination['total'] % limit != 0)

            # Fetch add'l rows for this object instance from the Intelligence Server
            with tqdm(total=it_total, disable=(not progress_bar)) as fetch_pbar:
                if progress_bar:
                    fetch_pbar.update()
                    fetch_pbar.set_description("Downloading")
                    fetch_pbar.set_postfix(rows=limit)
                for _offset in range(limit, _pagination['total'], limit):
                    if progress_bar:
                        fetch_pbar.update()
                        fetch_pbar.set_description("Downloading")
                        fetch_pbar.set_postfix(
                            rows=min(_offset + limit, _pagination['total']))
                    response = reports.report_instance_id(connection=self._connection,
                                                          report_id=self._report_id,
                                                          instance_id=_instance_id,
                                                          offset=_offset,
                                                          limit=limit)
                    table_data.append(parsejson(response=response.json()))

            # Concatenate and return the list of result data as a data frame
            self._dataframe = pd.concat(table_data).reset_index(drop=True)
        else:
            # Otherwise parse the first result and return it as a dataframe
            self._dataframe = parsejson(response=_instance)
        return self._dataframe
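

# Usage sketch (assumption): to_dataframe() above belongs to mstrio's Report class,
# which wraps an authenticated Connection plus a report_id. The import path
# (mstrio.report) is an assumption about the package layout and may vary by version;
# `conn` is the authenticated Connection from the previous example.
from mstrio.report import Report

my_report = Report(connection=conn, report_id="<report-id>")

# Download all rows in 25,000-row chunks, showing a progress bar while fetching,
# then work with the resulting DataFrame locally.
df = my_report.to_dataframe(limit=25000, progress_bar=True)
print(df.head())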