Exemplo n.º 1
0
    def __initialize_report(self, limit: int) -> requests.Response:
        """Request a new instance of the report from the I-Server.

        A description-only progress indicator is displayed while the request
        is prepared and sent, unless `self.progress_bar` is falsy.

        Side effect: when the connected I-Server reports version 11.2.0100 or
        newer, `self._subtotals["visible"]` is set to False and the same flag
        is sent in the request body.

        Args:
            limit (int): Not read by this method; the request is always sent
                with `self._initial_limit`. Kept so existing callers keep
                working.

        Returns:
            requests.Response: The response of `reports.report_instance`,
                returned unchecked (the caller must inspect its status).
        """
        # The context manager closes the indicator even when building the
        # body or sending the request raises.
        with tqdm(
                desc='Initializing an instance of a report. Please wait...',
                bar_format='{desc}',
                leave=False,
                ncols=285,
                disable=(not self.progress_bar)):
            body = self.__filter._filter_body()

            # Switch off subtotals if I-Server version is 11.2.0100 or newer
            if version.parse(self._connection.iserver_version) >= version.parse(
                    "11.2.0100"):
                self._subtotals["visible"] = False
                body["subtotals"] = {"visible": self._subtotals["visible"]}

            # Request a new instance starting at the first row
            return reports.report_instance(
                connection=self._connection,
                report_id=self._report_id,
                body=body,
                offset=0,
                limit=self._initial_limit,
            )
Exemplo n.º 2
0
    def __definition(self):
        """Load the report's name, attributes and metrics.

        Requests a report instance with `limit=0` through
        `reports.report_instance` and, on success, stores:

        * `self._name` - the report name,
        * `self._attributes` - a list of `{'name', 'id'}` dicts,
        * `self._metrics` - a list of `{'name', 'id'}` dicts.

        When the response is not OK the response handler is invoked with an
        error message and nothing is stored.
        """
        response = reports.report_instance(connection=self._connection,
                                           report_id=self._report_id,
                                           limit=0)

        # Failure path first: report the error and stop.
        if not response.ok:
            self.__response_handler(
                response=response,
                msg="Error getting report definition. Check report ID.")
            return None

        payload = response.json()
        self._name = payload["name"]

        # Read both collections before storing anything derived from them.
        attribute_defs = payload["result"]["definition"]["attributes"]
        metric_defs = payload["result"]["definition"]["metrics"]

        # Keep only the name/id pair of every attribute and metric.
        self._attributes = [
            {'name': item['name'], 'id': item['id']}
            for item in attribute_defs
        ]
        self._metrics = [
            {'name': item['name'], 'id': item['id']}
            for item in metric_defs
        ]
Exemplo n.º 3
0
    def test_report_instance(self, mock_post):
        """Check that `reports.report_instance` yields a response with status 200.

        `mock_post` is the mock injected into the test (presumably by a patch
        decorator outside this block - confirm which call it replaces).
        """
        connection = microstrategy.Connection(
            base_url=BASE_URL,
            username=USERNAME,
            password=PASSWORD,
            project_name=PROJECT_NAME,
        )

        # Configure the mocked call to report success.
        expected_status = 200
        mock_post.return_value.status_code = expected_status

        result = reports.report_instance(
            connection,
            report_id=REPORT_ID,
            offset=OFFSET,
            limit=LIMIT,
        )

        self.assertEqual(result.status_code, expected_status)
Exemplo n.º 4
0
    def get_report(self, report_id, offset=0, limit=1000):
        """
        Extracts the contents of a report into a Pandas Data Frame

        :param report_id: Unique ID of the report you wish to extract information from
        :param offset: (optional) Row at which the extraction starts. To extract all data
        from the report, use 0 (default)
        :param limit: (optional) Used to control data extract behavior on datasets with a large
        number of rows. The default is 1000. As an example, if the dataset has 50,000 rows,
        get_report() will incrementally extract all 50,000 rows in 1,000 row chunks. Depending
        on system resources, a higher limit (e.g. 10,000) may reduce the total time
        required to extract the entire dataset
        :return: Pandas Data Frame containing the report contents. If the initial request is
        not OK, the response handler is called and (if it returns) the result is None
        """

        # warning for future deprecation / replacement by Report class;
        # stacklevel=2 attributes the warning to the caller of get_report()
        warnings.warn(
            "This method will be deprecated. The Report constructor is preferred.",
            DeprecationWarning,
            stacklevel=2)

        response = reports.report_instance(connection=self,
                                           report_id=report_id,
                                           offset=offset,
                                           limit=limit)

        if not response.ok:
            msg = "Error getting report contents."
            self.__response_handler(response=response, msg=msg)
            return None

        json_response = response.json()
        instance_id = json_response['instanceId']

        # Gets the pagination totals from the response object
        pagination = json_response['result']['data']['paging']

        # The first response holds `current` rows starting at `offset`; when
        # that already reaches the end of the report there is nothing to add
        if offset + pagination['current'] >= pagination['total']:
            return parsejson(response=json_response)

        # initialize a list to capture slices from each query, and append the first request's result to the list
        table_data = [parsejson(response=json_response)]

        # Fetch add'l rows from this object instance from the intelligence server.
        # Chunks continue right after the first one, so the start has to account
        # for `offset`; otherwise rows would be fetched twice for offset != 0
        for _offset in range(offset + limit, pagination['total'], limit):
            response = reports.report_instance_id(
                connection=self,
                report_id=report_id,
                instance_id=instance_id,
                offset=_offset,
                limit=limit)
            table_data.append(parsejson(response=response.json()))
        return pd.concat(table_data)
Exemplo n.º 5
0
    def generate_report_instance(self, filter=None):
        """Create a report instance and return its decoded JSON payload.

        The payload's ``instanceId`` is stored on ``self.instance_id``.

        Args:
            filter: When equal to ``'attr_el'`` the payload is trimmed before
                it is returned: every grid row keeps only its first element,
                and the header rows and the ``raw`` / ``formatted`` /
                ``extras`` metric values keep only their first entry. Any
                other value returns the payload untouched. (The name shadows
                the builtin but is part of the public signature.)

        Returns:
            dict: The (possibly trimmed) JSON payload of the response.
        """
        res = reports.report_instance(self.connection,
                                      report_id=self.report_id)

        # Decode the body once and reuse it instead of re-parsing the
        # response for every access.
        res_json = res.json()
        self.instance_id = res_json['instanceId']

        if filter == 'attr_el':
            # Rows are modified in place, so no write-back into the list is
            # needed.
            for row in res_json['definition']['grid']['rows']:
                row['elements'] = row['elements'][:1]

            data = res_json['data']
            data['headers']['rows'] = data['headers']['rows'][:1]

            metric_values = data['metricValues']
            for key in ('raw', 'formatted', 'extras'):
                metric_values[key] = metric_values[key][:1]

        return res_json
Exemplo n.º 6
0
    def to_dataframe(self, limit=25000, progress_bar=True):
        """Extract contents of a report instance into a Pandas Data Frame. Formerly `microstrategy.Connection.get_report()`.

        Args:
            limit (int, optional): Number of rows requested per call. Used to control data extract behavior on
                datasets with a large number of rows. The default is 25000. As an example, if the dataset has
                50,000 rows, it is extracted incrementally in two 25,000 row chunks. Depending on system
                resources, a higher limit may reduce the total time required to extract the entire dataset.
            progress_bar(bool, optional): If True (default), show the progress indicators while connecting
                and downloading.

        Returns:
            Pandas Data Frame containing the report contents (also stored on `self._dataframe`). If the
            initial request is not OK, the response handler is called and (if it returns) the result is None.
        """
        # The indicator honours `progress_bar` and is closed by the context
        # manager even when the request raises.
        with tqdm(
                desc='Connecting to MicroStrategy I-Server. Please wait...',
                bar_format='{desc}',
                leave=False,
                ncols=310,
                disable=(not progress_bar)):
            # Request a new instance, set instance id
            res = reports.report_instance(connection=self._connection,
                                          report_id=self._report_id,
                                          body=self._filter.filter_body(),
                                          offset=self.__OFFSET,
                                          limit=limit)

        if not res.ok:
            msg = "Error getting report contents."
            self.__response_handler(response=res, msg=msg)
            return None

        _instance = res.json()
        _instance_id = _instance['instanceId']

        # Gets the pagination totals from the response object
        _pagination = _instance['result']['data']['paging']

        # Everything arrived with the first response: parse it and return
        if _pagination['current'] == _pagination['total']:
            self._dataframe = parsejson(response=_instance)
            return self._dataframe

        # initialize a list to capture slices from each query, and append the first request's result to the list
        table_data = [parsejson(response=_instance)]

        # Count the number of iterations (total rows / limit, rounded up)
        total_rows = _pagination['total']
        it_total = int(total_rows / limit) + (total_rows % limit != 0)

        # Fetch add'l rows from this object instance from the intelligence server
        with tqdm(total=it_total,
                  disable=(not progress_bar)) as fetch_pbar:
            # Account for the chunk that came with the first response
            if progress_bar:
                fetch_pbar.update()
                fetch_pbar.set_description("Downloading")
                fetch_pbar.set_postfix(rows=limit)
            for _offset in range(limit, total_rows, limit):
                if progress_bar:
                    fetch_pbar.update()
                    fetch_pbar.set_description("Downloading")
                    fetch_pbar.set_postfix(
                        rows=min(_offset + limit, total_rows))
                response = reports.report_instance_id(
                    connection=self._connection,
                    report_id=self._report_id,
                    instance_id=_instance_id,
                    offset=_offset,
                    limit=limit)
                table_data.append(parsejson(response=response.json()))

        # concatenate and return the list of result data as a data frame
        self._dataframe = pd.concat(table_data).reset_index(drop=True)
        return self._dataframe
Exemplo n.º 7
0
    def to_dataframe(self, limit=None):
        """Extract contents of a report instance into a Pandas Data Frame. Formerly `microstrategy.Connection.get_report()`.

        Side effects: a truthy `limit` overwrites `self._initial_limit`;
        `self._subtotals["visible"]` is set to False when the I-Server version
        is 11.2.0100 or newer; the result is stored on `self._dataframe`.

        Args:
            limit (None or int, optional): Used to control data extract behavior. By default (None)
                the limit is calculated automatically, based on an optimized physical size of one
                chunk. Setting limit manually will force the number of rows per chunk. Depending on
                system resources, a higher limit (e.g. 50,000) may reduce the total time required
                to extract the entire dataset.

        Returns:
            Pandas Data Frame containing the report contents, reduced according
            to `self.cross_tab_filter` when that filter is not empty.
        """
        # The context manager closes the indicator even if the request raises.
        with tqdm(desc='Initializing an instance of a report. Please wait...',
                  bar_format='{desc}', leave=False, ncols=285, disable=(not self.progress_bar)):
            # Switch off subtotals if I-Server version is 11.2.0100 or newer
            body = self._filter.filter_body()
            if version.parse(self._connection.iserver_version) >= version.parse("11.2.0100"):
                self._subtotals["visible"] = False
                body["subtotals"] = {"visible": self._subtotals["visible"]}

            if limit:
                self._initial_limit = limit
            # Request a new instance, set instance id
            res = reports.report_instance(connection=self._connection,
                                          report_id=self._report_id,
                                          body=body,
                                          offset=self.__OFFSET,
                                          limit=self._initial_limit)

        # NOTE(review): the response status is not checked before decoding.
        # Gets the pagination totals from the response object
        _instance = res.json()
        _instance_id = _instance['instanceId']
        _pagination = _instance['data']['paging']

        # initialize parser and process first response
        p = Parser(response=_instance, parse_cube=False)
        p.parse(response=_instance)

        # If there are more rows to fetch, fetch them
        if _pagination['current'] != _pagination['total']:
            if not limit:
                # Derive the chunk size from the byte size of the first
                # response, never going below 1000 rows
                limit = max(1000, int((self._initial_limit * self._size_limit) / len(res.content)))

            # Number of follow-up requests: rows left after the first chunk
            # divided by the chunk size, rounded up
            remaining = _pagination['total'] - self._initial_limit
            it_total = int(remaining / limit) + (remaining % limit != 0)

            if self.parallel and it_total > 1:
                threads = helper.get_parallel_number(it_total)
                with FuturesSession(executor=ThreadPoolExecutor(max_workers=threads)) as session:
                    # `with` closes the bar even when a chunk fails to download or parse
                    with tqdm(desc="Downloading", total=it_total + 1,
                              disable=(not self.progress_bar)) as fetch_pbar:
                        future = self.__fetch_chunks_future(session, _pagination, _instance_id, limit)
                        # Account for the chunk that came with the first response
                        fetch_pbar.update()
                        for i, f in enumerate(future, start=1):
                            response = f.result()
                            if not response.ok:
                                # Retry the failed chunk once, synchronously
                                current_offset = self._initial_limit + (i - 1) * limit
                                response = reports.report_instance_id(connection=self._connection,
                                                                      report_id=self._report_id,
                                                                      instance_id=_instance_id,
                                                                      offset=current_offset, limit=limit)
                            fetch_pbar.update()
                            fetch_pbar.set_postfix(
                                rows=str(min(self._initial_limit + i * limit, _pagination['total'])))
                            p.parse(response.json())
            else:
                self.__fetch_chunks(p, _pagination, it_total, _instance_id, limit)

        # return parsed data as a data frame
        self._dataframe = p.dataframe

        # filter received dataframe if report had crosstabs and filters were applied
        if self.cross_tab_filter != {}:
            if self.cross_tab_filter['metrics'] is not None:
                # drop metrics columns that were not selected
                metr_names = [el['name'] for el in self._metrics
                              if el['id'] not in self.cross_tab_filter['metrics']]
                self._dataframe = self._dataframe.drop(metr_names, axis=1)

            if self.cross_tab_filter['attr_elements'] is not None:
                # create dict of attributes and elements to iterate through:
                # the first 32 characters of each entry are matched against
                # attribute ids, the text after the 33rd character against
                # the values of that attribute's column
                attr_dict = {}
                for attribute in self.cross_tab_filter['attr_elements']:
                    attr_dict.setdefault(attribute[:32], []).append(attribute[33:])

                # initialize the row mask on the dataframe's own index so the
                # logical OR below and the final selection align row by row
                indexes = pd.Series(False, index=self._dataframe.index, dtype=bool)

                # logical OR for filtered attribute elements
                for attribute, elements in attr_dict.items():
                    attr_name = [attr for attr in self._attributes
                                 if attr['id'] in attribute][0]['name']
                    indexes = indexes | self._dataframe[attr_name].isin(elements)

                # keep only the rows matching at least one selected element
                self._dataframe = self._dataframe[indexes]

            if self.cross_tab_filter['attributes'] is not None:
                # drop attribute columns that were not selected
                attr_names = [el['name'] for el in self._attributes
                              if el['id'] not in self.cross_tab_filter['attributes']]
                self._dataframe = self._dataframe.drop(attr_names, axis=1)

        return self._dataframe