Exemplo n.º 1
0
    def get_xml(self,
                save_as: Union[str, None] = None,
                as_string: bool = False) -> Union[XMLElement, str]:
        """
        Provides full data (all pages) in XML.

        The ``data`` elements of every page are gathered under a single
        ``document`` root, followed by ``lastUpdate``, ``length`` and
        ``totalPages`` elements.

        Parameters
        ----------
        save_as: Union[str, None]
            If defined, the results will (also) be saved as a
            file. [Default: ``None``]

            The value must be a path to a file with the correct
            extension -- i.e. ``.xml`` for XML.

            The file is written regardless of the value of ``as_string``.

        as_string: bool
            .. versionadded:: 1.1.4

            If ``False`` (default), returns an ``ElementTree``
            object. Otherwise, returns the data as an XML string.

        Returns
        -------
        Union[xml.etree.ElementTree.Element, str]
            The ``document`` element, or its serialised form when
            ``as_string`` is ``True``.

        Examples
        --------
        >>> from xml.etree.ElementTree import tostring
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> result_xml = data.get_xml()
        >>> result_str = tostring(result_xml, encoding='unicode', method='xml')
        >>> print(result_str)
        <document>
            <data>
                <name>East Midlands</name>
                <newCases>0</newCases>
            </data>
            ...
        </document>
        """
        from xml.etree.ElementTree import SubElement, fromstring, tostring

        resp = XMLElement("document")

        for response in self._get(DataFormat.XML):
            # Parsing the page and moving its "data" elements under
            # the shared root:
            parsed_data = fromstring(response.content.decode())
            resp.extend(parsed_data.findall(".//data"))

        # Metadata is computed after all pages have been collected so
        # that "length" reflects the complete document.
        extras = {
            "lastUpdate": self.last_update,
            "length": len(resp.findall(".//data")),
            "totalPages": self._total_pages
        }

        for elm_name, value in extras.items():
            SubElement(resp, elm_name).text = str(value)

        # Serialisation is only needed for saving or string output.
        if save_as is None and not as_string:
            return resp

        str_data = tostring(resp, encoding='unicode', method='xml')

        # Save before choosing the return type, so that ``save_as`` is
        # honoured even when ``as_string`` is set.
        if save_as is not None:
            save_data(str_data, save_as, DataFormat.XML)

        return str_data if as_string else resp
Exemplo n.º 2
0
    def get_csv(self, save_as=None) -> str:
        """
        Provides full data (all pages) in CSV.

        The header row (column names) is kept from the first page only;
        it is dropped from every subsequent page before the pages are
        concatenated.

        .. warning::

            Please make sure that the ``structure`` is not hierarchical as
            CSV outputs are defined as 2D tables and as such, do not support
            hierarchies.

        Parameters
        ----------
        save_as: Union[str, None]
            If defined, the results will (also) be saved as a
            file. [Default: ``None``]

            The value must be a path to a file with the correct
            extension -- i.e. ``.csv`` for CSV.

        Returns
        -------
        str

        Raises
        ------
        ValueError
            If the structure is a dictionary holding any value that is
            not a string (i.e. the structure is nested).

        Examples
        --------
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> result = data.get_csv()
        >>> print(result)
        name,newCases
        East Midlands,0
        ...
        """
        structure = self.structure

        # A dictionary structure is flat only when every value is a
        # plain string (a metric name).
        if isinstance(structure, dict):
            is_flat = all(isinstance(item, str) for item in structure.values())

            if not is_flat:
                raise ValueError(
                    "CSV structure cannot be nested. Received:\n%s"
                    % dumps(structure, indent=4)
                )

        newline = "\n"
        chunks = []

        pages = enumerate(self._get(DataFormat.CSV), start=1)

        for index, page in pages:
            text = page.content.decode()

            if index > 1:
                # Everything after the first line break; the column
                # names are only wanted once, at the top of the output.
                text = text.partition(newline)[2]

            chunks.append(text.strip() + newline)

        csv_data = "".join(chunks)

        if save_as is not None:
            save_data(csv_data, save_as, DataFormat.CSV)

        return csv_data
Exemplo n.º 3
0
    def get_json(self,
                 save_as: Union[str, None] = None,
                 as_string: bool = False) -> Union[dict, str]:
        """
        Provides full data (all pages) in JSON.

        The ``data`` arrays of every page are merged into one list, and
        the ``lastUpdate``, ``length`` and ``totalPages`` keys are added
        alongside it.

        Parameters
        ----------
        save_as: Union[str, None]
            If defined, the results will (also) be saved as a
            file. [Default: ``None``]

            The value must be a path to a file with the correct
            extension -- i.e. ``.json`` for JSON.

            The file is written regardless of the value of ``as_string``.

        as_string: bool
            .. versionadded:: 1.1.4

            If ``False`` (default), returns the data as a dictionary.
            Otherwise, returns the data as a JSON string.

        Returns
        -------
        Union[Dict, str]

        Examples
        --------
        >>> filters = ["areaType=region"]
        >>> structure = {
        ...     "name": "areaName",
        ...     "newCases": "newCasesBySpecimenDate"
        ... }
        >>> data = Cov19API(
        ...     filters=filters,
        ...     structure=structure,
        ...     latest_by='newCasesBySpecimenDate'
        ... )
        >>> result = data.get_json()
        >>> print(result)
        {'data': [{'name': 'East Midlands', 'newCases': 0}, ... }
        """
        resp = {"data": list()}

        for response in self._get(DataFormat.JSON):
            current_data = response.json()
            resp["data"].extend(current_data['data'])

        # Metadata is added after all pages have been collected so that
        # "length" reflects the complete data set.
        resp["lastUpdate"] = self.last_update
        resp["length"] = len(resp["data"])
        resp["totalPages"] = self._total_pages

        # Serialisation is only needed for saving or string output.
        if save_as is None and not as_string:
            return resp

        # Compact separators: no whitespace between items.
        data = dumps(resp, separators=(",", ":"))

        # Save before choosing the return type, so that ``save_as`` is
        # honoured even when ``as_string`` is set.
        if save_as is not None:
            save_data(data, save_as, DataFormat.JSON)

        return data if as_string else resp