示例#1
0
def read_csv_data(input_data,
                  single_record = False,
                  line_terminator = '\r\n'):
    """Return the contents of ``input_data`` as a :class:`str <python:str>`.

    :param input_data: The CSV data to read.

      .. note::

        If ``input_data`` is Path-like, then the underlying file **must** start
        with a header row.

    :type input_data: Path-like or :class:`str <python:str>`

    :param single_record: If ``True``, will return only the first data record.
      If ``False``, will return all data records (including the header row if
      present). Defaults to ``False``.
    :type single_record: :class:`bool <python:bool>`

    :param line_terminator: The character(s) used to mark the end of a record.
      Defaults to ``'\\r\\n'``.
    :type line_terminator: :class:`str <python:str>`

    :returns: ``input_data`` as a :class:`str <python:str>`, or
      :obj:`None <python:None>` if no data could be read
    :rtype: :class:`str <python:str>` or :obj:`None <python:None>`

    """
    # Strip surrounding whitespace from string input; Path-like objects
    # without a .strip() method pass through unchanged.
    try:
        input_data = input_data.strip()
    except AttributeError:
        pass

    original_input_data = input_data

    if checkers.is_file(input_data):
        if single_record:
            # Prefer the first data row (line 2, after the header); fall back
            # to the header row itself, then to None for an empty file.
            input_data = linecache.getline(original_input_data, 2)
            if input_data == '':
                input_data = linecache.getline(original_input_data, 1)
            if input_data == '':
                input_data = None
        else:
            with open(input_data, 'r') as input_file:
                input_data = input_file.read()
    elif single_record:
        # Split string input on the line terminator, tolerating bare '\r' or
        # '\n' line endings when the default '\r\n' terminator is in effect.
        try:
            if line_terminator in input_data:
                parsed_data = input_data.split(line_terminator)
            elif line_terminator == '\r\n' and '\r' in input_data:
                parsed_data = input_data.split('\r')
            elif line_terminator == '\r\n' and '\n' in input_data:
                parsed_data = input_data.split('\n')
            else:
                parsed_data = [input_data]
        except TypeError:
            parsed_data = [input_data]

        if not parsed_data:
            input_data = None
        elif len(parsed_data) == 1:
            # Single line: treat it as the record itself (no header present).
            input_data = parsed_data[0]
        else:
            # Multiple lines: assume the first line is a header row and
            # return the first data record.
            input_data = parsed_data[1]

    return input_data
def parse_json(input_data, deserialize_function=None, **kwargs):
    """De-serialize JSON data into a Python :class:`dict <python:dict>` object.

    :param input_data: The JSON data to de-serialize. May be a
      :class:`str <python:str>` of JSON data or a Path-like object pointing to
      a JSON file.
    :type input_data: :class:`str <python:str>` or Path-like

    :param deserialize_function: Optionally override the default JSON deserializer.
      Defaults to :obj:`None <python:None>`, which calls the default
      :ref:`simplejson.loads() <simplejson:simplejson.loads>`
      function from the `simplejson <https://github.com/simplejson/simplejson>`_ library.

      .. note::

        Use the ``deserialize_function`` parameter to override the default
        JSON deserializer. A valid ``deserialize_function`` is expected to
        accept a single :class:`str <python:str>` and return a
        :class:`dict <python:dict>`, similar to
        :ref:`simplejson.loads() <simplejson:simplejson.loads>`

        If you wish to pass additional arguments to your ``deserialize_function``
        pass them as keyword arguments (in ``kwargs``).

    :type deserialize_function: callable / :obj:`None <python:None>`

    :param kwargs: Optional keyword parameters that are passed to the
      JSON deserializer function. By default, these are options which are passed
      to :ref:`simplejson.loads() <simplejson:simplejson.loads>`.
    :type kwargs: keyword arguments

    :returns: A :class:`dict <python:dict>` representation of ``input_data``.
    :rtype: :class:`dict <python:dict>`

    :raises ValueError: if ``deserialize_function`` is not callable
    :raises DeserializationError: if ``input_data`` is empty or not a valid
      string
    """
    # Path-like input is deserialized with json.load from an open file handle;
    # string input with json.loads.
    is_file = checkers.is_file(input_data)

    if deserialize_function is None:
        deserialize_function = json.load if is_file else json.loads
    elif not checkers.is_callable(deserialize_function):
        raise ValueError('deserialize_function (%s) is not callable' %
                         deserialize_function)

    if not input_data:
        raise DeserializationError('input_data is empty')

    if is_file:
        with open(input_data, 'r') as input_file:
            from_json = deserialize_function(input_file, **kwargs)
    else:
        try:
            input_data = validators.string(input_data, allow_empty=False)
        except ValueError:
            raise DeserializationError('input_data is not a valid string')

        from_json = deserialize_function(input_data, **kwargs)

    return from_json
示例#3
0
def load(source) -> etree._Element:  # pylint: disable=protected-access
    '''
    Load an XML document
    args:
        source: XML source. Either path, url, string, or loaded LXML Element
    returns:
        Loaded XML object tree, or None on invalid source
    '''
    if not isinstance(source, (str, bytes)) or len(source) < 1:
        # pylint: disable=protected-access
        return source if isinstance(source, etree._ElementTree) else None

    source = source.strip()
    if not source:
        # Whitespace-only input stripped to empty would otherwise raise
        # IndexError on the source[0] checks below.
        return None

    if source[0] == ord('<'):  # Handle source as bytes (source[0] is an int)
        source = io.BytesIO(source)
    elif source[0] == '<':  # Handle source as string
        source = io.StringIO(source)
    elif checkers.is_file(source):  # Handle source as local file
        pass  # etree.parse handles local file paths natively
    elif checkers.is_url(source):  # Handle source as URL
        response = requests.get(source, timeout=10)
        if not response:
            app.logger.warning(
                f"Failed to retrieve XML URL (or timed out): {source}")
            return None
        source = io.BytesIO(response.content)
    else:
        app.logger.warning(
            f"XML source is not valid file, URL, or XML string. {source[:40]}"
            + (len(source) > 40) * '...')
        return None

    return etree.parse(source)
示例#4
0
def main(payload, endpoint, processes, threads, samples, time_based):
    # Run an inference throughput test: load the payload (JSON or JPEG) from
    # a URL or a local file, fan requests out across processes/threads, and
    # report aggregate timing statistics.
    file_type = None
    if checkers.is_url(payload):
        # Remote payloads: fetch JSON via requests, JPEG via imageio.
        if payload.lower().endswith(".json"):
            file_type = "json"
            payload_data = requests.get(payload).json()
        elif payload.lower().endswith(".jpg"):
            file_type = "jpg"
            payload_data = imageio.imread(payload)
    elif checkers.is_file(payload):
        # Local payloads: read JSON from disk, JPEG via OpenCV (BGR channels).
        if payload.lower().endswith(".json"):
            file_type = "json"
            with open(payload, "r") as f:
                payload_data = json.load(f)
        elif payload.lower().endswith(".jpg"):
            file_type = "jpg"
            payload_data = cv2.imread(payload, cv2.IMREAD_COLOR)
    else:
        print(f"'{payload}' isn't an URL resource, nor is it a local file")
        sys.exit(1)

    # file_type stays None when the extension was neither .json nor .jpg.
    if file_type is None:
        print(f"'{payload}' doesn't point to a jpg image or to a json file")
        sys.exit(1)
    # Serialize the payload once, up front, so workers only send bytes/str.
    if file_type == "jpg":
        data = image_to_jpeg_bytes(payload_data)
    if file_type == "json":
        data = json.dumps(payload_data)

    print("Starting the inference throughput test...")
    results = []
    start = time.time()
    with concurrent.futures.ProcessPoolExecutor(
            max_workers=processes) as executor:
        # NOTE(review): assumes executor_submitter returns an iterable of
        # per-worker request counts — the sum() below relies on that; confirm
        # against the helper's definition.
        results = executor_submitter(executor, processes, process_worker,
                                     threads, data, endpoint, samples,
                                     time_based)
    end = time.time()
    elapsed = end - start

    total_requests = sum(results)

    print(
        f"A total of {total_requests} requests have been served in {elapsed} seconds"
    )
    print(f"Avg number of inferences/sec is {total_requests / elapsed}")
    print(
        f"Avg time spent on an inference is {elapsed / total_requests} seconds"
    )
示例#5
0
def from_yaml(as_yaml: Union[str, 'PathLike[Any]', BytesIO],
              target: Optional[Union['PathLike[Any]', BytesIO]] = None,
              compress: bool = False,
              **kwargs):
    """Convert YAML data into an SPSS dataset.

    .. tip::

      If you pass any additional keyword arguments, those keyword arguments will be passed
      onto the :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>` method.

    :param as_yaml: The YAML data that you wish to convert into an SPSS dataset.
    :type as_yaml: :class:`str <python:str>` / File-location /
      :class:`BytesIO <python:io.BytesIO>`

    :param target: The target to which the SPSS dataset should be written. Accepts either
      a filename/path, a :class:`BytesIO <python:io.BytesIO>` object, or
      :obj:`None <python:None>`. If :obj:`None <python:None>` will return a
      :class:`BytesIO <python:io.BytesIO>` object containing the SPSS dataset. Defaults to
      :obj:`None <python:None>`.
    :type target: Path-like / :class:`BytesIO <python:io.BytesIO>` /
      :obj:`None <python:None>`

    :param compress: If ``True``, will return data in the compressed ZSAV format. If
      ``False``, will return data in the standards SAV format. Defaults to ``False``.
    :type compress: :class:`bool <python:bool>`

    :param kwargs: Additional keyword arguments which will be passed onto the
      :meth:`DataFrame.from_dict() <pandas:pandas.DataFrame.from_dict>` method.
    :type kwargs: :class:`dict <python:dict>`

    :returns: A :class:`BytesIO <python:io.BytesIO>` object containing the SPSS data if
      ``target`` is :obj:`None <python:None>` or not a filename, otherwise
      :obj:`None <python:None>`
    :rtype: :class:`BytesIO <python:io.BytesIO>` or :obj:`None <python:None>`

    """
    if checkers.is_bytesIO(as_yaml):
        # A BytesIO buffer cannot be passed to open(); read it directly.
        as_dict = yaml.safe_load(as_yaml)
    elif checkers.is_file(as_yaml):
        with open(as_yaml, 'rb') as yaml_file:
            as_dict = yaml.safe_load(yaml_file)
    else:
        as_yaml = validators.string(as_yaml, allow_empty=False)
        as_dict = yaml.safe_load(as_yaml)

    # Serialize outside the branches: previously ``as_json`` was only
    # assigned in the string branch, raising NameError for file/BytesIO
    # input.
    as_json = json.dumps(as_dict)

    return from_json(as_json, target=target, compress=compress, **kwargs)
示例#6
0
def check_input_file(input_directory, input_value):
    """Resolve ``input_value`` against ``input_directory`` when possible.

    Returns the joined path when it names an existing file inside
    ``input_directory``; otherwise returns ``input_value`` unchanged.

    Raises AssertionError when ``input_directory`` does not exist or is not
    a directory.
    """
    abs_directory = os.path.abspath(input_directory)
    if not os.path.exists(input_directory):
        raise AssertionError('input directory (%s) does not exist' %
                             abs_directory)
    if not os.path.isdir(input_directory):
        raise AssertionError('input directory (%s) is not a directory' %
                             abs_directory)

    # Non-string values (e.g. None) cannot be joined; treat as unresolvable.
    try:
        candidate = os.path.join(input_directory, input_value)
    except (TypeError, AttributeError):
        candidate = None

    if candidate is not None and checkers.is_file(candidate):
        return candidate

    return input_value
示例#7
0
def test_read_csv_data(input_files, input_data, single_record,
                       expected_result):
    """Exercise read_csv_data() and compare against ``expected_result``.

    ``input_data`` is resolved to a file inside ``input_files`` when one
    exists; otherwise it is passed through as raw string data.
    """
    abs_inputs = os.path.abspath(input_files)
    if not os.path.exists(input_files):
        raise AssertionError('input directory (%s) does not exist' %
                             abs_inputs)
    if not os.path.isdir(input_files):
        raise AssertionError('input directory (%s) is not a directory' %
                             abs_inputs)

    candidate = os.path.join(input_files, input_data)
    if checkers.is_file(candidate):
        input_data = candidate

    result = read_csv_data(input_data, single_record=single_record)

    if result is None:
        assert result == expected_result
    else:
        assert result.strip() == expected_result.strip()
示例#8
0
    def from_csv(cls,
                 serialized,
                 tablename,
                 metadata,
                 primary_key,
                 column_kwargs=None,
                 skip_nested=True,
                 default_to_str=False,
                 type_mapping=None,
                 delimiter='|',
                 wrap_all_strings=False,
                 null_text='None',
                 wrapper_character="'",
                 double_wrapper_character_when_nested=False,
                 escape_character="\\",
                 line_terminator='\r\n',
                 **kwargs):
        """Generate a :class:`Table` object from a
        :term:`CSV <Comma-Separated Value (CSV)>` string.

        .. versionadded: 0.3.0

        :param serialized: The CSV data whose column headers will be treated as column
          names, while value data types will determine :term:`model attribute` data
          types.

          .. note::

            If a Path-like object, will read the file contents from a file that is assumed
            to include a header row. If a :class:`str <python:str>` and has more than
            one record (line), will assume the first line is a header row. If a
            :class:`list <python:list>`, will assume the first item is the header row.

        :type serialized: :class:`str <python:str>` / Path-like object /
          :class:`list <python:list>`

        :param tablename: The name of the SQL table to which the model corresponds.
        :type tablename: :class:`str <python:str>`

        :param metadata: a :class:`MetaData <sqlalchemy:sqlalchemy.schema.MetaData>`
          object which will contain this table. The metadata is used as a point of
          association of this table with other tables which are referenced via foreign
          key. It also may be used to associate this table with a particular
          :class:`Connectable <sqlalchemy:sqlalchemy.engine.Connectable>`.
        :type metadata: :class:`MetaData <sqlalchemy:sqlalchemy.schema.MetaData>`

        :param primary_key: The name of the column/key that should be used as the table's
          primary key.
        :type primary_key: :class:`str <python:str>`

        :param column_kwargs: An optional dictionary whose keys correspond to
          column/key, and whose values are themselves dictionaries with keyword
          arguments that will be passed ot the applicable :class:`Column`
          constructor. Defaults to :obj:`None <python:None>`.
        :type column_kwargs: :class:`dict <python:dict>` / :obj:`None <python:None>`

        :param skip_nested: If ``True`` then any keys in ``serialized`` that
          feature nested items (e.g. iterables, :class:`dict <python:dict>` objects,
          etc.) will be ignored. If ``False``, will treat nested items as
          :class:`str <python:str>`. Defaults to ``True``.
        :type skip_nested: :class:`bool <python:bool>`

        :param default_to_str: If ``True``, will automatically set a key/column whose
          value type cannot be determined to ``str``
          (:class:`Text <sqlalchemy:sqlalchemy.types.Text>`). If ``False``, will
          use the value type's ``__name__`` attribute and attempt to find a mapping.
          Defaults to ``False``.
        :type default_to_str: :class:`bool <python:bool>`

        :param type_mapping: Determines how value types in ``serialized`` map to
          SQL column data types. To add a new mapping or override a default, set a
          key to the name of the value type in Python, and set the value to a
          :doc:`SQLAlchemy Data Type <sqlalchemy:core/types>`. The following are the
          default mappings applied:

          .. list-table::
             :widths: 30 30
             :header-rows: 1

             * - Python Literal
               - SQL Column Type
             * - ``bool``
               - :class:`Boolean <sqlalchemy:sqlalchemy.types.Boolean>`
             * - ``str``
               - :class:`Text <sqlalchemy:sqlalchemy.types.Text>`
             * - ``int``
               - :class:`Integer <sqlalchemy:sqlalchemy.types.Integer>`
             * - ``float``
               - :class:`Float <sqlalchemy:sqlalchemy.types.Float>`
             * - ``date``
               - :class:`Date <sqlalchemy:sqlalchemy.types.Date>`
             * - ``datetime``
               - :class:`DateTime <sqlalchemy:sqlalchemy.types.DateTime>`
             * - ``time``
               - :class:`Time <sqlalchemy:sqlalchemy.types.Time>`

        :type type_mapping: :class:`dict <python:dict>` with type names as keys and
          column data types as values.

        :param delimiter: The delimiter used between columns. Defaults to ``|``.
        :type delimiter: :class:`str <python:str>`

        :param wrap_all_strings: If ``True``, expects all non-numeric values to be
          wrapped in the ``wrapper_character`` when parsing. If ``False``, only
          expects wrapping where necessary. Defaults to ``False``.
        :type wrap_all_strings: :class:`bool <python:bool>`

        :param wrapper_character: The string used to wrap string values when
          wrapping is applied. Defaults to ``'``.
        :type wrapper_character: :class:`str <python:str>`

        :param null_text: The string used to indicate an empty value if empty
          values are wrapped. Defaults to `None`.
        :type null_text: :class:`str <python:str>`

        :param double_wrapper_character_when_nested: If ``True``, a doubled
          ``wrapper_character`` inside a value is interpreted as a literal
          ``wrapper_character``. Defaults to ``False``.
        :type double_wrapper_character_when_nested: :class:`bool <python:bool>`

        :param escape_character: The character used to escape the
          ``wrapper_character`` within values. Defaults to ``\\``.
        :type escape_character: :class:`str <python:str>`

        :param line_terminator: The character(s) used to mark the end of a record.
          Defaults to ``'\\r\\n'``.
        :type line_terminator: :class:`str <python:str>`

        :param kwargs: Any additional keyword arguments will be passed to the
          :class:`Table` constructor. For a full list of options, please see
          :class:`sqlalchemy.schema.Table <sqlalchemy:sqlalchemy.schema.Table>`.

        :returns: A :class:`Table` object.
        :rtype: :class:`Table`

        :raises DeserializationError: if ``serialized`` is not a valid
          :class:`str <python:str>`
        :raises UnsupportedValueTypeError: when a value in ``serialized`` does not
          have a corresponding key in ``type_mapping``
        :raises ValueError: if ``tablename`` is empty
        :raises ValueError: if ``primary_key`` is empty
        :raises CSVStructureError: if there are less than 2 (two) rows in ``serialized``
          or if column headers are not valid Python variable names

        """
        # pylint: disable=line-too-long,invalid-name,too-many-arguments

        # Resolve string/list input to raw CSV text; Path-like input is read
        # by parse_csv() itself.
        if not checkers.is_file(serialized):
            serialized = read_csv_data(serialized, single_record=False)

        # De-serialize the first data record into a dict of column -> value.
        from_csv = parse_csv(serialized,
                             delimiter=delimiter,
                             wrap_all_strings=wrap_all_strings,
                             null_text=null_text,
                             wrapper_character=wrapper_character,
                             double_wrapper_character_when_nested=
                             double_wrapper_character_when_nested,
                             escape_character=escape_character,
                             line_terminator=line_terminator)

        # Delegate Table construction to the dict-based factory.
        table = cls.from_dict(from_csv,
                              tablename,
                              metadata,
                              primary_key,
                              column_kwargs=column_kwargs,
                              skip_nested=skip_nested,
                              default_to_str=default_to_str,
                              type_mapping=type_mapping,
                              **kwargs)

        return table
def parse_csv(input_data,
              delimiter='|',
              wrap_all_strings=False,
              null_text='None',
              wrapper_character="'",
              double_wrapper_character_when_nested=False,
              escape_character="\\",
              line_terminator='\r\n'):
    """De-serialize CSV data into a Python :class:`dict <python:dict>` object.

    .. versionadded:: 0.3.0

    .. tip::

      Unwrapped empty column values are automatically interpreted as null
      (:obj:`None <python:None>`).

    :param input_data: The CSV data to de-serialize. Should include column headers
      and at least **one** row of data. Will ignore any rows of data beyond the
      first row.
    :type input_data: :class:`str <python:str>`

    :param delimiter: The delimiter used between columns. Defaults to ``|``.
    :type delimiter: :class:`str <python:str>`

    :param wrap_all_strings: If ``True``, parses with
      :obj:`csv.QUOTE_NONNUMERIC <python:csv.QUOTE_NONNUMERIC>` quoting
      (all non-numeric values are expected to be wrapped). If ``False``, uses
      :obj:`csv.QUOTE_MINIMAL <python:csv.QUOTE_MINIMAL>`. Defaults to
      ``False``.
    :type wrap_all_strings: :class:`bool <python:bool>`

    :param wrapper_character: The string used to wrap string values when
      wrapping is applied. Defaults to ``'``.
    :type wrapper_character: :class:`str <python:str>`

    :param null_text: The string used to indicate an empty value if empty
      values are wrapped. Defaults to `None`.
    :type null_text: :class:`str <python:str>`

    :param double_wrapper_character_when_nested: If ``True``, a doubled
      ``wrapper_character`` inside a value is interpreted as a literal
      ``wrapper_character``. Defaults to ``False``.
    :type double_wrapper_character_when_nested: :class:`bool <python:bool>`

    :param escape_character: The character used to escape the
      ``wrapper_character`` within values. Defaults to ``\\``.
    :type escape_character: :class:`str <python:str>`

    :param line_terminator: The character(s) used to mark the end of a record.
      Defaults to ``'\\r\\n'``.
    :type line_terminator: :class:`str <python:str>`

    :returns: A :class:`dict <python:dict>` representation of the CSV record.
    :rtype: :class:`dict <python:dict>`

    :raises DeserializationError: if ``input_data`` is not a valid
      :class:`str <python:str>`
    :raises CSVStructureError: if there are less than 2 (two) rows in ``input_data``
      or if column headers are not valid Python variable names

    """
    use_file = False
    if not checkers.is_file(input_data) and not checkers.is_iterable(
            input_data):
        try:
            input_data = validators.string(input_data, allow_empty=False)
        except (ValueError, TypeError):
            raise DeserializationError("input_data expects a 'str', received '%s'" \
                                       % type(input_data))

        # DictReader expects an iterable of lines.
        input_data = [input_data]
    elif checkers.is_file(input_data):
        use_file = True

    if not wrapper_character:
        wrapper_character = '\''

    if wrap_all_strings:
        quoting = csv.QUOTE_NONNUMERIC
    else:
        quoting = csv.QUOTE_MINIMAL

    # Re-register the dialect on every call so the caller's parameters
    # always take effect.
    if 'sqlathanor' in csv.list_dialects():
        csv.unregister_dialect('sqlathanor')

    csv.register_dialect('sqlathanor',
                         delimiter=delimiter,
                         doublequote=double_wrapper_character_when_nested,
                         escapechar=escape_character,
                         quotechar=wrapper_character,
                         quoting=quoting,
                         lineterminator=line_terminator)

    if not use_file:
        csv_reader = csv.DictReader(input_data,
                                    dialect='sqlathanor',
                                    restkey=None,
                                    restval=None)
        rows = list(csv_reader)
    else:
        # Python 3 requires newline='' so the csv module handles
        # line terminators itself; Python 2 open() has no such parameter.
        if not is_py2:
            with open(input_data, 'r', newline='') as input_file:
                csv_reader = csv.DictReader(input_file,
                                            dialect='sqlathanor',
                                            restkey=None,
                                            restval=None)
                rows = list(csv_reader)
        else:
            with open(input_data, 'r') as input_file:
                csv_reader = csv.DictReader(input_file,
                                            dialect='sqlathanor',
                                            restkey=None,
                                            restval=None)

                rows = list(csv_reader)

    if not rows:
        raise CSVStructureError(
            'expected 1 row of data and 1 header row, missing 1')

    # Only the first data record is used; any further rows are ignored.
    data = rows[0]

    for key in data:
        try:
            validators.variable_name(key)
        except ValueError:
            raise CSVStructureError(
                'column (%s) is not a valid Python variable name' % key)

        # Wrapped empty values are serialized as null_text; map them to None.
        if data[key] == null_text:
            data[key] = None

    csv.unregister_dialect('sqlathanor')

    return data
def generate_model_from_csv(serialized,
                            tablename,
                            primary_key,
                            cls = BaseModel,
                            serialization_config = None,
                            skip_nested = True,
                            default_to_str = False,
                            type_mapping = None,
                            base_model_attrs = None,
                            delimiter = '|',
                            wrap_all_strings = False,
                            null_text = 'None',
                            wrapper_character = "'",
                            double_wrapper_character_when_nested = False,
                            escape_character = "\\",
                            line_terminator = '\r\n',
                            **kwargs):
    """Generate a :term:`model class` from serialized
    :term:`CSV <Comma-Separated Value (CSV)>` data.

    .. versionadded: 0.3.0

    .. note::

      This function *cannot* programmatically create
      :term:`relationships <relationship>`, :term:`hybrid properties <hybrid property>`,
      or :term:`association proxies <association proxy>`.

    :param serialized: The CSV data whose column headers become column names and
      whose value data types determine :term:`model attribute` data types. If
      Path-like, the referenced file is assumed to include a header row; if a
      multi-line :class:`str <python:str>`, the first line is assumed to be the
      header row; if a :class:`list <python:list>`, the first item is assumed
      to be the header row.
    :type serialized: :class:`str <python:str>` / Path-like object /
      :class:`list <python:list>`

    :param tablename: The name of the SQL table to which the model corresponds.
    :type tablename: :class:`str <python:str>`

    :param primary_key: The name of the column/key to use as the table's
      primary key.
    :type primary_key: :class:`str <python:str>`

    :param cls: The base class to use when generating a new :term:`model class`.
      Defaults to :class:`BaseModel` for serialization/de-serialization support.
      A :class:`tuple <python:tuple>` of classes has :class:`BaseModel` mixed
      into it; any other non-:obj:`None <python:None>` value is mixed in with
      :class:`BaseModel` directly.
    :type cls: :obj:`None <python:None>` / :class:`tuple <python:tuple>` of
      classes / class object

    :param serialization_config: Collection of
      :class:`AttributeConfiguration <sqlathanor.attributes.AttributeConfiguration>`
      governing the generated model's :term:`serialization` /
      :term:`de-serialization` :ref:`configuration <configuration>`. If
      :obj:`None <python:None>`, serialization and de-serialization are
      supported across all keys. Defaults to :obj:`None <python:None>`.
    :type serialization_config: Iterable of
      :class:`AttributeConfiguration <sqlathanor.attributes.AttributeConfiguration>`
      or coercable :class:`dict <python:dict>` objects / :obj:`None <python:None>`

    :param skip_nested: If ``True``, keys with nested items (iterables, JSON
      objects, etc.) are ignored. If ``False``, nested items are treated as
      :class:`str <python:str>`. Defaults to ``True``.
    :type skip_nested: :class:`bool <python:bool>`

    :param default_to_str: If ``True``, keys/columns whose value type cannot be
      determined default to ``str``
      (:class:`Text <sqlalchemy:sqlalchemy.types.Text>`). If ``False``, the
      value type's ``__name__`` is used to find a mapping. Defaults to
      ``False``.
    :type default_to_str: :class:`bool <python:bool>`

    :param type_mapping: Maps value type names to
      :doc:`SQLAlchemy Data Types <sqlalchemy:core/types>`, overriding or
      extending the defaults (``bool`` →
      :class:`Boolean <sqlalchemy:sqlalchemy.types.Boolean>`, ``str`` →
      :class:`Text <sqlalchemy:sqlalchemy.types.Text>`, ``int`` →
      :class:`Integer <sqlalchemy:sqlalchemy.types.Integer>`, ``float`` →
      :class:`Float <sqlalchemy:sqlalchemy.types.Float>`, ``date`` →
      :class:`Date <sqlalchemy:sqlalchemy.types.Date>`, ``datetime`` →
      :class:`DateTime <sqlalchemy:sqlalchemy.types.DateTime>`, ``time`` →
      :class:`Time <sqlalchemy:sqlalchemy.types.Time>`).
    :type type_mapping: :class:`dict <python:dict>` with type names as keys and
      column data types as values.

    :param base_model_attrs: Optional :class:`dict <python:dict>` of special
      attributes applied to the generated
      :class:`BaseModel <sqlathanor.declarative.BaseModel>` (e.g.
      ``__table_args__``); keys are attribute names, values are the values
      applied. Defaults to :obj:`None <python:None>`.
    :type base_model_attrs: :class:`dict <python:dict>` / :obj:`None <python:None>`

    :param delimiter: The delimiter used between columns. Defaults to ``|``.
    :type delimiter: :class:`str <python:str>`

    :param wrapper_character: The string used to wrap string values when
      wrapping is applied. Defaults to ``'``.
    :type wrapper_character: :class:`str <python:str>`

    :param null_text: The string used to indicate an empty value if empty
      values are wrapped. Defaults to `None`.
    :type null_text: :class:`str <python:str>`

    :param kwargs: Additional keyword arguments passed to
      :func:`declarative_base() <sqlathanor.declarative.declarative_base>` when
      generating the programmatic
      :class:`BaseModel <sqlathanor.declarative.BaseModel>`.

    :returns: :term:`Model class` whose structure matches ``serialized``.
    :rtype: :class:`BaseModel`

    :raises UnsupportedValueTypeError: when a value in ``serialized`` does not
      have a corresponding key in ``type_mapping``
    :raises ValueError: if ``tablename`` is empty
    :raises DeserializationError: if ``serialized`` is not a valid
      :class:`str <python:str>`
    :raises CSVStructureError: if there are less than 2 (two) rows in ``serialized``
      or if column headers are not valid Python variable names

    """
    # pylint: disable=line-too-long,too-many-arguments

    # Resolve string/list input to raw CSV text; Path-like input is handled
    # by parse_csv() directly.
    if not checkers.is_file(serialized):
        serialized = read_csv_data(serialized, single_record = False)

    parsed_record = parse_csv(serialized,
                              delimiter = delimiter,
                              wrap_all_strings = wrap_all_strings,
                              null_text = null_text,
                              wrapper_character = wrapper_character,
                              double_wrapper_character_when_nested = double_wrapper_character_when_nested,
                              escape_character = escape_character,
                              line_terminator = line_terminator)

    return generate_model_from_dict(parsed_record,
                                    tablename,
                                    primary_key,
                                    cls = cls,
                                    serialization_config = serialization_config,
                                    skip_nested = skip_nested,
                                    default_to_str = default_to_str,
                                    type_mapping = type_mapping,
                                    base_model_attrs = base_model_attrs,
                                    **kwargs)
def test_from_yaml(input_files, input_data, tablename, primary_key,
                   column_kwargs, skip_nested, default_to_str, type_mapping,
                   expected_types, error):
    # pylint: disable=no-member,line-too-long
    """Verify ``Table.from_yaml``: either it raises ``error``, or it produces a
    :class:`Table` whose name, column keyword arguments, column types, and
    primary key all match the supplied expectations."""
    input_data = check_input_file(input_files, input_data)

    # Non-file inputs are serialized to a YAML string before being parsed.
    if not checkers.is_file(input_data):
        input_data = yaml.dump(input_data)

    if column_kwargs is None:
        column_kwargs = {}

    call_kwargs = dict(tablename=tablename,
                       metadata=MetaData(),
                       primary_key=primary_key,
                       column_kwargs=column_kwargs,
                       skip_nested=skip_nested,
                       default_to_str=default_to_str,
                       type_mapping=type_mapping)

    if error:
        with pytest.raises(error):
            Table.from_yaml(input_data, **call_kwargs)
        return

    result = Table.from_yaml(input_data, **call_kwargs)

    assert isinstance(result, Table)
    assert result.name == tablename

    # Every configured column kwarg must be present on the generated column.
    for key, expected_kwargs in column_kwargs.items():
        matched_column = next((column for column in result.c
                               if column.name == key), None)
        assert matched_column is not None

        for subkey, expected_value in expected_kwargs.items():
            assert hasattr(matched_column, subkey) is True
            actual_value = getattr(matched_column, subkey)

            # Column defaults are wrapped in a ColumnDefault; unwrap to compare.
            if subkey == 'default':
                actual_value = actual_value.arg

            assert actual_value == expected_value

    # Each expected (name, type) pair must match a generated column, and only
    # the designated primary key column may be flagged as primary.
    for column_name, column_type, *_ in expected_types:
        matched_column = next((column for column in result.c
                               if column.name == column_name), None)
        assert matched_column is not None
        assert isinstance(matched_column.type, column_type) is True
        assert matched_column.primary_key is (column_name == primary_key)
示例#12
0
def _read_spss(data: Union[bytes, BytesIO, 'os.PathLike[Any]'],
               limit: Optional[int] = None,
               offset: int = 0,
               exclude_variables: Optional[List[str]] = None,
               include_variables: Optional[List[str]] = None,
               metadata_only: bool = False,
               apply_labels: bool = False,
               labels_as_categories: bool = True,
               missing_as_NaN: bool = False,
               convert_datetimes: bool = True,
               dates_as_datetime64: bool = False,
               **kwargs):
    """Internal function that reads an SPSS (.sav or .zsav) file and returns a
    :class:`tuple <python:tuple>` with a Pandas
    :class:`DataFrame <pandas:pandas.DataFrame>` object and a metadata
    :class:`dict <python:dict>`.

    :param data: The SPSS data to load. Accepts either a series of bytes or a filename.
    :type data: Path-like filename, :class:`bytes <python:bytes>` or
      :class:`BytesIO <python:io.bytesIO>`

    :param limit: The number of records to read from the data. If :obj:`None <python:None>`
      will return all records. Defaults to :obj:`None <python:None>`.
    :type limit: :class:`int <python:int>` or :obj:`None <python:None>`

    :param offset: The record at which to start reading the data. Defaults to 0 (first
      record).
    :type offset: :class:`int <python:int>`

    :param exclude_variables: A list of the variables that should be ignored when reading
      data. Defaults to :obj:`None <python:None>`.
    :type exclude_variables: iterable of :class:`str <python:str>` or
      :obj:`None <python:None>`

    :param include_variables: A list of the variables that should be explicitly included
      when reading data. Defaults to :obj:`None <python:None>`.
    :type include_variables: iterable of :class:`str <python:str>` or
      :obj:`None <python:None>`

    :param metadata_only: If ``True``, will return no data records in the resulting
      :class:`DataFrame <pandas:pandas.DataFrame>` but will return a complete metadata
      :class:`dict <python:dict>`. Defaults to ``False``.
    :type metadata_only: :class:`bool <python:bool>`

    :param apply_labels: If ``True``, converts the numerically-coded values in the raw
      data to their human-readable labels. Defaults to ``False``.
    :type apply_labels: :class:`bool <python:bool>`

    :param labels_as_categories: If ``True``, will convert labeled or formatted values to
      Pandas :term:`categories <pandas:category>`. Defaults to ``True``.

      .. caution::

        This parameter will only have an effect if the ``apply_labels`` parameter is
        ``True``.

    :type labels_as_categories: :class:`bool <python:bool>`

    :param missing_as_NaN: If ``True``, will return any missing values as
      :class:`NaN <pandas:NaN>`. Otherwise will return missing values as per the
      configuration of missing value representation stored in the underlying SPSS data.
      Defaults to ``False``, which applies the missing value representation configured in
      the SPSS data itself.
    :type missing_as_NaN: :class:`bool <python:bool>`

    :param convert_datetimes: if ``True``, will convert the native integer representation
      of datetime values in the SPSS data to Pythonic
      :class:`datetime <python:datetime.datetime>`, or
      :class:`date <python:datetime.date>`, etc. representations (or Pandas
      :class:`datetime64 <pandas:datetime64>`, depending on the ``dates_as_datetime64``
      parameter). If ``False``, will leave the original integer representation. Defaults
      to ``True``.
    :type convert_datetimes: :class:`bool <python:bool>`

    :param dates_as_datetime64: If ``True``, will return any date values as Pandas
      :class:`datetime64 <pandas.datetime64>` types. Defaults to ``False``.

      .. caution::

        This parameter is only applied if ``convert_datetimes`` is set to ``True``.

    :type dates_as_datetime64: :class:`bool <python:bool>`

    :returns: A :class:`DataFrame <pandas:DataFrame>` representation of the SPSS data (or
      :obj:`None <python:None>`) and a :class:`Metadata` representation of the dataset's
      metadata / data map.
    :rtype: :class:`pandas.DataFrame <pandas:DataFrame>`/:obj:`None <python:None>` and
      :class:`Metadata`

    :raises InvalidDataFormatError: if ``data`` is not a filename,
      :class:`BytesIO <python:io.BytesIO>`, or :class:`bytes <python:bytes>` object

    """
    if not any([
            checkers.is_file(data),
            checkers.is_bytesIO(data),
            checkers.is_type(data, bytes)
    ]):
        raise errors.InvalidDataFormatError(
            'data must be a filename, BytesIO, or bytes '
            f'object. Was: {data.__class__.__name__}')

    limit = validators.integer(limit, allow_empty=True, minimum=0)
    offset = validators.integer(offset, minimum=0)

    exclude_variables = validators.iterable(exclude_variables,
                                            allow_empty=True)
    if exclude_variables:
        exclude_variables = [validators.string(x) for x in exclude_variables]

    include_variables = validators.iterable(include_variables,
                                            allow_empty=True)
    if include_variables:
        include_variables = [validators.string(x) for x in include_variables]

    # Options shared by both the on-disk and in-memory read paths; hoisted so
    # the two read_sav() calls cannot drift apart.
    read_options = dict(metadataonly=metadata_only,
                        dates_as_pandas_datetime=dates_as_datetime64,
                        apply_value_formats=apply_labels,
                        formats_as_category=labels_as_categories,
                        usecols=include_variables,
                        user_missing=not missing_as_NaN,
                        disable_datetime_conversion=not convert_datetimes,
                        row_limit=limit or 0,
                        row_offset=offset,
                        **kwargs)

    if checkers.is_file(data):
        df, meta = pyreadstat.read_sav(data, **read_options)
    else:
        # pyreadstat reads only from disk, so spill the in-memory data to a
        # temporary file. BytesIO objects must first be unwrapped to bytes —
        # file.write() would otherwise raise a TypeError on them.
        raw_bytes = data.getvalue() if checkers.is_bytesIO(data) else data

        with tempfile.NamedTemporaryFile(delete=False) as temp_file:
            temp_file.write(raw_bytes)
            temp_file_name = temp_file.name

        # try/finally guarantees the temp file is removed even when read_sav
        # raises (the original cleanup only ran on the success path).
        try:
            df, meta = pyreadstat.read_sav(temp_file_name, **read_options)
        finally:
            os.remove(temp_file_name)

    metadata = Metadata.from_pyreadstat(meta)

    # Excluded variables are dropped after the read because pyreadstat only
    # supports an include-list (usecols), not an exclude-list.
    if exclude_variables:
        df = df.drop(exclude_variables, axis=1)
        if metadata.column_metadata:
            for variable in exclude_variables:
                metadata.column_metadata.pop(variable, None)

    return df, metadata
示例#13
0
def test_generate_model_from_yaml(input_files,
                                  input_data,
                                  tablename,
                                  primary_key,
                                  serialization_config,
                                  skip_nested,
                                  default_to_str,
                                  type_mapping,
                                  base_model_attrs,
                                  expected_types,
                                  error):
    # pylint: disable=no-member,line-too-long
    """Verify ``generate_model_from_yaml``: either it raises ``error``, or it
    produces a model class with the expected serialization API, table name,
    attribute types, serialization configuration, and base-model attributes."""
    input_data = check_input_file(input_files, input_data)

    # Non-file inputs are serialized to a YAML string before being parsed.
    if not checkers.is_file(input_data):
        input_data = yaml.dump(input_data)

    call_kwargs = dict(tablename = tablename,
                       primary_key = primary_key,
                       serialization_config = serialization_config,
                       skip_nested = skip_nested,
                       default_to_str = default_to_str,
                       type_mapping = type_mapping,
                       base_model_attrs = base_model_attrs)

    if error:
        with pytest.raises(error):
            generate_model_from_yaml(input_data, **call_kwargs)
        return

    result = generate_model_from_yaml(input_data, **call_kwargs)

    # The generated class must expose the sqlathanor serialization API.
    for api_member in ('to_json',
                       'new_from_json',
                       'update_from_json',
                       '__serialization__'):
        assert hasattr(result, api_member) is True

    assert result.__tablename__ == tablename

    for attr_name, attr_type, *_ in expected_types:
        assert hasattr(result, attr_name) is True
        attribute = getattr(result, attr_name, None)
        assert isinstance(attribute.type, attr_type) is True

    if serialization_config:
        # Explicit configuration must be carried through unchanged.
        for config_item in serialization_config:
            assert hasattr(result, config_item.name) is True
            assert result.get_attribute_serialization_config(config_item.name) == config_item
    else:
        # Without explicit configuration, every attribute defaults to full
        # bidirectional support across all serialization formats.
        for attr_name, *_ in expected_types:
            assert hasattr(result, attr_name) is True
            attr_config = result.get_attribute_serialization_config(attr_name)
            assert attr_config.supports_csv == (True, True)
            assert attr_config.supports_json == (True, True)
            assert attr_config.supports_yaml == (True, True)
            assert attr_config.supports_dict == (True, True)

    if base_model_attrs:
        for key, expected_value in base_model_attrs.items():
            assert hasattr(result, key) is True
            assert getattr(result, key) == expected_value