예제 #1
0
def convert_to_pandas_timedelta(x):
    r"""Convert variable with time units to a pandas.Timedelta instance.

    Args:
        x (object): Scalar/array with units to convert to a pandas.Timedelta
            instance.

    Returns:
        pandas.Timedelta: Equivalent Timedelta variable.

    Raises:
        KeyError: If the variable's unit is not one of the supported time
            units.

    """
    assert (has_units(x))
    t_data = get_data(x)
    t_unit = get_units(x)
    # Map supported time-unit strings to pandas unit codes. Microseconds can
    # be spelled with either MICRO SIGN (U+00B5) or GREEK SMALL LETTER MU
    # (U+03BC); both map to 'us'. The original code appended 's' to the
    # already-complete b'\xce\xbcs' ('μs'), producing the unmatchable key
    # 'μss' -- fixed by using the bare mu byte sequence here.
    unit_map = {
        'ns': 'ns',
        (tools.bytes2str(b'\xc2\xb5') + 's'): 'us',
        (tools.bytes2str(b'\xce\xbc') + 's'): 'us',
        'ms': 'ms',
        's': 's',
        'min': 'm',
        'hr': 'h',
        'day': 'D'
    }
    return pd.Timedelta(t_data, unit=unit_map[t_unit])
예제 #2
0
def convert_unit_string(orig_str, replacements=None):
    r"""Convert unit string to string that the Python package can
    understand.

    Args:
        orig_str (str): Original units string to convert.
        replacements (dict, optional): Mapping from one unit name to another.
            Defaults to a mapping that normalizes common aliases
            ('h'/'hrs' -> 'hr', 'days' -> 'day', '100%' -> 'percent').

    Returns:
        str: Converted string.

    Raises:
        Exception: If the string cannot be parsed as a sequence of units.

    """
    if not orig_str.strip():
        return ''
    if replacements is None:
        replacements = {
            'h': 'hr',
            'hrs': 'hr',
            'days': 'day',
            '100%': 'percent'
        }
    # Extra characters permitted inside unit names: MICRO SIGN, GREEK MU
    # (microsecond spelling), DEGREE SIGN, and the characters of '100%'.
    regex_mu = [
        tools.bytes2str(b'\xc2\xb5'),
        tools.bytes2str(b'\xce\xbcs'),
        tools.bytes2str(b'\xc2\xb0'), r'(?:100\%)'
    ]
    # One unit term: optional parens, a name, an optional exponent
    # (introduced by '^' or '**', optionally parenthesized), and an optional
    # trailing operator (space, '*' or '/').
    regex = (r'(?P<paren>\()?(?P<name>[A-Za-z%s]+)'
             r'(?:(?:(?:\^)|(?:\*\*))?(?P<exp_paren>\()?(?P<exp>-?[0-9]+)'
             r'(?(exp_paren)\)))?'
             r'(?(paren)\)|)(?P<op> |(?:\*)|(?:\/))?' % ''.join(regex_mu))
    out = ''
    if re.fullmatch(r'(?:%s)+' % regex, orig_str.strip()):
        for x in re.finditer(regex, orig_str.strip()):
            xdict = x.groupdict()
            if xdict['name'] in replacements:
                xdict['name'] = replacements[xdict['name']]
            if xdict['exp']:
                out += '({name}**{exp})'.format(**xdict)
            else:
                out += xdict['name']
            if xdict['op']:
                # Bare whitespace between units means multiplication.
                if xdict['op'].isspace():
                    xdict['op'] = '*'
                out += xdict['op']
    else:  # pragma: debug
        print(repr(orig_str), type(orig_str))
        m = re.search(r'(?:%s)+' % regex, orig_str.strip())
        if m:
            print(repr(m.group(0)), m.groupdict())
        else:
            print('no match')
        for m in re.finditer(regex, orig_str.strip()):
            print(m.group(0), m.groupdict())
        raise Exception("Could not standardize units: %s" % repr(orig_str))
    return out
예제 #3
0
 def test_convert_R_unit_string(self):
     r"""Test convert_R_unit_string."""
     cases = [
         ('g', 'g'),
         ('g2', '(g**2)'),
         ('g2 km s-2', '(g**2)*km*(s**-2)'),
         ('degC d', 'degC*d'),
         (tools.bytes2str(b'\xc2\xb0C d'), tools.bytes2str(b'\xc2\xb0C*d')),
         ('h', 'hr'),
         ('hrs/kg', 'hr/kg'),
         ('', ''),
         ('cm**(-2)', '(cm**-2)')]
     for r_str, expected in cases:
         self.assert_equal(units.convert_R_unit_string(r_str), expected)
         # Conversion should be idempotent on already-converted strings.
         self.assert_equal(units.convert_R_unit_string(expected), expected)
         units.add_units(1.0, r_str)
예제 #4
0
    def get_field_units(self, as_bytes=False):
        r"""Get the field units for an array of fields.

        Args:
            as_bytes (bool, optional): If True, the field units will be returned
                as bytes. If False the field units will be returned as unicode.
                Defaults to False.

        Returns:
            list: Units for each field in the data type, or None if the type
                is not an array or no field has explicit units.

        Raises:
            Exception: If 'items' is a single dictionary (variable number of
                items), which is not yet supported.

        """
        if self.typedef['type'] != 'array':
            return None
        # Initialize to None so that an unexpected 'items' entry (neither
        # dict nor list) cannot trigger a NameError below.
        out = None
        if getattr(self, 'field_units', None) is not None:
            out = copy.deepcopy(self.field_units)
        elif isinstance(self.typedef['items'], dict):  # pragma: debug
            raise Exception("Variable number of items not yet supported.")
        elif isinstance(self.typedef['items'], list):
            out = []
            any_units = False
            for x in self.typedef['items']:
                out.append(x.get('units', ''))
                if len(x.get('units', '')) > 0:
                    any_units = True
            # Don't use field units if they are all defaults
            if not any_units:
                out = None
        if out is not None:
            if as_bytes:
                out = tools.str2bytes(out, recurse=True)
            else:
                out = tools.bytes2str(out, recurse=True)
        return out
예제 #5
0
def add_units(arr, unit_str, dtype=None):
    r"""Add units to an array or scalar.

    Args:
        arr (np.ndarray, float, int): Scalar or array of data to add units to.
        unit_str (str): Unit string.
        dtype (np.dtype, optional): Numpy data type that should be maintained for
            array/quantity with units. If not provided, this is determined from the
            array.

    Returns:
        unyt.unyt_array: Array with units.

    Raises:
        ValueError: If the unit string cannot be parsed.

    """
    ureg = get_ureg()
    unit_str = tools.bytes2str(unit_str)
    if is_null_unit(unit_str):
        # Dimensionless/null units: nothing to attach.
        return arr
    unit_str = convert_unit_string(unit_str)
    if has_units(arr):
        # Already carries units; convert rather than re-wrap.
        return convert_to(arr, unit_str)
    if dtype is None:
        if isinstance(arr, np.ndarray):
            dtype = arr.dtype
        else:
            dtype = np.array([arr]).dtype
    try:
        if isinstance(arr, np.ndarray) and (arr.ndim > 0):
            out = unyt.unyt_array(arr, unit_str, dtype=dtype, registry=ureg)
        else:
            out = unyt.unyt_quantity(arr, unit_str, dtype=dtype, registry=ureg)
    except Exception as e:
        # Narrowed from BaseException so KeyboardInterrupt/SystemExit are not
        # swallowed; chain the cause so the original parse error is kept.
        raise ValueError("Error parsing unit: %s, type(%s)." %
                         (repr(unit_str), type(unit_str))) from e
    return out
예제 #6
0
    def func_deserialize(self, msg):
        r"""Deserialize a message.

        Args:
            msg (bytes): Message to be deserialized.

        Returns:
            dict: Deserialized Python dictionary.

        """
        result = dict()
        lines = tools.bytes2str(msg.split(self.newline), recurse=True)
        for line in lines:
            fields = [part for part in line.split(self.delimiter) if part]
            if len(fields) > 2:
                raise ValueError("Line has more than one delimiter: "
                                 + str(line))
            if len(fields) <= 1:
                # TODO: Allow empty?
                continue
            key, value = fields
            if value.startswith("'") and value.endswith("'"):
                # Quoted strings are stored without the surrounding quotes.
                result[key] = value.strip("'")
            else:
                try:
                    result[key] = json.loads(value)
                except BaseException:
                    # Values that are not valid JSON are kept as raw strings.
                    result[key] = value
        return result
예제 #7
0
    def func_serialize(self, args):
        r"""Serialize a message.

        Args:
            args (dict): Python dictionary to be serialized.

        Returns:
            bytes, str: Serialized message.

        """
        lines = []
        for key, value in args.items():
            if not isinstance(key, (str, bytes)):  # pragma: debug
                raise ValueError(
                    "Serialization of non-string keys not supported.")
            entry = tools.bytes2str(key) + self.delimiter
            if isinstance(value, list):
                # A pair of equal-shape arrays is written one element per
                # line, with both arrays' units recorded on the first line.
                assert (len(value) == 2)
                assert (value[0].shape == value[1].shape)
                pad = ' ' * len(entry)
                rows = [self._array_fmt % (value[0][i], value[1][i])
                        for i in range(len(value[0]))]
                units_pair = [str(getattr(vv, 'units', '-')) for vv in value]
                rows[0] += f"\t! [{'; '.join(units_pair)}]"
                entry += (',\n' + pad).join(rows)
            elif isinstance(value, str):
                entry += "\'%s\'" % value
            else:
                entry += json.dumps(value, cls=JSONReadableEncoder)
                if hasattr(value, 'units'):
                    entry += f'\t! [{value.units}]'
            lines.append(entry)
        return tools.str2bytes('\n'.join(lines))
예제 #8
0
def add_units(arr, unit_str, dtype=None):
    r"""Add units to an array or scalar.

    Args:
        arr (np.ndarray, float, int): Scalar or array of data to add units to.
        unit_str (str): Unit string.
        dtype (np.dtype, optional): Numpy data type that should be maintained
            for the array/quantity with units. If not provided, this is
            determined from the array.

    Returns:
        unyt.unyt_array: Array with units.

    """
    unit_str = tools.bytes2str(unit_str)
    # Dimensionless/null units: return the data untouched.
    if is_null_unit(unit_str):
        return arr
    # Data that already carries units is converted instead of re-wrapped.
    if has_units(arr):
        return convert_to(arr, unit_str)
    if dtype is None:
        dtype = (arr.dtype if isinstance(arr, np.ndarray)
                 else np.array([arr]).dtype)
    # Non-scalar ndarrays become unyt_array; everything else a unyt_quantity.
    is_nonscalar = isinstance(arr, np.ndarray) and (arr.ndim > 0)
    factory = unyt.unyt_array if is_nonscalar else unyt.unyt_quantity
    return factory(arr, unit_str, dtype=dtype, registry=_ureg_unyt)
예제 #9
0
 def input_kwargs(self):
     r"""dict: Get the input keyword arguments used to create this class."""
     kwargs = {}
     for name in self._schema_properties.keys():
         value = getattr(self, name, None)
         if value is not None:
             kwargs[name] = copy.deepcopy(value)
     # Attributes stored internally as bytes are exposed to callers as str.
     for name in self._attr_conv:
         if name in kwargs:
             kwargs[name] = tools.bytes2str(kwargs[name])
     return kwargs
예제 #10
0
def convert_matlab_unit_string(m_str):  # pragma: matlab
    r"""Convert Matlab unit string to string that the Python package
    can understand.

    Args:
        m_str (str): Matlab units string to convert.

    Returns:
        str: Converted string.

    """
    replacements = {'h': 'hr'}
    regex_mu = [tools.bytes2str(b'\xc2\xb5'), tools.bytes2str(b'\xce\xbcs')]
    regex = r'(?P<name>[A-Za-z%s]+)' % ''.join(regex_mu)

    def _substitute(match):
        # Replace the unit name when an alias exists; otherwise keep it.
        name = match.group('name')
        return replacements.get(name, name)

    # Use re.sub so all matches are rewritten in a single pass. The previous
    # implementation spliced replacements into the output using match
    # positions from the *input* string; a length-changing replacement
    # (e.g. 'h' -> 'hr') shifted the positions of later matches and
    # corrupted the result when more than one replacement occurred.
    return re.sub(regex, _substitute, m_str)
예제 #11
0
def test_popen_nobuffer():
    r"""Test open of process without buffer."""
    expected = os.getcwd()  # + '\n'
    # Pick the platform-appropriate "print working directory" command in
    # both argument-list and shell-string form.
    if platform._is_win:  # pragma: windows
        list_args, shell_args = ['cmd', '/c', 'cd'], 'cd'
    else:
        list_args, shell_args = ['pwd'], 'pwd'
    # Test w/o shell
    proc = tools.popen_nobuffer(list_args)
    stdout, _ = proc.communicate()
    assert_equal(tools.bytes2str(stdout).splitlines()[0], expected)
    # Test w/ shell
    proc = tools.popen_nobuffer(shell_args, shell=True)
    stdout, _ = proc.communicate()
    assert_equal(tools.bytes2str(stdout).splitlines()[0], expected)
예제 #12
0
 def set_reply_socket_recv(self, address):
     r"""Set the recv reply socket if the address doesn't exist."""
     address = tools.bytes2str(address)
     if address in self.reply_socket_recv:
         # Socket already registered for this address; nothing to do.
         return address
     sock = self.context.socket(zmq.REQ)
     sock.setsockopt(zmq.LINGER, 0)
     sock.connect(address)
     self.register_comm('REPLY_RECV_' + address, sock)
     # Counters and the socket mapping are updated together under the lock.
     with self.reply_socket_lock:
         self._n_reply_recv[address] = 0
         self._n_zmq_recv[address] = 0
         self.reply_socket_recv[address] = sock
     self.debug("new recv address: %s", address)
     return address
예제 #13
0
def test_bytes2str():
    r"""Test bytes2str."""
    cases = [
        (b'hello', 'hello'),
        ('hello', 'hello'),
        ((b'a', b'b'), ('a', 'b')),
        ({'a': b'a', 'b': b'b'}, {'a': 'a', 'b': 'b'}),
        ([b'a', b'b'], ['a', 'b']),
        # Nested containers should be converted recursively.
        ([b'a', [b'b', b'c']], ['a', ['b', 'c']])]
    for value, expected in cases:
        assert_equal(tools.bytes2str(value, recurse=True), expected)
예제 #14
0
    def func_serialize(self, args):
        r"""Serialize a message.

        Args:
            args (dict): Python dictionary to be serialized.

        Returns:
            bytes, str: Serialized message.

        """
        newline_str = tools.bytes2str(self.newline)
        pieces = []
        # Keys are serialized in sorted order for reproducible output.
        for key in sorted(args.keys()):
            if not isinstance(key, (str, bytes)):
                raise ValueError(
                    "Serialization of non-string keys not supported.")
            pieces.append(tools.bytes2str(key))
            pieces.append(self.delimiter)
            pieces.append(json.dumps(args[key], cls=JSONReadableEncoder))
            pieces.append(newline_str)
        return tools.str2bytes(''.join(pieces))
예제 #15
0
 def serializer_info(self):
     r"""dict: Serializer info."""
     info = copy.deepcopy(self.extra_kwargs)
     for prop in self._schema_properties.keys():
         # The datatype entry is handled elsewhere and excluded here.
         if prop in ['datatype']:
             continue
         value = getattr(self, prop, None)
         if value is not None:
             info[prop] = copy.deepcopy(value)
     # Normalize bytes values (including those nested in containers) to str.
     for prop, value in info.items():
         if isinstance(value, (bytes, list, tuple)):
             info[prop] = tools.bytes2str(value, recurse=True)
     return info
예제 #16
0
 def serializer_info(self):
     r"""dict: Serializer info."""
     info = copy.deepcopy(self.extra_kwargs)
     for prop in self._schema_properties.keys():
         # 'seritype' is always included; datatype and entries still at
         # their defaults are skipped.
         if (prop != 'seritype') and (prop in ['datatype'] + self._defaults_set):
             continue
         value = getattr(self, prop, None)
         if value is not None:
             info[prop] = copy.deepcopy(value)
     # Convert bytes values to str where possible; values that cannot be
     # converted are left as-is.
     for prop in info.keys():
         try:
             info[prop] = tools.bytes2str(info[prop], recurse=True)
         except TypeError:
             pass
     return info
    def normalize(cls, obj):
        r"""Normalize an object, if possible, to conform to this type.

        Args:
            obj (object): Object to normalize.

        Returns:
            object: Normalized object.

        """
        # Only string/bytes objects can be decoded; everything else passes
        # through untouched.
        if not isinstance(obj, (str, bytes)):
            return obj
        try:
            return cls.decode_data(tools.bytes2str(obj),
                                   {'type': cls.name})
        except (ValueError, AttributeError):
            # Strings that cannot be decoded are returned unmodified.
            return obj
예제 #18
0
def array_to_table(arrs, fmt_str, use_astropy=False):
    r"""Serialize an array as an ASCII table.

    Args:
        arrs (np.ndarray, list, tuple): Structured array or list/tuple of
            arrays that contain table information.
        fmt_str (str, bytes): Format string that should be used to structure
            the ASCII array.
        use_astropy (bool, optional): If True, astropy will be used to format
            the table if it is installed. Defaults to False.

    Returns:
        bytes: ASCII table.

    """
    if not _use_astropy:
        use_astropy = False
    dtype = cformat2nptype(fmt_str)
    # Wrap a scalar dtype (no named fields) in a one-field structured dtype
    # so the row-wise consolidation/formatting below is uniform.
    if len(dtype) == 0:
        dtype = np.dtype([('f0', dtype)])
    info = format2table(fmt_str)
    comment = info.get('comment', None)
    if comment is not None:
        # Strip the leading comment marker from the format string so it is
        # not emitted as part of every data row.
        fmt_str = fmt_str.split(comment, 1)[-1]
    arr1 = consolidate_array(arrs, dtype=dtype)
    if use_astropy:
        # astropy path: build a Table and let astropy write the rows.
        fd = sio.StringIO()
        table = apy_Table(arr1)
        delimiter = tools.bytes2str(info['delimiter'])
        apy_ascii.write(table, fd, delimiter=delimiter,
                        format='no_header')
        out = tools.str2bytes(fd.getvalue())
    else:
        # Fallback path: format each row with the (bytes) format string.
        fd = sio.BytesIO()
        fmt_str = tools.str2bytes(fmt_str)
        for ele in arr1:
            line = format_message(ele.tolist(), fmt_str)
            fd.write(line)
        # fmt = fmt_str.split(info['newline'])[0]
        # np.savetxt(fd, arr1,
        #            fmt=fmt, delimiter=info['delimiter'],
        #            newline=info['newline'], header='')
        out = fd.getvalue()
    fd.close()
    return out
예제 #19
0
def is_unit(ustr):
    r"""Determine if a string is a valid unit.

    Args:
        ustr (str): String representation to test.

    Returns:
        bool: True if the string is a valid unit. False otherwise.

    """
    ustr = tools.bytes2str(ustr)
    # Null/dimensionless strings count as valid units.
    if is_null_unit(ustr):
        return True
    try:
        as_unit(ustr)
        return True
    except ValueError:
        return False
예제 #20
0
def decode_json(msg, **kwargs):
    r"""Decode a Python object from a JSON serialization.

    Args:
        msg (str): JSON serialization to decode.
        **kwargs: Additional keyword arguments are passed to json.loads.

    Returns:
        object: Deserialized Python object.

    """
    if isinstance(msg, (str, bytes)):
        # Strings/bytes are normalized to str and decoded directly.
        msg_decode = tools.bytes2str(msg)
        func_decode = json.loads
    else:
        # Anything else is assumed to be a file-like object.
        msg_decode = msg
        func_decode = json.load
    # NOTE(review): func_decode is unconditionally replaced here, making the
    # json.loads/json.load selection above dead code. This only works if
    # instances of the project's JSONDecoder are callable and accept both
    # strings and file-like objects -- confirm against its implementation.
    func_decode = JSONDecoder()
    return func_decode(msg_decode, **kwargs)
예제 #21
0
    def coerce_type(cls, obj, typedef=None, **kwargs):
        r"""Coerce objects of specific types to match the data type.

        Args:
            obj (object): Object to be coerced.
            typedef (dict, optional): Type defintion that object should be
                coerced to. Defaults to None.
            **kwargs: Additional keyword arguments are metadata entries that may
                aid in coercing the type.

        Returns:
            object: Coerced object.

        """
        # Material names may arrive as bytes; normalize them to str before
        # deferring to the parent implementation.
        if isinstance(obj, dict):
            if 'material' in obj:
                obj['material'] = tools.bytes2str(obj['material'])
        return super(ObjMetaschemaType, cls).coerce_type(
            obj, typedef=typedef, **kwargs)
예제 #22
0
    def normalize(cls, obj, working_dir=None):
        r"""Normalize an object, if possible, to conform to this type.

        Args:
            obj (object): Object to normalize.
            working_dir (str, optional): Working directory that should
                be used to make relative paths absolute. Defaults to None.

        Returns:
            object: Normalized object.

        """
        # Only string/bytes objects can be decoded; everything else passes
        # through untouched.
        if not isinstance(obj, (str, bytes)):
            return obj
        try:
            return cls.decode_data(tools.bytes2str(obj),
                                   {'type': cls.name},
                                   working_dir=working_dir)
        except (ValueError, AttributeError):
            # Strings that cannot be decoded are returned unmodified.
            return obj
예제 #23
0
def convert_R_unit_string(r_str):
    r"""Convert R unit string to string that the Python package can
    understand.

    Args:
        r_str (str): R units string to convert.

    Returns:
        str: Converted string.

    """
    # Unit names may contain the MICRO SIGN in addition to ASCII letters.
    mu = tools.bytes2str(b'\xc2\xb5')
    pattern = r'(?P<name>[A-Za-z%s]+)(?P<exp>-?[0-9]*)(?: |$)' % mu
    terms = []
    for match in re.finditer(pattern, r_str):
        name = match.group('name')
        exp = match.group('exp')
        # Units with an exponent become '(name**exp)'; bare units pass
        # through unchanged.
        terms.append('(%s**%s)' % (name, exp) if exp else name)
    return '*'.join(terms)
예제 #24
0
    def decode_data(cls, msg, typedef):
        r"""Decode an object.

        Args:
            msg (string): Encoded object to decode.
            typedef (dict): Type definition that should be used to decode the
                object.

        Returns:
            object: Decoded object.

        Raises:
            ValueError: If a line begins with an unrecognized type code.

        """
        text = tools.bytes2str(msg)
        metadata = {'comments': []}
        elements = {}
        for line_count, line in enumerate(text.splitlines()):
            # Comment lines are collected but not otherwise parsed.
            if line.startswith('#'):
                metadata['comments'].append(line)
                continue
            values = line.split()
            if not values:
                continue
            code = values[0]
            if code not in _map_code2element:
                raise ValueError("Type code '%s' on line %d not understood" %
                                 (code, line_count))
            element = _map_code2element[code]
            elements.setdefault(element, [])
            if element in ['material']:
                # Material is a single value rather than a list of entries.
                elements[element] = values[1]
            else:
                elements[element].append(
                    cls._decode_object_property(
                        values[1:], _default_property_order[element]))
        return ObjDict(elements)
예제 #25
0
    def decode_data(cls, msg, typedef):
        r"""Decode an object.

        Args:
            msg (string): Encoded object to decode.
            typedef (dict): Type definition that should be used to decode the
                object.

        Returns:
            object: Decoded object.

        Raises:
            ValueError: If the message does not begin with the 'ply' magic
                line.

        """
        msg = tools.bytes2str(msg)
        lines = msg.splitlines()
        metadata = {'comments': [], 'element_order': [], 'property_order': {}}
        if lines[0] != 'ply':
            raise ValueError("The first line must be 'ply'")
        # Parse header
        e = None  # current element name (plural form)
        p = None  # current property name
        type_map = {}  # element -> {property: ply type spec string}
        size_map = {}  # element -> declared number of entries for that element
        obj = {}
        for i, line in enumerate(lines):
            if line.startswith('format'):
                metadata['plyformat'] = line.split(None, 1)[-1]
            elif line.startswith('comment'):
                out = line.split(None, 1)[-1]
                # The material is stored in a specially formatted comment.
                if out.startswith('material:'):
                    metadata['element_order'].append('material')
                    obj['material'] = out.split(None, 1)[-1]
                metadata['comments'].append(out)
            elif line.startswith('element'):
                vars = line.split()
                e_sing = vars[1]
                e = singular2plural(e_sing)
                size_map[e] = int(float(vars[2]))
                type_map[e] = {}
                metadata['element_order'].append(e)
                metadata['property_order'][e] = []
                obj[e] = []
            elif line.startswith('property'):
                vars = line.split()
                p = vars[-1]
                # 'list <count type> <item type>' specs keep the full string.
                type_map[e][p] = ' '.join(vars[1:-1])
                metadata['property_order'][e].append(p)
            elif 'end_header' in line:
                # Body starts on the line after 'end_header'.
                headline = i + 1
                break
        # Parse body
        i = headline
        for e in metadata['element_order']:
            if e == 'material':
                continue
            for ie in range(size_map[e]):
                vars = lines[i].split()
                iv = 0  # index of the next unconsumed value on this line
                new = {}
                for p in metadata['property_order'][e]:
                    if type_map[e][p].startswith('list'):
                        # List property: first value is the count, followed
                        # by that many items of the list's item type.
                        type_vars = type_map[e][p].split()
                        count_type = translate_ply2py(type_vars[1])
                        plist_type = translate_ply2py(type_vars[2])
                        count = count_type(vars[iv])
                        plist = []
                        iv += 1
                        for ip in range(count):
                            plist.append(plist_type(vars[iv]))
                            iv += 1
                        new[p] = plist
                    else:
                        # Scalar property: one value of the declared type.
                        prop_type = translate_ply2py(type_map[e][p])
                        new[p] = prop_type(vars[iv])
                        iv += 1
                assert(iv == len(vars))
                obj[e].append(new)
                i += 1
        # Check that all properties filled in
        for e in metadata['element_order']:
            if e not in metadata['property_order']:
                continue
            for p in metadata['property_order'][e]:
                assert(len(obj[e]) == size_map[e])
        # Return
        return PlyDict(obj)
예제 #26
0
def discover_header(fd,
                    serializer,
                    newline=_default_newline,
                    comment=_default_comment,
                    delimiter=None,
                    lineno_format=None,
                    lineno_names=None,
                    lineno_units=None,
                    use_astropy=False):
    r"""Discover ASCII table header info from a file.

    Args:
        fd (file): File object containing the table.
        serializer (DefaultSerialize): Serializer that should be updated with
            header information.
        newline (str, optional): Newline character that should be used to split
            header if it is not already a list. Defaults to _default_newline.
        comment (bytes, optional): String that should be used to mark the
            header lines. If not provided and not in format_str, defaults to
            _default_comment.
        delimiter (bytes, optional): String that should be used to separate
            columns. If not provided and not in format_str, defaults to
            _default_delimiter.
        lineno_format (int, optional): Line number where formats are located.
            If not provided, an attempt will be made to locate one.
        lineno_names (int, optional): Line number where field names are located.
            If not provided, an attempt will be made to locate one.
        lineno_units (int, optional): Line number where field units are located.
            If not provided, an attempt will be made to locate one.
        use_astropy (bool, optional): If True, astropy will be used to parse
            the table if it is installed. Defaults to False.

    """
    header_lines = []
    header_size = 0
    prev_pos = fd.tell()
    # Collect the leading run of comment-prefixed lines as the header.
    # header_size is tracked in raw bytes so the file can be re-seeked.
    for line in fd:
        sline = line.replace(platform._newline, newline)
        if not sline.startswith(comment):
            break
        header_size += len(line)
        header_lines.append(sline)
    # Parse header & set serializer attributes
    header = parse_header(header_lines,
                          newline=newline,
                          lineno_format=lineno_format,
                          lineno_names=lineno_names,
                          lineno_units=lineno_units)
    # Override header with information set explicitly in serializer
    for k in serializer._oldstyle_kws:
        v = getattr(serializer, k, None)
        if v is not None:
            header[k] = v
    header.setdefault('format_str', None)
    if (delimiter is None) or ('format_str' in header):
        delimiter = header['delimiter']
    # Try to determine format from array without header
    str_fmt = b'%s'
    if ((header['format_str'] is None) or (str_fmt in header['format_str'])):
        # Read the data body so column types (and max string widths) can be
        # inferred directly from the contents.
        fd.seek(prev_pos + header_size)
        all_contents = fd.read()
        if len(all_contents) == 0:  # pragma: debug
            return  # In case the file has not been written
        arr = table_to_array(all_contents,
                             names=header.get('field_names', None),
                             comment=comment,
                             delimiter=delimiter,
                             use_astropy=use_astropy)
        header['field_names'] = arr.dtype.names
        # Get format from array
        if header['format_str'] is None:
            header['format_str'] = table2format(arr.dtype,
                                                delimiter=delimiter,
                                                comment=b'',
                                                newline=header['newline'])
        # Determine maximum size of string field
        # Each bare '%s' is replaced with a width-qualified '%Ns' where N is
        # the widest value observed in that column.
        while str_fmt in header['format_str']:
            field_formats = extract_formats(header['format_str'])
            ifld = tools.bytes2str(
                header['field_names'][field_formats.index(str_fmt)])
            max_len = len(max(arr[ifld], key=len))
            new_str_fmt = b'%%%ds' % max_len
            header['format_str'] = header['format_str'].replace(
                str_fmt, new_str_fmt, 1)
    # Update serializer
    serializer.initialize_serializer(header)
    # Seek to just after the header
    fd.seek(prev_pos + header_size)
예제 #27
0
def table_to_array(msg,
                   fmt_str=None,
                   use_astropy=False,
                   names=None,
                   delimiter=None,
                   comment=None,
                   encoding='utf-8'):
    r"""Extract information from an ASCII table as an array.

    Args:
        msg (bytes): ASCII table as bytes string.
        fmt_str (bytes): Format string that should be used to parse the table.
            If not provided, this will attempt to determine the types of columns
            based on their contents.
        use_astropy (bool, optional): If True, astropy will be used to parse
            the table if it is installed. Defaults to False.
        names (list, optional): Field names that should be used for the
            structured data type of the output array. If not provided, names
            are generated based on the order of the fields in the table.
        delimiter (str, optional): String used to separate columns. Defaults to
            None and is not used. This is only used if fmt_str is not provided.
        comment (str, optional): String used to denote comments. Defaults to
            None and is not used. This is only used if fmt_str is not provided.
        encoding (str, optional): Encoding that should be used in Python 3 or
            higher to extract information from the message. Defaults to 'utf-8'.

    Returns:
        np.ndarray: Table contents as an array.

    """
    if not _use_astropy:
        use_astropy = False
    if fmt_str is None:
        # No format: let the parser infer column dtypes from the contents.
        dtype = None
        info = dict(delimiter=delimiter, comment=comment)
    else:
        dtype = cformat2nptype(fmt_str, names=names)
        info = format2table(fmt_str)
        names = dtype.names
    fd = sio.BytesIO(msg)
    if names is not None:
        names = tools.bytes2str(names, recurse=True)
    np_kws = dict()
    if info.get('delimiter', None) is not None:
        np_kws['delimiter'] = info['delimiter']
    if info.get('comment', None) is not None:
        np_kws['comments'] = info['comment']
    np_kws = tools.bytes2str(np_kws, recurse=True)
    if use_astropy:
        # astropy uses the singular 'comment' keyword.
        if 'comments' in np_kws:
            np_kws['comment'] = np_kws.pop('comments')
        tab = apy_ascii.read(fd,
                             names=names,
                             guess=True,
                             encoding=encoding,
                             format='no_header',
                             **np_kws)
        arr = tab.as_array()
        typs = [arr.dtype[i].str for i in range(len(arr.dtype))]
        cols = [c for c in tab.columns]
        # Convert type bytes if python 3
        # astropy yields unicode ('U') string columns; downstream code
        # expects bytes ('S'), so re-encode those columns.
        new_typs = copy.copy(typs)
        convert = []
        for i in range(len(arr.dtype)):
            if np.issubdtype(arr.dtype[i], np.dtype('U')):
                new_typs[i] = 'S' + typs[i].split('U')[-1]
                convert.append(i)
        if convert:
            old_arr = arr
            new_dtyp = np.dtype([(c, t) for c, t in zip(cols, new_typs)])
            new_arr = np.zeros(arr.shape, new_dtyp)
            for i in range(len(arr.dtype)):
                if i in convert:
                    x = np.char.encode(old_arr[cols[i]], encoding='utf-8')
                    new_arr[cols[i]] = x
                else:
                    new_arr[cols[i]] = old_arr[cols[i]]
            arr = new_arr
            typs = new_typs
        # Convert complex type
        # Bytes columns that actually hold complex literals are re-cast;
        # failures are ignored and the column stays as bytes.
        for i in range(len(arr.dtype)):
            if np.issubdtype(arr.dtype[i], np.dtype('S')):
                new_typs = copy.copy(typs)
                new_typs[i] = 'complex'
                new_dtyp = np.dtype([(c, t) for c, t in zip(cols, new_typs)])
                try:
                    arr = arr.astype(new_dtyp)
                except ValueError:
                    pass
        if dtype is not None:
            arr = arr.astype(dtype)
    else:
        # NOTE(review): float-based version parsing breaks on pre-release
        # strings like '1.26.0rc1' and the (1.0, 14.0, 0.0) comparison is
        # effectively a check for numpy >= 1.14 -- confirm and consider a
        # robust version comparison.
        np_ver = tuple([float(x) for x in (np.__version__).split('.')])
        np_kws.update(autostrip=True, dtype=None, names=names)
        if (np_ver >= (1.0, 14.0, 0.0)):
            np_kws['encoding'] = 'bytes'
        arr = np.genfromtxt(fd, **np_kws)
        if dtype is not None:
            arr = arr.astype(dtype)
    fd.close()
    return arr
예제 #28
0
def cformat2nptype(cfmt, names=None):
    r"""Convert a c format string to a numpy data type.

    Args:
        cfmt (str, bytes, list): c format(s) that should be translated. If a
            list is provided, each element is translated independently and
            the result is a structured data type with one field per format.
        names (list, optional): Names that should be assigned to fields in the
            format string if there is more than one. If not provided, names
            are generated based on the order of the format codes.

    Returns:
        np.dtype: Corresponding numpy data type.

    Raises:
        TypeError: If cfmt is not a string, bytes string, or list.
        ValueError: If a c format string does not contain any format codes.
        ValueError: If the number of provided names does not match the
            number of format codes.
        ValueError: If a c format cannot be translated to a numpy datatype.

    """
    # TODO: this may fail on 32bit systems where C long types are 32 bit
    if not isinstance(cfmt, (list, str, bytes)):
        raise TypeError(
            "Input must be a string, bytes string, or list, not %s" %
            type(cfmt))
    if isinstance(cfmt, (str, bytes)):
        cfmt = tools.bytes2str(cfmt)
        fmt_list = extract_formats(cfmt)
        if len(fmt_list) == 0:
            raise ValueError("Could not locate any format codes in the " +
                             "provided format string (%s)." % cfmt)
    else:
        fmt_list = cfmt
    nfmt = len(fmt_list)
    if nfmt != 1:
        # Multiple (or zero) format codes produce a structured dtype with
        # one field per code.
        dtype_list = [cformat2nptype(f) for f in fmt_list]
        if names is None:
            names = ['f%d' % i for i in range(nfmt)]
        elif len(names) != nfmt:
            raise ValueError(
                "Number of names does not match the number of fields.")
        else:
            names = tools.bytes2str(names, recurse=True)
        return np.dtype(dict(names=names, formats=dtype_list))
    cfmt_str = fmt_list[0]
    # The trailing conversion character selects the type family; length
    # modifiers ('hh', 'h', 'l', 'll', 'l64') select the integer width.
    out = None
    if cfmt_str[-1] == 'j':
        out = 'complex128'
    elif cfmt_str[-1] in 'fFeEgG':
        # All C floating point formats map to 64 bit floats; the precision
        # implied by the format modifiers is not preserved.
        out = 'float64'
    elif cfmt_str[-1] in 'di':  # signed integers
        if 'hh' in cfmt_str:  # short short, single char
            out = 'int8'
        elif cfmt_str[-2] == 'h':  # short
            out = 'short'
        elif ('ll' in cfmt_str) or ('l64' in cfmt_str):
            out = 'longlong'  # long long
        elif cfmt_str[-2] == 'l':
            out = 'int_'  # long (broken in python)
        else:
            out = 'intc'  # int, platform dependent
    elif cfmt_str[-1] in 'uoxX':  # unsigned integers
        if 'hh' in cfmt_str:  # short short, single char
            out = 'uint8'
        elif cfmt_str[-2] == 'h':  # short
            out = 'ushort'
        elif ('ll' in cfmt_str) or ('l64' in cfmt_str):
            out = 'ulonglong'  # long long
        elif cfmt_str[-2] == 'l':
            out = 'uint64'  # long (broken in python)
        else:
            out = 'uintc'  # int, platform dependent
    elif cfmt_str[-1] in 'cs':  # characters / strings
        # Width digits between '%' and the conversion character give the
        # string length; no digits means a zero-length string type.
        lstr = cfmt_str[1:-1]
        lint = int(lstr) if lstr else 0
        lsiz = lint * np.dtype('S1').itemsize
        out = 'S%d' % lsiz
    else:
        raise ValueError("Could not find match for format str %s" % cfmt)
    return np.dtype(out)
예제 #29
0
    def update_typedef_from_oldstyle(self, typedef):
        r"""Update a given typedef using an old, table-style serialization spec.
        Existing typedef values are not overwritten and warnings are raised if the
        provided serialization spec is not compatible with the type definition.

        Args:
            typedef (dict): Type definition to update.

        Returns:
            dict: Updated typedef.

        """
        for k in self._oldstyle_kws:
            # 'used' collects keywords fully consumed by this update (they
            # are removed from extra_kwargs); 'updated' collects keywords
            # whose normalized value is written back at the end.
            used = []
            updated = []
            # Prefer an explicitly supplied keyword over the attribute value.
            v = self.extra_kwargs.get(k, getattr(self, k, None))
            if v is None:
                continue
            # Check status
            # Only 'format_str' can establish the 'array' type; the other
            # old-style keywords apply only once the typedef is an array.
            if ((k != 'format_str')
                    and (typedef.get('type', None) != 'array')):
                continue
            # Key specific changes to type
            if k == 'format_str':
                # Build one typedef item per C format code in the string.
                v = tools.bytes2str(v)
                fmts = serialize.extract_formats(v)
                if 'type' in typedef:
                    # Type already set; only verify the item count matches
                    # the number of format codes (existing values are
                    # never overwritten).
                    if (typedef.get('type', None) == 'array'):
                        assert (len(typedef.get('items', [])) == len(fmts))
                    # This continue is covered, but the optimization
                    # causes it to be skipped at runtime
                    # https://bitbucket.org/ned/coveragepy/issues/198/
                    # continue-marked-as-not-covered
                    continue  # pragma: no cover
                as_array = self.extra_kwargs.get(
                    'as_array', getattr(self, 'as_array', False))
                typedef.update(type='array', items=[])
                for i, fmt in enumerate(fmts):
                    # Derive each item's typedef from a 1-element numpy
                    # array of the dtype matching the format code.
                    nptype = self.cformat2nptype(fmt)
                    itype = OneDArrayMetaschemaType.encode_type(
                        np.ones(1, nptype))
                    itype = OneDArrayMetaschemaType.extract_typedef(itype)
                    if (fmt == '%s') and ('precision' in itype):
                        # '%s' strings have no fixed precision.
                        del itype['precision']
                    if as_array:
                        itype['type'] = '1darray'
                    else:
                        # Scalar items take their subtype as the type.
                        itype['type'] = itype.pop('subtype')
                        if (((itype['type'] in _flexible_types)
                             and ('precision' in itype))):
                            del itype['precision']
                    typedef['items'].append(itype)
                used.append('as_array')
                updated.append('format_str')
            elif k == 'as_array':
                # Can only be used in conjunction with format_str
                pass
            elif k in ['field_names', 'field_units']:
                # field_names map to item 'title' entries; field_units to
                # item 'units' entries.
                v = tools.bytes2str(v, recurse=True)
                if k == 'field_names':
                    tk = 'title'
                else:
                    tk = 'units'
                if isinstance(typedef['items'], dict):
                    # A single item spec applies to every field; expand it
                    # into one independent copy per provided value.
                    typedef['items'] = [
                        copy.deepcopy(typedef['items']) for _ in range(len(v))
                    ]
                assert (len(v) == len(typedef.get('items', [])))
                # if len(v) != len(typedef.get('items', [])):
                #     warnings.warn('%d %ss provided, but only %d items in typedef.'
                #                   % (len(v), k, len(typedef.get('items', []))))
                #     continue
                all_updated = True
                for iv, itype in zip(v, typedef.get('items', [])):
                    if tk in itype:
                        # An existing entry is never overwritten, so the
                        # keyword was not fully consumed.
                        all_updated = False
                    itype.setdefault(tk, iv)
                if all_updated:
                    used.append(k)
                updated.append(
                    k)  # Won't change anything unless its an attribute
            else:  # pragma: debug
                raise ValueError(
                    "Unrecognized table-style specification keyword: '%s'." %
                    k)
            # Consumed keywords are dropped from extra_kwargs; updated ones
            # are written back to wherever the value originally came from.
            for rk in used:
                if rk in self.extra_kwargs:
                    del self.extra_kwargs[rk]
            for rk in updated:
                if rk in self.extra_kwargs:
                    self.extra_kwargs[rk] = v
                elif hasattr(self, rk):
                    setattr(self, rk, v)
        return typedef
예제 #30
0
def _format_info_block(text, indent):
    r"""Re-indent a multiline string so it displays as a nested block.

    Args:
        text (str): Multiline text to re-indent.
        indent (str): Prefix added in front of every line.

    Returns:
        str: The text preceded by a newline with every line prefixed by
            indent.

    """
    return "\n%s%s" % (indent, ("\n" + indent).join(text.splitlines(False)))


def ygginfo():
    r"""Print information about yggdrasil installation.

    Collects (label, value) pairs describing the installation — location,
    version, languages, communication mechanisms, config file — plus
    per-language details and (with --verbose) conda/R/reticulate output,
    then logs the aligned table at INFO level. Information gathered before
    an error is still printed (the collection runs inside try/finally).
    """
    from yggdrasil import __version__, tools, config, platform
    from yggdrasil.components import import_component
    lang_list = tools.get_installed_lang()
    prefix = '    '
    curr_prefix = ''
    vardict = [
        ('Location', os.path.dirname(__file__)), ('Version', __version__),
        ('Languages', ', '.join(lang_list)),
        ('Communication Mechanisms', ', '.join(tools.get_installed_comm())),
        ('Default Comm Mechanism', tools.get_default_comm()),
        ('Config File', config.usr_config_file)
    ]
    parser = argparse.ArgumentParser(
        description=(
            'Display information about the current yggdrasil installation.'))
    parser.add_argument(
        '--no-languages',
        action='store_true',
        dest='no_languages',
        help='Don\'t print information about individual languages.')
    parser.add_argument(
        '--verbose',
        action='store_true',
        help='Increase the verbosity of the printed information.')
    args = parser.parse_args()
    try:
        # Add language information
        if not args.no_languages:
            # Installed languages: location and version of each driver.
            vardict.append(('Installed Languages:', ''))
            curr_prefix += prefix
            for lang in sorted(lang_list):
                drv = import_component('model', lang)
                vardict.append((curr_prefix + '%s:' % lang.upper(), ''))
                curr_prefix += prefix
                if lang == 'executable':
                    # The generic 'executable' driver has no single binary.
                    vardict.append((curr_prefix + 'Location', ''))
                else:
                    exec_name = drv.language_executable()
                    if not os.path.isabs(exec_name):
                        exec_name = tools.which(exec_name)
                    vardict.append((curr_prefix + 'Location', exec_name))
                vardict.append(
                    (curr_prefix + 'Version', drv.language_version()))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            # Languages that are supported but not installed: report which
            # prerequisite is missing.
            vardict.append(("Languages Not Installed:", ''))
            curr_prefix += prefix
            for lang in tools.get_supported_lang():
                if lang in lang_list:
                    continue
                drv = import_component('model', lang)
                vardict.append((curr_prefix + '%s:' % lang.upper(), ''))
                curr_prefix += prefix
                vardict.append((curr_prefix + "Language Installed",
                                drv.is_language_installed()))
                vardict.append((curr_prefix + "Base Languages Installed",
                                drv.are_base_languages_installed()))
                if not drv.are_base_languages_installed():
                    vardict.append(
                        (curr_prefix + "Base Languages Not Installed", [
                            b for b in drv.base_languages if
                            (not import_component('model', b).is_installed())
                        ]))
                vardict.append((curr_prefix + "Dependencies Installed",
                                drv.are_dependencies_installed()))
                vardict.append((curr_prefix + "Interface Installed",
                                drv.is_interface_installed()))
                vardict.append(
                    (curr_prefix + "Comm Installed", drv.is_comm_installed()))
                vardict.append(
                    (curr_prefix + "Configured", drv.is_configured()))
                vardict.append((curr_prefix + "Disabled", drv.is_disabled()))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
        # Add verbose information
        if args.verbose:
            # Conda info
            if os.environ.get('CONDA_PREFIX', ''):
                out = tools.bytes2str(
                    subprocess.check_output(['conda', 'info'])).strip()
                curr_prefix += prefix
                vardict.append((curr_prefix + 'Conda Info:',
                                _format_info_block(out, curr_prefix + prefix)))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
            # R and reticulate info
            Rdrv = import_component("model", "R")
            if Rdrv.is_installed():
                env_reticulate = copy.deepcopy(os.environ)
                env_reticulate['RETICULATE_PYTHON'] = sys.executable
                # Stack size
                out = Rdrv.run_executable(["-e", "Cstack_info()"]).strip()
                vardict.append((curr_prefix + "R Cstack_info:",
                                _format_info_block(out, curr_prefix + prefix)))
                # Compilation tools ('R CMD config', derived from Rscript)
                interp = 'R'.join(Rdrv.get_interpreter().rsplit('Rscript', 1))
                vardict.append((curr_prefix + "R C Compiler:", ""))
                curr_prefix += prefix
                for x in ['CC', 'CFLAGS', 'CXX', 'CXXFLAGS']:
                    out = tools.bytes2str(
                        subprocess.check_output([interp, 'CMD', 'config',
                                                 x])).strip()
                    # Note: no leading newline here; value starts on the
                    # label's own line.
                    vardict.append((curr_prefix + x,
                                    "%s" % ("\n" + curr_prefix + prefix).join(
                                        out.splitlines(False))))
                curr_prefix = curr_prefix.rsplit(prefix, 1)[0]
                # Session info
                out = Rdrv.run_executable(["-e", "sessionInfo()"]).strip()
                vardict.append((curr_prefix + "R sessionInfo:",
                                _format_info_block(out, curr_prefix + prefix)))
                # Reticulate conda_list
                if os.environ.get('CONDA_PREFIX', ''):
                    out = Rdrv.run_executable(
                        ["-e", ("library(reticulate); "
                                "reticulate::conda_list()")],
                        env=env_reticulate).strip()
                    vardict.append(
                        (curr_prefix + "R reticulate::conda_list():",
                         _format_info_block(out, curr_prefix + prefix)))
                # Windows python versions
                if platform._is_win:  # pragma: windows
                    out = Rdrv.run_executable(
                        ["-e", ("library(reticulate); "
                                "reticulate::py_versions_windows()")],
                        env=env_reticulate).strip()
                    vardict.append(
                        (curr_prefix + "R reticulate::py_versions_windows():",
                         _format_info_block(out, curr_prefix + prefix)))
                # conda_binary
                if platform._is_win:  # pragma: windows
                    out = Rdrv.run_executable(
                        ["-e", ("library(reticulate); "
                                "conda <- reticulate:::conda_binary(\"auto\"); "
                                "system(paste(conda, \"info --json\"))")],
                        env=env_reticulate).strip()
                    # Fixed label: this entry previously reused the
                    # py_versions_windows label from the section above.
                    vardict.append(
                        (curr_prefix + "R reticulate:::conda_binary():",
                         _format_info_block(out, curr_prefix + prefix)))
                # Reticulate py_config
                out = Rdrv.run_executable(
                    ["-e", ("library(reticulate); "
                            "reticulate::py_config()")],
                    env=env_reticulate).strip()
                vardict.append((curr_prefix + "R reticulate::py_config():",
                                _format_info_block(out, curr_prefix + prefix)))
    finally:
        # Print things: align values past the longest label, then log once.
        max_len = max(len(x[0]) for x in vardict)
        lines = []
        line_format = '%-' + str(max_len) + 's' + prefix + '%s'
        for k, v in vardict:
            lines.append(line_format % (k, v))
        logger.info("yggdrasil info:\n%s" % '\n'.join(lines))