示例#1
0
def cast(pda: Union[pdarray, Strings],
         dt: Union[np.dtype, str]) -> Union[pdarray, Strings]:
    """
    Cast an array to another dtype.

    Parameters
    ----------
    pda : pdarray or Strings
        The array of values to cast
    dt : np.dtype or str
        The target dtype to cast values to

    Returns
    -------
    pdarray or Strings
        Array of values cast to desired dtype. A Strings object is returned
        when the name of the resolved target dtype starts with "str";
        otherwise a pdarray is returned.

    Raises
    ------
    TypeError
        Raised if pda is neither a pdarray nor a Strings object

    Notes
    -----
    The cast is performed according to Chapel's casting rules and is NOT safe
    from overflows or underflows. The user must ensure that the target dtype
    has the precision and capacity to hold the desired result.

    Examples
    --------
    >>> ak.cast(ak.linspace(1.0,5.0,5), dt=ak.int64)
    array([1, 2, 3, 4, 5])

    >>> ak.cast(ak.arange(0,5), dt=ak.float64).dtype
    dtype('float64')

    >>> ak.cast(ak.arange(0,5), dt=ak.bool)
    array([False, True, True, True, True])

    >>> ak.cast(ak.linspace(0,4,5), dt=ak.bool)
    array([False, True, True, True, True])
    """

    if isinstance(pda, pdarray):
        name = pda.name
        objtype = "pdarray"
    elif isinstance(pda, Strings):
        name = pda.entry.name
        objtype = "str"
    else:
        # Fail with a clear error instead of an UnboundLocalError on `name`
        # further down when the input is of an unsupported type.
        raise TypeError(
            "pda must be a pdarray or Strings, not {}".format(
                type(pda).__name__))

    dt = _as_dtype(dt)
    # The option field is always empty here, but it is still formatted into
    # the argument string, which therefore ends with a trailing space.
    opt = ""
    args = "{} {} {} {}".format(name, objtype, dt.name, opt)
    repMsg = generic_msg(cmd="cast", args=args)
    if dt.name.startswith("str"):
        # The reply is split on "+" and every piece is handed to
        # Strings.from_parts.
        return Strings.from_parts(*(type_cast(str, repMsg).split("+")))
    return create_pdarray(type_cast(str, repMsg))
示例#2
0
def array(a: Union[pdarray, np.ndarray, Iterable]) -> Union[pdarray, Strings]:
    """
    Convert a Python or Numpy Iterable to a pdarray or Strings object, sending
    the corresponding data to the arkouda server.

    Parameters
    ----------
    a : Union[pdarray, np.ndarray, Iterable]
        Rank-1 array of a supported dtype, or an iterable that numpy can
        convert into one. A pdarray is returned unchanged.

    Returns
    -------
    pdarray or Strings
        A pdarray instance stored on arkouda server or Strings instance, which
        is composed of two pdarrays stored on arkouda server

    Raises
    ------
    TypeError
        Raised if a is not a pdarray or np.ndarray and cannot be converted
        to a numpy array
    RuntimeError
        Raised if a is not one-dimensional, if the encoded string bytes exceed
        maxTransferBytes, if a.dtype is not supported (not in DTypes), or if
        the product of a.size and a.itemsize exceeds maxTransferBytes
    ValueError
        May be raised while building the result from the server reply if the
        returned message is malformed (raised by the helpers that parse the
        reply, not by this function directly -- TODO confirm)

    See Also
    --------
    pdarray.to_ndarray

    Notes
    -----
    The number of bytes in the input array cannot exceed `arkouda.maxTransferBytes`,
    otherwise a RuntimeError will be raised. This is to protect the user
    from overwhelming the connection between the Python client and the arkouda
    server, under the assumption that it is a low-bandwidth connection. The user
    may override this limit by setting ak.maxTransferBytes to a larger value,
    but should proceed with caution.

    If the input holds unicode strings (numpy dtype kind 'U'), every string is
    encoded to bytes and terminated with a null byte; the concatenated bytes
    are sent to the server as a single uint8 array, and the Strings object is
    built from the first two '+'-separated fields of the reply.

    Examples
    --------
    >>> ak.array(np.arange(1,10))
    array([1, 2, 3, 4, 5, 6, 7, 8, 9])

    >>> ak.array(range(1,10))
    array([1, 2, 3, 4, 5, 6, 7, 8, 9])

    >>> strings = ak.array(['string {}'.format(i) for i in range(0,5)])
    >>> type(strings)
    <class 'arkouda.strings.Strings'>
    """
    # If a is already a pdarray, do nothing
    if isinstance(a, pdarray):
        return a
    from arkouda.client import maxTransferBytes
    # typing.cast is a runtime no-op. It is imported under a local alias so
    # the bare name `cast` -- which can also refer to arkouda's own array
    # `cast` function -- is never called with a reply message by mistake.
    from typing import cast as type_cast

    # If a is not already a numpy.ndarray, convert it
    if not isinstance(a, np.ndarray):
        try:
            a = np.array(a)
        except Exception as err:
            # Catch Exception rather than everything, so KeyboardInterrupt
            # and SystemExit are not turned into a TypeError.
            raise TypeError(
                ('a must be a pdarray, np.ndarray, or convertible to' +
                 ' a numpy array')) from err
    # Only rank 1 arrays currently supported
    if a.ndim != 1:
        raise RuntimeError("Only rank-1 pdarrays or ndarrays supported")

    # Check if array of strings
    if a.dtype.kind == 'U':
        # Encode each string and add a null byte terminator. Joining bytes
        # avoids materializing one Python int per byte.
        encoded = b"".join(s.encode() + b"\x00" for s in a)
        nbytes = len(encoded)
        if nbytes > maxTransferBytes:
            raise RuntimeError(
                ("Creating pdarray would require transferring {} bytes," +
                 " which exceeds allowed transfer size. Increase " +
                 "ak.maxTransferBytes to force.").format(nbytes))
        # .copy() yields a writable array that owns its data, like the array
        # the previous list-based construction produced.
        encoded_np = np.frombuffer(encoded, dtype=np.uint8).copy()
        # NOTE(review): this path sends the key 'seg_string=' while the
        # numeric path below sends 'seg_strings=' -- confirm which spelling
        # the server expects before normalizing either one.
        args = f"{encoded_np.dtype.name} {encoded_np.size} seg_string={True}"
        rep_msg = generic_msg(cmd='array',
                              args=args,
                              payload=_array_memview(encoded_np),
                              send_binary=True)
        # Only the first two '+'-separated fields of the reply are used.
        parts = type_cast(str, rep_msg).split('+', maxsplit=3)
        return Strings.from_parts(parts[0], parts[1])

    # If not strings, then check that dtype is supported in arkouda
    if a.dtype.name not in DTypes:
        raise RuntimeError("Unhandled dtype {}".format(a.dtype))
    # Do not allow arrays that are too large
    size = a.size
    if (size * a.itemsize) > maxTransferBytes:
        raise RuntimeError(("Array exceeds allowed transfer size. Increase " +
                            "ak.maxTransferBytes to allow"))
    # The array data is sent as a binary payload next to an argument string
    # carrying the dtype name and element count. _array_memview supplies the
    # buffer view; presumably it also handles any byte-order mismatch with
    # the server -- TODO confirm against its definition.
    aview = _array_memview(a)
    args = f"{a.dtype.name} {size} seg_strings={False}"
    rep_msg = generic_msg(cmd='array',
                          args=args,
                          payload=aview,
                          send_binary=True)
    return create_pdarray(rep_msg)