Пример #1
0
    def __init__(self, p, collength=16, verbose=False):
        """
        Set up the column layout for a MAP file and initialise the DataType base.

        ``data_format`` maps each column name to an ``(index, column type)`` tuple: the index is where the
        field sits once a line has been split, and the column type is the datatype used for that field when
        reading and writing HDF5 tables.

        :param p: The full file path
        :param collength: Size argument given to ``StringCol`` for every column
        :param verbose: Passed through unchanged to ``DataType.__init__``
        """
        column_names = ("chromosome", "identifier", "distance", "position")
        # One fresh StringCol per column, keyed by name and tagged with its position in the split line.
        self.data_format = {name: (index, StringCol(collength)) for index, name in enumerate(column_names)}
        DataType.__init__(self, p, "map", verbose)
Пример #2
0
    def __init__(self, p, collength=16, verbose=False):
        """
        Build the MAP column table, then hand over to ``DataType.__init__``.

        Every entry of ``data_format`` is keyed by column name and holds a two-element tuple: the position
        of the field after the line is split, followed by the datatype used for it in HDF5 tables.

        :param p: The full file path
        :param collength: Size argument given to ``StringCol`` for every column
        :param verbose: Passed through unchanged to ``DataType.__init__``
        """
        layout = {}
        for position, column in enumerate(["chromosome", "identifier", "distance", "position"]):
            # Columns are registered in file order, so ``position`` is the split-line index.
            layout[column] = (position, StringCol(collength))
        self.data_format = layout
        DataType.__init__(self, p, "map", verbose)
Пример #3
0
 def view(self, dtype=None, type_=None):
     """Return a view of this array using a different array class or dtype.

     If ``type_`` is given it takes the place of ``dtype``. When the
     resulting ``dtype`` is a class derived from ``np.ndarray`` the call is
     forwarded unchanged to the base-class ``view``. Otherwise ``dtype`` is
     wrapped in ``DataType``, converted to a NumPy dtype for the base-class
     ``view``, and the new view's ``bf.dtype`` is set to that ``DataType``
     before ``_update_BFarray()`` is called on it.
     """
     if type_ is not None:
         # Fix: this used to assign the builtin ``type`` rather than the
         # ``type_`` argument, so the caller's request was discarded.
         dtype = type_
     if isinstance(dtype, type) and issubclass(dtype, np.ndarray):
         # A class was requested: let NumPy perform the subclass view.
         return super(ndarray, self).view(dtype)
     # TODO: Endianness changes are not supported here
     #         Consider building byteorder into DataType
     dtype_bf = DataType(dtype)
     dtype_np = np.dtype(dtype_bf.as_numpy_dtype())
     v = super(ndarray, self).view(dtype_np)
     # Record the requested DataType on the view and refresh its metadata.
     v.bf.dtype = dtype_bf
     v._update_BFarray()
     return v
Пример #4
0
    def __init__(self, p, collength=16, writing=False, verbose=False):
        """
        Instantiates a PED object corresponding to PED files.

        See MAP.py for data_format explanation.

        :param p: The full file path.
        :param collength: Size argument given to ``StringCol`` for the six leading columns.
        :param writing: When false, the genotype column is sized from ``get_geno_length()`` (also stored
            as ``self.geno_length``); when true, it is created as ``StringCol(4)``.
        :param verbose: Passed through unchanged to ``DataType.__init__``.
        """
        self.input = p
        leading_columns = ("family", "sample", "paternal", "maternal", "sex", "affection")
        self.data_format = {name: (index, StringCol(collength)) for index, name in enumerate(leading_columns)}
        if writing:
            genotype_column = StringCol(4)
        else:
            # geno_length is only set on the reading path.
            self.geno_length = self.get_geno_length()
            genotype_column = StringCol(self.geno_length)
        self.data_format["genotype"] = (6, genotype_column)
        DataType.__init__(self, p, "ped", verbose)
Пример #5
0
	def tensor(self): # TODO: This shouldn't be public
		"""Return the cached tensor description, building it on first use.

		The description is a dict derived from ``self.header['_tensor']``
		with the keys 'dtype', 'ringlet_shape', 'nringlet', 'frame_shape',
		'frame_nbyte' and 'dtype_nbyte'. It is stored in ``self._tensor``
		so later calls return the same dict.
		"""
		cached = self._tensor
		if cached is not None:
			return cached

		def _product(dims):
			# Product of all entries; 1 for an empty sequence.
			return reduce(lambda a, b: a * b, dims, 1)

		tensor_hdr = self.header['_tensor']
		ringlet_shape, frame_shape = split_shape(tensor_hdr['shape'])
		nringlet = _product(ringlet_shape)
		frame_nelement = _product(frame_shape)
		dtype = tensor_hdr['dtype']
		nbit = DataType(dtype).itemsize_bits
		# Byte sizes below are only meaningful for whole-byte element types.
		assert(nbit % 8 == 0)
		self._tensor = {
			'dtype':         DataType(dtype),
			'ringlet_shape': ringlet_shape,
			'nringlet':      nringlet,
			'frame_shape':   frame_shape,
			'frame_nbyte':   frame_nelement * nbit // 8,
			'dtype_nbyte':   nbit // 8,
		}
		return self._tensor
Пример #6
0
    def __init__(self, p, collength=16, writing=False, verbose=False):
        """
        Instantiates a PED object corresponding to PED files.

        See MAP.py for data_format explanation.

        :param p: The full file path.
        :param collength: Size argument given to ``StringCol`` for the six leading columns.
        :param writing: Selects how the genotype column is sized: ``StringCol(4)`` when true, otherwise
            ``StringCol(self.geno_length)`` with ``geno_length`` taken from ``get_geno_length()``.
        :param verbose: Passed through unchanged to ``DataType.__init__``.
        """
        self.input = p
        table = {}
        for slot, label in enumerate(["family", "sample", "paternal", "maternal", "sex", "affection"]):
            table[label] = (slot, StringCol(collength))
        self.data_format = table
        if writing:
            self.data_format["genotype"] = (6, StringCol(4))
        else:
            # Reading path: the genotype width comes from get_geno_length().
            self.geno_length = self.get_geno_length()
            self.data_format["genotype"] = (6, StringCol(self.geno_length))
        DataType.__init__(self, p, "ped", verbose)
Пример #7
0
    def data_view(self, dtype=np.uint8, shape=-1):
        """Return an ``ndarray`` wrapping this object's data pointer.

        The array is created with shape ``(nringlet, size // itemsize)``,
        where ``itemsize`` comes from ``DataType(dtype)``. Explicit strides
        ``(stride, itemsize)`` are used only when there is more than one
        ringlet. The array is marked read-only when ``self.writeable`` is
        false.

        :param dtype: Element type of the view (default ``np.uint8``).
        :param shape: If not -1, the array is passed through
            ``reshape(shape)`` before being returned.
        :return: The constructed (and possibly reshaped) ``ndarray``.
        """
        itemsize = DataType(dtype).itemsize
        # Both the span size and the ringlet stride must hold a whole
        # number of elements.
        assert( self.size   % itemsize == 0 )
        assert( self.stride % itemsize == 0 )
        nringlet = self.nringlet
        # Cleanup: the previous version also built a ctypes byte-array type
        # and cast the pointer to it, but the result was never used.
        # TODO: We should really map the actual ring memory space and index
        #         it with offset rather than mapping from the current pointer.
        _shape  = (nringlet, self.size // itemsize)
        strides = (self.stride, itemsize) if nringlet > 1 else None

        data_array = ndarray(shape=_shape, strides=strides,
                             buffer=self._data_ptr, dtype=dtype,
                             space=self.ring.space)

        if not self.writeable:
            data_array.flags['WRITEABLE'] = False
        if shape != -1:
            # TODO: Check that this still wraps the same memory
            data_array = data_array.reshape(shape)
        return data_array
Пример #8
0
 def __array_finalize__(self, obj):
     """Attach ``bf`` metadata when this array is created from ``obj``.

     Nothing is done when ``obj`` is None, because ``__new__`` has already
     set ``self.bf``. Otherwise the metadata is copied from ``obj.bf`` if
     ``obj`` is already this ``ndarray`` type, or derived from the plain
     NumPy array, and ``_update_BFarray()`` is called.
     """
     if obj is None:
         # Already initialized self.bf in __new__
         return
     if not isinstance(obj, ndarray):
         # Plain np.ndarray: derive the metadata from the array itself.
         space = str(Space(raw_get_space(obj.ctypes.data)))
         # **TODO: Decide on bf.dtype being DataType vs. string (and same for space)
         dtype = DataType(obj.dtype)
         native = obj.dtype.isnative
         info = BFArrayInfo(space, dtype, native, False)
     else:
         # Existing bf.ndarray: carry its metadata over to the view.
         source = obj.bf
         info = BFArrayInfo(source.space, source.dtype, source.native,
                            source.conjugated)
     self.bf = info
     self._update_BFarray()
Пример #9
0
    def __new__(cls,
                base=None,
                space=None,
                shape=None,
                dtype=None,
                buffer=None,
                offset=0,
                strides=None,
                native=None,
                conjugated=None):
        """Construct an array from an existing object or from a raw buffer.

        With ``base`` given (``shape``, ``buffer``, ``offset``, ``strides``
        and ``native`` must then be left at their defaults):

        * if pycuda is already imported and ``base`` is a pycuda
          ``GPUArray``, its device pointer, shape, dtype and strides are
          wrapped with space 'cuda';
        * if neither ``space`` nor ``dtype`` is given, the result is a view
          of ``base`` (converted with ``np.asarray`` when needed);
        * otherwise a new array is created in ``space`` and ``base`` is
          copied into it with ``copy_array``.

        Without ``base``, a new array of ``shape`` is created either on a
        freshly allocated buffer (``raw_malloc``) or on the supplied
        ``buffer`` address. Defaults: dtype 'f32', native byte order,
        unconjugated, space 'system' when allocating.

        :raises ValueError: for an invalid argument combination with
            ``base``, a missing ``shape`` without ``base``, or a sub-byte
            dtype whose last dimension cannot be packed into whole bytes.
        :raises TypeError: if ``base`` cannot be represented as ``dtype``.
        """
        if isinstance(shape, int):
            shape = [shape]
        ownbuffer = None
        if base is not None:
            if (shape is not None or
                    # dtype is not None or
                    buffer is not None or offset != 0 or strides is not None
                    or native is not None):
                raise ValueError('Invalid combination of arguments when base '
                                 'is specified')
            # Only look for GPUArray when pycuda has already been imported
            # elsewhere, so pycuda is never imported as a side effect.
            if 'pycuda' in sys.modules:
                from pycuda.gpuarray import GPUArray as pycuda_GPUArray
                if isinstance(base, pycuda_GPUArray):
                    return ndarray.__new__(cls,
                                           space='cuda',
                                           buffer=int(base.gpudata),
                                           shape=base.shape,
                                           dtype=base.dtype,
                                           strides=base.strides,
                                           native=np.dtype(
                                               base.dtype).isnative)
            if dtype is not None:
                dtype = DataType(dtype)
            if space is None and dtype is None:
                if not isinstance(base, np.ndarray):
                    base = np.asarray(base)
                # TODO: This may not be a good idea
                # Create view of base array
                obj = base.view(cls)  # Note: This calls obj.__array_finalize__
                # Allow conjugated to be redefined
                if conjugated is not None:
                    obj.bf.conjugated = conjugated
                    obj._update_BFarray()
            else:
                if not isinstance(base, np.ndarray):
                    # Convert base to np.ndarray
                    if dtype is not None:
                        base = np.array(base,
                                        dtype=DataType(dtype).as_numpy_dtype())
                    else:
                        base = np.array(base)
                if not isinstance(base, ndarray) and dtype is not None:
                    base = base.astype(dtype.as_numpy_dtype())
                base = ndarray(base)  # View base as bf.ndarray
                if dtype is not None and base.bf.dtype != dtype:
                    raise TypeError('Unable to convert type %s to %s during '
                                    'array construction' %
                                    (base.bf.dtype, dtype))
                if conjugated is None:
                    conjugated = base.bf.conjugated
                # Create copy of base array
                obj = ndarray.__new__(cls,
                                      space=space,
                                      shape=base.shape,
                                      dtype=base.bf.dtype,
                                      native=base.bf.native,
                                      conjugated=conjugated)
                copy_array(obj, base)
        else:
            # Create new array
            if shape is None:
                # Fix: validate shape up front. The old check sat after
                # code that already indexed shape, so it never ran.
                if buffer is None:
                    raise ValueError('Either buffer or shape must be '
                                     'specified')
                raise ValueError('shape must be specified when wrapping an '
                                 'existing buffer')
            if dtype is None:
                dtype = 'f32'  # Default dtype
            dtype = DataType(dtype)
            if native is None:
                native = True  # Default byteorder
            if conjugated is None:
                conjugated = False  # Default unconjugated
            # Fix: compute itemsize unconditionally. It is needed for nbyte
            # below even when the caller supplies explicit strides.
            itemsize_bits = dtype.itemsize_bits
            itemsize = 1 if itemsize_bits < 8 else itemsize_bits // 8
            if strides is None:
                # HACK to support 'packed' arrays, by folding the last
                #   dimension of the shape into the dtype.
                # TODO: Consider using bit strides when dtype < 8 bits
                #         It's hacky, but it may be worth it
                if itemsize_bits < 8:
                    pack_factor = 8 // itemsize_bits
                    # Fix: test for an empty shape before indexing it.
                    if not len(shape) or shape[-1] % pack_factor != 0:
                        raise ValueError("Array cannot be packed")
                    shape = list(shape)
                    shape[-1] //= pack_factor

                if len(shape):
                    # This magic came from http://stackoverflow.com/a/32874295
                    strides = (itemsize * np.r_[
                        1, np.cumprod(shape[::-1][:-1], dtype=np.int64)][::-1])
                    strides = tuple(strides)
                else:
                    strides = tuple()
            nbyte = strides[0] * shape[0] if len(shape) else itemsize
            if buffer is None:
                # Allocate new buffer
                if space is None:
                    space = 'system'  # Default space
                ownbuffer = raw_malloc(nbyte, space)
                buffer = ownbuffer
            else:
                if space is None:
                    # TODO: raw_get_space should probably return string, and needs a better name
                    space = str(Space(raw_get_space(buffer)))
            # TODO: Should move np.dtype() into as_numpy_dtype?
            dtype_np = np.dtype(dtype.as_numpy_dtype())
            if not native:
                dtype_np = dtype_np.newbyteorder()
            data_buffer = _address_as_buffer(buffer, nbyte)
            obj = np.ndarray.__new__(cls, shape, dtype_np, data_buffer, offset,
                                     strides)
            # ownbuffer is None unless the buffer was allocated above.
            obj.bf = BFArrayInfo(space, dtype, native, conjugated, ownbuffer)
            obj._update_BFarray()
        return obj