Python DataShape примеры, datashape.DataShape Python примеры использования

Пример #1

0

Показать файл

 def dshape(self):
     """Return the datashape of this expression.

     The measure is a ``Record`` pairing each entry of ``self.names`` with
     the ``_dtype`` of the corresponding entry of ``self.values``.  When
     ``self.keepdims`` is truthy the record is preceded by one dimension
     of size 1 per dimension of ``self._child``; otherwise the record is
     the only parameter of the returned ``DataShape``.
     """
     field_types = [value._dtype for value in self.values]
     record = Record(list(zip(self.names, field_types)))
     if not self.keepdims:
         return DataShape(record)
     unit_dims = (1,) * self._child.ndim
     return DataShape(*(unit_dims + (record,)))

Пример #2

0

Показать файл

def Data(data,
         dshape=None,
         name=None,
         fields=None,
         columns=None,
         schema=None,
         **kwargs):
    """Build an ``InteractiveSymbol`` around ``data``.

    Parameters
    ----------
    data
        Object to wrap.  A string is handed to ``resource``; if it contains
        ``'::'`` the part after it is kept as a ``'/'``-separated path of
        fields that is indexed into the result.  An ``Iterator`` that is not
        an instance of one of ``not_an_iterator`` is materialized as a tuple.
    dshape
        Datashape (object or string).  Discovered from ``data`` when falsy.
    name
        Symbol name; ``next(names)`` is used when falsy.
    fields
        Field names used to relabel a discovered measure.
    columns
        Deprecated alias of ``fields`` (emits ``DeprecationWarning``).
    schema
        Element measure, expanded to ``var * schema``.  Mutually exclusive
        with ``dshape``.

    Raises
    ------
    ValueError
        If both ``schema`` and ``dshape`` are given.
    """
    sub_uri = ''
    if isinstance(data, _strtypes):
        if '::' in data:
            data, sub_uri = data.split('::')
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)

    is_lazy = (isinstance(data, Iterator)
               and not isinstance(data, tuple(not_an_iterator)))
    if is_lazy:
        data = tuple(data)

    if columns:
        warnings.warn("columns kwarg deprecated.  Use fields instead",
                      DeprecationWarning)
        if not fields:
            fields = columns

    if schema:
        if dshape:
            raise ValueError("Please specify one of schema= or dshape= keyword"
                             " arguments")
        dshape = var * schema

    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)

    if not dshape:
        dshape = discover(data)
        measure = dshape.measure
        # ``dims`` stays None unless one of the relabelling cases applies.
        dims = types = None
        if isinstance(measure, Tuple) and fields:
            dims, types = dshape.shape, dshape[1].dshapes
        elif isscalar(measure) and fields:
            # The last dimension supplies the number of fields, so it is
            # dropped from the resulting shape.
            dims, types = dshape.shape[:-1], (measure, ) * int(dshape[-2])
        elif isrecord(measure) and fields:
            dims, types = dshape.shape, measure.types
        if dims is not None:
            record = Record(list(zip(fields, types)))
            dshape = DataShape(*(dims + (record, )))

    ds = datashape.dshape(dshape)
    result = InteractiveSymbol(data, ds, name or next(names))

    # An empty ``sub_uri`` yields a single empty segment, which is skipped.
    for field in filter(None, sub_uri.split('/')):
        result = result[field]

    return result

Пример #3

0

Показать файл

Файл: interactive.py Проект: wegamekinglc/blaze

def Data(data,
         dshape=None,
         name=None,
         fields=None,
         columns=None,
         schema=None,
         **kwargs):
    """Build an ``InteractiveSymbol`` around ``data``.

    Parameters
    ----------
    data
        Object to wrap.  An ``InteractiveSymbol`` is unwrapped and its
        ``.data`` is processed again; a string is handed to ``resource``; an
        ``Iterator`` that is not an instance of one of ``not_an_iterator``
        is materialized as a tuple.
    dshape
        Datashape (object or string).  Discovered from ``data`` when falsy.
    name
        Name passed through to ``InteractiveSymbol``.
    fields
        Field names applied to a discovered tuple or scalar measure; for a
        discovered record measure they must equal the record's names.
    columns
        No longer supported; any truthy value raises ``ValueError``.
    schema
        Element measure, expanded to ``var * schema``.  Mutually exclusive
        with ``dshape``.

    Raises
    ------
    ValueError
        If ``columns`` is given, if both ``schema`` and ``dshape`` are given,
        or if ``fields`` differs from the names of a discovered record.
    """
    if columns:
        raise ValueError("columns argument deprecated, use fields instead")
    if schema and dshape:
        raise ValueError("Please specify one of schema= or dshape= keyword"
                         " arguments")

    if isinstance(data, InteractiveSymbol):
        return Data(data.data, dshape, name, fields, columns, schema, **kwargs)

    if isinstance(data, _strtypes):
        data = resource(data, schema=schema, dshape=dshape, columns=columns,
                        **kwargs)

    is_lazy = (isinstance(data, Iterator)
               and not isinstance(data, tuple(not_an_iterator)))
    if is_lazy:
        data = tuple(data)

    if schema and not dshape:
        dshape = var * schema
    if dshape and isinstance(dshape, _strtypes):
        dshape = datashape.dshape(dshape)

    if not dshape:
        dshape = discover(data)
        measure = dshape.measure
        # ``dims`` stays None unless one of the relabelling cases applies.
        dims = types = None
        if isinstance(measure, Tuple) and fields:
            dims, types = dshape.shape, dshape[1].dshapes
        elif isscalar(measure) and fields:
            # The last dimension supplies the number of fields, so it is
            # dropped from the resulting shape.
            dims, types = dshape.shape[:-1], (measure, ) * int(dshape[-2])
        elif isrecord(measure) and fields:
            rediscovered = discover(data)
            assert isrecord(rediscovered.measure)
            names = rediscovered.measure.names
            if names != fields:
                renames = ', '.join('%s=%r' % (old, new)
                                    for old, new in zip(names, fields))
                raise ValueError(
                    'data column names %s\n'
                    '\tnot equal to fields parameter %s,\n'
                    '\tuse Data(data).relabel(%s) to rename '
                    'fields' % (names, fields, renames))
            dims, types = dshape.shape, measure.types
        if dims is not None:
            record = Record(list(zip(fields, types)))
            dshape = DataShape(*(dims + (record, )))

    return InteractiveSymbol(data, datashape.dshape(dshape), name)

Пример #4

0

Показать файл

    def __init__(self,
                 data,
                 dshape=None,
                 name=None,
                 fields=None,
                 columns=None,
                 schema=None,
                 **kwargs):
        """Wrap ``data`` together with its datashape and a name.

        Parameters
        ----------
        data
            Object to wrap.  A string is passed to ``resource`` (along with
            ``schema``, ``dshape``, ``columns`` and ``**kwargs``) and replaced
            by whatever that call returns.
        dshape
            Datashape of ``data`` (object or string parsed with
            ``datashape.dshape``).  Obtained from ``discover(data)`` when
            falsy.
        name
            Name of the expression; ``next(names)`` is used when falsy.
        fields
            Field names applied to the discovered measure.  Only consulted
            when no ``dshape`` was supplied.
        columns
            Deprecated alias of ``fields``; emits a ``DeprecationWarning``.
        schema
            Element measure, expanded to ``var * schema``.  Mutually
            exclusive with ``dshape``.

        Raises
        ------
        ValueError
            If both ``schema`` and ``dshape`` are given.
        TypeError
            If ``data`` carries a ``schema`` attribute (a ``DataShape`` or a
            string) that differs from ``self.schema``.
        """
        if isinstance(data, _strtypes):
            data = resource(data,
                            schema=schema,
                            dshape=dshape,
                            columns=columns,
                            **kwargs)
        if columns:
            warnings.warn("columns kwarg deprecated.  Use fields instead",
                          DeprecationWarning)
        if columns and not fields:
            fields = columns
        if schema and dshape:
            raise ValueError("Please specify one of schema= or dshape= keyword"
                             " arguments")
        if schema and not dshape:
            dshape = var * schema
        if dshape and isinstance(dshape, _strtypes):
            dshape = datashape.dshape(dshape)
        if not dshape:
            dshape = discover(data)
            if isinstance(dshape.measure, Tuple) and fields:
                types = dshape[1].dshapes
                schema = Record(list(zip(fields, types)))
                dshape = DataShape(*(dshape.shape + (schema, )))
            elif isscalar(dshape.measure) and fields:
                # The last dimension supplies one scalar per field, so it is
                # folded into the record and must not remain in the shape.
                types = (dshape.measure, ) * int(dshape[-2])
                schema = Record(list(zip(fields, types)))
                dshape = DataShape(*(dshape.shape[:-1] + (schema, )))
            elif isrecord(dshape.measure) and fields:
                types = dshape.measure.types
                schema = Record(list(zip(fields, types)))
                dshape = DataShape(*(dshape.shape + (schema, )))

        self.dshape = datashape.dshape(dshape)

        self.data = data

        # Reject data whose own declared schema disagrees with ours.
        if (hasattr(data, 'schema') and isinstance(data.schema,
                                                   (DataShape, str, unicode))
                and self.schema != data.schema):
            raise TypeError('%s schema %s does not match %s schema %s' %
                            (type(data).__name__, data.schema,
                             type(self).__name__, self.schema))

        self._name = name or next(names)

Пример #5

0

Показать файл

Файл: h5py.py Проект: quasiben/odo

def discover_h5py_dataset(d):
    """Return the datashape of ``d``, reporting object types as strings.

    The datashape is derived from ``d.shape`` and ``d.dtype`` via
    ``datashape.from_numpy``.  For a record measure, the fields are rebuilt
    from ``record_dshape_replace(measure, object_, string)``.  For any other
    measure the datashape is returned unchanged unless it compares equal to
    ``datashape.object_``, in which case the measure becomes ``string``.
    """
    dshape = datashape.from_numpy(d.shape, d.dtype)
    shape = dshape.shape
    measure = dshape.measure

    if isrecord(measure):
        fields = list(record_dshape_replace(measure, datashape.object_,
                                            datashape.string))
        return DataShape(*(shape + (datashape.Record(fields), )))

    # NOTE(review): this compares the whole dshape (dimensions included), not
    # just the measure, against ``object_`` -- confirm that is intended.
    if dshape == datashape.object_:
        return DataShape(*(shape + (datashape.string, )))
    return dshape

Пример #6

0

Показать файл

Файл: dynd.py Проект: debugger22/into

def list_to_dynd(L, **kwargs):
    """Convert ``L`` to an ``nd.array`` typed by ``kwargs['dshape']``.

    A ``Tuple`` measure is first rewritten as a ``Record`` whose fields are
    named ``f0``, ``f1``, ... in positional order.  The array dtype is the
    string form of the resulting datashape.

    Raises ``KeyError`` if ``dshape`` is not among the keyword arguments.
    """
    dshape = kwargs['dshape']
    if not isinstance(dshape.measure, Tuple):
        return nd.array(L, dtype=str(dshape))

    member_types = dshape.measure.parameters[0]
    record = Record([['f%d' % position, typ]
                     for position, typ in enumerate(member_types)])
    relabelled = DataShape(*(dshape.shape + (record, )))
    return nd.array(L, dtype=str(relabelled))

Пример #7

0

Показать файл

Файл: dask.py Проект: snmz216/blaze

def compute_up(expr, data, **kwargs):
    """Evaluate ``expr`` over ``data`` in two ``atop`` stages.

    ``split`` divides the expression into a per-chunk part and an aggregate
    part.  The first ``atop`` call applies the per-chunk computation to
    ``data``; the second applies ``_concatenate2`` along ``expr.axis``
    followed by the aggregate computation, and omits the indices listed in
    ``expr.axis`` from its output.  Extra keyword arguments are forwarded to
    ``compute_it`` in both stages.
    """
    leaf = expr._leaves()[0]

    # The chunk symbol's shape is the first element of each entry of
    # ``data.chunks``, followed by the leaf's measure.
    block_shape = tuple(map(first, data.chunks))
    chunk = symbol('chunk',
                   DataShape(*(block_shape + (leaf.dshape.measure, ))))
    (chunk, chunk_expr), (agg, agg_expr) = split(expr._child, expr,
                                                 chunk=chunk)

    inds = tuple(range(ndim(leaf)))
    dtype = expr.dshape.measure.to_numpy_dtype()

    per_chunk = curry(compute_it, chunk_expr, [chunk], **kwargs)
    partial = atop(per_chunk, inds, data, inds, dtype=dtype)

    combine = compose(
        curry(compute_it, agg_expr, [agg], **kwargs),
        curry(_concatenate2, axes=expr.axis),
    )
    kept_inds = tuple(i for i in inds if i not in expr.axis)
    return atop(combine, kept_inds, partial, inds, dtype=dtype)

Пример #8

0

Показать файл

 def _schema(self):
     """Return the schema of this expression as a ``DataShape``.

     Starts from the first element of the child's schema.  If that is a
     ``Record`` with exactly one type, the lone type is used instead of the
     record itself.
     """
     child_measure = self._child.schema[0]
     single_field = (isinstance(child_measure, Record)
                     and len(child_measure.types) == 1)
     if single_field:
         return DataShape(toolz.first(child_measure.types))
     return DataShape(child_measure)

Пример #9

0

Показать файл

 def __init__(self, name, dshape):
     """Store ``name`` and a normalized ``dshape`` on the instance.

     ``dshape`` may be a string (parsed with ``datashape.dshape``), a
     ``Mono`` that is not already a ``DataShape`` (wrapped in one), or any
     other object, which is stored as given.
     """
     self._name = name

     parsed = dshape
     if isinstance(parsed, _strtypes):
         parsed = datashape.dshape(parsed)

     needs_wrapping = (not isinstance(parsed, DataShape)
                       and isinstance(parsed, Mono))
     self.dshape = DataShape(parsed) if needs_wrapping else parsed

Пример #10

0

Показать файл

    def dshape(self):
        """Return the datashape of the field ``self._name`` of the child.

        The result is the child's shape, followed by the shape of the
        selected field, followed by that field's measure.
        """
        child_dshape = self._child.dshape
        outer_dims = child_dshape.shape
        field = child_dshape.measure.dict[self._name]

        dims = outer_dims + field.shape
        return DataShape(*(dims + (field.measure, )))

Пример #11

0

Показать файл

Файл: sql.py Проект: jimmyaspire/odo

def discover(metadata):
    """Return a record datashape with one field per table in ``metadata``.

    ``metadata`` is reflected first, including views when
    ``metadata.bind.dialect.supports_views`` says so; if that call raises
    ``NotImplementedError`` a plain ``reflect()`` is used.  Tables are
    visited in order of their ``name``.  A table whose ``discover`` call
    raises ``sa.exc.CompileError`` or ``NotImplementedError`` is reported
    with a warning and left out of the result.

    Returns
    -------
    DataShape
        A ``Record`` of ``[table name, discovered datashape]`` pairs.
    """
    try:
        metadata.reflect(views=metadata.bind.dialect.supports_views)
    except NotImplementedError:
        metadata.reflect()
    pairs = []
    for table in sorted(metadata.tables.values(), key=attrgetter('name')):
        name = table.name
        try:
            pairs.append([name, discover(table)])
        except sa.exc.CompileError as e:
            # ``str(e)`` rather than ``e.message``: Python 3 exceptions have
            # no ``message`` attribute, so reading it would raise
            # AttributeError instead of emitting the warning.
            warnings.warn(
                "Can not discover type of table {name}.\n"
                "SQLAlchemy provided this error message:\n\t{msg}"
                "\nSkipping.".format(
                    name=name,
                    msg=str(e),
                ),
                stacklevel=3,
            )
        except NotImplementedError as e:
            # Exception args need not be strings; convert before joining.
            warnings.warn(
                "Odo does not understand a SQLAlchemy type.\n"
                "Odo provided the following error:\n\t{msg}"
                "\nSkipping.".format(msg="\n\t".join(map(str, e.args))),
                stacklevel=3,
            )
    return DataShape(Record(pairs))

Пример #12

0

Показать файл

    def _dshape(self):
        """Return the datashape of concatenating two string operands.

        The shape is taken from ``self.lhs``.  If the left schema's measure
        is an ``Option`` the left schema is used; otherwise, if the right
        schema's measure is an ``Option`` the right schema is used.  When
        neither is optional, both encodings must match and the result is a
        ``String(None, encoding)``, i.e. the fixed length is dropped.
        """
        dims = self.lhs.dshape.shape

        if isinstance(self.lhs.schema.measure, Option):
            element = self.lhs.schema
        elif isinstance(self.rhs.schema.measure, Option):
            element = self.rhs.schema
        else:
            _, left_encoding = self.lhs.schema.measure.parameters
            _, right_encoding = self.rhs.schema.measure.parameters
            assert left_encoding == right_encoding
            # A length of None replaces whatever fixed length the operands
            # had.
            element = DataShape(String(None, left_encoding))

        return DataShape(*(dims + (element, )))

Пример #13

0

Показать файл

    def schema(self):
        """Return the child's record schema with fields renamed.

        ``self.labels`` is turned into an old-name -> new-name mapping; each
        field of the child's measure keeps its type and takes the mapped
        name, or its original name when it has no entry in the mapping.
        """
        subs = dict(self.labels)
        fields = self._child.dshape.measure.parameters[0]

        return DataShape(
            Record([[subs.get(name, name), dtype] for name, dtype in fields]))

Пример #14

0

Показать файл

Файл: collections.py Проект: wegamekinglc/blaze

 def dshape(self):
     """Return the datashape of joining ``lhs`` and ``rhs`` along ``axis``.

     Every dimension and the measure come from ``self.lhs``, except the
     dimension at ``self.axis``, which is ``_shape_add`` of the two
     operands' extents along that axis.
     """
     axis = self.axis
     left = self.lhs.dshape
     left_dims = left.shape

     joined_extent = _shape_add(left_dims[axis],
                                self.rhs.dshape.shape[axis])
     leading = left_dims[:axis]
     trailing = left_dims[axis + 1:]
     return DataShape(
         *(leading + (joined_extent, ) + trailing + (left.measure, )))

Пример #15

0

Показать файл

 def _schema(self):
     """Return the result schema derived from the child's measure.

     The base type is the measure's ``ty`` attribute when it has one,
     otherwise the measure itself.  ``Decimal`` and ``TimeDelta`` bases are
     kept; any other base becomes ``ct.float64``.  The result is wrapped in
     ``Option`` when the child's measure is an ``Option``.
     """
     measure = self._child.schema.measure
     base = getattr(measure, 'ty', measure)

     if isinstance(base, (Decimal, TimeDelta)):
         result = base
     else:
         result = ct.float64

     if isinstance(measure, Option):
         result = Option(result)
     return DataShape(result)

Пример #16

0

Показать файл

Файл: json.py Проект: pskyp/shareapplication

def date_to_datetime_dshape(ds):
    """Return ``ds`` with ``date`` record fields replaced by ``datetime``.

    Only the fields of a ``Record`` measure are inspected: any field whose
    type equals ``ct.date_`` becomes ``ct.datetime_``.  A non-record measure
    is passed through unchanged.  The shape is preserved in both cases.
    """
    dims = ds.shape
    measure = ds.measure
    if isinstance(measure, Record):
        measure = Record([
            [name, ct.datetime_ if typ == ct.date_ else typ]
            for name, typ in measure.parameters[0]
        ])
    return DataShape(*(dims + (measure, )))

Пример #17

0

Показать файл

Файл: constructors.py Проект: xsixing/blaze

def sql_table(table, colnames, measures, conn):
    """
    Create a new blaze Array from an SQL table description.

    The array's datashape is ``Var`` by a Record that pairs each entry of
    ``colnames`` with the corresponding entry of ``measures``.  The table is
    wrapped in a ``TableSelection`` selecting ``'*'`` and handed, together
    with ``conn``, to an ``SQLDataDescriptor``.
    """
    record = Record(list(zip(colnames, measures)))
    record_dshape = DataShape(coretypes.Var(), record)
    selection = TableSelection(table, '*')
    descriptor = SQLDataDescriptor(record_dshape, selection, conn)
    return Array(descriptor)

Пример #18

0

Показать файл

 def _dshape(self):
     """Return the datashape after reducing over ``self.axis``.

     With ``self.keepdims`` truthy, each dimension whose index is in
     ``self.axis`` becomes 1; otherwise those dimensions are removed.  The
     measure is ``self.schema``.
     """
     reduced = self.axis
     keepdims = self.keepdims

     dims = []
     for position, extent in enumerate(self._child.shape):
         if position not in reduced:
             dims.append(extent)
         elif keepdims:
             dims.append(1)
     return DataShape(*(tuple(dims) + (self.schema, )))

Пример #19

0

Показать файл

    def _dshape(self):
        """Return the datashape of the field ``self._name`` of the child.

        The field is looked up in the ``dict`` of the child's measure, or of
        the measure's ``value`` attribute when it has one.  The result is
        the child's shape, then the field's shape, then the field's measure.
        """
        child_dshape = self._child.dshape
        outer_dims = child_dshape.shape
        measure = child_dshape.measure

        # TODO: is this too special-case-y?
        record = getattr(measure, 'value', measure)
        field = record.dict[self._name]

        dims = outer_dims + field.shape
        return DataShape(*(dims + (field.measure,)))

Пример #20

0

Показать файл

def fsql(engine, fcsv, name):
    """Yield a resource for ``'<url>::<name>'``, then drop it.

    The resource's datashape is ``var`` by a record whose fields ``a`` and
    ``b`` take the types discovered from ``fcsv``.  If creating the resource
    raises ``sqlalchemy.exc.OperationalError`` the test is skipped with that
    error's text.  ``url`` is read from the enclosing scope; ``engine`` is
    not used in the body.
    """
    discovered = discover(fcsv)
    record = Record(list(zip('ab', discovered.measure.types)))
    dshape = DataShape(var, record)
    try:
        t = resource('%s::%s' % (url, name), dshape=dshape)
    except sqlalchemy.exc.OperationalError as e:
        pytest.skip(str(e))
    else:
        yield t
        drop(t)

Пример #21

0

Показать файл

 def _dshape(self):
     """Return the datashape after reducing over ``self.axis``.

     With ``self.keepdims`` truthy, each dimension whose index is in
     ``self.axis`` becomes 1; otherwise those dimensions are removed.  The
     measure is a ``Record`` pairing ``self.names`` with the ``schema`` of
     each entry of ``self.values``.
     """
     reduced = self.axis
     keepdims = self.keepdims

     dims = []
     for position, extent in enumerate(self._child.shape):
         if position not in reduced:
             dims.append(extent)
         elif keepdims:
             dims.append(1)

     field_schemas = [value.schema for value in self.values]
     record = Record(list(zip(self.names, field_schemas)))
     return DataShape(*(tuple(dims) + (record, )))

Пример #22

0

Показать файл

    def dshape(self):
        """Return the datashape of contracting ``lhs`` with ``rhs``.

        The shape keeps the dimensions of ``lhs`` not listed in
        ``self._left_axes`` followed by the dimensions of ``rhs`` not listed
        in ``self._right_axes``.  The measure is whatever a multiply followed
        by an add of the two operand measures produces.
        """
        kept_left = [extent for pos, extent in enumerate(self.lhs.shape)
                     if pos not in self._left_axes]
        kept_right = [extent for pos, extent in enumerate(self.rhs.shape)
                      if pos not in self._right_axes]
        dims = tuple(kept_left + kept_right)

        # Let scalar symbols work out the resulting measure.
        left_sym = symbol('l', self.lhs.dshape.measure)
        right_sym = symbol('r', self.rhs.dshape.measure)
        combined = (left_sym * right_sym) + (left_sym * right_sym)

        return DataShape(*(dims + (combined.dshape.measure,)))

Пример #23

0

Показать файл

def discover(metadata):
    """Return a record datashape with one field per table in ``metadata``.

    ``metadata`` is reflected first, including views when
    ``metadata.bind.dialect.supports_views`` says so.  Tables are visited in
    order of their key in ``metadata.tables``.  A table whose ``discover``
    call raises ``sa.exc.CompileError`` or ``NotImplementedError`` is
    reported on standard output and left out of the result.

    Returns
    -------
    DataShape
        A ``Record`` of ``[table name, discovered datashape]`` pairs.
    """
    metadata.reflect(views=metadata.bind.dialect.supports_views)
    pairs = []
    for name, table in sorted(metadata.tables.items(), key=first):
        try:
            pairs.append([name, discover(table)])
        except sa.exc.CompileError as e:
            # ``str(e)`` rather than ``e.message``: Python 3 exceptions have
            # no ``message`` attribute, so reading it would raise
            # AttributeError instead of reporting the skipped table.
            print("Can not discover type of table %s.\n" % name +
                "SQLAlchemy provided this error message:\n\t%s" % str(e) +
                "\nSkipping.")
        except NotImplementedError as e:
            print("Blaze does not understand a SQLAlchemy type.\n"
                "Blaze provided the following error:\n\t%s" % str(e) +
                "\nSkipping.")
    return DataShape(Record(pairs))

Пример #24

0

Показать файл

Файл: datadescriptor.py Проект: DarshanKumar89/Delivery-Optimization

def column_dshape(dshape, colname):
    """
    Project the column ``colname`` out of a record dshape.

    Returns a ``DataShape`` with the dimensions of ``dshape`` and the type
    of the selected field as its measure.

    Raises ``TypeError`` if the measure of ``dshape`` is not a ``Record``
    and ``ValueError`` if the record has no field called ``colname``.
    """
    record = dshape.measure

    if not isinstance(record, Record):
        raise TypeError("Can only select fields from record type")
    if colname not in record.fields:
        raise ValueError("No such field %r" % (colname, ))

    column_type = record.fields[colname]
    return DataShape(*(list(dshape.shape) + [column_type]))

Пример #25

0

Показать файл

def compute_down(expr, data, **kwargs):
    """ Compute a reduction over ``data`` one chunk at a time

    ``split`` divides the expression into a per-chunk expression and an
    on-aggregate expression.  Each partition of ``data`` is fetched with
    ``partition_get``, computed with the per-chunk expression and written
    into the matching partition of an intermediate numpy array with
    ``partition_set``.  The on-aggregate expression is then computed on that
    intermediate array and its result returned.

    The chunk size is ``kwargs['chunksize']`` when given, else
    ``data.chunks``.

    Raises ``MDNotImplementedError`` unless some node on the path from
    ``expr`` to its first leaf is a ``Reduction``.  The intermediate array is
    allocated in memory with ``np.empty``.
    """
    leaf = expr._leaves()[0]
    for node in path(expr, leaf):
        if isinstance(node, Reduction):
            break
    else:
        raise MDNotImplementedError()

    # Chunk size (this should be improved)
    chunksize = kwargs.get('chunksize', data.chunks)

    # Per-chunk and on-aggregate halves of the expression
    chunk = Symbol('chunk', DataShape(*(chunksize + (leaf.dshape.measure, ))))
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    # Buffer for the per-chunk results
    shape, dtype = to_numpy(agg.dshape)
    intermediate = np.empty(shape=shape, dtype=dtype)

    source_parts = partitions(data, chunksize=chunksize)
    target_parts = partitions(intermediate, chunksize=chunk_expr.shape)

    # Sequential for now; this could be parallelized
    for source, target in zip(source_parts, target_parts):
        block = partition_get(data, source, chunksize=chunksize)
        partial = compute(chunk_expr, {chunk: block})
        partition_set(intermediate, target, partial,
                      chunksize=chunk_expr.shape)

    return compute(agg_expr, {agg: intermediate})

Пример #26

0

Показать файл

def compute_down(expr, data, map=None, **kwargs):
    """ Compute a reduction over ``data`` one chunk at a time

    ``split`` divides the expression into a per-chunk expression and an
    on-aggregate expression.  ``compute_chunk`` is applied, through the
    mapping function returned by ``_get_map(map)``, to each pair of source
    partition (of ``data``) and target partition (of an intermediate numpy
    array).  The on-aggregate expression is then computed on the
    intermediate array with ``return_type='native'``.

    The chunk size is ``kwargs['chunksize']`` when given, else
    ``data.chunks``.

    Raises ``MDNotImplementedError`` unless some node on the path from
    ``expr`` to its first leaf is a ``Reduction``.  The intermediate array is
    allocated in memory with ``np.empty``.
    """
    map = _get_map(map)

    leaf = expr._leaves()[0]
    for node in path(expr, leaf):
        if isinstance(node, Reduction):
            break
    else:
        raise MDNotImplementedError()

    # Chunk size (this should be improved)
    chunksize = kwargs.get('chunksize', data.chunks)

    # Per-chunk and on-aggregate halves of the expression
    chunk = symbol('chunk', DataShape(*(chunksize + (leaf.dshape.measure,))))
    (chunk, chunk_expr), (agg, agg_expr) = split(leaf, expr, chunk=chunk)

    # Buffer for the per-chunk results
    shape, dtype = to_numpy(agg.dshape)
    intermediate = np.empty(shape=shape, dtype=dtype)

    source_parts = list(partitions(data, chunksize=chunksize, keepdims=True))
    target_parts = list(partitions(intermediate, chunksize=chunk_expr.shape,
                                   keepdims=True))

    # Wrapping in list() forces evaluation if the mapping function is lazy.
    run_chunk = curry(compute_chunk, data, intermediate, chunk, chunk_expr)
    list(map(run_chunk, zip(source_parts, target_parts)))

    return compute(agg_expr, {agg: intermediate}, return_type='native')

Пример #27

0

Показать файл

Файл: datadescriptor.py Проект: xsixing/blaze

    def dynd_arr(self):
        """Return the chunks of ``self.query_result`` as one array.

        The first call allocates an array with ``nd.empty`` whose length is
        the summed length of all chunks and whose measure is
        ``self.dshape.measure``, copies the chunks into it back to back, and
        stores it in ``self._dynd_result``.  Later calls return that stored
        array.

        ``self.query_result`` is iterated twice here (once to measure, once
        to copy).
        """
        # TODO: This should really use blz
        cached = self._dynd_result
        if cached is not None:
            return cached

        total = sum(len(chunk) for chunk in self.query_result)
        combined = nd.empty(str(DataShape(total, self.dshape.measure)))

        start = 0
        for chunk in self.query_result:
            stop = start + len(chunk)
            combined[start:stop] = chunk
            start = stop

        self._dynd_result = combined
        return combined

Пример #28

0

Показать файл

def dynd_chunk_iterator(result, chunk_size=1024):
    """
    Yield the rows of a query Result as a sequence of DyND arrays.

    Rows are fetched from ``result.cursor`` with ``fetchmany``, using the
    larger of ``cursor.arraysize`` and ``chunk_size`` as the batch size.
    Each batch becomes an array of ``len(batch)`` elements with measure
    ``result.dshape.measure``.  Iteration ends when a fetch returns nothing
    or raises ``db.Error``.
    """
    cursor = result.cursor
    batch_size = max(cursor.arraysize, chunk_size)

    while True:
        try:
            rows = cursor.fetchmany(batch_size)
        except db.Error:
            return

        if not rows:
            return

        batch_dshape = DataShape(len(rows), result.dshape.measure)
        chunk = nd.empty(str(batch_dshape))
        chunk[:] = list(iter_result(rows, batch_dshape))
        yield chunk

Пример #29

0

Показать файл

def coalesce(a, b):
    """Return ``a`` with ``b`` as the fallback for missing values.

    The measure of ``a`` is discovered first, looking through an ``Option``
    wrapper if there is one:

    * if it is ``Null``, ``b`` is returned;
    * otherwise, if ``a`` is not an ``Option``, ``a`` is returned;
    * otherwise a ``Coalesce`` node is returned whose datashape combines
      ``maxshape`` of both shapes with ``promote`` of both measures.
    """
    a_dshape = discover(a)
    outer = a_dshape.measure
    optional = isinstance(outer, Option)
    inner = outer.ty if optional else outer

    if isinstance(inner, Null):
        # a is always null, so the result is just b
        return b
    if not optional:
        # a can never be missing, so the result is just a
        return a

    b_dshape = discover(b)
    dims = maxshape((a_dshape.shape, b_dshape.shape))
    measure = promote(inner, b_dshape.measure)
    return Coalesce(a, b, DataShape(*(dims + (measure,))))

Пример #30

0

Показать файл

def sql_table(table_name, colnames, measures, conn):
    """
    Create a new blaze Array from an SQL table description.

    The array's datashape is ``Var`` by a Record that pairs each column name
    with its measure.

    Parameters
    ==========

    table_name: str
        name of the table; every column (``'*'``) is selected

    colnames: [str]
        column names

    measures: [DataShape]
        measure (element type) for each column, in the order of ``colnames``

    conn: pyodbc/whatever Connection
        passed through to ``SQL_DDesc``
    """
    record = Record(list(zip(colnames, measures)))
    record_dshape = DataShape(coretypes.Var(), record)
    selection = TableSelection(table_name, '*')
    descriptor = SQL_DDesc(record_dshape, selection, conn)
    return Array(descriptor)

Python DataShape примеры использования