Exemplo n.º 1
0
def _normalize_domain(domain=None, limit=None, schema=None):
    if not domain:
        return Domain(type="default", limit=limit)
    elif isinstance(domain, _Column):
        if domain.partitions and domain.multi <= 1:  # MULTI FIELDS ARE TUPLES, AND THERE ARE TOO MANY POSSIBLE COMBOS AT THIS TIME
            return SetDomain(partitions=domain.partitions.left(limit))
        else:
            return DefaultDomain(type="default", limit=limit)
    elif isinstance(domain, Dimension):
        return domain.getDomain()
    elif schema and is_text(domain) and schema[domain]:
        return schema[domain].getDomain()
    elif isinstance(domain, Domain):
        return domain

    if not domain.name:
        domain = domain.copy()
        domain.name = domain.type

    return Domain(**domain)
Exemplo n.º 2
0
def _normalize_domain(domain=None, limit=None, schema=None):
    if not domain:
        return Domain(type="default", limit=limit)
    elif isinstance(domain, _Column):
        if domain.partitions:
            return SetDomain(partitions=domain.partitions.left(limit))
        else:
            return DefaultDomain(type="default", limit=limit)
    elif isinstance(domain, Dimension):
        return domain.getDomain()
    elif schema and isinstance(domain, text_type) and schema[domain]:
        return schema[domain].getDomain()
    elif isinstance(domain, Domain):
        return domain

    if not domain.name:
        domain = domain.copy()
        domain.name = domain.type

    return Domain(**domain)
Exemplo n.º 3
0
def _normalize_edge(edge, dim_index, limit, schema=None):
    """
    :param edge: Not normalized edge
    :param dim_index: Dimensions are ordered; this is this edge's index into that order
    :param schema: for context
    :return: a normalized edge
    """
    if not _Column:
        _late_import()

    if not edge:
        Log.error("Edge has no value, or expression is empty")
    elif is_text(edge):
        if schema:
            leaves = unwraplist(list(schema.leaves(edge)))
            if not leaves or is_container(leaves):
                return [
                    Data(name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=_normalize_domain(None, limit))
                ]
            elif isinstance(leaves, _Column):
                return [
                    Data(name=edge,
                         value=jx_expression(edge, schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=_normalize_domain(domain=leaves,
                                                  limit=limit,
                                                  schema=schema))
                ]
            elif is_list(leaves.fields) and len(leaves.fields) == 1:
                return [
                    Data(name=leaves.name,
                         value=jx_expression(leaves.fields[0], schema=schema),
                         allowNulls=True,
                         dim=dim_index,
                         domain=leaves.getDomain())
                ]
            else:
                return [
                    Data(name=leaves.name,
                         allowNulls=True,
                         dim=dim_index,
                         domain=leaves.getDomain())
                ]
        else:
            return [
                Data(name=edge,
                     value=jx_expression(edge, schema=schema),
                     allowNulls=True,
                     dim=dim_index,
                     domain=DefaultDomain())
            ]
    else:
        edge = wrap(edge)
        if not edge.name and not is_text(edge.value):
            Log.error("You must name compound and complex edges: {{edge}}",
                      edge=edge)

        if is_container(edge.value) and not edge.domain:
            # COMPLEX EDGE IS SHORT HAND
            domain = _normalize_domain(schema=schema)
            domain.dimension = Data(fields=edge.value)

            return [
                Data(name=edge.name,
                     value=jx_expression(edge.value, schema=schema),
                     allowNulls=bool(coalesce(edge.allowNulls, True)),
                     dim=dim_index,
                     domain=domain)
            ]

        domain = _normalize_domain(edge.domain, schema=schema)

        return [
            Data(name=coalesce(edge.name, edge.value),
                 value=jx_expression(edge.value, schema=schema),
                 range=_normalize_range(edge.range),
                 allowNulls=bool(coalesce(edge.allowNulls, True)),
                 dim=dim_index,
                 domain=domain)
        ]
Exemplo n.º 4
0
    def __new__(cls, e=None, query=None, *args, **kwargs):
        e.allowNulls = coalesce(e.allowNulls, True)

        if e.value and e.domain.type == "default":
            # if query.groupby:
            #     return object.__new__(DefaultDecoder, e)

            if isinstance(e.value, (text_type, binary_type)):
                Log.error("Expecting Variable or Expression, not plain string")

            if isinstance(e.value, LeavesOp):
                return object.__new__(ObjectDecoder, e)
            elif isinstance(e.value, TupleOp):
                # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
                # JUST PULL THE FIELDS
                if not all(isinstance(t, Variable) for t in e.value.terms):
                    Log.error("Can only handle variables in tuples")

                e.domain = Data(
                    dimension={"fields": e.value.terms}
                )
                return object.__new__(DimFieldListDecoder, e)

            elif isinstance(e.value, Variable):
                schema = query.frum.schema
                cols = schema.leaves(e.value.var)
                if not cols:
                    return object.__new__(DefaultDecoder, e)
                if len(cols) != 1:
                    return object.__new__(ObjectDecoder, e)
                col = cols[0]
                limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

                if col.partitions != None:
                    if col.multi > 1 and len(col.partitions) < 6:
                        return object.__new__(MultivalueDecoder)

                    partitions = col.partitions[:limit:]
                    if e.domain.sort==-1:
                        partitions = list(reversed(sorted(partitions)))
                    else:
                        partitions = sorted(partitions)
                    e.domain = SimpleSetDomain(partitions=partitions, limit=limit)
                else:
                    e.domain = set_default(DefaultDomain(limit=limit), e.domain.__data__())
                    return object.__new__(DefaultDecoder, e)

            else:
                return object.__new__(DefaultDecoder, e)

        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder, e)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder, e)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder, e)
        if e.range:
            return object.__new__(GeneralRangeDecoder, e)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder, e)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder, e)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if isinstance(fields, Mapping):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder, e)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder, e)
        else:
            Log.error("domain type of {{type}} is not supported yet", type=e.domain.type)
Exemplo n.º 5
0
    def __new__(cls, e=None, query=None, *args, **kwargs):
        e.allowNulls = coalesce(e.allowNulls, True)

        if e.value and e.domain.type == "default":
            # if query.groupby:
            #     return object.__new__(DefaultDecoder, e)

            if is_text(e.value):
                Log.error("Expecting Variable or Expression, not plain string")

            if is_op(e.value, LeavesOp):
                return object.__new__(ObjectDecoder)
            elif is_op(e.value, TupleOp):
                # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
                # JUST PULL THE FIELDS
                if not all(is_op(t, Variable) for t in e.value.terms):
                    Log.error("Can only handle variables in tuples")

                e.domain = Data(dimension={"fields": e.value.terms})
                return object.__new__(DimFieldListDecoder)

            elif is_op(e.value, Variable):
                schema = query.frum.schema
                cols = schema.leaves(e.value.var)
                if not cols:
                    return object.__new__(DefaultDecoder)
                if len(cols) != 1:
                    return object.__new__(ObjectDecoder)
                col = first(cols)
                limit = coalesce(e.domain.limit, query.limit, DEFAULT_LIMIT)

                if col.cardinality == None:
                    DEBUG and Log.warning(
                        "metadata for column {{name|quote}} (id={{id}}) is not ready",
                        name=concat_field(col.es_index, col.es_column),
                        id=id(col))
                    e.domain = set_default(DefaultDomain(limit=limit),
                                           e.domain.__data__())
                    return object.__new__(DefaultDecoder)
                elif col.partitions == None:
                    e.domain = set_default(DefaultDomain(limit=limit),
                                           e.domain.__data__())
                    return object.__new__(DefaultDecoder)
                else:
                    DEBUG and Log.note("id={{id}} has parts!!!", id=id(col))
                    if col.multi > 1:
                        return object.__new__(MultivalueDecoder)

                    partitions = col.partitions[:limit:]
                    if e.domain.sort == -1:
                        partitions = list(reversed(sorted(partitions)))
                    else:
                        partitions = sorted(partitions)
                    e.domain = SimpleSetDomain(partitions=partitions,
                                               limit=limit)

            else:
                return object.__new__(DefaultDecoder)

        if e.value and e.domain.type in PARTITION:
            return object.__new__(SetDecoder)
        if isinstance(e.domain.dimension, Dimension):
            e.domain = e.domain.dimension.getDomain()
            return object.__new__(SetDecoder)
        if e.value and e.domain.type == "time":
            return object.__new__(TimeDecoder)
        if e.range:
            return object.__new__(GeneralRangeDecoder)
        if e.value and e.domain.type == "duration":
            return object.__new__(DurationDecoder)
        elif e.value and e.domain.type == "range":
            return object.__new__(RangeDecoder)
        elif not e.value and e.domain.dimension.fields:
            # THIS domain IS FROM A dimension THAT IS A SIMPLE LIST OF fields
            # JUST PULL THE FIELDS
            fields = e.domain.dimension.fields
            if is_data(fields):
                Log.error("No longer allowed: All objects are expressions")
            else:
                return object.__new__(DimFieldListDecoder)
        elif not e.value and all(e.domain.partitions.where):
            return object.__new__(GeneralSetDecoder)
        else:
            Log.error("domain type of {{type}} is not supported yet",
                      type=e.domain.type)