Exemplo n.º 1
0
def filter(data, where):
    """
    Return the records of `data` that are accepted by `where`.

    data   - the records to filter; handed back untouched when it is empty,
             or when `where` equals None or TRUE
    where  - a function that accepts (record, rownum, rows) and returns boolean
             (it is passed through get() before being called)

    A Container instance is asked to filter itself.  Any other container is
    filtered here: each record is wrapped before being tested, and the
    accepted records are unwrapped, collected and returned wrapped.
    """
    # NO RECORDS: NOTHING TO FILTER
    if len(data) == 0:
        return data

    # NO EFFECTIVE RESTRICTION
    # NOTE(review): `== None` (not `is None`) is kept on purpose; presumably
    # project null objects compare equal to None - confirm before changing
    if where == None or where == TRUE:
        return data

    # CONTAINERS KNOW HOW TO FILTER THEMSELVES
    if isinstance(data, Container):
        return data.filter(where)

    if is_container(data):
        predicate = get(where)
        rows = wrap(data)
        accepted = []
        for rownum, record in enumerate(data):
            if predicate(wrap(record), rownum, rows):
                accepted.append(unwrap(record))
        return wrap(accepted)

    Log.error("Do not know how to handle type {{type}}",
              type=data.__class__.__name__)

    # NOTE(review): only reached if Log.error() returns instead of raising - confirm
    try:
        return drill_filter(where, data)
    except Exception:
        # WOW!  THIS IS INEFFICIENT!
        boxed = [DataObject(d) for d in data]
        return wrap([unwrap(d) for d in drill_filter(where, boxed)])
Exemplo n.º 2
0
def sort(data, fieldnames=None, already_normalized=False):
    """
    Return the records of `data` in sorted order.

    PASS A FIELD NAME, OR LIST OF FIELD NAMES, OR LIST OF STRUCTS WITH {"field":field_name, "sort":direction}

    data               - list, or other non-text iterable, of records
    fieldnames         - an int is used as an index into each record (with
                         sort direction 1); a falsy value sorts the records
                         with value_compare alone; anything else is
                         normalized with query._normalize_sort()
    already_normalized - True when `fieldnames` is already in normalized
                         form, so the normalization step is skipped

    Returns Null when `data` equals None, a wrapped list when no fieldnames
    are given, otherwise a FlatList of unwrapped records.  Any failure is
    reported through Log.error("Problem sorting...") with the original
    exception as cause.
    """
    try:
        if data == None:
            return Null

        # BUILD A LIST OF (accessor, direction) PAIRS, MOST SIGNIFICANT FIRST
        if isinstance(fieldnames, int):
            accessors = [(lambda t: t[fieldnames], 1)]
        elif not fieldnames:
            # NOTHING TO SORT BY: COMPARE THE RECORDS THEMSELVES
            return wrap(sort_using_cmp(data, value_compare))
        else:
            formal = (
                fieldnames
                if already_normalized
                else query._normalize_sort(fieldnames)
            )
            accessors = [(get(f.value), f.sort) for f in formal]

        def comparer(left, right):
            # FIRST NON-ZERO COMPARISON DECIDES THE ORDER
            for accessor, direction in accessors:
                try:
                    diff = value_compare(
                        accessor(left), accessor(right), direction)
                    if diff != 0:
                        return diff
                except Exception as e:
                    Log.error("problem with compare", e)
            return 0

        if is_list(data):
            ordered = sort_using_cmp(data, cmp=comparer)
            output = FlatList([unwrap(d) for d in ordered])
        elif is_text(data):
            # TEXT IS ITERABLE, BUT IT IS NOT A SEQUENCE OF RECORDS
            Log.error("Do not know how to handle")
        elif hasattr(data, "__iter__"):
            # MATERIALIZE THE ITERABLE BEFORE SORTING
            ordered = sort_using_cmp(list(data), cmp=comparer)
            output = FlatList([unwrap(d) for d in ordered])
        else:
            Log.error("Do not know how to handle")
            output = None

        return output
    except Exception as e:
        Log.error("Problem sorting\n{{data}}", data=data, cause=e)
Exemplo n.º 3
0
def window(data, param):
    """
    Calculate a window-function column and assign it to the records of `data`.

    MAYBE WE CAN DO THIS WITH NUMPY (no, the edges of windows are not graceful with numpy)

    data  - list of records
    param - description of the window; the attributes read are name, edges,
            where, sort, value, aggregate and range

    The records are first restricted with filter(data, param.where), then one
    of three paths is taken:
      * no aggregate and no edges: the (optionally sorted) records each get
        the calculated value assigned to `name`
      * edges, but no aggregate (or aggregate == "none"): same, but
        calculated separately over each group
      * otherwise: per group, calculated values are stored under "__temp__",
        fed through an aggregate() accumulator (add/sub/end) and the running
        result is assigned to `name`; "__temp__" is set to None afterwards

    Nothing is returned; results are assigned onto the records visited.
    """
    target = param.name  # column to assign window function result
    edges = param.edges  # columns to group by
    where = param.where  # restriction applied to data before anything else
    sort_columns = param.sort  # columns to sort by
    # function that takes (record, rownum, rows) and returns a value (for aggregation)
    evaluate = get(param.value)
    aggregate = param.aggregate  # WindowFunction to apply
    # of form {"min":-10, "max":0} to specify the size and relative position of window
    bounds = param.range

    data = filter(data, where)

    if not (aggregate or edges):
        if sort_columns:
            data = sort(data, sort_columns, already_normalized=True)
        # SIMPLE CALCULATED VALUE
        for rownum, record in enumerate(data):
            record[target] = evaluate(record, rownum, data)
        return

    try:
        group_keys = [edge.value.var for edge in edges]
    except Exception as e:
        raise Log.error("can only support simple variable edges", cause=e)

    if not aggregate or aggregate == "none":
        # CALCULATED VALUE, RESTARTED FOR EVERY GROUP
        for _, members in groupby(data, group_keys):
            if not members:
                continue  # CAN DO NOTHING WITH THIS ZERO-SAMPLE

            ordered = (
                sort(members, sort_columns, already_normalized=True)
                if sort_columns
                else members
            )

            for rownum, record in enumerate(ordered):
                record[target] = evaluate(record, rownum, ordered)
        return

    for _, members in groupby(data, group_keys):
        if not members:
            continue  # CAN DO NOTHING WITH THIS ZERO-SAMPLE

        # NOTE(review): unlike the calls above, already_normalized is not passed here - confirm
        ordered = sort(members, sort_columns)

        # STASH THE PER-RECORD VALUE SO THE ACCUMULATOR CAN ADD/SUBTRACT IT
        for rownum, record in enumerate(ordered):
            record["__temp__"] = evaluate(record, rownum, ordered)

        head = coalesce(bounds.max, bounds.stop)
        tail = coalesce(bounds.min, bounds.start)

        # PRELOAD THE ACCUMULATOR
        # NOTE(review): indexes may fall outside `ordered`; presumably the
        # sequence tolerates that - confirm
        accumulator = aggregate()
        for offset in range(tail, head):
            accumulator.add(ordered[offset].__temp__)

        # WINDOW FUNCTION APPLICATION: READ RESULT, THEN SLIDE THE WINDOW BY ONE
        for position, record in enumerate(ordered):
            record[target] = accumulator.end()
            accumulator.add(ordered[position + head].__temp__)
            accumulator.sub(ordered[position + tail].__temp__)

    for record in data:
        record["__temp__"] = None  # CLEANUP