Пример #1
0
def condition(function=None,
              on=ConditionOperator.START,
              cache=False,
              vectorized=False):
    """
    Decorator that registers ``function`` as a condition in the SchematicCache.

    Can be applied bare (``@condition``) or with options
    (``@condition(on=..., cache=True)``); in the latter case ``function`` is
    ``None`` on the first call and a partially-applied decorator is returned.

    :param function: the function being decorated, or ``None`` when the
        decorator is called with keyword options only.
    :param on: where the condition applies; must be one of
        ``ConditionOperator.START``, ``ConditionOperator.END`` or
        ``ConditionOperator.GROUPS``.
    :param cache: when truthy, the function is wrapped in ``lru_cache`` and the
        wrapped version is both registered and returned.
    :param vectorized: forwarded unchanged to ``ConditionOperator``.
    :return: the (possibly ``lru_cache``-wrapped) function, or a ``partial`` of
        this decorator when ``function`` is ``None``.
    :raises InvalidConditionLocationError: if ``on`` is not a valid location.
    """
    from remake.schematic.cache import SchematicCache

    if function is None:
        return partial(condition, on=on, cache=cache, vectorized=vectorized)

    # get_info is defined elsewhere; it yields the registration key, the
    # condition name and the source names derived from the function.
    transformer, name, sources = get_info(function)
    if cache:
        function = lru_cache(function)

    valid_locations = [
        ConditionOperator.START, ConditionOperator.END,
        ConditionOperator.GROUPS
    ]
    if on not in valid_locations:
        # The keyword is ``on`` (not ``at``); report it the way the caller wrote it.
        msg = f"{name}(..., on={on}) is not a valid condition location. Please use one of {valid_locations}"
        raise InvalidConditionLocationError(msg)

    spec = ConditionOperator(name=name,
                             on=on,
                             sources=sources,
                             transform=function,
                             vectorized=vectorized)
    SchematicCache.add_condition(transformer, spec)

    return function
Пример #2
0
def named_split(function):
    """
    A split takes a dataframe and converts it into a dictionary with dataframes as values.

    Registers ``function`` in the SchematicCache as a ``SplitOperator`` with
    ``named=True``.

    :param function: the function being decorated.
    :return: ``function`` unchanged, so the decorated name stays bound to the
        function instead of being replaced by ``None``.
    """

    from remake.schematic.cache import SchematicCache

    # Only the registration key and the name are needed; the third value
    # returned by get_info is ignored.
    schematic, name, _ = get_info(function)

    SchematicCache.add_split(schematic, SplitOperator(name, function, named=True))

    # Return the function like the other registration decorators do.
    return function
Пример #3
0
def split(function):
    """
    A split takes a dataframe and converts it into a list of dataframes.

    Registers ``function`` in the SchematicCache as a ``SplitOperator`` with
    ``named=False``.

    :param function: the function being decorated.
    :return: ``function`` unchanged, so the decorated name stays bound to the
        function instead of being replaced by ``None``.
    """

    from remake.schematic.cache import SchematicCache

    # Only the registration key and the name are needed; the third value
    # returned by get_info is ignored.
    schematic, name, _ = get_info(function)

    SchematicCache.add_split(schematic, SplitOperator(name, function, named=False))

    # Return the function like the other registration decorators do.
    return function
Пример #4
0
def schematic(cls):
    """
    Class decorator that assembles a transformer class from ``cls``.

    The schematic registered in the SchematicCache under ``cls.__name__`` is
    merged on top of the schematics gathered from every direct base class, and
    the result is assembled into a new ``SchematicTransformation`` subclass
    named ``"<cls name>Schematic"``.

    :param cls: the class being decorated.
    :return: whatever ``assemble_schematic`` produces for the new class.
    :raises ValueError: if ``cls`` is not a class.
    """
    if not inspect.isclass(cls):
        raise ValueError(
            f"@schematic can only be used as a class decorator, it cannot decorate {cls}"
        )

    base_name = cls.__name__

    # Accumulate the schematics of all direct parents, in declaration order.
    parents_core = SchematicCore()
    for base in cls.__bases__:
        parents_core = merge_schematics(parents_core, recursive_merge(base))

    # Layer the class's own registered schematic over the inherited one.
    own_core = SchematicCache.get(base_name)
    combined_core = merge_schematics(parents_core, own_core)

    # Carry over the class's own attributes, skipping any name that starts
    # or ends with an underscore.
    public_members = {
        member_name: member
        for member_name, member in vars(cls).items()
        if not (member_name.startswith("_") or member_name.endswith("_"))
    }

    shell_cls = type(f"{base_name}Schematic", (SchematicTransformation, ), {})

    return assemble_schematic(shell_cls, combined_core, public_members)
Пример #5
0
def column(function=None, temporary=False, cache=False):
    """
    Decorator that registers ``function`` as a column in the SchematicCache.

    Works bare (``@column``) or with options (``@column(temporary=True)``); in
    the latter case ``function`` is ``None`` on the first call and a
    partially-applied decorator is returned.

    :param function: the function being decorated, or ``None`` when only
        keyword options were given.
    :param temporary: passed to ``ColumnOperator`` as ``is_view``.
    :param cache: when truthy, the function is wrapped in ``lru_cache`` and the
        wrapped version is both registered and returned.
    :return: the (possibly ``lru_cache``-wrapped) function, or a ``partial`` of
        this decorator when ``function`` is ``None``.
    """
    from remake.schematic.cache import SchematicCache

    if function is None:
        return partial(column, temporary=temporary, cache=cache)

    # Inspect the undecorated function before any caching wrapper is applied.
    owner, col_name, inputs = get_info(function)
    if cache:
        function = lru_cache(function)

    operator = ColumnOperator(
        name=col_name,
        input_columns=inputs,
        is_view=temporary,
        transform=function,
    )
    SchematicCache.add_column(owner, operator)

    return function
Пример #6
0
def transformation(function=None, on=TransformationOperator.START, after=None):
    """
    Decorator that registers ``function`` as a transformation in the SchematicCache.

    Can be applied bare (``@transformation``) or with options
    (``@transformation(on=..., after=...)``); in the latter case ``function``
    is ``None`` on the first call and a partially-applied decorator is returned.

    :param function: the function being decorated, or ``None`` when the
        decorator is called with keyword options only.
    :param on: where the transformation applies; must be one of
        ``TransformationOperator.START``, ``TransformationOperator.END`` or
        ``TransformationOperator.GROUPS``.
    :param after: forwarded unchanged to ``TransformationOperator``.
    :return: ``function`` unchanged, or a ``partial`` of this decorator when
        ``function`` is ``None``.
    :raises SchematicError: if ``on`` is not a valid location.
    """
    from remake.schematic.cache import SchematicCache

    if function is None:
        return partial(transformation, on=on, after=after)

    # The third value returned by get_info is not used for transformations.
    transformer, name, _ = get_info(function)

    valid_locations = [
        TransformationOperator.START, TransformationOperator.END,
        TransformationOperator.GROUPS
    ]
    if on not in valid_locations:
        # The keyword is ``on`` (not ``at``), and this is a transformation,
        # not a condition; report it accordingly.
        msg = f"{name}(..., on={on}) is not a valid transformation location. Please use one of {valid_locations}"
        raise SchematicError(msg)

    spec = TransformationOperator(name=name,
                                  on=on,
                                  after=after,
                                  transform=function)
    SchematicCache.add_transformation(transformer, spec)

    return function
Пример #7
0
def index(*columns):
    """
    Register the given columns as an index for the class whose body calls this.

    The owning class is identified by reading ``__qualname__`` from the local
    namespace of the calling frame, so this is meant to be called directly
    inside a class body.

    :param columns: the index columns; stored as a list in an ``IndexOperator``.
    :raises KeyError: if the caller's local namespace has no ``__qualname__``.
    """
    from remake.schematic.cache import SchematicCache

    # Only the immediate caller's frame is needed; inspect.stack() would build
    # a record (with source context) for every frame on the call stack.
    caller_frame = inspect.currentframe().f_back
    class_name = caller_frame.f_locals["__qualname__"]

    SchematicCache.add_index(class_name, IndexOperator(list(columns)))
Пример #8
0
def test_core_generation():
    """
    Register one spec of each kind under a single key and check that the core
    returned by ``SchematicCache.get`` holds exactly one entry per collection.
    """
    key = "TEST"

    # Registration order is kept as-is; each call adds one spec under ``key``.
    SchematicCache.add_column(key, ColumnSpec("test", [], False, lambda: True, False))
    SchematicCache.add_condition(key, ConditionSpec("test", [], lambda: True, ""))
    SchematicCache.add_copy(key, CopySpec("test", "test"))
    SchematicCache.add_group(key, GroupSpec(name="test", sources=[], transform=None, sort_by=None))
    SchematicCache.add_index(key, IndexSpec("test", []))
    SchematicCache.add_parameter(key, ParameterSpec("Test", "Test", None, None, None, None, None))
    SchematicCache.add_sorts(key, SortBySpec([]))
    SchematicCache.add_explode(key, ExplodeSpec([], None))
    SchematicCache.add_expand(key, ExpandSpec([], None))
    SchematicCache.add_join(key, JoinSpec([], None))

    generated = SchematicCache.get(key)

    # NOTE(review): a condition is registered above but no collection for it
    # is checked here — confirm whether that is intentional.
    single_entry_collections = (
        "columns",
        "copies",
        "groups",
        "indexes",
        "parameters",
        "sorts",
        "explodes",
        "expands",
        "joins",
    )
    for collection in single_entry_collections:
        assert len(getattr(generated, collection)) == 1