Exemplo n.º 1
0
    def add_columns(self, frame, expression, schema, columns_accessed=None):
        """
        Build the arguments for the 'add_columns' command and execute it on the frame.

        :param frame: frame being updated; ``frame.uri`` is always read, and
            ``frame.schema`` is read only when ``columns_accessed`` is non-empty
        :param expression: expression handed to the add-column function factory
        :param schema: either one (name, type) pair or a sequence of such pairs;
            a pair whose first item is a string is treated as a single column
        :param columns_accessed: optional column name or list of column names
            (presumably the columns the expression reads -- confirm with callers)
        :raises ValueError: if ``schema`` is falsy or has no ``__iter__`` attribute
        """
        if not (schema and hasattr(schema, "__iter__")):
            raise ValueError("add_columns requires a non-empty schema of (name, type)")

        # A bare (name, type) pair is wrapped so it can be handled like a list of pairs.
        single_column = isinstance(schema[0], basestring)
        if single_column:
            schema = [schema]

        names, data_types = zip(*self._format_schema(schema))

        # Three distinct cases are kept apart on purpose:
        #   None      -> empty column list, schema passed on as None
        #   empty     -> value passed through unchanged, schema is an empty list
        #   non-empty -> schema entries of the frame whose name matches, in the
        #                order the names were given
        if columns_accessed is None:
            columns_accessed, accessed_schema = [], None
        elif not columns_accessed:
            accessed_schema = []
        else:
            if isinstance(columns_accessed, basestring):
                columns_accessed = [columns_accessed]
            frame_schema = frame.schema
            accessed_schema = [column
                               for wanted in columns_accessed
                               for column in frame_schema
                               if wanted == column[0]]

        if single_column:
            add_columns_function = get_add_one_column_function(expression, data_types[0])
        else:
            add_columns_function = get_add_many_columns_function(expression, data_types)

        from itertools import imap
        arguments = {'frame': frame.uri,
                     'column_names': names,
                     'column_types': [get_rest_str_from_data_type(t) for t in data_types],
                     'udf': get_udf_arg(frame, add_columns_function, imap, accessed_schema),
                     'columns_accessed': columns_accessed}

        execute_update_frame_command('add_columns', arguments, frame)
Exemplo n.º 2
0
Arquivo: frame.py Projeto: acx2015/atk
    def add_columns(self, frame, expression, schema, columns_accessed=None):
        """
        Assemble and run the 'add_columns' update command for ``frame``.

        ``schema`` may be one (name, type) pair or a sequence of pairs. When its
        first item is a string it is taken to be a single pair, and the
        one-column function factory is used instead of the many-column one.

        ``columns_accessed`` may be None, a single column name, or a list of
        names. When it is non-empty, the entries of ``frame.schema`` whose
        first item equals one of those names are collected and given to
        ``get_udf_arg``; when it is None, ``None`` is given instead and an
        empty list is sent as 'columns_accessed'.

        Raises ValueError if ``schema`` is falsy or lacks ``__iter__``.
        """
        invalid_schema_message = "add_columns requires a non-empty schema of (name, type)"
        if not schema:
            raise ValueError(invalid_schema_message)
        if not hasattr(schema, "__iter__"):
            raise ValueError(invalid_schema_message)

        is_single = False
        if isinstance(schema[0], basestring):
            # Wrap the lone pair so the remaining code only deals with lists of pairs.
            schema, is_single = [schema], True

        formatted = self._format_schema(schema)
        names, data_types = zip(*formatted)

        matched_schema = []
        if columns_accessed:
            if isinstance(columns_accessed, basestring):
                columns_accessed = [columns_accessed]
            available = frame.schema
            for requested in columns_accessed:
                matched_schema.extend(entry for entry in available if requested == entry[0])

        # Only an explicit None switches to "no column list, no restricted schema";
        # other falsy values keep the (empty) list built above.
        if columns_accessed is None:
            columns_accessed = []
            matched_schema = None

        if is_single:
            udf_function = get_add_one_column_function(expression, data_types[0])
        else:
            udf_function = get_add_many_columns_function(expression, data_types)

        from itertools import imap
        arguments = {
            'frame': frame.uri,
            'column_names': names,
            'column_types': [get_rest_str_from_data_type(t) for t in data_types],
            'udf': get_udf_arg(frame, udf_function, imap, matched_schema),
            'columns_accessed': columns_accessed,
        }

        execute_update_frame_command('add_columns', arguments, frame)
Exemplo n.º 3
0
Arquivo: frame.py Projeto: AllanY/atk
    def aggregate_with_udf(self, frame, group_by_column_keys, aggregator_expression, output_schema, init_acc_values=None):
        """
        Build the arguments for 'frame/aggregate_with_udf' and return the command result.

        :param frame: source frame; its ``uri`` is sent as the 'frame' argument
        :param group_by_column_keys: sent unchanged as 'aggregate_by_column_keys'
        :param aggregator_expression: expression handed to the group-by aggregator factory
        :param output_schema: one (name, type) pair or a sequence of such pairs;
            a pair whose first item is a string is treated as a single column
        :param init_acc_values: optional value forwarded to ``get_aggregator_udf_arg``
            (presumably initial accumulator values -- confirm against that helper)
        :returns: whatever ``execute_new_frame_command`` returns
        :raises ValueError: if ``output_schema`` is falsy or has no ``__iter__`` attribute
        """
        if not (output_schema and hasattr(output_schema, "__iter__")):
            raise ValueError("aggregate_with_udf requires a non-empty schema of (name, type)")

        # A lone (name, type) pair is wrapped so it is formatted like a list of pairs.
        first_entry = output_schema[0]
        if isinstance(first_entry, basestring):
            output_schema = [output_schema]

        output_schema = self._format_schema(output_schema)
        names, data_types = zip(*output_schema)

        aggregator = get_group_by_aggregator_function(aggregator_expression, data_types)

        from itertools import imap
        # Filled key by key in the same order the values were originally evaluated.
        arguments = {}
        arguments["frame"] = frame.uri
        arguments["aggregate_by_column_keys"] = group_by_column_keys
        arguments["column_names"] = names
        arguments["column_types"] = [get_rest_str_from_data_type(t) for t in data_types]
        arguments["udf"] = get_aggregator_udf_arg(frame, aggregator, imap, output_schema, init_acc_values)
        return execute_new_frame_command('frame/aggregate_with_udf', arguments)
Exemplo n.º 4
0
 def from_types_to_strings(s):
     """Return a list of (name, type-string) pairs built from the (name, data_type) pairs in ``s``.

     Each data type is converted with ``get_rest_str_from_data_type``; names are
     passed through unchanged and the input order is kept.
     """
     converted = []
     for name, data_type in s:
         converted.append((name, get_rest_str_from_data_type(data_type)))
     return converted
Exemplo n.º 5
0
Arquivo: frame.py Projeto: acx2015/atk
 def from_types_to_strings(s):
     """Map every (name, data_type) pair in ``s`` to (name, string form of data_type).

     The string form comes from ``get_rest_str_from_data_type``. The result is a
     new list in the same order as ``s``.
     """
     pairs = []
     for pair in s:
         column_name, column_type = pair
         type_string = get_rest_str_from_data_type(column_type)
         pairs.append((column_name, type_string))
     return pairs