示例#1
0
文件: frame.py 项目: abhiwand/atk
    def filter_vertices(self, frame, predicate, keep_matching_vertices=True):
        """
        Issue the "frame:vertex/filter" update command for a frame.

        :param frame: frame whose ``uri`` is sent as the target of the command
        :param predicate: user function handed to ``get_udf_arg`` along with the frame
        :param keep_matching_vertices: when truthy the predicate is paired with
            ``ifilter``, otherwise with ``ifilterfalse`` (presumably keep-vs-drop
            semantics like the itertools functions of the same names -- confirm
            in trustedanalytics.rest.spark)
        """
        from trustedanalytics.rest.spark import ifilter
        from trustedanalytics.rest.spark import ifilterfalse

        # The two variants differ only in which iterator wraps the predicate.
        row_iterator = ifilter if keep_matching_vertices else ifilterfalse
        udf_argument = get_udf_arg(frame, predicate, row_iterator)
        execute_update_frame_command("frame:vertex/filter",
                                     {"frame": frame.uri, "udf": udf_argument},
                                     frame)
示例#2
0
    def filter_vertices(self, frame, predicate, keep_matching_vertices = True):
        """
        Send a "frame:vertex/filter" update command built from a predicate.

        :param frame: frame providing the ``uri`` placed in the command arguments
        :param predicate: user function passed to ``get_udf_arg``
        :param keep_matching_vertices: selects ``ifilter`` (truthy) or
            ``ifilterfalse`` (falsy) as the iterator given to ``get_udf_arg``
        """
        from trustedanalytics.rest.spark import ifilter
        from trustedanalytics.rest.spark import ifilterfalse

        # Choose the iterator first so that a single arguments dict is built.
        if keep_matching_vertices:
            chosen_iterator = ifilter
        else:
            chosen_iterator = ifilterfalse

        arguments = {'frame': frame.uri,
                     'udf': get_udf_arg(frame, predicate, chosen_iterator)}
        execute_update_frame_command("frame:vertex/filter", arguments, frame)
示例#3
0
文件: frame.py 项目: abhiwand/atk
 def filter(self, frame, predicate):
     """
     Issue the "frame:/filter" update command with an ``ifilter``-based UDF.

     :param frame: frame whose ``uri`` is sent with the command
     :param predicate: user function wrapped by ``get_udf_arg`` together with ``ifilter``
     """
     from trustedanalytics.rest.spark import ifilter
     frame_uri = frame.uri
     udf_argument = get_udf_arg(frame, predicate, ifilter)
     execute_update_frame_command("frame:/filter", {'frame': frame_uri, 'udf': udf_argument}, frame)
示例#4
0
文件: frame.py 项目: abhiwand/atk
 def drop(self, frame, predicate):
     """
     Issue the "frame:/filter" update command with an ``ifilterfalse``-based UDF.

     :param frame: frame whose ``uri`` is sent with the command
     :param predicate: user function wrapped by ``get_udf_arg`` together with ``ifilterfalse``
     """
     # Dropping reuses the REST filter command; only the iterator is swapped for ifilterfalse.
     from trustedanalytics.rest.spark import ifilterfalse
     frame_uri = frame.uri
     udf_argument = get_udf_arg(frame, predicate, ifilterfalse)
     execute_update_frame_command("frame:/filter", {'frame': frame_uri, 'udf': udf_argument}, frame)
示例#5
0
文件: frame.py 项目: acx2015/atk
    def add_columns(self, frame, expression, schema, columns_accessed=None):
        """
        Build and issue the 'add_columns' update command for a frame.

        :param frame: frame to update; its ``uri`` is sent with the command and its
            ``schema`` is read when ``columns_accessed`` is non-empty
        :param expression: user function wrapped by ``get_add_one_column_function``
            (single column) or ``get_add_many_columns_function`` (several columns)
        :param schema: a single (name, type) pair, or an iterable of such pairs
        :param columns_accessed: optional column name or list of column names; when
            None, an empty list is sent and no schema subset is given to ``get_udf_arg``
        :raises ValueError: if ``schema`` is falsy or has no ``__iter__`` attribute
        """
        if not schema or not hasattr(schema, "__iter__"):
            raise ValueError("add_columns requires a non-empty schema of (name, type)")

        # A leading string means the caller passed one bare (name, type) pair.
        single_column = isinstance(schema[0], basestring)
        if single_column:
            schema = [schema]

        names, data_types = zip(*self._format_schema(schema))

        if columns_accessed is None:
            # Nothing requested: send an empty list and pass no schema subset to get_udf_arg.
            columns_accessed, udf_schema = [], None
        elif columns_accessed:
            if isinstance(columns_accessed, basestring):
                columns_accessed = [columns_accessed]
            available_columns = frame.schema
            # Keep the frame's schema entries whose name (first element) was requested,
            # ordered by the requested names.
            udf_schema = [column
                          for accessed_name in columns_accessed
                          for column in available_columns
                          if accessed_name == column[0]]
        else:
            # Falsy but not None (e.g. an empty list): passed through with an empty subset.
            udf_schema = []

        if single_column:
            add_columns_function = get_add_one_column_function(expression, data_types[0])
        else:
            add_columns_function = get_add_many_columns_function(expression, data_types)

        from itertools import imap
        arguments = {
            'frame': frame.uri,
            'column_names': names,
            'column_types': [get_rest_str_from_data_type(data_type) for data_type in data_types],
            'udf': get_udf_arg(frame, add_columns_function, imap, udf_schema),
            'columns_accessed': columns_accessed,
        }

        execute_update_frame_command('add_columns', arguments, frame)
示例#6
0
    def add_columns(self, frame, expression, schema, columns_accessed=None):
        """
        Build and issue the 'add_columns' update command for a frame.

        :param frame: frame to update; its ``uri`` is sent with the command and its
            ``schema`` is read when ``columns_accessed`` is non-empty
        :param expression: user function wrapped by ``get_add_one_column_function``
            (single column) or ``get_add_many_columns_function`` (several columns)
        :param schema: a single (name, type) pair, or an iterable of such pairs
        :param columns_accessed: optional column name or list of column names; when
            None, an empty list is sent and no schema subset is given to ``get_udf_arg``
        :raises ValueError: if ``schema`` is falsy or has no ``__iter__`` attribute
        """
        if not schema or not hasattr(schema, "__iter__"):
            raise ValueError("add_columns requires a non-empty schema of (name, type)")

        # A leading string means the caller passed one bare (name, type) pair.
        single_column = isinstance(schema[0], basestring)
        if single_column:
            schema = [schema]

        names, data_types = zip(*self._format_schema(schema))

        if columns_accessed is None:
            # Nothing requested: send an empty list and pass no schema subset to get_udf_arg.
            columns_accessed, udf_schema = [], None
        elif columns_accessed:
            if isinstance(columns_accessed, basestring):
                columns_accessed = [columns_accessed]
            available_columns = frame.schema
            # Keep the frame's schema entries whose name (first element) was requested,
            # ordered by the requested names.
            udf_schema = [column
                          for accessed_name in columns_accessed
                          for column in available_columns
                          if accessed_name == column[0]]
        else:
            # Falsy but not None (e.g. an empty list): passed through with an empty subset.
            udf_schema = []

        if single_column:
            add_columns_function = get_add_one_column_function(expression, data_types[0])
        else:
            add_columns_function = get_add_many_columns_function(expression, data_types)

        from itertools import imap
        arguments = {
            'frame': frame.uri,
            'column_names': names,
            'column_types': [get_rest_str_from_data_type(data_type) for data_type in data_types],
            'udf': get_udf_arg(frame, add_columns_function, imap, udf_schema),
            'columns_accessed': columns_accessed,
        }

        execute_update_frame_command('add_columns', arguments, frame)
示例#7
0
文件: frame.py 项目: acx2015/atk
 def get_row_count(self, frame, where):
     """
     Return the frame's row count, or run a server-side count over a predicate.

     :param frame: frame to count; its ``uri`` is sent when ``where`` is given
     :param where: optional predicate; when falsy the ``row_count`` of the frame
         info is returned directly
     :return: ``row_count`` from ``self._get_frame_info(frame)``, or the result of
         executing the "frame/count_where" command
     """
     if where:
         # Emit a constant marker per matching row instead of the row itself, since
         # only the number of matches matters.
         # TODO - there's got to be a better way to do this with the RDDs, trick is with Python.
         def icountwhere(predicate, iterable):
             return ("[1]" for row in iterable if predicate(row))

         udf_argument = get_udf_arg(frame, where, icountwhere)
         return executor.execute("frame/count_where", self, {'frame': frame.uri, 'udf': udf_argument})

     return self._get_frame_info(frame).row_count
示例#8
0
 def get_row_count(self, frame, where):
     """
     Return the frame's row count, or run a server-side count over a predicate.

     :param frame: frame to count; its ``uri`` is sent when ``where`` is given
     :param where: optional predicate; when falsy the ``row_count`` of the frame
         info is returned directly
     :return: ``row_count`` from ``self._get_frame_info(frame)``, or the 'value'
         entry of the "frame/count_where" command result
     """
     if where:
         # Emit a constant marker per matching row instead of the row itself, since
         # only the number of matches matters.
         # TODO - there's got to be a better way to do this with the RDDs, trick is with Python.
         def icountwhere(predicate, iterable):
             return ("[1]" for row in iterable if predicate(row))

         udf_argument = get_udf_arg(frame, where, icountwhere)
         command_result = executor.execute("frame/count_where", self, {'frame': frame.uri, 'udf': udf_argument})
         return command_result['value']

     return self._get_frame_info(frame).row_count
示例#9
0
文件: frame.py 项目: acx2015/atk
 def filter(self, frame, predicate):
     """
     Send the "frame:/filter" update command using ``ifilter`` as the row iterator.

     :param frame: frame whose ``uri`` goes into the command arguments
     :param predicate: user function passed to ``get_udf_arg`` with ``ifilter``
     """
     from trustedanalytics.rest.spark import ifilter
     arguments = dict(frame=frame.uri,
                      udf=get_udf_arg(frame, predicate, ifilter))
     execute_update_frame_command("frame:/filter", arguments, frame)
示例#10
0
文件: frame.py 项目: acx2015/atk
 def drop(self, frame, predicate):
     """
     Send the "frame:/filter" update command using ``ifilterfalse`` as the row iterator.

     :param frame: frame whose ``uri`` goes into the command arguments
     :param predicate: user function passed to ``get_udf_arg`` with ``ifilterfalse``
     """
     # Same REST filter command as filter(), but driven by the ifilterfalse iterator.
     from trustedanalytics.rest.spark import ifilterfalse
     arguments = dict(frame=frame.uri,
                      udf=get_udf_arg(frame, predicate, ifilterfalse))
     execute_update_frame_command("frame:/filter", arguments, frame)