Exemplo n.º 1
0
    def __add_select_and_aggregate(self, select, groupby, where, window, tree):
        """
            Build the output descriptor for a query and attach it to tree,
            wrapping tree in a GroupBy operator when the SELECT clause
            contains at least one aggregate.

            select, groupby, and where are lists of unparsed fields in those
            respective clauses.  groupby equal to [''] is treated as "no
            GROUP BY fields"; otherwise the field list is read from
            groupby[1].  window is handed to operators.GroupBy unchanged and
            must not be None when an aggregate is selected.

            Returns tree (or the GroupBy operator wrapping it) after
            assign_descriptor() has been called with the select descriptor.

            Raises QueryException if an aggregate is selected without a
            window, or if a visible, non-aggregate field of the select
            descriptor is missing from the GROUP BY fields.  Errors raised
            while parsing or verifying individual fields propagate.
        """
        tuple_descriptor = TupleDescriptor()
        fields_to_verify = []
        all_fields = chain(select, where)
        if groupby != ['']:
            groupby = groupby[1:][0]
            all_fields = chain(all_fields, groupby)
        # chain() is lazy, so this call runs before groupby is iterated
        # below (presumably it strips empty-string tokens in place --
        # confirm against __remove_all).
        self.__remove_all(groupby, QueryTokens.EMPTY_STRING)

        # first pass: parse every field against the builtin twitter
        # descriptor and remember which names still need to be resolved
        for field in all_fields:
            (field_descriptors,
             verify) = self.__parse_field(field, self.twitter_td, True, False)
            fields_to_verify.extend(verify)
            tuple_descriptor.add_descriptor_list(field_descriptors)
        for field in fields_to_verify:
            self.__verify_and_fix_field(field, tuple_descriptor)

        # at this point, tuple_descriptor should contain a tuple descriptor
        # with fields/aliases that are correct (we would have gotten an
        # exception otherwise).  build select_descriptor/group_descriptor
        # from it
        select_descriptor = TupleDescriptor()
        group_descriptor = TupleDescriptor()
        aggregates = []
        for field in select:
            field_descriptors = self.__parse_field(field, tuple_descriptor,
                                                   True, True)[0]
            select_descriptor.add_descriptor_list(field_descriptors)
            # the first descriptor is the field itself; the rest are its
            # underlying fields
            if field_descriptors[0].field_type == FieldType.AGGREGATE:
                aggregates.append(field_descriptors[0])
        # add WHERE clause fields as invisible attributes
        for field in where:
            field_descriptors = self.__parse_field(field, tuple_descriptor,
                                                   True, False)[0]
            select_descriptor.add_descriptor_list(field_descriptors)
        if aggregates:
            if window is None:
                raise QueryException(
                    "Aggregate expression provided with no WINDOW parameter")
            for field in groupby:
                field_descriptors = self.__parse_field(field,
                                                       tuple_descriptor, True,
                                                       True)[0]
                group_descriptor.add_descriptor_list(field_descriptors)
            # every visible SELECT field must either be an aggregate or be
            # listed in the GROUP BY clause
            for alias in select_descriptor.aliases:
                select_field = select_descriptor.get_descriptor(alias)
                group_field = group_descriptor.get_descriptor(alias)
                if group_field is None and \
                   select_field.field_type != FieldType.AGGREGATE and \
                   select_field.visible:
                    raise QueryException(
                        "'%s' appears in the SELECT but is neither an aggregate nor a GROUP BY field"
                        % (alias))
            tree = operators.GroupBy(tree, group_descriptor, aggregates,
                                     window)
        tree.assign_descriptor(select_descriptor)
        return tree
Exemplo n.º 2
0
 def __get_source(self, parsed):
     """
         Translate the first entry of parsed.sources into a StatusSource
         value.

         Returns StatusSource.TWITTER_FILTER when the source equals
         QueryTokens.TWITTER, and StatusSource.TWITTER_SAMPLE when it starts
         with QueryTokens.TWITTER_SAMPLE.  Raises QueryException for any
         other source.
     """
     first_source = parsed.sources[0]
     if first_source == QueryTokens.TWITTER:
         return StatusSource.TWITTER_FILTER
     # prefix match, unlike the exact comparison above
     if first_source.startswith(QueryTokens.TWITTER_SAMPLE):
         return StatusSource.TWITTER_SAMPLE
     raise QueryException('Unknown query source: %s' % (first_source))
Exemplo n.º 3
0
 def build(self, query_str):
     """
         Takes a Unicode string query_str and parses it with self.parser.

         Raises QueryException (wrapping the original error) when the
         parser raises ParseException.

         NOTE(review): the original docstring states that this outputs a
         query tree, but the code visible here only parses the string and
         returns nothing -- confirm whether the rest of the method is
         missing from this excerpt.
     """
     try:
         parsed = self.parser.parseString(query_str)
     except ParseException as e:
         # "except X as e" is accepted by Python 2.6+ and Python 3,
         # unlike the older "except X, e" spelling
         raise QueryException(e)
Exemplo n.º 4
0
 def __parse_rval(self, val, allow_null):
     """
         Convert a right-hand-side value, mapping the NULL token to None.

         Any val other than QueryTokens.NULL_TOKEN is returned unchanged.
         The NULL token yields None when allow_null is true and raises
         QueryException otherwise.
     """
     is_null_token = (val == QueryTokens.NULL_TOKEN)
     if not is_null_token:
         return val
     if allow_null:
         return None
     raise QueryException("NULL appears in clause where it should not.")
Exemplo n.º 5
0
 def __get_handler(self, parsed):
     """
         Choose the status handler described by the query's INTO clause.

         parsed.into.asList() is expected to be either [''] (no INTO
         clause) or a list whose second element names the destination:
           - [''] or STDOUT  -> PrintStatusHandler(1)
           - TABLE <name>    -> DbInsertStatusHandler(1000, <name>)
           - STREAM <name>   -> DbException (not yet supported)

         Raises QueryException("Invalid INTO clause") for anything else,
         including lists too short to carry a destination token.
     """
     into = parsed.into.asList()
     # guard the index so that a short list falls through to the
     # "Invalid INTO clause" error instead of raising IndexError
     if (into == ['']) or (len(into) > 1 and into[1] == QueryTokens.STDOUT):
         return PrintStatusHandler(1)
     if len(into) == 3:
         if into[1] == QueryTokens.TABLE:
             return DbInsertStatusHandler(1000, into[2])
         if into[1] == QueryTokens.STREAM:
             raise DbException(
                 "Putting results into a STREAM is not yet supported")
     raise QueryException("Invalid INTO clause")
Exemplo n.º 6
0
 def run_built_query(self, query_built, async):
     self.build_stream()
     self.query = query_built
     self.query.handler.set_tuple_descriptor(self.query.get_tuple_descriptor())
     if self.query.source == StatusSource.TWITTER_FILTER:
         no_filter_exception = QueryException("You haven't specified any filters that can query Twitter.  Perhaps you want to query TWITTER_SAMPLE?")
         try:
             (follow_ids, track_words) = self.query.query_tree.filter_params()
             if (follow_ids == None) and (track_words == [None]):
                 raise no_filter_exception
             self.stream.filter(follow_ids, track_words, async)
         except NotImplementedError:
             raise no_filter_exception
     elif self.query.source == StatusSource.TWITTER_SAMPLE:
         self.stream.sample(None, async)
Exemplo n.º 7
0
 def __getattr__(self, attr):
     """
         Compute the value of attr from its field descriptor, store it on
         the instance with setattr, and return it.

         - FUNCTION fields: call the descriptor's function with the raw
           data followed by the values of the underlying fields.
         - LITERAL fields: use the descriptor's literal value.
         - anything else: look the first underlying field up in the raw
           data; raises QueryException when it is not present.

         A str result for a STRING-typed field is converted to unicode.
     """
     descriptor = self.__tuple_descriptor.get_descriptor(attr)
     kind = descriptor.field_type
     if kind == FieldType.FUNCTION:
         field_names = descriptor.underlying_fields
         func = descriptor.function
         # underlying fields are resolved through this same mechanism
         call_args = [getattr(self, name) for name in field_names]
         value = func(self.__data, *call_args)
     elif kind == FieldType.LITERAL:
         value = descriptor.literal_value
     else:
         source_key = descriptor.underlying_fields[0]
         if source_key not in self.__data:
             raise QueryException("Attribute not defined: %s" % (attr))
         value = self.__data[source_key]

     wants_string = (descriptor.return_type == ReturnType.STRING)
     if wants_string and isinstance(value, str):
         value = unicode(value)
     setattr(self, attr, value)
     return value
Exemplo n.º 8
0
 def add_descriptor(self, descriptor):
     """
         Register descriptor under its alias.

         A new alias is appended to self.aliases and the descriptor is
         stored as-is.  For an alias that is already present:
           - QueryException is raised when both the stored and the new
             descriptor have a defined field type and they differ;
           - the stored entry is replaced unless the new descriptor's
             field type is UNDEFINED;
           - the resulting entry is visible if either descriptor was.
     """
     incoming_visible = descriptor.visible
     alias = descriptor.alias

     if alias not in self.descriptors:
         self.aliases.append(alias)
         self.descriptors[alias] = descriptor  #copy.deepcopy(descriptor)
         self.descriptors[alias].visible = incoming_visible
         return

     stored = self.descriptors[alias]
     both_defined = (stored.field_type != FieldType.UNDEFINED) and \
         (descriptor.field_type != FieldType.UNDEFINED)
     if both_defined and (stored != descriptor):
         raise QueryException(
             "The alias '%s' appears more than once in your query" %
             (alias))

     # visibility is sticky: once either descriptor is visible, the
     # stored one stays visible
     merged_visible = stored.visible or incoming_visible
     if not (descriptor.field_type == FieldType.UNDEFINED):
         self.descriptors[alias] = descriptor  #copy.deepcopy(descriptor)
     self.descriptors[alias].visible = merged_visible
Exemplo n.º 9
0
 def __verify_and_fix_field(self, field, tuple_descriptor):
     """
         Make sure field resolves to a defined descriptor and return it.

         A descriptor whose type is still UNDEFINED is resolved by
         recursively verifying its first underlying field and copying that
         descriptor's type information onto it.

         Raises QueryException when field has no descriptor, or when an
         UNDEFINED descriptor only refers back to itself.
     """
     descriptor = tuple_descriptor.get_descriptor(field)
     unresolvable = (descriptor == None)
     if not unresolvable and descriptor.field_type == FieldType.UNDEFINED:
         target = descriptor.underlying_fields[0]
         if field == target:
             # refers to itself: neither a builtin field nor an alias
             unresolvable = True
         else:
             resolved = self.__verify_and_fix_field(target, tuple_descriptor)
             # copy order matches the attribute list below
             for name in ('underlying_fields', 'field_type', 'return_type',
                          'aggregate_factory', 'func_factory', 'function'):
                 setattr(descriptor, name, getattr(resolved, name))
     if unresolvable:
         raise QueryException(
             "Field '%s' is neither a builtin field nor an alias" % (field))
     return descriptor
Exemplo n.º 10
0
 def get_function(self, alias):
     """
         Return the function information registered under alias.

         Raises QueryException when alias has not been registered.
     """
     if alias in self.__functions:
         return self.__functions[alias]
     raise QueryException("'%s' is not a registered function" % (alias))
Exemplo n.º 11
0
 def register(self, alias, function_information):
     """
         Store function_information under alias.

         Raises QueryException when alias is already registered; existing
         entries are never overwritten.
     """
     already_registered = alias in self.__functions
     if already_registered:
         raise QueryException("'%s' has already been registered" % (alias))
     self.__functions[alias] = function_information
Exemplo n.º 12
0
    def __parse_field(self, field, tuple_descriptor, alias_on_complex_types,
                      make_visible):
        """
            Returns a tuple containing (field_descriptors, fieldnames_to_verify)

            The first field in field_descriptors is the one requested to be parsed by this
            function call.  If the field turns out to be an aggregate or a user-defined
            function call, then field_descriptors will contain those parsed field descriptors
            as well, with their visible flag set to False.

            fieldnames_to_verify is a list of field names that should be verified in order
            to ensure that at some point their alias is defined in an AS clause.

            field is the token list for one field; it is cleaned in place by
            __clean_list.  tuple_descriptor is used to look up column names
            that are already known.  When alias_on_complex_types is true, a
            function or aggregate without an AS alias is rejected; otherwise
            it receives a generated name.  make_visible becomes the visible
            flag of the returned (first) descriptor.

            Raises QueryException for an empty field, an unknown leading
            token, a function/aggregate lacking a required alias, or a name
            that is neither an aggregate nor a registered function.
        """
        alias = None
        field_type = None
        return_type = None
        underlying_fields = None
        aggregate_factory = None
        literal_value = None
        func_factory = None
        fields_to_verify = []
        parsed_fds = []
        field_backup = list(field)
        self.__clean_list(field)

        # without this guard an empty field fails on field[0] below with an
        # IndexError instead of the intended QueryException
        if len(field) == 0:
            raise QueryException("Empty field clause found: %s" %
                                 ("".join(field_backup)))

        # parse aliases if they exist
        if (len(field) >= 4) and (field[-2] == QueryTokens.AS):
            alias = field[-1]
            field = field[:-2]
        if (field[0] == QueryTokens.STRING_LITERAL) or \
           (field[0] == QueryTokens.INTEGER_LITERAL) or \
           (field[0] == QueryTokens.FLOAT_LITERAL):
            # NOTE(review): an alias parsed from an AS clause above is
            # overwritten here -- confirm this is intended for literals.
            alias = self.unnamed_operator_name()
            underlying_fields = []
            field_type = FieldType.LITERAL
            literal_value = field[1]
            if field[0] == QueryTokens.STRING_LITERAL:
                return_type = ReturnType.STRING
            elif field[0] == QueryTokens.INTEGER_LITERAL:
                return_type = ReturnType.INTEGER
                literal_value = int(literal_value)
            elif field[0] == QueryTokens.FLOAT_LITERAL:
                return_type = ReturnType.FLOAT
                literal_value = float(literal_value)
        elif field[0] == QueryTokens.COLUMN_NAME:  # field or alias
            if alias is None:
                alias = field[1]
            field_descriptor = tuple_descriptor.get_descriptor(field[1])
            if field_descriptor is None:  # underlying field not yet defined.  mark to check later
                field_type = FieldType.UNDEFINED
                underlying_fields = [field[1]]
                # check alias and underlying once this process is done to
                # find yet-undefined fields
                fields_to_verify.append(field[1])
                fields_to_verify.append(alias)
            else:  # field found, copy information
                field_type = field_descriptor.field_type
                return_type = field_descriptor.return_type
                underlying_fields = field_descriptor.underlying_fields
                aggregate_factory = field_descriptor.aggregate_factory
                func_factory = field_descriptor.func_factory
        elif field[
                0] == QueryTokens.FUNCTION_OR_AGGREGATE:  # function or aggregate
            if alias is None:
                if alias_on_complex_types:
                    raise QueryException(
                        "Must specify alias (AS clause) for '%s'" % (field[1]))
                else:
                    alias = self.unnamed_operator_name()
            # field[1] is the function/aggregate name, the remaining
            # entries are its arguments, each parsed as a field of its own
            underlying_field_list = field[2:]
            underlying_fields = []
            for underlying in underlying_field_list:
                (parsed_fd_list,
                 parsed_verify) = self.__parse_field(underlying,
                                                     tuple_descriptor, False,
                                                     False)
                for parsed_fd in parsed_fd_list:
                    parsed_fd.visible = False
                fields_to_verify.extend(parsed_verify)
                parsed_fds.extend(parsed_fd_list)
                underlying_fields.append(parsed_fd_list[0].alias)
            aggregate_factory = get_aggregate_factory(field[1])
            if aggregate_factory is not None:  # found an aggregate function
                field_type = FieldType.AGGREGATE
                return_type = ReturnType.FLOAT
            else:
                function_information = self.function_registry.get_function(
                    field[1])
                if function_information is not None:
                    field_type = FieldType.FUNCTION
                    func_factory = function_information.func_factory
                    return_type = function_information.return_type
                else:
                    # NOTE(review): only reached if get_function can return
                    # None rather than raising for unknown names -- verify
                    # against the registry implementation.
                    raise QueryException(
                        "'%s' is neither an aggregate or a registered function"
                        % (field[1]))
        else:
            # a non-empty field whose leading token type is not recognised
            raise QueryException("Empty field clause found: %s" %
                                 ("".join(field_backup)))
        fd = FieldDescriptor(alias, underlying_fields, field_type, return_type,
                             aggregate_factory, func_factory, literal_value)
        fd.visible = make_visible
        # the requested field always comes first, ahead of any underlying
        # descriptors collected above
        parsed_fds.insert(0, fd)
        return (parsed_fds, fields_to_verify)