Exemplo n.º 1
0
def h_min(col):
    """
    Return an aggregation for the minimum of the given column.  Like the SQL min() function::

        select(h_min(employee.salary), employee.department, where=employee)

    returns the lowest salary in each department.

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to aggregate
    """
    import mdb

    if col.is_numeric:
        # Seed with 2**63 - 1 so that the first smaller value replaces it.
        default = lambda: 9223372036854775807
        result_type = mdb.MDB_INT_32
    else:
        # Seed with U+FFFF, the last code point of the Basic Multilingual
        # Plane.  Spelled as a literal because unichr() only exists on
        # Python 2; u'\uffff' is the same value on Python 2 and Python 3.
        default = lambda: u'\uffff'
        result_type = mdb.MDB_STR

    return Aggregation("min",
                       col,
                       # keep the accumulator only while it is strictly smaller
                       f=lambda a, v: a if a < v else v,
                       default=default,
                       result_spec=Column('_min_type', type_indicator=result_type))
Exemplo n.º 2
0
    def test_is_trie(self):
        """Check ``is_trie`` for three type/compression combinations.

        Only the MDB_STR column with ``compression_indicator=0`` is expected
        to report ``is_trie``; the MDB_INT_16 column and the MDB_STR column
        with ``compression_indicator=1`` are not.
        """
        def make(type_indicator, compression_indicator):
            # Everything except type and compression is shared by all cases.
            return Column(_NAME,
                          None,
                          index_indicator=False,
                          partition=False,
                          type_indicator=type_indicator,
                          compression_indicator=compression_indicator,
                          rtrie_indicator=mdb.MDB_UINT_16)

        self.assertTrue(make(mdb.MDB_STR, 0).is_trie)
        self.assertFalse(make(mdb.MDB_INT_16, 0).is_trie)
        self.assertFalse(make(mdb.MDB_STR, 1).is_trie)
Exemplo n.º 3
0
    def test_get_effective_inttype(self):
        """Check ``get_effective_inttype()`` before and after switching to MDB_STR.

        The expected value is MDB_UINT_16 (the column's ``type_indicator``)
        at first, and MDB_INT_16 (its ``rtrie_indicator``) once the type has
        been changed to MDB_STR.
        """
        column = Column(_NAME,
                        None,
                        index_indicator=False,
                        partition=False,
                        type_indicator=mdb.MDB_UINT_16,
                        compression_indicator=0,
                        rtrie_indicator=mdb.MDB_INT_16)
        effective = column.get_effective_inttype()
        self.assertEqual(effective, mdb.MDB_UINT_16)

        column.type_indicator = mdb.MDB_STR
        effective = column.get_effective_inttype()
        self.assertEqual(effective, mdb.MDB_INT_16)
Exemplo n.º 4
0
    def test_get_effective_inttype(self):
        """Verify the integer type reported by ``get_effective_inttype()``.

        For the MDB_UINT_16 column the test expects MDB_UINT_16 back; after
        the type is set to MDB_STR it expects the ``rtrie_indicator`` value,
        MDB_INT_16.
        """
        shared = dict(index_indicator=False,
                      partition=False,
                      compression_indicator=0)
        col = Column(_NAME, None,
                     type_indicator=mdb.MDB_UINT_16,
                     rtrie_indicator=mdb.MDB_INT_16,
                     **shared)
        self.assertEqual(col.get_effective_inttype(), mdb.MDB_UINT_16)

        # Same column object, now typed as a string.
        col.type_indicator = mdb.MDB_STR
        self.assertEqual(col.get_effective_inttype(), mdb.MDB_INT_16)
Exemplo n.º 5
0
def h_count():
    """
    Build the aggregation for the count of each grouped key in a query, the
    counterpart of the SQL count() function::

        select(h_count(), employee.department, where=employee)

    gives the number of employees in each department.
    """
    import mdb

    counted = Column(name='_count', type_indicator=1)
    accumulate = lambda a, v: a + v   # add each incoming value to the total
    start = lambda: 0
    spec = Column('_count_type', type_indicator=mdb.MDB_UINT_32)
    return Aggregation("count", counted, f=accumulate, default=start, result_spec=spec)
Exemplo n.º 6
0
 def test_is_boolean(self):
     """Check the type flags of a column created with ``boolean=True``."""
     flagged = Column(_NAME, None, boolean=True)

     # Flags the test expects to be set on a boolean column.
     for prop in ('is_int', 'is_index', 'is_boolean'):
         self.assertTrue(getattr(flagged, prop))

     # Flags the test expects to be clear on a boolean column.
     for prop in ('is_wide', 'is_trie'):
         self.assertFalse(getattr(flagged, prop))
Exemplo n.º 7
0
 def test_check_range_query(self):
     """Smoke test: apply every comparison operator to an indexed MDB_INT_16 column.

     There are no assertions; the test passes as long as none of the
     comparisons raises.
     """
     column = Column(_NAME,
                     None,
                     index_indicator=True,
                     partition=False,
                     type_indicator=mdb.MDB_INT_16,
                     compression_indicator=1,
                     rtrie_indicator=None)
     comparisons = (
         lambda: column < 1,
         lambda: column <= 1,
         lambda: column > 1,
         lambda: column >= 1,
         lambda: column == 1,
         lambda: column != 1,
     )
     # Evaluate in the listed order; the results are intentionally discarded.
     for compare in comparisons:
         compare()
Exemplo n.º 8
0
 def test_check_range_query_for_partition(self):
     """Smoke test: apply every comparison operator to an MDB_STR partition column.

     The column is created with ``partition=True`` and
     ``compression_indicator=1``.  Nothing is asserted; any exception
     raised by a comparison fails the test.
     """
     column = Column(_NAME,
                     None,
                     index_indicator=1,
                     partition=True,
                     type_indicator=mdb.MDB_STR,
                     compression_indicator=1,
                     rtrie_indicator=None)
     for compare in (lambda: column < "foo",
                     lambda: column <= "foo",
                     lambda: column > "foo",
                     lambda: column >= "foo",
                     lambda: column == "foo",
                     lambda: column != "foo"):
         # Only building the expression matters; its value is not inspected.
         compare()
Exemplo n.º 9
0
    def test_column_errors(self):
        """Comparing a column created with ``index_indicator=0`` must raise TypeError."""
        unindexed = Column(_NAME,
                           None,
                           index_indicator=0,
                           partition=False,
                           type_indicator=mdb.MDB_STR,
                           compression_indicator=0,
                           rtrie_indicator=mdb.MDB_UINT_16)

        # Callable form of assertRaises: the comparison runs inside the lambda.
        self.assertRaises(TypeError, lambda: unindexed > 'hello')
        self.assertRaises(TypeError, lambda: unindexed == 'hello')
Exemplo n.º 10
0
def h_count():
    """
    Build the aggregation for the count of each grouped key in a query, the
    counterpart of the SQL count() function::

        select(h_count(), employee.department, where=employee)

    gives the number of employees in each department.
    """
    everything = Column('all', None, type_indicator=1)
    # A falsy incoming value (None, 0, ...) is counted as 1.
    bump = lambda a, v: a + (v or 1)
    start = lambda: 0
    return Aggregation("count", everything, f=bump, default=start)
Exemplo n.º 11
0
    def test_expr_without_partitions(self):
        """Evaluate comparison, AND, OR and NOT expressions on a non-partition column.

        Each expression is built from ``cee`` and then called with a Tablet
        of values; the test compares what comes back with the expected
        subset of those values.
        """
        cee_vals = Tablet([1, 5, 7, 9, 12, 13, 14, 19, 27, 38])
        cee = Column('cee', None, type_indicator=1, index_indicator=1, partition=False)

        def hits(expr):
            # Results in the order the expression yields them.
            return list(expr(cee_vals))

        def sorted_hits(expr):
            # Results in ascending order, for cases compared after sorting.
            return sorted(expr(cee_vals))

        # plain comparisons
        self.assertEqual(hits(cee < 8), [1, 5, 7])
        self.assertEqual(hits(cee > 7), [9, 12, 13, 14, 19, 27, 38])
        self.assertEqual(hits(cee <= 7), [1, 5, 7])
        self.assertEqual(hits(cee >= 7), [7, 9, 12, 13, 14, 19, 27, 38])
        self.assertEqual(hits(cee == 7), [7])
        self.assertEqual(hits(cee != 7), [1, 5, 9, 12, 13, 14, 19, 27, 38])

        # AND
        self.assertEqual(hits((cee > 7) & (cee < 20)), [9, 12, 13, 14, 19])
        self.assertEqual(hits((cee > 7) & (cee < 20) & (cee > 13)), [14, 19])

        # OR
        self.assertEqual(sorted_hits((cee < 7) | (cee > 20)), [1, 5, 27, 38])
        self.assertEqual(hits((cee == 7) | (cee == 20) | (cee == 13)), [7, 13])

        # NOT of an AND
        self.assertEqual(sorted_hits(~((cee >= 7) & (cee <= 20))), [1, 5, 27, 38])

        # NOT of an OR
        self.assertEqual(sorted_hits(~((cee < 7) | (cee == 19))),
                         [7, 9, 12, 13, 14, 27, 38])
Exemplo n.º 12
0
 def test_check_range_query_for_lz4(self):
     """Range operators must raise TypeError on a ``compression_indicator=2`` string column.

     ``<``, ``<=``, ``>`` and ``>=`` are each expected to raise TypeError,
     while ``==`` and ``!=`` must evaluate without raising.
     """
     column = Column(_NAME,
                     None,
                     index_indicator=1,
                     partition=False,
                     type_indicator=mdb.MDB_STR,
                     compression_indicator=2,
                     rtrie_indicator=None)

     rejected = (
         lambda: column < "foo",
         lambda: column <= "foo",
         lambda: column > "foo",
         lambda: column >= "foo",
     )
     for compare in rejected:
         self.assertRaises(TypeError, compare)

     # Equality checks are only required not to raise; results are discarded.
     for compare in (lambda: column == "foo", lambda: column != "foo"):
         compare()
Exemplo n.º 13
0
        def wrap(column):
            """Return a new Column copied from ``column`` with overrides applied.

            Each indicator set on the enclosing ``self`` (i.e. not None)
            replaces the column's own value; name, table, partition and alias
            are always taken from ``column``.  ``fn`` from the enclosing scope
            is passed on as ``column_fn``.
            """
            index_indicator = self.index_indicator
            if index_indicator is None:
                index_indicator = column.index_indicator

            type_indicator = self.type_indicator
            if type_indicator is None:
                type_indicator = column.type_indicator

            rtrie_indicator = self.rtrie_indicator
            if rtrie_indicator is None:
                rtrie_indicator = column.rtrie_indicator

            compression_indicator = self.compression_indicator
            if compression_indicator is None:
                compression_indicator = column.compression_indicator

            is_boolean = self.boolean
            if is_boolean is None:
                is_boolean = column.is_boolean

            return Column(column.name, column.table, index_indicator,
                          column.partition, type_indicator, compression_indicator,
                          rtrie_indicator, alias=column.alias, boolean=is_boolean,
                          column_fn=fn)
Exemplo n.º 14
0
def h_sum(col):
    """
    Build the aggregation for the sum of the given column, the counterpart of
    the SQL sum() function.  It is passed to :func:`hustle.select` to request
    a sum over a column in a query::

        select(h_sum(employee.salary), employee.department, where=employee.age > 25)

    gives, per department, the total salary of the employees over 25 years old.

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to aggregate
    """
    import mdb

    add = lambda a, v: a + v   # running total plus the next value
    start = lambda: 0
    spec = Column('_sum_type', type_indicator=mdb.MDB_INT_32)
    return Aggregation("sum", col, f=add, default=start, result_spec=spec)
Exemplo n.º 15
0
def h_avg(col):
    """
    Return an aggregation for the average of the given column.  Like the SQL avg() function::

        select(h_avg(employee.salary), employee.department, where=employee)

    returns the average salary in each department

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to aggregate
    """
    import mdb

    # The accumulator is a (running_sum, count) pair.  It is indexed instead
    # of being unpacked in the lambda signature, because tuple parameters
    # ("lambda (a, c), v: ...") are Python 2-only syntax and make the whole
    # module fail to compile on Python 3.
    # NOTE: g divides by the count, so it raises ZeroDivisionError if it is
    # ever applied to the untouched default (0, 0).
    return Aggregation("avg",
                       col,
                       f=lambda acc, v: (acc[0] + v, acc[1] + 1),
                       g=lambda acc: float(acc[0]) / acc[1],
                       default=lambda: (0, 0),
                       result_spec=Column('_avg_type', type_indicator=mdb.MDB_INT_32))
Exemplo n.º 16
0
def h_combine(col, separator=','):
    """
    Build the aggregation that combines the values of the given column, in
    the manner of the join function.

    :type col: :class:`hustle.core.marble.Column`
    :param col: the column to combine

    :type separator: string
    :param separator: the separator of the combination
    """
    import mdb
    import functools

    # Bind the separator, then copy _h_combine's metadata onto the partial:
    # partial objects do not get __name__/__doc__ automatically.
    # update_wrapper returns the wrapper it was given.
    combiner = functools.update_wrapper(
        functools.partial(_h_combine, separator=separator),
        _h_combine)
    spec = Column('_combine_type', type_indicator=mdb.MDB_STR)
    return Aggregation("combine", col, f=combiner, default=lambda: None, result_spec=spec)
Exemplo n.º 17
0
    def test_schema_string(self):
        """Check the prefix ``schema_string()`` puts in front of the column name.

        The same column object is mutated step by step; after each change
        the schema string must equal the expected prefix followed by _NAME.
        """
        c = Column(_NAME,
                   None,
                   index_indicator=False,
                   partition=False,
                   type_indicator=mdb.MDB_UINT_16,
                   compression_indicator=0,
                   rtrie_indicator=mdb.MDB_UINT_16)

        def expect(prefix):
            self.assertEqual(c.schema_string(), "%s%s" % (prefix, _NAME))

        expect('@2')

        # integer types
        c.type_indicator = mdb.MDB_INT_16
        expect('#2')
        c.type_indicator = mdb.MDB_INT_32
        expect('#4')
        c.type_indicator = mdb.MDB_UINT_32
        expect('@4')
        c.type_indicator = mdb.MDB_INT_64
        expect('#8')
        c.type_indicator = mdb.MDB_UINT_64
        expect('@8')

        # string type: the prefix follows rtrie_indicator while
        # compression_indicator is 0, then the compression setting
        c.type_indicator = mdb.MDB_STR
        c.compression_indicator = 0
        expect('%2')
        c.rtrie_indicator = mdb.MDB_UINT_32
        expect('%4')
        c.compression_indicator = 1
        expect('$')
        c.compression_indicator = 2
        expect('*')
Exemplo n.º 18
0
    def test_expr_with_partitions(self):
        """Check which partition tags ``partition()`` selects for various expressions.

        ``pee`` is a partition column and ``cee`` is not.  Every expression
        is built and immediately resolved against ``pee_tags``; the result is
        compared with the expected tags, either in returned order or sorted.
        """
        pee = Column('pee', None, type_indicator=1, index_indicator=1, partition=True)
        pee_tags = [1, 5, 7, 9, 12, 13, 14, 19, 27, 38]
        cee = Column('cee', None, type_indicator=1, index_indicator=1, partition=False)

        def tags(expr):
            # Selected tags in the order partition() yields them.
            return list(expr.partition(pee_tags))

        def sorted_tags(expr):
            # Selected tags in ascending order.
            return sorted(expr.partition(pee_tags))

        # Expected results that recur below.
        seven_up = [7, 9, 12, 13, 14, 19, 27, 38]
        five_to_fourteen = [5, 7, 9, 12, 13, 14]

        # single comparisons on the partition column
        self.assertEqual(tags(pee < 7), [1, 5])
        self.assertEqual(tags(pee > 7), [9, 12, 13, 14, 19, 27, 38])
        self.assertEqual(tags(pee == 7), [7])
        self.assertEqual(tags(pee != 7), [1, 5, 9, 12, 13, 14, 19, 27, 38])
        self.assertEqual(tags(pee >= 7), seven_up)
        self.assertEqual(tags(pee <= 7), [1, 5, 7])
        self.assertEqual(tags(~(pee > 7)), [1, 5, 7])

        # test pure partition combination
        self.assertEqual(sorted_tags((pee > 5) | (pee == 1)),
                         [1, 7, 9, 12, 13, 14, 19, 27, 38])
        self.assertEqual(tags(~((pee <= 5) | (pee > 14))), [7, 9, 12, 13, 14])
        self.assertEqual(tags((pee == 5) | (pee == 99)), [5])
        self.assertEqual(tags((pee > 5) & (pee <= 14) & (pee > 12)), [13, 14])
        self.assertEqual(sorted_tags(((pee > 5) & (pee <= 14)) | (pee == 5)),
                         five_to_fourteen)
        self.assertEqual(sorted_tags(~(~(((pee > 5) & (pee <= 14))) & (pee != 5))),
                         five_to_fourteen)
        self.assertEqual(sorted_tags(~(((pee <= 5) | (pee > 14)) & (pee != 5))),
                         five_to_fourteen)

        # test combined partition/index combinations
        # p & c == p
        self.assertEqual(tags((pee > 5) & (cee <= 14)), seven_up)

        # p & ~c == p
        self.assertEqual(tags((pee > 5) & ~(cee <= 14)), seven_up)
        self.assertEqual(tags((pee > 5) & ~~(cee <= 14)), seven_up)

        # p & ~c & ~c == p
        self.assertEqual(tags((pee > 5) & ~(cee <= 14) & ~(cee >= 5)), seven_up)
        self.assertEqual(tags((cee > 5) & ~((pee > 5) & ~(cee <= 14))), [1, 5])
        self.assertEqual(tags((cee > 5) & (~(pee > 5) & ~(cee <= 14))), [1, 5])
        self.assertEqual(tags((cee > 5) & ~(~(pee > 5) & ~(cee <= 14))), seven_up)
        self.assertEqual(tags((cee > 5) & ~~((pee > 5) & ~(cee <= 14))), seven_up)
        self.assertEqual(tags((cee > 5) & ~((pee > 5) | ~(cee <= 14))), pee_tags)
        self.assertEqual(tags((cee > 5) & ~~((pee > 5) | ~(cee <= 14))), pee_tags)

        # p & ~c | ~c == all
        self.assertEqual(tags((pee > 5) & ~(cee <= 14) | ~(cee >= 5)), pee_tags)

        # ~c & ~c & p == p
        self.assertEqual(tags(~(cee <= 14) & ~(cee >= 5) & (pee > 5)), seven_up)

        # p | c == universe (checked twice, as in the original sequence)
        self.assertEqual(tags((pee == 5) | (pee == 8) | (cee == 99)), pee_tags)
        self.assertEqual(tags((pee == 5) | (pee == 8) | (cee == 99)), pee_tags)

        # p | c == universe
        self.assertEqual(tags(((pee == 5) | (pee > 14)) | (cee > 12)), pee_tags)

        # c & p == p ==> p | p
        self.assertEqual(
            sorted_tags(((pee == 5) | (pee > 14)) | ((cee > 12) & (pee == 1))),
            [1, 5, 19, 27, 38])
Exemplo n.º 19
0
    def test_schema_string(self):
        """Check the prefix ``schema_string()`` emits for each column configuration.

        One column is reconfigured repeatedly; after every change its schema
        string must be the expected prefix immediately followed by _NAME.
        """
        c = Column(
            _NAME,
            None,
            index_indicator=False,
            partition=False,
            type_indicator=mdb.MDB_UINT_16,
            compression_indicator=0,
            rtrie_indicator=mdb.MDB_UINT_16,
        )
        self.assertEqual(c.schema_string(), "%s%s" % ("@2", _NAME))

        # Integer types, named by their mdb attribute and looked up one at a
        # time so each lookup happens right before its own assertion.
        integer_cases = (
            ("MDB_INT_16", "#2"),
            ("MDB_INT_32", "#4"),
            ("MDB_UINT_32", "@4"),
            ("MDB_INT_64", "#8"),
            ("MDB_UINT_64", "@8"),
        )
        for type_name, prefix in integer_cases:
            c.type_indicator = getattr(mdb, type_name)
            self.assertEqual(c.schema_string(), "%s%s" % (prefix, _NAME))

        # String type: vary the rtrie width first, then the compression.
        c.type_indicator = mdb.MDB_STR
        c.compression_indicator = 0
        self.assertEqual(c.schema_string(), "%s%s" % ("%2", _NAME))
        c.rtrie_indicator = mdb.MDB_UINT_32
        self.assertEqual(c.schema_string(), "%s%s" % ("%4", _NAME))
        for compression, prefix in ((1, "$"), (2, "*")):
            c.compression_indicator = compression
            self.assertEqual(c.schema_string(), "%s%s" % (prefix, _NAME))
Exemplo n.º 20
0
    def test_schema_string(self):
        """Check ``schema_string()`` against the expected type prefix plus _NAME.

        The column starts as MDB_UINT_16, is switched through the other
        integer types, and is finally made an MDB_STR column whose
        ``rtrie_indicator`` and ``compression_indicator`` are varied.
        """
        column = Column(_NAME, None,
                        index_indicator=False, partition=False,
                        type_indicator=mdb.MDB_UINT_16,
                        compression_indicator=0,
                        rtrie_indicator=mdb.MDB_UINT_16)

        def named(prefix):
            # Expected schema string: the prefix directly followed by the name.
            return "%s%s" % (prefix, _NAME)

        self.assertEqual(column.schema_string(), named('@2'))

        column.type_indicator = mdb.MDB_INT_16
        self.assertEqual(column.schema_string(), named('#2'))
        column.type_indicator = mdb.MDB_INT_32
        self.assertEqual(column.schema_string(), named('#4'))
        column.type_indicator = mdb.MDB_UINT_32
        self.assertEqual(column.schema_string(), named('@4'))
        column.type_indicator = mdb.MDB_INT_64
        self.assertEqual(column.schema_string(), named('#8'))
        column.type_indicator = mdb.MDB_UINT_64
        self.assertEqual(column.schema_string(), named('@8'))

        column.type_indicator = mdb.MDB_STR
        column.compression_indicator = 0
        self.assertEqual(column.schema_string(), named('%2'))
        column.rtrie_indicator = mdb.MDB_UINT_32
        self.assertEqual(column.schema_string(), named('%4'))
        column.compression_indicator = 1
        self.assertEqual(column.schema_string(), named('$'))
        column.compression_indicator = 2
        self.assertEqual(column.schema_string(), named('*'))