def test_wide_table(self, vector):
        """Verify the row count and the full contents of a wide table.

        The table is looked up as '<db>.widetable_<num_cols>_cols', where the
        database name is derived from the vector's 'table_format' and the
        column count is the vector's 'num_cols' value. The table is not created
        here, so it has to exist already.
        """
        col_count = vector.get_value('num_cols')
        row_count = 10
        table_format = vector.get_value('table_format')
        database = QueryTestSectionReader.get_db_name(table_format)
        wide_table = "%s.widetable_%s_cols" % (database, col_count)

        # The table must hold exactly the expected number of rows.
        count_result = self.client.execute(
            "select count(*) from %s " % wide_table)
        assert count_result.data == [str(row_count)]

        # Build the reference rows first, then fetch everything from the table.
        reference_rows = widetable.get_data(
            col_count, row_count, quote_strings=True)
        select_result = self.client.execute("select * from %s" % wide_table)

        # Both sides are wrapped with the schema reported by the query and
        # compared with order_matters=False.
        col_types = parse_column_types(select_result.schema)
        col_labels = parse_column_labels(select_result.schema)
        expected = QueryTestResult(
            reference_rows, col_types, col_labels, order_matters=False)
        actual = QueryTestResult(
            parse_result_rows(select_result), col_types, col_labels,
            order_matters=False)
        assert expected == actual
Exemplo n.º 2
0
  def test_insert_wide_table(self, vector):
    """Create a wide table, insert one generated row and check it reads back.

    The table is placed in the database derived from the vector's 'table_format'
    and gets a '_codegen_disabled' suffix when the 'disable_codegen' exec option
    is set. A table of the same name is dropped first if it exists.
    """
    fmt = vector.get_value('table_format')

    # Text can't handle as many columns as Parquet (codegen takes forever)
    if fmt.file_format == 'text':
      column_count = 1000
    else:
      column_count = 2000

    database = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
    target_table = database + ".insert_widetable"
    if vector.get_value('exec_option')['disable_codegen']:
      target_table += "_codegen_disabled"
    self.client.execute("drop table if exists " + target_table)

    # Assemble the DDL from the generated column descriptions; a storage clause
    # is appended only for Parquet.
    column_descs = widetable.get_columns(column_count)
    ddl = "CREATE TABLE " + target_table + "(" + ','.join(column_descs) + ")"
    if vector.get_value('table_format').file_format == 'parquet':
      ddl += " stored as parquet"
    self.client.execute(ddl)

    # Get a single row of data
    generated_rows = widetable.get_data(column_count, 1, quote_strings=True)
    row_values = generated_rows[0]
    insert_sql = "INSERT INTO " + target_table + " VALUES(" + row_values + ")"
    self.client.execute(insert_sql)

    # Exactly one row must be present after the insert.
    count_result = self.client.execute("select count(*) from " + target_table)
    assert count_result.data == ["1"]

    # Read the row back and compare it with the values that were inserted.
    select_result = self.client.execute("select * from " + target_table)
    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult(
        [row_values], col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
Exemplo n.º 3
0
  def test_wide_table(self, vector):
    """Verify the row count and, except for HBase, the contents of a wide table.

    Kudu table formats are reported as xfail (IMPALA-3718). For HBase only the
    number of rows returned by the full select is checked.
    """
    if vector.get_value('table_format').file_format == 'kudu':
      pytest.xfail("IMPALA-3718: Extend Kudu functional test support")

    col_count = vector.get_value('num_cols')
    # Due to the way HBase handles duplicate row keys, we have different number of
    # rows in HBase tables compared to HDFS tables.
    if vector.get_value('table_format').file_format != 'hbase':
      row_count = 10
    else:
      row_count = 2
    database = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
    wide_table = "%s.widetable_%s_cols" % (database, col_count)

    # The table must hold exactly the expected number of rows.
    count_result = self.client.execute("select count(*) from %s " % wide_table)
    assert count_result.data == [str(row_count)]

    # Build the reference rows first, then fetch everything from the table.
    reference_rows = widetable.get_data(col_count, row_count, quote_strings=True)
    select_result = self.client.execute("select * from %s" % wide_table)

    # HBase results are only checked for their size, not their contents.
    if vector.get_value('table_format').file_format == 'hbase':
      assert len(select_result.data) == row_count
      return

    # Compare generated and fetched rows under the query's schema, ignoring order.
    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult(
        reference_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
Exemplo n.º 4
0
    def test_many_grouping_columns(self, vector):
        """Test that an aggregate with many grouping columns works.

        Runs a 'select distinct *' over the 1000-column wide table and expects
        the same 10 rows that widetable.get_data() generates.
        """
        fmt = vector.get_value('table_format')
        options = vector.get_value('exec_option')
        distinct_query = "select distinct * from widetable_1000_cols"

        # Run with the exec options taken from the test vector.
        # NOTE(review): the intent is for codegen to be enabled here - confirm
        # that the vector's exec options guarantee it.
        query_result = self.execute_query(
            distinct_query, options, table_format=fmt)

        # All rows should be distinct.
        reference_rows = widetable.get_data(1000, 10, quote_strings=True)

        # Compare generated and fetched rows under the query's schema,
        # ignoring row order.
        col_types = parse_column_types(query_result.schema)
        col_labels = parse_column_labels(query_result.schema)
        expected = QueryTestResult(
            reference_rows, col_types, col_labels, order_matters=False)
        actual = QueryTestResult(
            parse_result_rows(query_result), col_types, col_labels,
            order_matters=False)
        assert expected == actual
Exemplo n.º 5
0
  def test_insert_wide_table(self, vector, unique_database):
    """Create a wide table, insert one generated row and check it reads back.

    The table is created inside 'unique_database' and gets a '_codegen_disabled'
    suffix when the 'disable_codegen' exec option is set.
    """
    fmt = vector.get_value('table_format')

    # Text can't handle as many columns as Parquet (codegen takes forever)
    if fmt.file_format == 'text':
      column_count = 1000
    else:
      column_count = 2000

    target_table = unique_database + ".insert_widetable"
    if vector.get_value('exec_option')['disable_codegen']:
      target_table += "_codegen_disabled"

    # Assemble the DDL from the generated column descriptions; a storage clause
    # is appended only for Parquet.
    column_descs = widetable.get_columns(column_count)
    ddl = "CREATE TABLE " + target_table + "(" + ','.join(column_descs) + ")"
    if vector.get_value('table_format').file_format == 'parquet':
      ddl += " stored as parquet"
    self.client.execute(ddl)

    # Get a single row of data
    generated_rows = widetable.get_data(column_count, 1, quote_strings=True)
    row_values = generated_rows[0]
    insert_sql = "INSERT INTO " + target_table + " VALUES(" + row_values + ")"
    self.client.execute(insert_sql)

    # Exactly one row must be present after the insert.
    count_result = self.client.execute("select count(*) from " + target_table)
    assert count_result.data == ["1"]

    # Read the row back; types and labels come straight from the result object.
    select_result = self.client.execute("select * from " + target_table)
    col_types = select_result.column_types
    col_labels = select_result.column_labels
    expected = QueryTestResult(
        [row_values], col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual
    def test_wide_table(self, vector):
        """Verify the row count and the full contents of a wide table.

        The table is looked up as '<db>.widetable_<num_cols>_cols', where the
        database name is derived from the vector's "table_format" and the
        column count is the vector's "num_cols" value. Only column types (no
        labels) are handed to QueryTestResult.
        """
        col_count = vector.get_value("num_cols")
        row_count = 10
        table_format = vector.get_value("table_format")
        database = QueryTestSectionReader.get_db_name(table_format)
        wide_table = "%s.widetable_%s_cols" % (database, col_count)

        # The table must hold exactly the expected number of rows.
        count_result = self.client.execute(
            "select count(*) from %s " % wide_table)
        assert count_result.data == [str(row_count)]

        # Build the reference rows first, then fetch everything from the table.
        reference_rows = widetable.get_data(
            col_count, row_count, quote_strings=True)
        select_result = self.client.execute("select * from %s" % wide_table)

        # Compare generated and fetched rows, ignoring row order.
        col_types = parse_column_types(select_result.schema)
        expected = QueryTestResult(
            reference_rows, col_types, order_matters=False)
        actual = QueryTestResult(
            parse_result_rows(select_result), col_types, order_matters=False)
        assert expected == actual
  def test_many_grouping_columns(self, vector):
    """Test that an aggregate with many grouping columns works.

    Runs a 'select distinct *' over the 1000-column wide table and expects the
    same 10 rows that widetable.get_data() generates.
    """
    fmt = vector.get_value('table_format')
    options = vector.get_value('exec_option')
    distinct_query = "select distinct * from widetable_1000_cols"

    # Run with the exec options taken from the test vector.
    # NOTE(review): the intent is for codegen to be enabled here - confirm that
    # the vector's exec options guarantee it.
    query_result = self.execute_query(distinct_query, options, table_format=fmt)

    # All rows should be distinct.
    reference_rows = widetable.get_data(1000, 10, quote_strings=True)

    # Compare generated and fetched rows under the query's schema, ignoring order.
    col_types = parse_column_types(query_result.schema)
    col_labels = parse_column_labels(query_result.schema)
    expected = QueryTestResult(
        reference_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(query_result), col_types, col_labels, order_matters=False)
    assert expected == actual
Exemplo n.º 8
0
  def test_wide_table(self, vector):
    """Verify the row count and, except for HBase, the contents of a wide table.

    For HBase only the number of rows returned by the full select is checked;
    for every other file format the rows are compared with generated data.
    """
    col_count = vector.get_value('num_cols')
    # Due to the way HBase handles duplicate row keys, we have different number of
    # rows in HBase tables compared to HDFS tables.
    if vector.get_value('table_format').file_format != 'hbase':
      row_count = 10
    else:
      row_count = 2
    database = QueryTestSectionReader.get_db_name(vector.get_value('table_format'))
    wide_table = "%s.widetable_%s_cols" % (database, col_count)

    # The table must hold exactly the expected number of rows.
    count_result = self.client.execute("select count(*) from %s " % wide_table)
    assert count_result.data == [str(row_count)]

    # Build the reference rows first, then fetch everything from the table.
    reference_rows = widetable.get_data(col_count, row_count, quote_strings=True)
    select_result = self.client.execute("select * from %s" % wide_table)

    # HBase results are only checked for their size, not their contents.
    if vector.get_value('table_format').file_format == 'hbase':
      assert len(select_result.data) == row_count
      return

    # Compare generated and fetched rows under the query's schema, ignoring order.
    col_types = parse_column_types(select_result.schema)
    col_labels = parse_column_labels(select_result.schema)
    expected = QueryTestResult(
        reference_rows, col_types, col_labels, order_matters=False)
    actual = QueryTestResult(
        parse_result_rows(select_result), col_types, col_labels, order_matters=False)
    assert expected == actual