def sql_table():
  """Check that the ``citibike20k`` SQL table imports consistently over JDBC.

  The JDBC connection URL is read from the ``SQLCONNURL`` environment
  variable (for example ``jdbc:mysql://<host>:3306/ingestSQL?&useSSL=false``
  or ``jdbc:postgresql://<host>/ingestSQL``).  The credentials are chosen
  from the database type embedded in that URL (``jdbc:<db_type>:...``).

  Steps:
    1. Import the whole table and compare it with
       ``smalldata/demos/citibike_20k.csv`` using ``first_1000_equal``.
    2. Re-import only the ``starttime`` and ``bikeid`` columns and check
       that the frame has 20000 rows and 2 columns.
    3. Run ``SELECT starttime FROM citibike20k`` and check that the frame
       has 20000 rows and 1 column.

  Raises:
    ValueError: if ``SQLCONNURL`` is unset or empty, or has no
      ``<scheme>:<db_type>`` prefix to read the database type from.
    AssertionError: if any of the checks above fails.
  """
  conn_url = os.getenv("SQLCONNURL")
  if not conn_url:
    # Fail with an actionable message instead of an AttributeError on None.
    raise ValueError("SQLCONNURL environment variable must be set to a JDBC connection URL")
  table = "citibike20k"

  # The database type is the second colon-separated token of the URL.
  url_parts = conn_url.split(":", 3)
  if len(url_parts) < 2:
    raise ValueError("SQLCONNURL does not look like a JDBC URL (jdbc:<db_type>:...): %r" % conn_url)
  db_type = url_parts[1]

  # Any database type other than the two below falls back to empty credentials.
  username = password = ""
  if db_type == "mysql":
    username = "******"
    password = "******"
  elif db_type == "postgresql":
    username = password = "******"

  citi_sql = h2o.import_sql_table(conn_url, table, username, password)
  citi_csv = h2o.import_file(pyunit_utils.locate("smalldata/demos/citibike_20k.csv"))

  # Element 0 of each dump is the header row; headers are not compared.
  py_citi_sql = citi_sql.as_data_frame(False)[1:]
  py_citi_csv = citi_csv.as_data_frame(False)[1:]

  assert first_1000_equal(py_citi_sql, py_citi_csv)

  expected_rows = 20000

  # Import restricted to two columns.
  citi_sql = h2o.import_sql_table(conn_url, table, username, password, ["starttime", "bikeid"])
  assert citi_sql.nrow == expected_rows
  assert citi_sql.ncol == 2

  # Import through an explicit SELECT statement.
  sql_select = h2o.import_sql_select(conn_url, "SELECT starttime FROM citibike20k", username, password)
  assert sql_select.nrow == expected_rows
  assert sql_select.ncol == 1
def sql_table():
    """Verify JDBC imports of the ``citibike20k`` table against its CSV copy.

    Reads the JDBC connection URL from the ``SQLCONNURL`` environment
    variable (for example
    ``jdbc:mysql://<host>:3306/ingestSQL?&useSSL=false`` or
    ``jdbc:postgresql://<host>/ingestSQL``) and selects credentials from
    the database type found in the URL (``jdbc:<db_type>:...``).

    The function then asserts that:
      * the fully imported table matches
        ``smalldata/demos/citibike_20k.csv`` according to
        ``first_1000_equal``;
      * importing only ``starttime`` and ``bikeid`` yields 20000 rows and
        2 columns;
      * ``SELECT starttime FROM citibike20k`` yields 20000 rows and
        1 column.

    Raises:
        ValueError: if ``SQLCONNURL`` is unset or empty, or the database
            type cannot be extracted from it.
        AssertionError: if one of the checks fails.
    """
    conn_url = os.getenv("SQLCONNURL")
    if not conn_url:
        # os.getenv returns None when the variable is missing; report that
        # directly rather than failing later on ``None.split``.
        raise ValueError(
            "SQLCONNURL environment variable must be set to a JDBC "
            "connection URL")
    table = "citibike20k"

    # The database type is the token right after the leading scheme.
    url_parts = conn_url.split(":", 3)
    if len(url_parts) < 2:
        raise ValueError(
            "SQLCONNURL does not look like a JDBC URL "
            "(jdbc:<db_type>:...): %r" % conn_url)
    db_type = url_parts[1]

    # (username, password) per database type; unknown types use empty
    # credentials.
    credentials = {
        "mysql": ("******", "******"),
        "postgresql": ("******", "******"),
    }
    username, password = credentials.get(db_type, ("", ""))

    citi_sql = h2o.import_sql_table(conn_url, table, username, password)
    citi_csv = h2o.import_file(
        pyunit_utils.locate("smalldata/demos/citibike_20k.csv"))

    # Drop the first element (header row) of each dump before comparing.
    py_citi_sql = citi_sql.as_data_frame(False)[1:]
    py_citi_csv = citi_csv.as_data_frame(False)[1:]

    assert first_1000_equal(py_citi_sql, py_citi_csv)

    expected_rows = 20000

    # Import restricted to two columns.
    citi_sql = h2o.import_sql_table(conn_url, table, username, password,
                                    ["starttime", "bikeid"])
    assert citi_sql.nrow == expected_rows
    assert citi_sql.ncol == 2

    # Import through an explicit SELECT statement.
    sql_select = h2o.import_sql_select(conn_url,
                                       "SELECT starttime FROM citibike20k",
                                       username, password)
    assert sql_select.nrow == expected_rows
    assert sql_select.ncol == 1
Пример #3
0
def hive_save_frame():
    """Round-trip the prostate frame through Hive in three storage variants.

    The frame loaded from ``smalldata/prostate/prostate_cat.csv`` is saved
    with ``save_to_hive``, read back with ``h2o.import_sql_table`` (fetch
    mode ``SINGLE``) and compared with the original for each scenario:

    * CSV, managed table, custom temporary path;
    * Parquet, managed table, database given in the table name (the
      connection URL carries no database);
    * Parquet, external table at an explicit ``table_path``.

    Environment variables read:
      ``KRB_ENABLED``    -- ``true`` adds authentication options to the URLs.
      ``KRB_USE_TOKEN``  -- ``true`` selects delegation-token auth instead
                            of a principal (only used when Kerberos is on).
      ``HIVE_PRINCIPAL`` -- principal used for non-token Kerberos auth.
    """
    kerberos_on = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    token_auth = os.getenv('KRB_USE_TOKEN', 'false').lower() == 'true'

    # Both URLs receive the same authentication suffix, so build it once.
    if not kerberos_on:
        auth_suffix = ""
    elif token_auth:
        auth_suffix = ";auth=delegationToken"
    else:
        auth_suffix = ";principal=%s" % os.getenv('HIVE_PRINCIPAL', 'hive/[email protected]')

    connection_url = "jdbc:hive2://localhost:10000/default" + auth_suffix
    connection_url_nodb = "jdbc:hive2://localhost:10000/" + auth_suffix

    username = "******"
    password = ""

    print("import data")
    prostate_hex = h2o.import_file(path=pyunit_utils.locate("smalldata/prostate/prostate_cat.csv"))

    # (log message, JDBC URL, Hive table name, extra save_to_hive options)
    scenarios = [
        ("save as csv, managed, custom tmp",
         connection_url, "prostate_hex_py_csv",
         {"tmp_path": "/tmp"}),
        ("save as parquet, managed, db in table name",
         connection_url_nodb, "default.prostate_hex_py_parquet",
         {"format": "parquet"}),
        ("save as parquet, external",
         connection_url, "prostate_hex_py_parquet_ext",
         {"format": "parquet", "table_path": "/user/hive/ext/prostate_hex_py_parquet"}),
    ]

    for message, jdbc_url, table_name, save_options in scenarios:
        print(message)
        prostate_hex.save_to_hive(jdbc_url, table_name, **save_options)
        prostate_hive = h2o.import_sql_table(jdbc_url, table_name, username, password, fetch_mode="SINGLE")
        pyunit_utils.compare_frames_local(prostate_hex, prostate_hive, prob=1)
def sql_table():
  """Import ``citibike20k`` over JDBC and compare it with the CSV copy.

  The connection URL comes from ``os.environ['SQLCONNURL']`` (a missing
  variable raises ``KeyError``).  The full table import is compared with
  ``smalldata/demos/citibike_20k.csv`` using ``is_equal``; a second import
  restricted to ``starttime`` and ``bikeid`` must have exactly 2 columns.
  """
  jdbc_url = os.environ['SQLCONNURL']
  table_name = "citibike20k"
  # NOTE(review): credentials are hard-coded in this test.
  user, secret = "root", "0xdata"

  sql_frame = h2o.import_sql_table(jdbc_url, table_name, user, secret)
  csv_frame = h2o.import_file(pyunit_utils.locate("smalldata/demos/citibike_20k.csv"))

  # Skip element 0 of each dump: headers are not compared.
  sql_rows = sql_frame.as_data_frame(False)[1:]
  csv_rows = csv_frame.as_data_frame(False)[1:]
  assert is_equal(sql_rows, csv_rows)

  two_columns = h2o.import_sql_table(jdbc_url, table_name, user, secret, ["starttime", "bikeid"])
  assert two_columns.ncol == 2
def sql_table():
  """Import ``citibike20k`` from a fixed MySQL server and validate it.

  The whole table is imported and compared with
  ``smalldata/demos/citibike_20k.csv`` using ``is_equal``.  The table is
  then imported again with only the ``starttime`` and ``bikeid`` columns,
  and the resulting frame must have 2 columns.
  """
  # NOTE(review): server address and credentials are hard-coded.
  mysql_url = "jdbc:mysql://172.16.2.178:3306/ingestSQL?&useSSL=false"
  login = ("root", "0xdata")

  from_sql = h2o.import_sql_table(mysql_url, "citibike20k", *login)
  from_csv = h2o.import_file(pyunit_utils.locate("smalldata/demos/citibike_20k.csv"))

  # Headers (element 0 of each dump) are excluded from the comparison.
  rows_sql = from_sql.as_data_frame(False)[1:]
  rows_csv = from_csv.as_data_frame(False)[1:]
  assert is_equal(rows_sql, rows_csv)

  subset = h2o.import_sql_table(mysql_url, "citibike20k", login[0], login[1], ["starttime", "bikeid"])
  assert subset.ncol == 2
def sql_table():
    """Compare a JDBC import of ``citibike20k`` with its CSV counterpart.

    Uses the connection URL stored in ``os.environ['SQLCONNURL']``
    (``KeyError`` if it is not defined).  Asserts that the imported table
    equals ``smalldata/demos/citibike_20k.csv`` according to ``is_equal``,
    and that an import limited to ``starttime`` and ``bikeid`` produces a
    frame with 2 columns.
    """
    connection = os.environ['SQLCONNURL']
    # NOTE(review): credentials are hard-coded in this test.
    base_args = (connection, "citibike20k", "root", "0xdata")

    whole_table = h2o.import_sql_table(*base_args)
    reference = h2o.import_file(
        pyunit_utils.locate("smalldata/demos/citibike_20k.csv"))

    # The first element of each dump is the header row; leave it out.
    table_rows = whole_table.as_data_frame(False)[1:]
    reference_rows = reference.as_data_frame(False)[1:]
    assert is_equal(table_rows, reference_rows)

    wanted_columns = ["starttime", "bikeid"]
    narrowed = h2o.import_sql_table(*base_args + (wanted_columns,))
    assert narrowed.ncol == 2
Пример #7
0
def hive_import():
    """Compare a Hive table read two ways with the HDFS CSV it was built from.

    The reference dataset is
    ``/user/jenkins/smalldata/chicago/chicagoCensus.csv`` on the HDFS name
    node returned by ``pyunit_utils.hadoop_namenode()``.  The Hive table
    ``chicago`` is read:

    * through JDBC with ``h2o.import_sql_table`` (fetch mode ``SINGLE``);
    * directly with ``h2o.import_hive_table``.

    Each result is passed through ``adapt_frame`` and compared with the
    reference via ``pyunit_utils.compare_frames_local``.

    Environment variables read:
      ``HIVE_HOST``   -- Hive server host (required).
      ``KRB_ENABLED`` -- ``true`` appends delegation-token auth to the URL.

    Raises:
        ValueError: if ``HIVE_HOST`` is unset or empty.
    """
    hive_host = os.getenv("HIVE_HOST")
    if not hive_host:
        # Without this check a missing variable is formatted into the URL
        # as the literal host "None" and only fails later at connect time.
        raise ValueError(
            "HIVE_HOST environment variable must be set to the Hive server host")

    hdfs_name_node = pyunit_utils.hadoop_namenode()
    connection_url = "jdbc:hive2://{0}:10000/default".format(hive_host)
    krb_enabled = os.getenv('KRB_ENABLED', 'false').lower() == 'true'
    if krb_enabled:
        connection_url += ";auth=delegationToken"

    # read original
    file_url = "hdfs://{0}{1}".format(
        hdfs_name_node, "/user/jenkins/smalldata/chicago/chicagoCensus.csv")
    dataset_original = h2o.import_file(file_url)

    # read TABLE from Hive JDBC
    table_jdbc = h2o.import_sql_table(connection_url,
                                      "chicago",
                                      "",
                                      "",
                                      fetch_mode="SINGLE")
    # NOTE(review): column_prefix presumably accounts for JDBC column names
    # being reported as "chicago.<name>" -- confirm against adapt_frame.
    table_jdbc = adapt_frame(table_jdbc, column_prefix="chicago.")
    pyunit_utils.compare_frames_local(dataset_original, table_jdbc, prob=1)

    # read TABLE from Hive FS
    table_direct = h2o.import_hive_table(connection_url, "chicago")
    table_direct = adapt_frame(table_direct)
    pyunit_utils.compare_frames_local(dataset_original, table_direct, prob=1)
def sql_table():
    """Validate the ``citibike20k`` import from a fixed MySQL instance.

    First the complete table is imported and checked against
    ``smalldata/demos/citibike_20k.csv`` with ``is_equal``.  Then the table
    is imported with only ``starttime`` and ``bikeid`` selected, and the
    frame is expected to have 2 columns.
    """
    # NOTE(review): server address and credentials are hard-coded.
    server_url = "jdbc:mysql://172.16.2.178:3306/ingestSQL?&useSSL=false"
    table_name = "citibike20k"
    db_user = "root"
    db_password = "0xdata"

    imported = h2o.import_sql_table(server_url, table_name, db_user,
                                    db_password)
    csv_path = pyunit_utils.locate("smalldata/demos/citibike_20k.csv")
    expected = h2o.import_file(csv_path)

    # Compare data rows only; element 0 of each dump is the header.
    imported_rows = imported.as_data_frame(False)[1:]
    expected_rows = expected.as_data_frame(False)[1:]
    assert is_equal(imported_rows, expected_rows)

    selected = ["starttime", "bikeid"]
    partial = h2o.import_sql_table(server_url, table_name, db_user,
                                   db_password, selected)
    assert partial.ncol == 2
def sql_table():
    """Import a table with ``h2o.import_sql_table`` and print the result.

    NOTE(review): the five positional arguments look like database type,
    database name, table name, user and password -- confirm against the
    h2o version this snippet targets.
    """
    import_args = ("mysql", "menagerie", "pet", "root", "ludi")
    pet_frame = h2o.import_sql_table(*import_args)
    print(pet_frame)