import sys
sys.path.insert(1, "../../../")  # make the shared test utilities importable
from tests import pyunit_utils


def hdfs_orc_parser():
    # Check if we are running inside the H2O network by seeing if we can
    # touch the namenode.
    hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible()

    if hadoop_namenode_is_accessible:
        hdfs_name_node = pyunit_utils.hadoop_namenode()

        hdfs_orc_file = "/datasets/orc_parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc"
        url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file)
        assert pyunit_utils.expect_warnings(url_orc, "UserWarning:", "Skipping field:", 1), \
            "Expect warnings from orc parser for file " + url_orc + "!"

        hdfs_orc_file = "/datasets/orc_parser/orc/TestOrcFile.emptyFile.orc"
        url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file)
        assert pyunit_utils.expect_warnings(url_orc, "UserWarning:", "Skipping field:", 1), \
            "Expect warnings from orc parser for file " + url_orc + "!"

        hdfs_orc_file = "/datasets/orc_parser/orc/nulls-at-end-snappy.orc"
        url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file)
        # The bad long values in this file surface as a Long.MIN_VALUE warning
        # (as in the variants below), not as a skipped field.
        assert pyunit_utils.expect_warnings(url_orc, "UserWarning:", "Long.MIN_VALUE:", 1), \
            "Expect warnings from orc parser for file " + url_orc + "!"
    else:
        raise EnvironmentError
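# For reference, parsing one of these HDFS URLs directly goes through the
# standard h2o.import_file call.  The snippet below is a usage sketch, not
# part of the test; it assumes an H2O cluster that is already connected and
# can reach the namenode.
import h2o

hdfs_name_node = pyunit_utils.hadoop_namenode()
url_orc = "hdfs://{0}{1}".format(
    hdfs_name_node, "/datasets/orc_parser/orc/nulls-at-end-snappy.orc")
orc_frame = h2o.import_file(url_orc)  # the parse emits the UserWarnings checked above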
# Variant of the test above: first probes a canary file so the test can be
# skipped when the cluster's hive-exec version is too old to read these files.
def hdfs_orc_parser():
    # Check if we are running inside the H2O network by seeing if we can
    # touch the namenode.
    hadoop_namenode_is_accessible = pyunit_utils.hadoop_namenode_is_accessible()

    if hadoop_namenode_is_accessible:
        hdfs_name_node = pyunit_utils.hadoop_namenode()

        if pyunit_utils.cannaryHDFSTest(hdfs_name_node, "/datasets/orc_parser/orc/orc_split_elim.orc"):
            print("Your hive-exec version is too old. Orc parser test {0} is "
                  "skipped.".format("pyunit_INTERNAL_HDFS_baddata_orc.py"))
        else:
            hdfs_orc_file = "/datasets/orc_parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc"
            url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file)
            print("Parsing the orc file {0}".format(url_orc))
            assert pyunit_utils.expect_warnings(url_orc, warn_phrase="UserWarning:",
                                                warn_string_of_interest="Skipping field:",
                                                in_hdfs=True, number_of_times=1), \
                "Expect warnings from orc parser for file " + url_orc + "!"

            hdfs_orc_file = "/datasets/orc_parser/orc/TestOrcFile.emptyFile.orc"
            url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file)
            print("Parsing the orc file {0}".format(url_orc))
            assert pyunit_utils.expect_warnings(url_orc, warn_phrase="UserWarning:",
                                                warn_string_of_interest="Skipping field:",
                                                in_hdfs=True, number_of_times=1), \
                "Expect warnings from orc parser for file " + url_orc + "!"

            hdfs_orc_file = "/datasets/orc_parser/orc/nulls-at-end-snappy.orc"
            url_orc = "hdfs://{0}{1}".format(hdfs_name_node, hdfs_orc_file)
            print("Parsing the orc file {0}".format(url_orc))
            assert pyunit_utils.expect_warnings(url_orc, warn_phrase="UserWarning:",
                                                warn_string_of_interest="Long.MIN_VALUE:",
                                                in_hdfs=True, number_of_times=1), \
                "Expect warnings from orc parser for file " + url_orc + "!"
    else:
        raise EnvironmentError
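# The heavy lifting above is done by pyunit_utils.expect_warnings, which
# parses the file and checks that a given warning text reaches the Python
# client the expected number of times.  The function below is a minimal
# illustrative sketch of how such a check can be built on Python's warnings
# module -- its body is an assumption, not the actual pyunit_utils code; only
# the parameter names are taken from the calls above.
import warnings

import h2o


def expect_warnings_sketch(url, warn_phrase, warn_string_of_interest,
                           number_of_times=1, in_hdfs=False):
    # in_hdfs is accepted only for signature parity; the real helper
    # presumably uses it to decide whether to resolve a local smalldata path.
    # Record every warning raised while the file is parsed.
    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter("always")  # do not let any warning be suppressed
        h2o.import_file(url)             # parsing triggers the backend warnings
    # Count warnings whose rendered text contains both expected fragments,
    # e.g. "UserWarning:" and "Skipping field:".
    hits = 0
    for w in caught:
        text = warnings.formatwarning(w.message, w.category, w.filename, w.lineno)
        if warn_phrase in text and warn_string_of_interest in text:
            hits += 1
    return hits == number_of_times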
def orc_parser_baddata():
    """
    Verify that orc parser warnings from the backend are passed down to the
    python client when parsing orc files with unsupported data types or bad
    data values.

    :return: None, or an assertion failure if no warning is captured
    """
    fileWithPath = "smalldata/parser/orc/TestOrcFile.testStringAndBinaryStatistics.orc"
    assert pyunit_utils.expect_warnings(fileWithPath, "UserWarning:", "Skipping field:", 1), \
        "Expect warnings from orc parser for file " + fileWithPath + "!"

    fileWithPath = "smalldata/parser/orc/TestOrcFile.emptyFile.orc"
    assert pyunit_utils.expect_warnings(fileWithPath, "UserWarning:", "Skipping field:", 4), \
        "Expect warnings from orc parser for file " + fileWithPath + "!"

    fileWithPath = "smalldata/parser/orc/nulls-at-end-snappy.orc"
    assert pyunit_utils.expect_warnings(fileWithPath, "UserWarning:", "Long.MIN_VALUE:", 1), \
        "Expect warnings from orc parser for file " + fileWithPath + "!"
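# The smalldata paths above are relative to the h2o-3 repository root; H2O
# tests conventionally resolve them with pyunit_utils.locate before parsing.
# A usage sketch (assumes a running, connected H2O cluster):
orc_frame = h2o.import_file(
    pyunit_utils.locate("smalldata/parser/orc/nulls-at-end-snappy.orc"))
print(orc_frame.dim)  # the frame still parses; the bad longs only warn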
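# Each of these pyunit files conventionally ends with a small harness that
# runs the test standalone or as part of the suite; the usual footer (shown
# here for hdfs_orc_parser) looks like:
if __name__ == "__main__":
    pyunit_utils.standalone_test(hdfs_orc_parser)
else:
    hdfs_orc_parser()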