Example #1
 def test_streaming_map_only_job_generation(self):
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'mapreduce',
         'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore=metastore)
     _job_name = 'streaming_test_job_map_only'
     _expected_command = 'hadoop jar ' \
                         '{0}/resources/mapreduce/hadoop-streaming.jar ' \
                         '-D mapreduce.job.name={1} ' \
                         '-D value.delimiter.char=, ' \
                         '-D partition.to.process=20142010 ' \
                         '-mapper smapper.py ' \
                         '-reducer NONE ' \
                         '-numReduceTasks 0 ' \
                         '-input /raw/20102014 ' \
                         '-output /core/20102014' \
         .format(os.path.dirname(os.path.realpath(__file__)), _job_name)
     MapReduce.prepare_streaming_job(
         jar='{0}/resources/mapreduce/hadoop-streaming.jar'
         .format(os.path.dirname(os.path.realpath(__file__))),
         config=_config,
         name=_job_name,
         executor=self.assert_generated_command(_expected_command)
     ).run()
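The executor passed to prepare_streaming_job is what receives the rendered command, so assert_generated_command lets the test verify command generation without launching Hadoop. A sketch of what such a helper on the test base class might look like (hypothetical; the project's real helper may hand the command over differently):

 def assert_generated_command(self, expected_command):
     """Returns a fake executor that compares the rendered command
     against the expected string instead of running it."""
     def _executor(command, *args):
         # Some builders pass the command pre-rendered, others in parts
         actual = ' '.join([command] + list(args)) if args else command
         self.assertEqual(expected_command, actual)
     return _executor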
Example #2
 def test_should_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
     _config.set(section='section_a', key='new_key', value=None)
     self.assertEqual('value', _config.get('section_a', 'key'), "Can't find old item")
     self.assertTrue(_config.has('section_a', 'new_key'), "New Item was not added")
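All of the Configuration tests here follow the same two-step pattern: wrap an .ini file in an IniFileMetaStore, then hand it to Configuration.load. A minimal standalone sketch of that round trip, written against a temporary file so it does not depend on the test resources (it assumes IniFileMetaStore and Configuration are importable as in the tests above; the exact module path is project-specific):

import os
import tempfile


def config_round_trip():
    # Write a throwaway ini file equivalent to the 'section_a' part of test.ini
    _fd, _path = tempfile.mkstemp(suffix='.ini')
    with os.fdopen(_fd, 'w') as _ini:
        _ini.write("[section_a]\nkey = value\n")
    # Same load pattern as the tests above
    metastore = IniFileMetaStore(file=_path)
    _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
    assert _config.require('section_a', 'key') == 'value'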
Example #3
 def test_should_raise_exception_if_required_option_was_not_found(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertRaises(ConfigurationError, _config.require, 'section_a',
                       'item_a')
Example #4
 def __init__(self, methodName='runTest'):
     super(TestMapReduceCommandGenerationFromIni, self).__init__(methodName)
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), 'resources',
         'mapreduce', 'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     self._config = Configuration.load(metastore=metastore)
Example #5
 def test_streaming_map_only_job_generation(self):
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)), 'resources',
         'mapreduce', 'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore=metastore)
     _job_name = 'streaming_test_job_map_only'
     _expected_command = 'hadoop jar ' \
                         '{0}/resources/mapreduce/hadoop-streaming.jar ' \
                         '-D mapreduce.job.name={1} ' \
                         '-D value.delimiter.char=, ' \
                         '-D partition.to.process=20142010 ' \
                         '-mapper smapper.py ' \
                         '-reducer NONE ' \
                         '-numReduceTasks 0 ' \
                         '-input /raw/20102014 ' \
                         '-output /core/20102014' \
         .format(os.path.dirname(os.path.realpath(__file__)), _job_name)
     MapReduce.prepare_streaming_job(
         jar='{0}/resources/mapreduce/hadoop-streaming.jar'.format(
             os.path.dirname(os.path.realpath(__file__))),
         config=_config,
         name=_job_name,
         executor=self.assert_generated_command(_expected_command)).run()
Example #6
    def test_apply_hdfs_snapshot(self):
        _config_file = os.path.join(os.path.dirname(__file__),
                                    'resources',
                                    'bootsrap',
                                    'bootstrap.ini')
        _raw_sales_dir = HDFS('/tmp/raw/sales')
        _raw_users_dir = HDFS('/tmp/raw/users')
        _raw_tmp_dir = HDFS('/tmp/raw/tmp')
        try:
            # run bootstrap script
            metastore = IniFileMetaStore(file=_config_file)
            _config = Configuration.load(metastore)
            apply_hdfs_snapshot(_config)

            # asserts
            # assert directories were created
            self.assertTrue(_raw_sales_dir.exists(), "Directory '/tmp/raw/sales' was not created")
            self.assertTrue(_raw_users_dir.exists(), "Directory '/tmp/raw/users' was not created")
            self.assertTrue(_raw_tmp_dir.exists(), "Directory '/tmp/raw/tmp' was not created")
            # assert acls were applied
            sales_dir_acls = _raw_sales_dir.get_acls()
            users_dir_acls = _raw_users_dir.get_acls()

            self.assertIsNotNone(sales_dir_acls, '/tmp/raw/sales : ACLs were not applied')
            self.assertTrue('group:sys-pii:r-x' in sales_dir_acls, '/tmp/raw/sales : pii acl was not applied')
            self.assertTrue('group:sales:r--' in sales_dir_acls, '/tmp/raw/sales : sales acl was not applied')

            self.assertIsNotNone(users_dir_acls, '/tmp/raw/users : ACLs were not applied')
            self.assertTrue('group:sys-pii:r-x' in users_dir_acls, '/tmp/raw/users : pii acl was not applied')
        finally:
            _test_basedir = HDFS('/tmp/raw')
            _test_basedir.delete_directory()
            self.assertFalse(_test_basedir.exists(), "ERROR: clean up failed")
Example #7
    def test_export_table_with_staging(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            cmd = SqoopExport.load_preconfigured_job(config=Configuration.load(
                metastore=metastore, readonly=False, accepts_nulls=True
            )).to_rdbms().table(table="table_name_second").from_hdfs(
                export_dir="{0}/data_custom_directory".format(
                    BASE_DIR)).with_staging_table(staging_table="stag").run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.
                format(USER, PASSWORD,
                       MYSQL_SERVER), "'SELECT * FROM table_name_second'")
            self.assertNotEqual(
                result.stdout.split(' ')[0], 'Empty', result.stdout)
        finally:
            shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.
                format(USER, PASSWORD,
                       MYSQL_SERVER), "'DELETE FROM table_name_second'")
            shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.
                format(USER, PASSWORD, MYSQL_SERVER), "'DELETE FROM stag'")
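shell.execute_shell_command above takes the base command and its remaining pieces as separate arguments and returns an object exposing status, stdout and stderr. A rough stdlib-based sketch of a compatible helper (illustrative only; the project's own implementation may differ):

import subprocess
from collections import namedtuple

Result = namedtuple('Result', ['status', 'stdout', 'stderr'])


def execute_shell_command(command, *args):
    # Join the command with its arguments and run it through the shell
    _process = subprocess.Popen(' '.join([command] + list(args)),
                                shell=True,
                                stdout=subprocess.PIPE,
                                stderr=subprocess.PIPE)
    _out, _err = _process.communicate()
    return Result(status=_process.returncode, stdout=_out, stderr=_err)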
Example #8
 def test_should_be_able_to_split_string_to_multiple_values(self):
     _values = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
     _config.set("section_b", 'list', ",".join(_values))
     self.assertListEqual(_values, _config.get_list("section_b", 'list', delimiter=','))
Example #9
 def test_load_config_from_file(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     _section = 'section_a'
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertTrue(_config.has(section=_section, key='key'),
                     'Cannot find "key" option in test config')
     self.assertEqual('value', _config.require(_section, 'key'))
Example #10
 def __init__(self, methodName='runTest'):
     super(TestMapReduceCommandGenerationFromIni, self).__init__(methodName)
     _config_file = os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'mapreduce',
         'mapreduce_streaming_job.ini')
     metastore = IniFileMetaStore(file=_config_file)
     self._config = Configuration.load(metastore=metastore)
Example #11
 def test_load_config_from_file(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     _section = 'section_a'
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertTrue(_config.has(section=_section, key='key'),
                     'Cannot find "key" option in test config')
     self.assertEqual('value', _config.require(_section, 'key'))
Example #12
 def test_load_preconfigured_job(self):
     _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
     metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), 'resources/pig/pig.ini'))
     pig = Pig.load_preconfigured_job(job_name='pig test',
                                      config=Configuration.load(
                                          metastore=metastore,
                                          readonly=False, accepts_nulls=True),
                                      command_executor=mock_executor(expected_command=_command))
     pig.without_split_filter().run()
Example #13
 def test_should_not_be_able_to_add_new_items(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=True, accepts_nulls=False)
     self.assertRaises(
         excClass=ConfigurationError,
         callableObj=_config.set,
         section='test',
         key='key',
         value='value')
Example #14
 def test_should_be_able_to_add_multiple_values_for_a_single_key(self):
     _values = [1, 2, 3, 4]
     _increment = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=True)
     _config.update_list("section_b", 'list', *_values)
     self.assertListEqual(_values, _config.get_list("section_b", 'list'))
     _config.update_list("section_b", 'list', *_increment)
     self.assertListEqual(_values + _increment, _config.get_list("section_b", 'list'))
Example #15
 def test_load_preconfigured_job(self):
     _command = 'pig -brief -optimizer_off SplitFilter -optimizer_off ColumnMapKeyPrune -e "ls /"'
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(__file__), 'resources/pig/pig.ini'))
     pig = Pig.load_preconfigured_job(
         job_name='pig test',
         config=Configuration.load(metastore=metastore,
                                   readonly=False,
                                   accepts_nulls=True),
         command_executor=mock_executor(expected_command=_command))
     pig.without_split_filter().run()
Example #16
 def test_should_be_able_to_split_string_to_multiple_values(self):
     _values = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=True)
     _config.set("section_b", 'list', ",".join(_values))
     self.assertListEqual(
         _values, _config.get_list("section_b", 'list', delimiter=','))
Example #17
 def test_import_with_hadoop_properties_from_ini_file(self):
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'sqoop',
         'sqoop.ini'))
     config = Configuration.load(metastore=metastore, readonly=False)
     self.assertEquals(
         SqoopImport.load_preconfigured_job(name="sqoo", config=config)
         .from_rdbms(rdbms="mysql", username="******",
                     password_file="/user/cloudera/password",
                     host="localhost", database="sqoop_tests")
         .to_hdfs()
         .table(table="table_name")
         .with_hadoop_properties(some_properties="10")
         .build(),
         "-DA=12 -DB=13 -Dsome.properties=10 --connect jdbc:mysql://localhost/sqoop_tests "
         "--username root --password-file /user/cloudera/password --table table_name")
Example #18
 def test_import_direct(self):
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(os.path.realpath(__file__)),
         'resources',
         'sqoop',
         'sqoop.ini'))
     config = Configuration.load(metastore=metastore, readonly=False)
     self.assertEquals(
         SqoopImport.load_preconfigured_job(name="test", config=config)
         .from_rdbms(rdbms="mysql", username="******",
                     password_file="/user/cloudera/password",
                     host="localhost", database="sqoop_tests")
         .with_direct_mode(direct_split_size="1", name_2="12", names_3="1")
         .table(table="table_name")
         .to_hdfs()
         .build(),
         '-DA=12 -DB=13 --connect jdbc:mysql://localhost/sqoop_tests --username root '
         '--password-file /user/cloudera/password --table table_name --direct '
         '-- --name-2=12 --names-3=1')
Example #19
 def test_load_config(self):
     _command = "hive -e \"test\" --define A=B --define C=D --hiveconf hello=world " \
                "--hivevar A=B --hivevar C=D --database hive"
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(__file__), 'resources/hive/hive.ini'))
     hive = Hive.load_preconfigured_job(name='hive test',
                                        config=Configuration.load(
                                            metastore=metastore,
                                            readonly=False, accepts_nulls=True),
                                        executor=mock_executor(expected_command=_command)) \
         .with_hive_conf("hello", "world")
     hive.run()
Example #20
 def test_should_not_be_able_to_add_new_items(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=True,
                                  accepts_nulls=False)
     self.assertRaises(excClass=ConfigurationError,
                       callableObj=_config.set,
                       section='test',
                       key='key',
                       value='value')
Example #21
 def test_should_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=True)
     _config.set(section='section_a', key='new_key', value=None)
     self.assertEqual('value', _config.get('section_a', 'key'),
                      "Can't find old item")
     self.assertTrue(_config.has('section_a', 'new_key'),
                     "New Item was not added")
Example #22
 def test_load_config(self):
     _command = (
         'hive -e "test" --define A=B --define C=D --hiveconf hello=world '
         "--hivevar A=B --hivevar C=D --database hive"
     )
     metastore = IniFileMetaStore(file=os.path.join(os.path.dirname(__file__), "resources/hive/hive.ini"))
     hive = Hive.load_preconfigured_job(
         name="hive test",
         config=Configuration.load(metastore=metastore, readonly=False, accepts_nulls=True),
         executor=mock_executor(expected_command=_command),
     ).with_hive_conf("hello", "world")
     hive.run()
Example #23
 def test_should_not_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=False)
     self.assertRaises(ConfigurationError,
                       _config.set,
                       section='section_a',
                       key='new_key',
                       value=None)
Example #24
 def test_should_be_able_to_add_multiple_values_for_a_single_key(self):
     _values = [1, 2, 3, 4]
     _increment = ['one', 'two', 'three']
     _config_file = os.path.join(os.path.dirname(__file__), 'resources',
                                 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore,
                                  readonly=False,
                                  accepts_nulls=True)
     _config.update_list("section_b", 'list', *_values)
     self.assertListEqual(_values, _config.get_list("section_b", 'list'))
     _config.update_list("section_b", 'list', *_increment)
     self.assertListEqual(_values + _increment,
                          _config.get_list("section_b", 'list'))
Example #25
 def test_apply_local_fs_snapshot(self):
     _config_file = os.path.join(os.path.dirname(__file__),
                                 'resources',
                                 'bootsrap',
                                 'bootstrap.ini')
     test_dir = LocalFS('/tmp/data_tmp')
     if test_dir.exists():
         test_dir.delete_directory()
     try:
         metastore = IniFileMetaStore(file=_config_file)
         _config = Configuration.load(metastore)
         apply_localfs_snapshot(_config)
         self.assertTrue(test_dir.exists(), "Folder was not created")
     finally:
         test_dir.delete_directory()
Example #26
    def test_import_table(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            cmd = SqoopImport.load_preconfigured_job(
                config=Configuration.load(metastore=metastore,
                                           readonly=False,
                                           accepts_nulls=True)).from_rdbms().table(
                table="table_name", where="id>2",
                columns="id,last_name").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Example #27
    def test_export_table_with_call(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            cmd = SqoopExport.load_preconfigured_job(
                config=Configuration.load(metastore=metastore,
                                           readonly=False,
                                           accepts_nulls=True)).to_rdbms().from_hdfs(
                export_dir="{0}/data_custom_directory".format(BASE_DIR)).call(stored_procedure="p").run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
                "'SELECT * FROM table_name_second'")
            self.assertNotEqual(result.stdout.split(' ')[0], 'Empty', result.stdout)
        finally:
            shell.execute_shell_command(
                'mysql --user {0} --password={1} --host={2} sqoop_tests -e'.format(USER, PASSWORD, MYSQL_SERVER),
                "'DELETE FROM table_name_second'")
Example #28
 def test_spark_submit_from_ini(self):
     _command = "spark-submit " \
                "--master local[10] " \
                "--class test.SparkApp " \
                "--name test_app " \
                "--jars lib001.jar,lib002.jar,lib003.jar " \
                "--files dim001.cache.txt,dim002.cache.txt " \
                "--properties-file spark.app.configs " \
                "--conf \"spark.app.name=test_app spark.executor.memory=512m " \
                "spark.serializer=org.apache.spark.serializer.KryoSerializer\" " \
                "application.jar " \
                "10 test"
     metastore = IniFileMetaStore(file=os.path.join(
         os.path.dirname(__file__), "resources", "spark", "spark.app.ini"))
     spark = SparkApplication.load_preconfigured_job(
         config=Configuration.load(metastore, readonly=False),
         name="test_spark_app",
         executor=mock_executor(expected_command=_command)).application_jar("application.jar")
     spark.run(10, "test")
Example #29
    def test_import_table(self):
        try:
            metastore = IniFileMetaStore(file=os.path.join(
                os.path.dirname(__file__), 'resources/sqoop/custom.ini'))
            cmd = SqoopImport.load_preconfigured_job(config=Configuration.load(
                metastore=metastore, readonly=False, accepts_nulls=True
            )).from_rdbms().table(
                table="table_name", where="id>2",
                columns="id,last_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            self.assertEquals(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Example #30
 def test_should_not_be_able_to_add_nones(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore, readonly=False, accepts_nulls=False)
     self.assertRaises(ConfigurationError, _config.set, section='section_a', key='new_key', value=None)
Example #31
    def __str__(self):
        return '\n'.join("{!s} : {!r}".format(key, val)
                         for (key, val) in self.files.items())


def apply_hdfs_snapshot(config):
    """Creates initial directory structure on HDFS and applies ACL rules """
    _hdfs_snapshot = FsSnapshot.load_from_config(
        config, fs_section=CONFIG_HDFS_DIRS_KEY, acl_section=CONFIG_ACLS_KEY)
    _hdfs_snapshot.apply(
        mkdir_command=lambda path: HDFS(path).create_directory(recursive=True),
        apply_acls_command=lambda path, acls: HDFS(path).apply_acl(acls))


def apply_localfs_snapshot(config):
    """Creates initial directory structure on local file system"""
    _localfs_snapshot = FsSnapshot.load_from_config(
        config, fs_section=CONFIG_LOCAL_FS_DIRS_KEY)
    _localfs_snapshot.apply(
        mkdir_command=lambda path: LocalFS(path).create_directory())


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python bootstrap.py <config file>')
        sys.exit(-1)
    _config_file = sys.argv[1]
    # Configuration.load expects a metastore rather than a raw path (see the
    # examples above), so wrap the ini file in an IniFileMetaStore first
    _metastore = IniFileMetaStore(file=_config_file)
    _configs = Configuration.load(_metastore)
    apply_hdfs_snapshot(_configs)
    apply_localfs_snapshot(_configs)
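Note that apply() receives its filesystem operations as plain callables, so the same snapshot can be exercised without touching HDFS at all. A small dry-run sketch built on the FsSnapshot API used above (the collecting helper is illustrative, not part of the project):

def dry_run_hdfs_snapshot(config):
    """Collects the operations apply_hdfs_snapshot would perform,
    instead of executing them against HDFS."""
    _ops = []
    _snapshot = FsSnapshot.load_from_config(
        config, fs_section=CONFIG_HDFS_DIRS_KEY, acl_section=CONFIG_ACLS_KEY)
    _snapshot.apply(
        mkdir_command=lambda path: _ops.append('mkdir {0}'.format(path)),
        apply_acls_command=lambda path, acls: _ops.append(
            'setfacl {0} on {1}'.format(acls, path)))
    return _ops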
Example #32
 def test_should_raise_exception_if_required_option_was_not_found(self):
     _config_file = os.path.join(os.path.dirname(__file__), 'resources', 'test.ini')
     metastore = IniFileMetaStore(file=_config_file)
     _config = Configuration.load(metastore)
     self.assertRaises(ConfigurationError, _config.require, 'section_a', 'item_a')
Example #33
def apply_hdfs_snapshot(config):
    """Creates initial directory structure on HDFS and applies ACL rules """
    _hdfs_snapshot = FsSnapshot.load_from_config(config,
                                                 fs_section=CONFIG_HDFS_DIRS_KEY,
                                                 acl_section=CONFIG_ACLS_KEY)
    _hdfs_snapshot.apply(
        mkdir_command=lambda path: HDFS(path).create_directory(recursive=True),
        apply_acls_command=lambda path, acls: HDFS(path).apply_acl(acls)
    )


def apply_localfs_snapshot(config):
    """Creates initial directory structure on local file system"""
    _localfs_snapshot = FsSnapshot.load_from_config(
        config,
        fs_section=CONFIG_LOCAL_FS_DIRS_KEY
    )
    _localfs_snapshot.apply(
        mkdir_command=lambda path: LocalFS(path).create_directory()
    )


if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('Usage: python bootstrap.py <config file>')
        sys.exit(-1)
    _config_file = sys.argv[1]
    # Configuration.load expects a metastore rather than a raw path (see the
    # examples above), so wrap the ini file in an IniFileMetaStore first
    _metastore = IniFileMetaStore(file=_config_file)
    _configs = Configuration.load(_metastore)
    apply_hdfs_snapshot(_configs)
    apply_localfs_snapshot(_configs)