Exemplo n.º 1
0
    def test_import_with_incremental(self):
        """Full import followed by an incremental append into the same HDFS dir.

        The second import uses ``--incremental append`` starting after
        ``last_value=5`` on column ``id``; the test then checks the part
        files are non-empty.  The target directory is removed in
        ``finally`` so reruns start clean.
        """
        try:
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests").table(table="table_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests").table(table="table_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(
                        BASE_DIR)).with_incremental(incremental="append",
                                                    last_value="5",
                                                    check_column="id").run()

            self.assertEqual(cmd.status, 0, cmd.stderr)
            # Non-zero aggregate size of the part files proves rows landed.
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 2
0
 def test_import_with_incremental_attributes(self):
     """Incremental options must render as --incremental/--check-column/--last-value."""
     # assertEquals is a deprecated unittest alias (removed in 3.12);
     # use assertEqual.
     self.assertEqual(
         Sqoop.import_data().from_rdbms(rdbms="mysql", username="******", password_file="/user/cloudera/password",
                                        host="localhost", database="sqoop_tests").
         to_hdfs().table(table="table_name").with_incremental(incremental="append", last_value="12",
                                                              check_column="id").build(),
         "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --as-textfile --incremental append --check-column id --last-value '12'")
Exemplo n.º 3
0
 def test_import_query(self):
     """``query()`` placeholders (``$id``) must be substituted into the built command."""
     # assertEquals is a deprecated unittest alias (removed in 3.12);
     # use assertEqual.
     self.assertEqual(Sqoop.import_data().from_rdbms(rdbms="mysql", username="******",
                                                     password_file="/user/cloudera/sqoop.password",
                                                     host="localhost", database="sqoop_tests").
                      query(query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'", split_by="id",
                            id="2").to_hdfs(target_dir="/custom_directory").build(),
                      "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/sqoop.password --query \"SELECT * FROM table_name WHERE \$CONDITIONS AND id>2\" --split-by id --target-dir /custom_directory --as-textfile")
Exemplo n.º 4
0
    def test_import_to_hbase(self):
        """Import ``table_name`` into an auto-created HBase table; expect exit code 0."""
        cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                             password_file="{0}/rdbms.password".format(BASE_DIR),
                                             database="sqoop_tests").table(table="table_name").to_hbase(
            hbase_table="custom_table", hbase_create_table=True, hbase_row_key="id", column_family="f1").run()

        # assertEquals is a deprecated unittest alias (removed in 3.12);
        # use assertEqual.
        self.assertEqual(cmd.status, 0, cmd.stderr)
Exemplo n.º 5
0
 def test_import_to_hbase(self):
     """HBase options must be rendered into the generated sqoop argument string.

     NOTE(review): ``hbase_create_table="family"`` looks accidental — the
     expected output only requires a truthy value (sibling tests pass
     ``True``); confirm against the builder before changing it.
     """
     # assertEquals is a deprecated unittest alias (removed in 3.12);
     # use assertEqual.
     self.assertEqual(Sqoop.import_data().from_rdbms(rdbms="mysql", username="******",
                                                     password_file="/user/cloudera/rdbms.password",
                                                     host="localhost", database="sqoop_tests").
                      table(table="table_name").to_hbase(hbase_table="custom_table",
                                                         hbase_create_table="family", hbase_row_key="id",
                                                         column_family="f1").build(),
                      "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/rdbms.password --table table_name --as-textfile --hbase-table custom_table --hbase-create-table --hbase-row-key id --column-family f1")
Exemplo n.º 6
0
    def test_import_with_incremental(self):
        """Full import then an incremental append; the part files must be non-empty.

        The HDFS target directory is removed in ``finally`` so reruns
        start clean.
        """
        try:
            cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                                 password_file="{0}/rdbms.password".format(BASE_DIR),
                                                 database="sqoop_tests").table(
                table="table_name").to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                                 password_file="{0}/rdbms.password".format(BASE_DIR),
                                                 database="sqoop_tests").table(
                table="table_name").to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)).with_incremental(
                incremental="append", last_value="5", check_column="id").run()

            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 7
0
def load_data_from_rdbms_to_hdfs(context):
    """Run a Sqoop import of ``first_table_name`` from local MySQL into HDFS.

    ``context`` is accepted for the workflow-callback signature; it is
    not read here.
    """
    # Build the complete import job first, then execute it.
    sqoop_job = (
        Sqoop.import_data()
        .from_rdbms(
            host="127.0.0.1",
            rdbms="mysql",
            database="test_example",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
        )
        .table(table="first_table_name")
        .to_hdfs(target_dir="{0}/data_from_import".format(BASE_DIR))
    )
    sqoop_job.run()
Exemplo n.º 8
0
    def test_import_query(self):
        """Free-form query import with ``$id`` substitution; output must be non-empty."""
        try:
            cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                                 password_file="{0}/rdbms.password".format(BASE_DIR),
                                                 database="sqoop_tests").query(
                query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'", split_by="id", id="2").to_hdfs(
                target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 9
0
    def test_import_with_connection_manager(self):
        """Import with an explicit connection manager class; output must be non-empty."""
        try:
            cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                                 password_file="{0}/rdbms.password".format(BASE_DIR),
                                                 database="sqoop_tests").table(
                table="table_name").to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)).with_attr(
                connection_manager="org.apache.sqoop.manager.MySQLManager").run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 10
0
    def test_import_to_hbase(self):
        """Import ``table_name`` into an auto-created HBase table; expect exit code 0."""
        cmd = Sqoop.import_data().from_rdbms(
            host=MYSQL_SERVER,
            rdbms="mysql",
            username="******",
            password_file="{0}/rdbms.password".format(BASE_DIR),
            database="sqoop_tests").table(table="table_name").to_hbase(
                hbase_table="custom_table",
                hbase_create_table=True,
                hbase_row_key="id",
                column_family="f1").run()

        # assertEquals is a deprecated unittest alias (removed in 3.12);
        # use assertEqual.
        self.assertEqual(cmd.status, 0, cmd.stderr)
Exemplo n.º 11
0
    def test_import_with_enclosing(self):
        """Import with input/output parsing options (escaping, MySQL delimiters)."""
        try:
            cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                                 password_file="{0}/rdbms.password".format(BASE_DIR),
                                                 database="sqoop_tests").table(
                table="table_name").to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)).with_input_parsing(
                escaped_by="\\").with_output_parsing(escaped_by="\\", mysql_delimiters=True).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs', '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 12
0
    def test_import_to_avrodatafile(self):
        """Import as Avro data files; the *.avro part files must be non-empty."""
        try:
            cmd = Sqoop.import_data().from_rdbms(host=MYSQL_SERVER, rdbms="mysql", username="******",
                                                 password_file="{0}/rdbms.password".format(BASE_DIR),
                                                 database="sqoop_tests").table(
                table="table_name").to_hdfs(target_dir="{0}/custom_directory".format(BASE_DIR)).use_file_format(
                file_format="--as-avrodatafile").run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command('hadoop fs',
                                                 '-du -s {0}/custom_directory/part-m-*.avro'.format(BASE_DIR))
            self.assertNotEqual(result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command('hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 13
0
    def test_import_to_avrodatafile(self):
        """Import as Avro data files; the *.avro part files must be non-empty."""
        try:
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests").table(table="table_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)
                ).use_file_format(file_format="--as-avrodatafile").run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*.avro'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 14
0
    def test_import_with_enclosing(self):
        """Import with input/output parsing options (escaping, MySQL delimiters)."""
        try:
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests").table(table="table_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)
                ).with_input_parsing(escaped_by="\\").with_output_parsing(
                    escaped_by="\\", mysql_delimiters=True).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 15
0
    def test_import_to_hive(self):
        """Import ``table_name`` from MySQL into Hive; drop the Hive table afterwards.

        NOTE(review): the status/output assertions were commented out in
        the original, so only execution of ``run()`` is exercised here.
        """
        staging_dir = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
        try:
            # Clear any leftover staging path from a previous run.
            if staging_dir.exists():
                staging_dir.delete(recursive=staging_dir.is_directory())
            importer = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests")
            cmd = importer.table(table="table_name").to_hive().run()
        finally:
            shell.execute_shell_command(
                'hive', "-e 'DROP TABLE IF EXISTS table_name'")
Exemplo n.º 16
0
    def test_import_to_hive(self):
        """Run a MySQL-to-Hive import of ``table_name``, cleaning up the Hive table.

        NOTE(review): the result assertions are disabled in the original
        test; only a successful ``run()`` call is exercised.
        """
        leftover = HDFS(os.path.join('/user', getpass.getuser(), 'table_name'))
        try:
            # A stale staging directory from an earlier run is removed first.
            if leftover.exists():
                leftover.delete(recursive=leftover.is_directory())
            source = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests",
            )
            cmd = source.table(table="table_name").to_hive().run()
        finally:
            shell.execute_shell_command('hive', "-e 'DROP TABLE IF EXISTS table_name'")
Exemplo n.º 17
0
    def test_import_with_connection_manager(self):
        """Import with an explicit connection manager class; output must be non-empty."""
        try:
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests").table(table="table_name").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)
                ).with_attr(
                    connection_manager="org.apache.sqoop.manager.MySQLManager"
                ).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 18
0
    def test_import_query(self):
        """Free-form query import with ``$id`` substitution; output must be non-empty."""
        try:
            cmd = Sqoop.import_data().from_rdbms(
                host=MYSQL_SERVER,
                rdbms="mysql",
                username="******",
                password_file="{0}/rdbms.password".format(BASE_DIR),
                database="sqoop_tests"
            ).query(
                query="'SELECT * FROM table_name WHERE $CONDITIONS AND id>$id'",
                split_by="id",
                id="2").to_hdfs(
                    target_dir="{0}/custom_directory".format(BASE_DIR)).run()

            # assertEquals is a deprecated unittest alias (removed in 3.12);
            # use assertEqual.
            self.assertEqual(cmd.status, 0, cmd.stderr)
            result = shell.execute_shell_command(
                'hadoop fs',
                '-du -s {0}/custom_directory/part-m-*'.format(BASE_DIR))
            self.assertNotEqual(
                result.stdout.split(' ')[0], '0', result.stdout)
        finally:
            shell.execute_shell_command(
                'hadoop fs', '-rm -r {0}/custom_directory'.format(BASE_DIR))
Exemplo n.º 19
0
 def test_import_without_database(self):
     """Building the import must raise when no username is supplied.

     NOTE(review): despite the test name, a ``database`` *is* passed
     here — the missing piece is the username.
     """
     with self.assertRaises(Exception):
         spec = Sqoop.import_data().from_rdbms(
             rdbms="mysql",
             password_file="/user/cloudera/password",
             host="localhost",
             database="db")
         spec.to_hdfs().table(table="table_name").build()
Exemplo n.º 20
0
 def test_import_with_encoding(self):
     """Null-encoding options must render as --null-string/--null-non-string."""
     # assertEquals is a deprecated unittest alias (removed in 3.12);
     # use assertEqual.
     self.assertEqual(
         Sqoop.import_data().from_rdbms(rdbms="mysql", username="******", password_file="/user/cloudera/password",
                                        host="localhost", database="sqoop_tests").
         to_hdfs().table(table="table_name").with_encoding(null_string="null", null_non_string="false").build(),
         "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --as-textfile --null-string 'null' --null-non-string 'false'")
Exemplo n.º 21
0
 def test_import_to_hive(self):
     """A Hive import must append --hive-import to the generated arguments."""
     # assertEquals is a deprecated unittest alias (removed in 3.12);
     # use assertEqual.
     self.assertEqual(Sqoop.import_data().from_rdbms(rdbms="mysql", username="******",
                                                     password_file="/user/cloudera/rdbms.password",
                                                     host="localhost", database="sqoop_tests").
                      table(table="table_name").to_hive().build(),
                      "--connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/rdbms.password --table table_name --as-textfile --hive-import")
Exemplo n.º 22
0
 def test_import_without_credential(self):
     """Building an import with neither username nor password file must raise."""
     with self.assertRaises(Exception):
         spec = Sqoop.import_data().from_rdbms(
             rdbms="mysql", host="localhost", database="db")
         spec.to_hdfs().table(table="table_name").build()
Exemplo n.º 23
0
 def test_import_without_incremental_attributes(self):
     """``with_incremental`` without ``check_column`` must fail at build time."""
     with self.assertRaises(Exception):
         job = Sqoop.import_data().from_rdbms(
             rdbms="mysql",
             username="******",
             password_file="/user/cloudera/password",
             host="localhost",
             database="db")
         job.to_hdfs().table(table="table_name").with_incremental(
             incremental="append", last_value="12").build()
Exemplo n.º 24
0
 def test_import_with_hadoop_properties(self):
     """Hadoop properties render as -D flags with underscores mapped to dots."""
     # assertEquals is a deprecated unittest alias (removed in 3.12);
     # use assertEqual.
     self.assertEqual(
         Sqoop.import_data().from_rdbms(rdbms="mysql", username="******", password_file="/user/cloudera/password",
                                        host="localhost", database="sqoop_tests").
         to_hdfs().table(table="table_name").with_hadoop_properties(some_properties="10").build(),
         "-Dsome.properties=10 --connect jdbc:mysql://localhost/sqoop_tests --username root --password-file /user/cloudera/password --table table_name --as-textfile")