Exemplo n.º 1
0
def test_copy_files():
  """Exercise Submission._copy_files() and _create_file() on the shared test cluster.

  Six jars are created under /tmp/test_copy_files (absolute paths inside and
  outside the workspace, plus two workspace-relative paths) and referenced by
  the nodes of a mock job. The test then checks that:

  - workflow.xml and job.properties are written to the workspace;
  - every source jar is still present after the copy;
  - every jar is present in the lib/ directory of both the workspace and the
    external deployment directory;
  - copying to the workspace a second time changes the fileId of every jar in
    the workspace lib/ directory except udf4.jar and udf6.jar;
  - _create_file() creates the requested file.

  The test directory is removed at the end, whatever the outcome.
  """
  cluster = pseudo_hdfs4.shared_cluster()

  # Bound before the try block: the cleanup in `finally` reads it and would
  # otherwise fail with a NameError (hiding the real error) if setup raised early.
  prefix = '/tmp/test_copy_files'

  try:
    make_logged_in_client()  # Return value is not needed by this test
    user = User.objects.get(username='******')

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir # Doesn't move
    jar_5 = 'udf5.jar'
    jar_6 = 'lib/udf6.jar' # Doesn't move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)
    cluster.fs.create(deployment_dir + '/' + jar_5)
    cluster.fs.create(deployment_dir + '/' + jar_6)

    class MockJob():
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.deployment_dir = deployment_dir
        self.nodes = [
            Node({'id': '1', 'type': 'mapreduce', 'properties': {'jar_path': jar_1}}),
            Node({'id': '2', 'type': 'mapreduce', 'properties': {'jar_path': jar_2}}),
            Node({'id': '3', 'type': 'java', 'properties': {'jar_path': jar_3}}),
            Node({'id': '4', 'type': 'java', 'properties': {'jar_path': jar_4}}),

            # Workspace relative paths
            Node({'id': '5', 'type': 'java', 'properties': {'jar_path': jar_5}}),
            Node({'id': '6', 'type': 'java', 'properties': {'jar_path': jar_6}})
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
    assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

    # Lib
    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

    lib_jars = ['udf%d.jar' % i for i in range(1, 7)]

    # All destinations there
    for jar_name in lib_jars:
      assert_true(cluster.fs.exists(deployment_dir + '/' + jar_name), list_dir_workspace)

    for jar_name in lib_jars:
      assert_true(cluster.fs.exists(external_deployment_dir + '/' + jar_name), list_dir_deployement)

    # Remember the file ids so a second copy can be told apart from the first
    file_ids_before = {}
    for jar_name in lib_jars:
      file_ids_before[jar_name] = cluster.fs.stats(deployment_dir + '/' + jar_name)['fileId']

    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    # udf4.jar and udf6.jar were created directly in the workspace lib directory,
    # so they are expected to keep their file id; all the others must be new files.
    jars_kept_in_place = ('udf4.jar', 'udf6.jar')
    for jar_name in lib_jars:
      file_id_after = cluster.fs.stats(deployment_dir + '/' + jar_name)['fileId']
      if jar_name in jars_kept_in_place:
        assert_equal(file_ids_before[jar_name], file_id_after)
      else:
        assert_not_equal(file_ids_before[jar_name], file_id_after)

    # Test _create_file()
    submission._create_file(deployment_dir, 'test.txt', data='Test data')
    # Re-list the directory so a failure reports its current content
    assert_true(cluster.fs.exists(deployment_dir + '/test.txt'), cluster.fs.listdir(deployment_dir))

  finally:
    try:
      cluster.fs.rmtree(prefix)
    except Exception:
      # Best-effort cleanup: log and carry on so the test outcome is not masked
      LOG.exception('failed to remove %s' % prefix)
Exemplo n.º 2
0
def test_copy_files():
    """Exercise Submission._copy_files() and _create_file() on the shared test cluster.

    Six jars are created under /tmp/test_copy_files (absolute paths inside and
    outside the workspace, plus two workspace-relative paths) and referenced
    by the nodes of a mock job. After copying to the workspace and to an
    external deployment directory, the test checks that workflow.xml and
    job.properties exist in the workspace and that every source jar is still
    present.

    The remaining checks depend on USE_LIBPATH_FOR_JARS:

    - when set, each jar path must appear in the submission's
      'oozie.libpath' property;
    - otherwise, each jar must exist in the lib/ directory of both targets,
      and a second copy to the workspace must change the fileId of every jar
      in the workspace lib/ directory except udf4.jar and udf6.jar.

    Finally _create_file() is checked, and the test directory is removed
    whatever the outcome.
    """
    cluster = pseudo_hdfs4.shared_cluster()

    # Bound before the try block: the cleanup in `finally` reads it and would
    # otherwise fail with a NameError (hiding the real error) if setup raised.
    prefix = '/tmp/test_copy_files'

    try:
        make_logged_in_client()  # Return value is not needed by this test
        user = User.objects.get(username='******')
        ensure_home_directory(cluster.fs, user)

        if cluster.fs.exists(prefix):
            cluster.fs.rmtree(prefix)

        # Jars in various locations
        deployment_dir = '%s/workspace' % prefix
        external_deployment_dir = '%s/deployment' % prefix
        jar_1 = '%s/udf1.jar' % prefix
        jar_2 = '%s/lib/udf2.jar' % prefix
        jar_3 = '%s/udf3.jar' % deployment_dir
        jar_4 = '%s/lib/udf4.jar' % deployment_dir  # Doesn't move
        jar_5 = 'udf5.jar'
        jar_6 = 'lib/udf6.jar'  # Doesn't move

        cluster.fs.mkdir(prefix)
        cluster.fs.create(jar_1)
        cluster.fs.create(jar_2)
        cluster.fs.create(jar_3)
        cluster.fs.create(jar_4)
        cluster.fs.create(deployment_dir + '/' + jar_5)
        cluster.fs.create(deployment_dir + '/' + jar_6)

        class MockJob(object):
            XML_FILE_NAME = 'workflow.xml'

            def __init__(self):
                self.deployment_dir = deployment_dir
                self.nodes = [
                    Node({'id': '1', 'type': 'mapreduce',
                          'properties': {'jar_path': jar_1}}),
                    Node({'id': '2', 'type': 'mapreduce',
                          'properties': {'jar_path': jar_2}}),
                    Node({'id': '3', 'type': 'java',
                          'properties': {'jar_path': jar_3}}),
                    Node({'id': '4', 'type': 'java',
                          'properties': {'jar_path': jar_4}}),

                    # Workspace relative paths
                    Node({'id': '5', 'type': 'java',
                          'properties': {'jar_path': jar_5}}),
                    Node({'id': '6', 'type': 'java',
                          'properties': {'jar_path': jar_6}}),
                ]

        submission = Submission(user,
                                job=MockJob(),
                                fs=cluster.fs,
                                jt=cluster.jt)

        submission._copy_files(deployment_dir, "<xml>My XML</xml>",
                               {'prop1': 'val1'})
        submission._copy_files(external_deployment_dir, "<xml>My XML</xml>",
                               {'prop1': 'val1'})

        assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'),
                    deployment_dir)
        assert_true(cluster.fs.exists(deployment_dir + '/job.properties'),
                    deployment_dir)

        # All sources still there
        assert_true(cluster.fs.exists(jar_1))
        assert_true(cluster.fs.exists(jar_2))
        assert_true(cluster.fs.exists(jar_3))
        assert_true(cluster.fs.exists(jar_4))
        assert_true(cluster.fs.exists(deployment_dir + '/' + jar_5))
        assert_true(cluster.fs.exists(deployment_dir + '/' + jar_6))

        # Lib
        deployment_dir = deployment_dir + '/lib'
        external_deployment_dir = external_deployment_dir + '/lib'

        if USE_LIBPATH_FOR_JARS.get():
            libpath = submission.properties['oozie.libpath']

            assert_true(jar_1 in libpath)
            assert_true(jar_2 in libpath)
            assert_true(jar_3 in libpath)
            assert_true(jar_4 in libpath)
            assert_true((deployment_dir + '/' + jar_5) in libpath, libpath)
            assert_true((deployment_dir + '/' + jar_6) in libpath, libpath)
        else:
            list_dir_workspace = cluster.fs.listdir(deployment_dir)
            list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

            lib_jars = ['udf%d.jar' % i for i in range(1, 7)]

            # All destinations there
            for jar_name in lib_jars:
                assert_true(
                    cluster.fs.exists(deployment_dir + '/' + jar_name),
                    list_dir_workspace)

            for jar_name in lib_jars:
                assert_true(
                    cluster.fs.exists(external_deployment_dir + '/' + jar_name),
                    list_dir_deployement)

            # Remember the file ids so a second copy can be told apart
            file_ids_before = {}
            for jar_name in lib_jars:
                jar_stats = cluster.fs.stats(deployment_dir + '/' + jar_name)
                file_ids_before[jar_name] = jar_stats['fileId']

            submission._copy_files('%s/workspace' % prefix,
                                   "<xml>My XML</xml>", {'prop1': 'val1'})

            # udf4.jar and udf6.jar were created directly in the workspace lib
            # directory, so they are expected to keep their file id; all the
            # others must be new files.
            jars_kept_in_place = ('udf4.jar', 'udf6.jar')
            for jar_name in lib_jars:
                jar_stats = cluster.fs.stats(deployment_dir + '/' + jar_name)
                if jar_name in jars_kept_in_place:
                    assert_equal(file_ids_before[jar_name],
                                 jar_stats['fileId'])
                else:
                    assert_not_equal(file_ids_before[jar_name],
                                     jar_stats['fileId'])

        # Test _create_file()
        submission._create_file(deployment_dir, 'test.txt', data='Test data')
        # The failure message is the expected path: the directory listing only
        # exists in the non-libpath branch above, so it cannot be used here.
        created_file = deployment_dir + '/test.txt'
        assert_true(cluster.fs.exists(created_file), created_file)

    finally:
        try:
            cluster.fs.rmtree(prefix)
        except Exception:
            # Best-effort cleanup: log and carry on so the test outcome is
            # not masked
            LOG.exception('failed to remove %s' % prefix)
Exemplo n.º 3
0
def test_copy_files():
  """Exercise Submission._copy_files() on the shared test cluster.

  Four jars are created under /tmp/test_copy_files (two outside the workspace,
  one at the workspace root, one already in the workspace lib/ directory) and
  exposed through the node list of a mock job. The test then checks that:

  - workflow.xml and job.properties are written to the workspace;
  - every source jar is still present after the copy;
  - every jar is present in the lib/ directory of both the workspace and the
    external deployment directory;
  - copying to the workspace a second time changes the fileId of every jar in
    the workspace lib/ directory except udf4.jar.

  The test directory is removed at the end, whatever the outcome.
  """
  import logging  # Local import: only needed to report a failed cleanup

  cluster = pseudo_hdfs4.shared_cluster()

  # Bound before the try block: the cleanup in `finally` reads it and would
  # otherwise fail with a NameError if setup raised early.
  prefix = '/tmp/test_copy_files'

  try:
    make_logged_in_client()  # Return value is not needed by this test
    user = User.objects.get(username='******')

    if cluster.fs.exists(prefix):
      cluster.fs.rmtree(prefix)

    # Jars in various locations
    deployment_dir = '%s/workspace' % prefix
    external_deployment_dir = '%s/deployment' % prefix
    jar_1 = '%s/udf1.jar' % prefix
    jar_2 = '%s/lib/udf2.jar' % prefix
    jar_3 = '%s/udf3.jar' % deployment_dir
    jar_4 = '%s/lib/udf4.jar' % deployment_dir # Never move

    cluster.fs.mkdir(prefix)
    cluster.fs.create(jar_1)
    cluster.fs.create(jar_2)
    cluster.fs.create(jar_3)
    cluster.fs.create(jar_4)

    class MockNode():
      def __init__(self, jar_path):
        self.jar_path = jar_path

    class MockJob():
      XML_FILE_NAME = 'workflow.xml'

      def __init__(self):
        self.node_list = [
            MockNode(jar_1),
            MockNode(jar_2),
            MockNode(jar_3),
            MockNode(jar_4),
        ]

    submission = Submission(user, job=MockJob(), fs=cluster.fs, jt=cluster.jt)

    submission._copy_files(deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})
    submission._copy_files(external_deployment_dir, "<xml>My XML</xml>", {'prop1': 'val1'})

    assert_true(cluster.fs.exists(deployment_dir + '/workflow.xml'), deployment_dir)
    assert_true(cluster.fs.exists(deployment_dir + '/job.properties'), deployment_dir)

    # All sources still there
    assert_true(cluster.fs.exists(jar_1))
    assert_true(cluster.fs.exists(jar_2))
    assert_true(cluster.fs.exists(jar_3))
    assert_true(cluster.fs.exists(jar_4))

    deployment_dir = deployment_dir + '/lib'
    external_deployment_dir = external_deployment_dir + '/lib'

    list_dir_workspace = cluster.fs.listdir(deployment_dir)
    list_dir_deployement = cluster.fs.listdir(external_deployment_dir)

    lib_jars = ['udf%d.jar' % i for i in range(1, 5)]

    # All destinations there
    for jar_name in lib_jars:
      assert_true(cluster.fs.exists(deployment_dir + '/' + jar_name), list_dir_workspace)

    for jar_name in lib_jars:
      assert_true(cluster.fs.exists(external_deployment_dir + '/' + jar_name), list_dir_deployement)

    # Remember the file ids so a second copy can be told apart from the first
    file_ids_before = {}
    for jar_name in lib_jars:
      file_ids_before[jar_name] = cluster.fs.stats(deployment_dir + '/' + jar_name)['fileId']

    submission._copy_files('%s/workspace' % prefix, "<xml>My XML</xml>", {'prop1': 'val1'})

    # udf4.jar was created directly in the workspace lib directory, so it is
    # expected to keep its file id; all the others must be new files.
    for jar_name in lib_jars:
      file_id_after = cluster.fs.stats(deployment_dir + '/' + jar_name)['fileId']
      if jar_name == 'udf4.jar':
        assert_equal(file_ids_before[jar_name], file_id_after)
      else:
        assert_not_equal(file_ids_before[jar_name], file_id_after)

  finally:
    try:
      cluster.fs.rmtree(prefix)
    except Exception:
      # Best-effort cleanup: report the failure instead of hiding it, but do
      # not let it mask the outcome of the test itself.
      logging.getLogger(__name__).exception('failed to remove %s' % prefix)