Example No. 1
def _create_job_binary(id, type):
    binary = mock.Mock()
    binary.id = id
    binary.url = "internal-db://42"
    if edp.compare_job_type(type, edp.JOB_TYPE_PIG):
        binary.name = "script.pig"
    elif edp.compare_job_type(type, edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_JAVA):
        binary.name = "main.jar"
    else:
        binary.name = "script.q"
    return binary
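
A hypothetical use of the helper above, assuming the same mock and edp imports as the snippet itself: the job type selects the binary name, and anything that is neither Pig, MapReduce, nor Java falls through to the Hive default.

pig_binary = _create_job_binary('binary-1', edp.JOB_TYPE_PIG)
assert pig_binary.name == 'script.pig'

hive_binary = _create_job_binary('binary-2', edp.JOB_TYPE_HIVE)
assert hive_binary.name == 'script.q'  # the else branch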
Example No. 2
def _create_job_binary(id, type):
    binary = mock.Mock()
    binary.id = id
    binary.url = "internal-db://42"
    if edp.compare_job_type(type, 'Pig'):
        binary.name = "script.pig"
    elif edp.compare_job_type(type, 'MapReduce', 'Java'):
        binary.name = "main.jar"
    else:
        binary.name = "script.q"
    return binary
Example No. 3
def create_job_binary(id, type):
    binary = mock.Mock()
    binary.id = id
    binary.url = "internal-db://42"
    if edp.compare_job_type(type, edp.JOB_TYPE_PIG):
        binary.name = "script.pig"
    elif edp.compare_job_type(type, edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_JAVA):
        binary.name = "main.jar"
    else:
        binary.name = "script.q"
    return binary
Example No. 4
def test_compare_job_type(self):
    self.assertTrue(edp.compare_job_type("Java",
                                         "Java", "MapReduce",
                                         strict=True))
    self.assertFalse(edp.compare_job_type(MAPRED_STREAMING,
                                          "Java", "MapReduce",
                                          strict=True))
    self.assertTrue(edp.compare_job_type(MAPRED_STREAMING,
                                         "Java", "MapReduce"))
    self.assertFalse(edp.compare_job_type("MapReduce",
                                          "Java", MAPRED_STREAMING))
Example No. 5
def get_possible_job_config(job_type):
    if edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        return {'job_config': ch_helper.get_possible_hive_config_from(
                'plugins/vanilla/v2_6_0/resources/hive-default.xml')}
    if edp.compare_job_type(job_type,
                            edp.JOB_TYPE_MAPREDUCE,
                            edp.JOB_TYPE_MAPREDUCE_STREAMING):
        return {'job_config': ch_helper.get_possible_mapreduce_config_from(
                'plugins/vanilla/v2_6_0/resources/mapred-default.xml')}
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        return {'job_config': ch_helper.get_possible_pig_config_from(
                'plugins/vanilla/v2_6_0/resources/mapred-default.xml')}
    return edp_engine.EdpOozieEngine.get_possible_job_config(job_type)
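
Hypothetical usage of the dispatcher above: known types yield a dict of editable config hints parsed from the plugin's bundled resource files; anything else is delegated to the Oozie engine default.

hints = get_possible_job_config(edp.JOB_TYPE_PIG)
pig_configs = hints['job_config']  # hints parsed from mapred-default.xml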
Example No. 6
def get_possible_job_config(job_type):
    if edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        return {'job_config': ch_helper.get_possible_hive_config_from(
                'plugins/cdh/v5_4_0/resources/hive-site.xml')}
    if edp.compare_job_type(job_type,
                            edp.JOB_TYPE_MAPREDUCE,
                            edp.JOB_TYPE_MAPREDUCE_STREAMING):
        return {'job_config': ch_helper.get_possible_mapreduce_config_from(
                'plugins/cdh/v5_4_0/resources/mapred-site.xml')}
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        return {'job_config': ch_helper.get_possible_pig_config_from(
                'plugins/cdh/v5_4_0/resources/mapred-site.xml')}
    return edp_engine.OozieJobEngine.get_possible_job_config(job_type)
Example No. 7
def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
        return None

    if edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
        return {'job_config': {'configs': [], 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_SHELL):
        return {'job_config': {'configs': [], 'params': {}, 'args': []}}

    if edp.compare_job_type(job_type,
                            edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_PIG):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/mapred-default.xml')
        if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE):
            cfg += get_possible_mapreduce_configs()
    elif edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/hive-default.xml')

    config = {'configs': cfg}
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE):
        config.update({'params': {}})
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        config.update({'args': []})
    return {'job_config': config}
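
A hypothetical check of the shapes returned above: Pig collects configs plus empty params and args, and unknown types short-circuit to None.

pig_hints = get_possible_job_config(edp.JOB_TYPE_PIG)
assert set(pig_hints['job_config']) == {'configs', 'params', 'args'}
assert get_possible_job_config('NotARealType') is None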
Example No. 8
def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
        return None

    if edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
        return {'job_config': {'configs': [], 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_SHELL):
        return {'job_config': {'configs': [], 'params': {}, 'args': []}}

    if edp.compare_job_type(job_type,
                            edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_PIG):
        cfg = xmlutils.load_hadoop_xml_defaults(
            'service/edp/resources/mapred-default.xml')
        if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE):
            cfg += get_possible_mapreduce_configs()
    elif edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        cfg = xmlutils.load_hadoop_xml_defaults(
            'service/edp/resources/hive-default.xml')

    config = {'configs': cfg}
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE):
        config.update({'params': {}})
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        config.update({'args': []})
    return {'job_config': config}
Example No. 9
def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
        return None

    if edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
        return {'job_config': {'configs': [], 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_SHELL):
        return {'job_config': {'configs': [], 'params': {}, 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE,
                            edp.JOB_TYPE_PIG):
        cfg = xmlutils.load_hadoop_xml_defaults(
            'service/edp/resources/mapred-default.xml')
        if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE):
            cfg += get_possible_mapreduce_configs()
    elif edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        cfg = xmlutils.load_hadoop_xml_defaults(
            'service/edp/resources/hive-default.xml')

    config = {'configs': cfg}
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE):
        config.update({'params': {}})
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        config.update({'args': []})
    return {'job_config': config}
Example No. 10
def get_possible_job_config(job_type):
    if edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        return {'job_config': ch_helper.get_possible_hive_config_from(
                'plugins/hdp/versions/version_1_3_2/resources/'
                'ambari-config-resource.json')}
    if edp.compare_job_type(job_type,
                            edp.JOB_TYPE_MAPREDUCE,
                            edp.JOB_TYPE_MAPREDUCE_STREAMING):
        return {'job_config': ch_helper.get_possible_mapreduce_config_from(
                'plugins/hdp/versions/version_1_3_2/resources/'
                'ambari-config-resource.json')}
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        return {'job_config': ch_helper.get_possible_pig_config_from(
                'plugins/hdp/versions/version_1_3_2/resources/'
                'ambari-config-resource.json')}
    return edp_engine.EdpOozieEngine.get_possible_job_config(job_type)
Example No. 11
def get_data_sources(job_execution, job):
    if edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
        return None, None

    ctx = context.ctx()
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    return input_source, output_source
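
Java jobs carry their input and output paths as job args rather than registered data sources, hence the early return above; a hypothetical call with the mocks used throughout these tests:

java_job = mock.Mock()
java_job.type = edp.JOB_TYPE_JAVA
assert get_data_sources(mock.Mock(), java_job) == (None, None)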
Example No. 12
def _create_job_exec(job_id, type, configs=None):
    j_exec = mock.Mock()
    j_exec.job_id = job_id
    j_exec.job_configs = configs
    if edp.compare_job_type(type, edp.JOB_TYPE_JAVA):
        j_exec.job_configs['configs']['edp.java.main_class'] = _java_main_class
        j_exec.job_configs['configs']['edp.java.java_opts'] = _java_opts
    return j_exec
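
Note that for Java types the helper above indexes configs['configs'] directly, so callers must pass a configs dict that already contains a 'configs' mapping; a hypothetical call:

j_exec = _create_job_exec('job-1', edp.JOB_TYPE_JAVA, configs={'configs': {}})
assert j_exec.job_configs['configs']['edp.java.main_class'] == _java_main_class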
Example No. 13
def test_compare_job_type(self):
    self.assertTrue(
        edp.compare_job_type(edp.JOB_TYPE_JAVA,
                             edp.JOB_TYPE_JAVA,
                             edp.JOB_TYPE_MAPREDUCE,
                             strict=True))
    self.assertFalse(
        edp.compare_job_type(edp.JOB_TYPE_MAPREDUCE_STREAMING,
                             edp.JOB_TYPE_JAVA,
                             edp.JOB_TYPE_MAPREDUCE,
                             strict=True))
    self.assertTrue(
        edp.compare_job_type(edp.JOB_TYPE_MAPREDUCE_STREAMING,
                             edp.JOB_TYPE_JAVA, edp.JOB_TYPE_MAPREDUCE))
    self.assertFalse(
        edp.compare_job_type(edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_JAVA,
                             edp.JOB_TYPE_MAPREDUCE_STREAMING))
Example No. 14
def get_data_sources(job_execution, job):
    if edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA, edp.JOB_TYPE_SPARK):
        return None, None

    ctx = context.ctx()
    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    return input_source, output_source
Example No. 15
def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
        return None

    if edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
        return {'job_config': {'configs': [], 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_SHELL):
        return {'job_config': {'configs': [], 'params': [], 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE,
                            edp.JOB_TYPE_PIG):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/mapred-default.xml')
        if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE):
            cfg += xmlutils.load_hadoop_xml_defaults(
                'service/edp/resources/mapred-job-config.xml')
    elif edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/hive-default.xml')

    # TODO(tmckay): args should be a list when bug #269968
    # is fixed on the UI side
    config = {'configs': cfg, "args": {}}
    if not edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE,
                                edp.JOB_TYPE_JAVA):
        config.update({'params': {}})
    return {'job_config': config}
Example No. 16
def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *edp.JOB_TYPES_ALL):
        return None

    if edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
        return {'job_config': {'configs': [], 'args': []}}

    if edp.compare_job_type(job_type, edp.JOB_TYPE_SHELL):
        return {'job_config': {'configs': [], 'params': [], 'args': []}}

    if edp.compare_job_type(job_type,
                            edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_PIG):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/mapred-default.xml')
        if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE):
            cfg += xmlutils.load_hadoop_xml_defaults(
                'service/edp/resources/mapred-job-config.xml')
    elif edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/hive-default.xml')

    # TODO(tmckay): args should be a list when bug #269968
    # is fixed on the UI side
    config = {'configs': cfg, "args": {}}
    if not edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE,
                                edp.JOB_TYPE_JAVA):
        config.update({'params': {}})
    return {'job_config': config}
Example No. 17
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    if not edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx, job_execution.output_id)
    else:
        input_source = None
        output_source = None
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    wf_xml = creator.get_workflow_xml(cluster, job_execution,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(oozie_server,
                                            wf_dir, wf_xml, hdfs_user)

    rm_path = _get_resource_manager_path(cluster)
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/",
                           _get_oozie_server(cluster))
    job_parameters = {"jobTracker": rm_path,
                      "nameNode": nn_path,
                      "user.name": hdfs_user,
                      "oozie.wf.application.path":
                      "%s%s" % (nn_path, path_to_workflow),
                      "oozie.use.system.libpath": "true"}

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(ctx, job_execution,
                                                   {'oozie_job_id':
                                                    oozie_job_id,
                                                    'start_time':
                                                    datetime.datetime.now()})
    client.run_job(job_execution, oozie_job_id)

    return job_execution
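
The oozie.wf.application.path parameter built above must be a fully qualified URL, which is why the workflow path is prefixed with the NameNode URI; a sketch of the composition with hypothetical values:

nn_path = 'hdfs://namenode:8020'          # hypothetical NameNode URI
path_to_workflow = '/user/hadoop/wf-dir'  # hypothetical workflow dir
app_path = "%s%s" % (nn_path, path_to_workflow)
assert app_path == 'hdfs://namenode:8020/user/hadoop/wf-dir'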
Example No. 18
def run_job(job_execution):
    ctx = context.ctx()

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return job_execution

    job = conductor.job_get(ctx, job_execution.job_id)
    if not edp.compare_job_type(job.type, 'Java'):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx, job_execution.output_id)
    else:
        input_source = None
        output_source = None
    # TODO(nprivalova): should be removed after all features are implemented
    validate(input_source, output_source, job)

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    wf_xml = creator.get_workflow_xml(cluster, job_execution,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(oozie_server,
                                            wf_dir, wf_xml, hdfs_user)

    rm_path = _get_resource_manager_path(cluster)
    nn_path = cluster['info']['HDFS']['NameNode']

    client = o.OozieClient(cluster['info']['JobFlow']['Oozie'] + "/oozie/",
                           _get_oozie_server(cluster))
    job_parameters = {"jobTracker": rm_path,
                      "nameNode": nn_path,
                      "user.name": hdfs_user,
                      "oozie.wf.application.path":
                      "%s%s" % (nn_path, path_to_workflow),
                      "oozie.use.system.libpath": "true"}

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(ctx, job_execution,
                                                   {'oozie_job_id':
                                                    oozie_job_id,
                                                    'start_time':
                                                    datetime.datetime.now()})
    client.run_job(job_execution, oozie_job_id)

    return job_execution
Example No. 19
def test_compare_job_type(self):
    self.assertTrue(edp.compare_job_type(
        edp.JOB_TYPE_JAVA,
        edp.JOB_TYPE_JAVA,
        edp.JOB_TYPE_MAPREDUCE,
        strict=True))
    self.assertFalse(edp.compare_job_type(
        edp.JOB_TYPE_MAPREDUCE_STREAMING,
        edp.JOB_TYPE_JAVA,
        edp.JOB_TYPE_MAPREDUCE,
        strict=True))
    self.assertTrue(edp.compare_job_type(
        edp.JOB_TYPE_MAPREDUCE_STREAMING,
        edp.JOB_TYPE_JAVA,
        edp.JOB_TYPE_MAPREDUCE))
    self.assertFalse(edp.compare_job_type(
        edp.JOB_TYPE_MAPREDUCE,
        edp.JOB_TYPE_JAVA,
        edp.JOB_TYPE_MAPREDUCE_STREAMING))
Example No. 20
def get_possible_job_config(job_type):
    if edp.compare_job_type(job_type, edp.JOB_TYPE_HIVE):
        return {
            'job_config':
            ch_helper.get_possible_hive_config_from(
                'plugins/vanilla/v2_6_0/resources/hive-default.xml')
        }
    if edp.compare_job_type(job_type, edp.JOB_TYPE_MAPREDUCE,
                            edp.JOB_TYPE_MAPREDUCE_STREAMING):
        return {
            'job_config':
            ch_helper.get_possible_mapreduce_config_from(
                'plugins/vanilla/v2_6_0/resources/mapred-default.xml')
        }
    if edp.compare_job_type(job_type, edp.JOB_TYPE_PIG):
        return {
            'job_config':
            ch_helper.get_possible_pig_config_from(
                'plugins/vanilla/v2_6_0/resources/mapred-default.xml')
        }
    return edp_engine.EdpOozieEngine.get_possible_job_config(job_type)
Example No. 21
def _create_job(id, job_binary, type):
    job = mock.Mock()
    job.id = id
    job.type = type
    job.name = 'special_name'
    if edp.compare_job_type(type, edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE):
        job.mains = [job_binary]
        job.libs = None
    else:
        job.libs = [job_binary]
        job.mains = None
    return job
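
Hypothetical usage of the helper above: script-based types (Pig, Hive) carry the binary as a main, all other types as a lib.

pig_job = _create_job('job-1', mock.Mock(), edp.JOB_TYPE_PIG)
assert pig_job.mains and pig_job.libs is None

java_job = _create_job('job-2', mock.Mock(), edp.JOB_TYPE_JAVA)
assert java_job.libs and java_job.mains is None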
Example No. 22
def _create_job(id, job_binary, type):
    job = mock.Mock()
    job.id = id
    job.type = type
    job.name = 'special_name'
    if edp.compare_job_type(type, 'Pig', 'Hive'):
        job.mains = [job_binary]
        job.libs = None
    else:
        job.libs = [job_binary]
        job.mains = None
    return job
Example No. 23
def _create_job_exec(job_id, type, configs=None, info=None):
    j_exec = mock.Mock()
    j_exec.id = six.text_type(uuid.uuid4())
    j_exec.job_id = job_id
    j_exec.job_configs = configs
    j_exec.info = info
    if not j_exec.job_configs:
        j_exec.job_configs = {}
    if edp.compare_job_type(type, edp.JOB_TYPE_JAVA):
        j_exec.job_configs['configs']['edp.java.main_class'] = _java_main_class
        j_exec.job_configs['configs']['edp.java.java_opts'] = _java_opts
    return j_exec
Example No. 24
def _create_job(id, job_binary, type):
    job = mock.Mock()
    job.id = id
    job.type = type
    job.name = 'special_name'
    if edp.compare_job_type(type, edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE):
        job.mains = [job_binary]
        job.libs = None
    else:
        job.libs = [job_binary]
        job.mains = None
    return job
Example No. 25
def get_possible_job_config(job_type):
    if not edp.compare_job_type(job_type, *get_possible_job_types()):
        return None

    if edp.compare_job_type(job_type, 'Java'):
        return {'job_config': {'configs': [], 'args': []}}

    if edp.compare_job_type(job_type, 'MapReduce', 'Pig'):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/mapred-default.xml')
        if edp.compare_job_type(job_type, 'MapReduce'):
            cfg += xmlutils.load_hadoop_xml_defaults(
                'service/edp/resources/mapred-job-config.xml')
    elif edp.compare_job_type(job_type, 'Hive'):
        # TODO(nmakhotkin): return config based on the specific plugin
        cfg = xmlutils.load_hadoop_xml_defaults(
            'plugins/vanilla/v1_2_1/resources/hive-default.xml')

    # TODO(tmckay): args should be a list when bug #269968
    # is fixed on the UI side
    config = {'configs': cfg, "args": {}}
    if not edp.compare_job_type(job_type, 'MapReduce', 'Java'):
        config.update({'params': {}})
    return {'job_config': config}
Example No. 26
def _create_job_exec(job_id, type, configs=None, info=None):
    j_exec = mock.Mock()
    j_exec.id = uuidutils.generate_uuid()
    j_exec.job_id = job_id
    j_exec.job_configs = configs
    j_exec.info = info
    j_exec.input_id = 4
    j_exec.output_id = 5
    j_exec.engine_job_id = None
    j_exec.data_source_urls = {}
    if not j_exec.job_configs:
        j_exec.job_configs = {}
    if edp.compare_job_type(type, edp.JOB_TYPE_JAVA):
        j_exec.job_configs['configs']['edp.java.main_class'] = _java_main_class
        j_exec.job_configs['configs']['edp.java.java_opts'] = _java_opts
    return j_exec
Example No. 27
def get_data_sources(job_execution, job, data_source_urls):
    if edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA, edp.JOB_TYPE_SPARK):
        return None, None

    ctx = context.ctx()

    input_source = conductor.data_source_get(ctx, job_execution.input_id)
    if input_source and input_source.id not in data_source_urls:
        data_source_urls[input_source.id] = _construct_data_source_url(
            input_source.url, job_execution.id)

    output_source = conductor.data_source_get(ctx, job_execution.output_id)
    if output_source and output_source.id not in data_source_urls:
        data_source_urls[output_source.id] = _construct_data_source_url(
            output_source.url, job_execution.id)

    return input_source, output_source
Example No. 28
    def edp_testing(self,
                    job_type,
                    job_data_list,
                    lib_data_list=None,
                    configs=None,
                    pass_input_output_args=False,
                    swift_binaries=False,
                    hdfs_local_output=False):
        try:
            swift = self.connect_to_swift()
            container_name = 'Edp-test-%s' % str(uuid.uuid4())[:8]
            swift.put_container(container_name)
            swift.put_object(
                container_name, 'input', ''.join(
                    random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
                    for x in range(10000)))

        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_swift_container(swift, container_name)
                print(str(e))
        input_id = None
        output_id = None
        job_id = None
        job_execution = None
        try:
            job_binary_list = []
            lib_binary_list = []
            job_binary_internal_list = []

            swift_input_url = 'swift://%s.sahara/input' % container_name
            if hdfs_local_output:
                # This will create a file in hdfs under the user
                # executing the job (i.e. /user/hadoop/Edp-test-xxxx-out)
                output_type = "hdfs"
                output_url = container_name + "-out"
            else:
                output_type = "swift"
                output_url = 'swift://%s.sahara/output' % container_name

            # Java jobs don't use data sources.  Input/output paths must
            # be passed as args with corresponding username/password configs
            if not edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
                input_id = self._create_data_source(
                    'input-%s' % str(uuid.uuid4())[:8], 'swift',
                    swift_input_url)
                output_id = self._create_data_source(
                    'output-%s' % str(uuid.uuid4())[:8], output_type,
                    output_url)

            if job_data_list:
                if swift_binaries:
                    self._create_job_binaries(job_data_list,
                                              job_binary_internal_list,
                                              job_binary_list,
                                              swift_connection=swift,
                                              container_name=container_name)
                else:
                    self._create_job_binaries(job_data_list,
                                              job_binary_internal_list,
                                              job_binary_list)

            if lib_data_list:
                if swift_binaries:
                    self._create_job_binaries(lib_data_list,
                                              job_binary_internal_list,
                                              lib_binary_list,
                                              swift_connection=swift,
                                              container_name=container_name)
                else:
                    self._create_job_binaries(lib_data_list,
                                              job_binary_internal_list,
                                              lib_binary_list)

            job_id = self._create_job(
                'Edp-test-job-%s' % str(uuid.uuid4())[:8], job_type,
                job_binary_list, lib_binary_list)
            if not configs:
                configs = {}

            # Append the input/output paths with the swift configs
            # if the caller has requested it...
            if edp.compare_job_type(
                    job_type, edp.JOB_TYPE_JAVA) and pass_input_output_args:
                self._add_swift_configs(configs)
                if "args" in configs:
                    configs["args"].extend([swift_input_url, output_url])
                else:
                    configs["args"] = [swift_input_url, output_url]

            job_execution = self.sahara.job_executions.create(job_id,
                                                              self.cluster_id,
                                                              input_id,
                                                              output_id,
                                                              configs=configs)

            if job_execution:
                self._await_job_execution(job_execution)

        except Exception as e:
            with excutils.save_and_reraise_exception():
                print(str(e))

        finally:
            self.delete_swift_container(swift, container_name)
            self._delete_job(job_execution, job_id,
                             job_binary_list + lib_binary_list,
                             job_binary_internal_list, input_id, output_id)
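
The if/else that appends to configs["args"] above can be collapsed with dict.setdefault; a minimal equivalent sketch with hypothetical URLs:

configs = {}
configs.setdefault("args", []).extend(
    ['swift://c.sahara/input', 'swift://c.sahara/output'])
assert configs["args"] == ['swift://c.sahara/input', 'swift://c.sahara/output']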
Example No. 29
    def edp_testing(self,
                    job_type,
                    job_data_list,
                    lib_data_list=None,
                    configs=None,
                    pass_input_output_args=False,
                    swift_binaries=False,
                    hdfs_local_output=False):
        job_data_list = job_data_list or []
        lib_data_list = lib_data_list or []
        configs = configs or {}

        swift = self.connect_to_swift()
        container_name = 'Edp-test-%s' % str(uuid.uuid4())[:8]
        swift.put_container(container_name)
        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.delete_swift_container, swift, container_name)
        swift.put_object(
            container_name, 'input', ''.join(
                random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
                for x in six.moves.range(10000)))

        input_id = None
        output_id = None
        job_id = None
        job_execution = None
        job_binary_list = []
        lib_binary_list = []
        job_binary_internal_list = []

        swift_input_url = 'swift://%s.sahara/input' % container_name
        if hdfs_local_output:
            # This will create a file in hdfs under the user
            # executing the job (i.e. /user/hadoop/Edp-test-xxxx-out)
            output_type = "hdfs"
            output_url = container_name + "-out"
        else:
            output_type = "swift"
            output_url = 'swift://%s.sahara/output' % container_name

        # Java jobs don't use data sources.  Input/output paths must
        # be passed as args with corresponding username/password configs
        if not edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA,
                                    edp.JOB_TYPE_SPARK):
            input_id = self._create_data_source(
                'input-%s' % str(uuid.uuid4())[:8], 'swift', swift_input_url)
            output_id = self._create_data_source(
                'output-%s' % str(uuid.uuid4())[:8], output_type, output_url)

        if job_data_list:
            if swift_binaries:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list)

        if lib_data_list:
            if swift_binaries:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list)

        job_id = self._create_job('Edp-test-job-%s' % str(uuid.uuid4())[:8],
                                  job_type, job_binary_list, lib_binary_list)
        if not configs:
            configs = {}

        # TODO(tmckay): for spark we don't have support for swift
        # yet.  When we do, we'll need something here to set up
        # swift paths and we can use a spark wordcount job

        # Append the input/output paths with the swift configs
        # if the caller has requested it...
        if edp.compare_job_type(job_type,
                                edp.JOB_TYPE_JAVA) and pass_input_output_args:
            self._add_swift_configs(configs)
            if "args" in configs:
                configs["args"].extend([swift_input_url, output_url])
            else:
                configs["args"] = [swift_input_url, output_url]

        job_execution = self.sahara.job_executions.create(job_id,
                                                          self.cluster_id,
                                                          input_id,
                                                          output_id,
                                                          configs=configs)
        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.sahara.job_executions.delete,
                            job_execution.id)

        return job_execution.id
Example No. 30
    def edp_testing(self, job_type, job_data_list, lib_data_list=None,
                    configs=None, pass_input_output_args=False,
                    swift_binaries=False, hdfs_local_output=False):
        try:
            swift = self.connect_to_swift()
            container_name = 'Edp-test-%s' % str(uuid.uuid4())[:8]
            swift.put_container(container_name)
            swift.put_object(
                container_name, 'input', ''.join(
                    random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
                    for x in range(10000)
                )
            )

        except Exception as e:
            with excutils.save_and_reraise_exception():
                self.delete_swift_container(swift, container_name)
                print(str(e))
        input_id = None
        output_id = None
        job_id = None
        job_execution = None
        try:
            job_binary_list = []
            lib_binary_list = []
            job_binary_internal_list = []

            swift_input_url = 'swift://%s.sahara/input' % container_name
            if hdfs_local_output:
                # This will create a file in hdfs under the user
                # executing the job (i.e. /user/hadoop/Edp-test-xxxx-out)
                output_type = "hdfs"
                output_url = container_name + "-out"
            else:
                output_type = "swift"
                output_url = 'swift://%s.sahara/output' % container_name

            # Java jobs don't use data sources.  Input/output paths must
            # be passed as args with corresponding username/password configs
            if not edp.compare_job_type(job_type, edp.JOB_TYPE_JAVA):
                input_id = self._create_data_source(
                    'input-%s' % str(uuid.uuid4())[:8], 'swift',
                    swift_input_url)
                output_id = self._create_data_source(
                    'output-%s' % str(uuid.uuid4())[:8], output_type,
                    output_url)

            if job_data_list:
                if swift_binaries:
                    self._create_job_binaries(job_data_list,
                                              job_binary_internal_list,
                                              job_binary_list,
                                              swift_connection=swift,
                                              container_name=container_name)
                else:
                    self._create_job_binaries(job_data_list,
                                              job_binary_internal_list,
                                              job_binary_list)

            if lib_data_list:
                if swift_binaries:
                    self._create_job_binaries(lib_data_list,
                                              job_binary_internal_list,
                                              lib_binary_list,
                                              swift_connection=swift,
                                              container_name=container_name)
                else:
                    self._create_job_binaries(lib_data_list,
                                              job_binary_internal_list,
                                              lib_binary_list)

            job_id = self._create_job(
                'Edp-test-job-%s' % str(uuid.uuid4())[:8], job_type,
                job_binary_list, lib_binary_list)
            if not configs:
                configs = {}

            # Append the input/output paths with the swift configs
            # if the caller has requested it...
            if edp.compare_job_type(
                    job_type, edp.JOB_TYPE_JAVA) and pass_input_output_args:
                self._add_swift_configs(configs)
                if "args" in configs:
                    configs["args"].extend([swift_input_url,
                                            output_url])
                else:
                    configs["args"] = [swift_input_url,
                                       output_url]

            job_execution = self.sahara.job_executions.create(
                job_id, self.cluster_id, input_id, output_id,
                configs=configs)

            if job_execution:
                self._await_job_execution(job_execution)

        except Exception as e:
            with excutils.save_and_reraise_exception():
                print(str(e))

        finally:
            self.delete_swift_container(swift, container_name)
            self._delete_job(
                job_execution, job_id, job_binary_list+lib_binary_list,
                job_binary_internal_list, input_id, output_id
            )
Example No. 31
def validate(input_data, output_data, job):
    if not edp.compare_job_type(job.type, edp.JOB_TYPE_PIG,
                                edp.JOB_TYPE_MAPREDUCE, edp.JOB_TYPE_HIVE,
                                edp.JOB_TYPE_JAVA):
        raise RuntimeError
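
A hypothetical rejection by the validator above: any type outside the four allowed ones raises.

shell_job = mock.Mock()
shell_job.type = edp.JOB_TYPE_SHELL  # not in the allowed set
try:
    validate(None, None, shell_job)
except RuntimeError:
    pass  # expected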
Example No. 32
def validate(input_data, output_data, job):
    if not edp.compare_job_type(job.type, 'Pig', 'MapReduce',
                                'Hive', 'Java'):
        raise RuntimeError
Example No. 33
def compare_job_type(job_type, *args, **kwargs):
    return edp.compare_job_type(job_type, *args, **kwargs)
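
The wrapper above is a thin re-export that simply delegates to the shared edp utility; hypothetical usage:

assert compare_job_type(edp.JOB_TYPE_PIG, edp.JOB_TYPE_PIG, edp.JOB_TYPE_HIVE)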
Example No. 34
    def edp_testing(self,
                    job_type,
                    job_data_list,
                    lib_data_list=None,
                    configs=None,
                    pass_input_output_args=False,
                    swift_binaries=False,
                    hdfs_local_output=False):

        job_data_list = job_data_list or []
        lib_data_list = lib_data_list or []
        configs = configs or {}

        test_id = 'edp-mapr-test-%s' % str(uuid.uuid4())[:8]
        swift = self.connect_to_swift()
        container = test_id
        swift.put_container(container)

        input_folder = '/%s' % test_id
        cldb_ip = self.cluster_info['node_info']['namenode_ip']
        self.create_mapr_fs_dir(cldb_ip, input_folder)

        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.delete_swift_container, swift, container)

        input_data = ''.join(
            random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
            for x in six.moves.range(10000))
        input_file = '%s/input' % input_folder
        self.put_file_to_mapr_fs(cldb_ip, input_file, input_data)

        input_id = None
        output_id = None
        job_binary_list = []
        lib_binary_list = []
        job_binary_internal_list = []

        maprfs_input_url = 'maprfs://%s' % input_file
        maprfs_output_url = 'maprfs://%s/output' % (input_folder + '-out')

        if not utils_edp.compare_job_type(job_type, utils_edp.JOB_TYPE_JAVA,
                                          utils_edp.JOB_TYPE_SPARK):
            input_id = self._create_data_source(
                'input-%s' % str(uuid.uuid4())[:8], 'maprfs', maprfs_input_url)
            output_id = self._create_data_source(
                'output-%s' % str(uuid.uuid4())[:8], 'maprfs',
                maprfs_output_url)

        if job_data_list:
            if swift_binaries:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list,
                                          swift_connection=swift,
                                          container_name=container)
            else:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list)

        if lib_data_list:
            if swift_binaries:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list,
                                          swift_connection=swift,
                                          container_name=container)
            else:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list)

        job_id = self._create_job('edp-test-job-%s' % str(uuid.uuid4())[:8],
                                  job_type, job_binary_list, lib_binary_list)
        if not configs:
            configs = {}

        if utils_edp.compare_job_type(
                job_type, utils_edp.JOB_TYPE_JAVA) and pass_input_output_args:
            self._enable_substitution(configs)
            if "args" in configs:
                configs["args"].extend([maprfs_input_url, maprfs_output_url])
            else:
                configs["args"] = [maprfs_input_url, maprfs_output_url]

        job_execution = self.sahara.job_executions.create(job_id,
                                                          self.cluster_id,
                                                          input_id,
                                                          output_id,
                                                          configs=configs)
        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.sahara.job_executions.delete,
                            job_execution.id)

        return job_execution.id
Example No. 35
    def edp_testing(self, job_type, job_data_list, lib_data_list=None,
                    configs=None, pass_input_output_args=False,
                    swift_binaries=False, hdfs_local_output=False):

        job_data_list = job_data_list or []
        lib_data_list = lib_data_list or []
        configs = configs or {}

        test_id = 'edp-mapr-test-%s' % str(uuid.uuid4())[:8]
        swift = self.connect_to_swift()
        container = test_id
        swift.put_container(container)

        input_folder = '/%s' % test_id
        cldb_ip = self.cluster_info['node_info']['namenode_ip']
        self.create_mapr_fs_dir(cldb_ip, input_folder)

        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.delete_swift_container, swift, container)

        input_data = ''.join(
            random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
            for x in six.moves.range(10000)
        )
        input_file = '%s/input' % input_folder
        self.put_file_to_mapr_fs(cldb_ip, input_file, input_data)

        input_id = None
        output_id = None
        job_binary_list = []
        lib_binary_list = []
        job_binary_internal_list = []

        maprfs_input_url = 'maprfs://%s' % input_file
        maprfs_output_url = 'maprfs://%s/output' % (input_folder + '-out')

        if not utils_edp.compare_job_type(job_type,
                                          utils_edp.JOB_TYPE_JAVA,
                                          utils_edp.JOB_TYPE_SPARK):
            input_id = self._create_data_source(
                'input-%s' % str(uuid.uuid4())[:8], 'maprfs',
                maprfs_input_url)
            output_id = self._create_data_source(
                'output-%s' % str(uuid.uuid4())[:8], 'maprfs',
                maprfs_output_url)

        if job_data_list:
            if swift_binaries:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list,
                                          swift_connection=swift,
                                          container_name=container)
            else:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list)

        if lib_data_list:
            if swift_binaries:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list,
                                          swift_connection=swift,
                                          container_name=container)
            else:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list)

        job_id = self._create_job(
            'edp-test-job-%s' % str(uuid.uuid4())[:8], job_type,
            job_binary_list, lib_binary_list)
        if not configs:
            configs = {}

        if utils_edp.compare_job_type(
                job_type, utils_edp.JOB_TYPE_JAVA) and pass_input_output_args:
            self._enable_substitution(configs)
            if "args" in configs:
                configs["args"].extend([maprfs_input_url, maprfs_output_url])
            else:
                configs["args"] = [maprfs_input_url, maprfs_output_url]

        job_execution = self.sahara.job_executions.create(
            job_id, self.cluster_id, input_id, output_id,
            configs=configs)
        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.sahara.job_executions.delete,
                            job_execution.id)

        return job_execution.id
Example No. 36
    def edp_testing(self, job_type, job_data_list, lib_data_list=None,
                    configs=None, pass_input_output_args=False,
                    swift_binaries=False, hdfs_local_output=False):
        job_data_list = job_data_list or []
        lib_data_list = lib_data_list or []
        configs = configs or {}

        swift = self.connect_to_swift()
        container_name = 'Edp-test-%s' % str(uuid.uuid4())[:8]
        swift.put_container(container_name)
        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.delete_swift_container, swift, container_name)
        swift.put_object(
            container_name, 'input', ''.join(
                random.choice(':' + ' ' + '\n' + string.ascii_lowercase)
                for x in six.moves.range(10000)
            )
        )

        input_id = None
        output_id = None
        job_id = None
        job_execution = None
        job_binary_list = []
        lib_binary_list = []
        job_binary_internal_list = []

        swift_input_url = 'swift://%s.sahara/input' % container_name
        if hdfs_local_output:
            # This will create a file in hdfs under the user
            # executing the job (i.e. /user/hadoop/Edp-test-xxxx-out)
            output_type = "hdfs"
            output_url = container_name + "-out"
        else:
            output_type = "swift"
            output_url = 'swift://%s.sahara/output' % container_name

        input_name = 'input-%s' % str(uuid.uuid4())[:8]
        input_id = self._create_data_source(input_name,
                                            'swift', swift_input_url)

        output_name = 'output-%s' % str(uuid.uuid4())[:8]
        output_id = self._create_data_source(output_name,
                                             output_type,
                                             output_url)

        if job_data_list:
            if swift_binaries:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(job_data_list,
                                          job_binary_internal_list,
                                          job_binary_list)

        if lib_data_list:
            if swift_binaries:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list,
                                          swift_connection=swift,
                                          container_name=container_name)
            else:
                self._create_job_binaries(lib_data_list,
                                          job_binary_internal_list,
                                          lib_binary_list)

        job_id = self._create_job(
            'Edp-test-job-%s' % str(uuid.uuid4())[:8], job_type,
            job_binary_list, lib_binary_list)
        if not configs:
            configs = {}

        # TODO(tmckay): for spark we don't have support for swift
        # yet.  When we do, we'll need something here to set up
        # swift paths and we can use a spark wordcount job

        # Append the input/output paths with the swift configs
        # if the caller has requested it...
        if edp.compare_job_type(
                job_type, edp.JOB_TYPE_JAVA) and pass_input_output_args:
            self._enable_substitution(configs)
            input_arg = job_utils.DATA_SOURCE_PREFIX + input_name
            output_arg = output_id
            if "args" in configs:
                configs["args"].extend([input_arg, output_arg])
            else:
                configs["args"] = [input_arg, output_arg]

        job_execution = self.sahara.job_executions.create(
            job_id, self.cluster_id, input_id, output_id,
            configs=configs)
        if not self.common_config.RETAIN_EDP_AFTER_TEST:
            self.addCleanup(self.sahara.job_executions.delete,
                            job_execution.id)

        return job_execution.id
Example No. 37
def run_job(job_execution_id):
    ctx = context.ctx()

    job_execution = conductor.job_execution_get(ctx,
                                                job_execution_id)

    cluster = conductor.cluster_get(ctx, job_execution.cluster_id)
    if cluster.status != 'Active':
        return

    if CONF.use_namespaces and not CONF.use_floating_ips:
        plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
        oozie = plugin.get_oozie_server(cluster)

        info = oozie.remote().get_neutron_info()
        extra = job_execution.extra.copy()
        extra['neutron'] = info

        job_execution = conductor.job_execution_update(ctx,
                                                       job_execution_id,
                                                       {'extra': extra})

    job = conductor.job_get(ctx, job_execution.job_id)
    if not edp.compare_job_type(job.type, edp.JOB_TYPE_JAVA):
        input_source = conductor.data_source_get(ctx, job_execution.input_id)
        output_source = conductor.data_source_get(ctx, job_execution.output_id)
    else:
        input_source = None
        output_source = None

    for data_source in [input_source, output_source]:
        if data_source and data_source.type == 'hdfs':
            h.configure_cluster_for_hdfs(cluster, data_source)

    hdfs_user = _get_hdfs_user(cluster)
    oozie_server = _get_oozie_server(cluster)
    wf_dir = create_workflow_dir(oozie_server, job, hdfs_user)
    upload_job_files(oozie_server, wf_dir, job, hdfs_user)

    creator = workflow_factory.get_creator(job)

    wf_xml = creator.get_workflow_xml(cluster, job_execution,
                                      input_source, output_source)

    path_to_workflow = upload_workflow_file(oozie_server,
                                            wf_dir, wf_xml, hdfs_user)

    plugin = plugin_base.PLUGINS.get_plugin(cluster.plugin_name)
    rm_path = plugin.get_resource_manager_uri(cluster)
    nn_path = plugin.get_name_node_uri(cluster)

    client = _create_oozie_client(cluster)
    job_parameters = {"jobTracker": rm_path,
                      "nameNode": nn_path,
                      "user.name": hdfs_user,
                      "oozie.wf.application.path":
                      "%s%s" % (nn_path, path_to_workflow),
                      "oozie.use.system.libpath": "true"}

    oozie_job_id = client.add_job(x.create_hadoop_xml(job_parameters),
                                  job_execution)
    job_execution = conductor.job_execution_update(ctx, job_execution,
                                                   {'oozie_job_id':
                                                    oozie_job_id,
                                                    'start_time':
                                                    datetime.datetime.now()})
    client.run_job(job_execution, oozie_job_id)