Пример #1
0
def init(bridge_type):
    """Create a ``JavaWrapperFactory`` whose classpath includes Hadoop's.

    The classpath handed to the factory is the value of the ``classpath``
    environment variable (``'.'`` when unset), a colon, and whatever
    ``pydoop.hadoop_classpath()`` returns.

    :param bridge_type: forwarded unchanged as ``java_bridge_name``.
    :returns: the ``JavaWrapperFactory`` instance.
    :raises RuntimeError: if ``pydoop.hadoop_classpath()`` returns ``None``.
    """
    hadoop_cp = pydoop.hadoop_classpath()
    if hadoop_cp is None:
        raise RuntimeError('Hadoop classpath not set')
    # NOTE(review): the variable name is lower-case 'classpath', not the
    # conventional 'CLASSPATH' -- confirm this is intentional.
    user_cp = os.environ.get('classpath', '.')
    return JavaWrapperFactory(
        classpath=':'.join((user_cp, hadoop_cp)),
        java_bridge_name=bridge_type,
    )
Пример #2
0
 def __init__(self, hadoop_vinfo):
     """Record the Java sources, jars and properties for a Hadoop version.

     The v2 source tree is used when ``hadoop_vinfo.main >= (2, 0, 0)`` and
     either ``is_cloudera()`` is false or ``is_yarn()`` is true; in every
     other case the v1 source tree is used.

     :param hadoop_vinfo: version-info object exposing ``main``,
       ``is_cloudera()`` and ``is_yarn()``.
     """
     self.hadoop_vinfo = hadoop_vinfo
     self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
     self.classpath = pydoop.hadoop_classpath()
     self.java_files = []
     self.dependencies = []
     self.properties = []

     # Same short-circuit order as a single boolean expression: the
     # Cloudera/YARN checks only run for main >= (2, 0, 0).
     use_v2 = hadoop_vinfo.main >= (2, 0, 0)
     if use_v2 and hadoop_vinfo.is_cloudera():
         use_v2 = hadoop_vinfo.is_yarn()

     if not use_v2:
         self.java_files.append(
             "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
         )
         self.java_files += glob.glob(
             'src/v1/org/apache/hadoop/mapred/pipes/*.java'
         )
     else:
         # FIXME: kinda hardwired to avro for now
         prop_target = os.path.join(
             "it/crs4/pydoop/mapreduce/pipes", PROP_BN
         )
         self.properties.append((prop_target, PROP_FN))
         self.java_files.append(
             "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
         )
         self.java_files += glob.glob('src/v2/it/crs4/pydoop/pipes/*.java')
         self.java_files += glob.glob(
             'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'
         )
         # for now we have only hadoop2 deps (avro-mapred)
         self.dependencies += glob.glob('lib/*.jar')
Пример #3
0
    def __init__(self, hadoop_vinfo):
        """Record the Java sources, jars and properties for a Hadoop version.

        Two independent choices are made:

        * pipes sources: the v2 tree when ``hadoop_vinfo.main >= (2, 0, 0)``
          and either ``is_cloudera()`` is false or ``is_yarn()`` is true,
          otherwise the v1 tree;
        * output-format sources: the v2 tree when ``has_mrv2()`` is true,
          otherwise the v1 tree.

        :param hadoop_vinfo: version-info object exposing ``main``,
          ``is_cloudera()``, ``is_yarn()`` and ``has_mrv2()``.
        """
        self.hadoop_vinfo = hadoop_vinfo
        self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
        self.classpath = pydoop.hadoop_classpath()
        self.java_files = []
        self.dependencies = []
        self.properties = []

        # The Cloudera/YARN checks only run for main >= (2, 0, 0).
        has_v2_pipes = hadoop_vinfo.main >= (2, 0, 0)
        if has_v2_pipes and hadoop_vinfo.is_cloudera():
            has_v2_pipes = hadoop_vinfo.is_yarn()

        if not has_v2_pipes:
            # We should be dealing with v1 pipes
            self.java_files += glob.glob(
                'src/v1/org/apache/hadoop/mapred/pipes/*.java'
            )
        else:
            # This version of Hadoop has the v2 pipes API
            # FIXME: kinda hardwired to avro for now
            prop_target = os.path.join(
                "it/crs4/pydoop/mapreduce/pipes", PROP_BN
            )
            self.properties.append((prop_target, PROP_FN))
            self.java_files += glob.glob('src/v2/it/crs4/pydoop/pipes/*.java')
            self.java_files += glob.glob(
                'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'
            )
            # for things such as avro-mapreduce
            self.dependencies += glob.glob('lib/*.jar')

        if not hadoop_vinfo.has_mrv2():
            self.java_files.append(
                "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
            )
        else:
            # If the installation has MRv2 we need to use v2 I/O classes
            self.java_files += glob.glob(
                'src/v2/it/crs4/pydoop/mapreduce/lib/output/*.java'
            )
            self.java_files.append(
                "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
            )
Пример #4
0
def init():
    """Set the ``CLASSPATH`` and ``LIBHDFS_OPTS`` environment variables.

    ``CLASSPATH`` becomes the colon-separated sequence of
    ``pydoop.hadoop_classpath()``, ``_ORIG_CLASSPATH`` and
    ``pydoop.hadoop_conf()``.

    ``LIBHDFS_OPTS`` becomes its current value (or
    ``common.DEFAULT_LIBHDFS_OPTS`` when unset) followed by a
    ``-Djava.library.path`` option built from ``pydoop.hadoop_native()``.
    Each call appends that option to whatever the variable currently holds,
    so calling this repeatedly repeats the option.
    """
    classpath_entries = (
        pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
    )
    os.environ["CLASSPATH"] = "%s:%s:%s" % classpath_entries

    current_opts = os.getenv("LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS)
    library_path_opt = " -Djava.library.path=%s" % pydoop.hadoop_native()
    os.environ["LIBHDFS_OPTS"] = current_opts + library_path_opt
Пример #5
0
 def __init__(self, hadoop_vinfo):
     """Record the Java sources, jars and properties for a Hadoop version.

     When ``hadoop_vinfo.main >= (2, 0, 0)`` and either ``is_cloudera()`` is
     false or ``is_yarn()`` is true, the v2 sources, the ``lib/*.jar``
     dependencies and one properties entry are registered. Otherwise only
     the v1 sources are registered.

     :param hadoop_vinfo: version-info object exposing ``main``,
       ``is_cloudera()`` and ``is_yarn()``.
     """
     self.hadoop_vinfo = hadoop_vinfo
     self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
     self.classpath = pydoop.hadoop_classpath()
     self.java_files = []
     self.dependencies = []
     self.properties = []

     v2_selected = hadoop_vinfo.main >= (2, 0, 0) and (
         not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn()
     )
     if v2_selected:
         # FIXME: kinda hardwired to avro for now
         self.properties.append((
             os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN),
             PROP_FN,
         ))
         self.java_files.append(
             "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
         )
         source_patterns = (
             'src/v2/it/crs4/pydoop/pipes/*.java',
             'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java',
         )
     else:
         self.java_files.append(
             "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
         )
         source_patterns = (
             'src/v1/org/apache/hadoop/mapred/pipes/*.java',
         )

     for pattern in source_patterns:
         self.java_files.extend(glob.glob(pattern))

     if v2_selected:
         # for now we have only hadoop2 deps (avro-mapred)
         self.dependencies.extend(glob.glob('lib/*.jar'))
Пример #6
0
def init():
  """Set the ``CLASSPATH`` and ``LIBHDFS_OPTS`` environment variables.

  ``CLASSPATH`` becomes the colon-separated sequence of
  ``pydoop.hadoop_classpath()``, ``_ORIG_CLASSPATH`` and
  ``pydoop.hadoop_conf()``. ``LIBHDFS_OPTS`` keeps its current value, or is
  set to ``common.DEFAULT_LIBHDFS_OPTS`` when it is not already defined.
  """
  classpath_entries = (
    pydoop.hadoop_classpath(), _ORIG_CLASSPATH, pydoop.hadoop_conf()
  )
  os.environ["CLASSPATH"] = "%s:%s:%s" % classpath_entries

  libhdfs_opts = os.getenv("LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS)
  os.environ["LIBHDFS_OPTS"] = libhdfs_opts
Пример #7
0
def init():
    """Export the Java-related environment variables.

    * ``CLASSPATH`` is overwritten with ``pydoop.hadoop_classpath()``,
      ``_ORIG_CLASSPATH`` and ``pydoop.hadoop_conf()``, joined by colons.
    * ``LIBHDFS_OPTS`` is overwritten with its current value (falling back
      to ``common.DEFAULT_LIBHDFS_OPTS``) plus a ``-Djava.library.path``
      option whose value is ``pydoop.hadoop_native()``.

    The option is appended on every call, so repeated calls accumulate it.
    """
    hadoop_cp = pydoop.hadoop_classpath()
    hadoop_conf_dir = pydoop.hadoop_conf()
    os.environ["CLASSPATH"] = "%s:%s:%s" % (
        hadoop_cp, _ORIG_CLASSPATH, hadoop_conf_dir
    )

    base_opts = os.getenv("LIBHDFS_OPTS", common.DEFAULT_LIBHDFS_OPTS)
    native_dir = pydoop.hadoop_native()
    os.environ["LIBHDFS_OPTS"] = (
        base_opts + " -Djava.library.path=%s" % native_dir
    )
Пример #8
0
def _get_java_output_stream(wd):
    """Compile the serialization helper class in *wd* and return its output.

    The ``<_HADOOP_SERIALIZE_CLASS>.java`` file sitting next to this module
    is copied into *wd*, compiled with ``compile_java`` and then passed to
    ``get_java_output_stream``, whose result is returned as-is.

    :param wd: working directory that receives the copied source; it is
      also appended to the classpath.
    """
    java_filename = "%s.java" % _HADOOP_SERIALIZE_CLASS
    module_dir = os.path.abspath(os.path.dirname(__file__))
    shutil.copy(os.path.join(module_dir, java_filename), wd)

    classpath = '.:%s:%s' % (pydoop.hadoop_classpath(), wd)
    compile_java(os.path.join(wd, java_filename), classpath)
    return get_java_output_stream(_HADOOP_SERIALIZE_CLASS, classpath, [], wd)
Пример #9
0
def init(bridge_type):
    """Return a ``JavaWrapperFactory`` configured with the Hadoop classpath.

    The factory's classpath is the ``classpath`` environment variable
    (``'.'`` if it is not set), followed by ``':'`` and the value returned
    by ``pydoop.hadoop_classpath()``.

    :param bridge_type: passed through as ``java_bridge_name``.
    :raises RuntimeError: if ``pydoop.hadoop_classpath()`` returns ``None``.
    """
    hadoop_classpath = pydoop.hadoop_classpath()
    if hadoop_classpath is not None:
        # NOTE(review): lower-case 'classpath' is looked up here, not
        # 'CLASSPATH' -- confirm this is the intended variable.
        env_classpath = os.environ.get('classpath', '.')
        full_classpath = env_classpath + ':' + hadoop_classpath
        return JavaWrapperFactory(
            classpath=full_classpath, java_bridge_name=bridge_type
        )
    raise RuntimeError('Hadoop classpath not set')
Пример #10
0
 def _run_java(self, in_uri, out_uri, wd):
     """Copy the Java sources into *wd*, compile and run the round-trip class.

     The ``_JAVA_SRC_ROOT`` tree next to this module is copied under *wd*,
     ``_OPAQUE_ROUNDTRIP_SRC`` is compiled, and ``_OPAQUE_ROUNDTRIP_CLASS``
     is run with ``[in_uri, out_uri]`` as arguments. Nothing is returned.

     :param in_uri: first argument passed to the Java class.
     :param out_uri: second argument passed to the Java class.
     :param wd: working directory; receives the source copy and is part of
       the classpath.
     """
     module_dir = os.path.abspath(os.path.dirname(__file__))
     src_tree = os.path.join(module_dir, _JAVA_SRC_ROOT)
     dst_tree = os.path.join(wd, _JAVA_SRC_ROOT)
     shutil.copytree(src_tree, dst_tree)

     cp_entries = (wd, pydoop.jar_path(), pydoop.hadoop_classpath())
     classpath = '.:%s:%s:%s' % cp_entries
     utils.compile_java(os.path.join(wd, _OPAQUE_ROUNDTRIP_SRC), classpath)
     java_args = [in_uri, out_uri]
     utils.run_java(_OPAQUE_ROUNDTRIP_CLASS, classpath, java_args, wd)
Пример #11
0
 def __init__(self):
     """Record the jar name, classpath, sources, jars and properties.

     ``java_files`` holds every match of the mapreduce pipes glob followed
     by the ``NoSeparatorTextOutputFormat`` source; ``dependencies`` holds
     every ``lib/*.jar`` match; ``properties`` holds a single
     ``(path, PROP_FN)`` pair.
     """
     self.jar_name = pydoop.jar_name()
     self.classpath = pydoop.hadoop_classpath()

     sources = list(glob.glob("src/it/crs4/pydoop/mapreduce/pipes/*.java"))
     sources.append("src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java")
     self.java_files = sources

     self.dependencies = glob.glob('lib/*.jar')

     prop_target = os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN)
     self.properties = [(prop_target, PROP_FN)]
Пример #12
0
 def __init__(self, hadoop_vinfo, pipes_src_dir):
   """Record the jar name, classpath and Java sources for a Hadoop version.

   ``java_files`` always contains the ``NoSeparatorTextOutputFormat``
   source. Everything under *pipes_src_dir* is added as well when
   ``has_security()`` is true, unless ``cdh >= (4, 0, 0)`` with a falsy
   ``ext``.

   :param hadoop_vinfo: version-info object exposing ``has_security()``,
     ``cdh`` and ``ext``.
   :param pipes_src_dir: directory whose entries are added to the sources.
   """
   self.hadoop_vinfo = hadoop_vinfo
   self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
   self.classpath = pydoop.hadoop_classpath()
   self.java_files = ["src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]

   if not self.hadoop_vinfo.has_security():
     return
   skip_pipes_sources = hadoop_vinfo.cdh >= (4, 0, 0) and not hadoop_vinfo.ext
   if skip_pipes_sources:
     return  # TODO: add support for mrv2
   # add our fix for https://issues.apache.org/jira/browse/MAPREDUCE-4000
   self.java_files += glob.glob("%s/*" % pipes_src_dir)
Пример #13
0
 def __init__(self, hadoop_vinfo, pipes_src_dir):
   """Set up the jar name, classpath and list of Java sources to build.

   The ``NoSeparatorTextOutputFormat`` source is always listed. The
   contents of *pipes_src_dir* are appended only when ``has_security()`` is
   true and the version is not ``cdh >= (4, 0, 0)`` with a falsy ``ext``.

   :param hadoop_vinfo: version-info object exposing ``has_security()``,
     ``cdh`` and ``ext``.
   :param pipes_src_dir: directory globbed for additional sources.
   """
   self.hadoop_vinfo = hadoop_vinfo
   self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
   self.classpath = pydoop.hadoop_classpath()
   self.java_files = ["src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"]

   # TODO: add support for mrv2 (cdh >= (4, 0, 0) without ext is skipped)
   include_pipes_sources = self.hadoop_vinfo.has_security() and not (
     hadoop_vinfo.cdh >= (4, 0, 0) and not hadoop_vinfo.ext
   )
   if include_pipes_sources:
     # add our fix for https://issues.apache.org/jira/browse/MAPREDUCE-4000
     extra_sources = glob.glob("%s/*" % pipes_src_dir)
     self.java_files.extend(extra_sources)
Пример #14
0
 def _run_java(self, in_uri, out_uri, wd):
     """Compile and run the opaque round-trip Java class inside *wd*.

     Steps: copy the ``_JAVA_SRC_ROOT`` tree from this module's directory
     into *wd*, compile ``_OPAQUE_ROUNDTRIP_SRC``, then run
     ``_OPAQUE_ROUNDTRIP_CLASS`` with *in_uri* and *out_uri* as its
     arguments. The method returns ``None``.

     :param in_uri: first argument for the Java program.
     :param out_uri: second argument for the Java program.
     :param wd: working directory used for the copy and on the classpath.
     """
     here = os.path.abspath(os.path.dirname(__file__))
     shutil.copytree(
         os.path.join(here, _JAVA_SRC_ROOT),
         os.path.join(wd, _JAVA_SRC_ROOT),
     )
     jar_path = pydoop.jar_path()
     hadoop_cp = pydoop.hadoop_classpath()
     classpath = '.:%s:%s:%s' % (wd, jar_path, hadoop_cp)
     roundtrip_src = os.path.join(wd, _OPAQUE_ROUNDTRIP_SRC)
     utils.compile_java(roundtrip_src, classpath)
     utils.run_java(
         _OPAQUE_ROUNDTRIP_CLASS, classpath, [in_uri, out_uri], wd
     )
Пример #15
0
 def __init__(self):
     """Set up the jar name, classpath, sources, jars and properties.

     * ``java_files``: matches of the mapreduce pipes glob, then the
       ``NoSeparatorTextOutputFormat`` source.
     * ``dependencies``: matches of ``lib/*.jar``.
     * ``properties``: one ``(path, PROP_FN)`` pair, where the path joins
       the pipes package directory with ``PROP_BN``.
     """
     self.jar_name = pydoop.jar_name()
     self.classpath = pydoop.hadoop_classpath()

     pipes_sources = glob.glob("src/it/crs4/pydoop/mapreduce/pipes/*.java")
     output_format_source = (
         "src/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
     )
     self.java_files = pipes_sources + [output_format_source]

     self.dependencies = glob.glob('lib/*.jar')

     prop_entry = (
         os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN),
         PROP_FN,
     )
     self.properties = [prop_entry]
Пример #16
0
def main(argv):
  """Build the jar named on the command line unless it already exists.

  :param argv: argument list; ``argv[0]`` is the program name and
    ``argv[1]`` the path of the jar to create.
  :returns: ``2`` after printing a usage message when the jar name is
    missing, ``0`` otherwise.
  :raises subprocess.CalledProcessError: if ``javac`` or ``jar`` exits with
    a non-zero status.
  """
  try:
    jar_name = argv[1]
  except IndexError:
    # Single-argument call form: valid on both Python 2 and Python 3,
    # unlike the print statement.
    print("Usage: python %s JAR_NAME" % os.path.basename(argv[0]))
    return 2
  if not os.path.isfile(jar_name):
    classpath = pydoop.hadoop_classpath()
    # NOTE(review): shell=True is kept because SRC and CLASS are defined
    # elsewhere and may rely on shell expansion -- confirm before switching
    # to an argument list.
    subprocess.check_call("javac -cp %s %s" % (classpath, SRC), shell=True)
    subprocess.check_call("jar -cvf %s %s" % (jar_name, CLASS), shell=True)
  return 0
Пример #17
0
def _get_java_output_stream(wd):
    """Run the serialization helper class and return its stdout as a stream.

    The ``<_HADOOP_SERIALIZE_CLASS>.java`` file next to this module is
    copied into *wd*, ``_compile_java_part`` is invoked for the matching
    ``.class`` path, and the class is then executed with ``java`` from
    within *wd*. The child's standard error is discarded.

    :param wd: working directory; receives the source copy, is appended to
      the classpath and is the child's current directory.
    :returns: a ``StringIO`` wrapping the captured standard output.
    :raises subprocess.CalledProcessError: if ``java`` exits with a
      non-zero status.
    """
    this_directory = os.path.abspath(os.path.dirname(__file__))
    src = os.path.join(this_directory, "%s.java" % _HADOOP_SERIALIZE_CLASS)
    shutil.copy(src, wd)
    classpath = ".:%s:%s" % (pydoop.hadoop_classpath(), wd)
    filename_root = os.path.join(wd, _HADOOP_SERIALIZE_CLASS)
    _compile_java_part(filename_root + ".class", classpath)
    # Close the null-device handle deterministically instead of leaking it.
    with open(os.devnull, "w") as devnull:
        output = subprocess.check_output(
            ["java", "-cp", classpath, _HADOOP_SERIALIZE_CLASS],
            cwd=wd, stderr=devnull
        )
    return StringIO(output)
Пример #18
0
def _get_java_output_stream(wd):
    """Execute the serialization helper class and wrap its stdout.

    Copies ``<_HADOOP_SERIALIZE_CLASS>.java`` from this module's directory
    into *wd*, calls ``_compile_java_part`` for the corresponding
    ``.class`` path, then runs the class with ``java`` using *wd* as the
    working directory. Standard error of the child is sent to the null
    device.

    :param wd: working directory for the copy, the classpath and the run.
    :returns: a ``StringIO`` over the captured standard output.
    :raises subprocess.CalledProcessError: if ``java`` exits with a
      non-zero status.
    """
    this_directory = os.path.abspath(os.path.dirname(__file__))
    src = os.path.join(this_directory, "%s.java" % _HADOOP_SERIALIZE_CLASS)
    shutil.copy(src, wd)
    classpath = '.:%s:%s' % (pydoop.hadoop_classpath(), wd)
    filename_root = os.path.join(wd, _HADOOP_SERIALIZE_CLASS)
    _compile_java_part(filename_root + ".class", classpath)
    # The handle is closed as soon as the child finishes; previously it was
    # opened inline and never closed.
    with open(os.devnull, 'w') as devnull:
        output = subprocess.check_output(
            ['java', '-cp', classpath, _HADOOP_SERIALIZE_CLASS],
            cwd=wd,
            stderr=devnull)
    stream = StringIO(output)
    return stream
Пример #19
0
 def __init__(self, hadoop_vinfo):
     """Record the jar name, classpath and Java sources for a Hadoop version.

     The v2 source tree is used when ``hadoop_vinfo.main >= (2, 0, 0)`` and
     ``is_yarn()`` is true; otherwise the v1 source tree is used.

     :param hadoop_vinfo: version-info object exposing ``main`` and
       ``is_yarn()``.
     """
     self.hadoop_vinfo = hadoop_vinfo
     self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
     self.classpath = pydoop.hadoop_classpath()
     self.java_files = []

     # A commented-out alternative in this spot compared main against
     # (2, 2, 0) without the is_yarn() check.
     use_v2 = hadoop_vinfo.main >= (2, 0, 0) and hadoop_vinfo.is_yarn()
     if not use_v2:
         self.java_files.append(
             "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
         )
         self.java_files += glob.glob(
             'src/v1/org/apache/hadoop/mapred/pipes/*.java'
         )
     else:
         self.java_files.append(
             "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
         )
         self.java_files += glob.glob('src/v2/it/crs4/pydoop/pipes/*.java')
         self.java_files += glob.glob(
             'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java'
         )
Пример #20
0
    def __init__(self, hadoop_vinfo):
        """Set up the Java sources, jars and properties for a Hadoop version.

        Pipes sources come from the v2 tree when
        ``hadoop_vinfo.main >= (2, 0, 0)`` and either ``is_cloudera()`` is
        false or ``is_yarn()`` is true, and from the v1 tree otherwise.
        Output-format sources come from the v2 tree when ``has_mrv2()`` is
        true, and from the v1 tree otherwise.

        :param hadoop_vinfo: version-info object exposing ``main``,
          ``is_cloudera()``, ``is_yarn()`` and ``has_mrv2()``.
        """
        self.hadoop_vinfo = hadoop_vinfo
        self.jar_name = pydoop.jar_name(self.hadoop_vinfo)
        self.classpath = pydoop.hadoop_classpath()
        self.java_files = []
        self.dependencies = []
        self.properties = []

        v2_pipes = hadoop_vinfo.main >= (2, 0, 0) and (
            not hadoop_vinfo.is_cloudera() or hadoop_vinfo.is_yarn()
        )
        if v2_pipes:
            # This version of Hadoop has the v2 pipes API
            # FIXME: kinda hardwired to avro for now
            self.properties.append((
                os.path.join("it/crs4/pydoop/mapreduce/pipes", PROP_BN),
                PROP_FN,
            ))
            pipes_patterns = (
                'src/v2/it/crs4/pydoop/pipes/*.java',
                'src/v2/it/crs4/pydoop/mapreduce/pipes/*.java',
            )
        else:
            # Else we should be dealing with v1 pipes
            pipes_patterns = (
                'src/v1/org/apache/hadoop/mapred/pipes/*.java',
            )
        for pattern in pipes_patterns:
            self.java_files.extend(glob.glob(pattern))
        if v2_pipes:
            # for things such as avro-mapreduce
            self.dependencies.extend(glob.glob('lib/*.jar'))

        if hadoop_vinfo.has_mrv2():
            # If the installation has MRv2 we need to use v2 I/O classes
            self.java_files.extend(glob.glob(
                'src/v2/it/crs4/pydoop/mapreduce/lib/output/*.java'
            ))
            output_format_source = (
                "src/v2/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
            )
        else:
            output_format_source = (
                "src/v1/it/crs4/pydoop/NoSeparatorTextOutputFormat.java"
            )
        self.java_files.append(output_format_source)