# Example 1
def distCp(clusters, src, dest):
    """
    create the dist command

    bash$ hadoop distcp hdfs://nn1:8020/foo/a \
    hdfs://nn1:8020/foo/b \
    hdfs://nn2:8020/bar/foo

    Through HDFS proxy (httpfs)
    e.g. hadoop distcp webhdfs://nn1:3888/gutenberg/  webhdfs://nn2:3888/
    """

    start = time.time()
    cmd = "hadoop distcp %s %s" % (src, dest)
    rcode, stdout, stderr = _checked_hadoop_fs_command(cmd)
    end = time.time()
    span = end - start
    from_cluster, to_cluster = clusters

    # update network data
    totalsize = get_total_size(src)
    print "(Total size of data transfered: %s)" % totalsize

    if totalsize > 0 and span > 0:
        logline = "%s:%s:%s:%s" % (from_cluster, to_cluster, totalsize, span)
        network_filewriteDataTime(logline)

    return stdout.rstrip()
def get_queue_info():
    """Return the raw stdout of ``mapred queue -list``."""
    _, queue_stdout, _ = _checked_hadoop_fs_command("mapred queue -list")
    return queue_stdout
# Example 3
def hdfs_putf(local_path, hdfs_path):
    """Force-copy a local file into HDFS via ``hadoop fs -put -f``."""
    put_cmd = "hadoop fs -put -f %s %s" % (local_path, hdfs_path)
    _checked_hadoop_fs_command(put_cmd)
# Example 4
def hdfs_rmr(path):
    """Recursively delete *path* from HDFS via ``hadoop fs -rm -r``."""
    remove_cmd = "hadoop fs -rm -r %s" % (path)
    _checked_hadoop_fs_command(remove_cmd)
# Example 5
def hdfs_du(path):
    """
    Return the aggregate size of *path* reported by ``hadoop fs -du -s``.

    :param path: HDFS path to measure.
    :returns: the first whitespace-delimited token of the command's
        stdout (the size column, as a string), or None when the command
        produced no output.
    """
    cmd = "hadoop fs -du -s %s" % (path)
    rcode, stdout, stderr = _checked_hadoop_fs_command(cmd)
    if stdout:
        # split() instead of split(' '): hadoop pads the size column with
        # runs of spaces (or tabs), which would leave split(' ') returning
        # an empty first token if the line ever starts with whitespace.
        return stdout.split()[0]
# Example 6
def hdfs_mkdirp(path):
    """Create *path* (and any missing parents) via ``hadoop fs -mkdir -p``."""
    mkdir_cmd = "hadoop fs -mkdir -p %s" % (path)
    _checked_hadoop_fs_command(mkdir_cmd)