def extract(path):
    """Extract a tar, bzipped, or gzipped file.

    Builds a ``tar``, ``bunzip2`` or ``gunzip`` command line for *path* and
    passes it to ``system``. Tar archives are unpacked into the directory
    returned by ``pathutil.getDirectory(path)``.

    Returns a ``(root, ext)`` tuple: *ext* is the suffix reported by
    ``pathutil.tarSuffix`` / ``pathutil.zipSuffix`` and *root* is *path*
    with that suffix stripped. When *path* is neither tarred nor zipped no
    command is run and ``(path, "")`` is returned, so ``root + ext == path``
    holds in every case.
    """
    from hpf.utilities import paths as pathutil

    cmd = None
    root = path
    # Default for the "not an archive" case; without it the final return
    # raised UnboundLocalError because ext was only bound in the branches.
    ext = ""
    if pathutil.isTarred(path):
        tar, suf = pathutil.tarSuffix(path)
        # Single parenthesised argument: prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("yep TARRED %s" % path)
        options = "-C %s -x" % pathutil.getDirectory(path)
        # See if it's zipped: add the matching decompression flag.
        if tar != pathutil.TARRED:
            options += conditional(tar == pathutil.BZIPPED, "j", "z")
        # "f" must come last so the archive path directly follows it.
        options += "f"
        cmd = "tar %s %s" % (options, path)
        ext, root = suf, path[:-len(suf)]
    elif pathutil.isZipped(path):
        kind, suf = pathutil.zipSuffix(path)
        cmd = conditional(kind == pathutil.BZIPPED, "bunzip2 %s", "gunzip %s")
        cmd = cmd % path
        ext, root = suf, path[:-len(suf)]
    if cmd is not None:
        # NOTE(review): path is interpolated unquoted; if `system` goes
        # through a shell, paths with spaces or metacharacters will
        # misbehave -- confirm against the definition of `system`.
        system(cmd)
    return root, ext
def put(src, dest):
    """Copy *src* to *dest*, using a hadoop put for HDFS destinations.

    When ``hdfs.isHdfs(dest)`` is true (described by the original author as
    *dest* being prefixed with ``hdfs://``) the transfer is delegated to
    ``hdfs.put``. Otherwise the directory returned by
    ``pathutil.getDirectory(dest)`` is passed to ``pathutil.ensure`` and
    the file is then copied with ``copy``.
    """
    from hadoop import hdfs
    # Imported locally, as extract() does, so this function does not rely
    # on a module-level `pathutil` binding being present.
    from hpf.utilities import paths as pathutil

    if hdfs.isHdfs(dest):
        hdfs.put(src, dest)
    else:
        pathutil.ensure(pathutil.getDirectory(dest))
        copy(src, dest)