示例#1
0
class DFS(object):
  """Convenience wrapper around an ``Hfilesystem`` HDFS client.

  Callers pass HDFS paths without the ``hdfs://host:port`` prefix. Entries
  returned by ``Hfilesystem.listdir`` are normalised through
  ``_entry_path`` so every method works on the same kind of path.

  NOTE(review): the exact form of ``listdir()`` entries (full URL, absolute
  path or bare name) is not visible in this file; ``_entry_path`` accepts
  all three. Confirm against the Hfilesystem implementation in use.
  """

  def __init__(self, hostname='speedy', port=8020):
    """Create the ``Hfilesystem`` client for ``hostname``:``port``."""
    self.hfs = Hfilesystem(hostname, port)
    # URL prefix stripped from listing entries by _entry_path().
    self.urlhead = 'hdfs://' + hostname + ':' + str(port)
    self.urlheadlen = len(self.urlhead)

  # ---------------------------------------------------------------- helpers

  def _entry_path(self, dirname, entry):
    """Return the prefix-less path of ``entry``, a listing item of ``dirname``."""
    if entry.startswith(self.urlhead):
      return entry[self.urlheadlen:]
    # os.path.join keeps an already-absolute entry and anchors a bare name.
    return os.path.join(dirname, entry)

  def _list_paths(self, dirname):
    """Return the normalised paths of everything listed under ``dirname``."""
    # "or []" guards against a client that reports a vanished dir as None.
    entries = self.hfs.listdir(dirname) or []
    return [self._entry_path(dirname, entry) for entry in entries]

  @staticmethod
  def _basename(path):
    """Return the last component of ``path``, ignoring a trailing slash."""
    return os.path.basename(path.rstrip('/'))

  def _has_tag(self, path, jobname, tag):
    """Return True if the marker ``jobname + tag`` exists inside ``path``."""
    return self.exists(os.path.join(path, jobname + tag))

  def _is_unfinished(self, path, jobname):
    """Return True if ``path`` carries no FINISHED marker for ``jobname``."""
    return not self._has_tag(path, jobname, FINISHED_TAG)

  def _is_buffered(self, path, jobname):
    """Return True if ``path`` has neither FINISHED nor STARTED marker."""
    return (self._is_unfinished(path, jobname)
            and not self._has_tag(path, jobname, STARTED_TAG))

  def _tree_size(self, dirname):
    """Sum ``mSize`` of the files below ``dirname`` (which must exist)."""
    size = 0
    for path in self._list_paths(dirname):
      if self.hfs.isFile(path):
        size += self.hfs.stat(path).mSize
      else:
        size += self.get_dir_size(path)
    return size

  # ------------------------------------------------------------- mutations

  def mkdir(self, dirname):
    """Create ``dirname`` unless it already exists."""
    if not self.exists(dirname):
      self.hfs.mkdir(dirname)

  def rmdir(self, dirname):
    """Remove ``dirname`` if it exists."""
    if self.exists(dirname):
      # The original called self.hfs(dirname), i.e. the client object itself.
      # NOTE(review): `delete` is the removal method python-hdfs exposes;
      # confirm the name against the Hfilesystem class used by this project.
      self.hfs.delete(dirname)

  def rm_rf(self, d):
    """Alias of ``rmdir``."""
    self.rmdir(d)

  def rename(self, srcpath, destpath):
    """Rename ``srcpath`` to ``destpath``, removing an existing destination."""
    # Check HDFS, not the local filesystem (the original used os.path.exists).
    if self.exists(destpath):
      self.rm_rf(destpath)
    self.hfs.rename(srcpath, destpath)

  # --------------------------------------------------------------- queries

  def exists(self, pathname):
    """Return whether ``pathname`` exists according to the client."""
    return self.hfs.exists(pathname)

  def is_done(self, dirname):
    """Return True if the ``DONE_TAG`` marker exists inside ``dirname``."""
    return self.exists(os.path.join(dirname, DONE_TAG))

  def get_abs_subdirs(self, dirname, checkdone=False):
    """Return the prefix-less paths of the sub-directories of ``dirname``.

    With ``checkdone`` true only sub-directories for which ``is_done`` holds
    are returned. A missing ``dirname`` yields an empty list.
    """
    if not self.exists(dirname):
      return []
    return [path for path in self._list_paths(dirname)
            if self.hfs.isDir(path)
            and (not checkdone or self.is_done(path))]

  def get_subdirs(self, dirname, checkdone=False):
    """Like ``get_abs_subdirs`` but return only the directory names."""
    return [self._basename(path)
            for path in self.get_abs_subdirs(dirname, checkdone)]

  def get_unfinished_subdirs(self, dirname, jobname='', checkdone=False):
    """Return names of sub-directories lacking ``jobname + FINISHED_TAG``."""
    return [sdir for sdir in self.get_subdirs(dirname, checkdone)
            if self._is_unfinished(os.path.join(dirname, sdir), jobname)]

  def get_buffered_subdirs(self, dirname, jobname='', checkdone=False):
    """Return names of sub-directories with neither FINISHED nor STARTED tag.

    ``checkdone`` is forwarded to ``get_subdirs``; the original accepted the
    argument but always passed ``False``.
    """
    return [sdir for sdir in self.get_subdirs(dirname, checkdone)
            if self._is_buffered(os.path.join(dirname, sdir), jobname)]

  def get_unfinished_abs_subdirs(self, dirname, jobname='', checkdone=False):
    """Return paths of sub-directories lacking ``jobname + FINISHED_TAG``."""
    return [path for path in self.get_abs_subdirs(dirname, checkdone)
            if self._is_unfinished(path, jobname)]

  def get_buffered_abs_subdirs(self, dirname, jobname='', checkdone=False):
    """Return paths of sub-directories with neither FINISHED nor STARTED tag.

    ``checkdone`` is forwarded to ``get_abs_subdirs``; the original accepted
    the argument but always passed ``False``.
    """
    return [path for path in self.get_abs_subdirs(dirname, checkdone)
            if self._is_buffered(path, jobname)]

  def get_subfiles(self, dirname):
    """Return the names of the files directly inside ``dirname``.

    A missing ``dirname`` yields an empty list.
    """
    if not self.exists(dirname):
      return []
    return [self._basename(path) for path in self._list_paths(dirname)
            if self.hfs.isFile(path)]

  def get_abs_subfiles(self, dirname):
    """Return ``dirname`` joined with each name from ``get_subfiles``."""
    return [os.path.join(dirname, sfile)
            for sfile in self.get_subfiles(dirname)]

  # ---------------------------------------------------------------- counts

  def get_subdir_num(self, dirname):
    """Return the number of sub-directories of ``dirname``."""
    return len(self.get_subdirs(dirname))

  def get_unfinished_subdir_num(self, dirname, jobname='', checkdone=False):
    """Return ``len(get_unfinished_subdirs(...))``, honouring ``checkdone``."""
    return len(self.get_unfinished_subdirs(dirname, jobname, checkdone))

  def get_buffered_subdir_num(self, dirname, jobname='', checkdone=False):
    """Return ``len(get_buffered_subdirs(...))``, honouring ``checkdone``."""
    return len(self.get_buffered_subdirs(dirname, jobname, checkdone))

  def get_subfile_num(self, dirname):
    """Return the number of files directly inside ``dirname``."""
    return len(self.get_subfiles(dirname))

  # ----------------------------------------------------------------- sizes

  def get_dir_size(self, dirname):
    """Return the recursive sum of ``stat().mSize`` below ``dirname``.

    Returns 0 when ``dirname`` does not exist.
    """
    if not self.exists(dirname):
      return 0
    return self._tree_size(dirname)

  def get_unfinished_dir_size(self, dirname, jobname=''):
    """Return the size of ``dirname`` unless it has the FINISHED marker.

    Only ``dirname`` itself is checked for ``jobname + FINISHED_TAG``;
    sub-directories are summed in full via ``get_dir_size``. Returns 0 for a
    missing or finished directory.
    """
    if self.exists(dirname) and self._is_unfinished(dirname, jobname):
      return self._tree_size(dirname)
    return 0

  def get_buffered_dir_size(self, dirname, jobname=''):
    """Return the size of ``dirname`` if it is neither finished nor started.

    Uses the same "buffered" test as ``get_buffered_subdirs`` (the original
    omitted the STARTED_TAG check). Returns 0 otherwise.
    """
    if self.exists(dirname) and self._is_buffered(dirname, jobname):
      return self._tree_size(dirname)
    return 0