Example #1
  def scan(self, ctx, prev_num):
    self.compute_stats()
    ctx.num_visited_files_reporter.increment(1)
    ctx.current_scanned_file_reporter.set(self.path())

    #
    # Check if we have encountered this file during this scan already
    # (i.e., whether it is a hard link to an inode we have processed).
    #
    if self.scan_hlink(ctx):
      logging.info("File %s: HLINK" % self.path())
      return

    #
    # Check if the file is the same as in one of the upper levels
    #
    if self.scan_prev(ctx, prev_num):
      logging.debug("File %s: PREV" % self.path())
      ctx.num_prev_files_reporter.increment(1)
      return
    
    # --- File not yet in database, process it
    file_size = 0
    packer = PackerStream.PackerOStream(self.backup, Container.CODE_DATA)
    with open(self.path(), "rb") as handle:
      for data in FileIO.read_blocks(handle, self.backup.get_block_size()):
        packer.write(data)
        file_size += len(data)
        ctx.num_total_blocks_reporter.increment(1)
        ctx.size_total_blocks_reporter.increment(len(data))
        ctx.update_scan_status()

    self.digest = packer.get_digest()
    self.level = packer.get_level()
    self.update_hlink(ctx)

    logging.info("Scanned file %s size:%d new_blocks:%d new_blocks_size:%d" %
        (self.path(), file_size, packer.get_num_new_blocks(),
          packer.get_size_new_blocks()))

    ctx.num_scanned_files_reporter.increment(1)
    if packer.get_num_new_blocks() != 0:
      ctx.num_new_blocks_reporter.increment(packer.get_num_new_blocks())
      ctx.size_new_blocks_reporter.increment(packer.get_size_new_blocks())
      ctx.num_changed_files_reporter.increment(1)
      ctx.changed_files_reporter.append(self.path())

    if file_size > 256 * 1024:
      logging.debug("File %s is big enough to register in cndb" %
          self.path())
      cndb = self.backup.get_completed_nodes_db()
      assert self.stats is not None
      path_digest = Digest.dataDigest(self.path().encode('utf8'))
      encoded = (self.digest +
          IntegerEncodings.binary_encode_int_varlen(self.level) +
          IntegerEncodings.binary_encode_int_varlen(self.get_type()) +
          serialize_stats(self.get_stats()))

      if path_digest not in cndb or cndb[path_digest] != encoded:
        cndb[path_digest] = encoded
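
Example #1 ends by storing a completed-node record that concatenates four
fields: the content digest, the varint-encoded level, the varint-encoded
node type, and the serialized stats. A minimal decode sketch of that layout,
assuming a fixed 20-byte digest and an LEB128-style varint (both are
illustrative assumptions; the real sizes and encoding are defined by the
project's Digest and IntegerEncodings modules):

DIGEST_SIZE = 20  # assumed digest length, for illustration only

def decode_varint(buf, pos):
  # LEB128-style decoder (assumed format): 7 data bits per byte,
  # high bit set means another byte follows.
  value, shift = 0, 0
  while True:
    byte = buf[pos]
    pos += 1
    value |= (byte & 0x7f) << shift
    if not byte & 0x80:
      return value, pos
    shift += 7

def decode_cndb_record(record):
  buf = bytearray(record)
  digest = bytes(buf[:DIGEST_SIZE])
  level, pos = decode_varint(buf, DIGEST_SIZE)
  node_type, pos = decode_varint(buf, pos)
  stats_blob = bytes(buf[pos:])  # remainder goes to the stats deserializer
  return digest, level, node_type, stats_blob
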
Example #2
 def save_epoch_data(self):
   # So far, the cache is too resource-intensive.
   # Avoid keeping it persistently until it's better optimized.
   return
   longevity_os = StringIO.StringIO()
   for digest, longevity in self.block_longevity.iteritems():
     longevity_os.write(digest)
     longevity_os.write(IE.binary_encode_int_varlen(longevity))
     epoch = self.block_epoch[digest]
     longevity_os.write(IE.binary_encode_int_varlen(epoch))
   self.block_longevity_data["data"] = longevity_os.getvalue()
   self.block_longevity_data["epoch"] = str(self.epoch)
Example #3
 def update_hlink(self, ctx):
   if os.name == 'nt':
     return
   if self.stats[stat.ST_NLINK] == 1:
     return
   inode_num = self.stats[stat.ST_INO]
   if inode_num in ctx.inodes_db:
     return
   ctx.inodes_db[inode_num] = (self.digest +
     IntegerEncodings.binary_encode_int_varlen(self.level))
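
update_hlink records the first path seen for every multiply-linked inode;
scan_hlink, called in Example #1 but not shown on this page, is its lookup
counterpart. A plausible sketch of that counterpart (not the project's
actual implementation; DIGEST_SIZE and decode_varint are the illustrative
helpers from the sketch after Example #1):

 def scan_hlink(self, ctx):
   # Hard-link sharing is only meaningful on POSIX, and only for nlink > 1.
   if os.name == 'nt' or self.stats[stat.ST_NLINK] == 1:
     return False
   inode_num = self.stats[stat.ST_INO]
   if inode_num not in ctx.inodes_db:
     return False
   # Reuse the digest and level recorded by the first link we scanned.
   encoded = ctx.inodes_db[inode_num]
   self.digest = encoded[:DIGEST_SIZE]
   self.level, _ = decode_varint(bytearray(encoded), DIGEST_SIZE)
   return True
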
Example #4
 def write(self, ctx):
   """
   Write the info of the current dir to database
   """
   packer = PackerStream.PackerOStream(self.backup, Container.CODE_DIR)
   # Sorting is an optimization that makes every node access files in the
   # same order.
   # TODO: measure whether this really makes things faster
   # (it probably will with a btree db).
   for child in self.children:
     Format.write_int(packer, child.get_type())
     Format.write_string(packer, child.get_name().encode('utf8'))
     packer.write(child.get_digest())
     packer.write(IntegerEncodings.binary_encode_int_varlen(child.get_level()))
     stats_str = serialize_stats(child.get_stats())
     packer.write(stats_str)
   
   self.digest = packer.get_digest()
   self.level = packer.get_level()
   return (packer.get_num_new_blocks(), packer.get_size_new_blocks())
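
The record written here is a flat sequence of (type, name, digest, level,
stats) entries. A read-side sketch of the same layout; Format.read_int and
Format.read_string are assumed counterparts of the write calls above, and
unserialize_stats, the digest size, and the varint format are likewise
illustrative assumptions:

def read_varint(stream):
  # LEB128-style stream decoder (assumed encoding).
  value, shift = 0, 0
  while True:
    byte = ord(stream.read(1))
    value |= (byte & 0x7f) << shift
    if not byte & 0x80:
      return value
    shift += 7

def read_dir_record(record):
  stream = StringIO.StringIO(record)
  children = []
  while stream.tell() < len(record):
    node_type = Format.read_int(stream)                # assumed counterpart
    name = Format.read_string(stream).decode('utf8')   # assumed counterpart
    digest = stream.read(DIGEST_SIZE)                  # assumed fixed size
    level = read_varint(stream)
    stats = unserialize_stats(stream)                  # hypothetical helper
    children.append((node_type, name, digest, level, stats))
  return children
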
Example #5
    #
    # Update the current dir in completed_nodes_db
    #
    cndb = self.backup.get_completed_nodes_db()
    for subdir in subdirs:
      subdir_path = os.path.join(self.path(), subdir)
      subdir_path_digest = Digest.dataDigest(subdir_path.encode('utf8'))
      if subdir_path_digest in cndb:
        del cndb[subdir_path_digest]
    if self.stats is not None:
      # Stats are None for the root node; we skip storing the root in the
      # cndb anyway, because at this point the increment is already
      # finished.
      digest = Digest.dataDigest(self.path().encode('utf8'))
      encoded = (self.digest +
          IntegerEncodings.binary_encode_int_varlen(self.level) +
          IntegerEncodings.binary_encode_int_varlen(self.get_type()) +
          serialize_stats(self.get_stats()))

      if digest not in cndb or cndb[digest] != encoded:
        cndb[digest] = encoded
        
    if self.digest != prev_digest:
      #print "changed node", self.path()
      ctx.changed_nodes += 1

  def get_percent_done(self):
    if self.cur_scanned_child is None:
      return self.weight * self.processed_percent
    else:
      # Completion assumed: the snippet is cut off here at the source, and
      # adding the scanned child's own progress is the natural continuation.
      return (self.weight * self.processed_percent +
          self.cur_scanned_child.get_percent_done())
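
A quick numeric illustration of the weighting in get_percent_done (all
numbers invented for the example):

# A node responsible for 40% of its parent's work, with half of its own
# work already processed, reports 0.4 * 0.5 = 0.2; while a child is still
# being scanned, that child's partial progress is added on top.
weight = 0.4
processed_percent = 0.5
print(weight * processed_percent)  # 0.2
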
Example #6
def _encode_block_info(seq_idx, container_idx):
    io = StringIO.StringIO()
    io.write(IE.binary_encode_int_varlen(seq_idx))
    io.write(IE.binary_encode_int_varlen(container_idx))
    return io.getvalue()
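
The encoder simply concatenates two varints, so the decoder reads them back
in the same order. A sketch of the inverse, assuming the same LEB128-style
varint as in the earlier sketches (the real format is whatever
IE.binary_encode_int_varlen produces):

def _decode_block_info(encoded):
    buf = bytearray(encoded)
    seq_idx, pos = decode_varint(buf, 0)       # helper from Example #1 sketch
    container_idx, _ = decode_varint(buf, pos)
    return seq_idx, container_idx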