示例#1
0
def consumer_metadata(val):
    """Buffer one metadata item, flushing the buffer to disk first if needed.

    The module-level buffer ``buff_meta`` is written out (and emptied) before
    the new item is added when either:

    * the accumulated serialized size ``size_meta`` exceeds
      ``args.space * 1000000``, or
    * the directory part of ``file_name`` differs from the directory part of
      the previously seen file (``curr_file_index``), and a previous file
      exists.

    Args:
        val: Indexable pair ``(item, file_name)``. ``item`` must provide
            ``SerializeToString()`` (presumably a protobuf message -- confirm
            against the producer); ``file_name`` is a '/'-separated path.
    """
    global buff_meta, size_meta
    global curr_file_index
    item_ = val[0]
    file_name = val[1]

    # Everything before the last '/' is treated as the directory; a name
    # without any '/' yields the empty string.
    dir_changed = (
        curr_file_index != ""
        and curr_file_index.rpartition('/')[0] != file_name.rpartition('/')[0]
    )
    if size_meta > args.space * 1000000 or dir_changed:
        # The buffer can legitimately be empty here (e.g. finalize() already
        # flushed it); there is then nothing to write and no first item to
        # derive a file name from.
        if buff_meta:
            # Output file is named after the MD5 of the first buffered item.
            digest = hashlib.md5(buff_meta[0].SerializeToString()).hexdigest()
            out_path = os.path.join(args.output, digest)
            print('writing meta', out_path)
            # Context manager closes the file even if encoding fails.
            with open(out_path, 'wb') as writer:
                decoder.encodeEntry(writer.write, buff_meta)
            if args.mzip:
                status = subprocess.call(
                    ["7z", 'a', '-mx9', out_path + ".7z", out_path])
                if status == 0:
                    os.remove(out_path)
                else:
                    # Keep the raw file: deleting it after a failed
                    # compression would lose the data.
                    print("7z exited with status %d; keeping %s"
                          % (status, out_path))
        buff_meta = []
        size_meta = 0

    curr_file_index = file_name
    print("%d elements in buffer" % len(buff_meta))
    size_meta += len(item_.SerializeToString())
    buff_meta.append(item_)
def store(buff):
    """Write a buffer of items to a file under ``args.output``.

    The file is named after the MD5 hex digest of the first item's
    ``SerializeToString()`` output. The file's ``write`` method and the whole
    buffer are handed to ``decoder.encodeEntry``, which produces the content.

    Args:
        buff: Non-empty sequence of items exposing ``SerializeToString()``
            (presumably protobuf messages -- confirm against callers).

    Raises:
        IndexError: If ``buff`` is empty (there is no first item to name the
            file after).
    """
    digest = hashlib.md5(buff[0].SerializeToString()).hexdigest()
    # Context manager guarantees the handle is closed even if encoding fails.
    with open(os.path.join(args.output, digest), 'wb') as writer:
        decoder.encodeEntry(writer.write, buff)
def finalize():
    """Flush any items still held in the metadata buffer to disk.

    Does nothing when ``buff_meta`` is empty. Otherwise the buffer is written
    to a file under ``args.output`` named after the MD5 hex digest of the
    first item's ``SerializeToString()`` output, optionally compressed with
    ``7z`` when ``args.mzip`` is set, and the buffer and its byte counter
    are reset.
    """
    # Both names must be declared global: without size_meta here, the reset
    # at the end would only bind a throwaway local and leave the module-level
    # counter stale.
    global buff_meta, size_meta
    if not buff_meta:
        return
    print('writing last set of meta')

    digest = hashlib.md5(buff_meta[0].SerializeToString()).hexdigest()
    out_path = os.path.join(args.output, digest)
    # Context manager closes the file even if encoding fails.
    with open(out_path, 'wb') as writer:
        decoder.encodeEntry(writer.write, buff_meta)
    if args.mzip:
        status = subprocess.call(
            ["7z", 'a', '-mx9', out_path + ".7z", out_path])
        if status == 0:
            os.remove(out_path)
        else:
            # Keep the raw file: deleting it after a failed compression
            # would lose the data.
            print("7z exited with status %d; keeping %s" % (status, out_path))
    buff_meta = []
    size_meta = 0
示例#4
0
def finalize():
    """Write out whatever remains in the metadata buffer, then reset it.

    A no-op when ``buff_meta`` is empty. Otherwise:

    1. The buffer is encoded via ``decoder.encodeEntry`` into a file under
       ``args.output`` whose name is the MD5 hex digest of the first item's
       ``SerializeToString()`` bytes.
    2. If ``args.mzip`` is truthy, the file is archived with ``7z`` and the
       uncompressed file is removed once ``7z`` reports success.
    3. ``buff_meta`` and ``size_meta`` are reset.
    """
    # size_meta has to be declared global as well; otherwise the assignment
    # below creates a local and the module-level byte counter is never reset.
    global buff_meta, size_meta
    if not buff_meta:
        return
    print('writing last set of meta')

    first_item_bytes = buff_meta[0].SerializeToString()
    target = os.path.join(args.output, hashlib.md5(first_item_bytes).hexdigest())
    # 'with' ensures the handle is released even when encodeEntry raises.
    with open(target, 'wb') as writer:
        decoder.encodeEntry(writer.write, buff_meta)
    if args.mzip:
        exit_code = subprocess.call([
            "7z", 'a', '-mx9',
            target + ".7z",
            target,
        ])
        if exit_code == 0:
            os.remove(target)
        else:
            # Do not delete the only copy of the data if archiving failed.
            print("7z exited with status %d; keeping %s" % (exit_code, target))
    buff_meta = []
    size_meta = 0
def consumer_metadata(val):
    """Add one metadata item to the buffer, flushing the buffer first if due.

    A flush of the module-level ``buff_meta`` happens before the item is
    appended when the running serialized size ``size_meta`` is greater than
    ``args.space * 1000000``, or when ``file_name`` lives in a different
    directory than the previously seen file ``curr_file_index`` (ignored
    while no previous file has been recorded).

    Args:
        val: Indexable pair ``(item, file_name)``. ``item`` must expose
            ``SerializeToString()`` (presumably a protobuf message -- confirm
            against the producer); ``file_name`` is a '/'-separated path.
    """
    global buff_meta, size_meta
    global curr_file_index
    item_ = val[0]
    file_name = val[1]

    over_budget = size_meta > args.space * 1000000
    # Directory = everything before the last '/'; '' when there is no '/'.
    previous_dir = curr_file_index.rpartition('/')[0]
    incoming_dir = file_name.rpartition('/')[0]
    moved_dir = curr_file_index != "" and previous_dir != incoming_dir

    if over_budget or moved_dir:
        # An empty buffer (e.g. right after finalize()) has no first item to
        # name the file after, so only the counters are reset in that case.
        if buff_meta:
            name = hashlib.md5(buff_meta[0].SerializeToString()).hexdigest()
            target = os.path.join(args.output, name)
            print('writing meta', target)
            # 'with' releases the handle even when encodeEntry raises.
            with open(target, 'wb') as writer:
                decoder.encodeEntry(writer.write, buff_meta)
            if args.mzip:
                exit_code = subprocess.call(
                    ["7z", 'a', '-mx9', target + ".7z", target])
                if exit_code == 0:
                    os.remove(target)
                else:
                    # Do not delete the only copy if archiving failed.
                    print("7z exited with status %d; keeping %s"
                          % (exit_code, target))
        buff_meta = []
        size_meta = 0

    curr_file_index = file_name
    print("%d elements in buffer" % len(buff_meta))
    size_meta += len(item_.SerializeToString())
    buff_meta.append(item_)