示例#1
0
文件: fcp.py 项目: verolero86/pcircle
def fcp_start():
    global circle, fcp, treewalk

    workq = None  # if fresh start, workq is None

    if not args.rid: # if not in recovery
        treewalk = FWalk(circle, G.src, G.dest, force=args.force)
        circle.begin(treewalk)
        circle.finalize()
        G.totalsize = treewalk.epilogue()
    else:  # okay, let's do checkpoint recovery
        workq = prep_recovery()

    circle = Circle()
    fcp = FCP(circle, G.src, G.dest,
              treewalk=treewalk,
              totalsize=G.totalsize,
              verify=args.verify,
              workq=workq,
              hostcnt=num_of_hosts)

    if comm.rank == 0 and G.verbosity > 0:
        rcl, wcl = fcp.rw_cache_limit()
        print("")
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Read Cache:", "%s" % rcl, "|",
                                                      "Write Cache:", "%s" % wcl))
        print("")
    set_chunksize(fcp, G.totalsize)
    fcp.checkpoint_interval = args.cptime
    fcp.checkpoint_file = ".pcp_workq.%s.%s" % (args.cpid, circle.rank)

    circle.begin(fcp)
    circle.finalize()
    fcp.cleanup()
示例#2
0
def fcp_start():
    global circle, fcp, treewalk

    workq = None  # if fresh start, workq is None

    if not args.rid: # if not in recovery
        treewalk = FWalk(circle, G.src, G.dest, force=args.force)
        circle.begin(treewalk)
        circle.finalize()
        treewalk.epilogue()
    else:  # okay, let's do checkpoint recovery
        workq = prep_recovery()

    circle = Circle(dbname="fcp")
    fcp = FCP(circle, G.src, G.dest,
              treewalk=treewalk,
              totalsize=T.total_filesize,
              verify=args.verify,
              workq=workq,
              hostcnt=num_of_hosts)

    set_chunksize(fcp, T.total_filesize)
    fcp.checkpoint_interval = args.cptime
    fcp.checkpoint_file = ".pcp_workq.%s.%s" % (args.cpid, circle.rank)

    circle.begin(fcp)
    circle.finalize()
    fcp.epilogue()
示例#3
0
文件: fcp.py 项目: mjbach/pcircle
def fcp_start():
    global circle, fcp, treewalk

    workq = None  # if fresh start, workq is None

    if not args.rid: # if not in recovery
        treewalk = FWalk(circle, G.src, G.dest, force=args.force)
        circle.begin(treewalk)
        circle.finalize()
        treewalk.epilogue()
    else:  # okay, let's do checkpoint recovery
        workq = prep_recovery()

    circle = Circle(dbname="fcp")
    fcp = FCP(circle, G.src, G.dest,
              treewalk=treewalk,
              totalsize=T.total_filesize,
              verify=args.verify,
              workq=workq,
              hostcnt=num_of_hosts)

    if comm.rank == 0 and G.verbosity > 0:
        rcl, wcl = fcp.rw_cache_limit()
        print("")
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Read Cache:", "%s" % rcl, "|",
                                                      "Write Cache:", "%s" % wcl))
        print("")
    set_chunksize(fcp, T.total_filesize)
    fcp.checkpoint_interval = args.cptime
    fcp.checkpoint_file = ".pcp_workq.%s.%s" % (args.cpid, circle.rank)

    circle.begin(fcp)
    circle.finalize()
    fcp.epilogue()
示例#4
0
def main():
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format("< ",
                    utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

    if args.stats:
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size), reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(utils.bytes_fmt(globaltops[i].st_size),
                      globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
示例#5
0
def main():
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("fprof version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", G.src))

    circle = Circle()
    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    gen_histogram()

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                fmt_msg = "\tBlocksize: {:<6}   Estimated Space: {:<20s}   Efficiency: {:>6.0%}"
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), total_file_size/float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()
示例#6
0
文件: fsum.py 项目: fwang2/pcircle
def main():
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    #G.use_store = args.use_store
    G.reduce_interval = args.interval
    G.memitem_threshold = args.item

    hosts_cnt = tally_hosts()
    circle = Circle()

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
        print("\t{:<20}{:<20}".format("Items in memory:", G.memitem_threshold))

    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()

    fwalk.epilogue()
    circle.finalize()


    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(T.total_filesize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)

    if circle.rank == 0:
        print("Chunksize = ", chunksize)

    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, T.total_filesize, T.total_files)

    circle.begin(fcheck)
    circle.finalize()

    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")

    """
    chunkl = circle.comm.gather(fcheck.chunkq)

    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")
    """
    if circle.rank > 0:
        circle.comm.send(fcheck.bfsign.bitarray, dest=0)
    else:
        for p in xrange(1, circle.comm.size):
            other_bitarray = circle.comm.recv(source=p)
            fcheck.bfsign.or_bf(other_bitarray)
    circle.comm.Barrier()

    if circle.comm.rank == 0:
        sha1val = fcheck.bfsign.gen_signature()
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)

    fcheck.epilogue()

    if circle.comm.rank == 0:
        if os.path.exists(G.tempdir):
            shutil.rmtree(G.tempdir, ignore_errors=True)
示例#7
0
def main():
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    #G.use_store = args.use_store
    G.reduce_interval = args.interval
    G.memitem_threshold = args.item

    hosts_cnt = tally_hosts()
    circle = Circle()

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:",
                                      MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))
        print("\t{:<20}{:<20}".format("Items in memory:", G.memitem_threshold))

    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()

    fwalk.epilogue()
    circle.finalize()

    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(T.total_filesize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)

    if circle.rank == 0:
        print("Chunksize = ", chunksize)

    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, T.total_filesize,
                      T.total_files)

    circle.begin(fcheck)
    circle.finalize()

    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")
    """
    chunkl = circle.comm.gather(fcheck.chunkq)

    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")
    """
    if circle.rank > 0:
        circle.comm.send(fcheck.bfsign.bitarray, dest=0)
    else:
        for p in xrange(1, circle.comm.size):
            other_bitarray = circle.comm.recv(source=p)
            fcheck.bfsign.or_bf(other_bitarray)
    circle.comm.Barrier()

    if circle.comm.rank == 0:
        sha1val = fcheck.bfsign.gen_signature()
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % T.total_filesize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)

    fcheck.epilogue()

    if circle.comm.rank == 0:
        if os.path.exists(G.tempdir):
            shutil.rmtree(G.tempdir, ignore_errors=True)
示例#8
0
def main():
    global args, comm
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel
    G.use_store = args.use_store
    G.reduce_interval = args.interval



    hosts_cnt = tally_hosts()
    circle = Circle()

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FSUM version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    fwalk = FWalk(circle, G.src)
    circle.begin(fwalk)
    if G.use_store:
        fwalk.flushdb()
    totalsize = fwalk.epilogue()
    circle.finalize()

    # by default, we use adaptive chunksize
    chunksize = utils.calc_chunksize(totalsize)
    if args.chunksize:
        chunksize = conv_unit(args.chunksize)

    if circle.rank == 0:
        print("Chunksize = ", chunksize)

    circle = Circle()
    fcheck = Checksum(circle, fwalk, chunksize, totalsize)

    circle.begin(fcheck)
    circle.finalize()

    if circle.rank == 0:
        sys.stdout.write("\nAggregating ... ")

    chunkl = circle.comm.gather(fcheck.chunkq)

    if circle.rank == 0:
        chunks = [item for sublist in chunkl for item in sublist]
        chunks.sort()
        sys.stdout.write("%s chunks\n" % len(chunks))
        sha1val = do_checksum(chunks)
        with open(args.output, "w") as f:
            f.write("sha1: %s\n" % sha1val)
            f.write("chunksize: %s\n" % chunksize)
            f.write("fwalk version: %s\n" % __version__)
            f.write("src: %s\n" % utils.choplist(G.src))
            f.write("date: %s\n" % utils.current_time())
            f.write("totalsize: %s\n" % totalsize)

        print("\nSHA1: %s" % sha1val)
        print("Signature file: [%s]" % args.output)
        if args.export_block_signatures:
            export_checksum2(chunks, args.output)
            print("Exporting block signatures ... \n")

    fcheck.epilogue()
示例#9
0
文件: fcp.py 项目: verolero86/pcircle
def main():
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle()
    circle.dbname = dbname

    if args.rid:
        circle.resume = True
        args.signature = False # when recovery, no signature

    if not args.cpid:
        ts = utils.timestamp()
        args.cpid = circle.comm.bcast(ts)

    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
            "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
            "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
            "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))

        #
        if args.verbosity > 0:
            print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))


    fcp_start()

    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        circle = Circle()
        pcheck = PVerify(circle, fcp, G.totalsize)
        circle.begin(pcheck)
        tally = pcheck.fail_tally()
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Result:", "FAILED"))

        comm.Barrier()

        if args.signature and tally == 0:
            gen_signature(fcp, G.totalsize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()

    if fcp:
        fcp.epilogue()
        fcp.cleanup()

    # if circle:
    #     circle.finalize(cleanup=True)
    # TODO: a close file error can happen when circle.finalize()
    #
    if isinstance(circle.workq, DbStore):
        circle.workq.cleanup()
示例#10
0
def main():
    global args, log, circle, fcp, treewalk
    # This might be an overkill function
    signal.signal(signal.SIGINT, sig_handler)
    args = parse_and_bcast(comm, gen_parser)
    tally_hosts()
    G.loglevel = args.loglevel
    G.fix_opt = False if args.no_fixopt else True
    G.preserve = args.preserve
    G.resume = True if args.cpid else False
    G.reduce_interval = args.reduce_interval
    G.verbosity = args.verbosity
    G.am_root = True if os.geteuid() == 0 else False
    G.memitem_threshold = args.item

    if args.signature:  # with signature implies doing verify as well
        args.verify = True

    if args.rid:
        G.resume = True
        args.force = True
        G.rid = args.rid
        args.signature = False # when recovery, no signature

    if not args.cpid:
        ts = utils.timestamp()
        args.cpid = MPI.COMM_WORLD.bcast(ts)

    G.tempdir = os.path.join(os.getcwd(),(".pcircle" + args.cpid))
    if not os.path.exists(G.tempdir):
        try:
            os.mkdir(G.tempdir)
        except OSError:
            pass

    G.src, G.dest = check_source_and_target(args.src, args.dest)
    dbname = get_workq_name()

    circle = Circle(dbname="fwalk")
    #circle.dbname = dbname

    global oflimit

    if num_of_hosts != 0:
        max_ofile, _ = resource.getrlimit(resource.RLIMIT_NOFILE)
        procs_per_host = circle.size // num_of_hosts
        oflimit = ((max_ofile - 64) // procs_per_host) // 2
    if oflimit < 8:
            oflimit = 8


    if circle.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<25}{:<20}".format("Starting at:", utils.current_time()))
        print("\t{:<25}{:<20}".format("FCP version:", __version__))
        print("\t{:<25}{:<20}".format("Source:", utils.choplist(G.src)))
        print("\t{:<25}{:<20}".format("Destination:", os.path.abspath(args.dest)))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Num of Hosts:", num_of_hosts, "|",
            "Num of Processes:", comm.size))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Overwrite:", "%r" % args.force, "|",
            "Copy Verification:", "%r" % args.verify))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Dataset signature:", "%r" % args.signature, "|",
            "Stripe Preserve:", "%r" % G.preserve))
        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Checkpoint interval:", "%s" % utils.conv_time(args.cptime), "|",
            "Checkpoint ID:", "%s" % args.cpid))

        print("\t{:<25}{:<10}{:5}{:<25}{:<10}".format("Items in memory: ",
            " % r" % G.memitem_threshold, "|", "O file limit", "%s" % oflimit))
        #
        if args.verbosity > 0:
            print("\t{:<25}{:<20}".format("Copy Mode:", G.copytype))

    fcp_start()

    if args.pause and args.verify:
        if circle.rank == 0:
            # raw_input("\n--> Press any key to continue ...\n")
            print("Pause, resume after %s seconds ..." % args.pause)
            sys.stdout.flush()
        time.sleep(args.pause)
        circle.comm.Barrier()

    # do checksum verification
    if args.verify:
        circle = Circle(dbname="verify")
        pcheck = PVerify(circle, fcp, G.total_chunks, T.total_filesize, args.signature)
        circle.begin(pcheck)
        circle.finalize()
        tally = pcheck.fail_tally()
        tally = comm.bcast(tally)
        if circle.rank == 0:
            print("")
            if tally == 0:
                print("\t{:<20}{:<20}".format("Verify result:", "PASS"))
            else:
                print("\t{:<20}{:<20}".format("Verify result:", "FAILED"))

        comm.Barrier()

        if args.signature and tally == 0:
            gen_signature(pcheck.bfsign, T.total_filesize)

    # fix permission
    comm.Barrier()
    if G.fix_opt and treewalk:
        if comm.rank == 0:
            print("\nFixing ownership and permissions ...")
        fix_opt(treewalk)

    if treewalk:
        treewalk.cleanup()
    if fcp:
        fcp.cleanup()
    #if circle:
    #    circle.finalize(cleanup=True)
    comm.Barrier()
    if comm.rank == 0:
        try:
            os.rmdir(G.tempdir)
        except:
            pass
示例#11
0
def main():
    global comm, args
    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.use_store = args.use_store
    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("FWALK version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:",
                                      MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", utils.choplist(G.src)))

    circle = Circle()
    treewalk = FWalk(circle, G.src)
    circle.begin(treewalk)

    if G.use_store:
        treewalk.flushdb()

    if args.stats:
        hist = global_histogram(treewalk)
        total = hist.sum()
        bucket_scale = 0.5
        if comm.rank == 0:
            print("\nFileset histograms:\n")
            for idx, rightbound in enumerate(bins[1:]):
                percent = 100 * hist[idx] / float(total)
                star_count = int(bucket_scale * percent)
                print("\t{:<3}{:<15}{:<8}{:<8}{:<50}".format(
                    "< ", utils.bytes_fmt(rightbound), hist[idx],
                    "%0.2f%%" % percent, '∎' * star_count))

    if args.stats:
        treewalk.flist.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size),
                            reverse=True)
        globaltops = comm.gather(treewalk.flist[:args.top])
        if comm.rank == 0:
            globaltops = [item for sublist in globaltops for item in sublist]
            globaltops.sort(lambda f1, f2: cmp(f1.st_size, f2.st_size),
                            reverse=True)
            if len(globaltops) < args.top:
                args.top = len(globaltops)
            print("\nStats, top %s files\n" % args.top)
            for i in xrange(args.top):
                print("\t{:15}{:<30}".format(
                    utils.bytes_fmt(globaltops[i].st_size),
                    globaltops[i].path))

    treewalk.epilogue()
    treewalk.cleanup()
    circle.finalize()
示例#12
0
文件: fprof.py 项目: mjbach/pcircle
def main():
    global comm, args, stripe_out, DIR_BINS, DIR_HIST

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.memitem_threshold = args.item
    G.loglevel = args.loglevel
    hosts_cnt = tally_hosts()

    # doing directory profiling?
    if args.dirprof:
        # check the input
        if args.dirbins is None:
            # err_and_exit("Error: missing directory bin parameters: a sorted integer list\n")
            args.dirbins = [
                0, 10, 100, 1000, 10**4, 10**5, 10**6, 10**7, 10**8
            ]
        else:
            myList = sorted(set(args.dirbins))
            if myList != args.dirbins:
                err_and_exit("Error: duplicated, or unsorted bins: %s\n" %
                             args.dirbins)

        DIR_BINS = args.dirbins
        DIR_HIST = [0] * (len(DIR_BINS) + 1)

    # Doing stripe analysis? lfs is not really bullet-proof way
    # we might need a better way of doing fstype check.

    if args.lustre_stripe:
        G.lfs_bin = lfs.check_lfs()
        G.stripe_threshold = utils.conv_unit(args.stripe_threshold)
        try:
            stripe_out = os.open(args.stripe_output,
                                 os.O_CREAT | os.O_WRONLY | os.O_APPEND)
        except:
            err_and_exit("Error: can't create stripe output: %s" %
                         args.stripe_output)

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{0:<20}{1:<20}".format("fprof version:", __version__))
        print("\t{0:<20}{1:<20}".format("Full rev id:", __revid__))
        print("\t{0:<20}{1:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{0:<20}{1:<20}".format("Num of processes:",
                                        MPI.COMM_WORLD.Get_size()))

        if args.syslog:
            print("\t{0:<20}{1:<20}".format("Syslog report: ", "yes"))
        else:
            print("\t{0:<20}{1:<20}".format("Syslog report: ", "no"))

        if args.dirprof:
            print("\t{0:<20}{1:<20}".format("Dir bins: ", args.dirbins))

        if args.lustre_stripe:
            print("\t{0:<20}{1:<20}".format("Stripe analysis: ", "yes"))
            print("\t{0:<20}{1:<20}".format("Stripe threshold: ",
                                            args.stripe_threshold))
        else:
            print("\t{0:<20}{1:<20}".format("Stripe analysis: ", "no"))
        print("\t{0:<20}{1:<20}".format("Root path:", G.src))

        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.report_enabled = False

    if args.progress:
        circle.report_enabled = False
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if args.dirprof:
        gen_directory_histogram()

    if comm.rank == 0 and args.syslog:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.topn_files:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop N File Report:\n")
            # edge case: not enough files (< args.top)
            totaln = args.topn_files if len(
                topfiles) > args.topn_files else len(topfiles)
            for index, _ in enumerate(xrange(totaln)):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" %
                      (index + 1, path, utils.bytes_fmt(size)))
            print("")

    if args.topn_dirs:
        topdirs = gather_topdirs()
        if comm.rank == 0:
            print("\nTop N Directory Report:\n")
            totaln = args.topn_dirs if len(topdirs) > args.topn_dirs else len(
                topdirs)
            for index, _ in enumerate(xrange(totaln)):
                size, path = topdirs[index]
                print("\t{0:}: {1:}  ({2:,} items)".format(
                    index + 1, path, size))

            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        gather_gpfs_dii()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\t{0:<15}{1:<4}".format("inode size:", args.inodesz))
            print("\t{0:<25}{1:>15,}".format("DII (data-in-inode) count:",
                                             DII_COUNT))
            print("\tSubblocks: %s\n" % gpfs_blocks)
            fmt_msg = "\tBlocksize: {0:<6}   Estimated Space: {1:<20s}   Efficiency: {2:>6.2%}"
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]

                if gpfs_file_size != 0:
                    print(
                        fmt_msg.format(bsz, bytes_fmt(gpfs_file_size),
                                       total_file_size /
                                       float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()

    if args.lustre_stripe and stripe_out:
        os.close(stripe_out)

        sp_workload = comm.gather(Tally.spcnt)
        if comm.rank == 0:
            print("Stripe workload total: %s, distribution: %s" %
                  (sum(sp_workload), sp_workload))
示例#13
0
文件: fprof.py 项目: xsswfm/pcircle
def main():
    global comm, args

    fpipe.listen()

    args = parse_and_bcast(comm, gen_parser)

    try:
        G.src = utils.check_src2(args.path)
    except ValueError as e:
        err_and_exit("Error: %s not accessible" % e)

    G.loglevel = args.loglevel

    hosts_cnt = tally_hosts()

    if args.exclude:
        process_exclude_file()

    if comm.rank == 0:
        print("Running Parameters:\n")
        print("\t{:<20}{:<20}".format("fprof version:", __version__))
        print("\t{:<20}{:<20}".format("Num of hosts:", hosts_cnt))
        print("\t{:<20}{:<20}".format("Num of processes:", MPI.COMM_WORLD.Get_size()))
        print("\t{:<20}{:<20}".format("Root path:", G.src))

        if args.exclude:
            print("\nExclusions:\n")
            for ele in EXCLUDE:
                print("\t %s" % ele)

    circle = Circle()
    if args.perprocess:
        circle.report_enabled = True
    else:
        circle.reduce_enabled = True

    treewalk = ProfileWalk(circle, G.src, perfile=args.perfile)
    circle.begin(treewalk)

    # we need the total file size to calculate GPFS efficiency
    total_file_size = treewalk.epilogue()

    msg1, msg2 = gen_histogram(total_file_size)

    if comm.rank == 0:
        sendto_syslog("fprof.filecount.hist", msg1)
        sendto_syslog("fprof.fsize_perc.hist", msg2)

    if args.top:
        topfiles = gather_topfiles()
        if comm.rank == 0:
            print("\nTop File Report:\n")
            # edge case: not enough files (< args.top)
            totaln = args.top if len(topfiles) > args.top else len(topfiles)
            for index, _ in enumerate(xrange(totaln)):
                size, path = topfiles[index]
                print("\t%s: %s (%s)" % (index + 1,
                                       path,
                                       utils.bytes_fmt(size)))
            print("")

    if args.gpfs_block_alloc:
        gpfs_blocks = gather_gpfs_blocks()
        if comm.rank == 0:
            print("\nGPFS Block Alloc Report:\n")
            print("\tinode size: %s" % args.inodesz)
            print("\tDII (data-in-inode) count: %s" % DII_COUNT)
            print("\tSubblocks: %s\n" % gpfs_blocks)
            for idx, bsz in enumerate(G.gpfs_block_size):
                gpfs_file_size = gpfs_blocks[idx] * G.gpfs_subs[idx]
                fmt_msg = "\tBlocksize: {:<6}   Estimated Space: {:<20s}   Efficiency: {:>6.2%}"
                if gpfs_file_size != 0:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), total_file_size/float(gpfs_file_size)))
                else:
                    print(fmt_msg.format(bsz, bytes_fmt(gpfs_file_size), 0))

    treewalk.cleanup()
    circle.finalize()