示例#1
0
def dst_dir_report(jobs, dstdirs, width, prefix=''):
    tab = tt.Texttable()
    dir2oldphase = manager.dstdirs_to_furthest_phase(jobs)
    dir2newphase = manager.dstdirs_to_youngest_phase(jobs)
    headings = ['dst', 'plots', 'GBfree', 'inbnd phases', 'pri']
    tab.header(headings)
    tab.set_cols_dtype('t' * len(headings))

    for d in sorted(dstdirs):
        # TODO: This logic is replicated in archive.py's priority computation,
        # maybe by moving more of the logic in to directory.py
        eldest_ph = dir2oldphase.get(d, job.Phase(0, 0))
        phases = job.job_phases_for_dstdir(d, jobs)

        dir_plots = plot_util.list_k32_plots(d)
        gb_free = int(plot_util.df_b(d) / plot_util.GB)
        n_plots = len(dir_plots)
        priority = archive.compute_priority(eldest_ph, gb_free, n_plots)
        row = [
            abbr_path(d, prefix), n_plots, gb_free,
            phases_str(phases, 5), priority
        ]
        tab.add_row(row)
    tab.set_max_width(width)
    tab.set_deco(tt.Texttable.BORDER | tt.Texttable.HEADER)
    tab.set_deco(0)  # No borders
    return tab.draw()
示例#2
0
def archive(dir_cfg, all_jobs):
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.  Returns either (False, <reason>) 
    if we should not execute an archive job or (True, <cmd>) with the archive
    command if we should.'''
    if dir_cfg.archive is None:
        return (False, "No 'archive' settings declared in plotman.yaml")

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None

    for d in dir_cfg.dst:
        ph = dir2ph.get(d, (0, 0))
        dir_plots = plot_util.list_k32_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots) 
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found')

    archdir = '/volume1/chia_plots'

    bwlimit = dir_cfg.archive.rsyncd_bwlimit
    throttle_arg = ('--bwlimit=%d' % bwlimit) if bwlimit else ''
    cmd = ('rsync %s --remove-source-files -P %s %s' %
            (throttle_arg, chosen_plot, rsync_dest(dir_cfg.archive, archdir)))

    return (True, cmd)
示例#3
0
def test_dstdirs_to_furthest_phase():
    all_jobs = [ job_w_dstdir_phase('/plots1', (1, 5)),
                 job_w_dstdir_phase('/plots2', (1, 1)),
                 job_w_dstdir_phase('/plots2', (3, 1)),
                 job_w_dstdir_phase('/plots2', (2, 1)),
                 job_w_dstdir_phase('/plots3', (4, 1)) ]

    assert (manager.dstdirs_to_furthest_phase(all_jobs) ==
            { '/plots1' : (1, 5),
              '/plots2' : (3, 1),
              '/plots3' : (4, 1) } )
示例#4
0
def archive(dir_cfg, all_jobs):
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.  Returns either (False, <reason>) 
    if we should not execute an archive job or (True, <cmd>) with the archive
    command if we should.'''
    if dir_cfg.archive is None:
        return (False, "No 'archive' settings declared in plotman.yaml")

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None

    for d in dir_cfg.dst:
        ph = dir2ph.get(d, (0, 0))
        dir_plots = plot_util.list_k32_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots) 
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found')

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes = get_archdir_freebytes(dir_cfg.archive)
    if not archdir_freebytes:
        return(False, 'No free archive dirs found.')
    
    archdir = ''
    available = [(d, space) for (d, space) in archdir_freebytes.items() if 
                 space > 1.2 * plot_util.get_k32_plotsize()]
    if len(available) > 0:
        index = min(dir_cfg.archive.index, len(available) - 1)
        (archdir, freespace) = sorted(available)[index]

    if not archdir:
        return(False, 'No archive directories found with enough free space')
    
    msg = 'Found %s with ~%d GB free' % (archdir, freespace / plot_util.GB)

    bwlimit = dir_cfg.archive.rsyncd_bwlimit
    throttle_arg = ('--bwlimit=%d' % bwlimit) if bwlimit else ''
    cmd = ('rsync %s --remove-source-files -P -e "ssh -T -c [email protected] -o Compression=no -x" %s %s' %
            (throttle_arg, chosen_plot, rsync_dest(dir_cfg.archive, archdir)))

    return (True, cmd)
示例#5
0
def test_dstdirs_to_furthest_phase() -> None:
    all_jobs = [
        job_w_dstdir_phase('/plots1', job.Phase(1, 5)),
        job_w_dstdir_phase('/plots2', job.Phase(1, 1)),
        job_w_dstdir_phase('/plots2', job.Phase(3, 1)),
        job_w_dstdir_phase('/plots2', job.Phase(2, 1)),
        job_w_dstdir_phase('/plots3', job.Phase(4, 1))
    ]

    assert (manager.dstdirs_to_furthest_phase(all_jobs) == {
        '/plots1': job.Phase(1, 5),
        '/plots2': job.Phase(3, 1),
        '/plots3': job.Phase(4, 1)
    })
示例#6
0
def main() -> None:
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            print(
                f"To generate a default config file, run: 'plotman config generate'"
            )
            return
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                    if overwrite == 'n':
                        print("\nExited without overrwriting file")
                        return

            # Copy the default plotman.yaml (packaged in plotman/resources/) to the user's config file path,
            # creating the parent plotman file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources,
                                          "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)

                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
                return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
            return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    preset_target_definitions_text = importlib.resources.read_text(
        plotman_resources,
        "target_definitions.yaml",
    )

    cfg = configuration.get_validated_configs(config_text, config_path,
                                              preset_target_definitions_text)

    with cfg.setup():
        root_logger = logging.getLogger()
        root_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.application,
            maxBytes=10_000_000,
        )
        root_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        root_handler.setFormatter(root_formatter)
        root_logger.addHandler(root_handler)
        root_logger.setLevel(logging.INFO)
        root_logger.info('Start root logger')

        disk_space_logger = logging.getLogger("disk_space")
        disk_space_logger.propagate = False
        disk_space_handler = logging.handlers.RotatingFileHandler(
            backupCount=10,
            encoding='utf-8',
            filename=cfg.logging.disk_spaces,
            maxBytes=10_000_000,
        )
        disk_space_formatter = Iso8601Formatter(fmt='%(asctime)s: %(message)s')
        disk_space_handler.setFormatter(disk_space_formatter)
        disk_space_logger.addHandler(disk_space_handler)
        disk_space_logger.setLevel(logging.INFO)
        disk_space_logger.info('Start disk space logger')

        #
        # Stay alive, spawning plot jobs
        #
        if args.cmd == 'plot':
            print('...starting plot loop')
            while True:
                (started,
                 msg) = manager.maybe_start_new_plot(cfg.directories,
                                                     cfg.scheduling,
                                                     cfg.plotting, cfg.logging)

                # TODO: report this via a channel that can be polled on demand, so we don't spam the console
                if started:
                    print('%s' % (msg))
                else:
                    print('...sleeping %d s: %s' %
                          (cfg.scheduling.polling_time_s, msg))
                root_logger.info('[plot] %s', msg)

                time.sleep(cfg.scheduling.polling_time_s)

        #
        # Analysis of completed jobs
        #
        elif args.cmd == 'analyze':

            analyzer.analyze(args.logfile, args.clipterminals, args.bytmp,
                             args.bybitfield)

        #
        # Exports log metadata to CSV
        #
        elif args.cmd == 'export':
            logfilenames = glob.glob(
                os.path.join(cfg.logging.plots, '*.plot.log'))
            if args.save_to is None:
                csv_exporter.generate(logfilenames=logfilenames,
                                      file=sys.stdout)
            else:
                with open(args.save_to, 'w', encoding='utf-8') as file:
                    csv_exporter.generate(logfilenames=logfilenames, file=file)

        else:
            jobs = Job.get_running_jobs(cfg.logging.plots)

            # Status report
            if args.cmd == 'status':
                if args.json:
                    # convert jobs list into json
                    result = reporting.json_report(jobs)
                else:
                    result = "{0}\n\n{1}\n\nUpdated at: {2}".format(
                        reporting.status_report(jobs, get_term_width()),
                        reporting.summary(jobs),
                        datetime.datetime.today().strftime("%c"),
                    )
                print(result)

            # Prometheus report
            if args.cmd == 'prometheus':
                print(reporting.prometheus_report(jobs))

            # Directories report
            elif args.cmd == 'dirs':
                print(
                    reporting.dirs_report(jobs, cfg.directories, cfg.archiving,
                                          cfg.scheduling, get_term_width()))

            elif args.cmd == 'interactive':
                interactive.run_interactive(
                    cfg=cfg,
                    autostart_plotting=args.autostart_plotting,
                    autostart_archiving=args.autostart_archiving,
                )

            # Start running archival
            elif args.cmd == 'archive':
                if cfg.archiving is None:
                    start_msg = 'archiving not configured but is required for this command'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                else:
                    start_msg = '...starting archive loop'
                    print(start_msg)
                    root_logger.info('[archive] %s', start_msg)
                    firstit = True
                    while True:
                        if not firstit:
                            print('Sleeping %d s until next iteration...' %
                                  (cfg.scheduling.polling_time_s))
                            time.sleep(cfg.scheduling.polling_time_s)
                            jobs = Job.get_running_jobs(cfg.logging.plots)
                        firstit = False

                        archiving_status, log_messages = archive.spawn_archive_process(
                            cfg.directories, cfg.archiving, cfg.logging, jobs)
                        if log_messages:
                            for log_message in log_messages:
                                print(log_message)
                                root_logger.info('[archive] %s', log_message)
                        else:
                            root_logger.info('[archive] %s', archiving_status)

            # Debugging: show the destination drive usage schedule
            elif args.cmd == 'dsched':
                for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                    print('  %s : %s' % (d, str(ph)))

            #
            # Job control commands
            #
            elif args.cmd in [
                    'details', 'logs', 'files', 'kill', 'suspend', 'resume'
            ]:
                print(args)

                selected = []

                # TODO: clean up treatment of wildcard
                if args.idprefix[0] == 'all':
                    selected = jobs
                else:
                    # TODO: allow multiple idprefixes, not just take the first
                    selected = manager.select_jobs_by_partial_id(
                        jobs, args.idprefix[0])
                    if (len(selected) == 0):
                        print('Error: %s matched no jobs.' % args.idprefix[0])
                    elif len(selected) > 1:
                        print('Error: "%s" matched multiple jobs:' %
                              args.idprefix[0])
                        for j in selected:
                            print('  %s' % j.plot_id)
                        selected = []

                for job in selected:
                    if args.cmd == 'details':
                        print(job.status_str_long())

                    elif args.cmd == 'logs':
                        job.print_logs(args.follow)

                    elif args.cmd == 'files':
                        temp_files = job.get_temp_files()
                        for f in temp_files:
                            print('  %s' % f)

                    elif args.cmd == 'kill':
                        # First suspend so job doesn't create new files
                        print('Pausing PID %d, plot id %s' %
                              (job.proc.pid, job.plot_id))
                        job.suspend()

                        temp_files = job.get_temp_files()

                        print('Will kill pid %d, plot id %s' %
                              (job.proc.pid, job.plot_id))
                        print('Will delete %d temp files' % len(temp_files))

                        if args.force:
                            conf = 'y'
                        else:
                            conf = input('Are you sure? ("y" to confirm): ')

                        if (conf != 'y'):
                            print(
                                'Canceled.  If you wish to resume the job, do so manually.'
                            )
                        else:
                            print('killing...')

                            job.cancel()

                            print('cleaning up temp files...')

                            for f in temp_files:
                                os.remove(f)

                    elif args.cmd == 'suspend':
                        print('Suspending ' + job.plot_id)
                        job.suspend()
                    elif args.cmd == 'resume':
                        print('Resuming ' + job.plot_id)
                        job.resume()
示例#7
0
def archive(
    dir_cfg: configuration.Directories, arch_cfg: configuration.Archiving,
    all_jobs: typing.List[job.Job]
) -> typing.Tuple[bool, typing.Optional[typing.Union[typing.Dict[str, object],
                                                     str]], typing.List[str]]:
    '''Configure one archive job.  Needs to know all jobs so it can avoid IO
    contention on the plotting dstdir drives.  Returns either (False, <reason>)
    if we should not execute an archive job or (True, <cmd>) with the archive
    command if we should.'''
    log_messages: typing.List[str] = []
    if arch_cfg is None:
        return (False, "No 'archive' settings declared in plotman.yaml",
                log_messages)

    dir2ph = manager.dstdirs_to_furthest_phase(all_jobs)
    best_priority = -100000000
    chosen_plot = None
    dst_dir = dir_cfg.get_dst_directories()
    for d in dst_dir:
        ph = dir2ph.get(d, job.Phase(0, 0))
        dir_plots = plot_util.list_plots(d)
        gb_free = plot_util.df_b(d) / plot_util.GB
        n_plots = len(dir_plots)
        priority = compute_priority(ph, gb_free, n_plots)
        if priority >= best_priority and dir_plots:
            best_priority = priority
            chosen_plot = dir_plots[0]

    if not chosen_plot:
        return (False, 'No plots found', log_messages)

    # TODO: sanity check that archive machine is available
    # TODO: filter drives mounted RO

    #
    # Pick first archive dir with sufficient space
    #
    archdir_freebytes, freebytes_log_messages = get_archdir_freebytes(arch_cfg)
    log_messages.extend(freebytes_log_messages)
    if not archdir_freebytes:
        return (False, 'No free archive dirs found.', log_messages)

    archdir = ''
    chosen_plot_size = os.stat(chosen_plot).st_size
    # 10MB is big enough to outsize filesystem block sizes hopefully, but small
    # enough to make this a pretty tight corner for people to get stuck in.
    free_space_margin = 10_000_000
    available = [(d, space) for (d, space) in archdir_freebytes.items()
                 if space > (chosen_plot_size + free_space_margin)]
    if len(available) > 0:
        index = arch_cfg.index % len(available)
        (archdir, freespace) = sorted(available)[index]

    if not archdir:
        return (False, 'No archive directories found with enough free space',
                log_messages)

    env = arch_cfg.environment(
        source=chosen_plot,
        destination=archdir,
    )
    subprocess_arguments: typing.Dict[str, object] = {
        'args': arch_cfg.target_definition().transfer_path,
        'env': {
            **os.environ,
            **env
        }
    }

    return (True, subprocess_arguments, log_messages)
示例#8
0
def main():
    random.seed()

    pm_parser = PlotmanArgParser()
    args = pm_parser.parse_args()

    if args.cmd == 'version':
        import pkg_resources
        print(pkg_resources.get_distribution('plotman'))
        return

    elif args.cmd == 'config':
        config_file_path = configuration.get_path()
        if args.config_subcommand == 'path':
            if os.path.isfile(config_file_path):
                print(config_file_path)
                return
            print(
                f"No 'plotman.yaml' file exists at expected location: '{config_file_path}'"
            )
            print(
                f"To generate a default config file, run: 'plotman config generate'"
            )
            return 1
        if args.config_subcommand == 'generate':
            if os.path.isfile(config_file_path):
                overwrite = None
                while overwrite not in {"y", "n"}:
                    overwrite = input(
                        f"A 'plotman.yaml' file already exists at the default location: '{config_file_path}' \n\n"
                        "\tInput 'y' to overwrite existing file, or 'n' to exit without overwrite."
                    ).lower()
                    if overwrite == 'n':
                        print("\nExited without overrwriting file")
                        return

            # Copy the default plotman.yaml (packaged in plotman/resources/) to the user's config file path,
            # creating the parent plotman file/directory if it does not yet exist
            with importlib.resources.path(plotman_resources,
                                          "plotman.yaml") as default_config:
                config_dir = os.path.dirname(config_file_path)

                os.makedirs(config_dir, exist_ok=True)
                copyfile(default_config, config_file_path)
                print(f"\nWrote default plotman.yaml to: {config_file_path}")
                return

        if not args.config_subcommand:
            print("No action requested, add 'generate' or 'path'.")
            return

    config_path = configuration.get_path()
    config_text = configuration.read_configuration_text(config_path)
    cfg = configuration.get_validated_configs(config_text, config_path)

    #
    # Stay alive, spawning plot jobs
    #
    if args.cmd == 'plot':
        print('...starting plot loop')
        while True:
            wait_reason = manager.maybe_start_new_plot(cfg.directories,
                                                       cfg.scheduling,
                                                       cfg.plotting)

            # TODO: report this via a channel that can be polled on demand, so we don't spam the console
            if wait_reason:
                print('...sleeping %d s: %s' %
                      (cfg.scheduling.polling_time_s, wait_reason))

            time.sleep(cfg.scheduling.polling_time_s)

    #
    # Analysis of completed jobs
    #
    elif args.cmd == 'analyze':

        analyzer.analyze(args.logfile, args.clipterminals, args.bytmp,
                         args.bybitfield)

    else:
        jobs = Job.get_running_jobs(cfg.directories.log)

        # Status report
        if args.cmd == 'status':
            print(reporting.status_report(jobs, get_term_width()))

        # Directories report
        elif args.cmd == 'dirs':
            print(
                reporting.dirs_report(jobs, cfg.directories, cfg.scheduling,
                                      get_term_width()))

        elif args.cmd == 'interactive':
            interactive.run_interactive()

        # Start running archival
        elif args.cmd == 'archive':
            print('...starting archive loop')
            firstit = True
            while True:
                if not firstit:
                    print('Sleeping 60s until next iteration...')
                    time.sleep(60)
                    jobs = Job.get_running_jobs(cfg.directories.log)
                firstit = False

                archiving_status, log_message = archive.spawn_archive_process(
                    cfg.directories, jobs)
                if log_message:
                    print(log_message)

        # Debugging: show the destination drive usage schedule
        elif args.cmd == 'dsched':
            for (d, ph) in manager.dstdirs_to_furthest_phase(jobs).items():
                print('  %s : %s' % (d, str(ph)))

        #
        # Job control commands
        #
        elif args.cmd in ['details', 'files', 'kill', 'suspend', 'resume']:
            print(args)

            selected = []

            # TODO: clean up treatment of wildcard
            if args.idprefix[0] == 'all':
                selected = jobs
            else:
                # TODO: allow multiple idprefixes, not just take the first
                selected = manager.select_jobs_by_partial_id(
                    jobs, args.idprefix[0])
                if (len(selected) == 0):
                    print('Error: %s matched no jobs.' % args.idprefix[0])
                elif len(selected) > 1:
                    print('Error: "%s" matched multiple jobs:' %
                          args.idprefix[0])
                    for j in selected:
                        print('  %s' % j.plot_id)
                    selected = []

            for job in selected:
                if args.cmd == 'details':
                    print(job.status_str_long())

                elif args.cmd == 'files':
                    temp_files = job.get_temp_files()
                    for f in temp_files:
                        print('  %s' % f)

                elif args.cmd == 'kill':
                    # First suspend so job doesn't create new files
                    print('Pausing PID %d, plot id %s' %
                          (job.proc.pid, job.plot_id))
                    job.suspend()

                    temp_files = job.get_temp_files()
                    print('Will kill pid %d, plot id %s' %
                          (job.proc.pid, job.plot_id))
                    print('Will delete %d temp files' % len(temp_files))
                    conf = input('Are you sure? ("y" to confirm): ')
                    if (conf != 'y'):
                        print(
                            'canceled.  If you wish to resume the job, do so manually.'
                        )
                    else:
                        print('killing...')
                        job.cancel()
                        print('cleaing up temp files...')
                        for f in temp_files:
                            os.remove(f)

                elif args.cmd == 'suspend':
                    print('Suspending ' + job.plot_id)
                    job.suspend()
                elif args.cmd == 'resume':
                    print('Resuming ' + job.plot_id)
                    job.resume()