Пример #1
0
 def _load(self, Workflow, **kwargs):
     self.debug("load() was called from run(), workflow class is %s",
                str(Workflow))
     self.load_called = True
     try:
         self.launcher = Launcher(self.interactive)
         self.launcher.workflow_file = self.workflow_file
         self.launcher.config_file = self.config_file
         self.launcher.seeds = self.seeds
     except:
         self.exception("Failed to create the launcher")
         sys.exit(Main.EXIT_FAILURE)
     try:
         self.workflow = self._load_workflow(self.snapshot_file_name)
         snapshot = self.workflow is not None
         if not snapshot:
             wfkw = self._get_interactive_locals()
             wfkw.update(kwargs)
             self.workflow = Workflow(self.launcher, **wfkw)
             self.info("Created %s", self.workflow)
         else:
             self.info("Loaded the workflow snapshot from %s: %s",
                       self.snapshot_file_name, self.workflow)
             if self._visualization_mode:
                 self.workflow.plotters_are_enabled = True
             self.workflow.workflow = self.launcher
     except:
         self.exception("Failed to create the workflow")
         self.launcher.stop()
         sys.exit(Main.EXIT_FAILURE)
     if self._workflow_graph:
         self.workflow.generate_graph(filename=self._workflow_graph,
                                      with_data_links=True,
                                      background='white')
     return self.workflow, snapshot
Пример #2
0
 def _load(self, Workflow, **kwargs):
     self.debug("load() was called from run(), workflow class is %s",
                str(Workflow))
     self.load_called = True
     try:
         self.launcher = Launcher(self.interactive)
         self.launcher.workflow_file = self.workflow_file
         self.launcher.config_file = self.config_file
         self.launcher.seeds = self.seeds
     except:
         self.exception("Failed to create the launcher")
         sys.exit(Main.EXIT_FAILURE)
     try:
         self.workflow = self._load_workflow(self.snapshot_file_name)
         self.snapshot = self.workflow is not None
         if not self.snapshot:
             wfkw = self._get_interactive_locals()
             wfkw.update(kwargs)
             self.workflow = Workflow(self.launcher, **wfkw)
             self.info("Created %s", self.workflow)
         else:
             self.info("Loaded the workflow snapshot from %s: %s",
                       self.snapshot_file_name, self.workflow)
             if self._visualization_mode:
                 self.workflow.plotters_are_enabled = True
             self.workflow.workflow = self.launcher
     except:
         self.exception("Failed to create the workflow")
         self.launcher.stop()
         sys.exit(Main.EXIT_FAILURE)
     if self._workflow_graph:
         self.workflow.generate_graph(filename=self._workflow_graph,
                                      with_data_links=True,
                                      background='white')
     return self.workflow, self.snapshot
Пример #3
0
class Main(Logger, CommandLineBase):
    """
    Entry point of any VELES engine executions.
    """

    EXIT_SUCCESS = 0
    EXIT_FAILURE = 1
    registered_print_max_rss = False

    def __init__(self, interactive=False, *args, **kwargs):
        Main.setup_argv(not interactive, True, *args, **kwargs)
        super(Main, self).__init__()
        self._interactive = interactive
        self._ensemble_train = None
        self._ensemble_test = None
        self._optimization = None

    @property
    def interactive(self):
        return self._interactive

    @property
    def optimization(self):
        return self._optimization

    @optimization.setter
    def optimization(self, value):
        if value is None:
            self._optimization = None
            return
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                "Invalid optimization value: expected a tuple of length 2, got"
                " %s" % type(value))
        if not isinstance(value[0], int) or \
                not isinstance(value[1], (int, type(None))):
            raise TypeError("Invalid optimization value types: %s", value)
        if value[0] < 5:
            raise ValueError("Population size may not be less than 5")
        if value[1] is not None and value[1] < 1:
            raise ValueError("At least 1 generation must be set")
        self._optimization = OptimizationTuple(*value)

    @property
    def ensemble_train(self):
        return self._ensemble_train

    @ensemble_train.setter
    def ensemble_train(self, value):
        if value is None:
            self._ensemble_train = None
            return
        if not isinstance(value, tuple) or len(value) != 2 or \
                not isinstance(value[0], int) or \
                not isinstance(value[1], (float, int)):
            raise ValueError("Invalid ensemble_train value")
        ratio = value[1]
        if ratio <= 0 or ratio > 1:
            raise ValueError("Training set ratio must be in (0, 1] (got %s)" %
                             ratio)
        self._ensemble_train = EnsembleTrainTuple(*value)

    @property
    def ensemble_test(self):
        return self._ensemble_test

    @ensemble_test.setter
    def ensemble_test(self, value):
        if value is None:
            self._ensemble_test = None
            return
        if not isinstance(value, string_types):
            raise TypeError("ensemble_test must be a string (got %s)" %
                            type(value))
        self._ensemble_test = EnsembleTestTuple(value)

    @property
    def regular(self):
        return not self.optimization and not self.ensemble_train and \
            not self.ensemble_test

    def _process_special_args(self):
        if "--frontend" in sys.argv:
            try:
                self._open_frontend()
            except KeyboardInterrupt:
                return Main.EXIT_FAILURE
            return self._process_special_args()
        if self.interactive:
            for opt in "forge", "--version", "--help", "--dump-config":
                if opt in self.argv:
                    raise ValueError(
                        "\"%s\" is not supported in interactive mode" % opt)
            return None
        if len(sys.argv) > 1 and sys.argv[1] == "forge":
            from veles.forge.forge_client import __run__ as forge_run
            del sys.argv[1]
            action = sys.argv[1]
            try:
                forge_run()
                return Main.EXIT_SUCCESS
            except Exception as e:
                if isinstance(e, SystemExit):
                    raise from_none(e)
                self.exception("Failed to run forge %s", action)
                return Main.EXIT_FAILURE
        if "--version" in sys.argv:
            self._print_version()
            return Main.EXIT_SUCCESS
        if "--html-help" in sys.argv:
            veles.__html__()
            return Main.EXIT_SUCCESS
        if "--help" in sys.argv:
            # help text requires UTF-8, but the default codec is ascii over ssh
            Logger.ensure_utf8_streams()
        if "--dump-config" in sys.argv:
            self.info("Scanning for the plugins...")
            self.debug("Loaded plugins: %s", veles.__plugins__)
            root.print_()
            return Main.EXIT_SUCCESS
        return None

    def _open_frontend(self):
        from multiprocessing import Process, SimpleQueue

        connection = SimpleQueue()
        frontend = Process(
            target=self._open_frontend_process,
            args=(connection, [k for k in sys.argv[1:] if k != "--frontend"]))
        frontend.start()
        cmdline = connection.get()
        frontend.join()
        if self.interactive:
            argv_backup = list(sys.argv)
        sys.argv[1:] = cmdline.split()
        Main.setup_argv(True, True)
        if self.interactive:
            sys.argv = argv_backup
        print("Running with the following command line: %s" % sys.argv)

    def _open_frontend_process(self, connection, argv):
        if not os.path.exists(
                os.path.join(root.common.web.root, "frontend.html")):
            self.info("frontend.html was not found, generating it...")
            from veles.scripts.generate_frontend import main

            main()
            gc.collect()

        from random import randint

        port = randint(1024, 65535)
        self.info("Launching the web server on localhost:%d...", port)

        from tornado.escape import json_decode
        from tornado.ioloop import IOLoop
        import tornado.web as web

        cmdline = [""]

        class CmdlineHandler(web.RequestHandler):
            def post(self):
                try:
                    data = json_decode(self.request.body)
                    cmdline[0] = data.get("cmdline")
                    IOLoop.instance().add_callback(IOLoop.instance().stop)
                except:
                    self.exception("Frontend cmdline POST")

        class FrontendHandler(web.RequestHandler):
            def get(self):
                self.render("frontend.html", cmdline=" ".join(argv))

        app = web.Application(
            [("/cmdline", CmdlineHandler),
             (r"/((js|css|fonts|img|maps)/.*)", web.StaticFileHandler, {
                 'path': root.common.web.root
             }), (r"/frontend\.html", FrontendHandler),
             ("/", web.RedirectHandler, {
                 "url": "/frontend.html",
                 "permanent": True
             }),
             ("", web.RedirectHandler, {
                 "url": "/frontend.html",
                 "permanent": True
             })],
            template_path=root.common.web.root)
        app.listen(port)

        def open_browser():
            from veles.portable import show_file

            show_file("http://localhost:%d" % port)

        IOLoop.instance().add_callback(open_browser)
        try:
            IOLoop.instance().start()
        except KeyboardInterrupt:
            sys.stderr.write("KeyboardInterrupt\n")
        finally:
            connection.put(cmdline[0])

    def _parse_optimization(self, args):
        if args.optimize is None:
            return
        optparsed = args.optimize.split(':')
        if len(optparsed) > 2:
            raise ValueError("Invalid --optimize value: %s" % args.optimize)
        try:
            self.optimization = int(optparsed[0]), \
                int(optparsed[1]) if len(optparsed) == 2 else None
        except ValueError:
            raise from_none(
                ValueError("\"%s\" is not a valid --optimize value" %
                           args.optimize))

    def _parse_ensemble_train(self, args):
        if args.ensemble_train is None:
            return

        optparsed = args.ensemble_train.split(":")
        if len(optparsed) != 2:
            raise ValueError("--ensemble-train must be specified as"
                             "<number of instances>:<training set ratio>")
        try:
            self.ensemble_train = int(optparsed[0]), float(optparsed[1])
        except ValueError:
            raise from_none(
                "Failed to parse ensemble parameters from (%s, %s)" %
                optparsed)

    def _parse_ensemble_test(self, args):
        if args.ensemble_test is None:
            return
        if self.ensemble_train is not None:
            raise ValueError(
                "--ensemble-train and --ensemble-test may not be used "
                "together")
        self.ensemble_test = args.ensemble_test

    def _daemonize(self):
        daemon_context = daemon.DaemonContext()
        daemon_context.working_directory = os.getcwd()
        daemon_context.files_preserve = [
            int(fd) for fd in os.listdir("/proc/self/fd") if int(fd) > 2
        ]
        daemon_context.open()  # <- the magic happens here

    @staticmethod
    def _get_interactive_locals():
        """
        If we are running under IPython, extracts the local variables;
        otherwise, returns an empty dict.
        """
        try:
            __IPYTHON__  # pylint: disable=E0602
            from IPython.core.interactiveshell import InteractiveShell
            return {
                k: v
                for k, v in InteractiveShell.instance().user_ns.items()
                if k[0] != '_' and k not in ("Out", "In", "exit", "quit",
                                             "get_ipython")
            }
        except NameError:
            return {}

    def _load_model(self, fname_workflow):
        self.info("Loading workflow \"%s\"...", fname_workflow)
        self.load_called = False
        self.main_called = False
        package_name, module_name = get_file_package_and_module(fname_workflow)
        try:
            return import_file_as_package(fname_workflow)
        except Exception as e:
            self.debug(
                "Failed to import \"%s\" through the parent package "
                "\"%s\": %s", fname_workflow, package_name, e)
            package_import_error = e
        # We failed to load the package => try module approach
        try:
            return import_file_as_module(fname_workflow)
        except FileNotFoundError:
            self.exception("Workflow does not exist: \"%s\"", fname_workflow)
            sys.exit(errno.ENOENT)
        except IsADirectoryError:
            self.exception("Workflow \"%s\" is a directory", fname_workflow)
            sys.exit(errno.EISDIR)
        except PermissionError:
            self.exception("Cannot read workflow \"%s\"", fname_workflow)
            sys.exit(errno.EACCES)
        except Exception as e:
            self.error(
                "Failed to load the workflow \"%s\".\n"
                "Package import error: %s\nModule import error: %s",
                fname_workflow, package_import_error, e)
            sys.exit(Main.EXIT_FAILURE)

    def _apply_config(self, fname_config):
        if not self.config_file:
            self.warning("Configuration path is empty")
            return

        def fail():
            self.exception("Failed to apply the configuration \"%s\"",
                           fname_config)
            sys.exit(Main.EXIT_FAILURE)

        self.info("Applying the configuration from %s...", fname_config)
        try:
            runpy.run_path(fname_config)
        except FileNotFoundError:
            self.exception("Configuration does not exist: \"%s\"",
                           fname_config)
            sys.exit(errno.ENOENT)
        except IsADirectoryError:
            self.exception("Configuration \"%s\" is a directory", fname_config)
            sys.exit(errno.EISDIR)
        except PermissionError:
            self.exception("Cannot read configuration \"%s\"", fname_config)
            sys.exit(errno.EACCES)
        except TypeError as e:
            self.debug("Filed to import \"%s\": %s -> assumed pickle",
                       fname_config, e)
            from veles.pickle2 import pickle
            try:
                with open(fname_config, "rb") as fin:
                    cfg = pickle.load(fin)
            except:
                fail()
            for subcfg in cfg:
                root[subcfg].update(cfg[subcfg])
        except SyntaxError as e:
            self.debug("Filed to import \"%s\": %s -> assumed json",
                       fname_config, e)
            import json
            try:
                with open(fname_config, "r") as fin:
                    cfg = json.load(fin)
            except:
                fail()
            for subcfg in cfg:
                root[subcfg].update(cfg[subcfg])
        except:
            fail()

    def _override_config(self, config_list):
        cmds = "\n".join(config_list)
        self.debug("Overriding the configuration with %s", cmds)
        try:
            exec(cmds)
        except:
            self.exception("Invalid configuration overloads")
            sys.exit(Main.EXIT_FAILURE)

    def _seed_random(self, rndvals):
        self.debug("Seeding with %s", rndvals)
        rndvals_split = rndvals.split(',')
        seeds = []
        for rndval, index in zip(rndvals_split, range(len(rndvals_split))):
            try:
                binvle = binascii.unhexlify(rndval)
                seed = numpy.frombuffer(binvle, dtype=numpy.uint8)
                prng.get(index + 1).seed(seed, dtype=numpy.uint8)
                seeds.append(seed)
                continue
            except (binascii.Error, TypeError):
                pass
            vals = rndval.split(':')
            fname = vals[0]
            if fname == "":
                if index > 1:
                    fname = rndvals_split[0].split(':')[0] + str(index)
                else:
                    self.critical("Random generator file name is empty")
                    sys.exit(errno.ENOENT)
            if fname == "-":
                seeds.append(None)
                try:
                    prng.get(index + 1).seed(None)
                except:
                    self.exception(
                        "Failed to seed the random generator %d "
                        "with the last used seed.", index + 1)
                    sys.exit(Main.EXIT_FAILURE)
                continue
            if not os.path.isabs(fname):
                new_fname = os.path.abspath(fname)
                if os.path.exists(new_fname):
                    fname = new_fname
                else:
                    fname = os.path.join(root.common.dirs.veles, fname)
                    if not os.path.exists(fname):
                        self.critical(
                            "Neither %s nor %s exist. Cannot seed "
                            "the random generator.", new_fname, fname)
                        sys.exit(errno.ENOENT)
            count = int(vals[1]) if len(vals) > 1 else 16
            dtype = numpy.dtype(vals[2]) if len(vals) > 2 else numpy.int32

            self.debug("Seeding with %d samples of type %s from %s to %d",
                       count, dtype, fname, index + 1)
            try:
                seed = (numpy.fromfile(fname, dtype=dtype, count=count))
                prng.get(index + 1).seed(seed, dtype=dtype)
                seeds.append(seed)
            except:
                self.exception("Failed to seed the random generator with %s",
                               fname)
                sys.exit(Main.EXIT_FAILURE)
        self.seeds = seeds

    def _load_workflow(self, fname_snapshot):
        stype = splittype(fname_snapshot)[0]
        if stype == "odbc":
            import pyodbc

            addr = fname_snapshot[7:]
            parsed = addr.split('&')
            try:
                odbc, table, id_, log_id = parsed[:4]
            except TypeError:
                self.warning("Invalid ODBC source format. Here is the "
                             "template: odbc://<odbc data source spec>&"
                             "<table>&<id>&<log id>[&<optional name>]\n"
                             "<table> and <log id> may be empty (\"veles\" and"
                             " <id> value will be used).")
                return None
            if not table:
                table = "veles"
            if not log_id:
                log_id = id_
            if len(parsed) > 4:
                if len(parsed) > 5:
                    self.warning("Invalid ODBC source format")
                    return None
                name = parsed[-1]
            else:
                name = None
            try:
                return SnapshotterToDB.import_(odbc, table, id_, log_id, name)
            except pyodbc.Error as e:
                self.warning(
                    "Failed to load the snapshot from ODBC source: %s", e)
                return None
        elif stype in ("http", "https"):
            try:
                self.info("Downloading %s...", fname_snapshot)
                fname_snapshot = self.snapshot_file_name = wget.download(
                    fname_snapshot, root.common.dirs.snapshots)
                print()
                sys.stdout.flush()
            except:
                self.exception("Failed to fetch the snapshot at \"%s\"",
                               fname_snapshot)
                return None
        try:
            return SnapshotterToFile.import_(fname_snapshot)
        except FileNotFoundError:
            if fname_snapshot.strip() != "":
                self.warning("Workflow snapshot %s does not exist",
                             fname_snapshot)
            return None

    def _load(self, Workflow, **kwargs):
        self.debug("load() was called from run(), workflow class is %s",
                   str(Workflow))
        self.load_called = True
        try:
            self.launcher = Launcher(self.interactive)
            self.launcher.workflow_file = self.workflow_file
            self.launcher.config_file = self.config_file
            self.launcher.seeds = self.seeds
        except:
            self.exception("Failed to create the launcher")
            sys.exit(Main.EXIT_FAILURE)
        try:
            self.workflow = self._load_workflow(self.snapshot_file_name)
            snapshot = self.workflow is not None
            if not snapshot:
                wfkw = self._get_interactive_locals()
                wfkw.update(kwargs)
                self.workflow = Workflow(self.launcher, **wfkw)
                self.info("Created %s", self.workflow)
            else:
                self.info("Loaded the workflow snapshot from %s: %s",
                          self.snapshot_file_name, self.workflow)
                if self._visualization_mode:
                    self.workflow.plotters_are_enabled = True
                self.workflow.workflow = self.launcher
        except:
            self.exception("Failed to create the workflow")
            self.launcher.stop()
            sys.exit(Main.EXIT_FAILURE)
        if self._workflow_graph:
            self.workflow.generate_graph(filename=self._workflow_graph,
                                         with_data_links=True,
                                         background='white')
        return self.workflow, snapshot

    def _main(self, **kwargs):
        if self._dry_run < 2:
            self.launcher.stop()
            return
        self.debug("main() was called from run()")
        if not self.load_called:
            self.critical("Call load() first in run()")
            sys.exit(Main.EXIT_FAILURE)
        self.main_called = True
        kwargs["no_device"] = not self.regular

        try:
            self.launcher.initialize(**kwargs)
        except:
            self.exception("Failed to initialize the launcher.")
            self.launcher.stop()
            sys.exit(Main.EXIT_FAILURE)

        self.debug("Initialization is complete")
        try:
            if self._dump_attrs != "no":
                self._dump_unit_attributes(self._dump_attrs == "all")
            gc.collect()
            if self._dry_run > 2:
                self.debug("Running the launcher")
                self.launcher.run()
            elif self._visualization_mode:
                self.debug("Visualizing the workflow...")
                self._visualize_workflow()
        except:
            self.exception("Failed to run the workflow")
            self.launcher.stop()
            sys.exit(Main.EXIT_FAILURE)
        finally:
            self.launcher.device_thread_pool_detach()

    def _dump_unit_attributes(self, arrays=True):
        import veles.external.prettytable as prettytable
        from veles.workflow import Workflow

        self.debug("Dumping unit attributes of %s...", str(self.workflow))
        table = prettytable.PrettyTable("#", "unit", "attr", "value")
        table.align["#"] = "r"
        table.align["unit"] = "l"
        table.align["attr"] = "l"
        table.align["value"] = "l"
        table.max_width["value"] = 100
        for i, u in enumerate(self.workflow.units_in_dependency_order):
            for k, v in sorted(u.__dict__.items()):
                if k not in Workflow.HIDDEN_UNIT_ATTRS:
                    if (not arrays and hasattr(v, "__len__") and len(v) > 32
                            and not isinstance(v, str)
                            and not isinstance(v, bytes)):
                        strv = "object of class %s of length %d" % (repr(
                            v.__class__.__name__), len(v))
                    else:
                        strv = repr(v)
                    table.add_row(i, u.__class__.__name__, k, strv)
        print(table)

    def _visualize_workflow(self):
        _, file_name = self.workflow.generate_graph(with_data_links=True,
                                                    background='white')
        from veles.portable import show_file

        show_file(file_name)

        import signal

        self.launcher.graphics_client.send_signal(signal.SIGUSR2)
        from twisted.internet import reactor

        reactor.callWhenRunning(self._run_workflow_plotters)
        reactor.callWhenRunning(print_, "Press Ctrl-C when you are done...")
        reactor.run()

    def _run_workflow_plotters(self):
        from veles.plotter import Plotter

        for unit in self.workflow:
            if isinstance(unit, Plotter):
                unit.run()
        # Second loop is needed to finish with PDF
        for unit in self.workflow:
            if isinstance(unit, Plotter):
                unit.last_run_time = 0
                unit.run()
                break

    def _run_core(self, wm):
        if self._dry_run <= 0:
            return
        if not self.optimization:
            from veles.genetics import fix_config
            fix_config(root)
        if self.regular:
            self.run_module(wm)
        elif self.optimization is not None:
            import veles.genetics.optimization_workflow as workflow
            self.run_module(workflow, model=wm, **self.optimization.__dict__)
        elif self.ensemble_train is not None:
            import veles.ensemble.model_workflow as workflow
            self.run_module(workflow, model=wm, **self.ensemble_train.__dict__)
        elif self.ensemble_test is not None:
            import veles.ensemble.test_workflow as workflow
            self.run_module(workflow, model=wm, **self.ensemble_test.__dict__)
        else:
            raise NotImplementedError("Unsupported execution mode")

    def _apply_args(self, args):
        if args.background:
            self._daemonize()
        if not args.workflow:
            raise ValueError("Workflow path may not be empty")
        config_file = args.config
        if config_file == "-":
            config_file = "%s_config%s" % os.path.splitext(args.workflow)
        self.workflow_file = os.path.abspath(args.workflow)
        self.config_file = os.path.abspath(config_file) if config_file else ""
        self._visualization_mode = args.visualize
        self._workflow_graph = args.workflow_graph
        self._dry_run = Main.DRY_RUN_CHOICES.index(args.dry_run)
        self._dump_attrs = args.dump_unit_attributes
        self.snapshot_file_name = args.snapshot
        self._parse_optimization(args)
        self._parse_ensemble_train(args)
        self._parse_ensemble_test(args)

    def _print_logo(self, args):
        if not args.no_logo:
            try:
                print(Main.LOGO)
            except:
                print(Main.LOGO.replace("©", "(c)"))
            sys.stdout.flush()

    def _print_version(self):
        print(veles.__version__, formatdate(veles.__date__, True),
              veles.__git__)

    def _print_config(self, cfg):
        io = StringIO()
        cfg.print_(file=io)
        self.debug("\n%s", io.getvalue().strip())

    def setup_logging(self, verbosity):
        try:
            super(Main, self).setup_logging(Main.LOG_LEVEL_MAP[verbosity])
        except Logger.LoggerHasBeenAlreadySetUp as e:
            if not self.interactive:
                raise from_none(e)

    def _register_print_max_rss(self):
        if not Main.registered_print_max_rss:
            atexit.register(self.print_max_rss)
            Main.registered_print_max_rss = True

    @staticmethod
    def format_decimal(val):
        if val < 1000:
            return str(val)
        d, m = divmod(val, 1000)
        return Main.format_decimal(d) + " %03d" % m

    def print_max_rss(self):
        res = resource.getrusage(resource.RUSAGE_SELF)
        if Watcher.max_mem_in_use > 0:
            self.info("Peak device memory used: %s Kb",
                      self.format_decimal(Watcher.max_mem_in_use // 1000))
        self.info("Peak resident memory used: %s Kb",
                  self.format_decimal(res.ru_maxrss))

    def run_module(self, module, **kwargs):
        self.debug("Calling %s.run()...", module.__name__)
        module.run(self._load, self._main, **kwargs)
        if not self.main_called and self._dry_run > 2:
            self.warning("main() was not called by run() in %s",
                         module.__file__)

    """
    Basically, this is what each workflow module's run() should do.
    """

    def run_workflow(self, Workflow, kwargs_load=None, kwargs_main=None):
        # we should not set "{}" as default values because of the way
        # default values work: the dicts will be reused, not recreated
        if kwargs_load is None:
            kwargs_load = {}
        if kwargs_main is None:
            kwargs_main = {}
        self._load(Workflow, **kwargs_load)
        self._main(**kwargs_main)

    def run(self):
        """Entry point for the VELES execution engine.
        """
        veles.validate_environment()

        ret = self._process_special_args()
        if ret is not None:
            return ret
        parser = Main.init_parser()
        args = parser.parse_args(self.argv)
        self._apply_args(args)

        self.setup_logging(args.verbosity)
        self._print_logo(args)
        for name in filter(str.strip, args.debug.split(',')):
            logging.getLogger(name).setLevel(logging.DEBUG)
        self._seed_random(args.random_seed)
        if args.debug_pickle:
            setup_pickle_debug()
        ThreadPool.reset()
        self._register_print_max_rss()

        if self.logger.isEnabledFor(logging.DEBUG):
            self._print_config(root)
        wm = self._load_model(self.workflow_file)
        self._apply_config(self.config_file)
        self._override_config(args.config_list)
        if self.logger.isEnabledFor(logging.DEBUG):
            self._print_config(root)

        self._run_core(wm)

        if not self.interactive:
            self.info("End of job")
        else:
            self.info("\033[1;35mReturned the control\033[0m")
        return Main.EXIT_SUCCESS
Пример #4
0
class Main(Logger, CommandLineBase):
    """
    Entry point of any VELES engine executions.
    """

    EXIT_SUCCESS = 0
    EXIT_FAILURE = 1
    registered_print_max_rss = False

    def __init__(self, interactive=False, *args, **kwargs):
        Main.setup_argv(not interactive, True, *args, **kwargs)
        super(Main, self).__init__()
        self._interactive = interactive
        self._ensemble_train = None
        self._ensemble_test = None
        self._optimization = None

    @property
    def interactive(self):
        return self._interactive

    @property
    def optimization(self):
        return self._optimization

    @optimization.setter
    def optimization(self, value):
        if value is None:
            self._optimization = None
            return
        if not isinstance(value, tuple) or len(value) != 2:
            raise TypeError(
                "Invalid optimization value: expected a tuple of length 2, got"
                " %s" % type(value))
        if not isinstance(value[0], int) or \
                not isinstance(value[1], (int, type(None))):
            raise TypeError("Invalid optimization value types: %s", value)
        if value[0] < 5:
            raise ValueError("Population size may not be less than 5")
        if value[1] is not None and value[1] < 1:
            raise ValueError("At least 1 generation must be set")
        self._optimization = OptimizationTuple(*value)

    @property
    def ensemble_train(self):
        return self._ensemble_train

    @ensemble_train.setter
    def ensemble_train(self, value):
        if value is None:
            self._ensemble_train = None
            return
        if not isinstance(value, tuple) or len(value) != 2 or \
                not isinstance(value[0], int) or \
                not isinstance(value[1], (float, int)):
            raise ValueError("Invalid ensemble_train value")
        ratio = value[1]
        if ratio <= 0 or ratio > 1:
            raise ValueError(
                "Training set ratio must be in (0, 1] (got %s)" % ratio)
        self._ensemble_train = EnsembleTrainTuple(*value)

    @property
    def ensemble_test(self):
        return self._ensemble_test

    @ensemble_test.setter
    def ensemble_test(self, value):
        if value is None:
            self._ensemble_test = None
            return
        if not isinstance(value, string_types):
            raise TypeError(
                "ensemble_test must be a string (got %s)" % type(value))
        self._ensemble_test = EnsembleTestTuple(value)

    @property
    def regular(self):
        return not self.optimization and not self.ensemble_train and \
            not self.ensemble_test

    def _process_special_args(self):
        if "--frontend" in sys.argv:
            try:
                self._open_frontend()
            except KeyboardInterrupt:
                return Main.EXIT_FAILURE
            return self._process_special_args()
        if self.interactive:
            for opt in "forge", "--version", "--help", "--dump-config":
                if opt in self.argv:
                    raise ValueError(
                        "\"%s\" is not supported in interactive mode" % opt)
            return None
        if len(sys.argv) > 1 and sys.argv[1] == "forge":
            from veles.forge.forge_client import __run__ as forge_run
            del sys.argv[1]
            action = sys.argv[1]
            try:
                forge_run()
                return Main.EXIT_SUCCESS
            except Exception as e:
                if isinstance(e, SystemExit):
                    raise from_none(e)
                self.exception("Failed to run forge %s", action)
                return Main.EXIT_FAILURE
        if "--version" in sys.argv:
            self._print_version()
            return Main.EXIT_SUCCESS
        if "--html-help" in sys.argv:
            veles.__html__()
            return Main.EXIT_SUCCESS
        if "--help" in sys.argv:
            # help text requires UTF-8, but the default codec is ascii over ssh
            Logger.ensure_utf8_streams()
        if "--dump-config" in sys.argv:
            self.info("Scanning for the plugins...")
            self.debug("Loaded plugins: %s", veles.__plugins__)
            root.print_()
            return Main.EXIT_SUCCESS
        return None

    def _open_frontend(self):
        from multiprocessing import Process, SimpleQueue

        connection = SimpleQueue()
        frontend = Process(
            target=self._open_frontend_process,
            args=(connection, [k for k in sys.argv[1:] if k != "--frontend"]))
        frontend.start()
        cmdline = connection.get()
        frontend.join()
        if self.interactive:
            argv_backup = list(sys.argv)
        sys.argv[1:] = cmdline.split()
        Main.setup_argv(True, True)
        if self.interactive:
            sys.argv = argv_backup
        print("Running with the following command line: %s" % sys.argv)

    def _open_frontend_process(self, connection, argv):
        if not os.path.exists(os.path.join(root.common.web.root,
                                           "frontend.html")):
            self.info("frontend.html was not found, generating it...")
            from veles.scripts.generate_frontend import main

            main()
            gc.collect()

        from random import randint

        port = randint(1024, 65535)
        self.info("Launching the web server on localhost:%d...", port)

        from tornado.escape import json_decode
        from tornado.ioloop import IOLoop
        import tornado.web as web

        cmdline = [""]

        class CmdlineHandler(web.RequestHandler):
            def post(self):
                try:
                    data = json_decode(self.request.body)
                    cmdline[0] = data.get("cmdline")
                    IOLoop.instance().add_callback(IOLoop.instance().stop)
                except:
                    self.exception("Frontend cmdline POST")

        class FrontendHandler(web.RequestHandler):
            def get(self):
                self.render("frontend.html", cmdline=" ".join(argv))

        app = web.Application([
            ("/cmdline", CmdlineHandler),
            (r"/((js|css|fonts|img|maps)/.*)",
             web.StaticFileHandler, {'path': root.common.web.root}),
            (r"/frontend\.html", FrontendHandler),
            ("/", web.RedirectHandler, {"url": "/frontend.html",
                                        "permanent": True}),
            ("", web.RedirectHandler, {"url": "/frontend.html",
                                       "permanent": True})
        ], template_path=root.common.web.root)
        app.listen(port)

        def open_browser():
            from veles.portable import show_file

            show_file("http://localhost:%d" % port)

        IOLoop.instance().add_callback(open_browser)
        try:
            IOLoop.instance().start()
        except KeyboardInterrupt:
            sys.stderr.write("KeyboardInterrupt\n")
        finally:
            connection.put(cmdline[0])

    def _parse_optimization(self, args):
        if args.optimize is None:
            return
        optparsed = args.optimize.split(':')
        if len(optparsed) > 2:
            raise ValueError("Invalid --optimize value: %s" % args.optimize)
        try:
            self.optimization = int(optparsed[0]), \
                int(optparsed[1]) if len(optparsed) == 2 else None
        except ValueError:
            raise from_none(ValueError(
                "\"%s\" is not a valid --optimize value" % args.optimize))

    def _parse_ensemble_train(self, args):
        if args.ensemble_train is None:
            return

        optparsed = args.ensemble_train.split(":")
        if len(optparsed) != 2:
            raise ValueError(
                "--ensemble-train must be specified as"
                "<number of instances>:<training set ratio>")
        try:
            self.ensemble_train = int(optparsed[0]), float(optparsed[1])
        except ValueError:
            raise from_none(
                "Failed to parse ensemble parameters from (%s, %s)" %
                optparsed)

    def _parse_ensemble_test(self, args):
        if args.ensemble_test is None:
            return
        if self.ensemble_train is not None:
            raise ValueError(
                "--ensemble-train and --ensemble-test may not be used "
                "together")
        self.ensemble_test = args.ensemble_test

    def _daemonize(self):
        daemon_context = daemon.DaemonContext()
        daemon_context.working_directory = os.getcwd()
        daemon_context.files_preserve = [
            int(fd) for fd in os.listdir("/proc/self/fd")
            if int(fd) > 2]
        daemon_context.open()  # <- the magic happens here

    @staticmethod
    def _get_interactive_locals():
        """
        If we are running under IPython, extracts the local variables;
        otherwise, returns an empty dict.
        """
        try:
            __IPYTHON__  # pylint: disable=E0602
            from IPython.core.interactiveshell import InteractiveShell
            return {k: v for k, v in
                    InteractiveShell.instance().user_ns.items()
                    if k[0] != '_' and k not in (
                        "Out", "In", "exit", "quit", "get_ipython")}
        except NameError:
            return {}

    def _load_model(self, fname_workflow):
        self.info("Loading workflow \"%s\"...", fname_workflow)
        self.load_called = False
        self.main_called = False
        package_name, module_name = get_file_package_and_module(
            fname_workflow)
        try:
            return import_file_as_package(fname_workflow)
        except Exception as e:
            self.debug("Failed to import \"%s\" through the parent package "
                       "\"%s\": %s", fname_workflow, package_name, e)
            package_import_error = e
        # We failed to load the package => try module approach
        try:
            return import_file_as_module(fname_workflow)
        except FileNotFoundError:
            self.exception("Workflow does not exist: \"%s\"", fname_workflow)
            sys.exit(errno.ENOENT)
        except IsADirectoryError:
            self.exception("Workflow \"%s\" is a directory", fname_workflow)
            sys.exit(errno.EISDIR)
        except PermissionError:
            self.exception("Cannot read workflow \"%s\"", fname_workflow)
            sys.exit(errno.EACCES)
        except Exception as e:
            self.error("Failed to load the workflow \"%s\".\n"
                       "Package import error: %s\nModule import error: %s",
                       fname_workflow, package_import_error, e)
            sys.exit(Main.EXIT_FAILURE)

    def _apply_config(self, fname_config):
        if not self.config_file:
            self.warning("Configuration path is empty")
            return

        def fail():
            self.exception("Failed to apply the configuration \"%s\"",
                           fname_config)
            sys.exit(Main.EXIT_FAILURE)

        self.info("Applying the configuration from %s...", fname_config)
        try:
            runpy.run_path(fname_config)
        except FileNotFoundError:
            self.exception("Configuration does not exist: \"%s\"",
                           fname_config)
            sys.exit(errno.ENOENT)
        except IsADirectoryError:
            self.exception("Configuration \"%s\" is a directory", fname_config)
            sys.exit(errno.EISDIR)
        except PermissionError:
            self.exception("Cannot read configuration \"%s\"", fname_config)
            sys.exit(errno.EACCES)
        except TypeError as e:
            self.debug("Filed to import \"%s\": %s -> assumed pickle",
                       fname_config, e)
            from veles.pickle2 import pickle
            try:
                with open(fname_config, "rb") as fin:
                    cfg = pickle.load(fin)
            except:
                fail()
            for subcfg in cfg:
                root[subcfg].update(cfg[subcfg])
        except SyntaxError as e:
            self.debug("Filed to import \"%s\": %s -> assumed json",
                       fname_config, e)
            import json
            try:
                with open(fname_config, "r") as fin:
                    cfg = json.load(fin)
            except:
                fail()
            for subcfg in cfg:
                root[subcfg].update(cfg[subcfg])
        except:
            fail()

    def _override_config(self, config_list):
        cmds = "\n".join(config_list)
        self.debug("Overriding the configuration with %s", cmds)
        try:
            exec(cmds)
        except:
            self.exception("Invalid configuration overloads")
            sys.exit(Main.EXIT_FAILURE)

    def _seed_random(self, rndvals):
        self.debug("Seeding with %s", rndvals)
        rndvals_split = rndvals.split(',')
        seeds = []
        for rndval, index in zip(rndvals_split, range(len(rndvals_split))):
            try:
                binvle = binascii.unhexlify(rndval)
                seed = numpy.frombuffer(binvle, dtype=numpy.uint8)
                prng.get(index + 1).seed(seed, dtype=numpy.uint8)
                seeds.append(seed)
                continue
            except (binascii.Error, TypeError):
                pass
            vals = rndval.split(':')
            fname = vals[0]
            if fname == "":
                if index > 1:
                    fname = rndvals_split[0].split(':')[0] + str(index)
                else:
                    self.critical("Random generator file name is empty")
                    sys.exit(errno.ENOENT)
            if fname == "-":
                seeds.append(None)
                try:
                    prng.get(index + 1).seed(None)
                except:
                    self.exception("Failed to seed the random generator %d "
                                   "with the last used seed.", index + 1)
                    sys.exit(Main.EXIT_FAILURE)
                continue
            if not os.path.isabs(fname):
                new_fname = os.path.abspath(fname)
                if os.path.exists(new_fname):
                    fname = new_fname
                else:
                    fname = os.path.join(root.common.dirs.veles, fname)
                    if not os.path.exists(fname):
                        self.critical("Neither %s nor %s exist. Cannot seed "
                                      "the random generator.", new_fname,
                                      fname)
                        sys.exit(errno.ENOENT)
            count = int(vals[1]) if len(vals) > 1 else 16
            dtype = numpy.dtype(vals[2]) if len(vals) > 2 else numpy.int32

            self.debug("Seeding with %d samples of type %s from %s to %d",
                       count, dtype, fname, index + 1)
            try:
                seed = (numpy.fromfile(fname, dtype=dtype, count=count))
                prng.get(index + 1).seed(seed, dtype=dtype)
                seeds.append(seed)
            except:
                self.exception("Failed to seed the random generator with %s",
                               fname)
                sys.exit(Main.EXIT_FAILURE)
        self.seeds = seeds

    def _load_workflow(self, fname_snapshot):
        stype = splittype(fname_snapshot)[0]
        if stype == "odbc":
            import pyodbc

            addr = fname_snapshot[7:]
            parsed = addr.split('&')
            try:
                odbc, table, id_, log_id = parsed[:4]
            except TypeError:
                self.warning("Invalid ODBC source format. Here is the "
                             "template: odbc://<odbc data source spec>&"
                             "<table>&<id>&<log id>[&<optional name>]\n"
                             "<table> and <log id> may be empty (\"veles\" and"
                             " <id> value will be used).")
                return None
            if not table:
                table = "veles"
            if not log_id:
                log_id = id_
            if len(parsed) > 4:
                if len(parsed) > 5:
                    self.warning("Invalid ODBC source format")
                    return None
                name = parsed[-1]
            else:
                name = None
            try:
                return SnapshotterToDB.import_(odbc, table, id_, log_id, name)
            except pyodbc.Error as e:
                self.warning(
                    "Failed to load the snapshot from ODBC source: %s", e)
                return None
        elif stype in ("http", "https"):
            try:
                self.info("Downloading %s...", fname_snapshot)
                fname_snapshot = self.snapshot_file_name = wget.download(
                    fname_snapshot, root.common.dirs.snapshots)
                print()
                sys.stdout.flush()
            except:
                self.exception("Failed to fetch the snapshot at \"%s\"",
                               fname_snapshot)
                return None
        try:
            return SnapshotterToFile.import_(fname_snapshot)
        except FileNotFoundError:
            if fname_snapshot.strip() != "":
                self.warning("Workflow snapshot %s does not exist",
                             fname_snapshot)
            return None

    def _load(self, Workflow, **kwargs):
        self.debug("load() was called from run(), workflow class is %s",
                   str(Workflow))
        self.load_called = True
        try:
            self.launcher = Launcher(self.interactive)
            self.launcher.workflow_file = self.workflow_file
            self.launcher.config_file = self.config_file
            self.launcher.seeds = self.seeds
        except:
            self.exception("Failed to create the launcher")
            sys.exit(Main.EXIT_FAILURE)
        try:
            self.workflow = self._load_workflow(self.snapshot_file_name)
            self.snapshot = self.workflow is not None
            if not self.snapshot:
                wfkw = self._get_interactive_locals()
                wfkw.update(kwargs)
                self.workflow = Workflow(self.launcher, **wfkw)
                self.info("Created %s", self.workflow)
            else:
                self.info("Loaded the workflow snapshot from %s: %s",
                          self.snapshot_file_name, self.workflow)
                if self._visualization_mode:
                    self.workflow.plotters_are_enabled = True
                self.workflow.workflow = self.launcher
        except:
            self.exception("Failed to create the workflow")
            self.launcher.stop()
            sys.exit(Main.EXIT_FAILURE)
        if self._workflow_graph:
            self.workflow.generate_graph(filename=self._workflow_graph,
                                         with_data_links=True,
                                         background='white')
        return self.workflow, self.snapshot

    def _main(self, **kwargs):
        if self._dry_run < 2:
            self.launcher.stop()
            return
        self.debug("main() was called from run()")
        if not self.load_called:
            self.critical("Call load() first in run()")
            sys.exit(Main.EXIT_FAILURE)
        self.main_called = True
        kwargs["snapshot"] = self.snapshot
        kwargs["no_device"] = not self.regular

        try:
            self.launcher.initialize(**kwargs)
        except:
            self.exception("Failed to initialize the launcher.")
            self.launcher.stop()
            sys.exit(Main.EXIT_FAILURE)

        self.debug("Initialization is complete")
        try:
            if self._dump_attrs != "no":
                self._dump_unit_attributes(self._dump_attrs == "all")
            gc.collect()
            if self._dry_run > 2:
                self.debug("Running the launcher")
                self.launcher.run()
            elif self._visualization_mode:
                self.debug("Visualizing the workflow...")
                self._visualize_workflow()
        except:
            self.exception("Failed to run the workflow")
            self.launcher.stop()
            sys.exit(Main.EXIT_FAILURE)
        finally:
            self.launcher.device_thread_pool_detach()

    def _dump_unit_attributes(self, arrays=True):
        import veles.external.prettytable as prettytable
        from veles.workflow import Workflow

        self.debug("Dumping unit attributes of %s...", str(self.workflow))
        table = prettytable.PrettyTable("#", "unit", "attr", "value")
        table.align["#"] = "r"
        table.align["unit"] = "l"
        table.align["attr"] = "l"
        table.align["value"] = "l"
        table.max_width["value"] = 100
        for i, u in enumerate(self.workflow.units_in_dependency_order):
            for k, v in sorted(u.__dict__.items()):
                if k not in Workflow.HIDDEN_UNIT_ATTRS:
                    if (not arrays and hasattr(v, "__len__") and len(v) > 32
                            and not isinstance(v, str)
                            and not isinstance(v, bytes)):
                        strv = "object of class %s of length %d" % (
                            repr(v.__class__.__name__), len(v))
                    else:
                        strv = repr(v)
                    table.add_row(i, u.__class__.__name__, k, strv)
        print(table)

    def _visualize_workflow(self):
        _, file_name = self.workflow.generate_graph(with_data_links=True,
                                                    background='white')
        from veles.portable import show_file

        show_file(file_name)

        import signal

        self.launcher.graphics_client.send_signal(signal.SIGUSR2)
        from twisted.internet import reactor

        reactor.callWhenRunning(self._run_workflow_plotters)
        reactor.callWhenRunning(print_, "Press Ctrl-C when you are done...")
        reactor.run()

    def _run_workflow_plotters(self):
        from veles.plotter import Plotter

        for unit in self.workflow:
            if isinstance(unit, Plotter):
                unit.run()
        # Second loop is needed to finish with PDF
        for unit in self.workflow:
            if isinstance(unit, Plotter):
                unit.last_run_time = 0
                unit.run()
                break

    def _run_core(self, wm):
        if self._dry_run <= 0:
            return
        if not self.optimization:
            from veles.genetics import fix_config
            fix_config(root)
        if self.regular:
            self.run_module(wm)
        elif self.optimization is not None:
            import veles.genetics.optimization_workflow as workflow
            self.run_module(workflow, model=wm, **self.optimization.__dict__)
        elif self.ensemble_train is not None:
            import veles.ensemble.model_workflow as workflow
            self.run_module(workflow, model=wm, **self.ensemble_train.__dict__)
        elif self.ensemble_test is not None:
            import veles.ensemble.test_workflow as workflow
            self.run_module(workflow, model=wm, **self.ensemble_test.__dict__)
        else:
            raise NotImplementedError("Unsupported execution mode")

    def _apply_args(self, args):
        if args.background:
            self._daemonize()
        if not args.workflow:
            raise ValueError("Workflow path may not be empty")
        config_file = args.config
        if config_file == "-":
            config_file = "%s_config%s" % os.path.splitext(args.workflow)
        self.workflow_file = os.path.abspath(args.workflow)
        self.config_file = os.path.abspath(config_file) if config_file else ""
        self._visualization_mode = args.visualize
        self._workflow_graph = args.workflow_graph
        self._dry_run = Main.DRY_RUN_CHOICES.index(args.dry_run)
        self._dump_attrs = args.dump_unit_attributes
        self.snapshot_file_name = args.snapshot
        self._parse_optimization(args)
        self._parse_ensemble_train(args)
        self._parse_ensemble_test(args)

    def _print_logo(self, args):
        if not args.no_logo:
            try:
                print(Main.LOGO)
            except:
                print(Main.LOGO.replace("©", "(c)"))
            sys.stdout.flush()

    def _print_version(self):
        print(veles.__version__, formatdate(veles.__date__, True),
              veles.__git__)

    def _print_config(self, cfg):
        io = StringIO()
        cfg.print_(file=io)
        self.debug("\n%s", io.getvalue().strip())

    def setup_logging(self, verbosity):
        try:
            super(Main, self).setup_logging(Main.LOG_LEVEL_MAP[verbosity])
        except Logger.LoggerHasBeenAlreadySetUp as e:
            if not self.interactive:
                raise from_none(e)

    def _register_print_max_rss(self):
        if not Main.registered_print_max_rss:
            atexit.register(self.print_max_rss)
            Main.registered_print_max_rss = True

    @staticmethod
    def format_decimal(val):
        if val < 1000:
            return str(val)
        d, m = divmod(val, 1000)
        return Main.format_decimal(d) + " %03d" % m

    def print_max_rss(self):
        res = resource.getrusage(resource.RUSAGE_SELF)
        if Watcher.max_mem_in_use > 0:
            self.info("Peak device memory used: %s Kb",
                      self.format_decimal(Watcher.max_mem_in_use // 1000))
        self.info("Peak resident memory used: %s Kb",
                  self.format_decimal(res.ru_maxrss))

    def run_module(self, module, **kwargs):
        self.debug("Calling %s.run()...", module.__name__)
        module.run(self._load, self._main, **kwargs)
        if not self.main_called and self._dry_run > 2:
            self.warning("main() was not called by run() in %s",
                         module.__file__)

    """
    Basically, this is what each workflow module's run() should do.
    """

    def run_workflow(self, Workflow, kwargs_load=None, kwargs_main=None):
        # we should not set "{}" as default values because of the way
        # default values work: the dicts will be reused, not recreated
        if kwargs_load is None:
            kwargs_load = {}
        if kwargs_main is None:
            kwargs_main = {}
        self._load(Workflow, **kwargs_load)
        self._main(**kwargs_main)

    def run(self):
        """Entry point for the VELES execution engine.
        """
        veles.validate_environment()

        ret = self._process_special_args()
        if ret is not None:
            return ret
        parser = Main.init_parser()
        args = parser.parse_args(self.argv)
        self._apply_args(args)

        self.setup_logging(args.verbosity)
        self._print_logo(args)
        for name in filter(str.strip, args.debug.split(',')):
            logging.getLogger(name).setLevel(logging.DEBUG)
        self._seed_random(args.random_seed)
        if args.debug_pickle:
            setup_pickle_debug()
        ThreadPool.reset()
        self._register_print_max_rss()

        if self.logger.isEnabledFor(logging.DEBUG):
            self._print_config(root)
        wm = self._load_model(self.workflow_file)
        self._apply_config(self.config_file)
        self._override_config(args.config_list)
        if self.logger.isEnabledFor(logging.DEBUG):
            self._print_config(root)

        self._run_core(wm)

        if not self.interactive:
            self.info("End of job")
        else:
            self.info("\033[1;35mReturned the control\033[0m")
        return Main.EXIT_SUCCESS