Пример #1
0
 def list(self):
     """Log the listing of ``om.models`` filtered by ``<pattern>``.

     The pattern is passed as ``regexp=`` when ``--regexp`` or ``-E`` is
     set, and as ``pattern=`` otherwise. ``--raw`` is forwarded as ``raw``.
     """
     om = get_omega(self.args)
     args = self.args
     selector = args.get('<pattern>')
     as_regexp = args.get('--regexp') or args.get('-E')
     raw = args.get('--raw')
     # exactly one of regexp= / pattern= is handed to the store
     filter_key = 'regexp' if as_regexp else 'pattern'
     listing = om.models.list(raw=raw, **{filter_key: selector})
     self.logger.info(listing)
Пример #2
0
 def shell(self):
     """Open an interactive shell in which ``om`` is available.

     IPython is used when it can be imported. Otherwise the standard
     library's ``code.InteractiveConsole`` is started, seeded with this
     method's local variables (so ``om`` and ``self`` are reachable).
     """
     om = get_omega(self.args)
     use_ipython = False
     try:
         import IPython
     except ImportError:
         # only a missing package is expected here; a bare except would
         # also swallow KeyboardInterrupt and SystemExit
         self.logger.warning(
             "you should pip install ipython for convenience")
     else:
         use_ipython = True
     # ipython
     if use_ipython:
         # the header promises `om` to the user, so it must stay a local
         # of this method
         IPython.embed(header='omegaml is available as the om variable',
                       colors='neutral')
         return
     # default console
     import code
     try:
         # imported for its side effect only; the name itself is not used
         import gnureadline
     except ImportError:
         self.logger.warning(
             "you should pip install gnureadline for convenience")
     # local names are deliberately left as they are: locals() becomes the
     # namespace of the console
     variables = {}
     variables.update(locals())
     shell = code.InteractiveConsole(locals=variables)
     shell.interact()
Пример #3
0
 def get(self):
     """Write the notebook of job ``<name>`` to the local file ``<path>``.

     The notebook is fetched with ``om.jobs.get`` and written with
     ``nbformat.write``; the target path is logged at debug level.
     """
     om = get_omega(self.args)
     target = self.args['<path>']
     job_name = self.args['<name>']
     nb = om.jobs.get(job_name)
     nbformat.write(nb, target)
     self.logger.debug(target)
Пример #4
0
 def status(self):
     """Pretty-print information about the runtime workers.

     * neither ``labels`` nor ``stats`` set: prints ``om.runtime.workers()``
     * ``labels`` set: prints, per worker, the names of its queues,
       leaving out names that start with ``amq``
     * ``stats`` set: prints, per worker, the pool's ``max-concurrency``
       as ``size`` and the ``total`` mapping as ``tasks``
     """
     om = get_omega(self.args)
     want_labels = self.args.get('labels')
     want_stats = self.args.get('stats')
     if not (want_labels or want_stats):
         pprint(om.runtime.workers())
         return
     if want_labels:
         report = {}
         for worker, queues in om.runtime.queues().items():
             queue_names = (q.get('name') for q in queues)
             report[worker] = [
                 qname for qname in queue_names
                 if not qname.startswith('amq')
             ]
         pprint(report)
         return
     # labels is falsy and the first guard did not fire, so stats is set
     report = {}
     for worker, details in om.runtime.stats().items():
         report[worker] = {
             'size': details['pool']['max-concurrency'],
             'tasks': dict(details['total'].items()),
         }
     pprint(report)
Пример #5
0
 def do_import(self):
     """Import objects from an mlops-export archive, or list its members.

     Reads ``<prefix/name>``, ``--path``, ``--list`` and ``--promote`` from
     ``self.args``. When the given path does not exist but its parent
     directory does, files in the parent whose name starts with the path's
     name are looked up; if more than one matches, the user is asked to
     select one.

     With ``--list`` the archive members are printed. Otherwise the archive
     is imported through ``OmegaExporter.from_archive``, restricted to the
     given names (joined with ``|`` into one pattern).

     Raises:
         AssertionError: if the resulting archive path does not exist.
     """
     om = get_omega(self.args)
     # a key that is present with value None is not covered by a .get()
     # default, so fall back explicitly (same approach as do_export)
     names = self.args.get('<prefix/name>') or []
     archive = Path(self.args.get('--path') or './mlops-export')
     dolist = self.args.get('--list')
     promote = self.args.get('--promote')
     pattern = '|'.join(names)
     # a path that does not exist is not a file either, one check suffices
     if not archive.exists() and archive.parent.exists():
         archives = list(archive.parent.glob(f'{archive.name}*'))
         if len(archives) > 1:
             archive = Path(
                 self.ask("Select an archive",
                          options=archives,
                          select=True,
                          default=1))
     assert archive.exists(), f"No mlops-export {archive} exists"
     self.print(f"Processing {archive}")
     if dolist:
         arc = OmegaExporter.archive(archive)
         self.print(list(arc.members))
     else:
         exp = OmegaExporter(om)
         arcfile = exp.from_archive(archive,
                                    pattern=pattern,
                                    progressfn=print,
                                    promote=promote)
         self.print("Imported objects:")
         self.print(arcfile)
Пример #6
0
    def result(self):
        """Log the result of the task identified by ``<taskid>``.

        The task is looked up as a celery ``AsyncResult`` bound to the
        runtime's celery app, and the value of its ``get()`` is logged.
        """
        from celery.result import AsyncResult

        om = get_omega(self.args)
        wanted = self.args.get('<taskid>')
        handle = AsyncResult(wanted, app=om.runtime.celeryapp)
        outcome = handle.get()
        self.logger.info(outcome)
Пример #7
0
 def shell(self):
     """Open an interactive shell, optionally running ``<command>``.

     With IPython installed, IPython is started with startup lines that
     create ``om`` from ``shell_args`` (this command's parsed arguments);
     a given ``<command>`` is passed to IPython as ``-c <command>``.
     Without IPython, the standard library's ``code.InteractiveConsole``
     is started, seeded with this method's local variables; ``<command>``
     is not used on that path.
     """
     use_ipython = False
     command = self.args.get('<command>') or ''
     try:
         import IPython
     except ImportError:
         # only a missing package is expected here; a bare except would
         # also swallow KeyboardInterrupt and SystemExit
         self.logger.warning(
             "you should pip install ipython for convenience")
     else:
         use_ipython = True
     # ipython
     if use_ipython:
         c = Config()
         c.InteractiveShellApp.exec_lines = [
             'from omegaml.client.util import get_omega',
             'om = get_omega(shell_args)',
             'print("omegaml is available as the om variable")',
         ]
         c.TerminalIPythonApp.display_banner = False
         # pass the command as ONE argument; splitting it on spaces would
         # tear apart any command that contains a space
         argv = ['-c', command] if command else []
         IPython.start_ipython(argv,
                               config=c,
                               user_ns=dict(shell_args=self.args))
         return
     # default console
     import code
     om = get_omega(self.args)
     try:
         # imported for its side effect only; the name itself is not used
         import gnureadline
     except ImportError:
         self.logger.warning(
             "you should pip install gnureadline for convenience")
     # local names are deliberately left as they are: locals() becomes the
     # namespace of the console
     variables = {}
     variables.update(locals())
     shell = code.InteractiveConsole(locals=variables)
     shell.interact()
Пример #8
0
 def put(self):
     """Store the local file ``<path>`` as dataset ``<name>``; log its metadata.

     The file is treated as csv, image or binary. An explicit ``--format``
     of ``csv``, ``image`` or ``binary`` decides; without it (or with any
     other value) the file extension decides, binary being the fallback.

     * csv: loaded via ``om.datasets.read_csv`` with the ``--csv`` kwargs
     * image: read with ``imageio.imread`` and stored as the image object
     * binary: the open file object is stored

     Unless ``--replace`` is set, csv and binary data are appended.
     """
     om = get_omega(self.args)
     local = self.args['<path>']
     name = self.args['<name>']
     replace = self.args['--replace']
     csvkwargs = self.parse_kwargs('--csv')
     # TODO introduce a pluggable filetype processing backend to do this
     image_exts = '.png,.img,.bmp,.jpeg,.jpg,.tif,.tiff,.eps,.raw,.gif'
     filetype = self.args.get('--format')
     if filetype not in ('csv', 'image', 'binary'):
         # no usable explicit format: detect from the file extension.
         # Detection must not run when a format was given, otherwise e.g.
         # --format binary could never apply to a .csv file.
         if anyext(local, '.csv'):
             filetype = 'csv'
         elif anyext(local, image_exts):
             filetype = 'image'
         else:
             filetype = 'binary'
     if filetype == 'csv':
         # csv formats
         om.datasets.read_csv(local, name, append=not replace, **csvkwargs)
         meta = om.datasets.metadata(name)
     elif filetype == 'image':
         # images
         from imageio import imread
         with smart_open.open(local, 'rb') as fin:
             img = imread(fin)
             meta = om.datasets.put(img, name)
     else:
         # binary: every remaining file ends up here
         with smart_open.open(local, 'rb') as fin:
             meta = om.datasets.put(fin, name, append=not replace)
     self.logger.info(meta)
Пример #9
0
 def list(self):
     """Log the listing of ``om.datasets`` filtered by ``<pattern>``.

     The pattern is passed as ``regexp=`` when ``--regexp`` or ``-E`` is
     set, and as ``pattern=`` otherwise. ``--raw`` (default ``False``) is
     forwarded as ``raw``.
     """
     om = get_omega(self.args)
     args = self.args
     raw = args.get('--raw', False)
     as_regexp = args.get('--regexp') or args.get('-E')
     selector = args.get('<pattern>')
     if as_regexp:
         entries = om.datasets.list(raw=raw, regexp=selector)
     else:
         entries = om.datasets.list(raw=raw, pattern=selector)
     self.logger.info(entries)
Пример #10
0
 def list(self):
     """Log the listing of the store named by ``self.command``.

     The store is looked up as an attribute of the omega instance. The
     ``<pattern>`` is passed as ``regexp=`` when ``--regexp`` or ``-E`` is
     set, and as ``pattern=`` otherwise; ``--raw`` and ``--hidden`` are
     forwarded as ``raw`` and ``hidden``.
     """
     om = get_omega(self.args)
     args = self.args
     selector = args.get('<pattern>')
     as_regexp = args.get('--regexp') or args.get('-E')
     options = {
         'raw': args.get('--raw'),
         'hidden': args.get('--hidden'),
     }
     # exactly one of regexp= / pattern= is handed to the store
     options['regexp' if as_regexp else 'pattern'] = selector
     store = getattr(om, self.command)
     self.logger.info(store.list(**options))
Пример #11
0
    def put(self):
        """Store the notebook file ``<path>`` as job ``<name>``.

        The file is read with ``nbformat.read`` as notebook version 4; the
        return value of ``om.jobs.put`` is logged.
        """
        from nbformat import read as nbread

        om = get_omega(self.args)
        source = self.args['<path>']
        job_name = self.args['<name>']
        with open(source, 'rb') as handle:
            notebook = nbread(handle, as_version=4)
        meta = om.jobs.put(notebook, job_name)
        self.logger.info(meta)
Пример #12
0
 def job(self):
     """Run job ``<name>`` on the runtime and log the outcome.

     Without ``--async`` the task's result (``result.get()``) is logged;
     with ``--async`` only the task id is logged.
     """
     om = get_omega(self.args)
     name = self.args.get('<name>')
     # `async` is a reserved keyword since Python 3.7 and cannot be used
     # as a variable name
     is_async = self.args.get('--async')
     result = om.runtime.job(name).run()
     if not is_async:
         self.logger.info(result.get())
     else:
         self.logger.info(result.task_id)
Пример #13
0
 def job(self):
     """Run job ``<name>`` on the runtime selected by ``--require``.

     Without ``--async`` the task's result (``get()``) is logged; with
     ``--async`` only the task id is logged.
     """
     om = get_omega(self.args)
     args = self.args
     name = args.get('<name>')
     is_async = args.get('--async')
     label = args.get('--require')
     task = om.runtime.require(label).job(name).run()
     if is_async:
         outcome = task.task_id
     else:
         outcome = task.get()
     self.logger.info(outcome)
Пример #14
0
 def script(self):
     """Run script ``<name>`` on the runtime and log the outcome.

     The ``<kw=value>`` arguments are parsed with ``self.parse_kwargs`` and
     passed to the script's ``run``. Without ``--async`` the task's result
     (``result.get()``) is logged; with ``--async`` only the task id.
     """
     om = get_omega(self.args)
     name = self.args.get('<name>')
     # `async` is a reserved keyword since Python 3.7 and cannot be used
     # as a variable name
     is_async = self.args.get('--async')
     kwargs = self.parse_kwargs('<kw=value>')
     result = om.runtime.script(name).run(**kwargs)
     if not is_async:
         self.logger.info(result.get())
     else:
         self.logger.info(result.task_id)
Пример #15
0
 def put(self):
     """Store the package at ``<path>`` as script ``<name>``.

     The path is made absolute and passed to ``om.scripts.put`` as a
     ``pkg://`` source. When no name is given, the path's base name is
     used. The returned metadata is logged.

     Raises:
         ValueError: if ``<path>`` does not exist.
     """
     om = get_omega(self.args)
     script_path = self.args.get('<path>')
     name = self.args.get('<name>')
     # guard clause: nothing to store without an existing path
     if not os.path.exists(script_path):
         raise ValueError('{} is not a valid path'.format(script_path))
     if not name:
         name = os.path.basename(script_path)
     source = 'pkg://{}'.format(os.path.abspath(script_path))
     meta = om.scripts.put(source, name)
     self.logger.info(meta)
Пример #16
0
 def log(self):
     """Print the ``text`` column of ``om.logger.dataset``.

     With ``-f`` the dataset's ``tail()`` is called instead. The frame is
     printed with pandas' row, column and column-width limits lifted.
     """
     import pandas as pd
     tail = self.args.get('-f')
     om = get_omega(self.args)
     if not tail:
         df = om.logger.dataset.get()
         # None lifts the limit for each option. -1 for max_colwidth was
         # deprecated in pandas 1.0 and is rejected by later versions.
         with pd.option_context('display.max_rows', None,
                                'display.max_columns', None,
                                'display.max_colwidth', None):
             print(df[['text']])
     else:
         om.logger.dataset.tail()
Пример #17
0
 def put(self):
     """Create a model from ``<module.callable>`` and store it as ``<name>``.

     The argument is split at its last dot into a module path and an
     attribute name. The module is imported, the attribute is called
     without arguments, and the object it returns is stored with
     ``om.models.put``; the return value of that call is logged. Import
     errors propagate unchanged.
     """
     om = get_omega(self.args)
     target = self.args.get('<module.callable>')
     name = self.args.get('<name>')
     module_path, factory_name = target.rsplit('.', maxsplit=1)
     module = import_module(module_path)
     factory = getattr(module, factory_name)
     model = factory()
     self.logger.info(om.models.put(model, name))
Пример #18
0
 def status(self):
     """Log the recorded runs and pending triggers of job ``<name>``.

     Runs come from the ``job_runs`` entry of the job's metadata
     attributes; triggers come from
     ``om.jobs.get_schedule(name, only_pending=True)``. A trigger without
     a ``ts`` entry gets an empty one so the line can be formatted.
     """
     om = get_omega(self.args)
     name = self.args.get('<name>')
     past_runs = om.jobs.metadata(name).attributes.get('job_runs', [])
     _, pending = om.jobs.get_schedule(name, only_pending=True)
     log = self.logger.info
     log("Runs:")
     for entry in past_runs:
         log("  {ts} {status} ".format(**entry))
     log("Next scheduled runs:")
     for trig in pending:
         # the template needs a ts key; add an empty one when missing
         trig.setdefault('ts', '')
         log("  {ts} {status} {event-kind} {event}".format(**trig))
Пример #19
0
 def serve(self):
     """Start gunicorn serving ``omegaml.restapi.app:serve_objects()``.

     The rules are taken from the ``<rule>`` arguments, or from the
     ``--rules`` file when given (one rule per line; lines starting with
     ``#`` are skipped). They are joined with ``;`` and exported as
     ``OMEGA_RESTAPI_FILTER``; without rules the value of
     ``om.defaults.OMEGA_RESTAPI_FILTER`` is exported instead.
     """
     om = get_omega(self.args, require_config=False)
     specs = self.args.get('<rule>')
     specfile = self.args.get('--rules')
     if specfile:
         from_file = []
         with open(specfile, 'r') as fin:
             for line in fin:
                 if line.startswith('#'):
                     continue
                 from_file.append(line.replace('\n', ''))
         specs = from_file
     if specs:
         api_filter = ';'.join(specs)
     else:
         api_filter = om.defaults.OMEGA_RESTAPI_FILTER
     os.environ['OMEGA_RESTAPI_FILTER'] = api_filter
     subprocess.run("gunicorn 'omegaml.restapi.app:serve_objects()'",
                    shell=True)
Пример #20
0
 def config(self):
     """Log the effective configuration.

     Logs the config file, the user id, the REST API URL and the runtime
     broker URL. The REST API URL and the user id show as
     ``not configured`` when the attribute is missing from
     ``om.defaults``. A notice is printed when no config file is set.
     """
     om = get_omega(self.args)
     defaults = om.defaults
     config_file = defaults.OMEGA_CONFIG_FILE
     if config_file is None:
         # config_file stays None and is reported as such below
         print("No configuration file identified, assuming defaults")
     # collect every value first, so nothing is logged if a lookup fails
     values = dict(
         config_file=config_file,
         restapi_url=getattr(defaults, 'OMEGA_RESTAPI_URL',
                             'not configured'),
         runtime_url=om.runtime.celeryapp.conf['BROKER_URL'],
         userid=getattr(defaults, 'OMEGA_USERID', 'not configured'),
     )
     templates = (
         'Config file: {config_file}',
         'User id: {userid}',
         'REST API URL: {restapi_url}',
         'Runtime broker: {runtime_url}',
     )
     for template in templates:
         self.logger.info(template.format(**values))
Пример #21
0
 def get(self):
     """Write dataset ``<name>`` to the local file ``<path>``.

     A pandas DataFrame is written with ``to_csv``, using the ``--csv``
     kwargs (``index=False`` unless overridden). An object with a
     ``read`` method is copied to the file in chunks of 10 KiB. Any other
     object is not written. The path is logged at debug level.
     """
     om = get_omega(self.args)
     target = self.args['<path>']
     name = self.args['<name>']
     obj = om.datasets.get(name)
     csvkwargs = self.parse_kwargs('--csv', index=False)
     if isinstance(obj, pd.DataFrame):
         obj.to_csv(target, **csvkwargs)
     elif hasattr(obj, 'read'):
         chunk_size = 1024 * 10
         with open(target, 'wb') as fout:
             chunk = obj.read(chunk_size)
             while chunk:
                 fout.write(chunk)
                 chunk = obj.read(chunk_size)
     self.logger.debug(target)
Пример #22
0
 def do_export(self):
     """Export objects to an mlops-export archive, or list its members.

     The archive path is ``--path``, or ``./mlops-export`` when that is
     not given. With ``--list`` the archive's members are printed.
     Otherwise the ``<prefix/name>`` objects are written with
     ``OmegaExporter.to_archive`` and its return value is printed.
     """
     om = get_omega(self.args)
     args = self.args
     names = args.get('<prefix/name>')
     archive = args.get('--path') or './mlops-export'
     compress = args.get('--compress', True)
     if args.get('--list'):
         # listing only: nothing is exported
         existing = OmegaExporter.archive(archive)
         self.print(list(existing.members))
         return
     exporter = OmegaExporter(om)
     written = exporter.to_archive(archive,
                                   names,
                                   compress=compress,
                                   progressfn=print)
     self.print(written)
Пример #23
0
 def put(self):
     """Store a script from a local path or a pip-installable source.

     The source is resolved in this order:

     1. an existing local path is stored as a ``pkg://`` source (name
        defaults to the path's base name)
     2. a value that ``PythonPipSourcedPackageData`` supports as given
     3. the value prefixed with ``pypi://``, if that is supported

     The metadata returned by ``om.scripts.put`` is logged.

     Raises:
         ValueError: if none of the above applies.
     """
     om = get_omega(self.args)
     script_path = self.args.get('<path>')
     name = self.args.get('<name>')
     pypi_spec = 'pypi://{}'.format(script_path)
     if os.path.exists(script_path):
         name = name or os.path.basename(script_path)
         source = 'pkg://{}'.format(os.path.abspath(script_path))
     elif PythonPipSourcedPackageData.supports(script_path, name):
         source = script_path
     elif PythonPipSourcedPackageData.supports(pypi_spec, name):
         source = pypi_spec
     else:
         raise ValueError('{} is not a valid path'.format(script_path))
     meta = om.scripts.put(source, name)
     self.logger.info(meta)
Пример #24
0
 def restart(self):
     """Stop and then start the app ``<name>`` through the apps API.

     Two GET requests (``/apps/api/stop/...`` then ``/apps/api/start/...``)
     are sent to ``https://<host>``, where the host is taken from
     ``om.defaults.OMEGA_MONGO_URL``. They use HTTP basic auth with the
     runtime's user id and api key. ``--insecure`` disables TLS
     certificate verification. Both responses are logged.
     """
     import requests
     om = get_omega(self.args, require_config=True)
     name = self.args.get('<name>')
     insecure = self.args.get('--insecure', False)
     user = om.runtime.auth.userid
     auth = requests.auth.HTTPBasicAuth(user, om.runtime.auth.apikey)
     parsed = urlparse(om.defaults.OMEGA_MONGO_URL)
     url = f'https://{parsed.hostname}'
     verify = not insecure
     # The f-strings are already complete. Running str.format() over the
     # result had nothing to substitute and would misread any braces
     # contained in user or name.
     stop = requests.get(f'{url}/apps/api/stop/{user}/{name}',
                         auth=auth,
                         verify=verify)
     start = requests.get(f'{url}/apps/api/start/{user}/{name}',
                          auth=auth,
                          verify=verify)
     self.logger.info(f'stop: {stop} start: {start}')
Пример #25
0
 def env(self):
     """Run the ``.system/envinstall`` script on workers and print results.

     Steps:

     1. with ``--file``, store that file as ``.system/requirements.txt``
     2. store the ``envinstall`` package shipped with omegaml as
        ``.system/envinstall`` unless it already exists
     3. run the script once per label: the labels returned by
        ``om.runtime.enable_hostqueues()`` with ``--every``, otherwise the
        comma-separated ``--require`` values
     4. wait for all tasks while showing a progress counter, then print
        each task's result
     """
     om = get_omega(self.args)
     args = self.args
     action = args.get('<action>')
     package = args.get('<package>')
     reqfile = args.get('--file')
     every = args.get('--every')
     require = args.get('--require') or ''
     if reqfile:
         with open(reqfile, 'rb') as fin:
             om.scripts.put(fin, '.system/requirements.txt')
     if not om.scripts.exists('.system/envinstall', hidden=True):
         import omegaml as om_module
         package_dir = os.path.dirname(om_module.__file__)
         envinstall_path = os.path.join(package_dir, 'runtimes',
                                        'envinstall')
         om.scripts.put(f'pkg://{envinstall_path}', '.system/envinstall')
     labels = om.runtime.enable_hostqueues() if every else require.split(',')
     # submit one task per label, remembering which label it belongs to
     results = []
     for label in labels:
         installer = om.runtime.require(label).script('.system/envinstall')
         task = installer.run(action=action,
                              package=package,
                              file=reqfile,
                              __format='python')
         results.append((label, task))
     all_results = om.runtime.celeryapp.ResultSet(
         [task for _, task in results])
     from tqdm import tqdm
     with tqdm() as progress:
         # tick once per second until no task is waiting anymore
         while all_results.waiting():
             progress.update(1)
             sleep(1)
         all_results.get()
     for label, task in results:
         if label:
             print(f'** result of worker require={label}:')
         data = task.get()  # resolve AsyncResult => dict
         # get actual result object, pip stdout
         print(str(data.get('result', data)))
Пример #26
0
 def put(self):
     """Store the local file ``<path>`` as dataset ``<name>``; log its metadata.

     * a path ending in ``.csv`` is read with ``pandas.read_csv`` (using
       the ``--csv`` kwargs) and stored as a DataFrame
     * a file that ``imghdr.what`` recognises is read with
       ``imageio.imread`` and stored as the image object
     * anything else: the path itself is passed to ``om.datasets.put``

     Unless ``--replace`` is set, csv data and the fallback are appended.
     """
     om = get_omega(self.args)
     local = self.args['<path>']
     name = self.args['<name>']
     replace = self.args['--replace']
     csvkwargs = self.parse_kwargs('--csv')
     # TODO introduce a pluggable filetype processing backend to do this
     if local.endswith('.csv'):
         # csv formats
         import pandas as pd
         data = pd.read_csv(local, **csvkwargs)
         meta = om.datasets.put(data, name, append=not replace)
     elif imghdr.what(local):
         # images
         from imageio import imread
         img = imread(local)
         meta = om.datasets.put(img, name)
     else:
         # keep the metadata itself; assigning logger.info()'s return
         # value made the final log line below print None
         meta = om.datasets.put(local, name, append=not replace)
     self.logger.info(meta)
Пример #27
0
 def model(self):
     """Call ``<model-action>`` on the runtime model ``<name>``; log the result.

     ``<X>`` and ``<Y>`` are validated with ``self._ensure_valid_XY``. For
     ``predict``, ``predict_proba``, ``decision_function`` and
     ``transform`` the ``--result`` value is passed as ``rName``; for
     every other action ``Y`` is passed as ``Yname``. For ``gridsearch``
     the ``--param`` values (``key=expression``) are passed as
     ``parameters``.

     Without ``--async`` the task's ``get()`` value is logged, with
     ``--async`` the task object itself.

     Raises:
         ValueError: if a ``--param`` entry has no ``=``, or if the
             runtime model has no attribute named like the action.
     """
     om = get_omega(self.args)
     name = self.args.get('<name>')
     action = self.args.get('<model-action>')
     is_async = self.args.get('--async')
     # tolerate an absent --param (None) as "no parameters"
     kwargs_lst = self.args.get('--param') or []
     output = self.args.get('--result')
     label = self.args.get('--require')
     X = self._ensure_valid_XY(self.args.get('<X>'))
     Y = self._ensure_valid_XY(self.args.get('<Y>'))
     # parse the list of kw=value values
     # e.g. key1=val1 key2=val2 => kwargs_lst = ['key1=val1', 'key2=val2']
     #   => kv_dct = { 'key1': eval('val1'), 'key2': eval('val2') }
     kv_dct = {}
     for kv in kwargs_lst:
         k, sep, v = kv.partition('=')
         if not sep:
             raise ValueError(
                 '--param expects key=value, got {!r}'.format(kv))
         # NOTE(security): eval() executes whatever expression was given
         # on the command line. Only acceptable while the arguments come
         # from the user running the command.
         kv_dct[k] = eval(v)
     kwargs = {}
     if action in ('predict', 'predict_proba', 'decision_function',
                   'transform'):
         # actions that take rName, but no Y
         kwargs['rName'] = output
     else:
         # actions that take Y, but no rName
         kwargs['Yname'] = Y
     if action == 'gridsearch':
         kwargs['parameters'] = kv_dct
     rt_model = om.runtime.require(label).model(name)
     meth = getattr(rt_model, action, None)
     if meth is None:
         raise ValueError('{action} is not applicable to {name}'.format(
             action=action, name=name))
     result = meth(X, **kwargs)
     if not is_async:
         self.logger.info(result.get())
     else:
         self.logger.info(result)
Пример #28
0
 def celery(self, action=None):
     """Run a celery command in-process, or start the R worker.

     The celery argument list is assembled from ``action`` (split on
     spaces), the omega options translated to their celery names, the
     ``<celery-command>`` words and ``--flags``. When nothing but the
     program name (and ``action``) remains, ``--help`` is appended.

     If ``rworker`` is among the ``<celery-command>`` words, ``Rscript``
     is called on ``omworker.R`` from ``omegaml.runtimes.rsystem``
     instead of starting celery.

     Args:
         action: optional celery sub-command words, space separated.
     """
     om = get_omega(self.args)
     # giving om command here changes celery help output
     celery_cmds = ['om runtime celery']
     if action:
         celery_cmds += action.split(' ')
     # convert omega terms into celery terms
     celery_opts = (
         # omega term, celery term, value|flag
         ('--worker', '--destination', 'value'),
         ('--queue', '--queue', 'value'),
         ('--celery-help', '--help', 'flag'),
     )
     # tolerate an absent <celery-command> (None) as "no words given"
     celery_command = self.args.get('<celery-command>') or []
     is_r_worker = 'rworker' in celery_command
     for opt, celery_opt, kind in celery_opts:
         if self.args.get(opt):
             celery_cmds += [celery_opt]
             if kind == 'value':
                 celery_cmds += [self.args.get(opt)]
     # prepare celery command args, remove empty parts
     celery_cmds += celery_command
     celery_cmds += (self.args.get('--flags') or '').split(' ')
     celery_cmds = [cmd for cmd in celery_cmds if cmd]
     if len(celery_cmds) == 1 + int(action is not None):
         celery_cmds += ['--help']
     # start in-process for speed
     # -- disable command logging to avoid curses problems in celery events
     self.logger.setLevel(logging.CRITICAL + 1)
     if is_r_worker:
         # start r runtime
         from omegaml.runtimes import rsystem
         rworker = os.path.join(os.path.dirname(rsystem.__file__),
                                'omworker.R')
         # build the argument list directly; splitting a formatted string
         # on spaces breaks when the install path contains a space
         rcmd = ['Rscript', rworker]
         call(rcmd)
     else:
         # start python runtime
         om.runtime.celeryapp.start(celery_cmds)
Пример #29
0
 def get(self):
     """Write dataset ``<name>`` to the local file ``<path>``.

     The dataset is first requested with ``lazy=True``; if that fails it
     is requested again without it. It is written as csv (``to_csv`` with
     the ``--csv`` kwargs, ``index=False`` unless overridden) when
     ``--format`` is ``csv``, the object has ``to_csv``, or the path has a
     ``.csv`` extension, unless it counts as binary. It counts as binary
     when ``--format`` is ``binary`` or the object has ``read``, and is
     then copied in chunks of 10 KiB. The path is logged at debug level.
     """
     om = get_omega(self.args)
     local = self.args['<path>']
     name = self.args['<name>']
     try:
         # try lazy get first to support large dataframes
         obj = om.datasets.get(name, lazy=True)
     except Exception:
         # best-effort fallback for any failure of the lazy get. Unlike a
         # bare except, KeyboardInterrupt and SystemExit still propagate.
         obj = om.datasets.get(name)
     csvkwargs = self.parse_kwargs('--csv', index=False)
     is_csv = self.args.get('--format') == 'csv' or hasattr(
         obj, 'to_csv') or anyext(local, '.csv')
     is_binary = self.args.get('--format') == 'binary' or hasattr(
         obj, 'read')
     if is_csv and not is_binary:
         obj.to_csv(local, **csvkwargs)
     elif is_binary:
         with smart_open.open(local, 'wb') as fout:
             while True:
                 data = obj.read(1024 * 10)
                 if not data:
                     break
                 fout.write(data)
     self.logger.debug(local)
Пример #30
0
 def schedule(self):
     """Show, delete or create the schedule of job ``<name>``.

     The current schedule and the latest pending trigger, as returned by
     ``om.jobs.get_schedule(name, only_pending=True)``, are always logged
     first. After that:

     * ``delete``: if there is a schedule or trigger, ask for confirmation
       and drop the schedule
     * ``show``, or no interval option given at all: nothing further
     * otherwise: build a cron spec, ask for confirmation and schedule
       the job

     The cron spec comes from ``<interval>`` if given, else from
     ``--cron``, else from the individual ``--at`` / ``--hour`` /
     ``--minute`` / ``--weekday`` / ``--monthday`` / ``--month`` options,
     with ``*`` for anything not given. With ``--next`` the next N run
     times are logged as well.

     Returns:
         The return value of ``om.jobs.drop_schedule(name)`` when a
         deletion was confirmed, otherwise ``None``.
     """
     # FIXME this is a mess
     # NOTE: the log messages below are built with .format(**locals()), so
     # the names of the local variables are part of the behavior
     om = get_omega(self.args)
     name = self.args.get('<name>')
     at = self.args.get('--at')
     # get interval specs
     if at:
         # --at is given as HH:MM and takes the place of --hour/--minute
         hour, minute = at.split(':')
     else:
         hour = self.args.get('--hour')
         minute = self.args.get('--minute')
     weekday = self.args.get('--weekday')
     monthday = self.args.get('--monthday')
     month = self.args.get('--month')
     delete = self.args.get('delete')
     show = self.args.get('show')
     spec = self.args.get('--cron')
     next_n = self.args.get('--next')
     interval = self.args.get('<interval>')
     # by default we show if no interval is specified
     show = show or not any(s for s in (weekday, monthday, month, hour, minute, interval, spec))
     # print current schedule and triggers
     run_at, triggers = om.jobs.get_schedule(name, only_pending=True)
     if run_at:
         human_sched = get_description(run_at)
         self.logger.info("Currently {name} is scheduled at {human_sched}".format(**locals()))
         if next_n:
             self.logger.info("Given this existing interval, next {next_n} times would be:".format(**locals()))
             for time in om.jobs.Schedule.from_cron(run_at).next_times(int(next_n)):
                 self.logger.info("  {}".format(time))
     else:
         self.logger.info("Currently {name} is not scheduled".format(**locals()))
     # show current triggers
     if triggers:
         # only the last trigger is looked at
         trigger = triggers[-1]
         if trigger['status'] == 'PENDING':
             event = trigger['event']
             self.logger.info("{name} is scheduled to run next at {event}".format(**locals()))
     # delete if currently scheduled
     if delete:
         if run_at or triggers:
             answer = self.ask("Do you want to delete this schedule?", options='Y/n', default='y')
             should_drop = answer.lower().startswith('y')
             return om.jobs.drop_schedule(name) if should_drop else None
     # create new schedule
     if not (show or delete):
         if interval:
             try:
                 # nlp text-like
                 # a parsed <interval> replaces any --cron value
                 spec = om.jobs.Schedule(interval).cron
             except Exception as e:
                 self.logger.info(f"Cannot parse {interval}, error was {e}")
                 raise
         if not spec:
             # NOTE(review): cron_repr is not used anywhere in this method
             cron_repr = ('{0._orig_minute} {0._orig_hour} {0._orig_day_of_month} '
                          '{0._orig_month_of_year} {0._orig_day_of_week}')
             # no cron spec so far: build one from the individual fields,
             # using '*' for every field that was not given
             sched = om.jobs.Schedule(minute=minute or '*',
                                      hour=hour or '*',
                                      monthday=monthday or '*',
                                      weekday=weekday or '*',
                                      month=month or '*')
             cron_sched = sched.cron
         else:
             cron_sched = spec
         human_sched = get_description(cron_sched)
         if next_n:
             self.logger.info("Given this new interval, next {next_n} times would be:".format(**locals()))
             for time in om.jobs.Schedule.from_cron(cron_sched).next_times(int(next_n)):
                 self.logger.info("  {}".format(time))
         text = "Do you want to schedule {name} at {human_sched}?".format(**locals())
         answer = self.ask(text, options="Y/n", default='y')
         # any answer that does not start with 'n' counts as confirmation
         if answer.lower().startswith('n'):
             self.logger.info('Ok, not scheduled. Try again.')
             return
         self.logger.info('{name} will be scheduled to run {human_sched}'.format(**locals()))
         om.jobs.schedule(name, run_at=cron_sched, last_run=datetime.datetime.now())