def __init__(self, package_name):
    """Record the package name, set default artifact paths and start logging.

    The 'dtlpy' logger is limited to warnings and above before anything
    else happens. The instance logger is created through ``init_logging``
    and writes to ``self.path_to_logs``.

    Args:
        package_name: Name of the package; it is concatenated into the
            start-up log message.
    """
    dtlpy_logger = logging.getLogger('dtlpy')
    dtlpy_logger.setLevel(logging.WARN)

    self.package_name = package_name

    # Default locations of the files this object works with.
    self.path_to_metrics = 'metrics.json'
    self.path_to_tensorboard_dir = 'runs'
    self.path_to_logs = 'logger.conf'

    instance_logger = init_logging(__name__, filename=self.path_to_logs)
    self.logger = instance_logger
    instance_logger.info(self.package_name + ' initialized')
def __init__(self, package_name, service_name):
    """Record package/service names, set default artifact paths and start logging.

    The instance logger is created through ``init_logging`` and writes to
    ``self.path_to_logs``.

    Args:
        package_name: Name of the package; it is concatenated into the
            start-up log message.
        service_name: Name of the service, stored as-is on the instance.
    """
    self.package_name = package_name
    self.service_name = service_name

    # Default locations of the files this object works with.
    self.path_to_best_checkpoint = 'checkpoint.pt'
    self.path_to_metrics = 'metrics.json'
    self.path_to_tensorboard_dir = 'runs'
    self.path_to_logs = 'logger.conf'

    instance_logger = init_logging(__name__, filename=self.path_to_logs)
    self.logger = instance_logger
    instance_logger.info(self.package_name + ' initialized')
def create_app():
    """Create the VMaaS application, start its HTTP servers and warm it up.

    The HTTP server is bound and started first; everything after
    ``start()`` (logging, cache, websocket reconnect timer) is set up
    afterwards, as the original comments require it to happen post-fork.
    """
    app = Application()
    http_server = tornado.httpserver.HTTPServer(app)
    http_server.bind(PUBLIC_API_PORT)

    worker_count = int(os.getenv("MAX_VMAAS_SERVERS", MAX_SERVERS))
    # Forking starts here.
    http_server.start(worker_count)

    init_logging(worker_count)
    LOGGER.info("Starting (version %s).", VMAAS_VERSION)
    hotcache_setting = os.getenv("HOTCACHE_ENABLED", "YES")
    LOGGER.info('Hotcache enabled: %s', hotcache_setting)

    # Everything below must run only after forking.
    BaseHandler.db_cache = Cache()
    load_cache_to_apis()

    app.websocket_reconnect()
    reconnect_period_ms = WEBSOCKET_RECONNECT_INTERVAL * 1000
    app.reconnect_callback = PeriodicCallback(app.websocket_reconnect,
                                              reconnect_period_ms)
    app.reconnect_callback.start()
def main():
    """Create the cryptochat application, wire up its APIs and run the IO loop.

    Installs SIGTERM/SIGINT handlers that schedule a graceful shutdown,
    initialises logging, opens the database, binds and starts the HTTP
    server, attaches the API objects to ``BaseHandler`` and then blocks in
    the Tornado IO loop until it is stopped.
    """
    async def shutdown():
        """Stop the server, wait out the grace period, then stop the IO loop."""
        # `server` is bound further down in main(); the closure resolves it
        # when shutdown actually runs, i.e. after start-up has completed.
        server.stop()
        await tornado.gen.sleep(_SHUTDOWN_TIMEOUT)
        tornado.ioloop.IOLoop.current().stop()
        LOGGER.info("Server was successfully shut down.")

    def exit_handler(sig, frame):  # pylint: disable=unused-argument
        """Log the received signal and schedule `shutdown` on the IO loop."""
        # signal.Signals maps the signal number to its enum member, replacing
        # the hand-rolled reverse lookup over signal.__dict__.
        LOGGER.warning("Registered %s, shutting down.", signal.Signals(sig).name)
        tornado.ioloop.IOLoop.instance().add_callback_from_signal(shutdown)

    signal.signal(signal.SIGTERM, exit_handler)
    signal.signal(signal.SIGINT, exit_handler)

    init_logging()

    cryptochat_db = DB(DATABASE_LOCATION)
    cryptochat_app = Application()
    server = tornado.httpserver.HTTPServer(cryptochat_app)
    server.bind(PUBLIC_API_PORT)
    server.start()
    LOGGER.info("Starting cryptochat (version %s).", SERVER_VERSION)

    # All API objects share the single database handle.
    BaseHandler.messages_new_api = MessagesNewAPI(cryptochat_db)
    BaseHandler.messages_updates_api = MessagesUpdatesAPI(cryptochat_db)
    BaseHandler.users_api = UsersAPI(cryptochat_db)
    BaseHandler.chats_api = ChatsAPI(cryptochat_db)
    BaseHandler.chats_user_api = ChatsUserAPI(cryptochat_db)
    BaseHandler.contacts_new_api = ContactsAPI(cryptochat_db)

    tornado.ioloop.IOLoop.current().start()
import psutil import glob import shutil import time import torch.optim as optim from tqdm import tqdm from . import csv_eval from dataloader import * from networks import get_model from torch.utils.data import DataLoader from logging_utils import logginger, init_logging logger = logginger(__name__) mem_log = init_logging('Memory', 'mem_log.log') print('CUDA available: {}'.format(torch.cuda.is_available())) class ModelTrainer: def __init__(self, device_index=0): self.device = torch.device( type='cuda', index=device_index) if torch.cuda.is_available() else torch.device( type='cpu') def load(self, data_path, save_trial_id, resume_trial_id=None,
zazu_service = dl.services.get('zazu') # get project id for billing bla bla bla dataset_obj = get_dataset_obj(configs['dataloop']) id = dataset_obj.project.id if args.search: zazu_service.execute(function_name='search', execution_input=inputs, project_id=id) if args.predict: zazu_service.execute(function_name='predict', execution_input=inputs, project_id=id) else: logger = init_logging(__name__) this_path = path = os.getcwd() configs_path = os.path.join(this_path, 'configs.json') configs = ConfigSpec(configs_path) opt_model = OptModel() opt_model.add_child_spec(configs, 'configs') zazu = ZaZu(opt_model, remote=args.remote) if args.search: zazu.find_best_model() zazu.hp_search() if args.train: zazu.train_new_model() if args.predict: zazu.run_inference() if args.predict_once: zazu.one_time_inference('/home/noam/0120122798.jpg',
def main():
    """Set up logging, log the client version, run the app and exit.

    The value returned by ``app.run()`` is passed to ``sys.exit`` as the
    process exit status.
    """
    init_logging()
    LOGGER.info("Starting (version %s).", CLIENT_VERSION)
    sys.exit(app.run())
def __init__(self, configs, time, test_dataset_id, query):
    """Prepare the test data and model, then loop forever over search cycles.

    Each cycle executes the 'search' function of the 'zazu' service, waits
    for it to succeed, downloads its artifacts, renames the produced
    checkpoint, computes predictions with it and compares its metric with
    the stored 'check0' checkpoint. If the new one scores higher, 'check0'
    is replaced and the prediction service is launched or updated.

    This constructor does not return: the ``while 1`` loop has no ``break``.

    Args:
        configs: JSON-encoded string; after decoding it must provide
            ``['dataloop']['project']``.
        time: Seconds to sleep between cycles; converted with ``int()``.
        test_dataset_id: Id of the dataset fetched from the project.
        query: JSON-encoded string; after decoding it is used both as the
            ``val_query`` and as the custom filter for the download.

    Raises:
        Exception: If the remote execution reports status 'failed'.
    """
    logger.info('dtlpy version: ' + str(dl.__version__))
    logger.info('dtlpy info: ' + str(dl.info()))
    # NOTE(review): the parameter name `time` would shadow a `time` module
    # inside this method if one is imported — confirm nothing here needs it.
    time = int(time)
    dl.setenv('prod')
    configs = json.loads(configs)
    query = json.loads(query)
    self.configs_input = dl.FunctionIO(type='Json', name='configs', value=configs)
    self.service = dl.services.get('zazu')
    project_name = configs['dataloop']['project']
    self.project = dl.projects.get(project_name)
    test_dataset = self.project.datasets.get(dataset_id=test_dataset_id)
    maybe_download_pred_data(dataset_obj=test_dataset, val_query=query)

    # add gt annotations
    filters = dl.Filters()
    filters.custom_filter = query
    dataset_name = test_dataset.name
    path_to_dataset = os.path.join(os.getcwd(), dataset_name)
    # only download if doesnt exist
    if not os.path.exists(path_to_dataset):
        download_and_organize(path_to_dataset=path_to_dataset, dataset_obj=test_dataset, filters=filters)

    json_file_path = os.path.join(path_to_dataset, 'json')
    self.model_obj = self.project.models.get(model_name='retinanet')
    self.adapter = self.model_obj.build(local_path=os.getcwd())
    logger.info('model built')
    while 1:
        # A fresh metrics object is created each cycle and seeded with the
        # local annotation json files.
        self.compute = precision_recall_compute()
        self.compute.add_dataloop_local_annotations(json_file_path)
        logger.info("running new execution")
        # NOTE(review): project_id is a hard-coded literal rather than being
        # taken from self.project — confirm this is intentional.
        execution_obj = self.service.execute(function_name='search', execution_input=[self.configs_input],
                                             project_id='72bb623f-517f-472b-ad69-104fed8ee94a')
        # Poll every 5 seconds until the execution succeeds; abort on failure.
        while execution_obj.latest_status['status'] != 'success':
            sleep(5)
            execution_obj = dl.executions.get(execution_id=execution_obj.id)
            if execution_obj.latest_status['status'] == 'failed':
                raise Exception("plugin execution failed")
        logger.info("execution object status is successful")
        self.project.artifacts.download(package_name='zazuml',
                                        execution_id=execution_obj.id,
                                        local_path=os.getcwd())
        # Per-execution file names so successive cycles do not overwrite each other.
        logs_file_name = 'timer_logs_' + str(execution_obj.id) + '.conf'
        graph_file_name = 'precision_recall_' + str(execution_obj.id) + '.png'
        # cycle_logger is stored but not used again within this method.
        self.cycle_logger = init_logging(__name__, filename=logs_file_name)
        logger.info('artifact download finished')
        logger.info(str(os.listdir('.')))

        # load new checkpoint and change to unique name
        new_checkpoint_name = 'checkpoint_' + str(execution_obj.id) + '.pt'
        logger.info(str(os.listdir('.')))
        os.rename('checkpoint0.pt', new_checkpoint_name)
        # Strip the '.pt' suffix to get the model name.
        new_model_name = new_checkpoint_name[:-3]
        logger.info(str(os.listdir('.')))
        new_checkpoint = torch.load(new_checkpoint_name, map_location=torch.device('cpu'))
        # self.model_obj = self.project.models.get(model_name=new_checkpoint['model_specs']['name'])
        # self.adapter = self.model_obj.build(local_path=os.getcwd())
        # logger.info('model built')
        self.new_home_path = new_checkpoint['model_specs']['data']['home_path']
        self._compute_predictions(checkpoint_path=new_checkpoint_name,
                                  model_name=new_model_name)

        if len(self.compute.by_model_name.keys()) < 2:
            # if the model cant predict anything then just skip it
            logger.info('''model couldn't make any predictions, trying to train again''')
            continue

        # if previous best checkpoint doesnt exist there must not be a service, launch prediction service with new
        # new_checkpoint and create trigger
        if 'check0' not in [checkp.name for checkp in self.model_obj.checkpoints.list()]:
            logger.info('there is no check0, will add upload new checkpoint as check0 and '
                        'deploy prediction service')
            new_checkpoint_obj = self.model_obj.checkpoints.upload(checkpoint_name='check0',
                                                                   local_path=new_checkpoint_name)
            logger.info('uploaded this checkpoint as the new check0 : ' + new_checkpoint_name[:-3])
            self._maybe_launch_predict(new_checkpoint_obj)
            # `continue` skips the plot/upload/sleep steps at the end of the cycle.
            continue
        logger.info('i guess check0 does exist')
        best_checkpoint = self.model_obj.checkpoints.get('check0')
        check0_path = best_checkpoint.download(local_path=os.getcwd())
        logger.info('downloading best checkpoint')
        logger.info(str(os.listdir('.')))
        logger.info('check0 path is: ' + str(check0_path))
        self._compute_predictions(checkpoint_path=check0_path, model_name=best_checkpoint.name)

        # compute metrics
        new_checkpoint_mAP = self.compute.get_metric(model_name=new_model_name, precision_to_recall_ratio=1.)
        best_checkpoint_mAP = self.compute.get_metric(model_name=best_checkpoint.name, precision_to_recall_ratio=1.)
        logger.info('best checkpoint: ' + str(best_checkpoint_mAP))
        logger.info('new checkpoint: ' + str(new_checkpoint_mAP))

        # if new checkpoint performs better switch out prediction
        if new_checkpoint_mAP > best_checkpoint_mAP:
            logger.info('new checkpoint is better')
            logger.info('uploading old best checkpoint under new name')
            # The archive name reuses the text after the last '_' in the
            # downloaded path, minus its last three characters.
            self.model_obj.checkpoints.upload(checkpoint_name='checkpoint_' + check0_path.split('_')[-1][:-3],
                                              local_path=check0_path)
            logger.info('deleting old best checkpoint')
            best_checkpoint.delete()
            logger.info('uploading new best checkpoint as check0')
            new_best_checkpoint_obj = self.model_obj.checkpoints.upload(checkpoint_name='check0',
                                                                        local_path=new_checkpoint_name)
            if 'predict' not in [s.name for s in dl.services.list()]:
                self._maybe_launch_predict(new_best_checkpoint_obj)
            else:
                self._update_predict_service(new_best_checkpoint_obj)
            logger.info('switched with new checkpoint')
        # NOTE(review): the plot/upload/sleep steps below are assumed to run
        # at the end of every full cycle, not only when the checkpoint was
        # switched — confirm against the intended nesting.
        self.compute.save_plot_metrics(save_path=graph_file_name)
        self.project.artifacts.upload(filepath=logs_file_name,
                                      package_name='zazuml',
                                      execution_id=execution_obj.id)
        self.project.artifacts.upload(filepath=graph_file_name,
                                      package_name='zazuml',
                                      execution_id=execution_obj.id)
        logger.info('waiting ' + str(time) + ' seconds for next execution . . . .')
        sleep(time)