def add_jobs_in_avail_which_failed(self):
    """Mark available jobs whose running counterpart exceeded its time budget as failed."""
    self.init_connection()
    avail_jobs = "{}.avail_jobs".format(self.schema)
    running_jobs = "{}.running_jobs".format(self.schema)
    # Select all available jobs that still have an unfinished, uninterrupted running entry.
    select_job = """SELECT * FROM {0} row WHERE EXISTS(SELECT job_id FROM {1} r
                    WHERE r.interrupted = FALSE AND r.finished = FALSE AND r.job_id = row.job_id)""".format(
        avail_jobs, running_jobs)
    self.cursor_db.execute(select_job)
    all_jobs = self.cursor_db.fetchall()
    print("Running jobs are {}".format(all_jobs))
    self.close_connection()
    for job in all_jobs:
        # A job counts as failed once its allocation time plus its declared duration lies in the past.
        date_time = job['job_allocated_time']
        duration = get_duration_seconds(job['duration'])
        new_date = date_time + timedelta(seconds=duration)
        if new_date < datetime.now():
            job_id = int(job['job_id'])
            print("Duration for the job {} expired, so marking it as failed".format(job_id))
            error_message = "exception{}".format("InterruptedDueToSomeError")
            self.append_error_string_in_running_job(job_id=job_id, error_message=error_message)
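
# A minimal standalone sketch of the expiry check used above (an illustration,
# not part of the original connector): a job counts as failed once its
# allocation time plus its declared duration lies in the past. The helper name
# `job_has_expired` is hypothetical, and `get_duration_seconds` is assumed
# elsewhere to turn a duration string such as '7D' into seconds.
from datetime import datetime, timedelta


def job_has_expired(job_allocated_time, duration_seconds, now=None):
    """Hypothetical helper: True if the job's time budget has run out."""
    now = now or datetime.now()
    return job_allocated_time + timedelta(seconds=duration_seconds) < now


# Example: a job allocated two hours ago with a one-hour budget has expired.
# job_has_expired(datetime.now() - timedelta(hours=2), 3600)  -> True
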
models_done = list(df.columns)
logger.info("Models done {}".format(models_done))

# Collect the pending jobs from both database schemas.
run_jobs = []
dbConnector.cursor_db.execute(select_st.format('masterthesis.avail_jobs'))
for job in dbConnector.cursor_db.fetchall():
    run_jobs.append(dict(job))
dbConnector.cursor_db.execute(select_st.format('pymc3.avail_jobs'))
for job in dbConnector.cursor_db.fetchall():
    run_jobs.append(dict(job))
dbConnector.close_connection()
for job in run_jobs:
    logger.info("learner {} learner_params {}".format(job["learner"], job["learner_params"]))

job_description = run_jobs[0]
rows_list = []
duration = get_duration_seconds('7D')
logger.info("DB config filePath {}".format(config_file_path))
logger.info("Arguments {}".format(arguments))
logger.info("Run optimization {}".format(run_opt))
if dataset_type == 'median':
    N_OBJECTS_ARRAY = np.arange(3, 20, step=2)
    OPTIMIZE_ON_OBJECTS = [5, 15]
logger.info("N_OBJECTS_ARRAY {}".format(N_OBJECTS_ARRAY))
logger.info("OPTIMIZE_ON_OBJECTS {}".format(OPTIMIZE_ON_OBJECTS))
logger.info("Model {} Dataset {}".format(model, dataset))

# Read the configuration of the current job.
seed = int(job_description["seed"])
job_id = int(job_description["job_id"])
fold_id = int(job_description["fold_id"])
dataset_name = job_description["dataset"]
n_inner_folds = int(job_description["inner_folds"])
hash_value = job_description["hash_value"]
random_state = np.random.RandomState(seed=seed + fold_id)

# Set up per-job log, optimizer and model paths derived from the job's hash value.
log_path = os.path.join(DIR_PATH, LOGS_FOLDER, "{}.log".format(hash_value))
optimizer_path = os.path.join(DIR_PATH, OPTIMIZER_FOLDER, "{}".format(hash_value))
create_dir_recursively(log_path, True)
create_dir_recursively(optimizer_path, True)
setup_logging(log_path=log_path)
configure_numpy_keras(seed=seed)
logger = logging.getLogger('Experiment')
logger.info("DB config filePath {}".format(config_file_path))
logger.info("Arguments {}".format(arguments))
logger.info("Job Description {}".format(print_dictionary(job_description)))
duration = get_duration_seconds(duration)

# Load the dataset split for this fold.
dataset_params['random_state'] = random_state
dataset_params['fold_id'] = fold_id
dataset_reader = get_dataset_reader(dataset_name, dataset_params)
X_train, Y_train, X_test, Y_test = dataset_reader.get_single_train_test_split()
n_objects = log_test_train_data(X_train, X_test, logger)
del dataset_reader

inner_cv = ShuffleSplit(n_splits=n_inner_folds, test_size=0.1, random_state=random_state)
hash_file = os.path.join(DIR_PATH, MODEL_FOLDER, "{}.h5".format(hash_value))
learner_params['n_objects'], learner_params[