示例#1
0
 def add_jobs_in_avail_which_failed(self):
     """Flag still-running jobs whose allotted duration has already elapsed.

     Selects every row of ``<schema>.avail_jobs`` for which
     ``<schema>.running_jobs`` holds a row with the same ``job_id`` and
     ``interrupted = FALSE`` and ``finished = FALSE``. For each selected job
     whose ``job_allocated_time`` plus ``duration`` (converted to seconds by
     ``get_duration_seconds``) lies before ``datetime.now()``, an error
     string is recorded through ``append_error_string_in_running_job``.
     """
     self.init_connection()
     avail_jobs = "{}.avail_jobs".format(self.schema)
     running_jobs = "{}.running_jobs".format(self.schema)
     select_job = """SELECT * FROM {0} row WHERE EXISTS(SELECT job_id FROM {1} r WHERE r.interrupted = FALSE 
                     AND r.finished = FALSE AND r.job_id = row.job_id)""".format(
         avail_jobs, running_jobs)
     try:
         self.cursor_db.execute(select_job)
         all_jobs = self.cursor_db.fetchall()
     finally:
         # Release the connection even when the query fails; the loop below
         # runs only after it has been closed, as in the original flow.
         self.close_connection()
     # The placeholder was missing before, so the job list was never shown.
     print("Running jobs are {}".format(all_jobs))

     # Same value the original built on every iteration via str.format.
     error_message = "exceptionInterruptedDueToSomeError"
     for job in all_jobs:
         allocated_at = job['job_allocated_time']
         allowed_seconds = get_duration_seconds(job['duration'])
         deadline = allocated_at + timedelta(seconds=allowed_seconds)
         if deadline >= datetime.now():
             # Still within its time budget: nothing to record.
             continue
         job_id = int(job['job_id'])
         print(
             "Duration for the Job {} expired so marking it as failed".
             format(job_id))
         self.append_error_string_in_running_job(
             job_id=job_id, error_message=error_message)
        models_done = list(df.columns)
    logger.info("Models done {}".format(models_done))
    run_jobs = []
    dbConnector.cursor_db.execute(select_st.format('masterthesis.avail_jobs'))
    for job in dbConnector.cursor_db.fetchall():
        run_jobs.append(dict(job))
    dbConnector.cursor_db.execute(select_st.format('pymc3.avail_jobs'))
    for job in dbConnector.cursor_db.fetchall():
        run_jobs.append(dict(job))

    dbConnector.close_connection()
    for job in run_jobs:
        logger.info("learner {} learner_params {}".format(job["learner"], job["learner_params"]))
    job_description = run_jobs[0]
    rows_list = []
    duration = get_duration_seconds('7D')
    logger.info("DB config filePath {}".format(config_file_path))
    logger.info("Arguments {}".format(arguments))
    logger.info("Run optimization {}".format(run_opt))
    if dataset_type == 'median':
        N_OBJECTS_ARRAY = np.arange(3, 20, step=2)
        OPTIMIZE_ON_OBJECTS = [5, 15]
    logger.info("N_OBJECTS_ARRAY {}".format(N_OBJECTS_ARRAY))
    logger.info("OPTIMIZE_ON_OBJECTS {}".format(OPTIMIZE_ON_OBJECTS))
    logger.info("Model {} Dataset {}".format(model, dataset))

    seed = int(job_description["seed"])
    job_id = int(job_description["job_id"])
    fold_id = int(job_description["fold_id"])
    dataset_name = job_description["dataset"]
    n_inner_folds = int(job_description["inner_folds"])
示例#3
0
            hash_value = job_description["hash_value"]
            random_state = np.random.RandomState(seed=seed + fold_id)
            log_path = os.path.join(DIR_PATH, LOGS_FOLDER,
                                    "{}.log".format(hash_value))
            optimizer_path = os.path.join(DIR_PATH, OPTIMIZER_FOLDER,
                                          "{}".format(hash_value))
            create_dir_recursively(log_path, True)
            create_dir_recursively(optimizer_path, True)
            setup_logging(log_path=log_path)
            configure_numpy_keras(seed=seed)
            logger = logging.getLogger('Experiment')
            logger.info("DB config filePath {}".format(config_file_path))
            logger.info("Arguments {}".format(arguments))
            logger.info("Job Description {}".format(
                print_dictionary(job_description)))
            duration = get_duration_seconds(duration)

            dataset_params['random_state'] = random_state
            dataset_params['fold_id'] = fold_id
            dataset_reader = get_dataset_reader(dataset_name, dataset_params)
            X_train, Y_train, X_test, Y_test = dataset_reader.get_single_train_test_split(
            )
            n_objects = log_test_train_data(X_train, X_test, logger)
            del dataset_reader

            inner_cv = ShuffleSplit(n_splits=n_inner_folds,
                                    test_size=0.1,
                                    random_state=random_state)
            hash_file = os.path.join(DIR_PATH, MODEL_FOLDER,
                                     "{}.h5".format(hash_value))
            learner_params['n_objects'], learner_params[