def main():
    """Run the hyper-parameter search for the model selected in ``config``.

    Steps, in order:

    1. Import the model class named by ``config.model()`` and build the
       database path and the result folder (created if missing).
    2. Build the script ``parameters`` dict from ``config.parameters``, add
       the ``binarize_piano`` / ``binarize_orch`` flags (set from the
       presence of the ``binary_piano`` / ``binary_orch`` marker files in the
       database folder), the model name, and the content of the database's
       ``metadata.pkl``.
    3. Configure logging to a file in the scratch space and to the console,
       then log every parameter.
    4. Launch ``config_loop`` once per configuration: either for each
       pre-defined configuration (when the module-level ``DEFINED_CONFIG``
       flag is truthy) or for randomly sampled configurations until
       ``parameters["max_hyperparam_configs"]`` folders exist.

    Returns:
        None.
    """
    model_name = config.model()
    Model = import_model.import_model(model_name)

    # DATABASE
    DATABASE = config.data_name()
    DATABASE_PATH = config.data_root() + "/" + DATABASE

    # RESULTS
    result_folder = config.result_root() + '/' + DATABASE + '/' + Model.name()
    if not os.path.isdir(result_folder):
        os.makedirs(result_folder)

    # Parameters
    parameters = config.parameters(result_folder)
    # The presence of a marker file in the database folder decides whether
    # the corresponding input is binarized.
    parameters["binarize_piano"] = os.path.isfile(DATABASE_PATH + '/binary_piano')
    parameters["binarize_orch"] = os.path.isfile(DATABASE_PATH + '/binary_orch')
    parameters["model_name"] = model_name

    # Load the database metadata and add them to the script parameters to
    # keep a record of the data processing pipeline.
    # The context manager guarantees the file handle is closed, even if
    # unpickling fails.
    # NOTE(review): pickle must only be used on trusted files; this assumes
    # metadata.pkl is produced by the project's own pipeline - confirm.
    with open(DATABASE_PATH + '/metadata.pkl', 'rb') as metadata_file:
        parameters.update(pkl.load(metadata_file))

    ############################################################
    # Logging
    ############################################################
    # Log file
    log_file_path = config.scratch_space() + '/log'
    # Set up logging to file (the file is overwritten on each run)
    logging.basicConfig(level=logging.INFO,
                        format='%(asctime)s %(levelname)-8s %(message)s',
                        datefmt='%m-%d %H:%M',
                        filename=log_file_path,
                        filemode='w')
    # Define a handler which writes INFO messages or higher to sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # Use a simpler format for the console
    formatter = logging.Formatter('%(levelname)-8s %(message)s')
    console.setFormatter(formatter)
    # Add the handler to the root logger
    logging.getLogger('').addHandler(console)

    # Now we can log to the root logger
    logging.info('#' * 60)
    logging.info('#' * 60)
    logging.info('#' * 60)
    logging.info('* L * O * P *')
    logging.info('** Model : ' + Model.name())
    for k, v in parameters.items():
        # Lazy %-formatting yields the same text as concatenation for string
        # keys, and does not raise on a non-string key.
        logging.info('** %s : %s', k, v)
    logging.info('#' * 60)
    logging.info('#' * 60)

    ############################################################
    # Hyper parameter space
    ############################################################
    # Two cases:
    # 1/ Random search
    model_parameters_space = Model.get_hp_space()
    # 2/ Defined configurations
    configs = config.import_configs()

    ############################################################
    # Grid search loop
    ############################################################
    # Organisation:
    # Each config is a folder with a random ID (integer).
    # In each of these folders there is:
    #    - a config.pkl file with the hyper-parameter space
    #    - a result.txt file with the result
    # The result.csv file containing id;result is created from the
    # directory, rebuilt from time to time.
    if DEFINED_CONFIG:
        for config_id, model_parameters in configs.items():
            config_folder = parameters['result_folder'] + '/' + config_id
            # Start from a clean folder: results of a previous run of the
            # same configuration are discarded.
            if os.path.isdir(config_folder):
                shutil.rmtree(config_folder)
            os.mkdir(config_folder)
            config_loop(Model, config_folder, model_parameters, parameters,
                        DATABASE_PATH)
    else:
        # Already tested configs
        list_config_folders = glob.glob(result_folder + '/*')
        # Only run as many new configs as needed to reach the maximum
        number_hp_config = max(
            0, parameters["max_hyperparam_configs"] - len(list_config_folders))
        for _ in range(number_hp_config):
            # Draw random IDs until one is not already used, then create
            # the folder
            while True:
                ID_config = str(random.randint(0, 2**25))
                config_folder = parameters['result_folder'] + '/' + ID_config
                if config_folder not in list_config_folders:
                    break
            os.mkdir(config_folder)

            # Sample model parameters from hyperparam space
            model_parameters = hyperopt.pyll.stochastic.sample(
                model_parameters_space)

            config_loop(Model, config_folder, model_parameters, parameters,
                        DATABASE_PATH)

            # Update folder list
            list_config_folders.append(config_folder)
    return
from sklearn import datasets, linear_model from matplotlib import pyplot as plt from sklearn.metrics import mean_squared_error import os import csv import numpy as np import LOP.Scripts.config as config # Collect the data folders = [ "k_folds", ] folders = [config.result_root() + '/' + e for e in folders] for folder in folders: configs = os.listdir(folder) x_ = [] y_ = [] for config in configs: for fold in range(10): path_result_file = os.path.join(folder, config, str(fold), "result.csv") # Read result.csv with open(path_result_file, "rb") as f: reader = csv.DictReader(f, delimiter=';') elem = reader.next() Xentr = float(elem["loss"]) acc = float(elem["accuracy"])