def main():

    parser = argparse.ArgumentParser(description="Compute specific dataset for model using a set of features")

    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
    parser.add_argument('--features', type=str,
                        help="list of feature choices used to compute data",
                        default='svd_reconstruction, ipca_reconstruction',
                        required=True)
    parser.add_argument('--params', type=str,
                        help="list of specific params for each feature choice (see README.md for further information in 3D mode)",
                        default='100, 200 :: 50, 25',
                        required=True)
    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set', choices=list(range(1, 17)))
    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=cfg.renderer_choices, default='all')
    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
    parser.add_argument('--only_noisy', type=int, help='Only noisy data will be used', choices=[0, 1])

    args = parser.parse_args()

    p_filename   = args.output
    p_features   = list(map(str.strip, args.features.split(',')))
    p_params     = list(map(str.strip, args.params.split('::')))
    p_scenes     = args.scenes.split(',')
    p_nb_zones   = args.nb_zones
    p_renderer   = args.renderer
    p_random     = args.random
    p_only_noisy = args.only_noisy

    # create list of Transformation objects, one per requested feature
    transformations = []

    for id, features in enumerate(p_features):

        if features not in features_choices:
            raise ValueError("Unknown feature, please select a correct feature among: ", features_choices)

        transformations.append(Transformation(features, p_params[id]))

    # list all possible choices of renderer
    scenes_list = dt.get_renderer_scenes_names(p_renderer)
    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)

    # get scenes from indices of user selection
    scenes_selected = []

    for scene_id in p_scenes:
        index = scenes_indices.index(scene_id.strip())
        scenes_selected.append(scenes_list[index])

    # create database using img folder (generated the first time only)
    generate_data_model(scenes_list, p_filename, transformations, scenes_selected, p_nb_zones, p_random, p_only_noisy)
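# A minimal usage sketch for the script above (hypothetical script name and
# argument values; valid feature names come from features_choices and scene
# indices from the renderer configuration):
#
#   python generate_dataset.py --output data/output_dataset \
#       --features "svd_reconstruction, ipca_reconstruction" \
#       --params "100, 200 :: 50, 25" \
#       --scenes "A, D, G" --nb_zones 10 --renderer all --random 1 --only_noisy 0
#
# Note that --features and --params are parsed in parallel: the i-th group of
# --params (split on '::') is attached to the i-th feature, so both lists must
# contain the same number of entries.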
def main(): # getting all params parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data") parser.add_argument('--output', type=str, help='output file name desired (.train and .test)') parser.add_argument('--folder', type=str, help='folder path of data augmented database') parser.add_argument('--interval', type=str, help='Interval value to keep from svd', default='"0, 200"') parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices) parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices) parser.add_argument('--scenes', type=str, help='List of scenes to use for training data') parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1]) parser.add_argument('--percent', type=float, help='Percent of data use for train and test dataset (by default 1)') parser.add_argument('--step', type=int, help='Photo step to keep for build datasets', default=1) parser.add_argument('--each', type=int, help='Each features to keep from interval', default=1) parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all') parser.add_argument('--custom', type=str, help='Name of custom min max file if use of renormalization of data', default=False) args = parser.parse_args() p_filename = args.output p_folder = args.folder p_interval = list(map(int, args.interval.split(','))) p_kind = args.kind p_feature = args.feature p_scenes = args.scenes.split(',') p_random = args.random p_percent = args.percent p_step = args.step p_each = args.each p_renderer = args.renderer p_custom = args.custom # list all possibles choices of renderer scenes_list = dt.get_renderer_scenes_names(p_renderer) scenes_indices = dt.get_renderer_scenes_indices(p_renderer) # getting scenes from indexes user selection scenes_selected = [] for scene_id in p_scenes: index = scenes_indices.index(scene_id.strip()) scenes_selected.append(scenes_list[index]) # find min max value if necessary to renormalize data if p_custom: get_min_max_value_interval(p_folder, scenes_list, p_interval, p_feature) # write new file to save if not os.path.exists(custom_min_max_folder): os.makedirs(custom_min_max_folder) min_max_filename_path = os.path.join(custom_min_max_folder, p_custom) with open(min_max_filename_path, 'w') as f: f.write(str(min_value_interval) + '\n') f.write(str(max_value_interval) + '\n') # create database using img folder (generate first time only) generate_data_model(p_folder, scenes_list, p_filename, p_interval, p_kind, p_feature, scenes_selected, p_percent, p_random, p_step, p_each, p_custom)
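# A minimal usage sketch for the script above (hypothetical script name and
# argument values; --kind and --feature must belong to normalization_choices
# and features_choices respectively):
#
#   python generate_data_model.py --output data/output_model \
#       --folder data/augmented --interval "0, 200" --kind <normalization> \
#       --feature <feature> --scenes "A, D, G" --random 1 --percent 1.0 \
#       --step 10 --each 1 --renderer all --custom custom_min_max_values
#
# When --custom is set, the globals min_value_interval and max_value_interval
# filled by get_min_max_value_interval are written to a file under
# custom_min_max_folder so the same normalization bounds can be reused later.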
def main():

    # getting all params
    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")

    parser.add_argument('--output', type=str, help='output file name desired (.train and .test)')
    parser.add_argument('--n', type=int, help='Number of features wanted')
    parser.add_argument('--highest', type=int, help='Specify if highest or lowest values are wished', choices=[0, 1])
    parser.add_argument('--label', type=int, help='Specify if label correlation is used or not', choices=[0, 1])
    parser.add_argument('--kind', type=str, help='Kind of normalization level wished', choices=normalization_choices)
    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
    parser.add_argument('--scenes', type=str, help='List of scenes to use for training data')
    parser.add_argument('--nb_zones', type=int, help='Number of zones to use for training data set')
    parser.add_argument('--random', type=int, help='Data will be randomly filled or not', choices=[0, 1])
    parser.add_argument('--percent', type=float, help='Percent of data used for train and test datasets (by default 1)')
    parser.add_argument('--step', type=int, help='Photo step to keep for building datasets', default=1)
    parser.add_argument('--renderer', type=str, help='Renderer choice in order to limit scenes used', choices=renderer_choices, default='all')
    parser.add_argument('--custom', type=str, help='Name of custom min max file if renormalization of data is used', default=False)

    args = parser.parse_args()

    p_filename = args.output
    p_n        = args.n
    p_highest  = args.highest
    p_label    = args.label
    p_kind     = args.kind
    p_feature  = args.feature
    p_scenes   = args.scenes.split(',')
    p_nb_zones = args.nb_zones
    p_random   = args.random
    p_percent  = args.percent
    p_step     = args.step
    p_renderer = args.renderer
    p_custom   = args.custom

    # list all possible choices of renderer
    scenes_list = dt.get_renderer_scenes_names(p_renderer)
    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)

    # get scenes from indices of user selection
    scenes_selected = []

    for scene_id in p_scenes:
        index = scenes_indices.index(scene_id.strip())
        scenes_selected.append(scenes_list[index])

    # Get indices to keep from correlation information
    # compute a temporary data file to get correlation information
    temp_filename = 'temp'
    temp_filename_path = os.path.join(cfg.output_data_folder, temp_filename)

    cmd = [
        'python', 'generate_data_model_random.py',
        '--output', temp_filename_path,
        '--interval', '0, 200',
        '--kind', p_kind,
        '--feature', p_feature,
        '--scenes', args.scenes,
        '--nb_zones', str(16),
        '--random', str(int(p_random)),
        '--percent', str(p_percent),
        '--step', str(p_step),
        '--each', str(1),
        '--renderer', p_renderer,
        '--custom', temp_filename + min_max_ext
    ]

    subprocess.Popen(cmd).wait()

    temp_data_file_path = temp_filename_path + '.train'
    df = pd.read_csv(temp_data_file_path, sep=';', header=None)

    indices = []

    # compute correlation matrix from whole data scenes of renderer (using or not the label column)
    if p_label:

        # compute Pearson correlation between features and label
        corr = df.corr()

        features_corr = []

        for id_row, row in enumerate(corr):
            for id_col, val in enumerate(corr[row]):
                if id_col == 0 and id_row != 0:
                    features_corr.append(abs(val))

    else:
        df = df.drop(df.columns[[0]], axis=1)

        # compute Pearson correlation between features using only features
        corr = df[1:200].corr()

        features_corr = []

        for id_row, row in enumerate(corr):
            correlation_score = 0
            for id_col, val in enumerate(corr[row]):
                if id_col != id_row:
                    correlation_score += abs(val)

            features_corr.append(correlation_score)

    # find `n` min or max indices to keep
    if p_highest:
        indices = utils.get_indices_of_highest_values(features_corr, p_n)
    else:
        indices = utils.get_indices_of_lowest_values(features_corr, p_n)

    indices = np.sort(indices)

    # save indices found
    if not os.path.exists(cfg.correlation_indices_folder):
        os.makedirs(cfg.correlation_indices_folder)

    indices_file_path = os.path.join(
        cfg.correlation_indices_folder,
        p_filename.replace(cfg.output_data_folder + '/', '') + '.csv')

    with open(indices_file_path, 'w') as f:
        for i in indices:
            f.write(str(i) + ';')

    # find min and max values if necessary to renormalize data from the `n` indices found
    if p_custom:
        get_min_max_value_interval(scenes_list, indices, p_feature)

        # write a new file to save them
        if not os.path.exists(custom_min_max_folder):
            os.makedirs(custom_min_max_folder)

        min_max_current_filename = p_filename.replace(
            cfg.output_data_folder + '/', '').replace('deep_keras_', '') + min_max_filename
        min_max_filename_path = os.path.join(custom_min_max_folder, min_max_current_filename)

        print(min_max_filename_path)

        with open(min_max_filename_path, 'w') as f:
            f.write(str(min_value_interval) + '\n')
            f.write(str(max_value_interval) + '\n')

    # create database using img folder (generated the first time only)
    generate_data_model(scenes_list, p_filename, indices, p_kind, p_feature, scenes_selected, p_nb_zones, p_percent, p_random, p_step, p_custom)
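# Standalone sketch of the correlation-based feature selection performed above
# (an illustration only, not the project's code: it assumes column 0 of the
# temporary '.train' file is the label and the remaining columns are features,
# and it mimics utils.get_indices_of_highest_values / get_indices_of_lowest_values
# with numpy.argsort):

import numpy as np
import pandas as pd

def select_feature_indices(df, n, use_label=True, highest=True):
    """Return `n` sorted feature indices chosen from a Pearson correlation matrix."""

    if use_label:
        # score each feature by the absolute value of its correlation with the label (column 0)
        corr = df.corr()
        scores = corr.iloc[1:, 0].abs().values
    else:
        # drop the label column, then score each feature by its summed absolute
        # correlation with every other feature
        features_only = df.drop(df.columns[[0]], axis=1)
        corr = features_only.corr().abs().values
        scores = corr.sum(axis=1) - 1.0  # subtract the self-correlation (always 1)

    order = np.argsort(scores)
    selected = order[-n:] if highest else order[:n]

    return np.sort(selected)

# toy usage (one label column followed by four feature columns):
#   rng = np.random.default_rng(0)
#   toy = pd.DataFrame(rng.normal(size=(50, 5)))
#   select_feature_indices(toy, n=2, use_label=True, highest=False)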