Example #1
import argparse

# assumed available from the surrounding project:
# cfg, dt, features_choices, Transformation, generate_data_model


def main():

    parser = argparse.ArgumentParser(
        description="Compute a specific dataset for a model using a list of features")

    parser.add_argument('--output',
                        type=str,
                        help='desired output file name (.train and .test)')
    parser.add_argument('--features',
                        type=str,
                        help='comma-separated list of features used to compute data',
                        default='svd_reconstruction, ipca_reconstruction',
                        required=True)
    parser.add_argument('--params',
                        type=str,
                        help='list of specific parameters for each feature choice, separated by "::" (see README.md for further information in 3D mode)',
                        default='100, 200 :: 50, 25',
                        required=True)
    parser.add_argument('--scenes',
                        type=str,
                        help='List of scenes to use for training data')
    parser.add_argument('--nb_zones',
                        type=int,
                        help='Number of zones to use for training data set',
                        choices=list(range(1, 17)))
    parser.add_argument('--renderer',
                        type=str,
                        help='Renderer choice in order to limit scenes used',
                        choices=cfg.renderer_choices,
                        default='all')
    parser.add_argument('--random',
                        type=int,
                        help='Whether data is filled randomly (1) or not (0)',
                        choices=[0, 1])
    parser.add_argument('--only_noisy',
                        type=int,
                        help='Use only noisy data (1) or all data (0)',
                        choices=[0, 1])

    args = parser.parse_args()

    p_filename = args.output
    p_features = list(map(str.strip, args.features.split(',')))
    p_params = list(map(str.strip, args.params.split('::')))
    p_scenes = args.scenes.split(',')
    p_nb_zones = args.nb_zones
    p_renderer = args.renderer
    p_random = args.random
    p_only_noisy = args.only_noisy

    # create list of Transformation
    transformations = []

    for idx, feature in enumerate(p_features):

        if feature not in features_choices:
            raise ValueError(
                f"Unknown feature '{feature}'; expected one of {features_choices}")

        transformations.append(Transformation(feature, p_params[idx]))

    # list all possible choices of renderer
    scenes_list = dt.get_renderer_scenes_names(p_renderer)
    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)

    # getting scenes from user-selected indices
    scenes_selected = []

    for scene_id in p_scenes:
        index = scenes_indices.index(scene_id.strip())
        scenes_selected.append(scenes_list[index])

    # create database using img folder (generate first time only)
    generate_data_model(scenes_list, p_filename, transformations,
                        scenes_selected, p_nb_zones, p_random, p_only_noisy)
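
For completeness, the usual entry-point guard, followed by a hypothetical invocation (the script name and every argument value below are placeholders, not taken from the project):

if __name__ == "__main__":
    main()

# Hypothetical invocation:
#   python generate_dataset.py --output data/my_dataset \
#       --features 'svd_reconstruction, ipca_reconstruction' \
#       --params '100, 200 :: 50, 25' \
#       --scenes 'A, B' --nb_zones 10 --random 1 --only_noisy 0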
Example #2
import argparse
import os

# assumed available from the surrounding project:
# dt, normalization_choices, features_choices, renderer_choices,
# custom_min_max_folder, min_value_interval, max_value_interval,
# get_min_max_value_interval, generate_data_model


def main():

    # getting all params
    parser = argparse.ArgumentParser(description="Generate data for model using correlation matrix information from data")

    parser.add_argument('--output', type=str, help='desired output file name (.train and .test)')
    parser.add_argument('--folder', type=str, help='folder path of the data-augmented database')
    parser.add_argument('--interval', type=str, help='interval of values to keep from SVD', default='0, 200')
    parser.add_argument('--kind', type=str, help='kind of normalization level wanted', choices=normalization_choices)
    parser.add_argument('--feature', type=str, help='feature data choice', choices=features_choices)
    parser.add_argument('--scenes', type=str, help='list of scenes to use for training data')
    parser.add_argument('--random', type=int, help='whether data is filled randomly (1) or not (0)', choices=[0, 1])
    parser.add_argument('--percent', type=float, help='percent of data used for the train and test datasets (default: 1)', default=1.)
    parser.add_argument('--step', type=int, help='photo step to keep when building datasets', default=1)
    parser.add_argument('--each', type=int, help='keep every nth feature from the interval', default=1)
    parser.add_argument('--renderer', type=str, help='renderer choice in order to limit the scenes used', choices=renderer_choices, default='all')
    parser.add_argument('--custom', type=str, help='name of custom min-max file if data renormalization is used', default=False)

    args = parser.parse_args()

    p_filename = args.output
    p_folder   = args.folder
    p_interval = list(map(int, args.interval.split(',')))
    p_kind     = args.kind
    p_feature  = args.feature
    p_scenes   = args.scenes.split(',')
    p_random   = args.random
    p_percent  = args.percent
    p_step     = args.step
    p_each     = args.each
    p_renderer = args.renderer
    p_custom   = args.custom


    # list all possible choices of renderer
    scenes_list = dt.get_renderer_scenes_names(p_renderer)
    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)

    # getting scenes from user-selected indices
    scenes_selected = []

    for scene_id in p_scenes:
        index = scenes_indices.index(scene_id.strip())
        scenes_selected.append(scenes_list[index])

    # find min max value if necessary to renormalize data
    if p_custom:
        get_min_max_value_interval(p_folder, scenes_list, p_interval, p_feature)

        # write new file to save
        if not os.path.exists(custom_min_max_folder):
            os.makedirs(custom_min_max_folder)

        min_max_filename_path = os.path.join(custom_min_max_folder, p_custom)

        with open(min_max_filename_path, 'w') as f:
            f.write(str(min_value_interval) + '\n')
            f.write(str(max_value_interval) + '\n')

    # create database using img folder (generate first time only)
    generate_data_model(p_folder, scenes_list, p_filename, p_interval, p_kind, p_feature, scenes_selected, p_percent, p_random, p_step, p_each, p_custom)
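
The `if p_custom:` branch above reads back module-level globals that get_min_max_value_interval is expected to set; a minimal runnable sketch of that assumed contract (the function below is an illustrative stand-in, not the project's implementation, which also loads the vectors from the scene folders itself):

import numpy as np

# module-level bounds read back after the scan
min_value_interval = float('inf')
max_value_interval = float('-inf')


def update_min_max_interval(vectors, interval):
    # widen the global bounds over the given slice of each feature vector
    global min_value_interval, max_value_interval
    begin, end = interval
    for vector in vectors:
        values = np.asarray(vector[begin:end], dtype=float)
        min_value_interval = min(min_value_interval, float(values.min()))
        max_value_interval = max(max_value_interval, float(values.max()))


# usage: update_min_max_interval([[3, 1, 4], [1, 5, 9]], (0, 3))
# afterwards min_value_interval == 1.0 and max_value_interval == 9.0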
Example #3
import argparse
import os
import subprocess

import numpy as np
import pandas as pd

# assumed available from the surrounding project:
# cfg, dt, utils, normalization_choices, features_choices, renderer_choices,
# custom_min_max_folder, min_max_ext, min_max_filename,
# min_value_interval, max_value_interval,
# get_min_max_value_interval, generate_data_model


def main():

    # getting all params
    parser = argparse.ArgumentParser(
        description="Generate data for model using correlation matrix information from data")

    parser.add_argument('--output',
                        type=str,
                        help='desired output file name (.train and .test)')
    parser.add_argument('--n', type=int, help='Number of features wanted')
    parser.add_argument('--highest',
                        type=int,
                        help='Select highest (1) or lowest (0) correlation values',
                        choices=[0, 1])
    parser.add_argument('--label',
                        type=int,
                        help='Use correlation with the label (1) or between features (0)',
                        choices=[0, 1])
    parser.add_argument('--kind',
                        type=str,
                        help='Kind of normalization level wanted',
                        choices=normalization_choices)
    parser.add_argument('--feature',
                        type=str,
                        help='feature data choice',
                        choices=features_choices)
    parser.add_argument('--scenes',
                        type=str,
                        help='List of scenes to use for training data')
    parser.add_argument('--nb_zones',
                        type=int,
                        help='Number of zones to use for training data set')
    parser.add_argument('--random',
                        type=int,
                        help='Whether data is filled randomly (1) or not (0)',
                        choices=[0, 1])
    parser.add_argument('--percent',
                        type=float,
                        help='Percent of data used for the train and test datasets (default: 1)',
                        default=1.)
    parser.add_argument('--step',
                        type=int,
                        help='Photo step to keep for build datasets',
                        default=1)
    parser.add_argument('--renderer',
                        type=str,
                        help='Renderer choice in order to limit scenes used',
                        choices=renderer_choices,
                        default='all')
    parser.add_argument(
        '--custom',
        type=str,
        help='Name of custom min-max file if data renormalization is used',
        default=False)

    args = parser.parse_args()

    p_filename = args.output
    p_n = args.n
    p_highest = args.highest
    p_label = args.label
    p_kind = args.kind
    p_feature = args.feature
    p_scenes = args.scenes.split(',')
    p_nb_zones = args.nb_zones
    p_random = args.random
    p_percent = args.percent
    p_step = args.step
    p_renderer = args.renderer
    p_custom = args.custom

    # list all possible choices of renderer
    scenes_list = dt.get_renderer_scenes_names(p_renderer)
    scenes_indices = dt.get_renderer_scenes_indices(p_renderer)

    # getting scenes from user-selected indices
    scenes_selected = []

    for scene_id in p_scenes:
        index = scenes_indices.index(scene_id.strip())
        scenes_selected.append(scenes_list[index])

    # Get indices to keep from correlation information
    # compute temp data file to get correlation information
    temp_filename = 'temp'
    temp_filename_path = os.path.join(cfg.output_data_folder, temp_filename)

    cmd = [
        'python', 'generate_data_model_random.py',
        '--output', temp_filename_path,
        '--interval', '0, 200',
        '--kind', p_kind,
        '--feature', p_feature,
        '--scenes', args.scenes,
        '--nb_zones', str(16),
        '--random', str(int(p_random)),
        '--percent', str(p_percent),
        '--step', str(p_step),
        '--each', str(1),
        '--renderer', p_renderer,
        '--custom', temp_filename + min_max_ext
    ]

    subprocess.run(cmd)

    temp_data_file_path = temp_filename_path + '.train'
    df = pd.read_csv(temp_data_file_path, sep=';', header=None)

    indices = []

    # compute correlation matrix from all data scenes of the renderer (with or without the label column)
    if p_label:

        # compute pearson correlation between features and label
        corr = df.corr()

        features_corr = []

        for id_row, row in enumerate(corr):
            for id_col, val in enumerate(corr[row]):
                # keep the correlation of each feature with the label (column 0)
                if id_col == 0 and id_row != 0:
                    features_corr.append(abs(val))

    else:
        df = df.drop(df.columns[[0]], axis=1)

        # compute pearson correlation between features only (rows 1..199 of the data)
        corr = df[1:200].corr()

        features_corr = []

        for id_row, row in enumerate(corr):
            correlation_score = 0
            # sum absolute correlations of this feature with all other features
            for id_col, val in enumerate(corr[row]):
                if id_col != id_row:
                    correlation_score += abs(val)

            features_corr.append(correlation_score)

    # find `n` min or max indices to keep
    if p_highest:
        indices = utils.get_indices_of_highest_values(features_corr, p_n)
    else:
        indices = utils.get_indices_of_lowest_values(features_corr, p_n)

    indices = np.sort(indices)

    # save indices found
    if not os.path.exists(cfg.correlation_indices_folder):
        os.makedirs(cfg.correlation_indices_folder)

    indices_file_path = os.path.join(
        cfg.correlation_indices_folder,
        p_filename.replace(cfg.output_data_folder + '/', '') + '.csv')

    with open(indices_file_path, 'w') as f:
        for i in indices:
            f.write(str(i) + ';')

    # find min max value if necessary to renormalize data from `n` indices found
    if p_custom:
        get_min_max_value_interval(scenes_list, indices, p_feature)

        # write new file to save
        if not os.path.exists(custom_min_max_folder):
            os.makedirs(custom_min_max_folder)

        min_max_current_filename = p_filename.replace(
            cfg.output_data_folder + '/', '').replace('deep_keras_', '') + min_max_filename
        min_max_filename_path = os.path.join(custom_min_max_folder,
                                             min_max_current_filename)

        print(min_max_filename_path)
        with open(min_max_filename_path, 'w') as f:
            f.write(str(min_value_interval) + '\n')
            f.write(str(max_value_interval) + '\n')

    # create database using img folder (generate first time only)
    generate_data_model(scenes_list, p_filename, indices, p_kind, p_feature,
                        scenes_selected, p_nb_zones, p_percent, p_random,
                        p_step, p_custom)
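
utils.get_indices_of_highest_values and utils.get_indices_of_lowest_values are not shown in these snippets; a minimal numpy sketch of their assumed behavior (illustrative, not the project's code):

import numpy as np


def get_indices_of_highest_values(values, n):
    # assumed behavior: indices of the n largest scores
    return np.argsort(values)[::-1][:n]


def get_indices_of_lowest_values(values, n):
    # assumed behavior: indices of the n smallest scores
    return np.argsort(values)[:n]


# get_indices_of_highest_values([0.2, 0.9, 0.5], 2)  # -> array([1, 2])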