def main():
    """Command-line entry point: one-hot code a CSV file and append the result.

    Positional command-line arguments:
        input_path       full path to the input file
        output_path      full path to the output file
        number_max_bins  integer passed to ``DataManipulator.one_hot_code``

    The first three rows of the data are printed before and after coding.
    The coded data is written to ``output_path`` in append mode, with no
    header row and no index column.
    """
    cli = argparse.ArgumentParser(description='One-hot-code data')
    positional_specs = (
        ('input_path', str, 'full path to input file'),
        ('output_path', str, 'full path to output file'),
        ('number_max_bins', int, 'number of max bins for output data'),
    )
    for arg_name, arg_type, arg_help in positional_specs:
        cli.add_argument(arg_name, type=arg_type, help=arg_help)
    options = cli.parse_args()

    # Load the raw data and show a short preview.
    raw_frame = FileManager.get_csv_file_data_pandas(options.input_path)
    print('data frame head:')
    print(raw_frame.head(3))

    # One-hot code the data and show a short preview of the result.
    coded_frame = DataManipulator.one_hot_code(raw_frame,
                                               options.number_max_bins)
    print('data one hot coded head:')
    print(coded_frame.head(3))

    # mode='a' appends, so an existing output file is extended, not replaced.
    csv_options = {'header': None, 'index': None, 'sep': ',', 'mode': 'a'}
    coded_frame.to_csv(options.output_path, **csv_options)
def main():
    """Command-line entry point: run stepwise forward selection with k-means.

    Command-line arguments:
        file_path     full path to the input file (required)
        num_clusters  number of total clusters (optional, defaults to 2)

    Echoes the inputs, prints the head of the loaded data, runs
    ``StepwiseForwardSelection.run_sfs`` and prints the chosen features, the
    chosen data, the selector's ``base_performance`` and the chosen model's
    clusters and centroids.
    """
    cli = argparse.ArgumentParser(
        description='Run Stepwise Forward Selection && K-Means clustering on given data'
    )
    cli.add_argument('file_path', type=str, help='full path to input file')
    cli.add_argument(
        'num_clusters',
        type=int,
        default=2,
        nargs='?',
        help='number of total clusters',
    )
    options = cli.parse_args()

    # Echo the inputs back to the user.
    print()
    print('INPUTS')
    csv_path = options.file_path
    print('input file path:', csv_path)
    cluster_count = options.num_clusters
    print('number of clusters: ', cluster_count)
    print()

    # Load the data set and preview it.
    frame = FileManager.get_csv_file_data_pandas(csv_path)
    print('head of input data')
    print(frame.head())
    print()

    # No missing-value handling is done here: per the original author's note,
    # the data sets' 'names' files and visual inspection showed no missing
    # values.

    # Feature selection, with k-means run for the requested cluster count.
    selector = StepwiseForwardSelection(frame)
    picked_features, picked_data = selector.run_sfs(cluster_count)

    # Report the outcome.
    print()
    print('RESULT:')
    print('Chosen features (indexed from 0):')
    print(picked_features)
    print('Chosen data:')
    print(picked_data)
    print('Best silhouette coefficient:')
    print(selector.base_performance)
    print('Clusters:')
    print(selector.chosen_model.clusters)
    print('Centroids:')
    print(selector.chosen_model.centroids)
    print()
def split_data(file_path, is_random, num_groups=5, separator=','):
    """Load a CSV file and split its rows into groups.

    Args:
        file_path: path handed to ``FileManager.get_csv_file_data_pandas``.
        is_random: when equal to ``True``, use
            ``DataManipulator.split_data_randomly``; otherwise use
            ``DataManipulator.split_data_randomly_accounting_for_class``.
        num_groups: number of groups to produce; converted with ``int()``.
        separator: field separator forwarded to the CSV reader.

    Returns:
        Whatever the selected ``DataManipulator`` split function returns.
    """
    frame = FileManager.get_csv_file_data_pandas(file_path, separator)
    group_count = int(num_groups)

    # Equality with True (not plain truthiness) is kept on purpose: truthy
    # non-boolean values such as a non-empty string still take the
    # class-aware split, exactly as callers have seen so far.
    if is_random == True:  # noqa: E712
        # Basic random split.
        return DataManipulator.split_data_randomly(frame, group_count)

    # More complex split that accounts for the class of each row.
    return DataManipulator.split_data_randomly_accounting_for_class(
        frame, group_count)