示例#1
0
def load_file_csv(path, file_name):
    """Merge the CSV files listed for *path* into one CSV and run the label EDA.

    Args:
        path: Directory prefix handed to ``extract_all_path``. It is also
            used, by plain string concatenation, as the prefix of the output
            files, so it is expected to end with a path separator.
        file_name: Base name (no extension) of the merged CSV and of the
            image path passed to ``EDA_data_label``.

    Raises:
        IndexError: If ``extract_all_path`` returns no CSV paths.
    """
    path_list = extract_all_path(path, 'csv')
    if not path_list:
        raise IndexError('no CSV files found under %r' % (path,))

    # DataFrame.append was removed in pandas 2.0; a single concat also avoids
    # re-copying the accumulated frame once per input file.
    data = pd.concat([pd.read_csv(path_file) for path_file in path_list])

    merged_csv = path + file_name + '.csv'
    # NOTE(review): the merged file is written under the same prefix that is
    # scanned above; confirm a re-run will not pick it up as an input.
    data.to_csv(merged_csv, index=False)
    EDA_data_label(merged_csv, path + file_name + '.png')
示例#2
0
def _write_split_with_plot(frame, out_dir, csv_name):
    """Write *frame* to ``out_dir + csv_name`` and run ``EDA_data_label`` on it.

    The image path given to ``EDA_data_label`` is *csv_name* truncated at its
    first '.' with '.png' appended, inside the same directory.
    """
    csv_path = out_dir + csv_name
    frame.to_csv(csv_path, index=False)
    EDA_data_label(csv_path, out_dir + csv_name.split('.')[0] + '.png')


def split_train_test_data(path):
    """Randomly split each CSV listed for *path* into train and test CSVs.

    For every CSV path returned by ``extract_all_path(path, 'csv')`` the
    distinct ``(names_image, list_label)`` pairs are shuffled and divided by
    a random mask (each pair lands on the "train" side with probability 0.8).
    Two row subsets of the original data are derived from that division, and
    the larger subset is written under ``.../data_unmerge/train/`` while the
    other goes under ``.../data_unmerge/test/``. ``EDA_data_label`` is run on
    each file written.

    Args:
        path: Directory prefix handed to ``extract_all_path``.
    """
    train_dir = 'data/data_0930_merge_2/data_unmerge/train/'
    test_dir = 'data/data_0930_merge_2/data_unmerge/test/'

    for path_file in extract_all_path(path, 'csv'):
        data = pd.read_csv(path_file)
        # One row per distinct (names_image, list_label) pair. The deprecated
        # ``axis=0`` argument is omitted; 0 is the default.
        data_group = data.groupby(['names_image',
                                   'list_label']).count().reset_index()
        data_group = data_group.sample(len(data_group))
        msk = np.random.rand(len(data_group)) < 0.8
        train_merge = data_group[msk]
        test_merge = data_group[~msk]
        print(train_merge.shape)
        print(test_merge.shape)

        # NOTE(review): both selections are negated, so ``train`` holds the
        # rows whose image is *absent* from the ~80% sample (and vice versa).
        # The length comparison below compensates by routing the larger
        # subset to the train directory. An image that occurs with several
        # labels can fall on both sides of the mask and then appears in
        # neither subset -- confirm this is intended.
        train = data[~data['names_image'].isin(train_merge['names_image'])]
        test = data[~data['names_image'].isin(test_merge['names_image'])]

        csv_name = path_file.split('/')[-1]
        if len(train) > len(test):
            outputs = ((train, train_dir), (test, test_dir))
        else:
            outputs = ((train, test_dir), (test, train_dir))
        for frame, out_dir in outputs:
            _write_split_with_plot(frame, out_dir, csv_name)
示例#3
0
    for img in image_list:
        img = img.replace("unmerge", "merge_1")
        yield(folder + img + '.png')

# data_words = list(sent_to_words(df_train['Review']))
# corresponding image names
def label(path_list, folder, path_file_csv, file_name):
    """Extract image info for the names listed in a text file and save it.

    Args:
        path_list: Path of a text file read with ``np.genfromtxt`` as strings.
        folder: Prefix passed to ``create_path_image`` together with the
            names read from *path_list*.
        path_file_csv: Destination CSV for the extracted information.
        file_name: Second argument forwarded to ``EDA_data_label``.

    Returns:
        The frame returned by ``extract_info_image``, or an empty DataFrame
        if that call failed. Failures in extraction, CSV writing or the EDA
        step are reported on stdout and do not propagate.
    """
    my_array2 = np.genfromtxt(path_list, dtype=str,
                              skip_header=0)
    image_list = list(create_path_image(folder, my_array2))
    df_info_image = pd.DataFrame()
    try:
        df_info_image = extract_info_image(image_list)
        df_info_image.to_csv(path_file_csv)
        EDA_data_label(path_file_csv, file_name)
    except Exception as exc:
        # Best-effort by design: report the actual error (not the Exception
        # class object) and fall through to return whatever was extracted.
        print('label: failed to process %s: %r' % (path_list, exc))
    return df_info_image


# Driver: run label() once per path returned by extract_all_path(path, 'txt'),
# writing a CSV and a PNG named after that file under data/data_0930_merge_1/.
path = 'data_0930_merge_1_tmp/Segmentation/'
folder = 'data_0930_merge_1_tmp/masks/'
path_file_text = extract_all_path(path, 'txt')
for path_list in path_file_text:
    # File name without directories, truncated at its first '.'.
    _, _, base_name = str(path_list).rpartition('/')
    stem, _, _ = base_name.partition('.')
    save_path = 'data/data_0930_merge_1/' + stem
    path_file_csv = '{}{}'.format(save_path, '.csv')
    file_name = '{}{}'.format(save_path, '.png')
    df_info_inmage = label(path_list, folder, path_file_csv, file_name)
    print(df_info_inmage)
示例#4
0
import os
import pandas as pd
from extract_information_image import extract_all_path
# Build a CSV that maps every PNG path to a category taken from its file name:
# the name is truncated at the first occurrence of each separator in
# list_str, applied in order.
path = []
path_file = extract_all_path('crawl_data/pngimg.com1/', 'png')
list_str = ['_', '.']
for pt in path_file:
    # Start from the file name so `pa` is always bound for this path, even
    # when none of the separators occurs in it.
    pa = pt.split('/')[-1]
    for x in list_str:
        # str.split returns the whole string when x is absent, so no
        # membership test is needed.
        pa = pa.split(x)[0]
    path.append(pa)

df = pd.DataFrame()
df['Path'] = path_file
df['Category'] = path
df.to_csv('crawl_data/path_file_pngimg.csv', index=False)