Пример #1
0
def get_image_classese_train_eval(data_path):
    """Build train/eval image lists from a class-per-directory layout.

    Expected data struct:
    {
    |-- data_url
        |-- train
            |-- class_1
                |-- a.jpg
                ...
            |-- class_2
                |-- b.jpg
                ...
            ...
        |-- eval
            |-- class_1
                |-- c.jpg
                ...
            |-- class_2
                |-- d.jpg
    }

    Every sub-directory of ``train`` is treated as one class. Label indices
    are assigned in the order ``file.list_directory`` returns the class
    directories. Entries whose name contains '.xml' or '.txt' are not
    treated as images.

    :param data_path: data store url
    Returns:
      train_data_list: list of [image_path, label_index],
      eval_data_list: list of [image_path, label_index],
      class_name: list of class directory names, one entry per class, so
        that class_name[label_index] is the name of that class.
    Raises:
      AssertionError: if the ``train`` or ``eval`` listing is empty.
      KeyError: if a non-empty class directory under ``eval`` has no
        counterpart under ``train``.
    """
    train_root = os.path.join(data_path, 'train')
    eval_root = os.path.join(data_path, 'eval')

    image_label_name = {}
    class_name = []
    image_list_train = []
    # get all labeled train data
    train_entries = file.list_directory(train_root)
    assert train_entries != [], 'there is no file in data url'
    for entry in train_entries:
        class_path = os.path.join(train_root, entry)
        if not file.is_directory(class_path):
            continue
        # One entry per class (not per file) keeps class_name aligned with
        # the label indices.
        label = len(class_name)
        image_label_name[entry] = label
        class_name.append(entry)
        for item in file.list_directory(class_path):
            if '.xml' not in item and '.txt' not in item:
                image_list_train.append([os.path.join(class_path, item), label])

    # get all labeled eval data
    image_list_eval = []
    eval_entries = file.list_directory(eval_root)
    assert eval_entries != [], 'there is no file in data url'
    for entry in eval_entries:
        class_path = os.path.join(eval_root, entry)
        if not file.is_directory(class_path):
            continue
        for item in file.list_directory(class_path):
            # Looked up per item so that an empty eval directory unknown to
            # the train set is tolerated rather than raising KeyError.
            label = image_label_name[entry]
            if '.xml' not in item and '.txt' not in item:
                image_list_eval.append([os.path.join(class_path, item), label])

    return image_list_train, image_list_eval, class_name
Пример #2
0
def get_image_train_eval(data_path):
    """Pair images with their annotations for pre-split train/eval data.

    Expected data struct:
    {
    |-- data_url
        |-- train
            |-- Images
                |-- a.jpg
                ...
            |-- Annotations
                |-- a.txt (or a.xml)
            |-- label_map_dict (optional)
        |-- eval
            |-- Images
                |-- b.jpg
                ...
            |-- Annotations
                |-- b.txt (or b.xml)
                ...
            |-- label_map_dict (optional)
        |-- label_map_dict (optional)
    }

    For each image the '.xml' annotation with the same base name is
    preferred; the '.txt' annotation is used only when no '.xml' exists.
    Images with neither annotation are left out.

    :param data_path: data store url
    Returns:
      train_data_list,
      eval_data_list,
      where each item is [image_path, xml_annotation_path] or
      [image_path, value returned by file.read() for the txt annotation].
    Raises:
      AssertionError: if the ``Images`` listing of either split is empty.
    """

    def collect_split(split_name):
        # Gather [image, annotation] pairs for one split ('train' or 'eval').
        images_dir = os.path.join(data_path, split_name, 'Images')
        annotations_dir = os.path.join(data_path, split_name, 'Annotations')
        image_names = file.list_directory(images_dir)
        assert not image_names == [], 'there is no file in data url'

        pairs = []
        for image_name in image_names:
            stem = os.path.splitext(image_name)[0]
            xml_path = os.path.join(annotations_dir, stem + '.xml')
            if file.exists(xml_path):
                # xml annotations are passed on as a path
                pairs.append([os.path.join(images_dir, image_name), xml_path])
                continue
            txt_path = os.path.join(annotations_dir, stem + '.txt')
            if file.exists(txt_path):
                # txt annotations are passed on as the file content
                pairs.append([os.path.join(images_dir, image_name),
                              file.read(txt_path)])
        return pairs

    # train is processed completely before eval is listed
    image_list_train = collect_split('train')
    image_list_eval = collect_split('eval')
    return image_list_train, image_list_eval
Пример #3
0
def get_image_classese_raw(data_path, split_spec):
    """Build train/eval image lists from an unsplit class-per-directory layout.

    Expected data struct:
    {
    |-- data_url
        |-- class_1
            |-- a.jpg
            |-- b.jpg
        |-- class_2
            |-- c.jpg
            |-- d.jpg
            ...
        |-- label_map_dict (optional)
    }

    Every sub-directory of ``data_path`` is one class; non-directory entries
    (such as ``label_map_dict``) are ignored. Label indices follow the order
    in which ``file.list_directory`` returns the class directories. Entries
    whose name contains '.xml' or '.txt' are not treated as images. Each
    class is shuffled and split independently, so the train fraction is
    applied per class and is computed from the image count only.

    :param data_path: data store url
    :param split_spec: fraction of each class's images that goes to train
    Returns:
      train_data_list: list of [image_path, label_index],
      eval_data_list: list of [image_path, label_index],
      class_name: list of class directory names, one entry per class, so
        that class_name[label_index] is the name of that class.
    Raises:
      AssertionError: if ``data_path`` contains no class directory.
    """
    # Build a filtered list instead of removing entries while iterating,
    # which would skip the element following every removed one.
    class_dirs = [entry for entry in file.list_directory(data_path)
                  if file.is_directory(os.path.join(data_path, entry))]
    assert class_dirs != [], 'there is no file in data url'

    class_name = []
    image_list_train = []
    image_list_eval = []
    for label, class_dir in enumerate(class_dirs):
        class_name.append(class_dir)
        class_path = os.path.join(data_path, class_dir)
        class_images = [[os.path.join(class_path, item), label]
                        for item in file.list_directory(class_path)
                        if '.xml' not in item and '.txt' not in item]

        # Split this class's own image list, so skipped annotation files can
        # never shift the indices of later classes.
        random.shuffle(class_images)
        # A non-positive split_spec sends every image to eval.
        train_num = max(int(len(class_images) * split_spec), 0)
        image_list_train.extend(class_images[:train_num])
        image_list_eval.extend(class_images[train_num:])
    return image_list_train, image_list_eval, class_name
Пример #4
0
def get_image_list(data_path, split_spec):
    """Detect the dataset layout under ``data_path`` and load its image lists.

    The top-level listing (ignoring an entry named 'cache') selects the
    loader:
      * both 'Images' and 'Annotations' present -> get_image_images_annotation
      * both 'train' and 'eval' present -> chosen from the content of 'train':
          - both 'Images' and 'Annotations' -> get_image_train_eval
          - 'image_to_annotation.csv'       -> get_image_csv
          - no sub-directory at all         -> get_image_train_eval_raw
          - otherwise                       -> get_image_classese_train_eval
      * no sub-directory at all -> get_image_raw_txt
      * otherwise               -> get_image_classese_raw

    :param data_path: data store url
    :param split_spec: split train percent if data doesn't have evaluation data
    Returns:
        train_data_list,
        eval_data_list,
        class_name: whatever the selected loader reports as class names, or
          None for the loaders that only return the two data lists.
    """
    image_list_train = []
    image_list_eval = []
    class_name = None

    file_list = file.list_directory(data_path)
    if 'cache' in file_list:
        file_list.remove('cache')
    donot_have_directory = not any(
        file.is_directory(os.path.join(data_path, entry)) for entry in file_list)

    # Each name needs its own membership test: "'a' and 'b' in x" only
    # evaluates "'b' in x" because a non-empty string is always truthy.
    if 'Images' in file_list and 'Annotations' in file_list:
        image_list_train, image_list_eval, class_name = \
            get_image_images_annotation(data_path, split_spec)
    elif 'train' in file_list and 'eval' in file_list:
        train_files = file.list_directory(os.path.join(data_path, 'train'))
        if 'cache' in train_files:
            train_files.remove('cache')
        is_raw = not any(
            file.is_directory(os.path.join(data_path, 'train', entry))
            for entry in train_files)
        if 'Images' in train_files and 'Annotations' in train_files:
            image_list_train, image_list_eval = get_image_train_eval(data_path)
        elif 'image_to_annotation.csv' in train_files:
            image_list_train, image_list_eval = get_image_csv(data_path)
        elif is_raw:
            image_list_train, image_list_eval = \
                get_image_train_eval_raw(data_path)
        else:
            image_list_train, image_list_eval, class_name = \
                get_image_classese_train_eval(data_path)
    elif donot_have_directory:
        image_list_train, image_list_eval, class_name = \
            get_image_raw_txt(data_path, split_spec)
    else:
        image_list_train, image_list_eval, class_name = \
            get_image_classese_raw(data_path, split_spec)
    return image_list_train, image_list_eval, class_name
Пример #5
0
def get_image_images_annotation(data_path, split_spec):
    """Pair images with annotations and randomly split them into train/eval.

    Expected data struct:
    {
    |-- data_url
        |-- Images
            |-- a.jpg
            |-- b.jpg
            ...
        |-- Annotations
            |-- a.txt (or a.xml)
            |-- b.txt (or b.xml)
            ...
        |-- label_map_dict (optional)
    }

    For each image the '.xml' annotation with the same base name is
    preferred and stored as a path. Otherwise the '.txt' annotation is read
    and its content is mapped to an integer label, numbered in order of
    first appearance. Images with neither annotation are left out.

    :param data_path: data store url
    :param split_spec: split train percent if data doesn't have evaluation data
    Returns:
        train_data_list,
        eval_data_list,
        class_name: the distinct txt annotation contents, indexed by the
          integer label assigned to them.
    Raises:
        AssertionError: if the ``Images`` listing is empty.
    """
    images_dir = os.path.join(data_path, 'Images')
    annotations_dir = os.path.join(data_path, 'Annotations')

    # get all labeled data
    image_names = file.list_directory(images_dir)
    assert not image_names == [], 'there is no file in data url'

    label_dict = {}
    class_name = []
    image_set = []
    for image_name in image_names:
        stem = os.path.splitext(image_name)[0]
        xml_path = os.path.join(annotations_dir, stem + '.xml')
        if file.exists(xml_path):
            image_set.append([os.path.join(images_dir, image_name), xml_path])
            continue
        txt_path = os.path.join(annotations_dir, stem + '.txt')
        if not file.exists(txt_path):
            continue
        label_name = file.read(txt_path)
        if label_name not in label_dict:
            # the next free label index equals the number of classes so far
            label_dict[label_name] = len(class_name)
            class_name.append(label_name)
        image_set.append([os.path.join(images_dir, image_name),
                          label_dict[label_name]])

    # split data to train and eval
    train_num = int(len(image_set) * split_spec)
    order = list(range(len(image_set)))
    random.shuffle(order)
    image_list_train = []
    image_list_eval = []
    for position, source_index in enumerate(order):
        target = image_list_train if position < train_num else image_list_eval
        target.append(image_set[source_index])
    return image_list_train, image_list_eval, class_name
Пример #6
0
def _read_image_to_annotation_csv(data_path, csv_path):
    """Parse one image_to_annotation.csv into [image_path, label_path] pairs.

    The first line is skipped as a header. Each remaining non-blank line
    must hold exactly two comma-separated fields, both of which are joined
    onto ``data_path``.
    """
    pairs = []
    with file.File(csv_path, 'r') as f:
        for line in f.readlines()[1:]:
            line = line.strip()
            if not line:
                # tolerate blank lines instead of failing on the unpack
                continue
            image_path, image_label = line.split(',')
            pairs.append([os.path.join(data_path, image_path),
                          os.path.join(data_path, image_label)])
    return pairs


def get_image_csv(data_path):
    """Load train/eval data described by ``image_to_annotation.csv`` files.

    The sub-directories of ``<data_path>/train`` are inspected in listing
    order, and the first one containing a '.txt' or '.xml' file decides the
    result:
      * '.txt' files: the paths of the train and eval csv files themselves
        are returned (two strings, not lists).
      * '.xml' files: both csv files are parsed into lists of
        [image_path, label_path].
    If no sub-directory contains either kind of file, two empty lists are
    returned.

    :param data_path: data store url
    Returns:
      train_data_list,
      eval_data_list,
    """
    train_root = os.path.join(data_path, 'train')
    train_csv = os.path.join(train_root, 'image_to_annotation.csv')
    eval_csv = os.path.join(data_path, 'eval', 'image_to_annotation.csv')

    for entry in file.list_directory(train_root):
        entry_path = os.path.join(train_root, entry)
        # 'train' also holds plain files (at least the csv itself); only
        # directories can be listed.
        if not file.is_directory(entry_path):
            continue
        names = file.list_directory(entry_path)
        # Test the suffix of each name: "'.txt' in names" would only match
        # an entry called exactly '.txt'.
        if any(name.endswith('.txt') for name in names):
            return train_csv, eval_csv
        if any(name.endswith('.xml') for name in names):
            return (_read_image_to_annotation_csv(data_path, train_csv),
                    _read_image_to_annotation_csv(data_path, eval_csv))
    return [], []