Пример #1
0
    def read_paths_and_labels(self, labels_file):
        """Read example paths and integer labels from a comma-separated file.

        Every non-blank line of ``labels_file`` is split on commas. The first
        field is passed through ``tools.adapt_path_to_current_os`` and joined
        onto ``self.dirdata``; the second field is parsed with ``int``. Any
        further fields are ignored. Blank or whitespace-only lines are
        skipped instead of aborting the whole read.

        After parsing, the examples are selected and ordered as follows:

        * if ``self.percent_of_data`` is not 100, a random subset of
          ``int(percent_of_data / 100 * n)`` examples is drawn without
          replacement (``self.shuffle_data`` is not consulted in this case);
        * otherwise all examples are kept, and shuffled only when
          ``self.shuffle_data`` is true.

        Finally the trailing examples that do not fill a complete batch of
        ``self.batch_size`` are dropped.

        Args:
            labels_file: Path of the text file to read.

        Returns:
            A ``(paths, labels)`` tuple of two equally long lists whose
            length is a multiple of ``self.batch_size``.

        Raises:
            FileNotFoundError: If ``labels_file`` does not exist (the error
                is logged and re-raised).
            IndexError: If a non-blank line contains no comma.
            ValueError: If the second field of a line is not an integer.
        """
        paths = []
        labels = []

        try:
            with open(labels_file, 'r') as file:
                for line in file:
                    # A stray empty line has no fields to parse; skipping it
                    # avoids an IndexError on line_split[1].
                    if not line.strip():
                        continue
                    line_split = line.split(',')
                    paths.append(
                        os.path.join(
                            self.dirdata,
                            tools.adapt_path_to_current_os(line_split[0])))
                    # int() tolerates the trailing newline of the last field.
                    labels.append(int(line_split[1]))
        except FileNotFoundError as ex:
            # Lazy %-formatting also works when labels_file is path-like.
            logging.error('File %s does not exist.', labels_file)
            logging.error(str(ex))
            raise

        n_examples = len(labels)

        # Remove data or shuffle:
        if self.percent_of_data != 100:
            # Keep only a random fraction of the examples.
            n_kept = int(self.percent_of_data / 100.0 * n_examples)
            indexes = np.random.choice(np.arange(n_examples),
                                       n_kept,
                                       replace=False)
        else:
            # Keep everything, shuffling only on request.
            indexes = np.arange(n_examples)
            if self.shuffle_data:
                np.random.shuffle(indexes)

        # Apply the same selection/order to both lists so they stay aligned.
        paths = [paths[i] for i in indexes]
        labels = [labels[i] for i in indexes]

        # Remove the remaining examples that do not fit in a batch.
        n_usable = len(paths) - (len(paths) % self.batch_size)
        paths = paths[:n_usable]
        labels = labels[:n_usable]

        assert len(
            paths
        ) % self.batch_size == 0, 'Number of images is not a multiple of batch size'

        return paths, labels
Пример #2
0
 def get_filenames(self, split):
     """Return the (extension-less) file names listed for a dataset split.

     The names are read from ``<self.dirdata>/<split>_files.txt``, one per
     line, and each is passed through ``tools.adapt_path_to_current_os``.
     Blank or whitespace-only lines are ignored so they do not turn into
     empty file names.

     The list is then subsampled or shuffled:

     * if ``self.opts.percent_of_data`` is not 100, a random subset of
       ``int(percent_of_data / 100 * n)`` names is drawn without
       replacement (``self.opts.shuffle_data`` is not consulted);
     * otherwise all names are kept, and shuffled only when
       ``self.opts.shuffle_data`` is true.

     Finally the trailing names that do not fill a complete batch of
     ``self.batch_size`` are dropped.

     Args:
         split: Either ``'train'`` or ``'val'``.

     Returns:
         A list of names whose length is a multiple of ``self.batch_size``.

     Raises:
         ValueError: If ``split`` is neither ``'train'`` nor ``'val'``
             (a subclass of ``Exception``, which was raised previously).
         FileNotFoundError: If the list file does not exist (the error is
             logged and re-raised).
     """
     if split not in ('train', 'val'):
         raise ValueError('Split name not recognized.')
     list_file = os.path.join(self.dirdata, split + '_files.txt')
     # Only the file access is guarded, so the "does not exist" message can
     # never be triggered by the path adaptation below.
     try:
         with open(list_file, 'r') as fid:
             raw_names = fid.read().splitlines()
     except FileNotFoundError as ex:
         logging.error('File %s does not exist.', list_file)
         logging.error(str(ex))
         raise
     filenamesnoext = [
         tools.adapt_path_to_current_os(name)
         for name in raw_names
         if name.strip()
     ]
     n_files = len(filenamesnoext)
     # Remove data or shuffle:
     if self.opts.percent_of_data != 100:
         # Keep only a random fraction of the names.
         n_kept = int(self.opts.percent_of_data / 100.0 * n_files)
         indexes = np.random.choice(np.arange(n_files),
                                    n_kept,
                                    replace=False)
     else:
         # Keep everything, shuffling only on request.
         indexes = np.arange(n_files)
         if self.opts.shuffle_data:
             np.random.shuffle(indexes)
     filenamesnoext = [filenamesnoext[i] for i in indexes]
     # Remove the remaining examples that do not fit in a batch.
     n_usable = len(filenamesnoext) - (len(filenamesnoext) % self.batch_size)
     filenamesnoext = filenamesnoext[:n_usable]
     assert len(
         filenamesnoext
     ) % self.batch_size == 0, 'Number of images is not a multiple of batch size'
     return filenamesnoext
Пример #3
0
def read_paths_and_labels(labels_file, dirdata, batch_size=None):
    """Read, shuffle and batch-align example paths and integer labels.

    Every non-blank line of ``labels_file`` is split on commas. The first
    field is passed through ``tools.adapt_path_to_current_os`` and joined
    onto ``dirdata``; the second field is parsed with ``int``. Any further
    fields are ignored. Blank or whitespace-only lines are skipped instead
    of aborting the whole read.

    The examples are then shuffled with ``np.random.shuffle`` and the
    trailing ones that do not fill a complete batch are dropped.

    Args:
        labels_file: Path of the text file to read.
        dirdata: Directory that the paths in the file are relative to.
        batch_size: Number of examples per batch. When omitted, the
            module-level ``batch_size`` global is used, which is what this
            function always relied on before the parameter existed.

    Returns:
        A ``(paths, labels)`` tuple of two equally long lists whose length
        is a multiple of the batch size.

    Raises:
        FileNotFoundError: If ``labels_file`` does not exist (a message is
            printed and the error re-raised).
        IndexError: If a non-blank line contains no comma.
        ValueError: If the second field of a line is not an integer.
        NameError: If ``batch_size`` is omitted and no module-level
            ``batch_size`` is defined.
    """
    paths = []
    labels = []
    try:
        with open(labels_file, 'r') as file:
            for line in file:
                # A stray empty line has no fields to parse; skipping it
                # avoids an IndexError on line_split[1].
                if not line.strip():
                    continue
                line_split = line.split(',')
                paths.append(
                    os.path.join(dirdata,
                                 tools.adapt_path_to_current_os(
                                     line_split[0])))
                # int() tolerates the trailing newline of the last field.
                labels.append(int(line_split[1]))
    except FileNotFoundError as ex:
        print('File {} does not exist.'.format(labels_file))
        print(str(ex))
        raise

    if batch_size is None:
        # Legacy fallback: the parameter shadows the global name, so the
        # module-level value has to be looked up explicitly.
        try:
            batch_size = globals()['batch_size']
        except KeyError:
            raise NameError("name 'batch_size' is not defined") from None

    # Shuffle data:
    indexes = np.arange(len(labels))
    np.random.shuffle(indexes)
    # Apply the same permutation to both lists so they stay aligned.
    paths = [paths[i] for i in indexes]
    labels = [labels[i] for i in indexes]

    # Remove the remaining examples that do not fit in a batch.
    n_usable = len(paths) - (len(paths) % batch_size)
    paths = paths[:n_usable]
    labels = labels[:n_usable]

    assert len(
        paths
    ) % batch_size == 0, 'Number of images is not a multiple of batch size'
    return paths, labels