示例#1
0
    def __init__(self, data_dir, split, headers=True, columns=None, **kwargs):
        """Set up the reader and validate the on-disk dataset layout.

        Two locations are derived from the arguments and must both exist:
        the annotations file `<data_dir>/<split>.csv` and the images
        directory `<data_dir>/<split>`.

        Arguments:
            data_dir: Base directory holding the dataset files.
            split: Name of the split to read. It selects both the CSV file
                and the images directory.
            headers (boolean): Whether the CSV file starts with a header row
                naming its fields.
            columns (list or str): Column names, meant for CSV files without
                headers. A string is split on commas; `None` falls back to
                `self.DEFAULT_COLUMNS`. How this interacts with
                `headers=True` is decided outside this method.
            **kwargs: Forwarded unchanged to the parent initializer.

        Raises:
            InvalidDataDirectory: If the annotations file or the images
                directory cannot be found.
        """
        super(CSVReader, self).__init__(**kwargs)

        self._data_dir = data_dir
        self._split = split

        # Annotations are expected at `<data_dir>/<split>.csv`.
        csv_name = '{}.csv'.format(self._split)
        self._annotations_path = os.path.join(self._data_dir, csv_name)
        annotations_found = tf.gfile.Exists(self._annotations_path)
        if not annotations_found:
            message = (
                'CSV annotation file not found. Should be located at '
                '`{}`'
            ).format(self._annotations_path)
            raise InvalidDataDirectory(message)

        # Images are expected in a directory named after the split.
        self._images_dir = os.path.join(self._data_dir, self._split)
        images_found = tf.gfile.Exists(self._images_dir)
        if not images_found:
            message = (
                'Image directory not found. Should be located at '
                '`{}`'
            ).format(self._images_dir)
            raise InvalidDataDirectory(message)

        # Normalize `columns`: default when missing, split when given as a
        # comma-separated string, otherwise used as provided.
        if columns is None:
            columns = self.DEFAULT_COLUMNS
        elif is_basestring(columns):
            columns = columns.split(',')
        self._columns = columns
        self._column_names = set(self._columns)

        self._has_headers = headers

        # Records cache; stays `None` until something fills it.
        # TODO: Don't read it all upfront.
        self._records = None

        # Flags whether the CSV structure has already been checked.
        self._csv_checked = False

        # Public counters, both starting at zero.
        self.errors = self.yielded_records = 0
示例#2
0
    def __init__(self, data_dir, split, columns=DEFAULT_COLUMNS,
                 field_mapper=FIELD_MAPPER, with_header=False, **kwargs):
        """Store the reader configuration and reset its counters.

        Arguments:
            data_dir: Base directory of the dataset.
            split: Name of the split to read.
            columns (list or str): Column names. A string is split on
                commas; any other value is stored as given.
            field_mapper: Stored as-is on the instance; its semantics are
                defined by the code that consumes it (not visible here).
            with_header (boolean): Stored as-is; presumably tells whether
                the CSV file has a header row -- confirm against the
                reading code.
            **kwargs: Forwarded unchanged to the parent initializer.
        """
        super(CSVReader, self).__init__(**kwargs)

        self._data_dir = data_dir
        self._split = split
        # Resolved only after the directory and split are stored on the
        # instance, matching the original statement order.
        self._labels_filename = self._get_labels_filename()

        # A comma-separated string becomes a list of column names.
        self._columns = (
            columns.split(',') if is_basestring(columns) else columns
        )
        self._field_mapper = field_mapper
        self._with_header = with_header

        # Starts unset; populated elsewhere in the class.
        self._files = None

        # Public counters, both starting at zero.
        self.errors = self.yielded_records = 0
示例#3
0
    def __init__(self,
                 data_dir,
                 split,
                 columns=DEFAULT_COLUMNS,
                 field_mapper=FIELD_MAPPER,
                 with_header=False,
                 **kwargs):
        """Initialize the reader state from the given settings.

        Arguments:
            data_dir: Base directory of the dataset.
            split: Name of the split to read.
            columns (list or str): Column names; when a string is passed it
                is split on commas into a list.
            field_mapper: Kept on the instance untouched. What it maps is
                determined by code outside this method.
            with_header (boolean): Kept on the instance untouched; likely
                indicates a header row in the CSV -- verify in the caller.
            **kwargs: Passed through to the parent initializer.
        """
        super(CSVReader, self).__init__(**kwargs)

        # Location settings go first: the labels filename lookup below is
        # performed once they are available on the instance.
        self._data_dir = data_dir
        self._split = split
        self._labels_filename = self._get_labels_filename()

        # Accept `columns` either as a sequence or as a comma-joined string.
        column_names = columns
        if is_basestring(column_names):
            column_names = column_names.split(',')
        self._columns = column_names

        self._field_mapper = field_mapper
        self._with_header = with_header

        # Not populated at construction time.
        self._files = None

        # Bookkeeping counters exposed publicly.
        self.errors = 0
        self.yielded_records = 0