def to_table_data(self):
    """Yield a single ``TableData`` built from the source data.

    When the loader provides no headers, the first row of the source data
    is taken as the header row and the remaining rows form the table body.
    Otherwise the loader's headers are used and every source row is treated
    as table body.

    :raises DataError:
        If the header row taken from the source data contains an empty
        item, or if there are no data rows.
    """
    if typepy.is_empty_sequence(self._loader.headers):
        headers = self._source_data[0]

        # Generator form short-circuits on the first empty header.
        if any(typepy.is_null_string(header) for header in headers):
            # Adjacent literals are concatenated verbatim, so each sentence
            # needs its own trailing space to keep the message readable.
            raise DataError(
                "the first line includes empty string item. "
                "all of the items should contain header name. "
                "actual={}".format(headers)
            )

        data_matrix = self._source_data[1:]
    else:
        headers = self._loader.headers
        data_matrix = self._source_data

    if not data_matrix:
        raise DataError("data row must be greater or equal than one")

    # The table counter is advanced before the table name is generated.
    self._loader.inc_table_count()

    yield TableData(
        self._loader.make_table_name(),
        headers,
        data_matrix,
        dp_extractor=self._loader.dp_extractor,
        type_hints=self._extract_type_hints(headers),
    )
def _to_data_matrix(self):
    """Parse the LTSV input stream into a list of ordered records.

    Rows that ``typepy.is_empty_sequence`` reports as empty are skipped.
    Every other row is stripped and split on tabs into ``label:value``
    items. Only the first ``:`` separates the label from the value, so
    values that themselves contain colons (timestamps, URLs, ...) are
    preserved intact.

    :return: Generator yielding one list of ``OrderedDict`` records.
    :raises DataError: If an item contains no ``:`` separator.
    :raises InvalidHeaderNameError:
        If a label is rejected by ``pv.validate_ltsv_label``.
    """
    from collections import OrderedDict

    data_matrix = []

    for row_idx, row in enumerate(self._ltsv_input_stream):
        if typepy.is_empty_sequence(row):
            continue

        ltsv_record = OrderedDict()
        for col_idx, ltsv_item in enumerate(row.strip().split("\t")):
            try:
                # maxsplit=1: everything after the first colon is the value.
                label, value = ltsv_item.split(":", 1)
            except ValueError:
                raise DataError(
                    "invalid lstv item found: line={}, col={}, item='{}'".
                    format(row_idx, col_idx, ltsv_item))

            # Labels may be wrapped in double quotes; drop them before
            # validating.
            label = label.strip('"')

            try:
                pv.validate_ltsv_label(label)
            except (pv.NullNameError, pv.InvalidCharError):
                raise InvalidHeaderNameError(
                    "invalid label found (acceptable chars are [0-9A-Za-z_.-]): "
                    "line={}, col={}, label='{}'".format(
                        row_idx, col_idx, label))

            ltsv_record[label] = value

        data_matrix.append(ltsv_record)

    # using generator to prepare for future enhancement to support
    # iterative load.
    yield data_matrix
def _to_data_matrix(self):
    """Build a list of rows from the CSV reader, modifying every item.

    Rows for which ``typepy.is_not_empty_sequence`` is false are dropped.

    :return: List of lists of modified items.
    :raises DataError:
        If iterating the reader raises ``csv.Error`` or
        ``UnicodeDecodeError``.
    """
    matrix = []

    try:
        for record in self._csv_reader:
            if not typepy.is_not_empty_sequence(record):
                continue

            matrix.append([self.__modify_item(cell) for cell in record])
    except (csv.Error, UnicodeDecodeError) as error:
        raise DataError(error)

    return matrix
def _get_start_row_idx(self):
    """Return the index of the first header row at or after ``start_row``.

    Rows are examined in order from ``self.start_row`` up to, but not
    including, ``self._row_count``.

    :raises DataError: If none of the examined rows is a header row.
    """
    for candidate_idx in range(self.start_row, self._row_count):
        if self.__is_header_row(candidate_idx):
            return candidate_idx

    raise DataError("header row not found")
def _validate_source_data(self):
    """Ensure that ``self._source_data`` is truthy.

    :raises DataError: If the source data is falsy (e.g. empty).
    """
    if self._source_data:
        return

    raise DataError("source data is empty")
def validate(self):
    """Check that ``self.source`` is not a null string.

    :raises DataError:
        If ``typepy.is_null_string`` reports the source as null.
    """
    source_is_null = typepy.is_null_string(self.source)
    if not source_is_null:
        return

    raise DataError("data source is empty")