def load_park_district_courts(csv_file):
    """Create and save a ``ParkDistrictCourt`` for each filtered row of a CSV.

    Rows are read from ``csv_file`` with ``CSVKitReader`` and passed through
    a ``FilteringCSVReader`` configured with the pattern ``'BASKETBALL'`` on
    the ``'FACILITY NAME'`` column. Each remaining row is turned into a
    ``ParkDistrictCourt`` and saved immediately.

    :param csv_file: input handed straight to ``CSVKitReader``.
    """
    rows = CSVKitReader(csv_file)
    patterns = {
        'FACILITY NAME': 'BASKETBALL',
    }

    # HACK: More human-readable indexes into fields in the rows of
    # the CSV file.  Maybe there's a DictReader subclass that lets one
    # index by column name, or a method of ``CSVKitReader`` that
    # resolves column indexes from their names
    name_index = 0
    number_index = 1
    facility_name_index = 2
    facility_type_index = 3
    location_index = 6

    filter_reader = FilteringCSVReader(rows, patterns=patterns, header=True)
    # Skip header.  The builtin ``next()`` is used instead of the ``.next()``
    # method so the call works on both Python 2 and Python 3 iterators.
    next(filter_reader)

    for row in filter_reader:
        lat, lng = split_location(row[location_index])
        court = ParkDistrictCourt(
            name=row[name_index],
            # The point is stored longitude-first.
            point=[lng, lat],
            official_name=row[name_index],
            park_num=row[number_index],
            facility_name=row[facility_name_index],
            facility_type=row[facility_type_index],
        )
        court.save()
def test_mixed_indices_and_column_names_in_patterns(self):
    # Patterns keyed by a column name ('age') and a column index (0) in the
    # same dict: rows 0 and 4 of ``tab2`` are expected, then exhaustion.
    fcr = FilteringCSVReader(iter(self.tab2), patterns={'age': 'only', 0: '2'})
    # Builtin next() rather than fcr.next(): works on Python 2 and 3.
    for row_index in (0, 4):
        self.assertEqual(self.tab2[row_index], next(fcr))
    try:
        next(fcr)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_no_header(self):
    # With header=False and a pattern on column index 2, rows 2 and 3 of
    # ``tab1`` are expected, then exhaustion.
    fcr = FilteringCSVReader(iter(self.tab1), patterns={2: 'only'}, header=False)
    # Builtin next() rather than fcr.next(): works on Python 2 and 3.
    for row_index in (2, 3):
        self.assertEqual(self.tab1[row_index], next(fcr))
    try:
        next(fcr)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_inverse(self):
    # inverse=True with a list pattern: rows 0, 2 and 4 of ``tab2`` are
    # expected, then exhaustion.
    fcr = FilteringCSVReader(iter(self.tab2), patterns=['1'], inverse=True)
    # Builtin next() rather than fcr.next(): works on Python 2 and 3.
    for row_index in (0, 2, 4):
        self.assertEqual(self.tab2[row_index], next(fcr))
    try:
        next(fcr)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_pattern(self):
    # A single list pattern '1': rows 0, 1 and 4 of ``tab1`` are expected,
    # then exhaustion.
    fcr = FilteringCSVReader(iter(self.tab1), patterns=['1'])
    # Builtin next() rather than fcr.next(): works on Python 2 and 3.
    for row_index in (0, 1, 4):
        self.assertEqual(self.tab1[row_index], next(fcr))
    try:
        next(fcr)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_regex(self):
    # A compiled regular expression as the pattern for column index 1:
    # rows 0, 1, 3 and 4 of ``tab1`` are expected, then exhaustion.
    pattern = re.compile(".*(Reader|Tribune).*")
    fcr = FilteringCSVReader(iter(self.tab1), patterns={1: pattern})
    # Builtin next() rather than fcr.next(): works on Python 2 and 3.
    for row_index in (0, 1, 3, 4):
        self.assertEqual(self.tab1[row_index], next(fcr))
    try:
        next(fcr)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def main(self):
    """Write the header and the rows selected by the configured pattern.

    With ``names_only`` set, only the column names are printed.  Otherwise a
    pattern is taken from, in order of precedence, ``args.regex`` (compiled),
    ``args.matchfile`` (membership in the file's right-stripped lines) or
    ``args.pattern`` (passed to ``FilteringCSVReader`` unchanged).  The same
    pattern is applied to every column resolved from ``args.columns``.

    A missing pattern is reported through ``self.argparser.error``.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.regex and not self.args.pattern and not self.args.matchfile:
        self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

    rows = CSVKitReader(self.args.file, **self.reader_kwargs)
    # Builtin next() instead of rows.next(): works on Python 2 and 3.
    column_names = next(rows)
    column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

    if self.args.regex:
        pattern = re.compile(self.args.regex)
    elif self.args.matchfile:
        # A set gives constant-time membership for each value tested,
        # instead of scanning a list of every line in the match file.
        lines = set(line.rstrip() for line in self.args.matchfile)

        def pattern(x):
            return x in lines
    else:
        pattern = self.args.pattern

    patterns = dict((c, pattern) for c in column_ids)

    output = CSVKitWriter(self.output_file, **self.writer_kwargs)
    output.writerow(column_names)

    # header=False: the header row was already consumed from ``rows`` above.
    filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

    for row in filter_reader:
        output.writerow(row)
def main(self):
    """Write the header and the rows selected by the configured pattern.

    With ``names_only`` set, only the column names are printed.  Otherwise
    ``args.columns`` and one of ``args.regex`` / ``args.pattern`` /
    ``args.matchfile`` are required; a missing one is reported through
    ``self.argparser.error``.  The pattern is taken from, in order of
    precedence, the compiled regex, membership in the match file's
    right-stripped lines, or ``args.pattern`` unchanged, and is applied to
    every selected column.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.columns:
        self.argparser.error('You must specify at least one column to search using the -c option.')

    if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
        self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

    reader_kwargs = self.reader_kwargs
    writer_kwargs = self.writer_kwargs
    # Move the line_numbers option from the writer to the reader.  The
    # option is added to a copy of the existing reader options; replacing
    # them with a one-key dict would discard every other reader setting.
    if writer_kwargs.pop('line_numbers', False):
        reader_kwargs = dict(reader_kwargs, line_numbers=True)

    rows, column_names, column_ids = self.get_rows_and_column_names_and_column_ids(**reader_kwargs)

    if self.args.regex:
        pattern = re.compile(self.args.regex)
    elif self.args.matchfile:
        lines = set(line.rstrip() for line in self.args.matchfile)

        def pattern(x):
            return x in lines
    else:
        pattern = self.args.pattern

    patterns = dict((column_id, pattern) for column_id in column_ids)
    # header=False is passed because the column names were already returned
    # separately by the helper above.
    filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

    output = agate.csv.writer(self.output_file, **writer_kwargs)
    output.writerow(column_names)

    for row in filter_reader:
        output.writerow(row)
def main(self):
    """Copy the header, then every row accepted by the filter, to the output.

    ``names_only`` short-circuits to printing the column names.  Missing
    ``-c`` columns or a missing pattern source are reported through
    ``self.argparser.error``.  The pattern source is chosen in this order:
    compiled ``args.regex``, membership in the right-stripped lines of
    ``args.matchfile``, or ``args.pattern`` unchanged.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.columns:
        self.argparser.error('You must specify at least one column to search using the -c option.')

    if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
        self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

    reader = CSVKitReader(self.input_file, **self.reader_kwargs)
    header = next(reader)
    selected_ids = parse_column_identifiers(self.args.columns, header, self.args.zero_based)

    if self.args.regex:
        matcher = re.compile(self.args.regex)
    elif self.args.matchfile:
        allowed = set(line.rstrip() for line in self.args.matchfile)

        def matcher(value):
            return value in allowed
    else:
        matcher = self.args.pattern

    # Every selected column shares the same matcher object.
    patterns = dict.fromkeys(selected_ids, matcher)

    writer = CSVKitWriter(self.output_file, **self.writer_kwargs)
    writer.writerow(header)

    # The header was already consumed above, hence header=False.
    for matching_row in FilteringCSVReader(reader, header=False, patterns=patterns,
                                           inverse=self.args.inverse):
        writer.writerow(matching_row)
def test_index_out_of_range(self):
    # Pattern keyed on column index 3 (presumably past the last column of
    # ``tab2`` -- fixture not visible here): rows 0 and 4 must come back,
    # after which the reader must be exhausted.
    reader = FilteringCSVReader(iter(self.tab2), patterns={3: '0'})
    for expected_index in (0, 4):
        self.assertEqual(self.tab2[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_duplicate_column_ids_in_patterns(self):
    # Constructing the reader with both a column name and a column index in
    # the patterns (presumably naming the same column -- fixture not
    # visible here) must raise ColumnIdentifierError.
    clashing_patterns = {'age': 'only', 1: 'second'}
    try:
        FilteringCSVReader(iter(self.tab2), patterns=clashing_patterns)
    except ColumnIdentifierError:
        pass
    else:
        self.fail("Should be an exception.")
def test_column_names_in_patterns(self):
    # Pattern keyed by the column name 'age': rows 0, 2 and 4 of ``tab2``
    # must come back, after which the reader must be exhausted.
    reader = FilteringCSVReader(iter(self.tab2), patterns={'age': 'only'})
    for expected_index in (0, 2, 4):
        self.assertEqual(self.tab2[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_inverse(self):
    # inverse=True with the list pattern ['1']: rows 0, 2 and 4 of ``tab2``
    # must come back, after which the reader must be exhausted.
    reader = FilteringCSVReader(iter(self.tab2), patterns=['1'], inverse=True)
    for expected_index in (0, 2, 4):
        self.assertEqual(self.tab2[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_pattern(self):
    # The list pattern ['1']: rows 0, 1 and 4 of ``tab1`` must come back,
    # after which the reader must be exhausted.
    reader = FilteringCSVReader(iter(self.tab1), patterns=['1'])
    for expected_index in (0, 1, 4):
        self.assertEqual(self.tab1[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_any_match_and_inverse(self):
    # any_match=True combined with inverse=True, with patterns on the 'age'
    # column and on column index 0: rows 0, 1 and 3 of ``tab2`` must come
    # back, after which the reader must be exhausted.
    reader = FilteringCSVReader(
        iter(self.tab2),
        patterns={'age': 'only', 0: '2'},
        any_match=True,
        inverse=True,
    )
    for expected_index in (0, 1, 3):
        self.assertEqual(self.tab2[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def test_multiline(self):
    # The only data row holds a value with an embedded newline; the regex
    # 'bar' on column 'b' must still select it.  Both table rows (header,
    # then data) are expected back, followed by exhaustion.
    table = [
        ['a', 'b'],
        ['1', 'foo\nbar'],
    ]
    reader = FilteringCSVReader(iter(table), patterns={'b': re.compile('bar')})
    for expected_index in (0, 1):
        self.assertEqual(table[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def main(self):
    """Copy the header, then every row accepted by the filter, to the output.

    ``names_only`` short-circuits to printing the column names.  When
    ``self.additional_input_expected()`` is true, a notice is written to
    standard error.  Missing ``-c`` columns or a missing pattern source are
    reported through ``self.argparser.error``.  The pattern source is chosen
    in this order: compiled ``args.regex``, membership in the right-stripped
    lines of ``args.matchfile`` (which is closed after reading), or
    ``args.pattern`` unchanged.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    if self.additional_input_expected():
        sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')

    if not self.args.columns:
        self.argparser.error('You must specify at least one column to search using the -c option.')

    if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
        self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

    reader_options = self.reader_kwargs
    writer_options = self.writer_kwargs

    # line_numbers is taken away from the writer and handed to the reader.
    wants_line_numbers = writer_options.pop('line_numbers', False)
    if wants_line_numbers:
        reader_options['line_numbers'] = True

    rows, header, selected_ids = self.get_rows_and_column_names_and_column_ids(**reader_options)

    if self.args.regex:
        matcher = re.compile(self.args.regex)
    elif self.args.matchfile:
        allowed = set(line.rstrip() for line in self.args.matchfile)
        self.args.matchfile.close()

        def matcher(value):
            return value in allowed
    else:
        matcher = self.args.pattern

    # Every selected column shares the same matcher object.
    patterns = dict.fromkeys(selected_ids, matcher)

    matching_rows = FilteringCSVReader(
        rows,
        header=False,
        patterns=patterns,
        inverse=self.args.inverse,
        any_match=self.args.any_match,
    )

    writer = agate.csv.writer(self.output_file, **writer_options)
    writer.writerow(header)
    for matching_row in matching_rows:
        writer.writerow(matching_row)
def test_regex(self):
    # A compiled regular expression on column index 1: rows 0, 1, 3 and 4 of
    # ``tab1`` must come back, after which the reader must be exhausted.
    name_regex = re.compile(".*(Reader|Tribune).*")
    reader = FilteringCSVReader(iter(self.tab1), patterns={1: name_regex})
    for expected_index in (0, 1, 3, 4):
        self.assertEqual(self.tab1[expected_index], next(reader))
    try:
        next(reader)
    except StopIteration:
        pass
    else:
        self.fail("Should be no more rows left.")
def filter_rows(
    rows: typeIterable,
    pattern_str: str,
    columns_str: str,
    column_names: list,
    default_column_ids: list,
    literal_match: bool,
    column_offset: int,
    inverse: bool,
    any_match: bool,
    # not_columns,
) -> FilteringCSVReader:
    """Wrap ``rows`` in a ``FilteringCSVReader`` for one pattern.

    :param rows: data rows; the reader is built with ``header=False``.
    :param pattern_str: the pattern text.
    :param columns_str: column specification; when falsy,
        ``default_column_ids`` is used instead.
    :param column_names: names used to resolve ``columns_str``.
    :param default_column_ids: column ids used when ``columns_str`` is falsy.
    :param literal_match: when true, ``pattern_str`` is used as given;
        otherwise it is compiled as a regular expression.
    :param column_offset: passed to ``parse_column_identifiers``.
    :param inverse: forwarded to ``FilteringCSVReader``.
    :param any_match: forwarded to ``FilteringCSVReader``.
    :returns: the configured ``FilteringCSVReader``.
    """
    # Literal text is passed through as-is; anything else becomes a regex.
    matcher = pattern_str if literal_match else re.compile(pattern_str)

    if not columns_str:
        target_ids = default_column_ids
    else:
        target_ids = parse_column_identifiers(columns_str, column_names, column_offset)

    # Every target column shares the same matcher object.
    return FilteringCSVReader(
        rows,
        header=False,
        patterns=dict.fromkeys(target_ids, matcher),
        inverse=inverse,
        any_match=any_match,
    )