Python FilteringCSVReader示例，csvkit.grep.FilteringCSVReader Python示例

示例#1

0

显示文件

文件： data.py 项目： ghing/hoops-data

def load_park_district_courts(csv_file):
    """Create and save a ``ParkDistrictCourt`` for each filtered CSV row.

    The CSV is read with ``CSVKitReader`` and passed through a
    ``FilteringCSVReader`` configured with the pattern
    ``{'FACILITY NAME': 'BASKETBALL'}``.  The first row produced by the
    filter (the header) is discarded; every following row is converted
    into a ``ParkDistrictCourt`` and saved.

    Args:
        csv_file: Input handed unchanged to ``CSVKitReader``.
    """
    rows = CSVKitReader(csv_file)
    patterns = {
        'FACILITY NAME': 'BASKETBALL',
    }
    # HACK: More human-readable indexes into fields in the rows of
    # the CSV file.  Maybe there's a DictReader subclass that lets one
    # index by column name, or a method of ``CSVKitReader`` that
    # resolves column indexes from their names
    name_index = 0
    number_index = 1
    facility_name_index = 2
    facility_type_index = 3
    location_index = 6

    filter_reader = FilteringCSVReader(rows, patterns=patterns, header=True)
    # Skip header.  The built-in next() is used instead of the ``.next()``
    # method because the method only exists on Python 2 iterators, while
    # the built-in works on Python 2.6+ and Python 3 alike.
    next(filter_reader)

    for row in filter_reader:
        lat, lng = split_location(row[location_index])
        court = ParkDistrictCourt(
            name=row[name_index],
            # The point is stored as [lng, lat], i.e. reversed relative to
            # the (lat, lng) order returned by split_location().
            point=[lng, lat],
            official_name=row[name_index],
            park_num=row[number_index],
            facility_name=row[facility_name_index],
            facility_type=row[facility_type_index],
        )
        court.save()

示例#2

0

显示文件

 def test_mixed_indices_and_column_names_in_patterns(self):
     """Patterns keyed by a column name ('age') and a column index (0) together.

     Expects the reader to yield ``tab2[0]`` and ``tab2[4]`` and then be
     exhausted.
     """
     fcr = FilteringCSVReader(iter(self.tab2), patterns={'age': 'only', 0: '2'})
     # Built-in next() rather than the Python 2-only ``.next()`` method.
     self.assertEqual(self.tab2[0], next(fcr))
     self.assertEqual(self.tab2[4], next(fcr))
     try:
         next(fcr)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#3

0

显示文件

文件： test_grep.py 项目： binarydud/csvkit

 def test_no_header(self):
     """With ``header=False``, filter on column index 2 for 'only'.

     Expects the reader to yield ``tab1[2]`` and ``tab1[3]`` and then be
     exhausted.
     """
     fcr = FilteringCSVReader(iter(self.tab1), patterns={2: 'only'}, header=False)
     # Built-in next() rather than the Python 2-only ``.next()`` method.
     self.assertEqual(self.tab1[2], next(fcr))
     self.assertEqual(self.tab1[3], next(fcr))
     try:
         next(fcr)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#4

0

显示文件

 def test_no_header(self):
     """With ``header=False``, filter on column index 2 for 'only'.

     Expects the reader to yield ``tab1[2]`` and ``tab1[3]`` and then be
     exhausted.
     """
     fcr = FilteringCSVReader(iter(self.tab1), patterns={2: 'only'}, header=False)
     # Built-in next() rather than the Python 2-only ``.next()`` method.
     self.assertEqual(self.tab1[2], next(fcr))
     self.assertEqual(self.tab1[3], next(fcr))
     try:
         next(fcr)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#5

0

显示文件

文件： test_grep.py 项目： binarydud/csvkit

 def test_inverse(self):
     """With ``inverse=True`` and the list pattern ``['1']``.

     Expects the reader to yield ``tab2[0]``, ``tab2[2]`` and ``tab2[4]``
     and then be exhausted.
     """
     fcr = FilteringCSVReader(iter(self.tab2), patterns=['1'], inverse=True)
     # Built-in next() rather than the Python 2-only ``.next()`` method.
     self.assertEqual(self.tab2[0], next(fcr))
     self.assertEqual(self.tab2[2], next(fcr))
     self.assertEqual(self.tab2[4], next(fcr))
     try:
         next(fcr)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#6

0

显示文件

文件： test_grep.py 项目： binarydud/csvkit

 def test_pattern(self):
     """Filter ``tab1`` with the list pattern ``['1']``.

     Expects the reader to yield ``tab1[0]``, ``tab1[1]`` and ``tab1[4]``
     and then be exhausted.
     """
     fcr = FilteringCSVReader(iter(self.tab1), patterns=['1'])
     # Built-in next() rather than the Python 2-only ``.next()`` method.
     self.assertEqual(self.tab1[0], next(fcr))
     self.assertEqual(self.tab1[1], next(fcr))
     self.assertEqual(self.tab1[4], next(fcr))
     try:
         next(fcr)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#7

0

显示文件

文件： test_grep.py 项目： binarydud/csvkit

 def test_regex(self):
     """Filter column index 1 with a compiled regular expression.

     Expects the reader to yield ``tab1[0]``, ``tab1[1]``, ``tab1[3]`` and
     ``tab1[4]`` and then be exhausted.
     """
     pattern = re.compile(".*(Reader|Tribune).*")
     fcr = FilteringCSVReader(iter(self.tab1), patterns={1: pattern})

     # Built-in next() rather than the Python 2-only ``.next()`` method.
     self.assertEqual(self.tab1[0], next(fcr))
     self.assertEqual(self.tab1[1], next(fcr))
     self.assertEqual(self.tab1[3], next(fcr))
     self.assertEqual(self.tab1[4], next(fcr))
     try:
         next(fcr)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#8

0

显示文件

文件： csvgrep.py 项目： higs4281/csvkit

    def main(self):
        """Write the header row and every row kept by ``FilteringCSVReader``.

        With ``names_only`` set, only the column names are printed.  Otherwise
        one of the regex, pattern or matchfile arguments must be given; the
        resulting pattern is applied to every column selected by
        ``self.args.columns``.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.regex and not self.args.pattern and not self.args.matchfile:
            self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)
        # Built-in next() rather than the Python 2-only ``.next()`` method.
        column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            # A set makes each per-cell membership test O(1) instead of a
            # linear scan over every line of the match file.
            lines = set(line.rstrip() for line in self.args.matchfile)
            pattern = lambda x: x in lines
        else:
            pattern = self.args.pattern

        # The same pattern is applied to every selected column.
        patterns = dict((c, pattern) for c in column_ids)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        # header=False because the header row was already consumed above.
        filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        for row in filter_reader:
            output.writerow(row)

示例#9

0

显示文件

    def main(self):
        """Write the header row and every row kept by ``FilteringCSVReader``.

        With ``names_only`` set, only the column names are printed.  Otherwise
        at least one column must be selected and one of the regex, pattern or
        matchfile arguments must be given; the resulting pattern is applied
        to every selected column.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.columns:
            self.argparser.error('You must specify at least one column to search using the -c option.')

        if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
            self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

        reader_kwargs = self.reader_kwargs
        writer_kwargs = self.writer_kwargs
        # Move the line_numbers option from the writer to the reader.  The
        # key is added to the existing reader options; rebinding
        # reader_kwargs to a fresh dict would silently drop every other
        # reader option.
        if writer_kwargs.pop('line_numbers', False):
            reader_kwargs['line_numbers'] = True

        rows, column_names, column_ids = self.get_rows_and_column_names_and_column_ids(**reader_kwargs)

        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            # Set membership keeps the per-cell lookup cheap.
            lines = set(line.rstrip() for line in self.args.matchfile)
            pattern = lambda x: x in lines
        else:
            pattern = self.args.pattern

        # The same pattern is applied to every selected column.
        patterns = dict((column_id, pattern) for column_id in column_ids)
        filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        output = agate.csv.writer(self.output_file, **writer_kwargs)
        output.writerow(column_names)

        for row in filter_reader:
            output.writerow(row)

示例#10

0

显示文件

    def main(self):
        """Copy the header and all rows kept by ``FilteringCSVReader`` to the output.

        Prints only the column names when ``names_only`` is set.  Otherwise
        requires a column selection plus one of the regex, pattern or
        matchfile arguments, and applies that single pattern to each
        selected column.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if not self.args.columns:
            self.argparser.error('You must specify at least one column to search using the -c option.')

        nothing_to_match = (
            self.args.regex is None
            and self.args.pattern is None
            and self.args.matchfile is None
        )
        if nothing_to_match:
            self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

        rows = CSVKitReader(self.input_file, **self.reader_kwargs)
        # The first row is the header; it is written out unfiltered below.
        column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            lines = {line.rstrip() for line in self.args.matchfile}

            def pattern(x):
                return x in lines
        else:
            pattern = self.args.pattern

        # Every selected column is checked against the same pattern.
        patterns = {c: pattern for c in column_ids}

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        # header=False: the header has already been taken off ``rows``.
        filter_reader = FilteringCSVReader(
            rows,
            header=False,
            patterns=patterns,
            inverse=self.args.inverse,
        )

        for matched_row in filter_reader:
            output.writerow(matched_row)

示例#11

0

显示文件

文件： test_grep.py 项目： wireservice/csvkit

 def test_index_out_of_range(self):
     """Filter ``tab2`` on column index 3 for '0'.

     Expects ``tab2[0]`` and ``tab2[4]`` to be yielded, then exhaustion.
     NOTE(review): the test name suggests index 3 lies beyond the columns
     of some rows -- confirm against the ``tab2`` fixture.
     """
     reader = FilteringCSVReader(iter(self.tab2), patterns={3: '0'})
     for expected_index in (0, 4):
         self.assertEqual(self.tab2[expected_index], next(reader))
     try:
         next(reader)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#12

0

显示文件

文件： test_grep.py 项目： jamshedmelik/csvkit

 def test_duplicate_column_ids_in_patterns(self):
     """Constructing the reader with these patterns must raise ColumnIdentifierError.

     NOTE(review): per the test name, 'age' and index 1 presumably identify
     the same column -- confirm against the ``tab2`` fixture.
     """
     duplicate_patterns = {'age': 'only', 1: 'second'}
     try:
         FilteringCSVReader(iter(self.tab2), patterns=duplicate_patterns)
     except ColumnIdentifierError:
         pass
     else:
         self.fail("Should be an exception.")

示例#13

0

显示文件

文件： test_grep.py 项目： jamshedmelik/csvkit

 def test_column_names_in_patterns(self):
     """Filter ``tab2`` with a pattern keyed by the column name 'age'.

     Expects ``tab2[0]``, ``tab2[2]`` and ``tab2[4]`` to be yielded, then
     exhaustion.
     """
     reader = FilteringCSVReader(iter(self.tab2), patterns={'age': 'only'})
     for expected_index in (0, 2, 4):
         self.assertEqual(self.tab2[expected_index], next(reader))
     try:
         next(reader)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#14

0

显示文件

文件： test_grep.py 项目： jamshedmelik/csvkit

 def test_inverse(self):
     """Filter ``tab2`` with the list pattern ``['1']`` and ``inverse=True``.

     Expects ``tab2[0]``, ``tab2[2]`` and ``tab2[4]`` to be yielded, then
     exhaustion.
     """
     reader = FilteringCSVReader(iter(self.tab2), patterns=['1'], inverse=True)
     for expected_index in (0, 2, 4):
         self.assertEqual(self.tab2[expected_index], next(reader))
     try:
         next(reader)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#15

0

显示文件

文件： test_grep.py 项目： jamshedmelik/csvkit

 def test_pattern(self):
     """Filter ``tab1`` with the list pattern ``['1']``.

     Expects ``tab1[0]``, ``tab1[1]`` and ``tab1[4]`` to be yielded, then
     exhaustion.
     """
     reader = FilteringCSVReader(iter(self.tab1), patterns=['1'])
     for expected_index in (0, 1, 4):
         self.assertEqual(self.tab1[expected_index], next(reader))
     try:
         next(reader)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#16

0

显示文件

 def test_any_match_and_inverse(self):
     """Combine ``any_match=True`` with ``inverse=True`` on mixed name/index patterns.

     Expects ``tab2[0]``, ``tab2[1]`` and ``tab2[3]`` to be yielded, then
     exhaustion.
     """
     reader = FilteringCSVReader(
         iter(self.tab2),
         patterns={'age': 'only', 0: '2'},
         any_match=True,
         inverse=True,
     )
     for expected_index in (0, 1, 3):
         self.assertEqual(self.tab2[expected_index], next(reader))
     try:
         next(reader)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#17

0

显示文件

文件： test_grep.py 项目： jamshedmelik/csvkit

 def test_multiline(self):
     """A regex for 'bar' must match a cell whose text is 'foo\\nbar'.

     Expects both the header row and the single data row to be yielded,
     then exhaustion.
     """
     table = [['a', 'b'], ['1', 'foo\nbar']]
     reader = FilteringCSVReader(iter(table), patterns={'b': re.compile('bar')})
     for expected_row in table:
         self.assertEqual(expected_row, next(reader))
     try:
         next(reader)
     except StopIteration:
         pass
     else:
         self.fail("Should be no more rows left.")

示例#18

0

显示文件

    def main(self):
        """Copy the header and all rows kept by ``FilteringCSVReader`` to the output.

        Prints only the column names when ``names_only`` is set.  Otherwise
        requires a column selection plus one of the regex, pattern or
        matchfile arguments, and applies that single pattern to each
        selected column, honouring the ``inverse`` and ``any_match`` flags.
        """
        if self.args.names_only:
            self.print_column_names()
            return

        if self.additional_input_expected():
            sys.stderr.write('No input file or piped data provided. Waiting for standard input:\n')

        if not self.args.columns:
            self.argparser.error('You must specify at least one column to search using the -c option.')

        nothing_to_match = (
            self.args.regex is None
            and self.args.pattern is None
            and self.args.matchfile is None
        )
        if nothing_to_match:
            self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

        reader_kwargs = self.reader_kwargs
        writer_kwargs = self.writer_kwargs
        # line_numbers is taken away from the writer options and handed to
        # the reader instead.
        if writer_kwargs.pop('line_numbers', False):
            reader_kwargs['line_numbers'] = True

        rows, column_names, column_ids = self.get_rows_and_column_names_and_column_ids(**reader_kwargs)

        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            # Read every line up front so the file can be closed right away.
            lines = {line.rstrip() for line in self.args.matchfile}
            self.args.matchfile.close()
            pattern = lambda x: x in lines
        else:
            pattern = self.args.pattern

        # Every selected column is checked against the same pattern.
        patterns = {column_id: pattern for column_id in column_ids}
        filter_reader = FilteringCSVReader(
            rows,
            header=False,
            patterns=patterns,
            inverse=self.args.inverse,
            any_match=self.args.any_match,
        )

        output = agate.csv.writer(self.output_file, **writer_kwargs)
        output.writerow(column_names)

        for matched_row in filter_reader:
            output.writerow(matched_row)

示例#19

0

显示文件

文件： test_grep.py 项目： jamshedmelik/csvkit

    def test_regex(self):
        """Filter column index 1 of ``tab1`` with a compiled regular expression.

        Expects ``tab1[0]``, ``tab1[1]``, ``tab1[3]`` and ``tab1[4]`` to be
        yielded, then exhaustion.
        """
        reader = FilteringCSVReader(
            iter(self.tab1),
            patterns={1: re.compile(".*(Reader|Tribune).*")},
        )

        for expected_index in (0, 1, 3, 4):
            self.assertEqual(self.tab1[expected_index], next(reader))
        try:
            next(reader)
        except StopIteration:
            pass
        else:
            self.fail("Should be no more rows left.")

示例#20

0

显示文件

文件： csvrgrep.py 项目： dannguyen/csvkitcat

def filter_rows(
    rows: typeIterable,
    pattern_str: str,
    columns_str: str,
    column_names: list,
    default_column_ids: list,
    literal_match: bool,
    column_offset: int,
    inverse: bool,
    any_match: bool,
    # not_columns,
) -> FilteringCSVReader:
    """Wrap *rows* in a ``FilteringCSVReader`` built from one pattern.

    Args:
        rows: Rows to filter; passed to ``FilteringCSVReader`` with
            ``header=False``.
        pattern_str: The pattern text.
        columns_str: Column specification; when truthy it is resolved with
            ``parse_column_identifiers``.
        column_names: Column names used to resolve ``columns_str``.
        default_column_ids: Column ids used when ``columns_str`` is falsy.
        literal_match: If true, ``pattern_str`` is used as-is; otherwise it
            is compiled as a regular expression.
        column_offset: Passed through to ``parse_column_identifiers``.
        inverse: Forwarded to ``FilteringCSVReader``.
        any_match: Forwarded to ``FilteringCSVReader``.

    Returns:
        The configured ``FilteringCSVReader``.
    """
    # Literal matching keeps the raw string; anything else is a regex.
    pattern = pattern_str if literal_match else re.compile(pattern_str)

    # An explicit column specification wins over the defaults.
    expr_col_ids = (
        parse_column_identifiers(columns_str, column_names, column_offset)
        if columns_str
        else default_column_ids
    )

    return FilteringCSVReader(
        rows,
        header=False,
        # The same pattern is applied to every chosen column.
        patterns={eid: pattern for eid in expr_col_ids},
        inverse=inverse,
        any_match=any_match,
    )