Пример #1
0
    def main(self):
        """Sort the input CSV by the selected columns and write the result.

        When ``self.args.names_only`` is set, the call is handed to
        ``print_column_names`` and nothing is sorted. Otherwise the input is
        loaded through ``table.Table.from_csv``, its rows are ordered by the
        columns resolved from ``self.args.columns`` (in reverse order when
        ``self.args.reverse`` is true), and the header row followed by the
        ordered rows is written to ``self.output_file``.
        """
        args = self.args

        if args.names_only:
            print_column_names(args.file, self.output_file,
                               **self.reader_kwargs)
            return

        # Name the table after the input file (directory and extension
        # stripped); stdin has no usable file name, so use a fixed one.
        if args.file.name == '<stdin>':
            table_name = 'csvsql_table'
        else:
            base_name = os.path.basename(args.file.name)
            table_name = os.path.splitext(base_name)[0]

        tab = table.Table.from_csv(args.file,
                                   name=table_name,
                                   snifflimit=args.snifflimit,
                                   **self.reader_kwargs)
        sort_columns = parse_column_identifiers(args.columns, tab.headers())

        def sort_key(record):
            # Compare rows by their values in the chosen columns, in order.
            return [record[index] for index in sort_columns]

        ordered = sorted(tab.to_rows(serialize_dates=True),
                         key=sort_key,
                         reverse=args.reverse)

        header_row = tab.headers()
        writer = CSVKitWriter(self.output_file, **self.writer_kwargs)

        # Header first, then the data rows in sorted order.
        writer.writerow(header_row)
        for record in ordered:
            writer.writerow(record)
Пример #2
0
    def main(self):
        """Write the header row plus every input row accepted by the filter.

        When ``self.args.names_only`` is set, the call is handed to
        ``print_column_names`` and no filtering happens. Otherwise exactly
        one matcher is built, checked in this order:

        * ``self.args.regex`` -- compiled with ``re.compile``;
        * ``self.args.matchfile`` -- a callable that accepts a value equal to
          one of the file's right-stripped lines;
        * ``self.args.pattern`` -- passed through unchanged.

        The same matcher is attached to every column resolved from
        ``self.args.columns`` and handed to ``FilteringCSVReader`` together
        with ``self.args.inverse``. The header row is always written.
        """
        if self.args.names_only:
            print_column_names(self.args.file, self.output_file, **self.reader_kwargs)
            return

        if not (self.args.regex or self.args.pattern or self.args.matchfile):
            self.argparser.error("One of -r, -m or -f must be specified, unless using the -n option.")

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)
        # The next() builtin works with both the Python 2 (.next) and the
        # Python 3 (__next__) iterator protocols; calling .next() directly
        # only works on Python 2.
        column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names)

        if self.args.regex:
            pattern = re.compile(self.args.regex)
        elif self.args.matchfile:
            # A set gives constant-time membership tests; the matcher runs
            # once per tested cell, so scanning a list would cost
            # O(number of match-file lines) each time.
            lines = set(line.rstrip() for line in self.args.matchfile)
            pattern = lambda x: x in lines
        else:
            pattern = self.args.pattern

        # Every selected column is tested against the same matcher.
        patterns = dict((c, pattern) for c in column_ids)

        output = CSVKitWriter(self.output_file, **self.writer_kwargs)
        output.writerow(column_names)

        filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

        for i, row in enumerate(filter_reader):
            # NOTE(review): i counts rows yielded by the filter, not rows
            # read from the input -- confirm that is what consumers of
            # input_line_number expect.
            self.input_line_number = i + 1
            output.writerow(row)
Пример #3
0
    def main(self):
        """Write only the selected columns of the input CSV.

        When ``self.args.names_only`` is set, the call is handed to
        ``print_column_names`` and nothing else is written. Otherwise the
        first row is taken as the header, ``self.args.columns`` is resolved
        against it, and each row is reduced to those columns in the resolved
        order. A row too short to contain a selected column gets ``None`` in
        that position. With ``self.args.delete_empty`` set, rows whose
        selected values are all empty are skipped.
        """
        if self.args.names_only:
            print_column_names(self.args.file, self.output_file, **self.reader_kwargs)
            return

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)
        # The next() builtin works with both the Python 2 (.next) and the
        # Python 3 (__next__) iterator protocols.
        column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for i, row in enumerate(rows):
            self.input_line_number = i + 1
            # Pad with None when the row is shorter than a selected index.
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # any() treats both '' and the None padding as empty. Joining the
            # values instead would raise TypeError as soon as a short row
            # contributed a None.
            if self.args.delete_empty and not any(out_row):
                continue

            output.writerow(out_row)
Пример #4
0
    def main(self):
        """Write the input CSV back out with its data rows sorted.

        When ``self.args.names_only`` is set, the call is handed to
        ``print_column_names`` and the method returns. Otherwise the input is
        read with ``table.Table.from_csv``, the rows are sorted on the
        columns resolved from ``self.args.columns`` (order reversed when
        ``self.args.reverse`` is true), and the header row plus the sorted
        rows are written to ``self.output_file``.
        """
        if self.args.names_only:
            print_column_names(self.args.file, self.output_file, **self.reader_kwargs)
            return

        source_name = self.args.file.name
        if source_name == '<stdin>':
            # No file name to borrow when reading from standard input.
            table_name = 'csvsql_table'
        else:
            # Use the file name, minus directory and extension, as table name
            table_name, _extension = os.path.splitext(os.path.basename(source_name))

        tab = table.Table.from_csv(
            self.args.file,
            name=table_name,
            snifflimit=self.args.snifflimit,
            **self.reader_kwargs
        )
        column_ids = parse_column_identifiers(self.args.columns, tab.headers())

        def values_at_sort_columns(row):
            # Rows compare by their values in the selected columns, in order.
            return [row[column_id] for column_id in column_ids]

        sorted_rows = sorted(
            tab.to_rows(serialize_dates=True),
            key=values_at_sort_columns,
            reverse=self.args.reverse
        )

        header = tab.headers()
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        # The header is emitted ahead of the sorted data rows.
        output.writerow(header)
        for sorted_row in sorted_rows:
            output.writerow(sorted_row)
Пример #5
0
    def main(self):
        """Copy the selected columns of the input CSV to the output.

        When ``self.args.names_only`` is set, the call is handed to
        ``print_column_names`` and the method returns. Otherwise the first
        input row is used as the header, ``self.args.columns`` is resolved
        against it, and every following row is written with just those
        columns, in the resolved order. ``None`` stands in for a selected
        column that a short row does not have. With
        ``self.args.delete_empty`` set, a row is dropped when all of its
        selected values are empty.
        """
        if self.args.names_only:
            print_column_names(self.args.file, self.output_file,
                               **self.reader_kwargs)
            return

        rows = CSVKitReader(self.args.file, **self.reader_kwargs)
        # Use the next() builtin rather than the Python 2-only .next()
        # method so the header read works under either iterator protocol.
        column_names = next(rows)

        column_ids = parse_column_identifiers(self.args.columns, column_names)
        output = CSVKitWriter(self.output_file, **self.writer_kwargs)

        output.writerow([column_names[c] for c in column_ids])

        for i, row in enumerate(rows):
            self.input_line_number = i + 1
            # A row may be shorter than the header; pad missing cells.
            out_row = [row[c] if c < len(row) else None for c in column_ids]

            # Test emptiness with any(): str.join() would raise TypeError on
            # the None padding, whereas any() counts '' and None alike as
            # empty.
            if self.args.delete_empty and not any(out_row):
                continue

            output.writerow(out_row)