def main(self):
    """Write the selected columns of the input CSV to the output.

    When ``self.args.names_only`` is set, only ``print_column_names`` is
    called and nothing else is read. Otherwise the column names come from
    the first input row, or from ``make_default_headers`` when
    ``self.args.no_header_row`` is set. The requested columns are resolved
    with ``parse_column_identifiers`` and every input row is projected onto
    them. When ``self.args.delete_empty`` is set, rows whose selected cells
    are all empty are not written.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    rows = agate.reader(self.input_file, **self.reader_kwargs)

    if self.args.no_header_row:
        # The first row is data; it is only inspected to learn how many
        # default column names to generate.
        row = next(rows)
        column_names = make_default_headers(len(row))

        # Put the row back on top
        rows = itertools.chain([row], rows)
    else:
        column_names = next(rows)

    column_ids = parse_column_identifiers(
        self.args.columns,
        column_names,
        self.args.zero_based,
        self.args.not_columns,
    )

    output = agate.writer(self.output_file, **self.writer_kwargs)
    output.writerow([column_names[c] for c in column_ids])

    delete_empty = self.args.delete_empty

    for row in rows:
        # Rows shorter than a selected column id are padded with None.
        out_row = [row[c] if c < len(row) else None for c in column_ids]

        # ``any`` treats both '' and the None padding as empty. Joining the
        # cells instead would raise TypeError as soon as a short row
        # introduced a None.
        if delete_empty and not any(out_row):
            continue

        output.writerow(out_row)
def main(self):
    """Stack the rows of every input CSV into a single output CSV.

    Each path in ``self.args.input_paths`` is opened with
    ``_open_input_file``. A header row is written once, taken from the
    first file (or generated by ``make_default_headers`` when
    ``self.args.no_header_row`` is set). When grouping is requested, either
    via ``self.args.group_by_filenames`` or a comma-separated
    ``self.args.groups``, a leading column named ``self.args.group_name``
    (default ``'group'``) is added and every data row is prefixed with the
    grouping value of the file it came from.

    Every opened input file is closed before this method returns or
    raises.
    """
    self.input_files = []
    # Number of leading entries of ``self.input_files`` already closed by
    # the processing loop; the ``finally`` clause closes only the rest.
    closed = 0

    try:
        for path in self.args.input_paths:
            self.input_files.append(self._open_input_file(path))

        if not self.input_files:
            self.argparser.error('You must specify at least one file to stack.')

        if self.args.group_by_filenames:
            groups = [os.path.split(f.name)[1] for f in self.input_files]
        elif self.args.groups:
            groups = self.args.groups.split(',')

            if len(groups) != len(self.input_files):
                self.argparser.error('The number of grouping values must be equal to the number of CSV files being stacked.')
        else:
            groups = None

        group_name = self.args.group_name or 'group'

        output = agate.writer(self.output_file, **self.writer_kwargs)

        for i, f in enumerate(self.input_files):
            rows = agate.reader(f, **self.reader_kwargs)

            if self.args.no_header_row:
                # Without header rows the first row is data. ``None`` marks
                # an exhausted (empty) file so that no phantom row is
                # emitted for it, while a genuinely blank first line (an
                # empty list) is still passed through.
                first_row = next(rows, None)
                headers = make_default_headers(len(first_row or []))
            else:
                # The header row is consumed for every file but only the
                # first file's header is written.
                first_row = None
                headers = next(rows, [])

            if i == 0:
                if groups:
                    headers.insert(0, group_name)

                output.writerow(headers)

            if first_row is not None:
                if groups:
                    first_row.insert(0, groups[i])

                output.writerow(first_row)

            for row in rows:
                if groups:
                    row.insert(0, groups[i])

                output.writerow(row)

            f.close()
            closed = i + 1
    finally:
        # Release whatever is still open after an error or early exit.
        for f in self.input_files[closed:]:
            f.close()
def main(self):
    """Write the input rows that pass a per-column filter.

    When ``self.args.names_only`` is set, only ``print_column_names`` is
    called. Otherwise ``self.args.columns`` is required, together with one
    of ``self.args.regex``, ``self.args.matchfile`` or
    ``self.args.pattern`` (checked in that order of precedence). The same
    matcher is applied to every selected column: a compiled regular
    expression, a membership test against the right-stripped lines of the
    match file, or the raw pattern value. The header row is written
    unchanged, followed by the rows yielded by ``FilteringCSVReader``;
    ``self.args.inverse`` is passed through to it.
    """
    if self.args.names_only:
        self.print_column_names()
        return

    if not self.args.columns:
        self.argparser.error('You must specify at least one column to search using the -c option.')

    if self.args.regex is None and self.args.pattern is None and self.args.matchfile is None:
        self.argparser.error('One of -r, -m or -f must be specified, unless using the -n option.')

    rows = agate.reader(self.input_file, **self.reader_kwargs)

    if self.args.no_header_row:
        # The first row is data; it is only inspected to learn how many
        # default column names to generate.
        row = next(rows)
        column_names = make_default_headers(len(row))

        # Put the row back on top
        rows = itertools.chain([row], rows)
    else:
        column_names = next(rows)

    column_ids = parse_column_identifiers(self.args.columns, column_names, self.args.zero_based)

    # Dispatch with ``is not None`` so it agrees with the validation above:
    # an explicitly supplied empty regex is honoured instead of silently
    # falling through to a pattern that may be None.
    if self.args.regex is not None:
        pattern = re.compile(self.args.regex)
    elif self.args.matchfile is not None:
        lines = set(line.rstrip() for line in self.args.matchfile)

        def pattern(value):
            return value in lines
    else:
        pattern = self.args.pattern

    # Every selected column shares the same matcher.
    patterns = dict.fromkeys(column_ids, pattern)

    output = agate.writer(self.output_file, **self.writer_kwargs)
    output.writerow(column_names)

    filter_reader = FilteringCSVReader(rows, header=False, patterns=patterns, inverse=self.args.inverse)

    for row in filter_reader:
        output.writerow(row)