def export_to_html(table, filename_or_fobj=None, encoding='utf-8'):
    '''Render `table` as an HTML <table>.

    When `filename_or_fobj` is given, write the encoded HTML there and
    return the (flushed) file object; otherwise return the HTML as an
    encoded byte string.
    '''
    field_names = table.fields.keys()

    pieces = ['<table>\n\n', ' <thead>\n', ' <tr>\n']
    pieces.extend(' <th> {} </th>\n'.format(name) for name in field_names)
    pieces.extend([' </tr>\n', ' </thead>\n', '\n', ' <tbody>\n', '\n'])

    for row_number, row in enumerate(serialize(table, encoding=encoding),
                                     start=1):
        # Alternating classes let stylesheets zebra-stripe the rows.
        parity = 'even' if row_number % 2 == 0 else 'odd'
        pieces.append(' <tr class="{}">\n'.format(parity))
        for cell in row:
            pieces.append(' <td> ')
            pieces.append(cell)
            pieces.append(' </td>\n')
        pieces.append(' </tr>\n\n')
    pieces.append(' </tbody>\n\n</table>\n')

    # Normalize every piece to bytes so the join below is bytes-only.
    encoded_pieces = []
    for piece in pieces:
        if isinstance(piece, unicode):
            piece = piece.encode(encoding)
        encoded_pieces.append(piece)
    html = ''.encode(encoding).join(encoded_pieces)

    if filename_or_fobj is None:
        return html
    filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='w')
    fobj.write(html)
    fobj.flush()
    return fobj
def export_to_txt(table, filename_or_fobj, encoding='utf-8', *args, **kwargs):
    '''Write `table` to `filename_or_fobj` as an ASCII-art box table.

    Produces a `+---+` / `| x |` style table: a separator line, a centered
    header row, a separator, right-justified data rows, a final separator
    and a trailing blank line. Extra `*args`/`**kwargs` are forwarded to
    `_max_column_sizes` only. Returns the flushed file object.
    '''
    # TODO: will work only if table.fields is OrderedDict
    # TODO: should use fobj? What about creating a method like json.dumps?
    filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='w')
    # Widest rendered value per field, so every column lines up.
    max_sizes = _max_column_sizes(table, encoding, *args, **kwargs)
    fields = table.fields.keys()
    # `+ 2` accounts for the single space padding on each side of a cell.
    dashes = [DASH * (max_sizes[field] + 2) for field in fields]
    header = [field.center(max_sizes[field]) for field in fields]
    header = '{} {} {}'.format(PIPE, ' {} '.format(PIPE).join(header), PIPE)
    split_line = PLUS + PLUS.join(dashes) + PLUS
    result = [split_line, header, split_line]
    # NOTE(review): `serialize` is called here without `encoding`, unlike
    # `_max_column_sizes` above -- presumably widths still match; confirm
    # against serialize()'s default behavior.
    for row in serialize(table):
        values = [value.rjust(max_sizes[field_name])
                  for field_name, value in zip(fields, row)]
        row_data = ' {} '.format(PIPE).join(values)
        result.append('{} {} {}'.format(PIPE, row_data, PIPE))
    # Closing separator plus a '\n' entry so the output ends with a blank line.
    result.extend([split_line, '\n'])
    data = '\n'.join(result).encode(encoding)
    fobj.write(data)
    fobj.flush()
    return fobj
def export_to_html(table, filename_or_fobj=None, encoding='utf-8'):
    '''Export `table` to an HTML <table> fragment.

    Field names and cell values are HTML-escaped so that data containing
    `<`, `>` or `&` cannot break the markup or inject tags (previously raw
    values were written straight into the HTML).

    If `filename_or_fobj` is given, the encoded HTML is written there and
    the flushed file object is returned; otherwise the encoded byte string
    is returned.
    '''
    # stdlib escape; imported locally to leave module-level imports untouched
    from xml.sax.saxutils import escape

    fields = table.fields.keys()
    result = ['<table>\n\n', ' <thead>\n', ' <tr>\n']
    result.extend(' <th> {} </th>\n'.format(escape(field))
                  for field in fields)
    result.extend([' </tr>\n', ' </thead>\n', '\n', ' <tbody>\n', '\n'])

    for index, row in enumerate(serialize(table, encoding=encoding), start=1):
        # Alternating classes let stylesheets zebra-stripe the rows.
        css_class = 'odd' if index % 2 == 1 else 'even'
        result.append(' <tr class="{}">\n'.format(css_class))
        for value in row:
            result.extend([' <td> ', escape(value), ' </td>\n'])
        result.append(' </tr>\n\n')
    result.append(' </tbody>\n\n</table>\n')

    # Normalize every piece to bytes before joining.
    new_result = [value.encode(encoding) if isinstance(value, unicode)
                  else value
                  for value in result]
    html = ''.encode(encoding).join(new_result)

    if filename_or_fobj is not None:
        filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='w')
        fobj.write(html)
        fobj.flush()
        return fobj
    else:
        return html
def export_to_txt(table, filename_or_fobj, encoding='utf-8', *args, **kwargs):
    '''Dump `table` as a plus/pipe/dash text table and return the fobj.'''
    # TODO: will work only if table.fields is OrderedDict
    # TODO: should use fobj? What about creating a method like json.dumps?
    filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='w')

    max_sizes = _max_column_sizes(table, encoding, *args, **kwargs)
    fields = table.fields.keys()

    # One '+----+' separator sized to each column plus its padding spaces.
    split_line = PLUS + PLUS.join(DASH * (max_sizes[field] + 2)
                                  for field in fields) + PLUS
    cell_sep = ' {} '.format(PIPE)
    centered = [field.center(max_sizes[field]) for field in fields]
    header_line = '{} {} {}'.format(PIPE, cell_sep.join(centered), PIPE)

    lines = [split_line, header_line, split_line]
    for row in serialize(table):
        padded = [value.rjust(max_sizes[name])
                  for name, value in zip(fields, row)]
        lines.append('{} {} {}'.format(PIPE, cell_sep.join(padded), PIPE))
    lines.append(split_line)
    lines.append('\n')  # trailing blank line after the final separator

    fobj.write('\n'.join(lines).encode(encoding))
    fobj.flush()
    return fobj
def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
                     ignore_colspan=True, preserve_html=False, row_tag='tr',
                     column_tag='td|th', *args, **kwargs):
    '''Build a table from the `index`-th <table> of an HTML document.

    Each `row_tag` element contributes one row made of its `column_tag`
    children; `preserve_html=True` keeps inner markup instead of plain
    text. With `ignore_colspan=True`, rows whose cell count differs from
    the widest row are dropped. Remaining arguments go to `create_table`.
    '''
    # TODO: unescape before returning: html_parser.unescape(html)
    # TODO: lxml -> unicode?
    filename, fobj = get_filename_and_fobj(filename_or_fobj)
    kwargs['encoding'] = encoding

    html = fobj.read().decode(encoding)
    tree = document_fromstring(html)
    table = tree.xpath('//table')[index]
    # thead/tbody wrappers would hide <tr> from the row_tag XPath below.
    strip_tags(table, 'thead')
    strip_tags(table, 'tbody')
    row_elements = table.xpath(row_tag)

    if preserve_html:
        extract = _get_content
    else:
        extract = lambda element: element.text_content().strip()

    table_rows = [[extract(cell) for cell in row.xpath(column_tag)]
                  for row in row_elements]

    max_columns = max(len(row) for row in table_rows)
    if ignore_colspan:
        table_rows = filter(lambda row: len(row) == max_columns, table_rows)

    meta = {'imported_from': 'html', 'filename': filename,}
    return create_table(table_rows, meta=meta, *args, **kwargs)
def import_from_csv(filename_or_fobj, encoding='utf-8', delimiter=',',
                    quotechar='"', *args, **kwargs):
    '''Import data from a CSV file (filename or file object).'''
    filename, fobj = get_filename_and_fobj(filename_or_fobj)
    kwargs['encoding'] = encoding
    meta = {'imported_from': 'csv', 'filename': filename,}
    # unicodecsv expects byte-string dialect parameters, hence str().
    reader = unicodecsv.reader(fobj,
                               encoding=encoding,
                               delimiter=str(delimiter),
                               quotechar=str(quotechar))
    return create_table(reader, meta=meta, *args, **kwargs)
def export_to_csv(table, filename_or_fobj, encoding='utf-8'):
    '''Write `table` as CSV (header row first) and return the flushed fobj.'''
    # TODO: will work only if table.fields is OrderedDict
    # TODO: should use fobj? What about creating a method like json.dumps?
    filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='w')

    writer = unicodecsv.writer(fobj, encoding=encoding)
    writer.writerow(table.fields.keys())
    writer.writerows(serialize(table, encoding=encoding))

    fobj.flush()
    return fobj
def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
                    start_row=0, start_column=0, *args, **kwargs):
    '''Import a table from one worksheet of an XLS workbook.

    Reads header cells rightwards from (`start_row`, `start_column`), then
    data rows below the header until the first entirely-empty row. The
    worksheet is chosen by `sheet_name` when given, else `sheet_index`.
    Extra `*args`/`**kwargs` are forwarded to `create_table`.
    '''
    # Only the filename is used; xlrd opens the workbook itself.
    filename, _ = get_filename_and_fobj(filename_or_fobj)
    book = xlrd.open_workbook(filename, formatting_info=True)
    if sheet_name is not None:
        sheet = book.sheet_by_name(sheet_name)
    else:
        sheet = book.sheet_by_index(sheet_index)
    # TODO: may re-use Excel data types

    # Get field names
    # TODO: may use sheet.col_values or even sheet.ncols
    column_count = 0
    header = []
    column_value = cell_value(sheet, start_row, start_column + column_count)
    # NOTE(review): the scan stops at the first *falsy* cell value, so a
    # header cell holding 0 or '' would truncate the header -- presumably
    # headers are non-empty strings; confirm cell_value's empty-cell result.
    while column_value:
        header.append(column_value)
        column_count += 1
        column_value = cell_value(sheet, start_row,
                                  start_column + column_count)

    # Get sheet rows
    # TODO: may use sheel.col_slice or even sheet.nrows
    table_rows = []
    row_count = 0
    start_row += 1  # data rows begin immediately below the header row
    cell_is_empty = False
    while not cell_is_empty:
        row = [
            cell_value(sheet, start_row + row_count,
                       start_column + column_index)
            for column_index in range(column_count)
        ]
        # A row whose cells are all falsy terminates the scan (not appended);
        # NOTE(review): a data row of all zeros would also stop it -- verify.
        cell_is_empty = not any(row)
        if not cell_is_empty:
            table_rows.append(row)
            row_count += 1

    meta = {
        'imported_from': 'xls',
        'filename': filename,
    }
    return create_table([header] + table_rows, meta=meta, *args, **kwargs)
def import_from_html(filename_or_fobj, encoding='utf-8', index=0,
                     ignore_colspan=True, preserve_html=False, row_tag='tr',
                     column_tag='td|th', *args, **kwargs):
    '''Import a table from an HTML document.

    Parses the document, takes its `index`-th <table>, and turns each
    `row_tag` element's `column_tag` children into a row of values.
    `preserve_html=True` keeps inner markup (via `_get_content`) instead
    of stripped plain text. With `ignore_colspan=True`, rows whose cell
    count differs from the widest row are discarded (typically
    colspan'ed title rows). Remaining `*args`/`**kwargs` are forwarded
    to `create_table`.
    '''
    # TODO: unescape before returning: html_parser.unescape(html)
    # TODO: lxml -> unicode?
    filename, fobj = get_filename_and_fobj(filename_or_fobj)
    kwargs['encoding'] = encoding

    html = fobj.read().decode(encoding)
    html_tree = document_fromstring(html)
    table = html_tree.xpath('//table')[index]
    # <thead>/<tbody> wrappers would hide <tr> from the row_tag XPath below.
    strip_tags(table, 'thead')
    strip_tags(table, 'tbody')
    row_elements = table.xpath(row_tag)

    if not preserve_html:
        table_rows = [[value_element.text_content().strip()
                       for value_element in row.xpath(column_tag)]
                      for row in row_elements]
    else:
        table_rows = [[_get_content(value_element)
                       for value_element in row.xpath(column_tag)]
                      for row in row_elements]

    # Fix: the original used `filter(...)` (a lazy iterator on Python 3)
    # and called max() unguarded, which raised ValueError on an empty table.
    if ignore_colspan and table_rows:
        max_columns = max(len(row) for row in table_rows)
        table_rows = [row for row in table_rows if len(row) == max_columns]

    meta = {'imported_from': 'html', 'filename': filename,}
    return create_table(table_rows, meta=meta, *args, **kwargs)
def import_from_xls(filename_or_fobj, sheet_name=None, sheet_index=0,
                    start_row=0, start_column=0, *args, **kwargs):
    '''Load one worksheet of an XLS workbook into a table.

    Header cells are read rightwards from (`start_row`, `start_column`)
    until a falsy cell; data rows are read downwards until the first
    entirely-empty row. `sheet_name` wins over `sheet_index` when given.
    '''
    # xlrd opens the workbook by filename; the file object is unused.
    filename, _ = get_filename_and_fobj(filename_or_fobj)
    book = xlrd.open_workbook(filename, formatting_info=True)
    if sheet_name is None:
        sheet = book.sheet_by_index(sheet_index)
    else:
        sheet = book.sheet_by_name(sheet_name)
    # TODO: may re-use Excel data types

    # Collect header cells until the first falsy value.
    # TODO: may use sheet.col_values or even sheet.ncols
    header = []
    while True:
        value = cell_value(sheet, start_row, start_column + len(header))
        if not value:
            break
        header.append(value)
    column_count = len(header)

    # Collect data rows until a completely empty row is reached.
    # TODO: may use sheel.col_slice or even sheet.nrows
    table_rows = []
    current_row = start_row + 1  # data begins right below the header
    while True:
        row = [cell_value(sheet, current_row, start_column + offset)
               for offset in range(column_count)]
        if not any(row):
            break
        table_rows.append(row)
        current_row += 1

    meta = {'imported_from': 'xls', 'filename': filename,}
    return create_table([header] + table_rows, meta=meta, *args, **kwargs)
def export_to_xls(table, filename_or_fobj, sheet_name='Sheet1'):
    '''Export `table` to an XLS workbook with a single sheet.

    Row 0 holds the field names; data rows follow, one per table row,
    written with the xlwt style registered for the field's type in
    `FORMATTING_STYLES` (if any). Returns the flushed file object.
    '''
    filename, fobj = get_filename_and_fobj(filename_or_fobj, mode='wb')

    work_book = xlwt.Workbook()
    sheet = work_book.add_sheet(sheet_name)

    # Resolve each column's write style once, instead of re-looking up
    # table.fields and FORMATTING_STYLES for every single cell.
    columns = []
    for index, field_name in enumerate(table.fields):
        field_type = table.fields[field_name]
        style = (FORMATTING_STYLES[field_type]
                 if field_type in FORMATTING_STYLES else None)
        columns.append((index, field_name, style))
        sheet.write(0, index, field_name)

    for row_index, row in enumerate(table, start=1):
        for column_index, field_name, style in columns:
            value = getattr(row, field_name)
            if style is not None:
                sheet.write(row_index, column_index, value, style=style)
            else:
                sheet.write(row_index, column_index, value)

    work_book.save(fobj)
    fobj.flush()
    return fobj