def write(self, r, c, label='', *args, **kwargs):
    """Write a cell through the wrapped sheet and grow its column/row to fit.

    The call is forwarded unchanged to ``self.sheet.write``.  Afterwards the
    label is measured with ``arial10`` and, if it is larger than anything
    previously written to column ``c`` / row ``r``, the column width and row
    height of the wrapped sheet are enlarged.

    Args:
        r: Row index of the cell.
        c: Column index of the cell.
        label: Cell content; also the text that is measured.
        *args: Extra positional arguments for the wrapped ``write``; the
            first one, if present, is treated as the cell style.
        **kwargs: Extra keyword arguments for the wrapped ``write``; a
            ``style`` entry is treated as the cell style.
    """
    self.sheet.write(r, c, label, *args, **kwargs)
    self.sheet.row(r).collapse = True

    # The style may arrive positionally or as ``style=...`` (assumes the
    # wrapped sheet names that parameter ``style``, as xlwt does -- confirm).
    # Looking at both keeps the bold measurement right for keyword callers.
    style = args[0] if args else kwargs.get('style')
    bold = (
        style is not None
        and str(style.font.bold) in ('1', 'true', 'True')
    )

    # Only ever grow a column: remember the widest label seen so far.
    width = int(arial10.fitwidth(label, bold))
    if width > self.widths.get(c, 0):
        self.widths[c] = width
        self.sheet.col(c).width = width

    # Same policy for the row height.
    height = int(arial10.fitheight(label, bold))
    if height > self.heights.get(r, 0):
        self.heights[r] = height
        self.sheet.row(r).height = height
def write(self, r, c, label='', *args, **kwargs):
    """Forward a cell write to the wrapped sheet, then auto-fit its size.

    After delegating to ``self.sheet.write`` the label is measured with
    ``arial10``.  The column width and row height on the wrapped sheet are
    raised whenever this label needs more room than any label written
    earlier to the same column / row.

    Args:
        r: Row index of the cell.
        c: Column index of the cell.
        label: Cell content; also the text that is measured.
        *args: Extra positional arguments for the wrapped ``write``; the
            first one, if present, is treated as the cell style.
        **kwargs: Extra keyword arguments for the wrapped ``write``; a
            ``style`` entry is treated as the cell style.
    """
    self.sheet.write(r, c, label, *args, **kwargs)
    self.sheet.row(r).collapse = True

    # Accept the style both positionally and as ``style=...`` so keyword
    # callers also get bold-aware sizing (assumes the wrapped sheet calls
    # the parameter ``style`` -- confirm against the sheet class in use).
    style = args[0] if args else kwargs.get('style')
    bold = (
        style is not None
        and str(style.font.bold) in ('1', 'true', 'True')
    )

    # Sizes are truncated to int before being stored on the sheet.
    # NOTE(review): arial10 presumably returns a float for bold text, and
    # xlwt is expected to need integer column/row sizes -- confirm.
    width = int(arial10.fitwidth(label, bold))
    if width > self.widths.get(c, 0):
        self.widths[c] = width
        self.sheet.col(c).width = width

    height = int(arial10.fitheight(label, bold))
    if height > self.heights.get(r, 0):
        self.heights[r] = height
        self.sheet.row(r).height = height
def write(self, r, c, label=u'', style=None):
    """Write a cell with date-aware formatting, then auto-fit its size.

    ``datetime`` and ``date`` labels are written with a day-first number
    format; every other label is written with the style as given.  The
    label's text is then measured with ``arial10`` and the column width
    (capped at ``MAX_COLUMN_WIDTH``) and row height of the wrapped sheet
    are enlarged if this label needs more room than earlier ones.

    Args:
        r: Row index of the cell.
        c: Column index of the cell.
        label: Cell content.
        style: Cell style; a fresh ``XFStyle()`` is used when falsy.
    """
    if not style:
        style = XFStyle()

    # datetime must be tested first: it is a subclass of date.
    if isinstance(label, datetime.datetime):
        date_format = 'dd/mm/yyyy hh:mm:ss'
    elif isinstance(label, datetime.date):
        date_format = 'dd/mm/yyyy'
    else:
        date_format = None

    if date_format is None:
        self.sheet.write(r, c, label, style)
    else:
        # The caller's style object is borrowed for the date format; restore
        # it even if the write fails so a shared style is never left altered.
        saved_format = style.num_format_str
        style.num_format_str = date_format
        try:
            self.sheet.write(r, c, label, style)
        finally:
            style.num_format_str = saved_format

    self.sheet.row(r).collapse = True

    unicode_label = unicode(label)
    bold = str(style.font.bold) in ('1', 'true', 'True')

    # Only ever grow a column, and never beyond the configured maximum.
    width = min(int(arial10.fitwidth(unicode_label, bold)), MAX_COLUMN_WIDTH)
    if width > self.widths.get(c, 0):
        self.widths[c] = width
        self.sheet.col(c).width = width

    height = int(arial10.fitheight(unicode_label, bold))
    if height > self.heights.get(r, 0):
        self.heights[r] = height
        self.sheet.row(r).height = height
def _coerce_cell(column_name, text):
    """Convert one raw CSV cell into the value written to the worksheet.

    Cells of the ``date`` column are parsed as ``YYYYMMDD`` dates, all-digit
    text becomes ``int``, other numeric text becomes ``float``, the literal
    ``NA`` becomes ``None`` and anything else is kept as text.
    """
    if column_name == 'date':
        return datetime.datetime.strptime(text, "%Y%m%d").date()
    if text.isdigit():
        return int(text)
    try:
        return float(text)
    except ValueError:
        return None if text == 'NA' else text


def buildexcel(file_dest):
    """Build an Excel workbook with one worksheet per scraped CSVY document.

    For every ``(url, csvy)`` pair yielded by ``webscraping()`` the YAML
    field metadata and the CSV data are separated, a worksheet named by
    ``deduce_name(url)`` is added, three header rows (column names, value
    labels, column labels) and the data rows are written, and an
    autofilter, column widths, header row heights and frozen panes are set.

    Args:
        file_dest: Destination passed to ``xlsxwriter.Workbook``.
    """
    # prepare an empty workbook
    workbook = xlsxwriter.Workbook(
        file_dest, {'default_date_format': 'YYYY-mm-dd'})
    header_row_format = workbook.add_format({
        'bg_color': '#C2C2D6',
        'bottom': 3,
        'top': 3,
        'text_wrap': True,
        'font_name': 'Arial',
        'font_size': 10
    })
    data_row_format = workbook.add_format({
        'font_name': 'Arial',
        'font_size': 10
    })

    # work on each URL, as one new worksheet
    for url, csvy in webscraping():
        print(url)
        lines = csvy.split("\n")

        # Extract and parse the CSVY metadata part.  ``startswith`` (rather
        # than indexing ``s[0]``) keeps an empty first line from raising.
        fields_def_yaml = [
            ln[1:]
            for ln in itertools.takewhile(lambda s: s.startswith('#'), lines)
        ]
        if not fields_def_yaml:
            # the metadata is not in comment, look for limiters instead
            fields_def_yaml = list(
                itertools.takewhile(
                    lambda s: not s.startswith('---'), lines[1:]))
        else:
            # drop the first and last comment lines (presumably the
            # commented-out ``---`` limiters -- confirm against the feed)
            fields_def_yaml = fields_def_yaml[1:-1]

        # The document comes from the network: use the safe loader so the
        # YAML cannot construct arbitrary Python objects.  (Plain
        # ``yaml.load`` without a Loader is also rejected by PyYAML >= 6.)
        fields_def = yaml.safe_load("\n".join(fields_def_yaml))
        assert ('fields' in fields_def)
        assert (all('name' in f for f in fields_def['fields']))
        fields = {f['name']: f for f in fields_def['fields']}

        # Flatten each ``labels`` mapping into "code : text" lines sorted by
        # code, ready to be shown in a single header cell.
        for field in fields.values():
            if 'labels' in field:
                field['labels'] = "\n".join([
                    str(int(code)) + " : " + str(text)
                    for text, code in sorted(
                        field['labels'].items(), key=lambda item: item[1])
                ])

        # Extract and parse the CSVY data part (everything after the
        # metadata lines and their two limiter lines)
        csvdata = list(
            csv.reader(
                [ln + '\n' for ln in lines[len(fields_def_yaml) + 2:]]))

        # Create worksheet and set title
        worksheet = workbook.add_worksheet(
            deduce_name(url))  # new sheet at end

        # Header rows: name, labels, label
        header = csvdata[0]
        if len(header) < len(csvdata[1]):
            # in case header row is shorter, align to right
            header = [None] * (len(csvdata[1]) - len(header)) + header
        option = [fields.get(h, {}).get('labels', ' ') for h in header]
        label = [
            breaklabel(fields.get(h, {}).get('label', ' ')) for h in header
        ]
        worksheet.write_row(0, 0, header, header_row_format)
        worksheet.write_row(1, 0, option, header_row_format)
        worksheet.write_row(2, 0, label, header_row_format)

        # Data rows.  Rows are converted in place because the column-width
        # pass below measures the converted values.
        for j, row in enumerate(csvdata[1:]):
            for i, col in enumerate(row):
                row[i] = _coerce_cell(header[i], col)
            worksheet.write_row(3 + j, 0, row, data_row_format)

        # Add autofilter over every column except the ignored ones
        ignore_col = ['weight', 'caseid', None]
        filter_cols = [i for i, c in enumerate(header) if c not in ignore_col]
        maxrow = len(csvdata) + 2
        minrow = 2
        mincol = min(filter_cols)
        maxcol = max(filter_cols)
        worksheet.autofilter(minrow, mincol, maxrow, maxcol)

        # Adjust column width: usually to fit the option text
        colwidths = []
        for c in range(len(header)):
            columndata = ["00000", header[c], option[c]] + [
                str(row[c]) for row in csvdata[1:] if len(row) > c
            ]
            width = max(arial10.fitwidth(x) for x in columndata if x)
            worksheet.set_column(c, c, width / 256.0)
            colwidths.append(width)

        # Adjust row height for option text row
        height = max(arial10.fitheight(x) for x in option if x)
        worksheet.set_row(1, height / 18.0)

        # Adjust row height for label text row, adding room for the extra
        # lines created when a label wraps inside its column
        height = max(arial10.fitheight(x) for x in label if x)
        wrapped_add_height = max(
            arial10.fitheight("\n" * math.ceil(arial10.fitwidth(l) / w - 1))
            for l, w in zip(label, colwidths))
        worksheet.set_row(2, (height + wrapped_add_height) / 18.0)

        # Freeze the 3 header rows
        worksheet.freeze_panes(3, 0)

    # save to disk
    workbook.close()