def dset_sheet(dataset, ws, freeze_panes=True):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    bold = openpyxl.styles.Font(bold=True)
    wrap_text = openpyxl.styles.Alignment(wrap_text=True)

    for i, row in enumerate(_package):
        row_number = i + 1
        for j, col in enumerate(row):
            col_idx = get_column_letter(j + 1)
            cell = ws['%s%s' % (col_idx, row_number)]

            # bold headers
            if (row_number == 1) and dataset.headers:
                # cell.value = unicode('%s' % col, errors='ignore')
                cell.value = unicode(col)
                cell.font = bold
                if freeze_panes:
                    # Export Freeze only after first Line
                    ws.freeze_panes = 'A2'

            # bold separators
            elif len(row) < dataset.width:
                cell.value = unicode('%s' % col, errors='ignore')
                cell.font = bold

            # wrap the rest
            else:
                try:
                    if isinstance(col, dict):
                        cell.value = col['value']
                        if col['format'] == '@':
                            cell.style = cell_string_format
                    elif isinstance(col, date):
                        cell.value = col
                        cell.style = cell_date_format
                    elif isinstance(col, datetime):
                        cell.value = col
                        cell.style = cell_datetime_format
                    elif isinstance(col, str):
                        cell.value = col
                        cell.style = cell_string_format
                    elif '\n' in col:
                        cell.value = col
                        cell.alignment = wrap_text
                    else:
                        cell.value = col
                except TypeError:
                    cell.value = col
                except Exception as e:
                    print('invalid col %s' % e)
def dset_sheet(dataset, ws):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    for i, row in enumerate(_package):
        row_number = i + 1
        for j, col in enumerate(row):
            col_idx = get_column_letter(j + 1)
            frzn_col_idx = get_column_letter(j + 2)

            # bold headers
            if (row_number == 1) and dataset.headers:
                # ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
                #     '%s' % col, errors='ignore')
                ws.cell('%s%s' % (col_idx, row_number)).value = unicode(col)
                style = ws.get_style('%s%s' % (col_idx, row_number))
                style.font.bold = True
                if dataset.freeze_panes:
                    ws.freeze_panes = '%s%s' % (frzn_col_idx, row_number)

            # bold separators
            elif len(row) < dataset.width:
                ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                    '%s' % col, errors='ignore')
                style = ws.get_style('%s%s' % (col_idx, row_number))
                style.font.bold = True

            # wrap the rest
            else:
                try:
                    if isinstance(col, basestring) and '\n' in col:
                        ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                            '%s' % col, errors='ignore')
                        style = ws.get_style('%s%s' % (col_idx, row_number))
                        style.alignment.wrap_text = True  # assignment restored; bare attribute access was a no-op
                    else:
                        try:
                            ws.cell(
                                '%s%s' % (col_idx, row_number)).value = retriveDate(col)
                        except ValueError:
                            ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                                '%s' % col, errors='ignore')
                except TypeError:
                    ws.cell('%s%s' % (col_idx, row_number)).value = unicode(col)
def dset_sheet(dataset, ws, freeze_panes=True):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    column_widths = {}
    for i, row in enumerate(_package):
        row_number = i + 1
        for j, col in enumerate(row):
            col_idx = get_column_letter(j + 1)
            column_widths['%s' % col_idx] = median(
                [column_widths.get('%s' % col_idx, 0), len(str(col)) * 2])

            if (row_number == 1) and dataset.headers:
                # ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
                #     '%s' % col, errors='ignore')
                ws.cell('%s%s' % (col_idx, row_number)).value = unicode(col)
                ws.column_dimensions['%s' % col_idx].width = int(
                    column_widths['%s' % col_idx])
                style = ws.get_style('%s%s' % (col_idx, row_number))
                style.font.bold = True
                if freeze_panes:
                    # As already done in #53, but after Merge lost:
                    # Export Freeze only after first Line
                    ws.freeze_panes = 'A2'

            # bold separators
            elif len(row) < dataset.width:
                ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                    '%s' % col, errors='ignore')
                style = ws.get_style('%s%s' % (col_idx, row_number))
                style.font.bold = True

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                            '%s' % col, errors='ignore')
                        style = ws.get_style('%s%s' % (col_idx, row_number))
                        style.alignment.wrap_text = True  # assignment restored; bare attribute access was a no-op
                    else:
                        ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                            '%s' % col, errors='ignore')
                except TypeError:
                    ws.cell('%s%s' % (col_idx, row_number)).value = unicode(col)
def dset_sheet(dataset, ws):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    for i, row in enumerate(_package):
        row_number = i + 1
        for j, col in enumerate(row):
            col_idx = get_column_letter(j + 1)
            frzn_col_idx = get_column_letter(j + 2)

            # bold headers
            if (row_number == 1) and dataset.headers:
                # ws.cell('%s%s'%(col_idx, row_number)).value = unicode(
                #     '%s' % col, errors='ignore')
                ws.cell('%s%s' % (col_idx, row_number)).value = unicode(col)
                style = ws.get_style('%s%s' % (col_idx, row_number))
                style.font.bold = True
                if dataset.freeze_panes:
                    ws.freeze_panes = '%s%s' % (frzn_col_idx, row_number)

            # bold separators
            elif len(row) < dataset.width:
                ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                    '%s' % col, errors='ignore')
                style = ws.get_style('%s%s' % (col_idx, row_number))
                style.font.bold = True

            # wrap the rest
            else:
                try:
                    if isinstance(col, basestring) and '\n' in col:
                        ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                            '%s' % col, errors='ignore')
                        style = ws.get_style('%s%s' % (col_idx, row_number))
                        style.alignment.wrap_text = True  # assignment restored; bare attribute access was a no-op
                    else:
                        try:
                            ws.cell('%s%s' % (col_idx, row_number)).value = retriveDate(col)
                        except ValueError:
                            ws.cell('%s%s' % (col_idx, row_number)).value = unicode(
                                '%s' % col, errors='ignore')
                except TypeError:
                    ws.cell('%s%s' % (col_idx, row_number)).value = unicode(col)
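# --- Usage sketch (illustrative, not part of the original source) -----------
# A minimal example of driving one of the dset_sheet XLSX variants above; it
# assumes tablib and openpyxl are installed, targets the variants that use the
# modern `ws['A1']` openpyxl API (the `ws.cell(...)`/`get_style(...)` variants
# need legacy openpyxl 1.x), and assumes the defining module's `unicode` alias
# and `get_column_letter` import are available. 'example.xlsx' is hypothetical.
import openpyxl
import tablib

data = tablib.Dataset(headers=['first_name', 'last_name'])
data.append(('John', 'Adams'))

wb = openpyxl.Workbook()
ws = wb.active
ws.title = data.title or 'Sheet1'

dset_sheet(data, ws)       # fill the worksheet from the Dataset
wb.save('example.xlsx')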
def get_changed_fields(serializer, validated_data=None):
    validated_data = validated_data or serializer.validated_data
    result = {}
    orig = get_original_representation(serializer)

    for field_name, field in serializer.fields.items():
        if field.read_only or field.write_only:
            continue

        source = unicode(field.source)
        if source not in validated_data:
            continue

        old_value = orig[field_name] if field_name in orig else None
        value = validated_data[source]
        new_value = field.to_representation(value)

        # TODO: Move this to .to_representation()?
        if isinstance(old_value, six.string_types):
            old_value = strings.normalize_string(old_value)

        if old_value != new_value:
            result[field_name] = FieldChange(field, old_value, new_value, value)

    return result
def export_set(dataset):
    """HTML representation of a Dataset."""
    stream = StringIO()

    page = markup.page()
    page.table.open()

    if dataset.headers is not None:
        new_header = [item if item is not None else '' for item in dataset.headers]

        page.thead.open()
        headers = markup.oneliner.th(new_header)
        page.tr(headers)
        page.thead.close()

    for row in dataset:
        new_row = [item if item is not None else '' for item in row]

        html_row = markup.oneliner.td(new_row)
        page.tr(html_row)

    page.table.close()

    # Allow unicode characters in output
    wrapper = codecs.getwriter("utf8")(stream)
    wrapper.writelines(unicode(page))

    return stream.getvalue().decode('utf-8')
def export_set(dataset):
    """HTML representation of a Dataset."""
    stream = StringIO()

    page = markup.page()
    page.table.open()

    if dataset.headers is not None:
        new_header = [item if item is not None else '' for item in dataset.headers]

        page.thead.open()
        headers = markup.oneliner.th(new_header)
        page.tr(headers)
        page.thead.close()

    for row in dataset:
        new_row = [item if item is not None else '' for item in row]

        html_row = markup.oneliner.td(new_row)
        page.tr(html_row)

    page.table.close()

    # Allow unicode characters in output
    wrapper = codecs.getwriter("utf8")(stream)
    wrapper.writelines(unicode(page))

    return stream.getvalue().decode('utf-8')
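# --- Usage sketch (illustrative, not part of the original source) -----------
# Feeding a small Dataset through the HTML exporter above; assumes tablib is
# installed and that the defining module provides `markup`, `StringIO`,
# `codecs`, and a `unicode` alias. 'example.html' is a hypothetical path.
import tablib

data = tablib.Dataset(headers=['first_name', 'last_name'])
data.append(('John', 'Adams'))

html = export_set(data)                # '<table>...<th>first_name</th>...'
with open('example.html', 'w') as fh:
    fh.write(html)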
def dset_sheet(dataset, ws, freeze_panes=True):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    bold = openpyxl.styles.Font(bold=True)
    wrap_text = openpyxl.styles.Alignment(wrap_text=True)

    for i, row in enumerate(_package):
        row_number = i + 1
        for j, col in enumerate(row):
            stored_col_value = col
            col_idx = get_column_letter(j + 1)
            cell = ws['%s%s' % (col_idx, row_number)]

            # bold headers
            if (row_number == 1) and dataset.headers:
                # cell.value = unicode('%s' % col, errors='ignore')
                # cell.value = unicode(col)
                cell.font = bold
                if freeze_panes:
                    # Export Freeze only after first Line
                    ws.freeze_panes = 'A2'

            # bold separators
            elif len(row) < dataset.width:
                cell.value = unicode('%s' % col, errors='ignore')
                cell.font = bold

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        cell.value = unicode('%s' % col, errors='ignore')
                        cell.alignment = wrap_text
                    # else:
                    #     cell.value = unicode('%s' % col, errors='ignore')
                except TypeError:
                    cell.value = unicode(col)

            try:
                cell.value = stored_col_value
            except (ValueError, TypeError):
                cell.value = unicode(col)
def export_as_excel(modeladmin, request, queryset):
    if not request.user.is_staff:
        raise PermissionDenied

    opts = modeladmin.model._meta
    response = HttpResponse(content_type='text/csv; charset=utf-8')
    response['Content-Disposition'] = 'attachment; filename=%s.xls' % unicode(
        opts).replace('.', '_')

    try:
        field_names = modeladmin.model.get_csv_fields()
        v_field_names = field_names
    except:
        field_names = [field.name for field in opts.fields]
        v_field_names = [getattr(field, 'verbose_name') or field.name
                         for field in opts.fields]

    v_field_names = map(lambda x: x if x != 'ID' else 'Id', v_field_names)
    ax = []
    headers = v_field_names
    data = []
    data = tablib.Dataset(*data, headers=headers)

    for obj in queryset:
        acc = []
        for field in field_names:
            try:
                uf = getattr(obj, field)()
            except TypeError:
                try:
                    uf = getattr(obj, field)
                except:
                    uf = ' error obteniendo el dato'  # Spanish: "error getting the data"
            if uf is None:
                uf = ''
            elif isinstance(uf, datetime):
                uf = unicode(uf)
            elif isinstance(uf, Model):
                uf = unicode(uf)
            elif isinstance(uf, FieldFile):
                uf = uf.url
            acc.append(uf)
        data.append(acc)

    response.write(data.xls)
    return response
def to_string_representation(field, value):
    if hasattr(field, 'to_string_representation'):
        return field.to_string_representation(value)

    if isinstance(field, relations.ManyRelatedField):
        if value is None:
            value = []
        return unicode(list_separator).join([
            to_string_representation(field.child_relation, val)
            for val in value
        ])

    if value is None:
        value = ''

    return strings.normalize_string(unicode(value))
def _serialize_row(row):
    """Returns string representation of a single row.

    :param row: single dataset row
    """
    new_row = [_escape_tex_reserved_symbols(unicode(item)) if item else ''
               for item in row]
    return 6 * ' ' + ' & '.join(new_row) + ' \\\\'
def readSheet(self, sheet):
    name = sheet.getAttribute("name")
    rows = sheet.getElementsByType(table.TableRow)
    arrRows = []

    # get longestRow to not fill empty rows with blanks, shortens runtime
    cols = sheet.getElementsByType(table.TableColumn)
    try:
        longestRow = int(max([col.getAttribute("numbercolumnsrepeated")
                              for col in cols]))
    except:
        longestRow = 0

    # for each row
    for row in rows:
        row_comment = ""
        arrCells = []
        cells = row.getElementsByType(table.TableCell)

        # for each cell
        for cell in cells:
            # repeated value?
            repeat = cell.getAttribute("numbercolumnsrepeated")
            if (not repeat):
                repeat = 1

            ps = cell.getElementsByType(text.P)
            textContent = ""

            # for each text node
            for p in ps:
                for n in p.childNodes:
                    if (n.nodeType == 3):
                        textContent = textContent + unicode(n.data)

            if (textContent):
                if (textContent[0] != "#"):  # ignore comments cells
                    for rr in range(int(repeat)):  # repeated?
                        arrCells.append(textContent)
                else:
                    row_comment = row_comment + textContent + " "
            else:
                if int(repeat) < longestRow:
                    for rr in range(int(repeat)):  # repeated?
                        arrCells.append('')  # for empty cells
                else:
                    arrCells.append('')

        # if row contained something
        if (len(arrCells)):
            arrRows.append(arrCells)
        # else:
        #     print "Empty or commented row (", row_comment, ")"

    self.SHEETS[name] = arrRows
def dset_sheet(dataset, ws):
    """Completes given worksheet from given Dataset."""

    def float_or_not(val):
        # float output
        fltExp = re.compile(r'^\s*[-+]?\d+(\.\d+)?\s*$')
        if fltExp.match(str(val)):
            tc = table.TableCell(valuetype="float", value=str(val).strip())
        else:
            tc = table.TableCell(valuetype="string")
        return tc

    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    for i, row in enumerate(_package):
        row_number = i + 1
        odf_row = table.TableRow(stylename=bold, defaultcellstylename='bold')
        for j, col in enumerate(row):
            try:
                col = unicode(col, errors='ignore')
            except TypeError:
                ## col is already unicode
                pass

            ws.addElement(table.TableColumn())

            # bold headers
            if (row_number == 1) and dataset.headers:
                odf_row.setAttribute('stylename', bold)
                ws.addElement(odf_row)
                cell = float_or_not(col)
                p = text.P()
                p.addElement(text.Span(text=col, stylename=bold))
                cell.addElement(p)
                odf_row.addElement(cell)

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        ws.addElement(odf_row)
                        cell = float_or_not(col)
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                    else:
                        ws.addElement(odf_row)
                        cell = float_or_not(col)
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                except TypeError:
                    ws.addElement(odf_row)
                    cell = float_or_not(col)
                    cell.addElement(text.P(text=col))
                    odf_row.addElement(cell)
def dset_sheet(dataset, ws):
    """Completes given worksheet from given Dataset."""

    def float_or_not(val):
        # float output
        fltExp = re.compile(r'^\s*[-+]?\d+(\.\d+)?\s*$')
        if fltExp.match(str(val)):
            tc = table.TableCell(valuetype="float", value=str(val).strip())
        else:
            tc = table.TableCell(valuetype="string")
        return tc

    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    for i, row in enumerate(_package):
        row_number = i + 1
        odf_row = table.TableRow(stylename=bold, defaultcellstylename='bold')
        for j, col in enumerate(row):
            try:
                col = unicode(col, errors='ignore')
            except TypeError:
                ## col is already unicode
                pass

            ws.addElement(table.TableColumn())

            # bold headers
            if (row_number == 1) and dataset.headers:
                odf_row.setAttribute('stylename', bold)
                ws.addElement(odf_row)
                cell = float_or_not(col)
                p = text.P()
                p.addElement(text.Span(text=col, stylename=bold))
                cell.addElement(p)
                odf_row.addElement(cell)

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        ws.addElement(odf_row)
                        cell = float_or_not(col)
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                    else:
                        ws.addElement(odf_row)
                        cell = float_or_not(col)
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                except TypeError:
                    ws.addElement(odf_row)
                    cell = float_or_not(col)
                    cell.addElement(text.P(text=col))
                    odf_row.addElement(cell)
def _serialize_row(row):
    """Returns string representation of a single row.

    :param row: single dataset row
    """
    new_row = [_escape_tex_reserved_symbols(unicode(item)) if item else ''
               for item in row]
    return 6 * ' ' + ' & '.join(new_row) + ' \\\\'
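# --- Usage sketch (illustrative, not part of the original source) -----------
# What the LaTeX row serializer above produces for a simple row, assuming the
# module's `unicode` alias is available and _escape_tex_reserved_symbols
# leaves plain alphanumeric text unchanged.
row = ('John', 'Adams', 90)
print(_serialize_row(row))
# '      John & Adams & 90 \\'  (six leading spaces, LaTeX row terminator)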
def readSheet(self, sheet):
    name = sheet.getAttribute("name")
    rows = sheet.getElementsByType(table.TableRow)
    arrRows = []

    # get longestRow to not fill empty rows with blanks, shortens runtime
    cols = sheet.getElementsByType(table.TableColumn)
    try:
        longestRow = int(max([col.getAttribute("numbercolumnsrepeated")
                              for col in cols]))
    except:
        longestRow = 0

    # for each row
    for row in rows:
        row_comment = ""
        arrCells = []
        cells = row.getElementsByType(table.TableCell)

        # for each cell
        for cell in cells:
            # repeated value?
            repeat = cell.getAttribute("numbercolumnsrepeated")
            if (not repeat):
                repeat = 1

            ps = cell.getElementsByType(text.P)
            textContent = ""

            # for each text node
            for p in ps:
                for n in p.childNodes:
                    if (n.nodeType == 3):
                        textContent = textContent + unicode(n.data)

            if (textContent):
                if (textContent[0] != "#"):  # ignore comments cells
                    for rr in range(int(repeat)):  # repeated?
                        arrCells.append(textContent)
                else:
                    row_comment = row_comment + textContent + " "
            else:
                if int(repeat) < longestRow:
                    for rr in range(int(repeat)):  # repeated?
                        arrCells.append('')  # for empty cells
                else:
                    arrCells.append('')

        # if row contained something
        if (len(arrCells)):
            arrRows.append(arrCells)
        # else:
        #     print "Empty or commented row (", row_comment, ")"

    self.SHEETS[name] = arrRows
def dset_sheet(dataset, ws, freeze_panes=True):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    bold = openpyxl.styles.Font(bold=True)
    wrap_text = openpyxl.styles.Alignment(wrap_text=True)

    for i, row in enumerate(_package):
        row_number = i + 1
        for j, col in enumerate(row):
            col_idx = get_column_letter(j + 1)
            cell = ws['%s%s' % (col_idx, row_number)]

            # bold headers
            if (row_number == 1) and dataset.headers:
                # cell.value = unicode('%s' % col, errors='ignore')
                cell.value = unicode(col)
                cell.font = bold
                if freeze_panes:
                    # Export Freeze only after first Line
                    ws.freeze_panes = 'A2'

            # bold separators
            elif len(row) < dataset.width:
                cell.value = unicode('%s' % col, errors='ignore')
                cell.font = bold

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        cell.value = unicode('%s' % col, errors='ignore')
                        cell.alignment = wrap_text
                    else:
                        cell.value = unicode('%s' % col, errors='ignore')
                except TypeError:
                    cell.value = unicode(col)
def test_unicode_renders_markdown_table(self):
    # add another entry to test right field width for
    # integer
    self.founders.append(('Old', 'Man', 100500))

    self.assertEquals(
        """
first_name|last_name |gpa   
----------|----------|------
John      |Adams     |90    
George    |Washington|67    
Thomas    |Jefferson |50    
Old       |Man       |100500
""".strip(),
        unicode(self.founders))
def test_unicode_renders_markdown_table(self):
    # add another entry to test right field width for
    # integer
    self.founders.append(('Old', 'Man', 100500))

    self.assertEquals(
        """
first_name|last_name |gpa   
----------|----------|------
John      |Adams     |90    
George    |Washington|67    
Thomas    |Jefferson |50    
Old       |Man       |100500
""".strip(),
        unicode(self.founders)
    )
def dset_sheet(dataset, ws):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    for i, row in enumerate(_package):
        row_number = i + 1
        odf_row = table.TableRow(stylename=bold, defaultcellstylename='bold')
        for j, col in enumerate(row):
            try:
                col = unicode(col, errors='ignore')
            except TypeError:
                ## col is already unicode
                pass

            ws.addElement(table.TableColumn())

            # bold headers
            if (row_number == 1) and dataset.headers:
                odf_row.setAttribute('stylename', bold)
                ws.addElement(odf_row)
                cell = table.TableCell()
                p = text.P()
                p.addElement(text.Span(text=col, stylename=bold))
                cell.addElement(p)
                odf_row.addElement(cell)

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        ws.addElement(odf_row)
                        cell = table.TableCell()
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                    else:
                        ws.addElement(odf_row)
                        cell = table.TableCell()
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                except TypeError:
                    ws.addElement(odf_row)
                    cell = table.TableCell()
                    cell.addElement(text.P(text=col))
                    odf_row.addElement(cell)
def dset_sheet(dataset, ws):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    for i, row in enumerate(_package):
        row_number = i + 1
        odf_row = table.TableRow(stylename=bold, defaultcellstylename='bold')
        for j, col in enumerate(row):
            try:
                col = unicode(col, errors='ignore')
            except TypeError:
                ## col is already unicode
                pass

            ws.addElement(table.TableColumn())

            # bold headers
            if (row_number == 1) and dataset.headers:
                odf_row.setAttribute('stylename', bold)
                ws.addElement(odf_row)
                cell = table.TableCell()
                p = text.P()
                p.addElement(text.Span(text=col, stylename=bold))
                cell.addElement(p)
                odf_row.addElement(cell)

            # wrap the rest
            else:
                try:
                    if '\n' in col:
                        ws.addElement(odf_row)
                        cell = table.TableCell()
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                    else:
                        ws.addElement(odf_row)
                        cell = table.TableCell()
                        cell.addElement(text.P(text=col))
                        odf_row.addElement(cell)
                except TypeError:
                    ws.addElement(odf_row)
                    cell = table.TableCell()
                    cell.addElement(text.P(text=col))
                    odf_row.addElement(cell)
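# --- Usage sketch (illustrative, not part of the original source) -----------
# Driving one of the ODS dset_sheet variants above with odfpy; assumes tablib
# and odfpy are installed and that the defining module's `bold` style, `text`
# import, and `unicode` alias are available. 'example.ods' is hypothetical.
import tablib
from odf import opendocument, table

data = tablib.Dataset(headers=['first_name', 'last_name'])
data.append(('John', 'Adams'))

doc = opendocument.OpenDocumentSpreadsheet()
ws = table.Table(name=data.title or 'Sheet1')
doc.spreadsheet.addElement(ws)

dset_sheet(data, ws)
doc.save('example.ods')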
def add(self, obj):
    cached_obj = CachedObject(obj)

    fields_to_cache = (
        (field, value)
        for field, value in (
            (field, getattr(obj, field, None))
            for field in self.cache_fields
        )
        if value is not None
    )

    for field, value in fields_to_cache:
        self.objects[field][unicode(value)].add(cached_obj)

    self.object_count += 1
def __unicode__(self):
    result = []

    # Add unicode representation of headers.
    result.append([unicode(h) for h in self.__headers])

    # Add unicode representation of rows.
    result.extend(list(map(unicode, row)) for row in self._data)

    lens = [list(map(len, row)) for row in result]
    field_lens = list(map(max, zip(*lens)))

    # delimiter between header and data
    result.insert(1, ["-" * length for length in field_lens])

    format_string = "|".join("{%s:%s}" % item for item in enumerate(field_lens))

    return "\n".join(format_string.format(*row) for row in result)
def __unicode__(self):
    result = []

    # Add unicode representation of headers.
    result.append([unicode(h) for h in self.__headers])

    # Add unicode representation of rows.
    result.extend(list(map(unicode, row)) for row in self._data)

    lens = [list(map(len, row)) for row in result]
    field_lens = list(map(max, zip(*lens)))

    # delimiter between header and data
    result.insert(1, ['-' * length for length in field_lens])

    format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens))

    return '\n'.join(format_string.format(*row) for row in result)
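# --- Worked example (illustrative, not part of the original source) ---------
# The core of the __unicode__ methods above, demonstrated standalone: compute
# each column's width from its longest value, then left-pad with str.format.
rows = [['first_name', 'last_name', 'gpa'],
        ['John', 'Adams', '90']]
field_lens = list(map(max, zip(*[list(map(len, r)) for r in rows])))
fmt = "|".join("{%s:%s}" % item for item in enumerate(field_lens))
print("\n".join(fmt.format(*r) for r in rows))
# first_name|last_name|gpa
# John      |Adams    |90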
def import_set(dset, in_stream, headers=True):
    """Returns dataset from ODS stream. Default sheet 1"""
    dset.wipe()

    doc = opendocument.load(in_stream)
    sheet = doc.spreadsheet.childNodes[0]
    rows = sheet.getElementsByType(table.TableRow)

    row_count = 0
    for row in rows:
        cells = row.getElementsByType(table.TableCell)
        arrCells = []
        cell_count = 0
        for cell in cells:
            # repeated value?
            repeat = cell.getAttribute("numbercolumnsrepeated")
            if (not repeat):
                repeat = 1

            ps = cell.getElementsByType(text.P)
            textContent = ""

            # for each text node
            for p in ps:
                c = p.firstChild  # TODO: Where is it used?
                textContent = textContent + unicode(p)

            if textContent and textContent[0] != "#":  # ignore comments cells
                for rr in range(int(repeat)):  # repeated?
                    arrCells.append(textContent)
                    cell_count += 1
            else:
                arrCells.append("")

        if row_count == 0 and headers:
            dset.headers = arrCells
        elif cell_count > 1:
            # empty cells are needed, but last string == ['']
            dset.append(arrCells)
        else:
            pass

        row_count += 1
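# --- Usage sketch (illustrative, not part of the original source) -----------
# Loading an ODS file into a fresh Dataset with the importer above; assumes
# tablib and odfpy are installed, that the module's `unicode` alias exists,
# and that 'example.ods' is a hypothetical spreadsheet on disk.
import tablib

data = tablib.Dataset()
with open('example.ods', 'rb') as fh:
    import_set(data, fh)

print(data.headers)   # first row of the first sheet
print(len(data))      # number of remaining rows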
def get(self, field, value, default=None):
    if isinstance(field, six.string_types):
        field = (field,)
        value = (value,)

    zipped_values = list(zip(field, value))

    if any(f not in self.objects or v is None for f, v in zipped_values):
        return default

    result_sets = [
        self.objects[f][unicode(v)]
        for f, v in zipped_values
    ]
    results = [
        result.obj
        for result in set.intersection(*result_sets)
    ]

    return self.to_result(results) or default
def export_set(dataset):
    """HTML representation of a Dataset."""
    stream = StringIO()

    page = markup.page()

    page.head.open()
    new_styling = ("table { border-collapse: collapse;} "
                   "th { background: #ccc;}"
                   "th, td { border: 1px solid #ccc; padding: 8px;}"
                   "tr:nth-child(even) { background: #efefef;}")
    styling = markup.oneliner.style(new_styling)
    page.style(new_styling)
    page.head.close()

    page.table.open()

    if dataset.headers is not None:
        new_header = [item if item is not None else '' for item in dataset.headers]

        page.thead.open()
        headers = markup.oneliner.th(new_header)
        page.tr(headers)
        page.thead.close()

    for row in dataset:
        new_row = [item if item is not None else '' for item in row]

        html_row = markup.oneliner.td(new_row)
        page.tr(html_row)

    page.table.close()

    # Allow unicode characters in output
    wrapper = codecs.getwriter("utf8")(stream)
    wrapper.writelines(unicode(page))

    return stream.getvalue().decode('utf-8')
def parse_html_for_content(html):
    """
    This function takes in the HTML from transifex and looks for the special
    tags that break down the anchors into two separate divs, see function above
    :param html: HTML from Transifex generated from the function above
    :return: clean HTML ready to be post processed
    """
    p = re.compile(r'<.*?>')
    if p.findall(html):
        # h = HTMLParser()
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser)

        a = CSSSelector('div.former-anchor')
        translatable_a = CSSSelector('div.former-anchor-translatable')
        img = CSSSelector('div.former-image')
        phones = CSSSelector('div.former-tel')
        italic = CSSSelector('div.former-em')
        bolded = CSSSelector('div.former-strong')

        anchors = a(tree)
        for anchor in anchors:
            try:
                attributes = [(k.replace('data-a-', ''), unescape(v))
                              for k, v in dict(anchor.attrib).items() if 'data-a-' in k]
                ht_st = "<a>{}</a>".format(stringify_children(anchor))
                div = etree.parse(StringIO(fix_html_fragment(
                    ht_st.encode('ascii', 'xmlcharrefreplace')))).getroot()
                for k, v in attributes:
                    div.attrib[k] = v
                swap_element_inbound(div, anchor)
            except:
                pass

        anchors = translatable_a(tree.getroot())
        for anchor in anchors:
            attributes = [(k.replace('data-a-', ''), unescape(v))
                          for k, v in dict(anchor.attrib).items() if 'data-a-' in k]
            content = etree.Element('div')
            link = etree.Element('div')
            for c in anchor:
                if 'class' in c.attrib:
                    if c.attrib['class'] == 'text':
                        content = c
                    if c.attrib['class'] == 'href':
                        link = c
            ht_st = "<a>{}</a>".format(stringify_children(content))
            div = etree.parse(StringIO(fix_html_fragment(ht_st))).getroot()
            for k, v in attributes:
                div.attrib[k] = v
            href = stringify_children(link)
            if href:
                div.attrib['href'] = unescape(href)
            swap_element_inbound(div, anchor)

        images = img(tree.getroot())
        for image in images:
            attributes = [(k.replace('data-img-', ''), unescape(v))
                          for k, v in dict(image.attrib).items() if 'data-img-' in k]
            div = etree.Element('img')
            for k, v in attributes:
                div.attrib[k] = unescape(v)
            swap_element_inbound(div, image)

        """
        _is = italic(tree.getroot())
        for i in _is:
            attributes = [(k.replace('data-em-', ''), unescape(v))
                          for k, v in dict(i.attrib).items() if 'data-em-' in k]
            ht_st = "<em>{}</em>".format(stringify_children(i))
            div = etree.parse(StringIO(fix_html_fragment(
                ht_st.encode('ascii', 'xmlcharrefreplace')))).getroot()
            for k, v in attributes:
                div.attrib[k] = unescape(v)
            swap_element_inbound(div, i)

        bs = bolded(tree.getroot())
        for b in bs:
            attributes = [(k.replace('data-strong-', ''), unescape(v))
                          for k, v in dict(b.attrib).items() if 'data-strong-' in k]
            ht_st = "<strong>{}</strong>".format(stringify_children(b))
            div = etree.parse(StringIO(fix_html_fragment(
                ht_st.encode('ascii', 'xmlcharrefreplace')))).getroot()
            for k, v in attributes:
                div.attrib[k] = unescape(v)
            swap_element_inbound(div, b)
        """

        tels = phones(tree.getroot())
        for tel in tels:
            if 'class' in tel.attrib:
                classes = tel.attrib['class'].split(' ')
                tag_format = "{}"
                if 'has-b' in classes:
                    tag_format = "<b>{}</b>".format(tag_format)
                if 'has-u' in classes:
                    tag_format = "<u>{}</u>".format(tag_format)
                if 'has-strong' in classes:
                    tag_format = "<strong>{}</strong>".format(tag_format)
                if 'has-em' in classes:
                    tag_format = "<em>{}</em>".format(tag_format)
                if 'has-i' in classes:
                    tag_format = "<i>{}</i>".format(tag_format)
                tag_format = "<span class=\"tel\">{}</span>".format(tag_format)
                div = etree.parse(StringIO(
                    tag_format.format(tel.attrib['data-tel-number']))).getroot()
                swap_element_inbound(div, tel)

        html = etree.tostring(tree).decode('utf-8')

    soup = BeautifulSoup(html, "lxml")
    return unicode(soup.prettify())
def test_unicode_renders_markdown_table(self):
    # add another entry to test right field width for
    # integer
    self.founders.append(('Old', 'Man', 100500))

    self.assertEqual('first_name|last_name |gpa   ',
                     unicode(self.founders).split('\n')[0])
def fix_html_fragment(html):
    soup = BeautifulSoup(html, "lxml")
    return ''.join([unicode(f) for f in soup.body.children]) if soup.body else ''
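# --- Usage sketch (illustrative, not part of the original source) -----------
# BeautifulSoup wraps a fragment in <html><body>; fix_html_fragment keeps only
# the children of <body>, so stray wrappers are stripped. Assumes the module's
# `unicode` alias is available (str on Python 3).
print(fix_html_fragment('<a href="/x">link</a> trailing text'))
# <a href="/x">link</a> trailing text
print(fix_html_fragment(''))   # no <body> is produced, so '' comes back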
def to_unicode(value):
    if isinstance(value, bytes):
        return value.decode('utf-8')
    return unicode(value)
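# --- Usage sketch (illustrative, not part of the original source) -----------
# to_unicode decodes raw bytes as UTF-8 and coerces everything else through
# unicode() (str() on Python 3, assuming the module aliases it that way).
assert to_unicode(b'caf\xc3\xa9') == u'caf\xe9'
assert to_unicode(42) == u'42'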
def stringify_children(node):
    b = BeautifulSoup(etree.tostring(node), "lxml")
    tag = node.tag
    bnode = b.find(tag)
    return "".join([unicode(c) for c in bnode.contents])
def test_unicode_renders_markdown_table(self):
    # add another entry to test right field width for
    # integer
    self.founders.append(("Old", "Man", 100500))

    self.assertEqual("first_name|last_name |gpa   ",
                     unicode(self.founders).split("\n")[0])
def parse_html_for_translation(html):
    """
    Preprocessing function that takes in HTML and preps it for translations.
    Anchors are removed, phone numbers are prepped, and other HTML quirks
    get fixed before going to transifex.
    :param html: raw HTML to be sent to Transifex
    :return: processed HTML
    """
    p = re.compile(r'<.*?>')
    if p.findall(html):
        html = unicode(BeautifulSoup(html, "lxml").prettify())
        parser = etree.HTMLParser()
        tree = etree.parse(StringIO(html), parser)

        a = CSSSelector('a')
        translatable_a = CSSSelector('a.translatable')
        img = CSSSelector('img:not(.image-translatable)')

        # Translatable anchors are split into text and links
        anchors = translatable_a(tree.getroot())
        logger.info(str(anchors))
        for anchor in anchors:
            attributes = [("data-a-{}".format(k), v) for k, v in dict(anchor.attrib).items()]
            div = etree.Element('div')
            content = etree.parse(
                StringIO("<div class=\"text\">{}</div>".format(stringify_children(anchor)))).getroot()
            href_format = """<div class=\"href\">{}</div>"""
            href_html = fix_html_fragment(href_format.format(anchor.attrib['href']))
            link = etree.parse(StringIO(href_html)).getroot()
            for k, v in attributes:
                div.attrib[k] = v
            div.attrib['class'] = 'former-anchor-translatable'
            div.append(content)
            div.append(link)
            swap_element(div, anchor)

        # Anchors are just the text
        anchors = a(tree.getroot())
        for anchor in anchors:
            attributes = [("data-a-{}".format(k), v) for k, v in dict(anchor.attrib).items()]
            anchor_format = "<div class=\"former-anchor\">{}</div>"
            anchor_html = fix_html_fragment(anchor_format.format(stringify_children(anchor)))
            div = etree.parse(StringIO(anchor_html)).getroot()
            for k, v in attributes:
                div.attrib[k] = v
            swap_element(div, anchor)

        # Images are just copies of the attributes
        images = img(tree.getroot())
        for image in images:
            div = etree.Element('div')
            attributes = [("data-img-{}".format(k), v) for k, v in dict(image.attrib).items()]
            for k, v in attributes:
                div.attrib[k] = v
            div.attrib['class'] = 'former-image'
            swap_element(div, image)

        """
        b_objects = CSSSelector('b, strong')(tree.getroot())
        for b in b_objects:
            attributes = [("data-strong-{}".format(k), v) for k, v in dict(b.attrib).items()]
            div = etree.parse(
                StringIO("<div>{}</div>".format(stringify_children(b)))).getroot()
            for k, v in attributes:
                div.attrib[k] = v
            div.attrib['class'] = 'former-strong'
            if b.getparent().tag == 'p' and len(b.getparent().getchildren()) == 1:
                # this is an only child, so lets replace the parent intead
                swap_element(div, b.getparent())
            else:
                swap_element(div, b)

        em_objects = CSSSelector('i, em')(tree.getroot())
        for em in em_objects:
            attributes = [("data-em-{}".format(k), v) for k, v in dict(em.attrib).items()]
            div = etree.parse(
                StringIO("<div>{}</div>".format(stringify_children(em)))).getroot()
            for k, v in attributes:
                div.attrib[k] = v
            div.attrib['class'] = 'former-em'
            if em.getparent().tag == 'p' and len(em.getparent().getchildren()) == 1:
                # this is an only child, so lets replace the parent intead
                swap_element(div, em.getparent())
            else:
                swap_element(div, em)
        """

        html = etree.tostring(tree).decode('utf-8')

    # Chicken coop de grass
    # Massive regex that takes in phone numbers and puts them in divs
    # only to be postprocessed below and dissapear from the translations
    p = re.compile(r'((?:\+\s*)*\d+(?:\s+\(*\d+\)*)*\d+(?:\s+\d+\(*\)*)+|\d+(?:\s+\d+)+|00\d+(?:\s+\d+)+)')
    html = p.sub(r'<div class="former-tel">\g<1></div>', html)

    soup = BeautifulSoup(html, "lxml")

    for div in soup.find_all('div'):
        tag_format = None
        while div.parent and div.parent.name in ['b', 'em', 'i', 'strong', 'u', 'sup']:
            if div.parent.name == "b":
                div.parent.unwrap()
                tag_format = "<b>{}</b>"
            if div.parent.name == "strong":
                div.parent.unwrap()
                tag_format = "<strong>{}</strong>"
            if div.parent.name == "em":
                div.parent.unwrap()
                tag_format = "<em>{}</em>"
            if div.parent.name == "i":
                div.parent.unwrap()
                tag_format = "<i>{}</i>"
            if div.parent.name == "u":
                div.parent.unwrap()
                tag_format = "<u>{}</u>"
            if div.parent.name == "sup":
                div.parent.unwrap()
                tag_format = "<sup>{}</sup>"
        if tag_format:
            children = "".join([unicode(c) for c in div.contents])
            div.clear()
            child_soup = BeautifulSoup(tag_format.format(children), "lxml")
            if child_soup.body:
                child_frag = child_soup.body.next
            elif child_soup.html:
                child_frag = child_soup.html.next
            else:
                child_frag = child_soup
            div.append(child_frag)

    for n in soup.select('u, b, i, em, strong, sup'):
        if not n.text.strip():
            n.extract()

    for tel in soup.select('div.former-tel'):
        number = tel.text
        classes = ['former-tel']
        if tel.select('b'):
            classes.append('has-b')
        if tel.select('em'):
            classes.append('has-em')
        if tel.select('strong'):
            classes.append('has-strong')
        if tel.select('i'):
            classes.append('has-i')
        if tel.select('u'):
            classes.append('has-u')
        tel.attrs['data-tel-number'] = number
        tel.attrs['class'] = classes
        tel.clear()

    return soup.prettify()
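# --- Worked example (illustrative, not part of the original source) ---------
# The phone-number pre-processing step used above, run standalone: runs of
# digits are wrapped in a placeholder div so they survive translation untouched.
import re

p = re.compile(r'((?:\+\s*)*\d+(?:\s+\(*\d+\)*)*\d+(?:\s+\d+\(*\)*)+|\d+(?:\s+\d+)+|00\d+(?:\s+\d+)+)')
print(p.sub(r'<div class="former-tel">\g<1></div>', 'Call 0711 123 456 for help'))
# Call <div class="former-tel">0711 123 456</div> for help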
# -*- coding: utf-8 -*-
""" Tablib - *SV Support.
"""

from tablib.compat import csv, StringIO, unicode

title = 'csv'
extensions = ('csv',)

DEFAULT_DELIMITER = unicode(',')


def export_set(dataset, **kwargs):
    """Returns CSV representation of Dataset."""
    stream = StringIO()

    kwargs.setdefault('delimiter', DEFAULT_DELIMITER)

    _csv = csv.writer(stream, **kwargs)

    for row in dataset._package(dicts=False):
        _csv.writerow(row)

    return stream.getvalue()


def import_set(dset, in_stream, headers=True, **kwargs):
    """Returns dataset from CSV stream."""
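# --- Usage sketch (illustrative, not part of the original source) -----------
# Exporting a small Dataset with the CSV writer above; assumes tablib is
# installed and that tablib.compat provides csv/StringIO/unicode as imported.
import tablib

data = tablib.Dataset(headers=['first_name', 'last_name'])
data.append(('John', 'Adams'))

print(export_set(data))
# first_name,last_name
# John,Adams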
def dset_sheet(dataset, ws, freeze_panes=True):
    """Completes given worksheet from given Dataset."""
    _package = dataset._package(dicts=False)

    for i, sep in enumerate(dataset._separators):
        _offset = i
        _package.insert((sep[0] + _offset), (sep[1],))

    bold = Font(bold=True)

    for i, row in enumerate(_package):
        row_number = i + 1
        for j, cell_value in enumerate(row):
            col_idx = get_column_letter(j + 1)
            cell = ws['%s%s' % (col_idx, row_number)]

            cell_horizontal, cell_vertical = None, None
            if isinstance(cell_value, dict):
                cell_color: str = cell_value.get("color", None)

                # handle horizontal alignment
                cell_horizontal: str = cell_value.get("horizontal", None)
                if cell_horizontal and cell_horizontal not in (
                        "general", "left", "center", "right"):
                    cell_horizontal = "general"  # default alignment

                # handle vertical alignment
                cell_vertical: str = cell_value.get("vertical", None)
                if cell_vertical and cell_vertical not in ("top", "center", "bottom"):
                    cell_vertical = "center"  # default alignment

                cell_value: str = cell_value.get("value", '')
                if cell_color:
                    cell.fill = PatternFill(
                        "solid", fgColor=cell_color.lstrip("# "))

            cell.alignment = Alignment(wrap_text=True,
                                       horizontal=cell_horizontal,
                                       vertical=cell_vertical)
            # add a single thin border line; fixed here
            thin = Side(border_style="thin", color="000000")
            cell.border = Border(top=thin, left=thin, right=thin, bottom=thin)

            # bold headers
            if (row_number == 1) and dataset.headers:
                # cell.value = unicode('%s' % col, errors='ignore')
                cell.value = unicode(cell_value)
                cell.font = bold
                if freeze_panes:
                    # Export Freeze only after first Line
                    ws.freeze_panes = 'A2'

            # bold separators
            elif len(row) < dataset.width:
                cell.value = unicode('%s' % cell_value, errors='ignore')
                cell.font = bold

            # wrap the rest
            else:
                try:
                    if '\n' in cell_value:
                        cell.value = unicode('%s' % cell_value, errors='ignore')
                    else:
                        cell.value = unicode('%s' % cell_value, errors='ignore')
                except TypeError:
                    cell.value = unicode(cell_value)
def pull_from_transifex(slug, language, project=settings.TRANSIFEX_PROJECT_SLUG):
    content_pages = Page.objects.filter(slug=slug, status='staging')
    try:
        content_page = content_pages[0]
    except Exception as e:
        logger.info('Page not found.')
        raise e

    password = settings.TRANSIFEX_PASSWORD
    user = settings.TRANSIFEX_USER

    transifex_url_data = {
        "project": project,
        "slug": content_page.slug,
        "language": language
    }
    fetch_format = "http://www.transifex.com/api/2/project/{project}/resource/{slug}html/translation/{language}/" \
                   "?mode=default"

    logger.info("Trying to request: %s" % fetch_format.format(**transifex_url_data))
    logger.info("With creds: %s %s" % (user, password))

    r = requests.get(fetch_format.format(**transifex_url_data), auth=(user, password))

    translation = r.json()

    text = translation['content'].strip()
    text = parse_html_for_content(text)

    soup = BeautifulSoup(text, "lxml")

    parser = etree.HTMLParser()
    tree = etree.parse(StringIO(unicode(soup.prettify())), parser)

    selector = CSSSelector('div[data-id]')
    title_selector = CSSSelector('div.title')

    """
    Directions are handled application-wise
    """
    dir_selector = CSSSelector('[dir]')
    for element in dir_selector(tree.getroot()):
        del element.attrib['dir']

    content = selector(tree.getroot())
    title = title_selector(tree.getroot())
    title = title[0].text.strip()

    dict_list = []
    for div in content:
        plugin_dict = {
            'id': div.attrib['data-id'],
            'class': div.attrib['data-class'],
            'type': div.attrib['data-type'],
            'parent': div.attrib['data-parent'],
            'translated': (div.text or '') + u''.join([
                etree.tostring(div, pretty_print=True, method="html").decode('utf-8')
            ]),
        }
        dict_list.append(plugin_dict)

    _translate_page(dict_list, language, content_page, title)
# -*- coding: utf-8 -*-
""" Tablib - TSV (Tab Separated Values) Support.
"""

from tablib.compat import unicode
from tablib.formats._csv import (
    export_set as export_set_wrapper,
    import_set as import_set_wrapper,
    detect as detect_wrapper,
)

title = 'tsv'
extensions = ('tsv',)

DELIMITER = unicode('\t')


def export_set(dataset):
    """Returns TSV representation of Dataset."""
    return export_set_wrapper(dataset, delimiter=DELIMITER)


def import_set(dset, in_stream, headers=True):
    """Returns dataset from TSV stream."""
    return import_set_wrapper(dset, in_stream, headers=headers,
                              delimiter=DELIMITER)


def detect(stream):
# -*- coding: utf-8 -*-
""" Tablib - *SV Support.
"""

from tablib.compat import csv, StringIO, unicode

title = 'csv'
extensions = ('csv',)

DEFAULT_DELIMITER = unicode(',')


def export_stream_set(dataset, **kwargs):
    """Returns CSV representation of Dataset as file-like."""
    stream = StringIO()

    kwargs.setdefault('delimiter', DEFAULT_DELIMITER)

    _csv = csv.writer(stream, **kwargs)

    for row in dataset._package(dicts=False):
        _csv.writerow(row)

    stream.seek(0)
    return stream


def export_set(dataset, **kwargs):
    """Returns CSV representation of Dataset."""
    stream = export_stream_set(dataset, **kwargs)
    return stream.getvalue()
# -*- coding: utf-8 -*-
""" Tablib - TSV (Tab Separated Values) Support.
"""

from tablib.compat import unicode
from tablib.formats._csv import (
    export_set as export_set_wrapper,
    import_set as import_set_wrapper,
    detect as detect_wrapper,
)

title = 'tsv'
extensions = ('tsv',)

DELIMITER = unicode('\t')


def export_set(dataset):
    """Returns TSV representation of Dataset."""
    return export_set_wrapper(dataset, delimiter=DELIMITER)


def import_set(dset, in_stream, headers=True):
    """Returns dataset from TSV stream."""
    return import_set_wrapper(dset, in_stream, headers=headers,
                              delimiter=DELIMITER)


def detect(stream):
    """Returns True if given stream is valid TSV."""
    return detect_wrapper(stream, delimiter=DELIMITER)
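# --- Usage sketch (illustrative, not part of the original source) -----------
# Exporting the same Dataset through the TSV wrapper above, which simply
# delegates to the shared CSV code with a tab delimiter; assumes tablib.
import tablib

data = tablib.Dataset(headers=['first_name', 'last_name'])
data.append(('John', 'Adams'))

print(export_set(data))
# first_name	last_name
# John	Adams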
def _serialize_row(row, delimiter='|'):
    return '%s%s%s' % (delimiter,
                       delimiter.join([unicode(item) if item else ' '
                                       for item in row]),
                       delimiter)
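# --- Usage sketch (illustrative, not part of the original source) -----------
# The pipe-delimited serializer above; note that falsy cells (None, '', 0)
# collapse to a single space. Assumes the module's `unicode` alias exists.
print(_serialize_row(('John', 'Adams', 90)))   # |John|Adams|90|
print(_serialize_row(('John', None, 90)))      # |John| |90|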