def parse(self, csvsrc): text, encoding = self.detect_encoding(csvsrc, default_encodings=['utf-8', 'utf-16']) #FIXME: raise parse error if encoding detection fails? self.encoding = encoding or 'utf-8' sniffer = csv.Sniffer() sample = text[:1024] try: self.dialect = sniffer.sniff(sample) if self.dialect.quoting == csv.QUOTE_MINIMAL: #HACKISH: most probably a default, not real detection self.dialect.quoting = csv.QUOTE_ALL self.dialect.doublequote = True except csv.Error: self.dialect = 'default' inputfile = csv.StringIO(text) try: fieldnames = detect_header(inputfile, self.dialect, self.fieldnames) self.fieldnames = fieldnames except csv.Error: pass inputfile.seek(0) reader = try_dialects(inputfile, self.fieldnames, self.dialect) first_row = True for row in reader: newce = self.UnitClass() newce.fromdict(row) if not first_row or not newce.match_header(): self.addunit(newce) first_row = False
def getoutput(self):
    """Return the whole store rendered as one CSV string.

    The first row is a header built from ``self.fieldnames``; keys in a
    unit's dict that are not listed there are silently dropped
    (``extrasaction='ignore'``).
    """
    buffer = csv.StringIO()
    writer = csv.DictWriter(buffer, self.fieldnames,
                            extrasaction='ignore', dialect=self.dialect)
    writer.writeheader()
    for unit in self.units:
        writer.writerow(unit.todict())
    return buffer.getvalue()
def serialize(self, out):
    """Write the store to *out* as catkeys CSV, encoded with ``self.encoding``.

    The first row is not a column header: it carries the file metadata
    taken from the store's header dictionary.
    """
    buffer = csv.StringIO()
    writer = csv.DictWriter(buffer, FIELDNAMES, dialect="catkeys")
    # No real headers, the first line contains metadata
    header_values = [self.header._header_dict[key] for key in FIELDNAMES_HEADER]
    writer.writerow(dict(zip(FIELDNAMES, header_values)))
    for unit in self.units:
        writer.writerow(unit.dict)
    out.write(buffer.getvalue().encode(self.encoding))
def serialize(self, out):
    """Write translated units to *out* as OmegaT glossary CSV.

    Writes nothing at all when the store holds no translated units.
    """
    # Only translated units are emitted; bail out early when none exist.
    translated = [unit for unit in self.units if unit.istranslated()]
    if not translated:
        return
    buffer = csv.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=OMEGAT_FIELDNAMES,
                            dialect="omegat")
    for unit in translated:
        writer.writerow(unit.dict)
    out.write(buffer.getvalue().encode(self.encoding))
def serialize(self, out):
    """Write translated units to *out* in UTX format, header first.

    Writes nothing at all when the store holds no translated units.
    """
    # Only translated units are emitted; bail out early when none exist.
    translated = [unit for unit in self.units if unit.istranslated()]
    if not translated:
        return
    buffer = csv.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=self._fieldnames, dialect="utx")
    for unit in translated:
        writer.writerow(unit.dict)
    body = buffer.getvalue().encode(self.encoding)
    # The UTX header precedes the CSV body in the output stream.
    out.write(self._write_header().encode(self.encoding))
    out.write(body)
def serialize(self, out):
    """Write translated units to *out* as a Wordfast translation memory.

    The header's ``tucount`` is refreshed to the number of translated
    units before writing. Writes nothing at all when the store holds no
    translated units.
    """
    # Only translated units are emitted; bail out early when none exist.
    translated = [unit for unit in self.units if unit.istranslated()]
    if not translated:
        return
    buffer = csv.StringIO()
    writer = csv.DictWriter(buffer, fieldnames=WF_FIELDNAMES,
                            dialect="wordfast")
    # No real headers, the first line contains metadata
    self.header.tucount = len(translated)
    header_values = [self.header.header[key] for key in WF_FIELDNAMES_HEADER]
    writer.writerow(dict(zip(WF_FIELDNAMES, header_values)))
    for unit in translated:
        writer.writerow(unit.dict)
    out.write(buffer.getvalue().encode(self.encoding))
def detect_header(sample, dialect, fieldnames):
    """Return the field names to use, detecting a header row if present.

    :param sample: the CSV content, either as a string or as an open
        file-like object (``parse()`` passes an already-open StringIO).
    :param dialect: csv dialect (object or registered name) to try first;
        falls back to the ``'default'`` dialect, then ``'excel'``.
    :param fieldnames: fallback field names used when the first row is
        not a valid header.
    :return: the first row itself when it holds valid field names,
        otherwise a slice of *fieldnames* matching the column count of
        the first row (a minimum of 3 columns is assumed).
    :raises StopIteration: if the input has no rows at all.
    """
    if isinstance(sample, str):
        inputfile = csv.StringIO(sample)
    else:
        # BUGFIX: the caller in parse() passes an open file object; the old
        # code re-wrapped it in csv.StringIO(), which raises TypeError.
        inputfile = sample
        inputfile.seek(0)
    try:
        reader = csv.reader(inputfile, dialect)
    except csv.Error:
        # Unknown or broken dialect: retry with 'default', then 'excel'.
        try:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'default')
        except csv.Error:
            inputfile.seek(0)
            reader = csv.reader(inputfile, 'excel')
    header = next(reader)
    # Assume at least the three standard columns, even for narrower files.
    columncount = max(len(header), 3)
    if valid_fieldnames(header):
        return header
    return fieldnames[:columncount]