def rollover(self):
    """Migrate the in-memory buffer into a real temporary file.

    Does nothing once the rollover has already happened. The current seek
    position of the buffer is preserved across the copy.
    """
    if self._rolled:
        return
    current = self.buffer
    # Remember where the caller was positioned before copying everything.
    saved_pos = current.tell()
    spooled = EncodedFile(TemporaryFile(dir=self._dir), data_encoding='utf-8')
    spooled.write(current.getvalue())
    spooled.seek(saved_pos)
    current.close()
    self._buffer = spooled
def __init__(self, zodb_blob, encoding=None):
    """Load a ConfigParser from a ZODB blob, recoding to UTF-8 if needed.

    zodb_blob: blob whose ``open('r')`` yields the raw config stream.
    encoding:  source encoding; sniffed with ``snoop_encoding()`` when
               omitted (falsy).
    """
    self.zodb_blob = zodb_blob
    blob = zodb_blob.open('r')
    if not encoding:
        # Sniffing consumes the stream, so rewind before parsing.
        encoding = snoop_encoding(blob)
        blob.seek(0)
    if encoding != "utf-8":
        # Transparently recode the stream to UTF-8 on the fly.
        blob = EncodedFile(blob, "utf-8", encoding)
    self.parser = configparser.ConfigParser()
    # read_file() replaces readfp(), which was deprecated since Python 3.2
    # and removed in Python 3.12.
    self.parser.read_file(blob)
    self.encoding = encoding
    blob.close()
def convert_to_tags(self):
    """
    Read in the file one line at a time. Get the important info, between
    [:16]. Check if this info matches a dictionary entry. If it does, call
    the appropriate function.
    The functions that are called:
        a text function for text
        an open function for open tags
        an open with attribute function for tags with attributes
        an empty with attribute function for tags that are empty but have
        attributes.
        a closed function for closed tags.
        an empty tag function.
    """
    self.__initiate_values()
    with open(self.__write_to, 'w') as self.__write_obj:
        # Emit the XML declaration/header before any token output.
        self.__write_dec()
        with open(self.__file, 'r') as read_obj:
            for line in read_obj:
                # The first 16 characters of each line identify the token.
                self.__token_info = line[:16]
                # Dispatch through the state table; unknown tokens are
                # silently skipped.
                action = self.__state_dict.get(self.__token_info)
                if action is not None:
                    action(line)
    # convert all encodings to UTF8 or ASCII to avoid unsupported encodings in lxml
    if self.__convert_utf or self.__bad_encoding:
        # Move the freshly written output back over the input so it can be
        # re-read and recoded into self.__write_to.
        copy_obj = copy.Copy(bug_handler=self.__bug_handler)
        copy_obj.rename(self.__write_to, self.__file)
        file_encoding = "utf-8"
        if self.__bad_encoding:
            # Fall back to plain ASCII when the declared encoding is unusable.
            file_encoding = "us-ascii"
        with open(self.__file, 'r') as read_obj:
            with open(self.__write_to, 'w') as write_obj:
                # 'replace' substitutes unencodable characters instead of
                # raising.
                write_objenc = EncodedFile(write_obj, self.__encoding,
                                           file_encoding, 'replace')
                for line in read_obj:
                    write_objenc.write(line)
    copy_obj = copy.Copy(bug_handler=self.__bug_handler)
    if self.__copy:
        # Keep a debugging snapshot of this stage's output.
        copy_obj.copy_file(self.__write_to, "convert_to_tags.data")
    copy_obj.rename(self.__write_to, self.__file)
    os.remove(self.__write_to)
def load_snippets_from_txt_file(txt_file, snippet_count, book_id):
    """Load snippet_count snippets from the given text file.

    Picks random offsets in the middle 80% of the file, skips the
    (likely partial) line at the seek point, and keeps the first of the
    next two lines that is at least MIN_SNIPPET_SIZE long.

    Returns a set of (stripped_line, byte_position, book_id) tuples.

    NOTE(review): py2-era code (``unicode`` builtin, integer ``/``).
    NOTE(review): if the file contains fewer than ``snippet_count``
    qualifying lines this loop never terminates — confirm callers
    guarantee enough long lines.
    """
    size = os.path.getsize(txt_file.name)
    snippets = set()
    # Decode leniently; undecodable bytes are dropped rather than fatal.
    enc_file = EncodedFile(txt_file.file, 'utf-8', errors='ignore')
    while len(snippets) < snippet_count:
        # Stay clear of the first and last 10% of the file (front/back matter).
        starting_byte = random.randint(size / 10, 9 * size / 10)
        # Ignore the first line read since the cursor may start in the middle.
        enc_file.seek(starting_byte)
        line = guarded_readline(enc_file)
        pos = enc_file.tell()
        for i in range(2):
            line = guarded_readline(enc_file)
            if len(line) >= MIN_SNIPPET_SIZE:
                line = unicode(line, encoding='utf-8', errors='ignore')
                if VERBOSE:
                    print("{0} : {1}".format(txt_file.name, pos))
                # pos is the offset of the line's start, captured before the read.
                snippets.add((line.strip(), pos, book_id))
                break
            pos = enc_file.tell()
    return snippets
def test_decode_error_dictreader(self):
    """Make sure the error-handling mode is obeyed on DictReaders."""
    # Latin-1 source data fed to an ASCII reader: the non-ASCII byte
    # must be dropped, not raised on.
    stream = EncodedFile(StringIO('name,height,weight\nLöwis,2,3'),
                         data_encoding='iso-8859-1')
    rows = list(csv.DictReader(stream, encoding='ascii', errors='ignore'))
    self.assertEqual(rows[0]['name'], 'Lwis')
def test_decode_error(self):
    """Make sure the specified error-handling mode is obeyed on readers."""
    stream = EncodedFile(StringIO('Löwis,2,3'), data_encoding='iso-8859-1')
    # errors='ignore' should silently drop the byte ASCII cannot represent.
    rows = list(csv.reader(stream, encoding='ascii', errors='ignore'))
    self.assertEqual(rows[0][0], 'Lwis')
def buffer(self):
    """Lazily create and cache the underlying UTF-8 byte buffer."""
    if not hasattr(self, '_buffer'):
        self._buffer = EncodedFile(BytesIO(), data_encoding='utf-8')
    return self._buffer
def _encode_wrap(f):
    """Wrap *f* in a codecs.EncodedFile that treats its data as UTF-8."""
    wrapped = EncodedFile(f, 'utf-8')
    return wrapped
def form_valid(self, form):
    """Import an uploaded Kindle clippings file for the current user.

    Parses the upload, archives the raw content, then creates Book and
    Clipping rows, reporting counts (and any per-book failures) through
    Django messages. Always delegates to the parent's form_valid().
    """
    if 'clippings_file' not in self.request.FILES:
        messages.add_message(self.request, messages.ERROR,
                             _('Could not process the uploaded file'))
        return super(UploadMyClippingsFileView, self).form_valid(form)
    try:
        # Decode leniently: bad bytes are dropped instead of failing the upload.
        clippings_file = EncodedFile(self.request.FILES['clippings_file'],
                                     data_encoding='utf-8',
                                     errors='ignore')
        clippings_file_content = clippings_file.read()
        clips = kindle_clipping_parser.get_clips_from_text(
            clippings_file_content)
        # Save the file in db (raw content kept for debugging/reprocessing).
        language_header = self.request.META.get('HTTP_ACCEPT_LANGUAGE')
        MyClippingsFile.objects.create_file(
            content=clippings_file_content, language_header=language_header)
    except Exception:
        # f-string prefix removed: the message has no placeholders.
        logger.error('Error parsing a clippings file.', exc_info=True)
        messages.add_message(
            self.request, messages.ERROR,
            _('Couldn\'t process your Clippings. No clippings have been imported. The developer is informed, please try again in a couple of days!'
              ))
    else:
        user = self.request.user
        num_books = 0
        num_clippings = 0
        errors = 0
        for book_title, clippings in clips.items():
            book, created = Book.objects.get_or_create(
                user=user,
                title=book_title,
            )
            if created:
                num_books += 1
            try:
                for clip_content in clippings:
                    __, created = Clipping.objects.get_or_create(
                        user=user,
                        content=clip_content,
                        defaults={
                            'book': book,
                        })
                    if created:
                        num_clippings += 1
            except Exception:
                # Counts one failure per book, matching previous behavior.
                errors += 1
                logger.error('Error importing a clipping.', exc_info=True)
        if errors > 0:
            # BUG FIX: translate the msgid first, THEN format it. The old
            # code formatted before calling _(), so the formatted string
            # could never match the translation catalog.
            messages.add_message(
                self.request, messages.ERROR,
                _('{num_clippings} clippings could not be imported').format(
                    num_clippings=errors))
        messages.add_message(
            self.request, messages.SUCCESS,
            _('Successfully imported {num_clippings} new clippings from {num_books} books'
              ).format(
                  num_clippings=num_clippings,
                  num_books=num_books,
              ))
    return super(UploadMyClippingsFileView, self).form_valid(form)
def __init__(self, out=None):
    """Bind the destination stream, defaulting to UTF-8-wrapped stdout."""
    if out:
        self.out = out
    else:
        self.out = EncodedFile(sys.stdout, "utf-8")
def select(self):
    """Return every data row of the CSV at self.fpath (header skipped)."""
    source = EncodedFile(open(self.fpath, 'rb'), 'utf-8', 'utf-8', 'ignore')
    # Discard the header line before handing the stream to csv.
    source.next()
    # NOTE: the file handle is never closed — matches original behavior.
    return list(csv.reader(source))
file, line) else: s = warnings.formatwarning(message, category, filename, lineno, line) logger = logging.getLogger("py.warnings") if not logger.handlers: if hasattr(sys.stderr, "isatty") and sys.stderr.isatty(): handler = logging.StreamHandler() # Logs to stderr by default else: handler = logging.NullHandler() logger.addHandler(handler) log(s.strip(), fn=logger.warning) warnings.showwarning = showwarning logbuffer = EncodedFile(StringIO(), "UTF-8", errors="replace") def wx_log(logwindow, msg): if logwindow.IsShownOnScreen(): # Check if log buffer has been emptied or not. # If it has, our log message is already included. if logbuffer.tell(): logwindow.Log(msg) class DummyLogger(): def critical(self, msg, *args, **kwargs): pass def debug(self, msg, *args, **kwargs):
def getAstrolog32(filename):
    """
    Parse an Astrolog/Astrolog32 chart-info file into a dict with keys
    month/day/year, hour/minute/second, timezone, longitude, latitude,
    name, location.

    examples:
        @0102  ; Astrolog chart info.
        /qb 6 23 1972  3:00:00 ST -1:00 5:24:00E 43:18:00N
        /zi "Zinedine Zidane" "Marseille"

        @0102  ; Astrolog32 chart info.
        ; Date is in American format: month day year.
        /qb 10 27 1980 10:20:00 ST -1:00 14:39'00E 50:11'00N
        /zi "Honzik" "Brandys nad Labem"

    NOTE(review): no return statement is visible — the caller probably
    expects ``d``; this block may be truncated. The file handle is also
    never closed.
    """
    d={}
    h=open(filename)
    # Recode latin-1 input to UTF-8 transparently while reading.
    f=EncodedFile(h,"utf-8","latin-1")
    for line in f.readlines():
        if line[0:3] == "/qb":
            # Split on spaces and drop empty fields from repeated blanks.
            s0=line.strip().split(' ')
            s=[]
            for j in range(len(s0)):
                if s0[j]!='':
                    s.append(s0[j])
            # American field order: month day year.
            d['month']=s[1]
            d['day']=s[2]
            d['year']=s[3]
            d['hour'],d['minute'],d['second']=0,0,0
            # Time may have 1-3 colon-separated components (H[:M[:S]]).
            for x in range(len(s[4].split(':'))):
                if x == 0:
                    d['hour'] = s[4].split(':')[0]
                if x == 1:
                    d['minute'] = s[4].split(':')[1]
                if x == 2:
                    d['second'] = s[4].split(':')[2]
            #timezone
            # NOTE(review): for negative offsets like "-1:30" this yields
            # -(−1 + 0.5) = 0.5 instead of −1.5 — the minutes fraction is
            # added before the sign flip; confirm intended behavior.
            tz=s[6].split(':')
            d['timezone']=float(tz[0])+float(tz[1])/60.0
            if float(tz[0]) < 0:
                d['timezone']=d['timezone']/-1.0
            #longitude
            # The hemisphere letter is fused to the last component; pull it
            # off as an extra element and trim it from the numeric part.
            lon=s[7].split(':')
            lon.append(lon[-1][-1])
            lon[-2]=lon[-2][0:2]
            d['longitude']=float(lon[0])+(float(lon[1])/60.0)
            if len(lon) > 3:
                d['longitude']+=float(lon[2])/3600.0
            if lon[-1] == 'W':
                d['longitude'] = d['longitude']/-1.0
            #latitude
            lon=s[8].split(':')
            lon.append(lon[-1][-1])
            lon[-2]=lon[-2][0:2]
            d['latitude']=float(lon[0])+(float(lon[1])/60.0)
            if len(lon) > 3:
                d['latitude']+=float(lon[2])/3600.0
            if lon[-1] == 'S':
                d['latitude'] = d['latitude']/-1.0
        if line[0:3] == "/zi":
            # Quoted fields: split on '"' and keep the non-blank pieces.
            s0=line.strip().split('"')
            s=[]
            for j in range(len(s0)):
                if s0[j] != '' and s0[j] != ' ':
                    s.append(s0[j])
            d['name']=s[1]
            d['location']=s[2]
def __init__(self, out=None):
    """Remember where output goes; fall back to UTF-8-recoded stdout."""
    self.out = out or EncodedFile(sys.stdout, "utf-8")
class DocGenerator(xmlapp.Application):
    """xmlapp Application that serializes parse events back out as XML."""

    def __init__(self, out=None):
        """Write to *out*, or a UTF-8-wrapped sys.stdout when omitted."""
        self.out = out if out else EncodedFile(sys.stdout, "utf-8")

    def handle_pi(self, target, remainder):
        """Re-emit a processing instruction."""
        self.out.write("<?%s %s?>" % (target, remainder))

    def handle_start_tag(self, name, amap):
        """Re-emit a start tag with its (escaped) attributes."""
        write = self.out.write
        write("<" + name)
        for attr, value in amap.items():
            write(' %s="%s"' % (attr, escape_attval(value)))
        write(">")

    def handle_end_tag(self, name):
        """Re-emit an end tag."""
        self.out.write("</%s>" % name)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Re-emit ignorable whitespace, escaped as content."""
        self.out.write(escape_content(data[start_ix:end_ix]))

    def handle_data(self, data, start_ix, end_ix):
        """Re-emit character data, escaped as content."""
        self.out.write(escape_content(data[start_ix:end_ix]))
def flush(self):
    """Serialize the parser back into the ZODB blob, recoding if needed."""
    blob = self.zodb_blob.open('w')
    if self.encoding != "utf-8":
        # Write back in the original source encoding.
        blob = EncodedFile(blob, "utf-8", self.encoding)
    self.parser.write(blob)
    blob.close()
def select(self):
    """Return all data rows from the UTF-8 CSV at self.fpath."""
    reader_input = EncodedFile(open(self.fpath,'rb'),'utf-8','utf-8','ignore')
    # Skip the header record before parsing.
    reader_input.next()
    rows = [row for row in csv.reader(reader_input)]
    return rows
import os import csv import datetime from codecs import EncodedFile from Tkinter import Tk from tkFileDialog import askopenfilename print("Select file..") Tk().withdraw() # we don't want a full GUI, so keep the root window from appearing filename = askopenfilename() # show an "Open" dialog box and return the path to the selected file print("Executing..") with EncodedFile(open(filename, 'rb'),'utf-8','iso8859-1') as input,EncodedFile(open('importThis.csv', 'wb'),'utf-8','iso8859-1') as output: #import file reader = csv.reader(input, delimiter=',', quotechar='"') writer = csv.writer(output, delimiter=',', quoting=csv.QUOTE_ALL, quotechar='"') reader.next() Header=["date","departure_airport","departure_time","arrival_airport","arrival_time","aircraft_type","aircraft_registration","pic_name","total_time","night","single_engine_vfr","single_engine_ifr","multi_engine_vfr","multi_engine_ifr","pic","co_pilot","multi_pilot","instructor","dual","simulator","ldgs_day","ldgs_night","remarks"] #8-18 writer.writerow(Header) for row in reader: rad=8 #row 8-18 is time in seconds in FL need to be HH:MM while rad<20: tid = '' if(row[rad] != ''): tid = str(datetime.timedelta(seconds=int(row[int(rad)])))
def columns(self):
    """Describe each header cell of the CSV at self.fpath as a Text column."""
    stream = EncodedFile(open(self.fpath, 'rb'), 'utf-8', 'utf-8', 'ignore')
    header = csv.reader(stream).next()
    return [{'name': cell, 'datatype': Text} for cell in header]
def _process_file(request, changeset, is_issue):
    '''
    checks the file useable encodings and correct lengths
    returns two values
    if all correct:
    - a list of the processed lines (which are lists of the values)
    - False for no failure
    if some error:
    - error message
    - True for having failed
    '''
    # we need a real file to be able to use pythons Universal Newline Support
    # NOTE(review): 'U' mode was removed in Python 3.11 — this is py2-era code.
    tmpfile_handle, tmpfile_name = tempfile.mkstemp(".import")
    for chunk in request.FILES['flatfile'].chunks():
        os.write(tmpfile_handle, chunk)
    os.close(tmpfile_handle)
    tmpfile = open(tmpfile_name, 'U')
    # Stash the handle/name on the request so error paths can clean up.
    request.tmpfile = tmpfile
    request.tmpfile_name = tmpfile_name
    # check if file starts with byte order mark
    # NOTE(review): comparing read(2) against BOM_UTF16 assumes py2 byte-str
    # reads — confirm before porting.
    if tmpfile.read(2) == BOM_UTF16:
        enc = 'utf-16'
        # use EncodedFile from codecs to get transparent encoding translation
        upload = EncodedFile(tmpfile, enc)
    # otherwise just do as usual
    else:
        upload = tmpfile
        # charset was None in my local tests, not sure if actually useful here
        enc = request.FILES['flatfile'].charset
    tmpfile.seek(0)
    lines = []
    empty_line = False
    # process the file into a list of lines and check for length
    for line in upload:
        # see if the line can be decoded
        decoded_line, failure = decode_heuristically(line, enc=enc)
        if failure:
            error_text = 'line %s has unknown file encoding.' % line
            return _handle_import_error(request, changeset, error_text)
        split_line = decoded_line.strip('\n').split('\t')
        # if is_issue is set, the first line should be issue line
        if is_issue and not lines:
            # check number of fields
            if len(split_line) != ISSUE_FIELDS:
                error_text = 'issue line %s has %d fields, it must have %d.' \
                  % (split_line, len(split_line), ISSUE_FIELDS)
                return _handle_import_error(request, changeset, error_text)
        # later lines are story lines
        else:
            # we had an empty line just before
            if empty_line:
                error_text = 'The file includes an empty line.'
                return _handle_import_error(request, changeset, error_text)
            # we have an empty line now, OK if it is the last line
            if len(split_line) == 1:
                empty_line = True
                continue
            # check number of fields
            if len(split_line) != SEQUENCE_FIELDS:
                error_text = 'sequence line %s has %d fields, it must have %d.' \
                  % (split_line, len(split_line), SEQUENCE_FIELDS)
                return _handle_import_error(request, changeset, error_text)
            # check here for story_type, otherwise sequences up to an error
            # will be be added
            response, failure = _find_story_type(request, changeset, split_line)
            if failure:
                return response, True
        # Every accepted line (issue or sequence) is collected here.
        lines.append(split_line)
    tmpfile.close()
    os.remove(tmpfile_name)
    del request.tmpfile
    del request.tmpfile_name
    return lines, False
class DocGenerator(xmlapp.Application):
    """Echoes xmlapp parse events to a stream as serialized XML."""

    def __init__(self, out=None):
        """Target stream is *out*; defaults to UTF-8-recoded sys.stdout."""
        self.out = out or EncodedFile(sys.stdout, "utf-8")

    def handle_pi(self, target, remainder):
        """Write a processing instruction unchanged."""
        self.out.write("<?%s %s?>" % (target, remainder))

    def handle_start_tag(self, name, amap):
        """Write a start tag, escaping each attribute value."""
        self.out.write("<" + name)
        for attr_name, attr_value in amap.items():
            self.out.write(' %s="%s"' % (attr_name, escape_attval(attr_value)))
        self.out.write(">")

    def handle_end_tag(self, name):
        """Write the matching end tag."""
        self.out.write("</%s>" % name)

    def handle_ignorable_data(self, data, start_ix, end_ix):
        """Write ignorable whitespace, content-escaped."""
        self.out.write(escape_content(data[start_ix:end_ix]))

    def handle_data(self, data, start_ix, end_ix):
        """Write character data, content-escaped."""
        self.out.write(escape_content(data[start_ix:end_ix]))