def bisect(self, f: io.TextIOWrapper, regex_src: str, dtarget: datetime) -> int:
    """Position ``f`` for ``dtarget`` and return the resulting offset.

    Stores the compiled ``regex_src`` and ``dtarget`` on the instance, runs
    ``mid_bisect`` over the offsets from 0 to the end of the file and then
    ``searchback``.

    :param f: stream to search; its position is changed
    :param regex_src: regular expression source, compiled into ``self.regex``
    :param dtarget: target timestamp, stored as ``self.dtarget``
    :return: ``f.tell()`` after ``searchback`` has run
    """
    self.regex = re.compile(regex_src)
    self.dtarget = dtarget

    # The upper bound of the search is the offset reported at end of file.
    f.seek(0, io.SEEK_END)
    file_end = f.tell()

    self.mid_bisect(f, 0, file_end)
    self.searchback(f, dtarget)
    return f.tell()
def read_options(f: TextIOWrapper) -> Dict[str, str]:
    """Read the leading ``# key: value`` lines of ``f`` into a dict.

    Reading stops at the first line that does not start with ``#``; the
    stream is then rewound to the beginning of that line so the caller can
    read it again.

    :param f: stream positioned at the first option line
    :return: mapping of option name to option value
    :raises AssertionError: if a ``#`` line does not have the
        ``# key: value`` shape
    """
    options: Dict[str, str] = {}
    while True:
        # Remember where this line starts so it can be "un-read" below.
        line_start = f.tell()
        current = f.readline()
        if not current.startswith("#"):
            break
        option_match = re.match("# (.*): (.*)", current)
        assert option_match
        name, setting = option_match.group(1, 2)
        options[name] = setting
    f.seek(line_start)
    return options
def test_get_file_size_text_file():
    """get_file_size reports 738 for the text stream and keeps its position."""
    from io import TextIOWrapper

    raw_buffer = BytesIO()
    text_stream = TextIOWrapper(raw_buffer, encoding="utf-8")
    # "\u0001" is one character followed by the literal text "F3A9 ", so each
    # repetition is 6 characters and 6 * 123 == 738.
    text_stream.write(u"\u0001F3A9 " * 123)
    text_stream.seek(0)

    # read 9 *unicode chars* to advance fd to somewhere interesting
    text_stream.read(9)
    position_before = text_stream.tell()

    assert get_file_size(text_stream) == 738
    assert text_stream.tell() == position_before
def validate_min_lines(f: io.TextIOWrapper, lines: int) -> bool:
    """Tell whether ``f`` holds at least ``lines`` lines.

    The stream is scanned from its beginning; the position it had on entry
    is restored before returning.

    :param f: seekable stream to inspect
    :param lines: minimum number of lines required
    :return: ``True`` if ``lines`` lines could be read, ``False`` otherwise
    """
    original_offset = f.tell()

    f.seek(0, io.SEEK_END)
    eof_offset = f.tell()
    f.seek(0, io.SEEK_SET)

    enough = True
    for _ in range(lines):
        # An empty read at (or past) the end offset means the file ran out.
        if not f.readline() and f.tell() >= eof_offset:
            enough = False
            break

    f.seek(original_offset, io.SEEK_SET)
    return enough
def _parse_flag(self, line: str, pattern: str, file: TextIOWrapper, array: list): count = self._parse_int_flag(line, pattern, file) while count > 0: line = self._get_next_not_empty_line(file) if line is None: raise self._generate_syntax_error(pattern, file.tell()) line = line.strip() if not line.startswith(pattern): raise self._generate_syntax_error(pattern, file.tell()) parts = line.split(maxsplit=1) array.append(parts[1].strip()) count = count - 1 if count != 0: raise self._generate_syntax_error(pattern, file.tell())
def mid_bisect(self, f: io.TextIOWrapper, start: int, end: int) -> None:
    """Recursively halve the offset range [start, end] towards ``self.dtarget``.

    Recursion stops when the stream offset is the same as on the previous
    call (one line is then consumed), or when the timestamp found at the
    midpoint equals the target.

    :param f: stream being searched
    :param start: lower offset of the current range
    :param end: upper offset of the current range
    """
    position = f.tell()
    if position == self.prev_tell:
        # No movement since the last step: consume a line and stop.
        f.readline()
        return

    self.prev_tell = position
    self.bisect_count += 1

    mid = int((start + end) / 2)
    found = self.regex.search(LogBisect.find_line(f, mid))
    stamp = parse(found.group())

    if self.dtarget < stamp:
        self.mid_bisect(f, start, mid)
    elif self.dtarget > stamp:
        self.mid_bisect(f, mid, end)
def _parse_affix_body(self, line: str, pattern: str, file: TextIOWrapper) -> tuple: parts = line.split(maxsplit=6) if parts[0] != pattern or len(parts) < 4: raise self._generate_syntax_error(pattern, file.tell()) option_name = parts[0] flag = parts[1] if self.flag.lower() == 'long' and len(flag) != 2 or self.flag.lower() != 'long' and len(flag) != 1: raise self._generate_syntax_error(pattern, file.tell()) stripping = parts[2] affix = parts[3] condition = parts[4] if len(parts) > 4 else None morphological_fields = parts[5].split() if len(parts) > 5 else [] return option_name, flag, stripping, affix, condition, morphological_fields
def download(self, outfile: io.TextIOWrapper) -> Result:
    """Stream the response into ``outfile`` and report the outcome.

    Emits a ``'download'`` event at the start, a ``'data'`` event per chunk
    and an ``'end'`` event with the final result.

    :param outfile: open file the chunks are written to; its mode decides
        which ``Range`` header (if any) is added to the request
    :return: ``Result`` with ``Status.FILE_CLOSED`` if ``outfile`` is
        closed, the falsy result of ``self.connect()`` if connecting failed
        (``outfile`` is closed in that case), otherwise ``Status.OK`` or
        ``Status.DISCONNECTED`` depending on whether the bytes written
        reached the ``Content-Length`` header
    """
    # NOTE(review): this method reads both ``self._conn`` and
    # ``self.__conn``; inside a class the latter is name-mangled, so these
    # are different attributes -- confirm which one is intended.
    if outfile.closed:
        closed_result = Result(Status.FILE_CLOSED, outfile)
        self.emit('download', closed_result)
        return closed_result

    # Ask the server to resume from the data already present.
    file_mode = outfile.mode
    if file_mode.startswith('r'):
        self._req.add_header('Range', 'bytes=%i-' % length(outfile, restore=False))
    elif file_mode.startswith('a'):
        self._req.add_header('Range', 'bytes=%i-' % outfile.tell())

    if self._conn is None or self.__conn.closed:
        st = self.connect()
        if not st:
            outfile.close()
            return st
        self.__conn = st.value

    self.emit("download", Result(Status.OK, outfile))

    bytes_written = 0
    for chunk in self:
        self.emit('data', chunk)
        outfile.write(chunk)
        bytes_written += len(chunk)

    expected_size = int(self.__conn.headers['Content-Length'])
    if bytes_written >= expected_size:
        result = Result(Status.OK, outfile)
    else:
        result = Result(
            Status.DISCONNECTED, outfile,
            "The connection was aborted," + "check your internet connection")

    self.emit('end', result)
    return result
def decode_bytes_from_file(the_file: TextIOWrapper, search_variable_name: str):
    """Find a ``<name>_base<NN> = '''...'''`` assignment in a file and decode it.

    ``search_variable_name`` may carry an explicit ``_baseNN`` suffix; if it
    does not, the bases 64, 85, 32 and 16 are tried in that order. The
    assignment may appear anywhere in the file. For seekable files the whole
    file is read and the original position is restored afterwards.

    :param the_file: text stream to search
    :param search_variable_name: variable name, with or without ``_baseNN``
    :return: ``(base_name, decoded_bytes)`` on success, otherwise
        ``(None, message)``
    """
    search_variable_name = search_variable_name.strip()
    name_match = re.match(r'^(.*?)(?:_base(\d\d))?$', search_variable_name)
    if name_match is None:
        # Only happens for a name spanning several lines; nothing can match.
        return None, 'Variable not found'

    var_base_name = name_match[1]
    if name_match[2] is not None:
        encode_bases = (name_match[2],)
    else:
        encode_bases = ('64', '85', '32', '16')

    if the_file.seekable():
        saved_file_position = the_file.tell()
        the_file.seek(0)
        try:
            file_content = the_file.read()
        finally:
            the_file.seek(saved_file_position)
    else:
        file_content = the_file.read()

    for enc in encode_bases:
        # The name is matched literally and must not be the tail of a longer
        # identifier; ``search`` finds it anywhere, not just at offset 0.
        reg_exp = (r"(?<!\w)" + re.escape(var_base_name) + "_base" + enc
                   + r"\s*=\s*[a-zA-Z]{0,2}'''(.*?)'''")
        var_found = re.search(reg_exp, file_content, re.DOTALL)
        if var_found:
            decoder = getattr(base64, 'b' + enc + 'decode', None)
            if decoder is None:
                return None, f'Variable found with unsupported encoding: base{enc}'
            return var_base_name, bytes(decoder(var_found[1]))
    return None, 'Variable not found'
def struncate(file: TextIOWrapper, amount: int):
    """
    Drop the first ``amount`` units from the start of ``file`` in place.

    The remainder is copied towards the beginning in blocks of at most 1024
    and the file is then truncated after the copied data.

    NOTE(review): offsets come from seek()/tell() while block sizes come
    from read()/write(); on a text stream those may not use the same unit
    for multi-byte content -- confirm the intended inputs.

    :param file: stream opened for reading and writing
    :param amount: amount to remove from the start
    :type file: TextIOWrapper
    :type amount: int
    """
    # Size of the file = offset reported at its end.
    file.seek(0, 2)
    remaining_total = file.tell() - amount
    file.seek(0, 0)

    copied = 0
    while copied < remaining_total:
        # Read the next block from behind the part being removed ...
        file.seek(amount + copied, 0)
        chunk = file.read(min(1024, remaining_total - copied))
        # ... and write it where the already-copied data ends.
        file.seek(copied, 0)
        copied += file.write(chunk)

    # Flush pending writes before cutting off the stale tail.
    file.flush()
    file.seek(copied)
    file.truncate()
def readback(f: io.TextIOWrapper) -> None:
    """Move ``f`` backwards so that it sits just after a preceding newline.

    The character at the current offset is checked first. If it is not a
    newline, the character immediately before it is skipped (so a call made
    at the start of a line moves to the previous line instead of staying
    put) and every earlier character is then examined in turn. When no
    newline is found the stream is left at offset 0.

    :param f: seekable text stream; only its position is changed
    """
    pos = f.tell()
    step = 2  # first step jumps over the character right before ``pos``
    while pos >= 0:
        f.seek(pos)
        if f.read(1) == "\n":
            # read(1) consumed the newline: we are at the start of a line.
            return
        pos -= step
        step = 1  # afterwards examine every character, never skip one
    # Ran off the beginning: the first line starts at offset 0.
    f.seek(0)
def boomerang_stream(stream: TextIOWrapper) -> TextIOWrapper:
    """
    Yield a stream that goes back to the original offset after exiting the
    "with" context, including when the body of the context raises.

    :param stream: The stream
    """
    current_offset = stream.tell()
    try:
        yield stream
    finally:
        # Restore the offset on every exit path, not only the normal one.
        stream.seek(current_offset)
def searchback(self, f: io.TextIOWrapper, dtarget: datetime):
    """Walk ``f`` backwards until a line whose timestamp differs from ``dtarget``.

    Each round calls ``LogBisect.readback``, reads the line found there,
    extracts its timestamp with ``self.regex`` and ``parse`` and rewinds to
    the start of that line. At least one round is always performed.

    :param f: stream to move; left at the start of the last line examined
    :param dtarget: timestamp to compare each line against
    """
    while True:
        LogBisect.readback(f)
        line_start = f.tell()
        found = self.regex.search(f.readline())
        line_time = parse(found.group())
        # Undo the readline so the caller sees the start of this line.
        f.seek(line_start, io.SEEK_SET)
        if not (line_time == dtarget):
            break
def _parse_affix_header(self, line: str, pattern: str, file: TextIOWrapper) -> tuple: parts = line.split() if parts[0] != pattern or len(parts) != 4: raise self._generate_syntax_error(pattern, file.tell()) option_name = parts[0] flag = parts[1] if parts[2] == 'Y': cross_product = True elif parts[2] == 'N': cross_product = False else: raise self._generate_syntax_error(pattern, file.tell()) if not parts[3].isdigit(): raise self._generate_syntax_error(pattern, file.tell()) number = int(parts[3]) return option_name, flag, cross_product, number
def matchCatcher(self, fileHandle: TextIOWrapper):
    """Check whether the next line of ``fileHandle`` matches ``self.pattern``.

    One line is read. If ``self.catched`` is truthy the stream is rewound to
    where it was before the read, otherwise the line stays consumed.

    :param fileHandle: stream to read the line from
    :return: ``True`` if ``self.pattern`` matches at the start of the line,
        ``False`` otherwise
    """
    # Taken outside the try block so that a failing tell() surfaces as
    # itself instead of an unbound ``pos`` inside the finally clause.
    pos = fileHandle.tell()
    try:
        return self.pattern.match(fileHandle.readline()) is not None
    finally:
        if self.catched:
            fileHandle.seek(pos)
def _read_lines(self, f: io.TextIOWrapper) -> None: while True: self._state.offset = f.tell() line = f.readline() if line: line = line.strip() self._handle_string(line) else: self._stop_if_file_was_deleted_or_recreated() self._sleep_and_maybe_stop() f.seek(self._state.offset)
def adjust_file_block_pos(
    f: io.TextIOWrapper,
    start_pos: int,
    end_pos: int,
    split_symbol: str = '\n',
    adjust_direct: str = 'forward'
):
    """
    Align ``start_pos`` to a line start and read whole lines until the
    position passes ``end_pos`` or the file ends.

    ``split_symbol`` and ``adjust_direct`` are accepted but not used yet.

    adjust_direct: 'forward' or 'backward'
    """
    # TODO
    if start_pos != 0:
        # If the character before start_pos is not a newline we are inside a
        # line: skip the rest of it and start at the next line.
        f.seek(start_pos - 1)
        if f.read(1) != '\n':
            f.readline()
            start_pos = f.tell()
    f.seek(start_pos)
    while start_pos <= end_pos:
        if not f.readline():
            # End of file: the position can no longer advance, so stop
            # instead of looping forever when end_pos lies beyond the file.
            break
        start_pos = f.tell()
def _parse_affix_flag(self, line: str, pattern: str, file: TextIOWrapper):
    """Parse an affix header plus its rule lines and store them in ``self.afx``.

    The header (``_parse_affix_header``) supplies the flag, the
    cross-product switch and the number of rule lines. Each rule line is
    parsed with ``_parse_affix_body`` and must repeat the header's keyword
    and flag. The resulting ``Afx`` is stored under ``self.afx[flag]``.

    :param line: affix header line
    :param pattern: option keyword of the header
    :param file: stream the rule lines are read from
    :raises: the error built by ``self._generate_syntax_error`` when the
        input ends early or a rule line disagrees with the header
    """
    pattern, flag, cross_product, count = self._parse_affix_header(line, pattern, file)

    afx = Afx()
    afx.type = pattern
    afx.cross_product = cross_product

    for _ in range(count):
        rule_line = self._get_next_not_empty_line(file)
        if rule_line is None:
            raise self._generate_syntax_error(pattern, file.tell())

        parsed = self._parse_affix_body(rule_line.strip(), pattern, file)
        pattern, rule_flag, stripping, affix, condition, morphological_fields = parsed

        if rule_flag != flag or pattern != afx.type:
            raise self._generate_syntax_error(pattern, file.tell())
        afx.rules.append(Rule(stripping, affix, condition, morphological_fields))

    if count < 0:
        # A negative rule count can never be satisfied.
        raise self._generate_syntax_error(pattern, file.tell())

    self.afx[flag] = afx
def _read_lines(self, log_file: io.TextIOWrapper, limit: int) -> str: # pylint: disable = no-self-use all_lines = [] while len(all_lines) < limit: last_position = log_file.tell() next_line = log_file.readline() if not next_line: break if not next_line.endswith("\n"): log_file.seek(last_position) break all_lines.append(next_line) return all_lines
class MultiPageTextImporter:
    """
    Importer for the "multipage text" format (versions 0 and 1).

    Version 1 files are read in two passes: pass 1 fills a temporary
    database with what the file contains, pass 2 performs the import
    according to that database (and possible user decisions).
    """
    def __init__(self, mainControl):
        """
        mainControl -- Currently PersonalWikiFrame object
        """
        self.mainControl = mainControl


    def getImportTypes(self, guiparent):
        """
        Return sequence of tuples with the description of import types provided
        by this object. A tuple has the form (<imp. type>,
            <human readable description>, <panel for add. options or None>)
        If panels for additional options must be created, they should use
        guiparent as parent
        """
        if guiparent:
            res = wx.xrc.XmlResource.Get()
            mptPanel = res.LoadPanel(guiparent, "ImportSubMultipageText")
        else:
            mptPanel = None

        return (
                ("multipage_text", _("Multipage text"), mptPanel),
                )


    def getImportSourceWildcards(self, importType):
        """
        If an import type is intended to read from a file, this function
        returns a (possibly empty) sequence of tuples
        (wildcard description, wildcard filepattern).

        For any other import type, None is returned
        """
        if importType == "multipage_text":
            return ((_("Multipage files (*.mpt)"), "*.mpt"),
                    (_("Text file (*.txt)"), "*.txt"))

        return None


    def getAddOptVersion(self):
        """
        Returns the version of the additional options information returned
        by getAddOpt(). If the return value is -1, the version info can't
        be stored between application sessions.

        Otherwise, the addopt information can be stored between sessions
        and can later handled back to the doImport method of the object
        without previously showing the import dialog.
        """
        return 0


    def getAddOpt(self, addoptpanel):
        """
        Reads additional options from panel addoptpanel.
        If getAddOptVersion() > -1, the return value must be a sequence
        of simple string, unicode and/or numeric objects. Otherwise, any object
        can be returned (normally the addoptpanel itself)
        """
        if addoptpanel is None:
            return (0,)
        else:
            ctrls = XrcControls(addoptpanel)
            showImportTableAlways = boolToInt(
                    ctrls.cbShowImportTableAlways.GetValue())

            return (showImportTableAlways,)


    def _collectContent(self):
        """
        Collect lines from current position of importFile up to separator
        or file end and return them joined into one string. The separator
        line itself is consumed but not returned.
        """
        content = []
        while True:
            # Read lines of wikiword
            line = self.importFile.readline()
            if line == "":
                # The last page in mpt file without separator
                # ends as the real wiki page
                break

            if line == self.separator:
                if len(content) > 0:
                    # Iff last line of mpt page is empty, the original
                    # page ended with a newline, so remove last
                    # character (=newline)
                    content[-1] = content[-1][:-1]
                break

            content.append(line)

        return "".join(content)


    def _skipContent(self):
        """
        Skip content until reaching next separator or end of file
        """
        while True:
            # Read lines of wikiword
            line = self.importFile.readline()
            if line == "":
                # The last page in mpt file without separator
                # ends as the real wiki page
                break

            if line == self.separator:
                break


    def doImport(self, wikiDocument, importType, importSrc,
            compatFilenames, addOpt, importData=None):
        """
        Run import operation.

        wikiDocument -- WikiDocument object
        importType -- string tag to identify how to import
        importSrc -- Path to source directory or file to import from
        compatFilenames -- Should the filenames be decoded from the lowest
                           level compatible?
        addOpt -- additional options returned by getAddOpt()
        importData -- if not None contains data to import as bytestring.
                importSrc is ignored in this case. Needed for trashcan.
        returns True if import was done (needed for trashcan), False if
                the user canceled it

        Raises ImportException if the file can't be opened, has a bad
        header or any other error occurs during import.
        """
        if importData is not None:
            self.rawImportFile = BytesIO(importData)   # TODO bytes or string???
        else:
            try:
                self.rawImportFile = open(pathEnc(importSrc), "rb")
            except IOError:
                raise ImportException(_("Opening import file failed"))

        self.wikiDocument = wikiDocument
        self.tempDb = None
        # Reset so the cleanup at the end never touches a wrapper left over
        # from an earlier call
        self.importFile = None

        showImportTableAlways = addOpt[0]

        # TODO Do not stop on each import error, instead create error list and
        #   continue

        try:
            try:
                # Wrap input file to convert format
                bom = self.rawImportFile.read(len(BOM_UTF8))
                if bom != BOM_UTF8:
                    self.rawImportFile.seek(0)
                    self.importFile = TextIOWrapper(self.rawImportFile,
                            MBCS_ENCODING, "replace")
                else:
                    self.importFile = TextIOWrapper(self.rawImportFile,
                            "utf-8", "replace")

                line = self.importFile.readline()
                if line.startswith("#!"):
                    # Skip initial line with #! to allow execution as shell script
                    line = self.importFile.readline()

                if not line.startswith("Multipage text format "):
                    raise ImportException(
                            _("Bad file format, header not detected"))

                # Following in the format identifier line is a version number
                # of the file format
                self.formatVer = int(line[22:-1])

                if self.formatVer > 1:
                    raise ImportException(
                            _("File format number %i is not supported") %
                            self.formatVer)

                # Next is the separator line
                line = self.importFile.readline()
                if not line.startswith("Separator: "):
                    raise ImportException(
                            _("Bad file format, header not detected"))

                self.separator = line[11:]

                startPos = self.importFile.tell()

                if self.formatVer == 0:
                    self._doImportVer0()
                    # Import was done, report it as documented
                    return True
                elif self.formatVer == 1:
                    # Create temporary database. It is mainly filled during
                    # pass 1 to check for validity and other things before
                    # actual importing in pass 2

                    # TODO Respect settings for general temp location!!!
                    self.tempDb = ConnectWrapSyncCommit(sqlite3.connect(""))
                    try:
                        # TODO: Remove column "collisionWithPresent", seems to be unused
                        self.tempDb.execSql("create table entries("
                                "unifName text primary key not null, "  # Unified name in import file
                                "seen integer not null default 0, "  # data really exists
                                "dontImport integer not null default 0, "  # don't import this (set between pass 1 and 2)
                                "missingDep integer not null default 0, "  # missing dependency(ies)
                                "importVersionData integer not null default 0, "  # versioning data present
                                "collisionWithPresent text not null default '',"  # Unif. name of present entry which collides with imported one (if any)
                                "renameImportTo text not null default '',"  # Rename imported element to (if at all)
                                "renamePresentTo text not null default ''"  # Rename present element in database to (if at all)
                                ");"
                                )

                        # Dependencies. If unifName isn't imported (or faulty), neededBy shouldn't be either
                        self.tempDb.execSql("create table depgraph("
                                "unifName text not null default '',"
                                "neededBy text not null default '',"
                                "constraint depgraphpk primary key (unifName, neededBy)"
                                ");"
                                )

                        # Recursive processing is not supported for this table
                        self.tempDb.execSql("create table renamegraph("
                                "unifName text not null default '',"
                                "dependent text not null default '',"
                                "constraint renamegraphpk primary key (unifName, dependent),"
                                "constraint renamegraphsingledep unique (dependent)"
                                ");"
                                )

                        # Collect some initial information into the temporary database
                        self._doImportVer1Pass1()

                        # Draw some logical conclusions on the temp db
                        self._markMissingDependencies()
                        self._markHasVersionData()
                        self._markCollision()

                        # Now ask user if necessary
                        if showImportTableAlways or self._isUserNeeded():
                            if not self._doUserDecision():
                                # Canceled by user
                                return False

                        # Further logical processing after possible user editing
                        self._markNonImportedVersionsData()
                        self._markNonImportedDependencies()
                        self._propagateRenames()
                        # TODO: Remove version data without ver. overview or main data

                        # Back to start of import file and import according to settings
                        # in temp db
                        self.importFile.seek(startPos)
                        self._doImportVer1Pass2()

                        return True
                    finally:
                        self.tempDb.close()
                        self.tempDb = None

            except ImportException:
                raise
            except Exception as e:
                traceback.print_exc()
                raise ImportException(str(e)) from e

        finally:
            if self.importFile is not None:
                # Closing the wrapper also closes the raw file beneath it
                self.importFile.close()
            else:
                # Wrapper was never created, close the raw file directly
                self.rawImportFile.close()


    def _markMissingDependencies(self):
        """
        If a datablock wasn't present, all dependent data blocks are marked as
        not to import
        """
        while True:
            self.tempDb.execSql("""
                update entries set missingDep=1, dontImport=1
                where (not missingDep) and unifName in
                (select depgraph.neededBy from depgraph inner join entries
                on depgraph.unifName = entries.unifName
                where (not entries.seen) or entries.missingDep);
                """)

            # Repeat until no further row changed (dependencies of dependencies)
            if self.tempDb.rowcount == 0:
                break


    def _markHasVersionData(self):
        """
        Mark if version data present
        """
        self.tempDb.execSql("""
            update entries set importVersionData=1
            where (not importVersionData) and unifName in
            (select substr(unifName, 21) from entries
            where unifName glob 'versioning/overview/*' and not dontImport)
            """)  # TODO Take missing deps into account here?


    def _markCollision(self):
        """
        Mark collisions between existing and data blocks and such to import
        """
        # First find collisions with wiki words
        for wikipageUnifName in self.tempDb.execSqlQuerySingleColumn(
                "select unifName from entries where unifName glob 'wikipage/*' "
                "and not dontImport"):
            wpName = wikipageUnifName[9:]

            if not self.wikiDocument.isDefinedWikiPageName(wpName):
                continue

            self.tempDb.execSql("update entries set collisionWithPresent = ? "
                    "where unifName = ?",
                    (wikipageUnifName, wikipageUnifName))

        # Then find other collisions (saved searches etc.)
        for unifName in self.tempDb.execSqlQuerySingleColumn(
                "select unifName from entries where (unifName glob 'savedsearch/*' "
                "or unifName glob 'savedpagesearch/*') and not dontImport"):
            if self.wikiDocument.hasDataBlock(unifName):
                self.tempDb.execSql("update entries set collisionWithPresent = ? "
                        "where unifName = ?", (unifName, unifName))


    def _markNonImportedVersionsData(self):
        """
        After user dialog: If importVersionData is false for some entries
        the depending version data shouldn't be imported.
        Only the versioning overview is marked for not importing. The next step
        propagates this to the other data blocks
        """
        self.tempDb.execSql("""
            update entries set dontImport = 1
            where unifName in (select 'versioning/overview/' || unifName
            from entries where not importVersionData)
            """)


    def _markNonImportedDependencies(self):
        """
        After user dialog: If some data blocks where chosen not to import
        mark all dependent blocks to not import also (especially version data)
        """
        while True:
            self.tempDb.execSql("""
                update entries set dontImport=1
                where (not dontImport) and unifName in
                (select depgraph.neededBy from depgraph inner join entries
                on depgraph.unifName = entries.unifName
                where entries.dontImport);
                """)

            # Repeat until no further row changed
            if self.tempDb.rowcount == 0:
                break


    def _propagateRenames(self):
        """
        Write rename commands for imported items to all parts to import
        if some parts need renaming. Renaming of present items is not propagated.
        """
        for unifName, renImportTo in self.tempDb.execSqlQuery(
                "select unifName, renameImportTo from entries "
                "where renameImportTo != '' and not dontImport"):
            for depUnifName in self.tempDb.execSqlQuerySingleColumn(
                    "select dependent from renamegraph where unifName = ? and "
                    "dependent in (select unifName from entries where "
                    "not dontImport)", (unifName,)):
                if depUnifName.endswith(unifName):
                    # Replace the old name at the end of the dependent's name
                    newName = depUnifName[:-len(unifName)] + renImportTo

                    self.tempDb.execSql("""
                        update entries set renameImportTo=?
                        where unifName = ?
                        """, (newName, depUnifName))


    def _doUserDecision(self):
        """
        Called to present GUI to user for deciding what to do.
        This method is overwritten for trashcan GUI.
        Returns False if user canceled operation
        """
        return MultiPageTextImporterDialog.runModal(
                self.mainControl, self.tempDb, self.mainControl)


    def _isUserNeeded(self):
        """
        Decide if a dialog must be shown to ask user how to proceed.
        Under some circumstances the dialog may be shown regardless of the result.
        """
        if self.tempDb.execSqlQuerySingleItem("select missingDep from entries "
                "where missingDep limit 1", default=False):
            # Missing dependency
            return True

        if len(self.tempDb.execSqlQuerySingleItem("select collisionWithPresent "
                "from entries where collisionWithPresent != '' limit 1",
                default="")) > 0:
            # Name collision
            return True

        # No problems found
        return False


    def _doImportVer0(self):
        """
        Import wikiwords if format version is 0.
        Raises ImportException if a wiki word in the file is invalid.
        """
        langHelper = wx.GetApp().createWikiLanguageHelper(
                self.wikiDocument.getWikiDefaultWikiLanguage())

        while True:
            # Read next wikiword
            line = self.importFile.readline()
            if line == "":
                break

            wikiWord = line[:-1]
            errMsg = langHelper.checkForInvalidWikiWord(wikiWord,
                    self.wikiDocument)
            if errMsg:
                raise ImportException(_("Bad wiki word: %s, %s") %
                        (wikiWord, errMsg))

            content = self._collectContent()
            page = self.wikiDocument.getWikiPageNoError(wikiWord)

            page.replaceLiveText(content)


    def _doImportVer1Pass1(self):
        """
        Pass 1 for format version 1: record each known entry of the import
        file as "seen" in the temporary database. Only versioning overviews
        are actually read, all other content is skipped.
        """
        while True:
            tag = self.importFile.readline()
            if tag == "":
                # End of file
                break
            tag = tag[:-1]
            if tag.startswith(("funcpage/", "savedsearch/",
                    "savedpagesearch/", "wikipage/",
                    "versioning/packet/versionNo/")):
                self._skipContent()
            elif tag.startswith("versioning/overview/"):
                self._doImportItemVersioningOverviewVer1Pass1(tag[20:])
            else:
                # Unknown tag -> Ignore until separator
                self._skipContent()
                continue

            self.tempDb.execSql("insert or replace into entries(unifName, seen) "
                    "values (?, 1)", (tag,))


    def _readHintedDatablockVer1(self):
        """
        Reads datablock and preprocesses encoding if necessary.
        Returns either (hintStrings, content) or (None, None) if either
        an unknown important hint was found or if encoding had an error.

        hintStrings is a list of hints (as unistrings) which were
        not processed by the function (therefore encoding hint is removed).
        content can be a bytestring or a unistring.

        If (None, None) is returned, the remaining content of the entry
        was skipped already by the function.
        """
        hintLine = self.importFile.readline()[:-1]
        hintStrings = hintLine.split(" ")

        resultHintStrings = []

        # Set default
        useB64 = False

        # Process hints
        for hint in hintStrings:
            if hint.startswith("important/encoding/"):
                if hint[19:] == "text":
                    useB64 = False
                elif hint[19:] == "base64":
                    useB64 = True
                else:
                    # Unknown encoding: don't read further
                    self._skipContent()
                    return None, None
            elif hint.startswith("important/"):
                # There is something important we do not understand
                self._skipContent()
                return None, None
            else:
                resultHintStrings.append(hint)

        content = self._collectContent()

        if useB64:
            try:
                content = base64BlockDecode(content)
            except TypeError:
                # base64 decoding failed. The entry was consumed completely
                # by _collectContent() above, so nothing must be skipped
                # here (that would swallow the following entry).
                # NOTE(review): confirm TypeError is what base64BlockDecode
                # raises on bad input
                return None, None

        return (resultHintStrings, content)


    def _doImportItemVersioningOverviewVer1Pass1(self, subtag):
        """
        Read a versioning overview during pass 1 and store the dependency
        and rename relations it implies in the temporary database.

        subtag -- tag of the entry after the "versioning/overview/" prefix
        """
        hintStrings, content = self._readHintedDatablockVer1()
        if content is None:
            return

        # Always encode to UTF-8 no matter what the import file encoding is.
        # base64 encoded content is a bytestring already.
        if isinstance(content, str):
            content = content.encode("utf-8")

        try:
            ovw = Versioning.VersionOverview(self.wikiDocument,
                    unifiedBasePageName=subtag)

            ovw.readOverviewFromBytes(content)

            ovwUnifName = ovw.getUnifiedName()

            self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) "
                "values (?, ?)", (subtag, ovwUnifName))

            self.tempDb.execSql("insert or replace into renamegraph(unifName, dependent) "
                "values (?, ?)", (subtag, ovwUnifName))

            for depUnifName in ovw.getDependentDataBlocks(omitSelf=True):
                # Mutual dependency between version overview and each version packet
                self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) "
                    "values (?, ?)", (depUnifName, ovwUnifName))
                self.tempDb.execSql("insert or replace into depgraph(unifName, neededBy) "
                    "values (?, ?)", (ovwUnifName, depUnifName))

                self.tempDb.execSql("insert or replace into renamegraph(unifName, dependent) "
                    "values (?, ?)", (subtag, depUnifName))

        except VersioningException:
            return


    def _doImportVer1Pass2(self):
        """
        Pass 2 for format version 1: rename present items as requested,
        delete version data which will be replaced, then import each entry
        of the file according to the settings in the temporary database.
        """
        wikiDoc = self.wikiDocument

        # We have to rename present items
        # First wikipages because this automatically renames depending version data
        for pageFrom, pageTo in self.tempDb.execSqlQuery(
                """
                select substr(unifName, 10), substr(renamePresentTo, 10)
                from entries where unifName glob 'wikipage/*' and
                renamePresentTo glob 'wikipage/*'
                """):
            if wikiDoc.isDefinedWikiPageName(pageFrom):
                wikiDoc.renameWikiWords({pageFrom: pageTo}, Consts.ModifyText.off)
                # TODO How to handle rename of home page?

        # Then remaining data blocks
        for oldUnifName, newUnifName in self.tempDb.execSqlQuery(
                """
                select unifName, renamePresentTo
                from entries where unifName not glob 'wikipage/*' and
                renamePresentTo != ''
                """):
            wikiDoc.renameDataBlock(oldUnifName, newUnifName)

        # For wiki pages with versions to import, existing versions must be
        # deleted
        for wikiWord in self.tempDb.execSqlQuerySingleColumn(
                """
                select substr(unifName, 10)
                from entries where unifName glob 'wikipage/*' and
                renameImportTo == '' and not dontImport and importVersionData
                union
                select substr(renameImportTo, 10)
                from entries where unifName glob 'wikipage/*' and
                renameImportTo glob 'wikipage/*' and not dontImport and
                importVersionData
                """):
            if not wikiDoc.isDefinedWikiPageName(wikiWord):
                continue

            page = wikiDoc.getWikiPage(wikiWord)
            versionOverview = page.getExistingVersionOverview()
            if versionOverview is not None:
                versionOverview.delete()

        while True:
            tag = self.importFile.readline()
            if tag == "":
                # End of file
                break
            tag = tag[:-1]  # Remove line end

            try:
                dontImport, renameImportTo = \
                        self.tempDb.execSqlQuery(
                        "select dontImport, renameImportTo from "
                        "entries where unifName = ?", (tag,))[0]
            except IndexError:
                # Maybe dangerous
                traceback.print_exc()
                self._skipContent()
                continue

            if dontImport:
                self._skipContent()
                continue

            if renameImportTo == "":
                renameImportTo = tag

            if tag.startswith("wikipage/"):
                self._importItemWikiPageVer1Pass2(renameImportTo[9:])
            elif tag.startswith("funcpage/"):
                self._importItemFuncPageVer1Pass2(tag[9:])
            elif tag.startswith("savedsearch/"):
                self._importB64DatablockVer1Pass2(renameImportTo)
            elif tag.startswith("savedpagesearch/"):
                self._importHintedDatablockVer1Pass2(renameImportTo)
            elif tag.startswith("versioning/"):
                self._importHintedDatablockVer1Pass2(renameImportTo)
            else:
                # Unknown tag -> Ignore until separator
                self._skipContent()

        # Let existing version overviews reread their data
        for wikiWord in self.tempDb.execSqlQuerySingleColumn(
                """
                select substr(unifName, 10)
                from entries where unifName glob 'wikipage/*' and
                renamePresentTo == '' and importVersionData
                union
                select substr(renamePresentTo, 10)
                from entries where unifName glob 'wikipage/*' and
                renamePresentTo glob 'wikipage/*' and importVersionData
                """):
            if not wikiDoc.isDefinedWikiPageName(wikiWord):
                continue

            page = wikiDoc.getWikiPage(wikiWord)
            versionOverview = page.getExistingVersionOverview()
            if versionOverview is not None:
                versionOverview.readOverview()


    def _importItemWikiPageVer1Pass2(self, wikiWord):
        """
        Import one wiki page entry: a line with at least three timestamps
        followed by the page content.

        wikiWord -- name of the page to write the content to
        """
        timeStampLine = self.importFile.readline()[:-1]
        timeStrings = timeStampLine.split(" ")
        if len(timeStrings) < 3:
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        timeStrings = timeStrings[:3]

        try:
            timeStrings = [str(ts) for ts in timeStrings]
        except UnicodeEncodeError:
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        try:
            timeStamps = [timegm(time.strptime(ts, "%Y-%m-%d/%H:%M:%S"))
                    for ts in timeStrings]
        except (ValueError, OverflowError):
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        content = self._collectContent()
        page = self.wikiDocument.getWikiPageNoError(wikiWord)

        # TODO How to handle versions here?
        page.replaceLiveText(content)
        if page.getTxtEditor() is not None:
            page.writeToDatabase()

        page.setTimestamps(timeStamps)


    def _importItemFuncPageVer1Pass2(self, subtag):
        """
        Import one functional page entry.

        subtag -- functional page tag
        """
        try:
            # subtag is unicode but func tags are bytestrings
            subtag = str(subtag)
        except UnicodeEncodeError:
            self._skipContent()
            return

        content = self._collectContent()
        try:
            page = self.wikiDocument.getFuncPage(subtag)
            page.replaceLiveText(content)
        except BadFuncPageTagException:
            # This function tag is bad or unknown -> ignore
            return  # TODO Report error


    def _importB64DatablockVer1Pass2(self, unifName):
        """
        Import a datablock whose content is base64 encoded and store it
        under unifName.
        """
        # Content is base64 encoded
        b64Content = self._collectContent()

        try:
            datablock = base64BlockDecode(b64Content)
            self.wikiDocument.getWikiData().storeDataBlock(unifName, datablock,
                    storeHint=Consts.DATABLOCK_STOREHINT_INTERN)

        except TypeError:
            # base64 decoding failed
            return  # TODO Report error


    def _importTextDatablockVer1Pass2(self, unifName):
        """
        Import a datablock whose content is plain text and store it
        under unifName.
        """
        content = self._collectContent()

        try:
            self.wikiDocument.getWikiData().storeDataBlock(unifName, content,
                    storeHint=Consts.DATABLOCK_STOREHINT_INTERN)

        except TypeError:
            return  # TODO Report error


    def _importHintedDatablockVer1Pass2(self, unifName):
        """
        A hinted datablock starts with an extra line defining encoding
        (text or B64) and storage hint. It was introduced later therefore
        only versioning packets use this while saved searches don't.
        """
        hintStrings, content = self._readHintedDatablockVer1()

        if hintStrings is None:
            return

        # Set defaults
        storeHint = Consts.DATABLOCK_STOREHINT_INTERN

        # Process hints
        for hint in hintStrings:
            if hint.startswith("storeHint/"):
                if hint[10:] == "extern":
                    storeHint = Consts.DATABLOCK_STOREHINT_EXTERN
                elif hint[10:] == "intern":
                    storeHint = Consts.DATABLOCK_STOREHINT_INTERN
                # No else. It is not vital to get the right storage hint

        try:
            if isinstance(content, str):
                content = BOM_UTF8 + content.encode("utf-8")

            self.wikiDocument.getWikiData().storeDataBlock(unifName, content,
                    storeHint=storeHint)

        except TypeError:
            traceback.print_exc()
            return  # TODO Report error
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files.

    The object wraps a ``GreenFileDescriptorIO`` in one of the buffered
    classes of the :mod:`io` package (and optionally in a
    :class:`io.TextIOWrapper`) and forwards the usual file methods to it.

    .. seealso:: :func:`gevent.os.make_nonblocking`
    """

    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        :param mode: ``'rb'``, ``'rU'`` or ``'wb'``; the ``'b'`` is ignored
            and ``'U'`` selects text IO through a ``TextIOWrapper``.
        :param bufsize: Buffer size; negative means ``default_bufsize``.
            For read and write modes 0 is translated to 1 and 1 to
            ``default_bufsize``.
        :param close: Passed as ``closefd`` to the descriptor wrapper.
        :raises TypeError: if the file number is not an ``int``.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False
        assert len(mode) == 1, 'mode can only be [rb, rU, wb]'

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # The buffered reader/writer cannot work unbuffered, and a size
            # of 1 conventionally means "line buffered" -> use the default.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            buffered = BufferedReader if mode == 'r' else BufferedWriter
            self.io = buffered(self.fileio, bufsize)
        else:
            # QQQ: not used
            self.io = BufferedRandom(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        # Hand back the count reported by the underlying stream, as the
        # io classes do; previously it was silently dropped.
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def readable(self):
        return self.io.readable()

    def writable(self):
        return self.io.writable()

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is.
        #
        # Look _fobj up in the instance dict directly: going through
        # ``self._fobj`` re-enters __getattr__ when the attribute has not
        # been set yet (e.g. __init__ raised early) and recurses forever.
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)
def _parse_int_flag(self, line: str, pattern, file: TextIOWrapper): parts = line.split(maxsplit=1) if not parts[1].isdigit(): raise self._generate_syntax_error(pattern, file.tell()) return int(parts[1].strip())
class MultiPageTextImporter:
    """
    Importer for the "multipage text" (.mpt) format: a text file with a
    short header followed by entries which are delimited by a separator
    line announced in the header.
    """

    def __init__(self, mainControl):
        """
        mainControl -- Currently PersonalWikiFrame object
        """
        self.mainControl = mainControl

    def getImportTypes(self, guiparent):
        """
        Return sequence of tuples with the description of import types provided
        by this object. A tuple has the form (<imp. type>,
            <human readable description>, <panel for add. options or None>)
        If panels for additional options must be created, they should use
        guiparent as parent
        """
        if guiparent:
            res = wx.xrc.XmlResource.Get()
            mptPanel = res.LoadPanel(guiparent, "ImportSubMultipageText")
        else:
            mptPanel = None

        return (("multipage_text", _("Multipage text"), mptPanel), )

    def getImportSourceWildcards(self, importType):
        """
        If an export type is intended to go to a file, this function
        returns a (possibly empty) sequence of tuples
        (wildcard description, wildcard filepattern).

        If an export type goes to a directory, None is returned
        """
        if importType == "multipage_text":
            return ((_("Multipage files (*.mpt)"), "*.mpt"),
                    (_("Text file (*.txt)"), "*.txt"))

        return None

    def getAddOptVersion(self):
        """
        Returns the version of the additional options information returned
        by getAddOpt(). If the return value is -1, the version info can't
        be stored between application sessions.

        Otherwise, the addopt information can be stored between sessions
        and can later handled back to the doImport method of the object
        without previously showing the import dialog.
        """
        return 0

    def getAddOpt(self, addoptpanel):
        """
        Reads additional options from panel addoptpanel.
        If getAddOptVersion() > -1, the return value must be a sequence
        of simple string, unicode and/or numeric objects. Otherwise, any object
        can be returned (normally the addoptpanel itself)
        """
        if addoptpanel is None:
            return (0, )

        ctrls = XrcControls(addoptpanel)
        showImportTableAlways = boolToInt(
                ctrls.cbShowImportTableAlways.GetValue())

        return (showImportTableAlways, )

    def _collectContent(self):
        """
        Collect lines from current position of importFile up to separator
        or file end and return them joined to a single string. The
        separator line itself is consumed but not part of the result.
        """
        content = []
        while True:
            # Read lines of wikiword
            line = self.importFile.readline()
            if line == "":
                # The last page in mpt file without separator
                # ends as the real wiki page
                break

            if line == self.separator:
                if content:
                    # Iff last line of mpt page is empty, the original
                    # page ended with a newline, so remove last
                    # character (=newline)
                    content[-1] = content[-1][:-1]
                break

            content.append(line)

        return "".join(content)

    def _skipContent(self):
        """
        Skip content until reaching next separator or end of file
        """
        while True:
            # Read lines of wikiword
            line = self.importFile.readline()
            if line == "":
                # The last page in mpt file without separator
                # ends as the real wiki page
                break

            if line == self.separator:
                break

    def _createTempDbSchema(self):
        """
        Create the tables of the temporary database used by the
        version 1 import (self.tempDb must be set already).
        """
        # TODO: Remove column "collisionWithPresent", seems to be unused
        self.tempDb.execSql(
            "create table entries("
            "unifName text primary key not null, "  # Unified name in import file
            "seen integer not null default 0, "  # data really exists
            "dontImport integer not null default 0, "  # don't import this (set between pass 1 and 2)
            "missingDep integer not null default 0, "  # missing dependency(ies)
            "importVersionData integer not null default 0, "  # versioning data present
            "collisionWithPresent text not null default '',"  # Unif. name of present entry which collides with imported one (if any)
            "renameImportTo text not null default '',"  # Rename imported element to (if at all)
            "renamePresentTo text not null default ''"  # Rename present element in database to (if at all)
            ");")

        # Dependencies. If unifName isn't imported (or faulty), neededBy
        # shouldn't be either
        self.tempDb.execSql(
            "create table depgraph("
            "unifName text not null default '',"
            "neededBy text not null default '',"
            "constraint depgraphpk primary key (unifName, neededBy)"
            ");")

        # Recursive processing is not supported for this table
        self.tempDb.execSql(
            "create table renamegraph("
            "unifName text not null default '',"
            "dependent text not null default '',"
            "constraint renamegraphpk primary key (unifName, dependent),"
            "constraint renamegraphsingledep unique (dependent)"
            ");")

    def doImport(self, wikiDocument, importType, importSrc,
            compatFilenames, addOpt, importData=None):
        """
        Run import operation.

        wikiDocument -- WikiDocument object
        importType -- string tag to identify how to import
        importSrc -- Path to source directory or file to import from
        compatFilenames -- Should the filenames be decoded from the lowest
                           level compatible?
        addOpt -- additional options returned by getAddOpt()
        importData -- if not None contains data to import as bytestring.
                importSrc is ignored in this case. Needed for trashcan.

        returns True if import was done (needed for trashcan), False if
        the user canceled the operation.
        Raises ImportException if the file can't be opened or imported.
        """
        if importData is not None:
            self.rawImportFile = BytesIO(importData)   # TODO bytes or string???
        else:
            try:
                self.rawImportFile = open(pathEnc(importSrc), "rb")
            except IOError as e:
                raise ImportException(_("Opening import file failed")) from e

        self.wikiDocument = wikiDocument
        self.tempDb = None
        # Reset so the cleanup below never sees a wrapper of a former run
        self.importFile = None

        showImportTableAlways = addOpt[0]

        # TODO Do not stop on each import error, instead create error list and
        #   continue

        try:
            try:
                # Wrap input file to convert format
                bom = self.rawImportFile.read(len(BOM_UTF8))
                if bom != BOM_UTF8:
                    self.rawImportFile.seek(0)
                    self.importFile = TextIOWrapper(self.rawImportFile,
                            MBCS_ENCODING, "replace")
                else:
                    self.importFile = TextIOWrapper(self.rawImportFile,
                            "utf-8", "replace")

                line = self.importFile.readline()
                if line.startswith("#!"):
                    # Skip initial line with #! to allow execution as shell script
                    line = self.importFile.readline()

                if not line.startswith("Multipage text format "):
                    raise ImportException(
                            _("Bad file format, header not detected"))

                # Following in the format identifier line is a version number
                # of the file format
                self.formatVer = int(line[22:-1])

                if self.formatVer > 1:
                    raise ImportException(
                            _("File format number %i is not supported") %
                            self.formatVer)

                # Next is the separator line
                line = self.importFile.readline()
                if not line.startswith("Separator: "):
                    raise ImportException(
                            _("Bad file format, header not detected"))

                self.separator = line[11:]

                startPos = self.importFile.tell()

                if self.formatVer == 0:
                    self._doImportVer0()
                    # The import was done, report it as documented
                    return True
                elif self.formatVer == 1:
                    # Create temporary database. It is mainly filled during
                    # pass 1 to check for validity and other things before
                    # actual importing in pass 2

                    # TODO Respect settings for general temp location!!!
                    self.tempDb = ConnectWrapSyncCommit(sqlite3.connect(""))
                    try:
                        self._createTempDbSchema()

                        # Collect some initial information into the temporary
                        # database
                        self._doImportVer1Pass1()

                        # Draw some logical conclusions on the temp db
                        self._markMissingDependencies()
                        self._markHasVersionData()
                        self._markCollision()

                        # Now ask user if necessary
                        if showImportTableAlways or self._isUserNeeded():
                            if not self._doUserDecision():
                                # Canceled by user
                                return False

                        # Further logical processing after possible user
                        # editing
                        self._markNonImportedVersionsData()
                        self._markNonImportedDependencies()
                        self._propagateRenames()
                        # TODO: Remove version data without ver. overview or
                        #   main data

                        # Back to start of import file and import according
                        # to settings in temp db
                        self.importFile.seek(startPos)
                        self._doImportVer1Pass2()

                        return True
                    finally:
                        self.tempDb.close()
                        self.tempDb = None

            except ImportException:
                raise
            except Exception as e:
                traceback.print_exc()
                raise ImportException(str(e)) from e

        finally:
            if self.importFile is not None:
                # Closing the wrapper closes the raw file as well
                self.importFile.close()
            else:
                # Failed before the wrapper existed, don't leak the raw file
                self.rawImportFile.close()

    def _markMissingDependencies(self):
        """
        If a datablock wasn't present, all dependent data blocks are marked as
        not to import
        """
        # Repeat until no further row changes to follow dependency chains
        while True:
            self.tempDb.execSql("""
                update entries set missingDep=1, dontImport=1
                where (not missingDep) and unifName in
                (select depgraph.neededBy from depgraph inner join entries
                on depgraph.unifName = entries.unifName
                where (not entries.seen) or entries.missingDep);
                """)

            if self.tempDb.rowcount == 0:
                break

    def _markHasVersionData(self):
        """
        Mark if version data present
        """
        self.tempDb.execSql("""
            update entries set importVersionData=1
            where (not importVersionData) and unifName in
            (select substr(unifName, 21) from entries
            where unifName glob 'versioning/overview/*' and not dontImport)
            """)  # TODO Take missing deps into account here?

    def _markCollision(self):
        """
        Mark collisions between existing and data blocks and such to import
        """
        # First find collisions with wiki words
        for wikipageUnifName in self.tempDb.execSqlQuerySingleColumn(
                "select unifName from entries where unifName glob 'wikipage/*' "
                "and not dontImport"):
            wpName = wikipageUnifName[9:]

            if not self.wikiDocument.isDefinedWikiPageName(wpName):
                continue

            self.tempDb.execSql(
                    "update entries set collisionWithPresent = ? "
                    "where unifName = ?",
                    (wikipageUnifName, wikipageUnifName))

        # Then find other collisions (saved searches etc.)
        for unifName in self.tempDb.execSqlQuerySingleColumn(
                "select unifName from entries where (unifName glob 'savedsearch/*' "
                "or unifName glob 'savedpagesearch/*') and not dontImport"):
            if self.wikiDocument.hasDataBlock(unifName):
                self.tempDb.execSql(
                        "update entries set collisionWithPresent = ? "
                        "where unifName = ?", (unifName, unifName))

    def _markNonImportedVersionsData(self):
        """
        After user dialog: If importVersionData is false for some entries
        the depending version data shouldn't be imported.
        Only the versioning overview is marked for not importing. The next step
        propagates this to the other data blocks
        """
        self.tempDb.execSql("""
            update entries set dontImport = 1
            where unifName in (select 'versioning/overview/' || unifName
            from entries where not importVersionData)
            """)

    def _markNonImportedDependencies(self):
        """
        After user dialog: If some data blocks where chosen not to import
        mark all dependent blocks to not import also (especially version data)
        """
        # Repeat until no further row changes to follow dependency chains
        while True:
            self.tempDb.execSql("""
                update entries set dontImport=1
                where (not dontImport) and unifName in
                (select depgraph.neededBy from depgraph inner join entries
                on depgraph.unifName = entries.unifName
                where entries.dontImport);
                """)

            if self.tempDb.rowcount == 0:
                break

    def _propagateRenames(self):
        """
        Write rename commands for imported items to all parts to import
        if some parts need renaming. Renaming of present items is not
        propagated.
        """
        for unifName, renImportTo in self.tempDb.execSqlQuery(
                "select unifName, renameImportTo from entries "
                "where renameImportTo != '' and not dontImport"):
            for depUnifName in self.tempDb.execSqlQuerySingleColumn(
                    "select dependent from renamegraph where unifName = ? and "
                    "dependent in (select unifName from entries where "
                    "not dontImport)", (unifName, )):
                if depUnifName.endswith(unifName):
                    # Replace the trailing old name by the new one
                    newName = depUnifName[:-len(unifName)] + renImportTo

                    self.tempDb.execSql("""
                        update entries set renameImportTo=? where unifName = ?
                        """, (newName, depUnifName))

    def _doUserDecision(self):
        """
        Called to present GUI to user for deciding what to do.
        This method is overwritten for trashcan GUI.
        Returns False if user canceled operation
        """
        return MultiPageTextImporterDialog.runModal(self.mainControl,
                self.tempDb, self.mainControl)

    def _isUserNeeded(self):
        """
        Decide if a dialog must be shown to ask user how to proceed.
        Under some circumstances the dialog may be shown regardless of the
        result.
        """
        if self.tempDb.execSqlQuerySingleItem(
                "select missingDep from entries "
                "where missingDep limit 1", default=False):
            # Missing dependency
            return True

        if len(self.tempDb.execSqlQuerySingleItem(
                "select collisionWithPresent "
                "from entries where collisionWithPresent != '' limit 1",
                default="")) > 0:
            # Name collision
            return True

        # No problems found
        return False

    def _doImportVer0(self):
        """
        Import wikiwords if format version is 0.
        """
        langHelper = wx.GetApp().createWikiLanguageHelper(
                self.wikiDocument.getWikiDefaultWikiLanguage())

        while True:
            # Read next wikiword
            line = self.importFile.readline()
            if line == "":
                break

            wikiWord = line[:-1]
            errMsg = langHelper.checkForInvalidWikiWord(wikiWord,
                    self.wikiDocument)
            if errMsg:
                raise ImportException(_("Bad wiki word: %s, %s") %
                        (wikiWord, errMsg))

            content = self._collectContent()
            page = self.wikiDocument.getWikiPageNoError(wikiWord)

            page.replaceLiveText(content)

    def _doImportVer1Pass1(self):
        """
        First pass over a version 1 file: record each known entry tag in
        the "entries" table of the temp db. Only versioning overviews are
        actually read (to fill the dependency tables), everything else
        is skipped.
        """
        while True:
            tag = self.importFile.readline()
            if tag == "":
                # End of file
                break
            tag = tag[:-1]
            if tag.startswith("funcpage/"):
                self._skipContent()
            elif tag.startswith("savedsearch/"):
                self._skipContent()
            elif tag.startswith("savedpagesearch/"):
                self._skipContent()
            elif tag.startswith("wikipage/"):
                self._skipContent()
            elif tag.startswith("versioning/overview/"):
                self._doImportItemVersioningOverviewVer1Pass1(tag[20:])
            elif tag.startswith("versioning/packet/versionNo/"):
                self._skipContent()
            else:
                # Unknown tag -> Ignore until separator
                self._skipContent()
                continue

            self.tempDb.execSql(
                    "insert or replace into entries(unifName, seen) "
                    "values (?, 1)", (tag, ))

    def _readHintedDatablockVer1(self):
        """
        Reads datablock and preprocesses encoding if necessary.
        Returns either (hintStrings, content) or (None, None) if either
        an unknown important hint was found or if encoding had an error.

        hintStrings is a list of hints (as unistrings) which were
        not processed by the function (therefore encoding hint is removed).
        content can be a bytestring or a unistring.

        If (None, None) is returned, the remaining content of the entry
        was skipped already by the function.
        """
        hintLine = self.importFile.readline()[:-1]
        hintStrings = hintLine.split("  ".strip() or " ")

        resultHintStrings = []

        # Set default
        useB64 = False

        # Process hints
        for hint in hintStrings:
            if hint.startswith("important/encoding/"):
                encoding = hint[19:]
                if encoding == "text":
                    useB64 = False
                elif encoding == "base64":
                    useB64 = True
                else:
                    # Unknown encoding: don't read further
                    self._skipContent()
                    return None, None
            elif hint.startswith("important/"):
                # There is something important we do not understand
                self._skipContent()
                return None, None
            else:
                resultHintStrings.append(hint)

        content = self._collectContent()

        if useB64:
            try:
                content = base64BlockDecode(content)
            except (TypeError, ValueError):
                # base64 decoding failed. The stdlib decoder reports bad
                # input as binascii.Error (a ValueError) on Python 3.
                # NOTE(review): confirm what base64BlockDecode raises.
                # The entry was consumed by _collectContent() already, so
                # nothing must be skipped here -- skipping again would
                # swallow the following entry.
                return None, None

        return (resultHintStrings, content)

    def _doImportItemVersioningOverviewVer1Pass1(self, subtag):
        """
        Read a versioning overview entry and record its dependencies and
        rename relations in the temp db. Unreadable entries are ignored.

        subtag -- unified name of the base page the overview belongs to
        """
        hintStrings, content = self._readHintedDatablockVer1()
        if content is None:
            return

        # Always encode to UTF-8 no matter what the import file encoding is.
        # Base64 encoded content arrives as bytes already.
        if isinstance(content, str):
            content = content.encode("utf-8")

        try:
            ovw = Versioning.VersionOverview(self.wikiDocument,
                    unifiedBasePageName=subtag)

            ovw.readOverviewFromBytes(content)

            ovwUnifName = ovw.getUnifiedName()

            self.tempDb.execSql(
                    "insert or replace into depgraph(unifName, neededBy) "
                    "values (?, ?)", (subtag, ovwUnifName))

            self.tempDb.execSql(
                    "insert or replace into renamegraph(unifName, dependent) "
                    "values (?, ?)", (subtag, ovwUnifName))

            for depUnifName in ovw.getDependentDataBlocks(omitSelf=True):
                # Mutual dependency between version overview and each
                # version packet
                self.tempDb.execSql(
                        "insert or replace into depgraph(unifName, neededBy) "
                        "values (?, ?)", (depUnifName, ovwUnifName))
                self.tempDb.execSql(
                        "insert or replace into depgraph(unifName, neededBy) "
                        "values (?, ?)", (ovwUnifName, depUnifName))

                self.tempDb.execSql(
                        "insert or replace into renamegraph(unifName, dependent) "
                        "values (?, ?)", (subtag, depUnifName))

        except VersioningException:
            return

    def _doImportVer1Pass2(self):
        """
        Second pass over a version 1 file: rename present items where
        requested, then import each entry as decided in the temp db.
        """
        wikiDoc = self.wikiDocument

        # We have to rename present items
        # First wikipages because this automatically renames depending
        # version data
        for pageFrom, pageTo in self.tempDb.execSqlQuery("""
                select substr(unifName, 10), substr(renamePresentTo, 10)
                from entries where unifName glob 'wikipage/*' and
                renamePresentTo glob 'wikipage/*'
                """):
            if wikiDoc.isDefinedWikiPageName(pageFrom):
                wikiDoc.renameWikiWords({pageFrom: pageTo},
                        Consts.ModifyText.off)
                # TODO How to handle rename of home page?

        # Then remaining data blocks
        for oldUnifName, newUnifName in self.tempDb.execSqlQuery("""
                select unifName, renamePresentTo
                from entries where unifName not glob 'wikipage/*' and
                renamePresentTo != ''
                """):
            wikiDoc.renameDataBlock(oldUnifName, newUnifName)

        # For wiki pages with versions to import, existing versions must be
        # deleted
        for wikiWord in self.tempDb.execSqlQuerySingleColumn("""
                select substr(unifName, 10)
                from entries where unifName glob 'wikipage/*' and
                renameImportTo == '' and not dontImport and importVersionData
                union
                select substr(renameImportTo, 10)
                from entries where unifName glob 'wikipage/*' and
                renameImportTo glob 'wikipage/*' and not dontImport and
                importVersionData
                """):
            if not wikiDoc.isDefinedWikiPageName(wikiWord):
                continue

            page = wikiDoc.getWikiPage(wikiWord)
            versionOverview = page.getExistingVersionOverview()
            if versionOverview is not None:
                versionOverview.delete()

        while True:
            tag = self.importFile.readline()
            if tag == "":
                # End of file
                break
            tag = tag[:-1]  # Remove line end

            try:
                dontImport, renameImportTo = self.tempDb.execSqlQuery(
                        "select dontImport, renameImportTo from "
                        "entries where unifName = ?", (tag,))[0]
            except IndexError:
                # Maybe dangerous
                traceback.print_exc()
                self._skipContent()
                continue

            if dontImport:
                self._skipContent()
                continue

            if renameImportTo == "":
                renameImportTo = tag

            if tag.startswith("wikipage/"):
                self._importItemWikiPageVer1Pass2(renameImportTo[9:])
            elif tag.startswith("funcpage/"):
                self._importItemFuncPageVer1Pass2(tag[9:])
            elif tag.startswith("savedsearch/"):
                self._importB64DatablockVer1Pass2(renameImportTo)
            elif tag.startswith("savedpagesearch/"):
                self._importHintedDatablockVer1Pass2(renameImportTo)
            elif tag.startswith("versioning/"):
                self._importHintedDatablockVer1Pass2(renameImportTo)
            else:
                # Unknown tag -> Ignore until separator
                self._skipContent()

        # Let pages re-read their (possibly imported) version overview
        for wikiWord in self.tempDb.execSqlQuerySingleColumn("""
                select substr(unifName, 10)
                from entries where unifName glob 'wikipage/*' and
                renamePresentTo == '' and importVersionData
                union
                select substr(renamePresentTo, 10)
                from entries where unifName glob 'wikipage/*' and
                renamePresentTo glob 'wikipage/*' and importVersionData
                """):
            if not wikiDoc.isDefinedWikiPageName(wikiWord):
                continue

            page = wikiDoc.getWikiPage(wikiWord)
            versionOverview = page.getExistingVersionOverview()
            if versionOverview is not None:
                versionOverview.readOverview()

    def _importItemWikiPageVer1Pass2(self, wikiWord):
        """
        Import a wiki page entry. Its first line holds (at least) three
        space separated timestamps in the form YYYY-MM-DD/HH:MM:SS, the
        page text follows. Entries with bad timestamps are skipped.
        """
        timeStampLine = self.importFile.readline()[:-1]
        timeStrings = timeStampLine.split(" ")
        if len(timeStrings) < 3:
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        timeStrings = timeStrings[:3]

        try:
            timeStrings = [str(ts) for ts in timeStrings]
        except UnicodeEncodeError:
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        try:
            timeStamps = [timegm(time.strptime(ts, "%Y-%m-%d/%H:%M:%S"))
                    for ts in timeStrings]
        except (ValueError, OverflowError):
            traceback.print_exc()
            self._skipContent()
            return  # TODO Report error

        content = self._collectContent()
        page = self.wikiDocument.getWikiPageNoError(wikiWord)

        # TODO How to handle versions here?
        page.replaceLiveText(content)
        if page.getTxtEditor() is not None:
            page.writeToDatabase()

        page.setTimestamps(timeStamps)

    def _importItemFuncPageVer1Pass2(self, subtag):
        """
        Import a functional page. Bad or unknown tags are ignored.
        """
        # The subtag is functional page tag
        try:
            # subtag is unicode but func tags are bytestrings
            subtag = str(subtag)
        except UnicodeEncodeError:
            self._skipContent()
            return

        content = self._collectContent()
        try:
            page = self.wikiDocument.getFuncPage(subtag)
            page.replaceLiveText(content)
        except BadFuncPageTagException:
            # This function tag is bad or unknown -> ignore
            return  # TODO Report error

    def _importB64DatablockVer1Pass2(self, unifName):
        """
        Import a datablock whose whole content is base64 encoded.
        """
        # Content is base64 encoded
        b64Content = self._collectContent()

        try:
            datablock = base64BlockDecode(b64Content)
        except (TypeError, ValueError):
            # base64 decoding failed. The stdlib decoder reports bad input
            # as binascii.Error (a ValueError) on Python 3.
            # NOTE(review): confirm what base64BlockDecode raises.
            return  # TODO Report error

        try:
            self.wikiDocument.getWikiData().storeDataBlock(unifName,
                    datablock, storeHint=Consts.DATABLOCK_STOREHINT_INTERN)
        except TypeError:
            return  # TODO Report error

    def _importTextDatablockVer1Pass2(self, unifName):
        """
        Import a datablock stored as plain text.
        """
        content = self._collectContent()

        try:
            self.wikiDocument.getWikiData().storeDataBlock(unifName, content,
                    storeHint=Consts.DATABLOCK_STOREHINT_INTERN)
        except TypeError:
            return  # TODO Report error

    def _importHintedDatablockVer1Pass2(self, unifName):
        """
        A hinted datablock starts with an extra line defining encoding
        (text or B64) and storage hint. It was introduced later therefore
        only versioning packets use this while saved searches don't.
        """
        hintStrings, content = self._readHintedDatablockVer1()

        if hintStrings is None:
            return

        # Set defaults
        storeHint = Consts.DATABLOCK_STOREHINT_INTERN

        # Process hints
        for hint in hintStrings:
            if hint.startswith("storeHint/"):
                if hint[10:] == "extern":
                    storeHint = Consts.DATABLOCK_STOREHINT_EXTERN
                elif hint[10:] == "intern":
                    storeHint = Consts.DATABLOCK_STOREHINT_INTERN
                # No else. It is not vital to get the right storage hint

        try:
            if isinstance(content, str):
                content = BOM_UTF8 + content.encode("utf-8")

            self.wikiDocument.getWikiData().storeDataBlock(unifName, content,
                    storeHint=storeHint)
        except TypeError:
            traceback.print_exc()
            return  # TODO Report error
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files but
    provides a synchronous, cooperative interface.

    .. caution::
         This object is most effective wrapping files that can be used
         appropriately with :func:`select.select` such as sockets and pipes.

         In general, on most platforms, operations on regular files
         (e.g., ``open('/etc/hosts')``) are considered non-blocking
         already, even though they can take some time to complete as
         data is copied to the kernel and flushed to disk (this time
         is relatively bounded compared to sockets or pipes, though).
         A :func:`~os.read` or :func:`~os.write` call on such a file
         will still effectively block for some small period of time.
         Therefore, wrapping this class around a regular file is
         unlikely to make IO gevent-friendly: reading or writing large
         amounts of data could still block the event loop.

         If you'll be working with regular files and doing IO in large
         chunks, you may consider using
         :class:`~gevent.fileobject.FileObjectThread` or
         :func:`~gevent.os.tp_read` and :func:`~gevent.os.tp_write` to bypass this
         concern.

    .. note::
         Random read/write (e.g., ``mode='rwb'``) is not supported.
         For that, use :class:`io.BufferedRWPair` around two instance of this
         class.

    .. tip::
         Although this object provides a :meth:`fileno` method and
         so can itself be passed to :func:`fcntl.fcntl`, setting the
         :data:`os.O_NONBLOCK` flag will have no effect; likewise, removing
         that flag will cause this object to no longer be cooperative.
    """

    #: platform specific default for the *bufsize* parameter
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. Other values are interpreted as for the :mod:`io` package.
            Buffering is ignored in text mode.
        :raises TypeError: if the file number is not an ``int``.
        :raises ValueError: if *mode* does not reduce to ``'r'`` or ``'w'``.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False

        if mode not in ('r', 'w'):
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used"
            #
            # The test compares against the two valid modes explicitly: a
            # substring test (``mode not in 'rw'``) combined with ``and``
            # let 'rw', '' and any other single letter slip through.
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode, ))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        else:
            assert mode == 'w'
            self.io = BufferedWriter(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        # Hand back the count reported by the underlying stream, as the
        # io classes do; previously it was silently dropped.
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def readable(self):
        return self.io.readable()

    def writable(self):
        return self.io.writable()

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is.
        #
        # Look _fobj up in the instance dict directly: going through
        # ``self._fobj`` re-enters __getattr__ when the attribute has not
        # been set yet (e.g. __init__ raised early) and recurses forever.
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)
class FileObjectPosix(object):
    """
    File-like wrapper around a file descriptor.

    The descriptor is wrapped in a ``GreenFileDescriptorIO``, which in turn
    is wrapped in a buffered :mod:`io` class (and in a
    :class:`io.TextIOWrapper` when ``'U'`` is part of the mode). The usual
    file methods are forwarded to that stack; unknown attributes are looked
    up on the object originally passed in.
    """

    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: an integer file number or an object with a
            ``fileno()`` method.
        :param mode: ``'rb'``, ``'rU'`` or ``'wb'``; the ``'b'`` is ignored
            and ``'U'`` selects text IO.
        :param bufsize: buffer size; negative means ``default_bufsize``.
            For read and write modes 0 is translated to 1 and 1 to
            ``default_bufsize``.
        :param close: passed as ``closefd`` to the descriptor wrapper.
        :raises TypeError: if the file number is not an ``int``.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False
        assert len(mode) == 1, 'mode can only be [rb, rU, wb]'

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # Same normalisation for both directions: no unbuffered
            # operation, and 1 ("line buffered") means the default size.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize
            buffered = BufferedReader if mode == 'r' else BufferedWriter
            self.io = buffered(self.fileio, bufsize)
        else:
            # QQQ: not used
            self.io = BufferedRandom(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        # Hand back the count reported by the underlying stream, as the
        # io classes do; previously it was silently dropped.
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # Look _fobj up in the instance dict directly: going through
        # ``self._fobj`` re-enters __getattr__ when the attribute has not
        # been set yet (e.g. __init__ raised early) and recurses forever.
        # _fobj is also None for plain file numbers and after close().
        fobj = self.__dict__.get('_fobj')
        if fobj is None:
            raise AttributeError(
                '%r object has no attribute %r' % (type(self).__name__, name))
        return getattr(fobj, name)
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files but
    provides a synchronous, cooperative interface.

    .. caution::
         This object is most effective wrapping files that can be used
         appropriately with :func:`select.select` such as sockets and pipes.

         In general, on most platforms, operations on regular files
         (e.g., ``open('/etc/hosts')``) are considered non-blocking
         already, even though they can take some time to complete as
         data is copied to the kernel and flushed to disk (this time
         is relatively bounded compared to sockets or pipes, though).
         A :func:`~os.read` or :func:`~os.write` call on such a file
         will still effectively block for some small period of time.
         Therefore, wrapping this class around a regular file is
         unlikely to make IO gevent-friendly: reading or writing large
         amounts of data could still block the event loop.

         If you'll be working with regular files and doing IO in large
         chunks, you may consider using
         :class:`~gevent.fileobject.FileObjectThread` or
         :func:`~gevent.os.tp_read` and :func:`~gevent.os.tp_write` to bypass this
         concern.

    .. note::
         Random read/write (e.g., ``mode='rwb'``) is not supported.
         For that, use :class:`io.BufferedRWPair` around two instances of this
         class.

    .. tip::
         Although this object provides a :meth:`fileno` method and
         so can itself be passed to :func:`fcntl.fcntl`, setting the
         :data:`os.O_NONBLOCK` flag will have no effect; however, removing
         that flag will cause this object to no longer be cooperative.

    .. versionchanged:: 1.1
       Now uses the :mod:`io` package internally. Under Python 2, previously
       used the undocumented class :class:`socket._fileobject`. This provides
       better file-like semantics (and portability to Python 3).
    """

    #: platform specific default for the *bufsize* parameter
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. A value of 1 also selects the platform-specific default.
            Other values are interpreted as for the :mod:`io` package.
        :keyword bool close: Passed to the underlying file IO object as *closefd*.
        :raises TypeError: If the fileno is not an int.
        :raises ValueError: If *mode* (ignoring "b" and "U") is not "r" or "w".
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False

        if mode not in ('r', 'w'):
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled.
            # NOTE: This is preventing a mode like 'rwb' for binary random access;
            # that code was never tested and was explicitly marked as "not used".
            # The test is against the exact values: a substring test such as
            # ``mode not in 'rw'`` would let '' and 'rw' through.
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        else:
            # mode == 'w'; guaranteed by the validation above
            self.io = BufferedWriter(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close ``self.io`` and ``self.fileio``; later calls do nothing."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    # The methods below simply forward to the buffered object in ``self.io``.

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever ``self.io.write`` returns."""
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def readable(self):
        """
        .. versionadded:: 1.1b2
        """
        return self.io.readable()

    def writable(self):
        """
        .. versionadded:: 1.1b2
        """
        return self.io.writable()

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        if name == '_fobj':
            # ``_fobj`` itself is missing (``__init__`` raised before
            # setting it, or the instance was created without it). Reading
            # ``self._fobj`` below would re-enter this method forever.
            raise AttributeError(name)
        return getattr(self._fobj, name)
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files.

    .. seealso:: :func:`gevent.os.make_nonblocking`
    """

    #: buffer size used when *bufsize* is negative (or 1 for "r"/"w")
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        :param str mode: One of "rb", "rU" or "wb". Every "b" is dropped;
            a "U" selects text IO. A false value means "rb".
        :param int bufsize: Buffer size. A negative value selects
            :attr:`default_bufsize`. For modes "r" and "w", 0 is
            translated to 1 and 1 is translated to
            :attr:`default_bufsize`.
        :param bool close: Passed to the underlying file IO object as
            *closefd*.
        :raises TypeError: If the fileno is not an int.
        :raises AssertionError: If the mode, minus "b" and "U", is not a
            single character (only while ``__debug__`` is on).
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')
        # NOTE: ``assert`` is stripped under ``python -O``, so this check is
        # only enforced while __debug__ is on.
        assert len(mode) == 1, 'mode can only be [rb, rU, wb]'

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # 0 (unbuffered) and 1 (line buffered) are not passed through
            # to the Buffered* classes as-is.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize

        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        elif mode == 'w':
            self.io = BufferedWriter(self.fileio, bufsize)
        else:
            # QQQ: not used
            self.io = BufferedRandom(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close ``self.io`` and ``self.fileio``; later calls do nothing."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    # The methods below simply forward to the buffered object in ``self.io``.

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever ``self.io.write`` returns."""
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        """Look up attributes not found on this object on the wrapped *fobj*."""
        if name == '_fobj':
            # ``_fobj`` itself is missing (``__init__`` did not get far
            # enough, or the instance was created without it). Reading
            # ``self._fobj`` below would re-enter this method forever.
            raise AttributeError(name)
        return getattr(self._fobj, name)
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files but
    provides a synchronous, cooperative interface.

    .. note::
         Random read/write (e.g., ``mode='rwb'``) is not supported.
         For that, use :class:`io.BufferedRWPair` around two instances of this
         class.

    .. tip::
         Although this object provides a :meth:`fileno` method and
         so can itself be passed to :func:`fcntl.fcntl`, setting the
         :data:`os.O_NONBLOCK` flag will have no effect; likewise, removing
         that flag will cause this object to no longer be cooperative.
    """

    #: platform specific default for the *bufsize* parameter
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :keyword fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file *will* be
            put in non-blocking mode using :func:`gevent.os.make_nonblocking`.
        :keyword str mode: The manner of access to the file, one of "rb", "rU" or "wb"
            (where the "b" or "U" can be omitted).
            If "U" is part of the mode, IO will be done on text, otherwise bytes.
        :keyword int bufsize: If given, the size of the buffer to use. The default
            value means to use a platform-specific default, and a value of 0 is translated
            to a value of 1. A value of 1 also selects the platform-specific default.
            Other values are interpreted as for the :mod:`io` package.
        :keyword bool close: Passed to the underlying file IO object as *closefd*.
        :raises TypeError: If the fileno is not an int.
        :raises ValueError: If *mode* (ignoring "b" and "U") is not "r" or "w".
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        self._translate = 'U' in mode
        if self._translate:
            mode = mode.replace('U', '')

        # Python 3 builtin `open` raises a ValueError for invalid modes;
        # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
        # enabled (which it usually was). Match Python 3 because it makes more sense
        # and because __debug__ may not be enabled.
        # NOTE: This is preventing a mode like 'rwb' for binary random access;
        # that code was never tested and was explicitly marked as "not used".
        # Compare against the exact values: a substring test
        # (``mode not in 'rw'``) accepts '' and 'rw'.
        if mode != 'r' and mode != 'w':
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0 or bufsize == 1:
            bufsize = self.default_bufsize
        elif bufsize == 0:
            bufsize = 1

        # mode is exactly 'r' or 'w' here
        buffered_class = BufferedReader if mode == 'r' else BufferedWriter
        self.io = buffered_class(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close ``self.io`` and ``self.fileio``; later calls do nothing."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    # The methods below simply forward to the buffered object in ``self.io``.

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever ``self.io.write`` returns."""
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def readable(self):
        """Return ``self.io.readable()``."""
        return self.io.readable()

    def writable(self):
        """Return ``self.io.writable()``."""
        return self.io.writable()

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        if name == '_fobj':
            # ``_fobj`` itself is missing (``__init__`` raised before
            # setting it, or the instance was created without it). Reading
            # ``self._fobj`` below would re-enter this method forever.
            raise AttributeError(name)
        return getattr(self._fobj, name)
class FileObjectPosix(object):
    """
    A file-like object that operates on non-blocking files.

    .. seealso:: :func:`gevent.os.make_nonblocking`
    """

    #: buffer size used when *bufsize* is negative (or 1 for "r"/"w")
    default_bufsize = io.DEFAULT_BUFFER_SIZE

    def __init__(self, fobj, mode='rb', bufsize=-1, close=True):
        """
        :param fobj: Either an integer fileno, or an object supporting the
            usual :meth:`socket.fileno` method. The file will be
            put in non-blocking mode.
        :param str mode: One of "rb", "rU" or "wb". Every "b" is dropped;
            a "U" selects text IO. A false value means "rb".
        :param int bufsize: Buffer size. A negative value selects
            :attr:`default_bufsize`. For modes "r" and "w", 0 is
            translated to 1 and 1 is translated to
            :attr:`default_bufsize`.
        :param bool close: Passed to the underlying file IO object as
            *closefd*.
        :raises TypeError: If the fileno is not an int.
        :raises ValueError: If the mode, minus "b" and "U", is not a
            single character.
        """
        if isinstance(fobj, int):
            fileno = fobj
            fobj = None
        else:
            fileno = fobj.fileno()
        if not isinstance(fileno, int):
            raise TypeError('fileno must be int: %r' % fileno)

        orig_mode = mode
        mode = (mode or 'rb').replace('b', '')
        if 'U' in mode:
            self._translate = True
            mode = mode.replace('U', '')
        else:
            self._translate = False
        if len(mode) != 1:
            # Python 3 builtin `open` raises a ValueError for invalid modes;
            # Python 2 ignores it. In the past, we raised an AssertionError, if __debug__ was
            # enabled (which it usually was). Match Python 3 because it makes more sense
            # and because __debug__ may not be enabled
            raise ValueError('mode can only be [rb, rU, wb], not %r' % (orig_mode,))

        self._fobj = fobj
        self._closed = False
        self._close = close

        self.fileio = GreenFileDescriptorIO(fileno, mode, closefd=close)

        if bufsize < 0:
            bufsize = self.default_bufsize
        if mode in ('r', 'w'):
            # 0 (unbuffered) and 1 (line buffered) are not passed through
            # to the Buffered* classes as-is.
            if bufsize == 0:
                bufsize = 1
            elif bufsize == 1:
                bufsize = self.default_bufsize

        if mode == 'r':
            self.io = BufferedReader(self.fileio, bufsize)
        elif mode == 'w':
            self.io = BufferedWriter(self.fileio, bufsize)
        else:
            # QQQ: not used
            self.io = BufferedRandom(self.fileio, bufsize)
        if self._translate:
            self.io = TextIOWrapper(self.io)

    @property
    def closed(self):
        """True if the file is closed"""
        return self._closed

    def close(self):
        """Close ``self.io`` and ``self.fileio``; later calls do nothing."""
        if self._closed:
            # make sure close() is only run once when called concurrently
            return
        self._closed = True
        try:
            self.io.close()
            self.fileio.close()
        finally:
            self._fobj = None

    # The methods below simply forward to the buffered object in ``self.io``.

    def flush(self):
        self.io.flush()

    def fileno(self):
        return self.io.fileno()

    def write(self, data):
        """Write *data* and return whatever ``self.io.write`` returns."""
        return self.io.write(data)

    def writelines(self, lines):
        self.io.writelines(lines)

    def read(self, size=-1):
        return self.io.read(size)

    def readline(self, size=-1):
        return self.io.readline(size)

    def readlines(self, sizehint=0):
        return self.io.readlines(sizehint)

    def readable(self):
        """Return ``self.io.readable()``."""
        return self.io.readable()

    def writable(self):
        """Return ``self.io.writable()``."""
        return self.io.writable()

    def seek(self, *args, **kwargs):
        return self.io.seek(*args, **kwargs)

    def seekable(self):
        return self.io.seekable()

    def tell(self):
        return self.io.tell()

    def truncate(self, size=None):
        return self.io.truncate(size)

    def __iter__(self):
        return self.io

    def __getattr__(self, name):
        # XXX: Should this really be _fobj, or self.io?
        # _fobj can easily be None but io never is
        if name == '_fobj':
            # ``_fobj`` itself is missing (``__init__`` raised before
            # setting it, or the instance was created without it). Reading
            # ``self._fobj`` below would re-enter this method forever.
            raise AttributeError(name)
        return getattr(self._fobj, name)