def gather_pagedirs(dir_from):
    """Feed the edit-log of every page directory to gather_editlog().

    For each entry found in <dir_from>/pages, gather_editlog() is called
    with the path of that entry's 'edit-log' file and the entry name.
    """
    pages_root = opj(dir_from, 'pages')
    for pagename in listdir(pages_root):
        gather_editlog(opj(pages_root, pagename, 'edit-log'), pagename)
def process_pagedirs(dir_from, dir_to):
    """Copy all page directories, then rewrite every revision via tocrlf().

    Each entry of dir_from is first copied as a whole to dir_to with
    copy_dir().  If the source page has a 'revisions' directory, every
    file in it is then passed through tocrlf(source, target), writing to
    the same file name below the copied page.
    """
    for pagename in listdir(dir_from):
        source_page = opj(dir_from, pagename)
        target_page = opj(dir_to, pagename)

        # copy everything first, including the files converted below
        copy_dir(source_page, target_page)

        source_revs = opj(source_page, 'revisions')
        target_revs = opj(target_page, 'revisions')
        if not os.path.exists(source_revs):
            continue

        for revname in listdir(source_revs):
            # the value is not used, but int() raises ValueError when the
            # file name is not a plain number
            int(revname)
            tocrlf(opj(source_revs, revname), opj(target_revs, revname))
def convert_pagedir(dir_from, dir_to):
    """Copy every page directory of dir_from into a newly created dir_to.

    dir_to is created with os.mkdir().  The name of each entry of dir_from
    is printed, the entry is copied recursively under the same name
    (symlinks are copied as symlinks), and the "last-edited" file is then
    removed from the copy if it is there.
    """
    os.mkdir(dir_to)
    for dname in listdir(dir_from):
        print("%s" % (dname,))
        # third positional argument of copytree is `symlinks`
        shutil.copytree(opj(dir_from, dname), opj(dir_to, dname), 1)
        try:
            os.remove(opj(dir_to, dname, "last-edited"))
        except OSError:
            # best effort: the file is usually just not there.  Only OS
            # level errors are ignored, so e.g. KeyboardInterrupt is no
            # longer swallowed here.
            pass
def convert_userdir(dir_from, dir_to):
    """Copy the user directory, converting the '*.bookmark' files.

    dir_to is created with os.mkdir().  A file whose name ends in
    '.bookmark' is read, its stripped content is parsed as a float and run
    through wikiutil.timestamp2version(); the string form of the result is
    written to the file of the same name in dir_to.  All other files are
    copied unchanged with copy_file().
    """
    os.mkdir(dir_to)
    for fname in listdir(dir_from):
        path_from = opj(dir_from, fname)
        path_to = opj(dir_to, fname)

        if not fname.endswith('.bookmark'):
            copy_file(path_from, path_to)
            continue

        # try/finally (rather than relying on garbage collection) makes
        # sure both handles are closed even when read/write fails
        f = open(path_from)
        try:
            bm = f.read().strip()
        finally:
            f.close()

        bm = str(wikiutil.timestamp2version(float(bm)))

        f = open(path_to, 'w')
        try:
            f.write(bm)
        finally:
            f.close()
def convert_pagedir(dir_from, dir_to, enc_from, enc_to):
    """Copy page directories to dir_to under re-encoded directory names.

    dir_to is created with os.mkdir().  Each entry name of dir_from is
    converted with qf_convert_string(name, enc_from, enc_to), the mapping
    is printed, and the directory is copied recursively to the converted
    name.  The page's 'last-edited' file is then converted with
    convert_editlog(); an IOError from that step is ignored.
    """
    os.mkdir(dir_to)
    for old_name in listdir(dir_from):
        new_name = qf_convert_string(old_name, enc_from, enc_to)
        print("%s -> %s" % (old_name, new_name))

        # third positional argument of copytree is `symlinks`
        shutil.copytree(opj(dir_from, old_name), opj(dir_to, new_name), 1)

        try:
            convert_editlog(
                opj(dir_from, old_name, 'last-edited'),
                opj(dir_to, new_name, 'last-edited'),
                enc_from,
                enc_to,
            )
        except IOError:
            # a missing 'last-edited' file is not an error
            pass
def convert_pagedir(dir_from, dir_to, is_backupdir=0):
    """Rebuild every page directory of dir_from below a new dir_to.

    For each page directory this
      * copies the 'text' file,
      * copies every file of 'backup' (if that directory exists), naming
        the copy str(convert_ts(float(<old name>))),
      * converts 'edit-log' with convert_editlog(),
      * copies 'attachments' (if that directory exists) with copy_dir(),
        ignoring errors from that copy.

    The per-page 'cache' directory is not copied.  `is_backupdir` is
    accepted but not used by this function.
    """
    os.mkdir(dir_to)
    for pagedir in listdir(dir_from):
        text_from = opj(dir_from, pagedir, 'text')
        text_to = opj(dir_to, pagedir, 'text')
        os.mkdir(opj(dir_to, pagedir))
        copy_file(text_from, text_to)

        backupdir_from = opj(dir_from, pagedir, 'backup')
        backupdir_to = opj(dir_to, pagedir, 'backup')
        if os.path.exists(backupdir_from):
            os.mkdir(backupdir_to)
            for ts in listdir(backupdir_from):
                # backup files are named after their timestamp; the copy
                # gets the converted timestamp as its name
                ts_usec = str(convert_ts(float(ts)))
                copy_file(opj(backupdir_from, ts), opj(backupdir_to, ts_usec))

        editlog_from = opj(dir_from, pagedir, 'edit-log')
        editlog_to = opj(dir_to, pagedir, 'edit-log')
        convert_editlog(editlog_from, editlog_to)

        attachdir_from = opj(dir_from, pagedir, 'attachments')
        attachdir_to = opj(dir_to, pagedir, 'attachments')
        if os.path.exists(attachdir_from):
            try:
                copy_dir(attachdir_from, attachdir_to)
            except Exception:
                # best effort: a failed attachment copy must not stop the
                # migration.  Unlike a bare "except:", this does not
                # swallow KeyboardInterrupt / SystemExit on Python >= 2.5.
                pass
def convert_textdir(dir_from, dir_to, enc_from, enc_to, is_backupdir=0):
    """Convert every file of dir_from into a newly created dir_to.

    File names are re-encoded with qf_convert_string() and the files
    themselves are processed by convert_file().  When `is_backupdir` is
    true, names are expected to be '<name>.<timestamp>' (exactly one dot);
    only the <name> part is re-encoded and the timestamp is appended
    again unchanged.
    """
    os.mkdir(dir_to)
    for fname_from in listdir(dir_from):
        if is_backupdir:
            # raises ValueError unless there is exactly one '.'
            basename, timestamp = fname_from.split('.')
            converted = qf_convert_string(basename, enc_from, enc_to)
            fname_to = '.'.join([converted, timestamp])
        else:
            fname_to = qf_convert_string(fname_from, enc_from, enc_to)

        source = opj(dir_from, fname_from)
        target = opj(dir_to, fname_to)
        convert_file(source, target, enc_from, enc_to)
def walk(top, topdown=True, onerror=None): from os.path import join, isdir, islink # We may not have read permission for top, in which case we can't # get a list of the files the directory contains. os.path.walk # always suppressed the exception then, rather than blow up for a # minor reason when (say) a thousand readable directories are still # left to visit. That logic is copied here. try: # Note that listdir and error are globals in this module due # to earlier import-*. names = listdir(top) except error, err: if onerror is not None: onerror(err) return
def migrate(dir_to): """ this removes edit-lock files from the pagedirs and converts attachment filenames """ pagesdir = opj(dir_to, 'pages') pagelist = listdir(pagesdir) for pagename in pagelist: pagedir = opj(pagesdir, pagename) editlock = opj(pagedir, 'edit-lock') try: os.remove(editlock) except: pass attachdir = os.path.join(pagedir, 'attachments') for root, dirs, files in walk(attachdir): for f in files: try: f.decode(to_encoding) except UnicodeDecodeError: fnew = f.decode(from_encoding).encode(to_encoding) os.rename(os.path.join(root,f), os.path.join(root, fnew)) print 'renamed', f, '\n ->', fnew, ' in dir:', root
def convert_textdir(dir_from, dir_to, is_backupdir=0):
    """Move flat text/backup files into per-page directories.

    For every file of dir_from the directories <dir_to>/pages/<page>,
    .../backup and .../cache are created if possible.  The file is then
    copied with copy_file():

      * is_backupdir false: the file name is the page name, the copy goes
        to pages/<page>/text
      * is_backupdir true: the file name must be '<page>.<timestamp>'
        (exactly one dot), the copy goes to pages/<page>/backup/<timestamp>
    """
    for fname_from in listdir(dir_from):
        if is_backupdir:
            # raises ValueError unless there is exactly one '.'
            fname, timestamp = fname_from.split(".")
        else:
            fname = fname_from

        new_dirs = (
            opj(dir_to, "pages", fname),
            opj(dir_to, "pages", fname, "backup"),
            opj(dir_to, "pages", fname, "cache"),
        )
        for new_dir in new_dirs:
            try:
                os.mkdir(new_dir)
            except OSError:
                # best effort: the directory normally exists already from
                # an earlier file of the same page.  Only OS level errors
                # are ignored (a bare "except:" also hid KeyboardInterrupt).
                pass

        if is_backupdir:
            fname_to = opj("pages", fname, "backup", timestamp)
        else:
            fname_to = opj("pages", fname, "text")
        copy_file(opj(dir_from, fname_from), opj(dir_to, fname_to))
def convert_userdir(dir_from, dir_to, enc_from, enc_to):
    """Convert every file of the user directory into a new dir_to.

    dir_to is created with os.mkdir(); each file of dir_from is passed to
    convert_file() together with the target path of the same name and the
    two encodings.
    """
    os.mkdir(dir_to)
    for fname in listdir(dir_from):
        source = opj(dir_from, fname)
        target = opj(dir_to, fname)
        convert_file(source, target, enc_from, enc_to)
def gather_pagedirs(dir_from, is_backupdir=0):
    """ this gathers information from the pagedirs, i.e. text and backup
        files (and also the local editlog) and tries to merge/synchronize
        with the informations gathered from editlog

        Module-level state used:
          pagelist -- (re)assigned to the listing of dir_from
          info     -- maps pagename -> {timestamp: [file path, log data]};
                      the entry of every page is read, updated and stored
          exists   -- exists[pagename] records whether the page has a
                      'text' file

        For each page the SAVE-type log timestamps are matched against the
        files on disk: first backup files whose name equals a timestamp,
        then the 'text' file against the latest log entry, then the
        remaining backups against remaining timestamps that differ by less
        than 2000000.  Files without a log entry get a faked 'SAVE' entry;
        log entries without a file are deleted.

        `is_backupdir` is accepted but not used by this function.
    """
    global pagelist
    pagelist = listdir(dir_from)
    for pagename in pagelist:
        editlog_from = opj(dir_from, pagename, 'edit-log')
        gather_editlog(dir_from, editlog_from)

        entry = info.get(pagename, {})

        loglist = []  # editlog timestamps of page revisions
        for ts, data in entry.items():
            # data is [file, logdata]; logdata[2] is the logged action
            if data[1][2] in ['SAVE', 'SAVENEW', 'SAVE/REVERT', ]:
                loglist.append(ts)
        loglist.sort()
        lleftover = loglist[:]  # log timestamps not matched to a file yet

        # remember the latest log entry
        if lleftover:
            llatest = lleftover[-1]
        else:
            llatest = None

        backupdir_from = opj(dir_from, pagename, 'backup')
        if os.path.exists(backupdir_from):
            backuplist = listdir(backupdir_from)
            bleftover = backuplist[:]  # backup files not matched yet
            for bfile in backuplist:
                backup_from = opj(backupdir_from, bfile)
                ts = long(bfile)
                if ts in loglist:  # we have an editlog entry, exact match
                    entry[ts][0] = backup_from
                    lleftover.remove(ts)
                    bleftover.remove(bfile)

        text_from = opj(dir_from, pagename, 'text')
        found_text = False
        if os.path.exists(text_from):
            # we have a text file, it should match latest log entry
            exists[pagename] = True
            mtime = os.path.getmtime(text_from)
            if llatest and llatest in lleftover:
                ts = llatest
                if abs(wikiutil.timestamp2version(mtime) - ts) < 2000000:
                    # less than a second diff
                    entry[ts][0] = text_from
                    lleftover.remove(ts)
                    found_text = True
            else:  # we have no log entries left 8(
                ts = wikiutil.timestamp2version(mtime)
                data = [ts, '', 'SAVE', pagename, '', '', '', '', 'missing editlog entry for this page version']
                entry[ts] = [text_from, data]
        else:
            # this page was maybe deleted, so we remember for later:
            exists[pagename] = False
            if llatest in lleftover:
                # if a page is deleted, the last log entry has no file
                entry[llatest][0] = None
                lleftover.remove(llatest)

        if os.path.exists(backupdir_from):
            backuplist = listdir(backupdir_from)
            for bfile in backuplist:
                if bfile not in bleftover:
                    continue
                backup_from = opj(backupdir_from, bfile)
                bts = long(bfile)  # must be long for py 2.2.x
                for ts in lleftover:
                    if abs(bts - ts) < 2000000:  # editlog, inexact match
                        entry[ts][0] = backup_from
                        lleftover.remove(ts)
                        bleftover.remove(bfile)
                        # One backup file can satisfy only one log entry.
                        # Stopping here also avoids iterating a list that
                        # was just modified and a second
                        # bleftover.remove(bfile), which raised ValueError.
                        break

            if len(bleftover) == 1 and len(lleftover) == 1:
                # only 1 left, must be this
                backup_from = opj(backupdir_from, bleftover[0])
                entry[lleftover[0]][0] = backup_from
                lleftover = []
                bleftover = []

            # fake some log entries
            for bfile in bleftover:
                backup_from = opj(backupdir_from, bfile)
                bts = long(bfile)  # must be long py 2.2.x
                # the faked log data carries the backup's own timestamp
                # (bts), the same value that is used as the entry key
                data = [bts, '', 'SAVE', pagename, '', '', '', '', 'missing editlog entry for this page version']
                entry[bts] = [backup_from, data]

        # check if we still haven't matched the "text" file
        if not found_text and os.path.exists(text_from):
            if llatest in lleftover:  # latest log entry still free
                # take it. do not care about mtime of file.
                entry[llatest][0] = text_from
                lleftover.remove(llatest)
            else:
                # log for "text" file is missing or latest was taken by
                # other rev 8(
                mtime = os.path.getmtime(text_from)
                # take mtime, we have nothing better
                ts = wikiutil.timestamp2version(mtime)
                data = [ts, '', 'SAVE', pagename, '', '', '', '', 'missing editlog entry for this page version']
                entry[ts] = [text_from, data]

        # delete unmatching log entries
        for ts in lleftover:
            del entry[ts]

        info[pagename] = entry