def check_merges(submitted_pages, all_pages, hexdigest_func):
    """ INTERNAL: Raises a SubmitError if the merge constraints
        aren't met. """
    if len(submitted_pages) == 0:
        raise NoChangesError(True)

    #print "SUBMITTED_PAGES: ", submitted_pages
    conflicts = conflict_table(all_pages)
    resolved = conflict_table(submitted_pages)
    for name in submitted_pages:
        #print "check_merges -- NAME: ", name
        assert WIKINAME_REGEX.match(name)
        if name in conflicts:
            if resolved.get(name, set([])) != conflicts[name]:
                unresolved = set([ver for ver in conflicts[name]
                                  if ver not in resolved.get(name, set([]))])
                raise SubmitError(
                    "Unresolved fork(s): [%s]:%s"
                    % (WIKINAME_REGEX.match(name).group('wikiword'),
                       ','.join([ver[:12] for ver in unresolved])),
                    True)

    for name in resolved:
        for version in resolved[name]:
            versioned_name = '%s_%s' % (name, version)
            if hexdigest_func(versioned_name) != EMPTY_FILE_SHA_HEX:
                raise SubmitError("Not deleted!: %s" % versioned_name, True)
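
# Hedged illustration of the rule check_merges() enforces (the page
# names below are made up): if all_pages contains fork files
# 'FrontPage_<verA>' and 'FrontPage_<verB>', then a submission touching
# 'FrontPage' must also include *both* versioned files, and each must
# hash to EMPTY_FILE_SHA_HEX -- i.e. forks are resolved by submitting
# the forked versions as empty (deleted) files along with the merged
# page text.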

def check_base_shas(arch, overlay):
    """ Helper to simplify unbundle_wikitext. """
    for name in arch.namelist():
        #print "CHECKING NAME: ", name
        if name == '__INFO__':
            continue
        if not WIKINAME_REGEX.match(name):
            raise SubmitError("File name is not a WikiWord: %s" % name, True)
        raw_delta = arch.read(name)
        base_sha = raw_delta[:20]
        updated_sha = raw_delta[20:40]
        full_path = os.path.join(overlay.base_path, 'wikitext', name)
        if base_sha == NULL_SHA:
            # New file.
            if overlay.exists(full_path):
                if (utf8_sha(overlay.read(full_path, 'rb')).digest()
                        == updated_sha):
                    continue
                raise SubmitError("New file already exists: %s" % name)
        else:
            if not overlay.exists(full_path):
                if updated_sha == EMPTY_FILE_SHA:
                    continue
                raise SubmitError("Base file doesn't exist(1): %s [%s]"
                                  % (name, full_path))
            raw_a = overlay.read(full_path, 'rb')
            tmp_sha = utf8_sha(raw_a).digest()
            if tmp_sha == updated_sha:
                continue
            if tmp_sha != base_sha:
                # Hmmmm... windows vs. *nix line terminators?
                raise SubmitError("Base file SHA1 hash failure(1): %s" % name)
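
# A minimal sketch, not part of the original module, of the delta
# record layout check_base_shas() relies on: each zip entry starts with
# the 20-byte SHA1 of the base text, then the 20-byte SHA1 of the
# updated text, then the patch bytes (see bundle_wikitext() below).
# The helper name is hypothetical.
def split_delta_record(raw_delta):
    """ Split a bundled delta into (base_sha, updated_sha, patch). """
    assert len(raw_delta) >= 40
    return raw_delta[:20], raw_delta[20:40], raw_delta[40:]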

def check_writable(overlay, arch):
    """ Helper raises SubmitError if any pages in the zip are read only. """
    names = set([])
    for name in arch.namelist():
        match = WIKINAME_REGEX.match(name)
        if not match:
            continue
        names.add(match.group('wikiword'))
    illegal_writes = names.intersection(get_read_only_list(overlay))
    if len(illegal_writes) > 0:
        raise SubmitError("Attempt to modify read only page(s): %s"
                          % ','.join(illegal_writes), True)
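
# For example (hypothetical values): if get_read_only_list(overlay)
# returned set(['FrontPage']), an archive containing 'FrontPage' or any
# 'FrontPage_<version>' entry would make the intersection non-empty and
# check_writable() would raise.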

def conflict_table(names):
    """ INTERNAL: Make a WikiName -> set-of-versions map from a list of
        'WikiName_40digithexversion' names. """
    ret = {}
    for name in names:
        #print "conflict_table -- NAME: ", name
        match = WIKINAME_REGEX.match(name)
        if not match:
            continue
        wiki_word = match.group('wikiword')
        version = match.group('version')
        if not version or not wiki_word: # hmmm... not wiki_word???
            continue
        entry = ret.get(wiki_word, set([]))
        assert version not in entry # Slow! but list should be short
        entry.add(version)
        ret[wiki_word] = entry
    return ret
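
# Hedged usage sketch, assuming WIKINAME_REGEX matches both plain
# 'WikiWord' names and versioned 'WikiWord_<40 hex digits>' names with
# 'wikiword' and 'version' groups:
#
#   conflict_table(['FrontPage',
#                   'FrontPage_' + '0' * 40,
#                   'FrontPage_' + 'f' * 40])
#   # -> {'FrontPage': set(['0000...0', 'ffff...f'])}
#
# Unversioned names contribute nothing because their 'version' group
# is empty.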

def unbundle_wikitext(overlay, in_stream,
                      required_version=None, required_submitter=None):
    """ Unbundle a wiki submission bundle from a zipfile byte stream. """
    wiki_text = os.path.join(overlay.base_path, 'wikitext')
    if not os.path.exists(overlay.overlay_path(wiki_text)):
        os.makedirs(overlay.overlay_path(wiki_text))

    # created, modified, removed, skipped
    op_lut = (set([]), set([]), set([]), set([]))
    arch = ZipFile(in_stream, 'r')
    try:
        base_ver, submitter = unpack_info(arch.read('__INFO__'))
        if required_version is not None and required_version != base_ver:
            raise SubmitError("Expected version: %s, got: %s"
                              % (required_version[:12], base_ver[:12]))
        if required_submitter is not None and submitter != required_submitter:
            raise SubmitError("Expected submitter: %s, got: %s"
                              % (required_submitter, submitter))
        if required_version is None:
            check_base_shas(arch, overlay)
        check_writable(overlay, arch)
        for name in arch.namelist():
            if name == "__INFO__":
                continue
            if not WIKINAME_REGEX.match(name):
                raise SubmitError("File name is not a WikiWord: %s"
                                  % name, True)
            action = extract_wikitext(arch, overlay, name)
            op_lut[action].add(name)
        return op_lut
    finally:
        arch.close()
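
# A hedged convenience sketch (not in the original source) for the
# op_lut tuple unbundle_wikitext() returns. It assumes the index order
# commented above -- 0=created, 1=modified, 2=removed, 3=skipped --
# i.e. that extract_wikitext() returns one of those indices.
def describe_op_lut(op_lut):
    """ Return a human readable summary of an unbundle op_lut. """
    labels = ('created', 'modified', 'removed', 'skipped')
    return ', '.join(['%s: %i' % (label, len(names))
                      for label, names in zip(labels, op_lut)])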

def bundle_wikitext(overlay, version, submitter):
    """ Return raw zipfile bytes containing the overlayed wiki
        changes in the overlay_base dir. """
    assert overlay.is_overlayed()
    # Catch bad wikitext.
    validate_wikitext(overlay)
    wiki_text = os.path.join(overlay.base_path, 'wikitext')
    names = set(overlay.list_pages(wiki_text)).union(
        overlay.list_pages(wiki_text, True))

    # Catch illegal names.
    for name in names:
        if not WIKINAME_REGEX.match(name):
            raise SubmitError("File name is not a WikiWord: %s" % name, True)
        page_ver = WIKINAME_REGEX.match(name).group('version')
        if not page_ver:
            continue
        if not overlay.exists(os.path.join(wiki_text, name), True):
            raise SubmitError("Forked page doesn't exist in base version: %s"
                              % name, True)

    # Catch unresolved merges.
    check_merges([name for name in names
                  if overlay.has_overlay(os.path.join(wiki_text, name))],
                 names,
                 OverlayHasher(overlay).hexdigest)

    illegal_writes = get_read_only_list(overlay)

    buf = StringIO.StringIO()
    arch = ZipFile(buf, 'w')
    assert version
    arch.writestr('__INFO__', pack_info(version, submitter))
    count = 0
    for name in names:
        full_path = os.path.join(wiki_text, name)
        if not overlay.has_overlay(full_path):
            # has_overlay is True for locally deleted files.
            continue
        if not overlay.exists(full_path, True):
            original_sha = NULL_SHA
            original_raw = ''
        else:
            # Compute SHA1 of original file.
            original_raw = overlay.read(full_path, 'rb', True)
            original_sha = utf8_sha(original_raw).digest()
        new_raw = overlay.read(full_path, 'rb')
        if new_raw == original_raw:
            # Don't bundle changes which are already in the repo
            # even if we have a copy of them in the overlay
            # directory.
            continue
        if name in illegal_writes:
            raise SubmitError("Can't modify read only page: %s" % name, True)
        # Make patch.
        delta = unicode_make_patch(original_raw, new_raw)
        # REDFLAG: BLOAT. Worth 40 bytes / file ???
        # Prepend old and new SHA1 to patch so we will know if we
        # are trying to patch against the wrong file or patch
        # a file that has already been patched.
        delta = original_sha + utf8_sha(new_raw).digest() + delta
        arch.writestr(name, delta)
        count += 1
    arch.close()

    if count < 1:
        raise NoChangesError()

    return buf.getvalue()
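
# Hedged round-trip sketch (the helper name is made up; it relies on
# the module's ZipFile/StringIO imports and unpack_info()): a bundle
# produced by bundle_wikitext() is a zip whose '__INFO__' entry holds
# the packed (version, submitter) pair and whose remaining entries are
# the per-page delta records described above.
def list_bundle(raw_bytes):
    """ Return the unpacked __INFO__ tuple and page names in a bundle. """
    arch = ZipFile(StringIO.StringIO(raw_bytes), 'r')
    try:
        info = unpack_info(arch.read('__INFO__'))
        names = [name for name in arch.namelist() if name != '__INFO__']
        return info, names
    finally:
        arch.close()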