def _update_mini_dag(co, changedbs, helper, handles, cset, txn):
    """Write the per-handle DAG and index entries for one changeset.

    For every handle in `handles` an entry is stored in `changedbs.dagdb`
    under `handle + point` and the point is stored in `changedbs.indexdb`
    under `handle + <packed branch position>`.

    co        -- repository object; `co.branchmapdb` is read through db_get
    changedbs -- object exposing `indexdb`, `dagdb` and `mergedb`
    helper    -- callable `helper(co, handle, point, hinfo, txn)` whose
                 result is stored as the entry's 'handle' value
    handles   -- iterable of handles touched at this point
    cset      -- changeset dict with 'precursors', 'point' and 'handles'
    txn       -- transaction handed to every database call

    Handles for which there is no explicit change info and more than one
    simplified precursor are collected and written to `changedbs.mergedb`
    under the point.
    """
    index_db = changedbs.indexdb
    dag_db = changedbs.dagdb

    all_precursors = cset['precursors']
    point = cset['point']

    # the index key suffix is the point's (branch, branchnum) position,
    # packed as two big-endian unsigned 32-bit integers
    position = db_get(co, co.branchmapdb, point, txn)
    bnum = struct.pack('>II', position['branch'], position['branchnum'])

    merged_cleanly = []
    for handle in handles:
        precursors = simplify_precursors(co, handle, changedbs,
                                         all_precursors, txn)[0]

        hinfo = {}
        if handle in cset['handles']:
            hinfo = helper(co, handle, point, cset['handles'][handle], txn)

        entry = {'precursors': precursors}
        if hinfo == {}:
            # nothing explicit was recorded for this handle here
            if len(precursors) > 1:
                merged_cleanly.append(handle)
        else:
            entry['handle'] = hinfo

        # a handle without precursors must be introduced by this changeset
        if precursors == []:
            assert 'add' in cset['handles'][handle]

        dag_db.put(handle + point, bencode(entry), txn=txn)
        index_db.put(handle + bnum, point, txn=txn)

    if merged_cleanly:
        changedbs.mergedb.put(point, bencode(merged_cleanly), txn=txn)
def db_put(co, cdb, key, value, txn):
    """Store `value` bencoded under `key` in `cdb` and remember it in the cache.

    The database write happens first, so the cache is only touched when the
    put did not raise.
    """
    cdb.put(key, bencode(value), txn=txn)

    # NOTE(review): the cache bucket is selected with the module-level name
    # `db`, not the `cdb` argument, so every database shares one bucket.
    # Presumably `cdb` was intended; confirm against db_get before changing,
    # since reader and writer must agree on the bucket key.
    try:
        bucket = co.db_cache[db]
    except KeyError:
        bucket = {}
        co.db_cache[db] = bucket
    bucket[key] = value
def write_index(co, point, handle, index, txn):
    """Record `index` as the 'handle' value of a contents dagdb entry.

    The entry lives under `handle + point` in `co.contents.dagdb`.  An
    existing entry keeps its other keys; when the entry cannot be read
    (DBNotFoundError, or a TypeError while decoding or updating it) a fresh
    entry holding only the index is written instead.
    """
    dagdb = co.contents.dagdb
    entry_key = handle + point

    try:
        entry = bdecode(dagdb.get(entry_key, txn=txn))
        entry['handle'] = index
    except (db.DBNotFoundError, TypeError):
        entry = {'handle': index}

    dagdb.put(entry_key, bencode(entry), txn=txn)
def rebuild_from_points(co, points, txn):
    """Throw away the derived history databases and rebuild them.

    Truncates the changes, branch, branch map, name and content index/merge
    databases, resets 'branchmax' to 0, prunes contents dagdb entries that
    carry no 'handle' value, then calls sync_history for every point in
    `points`, in order.
    """
    # we don't truncate the cdagdb because it contains the offsets and lengths
    # for the diffs in the files, which we can't recreate. the sync below will
    # read those parameters out and rewrite the cdagdb, anyway.
    derived = (
        co.changesdb,
        co.branchdb,
        co.branchmapdb,
        co.names.indexdb,
        co.names.dagdb,
        co.names.mergedb,
        co.contents.indexdb,
        co.contents.mergedb,
    )
    for database in derived:
        database.truncate(txn)

    co.linforepo.put('branchmax', bencode(0), txn=txn)

    # only 40-byte keys are considered (presumably handle + point); of those,
    # entries without a 'handle' value are dropped
    cdagdb = co.contents.dagdb
    for key, value in cdagdb.items(txn):
        if len(key) == 40 and 'handle' not in bdecode(value):
            cdagdb.delete(key, txn=txn)

    for point in points:
        sync_history(co, point, txn)
def validate_handle(handle, precursors, hinfo):
    """Check that `handle` is the digest of its own creation info.

    The expected identifier is the digest of the bencoded dict
    {'precursors': precursors, 'handle': hinfo}.

    Raises HistoryError('bad identifier') when `handle` does not match.
    """
    encinfo = bencode({'precursors': precursors, 'handle': hinfo})
    if sha(encinfo).digest() != handle:
        # call form instead of the `raise Class, value` statement: same
        # behavior on Python 2, and still valid syntax on Python 3
        raise HistoryError('bad identifier')
def _sync_history(co, point, txn, cache=None):
    """Integrate one changeset into the branch map and the mini DAGs.

    co    -- repository object (branchmapdb, branchdb, linforepo, staticdb,
             names and contents are used)
    point -- identifier of the changeset; `cache[point]` must already hold
             its decoded info (a KeyError is raised otherwise)
    txn   -- transaction handed to every database call
    cache -- dict mapping points to decoded changeset info; also passed on
             to handles_in_branch.  Defaults to a fresh dict per call.

    Returns the tuple (named, modified) produced by handles_in_branch.

    Side effects: writes the point's branch map entry, possibly creates or
    advances a branch, registers added handles in staticdb, sets
    `pinfo['point']` on the cached changeset info, and updates the name and
    content mini DAGs.
    """
    # a default of dict() would be created once and shared by every call
    # that omits the argument; use a sentinel and build a new dict instead
    if cache is None:
        cache = {}

    pinfo = cache[point]

    # see if we can extend an existing branch
    pre, prebminfo = None, None
    generations, pre_important = [], []
    bminfo = {'precursors': pinfo['precursors']}
    for pre in bminfo['precursors']:
        prebminfo = db_get(co, co.branchmapdb, pre, txn)
        generations.append(prebminfo['generation'])

        # a branch was already chosen; keep looping only for generations
        if bminfo.has_key('branch'):
            continue

        # the precursor is the tip of its branch when its branchnum equals
        # the branch's 'last' counter, so this point can extend that branch
        binfo = db_get(co, co.branchdb, prebminfo['branch'], txn)
        if prebminfo['branchnum'] == binfo['last']:
            bminfo['branch'] = prebminfo['branch']
            bminfo['branchnum'] = prebminfo['branchnum'] + 1
            binfo['last'] += 1
            db_put(co, co.branchdb, bminfo['branch'], binfo, txn)

            pre_important.append(pre)

    # generation == 'distance to root node', the exception is for the root
    try:
        bminfo['generation'] = max(generations) + 1
    except ValueError:
        bminfo['generation'] = 0

    # if we couldn't extend a branch, start a new one
    if not bminfo.has_key('branch'):
        bminfo['branch'] = bdecode(co.linforepo.get('branchmax', txn=txn)) + 1
        co.linforepo.put('branchmax', bencode(bminfo['branch']), txn=txn)
        bminfo['branchnum'] = 0

        try:
            # using the last precursor for this did the best empirically,
            # beating out both first precursor and largest branch number.
            # (`pre` and `prebminfo` still hold the values from the final
            # iteration of the loop above.)
            binfo = {'last': 0,
                     'parent': prebminfo['branch'],
                     'parentnum': prebminfo['branchnum']}
            pre_important.append(pre)

        except TypeError:
            # special stuff for the rootnode: with no precursors prebminfo
            # is still None, so subscripting it raises TypeError
            assert bminfo['branch'] == 1
            binfo = {'last': 0}

        db_put(co, co.branchdb, bminfo['branch'], binfo, txn)

    db_put(co, co.branchmapdb, point, bminfo, txn)

    # put new files into staticdb
    for (handle, value) in pinfo['handles'].items():
        if value.has_key('add'):
            validate_handle(handle, pinfo['precursors'], value)
            db_put(co, co.staticdb, handle, {'type': value['add']['type']}, txn)

    # figure out which files were modified here and hand off to helpers
    named, modified = handles_in_branch(co, pre_important, [point], txn, cache=cache)

    pinfo['point'] = point
    _update_mini_dag(co, co.names, _update_helper_name, named, pinfo, txn)
    _update_mini_dag(co, co.contents, _update_helper_content, modified, pinfo, txn)

    return (named, modified)
def convert_cset(UR, point):
    # Convert the old-repository changeset at `point` into the new repository.
    #
    # Reads the changeset from UR.old_repo.lcrepo, maps its precursors through
    # UR.point_map and its handles through UR.handle_map, regenerates content
    # diffs against UR.new_repo, writes the resulting changeset(s) to
    # UR.new_repo.lcrepo, records the mapping in UR.point_map[point], syncs
    # history for the new point and returns the new point.
    #
    # Every precursor of `point` must already be present in UR.point_map,
    # otherwise the lookup below raises KeyError.
    #
    # NOTE(review): a second definition of convert_cset, which looks like a
    # reformatted copy of this one, appears later in the visible source; if
    # both live in the same module the later one replaces this one.

    # new handle -> value returned by write_diff, written out once the final
    # point name is known
    indices = {}

    old_cset = bdecode(UR.old_repo.lcrepo.get(point))

    new_cset = {}
    new_cset['precursors'] = [UR.point_map[pre] for pre in old_cset['precursors']]

    if old_cset.has_key('time'):
        new_cset['time'] = old_cset['time']

    if old_cset.has_key('user'):
        new_cset['user'] = old_cset['user']

    # some heuristics for comments and whether this was a server change
    clean_merge = True
    force_new_cset = False

    if old_cset.has_key('comment'):
        clean_merge = False
        # normalise trailing whitespace to a single newline; an empty comment
        # stays empty
        new_cset['comment'] = old_cset['comment'].rstrip()
        if len(new_cset['comment']):
            new_cset['comment'] = new_cset['comment'] + '\n'

    elif point == rootnode:
        pass

    elif old_cset['handles'] != {} or len(old_cset['precursors']) != 2:
        # no comment, but it has explicit changes or is not a two-parent merge
        clean_merge = False
        new_cset['comment'] = '--- comment inserted by cdvupgrade ---\n'

    # sort the handles
    handle_list = UR.sort_names(old_cset['handles'])

    # find implicit clean content merges
    clean_merges, hl = UR.clean_merges(UR, UR.old_repo.contents.dagdb, point)
    handle_list.extend(hl)

    # find implicit clean name merges
    clean_nmerges, hl = UR.clean_merges(UR, UR.old_repo.names.dagdb, point)
    handle_list.extend(hl)

    new_cset['handles'] = handles = {}
    for old_handle in handle_list:
        # handles that come only from the clean-merge lists have no explicit
        # info in the old changeset
        old_hinfo = None
        try:
            old_hinfo = old_cset['handles'][old_handle]
        except KeyError:
            old_hinfo = {}

        # not much has changed
        new_hinfo = copy.copy(old_hinfo)

        new_handle = None
        if UR.handle_map.has_key(old_handle):
            new_handle = UR.handle_map[old_handle]

        # make name changes explicit
        if clean_nmerges.has_key(old_handle):
            name = old_handle_name_at_point(UR.old_repo, old_handle, point, None)
            new_hinfo['parent'] = name['parent']
            new_hinfo['name'] = name['name']

        # fixup the parent pointers
        # (this overwrites any 'parent' set just above with the mapped handle)
        if old_hinfo.has_key('parent'):
            new_hinfo['parent'] = UR.handle_map[old_hinfo['parent']]

        if old_hinfo.has_key('hash') or clean_merges.has_key(old_handle):
            # figure out what the file is supposed to look like now
            lines = old_handle_contents_at_point(UR.old_repo, old_handle, point, None)['lines']

            # if the file is being added, there are no precursors
            precursors = []
            if new_handle is not None and not old_hinfo.has_key('add'):
                precursors = new_cset['precursors']

            # generate the diff against the new repo
            dinfo = gen_diff(UR.new_repo, new_handle, precursors, lines, UR.txn)
            # NOTE(review): dinfo is subscripted here before the None check
            # below; this relies on gen_diff not returning None for adds
            if old_hinfo.has_key('add'):
                dinfo['add'] = 1
                assert dinfo['matches'] == []

            if dinfo is not None:
                diff = bencode(dinfo)
                new_hinfo['hash'] = sha.new(diff).digest()

                # if this used to be a clean merge, we have to replace it
                # NOTE(review): this looks old_handle up in old_cset itself,
                # not in old_cset['handles'] as done at the top of the loop;
                # presumably old_cset['handles'] was intended -- confirm
                if not old_cset.has_key(old_handle) or not old_cset[old_handle].has_key('hash'):
                    force_new_cset = True

            elif new_hinfo.has_key('hash'):
                # no diff was produced, so drop the hash copied from the old info
                del new_hinfo['hash']

            # sanity check
            if new_handle is None:
                assert old_hinfo.has_key('add')
                assert old_hinfo['add']['type'] == 'file'

            # if the file is new, we have to create the handle before writing
            # the diff
            if old_hinfo.has_key('add'):
                nhandle = create_handle(new_cset['precursors'], new_hinfo)
                assert new_handle is None or new_handle == nhandle
                new_handle = nhandle
                UR.handle_map[old_handle] = new_handle

            # write out the new diff
            # (`diff` is bound whenever 'hash' survives the branch above)
            if new_hinfo.has_key('hash'):
                zdiff = zlib.compress(diff, 6)
                indices[new_handle] = write_diff(UR.new_repo, new_handle, zdiff, UR.txn)

        elif old_hinfo.has_key('add'):
            # an add without content is expected to be a directory
            assert old_hinfo['add']['type'] == 'dir'

            nhandle = create_handle(new_cset['precursors'], new_hinfo)
            assert new_handle is None or new_handle == nhandle
            new_handle = nhandle
            UR.handle_map[old_handle] = new_handle

        if new_hinfo != {}:
            handles[new_handle] = new_hinfo

    # if it used to be a clean merge, preserve the line of clean merge heads
    index_point = None
    if clean_merge and force_new_cset:
        # forced_cset is the same dict object as new_cset, so the comment is
        # added in place before it is encoded and stored
        forced_cset = new_cset

        forced_cset['comment'] = '--- change created by cdvupgrade ---\n'

        bforced_cset = bencode(forced_cset)
        forced_point = sha.new(bforced_cset).digest()
        UR.new_repo.lcrepo.put(forced_point, bforced_cset, txn=UR.txn)

        # the diffs belong to the forced changeset, not to the merge below
        index_point = forced_point

        # the actual merge: first original precursor plus the forced point,
        # with no explicit handle changes
        new_cset = {'precursors': [forced_cset['precursors'][0], forced_point],
                    'user': forced_cset['user'],
                    'time': forced_cset['time'],
                    'handles': {}}

    # calculate the new point name and write it out
    bnew_cset = bencode(new_cset)
    new_point = sha.new(bnew_cset).digest()
    UR.new_repo.lcrepo.put(new_point, bnew_cset, txn=UR.txn)

    UR.point_map[point] = new_point

    if index_point is None:
        index_point = new_point

    # now that we know the new point name, write out the indices
    for new_handle, index in indices.items():
        write_index(UR.new_repo, index_point, new_handle, index, UR.txn)

    # diff generation depends on history syncing
    named, modified = sync_history(UR.new_repo, new_point, UR.txn)

    # the return value is discarded; the call is made for whatever side
    # effects handle_contents_at_point has -- TODO confirm intent
    for new_handle in modified:
        handle_contents_at_point(UR.new_repo, new_handle, new_point, UR.txn)

    return new_point
def convert_cset(UR, point):
    # Convert the old-repository changeset at `point` into the new repository.
    #
    # Reads the changeset from UR.old_repo.lcrepo, maps its precursors through
    # UR.point_map and its handles through UR.handle_map, regenerates content
    # diffs against UR.new_repo, writes the resulting changeset(s) to
    # UR.new_repo.lcrepo, records the mapping in UR.point_map[point], syncs
    # history for the new point and returns the new point.
    #
    # Every precursor of `point` must already be present in UR.point_map,
    # otherwise the lookup below raises KeyError.
    #
    # NOTE(review): an earlier definition with the same name is visible above
    # in the source; if both live in the same module this later one is the
    # definition that takes effect.

    # new handle -> value returned by write_diff, written out once the final
    # point name is known
    indices = {}

    old_cset = bdecode(UR.old_repo.lcrepo.get(point))

    new_cset = {}
    new_cset['precursors'] = [
        UR.point_map[pre] for pre in old_cset['precursors']
    ]

    if old_cset.has_key('time'):
        new_cset['time'] = old_cset['time']

    if old_cset.has_key('user'):
        new_cset['user'] = old_cset['user']

    # some heuristics for comments and whether this was a server change
    clean_merge = True
    force_new_cset = False

    if old_cset.has_key('comment'):
        clean_merge = False
        # normalise trailing whitespace to a single newline; an empty comment
        # stays empty
        new_cset['comment'] = old_cset['comment'].rstrip()
        if len(new_cset['comment']):
            new_cset['comment'] = new_cset['comment'] + '\n'

    elif point == rootnode:
        pass

    elif old_cset['handles'] != {} or len(old_cset['precursors']) != 2:
        # no comment, but it has explicit changes or is not a two-parent merge
        clean_merge = False
        new_cset['comment'] = '--- comment inserted by cdvupgrade ---\n'

    # sort the handles
    handle_list = UR.sort_names(old_cset['handles'])

    # find implicit clean content merges
    clean_merges, hl = UR.clean_merges(UR, UR.old_repo.contents.dagdb, point)
    handle_list.extend(hl)

    # find implicit clean name merges
    clean_nmerges, hl = UR.clean_merges(UR, UR.old_repo.names.dagdb, point)
    handle_list.extend(hl)

    new_cset['handles'] = handles = {}
    for old_handle in handle_list:
        # handles that come only from the clean-merge lists have no explicit
        # info in the old changeset
        old_hinfo = None
        try:
            old_hinfo = old_cset['handles'][old_handle]
        except KeyError:
            old_hinfo = {}

        # not much has changed
        new_hinfo = copy.copy(old_hinfo)

        new_handle = None
        if UR.handle_map.has_key(old_handle):
            new_handle = UR.handle_map[old_handle]

        # make name changes explicit
        if clean_nmerges.has_key(old_handle):
            name = old_handle_name_at_point(UR.old_repo, old_handle, point,
                                            None)
            new_hinfo['parent'] = name['parent']
            new_hinfo['name'] = name['name']

        # fixup the parent pointers
        # (this overwrites any 'parent' set just above with the mapped handle)
        if old_hinfo.has_key('parent'):
            new_hinfo['parent'] = UR.handle_map[old_hinfo['parent']]

        if old_hinfo.has_key('hash') or clean_merges.has_key(old_handle):
            # figure out what the file is supposed to look like now
            lines = old_handle_contents_at_point(UR.old_repo, old_handle,
                                                 point, None)['lines']

            # if the file is being added, there are no precursors
            precursors = []
            if new_handle is not None and not old_hinfo.has_key('add'):
                precursors = new_cset['precursors']

            # generate the diff against the new repo
            dinfo = gen_diff(UR.new_repo, new_handle, precursors, lines,
                             UR.txn)
            # NOTE(review): dinfo is subscripted here before the None check
            # below; this relies on gen_diff not returning None for adds
            if old_hinfo.has_key('add'):
                dinfo['add'] = 1
                assert dinfo['matches'] == []

            if dinfo is not None:
                diff = bencode(dinfo)
                new_hinfo['hash'] = sha.new(diff).digest()

                # if this used to be a clean merge, we have to replace it
                # NOTE(review): this looks old_handle up in old_cset itself,
                # not in old_cset['handles'] as done at the top of the loop;
                # presumably old_cset['handles'] was intended -- confirm
                if not old_cset.has_key(old_handle) or not old_cset[
                        old_handle].has_key('hash'):
                    force_new_cset = True

            elif new_hinfo.has_key('hash'):
                # no diff was produced, so drop the hash copied from the old info
                del new_hinfo['hash']

            # sanity check
            if new_handle is None:
                assert old_hinfo.has_key('add')
                assert old_hinfo['add']['type'] == 'file'

            # if the file is new, we have to create the handle before writing
            # the diff
            if old_hinfo.has_key('add'):
                nhandle = create_handle(new_cset['precursors'], new_hinfo)
                assert new_handle is None or new_handle == nhandle
                new_handle = nhandle
                UR.handle_map[old_handle] = new_handle

            # write out the new diff
            # (`diff` is bound whenever 'hash' survives the branch above)
            if new_hinfo.has_key('hash'):
                zdiff = zlib.compress(diff, 6)
                indices[new_handle] = write_diff(UR.new_repo, new_handle,
                                                 zdiff, UR.txn)

        elif old_hinfo.has_key('add'):
            # an add without content is expected to be a directory
            assert old_hinfo['add']['type'] == 'dir'

            nhandle = create_handle(new_cset['precursors'], new_hinfo)
            assert new_handle is None or new_handle == nhandle
            new_handle = nhandle
            UR.handle_map[old_handle] = new_handle

        if new_hinfo != {}:
            handles[new_handle] = new_hinfo

    # if it used to be a clean merge, preserve the line of clean merge heads
    index_point = None
    if clean_merge and force_new_cset:
        # forced_cset is the same dict object as new_cset, so the comment is
        # added in place before it is encoded and stored
        forced_cset = new_cset

        forced_cset['comment'] = '--- change created by cdvupgrade ---\n'

        bforced_cset = bencode(forced_cset)
        forced_point = sha.new(bforced_cset).digest()
        UR.new_repo.lcrepo.put(forced_point, bforced_cset, txn=UR.txn)

        # the diffs belong to the forced changeset, not to the merge below
        index_point = forced_point

        # the actual merge: first original precursor plus the forced point,
        # with no explicit handle changes
        new_cset = {
            'precursors': [forced_cset['precursors'][0], forced_point],
            'user': forced_cset['user'],
            'time': forced_cset['time'],
            'handles': {}
        }

    # calculate the new point name and write it out
    bnew_cset = bencode(new_cset)
    new_point = sha.new(bnew_cset).digest()
    UR.new_repo.lcrepo.put(new_point, bnew_cset, txn=UR.txn)

    UR.point_map[point] = new_point

    if index_point is None:
        index_point = new_point

    # now that we know the new point name, write out the indices
    for new_handle, index in indices.items():
        write_index(UR.new_repo, index_point, new_handle, index, UR.txn)

    # diff generation depends on history syncing
    named, modified = sync_history(UR.new_repo, new_point, UR.txn)

    # the return value is discarded; the call is made for whatever side
    # effects handle_contents_at_point has -- TODO confirm intent
    for new_handle in modified:
        handle_contents_at_point(UR.new_repo, new_handle, new_point, UR.txn)

    return new_point