def splitJobList(jobList, src, targetSet, hidden=False, callback=None):
    """Mirror a job list one package at a time.

    Fallback used when a combined changeset request fails with a
    ChangeSetKeyConflictError: the jobs are regrouped by top-level
    package name and each group is fetched from ``src`` into its own
    temporary changeset file, then committed to every target in
    ``targetSet``.

    jobList   -- sequence of repository jobs; job[0] is the trove name
    src       -- source repository client used to create changeset files
    targetSet -- iterable of mirror targets to commit each changeset to
    hidden    -- commit the changesets as hidden (multi-target sync)
    callback  -- ChangesetCallback for progress reporting; a fresh one is
                 created when omitted
    """
    if callback is None:
        # NOTE: the old signature used ``callback=ChangesetCallback()``,
        # which built one shared instance at function-definition time;
        # create a fresh callback per call instead.
        callback = ChangesetCallback()
    log.debug("Changeset Key conflict detected; splitting job further...")
    # group the jobs by package name; components ("pkg:comp") are kept
    # together with their package so each changeset stays self-consistent
    jobs = {}
    for job in jobList:
        name = job[0]
        if ':' in name:
            name = name.split(':')[0]
        jobs.setdefault(name, []).append(job)
    for i, smallJobList in enumerate(jobs.itervalues()):
        (outFd, tmpName) = util.mkstemp()
        os.close(outFd)
        log.debug("jobsplit %d of %d %s" % (
            i + 1, len(jobs),
            displayBundle([(0, x) for x in smallJobList])))
        try:
            src.createChangeSetFile(smallJobList, tmpName, recurse=False,
                                    callback=callback, mirrorMode=True)
            for target in targetSet:
                target.commitChangeSetFile(tmpName, hidden=hidden,
                                           callback=callback)
        finally:
            # don't leak the temporary changeset file, even on error
            os.unlink(tmpName)
        callback.done()
    return
def splitJobList(jobList, src, targetSet, hidden = False, callback = ChangesetCallback()):
    """Fall back to mirroring a conflicting job list in per-package chunks.

    The jobs are bucketed by their top-level package name, then each
    bucket is pulled from ``src`` into a temporary changeset file and
    committed to every target in ``targetSet``.
    """
    log.debug("Changeset Key conflict detected; splitting job further...")
    # bucket the jobs by package name; a component "pkg:comp" lands in
    # the "pkg" bucket (split on ':' is a no-op for plain package names)
    buckets = {}
    for job in jobList:
        pkgName = job[0].split(':')[0]
        buckets.setdefault(pkgName, []).append(job)
    chunkCount = len(buckets)
    for chunkIdx, chunk in enumerate(buckets.itervalues()):
        outFd, tmpName = util.mkstemp()
        os.close(outFd)
        log.debug("jobsplit %d of %d %s" % (
            chunkIdx + 1, chunkCount,
            displayBundle([(0, job) for job in chunk])))
        # fetch this chunk's changeset from the source ...
        src.createChangeSetFile(chunk, tmpName, recurse = False,
                                callback = callback, mirrorMode = True)
        # ... and commit it to every requested target
        for target in targetSet:
            target.commitChangeSetFile(tmpName, hidden = hidden,
                                       callback = callback)
        os.unlink(tmpName)
        callback.done()
    return
def mirrorRepository(sourceRepos, targetRepos, cfg,
                     test=False, sync=False, syncSigs=False,
                     callback=None, fastSync=False, referenceRepos=None):
    """Run one pass of mirroring from a source repository into targets.

    Returns the number of troves updated; returns -1 when the caller
    should call again (more work is pending), and 0 when there is
    nothing left to mirror.

    sourceRepos    -- repository changesets are fetched from
    targetRepos    -- target repository/repositories (see _makeTargets)
    cfg            -- mirror configuration
    test           -- log what would be mirrored without committing
    sync           -- restart mirroring from the beginning (mark -1)
    syncSigs       -- resync all trove signatures
    callback       -- ChangesetCallback for progress; a fresh instance is
                      created when omitted
    fastSync       -- skip the troveinfo resync step
    referenceRepos -- repository used for queries (defaults to
                      sourceRepos)
    """
    if callback is None:
        # the old default ``callback=ChangesetCallback()`` created one
        # shared instance at function-definition time; build a fresh
        # callback per call instead
        callback = ChangesetCallback()
    if referenceRepos is None:
        referenceRepos = sourceRepos
    checkConfig(cfg)
    targets = _makeTargets(cfg, targetRepos, test)
    log.debug("-" * 20 + " start loop " + "-" * 20)

    # with several targets (or when configured) commits stay hidden until
    # every target has them, then are revealed at once
    hidden = len(targets) > 1 or cfg.useHiddenCommits
    if hidden:
        log.debug("will use hidden commits to synchronize target mirrors")

    if sync:
        currentMark = -1
    else:
        marks = [t.getMirrorMark() for t in targets]
        # we use the oldest mark as a starting point (since we have to
        # get stuff from source for that oldest one anyway)
        currentMark = min(marks)
    log.debug("using common mirror mark %s", currentMark)

    # reset mirror mark to the lowest common denominator
    for t in targets:
        if t.getMirrorMark() != currentMark:
            t.setMirrorMark(currentMark)

    # mirror gpg signatures from the src into the targets
    for t in targets:
        t.mirrorGPG(referenceRepos, cfg.host)

    # mirror changed trove information for troves already mirrored
    if fastSync:
        updateCount = 0
        log.debug("skip trove info records sync because of fast-sync")
    else:
        updateCount = mirrorTroveInfo(referenceRepos, targets,
                                      currentMark, cfg, syncSigs)

    newMark, troveList = getTroveList(referenceRepos, cfg, currentMark)
    if not troveList:
        if newMark > currentMark:  # something was returned, but filtered out
            for t in targets:
                t.setMirrorMark(newMark)
            return -1  # call again
        return 0

    # prepare a new max mark to be used when we need to break out of a loop
    crtMaxMark = max(long(x[0]) for x in troveList)
    if currentMark > 0 and crtMaxMark == currentMark:
        # if we're hung on the current max then we need to
        # forcibly advance the mark in case we're stuck
        crtMaxMark += 1  # only used if we filter out all troves below
    initTLlen = len(troveList)

    # removed troves are a special blend - we keep them separate
    removedSet = set([x[1] for x in troveList
                      if x[2] == trove.TROVE_TYPE_REMOVED])
    troveList = [(x[0], x[1]) for x in troveList
                 if x[2] != trove.TROVE_TYPE_REMOVED]

    # figure out if we need to recurse the group-troves
    if cfg.recurseGroups:
        # avoid adding duplicates
        troveSetList = set([x[1] for x in troveList])
        for mark, (name, version, flavor) in troveList:
            if trove.troveIsGroup(name):
                recTroves = recurseTrove(referenceRepos, name, version,
                                         flavor, callback=callback)
                # add sources here:
                if cfg.includeSources:
                    troveInfo = referenceRepos.getTroveInfo(
                        trove._TROVEINFO_TAG_SOURCENAME, recTroves)
                    sourceComps = set()
                    for nvf, source in itertools.izip(recTroves, troveInfo):
                        sourceComps.add((source(),
                                         nvf[1].getSourceVersion(),
                                         parseFlavor('')))
                    recTroves.extend(sourceComps)
                # add the results at the end with the current mark
                for (n, v, f) in recTroves:
                    if (n, v, f) not in troveSetList:
                        troveList.append((mark, (n, v, f)))
                        troveSetList.add((n, v, f))
        log.debug("after group recursion %d troves are needed",
                  len(troveList))
        # we need to make sure we mirror the GPG keys of any newly added
        # troves
        newHosts = set([x[1].getHost()
                        for x in troveSetList.union(removedSet)])
        for host in newHosts.difference(set([cfg.host])):
            for t in targets:
                t.mirrorGPG(referenceRepos, host)

    # we check which troves from the troveList are needed on each
    # target and we split the troveList into separate lists depending
    # on how many targets require each
    byTarget = {}
    targetSetList = []
    if len(troveList):
        byTrove = {}
        for i, target in enumerate(targets):
            for t in target.addTroveList(troveList):
                byTrove.setdefault(t, set()).add(i)
        # invert the dict by target now
        for trv, ts in byTrove.iteritems():
            targetSet = [targets[i] for i in ts]
            try:
                targetIdx = targetSetList.index(targetSet)
            except ValueError:
                targetSetList.append(targetSet)
                targetIdx = len(targetSetList) - 1
            byTarget.setdefault(targetIdx, []).append(trv)
        del byTrove

    # if we were returned troves, but we filtered them all out, advance
    # the mark and signal "try again"
    if len(byTarget) == 0 and len(removedSet) == 0 and initTLlen:
        # we had troves and now we don't
        log.debug("no troves found for our label %s" % cfg.labels)
        for t in targets:
            t.setMirrorMark(crtMaxMark)
        # try again
        return -1

    # now we get each section of the troveList for each targetSet. We
    # start off mirroring by those required by fewer targets, using
    # the assumption that those troves are what is required for the
    # targets to catch up to a common set
    if len(byTarget) > 1:
        log.debug("split %d troves into %d chunks by target",
                  len(troveList), len(byTarget))
    # sort the targetSets by length; the key-based stable sort is
    # equivalent to the old cmp-based one and also works on Python 3
    targetSets = list(enumerate(targetSetList))
    targetSets.sort(key=lambda x: len(x[1]))

    bundlesMark = 0
    for idx, targetSet in targetSets:
        troveList = byTarget[idx]
        if not troveList:  # XXX: should not happen...
            continue
        log.debug("mirroring %d troves into %d targets",
                  len(troveList), len(targetSet))
        # since these troves are required for all targets, we can use
        # the "first" one to build the relative changeset requests
        target = list(targetSet)[0]
        bundles = buildBundles(sourceRepos, target, troveList,
                               cfg.absoluteChangesets)
        for i, bundle in enumerate(bundles):
            jobList = [x[1] for x in bundle]
            # XXX it's a shame we can't give a hint as to what server to
            # use to avoid having to open the changeset and read in bits
            # of it
            if test:
                log.debug("test mode: not mirroring (%d of %d) %s" % (
                    i + 1, len(bundles), jobList))
                updateCount += len(bundle)
                continue
            (outFd, tmpName) = util.mkstemp()
            os.close(outFd)
            log.debug("getting (%d of %d) %s" % (
                i + 1, len(bundles), displayBundle(bundle)))
            try:
                sourceRepos.createChangeSetFile(jobList, tmpName,
                                                recurse=False,
                                                callback=callback,
                                                mirrorMode=True)
            except changeset.ChangeSetKeyConflictError:
                # conflicting changeset keys: retry with smaller jobs
                splitJobList(jobList, sourceRepos, targetSet,
                             hidden=hidden, callback=callback)
            else:
                for target in targetSet:
                    target.commitChangeSetFile(tmpName, hidden=hidden,
                                               callback=callback)
            try:
                os.unlink(tmpName)
            except OSError:
                pass
            callback.done()
            updateCount += len(bundle)
        # compute the max mark of the bundles we comitted
        mark = max([min([x[0] for x in bundle]) for bundle in bundles])
        if mark > bundlesMark:
            bundlesMark = mark

    # when we're all done looping, advance the mark to the new max (the
    # original used a ``for ... else`` clause here; with no ``break`` in
    # the loop that is identical to straight-line code)
    if bundlesMark == 0 or bundlesMark <= currentMark:
        bundlesMark = crtMaxMark  # avoid repeating the same query...
    for target in targets:
        if hidden:
            # if we've hidden the last commits, show them now
            target.presentHiddenTroves()
        target.setMirrorMark(bundlesMark)

    # mirroring removed troves requires one by one processing
    for target in targets:
        copySet = removedSet.copy()
        updateCount += mirrorRemoved(referenceRepos, target.repo, copySet,
                                     test=test, callback=callback)

    # if this was a noop because the removed troves were already mirrored
    # we need to keep going
    if updateCount == 0 and len(removedSet):
        for target in targets:
            target.setMirrorMark(crtMaxMark)
        return -1
    return updateCount
def mirrorRepository(
        sourceRepos,
        targetRepos,
        cfg,
        test=False,
        sync=False,
        syncSigs=False,
        callback=ChangesetCallback(),
        fastSync=False,
        referenceRepos=None,
        ):
    """Run one pass of mirroring from a source repository into targets.

    Returns the number of troves updated; returns -1 when the caller
    should call again (more work is pending), and 0 when nothing is
    left to mirror.

    sourceRepos    -- repository changesets are fetched from
    targetRepos    -- target repository/repositories (see _makeTargets)
    cfg            -- mirror configuration
    test           -- log what would be mirrored without committing
    sync           -- restart mirroring from the beginning (mark -1)
    syncSigs       -- resync all trove signatures
    callback       -- ChangesetCallback for progress reporting
                      (NOTE(review): this default is a single instance
                      shared across calls — evaluated once at def time)
    fastSync       -- skip the troveinfo resync step
    referenceRepos -- repository used for queries (defaults to
                      sourceRepos)
    """
    if referenceRepos is None:
        referenceRepos = sourceRepos
    checkConfig(cfg)
    targets = _makeTargets(cfg, targetRepos, test)
    log.debug("-" * 20 + " start loop " + "-" * 20)

    # with several targets (or when configured) commits stay hidden until
    # every target has them, then are revealed at once
    hidden = len(targets) > 1 or cfg.useHiddenCommits
    if hidden:
        log.debug("will use hidden commits to synchronize target mirrors")

    if sync:
        currentMark = -1
    else:
        marks = [t.getMirrorMark() for t in targets]
        # we use the oldest mark as a starting point (since we have to
        # get stuff from source for that oldest one anyway)
        currentMark = min(marks)
    log.debug("using common mirror mark %s", currentMark)

    # reset mirror mark to the lowest common denominator
    for t in targets:
        if t.getMirrorMark() != currentMark:
            t.setMirrorMark(currentMark)

    # mirror gpg signatures from the src into the targets
    for t in targets:
        t.mirrorGPG(referenceRepos, cfg.host)

    # mirror changed trove information for troves already mirrored
    if fastSync:
        updateCount = 0
        log.debug("skip trove info records sync because of fast-sync")
    else:
        updateCount = mirrorTroveInfo(referenceRepos, targets,
                                      currentMark, cfg, syncSigs)
    newMark, troveList = getTroveList(referenceRepos, cfg, currentMark)
    if not troveList:
        if newMark > currentMark:  # something was returned, but filtered out
            for t in targets:
                t.setMirrorMark(newMark)
            return -1  # call again
        return 0

    # prepare a new max mark to be used when we need to break out of a loop
    crtMaxMark = max(long(x[0]) for x in troveList)
    if currentMark > 0 and crtMaxMark == currentMark:
        # if we're hung on the current max then we need to
        # forcibly advance the mark in case we're stuck
        crtMaxMark += 1  # only used if we filter out all troves below
    initTLlen = len(troveList)

    # removed troves are a special blend - we keep them separate
    removedSet = set(
        [x[1] for x in troveList if x[2] == trove.TROVE_TYPE_REMOVED])
    troveList = [(x[0], x[1]) for x in troveList if
                 x[2] != trove.TROVE_TYPE_REMOVED]

    # figure out if we need to recurse the group-troves
    if cfg.recurseGroups:
        # avoid adding duplicates
        troveSetList = set([x[1] for x in troveList])
        for mark, (name, version, flavor) in troveList:
            if trove.troveIsGroup(name):
                recTroves = recurseTrove(referenceRepos, name, version,
                                         flavor, callback=callback)
                # add sources here:
                if cfg.includeSources:
                    troveInfo = referenceRepos.getTroveInfo(
                        trove._TROVEINFO_TAG_SOURCENAME, recTroves)
                    sourceComps = set()
                    for nvf, source in itertools.izip(recTroves, troveInfo):
                        sourceComps.add((source(),
                                         nvf[1].getSourceVersion(),
                                         parseFlavor('')))
                    recTroves.extend(sourceComps)
                # add the results at the end with the current mark
                for (n, v, f) in recTroves:
                    if (n, v, f) not in troveSetList:
                        troveList.append((mark, (n, v, f)))
                        troveSetList.add((n, v, f))
        log.debug("after group recursion %d troves are needed",
                  len(troveList))
        # we need to make sure we mirror the GPG keys of any newly added troves
        newHosts = set(
            [x[1].getHost() for x in troveSetList.union(removedSet)])
        for host in newHosts.difference(set([cfg.host])):
            for t in targets:
                t.mirrorGPG(referenceRepos, host)

    # we check which troves from the troveList are needed on each
    # target and we split the troveList into separate lists depending
    # on how many targets require each
    byTarget = {}
    targetSetList = []
    if len(troveList):
        byTrove = {}
        for i, target in enumerate(targets):
            for t in target.addTroveList(troveList):
                bt = byTrove.setdefault(t, set())
                bt.add(i)
        # invert the dict by target now
        for trv, ts in byTrove.iteritems():
            targetSet = [targets[i] for i in ts]
            try:
                targetIdx = targetSetList.index(targetSet)
            except ValueError:
                targetSetList.append(targetSet)
                targetIdx = len(targetSetList) - 1
            bt = byTarget.setdefault(targetIdx, [])
            bt.append(trv)
        del byTrove

    # if we were returned troves, but we filtered them all out, advance the
    # mark and signal "try again"
    if len(byTarget) == 0 and len(removedSet) == 0 and initTLlen:
        # we had troves and now we don't
        log.debug("no troves found for our label %s" % cfg.labels)
        for t in targets:
            t.setMirrorMark(crtMaxMark)
        # try again
        return -1

    # now we get each section of the troveList for each targetSet. We
    # start off mirroring by those required by fewer targets, using
    # the assumption that those troves are what is required for the
    # targets to catch up to a common set
    if len(byTarget) > 1:
        log.debug("split %d troves into %d chunks by target",
                  len(troveList), len(byTarget))
    # sort the targetSets by length
    targetSets = list(enumerate(targetSetList))
    targetSets.sort(lambda a, b: cmp(len(a[1]), len(b[1])))

    bundlesMark = 0
    for idx, targetSet in targetSets:
        troveList = byTarget[idx]
        if not troveList:  # XXX: should not happen...
            continue
        log.debug("mirroring %d troves into %d targets",
                  len(troveList), len(targetSet))
        # since these troves are required for all targets, we can use
        # the "first" one to build the relative changeset requests
        target = list(targetSet)[0]
        bundles = buildBundles(sourceRepos, target, troveList,
                               cfg.absoluteChangesets)
        for i, bundle in enumerate(bundles):
            jobList = [x[1] for x in bundle]
            # XXX it's a shame we can't give a hint as to what server to use
            # to avoid having to open the changeset and read in bits of it
            if test:
                log.debug("test mode: not mirroring (%d of %d) %s" %
                          (i + 1, len(bundles), jobList))
                updateCount += len(bundle)
                continue
            (outFd, tmpName) = util.mkstemp()
            os.close(outFd)
            log.debug("getting (%d of %d) %s" %
                      (i + 1, len(bundles), displayBundle(bundle)))
            try:
                sourceRepos.createChangeSetFile(jobList, tmpName,
                                                recurse=False,
                                                callback=callback,
                                                mirrorMode=True)
            except changeset.ChangeSetKeyConflictError:
                # conflicting changeset keys: retry with smaller jobs
                splitJobList(jobList, sourceRepos, targetSet,
                             hidden=hidden, callback=callback)
            else:
                for target in targetSet:
                    target.commitChangeSetFile(tmpName, hidden=hidden,
                                               callback=callback)
            try:
                os.unlink(tmpName)
            except OSError:
                pass
            callback.done()
            updateCount += len(bundle)
        # compute the max mark of the bundles we comitted
        mark = max([min([x[0] for x in bundle]) for bundle in bundles])
        if mark > bundlesMark:
            bundlesMark = mark
    else:
        # only when we're all done looping advance mark to the new max
        # (note: there is no ``break`` above, so this ``for...else``
        # clause always runs)
        if bundlesMark == 0 or bundlesMark <= currentMark:
            bundlesMark = crtMaxMark  # avoid repeating the same query...
        for target in targets:
            if hidden:
                # if we've hidden the last commits, show them now
                target.presentHiddenTroves()
            target.setMirrorMark(bundlesMark)

    # mirroring removed troves requires one by one processing
    for target in targets:
        copySet = removedSet.copy()
        updateCount += mirrorRemoved(referenceRepos, target.repo, copySet,
                                     test=test, callback=callback)

    # if this was a noop because the removed troves were already mirrored
    # we need to keep going
    if updateCount == 0 and len(removedSet):
        for target in targets:
            target.setMirrorMark(crtMaxMark)
        return -1
    return updateCount