class G2P:
    """Batch and run the p4 commands needed to copy Git commits to Perforce.

    Add/edit/delete/move operations are accumulated in ``addeditdelete``
    (a dict mapping a p4 command string to the list of paths it applies
    to) and flushed by ``run_p4_commands()``.
    """

    def __init__(self, ctx):
        """Store the context and set up timers, counters and the user map."""
        self.ctx = ctx
        # command string ==> list of paths (or (from, to) pairs for 'move')
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([
            OVERALL,
            (FAST_EXPORT, OVERALL),
            (TEST_BLOCK_PUSH, OVERALL),
            (CHECK_CONFLICT, OVERALL),
            (COPY, OVERALL),
            (GIT_CHECKOUT, COPY),
            (CHECK_PROTECTS, COPY),
            (COPY_BLOBS_1, COPY),
            (COPY_BLOBS_2, COPY),
            (MIRROR, OVERALL),
        ])
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        """Return the timer report followed by the mirror's report."""
        return "\n".join([str(self.perf), str(self.ctx.mirror)])

    def revert_and_raise(self, errmsg):
        """Revert all pending modifications, log the error and raise.

        Called when an error occurred while attempting to submit the
        incoming change to Perforce.

        Args:
            errmsg: Text describing the failure. If empty, the current
                stack trace is used instead.

        Raises:
            RuntimeError: always.
        """
        # roll back and raise the problem to the caller
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        self.remove_added_files()
        if not errmsg:
            errmsg = traceback.format_stack()
        msg = "import failed: {}".format(errmsg)
        LOG.error(msg)
        raise RuntimeError(msg)

    def _p4_message_to_text(self, msg):
        """Convert one P4 message to a string.

        Some errors are annotated with additional context: the current
        P4USER for message ids in MSGID_EXPLAIN_P4USER and the current
        P4CLIENT for message ids in MSGID_EXPLAIN_P4CLIENT.
        """
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            # The value is the client name, so label it as such (this was
            # previously mislabelled as P4USER).
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt

    def check_p4_messages(self):
        """Fail the commit if the last command reported "cannot open".

        If the results indicate a file is locked by another user, raise an
        exception so that the overall commit will fail. The changes made
        so far will be reverted.

        Raises:
            RuntimeError: via revert_and_raise() when any message with an
                id in MSGID_CANNOT_OPEN is present.
        """
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:
            return
        lines = [self._p4_message_to_text(m) for m in msgs]
        self.revert_and_raise('\n'.join(lines))

    def _p4run(self, cmd):
        """Run one P4 command, logging cmd and results.

        Args:
            cmd: list of strings, the p4 command followed by its arguments.

        Raises:
            RuntimeError: via check_p4_messages() if a file could not be
                opened.
        """
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
            LOG.getChild('p4.err').error("\n".join(p4.errors))
        if p4.warnings:
            LOG.getChild('p4.warn').warning("\n".join(p4.warnings))
        LOG.getChild('p4.out').debug("{}".format(results))
        if LOG.getChild('p4.msgid').isEnabledFor(logging.DEBUG):
            log = LOG.getChild('p4.msgid')
            for m in p4.messages:
                log.debug(p4gf_p4msg.msg_repr(m))

        self.check_p4_messages()

    def run_p4_commands(self):
        """Run all pending p4 commands collected by setup_p4_command()."""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file)
            if cmd[0] != 'add':
                cmd.append('-k')

            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and
                # new name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for (frompath, topath) in paths:
                    self._p4run(['move', '-k',
                                 escape_path(frompath),
                                 escape_path(topath)])
                    LOG.debug("Move from {} to {}".format(frompath, topath))
            else:
                reopen = []
                if 'edit -t' in operation:
                    # edit -t text does not work, must 'edit' then
                    # 'reopen -t'
                    # "can't change from xtext - use 'reopen'"
                    reopen = ['reopen', '-t', cmd[2]]
                    cmd = cmd[0:1] + cmd[3:]

                # 'add' takes the local paths as-is; every other command
                # gets escaped paths.
                if cmd[0] != 'add':
                    self._p4run(cmd + [escape_path(path) for path in paths])
                else:
                    self._p4run(cmd + paths)

                if reopen:
                    self._p4run(
                        reopen + [escape_path(path) for path in paths])

                if cmd[0] == 'delete':
                    # 'delete -k' leaves the local file; remove it ourselves
                    LOG.debug("Delete {}".format(paths))
                    for path in paths:
                        os.remove(path)

    def remove_added_files(self):
        """Remove added files to restore p4 client after a failed command.

        Files that are already gone are skipped: this runs on the error
        path of revert_and_raise(), and a missing file must not replace
        the original error with a FileNotFoundError.
        """
        for operation, paths in self.addeditdelete.items():
            if operation.split(' ')[0] != 'add':
                continue
            for path in paths:
                if os.path.lexists(path):
                    os.unlink(path)

    def setup_p4_command(self, command, p4path):
        """Add command to the list to be run by run_p4_commands().

        Args:
            command: p4 command string, used as the batching key.
            p4path: path the command applies to. If the command is 'move'
                this is expected to be a tuple of (frompath, topath).
        """
        self.addeditdelete.setdefault(command, []).append(p4path)

    def _toggle_filetype(self, p4path, isx):
        """Return the new file type for the named file, switching the
        executable state based on the isx value.

        Args:
            p4path: Path of the file to modify.
            isx: True if currently executable.

        Returns:
            New type for the file; may be None.
        """
        p4type = None
        if isx:
            p4type = '+x'
        else:
            # To remove a previously assigned modifier, the whole filetype
            # must be specified.
            # NOTE(review): there is no break, so a value found for
            # 'headType' is replaced by whatever the 'type' query yields.
            # Confirm that is intended.
            for tipe in ['headType', 'type']:
                # For a file that was executable, is being renamed (with
                # edits), and is no longer executable, we need to handle the
                # fact that it's not yet in Perforce and so does not have a
                # headType.
                try:
                    p4type = p4gf_util.first_value_for_key(
                        self.ctx.p4.run(['fstat', '-T' + tipe, p4path]),
                        tipe)
                except P4.P4Exception:
                    pass
                if p4type:
                    p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    def add_or_edit_blob(self, blob):
        """Queue a p4 add or edit for a new or modified file.

        The file content is copied from the Git work tree path
        ``blob['path']`` into the p4 client before the command is queued.
        """
        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']

        # edit or add?
        isedit = os.path.exists(p4path)

        # make sure dest dir exists
        dstdir = os.path.dirname(p4path)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # for edits, only use +x or -x to propagate partial filetype
            # changes
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
            else:
                p4type = None
            if p4type:
                LOG.debug(
                    " set filetype: {ft} oldx={oldx} newx={newx}".format(
                        ft=p4type, oldx=wasx, newx=isx))
                shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
        else:
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # for adds, use complete filetype of new file
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # if file exists it's an edit, so do p4 edit before copying content
        # for an add, do p4 add after copying content
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
        else:
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """Queue a p4 move for a renamed/moved file."""
        self.perf.counter[N_RENAMES] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # ensure destination directory exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)
        # copy out of Git repo to Perforce workspace
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    def copy_blob(self, blob):
        """Run p4 copy (integ) for a copied file; this is not batched."""
        self.perf.counter[N_BLOBS] += 1

        # get local path in p4 client
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        self._p4run(
            ["copy", "-v", escape_path(p4frompath), escape_path(p4topath)])

        # make sure dest dir exists
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """Queue a p4 delete for a deleted file."""
        # get local path in p4 client
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    def copy_blobs(self, blobs):
        """Copy git blobs to perforce revs.

        Args:
            blobs: list of dicts with at least an 'action' key ('R', 'C',
                'M' or 'D') plus the path keys the per-action methods read.
        """
        # first, one pass to do rename/copy
        # these don't batch. move can't batch due to p4 limitations.
        # however, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                    self.rename_blob(blob)
                elif blob['action'] == 'C':
                    self.copy_blob(blob)
            self.run_p4_commands()

        # then, another pass to do add/edit/delete
        # these are batched to allow running the minimum number of
        # p4 commands. That means no more than one delete, one add per
        # filetype and one edit per filetype. Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                    self.add_or_edit_blob(blob)
                elif blob['action'] == 'D':
                    self.delete_blob(blob)
            self.run_p4_commands()

    def check_protects(self, p4user, blobs):
        """Check if author is authorized to submit files.

        Raises:
            RuntimeError: via revert_and_raise() if the checker reports
                an error.
        """
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        pc.filter_paths(blobs)
        if pc.has_error():
            self.revert_and_raise(pc.error_message())

    def _reset_for_new_commit(self):
        """Clear out state from previous commit that must not carry over
        into next commit.
        """
        self.addeditdelete = {}

    def attempt_resync(self):
        """Attempt to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository.

        This prevents the obscure "file(s) not on client" error. A
        P4Exception is logged and otherwise ignored.
        """
        # we assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = \
                    self.ctx.mirror.get_change_for_commit(
                        last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # don't stop the world if we have an error above
            LOG.warning("resync failed with exception", exc_info=True)

    def copy_commit(self, commit):
        """Copy a single commit to Perforce.

        Args:
            commit: fast-export commit dict; the keys read here are 'mark',
                'data', 'files', 'sha1', 'author' and optionally 'merge'.

        Returns:
            A mark string ":<changenum> <sha1>", or None if the commit
            opened no files and was ignored.

        Raises:
            RuntimeError: via revert_and_raise() for merge commits, unknown
                users, invalid filenames, protection failures and p4 errors.
        """
        self._reset_for_new_commit()
        LOG.debug("for commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted."
                                   + " Rebase to create a linear"
                                   + " history.").format(commit['sha1']))

        # strip any enclosing angle brackets from the email address
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
            self.revert_and_raise(
                "User '{}' not permitted to commit".format(email))
        author_p4user = user[0]

        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:
                self.revert_and_raise(err)

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinquish the revision from a
                # path...
                p4gf_util.popen(
                    ['git', 'reset', '--hard', commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        try:
            self.copy_blobs(commit['files'])
        except P4.P4Exception as e:
            self.revert_and_raise(str(e))

        with self.perf.timer[COPY_BLOBS_2]:
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}".format(
                pusher_p4user, author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

            try:
                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                                          commit['author']['date'])
                    LOG.info("Submitted change @{} for commit {}".format(
                        changenum, commit['sha1']))
                else:
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
                # revert_and_raise() always raises, so changenum is bound
                # whenever the return below is reached.
                self.revert_and_raise(str(e))
            return ":" + str(changenum) + " " + commit['sha1']

    def test_block_push(self):
        """Test hook to temporarily block and let test script introduce
        conflicting changes.

        Does nothing unless the P4GF_TEST_BLOCK_PUSH test variable is set.
        The variable's 'after' and 'until' entries are matched against
        changelist descriptions via contains_desc().
        """
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:
            return

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)
        log.debug(block_dict)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(
            p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)))
        cl_ay = self.ctx.p4.run(
            'changes', '-l',
            p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client))

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))
                return

        until = block_dict['until']
        log.debug("BLOCKING. Seen 'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now".format(
            path=p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client),
            change=cl_ay[-1]['change']))

        # Poll once a second until a matching changelist shows up.
        while not contains_desc(until, cl_ay):
            time.sleep(1)
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")

    def copy(self, start_at, end_at):
        """Copy a set of commits from git into perforce.

        Args:
            start_at: start of the commit range handed to FastExport.
            end_at: end of the commit range handed to FastExport.

        Raises:
            RuntimeError: on an unexpected fast-export command, on any
                failure raised by copy_commit(), or when the conflict
                checker reports a conflicting Perforce change.
        """
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(
                    start_at, end_at))
                self.attempt_resync()
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at,
                                                    self.ctx.tempdir.name)
                    fe.run()
                marks = []
                commit_count = sum(
                    1 for x in fe.commands if x['command'] == 'commit')
                self.progress.progress_init_determinate(commit_count)
                try:
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                            self.test_block_push()
                        if command['command'] == 'commit':
                            self.progress.progress_increment(
                                "Copying changelists...")
                            self.ctx.heartbeat()
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                                    continue
                            with self.perf.timer[CHECK_CONFLICT]:
                                (git_commit_sha1,
                                 p4_changelist_number
                                 ) = mark_to_commit_changelist(mark)
                                conflict_checker.record_commit(
                                    git_commit_sha1, p4_changelist_number)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                                    break
                            marks.append(mark)
                        elif command['command'] == 'reset':
                            pass
                        else:
                            raise RuntimeError(
                                "Unexpected fast-export command: "
                                + command['command'])
                finally:
                    # we want to write mirror objects for any commits that
                    # made it through
                    # any exception will still be alive after this
                    with self.perf.timer[MIRROR]:
                        self.ctx.mirror.add_commits(marks)
                        self.ctx.mirror.add_objects_to_p4(self.ctx)

                if conflict_checker.has_conflict():
                    raise RuntimeError(
                        "Conflicting change from Perforce caused one"
                        + " or more git commits to fail. Time to"
                        + " pull, rebase, and try again.")

        LOG.getChild("time").debug("\n" + str(self))
class GitMirror:
    """Handle git things that get mirrored in perforce.

    Commit and tree objects are collected in ``git_objects`` by
    add_commits() and written to Perforce by add_objects_to_p4().
    """

    def __init__(self, view_name):
        """Create an empty object list and timers for the given view."""
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([
            OVERALL,
            (BUILD, OVERALL),
            (CAT_FILE, BUILD),
            (LS_TREE, BUILD),
            (LS_TREE_PROCESS, BUILD),
            (DIFF_TREE, BUILD),
            (DIFF_TREE_PROCESS, BUILD),
            (ADD_SUBMIT, OVERALL),
            (EXTRACT_OBJECTS, ADD_SUBMIT),
            (P4_FSTAT, ADD_SUBMIT),
            (P4_ADD, ADD_SUBMIT),
            (P4_SUBMIT, ADD_SUBMIT),
        ])
        self.perf.add_counters([(CAT_FILE_COUNT, "files"),
                                (CAT_FILE_SIZE, "bytes")])
        self.progress = ProgressReporter()
        self.view_name = view_name

    @staticmethod
    def get_change_for_commit(commit, ctx):
        """Given a commit sha1, find the corresponding perforce change.

        Returns:
            The changelist recorded for ctx.config.view_name, or None if
            no object type is found for the sha1.
        """
        object_type = p4gf_object_type.sha1_to_object_type(
            sha1=commit,
            view_name=ctx.config.view_name,
            p4=ctx.p4gf,
            raise_on_error=False)
        if not object_type:
            return None
        return object_type.view_name_to_changelist(ctx.config.view_name)

    def add_commits(self, marks):
        """Build list of commit and tree objects for a set of changelists.

        marks: list of commit marks output by git-fast-import
               formatted as: :changenum sha1
        """
        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                last_top_tree = None
                for mark in marks:
                    # parse perforce change number and SHA1 from marks
                    parts = mark.split(' ')
                    change_num = parts[0][1:]
                    sha1 = parts[1].strip()

                    # add commit object
                    self.git_objects.add_object(
                        GitObject("commit", sha1,
                                  [(change_num, self.view_name)]))

                    # add all trees referenced by the commit: a full
                    # snapshot for the first commit, then only the trees
                    # that differ from the previous commit's top tree
                    if last_top_tree:
                        last_top_tree = self.__get_delta_trees(
                            last_top_tree, sha1)
                    else:
                        last_top_tree = self.__get_snapshot_trees(sha1)

    def add_objects_with_views(self, ctx, add_files):
        """Add the list of files to the object cache in the depot and
        return the number of files not added.

        Files are added in batches of 1000, then the 'views' attribute is
        set to this mirror's view name on every file that was opened for
        add.
        """
        added_files = []
        files_not_added = 0
        treecount = 0
        commitcount = 0
        # Add new files to the object cache.
        bite_size = 1000
        while len(add_files):
            bite = add_files[:bite_size]
            add_files = add_files[bite_size:]
            result = ctx.p4gf.run("add", "-t", "binary", bite)
            for m in ctx.p4gf.messages:
                if (m.msgid != p4gf_p4msgid.MsgDm_OpenUpToDate
                        or m.dict['action'] != 'add'):
                    files_not_added += 1
                    LOG.debug(str(m))

            for r in result:
                if not isinstance(r, dict):
                    continue
                if r["action"] != 'add':
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                    LOG.debug(r)
                else:
                    added_files.append(r["depotFile"])
                    if r["depotFile"].endswith("-tree"):
                        treecount += 1
                    else:
                        commitcount += 1
        LOG.debug("Added {} commits and {} trees".format(
            commitcount, treecount))
        # Set the 'views' attribute on the opened files.
        while added_files:
            bite = added_files[:bite_size]
            added_files = added_files[bite_size:]
            ctx.p4gf.run("attribute", "-p", "-n", "views",
                         "-v", self.view_name, bite)
        return files_not_added

    def add_objects_to_p4(self, ctx):
        """Actually run p4 add, submit to create mirror files in
        .git-fusion.
        """
        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror
            # operation.
            opened = ctx.p4gf.run('opened')
            if opened:
                ctx.p4gf.run('revert',
                             '//{}/...'.format(ctx.config.p4client_gf))
            with self.perf.timer[ADD_SUBMIT]:
                LOG.debug(
                    "adding {0} commits and {1} trees to .git-fusion...".
                    format(self.git_objects.counts['commit'],
                           self.git_objects.counts['tree']))

                # build list of objects to add, extracting them from git
                self.progress.progress_init_determinate(
                    len(self.git_objects.objects))
                add_files = [
                    self.__add_object_to_p4(ctx, go)
                    for go in self.git_objects.objects.values()
                ]

                # filter out any files that have already been added
                # only do this if the number of files is large enough to
                # justify the cost of the fstat
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for
                    # existing files, which we can't know until we use
                    # fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    fstat_handler = FilterAddFstatHandler(self.view_name)
                    ctx.p4gf.handler = fstat_handler
                    try:
                        # spoon-feed p4 to avoid blowing out memory
                        while add_files:
                            bite = add_files[:bite_size]
                            add_files = add_files[bite_size:]
                            # Try to get only the information we really
                            # need.
                            ctx.p4gf.run("fstat", "-Oa", "-T",
                                         "depotFile, attr-views", bite)
                    finally:
                        # Never leave the filtering handler installed,
                        # even if fstat raised, or later commands on this
                        # connection would be routed through it.
                        ctx.p4gf.handler = None
                    add_files = fstat_handler.files
                    existing_files = fstat_handler.existing
                    LOG.debug("{} files removed from add list".format(
                        original_count - len(add_files)))

                files_to_add = len(add_files) + len(existing_files)
                if files_to_add == 0:
                    return

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(
                        ctx, add_files)
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    if files_not_added < files_to_add:
                        desc = 'Git Fusion {view} copied to git'.format(
                            view=ctx.config.view_name)
                        self.progress.status(
                            "Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)
                    else:
                        LOG.debug("ignoring empty change list...")

    def __str__(self):
        return "\n".join([str(self.git_objects), str(self.perf)])

    def __repr__(self):
        return "\n".join([repr(self.git_objects), str(self.perf)])

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash.
    def __add_object_to_p4(self, ctx, go):
        """Add a commit or tree to the git-fusion perforce client workspace.

        Return the path of the client workspace file suitable for use with
        p4 add.
        """
        self.progress.progress_increment(
            "Adding new Git objects to Perforce...")
        ctx.heartbeat()

        # get client path for .git-fusion file
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it. We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:
            # make sure dir exists
            dstdir = os.path.dirname(dst)
            if not os.path.exists(dstdir):
                os.makedirs(dstdir)

            # get contents of commit or tree; can't just copy it because
            # it's probably in a packfile and we don't know which one. And
            # there's no way to have git give us the compressed commit
            # directly, so we need to recompress it
            p = Popen(['git', 'cat-file', go.type, go.sha1], stdout=PIPE)
            po = p.communicate()[0]
            header = go.type + " " + str(len(po)) + '\0'
            deflated = zlib.compress(header.encode() + po)

            # write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as f:
                f.write(deflated)

            return dst

    def __get_snapshot_trees(self, commit):
        """Get all tree objects for a given commit.

        commit: SHA1 of commit

        Each tree is added to the list to be mirrored.

        Return the SHA1 of the commit's tree.
        """
        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            p = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            for line in po.splitlines():
                m = pattern.match(line)
                if m:
                    self.git_objects.add_object(
                        GitObject("tree", m.group(1)))
        return top_tree

    def __get_delta_trees(self, top_tree1, commit2):
        """Get all tree objects new in one commit vs another commit.

        top_tree1: SHA1 of first commit's tree
        commit2: SHA1 of second commit

        Each tree is added to the list to be mirrored.

        Return the SHA1 of commit2's tree.
        """
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            p = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2],
                      stdout=PIPE)
            po = p.communicate()[0].decode()
        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*"
            )
            for line in po.splitlines():
                m = pattern.match(line)
                if m and m.group(1) == "04":
                    self.git_objects.add_object(
                        GitObject("tree", m.group(2)))
        return top_tree2

    def __get_commit_tree(self, commit):
        """Get the one and only tree at the top of commit.

        commit: SHA1 of the commit

        Add the tree object to the list of objects to be mirrored and
        return its SHA1. Returns None if the commit text contains no line
        starting with "tree".
        """
        with self.perf.timer[CAT_FILE]:
            self.perf.counter[CAT_FILE_COUNT] += 1
            p = Popen(['git', 'cat-file', 'commit', commit], stdout=PIPE)
            po = p.communicate()[0].decode()
            self.perf.counter[CAT_FILE_SIZE] += len(po)
            for line in po.splitlines():
                if not line.startswith("tree"):
                    continue
                # line is: tree sha
                parts = line.strip().split(' ')
                sha1 = parts[1]
                self.git_objects.add_object(GitObject("tree", sha1))
                return sha1
        return None
class P2G:
    """Manage copying from Perforce to git.

    copy() is the entry point; the underscore-prefixed methods are its
    individual phases, run in order.
    """

    def __init__(self, ctx):
        """Store the context and configure FastImport and the timers."""
        self.ctx = ctx
        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)
        self.perf = p4gf_profiler.TimerCounterSet()
        self.perf.add_timers([
            OVERALL,
            (SETUP, OVERALL),
            (PRINT, OVERALL),
            (FSTAT, OVERALL),
            (SYNC, OVERALL),
            (FAST_IMPORT, OVERALL),
            (MIRROR, OVERALL),
            (MERGE, OVERALL),
            (PACK, OVERALL),
        ])

        self.rev_range = None       # RevRange instance set in copy().
        self.graft_change = None    # P4Changelist set in _setup() if grafting
        self.changes = None         # dict['changelist'] ==> P4Changelist
                                    # of what to copy()
        self.printed_revs = None    # RevList produced by PrintHandler
        self.status_verbose = True
        self.progress = ProgressReporter()

    def __str__(self):
        return "\n".join([
            "\n\nFast Import:\n",
            str(self.fastimport),
            "",
            str(self.perf),
            "",
        ])

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug(
            "Revision range to copy to Git: {rr}".format(rr=self.rev_range))

        # get list of changes to import into git
        self.changes = P4Changelist.create_changelist_list_as_dict(
            self.ctx.p4, self._path_range())

        # If grafting, get that too.
        if self.rev_range.graft_change_num:
            # Ignore all depotFile elements, we just want the
            # change/desc/time/user.
            self.graft_change = P4Changelist.create_using_describe(
                self.ctx.p4,
                self.rev_range.graft_change_num,
                "ignore_depot_files")
            self.graft_change.description += (
                '\n[grafted history before {start_at}]'.format(
                    start_at=start_at))

    def _path_range(self):
        """Return the common path...@range string we use frequently."""
        return self.ctx.client_view_path() + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo.

        Stores the handler's collected revs in self.printed_revs.
        """
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
                                    tempdir=self.ctx.tempdir.name)
        self.ctx.p4.handler = printhandler
        try:
            args = ["-a"]
            if server_can_unexpand:
                args.append("-k")
            self.ctx.p4.run("print", args, self._path_range())
            printhandler.flush()
            printhandler.progress.progress_finish()

            # If also grafting, print all revs in existence at time of
            # graft.
            if self.graft_change:
                args = []
                if server_can_unexpand:
                    args.append("-k")
                path = self._graft_path()
                LOG.debug("Printing for grafted history: {}".format(path))
                self.ctx.p4.run("print", args, path)
                printhandler.flush()

                # If grafting, we just printed revs that refer to
                # changelists that have no P4Changelist counterpart in
                # self.changes. Make some skeletal versions now so that
                # FstatHandler will have someplace to hang its
                # outputStat() P4File instances.
                for (_key, p4file) in printhandler.revs.revs:
                    if p4file.change not in self.changes:
                        cl = P4Changelist()
                        cl.change = p4file.change
                        self.changes[p4file.change] = cl
        finally:
            # Always detach the handler, even when a command raised, so
            # later commands on this connection are not routed through it.
            self.ctx.p4.handler = None

        self.printed_revs = printhandler.revs

    def _fstat(self):
        """Run fstat to find deleted revs and get client paths.

        Returns:
            List of changelist numbers (as strings) from self.changes,
            sorted numerically.
        """
        # TODO for 12.2 print will also report deleted revs so between
        # that and using MapApi to get client paths, we won't need this fstat
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        try:
            fstat_cols = "-T" + ",".join(P4File.fstat_cols())
            self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

            if self.graft_change:
                # Also run 'p4 fstat //<view>/...@change' for the graft
                # change to catch all files as of @change, not just
                # revs changed between begin and end of _path_range().
                self.ctx.p4.run("fstat", fstat_cols, self._graft_path())
        finally:
            # Always detach the handler, even when a command raised.
            self.ctx.p4.handler = None

        self._collapse_to_graft_change()
        self._add_graft_to_changes()

        # don't need this any more
        self.printed_revs = None

        sorted_changes = [
            str(y) for y in sorted([int(x) for x in self.changes.keys()])
        ]

        LOG.debug("\n".join([str(self.changes[ch]) for ch in sorted_changes]))
        return sorted_changes

    def _sync(self, sorted_changes):
        """Fake sync of last change to make life easier at push time."""
        self.ctx.p4.handler = SyncHandler()
        try:
            lastchange = self.changes[sorted_changes[-1]]
            self.ctx.p4.run(
                "sync", "-kf",
                self.ctx.client_view_path() + "@" + str(lastchange.change))
        finally:
            # Always detach the handler, even when the sync raised.
            self.ctx.p4.handler = None

    def _fast_import(self, sorted_changes, last_commit):
        """Build fast-import script from changes, then run fast-import.

        Returns:
            The list of marks returned by FastImport.run_fast_import().
        """
        self.progress.progress_init_determinate(len(sorted_changes))
        for changenum in sorted_changes:
            change = self.changes[changenum]
            self.progress.progress_increment("Copying changelists...")
            self.ctx.heartbeat()

            # create commit and trees
            self.fastimport.add_commit(change, last_commit)

            last_commit = change.change

        # run git-fast-import and get list of marks
        marks = self.fastimport.run_fast_import()

        # done with these
        self.changes = None
        return marks

    def _mirror(self, marks):
        """Build up list of p4 objects to mirror git repo in perforce
        then submit them.

        Afterwards ctx.mirror is replaced with a fresh GitMirror.
        """
        self.ctx.mirror.add_commits(marks)
        self.ctx.mirror.add_objects_to_p4(self.ctx)
        LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(self.ctx.mirror))
        self.ctx.mirror = GitMirror(self.ctx.config.view_name)

        # Guard against an empty mark list: indexing it would raise
        # IndexError after the mirror work has already been done.
        if marks:
            LOG.debug("Last commit created: " + marks[-1])

    # pylint: disable=R0201
    # R0201 Method could be a function
    def _pack(self):
        """Run 'git gc' to pack up the blobs.

        Aside from any possible performance benefit, this prevents
        warnings from git about "unreachable loose objects".
        """
        p4gf_util.popen_no_throw(["git", "gc"])

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        if not self.graft_change:
            return
        graft_num_int = int(self.graft_change.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}".format(
            graft_num_int))

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        # Keys are collected first so the dict is not mutated while it is
        # being iterated.
        del_keys = []
        for p4changelist in self.changes.values():
            if graft_num_int < int(p4changelist.change):
                LOG.debug("_collapse_to_graft_change() skipping {}".format(
                    p4changelist.change))
                continue

            LOG.debug("_collapse_to_graft_change() deleting {}".format(
                p4changelist.change))
            del_keys.append(p4changelist.change)
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}".
                          format(p4file))
                continue

            old = self.graft_change.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if (not old) or (int(old.change) < int(p4file.change)):
                p4file.change = self.graft_change.change
                self.graft_change.files.append(p4file)
                LOG.debug(
                    "_collapse_to_graft_change() keeping {}".format(p4file))
            else:
                LOG.debug(
                    "_collapse_to_graft_change() skipping, had newer {}".
                    format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes:
        It will be copied over like any other change.

        NOP if not grafting.
        """
        if not self.graft_change:
            return
        self.changes[self.graft_change.change] = self.graft_change

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        """
        if not self.graft_change:
            return None
        return "{path}@{change}".format(
            path=self.ctx.client_view_path(),
            change=self.graft_change.change)

    def copy(self, start_at, stop_at):
        """Copy a set of changelists from perforce into git.

        Args:
            start_at: start of the range handed to RevRange.from_start_stop.
            stop_at: end of the range handed to RevRange.from_start_stop.
        """
        with self.perf.timer[OVERALL]:
            with self.perf.timer[SETUP]:
                self._setup(start_at, stop_at)

                if not len(self.changes):
                    LOG.debug("No new changes found to copy")
                    return

                last_commit = self.rev_range.last_commit

            with self.perf.timer[PRINT]:
                self._copy_print()

            with self.perf.timer[FSTAT]:
                sorted_changes = self._fstat()

            with self.perf.timer[SYNC]:
                self._sync(sorted_changes)

            with self.perf.timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None

            with self.perf.timer[MIRROR]:
                self._mirror(marks)

            with self.perf.timer[MERGE]:
                # merge temporary branch into master, then delete it
                self.fastimport.merge()

            with self.perf.timer[PACK]:
                self._pack()

        LOG.getChild("time").debug("\n" + str(self))
class GitMirror:
    """Collect Git commit/tree objects and mirror them into Perforce.

    Objects are gathered into self.git_objects by add_commits() and later
    written to the Git Fusion client workspace and submitted by
    add_objects_to_p4().
    """

    def __init__(self, view_name):
        """Create an empty object list, timers and counters for view_name."""
        self.git_objects = GitObjectList()
        self.perf = p4gf_profiler.TimerCounterSet()
        # Each tuple is (timer, parent timer); OVERALL is the root.
        timers = [
            OVERALL,
            (BUILD, OVERALL),
            (CAT_FILE, BUILD),
            (LS_TREE, BUILD),
            (LS_TREE_PROCESS, BUILD),
            (DIFF_TREE, BUILD),
            (DIFF_TREE_PROCESS, BUILD),
            (ADD_SUBMIT, OVERALL),
            (EXTRACT_OBJECTS, ADD_SUBMIT),
            (P4_FSTAT, ADD_SUBMIT),
            (P4_ADD, ADD_SUBMIT),
            (P4_SUBMIT, ADD_SUBMIT),
        ]
        self.perf.add_timers(timers)
        counters = [(CAT_FILE_COUNT, "files"), (CAT_FILE_SIZE, "bytes")]
        self.perf.add_counters(counters)
        self.progress = ProgressReporter()
        self.view_name = view_name

    @staticmethod
    def get_change_for_commit(commit, ctx):
        """Return the Perforce change recorded for a commit sha1, or None.

        The lookup is done for ctx.config.view_name and does not raise when
        the commit is unknown (raise_on_error=False).
        """
        object_type = p4gf_object_type.sha1_to_object_type(
            sha1=commit,
            view_name=ctx.config.view_name,
            p4=ctx.p4gf,
            raise_on_error=False)
        if object_type:
            return object_type.view_name_to_changelist(ctx.config.view_name)
        return None

    def add_commits(self, marks):
        """Record the commit and tree objects for a list of fast-import marks.

        marks: list of commit marks output by git-fast-import, each
               formatted as ":changenum sha1".
        """
        with self.perf.timer[OVERALL]:
            with self.perf.timer[BUILD]:
                previous_tree = None
                for mark in marks:
                    # ":changenum sha1" -> changenum, sha1
                    fields = mark.split(' ')
                    change_num = fields[0][1:]
                    sha1 = fields[1].strip()

                    commit_object = GitObject(
                        "commit", sha1, [(change_num, self.view_name)])
                    self.git_objects.add_object(commit_object)

                    # The first commit needs a full tree listing; later
                    # ones only need the trees that differ from the
                    # previous commit's top tree.
                    if not previous_tree:
                        previous_tree = self.__get_snapshot_trees(sha1)
                    else:
                        previous_tree = self.__get_delta_trees(
                            previous_tree, sha1)

    def add_objects_with_views(self, ctx, add_files):
        """Open add_files for add in the object cache and tag their views.

        Files are fed to p4 in bites of 1000. Successfully added files get
        the 'views' attribute set to self.view_name.

        Returns the number of files that were not added.
        """
        bite_size = 1000
        opened_for_add = []
        files_not_added = 0
        commitcount = 0
        treecount = 0

        # Add new files to the object cache.
        for start in range(0, len(add_files), bite_size):
            bite = add_files[start:start + bite_size]
            result = ctx.p4gf.run("add", "-t", "binary", bite)

            for m in ctx.p4gf.messages:
                # An "up to date" message for an add action is expected;
                # every other message counts as a file not added.
                if (m.msgid == p4gf_p4msgid.MsgDm_OpenUpToDate
                        and m.dict['action'] == 'add'):
                    continue
                files_not_added += 1
                LOG.debug(str(m))

            for r in result:
                if not isinstance(r, dict):
                    continue
                if r["action"] == 'add':
                    depot_file = r["depotFile"]
                    opened_for_add.append(depot_file)
                    if depot_file.endswith("-tree"):
                        treecount += 1
                    else:
                        commitcount += 1
                else:
                    # file already exists in depot, perhaps?
                    files_not_added += 1
                    LOG.debug(r)

        LOG.debug("Added {} commits and {} trees".format(commitcount,
                                                         treecount))

        # Set the 'views' attribute on the opened files.
        for start in range(0, len(opened_for_add), bite_size):
            bite = opened_for_add[start:start + bite_size]
            ctx.p4gf.run("attribute", "-p", "-n", "views",
                         "-v", self.view_name, bite)

        return files_not_added

    def add_objects_to_p4(self, ctx):
        """Write collected objects to the workspace, then p4 add and submit.

        Returns None. Returns early when fstat shows nothing to add or edit,
        and skips the submit when no file was actually opened.
        """
        with self.perf.timer[OVERALL]:
            # Revert any opened files left over from a failed mirror
            # operation.
            if ctx.p4gf.run('opened'):
                client_path = '//{}/...'.format(ctx.config.p4client_gf)
                ctx.p4gf.run('revert', client_path)

            with self.perf.timer[ADD_SUBMIT]:
                counts = self.git_objects.counts
                LOG.debug("adding {0} commits and {1} trees to .git-fusion...".
                          format(counts['commit'], counts['tree']))

                # Build list of objects to add, extracting them from git.
                self.progress.progress_init_determinate(
                    len(self.git_objects.objects))
                add_files = [self.__add_object_to_p4(ctx, go)
                             for go in self.git_objects.objects.values()]

                # Filter out any files that have already been added.
                existing_files = None
                with self.perf.timer[P4_FSTAT]:
                    # Need to use fstat to get the 'views' attribute for
                    # existing files, which we can't know until we use
                    # fstat to find out.
                    bite_size = 1000
                    LOG.debug("using fstat to optimize add")
                    original_count = len(add_files)
                    ctx.p4gf.handler = FilterAddFstatHandler(self.view_name)
                    # Spoon-feed p4 to avoid blowing out memory.
                    for start in range(0, original_count, bite_size):
                        bite = add_files[start:start + bite_size]
                        # Try to get only the information we really need.
                        ctx.p4gf.run("fstat", "-Oa", "-T",
                                     "depotFile, attr-views", bite)
                    # The handler sorted the paths into new vs. existing.
                    add_files = ctx.p4gf.handler.files
                    existing_files = ctx.p4gf.handler.existing
                    ctx.p4gf.handler = None
                    LOG.debug("{} files removed from add list"
                              .format(original_count - len(add_files)))
                    files_to_add = len(add_files) + len(existing_files)
                    if files_to_add == 0:
                        return

                with self.perf.timer[P4_ADD]:
                    files_not_added = self.add_objects_with_views(ctx,
                                                                  add_files)
                    edit_objects_with_views(ctx, existing_files)

                with self.perf.timer[P4_SUBMIT]:
                    if files_not_added >= files_to_add:
                        LOG.debug("ignoring empty change list...")
                    else:
                        desc = 'Git Fusion {view} copied to git'.format(
                            view=ctx.config.view_name)
                        self.progress.status(
                            "Submitting new Git objects to Perforce...")
                        ctx.p4gf.run("submit", "-d", desc)

    def __str__(self):
        """Return str() of the object list and the timers, newline-joined."""
        pieces = [str(self.git_objects), str(self.perf)]
        return "\n".join(pieces)

    def __repr__(self):
        """Return repr() of the object list and str() of the timers."""
        pieces = [repr(self.git_objects), str(self.perf)]
        return "\n".join(pieces)

    # pylint: disable=R0201, W1401
    # R0201 Method could be a function
    # I agree, this _could_ be a function, does not need self. But when I
    # blindly promote this to a module-level function, things break and I
    # cannot explain why.
    # W1401 Unescaped backslash
    # We want that null for the header, so we're keeping the backslash.
    def __add_object_to_p4(self, ctx, go):
        """Write one commit or tree object into the git-fusion workspace.

        Returns the client workspace path of the file, suitable for use
        with p4 add. An already existing file is reused untouched.
        """
        self.progress.progress_increment(
            "Adding new Git objects to Perforce...")
        ctx.heartbeat()

        # Client path for the .git-fusion file.
        dst = go.git_p4_client_path(ctx)

        # A tree is likely to already exist, in which case we don't need
        # or want to try to recreate it. We'll just use the existing one.
        if os.path.exists(dst):
            LOG.debug("reusing existing object: " + dst)
            return dst

        with self.perf.timer[EXTRACT_OBJECTS]:
            # Make sure the destination directory exists.
            parent_dir = os.path.dirname(dst)
            if not os.path.exists(parent_dir):
                os.makedirs(parent_dir)

            # Get contents of commit or tree; can't just copy it because
            # it's probably in a packfile and we don't know which one. And
            # there's no way to have git give us the compressed commit
            # directly, so we need to recompress it.
            cat_file = Popen(['git', 'cat-file', go.type, go.sha1],
                             stdout=PIPE)
            content = cat_file.communicate()[0]
            header = go.type + " " + str(len(content)) + '\0'
            deflated = zlib.compress(header.encode() + content)

            # Write it into our p4 client workspace for adding.
            LOG.debug("adding new object: " + dst)
            with open(dst, "wb") as out:
                out.write(deflated)
            return dst

    def __get_snapshot_trees(self, commit):
        """Record every tree object reachable from a commit.

        commit: SHA1 of commit

        Each tree is added to the list to be mirrored.
        Returns the SHA1 of the commit's top tree.
        """
        top_tree = self.__get_commit_tree(commit)
        with self.perf.timer[LS_TREE]:
            ls_tree = Popen(['git', 'ls-tree', '-rt', top_tree], stdout=PIPE)
            listing = ls_tree.communicate()[0].decode()

        with self.perf.timer[LS_TREE_PROCESS]:
            # line is: mode SP type SP sha TAB path
            # we only want the sha from lines with type "tree"
            pattern = re.compile("^[0-7]{6} tree ([0-9a-fA-F]{40})\t.*")
            for line in listing.splitlines():
                found = pattern.match(line)
                if found:
                    self.git_objects.add_object(
                        GitObject("tree", found.group(1)))
        return top_tree

    def __get_delta_trees(self, top_tree1, commit2):
        """Record the tree objects that are new in commit2 vs. top_tree1.

        top_tree1: SHA1 of first commit's tree
        commit2:   SHA1 of second commit

        Each tree is added to the list to be mirrored.
        Returns the SHA1 of commit2's tree.
        """
        top_tree2 = self.__get_commit_tree(commit2)
        with self.perf.timer[DIFF_TREE]:
            diff_tree = Popen(['git', 'diff-tree', '-t', top_tree1, top_tree2],
                              stdout=PIPE)
            listing = diff_tree.communicate()[0].decode()

        with self.perf.timer[DIFF_TREE_PROCESS]:
            # line is: :mode1 SP mode2 SP sha1 SP sha2 SP action TAB path
            # we want sha2 from lines where mode2 indicates a dir
            pattern = re.compile(
                "^:[0-7]{6} ([0-7]{2})[0-7]{4} [0-9a-fA-F]{40} ([0-9a-fA-F]{40}) .*")
            for line in listing.splitlines():
                found = pattern.match(line)
                if found and found.group(1) == "04":
                    self.git_objects.add_object(
                        GitObject("tree", found.group(2)))
        return top_tree2

    def __get_commit_tree(self, commit):
        """Record and return the top tree of a commit.

        commit: SHA1 of the commit

        The tree object is added to the list of objects to be mirrored and
        its SHA1 is returned. Returns None when the commit text contains no
        line starting with "tree".
        """
        with self.perf.timer[CAT_FILE]:
            self.perf.counter[CAT_FILE_COUNT] += 1
            cat_file = Popen(['git', 'cat-file', 'commit', commit],
                             stdout=PIPE)
            text = cat_file.communicate()[0].decode()
            self.perf.counter[CAT_FILE_SIZE] += len(text)
            for line in text.splitlines():
                if line.startswith("tree"):
                    # line is: tree sha
                    sha1 = line.strip().split(' ')[1]
                    self.git_objects.add_object(GitObject("tree", sha1))
                    return sha1
class G2P:
    """Copy Git commits into Perforce, batching p4 commands where possible.

    Pending file operations are collected in self.addeditdelete, a dict that
    maps a p4 command string (for example "edit -t +x", "add -f", "delete"
    or "move") to the list of paths it applies to. run_p4_commands() flushes
    that dict.
    """

    def __init__(self, ctx):
        """Store ctx and create timers, counters, user map and progress."""
        self.ctx = ctx
        # command string -> list of paths; for 'move' each element is a
        # (frompath, topath) tuple.
        self.addeditdelete = {}
        self.perf = p4gf_profiler.TimerCounterSet()
        # Each tuple is (timer, parent timer); OVERALL is the root.
        self.perf.add_timers([OVERALL,
                              (FAST_EXPORT, OVERALL),
                              (TEST_BLOCK_PUSH, OVERALL),
                              (CHECK_CONFLICT, OVERALL),
                              (COPY, OVERALL),
                              (GIT_CHECKOUT, COPY),
                              (CHECK_PROTECTS, COPY),
                              (COPY_BLOBS_1, COPY),
                              (COPY_BLOBS_2, COPY),
                              (MIRROR, OVERALL),
                              ])
        self.perf.add_counters([N_BLOBS, N_RENAMES])
        self.usermap = p4gf_usermap.UserMap(ctx.p4gf)
        self.progress = ProgressReporter()

    def __str__(self):
        """Return the timers followed by str(ctx.mirror), newline-joined."""
        return "\n".join([str(self.perf),
                          str(self.ctx.mirror)])

    def revert_and_raise(self, errmsg):
        """Revert all opened files, log the error and raise.

        An error occurred while attempting to submit the incoming change to
        Perforce. As a result, revert all modifications, log the error, and
        raise an exception.

        errmsg: text describing the failure; when empty, the current call
                stack is used instead.

        Raises RuntimeError, always.
        """
        # Roll back and raise the problem to the caller.
        p4 = connect_p4(user=p4gf_const.P4GF_USER, client=self.ctx.p4.client)
        if p4:
            opened = p4.run('opened')
            if opened:
                p4.run('revert', '//{}/...'.format(self.ctx.p4.client))
        # revert doesn't clean up added files
        self.remove_added_files()
        if not errmsg:
            # format_stack() returns a list of strings; join them so the
            # message is readable text rather than a list repr.
            errmsg = "".join(traceback.format_stack())
        msg = "import failed: {}".format(errmsg)
        LOG.error(msg)
        raise RuntimeError(msg)

    def _p4_message_to_text(self, msg):
        """Convert one P4 message to a string.

        Some errors are annotated with additional context: the P4USER for
        message ids in MSGID_EXPLAIN_P4USER and the P4CLIENT for message ids
        in MSGID_EXPLAIN_P4CLIENT.
        """
        txt = str(msg)
        if msg.msgid in MSGID_EXPLAIN_P4USER:
            txt += ' P4USER={}.'.format(self.ctx.p4.user)
        if msg.msgid in MSGID_EXPLAIN_P4CLIENT:
            # This value is the client name, so label it as such.
            txt += ' P4CLIENT={}.'.format(self.ctx.p4.client)
        return txt

    def check_p4_messages(self):
        """Fail the commit if the last p4 command could not open a file.

        If the results indicate a file is locked by another user, raise an
        exception so that the overall commit will fail. The changes made so
        far will be reverted.

        Raises RuntimeError (via revert_and_raise) when any message with an
        id in MSGID_CANNOT_OPEN is present.
        """
        msgs = p4gf_p4msg.find_all_msgid(self.ctx.p4, MSGID_CANNOT_OPEN)
        if not msgs:
            return

        lines = [self._p4_message_to_text(m) for m in msgs]
        self.revert_and_raise('\n'.join(lines))

    def _p4run(self, cmd):
        """Run one P4 command, logging cmd and results.

        cmd: list of strings, the p4 command followed by its arguments.

        Returns None; the results are only logged. Raises RuntimeError (via
        check_p4_messages) when a file could not be opened.
        """
        p4 = self.ctx.p4
        LOG.getChild('p4.cmd').debug(" ".join(cmd))

        results = p4.run(cmd)

        if p4.errors:
            LOG.getChild('p4.err').error("\n".join(p4.errors))
        if p4.warnings:
            LOG.getChild('p4.warn').warning("\n".join(p4.warnings))
        LOG.getChild('p4.out').debug("{}".format(results))
        msgid_log = LOG.getChild('p4.msgid')
        if msgid_log.isEnabledFor(logging.DEBUG):
            for m in p4.messages:
                msgid_log.debug(p4gf_p4msg.msg_repr(m))

        self.check_p4_messages()

    def run_p4_commands(self):
        """Run all pending p4 commands collected in self.addeditdelete."""
        for operation, paths in self.addeditdelete.items():
            cmd = operation.split(' ')
            # Avoid writable client files problem by using -k and handling
            # the actual file action ourselves (in add/edit cases the caller
            # has already written the new file).
            if cmd[0] != 'add':
                cmd.append('-k')

            if cmd[0] == 'move':
                # move takes a tuple of two arguments, the old name and new
                # name
                oldnames = [escape_path(pair[0]) for pair in paths]
                # move requires opening the file for edit first
                self._p4run(['edit', '-k'] + oldnames)
                LOG.debug("Edit {}".format(oldnames))
                for (frompath, topath) in paths:
                    self._p4run(['move', '-k',
                                 escape_path(frompath),
                                 escape_path(topath)])
                    LOG.debug("Move from {} to {}".format(frompath, topath))
                continue

            reopen = []
            if 'edit -t' in operation:
                # edit -t text does not work, must 'edit' then 'reopen -t'
                # "can't change from xtext - use 'reopen'"
                reopen = ['reopen', '-t', cmd[2]]
                # Drop the "-t <type>" pair, keep the command and the rest.
                cmd = cmd[0:1] + cmd[3:]

            if cmd[0] != 'add':
                self._p4run(cmd + [escape_path(path) for path in paths])
            else:
                # Paths for add are passed through without escaping.
                self._p4run(cmd + paths)

            if reopen:
                self._p4run(reopen + [escape_path(path) for path in paths])

            if cmd[0] == 'delete':
                # -k left the workspace file in place; remove it ourselves.
                LOG.debug("Delete {}".format(paths))
                for path in paths:
                    os.remove(path)

    def remove_added_files(self):
        """Remove added files to restore p4 client after a failed command.

        Files queued under any 'add' command are unlinked. A path that is
        already gone is skipped: this runs during error cleanup, and a
        missing file must not replace the error being reported.
        """
        for operation, paths in self.addeditdelete.items():
            if operation.split(' ')[0] != 'add':
                continue
            for path in paths:
                if os.path.lexists(path):
                    os.unlink(path)

    def setup_p4_command(self, command, p4path):
        """Queue command for p4path, to be run by run_p4_commands.

        If the command is 'move' then p4path is expected to be a tuple of
        the frompath and topath.
        """
        self.addeditdelete.setdefault(command, []).append(p4path)

    def _toggle_filetype(self, p4path, isx):
        """Return the new file type for a file whose executable bit changed.

        Args:
            p4path: Path of the file to modify.
            isx: True if currently executable.

        Returns:
            New type for the file; may be None.
        """
        if isx:
            return '+x'

        # To remove a previously assigned modifier, the whole filetype
        # must be specified.
        p4type = None
        for tipe in ['headType', 'type']:
            # For a file that was executable, is being renamed (with
            # edits), and is no longer executable, we need to handle the
            # fact that it's not yet in Perforce and so does not have a
            # headType.
            try:
                found = p4gf_util.first_value_for_key(
                    self.ctx.p4.run(['fstat', '-T' + tipe, p4path]),
                    tipe)
            except P4.P4Exception:
                continue
            # Only take a lookup that produced a value, so an absent 'type'
            # field cannot wipe out a headType that was found.
            if found:
                p4type = found
        if p4type:
            p4type = p4gf_p4filetype.remove_mod(p4type, 'x')
        return p4type

    def add_or_edit_blob(self, blob):
        """Queue p4 add or edit for a new or modified file.

        blob: dict with at least 'path' and, for adds, 'mode'.

        The file content is copied into the p4 client workspace here; the
        p4 command itself is only queued.
        """
        # Local path in p4 client.
        p4path = self.ctx.contentlocalroot + blob['path']

        # Edit or add?
        isedit = os.path.exists(p4path)

        # Make sure dest dir exists.
        dstdir = os.path.dirname(p4path)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        if isedit:
            LOG.debug("Copy edit from: " + blob['path'] + " to " + p4path)
            # For edits, only use +x or -x to propagate partial filetype
            # changes.
            wasx = os.stat(p4path).st_mode & stat.S_IXUSR
            isx = os.stat(blob['path']).st_mode & stat.S_IXUSR
            if wasx != isx:
                p4type = self._toggle_filetype(p4path, isx)
            else:
                p4type = None
            if p4type:
                LOG.debug(" set filetype: {ft} oldx={oldx} newx={newx}"
                          .format(ft=p4type, oldx=wasx, newx=isx))
                shutil.copystat(blob['path'], p4path)
            shutil.copyfile(blob['path'], p4path)
        else:
            LOG.debug("Copy add from: " + blob['path'] + " to " + p4path)
            # For adds, use complete filetype of new file.
            p4type = p4type_from_mode(blob['mode'])
            shutil.copyfile(blob['path'], p4path)

        # If file exists it's an edit, so do p4 edit before copying content;
        # for an add, do p4 add after copying content.
        p4type = ' -t ' + p4type if p4type else ''
        if isedit:
            self.setup_p4_command("edit" + p4type, p4path)
        else:
            self.setup_p4_command("add -f" + p4type, p4path)

    def rename_blob(self, blob):
        """Queue p4 move for a renamed/moved file.

        blob: dict with 'path' (old name) and 'topath' (new name).
        """
        self.perf.counter[N_RENAMES] += 1

        # Local paths in p4 client.
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        # Ensure destination directory exists.
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)
        # Copy out of Git repo to Perforce workspace.
        shutil.copyfile(blob['topath'], p4topath)
        self.setup_p4_command("move", (p4frompath, p4topath))

    def copy_blob(self, blob):
        """Run p4 copy for a copied file and duplicate it in the workspace.

        blob: dict with 'path' (source) and 'topath' (destination).
        """
        self.perf.counter[N_BLOBS] += 1

        # Local paths in p4 client.
        p4frompath = self.ctx.contentlocalroot + blob['path']
        p4topath = self.ctx.contentlocalroot + blob['topath']

        self._p4run(["copy", "-v",
                     escape_path(p4frompath),
                     escape_path(p4topath)])

        # Make sure dest dir exists.
        dstdir = os.path.dirname(p4topath)
        if not os.path.exists(dstdir):
            os.makedirs(dstdir)

        LOG.debug("Copy/integ from: " + p4frompath + " to " + p4topath)
        shutil.copyfile(p4frompath, p4topath)

    def delete_blob(self, blob):
        """Queue p4 delete for a deleted file."""
        # Local path in p4 client.
        p4path = self.ctx.contentlocalroot + blob['path']
        self.setup_p4_command("delete", p4path)

    def copy_blobs(self, blobs):
        """Copy git blobs to perforce revs.

        blobs: list of dicts, each with an 'action' of 'R', 'C', 'M' or 'D'.
        """
        # First, one pass to do rename/copy.
        # These don't batch. move can't batch due to p4 limitations.
        # However, the edit required before move is batched.
        # copy could be batched by creating a temporary branchspec
        # but for now it's done file by file.
        with self.perf.timer[COPY_BLOBS_1]:
            for blob in blobs:
                if blob['action'] == 'R':
                    self.rename_blob(blob)
                elif blob['action'] == 'C':
                    self.copy_blob(blob)
            self.run_p4_commands()

        # Then, another pass to do add/edit/delete.
        # These are batched to allow running the minimum number of
        # p4 commands. That means no more than one delete, one add per
        # filetype and one edit per filetype. Since we only support three
        # possible filetypes (text, text+x, symlink) there could be at most
        # 1 + 3 + 3 commands run.
        with self.perf.timer[COPY_BLOBS_2]:
            self.addeditdelete = {}
            for blob in blobs:
                if blob['action'] == 'M':
                    self.add_or_edit_blob(blob)
                elif blob['action'] == 'D':
                    self.delete_blob(blob)
            self.run_p4_commands()

    def check_protects(self, p4user, blobs):
        """Check if author is authorized to submit files.

        Raises RuntimeError (via revert_and_raise) when the protects check
        reports an error.
        """
        pc = ProtectsChecker(self.ctx, self.ctx.authenticated_p4user, p4user)
        pc.filter_paths(blobs)
        if pc.has_error():
            self.revert_and_raise(pc.error_message())

    def _reset_for_new_commit(self):
        """Clear state from the previous commit that must not carry over."""
        self.addeditdelete = {}

    def attempt_resync(self):
        """Sync -k the Git Fusion client to the Git mirror's HEAD change.

        Attempts to sync -k the Git Fusion client to the change that
        corresponds to the HEAD of the Git mirror repository. This prevents
        the obscure "file(s) not on client" error. A P4 failure is logged
        and otherwise ignored.
        """
        # We assume we are in the GIT_WORK_TREE, which seems to be a safe
        # assumption at this point.
        try:
            last_commit = p4gf_util.git_ref_master()
            if last_commit:
                last_changelist_number = self.ctx.mirror.get_change_for_commit(
                    last_commit, self.ctx)
                if last_changelist_number:
                    filerev = "//...@{}".format(last_changelist_number)
                    self._p4run(['sync', '-k', filerev])
        except P4.P4Exception:
            # Don't stop the world if we have an error above.
            LOG.warning("resync failed with exception", exc_info=True)

    def copy_commit(self, commit):
        """Copy a single commit into Perforce.

        commit: dict from fast-export; this method reads its 'mark', 'data',
                'files', 'sha1' and 'author' entries and checks for 'merge'.

        Returns the mark string ":<changenum> <sha1>" for the submitted
        change, or None when the commit opened no files.
        Raises RuntimeError (via revert_and_raise) for merge commits,
        unknown users, invalid filenames and P4 failures.
        """
        self._reset_for_new_commit()
        LOG.debug("for commit {}".format(commit['mark']))
        LOG.debug("with description: {}".format(commit['data']))
        LOG.debug("files affected: {}".format(commit['files']))

        # Reject merge commits. Not supported in 2012.1.
        if 'merge' in commit:
            self.revert_and_raise(("Merge commit {} not permitted."
                                   + " Rebase to create a linear"
                                   + " history.").format(commit['sha1']))

        # Strip any enclosing angle brackets from the email address.
        email = commit['author']['email'].strip('<>')
        user = self.usermap.lookup_by_email(email)
        LOG.debug("for email {} found user {}".format(email, user))
        if (user is None) or (not self.usermap.p4user_exists(user[0])):
            # User is not a known and existing Perforce user, and the
            # unknown_git account is not set up, so reject the commit.
            self.revert_and_raise(
                "User '{}' not permitted to commit".format(email))
        author_p4user = user[0]

        for blob in commit['files']:
            err = check_valid_filename(blob['path'])
            if err:
                self.revert_and_raise(err)

        with self.perf.timer[GIT_CHECKOUT]:
            d = p4gf_util.popen_no_throw(['git', 'checkout', commit['sha1']])
            if d['Popen'].returncode:
                # Sometimes git cannot distinquish the revision from a
                # path...
                p4gf_util.popen(['git', 'reset', '--hard',
                                 commit['sha1'], '--'])

        with self.perf.timer[CHECK_PROTECTS]:
            self.check_protects(author_p4user, commit['files'])

        try:
            self.copy_blobs(commit['files'])
        except P4.P4Exception as e:
            self.revert_and_raise(str(e))

        with self.perf.timer[COPY_BLOBS_2]:
            pusher_p4user = self.ctx.authenticated_p4user
            LOG.debug("Pusher is: {}, author is: {}".format(pusher_p4user,
                                                            author_p4user))
            desc = change_description(commit, pusher_p4user, author_p4user)

            try:
                opened = self.ctx.p4.run('opened')
                if opened:
                    changenum = p4_submit(self.ctx.p4, desc, author_p4user,
                                          commit['author']['date'])
                    LOG.info("Submitted change @{} for commit {}"
                             .format(changenum, commit['sha1']))
                else:
                    LOG.info("Ignored empty commit {}".format(commit['sha1']))
                    return None
            except P4.P4Exception as e:
                self.revert_and_raise(str(e))
            return ":" + str(changenum) + " " + commit['sha1']

    def test_block_push(self):
        """Test hook to temporarily block and let test script introduce
        conflicting changes.

        NOP unless the P4GF_TEST_BLOCK_PUSH test variable is set.
        """
        s = p4gf_util.test_vars().get(p4gf_const.P4GF_TEST_BLOCK_PUSH)
        if not s:
            return

        log = logging.getLogger("test_block_push")
        block_dict = p4gf_util.test_var_to_dict(s)
        log.debug(block_dict)

        client_path = p4gf_path.slash_dot_dot_dot(self.ctx.config.p4client)

        # Fetch ALL the submitted changelists as of right now.
        log.debug("p4 changes {}".format(client_path))
        cl_ay = self.ctx.p4.run('changes', '-l', client_path)

        # Don't block until after something?
        after = block_dict['after']
        if after:
            if not contains_desc(after, cl_ay):
                log.debug("Do not block until after: {}".format(after))
                return

        until = block_dict['until']
        log.debug("BLOCKING. Seen 'after': {}".format(after))
        log.debug("BLOCKING. Waiting for 'until': {}".format(until))

        changes_path_at = ("{path}@{change},now"
                           .format(path=client_path,
                                   change=cl_ay[-1]['change']))

        # Poll once a second until a change with the 'until' text shows up.
        while not contains_desc(until, cl_ay):
            time.sleep(1)
            cl_ay = self.ctx.p4.run('changes', changes_path_at)

        log.debug("Block released")

    def copy(self, start_at, end_at):
        """Copy a set of commits from git into perforce.

        start_at, end_at: passed to p4gf_fastexport.FastExport to select
                          the commits to export.

        Raises RuntimeError for an unexpected fast-export command or when
        the conflict checker reports a conflict.
        """
        with self.perf.timer[OVERALL]:
            with p4gf_util.HeadRestorer():
                LOG.debug("begin copying from {} to {}".format(start_at,
                                                               end_at))
                self.attempt_resync()
                with self.perf.timer[CHECK_CONFLICT]:
                    conflict_checker = G2PConflictChecker(self.ctx)
                with self.perf.timer[FAST_EXPORT]:
                    fe = p4gf_fastexport.FastExport(start_at, end_at,
                                                    self.ctx.tempdir.name)
                    fe.run()
                marks = []
                commit_count = sum(1 for x in fe.commands
                                   if x['command'] == 'commit')
                self.progress.progress_init_determinate(commit_count)
                try:
                    for command in fe.commands:
                        with self.perf.timer[TEST_BLOCK_PUSH]:
                            self.test_block_push()
                        if command['command'] == 'commit':
                            self.progress.progress_increment(
                                "Copying changelists...")
                            self.ctx.heartbeat()
                            with self.perf.timer[COPY]:
                                mark = self.copy_commit(command)
                                if mark is None:
                                    # Empty commit: nothing was submitted.
                                    continue
                            with self.perf.timer[CHECK_CONFLICT]:
                                (git_commit_sha1,
                                 p4_changelist_number) = \
                                    mark_to_commit_changelist(mark)
                                conflict_checker.record_commit(
                                    git_commit_sha1, p4_changelist_number)
                                if conflict_checker.check():
                                    LOG.error("P4 conflict found")
                                    break
                            marks.append(mark)
                        elif command['command'] == 'reset':
                            pass
                        else:
                            raise RuntimeError(
                                "Unexpected fast-export command: "
                                + command['command'])
                finally:
                    # We want to write mirror objects for any commits that
                    # made it through; any exception will still be alive
                    # after this.
                    with self.perf.timer[MIRROR]:
                        self.ctx.mirror.add_commits(marks)
                        self.ctx.mirror.add_objects_to_p4(self.ctx)

                if conflict_checker.has_conflict():
                    raise RuntimeError(
                        "Conflicting change from Perforce caused one"
                        + " or more git commits to fail. Time to"
                        + " pull, rebase, and try again.")

        LOG.getChild("time").debug("\n" + str(self))
class P2G:
    """Copy changelists from Perforce into Git via git-fast-import."""

    def __init__(self, ctx):
        """Store ctx, configure FastImport, and create timers and state."""
        self.ctx = ctx

        self.fastimport = FastImport(self.ctx)
        self.fastimport.set_timezone(self.ctx.timezone)
        self.fastimport.set_project_root_path(self.ctx.contentlocalroot)

        self.perf = p4gf_profiler.TimerCounterSet()
        # Each tuple is (timer, parent timer); OVERALL is the root.
        timers = [
            OVERALL,
            (SETUP, OVERALL),
            (PRINT, OVERALL),
            (FSTAT, OVERALL),
            (SYNC, OVERALL),
            (FAST_IMPORT, OVERALL),
            (MIRROR, OVERALL),
            (MERGE, OVERALL),
            (PACK, OVERALL),
        ]
        self.perf.add_timers(timers)

        # RevRange instance set in copy().
        self.rev_range = None
        # P4Changelist of the graft change; set in _setup() when grafting.
        self.graft_change = None
        # dict['changelist'] ==> P4Changelist of what to copy()
        self.changes = None
        # RevList produced by PrintHandler
        self.printed_revs = None
        self.status_verbose = True
        self.progress = ProgressReporter()

    def __str__(self):
        """Return the fast-import summary followed by the timers."""
        pieces = ["\n\nFast Import:\n",
                  str(self.fastimport),
                  "",
                  str(self.perf),
                  ""]
        return "\n".join(pieces)

    def _setup(self, start_at, stop_at):
        """Set RevRange rev_range, figure out which changelists to copy."""
        self.rev_range = RevRange.from_start_stop(self.ctx, start_at, stop_at)
        LOG.debug("Revision range to copy to Git: {rr}"
                  .format(rr=self.rev_range))

        # Get list of changes to import into git.
        self.changes = P4Changelist.create_changelist_list_as_dict(
            self.ctx.p4, self._path_range())

        # If grafting, get that too.
        if not self.rev_range.graft_change_num:
            return
        # Ignore all depotFile elements, we just want the
        # change/desc/time/user.
        self.graft_change = P4Changelist.create_using_describe(
            self.ctx.p4,
            self.rev_range.graft_change_num,
            "ignore_depot_files")
        note = '\n[grafted history before {start_at}]'.format(
            start_at=start_at)
        self.graft_change.description += note

    def _path_range(self):
        """Return the common path...@range string we use frequently."""
        view_path = self.ctx.client_view_path()
        return view_path + self.rev_range.as_range_string()

    def _copy_print(self):
        """p4 print all revs and git-hash-object them into the git repo."""
        server_can_unexpand = self.ctx.p4.server_level > 32
        printhandler = PrintHandler(need_unexpand=not server_can_unexpand,
                                    tempdir=self.ctx.tempdir.name)
        self.ctx.p4.handler = printhandler

        # -k is only passed when the server can unexpand for us.
        keyword_flag = ["-k"] if server_can_unexpand else []

        self.ctx.p4.run("print", ["-a"] + keyword_flag, self._path_range())
        printhandler.flush()
        printhandler.progress.progress_finish()

        # If also grafting, print all revs in existence at time of graft.
        if self.graft_change:
            graft_path = self._graft_path()
            LOG.debug("Printing for grafted history: {}".format(graft_path))
            self.ctx.p4.run("print", list(keyword_flag), graft_path)
            printhandler.flush()

            # If grafting, we just printed revs that refer to changelists
            # that have no P4Changelist counterpart in self.changes. Make
            # some skeletal versions now so that FstatHandler will have
            # someplace to hang its outputStat() P4File instances.
            for (_key, p4file) in printhandler.revs.revs:
                if p4file.change in self.changes:
                    continue
                skeleton = P4Changelist()
                skeleton.change = p4file.change
                self.changes[p4file.change] = skeleton

        self.ctx.p4.handler = None
        self.printed_revs = printhandler.revs

    def _fstat(self):
        """Run fstat to find deleted revs and get client paths.

        Returns the changelist numbers to copy as a list of strings, in
        ascending numeric order.
        """
        # TODO for 12.2 print will also report deleted revs so between
        # that and using MapApi to get client paths, we won't need this
        # fstat
        self.ctx.p4.handler = FstatHandler(self.printed_revs, self.changes)
        fstat_cols = "-T" + ",".join(P4File.fstat_cols())
        self.ctx.p4.run("fstat", "-Of", fstat_cols, self._path_range())

        if self.graft_change:
            # Also run 'p4 fstat //<view>/...@change' for the graft
            # change to catch all files as of @change, not just
            # revs changed between begin and end of _path_range().
            self.ctx.p4.run("fstat", fstat_cols, self._graft_path())

        self.ctx.p4.handler = None

        self._collapse_to_graft_change()
        self._add_graft_to_changes()

        # Don't need this any more.
        self.printed_revs = None

        # Keys are sorted numerically, then turned back into strings.
        numeric_keys = sorted(int(key) for key in self.changes.keys())
        sorted_changes = [str(number) for number in numeric_keys]

        LOG.debug("\n".join(str(self.changes[ch]) for ch in sorted_changes))
        return sorted_changes

    def _sync(self, sorted_changes):
        """Fake sync of last change to make life easier at push time."""
        self.ctx.p4.handler = SyncHandler()
        lastchange = self.changes[sorted_changes[-1]]
        sync_path = self.ctx.client_view_path() + "@" + str(lastchange.change)
        self.ctx.p4.run("sync", "-kf", sync_path)
        self.ctx.p4.handler = None

    def _fast_import(self, sorted_changes, last_commit):
        """Build fast-import script from changes, then run fast-import.

        Returns the list of marks produced by run_fast_import().
        """
        self.progress.progress_init_determinate(len(sorted_changes))
        parent = last_commit
        for changenum in sorted_changes:
            change = self.changes[changenum]
            self.progress.progress_increment("Copying changelists...")
            self.ctx.heartbeat()

            # Create commit and trees; each change becomes the parent of
            # the next one.
            self.fastimport.add_commit(change, parent)
            parent = change.change

        # Run git-fast-import and get list of marks.
        marks = self.fastimport.run_fast_import()

        # Done with these.
        self.changes = None
        return marks

    def _mirror(self, marks):
        """Build up list of p4 objects to mirror git repo in perforce,
        then submit them.
        """
        mirror = self.ctx.mirror
        mirror.add_commits(marks)
        mirror.add_objects_to_p4(self.ctx)
        LOG.getChild("time").debug("\n\nGit Mirror:\n" + str(mirror))
        # Start over with a fresh mirror for whatever comes next.
        self.ctx.mirror = GitMirror(self.ctx.config.view_name)

        last_commit = marks[-1]
        LOG.debug("Last commit created: " + last_commit)

    # pylint: disable=R0201
    # R0201 Method could be a function
    def _pack(self):
        """Run 'git gc' to pack up the blobs.

        Aside from any possible performance benefit, this prevents warnings
        from git about "unreachable loose objects".
        """
        p4gf_util.popen_no_throw(["git", "gc"])

    def _collapse_to_graft_change(self):
        """Move all of the files from pre-graft changelists into the graft
        changelist. Remove all pre-graft changelists.

        NOP if not grafting.

        'p4 print //client/...@100' does indeed print all the files that
        exist @100, but the tag dict that goes with each file includes the
        changelist in which that file was last added/edited, not 100. So
        this function gathers up all the file revs with change=1..99 and
        sticks them under change 100's file list.
        """
        graft = self.graft_change
        if not graft:
            return

        graft_num_int = int(graft.change)
        LOG.debug("_collapse_to_graft_change() graft_num_int={}"
                  .format(graft_num_int))

        # Delete all P4Changelist elements from self.changes where they
        # refer to a change that will be collapsed into the graft change,
        # including the graft change itself.
        del_keys = []
        for p4changelist in self.changes.values():
            if int(p4changelist.change) <= graft_num_int:
                LOG.debug("_collapse_to_graft_change() deleting {}"
                          .format(p4changelist.change))
                del_keys.append(p4changelist.change)
            else:
                LOG.debug("_collapse_to_graft_change() skipping {}"
                          .format(p4changelist.change))
        for key in del_keys:
            del self.changes[key]

        # Associate with the graft change all printed P4File results from
        # graft-change or older.
        for (_key, p4file) in self.printed_revs.revs:
            if graft_num_int < int(p4file.change):
                LOG.debug("_collapse_to_graft_change() skipping post-graft {}"
                          .format(p4file))
                continue

            old = graft.file_from_depot_path(p4file.depot_path)
            # If print picked up multiple revs, keep the newest.
            if old and not (int(old.change) < int(p4file.change)):
                LOG.debug("_collapse_to_graft_change() skipping, had newer {}"
                          .format(p4file))
            else:
                p4file.change = graft.change
                graft.files.append(p4file)
                LOG.debug("_collapse_to_graft_change() keeping {}"
                          .format(p4file))

    def _add_graft_to_changes(self):
        """Add the graft changelist to our list of changes: it will be
        copied over like any other change.

        NOP if not grafting.
        """
        graft = self.graft_change
        if graft:
            self.changes[graft.change] = graft

    def _graft_path(self):
        """If grafting, return '//<client>/...@N' where N is the graft
        changelist number.

        If not grafting, return None.
        """
        if not self.graft_change:
            return None
        view_path = self.ctx.client_view_path()
        return "{path}@{change}".format(path=view_path,
                                        change=self.graft_change.change)

    def copy(self, start_at, stop_at):
        """Copy a set of changelists from perforce into git."""
        perf_timer = self.perf.timer
        with perf_timer[OVERALL]:
            with perf_timer[SETUP]:
                self._setup(start_at, stop_at)

                if len(self.changes) == 0:
                    LOG.debug("No new changes found to copy")
                    return

                last_commit = self.rev_range.last_commit

            with perf_timer[PRINT]:
                self._copy_print()

            with perf_timer[FSTAT]:
                sorted_changes = self._fstat()

            with perf_timer[SYNC]:
                self._sync(sorted_changes)

            with perf_timer[FAST_IMPORT]:
                marks = self._fast_import(sorted_changes, last_commit)
                sorted_changes = None

            with perf_timer[MIRROR]:
                self._mirror(marks)

            with perf_timer[MERGE]:
                # Merge temporary branch into master, then delete it.
                self.fastimport.merge()

            with perf_timer[PACK]:
                self._pack()

        LOG.getChild("time").debug("\n" + str(self))