def _get_untracked_files(self, **kwargs): # make sure we get all files, no only untracked directores proc = self.git.status(porcelain=True, untracked_files=True, as_process=True, **kwargs) # Untracked files preffix in porcelain mode prefix = "?? " untracked_files = list() for line in proc.stdout: line = line.decode(defenc) if not line.startswith(prefix): continue filename = line[len(prefix):].rstrip('\n') # Special characters are escaped if filename[0] == filename[-1] == '"': filename = filename[1:-1] if PY3: # WHATEVER ... it's a mess, but works for me filename = filename.encode('ascii').decode('unicode_escape').encode('latin1').decode(defenc) else: filename = filename.decode('string_escape').decode(defenc) untracked_files.append(filename) finalize_process(proc) return untracked_files
def _iter_from_process_or_stream(cls, repo, proc_or_stream): """Parse out commit information into a list of Commit objects We expect one-line per commit, and parse the actual commit information directly from our lighting fast object database :param proc: git-rev-list process instance - one sha per line :return: iterator returning Commit objects""" stream = proc_or_stream if not hasattr(stream, 'readline'): stream = proc_or_stream.stdout readline = stream.readline while True: line = readline() if not line: break hexsha = line.strip() if len(hexsha) > 40: # split additional information, as returned by bisect for instance hexsha, rest = line.split(None, 1) # END handle extra info assert len(hexsha) == 40, "Invalid line: %s" % hexsha yield Commit(repo, hex_to_bin(hexsha)) # END for each line in stream # TODO: Review this - it seems process handling got a bit out of control # due to many developers trying to fix the open file handles issue if hasattr(proc_or_stream, 'wait'): finalize_process(proc_or_stream)
def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): # special handling for windows for path at which the clone should be # created. # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence # we at least give a proper error instead of letting git fail prev_cwd = None prev_path = None odbt = kwargs.pop('odbt', odb_default_type) if os.name == 'nt': if '~' in path: raise OSError("Git cannot handle the ~ character in path %r correctly" % path) # on windows, git will think paths like c: are relative and prepend the # current working dir ( before it fails ). We temporarily adjust the working # dir to make this actually work match = re.match("(\w:[/\\\])(.*)", path) if match: prev_cwd = os.getcwd() prev_path = path drive, rest_of_path = match.groups() os.chdir(drive) path = rest_of_path kwargs['with_keep_cwd'] = True # END cwd preparation # END windows handling try: proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) if progress: handle_process_output(proc, None, progress.new_message_handler(), finalize_process) else: (stdout, stderr) = proc.communicate() finalize_process(proc, stderr=stderr) # end handle progress finally: if prev_cwd is not None: os.chdir(prev_cwd) path = prev_path # END reset previous working dir # END bad windows handling # our git command could have a different working dir than our actual # environment, hence we prepend its working dir if required if not os.path.isabs(path) and git.working_dir: path = join(git._working_dir, path) # adjust remotes - there may be operating systems which use backslashes, # These might be given as initial paths, but when handling the config file # that contains the remote from which we were clones, git stops liking it # as it will escape the backslashes. Hence we undo the escaping just to be # sure repo = cls(os.path.abspath(path), odbt=odbt) if repo.remotes: writer = repo.remotes[0].config_writer writer.set_value('url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) # PY3: be sure cleanup is performed and lock is released writer.release() # END handle remote repo return repo
def untracked_files(self): """ :return: list(str,...) Files currently untracked as they have not been staged yet. Paths are relative to the current working directory of the git command. :note: ignored files will not appear here, i.e. files mentioned in .gitignore""" # make sure we get all files, no only untracked directores proc = self.git.status(porcelain=True, untracked_files=True, as_process=True) # Untracked files preffix in porcelain mode prefix = "?? " untracked_files = list() for line in proc.stdout: line = line.decode(defenc) if not line.startswith(prefix): continue filename = line[len(prefix):].rstrip('\n') # Special characters are escaped if filename[0] == filename[-1] == '"': filename = filename[1:-1].decode('string_escape') untracked_files.append(filename) finalize_process(proc) return untracked_files
def _get_fetch_info_from_stderr(self, proc, progress): progress = to_progress_instance(progress) # skip first line as it is some remote info we are not interested in output = IterableList('name') # lines which are no progress are fetch info lines # this also waits for the command to finish # Skip some progress lines that don't provide relevant information fetch_info_lines = list() # Basically we want all fetch info lines which appear to be in regular form, and thus have a # command character. Everything else we ignore, cmds = set(PushInfo._flag_map.keys()) & set(FetchInfo._flag_map.keys()) progress_handler = progress.new_message_handler() stderr_text = None for line in proc.stderr: line = force_text(line) for pline in progress_handler(line): # END handle special messages for cmd in cmds: if len(line) > 1 and line[0] == ' ' and line[1] == cmd: fetch_info_lines.append(line) continue # end find command code # end for each comand code we know # end for each line progress didn't handle # end if progress.error_lines(): stderr_text = '\n'.join(progress.error_lines()) finalize_process(proc, stderr=stderr_text) # read head information fp = open(join(self.repo.git_dir, 'FETCH_HEAD'), 'rb') fetch_head_info = [l.decode(defenc) for l in fp.readlines()] fp.close() l_fil = len(fetch_info_lines) l_fhi = len(fetch_head_info) if l_fil != l_fhi: msg = "Fetch head lines do not match lines provided via progress information\n" msg += "length of progress lines %i should be equal to lines in FETCH_HEAD file %i\n" msg += "Will ignore extra progress lines or fetch head lines." msg %= (l_fil, l_fhi) log.debug(msg) if l_fil < l_fhi: fetch_head_info = fetch_head_info[:l_fil] else: fetch_info_lines = fetch_info_lines[:l_fhi] # end truncate correct list # end sanity check + sanitization output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info)) return output
def _get_fetch_info_from_stderr(self, proc, progress): # skip first line as it is some remote info we are not interested in # TODO: Use poll() to process stdout and stderr at same time output = IterableList('name') # lines which are no progress are fetch info lines # this also waits for the command to finish # Skip some progress lines that don't provide relevant information fetch_info_lines = list() # Basically we want all fetch info lines which appear to be in regular form, and thus have a # command character. Everything else we ignore, cmds = set(PushInfo._flag_map.keys()) & set(FetchInfo._flag_map.keys()) progress_handler = progress.new_message_handler() for line in proc.stderr: line = line.decode(defenc) line = line.rstrip() for pline in progress_handler(line): if line.startswith('fatal:') or line.startswith('error:'): raise GitCommandError(("Error when fetching: %s" % line,), 2) # END handle special messages for cmd in cmds: if len(line) > 1 and line[0] == ' ' and line[1] == cmd: fetch_info_lines.append(line) continue # end find command code # end for each comand code we know # end for each line progress didn't handle # end # We are only interested in stderr here ... try: finalize_process(proc) except Exception: if len(fetch_info_lines) == 0: raise # end exception handler # read head information fp = open(join(self.repo.git_dir, 'FETCH_HEAD'), 'rb') fetch_head_info = [l.decode(defenc) for l in fp.readlines()] fp.close() # NOTE: We assume to fetch at least enough progress lines to allow matching each fetch head line with it. assert len(fetch_info_lines) >= len(fetch_head_info), "len(%s) <= len(%s)" % (fetch_head_info, fetch_info_lines) output.extend(FetchInfo._from_line(self.repo, err_line, fetch_line) for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info)) return output
def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): if progress is not None: progress = to_progress_instance(progress) odbt = kwargs.pop('odbt', odb_default_type) # when pathlib.Path or other classbased path is passed if not isinstance(path, str): path = str(path) ## A bug win cygwin's Git, when `--bare` or `--separate-git-dir` # it prepends the cwd or(?) the `url` into the `path, so:: # git clone --bare /cygwin/d/foo.git C:\\Work # becomes:: # git clone --bare /cygwin/d/foo.git /cygwin/d/C:\\Work # clone_path = (Git.polish_url(path) if Git.is_cygwin() and 'bare' in kwargs else path) sep_dir = kwargs.get('separate_git_dir') if sep_dir: kwargs['separate_git_dir'] = Git.polish_url(sep_dir) proc = git.clone(Git.polish_url(url), clone_path, with_extended_output=True, as_process=True, v=True, universal_newlines=True, **add_progress(kwargs, git, progress)) if progress: handle_process_output(proc, None, progress.new_message_handler(), finalize_process, decode_streams=False) else: (stdout, stderr) = proc.communicate() log.debug("Cmd(%s)'s unused stdout: %s", getattr(proc, 'args', ''), stdout) finalize_process(proc, stderr=stderr) # our git command could have a different working dir than our actual # environment, hence we prepend its working dir if required if not osp.isabs(path) and git.working_dir: path = osp.join(git._working_dir, path) repo = cls(path, odbt=odbt) # retain env values that were passed to _clone() repo.git.update_environment(**git.environment()) # adjust remotes - there may be operating systems which use backslashes, # These might be given as initial paths, but when handling the config file # that contains the remote from which we were clones, git stops liking it # as it will escape the backslashes. Hence we undo the escaping just to be # sure if repo.remotes: with repo.remotes[0].config_writer as writer: writer.set_value('url', Git.polish_url(repo.remotes[0].url)) # END handle remote repo return repo
def _iter_from_process_or_stream(cls, repo: 'Repo', proc_or_stream: Union[Popen, IO]) -> Iterator['Commit']: """Parse out commit information into a list of Commit objects We expect one-line per commit, and parse the actual commit information directly from our lighting fast object database :param proc: git-rev-list process instance - one sha per line :return: iterator returning Commit objects""" # def is_proc(inp) -> TypeGuard[Popen]: # return hasattr(proc_or_stream, 'wait') and not hasattr(proc_or_stream, 'readline') # def is_stream(inp) -> TypeGuard[IO]: # return hasattr(proc_or_stream, 'readline') if hasattr(proc_or_stream, 'wait'): proc_or_stream = cast(Popen, proc_or_stream) if proc_or_stream.stdout is not None: stream = proc_or_stream.stdout elif hasattr(proc_or_stream, 'readline'): proc_or_stream = cast(IO, proc_or_stream) stream = proc_or_stream readline = stream.readline while True: line = readline() if not line: break hexsha = line.strip() if len(hexsha) > 40: # split additional information, as returned by bisect for instance hexsha, _ = line.split(None, 1) # END handle extra info assert len(hexsha) == 40, "Invalid line: %s" % hexsha yield cls(repo, hex_to_bin(hexsha)) # END for each line in stream # TODO: Review this - it seems process handling got a bit out of control # due to many developers trying to fix the open file handles issue if hasattr(proc_or_stream, 'wait'): proc_or_stream = cast(Popen, proc_or_stream) finalize_process(proc_or_stream)
def _get_untracked_files(self, *args, **kwargs): # make sure we get all files, not only untracked directories proc = self.git.status(*args, porcelain=True, untracked_files=True, as_process=True, **kwargs) # Untracked files preffix in porcelain mode prefix = "?? " untracked_files = [] for line in proc.stdout: line = line.decode(defenc) if not line.startswith(prefix): continue filename = line[len(prefix):].rstrip('\n') # Special characters are escaped if filename[0] == filename[-1] == '"': filename = filename[1:-1] # WHATEVER ... it's a mess, but works for me filename = filename.encode('ascii').decode('unicode_escape').encode('latin1').decode(defenc) untracked_files.append(filename) finalize_process(proc) return untracked_files
def _get_fetch_info_from_stderr(self, proc, progress): # skip first line as it is some remote info we are not interested in output = IterableList('name') # lines which are no progress are fetch info lines # this also waits for the command to finish # Skip some progress lines that don't provide relevant information fetch_info_lines = list() for line in digest_process_messages(proc.stderr, progress): if line.startswith('From') or line.startswith('remote: Total') or line.startswith('POST') \ or line.startswith(' ='): continue elif line.startswith('warning:'): print >> sys.stderr, line continue elif line.startswith('fatal:'): raise GitCommandError(("Error when fetching: %s" % line, ), 2) # END handle special messages fetch_info_lines.append(line) # END for each line # read head information fp = open(join(self.repo.git_dir, 'FETCH_HEAD'), 'r') fetch_head_info = fp.readlines() fp.close() # NOTE: HACK Just disabling this line will make github repositories work much better. # I simply couldn't stand it anymore, so here is the quick and dirty fix ... . # This project needs a lot of work ! # assert len(fetch_info_lines) == len(fetch_head_info), "len(%s) != len(%s)" % (fetch_head_info, fetch_info_lines) output.extend( FetchInfo._from_line(self.repo, err_line, fetch_line) for err_line, fetch_line in zip(fetch_info_lines, fetch_head_info)) finalize_process(proc) return output
def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): if progress is not None: progress = to_progress_instance(progress) odbt = kwargs.pop('odbt', odb_default_type) proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) if progress: handle_process_output(proc, None, progress.new_message_handler(), finalize_process) else: (stdout, stderr ) = proc.communicate() # FIXME: Will block of outputs are big! log.debug("Cmd(%s)'s unused stdout: %s", getattr(proc, 'args', ''), stdout) finalize_process(proc, stderr=stderr) # our git command could have a different working dir than our actual # environment, hence we prepend its working dir if required if not os.path.isabs(path) and git.working_dir: path = join(git._working_dir, path) # adjust remotes - there may be operating systems which use backslashes, # These might be given as initial paths, but when handling the config file # that contains the remote from which we were clones, git stops liking it # as it will escape the backslashes. Hence we undo the escaping just to be # sure repo = cls(path, odbt=odbt) if repo.remotes: with repo.remotes[0].config_writer as writer: writer.set_value('url', Git.polish_url(repo.remotes[0].url)) # END handle remote repo return repo
def _clone(cls, git, url, path, odb_default_type, progress, **kwargs): # special handling for windows for path at which the clone should be # created. # tilde '~' will be expanded to the HOME no matter where the ~ occours. Hence # we at least give a proper error instead of letting git fail prev_cwd = None prev_path = None odbt = kwargs.pop('odbt', odb_default_type) if os.name == 'nt': if '~' in path: raise OSError( "Git cannot handle the ~ character in path %r correctly" % path) # on windows, git will think paths like c: are relative and prepend the # current working dir ( before it fails ). We temporarily adjust the working # dir to make this actually work match = re.match("(\w:[/\\\])(.*)", path) if match: prev_cwd = os.getcwd() prev_path = path drive, rest_of_path = match.groups() os.chdir(drive) path = rest_of_path kwargs['with_keep_cwd'] = True # END cwd preparation # END windows handling try: proc = git.clone(url, path, with_extended_output=True, as_process=True, v=True, **add_progress(kwargs, git, progress)) if progress: handle_process_output(proc, None, progress.new_message_handler(), finalize_process) else: finalize_process(proc) # end handle progress finally: if prev_cwd is not None: os.chdir(prev_cwd) path = prev_path # END reset previous working dir # END bad windows handling # our git command could have a different working dir than our actual # environment, hence we prepend its working dir if required if not os.path.isabs(path) and git.working_dir: path = join(git._working_dir, path) # adjust remotes - there may be operating systems which use backslashes, # These might be given as initial paths, but when handling the config file # that contains the remote from which we were clones, git stops liking it # as it will escape the backslashes. Hence we undo the escaping just to be # sure repo = cls(os.path.abspath(path), odbt=odbt) if repo.remotes: writer = repo.remotes[0].config_writer writer.set_value( 'url', repo.remotes[0].url.replace("\\\\", "\\").replace("\\", "/")) # PY3: be sure cleanup is performed and lock is released writer.release() # END handle remote repo return repo
def _clone(cls, git, url, path, odb_default_type, progress, multi_options=None, **kwargs): if progress is not None: progress = to_progress_instance(progress) odbt = kwargs.pop('odbt', odb_default_type) # when pathlib.Path or other classbased path is passed if not isinstance(path, str): path = str(path) ## A bug win cygwin's Git, when `--bare` or `--separate-git-dir` # it prepends the cwd or(?) the `url` into the `path, so:: # git clone --bare /cygwin/d/foo.git C:\\Work # becomes:: # git clone --bare /cygwin/d/foo.git /cygwin/d/C:\\Work # clone_path = (Git.polish_url(path) if Git.is_cygwin() and 'bare' in kwargs else path) sep_dir = kwargs.get('separate_git_dir') if sep_dir: kwargs['separate_git_dir'] = Git.polish_url(sep_dir) multi = None if multi_options: multi = ' '.join(multi_options).split(' ') proc = git.clone(multi, Git.polish_url(url), clone_path, with_extended_output=True, as_process=True, v=True, universal_newlines=True, **add_progress(kwargs, git, progress)) if progress: handle_process_output(proc, None, progress.new_message_handler(), finalize_process, decode_streams=False) else: (stdout, stderr) = proc.communicate() log.debug("Cmd(%s)'s unused stdout: %s", getattr(proc, 'args', ''), stdout) finalize_process(proc, stderr=stderr) # our git command could have a different working dir than our actual # environment, hence we prepend its working dir if required if not osp.isabs(path) and git.working_dir: path = osp.join(git._working_dir, path) repo = cls(path, odbt=odbt) # retain env values that were passed to _clone() repo.git.update_environment(**git.environment()) # adjust remotes - there may be operating systems which use backslashes, # These might be given as initial paths, but when handling the config file # that contains the remote from which we were clones, git stops liking it # as it will escape the backslashes. Hence we undo the escaping just to be # sure if repo.remotes: with repo.remotes[0].config_writer as writer: writer.set_value('url', Git.polish_url(repo.remotes[0].url)) # END handle remote repo return repo