class GitWhoosh(object):
    """Whoosh full-text index over the blobs listed in a git index.

    Instances are also WSGI callables: the URL-unquoted query string is
    used as the search query and the hits are returned as JSON.
    """

    def __init__(self, repos_path, index_path):
        """Open the repository and open (or create) the Whoosh index.

        :param repos_path: path handed to ``Repo``.
        :param index_path: location of the Whoosh index; it is created
            with a ``path`` / ``itime`` / ``content`` schema when
            ``exists_in`` reports that none is there yet.
        """
        self.repo = Repo(repos_path)
        self.index_path = index_path
        self.git_index = self.repo.open_index()
        if not exists_in(self.index_path):
            schema = Schema(path=ID(unique=True, stored=True),
                            itime=STORED,
                            content=TEXT)
            self.ix = create_in(self.index_path, schema)
        else:
            self.ix = open_dir(self.index_path)

    def hook_index(self, func, path):
        """Feed one git index entry to a writer callback.

        :param func: callable accepting ``path``, ``content`` and ``itime``
            keyword arguments (``index`` passes ``writer.add_document`` or
            ``writer.update_document``).
        :param path: key into ``self.git_index``; it is decoded as UTF-8
            before being handed to ``func``.
        """
        entry = self.git_index[path]
        mtime = entry[1]  # stored as ``itime`` and compared in ``index``
        sha = entry[8]    # object id used to fetch the blob
        blob = self.repo.get_blob(sha).as_raw_string()
        func(path=path.decode("utf-8"),
             content=blob.decode("utf-8"),
             itime=mtime)

    def index(self, regexp=None):
        """Synchronise the Whoosh index with the git index.

        Documents whose path is no longer in the git index are deleted,
        unknown paths are added, and known paths are re-indexed when the
        git entry's time stamp is greater than the stored ``itime``.

        :param regexp: optional pattern; when given, only git paths for
            which ``re.search(regexp, path)`` matches are added/updated.
        """
        with self.ix.searcher() as searcher:
            writer = self.ix.writer()
            try:
                # Stored paths are text (they were decoded in hook_index)
                # while git index keys are UTF-8 byte strings, so convert
                # explicitly in both directions instead of relying on
                # implicit coercion, which fails for non-ASCII paths.
                indexed = {}
                for fields in searcher.all_stored_fields():
                    stored_path = fields["path"]
                    indexed[stored_path] = fields["itime"]
                    if stored_path.encode("utf-8") not in self.git_index:
                        writer.delete_by_term("path", stored_path)

                for path in self.git_index:
                    if regexp and not re.search(regexp, path):
                        continue
                    key = path.decode("utf-8")
                    if key in indexed:
                        if self.git_index[path][1] > indexed[key]:
                            self.hook_index(writer.update_document, path)
                    else:
                        self.hook_index(writer.add_document, path)
            except Exception:
                # Abandon the half-built segment so the writer does not
                # stay open after a failure.
                writer.cancel()
                raise
            writer.commit()

    def search(self, query):
        """Search the ``content`` field.

        :param query: UTF-8 encoded query string.
        :returns: list of ``{"path": ..., "terms": [...]}`` dicts, one per
            hit, where ``terms`` holds the second item of every entry
            returned by the hit's ``matched_terms()``.
        """
        parser = QueryParser("content", schema=self.ix.schema)
        q = parser.parse(query.decode("utf-8"))
        with self.ix.searcher() as searcher:
            results = searcher.search(q, terms=True)
            # Build the plain list before the searcher is closed.
            return [
                {"path": hit["path"],
                 "terms": [term[1] for term in hit.matched_terms()]}
                for hit in results
            ]

    def __call__(self, environ, start_response):
        """WSGI entry point: run the query string as a search.

        Always answers ``200 OK`` with a JSON body; an empty or missing
        ``QUERY_STRING`` yields ``[]``.

        :returns: a one-element list holding the JSON document, so the
            server sends it in one piece rather than iterating a bare
            string character by character.
        """
        start_response("200 OK", [("Content-Type", "application/json")])
        output = []
        qs = environ.get("QUERY_STRING", None)
        if qs:
            output = self.search(urllib.unquote(qs))
        return [json.dumps(output)]
class GitRepo(object):
    """Wrapper around a ``Repo`` for exporting, hashing and committing paths.

    Temporary files and directories created while exporting are recorded
    in ``self.temp_persist_files``.
    """

    def __init__(self, path):
        """Open the repository at ``path``, initialising it if needed.

        :raises IOError: if ``path`` exists but is not a directory.
        """
        if os.path.exists(path) and not os.path.isdir(path):
            raise IOError('Git repository "%s" must be a directory.' % path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # No repository there yet; the second argument is true only
            # when ``path`` itself does not exist.
            self.repo = Repo.init(path, not os.path.exists(path))
        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        """Return the commit object for ``version``.

        :raises NotCommitError: if ``version`` resolves to a non-commit.
        """
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def _get_tree_entry(self, name, version, kind):
        """Look ``name`` up in the root tree of ``version``.

        :param kind: word used in the error message ("object", "blob" or
            "tree").
        :returns: the tree entry; callers use item 0 as the mode and
            item 1 as the object id.
        :raises KeyError: if ``name`` is not in the tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find %s "%s"' % (kind, name))
        return tree[name]

    def get_type(self, name, version="HEAD"):
        """Return ``"tree"`` if the entry's mode has the directory bit set,
        otherwise ``"blob"``.

        :raises KeyError: if ``name`` is not in the tree.
        """
        mode = self._get_tree_entry(name, version, "object")[0]
        if mode & stat.S_IFDIR:
            return "tree"
        return "blob"

    def get_path(self, name, version="HEAD", path_type=None, out_name=None,
                 out_suffix=''):
        """Export ``name`` as a file or a directory, depending on its type.

        :param path_type: ``'tree'`` or ``'blob'``; detected with
            ``get_type`` when ``None``.
        :returns: whatever ``get_dir`` / ``get_file`` returns.
        :raises TypeError: for any other ``path_type``.
        """
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)
        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        """Write the blob ``blob_sha`` to disk and return the file name.

        With no ``out_fname`` a temporary file (prefix ``vt_persist``) is
        created and recorded in ``temp_persist_files``; otherwise missing
        parent directories of ``out_fname`` are created first.
        """
        if out_fname is None:
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for chunk in blob.as_raw_chunks():
                f.write(chunk)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None, out_suffix=''):
        """Export the blob ``name`` from ``version``; return the file name.

        :raises KeyError: if ``name`` is not in the tree.
        """
        blob_sha = self._get_tree_entry(name, version, "blob")[1]
        return self._write_blob(blob_sha, out_fname, out_suffix)

    def get_dir(self, name, version="HEAD", out_dirname=None, out_suffix=''):
        """Export the tree ``name`` from ``version``; return the directory.

        With no ``out_dirname`` a temporary directory (prefix
        ``vt_persist``) is created and recorded in ``temp_persist_files``.

        :raises KeyError: if ``name`` is not in the tree.
        """
        # Resolve the entry first so a failed lookup does not leave an
        # empty output directory behind.
        subtree_id = self._get_tree_entry(name, version, "tree")[1]

        if out_dirname is None:
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        """Return the object id recorded for ``name`` in ``version``.

        ``path_type`` is accepted but not used.

        :raises KeyError: if ``name`` is not in the tree.
        """
        return self._get_tree_entry(name, version, "object")[1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1 << 16):
        """Hash the file ``fname`` as a blob, reading ``chunk_size`` bytes
        at a time, and return the result of ``iter_sha1``.
        """
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            # The file is opened in binary mode, so end-of-file is the
            # empty *bytes* string; a text '' sentinel would never match
            # on interpreters where str and bytes differ.
            chunks = iter(lambda: f.read(chunk_size), b'')
            return iter_sha1(chain([head], chunks))

    @staticmethod
    def compute_tree_hash(dirname):
        """Recursively build a ``Tree`` for ``dirname`` and return its id.

        Entries that are neither directories nor regular files are skipped.
        """
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                # Directories are recorded with the bare directory flag,
                # not their on-disk permission bits.
                tree.add(entry, stat.S_IFDIR, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                # NOTE(review): the raw on-disk mode is used here; confirm
                # it matches the modes stored in committed trees.
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        """Return the tree hash of a directory or the blob hash of a file.

        :raises TypeError: if ``path`` is neither.
        """
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        """Return the id of the first commit a ``Walker`` started at the
        repository head yields for ``path``.
        """
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head], max_entries=1,
                        paths=[path])
        return next(iter(walker)).commit.id

    def _stage(self, filename):
        """Stage ``filename`` (relative to the repository path).

        Directories are staged recursively; symbolic links are skipped
        with a warning.
        """
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" %
                          os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            # Paths are staged with '/' separators regardless of platform.
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        """Stage ``filename`` and commit it; return the new commit id."""
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        """Make sure ``user.name`` and ``user.email`` are configured.

        When either lookup raises ``KeyError``, both are written to the
        repository configuration using the current user name and
        ``<user>@localhost``.
        """
        config_stack = self.repo.get_config_stack()
        try:
            config_stack.get(('user', ), 'name')
            config_stack.get(('user', ), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user', ), 'name', user)
            repo_conf.set(('user', ), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()
class GitRepo(object):
    """Wrapper around a ``Repo`` for exporting, hashing and committing paths.

    Temporary files and directories created while exporting are recorded
    in ``self.temp_persist_files``.
    """

    def __init__(self, path):
        """Open the repository at ``path``, initialising it if needed.

        :raises IOError: if ``path`` exists but is not a directory.
        """
        if os.path.exists(path) and not os.path.isdir(path):
            raise IOError('Git repository "%s" must be a directory.' % path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # No repository there yet; the second argument is true only
            # when ``path`` itself does not exist.
            self.repo = Repo.init(path, not os.path.exists(path))
        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        """Return the commit object for ``version``.

        :raises NotCommitError: if ``version`` resolves to a non-commit.
        """
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def _get_tree_entry(self, name, version, kind):
        """Look ``name`` up in the root tree of ``version``.

        :param kind: word used in the error message ("object", "blob" or
            "tree").
        :returns: the tree entry; callers use item 0 as the mode and
            item 1 as the object id.
        :raises KeyError: if ``name`` is not in the tree.
        """
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find %s "%s"' % (kind, name))
        return tree[name]

    def get_type(self, name, version="HEAD"):
        """Return ``"tree"`` if the entry's mode has the directory bit set,
        otherwise ``"blob"``.

        :raises KeyError: if ``name`` is not in the tree.
        """
        mode = self._get_tree_entry(name, version, "object")[0]
        if mode & stat.S_IFDIR:
            return "tree"
        return "blob"

    def get_path(self, name, version="HEAD", path_type=None, out_name=None,
                 out_suffix=''):
        """Export ``name`` as a file or a directory, depending on its type.

        :param path_type: ``'tree'`` or ``'blob'``; detected with
            ``get_type`` when ``None``.
        :returns: whatever ``get_dir`` / ``get_file`` returns.
        :raises TypeError: for any other ``path_type``.
        """
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)
        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        """Write the blob ``blob_sha`` to disk and return the file name.

        With no ``out_fname`` a temporary file (prefix ``vt_persist``) is
        created and recorded in ``temp_persist_files``; otherwise missing
        parent directories of ``out_fname`` are created first.
        """
        if out_fname is None:
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for chunk in blob.as_raw_chunks():
                f.write(chunk)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None, out_suffix=''):
        """Export the blob ``name`` from ``version``; return the file name.

        :raises KeyError: if ``name`` is not in the tree.
        """
        blob_sha = self._get_tree_entry(name, version, "blob")[1]
        return self._write_blob(blob_sha, out_fname, out_suffix)

    def get_dir(self, name, version="HEAD", out_dirname=None, out_suffix=''):
        """Export the tree ``name`` from ``version``; return the directory.

        With no ``out_dirname`` a temporary directory (prefix
        ``vt_persist``) is created and recorded in ``temp_persist_files``.

        :raises KeyError: if ``name`` is not in the tree.
        """
        # Resolve the entry first so a failed lookup does not leave an
        # empty output directory behind.
        subtree_id = self._get_tree_entry(name, version, "tree")[1]

        if out_dirname is None:
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        """Return the object id recorded for ``name`` in ``version``.

        ``path_type`` is accepted but not used.

        :raises KeyError: if ``name`` is not in the tree.
        """
        return self._get_tree_entry(name, version, "object")[1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1 << 16):
        """Hash the file ``fname`` as a blob, reading ``chunk_size`` bytes
        at a time, and return the result of ``iter_sha1``.
        """
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            # The file is opened in binary mode, so end-of-file is the
            # empty *bytes* string; a text '' sentinel would never match
            # on interpreters where str and bytes differ.
            chunks = iter(lambda: f.read(chunk_size), b'')
            return iter_sha1(chain([head], chunks))

    @staticmethod
    def compute_tree_hash(dirname):
        """Recursively build a ``Tree`` for ``dirname`` and return its id.

        Entries that are neither directories nor regular files are skipped.
        """
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                # Directories are recorded with the bare directory flag,
                # not their on-disk permission bits.
                tree.add(entry, stat.S_IFDIR, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                # NOTE(review): the raw on-disk mode is used here; confirm
                # it matches the modes stored in committed trees.
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        """Return the tree hash of a directory or the blob hash of a file.

        :raises TypeError: if ``path`` is neither.
        """
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        """Return the id of the first commit a ``Walker`` started at the
        repository head yields for ``path``.
        """
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head], max_entries=1,
                        paths=[path])
        return next(iter(walker)).commit.id

    def _stage(self, filename):
        """Stage ``filename`` (relative to the repository path).

        Directories are staged recursively; symbolic links are skipped
        with a warning.
        """
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" %
                          os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            # Paths are staged with '/' separators regardless of platform.
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        """Stage ``filename`` and commit it; return the new commit id."""
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        """Make sure ``user.name`` and ``user.email`` are configured.

        When either lookup raises ``KeyError``, both are written to the
        repository configuration using the current user name and
        ``<user>@localhost``.
        """
        config_stack = self.repo.get_config_stack()
        try:
            config_stack.get(('user',), 'name')
            config_stack.get(('user',), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user',), 'name', user)
            repo_conf.set(('user',), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()