示例#1
0
class GitWhoosh:
    def __init__(self, repos_path, index_path):
        self.repo = Repo(repos_path)
        self.index_path = index_path
        self.git_index = self.repo.open_index()
        if not exists_in(self.index_path):
            schema = Schema(path=ID(unique=True, stored=True), itime=STORED, content=TEXT)
            self.ix = create_in(self.index_path, schema)
        else:
            self.ix = open_dir(self.index_path)

    def hook_index(self, func, path):
        mtime = self.git_index[path][1]
        sha = self.git_index[path][8]
        blob = self.repo.get_blob(sha).as_raw_string()
        func(path=path.decode("utf-8"), content=blob.decode("utf-8"), itime=mtime)

    def index(self, regexp=None):
        with self.ix.searcher() as searcher:
            writer = self.ix.writer()
            # first of all, check for removed items
            paths = {}
            for fields in searcher.all_stored_fields():
                paths[fields["path"]] = fields["itime"]
                if not fields["path"] in self.git_index:
                    writer.delete_by_term("path", fields["path"])
            # now check for new or updated items
            for path in self.git_index:
                if regexp:
                    if not re.search(regexp, path):
                        continue
                if path in paths:
                    if self.git_index[path][1] > paths[path.decode("utf-8")]:
                        self.hook_index(writer.update_document, path)
                else:
                    self.hook_index(writer.add_document, path)
            writer.commit()

    def search(self, query):
        parser = QueryParser("content", schema=self.ix.schema)
        q = parser.parse(query.decode("utf-8"))
        found_items = []
        with self.ix.searcher() as searcher:
            results = searcher.search(q, terms=True)
            for r in results:
                terms = []
                for term in r.matched_terms():
                    terms.append(term[1])
                found_items.append({"path": r["path"], "terms": terms})
        return found_items

    def __call__(self, environ, start_response):
        start_response("200 OK", [("Content-Type", "application/json")])
        output = []
        qs = environ.get("QUERY_STRING", None)
        if qs:
            output = self.search(urllib.unquote(qs))
        return json.dumps(output)
示例#2
0
文件: repo.py 项目: licode/VisTrails
class GitRepo(object):
    def __init__(self, path):
        if os.path.exists(path):
            if not os.path.isdir(path):
                raise IOError('Git repository "%s" must be a directory.' %
                              path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # repo does not exist
            self.repo = Repo.init(path, not os.path.exists(path))

        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def get_type(self, name, version="HEAD"):
        commit = self._get_commit(version)

        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        if tree[name][0] & stat.S_IFDIR:
            return "tree"
        else:
            return "blob"

    def get_path(self,
                 name,
                 version="HEAD",
                 path_type=None,
                 out_name=None,
                 out_suffix=''):
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)

        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        if out_fname is None:
            # create a temporary file
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)

        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for b in blob.as_raw_chunks():
                f.write(b)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None, out_suffix=''):
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find blob "%s"' % name)
        blob_sha = tree[name][1]
        out_fname = self._write_blob(blob_sha, out_fname, out_suffix)
        return out_fname

    def get_dir(self, name, version="HEAD", out_dirname=None, out_suffix=''):
        if out_dirname is None:
            # create a temporary directory
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)

        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find tree "%s"' % name)
        subtree_id = tree[name][1]
        # subtree = self.repo.tree(subtree_id)
        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        return tree[name][1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1 << 16):
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:

            def read_chunk():
                return f.read(chunk_size)

            my_iter = chain([head], iter(read_chunk, ''))
            return iter_sha1(my_iter)

    @staticmethod
    def compute_tree_hash(dirname):
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                mode = stat.S_IFDIR  # os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head],
                        max_entries=1,
                        paths=[path])
        return iter(walker).next().commit.id

    def _stage(self, filename):
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" %
                          os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        config_stack = self.repo.get_config_stack()

        try:
            config_stack.get(('user', ), 'name')
            config_stack.get(('user', ), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            from dulwich.config import ConfigFile
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user', ), 'name', user)
            repo_conf.set(('user', ), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()
示例#3
0
class GitRepo(object):
    def __init__(self, path):
        if os.path.exists(path):
            if not os.path.isdir(path):
                raise IOError('Git repository "%s" must be a directory.' %
                              path)
        try:
            self.repo = Repo(path)
        except NotGitRepository:
            # repo does not exist
            self.repo = Repo.init(path, not os.path.exists(path))
    
        self.temp_persist_files = []

    def _get_commit(self, version="HEAD"):
        commit = self.repo[version]
        if not isinstance(commit, Commit):
            raise NotCommitError(commit)
        return commit

    def get_type(self, name, version="HEAD"):
        commit = self._get_commit(version)

        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        if tree[name][0] & stat.S_IFDIR:
            return "tree"
        else:
            return "blob"

    def get_path(self, name, version="HEAD", path_type=None, out_name=None,
                 out_suffix=''):
        if path_type is None:
            path_type = self.get_type(name, version)
        if path_type == 'tree':
            return self.get_dir(name, version, out_name, out_suffix)
        elif path_type == 'blob':
            return self.get_file(name, version, out_name, out_suffix)

        raise TypeError("Unknown path type '%s'" % path_type)

    def _write_blob(self, blob_sha, out_fname=None, out_suffix=''):
        if out_fname is None:
            # create a temporary file
            (fd, out_fname) = tempfile.mkstemp(suffix=out_suffix,
                                               prefix='vt_persist')
            os.close(fd)
            self.temp_persist_files.append(out_fname)
        else:
            out_dirname = os.path.dirname(out_fname)
            if out_dirname and not os.path.exists(out_dirname):
                os.makedirs(out_dirname)
        
        blob = self.repo.get_blob(blob_sha)
        with open(out_fname, "wb") as f:
            for b in blob.as_raw_chunks():
                f.write(b)
        return out_fname

    def get_file(self, name, version="HEAD", out_fname=None, 
                 out_suffix=''):
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find blob "%s"' % name)
        blob_sha = tree[name][1]
        out_fname = self._write_blob(blob_sha, out_fname, out_suffix)
        return out_fname

    def get_dir(self, name, version="HEAD", out_dirname=None, 
                out_suffix=''):
        if out_dirname is None:
            # create a temporary directory
            out_dirname = tempfile.mkdtemp(suffix=out_suffix,
                                           prefix='vt_persist')
            self.temp_persist_files.append(out_dirname)
        elif not os.path.exists(out_dirname):
            os.makedirs(out_dirname)
        
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find tree "%s"' % name)
        subtree_id = tree[name][1]
        # subtree = self.repo.tree(subtree_id)
        for entry in self.repo.object_store.iter_tree_contents(subtree_id):
            out_fname = os.path.join(out_dirname, entry.path)
            self._write_blob(entry.sha, out_fname)
        return out_dirname

    def get_hash(self, name, version="HEAD", path_type=None):
        commit = self._get_commit(version)
        tree = self.repo.tree(commit.tree)
        if name not in tree:
            raise KeyError('Cannot find object "%s"' % name)
        return tree[name][1]

    @staticmethod
    def compute_blob_hash(fname, chunk_size=1<<16):
        obj_len = os.path.getsize(fname)
        head = object_header(Blob.type_num, obj_len)
        with open(fname, "rb") as f:
            def read_chunk():
                return f.read(chunk_size)
            my_iter = chain([head], iter(read_chunk,''))
            return iter_sha1(my_iter)
        return None

    @staticmethod
    def compute_tree_hash(dirname):
        tree = Tree()
        for entry in sorted(os.listdir(dirname)):
            fname = os.path.join(dirname, entry)
            if os.path.isdir(fname):
                thash = GitRepo.compute_tree_hash(fname)
                mode = stat.S_IFDIR # os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, thash)
            elif os.path.isfile(fname):
                bhash = GitRepo.compute_blob_hash(fname)
                mode = os.stat(fname)[stat.ST_MODE]
                tree.add(entry, mode, bhash)
        return tree.id

    @staticmethod
    def compute_hash(path):
        if os.path.isdir(path):
            return GitRepo.compute_tree_hash(path)
        elif os.path.isfile(path):
            return GitRepo.compute_blob_hash(path)
        raise TypeError("Do not support this type of path")

    def get_latest_version(self, path):
        head = self.repo.head()
        walker = Walker(self.repo.object_store, [head], max_entries=1, 
                        paths=[path])
        return iter(walker).next().commit.id

    def _stage(self, filename):
        fullpath = os.path.join(self.repo.path, filename)
        if os.path.islink(fullpath):
            debug.warning("Warning: not staging symbolic link %s" % os.path.basename(filename))
        elif os.path.isdir(fullpath):
            for f in os.listdir(fullpath):
                self._stage(os.path.join(filename, f))
        else:
            if os.path.sep != '/':
                filename = filename.replace(os.path.sep, '/')
            self.repo.stage(filename)

    def add_commit(self, filename):
        self.setup_git()
        self._stage(filename)
        commit_id = self.repo.do_commit('Updated %s' % filename)
        return commit_id

    def setup_git(self):
        config_stack = self.repo.get_config_stack()

        try:
            config_stack.get(('user',), 'name')
            config_stack.get(('user',), 'email')
        except KeyError:
            from vistrails.core.system import current_user
            from dulwich.config import ConfigFile
            user = current_user()
            repo_conf = self.repo.get_config()
            repo_conf.set(('user',), 'name', user)
            repo_conf.set(('user',), 'email', '%s@localhost' % user)
            repo_conf.write_to_path()