Exemplo n.º 1
0
def __save_revision(repo, sha, chain, stmts, ts):
    """Store `stmts` as the revision of resource `sha` at time `ts`.

    The new state is persisted either as a full snapshot or as a delta
    against the previous state, depending on the size heuristics below.

    Args:
        repo: Repository the revision belongs to.
        sha: Hashed key identifying the resource.
        chain: Sequence of change sets; ``chain[0]`` is the base of the
            delta chain and ``chain[-1]`` its most recent entry. A falsy
            value (``None`` or empty) is treated as "no prior history".
        stmts: Set of statement lines making up the new state.
        ts: Timestamp of the new revision.

    Returns:
        ``None`` if `stmts` equals the previous state (nothing is stored),
        ``0`` once a snapshot or a delta has been stored.

    Raises:
        ValueError: If `ts` is not strictly greater than the time of the
            last change set in `chain`.
    """
    # Normalize a missing chain so every check below agrees on "no history".
    chain = chain or []

    # This only checks that `ts` is after the last cset of the chain, not
    # after all csets stored for the key. Any timestamp is accepted as long
    # as the chain is right.
    if chain and not ts > chain[-1].time:
        # Appended timestamps must be monotonically increasing!
        raise ValueError(
            "timestamp must be later than the last changeset of the chain")

    # Without a usable base (no history, or the base is a delete marker)
    # the new state can only be stored as a snapshot.
    needs_snapshot = not chain or chain[0].type == CSet.DELETE

    if needs_snapshot:
        # Dummy value for `patch`; it is never stored on this path.
        patch = ""
    else:
        # Reconstruct the previous state of the resource
        prev = __get_revision(repo, sha, chain)

        if stmts == prev:
            # No changes, nothing to be done. Bail out.
            return None

        # Removed statements are prefixed with "D ", added ones with "A ".
        patch = compress(join(
            ["D " + s for s in prev - stmts] +
            ["A " + s for s in stmts - prev], "\n"))

    snapc = compress(join(stmts, "\n"))

    # Accumulated size of the delta chain including the (potential) patch
    # from the previous to the pushed state.
    accumulated_len = sum(e.len for e in chain[1:]) + len(patch)

    # Length of the base entry; `or 0` covers a base without a stored length.
    base_len = (chain[0].len or 0) if chain else 0

    if (needs_snapshot or
            len(snapc) <= len(patch) or
            SNAPF * base_len <= accumulated_len):
        # Store the current state as a new snapshot
        kind, data = CSet.SNAPSHOT, snapc
    else:
        # Store a directed delta between the previous and current state
        kind, data = CSet.DELTA, patch

    Blob.create(repo=repo, hkey=sha, time=ts, data=data)
    CSet.create(repo=repo, hkey=sha, time=ts, type=kind, len=len(data))
    return 0
Exemplo n.º 2
0
    def put(self, username, reponame):
        """Create a new revision of the resource specified by `key`.

        The request body is parsed according to the Content-Type header
        (default "application/n-triples"). The resource `key` and an
        optional `datetime` are read from the query string. The new state
        is stored either as a snapshot or as a delta against the previous
        state; if it equals the previous state nothing is stored.

        Raises:
            HTTPError(403): `username` is not the current user.
            HTTPError(400): `key` is missing, or the timestamp is not
                strictly after the last change set of the current chain.
            HTTPError(404): The repository does not exist.
            HTTPError(500): The hash of `key` is already mapped to a
                different key.
        """
        fmt = self.request.headers.get("Content-Type", "application/n-triples")
        key = self.get_query_argument("key", None)

        if username != self.current_user.name:
            raise HTTPError(403)

        if not key:
            raise HTTPError(400)

        datestr = self.get_query_argument("datetime", None)
        # Falls back to now() when no datetime was supplied (or when date()
        # yields a falsy value).
        # NOTE(review): a malformed datetime presumably makes date() raise,
        # which would surface as a server error - confirm and map to 400.
        ts = datestr and date(datestr, QSDATEFMT) or now()

        try:
            repo = (Repo.select(Repo.id).join(
                User).where((User.name == username)
                            & (Repo.name == reponame)).naive().get())
        except Repo.DoesNotExist:
            raise HTTPError(404)

        sha = shasum(key.encode("utf-8"))

        # Current chain: all csets for the key from the most recent
        # non-delta cset onwards (from time 0 if there is none).
        chain = list(
            CSet.select(CSet.time, CSet.type, CSet.len).where(
                (CSet.repo == repo) & (CSet.hkey == sha) & (CSet.time >= SQL(
                    "COALESCE((SELECT time FROM cset "
                    "WHERE repo_id = %s "
                    "AND hkey_id = %s "
                    "AND type != %s "
                    "ORDER BY time DESC "
                    "LIMIT 1), 0)", repo.id, sha, CSet.DELTA))).order_by(
                        CSet.time).naive())

        if chain and not ts > chain[-1].time:
            # Appended timestamps must be monotonically increasing!
            raise HTTPError(400)

        if not chain:
            # Mapping for `key` likely does not exist:
            # Store the SHA-to-KEY mapping in HMap,
            # looking out for possible collisions.
            try:
                HMap.create(sha=sha, val=key)
            except IntegrityError:
                val = HMap.select(HMap.val).where(HMap.sha == sha).scalar()
                if val != key:
                    raise HTTPError(500)

        # Parse and normalize into a set of N-Quad lines
        stmts = parse(self.request.body, fmt)

        # Without a usable base (no history, or the base is a delete marker)
        # the new state can only be stored as a snapshot.
        needs_snapshot = not chain or chain[0].type == CSet.DELETE

        if needs_snapshot:
            # Dummy value for `patch`; it is never stored on this path.
            patch = ""
        else:
            # Reconstruct the previous state of the resource
            prev = set()

            blobs = (Blob.select(Blob.data).where(
                (Blob.repo == repo) & (Blob.hkey == sha)
                & (Blob.time << [e.time for e in chain])).order_by(
                    Blob.time).naive())

            for i, blob in enumerate(blobs.iterator()):
                data = decompress(blob.data)

                if i == 0:
                    # Base snapshot for the delta chain
                    prev.update(data.splitlines())
                else:
                    # Delta lines look like "A <stmt>" or "D <stmt>".
                    for line in data.splitlines():
                        mode, stmt = line[0], line[2:]
                        if mode == "A":
                            prev.add(stmt)
                        else:
                            prev.discard(stmt)

            if stmts == prev:
                # No changes, nothing to be done. Bail out.
                return self.finish()

            patch = compress(
                join(["D " + s for s in prev - stmts] +
                     ["A " + s for s in stmts - prev], "\n"))

        # Compress the snapshot only now, so the no-change path above does
        # not pay for it.
        snapc = compress(join(stmts, "\n"))

        # Calculate the accumulated size of the delta chain including
        # the (potential) patch from the previous to the pushed state.
        acclen = sum(e.len for e in chain[1:]) + len(patch)

        # Base length; `or 0` covers a base without a stored length.
        blen = (chain[0].len or 0) if chain else 0

        if (needs_snapshot or len(snapc) <= len(patch)
                or SNAPF * blen <= acclen):
            # Store the current state as a new snapshot
            kind, payload = CSet.SNAPSHOT, snapc
        else:
            # Store a directed delta between the previous and current state
            kind, payload = CSet.DELTA, patch

        # NOTE(review): the two inserts are not wrapped in a transaction
        # here - confirm atomicity is handled by the caller/framework.
        Blob.create(repo=repo, hkey=sha, time=ts, data=payload)
        CSet.create(repo=repo,
                    hkey=sha,
                    time=ts,
                    type=kind,
                    len=len(payload))
Exemplo n.º 3
0
    def put(self, username, reponame):
        """Create a new revision of the resource specified by `key`.

        The body is parsed using the request's Content-Type (defaulting to
        "application/n-triples"); `key` and the optional `datetime` come
        from the query string. The pushed state is saved as a snapshot or
        as a delta relative to the previous state, and nothing is saved
        when both states are equal.

        Raises:
            HTTPError(403): `username` differs from the current user.
            HTTPError(400): No `key` was given, or the timestamp is not
                strictly after the last change set of the current chain.
            HTTPError(404): No matching repository exists.
            HTTPError(500): The hash of `key` collides with a different
                stored key.
        """
        fmt = self.request.headers.get("Content-Type", "application/n-triples")
        key = self.get_query_argument("key", None)

        if username != self.current_user.name:
            raise HTTPError(403)

        if not key:
            raise HTTPError(400)

        datestr = self.get_query_argument("datetime", None)
        # now() is used when no datetime was supplied (or when date()
        # yields a falsy value).
        # NOTE(review): a malformed datetime presumably makes date() raise,
        # which would surface as a server error - confirm and map to 400.
        ts = datestr and date(datestr, QSDATEFMT) or now()

        try:
            repo = (Repo
                .select(Repo.id)
                .join(User)
                .where((User.name == username) & (Repo.name == reponame))
                .naive()
                .get())
        except Repo.DoesNotExist:
            raise HTTPError(404)

        sha = shasum(key.encode("utf-8"))

        # Current chain: every cset of the key starting at the most recent
        # non-delta cset (or at time 0 when there is none), oldest first.
        chain = list(CSet
            .select(CSet.time, CSet.type, CSet.len)
            .where(
                (CSet.repo == repo) &
                (CSet.hkey == sha) &
                (CSet.time >= SQL(
                    "COALESCE((SELECT time FROM cset "
                    "WHERE repo_id = %s "
                    "AND hkey_id = %s "
                    "AND type != %s "
                    "ORDER BY time DESC "
                    "LIMIT 1), 0)",
                    repo.id, sha, CSet.DELTA
                )))
            .order_by(CSet.time)
            .naive())

        if chain and not ts > chain[-1].time:
            # Appended timestamps must be monotonically increasing!
            raise HTTPError(400)

        if not chain:
            # Mapping for `key` likely does not exist:
            # Store the SHA-to-KEY mapping in HMap,
            # looking out for possible collisions.
            try:
                HMap.create(sha=sha, val=key)
            except IntegrityError:
                val = HMap.select(HMap.val).where(HMap.sha == sha).scalar()
                if val != key:
                    raise HTTPError(500)

        # Parse and normalize into a set of N-Quad lines
        stmts = parse(self.request.body, fmt)

        # With no history, or a delete marker as base, there is nothing to
        # diff against: a snapshot must be stored.
        needs_snapshot = not chain or chain[0].type == CSet.DELETE

        if needs_snapshot:
            # Placeholder for `patch`; it is never stored on this path.
            patch = ""
        else:
            # Reconstruct the previous state of the resource
            prev = set()

            blobs = (Blob
                .select(Blob.data)
                .where(
                    (Blob.repo == repo) &
                    (Blob.hkey == sha) &
                    (Blob.time << [e.time for e in chain]))
                .order_by(Blob.time)
                .naive())

            for i, blob in enumerate(blobs.iterator()):
                data = decompress(blob.data)

                if i == 0:
                    # Base snapshot for the delta chain
                    prev.update(data.splitlines())
                else:
                    # Each delta line is "A <stmt>" (add) or "D <stmt>".
                    for line in data.splitlines():
                        mode, stmt = line[0], line[2:]
                        if mode == "A":
                            prev.add(stmt)
                        else:
                            prev.discard(stmt)

            if stmts == prev:
                # No changes, nothing to be done. Bail out.
                return self.finish()

            patch = compress(join(
                ["D " + s for s in prev - stmts] +
                ["A " + s for s in stmts - prev], "\n"))

        # The snapshot is compressed after the bail-out above so that an
        # unchanged push does no compression work.
        snapc = compress(join(stmts, "\n"))

        # Calculate the accumulated size of the delta chain including
        # the (potential) patch from the previous to the pushed state.
        acclen = sum(e.len for e in chain[1:]) + len(patch)

        # Base length; `or 0` covers a base without a stored length.
        blen = (chain[0].len or 0) if chain else 0

        if (needs_snapshot or
            len(snapc) <= len(patch) or SNAPF * blen <= acclen):
            # Store the current state as a new snapshot
            kind, payload = CSet.SNAPSHOT, snapc
        else:
            # Store a directed delta between the previous and current state
            kind, payload = CSet.DELTA, patch

        # NOTE(review): the two inserts are not wrapped in a transaction
        # here - confirm atomicity is handled by the caller/framework.
        Blob.create(repo=repo, hkey=sha, time=ts, data=payload)
        CSet.create(repo=repo, hkey=sha, time=ts, type=kind,
            len=len(payload))