示例#1
0
def clone_scope(base_parent, new_parent, event):
    """Copy the snapshot scope of one parent object over to another parent.

    The child objects and the revision each snapshot points to are read from
    the snapshots stored under ``base_parent``. A SnapshotGenerator is then
    asked to create snapshots for the same children, with the same
    revisions, under ``new_parent``.

    Args:
      base_parent: Old parent object (source of the scope)
      new_parent: New parent object (receives the copied scope)
      event: Event that triggered scope cloning
    """
    with benchmark("clone_scope.clone audit scope"):
        source_rows = db.session.query(
            models.Snapshot.child_type,
            models.Snapshot.child_id,
            models.Snapshot.revision_id,
        ).filter(
            models.Snapshot.parent_type == base_parent.type,
            models.Snapshot.parent_id == base_parent.id,
        )

        # Re-key every source snapshot under the new parent, keeping the
        # revision the source snapshot was pinned to.
        snapshot_revisions = dict()
        for child_type, child_id, revision_id in source_rows:
            target_pair = Pair.from_4tuple(
                (new_parent.type, new_parent.id, child_type, child_id))
            snapshot_revisions[target_pair] = revision_id

        generator = SnapshotGenerator(dry_run=False)
        generator.add_family(
            Stub(new_parent.type, new_parent.id),
            {target_pair.child for target_pair in snapshot_revisions},
        )
        generator.create(event, snapshot_revisions)
示例#2
0
def reindex(parents=None):
  """Reindex all snapshots or only those that belong to certain parents.

  Args:
    parents: Optional iterable of parent objects. When it is truthy, only
      snapshots whose (parent_type, parent_id) matches one of them are
      reindexed; otherwise every snapshot is.
  Returns:
    Set of parent-child pairs that were handed to reindex_pairs.
  """
  snapshot_keys = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  if parents:
    parent_keys = set()
    for obj in parents:
      parent_keys.add((obj.type, obj.id))
    parent_columns = tuple_(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
    )
    snapshot_keys = snapshot_keys.filter(parent_columns.in_(parent_keys))

  pairs = set()
  for row in snapshot_keys:
    pairs.add(Pair.from_4tuple(row))
  reindex_pairs(pairs)
  return pairs
示例#3
0
def clone_scope(base_parent, new_parent, event):
  """Copy the snapshot scope of one parent object over to another parent.

  The child objects and the revision each snapshot points to are read from
  the snapshots stored under ``base_parent``. A SnapshotGenerator is then
  asked to create snapshots for the same children, with the same revisions,
  under ``new_parent``.

  Args:
    base_parent: Old parent object (source of the scope)
    new_parent: New parent object (receives the copied scope)
    event: Event that triggered scope cloning
  """
  with benchmark("clone_scope.clone audit scope"):
    source_rows = db.session.query(
        models.Snapshot.child_type,
        models.Snapshot.child_id,
        models.Snapshot.revision_id,
    ).filter(
        models.Snapshot.parent_type == base_parent.type,
        models.Snapshot.parent_id == base_parent.id,
    )

    # Re-key every source snapshot under the new parent, keeping the
    # revision the source snapshot was pinned to.
    snapshot_revisions = dict()
    for child_type, child_id, revision_id in source_rows:
      target_pair = Pair.from_4tuple(
          (new_parent.type, new_parent.id, child_type, child_id))
      snapshot_revisions[target_pair] = revision_id

    generator = SnapshotGenerator(dry_run=False)
    generator.add_family(
        Stub(new_parent.type, new_parent.id),
        {target_pair.child for target_pair in snapshot_revisions},
    )
    generator.create(event, snapshot_revisions)
示例#4
0
def reindex():
    """Reindex all snapshots.

    The parent/child key columns of every snapshot are read in chunks
    produced by generate_query_chunks; each chunk is reindexed and the
    session is committed before the next chunk is processed.
    """
    key_columns = (
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    )
    all_snapshot_keys = db.session.query(*key_columns)
    for chunk in generate_query_chunks(all_snapshot_keys):
        reindex_pairs({Pair.from_4tuple(row) for row in chunk})
        db.session.commit()
示例#5
0
  def analyze(self):
    """Split the requested scope into pairs to create and pairs to update.

    Returns:
      Tuple (for_create, for_update). for_update is the set of pairs that
      already have a snapshot under one of self.parents; for_create is the
      set of pairs built from self.snapshots that are not among them.
    """
    parent_key = tuple_(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
    )
    stored_rows = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(parent_key.in_(self.parents))

    existing_scope = {Pair.from_4tuple(row) for row in set(stored_rows)}

    requested_scope = set()
    for parent, children in self.snapshots.items():
      for child in children:
        requested_scope.add(Pair(parent, child))

    return requested_scope - existing_scope, existing_scope
示例#6
0
def reindex():
  """Reindex all snapshots.

  The parent/child key columns of every snapshot are read in chunks
  produced by generate_query_chunks; each chunk is reindexed and the
  session is committed before the next chunk is processed.
  """
  key_columns = (
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  all_snapshot_keys = db.session.query(*key_columns)
  for chunk in generate_query_chunks(all_snapshot_keys):
    reindex_pairs({Pair.from_4tuple(row) for row in chunk})
    db.session.commit()
示例#7
0
def get_revisions(pairs, revisions, filters=None):
    """Retrieve revision ids for pairs.

    For every pair the newest revision of its child (highest revision id
    that passes ``filters``) is selected. If ``revisions`` names a revision
    for a pair, that revision is used instead, provided it shows up in the
    child's revision history; when it does not, the pair is left out of the
    result and one warning is logged for it.

    Args:
      pairs: set([(parent_1, child_1), (parent_2, child_2), ...])
      revisions: dict({(parent, child): revision_id, ...}); None is treated
        as "no revision requested for any pair".
      filters: optional iterable of filter criteria, each applied to the
        revision query through ``Query.filter``.
    Returns:
      dict mapping pair -> revision id for every pair a revision was
      selected for.
    """
    if revisions is None:
        revisions = {}

    with benchmark("snapshotter.helpers.get_revisions"):
        revision_id_cache = dict()

        if pairs:
            with benchmark("get_revisions.create caches"):
                child_stubs = {pair.child for pair in pairs}

                with benchmark("get_revisions.create child -> parents cache"):
                    parents_cache = collections.defaultdict(set)
                    for parent, child in pairs:
                        parents_cache[child].add(parent)

            with benchmark("get_revisions.retrieve revisions"):
                query = db.session.query(
                    models.Revision.id,
                    models.Revision.resource_type,
                    models.Revision.resource_id,
                ).filter(
                    tuple_(
                        models.Revision.resource_type,
                        models.Revision.resource_id,
                    ).in_(child_stubs)
                ).order_by(models.Revision.id.desc())
                if filters:
                    for _filter in filters:
                        query = query.filter(_filter)

            with benchmark("get_revisions.create revision_id cache"):
                # Pairs whose requested revision differed from at least one
                # history row. Whether the revision is really missing is
                # only known once the whole history has been scanned, so
                # the warning is deferred until after the loop.
                mismatched = set()
                for revid, restype, resid in query:
                    child = Stub(restype, resid)
                    for parent in parents_cache[child]:
                        key = Pair(parent, child)
                        if key in revisions:
                            if revid == revisions[key]:
                                revision_id_cache[key] = revid
                            else:
                                mismatched.add(key)
                        elif key not in revision_id_cache:
                            # Rows arrive newest first, so the first row
                            # seen for a pair is its latest revision.
                            revision_id_cache[key] = revid

                for key in mismatched:
                    if key not in revision_id_cache:
                        logger.warning(
                            "Specified revision for object %s but couldn't "
                            "find the revision '%s' in object history",
                            key, revisions[key])
        return revision_id_cache
示例#8
0
def reindex_snapshots(snapshot_ids):
  """Reindex the snapshots that have the given ids.

  Nothing happens when ``snapshot_ids`` is empty or None. Otherwise the
  parent/child key columns of the selected snapshots are read in chunks
  produced by generate_query_chunks; each chunk is reindexed and the session
  is committed before the next chunk is processed.

  Args:
    snapshot_ids: Collection of models.Snapshot ids.
  """
  if snapshot_ids:
    selected_keys = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(
        models.Snapshot.id.in_(snapshot_ids)
    )
    for chunk in generate_query_chunks(selected_keys):
      reindex_pairs({Pair.from_4tuple(row) for row in chunk})
      db.session.commit()
示例#9
0
def reindex_snapshots(snapshot_ids):
  """Reindex the snapshots that have the given ids.

  Nothing happens when ``snapshot_ids`` is empty or None. Otherwise the
  parent/child key columns of the selected snapshots are read in chunks
  produced by generate_query_chunks; each chunk is reindexed and the session
  is committed before the next chunk is processed.

  Args:
    snapshot_ids: Collection of models.Snapshot ids.
  """
  if snapshot_ids:
    selected_keys = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(
        models.Snapshot.id.in_(snapshot_ids)
    )
    for chunk in generate_query_chunks(selected_keys):
      reindex_pairs({Pair.from_4tuple(row) for row in chunk})
      db.session.commit()
示例#10
0
def get_latest_revision_id(snapshot):
    """Return the revision id get_revisions selects for a snapshot's pair.

    No specific revision is requested (an empty collection is passed as
    ``revisions``), so the result is whatever get_revisions picks on its own.

    Args:
      snapshot: Instance of models.Snapshot
    Returns:
      The revision id found for the snapshot's parent-child pair, or None
      when the pair is missing from the result or its value is falsy.
    """
    # Imported inside the function rather than at module level.
    # NOTE(review): presumably to avoid a circular import - confirm.
    from ggrc.snapshotter.helpers import get_revisions
    from ggrc.snapshotter.datastructures import Pair

    key = Pair.from_snapshot(snapshot)
    found = get_revisions({key}, revisions=set())
    if key not in found:
        return None
    return found[key] or None
示例#11
0
def reindex():
  """Reindex all snapshots and log the progress chunk by chunk.

  The parent/child key columns of every snapshot are read in chunks
  produced by generate_query_chunks. For each chunk the running total of
  handled rows is logged against the overall row count, the chunk is
  reindexed and the session is committed.
  """
  all_snapshot_keys = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  total = all_snapshot_keys.count()
  processed = 0
  for chunk in generate_query_chunks(all_snapshot_keys):
    processed += chunk.count()
    logger.info("Snapshot: %s/%s", processed, total)
    reindex_pairs({Pair.from_4tuple(row) for row in chunk})
    db.session.commit()
示例#12
0
def reindex():
  """Reindex all snapshots and log the progress chunk by chunk.

  The parent/child key columns of every snapshot are read in chunks
  produced by generate_query_chunks. For each chunk the running total of
  handled rows is logged against the overall row count, the chunk is
  reindexed and the session is committed.
  """
  all_snapshot_keys = db.session.query(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  )
  total = all_snapshot_keys.count()
  processed = 0
  for chunk in generate_query_chunks(all_snapshot_keys):
    processed += chunk.count()
    logger.info("Snapshot: %s/%s", processed, total)
    reindex_pairs({Pair.from_4tuple(row) for row in chunk})
    db.session.commit()
示例#13
0
文件: helpers.py 项目: kripsy/Project
def get_revisions(pairs, revisions, filters=None):
    """Look up the revision id each parent-child pair should point to.

    For a pair that has an entry in ``revisions`` only a history row with
    exactly that revision id is accepted. For any other pair the first row
    returned for its child is taken; rows are ordered by revision id,
    descending, so that is the newest one.

    Args:
      pairs: set([(parent_1, child_1), (parent_2, child_2), ...])
      revisions: dict({(parent, child): revision_id, ...})
      filters: optional iterable of filter criteria, each applied to the
        revision query through ``Query.filter``.
    Returns:
      dict mapping pair -> revision id; pairs without a usable revision are
      absent.
    """
    with benchmark("snapshotter.helpers.get_revisions"):
        found = dict()
        if not pairs:
            return found

        with benchmark("get_revisions.create caches"):
            child_stubs = set()
            for pair in pairs:
                child_stubs.add(pair.child)

            with benchmark("get_revisions.create child -> parents cache"):
                parents_cache = collections.defaultdict(set)
                for parent, child in pairs:
                    parents_cache[child].add(parent)

        with benchmark("get_revisions.retrieve revisions"):
            child_key = tuple_(
                models.Revision.resource_type,
                models.Revision.resource_id,
            )
            query = db.session.query(
                models.Revision.id,
                models.Revision.resource_type,
                models.Revision.resource_id,
            ).filter(child_key.in_(child_stubs))
            query = query.order_by(models.Revision.id.desc())
            if filters:
                for criterion in filters:
                    query = query.filter(criterion)

        with benchmark("get_revisions.create revision_id cache"):
            for revision_id, resource_type, resource_id in query:
                child = Stub(resource_type, resource_id)
                for parent in parents_cache[child]:
                    key = Pair(parent, child)
                    requested = key in revisions
                    if requested and revision_id == revisions[key]:
                        found[key] = revision_id
                    elif not requested and key not in found:
                        found[key] = revision_id
        return found
示例#14
0
文件: helpers.py 项目: zdqf/ggrc-core
def get_revisions(pairs, revisions, filters=None):
    """Retrieve revision ids for pairs.

    The candidate revision rows come from get_revisions_query. If
    ``revisions`` names a revision for a pair, only a row with exactly that
    revision id is accepted for the pair; a row with a different id is
    reported with a warning and skipped.

    Args:
      pairs: set([(parent_1, child_1), (parent_2, child_2), ...])
      revisions: dict({(parent, child): revision_id, ...})
      filters: passed through unchanged to get_revisions_query.
    Returns:
      dict mapping pair -> revision id; empty when ``pairs`` is empty.
    """
    with benchmark("snapshotter.helpers.get_revisions"):
        if not pairs:
            return {}

        with benchmark("get_revisions.create child -> parents cache"):
            parents_cache = collections.defaultdict(set)
            child_stubs = set()
            for parent, child in pairs:
                parents_cache[child].add(parent)
                child_stubs.add(child)

        with benchmark("get_revisions.retrieve revisions"):
            query = get_revisions_query(child_stubs, revisions, filters)

        revision_id_cache = {}
        with benchmark("get_revisions.create revision_id cache"):
            for revid, restype, resid in query:
                child = Stub(restype, resid)
                for parent in parents_cache[child]:
                    key = Pair(parent, child)
                    if key in revisions and revisions[key] != revid:
                        # The two literals are concatenated, so the first
                        # must end with a space to keep the words apart.
                        logger.warning(
                            "Specified revision for object %s but couldn't "
                            "find the revision '%s' in object history",
                            key,
                            revisions[key],
                        )
                    else:
                        # A later row for the same pair overwrites the
                        # value stored by an earlier one.
                        revision_id_cache[key] = revid
        return revision_id_cache
示例#15
0
  def analyze(self):
    """Split the requested scope into pairs to create and pairs to update.

    Returns:
      Tuple (for_create, for_update). for_update is the set of pairs that
      already have a snapshot under one of self.parents; for_create is the
      set of pairs built from self.snapshots that are not among them.
    """
    parent_key = tuple_(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
    )
    stored_rows = db.session.query(
        models.Snapshot.parent_type,
        models.Snapshot.parent_id,
        models.Snapshot.child_type,
        models.Snapshot.child_id,
    ).filter(parent_key.in_(self.parents))

    existing_scope = {Pair.from_4tuple(row) for row in set(stored_rows)}

    requested_scope = set()
    for parent, children in self.snapshots.items():
      for child in children:
        requested_scope.add(Pair(parent, child))

    return requested_scope - existing_scope, existing_scope
示例#16
0
def reindex_pairs(pairs):  # noqa  # pylint:disable=too-many-branches
  """Reindex selected snapshots.

  For every snapshot identified by ``pairs`` the searchable attributes of
  the revision it points to are turned into search records. Records that
  were stored before for those snapshots are removed with delete_records
  and the freshly built ones are written with insert_records.

  Args:
    pairs: A collection of parent-child pairs that uniquely represent
      snapshot objects whose properties should be reindexed. Nothing is
      done (and nothing is queried) when it is empty or None.
  """
  # pylint: disable=too-many-locals,too-many-nested-blocks
  if not pairs:
    return

  cad_dict = _get_custom_attribute_dict()
  snapshot_columns, revision_columns = _get_columns()

  pairs_filter = tuple_(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  ).in_({pair.to_4tuple() for pair in pairs})
  snapshot_query = snapshot_columns.filter(pairs_filter)

  # pair -> [snapshot id, context id, revision id]
  snapshots = dict()
  for _id, ctx_id, ptype, pid, ctype, cid, revid in snapshot_query:
    pair = Pair.from_4tuple((ptype, pid, ctype, cid))
    snapshots[pair] = [_id, ctx_id, revid]

  # revision id -> searchable attributes of that revision
  revisions = dict()
  revision_ids = {revid for _, _, revid in snapshots.values()}
  revision_query = revision_columns.filter(
      models.Revision.id.in_(revision_ids)
  )
  for _id, _type, content in revision_query:
    revisions[_id] = get_searchable_attributes(
        CLASS_PROPERTIES[_type], cad_dict, content)

  snapshot_ids = set()
  search_payload = list()
  for pair in snapshots:
    snapshot_id, ctx_id, revision_id = snapshots[pair]
    snapshot_ids.add(snapshot_id)

    # Work on a copy: several snapshots may point at the same revision and
    # the per-snapshot keys added below must not leak from one snapshot
    # into the next through the shared cache entry.
    properties = dict(revisions[revision_id])
    properties.update({
        "parent": _get_parent_property(pair),
        "child": _get_child_property(pair),
        "child_type": pair.child.type,
        "child_id": pair.child.id
    })

    # Replace the "assignees" entry with one property per role.
    assignees = properties.pop("assignees", None)
    if assignees:
      for person, roles in assignees:
        if person:
          for role in roles:
            # NOTE(review): a later person holding the same role replaces
            # the earlier one here - confirm that this is intended.
            properties[role] = [person]

    for prop, val in properties.items():
      if not prop or val is None:
        continue
      # record stub
      rec = {
          "key": snapshot_id,
          "type": "Snapshot",
          "context_id": ctx_id,
          "tags": _get_tag(pair),
          "property": prop,
          "subproperty": "",
          "content": val,
      }
      # The order of the checks matters: a dict carrying a "title" is
      # handled as an option even if it also describes a person.
      if isinstance(val, dict) and "title" in val:
        # Option
        rec["content"] = val["title"]
        search_payload.append(rec)
      elif isinstance(val, dict) and val.get("type") == "Person":
        search_payload.extend(get_person_data(rec, val))
        search_payload.extend(get_person_sort_subprop(rec, [val]))
      elif isinstance(val, list) and all(
          isinstance(p, dict) and p.get("type") == "Person" for p in val):
        # Lists with non-dict items fall through to the checks below
        # instead of failing on a missing ``get`` attribute.
        for person in val:
          search_payload.extend(get_person_data(rec, person))
        search_payload.extend(get_person_sort_subprop(rec, val))
      elif isinstance(val, (bool, int, long)):
        rec["content"] = unicode(val)
        search_payload.append(rec)
      elif isinstance(rec["content"], basestring):
        search_payload.append(rec)
      else:
        logger.warning(u"Unsupported value for %s #%s in %s %s: %r",
                       rec["type"], rec["key"], rec["property"],
                       rec["subproperty"], rec["content"])

  delete_records(snapshot_ids)
  insert_records(search_payload)
示例#17
0
    def _update(self, for_update, event, revisions, _filter):
        """Point existing snapshots at newer revisions and record the change.

        Snapshots among ``for_update`` whose stored revision differs from
        the revision selected by get_revisions are rewritten to that
        revision, and a "modified" revision entry is inserted for each
        rewritten snapshot.

        Args:
          for_update: Parent-child pairs whose snapshots should be checked.
          event: A ggrc.models.Event instance. When self.dry_run is set and
            event is None, event id 0 is used instead of event.id.
          revisions: Pairs with the revisions their snapshots should be set
            to; passed on to get_revisions.
          _filter: Callable that should return True if a pair should be
            updated; a falsy value disables the filtering.
        Returns:
          OperationResponse
        """
        # pylint: disable=too-many-locals
        with benchmark("Snapshot._update"):
            user_id = get_current_user_id()
            event_id = 0 if (self.dry_run and event is None) else event.id

            with benchmark("Snapshot._update.filter"):
                if _filter:
                    for_update = {
                        pair for pair in for_update if _filter(pair)
                    }

            with benchmark("Snapshot._update.get existing snapshots"):
                pair_columns = tuple_(
                    models.Snapshot.parent_type,
                    models.Snapshot.parent_id,
                    models.Snapshot.child_type,
                    models.Snapshot.child_id,
                )
                stored_rows = db.session.query(
                    models.Snapshot.id,
                    models.Snapshot.revision_id,
                    models.Snapshot.parent_type,
                    models.Snapshot.parent_id,
                    models.Snapshot.child_type,
                    models.Snapshot.child_id,
                ).filter(
                    pair_columns.in_(
                        {pair.to_4tuple() for pair in for_update}))

                # pair -> (snapshot id, revision id currently stored)
                snapshot_cache = dict()
                for row in stored_rows:
                    stored_pair = Pair.from_4tuple(row[2:])
                    snapshot_cache[stored_pair] = (row[0], row[1])

            with benchmark("Snapshot._update.retrieve latest revisions"):
                only_content_changes = models.Revision.action.in_(
                    ["created", "modified"])
                revision_id_cache = get_revisions(
                    for_update,
                    revisions=revisions,
                    filters=[only_content_changes])

            response_data = {
                "revisions": {
                    "old": dict(
                        (pair, cached[1])
                        for pair, cached in snapshot_cache.items()),
                    "new": revision_id_cache,
                },
            }

            missed_keys = set()
            modified_snapshot_keys = set()
            data_payload_update = list()
            with benchmark("Snapshot._update.build snapshot payload"):
                for key in for_update:
                    if key not in revision_id_cache:
                        missed_keys.add(key)
                        continue
                    sid, stored_rev = snapshot_cache[key]
                    latest_rev = revision_id_cache[key]
                    if stored_rev != latest_rev:
                        modified_snapshot_keys.add(key)
                        data_payload_update.append({
                            "_id": sid,
                            "_revision_id": latest_rev,
                            "_modified_by_id": user_id,
                        })

            if missed_keys:
                logger.warning(
                    "Tried to update snapshots for the following objects but "
                    "found no revisions: %s", missed_keys)

            # Nothing changed: report success with an empty set of pairs.
            if not modified_snapshot_keys:
                return OperationResponse("update", True, set(), response_data)

            with benchmark("Snapshot._update.write snapshots to database"):
                snapshot_table = models.Snapshot.__table__
                update_sql = snapshot_table.update().where(
                    models.Snapshot.id == bindparam("_id")
                ).values(
                    revision_id=bindparam("_revision_id"),
                    modified_by_id=bindparam("_modified_by_id"),
                )
                self._execute(update_sql, data_payload_update)

            with benchmark("Snapshot._update.retrieve inserted snapshots"):
                snapshots = get_snapshots(modified_snapshot_keys)

            revision_payload = list()
            with benchmark(
                    "Snapshot._update.create snapshots revision payload"):
                for snapshot in snapshots:
                    parent = Stub(snapshot.parent_type, snapshot.parent_id)
                    revision_payload.append(
                        create_snapshot_revision_dict(
                            "modified", event_id, snapshot, user_id,
                            self.context_cache[parent]))

            with benchmark("Insert Snapshot entries into Revision"):
                self._execute(models.Revision.__table__.insert(),
                              revision_payload)
            return OperationResponse("update", True, for_update, response_data)
示例#18
0
  def _update(self, for_update, event, revisions, _filter):
    """Point existing snapshots at newer revisions and record the change.

    Snapshots among ``for_update`` whose stored revision differs from the
    revision selected by get_revisions are rewritten to that revision, and a
    "modified" revision entry is inserted for each rewritten snapshot.

    Args:
      for_update: Parent-child pairs whose snapshots should be checked.
      event: A ggrc.models.Event instance. When self.dry_run is set and
        event is None, event id 0 is used instead of event.id.
      revisions: Pairs with the revisions their snapshots should be set to;
        passed on to get_revisions.
      _filter: Callable that should return True if a pair should be updated;
        a falsy value disables the filtering.
    Returns:
      OperationResponse
    """
    # pylint: disable=too-many-locals
    with benchmark("Snapshot._update"):
      user_id = get_current_user_id()
      event_id = 0 if (self.dry_run and event is None) else event.id

      with benchmark("Snapshot._update.filter"):
        if _filter:
          for_update = {pair for pair in for_update if _filter(pair)}

      with benchmark("Snapshot._update.get existing snapshots"):
        pair_columns = tuple_(
            models.Snapshot.parent_type,
            models.Snapshot.parent_id,
            models.Snapshot.child_type,
            models.Snapshot.child_id,
        )
        stored_rows = db.session.query(
            models.Snapshot.id,
            models.Snapshot.revision_id,
            models.Snapshot.parent_type,
            models.Snapshot.parent_id,
            models.Snapshot.child_type,
            models.Snapshot.child_id,
        ).filter(
            pair_columns.in_({pair.to_4tuple() for pair in for_update}))

        # pair -> (snapshot id, revision id currently stored)
        snapshot_cache = dict()
        for row in stored_rows:
          stored_pair = Pair.from_4tuple(row[2:])
          snapshot_cache[stored_pair] = (row[0], row[1])

      with benchmark("Snapshot._update.retrieve latest revisions"):
        only_content_changes = models.Revision.action.in_(
            ["created", "modified"])
        revision_id_cache = get_revisions(
            for_update,
            revisions=revisions,
            filters=[only_content_changes])

      response_data = {
          "revisions": {
              "old": dict(
                  (pair, cached[1])
                  for pair, cached in snapshot_cache.items()),
              "new": revision_id_cache,
          },
      }

      missed_keys = set()
      modified_snapshot_keys = set()
      data_payload_update = list()
      with benchmark("Snapshot._update.build snapshot payload"):
        for key in for_update:
          if key not in revision_id_cache:
            missed_keys.add(key)
            continue
          sid, stored_rev = snapshot_cache[key]
          latest_rev = revision_id_cache[key]
          if stored_rev != latest_rev:
            modified_snapshot_keys.add(key)
            data_payload_update.append({
                "_id": sid,
                "_revision_id": latest_rev,
                "_modified_by_id": user_id,
            })

      if missed_keys:
        logger.warning(
            "Tried to update snapshots for the following objects but "
            "found no revisions: %s", missed_keys)

      # Nothing changed: report success with an empty set of pairs.
      if not modified_snapshot_keys:
        return OperationResponse("update", True, set(), response_data)

      with benchmark("Snapshot._update.write snapshots to database"):
        snapshot_table = models.Snapshot.__table__
        update_sql = snapshot_table.update().where(
            models.Snapshot.id == bindparam("_id")
        ).values(
            revision_id=bindparam("_revision_id"),
            modified_by_id=bindparam("_modified_by_id"),
        )
        self._execute(update_sql, data_payload_update)

      with benchmark("Snapshot._update.retrieve inserted snapshots"):
        snapshots = get_snapshots(modified_snapshot_keys)

      revision_payload = list()
      with benchmark("Snapshot._update.create snapshots revision payload"):
        for snapshot in snapshots:
          parent = Stub(snapshot.parent_type, snapshot.parent_id)
          revision_payload.append(
              create_snapshot_revision_dict(
                  "modified", event_id, snapshot, user_id,
                  self.context_cache[parent]))

      with benchmark("Insert Snapshot entries into Revision"):
        self._execute(models.Revision.__table__.insert(), revision_payload)
      return OperationResponse("update", True, for_update, response_data)
示例#19
0
def reindex_pairs(pairs):
  """Reindex selected snapshots.

  For every snapshot identified by ``pairs`` the searchable attributes of
  the revision it points to are turned into search records, one per
  non-empty property. Records that were stored before for those snapshots
  are removed with delete_records and the freshly built ones are written
  with insert_records.

  Args:
    pairs: A collection of parent-child pairs that uniquely represent
      snapshot objects whose properties should be reindexed. Nothing is
      done (and nothing is queried) when it is empty or None.
  """
  # pylint: disable=too-many-locals
  if not pairs:
    return

  object_properties, cad_list = _get_model_properties()
  snapshot_columns, revision_columns = _get_columns()

  pairs_filter = tuple_(
      models.Snapshot.parent_type,
      models.Snapshot.parent_id,
      models.Snapshot.child_type,
      models.Snapshot.child_id,
  ).in_({pair.to_4tuple() for pair in pairs})
  snapshot_query = snapshot_columns.filter(pairs_filter)

  # pair -> [snapshot id, context id, revision id]
  snapshots = dict()
  for _id, ctx_id, ptype, pid, ctype, cid, revid in snapshot_query:
    pair = Pair.from_4tuple((ptype, pid, ctype, cid))
    snapshots[pair] = [_id, ctx_id, revid]

  # revision id -> searchable attributes of that revision
  revisions = dict()
  revision_ids = {revid for _, _, revid in snapshots.values()}
  revision_query = revision_columns.filter(
      models.Revision.id.in_(revision_ids)
  )
  for _id, _type, content in revision_query:
    revisions[_id] = get_searchable_attributes(
        object_properties[_type], cad_list, content)

  snapshot_ids = set()
  search_payload = list()
  for pair in snapshots:
    snapshot_id, ctx_id, revision_id = snapshots[pair]
    snapshot_ids.add(snapshot_id)

    # Work on a copy: several snapshots may point at the same revision and
    # the per-snapshot keys added below must not end up in the shared
    # cache entry.
    properties = dict(revisions[revision_id])
    properties.update({
        "parent": _get_parent_property(pair),
        "child": _get_child_property(pair),
        "child_type": pair.child.type,
        "child_id": pair.child.id
    })

    for prop, val in properties.items():
      # Properties with an empty name or a falsy value are not indexed.
      if prop and val:
        search_payload.append({
            "key": snapshot_id,
            "type": "Snapshot",
            "context_id": ctx_id,
            "tags": _get_tag(pair),
            "property": prop,
            "content": val,
        })

  delete_records(snapshot_ids)
  insert_records(search_payload)