def create_repository(
    namespace, name, creating_user, visibility="private", repo_kind="image", description=None
):
    """
    Creates a repository called `name` under the namespace `namespace`, along with its initial
    RepositoryActionCount row (count of zero, dated one day ago) and its RepositorySearchScore
    row (score of zero).

    If `creating_user` is given and is not an organization, that user is granted the "admin"
    role on the new repository within the same transaction. Afterwards, default permissions
    are applied when that user's username differs from `namespace`.

    Returns the newly created Repository row.
    """
    owner = User.get(username=namespace)
    one_day_ago = datetime.now() - timedelta(days=1)

    with db_transaction():
        new_repo = Repository.create(
            name=name,
            visibility=Repository.visibility.get_id(visibility),
            namespace_user=owner,
            kind=Repository.kind.get_id(repo_kind),
            description=description,
        )
        RepositoryActionCount.create(repository=new_repo, count=0, date=one_day_ago)
        RepositorySearchScore.create(repository=new_repo, score=0)

        # The admin permission is created under the transaction so that it is
        # committed together with the repository itself.
        if creating_user and not creating_user.organization:
            RepositoryPermission.create(
                user=creating_user, repository=new_repo, role=Role.get(name="admin")
            )

    # Without a creating user, or when the creator is an organization, there are
    # no default permissions to apply.
    if not creating_user or creating_user.organization:
        return new_repo

    # Apply default permissions (only occurs for repositories under organizations).
    if creating_user.username != namespace:
        permission.apply_default_permissions(new_repo, creating_user)

    return new_repo
def has_repository_action_count(repository, day):
    """
    Reports whether a RepositoryActionCount row is stored for `repository` on `day`.

    Returns True when such a row can be fetched and False when the lookup raises
    RepositoryActionCount.DoesNotExist.
    """
    try:
        RepositoryActionCount.get(repository=repository, date=day)
    except RepositoryActionCount.DoesNotExist:
        return False

    return True
def create_repository(
    namespace, name, creating_user, visibility="private", repo_kind="image", description=None
):
    """
    Creates a repository called `name` under the namespace `namespace`, along with its initial
    RepositoryActionCount row (count of zero, dated one day ago) and its RepositorySearchScore
    row (score of zero).

    Return values:
      - None if a repository with that namespace and name is already found by the pre-check.
      - The existing Repository row if creation hits an IntegrityError and the row can then be
        looked up; None (after logging an error) if it cannot.
      - The newly created Repository row otherwise.

    If `creating_user` is given and is not an organization, that user is granted the "admin"
    role within the transaction, and default permissions are applied afterwards when the
    user's username differs from `namespace`.
    """
    owner = User.get(username=namespace)
    one_day_ago = datetime.now() - timedelta(days=1)

    try:
        with db_transaction():
            # Look for an existing repository first so that an IntegrityError is
            # avoided whenever possible.
            if get_repository(namespace, name) is not None:
                return None

            try:
                new_repo = Repository.create(
                    name=name,
                    visibility=Repository.visibility.get_id(visibility),
                    namespace_user=owner,
                    kind=Repository.kind.get_id(repo_kind),
                    description=description,
                )
            except IntegrityError as integrity_error:
                # Re-raised as a private exception type so it can be handled
                # outside of the transaction block.
                raise _RepositoryExistsException(integrity_error)

            RepositoryActionCount.create(repository=new_repo, count=0, date=one_day_ago)
            RepositorySearchScore.create(repository=new_repo, score=0)

            # The admin permission is created under the transaction so that it is
            # committed together with the repository itself.
            if creating_user and not creating_user.organization:
                RepositoryPermission.create(
                    user=creating_user, repository=new_repo, role=Role.get(name="admin")
                )
    except _RepositoryExistsException as exists_error:
        try:
            return Repository.get(namespace_user=owner, name=name)
        except Repository.DoesNotExist:
            logger.error(
                "Got integrity error when trying to create repository %s/%s: %s",
                namespace,
                name,
                exists_error.internal_exception,
            )
            return None

    # Without a creating user, or when the creator is an organization, there are
    # no default permissions to apply.
    if not creating_user or creating_user.organization:
        return new_repo

    # Apply default permissions (only occurs for repositories under organizations).
    if creating_user.username != namespace:
        permission.apply_default_permissions(new_repo, creating_user)

    return new_repo
def store_repository_action_count(repository, day, action_count):
    """
    Records `action_count` as the action count of `repository` for `day`.

    Returns True when the row was created, or False when creation raised an IntegrityError
    (i.e. the repository already has an entry for the specified day).
    """
    try:
        RepositoryActionCount.create(repository=repository, date=day, count=action_count)
    except IntegrityError:
        logger.debug("Count already written for repository %s", repository.id)
        return False

    return True
def get_repository_action_counts(repo, start_date):
    """
    Builds the query for the daily aggregated action counts of `repo`, restricted to entries
    dated on or after `start_date`.
    """
    belongs_to_repo = RepositoryActionCount.repository == repo
    on_or_after_start = RepositoryActionCount.date >= start_date
    return RepositoryActionCount.select().where(belongs_to_repo, on_or_after_start)
def update_repository_score(repo):
    """
    Updates the repository score entry for the given repository by retrieving information from
    the RepositoryActionCount table.

    Note that count_repository_actions for the repo should be called first.

    The score is the weighted sum, over every bucket in SEARCH_BUCKETS, of the average daily
    action count recorded within that bucket's date window, multiplied by 100 and truncated to
    an integer.

    Returns True if the row was updated (or created) and False if writing it raised an
    IntegrityError.
    """
    today = date.today()

    # Retrieve the counts for each bucket and calculate the final score.
    final_score = 0.0
    last_end_timedelta = timedelta(days=0)

    for bucket in SEARCH_BUCKETS:
        # Each bucket covers [today - bucket.delta, today - previous bucket's delta).
        start_date = today - bucket.delta
        end_date = today - last_end_timedelta
        last_end_timedelta = bucket.delta

        query = RepositoryActionCount.select(
            fn.Sum(RepositoryActionCount.count), fn.Count(RepositoryActionCount.id)
        ).where(
            RepositoryActionCount.date >= start_date,
            RepositoryActionCount.date < end_date,
            RepositoryActionCount.repository == repo,
        )

        bucket_tuple = query.tuples()[0]
        logger.debug(
            "Got bucket tuple %s for bucket %s for repository %s", bucket_tuple, bucket, repo.id
        )

        # The sum is None when no rows fall inside the bucket's window.
        if bucket_tuple[0] is None:
            continue

        bucket_sum = float(bucket_tuple[0])
        bucket_count = int(bucket_tuple[1])
        if not bucket_count:
            continue

        bucket_score = bucket_sum / (bucket_count * 1.0)
        final_score += bucket_score * bucket.weight

    # Update the existing repo search score row or create a new one.
    normalized_score = int(final_score * 100.0)

    # Use one full timestamp on both paths; previously the create path stored only
    # a date while the update path stored a datetime.
    updated_at = datetime.now()

    try:
        try:
            search_score_row = RepositorySearchScore.get(repository=repo)
            search_score_row.last_updated = updated_at
            search_score_row.score = normalized_score
            search_score_row.save()
        except RepositorySearchScore.DoesNotExist:
            RepositorySearchScore.create(
                repository=repo, score=normalized_score, last_updated=updated_at
            )
        return True
    except IntegrityError:
        # Raised when the row could not be written, e.g. it was created concurrently.
        logger.debug("RepositorySearchScore row already existed; skipping")
        return False
def delete_expired_entries(repo, limit=50):
    """
    Removes expired RepositoryActionCount rows belonging to `repo`, i.e. rows dated earlier
    than the current time minus RAC_RETENTION_PERIOD. At most `limit` rows are selected for
    removal per call.

    Returns the number of entries removed; rows whose deletion raises an IntegrityError are
    skipped and not counted.
    """
    cutoff = datetime.today() - RAC_RETENTION_PERIOD
    expired_query = (
        RepositoryActionCount.select()
        .where(RepositoryActionCount.repository == repo, RepositoryActionCount.date < cutoff)
        .limit(limit)
    )

    expired_rows = list(expired_query)
    if not expired_rows:
        return 0

    assert len(expired_rows) <= limit

    removed = 0
    for row in expired_rows:
        try:
            row.delete_instance(recursive=False)
        except IntegrityError:
            continue
        removed += 1

    return removed
def missing_counts_query(date):
    """
    Builds a query that finds every Repository lacking a RepositoryActionCount (RAC) entry for
    the given date.
    """
    # RAC rows that exist for the requested date, exposed under the alias "rac".
    counted = (
        RepositoryActionCount.select(RepositoryActionCount.id, RepositoryActionCount.repository)
        .where(RepositoryActionCount.date == date)
        .alias("rac")
    )

    # Left-outer-join the repositories against those rows and keep only the
    # repositories for which no RAC row matched.
    same_repository = Repository.id == counted.c.repository_id
    return (
        Repository.select()
        .join(counted, JOIN.LEFT_OUTER, on=same_repository)
        .where(counted.c.id >> None)
    )
def test_update_repository_score(bucket_sums, expected_score, initialized_db):
    """
    Checks that update_repository_score stores `expected_score` for a repository whose action
    counts are spread evenly over the days of each search bucket according to `bucket_sums`.
    """
    # Start from a brand new repository.
    repo = create_repository('devtable', 'somenewrepo', None, repo_kind='image')

    # Drop the RAC row that create_repository added, so only the rows below count.
    created_by_setup = RepositoryActionCount.repository == repo
    RepositoryActionCount.delete().where(created_by_setup).execute()

    # Insert one RAC row per day of every bucket.
    for index, bucket in enumerate(SEARCH_BUCKETS):
        for day_offset in range(bucket.days):
            RepositoryActionCount.create(
                repository=repo,
                count=(bucket_sums[index] / bucket.days * 1.0),
                date=date.today() - bucket.delta + timedelta(days=day_offset),
            )

    assert update_repository_score(repo)

    stored_row = RepositorySearchScore.get(repository=repo)
    assert stored_row.score == expected_score
def find_uncounted_repository():
    """
    Picks a repository that does not yet have a RepositoryActionCount entry for yesterday.

    Returns the repository, chosen in random order, or None when every repository already has
    an entry.
    """
    yesterday = date.today() - timedelta(days=1)

    try:
        # Repositories that already have a row for yesterday.
        already_counted = RepositoryActionCount.select(RepositoryActionCount.repository).where(
            RepositoryActionCount.date == yesterday
        )

        # Any repository outside that set, in random order.
        candidates = (
            Repository.select()
            .where(~(Repository.id << already_counted))
            .order_by(db_random_func())
        )
        return candidates.get()
    except Repository.DoesNotExist:
        return None
def get_repositories_action_sums(repository_ids):
    """
    Builds a map from repository ID to the total number of actions recorded for that
    repository in the last week.

    Returns an empty dict when `repository_ids` is empty or None. Repositories without any
    recent entries do not appear in the result.
    """
    if not repository_ids:
        return {}

    # Only entries from the last week take part in the sum.
    one_week_ago = datetime.now() - timedelta(weeks=1)

    rows = (
        RepositoryActionCount.select(
            RepositoryActionCount.repository, fn.Sum(RepositoryActionCount.count)
        )
        .where(RepositoryActionCount.repository << repository_ids)
        .where(RepositoryActionCount.date >= one_week_ago)
        .group_by(RepositoryActionCount.repository)
        .tuples()
    )

    return {row[0]: row[1] for row in rows}
def found_entry_count(day):
    """
    Counts the entries stored in the RepositoryActionCount (RAC) table for the given day.
    """
    entries_for_day = RepositoryActionCount.select().where(RepositoryActionCount.date == day)
    return entries_for_day.count()