예제 #1
0
def main(send_email=False):
    """Audit storage usage of parentless nodes and report offenders.

    Visits every node whose ``parent_node`` is ``None``, stores the result of
    ``get_usage`` per node id and sums those results per contributor that can
    edit the node. Entries of both collections that pass
    ``limit_filter(limit, entry)`` are logged and collected into a report.

    :param bool send_email: When true and the report is non-empty, send the
        report through ``mails.send_mail``; otherwise it is only logged.
    """
    logger.info('Starting Project storage audit')
    init_app(set_backends=True, routes=False)

    lines = []
    projects = {}
    users = defaultdict(lambda: (0, 0))

    root_query = Q('parent_node', 'eq', None)
    progress_bar = progressbar.ProgressBar(
        maxval=Node.find(root_query).count()).start()

    for i, node in enumerate(Node.find(root_query)):
        progress_bar.update(i + 1)
        # Dont count whitelisted nodes against users
        if node._id not in WHITE_LIST:
            projects[node._id] = get_usage(node)
            for contrib in node.contributors:
                if node.can_edit(user=contrib):
                    # Adds tuples together,
                    # map(sum, zip((a, b), (c, d))) -> (a+c, b+d)
                    users[contrib._id] = tuple(
                        map(sum, zip(users[contrib._id], projects[node._id]))
                    )

        # Housekeeping runs for whitelisted nodes too, so a whitelisted node
        # landing on a multiple of 25 no longer postpones the cache clear.
        if i % 25 == 0:
            # Clear all caches
            for key in ('node', 'user', 'fileversion', 'storedfilenode'):
                Node._cache.data.get(key, {}).clear()
                Node._object_cache.data.get(key, {}).clear()
            # Collect garbage
            gc.collect()
    progress_bar.finish()

    for model, collection, limit in ((User, users, USER_LIMIT),
                                     (Node, projects, PROJECT_LIMIT)):
        over_limit = filter(functools.partial(limit_filter, limit),
                            collection.items())
        for item, (used, deleted) in over_limit:
            line = '{!r} has exceeded the limit {:.2f}GBs ({}b) with {:.2f}GBs ({}b) used and {:.2f}GBs ({}b) deleted.'.format(
                model.load(item), limit / GBs, limit, used / GBs, used,
                deleted / GBs, deleted)
            logger.info(line)
            lines.append(line)

    if lines:
        if send_email:
            logger.info('Sending email...')
            mails.send_mail('*****@*****.**',
                            mails.EMPTY,
                            body='\n'.join(lines),
                            subject='Script: OsfStorage usage audit')
        else:
            # The message has no placeholders, so it is logged as-is.
            logger.info('send_email is False, not sending email')
        logger.info('{} offending project(s) and user(s) found'.format(
            len(lines)))
    else:
        logger.info('No offending projects or users found')
예제 #2
0
파일: views.py 프로젝트: cwisecarver/osf.io
 def get_queryset(self):
     """Return the nodes matching the query built from the request.

     When ``is_blacklisted`` reports the query as blacklisted, nodes whose
     ``is_retracted`` is truthy are dropped and the rest are fetched again
     by ``_id``.
     """
     request_query = self.get_query_from_request()
     exclude_withdrawn = self.is_blacklisted(request_query)
     matches = Node.find(request_query)
     if not exclude_withdrawn:
         return matches
     # Filtering on a blacklisted field: withdrawals must not be returned.
     kept_ids = []
     for candidate in matches:
         if not candidate.is_retracted:
             kept_ids.append(candidate._id)
     return Node.find(Q("_id", "in", kept_ids))
예제 #3
0
 def get_queryset(self):
     """Return nodes for the request query, minus retractions if blacklisted.

     The retraction filter is applied only when ``is_blacklisted`` returns a
     truthy value for the request query.
     """
     request_query = self.get_query_from_request()
     must_filter = self.is_blacklisted(request_query)
     found = Node.find(request_query)
     if must_filter:
         # Filtering on a blacklisted field: keep only non-retracted nodes.
         keep = []
         for each in found:
             if not each.is_retracted:
                 keep.append(each._id)
         return Node.find(Q('_id', 'in', keep))
     return found
예제 #4
0
파일: views.py 프로젝트: ajski/osf.io
 def get_queryset(self):
     """Look up nodes for the request query.

     If the query is blacklisted according to ``is_blacklisted``, the result
     is narrowed to the ``_id`` values of nodes that are not retracted.
     """
     request_query = self.get_query_from_request()
     is_restricted = self.is_blacklisted(request_query)
     results = Node.find(request_query)
     if not is_restricted:
         return results
     # Filtering on a blacklisted field: exclude retractions.
     allowed_ids = [item._id for item in results if not item.is_retracted]
     id_query = Q('_id', 'in', allowed_ids)
     return Node.find(id_query)
예제 #5
0
def main(send_email=False):
    """Audit storage usage of parentless nodes and report offenders.

    Stores the result of ``get_usage`` for every non-whitelisted node matched
    by the backref query below and sums those results per contributor that
    can edit the node. Entries of both collections that pass
    ``limit_filter(limit, entry)`` are logged and collected into a report.

    :param bool send_email: When true and the report is non-empty, send the
        report through ``mails.send_mail``; otherwise it is only logged.
    """
    logger.info('Starting Project storage audit')
    init_app(set_backends=True, routes=False)

    lines = []
    projects = {}
    users = defaultdict(lambda: (0, 0))

    # ODM hack to ignore all nodes with parents
    for node in Node.find(Q('__backrefs.parent.node.nodes', 'eq', None)):
        if node._id in WHITE_LIST:
            continue  # Dont count whitelisted nodes against users
        projects[node] = get_usage(node)
        for contrib in node.contributors:
            if node.can_edit(user=contrib):
                # Adds tuples together,
                # map(sum, zip((a, b), (c, d))) -> (a+c, b+d)
                users[contrib] = tuple(
                    map(sum, zip(users[contrib], projects[node])))

    for collection, limit in ((users, USER_LIMIT), (projects, PROJECT_LIMIT)):
        over_limit = filter(functools.partial(limit_filter, limit),
                            collection.items())
        for item, (used, deleted) in over_limit:
            line = '{!r} has exceeded the limit {:.2f}GBs ({}b) with {:.2f}GBs ({}b) used and {:.2f}GBs ({}b) deleted.'.format(item, limit / GBs, limit, used / GBs, used, deleted / GBs, deleted)
            logger.info(line)
            lines.append(line)

    if lines:
        if send_email:
            logger.info('Sending email...')
            mails.send_mail('*****@*****.**', mails.EMPTY, body='\n'.join(lines), subject='Script: OsfStorage usage audit')
        else:
            # The message has no placeholders, so it is logged as-is.
            logger.info('send_email is False, not sending email')
        logger.info('{} offending project(s) and user(s) found'.format(len(lines)))
    else:
        logger.info('No offending projects or users found')
def do_migration():
    """Hide near-duplicate ``preprint_license_updated`` logs.

    Selects nodes that have more than one ``preprint_license_updated`` log,
    then walks each node's matching logs from the end of ``node.logs`` towards
    the start, comparing every log with the one preceding it. When a log is
    dated less than 60 seconds after its predecessor it gets
    ``should_hide = True`` and is saved; otherwise it is only reported.
    """
    action_query = Q('action', 'eq', 'preprint_license_updated')
    candidate_ids = list(set([l.node._id for l in NodeLog.find(action_query)]))
    dupe_nodes = [
        n for n in Node.find(Q('_id', 'in', candidate_ids))
        if NodeLog.find(
            Q('action', 'eq', 'preprint_license_updated') & Q('node', 'eq', n._id)
        ).count() > 1
    ]
    logger.info('Found {} nodes with multiple preprint_license_updated logs'.format(len(dupe_nodes)))

    for node in dupe_nodes:
        preprint_license_updated_logs = [log for log in node.logs if log.action == 'preprint_license_updated']

        log = preprint_license_updated_logs.pop()
        while preprint_license_updated_logs:
            next_log = preprint_license_updated_logs.pop()
            elapsed = log.date - next_log.date
            # ``total_seconds()`` measures the whole interval. ``.seconds`` is
            # only the sub-day remainder, so logs a day (or more) apart could
            # previously be mistaken for duplicates. Negative intervals are
            # still skipped.
            if 0 <= elapsed.total_seconds() < 60:
                logger.info(
                    'Hiding duplicate preprint_license_updated log with ID {} from node {}, timedelta was {}'.format(
                        log._id, node._id, elapsed
                    )
                )
                log.should_hide = True
                log.save()
            else:
                logger.info(
                    'Skipping preprint_license_updated log with ID {} from node {}, timedelta was {}'.format(
                        log._id, node._id, elapsed
                    )
                )

            log = next_log
예제 #7
0
def migrate_nodes():
    """Run ``migrate_category`` on every node and save the ones it changed.

    A node is saved, logged and counted only when ``migrate_category``
    returns a truthy value for it.
    """
    total = 0
    for each in Node.find():
        if not migrate_category(each):
            continue
        each.save()
        logger.info('Migrated {0}'.format(each._id))
        total += 1
    logger.info('Finished migrating {0} nodes.'.format(total))
예제 #8
0
def migrate_nodes():
    """Apply ``migrate_category`` to all nodes, persisting those it reports.

    Logs each saved node id and, at the end, how many nodes were saved.
    """
    saved = 0
    for candidate in Node.find():
        changed = migrate_category(candidate)
        if not changed:
            continue
        candidate.save()
        logger.info('Migrated {0}'.format(candidate._id))
        saved = saved + 1
    logger.info('Finished migrating {0} nodes.'.format(saved))
예제 #9
0
 def test_delete_registration_tree(self):
     # Build a project with two components, one of which has its own child,
     # and register the project.
     root = factories.NodeFactory()
     factories.NodeFactory(parent=root)
     child = factories.NodeFactory(parent=root)
     factories.NodeFactory(parent=child)
     registration = factories.RegistrationFactory(project=root)
     tree_ids = [registration._id]
     tree_ids.extend(d._id for d in registration.get_descendants_recursive())
     archiver_utils.delete_registration_tree(registration)
     # No node of the registration tree may be left with is_deleted == False.
     still_alive = Node.find(Q('_id', 'in', tree_ids) & Q('is_deleted', 'eq', False))
     assert_false(still_alive.count())
예제 #10
0
파일: utils.py 프로젝트: scooley/osf.io
def get_projects(time=None, public=False, registered=False):
    """Count parentless nodes matching ``CONTENT_NODE_QUERY``.

    :param time: If truthy, only count nodes with ``date_created`` before it.
    :param public: If truthy, only count nodes with ``is_public`` true.
    :param registered: If truthy, only count nodes with ``is_registration``
        true.
    :return: The ``count()`` of the resulting ``Node.find`` query.
    """
    criteria = Q('parent_node', 'eq', None) & CONTENT_NODE_QUERY
    if time:
        criteria = criteria & Q('date_created', 'lt', time)
    if public:
        criteria = criteria & Q('is_public', 'eq', True)
    if registered:
        criteria = criteria & Q('is_registration', 'eq', True)
    matching = Node.find(criteria)
    return matching.count()
예제 #11
0
 def test_delete_registration_tree(self):
     # Project -> (component, component -> grandchild), then register it.
     project = factories.NodeFactory()
     factories.NodeFactory(parent=project)
     second_component = factories.NodeFactory(parent=project)
     factories.NodeFactory(parent=second_component)
     registration = factories.RegistrationFactory(project=project)
     descendant_ids = [d._id for d in registration.get_descendants_recursive()]
     all_ids = [registration._id] + descendant_ids
     archiver_utils.delete_registration_tree(registration)
     # Every node in the registration tree must now be flagged as deleted.
     not_deleted = Q('_id', 'in', all_ids) & Q('is_deleted', 'eq', False)
     assert_false(Node.find(not_deleted).count())
예제 #12
0
파일: tag.py 프로젝트: ycchen1989/osf.io
def project_tag(tag, auth, **kwargs):
    """Return title/url summaries of the viewable nodes carrying ``tag``.

    :param tag: Identifier passed to ``Tag.load``.
    :param auth: Passed to each node's ``can_view``.
    :return: Dict with ``"nodes"`` (list of title/url dicts) and ``"tag"``
        (the ``tag`` argument unchanged). ``"nodes"`` is empty when
        ``Tag.load`` returns a falsy value.
    """
    tag_obj = Tag.load(tag)
    tagged = Node.find(Q("tags", "eq", tag_obj._id)) if tag_obj else []

    viewable = []
    for candidate in tagged:
        if candidate.can_view(auth):
            viewable.append(candidate)

    summaries = [{"title": item.title, "url": item.url} for item in viewable]
    return {"nodes": summaries, "tag": tag}
예제 #13
0
def get_targets():
    """Return the users that created more than one live bookmark collection.

    A user qualifies when more than one node exists with
    ``is_bookmark_collection`` true, ``is_deleted`` false and ``creator``
    equal to the user's ``_id``.
    """
    logger.info('Acquiring targets...')
    targets = []
    for user in User.find():
        live_bookmarks = Node.find(
            Q('is_bookmark_collection', 'eq', True)
            & Q('is_deleted', 'eq', False)
            & Q('creator', 'eq', user._id))
        if live_bookmarks.count() > 1:
            targets.append(user)
    logger.info('Found {} target users.'.format(len(targets)))
    return targets
예제 #14
0
def main(send_email=False):
    """Audit storage usage of parentless nodes and report offenders.

    Visits every node whose ``parent_node`` is ``None``, stores the result of
    ``get_usage`` per node id and sums those results per contributor that can
    edit the node. Entries of both collections that pass
    ``limit_filter(limit, entry)`` are logged and collected into a report.

    :param bool send_email: When true and the report is non-empty, send the
        report through ``mails.send_mail``; otherwise it is only logged.
    """
    logger.info('Starting Project storage audit')
    init_app(set_backends=True, routes=False)

    lines = []
    projects = {}
    users = defaultdict(lambda: (0, 0))

    root_query = Q('parent_node', 'eq', None)
    progress_bar = progressbar.ProgressBar(maxval=Node.find(root_query).count()).start()

    for i, node in enumerate(Node.find(root_query)):
        progress_bar.update(i+1)
        # Dont count whitelisted nodes against users
        if node._id not in WHITE_LIST:
            projects[node._id] = get_usage(node)
            for contrib in node.contributors:
                if node.can_edit(user=contrib):
                    # Adds tuples together, map(sum, zip((a, b), (c, d))) -> (a+c, b+d)
                    users[contrib._id] = tuple(map(sum, zip(users[contrib._id], projects[node._id])))

        # Housekeeping runs for whitelisted nodes too, so a whitelisted node
        # landing on a multiple of 25 no longer postpones the cache clear.
        if i % 25 == 0:
            # Clear all caches
            for key in ('node', 'user', 'fileversion', 'storedfilenode'):
                Node._cache.data.get(key, {}).clear()
                Node._object_cache.data.get(key, {}).clear()
            # Collect garbage
            gc.collect()
    progress_bar.finish()

    for model, collection, limit in ((User, users, USER_LIMIT), (Node, projects, PROJECT_LIMIT)):
        over_limit = filter(functools.partial(limit_filter, limit), collection.items())
        for item, (used, deleted) in over_limit:
            line = '{!r} has exceeded the limit {:.2f}GBs ({}b) with {:.2f}GBs ({}b) used and {:.2f}GBs ({}b) deleted.'.format(model.load(item), limit / GBs, limit, used / GBs, used, deleted / GBs, deleted)
            logger.info(line)
            lines.append(line)

    if lines:
        if send_email:
            logger.info('Sending email...')
            mails.send_mail('*****@*****.**', mails.EMPTY, body='\n'.join(lines), subject='Script: OsfStorage usage audit')
        else:
            # The message has no placeholders, so it is logged as-is.
            logger.info('send_email is False, not sending email')
        logger.info('{} offending project(s) and user(s) found'.format(len(lines)))
    else:
        logger.info('No offending projects or users found')
예제 #15
0
파일: utils.py 프로젝트: kch8qx/osf.io
def get_projects(time=None, public=False, registered=False):
    """Count non-deleted, non-folder nodes whose category is ``project``.

    :param time: If truthy, only count nodes with ``date_created`` before it.
    :param public: If truthy, only count nodes with ``is_public`` true.
    :param registered: If truthy, only count nodes with ``is_registration``
        true.
    :return: The ``count()`` of the resulting ``Node.find`` query.
    """
    criteria = (
        Q('category', 'eq', 'project') &
        Q('is_deleted', 'eq', False) &
        Q('is_folder', 'ne', True)
    )
    if time:
        criteria = criteria & Q('date_created', 'lt', time)
    if public:
        criteria = criteria & Q('is_public', 'eq', True)
    if registered:
        criteria = criteria & Q('is_registration', 'eq', True)
    matching = Node.find(criteria)
    return matching.count()
예제 #16
0
    def get_queryset(self):
        """Return the viewable primary child nodes, newest modification first.

        Children of ``self.get_node()`` flagged ``primary`` are matched
        against the request query, filtered with ``can_view`` for the
        requesting user and sorted by ``date_modified`` descending.
        """
        parent = self.get_node()
        request_query = self.get_query_from_request()

        primary_ids = [child._id for child in parent.nodes if child.primary]
        candidates = Node.find(Q('_id', 'in', primary_ids) & request_query)
        auth = get_user_auth(self.request)

        viewable = []
        for candidate in candidates:
            if candidate.can_view(auth):
                viewable.append(candidate)
        viewable.sort(key=lambda n: n.date_modified, reverse=True)
        return viewable
예제 #17
0
파일: views.py 프로젝트: baylee-d/osf.io
    def get_queryset(self):
        """List the primary children the requesting user can view.

        The result is a list sorted by ``date_modified`` in descending order.
        """
        current = self.get_node()
        request_query = self.get_query_from_request()

        child_ids = [entry._id for entry in current.nodes if entry.primary]
        combined = Q('_id', 'in', child_ids) & request_query
        found = Node.find(combined)
        auth = get_user_auth(self.request)

        permitted = [item for item in found if item.can_view(auth)]
        permitted.sort(key=lambda n: n.date_modified, reverse=True)
        return permitted
예제 #18
0
def get_projects(time=None, public=False, registered=False):
    """Return how many parentless ``CONTENT_NODE_QUERY`` nodes match.

    Each truthy argument adds one more condition: ``time`` requires
    ``date_created`` to be earlier, ``public`` requires ``is_public`` and
    ``registered`` requires ``is_registration``.
    """
    conditions = Q('parent_node', 'eq', None) & CONTENT_NODE_QUERY
    if time:
        conditions = conditions & Q('date_created', 'lt', time)
    if public:
        conditions = conditions & Q('is_public', 'eq', True)
    if registered:
        conditions = conditions & Q('is_registration', 'eq', True)
    result = Node.find(conditions)
    return result.count()
예제 #19
0
파일: utils.py 프로젝트: scooley/osf.io
def recent_public_registrations(n=10):
    """Yield public, parentless registrations ordered by ``-registered_date``.

    Registrations that are retracted or pending embargo are skipped. The
    generator stops once ``n`` yielded items have brought the counter to zero.

    :param int n: Number of registrations to yield before stopping.
    """
    selection = (
        CONTENT_NODE_QUERY
        & Q('parent_node', 'eq', None)
        & Q('is_public', 'eq', True)
        & Q('is_registration', 'eq', True)
    )
    newest_first = Node.find(selection).sort('-registered_date')
    for registration in newest_first:
        if not n:
            break
        # Filter based on calculated properties
        hidden = registration.is_retracted or registration.is_pending_embargo
        if not hidden:
            n -= 1
            yield registration
예제 #20
0
파일: utils.py 프로젝트: wearpants/osf.io
def recent_public_registrations(n=10):
    """Yield public, non-deleted project registrations, newest first.

    Ordering is by ``-registered_date``. Registrations that are retracted or
    pending embargo are skipped. The generator stops once ``n`` yielded items
    have brought the counter to zero.

    :param int n: Number of registrations to yield before stopping.
    """
    recent_query = (
        Q('category', 'eq', 'project')
        & Q('is_public', 'eq', True)
        & Q('is_deleted', 'eq', False)
    )
    only_registrations = recent_query & Q('is_registration', 'eq', True)
    newest_first = Node.find(only_registrations).sort('-registered_date')
    for registration in newest_first:
        if not n:
            break
        hidden = registration.is_retracted or registration.is_pending_embargo
        if not hidden:
            n = n - 1
            yield registration
예제 #21
0
    def get_queryset(self):
        """Return the viewable non-link children ordered by ``-date_modified``.

        Child primary keys come from the node's ``node_relations`` with
        ``is_node_link=False``. They are combined with the request query,
        filtered with ``can_view`` and re-queried by primary key.
        """
        parent = self.get_node()
        request_query = self.get_query_from_request()

        relations = parent.node_relations.filter(is_node_link=False)
        child_pks = relations.select_related('child').values_list(
            'child__pk', flat=True)

        candidates = Node.find(
            Q('pk', 'in', child_pks) & request_query
        ).order_by('-date_modified')
        auth = get_user_auth(self.request)

        viewable_pks = []
        for candidate in candidates:
            if candidate.can_view(auth):
                viewable_pks.append(candidate.pk)
        return Node.objects.filter(pk__in=viewable_pks).order_by('-date_modified')
예제 #22
0
파일: utils.py 프로젝트: 545zhou/osf.io
def get_projects(time=None, public=False, registered=False):
    """Return how many non-deleted, non-folder ``project`` nodes match.

    Each truthy argument adds one more condition: ``time`` requires
    ``date_created`` to be earlier, ``public`` requires ``is_public`` and
    ``registered`` requires ``is_registration``.
    """
    conditions = (Q('category', 'eq', 'project')
                  & Q('is_deleted', 'eq', False)
                  & Q('is_folder', 'ne', True))
    if time:
        conditions = conditions & Q('date_created', 'lt', time)
    if public:
        conditions = conditions & Q('is_public', 'eq', True)
    if registered:
        conditions = conditions & Q('is_registration', 'eq', True)
    result = Node.find(conditions)
    return result.count()
예제 #23
0
파일: tag.py 프로젝트: scooley/osf.io
def project_tag(tag, auth, **kwargs):
    """Return title/url summaries of the viewable nodes carrying ``tag``.

    :param tag: Identifier passed to ``Tag.load``.
    :param auth: Passed to each node's ``can_view``.
    :return: Dict with ``'nodes'`` (list of title/url dicts) and ``'tag'``
        (the ``tag`` argument unchanged). ``'nodes'`` is empty when
        ``Tag.load`` returns a falsy value.
    """
    tag_obj = Tag.load(tag)
    tagged = Node.find(Q('tags', 'eq', tag_obj._id)) if tag_obj else []

    viewable = []
    for candidate in tagged:
        if candidate.can_view(auth):
            viewable.append(candidate)

    summaries = [{'title': item.title, 'url': item.url} for item in viewable]
    return {'nodes': summaries, 'tag': tag}
예제 #24
0
def recent_public_registrations(n=10):
    """Generate recent public top-level registrations.

    Results are ordered by ``-registered_date``. Entries that are retracted
    or pending embargo are skipped. Iteration ends when the remaining count
    ``n`` becomes zero.

    :param int n: Number of registrations to yield before stopping.
    """
    query = (CONTENT_NODE_QUERY & Q('parent_node', 'eq', None)
             & Q('is_public', 'eq', True) & Q('is_registration', 'eq', True))
    ordered = Node.find(query).sort('-registered_date')
    for candidate in ordered:
        if not n:
            break
        # Filter based on calculated properties
        if candidate.is_retracted or candidate.is_pending_embargo:
            continue
        n = n - 1
        yield candidate
예제 #25
0
파일: tag.py 프로젝트: 545zhou/osf.io
def project_tag(tag, auth, **kwargs):
    """Describe the nodes tagged with ``tag`` that ``auth`` may view.

    :return: ``{'nodes': [{'title': ..., 'url': ...}, ...], 'tag': tag}``.
        The node list is empty when ``Tag.load(tag)`` is falsy.
    """
    tag_obj = Tag.load(tag)
    if not tag_obj:
        matches = []
    else:
        matches = Node.find(Q('tags', 'eq', tag_obj._id))

    permitted = [match for match in matches if match.can_view(auth)]
    described = []
    for entry in permitted:
        described.append({'title': entry.title, 'url': entry.url})
    return {'nodes': described, 'tag': tag}
def migrate():
    """Keep one bookmark collection per target user and delete the rest.

    For every user from ``get_targets`` the user's bookmark collections are
    loaded sorted by ``-date_modified``. The last one in that order with a
    truthy ``nodes`` is kept, falling back to the first result. Every other
    one gets ``is_deleted = True`` and is saved.
    """
    targets = get_targets()
    total = len(targets)
    for position, user in enumerate(targets, 1):
        logger.info('({}/{}) Preparing to migrate User {}'.format(position, total, user._id))
        # NOTE(review): unlike get_targets, this query does not filter on
        # is_deleted, so an already-deleted collection could be the one kept.
        # Confirm that is intended.
        owned_bookmarks = Q('is_bookmark_collection', 'eq', True) & Q('creator', 'eq', user._id)
        bookmarks = Node.find(owned_bookmarks).sort('-date_modified')

        keeper = None
        for candidate in bookmarks:
            if candidate.nodes:
                keeper = candidate
        keeper = keeper or bookmarks[0]
        logger.info('Marking Node {} as primary Bookmark Collection for User {}, preparing to delete others'.format(keeper._id, user._id))
        for candidate in bookmarks:
            if candidate._id == keeper._id:
                continue
            candidate.is_deleted = True
            candidate.save()
        logger.info('Successfully migrated User {}'.format(user._id))
    logger.info('Successfully migrated {} users'.format(total))
예제 #27
0
파일: utils.py 프로젝트: XTech2K/osf.io
def recent_public_registrations(n=10):
    """Generate recent public, non-deleted project registrations.

    Results are ordered by ``-registered_date``. Entries with a truthy
    ``is_retracted`` or ``pending_embargo`` are skipped. Iteration ends when
    the remaining count ``n`` becomes zero.

    :param int n: Number of registrations to yield before stopping.
    """
    recent_query = (Q('category', 'eq', 'project')
                    & Q('is_public', 'eq', True)
                    & Q('is_deleted', 'eq', False))
    registration_query = recent_query & Q('is_registration', 'eq', True)
    ordered = Node.find(registration_query).sort('-registered_date')
    for candidate in ordered:
        if not n:
            break
        skip = candidate.is_retracted or candidate.pending_embargo
        if not skip:
            n -= 1
            yield candidate
예제 #28
0
def do_migration():
    """Hide near-duplicate ``preprint_license_updated`` logs.

    Selects nodes that have more than one ``preprint_license_updated`` log,
    then walks each node's matching logs from the end of ``node.logs`` towards
    the start, comparing every log with the one preceding it. When a log is
    dated less than 60 seconds after its predecessor it gets
    ``should_hide = True`` and is saved; otherwise it is only reported.
    """
    candidate_ids = list(
        set([
            l.node._id for l in NodeLog.find(
                Q('action', 'eq', 'preprint_license_updated'))
        ]))
    dupe_nodes = [
        n for n in Node.find(Q('_id', 'in', candidate_ids))
        if NodeLog.find(
            Q('action', 'eq', 'preprint_license_updated')
            & Q('node', 'eq', n._id)).count() > 1
    ]
    logger.info(
        'Found {} nodes with multiple preprint_license_updated logs'.format(
            len(dupe_nodes)))

    for node in dupe_nodes:
        preprint_license_updated_logs = [
            log for log in node.logs
            if log.action == 'preprint_license_updated'
        ]

        log = preprint_license_updated_logs.pop()
        while preprint_license_updated_logs:
            next_log = preprint_license_updated_logs.pop()
            elapsed = log.date - next_log.date
            # ``total_seconds()`` measures the whole interval. ``.seconds`` is
            # only the sub-day remainder, so logs a day (or more) apart could
            # previously be mistaken for duplicates. Negative intervals are
            # still skipped.
            if 0 <= elapsed.total_seconds() < 60:
                logger.info(
                    'Hiding duplicate preprint_license_updated log with ID {} from node {}, timedelta was {}'
                    .format(log._id, node._id, elapsed))
                log.should_hide = True
                log.save()
            else:
                logger.info(
                    'Skipping preprint_license_updated log with ID {} from node {}, timedelta was {}'
                    .format(log._id, node._id, elapsed))

            log = next_log
예제 #29
0
def migrate():
    """Reduce each target user's bookmark collections to a single one.

    For every user returned by ``get_targets``, the user's bookmark
    collections are fetched sorted by ``-date_modified``. The last one in
    that order with a truthy ``nodes`` is kept, or the first result when none
    has nodes. All the others are flagged ``is_deleted = True`` and saved.
    """
    targets = get_targets()
    total = len(targets)
    for index, user in enumerate(targets):
        logger.info('({}/{}) Preparing to migrate User {}'.format(
            index + 1, total, user._id))
        # NOTE(review): unlike get_targets, this query does not filter on
        # is_deleted, so an already-deleted collection could be the one kept.
        # Confirm that is intended.
        bookmarks = Node.find(
            Q('is_bookmark_collection', 'eq', True)
            & Q('creator', 'eq', user._id)
        ).sort('-date_modified')

        chosen = None
        for bookmark in bookmarks:
            if bookmark.nodes:
                chosen = bookmark
        chosen = chosen or bookmarks[0]
        logger.info(
            'Marking Node {} as primary Bookmark Collection for User {}, preparing to delete others'
            .format(chosen._id, user._id))
        for bookmark in bookmarks:
            if bookmark._id == chosen._id:
                continue
            bookmark.is_deleted = True
            bookmark.save()
        logger.info('Successfully migrated User {}'.format(user._id))
    logger.info('Successfully migrated {} users'.format(total))
def get_targets():
    """Collect users owning more than one non-deleted bookmark collection.

    Ownership is decided by the node's ``creator`` matching the user's
    ``_id``.
    """
    logger.info('Acquiring targets...')
    targets = []
    for candidate in User.find():
        bookmark_query = (
            Q('is_bookmark_collection', 'eq', True)
            & Q('is_deleted', 'eq', False)
            & Q('creator', 'eq', candidate._id)
        )
        if Node.find(bookmark_query).count() > 1:
            targets.append(candidate)
    logger.info('Found {} target users.'.format(len(targets)))
    return targets
예제 #31
0
def find_nested_projects():
    """Find non-deleted ``project`` nodes that have a parent backref entry.

    :return: The ``Node.find`` result for the combined query.
    """
    has_parent = Q('__backrefs.parent.node.nodes.0', 'exists', True)
    is_project = Q('category', 'eq', 'project')
    not_deleted = Q('is_deleted', 'eq', False)
    return Node.find(has_parent & is_project & not_deleted)
예제 #32
0
 def contributed(self):
     """Return the nodes whose ``contributors`` match this object's ``_id``."""
     from website.project.model import Node
     by_contributor = Q('contributors', 'eq', self._id)
     return Node.find(by_contributor)
예제 #33
0
 def created(self):
     """Return the nodes whose ``creator`` matches this object's ``_id``."""
     from website.project.model import Node
     by_creator = Q('creator', 'eq', self._id)
     return Node.find(by_creator)
예제 #34
0
 def get_queryset(self):
     """Return the nodes matching the query derived from the request."""
     return Node.find(self.get_query_from_request())
예제 #35
0
파일: views.py 프로젝트: baylee-d/osf.io
 def get_user_nodes_since_workshop(user, workshop_date):
     """List nodes the user created more than a day after the workshop.

     :param user: Object whose ``_id`` is matched against ``creator``.
     :param workshop_date: Workshop date; one day is added to it to form the
         cutoff for ``date_created``.
     :return: A list of the matching nodes.
     """
     cutoff = workshop_date + timedelta(days=1)
     by_creator = Q('creator', 'eq', user._id)
     after_cutoff = Q('date_created', 'gt', cutoff)
     return list(Node.find(query=by_creator & after_cutoff))
예제 #36
0
 def get_user_nodes_since_workshop(user, workshop_date):
     """Return, as a list, the user's nodes created after the workshop.

     Only nodes with ``date_created`` later than ``workshop_date`` plus one
     day and with ``creator`` equal to ``user._id`` are included.
     """
     earliest = workshop_date + timedelta(days=1)
     creator_clause = Q('creator', 'eq', user._id)
     date_clause = Q('date_created', 'gt', earliest)
     found = Node.find(query=creator_clause & date_clause)
     return list(found)
예제 #37
0
def find_nested_projects():
    """Return non-deleted ``project`` nodes that have a parent backref entry."""
    nested_query = Q('__backrefs.parent.node.nodes.0', 'exists', True)
    nested_query = nested_query & Q('category', 'eq', 'project')
    nested_query = nested_query & Q('is_deleted', 'eq', False)
    return Node.find(nested_query)