Exemplo n.º 1
0
def search_projects_by_title(**kwargs):
    """ Search for nodes by title. Can pass in arguments from the URL to modify the search
    :arg term: The substring of the title.
    :arg maxResults: Maximum number of results to return (defaults to 10).
    :arg category: Category of the node.
    :arg isDeleted: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isFolder: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isRegistration: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg includePublic: yes or no. Whether the projects listed should include public projects.
    :arg includeContributed: yes or no. Whether the search should include projects the current user has
        contributed to.
    :arg ignoreNode: a list of nodes that should not be included in the search.
    :return: the value of ``process_project_search_results`` for the matches
        (described upstream as a list of dictionaries of projects)

    """
    # TODO(fabianvf): At some point, it would be nice to do this with elastic search
    user = kwargs['auth'].user

    term = request.args.get('term', '')
    max_results = int(request.args.get('maxResults', '10'))
    category = request.args.get('category', 'project').lower()
    is_deleted = request.args.get('isDeleted', 'no').lower()
    is_folder = request.args.get('isFolder', 'no').lower()
    is_registration = request.args.get('isRegistration', 'no').lower()
    include_public = request.args.get('includePublic', 'yes').lower()
    include_contributed = request.args.get('includeContributed', 'yes').lower()
    # NOTE(review): assumes request.args is a Werkzeug MultiDict, whose
    # getlist() already returns [] for a missing key; its second positional
    # parameter is a type converter, not a default, so nothing is passed.
    ignore_nodes = request.args.getlist('ignoreNode')

    matching_title = (
        Q('title', 'icontains', term) &  # search term (case insensitive)
        Q('category', 'eq', category)  # is a project
    )

    matching_title = conditionally_add_query_item(matching_title, 'is_deleted', is_deleted)
    matching_title = conditionally_add_query_item(matching_title, 'is_folder', is_folder)
    matching_title = conditionally_add_query_item(matching_title, 'is_registration', is_registration)

    # Exclude every explicitly ignored node id (no-op for an empty list).
    for node_id in ignore_nodes:
        matching_title = matching_title & Q('_id', 'ne', node_id)

    my_projects = []
    my_project_count = 0
    public_projects = []

    if include_contributed == "yes":
        my_projects = Node.find(
            matching_title &
            Q('contributors', 'eq', user._id)  # user is a contributor
        ).limit(max_results)
        # Count the contributed matches so the public query below only fills
        # the remaining slots (previously this assigned the variable to itself).
        my_project_count = my_projects.count()

    if my_project_count < max_results and include_public == "yes":
        public_projects = Node.find(
            matching_title &
            Q('is_public', 'eq', True)  # is public
        ).limit(max_results - my_project_count)

    results = list(my_projects) + list(public_projects)
    return process_project_search_results(results, **kwargs)
Exemplo n.º 2
0
def set_tag_many_to_many_on_nodes(page_size=10000):
    """Attach tag many-to-many values to the Django nodes, page by page.

    Walks the MODM nodes selected by ``build_query(m2m_tag_fields, MODMNode)``
    in slices of ``page_size`` (sorted by ``-date_modified``), each slice inside
    its own ``transaction.atomic()`` block. For every field in
    ``m2m_tag_fields`` present on the Django node, the MODM values are
    translated to Django primary keys through ``modm_to_django`` and added to
    the relation. Progress and timing are printed to stdout.

    :param page_size: number of MODM nodes processed per transaction.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    node_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMNode.find(build_query(m2m_tag_fields, MODMNode)).count()
    print '{} Nodes'.format(total)
    # node_count doubles as the slice offset for the next page.
    while node_count < total:
        with transaction.atomic():
            for modm_node in MODMNode.find(build_query(
                    m2m_tag_fields, MODMNode)).sort('-date_modified')[
                        node_count:page_size + node_count]:
                django_node = Node.objects.get(
                    pk=modm_to_django[modm_node._id])
                for m2m_tag_field in m2m_tag_fields:
                    try:
                        attr = getattr(django_node, m2m_tag_field)
                    except AttributeError as ex:
                        # node field doesn't exist on node
                        pass
                    else:
                        # node field exists, do the stuff
                        django_pks = []
                        for modm_m2m_value in getattr(modm_node, m2m_tag_field,
                                                      []):
                            # Keys in modm_to_django are '<tag id>:<suffix>', with the
                            # suffix telling system tags apart from regular ones.
                            suffix = 'system' if m2m_tag_field == 'system_tags' else 'not_system'
                            if isinstance(modm_m2m_value, MODMTag):
                                django_pks.append(modm_to_django[
                                    '{}:{}'.format(modm_m2m_value._id,
                                                   suffix)])
                            elif isinstance(modm_m2m_value, basestring):
                                django_pks.append(modm_to_django[
                                    '{}:{}'.format(modm_m2m_value, suffix)])
                            elif modm_m2m_value is None:
                                print 'Tag of None found on Node {}'.format(
                                    modm_node._id)
                            else:
                                # Unexpected value type: ring the terminal bell and
                                # drop into the bpdb debugger for manual inspection.
                                print '\a'  # bells!
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                import bpdb

                                bpdb.set_trace()

                        if len(django_pks) > 0:
                            attr.add(*django_pks)
                        m2m_count += len(django_pks)
                node_count += 1
                # Report at each page boundary and once more at the very end.
                if node_count % page_size == 0 or node_count == total:
                    print 'Through {} nodes and {} m2m'.format(node_count,
                                                               m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemplo n.º 3
0
def set_node_many_to_many_on_nodes(page_size=5000):
    """Attach node-to-node many-to-many values to the Django nodes, page by page.

    Walks the MODM nodes selected by ``build_query(m2m_node_fields, MODMNode)``
    (with ``allow_institution=True``) in slices of ``page_size`` sorted by
    ``-date_modified``, each slice inside its own ``transaction.atomic()``
    block. For every field in ``m2m_node_fields`` the MODM values are
    translated to Django primary keys through ``modm_to_django`` and added to
    the relation. Progress and timing are printed to stdout.

    :param page_size: number of MODM nodes processed per transaction.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    node_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMNode.find(
        build_query(m2m_node_fields, MODMNode),
        allow_institution=True).count()
    print '{} Nodes'.format(total)
    # node_count doubles as the slice offset for the next page.
    while node_count < total:
        with transaction.atomic():
            for modm_node in MODMNode.find(
                    build_query(m2m_node_fields, MODMNode),
                    allow_institution=True).sort('-date_modified')[
                        node_count:page_size + node_count]:
                django_node = Node.objects.get(
                    pk=modm_to_django[modm_node._id])
                for m2m_node_field in m2m_node_fields:
                    attr = getattr(django_node, m2m_node_field)
                    django_pks = []
                    for modm_m2m_value in getattr(modm_node, m2m_node_field,
                                                  []):
                        # A related value may be a node object, a raw id string,
                        # or a Pointer wrapping a node.
                        if isinstance(modm_m2m_value, MODMNode):
                            django_pks.append(modm_to_django[
                                modm_m2m_value._id])
                        elif isinstance(modm_m2m_value, basestring):
                            django_pks.append(modm_to_django[modm_m2m_value])
                        elif isinstance(modm_m2m_value, Pointer):
                            django_pks.append(modm_to_django[
                                modm_m2m_value.node._id])
                        else:
                            # Unexpected value type: ring the terminal bell and
                            # drop into the bpdb debugger for manual inspection.
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            import bpdb
                            bpdb.set_trace()
                    if len(django_pks) > 0:
                        attr.add(*django_pks)
                    m2m_count += len(django_pks)
                node_count += 1
                # Report at each page boundary and once more at the very end.
                if node_count % page_size == 0 or node_count == total:
                    print 'Through {} nodes and {} m2m'.format(node_count,
                                                               m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemplo n.º 4
0
    def get_queryset(self):
        """Return the nodes this request operates on.

        Non-bulk requests use the query derived from the request. Bulk
        requests look nodes up by the ids in the request body and raise
        ``PermissionDenied`` if the requesting user cannot edit any of them.
        """
        if not is_bulk_request(self.request):
            return Node.find(self.get_query_from_request())

        # For bulk requests, queryset is formed from request body.
        query = Q("_id", "in", [item["id"] for item in self.request.data])
        auth = get_user_auth(self.request)
        nodes = Node.find(query)
        if any(not candidate.can_edit(auth) for candidate in nodes):
            raise PermissionDenied
        return nodes
Exemplo n.º 5
0
def save_bare_nodes(page_size=20000):
    """Copy MODM nodes into Django ``Node`` rows in bulk, page by page.

    Iterates all MODM nodes (``allow_institution=True``, sorted by
    ``-date_modified``) in slices of ``page_size``. Each node's storage dict is
    stripped of blacklisted keys, its datetimes are localized to UTC, ``None``
    values are dropped, and the resulting ``Node`` instances are bulk-created.
    The ``modm_to_django`` mapping is updated with guid -> primary key for
    every created row. Progress and timing are printed to stdout.

    :param page_size: number of nodes per slice and per ``bulk_create`` call.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    count = 0
    start = datetime.now()
    total = MODMNode.find(allow_institution=True).count()
    # count doubles as the slice offset for the next page.
    while count < total:
        with transaction.atomic():
            nids = []
            for modm_node in MODMNode.find(
                    allow_institution=True).sort('-date_modified')[
                        count:count + page_size]:
                guid = Guid.objects.get(guid=modm_node._id)
                node_fields = dict(_guid_id=guid.pk, **modm_node.to_storage())

                # remove fields not yet implemented
                cleaned_node_fields = {key: node_fields[key]
                                       for key in node_fields
                                       if key not in node_key_blacklist}

                # make datetimes not naive
                for k, v in cleaned_node_fields.iteritems():
                    if isinstance(v, datetime):
                        cleaned_node_fields[k] = pytz.utc.localize(v)

                # remove null fields, postgres hate null fields
                cleaned_node_fields = {k: v
                                       for k, v in
                                       cleaned_node_fields.iteritems()
                                       if v is not None}
                nids.append(Node(**cleaned_node_fields))
                count += 1
                # Flush the accumulated batch at each page boundary and at the end.
                if count % page_size == 0 or count == total:
                    then = datetime.now()
                    # NOTE(review): on a final partial page the printed lower bound
                    # (count - page_size) does not match the batch actually saved.
                    print 'Saving nodes {} through {}...'.format(
                        count - page_size, count)
                    woot = Node.objects.bulk_create(nids)
                    # Record guid -> Django pk for the rows just created.
                    for wit in woot:
                        modm_to_django[wit._guid.guid] = wit.pk
                    now = datetime.now()
                    print 'Done with {} nodes in {} seconds...'.format(
                        len(woot), (now - then).total_seconds())
                    nids = []
                    trash = gc.collect()
                    print 'Took out {} trashes'.format(trash)

    print 'Modm Nodes: {}'.format(total)
    print 'django Nodes: {}'.format(Node.objects.all().count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemplo n.º 6
0
def save_bare_system_tags(page_size=10000):
    print 'Starting save_bare_system_tags...'
    start = datetime.now()

    things = list(MODMNode.find(MQ('system_tags', 'ne', [])).sort(
        '-_id')) + list(MODMUser.find(MQ('system_tags', 'ne', [])).sort(
            '-_id'))

    system_tag_ids = []
    for thing in things:
        for system_tag in thing.system_tags:
            system_tag_ids.append(system_tag)

    unique_system_tag_ids = set(system_tag_ids)

    total = len(unique_system_tag_ids)

    system_tags = []
    for system_tag_id in unique_system_tag_ids:
        system_tags.append(Tag(_id=system_tag_id,
                               lower=system_tag_id.lower(),
                               system=True))

    woot = Tag.objects.bulk_create(system_tags)

    print 'MODM System Tags: {}'.format(total)
    print 'django system tags: {}'.format(Tag.objects.filter(system=
                                                             True).count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
Exemplo n.º 7
0
def get_projects_public():
    """Return the ``Node.find`` result for public content nodes without a parent."""
    has_no_parent = Q('parent_node', 'eq', None)
    is_public = Q('is_public', 'eq', True)
    return Node.find(has_no_parent & is_public & CONTENT_NODE_QUERY)
Exemplo n.º 8
0
def get_projects_forked():
    """Return the ``Node.find`` result for forked content nodes without a parent."""
    has_no_parent = Q('parent_node', 'eq', None)
    is_fork = Q('is_fork', 'eq', True)
    return Node.find(has_no_parent & is_fork & CONTENT_NODE_QUERY)
Exemplo n.º 9
0
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and cached
    in the Conference.num_submissions field.

    :return: dict with a single ``submissions`` key holding the rendered
        submissions of every conference, sorted by ``dateCreated`` descending.
    """
    submissions = []
    #  TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        # A set removes duplicate nodes before rendering and counting.
        projects = set(nodes)

        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))

        # Cache the number of submissions
        conf.num_submissions = len(projects)
        conf.save()
        # A trailing "below CONFERENCE_MIN_COUNT -> continue" check used to sit
        # here; as the last statement of the loop body it had no effect.
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
Exemplo n.º 10
0
def get_projects():
    """Return the ``Node.find`` result for content nodes without a parent."""
    # This count includes projects, forks, and registrations
    has_no_parent = Q('parent_node', 'eq', None)
    return Node.find(has_no_parent & CONTENT_NODE_QUERY)
Exemplo n.º 11
0
def get_projects():
    """Return the ``Node.find`` result for non-deleted, non-folder 'project' nodes."""
    is_project = Q('category', 'eq', 'project')
    not_deleted = Q('is_deleted', 'eq', False)
    not_folder = Q('is_folder', 'ne', True)
    return Node.find(is_project & not_deleted & not_folder)
Exemplo n.º 12
0
def conference_view(**kwargs):
    """Build the meetings overview together with all rendered submissions.

    Submissions are collected for every conference; a conference is listed
    under ``meetings`` only when its submission count reaches
    ``settings.CONFERNCE_MIN_COUNT``.

    :return: dict with ``meetings`` (sorted by count, descending) and
        ``submissions`` (sorted by ``dateCreated``, descending).
    """
    meetings = []
    submissions = []
    for conf in Conference.find():
        projects = Node.find(
            Q('tags', 'iexact', conf.endpoint)
            & Q('is_public', 'eq', True)
            & Q('is_deleted', 'eq', False)
        )
        submissions.extend(
            _render_conference_node(node, idx, conf)
            for idx, node in enumerate(projects)
        )
        num_submissions = projects.count()
        if num_submissions >= settings.CONFERNCE_MIN_COUNT:
            meetings.append({
                'name': conf.name,
                'active': conf.active,
                'url': web_url_for('conference_results', meeting=conf.endpoint),
                'count': num_submissions,
            })

    submissions.sort(key=lambda item: item['dateCreated'], reverse=True)
    meetings.sort(key=lambda item: item['count'], reverse=True)

    return {'meetings': meetings, 'submissions': submissions}
Exemplo n.º 13
0
    def test_bookmark_collection_not_counted(self):
        """The osfstorage connected-node count should be one less than all nodes."""
        BookmarkCollectionFactory(creator=self.user)
        expected_connected = Node.find().count() - 1

        events = AddonSnapshot().get_events()
        osfstorage_events = [
            event for event in events
            if event["provider"]["name"] == "osfstorage"
        ]
        assert_equal(osfstorage_events[0]["nodes"]["connected"], expected_connected)
def migrate_registrations():
    """Run ``migrate_file_meta`` over every prereg-challenge registration.

    For each question in the registration's metadata for the prereg schema,
    either each nested value (when ``value`` is a dict) or the question itself
    is passed to ``migrate_file_meta``. A registration is saved and counted
    only if at least one call reported a migration.
    """
    schema = get_prereg_schema()
    registrations = Node.find(
        Q('is_registration', 'eq', True) &
        Q('registered_schema', 'eq', schema)
    )
    migrated_total = 0
    for reg in registrations:
        answers = reg.registered_meta[schema._id]
        changed = False
        logger.debug('Reading preregistration with id: {0}'.format(reg._id))
        for question in answers.values():
            nested = question.get('value')
            # Dict-valued questions are migrated entry by entry.
            targets = nested.values() if isinstance(nested, dict) else [question]
            for target in targets:
                if migrate_file_meta(target):
                    changed = True
        if not changed:
            continue
        reg.save()
        migrated_total += 1
        logger.info('Migrated preregistration with id: {0}'.format(reg._id))
    logger.info('Done with {0} prereg registrations migrated.'.format(migrated_total))
Exemplo n.º 15
0
def get_projects_registered():
    """Return the ``Node.find`` result for registered content nodes without a parent."""
    has_no_parent = Q('parent_node', 'eq', None)
    is_registration = Q('is_registration', 'eq', True)
    return Node.find(has_no_parent & is_registration & CONTENT_NODE_QUERY)
Exemplo n.º 16
0
def get_projects_registered():
    """Return the ``Node.find`` result for non-deleted, non-folder 'project' registrations."""
    is_project = Q('category', 'eq', 'project')
    not_deleted = Q('is_deleted', 'eq', False)
    not_folder = Q('is_folder', 'ne', True)
    is_registration = Q('is_registration', 'eq', True)
    return Node.find(is_project & not_deleted & not_folder & is_registration)
def find_file_mismatch_nodes():
    """Return the nodes whose `files_versions` and `files_current` keys differ."""
    mismatched = []
    for node in Node.find():
        version_keys = set(node.files_versions.keys())
        current_keys = set(node.files_current.keys())
        if version_keys != current_keys:
            mismatched.append(node)
    return mismatched
Exemplo n.º 18
0
def get_projects_public():
    """Return the ``Node.find`` result for public, non-deleted, non-collection 'project' nodes."""
    is_project = Q('category', 'eq', 'project')
    not_deleted = Q('is_deleted', 'eq', False)
    not_collection = Q('is_collection', 'ne', True)
    is_public = Q('is_public', 'eq', True)
    return Node.find(is_project & not_deleted & not_collection & is_public)
Exemplo n.º 19
0
def get_projects_forked():
    """Return a list of forked, non-deleted, non-collection 'project' nodes."""
    is_project = Q('category', 'eq', 'project')
    not_deleted = Q('is_deleted', 'eq', False)
    not_collection = Q('is_collection', 'ne', True)
    is_fork = Q('is_fork', 'eq', True)
    matches = Node.find(is_project & not_deleted & not_collection & is_fork)
    return list(matches)
Exemplo n.º 20
0
def migrate_nodes(index):
    """Push every public, non-deleted node to the given search index, one at a time."""
    logger.info("Migrating nodes to index: {}".format(index))
    public_and_present = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    migrated = 0
    for migrated, node in enumerate(Node.find(public_and_present), 1):
        search.update_node(node, index=index)

    logger.info('Nodes migrated: {}'.format(migrated))
def get_targets():
    """Return non-deleted nodes lacking ``SYSTEM_TAG`` that are either forks or
    registrations with an empty ``logs`` list.
    """
    registration_without_logs = (
        Q('registered_from', 'ne', None) & Q('logs', 'eq', [])
    )
    is_fork = Q('forked_from', 'ne', None)
    not_deleted = Q('is_deleted', 'ne', True)
    untagged = Q('system_tags', 'ne', SYSTEM_TAG)
    return Node.find(
        (registration_without_logs | is_fork) & not_deleted & untagged
    )
Exemplo n.º 22
0
def main():
    total = MODMNode.find().count()
    page_size = 1000
    count = 0
    print 'Doing {} Nodes...'.format(total)

    while count < total:
        for modm_node in MODMNode.find()[count:count+page_size]:
            noooood = get_or_create_node(modm_node)
            count += 1
            if count % page_size == 0:
                print 'Count: {}'.format(count)
                print 'Nodes: {}, Users: {}, Tags: {}'.format(nodes, users, tags)
                garbages = gc.collect()
                print 'Took out {} trashes.'.format(garbages)

    print 'MODM: {}'.format(total)
    print 'PG: {}'.format(count)
Exemplo n.º 23
0
def get_nodes():
    """Return duplicated nodes created before 2014-10-31.

    A node counts as duplicated when any of the fork, registration or template
    backrefs queried below is not ``None``.
    """
    forked = Q('__backrefs.forked.node.forked_from', 'ne', None)
    registered = Q('__backrefs.registrations.node.registered_from', 'ne', None)
    templated = Q('__backrefs.template_node.node.template_node', 'ne', None)
    duplicate = (forked | registered | templated)

    # Combine with the query operator `&`. Python's boolean `and` would simply
    # evaluate to the right-hand operand and drop the `duplicate` filter.
    return Node.find(
        duplicate & Q('date_created', 'lt', datetime.datetime(2014, 10, 31))
    )
Exemplo n.º 24
0
 def get_queryset(self):
     """Return a flat list of the preprints of every node matching the request query."""
     matching_nodes = Node.find(self.get_query_from_request())
     # TODO [OSF-7090]: Rearchitect how `.is_preprint` is determined,
     # so that a query that is guaranteed to return only
     # preprints can be constructed.
     return [
         preprint
         for node in matching_nodes
         for preprint in node.preprints
     ]
Exemplo n.º 25
0
def conference_data(meeting):
    """Return the rendered public, non-deleted nodes tagged with ``meeting``.

    Raises ``HTTPError(httplib.NOT_FOUND)`` when no conference has that endpoint.
    """
    try:
        conf = Conference.find_one(Q("endpoint", "iexact", meeting))
    except ModularOdmException:
        raise HTTPError(httplib.NOT_FOUND)

    tagged = Q("tags", "iexact", meeting)
    public = Q("is_public", "eq", True)
    not_deleted = Q("is_deleted", "eq", False)
    nodes = Node.find(tagged & public & not_deleted)

    rendered = []
    for idx, each in enumerate(nodes):
        rendered.append(_render_conference_node(each, idx, conf))
    return rendered
Exemplo n.º 26
0
 def get_queryset(self):
     """Return the queried nodes that are public or viewable by the requester."""
     requester = self.request.user
     # Anonymous requesters get an empty Auth.
     auth = Auth(None) if requester.is_anonymous() else Auth(requester)
     request_query = self.get_query_from_request()
     candidates = Node.find(self.get_default_odm_query() & request_query)
     return [
         candidate for candidate in candidates
         if candidate.is_public or candidate.can_view(auth)
     ]
Exemplo n.º 27
0
 def find(cls, query=None, **kwargs):
     """Find institution-backed nodes and wrap them in an ``InstitutionQuerySet``.

     Attribute names in ``query`` are first translated through
     ``cls.attribute_map``; the query is then restricted to nodes whose
     ``institution_id`` is not ``None``.
     """
     from website.models import Node  # done to prevent import error

     def _translate(part):
         # Swap in the mapped attribute name when the map has a truthy entry.
         mapped = cls.attribute_map.get(part.attribute, False)
         part.attribute = mapped or part.attribute

     if query and getattr(query, 'nodes', False):
         for sub_query in query.nodes:
             _translate(sub_query)
     elif isinstance(query, RawQuery):
         _translate(query)

     if query:
         query = query & Q('institution_id', 'ne', None)
     else:
         query = Q('institution_id', 'ne', None)
     return InstitutionQuerySet(Node.find(query, allow_institution=True, **kwargs))
Exemplo n.º 28
0
    def test_register_draft_without_embargo_creates_registration_approval(self, mock_enqueue):
        """Registering a draft with the make-public payload yields a registration approval."""
        url = self.project.api_url_for('register_draft_registration', draft_id=self.draft._id)
        response = self.app.post(
            url,
            self.valid_make_public_payload,
            content_type='application/json',
            auth=self.user.auth
        )
        assert_equal(response.status_code, 202)

        # Most recently registered node.
        newest = Node.find().sort('-registered_date')[0]

        assert_true(newest.is_registration)
        assert_not_equal(newest.registration_approval, None)
Exemplo n.º 29
0
    def test_POST_register_make_public_immediately_creates_registration_approval(self, mock_enqueue):
        """POSTing the make-public payload to the template page yields a registration approval."""
        url = self.project.api_url_for("node_register_template_page_post", template=u"Open-Ended_Registration")
        response = self.app.post(
            url,
            self.valid_make_public_payload,
            content_type="application/json",
            auth=self.user.auth,
        )
        assert_equal(response.status_code, 201)

        # Most recently registered node.
        newest = Node.find().sort("-registered_date")[0]

        assert_true(newest.is_registration)
        assert_not_equal(newest.registration_approval, None)
Exemplo n.º 30
0
def migrate_nodes(index):
    """Bulk-update the search index with every public, non-deleted node.

    Nodes are processed in pages of 200; the node caches are cleared after
    each page.

    :param index: name of the search index to update.
    """
    logger.info('Migrating nodes to index: {}'.format(index))
    query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    total = Node.find(query).count()
    increment = 200
    # Ceiling division: `total // increment + 1` reported one page too many
    # whenever total was an exact multiple of increment.
    total_pages = (total + increment - 1) // increment
    pages = paginated(Node, query=query, increment=increment, each=False)
    for page_number, page in enumerate(pages):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        Node.bulk_update_search(page, index=index)
        Node._clear_caches()

    logger.info('Nodes migrated: {}'.format(total))
Exemplo n.º 31
0
def search_node(auth, **kwargs):
    """Search nodes by title for the requesting user, one page at a time.

    Reads ``nodeId``, ``includePublic``, ``size``, ``page`` and ``query`` from
    the JSON request body. Returns ``{'nodes': []}`` for an empty query;
    otherwise a dict with the serialized page of nodes plus ``total``,
    ``pages`` and ``page``.
    """
    # Get arguments
    node = Node.load(request.json.get('nodeId'))
    include_public = request.json.get('includePublic')
    size = float(request.json.get('size', '5').strip())
    page = request.json.get('page', 0)
    query = request.json.get('query', '').strip()

    start = (page * size)
    if not query:
        return {'nodes': []}

    # Build ODM query
    by_title = Q('title', 'icontains', query)
    not_deleted = Q('is_deleted', 'eq', False)
    visible = Q('contributors', 'eq', auth.user)
    not_folder = Q('is_folder', 'eq', False)
    if include_public:
        visible = visible | Q('is_public', 'eq', True)
    odm_query = by_title & not_deleted & visible & not_folder

    # Exclude current node from query if provided
    if node:
        excluded_ids = [node._id] + node.node_ids
        odm_query = (odm_query & Q('_id', 'nin', excluded_ids))

    matches = Node.find(odm_query)
    count = matches.count()
    pages = math.ceil(count / size)
    validate_page_num(page, pages)

    # Nodes without contributors are left out of the serialized page.
    serialized = [
        _serialize_node_search(each)
        for each in islice(matches, start, start + size)
        if each.contributors
    ]
    return {
        'nodes': serialized,
        'total': count,
        'pages': pages,
        'page': page,
    }
Exemplo n.º 32
0
def migrate_registrations_metadata_key(schema):
    """
    Finds registrations of the given schema whose registered_meta stores the
    'recommended-methods' value under an 'undefined' key and moves it to 'procedure'.
    """
    registrations = Node.find(Q('is_registration', 'eq', True) & Q('registered_schema', 'eq', schema))
    total_reg = registrations.count()
    logger.info('Examining {} registrations for improper key'.format(total_reg))

    for reg_count, reg in enumerate(registrations, 1):
        schema_meta = reg.registered_meta.get(schema._id, {})
        has_bad_key = schema_meta.get('recommended-methods', {}).get('value', {}).get('undefined', {})
        if not has_bad_key:
            logger.info('{}/{} Key already correct for {}. No change.'.format(reg_count, total_reg, reg._id))
            continue
        # Move the stored value from the 'undefined' key to 'procedure'.
        methods_value = reg.registered_meta[schema._id]['recommended-methods']['value']
        methods_value['procedure'] = methods_value.pop('undefined')
        reg.save()
        logger.info('{}/{} Migrated key for {}'.format(reg_count, total_reg, reg._id))
Exemplo n.º 33
0
def migrate(dry=True):
    """Run ``migrate_extras`` over registrations and draft registrations.

    :param dry: forwarded unchanged to ``migrate_extras``.
    """
    registrations = Node.find(
        Q('is_registration', 'eq', True) & Q('registered_meta', 'ne', None))
    regs_migrated, regs_errored = migrate_extras(registrations, dry=dry)

    drafts = DraftRegistration.find(Q('registration_metadata', 'ne', {}))
    drafts_migrated, drafts_errored = migrate_extras(drafts, dry=dry)

    # Report both runs in the same shape: a summary line, then errors if any.
    reports = (
        ('Migrated registered_meta for {} registrations',
         regs_migrated, regs_errored),
        ('Migrated registered_meta for {} draft registrations',
         drafts_migrated, drafts_errored),
    )
    for summary, migrated, errored in reports:
        logger.info(summary.format(len(migrated)))
        if errored:
            logger.error('{} errored: {}'.format(len(errored), errored))
Exemplo n.º 34
0
    def test_POST_register_embargo_does_not_make_project_or_children_public(self, mock_enqueue):
        """An embargoed registration of a public tree must stay private at every level."""
        self.project.is_public = True
        self.project.save()
        # Public children: a component, a subproject, and a component below it.
        NodeFactory(
            creator=self.user,
            parent=self.project,
            title='Component',
            is_public=True
        )
        subproject = ProjectFactory(
            creator=self.user,
            parent=self.project,
            title='Subproject',
            is_public=True
        )
        NodeFactory(
            creator=self.user,
            parent=subproject,
            title='Subcomponent',
            is_public=True
        )
        register_url = self.project.api_url_for('register_draft_registration', draft_id=self.draft._id)
        response = self.app.post(
            register_url,
            self.valid_embargo_payload,
            content_type='application/json',
            auth=self.user.auth
        )
        self.project.reload()
        assert_equal(response.status_code, 202)
        assert_equal(response.json['urls']['registrations'], self.project.web_url_for('node_registrations'))

        # Last node directly registered from self.project
        from_project = Node.find(Q('registered_from', 'eq', self.project))
        registration = from_project.sort('-registered_date')[0]

        assert_true(registration.is_registration)
        assert_false(registration.is_public)
        assert_true(registration.is_pending_embargo_for_existing_registration)
        assert_is_not_none(registration.embargo)

        for descendant in registration.get_descendants_recursive():
            assert_true(descendant.is_registration)
            assert_false(descendant.is_public)
Exemplo n.º 35
0
def main():
    """Log, per staff user, the public registrations they contributed to."""
    init_app(set_backends=True, routes=False)
    staff_registrations = defaultdict(list)
    users = [User.load(guid) for guid in STAFF_GUIDS]
    public_registrations = Node.find(
        Q('is_registration', 'eq', True) & Q('is_public', 'eq', True))
    for registration in public_registrations:
        for user in users:
            if registration in user.node__contributed:
                staff_registrations[user._id].append(registration)

    for uid, user_regs in staff_registrations.items():
        user = User.load(uid)
        logger.info('{} ({})  on {} Public Registrations:'.format(
            user.fullname, user._id, len(user_regs)))
        for registration in user_regs:
            logger.info('\t{} ({}): {}'.format(registration.title,
                                               registration._id,
                                               registration.absolute_url))
Exemplo n.º 36
0
def migrate_nodes(index, query=None):
    """Bulk-update the search index with public, non-deleted nodes.

    Nodes are processed in pages of 200.

    :param index: name of the search index to update.
    :param query: optional extra query ANDed with the public/non-deleted filter.
    """
    logger.info('Migrating nodes to index: {}'.format(index))
    node_query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    if query:
        node_query = query & node_query
    total = Node.find(node_query).count()
    increment = 200
    # Ceiling division: `total // increment + 1` reported one page too many
    # whenever total was an exact multiple of increment.
    total_pages = (total + increment - 1) // increment
    pages = paginated(Node,
                      query=node_query,
                      increment=increment,
                      each=False,
                      include=['contributor__user__guids'])

    for page_number, page in enumerate(pages):
        logger.info('Updating page {} / {}'.format(page_number + 1,
                                                   total_pages))
        Node.bulk_update_search(page, index=index)

    logger.info('Nodes migrated: {}'.format(total))
Exemplo n.º 37
0
def migrate():
    """Run ``migrate_file_meta`` over every prereg-challenge registration.

    For each question in the registration's metadata for the prereg schema,
    either each nested value (when ``value`` is a dict) or the question itself
    is passed to ``migrate_file_meta``. A registration is saved and counted
    only if at least one of those calls reported a migration.
    """
    PREREG_CHALLENGE_METASCHEMA = get_prereg_schema()
    registrations = Node.find(
        Q('is_registration', 'eq', True)
        & Q('registered_schema', 'eq', PREREG_CHALLENGE_METASCHEMA))
    count = 0
    for reg in registrations:
        data = reg.registered_meta[PREREG_CHALLENGE_METASCHEMA._id]
        migrated = False
        for question in data.values():
            if isinstance(question.get('value'), dict):
                targets = question['value'].values()
            else:
                targets = [question]
            for target in targets:
                # Accumulate the flag: overwriting it on every call let a later
                # "nothing to migrate" result hide an earlier migration.
                if migrate_file_meta(target):
                    migrated = True
        if migrated:
            reg.save()
            count += 1
            logger.info('Migrated preregistration with id: {0}'.format(
                reg._id))
    logger.info('Done with {0} preregistrations migrated.'.format(count))
Exemplo n.º 38
0
def conference_view(**kwargs):
    """List conferences whose submission count reaches ``settings.CONFERNCE_MIN_COUNT``.

    :return: dict with ``meetings`` sorted by submission count, descending.
    """
    meetings = []
    for conf in Conference.find():
        projects = Node.find(
            Q('tags', 'iexact', conf.endpoint)
            & Q('is_public', 'eq', True)
            & Q('is_deleted', 'eq', False)
        )
        submissions = projects.count()
        if submissions >= settings.CONFERNCE_MIN_COUNT:
            meetings.append({
                'name': conf.name,
                'active': conf.active,
                'url': web_url_for('conference_results', meeting=conf.endpoint),
                'submissions': submissions,
            })
    meetings.sort(key=lambda item: item['submissions'], reverse=True)

    return {'meetings': meetings}
Exemplo n.º 39
0
def search_node(**kwargs):
    """Search nodes by title on behalf of the authenticated user.

    Reads ``nodeId``, ``includePublic`` and ``query`` from the JSON request
    body.  Matches are non-deleted, non-folder nodes whose title contains
    ``query`` and that either list ``auth.user`` as a contributor or, when
    ``includePublic`` is truthy, are public.  If ``nodeId`` resolves to a
    node, that node and the ids in its ``node_ids`` are excluded.

    :return: ``{'nodes': [...]}`` holding at most 20 serialized nodes; an
        empty list when the stripped query is empty.
    """
    auth = kwargs['auth']
    payload = request.json
    node = Node.load(payload.get('nodeId'))
    include_public = payload.get('includePublic')
    query = payload.get('query', '').strip()
    if not query:
        return {'nodes': []}

    # Who may see the node: contributors, optionally widened to public nodes.
    visible_to_user = Q('contributors', 'eq', auth.user)
    if include_public:
        visible_to_user = visible_to_user | Q('is_public', 'eq', True)

    odm_query = (
        Q('title', 'icontains', query)
        & Q('is_deleted', 'eq', False)
        & visible_to_user
        & Q('is_folder', 'eq', False)
    )

    # Leave out the current node and the ids it reports via ``node_ids``.
    if node:
        odm_query = odm_query & Q('_id', 'nin', [node._id] + node.node_ids)

    # TODO: Parameterize limit; expose pagination
    serialized = []
    for match in Node.find(odm_query).limit(20):
        if match.contributors:
            serialized.append(_serialize_node_search(match))
    return {'nodes': serialized}
Exemplo n.º 40
0
    def get_queryset(self):
        """Return the preprints of all nodes matching the request query.

        A ``filter[provider]`` query parameter is not passed to the node
        query, because the provider is stored on the PreprintService object
        rather than on the node.  It is removed from the request's query
        parameters and applied to each preprint's ``provider._id`` instead.
        """
        filter_key = 'filter[provider]'
        request = self.request
        provider_filter = None

        if filter_key in request.query_params:
            # The parameters must be mutable so the filter can be dropped
            # before the ODM query is built; otherwise the query would
            # return an empty set.
            request.GET._mutable = True
            provider_filter = request.query_params[filter_key]
            request.query_params.pop(filter_key)

        nodes = Node.find(self.get_query_from_request())
        # TODO [OSF-7090]: Rearchitect how `.is_preprint` is determined,
        # so that a query that is guaranteed to return only
        # preprints can be constructed.
        return [
            preprint
            for node in nodes
            for preprint in node.preprints
            if provider_filter is None or preprint.provider._id == provider_filter
        ]
def get_broken_templated():
    """Lazily yield nodes with a ``template_node`` set for which
    ``has_duplicate_piwik_id`` is true."""
    templated = Node.find(Q('template_node', 'ne', None))
    return (candidate for candidate in templated
            if has_duplicate_piwik_id(candidate))
def get_broken_forks():
    """Lazily yield forks (``is_fork`` is True) for which
    ``has_duplicate_piwik_id`` is true."""
    forks = Node.find(Q('is_fork', 'eq', True))
    return (candidate for candidate in forks
            if has_duplicate_piwik_id(candidate))
def get_broken_registrations():
    """Lazily yield registrations (``is_registration`` is True) for which
    ``has_duplicate_piwik_id`` is true."""
    registrations = Node.find(Q('is_registration', 'eq', True))
    return (candidate for candidate in registrations
            if has_duplicate_piwik_id(candidate))
Exemplo n.º 44
0
def get_targets():
    """Return every node whose ``is_deleted`` flag is not ``True``."""
    not_deleted = Q('is_deleted', 'ne', True)
    return Node.find(not_deleted)
Exemplo n.º 45
0
from website.app import init_app
from website.models import Node, User
from framework import Q
from framework.analytics import piwik

app = init_app('website.settings', set_backends=True)

# NOTE: This is a naive implementation for migration, requiring a POST request
# for every user and every node. It is possible to bundle these together in a
# single request, but it would require duplication of logic and strict error
# checking of the result. Doing it this way is idempotent, and allows any
# exceptions raised to halt the process with a usable error message.

# Users that already carry a Piwik token are left alone.
for user in User.find():
    if not user.piwik_token:
        piwik.create_user(user)

# Public, non-deleted nodes that already have a Piwik site id are left alone.
for node in Node.find(
        Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)):
    if not node.piwik_site_id:
        piwik._provision_node(node._id)
Exemplo n.º 46
0
def set_user_foreign_keys_on_nodes(page_size=10000):
    """Copy user foreign keys from MODM nodes onto their Django nodes.

    Walks the MODM nodes selected by ``build_query(fk_user_fields, MODMNode)``
    in pages of ``page_size`` (newest ``date_modified`` first), one database
    transaction per page.  For every field in ``fk_user_fields`` that is set
    on the MODM node, the referenced user's guid is resolved to a Django
    primary key and stored on the Django node as ``<field>_id``.  Resolved
    keys are remembered in ``modm_to_django`` so each user is queried once.

    :param page_size: number of nodes handled per transaction.
    """
    # NOTE: print(...) with a single argument produces the same output as
    # the Python 2 print statement, so this stays valid on Python 2.
    print('Starting {}...'.format(sys._getframe().f_code.co_name))
    node_count = 0
    fk_count = 0
    cache_hits = 0
    cache_misses = 0
    start = datetime.now()
    total = MODMNode.find(build_query(fk_user_fields, MODMNode),
                          allow_institution=True).count()

    while node_count < total:
        page_start = node_count
        with transaction.atomic():
            for modm_node in MODMNode.find(
                    build_query(fk_user_fields, MODMNode),
                    allow_institution=True).sort(
                        '-date_modified')[node_count:node_count + page_size]:
                django_node = Node.objects.get(_guid__guid=modm_node._id)
                for fk_user_field in fk_user_fields:
                    value = getattr(modm_node, fk_user_field, None)
                    if value is None:
                        continue
                    if isinstance(value, (basestring, MODMUser)):
                        # The field holds either a bare guid or a user
                        # object; both resolve through the same guid lookup.
                        if isinstance(value, basestring):
                            guid = value
                        else:
                            guid = value._id
                        if guid in modm_to_django:
                            user_id = modm_to_django[guid]
                            cache_hits += 1
                        else:
                            # Not cached yet: query once, remember for later.
                            user_id = User.objects.get(_guid__guid=guid).pk
                            modm_to_django[guid] = user_id
                            cache_misses += 1
                        setattr(django_node,
                                '{}_id'.format(fk_user_field), user_id)
                    else:
                        # Unexpected value type: ring the terminal bell and
                        # drop into the debugger so the operator can inspect.
                        for _ in range(6):
                            print('\a')
                        import bpdb
                        bpdb.set_trace()
                    fk_count += 1
                django_node.save()
                node_count += 1
                if node_count % page_size == 0 or node_count == total:
                    print('Through {} nodes and {} foreign keys'.format(
                        node_count, fk_count))
                    print('Cache: Hits {} Misses {}'.format(
                        cache_hits, cache_misses))
        if node_count == page_start:
            # The page was empty although fewer than ``total`` nodes were
            # processed (e.g. rows disappeared after the initial count, or
            # page_size is not positive).  Stop instead of looping forever.
            print('No nodes returned after {} of {}; stopping.'.format(
                node_count, total))
            break
    print('Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds()))
Exemplo n.º 47
0
def nodes_since(user, date):
    """Return the nodes created by ``user`` with ``date_created`` after ``date``."""
    created_by_user = Q('creator', 'eq', user._id)
    created_after = Q('date_created', 'gt', date)
    return Node.find(created_by_user & created_after)
Exemplo n.º 48
0
def find_templated_orphans():
    """Return templated non-project nodes that have no parent backref.

    A node matches when ``template_node`` is set, its category is not
    ``'project'``, and ``__backrefs.parent.node.nodes.0`` does not exist
    (presumably meaning no parent node lists it -- verify against the
    backref schema).
    """
    is_templated = Q('template_node', 'ne', None)
    is_component = Q('category', 'ne', 'project')
    has_no_parent = Q('__backrefs.parent.node.nodes.0', 'exists', False)
    return Node.find(is_templated & is_component & has_no_parent)
Exemplo n.º 49
0
def find_candidate_parents(node):
    """Return nodes that share ``node``'s first log and fork/registration flags.

    Candidates contain ``node.logs[0]`` in their ``logs`` and have the same
    ``is_fork`` and ``is_registration`` values as ``node``.
    """
    shares_first_log = Q('logs', 'eq', node.logs[0]._id)
    same_fork_state = Q('is_fork', 'eq', node.is_fork)
    same_registration_state = Q('is_registration', 'eq', node.is_registration)
    return Node.find(shares_first_log & same_fork_state & same_registration_state)
Exemplo n.º 50
0
 def get_queryset(self):
     """Return all registrations, sorted by ``self.ordering``."""
     registrations = Node.find(Q('is_registration', 'eq', True))
     return registrations.sort(self.ordering)
Exemplo n.º 51
0
 def get_queryset(self):
     """Return nodes whose ``spam_status`` equals ``self.SPAM_STATE``,
     sorted by ``self.ordering``."""
     flagged = Node.find(Q('spam_status', 'eq', self.SPAM_STATE))
     return flagged.sort(self.ordering)
Exemplo n.º 52
0
def get_folders():
    """Return folders that are neither the dashboard nor deleted."""
    is_folder = Q('is_folder', 'eq', True)
    not_dashboard = Q('is_dashboard', 'ne', True)
    not_deleted = Q('is_deleted', 'ne', True)
    return Node.find(is_folder & not_dashboard & not_deleted)
Exemplo n.º 53
0
from website.app import init_app


logger = logging.getLogger(__name__)


if __name__ == '__main__':
    """This script will set the analytics read keys for all public nodes. Requires a valid
    keen master key in settings.KEEN['public']['master_key']. Generated keys are stable
    between runs for the same master key.
    """

    init_app(routes=False)

    public_nodes = Node.find(
        Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    )
    total = len(public_nodes)
    logger.info('Adding keen.io read keys to {} public nodes'.format(total))
    count = 0
    for public_node in public_nodes:
        count +=1
        if not count % 10:
            logger.info(' Updating node {} of {}.'.format(count, total))
        public_node.keenio_read_key = public_node.generate_keenio_read_key()
        public_node.save()

    logger.info('Done! {} nodes updated.'.format(count))

    logger.info('Verifying...')
    nodes_with_keen_keys = Node.find(
Exemplo n.º 54
0
def set_user_many_to_many_on_nodes(page_size=5000):
    """Copy user many-to-many relations from MODM nodes to Django nodes.

    Walks the MODM nodes selected by ``build_query(m2m_user_fields, MODMNode)``
    in pages of ``page_size`` (newest ``date_modified`` first), one database
    transaction per page.  For each field in ``m2m_user_fields`` except
    ``permissions`` and ``recently_added``:

    * ``contributors`` entries become ``Contributor`` rows (via
      ``get_or_create``) carrying the visible/admin/read/write flags derived
      from the MODM node's ``visible_contributor_ids`` and ``permissions``;
    * entries of any other field are added to the Django relation in bulk.

    User guids are translated to Django primary keys through
    ``modm_to_django``.

    :param page_size: number of nodes handled per transaction.
    """
    # NOTE: print(...) with a single argument produces the same output as
    # the Python 2 print statement, so this stays valid on Python 2.
    print('Starting {}...'.format(sys._getframe().f_code.co_name))
    node_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMNode.find(build_query(m2m_user_fields, MODMNode),
                          allow_institution=True).count()
    print('{} Nodes'.format(total))
    while node_count < total:
        page_start = node_count
        with transaction.atomic():
            for modm_node in MODMNode.find(
                    build_query(m2m_user_fields, MODMNode),
                    allow_institution=True).sort(
                        '-date_modified')[node_count:page_size + node_count]:
                django_node = Node.objects.get(
                    pk=modm_to_django[modm_node._id])
                for m2m_user_field in m2m_user_fields:
                    if m2m_user_field in ['permissions', 'recently_added']:
                        continue
                    attr = getattr(django_node, m2m_user_field)
                    django_pks = []
                    for modm_m2m_value in getattr(modm_node, m2m_user_field,
                                                  []):
                        # Entries are either user objects or bare guids;
                        # normalise to the guid so both share one code path.
                        if isinstance(modm_m2m_value, MODMUser):
                            guid = modm_m2m_value._id
                        elif isinstance(modm_m2m_value, basestring):
                            guid = modm_m2m_value
                        else:
                            # Unexpected value type: ring the terminal bell
                            # and drop into the debugger for inspection.
                            for _ in range(7):
                                print('\a')
                            import bpdb
                            bpdb.set_trace()
                            continue

                        if m2m_user_field == 'contributors':
                            user_permissions = modm_node.permissions[guid]
                            Contributor.objects.get_or_create(
                                user_id=modm_to_django[guid],
                                node=django_node,
                                visible=guid in modm_node.visible_contributor_ids,
                                admin='admin' in user_permissions,
                                read='read' in user_permissions,
                                write='write' in user_permissions)
                            m2m_count += 1
                        else:
                            django_pks.append(modm_to_django[guid])

                    if django_pks:
                        attr.add(*django_pks)
                    m2m_count += len(django_pks)
                node_count += 1
                if node_count % page_size == 0 or node_count == total:
                    print('Through {} nodes and {} m2m'.format(
                        node_count, m2m_count))
        if node_count == page_start:
            # The page was empty although fewer than ``total`` nodes were
            # processed (e.g. rows disappeared after the initial count, or
            # page_size is not positive).  Stop instead of looping forever.
            print('No nodes returned after {} of {}; stopping.'.format(
                node_count, total))
            break
    print('Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds()))
Exemplo n.º 55
0
 def get_queryset(self):
     """Lazily yield the nodes matching the request query that are preprints."""
     # TODO: Rearchitect how `.is_preprint` is determined,
     # so that a query that is guaranteed to return only
     # preprints can be constructed. Use generator in meantime.
     candidates = Node.find(self.get_query_from_request())
     return (candidate for candidate in candidates if candidate.is_preprint)
Exemplo n.º 56
0
def search_projects_by_title(**kwargs):
    """ Search for nodes by title. Can pass in arguments from the URL to modify the search
    :arg term: The substring of the title.
    :arg maxResults: Maximum number of projects to return (default 10).
    :arg category: Category of the node.
    :arg isDeleted: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isFolder: yes, no, or either. Either will not add a qualifier for that argument in the search.
        Applied to the ``is_collection`` field.
    :arg isRegistration: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg includePublic: yes or no. Whether the projects listed should include public projects.
    :arg includeContributed: yes or no. Whether the search should include projects the current user has
        contributed to.
    :arg ignoreNode: a list of nodes that should not be included in the search.
    :return: a list of dictionaries of projects

    """
    # TODO(fabianvf): At some point, it would be nice to do this with elastic search
    user = kwargs['auth'].user

    term = request.args.get('term', '')
    max_results = int(request.args.get('maxResults', '10'))
    category = request.args.get('category', 'project').lower()
    is_deleted = request.args.get('isDeleted', 'no').lower()
    is_collection = request.args.get('isFolder', 'no').lower()
    is_registration = request.args.get('isRegistration', 'no').lower()
    include_public = request.args.get('includePublic', 'yes').lower()
    include_contributed = request.args.get('includeContributed', 'yes').lower()
    # getlist() already returns [] for a missing key.  Its second positional
    # parameter is a type converter, not a default, so passing [] there
    # raised TypeError whenever ignoreNode was actually supplied.
    ignore_nodes = request.args.getlist('ignoreNode')

    matching_title = (
        Q('title', 'icontains', term) &  # search term (case insensitive)
        Q('category', 'eq', category)  # is a project
    )

    matching_title = conditionally_add_query_item(matching_title, 'is_deleted',
                                                  is_deleted)
    matching_title = conditionally_add_query_item(matching_title,
                                                  'is_collection',
                                                  is_collection)
    matching_title = conditionally_add_query_item(matching_title,
                                                  'is_registration',
                                                  is_registration)

    if ignore_nodes:
        matching_title = matching_title & Q('_id', 'nin', ignore_nodes)

    my_projects = []
    my_project_count = 0
    public_projects = []

    if include_contributed == 'yes':
        # Materialise the page so its real size is known; the count was
        # previously assigned to itself and therefore always stayed 0.
        my_projects = list(Node.find(
            matching_title
            & Q('contributors', 'eq', user._id)  # user is a contributor
        ).limit(max_results))
        my_project_count = len(my_projects)

    # Only top up with public projects while there is room left.
    if my_project_count < max_results and include_public == 'yes':
        public_projects = Node.find(matching_title
                                    & Q('is_public', 'eq', True)  # is public
                                    ).limit(max_results - my_project_count)

    results = list(my_projects) + list(public_projects)
    ret = process_project_search_results(results, **kwargs)
    return ret
Exemplo n.º 57
0
 def check(self):
     """Create a project and assert the number of non-bookmark-collection nodes.

     The expected number is the class-wide ``ntest_calls`` counter when
     ``self.nexpected`` is ``'ntest_calls'``, and 1 otherwise.
     """
     ProjectFactory()
     cls = self.__class__
     # The counter lives on the class because each test gets its own instance.
     cls.ntest_calls += 1
     if self.nexpected == 'ntest_calls':
         nexpected = cls.ntest_calls
     else:
         nexpected = 1
     non_bookmark_nodes = Node.find(Q('is_bookmark_collection', 'eq', False))
     assert_equal(nexpected, len(non_bookmark_nodes))
Exemplo n.º 58
0
def add_poster_by_email(conf,
                        recipient,
                        address,
                        fullname,
                        subject,
                        message,
                        attachments,
                        tags=None,
                        system_tags=None,
                        is_spam=False):
    """Create or update a conference submission from an inbound email.

    Sends a failure mail and returns when there are no attachments.
    Otherwise it gets or creates the sender's user, finds a node titled
    ``subject`` that the user contributes to (or creates a new project),
    adds the conference admins, optionally makes the node public, stores the
    message as the wiki home page, applies tags, uploads the attachments,
    records the mail, and sends a confirmation mail.

    :param conf: conference object; ``admins``, ``public_projects``, ``name``
        and ``endpoint`` are read from it.
    :param recipient: recipient address; the submission is a ``'talk'`` when
        it contains ``'talk'``, otherwise a ``'poster'``.
    :param address: sender's email address.
    :param fullname: sender's name; falls back to the local part of
        ``address`` when empty.
    :param subject: email subject, used as the node title.
    :param message: email body, sanitized and stored as the wiki home page.
    :param attachments: uploaded files; the first one is linked in the
        confirmation mail.
    :param tags: optional iterable of tags to add; not modified.
    :param system_tags: optional iterable of system tags to add; not modified.
    :param is_spam: when true the node is not made public and is tagged
        ``'spam'``.
    """
    # Fail if no attachments
    if not attachments:
        send_mail(
            address,
            CONFERENCE_FAILED,
            fullname=fullname,
        )
        return

    # Use address as name if name missing
    fullname = fullname or address.split('@')[0]

    created = []

    user, user_created = get_or_create_user(fullname, address, is_spam)

    if user_created:
        created.append(user)
        set_password_url = web_url_for(
            'reset_password',
            verification_key=user.verification_key,
        )
    else:
        set_password_url = None

    auth = Auth(user=user)

    # Find or create node
    node = Node.find(Q('title', 'iexact', subject))
    node = node[0] if node.count() else None
    if node is None or not node.is_contributor(user):
        node = new_node('project', subject, user)
        created.append(node)

    # Add admin to project
    if conf.admins:
        for admin in conf.admins:
            node.add_contributor(contributor=admin,
                                 visible=False,
                                 log=False,
                                 save=True)

    # Make public if confident that this is not spam and projects made public
    if is_spam:
        # logger.warning is the supported spelling; warn is a deprecated alias.
        logger.warning('Possible spam detected in email modification of '
                       'user {0} / node {1}'.format(
                           user._id,
                           node._id,
                       ))
    elif conf.public_projects:
        node.set_privacy('public', auth=auth)

    # Add body
    node.update_node_wiki('home', sanitize(message), auth)

    # Add tags
    presentation_type = 'talk' if 'talk' in recipient else 'poster'

    # Work on copies so the caller's lists are never mutated by the appends.
    tags = list(tags or [])
    tags.append(presentation_type)
    for tag in tags:
        node.add_tag(tag, auth=auth)

    # Add system tags
    system_tags = list(system_tags or [])
    system_tags.append(presentation_type)
    system_tags.append('emailed')
    if is_spam:
        system_tags.append('spam')
    for tag in system_tags:
        if tag not in node.system_tags:
            node.system_tags.append(tag)

    # Save changes
    node.save()

    # NOTE(review): imported locally, presumably to avoid a circular
    # import at module load time -- confirm before moving it.
    from website.addons.osfstorage import utils as storage_utils

    # Add files
    for attachment in attachments:
        name, content, content_type, size = prepare_file(attachment)
        upload_url = storage_utils.get_upload_url(node, user, size,
                                                  content_type, name)
        requests.put(
            upload_url,
            data=content,
            headers={'Content-Type': content_type},
        )

    download_url = node.web_url_for(
        'osf_storage_view_file',
        path=attachments[0].filename,
        action='download',
    )

    # Add mail record
    mail_record = MailRecord(
        data=request_to_data(),
        records=created,
    )
    mail_record.save()

    # Send confirmation email
    send_mail(
        address,
        CONFERENCE_SUBMITTED,
        conf_full_name=conf.name,
        conf_view_url=urlparse.urljoin(settings.DOMAIN,
                                       os.path.join('view', conf.endpoint)),
        fullname=fullname,
        user_created=user_created,
        set_password_url=set_password_url,
        profile_url=user.absolute_url,
        node_url=urlparse.urljoin(settings.DOMAIN, node.url),
        file_url=urlparse.urljoin(settings.DOMAIN, download_url),
        presentation_type=presentation_type,
        is_spam=is_spam,
    )
Exemplo n.º 59
0
 def get_queryset(self):
     """Return the nodes matching the query built from the current request."""
     return Node.find(self.get_query_from_request())
Exemplo n.º 60
0
 def get_queryset(self):
     """Return the nodes matching the query built from the current request."""
     request_query = self.get_query_from_request()
     return Node.find(request_query)