def search_projects_by_title(**kwargs):
    """Search for nodes by title. Can pass in arguments from the URL to modify the search

    :arg term: The substring of the title.
    :arg category: Category of the node.
    :arg isDeleted: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isFolder: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isRegistration: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg includePublic: yes or no. Whether the projects listed should include public projects.
    :arg includeContributed: yes or no. Whether the search should include projects the current user has contributed to.
    :arg ignoreNode: a list of nodes that should not be included in the search.
    :return: a list of dictionaries of projects
    """
    # TODO(fabianvf): At some point, it would be nice to do this with elastic search
    user = kwargs['auth'].user
    term = request.args.get('term', '')
    max_results = int(request.args.get('maxResults', '10'))
    category = request.args.get('category', 'project').lower()
    is_deleted = request.args.get('isDeleted', 'no').lower()
    is_folder = request.args.get('isFolder', 'no').lower()
    is_registration = request.args.get('isRegistration', 'no').lower()
    include_public = request.args.get('includePublic', 'yes').lower()
    include_contributed = request.args.get('includeContributed', 'yes').lower()
    # BUG FIX: getlist's second positional argument is a ``type`` callable,
    # not a default; passing ``[]`` would raise TypeError whenever values are
    # present. getlist already returns an empty list for a missing key.
    ignore_nodes = request.args.getlist('ignoreNode')

    matching_title = (
        Q('title', 'icontains', term) &  # search term (case insensitive)
        Q('category', 'eq', category)  # is a project
    )
    matching_title = conditionally_add_query_item(matching_title, 'is_deleted', is_deleted)
    matching_title = conditionally_add_query_item(matching_title, 'is_folder', is_folder)
    matching_title = conditionally_add_query_item(matching_title, 'is_registration', is_registration)

    # Exclude any explicitly-ignored nodes (empty list is a harmless no-op).
    for node_id in ignore_nodes:
        matching_title = matching_title & Q('_id', 'ne', node_id)

    my_projects = []
    my_project_count = 0
    public_projects = []

    if include_contributed == "yes":
        my_projects = Node.find(
            matching_title &
            Q('contributors', 'eq', user._id)  # user is a contributor
        ).limit(max_results)
        # BUG FIX: was ``my_project_count = my_project_count`` (a no-op), so
        # the count stayed 0 and the public-project query below ignored how
        # many contributed results were already returned.
        my_project_count = my_projects.count()

    # Fill any remaining slots with public projects when requested.
    if my_project_count < max_results and include_public == "yes":
        public_projects = Node.find(
            matching_title &
            Q('is_public', 'eq', True)  # is public
        ).limit(max_results - my_project_count)

    results = list(my_projects) + list(public_projects)
    return process_project_search_results(results, **kwargs)
def set_tag_many_to_many_on_nodes(page_size=10000):
    """Migration step: copy MODM tag many-to-many fields onto django Nodes.

    Pages through MODM nodes that have any of the ``m2m_tag_fields`` set,
    resolves the matching django Node through the ``modm_to_django`` cache,
    and adds the corresponding django Tag pks to each m2m field.

    :param page_size: number of MODM nodes processed per atomic transaction.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    node_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMNode.find(build_query(m2m_tag_fields, MODMNode)).count()
    print '{} Nodes'.format(total)
    while node_count < total:
        # One transaction per page keeps any rollback bounded to page_size nodes.
        with transaction.atomic():
            for modm_node in MODMNode.find(build_query(
                    m2m_tag_fields, MODMNode)).sort('-date_modified')[
                        node_count:page_size + node_count]:
                django_node = Node.objects.get(
                    pk=modm_to_django[modm_node._id])
                for m2m_tag_field in m2m_tag_fields:
                    try:
                        attr = getattr(django_node, m2m_tag_field)
                    except AttributeError as ex:
                        # node field doesn't exist on node
                        pass
                    else:
                        # node field exists, do the stuff
                        django_pks = []
                        for modm_m2m_value in getattr(modm_node,
                                                      m2m_tag_field, []):
                            # Tag pks are cached under "<tag id>:<suffix>" so
                            # system and regular tags with the same id do not
                            # collide in modm_to_django.
                            suffix = 'system' if m2m_tag_field == 'system_tags' else 'not_system'
                            if isinstance(modm_m2m_value, MODMTag):
                                django_pks.append(modm_to_django[
                                    '{}:{}'.format(modm_m2m_value._id, suffix)])
                            elif isinstance(modm_m2m_value, basestring):
                                # Some MODM values are bare tag-id strings.
                                django_pks.append(modm_to_django[
                                    '{}:{}'.format(modm_m2m_value, suffix)])
                            elif modm_m2m_value is None:
                                print 'Tag of None found on Node {}'.format(
                                    modm_node._id)
                            else:
                                # wth
                                # Unexpected value type: ring the terminal bell
                                # and drop into the debugger for inspection.
                                print '\a'  # bells!
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                print '\a'
                                import bpdb
                                bpdb.set_trace()
                        if len(django_pks) > 0:
                            attr.add(*django_pks)
                        m2m_count += len(django_pks)
                node_count += 1
                if node_count % page_size == 0 or node_count == total:
                    print 'Through {} nodes and {} m2m'.format(node_count,
                                                               m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def set_node_many_to_many_on_nodes(page_size=5000):
    """Migration step: copy MODM node-to-node m2m fields onto django Nodes.

    Pages through MODM nodes (institutions included) that have any of the
    ``m2m_node_fields`` set and adds the referenced nodes' django pks to the
    matching m2m field on the django Node.

    :param page_size: number of MODM nodes processed per atomic transaction.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    node_count = 0
    m2m_count = 0
    start = datetime.now()
    total = MODMNode.find(
        build_query(m2m_node_fields, MODMNode),
        allow_institution=True).count()
    print '{} Nodes'.format(total)
    while node_count < total:
        # One transaction per page keeps any rollback bounded to page_size nodes.
        with transaction.atomic():
            for modm_node in MODMNode.find(
                    build_query(m2m_node_fields, MODMNode),
                    allow_institution=True).sort('-date_modified')[
                        node_count:page_size + node_count]:
                django_node = Node.objects.get(
                    pk=modm_to_django[modm_node._id])
                for m2m_node_field in m2m_node_fields:
                    attr = getattr(django_node, m2m_node_field)
                    django_pks = []
                    for modm_m2m_value in getattr(modm_node,
                                                  m2m_node_field, []):
                        # Values may be node objects, bare guid strings, or
                        # Pointer wrappers; resolve each to a django pk.
                        if isinstance(modm_m2m_value, MODMNode):
                            django_pks.append(modm_to_django[
                                modm_m2m_value._id])
                        elif isinstance(modm_m2m_value, basestring):
                            django_pks.append(modm_to_django[modm_m2m_value])
                        elif isinstance(modm_m2m_value, Pointer):
                            django_pks.append(modm_to_django[
                                modm_m2m_value.node._id])
                        else:
                            # wth
                            # Unexpected value type: ring the terminal bell
                            # and drop into the debugger for inspection.
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            import bpdb
                            bpdb.set_trace()
                    if len(django_pks) > 0:
                        attr.add(*django_pks)
                    m2m_count += len(django_pks)
                node_count += 1
                if node_count % page_size == 0 or node_count == total:
                    print 'Through {} nodes and {} m2m'.format(node_count,
                                                               m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def get_queryset(self):
    """Build the queryset; for bulk requests the node ids come from the
    request body, and the requester must be able to edit every one."""
    if not is_bulk_request(self.request):
        return Node.find(self.get_query_from_request())

    auth = get_user_auth(self.request)
    requested_ids = [item["id"] for item in self.request.data]
    nodes = Node.find(Q("_id", "in", requested_ids))
    for node in nodes:
        if not node.can_edit(auth):
            raise PermissionDenied
    return nodes
def save_bare_nodes(page_size=20000):
    """Migration step: bulk-create django Nodes from MODM node storage.

    Copies each MODM node's storage dict (minus blacklisted keys) into a new
    django Node, localizing naive datetimes to UTC and dropping null values,
    then records each created pk in the ``modm_to_django`` guid cache.

    :param page_size: number of nodes accumulated per bulk_create batch.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    count = 0
    start = datetime.now()
    total = MODMNode.find(allow_institution=True).count()
    while count < total:
        with transaction.atomic():
            nids = []
            for modm_node in MODMNode.find(
                    allow_institution=True).sort('-date_modified')[
                        count:count + page_size]:
                guid = Guid.objects.get(guid=modm_node._id)
                node_fields = dict(_guid_id=guid.pk, **modm_node.to_storage())
                # remove fields not yet implemented
                cleaned_node_fields = {key: node_fields[key]
                                       for key in node_fields
                                       if key not in node_key_blacklist}
                # make datetimes not naive
                for k, v in cleaned_node_fields.iteritems():
                    if isinstance(v, datetime):
                        cleaned_node_fields[k] = pytz.utc.localize(v)
                # remove null fields, postgres hate null fields
                cleaned_node_fields = {k: v
                                       for k, v in
                                       cleaned_node_fields.iteritems()
                                       if v is not None}
                nids.append(Node(**cleaned_node_fields))
                count += 1
                if count % page_size == 0 or count == total:
                    then = datetime.now()
                    print 'Saving nodes {} through {}...'.format(
                        count - page_size, count)
                    woot = Node.objects.bulk_create(nids)
                    # Map each created node's guid to its new pk so later
                    # migration steps can resolve references cheaply.
                    for wit in woot:
                        modm_to_django[wit._guid.guid] = wit.pk
                    now = datetime.now()
                    print 'Done with {} nodes in {} seconds...'.format(
                        len(woot), (now - then).total_seconds())
                    nids = []
                    trash = gc.collect()
                    print 'Took out {} trashes'.format(trash)
    print 'Modm Nodes: {}'.format(total)
    print 'django Nodes: {}'.format(Node.objects.all().count())
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def save_bare_system_tags(page_size=10000): print 'Starting save_bare_system_tags...' start = datetime.now() things = list(MODMNode.find(MQ('system_tags', 'ne', [])).sort( '-_id')) + list(MODMUser.find(MQ('system_tags', 'ne', [])).sort( '-_id')) system_tag_ids = [] for thing in things: for system_tag in thing.system_tags: system_tag_ids.append(system_tag) unique_system_tag_ids = set(system_tag_ids) total = len(unique_system_tag_ids) system_tags = [] for system_tag_id in unique_system_tag_ids: system_tags.append(Tag(_id=system_tag_id, lower=system_tag_id.lower(), system=True)) woot = Tag.objects.bulk_create(system_tags) print 'MODM System Tags: {}'.format(total) print 'django system tags: {}'.format(Tag.objects.filter(system= True).count()) print 'Done with {} in {} seconds...'.format( sys._getframe().f_code.co_name, (datetime.now() - start).total_seconds())
def get_projects_public():
    """Top-level public content nodes."""
    query = Q('parent_node', 'eq', None)
    query &= Q('is_public', 'eq', True)
    query &= CONTENT_NODE_QUERY
    return Node.find(query)
def get_projects_forked():
    """Top-level forks among content nodes."""
    query = Q('parent_node', 'eq', None)
    query &= Q('is_fork', 'eq', True)
    query &= CONTENT_NODE_QUERY
    return Node.find(query)
def conference_submissions(**kwargs):
    """Return data for all OSF4M submissions.

    The total number of submissions for each meeting is calculated and
    cached in the Conference.num_submissions field.
    """
    submissions = []
    # TODO: Revisit this loop, there has to be a way to optimize it
    for conf in Conference.find():
        # For efficiency, we filter by tag first, then node
        # instead of doing a single Node query
        projects = set()

        tags = Tag.find(Q('lower', 'eq', conf.endpoint.lower())).get_keys()
        nodes = Node.find(
            Q('tags', 'in', tags) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'ne', True)
        )
        projects.update(list(nodes))

        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))
        num_submissions = len(projects)
        # Cache the number of submissions
        conf.num_submissions = num_submissions
        conf.save()
        # NOTE(review): this `continue` is the last statement of the loop
        # body, so it has no effect -- submissions below the minimum count
        # were already appended above. Confirm whether small conferences
        # were meant to be excluded from the response.
        if num_submissions < settings.CONFERENCE_MIN_COUNT:
            continue
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    return {'submissions': submissions}
def get_projects():
    # This count includes projects, forks, and registrations
    top_level = Q('parent_node', 'eq', None)
    return Node.find(top_level & CONTENT_NODE_QUERY)
def get_projects():
    """All live (non-deleted, non-folder) project nodes."""
    criteria = Q('category', 'eq', 'project')
    criteria &= Q('is_deleted', 'eq', False)
    criteria &= Q('is_folder', 'ne', True)
    return Node.find(criteria)
def conference_view(**kwargs):
    """Build the meetings-page payload.

    :return: dict with ``meetings`` (conferences at or above the minimum
        submission count, most submissions first) and ``submissions``
        (rendered nodes, newest first).
    """
    meetings = []
    submissions = []
    for conf in Conference.find():
        query = (
            Q('tags', 'iexact', conf.endpoint) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'eq', False)
        )
        projects = Node.find(query)
        for idx, node in enumerate(projects):
            submissions.append(_render_conference_node(node, idx, conf))
        num_submissions = projects.count()
        # Small conferences are omitted from `meetings`; note their rendered
        # nodes were already appended to `submissions` above.
        # NOTE(review): 'CONFERNCE_MIN_COUNT' looks misspelled -- confirm the
        # actual settings attribute name before renaming anything.
        if num_submissions < settings.CONFERNCE_MIN_COUNT:
            continue
        meetings.append({
            'name': conf.name,
            'active': conf.active,
            'url': web_url_for('conference_results', meeting=conf.endpoint),
            'count': num_submissions,
        })
    submissions.sort(key=lambda submission: submission['dateCreated'], reverse=True)
    meetings.sort(key=lambda meeting: meeting['count'], reverse=True)
    return {'meetings': meetings, 'submissions': submissions}
def test_bookmark_collection_not_counted(self):
    """A bookmark collection must be excluded from osfstorage's connected-node
    count."""
    BookmarkCollectionFactory(creator=self.user)
    total_nodes = Node.find().count()

    events = AddonSnapshot().get_events()
    storage_results = [
        each for each in events
        if each["provider"]["name"] == "osfstorage"
    ]
    assert_equal(storage_results[0]["nodes"]["connected"], total_nodes - 1)
def migrate_registrations():
    """Migrate file metadata on every Prereg Challenge registration.

    Walks each registration's registered_meta answers, migrating file
    metadata for both plain questions and nested multi-part answers, and
    saves (and counts) only registrations that actually changed.
    """
    PREREG_CHALLENGE_METASCHEMA = get_prereg_schema()
    registrations = Node.find(
        Q('is_registration', 'eq', True) &
        Q('registered_schema', 'eq', PREREG_CHALLENGE_METASCHEMA)
    )
    count = 0
    for reg in registrations:
        data = reg.registered_meta[PREREG_CHALLENGE_METASCHEMA._id]
        migrated = False
        logger.debug('Reading preregistration with id: {0}'.format(reg._id))
        for question in data.values():
            if isinstance(question.get('value'), dict):
                # Multi-part question: migrate each sub-answer.
                for value in question['value'].values():
                    migrated_one = migrate_file_meta(value)
                    # Sticky flag: stays True once any answer migrates.
                    if migrated_one and not migrated:
                        migrated = True
            else:
                migrated_one = migrate_file_meta(question)
                if migrated_one and not migrated:
                    migrated = True
        if migrated:
            reg.save()
            count += 1
            logger.info('Migrated preregistration with id: {0}'.format(reg._id))
    logger.info('Done with {0} prereg registrations migrated.'.format(count))
def get_projects_registered():
    """Top-level registrations among content nodes."""
    registered_query = (
        Q('parent_node', 'eq', None) &
        Q('is_registration', 'eq', True) &
        CONTENT_NODE_QUERY
    )
    return Node.find(registered_query)
def get_projects_registered():
    """Live project nodes that are registrations."""
    query = Q('category', 'eq', 'project')
    query &= Q('is_deleted', 'eq', False)
    query &= Q('is_folder', 'ne', True)
    query &= Q('is_registration', 'eq', True)
    return Node.find(query)
def find_file_mismatch_nodes():
    """Find nodes with inconsistent `files_current` and `files_versions`
    field keys.
    """
    mismatched = []
    for node in Node.find():
        if set(node.files_versions.keys()) != set(node.files_current.keys()):
            mismatched.append(node)
    return mismatched
def get_projects_public():
    """Public, live project nodes (collections excluded)."""
    public_query = Q('category', 'eq', 'project')
    public_query &= Q('is_deleted', 'eq', False)
    public_query &= Q('is_collection', 'ne', True)
    public_query &= Q('is_public', 'eq', True)
    return Node.find(public_query)
def get_projects_forked():
    """Live project forks (collections excluded), materialized as a list."""
    forked_query = Q('category', 'eq', 'project')
    forked_query &= Q('is_deleted', 'eq', False)
    forked_query &= Q('is_collection', 'ne', True)
    forked_query &= Q('is_fork', 'eq', True)
    return list(Node.find(forked_query))
def migrate_nodes(index):
    """Push every public, non-deleted node into the given search index."""
    logger.info("Migrating nodes to index: {}".format(index))
    searchable = Node.find(
        Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    )
    migrated = 0
    for node in searchable:
        search.update_node(node, index=index)
        migrated += 1
    logger.info('Nodes migrated: {}'.format(migrated))
def get_targets():
    """Non-deleted, untagged nodes that are either forks or registrations
    with an empty log list."""
    logless_registration = Q('registered_from', 'ne', None) & Q('logs', 'eq', [])
    fork = Q('forked_from', 'ne', None)
    candidates = logless_registration | fork
    return Node.find(
        candidates &
        Q('is_deleted', 'ne', True) &
        Q('system_tags', 'ne', SYSTEM_TAG)
    )
def main(): total = MODMNode.find().count() page_size = 1000 count = 0 print 'Doing {} Nodes...'.format(total) while count < total: for modm_node in MODMNode.find()[count:count+page_size]: noooood = get_or_create_node(modm_node) count += 1 if count % page_size == 0: print 'Count: {}'.format(count) print 'Nodes: {}, Users: {}, Tags: {}'.format(nodes, users, tags) garbages = gc.collect() print 'Took out {} trashes.'.format(garbages) print 'MODM: {}'.format(total) print 'PG: {}'.format(count)
def get_nodes():
    """Return nodes that are forks, registrations, or templated copies and
    were created before 2014-10-31.
    """
    forked = Q('__backrefs.forked.node.forked_from', 'ne', None)
    registered = Q('__backrefs.registrations.node.registered_from', 'ne', None)
    templated = Q('__backrefs.template_node.node.template_node', 'ne', None)
    duplicate = (forked | registered | templated)
    # BUG FIX: the original used the boolean ``and`` operator, which simply
    # evaluates to its right-hand operand (a Q object is truthy), silently
    # dropping the duplicate criteria from the query. ODM queries must be
    # combined with ``&``.
    return Node.find(
        duplicate & Q('date_created', 'lt', datetime.datetime(2014, 10, 31))
    )
def get_queryset(self):
    """Collect every preprint attached to the nodes matching the request
    query."""
    # TODO [OSF-7090]: Rearchitect how `.is_preprint` is determined,
    # so that a query that is guaranteed to return only
    # preprints can be constructed.
    matching_nodes = Node.find(self.get_query_from_request())
    return [
        preprint
        for node in matching_nodes
        for preprint in node.preprints
    ]
def conference_data(meeting):
    """Serialized public, non-deleted nodes tagged with the meeting endpoint.

    :raises HTTPError: 404 when the meeting does not exist.
    """
    try:
        conf = Conference.find_one(Q("endpoint", "iexact", meeting))
    except ModularOdmException:
        raise HTTPError(httplib.NOT_FOUND)

    node_query = (
        Q("tags", "iexact", meeting) &
        Q("is_public", "eq", True) &
        Q("is_deleted", "eq", False)
    )
    return [
        _render_conference_node(node, idx, conf)
        for idx, node in enumerate(Node.find(node_query))
    ]
def get_queryset(self):
    """Nodes matching the request query that the requester is allowed to
    view (public, or visible to the authenticated user)."""
    current_user = self.request.user
    auth = Auth(None) if current_user.is_anonymous() else Auth(current_user)
    combined_query = self.get_default_odm_query() & self.get_query_from_request()
    return [
        node for node in Node.find(combined_query)
        if node.is_public or node.can_view(auth)
    ]
def find(cls, query=None, **kwargs): from website.models import Node # done to prevent import error if query and getattr(query, 'nodes', False): for node in query.nodes: replacement_attr = cls.attribute_map.get(node.attribute, False) node.attribute = replacement_attr or node.attribute elif isinstance(query, RawQuery): replacement_attr = cls.attribute_map.get(query.attribute, False) query.attribute = replacement_attr or query.attribute query = query & Q('institution_id', 'ne', None) if query else Q('institution_id', 'ne', None) nodes = Node.find(query, allow_institution=True, **kwargs) return InstitutionQuerySet(nodes)
def test_register_draft_without_embargo_creates_registration_approval(self, mock_enqueue):
    """Registering a draft with a make-public payload must yield a
    registration carrying a registration approval."""
    url = self.project.api_url_for(
        'register_draft_registration', draft_id=self.draft._id)
    res = self.app.post(
        url,
        self.valid_make_public_payload,
        content_type='application/json',
        auth=self.user.auth,
    )
    assert_equal(res.status_code, 202)

    newest_registration = Node.find().sort('-registered_date')[0]
    assert_true(newest_registration.is_registration)
    assert_not_equal(newest_registration.registration_approval, None)
def test_POST_register_make_public_immediately_creates_registration_approval(self, mock_enqueue):
    """Posting the register-template page with a make-public payload must
    create a registration with an approval attached."""
    url = self.project.api_url_for(
        "node_register_template_page_post",
        template=u"Open-Ended_Registration")
    res = self.app.post(
        url,
        self.valid_make_public_payload,
        content_type="application/json",
        auth=self.user.auth,
    )
    assert_equal(res.status_code, 201)

    newest_registration = Node.find().sort("-registered_date")[0]
    assert_true(newest_registration.is_registration)
    assert_not_equal(newest_registration.registration_approval, None)
def migrate_nodes(index):
    """Bulk-index all public, non-deleted nodes into `index`, one page of
    200 at a time."""
    logger.info('Migrating nodes to index: {}'.format(index))
    query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    total = Node.find(query).count()
    increment = 200
    total_pages = (total // increment) + 1

    page_iter = paginated(Node, query=query, increment=increment, each=False)
    for page_number, page in enumerate(page_iter):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        Node.bulk_update_search(page, index=index)
        # Drop cached documents between pages to keep memory flat.
        Node._clear_caches()
    logger.info('Nodes migrated: {}'.format(total))
def search_node(auth, **kwargs): """ """ # Get arguments node = Node.load(request.json.get('nodeId')) include_public = request.json.get('includePublic') size = float(request.json.get('size', '5').strip()) page = request.json.get('page', 0) query = request.json.get('query', '').strip() start = (page * size) if not query: return {'nodes': []} # Build ODM query title_query = Q('title', 'icontains', query) not_deleted_query = Q('is_deleted', 'eq', False) visibility_query = Q('contributors', 'eq', auth.user) no_folders_query = Q('is_folder', 'eq', False) if include_public: visibility_query = visibility_query | Q('is_public', 'eq', True) odm_query = title_query & not_deleted_query & visibility_query & no_folders_query # Exclude current node from query if provided if node: nin = [node._id] + node.node_ids odm_query = (odm_query & Q('_id', 'nin', nin)) nodes = Node.find(odm_query) count = nodes.count() pages = math.ceil(count / size) validate_page_num(page, pages) return { 'nodes': [ _serialize_node_search(each) for each in islice(nodes, start, start + size) if each.contributors ], 'total': count, 'pages': pages, 'page': page }
def migrate_registrations_metadata_key(schema):
    """ Finds Veer registrations whose registered_meta has an undefined key and corrects.
    """
    registrations = Node.find(Q('is_registration', 'eq', True) &
                              Q('registered_schema', 'eq', schema))
    total_reg = registrations.count()
    logger.info('Examining {} registrations for improper key'.format(total_reg))

    reg_count = 0
    for reg in registrations:
        reg_count += 1
        # Answers misfiled under the 'undefined' key belong under 'procedure';
        # pop-and-reassign moves them in place.
        if reg.registered_meta.get(schema._id, {}).get('recommended-methods', {}).get('value', {}).get('undefined', {}):
            reg.registered_meta[schema._id]['recommended-methods']['value']['procedure'] = \
                reg.registered_meta[schema._id]['recommended-methods']['value'].pop('undefined')
            reg.save()
            logger.info('{}/{} Migrated key for {}'.format(reg_count, total_reg, reg._id))
        else:
            logger.info('{}/{} Key already correct for {}. No change.'.format(reg_count, total_reg, reg._id))
def migrate(dry=True):
    """Migrate registered_meta extras for registrations and draft
    registrations, logging totals and any errors."""
    reg_query = Q('is_registration', 'eq', True) & Q('registered_meta', 'ne', None)
    regs_migrated, reg_errored = migrate_extras(Node.find(reg_query), dry=dry)

    draft_query = Q('registration_metadata', 'ne', {})
    drafts_migrated, drafts_errored = migrate_extras(
        DraftRegistration.find(draft_query), dry=dry)

    logger.info('Migrated registered_meta for {} registrations'.format(
        len(regs_migrated)))
    if reg_errored:
        logger.error('{} errored: {}'.format(len(reg_errored), reg_errored))

    logger.info('Migrated registered_meta for {} draft registrations'.format(
        len(drafts_migrated)))
    if drafts_errored:
        logger.error('{} errored: {}'.format(len(drafts_errored), drafts_errored))
def test_POST_register_embargo_does_not_make_project_or_children_public(self, mock_enqueue):
    """Registering with an embargo must leave the public project tree as-is
    while producing a private, embargo-pending registration tree."""
    self.project.is_public = True
    self.project.save()
    # Build a small public tree: component + subproject + subcomponent.
    component = NodeFactory(
        creator=self.user,
        parent=self.project,
        title='Component',
        is_public=True
    )
    subproject = ProjectFactory(
        creator=self.user,
        parent=self.project,
        title='Subproject',
        is_public=True
    )
    subproject_component = NodeFactory(
        creator=self.user,
        parent=subproject,
        title='Subcomponent',
        is_public=True
    )
    res = self.app.post(
        self.project.api_url_for('register_draft_registration', draft_id=self.draft._id),
        self.valid_embargo_payload,
        content_type='application/json',
        auth=self.user.auth
    )
    self.project.reload()
    assert_equal(res.status_code, 202)
    assert_equal(res.json['urls']['registrations'], self.project.web_url_for('node_registrations'))

    # Last node directly registered from self.project
    registration = Node.find(
        Q('registered_from', 'eq', self.project)
    ).sort('-registered_date')[0]

    assert_true(registration.is_registration)
    assert_false(registration.is_public)
    assert_true(registration.is_pending_embargo_for_existing_registration)
    assert_is_not_none(registration.embargo)

    # Every descendant of the registration must also be a private registration.
    for node in registration.get_descendants_recursive():
        assert_true(node.is_registration)
        assert_false(node.is_public)
def main():
    """Log every public registration each staff user contributes to."""
    init_app(set_backends=True, routes=False)
    staff_registrations = defaultdict(list)
    users = [User.load(guid) for guid in STAFF_GUIDS]

    public_regs = Node.find(
        Q('is_registration', 'eq', True) &
        Q('is_public', 'eq', True))
    for registration in public_regs:
        for user in users:
            if registration in user.node__contributed:
                staff_registrations[user._id].append(registration)

    for uid in staff_registrations:
        user = User.load(uid)
        user_regs = staff_registrations[uid]
        logger.info('{} ({}) on {} Public Registrations:'.format(
            user.fullname, user._id, len(user_regs)))
        for registration in user_regs:
            logger.info('\t{} ({}): {}'.format(
                registration.title, registration._id, registration.absolute_url))
def migrate_nodes(index, query=None):
    """Bulk-index public, non-deleted nodes (optionally narrowed by `query`)
    into `index`, one page of 200 at a time."""
    logger.info('Migrating nodes to index: {}'.format(index))
    node_query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    if query:
        node_query = query & node_query
    total = Node.find(node_query).count()
    increment = 200
    total_pages = (total // increment) + 1

    page_iter = paginated(Node, query=node_query, increment=increment,
                          each=False, include=['contributor__user__guids'])
    for page_number, page in enumerate(page_iter):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        Node.bulk_update_search(page, index=index)
    logger.info('Nodes migrated: {}'.format(total))
def migrate():
    """Migrate file metadata on every Prereg Challenge registration.

    Walks each registration's registered_meta answers, migrating file
    metadata for plain questions and nested multi-part answers, and saves
    (and counts) only registrations that actually changed.
    """
    PREREG_CHALLENGE_METASCHEMA = get_prereg_schema()
    registrations = Node.find(
        Q('is_registration', 'eq', True) &
        Q('registered_schema', 'eq', PREREG_CHALLENGE_METASCHEMA))
    count = 0
    for reg in registrations:
        data = reg.registered_meta[PREREG_CHALLENGE_METASCHEMA._id]
        migrated = False
        for question in data.values():
            if isinstance(question.get('value'), dict):
                for value in question['value'].values():
                    # BUG FIX: `migrated` was reassigned on every iteration,
                    # so only the last answer decided whether this
                    # registration counted as migrated. Make the flag sticky,
                    # matching the corrected migrate_registrations() sibling.
                    if migrate_file_meta(value):
                        migrated = True
            else:
                if migrate_file_meta(question):
                    migrated = True
        if migrated:
            # BUG FIX: previously every registration was saved
            # unconditionally; only persist ones that actually changed.
            reg.save()
            count += 1
            logger.info('Migrated preregistration with id: {0}'.format(
                reg._id))
    logger.info('Done with {0} preregistrations migrated.'.format(count))
def conference_view(**kwargs):
    """Summaries of conferences meeting the minimum submission count,
    ordered by submission count descending."""
    meetings = []
    for conf in Conference.find():
        submissions = Node.find(
            Q('tags', 'iexact', conf.endpoint) &
            Q('is_public', 'eq', True) &
            Q('is_deleted', 'eq', False)
        ).count()
        if submissions < settings.CONFERNCE_MIN_COUNT:
            continue
        meetings.append({
            'name': conf.name,
            'active': conf.active,
            'url': web_url_for('conference_results', meeting=conf.endpoint),
            'submissions': submissions,
        })

    meetings.sort(key=lambda meeting: meeting['submissions'], reverse=True)
    return {'meetings': meetings}
def search_node(**kwargs):
    """Title search over nodes visible to the current user (typeahead)."""
    # Get arguments
    auth = kwargs['auth']
    node = Node.load(request.json.get('nodeId'))
    include_public = request.json.get('includePublic')
    query = request.json.get('query', '').strip()
    if not query:
        return {'nodes': []}

    # Visible = contributed-to, optionally widened to public nodes.
    visibility_query = Q('contributors', 'eq', auth.user)
    if include_public:
        visibility_query = visibility_query | Q('is_public', 'eq', True)
    odm_query = (
        Q('title', 'icontains', query) &
        Q('is_deleted', 'eq', False) &
        visibility_query &
        Q('is_folder', 'eq', False)
    )

    # Exclude the current node and its children, when one was provided.
    if node:
        excluded_ids = [node._id] + node.node_ids
        odm_query = odm_query & Q('_id', 'nin', excluded_ids)

    # TODO: Parameterize limit; expose pagination
    cursor = Node.find(odm_query).limit(20)
    return {
        'nodes': [
            _serialize_node_search(each)
            for each in cursor
            if each.contributors
        ]
    }
def get_queryset(self):
    """Return preprints on nodes matching the request query, optionally
    narrowed to a single provider.
    """
    # Overriding the default query parameters if the provider filter is present, because the provider is stored on
    # the PreprintService object, not the node itself
    filter_key = 'filter[provider]'
    provider_filter = None
    if filter_key in self.request.query_params:
        # Have to have this mutable so that the filter can be removed in the ODM query, otherwise it will return an
        # empty set
        self.request.GET._mutable = True
        provider_filter = self.request.query_params[filter_key]
        self.request.query_params.pop(filter_key)
    nodes = Node.find(self.get_query_from_request())
    preprints = []
    # TODO [OSF-7090]: Rearchitect how `.is_preprint` is determined,
    # so that a query that is guaranteed to return only
    # preprints can be constructed.
    for node in nodes:
        for preprint in node.preprints:
            # Provider filtering happens in Python because the provider lives
            # on the PreprintService, not the Node.
            if provider_filter is None or preprint.provider._id == provider_filter:
                preprints.append(preprint)
    return preprints
def get_broken_templated():
    """Lazily yield templated nodes whose piwik id is duplicated."""
    templated = Node.find(Q('template_node', 'ne', None))
    return (node for node in templated if has_duplicate_piwik_id(node))
def get_broken_forks():
    """Lazily yield forks whose piwik id is duplicated."""
    forks = Node.find(Q('is_fork', 'eq', True))
    return (node for node in forks if has_duplicate_piwik_id(node))
def get_broken_registrations():
    """Lazily yield registrations whose piwik id is duplicated."""
    registrations = Node.find(Q('is_registration', 'eq', True))
    return (node for node in registrations if has_duplicate_piwik_id(node))
def get_targets():
    """All nodes that are not deleted."""
    not_deleted = Q('is_deleted', 'ne', True)
    return Node.find(not_deleted)
from website.app import init_app
from website.models import Node, User
from framework import Q
from framework.analytics import piwik

app = init_app('website.settings', set_backends=True)

# NOTE: This is a naive implementation for migration, requiring a POST request
# for every user and every node. It is possible to bundle these together in a
# single request, but it would require duplication of logic and strict error
# checking of the result. Doing it this way is idempotent, and allows any
# exceptions raised to halt the process with a usable error message.

# Provision a Piwik user for every OSF user that does not yet have a token.
for user in User.find():
    if user.piwik_token:
        continue
    piwik.create_user(user)

# Provision a Piwik site for every public, non-deleted node without one.
for node in Node.find(
        Q('is_public', 'eq', True) &
        Q('is_deleted', 'eq', False)):
    if node.piwik_site_id:
        continue
    piwik._provision_node(node._id)
def set_user_foreign_keys_on_nodes(page_size=10000):
    """Migration step: set user foreign keys (creator, etc.) on django Nodes.

    Pages through MODM nodes that have any of the ``fk_user_fields`` set,
    resolving each MODM user reference (guid string or user object) to a
    django User pk via the ``modm_to_django`` cache, falling back to a
    database lookup on a cache miss.

    :param page_size: number of MODM nodes processed per atomic transaction.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    node_count = 0
    fk_count = 0
    cache_hits = 0
    cache_misses = 0
    start = datetime.now()
    total = MODMNode.find(build_query(fk_user_fields, MODMNode),
                          allow_institution=True).count()
    while node_count < total:
        # One transaction per page keeps any rollback bounded to page_size nodes.
        with transaction.atomic():
            for modm_node in MODMNode.find(
                    build_query(fk_user_fields, MODMNode),
                    allow_institution=True).sort(
                        '-date_modified')[node_count:node_count + page_size]:
                django_node = Node.objects.get(_guid__guid=modm_node._id)
                for fk_user_field in fk_user_fields:
                    value = getattr(modm_node, fk_user_field, None)
                    if value is not None:
                        if isinstance(value, basestring):
                            # value is a guid, try the cache table for the pk
                            if value in modm_to_django:
                                setattr(django_node,
                                        '{}_id'.format(fk_user_field),
                                        modm_to_django[value])
                                cache_hits += 1
                            else:
                                # it's not in the cache, do the query
                                user_id = User.objects.get(
                                    _guid__guid=value).pk
                                setattr(django_node,
                                        '{}_id'.format(fk_user_field),
                                        user_id)
                                # save for later
                                modm_to_django[value] = user_id
                                cache_misses += 1
                        elif isinstance(value, MODMUser):
                            # value is a node object, try the cache table for the pk
                            if value._id in modm_to_django:
                                setattr(django_node,
                                        '{}_id'.format(fk_user_field),
                                        modm_to_django[value._id])
                                cache_hits += 1
                            else:
                                # it's not in the cache, do the query
                                user_id = User.objects.get(
                                    _guid__guid=value._id).pk
                                setattr(django_node,
                                        '{}_id'.format(fk_user_field),
                                        user_id)
                                # save for later
                                modm_to_django[value._id] = user_id
                                cache_misses += 1
                        else:
                            # that's odd.
                            # Unexpected value type: ring the terminal bell
                            # and drop into the debugger for inspection.
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            import bpdb
                            bpdb.set_trace()
                        fk_count += 1
                django_node.save()
                node_count += 1
                if node_count % page_size == 0 or node_count == total:
                    print 'Through {} nodes and {} foreign keys'.format(
                        node_count, fk_count)
                    print 'Cache: Hits {} Misses {}'.format(
                        cache_hits, cache_misses)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def nodes_since(user, date):
    """Nodes created by `user` strictly after `date`."""
    created_by_user = Q('creator', 'eq', user._id)
    created_after = Q('date_created', 'gt', date)
    return Node.find(created_by_user & created_after)
def find_templated_orphans():
    """Templated, non-project nodes that have no parent backref."""
    orphan_query = Q('template_node', 'ne', None)
    orphan_query &= Q('category', 'ne', 'project')
    orphan_query &= Q('__backrefs.parent.node.nodes.0', 'exists', False)
    return Node.find(orphan_query)
def find_candidate_parents(node):
    """Nodes sharing `node`'s first log plus its fork/registration status."""
    shares_first_log = Q('logs', 'eq', node.logs[0]._id)
    same_fork_state = Q('is_fork', 'eq', node.is_fork)
    same_registration_state = Q('is_registration', 'eq', node.is_registration)
    return Node.find(shares_first_log & same_fork_state & same_registration_state)
def get_queryset(self):
    """All registrations, sorted by the view's default ordering."""
    registrations = Node.find(Q('is_registration', 'eq', True))
    return registrations.sort(self.ordering)
def get_queryset(self):
    """Nodes in this view's spam state, sorted by the default ordering."""
    spam_query = Q('spam_status', 'eq', self.SPAM_STATE)
    return Node.find(spam_query).sort(self.ordering)
def get_folders():
    """Non-deleted folders, excluding the dashboard folder."""
    folder_query = Q('is_folder', 'eq', True)
    folder_query &= Q('is_dashboard', 'ne', True)
    folder_query &= Q('is_deleted', 'ne', True)
    return Node.find(folder_query)
from website.app import init_app logger = logging.getLogger(__name__) if __name__ == '__main__': """This script will set the analytics read keys for all public nodes. Requires a valid keen master key in settings.KEEN['public']['master_key']. Generated keys are stable between runs for the same master key. """ init_app(routes=False) public_nodes = Node.find( Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False) ) total = len(public_nodes) logger.info('Adding keen.io read keys to {} public nodes'.format(total)) count = 0 for public_node in public_nodes: count +=1 if not count % 10: logger.info(' Updating node {} of {}.'.format(count, total)) public_node.keenio_read_key = public_node.generate_keenio_read_key() public_node.save() logger.info('Done! {} nodes updated.'.format(count)) logger.info('Verifying...') nodes_with_keen_keys = Node.find(
def set_user_many_to_many_on_nodes(page_size=5000):
    """Migrate user many-to-many fields from MODM nodes to Django ``Node``s.

    Walks every MODM node matching ``build_query(m2m_user_fields, MODMNode)``
    in pages of ``page_size``, one transaction per page. For each node and
    each user m2m field, related users (stored either as ``MODMUser`` objects
    or as bare id strings) are translated to Django pks via ``modm_to_django``.
    ``contributors`` are materialized as explicit ``Contributor`` rows with
    visibility/permission flags; every other user field is attached in bulk
    with ``attr.add``.

    :param page_size: number of nodes per transaction / progress report.
    """
    print 'Starting {}...'.format(sys._getframe().f_code.co_name)
    node_count = 0  # nodes processed so far (also the paging offset)
    m2m_count = 0   # relations created (Contributor rows + bulk adds)
    start = datetime.now()
    total = MODMNode.find(build_query(m2m_user_fields, MODMNode),
                          allow_institution=True).count()
    print '{} Nodes'.format(total)
    while node_count < total:
        # One atomic transaction per page keeps each batch all-or-nothing.
        with transaction.atomic():
            for modm_node in MODMNode.find(
                    build_query(m2m_user_fields, MODMNode),
                    allow_institution=True).sort(
                        '-date_modified')[node_count:page_size + node_count]:
                django_node = Node.objects.get(
                    pk=modm_to_django[modm_node._id])
                for m2m_user_field in m2m_user_fields:
                    # Skipped here; presumably migrated elsewhere — TODO confirm.
                    if m2m_user_field in ['permissions', 'recently_added']:
                        continue
                    attr = getattr(django_node, m2m_user_field)
                    django_pks = []  # pks to attach in one bulk add below
                    for modm_m2m_value in getattr(modm_node, m2m_user_field, []):
                        if isinstance(modm_m2m_value, MODMUser):
                            if m2m_user_field == 'contributors':
                                # Contributors get a through-model row with
                                # per-user visibility/permission flags.
                                visible = modm_m2m_value._id in modm_node.visible_contributor_ids
                                admin = 'admin' in modm_node.permissions[
                                    modm_m2m_value._id]
                                read = 'read' in modm_node.permissions[
                                    modm_m2m_value._id]
                                write = 'write' in modm_node.permissions[
                                    modm_m2m_value._id]
                                Contributor.objects.get_or_create(
                                    user_id=modm_to_django[modm_m2m_value._id],
                                    node=django_node,
                                    visible=visible,
                                    admin=admin,
                                    read=read,
                                    write=write)
                                m2m_count += 1
                            else:
                                django_pks.append(
                                    modm_to_django[modm_m2m_value._id])
                        elif isinstance(modm_m2m_value, basestring):
                            # Same handling as above, but the relation was
                            # stored as a bare user id string.
                            if m2m_user_field == 'contributors':
                                visible = modm_m2m_value in modm_node.visible_contributor_ids
                                admin = 'admin' in modm_node.permissions[
                                    modm_m2m_value]
                                read = 'read' in modm_node.permissions[
                                    modm_m2m_value]
                                write = 'write' in modm_node.permissions[
                                    modm_m2m_value]
                                Contributor.objects.get_or_create(
                                    user_id=modm_to_django[modm_m2m_value],
                                    node=django_node,
                                    visible=visible,
                                    admin=admin,
                                    read=read,
                                    write=write)
                                m2m_count += 1
                            else:
                                django_pks.append(
                                    modm_to_django[modm_m2m_value])
                        else:
                            # Unexpected value type: ring the terminal bell and
                            # drop into the debugger so the operator can inspect.
                            # NOTE(review): debug residue — blocks unattended
                            # runs; consider logging + skipping instead.
                            print '\a'  # bells
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            print '\a'
                            import bpdb
                            bpdb.set_trace()
                    # Bulk-attach all non-contributor user relations at once.
                    if len(django_pks) > 0:
                        attr.add(*django_pks)
                    m2m_count += len(django_pks)
                node_count += 1
                if node_count % page_size == 0 or node_count == total:
                    print 'Through {} nodes and {} m2m'.format(
                        node_count, m2m_count)
    print 'Done with {} in {} seconds...'.format(
        sys._getframe().f_code.co_name,
        (datetime.now() - start).total_seconds())
def get_queryset(self):
    """Return a lazy iterable of preprint nodes matching the request query.

    TODO: Rearchitect how `.is_preprint` is determined, so that a query
    guaranteed to return only preprints can be constructed; a generator
    filter is used in the meantime.
    """
    request_query = self.get_query_from_request()
    matching_nodes = Node.find(request_query)
    return (candidate for candidate in matching_nodes if candidate.is_preprint)
def search_projects_by_title(**kwargs):
    """ Search for nodes by title. Can pass in arguments from the URL to modify the search

    :arg term: The substring of the title.
    :arg category: Category of the node.
    :arg isDeleted: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isFolder: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg isRegistration: yes, no, or either. Either will not add a qualifier for that argument in the search.
    :arg includePublic: yes or no. Whether the projects listed should include public projects.
    :arg includeContributed: yes or no. Whether the search should include projects the current user has
        contributed to.
    :arg ignoreNode: a list of nodes that should not be included in the search.
    :return: a list of dictionaries of projects
    """
    # TODO(fabianvf): At some point, it would be nice to do this with elastic search
    user = kwargs['auth'].user

    term = request.args.get('term', '')
    max_results = int(request.args.get('maxResults', '10'))
    category = request.args.get('category', 'project').lower()
    is_deleted = request.args.get('isDeleted', 'no').lower()
    is_collection = request.args.get('isFolder', 'no').lower()
    is_registration = request.args.get('isRegistration', 'no').lower()
    include_public = request.args.get('includePublic', 'yes').lower()
    include_contributed = request.args.get('includeContributed', 'yes').lower()
    # FIX: getlist's second argument is a value-coercion callable, not a
    # default; getlist already returns [] when the key is absent. Passing []
    # as the type would raise TypeError whenever ignoreNode is supplied.
    ignore_nodes = request.args.getlist('ignoreNode')

    matching_title = (
        Q('title', 'icontains', term) &  # search term (case insensitive)
        Q('category', 'eq', category)  # is a project
    )

    matching_title = conditionally_add_query_item(matching_title, 'is_deleted', is_deleted)
    matching_title = conditionally_add_query_item(matching_title, 'is_collection', is_collection)
    matching_title = conditionally_add_query_item(matching_title, 'is_registration', is_registration)

    # Exclude explicitly ignored nodes (no-op when the list is empty).
    for node_id in ignore_nodes:
        matching_title = matching_title & Q('_id', 'ne', node_id)

    my_projects = []
    my_project_count = 0
    public_projects = []

    if include_contributed == 'yes':
        my_projects = list(
            Node.find(
                matching_title &
                Q('contributors', 'eq', user._id)  # user is a contributor
            ).limit(max_results)
        )
        # FIX: was `my_project_count = my_project_count`, a self-assignment
        # that left the count at 0 — public results were never reduced by the
        # contributed results, so the combined list could exceed maxResults.
        my_project_count = len(my_projects)

    if my_project_count < max_results and include_public == 'yes':
        public_projects = Node.find(
            matching_title &
            Q('is_public', 'eq', True)  # is public
        ).limit(max_results - my_project_count)

    results = list(my_projects) + list(public_projects)
    ret = process_project_search_results(results, **kwargs)
    return ret
def check(self):
    """Create a project and assert the expected number of non-bookmark nodes.

    When ``self.nexpected`` is the string ``'ntest_calls'``, the expectation
    is the class-level call counter; otherwise it is 1.
    """
    ProjectFactory()
    cls = self.__class__
    cls.ntest_calls += 1
    # a little goofy, yes; each test gets its own instance, so the call
    # counter must live on the class
    if self.nexpected == 'ntest_calls':
        expected = cls.ntest_calls
    else:
        expected = 1
    non_bookmark_nodes = Node.find(Q('is_bookmark_collection', 'eq', False))
    assert_equal(expected, len(non_bookmark_nodes))
def add_poster_by_email(conf, recipient, address, fullname, subject,
                        message, attachments, tags=None, system_tags=None,
                        is_spam=False):
    """Create (or update) a conference poster/talk project from an email.

    Finds or creates the sender's user account and a project titled after
    the email subject, sets privacy per the conference configuration, writes
    the email body to the project's home wiki, tags it, uploads attachments
    to osfstorage, records the mail, and sends a confirmation email.

    :param conf: conference object (provides admins, public_projects, name, endpoint).
    :param recipient: recipient address; 'talk' vs 'poster' is inferred from it.
    :param address: sender's email address.
    :param fullname: sender's name; falls back to the address local part.
    :param subject: email subject; used as the project title.
    :param message: email body; sanitized and written to the home wiki.
    :param attachments: list of attachment objects; must be non-empty.
    :param tags: optional extra tags for the project.
    :param system_tags: optional extra system tags.
    :param is_spam: when True, keeps the project private and tags it 'spam'.
    """
    # Fail if no attachments
    if not attachments:
        send_mail(
            address,
            CONFERENCE_FAILED,
            fullname=fullname,
        )
        return

    # Use address as name if name missing
    fullname = fullname or address.split('@')[0]

    created = []  # records (user/node) created by this call, for MailRecord

    user, user_created = get_or_create_user(fullname, address, is_spam)
    if user_created:
        created.append(user)
        # New users get a set-password link in the confirmation email.
        set_password_url = web_url_for(
            'reset_password',
            verification_key=user.verification_key,
        )
    else:
        set_password_url = None

    auth = Auth(user=user)

    # Find or create node
    node = Node.find(Q('title', 'iexact', subject))
    node = node[0] if node.count() else None
    # Reuse an existing node only if the sender already contributes to it.
    if node is None or not node.is_contributor(user):
        node = new_node('project', subject, user)
        created.append(node)

    # Add admin to project
    if conf.admins:
        for admin in conf.admins:
            node.add_contributor(contributor=admin, visible=False,
                                 log=False, save=True)

    # Make public if confident that this is not spam and projects made public
    if is_spam:
        logger.warn('Possible spam detected in email modification of '
                    'user {0} / node {1}'.format(
                        user._id, node._id,
                    ))
    elif conf.public_projects:
        node.set_privacy('public', auth=auth)

    # Add body
    node.update_node_wiki('home', sanitize(message), auth)

    # Add tags
    presentation_type = 'talk' if 'talk' in recipient else 'poster'

    tags = tags or []
    tags.append(presentation_type)
    for tag in tags:
        node.add_tag(tag, auth=auth)

    # Add system tags
    system_tags = system_tags or []
    system_tags.append(presentation_type)
    system_tags.append('emailed')
    if is_spam:
        system_tags.append('spam')
    for tag in system_tags:
        if tag not in node.system_tags:
            node.system_tags.append(tag)

    # Save changes
    node.save()

    # Local import; presumably avoids a circular import at module load —
    # TODO confirm.
    from website.addons.osfstorage import utils as storage_utils

    # Add files
    for attachment in attachments:
        name, content, content_type, size = prepare_file(attachment)
        upload_url = storage_utils.get_upload_url(node, user, size,
                                                  content_type, name)
        requests.put(
            upload_url,
            data=content,
            headers={'Content-Type': content_type},
        )

    # NOTE(review): the download link always points at the FIRST attachment,
    # even when several were uploaded — confirm this is intended.
    download_url = node.web_url_for(
        'osf_storage_view_file',
        path=attachments[0].filename,
        action='download',
    )

    # Add mail record
    mail_record = MailRecord(
        data=request_to_data(),
        records=created,
    )
    mail_record.save()

    # Send confirmation email
    send_mail(
        address,
        CONFERENCE_SUBMITTED,
        conf_full_name=conf.name,
        conf_view_url=urlparse.urljoin(settings.DOMAIN,
                                       os.path.join('view', conf.endpoint)),
        fullname=fullname,
        user_created=user_created,
        set_password_url=set_password_url,
        profile_url=user.absolute_url,
        node_url=urlparse.urljoin(settings.DOMAIN, node.url),
        file_url=urlparse.urljoin(settings.DOMAIN, download_url),
        presentation_type=presentation_type,
        is_spam=is_spam,
    )
def get_queryset(self):
    """Return nodes matching the query built from the current request."""
    return Node.find(self.get_query_from_request())
def get_queryset(self):
    """Return nodes matching the query built from the current request."""
    request_query = self.get_query_from_request()
    return Node.find(request_query)