def activity():
    """Return the pre-generated node listings for the public activity page.

    New and Noteworthy projects are set manually or through
    `scripts/populate_new_and_noteworthy_projects.py`.
    Popular projects and registrations are generated by
    `scripts/populate_popular_projects_and_registrations.py`.

    Each listing is read from a links node whose id comes from ``settings``.
    If a lookup raises ``AttributeError`` (for example because ``Node.load``
    returned ``None``), that listing falls back to an empty list.

    :return: dict with the keys ``new_and_noteworthy_projects`` (resolved
        ``pointer.node`` objects), ``recent_public_registrations``,
        ``popular_public_projects`` and ``popular_public_registrations``
        (the latter two are the links nodes' ``nodes_pointer`` values as-is).
    """
    # New and Noteworthy projects
    try:
        noteworthy_links = Node.load(settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
        noteworthy_pointers = noteworthy_links.nodes_pointer
        new_and_noteworthy_projects = [link.node for link in noteworthy_pointers]
    except AttributeError:
        new_and_noteworthy_projects = []

    # Popular projects
    try:
        popular_projects_links = Node.load(settings.POPULAR_LINKS_NODE)
        popular_public_projects = popular_projects_links.nodes_pointer
    except AttributeError:
        popular_public_projects = []

    # Popular registrations
    try:
        popular_registrations_links = Node.load(settings.POPULAR_LINKS_REGISTRATIONS)
        popular_public_registrations = popular_registrations_links.nodes_pointer
    except AttributeError:
        popular_public_registrations = []

    listings = {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': utils.recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
    }
    return listings
def activity():
    """Generate analytics for most popular public projects and registrations.

    Called by `scripts/update_populate_projects_and_registrations`.

    When a Keen public read key is configured, the node ids found in
    ``get_keen_activity()['node_visits']`` are ordered by their ``result``
    count (highest first) and loaded in that order. Public, non-deleted
    nodes are collected into the popular projects list; public, non-deleted,
    non-retracted registrations go into the popular registrations list. Each
    list is capped at ``settings.MAX_POPULAR_PROJECTS``.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects`` and
        ``popular_public_registrations``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    limit = settings.MAX_POPULAR_PROJECTS

    if settings.KEEN['public']['read_key']:
        visit_rows = get_keen_activity()['node_visits']
        ranked = [(row['node.id'], row['result']) for row in visit_rows]
        # list.sort is stable, so rows with equal counts keep their incoming order.
        ranked.sort(key=lambda pair: pair[1], reverse=True)

        for node_id, _views in ranked:
            node = Node.load(node_id)
            if node is None:
                continue

            if node.is_public and not node.is_deleted:
                if not node.is_registration:
                    if len(popular_public_projects) < limit:
                        popular_public_projects.append(node)
                elif not node.is_retracted:
                    if len(popular_public_registrations) < limit:
                        popular_public_registrations.append(node)

            # Stop loading nodes as soon as both lists are full.
            if min(len(popular_public_projects), len(popular_public_registrations)) >= limit:
                break

    # New and Noteworthy projects are updated manually
    links_node = Node.find_one(Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE))
    new_and_noteworthy_projects = [link.node for link in links_node.nodes_pointer]

    return {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations
    }
def activity():
    """Generate analytics for most popular public projects and registrations.

    Called by `scripts/update_populate_projects_and_registrations`.

    With a Keen public read key configured, node ids taken from
    ``get_keen_activity()['node_visits']`` are sorted by ``result`` in
    descending order and loaded one by one. Public, non-deleted nodes fill
    the popular projects list; public, non-deleted, non-retracted
    registrations fill the popular registrations list. Both lists stop
    growing at ``settings.MAX_POPULAR_PROJECTS`` entries.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects`` and
        ``popular_public_registrations``.
    """
    popular_public_projects = []
    popular_public_registrations = []
    cap = settings.MAX_POPULAR_PROJECTS

    if settings.KEEN['public']['read_key']:
        rows = get_keen_activity()['node_visits']
        id_and_views = [(row['node.id'], row['result']) for row in rows]
        # Stable sort: ties stay in the order they were received.
        id_and_views.sort(key=lambda item: item[1], reverse=True)

        for nid, _count in id_and_views:
            node = Node.load(nid)
            if node is None:
                continue

            if node.is_public and not node.is_deleted:
                if not node.is_registration:
                    if len(popular_public_projects) < cap:
                        popular_public_projects.append(node)
                elif not node.is_retracted:
                    if len(popular_public_registrations) < cap:
                        popular_public_registrations.append(node)

            projects_full = len(popular_public_projects) >= cap
            if projects_full and len(popular_public_registrations) >= cap:
                break

    # New and Noteworthy projects are updated manually
    noteworthy_node = Node.find_one(Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE))
    new_and_noteworthy_projects = [ptr.node for ptr in noteworthy_node.nodes_pointer]

    return {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations
    }
def recent_public_registrations(n=10):
    """Yield public top-level registrations, newest ``registered_date`` first.

    Registrations that are retracted or pending embargo are skipped and do
    not count toward ``n``.

    :param n: countdown of registrations to yield; iteration stops once it
        reaches a falsy value (so ``n=0`` yields nothing).
    :return: generator of registration nodes.
    """
    query = CONTENT_NODE_QUERY & Q('parent_node', 'eq', None)
    query = query & Q('is_public', 'eq', True)
    query = query & Q('is_registration', 'eq', True)
    candidates = Node.find(query).sort('-registered_date')

    remaining = n
    for registration in candidates:
        if not remaining:
            break
        # These are calculated properties, so they are filtered here rather
        # than in the database query.
        if not (registration.is_retracted or registration.is_pending_embargo):
            remaining -= 1
            yield registration
def activity():
    """Read node activity from pre-generated popular projects and registrations.

    New and Noteworthy projects are set manually or through
    `scripts/populate_new_and_noteworthy_projects.py`.
    Popular projects and registrations are generated by
    `scripts/populate_popular_projects_and_registrations.py`.

    Every listing comes from a links node identified in ``settings``; an
    ``AttributeError`` during a lookup (e.g. ``Node.load`` returning
    ``None``) yields an empty list for that listing.

    :return: dict with the keys ``new_and_noteworthy_projects`` (resolved
        ``pointer.node`` objects), ``recent_public_registrations``,
        ``popular_public_projects`` and ``popular_public_registrations``
        (the latter two are the links nodes' ``nodes_pointer`` values as-is).
    """
    # New and Noteworthy projects
    try:
        noteworthy_source = Node.load(settings.NEW_AND_NOTEWORTHY_LINKS_NODE)
        pointers = noteworthy_source.nodes_pointer
        new_and_noteworthy_projects = [ptr.node for ptr in pointers]
    except AttributeError:
        new_and_noteworthy_projects = []

    # Popular projects
    try:
        projects_source = Node.load(settings.POPULAR_LINKS_NODE)
        popular_public_projects = projects_source.nodes_pointer
    except AttributeError:
        popular_public_projects = []

    # Popular registrations
    try:
        registrations_source = Node.load(settings.POPULAR_LINKS_REGISTRATIONS)
        popular_public_registrations = registrations_source.nodes_pointer
    except AttributeError:
        popular_public_registrations = []

    result = {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': utils.recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
    }
    return result
def recent_public_registrations(n=10):
    """Generate the most recently registered public top-level registrations.

    Results are sorted by ``registered_date`` descending. Retracted
    registrations and those pending embargo are skipped without consuming
    any of the ``n`` budget.

    :param n: countdown of registrations to yield; the loop ends once it
        becomes falsy (``n=0`` therefore yields nothing).
    :return: generator of registration nodes.
    """
    top_level_public = (
        CONTENT_NODE_QUERY &
        Q('parent_node', 'eq', None) &
        Q('is_public', 'eq', True)
    )
    found = Node.find(top_level_public & Q('is_registration', 'eq', True))
    ordered = found.sort('-registered_date')

    budget = n
    for reg in ordered:
        if not budget:
            break
        # Filter based on calculated properties (not queryable fields).
        hidden = reg.is_retracted or reg.is_pending_embargo
        if not hidden:
            budget -= 1
            yield reg
def migrate():
    """Collapse each target user's duplicate bookmark collections into one.

    For every user returned by ``get_targets()``, one bookmark collection is
    kept (the one with ``.nodes`` if any, otherwise the first found) and all
    other bookmark collections for that user are marked ``is_deleted`` and
    saved.

    Only collections that are not already deleted are considered. Without
    that filter an already-deleted collection could be chosen as the one to
    keep, and every live collection would then be deleted (for example when
    the script is re-run after a partial failure).

    :raises Exception: if a user has more than one non-deleted bookmark
        collection with ``.nodes``; nothing is deleted for that user.
    """
    targets = get_targets()
    total = len(targets)
    for i, user in enumerate(targets):
        logger.info('({}/{}) Preparing to migrate User {}'.format(i + 1, total, user._id))
        bookmarks = Node.find(
            Q('is_bookmark_collection', 'eq', True) &
            Q('is_deleted', 'eq', False) &
            Q('contributors', 'eq', user._id)
        )

        populated = [n for n in bookmarks if n.nodes]
        if len(populated) > 1:
            raise Exception('Expected no users to have more than one bookmark with .nodes, {} violated'.format(user._id))

        # Prefer the single populated collection; fall back to the first one.
        bookmark_to_keep = populated[0] if populated else bookmarks[0]
        logger.info('Marking Node {} as primary Bookmark Collection for User {}, preparing to delete others'.format(bookmark_to_keep._id, user._id))

        for n in bookmarks:
            if n._id != bookmark_to_keep._id:
                n.is_deleted = True
                n.save()
        logger.info('Successfully migrated User {}'.format(user._id))
    logger.info('Successfully migrated {} users'.format(total))
def _provision_node(node_id):
    """Create a Piwik site for a node and grant its contributors view access.

    Posts a ``SitesManager.addSite`` request to ``settings.PIWIK_HOST``,
    stores the returned site id on the node document as ``piwik_site_id``,
    and then calls ``_change_view_access`` for the node's contributors (plus
    ``'anonymous'`` when the node is public).

    :param node_id: primary key passed to ``Node.load``.
    :raises PiwikException: if the Piwik response is not JSON or does not
        contain a ``'value'`` entry.
    """
    from website.project import Node

    node = Node.load(node_id)
    response = requests.post(
        settings.PIWIK_HOST,
        data={
            'module': 'API',
            'token_auth': settings.PIWIK_ADMIN_TOKEN,
            'format': 'json',
            'method': 'SitesManager.addSite',
            'siteName': 'Node: ' + node._id,
            'urls': [
                settings.CANONICAL_DOMAIN + node.url,
                settings.SHORT_DOMAIN + node.url,
            ],
        }
    )

    try:
        piwik_site_id = json.loads(response.content)['value']
    except (ValueError, KeyError, TypeError):
        # ValueError: body is not JSON. KeyError/TypeError: body is JSON but
        # has no 'value' entry (e.g. an error payload).
        raise PiwikException('Piwik site creation failed for ' + node._id)

    # Use pymongo so that we can save a single field without overwriting node.
    # Kept outside the try so a database error is not reported as a Piwik failure.
    db.node.update({'_id': node._id}, {'$set': {'piwik_site_id': piwik_site_id}})

    # contributors lists might be empty, due to a bug.
    if node.contributors:
        # contributors lists might contain `None` due to bug, hence `if user`
        users = ['osf.' + user._id for user in node.contributors if user]
        if node.is_public:
            users.append('anonymous')
        _change_view_access(users, node, 'view')
def activity():
    """Collect recent and popular public nodes for the activity page.

    Popular nodes come from the Piwik 'Project ID' custom variable for the
    week containing the date exactly one week ago; at most 10 projects and
    10 registrations are collected. If ``settings.PIWIK_HOST`` is unset, or
    Piwik reports no 'Project ID' variable, the popular lists and ``hits``
    stay empty instead of raising ``IndexError``.

    :return: dict with the keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits`` (node id ->
        ``{'hits': ..., 'visits': ...}``).
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}

    # get the date for exactly one week ago
    target_date = datetime.date.today() - datetime.timedelta(weeks=1)

    if settings.PIWIK_HOST:
        client = PiwikClient(
            url=settings.PIWIK_HOST,
            auth_token=settings.PIWIK_ADMIN_TOKEN,
            site_id=settings.PIWIK_SITE_ID,
            period='week',
            date=target_date.strftime('%Y-%m-%d'),
        )

        project_id_variables = [
            x for x in client.custom_variables if x.label == 'Project ID'
        ]
        # The variable may be absent from the report; treat that as
        # "no popular nodes" rather than indexing into an empty list.
        popular_project_ids = (
            project_id_variables[0].values if project_id_variables else []
        )

        for nid in popular_project_ids:
            node = Node.load(nid.value)
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < 10:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted:
                if len(popular_public_registrations) < 10:
                    popular_public_registrations.append(node)
            if len(popular_public_projects) >= 10 and len(popular_public_registrations) >= 10:
                break

        hits = {
            x.value: {
                'hits': x.actions,
                'visits': x.visits
            } for x in popular_project_ids
        }

    # Projects
    recent_query = (
        Q('category', 'eq', 'project') &
        Q('is_public', 'eq', True) &
        Q('is_deleted', 'eq', False)
    )

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
def tearDown(self):
    """Run the parent class's ``tearDown`` and then call ``Node.remove()``."""
    super(TestExplorePublicActivity, self).tearDown()
    # NOTE(review): presumably clears Node records created by the tests so
    # they do not leak into other test cases; confirm against Node.remove.
    Node.remove()
def get_targets():
    """Return the users who have more than one non-deleted bookmark collection.

    Every user from ``User.find()`` is checked with a separate count query
    for bookmark collections on which they are a contributor.

    :return: list of matching users.
    """
    logger.info('Acquiring targets...')

    targets = []
    for user in User.find():
        live_bookmarks = Node.find(
            Q('is_bookmark_collection', 'eq', True) &
            Q('is_deleted', 'eq', False) &
            Q('contributors', 'eq', user._id)
        )
        if live_bookmarks.count() > 1:
            targets.append(user)

    logger.info('Found {} target users.'.format(len(targets)))
    return targets
def activity():
    """Collect new-and-noteworthy, recent and popular nodes for the activity page.

    When a Keen public read key is configured, pageview counts and
    unique-visitor counts per node for ``this_7_days`` are fetched. Nodes are
    ranked by pageviews (highest first) and only then truncated to the top
    20, so the candidates really are the most viewed nodes. From those, up
    to 10 public projects and 10 public, non-retracted registrations are
    collected.

    :return: dict with the keys ``new_and_noteworthy_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits`` (node id ->
        ``{'hits': pageviews, 'visits': unique visitors}``; ``visits`` is 0
        for a node absent from the visits result).
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20
    max_per_list = 10

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        node_id_filters = [
            {
                'property_name': 'node.id',
                'operator': 'exists',
                'property_value': True
            }
        ]

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_filters
        )

        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_filters
        )

        # Index visits by node id: one lookup per node, and a node missing
        # from the visits result no longer causes a KeyError below.
        visits_by_node = {row['node.id']: row['result'] for row in node_visits}

        # Rank first, truncate second; slicing the unsorted result would
        # pick an arbitrary subset instead of the most viewed nodes.
        ranked_pageviews = sorted(
            node_pageviews, key=lambda row: row['result'], reverse=True
        )
        node_data = [
            {
                'node': row['node.id'],
                'views': row['result'],
                'visits': visits_by_node.get(row['node.id'], 0),
            }
            for row in ranked_pageviews[:max_popular_projects]
        ]

        for datum in node_data:
            node = Node.load(datum['node'])
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_per_list:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < max_per_list:
                    popular_public_registrations.append(node)
            if len(popular_public_projects) >= max_per_list and len(popular_public_registrations) >= max_per_list:
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits']
            } for datum in node_data
        }

    # Projects
    new_and_noteworthy_pointers = Node.find_one(Q('_id', 'eq', settings.NEW_AND_NOTEWORTHY_LINKS_NODE)).nodes_pointer
    new_and_noteworthy_projects = [pointer.node for pointer in new_and_noteworthy_pointers]

    return {
        'new_and_noteworthy_projects': new_and_noteworthy_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
def activity():
    """Collect recent and popular public nodes for the activity page.

    Popular nodes come from the Piwik 'Project ID' custom variable for the
    current week; at most 10 projects and 10 registrations are collected.
    If ``settings.PIWIK_HOST`` is unset, or Piwik reports no 'Project ID'
    variable, the popular lists and ``hits`` stay empty instead of raising
    ``IndexError``.

    :return: dict with the keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits`` (node id ->
        ``{'hits': ..., 'visits': ...}``).
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}

    if settings.PIWIK_HOST:
        client = PiwikClient(
            url=settings.PIWIK_HOST,
            auth_token=settings.PIWIK_ADMIN_TOKEN,
            site_id=settings.PIWIK_SITE_ID,
            period='week',
            date='today',
        )

        project_id_variables = [
            x for x in client.custom_variables if x.label == 'Project ID'
        ]
        # The variable may be absent from the report; treat that as
        # "no popular nodes" rather than indexing into an empty list.
        popular_project_ids = (
            project_id_variables[0].values if project_id_variables else []
        )

        for nid in popular_project_ids:
            node = Node.load(nid.value)
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < 10:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted:
                if len(popular_public_registrations) < 10:
                    popular_public_registrations.append(node)
            if len(popular_public_projects) >= 10 and len(popular_public_registrations) >= 10:
                break

        hits = {
            x.value: {
                'hits': x.actions,
                'visits': x.visits
            } for x in popular_project_ids
        }

    # Projects
    recent_query = (
        Q('category', 'eq', 'project') &
        Q('is_public', 'eq', True) &
        Q('is_deleted', 'eq', False)
    )

    # Temporary bug fix: Skip projects with empty contributor lists
    # Todo: Fix underlying bug and remove this selector
    recent_query = recent_query & Q('contributors', 'ne', [])

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    # Registrations
    recent_public_registrations = Node.find(
        recent_query &
        Q('is_registration', 'eq', True)
    ).sort(
        '-registered_date'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations,
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }
def activity():
    """Collect recent and popular public nodes for the activity page.

    When a Keen public read key is configured, pageview counts and
    unique-visitor counts per node for ``this_7_days`` are fetched. Nodes are
    ranked by pageviews (highest first) and only then truncated to the top
    20, so the candidates really are the most viewed nodes. From those, up
    to 10 public projects and 10 public, non-retracted registrations are
    collected.

    :return: dict with the keys ``recent_public_projects``,
        ``recent_public_registrations``, ``popular_public_projects``,
        ``popular_public_registrations`` and ``hits`` (node id ->
        ``{'hits': pageviews, 'visits': unique visitors}``; ``visits`` is 0
        for a node absent from the visits result).
    """
    popular_public_projects = []
    popular_public_registrations = []
    hits = {}
    max_popular_projects = 20
    max_per_list = 10

    if settings.KEEN['public']['read_key']:
        client = KeenClient(
            project_id=settings.KEEN['public']['project_id'],
            read_key=settings.KEEN['public']['read_key'],
        )

        node_id_filters = [
            {
                'property_name': 'node.id',
                'operator': 'exists',
                'property_value': True
            }
        ]

        node_pageviews = client.count(
            event_collection='pageviews',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_filters
        )

        node_visits = client.count_unique(
            event_collection='pageviews',
            target_property='anon.id',
            timeframe='this_7_days',
            group_by='node.id',
            filters=node_id_filters
        )

        # Index visits by node id: one lookup per node, and a node missing
        # from the visits result no longer causes a KeyError below.
        visits_by_node = {row['node.id']: row['result'] for row in node_visits}

        # Rank first, truncate second; slicing the unsorted result would
        # pick an arbitrary subset instead of the most viewed nodes.
        ranked_pageviews = sorted(
            node_pageviews, key=lambda row: row['result'], reverse=True
        )
        node_data = [
            {
                'node': row['node.id'],
                'views': row['result'],
                'visits': visits_by_node.get(row['node.id'], 0),
            }
            for row in ranked_pageviews[:max_popular_projects]
        ]

        for datum in node_data:
            node = Node.load(datum['node'])
            if node is None:
                continue
            if node.is_public and not node.is_registration and not node.is_deleted:
                if len(popular_public_projects) < max_per_list:
                    popular_public_projects.append(node)
            elif node.is_public and node.is_registration and not node.is_deleted and not node.is_retracted:
                if len(popular_public_registrations) < max_per_list:
                    popular_public_registrations.append(node)
            if len(popular_public_projects) >= max_per_list and len(popular_public_registrations) >= max_per_list:
                break

        hits = {
            datum['node']: {
                'hits': datum['views'],
                'visits': datum['visits']
            } for datum in node_data
        }

    # Projects

    # Only show top-level projects (any category) in new and noteworthy lists
    # This means that public children of private nodes will be excluded
    recent_query = (
        Q('parent_node', 'eq', None) &
        Q('is_public', 'eq', True) &
        CONTENT_NODE_QUERY
    )

    recent_public_projects = Node.find(
        recent_query &
        Q('is_registration', 'eq', False)
    ).sort(
        '-date_created'
    ).limit(10)

    return {
        'recent_public_projects': recent_public_projects,
        'recent_public_registrations': recent_public_registrations(),
        'popular_public_projects': popular_public_projects,
        'popular_public_registrations': popular_public_registrations,
        'hits': hits,
    }