def get_events(self, date=None):
    """Snapshot per-addon usage across all available addons.

    :param date: passed to the superclass hook only; counts reflect current state
    :return: list of dicts, one per addon, with provider/users/nodes counts
    """
    super(AddonSnapshot, self).get_events(date)
    from website.settings import ADDONS_AVAILABLE

    counts = []
    # Simplified from a redundant {k: v for k, v in [(..., ...) ...]} round-trip.
    addons_available = {addon.short_name: addon for addon in ADDONS_AVAILABLE}
    for short_name, addon in addons_available.iteritems():
        user_settings_list = []
        node_settings_list = []
        if addon.settings_models.get('user'):
            user_settings_list = [setting for setting in paginated(addon.settings_models['user'])]
        if addon.settings_models.get('node'):
            node_settings_list = [setting for setting in paginated(addon.settings_models['node'])]

        # Check out the first element in node_settings_list to see if it has an
        # external account to check for; addons deriving straight from
        # AddonNodeSettingsBase have none.
        has_external_account = True
        if node_settings_list:
            if AddonNodeSettingsBase in node_settings_list[0].__class__.__bases__:
                has_external_account = False

        connected_count = 0
        for node_settings in node_settings_list:
            # Bookmark collections are system nodes; don't count them as connections.
            if node_settings.owner and not node_settings.owner.is_bookmark_collection:
                connected_count += 1
        deleted_count = addon.settings_models['node'].find(Q('deleted', 'eq', True)).count() if addon.settings_models.get('node') else 0
        if has_external_account:
            disconnected_count = addon.settings_models['node'].find(Q('external_account', 'eq', None) & Q('deleted', 'ne', True)).count() if addon.settings_models.get('node') else 0
        else:
            disconnected_count = addon.settings_models['node'].find(Q('configured', 'eq', True) & Q('complete', 'eq', False) & Q('deleted', 'ne', True)).count() if addon.settings_models.get('node') else 0
        total = connected_count + deleted_count + disconnected_count
        usage_counts = get_enabled_authorized_linked(user_settings_list, has_external_account, addon.short_name)

        counts.append({
            'provider': {
                'name': short_name
            },
            'users': usage_counts,
            'nodes': {
                'total': total,
                'connected': connected_count,
                'deleted': deleted_count,
                'disconnected': disconnected_count
            }
        })
        # BUG FIX: the message previously contained a raw newline after
        # "counted.", splitting every log entry across two lines.
        logger.info(
            '{} counted. Users with a linked node: {}, Total connected nodes: {}.'.format(
                addon.short_name, usage_counts['linked'], total
            )
        )
    return counts
def get_events(self, date):
    """ Get all node logs from a given date for a 24 hour period,
    ending at the date given.
    """
    super(UserDomainEvents, self).get_events(date)

    # Normalize to midnight UTC so the window covers exactly one day.
    start = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    end = start + timedelta(1)
    logger.info('Gathering user domains between {} and {}'.format(start, end.isoformat()))

    confirmed_in_window = (
        Q('date_confirmed', 'lt', end) &
        Q('date_confirmed', 'gte', start) &
        Q('username', 'ne', None)
    )
    events = []
    for user in paginated(OSFUser, query=confirmed_in_window):
        confirmed_at = user.date_confirmed.replace(tzinfo=pytz.UTC)
        events.append({
            'keen': {'timestamp': confirmed_at.isoformat()},
            'date': confirmed_at.isoformat(),
            # Everything after the final '@' of the email-style username.
            'domain': user.username.split('@')[-1]
        })

    logger.info('User domains collected. {} users and their email domains.'.format(len(events)))
    return events
def get_events(self, date):
    """ Get all node logs from a given date for a 24 hour period,
    ending at the date given.
    """
    super(NodeLogEvents, self).get_events(date)

    # Midnight UTC on the given day; the query window runs one full day forward.
    start = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)
    end = start + timedelta(1)
    logger.info('Gathering node logs between {} and {}'.format(start, end.isoformat()))

    in_window = Q('date', 'lt', end) & Q('date', 'gte', start)

    events = []
    for log in paginated(NodeLog, query=in_window):
        # Force UTC so the keen timestamp serializes consistently.
        when = log.date.replace(tzinfo=pytz.UTC)
        entry = {
            'keen': {'timestamp': when.isoformat()},
            'date': when.isoformat(),
            'action': log.action
        }
        if log.user:
            entry['user_id'] = log.user._id
        events.append(entry)

    logger.info('NodeLogs counted. {} NodeLogs.'.format(len(events)))
    return events
def get_events(self, date): super(UserSummary, self).get_events(date) # Convert to a datetime at midnight for queries and the timestamp timestamp_datetime = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC) query_datetime = timestamp_datetime + timedelta(1) active_user_query = (Q('is_registered', 'eq', True) & Q('password', 'ne', None) & Q('merged_by', 'eq', None) & Q('date_disabled', 'eq', None) & Q('date_confirmed', 'ne', None) & Q('date_confirmed', 'lt', query_datetime)) active_users = 0 depth_users = 0 profile_edited = 0 user_pages = paginated(User, query=active_user_query) for user in user_pages: active_users += 1 log_count = count_user_logs(user) if log_count >= LOG_THRESHOLD: depth_users += 1 if user.social or user.schools or user.jobs: profile_edited += 1 counts = { 'keen': { 'timestamp': timestamp_datetime.isoformat() }, 'status': { 'active': active_users, 'depth': depth_users, 'unconfirmed': User.find( Q('date_registered', 'lt', query_datetime) & Q('date_confirmed', 'eq', None)).count(), 'deactivated': User.find( Q('date_disabled', 'ne', None) & Q('date_disabled', 'lt', query_datetime)).count(), 'merged': User.find( Q('date_registered', 'lt', query_datetime) & Q('merged_by', 'ne', None)).count(), 'profile_edited': profile_edited } } logger.info( 'Users counted. Active: {}, Depth: {}, Unconfirmed: {}, Deactivated: {}, Merged: {}, Profile Edited: {}' .format(counts['status']['active'], counts['status']['depth'], counts['status']['unconfirmed'], counts['status']['deactivated'], counts['status']['merged'], counts['status']['profile_edited'])) return [counts]
def get_events(self, date):
    """ Get all node logs from a given date for a 24 hour period,
    ending at the date given.
    """
    super(NodeLogEvents, self).get_events(date)

    # In the end, turn the date back into a datetime at midnight for queries
    date = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)

    logger.info("Gathering node logs between {} and {}".format(date, (date + timedelta(1)).isoformat()))

    # Logs whose timestamp falls within [date, date + 1 day)
    node_log_query = Q("date", "lt", date + timedelta(1)) & Q("date", "gte", date)

    node_logs = paginated(NodeLog, query=node_log_query)
    node_log_events = []
    for node_log in node_logs:
        # Force UTC so the keen timestamp serializes consistently
        log_date = node_log.date.replace(tzinfo=pytz.UTC)
        event = {
            "keen": {"timestamp": log_date.isoformat()},
            "date": log_date.isoformat(),
            "action": node_log.action,
        }
        if node_log.user:
            event.update({"user_id": node_log.user._id})
        node_log_events.append(event)

    logger.info("NodeLogs counted. {} NodeLogs.".format(len(node_log_events)))
    return node_log_events
def get_events(self, date):
    """ Get all node logs from a given date for a 24 hour period,
    ending at the date given.
    """
    super(UserDomainEvents, self).get_events(date)

    # In the end, turn the date back into a datetime at midnight for queries
    date = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)

    logger.info('Gathering user domains between {} and {}'.format(
        date, (date + timedelta(1)).isoformat()))

    # Users confirmed within [date, date + 1 day) that have a username (email)
    user_query = (Q('date_confirmed', 'lt', date + timedelta(1)) &
                  Q('date_confirmed', 'gte', date) &
                  Q('username', 'ne', None))
    users = paginated(User, query=user_query)
    user_domain_events = []
    for user in users:
        # Force UTC so the keen timestamp serializes consistently
        user_date = user.date_confirmed.replace(tzinfo=pytz.UTC)
        event = {
            'keen': {
                'timestamp': user_date.isoformat()
            },
            'date': user_date.isoformat(),
            # Everything after the final '@' of the email-style username
            'domain': user.username.split('@')[-1]
        }
        user_domain_events.append(event)

    logger.info(
        'User domains collected. {} users and their email domains.'.format(
            len(user_domain_events)))
    return user_domain_events
def update_node(node, index=None, bulk=False):
    """Create, update, or delete the Elasticsearch document for a node.

    Also re-indexes every OsfStorageFile attached to the node so file
    documents stay in sync with the node's visibility. Deleted, private,
    or archiving nodes are removed from the index instead.

    :param node: the node to (re)index
    :param index: search index name; defaults to the module-level INDEX
    :param bulk: when True, return the serialized document for bulk indexing
                 instead of sending it to Elasticsearch directly
    """
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)
    elastic_document_id = node._id
    parent_id = node.parent_id

    from website.files.models.osfstorage import OsfStorageFile
    for file_ in paginated(OsfStorageFile, Q("node", "eq", node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving:
        delete_doc(elastic_document_id, node, index=index)
    else:
        # ASCII-fold the title so accented characters still match searches.
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize("NFKD", normalized_title).encode("ascii", "ignore")
        elastic_document = {
            "id": elastic_document_id,
            "contributors": [
                {"fullname": x.fullname, "url": x.profile_url if x.is_active else None}
                for x in node.visible_contributors
                if x is not None
            ],
            "title": node.title,
            "normalized_title": normalized_title,
            "category": category,
            "public": node.is_public,
            "tags": [tag._id for tag in node.tags if tag],
            "description": node.description,
            "url": node.url,
            "is_registration": node.is_registration,
            "is_pending_registration": node.is_pending_registration,
            "is_retracted": node.is_retracted,
            "is_pending_retraction": node.is_pending_retraction,
            # BUG FIX: the strftime format previously contained a raw newline
            # after "%b. ", splitting the rendered embargo date in two.
            "embargo_end_date": node.embargo_end_date.strftime("%A, %b. %d, %Y") if node.embargo_end_date else False,
            "is_pending_embargo": node.is_pending_embargo,
            "registered_date": node.registered_date,
            "wikis": {},
            "parent_id": parent_id,
            "date_created": node.date_created,
            "license": serialize_node_license_record(node.license),
            "affiliated_institutions": [inst.name for inst in node.affiliated_institutions],
            "boost": int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        if not node.is_retracted:
            for wiki in [NodeWikiPage.load(x) for x in node.wiki_pages_current.values()]:
                elastic_document["wikis"][wiki.page_name] = wiki.raw_text(node)

        if bulk:
            return elastic_document
        else:
            es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
def count_file_downloads():
    """Return (unique, total) download counts summed over all OsfStorageFiles.

    Counter keys have the form 'download:<node_id>:<file_id>';
    get_basic_counters may return None for either value, which is
    treated as zero via the `or 0` fallback.
    """
    downloads_unique, downloads_total = 0, 0
    for record in paginated(OsfStorageFile):
        page = ':'.join(['download', record.node._id, record._id])
        unique, total = get_basic_counters(page)
        downloads_unique += unique or 0
        downloads_total += total or 0
        # NOTE(review): cache clear placed inside the loop, presumably to keep
        # memory bounded while paging — confirm against the original layout.
        clear_modm_cache()
    return downloads_unique, downloads_total
def count_file_downloads():
    """Sum unique and total download counters over every stored file."""
    unique_sum = 0
    total_sum = 0
    for record in paginated(OsfStorageFile):
        counter_key = 'download:{0}:{1}'.format(record.node._id, record._id)
        unique_hits, total_hits = get_basic_counters(counter_key)
        # Counters may come back as None; count those as zero.
        unique_sum += unique_hits if unique_hits else 0
        total_sum += total_hits if total_hits else 0
        clear_modm_cache()
    return unique_sum, total_sum
def main(dry=True):
    """Point every node's root at its computed _root; persist unless dry-run."""
    init_app(set_backends=True, routes=False)  # Sets the storage backends on all models
    fixed = 0
    for node in paginated(Node, increment=1000):
        # Skip nodes whose stored root already matches the computed one.
        root_is_correct = node.root and node.root._id == node._root._id
        if root_is_correct:
            continue
        fixed += 1
        logger.info('Setting root for node {} to {}'.format(node._id, node._root._id))
        node.root = node._root._id
        if not dry:
            node.save()
    logger.info('Finished migrating {} nodes'.format(fixed))
def migrate_users(index):
    """Re-index every active user into the given search index."""
    logger.info('Migrating users to index: {}'.format(index))
    migrated = 0
    examined = 0
    for user in paginated(User, query=None, increment=1000, each=True):
        # Only active users belong in the search index.
        if user.is_active:
            search.update_user(user, index=index)
            migrated += 1
        examined += 1
    logger.info('Users iterated: {0}\nUsers migrated: {1}'.format(examined, migrated))
def get_enabled_authorized_linked(user_settings_list, has_external_account, short_name):
    """ Gather the number of users who have at least one node in each of the stages for an addon

    :param user_settings_list: list of user_settings for a particular addon
    :param has_external_account: where addon is derived from, determines method to load node settings
    :param short_name: short name of addon to get correct node_settings
    :return: dict with number of users that have at least one project at each stage
    """
    from addons.forward.models import NodeSettings as ForwardNodeSettings

    num_enabled = 0  # of users w/ 1+ addon account connected
    num_authorized = 0  # of users w/ 1+ addon account connected to 1+ node
    num_linked = 0  # of users w/ 1+ addon account connected to 1+ node and configured

    # osfstorage and wiki don't have user_settings, so always assume they're
    # enabled, authorized, linked
    if short_name == 'osfstorage' or short_name == 'wiki':
        num_enabled = num_authorized = num_linked = User.find(
            Q('is_registered', 'eq', True) &
            Q('password', 'ne', None) &
            Q('merged_by', 'eq', None) &
            Q('date_disabled', 'eq', None) &
            Q('date_confirmed', 'ne', None)
        ).count()
    elif short_name == 'forward':
        num_enabled = num_authorized = ForwardNodeSettings.find().count()
        num_linked = ForwardNodeSettings.find(Q('url', 'ne', None)).count()
    else:
        for user_settings in paginated(user_settings_list):
            node_settings_list = []
            if has_external_account:
                if user_settings.has_auth:
                    num_enabled += 1
                    # BUG FIX: guard against Node.load returning None for a
                    # stale guid, which previously raised AttributeError.
                    nodes = (Node.load(guid) for guid in user_settings.oauth_grants.keys())
                    node_settings_list = [node.get_addon(short_name) for node in nodes if node]
            else:
                num_enabled += 1
                nodes = (Node.load(guid) for guid in user_settings.nodes_authorized)
                node_settings_list = [node.get_addon(short_name) for node in nodes if node]
            if any([ns.has_auth for ns in node_settings_list if ns]):
                num_authorized += 1
                # Linked implies authorized: only checked inside this branch.
                if any([(ns.complete and ns.configured) for ns in node_settings_list if ns]):
                    num_linked += 1
    return {
        'enabled': num_enabled,
        'authorized': num_authorized,
        'linked': num_linked
    }
def migrate_users(index):
    """Re-index all active users into the given search index.

    :param index: name of the search index to write user documents into
    """
    logger.info('Migrating users to index: {}'.format(index))
    n_migr = 0  # users actually re-indexed
    n_iter = 0  # users examined
    users = paginated(User, query=None, each=True)
    for user in users:
        # Only active users belong in the search index
        if user.is_active:
            search.update_user(user, index=index)
            n_migr += 1
        n_iter += 1
    logger.info('Users iterated: {0}\nUsers migrated: {1}'.format(
        n_iter, n_migr))
def migrate_nodes(index):
    """Bulk re-index all public, non-deleted nodes into the given index.

    :param index: name of the search index to write node documents into
    """
    logger.info('Migrating nodes to index: {}'.format(index))
    query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    total = Node.find(query).count()
    increment = 200
    # BUG FIX: ceiling division; the previous (total // increment) + 1
    # over-reported the page count by one whenever total was an exact
    # multiple of the increment.
    total_pages = -(-total // increment)
    pages = paginated(Node, query=query, increment=increment, each=False)
    for page_number, page in enumerate(pages):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        Node.bulk_update_search(page, index=index)
        # Drop cached model instances after each page to keep memory bounded.
        Node._clear_caches()
    logger.info('Nodes migrated: {}'.format(total))
def get_events(self, date=None):
    """Snapshot per-addon usage: node connection states plus user counts.

    :param date: passed to the superclass hook only; counts reflect current state
    :return: list of dicts, one per addon in ADDONS_AVAILABLE
    """
    super(AddonSnapshot, self).get_events(date)

    counts = []
    addons_available = {k: v for k, v in [(addon.short_name, addon) for addon in ADDONS_AVAILABLE]}

    for short_name, addon in addons_available.iteritems():
        # Addons whose node-settings model has an external_account field are
        # counted as disconnected via that field; others via
        # configured-but-incomplete settings below.
        has_external_account = hasattr(addon.models.get('nodesettings'), 'external_account')

        connected_count = 0
        deleted_count = 0
        disconnected_count = 0
        node_settings_model = addon.models.get('nodesettings')
        if node_settings_model:
            for node_settings in paginated(node_settings_model):
                # Bookmark collections are system nodes; skip them
                if node_settings.owner and not node_settings.owner.is_bookmark_collection:
                    connected_count += 1
        deleted_count = addon.models['nodesettings'].find(Q('deleted', 'eq', True)).count() if addon.models.get('nodesettings') else 0
        if has_external_account:
            disconnected_count = addon.models['nodesettings'].find(Q('external_account', 'eq', None) & Q('deleted', 'ne', True)).count() if addon.models.get('nodesettings') else 0
        else:
            if addon.models.get('nodesettings'):
                for nsm in addon.models['nodesettings'].find(Q('deleted', 'ne', True)):
                    if nsm.configured and not nsm.complete:
                        disconnected_count += 1

        total = connected_count + deleted_count + disconnected_count
        usage_counts = get_enabled_authorized_linked(addon.models.get('usersettings'), has_external_account, addon.short_name)

        counts.append({
            'provider': {
                'name': short_name
            },
            'users': usage_counts,
            'nodes': {
                'total': total,
                'connected': connected_count,
                'deleted': deleted_count,
                'disconnected': disconnected_count
            }
        })
        logger.info(
            '{} counted. Users with a linked node: {}, Total connected nodes: {}.'.format(
                addon.short_name, usage_counts['linked'], total
            )
        )
    return counts
def migrate_nodes(index):
    """Push all public, non-deleted nodes into the given search index, page by page."""
    logger.info('Migrating nodes to index: {}'.format(index))
    public_query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    node_total = Node.find(public_query).count()
    page_size = 200
    page_count = (node_total // page_size) + 1
    page_number = 0
    for page in paginated(Node, query=public_query, increment=page_size, each=False):
        page_number += 1
        logger.info('Updating page {} / {}'.format(page_number, page_count))
        Node.bulk_update_search(page, index=index)
        Node._clear_caches()
    logger.info('Nodes migrated: {}'.format(node_total))
def migrate_nodes(index, query=None):
    """Bulk re-index public, non-deleted nodes into ``index``.

    :param index: search index to write node documents into
    :param query: optional extra query ANDed with the public/non-deleted filter
    """
    logger.info('Migrating nodes to index: {}'.format(index))
    node_query = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    if query:
        node_query = query & node_query
    total = Node.find(node_query).count()
    increment = 200
    total_pages = (total // increment) + 1
    # Prefetch contributor guids to avoid per-node queries during serialization
    pages = paginated(Node, query=node_query, increment=increment, each=False,
                      include=['contributor__user__guids'])
    for page_number, page in enumerate(pages):
        logger.info('Updating page {} / {}'.format(page_number + 1, total_pages))
        Node.bulk_update_search(page, index=index)
    logger.info('Nodes migrated: {}'.format(total))
def main():
    """Provision a Piwik site for every node that does not yet have one."""
    init_app(set_backends=True, routes=False)
    dry = '--dry' in sys.argv
    if not dry:
        scripts_utils.add_file_logger(logger, __file__)
    provisioned = 0
    missing_piwik = paginated(Node, Q('piwik_site_id', 'eq', None), increment=15)
    for node in missing_piwik:
        logger.info('Provisioning Piwik node for Node {}'.format(node._id))
        if not dry:
            piwik._provision_node(node._id)
            # Throttle to reduce load on Piwik
            time.sleep(1)
        provisioned += 1
    logger.info('Provisioned {} nodes'.format(provisioned))
def get_enabled_authorized_linked(user_settings_list, has_external_account, short_name):
    """ Gather the number of users who have at least one node in each of the stages for an addon

    :param user_settings_list: list of user_settings for a particular addon
    :param has_external_account: where addon is derived from, determines method to load node settings
    :param short_name: short name of addon to get correct node_settings
    :return: dict with number of users that have at least one project at each stage
    """
    from addons.forward.models import NodeSettings as ForwardNodeSettings

    num_enabled = 0  # of users w/ 1+ addon account connected
    num_authorized = 0  # of users w/ 1+ addon account connected to 1+ node
    num_linked = 0  # of users w/ 1+ addon account connected to 1+ node and configured

    # osfstorage and wiki don't have user_settings, so always assume they're
    # enabled, authorized, linked
    if short_name == 'osfstorage' or short_name == 'wiki':
        num_enabled = num_authorized = num_linked = User.find(
            Q('is_registered', 'eq', True) &
            Q('password', 'ne', None) &
            Q('merged_by', 'eq', None) &
            Q('date_disabled', 'eq', None) &
            Q('date_confirmed', 'ne', None)
        ).count()
    elif short_name == 'forward':
        num_enabled = num_authorized = ForwardNodeSettings.find().count()
        num_linked = ForwardNodeSettings.find(Q('url', 'ne', None)).count()
    else:
        for user_settings in paginated(user_settings_list):
            node_settings_list = []
            if has_external_account:
                if user_settings.has_auth:
                    num_enabled += 1
                    # NOTE(review): Node.load may return None for a stale guid,
                    # which would raise AttributeError here — confirm guids are live.
                    node_settings_list = [Node.load(guid).get_addon(short_name) for guid in user_settings.oauth_grants.keys()]
            else:
                num_enabled += 1
                node_settings_list = [Node.load(guid).get_addon(short_name) for guid in user_settings.nodes_authorized]
            if any([ns.has_auth for ns in node_settings_list if ns]):
                num_authorized += 1
                # Linked implies authorized: only checked inside this branch
                if any([(ns.complete and ns.configured) for ns in node_settings_list if ns]):
                    num_linked += 1
    return {
        'enabled': num_enabled,
        'authorized': num_authorized,
        'linked': num_linked
    }
def migrate_nodes(index, query=None):
    """Re-index public, non-deleted nodes (optionally further filtered) in pages."""
    logger.info('Migrating nodes to index: {}'.format(index))
    base_filter = Q('is_public', 'eq', True) & Q('is_deleted', 'eq', False)
    node_query = (query & base_filter) if query else base_filter
    matched = Node.find(node_query).count()
    page_size = 200
    page_total = (matched // page_size) + 1
    node_pages = paginated(Node, query=node_query, increment=page_size,
                           each=False, include=['contributor__user__guids'])
    page_index = 0
    for page in node_pages:
        page_index += 1
        logger.info('Updating page {} / {}'.format(page_index, page_total))
        Node.bulk_update_search(page, index=index)
    logger.info('Nodes migrated: {}'.format(matched))
def get_events(self, date):
    """ Get all node logs from a given date for a 24 hour period,
    ending at the date given.
    """
    super(NodeLogEvents, self).get_events(date)

    # In the end, turn the date back into a datetime at midnight for queries
    date = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC)

    logger.info('Gathering node logs between {} and {}'.format(
        date, (date + timedelta(1)).isoformat()))

    # Logs whose timestamp falls within [date, date + 1 day)
    node_log_query = Q('date', 'lt', date + timedelta(1)) & Q(
        'date', 'gte', date)
    node_logs = paginated(NodeLog, query=node_log_query)

    node_log_events = []
    for node_log in node_logs:
        # Force UTC so the keen timestamp serializes consistently
        log_date = node_log.date.replace(tzinfo=pytz.UTC)
        event = {
            'keen': {
                'timestamp': log_date.isoformat()
            },
            'date': log_date.isoformat(),
            'action': node_log.action
        }
        if node_log.user:
            event.update({'user_id': node_log.user._id})
        node_log_events.append(event)

    logger.info('NodeLogs counted. {} NodeLogs.'.format(
        len(node_log_events)))
    return node_log_events
def get_events(self, date=None):
    """Snapshot per-addon usage: node connection states plus user counts.

    :param date: passed to the superclass hook only; counts reflect current state
    :return: list of dicts, one per addon in ADDONS_AVAILABLE
    """
    super(AddonSnapshot, self).get_events(date)
    from website.settings import ADDONS_AVAILABLE

    counts = []
    addons_available = {
        k: v
        for k, v in [(addon.short_name, addon) for addon in ADDONS_AVAILABLE]
    }
    for short_name, addon in addons_available.iteritems():
        # Assume an external account until a settings class derived straight
        # from AddonNodeSettingsBase is seen (checked again per node below).
        has_external_account = True
        if addon.settings_models.get('user'):
            if AddonNodeSettingsBase in addon.settings_models[
                    'user'].__class__.__bases__:
                has_external_account = False

        connected_count = 0
        deleted_count = 0
        disconnected_count = 0
        node_settings_model = addon.settings_models.get('node')
        if node_settings_model:
            for node_settings in paginated(node_settings_model):
                if AddonNodeSettingsBase in node_settings.__class__.__bases__:
                    has_external_account = False
                # Bookmark collections are system nodes; skip them
                if node_settings.owner and not node_settings.owner.is_bookmark_collection:
                    connected_count += 1
        deleted_count = addon.settings_models['node'].find(
            Q('deleted', 'eq', True)).count(
            ) if addon.settings_models.get('node') else 0
        if has_external_account:
            disconnected_count = addon.settings_models['node'].find(
                Q('external_account', 'eq', None) &
                Q('deleted', 'ne', True)).count(
                ) if addon.settings_models.get('node') else 0
        else:
            disconnected_count = addon.settings_models['node'].find(
                Q('configured', 'eq', True) &
                Q('complete', 'eq', False) &
                Q('deleted', 'ne', True)).count(
                ) if addon.settings_models.get('node') else 0

        total = connected_count + deleted_count + disconnected_count
        usage_counts = get_enabled_authorized_linked(
            addon.settings_models.get('user'), has_external_account,
            addon.short_name)

        counts.append({
            'provider': {
                'name': short_name
            },
            'users': usage_counts,
            'nodes': {
                'total': total,
                'connected': connected_count,
                'deleted': deleted_count,
                'disconnected': disconnected_count
            }
        })
        logger.info(
            '{} counted. Users with a linked node: {}, Total connected nodes: {}.'
            .format(addon.short_name, usage_counts['linked'], total))
    return counts
} if not node.is_retracted: for wiki in [ NodeWikiPage.load(x) for x in node.wiki_pages_current.values() ]: elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node) return elastic_document @requires_search def update_node(node, index=None, bulk=False, async=False): index = index or INDEX from website.files.models.osfstorage import OsfStorageFile for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)): update_file(file_.wrapped(), index=index) if node.is_deleted or not node.is_public or node.archiving or (node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH): delete_doc(node._id, node, index=index) else: category = get_doctype_from_node(node) elastic_document = serialize_node(node, category) if bulk: return elastic_document else: es.index(index=index, doc_type=category, id=node._id, body=elastic_document, refresh=True) def bulk_update_nodes(serialize, nodes, index=None): """Updates the list of input projects
def update_node(node, index=None, bulk=False):
    """Create, update, or delete the Elasticsearch document for a node.

    Also re-indexes every OsfStorageFile attached to the node so file
    documents stay in sync with the node's visibility. Deleted, private,
    or archiving nodes are removed from the index instead.

    :param node: the node to (re)index
    :param index: search index name; defaults to the module-level INDEX
    :param bulk: when True, return the serialized document for bulk indexing
                 instead of sending it to Elasticsearch directly
    """
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)
    elastic_document_id = node._id
    parent_id = node.parent_id

    from website.files.models.osfstorage import OsfStorageFile
    for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving:
        # BUG FIX: pass the target index so deletions honor the `index`
        # argument instead of always hitting the default index.
        delete_doc(elastic_document_id, node, index=index)
    else:
        # ASCII-fold the title so accented characters still match searches.
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize('NFKD', normalized_title).encode('ascii', 'ignore')
        elastic_document = {
            'id': elastic_document_id,
            'contributors': [
                {
                    'fullname': x.fullname,
                    'url': x.profile_url if x.is_active else None
                }
                for x in node.visible_contributors
                if x is not None
            ],
            'title': node.title,
            'normalized_title': normalized_title,
            'category': category,
            'public': node.is_public,
            'tags': [tag._id for tag in node.tags if tag],
            'description': node.description,
            'url': node.url,
            'is_registration': node.is_registration,
            'is_pending_registration': node.is_pending_registration,
            'is_retracted': node.is_retracted,
            'is_pending_retraction': node.is_pending_retraction,
            # BUG FIX: the strftime format previously contained a raw newline
            # after "%b. ", splitting the rendered embargo date in two.
            'embargo_end_date': node.embargo_end_date.strftime('%A, %b. %d, %Y') if node.embargo_end_date else False,
            'is_pending_embargo': node.is_pending_embargo,
            'registered_date': node.registered_date,
            'wikis': {},
            'parent_id': parent_id,
            'date_created': node.date_created,
            'license': serialize_node_license_record(node.license),
            'affiliated_institutions': [inst.name for inst in node.affiliated_institutions],
            'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        if not node.is_retracted:
            for wiki in [NodeWikiPage.load(x) for x in node.wiki_pages_current.values()]:
                elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

        if bulk:
            return elastic_document
        else:
            es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
def update_node(node, index=None, bulk=False):
    """Create, update, or delete the Elasticsearch document for a node.

    Also re-indexes every OsfStorageFile attached to the node so file
    documents stay in sync with the node's visibility. Deleted, private,
    or archiving nodes are removed from the index instead.

    :param node: the node to (re)index
    :param index: search index name; defaults to the module-level INDEX
    :param bulk: when True, return the serialized document for bulk indexing
                 instead of sending it to Elasticsearch directly
    """
    index = index or INDEX
    from website.addons.wiki.model import NodeWikiPage

    category = get_doctype_from_node(node)
    elastic_document_id = node._id
    parent_id = node.parent_id

    from website.files.models.osfstorage import OsfStorageFile
    for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)):
        update_file(file_, index=index)

    if node.is_deleted or not node.is_public or node.archiving:
        # BUG FIX: pass the target index so deletions honor the `index`
        # argument instead of always hitting the default index.
        delete_doc(elastic_document_id, node, index=index)
    else:
        # ASCII-fold the title so accented characters still match searches.
        try:
            normalized_title = six.u(node.title)
        except TypeError:
            normalized_title = node.title
        normalized_title = unicodedata.normalize('NFKD', normalized_title).encode('ascii', 'ignore')
        elastic_document = {
            'id': elastic_document_id,
            'contributors': [
                {
                    'fullname': x.fullname,
                    'url': x.profile_url if x.is_active else None
                }
                for x in node.visible_contributors
                if x is not None
            ],
            'title': node.title,
            'normalized_title': normalized_title,
            'category': category,
            'public': node.is_public,
            'tags': [tag._id for tag in node.tags if tag],
            'description': node.description,
            'url': node.url,
            'is_registration': node.is_registration,
            'is_pending_registration': node.is_pending_registration,
            'is_retracted': node.is_retracted,
            'is_pending_retraction': node.is_pending_retraction,
            # BUG FIX: the strftime format previously contained a raw newline
            # after "%b. ", splitting the rendered embargo date in two.
            'embargo_end_date': node.embargo_end_date.strftime('%A, %b. %d, %Y') if node.embargo_end_date else False,
            'is_pending_embargo': node.is_pending_embargo,
            'registered_date': node.registered_date,
            'wikis': {},
            'parent_id': parent_id,
            'date_created': node.date_created,
            'license': serialize_node_license_record(node.license),
            'primary_institution': node.primary_institution.name if node.primary_institution else None,
            'boost': int(not node.is_registration) + 1,  # This is for making registered projects less relevant
        }
        if not node.is_retracted:
            for wiki in [NodeWikiPage.load(x) for x in node.wiki_pages_current.values()]:
                elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node)

        if bulk:
            return elastic_document
        else:
            es.index(index=index, doc_type=category, id=elastic_document_id, body=elastic_document, refresh=True)
def get_events(self, date): super(UserSummary, self).get_events(date) # Convert to a datetime at midnight for queries and the timestamp timestamp_datetime = datetime(date.year, date.month, date.day).replace(tzinfo=pytz.UTC) query_datetime = timestamp_datetime + timedelta(1) active_user_query = ( Q('is_registered', 'eq', True) & Q('password', 'ne', None) & Q('merged_by', 'eq', None) & Q('date_disabled', 'eq', None) & Q('date_confirmed', 'ne', None) & Q('date_confirmed', 'lt', query_datetime) ) active_users = 0 depth_users = 0 profile_edited = 0 user_pages = paginated(User, query=active_user_query) for user in user_pages: active_users += 1 log_count = count_user_logs(user) if log_count >= LOG_THRESHOLD: depth_users += 1 if user.social or user.schools or user.jobs: profile_edited += 1 counts = { 'keen': { 'timestamp': timestamp_datetime.isoformat() }, 'status': { 'active': active_users, 'depth': depth_users, 'unconfirmed': User.find( Q('date_registered', 'lt', query_datetime) & Q('date_confirmed', 'eq', None) ).count(), 'deactivated': User.find( Q('date_disabled', 'ne', None) & Q('date_disabled', 'lt', query_datetime) ).count(), 'merged': User.find( Q('date_registered', 'lt', query_datetime) & Q('merged_by', 'ne', None) ).count(), 'profile_edited': profile_edited } } logger.info( 'Users counted. Active: {}, Depth: {}, Unconfirmed: {}, Deactivated: {}, Merged: {}, Profile Edited: {}'.format( counts['status']['active'], counts['status']['depth'], counts['status']['unconfirmed'], counts['status']['deactivated'], counts['status']['merged'], counts['status']['profile_edited'] ) ) return [counts]
} if not node.is_retracted: for wiki in [ NodeWikiPage.load(x) for x in node.wiki_pages_current.values() ]: elastic_document['wikis'][wiki.page_name] = wiki.raw_text(node) return elastic_document @requires_search def update_node(node, index=None, bulk=False, async=False): index = index or INDEX from website.files.models.osfstorage import OsfStorageFile for file_ in paginated(OsfStorageFile, Q('node', 'eq', node)): update_file(file_.wrapped(), index=index) if node.is_deleted or not node.is_public or node.archiving or ( node.is_spammy and settings.SPAM_FLAGGED_REMOVE_FROM_SEARCH): delete_doc(node._id, node, index=index) else: category = get_doctype_from_node(node) elastic_document = serialize_node(node, category) if bulk: return elastic_document else: es.index(index=index, doc_type=category, id=node._id, body=elastic_document,