def migrate(dry_run=True):
    """Rename primary-institution logs to affiliation logs and fold each
    node's primary institution into its affiliated-institutions list.

    Raises RuntimeError at the end in dry-run mode so an enclosing
    transaction rolls everything back.
    """
    for entry in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_CHANGED)):
        logger.info('Log with id <{}> being updated for affiliation added'.format(entry._id))
        entry.action = NodeLog.AFFILIATED_INSTITUTION_ADDED
        entry.save()

    for entry in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_REMOVED)):
        logger.info('Log with id <{}> being updated for affiliation removed'.format(entry._id))
        entry.action = NodeLog.AFFILIATED_INSTITUTION_REMOVED
        entry.save()

    for node in Node.find(Q('primary_institution', 'ne', None)):
        logger.info('Node with id <{}> and title <{}> being updated'.format(node._id, node.title))
        institution = node.primary_institution
        if institution not in node.affiliated_institutions:
            node.affiliated_institutions.append(institution)
        node.primary_institution = None
        node.save()

    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
def delete(self, request, *args, **kwargs):
    """Remove a contributor from a node, recording the action both in the
    admin log and as a hidden NodeLog on the OSF."""
    try:
        node, user = self.get_object()
        removed = node.remove_contributor(user, None, log=False)
        if removed:
            update_admin_log(
                user_id=self.request.user.id,
                object_id=node.pk,
                object_repr='Contributor',
                message='User {} removed from node {}.'.format(user.pk, node.pk),
                action_flag=CONTRIBUTOR_REMOVED,
            )
            # Log invisibly on the OSF.
            NodeLog(
                action=NodeLog.CONTRIB_REMOVED,
                user=None,
                params={
                    'project': node.parent_id,
                    'node': node.pk,
                    'contributors': user.pk,
                },
                date=datetime.utcnow(),
                should_hide=True,
            ).save()
    except AttributeError:
        # get_object() found nothing to unpack.
        return page_not_found(
            request,
            AttributeError('{} with id "{}" not found.'.format(
                self.context_object_name.title(), kwargs.get('node_id'))))
    return redirect(reverse_node(self.kwargs.get('node_id')))
def delete(self, request, *args, **kwargs):
    """Detach a contributor from a node and log the removal.

    Writes an admin-log entry and a hidden OSF NodeLog; renders a 404
    page when the target cannot be resolved.
    """
    try:
        node, user = self.get_object()
        if node.remove_contributor(user, None, log=False):
            removal_message = "User {} removed from node {}.".format(user.pk, node.pk)
            update_admin_log(
                user_id=self.request.user.id,
                object_id=node.pk,
                object_repr="Contributor",
                message=removal_message,
                action_flag=CONTRIBUTOR_REMOVED,
            )
            # Log invisibly on the OSF.
            hidden_log = NodeLog(
                action=NodeLog.CONTRIB_REMOVED,
                user=None,
                params={"project": node.parent_id, "node": node.pk, "contributors": user.pk},
                date=datetime.utcnow(),
                should_hide=True,
            )
            hidden_log.save()
    except AttributeError:
        missing = AttributeError(
            '{} with id "{}" not found.'.format(self.context_object_name.title(), kwargs.get("node_id"))
        )
        return page_not_found(request, missing)
    return redirect(reverse_node(self.kwargs.get("node_id")))
def delete(self, request, *args, **kwargs):
    """Toggle a node's soft-deleted state.

    A deleted node is restored; a live, non-registration node is
    soft-deleted. Each transition is recorded in the admin log and as a
    hidden OSF NodeLog.
    """
    try:
        node = self.get_object()
        flag = osf_flag = message = None
        if node.is_deleted:
            # Restore a previously deleted node.
            node.is_deleted = False
            node.deleted_date = None
            flag = NODE_RESTORED
            message = 'Node {} restored.'.format(node.pk)
            osf_flag = NodeLog.NODE_CREATED
        elif not node.is_registration:
            # Soft-delete; registrations are never deleted here.
            node.is_deleted = True
            node.deleted_date = datetime.utcnow()
            flag = NODE_REMOVED
            message = 'Node {} removed.'.format(node.pk)
            osf_flag = NodeLog.NODE_REMOVED
        node.save()
        if flag is not None:
            update_admin_log(
                user_id=self.request.user.id,
                object_id=node.pk,
                object_repr='Node',
                message=message,
                action_flag=flag,
            )
        if osf_flag is not None:
            # Log invisibly on the OSF.
            NodeLog(
                action=osf_flag,
                user=None,
                params={'project': node.parent_id},
                date=datetime.utcnow(),
                should_hide=True,
            ).save()
    except AttributeError:
        return page_not_found(
            request,
            AttributeError('{} with id "{}" not found.'.format(
                self.context_object_name.title(), kwargs.get('guid'))))
    return redirect(reverse_node(self.kwargs.get('guid')))
def delete(self, request, *args, **kwargs):
    """Flip a node between deleted and restored states.

    Uses timezone-aware timestamps (``timezone.now``) and records the
    transition in the admin log plus a hidden OSF NodeLog.
    """
    try:
        node = self.get_object()
        flag, osf_flag, message = None, None, None
        if node.is_deleted:
            # Undelete.
            node.is_deleted = False
            node.deleted_date = None
            flag, osf_flag = NODE_RESTORED, NodeLog.NODE_CREATED
            message = 'Node {} restored.'.format(node.pk)
        elif not node.is_registration:
            # Soft-delete everything except registrations.
            node.is_deleted = True
            node.deleted_date = timezone.now()
            flag, osf_flag = NODE_REMOVED, NodeLog.NODE_REMOVED
            message = 'Node {} removed.'.format(node.pk)
        node.save()
        if flag is not None:
            update_admin_log(
                user_id=self.request.user.id,
                object_id=node.pk,
                object_repr='Node',
                message=message,
                action_flag=flag,
            )
        if osf_flag is not None:
            # Log invisibly on the OSF.
            hidden = NodeLog(
                action=osf_flag,
                user=None,
                params={'project': node.parent_id},
                date=timezone.now(),
                should_hide=True,
            )
            hidden.save()
    except AttributeError:
        return page_not_found(
            request,
            AttributeError('{} with id "{}" not found.'.format(
                self.context_object_name.title(), kwargs.get('guid'))))
    return redirect(reverse_node(self.kwargs.get('guid')))
def do_migration(records, dry=False):
    """Reattach was-connected-to logs to forked/registered nodes.

    For each node, gathers the logs that were once connected to it, drops
    those belonging to a node outside its parent chain, and prepends the
    remainder to the node's existing logs inside a transaction.

    Bug fix: the original called ``logs.remove(log)`` while iterating
    ``logs``, which skips the element immediately following every
    removal. Filtering now builds a fresh list instead.

    :param records: iterable of nodes to migrate
    :param dry: when True, log the plan but do not save
    """
    for node in records:
        candidates = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs
        logs = []
        for log in candidates:
            if log.node__logged:
                log_node = log.node__logged[0]
                # if the log_node is not contained in the node parent list
                # then it doesn't belong to this node
                if log_node not in get_all_parents(node):
                    logger.info('Excluding log {} from list because it is not associated with node {}'.format(log, node))
                    continue
            logs.append(log)
        with TokuTransaction():
            node.logs = logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(len(logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    logger.error('Could not update logs for node {} due to error'.format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
def migrate(dry_run=True):
    """Backfill ``params['preprint']`` on preprint logs that lack it."""
    query = (
        Q("action", "in", [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED])
        & Q("params.preprint", "exists", False)
    )
    node_logs = list(NodeLog.find(query))
    logger.info("Preparing to migrate {} NodeLogs".format(len(node_logs)))
    migrated = 0
    for log in node_logs:
        node_id = log.params.get("node")
        try:
            preprint = PreprintService.find_one(Q("node", "eq", node_id))
        except NoResultsFound:
            logger.error("Skipping {}, preprint not found for node: {}".format(log._id, node_id))
            continue
        logger.info("Migrating log - {} - to add params.preprint: {}, ".format(log._id, preprint._id))
        log.params["preprint"] = preprint._id
        log.save()
        migrated += 1
    logger.info("Migrated {} logs".format(migrated))
def migrate(dry_run=True):
    """Attach the owning preprint id to preprint-related logs missing it."""
    target_actions = [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED]
    missing_param = Q('params.preprint', 'exists', False)
    node_logs = list(NodeLog.find(Q('action', 'in', target_actions) & missing_param))
    logger.info('Preparing to migrate {} NodeLogs'.format(len(node_logs)))
    updated_count = 0
    for log in node_logs:
        node_id = log.params.get('node')
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', node_id))
        except NoResultsFound:
            # No preprint for this node; leave the log untouched.
            logger.error('Skipping {}, preprint not found for node: {}'.format(log._id, node_id))
            continue
        logger.info(
            'Migrating log - {} - to add params.preprint: {}, '.format(log._id, preprint._id)
        )
        log.params['preprint'] = preprint._id
        log.save()
        updated_count += 1
    logger.info('Migrated {} logs'.format(updated_count))
def do_migration(records, dry=False):
    """Restore parent-chain logs to forked/registered nodes.

    Bug fix: the original removed entries from ``logs`` while iterating
    the same list, which silently skips the element after each removal.
    The filter now collects kept logs into a separate list.

    :param records: iterable of nodes to process
    :param dry: when True, skip saving
    """
    for node in records:
        found = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs
        logs = []
        for log in found:
            if log.node__logged:
                # if the log_node is not contained in the node parent list then it doesn't belong to this node
                log_node = log.node__logged[0]
                if log_node not in get_all_parents(node):
                    logger.info('Excluding log {} from list because it is not associated with node {}'.format(log, node))
                    continue
            logs.append(log)
        with TokuTransaction():
            node.logs = logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(len(logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    logger.error('Could not update logs for node {} due to error'.format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
def find_invalid_logs():
    """Yield wiki-deleted logs whose recorded date predates the creation
    time embedded in their ObjectId (i.e. an impossible timeline)."""
    for log in NodeLog.find(Q('action', 'eq', NodeLog.WIKI_DELETED)):
        # Derive UTC datetime object from ObjectId
        created = ObjectId(log._id).generation_time
        created_utc_naive = created.replace(tzinfo=None) - created.utcoffset()
        if created_utc_naive > log.date:
            yield log
def count_user_logs(user):
    """Count a user's logs, discounting the bookmark-collection creation
    log when the total sits exactly at LOG_THRESHOLD."""
    logs = NodeLog.find(Q('user', 'eq', user._id))
    total = logs.count()
    if total == LOG_THRESHOLD:
        first = logs[0]
        if first.action == 'project_created' and first.node.is_bookmark_collection:
            total -= 1
    return total
def user_last_log(user, query=None):
    """Return the date of the last log matching the user (optionally
    AND-ed with an extra query)."""
    user_clause = Q('user', 'eq', user._id)
    combined = (query & user_clause) if query else user_clause
    node_logs = NodeLog.find(combined)
    return node_logs[node_logs.count() - 1].date
def get_log(self):
    """Load the NodeLog named by the ``log_id`` URL kwarg, enforcing
    object-level permissions; 404 when no such log exists."""
    log_id = self.kwargs.get('log_id')
    log = NodeLog.load(log_id)
    if not log:
        raise NotFound(detail='No log matching that log_id could be found.')
    self.check_object_permissions(self.request, log)
    return log
def user_last_log(user, query=None):
    """Date of the user's final matching log entry."""
    if query:
        query &= Q('user', 'eq', user._id)
    else:
        query = Q('user', 'eq', user._id)
    matching = NodeLog.find(query)
    last_index = matching.count() - 1
    return matching[last_index].date
def get_log(self):
    """Fetch and permission-check the log referenced in the URL kwargs."""
    log = NodeLog.load(self.kwargs.get('log_id'))
    if not log:
        # Unknown id: surface a 404 to the API consumer.
        raise NotFound(detail='No log matching that log_id could be found.')
    self.check_object_permissions(self.request, log)
    return log
def get_queryset(self):
    """Return the nodes attached to the requested log that the current
    requester is allowed to view."""
    log = NodeLog.load(self.kwargs.get('log_id'))
    if not log:
        raise NotFound(detail='No log matching that log_id could be found.')
    auth_user = get_user_auth(self.request)
    visible_nodes = []
    for node in log.node__logged:
        if node.can_view(auth_user):
            visible_nodes.append(node)
    return visible_nodes
def get_registered_from(registration):
    """Return the id of the node this registration was made from.

    Falls back to the registration's first log when ``registered_from``
    is null (deleted registrations).
    """
    if registration.registered_from:
        return registration.registered_from_id
    # Deleted registration: recover the source node from the first log.
    node_doc = db['node'].find_one({'_id': registration._id})
    log = NodeLog.load(node_doc['logs'][0])
    return log.params.get('node') or log.params.get('project')
def count_user_logs(user, query=None):
    """Count matching logs for a user, discounting the dashboard
    creation log when present as the first entry."""
    user_clause = Q('user', 'eq', user._id)
    combined = (query & user_clause) if query else user_clause
    logs = NodeLog.find(combined)
    total = logs.count()
    if total > 0:
        first = logs[0]
        if first.action == 'project_created' and first.node.is_dashboard:
            total -= 1
    return total
def get_queryset(self):
    """List nodes logged by the requested log, filtered to those the
    requesting user can view."""
    log = NodeLog.load(self.kwargs.get('log_id'))
    if not log:
        raise NotFound(detail='No log matching that log_id could be found.')
    requester = get_user_auth(self.request)
    return [n for n in log.node__logged if n.can_view(requester)]
def main(dry):
    """Backfill preprint (and provider) info onto past preprint logs.

    :param dry: when True, log the planned updates without saving
    """
    if dry:
        logging.warn('DRY mode running')
    now = datetime.utcnow()
    before_now = Q('date', 'lt', now)

    for log in NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & before_now):
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
        except NoResultsFound:
            continue
        log.params.update({
            'preprint': {'id': preprint._id},
            'service': {'name': preprint.provider.name},
        })
        logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
        if not dry:
            log.save()

    for log in NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED) & before_now):
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
        except NoResultsFound:
            continue
        log.params.update({'preprint': {'id': preprint._id}})
        logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
        if not dry:
            log.save()
def migrate(dry_run=True):
    """Convert primary-institution logs/fields to the newer
    affiliated-institution representation.

    In dry-run mode a RuntimeError is raised at the end so a wrapping
    transaction rolls the changes back.
    """
    # Re-label "changed" logs as "affiliation added".
    for log in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_CHANGED)):
        logger.info('Log with id <{}> being updated for affiliation added'.format(log._id))
        log.action = NodeLog.AFFILIATED_INSTITUTION_ADDED
        log.save()
    # Re-label "removed" logs as "affiliation removed".
    for log in NodeLog.find(Q('action', 'eq', PRIMARY_INSTITUTION_REMOVED)):
        logger.info('Log with id <{}> being updated for affiliation removed'.format(log._id))
        log.action = NodeLog.AFFILIATED_INSTITUTION_REMOVED
        log.save()
    # Move each node's primary institution into its affiliation list.
    for node in Node.find(Q('primary_institution', 'ne', None)):
        logger.info('Node with id <{}> and title <{}> being updated'.format(node._id, node.title))
        inst = node.primary_institution
        if inst not in node.affiliated_institutions:
            node.affiliated_institutions.append(inst)
        node.primary_institution = None
        node.save()
    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
def get_targets():
    """
    These logs are potentially missing params['registration'] fields.
    Params['node'] and original_node fields may incorrectly be pointing
    to the registration instead of the node.
    """
    actions = [
        'registration_cancelled',
        'retraction_approved',
        'retraction_cancelled',
        'embargo_approved',
        'embargo_cancelled',
        'embargo_terminated',
    ]
    # OR the per-action clauses together into one query.
    query = Q('action', 'eq', actions[0])
    for action in actions[1:]:
        query = query | Q('action', 'eq', action)
    return NodeLog.find(query)
def get_or_create_node(node_id, sqlite_db):
    """Gets an OSF node from the sqlite cache. If not found, pulls the node
    info from mongo and saves it.

    Security fix: the SELECT was previously built with ``str.format``,
    allowing SQL injection through ``node_id``; it now uses a DB-API
    parameterized query.

    :param node_id: OSF node id (e.g. 'mst3k')
    :param sqlite_db: SQLite3 database handle
    :return: node dict
    """
    if node_id is None:
        return None
    cursor = sqlite_db.cursor()
    # Parameterized query — never interpolate node_id into the SQL text.
    cursor.execute("SELECT * FROM nodes WHERE id=?", (node_id,))
    nodes = cursor.fetchall()
    if len(nodes) > 1:
        raise Exception("Multiple nodes found for single node ID")
    if nodes:
        return nodes[0]
    node = Node.load(node_id)
    if node is None:
        return None
    node_public_date = None
    # Most recent made_public/made_private event decides public date.
    privacy_actions = NodeLog.find(
        Q('node', 'eq', node_id) &
        Q('action', 'in', [NodeLog.MADE_PUBLIC, NodeLog.MADE_PRIVATE])
    ).sort('-date')
    try:
        privacy_action = privacy_actions[0]
    except IndexError:
        pass  # No privacy events: node was never made public.
    else:
        if privacy_action.action == NodeLog.MADE_PUBLIC:
            node_public_date = privacy_action.date.isoformat()
            # Truncate microseconds to milliseconds and mark as UTC.
            node_public_date = node_public_date[:-3] + 'Z'
    cursor.execute(
        u'INSERT INTO nodes (id, title, category, made_public_date) VALUES (?, ?, ?, ?)',
        (node_id, getattr(node, 'title'), getattr(node, 'category'), node_public_date)
    )
    sqlite_db.commit()
    # Re-read through the cache path so the caller gets the cached row shape.
    return get_or_create_node(node_id, sqlite_db)
def main(dry):
    """Annotate historical preprint logs with preprint/provider params."""
    if dry:
        logging.warn('DRY mode running')
    now = datetime.utcnow()

    initiated_logs = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & Q('date', 'lt', now))
    for log in initiated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            new_params = {
                'preprint': {'id': preprint._id},
                'service': {'name': preprint.provider.name},
            }
            log.params.update(new_params)
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Node has no preprint; nothing to annotate.
            pass

    updated_logs = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED) & Q('date', 'lt', now))
    for log in updated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({'preprint': {'id': preprint._id}})
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            pass
def main():
    """Verify migrated Django NodeLogs against their MODM originals.

    Streams every Django NodeLog (server-side cursor, newest first) and
    prints a diagnostic line for each field that disagrees with the
    corresponding MODM record. Python 2 script (print statements).
    """
    total = NodeLog.objects.all().count()
    count = 0
    page_size = 50000
    with transaction.atomic():
        # Prefetch related rows so field comparisons don't issue extra queries.
        qs = NodeLog.objects.all().order_by('-date').select_related(
            'user').select_related('node').select_related(
                'user___guid').select_related('node___guid')
        with server_side_cursors(qs, itersize=page_size):
            for log in qs.iterator():
                modm_nodelog = MODMNodeLog.load(log.guid)
                if modm_nodelog is not None:
                    modm_node = modm_nodelog.node
                    modm_user = modm_nodelog.user
                    # Field-by-field comparison; each mismatch is printed, none raises.
                    if log.user is not None and log.user._guid.guid != modm_user._id:
                        print 'User doesn\'t match on log {}; {} != {}'.format(
                            log.guid, modm_user._id, log.user._guid.guid)
                    if log.node is not None and log.node._guid.guid != modm_nodelog.node._id:
                        print 'Node doesn\'t match on log {}; {} != {}'.format(
                            log.guid, modm_nodelog.node._id, log.node._guid.guid)
                    # MODM dates are naive; localize to UTC before comparing.
                    if log.date is not None and pytz.utc.localize(
                            modm_nodelog.date) != log.date:
                        print 'Date doesn\'t match on log {}'.format(log.guid)
                    if log.action is not None and log.action != modm_nodelog.action:
                        print 'Action doesn\'t match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.action, log.action)
                    if log.params is not None and log.params != modm_nodelog.params:
                        print 'Params doesn\'t match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.params, log.params)
                    if log.should_hide is not None and log.should_hide != modm_nodelog.should_hide:
                        # NOTE: "does'nt" typo is preserved from the original message.
                        print 'Should_hide does\'nt match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.should_hide, log.should_hide)
                    if log.foreign_user is not None and log.foreign_user != '' and log.foreign_user != modm_nodelog.foreign_user:
                        print 'Foreign_user doesn\'t match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.foreign_user, log.foreign_user)
                else:
                    print 'MODMNodeLog with id {} not found.'.format(log.guid)
                count += 1
                # Periodically clear MODM caches so memory stays bounded.
                if count % page_size == 0:
                    MODMNodeLog._cache.clear()
                    MODMNodeLog._object_cache.clear()
                    print '{} through {}'.format(count, count + page_size)
def get_targets():
    """
    Fetches all registration-related logs except for project_registered.
    project_registered log is not included because params already correct.
    """
    registration_actions = [
        'registration_initiated',
        'registration_approved',
        'registration_cancelled',
        # On staging, there are a few inconsistencies with these. Majority of
        # params['node'] are registrations, but a handful are nodes.
        'retraction_initiated',
        'retraction_approved',
        # params['node'] is already equal to node. Adds registration_field
        # below. Will be slow.
        'retraction_cancelled',
        'embargo_initiated',
        'embargo_approved',
        'embargo_completed',
        'embargo_cancelled',
    ]
    query = Q('action', 'eq', registration_actions[0])
    for action in registration_actions[1:]:
        query = query | Q('action', 'eq', action)
    return NodeLog.find(query)
def get_targets():
    """
    Fetches all registration-related logs except for project_registered.
    project_registered log is not included because params already correct.
    """
    # Build one OR-chain over every registration/retraction/embargo action.
    query = (
        Q('action', 'eq', 'registration_initiated') |
        Q('action', 'eq', 'registration_approved') |
        Q('action', 'eq', 'registration_cancelled') |
        # On staging, there are a few inconsistencies with these. Majority of
        # params['node'] are registrations, but a handful are nodes.
        Q('action', 'eq', 'retraction_initiated') |
        Q('action', 'eq', 'retraction_approved') |
        # params['node'] is already equal to node. Adds registration_field
        # below. Will be slow.
        Q('action', 'eq', 'retraction_cancelled') |
        Q('action', 'eq', 'embargo_initiated') |
        Q('action', 'eq', 'embargo_approved') |
        Q('action', 'eq', 'embargo_completed') |
        Q('action', 'eq', 'embargo_cancelled')
    )
    return NodeLog.find(query)
def main():
    """Cross-check migrated Django NodeLogs against MODM source records.

    Iterates the full NodeLog table with a server-side cursor and prints a
    line per mismatched field (user, node, date, action, params,
    should_hide, foreign_user). Python 2 script (print statements).
    """
    total = NodeLog.objects.all().count()
    count = 0
    page_size = 50000
    with transaction.atomic():
        # select_related keeps the per-row comparisons query-free.
        qs = NodeLog.objects.all().order_by('-date').select_related('user').select_related('node').select_related('user___guid').select_related('node___guid')
        with server_side_cursors(qs, itersize=page_size):
            for log in qs.iterator():
                modm_nodelog = MODMNodeLog.load(log.guid)
                if modm_nodelog is not None:
                    modm_node = modm_nodelog.node
                    modm_user = modm_nodelog.user
                    if log.user is not None and log.user._guid.guid != modm_user._id:
                        print 'User doesn\'t match on log {}; {} != {}'.format(
                            log.guid, modm_user._id, log.user._guid.guid)
                    if log.node is not None and log.node._guid.guid != modm_nodelog.node._id:
                        print 'Node doesn\'t match on log {}; {} != {}'.format(
                            log.guid, modm_nodelog.node._id, log.node._guid.guid)
                    # MODM stores naive datetimes; localize before comparing.
                    if log.date is not None and pytz.utc.localize(
                            modm_nodelog.date) != log.date:
                        print 'Date doesn\'t match on log {}'.format(log.guid)
                    if log.action is not None and log.action != modm_nodelog.action:
                        print 'Action doesn\'t match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.action, log.action)
                    if log.params is not None and log.params != modm_nodelog.params:
                        print 'Params doesn\'t match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.params, log.params)
                    if log.should_hide is not None and log.should_hide != modm_nodelog.should_hide:
                        # NOTE: "does'nt" typo preserved from the original output.
                        print 'Should_hide does\'nt match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.should_hide, log.should_hide)
                    if log.foreign_user is not None and log.foreign_user != '' and log.foreign_user != modm_nodelog.foreign_user:
                        print 'Foreign_user doesn\'t match on log {}; `{}` != `{}`'.format(
                            log.guid, modm_nodelog.foreign_user, log.foreign_user)
                else:
                    print 'MODMNodeLog with id {} not found.'.format(log.guid)
                count += 1
                # Drop MODM caches each page to keep memory flat.
                if count % page_size == 0:
                    MODMNodeLog._cache.clear()
                    MODMNodeLog._object_cache.clear()
                    print '{} through {}'.format(count, count + page_size)
def get_targets():
    """Return the retraction_approved logs whose registrations need migrating."""
    retraction_logs = NodeLog.find(Q('action', 'eq', 'retraction_approved'))
    logger.info('Retractions found: {}'.format(len(retraction_logs)))
    return retraction_logs
def get_targets():
    """Return every log flagged as hidden."""
    hidden = Q('should_hide', 'eq', True)
    return NodeLog.find(hidden)
def get_targets():
    """Return all wiki-deletion logs."""
    wiki_deleted = Q('action', 'eq', NodeLog.WIKI_DELETED)
    return NodeLog.find(wiki_deleted)
def tearDown(self):
    """Wipe NodeLog and Node records created during the test."""
    for model in (NodeLog, Node):
        model.remove()
def get_aggregate_logs(ids, user, count=100):
    """Return up to ``count`` logs for the given node ids, oldest first."""
    matching = NodeLog.find(Q('params.node', 'in', ids))
    return list(matching.sort('date').limit(int(count)))
def count_user_logs(user, query=None):
    """Count logs for a user, optionally AND-ed with an extra query."""
    user_clause = Q('user', 'eq', user._id)
    combined = (query & user_clause) if query else user_clause
    return NodeLog.find(combined).count()
def tearDown(self):
    """Run the parent teardown first, then purge all NodeLog records."""
    super(TestNodeLogList, self).tearDown()
    # Clear out logs created by the test so cases stay independent.
    NodeLog.remove()
def logs_since(user, date):
    """Logs created by ``user`` strictly after ``date``."""
    by_user = Q('user', 'eq', user._id)
    after_date = Q('date', 'gt', date)
    return NodeLog.find(by_user & after_date)
def main():
    """Migrate MODM NodeLogs into Django in bulk-created pages.

    Pages through MODM logs newest-first, resolving (and lazily creating)
    the matching Django user/node rows via the ``modm_to_django`` pk
    lookup table, then bulk-creates each page inside a transaction and
    attaches the ``was_connected_to`` m2m values afterwards.
    Python 2 script (print statements, ``basestring``).
    """
    total = MODMNodeLog.find().count()
    # total = len(modm_nodelogs)
    count = 0
    page_size = 100000
    django_nodelogs = []
    django_nodelogs_ids = []
    django_nodelogs_was_connected_to = {}
    print 'Migrating {} logs...'.format(total)
    while count < total:
        modm_nodelogs = None
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count + page_size]
        with transaction.atomic():
            print 'Migrating {} through {} which is {}'.format(
                count, count + page_size, len(modm_nodelogs))
            for modm_nodelog in modm_nodelogs:
                # don't recreate the log if it exists
                if NodeLog.objects.filter(guid=modm_nodelog._id).exists():
                    pass
                else:
                    if modm_nodelog.user is not None:
                        # try to get the pk out of the lookup table
                        user_pk = modm_to_django.get(modm_nodelog.user._id, None)
                        # it wasn't there
                        if user_pk is None:
                            # create a new user
                            print 'Creating User {}'.format(
                                modm_nodelog.user._id)
                            user = get_or_create_user(modm_nodelog.user)
                            user_pk = user.pk
                            # put the user in the lookup table for next time
                            modm_to_django[modm_nodelog.user._id] = user_pk
                    else:
                        # log doesn't have user
                        user_pk = None
                    # get the node (either a MODMNode instance or a node guid)
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = None
                    if node_id is not None:
                        if isinstance(node_id, basestring):
                            # it's a guid, look it up in the table
                            node_pk = modm_to_django.get(node_id, None)
                        elif isinstance(node_id, MODMNode):
                            # it's an instance, look it up in the table
                            node_pk = modm_to_django.get(node_id._id, None)
                        if node_pk is None:
                            print 'Creating Node {}'.format(node_id)
                            # it wasn't in the table
                            if isinstance(node_id, basestring):
                                # it's a guid, get an instance and create a PG version
                                modm_node = MODMNode.load(node_id)
                                django_node = get_or_create_node(modm_node)
                                if django_node is None:
                                    print 'Node {} does not exist.'.format(
                                        node_id)
                                    continue
                                node_pk = get_or_create_node(modm_node).pk
                                # put it in the table for later
                                modm_to_django[modm_node._id] = node_pk
                            elif isinstance(node_id, MODMNode):
                                # it's an instance, create a PG version
                                node_pk = get_or_create_node(node_id).pk
                                # put it in the table for later
                                modm_to_django[node_id._id] = node_pk
                        if node_pk is not None:
                            # Resolve the m2m targets to Django pks up front.
                            was_connected_to = []
                            for wct in modm_nodelog.was_connected_to:
                                wct_pk = modm_to_django.get(wct._id, None)
                                if wct_pk is None:
                                    wct_pk = get_or_create_node(wct).pk
                                    modm_to_django[wct._id] = wct_pk
                                was_connected_to.append(wct_pk)
                            # MODM dates are naive; make them UTC-aware.
                            if modm_nodelog.date is None:
                                nodelog_date = None
                            else:
                                nodelog_date = pytz.utc.localize(modm_nodelog.date)
                            if modm_nodelog._id not in django_nodelogs_ids:
                                django_nodelogs.append(
                                    NodeLog(guid=modm_nodelog._id,
                                            date=nodelog_date,
                                            action=modm_nodelog.action,
                                            params=modm_nodelog.params,
                                            should_hide=modm_nodelog.should_hide,
                                            user_id=user_pk,
                                            foreign_user=modm_nodelog.foreign_user or '',
                                            node_id=node_pk))
                                django_nodelogs_was_connected_to[
                                    modm_nodelog._id] = was_connected_to
                                django_nodelogs_ids.append(modm_nodelog._id)
                            else:
                                print 'NodeLog with id {} and data {} was already in the bulk_create'.format(
                                    modm_nodelog._id, modm_nodelog.to_storage())
                        else:
                            print 'Node {} is None on NodeLog {}...'.format(
                                node_id, modm_nodelog._id)
                count += 1
                # NOTE: Python 2 integer division — progress ping every 2000 rows.
                if count % (page_size / 50) == 0:
                    print 'Through {}'.format(count)
                if count % page_size == 0:
                    print 'Starting to migrate {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    if len(django_nodelogs) > 0:
                        NodeLog.objects.bulk_create(django_nodelogs)
                    print 'Finished migrating {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    print 'Adding m2m values'
                    # bulk_create skips m2m; attach was_connected_to per log now.
                    for django_nodelog in django_nodelogs:
                        nl = NodeLog.objects.get(guid=django_nodelog.guid)
                        nl.was_connected_to.add(
                            *django_nodelogs_was_connected_to[
                                django_nodelog.guid])
                    print 'Finished adding m2m values'
                    django_nodelogs = []
                    django_nodelogs_was_connected_to = {}
                    garbage = gc.collect()
                    print 'Collected {} garbages!'.format(garbage)
    # Terminal bell x5, then the final tally.
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print 'Finished migration. MODM: {}, DJANGO: {}'.format(
        total, NodeLog.objects.all().count())
def get_targets():
    """Embargo-approved logs that are missing ``params.user``."""
    query = Q('action', 'eq', NodeLog.EMBARGO_APPROVED) & Q('params.user', 'eq', None)
    return NodeLog.find(query)
def main():
    """Bulk-migrate MODM NodeLogs to Django, page by page.

    Resolves users and nodes through the ``modm_to_django`` pk cache
    (creating Django rows on demand), bulk-creates each page within a
    transaction, then wires up the ``was_connected_to`` m2m relations.
    Python 2 script (print statements, ``basestring``).
    """
    total = MODMNodeLog.find().count()
    # total = len(modm_nodelogs)
    count = 0
    page_size = 100000
    django_nodelogs = []
    django_nodelogs_ids = []
    django_nodelogs_was_connected_to = {}
    print 'Migrating {} logs...'.format(total)
    while count < total:
        modm_nodelogs = None
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count + page_size]
        with transaction.atomic():
            print 'Migrating {} through {} which is {}'.format(
                count, count + page_size, len(modm_nodelogs))
            for modm_nodelog in modm_nodelogs:
                # don't recreate the log if it exists
                if NodeLog.objects.filter(guid=modm_nodelog._id).exists():
                    pass
                else:
                    if modm_nodelog.user is not None:
                        # try to get the pk out of the lookup table
                        user_pk = modm_to_django.get(modm_nodelog.user._id, None)
                        # it wasn't there
                        if user_pk is None:
                            # create a new user
                            print 'Creating User {}'.format(modm_nodelog.user._id)
                            user = get_or_create_user(modm_nodelog.user)
                            user_pk = user.pk
                            # put the user in the lookup table for next time
                            modm_to_django[modm_nodelog.user._id] = user_pk
                    else:
                        # log doesn't have user
                        user_pk = None
                    # get the node (either a MODMNode instance or a node guid)
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = None
                    if node_id is not None:
                        if isinstance(node_id, basestring):
                            # it's a guid, look it up in the table
                            node_pk = modm_to_django.get(node_id, None)
                        elif isinstance(node_id, MODMNode):
                            # it's an instance, look it up in the table
                            node_pk = modm_to_django.get(node_id._id, None)
                        if node_pk is None:
                            print 'Creating Node {}'.format(node_id)
                            # it wasn't in the table
                            if isinstance(node_id, basestring):
                                # it's a guid, get an instance and create a PG version
                                modm_node = MODMNode.load(node_id)
                                django_node = get_or_create_node(modm_node)
                                if django_node is None:
                                    print 'Node {} does not exist.'.format(
                                        node_id)
                                    continue
                                node_pk = get_or_create_node(modm_node).pk
                                # put it in the table for later
                                modm_to_django[modm_node._id] = node_pk
                            elif isinstance(node_id, MODMNode):
                                # it's an instance, create a PG version
                                node_pk = get_or_create_node(node_id).pk
                                # put it in the table for later
                                modm_to_django[node_id._id] = node_pk
                        if node_pk is not None:
                            # Pre-resolve m2m targets to Django pks.
                            was_connected_to = []
                            for wct in modm_nodelog.was_connected_to:
                                wct_pk = modm_to_django.get(wct._id, None)
                                if wct_pk is None:
                                    wct_pk = get_or_create_node(wct).pk
                                    modm_to_django[wct._id] = wct_pk
                                was_connected_to.append(wct_pk)
                            # MODM dates are naive; attach UTC before saving.
                            if modm_nodelog.date is None:
                                nodelog_date = None
                            else:
                                nodelog_date = pytz.utc.localize(modm_nodelog.date)
                            if modm_nodelog._id not in django_nodelogs_ids:
                                django_nodelogs.append(NodeLog(
                                    guid=modm_nodelog._id,
                                    date=nodelog_date,
                                    action=modm_nodelog.action,
                                    params=modm_nodelog.params,
                                    should_hide=modm_nodelog.should_hide,
                                    user_id=user_pk,
                                    foreign_user=modm_nodelog.foreign_user or '',
                                    node_id=node_pk))
                                django_nodelogs_was_connected_to[
                                    modm_nodelog._id] = was_connected_to
                                django_nodelogs_ids.append(modm_nodelog._id)
                            else:
                                print 'NodeLog with id {} and data {} was already in the bulk_create'.format(
                                    modm_nodelog._id, modm_nodelog.to_storage())
                        else:
                            print 'Node {} is None on NodeLog {}...'.format(
                                node_id, modm_nodelog._id)
                count += 1
                # NOTE: Python 2 integer division — prints every page_size/50 rows.
                if count % (page_size / 50) == 0:
                    print 'Through {}'.format(count)
                if count % page_size == 0:
                    print 'Starting to migrate {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    if len(django_nodelogs) > 0:
                        NodeLog.objects.bulk_create(django_nodelogs)
                    print 'Finished migrating {} through {} which should be {}'.format(
                        count - page_size, count, len(django_nodelogs))
                    print 'Adding m2m values'
                    # bulk_create cannot set m2m fields; attach them per log.
                    for django_nodelog in django_nodelogs:
                        nl = NodeLog.objects.get(guid=django_nodelog.guid)
                        nl.was_connected_to.add(
                            *django_nodelogs_was_connected_to[
                                django_nodelog.guid])
                    print 'Finished adding m2m values'
                    django_nodelogs = []
                    django_nodelogs_was_connected_to = {}
                    garbage = gc.collect()
                    print 'Collected {} garbages!'.format(garbage)
    # Terminal bell x5, then the final tally.
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print '\a'
    print 'Finished migration. MODM: {}, DJANGO: {}'.format(
        total, NodeLog.objects.all().count())
def main():
    """Fast-path bulk migration of MODM NodeLogs into Django.

    Unlike the lookup-and-create variant, this pass assumes users/nodes
    are already in the ``modm_to_django`` pk table and simply counts the
    ones that are missing (``blank_users`` / ``blank_nodes``) instead of
    creating them. Python 2 script (print statements).
    """
    start = datetime.now()
    split = start
    total = MODMNodeLog.find().count()
    count = 0
    page_size = 10000
    blank_users = 0
    blank_nodes = 0
    while count < total:
        garbage = gc.collect()
        print 'Collected {} whole garbages!'.format(garbage)
        print 'Migrating {} through {}'.format(count, count + page_size)
        django_nodelogs = deque()
        nodelog_guids = deque()
        for modm_nodelog in MODMNodeLog.find().sort('-date')[count:count + page_size]:
            # Skip duplicate guids inside the same batch.
            if modm_nodelog._id in nodelog_guids:
                print 'Nodelog with guid of {} and data of {} exists in batch'.format(
                    modm_nodelog._id, modm_nodelog.to_storage())
                continue
            else:
                nodelog_guids.append(modm_nodelog._id)
            # AttributeError covers a missing .user; KeyError a missing table entry.
            try:
                user_pk = modm_to_django[modm_nodelog.user._id]
            except (KeyError, AttributeError) as ex:
                blank_users += 1
                user_pk = None
            # AttributeError also covers getattr(...) returning None here.
            try:
                node_pk = modm_to_django[getattr(modm_nodelog, 'node', None)._id]
            except (KeyError, AttributeError) as ex:
                blank_nodes += 1
                print 'Found blank node on {}'.format(modm_nodelog._id)
                node_pk = None
            # MODM dates are naive; make them UTC-aware.
            if modm_nodelog.date is None:
                nodelog_date = None
            else:
                nodelog_date = pytz.utc.localize(modm_nodelog.date)
            django_nodelogs.append(
                NodeLog(guid=modm_nodelog._id,
                        date=nodelog_date,
                        action=modm_nodelog.action,
                        params=modm_nodelog.params,
                        should_hide=modm_nodelog.should_hide,
                        user_id=user_pk,
                        foreign_user=modm_nodelog.foreign_user or '',
                        node_id=node_pk))
            count += 1
            if count % 1000 == 0:
                print 'Through {} in {}'.format(count, (datetime.now() - split).total_seconds())
                split = datetime.now()
            if count % page_size == 0:
                print '{} blank users; {} blank nodes'.format(
                    blank_users, blank_nodes)
                print 'Starting to migrate {} through {} which is {}'.format(
                    count - page_size, count, len(django_nodelogs))
                splat = datetime.now()
                if len(django_nodelogs) > 0:
                    with transaction.atomic():
                        NodeLog.objects.bulk_create(django_nodelogs)
                print 'Finished migrating {} through {} in {} which is {}'.format(
                    count - page_size, count,
                    (datetime.now() - splat).total_seconds(),
                    len(django_nodelogs))
                django_nodelogs = deque()
                nodelog_guids = deque()
                garbage = gc.collect()
                print 'Collected {} whole garbages!'.format(garbage)
    # Terminal bells, then the final tally and blank counts.
    print '\a\a\a\a\a'
    print 'Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total,
        NodeLog.objects.count())
    print 'There were {} blank users and {} blank nodes'.format(
        blank_users, blank_nodes)
def get_registration_approved_logs():
    """registration_approved logs that lack params['registration']."""
    # These logs do not have params['registration'] field
    query = Q('action', 'eq', 'registration_approved') & Q('params.registration', 'eq', None)
    return NodeLog.find(query)
def main():
    """Timed bulk migration of MODM NodeLogs into Django.

    Pages newest-first, deduplicates guids within each batch, resolves
    user/node pks from the ``modm_to_django`` table (counting misses as
    blank users/nodes rather than creating rows), and bulk-creates each
    page inside a transaction. Python 2 script (print statements).
    """
    start = datetime.now()
    split = start
    total = MODMNodeLog.find().count()
    count = 0
    page_size = 10000
    blank_users = 0
    blank_nodes = 0
    while count < total:
        garbage = gc.collect()
        print 'Collected {} whole garbages!'.format(garbage)
        print 'Migrating {} through {}'.format(count, count + page_size)
        django_nodelogs = deque()
        nodelog_guids = deque()
        for modm_nodelog in MODMNodeLog.find().sort('-date')[count:count + page_size]:
            # Guard against duplicate guids within the batch.
            if modm_nodelog._id in nodelog_guids:
                print 'Nodelog with guid of {} and data of {} exists in batch'.format(
                    modm_nodelog._id, modm_nodelog.to_storage())
                continue
            else:
                nodelog_guids.append(modm_nodelog._id)
            # KeyError: pk not in table; AttributeError: log has no user.
            try:
                user_pk = modm_to_django[modm_nodelog.user._id]
            except (KeyError, AttributeError) as ex:
                blank_users += 1
                user_pk = None
            # AttributeError also fires when getattr(...) returns None.
            try:
                node_pk = modm_to_django[getattr(modm_nodelog, 'node', None)._id]
            except (KeyError, AttributeError) as ex:
                blank_nodes += 1
                print 'Found blank node on {}'.format(modm_nodelog._id)
                node_pk = None
            # MODM dates are naive; localize to UTC.
            if modm_nodelog.date is None:
                nodelog_date = None
            else:
                nodelog_date = pytz.utc.localize(modm_nodelog.date)
            django_nodelogs.append(
                NodeLog(guid=modm_nodelog._id,
                        date=nodelog_date,
                        action=modm_nodelog.action,
                        params=modm_nodelog.params,
                        should_hide=modm_nodelog.should_hide,
                        user_id=user_pk,
                        foreign_user=modm_nodelog.foreign_user or '',
                        node_id=node_pk))
            count += 1
            if count % 1000 == 0:
                print 'Through {} in {}'.format(count, (
                    datetime.now() - split).total_seconds())
                split = datetime.now()
            if count % page_size == 0:
                print '{} blank users; {} blank nodes'.format(blank_users, blank_nodes)
                print 'Starting to migrate {} through {} which is {}'.format(
                    count - page_size, count, len(django_nodelogs))
                splat = datetime.now()
                if len(django_nodelogs) > 0:
                    with transaction.atomic():
                        NodeLog.objects.bulk_create(django_nodelogs)
                print 'Finished migrating {} through {} in {} which is {}'.format(
                    count - page_size, count,
                    (datetime.now() - splat).total_seconds(),
                    len(django_nodelogs))
                django_nodelogs = deque()
                nodelog_guids = deque()
                garbage = gc.collect()
                print 'Collected {} whole garbages!'.format(garbage)
    # Terminal bells, final timing and blank-record summary.
    print '\a\a\a\a\a'
    print 'Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total,
        NodeLog.objects.count())
    print 'There were {} blank users and {} blank nodes'.format(blank_users, blank_nodes)
def get_targets():
    """Embargo-approved logs whose ``params.user`` is unset."""
    embargo_approved = Q('action', 'eq', NodeLog.EMBARGO_APPROVED)
    missing_user = Q('params.user', 'eq', None)
    return NodeLog.find(embargo_approved & missing_user)