示例#1
0
def migrate(dry_run=True):
    """Rename primary-institution log actions and fold primary institutions into affiliations.

    Logs whose action is ``PRIMARY_INSTITUTION_CHANGED`` / ``PRIMARY_INSTITUTION_REMOVED``
    are re-labelled with the ``NodeLog.AFFILIATED_INSTITUTION_*`` actions and saved.
    Every node with a non-null ``primary_institution`` gets that institution
    appended to ``affiliated_institutions`` (if absent), has the primary field
    cleared, and is saved.

    :param dry_run: when true, a RuntimeError is raised after all updates ran.
        NOTE(review): the message implies the caller wraps this call in a
        transaction that the exception rolls back -- confirm at the call site.
    :raises RuntimeError: if ``dry_run`` is true.
    """
    # (action to look for, replacement action, log message template)
    renames = (
        (PRIMARY_INSTITUTION_CHANGED,
         NodeLog.AFFILIATED_INSTITUTION_ADDED,
         'Log with id <{}> being updated for affiliation added'),
        (PRIMARY_INSTITUTION_REMOVED,
         NodeLog.AFFILIATED_INSTITUTION_REMOVED,
         'Log with id <{}> being updated for affiliation removed'),
    )
    for old_action, new_action, template in renames:
        for entry in NodeLog.find(Q('action', 'eq', old_action)):
            logger.info(template.format(entry._id))
            entry.action = new_action
            entry.save()

    for target in Node.find(Q('primary_institution', 'ne', None)):
        logger.info('Node with id <{}> and title <{}> being updated'.format(
            target._id, target.title))
        institution = target.primary_institution
        if institution not in target.affiliated_institutions:
            target.affiliated_institutions.append(institution)
        target.primary_institution = None
        target.save()

    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
示例#2
0
文件: views.py 项目: scooley/osf.io
 def delete(self, request, *args, **kwargs):
     """Remove a contributor from a node and record the removal.

     ``self.get_object()`` supplies the ``(node, user)`` pair.  When
     ``node.remove_contributor`` returns a truthy value, an admin log entry is
     written and a hidden ``NodeLog`` (``should_hide=True``, no user) is saved.
     Any AttributeError raised along the way is turned into a
     ``page_not_found`` response; otherwise the client is redirected to the
     node page for the ``node_id`` URL kwarg.
     """
     try:
         node, user = self.get_object()
         removed = node.remove_contributor(user, None, log=False)
         if removed:
             admin_message = 'User {} removed from node {}.'.format(
                 user.pk, node.pk)
             update_admin_log(
                 user_id=self.request.user.id,
                 object_id=node.pk,
                 object_repr='Contributor',
                 message=admin_message,
                 action_flag=CONTRIBUTOR_REMOVED)
             # Log invisibly on the OSF.
             hidden_params = {
                 'project': node.parent_id,
                 'node': node.pk,
                 'contributors': user.pk,
             }
             NodeLog(
                 action=NodeLog.CONTRIB_REMOVED,
                 user=None,
                 params=hidden_params,
                 date=datetime.utcnow(),
                 should_hide=True,
             ).save()
     except AttributeError:
         not_found = AttributeError('{} with id "{}" not found.'.format(
             self.context_object_name.title(), kwargs.get('node_id')))
         return page_not_found(request, not_found)
     return redirect(reverse_node(self.kwargs.get('node_id')))
示例#3
0
文件: views.py 项目: kch8qx/osf.io
 def delete(self, request, *args, **kwargs):
     """Detach a contributor from a node, logging the change for admins and on the OSF.

     The ``(node, user)`` pair comes from ``self.get_object()``.  Logging only
     happens when ``node.remove_contributor`` reports success (truthy return).
     An AttributeError anywhere in that work yields a ``page_not_found``
     response; the normal path ends in a redirect built from the ``node_id``
     URL kwarg.
     """
     try:
         node, user = self.get_object()
         was_removed = node.remove_contributor(user, None, log=False)
         if was_removed:
             update_admin_log(
                 user_id=self.request.user.id,
                 object_id=node.pk,
                 object_repr="Contributor",
                 message="User {} removed from node {}.".format(user.pk, node.pk),
                 action_flag=CONTRIBUTOR_REMOVED,
             )
             # Log invisibly on the OSF.
             log_params = {
                 "project": node.parent_id,
                 "node": node.pk,
                 "contributors": user.pk,
             }
             hidden_log = NodeLog(
                 action=NodeLog.CONTRIB_REMOVED,
                 user=None,
                 params=log_params,
                 date=datetime.utcnow(),
                 should_hide=True,
             )
             hidden_log.save()
     except AttributeError:
         label = self.context_object_name.title()
         error = AttributeError('{} with id "{}" not found.'.format(label, kwargs.get("node_id")))
         return page_not_found(request, error)
     return redirect(reverse_node(self.kwargs.get("node_id")))
示例#4
0
 def delete(self, request, *args, **kwargs):
     """Toggle a node between removed and restored, then log the change.

     * A node with ``is_deleted`` set is restored (flag cleared, date reset).
     * Otherwise, a node that is not a registration is marked deleted with the
       current UTC time.
     * A non-deleted registration is left unchanged, but ``node.save()`` is
       still called.

     When a change was made, an admin log entry and a hidden ``NodeLog`` are
     written.  An AttributeError in any of this produces a ``page_not_found``
     response; otherwise the client is redirected using the ``guid`` URL kwarg.
     """
     try:
         node = self.get_object()
         admin_flag = admin_message = osf_action = None
         if node.is_deleted:
             node.is_deleted = False
             node.deleted_date = None
             admin_flag = NODE_RESTORED
             admin_message = 'Node {} restored.'.format(node.pk)
             osf_action = NodeLog.NODE_CREATED
         elif not node.is_registration:
             node.is_deleted = True
             node.deleted_date = datetime.utcnow()
             admin_flag = NODE_REMOVED
             admin_message = 'Node {} removed.'.format(node.pk)
             osf_action = NodeLog.NODE_REMOVED
         node.save()

         if admin_flag is not None:
             update_admin_log(
                 user_id=self.request.user.id,
                 object_id=node.pk,
                 object_repr='Node',
                 message=admin_message,
                 action_flag=admin_flag
             )
         if osf_action is not None:
             # Log invisibly on the OSF.
             NodeLog(
                 action=osf_action,
                 user=None,
                 params={'project': node.parent_id},
                 date=datetime.utcnow(),
                 should_hide=True,
             ).save()
     except AttributeError:
         not_found = AttributeError('{} with id "{}" not found.'.format(
             self.context_object_name.title(), kwargs.get('guid')))
         return page_not_found(request, not_found)
     return redirect(reverse_node(self.kwargs.get('guid')))
示例#5
0
 def delete(self, request, *args, **kwargs):
     """Flip a node's deleted state and record the action.

     Deleted nodes are restored; non-registration nodes that are not deleted
     are marked deleted with ``timezone.now()``.  A non-deleted registration is
     not modified, yet ``node.save()`` still runs.  Whenever a change happened,
     an admin log entry plus a hidden ``NodeLog`` are written.

     Returns ``page_not_found`` if an AttributeError is raised while doing the
     above, else a redirect built from the ``guid`` URL kwarg.
     """
     try:
         node = self.get_object()
         admin_flag = None
         admin_message = None
         osf_action = None
         if node.is_deleted:
             # Restore a previously removed node.
             node.is_deleted = False
             node.deleted_date = None
             admin_flag = NODE_RESTORED
             admin_message = 'Node {} restored.'.format(node.pk)
             osf_action = NodeLog.NODE_CREATED
         elif not node.is_registration:
             # Remove a live node; registrations are never removed here.
             node.is_deleted = True
             node.deleted_date = timezone.now()
             admin_flag = NODE_REMOVED
             admin_message = 'Node {} removed.'.format(node.pk)
             osf_action = NodeLog.NODE_REMOVED
         node.save()

         if admin_flag is not None:
             update_admin_log(
                 user_id=self.request.user.id,
                 object_id=node.pk,
                 object_repr='Node',
                 message=admin_message,
                 action_flag=admin_flag
             )
         if osf_action is not None:
             # Log invisibly on the OSF.
             hidden_log = NodeLog(
                 action=osf_action,
                 user=None,
                 params={'project': node.parent_id},
                 date=timezone.now(),
                 should_hide=True,
             )
             hidden_log.save()
     except AttributeError:
         label = self.context_object_name.title()
         error = AttributeError(
             '{} with id "{}" not found.'.format(label, kwargs.get('guid')))
         return page_not_found(request, error)
     return redirect(reverse_node(self.kwargs.get('guid')))
def do_migration(records, dry=False):
    """Prepend logs found via ``was_connected_to`` to each node's own logs.

    For every node in ``records`` the NodeLogs whose ``was_connected_to``
    contains the node are collected.  A log that names a logged node
    (``node__logged[0]``) outside ``get_all_parents(node)`` is excluded; logs
    with an empty ``node__logged`` are kept.  The surviving logs are placed in
    front of the node's existing logs and ``SYSTEM_TAG`` is appended to
    ``system_tags``.

    :param records: iterable of nodes to update.
    :param dry: when true, ``node.save()`` is skipped (the in-memory
        attributes are still modified).
    """
    for node in records:
        candidate_logs = list(
            NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs

        # Build a new list instead of calling remove() on the list being
        # iterated: removing during iteration silently skips the element that
        # follows each removed one, so some unrelated logs were never checked.
        logs = []
        for log in candidate_logs:
            # if the log_node is not contained in the node parent list then it doesn't belong to this node
            if log.node__logged and log.node__logged[0] not in get_all_parents(node):
                logger.info(
                    'Excluding log {} from list because it is not associated with node {}'
                    .format(log, node))
                continue
            logs.append(log)

        with TokuTransaction():
            node.logs = logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(
                len(logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    # Best effort: report the failure and move on to the next node.
                    logger.error(
                        'Could not update logs for node {} due to error'.
                        format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
示例#7
0
def migrate(dry_run=True):
    """Add ``params['preprint']`` to preprint NodeLogs that lack it.

    Selects logs whose action is ``PREPRINT_FILE_UPDATED`` or
    ``PREPRINT_INITIATED`` and that have no ``params.preprint``.  For each one,
    the PreprintService for ``params['node']`` is looked up; its ``_id`` is
    stored under ``params['preprint']`` and the log is saved.  Logs without a
    matching preprint are reported and skipped.

    NOTE(review): ``dry_run`` is never read in this function, so saves happen
    regardless of its value -- confirm the caller handles dry runs.
    """
    wanted_actions = [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED]
    selector = Q("action", "in", wanted_actions) & Q("params.preprint", "exists", False)
    pending = list(NodeLog.find(selector))

    logger.info("Preparing to migrate {} NodeLogs".format(len(pending)))

    migrated = 0
    for entry in pending:
        node_id = entry.params.get("node")
        try:
            preprint = PreprintService.find_one(Q("node", "eq", node_id))
        except NoResultsFound:
            logger.error("Skipping {}, preprint not found for node: {}".format(entry._id, node_id))
            continue

        logger.info("Migrating log - {} - to add params.preprint: {}, ".format(entry._id, preprint._id))
        entry.params["preprint"] = preprint._id
        entry.save()
        migrated += 1

    logger.info("Migrated {} logs".format(migrated))
示例#8
0
def migrate(dry_run=True):
    """Backfill ``params['preprint']`` on preprint-related NodeLogs.

    Targets are logs with action ``PREPRINT_FILE_UPDATED`` or
    ``PREPRINT_INITIATED`` that do not yet have ``params.preprint``.  Each is
    given the ``_id`` of the PreprintService found for ``params['node']`` and
    saved; logs with no matching preprint are logged as errors and skipped.
    The number of saved logs is reported at the end.

    NOTE(review): the ``dry_run`` argument is not consulted anywhere in this
    body -- verify that the caller is responsible for dry-run handling.
    """
    preprint_actions = [NodeLog.PREPRINT_FILE_UPDATED, NodeLog.PREPRINT_INITIATED]
    missing_preprint = Q('params.preprint', 'exists', False)
    candidates = list(NodeLog.find(
        Q('action', 'in', preprint_actions) & missing_preprint
    ))

    logger.info('Preparing to migrate {} NodeLogs'.format(len(candidates)))

    saved = 0
    for candidate in candidates:
        node_id = candidate.params.get('node')

        try:
            preprint = PreprintService.find_one(Q('node', 'eq', node_id))
        except NoResultsFound:
            logger.error('Skipping {}, preprint not found for node: {}'.format(candidate._id, node_id))
            continue

        logger.info(
            'Migrating log - {} - to add params.preprint: {}, '.format(candidate._id, preprint._id)
        )
        candidate.params['preprint'] = preprint._id
        candidate.save()
        saved += 1

    logger.info('Migrated {} logs'.format(saved))
def do_migration(records, dry=False):
    """Attach logs located through ``was_connected_to`` to each node in ``records``.

    Logs whose first logged node (``node__logged[0]``) is not among
    ``get_all_parents(node)`` are dropped; logs with an empty ``node__logged``
    are kept.  The remaining logs are prepended to ``node.logs`` and
    ``SYSTEM_TAG`` is appended to ``node.system_tags``.

    :param records: iterable of nodes to process.
    :param dry: if true, the node is modified in memory but ``node.save()`` is
        not called.
    """
    for node in records:
        found_logs = list(NodeLog.find(Q('was_connected_to', 'contains', node)))
        existing_logs = node.logs

        # Filter into a fresh list.  The previous code removed items from the
        # list it was iterating, which makes the loop skip the element right
        # after every removal and can leave unrelated logs in place.
        logs = []
        for log in found_logs:
            if log.node__logged:
                log_node = log.node__logged[0]
                # if the log_node is not contained in the node parent list then it doesn't belong to this node
                if log_node not in get_all_parents(node):
                    logger.info('Excluding log {} from list because it is not associated with node {}'.format(log, node))
                    continue
            logs.append(log)

        with TokuTransaction():
            node.logs = logs + existing_logs
            node.system_tags.append(SYSTEM_TAG)
            node_type = 'registration' if node.is_registration else 'fork'
            logger.info('Adding {} logs to {} {}'.format(len(logs), node_type, node))
            if not dry:
                try:
                    node.save()
                except Exception as err:
                    # Deliberately best effort: log the failure and continue.
                    logger.error('Could not update logs for node {} due to error'.format(node._id))
                    logger.exception(err)
                    logger.error('Skipping...')
示例#10
0
def find_invalid_logs():
    """Yield ``WIKI_DELETED`` logs whose ObjectId timestamp is later than ``log.date``.

    The creation time embedded in the log's ObjectId is converted to a naive
    datetime (tzinfo stripped, UTC offset subtracted) before being compared
    with the stored ``date``.
    """
    wiki_deleted = NodeLog.find(Q('action', 'eq', NodeLog.WIKI_DELETED))
    for entry in wiki_deleted:
        # Derive UTC datetime object from ObjectId
        generated = ObjectId(entry._id).generation_time
        naive_utc = generated.replace(tzinfo=None) - generated.utcoffset()
        if naive_utc > entry.date:
            yield entry
示例#11
0
def count_user_logs(user):
    """Return how many NodeLogs belong to ``user``, with one special case.

    When the count equals ``LOG_THRESHOLD`` and the first log is a
    ``project_created`` entry on a bookmark collection, that log is not
    counted.
    """
    user_logs = NodeLog.find(Q('user', 'eq', user._id))
    total = user_logs.count()
    if total != LOG_THRESHOLD:
        return total

    first = user_logs[0]
    if first.action == 'project_created' and first.node.is_bookmark_collection:
        return total - 1
    return total
示例#12
0
def user_last_log(user, query=None):
    """Return the ``date`` of the final NodeLog matching ``user`` (and ``query``).

    :param user: object whose ``_id`` is matched against the log's ``user``.
    :param query: optional extra query, AND-ed with the user filter.

    NOTE(review): with no matching logs the index becomes ``-1``; what the
    query result does with that is not visible here -- confirm.
    """
    by_user = Q('user', 'eq', user._id)
    if query:
        query &= by_user
    else:
        query = by_user

    matches = NodeLog.find(query)
    last_index = matches.count() - 1
    return matches[last_index].date
示例#13
0
def count_user_logs(user):
    """Count the NodeLogs recorded for ``user``.

    If exactly ``LOG_THRESHOLD`` logs exist and the first of them is the
    ``project_created`` log of a bookmark collection, the result is reduced by
    one so that log is ignored.
    """
    logs = NodeLog.find(Q('user', 'eq', user._id))
    count = logs.count()
    at_threshold = count == LOG_THRESHOLD
    if at_threshold:
        head = logs[0]
        is_bookmark_creation = (
            head.action == 'project_created' and head.node.is_bookmark_collection
        )
        if is_bookmark_creation:
            count -= 1
    return count
示例#14
0
文件: views.py 项目: scooley/osf.io
    def get_log(self):
        """Load the NodeLog named by the ``log_id`` URL kwarg.

        Runs ``check_object_permissions`` on the log before returning it.

        :raises NotFound: if ``NodeLog.load`` returns a falsy value.
        """
        log_id = self.kwargs.get('log_id')
        log = NodeLog.load(log_id)
        if log:
            self.check_object_permissions(self.request, log)
            return log
        raise NotFound(
            detail='No log matching that log_id could be found.')
示例#15
0
def find_invalid_logs():
    """Generate the ``WIKI_DELETED`` logs created after their own ``date`` value.

    "Created" is the generation time stored in the log's ObjectId, normalised
    to a naive datetime by dropping tzinfo and subtracting the UTC offset.
    """
    for candidate in NodeLog.find(Q('action', 'eq', NodeLog.WIKI_DELETED)):
        # Derive UTC datetime object from ObjectId
        created = ObjectId(candidate._id).generation_time
        offset = created.utcoffset()
        created_naive = created.replace(tzinfo=None) - offset

        if created_naive > candidate.date:
            yield candidate
def user_last_log(user, query=None):
    """Return the date of the last NodeLog found for ``user``.

    ``query``, when truthy, is combined with the user filter using ``&``.
    The log at index ``count() - 1`` of the result is used.

    NOTE(review): an empty result leads to index ``-1``; the outcome depends
    on the query result type -- verify.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter

    found = NodeLog.find(query)
    final_log = found[found.count() - 1]
    return final_log.date
示例#17
0
文件: views.py 项目: 545zhou/osf.io
    def get_log(self):
        """Return the NodeLog identified by the ``log_id`` URL kwarg.

        Object permissions are checked against the current request before the
        log is handed back.

        :raises NotFound: when no log is loaded for that id.
        """
        requested_id = self.kwargs.get('log_id')
        found = NodeLog.load(requested_id)
        if not found:
            message = 'No log matching that log_id could be found.'
            raise NotFound(detail=message)

        self.check_object_permissions(self.request, found)
        return found
示例#18
0
 def get_queryset(self):
     """Return the nodes logged by the requested log that the current user can view.

     :raises NotFound: if no NodeLog is loaded for the ``log_id`` URL kwarg.
     """
     log = NodeLog.load(self.kwargs.get('log_id'))
     if not log:
         raise NotFound(
             detail='No log matching that log_id could be found.')

     auth_user = get_user_auth(self.request)
     visible = []
     for node in log.node__logged:
         if node.can_view(auth_user):
             visible.append(node)
     return visible
示例#19
0
def get_registered_from(registration):
    """
    Return the id of the node that ``registration`` was registered from.

    When ``registered_from`` is falsy (the original note mentions deleted
    registrations), the id is taken from the params of the registration's
    first log instead: ``params['node']``, falling back to ``params['project']``.
    """
    if registration.registered_from:
        return registration.registered_from_id

    raw_record = db['node'].find_one({'_id': registration._id})
    earliest_log = NodeLog.load(raw_record['logs'][0])
    params = earliest_log.params
    return params.get('node') or params.get('project')
示例#20
0
def get_registered_from(registration):
    """
    Find which node a registration came from.

    Uses ``registered_from_id`` when ``registered_from`` is set.  Otherwise
    (per the original note, deleted registrations have a null
    ``registered_from``) the first log id stored on the raw node document is
    loaded and its ``node`` param, or else its ``project`` param, is returned.
    """
    if not registration.registered_from:
        document = db['node'].find_one({'_id': registration._id})
        first_log_id = document['logs'][0]
        first_log = NodeLog.load(first_log_id)
        source_id = first_log.params.get('node')
        return source_id or first_log.params.get('project')
    return registration.registered_from_id
示例#21
0
def count_user_logs(user, query=None):
    """Count NodeLogs for ``user``, optionally narrowed by ``query``.

    If at least one log matches and the first is a ``project_created`` log on
    a dashboard node, that log is left out of the count.
    """
    by_user = Q('user', 'eq', user._id)
    if query:
        query &= by_user
    else:
        query = by_user

    matches = NodeLog.find(query)
    total = matches.count()
    if total <= 0:
        return total

    first = matches[0]
    if first.action == 'project_created' and first.node.is_dashboard:
        return total - 1
    return total
示例#22
0
文件: views.py 项目: mauromsl/osf.io
 def get_queryset(self):
     """List the nodes attached to the requested log that the requester may view.

     The log is loaded from the ``log_id`` URL kwarg and each node in
     ``log.node__logged`` is kept only if ``node.can_view`` accepts the
     request's auth.

     :raises NotFound: when the log cannot be loaded.
     """
     requested_log = NodeLog.load(self.kwargs.get('log_id'))
     if requested_log:
         auth_user = get_user_auth(self.request)
         viewable = []
         for logged_node in requested_log.node__logged:
             if logged_node.can_view(auth_user):
                 viewable.append(logged_node)
         return viewable
     raise NotFound(
         detail='No log matching that log_id could be found.'
     )
示例#23
0
def count_user_logs(user, query=None):
    """Return the number of NodeLogs for ``user`` matching the optional ``query``.

    The first matching log is not counted when it is the ``project_created``
    log of a dashboard node.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter

    logs = NodeLog.find(query)
    count = logs.count()
    if count > 0:
        head = logs[0]
        dashboard_creation = head.action == 'project_created' and head.node.is_dashboard
        if dashboard_creation:
            count -= 1
    return count
示例#24
0
def main(dry):
    """Add preprint details to the params of existing preprint NodeLogs.

    Two passes are made over logs dated before "now" (naive UTC):

    * ``PREPRINT_INITIATED`` logs receive ``params['preprint']['id']`` and
      ``params['service']['name']``.
    * ``PREPRINT_FILE_UPDATED`` logs receive ``params['preprint']['id']``.

    The preprint is the PreprintService found for the log's node; logs whose
    node has no preprint are skipped silently.

    :param dry: when true, params are updated in memory and reported, but
        ``log.save()`` is not called.
    """
    if dry:
        # logging.warn is a deprecated alias; logging.warning is the supported name.
        logging.warning('DRY mode running')
    now = datetime.utcnow()
    initiated_logs = NodeLog.find(
        Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & Q('date', 'lt', now))
    for log in initiated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {
                    'id': preprint._id
                },
                'service': {
                    'name': preprint.provider.name
                }
            })
            logging.info(
                'Updating log {} from node {}, with preprint id: {}'.format(
                    log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Best effort: a log without a matching preprint is left untouched.
            pass

    updated_logs = NodeLog.find(
        Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED)
        & Q('date', 'lt', now))
    for log in updated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({'preprint': {'id': preprint._id}})
            logging.info(
                'Updating log {} from node {}, with preprint id: {}'.format(
                    log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Best effort: a log without a matching preprint is left untouched.
            pass
def migrate(dry_run=True):
    """Convert primary-institution data to affiliated-institution data.

    1. Logs with action ``PRIMARY_INSTITUTION_CHANGED`` become
       ``NodeLog.AFFILIATED_INSTITUTION_ADDED``.
    2. Logs with action ``PRIMARY_INSTITUTION_REMOVED`` become
       ``NodeLog.AFFILIATED_INSTITUTION_REMOVED``.
    3. Nodes with a primary institution get it added to
       ``affiliated_institutions`` (when missing) and the primary field reset
       to ``None``.

    :param dry_run: if true, a RuntimeError is raised once every update has
        been applied.  NOTE(review): presumably an enclosing transaction is
        rolled back by that exception -- confirm at the call site.
    :raises RuntimeError: when ``dry_run`` is true.
    """
    def relabel(old_action, new_action, template):
        # Rewrite the action on every log currently carrying old_action.
        for entry in NodeLog.find(Q('action', 'eq', old_action)):
            logger.info(template.format(entry._id))
            entry.action = new_action
            entry.save()

    relabel(PRIMARY_INSTITUTION_CHANGED, NodeLog.AFFILIATED_INSTITUTION_ADDED,
            'Log with id <{}> being updated for affiliation added')
    relabel(PRIMARY_INSTITUTION_REMOVED, NodeLog.AFFILIATED_INSTITUTION_REMOVED,
            'Log with id <{}> being updated for affiliation removed')

    with_primary = Node.find(Q('primary_institution', 'ne', None))
    for current in with_primary:
        logger.info('Node with id <{}> and title <{}> being updated'.format(current._id, current.title))
        primary = current.primary_institution
        if primary not in current.affiliated_institutions:
            current.affiliated_institutions.append(primary)
        current.primary_institution = None
        current.save()

    if dry_run:
        raise RuntimeError('Dry run, transaction rolled back.')
示例#26
0
def get_targets():
    """
    Return the NodeLogs for the cancellation/approval/termination actions listed below.

    Per the original author's note, these logs may be missing
    params['registration'], and their params['node'] and original_node fields
    may point at the registration instead of the node.
    """
    actions = (
        'registration_cancelled',
        'retraction_approved',
        'retraction_cancelled',
        'embargo_approved',
        'embargo_cancelled',
        'embargo_terminated',
    )
    # OR the per-action filters together, left to right.
    query = Q('action', 'eq', actions[0])
    for action in actions[1:]:
        query = query | Q('action', 'eq', action)
    return NodeLog.find(query)
示例#27
0
def get_or_create_node(node_id, sqlite_db):
    """Gets an OSF node from the sqlite cache.  If not found, pulls the node info from mongo and
    saves it.

    The cached row stores the node's id, title, category and, when the most
    recent privacy log is ``MADE_PUBLIC``, the date of that log formatted with
    millisecond precision and a trailing ``Z``.

    :param node_id: OSF node id (e.g. 'mst3k')
    :param sqlite_db: SQLite3 database handle
    :return: node row from the ``nodes`` table, or None if ``node_id`` is None
        or ``Node.load`` finds nothing
    :raises Exception: if the cache holds more than one row for ``node_id``
    """

    if node_id is None:
        return None

    cursor = sqlite_db.cursor()
    # Bind node_id as a parameter instead of formatting it into the SQL text:
    # ids containing a quote no longer break (or alter) the statement.
    cursor.execute('SELECT * FROM nodes WHERE id=?', (node_id,))

    nodes = cursor.fetchall()

    if len(nodes) > 1:
        raise Exception("Multiple nodes found for single node ID")

    if nodes:
        return nodes[0]

    node = Node.load(node_id)
    if node is None:
        return None

    node_public_date = None
    privacy_actions = NodeLog.find(
        Q('node', 'eq', node_id)
        & Q('action', 'in', [NodeLog.MADE_PUBLIC, NodeLog.MADE_PRIVATE])
    ).sort('-date')

    try:
        privacy_action = privacy_actions[0]
    except IndexError:
        # No privacy log found; leave the public date empty.
        pass
    else:
        if privacy_action.action == NodeLog.MADE_PUBLIC:
            # Always render six fractional digits before trimming to
            # milliseconds.  isoformat() omits the fraction when microsecond
            # is 0, so slicing its output dropped the seconds in that case.
            node_public_date = privacy_action.date.strftime('%Y-%m-%dT%H:%M:%S.%f')
            node_public_date = node_public_date[:-3] + 'Z'

    cursor.execute(
        u'INSERT INTO nodes (id, title, category, made_public_date) VALUES (?, ?, ?, ?)',
        (node_id, node.title, node.category, node_public_date)
    )
    sqlite_db.commit()
    # Re-read through the cache path so the caller gets the stored row.
    return get_or_create_node(node_id, sqlite_db)
示例#28
0
def main(dry):
    """Backfill preprint information into the params of preprint NodeLogs.

    Only logs dated before the moment the function starts (naive UTC) are
    touched:

    * ``PREPRINT_INITIATED`` logs get ``params['preprint']['id']`` and
      ``params['service']['name']``.
    * ``PREPRINT_FILE_UPDATED`` logs get ``params['preprint']['id']``.

    The preprint is looked up by the log's node; when none exists the log is
    skipped without any message.

    :param dry: if true, nothing is saved; the updates are only made in memory
        and logged.
    """
    if dry:
        # logging.warn is a deprecated alias; logging.warning is the supported name.
        logging.warning('DRY mode running')
    now = datetime.utcnow()
    initiated_logs = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_INITIATED) & Q('date', 'lt', now))
    for log in initiated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {
                    'id': preprint._id
                },
                'service': {
                    'name': preprint.provider.name
                }
            })
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Best effort: logs whose node has no preprint are left as they are.
            pass

    updated_logs = NodeLog.find(Q('action', 'eq', NodeLog.PREPRINT_FILE_UPDATED) & Q('date', 'lt', now))
    for log in updated_logs:
        try:
            preprint = PreprintService.find_one(Q('node', 'eq', log.node))
            log.params.update({
                'preprint': {
                    'id': preprint._id
                }
            })
            logging.info('Updating log {} from node {}, with preprint id: {}'.format(log._id, log.node.title, preprint._id))
            if not dry:
                log.save()
        except NoResultsFound:
            # Best effort: logs whose node has no preprint are left as they are.
            pass
示例#29
0
def main():
    """Print every field mismatch between Django NodeLogs and their MODM originals.

    All ``NodeLog`` rows are streamed (ordered by ``-date``) through a
    server-side cursor inside one ``transaction.atomic()`` block.  For each
    row the ``MODMNodeLog`` with the same guid is loaded and user, node, date,
    action, params, should_hide and foreign_user are compared; one line is
    printed per difference, or a "not found" line when the MODM log is absent.
    Fields that are ``None`` on the Django side are not compared.

    Every ``page_size`` rows the MODM caches are cleared and a progress line
    is printed.
    """
    count = 0
    page_size = 50000
    with transaction.atomic():
        qs = (
            NodeLog.objects.all()
            .order_by('-date')
            .select_related('user')
            .select_related('node')
            .select_related('user___guid')
            .select_related('node___guid')
        )
        with server_side_cursors(qs, itersize=page_size):
            for log in qs.iterator():
                modm_nodelog = MODMNodeLog.load(log.guid)
                if modm_nodelog is None:
                    print('MODMNodeLog with id {} not found.'.format(log.guid))
                else:
                    modm_node = modm_nodelog.node
                    modm_user = modm_nodelog.user
                    # A MODM log may lack a user/node even when the Django row
                    # has one; report that as a mismatch instead of crashing
                    # on ``None._id``.
                    modm_user_id = None if modm_user is None else modm_user._id
                    modm_node_id = None if modm_node is None else modm_node._id

                    if log.user is not None and log.user._guid.guid != modm_user_id:
                        print("User doesn't match on log {}; {} != {}".format(
                            log.guid, modm_user_id, log.user._guid.guid))
                    if log.node is not None and log.node._guid.guid != modm_node_id:
                        print("Node doesn't match on log {}; {} != {}".format(
                            log.guid, modm_node_id, log.node._guid.guid))
                    # The MODM date is localized to UTC before comparison.
                    if log.date is not None and pytz.utc.localize(
                            modm_nodelog.date) != log.date:
                        print("Date doesn't match on log {}".format(log.guid))
                    if log.action is not None and log.action != modm_nodelog.action:
                        print("Action doesn't match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.action, log.action))
                    if log.params is not None and log.params != modm_nodelog.params:
                        print("Params doesn't match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.params, log.params))
                    if log.should_hide is not None and log.should_hide != modm_nodelog.should_hide:
                        print("Should_hide does'nt match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.should_hide,
                            log.should_hide))
                    if log.foreign_user is not None and log.foreign_user != '' and log.foreign_user != modm_nodelog.foreign_user:
                        print("Foreign_user doesn't match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.foreign_user,
                            log.foreign_user))

                count += 1
                if count % page_size == 0:
                    # Drop MODM's cached objects after each page of rows.
                    MODMNodeLog._cache.clear()
                    MODMNodeLog._object_cache.clear()
                    print('{} through {}'.format(count, count + page_size))
示例#30
0
def get_targets():
    """
    Return NodeLogs for the registration, retraction and embargo actions listed below.

    ``project_registered`` is intentionally absent; per the original author
    its params are already correct.
    """
    actions = (
        'registration_initiated',
        'registration_approved',
        # On staging, there are a few inconsistencies with these.  Majority of params['node'] are registrations, but a handful are nodes.
        'registration_cancelled',
        'retraction_initiated',
        # params['node'] is already equal to node.  Adds registration_field below.  Will be slow.
        'retraction_approved',
        'retraction_cancelled',
        'embargo_initiated',
        'embargo_approved',
        'embargo_completed',
        'embargo_cancelled',
    )
    # Combine one equality filter per action with OR, left to right.
    query = Q('action', 'eq', actions[0])
    for action in actions[1:]:
        query = query | Q('action', 'eq', action)
    return NodeLog.find(query)
示例#31
0
def get_targets():
    """
    Collect the NodeLogs for every registration-related action except project_registered.

    The original author excluded ``project_registered`` because its params
    are already correct.
    """
    wanted_actions = [
        'registration_initiated',
        'registration_approved',
        # On staging, there are a few inconsistencies with these.  Majority of params['node'] are registrations, but a handful are nodes.
        'registration_cancelled',
        'retraction_initiated',
        # params['node'] is already equal to node.  Adds registration_field below.  Will be slow.
        'retraction_approved',
        'retraction_cancelled',
        'embargo_initiated',
        'embargo_approved',
        'embargo_completed',
        'embargo_cancelled',
    ]
    combined = None
    for name in wanted_actions:
        clause = Q('action', 'eq', name)
        # The first clause seeds the query; later ones are OR-ed onto it.
        combined = clause if combined is None else combined | clause
    return NodeLog.find(combined)
示例#32
0
def main():
    """Cross-check each Django NodeLog against the MODM log with the same guid.

    Rows are read newest-first through a server-side cursor within a single
    ``transaction.atomic()`` block.  User, node, date, action, params,
    should_hide and foreign_user are compared whenever the Django value is not
    ``None`` (and, for foreign_user, not empty); every difference is printed.
    A missing MODM log is reported as well.

    After each ``page_size`` rows the MODM caches are cleared and a progress
    line is printed.
    """
    count = 0
    page_size = 50000
    with transaction.atomic():
        qs = NodeLog.objects.all().order_by('-date').select_related('user').select_related('node').select_related('user___guid').select_related('node___guid')
        with server_side_cursors(qs, itersize=page_size):
            for log in qs.iterator():
                modm_nodelog = MODMNodeLog.load(log.guid)
                if modm_nodelog is not None:
                    modm_node = modm_nodelog.node
                    modm_user = modm_nodelog.user
                    # Tolerate MODM logs without a user/node: compare against
                    # None rather than raising AttributeError on ``None._id``.
                    modm_user_id = modm_user._id if modm_user is not None else None
                    modm_node_id = modm_node._id if modm_node is not None else None

                    if log.user is not None and log.user._guid.guid != modm_user_id:
                        print("User doesn't match on log {}; {} != {}".format(
                            log.guid, modm_user_id, log.user._guid.guid))
                    if log.node is not None and log.node._guid.guid != modm_node_id:
                        print("Node doesn't match on log {}; {} != {}".format(
                            log.guid, modm_node_id, log.node._guid.guid))
                    # The MODM date is localized to UTC before comparison.
                    if log.date is not None and pytz.utc.localize(
                            modm_nodelog.date) != log.date:
                        print("Date doesn't match on log {}".format(log.guid))
                    if log.action is not None and log.action != modm_nodelog.action:
                        print("Action doesn't match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.action, log.action))
                    if log.params is not None and log.params != modm_nodelog.params:
                        print("Params doesn't match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.params, log.params))
                    if log.should_hide is not None and log.should_hide != modm_nodelog.should_hide:
                        print("Should_hide does'nt match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.should_hide, log.should_hide))
                    if log.foreign_user is not None and log.foreign_user != '' and log.foreign_user != modm_nodelog.foreign_user:
                        print("Foreign_user doesn't match on log {}; `{}` != `{}`".format(
                            log.guid, modm_nodelog.foreign_user, log.foreign_user))
                else:
                    print('MODMNodeLog with id {} not found.'.format(log.guid))

                count += 1
                if count % page_size == 0:
                    # Drop MODM's cached objects after each page of rows.
                    MODMNodeLog._cache.clear()
                    MODMNodeLog._object_cache.clear()
                    print('{} through {}'.format(count, count + page_size))
示例#33
0
def get_targets():
    """Return the ``retraction_approved`` NodeLogs and log how many were found."""
    # ... the logs whose registrations we want to migrate ...
    approved_retractions = Q('action', 'eq', 'retraction_approved')
    targets = NodeLog.find(approved_retractions)
    found = len(targets)
    logger.info('Retractions found: {}'.format(found))
    return targets
示例#34
0
def get_targets():
    """Return the NodeLogs whose ``should_hide`` field equals True."""
    hidden_only = Q('should_hide', 'eq', True)
    return NodeLog.find(hidden_only)
示例#35
0
def get_targets():
    """Find every NodeLog that has ``should_hide`` set to True."""
    hidden_logs = NodeLog.find(Q('should_hide', 'eq', True))
    return hidden_logs
示例#36
0
def get_targets():
    """Return the NodeLogs whose action is ``NodeLog.WIKI_DELETED``."""
    wiki_deleted = Q('action', 'eq', NodeLog.WIKI_DELETED)
    return NodeLog.find(wiki_deleted)
示例#37
0
 def tearDown(self):
     """Call ``remove()`` on NodeLog, then on Node, after each test.

     NOTE(review): presumably ``remove()`` without arguments deletes all stored
     records of the model -- confirm against the ODM.
     """
     for model in (NodeLog, Node):
         model.remove()
示例#38
0
def get_aggregate_logs(ids, user, count=100):
    """Return up to ``count`` NodeLogs whose ``params.node`` is in ``ids``, sorted by ``date``.

    :param ids: node ids to match against ``params.node``.
    :param user: accepted but not used by this function.
    :param count: maximum number of logs; converted with ``int()``.
    :return: list of NodeLogs.
    """
    node_filter = Q('params.node', 'in', ids)
    by_date = NodeLog.find(node_filter).sort('date')
    limited = by_date.limit(int(count))
    return list(limited)
示例#39
0
def count_user_logs(user, query=None):
    """Return the number of NodeLogs for ``user``, optionally narrowed by ``query``.

    A truthy ``query`` is AND-ed with the user filter; otherwise the user
    filter is used on its own.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter
    matching = NodeLog.find(query)
    return matching.count()
示例#40
0
 def tearDown(self):
     """Run the parent class's tearDown, then call ``NodeLog.remove()``."""
     parent = super(TestNodeLogList, self)
     parent.tearDown()
     NodeLog.remove()
示例#41
0
def logs_since(user, date):
    """Return the NodeLogs for ``user`` whose ``date`` is greater than ``date``."""
    by_user = Q('user', 'eq', user._id)
    after_date = Q('date', 'gt', date)
    return NodeLog.find(by_user & after_date)
示例#42
0
def get_targets():
    """Look up the NodeLogs with action ``retraction_approved``.

    The number of logs found is written to the module logger before the
    result is returned.
    """
    # ... the logs whose registrations we want to migrate ...
    retraction_logs = NodeLog.find(Q('action', 'eq', 'retraction_approved'))
    summary = 'Retractions found: {}'.format(len(retraction_logs))
    logger.info(summary)
    return retraction_logs
示例#43
0
def _user_pk_for(modm_user):
    """Return the Django pk for a MODM user (creating it on a lookup miss), or None."""
    if modm_user is None:
        # log doesn't have user
        return None
    # try to get the pk out of the lookup table
    user_pk = modm_to_django.get(modm_user._id, None)
    if user_pk is None:
        # it wasn't there: create a new user and remember it for next time
        print('Creating User {}'.format(modm_user._id))
        user_pk = get_or_create_user(modm_user).pk
        modm_to_django[modm_user._id] = user_pk
    return user_pk


def _node_pk_for(node_id):
    """Return the Django pk for a node guid or MODMNode instance, or None if unresolved."""
    if isinstance(node_id, basestring):
        node_guid = node_id
    elif isinstance(node_id, MODMNode):
        node_guid = node_id._id
    else:
        # None, or a value that is neither a guid nor a node instance
        return None

    node_pk = modm_to_django.get(node_guid, None)
    if node_pk is not None:
        return node_pk

    # it wasn't in the table: create the PG version
    print('Creating Node {}'.format(node_id))
    if isinstance(node_id, basestring):
        modm_node = MODMNode.load(node_id)
    else:
        modm_node = node_id
    # Create the node once and reuse the result rather than calling
    # get_or_create_node a second time just to read ``.pk``.
    django_node = get_or_create_node(modm_node)
    if django_node is None:
        print('Node {} does not exist.'.format(node_id))
        return None
    node_pk = django_node.pk
    # put it in the table for later
    modm_to_django[modm_node._id] = node_pk
    return node_pk


def main():
    """Migrate every MODM NodeLog into the Django ``NodeLog`` table, page by page.

    Logs are read newest-first in pages of ``page_size``. Logs whose guid
    already exists in Django are skipped. For the rest, the user and node
    primary keys are resolved through the ``modm_to_django`` lookup table
    (defined outside this function), creating the Django user/node on a
    miss. Each page is inserted with a single ``bulk_create`` followed by
    the ``was_connected_to`` many-to-many links, inside one transaction.
    Progress and the final MODM/Django totals are printed.
    """
    total = MODMNodeLog.find().count()
    count = 0
    page_size = 100000

    print('Migrating {} logs...'.format(total))
    while count < total:
        page_start = count
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count +
                                                         page_size]
        page_len = len(modm_nodelogs)
        if page_len == 0:
            # ``total`` was counted up front; if fewer logs are available now
            # an empty page would otherwise make this loop spin forever.
            print('No logs returned at offset {}; stopping early.'.format(
                count))
            break

        django_nodelogs = []
        # A set gives O(1) duplicate checks; it only needs to cover the
        # current page because flushed rows are caught by the exists() query.
        pending_guids = set()
        django_nodelogs_was_connected_to = {}

        with transaction.atomic():
            print('Migrating {} through {} which is {}'.format(
                count, count + page_size, page_len))
            for modm_nodelog in modm_nodelogs:
                guid = modm_nodelog._id
                if guid in pending_guids:
                    print('NodeLog with id {} and data {} was already in the bulk_create'.format(
                        guid, modm_nodelog.to_storage()))
                # don't recreate the log if it exists
                elif not NodeLog.objects.filter(guid=guid).exists():
                    user_pk = _user_pk_for(modm_nodelog.user)

                    # get the node (either a MODMNode instance or a node guid)
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = _node_pk_for(node_id)

                    if node_pk is None:
                        print('Node {} is None on NodeLog {}...'.format(
                            node_id, guid))
                    else:
                        was_connected_to = []
                        for wct in modm_nodelog.was_connected_to:
                            wct_pk = modm_to_django.get(wct._id, None)
                            if wct_pk is None:
                                wct_pk = get_or_create_node(wct).pk
                                modm_to_django[wct._id] = wct_pk
                            was_connected_to.append(wct_pk)

                        if modm_nodelog.date is None:
                            nodelog_date = None
                        else:
                            nodelog_date = pytz.utc.localize(modm_nodelog.date)

                        django_nodelogs.append(
                            NodeLog(guid=guid,
                                    date=nodelog_date,
                                    action=modm_nodelog.action,
                                    params=modm_nodelog.params,
                                    should_hide=modm_nodelog.should_hide,
                                    user_id=user_pk,
                                    foreign_user=modm_nodelog.foreign_user
                                    or '',
                                    node_id=node_pk))
                        django_nodelogs_was_connected_to[guid] = was_connected_to
                        pending_guids.add(guid)

                # Advance for every record read, including skipped ones:
                # ``count`` is the slice offset of the next page, so leaving
                # it behind re-reads records and can stall the loop.
                count += 1
                if count % (page_size // 50) == 0:
                    print('Through {}'.format(count))

            # Flush once per page rather than only when ``count`` lands on a
            # multiple of ``page_size``; otherwise the final, partial page
            # would never be written.
            print('Starting to migrate {} through {} which should be {}'.format(
                page_start, count, len(django_nodelogs)))
            if django_nodelogs:
                NodeLog.objects.bulk_create(django_nodelogs)

                print('Finished migrating {} through {} which should be {}'.format(
                    page_start, count, len(django_nodelogs)))
                print('Adding m2m values')
                for django_nodelog in django_nodelogs:
                    wct_pks = django_nodelogs_was_connected_to[
                        django_nodelog.guid]
                    # Skip the extra lookup query when there is nothing to link.
                    if wct_pks:
                        nl = NodeLog.objects.get(guid=django_nodelog.guid)
                        nl.was_connected_to.add(*wct_pks)
                print('Finished adding m2m values')

        garbage = gc.collect()
        print('Collected {} garbages!'.format(garbage))

    # Ring the terminal bell five times to signal completion.
    for _ in range(5):
        print('\a')
    print('Finished migration. MODM: {}, DJANGO: {}'.format(
        total,
        NodeLog.objects.all().count()))
示例#44
0
def get_targets():
    """Return embargo-approved NodeLogs whose ``params.user`` is ``None``."""
    embargo_approved = Q('action', 'eq', NodeLog.EMBARGO_APPROVED)
    missing_user = Q('params.user', 'eq', None)
    return NodeLog.find(embargo_approved & missing_user)
示例#45
0
def _user_pk_for(modm_user):
    """Return the Django pk for a MODM user (creating it on a lookup miss), or None."""
    if modm_user is None:
        # log doesn't have user
        return None
    # try to get the pk out of the lookup table
    user_pk = modm_to_django.get(modm_user._id, None)
    if user_pk is None:
        # it wasn't there: create a new user and remember it for next time
        print('Creating User {}'.format(modm_user._id))
        user_pk = get_or_create_user(modm_user).pk
        modm_to_django[modm_user._id] = user_pk
    return user_pk


def _node_pk_for(node_id):
    """Return the Django pk for a node guid or MODMNode instance, or None if unresolved."""
    if isinstance(node_id, basestring):
        node_guid = node_id
    elif isinstance(node_id, MODMNode):
        node_guid = node_id._id
    else:
        # None, or a value that is neither a guid nor a node instance
        return None

    node_pk = modm_to_django.get(node_guid, None)
    if node_pk is not None:
        return node_pk

    # it wasn't in the table: create the PG version
    print('Creating Node {}'.format(node_id))
    if isinstance(node_id, basestring):
        modm_node = MODMNode.load(node_id)
    else:
        modm_node = node_id
    # Create the node once and reuse the result rather than calling
    # get_or_create_node a second time just to read ``.pk``.
    django_node = get_or_create_node(modm_node)
    if django_node is None:
        print('Node {} does not exist.'.format(node_id))
        return None
    node_pk = django_node.pk
    # put it in the table for later
    modm_to_django[modm_node._id] = node_pk
    return node_pk


def main():
    """Migrate every MODM NodeLog into the Django ``NodeLog`` table, page by page.

    Logs are read newest-first in pages of ``page_size``. Logs whose guid
    already exists in Django are skipped. For the rest, the user and node
    primary keys are resolved through the ``modm_to_django`` lookup table
    (defined outside this function), creating the Django user/node on a
    miss. Each page is inserted with a single ``bulk_create`` followed by
    the ``was_connected_to`` many-to-many links, inside one transaction.
    Progress and the final MODM/Django totals are printed.
    """
    total = MODMNodeLog.find().count()
    count = 0
    page_size = 100000

    print('Migrating {} logs...'.format(total))
    while count < total:
        page_start = count
        modm_nodelogs = MODMNodeLog.find().sort('-date')[count:count + page_size]
        page_len = len(modm_nodelogs)
        if page_len == 0:
            # ``total`` was counted up front; if fewer logs are available now
            # an empty page would otherwise make this loop spin forever.
            print('No logs returned at offset {}; stopping early.'.format(
                count))
            break

        django_nodelogs = []
        # A set gives O(1) duplicate checks; it only needs to cover the
        # current page because flushed rows are caught by the exists() query.
        pending_guids = set()
        django_nodelogs_was_connected_to = {}

        with transaction.atomic():
            print('Migrating {} through {} which is {}'.format(
                count, count + page_size, page_len))
            for modm_nodelog in modm_nodelogs:
                guid = modm_nodelog._id
                if guid in pending_guids:
                    print('NodeLog with id {} and data {} was already in the bulk_create'.format(
                        guid, modm_nodelog.to_storage()))
                # don't recreate the log if it exists
                elif not NodeLog.objects.filter(guid=guid).exists():
                    user_pk = _user_pk_for(modm_nodelog.user)

                    # get the node (either a MODMNode instance or a node guid)
                    node_id = modm_nodelog.params.get(
                        'node', modm_nodelog.params.get('project'))
                    node_pk = _node_pk_for(node_id)

                    if node_pk is None:
                        print('Node {} is None on NodeLog {}...'.format(
                            node_id, guid))
                    else:
                        was_connected_to = []
                        for wct in modm_nodelog.was_connected_to:
                            wct_pk = modm_to_django.get(wct._id, None)
                            if wct_pk is None:
                                wct_pk = get_or_create_node(wct).pk
                                modm_to_django[wct._id] = wct_pk
                            was_connected_to.append(wct_pk)

                        if modm_nodelog.date is None:
                            nodelog_date = None
                        else:
                            nodelog_date = pytz.utc.localize(modm_nodelog.date)

                        django_nodelogs.append(NodeLog(
                            guid=guid,
                            date=nodelog_date,
                            action=modm_nodelog.action,
                            params=modm_nodelog.params,
                            should_hide=modm_nodelog.should_hide,
                            user_id=user_pk,
                            foreign_user=modm_nodelog.foreign_user or '',
                            node_id=node_pk))
                        django_nodelogs_was_connected_to[guid] = was_connected_to
                        pending_guids.add(guid)

                # Advance for every record read, including skipped ones:
                # ``count`` is the slice offset of the next page, so leaving
                # it behind re-reads records and can stall the loop.
                count += 1
                if count % (page_size // 50) == 0:
                    print('Through {}'.format(count))

            # Flush once per page rather than only when ``count`` lands on a
            # multiple of ``page_size``; otherwise the final, partial page
            # would never be written.
            print('Starting to migrate {} through {} which should be {}'.format(
                page_start, count, len(django_nodelogs)))
            if django_nodelogs:
                NodeLog.objects.bulk_create(django_nodelogs)

                print('Finished migrating {} through {} which should be {}'.format(
                    page_start, count, len(django_nodelogs)))
                print('Adding m2m values')
                for django_nodelog in django_nodelogs:
                    wct_pks = django_nodelogs_was_connected_to[
                        django_nodelog.guid]
                    # Skip the extra lookup query when there is nothing to link.
                    if wct_pks:
                        nl = NodeLog.objects.get(guid=django_nodelog.guid)
                        nl.was_connected_to.add(*wct_pks)
                print('Finished adding m2m values')

        garbage = gc.collect()
        print('Collected {} garbages!'.format(garbage))

    # Ring the terminal bell five times to signal completion.
    for _ in range(5):
        print('\a')
    print('Finished migration. MODM: {}, DJANGO: {}'.format(
        total, NodeLog.objects.all().count()))
示例#46
0
def main():
    """Bulk-copy every MODM NodeLog into the Django ``NodeLog`` table.

    Logs are read newest-first in pages of ``page_size`` and inserted with
    one ``bulk_create`` per page, each in its own transaction. User and node
    primary keys are read from the ``modm_to_django`` lookup table (defined
    outside this function); a log whose user or node cannot be looked up is
    still migrated with that reference set to ``None`` and is tallied in the
    blank counters. Progress, timings and the final totals are printed.
    """
    start = datetime.now()
    split = start
    total = MODMNodeLog.find().count()

    count = 0
    page_size = 10000
    blank_users = 0
    blank_nodes = 0

    while count < total:
        garbage = gc.collect()
        print('Collected {} whole garbages!'.format(garbage))
        print('Migrating {} through {}'.format(count, count + page_size))

        page_start = count
        django_nodelogs = []
        # A set gives O(1) duplicate checks within the page.
        nodelog_guids = set()

        page = MODMNodeLog.find().sort('-date')[count:count + page_size]
        for modm_nodelog in page:
            guid = modm_nodelog._id
            if guid in nodelog_guids:
                print('Nodelog with guid of {} and data of {} exists in batch'.format(
                    guid, modm_nodelog.to_storage()))
            else:
                nodelog_guids.add(guid)

                # A missing user (AttributeError on None) or a user absent
                # from the lookup table (KeyError) is recorded as blank.
                try:
                    user_pk = modm_to_django[modm_nodelog.user._id]
                except (KeyError, AttributeError):
                    blank_users += 1
                    user_pk = None

                try:
                    node_pk = modm_to_django[getattr(modm_nodelog, 'node',
                                                     None)._id]
                except (KeyError, AttributeError):
                    blank_nodes += 1
                    print('Found blank node on {}'.format(guid))
                    node_pk = None

                if modm_nodelog.date is None:
                    nodelog_date = None
                else:
                    nodelog_date = pytz.utc.localize(modm_nodelog.date)
                django_nodelogs.append(
                    NodeLog(guid=guid,
                            date=nodelog_date,
                            action=modm_nodelog.action,
                            params=modm_nodelog.params,
                            should_hide=modm_nodelog.should_hide,
                            user_id=user_pk,
                            foreign_user=modm_nodelog.foreign_user or '',
                            node_id=node_pk))

            # Advance for every record read, duplicates included: ``count``
            # is the slice offset of the next page, so it must track the
            # number of records consumed rather than the number kept.
            count += 1
            if count % 1000 == 0:
                print('Through {} in {}'.format(count, (datetime.now() -
                                                        split).total_seconds()))
                split = datetime.now()

        if count == page_start:
            # ``total`` was counted up front; if fewer logs are available now
            # an empty page would otherwise make this loop spin forever.
            print('No logs returned at offset {}; stopping early.'.format(
                count))
            break

        # Flush once per page rather than only when ``count`` lands on a
        # multiple of ``page_size``; otherwise the final, partial page would
        # be discarded without ever being written.
        print('{} blank users; {} blank nodes'.format(
            blank_users, blank_nodes))
        print('Starting to migrate {} through {} which is {}'.format(
            page_start, count, len(django_nodelogs)))
        splat = datetime.now()

        if django_nodelogs:
            with transaction.atomic():
                NodeLog.objects.bulk_create(django_nodelogs)

        print('Finished migrating {} through {} in {} which is {}'.format(
            page_start, count,
            (datetime.now() - splat).total_seconds(),
            len(django_nodelogs)))

        # Drop the batch before collecting so its objects can be reclaimed.
        django_nodelogs = []
        nodelog_guids = set()

        garbage = gc.collect()
        print('Collected {} whole garbages!'.format(garbage))

    print('\a\a\a\a\a')
    print('Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total,
        NodeLog.objects.count()))
    print('There were {} blank users and {} blank nodes'.format(
        blank_users, blank_nodes))
示例#47
0
 def tearDown(self):
     """Remove stored NodeLogs, then stored Nodes, once a test has finished."""
     for model_cls in (NodeLog, Node):
         model_cls.remove()
示例#48
0
def get_registration_approved_logs():
    """Return ``registration_approved`` logs that lack a registration param.

    These logs do not have a ``params['registration']`` field, so the query
    matches on ``params.registration`` being ``None``.
    """
    is_approval = Q('action', 'eq', 'registration_approved')
    lacks_registration = Q('params.registration', 'eq', None)
    return NodeLog.find(is_approval & lacks_registration)
示例#49
0
 def tearDown(self):
     """Run the parent teardown, then remove the stored NodeLogs."""
     super(TestNodeLogList, self).tearDown()
     # Called with no arguments, after the parent class has cleaned up.
     NodeLog.remove()
示例#50
0
def count_user_logs(user, query=None):
    """Return how many NodeLogs belong to ``user``.

    :param user: object whose ``_id`` is matched against each log's ``user``.
    :param query: optional additional query; when truthy it is combined with
        the user filter using ``&=``, otherwise only the user filter applies.
    :return: the count reported by the ``NodeLog.find`` result.
    """
    user_filter = Q('user', 'eq', user._id)
    if not query:
        query = user_filter
    else:
        query &= user_filter
    return NodeLog.find(query).count()
示例#51
0
def main():
    """Bulk-copy every MODM NodeLog into the Django ``NodeLog`` table.

    Logs are read newest-first in pages of ``page_size`` and inserted with
    one ``bulk_create`` per page, each in its own transaction. User and node
    primary keys are read from the ``modm_to_django`` lookup table (defined
    outside this function); a log whose user or node cannot be looked up is
    still migrated with that reference set to ``None`` and is tallied in the
    blank counters. Progress, timings and the final totals are printed.
    """
    start = datetime.now()
    split = start
    total = MODMNodeLog.find().count()

    count = 0
    page_size = 10000
    blank_users = 0
    blank_nodes = 0

    while count < total:
        garbage = gc.collect()
        print('Collected {} whole garbages!'.format(garbage))
        print('Migrating {} through {}'.format(count, count + page_size))

        page_start = count
        django_nodelogs = []
        # A set gives O(1) duplicate checks within the page.
        nodelog_guids = set()

        page = MODMNodeLog.find().sort('-date')[count:count + page_size]
        for modm_nodelog in page:
            guid = modm_nodelog._id
            if guid in nodelog_guids:
                print('Nodelog with guid of {} and data of {} exists in batch'.format(
                    guid, modm_nodelog.to_storage()))
            else:
                nodelog_guids.add(guid)

                # A missing user (AttributeError on None) or a user absent
                # from the lookup table (KeyError) is recorded as blank.
                try:
                    user_pk = modm_to_django[modm_nodelog.user._id]
                except (KeyError, AttributeError):
                    blank_users += 1
                    user_pk = None

                try:
                    node_pk = modm_to_django[getattr(modm_nodelog, 'node',
                                                     None)._id]
                except (KeyError, AttributeError):
                    blank_nodes += 1
                    print('Found blank node on {}'.format(guid))
                    node_pk = None

                if modm_nodelog.date is None:
                    nodelog_date = None
                else:
                    nodelog_date = pytz.utc.localize(modm_nodelog.date)
                django_nodelogs.append(
                    NodeLog(guid=guid,
                            date=nodelog_date,
                            action=modm_nodelog.action,
                            params=modm_nodelog.params,
                            should_hide=modm_nodelog.should_hide,
                            user_id=user_pk,
                            foreign_user=modm_nodelog.foreign_user or '',
                            node_id=node_pk))

            # Advance for every record read, duplicates included: ``count``
            # is the slice offset of the next page, so it must track the
            # number of records consumed rather than the number kept.
            count += 1
            if count % 1000 == 0:
                print('Through {} in {}'.format(count, (
                    datetime.now() - split).total_seconds()))
                split = datetime.now()

        if count == page_start:
            # ``total`` was counted up front; if fewer logs are available now
            # an empty page would otherwise make this loop spin forever.
            print('No logs returned at offset {}; stopping early.'.format(
                count))
            break

        # Flush once per page rather than only when ``count`` lands on a
        # multiple of ``page_size``; otherwise the final, partial page would
        # be discarded without ever being written.
        print('{} blank users; {} blank nodes'.format(blank_users,
                                                      blank_nodes))
        print('Starting to migrate {} through {} which is {}'.format(
            page_start, count, len(django_nodelogs)))
        splat = datetime.now()

        if django_nodelogs:
            with transaction.atomic():
                NodeLog.objects.bulk_create(django_nodelogs)

        print('Finished migrating {} through {} in {} which is {}'.format(
            page_start, count,
            (datetime.now() - splat).total_seconds(),
            len(django_nodelogs)))

        # Drop the batch before collecting so its objects can be reclaimed.
        django_nodelogs = []
        nodelog_guids = set()

        garbage = gc.collect()
        print('Collected {} whole garbages!'.format(garbage))

    print('\a\a\a\a\a')
    print('Finished migration in {}. MODM: {}, DJANGO: {}'.format(
        (datetime.now() - start).total_seconds(), total,
        NodeLog.objects.count()))
    print('There were {} blank users and {} blank nodes'.format(blank_users,
                                                                blank_nodes))
示例#52
0
def get_targets():
    """Find the NodeLogs recorded with the ``NodeLog.WIKI_DELETED`` action."""
    deleted_wiki = Q('action', 'eq', NodeLog.WIKI_DELETED)
    matching_logs = NodeLog.find(deleted_wiki)
    return matching_logs
def get_targets():
    """Find embargo-approved NodeLogs whose ``params.user`` is ``None``."""
    approved = Q('action', 'eq', NodeLog.EMBARGO_APPROVED)
    no_user = Q('params.user', 'eq', None)
    combined = approved & no_user
    return NodeLog.find(combined)