Example #1
 def test_set_targets(self):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     job = ArchiveJob(src_node=proj, dst_node=reg, initiator=proj.creator)
     job.set_targets()
     assert_equal([t.name for t in job.target_addons],
                  ['osfstorage', 'dropbox'])
Example #2
 def test_find_failed_registrations(self):
     failures = []
     legacy = []
     delta = datetime.timedelta(days=2)
     for i in range(5):
         reg = factories.RegistrationFactory()
         reg.archive_job._fields['datetime_initiated'].__set__(
             reg.archive_job, datetime.datetime.now() - delta, safe=True)
         reg.save()
         ArchiveJob.remove_one(reg.archive_job)
         legacy.append(reg._id)
     for i in range(5):
         reg = factories.RegistrationFactory()
         reg.archive_job._fields['datetime_initiated'].__set__(
             reg.archive_job, datetime.datetime.now() - delta, safe=True)
         reg.save()
         for addon in ['osfstorage', 'dropbox']:
             reg.archive_job._set_target(addon)
             reg.archive_job.update_target(addon, ARCHIVER_INITIATED)
         reg.archive_job.sent = False
         reg.archive_job.save()
         failures.append(reg._id)
     pending = []
     for i in range(5):
         reg = factories.RegistrationFactory()
         for addon in ['osfstorage', 'dropbox']:
             reg.archive_job._set_target(addon)
             reg.archive_job.update_target(addon, ARCHIVER_INITIATED)
         reg.archive_job.save()
         pending.append(reg)
     failed = scripts.find_failed_registrations()
     assert_items_equal([f._id for f in failed], failures)
     for pk in legacy:
         assert_false(pk in failed)
Example #3
def before_archive(node, user):
    link_archive_provider(node, user)
    job = ArchiveJob(
        src_node=node.registered_from,
        dst_node=node,
        initiator=user
    )
    job.set_targets()
Example #4
File: utils.py Project: jinluyuan/osf.io
def before_archive(node, user):
    link_archive_provider(node, user)
    job = ArchiveJob(
        src_node=node.registered_from,
        dst_node=node,
        initiator=user
    )
    job.set_targets()
Example #5
 def test_get_target(self):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     job = ArchiveJob(src_node=proj, dst_node=reg, initiator=proj.creator)
     job.set_targets()
     dropbox = job.get_target('dropbox')
     assert_false(not dropbox)
     none = job.get_target('fake')
     assert_false(none)
Example #6
    def test_target_info(self):
        target = ArchiveTarget(name='neon-archive')
        target.save()
        job = ArchiveJob()
        job.target_addons.append(target)

        result = job.target_info()
        assert_equal(len(result), 1)

        item = result[0]

        assert_equal(item['name'], target.name)
        assert_equal(item['status'], target.status)
        assert_equal(item['stat_result'], target.stat_result)
        assert_equal(item['errors'], target.errors)
Example #7
    def test_target_info(self):
        target = ArchiveTarget(name='neon-archive')
        target.save()
        job = ArchiveJob()
        job.target_addons.append(target)

        result = job.target_info()
        assert_equal(len(result), 1)

        item = result[0]

        assert_equal(item['name'], target.name)
        assert_equal(item['status'], target.status)
        assert_equal(item['stat_result'], target.stat_result)
        assert_equal(item['errors'], target.errors)
Example #8
File: tasks.py Project: GloriaaLi/osf.io
def archive(job_pk):
    """Starts a celery.chord that runs stat_addon for each
    complete addon attached to the Node, then runs
    #archive_node with the result

    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger = get_task_logger(__name__)
    logger.info("Received archive task for Node: {0} into Node: {1}".format(src._id, dst._id))
    return celery.chain(
        [
            celery.group(
                stat_addon.si(
                    addon_short_name=target.name,
                    job_pk=job_pk,
                )
                for target in job.target_addons
            ),
            archive_node.s(
                job_pk=job_pk
            )
        ]
    )
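
The chain of a group plus a final task above is celery's implicit chord: the group of stat_addon signatures runs first, their results are collected into a list, and that list becomes the first positional argument of archive_node. Below is a minimal, self-contained sketch of the same pattern with placeholder tasks; the app, broker, and task names are illustrative assumptions, not osf.io code.

from celery import Celery, chain, group

# Illustrative app; the broker/backend here are in-memory stand-ins.
app = Celery('sketch', broker='memory://', backend='cache+memory://')

@app.task
def stat_one(name):
    # Stand-in for stat_addon: return a little metadata per addon.
    return {'name': name, 'num_files': 1}

@app.task
def combine(results):
    # Stand-in for archive_node: receives the list of group results.
    return sum(r['num_files'] for r in results)

workflow = chain(
    group(stat_one.si(name) for name in ['osfstorage', 'dropbox']),
    combine.s(),
)
# workflow.delay() enqueues the chord; .si() builds immutable signatures so the
# group's tasks ignore upstream results, just as archive() does above.
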
Example #9
File: tasks.py Project: GloriaaLi/osf.io
def archive_addon(addon_short_name, job_pk, stat_result):
    """Archive the contents of an addon by making a copy request to the
    WaterButler API

    :param addon_short_name: AddonConfig.short_name of the addon to be archived
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    # Dataverse requires special handling for draft
    # and published content
    addon_name = addon_short_name
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving addon: {0} on node: {1}".format(addon_short_name, src._id))
    src_provider = src.get_addon(addon_name)
    folder_name = src_provider.archive_folder_name
    cookie = user.get_or_create_cookie()
    copy_url = settings.WATERBUTLER_URL + '/ops/copy'
    if addon_name == 'dataverse':
        # The dataverse API will not differentiate between published and draft files
        # unless explicitly asked. We need to create separate folders for published and
        # draft in the resulting archive.
        #
        # Additionally trying to run the archive without this distinction creates a race
        # condition that non-deterministically caused archive jobs to fail.
        data = make_waterbutler_payload(src, dst, addon_name, '{0} (published)'.format(folder_name), cookie, revision='latest-published')
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
        data = make_waterbutler_payload(src, dst, addon_name, '{0} (draft)'.format(folder_name), cookie, revision='latest')
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
    else:
        data = make_waterbutler_payload(src, dst, addon_name, folder_name, cookie)
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
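
Because Dataverse cannot be archived in a single pass, archive_addon issues two copy requests that differ only in folder suffix and revision. The branching can be summarised by a small helper like the hypothetical one below; only the folder suffixes and revision strings come from the code above, the helper itself is not osf.io's.

# Hypothetical helper: list the (folder_name, revision) pairs archive_addon()
# would copy for a given addon. Not part of osf.io.
def copy_variants(addon_name, folder_name):
    if addon_name == 'dataverse':
        # Published and draft content go into separate archive folders.
        return [
            ('{0} (published)'.format(folder_name), 'latest-published'),
            ('{0} (draft)'.format(folder_name), 'latest'),
        ]
    # Every other addon is archived once, with no pinned revision.
    return [(folder_name, None)]

assert copy_variants('dropbox', 'Archive of Dropbox') == [('Archive of Dropbox', None)]
assert [rev for _, rev in copy_variants('dataverse', 'Archive of Dataverse')] == ['latest-published', 'latest']
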
Example #10
File: tasks.py Project: GloriaaLi/osf.io
def stat_addon(addon_short_name, job_pk):
    """Collect metadata about the file tree of a given addon

    :param addon_short_name: AddonConfig.short_name of the addon to be examined
    :param job_pk: primary key of archive_job
    :return: AggregateStatResult containing file tree metadata
    """
    # Dataverse requires special handling for draft and
    # published content
    addon_name = addon_short_name
    version = None
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        version = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    src_addon = src.get_addon(addon_name)
    try:
        file_tree = src_addon._get_file_tree(user=user, version=version)
    except HTTPError as e:
        dst.archive_job.update_target(
            addon_short_name,
            ARCHIVER_NETWORK_ERROR,
            errors=[e.data['error']],
        )
        raise
    result = AggregateStatResult(
        src_addon._id,
        addon_short_name,
        targets=[utils.aggregate_file_tree_metadata(addon_short_name, file_tree, user)],
    )
    return result
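
The '-draft' / '-published' suffix on the addon short name is how the archiver distinguishes the two Dataverse targets while still loading the single 'dataverse' addon. Here is a stand-alone sketch of that resolution step, mirroring the lines at the top of stat_addon; the helper name is illustrative, not osf.io's.

# Hypothetical helper mirroring the name/version resolution in stat_addon().
def resolve_addon(addon_short_name):
    addon_name = addon_short_name
    version = None
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        version = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
    return addon_name, version

assert resolve_addon('osfstorage') == ('osfstorage', None)
assert resolve_addon('dataverse-draft') == ('dataverse', 'latest')
assert resolve_addon('dataverse-published') == ('dataverse', 'latest-published')
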
Example #11
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node

    note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding  (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    for schema in dst.registered_schema:
        if schema.has_files:
            utils.migrate_file_metadata(dst, schema)
    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(
            dst.get_active_contributors_recursive(unique_users=True))
Example #12
File: tasks.py Project: XTech2K/osf.io
def archive_node(results, job_pk):
    """First use the results of #stat_node to check disk usage of the
    initiated registration, then either fail the registration or
    create a celery.group of subtasks to archive addons

    :param results: results from the #stat_addon subtasks spawned in #stat_node
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving node: {0}".format(src._id))
    stat_result = AggregateStatResult(
        src._id,
        src.title,
        targets=results,
    )
    if (NO_ARCHIVE_LIMIT not in job.initiator.system_tags) and (stat_result.disk_usage > settings.MAX_ARCHIVE_SIZE):
        raise ArchiverSizeExceeded(result=stat_result)
    else:
        if not results:
            job.status = ARCHIVER_SUCCESS
            job.save()
        for result in stat_result.targets:
            if not result.num_files:
                job.update_target(result.target_name, ARCHIVER_SUCCESS)
            else:
                archive_addon.delay(
                    addon_short_name=result.target_name,
                    job_pk=job_pk,
                    stat_result=result,
                )
        project_signals.archive_callback.send(dst)
Example #13
def find_failed_registrations():
    expired_if_before = timezone.now() - ARCHIVE_TIMEOUT_TIMEDELTA
    jobs = ArchiveJob.find(
        Q('sent', 'eq', False)
        & Q('datetime_initiated', 'lt', expired_if_before)
        & Q('status', 'eq', ARCHIVER_INITIATED))
    return {node.root for node in [job.dst_node for job in jobs] if node}
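
Stripped of the ODM query syntax, the criterion in this version is: never sent, initiated more than ARCHIVE_TIMEOUT_TIMEDELTA ago, and still in the ARCHIVER_INITIATED state; each surviving job is then resolved to its destination node's root. A plain-Python sketch of that filter follows, with stand-in job objects rather than the real ArchiveJob model, an assumed timeout value, and the nodes returned directly instead of their roots.

import datetime
from collections import namedtuple

# Stand-ins for the real constants and model; values are assumptions.
ARCHIVE_TIMEOUT_TIMEDELTA = datetime.timedelta(hours=24)
ARCHIVER_INITIATED = 'INITIATED'
Job = namedtuple('Job', ['sent', 'datetime_initiated', 'status', 'dst_node'])

def find_failed(jobs, now=None):
    now = now or datetime.datetime.utcnow()
    cutoff = now - ARCHIVE_TIMEOUT_TIMEDELTA
    return {
        job.dst_node for job in jobs
        if not job.sent
        and job.datetime_initiated < cutoff
        and job.status == ARCHIVER_INITIATED
        and job.dst_node
    }

stale = Job(False, datetime.datetime.utcnow() - datetime.timedelta(days=2), ARCHIVER_INITIATED, 'abc12')
fresh = Job(False, datetime.datetime.utcnow(), ARCHIVER_INITIATED, 'def34')
assert find_failed([stale, fresh]) == {'abc12'}
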
Example #14
def find_failed_registrations():
    expired_if_before = datetime.utcnow() - ARCHIVE_TIMEOUT_TIMEDELTA
    jobs = ArchiveJob.find(
        Q('sent', 'eq', False) &
        Q('datetime_initiated', 'lt', expired_if_before)
    )
    return {node.root for node in [job.dst_node for job in jobs] if node}
Example #15
File: tasks.py Project: MattVitting/osf.io
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node

    note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding  (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    for schema in dst.registered_schema:
        if schema.has_files:
            utils.migrate_file_metadata(dst, schema)
    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(dst.get_active_contributors_recursive(unique_users=True))
Example #16
 def test_archiving_registrations_not_added_to_search_before_archival(
         self, mock_update_search):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     ArchiveJob(src_node=proj, dst_node=reg, initiator=proj.creator)
     reg.save()
     mock_update_search.assert_not_called()
Example #17
 def on_failure(self, exc, task_id, args, kwargs, einfo):
     job = ArchiveJob.load(kwargs.get('job_pk'))
     if not job:
         raise ArchiverStateError({
             'exception': exc,
             'args': args,
             'kwargs': kwargs,
             'einfo': einfo,
         })
     if job.status == ARCHIVER_FAILURE:
         # already captured
         return
     src, dst, user = job.info()
     errors = []
     if isinstance(exc, ArchiverSizeExceeded):
         dst.archive_status = ARCHIVER_SIZE_EXCEEDED
         errors = exc.result
     elif isinstance(exc, HTTPError):
         dst.archive_status = ARCHIVER_NETWORK_ERROR
         errors = [
             each for each in
             dst.archive_job.target_info()
             if each is not None
         ]
     elif isinstance(exc, ArchivedFileNotFound):
         dst.archive_status = ARCHIVER_FILE_NOT_FOUND
         errors = {
             'missing_files': exc.missing_files,
             'draft': exc.draft_registration
         }
     else:
         dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
         errors = [einfo] if einfo else []
     dst.save()
     archiver_signals.archive_fail.send(dst, errors=errors)
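
on_failure here is the standard celery Task hook: the worker calls it after a task body raises, passing the exception, the task id, the original arguments, and a traceback wrapper (einfo). Below is a minimal sketch of wiring such a base class to a task; the class, task, and app names are illustrative, not osf.io's.

from celery import Celery, Task

app = Celery('sketch', broker='memory://')

class FailureAwareTask(Task):
    def on_failure(self, exc, task_id, args, kwargs, einfo):
        # Runs on the worker after the task body raises; einfo wraps the traceback.
        print('task {0} failed: {1!r}'.format(task_id, exc))

@app.task(base=FailureAwareTask, bind=True)
def flaky(self, should_fail):
    if should_fail:
        raise ValueError('boom')
    return 'ok'
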
Example #18
File: tasks.py Project: Alpani/osf.io
 def on_failure(self, exc, task_id, args, kwargs, einfo):
     job = ArchiveJob.load(kwargs.get('job_pk'))
     if not job:
         raise ArchiverStateError({
             'exception': exc,
             'args': args,
             'kwargs': kwargs,
             'einfo': einfo,
         })
     if job.status == ARCHIVER_FAILURE:
         # already captured
         return
     src, dst, user = job.info()
     errors = []
     if isinstance(exc, ArchiverSizeExceeded):
         dst.archive_status = ARCHIVER_SIZE_EXCEEDED
         errors = exc.result
     elif isinstance(exc, HTTPError):
         dst.archive_status = ARCHIVER_NETWORK_ERROR
         errors = dst.archive_job.target_info()
     else:
         dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
         errors = [einfo]
     dst.save()
     archiver_signals.archive_fail.send(dst, errors=errors)
Example #19
File: tasks.py Project: rmoorman/osf.io
 def on_failure(self, exc, task_id, args, kwargs, einfo):
     job = ArchiveJob.load(kwargs.get('job_pk'))
     if not job:
         raise ArchiverStateError({
             'exception': exc,
             'args': args,
             'kwargs': kwargs,
             'einfo': einfo,
         })
     if job.status == ARCHIVER_FAILURE:
         # already captured
         return
     src, dst, user = job.info()
     errors = []
     if isinstance(exc, ArchiverSizeExceeded):
         dst.archive_status = ARCHIVER_SIZE_EXCEEDED
         errors = exc.result
     elif isinstance(exc, HTTPError):
         dst.archive_status = ARCHIVER_NETWORK_ERROR
         errors = dst.archive_job.target_info()
     else:
         dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
         errors = [einfo]
     dst.save()
     archiver_signals.archive_fail.send(dst, errors=errors)
Example #20
def archive_addon(addon_short_name, job_pk, stat_result):
    """Archive the contents of an addon by making a copy request to the
    WaterButler API

    :param addon_short_name: AddonConfig.short_name of the addon to be archived
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    # Dataverse requires special handling for draft
    # and published content
    addon_name = addon_short_name
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving addon: {0} on node: {1}".format(addon_short_name, src._id))
    src_provider = src.get_addon(addon_name)
    folder_name = src_provider.archive_folder_name
    cookie = user.get_or_create_cookie()
    copy_url = settings.WATERBUTLER_URL + '/ops/copy'
    if addon_name == 'dataverse':
        # The dataverse API will not differentiate between published and draft files
        # unless explicitly asked. We need to create separate folders for published and
        # draft in the resulting archive.
        #
        # Additionally trying to run the archive without this distinction creates a race
        # condition that non-deterministically caused archive jobs to fail.
        data = make_waterbutler_payload(src, dst, addon_name, '{0} (published)'.format(folder_name), cookie, revision='latest-published')
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
        data = make_waterbutler_payload(src, dst, addon_name, '{0} (draft)'.format(folder_name), cookie, revision='latest')
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
    else:
        data = make_waterbutler_payload(src, dst, addon_name, folder_name, cookie)
        make_copy_request.delay(job_pk=job_pk, url=copy_url, data=data)
Example #21
 def on_failure(self, exc, task_id, args, kwargs, einfo):
     job = ArchiveJob.load(kwargs.get('job_pk'))
     if not job:
         raise ArchiverStateError({
             'exception': exc,
             'args': args,
             'kwargs': kwargs,
             'einfo': einfo,
         })
     if job.status == ARCHIVER_FAILURE:
         # already captured
         return
     src, dst, user = job.info()
     errors = []
     if isinstance(exc, ArchiverSizeExceeded):
         dst.archive_status = ARCHIVER_SIZE_EXCEEDED
         errors = exc.result
     elif isinstance(exc, HTTPError):
         dst.archive_status = ARCHIVER_NETWORK_ERROR
         errors = [
             each for each in dst.archive_job.target_info()
             if each is not None
         ]
     elif isinstance(exc, ArchivedFileNotFound):
         dst.archive_status = ARCHIVER_FILE_NOT_FOUND
         errors = {
             'missing_files': exc.missing_files,
             'draft': exc.draft_registration
         }
     else:
         dst.archive_status = ARCHIVER_UNCAUGHT_ERROR
         errors = [einfo] if einfo else []
     dst.save()
     archiver_signals.archive_fail.send(dst, errors=errors)
Example #22
File: tasks.py Project: rmoorman/osf.io
def archive_node(stat_results, job_pk):
    """First use the results of #stat_node to check disk usage of the
    initiated registration, then either fail the registration or
    create a celery.group of subtasks to archive addons

    :param stat_results: results from the #stat_addon subtasks spawned in #stat_node
    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger.info("Archiving node: {0}".format(src._id))

    if not isinstance(stat_results, list):
        stat_results = [stat_results]
    stat_result = AggregateStatResult(dst._id, dst.title, targets=stat_results)
    if (NO_ARCHIVE_LIMIT not in job.initiator.system_tags) and (
            stat_result.disk_usage > settings.MAX_ARCHIVE_SIZE):
        raise ArchiverSizeExceeded(result=stat_result)
    else:
        if not stat_result.targets:
            job.status = ARCHIVER_SUCCESS
            job.save()
        for result in stat_result.targets:
            if not result.num_files:
                job.update_target(result.target_name, ARCHIVER_SUCCESS)
            else:
                archive_addon.delay(
                    addon_short_name=result.target_name,
                    job_pk=job_pk,
                    stat_result=result,
                )
        project_signals.archive_callback.send(dst)
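
The gate in archive_node is a single comparison: raise ArchiverSizeExceeded when the aggregated disk usage exceeds the configured maximum, unless the initiator carries the no-limit system tag. A plain sketch of that decision follows; the limit value and tag string are placeholders, not osf.io settings.

# Sketch of the size gate in archive_node(); constants are assumed values.
NO_ARCHIVE_LIMIT = 'no_archive_limit'
MAX_ARCHIVE_SIZE = 5 * 1024 ** 3  # assumed 5 GB cap for illustration

def may_archive(disk_usage, initiator_system_tags):
    if NO_ARCHIVE_LIMIT in initiator_system_tags:
        return True
    return disk_usage <= MAX_ARCHIVE_SIZE

assert may_archive(10 * 1024 ** 3, ['no_archive_limit'])
assert not may_archive(10 * 1024 ** 3, [])
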
Example #23
def archive(job_pk):
    """Starts a celery.chord that runs stat_addon for each
    complete addon attached to the Node, then runs
    #archive_node with the result

    :param job_pk: primary key of ArchiveJob
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    logger = get_task_logger(__name__)
    logger.info("Received archive task for Node: {0} into Node: {1}".format(src._id, dst._id))
    return celery.chain(
        [
            celery.group(
                stat_addon.si(
                    addon_short_name=target.name,
                    job_pk=job_pk,
                )
                for target in job.target_addons
            ),
            archive_node.s(
                job_pk=job_pk
            )
        ]
    )
Example #24
def stat_addon(addon_short_name, job_pk):
    """Collect metadata about the file tree of a given addon

    :param addon_short_name: AddonConfig.short_name of the addon to be examined
    :param job_pk: primary key of archive_job
    :return: AggregateStatResult containing file tree metadata
    """
    # Dataverse requires special handling for draft and
    # published content
    addon_name = addon_short_name
    version = None
    if 'dataverse' in addon_short_name:
        addon_name = 'dataverse'
        version = 'latest' if addon_short_name.split('-')[-1] == 'draft' else 'latest-published'
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    src_addon = src.get_addon(addon_name)
    try:
        file_tree = src_addon._get_file_tree(user=user, version=version)
    except HTTPError as e:
        dst.archive_job.update_target(
            addon_short_name,
            ARCHIVER_NETWORK_ERROR,
            errors=[e.data['error']],
        )
        raise
    result = AggregateStatResult(
        src_addon._id,
        addon_short_name,
        targets=[utils.aggregate_file_tree_metadata(addon_short_name, file_tree, user)],
    )
    return result
Example #25
File: factories.py Project: rdhyee/osf.io
    def _create(cls, target_class, project=None, schema=None, user=None,
                template=None, data=None, archive=False, embargo=None, registration_approval=None, retraction=None, *args, **kwargs):
        save_kwargs(**kwargs)

        # Original project to be registered
        project = project or target_class(*args, **kwargs)
        project.save()

        # Default registration parameters
        #schema = schema or MetaSchema.find_one(
        #    Q('name', 'eq', 'Open-Ended_Registration')
        #)
        schema = None
        user = user or project.creator
        template = template or "Template1"
        data = data or "Some words"
        auth = Auth(user=user)
        register = lambda: project.register_node(
            schema=schema,
            auth=auth,
            template=template,
            data=data,
        )

        def add_approval_step(reg):
            if embargo:
                reg.embargo = embargo
            elif registration_approval:
                reg.registration_approval = registration_approval
            elif retraction:
                reg.retraction = retraction
            else:
                reg.require_approval(reg.creator)
            reg.save()
            reg.sanction.add_authorizer(reg.creator)
            reg.sanction.save()

        if archive:
            reg = register()
            add_approval_step(reg)
        else:
            with patch('framework.tasks.handlers.enqueue_task'):
                reg = register()
                add_approval_step(reg)
            with patch.object(reg.archive_job, 'archive_tree_finished', Mock(return_value=True)):
                reg.archive_job.status = ARCHIVER_SUCCESS
                reg.archive_job.save()
                reg.sanction.state = Sanction.APPROVED
                reg.sanction.save()
        ArchiveJob(
            src_node=project,
            dst_node=reg,
            initiator=user,
        )
        reg.save()
        return reg
Example #26
def main(dry):
    if dry:
        logger.info('[DRY MODE]')
    init_app(routes=False)
    for _id in FAILED_ARCHIVE_JOBS:
        archive_job = ArchiveJob.load(_id)
        assert archive_job.status == ARCHIVER_INITIATED
        root_node = archive_job.dst_node.root
        with TokuTransaction():
            clean(reg=root_node, dry=dry)
Example #27
def main(dry):
    if dry:
        logger.info('[DRY MODE]')
    init_app(routes=False)
    for _id in FAILED_ARCHIVE_JOBS:
        archive_job = ArchiveJob.load(_id)
        assert archive_job.status == ARCHIVER_INITIATED
        root_node = archive_job.dst_node.root
        with TokuTransaction():
            clean(reg=root_node, dry=dry)
Example #28
 def test_find_failed_registrations(self):
     failures = []
     legacy = []
     delta = datetime.timedelta(days=2)
     for i in range(5):
         reg = factories.RegistrationFactory()
         reg.archive_job._fields['datetime_initiated'].__set__(
             reg.archive_job,
             datetime.datetime.now() - delta,
             safe=True
         )
         reg.save()
         ArchiveJob.remove_one(reg.archive_job)
         legacy.append(reg._id)
     for i in range(5):
         reg = factories.RegistrationFactory()
         reg.archive_job._fields['datetime_initiated'].__set__(
             reg.archive_job,
             datetime.datetime.now() - delta,
             safe=True
         )
         reg.archive_job.status = ARCHIVER_INITIATED
         reg.archive_job.save()
         reg.save()
         for addon in ['osfstorage', 'dropbox']:
             reg.archive_job._set_target(addon)
             reg.archive_job.update_target(addon, ARCHIVER_INITIATED)
         reg.archive_job.sent = False
         reg.archive_job.save()
         failures.append(reg._id)
     pending = []
     for i in range(5):
         reg = factories.RegistrationFactory()
         for addon in ['osfstorage', 'dropbox']:
             reg.archive_job._set_target(addon)
             reg.archive_job.update_target(addon, ARCHIVER_INITIATED)
         reg.archive_job.save()
         pending.append(reg)
     failed = scripts.find_failed_registrations()
     assert_items_equal([f._id for f in failed], failures)
     for pk in legacy:
         assert_false(pk in failed)
Example #29
 def test_archiving_nodes_not_added_to_search_on_archive_incomplete(
         self, mock_send, mock_update_search):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     job = ArchiveJob(src_node=proj, dst_node=reg, initiator=proj.creator)
     reg.save()
     with mock.patch(
             'website.archiver.model.ArchiveJob.archive_tree_finished',
             mock.Mock(return_value=False)):
         listeners.archive_callback(reg)
     mock_update_search.assert_not_called()
Example #30
 def test_get_target(self):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     job = ArchiveJob(src_node=proj, dst_node=reg, initiator=proj.creator)
     job.set_targets()
     dropbox = job.get_target('dropbox')
     assert_false(not dropbox)
     none = job.get_target('fake')
     assert_false(none)
Example #31
def make_copy_request(job_pk, url, data):
    """Make the copy request to the WaterBulter API and handle
    successful and failed responses

    :param job_pk: primary key of ArchiveJob
    :param url: URL to send request to
    :param data: <dict> of settings to send in POST to WaterButler API
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    provider = data['source']['provider']
    logger.info("Sending copy request for addon: {0} on node: {1}".format(provider, dst._id))
    requests.post(url, data=json.dumps(data))
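
The docstring promises handling of successful and failed responses, but in this version the response object is discarded. A hedged sketch of what checking the copy response might look like is shown below; the URL and payload are placeholders, and the error handling is an assumption for illustration rather than osf.io's actual behaviour.

import json
import requests

def post_copy_request(url, data, timeout=30):
    # Sketch only: POST a JSON copy payload and surface HTTP failures.
    resp = requests.post(
        url,
        data=json.dumps(data),
        headers={'Content-Type': 'application/json'},
        timeout=timeout,
    )
    # A 2xx status means the copy finished or was accepted; anything else raises.
    resp.raise_for_status()
    return resp

# Example (placeholder URL and payload):
# post_copy_request('https://files.example.org/ops/copy',
#                   {'source': {'provider': 'dropbox'}, 'destination': {}})
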
Example #32
File: tasks.py Project: Alpani/osf.io
def make_copy_request(job_pk, url, data):
    """Make the copy request to the WaterBulter API and handle
    successful and failed responses

    :param job_pk: primary key of ArchiveJob
    :param url: URL to send request to
    :param data: <dict> of settings to send in POST to WaterButler API
    :return: None
    """
    create_app_context()
    job = ArchiveJob.load(job_pk)
    src, dst, user = job.info()
    provider = data['source']['provider']
    logger.info("Sending copy request for addon: {0} on node: {1}".format(provider, dst._id))
    requests.post(url, data=json.dumps(data))
Example #33
 def test_archiving_nodes_added_to_search_on_archive_success_if_public(self, mock_send, mock_update_search):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     job = ArchiveJob(
         src_node=proj,
         dst_node=reg,
         initiator=proj.creator
     )
     reg.save()
     with nested(
             mock.patch('website.archiver.model.ArchiveJob.archive_tree_finished', mock.Mock(return_value=True)),
             mock.patch('website.archiver.model.ArchiveJob.sent', mock.PropertyMock(return_value=False)),
             mock.patch('website.archiver.model.ArchiveJob.success', mock.PropertyMock(return_value=True))
     ) as (mock_finished, mock_sent, mock_success):
         listeners.archive_callback(reg)
     mock_update_search.assert_called_once()
Example #34
    def _create(cls, target_class, project=None, schema=None, user=None,
                template=None, data=None, archive=False, *args, **kwargs):
        save_kwargs(**kwargs)

        # Original project to be registered
        project = project or target_class(*args, **kwargs)
        project.save()

        # Default registration parameters
        #schema = schema or MetaSchema.find_one(
        #    Q('name', 'eq', 'Open-Ended_Registration')
        #)
        schema = None
        user = user or project.creator
        template = template or "Template1"
        data = data or "Some words"
        auth = Auth(user=user)
        register = lambda: project.register_node(
            schema=schema,
            auth=auth,
            template=template,
            data=data,
        )
        ArchiveJob(
            src_node=project,
            dst_node=register,
            initiator=user,
        )
        if archive:
            return register()
        else:
            with patch('framework.tasks.handlers.enqueue_task'):
                reg = register()
                archiver_utils.archive_success(
                    reg,
                    reg.registered_user
                )
                return reg
Example #35
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node

    note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding  (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    prereg_schema = MetaSchema.find_one(
        Q('name', 'eq', 'Prereg Challenge') &
        Q('schema_version', 'eq', 2)
    )
    missing_files = []
    if prereg_schema in dst.registered_schema:
        prereg_metadata = dst.registered_meta[prereg_schema._id]
        updated_metadata = {}
        for key, question in prereg_metadata.items():
            if isinstance(question['value'], dict):
                for subkey, subvalue in question['value'].items():
                    registration_file = None
                    if subvalue.get('extra', {}).get('sha256'):
                        registration_file, node_id = find_registration_file(subvalue, dst)
                        if not registration_file:
                            missing_files.append({
                                'file_name': subvalue['extra']['selectedFileName'],
                                'question_title': find_question(prereg_schema.schema, key)['title']
                            })
                            continue
                        subvalue['extra'].update({
                            'viewUrl': VIEW_FILE_URL_TEMPLATE.format(node_id=node_id, path=registration_file['path'].lstrip('/'))
                        })
                    question['value'][subkey] = subvalue
            else:
                if question.get('extra', {}).get('sha256'):
                    registration_file, node_id = find_registration_file(question, dst)
                    if not registration_file:
                        missing_files.append({
                            'file_name': question['extra']['selectedFileName'],
                            'question_title': find_question(prereg_schema.schema, key)['title']
                        })
                        continue
                    question['extra'].update({
                        'viewUrl': VIEW_FILE_URL_TEMPLATE.format(node_id=node_id, path=registration_file['path'].lstrip('/'))
                    })
            updated_metadata[key] = question

        if missing_files:
            raise ArchivedFileNotFound(
                registration=dst,
                missing_files=missing_files
            )

        prereg_metadata.update(updated_metadata)
        dst.registered_meta[prereg_schema._id] = prereg_metadata
        dst.save()

    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(dst.get_active_contributors_recursive(unique_users=True))
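
The loop above walks the Prereg metadata, matches each selected file by its sha256 hash, and rewrites its viewUrl to point at the archived copy. Here is the traversal-and-rewrite idea reduced to a self-contained sketch for the flat (non-nested) case, with a plain hash-to-path map standing in for find_registration_file; the URL template value and data are assumptions, not osf.io models.

# Sketch of the sha256-keyed rewrite: archived_by_sha maps a file hash to its
# (node_id, path) in the new registration. Stand-in data, not osf.io code.
VIEW_FILE_URL_TEMPLATE = '/project/{node_id}/files/osfstorage/{path}'  # assumed template

def rewrite_file_refs(metadata, archived_by_sha):
    missing = []
    for key, question in metadata.items():
        extra = question.get('extra', {})
        sha = extra.get('sha256')
        if not sha:
            continue
        if sha not in archived_by_sha:
            missing.append(extra.get('selectedFileName'))
            continue
        node_id, path = archived_by_sha[sha]
        extra['viewUrl'] = VIEW_FILE_URL_TEMPLATE.format(node_id=node_id, path=path.lstrip('/'))
    return missing

meta = {'q1': {'extra': {'sha256': 'abc', 'selectedFileName': 'data.csv'}}}
assert rewrite_file_refs(meta, {'abc': ('xyz12', '/data.csv')}) == []
assert meta['q1']['extra']['viewUrl'].endswith('xyz12/files/osfstorage/data.csv')
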
Example #36
def archive_success(dst_pk, job_pk):
    """Archiver's final callback. For the time being the use case for this task
    is to rewrite references to files selected in a registration schema (the Prereg
    Challenge being the first to expose this feature). The created references point
    to files on the registered_from Node (needed for previewing schema data), and
    must be re-associated with the corresponding files in the newly created registration.

    :param str dst_pk: primary key of registration Node

    note:: At first glance this task makes redundant calls to utils.get_file_map (which
    returns a generator yielding  (<sha256>, <file_metadata>) pairs) on the dst Node. Two
    notes about utils.get_file_map: 1) this function memoizes previous results to reduce
    overhead and 2) this function returns a generator that lazily fetches the file metadata
    of child Nodes (it is possible for a selected file to belong to a child Node) using a
    non-recursive DFS. Combined, this allows for a relatively efficient implementation with
    seemingly redundant calls.
    """
    create_app_context()
    dst = Node.load(dst_pk)
    # The filePicker extension added with the Prereg Challenge registration schema
    # allows users to select files in OSFStorage as their response to some schema
    # questions. These files are references to files on the unregistered Node, and
    # consequently we must migrate those file paths after archiver has run. Using
    # sha256 hashes is a convenient way to identify files post-archival.
    prereg_schema = MetaSchema.find_one(
        Q('name', 'eq', 'Prereg Challenge') & Q('schema_version', 'eq', 2))
    missing_files = []
    if prereg_schema in dst.registered_schema:
        prereg_metadata = dst.registered_meta[prereg_schema._id]
        updated_metadata = {}
        for key, question in prereg_metadata.items():
            if isinstance(question['value'], dict):
                for subkey, subvalue in question['value'].items():
                    registration_file = None
                    if subvalue.get('extra', {}).get('sha256'):
                        registration_file, node_id = find_registration_file(
                            subvalue, dst)
                        if not registration_file:
                            missing_files.append({
                                'file_name':
                                subvalue['extra']['selectedFileName'],
                                'question_title':
                                find_question(prereg_schema.schema,
                                              key)['title']
                            })
                            continue
                        subvalue['extra'].update({
                            'viewUrl':
                            VIEW_FILE_URL_TEMPLATE.format(
                                node_id=node_id,
                                path=registration_file['path'].lstrip('/'))
                        })
                    question['value'][subkey] = subvalue
            else:
                if question.get('extra', {}).get('sha256'):
                    registration_file, node_id = find_registration_file(
                        question, dst)
                    if not registration_file:
                        missing_files.append({
                            'file_name':
                            question['extra']['selectedFileName'],
                            'question_title':
                            find_question(prereg_schema.schema, key)['title']
                        })
                        continue
                    question['extra'].update({
                        'viewUrl':
                        VIEW_FILE_URL_TEMPLATE.format(
                            node_id=node_id,
                            path=registration_file['path'].lstrip('/'))
                    })
            updated_metadata[key] = question

        if missing_files:
            raise ArchivedFileNotFound(registration=dst,
                                       missing_files=missing_files)

        prereg_metadata.update(updated_metadata)
        dst.registered_meta[prereg_schema._id] = prereg_metadata
        dst.save()

    job = ArchiveJob.load(job_pk)
    if not job.sent:
        job.sent = True
        job.save()
        dst.sanction.ask(
            dst.get_active_contributors_recursive(unique_users=True))
Example #37
 def test_set_targets(self):
     proj = factories.ProjectFactory()
     reg = factories.RegistrationFactory(project=proj)
     job = ArchiveJob(src_node=proj, dst_node=reg, initiator=proj.creator)
     job.set_targets()
     assert_equal([t.name for t in job.target_addons], ['osfstorage', 'dropbox'])
Example #38
    def _create(cls,
                target_class,
                project=None,
                is_public=False,
                schema=None,
                data=None,
                archive=False,
                embargo=None,
                registration_approval=None,
                retraction=None,
                *args,
                **kwargs):
        save_kwargs(**kwargs)
        user = None
        if project:
            user = project.creator
        user = kwargs.get('user') or kwargs.get(
            'creator') or user or UserFactory()
        kwargs['creator'] = user
        # Original project to be registered
        project = project or target_class(*args, **kwargs)
        if user._id not in project.permissions:
            project.add_contributor(
                contributor=user,
                permissions=permissions.CREATOR_PERMISSIONS,
                log=False,
                save=False)
        project.save()

        # Default registration parameters
        schema = schema or get_default_metaschema()
        data = data or {'some': 'data'}
        auth = Auth(user=user)
        register = lambda: project.register_node(
            schema=schema, auth=auth, data=data)

        def add_approval_step(reg):
            if embargo:
                reg.embargo = embargo
            elif registration_approval:
                reg.registration_approval = registration_approval
            elif retraction:
                reg.retraction = retraction
            else:
                reg.require_approval(reg.creator)
            reg.save()
            reg.sanction.add_authorizer(reg.creator)
            reg.sanction.save()

        if archive:
            reg = register()
            add_approval_step(reg)
        else:
            with patch('framework.tasks.handlers.enqueue_task'):
                reg = register()
                add_approval_step(reg)
            with patch.object(reg.archive_job, 'archive_tree_finished',
                              Mock(return_value=True)):
                reg.archive_job.status = ARCHIVER_SUCCESS
                reg.archive_job.save()
                reg.sanction.state = Sanction.APPROVED
                reg.sanction.save()
        ArchiveJob(
            src_node=project,
            dst_node=reg,
            initiator=user,
        )
        if is_public:
            reg.is_public = True
        reg.save()
        return reg
Example #39
 def test_repr(self):
     job = ArchiveJob()
     result = repr(job)
     assert_in('ArchiveJob', result)
     assert_in(str(job.done), result)
     assert_in(str(job._id), result)