예제 #1
0
    def obj_create(self, bundle, **kwargs):
        """Create a link, then store the user's upload or queue a capture task.

        The link itself is created by the parent resource's ``obj_create``
        (called with ``created_by`` set to the requesting user). A new
        ``Asset`` is then built for the link and one of two things happens:

        * if ``bundle.data`` contains a ``'file'``, the file is written to
          storage under a normalized ``upload.<ext>`` name and recorded on the
          asset as a user upload;
        * otherwise the asset's captures are marked pending and a ``get_pdf``
          or ``proxy_capture`` task is handed to ``run_task``.

        Args:
            bundle: Request bundle carrying ``data`` and ``request``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The bundle returned by the parent ``obj_create``.

        Raises:
            ImmediateHttpResponse: When ``settings.READ_ONLY_MODE`` is truthy.
        """
        if settings.READ_ONLY_MODE:
            # The same notice is reported both per-field and as the top-level reason.
            maintenance_notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            error_payload = {
                'archives': {'__all__': maintenance_notice},
                'reason': maintenance_notice,
            }
            raise ImmediateHttpResponse(
                response=self.error_response(bundle.request, error_payload))

        # The parent call validates the bundle (raising if invalid), sets
        # properties and saves the object.
        parent = super(LinkResource, self)
        bundle = parent.obj_create(bundle, created_by=bundle.request.user)
        asset = Asset(link=bundle.obj)

        upload = bundle.data.get('file')
        if not upload:
            # No upload: mark captures as pending and let a worker fetch the URL.
            asset.image_capture = Asset.CAPTURE_STATUS_PENDING
            if asset.link.media_type == 'pdf':
                # Looks like a PDF, so only the PDF retrieval task is run.
                asset.pdf_capture = Asset.CAPTURE_STATUS_PENDING
                capture_task = get_pdf
            else:
                # Not a PDF: fall back to the general proxy capture.
                asset.warc_capture = Asset.CAPTURE_STATUS_PENDING
                capture_task = proxy_capture

            asset.save()
            task_args = (
                asset.link.guid,
                asset.link.submitted_url,
                asset.base_storage_path,
                bundle.request.META.get('HTTP_USER_AGENT', ''),
            )
            run_task(capture_task.s(*task_args))
            return bundle

        # Upload path: the stored name is normalized to upload.<ext>, where
        # <ext> is looked up from the file's mime type.
        mime_type = get_mime_type(upload.name)
        normalized_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']
        destination = os.path.join(asset.base_storage_path, normalized_name)

        upload.file.seek(0)  # rewind before handing the file to storage
        stored_name = default_storage.store_file(upload, destination)

        # PDFs are recorded as the PDF capture; anything else as the image capture.
        capture_field = 'pdf_capture' if mime_type == 'application/pdf' else 'image_capture'
        setattr(asset, capture_field, stored_name)
        asset.user_upload = True
        asset.user_upload_file_name = upload.name
        asset.save()

        return bundle
예제 #2
0
    def obj_create(self, bundle, **kwargs):
        """Handle an archive request: create the link and its ``Asset``.

        After the parent ``obj_create`` has created the link, an ``Asset`` is
        built for it. When the request data includes a ``'file'`` the upload is
        stored and recorded on the asset; otherwise the relevant capture fields
        are set to ``Asset.CAPTURE_STATUS_PENDING`` and a capture task is passed
        to ``run_task``.

        Args:
            bundle: Request bundle; ``bundle.data`` and ``bundle.request`` are read.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The bundle returned by the parent ``obj_create``.

        Raises:
            ImmediateHttpResponse: When ``settings.READ_ONLY_MODE`` is truthy.
        """
        if settings.READ_ONLY_MODE:
            paused_message = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            paused_response = self.error_response(bundle.request, {
                'archives': {'__all__': paused_message},
                'reason': paused_message,
            })
            raise ImmediateHttpResponse(response=paused_response)

        # Validation (raises if invalid), property assignment and the save all
        # happen inside the parent implementation.
        requesting_user = bundle.request.user
        bundle = super(LinkResource, self).obj_create(bundle, created_by=requesting_user)
        asset = Asset(link=bundle.obj)

        submitted_file = bundle.data.get('file')
        if submitted_file:
            # The stored name becomes upload.<ext>, with <ext> taken from the
            # mime-type lookup table.
            mime_type = get_mime_type(submitted_file.name)
            extension = mime_type_lookup[mime_type]['new_extension']
            target_name = 'upload.%s' % extension
            target_path = os.path.join(asset.base_storage_path, target_name)

            submitted_file.file.seek(0)  # rewind before storing
            saved_as = default_storage.store_file(submitted_file, target_path)

            if mime_type != 'application/pdf':
                asset.image_capture = saved_as
            else:
                asset.pdf_capture = saved_as
            asset.user_upload = True
            asset.user_upload_file_name = submitted_file.name
            asset.save()
        else:
            pending = Asset.CAPTURE_STATUS_PENDING
            asset.image_capture = pending

            # A link that appears to be a PDF only gets the PDF retrieval task;
            # everything else goes through the proxy capture.
            if asset.link.media_type == 'pdf':
                pending_field, worker = 'pdf_capture', get_pdf
            else:
                pending_field, worker = 'warc_capture', proxy_capture
            setattr(asset, pending_field, pending)

            asset.save()
            signature = worker.s(
                asset.link.guid,
                asset.link.submitted_url,
                asset.base_storage_path,
                bundle.request.META.get('HTTP_USER_AGENT', ''),
            )
            run_task(signature)

        return bundle
예제 #3
0
    def obj_create(self, bundle, **kwargs):
        """Create (or replace the contents of) a link and set up its captures.

        Unless ``bundle.data['replace']`` is truthy, the link is created by the
        parent resource's ``obj_create``. The link is then filed into the
        requested folder (if one was supplied) and either:

        * an uploaded ``'file'`` is written as a successful primary capture, or
        * pending primary/screenshot captures and a ``CaptureJob`` are created
          and ``run_next_capture`` is passed to ``run_task``.

        Args:
            bundle: Request bundle; reads ``data['replace']``, ``data['folder']``,
                ``data['file']``, ``data['human']`` and ``request.user``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The bundle (the parent's return value when a new link was created).

        Raises:
            ImmediateHttpResponse: When ``settings.READ_ONLY_MODE`` is truthy.
        """
        if settings.READ_ONLY_MODE:
            maintenance_notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(
                bundle.request, {
                    'archives': {'__all__': maintenance_notice},
                    'reason': maintenance_notice,
                }))

        # Runs validation (exception thrown if invalid), sets properties and
        # saves the object. Skipped when replacing, in which case bundle.obj is
        # used as-is.
        if not bundle.data.get('replace'):
            bundle = super(LinkResource, self).obj_create(
                bundle, created_by=bundle.request.user)

        link = bundle.obj
        link.save()

        # Put link in folder and handle Org settings based on folder. The
        # folder is optional in the request data, so only touch it when one
        # was actually provided instead of dereferencing None.
        folder = bundle.data.get('folder')
        if folder:
            if folder.organization and folder.organization.default_to_private:
                link.is_private = True
                link.save()
            link.move_to_folder_for_user(
                folder, bundle.request.user)  # also sets link.organization

        uploaded_file = bundle.data.get('file')
        if uploaded_file:
            # Normalize file name to upload.<ext>, with <ext> taken from the
            # mime-type lookup table.
            mime_type = get_mime_type(uploaded_file.name)
            file_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']

            base_warc_url = "file:///%s/%s" % (link.guid, file_name)

            # Only append a random version suffix to the URL when replacing a file.
            if bundle.data.get('replace'):
                warc_url = "%s?version=%s" % (
                    base_warc_url, str(random.random()).replace('.', ''))
            else:
                warc_url = base_warc_url

            capture = Capture(link=link,
                              role='primary',
                              status='success',
                              record_type='resource',
                              user_upload='True',
                              content_type=mime_type,
                              url=warc_url)

            uploaded_file.file.seek(0)  # rewind before writing the record
            capture.write_warc_resource_record(uploaded_file)
            capture.save()

        else:
            # Create primary capture placeholder.
            Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            ).save()

            # Create screenshot placeholder.
            Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            ).save()

            # Create CaptureJob.
            CaptureJob(link=link, human=bundle.data.get('human', False)).save()

            # Kick off capture tasks -- no need for guid since it'll work through the queue.
            run_task(run_next_capture.s())

        return bundle
예제 #4
0
    def obj_create(self, bundle, **kwargs):
        """Create (or replace the contents of) a link and set up its captures.

        A new link is created through the parent ``obj_create`` unless
        ``bundle.data['replace']`` is truthy. If a folder was supplied the link
        is moved into it (and made private when the folder's organization
        defaults to private). An uploaded ``'file'`` becomes a successful
        primary capture; without one, pending captures plus a ``CaptureJob``
        are created and ``run_next_capture`` is passed to ``run_task``.

        Args:
            bundle: Request bundle; reads ``data['replace']``, ``data['folder']``,
                ``data['file']``, ``data['human']`` and ``request.user``.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The bundle (the parent's return value when a new link was created).

        Raises:
            ImmediateHttpResponse: When ``settings.READ_ONLY_MODE`` is truthy.
        """
        if settings.READ_ONLY_MODE:
            maintenance_notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': maintenance_notice},
                'reason': maintenance_notice,
            }))

        # Runs validation (exception thrown if invalid), sets properties and saves the object.
        # When replacing, the parent call is skipped and bundle.obj is used as-is.
        if not bundle.data.get('replace'):
            bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)

        link = bundle.obj
        link.save()

        # Put link in folder and handle Org settings based on folder.
        # 'folder' may be absent from the request data; guard so a missing
        # folder does not raise AttributeError on None.
        folder = bundle.data.get('folder')
        if folder:
            if folder.organization and folder.organization.default_to_private:
                link.is_private = True
                link.save()
            link.move_to_folder_for_user(folder, bundle.request.user)  # also sets link.organization

        uploaded_file = bundle.data.get('file')
        if uploaded_file:
            # Normalize file name to upload.<ext>, with <ext> taken from the mime-type lookup table.
            mime_type = get_mime_type(uploaded_file.name)
            file_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']

            base_warc_url = "file:///%s/%s" % (link.guid, file_name)

            # Only append a random version suffix to warc_url if we're replacing a file.
            if bundle.data.get('replace'):
                warc_url = "%s?version=%s" % (base_warc_url, str(random.random()).replace('.', ''))
            else:
                warc_url = base_warc_url

            capture = Capture(link=link,
                              role='primary',
                              status='success',
                              record_type='resource',
                              user_upload='True',
                              content_type=mime_type,
                              url=warc_url)

            uploaded_file.file.seek(0)  # rewind before writing the record
            capture.write_warc_resource_record(uploaded_file)
            capture.save()

        else:
            # Create primary capture placeholder.
            Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            ).save()

            # Create screenshot placeholder.
            Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            ).save()

            # Create CaptureJob.
            CaptureJob(link=link, human=bundle.data.get('human', False)).save()

            # Kick off capture tasks -- no need for guid since it'll work through the queue.
            run_task(run_next_capture.s())

        return bundle
예제 #5
0
    def obj_create(self, bundle, **kwargs):
        """Create a link and either record an upload or queue a proxy capture.

        The parent ``obj_create`` creates the link. If ``bundle.data`` holds a
        ``"file"``, it is written as a successful primary ``Capture``. Otherwise
        pending primary and screenshot ``Capture`` rows are saved and a
        ``proxy_capture`` task is passed to ``run_task`` with the link's guid
        and the request's user-agent header.

        Args:
            bundle: Request bundle; ``bundle.data`` and ``bundle.request`` are read.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The bundle returned by the parent ``obj_create``.

        Raises:
            ImmediateHttpResponse: When ``settings.READ_ONLY_MODE`` is truthy.
        """
        if settings.READ_ONLY_MODE:
            paused_message = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            error_body = {
                "archives": {"__all__": paused_message},
                "reason": paused_message,
            }
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, error_body))

        # Validation (raises if invalid), property assignment and the save are
        # all performed by the parent implementation.
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
        link = bundle.obj

        upload = bundle.data.get("file")
        if not upload:
            # No upload: save pending placeholders for the primary capture and
            # the screenshot, in that order.
            placeholder_specs = (
                {"role": "primary", "record_type": "response", "url": link.submitted_url},
                {
                    "role": "screenshot",
                    "record_type": "resource",
                    "url": "file:///%s/cap.png" % link.guid,
                    "content_type": "image/png",
                },
            )
            for spec in placeholder_specs:
                Capture(link=link, status="pending", **spec).save()

            # Kick off the capture task.
            user_agent = bundle.request.META.get("HTTP_USER_AGENT", "")
            run_task(proxy_capture.s(link.guid, user_agent))
            return bundle

        # Upload path: the record URL uses a normalized upload.<ext> name, with
        # <ext> looked up from the file's mime type.
        mime_type = get_mime_type(upload.name)
        normalized_name = "upload.%s" % mime_type_lookup[mime_type]["new_extension"]
        upload_capture = Capture(
            link=link,
            role="primary",
            status="success",
            record_type="resource",
            user_upload="True",
            content_type=mime_type,
            url="file:///%s/%s" % (link.guid, normalized_name),
        )

        upload.file.seek(0)  # rewind before writing the record
        upload_capture.write_warc_resource_record(upload)
        upload_capture.save()

        return bundle
예제 #6
0
파일: resources.py 프로젝트: jcushman/perma
    def obj_create(self, bundle, **kwargs):
        """Create a link, enforcing the user's link quota, and set up captures.

        Steps, in order:

        1. Reject the request while ``settings.READ_ONLY_MODE`` is set.
        2. If the link counts against a quota (the user has a limit, or no
           ``'organization'`` is given) and none remain, reject the request.
        3. Store ``links_remaining`` in ``bundle.data`` (the count after this
           link, or ``'unlimited'``).
        4. Create the link via the parent ``obj_create`` and, if a folder was
           supplied, move the link into it.
        5. Record an uploaded ``'file'`` as a successful primary capture, or
           save pending captures and pass ``proxy_capture`` to ``run_task``.

        Args:
            bundle: Request bundle; ``bundle.data`` and ``bundle.request`` are read,
                and ``bundle.data['links_remaining']`` is written.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The bundle returned by the parent ``obj_create``.

        Raises:
            ImmediateHttpResponse: In read-only mode, or when the quota is used up.
        """
        if settings.READ_ONLY_MODE:
            maintenance_notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': maintenance_notice},
                'reason': maintenance_notice,
            }))

        # Make sure a limited user has links left to create. The quota
        # predicate is evaluated once and reused, so the rejection check and
        # the reported remaining count cannot disagree.
        user = bundle.request.user
        links_remaining = user.get_links_remaining()
        counts_against_limit = user.has_limit() or not bundle.data.get('organization')
        if counts_against_limit and links_remaining < 1:
            limit_notice = "You've already reached your limit."
            raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
                'archives': {'__all__': limit_notice},
                'reason': limit_notice,
            }))

        # Return the number of remaining links after this one is created.
        if counts_against_limit:
            bundle.data['links_remaining'] = links_remaining - 1
        else:
            bundle.data['links_remaining'] = 'unlimited'

        # Runs validation (exception thrown if invalid), sets properties and saves the object.
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
        link = bundle.obj

        # Put link in folder and handle Org settings based on folder.
        folder = bundle.data.get('folder')
        if folder:
            if folder.organization and folder.organization.default_to_private:
                link.is_private = True
                link.save()
            link.move_to_folder_for_user(folder, bundle.request.user)  # also sets link.organization

        uploaded_file = bundle.data.get('file')
        if uploaded_file:
            # Normalize file name to upload.<ext>, with <ext> taken from the mime-type lookup table.
            mime_type = get_mime_type(uploaded_file.name)
            file_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']
            warc_url = "file:///%s/%s" % (link.guid, file_name)

            capture = Capture(link=link,
                              role='primary',
                              status='success',
                              record_type='resource',
                              user_upload='True',
                              content_type=mime_type,
                              url=warc_url)

            uploaded_file.file.seek(0)  # rewind before writing the record
            capture.write_warc_resource_record(uploaded_file)
            capture.save()

        else:
            # Create primary capture placeholder.
            Capture(
                link=link,
                role='primary',
                status='pending',
                record_type='response',
                url=link.submitted_url,
            ).save()

            # Create screenshot placeholder.
            Capture(
                link=link,
                role='screenshot',
                status='pending',
                record_type='resource',
                url="file:///%s/cap.png" % link.guid,
                content_type='image/png',
            ).save()

            # Kick off capture task.
            run_task(proxy_capture.s(link.guid, bundle.request.META.get('HTTP_USER_AGENT', '')))

        return bundle