def obj_create(self, bundle, **kwargs):
    """Handle a request to archive a URL (or store an uploaded file).

    A Link is created through the parent resource's ``obj_create`` and an
    ``Asset`` is attached to it. If the request carries a ``file``, the
    upload is stored under a normalized name and recorded on the asset.
    Otherwise the asset's captures are marked pending and a capture task
    is passed to ``run_task``.

    Raises ``ImmediateHttpResponse`` when ``settings.READ_ONLY_MODE`` is set.
    Returns the bundle produced by the parent ``obj_create``.
    """
    if settings.READ_ONLY_MODE:
        paused_msg = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
        error_body = {
            'archives': {'__all__': paused_msg},
            'reason': paused_msg,
        }
        raise ImmediateHttpResponse(
            response=self.error_response(bundle.request, error_body))

    # The parent call validates (raising if invalid), sets properties and
    # saves the object, per the original author's note.
    bundle = super(LinkResource, self).obj_create(
        bundle, created_by=bundle.request.user)
    asset = Asset(link=bundle.obj)

    upload = bundle.data.get('file')
    if not upload:
        # Nothing was uploaded: mark captures pending and pick the task.
        asset.image_capture = Asset.CAPTURE_STATUS_PENDING
        if asset.link.media_type == 'pdf':
            # Looks like a PDF, so only the PDF retrieval task is used.
            asset.pdf_capture = Asset.CAPTURE_STATUS_PENDING
            task = get_pdf
        else:
            # Not a PDF: retrieve whatever we can through the proxy capture.
            asset.warc_capture = Asset.CAPTURE_STATUS_PENDING
            task = proxy_capture
        asset.save()

        signature = task.s(
            asset.link.guid,
            asset.link.submitted_url,
            asset.base_storage_path,
            bundle.request.META.get('HTTP_USER_AGENT', ''),
        )
        run_task(signature)
        return bundle

    # Normalize the stored name to upload.jpg/.png/.gif/.pdf.
    mime_type = get_mime_type(upload.name)
    extension = mime_type_lookup[mime_type]['new_extension']
    target_path = os.path.join(asset.base_storage_path, 'upload.%s' % extension)

    upload.file.seek(0)
    stored_name = default_storage.store_file(upload, target_path)

    if mime_type == 'application/pdf':
        asset.pdf_capture = stored_name
    else:
        asset.image_capture = stored_name
    asset.user_upload = True
    asset.user_upload_file_name = upload.name
    asset.save()
    return bundle
def obj_create(self, bundle, **kwargs):
    """Create a Link plus its Asset, storing an upload or queueing a capture.

    When ``settings.READ_ONLY_MODE`` is set, an ``ImmediateHttpResponse``
    carrying the maintenance message is raised before anything is created.
    Otherwise the parent ``obj_create`` builds the Link, and then either:

    * the uploaded ``file`` is stored under a normalized ``upload.<ext>``
      name and recorded on the asset, or
    * the asset's capture fields are set to pending and a capture task
      (``get_pdf`` or ``proxy_capture``) is handed to ``run_task``.

    Returns the bundle produced by the parent ``obj_create``.
    """
    if settings.READ_ONLY_MODE:
        notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
        raise ImmediateHttpResponse(
            response=self.error_response(
                bundle.request,
                {'archives': {'__all__': notice}, 'reason': notice},
            )
        )

    # Validation, property assignment and saving happen in the parent call
    # (it raises if the data is invalid, per the original author's note).
    bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
    asset = Asset(link=bundle.obj)
    incoming = bundle.data.get('file')

    if incoming:
        # Stored name is normalized to upload.jpg/.png/.gif/.pdf.
        mime_type = get_mime_type(incoming.name)
        normalized_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']
        destination = os.path.join(asset.base_storage_path, normalized_name)

        incoming.file.seek(0)
        saved_as = default_storage.store_file(incoming, destination)

        # PDFs are recorded as the pdf capture; everything else as the image.
        capture_field = 'pdf_capture' if mime_type == 'application/pdf' else 'image_capture'
        setattr(asset, capture_field, saved_as)
        asset.user_upload = True
        asset.user_upload_file_name = incoming.name
        asset.save()
    else:
        asset.image_capture = Asset.CAPTURE_STATUS_PENDING

        # A PDF gets only the PDF retrieval task; anything else goes through
        # the proxy capture to retrieve as much as possible.
        if asset.link.media_type == 'pdf':
            pending_field, task = 'pdf_capture', get_pdf
        else:
            pending_field, task = 'warc_capture', proxy_capture
        setattr(asset, pending_field, Asset.CAPTURE_STATUS_PENDING)
        asset.save()

        run_task(
            task.s(
                asset.link.guid,
                asset.link.submitted_url,
                asset.base_storage_path,
                bundle.request.META.get('HTTP_USER_AGENT', ''),
            )
        )

    return bundle
def obj_create(self, bundle, **kwargs):
    """Create (or, with ``replace``, reuse) a Link and set up its captures.

    Flow:

    1. Raise ``ImmediateHttpResponse`` if ``settings.READ_ONLY_MODE`` is set.
    2. Unless ``bundle.data['replace']`` is truthy, build the Link through
       the parent ``obj_create``.
    3. Save the link, apply the folder organization's private default and
       move the link into the requested folder.
    4. With an uploaded ``file``: write it as a successful primary Capture.
       Without one: save pending primary and screenshot Captures, save a
       CaptureJob, and pass ``run_next_capture`` to ``run_task``.

    Returns the bundle (from the parent call when one was made).
    """
    if settings.READ_ONLY_MODE:
        paused_msg = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
        error_body = {
            'archives': {'__all__': paused_msg},
            'reason': paused_msg,
        }
        raise ImmediateHttpResponse(
            response=self.error_response(bundle.request, error_body))

    # The parent call validates (raising if invalid), sets properties and
    # saves the object; it is skipped when an existing link is replaced.
    if not bundle.data.get('replace'):
        bundle = super(LinkResource, self).obj_create(
            bundle, created_by=bundle.request.user)

    link = bundle.obj
    link.save()

    # Put the link in its folder and honor the organization's privacy default.
    # NOTE(review): folder is dereferenced without a None check -- presumably
    # validation guarantees it is present; confirm.
    folder = bundle.data.get('folder')
    organization = folder.organization
    if organization and organization.default_to_private:
        link.is_private = True
        link.save()
    # Per the original comment, this also sets link.organization.
    link.move_to_folder_for_user(folder, bundle.request.user)

    upload = bundle.data.get('file')
    if not upload:
        # Placeholder for the primary capture.
        primary = Capture(
            link=link,
            role='primary',
            status='pending',
            record_type='response',
            url=link.submitted_url,
        )
        primary.save()

        # Placeholder for the screenshot.
        screenshot = Capture(
            link=link,
            role='screenshot',
            status='pending',
            record_type='resource',
            url="file:///%s/cap.png" % link.guid,
            content_type='image/png',
        )
        screenshot.save()

        job = CaptureJob(link=link, human=bundle.data.get('human', False))
        job.save()

        # No guid is passed; the original note says the task works through
        # the queue.
        run_task(run_next_capture.s())
        return bundle

    # Normalize the file name to upload.jpg/.png/.gif/.pdf.
    mime_type = get_mime_type(upload.name)
    extension = mime_type_lookup[mime_type]['new_extension']
    warc_url = "file:///%s/%s" % (link.guid, 'upload.%s' % extension)

    # A random version suffix is appended only when replacing a file.
    if bundle.data.get('replace'):
        version_tag = str(random.random()).replace('.', '')
        warc_url = "%s?version=%s" % (warc_url, version_tag)

    # NOTE(review): user_upload is passed as the string 'True', not a bool --
    # kept as-is; confirm against the Capture model.
    capture = Capture(
        link=link,
        role='primary',
        status='success',
        record_type='resource',
        user_upload='True',
        content_type=mime_type,
        url=warc_url,
    )
    upload.file.seek(0)
    capture.write_warc_resource_record(upload)
    capture.save()
    return bundle
def obj_create(self, bundle, **kwargs):
    """Build a Link for an archive request and record or queue its captures.

    Raises ``ImmediateHttpResponse`` with the maintenance message when
    ``settings.READ_ONLY_MODE`` is set. The parent ``obj_create`` is called
    unless ``bundle.data['replace']`` is truthy. The link is then saved,
    made private if its folder's organization defaults to private, and
    moved into the folder.

    An uploaded ``file`` becomes a successful primary Capture written as a
    WARC resource record. Without an upload, pending primary and screenshot
    Captures and a CaptureJob are saved and ``run_next_capture`` is passed
    to ``run_task``.

    Returns the bundle.
    """
    if settings.READ_ONLY_MODE:
        notice = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
        raise ImmediateHttpResponse(
            response=self.error_response(
                bundle.request,
                {'archives': {'__all__': notice}, 'reason': notice},
            )
        )

    # Validation, property assignment and saving are done by the parent
    # call (raising if invalid); replacing an existing link skips it.
    if not bundle.data.get('replace'):
        bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)

    link = bundle.obj
    link.save()

    # Folder placement and organization privacy default.
    # NOTE(review): assumes 'folder' is always present in bundle.data -- a
    # missing folder would fail on the attribute access below; confirm.
    folder = bundle.data.get('folder')
    if folder.organization and folder.organization.default_to_private:
        link.is_private = True
        link.save()
    link.move_to_folder_for_user(folder, bundle.request.user)  # also sets link.organization

    incoming = bundle.data.get('file')
    if incoming:
        # Stored under a normalized upload.jpg/.png/.gif/.pdf name.
        mime_type = get_mime_type(incoming.name)
        normalized_name = 'upload.%s' % mime_type_lookup[mime_type]['new_extension']
        base_url = "file:///%s/%s" % (link.guid, normalized_name)

        # Only a replacement gets a random version suffix on its URL.
        if not bundle.data.get('replace'):
            warc_url = base_url
        else:
            warc_url = "%s?version=%s" % (base_url, str(random.random()).replace('.', ''))

        # NOTE(review): user_upload is the string 'True' here, preserved
        # verbatim; verify the field type on Capture.
        uploaded_capture = Capture(
            link=link,
            role='primary',
            status='success',
            record_type='resource',
            user_upload='True',
            content_type=mime_type,
            url=warc_url,
        )
        incoming.file.seek(0)
        uploaded_capture.write_warc_resource_record(incoming)
        uploaded_capture.save()
    else:
        # Pending placeholders, created in order: primary, then screenshot.
        placeholder_specs = [
            {
                'role': 'primary',
                'record_type': 'response',
                'url': link.submitted_url,
            },
            {
                'role': 'screenshot',
                'record_type': 'resource',
                'url': "file:///%s/cap.png" % link.guid,
                'content_type': 'image/png',
            },
        ]
        for spec in placeholder_specs:
            Capture(link=link, status='pending', **spec).save()

        CaptureJob(link=link, human=bundle.data.get('human', False)).save()

        # Kick off capture tasks -- no guid needed; per the original note
        # the task works through the queue.
        run_task(run_next_capture.s())

    return bundle
def obj_create(self, bundle, **kwargs):
    """Create a Link for an archive request and record or start its capture.

    Raises ``ImmediateHttpResponse`` with the maintenance message when
    ``settings.READ_ONLY_MODE`` is set. Otherwise the parent ``obj_create``
    builds the Link, and then either:

    * an uploaded ``file`` is written as a successful primary Capture, or
    * pending primary and screenshot Captures are saved and
      ``proxy_capture`` is passed to ``run_task`` with the link guid and
      the request's user agent.

    Returns the bundle produced by the parent ``obj_create``.
    """
    if settings.READ_ONLY_MODE:
        paused_msg = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
        error_body = {
            "archives": {"__all__": paused_msg},
            "reason": paused_msg,
        }
        raise ImmediateHttpResponse(response=self.error_response(bundle.request, error_body))

    # The parent call validates (raising if invalid), sets properties and
    # saves the object, per the original author's note.
    bundle = super(LinkResource, self).obj_create(bundle, created_by=bundle.request.user)
    link = bundle.obj

    upload = bundle.data.get("file")
    if not upload:
        # Placeholder for the primary capture.
        primary = Capture(
            link=link,
            role="primary",
            status="pending",
            record_type="response",
            url=link.submitted_url,
        )
        primary.save()

        # Placeholder for the screenshot.
        screenshot = Capture(
            link=link,
            role="screenshot",
            status="pending",
            record_type="resource",
            url="file:///%s/cap.png" % link.guid,
            content_type="image/png",
        )
        screenshot.save()

        # Kick off the capture task.
        signature = proxy_capture.s(link.guid, bundle.request.META.get("HTTP_USER_AGENT", ""))
        run_task(signature)
        return bundle

    # Normalize the file name to upload.jpg/.png/.gif/.pdf.
    mime_type = get_mime_type(upload.name)
    extension = mime_type_lookup[mime_type]["new_extension"]
    warc_url = "file:///%s/%s" % (link.guid, "upload.%s" % extension)

    # NOTE(review): user_upload is the string "True", not a bool -- kept
    # as-is; confirm against the Capture model.
    capture = Capture(
        link=link,
        role="primary",
        status="success",
        record_type="resource",
        user_upload="True",
        content_type=mime_type,
        url=warc_url,
    )
    upload.file.seek(0)
    capture.write_warc_resource_record(upload)
    capture.save()
    return bundle
def obj_create(self, bundle, **kwargs):
    """Create a Link, enforcing the user's link limit, and set up captures.

    Steps:

    1. Raise ``ImmediateHttpResponse`` if ``settings.READ_ONLY_MODE`` is set.
    2. Raise ``ImmediateHttpResponse`` if the request counts against a limit
       (the user has a limit, or no ``organization`` was supplied) and fewer
       than one link remains.
    3. Store ``links_remaining`` in ``bundle.data``: the count after this
       link, or ``'unlimited'``.
    4. Build the Link through the parent ``obj_create`` and, when a
       ``folder`` was supplied, apply its organization's private default
       and move the link there.
    5. Write an uploaded ``file`` as a successful primary Capture, or save
       pending primary and screenshot Captures and pass ``proxy_capture``
       to ``run_task``.

    Returns the bundle produced by the parent ``obj_create``.
    """
    if settings.READ_ONLY_MODE:
        paused_msg = "Perma has paused archive creation for scheduled maintenance. Please try again shortly."
        raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
            'archives': {'__all__': paused_msg},
            'reason': paused_msg,
        }))

    user = bundle.request.user
    links_remaining = user.get_links_remaining()

    def counts_against_limit():
        # Limited users always count; so does any request without an
        # organization.
        return user.has_limit() or not bundle.data.get('organization')

    # Make sure a limited user has links left to create.
    if counts_against_limit() and links_remaining < 1:
        limit_msg = "You've already reached your limit."
        raise ImmediateHttpResponse(response=self.error_response(bundle.request, {
            'archives': {'__all__': limit_msg},
            'reason': limit_msg,
        }))

    # Report how many links remain once this one has been created.
    bundle.data['links_remaining'] = (
        links_remaining - 1 if counts_against_limit() else 'unlimited'
    )

    # The parent call validates (raising if invalid), sets properties and
    # saves the object, per the original author's note.
    bundle = super(LinkResource, self).obj_create(bundle, created_by=user)
    link = bundle.obj

    # Folder placement and organization privacy default, only when a folder
    # was supplied.
    folder = bundle.data.get('folder')
    if folder:
        organization = folder.organization
        if organization and organization.default_to_private:
            link.is_private = True
            link.save()
        # Per the original comment, this also sets link.organization.
        link.move_to_folder_for_user(folder, bundle.request.user)

    upload = bundle.data.get('file')
    if not upload:
        # Placeholder for the primary capture.
        Capture(
            link=link,
            role='primary',
            status='pending',
            record_type='response',
            url=link.submitted_url,
        ).save()

        # Placeholder for the screenshot.
        Capture(
            link=link,
            role='screenshot',
            status='pending',
            record_type='resource',
            url="file:///%s/cap.png" % link.guid,
            content_type='image/png',
        ).save()

        # Kick off the capture task.
        user_agent = bundle.request.META.get('HTTP_USER_AGENT', '')
        run_task(proxy_capture.s(link.guid, user_agent))
        return bundle

    # Normalize the file name to upload.jpg/.png/.gif/.pdf.
    mime_type = get_mime_type(upload.name)
    extension = mime_type_lookup[mime_type]['new_extension']
    warc_url = "file:///%s/%s" % (link.guid, 'upload.%s' % extension)

    # NOTE(review): user_upload is the string 'True', not a bool -- kept
    # as-is; confirm against the Capture model.
    capture = Capture(
        link=link,
        role='primary',
        status='success',
        record_type='resource',
        user_upload='True',
        content_type=mime_type,
        url=warc_url,
    )
    upload.file.seek(0)
    capture.write_warc_resource_record(upload)
    capture.save()
    return bundle