def ingest_upload(request, uuid):
    """Persist upload-DIP data used later by upload-qubit.py.

    The upload itself is not executed here; this view only stores some
    data (permalink, ...) in the database.

    - GET: return the stored target data (could be used to obtain DIP size).
    - POST: create/update the Access row with the submitted permalink.

    Raises Http404 when the SIP does not exist or (on GET) when no usable
    Access record is stored.
    """
    if not models.SIP.objects.filter(uuid__exact=uuid).exists():
        raise Http404
    if request.method == 'POST':
        # NOTE(review): a POST without 'target' falls through to the 405
        # below even though POST is an allowed method; kept as-is to
        # preserve behavior.
        if 'target' in request.POST:
            try:
                access = models.Access.objects.get(sipuuid=uuid)
            except models.Access.DoesNotExist:
                # First upload for this SIP: create a new Access record.
                # (Was a bare `except:`; narrowed to the expected failure.)
                access = models.Access(sipuuid=uuid)
            access.target = cPickle.dumps({"target": request.POST['target']})
            access.save()
            response = {'ready': True}
            return helpers.json_response(response)
    elif request.method == 'GET':
        try:
            access = models.Access.objects.get(sipuuid=uuid)
            data = cPickle.loads(str(access.target))
        except Exception:
            # Missing record or undecodable pickle: report not found.
            # (Was a bare `except:`; Exception avoids trapping SystemExit
            # and KeyboardInterrupt.)
            raise Http404
        return helpers.json_response(data)
    return HttpResponseNotAllowed(['GET', 'POST'])
def ingest_upload(request, uuid):
    """Persist upload-DIP data used later by upload-qubit.py (legacy variant).

    The upload itself is not executed here; this view only stores some
    data (permalink, ...) in the database.

    - GET: return the stored target data (could be used to obtain DIP size).
    - POST: create/update the Access row with the submitted permalink.

    Raises Http404 when the SIP does not exist or (on GET) when no usable
    Access record is stored; any other method gets a 400 response.
    """
    try:
        models.SIP.objects.get(uuid__exact=uuid)
    except models.SIP.DoesNotExist:
        raise Http404
    if request.method == 'POST':
        if 'target' in request.POST:
            try:
                access = models.Access.objects.get(sipuuid=uuid)
            except models.Access.DoesNotExist:
                # First upload for this SIP: create a new Access record.
                # (Was a bare `except:`; narrowed to the expected failure.)
                access = models.Access(sipuuid=uuid)
            access.target = cPickle.dumps({"target": request.POST['target']})
            access.save()
            response = {'ready': True}
            return helpers.json_response(response)
    elif request.method == 'GET':
        try:
            access = models.Access.objects.get(sipuuid=uuid)
            data = cPickle.loads(str(access.target))
        except Exception:
            # Missing record or undecodable pickle: report not found.
            # (Was a bare `except:`.)
            raise Http404
        # Computing the DIP size here was disabled because it could be
        # very slow (it walked the whole Upload DIP job directory).
        return helpers.json_response(data)
    return HttpResponseBadRequest()
def start(job, data):
    """Upload a DIP to AtoM via a SWORD deposit request.

    Optionally rsyncs the DIP directory to ``data.rsync_target`` first,
    tracking progress in the Access record. Returns 0 on success, or the
    result of ``error(job, msg)`` on failure.

    Progress/state is persisted in ``models.Access.statuscode``:
    10 sending, 11 rsync OK, 12 rsync failed, 13 depositing, 14 deposited
    synchronously.
    """
    # Make sure we are working with an existing SIP record
    if not models.SIP.objects.filter(pk=data.uuid).exists():
        return error(job, "UUID not recognized")

    # Get the DIP directory from the "Upload DIP" job record
    jobs = models.Job.objects.filter(sipuuid=data.uuid, jobtype="Upload DIP")
    if jobs.count():
        directory = (jobs[0].directory.rstrip("/").replace(
            "%sharedPath%", "/var/archivematica/sharedDirectory/"))
    else:
        # BUG FIX: the original formatted `directory` into this message
        # before it was ever assigned, raising NameError instead of the
        # intended error.
        return error(job, "Upload DIP job not found for SIP %s" % data.uuid)

    # Check if the directory exists; fall back to uploadedDIPs/
    if os.path.exists(directory) is False:
        log("Directory not found: %s" % directory)
        log("Looking up uploadedDIPs/")
        directory = directory.replace("uploadDIP", "uploadedDIPs")
        if os.path.exists(directory) is False:
            return error(job, "Directory not found: %s" % directory)

    try:
        # This upload was called before, restore Access record
        access = models.Access.objects.get(sipuuid=data.uuid)
    except models.Access.DoesNotExist:
        # First time this job is called, create new Access record
        # (was a bare `except:`; narrowed to the expected failure)
        access = models.Access(sipuuid=data.uuid)
        # Look for an access system ID on the SIP's single transfer
        transfers = models.Transfer.objects.filter(
            file__sip_id=data.uuid).distinct()
        if transfers.count() == 1:
            access.target = cPickle.dumps(
                {"target": transfers[0].access_system_id})
        access.save()

    # The target column contains a serialized Python dictionary
    # - target is the permalink string
    try:
        target = cPickle.loads(str(access.target))
        log("Target: %s" % (target["target"]))
    except Exception:
        return error(job, "No target was selected")

    # Rsync if data.rsync_target option was passed to this script
    if data.rsync_target:
        # rsync flags: -r recursive, -l recreate symlinks, -t keep mtimes,
        # -z compress, -P --partial + progress; --chmod normalizes perms;
        # --protect-args keeps remote paths with spaces intact.
        command = [
            "rsync",
            "--protect-args",
            "-rltz",
            "-P",
            "--chmod=ugo=rwX",
            directory,
            data.rsync_target,
        ]
        # Add -e if data.rsync_command was passed to this script
        if data.rsync_command:
            # Insert in second position. Example: rsync -e "ssh -i key" ...
            command.insert(1, "-e %s" % data.rsync_command)
        log(" ".join(command))

        # rsync buffers its output; redirect it to a temporary file so
        # progress can be polled while the process runs.
        pipe_output, file_name = tempfile.mkstemp()
        log("Rsync output is being saved in %s" % file_name)

        process = subprocess.Popen(command,
                                   stdout=pipe_output,
                                   stderr=pipe_output)

        # poll() returns None while the process is still running
        while process.poll() is None:
            time.sleep(1)
            # BUG FIX: the original `open(file_name).readlines()` leaked a
            # file handle on every iteration of this polling loop.
            with open(file_name) as rsync_output:
                lines = rsync_output.readlines()
            # It's possible that it hasn't output yet, so continue
            if not lines:
                continue
            last_line = lines[-1]
            # Matching "[bytes downloaded] number% [speed] n:n:n"
            match = re.match(r".* ([0-9]*)%.* ([0-9]*:[0-9]*:[0-9]*).*",
                             last_line)
            if not match:
                continue
            # Update upload status
            # - percentage in match.group(1)
            # - ETA in match.group(2)
            access.status = "Sending... %s (ETA: %s)" % (match.group(1),
                                                         match.group(2))
            access.statuscode = 10
            access.save()
            log(access.status)

        # BUG FIX: close the mkstemp file descriptor (it was leaked).
        # The temporary output file itself is intentionally left on disk
        # for post-mortem inspection.
        os.close(pipe_output)

        # At this point, we should have a return code
        # If greater than zero, see man rsync (EXIT VALUES)
        access.exitcode = process.returncode
        access.statuscode = 12 if process.returncode > 0 else 11
        access.save()
        if process.returncode > 0:
            return error(
                job,
                "Rsync quit unexpectedly (exit %s), the upload script will be stopped here"
                % process.returncode,
            )

    # Build the headers for the SWORD deposit request
    headers = {
        "User-Agent": "Archivematica",
        "X-Packaging": "http://purl.org/net/sword-types/METSArchivematicaDIP",
        "Content-Type": "application/zip",
        "X-No-Op": "false",
        "X-Verbose": "false",
        "Content-Location": "file:///%s" % os.path.basename(directory),
    }

    # Build URL (expected sth like http://localhost/ica-atom/index.php);
    # AtoM 1.x routes the sword plugin behind ";".
    atom_url_prefix = ";" if data.version == 1 else ""
    deposit_url = "%s/%ssword/deposit/%s" % (
        data.url,
        atom_url_prefix,
        target["target"],
    )

    # Auth and request!
    log("About to deposit to: %s" % data.url)
    access.statuscode = 13
    access.resource = "%s/%s" % (data.url, target["target"])
    access.save()
    auth = requests.auth.HTTPBasicAuth(data.email, data.password)

    # Disable redirects: AtoM returns 302 instead of 202, but the Location
    # header field is valid.
    response = requests.request(
        "POST",
        deposit_url,
        auth=auth,
        headers=headers,
        allow_redirects=False,
        timeout=mcpclient_settings.AGENTARCHIVES_CLIENT_TIMEOUT,
    )

    # response.{content,headers,status_code}
    log("> Response code: %s" % response.status_code)
    log("> Location: %s" % response.headers.get("Location"))
    if data.debug:
        log("> Content received: %s" % response.content)

    # Check AtoM response status code
    if response.status_code not in [200, 201, 302]:
        return error(job, "Response code not expected")

    # Location is a must; if absent something is likely wrong with AtoM.
    # BUG FIX: indexing response.headers["Location"] raises KeyError when
    # the header is missing and can never yield None; .get() can.
    if response.headers.get("Location") is None:
        return error(
            job,
            "Location is expected, if not is likely something is wrong with AtoM"
        )

    # (A)synchronously?
    if response.status_code == 302:
        access.status = (
            "Deposited asynchronously, AtoM is processing the DIP in the job queue"
        )
        log(access.status)
    else:
        access.statuscode = 14
        access.status = "Deposited synchronously"
        log(access.status)
    access.save()

    # We also have to parse the XML document
    return 0
def start(data):
    """Upload a DIP to Qubit via a SWORD deposit request (legacy variant).

    Optionally rsyncs the DIP directory to ``data.rsync_target`` first,
    tracking progress in the Access record. Failures are reported through
    ``error(msg)``.

    NOTE(review): ``error()`` is assumed to abort execution (e.g. via
    sys.exit); if it merely logs, control falls through here just as it
    did in the original — behavior preserved.
    """
    # Make sure we are working with an existing SIP record
    try:
        models.SIP.objects.get(pk=data.uuid)
    except models.SIP.DoesNotExist:
        # (was a bare `except:`; narrowed to the expected failure)
        error("UUID not recognized")

    # Get the DIP directory from the "Upload DIP" job record
    jobs = models.Job.objects.filter(sipuuid=data.uuid, jobtype="Upload DIP")
    if jobs.count():
        directory = jobs[0].directory.rstrip('/').replace(
            '%sharedPath%', '/var/archivematica/sharedDirectory/')
    else:
        # BUG FIX: the original formatted `directory` into this message
        # before it was ever assigned, raising NameError instead of the
        # intended error.
        error("Upload DIP job not found for SIP %s" % data.uuid)

    # Check if the directory exists; fall back to uploadedDIPs/
    if os.path.exists(directory) is False:
        log("Directory not found: %s" % directory)
        log("Looking up uploadedDIPs/")
        directory = directory.replace('uploadDIP', 'uploadedDIPs')
        if os.path.exists(directory) is False:
            error("Directory not found: %s" % directory)

    try:
        # This upload was called before, restore Access record
        access = models.Access.objects.get(sipuuid=data.uuid)
    except models.Access.DoesNotExist:
        # First time this job is called, create new Access record
        # (was a bare `except:`; narrowed to the expected failure)
        access = models.Access(sipuuid=data.uuid)
        access.save()

    # The target column contains a serialized Python dictionary
    # - target is the permalink string
    try:
        target = cPickle.loads(str(access.target))
        log("Target: %s" % (target['target']))
    except Exception:
        error("No target was selected")

    # Rsync if data.rsync_target option was passed to this script
    if data.rsync_target:
        # rsync flags: -r recursive, -l recreate symlinks, -t keep mtimes,
        # -z compress, -P --partial + progress; --chmod normalizes perms.
        command = [
            "rsync", "-rltz", "-P", "--chmod=ugo=rwX", directory,
            data.rsync_target
        ]
        # Add -e if data.rsync_command was passed to this script
        if data.rsync_command:
            # Insert in second position. Example: rsync -e "ssh -i key" ...
            command.insert(1, "-e \"%s\"" % data.rsync_command)
        log(' '.join(command))

        # rsync buffers its output; redirect it to a temporary file so
        # progress can be polled while the process runs.
        pipe_output, file_name = tempfile.mkstemp()
        log("Rsync output is being saved in %s" % file_name)

        process = subprocess.Popen(command,
                                   stdout=pipe_output,
                                   stderr=pipe_output)

        # poll() returns None while the process is still running
        while process.poll() is None:
            time.sleep(1)
            # BUG FIX: the original `open(file_name).readlines()` leaked a
            # file handle on every iteration of this polling loop.
            with open(file_name) as rsync_output:
                lines = rsync_output.readlines()
            # It's possible that it hasn't output yet, so continue
            if not lines:
                continue
            last_line = lines[-1]
            # Matching "[bytes downloaded] number% [speed] n:n:n"
            match = re.match(r".* ([0-9]*)%.* ([0-9]*:[0-9]*:[0-9]*).*",
                             last_line)
            if not match:
                continue
            # Update upload status
            # - percentage in match.group(1)
            # - ETA in match.group(2)
            access.status = "Sending... %s (ETA: %s)" % (match.group(1),
                                                         match.group(2))
            access.statuscode = 10
            access.save()
            log(access.status)

        # BUG FIX: close the mkstemp file descriptor (it was leaked).
        # The temporary output file itself is intentionally left on disk
        # for post-mortem inspection.
        os.close(pipe_output)

        # At this point, we should have a return code
        # If greater than zero, see man rsync (EXIT VALUES)
        access.exitcode = process.returncode
        access.statuscode = 12 if process.returncode > 0 else 11
        access.save()
        if process.returncode > 0:
            error(
                "Rsync quit unexpectedly (exit %s), the upload script will be stopped here"
                % process.returncode)

    # Build the headers for the SWORD deposit request
    headers = {
        'User-Agent': 'Archivematica',
        'X-Packaging': 'http://purl.org/net/sword-types/METSArchivematicaDIP',
        'Content-Type': 'application/zip',
        'X-No-Op': 'false',
        'X-Verbose': 'false',
        'Content-Location': "file:///%s" % os.path.basename(directory),
    }

    # Build URL (expected sth like http://localhost/ica-atom/index.php)
    data.url = "%s/;sword/deposit/%s" % (data.url, target['target'])

    # Auth and request!
    log("About to deposit to: %s" % data.url)
    access.statuscode = 13
    access.save()
    auth = requests.auth.HTTPBasicAuth(data.email, data.password)
    response = requests.request('POST', data.url, auth=auth, headers=headers)

    # response.{content,headers,status_code}
    log("> Response code: %s" % response.status_code)
    log("> Location: %s" % response.headers.get('Location'))
    if data.debug:
        log("> Content received: %s" % response.content)

    # Check Qubit response status code
    if response.status_code not in [200, 201, 302]:
        error("Response code not expected")

    # Location is a must; if absent something is likely wrong with Qubit.
    # BUG FIX: indexing response.headers['Location'] raises KeyError when
    # the header is missing and can never yield None; .get() can.
    if response.headers.get('Location') is None:
        error(
            "Location is expected, if not is likely something is wrong with Qubit"
        )
    else:
        access.resource = data.url

    # (A)synchronously?
    # BUG FIX: the original compared `response.status_code is 200` —
    # identity, not equality; it only worked by the CPython small-int
    # cache accident.
    if response.status_code == 200:
        access.statuscode = 14
        access.status = "Deposited synchronously"
        log(access.status)
    else:
        access.statuscode = 15
        access.status = "Deposited asynchronously, Qubit is processing the DIP in the job queue"
        log(access.status)
    access.save()