def newChild(parent, tag, text=None, tailText=None, sets=[]):
    child = etree.Element(tag)
    parent.append(child)
    child.text = strToUnicode(text)
    if tailText:
        child.tail = strToUnicode(tailText)
    for key, value in sets:
        child.set(key, value)
    return child
def newChild(parent, tag, text=None, tailText=None, sets=[]):
    # TODO convert sets to a dict, and use **dict
    child = etree.SubElement(parent, tag)
    child.text = strToUnicode(text)
    if tailText:
        child.tail = strToUnicode(tailText)
    for key, value in sets:
        child.set(key, value)
    return child
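# A minimal usage sketch for ``newChild`` (illustrative only, not part of the
# original module). It assumes ``etree`` is ``lxml.etree`` and that
# ``strToUnicode`` tolerates ``None`` and plain strings, as the helpers above
# imply.
def _example_new_child():  # hypothetical demo function
    root = etree.Element("mets")
    newChild(root, "agent", text="Archivematica", sets=[("ROLE", "CREATOR")])
    # Expected serialization, assuming lxml:
    # b'<mets><agent ROLE="CREATOR">Archivematica</agent></mets>'
    return etree.tostring(root)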
def _move_to_internal_shared_dir(filepath, dest, transfer):
    """Move package to an internal Archivematica directory.

    The side effect of this function is to update the transfer object with
    the final location. This is important so other components can continue
    processing. When relying on watched directories to start a transfer (see
    _start_package_transfer), this also matters because Transfer is going to
    look up the object in the database based on the location.
    """
    error = _check_filepath_exists(filepath)
    if error:
        raise Exception(error)

    filepath = Path(filepath)
    dest = Path(dest)

    # Confine destination to subdir of originals.
    basename = filepath.name
    dest = _pad_destination_filepath_if_it_already_exists(dest / basename)

    try:
        filepath.rename(dest)
    except OSError as e:
        raise Exception("Error moving from %s to %s (%s)" % (filepath, dest, e))
    else:
        transfer.currentlocation = strToUnicode(dest.as_posix()).replace(
            _get_setting("SHARED_DIRECTORY"), r"%sharedPath%", 1
        )
        transfer.save()
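# Hedged sketch of the location rewrite performed above (illustrative only;
# the SHARED_DIRECTORY value and the paths are hypothetical).
def _example_shared_path_rewrite():  # hypothetical demo function
    shared = "/var/archivematica/sharedDirectory/"  # assumed setting value
    dest = Path(shared) / "currentlyProcessing" / "pkg"
    # Mirrors: strToUnicode(dest.as_posix()).replace(shared, r"%sharedPath%", 1)
    return dest.as_posix().replace(shared, r"%sharedPath%", 1)
    # -> "%sharedPath%currentlyProcessing/pkg"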
def directory_children(request, basePath=False):
    path = ''
    if basePath:
        path = path + basePath
    path = path + request.GET.get('base_path', '')
    path = path + request.GET.get('path', '')

    response = {}
    entries = []
    directories = []

    for entry in sorted_directory_list(path):
        entry = archivematicaFunctions.strToUnicode(entry)
        if unicode(entry)[0] != '.':
            entries.append(entry)
            entry_path = os.path.join(path, entry)
            if os.path.isdir(
                archivematicaFunctions.unicodeToStr(entry_path)
            ) and os.access(
                archivematicaFunctions.unicodeToStr(entry_path), os.R_OK
            ):
                directories.append(entry)

    response = {'entries': entries, 'directories': directories}

    return HttpResponse(
        simplejson.JSONEncoder(encoding='utf-8').encode(response),
        mimetype='application/json',
    )
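# Hedged sketch of the JSON shape ``directory_children`` produces
# (illustrative; the request and directory contents are hypothetical).
# Hidden entries (leading ".") are skipped, and readable subdirectories
# appear in both lists:
#
#     GET .../?base_path=/var/transfers&path=/incoming
#     -> {"entries": ["docs", "image.png"], "directories": ["docs"]}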
def logTaskCompletedSQL(task):
    """
    Fetches execution data from the completed task and logs it to the database.
    Updates the entry in the Tasks table with data in the provided task.

    Saves the following fields: exitCode, stdOut, stdError

    :param task:
    """
    print("Logging task output to db", task.UUID)

    taskUUID = str(task.UUID)
    exitCode = str(task.results["exitCode"])
    stdOut = task.results["stdOut"]
    stdError = task.results["stdError"]

    task = Task.objects.get(taskuuid=taskUUID)
    task.endtime = getUTCDate()
    task.exitcode = exitCode
    # ``strToUnicode`` here prevents the MCP server from crashing when, e.g.,
    # stderr contains Latin-1-encoded chars such as \xa9, i.e., the copyright
    # symbol, cf. #9967.
    task.stdout = strToUnicode(stdOut, obstinate=True)
    task.stderror = strToUnicode(stdError, obstinate=True)
    task.save()
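# Hedged sketch of why ``obstinate=True`` matters above (illustrative; it
# assumes Archivematica's ``strToUnicode`` falls back to a lenient decode
# instead of raising on bytes that are not valid UTF-8, per the #9967 note):
#
#     strToUnicode(b"\xa9 2019")                  # strict decode would raise
#     strToUnicode(b"\xa9 2019", obstinate=True)  # -> u"\xa9 2019"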
def sanitize_name(basename):
    if basename == "":
        raise ValueError("sanitize_name received an empty filename.")
    unicode_basename = strToUnicode(basename)
    unicode_name = unidecode(unicode_basename)
    # We can't return an empty string here because it will become the new
    # filename. However, in some cases unidecode just strips out all chars
    # (e.g. unidecode(u"🚀") == ""), so if that happens, we fall back to the
    # original basename and let the final substitution replace its invalid
    # chars with REPLACEMENT_CHAR. This will result in a filename of one or
    # more underscores, which isn't great, but allows processing to continue.
    if unicode_name == "":
        unicode_name = unicode_basename
    return ALLOWED_CHARS.sub(REPLACEMENT_CHAR, unicode_name)
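# Illustrative behaviour sketch for ``sanitize_name`` (assumes
# REPLACEMENT_CHAR == "_" and that ALLOWED_CHARS excludes whitespace; both
# are defined elsewhere in the module, so these values are assumptions):
#
#     sanitize_name(u"björk café.mp3")  # -> "bjork_cafe.mp3" (transliterated)
#     sanitize_name(u"🚀")              # unidecode strips everything, so we
#                                       # fall back and substitute: -> "_"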
def call(jobs):
    """Primary entry point for this script."""
    for job in jobs:
        with job.JobContext():
            mets_structmap = os.path.join(
                strToUnicode(job.args[1]), "metadata", "mets_structmap.xml"
            )
            mets_xsd = job.args[2]
            if not os.path.isfile(mets_structmap):
                job.pyprint("Custom structmap not supplied with package")
                return
            if not os.path.isfile(mets_xsd):
                raise VerifyMETSException("METS asset is unavailable")
            xmlschema = etree.XMLSchema(etree.parse(mets_xsd))
            # Raise an exception if not valid, e.g. etree.DocumentInvalid;
            # otherwise, the document validates correctly and returns.
            xmlschema.assertValid(etree.parse(mets_structmap))
            job.pyprint("Custom structmap validated correctly")
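# Minimal sketch of driving ``call`` (illustrative; ``FakeJob`` is a
# hypothetical stand-in for the client-script job wrapper, which must supply
# ``args``, a ``JobContext`` context manager and ``pyprint``):
#
#     jobs = [FakeJob(args=["script", "/path/to/sip", "/etc/mets/mets.xsd"])]
#     call(jobs)  # validates <sip>/metadata/mets_structmap.xml when present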
def run_hashsum_commands(job):
    """Run hashsum commands and generate a cumulative return code."""
    transfer_dir = None
    transfer_uuid = None
    try:
        transfer_dir = strToUnicode(job.args[1])
        transfer_uuid = job.args[2]
    except IndexError:
        logger.error("Cannot access expected module arguments: %s", job.args)
        return 1
    ret = 0
    # Create a query-set once so we don't need to generate one per checksum
    # file type.
    file_queryset = get_file_queryset(transfer_uuid)
    for hashfile in Hashsum.HASHFILES_COMMANDS:
        hashsum = None
        hashfilepath = os.path.join(transfer_dir, "metadata", hashfile)
        if os.path.exists(hashfilepath):
            try:
                hashsum = Hashsum(hashfilepath, job)
            except NoHashCommandAvailable:
                job.pyprint(
                    "Nothing to do for {}. No command available.".format(
                        Hashsum.get_ext(hashfilepath)
                    )
                )
                continue
        if hashsum:
            job.pyprint(
                "Comparing transfer checksums with the supplied {} file".format(
                    Hashsum.get_ext(hashfilepath)
                ),
                file=sys.stderr,
            )
            result = hashsum.compare_hashes(transfer_dir=transfer_dir)
            # Add to PREMIS on success only.
            if result == 0:
                job.pyprint(
                    "{}: Comparison was OK".format(Hashsum.get_ext(hashfile))
                )
                write_premis_event_per_file(
                    file_uuids=file_queryset,
                    transfer_uuid=transfer_uuid,
                    event_detail=hashsum.get_command_detail(),
                )
                continue
            ret += result
    return ret
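# Hedged sketch of the layout ``run_hashsum_commands`` expects (paths are
# hypothetical; HASHFILES_COMMANDS conventionally covers files such as
# checksum.md5 / checksum.sha1 / checksum.sha256, but that mapping lives on
# the Hashsum class):
#
#     <transfer_dir>/metadata/checksum.sha256
#
# A zero comparison result writes one PREMIS fixity event per file in the
# transfer; non-zero results accumulate into the cumulative return code.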
def _pad_destination_filepath_if_it_already_exists(filepath, original=None, attempt=0):
    """
    Return a version of the filepath that does not yet exist, padding with
    numbers as necessary and reattempting until a non-existent filepath is
    found.

    :param filepath: `Path` or string of the desired destination filepath
    :param original: `Path` or string of the original filepath (before
        padding attempts)
    :param attempt: Number
    :returns: `Path` object, padded as necessary
    """
    if original is None:
        original = filepath
    filepath = Path(filepath)
    original = Path(original)
    attempt = attempt + 1
    if not filepath.exists():
        return filepath
    if filepath.is_dir():
        return _pad_destination_filepath_if_it_already_exists(
            "{}_{}".format(strToUnicode(original.as_posix()), attempt),
            original,
            attempt,
        )
    # Need to work out the basename.
    basedirectory = original.parent
    basename = original.name
    # Do more complex padding to preserve the file extension.
    period_position = basename.index(".")
    non_extension = basename[0:period_position]
    extension = basename[period_position:]
    new_basename = "{}_{}{}".format(non_extension, attempt, extension)
    new_filepath = basedirectory / new_basename
    return _pad_destination_filepath_if_it_already_exists(
        new_filepath, original, attempt
    )
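# Illustrative walk-through of the padding scheme (hypothetical paths):
#
#     /dst/report.pdf exists    -> try /dst/report_1.pdf
#     /dst/report_1.pdf exists  -> try /dst/report_2.pdf, and so on.
#
# Directories pad after the full name ("/dst/pkg" -> "/dst/pkg_1"). The
# extension split uses the *first* period, so "a.tar.gz" pads to
# "a_1.tar.gz"; a file name with no period at all would raise ValueError at
# ``basename.index(".")``.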
def _package_create_handler(self, worker, job, payload):
    """Create a new package.

    [config]
    name = packageCreate
    raise_exc = True
    """
    args = (
        self.package_queue,
        self.executor,
        payload.get("name"),
        payload.get("type"),
        payload.get("accession"),
        payload.get("access_system_id"),
        strToUnicode(payload.get("path")),
        payload.get("metadata_set_id"),
        payload.get("user_id"),
        self.workflow,
    )
    kwargs = {"auto_approve": payload.get("auto_approve")}
    processing_config = payload.get("processing_config")
    if processing_config is not None:
        kwargs["processing_config"] = processing_config
    return create_package(*args, **kwargs).pk
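# Hedged sketch of a payload this handler consumes (field values are
# hypothetical; note that "path" is decoded to unicode before hand-off):
#
#     payload = {
#         "name": "transfer-1",
#         "type": "standard",
#         "accession": "",
#         "access_system_id": "",
#         "path": "/home/archivematica/transfer-1",
#         "metadata_set_id": None,
#         "user_id": 1,
#         "auto_approve": True,
#     }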
def _get_setting(name):
    """Retrieve a Django setting decoded as a unicode string."""
    return strToUnicode(getattr(settings, name))
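# Usage sketch (illustrative; the value shown is hypothetical):
#
#     shared = _get_setting("SHARED_DIRECTORY")
#     # -> u"/var/archivematica/sharedDirectory/", always a unicode string,
#     # so callers such as _move_to_internal_shared_dir can .replace() on it.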