def get_data():
    def f(x):
        # One-hot encode an iris class name into a vector of length 3.
        result = numpy.zeros(3)
        if x == 'Iris-setosa':
            result[0] = 1
        elif x == 'Iris-versicolor':
            result[1] = 1
        elif x == 'Iris-virginica':
            result[2] = 1
        else:
            raise ValueError("unknown class label: %r" % x)
        return result

    data = File(u'../../../data')
    data.extract()
    # data.shuffle()
    data.convertClass(function=f, length=3)
    return data.allInputs, data.allClass
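# A minimal usage sketch for get_data(), assuming the File loader above is
# importable and that '../../../data' contains the iris data set in the format
# it expects; `demo` is illustrative, not part of the original code.
def demo():
    inputs, classes = get_data()
    print("loaded %d samples" % len(inputs))
    print("first label (one-hot):", classes[0])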
def unlock_link(self, url):
    """Unrestrict a file locker link given its url

    :param url: The url of the file locker you want to un-restrict
    :type url: str
    :return: a model.File instance containing the unlocked url
    """
    if self._auth_cookie is None:
        raise NotConnectedError('No auth cookie is available.')

    encoded = urllib.urlencode({'link': url})
    api_url = self._endpoint + '/unrestrict.php?{}'.format(encoded)
    opener = urllib2.build_opener()
    opener.addheaders.pop()  # drop the default User-agent header
    opener.addheaders.append(('Cookie', self._auth_cookie))
    opener.addheaders.append(('User-agent', USER_AGENT))
    logging.debug("Headers unlock link: " + str(opener.addheaders))
    result = opener.open(api_url)
    response = json.loads(result.read())
    logging.debug(response)

    error = response.get('error', 0)
    if error > 0:
        return {'errorMessage': response['message']}

    return File(source_url=url,
                unrestricted_url=response['main_link'],
                filename=response['file_name'],
                size=int(response['file_size_bytes']),
                file_locker=response['hoster_name'],
                creation_date=datetime.utcnow())
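# A hedged usage sketch for unlock_link(), showing its dual return type: a
# model.File on success, a plain dict on an API-reported error. The `client`
# argument is a hypothetical stand-in for an authenticated instance of the
# surrounding class, and attribute access on the File is assumed to mirror
# its constructor keywords.
def demo_unlock(client, url):
    result = client.unlock_link(url)
    if isinstance(result, dict):
        print("error: %s" % result['errorMessage'])
    else:
        print("unrestricted url: %s" % result.unrestricted_url)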
def purge_unused(self, note, purge_all_links=False):
    """
    Delete files that were linked from the given note but no longer are.

    @type note: model.Note
    @param note: note to search for file links
    @type purge_all_links: bool
    @param purge_all_links: if True, delete all files that are/were linked from this note
    """
    # load metadata for all files with the given note's note_id
    files = self.__database.select_many(File, File.sql_load_note_files(note.object_id))
    files_to_delete = dict([(db_file.object_id, db_file) for db_file in files])

    # search through the note's contents for current links to files
    if purge_all_links is False:
        for match in self.FILE_LINK_PATTERN.finditer(note.contents):
            file_id = match.groups(0)[0]

            # we've found a link for file_id, so don't delete that file
            files_to_delete.pop(file_id, None)

    # for each file to delete, delete its metadata from the database and its data from the
    # filesystem
    for (file_id, db_file) in files_to_delete.items():
        self.__database.execute(db_file.sql_delete(), commit=False)
        self.__database.uncache(db_file)
        Upload_file.delete_file(file_id)

    self.__database.commit()
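# For illustration only: purge_unused() relies on a FILE_LINK_PATTERN defined
# elsewhere in the class. A pattern in this spirit would capture the file id
# from download links embedded in the note's contents; the exact regex below
# is an assumption, not the original definition.
import re

FILE_LINK_PATTERN_EXAMPLE = re.compile(r'href="[^"]*/files/download\?file_id=([^"]+)"')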
def setUp( self ):
    self.object_id = u"17"
    self.notebook_id = u"18"
    self.note_id = u"19"
    self.filename = u"foo.png"
    self.size_bytes = 2888
    self.content_type = "image/png"
    self.delta = timedelta( seconds = 1 )

    self.file = File.create( self.object_id, self.notebook_id, self.note_id,
                             self.filename, self.size_bytes, self.content_type )
def createFile(self):
    if self.database is None:
        return self.response

    # Extract the important information of the file
    fileName = self.file.filename
    if fileName.rfind(".") != -1:
        extension = fileName[fileName.rfind(".") + 1:]
    else:
        extension = ""
    dataFile = self.file.stream.read()

    # Create a File
    newFile = File(name=self.file.filename, extension=extension, size=len(dataFile),
                   expiring_date=self.expiring_date, owner_id=self.owner_id,
                   shared=self.shared)

    # Try to create the document and upload the file; if either step fails,
    # return a 500
    try:
        # Create a document with the information of the file
        newFile.store(self.database)
        # Upload the file
        self.database.put_attachment(doc=newFile, filename=fileName, content=dataFile)
        newFile = self.database.get(newFile["_id"])
        # CouchDB reports attachment digests as "md5-<base64>"; keep the hash part.
        md5 = newFile["_attachments"][fileName]["digest"]
        newFile["md5"] = md5.split("-")[1]
        self.database.save(newFile)
        return {
            "Code": httplib.CREATED,
            "Message": {"_id": newFile["_id"], "_rev": newFile["_rev"]}
        }
    except Exception:
        return {
            "Code": httplib.INTERNAL_SERVER_ERROR,
            "Message": "Error upload"
        }
class ClusterProject(AbstractObject):

    def __init__(self, name, archiving_domain):
        super(ClusterProject, self).__init__()
        # Attributes
        self.name = name
        self.archiving_domain = archiving_domain
        self.app = self.archiving_domain.app
        self.folder = ClusterProjectFolder(
            self, os.path.join(self.app.config.options["projects_path"], self.name))
        self.drop_folder = ClusterProjectFolder(
            self, os.path.join(self.folder.path,
                               self.app.config.options["drop_folder_path_rel"]))
        self.uploadcache_proj_folder = ClusterProjectFolder(
            self, os.path.join(self.app.config.options["uploadcache_path"], self.name))
        self.confirmation_files_folder = ClusterProjectFolder(
            self, os.path.join(self.folder.path,
                               self.app.config.options["confirm_files_path_rel"]))
        self.start_archiving_file = File(
            os.path.join(self.drop_folder.path,
                         self.app.config.options["startarchiving_filename"]))
        self.arch_mssns = []

    def has_start_archiving_file(self):
        return self.start_archiving_file.exists()

    def get_lock_for_start_archiving_file(self):
        applock = ApplicationLock(self.start_archiving_file)
        lock_succeeded = applock.lock()
        if lock_succeeded:
            self.log.info("Successfully locked start archiving file for project %s" % self.name)
        else:
            errmsg = "Unable to lock start archiving file for project %s" % self.name
            self.log.error(errmsg)
            raise Exception(errmsg)
        return applock

    def get_arch_mssns_with_states(self, states):
        arch_mssns = []
        for arch_mssn in self.arch_mssns:
            if arch_mssn.state in states:
                arch_mssns.append(arch_mssn)
        return arch_mssns
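# A short sketch of the intended locking flow, assuming a ClusterProject
# instance `project` built by the surrounding application; the unlock() call
# matches how ApplicationLock is used elsewhere in this code.
def demo_start_archiving(project):
    if project.has_start_archiving_file():
        applock = project.get_lock_for_start_archiving_file()
        try:
            pass  # ... run the archiving steps here ...
        finally:
            applock.unlock()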
class FileChunk(AbstractObject):
    '''FileChunk objects are used to collect chunks of files and folders that
    should go into one single tar archive.'''

    def __init__(self, arch_mssn):
        super(FileChunk, self).__init__()
        self.files = []
        self.size = 0
        self.should_split = False
        self.arch_mssn = arch_mssn
        self.app = self.arch_mssn.app
        self.chunk_no = arch_mssn.get_no_of_file_chunks()
        self.chunk_name = "chunk%03d" % self.chunk_no
        self.meta_file = File(os.path.join(self.arch_mssn.uploadcache_proj_folder.path,
                                           "chunk%03d.tar.meta" % self.chunk_no))
        self.archive_file = File(os.path.join(self.arch_mssn.uploadcache_proj_folder.path,
                                              "chunk%03d.tar" % self.chunk_no))
        self.split_files = []

    def set_should_split(self):
        self.split_files = None  # TODO: Implement

    def unset_should_split(self):
        self.meta_file = File(os.path.join(self.arch_mssn.uploadcache_proj_folder.path,
                                           "chunk%03d.tar.meta" % self.chunk_no))

    def add_file(self, file_or_folder):
        chunk_size_in_mb = int(self.app.config.options["chunk_size_in_mb"])
        chunk_size = chunk_size_in_mb * 1024 * 1024  # Convert to bytes
        self.files.append(file_or_folder)
        self.size += int(file_or_folder.get_size())
        if self.size > chunk_size:
            self.should_split = True

    def get_remaining_size(self):
        chunk_size_in_mb = int(self.app.config.options["chunk_size_in_mb"])
        chunk_size = chunk_size_in_mb * 1024 * 1024  # Convert to bytes
        return chunk_size - self.size

    def is_empty(self):
        return len(self.files) == 0

    def get_file_names(self):
        filenames_list = []
        if len(self.files) > 0:
            for file in self.files:
                filenames_list.append(file.name)
        else:
            self.log.warn("No files or folders found")
        return filenames_list

    def get_relative_file_paths(self):
        '''Get file paths relative to the current archiving mission's drop folder'''
        base_folder = self.arch_mssn.folder.path
        filepaths_list = []
        if len(self.files) > 0:
            for file in self.files:
                rel_file_path = file.path.replace(base_folder + "/", "")
                filepaths_list.append(rel_file_path)
        else:
            self.log.warn("No files or folders found")
        return filepaths_list

    def get_file_names_recursive(self):
        filenames_list = []
        if len(self.files) > 0:
            for file in self.files:
                filenames_list.append(file.name)
                if file.get_type() == "dir":
                    dir_contents = file.list_files_recursive(include_self=True)
                    filenames_list.extend(dir_contents)
        else:
            self.log.warn("No files or folders found")
        return filenames_list

    def get_tar_command(self):
        tar_cmd = ["tar", "-cvf", self.archive_file.path,
                   "-C", self.arch_mssn.folder.path, "-T", "-"]
        tar_cmd.append("--exclude-from=" + self.app.config.options["tarexcludepatternsfile"])
        return tar_cmd

    def get_tar_command_for_piping(self):
        tar_cmd = ["tar", "-cvf", "-", "-C", self.arch_mssn.folder.path, "-T", "-"]
        tar_cmd.append("--exclude-from=" + self.app.config.options["tarexcludepatternsfile"])
        return tar_cmd

    def get_split_command(self):
        return ["split", "-b", str(self.app.config.options["chunk_size_in_mb"]) + "m",
                "-d", "-a", "5", "-", self.archive_file.path + ".split"]

    def get_archiving_file_paths(self):
        archiving_file_paths = []
        for archiving_file in self.get_archiving_files():
            archiving_file_paths.append(archiving_file.path)
        return archiving_file_paths

    def get_archiving_files(self):
        archive_files = [self.meta_file]
        if self.should_split:
            archive_files.extend(self.get_splitted_archive_file())
        else:
            archive_files.append(self.get_unsplit_archive_file())
        return archive_files

    def get_unsplit_archive_file(self):
        return self.archive_file

    def get_splitted_archive_file(self):
        archive_file_name = self.archive_file.name
        split_files = self.arch_mssn.uploadcache_proj_folder.get_files_matching_pattern(
            r"%s.split\d{5}" % archive_file_name)
        return split_files

    def create_as_tar_file_in_upload_cache(self):
        files_to_archive = self.get_relative_file_paths()
        files_to_archive_as_rows = "\n".join(files_to_archive) + "\n"
        if self.should_split:
            tar_cmd = self.get_tar_command_for_piping()
            split_cmd = self.get_split_command()
            try:
                OSUtils().exec_piped_command(tar_cmd, split_cmd,
                                             cmd1stdin=files_to_archive_as_rows)
                self.log.info("Tar-and-split operation successful for archive: "
                              + self.archive_file.path)
            except Exception:
                self.log.error("Failed tar-and-split operation for (planned) tar file: %s"
                               % self.archive_file.path)
                raise
        else:
            tar_cmd = self.get_tar_command()
            try:
                OSUtils().exec_command(tar_cmd, cmd1stdin=files_to_archive_as_rows)
            except Exception:
                self.log.error("Failed tar operation for (planned) tar file: %s"
                               % self.archive_file.path)
                raise
            self.log.info("Tar operation successful for archive: " + self.archive_file.path)

    def create_meta_file(self):
        metafile_content = self.get_metafile_content()
        self.log.debug("Creating meta file: " + self.meta_file.path)
        self.meta_file.write(metafile_content)

    def get_metafile_content(self):
        metafile_content = \
            "<?xml version=\"1.0\"?>\n" + \
            "<archivemetainfo>\n" + \
            " <project>%s</project>\n" % self.arch_mssn.project.name + \
            " <archiving_mission_id>%s</archiving_mission_id>\n" % self.arch_mssn.id
        if self.should_split:
            sum_size = 0
            for split_file in self.get_splitted_archive_file():
                sum_size += split_file.get_size()
            size = sum_size
        else:
            size = self.archive_file.get_size()
        metafile_content += " <sizeinbytes>%d</sizeinbytes>\n" % size + \
            " <included_files>\n"
        for included_file_name in self.get_relative_file_paths():
            metafile_content += \
                " <included_file_path>%s</included_file_path>\n" % included_file_name
        metafile_content += " </included_files>\n"
        if self.should_split:
            metafile_content += " <splitfiles>\n"
            for split_file in self.get_splitted_archive_file():
                metafile_content += " <split_file>\n"
                metafile_content += " <filename>%s</filename>\n" % split_file.name
                metafile_content += " <adler32sum>%s</adler32sum>\n" % split_file.get_adler32sum()
                metafile_content += " </split_file>\n"
            metafile_content += " </splitfiles>\n"
        else:
            metafile_content += " <adler32sum>%s</adler32sum>\n" % self.archive_file.get_adler32sum()
        metafile_content += "</archivemetainfo>\n\n"
        return metafile_content

    def create_par2_files(self):
        chunk_file_paths = self.get_archiving_file_paths()
        par2_cmd = [self.app.config.options["par2_bin_path"], "c", "-t+",
                    os.path.join(self.arch_mssn.uploadcache_proj_folder.path,
                                 "%s.par2" % self.chunk_name)]
        # Version to run for a non-threaded par2 binary (drops the -t+ flag):
        # par2_cmd = [self.app.config.options["par2_bin_path"], "c",
        #             os.path.join(self.arch_mssn.uploadcache_proj_folder.path,
        #                          "%s.par2" % self.chunk_name)]
        par2_cmd.extend(chunk_file_paths)
        self.log.info("About to create par2 files for chunk: %s in archmission %s"
                      % (self.chunk_name, self.arch_mssn.id))
        OSUtils().exec_command(par2_cmd)
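# A standalone sketch of the size accounting in FileChunk.add_file(): with
# chunk_size_in_mb = 100, should_split flips as soon as the accumulated size
# exceeds 100 * 1024 * 1024 bytes. Plain integers stand in for the
# File/Folder objects' get_size() results.
def demo_should_split(sizes_in_bytes, chunk_size_in_mb=100):
    chunk_size = chunk_size_in_mb * 1024 * 1024
    total = 0
    for size in sizes_in_bytes:
        total += size
        if total > chunk_size:
            return True  # FileChunk.add_file() would set should_split here
    return False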
class ArchivingMission(AbstractObject):

    def __init__(self, archiving_domain, project, id=None):
        super(ArchivingMission, self).__init__()
        if id is None:
            self.id = self.generate_new_arch_mssn_id()
        else:
            self.id = id

        # ensure the link between itself and its domain
        self.archiving_domain = archiving_domain
        self.app = self.archiving_domain.app

        # ensure the link between itself and its project (from each end)
        self.project = project
        self.project.arch_mssns.append(self)

        # The mission's folder in the drop folder is hidden, so its name starts
        # with a dot; the upload cache and confirm folders are not hidden.
        arch_mssn_folder_name_hidden = ".%s" % self.id
        arch_mssn_folder_name = self.id
        self.folder = ArchivingMissionFolder(
            self, os.path.join(project.drop_folder.path, arch_mssn_folder_name_hidden))
        self.uploadcache_proj_folder = ArchivingMissionFolder(
            self, os.path.join(project.uploadcache_proj_folder.path, arch_mssn_folder_name))
        self.confirmation_files_folder = ArchivingMissionFolder(
            self, os.path.join(project.confirmation_files_folder.path, arch_mssn_folder_name))
        self.file_chunks = []

        self.lock = None
        self.lock_file = File(os.path.join(self.folder.path, "archiving_in_progress.lock"))
        self.lock_file_uploadcache = File(
            os.path.join(self.uploadcache_proj_folder.path, "archiving_in_progress.lock"))

        self.state = "undefined"
        self.state_file_new = File(os.path.join(self.folder.path, "archiving_state.new"))
        self.state_file_failed_prepare_for_upload1 = File(
            os.path.join(self.folder.path, "archiving_state.failed_prepare_for_upload"))
        self.state_file_failed_prepare_for_upload2 = File(
            os.path.join(self.uploadcache_proj_folder.path,
                         "archiving_state.failed_prepare_for_upload"))
        self.state_file_prepared_for_upload1 = File(
            os.path.join(self.folder.path, "archiving_state.prepared_for_upload"))
        self.state_file_prepared_for_upload2 = File(
            os.path.join(self.uploadcache_proj_folder.path,
                         "archiving_state.prepared_for_upload"))
        self.state_file_failed_upload = File(
            os.path.join(self.uploadcache_proj_folder.path, "archiving_state.failed_upload"))
        self.state_file_uploaded = File(
            os.path.join(self.uploadcache_proj_folder.path, "archiving_state.uploaded"))
        self.state_file_failed_create_confirm_files = File(
            os.path.join(self.uploadcache_proj_folder.path,
                         "archiving_state.failed_create_confirm_files"))
        self.state_file_created_confirm_files = File(
            os.path.join(self.uploadcache_proj_folder.path,
                         "archiving_state.created_confirm_files"))

    # ------------------------- MAIN ACTIONS -----------------------------

    def prepare_for_upload(self):
        if self.state not in ["new", "failed_prepare_for_upload"]:
            self.log.info("Found existing arch_mssn %s, but it did not have status new "
                          "or failed_prepare_for_upload" % self.id)
        else:
            self.log.info("Now preparing %s in project %s for upload ..."
                          % (self.id, self.project.name))
            # Some assertions
            self.uploadcache_proj_folder.ensure_exists()
            File(self.app.config.options["tarexcludepatternsfile"]).assert_exists()

            self.partition_files_in_chunks()
            for file_chunk in self.file_chunks:
                file_chunk.create_as_tar_file_in_upload_cache()
                file_chunk.create_meta_file()
                if self.app.config.options["create_par2_files"] == "True":
                    # TODO: Verify correct behaviour!
                    file_chunk.create_par2_files()
            self.set_upload_cache_owner_to_upload_user()  # Must be run as root!

    def upload(self):
        '''Upload all files that are ready for upload'''
        files_to_upload = []
        meta_files = self.uploadcache_proj_folder.get_files_matching_pattern(r".*\.tar\.meta")
        for meta_file in meta_files:
            self.log.debug("Found meta file: %s (path: %s)" % (meta_file.name, meta_file.path))
            tar_file_basename = utils.rchop(meta_file.name, ".tar.meta")
            files_to_upload.extend(self.uploadcache_proj_folder.get_files_matching_pattern(
                r"%s(\.tar\.split\d{5}|\.vol\d{3}\+\d{2}\.par2)?" % tar_file_basename))
        if len(files_to_upload) == 0:
            self.log.warn("No files to upload, for archiving mission %s in project %s"
                          % (self.id, self.project.name))
        else:
            for file in files_to_upload:
                swestore_path = self.create_swestore_path(self, file)
                if not self.swestore_file_is_uploaded(swestore_path):
                    try:
                        self.upload_file(file, swestore_path)
                    except Exception:
                        self.log.error("Failed to upload file %s" % file.path)
                        raise
                else:
                    self.log.info("File already uploaded, so skipping: %s" % swestore_path)

    def create_confirm_files(self):
        confirm_folder_path = os.path.join(self.project.folder.path,
                                           self.app.config.options["confirm_files_path_rel"],
                                           self.id)
        confirm_folder = Folder(confirm_folder_path)
        confirm_folder.ensure_exists()
        file_names_in_upload_cache = self.uploadcache_proj_folder.list_files()
        file_names = self.get_strings_matching_pattern_from_list(r".*\.meta",
                                                                 file_names_in_upload_cache)
        for file_name in file_names:
            src_path = os.path.join(self.uploadcache_proj_folder.path, file_name)
            dest_path = os.path.join(confirm_folder.path, file_name)
            try:
                shutil.copy(src_path, dest_path)
                self.log.info("Successfully copied file %s to %s" % (src_path, dest_path))
            except Exception:
                self.log.error("Could not copy file %s to %s" % (src_path, dest_path))
                raise

    # --------------------- HELPER METHODS -----------------------------------

    def partition_files_in_chunks(self):
        '''Divide the files/folders depending on their size into approximately equally
        sized chunks. If one single file/folder exceeds the maximum chunk size, the
        chunk is told to be split (which happens physically later on).'''
        files_and_folders = self.get_file_list_sorted_by_size()
        file_chunk = FileChunk(self)
        for file in files_and_folders:
            file_size = file.get_size()
            rem_size = file_chunk.get_remaining_size()
            if (file_size >= rem_size) and not file_chunk.is_empty():
                # Store away current file chunk, and start filling a new one
                self.file_chunks.append(file_chunk)
                file_chunk = FileChunk(self)
            file_chunk.add_file(file)
        self.file_chunks.append(file_chunk)

    def set_upload_cache_owner_to_upload_user(self):
        '''Change owner of the uploadcache folder for the current project to a user
        which has the required credentials for uploading the files to SweStore'''
        uid = int(self.archiving_domain.app.config.options["swestoreuploaduid"])
        gid = int(self.archiving_domain.app.config.options["swestoreuploadgid"])
        try:
            os.chown(self.uploadcache_proj_folder.path, uid, gid)
        except Exception:
            self.log.error("Failed to chown folder to user with uid %d and gid %d. "
                           "Are you really running as root?" % (uid, gid))
            raise

    # ----------------------- UPLOAD HELPER METHODS ------------------------------

    def create_swestore_path(self, arch_mssn, file_to_upload):
        swestore_path = os.path.join(self.app.config.options["swestorebasepath"],
                                     arch_mssn.project.name,
                                     arch_mssn.id,
                                     file_to_upload.name)
        return swestore_path

    def swestore_file_is_uploaded(self, swestore_path):
        check_exist_cmd = ["ngls", swestore_path]
        (stdout, stderr, returncode) = OSUtils().exec_command(
            check_exist_cmd, exception_on_nonzero_return_code=False)
        if returncode != 0:
            if re.match(".*No such file or directory.*", stderr, re.MULTILINE | re.DOTALL):
                return False
            else:
                raise Exception("Listing of file did not succeed! Have you run grid-proxy-init?")
        else:
            return True

    def upload_file(self, file, path_on_swestore):
        self.log.info("Uploading '%s' to '%s'" % (file.path, path_on_swestore))
        upload_cmd = ["ngcp", "-r", "5",
                      "-d", str(self.app.config.options["ngcp_debug_level"]),
                      file.path, path_on_swestore]
        retries_left = int(self.app.config.options["uploadretries"])
        while retries_left > 0:
            try:
                OSUtils().exec_command(upload_cmd)
                self.log.info("Upload successful!")
                retries_left = 0  # Break the loop
            except Exception:
                retries_left -= 1
                if retries_left > 0:
                    self.log.warn("Failed uploading, trying again %d times ..." % retries_left)
                else:
                    raise

    # ----------------- CREATE CONFIRM FILES HELPER METHODS ------------------------

    def get_strings_matching_pattern_from_list(self, pattern, list):
        result_list = []
        for item in list:
            if re.match(pattern, item) and item not in result_list:
                result_list.append(item)
        return result_list

    # ------------------- SOMEWHAT GENERIC HELPER METHODS ------------------------

    def get_file_list_sorted_by_size(self):
        '''Get a list of files and folders sorted per size, so that it can be used to
        partition files in suitably sized chunks for each tar archive'''
        files = self.folder.get_files_matching_pattern(
            self.app.config.options["allowedfilename_pattern"],
            self.app.config.options["excludedfilename_pattern"],
            recursive=True,
            only_empty_folders=True)
        if len(files) == 0:
            errmsg = "No files for arch mssn folder: %s" % self.folder.path
            self.log.error(errmsg)
            raise Exception(errmsg)
        files_sorted_by_size = sorted(files, key=lambda file: file.get_size())
        return files_sorted_by_size

    def get_folders_in_folder_matching_pattern(self, folder, pattern, antipattern=""):
        '''Method used as a more secure alternative to "glob". It returns all files in
        the specified folder (except files in subfolders), whose filenames match the
        specified regex pattern'''
        resultfiles = []
        try:
            # List all files in the directory
            files = os.listdir(folder)
            for file in files:
                # If they match the pattern (and not the antipattern) ...
                if ((re.match(pattern, file) and antipattern == "")
                        or (re.match(pattern, file) and not re.match(antipattern, file))):
                    filepath = os.path.join(folder, file)
                    # ... then add them to the resulting list
                    resultfiles.append(filepath)
        except Exception:
            self.log.error("Could not get files in folder %s, matching pattern %s "
                           "(antipattern %s)" % (folder, pattern, antipattern))
            raise
        return resultfiles

    def get_no_of_file_chunks(self):
        return len(self.file_chunks)

    def generate_new_arch_mssn_id(self):
        time_part = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
        arch_mssn_id = "arch_mssn-%s" % time_part
        return arch_mssn_id

    def detect_state_from_file_system(self):
        if self.state_file_failed_create_confirm_files.exists():
            self.set_state_failed_create_confirm_files()
        elif self.state_file_created_confirm_files.exists():
            self.set_state_created_confirm_files()
        elif self.state_file_failed_upload.exists():
            self.set_state_failed_upload()
        elif self.state_file_uploaded.exists():
            self.set_state_uploaded()
        elif self.archiving_domain.app.action in ["prepare"] and \
                self.state_file_failed_prepare_for_upload1.exists():
            self.set_state_failed_prepare_for_upload()
        elif self.state_file_failed_prepare_for_upload2.exists():
            self.set_state_failed_prepare_for_upload()
        elif self.archiving_domain.app.action in ["prepare"] and \
                self.state_file_prepared_for_upload1.exists():
            self.log.info("State file exists: %s" % self.state_file_prepared_for_upload1.path)
            self.set_state_prepared_for_upload()
        elif self.state_file_prepared_for_upload2.exists():
            self.log.info("State file exists: %s" % self.state_file_prepared_for_upload2.path)
            self.set_state_prepared_for_upload()
        elif self.state_file_new.exists():
            self.set_state_new()

    def set_state_new(self):
        self.state = "new"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        self.state_file_new.ensure_exists()

    def set_state_prepared_for_upload(self):
        self.state = "prepared_for_upload"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        # We can't be sure that the original drophere folder does exist, so we better
        # check that first
        if self.archiving_domain.app.action in ["prepare"]:
            if self.folder.exists():
                self.state_file_prepared_for_upload1.ensure_exists()
            else:
                self.log.warn("Archiving mission folder missing in project's drophere "
                              "folder: %s" % self.folder.path)
        if self.uploadcache_proj_folder.exists():
            self.state_file_prepared_for_upload2.ensure_exists()
        else:
            self.log.warn("Archiving mission folder missing in project's upload cache "
                          "folder: %s" % self.uploadcache_proj_folder.path)

    def set_state_failed_prepare_for_upload(self):
        self.state = "failed_prepare_for_upload"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        # We can't be sure that the original drophere folder does exist, so we better
        # check that first
        if self.archiving_domain.app.action in ["prepare"]:
            if self.folder.exists():
                self.state_file_failed_prepare_for_upload1.ensure_exists()
            else:
                self.log.warn("Archiving mission folder missing in project's drophere "
                              "folder: %s" % self.folder.path)
        if self.uploadcache_proj_folder.exists():
            self.state_file_failed_prepare_for_upload2.ensure_exists()
        else:
            self.log.warn("Archiving mission folder missing in project's upload cache "
                          "folder: %s" % self.uploadcache_proj_folder.path)

    def set_state_uploaded(self):
        self.state = "uploaded"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        self.state_file_uploaded.ensure_exists()

    def set_state_failed_upload(self):
        self.state = "failed_upload"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        self.state_file_failed_upload.ensure_exists()

    def set_state_created_confirm_files(self):
        self.state = "created_confirm_files"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        self.state_file_created_confirm_files.ensure_exists()

    def set_state_failed_create_confirm_files(self):
        self.state = "failed_create_confirm_files"
        self.log.debug("Set state of archiving mission %s to: %s" % (self.id, self.state))
        self.ensure_delete_all_state_files()
        self.state_file_failed_create_confirm_files.ensure_exists()

    def ensure_delete_all_state_files(self):
        action = self.archiving_domain.app.action
        if action in ["prepare", "createconfirmfiles"]:
            self.state_file_new.ensure_delete()
            self.state_file_failed_prepare_for_upload1.ensure_delete()
            self.state_file_failed_prepare_for_upload2.ensure_delete()
            self.state_file_prepared_for_upload1.ensure_delete()
            self.state_file_prepared_for_upload2.ensure_delete()
            self.state_file_failed_upload.ensure_delete()
            self.state_file_uploaded.ensure_delete()
            self.state_file_failed_create_confirm_files.ensure_delete()
            self.state_file_created_confirm_files.ensure_delete()
        elif action == "upload":
            self.state_file_prepared_for_upload2.ensure_delete()
            self.state_file_failed_upload.ensure_delete()
            self.state_file_uploaded.ensure_delete()

    # ------------------- LOCK FILE METHODS ------------------------------

    def lock_main_folder(self):
        '''Get an archiving_domain lock for the main folder (located under the
        project's drop folder)'''
        self.lock_file.ensure_exists()
        self.lock = ApplicationLock(self.lock_file)

    def unlock_main_folder(self):
        if self.lock is not None:
            self.lock.unlock()
        if self.lock_file.exists():
            self.lock_file.delete()

    def lock_uploadcache_folder(self):
        '''Get an archiving_domain lock for the mission's folder in the project's
        upload cache'''
        self.log.debug("Trying to lock upload cache folder for %s in project %s ..."
                       % (self.id, self.project.name))
        self.lock_file_uploadcache.ensure_exists()
        self.lock_uploadcache = ApplicationLock(self.lock_file_uploadcache)
        self.log.debug("Succeeded to lock upload cache folder for %s in project %s ..."
                       % (self.id, self.project.name))

    def unlock_uploadcache_folder(self):
        if self.lock_uploadcache is not None:
            self.lock_uploadcache.unlock()
        if self.lock_file_uploadcache.exists():
            self.lock_file_uploadcache.delete()

    def has_unlocked_main_folder(self):
        return self.folder.exists() and not self.lock_file.exists()

    def has_unlocked_uploadcache_folder(self):
        return self.uploadcache_proj_folder.exists() and not self.lock_file_uploadcache.exists()
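# A standalone sketch of the greedy packing in partition_files_in_chunks(),
# operating on plain byte counts instead of File objects. An entry larger
# than chunk_size simply produces an oversized chunk, which the real code
# marks for splitting via should_split.
def partition_sizes(sizes, chunk_size):
    chunks, current, current_size = [], [], 0
    for size in sorted(sizes):
        if size >= chunk_size - current_size and current:
            chunks.append(current)  # store the current chunk, start a new one
            current, current_size = [], 0
        current.append(size)
        current_size += size
    chunks.append(current)
    return chunks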
def upload(self, upload, notebook_id, note_id, x_progress_id, user_id):
    """
    Upload a file from the client for attachment to a particular note. The
    x_progress_id must be provided as part of the query string, even if the other
    values are submitted as form data.

    @type upload: cgi.FieldStorage
    @param upload: file handle to uploaded file
    @type notebook_id: unicode
    @param notebook_id: id of the notebook that the upload is to
    @type note_id: unicode or NoneType
    @param note_id: id of the note that the upload is to (if any)
    @type x_progress_id: unicode
    @param x_progress_id: id of the file being uploaded
    @type user_id: unicode or NoneType
    @param user_id: id of current logged-in user (if any)
    @rtype: unicode
    @return: rendered HTML page
    @raise Access_error: the current user doesn't have access to the given notebook or note
    @raise Upload_error: the Content-Length header value is invalid
    """
    global current_uploads, current_uploads_lock

    file_id = x_progress_id

    current_uploads_lock.acquire()
    try:
        uploaded_file = current_uploads.get(file_id)
        if not uploaded_file:
            return dict(script=general_error_script % u"Please select a file to upload.")
        del current_uploads[file_id]
    finally:
        current_uploads_lock.release()

    user = self.__database.load(User, user_id)
    notebook = self.__users.load_notebook(user_id, notebook_id, read_write=True)

    if not user or not notebook or notebook.read_write == Notebook.READ_WRITE_FOR_OWN_NOTES:
        uploaded_file.delete()
        return dict(
            script=general_error_script % u"Sorry, you don't have access to do that. "
            u"Please make sure you're logged in as the correct user."
        )

    content_type = upload.headers.get("content-type")

    # if we didn't receive all of the expected data, abort
    if uploaded_file.total_received_bytes < uploaded_file.content_length:
        uploaded_file.delete()
        return dict(
            script=general_error_script % u"The uploaded file was not fully received. "
            u"Please try again or contact support."
        )

    if uploaded_file.file_received_bytes == 0:
        uploaded_file.delete()
        return dict(
            script=general_error_script % u"The uploaded file was not received. "
            u"Please make sure that the file exists."
        )

    # if the uploaded file's size would put the user over quota, bail and inform the user
    rate_plan = self.__users.rate_plan(user.rate_plan)
    storage_quota_bytes = rate_plan.get(u"storage_quota_bytes")

    if storage_quota_bytes and \
            user.storage_bytes + uploaded_file.total_received_bytes > storage_quota_bytes:
        uploaded_file.delete()
        return dict(script=quota_error_script)

    # record metadata on the upload in the database
    db_file = File.create(file_id, notebook_id, note_id, uploaded_file.filename,
                          uploaded_file.file_received_bytes, content_type)
    self.__database.save(db_file, commit=False)
    self.__users.update_storage(user_id, commit=False)
    self.__database.commit()

    uploaded_file.close()

    return dict()
def serve(request: bytes) -> str:
    """
    Handle the peer request

    Parameters:
        request - the raw request bytes

    Returns:
        str - the response
    """
    command = request[0:4].decode('UTF-8')

    if command == "LOGI":
        if len(request) != 64:
            return "0" * 16
        ip = request[4:59].decode('UTF-8')
        port = request[59:64].decode('UTF-8')
        try:
            conn = database.get_connection(db_file)
            conn.row_factory = database.sqlite3.Row
        except database.Error as e:
            print(f'Error: {e}')
            return "0" * 16
        try:
            peer = peer_repository.find_by_ip(conn, ip)
            # if the peer hasn't already logged in
            if peer is None:
                session_id = str(uuid.uuid4().hex[:16].upper())
                peer = peer_repository.find(conn, session_id)
                # regenerate while the generated session_id already exists
                while peer is not None:
                    session_id = str(uuid.uuid4().hex[:16].upper())
                    peer = peer_repository.find(conn, session_id)
                peer = Peer(session_id, ip, port)
                peer.insert(conn)
            conn.commit()
            conn.close()
        except database.Error as e:
            conn.close()
            print(f'Error: {e}')
            return "0" * 16
        return "ALGI" + peer.session_id

    elif command == "ADDF":
        if len(request) != 152:
            return "Invalid request. Usage is: ADDF<your_session_id><file_md5><filename>"
        session_id = request[4:20].decode('UTF-8')
        md5 = request[20:52].decode('UTF-8')
        name = request[52:152].decode('UTF-8').lower()
        try:
            conn = database.get_connection(db_file)
            conn.row_factory = database.sqlite3.Row
        except database.Error as e:
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        try:
            peer = peer_repository.find(conn, session_id)
            if peer is None:
                conn.close()
                return "Unauthorized: your SessionID is invalid"
            file = file_repository.find(conn, md5)
            if file is None:
                file = File(md5, name, 0)
                file.insert(conn)
                file_repository.add_owner(conn, md5, session_id)
            else:
                file.file_name = name
                file.update(conn)
                if not file_repository.peer_has_file(conn, session_id, md5):
                    file_repository.add_owner(conn, md5, session_id)
            num_copies = file_repository.get_copies(conn, md5)
            conn.commit()
            conn.close()
        except database.Error as e:
            conn.rollback()
            conn.close()
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        return "AADD" + str(num_copies).zfill(3)

    elif command == "DELF":
        if len(request) != 52:
            return "Invalid request. Usage is: DELF<your_session_id><file_md5>"
        session_id = request[4:20].decode('UTF-8')
        md5 = request[20:52].decode('UTF-8')
        try:
            conn = database.get_connection(db_file)
            conn.row_factory = database.sqlite3.Row
        except database.Error as e:
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        try:
            peer = peer_repository.find(conn, session_id)
            if peer is None:
                conn.close()
                return "Unauthorized: your SessionID is invalid"
            if not file_repository.peer_has_file(conn, session_id, md5):
                conn.close()
                return "ADEL999"
            peer_repository.file_unlink(conn, session_id, md5)
            copy = file_repository.get_copies(conn, md5)
            if copy == 0:
                file = file_repository.find(conn, md5)
                file.delete(conn)
            conn.commit()
            conn.close()
        except database.Error as e:
            conn.rollback()
            conn.close()
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        return "ADEL" + str(copy).zfill(3)

    elif command == "FIND":
        if len(request) != 40:
            return "Invalid command. Usage is: FIND<your_session_id><query_string>"
        session_id = request[4:20].decode('UTF-8')
        query = request[20:40].decode('UTF-8').lower().strip()
        if query != '*':
            query = '%' + query + '%'
        try:
            conn = database.get_connection(db_file)
            conn.row_factory = database.sqlite3.Row
        except database.Error as e:
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        try:
            peer = peer_repository.find(conn, session_id)
            if peer is None:
                conn.close()
                return "Unauthorized: your SessionID is invalid"
            total_file = file_repository.get_files_count_by_querystring(conn, query)
            if total_file == 0:
                conn.close()  # close the connection before the early return
                return 'AFIN' + str(total_file).zfill(3)
            result = str(total_file).zfill(3)
            file_list = file_repository.get_files_with_copy_amount_by_querystring(conn, query)
            for file_row in file_list:
                file_md5 = file_row['file_md5']
                file_name = file_row['file_name']
                copies = file_row['copies']
                result = result + file_md5 + file_name + str(copies).zfill(3)
                peer_list = peer_repository.get_peers_by_file(conn, file_md5)
                for peer_row in peer_list:
                    peer_ip = peer_row['ip']
                    peer_port = peer_row['port']
                    result = result + peer_ip + peer_port
            conn.commit()
            conn.close()
        except database.Error as e:
            conn.rollback()
            conn.close()
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        return "AFIN" + result

    elif command == "DREG":
        if len(request) != 52:
            return "Invalid request. Usage is: DREG<your_session_id><file_md5>"
        session_id = request[4:20].decode('UTF-8')
        md5 = request[20:52].decode('UTF-8')
        try:
            conn = database.get_connection(db_file)
            conn.row_factory = database.sqlite3.Row
        except database.Error as e:
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        try:
            peer = peer_repository.find(conn, session_id)
            if peer is None:
                conn.close()
                return "Unauthorized: your SessionID is invalid"
            file = file_repository.find(conn, md5)
            if file is None:
                conn.close()  # close the connection before the early return
                return "File not found."
            file.download_count += 1
            file.update(conn)
            conn.commit()
            conn.close()
        except database.Error as e:
            conn.rollback()
            conn.close()
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        return "ADRE" + str(file.download_count).zfill(5)

    elif command == "LOGO":
        if len(request) != 20:
            return "Invalid request. Usage is: LOGO<your_session_id>"
        session_id = request[4:20].decode('UTF-8')
        try:
            conn = database.get_connection(db_file)
            conn.row_factory = database.sqlite3.Row
        except database.Error as e:
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        try:
            peer = peer_repository.find(conn, session_id)
            if peer is None:
                conn.close()
                return "Unauthorized: your SessionID is invalid"
            deleted = file_repository.delete_peer_files(conn, session_id)
            peer.delete(conn)
            conn.commit()
            conn.close()
        except database.Error as e:
            conn.rollback()
            conn.close()
            print(f'Error: {e}')
            return "The server has encountered an error while trying to serve the request."
        return "ALGO" + str(deleted).zfill(3)

    else:
        return "Command '" + request.decode('UTF-8') + "' is invalid, try again."
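# A hedged client-side sketch of how a LOGI request for serve() could be
# framed: a 4-byte command, a 55-byte ip field, and a 5-byte port field
# (64 bytes total, matching the length check above). The space-padding and
# zero-filling are assumptions inferred from the slice offsets, not a
# documented wire format.
def build_logi_request(ip: str, port: int) -> bytes:
    return b"LOGI" + ip.ljust(55).encode('UTF-8') + str(port).zfill(5).encode('UTF-8')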
def get_files_for_user(user):
    return File.query(File.user == user).fetch()
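# A minimal usage sketch, assuming the App Engine NDB-style model implied by
# File.query(...).fetch(); `current_user` is a hypothetical stand-in for
# however the caller identifies the user.
for f in get_files_for_user(current_user):
    print(f.user)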