class BatchFilesCreator(object):
    """
    Create a batch of files with a command-line application (CLA) and link them into one folder.
    """

    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Constructor of the general batch files creator, to create multiple files from a CLA.

        :param cla: a ``CLApplication`` object, wrapper for the corresponding CLA
        :param base_folder: accession of the base folder where the pipeline files will be organised
            into subfolders
        :param friendly_name: user-friendly name of the files produced by the app ; used in the
            on-screen statements and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files.
            Default is ``None``
        """
        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args

    def create_files(self, sources):
        """
        Create one output file per source and link each of them into a new subfolder.

        The subfolder is created under the base folder and is named after ``friendly_name``.
        Progress is printed after every created file.

        :param sources: iterable of source items ; each one is passed to ``_create_output_file``
        :return: the created outputs, in the same order as ``sources``
        :rtype: list
        """
        # Materialise the input once, so that the total is known even when a generator is passed.
        sources = list(sources)
        total = len(sources)

        print('Creating %s files...' % self._friendly_name)
        output_folder = self._files_util.create_folder(self._friendly_name,
                                                       parent=self._base_folder)
        output_files = []
        for i, source in enumerate(sources, 1):
            output = self._create_output_file(source)
            self._files_util.link_file(output, output_folder)
            # The denominator is the number of sources, not the length of the created output.
            print('Created %s file %s (%d/%d)' % (self._friendly_name, output, i, total))
            output_files.append(output)
        return output_files

    # this method can be overridden in child classes to allow for more complex file creation logic
    def _create_output_file(self, source):
        """
        Create a single output file from ``source`` and apply the custom arguments, if any.

        :param source: source item handed over to ``cla.create_file``
        :return: whatever ``cla.create_file`` returned for this source
        """
        output = self._cla.create_file(source)
        if self._custom_args:
            self._cla.change_command_line_arguments(output, self._custom_args)
        return output
def recognize_files(connection, accession_file_map, new_folder):
    """
    Run file recognition on raw files, print a report and set aside the unrecognized ones.

    The recognized groups are turned into files through the ``genestack/upload`` application and
    listed on screen, grouped by ``kind``. Raw files that do not appear in any recognized group are
    listed, linked into a new "Unrecognized files" folder created inside ``new_folder`` and
    unlinked from ``new_folder`` itself.

    :param connection: connection used to create ``FilesUtil`` and to reach the upload application
    :param accession_file_map: mapping of raw file accession to the file name printed for it
    :type accession_file_map: dict
    :param new_folder: folder the raw files are unlinked from when they are not recognized ; also
        the parent of the "Unrecognized files" folder
    """
    unrecognized_folder_name = "Unrecognized files"

    # Files Recognition
    fu = FilesUtil(connection)
    application = connection.application('genestack/upload')

    # Pass a real list: ``dict.keys()`` is a view object on Python 3, a list on Python 2.
    recognised_files = application.invoke('recognizeGroupsByAccession', list(accession_file_map))

    recognized_accessions = {
        info['accession']
        for recognised in recognised_files
        for sources in recognised['sourceFileInfos'].values()
        for info in sources
    }

    created_files = application.invoke('createFiles', recognised_files, [], None)
    # groupby only merges adjacent items, hence the sort on the same key beforehand
    groups = sorted(created_files['files'], key=itemgetter('kind'))
    for name, group in groupby(groups, key=itemgetter('kind')):
        print(name)
        # maybe sort by filename before printing a group?
        for f in group:
            print('\t%s / %s' % (f['accession'], f['name']))

    unrecognized_file_infos = set(accession_file_map) - recognized_accessions
    if not unrecognized_file_infos:
        return

    print('Unrecognized Raw Files')
    for accession in unrecognized_file_infos:
        file_name = accession_file_map[accession]
        # Only byte strings need decoding ; a text string has no ``decode`` on Python 3.
        if isinstance(file_name, bytes):
            file_name = file_name.decode('utf-8')
        print('\t%s / %s' % (accession, file_name))

    # move unrecognized files to new folder
    unrecognized_folder = fu.create_folder(unrecognized_folder_name, parent=new_folder)
    for accession in unrecognized_file_infos:
        fu.link_file(accession, unrecognized_folder)
        fu.unlink_file(accession, new_folder)
    print('Unrecognized files moved to %s / %s' % (unrecognized_folder, unrecognized_folder_name))
def upload_files(connection, files, folder_name, folder_accession):
    """
    Load raw files and gather them in a single folder.

    When ``folder_accession`` is given, the files go to that folder and ``folder_name`` is
    replaced by the name reported for it. Otherwise a new folder is created inside the special
    "uploaded" folder, named ``folder_name`` or, if that is empty, after the current date and time.
    Every loaded file is linked into the target folder and unlinked from the "uploaded" folder.

    :param genestack_client.Connection connection:
    :param list[str] files:
    :param str folder_name:
    :param str folder_accession:
    :return: target folder accession, target folder name and a map of raw file accession to the
        corresponding entry of ``files``
    :rtype: tuple
    """
    data_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # An existing folder was requested: use its actual name from here on.
        folder_name = files_util.get_infos([folder_accession])[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_folder,
            description='Files uploaded by genestack-uploader',
        )

    accession_file_map = {}
    for path in files:
        raw_accession = data_importer.load_raw(path)
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_folder)
        accession_file_map[raw_accession] = path

    return folder_accession, folder_name, accession_file_map
def upload_files(connection, files, folder_name):
    """
    Load raw files and gather them in a newly created folder.

    The folder is created inside the special "uploaded" folder and is named ``folder_name`` or,
    if that is empty, after the current date and time. Every loaded file is linked into the new
    folder and unlinked from the "uploaded" folder.

    :param connection: connection used to create ``DataImporter`` and ``FilesUtil``
    :param files: iterable of items passed one by one to ``DataImporter.load_raw``
    :param folder_name: name of the folder to create ; a timestamped name is used when empty
    :return: new folder (as returned by ``create_folder``), its name and a map of raw file
        accession to the corresponding entry of ``files``
    :rtype: tuple
    """
    data_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_folder = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
    new_folder = files_util.create_folder(
        folder_name,
        parent=uploaded_folder,
        description='Files uploaded by genestack-uploader',
    )

    accession_file_map = {}
    for path in files:
        raw_accession = data_importer.load_raw(path)
        files_util.link_file(raw_accession, new_folder)
        files_util.unlink_file(raw_accession, uploaded_folder)
        accession_file_map[raw_accession] = path

    return new_folder, folder_name, accession_file_map
class BatchFilesCreator(object):
    """
    Create a batch of files with a command-line application (CLA) and link them into one folder.
    """

    def __init__(self, cla, base_folder, friendly_name, custom_args=None):
        """
        Constructor of the general batch files creator, to create multiple files from a CLA.

        :param cla: a ``CLApplication`` object, wrapper for the corresponding CLA
        :param base_folder: accession of the base folder where the pipeline files will be organised
            into subfolders
        :param friendly_name: user-friendly name of the files produced by the app ; used in the
            on-screen statements and in the name of the project subfolders
        :param custom_args: list of custom command-line argument strings for the files.
            Default is ``None``
        """
        self._cla = cla
        self._files_util = FilesUtil(cla.connection)
        self._base_folder = base_folder
        self._friendly_name = friendly_name
        self._custom_args = custom_args

    def create_files(self, sources):
        """
        Create one output file per source and link each of them into a new subfolder.

        The subfolder is created under the base folder and is named after ``friendly_name``.
        Progress is printed after every created file.

        :param sources: iterable of source items ; each one is passed to ``_create_output_file``
        :return: the created outputs, in the same order as ``sources``
        :rtype: list
        """
        # Materialise the input once, so that the total is known even when a generator is passed.
        sources = list(sources)
        total = len(sources)

        # ``print(...)`` with one pre-formatted string gives the same output on Python 2 and 3,
        # whereas the ``print`` statement is a syntax error on Python 3.
        print("Creating %s files..." % self._friendly_name)
        output_folder = self._files_util.create_folder(self._friendly_name,
                                                       parent=self._base_folder)
        output_files = []
        for i, source in enumerate(sources, 1):
            output = self._create_output_file(source)
            self._files_util.link_file(output, output_folder)
            # The denominator is the number of sources, not the length of the created output.
            print("Created %s file %s (%d/%d)" % (self._friendly_name, output, i, total))
            output_files.append(output)
        return output_files

    # this method can be overridden in child classes to allow for more complex file creation logic
    def _create_output_file(self, source):
        """
        Create a single output file from ``source`` and apply the custom arguments, if any.

        :param source: source item handed over to ``cla.create_file``
        :return: whatever ``cla.create_file`` returned for this source
        """
        output = self._cla.create_file(source)
        if self._custom_args:
            self._cla.change_command_line_arguments(output, self._custom_args)
        return output
# Group the already collected ``files`` into one subfolder per application, inside a new
# "Organized files" folder created under ``source_folder``.
# ``print(...)`` with one pre-formatted string gives the same output on Python 2 and 3, whereas
# the ``print`` statement is a syntax error on Python 3.
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}  # application label -> folder created for it
for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application"
    # (in this order of preference) ; chaining with ``or`` also covers an ``id`` key that is
    # present but empty, which would otherwise give a folder called "Files for None"
    app_entry = entry.get('application')
    if app_entry:
        application = app_entry.get('name') or app_entry.get('id') or "Unknown application"
    else:
        application = "Unknown application"

    # if there is a folder for this group, we add the file to it ;
    # otherwise, we create one, add it to our dictionary of folders and add the file to it
    if application not in grouping_folders:
        new_folder = files_util.create_folder("Files for %s" % application, parent=output_folder)
        grouping_folders[application] = new_folder
    files_util.link_file(accession, grouping_folders[application])
    if move_files:
        # moving = linking into the group folder, then unlinking from the source folder
        files_util.unlink_file(accession, source_folder)

print("All done! Your files can be found inside the folder with accession %s" % output_folder)
# Collect the file children of ``source_folder`` and group them into one subfolder per
# application, inside a new "Organized files" folder created under ``source_folder``.
# ``print(...)`` with one pre-formatted string gives the same output on Python 2 and 3, whereas
# the ``print`` statement is a syntax error on Python 3.
print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}  # application label -> folder created for it
for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application"
    # (in this order of preference) ; chaining with ``or`` also covers an ``id`` key that is
    # present but empty, which would otherwise give a folder called "Files for None"
    app_entry = entry.get('application')
    if app_entry:
        application = app_entry.get('name') or app_entry.get('id') or "Unknown application"
    else:
        application = "Unknown application"

    # if there is a folder for this group, we add the file to it ;
    # otherwise, we create one, add it to our dictionary of folders and add the file to it
    if application not in grouping_folders:
        new_folder = files_util.create_folder("Files for %s" % application, parent=output_folder)
        grouping_folders[application] = new_folder
    files_util.link_file(accession, grouping_folders[application])
    if move_files:
        # moving = linking into the group folder, then unlinking from the source folder
        files_util.unlink_file(accession, source_folder)

print("All done! Your files can be found inside the folder with accession %s" % output_folder)