parser.add_argument(
    'folder',
    help='Accession of the Genestack folder containing the files')
args = parser.parse_args()
csv_input = args.csv_file
local_key = args.local_key

print('Connecting to Genestack...')
# get connection and application handlers
connection = get_connection(args)
files_util = FilesUtil(connection)

print('Collecting files...')
files = files_util.get_file_children(args.folder)
print('Found %d files. Collecting metadata...' % len(files))
infos = files_util.get_infos(files)

# Index the folder content by file name so an accession can be looked up from a name.
# NOTE(review): duplicate file names silently keep only the last accession - confirm
# that names are unique in the target folder.
identifier_map = {info['name']: info['accession'] for info in infos}

# parse the CSV file
with open(csv_input, 'r') as the_file:
    reader = csv.DictReader(the_file, delimiter=",")
    # DictReader.fieldnames is None when the file has no header row (e.g. an empty
    # file); fall back to an empty list so the check below reports the missing key
    # through GenestackException instead of failing with a bare TypeError.
    field_names = reader.fieldnames or []
    if local_key not in field_names:
        raise GenestackException(
            "Error: the local key %s is not present in the supplied CSV file" % local_key)
'--move-files', action='store_true', help= 'If present, the original files will be unlinked from the source folder') args = parser.parse_args() source_folder = args.folder move_files = args.move_files print "Connecting to Genestack..." # get connection and application handlers connection = get_connection(args) files_util = FilesUtil(connection) print "Collecting files..." files = files_util.get_file_children(source_folder) files_count = len(files) print "Found %d files to organise. Retrieving infos..." % files_count infos = files_util.get_complete_infos(files) output_folder = files_util.create_folder("Organized files", parent=source_folder) grouping_folders = {} for i, entry in enumerate(infos, 1): accession = entry['accession'] print "Processing file %d of %d (%s)..." % (i, files_count, accession) # use either application name, application ID or "Unknown application" (in this order of preference) app_entry = entry.get('application') if app_entry:
parent=created_files_folder) # create application wrappers and batch files creators bowtie_app = BowtieApplication(connection) mapped_qc_app = AlignedReadsQC(connection) variant_calling_app = VariationCaller2Application(connection) bowtie_creator = BowtieBatchFilesCreator(bowtie_app, project_folder, "Mapped Reads", ref_genome=args.ref_genome) mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder, "Mapped Reads QC") vc_creator = BatchFilesCreator(variant_calling_app, project_folder, "Variants", custom_args=VC_ARGUMENTS_NO_INDELS) # collect files print "Collecting raw reads..." raw_reads = files_util.get_file_children(args.raw_reads_folder) files_count = len(raw_reads) print "Found %d files to process" % files_count # Create pipeline files mapped_reads = bowtie_creator.create_files(raw_reads) mapped_reads_qcs = mapped_qc_creator.create_files(mapped_reads) vc_creator.create_files(mapped_reads) print "All done! Your files are in the folder %s" % project_folder
parser.add_argument('csv_file', help='Path to the local comma-delimited CSV file containing the data') parser.add_argument('local_key', help='Name of the local key to match CSV records and Genestack files names') parser.add_argument('folder', help='Accession of the Genestack folder containing the files') args = parser.parse_args() csv_input = args.csv_file local_key = args.local_key print "Connecting to Genestack..." # get connection and application handlers connection = get_connection(args) files_util = FilesUtil(connection) print "Collecting files..." files = files_util.get_file_children(args.folder) print "Found %d files. Collecting metadata..." % len(files) infos = files_util.get_infos(files) identifier_map = {info['name']: info['accession'] for info in infos} # parse the CSV file with open(csv_input, 'r') as the_file: reader = csv.DictReader(the_file, delimiter=",") field_names = reader.fieldnames if args.local_key not in field_names: raise GenestackException("Error: the local key %s is not present in the supplied CSV file" % args.local_key) for file_data in reader: # find the corresponding file
print('Connecting to Genestack...')

# Open the connection and prepare the folder that will receive every created file.
connection = get_connection(args)
files_util = FilesUtil(connection)
created_files_folder = files_util.get_special_folder(SpecialFolders.CREATED)
project_folder = files_util.create_folder(
    project_name,
    parent=created_files_folder,
)

# One application wrapper per pipeline step.
bowtie_app = BowtieApplication(connection)
mapped_qc_app = AlignedReadsQC(connection)
variant_calling_app = VariationCaller2Application(connection)

# One batch creator per step; each one writes into the project folder.
bowtie_creator = BowtieBatchFilesCreator(
    bowtie_app,
    project_folder,
    "Mapped Reads",
    ref_genome=args.ref_genome,
)
mapped_qc_creator = BatchFilesCreator(
    mapped_qc_app,
    project_folder,
    "Mapped Reads QC",
)
vc_creator = BatchFilesCreator(
    variant_calling_app,
    project_folder,
    "Variants",
    custom_args=VC_ARGUMENTS_NO_INDELS,
)

# Gather the input files of the pipeline.
print('Collecting raw reads...')
raw_reads = files_util.get_file_children(args.raw_reads_folder)
print('Found %d files to process' % len(raw_reads))

# Run the steps: the mapped reads feed both the QC step and the variant calling step.
mapped_reads = bowtie_creator.create_files(raw_reads)
mapped_reads_qcs = mapped_qc_creator.create_files(mapped_reads)
vc_creator.create_files(mapped_reads)

print('All done! Your files are in the folder %s' % project_folder)
parser = make_connection_parser()
parser.add_argument('folder',
                    help='Accession of the Genestack folder storing the files to group by application')
parser.add_argument('--move-files', action='store_true',
                    help='If present, the original files will be unlinked from the source folder')
args = parser.parse_args()
source_folder = args.folder
move_files = args.move_files

# Progress messages use print(...) with a single pre-formatted string: that form
# behaves identically under Python 2 and Python 3, whereas the bare print statement
# is a SyntaxError on Python 3.
print("Connecting to Genestack...")
# get connection and application handlers
connection = get_connection(args)
files_util = FilesUtil(connection)

print("Collecting files...")
files = files_util.get_file_children(source_folder)
files_count = len(files)
print("Found %d files to organise. Retrieving infos..." % files_count)
infos = files_util.get_complete_infos(files)

# The grouped files go into a new sub-folder of the source folder.
output_folder = files_util.create_folder("Organized files", parent=source_folder)
grouping_folders = {}

for i, entry in enumerate(infos, 1):
    accession = entry['accession']
    print("Processing file %d of %d (%s)..." % (i, files_count, accession))

    # use either application name, application ID or "Unknown application" (in this order of preference)
    app_entry = entry.get('application')
    if app_entry:
        # Chain with `or` so that an 'id' that is present but empty/None also falls
        # back to the placeholder; a .get() default only covers a missing key.
        application = app_entry.get('name') or app_entry.get('id') or "Unknown application"