def upload_files(connection, files, folder_name, folder_accession):
    """
    Load local files as raw files and gather them in a single folder.

    When ``folder_accession`` is falsy, a new folder is created inside the
    special ``UPLOADED`` folder; its name is ``folder_name`` or, if that is
    falsy too, a timestamp of the form ``Upload dd.mm.yy HH:MM:SS``.
    Otherwise the existing folder is used and ``folder_name`` is replaced
    by the name reported by ``FilesUtil.get_infos`` for that accession.

    Every entry of ``files`` is passed to ``DataImporter.load_raw``; the
    resulting accession is linked into the target folder and unlinked from
    the special ``UPLOADED`` folder.

    :param genestack_client.Connection connection: connection used to build
        the importer and the files utility
    :param list[str] files: entries handed one by one to ``load_raw``
    :param str folder_name: name for a newly created folder (may be empty)
    :param str folder_accession: accession of an existing target folder
        (may be empty, in which case a folder is created)
    :return: tuple ``(folder_accession, folder_name, accession_file_map)``
        where the map goes from each loaded accession to its ``files`` entry
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_root = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if folder_accession:
        # Existing folder: report its actual name rather than the argument.
        folder_infos = files_util.get_infos([folder_accession])
        folder_name = folder_infos[0]['name']
    else:
        if not folder_name:
            folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
        folder_accession = files_util.create_folder(
            folder_name,
            parent=uploaded_root,
            description='Files uploaded by genestack-uploader')

    loaded = {}
    for local_file in files:
        raw_accession = raw_importer.load_raw(local_file)
        # Move the file: link it into the target, then drop the UPLOADED link.
        files_util.link_file(raw_accession, folder_accession)
        files_util.unlink_file(raw_accession, uploaded_root)
        loaded[raw_accession] = local_file

    return folder_accession, folder_name, loaded
def test_metainfo_io(conn):
    """
    Round-trip check for metainfo values.

    Builds a ``Metainfo`` holding one value of several kinds (two decimals
    under the same key), creates a report file carrying it, reads the
    metainfo back through ``FilesUtil.collect_metainfos`` and compares each
    value with what was written. The report file is unlinked from the
    special ``CREATED`` folder afterwards, whether or not the checks pass.
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    written = Metainfo()
    written.add_boolean("a", True)
    written.add_file_reference("b", created_folder)
    written.add_date_time("c", "2015-12-13")
    written.add_integer("d", 239)
    written.add_decimal("e", 238.583)
    written.add_decimal("e", -13.4)
    written.add_string("f", "hello")
    written.add_memory_size("g", 2847633)
    written.add_person("i", "Rosalind Franklin", "+1-202-555-0123",
                       "*****@*****.**")
    written.add_publication("j", "My Publication", "Myself", "Journal of Me",
                            "23/12/2014", pages="12-23")
    written.add_value(Metainfo.NAME, StringValue("Test report file"))

    expected_person = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**',
    }
    expected_publication = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {},
    }

    report_accession = None
    try:
        report_accession = importer.create_report_file(
            metainfo=written, urls=[TEST_URL], parent=created_folder)
        read_back = next(iter(files_util.collect_metainfos([report_accession])))

        def value_at(key, index=0):
            # Fetch the index-th value stored under key in the read-back metainfo.
            return read_back.get(key)[index]

        assert value_at('a').get_boolean()
        assert isinstance(value_at('b').get_accession(), str)
        assert value_at('c').get_date() == _strptime_local('2015-12-13',
                                                           '%Y-%m-%d')
        assert value_at('d').get_int() == 239
        assert value_at('e').get_decimal() == 238.583
        assert value_at('e', 1).get_decimal() == -13.4
        assert value_at('f').get_string() == "hello"
        assert value_at('g').get_int() == 2847633
        assert value_at('i').get_person() == expected_person
        assert value_at('j').get_publication() == expected_publication
        assert value_at(Metainfo.NAME).get_string() == "Test report file"
        assert value_at(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        # Only clean up if the report file was actually created.
        if report_accession is not None:
            files_util.unlink_file(report_accession, created_folder)
def test_en_isoforms(conn, keep_files):
    """
    Create an isoform expression navigator file from ``ISOFORM_GROUPS``.

    The test passes if ``create_file`` does not raise. Unless ``keep_files``
    is truthy, the created file is unlinked from the special ``CREATED``
    folder afterwards.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforIsoforms(conn)
    navigator_file = None
    try:
        group_specs = []
        for accessions in ISOFORM_GROUPS:
            group_specs.append({'accessions': accessions})
        navigator_file = navigator.create_file(group_specs,
                                               multi_mapping_corr=True)
    finally:
        # Clean up only when requested and only if something was created.
        if not (keep_files or navigator_file is None):
            created_folder = files_util.get_special_folder(
                SpecialFolders.CREATED)
            files_util.unlink_file(navigator_file, created_folder)
def test_en_rna_seq(conn, keep_files):
    """
    Create a gene expression navigator file from ``RNA_SEQ_GROUPS``.

    ``create_file`` is called with the navigator's ``PKG_DESEQ`` package and
    the organism ``"new organism"``; the test passes if it does not raise.
    Unless ``keep_files`` is truthy, the created file is unlinked from the
    special ``CREATED`` folder afterwards.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforGenes(conn)
    navigator_file = None
    try:
        group_specs = []
        for accessions in RNA_SEQ_GROUPS:
            group_specs.append({'accessions': accessions})
        navigator_file = navigator.create_file(
            group_specs,
            r_package=navigator.PKG_DESEQ,
            organism="new organism")
    finally:
        # Clean up only when requested and only if something was created.
        if not (keep_files or navigator_file is None):
            created_folder = files_util.get_special_folder(
                SpecialFolders.CREATED)
            files_util.unlink_file(navigator_file, created_folder)
def upload_files(connection, files, folder_name):
    """
    Load local files as raw files and gather them in a new folder.

    A folder is always created inside the special ``UPLOADED`` folder. Its
    name is ``folder_name`` or, if that is falsy, a timestamp of the form
    ``Upload dd.mm.yy HH:MM:SS``. Every entry of ``files`` is passed to
    ``DataImporter.load_raw``; the resulting accession is linked into the
    new folder and unlinked from the special ``UPLOADED`` folder.

    :param connection: connection used to build the importer and files utility
    :param files: entries handed one by one to ``load_raw``
    :param folder_name: name for the new folder (may be empty)
    :return: tuple ``(new_folder, folder_name, accession_file_map)`` where
        ``new_folder`` is the value returned by ``create_folder`` and the map
        goes from each loaded accession to its ``files`` entry
    """
    raw_importer = DataImporter(connection)
    files_util = FilesUtil(connection)
    uploaded_root = files_util.get_special_folder(SpecialFolders.UPLOADED)

    if not folder_name:
        folder_name = datetime.now().strftime('Upload %d.%m.%y %H:%M:%S')
    target_folder = files_util.create_folder(
        folder_name,
        parent=uploaded_root,
        description='Files uploaded by genestack-uploader')

    loaded = {}
    for local_file in files:
        raw_accession = raw_importer.load_raw(local_file)
        # Move the file: link it into the target, then drop the UPLOADED link.
        files_util.link_file(raw_accession, target_folder)
        files_util.unlink_file(raw_accession, uploaded_root)
        loaded[raw_accession] = local_file

    return target_folder, folder_name, loaded
def test_en_microarrays(conn, keep_files):
    """
    Create a microarray expression navigator file from ``MICROARRAY_GROUPS``.

    All accessions of all groups are first normalised into one file with the
    Affymetrix normalisation application; that file, the group descriptions
    (the first group flagged ``is_control``) and ``RAT_AFFY_ANNOTATION`` are
    then passed to the navigator's ``create_file``. The test passes if
    nothing raises. Unless ``keep_files`` is truthy, whichever of the two
    files were created are unlinked from the special ``CREATED`` folder.
    """
    files_util = FilesUtil(conn)
    navigator = ExpressionNavigatorforMicroarrays(conn)
    normalizer = AffymetrixMicroarraysNormalizationApplication(conn)
    navigator_file = None
    normalized_file = None
    try:
        group_specs = []
        for accessions in MICROARRAY_GROUPS:
            group_specs.append({'accessions': accessions})
        group_specs[0]['is_control'] = True

        # Flatten the groups into a single list of accessions to normalise.
        all_accessions = []
        for group in MICROARRAY_GROUPS:
            all_accessions.extend(group)

        normalized_file = normalizer.create_file(all_accessions)
        navigator_file = navigator.create_file(group_specs, normalized_file,
                                               RAT_AFFY_ANNOTATION)
    finally:
        if not keep_files:
            created_folder = files_util.get_special_folder(
                SpecialFolders.CREATED)
            # Either file may be missing if its creation step failed.
            to_remove = [accession
                         for accession in (normalized_file, navigator_file)
                         if accession is not None]
            for accession in to_remove:
                files_util.unlink_file(accession, created_folder)
def test_metainfo_io(conn):
    """
    Round-trip check for metainfo values.

    Writes a ``Metainfo`` with one value of several kinds (two decimals
    share the key ``"e"``) into a new report file, reads the metainfo back
    via ``FilesUtil.collect_metainfos`` and compares every value with the
    one written. The report file is unlinked from the special ``CREATED``
    folder at the end, whether or not the checks pass.
    """
    importer = DataImporter(conn)
    files_util = FilesUtil(conn)
    created_folder = files_util.get_special_folder(SpecialFolders.CREATED)

    outgoing = Metainfo()
    outgoing.add_boolean("a", True)
    outgoing.add_file_reference("b", created_folder)
    outgoing.add_date_time("c", "2015-12-13")
    outgoing.add_integer("d", 239)
    outgoing.add_decimal("e", 238.583)
    outgoing.add_decimal("e", -13.4)
    outgoing.add_string("f", "hello")
    outgoing.add_memory_size("g", 2847633)
    outgoing.add_person("i", "Rosalind Franklin", "+1-202-555-0123",
                        "*****@*****.**")
    outgoing.add_publication("j", "My Publication", "Myself", "Journal of Me",
                             "23/12/2014", pages="12-23")
    outgoing.add_value(Metainfo.NAME, StringValue("Test report file"))

    person_expected = {
        'name': 'Rosalind Franklin',
        'phone': '+1-202-555-0123',
        'email': '*****@*****.**',
    }
    publication_expected = {
        'title': 'My Publication',
        'authors': 'Myself',
        'journalName': 'Journal of Me',
        'issueDate': '23/12/2014',
        'pages': '12-23',
        'issueNumber': None,
        'identifiers': {},
    }

    report_accession = None
    try:
        report_accession = importer.create_report_file(
            metainfo=outgoing, urls=[TEST_URL], parent=created_folder)
        incoming = next(iter(files_util.collect_metainfos([report_accession])))

        def nth(key, position=0):
            # Return the value at the given position under key in the read-back metainfo.
            return incoming.get(key)[position]

        assert nth('a').get_boolean()
        assert isinstance(nth('b').get_accession(), str)
        assert nth('c').get_date() == _strptime_local('2015-12-13', '%Y-%m-%d')
        assert nth('d').get_int() == 239
        assert nth('e').get_decimal() == 238.583
        assert nth('e', 1).get_decimal() == -13.4
        assert nth('f').get_string() == "hello"
        assert nth('g').get_int() == 2847633
        assert nth('i').get_person() == person_expected
        assert nth('j').get_publication() == publication_expected
        assert nth(Metainfo.NAME).get_string() == "Test report file"
        assert nth(BioMetaKeys.DATA_LINK).get_url() == TEST_URL
    finally:
        # Only clean up if the report file was actually created.
        if report_accession is not None:
            files_util.unlink_file(report_accession, created_folder)
'--name', default="New Project", help='Name of the Genestack folder where to put the output files') parser.add_argument( '--ref-genome', help='Accession of the reference genome to use for the mapping step') args = parser.parse_args() project_name = args.name print "Connecting to Genestack..." # get connection and create output folder connection = get_connection(args) files_util = FilesUtil(connection) created_files_folder = files_util.get_special_folder( SpecialFolders.CREATED) project_folder = files_util.create_folder(project_name, parent=created_files_folder) # create application wrappers and batch files creators bowtie_app = BowtieApplication(connection) mapped_qc_app = AlignedReadsQC(connection) variant_calling_app = VariationCaller2Application(connection) bowtie_creator = BowtieBatchFilesCreator(bowtie_app, project_folder, "Mapped Reads", ref_genome=args.ref_genome) mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder, "Mapped Reads QC") vc_creator = BatchFilesCreator(variant_calling_app,
# Script set-up: parse the command line, connect to Genestack, create the
# output folder, build the application wrappers and list the input files.

# Start from the parser returned by make_connection_parser() and add the
# script-specific arguments: one positional folder accession and two options.
parser = make_connection_parser()
parser.add_argument('raw_reads_folder', help='Genestack accession of the folder containing the raw reads files to process')
parser.add_argument('--name', default="New Project", help='Name of the Genestack folder where to put the output files')
# NOTE(review): no default is given for --ref-genome; presumably it is None
# when omitted (argparse behaviour) -- confirm the creator below accepts that.
parser.add_argument('--ref-genome', help='Accession of the reference genome to use for the mapping step')
args = parser.parse_args()
project_name = args.name
print('Connecting to Genestack...')
# get connection and create output folder
# (the output folder is created inside the special CREATED folder)
connection = get_connection(args)
files_util = FilesUtil(connection)
created_files_folder = files_util.get_special_folder(SpecialFolders.CREATED)
project_folder = files_util.create_folder(project_name, parent=created_files_folder)
# create application wrappers and batch files creators
# Three applications are wrapped: Bowtie, aligned-reads QC and variation
# calling. Each gets a creator built from the project folder and a label.
# NOTE(review): the label strings look like output names -- confirm against
# BatchFilesCreator, which is defined outside this view.
bowtie_app = BowtieApplication(connection)
mapped_qc_app = AlignedReadsQC(connection)
variant_calling_app = VariationCaller2Application(connection)
bowtie_creator = BowtieBatchFilesCreator(bowtie_app, project_folder, "Mapped Reads", ref_genome=args.ref_genome)
mapped_qc_creator = BatchFilesCreator(mapped_qc_app, project_folder, "Mapped Reads QC")
vc_creator = BatchFilesCreator(variant_calling_app, project_folder, "Variants", custom_args=VC_ARGUMENTS_NO_INDELS)
# collect files
# The inputs are the children of the folder given on the command line.
print('Collecting raw reads...')
raw_reads = files_util.get_file_children(args.raw_reads_folder)
files_count = len(raw_reads)