def process_wget(input_data):
    """Validate a wget request and start the download it describes.

    The input is handed to ``validate_wget``. If that returns a falsy value,
    nothing happens. Otherwise item 0 of the result is used as the URL and
    item 1 as the file extension; a download is only started when the
    extension is truthy.

    :param input_data: raw request, passed unchanged to ``validate_wget``.
    :return: None
    """
    parsed = validate_wget(input_data)
    if not parsed:
        return

    url = parsed[0]
    extension = parsed[1]
    if not extension:
        # TODO: download with url only
        return

    # TODO: where should the files be downloaded to?
    downloader = FileDownloader(url, extension)
    downloader.download_files()
from file_downloader import FileDownloader

# Inputs for this run. A previous run used '1.txt' as the URL list.
URLS_FILE = 't-shirt-links.txt'
LOG_PATH = 'io-data/download-logger.txt'

# The log is opened in append mode, so entries from earlier runs are kept.
# The open handle itself is what gets passed to FileDownloader as its logger.
with open(LOG_PATH, 'a') as log_file:
    fileDownLoader = FileDownloader(file_with_urls=URLS_FILE, logger=log_file)
    fileDownLoader.download_files()
def main(self):
    '''
    Run the pipeline from the stage selected on the command line through to file download.

    The set-up steps (sample identification, variables parsing, fastq location and
    BaseSpace project creation) always run. The remaining stages depend on the
    module-level ``args`` flags:

    * no flag set: upload fastqs, launch TST170, launch SMP2, poll SMP2, download files
    * ``args.tst170``: as above, but the fastq upload is skipped
    * ``args.smp2``: upload and TST170 launch are skipped; the TST170 launch data is
      read back from ``tst_170.json``
    * ``args.dl_files``: upload and both launches are skipped; the SMP2 launch data is
      read back from ``smp.json``

    ``tst_170.json`` and ``smp.json`` are written to the current working directory by
    the stage that produces them, which is what makes resuming possible.

    :return: None
    :raises FileNotFoundError: if a resume was requested but the JSON file written by
        the preceding stage is not present in the current working directory
    '''
    # Always do these steps regardless of option
    # Parse variables files to extract relevant sample information
    # Identify samples to upload to BaseSpace
    samples_to_upload = identify_samples()

    # Load and parse out variables from variables files associated with each sample
    all_variables = load_all_variables(samples_to_upload, os.getcwd())

    # Identify the worksheet number which will be used as the project name in BaseSpace
    worksheet = identify_worksheet(all_variables)

    # Pair samples- DNA sample is key, RNA sample to look up- if no RNA sample, it is None
    sample_pairs = create_sample_pairs(all_variables)
    # Write out sample pairs to log file for checking if needed
    log.warning(f"sample pairs are {sample_pairs}")

    # Locate the fastqs associated with all samples
    all_fastqs = locate_all_fastqs(samples_to_upload, os.getcwd())

    # Create a project in BaseSpace- will not create if it already exists, but will still return project id
    upload = FileUpload(self.authentication_token, worksheet, samples_to_upload, all_fastqs)
    project = upload.create_basespace_project()
    log.info(f"Project {worksheet} created")
    log.warning(f"Project id for project name {worksheet} is {project}")

    # If whole pipeline required then upload fastq files
    if not args.tst170 and not args.smp2 and not args.dl_files:
        print("uploading fastq files for all samples")
        upload.upload_files()

    # Create launch app object for TST170 app. Built unconditionally because both the
    # full pipeline and a resume from the TST170 stage launch through it.
    launch_tst = LaunchApp(self.authentication_token, worksheet, project, app_name, app_version, sample_pairs)

    # If resuming from TST170 required or full pipeline- launch the TST170 app
    if not args.smp2 and not args.dl_files:
        # Launch TST170 application for each pair in turn
        # IMPORTANT NOTE: Only processes paired data
        tst_170 = launch_tst.launch_tst170_pairs()
        # Dump data to file so that a later run can resume from the SMP2 stage
        with open(os.path.join(os.getcwd(), "tst_170.json"), 'w') as t:
            json.dump(tst_170, t)
    # If resuming from SMP2v3 launch load in required TST170 data from file
    elif args.smp2:
        try:
            with open(os.path.join(os.getcwd(), "tst_170.json")) as ts:
                tst_170 = json.load(ts)
        except FileNotFoundError as err:
            # Same exception type as before; message fragments now carry their separating spaces
            raise FileNotFoundError(
                "Could not find file tst_170.json. Cannot resume pipeline from SMP2 step. "
                "Please delete TST170 analysis in BaseSpace and resume pipeline from "
                "TST170 stage.") from err

    # If resuming from SMP2v3 required, resuming from TST170 required or full pipeline- launch the SMP2 app
    if not args.dl_files:
        # Create launch app object for SMP2 v3 if not just downloading files- poll TST170 and when complete
        # launch SMP2
        launch_smp = LaunchApp(self.authentication_token, worksheet, project, smp2_app_name, smp2_app_version,
                               sample_pairs, tst_170)
        # Poll the tst 170 appsessions until completion, then launch smp2 app
        smp_appsession = launch_smp.poll_tst170_launch_smp2()
        # Dump data to file so that a later run can resume from the download stage
        with open(os.path.join(os.getcwd(), "smp.json"), 'w') as s:
            json.dump(smp_appsession, s)

    # If downloading files from a completed SMP2 app required
    # Create a LaunchApp object for smp2 app if flag to only download files is set- allows for polling of SMP2
    if args.dl_files:
        # Load in required smp2 data from file
        try:
            with open(os.path.join(os.getcwd(), "smp.json")) as sm:
                smp = json.load(sm)
        except FileNotFoundError as err:
            raise FileNotFoundError(
                "Could not find file smp.json. Cannot resume pipeline from download step. "
                "Please delete SMP2 analysis in BaseSpace and resume pipeline from "
                "SMP2 stage.") from err
        launch_smp = LaunchApp(self.authentication_token, worksheet, project, smp2_app_name, smp2_app_version,
                               sample_pairs, None, smp)  # None as tst170 app data not required

    # Poll the smp appsessions until completion. launch_smp is always bound here:
    # exactly one of the two branches above has created it.
    smp_appresults = launch_smp.poll_smp2()

    # Download all required files- every step requires this
    file_download = FileDownloader(self.authentication_token, smp_appresults, worksheet)
    file_download.download_files()