Example #1
def process_wget(input_data):
    data = validate_wget(input_data)
    if data:
        url = data[0]
        if data[1]:
            extension = data[1]
            # todo: where should the files be downloaded to?
            file_downloader = FileDownloader(url, extension)
            file_downloader.download_files()
        else:
            # todo: download using the url only
            pass
    #    print(data)
    #    print('op : ' + str(data[0]))
    #    print('url : ' + data[1])
    #    print('extension : ' + str(data[2]))
    pass
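
The validate_wget helper used above is not shown on this page. A minimal sketch of what it might do, assuming the input is a wget-style command string and the helper returns a (url, extension) tuple (or None for invalid input), could look like the following; the parsing rules and return shape are assumptions, not part of the original project:

import re


def validate_wget(input_data):
    # Assumed format: "wget <url> [extension]", e.g. "wget http://example.com jpg"
    parts = input_data.strip().split()
    if len(parts) < 2 or parts[0] != 'wget':
        return None
    url = parts[1]
    # Very loose URL check; a real implementation would be stricter
    if not re.match(r'https?://\S+', url):
        return None
    extension = parts[2] if len(parts) > 2 else None
    return (url, extension)
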
Example #2
from file_downloader import FileDownloader


with open('io-data/download-logger.txt', 'a') as log_file:
    file_downloader = FileDownloader(file_with_urls='t-shirt-links.txt', logger=log_file)
    # file_downloader = FileDownloader(file_with_urls='1.txt')
    file_downloader.download_files()
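
The FileDownloader class imported above is not shown on this page. A minimal sketch consistent with the call in this example (a file_with_urls path, an optional logger file handle, and a download_files method), using only the standard library, might look like the following; the class body is an assumption based on the constructor arguments shown, not the project's actual implementation:

import os
import urllib.request


class FileDownloader:
    def __init__(self, file_with_urls, logger=None):
        self.file_with_urls = file_with_urls
        self.logger = logger

    def _log(self, message):
        # Write to the supplied log file handle if one was given
        if self.logger:
            self.logger.write(message + '\n')

    def download_files(self, target_dir='io-data'):
        os.makedirs(target_dir, exist_ok=True)
        with open(self.file_with_urls) as urls:
            for line in urls:
                url = line.strip()
                if not url:
                    continue
                filename = os.path.join(target_dir, url.rsplit('/', 1)[-1])
                urllib.request.urlretrieve(url, filename)
                self._log(f"downloaded {url} -> {filename}")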

Example #3
File: cruk_smp.py  Project: tonbar/SMP2v3
    def main(self):
        '''
        Run the pipeline: upload samples to BaseSpace, launch the TST170 and
        SMP2 apps, and download the result files.
        '''
        # Always do these steps regardless of option
        # Parse variables files to extract relevant sample information
        # Identify samples to upload to BaseSpace
        samples_to_upload = identify_samples()

        # Load and parse out variables from variables files associated with each sample
        all_variables = load_all_variables(samples_to_upload, os.getcwd())

        # Identify the worksheet number which will be used as the project name in BaseSpace
        worksheet = identify_worksheet(all_variables)

        # Pair samples: the DNA sample is the key and the matching RNA sample is the value; if there is no RNA sample the value is None
        sample_pairs = create_sample_pairs(all_variables)
        # Write out sample pairs to log file for checking if needed
        log.warning(f"sample pairs are {sample_pairs}")

        # Locate the fastqs associated with all samples
        all_fastqs = locate_all_fastqs(samples_to_upload, os.getcwd())

        # Create a project in BaseSpace- will not create if it already exists, but will still return project id
        upload = FileUpload(self.authentication_token, worksheet,
                            samples_to_upload, all_fastqs)
        project = upload.create_basespace_project()
        log.info(f"Project {worksheet} created")
        log.warning(f"Project id for project name {worksheet} is {project}")

        # If whole pipeline required then upload fastq files
        if not args.tst170 and not args.smp2 and not args.dl_files:
            # Upload fastq files
            print(f"uploading fastq files for all samples")
            upload.upload_files()

        # Create launch app object for TST170 app
        launch_tst = LaunchApp(self.authentication_token, worksheet, project,
                               app_name, app_version, sample_pairs)

        # If resuming from TST170 required or full pipeline- launch the TST170 app
        if not args.smp2 and not args.dl_files:
            # Launch TST170 application for each pair in turn
            # IMPORTANT NOTE: Only processes paired data
            tst_170 = launch_tst.launch_tst170_pairs()

            # Dump data to file
            with open(os.path.join(os.getcwd(), "tst_170.json"), 'w') as t:
                json.dump(tst_170, t)

        # If resuming from SMP2v3 launch load in required TST170 data from file
        elif args.smp2:
            try:
                with open(os.path.join(os.getcwd(), "tst_170.json")) as ts:
                    tst_170 = json.load(ts)
            except FileNotFoundError:
                raise FileNotFoundError(
                    f"Could not find file tst_170.json. Cannot resume pipeline from SMP2 step."
                    f"Please delete TST170 analysis in BaseSpace and resume pipeline from"
                    f"TST170 stage.")

        # If resuming from SMP2v3 required, resuming from TST170 required or full pipeline- launch the SMP2 app
        if not args.dl_files:
            # Create launch app object for SMP2 v3 if not just downloading files- poll TST170 and when complete
            # launch SMP2
            launch_smp = LaunchApp(self.authentication_token, worksheet,
                                   project, smp2_app_name, smp2_app_version,
                                   sample_pairs, tst_170)
            # Poll the tst 170 appsessions until completion, then launch smp2 app
            smp_appsession = launch_smp.poll_tst170_launch_smp2()

            # Dump data to file
            with open(os.path.join(os.getcwd(), "smp.json"), 'w') as s:
                json.dump(smp_appsession, s)

        # If downloading files from a completed SMP2 app required
        # Create a LaunchApp object for smp2 app if flag to only download files is set- allows for polling of SMP2
        if args.dl_files:
            # Load in the required SMP2 data from file
            try:
                with open(os.path.join(os.getcwd(), "smp.json")) as sm:
                    smp = json.load(sm)
            except FileNotFoundError:
                raise FileNotFoundError(
                    f"Could not find file smp.json. Cannot resume pipeline from download step."
                    f"Please delete SMP2 analysis in BaseSpace and resume pipeline from"
                    f"SMP2 stage.")
            launch_smp = LaunchApp(self.authentication_token, worksheet,
                                   project, smp2_app_name, smp2_app_version,
                                   sample_pairs, None,
                                   smp)  # None as tst170 app data not required

        # Poll the smp appsessions until completion
        smp_appresults = launch_smp.poll_smp2()

        # Download all required files- every step requires this
        file_download = FileDownloader(self.authentication_token,
                                       smp_appresults, worksheet)
        file_download.download_files()
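
The main method above reads module-level args and log objects that are defined elsewhere in the project. Purely as an illustration of the three resume points it checks, a sketch of how flags named tst170, smp2 and dl_files might be parsed with argparse is given below; the flag names and help text are assumptions based on the attribute accesses in the code, not the project's actual CLI definition:

import argparse
import logging

log = logging.getLogger(__name__)

parser = argparse.ArgumentParser(description="CRUK SMP2v3 pipeline runner")
# Each flag resumes the pipeline from a later stage; with no flags the full pipeline runs
parser.add_argument('--tst170', action='store_true',
                    help='skip the fastq upload and resume from the TST170 launch')
parser.add_argument('--smp2', action='store_true',
                    help='resume from the SMP2 launch using a previously dumped tst_170.json')
parser.add_argument('--dl_files', action='store_true',
                    help='only download results from a completed SMP2 app using smp.json')
args = parser.parse_args()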