def download_assembly(dest_dir, accession, output_format, fetch_wgs, extract_wgs, expanded, quiet=False):
    """Download an assembly record together with the sequence data it references.

    A directory named after the accession is created under ``dest_dir``; the
    assembly XML is downloaded into it and parsed for the WGS set and the
    sequence report location. Sequences from the report are downloaded, and
    the WGS set is fetched (and optionally its scaffolds extracted) when
    requested or when the report cannot cover the assembly on its own.

    Args:
        dest_dir: directory under which the per-accession directory is created.
        accession: assembly accession; also used as the sub-directory name.
        output_format: format handed to the download helpers; ``None`` means
            ``utils.EMBL_FORMAT``.
        fetch_wgs: whether the caller asked for the WGS set. It is overridden
            to ``True`` when the sequence report yields WGS scaffolds or when
            no sequence report was obtained.
        extract_wgs: when true and WGS scaffolds were found, they are
            extracted from the downloaded WGS set.
        expanded: forwarded unchanged to ``download_sequences``.
        quiet: suppress the progress messages printed here; also forwarded to
            ``download_sequences`` and ``extract_wgs_scaffolds``.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)
    # download xml
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession, utils.XML_FORMAT)
    # get wgs and sequence report info
    wgs_set, sequence_report = parse_assembly_xml(local_xml)
    has_sequence_report = False
    # download sequence report; the helper's return value is used as a
    # "report is available" flag below
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)
    # parse sequence report and download sequences
    wgs_scaffolds = []
    wgs_scaffold_cnt = 0
    if has_sequence_report:
        # only the last path component of the report location is passed on
        wgs_scaffolds = download_sequences(sequence_report.split('/')[-1], assembly_dir, output_format, expanded, quiet)
        wgs_scaffold_cnt = len(wgs_scaffolds)
        if wgs_scaffold_cnt > 0:
            if not quiet:
                # single-argument print(...) behaves identically on Python 2 and 3
                print('Assembly contains {} WGS scaffolds, will fetch WGS set'.format(wgs_scaffold_cnt))
            fetch_wgs = True
    else:
        # without a sequence report the WGS set is always requested
        fetch_wgs = True
    # download wgs set if needed
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)
        # extract wgs scaffolds from WGS file
        if wgs_scaffold_cnt > 0 and extract_wgs:
            extract_wgs_scaffolds(assembly_dir, wgs_scaffolds, wgs_set, output_format, quiet)
示例#2
0
def download_assembly(dest_dir,
                      accession,
                      output_format,
                      fetch_wgs,
                      quiet=False):
    """Download an assembly record and the sequence data it references.

    A directory named after the accession is created under ``dest_dir``; the
    assembly XML is downloaded into it and parsed for the WGS set and the
    sequence report location. The WGS set is fetched when requested, and the
    sequences listed in the report are downloaded when the report was
    obtained.

    Args:
        dest_dir: directory under which the per-accession directory is created.
        accession: assembly accession; also used as the sub-directory name.
        output_format: format handed to the download helpers; ``None`` means
            ``utils.EMBL_FORMAT``.
        fetch_wgs: when true and the XML names a WGS set, that set is
            downloaded.
        quiet: suppress the progress message printed here; also forwarded to
            ``download_sequences``.
    """
    if output_format is None:
        output_format = utils.EMBL_FORMAT
    assembly_dir = os.path.join(dest_dir, accession)
    utils.create_dir(assembly_dir)
    # download xml
    utils.download_record(assembly_dir, accession, utils.XML_FORMAT)
    local_xml = utils.get_destination_file(assembly_dir, accession,
                                           utils.XML_FORMAT)
    # get wgs and sequence report info
    wgs_set, sequence_report = parse_assembly_xml(local_xml)
    has_sequence_report = False
    # download sequence report; the helper's return value is used as a
    # "report is available" flag below
    if sequence_report is not None:
        has_sequence_report = utils.get_ftp_file(sequence_report, assembly_dir)
    # download wgs set if needed
    if wgs_set is not None and fetch_wgs:
        if not quiet:
            # single-argument print(...) behaves identically on Python 2 and 3
            print('fetching wgs set')
        sequenceGet.download_wgs(assembly_dir, wgs_set, output_format)
    # parse sequence report and download sequences
    if has_sequence_report:
        # only the last path component of the report location is passed on
        download_sequences(
            sequence_report.split('/')[-1], assembly_dir, output_format, quiet)
示例#3
0
def download_data(group, data_accession, output_format, group_dir, fetch_wgs,
                  extract_wgs, expanded, fetch_meta, fetch_index, aspera):
    """Dispatch the download of one accession to the module handling its group.

    Args:
        group: data group; compared against ``utils.WGS``, ``utils.ASSEMBLY``,
            ``utils.READ`` and ``utils.ANALYSIS``. Any other value only prints
            the "Fetching" line and downloads nothing.
        data_accession: accession to download. For WGS data only its first
            six characters are used.
        output_format: format forwarded to the chosen downloader.
        group_dir: destination directory forwarded to the chosen downloader.
        fetch_wgs, extract_wgs, expanded: forwarded to
            ``assemblyGet.download_assembly`` (assembly group only).
        fetch_meta, fetch_index, aspera: forwarded to
            ``readGet.download_files`` (read and analysis groups only).
    """
    if group == utils.WGS:
        # single-argument print(...) behaves identically on Python 2 and 3
        print('Fetching ' + data_accession[:6])
        sequenceGet.download_wgs(group_dir, data_accession[:6], output_format)
    else:
        print('Fetching ' + data_accession)
        if group == utils.ASSEMBLY:
            # NOTE(review): the trailing True is presumably the downloader's
            # quiet flag -- confirm against assemblyGet.download_assembly
            assemblyGet.download_assembly(group_dir, data_accession,
                                          output_format, fetch_wgs,
                                          extract_wgs, expanded, True)
        elif group in [utils.READ, utils.ANALYSIS]:
            readGet.download_files(data_accession, output_format, group_dir,
                                   fetch_index, fetch_meta, aspera)
示例#4
0
def download_data(group, data_accession, format, group_dir, fetch_wgs, fetch_meta, fetch_index, aspera):
    """Route one accession to the downloader that handles its data group.

    WGS and assembly data are fetched without Aspera; if ``aspera`` is truthy
    for those groups a notice is printed before the download starts. Read and
    analysis data receive the ``aspera`` value unchanged. A group matching
    none of the known constants only produces the "Fetching" line.

    Args:
        group: one of ``utils.WGS``, ``utils.ASSEMBLY``, ``utils.READ`` or
            ``utils.ANALYSIS``.
        data_accession: accession to download; WGS downloads use only its
            first six characters.
        format: output format forwarded to the chosen downloader.
        group_dir: destination directory forwarded to the chosen downloader.
        fetch_wgs: forwarded to ``assemblyGet.download_assembly``.
        fetch_meta, fetch_index: forwarded to ``readGet.download_files``.
        aspera: Aspera setting; only honoured for read/analysis downloads.
    """
    if group == utils.WGS:
        # Only the leading six characters of the accession are passed on.
        wgs_prefix = data_accession[:6]
        print('Fetching ' + wgs_prefix)
        if aspera:
            print('Aspera not supported for WGS data. Using FTP...')
        sequenceGet.download_wgs(group_dir, wgs_prefix, format)
        return

    print('Fetching ' + data_accession)

    if group == utils.ASSEMBLY:
        if aspera:
            print('Aspera not supported for assembly data. Using FTP...')
        # NOTE(review): the trailing True is presumably the downloader's
        # quiet flag -- confirm against assemblyGet.download_assembly
        assemblyGet.download_assembly(
            group_dir, data_accession, format, fetch_wgs, True)
        return

    if group in (utils.READ, utils.ANALYSIS):
        readGet.download_files(
            data_accession, format, group_dir, fetch_index, fetch_meta, aspera)
示例#5
0
    fetch_wgs = args.wgs
    extract_wgs = args.extract_wgs
    expanded = args.expanded
    fetch_meta = args.meta
    fetch_index = args.index
    aspera = args.aspera
    aspera_settings = args.aspera_settings

    if aspera or aspera_settings is not None:
        aspera = utils.set_aspera(aspera_settings)

    try:
        if utils.is_wgs_set(accession):
            if output_format is not None:
                sequenceGet.check_format(output_format)
            sequenceGet.download_wgs(dest_dir, accession, output_format)
        elif not utils.is_available(accession):
            sys.stderr.write('ERROR: Record does not exist or is not available for accession provided\n')
            sys.exit(1)
        elif utils.is_sequence(accession):
            if output_format is not None:
                sequenceGet.check_format(output_format)
            sequenceGet.download_sequence(dest_dir, accession, output_format, expanded)
        elif utils.is_analysis(accession):
            if output_format is not None:
                readGet.check_read_format(output_format)
            readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera)
        elif utils.is_run(accession) or utils.is_experiment(accession):
            if output_format is not None:
                readGet.check_read_format(output_format)
            readGet.download_files(accession, output_format, dest_dir, fetch_index, fetch_meta, aspera)