Пример #1
0
def summarise_bins(args):
    """Summarise a BAT classification file into per-rank clade counts.

    The input file is read twice: first to validate the header and locate
    the 'superkingdom' column, then to count, for every official rank, how
    many bins were assigned to each clade. The result is written to
    ``args.output_file`` as a tab-separated table, clades sorted by
    decreasing number of bins within each rank.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``input_file``, ``output_file``, ``log_file``, ``quiet`` and
            ``force``.

    Raises:
        SystemExit: With status 1 when an input/output check reports an
            error, when the header is missing or is not a BAT header with
            official ranks, or when a bin occurs on more than one line.
    """
    message = '# CAT v{0}.'.format(about.__version__)
    shared.give_user_feedback(message,
                              args.log_file,
                              args.quiet,
                              show_time=False)

    errors = []

    errors.append(
        check.check_input_file(args.input_file, args.log_file, args.quiet))

    # With --force the output file is allowed to exist already, so the
    # output check is skipped.
    if not args.force:
        errors.append(
            check.check_output_file(args.output_file, args.log_file,
                                    args.quiet))

    if True in errors:
        sys.exit(1)

    message = 'Summarising...'
    shared.give_user_feedback(message, args.log_file, args.quiet)

    # First pass: only the first line starting with '#' is inspected.
    with open(args.input_file, 'r') as f1:
        for line in f1:
            if line.startswith('#'):
                # Strip the line ending so that the last header column can
                # be matched exactly as well.
                header = line.rstrip().split('\t')

                if header[0] != '# bin':
                    message = '{0} is not a BAT classification file.'.format(
                        args.input_file)
                    shared.give_user_feedback(message,
                                              args.log_file,
                                              args.quiet,
                                              error=True)

                    if header[0] == '# contig':
                        message = (
                            '{0} appears to be a CAT classification file. '
                            'If you want to summarise contig '
                            'classifications, supply a contigs fasta with '
                            'argument [-c / --contigs_fasta].'.format(
                                args.input_file))
                        shared.give_user_feedback(message,
                                                  args.log_file,
                                                  args.quiet,
                                                  error=True)

                    sys.exit(1)

                try:
                    superkingdom_index = header.index('superkingdom')
                except ValueError:
                    # list.index raises ValueError when the column is absent.
                    message = (
                        'official ranks not found in header of {0}. Make '
                        'sure that the BAT classification file is named '
                        'with official ranks with \'CAT add_names '
                        '--only_official\'.'.format(args.input_file))
                    shared.give_user_feedback(message,
                                              args.log_file,
                                              args.quiet,
                                              error=True)

                    sys.exit(1)

                break
        else:
            # The loop ended without ever seeing a '#' line.
            message = 'input file does not have a recognisable header.'
            shared.give_user_feedback(message,
                                      args.log_file,
                                      args.quiet,
                                      error=True)

            sys.exit(1)

    official_ranks = [
        'superkingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species'
    ]

    # n_bins maps each official rank to {clade: number of bins}; the extra
    # key 'no taxid assigned' holds a plain counter.
    n_bins = {'no taxid assigned': 0}

    for rank in official_ranks:
        n_bins[rank] = {}

    n = 0
    bin_trace = set()
    doubles = set()

    # Second pass: count every non-header line.
    with open(args.input_file, 'r') as f1:
        for line in f1:
            line = line.rstrip()

            if line.startswith('#'):
                continue

            n += 1

            line = line.split('\t')

            bin_ = line[0]

            if bin_ in bin_trace:
                doubles.add(bin_)

            bin_trace.add(bin_)

            if line[1] == 'no taxid assigned':
                n_bins['no taxid assigned'] += 1

                continue

            for (i, classification) in enumerate(line[superkingdom_index:]):
                # Drop what follows the last ': ' and any trailing '*' so
                # that only the clade name is used as key.
                classification = classification.rsplit(': ', 1)[0].rstrip('*')

                rank = official_ranks[i]

                n_bins[rank][classification] = (
                    n_bins[rank].get(classification, 0) + 1)

    if doubles:
        # Sorted so that the message is reproducible between runs.
        message = ('some bins have multiple classifications. CAT summarise '
                   'currently does not allow for this. Bins with multiple '
                   'classifications: {0}.'.format(', '.join(sorted(doubles))))
        shared.give_user_feedback(message,
                                  args.log_file,
                                  args.quiet,
                                  error=True)

        sys.exit(1)

    n_classified_bins = n - n_bins['no taxid assigned']

    # A file that only contains a header has n == 0; report 0% instead of
    # dividing by zero.
    percentage_classified = n_classified_bins / n * 100 if n else 0.0

    with open(args.output_file, 'w') as outf1:
        outf1.write('# total number of bins is {0:,d}, of which {1:,d} '
                    '({2:.2f}%) have taxonomy assigned.\n'.format(
                        n, n_classified_bins, percentage_classified))
        outf1.write('#\n')
        outf1.write('# rank\tclade\tnumber of bins\n')

        for rank in official_ranks:
            for clade in sorted(n_bins[rank],
                                key=n_bins[rank].get,
                                reverse=True):
                outf1.write('{0}\t{1}\t{2}\n'.format(rank, clade,
                                                     n_bins[rank][clade]))

    message = '{0} is created!'.format(args.output_file)
    shared.give_user_feedback(message, args.log_file, args.quiet)
Пример #2
0
def summarise_contigs(args):
    """Summarise a CAT classification file into per-rank clade statistics.

    Contig lengths are obtained from ``import_contig_lengths`` on
    ``args.contigs_fasta``. The classification file is read twice: first to
    validate the header and locate the 'superkingdom' column, then to
    collect, per official rank and clade, the lengths of the contigs and
    the number of ORFs on them. The summary is written to
    ``args.output_file`` as a tab-separated table, clades sorted by
    decreasing number of positions within each rank.

    Args:
        args: Parsed command-line arguments. The attributes read here are
            ``input_file``, ``output_file``, ``contigs_fasta``,
            ``log_file``, ``quiet`` and ``force``.

    Raises:
        SystemExit: With status 1 when an input/output check reports an
            error, when the header is missing or is not a CAT header with
            official ranks, when a contig is absent from the fasta, when a
            contig occurs on more than one line, or when the number of
            lines differs from the number of contigs in the fasta.
    """
    message = '# CAT v{0}.'.format(about.__version__)
    shared.give_user_feedback(message,
                              args.log_file,
                              args.quiet,
                              show_time=False)

    errors = []

    errors.append(
        check.check_input_file(args.input_file, args.log_file, args.quiet))

    # With --force the output file is allowed to exist already, so the
    # output check is skipped.
    if not args.force:
        errors.append(
            check.check_output_file(args.output_file, args.log_file,
                                    args.quiet))

    if True in errors:
        sys.exit(1)

    contig2length = import_contig_lengths(args.contigs_fasta, args.log_file,
                                          args.quiet)

    message = 'Summarising...'
    shared.give_user_feedback(message, args.log_file, args.quiet)

    # First pass: only the first line starting with '#' is inspected.
    with open(args.input_file, 'r') as f1:
        for line in f1:
            if line.startswith('#'):
                # Strip the line ending so that the last header column can
                # be matched exactly as well.
                header = line.rstrip().split('\t')

                if header[0] != '# contig':
                    message = '{0} is not a CAT classification file.'.format(
                        args.input_file)
                    shared.give_user_feedback(message,
                                              args.log_file,
                                              args.quiet,
                                              error=True)

                    if header[0] == '# bin':
                        message = (
                            '{0} appears to be a BAT classification file. '
                            'If you want to summarise bin '
                            'classifications, simply don\'t supply a '
                            'contigs fasta and everything should be fine.'
                            ''.format(args.input_file))
                        shared.give_user_feedback(message,
                                                  args.log_file,
                                                  args.quiet,
                                                  error=True)

                    sys.exit(1)

                try:
                    superkingdom_index = header.index('superkingdom')
                except ValueError:
                    # list.index raises ValueError when the column is absent.
                    message = (
                        'official ranks not found in header of {0}. Make '
                        'sure that the CAT classification file is named '
                        'with official ranks with \'CAT add_names '
                        '--only_official\'.'.format(args.input_file))
                    shared.give_user_feedback(message,
                                              args.log_file,
                                              args.quiet,
                                              error=True)

                    sys.exit(1)

                break
        else:
            # The loop ended without ever seeing a '#' line.
            message = 'input file does not have a recognisable header.'
            shared.give_user_feedback(message,
                                      args.log_file,
                                      args.quiet,
                                      error=True)

            sys.exit(1)

    official_ranks = [
        'superkingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species'
    ]

    # length maps each official rank to {clade: [contig lengths]}; the extra
    # key 'no taxid assigned' holds a flat list of contig lengths.
    # ORFs maps each official rank to {clade: [ORF counts per contig]}.
    length = {'no taxid assigned': []}
    ORFs = {}

    for rank in official_ranks:
        length[rank] = {}
        ORFs[rank] = {}

    n = 0
    contig_trace = set()
    doubles = set()

    # Second pass: process every non-header line.
    with open(args.input_file, 'r') as f1:
        for line in f1:
            line = line.rstrip()

            if line.startswith('#'):
                continue

            n += 1

            line = line.split('\t')

            contig = line[0]

            if contig in contig_trace:
                doubles.add(contig)

            contig_trace.add(contig)

            if contig not in contig2length:
                message = (
                    'contig {0} in CAT classification file is not found '
                    'in supplied contigs fasta file. Are you sure the CAT '
                    'classification file is based on the contigs fasta?'
                    ''.format(contig))
                shared.give_user_feedback(message,
                                          args.log_file,
                                          args.quiet,
                                          error=True)

                sys.exit(1)

            if line[1] == 'no taxid assigned':
                length['no taxid assigned'].append(contig2length[contig])

                continue

            for (i, classification) in enumerate(line[superkingdom_index:]):
                # Drop what follows the last ': ' and any trailing '*' so
                # that only the clade name is used as key.
                classification = classification.rsplit(': ', 1)[0].rstrip('*')

                rank = official_ranks[i]

                if classification not in length[rank]:
                    length[rank][classification] = []

                    ORFs[rank][classification] = []

                length[rank][classification].append(contig2length[contig])

                # NOTE that the total number of ORFs on a contig is reported,
                # not only the number of ORFs a classification is based on.
                # The third column is parsed as '<x>/<total> ...'.
                ORFs_on_contig = int(line[2].split('/')[1].split(' ')[0])
                ORFs[rank][classification].append(ORFs_on_contig)

    if doubles:
        # Sorted so that the message is reproducible between runs.
        message = ('some contigs have multiple classifications. CAT summarise '
                   'currently does not allow for this. Contigs with multiple '
                   'classifications: {0}.'.format(', '.join(sorted(doubles))))
        shared.give_user_feedback(message,
                                  args.log_file,
                                  args.quiet,
                                  error=True)

        sys.exit(1)

    if n != len(contig2length):
        message = ('the number of classified contigs is not the same as the '
                   'number of contigs in contigs fasta. Are you sure the CAT '
                   'classification file is based on the contigs fasta?')
        shared.give_user_feedback(message,
                                  args.log_file,
                                  args.quiet,
                                  error=True)

        sys.exit(1)

    n_contigs = len(contig2length)
    total_length = sum(contig2length.values())
    n_classified_contigs = n_contigs - len(length['no taxid assigned'])
    total_classified_length = total_length - sum(length['no taxid assigned'])

    # An empty fasta together with a header-only classification file passes
    # all checks above; report 0% instead of dividing by zero.
    percentage_contigs = (n_classified_contigs / n_contigs * 100
                          if n_contigs else 0.0)
    percentage_length = (total_classified_length / total_length * 100
                         if total_length else 0.0)

    with open(args.output_file, 'w') as outf1:
        outf1.write('# total number of contigs in {0} is {1:,d} representing '
                    '{2:,d} positions.\n'.format(args.contigs_fasta, n_contigs,
                                                 total_length))
        outf1.write(
            '# {0:,d} contigs have taxonomy assigned ({1:.2f}%) '
            'representing {2:,d} positions ({3:.2f}%) in {4}.\n'.format(
                n_classified_contigs, percentage_contigs,
                total_classified_length, percentage_length, args.input_file))
        outf1.write('#\n')
        outf1.write('# rank\t'
                    'clade\t'
                    'number of contigs\t'
                    'number of ORFs\t'
                    'number of positions\n')

        for rank in official_ranks:
            for clade in sorted(length[rank],
                                key=lambda x: sum(length[rank][x]),
                                reverse=True):
                outf1.write('{0}\t{1}\t{2}\t{3}\t{4}\n'.format(
                    rank, clade, len(length[rank][clade]),
                    sum(ORFs[rank][clade]), sum(length[rank][clade])))

    message = '{0} is created!'.format(args.output_file)
    shared.give_user_feedback(message, args.log_file, args.quiet)
Пример #3
0
def run():
    """Append taxonomic names to a CAT/BAT classification file.

    Arguments come from ``parse_arguments()``. The input file is read twice:
    first to find the 'lineage' (required) and 'lineage scores' (optional)
    columns in the header, then to copy every line to ``args.output_file``
    with the names of the lineage appended. Lines that do not have the same
    number of columns as the header, or that are flagged 'no taxid found',
    are copied unchanged.

    Raises:
        SystemExit: With status 1 when an input/output check reports an
            error or when the input file has no header containing a
            'lineage' column.
    """
    args = parse_arguments()

    message = '# CAT v{0}.'.format(about.__version__)
    shared.give_user_feedback(message,
                              args.log_file,
                              args.quiet,
                              show_time=False)

    errors = []

    errors.append(
        check.check_input_file(args.input_file, args.log_file, args.quiet))

    # With --force the output file is allowed to exist already, so the
    # output check is skipped.
    if not args.force:
        errors.append(
            check.check_output_file(args.output_file, args.log_file,
                                    args.quiet))

    if True in errors:
        sys.exit(1)

    # Only the rank and name tables are needed here; the parent table is
    # returned by the same call and ignored.
    (_taxid2parent, taxid2rank) = tax.import_nodes(args.nodes_dmp,
                                                   args.log_file, args.quiet)
    taxid2name = tax.import_names(args.names_dmp, args.log_file, args.quiet)

    message = 'Appending names...'
    shared.give_user_feedback(message, args.log_file, args.quiet)

    # First pass: only the first line starting with '#' is inspected.
    with open(args.input_file, 'r') as f1:
        for line in f1:
            if line.startswith('#'):
                header = line.rstrip().split('\t')

                if 'lineage' not in header:
                    # The original referenced undefined local names on this
                    # path; everything lives on args in this function.
                    message = ('{0} is not a supported classification file.'
                               ''.format(args.input_file))
                    shared.give_user_feedback(message,
                                              args.log_file,
                                              args.quiet,
                                              error=True)

                    sys.exit(1)

                lineage_index = header.index('lineage')

                try:
                    scores_index = header.index('lineage scores')
                except ValueError:
                    # The scores column is optional.
                    scores_index = None

                full_length = len(header)

                break
        else:
            # The loop ended without ever seeing a '#' line.
            message = ('{0} is not a supported classification file.'.format(
                args.input_file))
            shared.give_user_feedback(message,
                                      args.log_file,
                                      args.quiet,
                                      error=True)

            sys.exit(1)

    # Second pass: copy every line, appending names where possible.
    with open(args.input_file, 'r') as f1, open(args.output_file,
                                                'w') as outf1:
        for line in f1:
            line = line.rstrip()

            if line.startswith('#'):
                if args.only_official:
                    outf1.write('{0}\tsuperkingdom\tphylum\tclass\torder\t'
                                'family\tgenus\tspecies\n'.format(line))
                else:
                    outf1.write('{0}\tfull lineage names\n'.format(line))

                continue

            line = line.split('\t')

            if len(line) != full_length:
                # Entry does not have a full annotation.
                outf1.write('{0}\n'.format('\t'.join(line)))

                continue

            if (line[1].startswith('no taxid found')
                    or line[2].startswith('no taxid found')):
                # ORF has database hits but the accession number is not found
                # in the taxonomy files.
                outf1.write('{0}\n'.format('\t'.join(line)))

                continue

            lineage = line[lineage_index].split(';')

            if scores_index is not None and not args.exclude_scores:
                scores = line[scores_index].split(';')
            else:
                scores = None

            if args.only_official:
                names = tax.convert_to_official_names(lineage, taxid2rank,
                                                      taxid2name, scores)
            else:
                names = tax.convert_to_names(lineage, taxid2rank, taxid2name,
                                             scores)

            outf1.write('{0}\t{1}\n'.format('\t'.join(line), '\t'.join(names)))

    message = 'Names written to {0}!'.format(args.output_file)
    shared.give_user_feedback(message, args.log_file, args.quiet)
Пример #4
0
def summarise_bins(input_file, output_file, force, quiet):
    """Summarise a BAT classification file into per-rank clade counts.

    The input file is read twice: first to validate the header and locate
    the 'superkingdom' column, then to count, for every official rank, how
    many bins were assigned to each clade. The result is written to
    ``output_file`` as a tab-separated table, clades sorted by decreasing
    number of bins within each rank. Input and output are both opened with
    ``shared.open_maybe_gzip``.

    Args:
        input_file: Path of the BAT classification file with official names.
        output_file: Path of the summary file to write.
        force: When true, the check on the output file is skipped.
        quiet: Passed on to ``shared.give_user_feedback``.

    Raises:
        SystemExit: With status 1 when an input/output check reports an
            error, when the header is missing or is not a BAT header with
            official ranks, or when a bin occurs on more than one line.
    """
    # Currently summarise does not allow for a log file.
    log_file = None

    message = '# CAT v{0}.'.format(about.__version__)
    shared.give_user_feedback(message, log_file, quiet, show_time=False)

    errors = []

    errors.append(check.check_input_file(input_file, log_file, quiet))

    if not force:
        errors.append(check.check_output_file(output_file, log_file, quiet))

    if True in errors:
        sys.exit(1)

    message = 'Summarising...'
    shared.give_user_feedback(message, log_file, quiet)

    # First pass: only the first line starting with '#' is inspected.
    with shared.open_maybe_gzip(input_file, 'rt') as f1:
        for line in f1:
            if line.startswith('#'):
                # Strip the line ending so that the last header column can
                # be matched exactly as well.
                header = line.rstrip().split('\t')

                if header[0] != '# bin':
                    message = ('ERROR: {0} is not a BAT classification file.'
                               ''.format(input_file))
                    shared.give_user_feedback(message,
                                              log_file,
                                              quiet,
                                              error=True)

                    if header[0] == '# contig':
                        message = ('ERROR: {0} appears to be a CAT '
                                   'classification file. If you want to '
                                   'summarise contig classifications, please '
                                   'supply a contigs fasta.'
                                   ''.format(input_file))
                        shared.give_user_feedback(message,
                                                  log_file,
                                                  quiet,
                                                  error=True)

                    sys.exit(1)

                try:
                    superkingdom_index = header.index('superkingdom')
                except ValueError:
                    # list.index raises ValueError when the column is absent.
                    message = ('ERROR: official ranks not found in header of '
                               '{0}. Make sure that the BAT classification '
                               'file is named with official ranks with \'CAT '
                               'add_names --only_official\'.'
                               ''.format(input_file))
                    shared.give_user_feedback(message,
                                              log_file,
                                              quiet,
                                              error=True)

                    sys.exit(1)

                break
        else:
            # The loop ended without ever seeing a '#' line.
            message = 'ERROR: input file does not have a recognisable header.'
            shared.give_user_feedback(message, log_file, quiet, error=True)

            sys.exit(1)

    official_ranks = [
        'superkingdom', 'phylum', 'class', 'order', 'family', 'genus',
        'species'
    ]

    # number_of_bins maps each official rank to {clade: number of bins}; the
    # extra key 'unclassified' holds a plain counter.
    number_of_bins = {'unclassified': 0}

    for rank in official_ranks:
        number_of_bins[rank] = {}

    n = 0
    bin_trace = set()
    doubles = set()

    # Second pass. Opened the same way as the header pass above, so that an
    # input accepted there can also be read here.
    with shared.open_maybe_gzip(input_file, 'rt') as f1:
        for line in f1:
            line = line.rstrip()

            if line.startswith('#'):
                continue

            n += 1

            line = line.split('\t')

            bin_ = line[0]

            if bin_ in bin_trace:
                doubles.add(bin_)

            bin_trace.add(bin_)

            if line[1] == 'unclassified':
                number_of_bins['unclassified'] += 1

                continue

            for (i, classification) in enumerate(line[superkingdom_index:]):
                # Drop what follows the last ': ' and any trailing '*' so
                # that only the clade name is used as key.
                classification = classification.rsplit(': ', 1)[0].rstrip('*')

                rank = official_ranks[i]

                number_of_bins[rank][classification] = (
                    number_of_bins[rank].get(classification, 0) + 1)

    if doubles:
        # Sorted so that the message is reproducible between runs.
        message = ('ERROR: some bins have multiple classifications. CAT '
                   'summarise currently does not allow for this. Bins with '
                   'multiple classifications: {0}.'
                   ''.format(', '.join(sorted(doubles))))
        shared.give_user_feedback(message, log_file, quiet, error=True)

        sys.exit(1)

    number_of_classified_bins = n - number_of_bins['unclassified']

    # A file that only contains a header has n == 0; report 0% instead of
    # dividing by zero.
    percentage_classified = (number_of_classified_bins / n * 100
                             if n else 0.0)

    with shared.open_maybe_gzip(output_file, 'wt') as outf1:
        outf1.write('# total number of bins is {0}, of which {1} ({2:.2f}%) '
                    'are classified.\n'
                    ''.format(n, number_of_classified_bins,
                              percentage_classified))
        outf1.write('#\n')
        outf1.write('# rank\tclade\tnumber of bins\n')

        for rank in official_ranks:
            for clade in sorted(number_of_bins[rank],
                                key=number_of_bins[rank].get,
                                reverse=True):
                outf1.write('{0}\t{1}\t{2}\n'
                            ''.format(rank, clade,
                                      number_of_bins[rank][clade]))

    message = '{0} is created!'.format(output_file)
    shared.give_user_feedback(message, log_file, quiet)
Пример #5
0
def add_names(args):
    """Append taxonomic names to a CAT/BAT classification file.

    The settings are unpacked from ``args`` by ``check.convert_arguments``.
    The input file is read twice: first to find the 'lineage' (required)
    and 'lineage scores' (optional) columns in the header, then to copy
    every line to the output file with the names of the lineage appended.
    Lines that do not have the same number of columns as the header, or
    that are flagged 'no taxid found', are copied unchanged. Input and
    output are both opened with ``shared.open_maybe_gzip``.

    Args:
        args: Parsed command-line arguments, passed to
            ``check.convert_arguments``.

    Raises:
        SystemExit: With status 1 when an input/output check reports an
            error or when the input file has no header containing a
            'lineage' column.
    """
    (input_file, output_file, taxonomy_folder, only_official, exclude_scores,
     force, quiet) = check.convert_arguments(args)

    # Currently add_names does not allow for a log file.
    log_file = None

    message = '# CAT v{0}.'.format(about.__version__)
    shared.give_user_feedback(message, log_file, quiet, show_time=False)

    errors = []

    errors.append(check.check_input_file(input_file, log_file, quiet))

    if not force:
        errors.append(check.check_output_file(output_file, log_file, quiet))

    if True in errors:
        sys.exit(1)

    # Only the nodes and names files are used here; the third returned value
    # is ignored.
    (nodes_dmp, names_dmp, _prot_accession2taxid_file
     ) = check.inspect_taxonomy_folder(taxonomy_folder)

    # Only the rank and name tables are needed; the parent table is ignored.
    (_taxid2parent, taxid2rank) = tax.import_nodes(nodes_dmp, log_file, quiet)
    taxid2name = tax.import_names(names_dmp, log_file, quiet)

    message = 'Appending names...'
    shared.give_user_feedback(message, log_file, quiet)

    # First pass: only the first line starting with '#' is inspected.
    with shared.open_maybe_gzip(input_file, 'rt') as f1:
        for line in f1:
            if line.startswith('#'):
                header = line.rstrip().split('\t')

                try:
                    lineage_index = header.index('lineage')
                except ValueError:
                    # list.index raises ValueError when the column is absent.
                    message = ('ERROR: {0} is not a supported classification '
                               'file.'.format(input_file))
                    shared.give_user_feedback(message,
                                              log_file,
                                              quiet,
                                              error=True)

                    sys.exit(1)

                try:
                    scores_index = header.index('lineage scores')
                except ValueError:
                    # The scores column is optional.
                    scores_index = None

                full_length = len(header)

                break
        else:
            # The loop ended without ever seeing a '#' line.
            message = ('ERROR: {0} is not a supported classification file.'
                       ''.format(input_file))
            shared.give_user_feedback(message, log_file, quiet, error=True)

            sys.exit(1)

    # Second pass: copy every line, appending names where possible.
    with shared.open_maybe_gzip(input_file,
                                'rt') as f1, shared.open_maybe_gzip(
                                    output_file, 'wt') as outf1:
        for line in f1:
            line = line.rstrip()

            if line.startswith('#'):
                if only_official:
                    outf1.write('{0}\tsuperkingdom\tphylum\tclass\torder\t'
                                'family\tgenus\tspecies\n'.format(line))
                else:
                    outf1.write('{0}\tfull lineage names\n'.format(line))

                continue

            line = line.split('\t')

            if len(line) != full_length:
                # Entry does not have a full annotation.
                outf1.write('{0}\n'.format('\t'.join(line)))

                continue

            if (line[1].startswith('no taxid found')
                    or line[2].startswith('no taxid found')):
                # ORF has database hits but the accession number is not found
                # in the taxonomy files.
                outf1.write('{0}\n'.format('\t'.join(line)))

                continue

            lineage = line[lineage_index].split(';')

            # Compare against None explicitly: a column index of 0 is falsy
            # but would still be a valid position.
            if scores_index is not None and not exclude_scores:
                scores = line[scores_index].split(';')
            else:
                scores = None

            if only_official:
                names = tax.convert_to_official_names(lineage, taxid2rank,
                                                      taxid2name, scores)
            else:
                names = tax.convert_to_names(lineage, taxid2rank, taxid2name,
                                             scores)

            outf1.write('{0}\t{1}\n'.format('\t'.join(line), '\t'.join(names)))

    message = 'Names written to {0}!'.format(output_file)
    shared.give_user_feedback(message, log_file, quiet)
Пример #6
0
def summarise_contigs(input_file, output_file, contigs_fasta, force, quiet):
    """Summarise a CAT classification file into per-rank clade statistics.

    Contig lengths are obtained from ``import_contig_lengths`` on
    ``contigs_fasta``. The classification file is read twice: first to
    validate the header and locate the 'superkingdom' column, then to
    collect, per official rank and clade, the lengths of the contigs and
    the number of ORFs on them. The summary is written to ``output_file``
    as a tab-separated table, clades sorted by decreasing number of
    positions within each rank.

    Args:
        input_file: Path of the CAT classification file with official names.
        output_file: Path of the summary file to write.
        contigs_fasta: Path of the contigs fasta the classification is
            based on.
        force: When true, the check on the output file is skipped.
        quiet: Passed on to ``shared.give_user_feedback``.

    Raises:
        SystemExit: With status 1 when an input/output check reports an
            error, when the header is missing or is not a CAT header with
            official ranks, when a contig is absent from the fasta, when a
            contig occurs on more than one line, or when the number of
            lines differs from the number of contigs in the fasta.
    """
    # Currently summarise does not allow for a log file.
    log_file = None

    message = '# CAT v{0}.'.format(about.__version__)
    shared.give_user_feedback(message, log_file, quiet, show_time=False)

    errors = []

    errors.append(check.check_input_file(input_file, log_file, quiet))

    if not force:
        errors.append(check.check_output_file(output_file, log_file, quiet))

    if True in errors:
        sys.exit(1)

    contig2length = import_contig_lengths(contigs_fasta, log_file, quiet)

    message = 'Summarising...'
    shared.give_user_feedback(message, log_file, quiet)

    # First pass: only the first line starting with '#' is inspected.
    with open(input_file, 'r') as f1:
        for line in f1:
            if line.startswith('#'):
                # Strip the line ending so that the last header column can
                # be matched exactly as well.
                header = line.rstrip().split('\t')

                if header[0] != '# contig':
                    message = ('ERROR: {0} is not a CAT classification file.'
                               ''.format(input_file))
                    shared.give_user_feedback(message,
                                              log_file,
                                              quiet,
                                              error=True)

                    if header[0] == '# bin':
                        message = ('ERROR: {0} appears to be a BAT '
                                   'classification file. If you want to '
                                   'summarise bin classifications, just '
                                   'don\'t supply a contigs fasta and '
                                   'everything should be fine!'
                                   ''.format(input_file))
                        shared.give_user_feedback(message,
                                                  log_file,
                                                  quiet,
                                                  error=True)

                    sys.exit(1)

                try:
                    superkingdom_index = header.index('superkingdom')
                except ValueError:
                    # list.index raises ValueError when the column is absent.
                    message = ('ERROR: official ranks not found in header of '
                               '{0}. Make sure that the CAT classification '
                               'file is named with official ranks with \'CAT '
                               'add_names --only_official\'.'
                               ''.format(input_file))
                    shared.give_user_feedback(message,
                                              log_file,
                                              quiet,
                                              error=True)

                    sys.exit(1)

                break
        else:
            # The loop ended without ever seeing a '#' line.
            message = 'ERROR: input file does not have a recognisable header.'
            shared.give_user_feedback(message, log_file, quiet, error=True)

            sys.exit(1)

    official_ranks = ['superkingdom', 'phylum', 'class', 'order', 'family',
                      'genus', 'species']

    # length maps each official rank to {clade: [contig lengths]}; the extra
    # key 'unclassified' holds a flat list of contig lengths.
    # ORFs maps each official rank to {clade: [ORF counts per contig]}.
    length = {'unclassified': []}
    ORFs = {}

    for rank in official_ranks:
        length[rank] = {}
        ORFs[rank] = {}

    n = 0
    contig_trace = set()
    doubles = set()

    # Second pass: process every non-header line.
    with open(input_file, 'r') as f1:
        for line in f1:
            line = line.rstrip()

            if line.startswith('#'):
                continue

            n += 1

            line = line.split('\t')

            contig = line[0]

            if contig in contig_trace:
                doubles.add(contig)

            contig_trace.add(contig)

            if contig not in contig2length:
                message = ('ERROR: contig {0} in CAT classification file is '
                           'not found in supplied contigs fasta file. Are you '
                           'sure the CAT classification file is based on the '
                           'contigs fasta?'.format(contig))
                shared.give_user_feedback(message, log_file, quiet, error=True)

                sys.exit(1)

            if line[1].startswith('unclassified'):
                length['unclassified'].append(contig2length[contig])

                continue

            for (i, classification) in enumerate(line[superkingdom_index:]):
                # Drop what follows the last ': ' and any trailing '*' so
                # that only the clade name is used as key.
                classification = classification.rsplit(': ', 1)[0].rstrip('*')

                rank = official_ranks[i]

                if classification not in length[rank]:
                    length[rank][classification] = []

                    ORFs[rank][classification] = []

                length[rank][classification].append(contig2length[contig])

                # The third column is read as the ORF count of the contig.
                ORFs[rank][classification].append(int(line[2]))

    if doubles:
        # Sorted so that the message is reproducible between runs.
        message = ('ERROR: some contigs have multiple classifications. CAT '
                   'summarise currently does not allow for this. Contigs with '
                   'multiple classifications: {0}.'
                   ''.format(', '.join(sorted(doubles))))
        shared.give_user_feedback(message, log_file, quiet, error=True)

        sys.exit(1)

    if n != len(contig2length):
        message = ('ERROR: the number of classified contigs is not the same '
                   'as the number of contigs in contigs fasta. Are you sure '
                   'the CAT classification file is based on the contigs '
                   'fasta?')
        shared.give_user_feedback(message, log_file, quiet, error=True)

        sys.exit(1)

    number_of_contigs = len(contig2length)
    total_length = sum(contig2length.values())
    number_of_classified_contigs = (number_of_contigs
                                    - len(length['unclassified']))
    total_classified_length = total_length - sum(length['unclassified'])

    # An empty fasta together with a header-only classification file passes
    # all checks above; report 0% instead of dividing by zero.
    percentage_contigs = (
        number_of_classified_contigs / number_of_contigs * 100
        if number_of_contigs else 0.0)
    percentage_length = (total_classified_length / total_length * 100
                         if total_length else 0.0)

    with open(output_file, 'w') as outf1:
        outf1.write('# total number of contigs in {0} is {1} representing {2} '
                    'positions.\n'
                    ''.format(contigs_fasta,
                              number_of_contigs,
                              total_length))
        outf1.write('# {0} contigs are classified ({1:.2f}%) representing {2} '
                    'positions ({3:.2f}%) in {4}.\n'
                    ''.format(number_of_classified_contigs,
                              percentage_contigs,
                              total_classified_length,
                              percentage_length,
                              input_file))
        outf1.write('#\n')
        outf1.write('# rank\t'
                    'clade\t'
                    'number of contigs\t'
                    'number of ORFs\t'
                    'number of positions\n')

        for rank in official_ranks:
            for clade in sorted(length[rank],
                                key=lambda x: sum(length[rank][x]),
                                reverse=True):
                outf1.write('{0}\t{1}\t{2}\t{3}\t{4}\n'
                            ''.format(rank,
                                      clade,
                                      len(length[rank][clade]),
                                      sum(ORFs[rank][clade]),
                                      sum(length[rank][clade])))

    message = '{0} is created!'.format(output_file)
    shared.give_user_feedback(message, log_file, quiet)