Exemplo n.º 1
0
def perform_docking_domain_analysis(options, clusterpksgenes, genecluster,
                                    seq_record, pksnrpsvars):
    """Predict a PKS gene order for one cluster from docking domain data.

    The first and last genes are determined from
    ``pksnrpsvars.domainnamesdict``, N- and C-terminal data are extracted
    inside a temporary working directory, every candidate order is ranked,
    and the ranking is written out as HTML.

    Side effects: ``genecluster`` is appended to
    ``pksnrpsvars.dockingdomainanalysis``.

    Returns the first entry of the ranked gene orders.
    """
    cds_by_id = utils.get_feature_dict(seq_record)
    first_gene, last_gene = find_first_and_last_genes(
        clusterpksgenes, pksnrpsvars.domainnamesdict)

    # The extraction helpers run inside a throw-away working directory.
    with TemporaryDirectory(change=True):
        data_dir = utils.get_full_path(__file__, "docking_analysis")
        nterm_residues = extract_nterminus(
            data_dir, clusterpksgenes, seq_record, first_gene, cds_by_id)
        cterm_residues = extract_cterminus(
            data_dir, clusterpksgenes, seq_record, last_gene, cds_by_id)

    candidate_orders = find_possible_orders(
        clusterpksgenes, first_gene, last_gene)
    ranked_orders, order_scores = rank_biosynthetic_orders(
        nterm_residues, cterm_residues, first_gene, last_gene,
        candidate_orders)

    # Write html outfile with docking domain analysis output
    write_gene_orders_to_html(
        options, ranked_orders, order_scores, genecluster, first_gene,
        last_gene)

    pksnrpsvars.dockingdomainanalysis.append(genecluster)
    return ranked_orders[0]
Exemplo n.º 2
0
def getECs(seq_record, options):
    """Annotate CDS features of ``seq_record`` with KEGG EC number predictions.

    Returns immediately (``None``) when the module-level ``name`` is not
    listed in ``options.ecpred``.

    For every key of the EC dictionary built by the KEGG helpers:

    * when EC numbers were found, a note is appended to the feature's
      ``note`` qualifier and the ``EC_number`` qualifier is set to the
      predicted list (an existing value is overwritten, with a warning);
    * otherwise a warning is logged and the feature is left untouched.
    """
    if name not in options.ecpred:
        logging.debug("ECprediction %s not selected, returning..." % name)
        return

    CDSFeatureDict = utils.get_feature_dict(seq_record)
    logging.debug("Predicting EC numbers using KEGG online queries")
    KEGGspeciesLocusTagDict = _getKEGG_speciesLocusTag(CDSFeatureDict)
    ECDict = _get_ECNumberDict(KEGGspeciesLocusTagDict)

    for key, ec_numbers in ECDict.items():
        feature = CDSFeatureDict[key]

        if not ec_numbers:
            logging.warning('ECpredictor[KEGG]: Could not find EC number for %s' %
                            utils.get_gene_id(feature))
            continue

        logging.debug("Found EC numbers: %s" % ", ".join(ec_numbers))

        # Fetch (or create) the note list of *this* feature on every
        # iteration; sharing one list across iterations would leak notes
        # from one feature into the next.
        notes = feature.qualifiers.setdefault('note', [])
        notes.append('EC number prediction based on KEGG query: %s' %
                     ec_numbers)

        if 'EC_number' in feature.qualifiers:
            logging.warning('ECpredictor[kegg]: Overwriting existing EC annotation: %s  with %s' % \
                            (", ".join(feature.qualifiers['EC_number']), ", ".join(ec_numbers)))

        feature.qualifiers['EC_number'] = ec_numbers
0
 def test_get_feature_dict(self):
     "Test utils.get_feature_dict()"
     fd = utils.get_feature_dict(self.rec)
     # Expected keys: the first locus_tag of every CDS feature in the record.
     ids = sorted(
         f.qualifiers['locus_tag'][0] for f in self.rec.features
         if f.type == "CDS")
     # sorted() returns a new list, so this works whether keys() yields a
     # list or a view (a view has no .sort() method).
     keys = sorted(fd.keys())
     self.assertListEqual(ids, keys)
Exemplo n.º 4
0
def retrieve_gene_cluster_annotations(seq_record, smcogdict, gtrcoglist,
                                      transportercoglist, geneclusternr):
    """Collect per-gene display data for gene cluster ``geneclusternr``.

    Returns an 11-tuple
    ``(clustergenes, clustertype, annotations, colors, starts, ends,
    strands, pksnrpsprots, gtrs, transporters, clustersize)`` where

    * ``clustergenes`` is the list of gene ids of the cluster's CDS features;
    * ``annotations`` maps gene id to its first ``product`` qualifier, or
      ``'Unannotated gene'`` when the qualifier is absent;
    * ``colors``, ``starts``, ``ends`` and ``strands`` are lists parallel to
      ``clustergenes``;
    * ``pksnrpsprots``, ``gtrs`` and ``transporters`` are the subsets of gene
      ids that are PKS/NRPS core genes, or whose first ``smcogdict`` entry is
      in ``gtrcoglist`` / ``transportercoglist`` respectively;
    * ``clustersize`` is ``max(ends) - min(starts)``, or ``0`` when the
      cluster contains no CDS features.
    """
    # Sets give O(1) membership tests inside the per-gene loop below.
    allcoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_secmet_cds_features(seq_record))
    pksnrpscoregenes = set(
        utils.get_gene_id(cds)
        for cds in utils.get_pksnrps_cds_features(seq_record))
    feature_by_id = utils.get_feature_dict(seq_record)

    # Look the cluster feature up once and reuse it.
    cluster = utils.get_cluster_by_nr(seq_record, geneclusternr)
    clustergenes = [
        utils.get_gene_id(cds)
        for cds in utils.get_cluster_cds_features(cluster, seq_record)
    ]
    clustertype = utils.get_cluster_type(cluster)

    annotations = {}
    colors = []
    starts = []
    ends = []
    strands = []
    pksnrpsprots = []
    gtrs = []
    transporters = []
    for j in clustergenes:
        cdsfeature = feature_by_id[j]
        if 'product' in cdsfeature.qualifiers:
            annotations[j] = cdsfeature.qualifiers['product'][0]
        else:
            annotations[j] = 'Unannotated gene'
        starts.append(cdsfeature.location.start)
        ends.append(cdsfeature.location.end)
        strands.append("-" if cdsfeature.strand == -1 else "+")
        colors.append("#810E15" if j in allcoregenes else "grey")
        if j in pksnrpscoregenes:
            pksnrpsprots.append(j)
        # Only the first smCOG entry of a gene decides its category.
        if j in smcogdict and len(smcogdict[j]) > 0:
            first_cog = smcogdict[j][0]
            if first_cog in gtrcoglist:
                gtrs.append(j)
            if first_cog in transportercoglist:
                transporters.append(j)

    # max()/min() raise ValueError on empty sequences; an empty cluster has
    # size 0.
    clustersize = max(ends) - min(starts) if clustergenes else 0
    return clustergenes, clustertype, annotations, colors, starts, ends, strands, pksnrpsprots, gtrs, transporters, clustersize
Exemplo n.º 5
0
def find_colinear_order(clusterpksnrpsgenes, seq_record, domainnamesdict):
    """Return the gene ids of ``clusterpksnrpsgenes`` in assumed colinear order.

    Colinearity is assumed for NRPS genes, mixed NRPS/PKS genes, PKS genes
    without detected docking domains, or clusters with 1-3 PKS genes.

    NOTE: ``clusterpksnrpsgenes`` is reversed in place when the majority
    strand is negative, and reversed (again) when the first gene then has
    more than one domain including a "Thioesterase" or "TD" domain.
    """
    cds_by_id = utils.get_feature_dict(seq_record)

    # Net strand vote: +1 per forward gene, -1 per reverse gene, 0 otherwise.
    strand_balance = 0
    for gene in clusterpksnrpsgenes:
        strand = cds_by_id[utils.get_gene_id(gene)].strand
        if strand == 1:
            strand_balance += 1
        elif strand == -1:
            strand_balance -= 1
    if strand_balance < 0:
        clusterpksnrpsgenes.reverse()

    # Reverse if first gene encodes a multidomain protein with a TE/TD domain
    first_domains = domainnamesdict[utils.get_gene_id(clusterpksnrpsgenes[0])]
    has_terminal_domain = ("Thioesterase" in first_domains
                           or "TD" in first_domains)
    if has_terminal_domain and len(first_domains) > 1:
        clusterpksnrpsgenes.reverse()

    return [utils.get_gene_id(gene) for gene in clusterpksnrpsgenes]
Exemplo n.º 6
0
def filter_nonterminal_docking_domains(seq_record, pksnrpsvars):
    """Drop docking-domain hits that are not near either end of their protein.

    For each gene in ``pksnrpsvars.domaindict``, a hit whose name (``hit[0]``)
    is one of the docking domain names is removed unless one of its
    coordinates (``hit[1]``, ``hit[2]``) lies within 50 positions of the
    start, or within 50 positions of the end, of the gene's amino acid
    sequence. Hit lists are modified in place; a gene whose hit list ends up
    empty is removed from ``pksnrpsvars.domaindict``.
    """
    dockingdomains = [
        'NRPS-COM_Nterm', 'NRPS-COM_Cterm', 'PKS_Docking_Cterm',
        'PKS_Docking_Nterm'
    ]
    feature_by_id = utils.get_feature_dict(seq_record)

    # Iterate over a snapshot of the keys: entries are deleted from
    # domaindict inside the loop, which is illegal while iterating a view.
    for hitgene in list(pksnrpsvars.domaindict.keys()):
        hits = pksnrpsvars.domaindict[hitgene]
        hitgenelength = len(utils.get_aa_sequence(feature_by_id[hitgene]))

        kept = []
        for hit in hits:
            if hit[0] in dockingdomains:
                first_pos = min(hit[1], hit[2])
                last_pos = max(hit[1], hit[2])
                # Not within 50 positions of either terminus: discard.
                if first_pos >= 50 and hitgenelength - last_pos >= 50:
                    continue
            kept.append(hit)

        # Slice assignment keeps the same list object, as the original
        # in-place deletions did.
        hits[:] = kept
        if not hits:
            del pksnrpsvars.domaindict[hitgene]
Exemplo n.º 7
0
def _annotate(seq_record, options, results):
    """Annotate seq_record with CDS_motifs for the result

    For every HSP in ``results`` whose bitscore is above the minimum score
    and whose e-value is below the maximum e-value (both derived from
    ``options``), and whose ``query_id`` matches a feature of ``seq_record``,
    a new ``SeqFeature`` of type ``options.FeatureTags.fullhmmer_tag`` is
    appended to ``seq_record.features``. HSPs that fail a threshold or do not
    match a known feature are skipped.
    """
    logging.debug("generating feature objects for PFAM hits")
    min_score = _min_score(options)
    max_evalue = _max_evalue(options)

    feature_by_id = utils.get_feature_dict(seq_record)

    for r in results:
        # Running number for asDomain_id; it only advances for accepted HSPs
        # and restarts for every result.
        i = 1
        for hsp in r.hsps:
            if hsp.bitscore <= min_score or hsp.evalue >= max_evalue:
                continue

            if hsp.query_id not in feature_by_id:
                continue

            feature = feature_by_id[hsp.query_id]

            start, end = _calculate_start_end(feature, hsp)
            loc = FeatureLocation(start, end, strand=feature.strand)

            newFeature = SeqFeature(location=loc, type=options.FeatureTags.fullhmmer_tag)

            database = path.basename(r.target)
            quals = defaultdict(list)

            quals['label'].append(r.id)
            if 'locus_tag' in feature.qualifiers:
                quals['locus_tag'] = feature.qualifiers['locus_tag']
            else:
                quals['locus_tag'] = [hsp.query_id]
            quals['domain'] = [hsp.hit_id]
            quals['asDomain_id'] = ['fullhmmer_'+'_'.join(quals['locus_tag'])+'_'+'{:04d}'.format(i)]
            i += 1

            quals['evalue'] = ["{:.2E}".format(float(hsp.evalue))]
            quals['score'] = [str(hsp.bitscore)]
            quals['aSTool'] = ["fullhmmer"]
            quals['detection'] = ["hmmscan"]
            quals['database'] = [database]
            # Fall back to translation table 1 when the parent feature does
            # not specify one.
            if 'transl_table' in feature.qualifiers:
                [transl_table] = feature.qualifiers['transl_table']
            else:
                transl_table = 1
            quals['translation'] = [str(newFeature.extract(seq_record.seq).translate(table=transl_table))]

            quals['note'].append("%s-Hit: %s. Score: %s. E-value: %s. Domain range: %s..%s." % \
                    (database, hsp.hit_id, hsp.bitscore, hsp.evalue,
                     hsp.hit_start, hsp.hit_end))

            quals['description'] = [hsp.hit_description]

            # Only the lookup itself may legitimately raise KeyError; hits
            # without a known PFAM id simply get no db_xref.
            try:
                pfamid = name_to_pfamid[hsp.hit_id]
            except KeyError:
                pass
            else:
                quals['db_xref'].append("PFAM: %s" % pfamid)

            newFeature.qualifiers = quals
            seq_record.features.append(newFeature)
Exemplo n.º 8
0
def main():
    """Run a mock antiSMASH detection over a list of genomes and dump stats.

    ``sys.argv[1]`` names a text file with one genome path per line. For
    every genome and every sequence record in it, HMM hits are taken from
    ``options.hmm_results``, clusters are detected, cluster type names are
    cleaned up, and per-record statistics are collected in a nested dict
    keyed by file path and record id. After each genome the accumulated
    statistics are written to ``result.js`` in the current directory.
    """
    multiprocessing.freeze_support()
    res_object = {}

    # get genome files (the handle is closed as soon as the list is read)
    with open(sys.argv[1], 'r') as list_handle:
        files = [
            path.expanduser(line.replace("\n", "")) for line in list_handle
        ]

    # mockup antismash run per files
    for i, fpath in enumerate(files, 1):
        res_object[fpath] = {}
        print("Processing %s... (%d/%d)" % (fpath, i, len(files)))
        options = get_mockup_config()
        options.sequences = [fpath]
        config.set_config(options)
        run_antismash.setup_logging(
            options)  #To-DO: get antismash logging to works!

        # load plugins
        plugins = run_antismash.load_detection_plugins()
        run_antismash.filter_plugins(plugins, options,
                                     options.enabled_cluster_types)

        # parse to seq_records
        seq_records = run_antismash.parse_input_sequences(options)
        options.next_clusternr = 1

        # For nucleotide input only records longer than 1000 nt are analysed.
        # The filter has to run before the loop: rebinding the list while it
        # is being iterated would not exclude anything.
        if options.input_type == 'nucl':
            seq_records = [
                record for record in seq_records if len(record.seq) > 1000
            ]

        for seq_record in seq_records:
            utils.sort_features(seq_record)
            run_antismash.strip_record(seq_record)
            utils.fix_record_name_id(seq_record, options)

            # fetch results_by_id
            feature_by_id = utils.get_feature_dict(seq_record)
            results = []
            results_by_id = {}
            prefix = "%s:" % seq_record.id.replace(":", "_")
            for feature in utils.get_cds_features(seq_record):
                gene_id = utils.get_gene_id(feature)
                hmm_key = prefix + gene_id
                if hmm_key in options.hmm_results:
                    results_by_id[gene_id] = options.hmm_results[hmm_key]
                    results.extend(results_by_id[gene_id])

            # ignore short aa's: temporarily remove short CDS without hits.
            # The list is partitioned rather than mutated while iterating,
            # which would skip the element following each removed one.
            min_length_aa = 100
            short_cds_buffer = []
            kept_features = []
            for f in seq_record.features:
                if (f.type == "CDS"
                        and len(f.qualifiers['translation'][0]) < min_length_aa
                        and utils.get_gene_id(f) not in results_by_id):
                    short_cds_buffer.append(f)
                else:
                    kept_features.append(f)
            seq_record.features[:] = kept_features

            overlaps = utils.get_overlaps_table(seq_record)
            rulesdict = hmm_detection.create_rules_dict(
                options.enabled_cluster_types)
            # find total cdhit numbers in the chromosome
            total_cdhit = len(
                utils.get_cdhit_table(utils.get_cds_features(seq_record))[0])
            stats = {
                "total_clusters": 0,
                "total_genes": len(overlaps[0]),
                "total_cdhit": total_cdhit,
                "genes_with_hits": 0,
                "largest_cdhit": 0,
                "largest_domain_variations": 0,
                "per_hits": {},
                "cluster_types": {}
            }
            res_object[fpath][seq_record.id] = stats

            # filter overlap hits
            results, results_by_id = hmm_detection.filter_results(
                results, results_by_id, overlaps, feature_by_id)

            # count hits
            per_hits = stats["per_hits"]
            for gene_id in results_by_id:
                res_gene = results_by_id[gene_id]
                if len(res_gene) > 0:
                    stats["genes_with_hits"] += 1
                for hsp in res_gene:
                    domain_name = hsp.query_id.replace("plants/", "")
                    per_hits[domain_name] = per_hits.get(domain_name, 0) + 1

            # do cluster finding algorithm
            typedict = hmm_detection.apply_cluster_rules(
                results_by_id, feature_by_id, options.enabled_cluster_types,
                rulesdict, overlaps)
            hmm_detection.fix_hybrid_clusters_typedict(typedict)
            nseqdict = hmm_detection.get_nseq()
            for cds in results_by_id:
                feature = feature_by_id[cds]
                if typedict[cds] != "none":
                    hmm_detection._update_sec_met_entry(
                        feature, results_by_id[cds], typedict[cds], nseqdict)
            hmm_detection.find_clusters(seq_record, rulesdict, overlaps)
            # put the temporarily removed short CDS back
            seq_record.features.extend(short_cds_buffer)
            stats["total_clusters"] += len(
                utils.get_cluster_features(seq_record))

            # do cluster specific and unspecific analysis
            if len(utils.get_cluster_features(seq_record)) > 0:
                run_antismash.cluster_specific_analysis(
                    plugins, seq_record, options)
            run_antismash.unspecific_analysis(seq_record, options)

            #Rearrange hybrid clusters name alphabetically
            hmm_detection.fix_hybrid_clusters(seq_record)

            #before writing to output, remove all hmm_detection's subdir prefixes from clustertype
            for cluster in utils.get_cluster_features(seq_record):
                prod_names = []
                for prod in cluster.qualifiers['product']:
                    prod_name = [
                        name.split('/')[-1] for name in prod.split('-')
                    ]
                    prod_names.append("-".join(prod_name))
                cluster.qualifiers['product'] = prod_names
            for cds in utils.get_cds_features(seq_record):
                if 'sec_met' in cds.qualifiers:
                    temp_qual = []
                    for row in cds.qualifiers['sec_met']:
                        if row.startswith('Type: '):
                            clustertypes = [
                                (ct.split('/')[-1])
                                for ct in row.split('Type: ')[-1].split('-')
                            ]
                            temp_qual.append('Type: ' + "-".join(clustertypes))
                        elif row.startswith('Domains detected: '):
                            cluster_results = []
                            for cluster_result in row.split(
                                    'Domains detected: ')[-1].split(';'):
                                # strip the subdir prefix from the domain
                                # name, keep the E-value part untouched
                                cluster_results.append(
                                    cluster_result.split(' (E-value')[0].split(
                                        '/')[-1] + ' (E-value' +
                                    cluster_result.split(' (E-value')[-1])
                            temp_qual.append('Domains detected: ' +
                                             ";".join(cluster_results))
                        else:
                            temp_qual.append(row)
                    cds.qualifiers['sec_met'] = temp_qual

            #on plants, remove plant clustertype from hybrid types, and replace single
            #plant clustertype with "putative"
            for cluster in utils.get_cluster_features(seq_record):
                prod_names = []
                for prod in cluster.qualifiers['product']:
                    prod_name = list(set(prod.split('-')))
                    if (len(prod_name) > 1) and ("plant" in prod_name):
                        prod_name.remove("plant")
                    elif prod_name == ["plant"]:
                        prod_name = ["putative"]
                    prod_names.append("-".join(prod_name))
                cluster.qualifiers['product'] = prod_names
            for cds in utils.get_cds_features(seq_record):
                if 'sec_met' in cds.qualifiers:
                    temp_qual = []
                    for row in cds.qualifiers['sec_met']:
                        if row.startswith('Type: '):
                            clustertypes = list(
                                set(row.split('Type: ')[-1].split('-')))
                            if (len(clustertypes) > 1) and ("plant"
                                                            in clustertypes):
                                clustertypes.remove("plant")
                            elif clustertypes == ["plant"]:
                                clustertypes = ["putative"]
                            temp_qual.append('Type: ' + "-".join(clustertypes))
                        else:
                            temp_qual.append(row)
                    cds.qualifiers['sec_met'] = temp_qual

            # find largest cdhit number & largest domain diversity in a cluster
            # (the "average_*" entries hold the median over all clusters)
            stats["average_cdhit"] = 0
            stats["average_domain_variations"] = 0
            cdhit_numbers = []
            domain_numbers = []
            cluster_types = stats["cluster_types"]
            for cluster in utils.get_cluster_features(seq_record):
                cluster_type = utils.get_cluster_type(cluster)
                cluster_types[cluster_type] = cluster_types.get(
                    cluster_type, 0) + 1
                num_cdhit = len(
                    utils.get_cluster_cdhit_table(cluster, seq_record))
                num_domain = len(utils.get_cluster_domains(
                    cluster, seq_record))
                cdhit_numbers.append(num_cdhit)
                domain_numbers.append(num_domain)
                if num_cdhit > stats["largest_cdhit"]:
                    stats["largest_cdhit"] = num_cdhit
                if num_domain > stats["largest_domain_variations"]:
                    stats["largest_domain_variations"] = num_domain
            if len(cdhit_numbers) > 0:
                stats["average_cdhit"] = numpy.median(cdhit_numbers)
            if len(domain_numbers) > 0:
                stats["average_domain_variations"] = numpy.median(
                    domain_numbers)

        # Rewritten after every genome with everything collected so far.
        with open('result.js', 'w') as h:
            h.write('var result = %s;' % json.dumps(res_object, indent=4))
Exemplo n.º 9
0
def detect_signature_genes(seq_record, enabled_clustertypes, options):
    """Function to be executed by module

    Detects gene clusters on ``seq_record`` from the precomputed HMM hits in
    ``options.hmm_results`` (keyed by ``"<record id>:<gene id>"``, with any
    ``:`` in the record id replaced by ``_``).

    Side effects: ``seq_record`` gains sec_met annotations and cluster
    features, and ``options.hmm_results`` is updated so that it reflects the
    filtered hits (entries of genes whose hits were all filtered out are
    deleted).
    """
    logging.info('Detecting gene clusters using HMM library')
    feature_by_id = utils.get_feature_dict(seq_record)
    rulesdict = create_rules_dict(enabled_clustertypes)
    results = []
    sig_by_name = {}
    results_by_id = {}
    # NOTE(review): sig_by_name is never read in this function; the loop is
    # kept in case get_sig_profiles() is relied on for side effects - confirm.
    for sig in get_sig_profiles():
        sig_by_name[sig.name] = sig

    prefix = "%s:" % seq_record.id.replace(":", "_")

    for feature in utils.get_cds_features(seq_record):
        gene_id = utils.get_gene_id(feature)
        hmm_key = prefix + gene_id
        if hmm_key in options.hmm_results:
            results_by_id[gene_id] = options.hmm_results[hmm_key]
            results.extend(results_by_id[gene_id])

    short_cds_buffer = []
    if options.ignore_short_aa:
        # Temporarily filter out cds with < prot_min_length AA length
        min_length_aa = 50
        if options.eukaryotic:
            min_length_aa = 100
        # Partition instead of removing while iterating: removing from the
        # list being iterated skips the element after each removed one.
        kept_features = []
        for f in seq_record.features:
            if (f.type == "CDS"
                    and len(f.qualifiers['translation'][0]) < min_length_aa
                    and utils.get_gene_id(f) not in results_by_id):
                short_cds_buffer.append(f)
            else:
                kept_features.append(f)
        seq_record.features[:] = kept_features

    #Get overlap tables (for overlap filtering etc)
    overlaps = utils.get_overlaps_table(seq_record)

    #Filter results by comparing scores of different models (for PKS systems)
    # Snapshot the ids first so genes dropped by the filter can be detected.
    ids_before_filter = list(results_by_id)
    results, results_by_id = filter_results(results, results_by_id, overlaps,
                                            feature_by_id)

    #Update filtered results back to the options.hmm_results
    for gene_id in ids_before_filter:
        hmm_key = prefix + gene_id
        if hmm_key not in options.hmm_results:
            continue
        if gene_id in results_by_id:
            options.hmm_results[hmm_key] = results_by_id[gene_id]
        else:
            del options.hmm_results[hmm_key]

    #Use rules to determine gene clusters
    typedict = apply_cluster_rules(results_by_id, feature_by_id,
                                   enabled_clustertypes, rulesdict, overlaps)

    #Rearrange hybrid clusters name in typedict alphabetically
    fix_hybrid_clusters_typedict(typedict)

    #Find number of sequences on which each pHMM is based
    nseqdict = get_nseq()

    #Save final results to seq_record
    for cds in results_by_id:
        feature = feature_by_id[cds]
        if typedict[cds] != "none":
            _update_sec_met_entry(feature, results_by_id[cds], typedict[cds],
                                  nseqdict)

    find_clusters(seq_record, rulesdict, overlaps)

    #Find additional NRPS/PKS genes in gene clusters
    add_additional_nrpspks_genes(typedict, results_by_id, seq_record, nseqdict)

    #Rearrange hybrid clusters name alphabetically
    fix_hybrid_clusters(seq_record)

    #Add details of gene cluster detection to cluster features
    store_detection_details(results_by_id, rulesdict, seq_record)

    # Re-add the short CDSs
    seq_record.features.extend(short_cds_buffer)
    utils.sort_features(seq_record)

    #If all-orfs option on, remove irrelevant short orfs
    if options.all_orfs:
        remove_irrelevant_allorfs(seq_record)

    #Display %identity
    if options.enable_cdhit:
        store_percentage_identities(seq_record)
Exemplo n.º 10
0
def detect_signature_genes(seq_record, enabled_clustertypes, options):
    """Function to be executed by module

    Runs hmmsearch with ``bgc_seeds.hmm`` against the multi-FASTA of
    ``seq_record``, keeps the HSPs scoring above their signature's cutoff,
    filters them, applies the cluster rules and stores the outcome on
    ``seq_record``.
    """
    feature_by_id = utils.get_feature_dict(seq_record)
    full_fasta = utils.get_multifasta(seq_record)
    rulesdict = create_rules_dict(enabled_clustertypes)
    sig_by_name = dict((sig.name, sig) for sig in _signature_profiles)

    seeds_hmm = utils.get_full_path(__file__, 'bgc_seeds.hmm')
    runresults = utils.run_hmmsearch(seeds_hmm, full_fasta, use_tempfile=True)

    results = []
    results_by_id = {}
    for runresult in runresults:
        acc = runresult.accession.split('.')[0]
        for hsp in runresult.hsps:
            # Look the signature up by query id first, then by accession.
            sig = sig_by_name.get(hsp.query_id)
            if sig is None:
                sig = sig_by_name.get(acc)
            if sig is None:
                logging.error(
                    'BUG: Failed to find signature for ID %s / ACC %s',
                    hsp.query_id, acc)
                continue
            # Store result if it is above cut-off
            if not hsp.bitscore > sig.cutoff:
                continue
            results.append(hsp)
            results_by_id.setdefault(hsp.hit_id, []).append(hsp)

    #Get overlap tables (for overlap filtering etc)
    overlaps = utils.get_overlaps_table(seq_record)

    #Filter results by comparing scores of different models (for PKS systems)
    results, results_by_id = filter_results(results, results_by_id)

    # Filter results of overlapping genes (only for plants)
    if options.taxon == 'plants':
        results, results_by_id = filter_result_overlapping_genes(
            results, results_by_id, overlaps, feature_by_id)

    #Filter multiple results of the same model in one gene
    results, results_by_id = filter_result_multiple(results, results_by_id)

    #Use rules to determine gene clusters
    typedict = apply_cluster_rules(results_by_id, feature_by_id,
                                   enabled_clustertypes, rulesdict, overlaps)

    #Find number of sequences on which each pHMM is based
    nseqdict = get_nseq()

    #Save final results to seq_record
    for gene_id in results_by_id.keys():
        _update_sec_met_entry(feature_by_id[gene_id], results_by_id[gene_id],
                              typedict[gene_id], nseqdict)

    find_clusters(seq_record, rulesdict)

    #Find additional NRPS/PKS genes in gene clusters
    add_additional_nrpspks_genes(typedict, results_by_id, seq_record, nseqdict)

    #Add details of gene cluster detection to cluster features
    store_detection_details(results_by_id, rulesdict, seq_record)

    #If all-orfs option on, remove irrelevant short orfs
    if options.all_orfs:
        remove_irrelevant_allorfs(seq_record)