def main():
    """
    Command-line entry point: keep only the longest sequence of each
    Trinity component and write the survivors out as FASTA.

    Options:
        -i / --input   Path to the input FASTA file (required).
        -o / --output  File to create; when omitted, records go to STDOUT.

    Sequence IDs are grouped by the ``compN`` prefix that precedes the first
    underscore.  Within a group the record with the strictly longest 's'
    entry wins, so on a tie the first one encountered is kept.  Each winner
    is written as ``>ID H`` followed by the sequence wrapped at 60 columns,
    where H is the record's 'h' entry (presumably the rest of the original
    header line; confirm against utils.fasta_dict_from_file).

    Raises:
        Exception: if a sequence ID does not contain a ``compN_`` prefix.
    """
    parser = argparse.ArgumentParser(
        description='Filters trinity output for longest subcomponents based on naming convention')

    ## input file to be read
    parser.add_argument('-i', '--input', type=str, required=True,
                        help='Path to an input FASTA file')
    ## output file to be written
    parser.add_argument('-o', '--output', type=str, required=False,
                        help='Output file to be created. Default = STDOUT')
    args = parser.parse_args()

    ## output will either be a file or STDOUT
    fout = sys.stdout
    if args.output is not None:
        fout = open(args.output, 'wt')

    try:
        seqs = utils.fasta_dict_from_file(args.input)

        ## component ID -> record of the longest sequence seen so far
        components = dict()

        for seq_id in seqs:
            ## raw string so that \d reaches the regex engine untouched
            m = re.search(r"(comp\d+)_", seq_id)

            if m:
                component_id = m.group(1)

                if component_id not in components or len(seqs[seq_id]['s']) > len(components[component_id]['s']):
                    components[component_id] = seqs[seq_id]
                    ## remember which full ID this record came from for the header
                    components[component_id]['longest_id'] = seq_id
            else:
                raise Exception("ERROR: This ID wasn't in the expected format of compN_cN_seqN: {0}".format(seq_id))

        for c_id in components:
            seq_wrapped = utils.wrapped_fasta(components[c_id]['s'], every=60)
            fout.write(">{0} {1}\n{2}\n".format(components[c_id]['longest_id'], components[c_id]['h'], seq_wrapped))
    finally:
        ## close (and flush) a file we opened ourselves, even on error;
        ## STDOUT is left alone because it belongs to the interpreter
        if fout is not sys.stdout:
            fout.close()
def main():
    """
    Filter Trinity assembly output down to one sequence per component.

    Reads the FASTA file given with -i/--input, groups the entries by the
    ``compN`` prefix of their IDs and, for every group, emits only the entry
    whose 's' value is longest (the earliest entry is kept when lengths are
    equal).  Results are written to the file given with -o/--output, or to
    STDOUT when that option is absent.

    Every output record has the form ``>ID H`` followed by the sequence
    wrapped every 60 characters, H being the entry's 'h' value as returned
    by utils.fasta_dict_from_file.

    Raises:
        Exception: when an ID lacks the ``compN_`` prefix.
    """
    parser = argparse.ArgumentParser(
        description='Filters trinity output for longest subcomponents based on naming convention'
    )

    ## where the sequences come from
    parser.add_argument(
        '-i', '--input', type=str, required=True,
        help='Path to an input FASTA file')
    ## where the filtered sequences go
    parser.add_argument(
        '-o', '--output', type=str, required=False,
        help='Output file to be created. Default = STDOUT')
    args = parser.parse_args()

    ## output will either be a file or STDOUT
    writing_to_file = args.output is not None
    fout = open(args.output, 'wt') if writing_to_file else sys.stdout

    try:
        seqs = utils.fasta_dict_from_file(args.input)

        ## longest record found so far for each component ID
        components = dict()

        for seq_id in seqs:
            ## raw string keeps \d as a regex digit class, not a string escape
            m = re.search(r"(comp\d+)_", seq_id)

            if not m:
                raise Exception(
                    "ERROR: This ID wasn't in the expected format of compN_cN_seqN: {0}"
                    .format(seq_id))

            component_id = m.group(1)
            candidate = seqs[seq_id]

            ## strict '>' means the first of several equally long entries wins
            if component_id not in components or len(candidate['s']) > len(components[component_id]['s']):
                candidate['longest_id'] = seq_id
                components[component_id] = candidate

        for c_id in components:
            winner = components[c_id]
            seq_wrapped = utils.wrapped_fasta(winner['s'], every=60)
            fout.write(">{0} {1}\n{2}\n".format(winner['longest_id'], winner['h'], seq_wrapped))
    finally:
        ## only close a handle this function opened; never close STDOUT
        if writing_to_file:
            fout.close()
def write_fasta_results(f, polypeptides):
    """
    Write each polypeptide to f as a FASTA record with an annotated header.

    Header layout:

        >ID PRODUCT_NAME gene::GENE_SYMBOL ec::EC_NUMBERS go::GO_TERMS

    The gene:: field is left out when the gene symbol is None; the ec:: and
    go:: fields are left out when there are no EC numbers / GO annotations.
    EC numbers and GO terms are comma-separated, and every GO id is written
    with a "GO:" prefix.

    Illustrative example (made-up values):

        >pp1 Hexokinase gene::hxk ec::2.7.1.1 go::GO:0004396,GO:0005524

    f is anything with a write() method; polypeptides is indexed by
    polypeptide ID and iterating it yields those IDs.
    """
    for pid in polypeptides:
        protein = polypeptides[pid]
        annot = protein.annotation

        # Each term carries a trailing comma; the ones at the very end of the
        # joined string are stripped afterwards.
        go_terms = "".join("GO:{0},".format(go.go_id) for go in annot.go_annotations).rstrip(',')
        ec_terms = "".join("{0},".format(ec.number) for ec in annot.ec_numbers).rstrip(',')

        defline = "{0} {1}".format(pid, annot.product_name)

        if annot.gene_symbol is not None:
            defline = "{0} gene::{1}".format(defline, annot.gene_symbol)

        if ec_terms:
            defline = "{0} ec::{1}".format(defline, ec_terms)

        if go_terms:
            defline = "{0} go::{1}".format(defline, go_terms)

        f.write(">{0}\n".format(defline))
        f.write("{0}\n".format(utils.wrapped_fasta(protein.residues)))
def write_fasta_results(f, polypeptides):
    """
    Emit one FASTA record per polypeptide, with annotation packed into the
    header line:

        >ID PRODUCT_NAME gene::GENE_SYMBOL ec::EC_NUMBERS go::GO_TERMS

    Optional fields appear in that fixed order and only when they have a
    value: gene:: needs a gene symbol that is not None, ec:: and go:: need a
    non-empty comma-separated list.  GO ids are prefixed with "GO:".

    Illustrative example (made-up values):

        >pp7 Citrate synthase gene::gltA ec::2.3.3.1 go::GO:0004108

    The residues follow the header, formatted by utils.wrapped_fasta.
    """
    for polypeptide_id in polypeptides:
        entry = polypeptides[polypeptide_id]

        go_pieces = []
        for go_annot in entry.annotation.go_annotations:
            go_pieces.append("GO:{0},".format(go_annot.go_id))
        go_list = "".join(go_pieces).rstrip(',')

        ec_pieces = []
        for ec_annot in entry.annotation.ec_numbers:
            ec_pieces.append("{0},".format(ec_annot.number))
        ec_list = "".join(ec_pieces).rstrip(',')

        header = "{0} {1}".format(polypeptide_id, entry.annotation.product_name)
        symbol = entry.annotation.gene_symbol

        # (include this field?, template extending the header, field value)
        optional_fields = (
            (symbol is not None, "{0} gene::{1}", symbol),
            (ec_list != "", "{0} ec::{1}", ec_list),
            (go_list != "", "{0} go::{1}", go_list),
        )
        for present, template, value in optional_fields:
            if present:
                header = template.format(header, value)

        f.write(">{0}\n".format(header))
        f.write("{0}\n".format(utils.wrapped_fasta(entry.residues)))