def main():
    """Run the cleavage site prediction command-line tool.

    Parses the command line, reads the proteins from the FASTA input file,
    runs the selected cleavage site predictor and writes the result to the
    output file.

    Output format:
        * ``--length`` > 0: a compact tab-separated table with one row per
          sequence window of that length.  The score reported for a window
          is the one stored in the result at the window's last position.
        * otherwise: the complete result table, written as TSV with scores
          formatted to three decimals.

    Returns:
        int: 0 on success.  Invalid arguments make ``argparse`` terminate
        the process before this function returns.
    """
    model = argparse.ArgumentParser(
        description='Commandline tool for cleavage site prediction',
    )
    model.add_argument(
        '-m', '--method',
        type=str,
        choices=CleavageSitePredictorFactory.available_methods().keys(),
        default="pcm",
        help='The name of the prediction method',
    )
    model.add_argument(
        '-v', '--version',
        type=str,
        default="",
        help='The version of the prediction method',
    )
    model.add_argument(
        '-i', '--input',
        type=str,
        required=True,
        help='Path to the input file (in fasta format)',
    )
    model.add_argument(
        '-l', '--length',
        type=int,
        default=0,
        help='The length of peptides',
    )
    model.add_argument(
        '-op', '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool",
    )
    model.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Path to the output file',
    )
    args = model.parse_args()

    # The input is always a FASTA file of protein sequences.
    proteins = read_fasta(args.input, in_type=Protein)

    # Only pass ``version`` when the user asked for a specific one, so the
    # factory can fall back to its own default otherwise.
    if args.version == "":
        predictor = CleavageSitePredictorFactory(args.method)
    else:
        predictor = CleavageSitePredictorFactory(args.method, version=args.version)

    # Forward the user-supplied --options string (not the method name) to
    # the underlying tool.
    result = predictor.predict(proteins, options=args.options)

    if args.length > 0:
        # Compact output: slide a window of the requested length over each
        # sequence and report the score at the window's last position.
        length = args.length
        with open(args.output, "w") as f:
            f.write("Sequence\tMethod\tScore\tProtein ID\tPosition\n")
            # unique() keeps the order of the result index, which makes the
            # row order of the output file deterministic.
            for seq_id in result.index.get_level_values(0).unique():
                seq = "".join(result.loc[seq_id]["Seq"])
                for start in range(len(seq) - (length - 1)):
                    pep_seq = seq[start:start + length]
                    score = result.loc[(seq_id, start + (length - 1)), predictor.name]
                    f.write(
                        pep_seq + "\t" + predictor.name + "\t" + "%.3f" % score
                        + "\t" + seq_id + "\t" + str(start) + "\n"
                    )
    else:
        result.to_csv(args.output, float_format="%.3f", sep="\t")
    return 0
def main():
    """Run the TAP prediction command-line tool.

    Parses the command line, builds the peptide set (either by cutting the
    proteins of a FASTA file into peptides of ``--length`` or by reading a
    plain peptide list), runs the selected TAP predictor and writes a
    tab-separated table to the output file.

    Each data row holds the peptide sequence followed by one
    ``method<TAB>score`` pair per result column.  For FASTA input an extra
    ``Protein ID`` column lists the transcript ids of the peptide's
    proteins, comma-separated.

    Returns:
        int: 0 on success, -1 if the input type is not recognised.
    """
    model = argparse.ArgumentParser(
        description='Commandline tool for TAP prediction',
    )
    model.add_argument(
        '-m', '--method',
        type=str,
        choices=TAPPredictorFactory.available_methods().keys(),
        default="svmtap",
        help='The name of the prediction method',
    )
    model.add_argument(
        '-v', '--version',
        type=str,
        default="",
        help='The version of the prediction method',
    )
    model.add_argument(
        '-i', '--input',
        type=str,
        required=True,
        help='Path to the input file',
    )
    model.add_argument(
        '-t', '--type',
        choices=["fasta", "peptide"],
        type=str,
        default="fasta",
        help='The data type of the input (fasta, peptide list)',
    )
    model.add_argument(
        '-l', '--length',
        type=int,
        default=9,
        help='The length of peptides',
    )
    model.add_argument(
        '-op', '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool",
    )
    model.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Path to the output file',
    )
    args = model.parse_args()

    is_fasta = args.type == "fasta"
    if is_fasta:
        # Peek at the first line: if it contains a "|" the id is taken
        # from field 1 of the header, otherwise from field 0.
        with open(args.input, 'r') as f:
            first_line = f.readline()
        sep_pos = 1 if "|" in first_line else 0
        proteins = read_fasta(args.input, in_type=Protein, id_position=sep_pos)
        peptides = generate_peptides_from_proteins(proteins, args.length)
    elif args.type == "peptide":
        peptides = read_lines(args.input, in_type=Peptide)
    else:
        # Defensive only: argparse already restricts --type to the two
        # choices above.
        sys.stderr.write('Input type not known\n')
        return -1

    # Only pass ``version`` when the user asked for a specific one.
    if args.version == "":
        predictor = TAPPredictorFactory(args.method)
    else:
        predictor = TAPPredictorFactory(args.method, version=args.version)
    result = predictor.predict(peptides, options=args.options)

    # Write TSV: sequence, method, score[, protein-id/transcript-id]
    with open(args.output, "w") as f:
        header_suffix = "\tProtein ID" if is_fasta else ""
        f.write("Sequence\tMethod\t" + "Score" + header_suffix + "\n")
        for peptide, row in result.iterrows():
            scores = "\t".join(
                "%s\t%.3f" % (method, score) for method, score in row.items()
            )
            # The separator belongs to the optional column, so peptide-list
            # input does not get a dangling tab at the end of each row.
            if is_fasta:
                origin = "\t" + ",".join(
                    prot.transcript_id for prot in peptide.get_all_proteins()
                )
            else:
                origin = ""
            f.write(str(peptide) + "\t" + scores + origin + "\n")
    return 0
# download the fasta file if __name__ == "__main__": arguments = docopt(__doc__) PEPTIDE_LENGTH = 9 # get arguments if arguments["--alleles"]: alleles = arguments["--alleles"].split(",") else: alleles = None file_in = arguments["--input"] file_out = arguments["--output"] print("read fasta") proteins = read_fasta(file_in, id_position=0, in_type=Protein) # restrict to only top N proteins if provided if arguments["--top_N"]: Nargs = int(arguments["--top_N"]) N = min(Nargs, len(proteins)) proteins = proteins[0:N] # parse peptide/protein information from Peptide list and Protein list print("setup peptide/protein information table") peptides = generate_peptides_from_proteins(proteins, PEPTIDE_LENGTH) peptides_list = [peptide for peptide in peptides] proteins_list = [peptide.proteins.keys()[0] for peptide in peptides_list] peptides_str_list = [peptide.tostring() for peptide in peptides_list] peptides_position_list = [ peptide.proteinPos.items()[0][1][0] for peptide in peptides_list
def main():
    """Run the epitope prediction command-line tool.

    Parses the command line, builds the peptide set (either by cutting the
    proteins of a FASTA file into peptides of ``--length`` or by reading a
    plain peptide list), reads the alleles from the allele file, runs the
    selected epitope predictor and writes a tab-separated table to the
    output file.

    Each data row holds the peptide sequence, the method name and one score
    per allele column of the result.  For FASTA input an extra
    ``Antigen ID`` column lists the transcript ids of the peptide's
    proteins, comma-separated.

    Returns:
        int: 0 on success, -1 if the input type is not recognised.
    """
    model = argparse.ArgumentParser(
        description='Commandline tool for epitope prediction',
    )
    model.add_argument(
        '-m', '--method',
        type=str,
        choices=EpitopePredictorFactory.available_methods().keys(),
        default="bimas",
        help='The name of the prediction method',
    )
    model.add_argument(
        '-v', '--version',
        type=str,
        default="",
        help='The version of the prediction method',
    )
    model.add_argument(
        '-i', '--input',
        type=str,
        required=True,
        help='Path to the input file',
    )
    model.add_argument(
        '-t', '--type',
        choices=["fasta", "peptide"],
        type=str,
        default="fasta",
        help='The data type of the input (fasta, peptide list)',
    )
    model.add_argument(
        '-l', '--length',
        choices=range(8, 18),
        type=int,
        default=9,
        help='The length of peptides',
    )
    model.add_argument(
        '-a', '--alleles',
        type=str,
        required=True,
        help='Path to the allele file (one per line in new nomenclature)',
    )
    model.add_argument(
        '-op', '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool",
    )
    model.add_argument(
        '-o', '--output',
        type=str,
        required=True,
        help='Path to the output file',
    )
    args = model.parse_args()

    is_fasta = args.type == "fasta"
    if is_fasta:
        # Peek at the first line: if it contains a "|" the id is taken
        # from field 1 of the header, otherwise from field 0.
        with open(args.input, 'r') as f:
            first_line = f.readline()
        sep_pos = 1 if "|" in first_line else 0
        proteins = read_fasta(args.input, in_type=Protein, id_position=sep_pos)
        peptides = generate_peptides_from_proteins(proteins, args.length)
    elif args.type == "peptide":
        peptides = read_lines(args.input, in_type=Peptide)
    else:
        # Defensive only: argparse already restricts --type to the two
        # choices above.
        sys.stderr.write('Input type not known\n')
        return -1

    # Read in the alleles to predict for.
    alleles = read_lines(args.alleles, in_type=Allele)

    # Only pass ``version`` when the user asked for a specific one.
    if args.version == "":
        predictor = EpitopePredictorFactory(args.method)
    else:
        predictor = EpitopePredictorFactory(args.method, version=args.version)
    result = predictor.predict(peptides, alleles, options=args.options)

    # Write TSV: sequence, method, allele-scores...[, protein-id/transcript-id]
    with open(args.output, "w") as f:
        header_suffix = "\tAntigen ID" if is_fasta else ""
        # Use the result's own columns so header and rows stay aligned.
        allele_columns = result.columns
        f.write(
            "Sequence\tMethod\t"
            + "\t".join(a.name for a in allele_columns)
            + header_suffix + "\n"
        )
        for index, row in result.iterrows():
            # The result index is a (peptide, method) pair.
            peptide = index[0]
            method = index[1]
            if is_fasta:
                origin = "\t" + ",".join(
                    prot.transcript_id for prot in peptide.get_all_proteins()
                )
            else:
                origin = ""
            scores = "\t".join("%.3f" % row[a] for a in allele_columns)
            f.write(str(peptide) + "\t" + method + "\t" + scores + origin + "\n")
    return 0
def main():
    """Entry point of the cleavage site prediction command-line tool.

    Steps:
        1. Parse the command-line arguments.
        2. Read the protein sequences from the FASTA input file.
        3. Run the requested cleavage site predictor, forwarding the
           ``--options`` string to it.
        4. Write the result to the output file: a compact per-window table
           if ``--length`` is greater than zero, otherwise the full result
           table as TSV (scores with three decimals).

    In the compact table, a window starting at ``start`` is scored with the
    value the result holds at position ``start + length - 1``.

    Returns:
        int: 0 on success.  Invalid arguments make ``argparse`` terminate
        the process before this function returns.
    """
    model = argparse.ArgumentParser(
        description='Commandline tool for cleavage site prediction',
    )
    model.add_argument('-m', '--method',
                       type=str,
                       choices=CleavageSitePredictorFactory.available_methods().keys(),
                       default="pcm",
                       help='The name of the prediction method')
    model.add_argument('-v', '--version',
                       type=str,
                       default="",
                       help='The version of the prediction method')
    model.add_argument('-i', '--input',
                       type=str,
                       required=True,
                       help='Path to the input file (in fasta format)')
    model.add_argument('-l', '--length',
                       type=int,
                       default=0,
                       help='The length of peptides')
    model.add_argument('-op', '--options',
                       type=str,
                       default="",
                       help="Additional options that get directly past to the tool")
    model.add_argument('-o', '--output',
                       type=str,
                       required=True,
                       help='Path to the output file')
    args = model.parse_args()

    # FASTA input -> protein objects
    proteins = read_fasta(args.input, in_type=Protein)

    # An empty version string means "let the factory pick its default".
    factory_kwargs = {}
    if args.version != "":
        factory_kwargs["version"] = args.version
    predictor = CleavageSitePredictorFactory(args.method, **factory_kwargs)

    # The tool options come from --options; the method name is only used
    # to select the predictor above.
    result = predictor.predict(proteins, options=args.options)

    if args.length <= 0:
        # No peptide length requested: dump the full result table.
        result.to_csv(args.output, float_format="%.3f", sep="\t")
        return 0

    # A length was specified: generate the compact per-window output.
    length = args.length
    with open(args.output, "w") as out:
        out.write("Sequence\tMethod\tScore\tProtein ID\tPosition\n")
        # unique() preserves index order, so rows come out in a stable,
        # reproducible order.
        for seq_id in result.index.get_level_values(0).unique():
            seq = "".join(result.loc[seq_id]["Seq"])
            for start in range(len(seq) - (length - 1)):
                end = start + length
                score = result.loc[(seq_id, end - 1), predictor.name]
                fields = [seq[start:end], predictor.name, "%.3f" % score,
                          seq_id, str(start)]
                out.write("\t".join(fields) + "\n")
    return 0