Python read_fasta示例，Fred2.IO.read_fasta Python示例

示例#1

0

显示文件

def main():
    """Command-line entry point for cleavage site prediction.

    Parses the command-line arguments, reads the input file with
    ``read_fasta`` (as ``Protein`` entries), runs the selected cleavage
    site predictor and writes the result to the ``--output`` path.

    If ``--length`` is greater than zero, a compact tab-separated file is
    written with one row per window of that length; the score reported for
    a window is the one stored at the window's last position.  Otherwise
    the full prediction result is written with ``to_csv``.

    Returns:
        int: 0 on success.
    """
    #Specify CTD interface
    # Every CTD Model has to have at least a name and a version, plus any of the optional attributes below them.
    model = argparse.ArgumentParser(
        description='Commandline tool for cleavage site prediction', )

    model.add_argument(
        '-m',
        '--method',
        type=str,
        choices=CleavageSitePredictorFactory.available_methods().keys(),
        default="pcm",
        help='The name of the prediction method')

    model.add_argument('-v',
                       '--version',
                       type=str,
                       default="",
                       help='The version of the prediction method')

    model.add_argument('-i',
                       '--input',
                       type=str,
                       required=True,
                       help='Path to the input file (in fasta format)')

    model.add_argument('-l',
                       '--length',
                       type=int,
                       default=0,
                       help='The length of peptides')

    model.add_argument(
        '-op',
        '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool")

    model.add_argument('-o',
                       '--output',
                       type=str,
                       required=True,
                       help='Path to the output file')

    args = model.parse_args()

    # The input is a FASTA file of protein sequences.
    proteins = read_fasta(args.input, in_type=Protein)

    # Only pass a version when the user asked for one.
    if args.version == "":
        predictor = CleavageSitePredictorFactory(args.method)
    else:
        predictor = CleavageSitePredictorFactory(args.method,
                                                 version=args.version)

    # Forward the user-supplied tool options (previously the method name was
    # passed here by mistake, so --options was silently ignored).
    result = predictor.predict(proteins, options=args.options)

    # If a length is specified, then generate the compact output.
    length = args.length  # already an int (argparse type=int)
    if length > 0:
        with open(args.output, "w") as f:
            f.write("Sequence\tMethod\tScore\tProtein ID\tPosition\n")
            for seq_id in set(result.index.get_level_values(0)):
                # Rebuild the full sequence from the per-position "Seq" column.
                seq = "".join(result.loc[seq_id]["Seq"])
                for start in xrange(len(seq) - (length - 1)):
                    pep_seq = seq[start:(start + length)]
                    # Score is looked up at the last position of the window.
                    score = result.loc[(seq_id, start + (length - 1)),
                                       predictor.name]
                    f.write(pep_seq + "\t" + predictor.name + "\t" +
                            "%.3f" % score + "\t" + seq_id + "\t" +
                            str(start) + "\n")
    else:
        result.to_csv(args.output, float_format="%.3f", sep="\t")
    return 0

示例#2

0

显示文件

文件： taprediction.py 项目： lkuchenb/ImmunoNodes

def main():
    """Command-line entry point for TAP prediction.

    Parses the command-line arguments, loads peptides either by cutting
    them out of a FASTA protein file (``--type fasta``) or by reading them
    line by line (``--type peptide``), runs the selected TAP predictor and
    writes a tab-separated result file to ``--output``.

    Output columns are ``Sequence``, ``Method``, ``Score`` and, for FASTA
    input only, ``Protein ID`` (comma-separated transcript ids).

    Returns:
        int: 0 on success, -1 if the input type is not recognised.
    """
    model = argparse.ArgumentParser(
        description='Commandline tool for TAP prediction',
        )

    model.add_argument('-m',
        '--method',
        type=str,
        choices=TAPPredictorFactory.available_methods().keys(),
        default="svmtap",
        help='The name of the prediction method'
        )

    model.add_argument('-v',
        '--version',
        type=str,
        default="",
        help='The version of the prediction method'
        )

    model.add_argument('-i',
        '--input',
        type=str,
        required=True,
        help='Path to the input file'
        )

    model.add_argument('-t',
        '--type',
        choices=["fasta", "peptide"],
        type=str,
        default="fasta",
        help='The data type of the input (fasta, peptide list)'
        )

    model.add_argument('-l',
        '--length',
        type=int,
        default=9,
        help='The length of peptides'
        )

    model.add_argument('-op',
        '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool"
    )

    model.add_argument('-o',
        '--output',
        type=str,
        required=True,
        help='Path to the output file'
        )

    args = model.parse_args()
    is_fasta = args.type == "fasta"

    if is_fasta:
        # Peek at the first line: if it contains "|" use field 1 as the id,
        # otherwise field 0 (presumably the "|"-separated header field that
        # read_fasta uses as the protein id -- confirm against read_fasta).
        with open(args.input, 'r') as f:
            first_line = f.readline()
        sep_pos = 1 if first_line.count("|") else 0
        proteins = read_fasta(args.input, in_type=Protein, id_position=sep_pos)
        peptides = generate_peptides_from_proteins(proteins, args.length)
    elif args.type == "peptide":
        peptides = read_lines(args.input, in_type=Peptide)
    else:
        # Defensive: argparse's ``choices`` should already reject this.
        sys.stderr.write('Input type not known\n')
        return -1

    # Only pass a version when the user asked for one.
    if args.version == "":
        predictor = TAPPredictorFactory(args.method)
    else:
        predictor = TAPPredictorFactory(args.method, version=args.version)
    result = predictor.predict(peptides, options=args.options)

    #write to TSV columns sequence method score...,protein-id/transcript-id
    with open(args.output, "w") as f:
        header_suffix = "\tProtein ID" if is_fasta else ""
        f.write("Sequence\tMethod\tScore" + header_suffix + "\n")
        for pep, row in result.iterrows():
            # The separator belongs to the optional column so that peptide
            # input does not produce a dangling tab / empty trailing field.
            if is_fasta:
                origin = "\t" + ",".join(prot.transcript_id
                                         for prot in pep.get_all_proteins())
            else:
                origin = ""
            scores = "\t".join("%s\t%.3f" % (method, score)
                               for method, score in row.items())
            f.write(str(pep) + "\t" + scores + origin + "\n")
    return 0

示例#3

0

显示文件

# download the fasta file

if __name__ == "__main__":
    arguments = docopt(__doc__)
    PEPTIDE_LENGTH = 9

    # get arguments
    if arguments["--alleles"]:
        alleles = arguments["--alleles"].split(",")
    else:
        alleles = None
    file_in = arguments["--input"]
    file_out = arguments["--output"]

    print("read fasta")
    proteins = read_fasta(file_in, id_position=0, in_type=Protein)

    # restrict to only top N proteins if provided
    if arguments["--top_N"]:
        Nargs = int(arguments["--top_N"])
        N = min(Nargs, len(proteins))
        proteins = proteins[0:N]

    # parse peptide/protein information from Peptide list and Protein list
    print("setup peptide/protein information table")
    peptides = generate_peptides_from_proteins(proteins, PEPTIDE_LENGTH)
    peptides_list = [peptide for peptide in peptides]
    proteins_list = [peptide.proteins.keys()[0] for peptide in peptides_list]
    peptides_str_list = [peptide.tostring() for peptide in peptides_list]
    peptides_position_list = [
        peptide.proteinPos.items()[0][1][0] for peptide in peptides_list

示例#4

0

显示文件

文件： epitopeprediction.py 项目： lkuchenb/ImmunoNodes

def main():
    """Command-line entry point for epitope prediction.

    Parses the command-line arguments, loads peptides either by cutting
    them out of a FASTA protein file (``--type fasta``) or by reading them
    line by line (``--type peptide``), reads the alleles from the
    ``--alleles`` file, runs the selected epitope predictor and writes a
    tab-separated result file to ``--output``.

    Output columns are ``Sequence``, ``Method``, one score column per
    allele in the result and, for FASTA input only, ``Antigen ID``
    (comma-separated transcript ids).

    Returns:
        int: 0 on success, -1 if the input type is not recognised.
    """
    #Specify CTD interface
    # Every CTD Model has to have at least a name and a version, plus any of the optional attributes below them.
    # The description used to be the argparse tutorial placeholder
    # ("Process some integers."); it now describes this tool.
    model = argparse.ArgumentParser(
        description='Commandline tool for epitope prediction',
        )

    model.add_argument('-m',
        '--method',
        type=str,
        choices=EpitopePredictorFactory.available_methods().keys(),
        default="bimas",
        help='The name of the prediction method'
        )

    model.add_argument('-v',
        '--version',
        type=str,
        default="",
        help='The version of the prediction method'
        )

    model.add_argument('-i',
        '--input',
        type=str,
        required=True,
        help='Path to the input file'
        )

    model.add_argument('-t',
        '--type',
        choices=["fasta","peptide"],
        type=str,
        default="fasta",
        help='The data type of the input (fasta, peptide list)'
        )

    model.add_argument('-l',
        '--length',
        choices=range(8, 18),
        type=int,
        default=9,
        help='The length of peptides'
        )

    model.add_argument('-a',
        '--alleles',
        type=str,
        required=True,
        help='Path to the allele file (one per line in new nomenclature)'
        )

    model.add_argument('-op',
        '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool"
    )

    model.add_argument('-o',
        '--output',
        type=str,
        required=True,
        help='Path to the output file'
        )

    args = model.parse_args()
    is_fasta = args.type == "fasta"

    if is_fasta:
        # Peek at the first line: if it contains "|" use field 1 as the id,
        # otherwise field 0 (presumably the "|"-separated header field that
        # read_fasta uses as the protein id -- confirm against read_fasta).
        with open(args.input, 'r') as f:
            first_line = f.readline()
        sep_pos = 1 if first_line.count("|") else 0
        proteins = read_fasta(args.input, in_type=Protein, id_position=sep_pos)
        peptides = generate_peptides_from_proteins(proteins, args.length)
    elif args.type == "peptide":
        peptides = read_lines(args.input, in_type=Peptide)
    else:
        # Defensive: argparse's ``choices`` should already reject this.
        sys.stderr.write('Input type not known\n')
        return -1

    #read in alleles
    alleles = read_lines(args.alleles, in_type=Allele)

    # Only pass a version when the user asked for one.
    if args.version == "":
        predictor = EpitopePredictorFactory(args.method)
    else:
        predictor = EpitopePredictorFactory(args.method, version=args.version)
    result = predictor.predict(peptides, alleles, options=args.options)

    #write to TSV columns sequence method allele-scores...,protein-id/transcript-id
    with open(args.output, "w") as f:
        header_suffix = "\tAntigen ID" if is_fasta else ""
        # Use the alleles actually present in the result as score columns.
        result_alleles = result.columns
        f.write("Sequence\tMethod\t" + "\t".join(a.name for a in result_alleles)
                + header_suffix + "\n")
        for index, row in result.iterrows():
            # Each index entry is a (peptide, method) pair.
            pep = index[0]
            method = index[1]
            if is_fasta:
                origin = "\t" + ",".join(prot.transcript_id
                                         for prot in pep.get_all_proteins())
            else:
                origin = ""
            scores = "\t".join("%.3f" % row[a] for a in result_alleles)
            f.write(str(pep) + "\t" + method + "\t" + scores + origin + "\n")

    return 0

示例#5

0

显示文件

文件： cleavageprediction.py 项目： FRED-2/ImmunoNodes

def main():
    """Command-line entry point for cleavage site prediction.

    Parses the command-line arguments, reads the input file with
    ``read_fasta`` (as ``Protein`` entries), runs the selected cleavage
    site predictor and writes the result to the ``--output`` path.

    If ``--length`` is greater than zero, a compact tab-separated file is
    written with one row per window of that length; the score reported for
    a window is the one stored at the window's last position.  Otherwise
    the full prediction result is written with ``to_csv``.

    Returns:
        int: 0 on success.
    """
    #Specify CTD interface
    # Every CTD Model has to have at least a name and a version, plus any of the optional attributes below them.
    model = argparse.ArgumentParser(
        description='Commandline tool for cleavage site prediction',
        )

    model.add_argument('-m',
        '--method',
        type=str,
        choices=CleavageSitePredictorFactory.available_methods().keys(),
        default="pcm",
        help='The name of the prediction method'
        )

    model.add_argument('-v',
        '--version',
        type=str,
        default="",
        help='The version of the prediction method'
        )

    model.add_argument('-i',
        '--input',
        type=str,
        required=True,
        help='Path to the input file (in fasta format)'
        )

    model.add_argument('-l',
        '--length',
        type=int,
        default=0,
        help='The length of peptides'
        )

    model.add_argument('-op',
        '--options',
        type=str,
        default="",
        help="Additional options that get directly past to the tool"
    )

    model.add_argument('-o',
        '--output',
        type=str,
        required=True,
        help='Path to the output file'
        )

    args = model.parse_args()

    # The input is a FASTA file of protein sequences.
    proteins = read_fasta(args.input, in_type=Protein)

    # Only pass a version when the user asked for one.
    if args.version == "":
        predictor = CleavageSitePredictorFactory(args.method)
    else:
        predictor = CleavageSitePredictorFactory(args.method, version=args.version)

    # Forward the user-supplied tool options (previously the method name was
    # passed here by mistake, so --options was silently ignored).
    result = predictor.predict(proteins, options=args.options)

    # If a length is specified, then generate the compact output.
    length = args.length  # already an int (argparse type=int)
    if length > 0:
        with open(args.output, "w") as f:
            f.write("Sequence\tMethod\tScore\tProtein ID\tPosition\n")
            for seq_id in set(result.index.get_level_values(0)):
                # Rebuild the full sequence from the per-position "Seq" column.
                seq = "".join(result.loc[seq_id]["Seq"])
                for start in xrange(len(seq)-(length-1)):
                    pep_seq = seq[start:(start+length)]
                    # Score is looked up at the last position of the window.
                    score = result.loc[(seq_id, start+(length-1)), predictor.name]
                    f.write(pep_seq+"\t"+predictor.name+"\t"+"%.3f"%score+"\t"+seq_id+"\t"+str(start)+"\n")
    else:
        result.to_csv(args.output, float_format="%.3f", sep="\t")
    return 0