# For every modification site listed in the dbPTM table, look up the protein
# sequence in the UniProt FASTA file and cut out the site together with up to
# 20 residues on each side (at most 41 residues in total).
inputfile1 = open(sys.argv[1], 'r')  # Database from dbPTM (tab-separated)
inputfile2 = open(sys.argv[2], 'r')  # UniProt DB (FASTA)
outputfile = open(sys.argv[3], 'w')  # output

# Map accession -> protein sequence for every record of the FASTA file.
dic = {}
for y in SeqIO.parse(inputfile2, 'fasta'):
    # The accession is taken from the second "|"-separated field of the
    # record id; an id without "|" raises IndexError here.
    hd_ls = y.id.split("|")
    acc1 = hd_ls[1].strip()
    dic[acc1] = str(y.seq)

# NOTE(review): linenum() receives the open file object; presumably it leaves
# the handle positioned at the start for the loop below -- confirm.
oneper = progress_counter.linenum(inputfile1, 'nonfasta')
for num1, x in enumerate(inputfile1):
    progress_counter.progress(num1, oneper)
    if num1 > 0:  # the first line is skipped (presumably a column header)
        x_ls = x.split('\t')
        acc = x_ls[1].strip()
        pos = int(x_ls[2].strip())  # used below as a 1-based sequence position
        residue = x_ls[6].strip()
        pro_seq = dic.get(acc)

        if pro_seq is not None:
            # Window = residue at `pos` plus up to 20 residues on either side.
            # The start is clamped at 0 so sites near the N-terminus do not
            # wrap around via a negative index; slicing already clips at the
            # end of the sequence, so sites near the C-terminus keep the final
            # residue (the previous explicit `len(pro_seq) - 1` bound
            # dropped it).
            pep40 = pro_seq[max(pos - 21, 0):pos + 20]
            
#!/usr/bin/python
import sys
from Bio import SeqIO
import progress_counter
import re

# Copy to the output only those FASTA records whose header carries a
# bracketed tag containing "Saccharomyces cerevisiae".
inputfile1 = open(sys.argv[1], 'r')  # input db
outputfile = open(sys.argv[2], 'w')  # output db

# Captures everything between the first "[" and the last "]" of a header
# (greedy match). Raw string avoids the invalid "\[" escape warning, and the
# pattern is compiled once instead of on every record.
strain_tag = re.compile(r"\[(.*)\]")

for num1, x in enumerate(SeqIO.parse(inputfile1, "fasta")):  # RefSeq fasta
    progress_counter.progress(inputfile1, num1, "fasta")  # inputfile name, line number, fasta or nonfasta
    header = x.description
    seq = str(x.seq)
    strain = strain_tag.findall(header)
    # A header without any "[...]" tag gives an empty list; such records are
    # skipped rather than crashing on strain[0].
    if strain and "Saccharomyces cerevisiae" in strain[0]:  # Define strain name
        outputfile.write('>' + header + "\n" + seq + "\n")