Пример #1
0
from scripts.fix_sequence import fix_sequences
from scripts.fasta import batch_fasta_save, batch_to_fasta

def read_csv(file_path):
	"""Collect the rows of a comma-separated file whose third field is ``'S'``.

	Every non-blank line is split on commas. When the third field equals
	``'S'`` the first two fields are kept as a tuple (presumably the
	sequence identifier and the sequence itself -- confirm against the
	data file).

	Args:
		file_path: Path of the text file to read.

	Returns:
		A list of ``(field0, field1)`` tuples in file order.

	Raises:
		IndexError: If a non-blank line has fewer than three fields.
	"""
	sequences_ = []
	# The ``with`` statement closes the file; no explicit close() is needed.
	with open(file_path, 'r') as f:
		for line in f:
			line = line.rstrip('\n')
			# Skip blank lines rather than blindly dropping the last line, so
			# a file without a trailing newline still keeps its final row.
			if not line.strip():
				continue
			fields = line.split(',')
			if fields[2] == 'S':
				sequences_.append((fields[0], fields[1]))
	return sequences_


if __name__ == '__main__':
	# Pipeline: take the rows read_csv selects, pass them through
	# fix_sequences (the meaning of the second argument, 20, is defined in
	# scripts.fix_sequence), hand the result to batch_to_fasta, report how
	# many entries remain and save them.
	selected_rows = read_csv('gram+.signalp.full.csv')
	fixed_rows = fix_sequences(selected_rows, 20)
	sequences = batch_to_fasta(fixed_rows)
	total = len(sequences)
	print('At the end we have %s sequences that contain signal peptide' % total)
	batch_fasta_save('parse_gram+.signalp.fasta', sequences)
Пример #2
0
from scripts.fix_sequence import fix_sequences
from scripts.fasta import batch_fasta_save, batch_to_fasta

def read_csv(file_path):
	"""Split the rows of a comma-separated file by their third field.

	Every non-blank line is split on commas. The first two fields are kept
	as a tuple (presumably the sequence identifier and the sequence itself
	-- confirm against the data file). Rows whose third field equals ``'S'``
	go to the first list, all other rows to the second.

	Args:
		file_path: Path of the text file to read.

	Returns:
		A pair ``(sequences, negative_sequences)``; each is a list of
		``(field0, field1)`` tuples in file order.

	Raises:
		IndexError: If a non-blank line has fewer than three fields.
	"""
	sequences_ = []
	negative_sequences_ = []
	# The ``with`` statement closes the file; no explicit close() is needed.
	with open(file_path, 'r') as f:
		for line in f:
			line = line.rstrip('\n')
			# Skip blank lines rather than blindly dropping the last line, so
			# a file without a trailing newline still keeps its final row.
			if not line.strip():
				continue
			fields = line.split(',')
			target = sequences_ if fields[2] == 'S' else negative_sequences_
			target.append((fields[0], fields[1]))
	return sequences_, negative_sequences_


if __name__ == '__main__':
	# read_csv returns the rows labelled 'S' and the remaining rows as two
	# lists. Both go through the same steps: fix_sequences (the meaning of
	# the second argument, 96, is defined in scripts.fix_sequence), then
	# batch_to_fasta, then a count report and a save to separate files.
	positives, negatives = read_csv('euk.signalp.full.csv')
	positives = fix_sequences(positives, 96)
	negatives = fix_sequences(negatives, 96)
	sequences = batch_to_fasta(positives)
	negative_sequences = batch_to_fasta(negatives)
	positive_total = len(sequences)
	negative_total = len(negative_sequences)
	print('At the end we have %s sequences that contain signal peptide' % positive_total)
	print('At the end we have %s sequences that dont contain signal peptide' % negative_total)
	batch_fasta_save('parse_euk.signalp.fasta', sequences)
	batch_fasta_save('parse_euk.-.signalp.fasta', negative_sequences)
Пример #3
0
    Sequences = []
    with open(file_path, 'r') as f:
        Seqs = f.read().split('\n')
        f.close()
    Seqs.pop(-1)
    for i in range(0, len(Seqs), 2):
        Sequences.append((Seqs[i].split('>')[-1], Seqs[i + 1]))
    return Sequences


def pick_Sequences(IDs, Sequences):
    """Select the records whose first element matches one of the given IDs.

    The result follows the order of ``IDs``; for each ID, all matching
    records appear in the order they have in ``Sequences``. An ID listed
    more than once contributes its records once per occurrence, and IDs
    with no matching record contribute nothing.

    Args:
        IDs: Iterable of identifiers to look for (must be hashable).
        Sequences: Iterable of indexable records whose element ``0`` is the
            identifier to compare against.

    Returns:
        A new list holding the matching records.
    """
    # Index the records by identifier once, so each ID costs a dict lookup
    # instead of a full scan of Sequences.
    by_id = {}
    for record in Sequences:
        by_id.setdefault(record[0], []).append(record)

    Sequences_ = []
    for wanted in IDs:
        Sequences_.extend(by_id.get(wanted, ()))
    return Sequences_


if __name__ == '__main__':
    # Keep only the FASTA records selected by the IDs that read_csv returns,
    # run them through fix_sequences (the meaning of the second argument, 20,
    # is defined by that helper), then format, print and save the result.
    wanted_ids = read_csv('gram-.spds17.csv')
    all_records = read_fasta('gram-.spds17.fasta')
    print('At the begin we have %s sequences' % len(all_records))

    picked = pick_Sequences(wanted_ids, all_records)
    fixed = fix_sequences(picked, 20)
    print('At the end we have %s sequences that contain signal peptide' %
          len(fixed))

    Sequences = batch_to_fasta(fixed)
    print(Sequences)
    batch_fasta_save('parse_gram-.-.spds17.fasta', Sequences)
Пример #4
0
    for i in range(0, len(Seqs), 2):
        Sequences.append((Seqs[i].split('>')[-1], Seqs[i + 1]))
    return Sequences


def pick_Sequences(IDs, Sequences):
    """Return the records of ``Sequences`` whose element ``0`` is in ``IDs``.

    Output is grouped by ID in the order the IDs are given; within one ID
    the records keep their relative order from ``Sequences``. Repeated IDs
    yield repeated records, and IDs without a match are ignored.

    Args:
        IDs: Iterable of identifiers to look for (must be hashable).
        Sequences: Iterable of indexable records; element ``0`` of each is
            compared with the identifiers.

    Returns:
        A new list with the selected records.
    """
    # One pass to group records by identifier replaces the nested scan,
    # which re-read all of Sequences for every single ID.
    records_by_id = {}
    for record in Sequences:
        records_by_id.setdefault(record[0], []).append(record)

    Sequences_ = []
    for wanted in IDs:
        Sequences_.extend(records_by_id.get(wanted, ()))
    return Sequences_


if __name__ == '__main__':
    # read_csv supplies two ID lists; each selects its own subset of the
    # FASTA records. Both subsets go through fix_sequences (the meaning of
    # the second argument, 20, is defined by that helper) and are counted,
    # formatted with batch_to_fasta and written to separate files.
    signal_ids, non_signal_ids = read_csv('euk.spds17.csv')
    all_records = read_fasta('euk.spds17.fasta')
    print('At the begin we have %s sequences' % len(all_records))

    picked_positive = pick_Sequences(signal_ids, all_records)
    picked_negative = pick_Sequences(non_signal_ids, all_records)
    fixed_positive = fix_sequences(picked_positive, 20)
    fixed_negative = fix_sequences(picked_negative, 20)

    print('At the end we have %s sequences that contain signal peptide' %
          len(fixed_positive))
    print('At the end we have %s sequences that dont contain signal peptide' %
          len(fixed_negative))

    sequences = batch_to_fasta(fixed_positive)
    negative_sequences = batch_to_fasta(fixed_negative)
    batch_fasta_save('parse_euk.spds17.fasta', sequences)
    batch_fasta_save('parse_euk.-.spds17.fasta', negative_sequences)