def searchAndSave(searchString, filename):
    """Search the sequence database and save the hits to a FASTA file.

    Convenience wrapper around the ``seq`` helpers. If ``filename`` already
    exists as a file, nothing is searched or written and a notice is
    printed instead, so an existing file is never overwritten.

    Args:
        searchString: Query passed unchanged to ``seq.searchSequences``.
        filename: Path of the FASTA file to create.

    Returns:
        None. The results are written to ``filename`` and progress messages
        are printed to standard output.
    """
    # Guard clause: an existing file means the work was already done.
    if path.isfile(filename):
        # Single-argument print() behaves the same on Python 2 and 3. The
        # doubled spaces reproduce the output of the former comma-separated
        # print statement exactly.
        print("%s  exists. skipping." % (filename,))
        return

    # Identifiers of every database entry matching the query.
    ids = seq.searchSequences(searchString)
    print("Processing  %d  sequences ..." % len(ids))

    # Fetch the records one at a time so progress is reported per identifier.
    sequences = []
    for seq_id in ids:
        print("Fetching sequence:  %s" % (seq_id,))
        sequences.append(seq.getSequence(seq_id))

    # Write the complete list of fetched records in a single call.
    seq.writeFastaFile(filename, sequences)
def update_task_list_dir(self, dirpath):
    """Create a task for every file sequence detected in ``dirpath``.

    Each base reported by ``sequence.getBases`` is resolved with
    ``sequence.getSequence`` and the resulting five fields are handed to
    ``self.create_task``. ``self.update_tasks`` is called once afterwards.

    The original note states that pre-existing tasks are not added again,
    to avoid duplication. That check is not visible in this method and
    presumably lives in ``create_task`` (to be confirmed).
    """
    for base in sequence.getBases(dirpath, delimiter=""):
        # getSequence is expected to yield exactly five values, in this order.
        (seq_dir, seq_prefix, seq_frames,
         seq_ext, seq_count) = sequence.getSequence(
            dirpath, base, delimiter="", ignorePadding=False)
        self.create_task(seq_dir, seq_prefix, seq_frames, seq_ext, seq_count)
    self.update_tasks()
# NOTE(review): the line below carries several statements run together; it is
# kept exactly as found and only described here.
#
# * The leading `return os.path.isfile(filename)` reports whether `filename`
#   is an existing regular file. It is the tail of a definition whose header
#   is not visible from here.
# * The statements that follow build three `seq.Sequence` objects from literal
#   strings and print the `alphabet` attribute of each one.
# * The `seq4` example is behind a `#`. Where the commented-out part ends
#   cannot be determined from this layout (TODO confirm against the original
#   file). The remaining text reads as: fetch 'RNS1_ARATH' from 'uniprot' and
#   print `rns1.count('S')`, run the `id6` search, print the sizes of
#   `ex5_ids` and `id6`, intersect the two id collections as sets and print
#   the size of the intersection.
# * NOTE(review): the only visible assignment to `ex5_ids` is itself behind a
#   `#`; unless the name is bound elsewhere, the later uses of `ex5_ids` would
#   raise NameError -- confirm.
# * NOTE(review): `to_be_written` is assigned but not used in the visible
#   text, and the final `for i in ids: pass` loop has an empty body.
return os.path.isfile(filename) seq1 = seq.Sequence('AAAAAAGGGGG') print seq1.alphabet seq2 = seq.Sequence('AAAAAGGGUG') print seq2.alphabet seq3 = seq.Sequence('AWAAAAAAGGVG') print seq3.alphabet #seq4 = seq.Sequence('Z') #print seq3.alphabet rns1 = seq.getSequence('RNS1_ARATH', 'uniprot') print rns1.count('S') # ex5_ids = seq.searchSequences("signal+peptide+AND+organism:Arabidopsis+thaliana[3702]+AND+length:[100+TO+*]") id6 = seq.searchSequences("Lipid+metabolism+AND+organism:3702+AND+fragment:no+AND+length:[100+TO+*]") print "ID5: ", ex5_ids.__len__(), " ID6: ", id6.__len__() to_be_written = [] ids = set(ex5_ids).intersection(set(id6)) print ids.__len__() for i in ids: pass