def _extract_clusters(self, tag, qual='ugene_name'):
    """Collect annotated features whose qualifier text matches *tag*.

    Every record loaded from ``self.genomes_files`` (GenBank format) is
    scanned; for each feature carrying the *qual* qualifier, the qualifier
    values are joined with single spaces and tested against *tag* with
    ``re.match`` (i.e. anchored at the start of the text).

    :param tag: regular expression (pattern string) selecting the features.
    :param qual: name of the qualifier holding the cluster name.
    :returns: dict mapping the joined qualifier text to the object returned
        by ``feature.extract(record)``; when several features share the
        same text, the last one encountered wins.
    """
    tagre = re.compile(tag)
    clusters = {}
    for record in load_files(self._abort_event, self.genomes_files, 'gb'):
        for f in record.features:
            if qual not in f.qualifiers:
                continue
            q = ' '.join(f.qualifiers[qual])
            if not tagre.match(q):
                continue
            c = f.extract(record)
            c.id = c.name = q
            c.description = record.description
            # Fall back to ambiguous DNA only when the sequence is typed as
            # neither nucleotide nor protein.  The previous test compared
            # the alphabet *instance* to the two classes with ``is not``
            # joined by ``or``, which is always true and therefore
            # relabelled every sequence (protein included) as DNA.
            if not isinstance(c.seq.alphabet,
                              (NucleotideAlphabet, ProteinAlphabet)):
                c.seq.alphabet = IUPAC.IUPACAmbiguousDNA()
            self._process_features(c)
            clusters[c.id] = c
    return clusters
def _main(self):
    """Download and save the nucleotide records referenced by CoxL markers.

    Reads the marker accessions (one per line, blank lines ignored) from a
    fixed text file, loads the CoxL analysis file, and keeps the sequences
    whose ``id`` is among the accessions.  For each feature of those
    sequences that has a ``coded_by`` qualifier matching ``coded_re``, the
    second capture group is queued for fetching (presumably the accession
    of the coding nucleotide record -- confirm against ``coded_re``).
    The queued terms are requested from the 'nucleotide' database through
    ``BatchEntrez`` and each returned record is written to the genomes
    directory.  Progress is reported on stdout.
    """
    markers_path = '/home/allis/Documents/INMI/Geobacillus-COX/CoxL-markers.txt'
    accessions = set()
    with open(markers_path) as inp:
        for raw_line in inp:
            accession = raw_line.strip()
            if accession:
                accessions.add(accession)
    seqs = load_files(self.abort_event, ['/home/allis/Documents/INMI/Geobacillus-COX/CoxL-analysis.files/CoxL-analysis.gb'])
    wanted = [seq for seq in seqs if seq.id in accessions]
    # Gather the unique terms to fetch from the 'coded_by' qualifiers.
    to_fetch = set()
    for target in wanted:
        for feature in target.features:
            coded = feature.qualifiers.get('coded_by')
            if coded:
                hit = coded_re.match(coded[0])
                if hit:
                    to_fetch.add(hit.group(2))
    entrez = BatchEntrez(self.abort_event, '*****@*****.**')
    recs = entrez.get_records_for_terms(list(to_fetch), 'nucleotide')
    if not recs:
        print('No records were fetched')
    else:
        out_dir = '/home/allis/Documents/INMI/Geobacillus-COX/genomes/'
        for rec in recs:
            fname = filename_for_record(rec)
            print('Saving: %s' % fname)
            safe_write(rec, out_dir + fname)
    print('Done')
if True:
    # Debugging toggle kept from the original (disabled); it appears to
    # pin the multiprocessing helpers to one CPU -- confirm before use.
    # from DegenPrimer import MultiprocessingBase
    # MultiprocessingBase.cpu_count = 1
    abort_event = Event()
    lb = BlastCLI(abort_event)
    with user_message('Loading genomes...', '\n'):
        genomes_dir = u'/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcus'
        genome_names = [
            'Thermococcus_barophilus_Ch5-complete.gb',
            'Thermococcus_onnurineus_NA1-complete-genome.gb',
            'Thermococcus_sp._ES1.gb',
            'Thermococcus-DS1-preliminary.gb',
        ]
        genome_paths = [os.path.join(genomes_dir, fname)
                        for fname in genome_names]
        genomes = load_files(abort_event, genome_paths, 'gb')
    # The first genome is the reference; all the others are subjects.
    ref, subj = genomes[0], genomes[1:]

    @shelf_result
    def g2g2shelf():
        """Run the genome-to-genome blastp of ``ref`` against ``subj``.

        Only features whose 'ugene_name' qualifier is 'FC-full' or
        'COC-full' are passed as features of interest.
        """
        wanted_features = [{'ugene_name': 'FC-full'},
                           {'ugene_name': 'COC-full'}]
        return lb.g2g_blastp(ref, subj, 11,
                             features_of_interest=wanted_features)
# Route termination and quit signals to the shared handler.
for signum in (signal.SIGTERM, signal.SIGQUIT):
    signal.signal(signum, sig_handler)

if True:
    # Debugging toggle kept from the original (disabled); it appears to
    # pin the multiprocessing helpers to one CPU -- confirm before use.
    # from DegenPrimer import MultiprocessingBase
    # MultiprocessingBase.cpu_count = 1
    abort_event = Event()
    lb = BlastCLI(abort_event)
    with user_message('Loading genomes...', '\n'):
        genomes_dir = u'/home/allis/Dropbox/Science/Микра/Thermococcus/sequence/GenBank/Thermococcus'
        genome_names = [
            'Thermococcus_barophilus_Ch5-complete.gb',
            'Thermococcus_onnurineus_NA1-complete-genome.gb',
            'Thermococcus_sp._ES1.gb',
            'Thermococcus-DS1-preliminary.gb',
        ]
        genome_paths = [os.path.join(genomes_dir, fname)
                        for fname in genome_names]
        genomes = load_files(abort_event, genome_paths, 'gb')
    # The first genome is the reference; all the others are subjects.
    ref, subj = genomes[0], genomes[1:]

    @shelf_result
    def g2g2shelf():
        """Run the genome-to-genome blastp of ``ref`` against ``subj``.

        Only features whose 'ugene_name' qualifier is 'FC-full' or
        'COC-full' are passed as features of interest.
        """
        wanted_features = [{'ugene_name': 'FC-full'},
                           {'ugene_name': 'COC-full'}]
        return lb.g2g_blastp(ref, subj, 11,
                             features_of_interest=wanted_features)

    # Reuse the result file from an earlier run when it exists; otherwise
    # run the search and use whatever the shelved call returns.
    cached_res = '/tmp/DP-PCR-N_KAEs'
    g2g_res = cached_res if os.path.isfile(cached_res) else g2g2shelf()
    print(g2g_res)
    if g2g_res:
        with roDict(g2g_res) as db:
            results = db['result']