def __get_dna(self, ice_id):
    '''Gets the SBOL v1 sequence document for an ICE entry.

    Downloads the entry's sequence to a temporary file and parses it
    with sbol_utils. The temporary file is removed afterwards (the
    previous implementation used delete=False and leaked one file per
    call).

    :param ice_id: ICE entry id (converted via __get_ice_number).
    :return: whatever sbol_utils.read yields for the downloaded file.
    '''
    url = self.__url + '/rest/file/' + self.__get_ice_number(ice_id) + \
        '/sequence/sbol1?sid=' + self.__sid

    # delete=False so the file can be re-opened by name (Windows-safe);
    # cleaned up explicitly in the finally block below.
    temp_file = tempfile.NamedTemporaryFile(delete=False)

    try:
        with codecs.open(temp_file.name, 'w', 'utf-8') as text_file:
            text_file.write(net_utils.get(url))

        return sbol_utils.read(temp_file.name)
    finally:
        temp_file.close()
        os.remove(temp_file.name)
def get_genbank(self, ice_id, out=None):
    '''Get Genbank file.

    Fetches the Genbank-format sequence of an ICE entry and returns it
    as a string. When ``out`` is truthy, the data is additionally
    written to that path.
    '''
    entry_number = self.__get_ice_number(ice_id)
    url = '%s/rest/file/%s/sequence/genbank' % (self.__url, entry_number)
    genbank = net_utils.get(url, self.__headers)

    if out:
        # Persist a copy only when a destination path was supplied.
        with open(out, 'w') as handle:
            handle.write(genbank)

    return genbank
def __get_sbol_doc(self, ice_id):
    '''Gets the SBOL sequence document for an ICE entry.

    Downloads the entry's sequence to a temporary file and parses it
    into an sbol.Document. The temporary file is removed afterwards
    (the previous implementation used delete=False and leaked one file
    per call).

    :param ice_id: ICE entry id (converted via __get_ice_number).
    :return: parsed sbol.Document.
    '''
    url = self.__url + '/rest/file/' + self.__get_ice_number(ice_id) + \
        '/sequence/sbol?sid=' + self.__sid

    # delete=False so the file can be re-opened by name (Windows-safe);
    # cleaned up explicitly in the finally block below.
    temp_file = tempfile.NamedTemporaryFile(delete=False)

    try:
        with open(temp_file.name, 'w') as text_file:
            text_file.write(net_utils.get(url))

        document = sbol.Document()
        document.read(temp_file.name)
        return document
    finally:
        temp_file.close()
        os.remove(temp_file.name)
def get_fasta(prefix, suffix, entries, out_file='out.fasta', organism='4932',
              seqs_per_class=500):
    '''Get fasta file of protein variants.

    For each entry ('class_nucleotideseq'), builds prefix + seq + suffix,
    translates it codon-by-codon via the synbiochem codon web service,
    expanding every ambiguous codon into all its (non-Stop) amino acids,
    then writes up to seqs_per_class randomly chosen variants per class
    (sampled with replacement) to out_file in fasta format.

    Fixes vs. previous version:
    - codon lookups are memoised, so each distinct codon triggers at most
      one HTTP request instead of one per occurrence;
    - a codon encoding only 'Stop' now drops the sequence's variants
      instead of silently restarting accumulation at the next codon.
    '''
    base_url = 'http://codon.synbiochem.co.uk/codons?codon=%s&organism=%s'

    codon_cache = {}

    def _get_aas(codon):
        '''Return the non-Stop amino acids for codon (cached web lookup).'''
        if codon not in codon_cache:
            url = base_url % (codon, organism)
            resp = json.loads(net_utils.get(url, headers=None, verify=True))
            codon_cache[codon] = [aa['amino_acid']
                                  for aa in resp[0]['amino_acids']
                                  if aa['amino_acid'] != 'Stop']
        return codon_cache[codon]

    class_seqs = defaultdict(list)

    for entry in entries:
        seq_class, nucl_seq = entry.split('_')
        whole_seq = prefix + nucl_seq + suffix
        num_of_codons = len(whole_seq) // 3

        # Seed with the empty string so the cross-product below is uniform;
        # an all-Stop codon empties aa_seqs and keeps it empty.
        aa_seqs = ['']

        for codon_index in range(0, num_of_codons * 3, 3):
            codon = whole_seq[codon_index:codon_index + 3]
            aas = _get_aas(codon)
            # Cross-product: extend every partial sequence by every
            # amino acid this codon may encode (same order as before:
            # outer loop over aas, inner over existing sequences).
            aa_seqs = [aa_seq + aa for aa in aas for aa_seq in aa_seqs]

        if num_of_codons:
            class_seqs[seq_class].extend(aa_seqs)

    with open(out_file, 'w') as handle:
        for seq_class, seqs in class_seqs.items():
            # Random sample WITH replacement, capped at seqs_per_class.
            sample = [choice(seqs)
                      for _ in range(min(len(seqs), seqs_per_class))]

            SeqIO.write([SeqRecord(Seq(aa_seq, generic_protein),
                                   '%s_%i' % (seq_class, aa_idx + 1), '', '')
                         for aa_idx, aa_seq in enumerate(sample)],
                        handle, 'fasta')
def __get_meta_data(self, ice_id):
    '''Returns an ICE entry metadata.'''
    # Resolve the entry number first, then hit the parts endpoint.
    ice_number = self.__get_ice_number(ice_id)
    url = self.__url + '/rest/parts/' + ice_number
    response = net_utils.get(url, self.__headers)
    return _read_resp(response)
def search(self, term, limit=5):
    '''Searches ICE.

    :param term: free-text search term; percent-encoded before being
        embedded in the query string (the previous version interpolated
        it raw, so spaces, '&', '#' or quotes corrupted the URL).
    :param limit: maximum number of results requested.
    :return: parsed search response.
    '''
    from urllib.parse import quote

    url = self.__url + '/rest/search?offset=0&limit=' + str(limit) + \
        '&sort=relevance&q="' + quote(term) + '"'
    return _read_resp(net_utils.get(url, self.__headers))
def search_groups(self, term):
    '''Gets groups from search terms.

    :param term: autocomplete token; percent-encoded before being
        embedded in the query string (the previous version interpolated
        it raw, so spaces or '&' corrupted the URL).
    :return: parsed autocomplete response.
    '''
    from urllib.parse import quote

    url = self.__url + '/rest/groups/autocomplete?token=' + quote(term)
    return _read_resp(net_utils.get(url, headers=self.__headers))
def get_groups(self):
    '''Gets a group name to id map.'''
    # Large fixed limit: fetch (up to) all groups in a single request.
    url = self.__url + '/rest/groups?offset=0&limit=4096'
    resp = _read_resp(net_utils.get(url, headers=self.__headers))

    name_to_id = {}

    for group in resp['data']:
        name_to_id[group['label']] = group['id']

    return name_to_id
df = pd.DataFrame(resp['data']) return df, resp['resultCount'] if __name__ == '__main__': parser = arguments() arg = parser.parse_args() iceurl = 'https://ice.synbiochem.co.uk' client = ice_utils.ICEClient(iceurl, os.environ['ICE_USERNAME'], os.environ['ICE_PASSWORD']) dfs = [] for cole in arg.collection.split(','): print(cole) url = iceurl + '/rest/collections/' + cole + '/entries?limit=' + str( arg.chunk) response = net_utils.get(url, headers=client._ICEClient__headers) df, total = response2df(response) while df.shape[0] < total and df.shape[0] < arg.limit: print(df.shape[0]) url = iceurl + '/rest/collections/' + cole + '/entries?limit=' + str( arg.chunk) + '&offset=' + str(df.shape[0]) response = net_utils.get(url, headers=client._ICEClient__headers) df1, total = response2df(response) df = pd.concat([df, df1], ignore_index=True, sort=True) if 'Name' not in df.columns: # Use both namings to avoid issues try: df['Name'] = df['name'] df['Part ID'] = df['partId'] except: pass