def get_structure(pdb_id):
    '''Returns the parsed structure for a PDB id.

    io_utils.get_file is called with the source URL and a target filename of
    the form ~/<_DIR>/<_PDB_DIR>/<pdb_id>.pdb; the path it returns is handed
    to a PDBParser constructed with QUIET=True.

    Args:
        pdb_id: PDB identifier string. It is used to build the source URL and
            the target filename, and is passed to the parser as the
            structure id.

    Returns:
        Whatever PDBParser.get_structure returns for the file.
    '''
    pdb_filename = pdb_id + '.pdb'
    source_url = 'http://www.rcsb.org/pdb/files/' + pdb_filename
    target_filename = os.path.join(os.path.expanduser('~'), _DIR, _PDB_DIR,
                                   pdb_filename)

    # The parser receives a filename and opens the file itself, so no extra
    # file handle needs to be held open around the call.
    pdb_path = io_utils.get_file(source_url, target_filename)
    return PDBParser(QUIET=True).get_structure(pdb_id, pdb_path)
def _store_field(seq_structs, tokens, data):
    '''Stores one finished record in seq_structs under its tokens[:2] key.'''
    slots = seq_structs.setdefault(tokens[:2], [None, None])
    # Slot 0 takes 'sequence' records; any other record kind goes to slot 1.
    slots[0 if tokens[2] == 'sequence' else 1] = data


def _parse_seq_structs(lines, wanted_ids):
    '''Collects two-slot entries from an iterable of '>'-headed record lines.

    Args:
        lines: iterable of text lines; a line starting with '>' is a header
            and the lines that follow it are that record's data.
        wanted_ids: set of ids to keep, or None to keep every record.

    Returns:
        dict mapping the first two header tokens (as a tuple) to a
        two-element list; a slot stays None if no record filled it.
    '''
    seq_structs = {}
    tokens = None  # Header tokens of the record being collected, else None.
    chunks = []

    for line in lines:
        if line.startswith('>'):
            # A new header terminates whichever record was being collected.
            if tokens is not None:
                _store_field(seq_structs, tokens, ''.join(chunks))

            chunks = []

            # The id is the text between the leading '>' and the first ':'.
            pdb_id = line[1:].partition(':')[0]

            if wanted_ids is None or pdb_id in wanted_ids:
                tokens = tuple(re.split('>|:', line.strip())[1:])
            else:
                tokens = None
        elif tokens is not None:
            # Strip only the line terminator; every other character,
            # including spaces, is kept as data.
            chunks.append(line.rstrip('\r\n'))

    # The final record has no following header, so store it explicitly.
    if tokens is not None:
        _store_field(seq_structs, tokens, ''.join(chunks))

    return seq_structs


def get_seq_structs(pdb_ids=None):
    '''Returns sequence and structure strings for each (id, chain) pair.

    Reads the file whose path is returned by io_utils.get_file for the
    'ss.txt' source URL (target filename ~/<_DIR>/ss.txt). Header lines start
    with '>' and are split on '>' and ':'; the first two tokens form the key
    and the third selects the slot ('sequence' -> index 0, anything else ->
    index 1). The non-header lines that follow are concatenated as the data.

    Args:
        pdb_ids: optional iterable of ids; when given, only records whose
            header id (the text between '>' and the first ':') is in it are
            read. When None, all records are read.

    Returns:
        dict mapping (id, chain) tuples to [sequence, structure] lists.
        Entries for which either value was never read are omitted.
    '''
    # A set makes the per-header membership test independent of list length.
    wanted_ids = set(pdb_ids) if pdb_ids is not None else None

    source_url = 'http://www.rcsb.org/pdb/files/ss.txt'
    target_filename = os.path.join(os.path.expanduser('~'), _DIR, 'ss.txt')

    with open(io_utils.get_file(source_url, target_filename)) as fle:
        seq_structs = _parse_seq_structs(fle, wanted_ids)

    # items() works on both Python 2 and 3, unlike iteritems().
    return {key: value for key, value in seq_structs.items()
            if all(val is not None for val in value)}
def get_pdb_ids(max_ids=None, local_only=False):
    '''Returns a list of PDB ids, optionally randomly sub-sampled.

    Args:
        max_ids: if None, every id found is returned. Otherwise a random
            sample (without replacement) of at most max_ids ids is returned.
        local_only: if True, ids are derived from the names of '.pdb' files
            found anywhere under ~/<_DIR>/<_PDB_DIR> (extension removed,
            upper-cased). If False, ids are read from the file whose path is
            returned by io_utils.get_file for the UniProt query URL.

    Returns:
        list of id strings. Duplicates are not removed.
    '''
    ids = []

    if local_only:
        # Collect ids from the .pdb files present in the local directory.
        pdb_dir = os.path.join(os.path.expanduser('~'), _DIR, _PDB_DIR)

        for _, _, filenames in os.walk(pdb_dir):
            for filename in filenames:
                if filename.endswith('.pdb'):
                    ids.append(filename[:-4].upper())
    else:
        # Collect every id listed in the tab-format UniProt query result.
        source_url = ('http://www.uniprot.org/uniprot/?query=database:pdb'
                      + '&format=tab&columns=id,database(PDB)')
        target_filename = os.path.join(os.path.expanduser('~'), _DIR,
                                       'pdb_ids.txt')

        with open(io_utils.get_file(source_url, target_filename)) as fle:
            for line in fle:
                # The second whitespace-separated field holds ';'-separated
                # ids; empty pieces and the 'Cross-reference' token are
                # skipped.
                for entry in line.split()[1].split(';'):
                    if len(entry) > 0 and entry != 'Cross-reference':
                        ids.append(entry)

    if max_ids is None:
        return ids

    return random.sample(ids, min(len(ids), max_ids))