示例#1
0
def get_structure(pdb_id):
    '''Fetches the PDB file for pdb_id and parses it into a structure.

    The file is obtained through io_utils.get_file, which is given an
    rcsb.org URL as the source and a path below the user's home directory
    as the target. The result is parsed with a PDBParser created in quiet
    mode.
    '''
    pdb_name = pdb_id + '.pdb'
    remote_url = 'http://www.rcsb.org/pdb/files/' + pdb_name
    home_dir = os.path.expanduser('~')
    local_path = os.path.join(home_dir, _DIR, _PDB_DIR, pdb_name)

    fetched = io_utils.get_file(remote_url, local_path)

    # The handle is opened only so that its name can be given to the parser.
    with open(fetched) as handle:
        structure_parser = PDBParser(QUIET=True)
        return structure_parser.get_structure(pdb_id, handle.name)
示例#2
0
def _store_field(seq_structs, tokens, data):
    '''Stores one completed field under the key given by its header.'''
    entry = seq_structs.setdefault(tokens[:2], [None, None])
    entry[0 if tokens[2] == 'sequence' else 1] = data


def get_seq_structs(pdb_ids=None):
    '''Returns sequence and structure.

    Parses the ss.txt file obtained through io_utils.get_file. Each record
    starts with a header line of the form '>field0:field1:field2' followed
    by one or more data lines, which are concatenated.

    Args:
        pdb_ids: optional iterable of ids; only records whose first header
            field is in it are kept. None keeps every record.

    Returns:
        A dict keyed by the tuple of the first two header fields
        (presumably PDB id and chain id -- confirm against the file
        format). Each value is a two-item list: index 0 holds the data of
        the 'sequence' field, index 1 the data of the other field. Keys
        for which either item is missing are omitted.
    '''
    seq_structs = {}
    # A set gives constant-time membership tests for every header line.
    wanted = set(pdb_ids) if pdb_ids is not None else None
    in_field = False
    tokens = ()
    chunks = []

    source_url = 'http://www.rcsb.org/pdb/files/ss.txt'
    target_filename = os.path.join(os.path.expanduser('~'), _DIR,
                                   'ss.txt')

    with open(io_utils.get_file(source_url, target_filename)) as fle:
        for line in fle:
            if line.startswith('>'):
                # A new header terminates the field being collected.
                if in_field:
                    _store_field(seq_structs, tokens, ''.join(chunks))

                chunks = []
                pdb_id = re.search(r'(?<=\>)[^:]*', line).group(0)
                in_field = wanted is None or pdb_id in wanted
                tokens = tuple(re.split('>|:', line.strip())[1:]) \
                    if in_field else ()

            elif in_field:
                # Strip only the line terminator: other whitespace is data,
                # and the last line may have no terminator at all.
                chunks.append(line.rstrip('\r\n'))

        # The final field has no following header, so store it explicitly.
        if in_field:
            _store_field(seq_structs, tokens, ''.join(chunks))

    # items() works on both Python 2 and 3, unlike iteritems().
    return {key: value for key, value in seq_structs.items()
            if all(val is not None for val in value)}
示例#3
0
def get_pdb_ids(max_ids=None, local_only=False):
    '''Returns PDB ids.

    Args:
        max_ids: optional maximum number of ids to return. When given, a
            random sample of at most this many ids is returned; None
            returns every id found.
        local_only: if True, ids are taken from the names of the '.pdb'
            files found below the local PDB directory (upper-cased, with
            the extension removed). Otherwise they are read from a
            tab-separated table obtained through io_utils.get_file.

    Returns:
        A list of PDB id strings.
    '''
    if local_only:
        # Returns local PDB ids.
        pdb_dir = os.path.join(os.path.expanduser('~'), _DIR, _PDB_DIR)
        ids = [filename[:-4].upper()
               for _, _, files in os.walk(pdb_dir)
               for filename in files if filename.endswith('.pdb')]
    else:
        # Returns all PDB ids.
        source_url = 'http://www.uniprot.org/uniprot/?query=database:pdb' \
            + '&format=tab&columns=id,database(PDB)'
        target_filename = os.path.join(os.path.expanduser('~'), _DIR,
                                       'pdb_ids.txt')

        ids = []

        with open(io_utils.get_file(source_url, target_filename)) as fle:
            for line in fle:
                fields = line.split()

                # Blank or single-column lines carry no PDB column; skip
                # them instead of failing with an IndexError.
                if len(fields) < 2:
                    continue

                # The second column is a ';'-separated list of ids. The
                # header row yields 'Cross-reference' here, which is
                # dropped along with empty items.
                ids.extend(x for x in fields[1].split(';')
                           if x and x != 'Cross-reference')

    if max_ids is None:
        return ids

    return random.sample(ids, min(len(ids), max_ids))