Exemplo n.º 1
def test_adjacency_matrix(cell_size, threshold, periodic):
    """
    Compare the construction of an adjacency matrix using a cell list
    and using a computationally expensive but simpler distance matrix.

    Parameters
    ----------
    cell_size : number
        Cell size handed to the ``CellList`` constructor.
    threshold : number
        Distance threshold for two atoms to count as adjacent.
    periodic : bool
        If true, a box is attached to the structure and periodic
        boundary conditions are used in both computations.
    """
    array = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    if periodic:
        # Create an orthorhombic box
        # with the outer coordinates as bounds
        array.box = np.diag(
            np.max(array.coord, axis=-2) - np.min(array.coord, axis=-2)
        )
    cell_list = struc.CellList(array, cell_size=cell_size, periodic=periodic)
    matrix = cell_list.create_adjacency_matrix(threshold)

    # Create distance matrix
    # Convert to float64 to avoid erroneous warning
    # https://github.com/ContinuumIO/anaconda-issues/issues/9129
    array.coord = array.coord.astype(np.float64)
    length = array.array_length()
    # Every (i, j) index pair in row-major order, so that the flat
    # distance vector can be reshaped into a (length, length) matrix
    index_pairs = np.stack(
        [
            np.repeat(np.arange(length), length),
            np.tile(np.arange(length), length),
        ],
        axis=-1,
    )
    distance = struc.index_distance(array, index_pairs, periodic)
    distance = np.reshape(distance, (length, length))
    # Create adjacency matrix from distance matrix
    expected_matrix = (distance <= threshold)

    # Both ways to create an adjacency matrix
    # should give the same result
    assert np.array_equal(matrix, expected_matrix)
Exemplo n.º 2
def test_get_atoms(cell_size):
    """
    Test the correct functionality of a cell list on a simple test case
    with known solutions.

    Five atoms are placed on the z-axis at z = 0, 1, 2, 3, 4, so the
    atoms within a given radius of a query position are easy to
    enumerate by hand.
    """
    array = struc.AtomArray(length=5)
    array.coord = np.array([[0, 0, i] for i in range(5)])
    cell_list = struc.CellList(array, cell_size=cell_size)
    assert cell_list.get_atoms(np.array([0, 0, 0.1]), 1).tolist() == [0, 1]
    assert cell_list.get_atoms(np.array([0, 0, 1.1]), 1).tolist() == [1, 2]
    assert cell_list.get_atoms(np.array([0, 0, 1.1]), 2).tolist() \
        == [0, 1, 2, 3]

    # The third query position must lie in (4, 5] on the z-axis to
    # produce the expected neighbors [3, 4] (radius 2)
    # and [2, 3, 4] (radius 3)
    pos = np.array([[0, 0, 0.1],
                    [0, 0, 1.1],
                    [0, 0, 4.1]])

    # Multiple positions
    expected_indices = [0, 1, 2,
                        0, 1, 2, 3,
                        3, 4]
    indices = cell_list.get_atoms(pos, 2)
    # Rows are padded with -1, strip the padding before comparison
    assert indices[indices != -1].tolist() == expected_indices

    # Multiple positions and multiple radii
    rad = np.array([1.0, 2.0, 3.0])
    expected_indices = [0, 1,
                        0, 1, 2, 3,
                        2, 3, 4]
    indices = cell_list.get_atoms(pos, rad)
    assert indices[indices != -1].tolist() == expected_indices
Exemplo n.º 3
    def __getitem__(self, index):
        """
        Load the structure file at position `index` and return the
        adjacency matrix of its CA atoms.

        The matrix is zero-padded by one row and one column if its side
        length is odd, so the returned matrix always has an even side
        length. The result is returned as a float64 array.
        """
        path = os.path.join(self.fileDir, self.files[index])
        print(path)
        array = strucio.load_structure(path)
        # A multi-model file is loaded as a stack:
        # only its first model is used
        if isinstance(array, biotite.structure.AtomArrayStack):
            array = array[0]

        ca = array[array.atom_name == "CA"]
        cell_list = struc.CellList(ca, cell_size=self.threshold)
        # NOTE(review): the threshold argument was cut off in the source;
        # `self.threshold` (same value as the cell size) is assumed
        adj_matrix = cell_list.create_adjacency_matrix(self.threshold)

        side = adj_matrix.shape[0]
        if side % 2 != 0:
            # Append a zero row, then a zero column.
            # Without `axis`, np.append would flatten the matrix
            adj_matrix = np.append(
                adj_matrix, np.zeros((1, side), dtype=float), axis=0
            )
            adj_matrix = np.append(
                adj_matrix, np.zeros((side + 1, 1), dtype=float), axis=1
            )

        return adj_matrix.astype('double')
Exemplo n.º 4
def test_outside_location():
    """
    Query a position that lies far outside of every cell and expect
    that no atom is reported.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    amino_acid_mask = struc.filter_amino_acids(structure)
    structure = structure[amino_acid_mask]
    cells = struc.CellList(structure, cell_size=5)
    # Move 100 units below the lowest coordinate in each dimension
    far_away = np.min(structure.coord, axis=0) - 100
    neighbors = cells.get_atoms(far_away, 5)
    # Expect empty array
    assert len(neighbors) == 0
Exemplo n.º 5
def detect_disulfide_bonds(structure, distance=2.05, distance_tol=0.05,
                           dihedral=90, dihedral_tol=10):
    """
    Detect disulfide bonds between cysteine S-gamma atoms.

    Two S-gamma atoms are reported as bonded if their distance lies
    within ``distance +- distance_tol`` and the absolute
    CB-SG-SG-CB dihedral angle (in degrees) lies within
    ``dihedral +- dihedral_tol``.

    NOTE(review): the signature was cut off in the source; the
    parameter names are taken from the body, the default values are
    assumed and should be confirmed.

    Parameters
    ----------
    structure : AtomArray
        The structure to search in.
    distance, distance_tol : float
        Target S-S distance and its tolerance.
    dihedral, dihedral_tol : float
        Target dihedral angle in degrees and its tolerance.

    Returns
    -------
    ndarray, dtype=int
        One row per bond, holding the two S-gamma atom indices with the
        lower index first.
    """
    # Array where detected disulfide bonds are stored
    disulfide_bonds = []
    # A mask that selects only S-gamma atoms of cysteines
    sulfide_mask = (structure.res_name == "CYS") & \
                   (structure.atom_name == "SG")
    # sulfides in adjacency to other sulfides are detected in an
    # efficient manner via a cell list
    cell_list = struc.CellList(
        structure,
        cell_size=distance + distance_tol,
        selection=sulfide_mask
    )
    # Iterate over every index corresponding to an S-gamma atom
    for sulfide_i in np.where(sulfide_mask)[0]:
        # Find indices corresponding to other S-gamma atoms,
        # that are adjacent to the position of structure[sulfide_i]
        # We use the faster 'get_atoms_in_cells()' instead of
        # `get_atoms()`, as precise distance measurement is done
        # afterwards anyway
        potential_bond_partner_indices = cell_list.get_atoms_in_cells(
            coord=structure.coord[sulfide_i]
        )
        # Iterate over every index corresponding to an S-gamma atom
        # as bond partner
        for sulfide_j in potential_bond_partner_indices:
            if sulfide_i == sulfide_j:
                # A sulfide cannot create a bond with itself:
                continue
            # Create 'Atom' instances
            # of the potentially bonded S-gamma atoms
            sg1 = structure[sulfide_i]
            sg2 = structure[sulfide_j]
            # For dihedral angle measurement the corresponding
            # C-beta atoms are required, too
            cb1 = structure[(structure.chain_id == sg1.chain_id)
                            & (structure.res_id == sg1.res_id) &
                            (structure.atom_name == "CB")]
            cb2 = structure[(structure.chain_id == sg2.chain_id)
                            & (structure.res_id == sg2.res_id) &
                            (structure.atom_name == "CB")]
            # Measure distance and dihedral angle and check criteria
            bond_dist = struc.distance(sg1, sg2)
            bond_dihed = np.abs(np.rad2deg(struc.dihedral(cb1, sg1, sg2, cb2)))
            if bond_dist  > distance - distance_tol and \
               bond_dist  < distance + distance_tol and \
               bond_dihed > dihedral - dihedral_tol and \
               bond_dihed < dihedral + dihedral_tol:
                # Atoms meet criteria -> we found a disulfide bond
                # -> the indices of the bonded S-gamma atoms
                # are put into a list with the lower index first
                bond_tuple = sorted((sulfide_i, sulfide_j))
                # Add bond to list of bonds, but each bond only once
                if bond_tuple not in disulfide_bonds:
                    disulfide_bonds.append(bond_tuple)
    return np.array(disulfide_bonds, dtype=int)
Exemplo n.º 6
def find_leaflets(structure, head_atom_mask,
                  cutoff_distance=15.0, periodic=False):
    """
    Identify which lipids molecules belong to the same lipid bilayer
    leaflet.

    NOTE(review): the signature was cut off in the source; the
    parameter names are taken from the documentation below, the default
    values are assumed and should be confirmed.

    Parameters
    ----------
    structure : AtomArray, shape=(n,)
        The structure containing the membrane.
        May also include other molecules, e.g. water or an embedded
        protein.
    head_atom_mask : ndarray, dtype=bool, shape=(n,)
        A boolean mask that selects atoms from `structure` that
        represent lipid head groups.
    cutoff_distance : float, optional
        When the distance of two head groups is larger than this value,
        they are not (directly) connected in the same leaflet.
    periodic : bool, optional,
        If true, periodic boundary conditions are considered.
        This requires that `structure` has an associated `box`.

    Returns
    -------
    leaflets : ndarray, dtype=bool, shape=(m,n)
        Multiple boolean masks, one for each identified leaflet.
        Each masks indicates which atoms of the input `structure`
        are in the leaflet.
    """
    cell_list = struc.CellList(
        structure, cell_size=cutoff_distance, selection=head_atom_mask,
        periodic=periodic
    )
    adjacency_matrix = cell_list.create_adjacency_matrix(cutoff_distance)
    graph = nx.Graph(adjacency_matrix)

    head_leaflets = [
        sorted(c) for c in nx.connected_components(graph)
        # A leaflet cannot consist of a single lipid
        # This also removes all entries
        # for atoms not in 'head_atom_mask'
        if len(c) > 1
    ]

    # 'head_leaflets' contains indices to head atoms
    # Broadcast each head atom index to all atoms in its corresponding
    # residue
    leaflet_masks = np.empty(
        (len(head_leaflets), structure.array_length()),
        dtype=bool
    )
    for i, head_leaflet in enumerate(head_leaflets):
        # One residue mask per head atom, merged into one leaflet mask
        leaflet_masks[i] = struc.get_residue_masks(structure, head_leaflet) \
                                .any(axis=0)
    return leaflet_masks
Exemplo n.º 7
def test_selection():
    """
    Test whether the `selection` parameter in the constructor works.
    This is tested by comparing the selection done prior to cell list
    creation with the selection done in the cell list construction.
    """
    array = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    # Select every second atom.
    # Built from the atom indices, so the mask always has exactly one
    # entry per atom, even for an odd number of atoms
    selection = np.arange(array.array_length()) % 2 == 1

    # Selection prior to cell list creation
    selected = array[selection]
    cell_list = struc.CellList(selected, cell_size=10)
    ref_near_atoms = selected[cell_list.get_atoms(array.coord[0], 20.0)]

    # Selection in cell list creation
    cell_list = struc.CellList(array, cell_size=10, selection=selection)
    test_near_atoms = array[cell_list.get_atoms(array.coord[0], 20.0)]

    assert test_near_atoms == ref_near_atoms
Exemplo n.º 8
def water_in_prox(atoms, sele, cutoff):
    """
    Get the atom indices of water oxygen atoms that are in vicinity of
    the selected atoms.

    Parameters
    ----------
    atoms : AtomArray
        The complete structure, including the water molecules.
    sele : index or mask
        Selects the atoms from `atoms` whose surroundings are searched.
    cutoff : float
        Maximum distance between a selected atom and a water oxygen.

    Returns
    -------
    ndarray
        Sorted, unique indices (pointing into `atoms`) of the
        ``"OW"`` atoms within `cutoff` of any selected atom.
    """
    cell_list = struct.CellList(atoms, cell_size=5,
                                selection=atoms.atom_name == "OW")
    adjacent_atoms = cell_list.get_atoms(atoms[sele].coord, cutoff)
    adjacent_atoms = np.unique(adjacent_atoms.flatten())
    # Rows are padded with -1; index 0 is a valid atom index
    # and must not be discarded together with the padding
    adjacent_atoms = adjacent_atoms[adjacent_atoms != -1]
    return adjacent_atoms
Exemplo n.º 9
 def get_matrices(array):
     """
     Create a periodic and non-periodic adjacency matrix.

     For an `AtomArray` a single pair of matrices is returned, for an
     `AtomArrayStack` one pair per model, stacked into arrays.
     The threshold and the cell size are both `CUTOFF`, which is read
     from the enclosing scope (read-only, hence no `nonlocal` needed).
     """
     if isinstance(array, struc.AtomArray):
         matrix = struc.CellList(array, CUTOFF, periodic=False) \
                       .create_adjacency_matrix(CUTOFF)
         matrix_pbc = struc.CellList(array, CUTOFF, periodic=True) \
                           .create_adjacency_matrix(CUTOFF)
     elif isinstance(array, struc.AtomArrayStack):
         matrix = np.array([
             struc.CellList(model, CUTOFF, periodic=False)
                  .create_adjacency_matrix(CUTOFF)
             for model in array
         ])
         matrix_pbc = np.array([
             struc.CellList(model, CUTOFF, periodic=True)
                  .create_adjacency_matrix(CUTOFF)
             for model in array
         ])
     else:
         # Fail with a clear message instead of an unbound local
         raise TypeError(
             f"Expected AtomArray or AtomArrayStack, "
             f"got {type(array).__name__}"
         )
     return matrix, matrix_pbc
Exemplo n.º 10
def test_adjacency_matrix(cell_size, threshold, periodic, use_selection):
    """
    Compare the construction of an adjacency matrix using a cell list
    and using a computationally expensive but simpler distance matrix.

    Parameters
    ----------
    cell_size : number
        Cell size handed to the ``CellList`` constructor.
    threshold : number
        Distance threshold for two atoms to count as adjacent.
    periodic : bool
        If true, a box is attached to the structure and periodic
        boundary conditions are used in both computations.
    use_selection : bool
        If true, a random subset of atoms is handed to the cell list
        via its `selection` parameter.
    """
    array = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    if periodic:
        # Create an orthorhombic box
        # with the outer coordinates as bounds
        array.box = np.diag(
            np.max(array.coord, axis=-2) - np.min(array.coord, axis=-2)
        )

    if use_selection:
        selection = np.random.choice((False, True), array.array_length())
    else:
        selection = None

    cell_list = struc.CellList(
        array, cell_size=cell_size, periodic=periodic, selection=selection
    )
    test_matrix = cell_list.create_adjacency_matrix(threshold)

    length = array.array_length()
    # Every (i, j) index pair in row-major order, so that the flat
    # distance vector can be reshaped into a (length, length) matrix
    index_pairs = np.stack(
        [
            np.repeat(np.arange(length), length),
            np.tile(np.arange(length), length),
        ],
        axis=-1,
    )
    distance = struc.index_distance(array, index_pairs, periodic)
    distance = np.reshape(distance, (length, length))
    # Create adjacency matrix from distance matrix
    exp_matrix = (distance <= threshold)
    if use_selection:
        # Set rows and columns to False for filtered out atoms
        exp_matrix[~selection, :] = False
        exp_matrix[:, ~selection] = False

    # Both ways to create an adjacency matrix
    # should give the same result
    assert np.array_equal(test_matrix, exp_matrix)
Exemplo n.º 11
def test_adjacency_matrix(cell_size, threshold):
    """
    Check the adjacency matrix of a cell list against a brute-force
    adjacency matrix derived from all pairwise atom distances.
    """
    structure = strucio.load_structure(join(data_dir, "3o5r.mmtf"))
    amino_acid_mask = struc.filter_amino_acids(structure)
    structure = structure[amino_acid_mask]
    matrix = struc.CellList(
        structure, cell_size=cell_size
    ).create_adjacency_matrix(threshold)

    # Brute-force reference: displacement vector for each atom pair
    positions = structure.coord
    displacement = positions[:, np.newaxis, :] - positions[np.newaxis, :, :]
    # Work in float64 to avoid an erroneous warning, see
    # https://github.com/ContinuumIO/anaconda-issues/issues/9129
    displacement = displacement.astype(np.float64)
    distance = np.sqrt(np.square(displacement).sum(axis=-1))
    expected_matrix = distance <= threshold

    # The cell list result must match the reference exactly
    assert matrix.tolist() == expected_matrix.tolist()
Exemplo n.º 12
structure = mmtf.get_structure(mmtf_file, model=1)

# Separate structure into the DNA and the two identical protein chains
dna = structure[np.isin(structure.chain_id, ["A", "B"])
                & ~structure.hetero]
protein_l = structure[(structure.chain_id == "L")
                      & ~structure.hetero]
protein_r = structure[(structure.chain_id == "R")
                      & ~structure.hetero]
# Quick check if the two protein chains are really identical
assert len(struc.get_residues(protein_l)) == len(struc.get_residues(protein_r))

# Fast identification of contacts via a cell list:
# The cell list is initialized with the coordinates of the DNA
# and later provided with the atom coordinates of the two protein chains
cell_list = struc.CellList(dna, cell_size=THRESHOLD_DISTANCE)

# Sets to store the residue IDs of contact residues
# for each protein chain
id_set_l = set()
id_set_r = set()

for protein, res_id_set in zip((protein_l, protein_r), (id_set_l, id_set_r)):
    # For each atom in the protein chain,
    # find all atoms in the DNA that are in contact with it
    contacts = cell_list.get_atoms(protein.coord, radius=THRESHOLD_DISTANCE)
    # Only retain atoms in the protein with contact
    # to at least one atom of the DNA
    # (rows without any contact consist only of the padding value -1)
    contact_indices = np.where((contacts != -1).any(axis=1))[0]
    # Get residue IDs for the atoms in the protein
    contact_res_ids = protein.res_id[contact_indices]
    # Store them in the set of the respective chain;
    # the set removes duplicate IDs from atoms of the same residue
    res_id_set.update(contact_res_ids)
Exemplo n.º 13
def pdb2Gdata(dirName, fileName, saveDir=False):
    # Load the structure file `fileName` from `dirName`, annotate its
    # secondary structure, build an adjacency matrix with threshold
    # `cfg.threshold` and pack node features and edge indices into a
    # `Data` object. The object is written to `saveDir` under the same
    # file name if `saveDir` is truthy, and is returned in any case.
    # NOTE(review): several lines of this function are missing in this
    # copy (marked below); it is not valid Python as it stands.
    # print(os.path.join(dirName, fileName))
    array = strucio.load_structure(
        os.path.join(dirName, fileName),
        # extra_fields=['atom_id', 'b_factor', 'occupancy', 'charge'],
        extra_fields=['b_factor', 'occupancy'],
    # NOTE(review): the rest of this call (including its closing
    # parenthesis) is missing here

    # unique chains
    chainIdUnique = []
    for chain in array.chain_id:
        if chain not in chainIdUnique:
        # NOTE(review): body of this `if` is missing; presumably the
        # chain is appended to `chainIdUnique` - confirm

    # secondary structure using the DSSP algorithm
    sse = dssp.DsspApp.annotate_sse(array)

    # chain "mask" and residue IDs of the CA atoms
    chainMask = array[array.atom_name == 'CA'].chain_id
    resMask = array[array.atom_name == 'CA'].res_id

    # if sse is shorter than the masks, pad it
    tmp = resMask.shape[0] - sse.shape[0]
    if tmp > 0:
        sse = np.append(sse, ['Null'] * tmp)

    # for each chain, for each residue: its secondary structure
    sseMaskDict = dict([(chain, {}) for chain in chainIdUnique])
    for chainId, resId, sseId in zip(chainMask, resMask, sse):
        sseMaskDict[chainId][resId] = sseId

    # adjacency matrix
    cell_list = struc.CellList(array, cell_size=cfg.threshold)
    adj_matrix = cell_list.create_adjacency_matrix(cfg.threshold)

    # (adj_matrix[adj_matrix == True].shape[0] - 5385) / 2
    edge_index = [[], []]
    nodeFeatures = []

    # convert the adjacency matrix to COO format and collect features
    arrayShp = array.shape[0]
    for i in range(arrayShp - 1):
        for j in range(i + 1, arrayShp):
            if adj_matrix[i][j]:
            # NOTE(review): body of this `if` is missing; presumably the
            # pair (i, j) is stored in `edge_index` - confirm

            # NOTE(review): the expressions below are fragments of two
            # feature-collecting statements (one for atom `i`, one for
            # the last atom); their opening lines are missing
            list(array.coord[i]) + [
                array.res_id[i], array.b_factor[i],
                float(array.hetero[i]), array.occupancy[i]
            ] + atomsDict.get(array.atom_name[i], atomsDict['Null']) +
            residualesDict.get(array.res_name[i], residualesDict['Null']) +
                sseMaskDict[array.chain_id[i]].get(array.res_id[i], 'Null'),
        list(array.coord[arrayShp - 1]) + [
            array.res_id[arrayShp - 1], array.b_factor[arrayShp - 1],
            float(array.hetero[arrayShp - 1]), array.occupancy[arrayShp - 1]
        ] + atomsDict.get(array.atom_name[arrayShp - 1], atomsDict['Null']) +
        residualesDict.get(array.res_name[arrayShp -
                                          1], residualesDict['Null']) +
            sseMaskDict[array.chain_id[arrayShp - 1]].get(
                array.res_id[arrayShp - 1], 'Null'), ssesTypeDict['Null']))

    # graph format
    # nodeFeaturesT = torch.tensor(nodeFeatures, dtype=torch.float)
    # edge_indexT = torch.tensor(edge_index, dtype=torch.long)
    # data = Data(x=nodeFeaturesT, edge_index=edge_indexT)
    data = Data(x=torch.tensor(nodeFeatures, dtype=torch.float),
                edge_index=torch.tensor(edge_index, dtype=torch.long))

    if saveDir:
        torch.save(data, os.path.join(saveDir, fileName))

    return data
Exemplo n.º 14
def pdb2Gdata(dirName, fileName, saveDir=False):
    # Load the structure file `fileName` from `dirName`, annotate the
    # secondary structure of each chain, build an adjacency matrix with
    # threshold `cfg.threshold` and pack node features and edge indices
    # into a `Data` object. The object is written to `saveDir` under the
    # same file name if `saveDir` is truthy, and is returned in any case.
    # NOTE(review): several lines of this function are missing in this
    # copy (marked below); it is not valid Python as it stands.
    # print(os.path.join(dirName, fileName))
    array = strucio.load_structure(
        os.path.join(dirName, fileName),
        # extra_fields=['atom_id', 'b_factor', 'occupancy', 'charge'],
        extra_fields=['b_factor', 'occupancy'],
    # NOTE(review): the rest of this call (including its closing
    # parenthesis) is missing here
    # if type(array) == biotite.structure.AtomArrayStack:
    #     array = array[0]

    # ca = array[array.atom_name == "CA"]
    # cell_list = struc.CellList(ca, cell_size=self.threshold)

    # Collect the chain IDs in order of first appearance
    chain_id = []
    for chain in array.chain_id:
        if chain not in chain_id:
        # NOTE(review): body of this `if` is missing; presumably the
        # chain is appended to `chain_id` - confirm

    # Secondary structure annotation per chain
    sseDict = dict([(chain, struc.annotate_sse(array, chain_id=chain))
                    for chain in chain_id])

    # Map chain -> residue ID -> secondary structure element
    sseMaskDict = {}
    for key, value in sseDict.items():
        # Residue IDs of the CA atoms of this chain
        mask = array[(array.chain_id == key)
                     & (array.atom_name == 'CA')].res_id
        # Pad the annotation with 'Null' if it is shorter than the mask
        tmp = mask.shape[0] - value.shape[0]
        if tmp > 0:
            sseDict[key] = np.append(value, ['Null'] * tmp)

        sseMaskDict[key] = {}
        for maskId, sseId in zip(mask, sseDict[key]):
            sseMaskDict[key][maskId] = sseId

    # Adjacency matrix over all atoms
    cell_list = struc.CellList(array, cell_size=cfg.threshold)
    adj_matrix = cell_list.create_adjacency_matrix(cfg.threshold)

    # (adj_matrix[adj_matrix == True].shape[0] - 5385) / 2
    edge_index = [[], []]

    nodeFeatures = []
    arrayShp = array.shape[0]
    # Visit each atom pair (i, j) with i < j once
    for i in range(arrayShp - 1):
        for j in range(i + 1, arrayShp):
            if adj_matrix[i][j]:
            # NOTE(review): body of this `if` is missing; presumably the
            # pair (i, j) is stored in `edge_index` - confirm

            # NOTE(review): the expressions below are fragments of two
            # feature-collecting statements (one for atom `i`, one for
            # the last atom); their opening lines and parts of the
            # secondary structure lookup are missing
            list(array.coord[i]) +
            [atomsDict.get(array.atom_name[i], atomsDict['Null'])] +
            [elementsDict.get(array.element[i], elementsDict['Null'])] +
            [array.res_id[i]] +
            [residualesDict.get(array.res_name[i], residualesDict['Null'])] +
            [float(array.hetero[i])] + [array.occupancy[i]] +
            [array.b_factor[i]] + [
                        array.res_id[i], 'Null'), ssesTypeDict['Null'])
        list(array.coord[arrayShp - 1]) +
        [atomsDict.get(array.atom_name[arrayShp - 1], atomsDict['Null'])] +
        [elementsDict.get(array.element[arrayShp - 1], elementsDict['Null'])] +
        [array.res_id[arrayShp - 1]] + [
            residualesDict.get(array.res_name[arrayShp -
                                              1], residualesDict['Null'])
        ] + [float(array.hetero[arrayShp - 1])] +
        [array.occupancy[arrayShp - 1]] + [array.b_factor[arrayShp - 1]] + [
                sseMaskDict[array.chain_id[arrayShp - 1]].get(
                    array.res_id[arrayShp - 1], 'Null'), ssesTypeDict['Null'])

    # Pack features and edges into the graph object
    nodeFeaturesT = torch.tensor(nodeFeatures, dtype=torch.float)
    edge_indexT = torch.tensor(edge_index, dtype=torch.long)
    data = Data(x=nodeFeaturesT, edge_index=edge_indexT)

    if saveDir:
        torch.save(data, os.path.join(saveDir, fileName))

    return data
Exemplo n.º 15
def pdb2Gdata(dirName, fileName, saveDir=False):
    # Load model 1 of the structure file `fileName` from `dirName` and
    # build one graph `Data` object per chain from the chain's
    # non-hetero CA atoms. The graphs are written to `saveDir` (one
    # file per chain) if `saveDir` is truthy; a dict mapping chain ID
    # to graph is returned in any case.
    # NOTE(review): several lines of this function are missing in this
    # copy (marked below); it is not valid Python as it stands.
    array = strucio.load_structure(os.path.join(dirName, fileName), model=1)

    # unique chains
    chainIdUnique = np.unique(array.chain_id)

    data = {}
    # for each chain
    for chain in chainIdUnique:
        sseMaskDict = {}

        # take the current chain, excluding hetero atoms (== numpy.False)
        oneChainArray = array[(array.chain_id == chain)
                              & (array.hetero == False)]

        # CA atoms only
        backbone = oneChainArray[oneChainArray.atom_name == 'CA']

        backboneShp = backbone.shape[0]
        # do NOT compute the secondary structure if the chain has no
        # (or too few) CA atoms
        if backboneShp < 5:
        # NOTE(review): body of this `if` is missing; presumably the
        # chain is skipped - confirm

        # secondary structure using the DSSP algorithm
        sse = dssp.DsspApp.annotate_sse(oneChainArray)

        # if sse is shorter than the mask, pad it
        tmp = backboneShp - sse.shape[0]
        if tmp > 0:
            sse = np.append(sse, ['C'] * tmp)

        # for each residue: its secondary structure
        for resId, sseId in zip(backbone.res_id, sse):
            sseMaskDict[resId] = sseId

        # adjacency matrix
        cellList = struc.CellList(backbone, cell_size=cfg.threshold)
        adjMatrix = cellList.create_adjacency_matrix(cfg.threshold)

        # subtract the centroid: shift the protein center to (0, 0, 0) (feature normalization)
        backbone.coord -= backbone.coord.mean(axis=0)

        # length of the longest vector (feature normalization)
        maxNorm = np.linalg.norm(backbone.coord, axis=1).max()
        if maxNorm != 0:
            backbone.coord /= maxNorm

        edgeIndex = [[], []]
        nodeFeatures = []

        # convert the adjacency matrix to COO format and collect features
        for i in range(backboneShp - 1):
            for j in range(i + 1, backboneShp):
                if adjMatrix[i][j]:
                # NOTE(review): body of this `if` is missing; presumably
                # the pair (i, j) is stored in `edgeIndex` - confirm

                # NOTE(review): the expressions below are fragments of
                # two feature-collecting statements (one for atom `i`,
                # one for the last atom); their opening lines are missing
                list(backbone.coord[i]) + residualesDict.get(
                    backbone.res_name[i], residualesDict['Null']) +
                ssesTypeDict.get(sseMaskDict.get(backbone.res_id[i], 'C')))
            list(backbone.coord[-1]) +
            residualesDict.get(backbone.res_name[-1], residualesDict['Null']) +
            ssesTypeDict.get(sseMaskDict.get(backbone.res_id[-1], 'C')))

        # graph format
        data[chain] = Data(x=torch.tensor(nodeFeatures, dtype=torch.float),
        # NOTE(review): the rest of this call (presumably the
        # `edge_index` argument and the closing parenthesis) is missing

    # save each graph to a separate file
    if saveDir:
        for chain, graph in data.items():
            fileNameSplit = fileName.split('.')
            # append the chain name to the file name
            fileNameSplit[0] += chain
            torch.save(graph, os.path.join(saveDir, '.'.join(fileNameSplit)))

    # return the dictionary
    return data
Exemplo n.º 16
def pdb2Gdata(dirName, fileName, saveDir=False):
    # Load the structure file `fileName` from `dirName` and build one
    # graph `Data` object per chain that has a secondary structure
    # annotation. The graphs are written to `saveDir` (one file per
    # chain) if `saveDir` is truthy; a dict mapping chain ID to graph
    # is returned in any case.
    # NOTE(review): several lines of this function are missing in this
    # copy (marked below); it is not valid Python as it stands.
    # print(os.path.join(dirName, fileName))
    array = strucio.load_structure(
        os.path.join(dirName, fileName),
        # extra_fields=['atom_id', 'b_factor', 'occupancy', 'charge'],
        extra_fields=['b_factor', 'occupancy'],
    # NOTE(review): the rest of this call (including its closing
    # parenthesis) is missing here

    # unique chains
    chainIdUnique = []
    for chain in array.chain_id:
        if chain not in chainIdUnique:
        # NOTE(review): body of this `if` is missing; presumably the
        # chain is appended to `chainIdUnique` - confirm

    # secondary structure using the DSSP algorithm for each chain
    # do NOT compute the secondary structure if the chain has no CA atoms
    sseChainDict = dict([
        (chain, dssp.DsspApp.annotate_sse(array[array.chain_id == chain]))
        for chain in chainIdUnique
        if array[(array.chain_id == chain)
                 & (array.atom_name == 'CA')].shape[0] != 0
    # NOTE(review): the closing `])` of this call is missing here

    data = {}
    sseMaskDict = dict([(chain, {}) for chain in chainIdUnique])
    for chain, sse in sseChainDict.items():
        # residue "mask" of the CA atoms
        resMask = array[(array.chain_id == chain)
                        & (array.atom_name == 'CA')].res_id

        # if sse is shorter than the mask, pad it
        tmp = resMask.shape[0] - sse.shape[0]
        if tmp > 0:
            sseChainDict[chain] = np.append(sse, ['Null'] * tmp)

        # for each chain, for each residue: its secondary structure
        for resId, sseId in zip(resMask, sseChainDict[chain]):
            sseMaskDict[chain][resId] = sseId

        oneChainArray = array[array.chain_id == chain]

        # adjacency matrix
        cell_list = struc.CellList(oneChainArray, cell_size=cfg.threshold)
        adj_matrix = cell_list.create_adjacency_matrix(cfg.threshold)

        edge_index = [[], []]
        nodeFeatures = []

        # convert the adjacency matrix to COO format and collect features
        arrayShp = oneChainArray.shape[0]
        for i in range(arrayShp - 1):
            for j in range(i + 1, arrayShp):
                if adj_matrix[i][j]:
                # NOTE(review): body of this `if` is missing; presumably
                # the pair (i, j) is stored in `edge_index` - confirm

                # NOTE(review): the expressions below are fragments of
                # two feature-collecting statements (one for atom `i`,
                # one for the last atom); their opening lines and parts
                # of the lookups are missing
                list(oneChainArray.coord[i]) + [
                    oneChainArray.res_id[i], oneChainArray.b_factor[i],
                    float(oneChainArray.hetero[i]), oneChainArray.occupancy[i]
                ] +
                atomsDict.get(oneChainArray.atom_name[i], atomsDict['Null']) +
                                   residualesDict['Null']) +
                        oneChainArray.res_id[i], 'Null'), ssesTypeDict['Null'])
            list(oneChainArray.coord[arrayShp - 1]) + [
                oneChainArray.res_id[arrayShp -
                                     1], oneChainArray.b_factor[arrayShp - 1],
                float(oneChainArray.hetero[arrayShp - 1]),
                oneChainArray.occupancy[arrayShp - 1]
            ] + atomsDict.get(oneChainArray.atom_name[arrayShp -
                                                      1], atomsDict['Null']) +
            residualesDict.get(oneChainArray.res_name[arrayShp - 1],
                               residualesDict['Null']) +
                sseMaskDict[oneChainArray.chain_id[arrayShp - 1]].get(
                    oneChainArray.res_id[arrayShp -
                                         1], 'Null'), ssesTypeDict['Null']))

        # graph format
        data[chain] = Data(x=torch.tensor(nodeFeatures, dtype=torch.float),
        # NOTE(review): the rest of this call (presumably the
        # `edge_index` argument and the closing parenthesis) is missing

    # save each graph to a separate file
    if saveDir:
        for chain, graph in data.items():
            fileNameSplit = fileName.split('.')
            fileNameSplit[0] += chain
            torch.save(graph, os.path.join(saveDir, '.'.join(fileNameSplit)))

    # return the dictionary
    return data
Exemplo n.º 17
import biotite
import biotite.structure as struc
import biotite.structure.io as strucio
import biotite.database.rcsb as rcsb
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

# Download the lysozyme structure (PDB ID 1AKI) and load it
file_name = rcsb.fetch("1aki", "mmtf", biotite.temp_dir())
array = strucio.load_structure(file_name)
# We only consider CA atoms
ca = array[array.atom_name == "CA"]
# 7 Angstrom adjacency threshold
threshold = 7
# Create cell list of the CA atom array
# for efficient measurement of adjacency
cell_list = struc.CellList(ca, cell_size=threshold)
adjacency_matrix = cell_list.create_adjacency_matrix(threshold)

# Plot the matrix with the residue IDs on both axes
figure = plt.figure()
ax = figure.add_subplot(111)
cmap = ListedColormap(["white", biotite.colors["dimgreen"]])
ax.pcolormesh(ca.res_id, ca.res_id, adjacency_matrix, cmap=cmap)
ax.set_xlabel("Residue number")
# Label the y axis as well:
# the label was applied to the x axis twice before
ax.set_ylabel("Residue number")
ax.set_title("Adjacency matrix of the lysozyme crystal structure")
Exemplo n.º 18
# Style of the stick and sphere representation
ammolite.cmd.set("stick_color", "red")
ammolite.cmd.set("stick_radius", 0.5)
ammolite.cmd.set("sphere_scale", 1.0)
ammolite.cmd.set("sphere_quality", 4)

# Adjust camera
ammolite.cmd.rotate("z", 90)



# Find contacts within cutoff distance
adjacency_matrix = struc.CellList(aptamer, CUTOFF) \
                        .create_adjacency_matrix(CUTOFF)
# Draw a distance dash for each pair of atoms in contact
for i, j in zip(*np.where(adjacency_matrix)):
    pymol_obj.distance("", i, j, show_label=False, gap=0)

ammolite.cmd.set("dash_color", "firebrick")

# Add black outlines
ammolite.cmd.set("ray_trace_mode", 1)
ammolite.cmd.set("ray_trace_disco_factor", 0.5)

# sphinx_gallery_thumbnail_number = 2
Exemplo n.º 19
def pdb2Gdata(dirName, fileName, saveDir=False):
    # Load the structure file `fileName` from `dirName` and build one
    # graph `Data` object per chain, with coordinates and B-factors
    # normalized per chain. The graphs are written to `saveDir` (one
    # file per chain) if `saveDir` is truthy; a dict mapping chain ID
    # to graph is returned in any case.
    # NOTE(review): several lines of this function are missing in this
    # copy (marked below); it is not valid Python as it stands.
    # print(os.path.join(dirName, fileName))
    array = strucio.load_structure(os.path.join(dirName, fileName),
                                   # extra_fields=['atom_id', 'b_factor', 'occupancy', 'charge'],
                                   extra_fields=['b_factor', 'occupancy'],
    # NOTE(review): the rest of this call (including its closing
    # parenthesis) is missing here

    # unique chains
    chainIdUnique = np.unique(array.chain_id)

    data = {}
    sseMaskDict = dict([(chain, {}) for chain in chainIdUnique])
    for chain in chainIdUnique:
        # take the current chain
        oneChainArray = array[array.chain_id == chain]

        # exclude hetero atoms for the sse computation (== numpy.False)
        notHeatemChain = oneChainArray[oneChainArray.hetero == False]

        # residue "mask" of the CA (non-hetero) atoms
        resMask = notHeatemChain[notHeatemChain.atom_name == 'CA'].res_id

        # do NOT compute the secondary structure if the chain has no
        # (or too few) CA atoms
        if resMask.shape[0] < 5:
        # NOTE(review): body of this `if` is missing; presumably the
        # chain is skipped - confirm

        # secondary structure using the DSSP algorithm for each chain
        sse = dssp.DsspApp.annotate_sse(notHeatemChain)

        # if sse is shorter than the mask, pad it
        tmp = resMask.shape[0] - sse.shape[0]
        if tmp > 0:
            sse = np.append(sse, ['Null'] * tmp)

        # for each chain, for each residue: its secondary structure
        for resId, sseId in zip(resMask, sse):
            sseMaskDict[chain][resId] = sseId

        # adjacency matrix
        cellList = struc.CellList(oneChainArray, cell_size=cfg.threshold)
        adjMatrix = cellList.create_adjacency_matrix(cfg.threshold)

        # subtract the centroid: shift the protein center to (0, 0, 0) (feature normalization)
        oneChainArray.coord -= oneChainArray.coord.mean(axis=0)

        # length of the longest vector (feature normalization)
        maxNorm = max([np.linalg.norm(point) for point in oneChainArray.coord])
        if maxNorm != 0:
            oneChainArray.coord /= maxNorm

        # maximum temperature factor (B-factor) (feature normalization)
        maxBFactor = oneChainArray.b_factor.max()
        if maxBFactor != 0:
            oneChainArray.b_factor /= maxBFactor

        edgeIndex = [[], []]
        nodeFeatures = []

        # convert the adjacency matrix to COO format and collect features
        arrayShp = oneChainArray.shape[0]
        for i in range(arrayShp - 1):
            for j in range(i + 1, arrayShp):
                if adjMatrix[i][j]:
                # NOTE(review): body of this `if` is missing; presumably
                # the pair (i, j) is stored in `edgeIndex` - confirm

                # NOTE(review): the expressions below are fragments of
                # two feature-collecting statements (one for atom `i`,
                # one for the last atom); their opening lines, parts of
                # the bracketed feature list and their final lines are
                # missing
                list(oneChainArray.coord[i]) +
                 oneChainArray.occupancy[i]] +
                atomsDict.get(oneChainArray.atom_name[i], atomsDict['Null']) +
                residualesDict.get(oneChainArray.res_name[i], residualesDict['Null']) +
            list(oneChainArray.coord[-1]) +
             oneChainArray.occupancy[-1]] +
            atomsDict.get(oneChainArray.atom_name[-1], atomsDict['Null']) +
            residualesDict.get(oneChainArray.res_name[-1], residualesDict['Null']) +

        # graph format
        data[chain] = Data(x=torch.tensor(nodeFeatures, dtype=torch.float),
                           edge_index=torch.tensor(edgeIndex, dtype=torch.long))

    # save each graph to a separate file
    if saveDir:
        for chain, graph in data.items():
            fileNameSplit = fileName.split('.')
            # append the chain name to the file name
            fileNameSplit[0] += chain
            torch.save(graph, os.path.join(saveDir, '.'.join(fileNameSplit)))

    # return the dictionary
    return data