Python Pisces примеры использования

Язык программирования: Python

Пространство имен/Пакет: mmtfPyspark.webfilters

Класс/Тип: Pisces

Примеров на hotexamples.com: 10

Python Pisces - 10 примеров найдено. Это лучшие примеры Python кода для mmtfPyspark.webfilters.Pisces, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Pisces(10)

Основные методы

Pisces (10)

Пример #1

Показать файл

    def test1(self):
        """Filter ``self.pdb`` with ``Pisces(20, 2.0)`` and check which keys remain."""
        # Collect the keys of every entry that passes the Pisces filter.
        results_1 = self.pdb.filter(Pisces(20, 2.0)).keys().collect()

        # assertIn/assertNotIn print the key and the collection on failure,
        # unlike assertTrue/assertFalse which only report "False is not true".
        self.assertIn('5X42', results_1)
        self.assertIn('4R4X', results_1)
        self.assertNotIn('2ONX', results_1)
        self.assertNotIn('1JLP', results_1)

Пример #2

Показать файл

    def test2(self):
        """Apply ``Pisces(20, 2.0)`` after ``StructureToPolymerChains`` and check the keys."""
        # Flatten with StructureToPolymerChains first, then filter and
        # collect the resulting keys.
        chains = self.pdb.flatMap(StructureToPolymerChains())
        results_2 = chains.filter(Pisces(20, 2.0)).keys().collect()

        # assertIn/assertNotIn print the key and the collection on failure,
        # unlike assertTrue/assertFalse which only report "False is not true".
        self.assertIn('5X42.B', results_2)
        self.assertIn('4R4X.A', results_2)
        self.assertNotIn('5X42.A', results_2)
        self.assertNotIn('2ONX.A', results_2)
        self.assertNotIn('1JLP.A', results_2)

Пример #3

Показать файл

# Application name and location of the MMTF sample data
APP_NAME = "MMTF_Spark"
path = "../../resources/mmtf_full_sample/"

# Build the Spark configuration and start the context
conf = (
    SparkConf()
    .setAppName(APP_NAME)
    .setMaster("local[*]")
)
sc = SparkContext(conf=conf)


# ## Read PDB and create PISCES non-redundant set

# In[14]:


# Read the sequence file and keep only entries passing the Pisces filter
pdb = mmtfReader.read_sequence_file(path, sc).filter(
    Pisces(sequenceIdentity=30, resolution=2.5)
)


# ## Setup criteria for metal interactions

# In[15]:


# Chemical component codes of metals in different oxidation states
metals = {
    "V", "CR", "MN", "MN3", "FE", "FE2", "CO", "3CO", "NI", "3NI",
    "CU", "CU1", "CU3", "ZN", "MO", "4MO", "6MO",
}

interactions_filter = InteractionFilter(
    distanceCutoff=3.0,
    minInteractions=4,
    maxInteractions=6,
)
interactions_filter.set_query_groups(True, metals)

# Exclude non-polar interactions
interactions_filter.set_target_elements(False, ['H', 'C', 'P'])

Пример #4

Показать файл

# ## Read MMTF Hadoop sequence file and
#
# create a non-redundant set (<=20% seq. identity) of L-protein chains

# In[3]:

path = "../../resources/mmtf_reduced_sample/"
sequenceIdentity = 20
resolution = 2.0
fraction = 0.1
seed = 123

# Read -> flatten with StructureToPolymerChains -> Pisces filter ->
# ContainsLProteinChain filter -> sample without replacement
pdb = (
    mmtfReader.read_sequence_file(path, sc)
    .flatMap(StructureToPolymerChains())
    .filter(Pisces(sequenceIdentity, resolution))
    .filter(ContainsLProteinChain())
    .sample(False, fraction, seed)
)

# ## Get content

# In[4]:

segmentLength = 11
data = secondaryStructureSegmentExtractor.get_dataset(pdb, segmentLength).cache()
print(f"original data   : {data.count()}")

# ## Drop Q3 and sequence duplicates

# In[5]:

Пример #5

Показать файл


# Location of the MMTF sample data
path = "../../resources/mmtf_full_sample/"

pdb = mmtfReader.read_sequence_file(path, sc)


# ## Filter by representative protein chains at 40% sequence identity

# In[7]:


sequenceIdentity = 40
resolution = 2.0

# Pisces is applied twice: once before and once after flattening with
# StructureToPolymerChains; PolymerComposition is applied last.
pdb = (
    pdb.filter(Pisces(sequenceIdentity, resolution))
    .flatMap(StructureToPolymerChains())
    .filter(Pisces(sequenceIdentity, resolution))
    .filter(PolymerComposition(PolymerComposition.AMINO_ACIDS_20))
)


# ## Show top 10 structures

# In[8]:


pdb.top(10)


# ## Save representative set

# In[9]:

Пример #6

Показать файл

Файл: SecondaryStructurePropertyEncoderDemo.ipynb.py Проект: fsimkovic/mmtf-pyspark


# ## Read MMTF Hadoop sequence file and
#
# create a non-redundant set (<=20% seq. identity) of L-protein chains

# In[3]:


path = "../../resources/mmtf_reduced_sample/"
sequenceIdentity = 20
resolution = 2.0
fraction = 0.1
seed = 123

# One step per line: read, flatten, filter twice, then sample
pdb = (
    mmtfReader
    .read_sequence_file(path, sc)
    .flatMap(StructureToPolymerChains())
    .filter(Pisces(sequenceIdentity, resolution))
    .filter(ContainsLProteinChain())
    .sample(False, fraction, seed)
)


# ## Get content

# In[4]:


segmentLength = 11
data = secondaryStructureSegmentExtractor.get_dataset(pdb, segmentLength)
data = data.cache()
print(f"original data   : {data.count()}")


# ## Drop Q3 and sequence duplicates

# In[5]:

Пример #7

Показать файл

Файл: ProteinChainClassification.ipynb.py Проект: fsimkovic/mmtf-pyspark

# ## Configure Spark Context

# In[18]:

conf = (
    SparkConf()
    .setMaster("local[*]")
    .setAppName("MachineLearningDemo")
)

sc = SparkContext(conf=conf)

# ## Read MMTF file and create a non-redundant set (<=40% seq. identity) of L-protein chains

# In[19]:

# Read, flatten with StructureToPolymerChains, then apply the Pisces filter
pdb = (
    mmtfReader.read_sequence_file('../../resources/mmtf_reduced_sample/', sc)
    .flatMap(StructureToPolymerChains())
    .filter(Pisces(sequenceIdentity=40, resolution=3.0))
)

# ## Get secondary structure content

# In[20]:

data = secondaryStructureExtractor.get_dataset(pdb)

# ## Define addProteinFoldType function

# In[21]:


def add_protein_fold_type(data, minThreshold, maxThreshold):
    '''
    Adds a column "foldType" with three major secondary structure class:

Пример #8

Показать файл

Файл: ProteinFoldDatasetCreatorDemo.ipynb.py Проект: fsimkovic/mmtf-pyspark

    "ProteinFoldDatasetCreatorDemo")

# Start the Spark context from the configuration built earlier in the file
sc = SparkContext(conf=conf)

# ## Read MMTF Hadoop sequence file
#
# Create non-redundant set (<=40% seq. identity) of L-protein chains

# In[15]:

path = "../../resources/mmtf_reduced_sample/"
sequenceIdentity = 40
resolution = 2.0

# Pisces is applied twice: once before and once after flattening with
# StructureToPolymerChains; ContainsLProteinChain is applied last.
pdb = (
    mmtfReader.read_sequence_file(path, sc)
    .filter(Pisces(sequenceIdentity, resolution))
    .flatMap(StructureToPolymerChains())
    .filter(Pisces(sequenceIdentity, resolution))
    .filter(ContainsLProteinChain())
)

# ## Get secondary structure content

# In[16]:

data = secondaryStructureExtractor.get_dataset(pdb)

# ## Classify chains by secondary structure type

# In[17]:

minThreshold = 0.05
maxThreshold = 0.15

Пример #9

Показать файл

Файл: MetalInteractionsExample.ipynb.py Проект: fsimkovic/mmtf-pyspark

# Filter and interaction criteria used below
resolution = 2.5
minInteractions = 4
maxInteractions = 6
distanceCutoff = 3.0

# Chemical component codes of metals in different oxidation states
metals = {
    "V", "CR", "MN", "MN3", "FE", "FE2", "CO", "3CO", "NI", "3NI",
    "CU", "CU1", "CU3", "ZN", "MO", "4MO", "6MO",
}


# ## Read PDB and create PISCES non-redundant set

# In[12]:


# NOTE: `path`, `sc` and `sequenceIdentityCutoff` are defined earlier in the
# file, outside this fragment.
pdb = mmtfReader.read_sequence_file(path, sc)
pdb = pdb.filter(
    Pisces(
        sequenceIdentity=sequenceIdentityCutoff,
        resolution=resolution,
    )
)


# ## Setup criteria for metal interactions

# In[13]:


interactions_filter = InteractionFilter()
interactions_filter.set_distance_cutoff(distanceCutoff)
interactions_filter.set_min_interactions(minInteractions)
interactions_filter.set_max_interactions(maxInteractions)
interactions_filter.set_query_groups(True, metals)

# Exclude non-polar interactions
interactions_filter.set_target_elements(False, ['H', 'C', 'P'])

Пример #10

Показать файл

# ## Read PDB in MMTF format

# In[3]:

path = "../../resources/mmtf_full_sample/"

pdb = mmtfReader.read_sequence_file(path, sc)

# ## Use only representative structures

# In[4]:

seqId = 40
resolution = 2.0

pdb = pdb.filter(
    Pisces(seqId, resolution)
)

# ## Extract proteins with Zn interactions

# In[5]:

finder = groupInteractionExtractor("ZN", 3)

# Build the interaction dataset, then cache it
interactions = finder.get_dataset(pdb)
interactions = interactions.cache()

# ## List the top 10 residue types that interact with Zn

# In[6]:

interactions.printSchema()