示例#1
0
# In[2]:

# Configure Spark one setting at a time, then start the context.
conf = SparkConf()
conf.setMaster("local[*]")
conf.setAppName("FilterByReleaseDate")
sc = SparkContext(conf=conf)

# ## Read in MMTF files, apply the release-date filter, and count

# In[3]:

path = "../../resources/mmtf_reduced_sample/"

# Load the sequence file first, then keep only the entries accepted by the
# ReleaseDate filter constructed from the two date strings below.
all_structures = mmtfReader.read_sequence_file(path, sc)
structures = all_structures.filter(ReleaseDate("2000-01-28", "2017-02-28"))

num_released = structures.count()
print(
    f"Number of structure released between 2000-01-28 and 2017-02-28 is: {num_released}"
)

# ## Visualize structures

# In[4]:

# The RDD keys are collected to the driver and handed to the viewer.
structure_names = structures.keys().collect()
view_structure(structure_names, style='line')

# ## Terminate Spark

# In[5]:

sc.stop()
示例#2
0
# ## Filter proteins that contain a DNA chain and an L-protein chain
#
# 1) Retain PDB entries that exclusively contain L-peptide chains
# 2) Retain PDB entries that exclusively contain L-DNA
#
# NOTE(review): both filters below are constructed with default arguments;
# confirm that those defaults match the "exclusively" wording above.

# In[4]:

# Apply the two filters one after the other, in the same order as before.
with_l_protein = pdb.filter(ContainsLProteinChain())
structures = with_l_protein.filter(ContainsDnaChain())

# ## Count number of entries

# In[5]:

count = structures.count()

print(f"Number of entires that contain L-protein and L-DNA: {count}")

# ## Visualize structures

# In[7]:

# Collect the RDD keys to the driver and pass them to the viewer.
structure_names = structures.keys().collect()
view_structure(structure_names)

# ## Terminate Spark

# In[6]:

sc.stop()
示例#3
0
# * ExperimentalMethods.SOLUTION_NMR
# * ExperimentalMethods.SOLUTION_SCATTERING
# * ExperimentalMethods.THEORETICAL_MODEL
# * ExperimentalMethods.X_RAY_DIFFRACTION

# In[4]:

# Build the filter from the two experimental-method constants, then apply it.
method_filter = ExperimentalMethods(
    ExperimentalMethods.NEUTRON_DIFFRACTION,
    ExperimentalMethods.X_RAY_DIFFRACTION,
)
pdb = pdb.filter(method_filter)

# ## Print out the remaining entries

# In[5]:

# Keys of the filtered RDD, collected to the driver.
filtered_structures = pdb.keys().collect()

print(filtered_structures)

# ## Visualize 3D structures of the filtered entries

# In[6]:

view_structure(filtered_structures)

# ## Terminate Spark

# In[7]:

sc.stop()
示例#4
0

# Wrap the DNA-chain filter in NotFilter and apply the wrapped filter.
without_dna = NotFilter(ContainsDnaChain())
structures = structures.filter(without_dna)


# ## Count number of entries

# In[10]:


count = structures.count()

print(f"PDB entires without DNA chains : {count}")


# ## Visualize structures

# In[11]:


# Collect the RDD keys to the driver before handing them to the viewer.
structure_names = structures.keys().collect()
view_structure(structure_names)


# ## Terminate Spark

# In[7]:


sc.stop()

示例#5
0
# Configure Spark one setting at a time, then start the context.
conf = SparkConf()
conf.setMaster("local[*]")
conf.setAppName("ReadLocalMMTFReduced")
sc = SparkContext(conf=conf)

# ## Read in a local Hadoop Sequence File and count the number of entries

# In[3]:

path = "../../resources/mmtf_reduced_sample/"

pdb = mmtfReader.read_sequence_file(path, sc)

count = pdb.count()

print(f'number of pdb entries read : {count}')

# In[ ]:

# ## Visualize structures

# In[5]:

# Collect the RDD keys to the driver and render them with the sphere style.
structures = pdb.keys().collect()
view_structure(structures, style='sphere')

# ## Terminate Spark

# In[6]:

sc.stop()
# ## Retain high-resolution X-ray structures

# In[4]:

# Apply the three filters sequentially, in the same order as the original
# chained expression: experimental method, then Resolution, then RFree.
xray_only = pdb.filter(
    ExperimentalMethods(ExperimentalMethods.X_RAY_DIFFRACTION))
within_resolution = xray_only.filter(Resolution(0, 2.0))
pdb = within_resolution.filter(RFree(0, 2.0))

num_left = pdb.count()
print(f'number of pdb entries left : {num_left}')

# ## Visualize structures

# In[5]:

# Collect the RDD keys to the driver and pass them to the viewer.
structures = pdb.keys().collect()
view_structure(structures)

# ## Save this subset in a Hadoop Sequence File

# In[7]:

write_path = "./mmtf_subset_xray"

# Reduce the RDD to 8 partitions before writing it out.
pdb = pdb.coalesce(8)
mmtfWriter.write_sequence_file(write_path, sc, pdb)

# ## Terminate Spark

# In[8]:
# Sampling settings forwarded to the reader as keyword arguments.
seed = 123
fraction = 0.5
path = "../../resources/mmtf_full_sample/"

sampling = {"fraction": fraction, "seed": seed}
pdb = mmtfReader.read_sequence_file(path, sc, **sampling)

count = pdb.count()

print(f'number of pdb entries read : {count}')

# ## Visualize structures

# In[5]:

# Collect the RDD keys to the driver and render them with the stick style.
structures = pdb.keys().collect()
view_structure(structures, style='stick')

# ## Save this subset in a Hadoop Sequence File

# In[4]:

write_path = "./mmtf_subset"

mmtfWriter.write_sequence_file(write_path, sc, pdb)

# ## Terminate Spark

# In[5]:

sc.stop()
# ## Filter by deposition date

# In[4]:

# Build the DepositionDate filter from the two date strings, then apply it.
deposition_window = DepositionDate('1999-02-26', '1999-02-28')
pdb = pdb.filter(deposition_window)

# ## Count number of entries

# In[5]:

count = pdb.count()

print(
    f"Number of structure desposited between 1999-02-26 and 1999-02-28 is : {count}"
)

# ## View 3D structures

# In[6]:

# Keys of the filtered RDD, collected to the driver for the viewer.
pdbIds = pdb.keys().collect()

view_structure(pdbIds)

# ## Terminate Spark

# In[7]:

sc.stop()