def test_search_composite():
    """
    Check that composite queries behave like set operations.

    The IDs found with ``query1 | query2`` must equal the union and
    the IDs found with ``query1 & query2`` must equal the intersection
    of the IDs found with the two individual queries.
    """
    # 'exact_match' needs the correctly spelled organism name:
    # a misspelled name would presumably match no entry at all,
    # which would make both assertions below hold trivially
    query1 = rcsb.FieldQuery(
        "rcsb_entity_host_organism.scientific_name",
        exact_match="Homo sapiens"
    )
    query2 = rcsb.FieldQuery("exptl.method", exact_match="SOLUTION NMR")

    ids_1 = set(rcsb.search(query1))
    ids_2 = set(rcsb.search(query2))
    ids_or = set(rcsb.search(query1 | query2))
    ids_and = set(rcsb.search(query1 & query2))

    assert ids_or == ids_1 | ids_2
    assert ids_and == ids_1 & ids_2
def test_search_field(field, molecular_definition, params, ref_ids):
    """
    Run a single :class:`FieldQuery` and compare the result with
    reference IDs.

    Both the IDs returned by :func:`search()` and the number of hits
    reported by :func:`count()` are checked against `ref_ids`.
    """
    field_query = rcsb.FieldQuery(field, molecular_definition, **params)
    found_ids = rcsb.search(field_query)
    found_count = rcsb.count(field_query)

    # The order of the returned IDs is irrelevant for the comparison
    expected_ids = set(ref_ids)
    assert set(found_ids) == expected_ids
    assert found_count == len(ref_ids)
import numpy as np
import matplotlib.pyplot as plt
import biotite
import biotite.database.rcsb as rcsb
from datetime import datetime, time


# Count, for each year since 1990, the released PDB entries:
# in total and separately for three experimental methods
years = np.arange(1990, datetime.today().year + 1)
xray_count = np.zeros(len(years), dtype=int)
nmr_count = np.zeros(len(years), dtype=int)
em_count = np.zeros(len(years), dtype=int)
tot_count = np.zeros(len(years), dtype=int)

# The method queries do not depend on the year,
# hence they are created only once and reused in each iteration
xray_query = rcsb.FieldQuery(
    "exptl.method", exact_match="X-RAY DIFFRACTION"
)
nmr_query = rcsb.FieldQuery(
    "exptl.method", exact_match="SOLUTION NMR"
)
em_query = rcsb.FieldQuery(
    "exptl.method", exact_match="ELECTRON MICROSCOPY"
)

# For each year fetch the list of released PDB IDs
# and count the number
for i, year in enumerate(years):
    # A query that comprises one year:
    # from the first moment of January 1st
    # to the last moment of December 31st
    date_query = rcsb.FieldQuery(
        "rcsb_accession_info.initial_release_date",
        range_closed=(
            datetime.combine(datetime(year, 1, 1), time.min),
            datetime.combine(datetime(year, 12, 31), time.max)
        )
    )
    # Get the amount of structures, that were released in that year
    # AND were elucidated with the respective method
    xray_count[i], nmr_count[i], em_count[i] = [
        rcsb.count(date_query & method_query)
        for method_query in [xray_query, nmr_query, em_query]
    ]
    # Get the total amount of structures released in that year
    tot_count[i] = rcsb.count(date_query)
pdb_ids = rcsb.search(query)
print(pdb_ids)
print(rcsb.count(query))
files = rcsb.fetch(pdb_ids, "mmtf", gettempdir())

########################################################################
# This was a simple search for the occurrence of the search term in any
# field.
# To look for a value in one specific field instead, use a
# :class:`FieldQuery`.
# The available fields and the operators each of them supports are
# listed
# `on this page <https://search.rcsb.org/search-attributes.html>`_.

# Query for 'lacA' gene
query1 = rcsb.FieldQuery(
    "rcsb_entity_source_organism.rcsb_gene_name.value",
    exact_match="lacA"
)
# Query for resolution below 1.5 Å
query2 = rcsb.FieldQuery(
    "reflns.d_resolution_high",
    less=1.5
)

########################################################################
# Even more complex queries are supported by the search API, e.g.
# queries for sequence or structure similarity.
# Have a look at the API reference of :mod:`biotite.database.rcsb`.
#
# For a more fine-grained selection multiple :class:`Query` objects
# can be combined using the ``|`` (or) or ``&`` (and) operator.
# A :class:`FieldQuery` is negated with ``~``.

# Entries that match the gene query,
# but do not match the resolution query
composite_query = query1 & ~query2
print(rcsb.search(composite_query))
def test_search_invalid(field, params):
    """
    Check that an invalid :class:`FieldQuery` is rejected.

    Both :func:`search()` and :func:`count()` are expected to raise a
    :class:`RequestError` whose message contains ``400``.
    """
    invalid_query = rcsb.FieldQuery(field, **params)
    # Same expectation for both request functions, search first
    for request_function in (rcsb.search, rcsb.count):
        with pytest.raises(RequestError, match="400"):
            request_function(invalid_query)
import os
import datetime
import concurrent.futures
import tarfile
import biotite.database.rcsb as rcsb
import biotite.structure.io.mmtf as mmtf


### Download of PDB and archive creation ###

# MMTF files are downloaded into a new directory in this path
# and the .tar archive is created here
base_path = "path/to/directoy"

# Obtain all PDB IDs using a query that includes all entries
# Each PDB entry has a title
all_id_query = rcsb.FieldQuery("struct.title")
pdb_ids = rcsb.search(all_id_query)

# Name for download directory: 'mmtf_' followed by the current date
now = datetime.datetime.now()
mmtf_dir = os.path.join(
    base_path, f"mmtf_{now.year:04d}{now.month:02d}{now.day:02d}"
)
if not os.path.isdir(mmtf_dir):
    os.mkdir(mmtf_dir)

# Download all PDB IDs with parallelized HTTP requests
with concurrent.futures.ThreadPoolExecutor(max_workers=10) as executor:
    # Remember which future belongs to which PDB ID
    downloads = {
        executor.submit(rcsb.fetch, pdb_id, "mmtf", mmtf_dir): pdb_id
        for pdb_id in pdb_ids
    }
# Leaving the 'with' block waits until all submitted downloads are done
# An exception raised in a worker thread is only stored in its future,
# so it would go unnoticed if the futures were never inspected
failed_ids = [
    pdb_id for future, pdb_id in downloads.items()
    if future.exception() is not None
]
if failed_ids:
    # Report instead of raising,
    # so the successfully downloaded files can still be used
    print(
        f"WARNING: Download failed for {len(failed_ids)} of "
        f"{len(pdb_ids)} PDB entries: {failed_ids}"
    )