示例#1
0
def findStrucs(query: str, max_results: int = 500) -> pd.DataFrame:
    """
    Run a sequence-type RCSB PDB search and return metadata for the hits.

    Args:
        query: Search term handed to ``pypdb.Query`` with
            ``query_type="sequence"``.
        max_results: Maximum number of search hits to describe. Defaults to
            500 to limit download size.

    Returns:
        A DataFrame built from one ``pypdb.describe_pdb`` result per hit.
        If the search or any metadata lookup raises, a message is printed
        and an empty DataFrame is returned instead, so downstream code can
        detect the failure via ``DataFrame.empty``.
    """
    try:
        # Build the query object that carries the search information.
        search_dict = pypdb.Query(query, query_type="sequence")
        # NOTE: only the first `max_results` hits are kept, to limit
        # download size.
        found = search_dict.search(search_dict)[:max_results]
        # One describe_pdb() lookup per hit.
        metadata = [pypdb.describe_pdb(entry) for entry in found]
        return pd.DataFrame(metadata)
    except Exception:
        # Deliberately best-effort: any failure is reported as "no results"
        # and an empty df is returned to be caught in downstream functions.
        # `Exception` (not a bare `except`) lets KeyboardInterrupt and
        # SystemExit propagate so the user can still abort a long download.
        print("There were no search results found for the query: " + query)
        return pd.DataFrame()
示例#2
0
# %%
# Ask the user for the search term. A blank answer falls back to the
# default query so the cell can still be run without typing anything.
query = input("Supply a query (term, accession number, etc.) :").strip()
if not query:
    query = "nuclear receptor"

# Tag the time right when the query is entered
now = datetime.datetime.now()
def now_dir_ts(ts=None):
    """
    Format a timestamp as ``year_month_day_hour_minute_second``.

    The fields are joined with underscores and are not zero-padded
    (for example ``2021_3_7_9_5_2``).

    Args:
        ts: Object exposing ``year``, ``month``, ``day``, ``hour``,
            ``minute`` and ``second`` attributes, such as a
            ``datetime.datetime``. When omitted, the module-level ``now``
            is used; it must still be bound to such an object at call time.

    Returns:
        The formatted timestamp string.
    """
    if ts is None:
        # Backward-compatible default: read the module-level timestamp.
        ts = now
    fields = (ts.year, ts.month, ts.day, ts.hour, ts.minute, ts.second)
    return "_".join(str(field) for field in fields)

# From here on `now` holds the formatted timestamp string rather than the
# datetime object; it is used to label the names built below.
now = now_dir_ts()
PDB_dl_dir = f"ds_{now}"

# Build the query object, run the search against RCSB and keep at most the
# first 500 hits.
search_dict = pypdb.Query(query)
found = search_dict.search(search_dict)[:500]

# Collect the describe_pdb() result for every hit.
metadata = [pypdb.describe_pdb(entry) for entry in found]

# Convert the collected metadata to a Pandas DF and save it as a CSV file.
dfm = pd.DataFrame(metadata)
dfm.to_csv(f"metadata_{now}.csv")

# %%
# Create a parser and an empty list named `pdbs`.
parser = PDBParser()
pdbs = []