# NOTE(review): this chunk opens inside a function body whose `def` header is
# above the visible source; the entry point below calls `save_pdb_file`, which
# is presumably that function -- confirm. The source arrived flattened onto a
# single line, so the nesting shown here is reconstructed from syntax. In
# particular, placing `print pdb_name` inside the `else` branch is an
# assumption to verify against the original file.
    # Base download URL; the structure id is appended to it for each request.
    url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=pdb&compression=NO&structureId="
    # Collects the ids whose response contained '404 Not Found'.
    not_available_list = []
    for pdb in pdb_list:
        pdb_name = "{0}.pdb".format(pdb)
        # Only fetch structures that are not already present on disk.
        if not os.path.exists(pdb_name):
            pdbid = url+str(pdb)
            content = urllib.urlopen(pdbid).read()
            # A missing structure is detected by looking for the 404 text in
            # the response body.
            if '404 Not Found' in content:
                not_available_list.append(pdb)
            else:
                # NOTE(review): the file object is never closed explicitly.
                open(pdb_name, "w" ).write(content)
                print pdb_name
    return not_available_list


def check(not_available_list):
    """Print the ids that still have to be downloaded by hand.

    An id is dropped from the report when either '<id>-pdb-bundle.tar.gz' or
    '<id>-pdb-bundle.tar' already exists. If any ids remain, a prompt giving
    their count is printed, followed by the ids joined with commas.

    The input list is not modified (a copy is filtered), and nothing is
    returned.
    """
    # Work on a copy so that removing entries does not disturb the iteration
    # over the original list.
    new_list = not_available_list[:]
    for pdb in not_available_list:
        if os.path.exists('{0}-pdb-bundle.tar.gz'.format(pdb)) or os.path.exists('{0}-pdb-bundle.tar'.format(pdb)):
            new_list.remove(pdb)
    if new_list:
        print "copy and paste the {0} structures below in the rcsb.org download feature (could not be downloaded programatically)".format(len(new_list))	#obtain bundle case
        print ",".join(new_list)


if __name__ == "__main__":
    # `help_message`, `help_msg`, `read_in` and `print_next_step` are defined
    # outside the visible chunk.
    help_message(help_msg)
    d = read_in('pdb', 'uniprot', 'pre_seq2struc')
    # Keep the first four characters of every item yielded by `d`
    # (presumably the PDB id part of a chain identifier -- confirm).
    pdb_list = [x[:4] for x in d]
    # `set(...)` removes duplicate ids before downloading.
    not_available_list = save_pdb_file(set(pdb_list))
    check(not_available_list)
    print_next_step()
# NOTE(review): this chunk opens in the middle of a list literal (presumably
# `protein_property_list`, which is used below -- confirm) and relies on
# `organism_list`, `d_org`, `d_index` and `d_val` being set up above the
# visible source. The source arrived flattened onto a single line; the nesting
# and the enclosing scope shown here are reconstructed and should be verified.
'PPI_degree', 'dosage_tolerance' ]
log_zero_list = [ -1, -1, -4, 1, -1 ]	# TODO: make into dict
# Don't log dosage tolerance: already logged for yeast, ecoli is discrete.
for organism in organism_list:
    # Sort the pdb/uniprot mapping by key, then number the keys in that order:
    # d_index[organism] maps position -> key.
    pre_d_i = read_in('pdb', 'uniprot', organism=organism)
    pre_d_i = collections.OrderedDict(sorted(pre_d_i.items()))
    d_index[organism] = {i: pdb for i, pdb in enumerate(pre_d_i)}
    # d_ref is iterated below as (oln, pdb) pairs.
    d_ref = read_in('oln', 'pdb', organism=organism)
    for protein_property in protein_property_list:
        # `database` yields the positional arguments for `read_in`.
        x_input = database(organism, protein_property)
        d = read_in(*x_input)
        # Re-key the property values from oln to pdb, keeping only the olns
        # that have a value in `d`.
        d_subset = { pdb: d[oln] for oln, pdb in d_ref.iteritems() if oln in d }
        # Appended in `protein_property_list` order, one dict per property.
        d_val[organism].append(d_subset)
line_list = prepare_sql(d_org, d_index, d_val, protein_property_list, log_zero_list)
# Three fixed columns followed by one column per protein property.
columns = ['chain_id', 'species', 'pdb']
columns.extend(protein_property_list)
write_sqlite = SQLite3('proteomevis_chain', columns, line_list)
write_sqlite.run()
print_next_step('../')