Exemplo n.º 1
0
	url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=pdb&compression=NO&structureId="
	not_available_list = []
	for pdb in pdb_list:
		pdb_name = "{0}.pdb".format(pdb)
		if not os.path.exists(pdb_name):
			pdbid = url+str(pdb)
			content = urllib.urlopen(pdbid).read()
			if '404 Not Found' in content: not_available_list.append(pdb)
			else:
				open(pdb_name, "w" ).write(content)
				print pdb_name 
	return not_available_list
 
def check(not_available_list):
	"""Tell the user which structures still need a manual bundle download.

	An id from ``not_available_list`` counts as already handled when a file
	named ``<id>-pdb-bundle.tar.gz`` or ``<id>-pdb-bundle.tar`` exists; the
	names are relative paths, so they are looked up from the current
	working directory. The ids with neither file are counted and printed as
	a single comma-separated line. Nothing is printed when every id has a
	bundle. Returns None.
	"""
	# Checked in this order; the second name is only probed when the first is absent.
	bundle_templates = ('{0}-pdb-bundle.tar.gz', '{0}-pdb-bundle.tar')
	still_missing = [
		pdb_id for pdb_id in not_available_list
		if not any(os.path.exists(template.format(pdb_id)) for template in bundle_templates)
	]
	if not still_missing:
		return
	message = "copy and paste the {0} structures below in the rcsb.org download feature (could not be downloaded programatically)".format(len(still_missing))
	print(message)
	print(",".join(still_missing))


# Script entry point: collect PDB ids, download their structure files, then
# report the ids that still need a manual bundle download.
# NOTE(review): help_message, help_msg, read_in, save_pdb_file and
# print_next_step are defined outside this view -- confirm their contracts.
if __name__ == "__main__":
	help_message(help_msg)
	# presumably a mapping keyed by PDB chain identifiers -- verify against read_in
	d = read_in('pdb', 'uniprot', 'pre_seq2struc')
	# keep only the first four characters of each item yielded by iterating d
	pdb_list = [x[:4] for x in d]
	# set() drops duplicate ids so each one is passed to save_pdb_file only once
	not_available_list = save_pdb_file(set(pdb_list))
	# prints the ids for which no '<id>-pdb-bundle.tar(.gz)' file exists yet
	check(not_available_list)
	print_next_step()
Exemplo n.º 2
0
        'PPI_degree', 'dosage_tolerance'
    ]
    log_zero_list = [
        -1, -1, -4, 1, -1
    ]  #make into dict	#dont log dosage tolerance, already logged for yeast, ecoli is discrete
    for organism in organism_list:
        pre_d_i = read_in('pdb', 'uniprot', organism=organism)
        pre_d_i = collections.OrderedDict(sorted(pre_d_i.items()))
        d_index[organism] = {i: pdb for i, pdb in enumerate(pre_d_i)}

        d_ref = read_in('oln', 'pdb', organism=organism)
        for protein_property in protein_property_list:
            x_input = database(organism, protein_property)
            d = read_in(*x_input)

            d_subset = {
                pdb: d[oln]
                for oln, pdb in d_ref.iteritems() if oln in d
            }
            d_val[organism].append(d_subset)

    line_list = prepare_sql(d_org, d_index, d_val, protein_property_list,
                            log_zero_list)

    columns = ['chain_id', 'species', 'pdb']
    columns.extend(protein_property_list)
    write_sqlite = SQLite3('proteomevis_chain', columns, line_list)
    write_sqlite.run()

    print_next_step('../')