Пример #1
0
	url = "http://www.rcsb.org/pdb/download/downloadFile.do?fileFormat=pdb&compression=NO&structureId="
	not_available_list = []
	for pdb in pdb_list:
		pdb_name = "{0}.pdb".format(pdb)
		if not os.path.exists(pdb_name):
			pdbid = url+str(pdb)
			content = urllib.urlopen(pdbid).read()
			if '404 Not Found' in content: not_available_list.append(pdb)
			else:
				open(pdb_name, "w" ).write(content)
				print pdb_name 
	return not_available_list
 
def check(not_available_list):
	"""Report the structures that still have to be fetched by hand.

	An id counts as already handled when a file named
	``<id>-pdb-bundle.tar.gz`` or ``<id>-pdb-bundle.tar`` exists in the
	current working directory. Every other id is printed as one
	comma-separated line, preceded by an instruction line; nothing is
	printed when no id is left over.

	Args:
		not_available_list: iterable of id strings to look up.

	Returns:
		None. The result is only written to standard output.
	"""
	bundle_suffixes = ('-pdb-bundle.tar.gz', '-pdb-bundle.tar')
	# Filter in one pass instead of copying the list and calling remove()
	# for every hit; input order is preserved.
	still_missing = [
		pdb for pdb in not_available_list
		if not any(
			os.path.exists('{0}{1}'.format(pdb, suffix))
			for suffix in bundle_suffixes
		)
	]
	if still_missing:
		# Single-argument print(...) behaves the same on Python 2 and 3.
		print("copy and paste the {0} structures below in the rcsb.org download feature (could not be downloaded programatically)".format(len(still_missing)))	#obtain bundle case
		print(",".join(still_missing))


if __name__ == "__main__":
	help_message(help_msg)
	# Keep only the first four characters of every key returned by read_in
	# (presumably the bare PDB id without a chain suffix -- confirm), and
	# de-duplicate them before downloading.
	unique_ids = set(
		key[:4] for key in read_in('pdb', 'uniprot', 'pre_seq2struc')
	)
	# Whatever could not be downloaded is handed to check() for reporting.
	check(save_pdb_file(unique_ids))
	print_next_step()
Пример #2
0
                except:
                    line.append('')  #no value
                    continue

                if protein_property == 'dosage_tolerance':
                    line.append(val)
                elif val != 0:
                    line.append(np.log10(val))
                else:
                    line.append(log_zero_list[pp])
            line_list.append(line)
    return line_list


if __name__ == "__main__":
    # Runs only when the file is executed as a script.
    help_message(help_msg, bool_org_dir=False)
    d_org = int2organism()
    d_index = initialize_dict('dict')
    d_val = initialize_dict('list')

    protein_property_list = [
        'length', 'abundance', 'evolutionary_rate', 'contact_density',
        'PPI_degree', 'dosage_tolerance'
    ]
    # Values appended instead of log10(val) when val == 0 (see the
    # log_zero_list[pp] branch in the function above). Five entries for the
    # six property names: there is none for 'dosage_tolerance', which that
    # function appends without taking a log.
    # NOTE(review): presumably indexed in step with protein_property_list -- confirm.
    log_zero_list = [
        -1, -1, -4, 1, -1
    ]  # TODO: make into dict. Dosage tolerance is not logged: already logged for yeast, ecoli is discrete
    for organism in organism_list:
        pre_d_i = read_in('pdb', 'uniprot', organism=organism)
        # Sort the items so the integer positions assigned below are reproducible.
        pre_d_i = collections.OrderedDict(sorted(pre_d_i.items()))
        # Maps position in the sorted mapping -> its key.
        d_index[organism] = {i: pdb for i, pdb in enumerate(pre_d_i)}
# Usage text passed to help_message() in the __main__ block below.
help_msg = 'write extra.txt files for ./run --extra/--EXTRA'

import sys, os

# Locate the shared helper directory relative to the checkout and make it
# importable. CWD.index raises ValueError when the script is started from a
# directory whose path does not contain 'proteomevis_scripts'.
CWD = os.getcwd()
_checkout_prefix = CWD[:CWD.index('proteomevis_scripts')]
UTLTS_DIR = _checkout_prefix + '/proteomevis_scripts/utlts'
sys.path.append(UTLTS_DIR)
from parse_user_input import help_message
from read_in_file import read_in
from parse_data import organism
from output import writeout

if __name__ == "__main__":
    # TODO: the help message still needs adjusting to allow the yeast_ecoli case
    help_message(help_msg)
    if organism != 'yeast_ecoli':
        # NOTE(review): `flag` is assigned in both branches but never read
        # anywhere in this block.
        flag = 'extra'
        old_path = '../../../0-identify_structure/3-length_check/{0}/{1}'.format(
            organism, 'old_seq2struc.txt')
        d = read_in('uniprot', 'pdb')
        d_old = read_in('uniprot', 'pdb', filename=old_path)
    else:
        # NOTE(review): the original author noted a dependency on
        # yeast/extra.txt being present, but the file read here is
        # ../ecoli/extra.txt -- confirm which one is meant.
        flag = 'EXTRA'
        d = read_in('uniprot', 'pdb', organism='ecoli')
        d_old = read_in('uniprot', 'pdb', filename='../ecoli/extra.txt')
    # Keep only the (key, value) pairs that are absent from the old mapping.
    fresh_pairs = set(d.items()) - set(d_old.items())
    writeout(['uniprot', 'pdb'], dict(fresh_pairs), filename='extra')
Пример #4
0
    def run(self, verbose=''):
        """Run the pipeline steps and return ``self.d_output``.

        Calls ``get_all_info`` and then ``get_best_pdb_chain``. When
        *verbose* is truthy, ``print_verbose`` is called as well before the
        result is returned.
        """
        self.get_all_info()
        self.get_best_pdb_chain()
        if not verbose:
            return self.d_output
        self.print_verbose()
        return self.d_output


def prepare_writeout(d_uniprot_pdb, d_proteome):
    """Pair every entry of *d_uniprot_pdb* with its *d_proteome* value.

    Args:
        d_uniprot_pdb: mapping whose keys are looked up in *d_proteome*.
        d_proteome: mapping that must contain every key of *d_uniprot_pdb*.

    Returns:
        A new dict mapping each key to the two-element list
        ``[d_uniprot_pdb[key], d_proteome[key]]``.

    Raises:
        KeyError: if a key of *d_uniprot_pdb* is missing from *d_proteome*.
    """
    # .items() instead of .iteritems(): the latter was removed in Python 3,
    # while .items() is available on both major versions.
    return {
        uniprot: [pdb, d_proteome[uniprot]]
        for uniprot, pdb in d_uniprot_pdb.items()
    }


if __name__ == '__main__':
    args = help_message(help_msg, bool_add_verbose=True)
    filename = 'pre_seq2struc'
    # Column names are passed to read_in exactly as spelled, including the
    # double space in the second one.
    d_proteome = read_in(
        'Entry', 'Gene names  (ordered locus )', filename='proteome')
    d_uniprot_pdb = UniProt2PDB(d_proteome.keys()).run(args.verbose)

    # Results are written under a "new_"-prefixed name; the unprefixed name
    # is what database_update_needed receives.
    writeout(
        ['uniprot', 'pdb', 'oln'],
        prepare_writeout(d_uniprot_pdb, d_proteome),
        filename="new_{0}".format(filename),
    )
    database_update_needed(filename)
Пример #5
0
    if os.path.isdir(path):
        make_intermediate_dir(dir1, dir2)
    return path


def save_image(d):
    """Render a PNG for every entry that does not have one yet, then quit.

    Args:
        d: mapping of organism -> iterable of ids. Each id is passed to
            ``get_path`` to obtain the target directory and to
            ``get_image`` to produce the picture.

    Side effects:
        Calls ``get_image`` for every id whose ``<dir>/<id>.png`` is missing,
        renames the produced file with ``mv``, and finally calls
        ``cmd.quit()``.
    """
    # .items() works on Python 2 and 3; .iteritems() exists on Python 2 only.
    for organism, d_pdb in d.items():
        for pdb in d_pdb:
            # The explicit '.png' is appended because, per the original
            # author's note, chain information is lost when the chain is a
            # number.
            path = '{0}/{1}.png'.format(get_path(pdb), pdb)
            if os.path.exists(path):
                continue  # already rendered on a previous run
            get_image(pdb, path, organism)
            # Per the original author's note, the output gets a frame number
            # appended (it is treated as a movie), so the file is moved from
            # '<path>0001.png' back to the intended name.
            subprocess.call(['mv', "{0}0001.png".format(path), path])
    cmd.quit()


if __name__ == "pymol":  # __name__ is "pymol", not "__main__", because the script is run through pymol
    # Collect one read_in('pdb', 'uniprot') mapping per organism, then render.
    d = initialize_dict('dict')
    for organism in organism_list:
        d[organism] = read_in('pdb', 'uniprot', organism=organism)
    save_image(d)
    # NOTE(review): save_image() ends by calling cmd.quit(); confirm that
    # execution still reaches the call below.
    print_next_step()

if __name__ == "__main__":
    # Executed as a plain script (the "pymol" branch above is not taken):
    # the visible code only calls help_message, passing custom_usage.
    args = help_message(help_msg,
                        bool_org_dir=False,
                        custom_usage=custom_usage)
Пример #6
0
                args):  #make this general for all to import
    line_list = []
    for o in range(len(d_org)):
        organism = d_org[o]
        total = len(d_index[organism])
        for p in range(total):
            pdb = d_index[organism][p]
            line = [p, pdb, d_translate[organism][pdb]]
            line.extend(d_info[organism][pdb])
            line.append(int(o))
            line_list.append(line)
    return line_list


if __name__ == "__main__":
    args = help_message(help_msg, bool_add_verbose=True,
                        bool_org_dir=False)  #add verbose option
    d_org = int2organism()
    d_translate = initialize_dict('dict')
    d_index = initialize_dict('dict')
    d_info = initialize_dict('dict')

    for organism in organism_list:
        pre_d_i = read_in('pdb', 'uniprot', organism=organism)
        pre_d_i = collections.OrderedDict(sorted(pre_d_i.items()))
        d_translate[organism] = pre_d_i
        d_index[organism] = {i: pdb for i, pdb in enumerate(pre_d_i)}
        d_info[organism] = get_info(organism)

    line_list = prepare_sql(d_org, d_translate, d_index, d_info, args)

    columns = [