示例#1
0
def main():
    """Split one PDB entry into receptor, ligand and peptide PDB files.

    Command-line arguments (read from ``sys.argv``, exactly four required):

    1. ``--pdbcode`` or ``--pdbfile`` -- selects how argument 2 is read.
    2. With ``--pdbcode``: a PDB code, downloaded to ``<pdbcode>.pdb`` in the
       current directory.  With ``--pdbfile``: a path of the form
       ``/path/pdbcode.pdb``; the code is derived from it with
       ``process_filename_for_pdb``.
    3. ``carbohydrate`` keeps the residues named in ``carbohydrate_list``;
       ``nocarbohydrate`` removes them.
    4. ``renumber`` calls ``renumber_residues`` on the receptor;
       ``original_numbers`` keeps the numbering found in the file.

    This function itself writes ``everything.pdb``, ``nowaters.pdb`` and
    ``rec.pdb`` to the current directory.  ``Preprocess_PDB`` and
    ``write_list`` are defined elsewhere and presumably write further files.

    All arguments are validated before any download or file processing is
    started.  Every failure (bad arguments, missing or empty input file)
    prints a message and terminates with exit status 1.
    """
    if len(sys.argv) != 5:  # program name + four required arguments
        print("ERORR: not the right number of arguments")
        print("syntax: ~/zzz.scripts/be_blasti.py ( --pdbcode pdbcodename | --pdbfile pdbfilename ) ( carbohydrate| nocarbohydrate ) ( renumber | original_numbers )")
        sys.exit(1)

    flag, pdb_arg, carbohydrate_arg, numbering_arg = sys.argv[1:5]

    # Validate every option up front so that a typo in a later argument does
    # not cost a network download first.
    if flag not in ('--pdbcode', '--pdbfile'):
        print("flag is needed.")
        print("options: --pdbcode (download from pdb) or --pdbfile (spesify the file locations)")
        sys.exit(1)

    if carbohydrate_arg == 'carbohydrate':
        flag_carbohydrate = True
    elif carbohydrate_arg == 'nocarbohydrate':
        flag_carbohydrate = False
    else:
        print("ERORR: the second parameter can be carbohydrate or nocarbohydrate")
        sys.exit(1)

    if numbering_arg == 'renumber':
        flag_renumber = True
    elif numbering_arg == 'original_numbers':
        flag_renumber = False
    else:
        print("ERORR: the third parameter can be renumber (renumber residues or original_numbers (use the curent numbering)")
        print("       the renumber is recomended.  get ride of code for insertion of residues.")
        sys.exit(1)

    if flag == '--pdbcode':
        # A PDB code was given: download the entry into the working directory.
        # NOTE(review): RCSB now appears to serve files from
        # files.rcsb.org/download -- confirm this legacy URL still resolves.
        pdbcode = pdb_arg
        url = 'http://www.rcsb.org/pdb/files/' + pdbcode + '.pdb'
        pdb_path = pdbcode + ".pdb"
        print("downloading with urllib")
        urllib.urlretrieve(url, pdb_path)
    else:
        # A file name was given; it should have the form /path/pdbcode.pdb
        pdb_path = pdb_arg
        pdbcode = process_filename_for_pdb(pdb_path)

    print("pdbcode = " + pdbcode)
    print("file = " + pdb_path)

    # Per the original author's notes: Preprocess_PDB splits up the
    # "alternate location indicator" (ALI) records, which are not stored in
    # BIO PDB, and creates 3 files: (1) the original file, (2) everything in
    # common + the ALI mark A, and (3) everything in common + the ALI mark B.
    # File (2) is used for the following steps and is copied to pdbcode.pdb.
    Preprocess_PDB(pdb_path)

    # Checked only after preprocessing, as in the original flow.
    if not os.path.exists(pdb_path) or os.path.getsize(pdb_path) == 0:
        print(pdb_path + " is empty or does not exist ")
        sys.exit(1)

    parser = BP.PDBParser()
    struc = parser.get_structure(pdbcode, pdb_path)

    io = BP.PDBIO()
    io.set_structure(struc)
    io.save("everything.pdb")

    surf = BP.get_surface(pdb_path)

    get_structure_stat(struc, surf)

    # Remove waters.
    struc = get_substructure_remove_list(struc, ['HOH'])
    # Remove other small molecules (HETATMs) that are not possible ligands.
    struc = get_substructure_remove_list(struc, notligand_list)

    # Remove carbohydrates unless the user asked to keep them.
    if not flag_carbohydrate:
        struc = get_substructure_remove_list(struc, carbohydrate_list)

    # Re-register the filtered structure before saving it.
    io.set_structure(struc)
    io.save("nowaters.pdb")

    # If there are multiple models, choose the first model.
    struc = get_structure_one_model(struc)

    # Identify small peptides, write them out, then remove them.
    print("idenifying small peptides")
    peptides_list, peptides_struc_list = get_peptides(struc)
    write_list('pep', 1, peptides_struc_list, surf, io)
    struc = get_receptor(struc, [], peptides_list, -1)

    # Identify ligands and write them out.
    print("idenifying ligands")
    lig_list, lig_struc_list = get_ligs(struc)
    lignum = write_list('lig', 1, lig_struc_list, surf, io)

    # Check whether ligands are attached to, or clash with, a neighbor.
    are_ligands_close(lig_list)

    # Identify a receptor compatible with the ligands/peptides.  Per the
    # original author's notes, only chains close to a ligand/peptide are kept:
    # get_receptor keeps chains within a certain distance (7 Angstroms) of the
    # ligand, or the chain closest to the ligand.
    receptor_struc = get_receptor(struc, lig_list, [], lignum)

    # Renumber all chains; each chain will start at one.
    if flag_renumber:
        renumber_residues(receptor_struc)

    io.set_structure(receptor_struc)
    io.save("rec.pdb")
# Script-level variant of the pipeline: the PDB code is the first command-line
# argument and the input file is looked up under a fixed project directory.
pdbcode = sys.argv[1]
# Alternative input (original, unprocessed file):
#file = '/raid9/tbalius/Projects/ProteomicDOCKing/plversion1/'+pdbcode+'/'+pdbcode+'/'+pdbcode+'.pdb.ori'
file = '/raid9/tbalius/Projects/ProteomicDOCKing/plversion1/' + pdbcode + '/' + pdbcode + '/' + pdbcode + '.pdb'

if (not os.path.exists(file) or os.path.getsize(file) == 0):
    print(file + " is empty or does not exist ")
    # Report failure with a non-zero status; sys.exit does not depend on the
    # interactive ``exit`` helper installed by the site module.
    sys.exit(1)

parser = BP.PDBParser()
struc = parser.get_structure(pdbcode, file)

io = BP.PDBIO()
io.set_structure(struc)
io.save("everything.pdb")

surf = BP.get_surface(file)

get_structure_stat(struc, surf)

## remove waters.
struc = get_substructure_remove_list(struc, ['HOH'])
# Re-register the filtered structure with the writer; without this the writer
# may still hold the structure that was saved as everything.pdb.
io.set_structure(struc)
io.save("nowaters.pdb")

get_structure_stat(struc, surf)

## if multiple models choose the first model.
struc = get_structure_one_model(struc)
get_structure_stat(struc, surf)

## identify ligands