示例#1
0
def _test_tic_sampling(yaml_file, protein_name, tic_list, n_frames, scheme):
    """Sanity-check the trajectories sampled along the first two tICs.

    ``sample_for_all_proteins`` is run for ``protein_name`` with the given
    ``tic_list``, ``n_frames`` and ``scheme``.  Then, for tIC 0 and tIC 1
    (the loop is fixed to these two indices, independent of ``tic_list``),
    the written ``tic<N>.xtc`` file is loaded, featurized with phi/psi/chi1
    dihedrals, projected through ``prt.tica_mdl`` and rounded.  The test
    asserts that:

    * the first frame's value along the tIC is <= the last frame's value;
    * every frame's value lies inside the rounded [min, max] range of the
      values stored in the protein's ``tica_data``.

    Returns ``True`` when every assertion holds.
    """
    sample_for_all_proteins(yaml_file, [protein_name],
                            tic_list, n_frames, scheme=scheme)
    series = ProteinSeries(yaml_file)
    protein = Protein(series, protein_name)

    # Everything the sampler wrote for this protein lives in one folder.
    protein_dir = os.path.join(base_dir, yaml_file["mdl_dir"], protein_name)
    topology_path = os.path.join(protein_dir, "prot.pdb")
    featurizer = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])

    for tic_index in (0, 1):
        sampled_path = os.path.join(protein_dir, "tic%d.xtc" % tic_index)
        sampled_traj = mdt.load(sampled_path, top=topology_path)
        print(sampled_traj.n_frames)

        dihedrals = featurizer.partial_transform(sampled_traj)
        # transform() is given a one-element list; keep only that entry.
        projected = np.round(protein.tica_mdl.transform([dihedrals]))[0]

        # the sampled frames should run from the low end to the high end
        print("Look here", projected)
        assert projected[0][tic_index] <= projected[-1][tic_index]

        # Pool this tIC's values over every trajectory of the protein and
        # sort them so the extremes sit at the two ends.
        pooled = [
            value
            for traj_tica_data in protein.tica_data.values()
            for value in traj_tica_data[:, tic_index]
        ]
        pooled = np.round(np.sort(pooled))

        print(tic_index)
        tic_values = projected[:, tic_index]
        above_min = tic_values >= pooled[0]
        below_max = tic_values <= pooled[-1]
        print(above_min)
        print(below_max)
        # every sampled frame must stay inside the observed range
        assert above_min.all()
        assert below_max.all()
    return True
示例#2
0
def test_dihedral_feat():
    """Check that the project featurization matches a direct featurization.

    For each of the two test proteins, ``featurize_project_wrapper`` is run
    with the default featurizer (``feat=None``).  Three trajectory files are
    then drawn with ``np.random.choice``, featurized directly with a
    phi/psi/chi1 ``DihedralFeaturizer``, and compared via ``np.allclose``
    against the ``.jl`` file of the same stem in the feature directory.

    Returns ``True`` when every comparison passes.
    """
    print(base_dir)
    pool = Pool(6)
    yaml_file = load_yaml_file(
        os.path.join(base_dir, "mdl_dir", "project.yaml"))

    for prt in ("kinase_1", "kinase_2"):
        print(prt)
        # The value is not used afterwards; the lookup itself still fails
        # loudly if the protein has no project entry.
        _first_project = yaml_file["project_dict"][prt][0]
        featurize_project_wrapper(yaml_file, prt, feat=None, stride=1,
                                  view=pool)

        reference_feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])
        traj_pattern = os.path.join(base_dir, prt,
                                    yaml_file["protein_dir"], "*.hdf5")
        candidates = glob.glob(traj_pattern)

        for traj_path in np.random.choice(candidates, 3):
            direct = reference_feat.partial_transform(mdt.load(traj_path))

            stem = os.path.splitext(os.path.basename(traj_path))[0]
            stored_path = os.path.join(base_dir, prt,
                                       yaml_file["feature_dir"],
                                       stem + ".jl")
            stored = verboseload(stored_path)

            assert np.allclose(direct, stored)

    return True
def featurize_file(job_tuple):
    """Featurize a single trajectory file and dump the features to disk.

    Parameters
    ----------
    job_tuple : tuple
        ``(yaml_file, protein, feat, traj_file, stride)``.  ``yaml_file`` is
        handed to ``load_yaml_file``; ``feat`` may be ``None``, in which case
        a phi/psi/chi1 ``DihedralFeaturizer`` is used.

    Side effects
    ------------
    * Writes ``<base_dir>/<protein>/<feature_dir>/<trajectory stem>.jl``.
    * Writes ``feature_descriptor.h5`` in the same folder when it does not
      exist yet and the featurizer offers ``describe_features``.
    * Deletes ``traj_file`` (after a warning) when it cannot be loaded.

    Returns ``None``.
    """
    # NOTE(review): ``stride`` is unpacked but never applied when loading the
    # trajectory -- confirm whether callers expect it to be honoured.
    yaml_file, protein, feat, traj_file, stride = job_tuple
    yaml_file = load_yaml_file(yaml_file)

    if feat is None:
        feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])

    _check_output_folder_exists(yaml_file, protein)

    output_folder = os.path.join(yaml_file["base_dir"],
                                 protein,
                                 yaml_file["feature_dir"])

    traj_name = os.path.splitext(os.path.basename(traj_file))[0]
    output_fname = os.path.join(output_folder, traj_name + ".jl")
    feat_descriptor = os.path.join(output_folder, "feature_descriptor.h5")

    try:
        trj = mdt.load(traj_file)
    except Exception:
        # Deliberately not a bare ``except``: a KeyboardInterrupt or
        # SystemExit raised while loading must not end up deleting a
        # perfectly good trajectory file.
        warnings.warn("Removing %s because of misformed trajectory"%traj_file)
        os.remove(traj_file)
        return

    features = feat.partial_transform(trj)
    verbosedump(features, output_fname)

    # The descriptor is written once per folder, from the first frame only.
    if not os.path.isfile(feat_descriptor) and hasattr(feat, "describe_features"):
        dih_df = pd.DataFrame(feat.describe_features(trj[0]))
        verbosedump(dih_df, feat_descriptor)

    return
def feat_traj(traj):
    """Compute combined dihedral and solvent-shell features for a trajectory.

    Parameters
    ----------
    traj
        Passed unchanged to ``mdt.load``.

    Returns
    -------
    combined_features
        ``np.hstack`` of, in this order: the phi/psi/chi1 dihedral features,
        the solvent-shell features around protein O/N atoms, and the
        solvent-shell features around the ATP O/N atoms plus ``MG``.

    Notes
    -----
    The original body carried a block of file-dumping code after the
    ``return`` statement.  It could never run (and called ``os.path.join``
    with a tuple), so it has been removed; nothing is written to disk here.
    """
    polar_elements = ("oxygen", "nitrogen")

    # load again to get the waters
    trj = mdt.load(traj)
    atoms = list(trj.topology.atoms)

    # polar atoms of residues named "atp", plus every atom of "MG" residues
    atp_solute = [
        atom.index
        for atom in atoms
        if (atom.residue.name == "atp" and atom.element.name in polar_elements)
        or atom.residue.name == "MG"
    ]
    # oxygen and nitrogen atoms of the protein
    solute_indices = [
        atom.index
        for atom in atoms
        if atom.residue.is_protein and atom.element.name in polar_elements
    ]
    # every non-hydrogen atom of the water residues
    solvent_indices = [
        atom.index
        for atom in atoms
        if atom.residue.is_water and atom.element.name != "hydrogen"
    ]

    # set up featurizers
    # NOTE(review): the trailing ``2, 0.3`` are presumably the number of
    # shells and the shell width -- confirm against the wmsm API.
    atp_feat = wmsm.SolventShellsFeaturizer(atp_solute, solvent_indices, 2, 0.3)
    water_feat = wmsm.SolventShellsFeaturizer(solute_indices, solvent_indices, 2, 0.3)
    dihedral_feat = DihedralFeaturizer(["phi", "psi", "chi1"])

    # calculate features
    water_features = water_feat.partial_transform(trj)
    dihedral_features = dihedral_feat.partial_transform(trj)
    atp_features = atp_feat.partial_transform(trj)

    return np.hstack((dihedral_features, water_features, atp_features))
def featurize_traj(job_tuple):
    """Compute and save the dihedral features of a single trajectory.

    Parameters
    ----------
    job_tuple : tuple
        ``(mutant, mutant_dir, project, proj_folder, proj_top_folder,
        traj_file, stride, save_common, allowed_residue_ind)``.

        * The topology is read from ``<proj_top_folder>/<prefix>.pdb`` where
          ``<prefix>`` is the part of the trajectory file name before the
          first underscore.
        * ``stride`` is forwarded to ``mdtraj.load``.
        * When ``save_common`` is true, the subset of dihedral features whose
          residues are all contained in ``allowed_residue_ind`` is saved as
          well, together with the bookkeeping needed to reproduce the subset.

    Side effects
    ------------
    Writes ``<project>_<trajectory stem>.h5`` under
    ``<mutant_dir>/features/dihedral_features/`` and, when ``save_common`` is
    set, under ``<mutant_dir>/features/common_basis/dihedral_features/``
    alongside ``saved_dihed_feat.h5``.

    Returns ``None``.  A trajectory that cannot be loaded is reported on
    stdout and skipped.
    """
    # separate out the job tuple into required things
    (mutant, mutant_dir, project, proj_folder, proj_top_folder, traj_file,
     stride, save_common, allowed_residue_ind) = job_tuple

    traj_basename = os.path.basename(traj_file)

    # load top file (used to describe the features for the common basis)
    top_path = os.path.join(proj_top_folder,
                            "%s.pdb" % traj_basename.split("_")[0])
    top_trj = mdtraj.load(top_path)

    # set up featurizer objects
    dihedral_feat = DihedralFeaturizer(types=['phi', 'psi', 'chi1'])

    # load the trajectory
    try:
        trj = mdtraj.load(traj_file, stride=stride)
    except Exception:
        # Not a bare ``except``: Ctrl-C / SystemExit must still propagate.
        print("Cant featurize %s" % traj_file)
        return

    # setup file names
    traj_name = os.path.splitext(traj_basename)[0]
    print(traj_name)
    output_name = str(project) + "_" + traj_name + ".h5"
    dihedral_output_file = os.path.join(
        mutant_dir, "features/dihedral_features", output_name)
    combined_output_file = os.path.join(
        mutant_dir, "features/combined_features", output_name)

    # Recomputation is currently always forced.  The existing file is only
    # inspected when that switch is turned off, which avoids a pointless (and
    # possibly failing) load of a result that would be ignored anyway.
    do_again = True
    already_done = False
    if not do_again and os.path.isfile(combined_output_file):
        existing = verboseload(combined_output_file)
        # Done means the stored features cover every frame of the trajectory
        # (the original compared with ``!=``, which inverted the meaning).
        already_done = existing.shape[0] == trj.n_frames

    if already_done:
        print("skipping featurization for %s since its already done" % traj_name)
        return

    dihedral_features = dihedral_feat.partial_transform(trj)

    # now we can dump
    verbosedump(dihedral_features, dihedral_output_file)

    if save_common:
        common_dir = os.path.join(
            mutant_dir, "features/common_basis/dihedral_features")

        dih_df = pandas.DataFrame(dihedral_feat.describe_features(top_trj))

        # Keep a feature only if every residue it involves is allowed.
        # dtype=bool keeps the mask usable even when there are no features.
        dih_f_ind = numpy.array(
            [set(i).issubset(allowed_residue_ind) for i in dih_df["resid"]],
            dtype=bool)

        subset_dihedral_features = dihedral_features[:, dih_f_ind]

        # now we can dump
        verbosedump(subset_dihedral_features,
                    os.path.join(common_dir, output_name))
        # save the featurizer information.
        verbosedump([dih_df, allowed_residue_ind, dih_f_ind],
                    os.path.join(common_dir, "saved_dihed_feat.h5"))

    return
示例#6
0
def _fit_transform(prt, trj):
    """Push a trajectory through the protein's tICA and k-means models.

    ``trj`` is featurized with phi/psi/chi1 dihedrals, wrapped in a
    one-element list for ``prt.tica_mdl.transform``, and the result is handed
    to ``prt.kmeans_mdl.transform``, whose output is returned.  Despite the
    name, no model is fitted here; both are only applied.
    """
    dihedrals = DihedralFeaturizer(
        types=['phi', 'psi', 'chi1']).partial_transform(trj)
    tica_coords = prt.tica_mdl.transform([dihedrals])
    return prt.kmeans_mdl.transform(tica_coords)