def test_common_contacts_featurizer_1():
    """Smoke-test ``CommonContactFeaturizer`` with ``contacts='all'``.

    The featurizer is built from a two-row alignment in which the fake
    sequence ('YGGFMF') is paired with the real sequence plus one
    trailing gap character. The test makes no assertions: it passes as
    long as constructing the featurizer, describing its features and
    transforming one randomly chosen trajectory do not raise.
    """
    # NOTE(review): another function with this same name appears later in
    # this file; if both live in one module, the later definition replaces
    # this one and this test is never collected -- confirm.
    trajectories = MetEnkephalin().get_cached().trajectories
    real_seq = trajectories[0].topology.to_fasta(0)

    # Hand-written "alignment": the fake sequence has an insertion, so the
    # real sequence is padded with a gap (presumably to equalise the row
    # lengths -- confirm against the dataset's sequence).
    alignment = {
        "actual": real_seq + "-",
        "fake": 'YGGFMF',
    }
    feat = CommonContactFeaturizer(
        alignment=alignment,
        contacts='all',
        same_residue=True,
    )

    # Pick one trajectory at random and run both public entry points on it.
    traj = trajectories[np.random.randint(len(trajectories))]
    description = pd.DataFrame(feat.describe_features(traj))
    features = feat.transform([traj])
def test_common_contacts_featurizer_1():
    """Smoke-test ``CommonContactFeaturizer`` with an explicit contact list.

    The alignment pairs the fake sequence ('YGGFMF') with the real
    sequence plus one trailing gap character. Contacts are every
    unordered pair of column indices up to the longest alignment row.
    The test makes no assertions: it passes as long as constructing the
    featurizer, describing its features and transforming one randomly
    chosen trajectory do not raise.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    real_seq = trajectories[0].topology.to_fasta(0)

    # Hand-written "alignment": the fake sequence has an insertion, so the
    # real sequence is padded with a gap character.
    alignment = {
        "actual": real_seq + "-",
        "fake": 'YGGFMF',
    }

    # All index pairs (i, j) with i < j over the longest alignment row.
    longest_row = max(len(row) for row in alignment.values())
    contacts = list(itertools.combinations(np.arange(longest_row), 2))

    feat = CommonContactFeaturizer(
        alignment=alignment,
        contacts=contacts,
        same_residue=True,
    )

    # Pick one trajectory at random and run both public entry points on it.
    traj = trajectories[np.random.randint(len(trajectories))]
    description = pd.DataFrame(feat.describe_features(traj))
    features = feat.transform([traj])
def test_common_contacts_featurizer_2():
    """Check that residue index 0 is absent from the described contacts.

    The real sequence is aligned against the fixed fake sequence 'FGGFM'
    with ``same_residue=True``; the test then asserts that no entry of
    the ``resids`` column returned by ``describe_features`` equals 0.
    """
    # NOTE(review): another function with this same name appears later in
    # this file; if both live in one module, the later definition replaces
    # this one and this test is never collected -- confirm.
    trajectories = MetEnkephalin().get_cached().trajectories
    real_seq = trajectories[0].topology.to_fasta(0)

    # Hand-written "alignment" of the real sequence against a fake one.
    # Presumably 'FGGFM' differs from the real sequence only at index 0,
    # which is why 0 is the index checked below -- confirm.
    alignment = {
        "actual": real_seq,
        "fake": 'FGGFM',
    }
    feat = CommonContactFeaturizer(
        alignment=alignment,
        contacts='all',
        same_residue=True,
    )

    traj = trajectories[np.random.randint(len(trajectories))]
    description = pd.DataFrame(feat.describe_features(traj))

    # Flatten every resids entry and require that none of them is 0.
    not_first_residue = [
        resid != 0
        for resid_group in description.resids
        for resid in resid_group
    ]
    assert np.all(not_first_residue)
def test_common_contacts_featurizer_2():
    """Check that a mutated residue is excluded from the described contacts.

    One randomly chosen position of the real sequence is replaced by "A"
    to form the fake sequence. With ``same_residue=True`` the test asserts
    that the mutated position never appears in the ``resids`` column
    returned by ``describe_features``.
    """
    trajectories = MetEnkephalin().get_cached().trajectories
    real_seq = trajectories[0].topology.to_fasta(0)

    # Randomly "mutate" one of the residues to alanine.
    mutated_at = np.random.randint(len(real_seq))
    mutant_seq = real_seq[:mutated_at] + "A" + real_seq[mutated_at + 1:]

    # Hand-written "alignment" of the real sequence against the mutant.
    alignment = {
        "actual": real_seq,
        "fake": mutant_seq,
    }
    feat = CommonContactFeaturizer(
        alignment=alignment,
        contacts='all',
        same_residue=True,
    )

    traj = trajectories[np.random.randint(len(trajectories))]
    description = pd.DataFrame(feat.describe_features(traj))

    # Flatten every resids entry and require that none is the mutated site.
    avoids_mutation = [
        resid != mutated_at
        for resid_group in description.resids
        for resid in resid_group
    ]
    assert np.all(avoids_mutation)