def main():
    """Cluster the CEpiMatch score matrix and save its dendrogram.

    Loads ``data/CEpiMatch.csv`` through ``load_mat``, inverts the scores
    (``1 / mat``), feeds the result to ``linkage`` and draws the resulting
    tree with the matrix's ``rlabels`` as leaf labels. The picture is
    written to ``img/166.png``.

    No new figure is opened here, so everything is drawn on pyplot's
    current figure.
    """
    scores = load_mat("data/CEpiMatch.csv")
    # The inverted scores are what gets clustered (presumably this turns a
    # similarity into a distance-like quantity -- confirm against load_mat).
    inverted = 1 / scores
    tree = linkage(inverted)

    dendrogram(
        tree,
        labels=inverted.rlabels,
        orientation="right",
        leaf_font_size=5,
    )
    plt.title("Hierarchical clustering result")
    plt.ylabel("PDB ID")
    plt.plot()
    plt.savefig("img/166.png")
def for_lysozyme():
    """Plot lysozyme dendrograms for every score file under one triangle policy.

    The policy is read from ``sys.argv[1]``:

    * ``"upper"``   -- keep only the upper triangle of the matrix;
    * ``"lower"``   -- keep only the lower triangle, transposed into the
      upper position;
    * ``"average"`` -- element-wise mean of the two variants above;
    * anything else -- the matrix is clustered unchanged.

    For each score file under ``data/`` the matrix is split with
    ``categorize`` (using the module-level ``data``), the ``"lysozyme"``
    entry is selected and inverted (``1 / mat``) unless the file is
    ``RMSD.csv``, and the dendrogram is saved to
    ``img/for_lysozyme_<method>/<file stem>.png``. The output directory is
    not created here.

    Raises:
        IndexError: if no method argument was given on the command line.
    """
    import sys

    method = sys.argv[1]
    score_files = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    for score_file in score_files:
        mat = categorize(load_mat("data/%s" % score_file), data)["lysozyme"]
        # RMSD is the only file clustered as-is; every other score is inverted.
        if score_file != "RMSD.csv":
            mat = 1 / mat

        # Grab the labels before the triangle step: np.triu / np.tril may hand
        # back a plain ndarray that no longer carries the ``rlabels`` attribute.
        labels = mat.rlabels

        if method == "upper":
            mat = np.triu(mat)
        elif method == "lower":
            mat = np.transpose(np.tril(mat))
        elif method == "average":
            # Average the upper triangle with the transposed lower triangle.
            # (tril + triu) / 2 would just be the full matrix with halved
            # off-diagonal entries, i.e. no averaging of mirrored pairs.
            mat = (np.triu(mat) + np.transpose(np.tril(mat))) / 2

        Z = linkage(mat)
        fig = plt.figure()
        try:
            dendrogram(Z, labels=labels, orientation="right")
            plt.title("Hierarchical clustering for lysozyme")
            plt.ylabel("PDB ID")
            plt.savefig(
                "img/for_lysozyme_%s/%s.png" % (method, score_file.split(".")[0])
            )
        finally:
            # One figure per file: release it so figures do not pile up.
            plt.close(fig)
def compare():
    """Plot one hierarchical-clustering dendrogram per category and score file.

    Every score file under ``data/`` is loaded with ``load_mat`` and split
    into named sub-matrices by ``categorize`` (using the module-level
    ``data``). Each sub-matrix is inverted (``1 / mat``) unless it comes from
    ``RMSD.csv``, clustered with ``linkage`` and drawn with its ``rlabels``
    as leaf labels. The plot is saved to ``img/<category>/<file stem>.png``;
    the output directories are not created here.
    """
    score_files = [
        "CEpiMatch.csv",
        "Multiprot.csv",
        "SPa.csv",
        "SPb.csv",
        "SPe.csv",
        "TMa.csv",
        "TMb.csv",
        "TMc.csv",
        "MATT.csv",
        "RMSD.csv",
    ]

    for score_file in score_files:
        # Both values depend only on the file, so compute them once per file.
        type_name = score_file.split(".")[0]
        needs_inversion = score_file != "RMSD.csv"

        mats = categorize(load_mat("data/%s" % score_file), data)
        for name, mat in mats.items():
            if needs_inversion:
                mat = 1 / mat
            Z = linkage(mat)

            fig = plt.figure()
            try:
                dendrogram(Z, labels=mat.rlabels, orientation="right")
                plt.title("%s %s" % (name, type_name))
                plt.ylabel("PDB ID")
                plt.savefig("img/%s/%s.png" % (name, type_name))
            finally:
                # A figure is opened for every (file, category) pair; close it
                # so open figures do not accumulate across the whole run.
                plt.close(fig)
# Location and base name of the .mat recording; Q7_path is handed to load_mat
# below as ``siren_path`` and mat_name as ``mat_name``.
Q7_path = os.path.join('acquired_data', 'mat_files', 'Smart')
mat_name = 'Siren_0001'

# Initializations
audios = dict()  # audio_name -> first value returned by librosa.load
sampling_rates = []  # second value returned by librosa.load, in audio_names order

# Load data
df_my_timestamps = load_csv(my_csv_path, csv_name)
for audio_name in audio_names:
    # sr=None asks librosa to keep the file's native sampling rate.
    audios[audio_name], sr = librosa.load(os.path.join(audio_path, audio_name + '.wav'), sr=None)
    sampling_rates.append(sr)
# NOTE(review): original indentation was lost; load_mat is assumed to run once,
# after the loop, since it does not use the loop variable -- confirm.
mic_mat, siren_mat = load_mat(mic_path=smart_path, siren_path=Q7_path, mat_name=mat_name)

# Operations
# Print each audio's name and array shape.
for name, audio in audios.items():
    print(name)
    print(audio.shape)
# NOTE(review): the prints below do not reference the loop variables, so they
# are assumed to sit outside the loop -- confirm against the original layout.
# They print the shapes of the GPS/INS time fields of both .mat structures.
print(mic_mat['Data1_GPS_Time_msec___Time_in_M'].shape)
print(mic_mat['Data1_GPS_Time_Week___Start_0h_'].shape)
print(mic_mat['Data1_INS_Time_msec___Time_in_M'].shape)
print(mic_mat['Data1_INS_Time_Week___Weeks__St'].shape)
print(siren_mat['Data1_GPS_Time_msec___Time_in_M'].shape)
print(siren_mat['Data1_GPS_Time_Week___Start_0h_'].shape)
print(siren_mat['Data1_INS_Time_msec___Time_in_M'].shape)