# Script: load previously saved MinHash data from HDF5 files and form the
# matrix returned by MH.form_jaccard_count_matrix.
import logging
import os
import sys
import timeit

import h5py
import numpy as np

# The MinHash module lives in the project source tree, so that directory has
# to be on sys.path *before* the import below. Appending (rather than
# inserting at the front) means an already-importable MinHash still wins.
src_dir = '/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/'
if src_dir not in sys.path:
    sys.path.append(src_dir)

import MinHash as MH

# Text file with one input file name per line.
file_names_path = '/nfs1/Koslicki_Lab/koslickd/MinHash/Data/FileNames.txt'

# Use a context manager so the handle is closed even if reading fails.
with open(file_names_path, 'r') as fid:
    file_names = [name.strip() for name in fid]

training_n = 50000

# Output directory name encodes the training size; the "k31" suffix is
# presumably the k-mer size used when the files were written (TODO confirm).
out_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/N" + str(training_n) + "k31/"
out_file_names = [
    out_dir + os.path.basename(item) + ".CE.h5" for item in file_names
]

CEs = MH.import_multiple_hdf5(out_file_names)

# NOTE!!! This only needs to be formed for the indices where Y[i] > 0.
A = MH.form_jaccard_count_matrix(CEs)

################################
# Test the lsqnonneg stuff
# NOTE: It's probably best to precompute the A matrices, then use plain
# MH.lsqnonneg()

# Read in all the saved hashes. The file list is re-read and training_n is
# switched to the smaller value; the code that consumes them is not visible
# in this section.
with open(file_names_path, 'r') as fid:
    file_names = [name.strip() for name in fid]

training_n = 5000
# Script: load previously saved MinHash data from HDF5 files and form the
# matrix returned by MH.form_jaccard_count_matrix.
import logging
import os
import sys
import timeit

import h5py
import numpy as np

# The MinHash module lives in the project source tree, so that directory has
# to be on sys.path before the import below. The guard keeps the path from
# being appended more than once.
src_dir = '/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/'
if src_dir not in sys.path:
    sys.path.append(src_dir)

import MinHash as MH

# Text file with one input file name per line.
file_names_path = '/nfs1/Koslicki_Lab/koslickd/MinHash/Data/FileNames.txt'

# Use a context manager so the handle is closed even if reading fails.
with open(file_names_path, 'r') as fid:
    file_names = [name.strip() for name in fid]

training_n = 50000

# Output directory name encodes the training size; the "k31" suffix is
# presumably the k-mer size used when the files were written (TODO confirm).
out_dir = "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/N" + str(training_n) + "k31/"
out_file_names = [
    out_dir + os.path.basename(item) + ".CE.h5" for item in file_names
]

CEs = MH.import_multiple_hdf5(out_file_names)

# NOTE!!! This only needs to be formed for the indices where Y[i] > 0.
A = MH.form_jaccard_count_matrix(CEs)

################################
# Test the lsqnonneg stuff
# NOTE: It's probably best to precompute the A matrices, then use plain
# MH.lsqnonneg()

# Read in all the saved hashes. The file list is re-read and training_n is
# switched to the smaller value; the code that consumes them is not visible
# in this section.
with open(file_names_path, 'r') as fid:
    file_names = [name.strip() for name in fid]

training_n = 5000