import os, timeit, h5py
import MinHash as MH
import numpy as np
import logging
fid = open('/nfs1/Koslicki_Lab/koslickd/MinHash/Data/FileNames.txt', 'r')
file_names = fid.readlines()
fid.close()
file_names = [name.strip() for name in file_names]
training_n = 50000
out_file_names = [
    "/nfs1/Koslicki_Lab/koslickd/MinHash/Out/N" + str(training_n) + "k31/" +
    os.path.basename(item) + ".CE.h5" for item in file_names
]
CEs = MH.import_multiple_hdf5(out_file_names)

A = MH.form_jaccard_count_matrix(
    CEs)  #NOTE!!! I only need to form this for the indicies where Y[i] > 0

################################
# Test the lsqnonneg stuff
# NOTE: It's probably best to precompute the A matrices, then use plain MH.lsqnonneg()
# Read in all the saved hashes
import sys, os
sys.path.append(
    '/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import MinHash as MH
import numpy as np
fid = open('/nfs1/Koslicki_Lab/koslickd/MinHash/Data/FileNames.txt', 'r')
file_names = fid.readlines()
fid.close()
file_names = [name.strip() for name in file_names]
training_n = 5000
import sys
sys.path.append('/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import os, timeit, h5py
import MinHash as MH
import numpy as np
import logging
fid = open('/nfs1/Koslicki_Lab/koslickd/MinHash/Data/FileNames.txt', 'r')
file_names = fid.readlines()
fid.close()
file_names = [name.strip() for name in file_names]
training_n = 50000
out_file_names = ["/nfs1/Koslicki_Lab/koslickd/MinHash/Out/N"+str(training_n)+"k31/" + os.path.basename(item) + ".CE.h5" for item in file_names]
CEs = MH.import_multiple_hdf5(out_file_names)

A = MH.form_jaccard_count_matrix(CEs)  #NOTE!!! I only need to form this for the indicies where Y[i] > 0


################################
# Test the lsqnonneg stuff
# NOTE: It's probably best to precompute the A matrices, then use plain MH.lsqnonneg()
# Read in all the saved hashes
import sys, os
sys.path.append('/nfs1/Koslicki_Lab/koslickd/Repositories/MinHashMetagenomics/src/')
import MinHash as MH
import numpy as np
fid = open('/nfs1/Koslicki_Lab/koslickd/MinHash/Data/FileNames.txt', 'r')
file_names = fid.readlines()
fid.close()
file_names = [name.strip() for name in file_names]
training_n = 5000