def matrix_from_file_paths(path_list, s):
    """Load one count vector per path and stack the vectors as matrix rows.

    Each path is handed to ``open_count_hash(path, s)`` and the result is
    converted to a ``float32`` array.

    Parameters
    ----------
    path_list : iterable of paths accepted by ``open_count_hash``.
    s : passed through unchanged as the second argument of
        ``open_count_hash`` (presumably the hash size -- confirm against
        that helper).

    Returns
    -------
    ``[]`` when ``path_list`` is empty, a one-element list ``[row]`` when it
    holds a single path, and otherwise a ``float32`` array with one leading
    row per path, in input order.  These three shapes are kept deliberately
    so existing callers see exactly what they saw before.

    Raises
    ------
    ValueError
        From ``concatenate`` if the loaded vectors do not share a shape.
    """
    rows = [array(open_count_hash(p, s), dtype=float32) for p in path_list]
    if len(rows) < 2:
        # Historical behaviour: zero or one row is returned as a plain list.
        return rows
    # Concatenate once at the end; growing the matrix inside the loop copied
    # every previously loaded row again on each iteration.
    return concatenate([[row] for row in rows])
def condition_and_map_clusters(eigenvectors, nonzeros, global_weights, file_paths, completed_count_files, s, out_prefix='/mnt/'):
    """Build a conditioned, projected count vector for every cluster file.

    For each path in ``file_paths``:

    1. The cluster id is taken as the text between the last ``'/'`` that
       precedes the first ``'_velvet'`` and that ``'_velvet'`` itself (the
       start of the string is used when there is no such ``'/'``).
    2. Counts are read with ``open_count_hash`` from
       ``out_prefix + cluster_id + '.txt'`` when that name is in
       ``completed_count_files``; otherwise they are computed from the file
       with ``create_kmer_hash_counts_fasta``, which is given the same name
       as its ``out_path``.
    3. The counts are converted to ``float32``, indexed with ``nonzeros``,
       transformed as ``log(counts + 1) * global_weights`` and multiplied
       by ``eigenvectors`` with ``dot``.
    4. The cluster id is printed as a progress marker.

    Parameters
    ----------
    eigenvectors : right-hand operand of the final ``dot``.
    nonzeros : index applied to the raw count array.
    global_weights : multiplied element-wise with the log-scaled counts.
    file_paths : iterable of path strings, each containing ``'_velvet'``.
    completed_count_files : container tested with ``in`` for the output
        file name of each cluster.
    s : passed through to the counting helpers (presumably the hash size --
        confirm against those helpers).
    out_prefix : string prepended to ``cluster_id + '.txt'``.

    Returns
    -------
    ``[]`` when ``file_paths`` is empty, a one-element list when it holds a
    single path, and otherwise an array with one leading row per path, in
    input order.  These shapes match the previous implementation.

    Raises
    ------
    ValueError
        If a path does not contain ``'_velvet'``, or (from ``concatenate``)
        if the projected vectors do not share a shape.
    """
    rows = []
    for fp in file_paths:
        marker = fp.index('_velvet')
        # rfind returns -1 when there is no '/', so the slice then starts at 0.
        cluster_id = fp[fp[:marker].rfind('/') + 1:marker]
        outfile = out_prefix + cluster_id + '.txt'
        if outfile in completed_count_files:
            H = open_count_hash(outfile, s)
        else:
            # Close the input as soon as counting is done; it used to be
            # left open, leaking one handle per uncached cluster.
            # (A create_kmer_hash_counts variant was previously tried here.)
            with open(fp, 'r') as f:
                H = create_kmer_hash_counts_fasta(f, s, block_size=1, out_path=outfile)
        H = array(H, dtype=float32)[nonzeros]
        # NOTE(review): the conditioning below is re-implemented inline
        # instead of calling the function originally used to condition the
        # data; the two must be kept in sync or the projection is invalid.
        H = log(H + 1) * global_weights
        H = dot(H, eigenvectors)
        rows.append(H)
        # Parenthesised form prints the same text on Python 2 and 3.
        print(cluster_id)
    if len(rows) < 2:
        # Historical behaviour: zero or one row is returned as a plain list.
        return rows
    # Concatenate once at the end; growing the matrix inside the loop copied
    # every previously computed row again on each iteration.
    return concatenate([[row] for row in rows])