def read_sample_patterns(dir_path, NX, step):
    """Read a strided subset of the pattern files in ``dir_path`` across MPI ranks.

    Rank 0 lists ``dir_path``, drops every name found in the module-level
    ``blacklist``, and parses an integer index from ``filename[-9:-4]`` (the
    five characters that precede the last four characters of the name).
    The index is converted to an ``(X, Y)`` pair with ``idx2XY(index, NX)``
    and the file is kept only when ``X % step[0] == 0`` and
    ``Y % step[1] == 0``.  The selected paths are divided into
    ``MPI_COMM.size`` groups by ``split_workload`` and scattered; every rank
    reads its own group with ``read_file`` and the results are gathered back
    on rank 0, where they are flattened into a single list.

    This is a collective operation: every rank must call it.

    Args:
        dir_path: Directory containing the pattern files.
        NX: Grid width forwarded to ``idx2XY``.
        step: Two-element sequence ``(step_x, step_y)`` giving the sampling
            stride applied to the ``X`` and ``Y`` values from ``idx2XY``.

    Returns:
        On rank 0, a flat list with the patterns read by all ranks.  On any
        other rank, whatever ``MPI_COMM.gather`` returns there (presumably
        ``None``, assuming ``MPI_COMM`` is an mpi4py communicator -- confirm).
    """
    if MPI_RANK == 0:
        t0 = timer()
        # os.listdir() returns names in arbitrary order; sorting makes the
        # per-rank split and the order of the gathered patterns reproducible.
        filenames = sorted(os.listdir(dir_path))
        logging.debug('Found %d files in the directory %s.', len(filenames), dir_path)

        sample_files = []
        for filename in filenames:
            if filename in blacklist:
                continue
            # Characters [-9:-4] of the name hold the linear pattern index.
            X, Y = idx2XY(int(filename[-9:-4]), NX)
            if X % step[0] == 0 and Y % step[1] == 0:
                sample_files.append(os.path.join(dir_path, filename))
        logging.debug('Selected %d sample files according to step size %s.',
                      len(sample_files), step)

        file_groups = split_workload(sample_files, MPI_COMM.size)
    else:
        # Only the root contributes data to the scatter.
        file_groups = None

    filenames = MPI_COMM.scatter(file_groups, root=0)
    logging.debug('Assigned %d DAT files to read.', len(filenames))

    t0_loc = timer()
    patterns = read_file(filenames)
    logging.debug('Got %d [local] sample patterns. %g sec',
                  len(patterns), timer() - t0_loc)

    patterns = MPI_COMM.gather(patterns, root=0)
    if MPI_RANK == 0:
        # gather yields one list per rank on the root; flatten them in rank order.
        patterns = [t for g in patterns for t in g]
        logging.info('Gathered %d sample patterns in total. %g sec',
                     len(patterns), timer() - t0)
    return patterns
else: model = None model = MPI_COMM.bcast(model, root=0) scoreinds = score_dir(extractor, model, dir_path, limit=None, batch_size=200) if MPI_RANK == 0: labeler = SeqLabeler(seq_files) else: labeler = None labeler = MPI_COMM.bcast(labeler, root=0) scoreinds = relabel(labeler, scoreinds) if MPI_RANK == 0: Z = np.empty([NY, NX]) Z[:] = np.nan for score, idx in scoreinds: if score is not None: ix, iy = idx2XY(idx, NX) if ix < NY: Z[ix, iy] = score logging.debug('Z matrix has %d nans' % sum(1 for row in Z for z in row if np.isnan(z))) np.savetxt('Z_au31.txt', Z) logging.info('Write Z matrix into Z_au31.txt in ' + os.path.dirname(os.path.abspath(__file__))) from plotseq import plot_seq # # Z = np.loadtxt('Z.txt') plot_seq(Z, step, colormap='jet', filename=scratch + "img/clustering_" + case_name)