def backProjection(db, index_list):
    """Perform OFFLINE back projection function for a list of indices using given DB.

    Each index is dereferenced through the catalog's 'xid:reference' list to a
    (file_index, frame) pair, frames are grouped by source DCD file so each
    trajectory is loaded exactly once, and the selected frames' coordinates
    are collected.

    Args:
        db: redis-like catalog client exposing pipeline() and lindex().
        index_list: iterable of integer indices into 'xid:reference'.

    Returns:
        np.array of high dimensional points (one per resolvable index).

    Assumes NO CACHE or DESHAW.
    """
    logging.debug('-------- BACK PROJECTION: %d POINTS ---', len(index_list))
    # Dereference indices to (file, frame) tuples in one pipelined round trip
    pipe = db.pipeline()
    for idx in index_list:
        pipe.lindex('xid:reference', int(idx))
    generated_framelist = pipe.execute()

    # Group all generated indices by file index
    groupbyFileIdx = {}
    for i, idx in enumerate(generated_framelist):
        try:
            # NOTE(review): eval() on catalog data is unsafe if the catalog can
            # hold untrusted strings; entries look like "(file_index, frame)".
            # A missing (None) entry raises TypeError in eval and is skipped.
            file_index, frame = eval(idx)
        except TypeError as e:
            print('Bad Index:', str(idx))
            continue
        if file_index not in groupbyFileIdx:
            groupbyFileIdx[file_index] = []
        groupbyFileIdx[file_index].append(frame)

    # Dereference file index to filenames
    generated_frameMask = {}
    generated_filemap = {}
    for file_index in groupbyFileIdx.keys():
        filename = db.lindex('xid:filelist', file_index)
        if filename is None:
            logging.warning('Error file not found in catalog: %s', filename)
            # BUGFIX: skip missing catalog entries; falling through would call
            # os.path.exists(None) and raise TypeError.
            continue
        if not os.path.exists(filename):
            logging.warning('DCD File not found: %s', filename)
        else:
            key = os.path.splitext(os.path.basename(filename))[0]
            generated_frameMask[key] = groupbyFileIdx[file_index]
            generated_filemap[key] = filename

    # Add high-dim points to list of source points in a trajectory
    # Optimized for parallel file loading
    logging.debug('Sequentially Loading all trajectories')
    source_points = []
    for key, framelist in generated_frameMask.items():
        traj = datareduce.load_trajectory(generated_filemap[key])
        traj = datareduce.filter_alpha(traj)
        selected_frames = traj.slice(framelist)
        source_points.extend(selected_frames.xyz)
    return np.array(source_points)
def backProjection(self, index_list):
    """Perform back projection function for a list of indices.

    Return a list of high dimensional points (one per index). Check cache
    for each point and consolidate file I/O for all cache misses.

    Negative indices refer to historical DEShaw frames; non-negative ones
    index into the locally generated 'xid:reference' list. Cached points are
    packaged into one trajectory; misses are loaded from disk per file and
    joined onto the cached trajectory.

    Side effects: increments self.cache_hit / self.cache_miss, lazily
    populates self.xidreference, resets self.trajlist_async, connects the
    cache client, and records timings via microbench.
    """
    logging.debug('-------- BACK PROJECTION: %d POINTS ---', len(index_list))
    bench = microbench('bkproj', self.seqNumFromID())
    # reverse_index = {index_list[i]: i for i in range(len(index_list))}
    source_points = []          # cache-hit coordinate arrays, in scan order
    cache_miss = []             # (ftype, file, frames) tuples needing disk I/O
    self.trajlist_async = deque()
    # DEShaw topology is assumed here
    bench.start()

    # Dereference indices to (file, frame) tuples:
    historical_framelist = []
    generated_framelist = []
    # Lazily cache the full xid:reference list to avoid per-index catalog hits
    if self.xidreference is None:
        self.xidreference = self.catalog.lrange('xid:reference', 0, -1)
    # pipe = self.catalog.pipeline()
    logging.debug('Select Index List size = %d', len(index_list))
    for idx in index_list:
        # Negation indicates historical index:
        index = int(idx)
        if index < 0:
            file_index, frame = deshaw.refFromIndex(-idx)
            historical_framelist.append((file_index, frame))
            # logging.debug('[BP] DEShaw:  file #%d,  frame#%d', file_index, frame)
        else:
            generated_framelist.append(self.xidreference[index])
            # pipe.lindex('xid:reference', index)

    # Load higher dim point indices from catalog
    # logging.debug('Exectuting...')
    # start = dt.datetime.now()
    # generated_framelist = pipe.execute()
    # logging.debug('...Exectuted in %4.1f sec', ((dt.datetime.now()-start).total_seconds()))
    # start = dt.datetime.now()
    # all_idx = self.catalog.lrange('xid:reference', 0, -1)
    # logging.debug('Got ALL pts in %4.1f sec', ((dt.datetime.now()-start).total_seconds()))
    bench.mark('BP:LD:Redis:xidlist')
    ref = deshaw.topo_prot  # Hard coded for now

    # Group all historical indices by file number and add to frame mask
    logging.debug('Group By file idx (DEshaw)')
    historical_frameMask = {}
    for i, idx in enumerate(historical_framelist):
        file_index, frame = idx
        if file_index not in historical_frameMask:
            historical_frameMask[file_index] = []
        historical_frameMask[file_index].append(frame)

    for k, v in historical_frameMask.items():
        logging.debug('[BP] Deshaw lookups: %d, %s', k, str(v))

    # Group all generated indices by file index
    logging.debug('Group By file idx (Gen data)')
    groupbyFileIdx = {}
    for i, idx in enumerate(generated_framelist):
        # NOTE(review): eval() parses "(file_index, frame)" strings from the
        # catalog; unsafe if the catalog can hold untrusted data.
        file_index, frame = eval(idx)
        if file_index not in groupbyFileIdx:
            groupbyFileIdx[file_index] = []
        groupbyFileIdx[file_index].append(frame)

    # Dereference file index to filenames
    logging.debug('Deref fileidx -> file names')
    generated_frameMask = {}
    generated_filemap = {}
    for file_index in groupbyFileIdx.keys():
        filename = self.catalog.lindex('xid:filelist', file_index)
        if filename is None:
            logging.error('Error file not found in catalog: %s', filename)
        else:
            key = os.path.splitext(os.path.basename(filename))[0]
            generated_frameMask[key] = groupbyFileIdx[file_index]
            generated_filemap[key] = filename
    bench.mark('BP:GroupBy:Files')

    # Ensure the cache is alive and connected
    logging.debug('Check Cache client')
    self.cacheclient.connect()

    # Check cache for historical data points
    logging.debug('Checking cache for %d DEShaw points to back-project', len(historical_frameMask.keys()))
    for fileno, frames in historical_frameMask.items():
        # handle 1 frame case (to allow follow on multi-frame, mix cache hit/miss)
        if len(frames) == 1:
            datapt = self.cacheclient.get(fileno, frames[0], 'deshaw')
            dataptlist = [datapt] if datapt is not None else None
        else:
            dataptlist = self.cacheclient.get_many(fileno, frames, 'deshaw')
        if dataptlist is None:
            self.cache_miss += 1
            # logging.debug('[BP] Cache MISS on: %d', fileno)
            cache_miss.append(('deshaw', fileno, frames))
        else:
            self.cache_hit += 1
            # logging.debug('[BP] Cache HIT on: %d', fileno)
            source_points.extend(dataptlist)

    # Check cache for generated data points
    logging.debug('Checking cache for %d Generated points to back-project', len(generated_frameMask.keys()))
    for filename, frames in generated_frameMask.items():
        # handle 1 frame case (to allow follow on multi-frame, mix cache hit/miss)
        if len(frames) == 1:
            datapt = self.cacheclient.get(filename, frames[0], 'sim')
            dataptlist = [datapt] if datapt is not None else None
        else:
            dataptlist = self.cacheclient.get_many(filename, frames, 'sim')
        if dataptlist is None:
            self.cache_miss += 1
            # logging.debug('[BP] Cache MISS on: %s', filename)
            cache_miss.append(('sim', generated_filemap[filename], frames))
        else:
            self.cache_hit += 1
            # logging.debug('[BP] Cache HIT on: %s', filename)
            source_points.extend(dataptlist)

    # Package all cached points into one trajectory
    logging.debug('Cache hits: %d points.', len(source_points))
    if len(source_points) > 0:
        source_traj_cached = md.Trajectory(source_points, ref.top)
    else:
        source_traj_cached = None

    # All files were cached. Return back-projected points
    if len(cache_miss) == 0:
        return source_traj_cached

    # Add high-dim points to list of source points in a trajectory
    # Optimized for parallel file loading
    source_points_uncached = []
    logging.debug('Sequentially Loading all trajectories')
    for miss in cache_miss:
        ftype, fileno, framelist = miss
        if ftype == 'deshaw':
            pdb, dcd = deshaw.getHistoricalTrajectory_prot(fileno)
            traj = md.load(dcd, top=pdb)
        elif ftype == 'sim':
            traj = datareduce.load_trajectory(fileno)
        selected_frames = traj.slice(framelist)
        source_points_uncached.extend(selected_frames.xyz)
    bench.mark('BP:LD:File')

    logging.debug('All Uncached Data collected Total # points = %d', len(source_points_uncached))
    source_traj_uncached = md.Trajectory(np.array(source_points_uncached), ref.top)
    bench.mark('BP:Build:Traj')
    # bench.show()

    logging.info('-------- Back Projection Complete ---------------')
    if source_traj_cached is None:
        return source_traj_uncached
    else:
        # Cached and uncached frames are concatenated; note this does NOT
        # restore the original index_list ordering.
        return source_traj_cached.join(source_traj_uncached)
def backProjection(r, index_list):
    """Perform back projection function for a list of indices.

    Return a list of high dimensional points (one per index). Check cache
    for each point and condolidate file I/O for all cache misses.
    """
    logging.debug('-------- BACK PROJECTION: %d POINTS ---', len(index_list))
    # reverse_index = {index_list[i]: i for i in range(len(index_list))}
    source_points = []

    # Resolve each non-negative index through 'xid:reference' in one pipeline;
    # negative values denote historical indices and are ignored here.
    pipe = r.pipeline()
    for raw in index_list:
        # Negation indicates historical index:
        if int(raw) >= 0:
            pipe.lindex('xid:reference', int(raw))

    # Load higher dim point indices from catalog, dropping unresolved entries
    generated_framelist = [entry for entry in pipe.execute() if entry is not None]

    ref = deshaw.topo_prot  # Hard coded for now

    # Group all generated indices by file index
    groupbyFileIdx = {}
    for entry in generated_framelist:
        file_index, frame = eval(entry)
        groupbyFileIdx.setdefault(file_index, []).append(frame)

    # Dereference file index to filenames
    generated_frameMask = {}
    generated_filemap = {}
    for file_index in groupbyFileIdx:
        filename = r.lindex('xid:filelist', file_index)
        if filename is None:
            logging.error('Error file not found in catalog: %s', filename)
            continue
        key = os.path.splitext(os.path.basename(filename))[0]
        generated_frameMask[key] = groupbyFileIdx[file_index]
        generated_filemap[key] = filename

    # Check cache for generated data points
    bplist = [('sim', generated_filemap[key], frames)
              for key, frames in generated_frameMask.items()]

    # Load each source trajectory once and pull out the requested frames
    source_points = []
    logging.debug('Sequentially Loading %d trajectories', len(bplist))
    for ftype, fileno, framelist in bplist:
        traj = datareduce.load_trajectory(fileno)
        wanted = traj.slice(framelist)
        source_points.extend(wanted.xyz)

    logging.debug('All Uncached Data collected Total # points = %d', len(source_points))
    source_traj = md.Trajectory(np.array(source_points), ref.top)
    logging.info('-------- Back Projection Complete ---------------')
    return source_traj