def read_img(input_dir):
    """
    Read image metadata.

    Search for both phase-contrast and fluorescence files.

    args:
        input_dir (path): raw data experiment directory

    returns:
        dict: for each of 'phase' and 'fluor', a dict with keys
            'include' (bool: a readable .tif was found), 'shape' (tuple)
            and 'dtype' (numpy dtype, or None if no file was found).
    """
    results = {k: {'include': False, 'shape': (0, 0), 'dtype': None}
               for k in ('phase', 'fluor')}

    # Only look in the directory of the first position
    # BUG FIX: use the next() builtin (Python 2.6+) instead of the
    # generator .next() method, which no longer exists in Python 3.
    p = next(read.listdirs(input_dir, read.PATTERN['posndir']))

    # Try to read metadata about phase and fluor files (need one image each)
    for k in ('phase', 'fluor'):
        pattern = read.PATTERN['%stif' % k]
        for v in read.listfiles(os.path.join(input_dir, p), pattern):
            if os.path.splitext(v)[1] == '.tif':
                # One successfully read image is enough for the metadata
                # (renamed from `phase_img`: this also reads fluor images)
                img = imread(os.path.join(input_dir, p, v))
                results[k]['include'] = True
                results[k]['shape'] = img.shape
                results[k]['dtype'] = img.dtype
                break
    return results
def read_img(input_dir):
    """
    Read image metadata.

    Search for both phase-contrast and fluorescence files.

    args:
        input_dir (path): raw data experiment directory

    returns:
        dict: maps 'phase' and 'fluor' each to {'include': bool,
            'shape': tuple, 'dtype': dtype or None}; defaults are kept
            for any channel with no readable .tif file.
    """
    results = {k: {'include': False, 'shape': (0, 0), 'dtype': None}
               for k in ('phase', 'fluor')}

    # Only look in the directory of the first position.
    # BUG FIX: the .next() generator method was removed in Python 3;
    # the next() builtin behaves identically from Python 2.6 on.
    p = next(read.listdirs(input_dir, read.PATTERN['posndir']))

    # Try to read metadata about phase and fluor files (need one image each)
    for k in ('phase', 'fluor'):
        pattern = read.PATTERN['%stif' % k]
        for v in read.listfiles(os.path.join(input_dir, p), pattern):
            if os.path.splitext(v)[1] == '.tif':
                # The first matching image supplies the channel metadata
                # (renamed from `phase_img`: this loop also handles fluor)
                img = imread(os.path.join(input_dir, p, v))
                results[k]['include'] = True
                results[k]['shape'] = img.shape
                results[k]['dtype'] = img.dtype
                break
    return results
def load_posn(self):
    """
    Load data for the current position.

    Reads the edited traces and log pickles for the position selected by
    self.posn_idx, then unzips each block's PhaseSegment.zip archive and
    records the per-frame segmented image file paths in self.files.
    """
    self.posn_dir = os.path.join(self.analyses_dir, self.posns[self.posn_idx])
    b = os.path.join(self.posn_dir, 'blocks')

    # Read in values for the current position
    self.data_file = os.path.join(self.posn_dir, 'edits.pickle')
    self.data = read_pickle(self.data_file)

    # Read in values from the log file (close the handle promptly rather
    # than leaking it via pickle.load(open(...)))
    log_file = os.path.join(self.posn_dir, 'log.pickle')
    with open(log_file, 'rb') as fh:
        log_data = pickle.load(fh)
    self.img_shape = log_data['image']['phase']['shape']
    self.img_dtype = log_data['image']['phase']['dtype']
    self.pumps = log_data['pumps']

    self.TraceList = list(self.data['Trace'])
    self.SavedList = [v for i, v in self.data['Trace'].iteritems()
                      if self.data['Saved'][i]]
    self.num_traces = len(self.TraceList)
    if self.num_traces < 1:
        # Nothing to display; leave frame/time attributes unset
        return
    self.num_frames = len(self.data.ix[0]['Label'])
    self.frames = np.arange(self.num_frames)
    self.time_phase = log_data['phase'][:self.num_frames]
    self.time = self.time_phase / 60
    # `in` replaces the deprecated dict.has_key()
    if 'fluor' in log_data:
        # Keep only fluor frames up to the last phase time point
        max_time = np.max(self.time_phase)
        num_frames_fluor = np.argmin(np.abs(log_data['fluor'] - max_time)) + 1
        self.time_fluor = log_data['fluor'][:num_frames_fluor]

    # Unzip phase-contrast image files and read in names of image files.
    # BUG FIX: os.curdir is the literal string '.', so the original
    # os.chdir(old_dir) was a no-op and the working directory was never
    # restored; capture the real path with os.getcwd() instead, and
    # restore it even if extraction fails.
    old_dir = os.getcwd()
    self.files = [''] * self.num_frames
    try:
        for v in read.listdirs(b, read.PATTERN['blockdir']):
            # Extract all .tif images in the input directory
            os.chdir(os.path.join(b, v))
            zipfile.ZipFile('PhaseSegment.zip').extractall()
            for f in read.listfiles('PhaseSegment', read.PATTERN['phasetif']):
                i = read.getframenum(f, read.PATTERN['phasetif'])
                if i < self.num_frames:
                    self.files[i] = os.path.join(b, v, 'PhaseSegment', f)
    finally:
        os.chdir(old_dir)
def load_posn(self):
    """
    Load data for the current position.

    Loads edits.pickle and log.pickle for the currently selected
    position, then extracts each block's PhaseSegment.zip and fills
    self.files with the path of the segmented image for every frame.
    """
    self.posn_dir = os.path.join(self.analyses_dir, self.posns[self.posn_idx])
    b = os.path.join(self.posn_dir, 'blocks')

    # Read in values for the current position
    self.data_file = os.path.join(self.posn_dir, 'edits.pickle')
    self.data = read_pickle(self.data_file)

    # Read in values from the log file; use a context manager so the
    # file handle is not leaked (pickle.load(open(...)) never closed it)
    log_file = os.path.join(self.posn_dir, 'log.pickle')
    with open(log_file, 'rb') as fh:
        log_data = pickle.load(fh)
    self.img_shape = log_data['image']['phase']['shape']
    self.img_dtype = log_data['image']['phase']['dtype']
    self.pumps = log_data['pumps']

    self.TraceList = list(self.data['Trace'])
    self.SavedList = [v for i, v in self.data['Trace'].iteritems()
                      if self.data['Saved'][i]]
    self.num_traces = len(self.TraceList)
    if self.num_traces < 1:
        # No traces to edit for this position
        return
    self.num_frames = len(self.data.ix[0]['Label'])
    self.frames = np.arange(self.num_frames)
    self.time_phase = log_data['phase'][:self.num_frames]
    self.time = self.time_phase / 60
    # `in` replaces the deprecated dict.has_key()
    if 'fluor' in log_data:
        # Truncate fluor times at the last phase-contrast time point
        max_time = np.max(self.time_phase)
        num_frames_fluor = np.argmin(np.abs(log_data['fluor'] - max_time)) + 1
        self.time_fluor = log_data['fluor'][:num_frames_fluor]

    # Unzip phase-contrast image files and read in names of image files.
    # BUG FIX: the original saved os.curdir (the string '.'), so the
    # final chdir never actually restored the working directory; use
    # os.getcwd() and a try/finally to restore it reliably.
    old_dir = os.getcwd()
    self.files = [''] * self.num_frames
    try:
        for v in read.listdirs(b, read.PATTERN['blockdir']):
            # Extract all .tif images in the input directory
            os.chdir(os.path.join(b, v))
            zipfile.ZipFile('PhaseSegment.zip').extractall()
            for f in read.listfiles('PhaseSegment', read.PATTERN['phasetif']):
                i = read.getframenum(f, read.PATTERN['phasetif'])
                if i < self.num_frames:
                    self.files[i] = os.path.join(b, v, 'PhaseSegment', f)
    finally:
        os.chdir(old_dir)
def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params): """ Automated steps to perform prior to editing. """ expt = os.path.basename(expt_analyses_dir) g = params['general'] # First load or create log files for each position log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode']) # Execute each position in succession for p in positions: # Update the terminal display read.updatelog(expt, p, 'preedit') print 'start position ' + p + ': ' + time.asctime() posn_raw_data_dir = os.path.join(expt_raw_data_dir, p) posn_analyses_dir = os.path.join(expt_analyses_dir, p) # Segmented files will be saved to a temporary directory temp_dir = os.path.join(posn_analyses_dir, 'temp') if g['write_mode'] == 0: read.rmkdir(temp_dir) else: read.cmkdir(temp_dir) # Pad with default parameters, and find frames to process frame_start, frame_stop = float('inf'), 0. for mode in MODES: print '---mode', mode d = params[mode] # Pad with default parameters as necessary d = eval('%s.workflow.fillparams(d)' % mode) # Find all .tif images of specified type in the given directory d['segment']['file_list'] = [] for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) if g['frame_range'][0] <= j < g['frame_range'][1]: frame_start = min(frame_start, j) frame_stop = max(frame_stop, j) d['segment']['file_list'].append(f) frame_stop += 1 # Create arguments for parallel processing args = [(posn_raw_data_dir, temp_dir, MODES, copy.deepcopy(params)) for _ in range(g['num_procs'])] file_list = sorted(args[0][3]['phase']['segment']['file_list']) # # debug: select only a few files -BK # print 'initial frame stop', frame_stop # frame_stop = 500 # file_list = file_list[:frame_stop] # # debug: select only a few files -BK inds = partition_indices(file_list, g['num_procs']) for (sta_ind, end_ind), arg in zip(inds, args): arg[3]['phase']['segment']['file_list'] = file_list[sta_ind:end_ind] # Process each block of frames 
in parallel parallel.main(preeditblock, args, g['num_procs']) print 'extract: ' + time.asctime() # Archive the output files into .zip files, then delete each .tif num_tifs = frame_stop - frame_start num_digits = int(np.ceil(np.log10(num_tifs + 1))) # Create new set of directories with pre-specified block size frames = range(frame_start, frame_stop-1, g['block_size']) frames.append(frame_stop) block_frames = zip(frames[:-1], frames[1:]) # Make directories to hold files, named according to frames read.cmkdir(os.path.join(posn_analyses_dir, 'blocks')) block_dirs = [] for j1, j2 in block_frames: strs = [str(v).zfill(num_digits) for v in (j1, j2)] v = os.path.join(posn_analyses_dir, 'blocks', 'frame{}-{}'.format(*strs)) os.mkdir(v) block_dirs.append(v) for m in MODES: # The segmented .tif files will be stored in a .zip file zip_name = m.capitalize() + 'Segment' [read.cmkdir(os.path.join(v, zip_name)) for v in block_dirs] # Find all segmented .tif images and transfer to the new directories d = params[m] for f in read.listfiles(temp_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: old_name = os.path.join(temp_dir, f) zip_dir = os.path.join(block_dirs[i], zip_name) shutil.move(old_name, zip_dir) # Zip each directory of segmented .tif files old_dir = os.path.abspath(os.curdir) for v in block_dirs: os.chdir(v) archive_util.make_zipfile(zip_name, zip_name) shutil.rmtree(zip_name) os.chdir(old_dir) # Make temporary directories for data outputs dat_name = m.capitalize() + 'Data' [read.cmkdir(os.path.join(v, dat_name)) for v in block_dirs] # Find all analyzed .pickle files and transfer to the new directories f, e = os.path.splitext(d['segment']['pattern']) dat_pattern = (f + '.pickle' + e[4:]) for f in read.listfiles(temp_dir, dat_pattern): j = read.getframenum(f, dat_pattern) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: # Transfer each frame to the correct block old_name 
= os.path.join(temp_dir, f) dat_dir = os.path.join(block_dirs[i], dat_name) shutil.move(old_name, dat_dir) # Concatenate each set of files into a DataFrame for each parameter for block_dir in block_dirs: dat_dir = os.path.join(block_dir, dat_name) data = [] for u in os.listdir(dat_dir): dat_file = os.path.join(dat_dir, u) try: d = read_pickle(dat_file) except: pass data.append(d) df = concat(data) df = df.reindex(sorted(df.index)) for c in df.columns: df[c].to_pickle(os.path.join(block_dir, c + '.pickle')) shutil.rmtree(dat_dir) print 'shuffle: ' + time.asctime() # Delete all temporary files shutil.rmtree(temp_dir) ''' block_dirs = [os.path.join(posn_analyses_dir, 'blocks', v) for v in os.listdir(os.path.join(posn_analyses_dir, 'blocks')) if 'frame' in v] ''' # Track the blocks in parallel args = [] for v in block_dirs: output_file = os.path.join(v, 'Trace.pickle') if os.path.isfile(output_file): os.remove(output_file) args.append((v, output_file, params['phase']['track'])) parallel.main(trackblock, args, g['num_procs']) print 'track: ' + time.asctime() # Stitch independently-tracked trajectories together stitchblocks(block_dirs, params['phase']['track']) print 'stitch: ' + time.asctime() # Collate the data for manual editing output_file = os.path.join(posn_analyses_dir, 'edits.pickle') collateblocks(block_dirs, output_file, params['phase']['collate']) print 'collate: ' + time.asctime() # Update the experiment log file read.updatelog(expt, p, 'preedit', expt_analyses_dir) print 'final: ' + time.asctime()
def read_pumps(input_dir, log_dict):
    """
    Read in metadata about pump start/stop times, flow rates and units;
    also save the solution used in each pump into these results.

    args:
        input_dir (path): raw data experiment directory
        log_dict (dict): log file parameter-value pairs (from read_log),
            with updated fields corresponding to "Start Date" (datetime)
            and "Total Time" in seconds (float)

    returns:
        list of dict: one entry per pump file, with keys 'Time' (list of
            [start, stop] pairs in seconds), 'Rate' (list of floats),
            'Units' (str) and 'Solution'.
    """
    # Read in pump information
    imported = []
    for p in read.listfiles(input_dir, r'^pump[\d]+\.txt$'):
        df = read_table(os.path.join(input_dir, p), header=None,
                        names=('DateTime', 'Rate'), index_col='DateTime')
        ts = [datetime.strptime(v, DATETIME_FORMAT) for v in df.index.values]
        # Re-index by seconds elapsed since the experiment start
        df.index = [(v - log_dict['Start Date']).total_seconds() for v in ts]
        df['Units'] = ''
        for k, v in df['Rate'].iteritems():
            # Split entries like '100ULM' into numeric rate and unit code
            m = re.match(r'([0-9\.]*)([A-z]*)', v)
            df['Rate'].ix[k] = float(m.group(1))
            df['Units'].ix[k] = m.group(2)
        imported.append(df)

    # Find the solutions in the pumps
    soln_dict = {}
    for k, v in log_dict.iteritems():
        m = re.match('Pump ([0-9]+) Solution', k)
        if m:
            soln_dict[int(m.group(1))] = v
    soln_keys = sorted(soln_dict.keys())

    # Reformat according to on vs. off
    results = []
    for i, df in enumerate(imported):
        d = {'Time': [], 'Rate': [], 'Units': '',
             'Solution': soln_dict[soln_keys[i]]}
        for j, t1 in enumerate(df.index):
            r = df['Rate'].ix[t1]
            u = df['Units'].ix[t1]
            # Each setting runs until the next one starts, or until the
            # end of the experiment for the final entry
            if j + 1 < len(df.index):
                t2 = df.index[j + 1]
            else:
                t2 = log_dict['Total Time']
            d['Rate'].append(r)
            if not d['Units']:
                # Convert pump unit codes to legible values (in TeX format)
                v, t = u[:2], u[2:]
                if v == 'UL':
                    v = u'\u03bcL'
                elif v == 'ML':
                    # BUG FIX: was `v == u'mL'` (a no-op comparison), so
                    # milliliter rates kept the raw 'ML' code
                    v = u'mL'
                if t == 'M':
                    t = '/min'
                elif t == 'H':
                    t = '/hr'
                d['Units'] = v + t
            d['Time'].append([t1, t2])
        results.append(d)
    return results
def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params): """ Automated steps to perform prior to editing. """ expt = os.path.basename(expt_analyses_dir) g = params['general'] # First load or create log files for each position log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode']) # Execute each position in succession for p in positions: # Update the terminal display read.updatelog(expt, p, 'preedit') print 'start position ' + p + ': ' + time.asctime() posn_raw_data_dir = os.path.join(expt_raw_data_dir, p) posn_analyses_dir = os.path.join(expt_analyses_dir, p) # Segmented files will be saved to a temporary directory temp_dir = os.path.join(posn_analyses_dir, 'temp') if g['write_mode'] == 0: read.rmkdir(temp_dir) else: read.cmkdir(temp_dir) # Pad with default parameters, and find frames to process frame_start, frame_stop = float('inf'), 0. for mode in MODES: print '---mode', mode d = params[mode] # Pad with default parameters as necessary d = eval('%s.workflow.fillparams(d)' % mode) # Find all .tif images of specified type in the given directory d['segment']['file_list'] = [] for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) if g['frame_range'][0] <= j < g['frame_range'][1]: frame_start = min(frame_start, j) frame_stop = max(frame_stop, j) d['segment']['file_list'].append(f) frame_stop += 1 # Create arguments for parallel processing args = [(posn_raw_data_dir, temp_dir, MODES, copy.deepcopy(params)) for _ in range(g['num_procs'])] file_list = sorted(args[0][3]['phase']['segment']['file_list']) # # debug: select only a few files -BK # print 'initial frame stop', frame_stop # frame_stop = 500 # file_list = file_list[:frame_stop] # # debug: select only a few files -BK inds = partition_indices(file_list, g['num_procs']) for (sta_ind, end_ind), arg in zip(inds, args): arg[3]['phase']['segment']['file_list'] = file_list[ sta_ind:end_ind] # Process each block of frames 
in parallel parallel.main(preeditblock, args, g['num_procs']) print 'extract: ' + time.asctime() # Archive the output files into .zip files, then delete each .tif num_tifs = frame_stop - frame_start num_digits = int(np.ceil(np.log10(num_tifs + 1))) # Create new set of directories with pre-specified block size frames = range(frame_start, frame_stop - 1, g['block_size']) frames.append(frame_stop) block_frames = zip(frames[:-1], frames[1:]) # Make directories to hold files, named according to frames read.cmkdir(os.path.join(posn_analyses_dir, 'blocks')) block_dirs = [] for j1, j2 in block_frames: strs = [str(v).zfill(num_digits) for v in (j1, j2)] v = os.path.join(posn_analyses_dir, 'blocks', 'frame{}-{}'.format(*strs)) os.mkdir(v) block_dirs.append(v) for m in MODES: # The segmented .tif files will be stored in a .zip file zip_name = m.capitalize() + 'Segment' [read.cmkdir(os.path.join(v, zip_name)) for v in block_dirs] # Find all segmented .tif images and transfer to the new directories d = params[m] for f in read.listfiles(temp_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: old_name = os.path.join(temp_dir, f) zip_dir = os.path.join(block_dirs[i], zip_name) shutil.move(old_name, zip_dir) # Zip each directory of segmented .tif files old_dir = os.path.abspath(os.curdir) for v in block_dirs: os.chdir(v) archive_util.make_zipfile(zip_name, zip_name) shutil.rmtree(zip_name) os.chdir(old_dir) # Make temporary directories for data outputs dat_name = m.capitalize() + 'Data' [read.cmkdir(os.path.join(v, dat_name)) for v in block_dirs] # Find all analyzed .pickle files and transfer to the new directories f, e = os.path.splitext(d['segment']['pattern']) dat_pattern = (f + '.pickle' + e[4:]) for f in read.listfiles(temp_dir, dat_pattern): j = read.getframenum(f, dat_pattern) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: # Transfer each frame to the correct block 
old_name = os.path.join(temp_dir, f) dat_dir = os.path.join(block_dirs[i], dat_name) shutil.move(old_name, dat_dir) # Concatenate each set of files into a DataFrame for each parameter for block_dir in block_dirs: dat_dir = os.path.join(block_dir, dat_name) data = [] for u in os.listdir(dat_dir): dat_file = os.path.join(dat_dir, u) try: d = read_pickle(dat_file) except: pass data.append(d) df = concat(data) df = df.reindex(sorted(df.index)) for c in df.columns: df[c].to_pickle(os.path.join(block_dir, c + '.pickle')) shutil.rmtree(dat_dir) print 'shuffle: ' + time.asctime() # Delete all temporary files shutil.rmtree(temp_dir) ''' block_dirs = [os.path.join(posn_analyses_dir, 'blocks', v) for v in os.listdir(os.path.join(posn_analyses_dir, 'blocks')) if 'frame' in v] ''' # Track the blocks in parallel args = [] for v in block_dirs: output_file = os.path.join(v, 'Trace.pickle') if os.path.isfile(output_file): os.remove(output_file) args.append((v, output_file, params['phase']['track'])) parallel.main(trackblock, args, g['num_procs']) print 'track: ' + time.asctime() # Stitch independently-tracked trajectories together stitchblocks(block_dirs, params['phase']['track']) print 'stitch: ' + time.asctime() # Collate the data for manual editing output_file = os.path.join(posn_analyses_dir, 'edits.pickle') collateblocks(block_dirs, output_file, params['phase']['collate']) print 'collate: ' + time.asctime() # Update the experiment log file read.updatelog(expt, p, 'preedit', expt_analyses_dir) print 'final: ' + time.asctime()
def read_pumps(input_dir, log_dict):
    """
    Read in metadata about pump start/stop times, flow rates and units;
    also save the solution used in each pump into these results.

    args:
        input_dir (path): raw data experiment directory
        log_dict (dict): log file parameter-value pairs (from read_log),
            with updated fields corresponding to "Start Date" (datetime)
            and "Total Time" in seconds (float)

    returns:
        list of dict: one per pump file, each with 'Time' ([start, stop]
            second pairs), 'Rate' (floats), 'Units' (str) and 'Solution'.
    """
    # Read in pump information
    imported = []
    for p in read.listfiles(input_dir, r'^pump[\d]+\.txt$'):
        df = read_table(os.path.join(input_dir, p), header=None,
                        names=('DateTime', 'Rate'), index_col='DateTime')
        ts = [datetime.strptime(v, DATETIME_FORMAT) for v in df.index.values]
        # Convert timestamps to seconds elapsed since the experiment start
        df.index = [(v - log_dict['Start Date']).total_seconds() for v in ts]
        df['Units'] = ''
        for k, v in df['Rate'].iteritems():
            # Entries look like '<number><unit code>'; split the two parts
            m = re.match(r'([0-9\.]*)([A-z]*)', v)
            df['Rate'].ix[k] = float(m.group(1))
            df['Units'].ix[k] = m.group(2)
        imported.append(df)

    # Find the solutions in the pumps
    soln_dict = {}
    for k, v in log_dict.iteritems():
        m = re.match('Pump ([0-9]+) Solution', k)
        if m:
            soln_dict[int(m.group(1))] = v
    soln_keys = sorted(soln_dict.keys())

    # Reformat according to on vs. off
    results = []
    for i, df in enumerate(imported):
        d = {'Time': [], 'Rate': [], 'Units': '',
             'Solution': soln_dict[soln_keys[i]]}
        for j, t1 in enumerate(df.index):
            r = df['Rate'].ix[t1]
            u = df['Units'].ix[t1]
            # A setting lasts until the next entry, or the experiment end
            if j + 1 < len(df.index):
                t2 = df.index[j + 1]
            else:
                t2 = log_dict['Total Time']
            d['Rate'].append(r)
            if not d['Units']:
                # Convert pump unit codes to legible values (in TeX format)
                v, t = u[:2], u[2:]
                if v == 'UL':
                    v = u'\u03bcL'
                elif v == 'ML':
                    # BUG FIX: previously `v == u'mL'`, a comparison with
                    # no effect, leaving the raw 'ML' code in the output
                    v = u'mL'
                if t == 'M':
                    t = '/min'
                elif t == 'H':
                    t = '/hr'
                d['Units'] = v + t
            d['Time'].append([t1, t2])
        results.append(d)
    return results