def main(input_dir, output_dir, positions=None, write_mode=0):
    """ Read metadata from all log files in the input data directory.

    args:
        input_dir (path): raw data experiment directory
        output_dir (path): path to save pickled results

    kwargs:
        positions (list): position directory names to process; None means
            every directory in input_dir matching the position pattern
        write_mode (int): 1 to overwrite an existing log.pickle file
    """
    expt = dict.fromkeys(('log', 'image', 'pumps', 'stage'))

    # First read the main log.txt file
    expt['log'] = read_log(input_dir)

    # Transfer the log.txt file and rename it to experiment.log
    shutil.copy(os.path.join(input_dir, 'log.txt'),
                os.path.join(output_dir, 'experiment.log'))

    # Read in any image metadata available
    expt['image'] = read_img(input_dir)

    # Read in the positions
    expt['stage'] = read_posns(input_dir)
    expt['log']['Total Positions'] = len(expt['stage'])

    # Attempt to read the phase.txt and fluor.txt time files
    total_time = 0
    for k in expt['image'].keys():
        if expt['image'][k]['include']:
            expt[k] = read_time(input_dir, expt['log']['Start Date'], k)
            total_time = max(total_time, expt[k].max())
            expt['log']['Total Frames %s' % k.capitalize()] = expt[k].shape[1]
    expt['log']['Total Time'] = total_time

    # Then read in the pump rates and on/off values
    expt['pumps'] = read_pumps(input_dir, expt['log'])

    # Iterate over all positions in the experiment
    if positions is None:
        positions = read.listdirs(input_dir, read.PATTERN['posndir'])
    for p in positions:
        i = int(re.match(read.PATTERN['posndir'], p).group(2))

        # Create a log dictionary specific to each position
        posn = {}
        for k, v in expt.iteritems():
            if k == 'log':
                # Shallow-copy so the per-position entries added below do
                # not mutate the shared experiment-wide log dictionary
                posn[k] = dict(v)
            elif k in ('image', 'pumps'):
                posn[k] = v
            elif k in ('phase', 'fluor', 'stage'):
                posn[k] = v[i]

        # Add information about the specific position
        posn['log']['Position Name'] = p
        posn['log']['Position Number'] = i

        # Save the results (close the handle explicitly; the original
        # left the pickle file open)
        read.rmkdir(os.path.join(output_dir, p))
        output_file = os.path.join(output_dir, p, 'log.pickle')
        if not os.path.isfile(output_file) or write_mode == 1:
            with open(output_file, 'wb') as f:
                pickle.dump(posn, f)
def loadinputs(input_dir):
    """ Load analysis input parameters from .yaml file.

    Falls back to the packaged defaults.yaml when params.yaml is absent,
    renames the experiment after the directory, and writes the defaults
    back out so the user can edit them.

    args:
        input_dir (path): experiment analysis directory containing .yaml file

    returns:
        dict: input parameters
    """
    input_file = os.path.join(input_dir, 'params.yaml')
    try:
        # Load experiment-specific parameters from file
        # NOTE(review): yaml.load without an explicit Loader is unsafe on
        # untrusted input — consider yaml.safe_load
        with open(input_file, 'r') as f:
            inputs = yaml.load(f)
    except IOError:
        # Load the default parameters and change the experiment name
        defaults_file = os.path.join(os.path.dirname(__file__),
                                     'defaults.yaml')
        with open(defaults_file, 'r') as f:
            inputs = yaml.load(f)
        for k in inputs['name'].keys():
            inputs['name'][k] = os.path.basename(input_dir)

        # Make experiment directory as necessary, and save parameters to file
        read.rmkdir(input_dir)
        with open(input_file, 'w') as f:
            yaml.dump(inputs, f, explicit_start=True,
                      default_flow_style=False)
        msg = '''
Loaded default parameters to %s.
Exit now and edit this file to use non-default values.
Starting automatic analysis in''' % input_file
        # countdown(msg) # -BK
        cls()

    # Expand all directories: resolve each path pattern to the first
    # matching subdirectory, or keep the pattern verbatim if none match
    for k, v in inputs['paths'].iteritems():
        root, pattern = os.path.split(os.path.abspath(os.path.expanduser(v)))
        try:
            sub_dir = read.listdirs(root, pattern).next()
        except StopIteration:
            sub_dir = pattern
        inputs['paths'][k] = os.path.join(root, sub_dir)
    return inputs
def loadinputs(input_dir):
    """ Load analysis input parameters from .yaml file.

    args:
        input_dir (path): experiment analysis directory containing .yaml file

    returns:
        dict: input parameters
    """
    input_file = os.path.join(input_dir, 'params.yaml')
    try:
        # Load experiment-specific parameters from file; use a context
        # manager so the handle is closed (the original leaked it).
        # NOTE(review): yaml.load without a Loader is unsafe on untrusted
        # input — consider yaml.safe_load
        with open(input_file, 'r') as f:
            inputs = yaml.load(f)
    except IOError:
        # Load the default parameters and change the experiment name
        with open(os.path.join(os.path.dirname(__file__),
                               'defaults.yaml'), 'r') as f:
            inputs = yaml.load(f)
        for k in inputs['name'].keys():
            inputs['name'][k] = os.path.basename(input_dir)

        # Make experiment directory as necessary, and save parameters to file
        read.rmkdir(input_dir)
        with open(input_file, 'w') as f:
            yaml.dump(inputs, f, explicit_start=True,
                      default_flow_style=False)
        msg = '''
Loaded default parameters to %s.
Exit now and edit this file to use non-default values.
Starting automatic analysis in''' % input_file
        # countdown(msg) # -BK
        cls()

    # Expand all directories
    for k, v in inputs['paths'].iteritems():
        root, pattern = os.path.split(os.path.abspath(os.path.expanduser(v)))
        try:
            sub_dir = read.listdirs(root, pattern).next()
        except StopIteration:
            # No matching subdirectory: keep the pattern text as-is
            sub_dir = pattern
        inputs['paths'][k] = os.path.join(root, sub_dir)
    return inputs
def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params): """ Automated steps to perform prior to editing. """ expt = os.path.basename(expt_analyses_dir) g = params['general'] # First load or create log files for each position log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode']) # Execute each position in succession for p in positions: # Update the terminal display read.updatelog(expt, p, 'preedit') print 'start position ' + p + ': ' + time.asctime() posn_raw_data_dir = os.path.join(expt_raw_data_dir, p) posn_analyses_dir = os.path.join(expt_analyses_dir, p) # Segmented files will be saved to a temporary directory temp_dir = os.path.join(posn_analyses_dir, 'temp') if g['write_mode'] == 0: read.rmkdir(temp_dir) else: read.cmkdir(temp_dir) # Pad with default parameters, and find frames to process frame_start, frame_stop = float('inf'), 0. for mode in MODES: print '---mode', mode d = params[mode] # Pad with default parameters as necessary d = eval('%s.workflow.fillparams(d)' % mode) # Find all .tif images of specified type in the given directory d['segment']['file_list'] = [] for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) if g['frame_range'][0] <= j < g['frame_range'][1]: frame_start = min(frame_start, j) frame_stop = max(frame_stop, j) d['segment']['file_list'].append(f) frame_stop += 1 # Create arguments for parallel processing args = [(posn_raw_data_dir, temp_dir, MODES, copy.deepcopy(params)) for _ in range(g['num_procs'])] file_list = sorted(args[0][3]['phase']['segment']['file_list']) # # debug: select only a few files -BK # print 'initial frame stop', frame_stop # frame_stop = 500 # file_list = file_list[:frame_stop] # # debug: select only a few files -BK inds = partition_indices(file_list, g['num_procs']) for (sta_ind, end_ind), arg in zip(inds, args): arg[3]['phase']['segment']['file_list'] = file_list[sta_ind:end_ind] # Process each block of frames 
in parallel parallel.main(preeditblock, args, g['num_procs']) print 'extract: ' + time.asctime() # Archive the output files into .zip files, then delete each .tif num_tifs = frame_stop - frame_start num_digits = int(np.ceil(np.log10(num_tifs + 1))) # Create new set of directories with pre-specified block size frames = range(frame_start, frame_stop-1, g['block_size']) frames.append(frame_stop) block_frames = zip(frames[:-1], frames[1:]) # Make directories to hold files, named according to frames read.cmkdir(os.path.join(posn_analyses_dir, 'blocks')) block_dirs = [] for j1, j2 in block_frames: strs = [str(v).zfill(num_digits) for v in (j1, j2)] v = os.path.join(posn_analyses_dir, 'blocks', 'frame{}-{}'.format(*strs)) os.mkdir(v) block_dirs.append(v) for m in MODES: # The segmented .tif files will be stored in a .zip file zip_name = m.capitalize() + 'Segment' [read.cmkdir(os.path.join(v, zip_name)) for v in block_dirs] # Find all segmented .tif images and transfer to the new directories d = params[m] for f in read.listfiles(temp_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: old_name = os.path.join(temp_dir, f) zip_dir = os.path.join(block_dirs[i], zip_name) shutil.move(old_name, zip_dir) # Zip each directory of segmented .tif files old_dir = os.path.abspath(os.curdir) for v in block_dirs: os.chdir(v) archive_util.make_zipfile(zip_name, zip_name) shutil.rmtree(zip_name) os.chdir(old_dir) # Make temporary directories for data outputs dat_name = m.capitalize() + 'Data' [read.cmkdir(os.path.join(v, dat_name)) for v in block_dirs] # Find all analyzed .pickle files and transfer to the new directories f, e = os.path.splitext(d['segment']['pattern']) dat_pattern = (f + '.pickle' + e[4:]) for f in read.listfiles(temp_dir, dat_pattern): j = read.getframenum(f, dat_pattern) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: # Transfer each frame to the correct block old_name 
= os.path.join(temp_dir, f) dat_dir = os.path.join(block_dirs[i], dat_name) shutil.move(old_name, dat_dir) # Concatenate each set of files into a DataFrame for each parameter for block_dir in block_dirs: dat_dir = os.path.join(block_dir, dat_name) data = [] for u in os.listdir(dat_dir): dat_file = os.path.join(dat_dir, u) try: d = read_pickle(dat_file) except: pass data.append(d) df = concat(data) df = df.reindex(sorted(df.index)) for c in df.columns: df[c].to_pickle(os.path.join(block_dir, c + '.pickle')) shutil.rmtree(dat_dir) print 'shuffle: ' + time.asctime() # Delete all temporary files shutil.rmtree(temp_dir) ''' block_dirs = [os.path.join(posn_analyses_dir, 'blocks', v) for v in os.listdir(os.path.join(posn_analyses_dir, 'blocks')) if 'frame' in v] ''' # Track the blocks in parallel args = [] for v in block_dirs: output_file = os.path.join(v, 'Trace.pickle') if os.path.isfile(output_file): os.remove(output_file) args.append((v, output_file, params['phase']['track'])) parallel.main(trackblock, args, g['num_procs']) print 'track: ' + time.asctime() # Stitch independently-tracked trajectories together stitchblocks(block_dirs, params['phase']['track']) print 'stitch: ' + time.asctime() # Collate the data for manual editing output_file = os.path.join(posn_analyses_dir, 'edits.pickle') collateblocks(block_dirs, output_file, params['phase']['collate']) print 'collate: ' + time.asctime() # Update the experiment log file read.updatelog(expt, p, 'preedit', expt_analyses_dir) print 'final: ' + time.asctime()
def plot(self, key):
    """ Plot the data and save to a PDF file.

    args:
        key (str): name of variable to export

    raises:
        ValueError: if key is not one of self.PLT_KEYS
    """
    if key not in self.PLT_KEYS:
        raise ValueError("'%s' is not in PLT_KEYS" % key)
    # Load the variable on demand; unload it again at the end
    has_var = self[0].has_key(key)
    if not has_var:
        try:
            self.loadvar(key)
        except IOError:
            pass

    # Set the correct units for the axes labels
    if key in ('Area', ):
        units = '$\mu$m$^{2}$'
    elif key in ('Generation', ):
        units = 'number'
    elif key in ('DivisionTime', ):
        units = 'min'
    else:
        units = '$\mu$m'

    # Create a formatted label with spaces between strings
    idxes = [i for i, v in enumerate(key) if v.isupper()]
    idxes.append(len(key))
    label = ' '.join([key[i1:i2] for i1, i2 in zip(idxes[:-1], idxes[1:])])

    # First plot the time series data for each position
    for i, p in enumerate(self.posns):
        time = self[i]['TimeP']
        gens = self[i]['Gens']
        if key in ('Generation', 'DivisionTime'):
            # Build a (Trace, Frame) MultiIndex over every trace/frame pair
            num_traces = len(gens)
            num_frames = len(time)
            frames = range(num_frames) * num_traces
            traces = []
            for v in xrange(num_traces):
                traces.extend([v] * num_frames)
            index = MultiIndex.from_arrays([traces, frames],
                                           names=('Trace', 'Frame'))
            vals = np.ones(shape=(num_traces, num_frames)) * np.nan
            if key in ('Generation', ):
                # Label each frame with its generation number (1-indexed)
                for j, g in enumerate(gens):
                    for k, v in enumerate(g):
                        vals[j][v.start:v.stop] = k
                vals = np.hstack(vals)
                vals += 1
            elif key in ('DivisionTime', ):
                # Label each frame with its generation's division time
                for j, g in enumerate(gens):
                    d = self[i]['Taus'][j]
                    for k, v in enumerate(g):
                        vals[j][v.start:v.stop] = d[k]
                vals = np.hstack(vals)
            data = DataFrame(vals, index=index, columns=(key, ))[key]
            data = data[np.isfinite(data)]
            if len(data) == 0:
                continue
            data.units = units
            data.posn = p
            data.file = os.path.join(self.figures_dir, p, key,
                                     '_'.join([self.expt_name, p, key]))
            data.label = label
            read.rmkdir(os.path.dirname(data.file))
            self.plot1Dseries(time, data, gens, bygens=False,
                              showlog=False, showoffset=False)
        elif key in ('Area', 'Length', 'Perimeter'):
            data = Series(self[i][key], name=key)
            if len(data) == 0:
                continue
            data.units = units
            data.posn = p
            data.file = os.path.join(self.figures_dir, p, key,
                                     '_'.join([self.expt_name, p, key]))
            data.label = label
            read.rmkdir(os.path.dirname(data.file))
            self.plot1Dseries(time, data, gens)
        elif key in ('Centroid', 'StalkedPole', 'SwarmerPole'):
            # '' plots both coordinates together; 'X'/'Y' plot one axis
            for a in ('', 'X', 'Y'):
                if a == '':
                    data = Series(self[i][key], name=key)
                else:
                    # Select the matching coordinate (X -> 0, Y -> 1), as
                    # in export(); the original plotted index 0 for both
                    c = ('X', 'Y').index(a)
                    data = Series([v[c] for v in self[i][key].values],
                                  index=self[i][key].index, name=key + a)
                if len(data) == 0:
                    continue
                data.units = units
                data.posn = p
                data.file = os.path.join(
                    self.figures_dir, p, key,
                    '_'.join([self.expt_name, p, key + a]))
                data.label = label
                read.rmkdir(os.path.dirname(data.file))
                if a == '':
                    self.plot2Dseries(time, data, gens)
                else:
                    data.label += ' ' + a
                    self.plot1Dseries(time, data, gens, showlog=False,
                                      showoffset=False)
    if not has_var:
        self.delvar(key)
def export(self, key, upload=False):
    """ Export data to a CSV file.

    args:
        key (str): name of variable to export

    kwargs:
        upload (bool): upload to shared folder at time of exporting

    raises:
        ValueError: if key is not one of self.TXT_KEYS
    """
    if key not in self.TXT_KEYS:
        raise ValueError("'%s' is not in TXT_KEYS" % key)
    # Load the variable on demand; unload it again at the end
    has_var = self[0].has_key(key)
    if not has_var:
        try:
            self.loadvar(key)
        except IOError:
            pass

    # The file name is auto-generated, with 3 decimals and tab delimiters
    read.rmkdir(self.tables_dir)
    file_name = os.path.join(self.tables_dir,
                             self.expt_name + '_' + key + '.txt')
    fmt = '%.3f'
    dlm = '\t'

    # Recast the DataFrame into a matrix, padded with NaN values
    nans = np.ones((self.num_traces, self.num_frames)) * np.nan
    if key in ('EdgeSpline', 'MidSpline'):
        # Load the StalkedPole and SwarmerPole values
        has_stalked = self[0].has_key('StalkedPole')
        if not has_stalked:
            self.loadvar('StalkedPole')
        has_swarmer = self[0].has_key('SwarmerPole')
        if not has_swarmer:
            self.loadvar('SwarmerPole')

        # Export data to a new Spline folder
        top_dir = os.path.join(self.tables_dir, self.expt_name + '_' + key)
        read.cmkdir(top_dir)

        # Number of digits determined by the number of cells traces
        num_digits = int(np.ceil(np.log10(self.num_traces + 1)))

        # Loop over each position then cell separately; use an integer
        # sample count (newer numpy rejects a float num such as 1e3)
        k = 0
        u = np.linspace(0., 1., 1000)
        for i in range(self.num_posns):
            for t in range(len(self[i]['Trace'])):
                j = k + t + 1
                data = [[] for _ in range(5)]
                for f, tck in self[i, t][key].iteritems():
                    if tck:
                        # Start indexing the frames at 1
                        data[0].append(f + 1)

                        # Find the indexes of the breaks
                        xs = np.asarray(zip(*splev(u, tck)))
                        p1 = self[i, t]['StalkedPole'].ix[f]
                        p2 = self[i, t]['SwarmerPole'].ix[f]
                        k1 = np.argmin([norm(v) for v in p1 - xs])
                        k2 = np.argmin([norm(v) for v in p2 - xs])
                        data[1].append((u[k1], u[k2]))

                        # Save the spline values
                        data[2].append(tck[0])
                        data[3].append(tck[1][0])
                        data[4].append(tck[1][1])

                # Export data to a subfolder for each trace
                sub_dir = os.path.join(top_dir,
                                       'trace' + str(j).zfill(num_digits))
                read.rmkdir(sub_dir)

                # Save five files for each trace
                file_names = []
                for v in ('Frames', 'Breaks', 'Knots',
                          'ControlX', 'ControlY'):
                    n = os.path.join(sub_dir,
                                     self.expt_name + '_' + v + '.txt')
                    file_names.append(n)

                # Save each value as an array with no empty values
                np.savetxt(file_names[0], data[0], fmt='%.0f',
                           delimiter=dlm)
                for n, d in zip(file_names[1:], data[1:]):
                    with open(n, 'w') as f:
                        for v in d:
                            f.write(dlm.join([fmt % x for x in v]) + '\n')
            k += (t + 1)
        if not has_stalked:
            self.delvar('StalkedPole')
        if not has_swarmer:
            self.delvar('SwarmerPole')
    elif key in ('Event', ):
        # Export pump metadata, one file per pump
        for i, v in enumerate(self.expt_data['Pumps']):
            n = str(i + 1).join(os.path.splitext(file_name))
            with codecs.open(n, encoding='utf-8', mode='w') as f:
                f.write(v['Solution'] + '\n')
                f.write('%s\t%s\t%s\n' % (v['Units'], 'TimeOn', 'TimeOff'))
                for j in range(len(v['Rate'])):
                    f.write('%.3f\t%.3f\t%.3f\n' %
                            (v['Rate'][j], v['Time'][j][0],
                             v['Time'][j][1]))
    elif key in ('Label', ):
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for (t, f), v in self[i][key].iteritems():
                j = k + t
                data[j, f] = v
            k += (t + 1)
        np.savetxt(file_name, data, fmt='%.0f', delimiter=dlm)
    elif key in ('Generation', ):
        # Save the generation counts (start indexing from 1)
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for t, v in enumerate(self[i]['Gens']):
                for u, f in enumerate(v):
                    j = k + t
                    data[j, f] = u
            k += (t + 1)
        data += 1
        np.savetxt(file_name, data, fmt='%.0f', delimiter=dlm)
    elif key in ('DivisionTime', ):
        # One row per trace, one column per generation
        taus = []
        for i in range(self.num_posns):
            for v in self[i]['Taus']:
                taus.append(v)
        max_gens = max([len(v) for v in taus])
        data = np.ones((self.num_traces, max_gens)) * np.nan
        for i, v in enumerate(taus):
            n = len(v)
            data[i][:n] = v
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('Centroid', 'StalkedPole', 'SwarmerPole'):
        # One file per coordinate axis (X -> index 0, Y -> index 1)
        for a, n in enumerate(('X', 'Y')):
            axis_name = n.join(os.path.splitext(file_name))
            data = nans.copy()
            k = 0
            for i in range(self.num_posns):
                for (t, f), v in self[i][key].iteritems():
                    j = k + t
                    data[j, f] = v[a]
                k += (t + 1)
            np.savetxt(axis_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('FourierFit', 'FourierCoef'):
        # Export data to a new folder
        top_dir = os.path.join(self.tables_dir, self.expt_name + '_' + key)
        read.cmkdir(top_dir)
        if 'FourierFit' == key:
            # Old representation
            num_coefs = 10
            coef_range = range(num_coefs)
        elif 'FourierCoef' == key:
            # New representation
            num_coefs = 20
            coef_range = range(-num_coefs, num_coefs + 1)
        for c in coef_range:
            for n in ('Real', 'Imag'):
                file_name = os.path.join(
                    top_dir, self.expt_name + '_' + key +
                    str(c).zfill(2) + n + '.txt')
                data = nans.copy()
                k = 0
                for i in range(self.num_posns):
                    for (t, f), v in self[i][key].iteritems():
                        j = k + t
                        data[j, f] = v[c].real if n == 'Real' else v[c].imag
                    k += (t + 1)
                np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('WidthsSmoothed', ):
        # Export data to a new Widths folder, one file per sample point
        top_dir = os.path.join(self.tables_dir,
                               self.expt_name + '_' + 'Widths')
        read.cmkdir(top_dir)
        num_points = 500
        for c in xrange(num_points):
            file_name = os.path.join(
                top_dir,
                self.expt_name + '_' + 'Widths' + str(c).zfill(3) + '.txt')

            # Recast the DataFrame into a matrix, padded with NaN values
            data = nans.copy()
            k = 0
            for i in range(self.num_posns):
                for (t, f), v in self[i][key].iteritems():
                    j = k + t
                    if np.any(v):
                        data[j, f] = v[c]
                k += (t + 1)
            np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('Time', ):
        # Recast the vector into a matrix (same row for every trace)
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            v = self[i]['TimeP']
            for t, _ in enumerate(self[i]['Trace']):
                j = k + t
                data[j] = v
            k += (t + 1)
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('Mother', ):
        # Save the identity of the mother (start indexing from 1)
        data = np.ones(self.num_traces) * np.nan
        k = 0
        for i in range(self.num_posns):
            for t, v in enumerate(self[i]['Mother']):
                if v is not None and v in self[i]['Trace']:
                    d = k + t
                    m = k + self[i]['Trace'].index(v)
                    data[d] = m
            k += (t + 1)
        data += 1
        np.savetxt(file_name, data, fmt='%.0f', delimiter=dlm)
    elif key in ('WidthMean', ):
        # Load the Widths values
        has_widths = self[0].has_key('Widths')
        if not has_widths:
            self.loadvar('Widths')

        # Recast the DataFrame into a matrix, padded with NaN values
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for (t, f), v in self[i]['Widths'].iteritems():
                j = k + t
                data[j, f] = np.nanmean(v)
            k += (t + 1)
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
        if not has_widths:
            self.delvar('Widths')
    else:
        # Recast the DataFrame into a matrix, padded with NaN values
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for (t, f), v in self[i][key].iteritems():
                j = k + t
                data[j, f] = v
            k += (t + 1)
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    if upload:
        self.upload(key)
    if not has_var:
        self.delvar(key)
def preeditmovie(expt_raw_data_dir, expt_analyses_dir, positions, params): """ Automated steps to perform prior to editing. """ expt = os.path.basename(expt_analyses_dir) g = params['general'] # First load or create log files for each position log.main(expt_raw_data_dir, expt_analyses_dir, positions, g['write_mode']) # Execute each position in succession for p in positions: # Update the terminal display read.updatelog(expt, p, 'preedit') print 'start position ' + p + ': ' + time.asctime() posn_raw_data_dir = os.path.join(expt_raw_data_dir, p) posn_analyses_dir = os.path.join(expt_analyses_dir, p) # Segmented files will be saved to a temporary directory temp_dir = os.path.join(posn_analyses_dir, 'temp') if g['write_mode'] == 0: read.rmkdir(temp_dir) else: read.cmkdir(temp_dir) # Pad with default parameters, and find frames to process frame_start, frame_stop = float('inf'), 0. for mode in MODES: print '---mode', mode d = params[mode] # Pad with default parameters as necessary d = eval('%s.workflow.fillparams(d)' % mode) # Find all .tif images of specified type in the given directory d['segment']['file_list'] = [] for f in read.listfiles(posn_raw_data_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) if g['frame_range'][0] <= j < g['frame_range'][1]: frame_start = min(frame_start, j) frame_stop = max(frame_stop, j) d['segment']['file_list'].append(f) frame_stop += 1 # Create arguments for parallel processing args = [(posn_raw_data_dir, temp_dir, MODES, copy.deepcopy(params)) for _ in range(g['num_procs'])] file_list = sorted(args[0][3]['phase']['segment']['file_list']) # # debug: select only a few files -BK # print 'initial frame stop', frame_stop # frame_stop = 500 # file_list = file_list[:frame_stop] # # debug: select only a few files -BK inds = partition_indices(file_list, g['num_procs']) for (sta_ind, end_ind), arg in zip(inds, args): arg[3]['phase']['segment']['file_list'] = file_list[ sta_ind:end_ind] # Process each block of frames 
in parallel parallel.main(preeditblock, args, g['num_procs']) print 'extract: ' + time.asctime() # Archive the output files into .zip files, then delete each .tif num_tifs = frame_stop - frame_start num_digits = int(np.ceil(np.log10(num_tifs + 1))) # Create new set of directories with pre-specified block size frames = range(frame_start, frame_stop - 1, g['block_size']) frames.append(frame_stop) block_frames = zip(frames[:-1], frames[1:]) # Make directories to hold files, named according to frames read.cmkdir(os.path.join(posn_analyses_dir, 'blocks')) block_dirs = [] for j1, j2 in block_frames: strs = [str(v).zfill(num_digits) for v in (j1, j2)] v = os.path.join(posn_analyses_dir, 'blocks', 'frame{}-{}'.format(*strs)) os.mkdir(v) block_dirs.append(v) for m in MODES: # The segmented .tif files will be stored in a .zip file zip_name = m.capitalize() + 'Segment' [read.cmkdir(os.path.join(v, zip_name)) for v in block_dirs] # Find all segmented .tif images and transfer to the new directories d = params[m] for f in read.listfiles(temp_dir, d['segment']['pattern']): j = read.getframenum(f, d['segment']['pattern']) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: old_name = os.path.join(temp_dir, f) zip_dir = os.path.join(block_dirs[i], zip_name) shutil.move(old_name, zip_dir) # Zip each directory of segmented .tif files old_dir = os.path.abspath(os.curdir) for v in block_dirs: os.chdir(v) archive_util.make_zipfile(zip_name, zip_name) shutil.rmtree(zip_name) os.chdir(old_dir) # Make temporary directories for data outputs dat_name = m.capitalize() + 'Data' [read.cmkdir(os.path.join(v, dat_name)) for v in block_dirs] # Find all analyzed .pickle files and transfer to the new directories f, e = os.path.splitext(d['segment']['pattern']) dat_pattern = (f + '.pickle' + e[4:]) for f in read.listfiles(temp_dir, dat_pattern): j = read.getframenum(f, dat_pattern) for i, (j1, j2) in enumerate(block_frames): if j1 <= j < j2: # Transfer each frame to the correct block 
old_name = os.path.join(temp_dir, f) dat_dir = os.path.join(block_dirs[i], dat_name) shutil.move(old_name, dat_dir) # Concatenate each set of files into a DataFrame for each parameter for block_dir in block_dirs: dat_dir = os.path.join(block_dir, dat_name) data = [] for u in os.listdir(dat_dir): dat_file = os.path.join(dat_dir, u) try: d = read_pickle(dat_file) except: pass data.append(d) df = concat(data) df = df.reindex(sorted(df.index)) for c in df.columns: df[c].to_pickle(os.path.join(block_dir, c + '.pickle')) shutil.rmtree(dat_dir) print 'shuffle: ' + time.asctime() # Delete all temporary files shutil.rmtree(temp_dir) ''' block_dirs = [os.path.join(posn_analyses_dir, 'blocks', v) for v in os.listdir(os.path.join(posn_analyses_dir, 'blocks')) if 'frame' in v] ''' # Track the blocks in parallel args = [] for v in block_dirs: output_file = os.path.join(v, 'Trace.pickle') if os.path.isfile(output_file): os.remove(output_file) args.append((v, output_file, params['phase']['track'])) parallel.main(trackblock, args, g['num_procs']) print 'track: ' + time.asctime() # Stitch independently-tracked trajectories together stitchblocks(block_dirs, params['phase']['track']) print 'stitch: ' + time.asctime() # Collate the data for manual editing output_file = os.path.join(posn_analyses_dir, 'edits.pickle') collateblocks(block_dirs, output_file, params['phase']['collate']) print 'collate: ' + time.asctime() # Update the experiment log file read.updatelog(expt, p, 'preedit', expt_analyses_dir) print 'final: ' + time.asctime()
def plot(self, key):
    """ Plot the data and save to a PDF file.

    args:
        key (str): name of variable to export

    raises:
        ValueError: if key is not one of self.PLT_KEYS
    """
    if key not in self.PLT_KEYS:
        raise ValueError("'%s' is not in PLT_KEYS" % key)
    # Load the variable on demand; unload it again at the end
    has_var = self[0].has_key(key)
    if not has_var:
        try:
            self.loadvar(key)
        except IOError:
            pass

    # Set the correct units for the axes labels
    if key in ('Area', ):
        units = '$\mu$m$^{2}$'
    elif key in ('Generation', ):
        units = 'number'
    elif key in ('DivisionTime', ):
        units = 'min'
    else:
        units = '$\mu$m'

    # Create a formatted label with spaces between strings
    idxes = [i for i, v in enumerate(key) if v.isupper()]
    idxes.append(len(key))
    label = ' '.join([key[i1:i2] for i1, i2 in zip(idxes[:-1], idxes[1:])])

    # First plot the time series data for each position
    for i, p in enumerate(self.posns):
        time = self[i]['TimeP']
        gens = self[i]['Gens']
        if key in ('Generation', 'DivisionTime'):
            # Build a (Trace, Frame) MultiIndex over every trace/frame pair
            num_traces = len(gens)
            num_frames = len(time)
            frames = range(num_frames) * num_traces
            traces = []
            for v in xrange(num_traces):
                traces.extend([v] * num_frames)
            index = MultiIndex.from_arrays([traces, frames],
                                           names=('Trace', 'Frame'))
            vals = np.ones(shape=(num_traces, num_frames)) * np.nan
            if key in ('Generation', ):
                # Label each frame with its generation number (1-indexed)
                for j, g in enumerate(gens):
                    for k, v in enumerate(g):
                        vals[j][v.start:v.stop] = k
                vals = np.hstack(vals)
                vals += 1
            elif key in ('DivisionTime', ):
                # Label each frame with its generation's division time
                for j, g in enumerate(gens):
                    d = self[i]['Taus'][j]
                    for k, v in enumerate(g):
                        vals[j][v.start:v.stop] = d[k]
                vals = np.hstack(vals)
            data = DataFrame(vals, index=index, columns=(key, ))[key]
            data = data[np.isfinite(data)]
            if len(data) == 0:
                continue
            data.units = units
            data.posn = p
            data.file = os.path.join(self.figures_dir, p, key,
                                     '_'.join([self.expt_name, p, key]))
            data.label = label
            read.rmkdir(os.path.dirname(data.file))
            self.plot1Dseries(time, data, gens, bygens=False,
                              showlog=False, showoffset=False)
        elif key in ('Area', 'Length', 'Perimeter'):
            data = Series(self[i][key], name=key)
            if len(data) == 0:
                continue
            data.units = units
            data.posn = p
            data.file = os.path.join(self.figures_dir, p, key,
                                     '_'.join([self.expt_name, p, key]))
            data.label = label
            read.rmkdir(os.path.dirname(data.file))
            self.plot1Dseries(time, data, gens)
        elif key in ('Centroid', 'StalkedPole', 'SwarmerPole'):
            # '' plots both coordinates together; 'X'/'Y' plot one axis
            for a in ('', 'X', 'Y'):
                if a == '':
                    data = Series(self[i][key], name=key)
                else:
                    # Select the matching coordinate (X -> 0, Y -> 1), as
                    # in export(); the original plotted index 0 for both
                    c = ('X', 'Y').index(a)
                    data = Series([v[c] for v in self[i][key].values],
                                  index=self[i][key].index, name=key + a)
                if len(data) == 0:
                    continue
                data.units = units
                data.posn = p
                data.file = os.path.join(
                    self.figures_dir, p, key,
                    '_'.join([self.expt_name, p, key + a]))
                data.label = label
                read.rmkdir(os.path.dirname(data.file))
                if a == '':
                    self.plot2Dseries(time, data, gens)
                else:
                    data.label += ' ' + a
                    self.plot1Dseries(time, data, gens, showlog=False,
                                      showoffset=False)
    if not has_var:
        self.delvar(key)
def export(self, key, upload=False):
    """ Export data to a CSV file.

    args:
        key (str): name of variable to export

    kwargs:
        upload (bool): upload to shared folder at time of exporting

    raises:
        ValueError: if key is not one of self.TXT_KEYS
    """
    if key not in self.TXT_KEYS:
        raise ValueError("'%s' is not in TXT_KEYS" % key)
    # Load the variable on demand; unload it again at the end
    has_var = self[0].has_key(key)
    if not has_var:
        try:
            self.loadvar(key)
        except IOError:
            pass

    # The file name is auto-generated, with 3 decimals and tab delimiters
    read.rmkdir(self.tables_dir)
    file_name = os.path.join(self.tables_dir,
                             self.expt_name + '_' + key + '.txt')
    fmt = '%.3f'
    dlm = '\t'

    # Recast the DataFrame into a matrix, padded with NaN values
    nans = np.ones((self.num_traces, self.num_frames)) * np.nan
    if key in ('EdgeSpline', 'MidSpline'):
        # Load the StalkedPole and SwarmerPole values
        has_stalked = self[0].has_key('StalkedPole')
        if not has_stalked:
            self.loadvar('StalkedPole')
        has_swarmer = self[0].has_key('SwarmerPole')
        if not has_swarmer:
            self.loadvar('SwarmerPole')

        # Export data to a new Spline folder
        top_dir = os.path.join(self.tables_dir, self.expt_name + '_' + key)
        read.cmkdir(top_dir)

        # Number of digits determined by the number of cells traces
        num_digits = int(np.ceil(np.log10(self.num_traces + 1)))

        # Loop over each position then cell separately; use an integer
        # sample count (newer numpy rejects a float num such as 1e3)
        k = 0
        u = np.linspace(0., 1., 1000)
        for i in range(self.num_posns):
            for t in range(len(self[i]['Trace'])):
                j = k + t + 1
                data = [[] for _ in range(5)]
                for f, tck in self[i, t][key].iteritems():
                    if tck:
                        # Start indexing the frames at 1
                        data[0].append(f + 1)

                        # Find the indexes of the breaks
                        xs = np.asarray(zip(*splev(u, tck)))
                        p1 = self[i, t]['StalkedPole'].ix[f]
                        p2 = self[i, t]['SwarmerPole'].ix[f]
                        k1 = np.argmin([norm(v) for v in p1 - xs])
                        k2 = np.argmin([norm(v) for v in p2 - xs])
                        data[1].append((u[k1], u[k2]))

                        # Save the spline values
                        data[2].append(tck[0])
                        data[3].append(tck[1][0])
                        data[4].append(tck[1][1])

                # Export data to a subfolder for each trace
                sub_dir = os.path.join(top_dir,
                                       'trace' + str(j).zfill(num_digits))
                read.rmkdir(sub_dir)

                # Save five files for each trace
                file_names = []
                for v in ('Frames', 'Breaks', 'Knots',
                          'ControlX', 'ControlY'):
                    n = os.path.join(sub_dir,
                                     self.expt_name + '_' + v + '.txt')
                    file_names.append(n)

                # Save each value as an array with no empty values
                np.savetxt(file_names[0], data[0], fmt='%.0f',
                           delimiter=dlm)
                for n, d in zip(file_names[1:], data[1:]):
                    with open(n, 'w') as f:
                        for v in d:
                            f.write(dlm.join([fmt % x for x in v]) + '\n')
            k += (t + 1)
        if not has_stalked:
            self.delvar('StalkedPole')
        if not has_swarmer:
            self.delvar('SwarmerPole')
    elif key in ('Event', ):
        # Export pump metadata, one file per pump
        for i, v in enumerate(self.expt_data['Pumps']):
            n = str(i + 1).join(os.path.splitext(file_name))
            with codecs.open(n, encoding='utf-8', mode='w') as f:
                f.write(v['Solution'] + '\n')
                f.write('%s\t%s\t%s\n' % (v['Units'], 'TimeOn', 'TimeOff'))
                for j in range(len(v['Rate'])):
                    f.write('%.3f\t%.3f\t%.3f\n' %
                            (v['Rate'][j], v['Time'][j][0],
                             v['Time'][j][1]))
    elif key in ('Label', ):
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for (t, f), v in self[i][key].iteritems():
                j = k + t
                data[j, f] = v
            k += (t + 1)
        np.savetxt(file_name, data, fmt='%.0f', delimiter=dlm)
    elif key in ('Generation', ):
        # Save the generation counts (start indexing from 1)
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for t, v in enumerate(self[i]['Gens']):
                for u, f in enumerate(v):
                    j = k + t
                    data[j, f] = u
            k += (t + 1)
        data += 1
        np.savetxt(file_name, data, fmt='%.0f', delimiter=dlm)
    elif key in ('DivisionTime', ):
        # One row per trace, one column per generation
        taus = []
        for i in range(self.num_posns):
            for v in self[i]['Taus']:
                taus.append(v)
        max_gens = max([len(v) for v in taus])
        data = np.ones((self.num_traces, max_gens)) * np.nan
        for i, v in enumerate(taus):
            n = len(v)
            data[i][:n] = v
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('Centroid', 'StalkedPole', 'SwarmerPole'):
        # One file per coordinate axis (X -> index 0, Y -> index 1)
        for a, n in enumerate(('X', 'Y')):
            axis_name = n.join(os.path.splitext(file_name))
            data = nans.copy()
            k = 0
            for i in range(self.num_posns):
                for (t, f), v in self[i][key].iteritems():
                    j = k + t
                    data[j, f] = v[a]
                k += (t + 1)
            np.savetxt(axis_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('FourierFit', 'FourierCoef'):
        # Export data to a new folder
        top_dir = os.path.join(self.tables_dir, self.expt_name + '_' + key)
        read.cmkdir(top_dir)
        if 'FourierFit' == key:
            # Old representation
            num_coefs = 10
            coef_range = range(num_coefs)
        elif 'FourierCoef' == key:
            # New representation
            num_coefs = 20
            coef_range = range(-num_coefs, num_coefs + 1)
        for c in coef_range:
            for n in ('Real', 'Imag'):
                file_name = os.path.join(
                    top_dir, self.expt_name + '_' + key +
                    str(c).zfill(2) + n + '.txt')
                data = nans.copy()
                k = 0
                for i in range(self.num_posns):
                    for (t, f), v in self[i][key].iteritems():
                        j = k + t
                        data[j, f] = v[c].real if n == 'Real' else v[c].imag
                    k += (t + 1)
                np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('WidthsSmoothed', ):
        # Export data to a new Widths folder, one file per sample point
        top_dir = os.path.join(self.tables_dir,
                               self.expt_name + '_' + 'Widths')
        read.cmkdir(top_dir)
        num_points = 500
        for c in xrange(num_points):
            file_name = os.path.join(
                top_dir,
                self.expt_name + '_' + 'Widths' + str(c).zfill(3) + '.txt')

            # Recast the DataFrame into a matrix, padded with NaN values
            data = nans.copy()
            k = 0
            for i in range(self.num_posns):
                for (t, f), v in self[i][key].iteritems():
                    j = k + t
                    if np.any(v):
                        data[j, f] = v[c]
                k += (t + 1)
            np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('Time', ):
        # Recast the vector into a matrix (same row for every trace)
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            v = self[i]['TimeP']
            for t, _ in enumerate(self[i]['Trace']):
                j = k + t
                data[j] = v
            k += (t + 1)
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    elif key in ('Mother', ):
        # Save the identity of the mother (start indexing from 1)
        data = np.ones(self.num_traces) * np.nan
        k = 0
        for i in range(self.num_posns):
            for t, v in enumerate(self[i]['Mother']):
                if v is not None and v in self[i]['Trace']:
                    d = k + t
                    m = k + self[i]['Trace'].index(v)
                    data[d] = m
            k += (t + 1)
        data += 1
        np.savetxt(file_name, data, fmt='%.0f', delimiter=dlm)
    elif key in ('WidthMean', ):
        # Load the Widths values
        has_widths = self[0].has_key('Widths')
        if not has_widths:
            self.loadvar('Widths')

        # Recast the DataFrame into a matrix, padded with NaN values
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for (t, f), v in self[i]['Widths'].iteritems():
                j = k + t
                data[j, f] = np.nanmean(v)
            k += (t + 1)
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
        if not has_widths:
            self.delvar('Widths')
    else:
        # Recast the DataFrame into a matrix, padded with NaN values
        data = nans.copy()
        k = 0
        for i in range(self.num_posns):
            for (t, f), v in self[i][key].iteritems():
                j = k + t
                data[j, f] = v
            k += (t + 1)
        np.savetxt(file_name, data, fmt=fmt, delimiter=dlm)
    if upload:
        self.upload(key)
    if not has_var:
        self.delvar(key)