def prepare_data(path, arena, smooth, medfilt, only_laser, gts):
    """Pool orientation-corrected, laser-aligned trajectories from CSV files.

    Every ``*.csv`` file directly under ``path`` is visited in sorted order.
    Files are filtered on the metadata encoded in their filename *before*
    the (expensive) load/smooth step, then loaded through the madplot cache,
    corrected for head/tail orientation, aligned to the laser-on events and
    appended to one pooled DataFrame.

    Args:
        path: directory that is globbed for ``*.csv`` files.
        arena: forwarded to ``flymad_analysis.load_and_smooth_csv``; also
            part of the cache key.
        smooth: forwarded to ``flymad_analysis.load_and_smooth_csv``.
        medfilt: kernel size handed to ``scipy.signal.medfilt`` for the
            ``Vfwd`` column; a falsy value disables the median filter.
        only_laser: only files whose filename laser field equals this value
            are processed.
        gts: container of genotypes to keep.

    Returns:
        None in the code as written: the pooled frame is accumulated in a
        local variable and never returned.
        NOTE(review): this looks like a truncated function -- confirm
        whether the pooled data should be post-processed and returned.

    NOTE(review): ``smoothstr`` is not a parameter here; it is read as a
    module-level name when building the cache key -- confirm it is defined
    before this function is called.
    """
    pooldf = pd.DataFrame()

    for csvfile in sorted(glob.glob(path + "/*.csv")):
        # Don't waste time smoothing files that are not in our genotype
        # list: decide from the filename metadata alone.
        metadata = flymad_analysis.extract_metadata_from_filename(csvfile)
        _, _, _, _genotype, _laser, _ = metadata
        if _laser != only_laser:
            print("\tskipping laser %s != %s" % (_laser, only_laser))
            continue
        if _genotype not in gts:
            print("\tskipping genotype %s != %s" % (_genotype, gts))
            continue

        csvfilefn = os.path.basename(csvfile)
        cache_args = csvfilefn, arena, smoothstr
        cache_fname = csvfile + '.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            # Cache miss: load from scratch and refresh the cache on success.
            results = flymad_analysis.load_and_smooth_csv(
                csvfile, arena, smooth)
            if results is None:
                print("skipping %s" % (csvfile,))
                continue
            madplot.save_bagfile_cache(results, cache_args, cache_fname)

        df, dt, experimentID, date, time, genotype, laser, repID = results

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print("\tmissing data %s %s" % (csvfilefn, duration))
            continue
        print("\t%ss experiment" % duration)

        # NOTE(review): the ``df[col][mask] = value`` assignments below are
        # chained indexing; they rely on pandas writing through to ``df``.
        # Confirm against the pandas version in use.

        # We use zx to rotate by pi: any positive zx becomes a pi offset.
        df['zx'][df['zx'] > 0] = math.pi
        # ROTATE by pi if orientation is east
        df['orientation'] = df['theta'] + df['zx']

        # ROTATE by pi if orientation is north/south (plusminus 0.25pi)
        # and hemisphere does not match scoring:
        smask = df[df['as'] == 1]
        smask = smask[smask['orientation'] < 0.75 * math.pi]
        smask = smask[smask['orientation'] > 0.25 * math.pi]
        amask = df[df['as'] == 0]
        amask1 = amask[amask['orientation'] > -0.5 * math.pi]
        amask1 = amask1[amask1['orientation'] < -0.25 * math.pi]
        amask2 = amask[amask['orientation'] > 1.25 * math.pi]
        amask2 = amask2[amask2['orientation'] < 1.5 * math.pi]
        # 'as' is reused from here on as the per-row correction angle.
        df['as'] = 0
        df['as'][smask.index] = math.pi
        df['as'][amask1.index] = math.pi
        df['as'][amask2.index] = math.pi
        df['orientation'] = df['orientation'] - df['as']
        df['orientation'] = df['orientation'].astype(float)

        # Unwrap only the finite samples; non-finite rows are left untouched.
        finite = np.isfinite(df['orientation'])
        df['orientation'][finite] = np.unwrap(df['orientation'][finite])

        # MAXIMUM SPEED = 300: anything at or above it is replaced by NaN.
        df['v'][df['v'] >= 300] = np.nan

        # CALCULATE FORWARD VELOCITY: project the velocity vector onto the
        # body orientation.
        df['Vtheta'] = np.arctan2(df['vy'], df['vx'])
        df['Vfwd'] = (np.cos(df['orientation'] - df['Vtheta'])) * df['v']
        df['Afwd'] = np.gradient(df['Vfwd'].values) / dt
        df['dorientation'] = np.gradient(df['orientation'].values) / dt

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df, min_experiment_duration=EXPERIMENT_DURATION,
                align_first_only=False,
                t_range=(-1, 6),
                min_num_ranges=5)
        except flymad_analysis.AlignError as err:
            print("\talign error %s (%s)" % (csvfilefn, err))
            continue

        # median filter
        if medfilt:
            df['Vfwd'] = scipy.signal.medfilt(df['Vfwd'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])
def prepare_data(path, resample_bin, gts):
    """Load scored courtship CSVs, resample them and aggregate per genotype.

    Each frame yielded by ``flymad_analysis.load_courtship_csv(path)`` is
    checked, annotated with a head/thorax targeting column (``ttm``),
    resampled into ``resample_bin`` bins, given a time base relative to the
    first laser onset and appended to a pooled DataFrame. The pooled data
    are then grouped by time for every genotype in ``gts``.

    Args:
        path: forwarded to ``flymad_analysis.load_courtship_csv``.
        resample_bin: resampling rule passed to ``DataFrame.resample`` and
            to the ``flymad_analysis`` time-base helpers.
        gts: iterable of genotypes to keep and to report on.

    Returns:
        dict mapping each genotype in ``gts`` to a dict with the keys
        ``mean``, ``std``, ``n`` (per-time-point aggregates cast to float),
        ``first`` (first row per time point) and ``df`` (the un-aggregated
        rows of that genotype).

    Raises:
        Exception: if a genotype has anything other than exactly one laser
            group in the pooled data.
    """
    LASER_THORAX_MAP = {True: THORAX, False: HEAD}

    # PROCESS SCORE FILES:
    pooldf = pd.DataFrame()
    for df, metadata in flymad_analysis.load_courtship_csv(path):
        csvfilefn, experimentID, date, time, genotype, laser, repID = metadata

        # Count samples where the gradient of laser_state is exactly 0.5.
        # Presumably laser_state is a 0/1 signal, so every rising edge
        # contributes two such samples (hence the division by two in the
        # message) -- TODO confirm.
        dlaser = np.gradient(df['laser_state'].values)
        num_on_periods = (dlaser == 0.5).sum()
        if num_on_periods != 12:
            # Floor division keeps the value integral on Python 2 and 3.
            print("\tskipping file %s (%d laser on periods)" % (
                csvfilefn, num_on_periods // 2))
            continue

        if genotype not in gts:
            print("\tskipping genotype %s" % (genotype,))
            continue

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print("\tmissing data %s" % (csvfilefn,))
            continue
        print("\t%ss experiment" % duration)

        # Make a new column that indicates HEAD/THORAX targeting. The target
        # flips on every laser-on transition, so the first on-period maps to
        # LASER_THORAX_MAP[False].
        thorax = True
        laser_state = False
        trg = []
        for (_, prev_row), (_, next_row) in madplot.pairwise(df.iterrows()):
            if next_row['laser_state'] >= 0.5 and prev_row['laser_state'] == 0:
                thorax = not thorax
                laser_state = True
            elif (prev_row['laser_state'] >= 0.5
                    and next_row['laser_state'] == 0):
                laser_state = False
            trg.append(LASER_THORAX_MAP[thorax] if laser_state else OFF)
        # pairwise() yields one item fewer than there are rows; pad with OFF
        # so the column length matches the frame.
        trg.append(OFF)
        df['ttm'] = trg

        # Resample into ``resample_bin`` bins, forward-filling gaps.
        # NOTE(review): ``fill_method`` is an old pandas resample keyword --
        # confirm against the pandas version in use.
        df = df.resample(resample_bin, fill_method='ffill')

        # trim dataframe
        df = df.head(
            flymad_analysis.get_num_rows(EXPERIMENT_DURATION, resample_bin))
        tb = flymad_analysis.get_resampled_timebase(
            EXPERIMENT_DURATION, resample_bin)

        # Fix columns due to resampling: force them back to discrete values.
        # NOTE(review): these are chained-indexing assignments; they rely on
        # pandas writing through to ``df``.
        df['laser_state'][df['laser_state'] > 0] = 1
        df['zx_binary'] = (df['zx'] > 0).values.astype(float)
        df['ttm'][df['ttm'] < 0] = HEAD
        df['ttm'][df['ttm'] > 0] = THORAX

        # Locate the first sample with a positive laser gradient and make
        # time relative to the bin just before it.
        # NOTE(review): if t0idx is 0 then tb[t0idx - 1] is the *last*
        # element of tb -- confirm that cannot happen.
        dlaser = np.gradient(
            (df['laser_state'].values > 0).astype(int)) > 0
        t0idx = np.argmax(dlaser)
        t0 = tb[t0idx - 1]
        df['t'] = tb - t0

        # groupby on float times is slow. make a special align column
        df['t_align'] = np.arange(len(df)) - t0idx

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])

    data = {}
    for gt in gts:
        gtdf = pooldf[pooldf['Genotype'] == gt]

        lgs = gtdf['lasergroup'].unique()
        if len(lgs) != 1:
            raise Exception(
                "only one lasergroup handled for gt %s: not %s" % (gt, lgs))

        grouped = gtdf.groupby(['t'], as_index=False)
        data[gt] = dict(mean=grouped.mean().astype(float),
                        std=grouped.std().astype(float),
                        n=grouped.count().astype(float),
                        first=grouped.first(),
                        df=gtdf)

    return data
def prepare_data(path, arena, smooth, medfilt, only_laser, gts):
    """Pool orientation-corrected, laser-aligned trajectories from CSV files.

    Every ``*.csv`` file directly under ``path`` is visited in sorted order.
    Files are filtered on the metadata encoded in their filename *before*
    the (expensive) load/smooth step, then loaded through the madplot cache,
    corrected for head/tail orientation, aligned to the laser-on events
    (window ``t_range=(-1, 9)``) and appended to one pooled DataFrame.

    Args:
        path: directory that is globbed for ``*.csv`` files.
        arena: forwarded to ``flymad_analysis.load_and_smooth_csv``; also
            part of the cache key.
        smooth: forwarded to ``flymad_analysis.load_and_smooth_csv``.
        medfilt: kernel size handed to ``scipy.signal.medfilt`` for the
            ``Vfwd`` column; a falsy value disables the median filter.
        only_laser: only files whose filename laser field equals this value
            are processed.
        gts: container of genotypes to keep.

    Returns:
        None in the code as written: the pooled frame is accumulated in a
        local variable and never returned.
        NOTE(review): this looks like a truncated function -- confirm
        whether the pooled data should be post-processed and returned.

    NOTE(review): ``smoothstr`` is not a parameter here; it is read as a
    module-level name when building the cache key -- confirm it is defined
    before this function is called.
    """
    pooldf = pd.DataFrame()

    for csvfile in sorted(glob.glob(path + "/*.csv")):
        # Don't waste time smoothing files that are not in our genotype
        # list: decide from the filename metadata alone.
        metadata = flymad_analysis.extract_metadata_from_filename(csvfile)
        _, _, _, _genotype, _laser, _ = metadata
        if _laser != only_laser:
            print("\tskipping laser %s != %s" % (_laser, only_laser))
            continue
        if _genotype not in gts:
            print("\tskipping genotype %s != %s" % (_genotype, gts))
            continue

        csvfilefn = os.path.basename(csvfile)
        cache_args = csvfilefn, arena, smoothstr
        cache_fname = csvfile + '.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            # Cache miss: load from scratch and refresh the cache on success.
            results = flymad_analysis.load_and_smooth_csv(
                csvfile, arena, smooth)
            if results is None:
                print("skipping %s" % (csvfile,))
                continue
            madplot.save_bagfile_cache(results, cache_args, cache_fname)

        df, dt, experimentID, date, time, genotype, laser, repID = results

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print("\tmissing data %s" % (csvfilefn,))
            continue
        print("\t%ss experiment" % duration)

        # NOTE(review): the ``df[col][mask] = value`` assignments below are
        # chained indexing; they rely on pandas writing through to ``df``.
        # Confirm against the pandas version in use.

        # We use zx to rotate by pi: any positive zx becomes a pi offset.
        df['zx'][df['zx'] > 0] = math.pi
        # ROTATE by pi if orientation is east
        df['orientation'] = df['theta'] + df['zx']

        # ROTATE by pi if orientation is north/south (plusminus 0.25pi)
        # and hemisphere does not match scoring:
        smask = df[df['as'] == 1]
        smask = smask[smask['orientation'] < 0.75 * math.pi]
        smask = smask[smask['orientation'] > 0.25 * math.pi]
        amask = df[df['as'] == 0]
        amask1 = amask[amask['orientation'] > -0.5 * math.pi]
        amask1 = amask1[amask1['orientation'] < -0.25 * math.pi]
        amask2 = amask[amask['orientation'] > 1.25 * math.pi]
        amask2 = amask2[amask2['orientation'] < 1.5 * math.pi]
        # 'as' is reused from here on as the per-row correction angle.
        df['as'] = 0
        df['as'][smask.index] = math.pi
        df['as'][amask1.index] = math.pi
        df['as'][amask2.index] = math.pi
        df['orientation'] = df['orientation'] - df['as']
        df['orientation'] = df['orientation'].astype(float)

        # Unwrap only the finite samples; non-finite rows are left untouched.
        finite = np.isfinite(df['orientation'])
        df['orientation'][finite] = np.unwrap(df['orientation'][finite])

        # MAXIMUM SPEED = 300: anything at or above it is replaced by NaN.
        df['v'][df['v'] >= 300] = np.nan

        # CALCULATE FORWARD VELOCITY: project the velocity vector onto the
        # body orientation.
        df['Vtheta'] = np.arctan2(df['vy'], df['vx'])
        df['Vfwd'] = (np.cos(df['orientation'] - df['Vtheta'])) * df['v']
        df['Afwd'] = np.gradient(df['Vfwd'].values) / dt
        df['dorientation'] = np.gradient(df['orientation'].values) / dt

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df, min_experiment_duration=EXPERIMENT_DURATION,
                align_first_only=False,
                t_range=(-1, 9),
                min_num_ranges=5)
        except flymad_analysis.AlignError as err:
            print("\talign error %s (%s)" % (csvfilefn, err))
            continue

        # median filter
        if medfilt:
            df['Vfwd'] = scipy.signal.medfilt(df['Vfwd'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])
def prepare_data(path, resample_bin, gts):
    """Load scored courtship CSVs, resample them and aggregate per genotype.

    Each frame yielded by ``flymad_analysis.load_courtship_csv(path)`` is
    checked, annotated with a head/thorax targeting column (``ttm``),
    resampled into ``resample_bin`` bins, given a time base relative to the
    first laser onset and appended to a pooled DataFrame. The pooled data
    are then grouped by time for every genotype in ``gts``.

    Args:
        path: forwarded to ``flymad_analysis.load_courtship_csv``.
        resample_bin: resampling rule passed to ``DataFrame.resample`` and
            to the ``flymad_analysis`` time-base helpers.
        gts: iterable of genotypes to keep and to report on.

    Returns:
        dict mapping each genotype in ``gts`` to a dict with the keys
        ``mean``, ``std``, ``n`` (per-time-point aggregates cast to float),
        ``first`` (first row per time point) and ``df`` (the un-aggregated
        rows of that genotype).

    Raises:
        Exception: if a genotype has anything other than exactly one laser
            group in the pooled data.
    """
    LASER_THORAX_MAP = {True: THORAX, False: HEAD}

    # PROCESS SCORE FILES:
    pooldf = pd.DataFrame()
    for df, metadata in flymad_analysis.load_courtship_csv(path):
        csvfilefn, experimentID, date, time, genotype, laser, repID = metadata

        # Count samples where the gradient of laser_state is exactly 0.5.
        # Presumably laser_state is a 0/1 signal, so every rising edge
        # contributes two such samples (hence the division by two in the
        # message) -- TODO confirm.
        dlaser = np.gradient(df['laser_state'].values)
        num_on_periods = (dlaser == 0.5).sum()
        if num_on_periods != 12:
            # Floor division keeps the value integral on Python 2 and 3.
            print("\tskipping file %s (%d laser on periods)" % (
                csvfilefn, num_on_periods // 2))
            continue

        if genotype not in gts:
            print("\tskipping genotype %s" % (genotype,))
            continue

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print("\tmissing data %s" % (csvfilefn,))
            continue
        print("\t%ss experiment" % duration)

        # Make a new column that indicates HEAD/THORAX targeting. The target
        # flips on every laser-on transition, so the first on-period maps to
        # LASER_THORAX_MAP[False].
        thorax = True
        laser_state = False
        trg = []
        for (_, prev_row), (_, next_row) in madplot.pairwise(df.iterrows()):
            if next_row['laser_state'] >= 0.5 and prev_row['laser_state'] == 0:
                thorax = not thorax
                laser_state = True
            elif (prev_row['laser_state'] >= 0.5
                    and next_row['laser_state'] == 0):
                laser_state = False
            trg.append(LASER_THORAX_MAP[thorax] if laser_state else OFF)
        # pairwise() yields one item fewer than there are rows; pad with OFF
        # so the column length matches the frame.
        trg.append(OFF)
        df['ttm'] = trg

        # Resample into ``resample_bin`` bins, forward-filling gaps.
        # NOTE(review): ``fill_method`` is an old pandas resample keyword --
        # confirm against the pandas version in use.
        df = df.resample(resample_bin, fill_method='ffill')

        # trim dataframe
        df = df.head(
            flymad_analysis.get_num_rows(EXPERIMENT_DURATION, resample_bin))
        tb = flymad_analysis.get_resampled_timebase(
            EXPERIMENT_DURATION, resample_bin)

        # Fix columns due to resampling: force them back to discrete values.
        # NOTE(review): these are chained-indexing assignments; they rely on
        # pandas writing through to ``df``.
        df['laser_state'][df['laser_state'] > 0] = 1
        df['zx_binary'] = (df['zx'] > 0).values.astype(float)
        df['ttm'][df['ttm'] < 0] = HEAD
        df['ttm'][df['ttm'] > 0] = THORAX

        # Locate the first sample with a positive laser gradient and make
        # time relative to the bin just before it.
        # NOTE(review): if t0idx is 0 then tb[t0idx - 1] is the *last*
        # element of tb -- confirm that cannot happen.
        dlaser = np.gradient(
            (df['laser_state'].values > 0).astype(int)) > 0
        t0idx = np.argmax(dlaser)
        t0 = tb[t0idx - 1]
        df['t'] = tb - t0

        # groupby on float times is slow. make a special align column
        df['t_align'] = np.arange(len(df)) - t0idx

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser
        df['RepID'] = repID

        pooldf = pd.concat([pooldf, df])

    data = {}
    for gt in gts:
        gtdf = pooldf[pooldf['Genotype'] == gt]

        lgs = gtdf['lasergroup'].unique()
        if len(lgs) != 1:
            raise Exception(
                "only one lasergroup handled for gt %s: not %s" % (gt, lgs))

        grouped = gtdf.groupby(['t'], as_index=False)
        data[gt] = dict(mean=grouped.mean().astype(float),
                        std=grouped.std().astype(float),
                        n=grouped.count().astype(float),
                        first=grouped.first(),
                        df=gtdf)

    return data
def prepare_data(path, arena, smoothstr, smooth, medfilt, gts):
    """Pool speed traces, aligned to the single laser-on event, from CSVs.

    Every ``*.csv`` file directly under ``path`` is loaded (through the
    madplot cache), labelled with a combined ``genotype-laser`` name,
    filtered against ``gts``, cleaned, aligned to laser onset and appended
    to one pooled DataFrame.

    Args:
        path: directory that is globbed for ``*.csv`` files.
        arena: forwarded to ``flymad_analysis.load_and_smooth_csv``; also
            part of the cache key.
        smoothstr: part of the cache key.
        smooth: forwarded to ``flymad_analysis.load_and_smooth_csv``.
        medfilt: kernel size handed to ``scipy.signal.medfilt`` for the
            ``v`` column; a falsy value disables the median filter.
        gts: container of combined ``genotype-laser`` names to keep.

    Returns:
        None in the code as written: the pooled frame is accumulated in a
        local variable and never returned.
        NOTE(review): this looks like a truncated function -- confirm
        whether the pooled data should be post-processed and returned.
    """
    pooldf = pd.DataFrame()

    for csvfile in sorted(glob.glob(path + "/*.csv")):
        # Base name is used for the cache key and for the "missing data"
        # message below (the original referenced it without defining it).
        csvfilefn = os.path.basename(csvfile)
        cache_args = csvfilefn, arena, smoothstr
        cache_fname = csvfile + '.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            # Cache miss: load from scratch and refresh the cache on success.
            results = flymad_analysis.load_and_smooth_csv(
                csvfile, arena, smooth)
            if results is None:
                print("skipping %s" % (csvfile,))
                continue
            madplot.save_bagfile_cache(results, cache_args, cache_fname)

        df, dt, experimentID, date, time, genotype, laser, repID = results

        # We plot head v thorax v nolaser (so for the sake of plotting,
        # consider these the genotypes).
        genotype = genotype + '-' + laser

        if genotype not in gts:
            print("\tskipping genotype %s" % (genotype,))
            continue

        if 0:  # disabled debugging aid: plot the tracked trajectory
            fig = plt.figure()
            fig.suptitle(csvfilefn)
            ax = fig.add_subplot(1, 1, 1)
            df['experiment'] = 1
            df['tobj_id'] = 1
            madplot.plot_tracked_trajectory(ax, df, arena,
                                            debug_plot=False,
                                            color='k',
                                            )
            ax.add_patch(arena.get_patch(color='k', alpha=0.1))

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print("\tmissing data %s" % (csvfilefn,))
            continue
        print("\t%ss experiment" % duration)

        # MAXIMUM SPEED = 300: anything at or above it is replaced by NaN
        # and then forward-filled from the previous sample.
        # NOTE(review): chained-indexing assignment; it relies on pandas
        # writing through to ``df``.
        df['v'][df['v'] >= 300] = np.nan
        df['v'] = df['v'].fillna(method='ffill')

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df, min_experiment_duration=EXPERIMENT_DURATION,
                align_first_only=True,
                exact_num_ranges=1)
        except flymad_analysis.AlignError as err:
            print("\talign error %s (%s)" % (csvfile, err))
            continue

        # median filter
        if medfilt:
            df['v'] = scipy.signal.medfilt(df['v'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser

        pooldf = pd.concat([pooldf, df])
def prepare_data(path, arena, smoothstr, smooth, medfilt, gts):
    """Pool speed traces, aligned to the single laser-on event, from CSVs.

    Every ``*.csv`` file directly under ``path`` is loaded (through the
    madplot cache), labelled with a combined ``genotype-laser`` name,
    filtered against ``gts``, cleaned, aligned to laser onset and appended
    to one pooled DataFrame.

    Args:
        path: directory that is globbed for ``*.csv`` files.
        arena: forwarded to ``flymad_analysis.load_and_smooth_csv``; also
            part of the cache key.
        smoothstr: part of the cache key.
        smooth: forwarded to ``flymad_analysis.load_and_smooth_csv``.
        medfilt: kernel size handed to ``scipy.signal.medfilt`` for the
            ``v`` column; a falsy value disables the median filter.
        gts: container of combined ``genotype-laser`` names to keep.

    Returns:
        None in the code as written: the pooled frame is accumulated in a
        local variable and never returned.
        NOTE(review): this looks like a truncated function -- confirm
        whether the pooled data should be post-processed and returned.
    """
    pooldf = pd.DataFrame()

    for csvfile in sorted(glob.glob(path + "/*.csv")):
        # Base name is used for the cache key and for the "missing data"
        # message below (the original referenced it without defining it).
        csvfilefn = os.path.basename(csvfile)
        cache_args = csvfilefn, arena, smoothstr
        cache_fname = csvfile + '.madplot-cache'

        results = madplot.load_bagfile_cache(cache_args, cache_fname)
        if results is None:
            # Cache miss: load from scratch and refresh the cache on success.
            results = flymad_analysis.load_and_smooth_csv(
                csvfile, arena, smooth)
            if results is None:
                print("skipping %s" % (csvfile,))
                continue
            madplot.save_bagfile_cache(results, cache_args, cache_fname)

        df, dt, experimentID, date, time, genotype, laser, repID = results

        # We plot head v thorax v nolaser (so for the sake of plotting,
        # consider these the genotypes).
        genotype = genotype + '-' + laser

        if genotype not in gts:
            print("\tskipping genotype %s" % (genotype,))
            continue

        if 0:  # disabled debugging aid: plot the tracked trajectory
            fig = plt.figure()
            fig.suptitle(csvfilefn)
            ax = fig.add_subplot(1, 1, 1)
            df['experiment'] = 1
            df['tobj_id'] = 1
            madplot.plot_tracked_trajectory(ax, df, arena,
                                            debug_plot=False,
                                            color='k',
                                            )
            ax.add_patch(arena.get_patch(color='k', alpha=0.1))

        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < EXPERIMENT_DURATION:
            print("\tmissing data %s" % (csvfilefn,))
            continue
        print("\t%ss experiment" % duration)

        # MAXIMUM SPEED = 300: anything at or above it is replaced by NaN
        # and then forward-filled from the previous sample.
        # NOTE(review): chained-indexing assignment; it relies on pandas
        # writing through to ``df``.
        df['v'][df['v'] >= 300] = np.nan
        df['v'] = df['v'].fillna(method='ffill')

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df, min_experiment_duration=EXPERIMENT_DURATION,
                align_first_only=True,
                exact_num_ranges=1)
        except flymad_analysis.AlignError as err:
            print("\talign error %s (%s)" % (csvfile, err))
            continue

        # median filter
        if medfilt:
            df['v'] = scipy.signal.medfilt(df['v'].values, medfilt)

        df['obj_id'] = flymad_analysis.create_object_id(date, time)
        df['Genotype'] = genotype
        df['lasergroup'] = laser

        pooldf = pd.concat([pooldf, df])