Exemplo n.º 1
0
def prepare_data(bags, arena, smoothstr, smooth, medfilt, gts,
                 min_experiment_duration):
    """Load every bag file, filter/align it and pool the frames together.

    For each entry of ``bags`` the data is loaded, resampled, checked
    against the wanted genotypes and the minimum duration, aligned to the
    laser-on time, optionally median filtered, tagged with ``obj_id`` and
    ``Genotype`` columns and appended to one pooled DataFrame.

    Parameters
    ----------
    bags : iterable
        Bag file names. Each one is handed to
        ``madplot.load_bagfile_single_dataframe`` and to the
        ``filename_regexes`` parsers.
    arena
        Passed through unchanged to the bag loader.
    smoothstr
        Not used by the visible body of this function.
    smooth
        Passed to the bag loader as its ``smooth`` keyword.
    medfilt
        When truthy, used as the kernel size of ``scipy.signal.medfilt``
        applied to the ``'v'`` column.
    gts : container
        Labels of the form ``"<genotype>-<laser>"`` to keep; bags whose
        label is not in ``gts`` are skipped.
    min_experiment_duration
        Minimum time span, in seconds, between the first and last index
        entries; shorter recordings are skipped. Also forwarded to
        ``flymad_analysis.align_t_by_laser_on``.

    Notes
    -----
    The visible body has no ``return`` statement; ``pooldf`` and
    ``found_gts`` are only built up here.
    """
    # Bin specifier shared by the resample and align steps; presumably a
    # pandas offset alias (10 milliseconds) -- TODO confirm.
    RESAMPLE_SPEC = '10L'

    # Every genotype label seen, including the ones skipped below.
    found_gts = []

    pooldf = DataFrame()
    for bag in bags:
        df = madplot.load_bagfile_single_dataframe(bag,
                                                   arena,
                                                   ffill=True,
                                                   smooth=smooth)
        df = flymad_analysis.resample(df, resample_specifier=RESAMPLE_SPEC)

        metadata = filename_regexes.parse_filename(
            bag, extract_genotype_and_laser=True)
        dateobj = filename_regexes.parse_date(bag)
        genotype = metadata['genotype'] + '-' + metadata['laser']

        found_gts.append(genotype)

        if genotype not in gts:
            # Single pre-formatted argument: prints the same text under
            # both the Python 2 print statement and the Python 3 function.
            print("\tskipping genotype %s" % (genotype,))
            continue

        # Time covered by the recording (the index must be datetime-like
        # for the difference to provide total_seconds()).
        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < min_experiment_duration:
            print("\tmissing data %s" % (bag,))
            continue

        print("\t%ss experiment" % duration)

        #MAXIMUM SPEED = 300:
        #df['v'][df['v'] >= 300] = np.nan
        #df['v'] = df['v'].fillna(method='ffill')

        try:
            df = flymad_analysis.align_t_by_laser_on(
                df,
                min_experiment_duration=min_experiment_duration,
                align_first_only=True,
                exact_num_ranges=1,
                resample_bin=RESAMPLE_SPEC)
        except flymad_analysis.AlignError as err:
            # NOTE(review): the un-aligned frame still falls through and
            # is pooled below -- confirm this is intended rather than
            # skipping the bag.
            print("\talign error %s (%s)" % (bag, err))

        # Optional median filter of the 'v' column.
        if medfilt:
            df['v'] = scipy.signal.medfilt(df['v'].values, medfilt)

        # Tag the rows with the epoch time parsed from the file name
        # (calendar.timegm interprets the time tuple as UTC) and with the
        # genotype label.
        df['obj_id'] = calendar.timegm(dateobj)
        df['Genotype'] = genotype

        pooldf = pd.concat([pooldf, df])
Exemplo n.º 2
0
def prepare_data(bags, arena, smoothstr, smooth, medfilt, gts, min_experiment_duration):
    """Pool the per-bag tracking data of the requested genotypes.

    Each bag in ``bags`` is loaded and resampled, its genotype/laser label
    and date are parsed from the file name, and it is skipped when the
    label is not in ``gts`` or the recording is shorter than
    ``min_experiment_duration`` seconds. Remaining frames are aligned to
    the laser-on time, optionally median filtered (``medfilt`` is the
    kernel size for ``scipy.signal.medfilt`` on column ``'v'``), given
    ``obj_id`` and ``Genotype`` columns, and concatenated into ``pooldf``.

    ``arena`` and ``smooth`` are forwarded to
    ``madplot.load_bagfile_single_dataframe``; ``smoothstr`` is not used
    by the visible body. The visible body contains no ``return``
    statement.
    """
    # Bin specifier used for both resampling and alignment; presumably a
    # pandas offset alias meaning 10 milliseconds -- TODO confirm.
    RESAMPLE_SPEC = '10L'

    # Labels of all bags encountered, whether kept or skipped.
    found_gts = []

    pooldf = DataFrame()
    for bag in bags:
        df = madplot.load_bagfile_single_dataframe(bag, arena,
                                                        ffill=True,
                                                        smooth=smooth)
        df = flymad_analysis.resample(df, resample_specifier=RESAMPLE_SPEC)

        metadata = filename_regexes.parse_filename(bag, extract_genotype_and_laser=True)
        dateobj = filename_regexes.parse_date(bag)
        genotype = metadata['genotype'] + '-' + metadata['laser']

        found_gts.append(genotype)

        if genotype not in gts:
            # One pre-formatted string so the output is identical whether
            # print is the Python 2 statement or the Python 3 function.
            print("\tskipping genotype %s" % (genotype,))
            continue

        # Span between the first and last samples, in seconds.
        duration = (df.index[-1] - df.index[0]).total_seconds()
        if duration < min_experiment_duration:
            print("\tmissing data %s" % (bag,))
            continue

        print("\t%ss experiment" % duration)

        #MAXIMUM SPEED = 300:
        #df['v'][df['v'] >= 300] = np.nan
        #df['v'] = df['v'].fillna(method='ffill')

        try:
            df = flymad_analysis.align_t_by_laser_on(
                    df, min_experiment_duration=min_experiment_duration,
                    align_first_only=True,
                    exact_num_ranges=1,
                    resample_bin=RESAMPLE_SPEC)
        except flymad_analysis.AlignError as err:
            # NOTE(review): on failure the frame is still pooled below in
            # its un-aligned form -- confirm that a `continue` was not
            # intended here.
            print("\talign error %s (%s)" % (bag, err))

        #median filter
        if medfilt:
            df['v'] = scipy.signal.medfilt(df['v'].values, medfilt)

        # obj_id is the epoch timestamp of the date parsed from the file
        # name (calendar.timegm treats the time tuple as UTC).
        df['obj_id'] = calendar.timegm(dateobj)
        df['Genotype'] = genotype

        pooldf = pd.concat([pooldf, df])
Exemplo n.º 3
0
                print 'will overwrite output file',out_fname
        real_input_mp4s.append( mp4 )

    inputmp4s = real_input_mp4s
    print 'will score %d mp4 movies'%(len(inputmp4s),)

    for mp4 in inputmp4s:
        fname = mp4
        base_fname = os.path.basename(fname)
        out_fname = os.path.join(args.outdir, base_fname+'.csv')

        if args.no_merge_bags:
            bname = None
        else:
            try:
                mp4time = filename_regexes.parse_date(mp4)
            except filename_regexes.RegexError, e:
                print "error: incorrectly named mp4 file?", mp4
                continue

            bname = None
            if inputbags:
                best_diff = np.inf
                for bag in inputbags:
                    try:
                        bagtime = filename_regexes.parse_date(bag)
                    except filename_regexes.RegexError, e:
                        print "error: incorrectly named bag file?", bag
                        continue
                    this_diff = abs(time.mktime(bagtime)-time.mktime(mp4time))
                    if this_diff < best_diff:
Exemplo n.º 4
0
                print 'will overwrite output file', out_fname
        real_input_mp4s.append(mp4)

    inputmp4s = real_input_mp4s
    print 'will score %d mp4 movies' % (len(inputmp4s), )

    for mp4 in inputmp4s:
        fname = mp4
        base_fname = os.path.basename(fname)
        out_fname = os.path.join(args.outdir, base_fname + '.csv')

        if args.no_merge_bags:
            bname = None
        else:
            try:
                mp4time = filename_regexes.parse_date(mp4)
            except filename_regexes.RegexError, e:
                print "error: incorrectly named mp4 file?", mp4
                continue

            bname = None
            if inputbags:
                best_diff = np.inf
                for bag in inputbags:
                    try:
                        bagtime = filename_regexes.parse_date(bag)
                    except filename_regexes.RegexError, e:
                        print "error: incorrectly named bag file?", bag
                        continue
                    this_diff = abs(
                        time.mktime(bagtime) - time.mktime(mp4time))