Exemplo n.º 1
0
 def clean_final_df(self, df_in):
     """Index ``df_in`` by time, mask fill values and run the preparation steps.

     Every cell equal to ``-9999.0`` is replaced by NaN before the frame is
     handed to ``prepare_data`` (per the original note, that step pads NaN
     data for some columns). ``define_sdtype`` is then called on the prepared
     frame; its return value is not used.

     Returns the prepared frame.
     """
     frame = index_by_time(df_in)
     # -9999.0 is the value declared as "missing" here; turn it into NaN.
     is_fill_value = frame == -9999.0
     frame[is_fill_value] = np.nan
     prepared = prepare_data(frame)
     define_sdtype(prepared)
     return prepared
Exemplo n.º 2
0
def open_and_accumulate(fname):
    """Gather the data of ``fname`` and of its neighbouring hours in one frame.

    The center file is opened first. Earlier hours are then prepended and
    later hours appended, one hour at a time, and in each direction the walk
    stops after the first file whose ``open()`` result has any truthy
    ``is_calib`` entry.

    Neighbouring hours are added with ``open_dirty()``: one CAN NOT
    accumulate cleaned data files, because the numbering of calib-blocks is
    relied upon to be unique and each cleaning operation starts that
    numbering from 1 again.

    The concatenated frames go through ``prepare_data`` and ``define_sdtype``
    before being returned.
    """
    frames = deque([L1ADataFile(fname).open()])

    # Walk backwards in time, starting from the center file name.
    hour = FileName(fname)
    calib_found = False
    while not calib_found:
        hour.set_previous_hour()
        hourfile = L1ADataFile(hour.fname)
        print("Appending {0} on the left.".format(hour.timestr))
        frames.appendleft(hourfile.open_dirty())
        calib_found = any(hourfile.open().is_calib)

    # Walk forwards in time, again starting from the center file name.
    hour = FileName(fname)
    calib_found = False
    while not calib_found:
        hour.set_next_hour()
        hourfile = L1ADataFile(hour.fname)
        print("Appending {0} on the right.".format(hour.timestr))
        frames.append(hourfile.open_dirty())
        calib_found = any(hourfile.open().is_calib)

    combined = pd.concat(list(frames))
    result = prepare_data(combined)
    define_sdtype(result)
    return result
Exemplo n.º 3
0
 def clean_final_df(self, df_in):
     """Index ``df_in`` by time, mask fill values and run the preparation steps.

     Every cell equal to ``-9999.0`` is replaced by NaN before the frame is
     handed to ``prepare_data`` (per the original note, that step pads NaN
     data for some columns). ``define_sdtype`` is then called on the prepared
     frame; its return value is not used.

     Returns the prepared frame.
     """
     frame = index_by_time(df_in)
     # -9999.0 is the value declared as "missing" here; turn it into NaN.
     is_fill_value = frame == -9999.0
     frame[is_fill_value] = np.nan
     prepared = prepare_data(frame)
     define_sdtype(prepared)
     return prepared
Exemplo n.º 4
0
def open_and_accumulate(fname):
    """Gather the data of ``fname`` and of its neighbouring hours in one frame.

    The center file is opened first. Earlier hours are then prepended and
    later hours appended, one hour at a time, and in each direction the walk
    stops after the first file whose ``open()`` result has any truthy
    ``is_calib`` entry.

    Neighbouring hours are added with ``open_dirty()``: one CAN NOT
    accumulate cleaned data files, because the numbering of calib-blocks is
    relied upon to be unique and each cleaning operation starts that
    numbering from 1 again.

    The concatenated frames go through ``prepare_data`` and ``define_sdtype``
    before being returned.
    """
    frames = deque([L1ADataFile(fname).open()])

    # Walk backwards in time, starting from the center file name.
    hour = FileName(fname)
    calib_found = False
    while not calib_found:
        hour.set_previous_hour()
        hourfile = L1ADataFile(hour.fname)
        print("Appending {0} on the left.".format(hour.timestr))
        frames.appendleft(hourfile.open_dirty())
        calib_found = any(hourfile.open().is_calib)

    # Walk forwards in time, again starting from the center file name.
    hour = FileName(fname)
    calib_found = False
    while not calib_found:
        hour.set_next_hour()
        hourfile = L1ADataFile(hour.fname)
        print("Appending {0} on the right.".format(hour.timestr))
        frames.append(hourfile.open_dirty())
        calib_found = any(hourfile.open().is_calib)

    combined = pd.concat(list(frames))
    result = prepare_data(combined)
    define_sdtype(result)
    return result
Exemplo n.º 5
0
def folder_to_df(folder, top_end=None, verbose=False):
    """Load the ``*.div247`` files of ``folder`` into one frame of calib rows.

    Files are processed in sorted name order. Each file is read with
    ``fname_to_df``, passed through ``prepare_data`` and ``define_sdtype``,
    and reduced to the rows with ``calib_block_labels > 0``. To keep block
    labels distinct across files, every ``*_labels`` column of a file is
    shifted by the maximum that column reached in the previously kept file.

    Parameters
    ----------
    folder : str
        Directory that is searched (non-recursively) for ``*.div247`` files.
    top_end : int, optional
        Only the first ``top_end`` files are processed; a falsy value means
        all files.
    verbose : bool, optional
        If true, print a progress percentage before each file.

    Returns
    -------
    pandas.DataFrame
        The concatenated rows with ``calib_block_labels > 0``. The frame is
        empty when no file was found or no file contained such rows.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    selected = fnames[:top_end] if top_end else fnames
    nfiles = len(selected)

    chunks = []
    offsets = None         # per-column label maxima of the last kept chunk
    empty_template = None  # keeps the column layout if nothing is ever kept
    for i, fname in enumerate(selected):
        if verbose:
            print(round(float(i) * 100 / nfiles, 1), '%')
        df = fname_to_df(fname, rec_dtype, keys)
        df = prepare_data(df)
        define_sdtype(df)
        # Filter BEFORE shifting the labels: adding an offset to every row
        # would lift rows labelled 0 above zero, and they would then slip
        # through a `calib_block_labels > 0` filter applied afterwards.
        calib = df[df.calib_block_labels > 0].copy()
        if len(calib) == 0:
            empty_template = calib
            continue
        label_cols = list(calib.filter(regex='_labels'))
        if offsets is not None:
            for s in label_cols:
                calib[s] += offsets[s]
        offsets = {s: calib[s].max() for s in label_cols}
        chunks.append(calib)

    # Concatenate once at the end instead of growing a frame inside the loop.
    if chunks:
        return pd.concat(chunks)
    if empty_template is not None:
        return empty_template
    return pd.DataFrame()
Exemplo n.º 6
0
def folder_to_df(folder, top_end=None, verbose=False):
    """Load the ``*.div247`` files of ``folder`` into one frame of calib rows.

    Files are processed in sorted name order. Each file is read with
    ``fname_to_df``, passed through ``prepare_data`` and ``define_sdtype``,
    and reduced to the rows with ``calib_block_labels > 0``. To keep block
    labels distinct across files, every ``*_labels`` column of a file is
    shifted by the maximum that column reached in the previously kept file.

    Parameters
    ----------
    folder : str
        Directory that is searched (non-recursively) for ``*.div247`` files.
    top_end : int, optional
        Only the first ``top_end`` files are processed; a falsy value means
        all files.
    verbose : bool, optional
        If true, print a progress percentage before each file.

    Returns
    -------
    pandas.DataFrame
        The concatenated rows with ``calib_block_labels > 0``. The frame is
        empty when no file was found or no file contained such rows.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    selected = fnames[:top_end] if top_end else fnames
    nfiles = len(selected)

    chunks = []
    offsets = None         # per-column label maxima of the last kept chunk
    empty_template = None  # keeps the column layout if nothing is ever kept
    for i, fname in enumerate(selected):
        if verbose:
            print(round(float(i) * 100 / nfiles, 1), '%')
        df = fname_to_df(fname, rec_dtype, keys)
        df = prepare_data(df)
        define_sdtype(df)
        # Filter BEFORE shifting the labels: adding an offset to every row
        # would lift rows labelled 0 above zero, and they would then slip
        # through a `calib_block_labels > 0` filter applied afterwards.
        calib = df[df.calib_block_labels > 0].copy()
        if len(calib) == 0:
            empty_template = calib
            continue
        label_cols = list(calib.filter(regex='_labels'))
        if offsets is not None:
            for s in label_cols:
                calib[s] += offsets[s]
        offsets = {s: calib[s].max() for s in label_cols}
        chunks.append(calib)

    # Concatenate once at the end instead of growing a frame inside the loop.
    if chunks:
        return pd.concat(chunks)
    if empty_template is not None:
        return empty_template
    return pd.DataFrame()
Exemplo n.º 7
0
def folder_to_store(folder):
    """Write the calib rows of all ``*.div247`` files in ``folder`` to HDF.

    The store file name comes from ``get_storename(folder)`` and the store
    is opened in overwrite mode. Files are processed in sorted name order;
    each one is read with ``fname_to_df``, passed through ``prepare_data``
    and ``define_sdtype``, reduced to the rows with
    ``calib_block_labels > 0`` and appended under the key ``'df'``. Files
    without such rows are skipped. Every ``*_labels`` column of a chunk is
    shifted by the maximum that column reached in the previously stored
    chunk.

    Progress and diagnostics are printed. If appending to the store fails,
    the error is printed and the function returns early. The store is closed
    on every exit path, including exceptions raised while reading or
    preparing a file.

    Returns None.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    if not fnames:
        print("Found no files.")
        return
    storename = get_storename(folder)
    print(storename)
    nfiles = len(fnames)
    cols = [
        'calib_block_labels', 'sv_block_labels', 'bb_block_labels',
        'st_block_labels', 'is_spaceview', 'is_bbview', 'is_stview',
        'is_moving', 'is_stowed', 'is_calib',
    ]
    offsets = None  # per-column label maxima of the last stored chunk
    # opening store in overwrite-mode
    store = pd.HDFStore(storename, mode='w')
    try:
        for i, fname in enumerate(fnames):
            print(round(float(i) * 100 / nfiles, 1), '%')
            df = fname_to_df(fname, rec_dtype, keys)
            df = prepare_data(df)
            define_sdtype(df)
            # Explicit copy: the label columns are modified in place below.
            to_store = df[df.calib_block_labels > 0].copy()
            if len(to_store) == 0:
                continue
            label_cols = list(to_store.filter(regex='_labels'))
            if offsets is not None:
                for s in label_cols:
                    to_store[s] += offsets[s]
            # Only the maxima are needed for the next file, not a full copy.
            offsets = {s: to_store[s].max() for s in label_cols}
            try:
                store.append('df', to_store, data_columns=cols)
            except Exception as e:
                # Deliberate best-effort: report the failing file and stop.
                print('at', fname)
                print('something went wrong at appending into store.')
                print(e)
                return
    finally:
        # Runs on normal completion, early return and unexpected errors.
        store.close()
    print("Done.")
Exemplo n.º 8
0
def folder_to_store(folder):
    """Write the calib rows of all ``*.div247`` files in ``folder`` to HDF.

    The store file name comes from ``get_storename(folder)`` and the store
    is opened in overwrite mode. Files are processed in sorted name order;
    each one is read with ``fname_to_df``, passed through ``prepare_data``
    and ``define_sdtype``, reduced to the rows with
    ``calib_block_labels > 0`` and appended under the key ``'df'``. Files
    without such rows are skipped. Every ``*_labels`` column of a chunk is
    shifted by the maximum that column reached in the previously stored
    chunk.

    Progress and diagnostics are printed. If appending to the store fails,
    the error is printed and the function returns early. The store is closed
    on every exit path, including exceptions raised while reading or
    preparing a file.

    Returns None.
    """
    rec_dtype, keys = get_div247_dtypes()
    fnames = sorted(glob.glob(folder + '/*.div247'))
    if not fnames:
        print("Found no files.")
        return
    storename = get_storename(folder)
    print(storename)
    nfiles = len(fnames)
    cols = ['calib_block_labels', 'sv_block_labels', 'bb_block_labels',
            'st_block_labels', 'is_spaceview', 'is_bbview', 'is_stview',
            'is_moving', 'is_stowed', 'is_calib']
    offsets = None  # per-column label maxima of the last stored chunk
    # opening store in overwrite-mode
    store = pd.HDFStore(storename, mode='w')
    try:
        for i, fname in enumerate(fnames):
            print(round(float(i) * 100 / nfiles, 1), '%')
            df = fname_to_df(fname, rec_dtype, keys)
            df = prepare_data(df)
            define_sdtype(df)
            # Explicit copy: the label columns are modified in place below.
            to_store = df[df.calib_block_labels > 0].copy()
            if len(to_store) == 0:
                continue
            label_cols = list(to_store.filter(regex='_labels'))
            if offsets is not None:
                for s in label_cols:
                    to_store[s] += offsets[s]
            # Only the maxima are needed for the next file, not a full copy.
            offsets = {s: to_store[s].max() for s in label_cols}
            try:
                store.append('df', to_store, data_columns=cols)
            except Exception as e:
                # Deliberate best-effort: report the failing file and stop.
                print('at', fname)
                print('something went wrong at appending into store.')
                print(e)
                return
    finally:
        # Runs on normal completion, early return and unexpected errors.
        store.close()
    print("Done.")
Exemplo n.º 9
0
 def clean_final_df(self, df):
     """Run ``prepare_data`` on ``df``, apply ``define_sdtype`` and return it.

     ``define_sdtype`` is called on the prepared frame for its effect on
     that frame; its return value is not used.
     """
     prepared = prepare_data(df)
     define_sdtype(prepared)
     return prepared
Exemplo n.º 10
0
 def clean(self):
     """Replace ``self.df`` with its prepared version.

     The frame returned by ``prepare_data`` is passed to ``define_sdtype``
     (return value unused) and only then stored back on the instance.
     """
     cleaned = prepare_data(self.df)
     define_sdtype(cleaned)
     self.df = cleaned
Exemplo n.º 11
0
 def clean_final_df(self, df):
     """Prepare the final frame and define its sdtypes.

     The sdtypes are defined here because that has to wait until the
     final frame exists. ``define_sdtype`` is called on the result of
     ``prepare_data``; its return value is not used.
     """
     prepared = prepare_data(df)
     define_sdtype(prepared)
     return prepared
Exemplo n.º 12
0
 def open_and_process(self):
     """Read ``self.fname`` into a frame, prepare it and keep it as ``self.df``.

     The file is read with ``fname_to_df`` using ``self.rec_dtype`` and
     ``self.keys``; the result goes through ``prepare_data`` and
     ``define_sdtype`` (return value unused) before being stored.
     """
     raw = fname_to_df(self.fname, self.rec_dtype, self.keys)
     processed = prepare_data(raw)
     define_sdtype(processed)
     self.df = processed
Exemplo n.º 13
0
 def clean_final_df(self, df):
     """Run ``prepare_data`` on ``df``, apply ``define_sdtype`` and return it.

     ``define_sdtype`` is called on the prepared frame for its effect on
     that frame; its return value is not used.
     """
     prepared = prepare_data(df)
     define_sdtype(prepared)
     return prepared
Exemplo n.º 14
0
 def clean(self):
     """Replace ``self.df`` with its prepared version.

     The frame returned by ``prepare_data`` is passed to ``define_sdtype``
     (return value unused) and only then stored back on the instance.
     """
     cleaned = prepare_data(self.df)
     define_sdtype(cleaned)
     self.df = cleaned
Exemplo n.º 15
0
 def clean_final_df(self, df):
     """Prepare the final frame and define its sdtypes.

     The sdtypes are defined here because that has to wait until the
     final frame exists. ``define_sdtype`` is called on the result of
     ``prepare_data``; its return value is not used.
     """
     prepared = prepare_data(df)
     define_sdtype(prepared)
     return prepared
Exemplo n.º 16
0
 def open_and_process(self):
     """Read ``self.fname`` into a frame, prepare it and keep it as ``self.df``.

     The file is read with ``fname_to_df`` using ``self.rec_dtype`` and
     ``self.keys``; the result goes through ``prepare_data`` and
     ``define_sdtype`` (return value unused) before being stored.
     """
     raw = fname_to_df(self.fname, self.rec_dtype, self.keys)
     processed = prepare_data(raw)
     define_sdtype(processed)
     self.df = processed