Пример #1
0
    def writeFile(self, key, val, file_type):
        """Persist ``val`` to the cache file addressed by ``key``.

        Only a pandas DataFrame/Series, or a tuple made up entirely of
        them, is written; any other value is logged and passed through.

        Args:
            key: Cache key, forwarded to ``self.get_path`` to locate the file.
            val: Value to cache.
            file_type: ``'h5'`` writes every item under its own HDF key
                ``f'{self.df_key}_{index}'``; ``'pickle'`` pickles ``val``
                as a whole. Any other value writes nothing.

        Returns:
            ``None`` when the cache is disabled, otherwise ``val`` unchanged.
        """
        if not self.enable:
            logger.debug('Cache is disable')
            return None

        # Guard len(): an unsized value (e.g. a scalar) must reach the type
        # check below and be reported there instead of raising TypeError here.
        if val is None or (hasattr(val, '__len__') and len(val) == 0):
            logger.debug('Return value is None or empty')
            return val

        val_tuple = val if isinstance(val, tuple) else (val,)
        cacheable = (pd.DataFrame, pd.Series)

        if not all(isinstance(item, cacheable) for item in val_tuple):
            # Report the same type test that decided the rejection.
            logger.warning(f'The return is not DataFrame or it is None:{[isinstance(item, cacheable) for item in val_tuple]}')
            return val

        path = self.get_path(key, file_type)
        if file_type == 'h5':
            for index, df in enumerate(val_tuple):
                # Separate name so the ``key`` argument is not overwritten.
                hdf_key = f'{self.df_key}_{index}'
                logger.debug(f"====Write {len(df)} records to File#{path}, with:{hdf_key}")
                # ``key`` is passed by keyword: positional use is deprecated
                # in recent pandas releases.
                df.to_hdf(path, key=hdf_key)
        elif file_type == 'pickle':
            pd.to_pickle(val, path)
        else:
            logger.warning(f'Unsupported file_type:{file_type}, nothing is written to:{path}')
        return val
Пример #2
0
 def wrapper(*args, **kwargs):
     """Call ``fn`` and shrink the memory footprint of what it returns.

     A DataFrame result, or a tuple consisting only of DataFrames/Series,
     is passed through ``_reduce_mem_usage``; any other result is returned
     untouched after a warning is logged.
     """
     val = fn(*args, **kwargs)
     with timed_bolck(f'Reduce_Mem({fn.__name__}:{ex_type_name(val)})'):
         if isinstance(val, pd.DataFrame):
             val = _reduce_mem_usage(val, verbose=True)
         # ``elif`` (not a second ``if``): otherwise a plain DataFrame result
         # would also fall into the ``else`` branch and log a bogus warning.
         elif isinstance(val, tuple) and all(isinstance(df, (pd.DataFrame, pd.Series)) for df in val):
             val = tuple(_reduce_mem_usage(df, verbose=True) for df in val)
         else:
             logger.warning(f'The return type for fun#{fn.__name__} is:{type(val)}')
     return val
Пример #3
0
        def wrapper(*args, **kwargs):
            """Return the result of ``f`` for these arguments, using the file cache.

            The cache key is built from the function name and the abbreviated
            positional/keyword arguments. When the arguments do not support
            caching, ``f`` is called directly and nothing is read or written.
            """
            mini_args = get_mini_args(args)
            mini_kwargs = get_mini_args(kwargs)
            # Ignore the file cache if the input parameters don't support it.
            if not is_support_cache(*args, **kwargs):
                # Log the abbreviated kwargs, matching what goes into the key.
                logger.warning(f'Do not support cache for fn:{f.__name__}, para:{str(mini_args)}, kw:{str(mini_kwargs)}')
                return f(*args, **kwargs)

            key = '='.join([f.__name__, str(mini_args), str(mini_kwargs)])
            key = key.replace('.', '_')
            key = key.replace('/', '_')
            # Collapse any run of underscores into a single one.
            while '__' in key:
                key = key.replace('__', '_')

            if prefix:
                key = '_'.join([prefix, key])

            # Bind ``val`` up front: when ``overwrite`` is not False nothing is
            # read, and testing an unbound ``val`` raised UnboundLocalError.
            val = None
            if overwrite is False:
                # A ``None`` result is treated as a cache miss below.
                val = cache.readFile(key, type)
            if val is None:
                # Miss or forced overwrite: call the wrapped function and
                # store its result in the cache.
                val = f(*args, **kwargs)
                cache.writeFile(key, val, type)
            return val
Пример #4
0
import matplotlib as plt
import pandas as pd

from sklearn.preprocessing import LabelEncoder

from file_cache.utils.util_log import logger, timed
# Best-effort plotting/display configuration: a failure is logged and must
# not stop the module from loading, so each step is guarded on its own.
try:
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font used to display Chinese labels properly
    plt.rcParams['axes.unicode_minus'] = False  # display the minus sign properly
except Exception as e:
    logger.warning(e)

# Every option is applied independently. With a single shared ``try`` the
# first option rejected by the installed pandas (e.g. 'display.height')
# skipped all of the options listed after it.
for _option, _value in (
    ('display.height', 1000),
    ('display.max_rows', 1000),
    ('display.max_columns', 500),
    ('display.width', 1000),
):
    try:
        pd.set_option(_option, _value)
    except Exception as e:
        logger.warning(e)

# Global pandas switch; left unguarded exactly as before.
pd.options.mode.use_inf_as_na = True


@timed(logger)
def convert_label_encode(sample, exclude=[]):
    try:
        #Label encode
        obj_col = sample.select_dtypes(include=['object']).columns
        obj_col = [
            item for item in obj_col
            if item != 'device' and item not in exclude
        ]
        print(f'{obj_col} will convert to label encode, and fillna with Other')