Example #1
def get_browser(browser, headless=True, download='', *args):
    """
    Get browser

    Args:
        browser: browser name, one of 'firefox' or 'chrome'
        headless: whether to run the browser headless (no visible window)
        download: folder to download files to, relative to ~/Downloads;
                  existing files in the folder WILL BE REMOVED before the browser is returned
        *args: extra command-line arguments to pass to the browser
    """
    br, br_opt = BROWSERS[browser]
    if headless: br_opt.add_argument('--headless')
    br_opt.add_argument('--disable-gpu')
    br_opt.add_experimental_option('excludeSwitches', ['enable-logging'])
    if download:
        dl_path = get_rel_path(folder=f'Downloads/{download}')
        if files.exists(dl_path): shutil.rmtree(dl_path, ignore_errors=True)
        files.create_folder(dl_path)
        br_opt.add_experimental_option(
            'prefs', {
                'download.default_directory': dl_path,
                'download.prompt_for_download': False,
                'download.directory_upgrade': True,
            })
    for arg in args:
        br_opt.add_argument(arg)
    return br(options=br_opt)
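A minimal usage sketch, assuming `BROWSERS` maps names to Selenium driver classes paired with option objects (note the experimental options above are Chrome-specific); the mapping and URL below are illustrative, not part of the source.

# Hedged usage sketch; the BROWSERS mapping here is an assumption.
from selenium import webdriver

BROWSERS = {
    'chrome': (webdriver.Chrome, webdriver.ChromeOptions()),
    'firefox': (webdriver.Firefox, webdriver.FirefoxOptions()),
}

driver = get_browser('chrome', headless=True, download='reports')
try:
    driver.get('https://example.com')
finally:
    driver.quit()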
Example #2
def save_data(data,
              file_fmt,
              append=False,
              drop_dups=None,
              info=None,
              **kwargs):
    """
    Save data to file

    Args:
        data: pd.DataFrame
        file_fmt: file path template in f-string style, e.g. '{ROOT}/daily/{typ}.parq'
        append: whether to append data to existing data in the target file
        drop_dups: list of columns on which to drop duplicates after appending
        info: dict of information to be hashed and passed to the f-string template
        **kwargs: additional parameters for f-strings

    Examples:
        >>> data = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
        >>> # save_data(
        ... #     data, '{ROOT}/daily/{typ}.parq',
        ... #     ROOT='tests/data', typ='earnings'
        ... # )
    """
    d_file = data_file(file_fmt=file_fmt, info=info, **kwargs)
    if append and files.exists(d_file):
        data = pd.DataFrame(
            pd.concat([pd.read_parquet(d_file), data], sort=False))
        if drop_dups is not None:
            data.drop_duplicates(subset=utils.tolist(drop_dups), inplace=True)

    if not data.empty: data.to_parquet(d_file)
    return data
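A hedged usage sketch based on the docstring example; the `append` and `drop_dups` arguments are illustrative.

# Hedged usage sketch for save_data; arguments below are illustrative.
import pandas as pd

df = pd.DataFrame([[1, 2], [3, 4]], columns=['a', 'b'])
# Appends to tests/data/daily/earnings.parq, dropping duplicate rows on column 'a'
saved = save_data(
    df, '{ROOT}/daily/{typ}.parq',
    append=True, drop_dups=['a'],
    ROOT='tests/data', typ='earnings',
)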
Example #3
def load_file(data_file: str, load_func=None, **kwargs):
    """
    Load data from cache
    """
    logger = logs.get_logger(load_file, level=kwargs.get('log', 'info'))
    if (not data_file) or (not files.exists(data_file)): return

    if callable(load_func): return load_func(data_file)

    ext = data_file.split('.')[-1]
    if ext not in LOAD_FUNC: return

    logger.debug(f'Reading from {data_file} ...')
    return LOAD_FUNC[ext](data_file)
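A hedged usage sketch; the file path is illustrative and `pd.read_parquet` merely stands in for a custom loader.

# Hedged usage sketch for load_file; the path below is illustrative.
import pandas as pd

# Extension-based loading, assuming 'parq' is registered in LOAD_FUNC
cached = load_file('tests/data/daily/earnings.parq')

# A callable load_func takes precedence over LOAD_FUNC
cached = load_file('tests/data/daily/earnings.parq', load_func=pd.read_parquet)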
Example #4
    def from_json(cls, json_file):
        """
        Instantiate class from json file

        Args:
            json_file: json file path

        Returns:
            Class instance
        """
        if not files.exists(json_file):
            raise FileNotFoundError(
                f'{json_file} does not exist; cannot instantiate {cls.__name__}')
        with open(json_file, 'r') as fp:
            return cls(**json.load(fp=fp))
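A self-contained sketch of the from_json pattern above, under stated assumptions: `Settings` and 'settings.json' are illustrative names, from_json is assumed to be a @classmethod inside its class, and os.path stands in for the project's `files` helper.

# Hedged, self-contained sketch; Settings and settings.json are assumptions.
import json
import os

class Settings:

    def __init__(self, **kwargs):
        self.__dict__.update(kwargs)

    @classmethod
    def from_json(cls, json_file):
        if not os.path.exists(json_file):
            raise FileNotFoundError(
                f'{json_file} does not exist; cannot instantiate {cls.__name__}')
        with open(json_file, 'r') as fp:
            return cls(**json.load(fp))

settings = Settings.from_json('settings.json')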
Example #5
    def wrapper(*args, **kwargs):

        # Merge decorator defaults with call-time kwargs (call-time values win)
        default.update(kwargs)
        kwargs.update(default)
        cur_mod = sys.modules[func.__module__]
        logger = logs.get_logger(
            name_or_func=f'{cur_mod.__name__}.{func.__name__}', types='stream')

        root_path = cur_mod.DATA_PATH
        date_type = kwargs.pop('date_type', 'date')
        save_static = kwargs.pop('save_static', True)
        save_dynamic = kwargs.pop('save_dynamic', True)
        symbol = kwargs.get('symbol')
        file_kw = dict(func=func,
                       symbol=symbol,
                       root=root_path,
                       date_type=date_type)
        d_file = cache_file(has_date=True, **file_kw)
        s_file = cache_file(has_date=False, **file_kw)

        cached = kwargs.pop('cached', False)
        if cached and save_static and files.exists(s_file):
            logger.info(f'Reading data from {s_file} ...')
            return pd.read_parquet(s_file)

        data = func(*args, **kwargs)

        if save_static:
            files.create_folder(s_file, is_file=True)
            save_data(data=data, file_fmt=s_file, append=False)
            logger.info(f'Saved data file to {s_file} ...')

        if save_dynamic:
            drop_dups = kwargs.pop('drop_dups', None)
            files.create_folder(d_file, is_file=True)
            save_data(data=data,
                      file_fmt=d_file,
                      append=True,
                      drop_dups=drop_dups)
            logger.info(f'Saved data file to {d_file} ...')

        return data
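The wrapper above is the inner function of a caching decorator; below is a minimal sketch of how it is typically enclosed, given the free variables `func` and `default`. The decorator name `update_data` and the use of functools.wraps are assumptions, not confirmed by the source.

# Hedged sketch of the enclosing decorator; the name update_data is an assumption.
import functools

def update_data(**default):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ...  # body as in the snippet above
        return wrapper
    return decorator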
Example #6
        def wrapper(*args, **kwargs):

            # Check function parameters
            param = inspect.signature(func).parameters
            all_kw = {
                k: args[n] if n < len(args) else v.default
                for n, (k, v) in enumerate(param.items())
            }
            all_kw.update(utils.func_kwarg(func=func, **kwargs))
            kwargs.update(all_kw)

            # Data path and file name
            cur_dt = utils.cur_time(
                trading=False,
                tz=kwargs.get('_tz_', utils.DEFAULT_TZ),
            )
            if data_root:
                root_path = data_root
            else:
                root_path = getattr(sys.modules[func.__module__], 'DATA_PATH')
            if file_fmt:
                file_name = target_file_name(fmt=file_fmt, **all_kw)
            else:
                file_name = f'{func.__name__}/[date].pkl'

            if callable(file_func):
                name_pattern = ''
                data_file = f'{root_path}/{file_func(**kwargs)}'
            else:
                name_pattern = (f'{root_path}/{file_name}'.replace(
                    '\\', '/').replace('[today]', '[date]'))
                data_file = name_pattern.replace('[date]', cur_dt)

            # Reload data and override cache if necessary
            use_cache = not kwargs.get('_reload_', False)

            # Load data if exists
            if files.exists(data_file) and use_cache:
                return load_file(data_file=data_file,
                                 load_func=load_func,
                                 **kwargs)

            # Load data if it was updated within update frequency
            if update_freq and use_cache and ('[date]' in name_pattern):
                start_dt = pd.date_range(end=cur_dt,
                                         freq=update_freq,
                                         periods=2)[0]
                for dt in pd.date_range(start=start_dt,
                                        end=cur_dt,
                                        normalize=True)[1:][::-1]:
                    cur_file = name_pattern.replace('[date]',
                                                    dt.strftime('%Y-%m-%d'))
                    if files.exists(cur_file):
                        return load_file(data_file=cur_file,
                                         load_func=load_func,
                                         **kwargs)

            # Retrieve data
            data = func(**all_kw)

            # Save data to cache
            save_file(data=data,
                      data_file=data_file,
                      save_func=save_func,
                      **kwargs)

            return data
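The wrapper above closes over data_root, file_fmt, file_func, load_func, save_func and update_freq, which implies a decorator factory along the lines of the sketch below. The name `with_cache` and the default values are assumptions, not confirmed by the source.

# Hedged sketch of the enclosing decorator factory; name and defaults are assumptions.
import functools

def with_cache(data_root='', file_fmt='', file_func=None,
               load_func=None, save_func=None, update_freq=None):
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            ...  # body as in the snippet above
        return wrapper
    return decorator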