示例#1
0
def test_get_update() -> None:
    """Check that get_update returns one non-empty frame per expected section.

    The result for the current NFL year must contain only non-empty frames
    and hold exactly one entry fewer than the number of config sections.
    """
    current_year = _date.today().nfl_year
    # NOTE(review): the result is consumed through ``.values()`` below, so it
    # is presumably a mapping of frames rather than a list -- confirm against
    # get_update's return type.
    dfs = get_update(current_year)

    try:
        populated = [len(frame.index) > 0 for frame in dfs.values()]
    except AttributeError:
        # Anything that does not expose the expected attributes counts as a
        # failed update instead of surfacing as an error.
        populated = [False]

    section_count = len(_config.sections())

    assert all(populated)
    assert len(dfs) == section_count - 1
示例#2
0
def test_config() -> None:
    """Check that the shared config is a ConfigParser with the known sections.

    The comparison is order-sensitive: sections must appear exactly as listed.
    """
    expected_sections = [
        'offense',
        'kicking',
        'op_defense',
        'rb_defense',
        'te_defense',
        'qb_defense',
        'wr_defense',
        'coaches',
        'schedule',
        'test',
    ]

    assert isinstance(_config, ConfigParser)
    assert _config.sections() == expected_sections
示例#3
0
文件: test_db.py 项目: deschman/nfetl
def test_update() -> None:
    """Check that updating the previous NFL year leaves a view per section.

    Every config section except the one named by ``section_name`` must be
    present in ``db.views`` after the update.
    """
    previous_year = _date.today().nfl_year - 1
    db.update(previous_year)

    expected_views = [
        name for name in _config.sections() if name != section_name
    ]
    found = [name in db.views for name in expected_views]

    assert all(found)
示例#4
0
文件: test_db.py 项目: deschman/nfetl
# %%% User-Defined
from nfetl import DB
from nfetl.core import _config
from nfetl._datetime import _date


# %% Variables
# Database handle for the 'NFL.db' file located one directory above the
# directory containing this file; the second positional argument is passed to
# DB unchanged.
db: DB = DB(
    os.path.join(os.path.dirname(os.path.dirname(__file__)), 'NFL.db'),
    False,
)

# Fixture frames: both are read from the HDF file named by the
# DEFAULT/test_url_data config option, under two different keys.
source: pd.DataFrame = pd.read_hdf(
    _config['DEFAULT']['test_url_data'], key='test_url_data')
clean_source: pd.DataFrame = pd.read_hdf(
    _config['DEFAULT']['test_url_data'], key='test_clean_data')

# First config section whose name contains 'test' (IndexError if none exists).
section_name: str = list(
    filter(lambda name: 'test' in name, _config.sections()))[0]
source_table_name: str = f'{section_name}_2019'
extracted_data: Dict[str, pd.DataFrame] = {source_table_name: source}

# Copy of the source frame with a constant 'DML_Type' column of 'I' appended
# as the last column.
archive: pd.DataFrame = source.copy()
archive.insert(loc=archive.shape[1], column='DML_Type', value='I')
arc_data: Dict[str, pd.DataFrame] = {source_table_name: archive}


# %% Functions
# %%% Private
def _truncate_table(prefix: str,
                    dfs: Dict[str, pd.DataFrame] = extracted_data) -> None:
    for table in extracted_data.keys():
        if pd.read_sql(
                f"SELECT COUNT(1) FROM sqlite_master WHERE name = '{prefix}{table}'",
                db.connection).iat[0, 0] > 0:
示例#5
0
from dask import distributed

# %%% User Defined
from nfetl._datetime import _date
from nfetl.core import _config

# %% Variables
# %%% System
# Names exported by ``from <module> import *``.
__all__ = ['get_url_data', 'get_update']

# %%% Private
# Dask client created at import time with ``processes=False``.
_client: object = distributed.Client(processes=False)

# One (section name, url, extract_columns) tuple per config section, skipping
# the section named 'test'.
_default_sets: List[Tuple[str, str, str]] = [
    (section, options['url'], options['extract_columns'])
    for section in _config.sections()
    if section != 'test'
    for options in (_config[section],)
]
# Start year read from the DEFAULT section of the config, as an int.
_default_start_year: int = int(_config['DEFAULT']['start_year'])


# %% Functions
def get_url_data(url: str, headers: List[str] = []) -> pd.DataFrame:
    """
    Retrieve data table from URL.

    Parameters
    ----------
    url : str
        URL for HTML page where data table is found.
    headers : List[str], optional
        Headers for retrieved table. Default is source headers.