Exemplo n.º 1
0
def list_branch(name):
    '''
    List the data branch rooted at the given data node.

    Parameter
    ---------
    name: string
        name of the data node

    Return
    ------
    out: list
        names of all data in the branch (the node itself included),
        sorted alphabetically
    '''
    dep_tree = DependencyTree(load_all())
    branch = dep_tree.get_branch(name)
    return sorted(branch)
Exemplo n.º 2
0
def delete_computing_file(name, delete_branch=True):
    '''
    Delete the computing file of the given data.

    Parameter
    ---------
    name: string
        name of the data
    delete_branch: boolean, default True
        also delete every data item that depends on this one

    Return
    ------
    result: boolean
        True if the deletion succeeded
    '''
    data_dict = get_db_dictionary()
    if name not in data_dict:
        logger.warning(
            '[Operation=delete_computing_file, Info=\"Try to delete a data({}) that does not exist!\"]'
            .format(name))
        return False
    rel_path = data_dict[name]['rel_path']
    abs_path = cf_relpath2abspath(rel_path)

    if delete_branch:  # remove every node that depends on the current one
        dep_tree = DependencyTree(load_all())
        for descendant in dep_tree.get_branch(name):
            if descendant == name:
                continue
            # descendants must not recurse into their own branches again
            delete_computing_file(descendant, False)
    try:
        remove(abs_path)
        delete_data(rel_path, data_dict[name]['datatype'])
        logger.info(
            '[Operation=delete_computing_file, Info=\"Delete data(path={}) successfully.\"]'
            .format(rel_path))
        delete_empty_folder(dirname(abs_path))
        meta = load_metadata(METADATA_FILENAME)
        del meta[name]
        dump_metadata(meta, METADATA_FILENAME)
    except Exception as e:
        logger.exception(e)
        return False
    return True
Exemplo n.º 3
0
def delete_db_data(name, delete_branch=True):
    '''
    Delete the data with the given name from the database only.

    Parameter
    ---------
    name: string
        name of the data to be deleted
    delete_branch: boolean, default True
        also delete every data item that depends on this one

    Return
    ------
    result: boolean
        True if the deletion succeeded
    '''
    data_dict = get_db_dictionary()
    if name not in data_dict:
        logger.warning(
            '[Operation=delete_db_data, Info=\"Try to delete a data({}) that does not exist!\"]'
            .format(name))
        return False
    rel_path = data_dict[name]['rel_path']
    if delete_branch:  # remove every node that depends on the current one
        dep_tree = DependencyTree(load_all())
        for descendant in dep_tree.get_branch(name):
            if descendant == name:
                continue
            # descendants must not recurse into their own branches again
            delete_db_data(descendant, False)
    try:
        delete_data(rel_path, data_dict[name]['datatype'])
        logger.info(
            '[Operation=delete_db_data, Info=\"Delete data(path={}) successfully.\"]'
            .format(rel_path))
        meta = load_metadata(METADATA_FILENAME)
        del meta[name]
        dump_metadata(meta, METADATA_FILENAME)
    except Exception as e:
        logger.exception(e)
        return False
    return True
Exemplo n.º 4
0
def update_all(show_progress=True):
    '''
    Update all data until everything is up to date or the maximum number
    of iterations is reached.

    Parameter
    ---------
    show_progress: boolean, default True
        whether to echo update progress to stdout

    Return
    ------
    result: boolean
        True only if no data item failed to update
    '''
    data_dict = load_all()
    ut_meta = load_metadata(METADATA_FILENAME)
    # Resolve a topological order so every item is updated after the data
    # it depends on.
    update_order = [
        d.name for d in DependencyTree(data_dict).generate_dependency_order()
    ]
    end_time = get_endtime()
    default_start_time = trans_date(CONFIG['data_start_date'])
    update_result = True
    stdout_handler = None  # placeholder for the optional stdout log handler
    try:
        if show_progress:  # attach a handler that echoes log records to stdout
            stdout_handler = logging.StreamHandler(stdout)
            formatter = logging.Formatter("%(asctime)s: %(message)s",
                                          "%Y-%m-%d %H:%M:%S")
            stdout_handler.setFormatter(formatter)
            updating_logger.addHandler(stdout_handler)

        for data_name in update_order:
            d_msg = data_dict[data_name]
            if is_test_data(d_msg):  # data under testing is never updated
                # BUGFIX: message previously lacked the closing ']' that all
                # sibling log messages carry, breaking the log format.
                updating_logger.info(
                    '[data_name={dn}, description=\"Testing data will not be updated, ignored\"]'
                    .format(dn=data_name))
                continue
            if not is_dependency_updated(d_msg, ut_meta,
                                         end_time):  # dependencies not yet updated: skip this round
                updating_logger.info(
                    '[data_name={dn}, description=\"Dependency has not been updated, ignored\"]'
                    .format(dn=data_name))
                continue
            start_time = ut_meta.get(data_name, default_start_time)
            if start_time >= end_time:  # already up to date
                updating_logger.info(
                    '[data_name={dn}, description=\"Data has been updated, ignored\"]'
                    .format(dn=data_name))
                continue
            result = update_single_data(d_msg, start_time, end_time, ut_meta)
            updating_logger.info(
                '[data_name={dn}, start_time={st:%Y-%m-%d}, end_time={et:%Y-%m-%d}, result={res}]'
                .format(dn=data_name, st=start_time, et=end_time, res=result))
            if result:  # persist metadata after each successful update
                dump_metadata(ut_meta, METADATA_FILENAME)
            else:
                update_result = False
    finally:
        # always detach the stdout handler, even if an update raised
        if stdout_handler is not None:
            updating_logger.removeHandler(stdout_handler)
    return update_result
Exemplo n.º 5
0
#!/usr/bin/env python
# -*- coding:utf-8
"""
Author:  Hao Li
Email: [email protected]
Github: https://github.com/SAmmer0
Created: 2018/4/24
"""
from tdtools import trans_date
from pitdata.updater.operator import load_metadata, update_single_data, dump_metadata
from pitdata.updater.operator import METADATA_FILENAME
from pitdata.updater.loader import load_all

# Load every registered data definition, refresh the 'universe' data over a
# fixed date range, then persist the updated metadata.
dd = load_all()
universe_entry = dd['universe']
metadata = load_metadata(METADATA_FILENAME)

update_single_data(universe_entry, trans_date('2018-01-01'), trans_date('2018-04-01'), metadata)
dump_metadata(metadata, METADATA_FILENAME)
Exemplo n.º 6
0
#!/usr/bin/env python
# -*- coding:utf-8
"""
Author:  Hao Li
Email: [email protected]
Github: https://github.com/SAmmer0
Created: 2018/4/20
"""
from pitdata.updater.loader import load_all

# Before running the code below, point the configuration file at the folder
# containing the data-computation scripts.
# If that folder is empty, add at least one data-computation script first.

res = load_all()