# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi from kpi import AccKpi each_step_duration_senta_card1 = DurationKpi('each_step_duration_senta_card1', 0.01, 0, actived=False) train_loss_senta_card1 = CostKpi('train_loss_senta_card1', 0.05, 0, actived=True) train_acc_senta_card1 = AccKpi('train_acc_senta_card1', 0.02, 0, actived=True) each_step_duration_senta_card4 = DurationKpi('each_step_duration_senta_card4', 0.01, 0, actived=False) train_loss_senta_card4 = CostKpi('train_loss_senta_card4', 0.05, 0, actived=True) train_acc_senta_card4 = AccKpi('train_acc_senta_card4', 0.02, 0, actived=True) tracking_kpis = [ each_step_duration_senta_card1, train_loss_senta_card1, train_acc_senta_card1, each_step_duration_senta_card4, train_loss_senta_card4, train_acc_senta_card4, ] def parse_log(log): ''' This method should be implemented by model developers. The suggestion:
# KPI declarations for the continuous-evaluation (CE) run of this model.
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi

# One KPI object per tracked metric; the first argument is the metric name.
train_cost_kpi = CostKpi('train_cost', 0.02, actived=True)
test_acc_kpi = AccKpi('test_acc', 0.005, actived=True)
train_duration_kpi = DurationKpi('train_duration', 0.02, actived=True)
train_acc_kpi = AccKpi('train_acc', 0.005, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [train_acc_kpi, train_cost_kpi, test_acc_kpi, train_duration_kpi]
#### NOTE kpi.py should shared in models in some way!!!! d_train_cost_kpi = CostKpi('d_train_cost', 0.05, 0, actived=True, desc='train cost of discriminator') g_train_cost_kpi = CostKpi('g_train_cost', 0.05, 0, actived=True, desc='train cost of generator') train_speed_kpi = DurationKpi('duration', 0.05, 0, actived=True, unit_repr='second', desc='train time used in one GPU card') tracking_kpis = [d_train_cost_kpi, g_train_cost_kpi, train_speed_kpi] def parse_log(log): ''' This method should be implemented by model developers. The suggestion: each line in the log should be key, value, for example: "
####this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=False) test_cost_kpi = CostKpi('test_cost', 0.005, 0, actived=False) train_duration_kpi = DurationKpi('train_duration', 0.06, 0, actived=False) tracking_kpis = [ train_cost_kpi, test_cost_kpi, train_duration_kpi, ] def parse_log(log): ''' This method should be implemented by model developers. The suggestion: each line in the log should be key, value, for example: " train_cost\t1.0 test_cost\t1.0
# This file is only used for the continuous evaluation test!
import os
import sys

# The `ceroot` path setup is disabled in this file (kept for reference):
# sys.path.insert(0, os.environ['ceroot'])
from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi

# NOTE: kpi.py should be shared across models in some way.

# --- metrics with the `card1` suffix ---
train_cost_card1_kpi = CostKpi('train_cost_card1', 0.02, 0, actived=True)
test_cost_card1_kpi = CostKpi('test_cost_card1', 0.005, 0, actived=True)
train_duration_card1_kpi = DurationKpi(
    'train_duration_card1', 0.06, 0, actived=True)

# --- metrics with the `card4` suffix ---
train_cost_card4_kpi = CostKpi('train_cost_card4', 0.01, 0, actived=True)
test_cost_card4_kpi = CostKpi('test_cost_card4', 0.005, 0, actived=True)
train_duration_card4_kpi = DurationKpi(
    'train_duration_card4', 0.06, 0, actived=True)

# Module-level list of every KPI declared above, card1 entries first.
tracking_kpis = [
    train_cost_card1_kpi, test_cost_card1_kpi, train_duration_card1_kpi,
    train_cost_card4_kpi, test_cost_card4_kpi, train_duration_card4_kpi,
]
# KPI declarations for the continuous-evaluation (CE) run of this model.
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi  # not referenced in this snippet; import kept as-is

# --- metrics without a card suffix ---
test_avg_ppl_kpi = CostKpi('test_avg_ppl_kpi', 0.2, 0)
train_pass_duration_kpi = DurationKpi(
    'train_pass_duration_kpi', 0.03, 0, actived=True)

# --- metrics with the `card4` suffix ---
test_avg_ppl_kpi_card4 = CostKpi('test_avg_ppl_kpi_card4', 0.05, 0, actived=True)
train_pass_duration_kpi_card4 = DurationKpi(
    'train_pass_duration_kpi_card4', 0.03, 0, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [
    test_avg_ppl_kpi, train_pass_duration_kpi,
    test_avg_ppl_kpi_card4, train_pass_duration_kpi_card4,
]
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi from kpi import AccKpi each_step_duration_simnet_card1 = DurationKpi( 'each_step_duration_simnet_card1', 0.03, 0, actived=True) train_loss_simnet_card1 = CostKpi('train_loss_simnet_card1', 0.01, 0, actived=True) each_step_duration_simnet_card4 = DurationKpi( 'each_step_duration_simnet_card4', 0.02, 0, actived=True) train_loss_simnet_card4 = CostKpi('train_loss_simnet_card4', 0.01, 0, actived=True) tracking_kpis = [ each_step_duration_simnet_card1, train_loss_simnet_card1, each_step_duration_simnet_card4, train_loss_simnet_card4, ] def parse_log(log):
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi from kpi import AccKpi each_epoch_duration_frame1_card1 = DurationKpi( "each_epoch_duration_frame1_card1", 0.02, actived=True) train_cost_frame1_card1 = CostKpi("train_cost_frame1_card1", 0.02, actived=True) each_epoch_duration_frame4_card1 = DurationKpi( "each_epoch_duration_frame4_card1", 0.05, actived=True) train_cost_frame4_card1 = CostKpi("train_cost_frame4_card1", 0.02, actived=True) tracking_kpis = [ each_epoch_duration_frame1_card1, train_cost_frame1_card1, each_epoch_duration_frame4_card1, train_cost_frame4_card1, ] def parse_log(log): ''' This method should be implemented by model developers.
# KPI declarations for the continuous-evaluation (CE) run of this model.
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi
from kpi import DurationKpi

# Both KPIs are built from a metric name and one numeric argument only.
train_cost_kpi = CostKpi('train_cost', 0.01)
train_duration_kpi = DurationKpi('train_duration', 0.04)

# Module-level list of every KPI declared above.
tracking_kpis = [train_cost_kpi, train_duration_kpi]
sys.path.insert(0, os.environ['ceroot']) #sys.path.append('.') from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! train_cost_xnli_card1_kpi = CostKpi('train_cost_xnli_card1', 0.002, 0, actived=True) train_acc_xnli_card1_kpi = AccKpi('train_acc_xnli_card1', 0.002, 0, actived=True) train_duration_xnli_card1_kpi = DurationKpi('train_duration_xnli_card1', 0.01, 0, actived=True) train_cost_xnli_card4_kpi = CostKpi('train_cost_xnli_card4', 0.002, 0, actived=True) train_acc_xnli_card4_kpi = AccKpi('train_acc_xnli_card4', 0.02, 0, actived=True) train_duration_xnli_card4_kpi = DurationKpi('train_duration_xnli_card4', 0.03, 0, actived=True) tracking_kpis = [
"""
continuous_evaluation.py

Declares the KPI objects of this model and collects them in `tracking_kpis`.
"""
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import AccKpi, DurationKpi

# --- metrics without a card suffix ---
train_acc_kpi = AccKpi('train_acc', 0.2, 0)
pass_duration_kpi = DurationKpi('pass_duration', 0.02, 0, actived=True)

# --- metrics with the `card4` suffix ---
train_acc_kpi_card4 = AccKpi('train_acc_card4', 0.2, 0)
pass_duration_kpi_card4 = DurationKpi(
    'pass_duration_card4', 0.03, 0, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [
    train_acc_kpi, pass_duration_kpi,
    train_acc_kpi_card4, pass_duration_kpi_card4,
]
# KPI declarations for the continuous-evaluation (CE) run of this model.
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi  # not referenced in this snippet; import kept as-is
from kpi import DurationKpi
from kpi import AccKpi

# --- metrics with the `cifar10_128` prefix ---
cifar10_128_train_acc_kpi = AccKpi('cifar10_128_train_acc', 0.05, 0)
cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.05, 0)
cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.01, 0)

# --- metrics with the `flowers_64` prefix ---
flowers_64_train_speed_kpi = AccKpi('flowers_64_train_speed', 0.05, 0)
flowers_64_gpu_memory_kpi = DurationKpi('flowers_64_gpu_memory', 0.01, 0)

# Module-level list of every KPI declared above.
tracking_kpis = [
    cifar10_128_train_acc_kpi,
    cifar10_128_train_speed_kpi,
    cifar10_128_gpu_memory_kpi,
    flowers_64_train_speed_kpi,
    flowers_64_gpu_memory_kpi,
]
"""
continuous_evaluation.py

Declares the KPI objects of this model and collects them in `tracking_kpis`.
"""
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import AccKpi, DurationKpi

# One KPI object per tracked metric; the first argument is the metric name.
train_acc_kpi = AccKpi('train_acc', 0.2, 0)
pass_duration_kpi = DurationKpi('pass_duration', 0.02, 0, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [train_acc_kpi, pass_duration_kpi]
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi lstm_language_model_static_loss_card1 = CostKpi( 'lstm_language_model_static_loss_card1', 0.01, 0) lstm_language_model_static_duration_card1 = DurationKpi( 'lstm_language_model_static_duration_card1', 0.03, 0, actived=True) lstm_language_model_padding_loss_card1 = CostKpi( 'lstm_language_model_padding_loss_card1', 0.01, 0) lstm_language_model_padding_duration_card1 = DurationKpi( 'lstm_language_model_padding_duration_card1', 0.03, 0, actived=True) tracking_kpis = [ lstm_language_model_static_loss_card1, lstm_language_model_static_duration_card1, lstm_language_model_padding_loss_card1, lstm_language_model_padding_duration_card1, ] def parse_log(log): ''' This method should be implemented by model developers. The suggestion:
# KPI declarations for the continuous-evaluation (CE) run of this model.
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi  # not referenced in this snippet; import kept as-is
from kpi import DurationKpi
from kpi import AccKpi

# Metrics declared with DurationKpi.
p99_kpi = DurationKpi('99', 0.2, actived=True)
avg_kpi = DurationKpi('avg', 0.2, actived=True)

# Metrics declared with AccKpi.
qps_kpi = AccKpi('qps', 0.2, actived=True)
err_kpi = AccKpi('err', 0.2, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [p99_kpi, avg_kpi, qps_kpi, err_kpi]
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi imikolov_20_avg_ppl_kpi = CostKpi('imikolov_20_avg_ppl', 0.2, 0) imikolov_20_pass_duration_kpi = DurationKpi('imikolov_20_pass_duration', 0.02, 0, actived=True) imikolov_20_avg_ppl_kpi_card4 = CostKpi('imikolov_20_avg_ppl_card4', 0.2, 0) imikolov_20_pass_duration_kpi_card4 = DurationKpi( 'imikolov_20_pass_duration_card4', 0.03, 0, actived=True) tracking_kpis = [ imikolov_20_avg_ppl_kpi, imikolov_20_pass_duration_kpi, imikolov_20_avg_ppl_kpi_card4, imikolov_20_pass_duration_kpi_card4, ] def parse_log(log): ''' This method should be implemented by model developers. The suggestion:
"""
continuous_evaluation.py

Declares the KPI objects of this model and collects them in `tracking_kpis`.
"""
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

# CostKpi is not referenced in this snippet; the import is kept as-is.
from kpi import AccKpi, CostKpi, DurationKpi

# One KPI object per tracked metric; the first argument is the metric name.
imdb_32_train_speed_kpi = AccKpi('imdb_32_train_speed', 0.08, 0, actived=True)
imdb_32_gpu_memory_kpi = DurationKpi(
    'imdb_32_gpu_memory', 0.05, 0, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [imdb_32_train_speed_kpi, imdb_32_gpu_memory_kpi]
####this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi, AccKpi #### NOTE kpi.py should shared in models in some way!!!! train_cost_kpi = CostKpi('train_cost', 0.02, 0, actived=True) test_acc_kpi = AccKpi('test_acc', 0.01, 0, actived=False) train_speed_kpi = DurationKpi('train_speed', 0.1, 0, actived=True, unit_repr="s/epoch") train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0, actived=True) test_acc_card4_kpi = AccKpi('test_acc_card4', 0.01, 0, actived=False) train_speed_card4_kpi = DurationKpi('train_speed_card4', 0.1, 0, actived=True, unit_repr="s/epoch") tracking_kpis = [ train_cost_kpi, test_acc_kpi, train_speed_kpi, train_cost_card4_kpi, test_acc_card4_kpi, train_speed_card4_kpi, ] def parse_log(log): ''' This method should be implemented by model developers. The suggestion:
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi train_loss_card1 = CostKpi('train_loss_card1', 0.03, 0, actived=True) train_loss_card4 = CostKpi('train_loss_card4', 0.03, 0, actived=True) train_duration_card1 = DurationKpi('train_duration_card1', 0.01, 0, actived=True) train_duration_card4 = DurationKpi('train_duration_card4', 0.01, 0, actived=True) tracking_kpis = [ train_loss_card1, train_loss_card4, train_duration_card1, train_duration_card4, ] def parse_log(log): ''' This method should be implemented by model developers.
# KPI declarations for the continuous-evaluation (CE) run of this model.
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi
from kpi import DurationKpi
from kpi import AccKpi  # not referenced in this snippet; import kept as-is

# One KPI object per tracked metric; the first argument is the metric name.
train_avg_ppl_kpi = CostKpi('train_avg_ppl_kpi', 0.2, 0)
train_pass_duration_kpi = DurationKpi('train_pass_duration_kpi', 0.2, 0)

# Module-level list of every KPI declared above.
tracking_kpis = [train_avg_ppl_kpi, train_pass_duration_kpi]
# This file is only used for the continuous evaluation test!
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

# AccKpi is imported but not referenced in this snippet.
from kpi import CostKpi, DurationKpi, AccKpi

# NOTE(review): every `train_acc_*` metric below is built with CostKpi rather
# than AccKpi - confirm against kpi.py that this is intentional.

# --- metrics with the `cpu1_thread1` suffix ---
each_pass_duration_cpu1_thread1_kpi = DurationKpi(
    'each_pass_duration_cpu1_thread1', 0.08, 0, actived=True)
train_acc_cpu1_thread1_kpi = CostKpi('train_acc_cpu1_thread1', 0.08, 0)

# --- metrics with the `gpu1` suffix ---
each_pass_duration_gpu1_kpi = DurationKpi(
    'each_pass_duration_gpu1', 0.08, 0, actived=True)
train_acc_gpu1_kpi = CostKpi('train_acc_gpu1', 0.08, 0)

# --- metrics with the `gpu4` suffix ---
each_pass_duration_gpu4_kpi = DurationKpi(
    'each_pass_duration_gpu4', 0.08, 0, actived=True)
train_acc_gpu4_kpi = CostKpi('train_acc_gpu4', 0.08, 0)

# Module-level list of every KPI declared above, grouped by suffix.
tracking_kpis = [
    each_pass_duration_cpu1_thread1_kpi, train_acc_cpu1_thread1_kpi,
    each_pass_duration_gpu1_kpi, train_acc_gpu1_kpi,
    each_pass_duration_gpu4_kpi, train_acc_gpu4_kpi,
]
"""
continuous_evaluation.py

Declares the KPI objects of this model and collects them in `tracking_kpis`.
"""
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

# CostKpi is not referenced in this snippet; the import is kept as-is.
from kpi import AccKpi, CostKpi, DurationKpi

# One KPI object per tracked metric; the first argument is the metric name.
wmb_128_train_speed_kpi = AccKpi('wmb_128_train_speed', 0.2, 0)
wmb_128_gpu_memory_kpi = DurationKpi('wmb_128_gpu_memory', 0.2, 0)

# Module-level list of every KPI declared above.
tracking_kpis = [wmb_128_train_speed_kpi, wmb_128_gpu_memory_kpi]
"""
continuous_evaluation.py

Declares the KPI objects of this model and collects them in `tracking_kpis`.
"""
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi, DurationKpi

# --- metrics without a card suffix ---
lstm_train_cost_kpi = CostKpi('lstm_train_cost', 5, 0)
lstm_pass_duration_kpi = DurationKpi(
    'lstm_pass_duration', 0.02, 0, actived=True)

# --- metrics with the `card4` suffix ---
lstm_train_cost_kpi_card4 = CostKpi('lstm_train_cost_card4', 0.2, 0)
lstm_pass_duration_kpi_card4 = DurationKpi(
    'lstm_pass_duration_card4', 0.02, 0, actived=True)

# Module-level list of every KPI declared above.
tracking_kpis = [
    lstm_train_cost_kpi, lstm_pass_duration_kpi,
    lstm_train_cost_kpi_card4, lstm_pass_duration_kpi_card4,
]
""" continuous_evaluation.py """ import os import sys sys.path.append(os.environ['ceroot']) from kpi import AccKpi from kpi import CostKpi from kpi import DurationKpi cifar10_128_train_speed_kpi = AccKpi('cifar10_128_train_speed', 0.03, 0, actived=True) cifar10_128_gpu_memory_kpi = DurationKpi('cifar10_128_gpu_memory', 0.1, 0, actived=True) flowers_32_train_speed_kpi = AccKpi('flowers_32_train_speed', 0.03, 0, actived=True) flowers_32_gpu_memory_kpi = DurationKpi('flowers_32_gpu_memory', 0.1, 0, actived=True) tracking_kpis = [ cifar10_128_train_speed_kpi, cifar10_128_gpu_memory_kpi, flowers_32_train_speed_kpi,
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi, DurationKpi train_cost_card1_kpi = CostKpi('train_cost_card1', 0.08, 0, actived=True, desc='train cost') train_speed_card1_kpi = DurationKpi('train_speed_card1', 0.08, 0, actived=True, desc='train speed in one GPU card') train_cost_card4_kpi = CostKpi('train_cost_card4', 0.08, 0, actived=True, desc='train cost') train_speed_card4_kpi = DurationKpi('train_speed_card4', 0.3, 0, actived=True, desc='train speed in four GPU card') tracking_kpis = [ train_cost_card1_kpi, train_speed_card1_kpi, train_cost_card4_kpi, train_speed_card4_kpi
actived=True, desc='train cost') test_acc_top1_kpi = AccKpi('test_acc_top1', 0.02, 0, actived=True, desc='TOP1 ACC') test_acc_top5_kpi = AccKpi('test_acc_top5', 0.02, 0, actived=True, desc='TOP5 ACC') test_cost_kpi = CostKpi('test_cost', 0.02, 0, actived=True, desc='train cost') train_speed_kpi = DurationKpi('train_speed', 0.05, 0, actived=True, unit_repr='seconds/image', desc='train speed in one GPU card') train_acc_top1_card4_kpi = AccKpi('train_acc_top1_card4', 0.02, 0, actived=True, desc='TOP1 ACC') train_acc_top5_card4_kpi = AccKpi('train_acc_top5_card4', 0.02, 0, actived=True, desc='TOP5 ACC') train_cost_card4_kpi = CostKpi('train_cost_card4', 0.02, 0,
import os import sys sys.path.append(os.environ['ceroot']) from kpi import AccKpi from kpi import DurationKpi cifar10_128_AllReduce_GPU_4_Cards_train_acc_kpi = AccKpi( 'cifar10_128_AllReduce_GPU_4_Cards_train_acc', 0.03, 0, actived=True) cifar10_128_AllReduce_GPU_4_Cards_train_speed_kpi = AccKpi( 'cifar10_128_AllReduce_GPU_4_Cards_train_speed', 0.06, 0, actived=False) cifar10_128_AllReduce_4_Cards_gpu_memory_kpi = DurationKpi( 'cifar10_128_AllReduce_4_Cards_gpu_memory', 0.1, 0, actived=True) cifar10_128_Reduce_GPU_4_Cards_train_acc_kpi = AccKpi( 'cifar10_128_Reduce_GPU_4_Cards_train_acc', 0.03, 0, actived=True) cifar10_128_Reduce_GPU_4_Cards_train_speed_kpi = AccKpi( 'cifar10_128_Reduce_GPU_4_Cards_train_speed', 0.06, 0, actived=True) cifar10_128_Reduce_4_Cards_gpu_memory_kpi = DurationKpi( 'cifar10_128_Reduce_4_Cards_gpu_memory', 0.1, 0, actived=True) # Single Cards cifar10_128_GPU_1_Cards_train_acc_kpi = AccKpi( 'cifar10_128_GPU_1_Cards_train_acc', 0.03, 0, actived=True) cifar10_128_GPU_1_Cards_train_speed_kpi = AccKpi( 'cifar10_128_GPU_1_Cards_train_speed', 0.06, 0, actived=False) cifar10_128_1_Cards_gpu_memory_kpi = DurationKpi( 'cifar10_128_1_Cards_gpu_memory', 0.1, 0, actived=True) flowers_64_AllReduce_GPU_4_Cards_train_acc_kpi = AccKpi(
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi imikolov_20_avg_ppl_kpi = CostKpi('lstm_language_model_loss', 0.02, 0) imikolov_20_pass_duration_kpi = DurationKpi( 'lstm_language_model_duration', 0.02, 0, actived=True) tracking_kpis = [ imikolov_20_avg_ppl_kpi, imikolov_20_pass_duration_kpi, ] def parse_log(log): ''' This method should be implemented by model developers. The suggestion: each line in the log should be key, value, for example: " train_cost\t1.0 test_cost\t1.0 train_cost\t1.0 train_cost\t1.0
# This file is only used for the continuous evaluation test!
import os
import sys

# The `kpi` module is resolved through the directory named by the `ceroot`
# environment variable, so the path must be extended before importing it.
sys.path.append(os.environ['ceroot'])

from kpi import CostKpi, DurationKpi

# NOTE(review): the `train_avg_acc_*` metrics below are built with CostKpi,
# the same class used for the cost metrics - confirm this is intentional.

# --- metrics with the `card1` suffix ---
each_pass_duration_card1_kpi = DurationKpi(
    'each_pass_duration_card1', 0.08, 0, actived=True)
train_avg_cost_card1_kpi = CostKpi('train_avg_cost_card1', 0.08, 0)
train_avg_acc_card1_kpi = CostKpi('train_avg_acc_card1', 0.02, 0)

# --- metrics with the `card4` suffix ---
each_pass_duration_card4_kpi = DurationKpi(
    'each_pass_duration_card4', 0.08, 0, actived=True)
train_avg_cost_card4_kpi = CostKpi('train_avg_cost_card4', 0.08, 0)
train_avg_acc_card4_kpi = CostKpi('train_avg_acc_card4', 0.02, 0)

# Module-level list of every KPI declared above, card1 entries first.
tracking_kpis = [
    each_pass_duration_card1_kpi, train_avg_cost_card1_kpi, train_avg_acc_card1_kpi,
    each_pass_duration_card4_kpi, train_avg_cost_card4_kpi, train_avg_acc_card4_kpi,
]
# this file is only used for continuous evaluation test! import os import sys sys.path.append(os.environ['ceroot']) from kpi import CostKpi from kpi import DurationKpi from kpi import AccKpi each_step_duration_card1 = DurationKpi('each_step_duration_card1', 0.01, 0, actived=True) train_cost_card1 = CostKpi('train_cost_card1', 0.03, 0, actived=False) train_precision_card1 = CostKpi('train_precision_card1', 0.03, 0, actived=True) train_recall_card1 = CostKpi('train_recall_card1', 0.03, 0, actived=True) train_f1_card1 = CostKpi('train_f1_card1', 0.03, 0, actived=True) each_step_duration_card4 = DurationKpi('each_step_duration_card4', 0.01, 0, actived=True) train_cost_card4 = CostKpi('train_cost_card4', 0.03, 0, actived=True) train_precision_card4 = CostKpi('train_precision_card4', 0.05, 0, actived=True) train_recall_card4 = CostKpi('train_recall_card4', 0.03, 0, actived=True) train_f1_card4 = CostKpi('train_f1_card4', 0.05, 0, actived=True) tracking_kpis = [ each_step_duration_card1, train_cost_card1, train_precision_card1,